From 4addc4e81aabc1e54ddde58699cfa0eb72434060 Mon Sep 17 00:00:00 2001
From: Simeon <51403284+simeross@users.noreply.github.com>
Date: Fri, 14 Jun 2024 12:46:09 +0200
Subject: [PATCH] funannotate integration

---
 .../nextflow_results/MidP1831_erik_protein.fa |  2 +-
 Mid_test_data/nextflow_results/fixed.gff      |  2 +-
 pinf_genome_annotation.nf                     | 56 ++++++++++++++-----
 3 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/Mid_test_data/nextflow_results/MidP1831_erik_protein.fa b/Mid_test_data/nextflow_results/MidP1831_erik_protein.fa
index 8651bf1..6fd5905 120000
--- a/Mid_test_data/nextflow_results/MidP1831_erik_protein.fa
+++ b/Mid_test_data/nextflow_results/MidP1831_erik_protein.fa
@@ -1 +1 @@
-/home/nibio/Documents/Git/DivGene-genome-annotation/work/e7/a068fcec054038bf87119d1ee6337f/MidP1831_erik_protein.fa
\ No newline at end of file
+/home/nibio/Documents/Git/DivGene-genome-annotation/work/78/6fcc4ffe77038176167dfd203d726c/MidP1831_erik_protein.fa
\ No newline at end of file
diff --git a/Mid_test_data/nextflow_results/fixed.gff b/Mid_test_data/nextflow_results/fixed.gff
index 5a8fb47..556cc72 120000
--- a/Mid_test_data/nextflow_results/fixed.gff
+++ b/Mid_test_data/nextflow_results/fixed.gff
@@ -1 +1 @@
-/home/nibio/Documents/Git/DivGene-genome-annotation/work/70/b78113120a64510c00c77fa6634cdb/fixed.gff
\ No newline at end of file
+/home/nibio/Documents/Git/DivGene-genome-annotation/work/df/36adcda51128f8821bac442fd4f4b5/fixed.gff
\ No newline at end of file
diff --git a/pinf_genome_annotation.nf b/pinf_genome_annotation.nf
index b5502ce..1ae7ee7 100644
--- a/pinf_genome_annotation.nf
+++ b/pinf_genome_annotation.nf
@@ -10,9 +10,12 @@
 
 // Script parameters, some modified in nextflow.config
 params.project = "project_path"
+params.species = "Phytophthora infestans"
 params.genome = "genome_basename"
+params.speciesStrain = "${params.species} ${params.genome}"
 params.genomeGFF = "${params.project}/${params.genome}.gff"
 params.genomeFasta = "${params.project}/${params.genome}.fsa"
+params.sbt = "${params.project}/template.sbt"
 
 process fixGFF {
   publishDir "${params.project}/nextflow_results", mode: 'symlink'
@@ -40,7 +43,9 @@ process extractProteins {
     path "${params.genome}_protein.fa"
 
     """
-    funannotate util gff2prot -g $fixedGFF -f $genomeFasta --no_stop | seqkit seq -w 0 | sed 's/\\*.*//' > "${params.genome}_protein.fa"
+    funannotate util gff2prot -g $fixedGFF -f $genomeFasta --no_stop |\
+     seqkit seq -w 0 |\
+     sed 's/\\*.*//' > "${params.genome}_protein.fa"
     """
 }
 
@@ -49,12 +54,16 @@ input:
 path proteinFasta
 
 output:
-path "${params.genome}_em*"
+path "${params.genome}_em.emapper.annotations"
 
-publishDir "${params.project}/nextflow_results", mode: 'symlink'
+publishDir "${params.project}/nextflow_results/emapper", mode: 'symlink'
 
 """
-emapper.py -i $proteinFasta -o  "${params.genome}_em" --data_dir /home/nibio/funannotate_db --cpu 0 --tax_scope Peronosporales
+emapper.py -i $proteinFasta \
+-o  "${params.genome}_em" \
+--data_dir /home/nibio/funannotate_db \
+--cpu 0 \
+--tax_scope Peronosporales
 """
 }
 
@@ -63,29 +72,50 @@ input:
 path proteinFasta
 
 output:
-path "${params.genome}_interpro*"
+path "${params.genome}_interpro.xml"
 
-publishDir "${params.project}/nextflow_results", mode: 'symlink'
+publishDir "${params.project}/nextflow_results/interpro", mode: 'symlink'
 
 """
-~/interproscan-5.68-100.0/interproscan.sh -i $proteinFasta -b "${params.genome}_interpro"
+~/interproscan-5.68-100.0/interproscan.sh \
+  -i $proteinFasta \
+  -f xml \
+  -b "${params.genome}_interpro"
 """
 }
 
-/*
-
-
-
 
 process funannotateIntegration{
+input: // positional inputs; order must match the funannotateIntegration(...) call in the workflow block
+path fixedGFF
+path genomeFasta
+path emapperAnnot
+path interproAnnot
+path sbt
+val speciesStrain
 
+output:
+path "funannotate/*"
+// In the script below, -s is quoted because speciesStrain contains spaces; --eggnog / --iprscan take the precomputed eggNOG-mapper annotations and InterProScan XML.
+publishDir "${params.project}/nextflow_results", mode: 'symlink'
+"""
+funannotate annotate \
+--gff $fixedGFF \
+--fasta $genomeFasta \
+-s "$speciesStrain" \
+-o "funannotate" \
+--sbt $sbt  \
+--eggnog $emapperAnnot \
+--iprscan $interproAnnot \
+--cpus 8
+"""
 }
-*/
 
 workflow {
   def gff_ch = Channel.fromPath(params.genomeGFF)
   fixGFF(gff_ch)
   extractProteins(params.genomeFasta, fixGFF.out)
   searchAndAnnotateEggnogg(extractProteins.out)
-  searchAndAnnotateInterPro (extractProteins.out)
+  searchAndAnnotateInterPro(extractProteins.out)
+  funannotateIntegration(fixGFF.out, params.genomeFasta, searchAndAnnotateEggnogg.out, searchAndAnnotateInterPro.out, params.sbt, params.speciesStrain)
 }
\ No newline at end of file
-- 
GitLab