From 4addc4e81aabc1e54ddde58699cfa0eb72434060 Mon Sep 17 00:00:00 2001 From: Simeon <51403284+simeross@users.noreply.github.com> Date: Fri, 14 Jun 2024 12:46:09 +0200 Subject: [PATCH] funannotate integration --- .../nextflow_results/MidP1831_erik_protein.fa | 2 +- Mid_test_data/nextflow_results/fixed.gff | 2 +- pinf_genome_annotation.nf | 56 ++++++++++++++----- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/Mid_test_data/nextflow_results/MidP1831_erik_protein.fa b/Mid_test_data/nextflow_results/MidP1831_erik_protein.fa index 8651bf1..6fd5905 120000 --- a/Mid_test_data/nextflow_results/MidP1831_erik_protein.fa +++ b/Mid_test_data/nextflow_results/MidP1831_erik_protein.fa @@ -1 +1 @@ -/home/nibio/Documents/Git/DivGene-genome-annotation/work/e7/a068fcec054038bf87119d1ee6337f/MidP1831_erik_protein.fa \ No newline at end of file +/home/nibio/Documents/Git/DivGene-genome-annotation/work/78/6fcc4ffe77038176167dfd203d726c/MidP1831_erik_protein.fa \ No newline at end of file diff --git a/Mid_test_data/nextflow_results/fixed.gff b/Mid_test_data/nextflow_results/fixed.gff index 5a8fb47..556cc72 120000 --- a/Mid_test_data/nextflow_results/fixed.gff +++ b/Mid_test_data/nextflow_results/fixed.gff @@ -1 +1 @@ -/home/nibio/Documents/Git/DivGene-genome-annotation/work/70/b78113120a64510c00c77fa6634cdb/fixed.gff \ No newline at end of file +/home/nibio/Documents/Git/DivGene-genome-annotation/work/df/36adcda51128f8821bac442fd4f4b5/fixed.gff \ No newline at end of file diff --git a/pinf_genome_annotation.nf b/pinf_genome_annotation.nf index b5502ce..1ae7ee7 100644 --- a/pinf_genome_annotation.nf +++ b/pinf_genome_annotation.nf @@ -10,9 +10,12 @@ // Script parameters, some modified in nextflow.config params.project = "project_path" +params.species = "Phytophthora infestans" params.genome = "genome_basename" +params.speciesStrain = "${params.species} ${params.genome}" params.genomeGFF = "${params.project}/${params.genome}.gff" params.genomeFasta = 
"${params.project}/${params.genome}.fsa" +params.sbt = "${params.project}/template.sbt" process fixGFF { publishDir "${params.project}/nextflow_results", mode: 'symlink' @@ -40,7 +43,9 @@ process extractProteins { path "${params.genome}_protein.fa" """ - funannotate util gff2prot -g $fixedGFF -f $genomeFasta --no_stop | seqkit seq -w 0 | sed 's/\\*.*//' > "${params.genome}_protein.fa" + funannotate util gff2prot -g $fixedGFF -f $genomeFasta --no_stop |\ + seqkit seq -w 0 |\ + sed 's/\\*.*//' > "${params.genome}_protein.fa" """ } @@ -49,12 +54,16 @@ input: path proteinFasta output: -path "${params.genome}_em*" +path "${params.genome}_em.emapper.annotations" -publishDir "${params.project}/nextflow_results", mode: 'symlink' +publishDir "${params.project}/nextflow_results/emapper", mode: 'symlink' """ -emapper.py -i $proteinFasta -o "${params.genome}_em" --data_dir /home/nibio/funannotate_db --cpu 0 --tax_scope Peronosporales +emapper.py -i $proteinFasta \ +-o "${params.genome}_em" \ +--data_dir /home/nibio/funannotate_db \ +--cpu 0 \ +--tax_scope Peronosporales """ } @@ -63,29 +72,50 @@ input: path proteinFasta output: -path "${params.genome}_interpro*" +path "${params.genome}_interpro.xml" -publishDir "${params.project}/nextflow_results", mode: 'symlink' +publishDir "${params.project}/nextflow_results/interpro", mode: 'symlink' """ -~/interproscan-5.68-100.0/interproscan.sh -i $proteinFasta -b "${params.genome}_interpro" +~/interproscan-5.68-100.0/interproscan.sh \ + -i $proteinFasta \ + -f xml \ + -b "${params.genome}_interpro" """ } -/* - - - process funannotateIntegration{ +input: +path fixedGFF +path genomeFasta +path emapperAnnot +path interproAnnot +path sbt +val speciesStrain +output: +path "funannotate/*" + +publishDir "${params.project}/nextflow_results", mode: 'symlink' +""" +funannotate annotate \ +--gff $fixedGFF \ +--fasta $genomeFasta \ +-s "$speciesStrain" \ +-o "funannotate" \ +--sbt $sbt \ +--eggnog $emapperAnnot \ +--iprscan $interproAnnot \ 
+--cpus 8 +""" } -*/ workflow { def gff_ch = Channel.fromPath(params.genomeGFF) fixGFF(gff_ch) extractProteins(params.genomeFasta, fixGFF.out) searchAndAnnotateEggnogg(extractProteins.out) - searchAndAnnotateInterPro (extractProteins.out) + searchAndAnnotateInterPro(extractProteins.out) + funannotateIntegration(fixGFF.out, params.genomeFasta, searchAndAnnotateEggnogg.out, searchAndAnnotateInterPro.out, params.sbt, params.speciesStrain) } \ No newline at end of file -- GitLab