From fc91192737f4e511780aa190284a1c953ebf33ed Mon Sep 17 00:00:00 2001
From: Simeon <51403284+simeross@users.noreply.github.com>
Date: Thu, 13 Jun 2024 16:31:21 +0200
Subject: [PATCH] working minimal nextflow script

---
Note: this copy of the patch was rebuilt from a whitespace-collapsed,
single-line dump. Line breaks follow the hunk headers and the diffstat
(15 insertions, 11 deletions). The leading indentation of the Nextflow
lines and any whitespace-only differences (e.g. the removed/re-added
"// Script parameters" line) could not be recovered, so apply with
`git apply --ignore-whitespace` and review the result.

 nextflow.config           |  1 +
 pinf_genome_annotation.nf | 25 ++++++++++++++-----------
 2 files changed, 15 insertions(+), 11 deletions(-)
 create mode 100644 nextflow.config

diff --git a/nextflow.config b/nextflow.config
new file mode 100644
index 0000000..4fa053e
--- /dev/null
+++ b/nextflow.config
@@ -0,0 +1 @@
+params.genome = "MP1831"
diff --git a/pinf_genome_annotation.nf b/pinf_genome_annotation.nf
index 4b6c047..d575b34 100644
--- a/pinf_genome_annotation.nf
+++ b/pinf_genome_annotation.nf
@@ -1,15 +1,14 @@
 /*
 Concept for the genome annotation pipeline
-
 1. Parse genome to protein using gff2prot util from funannotate
 2. Remove everything after stop codon in protein fasta with: seqkit -> sed
-3.
-
-
-*/
+3. */
 
-// Script parameters
+// Script parameters
+params.project = "/home/nibio/Documents/Erik_NCBI_tbl_test/Simeon_annotering/table2asn/"
 params.genome = "genome_basename"
+params.genomeGFF = "${params.project}${params.genome}.gff"
+params.genomeFasta = "${params.project}${params.genome}.fsa"
 
 process extractProteins {
     input:
@@ -17,11 +16,15 @@ process extractProteins {
     path genomeGFF
 
     output:
-    path "${genome}_protein.fa"
+    path "${params.genome}_protein.fa"
+
     """
-    funannotate util gff2prot -g $genomeGFF -f $genomeFasta --no_stop |\
-    seqkit seq -w 0 |\
-    sed 's/\*.*//' >\
-    ${genome}_protein.fa
+    funannotate util gff2prot -g $genomeGFF -f $genomeFasta --no_stop | seqkit seq -w 0 | sed 's/\\*.*//' > "${params.genome}_protein.fa"
     """
 }
+
+
+workflow {
+    def genome_ch = Channel.fromPath(params.genomeFasta)
+    extractProteins(genome_ch, params.genomeGFF)
+}
\ No newline at end of file
-- 
GitLab