diff --git a/nextflow.config b/nextflow.config
new file mode 100644
index 0000000000000000000000000000000000000000..4fa053e93be03f7c1642b2dce4b19f36012b8edd
--- /dev/null
+++ b/nextflow.config
@@ -0,0 +1 @@
+params.genome = "MP1831"
diff --git a/pinf_genome_annotation.nf b/pinf_genome_annotation.nf
index 4b6c0470cdd6d6ae18e149d39dd5fb55caa325dd..d575b34a8793399505f25f2b9a0caec2337a4654 100644
--- a/pinf_genome_annotation.nf
+++ b/pinf_genome_annotation.nf
@@ -1,15 +1,15 @@
 /*
 Concept for the genome annotation pipeline
-
 1. Parse genome to protein using gff2prot util from funannotate
 2. Remove everything after stop codon in protein fasta with: seqkit -> sed
-3.
-
-
-*/
+3. */
 
-// Script parameters
+// Script parameters: defaults only; nextflow.config or --<name> on the command line overrides them.
+// params.project must end with "/" because it is concatenated directly with params.genome below.
+params.project = "/home/nibio/Documents/Erik_NCBI_tbl_test/Simeon_annotering/table2asn/"
 params.genome = "genome_basename"
+params.genomeGFF = "${params.project}${params.genome}.gff"
+params.genomeFasta = "${params.project}${params.genome}.fsa"
 
 process extractProteins {
     input:
@@ -17,11 +17,23 @@ process extractProteins {
     path genomeGFF
 
     output:
-    path "${genome}_protein.fa"
+    path "${params.genome}_protein.fa"
+
+    // Translate the gene models to protein, put each sequence on a single line (seqkit -w 0),
+    // then cut every line at the first '*' so nothing after a stop codon is kept.
+    // pipefail: without it a failing funannotate would still leave an empty FASTA and exit status 0.
+    script:
     """
-    funannotate util gff2prot -g $genomeGFF -f $genomeFasta --no_stop |\
-    seqkit seq -w 0 |\
-    sed 's/\*.*//' >\
-    ${genome}_protein.fa
+    set -o pipefail
+    funannotate util gff2prot -g $genomeGFF -f $genomeFasta --no_stop | seqkit seq -w 0 | sed 's/\\*.*//' > "${params.genome}_protein.fa"
     """
 }
+
+
+// Entry point: run extractProteins on the genome FASTA together with its GFF annotation.
+workflow {
+    // checkIfExists turns a wrong params.project / params.genome into an error at startup.
+    def genome_ch = Channel.fromPath(params.genomeFasta, checkIfExists: true)
+    // Convert the GFF path string into a file object before handing it to the 'path' input.
+    extractProteins(genome_ch, file(params.genomeGFF, checkIfExists: true))
+}