From fc91192737f4e511780aa190284a1c953ebf33ed Mon Sep 17 00:00:00 2001
From: Simeon <51403284+simeross@users.noreply.github.com>
Date: Thu, 13 Jun 2024 16:31:21 +0200
Subject: [PATCH] working minimal nextflow script

---
 nextflow.config           |  1 +
 pinf_genome_annotation.nf | 25 ++++++++++++++-----------
 2 files changed, 15 insertions(+), 11 deletions(-)
 create mode 100644 nextflow.config

diff --git a/nextflow.config b/nextflow.config
new file mode 100644
index 0000000..4fa053e
--- /dev/null
+++ b/nextflow.config
@@ -0,0 +1 @@
+params.genome = "MP1831" // genome basename; replaces the "genome_basename" default declared in pinf_genome_annotation.nf
diff --git a/pinf_genome_annotation.nf b/pinf_genome_annotation.nf
index 4b6c047..d575b34 100644
--- a/pinf_genome_annotation.nf
+++ b/pinf_genome_annotation.nf
@@ -1,15 +1,14 @@
 /* Concept for the genome annotation pipeline
-
 1. Parse genome to protein using gff2prot util from funannotate
 2. Remove everything after stop codon in protein fasta with:
     seqkit -> sed
-3. 
-
-
-*/
+3. */
 
-// Script parameters
+// Script parameters: defaults that nextflow.config or --<name> on the command line can override
+params.project = "/home/nibio/Documents/Erik_NCBI_tbl_test/Simeon_annotering/table2asn/"
 params.genome = "genome_basename"
+params.genomeGFF = "${params.project}/${params.genome}.gff"    // explicit "/" so project works with or without a trailing slash
+params.genomeFasta = "${params.project}/${params.genome}.fsa"  // same directory and basename as the GFF, ".fsa" extension
 
 process extractProteins {
     input:
@@ -17,11 +16,15 @@ process extractProteins {
     path genomeGFF
 
     output:
-    path "${genome}_protein.fa"
+    path "${params.genome}_protein.fa"
+
     """
-    funannotate util gff2prot -g $genomeGFF -f $genomeFasta --no_stop |\
-     seqkit seq -w 0 |\
-     sed 's/\*.*//' >\
-     ${genome}_protein.fa
+    funannotate util gff2prot -g $genomeGFF -f $genomeFasta --no_stop | seqkit seq -w 0 | sed 's/\\*.*//' > "${params.genome}_protein.fa"
     """
 }
+
+
+workflow { // Entry point: pass the genome FASTA and its GFF annotation to extractProteins
+  def genome_ch = Channel.fromPath(params.genomeFasta, checkIfExists: true) // abort early with a clear error if the FASTA is missing
+  extractProteins(genome_ch, file(params.genomeGFF, checkIfExists: true))   // pass the GFF as a checked file object, not a bare string
+}
\ No newline at end of file
-- 
GitLab