Skip to content
Snippets Groups Projects
ena_parser.Rmd 2.10 KiB
---
title: "ena parser"
author: "Simeon Lim Rossmann"
date: "2024-10-23"
output: html_document
---
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
# Template file used as the basis for the submission table.
template <- "fastq2_template_1729686589164.tsv"

# The first line of the template is kept on its own (split on whitespace);
# read_tsv() skips it below, so the second line supplies the column names.
header <- scan(file = template, what = character(), nlines = 1)

# Right join keeps every row of the one-row tibble of run-wide constants.
# No `by` is given, so the join uses all column names the two tables share.
tbl <- dplyr::right_join(
  readr::read_tsv(template, skip = 1),
  tibble(
    instrument_model = "Illumina MiSeq",
    library_source = "METAGENOMIC",
    library_selection = "PCR",
    library_strategy = "AMPLICON",
    library_layout = "PAIRED"
  )
)

#' Build submission rows for one marker's paired-end FASTQ files
#'
#' Lists the forward (`R1_001.fastq.gz`) and reverse (`R2_001.fastq.gz`)
#' files in `<base_dir>/<marker>/raw_data`, computes their MD5 checksums and
#' returns one row per read pair together with the fixed library metadata.
#'
#' @param marker Single string naming the marker sub-directory, e.g. `"16S"`.
#' @param base_dir Directory that holds one sub-directory per marker.
#'   Defaults to the run directory this script was written for.
#' @return A tibble with one row per read pair and the columns `sample`,
#'   `study`, `instrument_model`, `library_name`, `library_source`,
#'   `library_selection`, `library_strategy`, `library_layout`,
#'   `forward_file_name`, `forward_file_md5`, `reverse_file_name` and
#'   `reverse_file_md5`. All columns are character; the tibble has zero rows
#'   when no read files are found.
file_reader <- function(
    marker,
    base_dir = "~/Documents/Marte_Metabarcoding/Run_August21") {
  stopifnot(is.character(marker), length(marker) == 1L, !is.na(marker))

  raw_dir <- path.expand(file.path(base_dir, marker, "raw_data"))
  if (!dir.exists(raw_dir)) {
    # list.files() returns character(0) for a missing directory, so a typo in
    # `marker` would otherwise pass unnoticed as "no samples".
    warning("Raw data directory not found: ", raw_dir, call. = FALSE)
  }

  # `pattern` is a regular expression: escape the dots and anchor at the end
  # so sidecar files such as "*_R1_001.fastq.gz.md5" are not picked up.
  fwd_pattern <- "R1_001\\.fastq\\.gz$"
  rev_pattern <- "R2_001\\.fastq\\.gz$"

  fwd_paths <- list.files(raw_dir, pattern = fwd_pattern, full.names = TRUE)
  rev_paths <- list.files(raw_dir, pattern = rev_pattern, full.names = TRUE)
  fwd_names <- basename(fwd_paths)
  rev_names <- basename(rev_paths)

  # Rows are paired by position, so every forward file must have its reverse
  # mate at the same index; otherwise checksums and names would be misaligned.
  expected_rev <- sub(fwd_pattern, "R2_001.fastq.gz", fwd_names)
  if (!identical(rev_names, expected_rev)) {
    stop(
      "Forward and reverse FASTQ files in ", raw_dir, " do not pair up: ",
      length(fwd_names), " R1 file(s) vs ", length(rev_names), " R2 file(s).",
      call. = FALSE
    )
  }

  # Reduce the file name to the "S<n>" token that precedes "_L001".
  samp <- stringr::str_remove(
    stringr::str_replace(fwd_names, ".*_S", "S"),
    "_L001.*"
  )

  tibble::tibble(
    sample = samp,
    study = "ENA_acc",
    instrument_model = "Illumina MiSeq",
    library_name = paste0("NIBIO_mpt_", marker),
    library_source = "METAGENOMIC",
    library_selection = "PCR",
    library_strategy = "AMPLICON",
    library_layout = "PAIRED",
    forward_file_name = fwd_names,
    # md5sum() is vectorised and always returns a character vector, also for
    # zero files; drop its path names so the column is a plain vector.
    forward_file_md5 = unname(tools::md5sum(fwd_paths)),
    reverse_file_name = rev_names,
    reverse_file_md5 = unname(tools::md5sum(rev_paths))
  )
}

# Marker sub-directories to process.
markers <- c("Nems", "16S", "FITS1", "FITS2", "OITS", "Trich")

# Build one table per marker and stack them into a single tibble.
tb <- dplyr::bind_rows(lapply(markers, file_reader))