-
Simeon Rossmann authored
ena_parser.Rmd 2.10 KiB
title: "ena parser"
author: "Simeon Lim Rossmann"
date: "2024-10-23"
output: html_document
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
# Template TSV that the rest of this section reads from.
template <- "fastq2_template_1729686589164.tsv"

# First line of the template only, split into whitespace-separated tokens.
header <- scan(file = template, what = character(), nlines = 1)

# Everything after the first line is parsed as a table and right-joined onto a
# single row of constant sequencing metadata. No `by` is given, so dplyr joins
# on whatever column names the two tables share.
# NOTE(review): presumably the template already has these five columns --
# confirm, otherwise the join has no common key.
tbl <- dplyr::right_join(
  x = readr::read_tsv(file = template, skip = 1),
  y = tibble(
    instrument_model = "Illumina MiSeq",
    library_source = "METAGENOMIC",
    library_selection = "PCR",
    library_strategy = "AMPLICON",
    library_layout = "PAIRED"
  )
)
#' Build read-submission rows for one marker
#'
#' Lists the paired-end FASTQ files in `<base_dir>/<marker>/raw_data`,
#' computes their MD5 checksums and returns one row per forward/reverse pair.
#'
#' @param marker Name of the marker sub-directory (e.g. `"16S"`). It is also
#'   appended to `"NIBIO_mpt_"` to form `library_name`.
#' @param base_dir Directory containing one sub-directory per marker. Defaults
#'   to the run folder this script was originally written against.
#' @return A tibble with the columns `sample`, `study`, `instrument_model`,
#'   `library_name`, `library_source`, `library_selection`,
#'   `library_strategy`, `library_layout`, `forward_file_name`,
#'   `forward_file_md5`, `reverse_file_name` and `reverse_file_md5`. All
#'   columns are character vectors; the tibble has zero rows when no forward
#'   files are found.
#' @details Stops with an error when the forward and reverse files do not pair
#'   up one-to-one.
file_reader <- function(marker,
                        base_dir = "~/Documents/Marte_Metabarcoding/Run_August21") {
  raw_dir <- file.path(base_dir, marker, "raw_data")

  # `pattern` is a regular expression: escape the dots and anchor at the end
  # so only the FASTQ archives match (not e.g. "*.fastq.gz.md5" files).
  fs <- list.files(raw_dir, pattern = "R1_001\\.fastq\\.gz$")
  rs <- list.files(raw_dir, pattern = "R2_001\\.fastq\\.gz$")

  # Derive the reverse file name each forward file should have and insist that
  # exactly those files exist. Without this check a lone reverse file would be
  # recycled by tibble() against every forward file.
  mates <- sub("R1_001\\.fastq\\.gz$", "R2_001.fastq.gz", fs)
  if (!identical(sort(mates), sort(rs))) {
    stop(
      "Forward (R1) and reverse (R2) FASTQ files do not pair up for marker '",
      marker, "' in ", raw_dir,
      call. = FALSE
    )
  }
  # Use the derived names so every row is guaranteed to hold a matching pair.
  rs <- mates

  # md5sum() is vectorised and returns character(0) for no files, so the
  # checksum columns stay character even for an empty directory; unname()
  # drops the file paths it attaches as names.
  md5_f <- unname(tools::md5sum(file.path(raw_dir, fs)))
  md5_r <- unname(tools::md5sum(file.path(raw_dir, rs)))

  # Sample id: strip everything up to the last "_S" (keeping the "S") and
  # everything from "_L001" onwards.
  samp <- stringr::str_remove(
    stringr::str_replace(fs, ".*_S", "S"),
    "_L001.*"
  )

  tibble::tibble(
    sample = samp,
    # NOTE(review): "ENA_acc" looks like a placeholder for the study
    # accession -- confirm before submission.
    study = "ENA_acc",
    instrument_model = "Illumina MiSeq",
    library_name = paste0("NIBIO_mpt_", marker),
    library_source = "METAGENOMIC",
    library_selection = "PCR",
    library_strategy = "AMPLICON",
    library_layout = "PAIRED",
    forward_file_name = fs,
    forward_file_md5 = md5_f,
    reverse_file_name = rs,
    reverse_file_md5 = md5_r
  )
}
# Marker sub-directories to collect read files from.
markers <- c("Nems", "16S", "FITS1", "FITS2", "OITS", "Trich")

# Run file_reader() once per marker and stack the per-marker tables row-wise.
tb <- dplyr::bind_rows(lapply(X = markers, FUN = file_reader))