Skip to content
Snippets Groups Projects
Commit a9bca3ea authored by Simeon's avatar Simeon
Browse files

sra parser

parent 73a1fa3c
Branches
No related tags found
No related merge requests found
File moved
sample_name library_ID title library_strategy library_source library_selection library_layout platform instrument_model design_description filetype filename filename2 filename3 filename4 assembly fasta_file sample
---
title: "sra parser"
author: "Simeon Lim Rossmann"
date: "2024-11-22"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
```
```{r fill tbl}
# Path of the SRA metadata template (tab-separated); its header supplies the
# column names and column order of the final submission table.
template <- "SRA_metadata.txt"

# Run-wide constants that are identical for every library in this submission.
sra_defaults <- tibble::tibble(
  instrument_model = "Illumina MiSeq",
  platform = "ILLUMINA",
  library_source = "METAGENOMIC",
  library_selection = "PCR",
  library_strategy = "AMPLICON",
  library_layout = "PAIRED",
  filetype = "fastq"
)

# Read every column as character instead of relying on type guessing: the
# template is presumably header-only (TODO confirm), and the join keys must
# have the same type as the character constants above.
tbl <- readr::read_tsv(
  template,
  col_types = readr::cols(.default = readr::col_character())
)

# Join on the columns shared by both tables. This is the same key set the
# implicit natural join used, but spelled out so the keys are visible.
tbl <- dplyr::right_join(
  tbl,
  sra_defaults,
  by = intersect(names(tbl), names(sra_defaults))
)
#' Build the SRA metadata rows for one marker's paired-end FASTQ files
#'
#' Lists the forward (R1) and reverse (R2) read files found in
#' `<base_dir>/<marker[1]>/raw_data` and returns one row per read pair.
#'
#' @param marker Character vector of length >= 2. `marker[1]` is the marker
#'   name (used as sub-directory name, in `library_ID` and in the primer
#'   description); `marker[2]` is the target description inserted into
#'   `design_description`.
#' @param base_dir Directory that contains one sub-directory per marker.
#'   Defaults to the path that was previously hard-coded.
#' @return A tibble with the columns `sample`, `library_ID`,
#'   `instrument_model`, `platform`, `library_source`, `library_selection`,
#'   `library_strategy`, `library_layout`, `filetype`, `design_description`,
#'   `filename` (R1 file name) and `filename2` (R2 file name). It has zero
#'   rows, and a warning is raised, when no read files are found.
file_reader <- function(
    marker,
    base_dir = "~/Documents/Marte_Metabarcoding/Run_August21") {
  stopifnot(is.character(marker), length(marker) >= 2L)

  raw_dir <- file.path(base_dir, marker[1], "raw_data")

  # Escape the dots and anchor at the end of the name so that side-car files
  # such as "*_R1_001.fastq.gz.md5" are not mistaken for read files.
  fwd <- list.files(raw_dir, pattern = "R1_001\\.fastq\\.gz$")
  rev <- list.files(raw_dir, pattern = "R2_001\\.fastq\\.gz$")
  # Only bare file names are needed here. If checksums are wanted, re-list with
  # full.names = TRUE and pass the paths to tools::md5sum().

  # Every forward file must have its reverse mate at the same position;
  # otherwise tibble() would either fail with an opaque size error or silently
  # pair reads from different samples.
  expected_rev <- sub("R1_001(\\.fastq\\.gz)$", "R2_001\\1", fwd)
  if (!identical(expected_rev, rev)) {
    stop(
      "Forward and reverse read files in '", raw_dir, "' do not pair up ",
      "(", length(fwd), " R1 file(s), ", length(rev), " R2 file(s)).",
      call. = FALSE
    )
  }
  if (length(fwd) == 0L) {
    warning("No R1/R2 FASTQ files found in '", raw_dir, "'.", call. = FALSE)
  }

  # Sample ID: keep everything from the last "_S" onwards, then drop the
  # "_L001..." lane/read suffix (e.g. "A_S1_L001_R1_001.fastq.gz" -> "S1").
  samp <- stringr::str_replace(fwd, ".*_S", "S")
  samp <- stringr::str_remove(samp, "_L001.*")

  lib_name <- paste0("NIBIO_mpt_", marker[1], "_", samp)

  design <- paste0(
    "Total DNA was extracted from 45 mL soil samples. ",
    "Each soil sample and DNA from the appropriate ",
    'mock control was amplified with "', marker[1],
    '" PCR primers targeting the amplification ',
    "of ", marker[2], " sequences. Samples were ",
    "indexed in the amplification PCR (1-step) ",
    "and demultiplexed by the MiSeq."
  )

  tibble::tibble(
    sample = samp,
    library_ID = lib_name,
    instrument_model = "Illumina MiSeq",
    platform = "ILLUMINA",
    library_source = "METAGENOMIC",
    library_selection = "PCR",
    library_strategy = "AMPLICON",
    library_layout = "PAIRED",
    filetype = "fastq",
    design_description = design,
    filename = fwd,
    filename2 = rev
  )
}
# One entry per amplicon marker: c(<marker directory / primer name>,
# <target description used in the design text>).
markers <- list(
  c("Nems", 'nematode 18S'),
  c("16S", 'bacterial 16S'),
  c("FITS1", 'fungal ITS1'),
  c("FITS2", 'fungal ITS2'),
  c("OITS", 'oomycete ITS1'),
  c("Trich", 'Trichodoridae 18S')
)

# Collect the per-marker tables and stack them into a single table.
tb <- dplyr::bind_rows(lapply(markers, file_reader))

# Join back onto the template table so the output carries the template's
# columns in the template's order.
tbl <- dplyr::right_join(tbl, tb)

# Report any differences between the joined table and the collected rows.
all.equal(as.data.frame(tbl), as.data.frame(tb))

# Write the submission sheet, header included.
readr::write_tsv(tbl, "SRA_submission.tsv", col_names = TRUE)
```
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment