# general_merger.py
# Authored by Maciej Wielgosz
import argparse
import os
import pandas as pd
from tqdm import tqdm
class GeneralMerger(object):
    """Merge chunked ``.txt`` CSV files that share a common core name.

    Files in ``input_folder`` are grouped by the part of their file name that
    precedes the first ``'---'``. Every group is concatenated row-wise and
    written to ``<core name>_merged.txt`` in ``output_folder``.
    """

    def __init__(self, input_folder, output_folder):
        """Remember where the chunks are read from and where results go.

        Args:
            input_folder: Folder containing the ``.txt`` chunks to merge.
            output_folder: Folder that receives the merged files.
        """
        self.input_folder = input_folder
        self.output_folder = output_folder

    def _list_txt_files(self):
        """Return the names of the ``.txt`` files in the input folder, sorted."""
        # Sorting makes the merge order reproducible; os.listdir gives no
        # ordering guarantee.
        return sorted(
            name for name in os.listdir(self.input_folder)
            if name.endswith('.txt')
        )

    def get_file_names(self):
        """Return the sorted, de-duplicated core names found in the input folder.

        The core name is everything before the first ``'---'`` in a file name.
        """
        return sorted({name.split('---')[0] for name in self._list_txt_files()})

    def merge(self,
              file_core_name=None,  # core name shared by the files to be merged
              ):
        """Concatenate all chunks of one core name and write them to disk.

        Args:
            file_core_name: Core name (the part before ``'---'``) of the
                files to merge.

        Returns:
            The merged ``pandas.DataFrame``, which is also written to
            ``<output_folder>/<file_core_name>_merged.txt``.

        Raises:
            ValueError: If ``file_core_name`` is not given.
            FileNotFoundError: If no ``.txt`` file matches ``file_core_name``.
        """
        if file_core_name is None:
            raise ValueError('file_core_name must be provided')

        files = [
            os.path.join(self.input_folder, name)
            for name in self._list_txt_files()
            if name.split('---')[0] == file_core_name
        ]
        if not files:
            raise FileNotFoundError(
                "no '.txt' files with core name {!r} in {!r}".format(
                    file_core_name, self.input_folder))

        # Each chunk carries its own header line, so read_csv already yields
        # the proper column names and concat keeps them. Overwriting the
        # columns with the first data row would corrupt the output header.
        dfs = [pd.read_csv(path, sep=',', header=0) for path in files]
        merged_df = pd.concat(dfs, ignore_index=True)

        # Make merge() usable on its own, not only through merge_all().
        os.makedirs(self.output_folder, exist_ok=True)
        path_to_save = os.path.join(self.output_folder, file_core_name + '_merged.txt')
        merged_df.to_csv(path_to_save, index=False, sep=',')
        return merged_df

    def merge_all(self):
        """Merge every group of chunks found in the input folder."""
        os.makedirs(self.output_folder, exist_ok=True)
        for core_name in tqdm(self.get_file_names()):
            self.merge(file_core_name=core_name)
# Example of manual usage (kept for reference):
# input_folder = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/output_text'
# output_folder = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/merged_output'
# merger = GeneralMerger(input_folder, output_folder)
# # merger.merge()
# files = merger.get_file_names()
# merger.merge_all()
# print(files)
if __name__ == '__main__':
    # Command-line entry point: read the two folder options, then merge every
    # group of chunk files found in the input folder.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--input_folder',
        type=str,
        default='/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/output_text',
    )
    cli.add_argument(
        '--output_folder',
        type=str,
        default='/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/merged_output',
    )
    options = cli.parse_args()

    # Build the merger from the parsed folders and process all files.
    GeneralMerger(options.input_folder, options.output_folder).merge_all()