import argparse
import os

import pandas as pd

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic; fall back to plain iteration
    # so the merger still works when tqdm is not installed.
    def tqdm(iterable, *args, **kwargs):
        return iterable


class GeneralMerger(object):
    """Merge comma-separated ``.txt`` parts that share a core name.

    Input files are expected to be named ``<core>---<suffix>.txt``.  All
    files with the same ``<core>`` are concatenated row-wise and written to
    ``<output_folder>/<core>_merged.txt``.
    """

    def __init__(self, input_folder, output_folder):
        """Store the folder to read parts from and the folder to write to."""
        self.input_folder = input_folder
        self.output_folder = output_folder

    def _txt_file_names(self):
        """Return the sorted base names of all ``.txt`` files in the input folder."""
        # os.listdir order is arbitrary; sorting keeps results reproducible.
        return sorted(
            name for name in os.listdir(self.input_folder)
            if name.endswith('.txt')
        )

    @staticmethod
    def _core_name(file_name):
        """Return the part of *file_name* that precedes the first ``---``."""
        return os.path.basename(file_name).split('---')[0]

    def get_file_names(self):
        """Return the sorted list of distinct core names found in the input folder."""
        return sorted({self._core_name(name) for name in self._txt_file_names()})

    def merge(self,
              file_core_name=None,  # this is the core name of the files to be merged
              ):
        """Concatenate every part of *file_core_name* and write the result.

        Each part is read as a comma-separated file whose first line is the
        header.  The merged frame keeps those header names and is written to
        ``<output_folder>/<file_core_name>_merged.txt`` without the index.

        Returns:
            The merged ``pandas.DataFrame``.

        Raises:
            FileNotFoundError: if no ``.txt`` file in the input folder has
                the requested core name.
        """
        part_paths = [
            os.path.join(self.input_folder, name)
            for name in self._txt_file_names()
            if self._core_name(name) == file_core_name
        ]
        if not part_paths:
            raise FileNotFoundError(
                "no '.txt' files with core name {!r} found in {!r}".format(
                    file_core_name, self.input_folder))

        # Read every part exactly once; header=0 makes the first line of
        # each file its column names.
        frames = [pd.read_csv(path, sep=',', header=0) for path in part_paths]

        # The concatenated frame already carries the column names parsed
        # from the headers.  They must not be overwritten with the first
        # data row, which would corrupt the header of the written file.
        merged_df = pd.concat(frames, ignore_index=True)

        # Make sure the destination exists even when merge() is called
        # directly rather than through merge_all().
        os.makedirs(self.output_folder, exist_ok=True)
        path_to_save = os.path.join(self.output_folder,
                                    file_core_name + '_merged.txt')
        merged_df.to_csv(path_to_save, index=False, sep=',')

        return merged_df

    def merge_all(self):
        """Merge every core name found in the input folder, with a progress bar."""
        os.makedirs(self.output_folder, exist_ok=True)
        for core_name in tqdm(self.get_file_names()):
            self.merge(file_core_name=core_name)


# Example usage:
# input_folder = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/output_text'
# output_folder = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/merged_output'
# merger = GeneralMerger(input_folder, output_folder)
# files = merger.get_file_names()
# merger.merge_all()
# print(files)


def main():
    """Parse the command-line arguments and merge all files."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_folder', type=str, default='/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/output_text')
    parser.add_argument('--output_folder', type=str, default='/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/merged_output')
    args = parser.parse_args()

    # create the merger object and merge all the files
    merger = GeneralMerger(args.input_folder, args.output_folder)
    merger.merge_all()


if __name__ == '__main__':
    main()