diff --git a/test_split/general_merger.py b/test_split/general_merger.py
new file mode 100644
index 0000000000000000000000000000000000000000..32ba63ebc78481e9f78b2ecd0ea0833fe0226ccf
--- /dev/null
+++ b/test_split/general_merger.py
@@ -0,0 +1,37 @@
+import os
+import pandas as pd
+
+class GeneralMerger(object):
+    def __init__(self, folder_path, file_name):
+        self.folder_path = folder_path
+        self.file_name = file_name
+
+    def merge(self):
+        # collect all text files in folder_path
+        files = [os.path.join(self.folder_path, f) for f in os.listdir(self.folder_path) if f.endswith('.txt')]
+
+        # create an empty list to hold dataframes
+        dfs = []
+
+        # read each file (the first row of each file is its header) and append to the list
+        for file in files:
+            dfs.append(pd.read_csv(file, sep=',', header=0))
+
+        # concatenate all dataframes into a single one
+        merged_df = pd.concat(dfs, ignore_index=True)
+
+        # write the merged dataframe to a text (csv) file
+        merged_df.to_csv(self.file_name, index=False)
+
+# test the code
+folder_path = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/output_text'
+file_name = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/merged_output.txt'
+merger = GeneralMerger(folder_path, file_name)
+merger.merge()
diff --git a/test_split/general_splitter.py b/test_split/general_splitter.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ae83aafa6e6d719992cdc5348b2b109377b0937
--- /dev/null
+++ b/test_split/general_splitter.py
@@ -0,0 +1,110 @@
+import json
+import os
+import argparse
+import laspy
+import numpy as np
+from sklearn.neighbors import KDTree
+
+# PDAL pipeline template: chip the input cloud into tiles of `capacity` points
+SPLIT_TEMPLATE = json.dumps({
+    "pipeline": [
+        "input.las",
+        {
+            "type": "filters.chipper",
+            "capacity": "5000"
+        },
+        {
+            "type": "writers.las",
+            "filename": "output_#.las"
+        }
+    ]
+})
+
+# extra dimensions to carry over from the input file to the split files
+LIST_OF_LAS_FIELDS = ['label', 'treeID']
+
+class GeneralSplitter(object):
+    def __init__(self, input_file, output_folder, capacity=5000):
+        self.input_file = input_file
+        self.output_folder = output_folder
+        self.capacity = capacity
+
+    def split(self):
+        # create the output folder if it does not exist
+        if not os.path.exists(self.output_folder):
+            os.makedirs(self.output_folder)
+
+        # fill in the template with the actual input file, capacity and output pattern
+        pipeline_json = (SPLIT_TEMPLATE
+                         .replace('input.las', self.input_file)
+                         .replace('5000', str(self.capacity))
+                         .replace('output_#.las', self.output_folder + '/output_#.las'))
+        with open('split_pipeline.json', 'w') as f:
+            f.write(pipeline_json)
+        # run the pipeline
+        os.system('pdal pipeline split_pipeline.json')
+        # remove the pipeline file
+        os.remove('split_pipeline.json')
+
+    def transfer_extra_fields(self):
+        # read the input file using laspy
+        las = laspy.read(self.input_file)
+
+        # get the point cloud coordinates and put them into a KDTree
+        point_cloud = np.vstack((las.x, las.y, las.z)).transpose()
+        tree = KDTree(point_cloud)
+
+        # get the list of output files
+        output_files = [os.path.join(self.output_folder, f) for f in os.listdir(self.output_folder) if f.endswith('.las')]
+        for output_file in output_files:
+            # read the output file
+            target = laspy.read(output_file)
+            target_xyz = target.xyz
+            # find the nearest neighbour in the input file for each point in the output file
+            ind = tree.query(target_xyz, k=1, return_distance=False)
+
+            selected_points_x = las.x[ind.ravel()]
+            selected_points_y = las.y[ind.ravel()]
+            selected_points_z = las.z[ind.ravel()]
+
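+            # the split pipeline above does not forward the extra dimensions, so a
+            # fresh las file is built for each chip and the extra fields are filled
+            # in from the matched input points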
+            new_header = laspy.LasHeader(point_format=las.point_format.id, version=las.header.version)
+
+            # add the extra dimensions to the new las file
+            for item in LIST_OF_LAS_FIELDS:
+                new_header.add_extra_dim(laspy.ExtraBytesParams(name=item, type=np.int32))
+
+            new_las = laspy.LasData(new_header)
+
+            # copy x, y, z of the matched input points to the new las file
+            new_las.x = selected_points_x
+            new_las.y = selected_points_y
+            new_las.z = selected_points_z
+
+            # copy the extra dimensions of the matched input points to the new las file
+            for item in new_header.point_format.dimension_names:
+                if item in LIST_OF_LAS_FIELDS:
+                    new_las[item] = las[item][ind.ravel()]
+
+            # write the new las file (overwrites the chip produced by pdal)
+            new_las.write(output_file)
+
+# test the code
+filepath = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/input.las'
+output_folder = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/output'
+splitter = GeneralSplitter(filepath, output_folder)
+splitter.split()
+splitter.transfer_extra_fields()
+
+# if __name__ == '__main__':
+#     parser = argparse.ArgumentParser(description='Split a LAS file into multiple LAS files')
+#     parser.add_argument('--input_file', type=str, help='Input LAS file')
+#     parser.add_argument('--output_folder', type=str, help='Output folder')
+#     parser.add_argument('--capacity', type=int, default=5000, help='Capacity of each output LAS file')
+#     args = parser.parse_args()

+#     splitter = GeneralSplitter(args.input_file, args.output_folder, args.capacity)
+#     # splitter.split()
+#     splitter.transfer_extra_fields()
+
diff --git a/utils/las2text_mapper.py b/utils/las2text_mapper.py
new file mode 100755
index 0000000000000000000000000000000000000000..654ae163ffc545f2e6096e044f68ed7b0bb33370
--- /dev/null
+++ b/utils/las2text_mapper.py
@@ -0,0 +1,94 @@
+import glob
+import os
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+import laspy
+import argparse
+
+class Las2TextMapper:
+
+    """ Maps las files to plain-text (csv) point files. """
+
+    def __init__(self, data_dir, save_dir, verbose=False):
+        self.data_dir = data_dir
+        self.save_dir = save_dir
+        self.verbose = verbose
+
+    def read_single_las(self, filepath):
+        """read_single_las.
+
+        Args:
+            filepath: path to the las file
+
+        Returns:
+            points: pandas DataFrame with columns x, y, z, red, green, blue, label, treeID
+        """
+        las = laspy.read(filepath)
+        # get x, y, z
+        points = np.vstack((las.x, las.y, las.z)).transpose()
+        # get intensity
+        # points = np.hstack((points, las.intensity[..., None]))
+        # get rgb
+        points = np.hstack((points, las.red[..., None]))
+        points = np.hstack((points, las.green[..., None]))
+        points = np.hstack((points, las.blue[..., None]))
+
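+        # `label` and `treeID` are assumed to be stored as extra dimensions on the
+        # input las files (e.g. as written by GeneralSplitter above); files without
+        # these fields will fail here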
+        # get label
+        points = np.hstack((points, las.label[..., None]))
+        # get treeID
+        points = np.hstack((points, las.treeID[..., None]))
+
+        # put everything together into a pandas dataframe
+        points = pd.DataFrame(
+            points,
+            columns=['x', 'y', 'z', 'red', 'green', 'blue', 'label', 'treeID']
+        )
+
+        return points
+
+    def process_folder(self):
+        """process_folder.
+
+        Reads every las file in data_dir and writes it out as a .txt file in save_dir.
+        """
+        # read all las files in the folder data_dir using glob
+        list_of_files = glob.glob(self.data_dir + "/*.las", recursive=False)
+
+        # if self.save_dir does not exist, create it
+        if not os.path.exists(self.save_dir):
+            os.makedirs(self.save_dir)
+
+        # iterate over all files
+        for filepath in tqdm(list_of_files):
+            if self.verbose:
+                print("Processing file: ", filepath)
+            # read the las file
+            points = self.read_single_las(filepath)
+
+            # build the output path: same base name, .txt extension, in save_dir
+            filename = os.path.splitext(os.path.basename(filepath))[0]
+            out_path = os.path.join(self.save_dir, filename + ".txt")
+
+            # save the points as a comma-separated text file with a header row
+            points.to_csv(out_path, sep=',', index=False, header=True)
+
+if __name__ == "__main__":
+    # use argparse to get the data_dir, save_dir and verbose flag
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--data_dir', type=str, default='data/stpls3d/val', help='path to the data directory')
+    parser.add_argument('--save_dir', type=str, default='data/stpls3d/val', help='path to the save directory')
+    parser.add_argument('--verbose', action='store_true', help='verbose output')
+    args = parser.parse_args()
+
+    # create the mapper
+    mapper = Las2TextMapper(args.data_dir, args.save_dir, args.verbose)
+
+    # process the folder
+    mapper.process_folder()
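+
+# Example invocation (the paths below are placeholders):
+#   python utils/las2text_mapper.py --data_dir /path/to/las_folder --save_dir /path/to/txt_folder --verbose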