Commit 9d7fb3af authored by Maciej Wielgosz's avatar Maciej Wielgosz

updated flow for splitting and merging

parent 41e3da5f
import os

import pandas as pd


class GeneralMerger(object):
    def __init__(self, folder_path, file_name):
        self.folder_path = folder_path
        self.file_name = file_name

    def merge(self):
        # collect all text files in the folder_path
        files = [os.path.join(self.folder_path, f) for f in os.listdir(self.folder_path) if f.endswith('.txt')]
        # create an empty list to hold dataframes
        dfs = []
        # read each file, letting pandas take the first row as the header
        # so the header line is not ingested as a data row
        for file in files:
            dfs.append(pd.read_csv(file, sep=','))
        # concatenate all dataframes into a single one
        merged_df = pd.concat(dfs, ignore_index=True)
        # write the merged dataframe to a csv file
        merged_df.to_csv(self.file_name, index=False)
# test the code
folder_path = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/output_text'
file_name = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/merged_output.txt'
merger = GeneralMerger(folder_path, file_name)
merger.merge()
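
# A minimal sanity check of the merged output (a sketch; it assumes the
# merge() call above succeeded and that file_name points at a readable csv):
merged = pd.read_csv(file_name)
print(merged.shape)
print(list(merged.columns))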
import json
import os
import argparse
import laspy
import numpy as np
from sklearn.neighbors import KDTree
SPLIT_TEMPLATE = json.dumps({
    "pipeline": [
        "input.las",
        {
            "type": "filters.chipper",
            "capacity": "5000"
        },
        {
            "type": "writers.las",
            "filename": "output_#.las"
        }
    ]
})
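# For illustration, the concrete pipeline that split() writes after substituting
# the placeholders would look roughly like this (the paths and capacity below are
# hypothetical examples, not values taken from the repository):
#
# {
#     "pipeline": [
#         "/some/path/input.las",
#         {"type": "filters.chipper", "capacity": "10000"},
#         {"type": "writers.las", "filename": "/some/path/output/output_#.las"}
#     ]
# }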
LIST_OF_LAS_FIELDS = ['label', 'treeID']

class GeneralSplitter(object):
    def __init__(self, input_file, output_folder, capacity=5000):
        self.input_file = input_file
        self.output_folder = output_folder
        self.capacity = capacity

    def split(self):
        # create the output folder if it does not exist
        if not os.path.exists(self.output_folder):
            os.makedirs(self.output_folder)
        with open('split_pipeline.json', 'w') as f:
            f.write(
                SPLIT_TEMPLATE
                .replace('input.las', self.input_file)
                .replace('5000', str(self.capacity))
                .replace('output_#.las', self.output_folder + '/output_#.las')
            )
        # run the pipeline
        os.system('pdal pipeline split_pipeline.json')
        # remove the pipeline file
        os.remove('split_pipeline.json')
    def transfer_extra_fields(self):
        # read the input file with laspy
        las = laspy.read(self.input_file)
        # get the point cloud and put it into a KDTree
        point_cloud = np.vstack((las.x, las.y, las.z)).transpose()
        tree = KDTree(point_cloud)
        # get the list of output files
        output_files = [os.path.join(self.output_folder, f) for f in os.listdir(self.output_folder) if f.endswith('.las')]
        for output_file in output_files:
            # read the output file
            target = laspy.read(output_file)
            target_xyz = target.xyz
            # find the nearest neighbour in the input cloud for each point in the target las file
            ind = tree.query(target_xyz, k=1, return_distance=False)
            selected_points_x = las.x[ind.ravel()]
            selected_points_y = las.y[ind.ravel()]
            selected_points_z = las.z[ind.ravel()]
            new_header = laspy.LasHeader(point_format=las.point_format.id, version=las.header.version)
            # add extra dimensions to the new las file
            for item in LIST_OF_LAS_FIELDS:
                new_header.add_extra_dim(laspy.ExtraBytesParams(name=item, type=np.int32))
            new_las = laspy.LasData(new_header)
            # copy x, y, z of the matched points to the new las file
            new_las.x = selected_points_x
            new_las.y = selected_points_y
            new_las.z = selected_points_z
            # copy the extra dimensions (label, treeID) from the input las file to the new las file
            for item in new_header.point_format.dimension_names:
                if item in LIST_OF_LAS_FIELDS:
                    new_las[item] = las[item][ind.ravel()]
            # write the new las file, overwriting the chipper output
            new_las.write(output_file)
filepath = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/input.las'
output_folder = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/output'
splitter = GeneralSplitter(filepath, output_folder)
splitter.split()
splitter.transfer_extra_fields()
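
# A minimal sanity check of one split tile (a sketch; it assumes the pipeline
# above produced at least one .las file in output_folder):
check_file = [f for f in os.listdir(output_folder) if f.endswith('.las')][0]
check_las = laspy.read(os.path.join(output_folder, check_file))
print(list(check_las.point_format.dimension_names))  # should include 'label' and 'treeID'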
# if __name__ == '__main__':
#     parser = argparse.ArgumentParser(description='Split a LAS file into multiple LAS files')
#     parser.add_argument('--input_file', type=str, help='Input LAS file')
#     parser.add_argument('--output_folder', type=str, help='Output folder')
#     parser.add_argument('--capacity', type=int, default=5000, help='Capacity of each output LAS file')
#     args = parser.parse_args()
#     splitter = GeneralSplitter(args.input_file, args.output_folder, args.capacity)
#     # splitter.split()
#     splitter.transfer_extra_fields()
import glob
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import laspy
import argparse

class Las2TextMapper:
    """Mapper class for las2text.py."""

    def __init__(self, data_dir, save_dir, verbose=False):
        self.data_dir = data_dir
        self.save_dir = save_dir
        self.verbose = verbose

    def read_single_las(self, filepath):
        """read_single_las.

        Args:
            filepath: path to the las file
        Returns:
            points: pandas DataFrame with columns x, y, z, red, green, blue, label, treeID
        """
        las = laspy.read(filepath)
        # get x, y, z
        points = np.vstack((las.x, las.y, las.z)).transpose()
        # get intensity
        # points = np.hstack((points, las.intensity[..., None]))
        # get rgb
        points = np.hstack((points, las.red[..., None]))
        points = np.hstack((points, las.green[..., None]))
        points = np.hstack((points, las.blue[..., None]))
        # get label
        points = np.hstack((points, las.label[..., None]))
        # get treeID
        points = np.hstack((points, las.treeID[..., None]))
        # put all together into a pandas dataframe
        points = pd.DataFrame(
            points,
            columns=['x', 'y', 'z', 'red', 'green', 'blue', 'label', 'treeID']
        )
        return points
    def process_folder(self):
        """process_folder.

        Reads every las file in data_dir and writes it as a comma-separated text file in save_dir.
        """
        # read all las files in the folder data_dir using glob
        list_of_files = glob.glob(self.data_dir + "/*.las", recursive=False)
        # if self.save_dir does not exist, create it
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
        # iterate over all files
        for filepath in tqdm(list_of_files):
            if self.verbose:
                print("Processing file: ", filepath)
            # read the las file
            points = self.read_single_las(filepath)
            # build the output path from the input file name
            file_stem = os.path.splitext(os.path.basename(filepath))[0]
            out_path = os.path.join(self.save_dir, file_stem + ".txt")
            # save the points as a text file
            points.to_csv(out_path, sep=',', index=False, header=True)

if __name__ == "__main__":
    # use argparse to get the data_dir and save_dir
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/stpls3d/val', help='path to the data directory')
    parser.add_argument('--save_dir', type=str, default='data/stpls3d/val', help='path to the save directory')
    # get verbose
    parser.add_argument('--verbose', action='store_true', help='verbose')
    args = parser.parse_args()
    # create the mapper
    mapper = Las2TextMapper(args.data_dir, args.save_dir, args.verbose)
    # process the folder
    mapper.process_folder()
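
# Example invocation (a sketch; the script name las2text_mapper.py is an assumption,
# not confirmed by the repository):
#
#   python las2text_mapper.py --data_dir data/stpls3d/val --save_dir data/stpls3d/val --verbose
#
# Each input .las becomes a comma-separated .txt with the header
# x,y,z,red,green,blue,label,treeID, which is the format GeneralMerger merges back together.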