Commit 9d7fb3af authored by Maciej Wielgosz's avatar Maciej Wielgosz

updated flow for splitting and merging

parent 41e3da5f
import os

import pandas as pd


class GeneralMerger(object):
    def __init__(self, folder_path, file_name):
        self.folder_path = folder_path
        self.file_name = file_name

    def merge(self):
        # collect all text files in the folder_path
        files = [os.path.join(self.folder_path, f) for f in os.listdir(self.folder_path) if f.endswith('.txt')]
        # create an empty list to hold dataframes
        dfs = []
        # read each file, letting pandas take the first row as the header
        # so the header line is not ingested as a data row
        for file in files:
            dfs.append(pd.read_csv(file, sep=','))
        # concatenate all dataframes into a single one
        merged_df = pd.concat(dfs, ignore_index=True)
        # write the merged dataframe to a csv file
        merged_df.to_csv(self.file_name, index=False)
# test the code
folder_path = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/output_text'
file_name = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/merged_output.txt'
merger = GeneralMerger(folder_path, file_name)
merger.merge()
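
# A minimal sanity check of the merged output (a sketch; it assumes the
# merge() call above succeeded and that file_name points at a readable csv):
merged = pd.read_csv(file_name)
print(merged.shape)
print(list(merged.columns))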
import json
import os
import argparse
import laspy
import numpy as np
from sklearn.neighbors import KDTree
SPLIT_TEMPLATE = json.dumps({
    "pipeline": [
        "input.las",
        {
            "type": "filters.chipper",
            "capacity": "5000"
        },
        {
            "type": "writers.las",
            "filename": "output_#.las"
        }
    ]
})
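# For illustration, the concrete pipeline that split() writes after substituting
# the placeholders would look roughly like this (the paths and capacity below are
# hypothetical examples, not values taken from the repository):
#
# {
#     "pipeline": [
#         "/some/path/input.las",
#         {"type": "filters.chipper", "capacity": "10000"},
#         {"type": "writers.las", "filename": "/some/path/output/output_#.las"}
#     ]
# }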
LIST_OF_LAS_FIELDS = ['label', 'treeID']

class GeneralSplitter(object):
    def __init__(self, input_file, output_folder, capacity=5000):
        self.input_file = input_file
        self.output_folder = output_folder
        self.capacity = capacity

    def split(self):
        # create the output folder if it does not exist
        if not os.path.exists(self.output_folder):
            os.makedirs(self.output_folder)
        with open('split_pipeline.json', 'w') as f:
            f.write(
                SPLIT_TEMPLATE
                .replace('input.las', self.input_file)
                .replace('5000', str(self.capacity))
                .replace('output_#.las', self.output_folder + '/output_#.las')
            )
        # run the pipeline
        os.system('pdal pipeline split_pipeline.json')
        # remove the pipeline file
        os.remove('split_pipeline.json')
    def transfer_extra_fields(self):
        # read the input file with laspy
        las = laspy.read(self.input_file)
        # get the point cloud and put it into a KDTree
        point_cloud = np.vstack((las.x, las.y, las.z)).transpose()
        tree = KDTree(point_cloud)
        # get the list of output files
        output_files = [os.path.join(self.output_folder, f) for f in os.listdir(self.output_folder) if f.endswith('.las')]
        for output_file in output_files:
            # read the output file
            target = laspy.read(output_file)
            target_xyz = target.xyz
            # find the nearest neighbour in the input cloud for each point in the target las file
            ind = tree.query(target_xyz, k=1, return_distance=False)
            selected_points_x = las.x[ind.ravel()]
            selected_points_y = las.y[ind.ravel()]
            selected_points_z = las.z[ind.ravel()]
            new_header = laspy.LasHeader(point_format=las.point_format.id, version=las.header.version)
            # add extra dimensions to the new las file
            for item in LIST_OF_LAS_FIELDS:
                new_header.add_extra_dim(laspy.ExtraBytesParams(name=item, type=np.int32))
            new_las = laspy.LasData(new_header)
            # copy x, y, z of the matched points to the new las file
            new_las.x = selected_points_x
            new_las.y = selected_points_y
            new_las.z = selected_points_z
            # copy the extra dimensions (label, treeID) from the input las file to the new las file
            for item in new_header.point_format.dimension_names:
                if item in LIST_OF_LAS_FIELDS:
                    new_las[item] = las[item][ind.ravel()]
            # write the new las file, overwriting the chipper output
            new_las.write(output_file)
filepath = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/input.las'
output_folder = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/output'
splitter = GeneralSplitter(filepath, output_folder)
splitter.split()
splitter.transfer_extra_fields()
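
# A minimal sanity check of one split tile (a sketch; it assumes the pipeline
# above produced at least one .las file in output_folder):
check_file = [f for f in os.listdir(output_folder) if f.endswith('.las')][0]
check_las = laspy.read(os.path.join(output_folder, check_file))
print(list(check_las.point_format.dimension_names))  # should include 'label' and 'treeID'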
# if __name__ == '__main__':
#     parser = argparse.ArgumentParser(description='Split a LAS file into multiple LAS files')
#     parser.add_argument('--input_file', type=str, help='Input LAS file')
#     parser.add_argument('--output_folder', type=str, help='Output folder')
#     parser.add_argument('--capacity', type=int, default=5000, help='Capacity of each output LAS file')
#     args = parser.parse_args()
#     splitter = GeneralSplitter(args.input_file, args.output_folder, args.capacity)
#     # splitter.split()
#     splitter.transfer_extra_fields()
import glob
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import laspy
import argparse

class Las2TextMapper:
    """Mapper class for las2text.py."""

    def __init__(self, data_dir, save_dir, verbose=False):
        self.data_dir = data_dir
        self.save_dir = save_dir
        self.verbose = verbose

    def read_single_las(self, filepath):
        """read_single_las.

        Args:
            filepath: path to the las file
        Returns:
            points: pandas DataFrame with columns x, y, z, red, green, blue, label, treeID
        """
        las = laspy.read(filepath)
        # get x, y, z
        points = np.vstack((las.x, las.y, las.z)).transpose()
        # get intensity
        # points = np.hstack((points, las.intensity[..., None]))
        # get rgb
        points = np.hstack((points, las.red[..., None]))
        points = np.hstack((points, las.green[..., None]))
        points = np.hstack((points, las.blue[..., None]))
        # get label
        points = np.hstack((points, las.label[..., None]))
        # get treeID
        points = np.hstack((points, las.treeID[..., None]))
        # put all together into a pandas dataframe
        points = pd.DataFrame(
            points,
            columns=['x', 'y', 'z', 'red', 'green', 'blue', 'label', 'treeID']
        )
        return points
    def process_folder(self):
        """process_folder.

        Reads every las file in data_dir and writes it as a comma-separated text file in save_dir.
        """
        # read all las files in the folder data_dir using glob
        list_of_files = glob.glob(self.data_dir + "/*.las", recursive=False)
        # if self.save_dir does not exist, create it
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
        # iterate over all files
        for filepath in tqdm(list_of_files):
            if self.verbose:
                print("Processing file: ", filepath)
            # read the las file
            points = self.read_single_las(filepath)
            # build the output path from the input file name
            file_stem = os.path.splitext(os.path.basename(filepath))[0]
            out_path = os.path.join(self.save_dir, file_stem + ".txt")
            # save the points as a text file
            points.to_csv(out_path, sep=',', index=False, header=True)

if __name__ == "__main__":
    # use argparse to get the data_dir and save_dir
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/stpls3d/val', help='path to the data directory')
    parser.add_argument('--save_dir', type=str, default='data/stpls3d/val', help='path to the save directory')
    # get verbose
    parser.add_argument('--verbose', action='store_true', help='verbose')
    args = parser.parse_args()
    # create the mapper
    mapper = Las2TextMapper(args.data_dir, args.save_dir, args.verbose)
    # process the folder
    mapper.process_folder()
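
# Example invocation (a sketch; the script name las2text_mapper.py is an assumption,
# not confirmed by the repository):
#
#   python las2text_mapper.py --data_dir data/stpls3d/val --save_dir data/stpls3d/val --verbose
#
# Each input .las becomes a comma-separated .txt with the header
# x,y,z,red,green,blue,label,treeID, which is the format GeneralMerger merges back together.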