Commit f5ad3556 authored by Maciej Wielgosz's avatar Maciej Wielgosz

splitting tree files for the model - non-parallel version done

parent 41a20aa6
@@ -118,6 +118,7 @@ pip_egg_info/
*.las
*.png
*.h5
*.zip
# Deep learning
model.h5
@@ -132,5 +133,8 @@ ShapeNet/
maciek_data
nibio_data
nibio_data_no_commas
maciek_data
nibio_data
nibio_data_las_single_file
nibio_data_las/
```
@@ -51,7 +51,7 @@ config.initial_lr = 1e-3
config.lr_scheduler_step_size = 5
config.gamma = 0.8
config.epochs = 1
config.epochs = 40
transform = T.Compose([
@@ -62,10 +62,17 @@ transform = T.Compose([
])
pre_transform = T.NormalizeScale()
dataset_path = "/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/maciek_data/plane_maciek"
dataset_path = "/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/nibio_data_txt_single_file"
# dataset_path = "/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/maciek_data/plane_maciek"
# dataset_path = "/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/nibio_data_no_commas"
train_val_dataset = MyData(dataset_path, split='trainval', transform=transform, pre_transform=pre_transform)
train_val_dataset = MyData(
    dataset_path,
    label_location=-2,
    split='trainval',
    transform=transform,
    pre_transform=pre_transform
)
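# label_location=-2 presumably selects the label column counted from the end
# of each row in the txt files (an assumption; MyData is project-local code)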
segmentation_class_frequency = {}
......
import argparse
import os
import pandas as pd
from tqdm import tqdm
class GeneralMerger(object):
    def __init__(self, input_folder, output_folder):
        self.input_folder = input_folder
        self.output_folder = output_folder

    def get_file_names(self):
        # chunk files are named '<core>---<part>.txt'; collect the unique core names
        files = [os.path.join(self.input_folder, f) for f in os.listdir(self.input_folder) if f.endswith('.txt')]
        files_split = [os.path.basename(f).split('---')[0] for f in files]
        files_set = list(set(files_split))
        return files_set

    def merge(self,
              file_core_name=None,  # core name of the files to be merged
              ):
        # collect all chunks that belong to this core name
        files = [os.path.join(self.input_folder, f)
                 for f in os.listdir(self.input_folder)
                 if f.endswith('.txt') and os.path.basename(f).split('---')[0] == file_core_name]
        # create an empty list to hold dataframes
        dfs = []
        # all chunks share the same header row, so read the column names once
        header = pd.read_csv(files[0], sep=',', nrows=0).columns
        # read each file and append to the list
        for file in files:
            dfs.append(pd.read_csv(file, sep=',', header=0))
        # concatenate all dataframes into a single one
        merged_df = pd.concat(dfs, ignore_index=True)
        # restore the shared header
        merged_df.columns = header
        # write the merged dataframe to a csv file
        path_to_save = os.path.join(self.output_folder, file_core_name + '_merged.txt')
        merged_df.to_csv(path_to_save, index=False, sep=',')
        return merged_df

    def merge_all(self):
        os.makedirs(self.output_folder, exist_ok=True)
        files = self.get_file_names()
        for file in tqdm(files):
            self.merge(file_core_name=file)
# test the code
# input_folder = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/output_text'
# output_folder = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/merged_output'
# merger = GeneralMerger(input_folder, output_folder)
# # merger.merge()
# files = merger.get_file_names()
# merger.merge_all()
# print(files)
if __name__ == '__main__':
    # parse the arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_folder', type=str, default='/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/output_text')
    parser.add_argument('--output_folder', type=str, default='/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/merged_output')
    args = parser.parse_args()

    # create the merger object and merge all the files
    merger = GeneralMerger(args.input_folder, args.output_folder)
    merger.merge_all()
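# Example (hypothetical paths, script name assumed): given chunks
# 'plot1---0.txt', 'plot1---1.txt' in --input_folder, merge_all() writes
# 'plot1_merged.txt' into --output_folder:
#   python general_merger.py --input_folder ./output_text --output_folder ./merged_output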
import json
import os
import argparse
import shutil
import subprocess
from pathlib import Path

import laspy
import numpy as np
import pdal
from sklearn.neighbors import KDTree
from tqdm import tqdm
from joblib import Parallel, delayed
SPLIT_TEMPLATE = json.dumps({
    "pipeline": [
        "input.las",
        {
            "type": "filters.chipper",
            "capacity": "5000"
        },
        {
            "type": "writers.las",
            "filename": "output_#.las"
        }
    ]
})
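# The functions below shell out to the `pdal` CLI; as a sketch of an
# alternative (assuming the imported `pdal` python bindings are available),
# the same pipeline JSON can be executed in-process. This hypothetical helper
# is not called anywhere in this file.
def run_pipeline_in_process(pipeline_json):
    pipeline = pdal.Pipeline(pipeline_json)
    return pipeline.execute()  # returns the number of points processed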
LIST_OF_LAS_FIELDS = ['label', 'treeID']
def split(file_path, output_folder, capacity=5000):
    # create the output folder if it does not exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    # fill in the template; editing the parsed JSON (rather than string
    # replacement) avoids corrupting a file path that happens to contain '5000'
    pipeline_spec = json.loads(SPLIT_TEMPLATE)
    pipeline_spec['pipeline'][0] = file_path
    pipeline_spec['pipeline'][1]['capacity'] = str(capacity)
    pipeline_spec['pipeline'][2]['filename'] = os.path.join(
        output_folder, Path(file_path).stem + '---#.las')
    # generate a unique filename so parallel workers do not clash
    unique_filename = 'split_pipeline_' + str(os.getpid()) + '.json'
    with open(unique_filename, 'w') as f:
        json.dump(pipeline_spec, f)
    # run the pipeline
    try:
        subprocess.run(['pdal', 'pipeline', unique_filename], check=True)
    except subprocess.CalledProcessError as e:
        print(f'Command failed with error {e.returncode}. Output was:\n{e.output}')
    # remove the pipeline file
    os.remove(unique_filename)
def transfer_extra_fields(file_path, output_folder):
    # read the original (pre-split) file using laspy
    las = laspy.read(file_path)
    # get the point cloud and put it into a KDTree
    point_cloud = np.vstack((las.x, las.y, las.z)).transpose()
    tree = KDTree(point_cloud)
    # get the list of output (chunk) files
    output_files = [os.path.join(output_folder, f) for f in os.listdir(output_folder) if f.endswith('.las')]
    for output_file in output_files:
        # read the output file
        target = laspy.read(output_file)
        target_xyz = target.xyz
        # find the nearest original point for each point in the chunk;
        # chunk points come straight from the original file, so the match is exact
        ind = tree.query(target_xyz, k=1, return_distance=False)
        selected_points_x = las.x[ind.ravel()]
        selected_points_y = las.y[ind.ravel()]
        selected_points_z = las.z[ind.ravel()]
        new_header = laspy.LasHeader(point_format=las.point_format.id, version=las.header.version)
        # keep the original scales/offsets so coordinates survive the round trip
        new_header.scales = las.header.scales
        new_header.offsets = las.header.offsets
        # add extra dimensions to the new las file
        for item in LIST_OF_LAS_FIELDS:
            new_header.add_extra_dim(laspy.ExtraBytesParams(name=item, type=np.int32))
        new_las = laspy.LasData(new_header)
        # copy x, y, z from the matched original points
        new_las.x = selected_points_x
        new_las.y = selected_points_y
        new_las.z = selected_points_z
        # copy contents of the extra dimensions (label, treeID) from the original file
        for item in new_header.point_format.dimension_names:
            if item in LIST_OF_LAS_FIELDS:
                new_las[item] = las[item][ind.ravel()]
        # overwrite the chunk with the enriched point cloud
        new_las.write(output_file)
class GeneralSplitter(object):
    def __init__(self, input_folder, output_folder, capacity=5000):
        self.input_folder = input_folder
        self.output_folder = output_folder
        self.capacity = capacity

    def process_file(self, file, capacity, output_folder):
        # create a per-process temporary folder so parallel workers do not clash
        temp_output_folder = 'temp_output_folder_' + str(os.getpid())
        if not os.path.exists(temp_output_folder):
            os.makedirs(temp_output_folder)
        # split and transfer extra fields
        split(file, temp_output_folder, capacity)
        transfer_extra_fields(file, temp_output_folder)
        # copy all the files from the temporary output folder to the output folder
        # (dirs_exist_ok requires Python 3.8+)
        shutil.copytree(temp_output_folder, output_folder, dirs_exist_ok=True)
        # remove the temporary output folder
        shutil.rmtree(temp_output_folder)

    def split_and_transfer_in_folder(self):
        # get the list of files in the input folder
        files = [os.path.join(self.input_folder, f) for f in os.listdir(self.input_folder) if f.endswith('.las')]
        for file in tqdm(files):
            self.process_file(file, self.capacity, self.output_folder)
        # use parallel processing
        # Parallel(n_jobs=4)(delayed(self.process_file)(file, self.capacity, self.output_folder) for file in tqdm(files))
# filepath = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/input_folder'
# output_folder = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/data_split_merge/output'
# splitter = GeneralSplitter(filepath, output_folder)
# splitter.split_and_transfer_in_folder()
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Split LAS files into multiple LAS files')
    parser.add_argument('--input_folder', type=str, help='Folder with input LAS files')
    parser.add_argument('--output_folder', type=str, help='Output folder')
    parser.add_argument('--capacity', type=int, default=5000, help='Capacity of each output LAS file')
    args = parser.parse_args()

    splitter = GeneralSplitter(args.input_folder, args.output_folder, args.capacity)
    splitter.split_and_transfer_in_folder()
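# Example invocation (hypothetical paths, script name assumed):
#   python general_splitter.py --input_folder ./input_las --output_folder ./output_las --capacity 5000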
\ No newline at end of file
import json
import glob
# Get a list of all .las files in the current directory
las_files = glob.glob('output_*.las')
# Create the pipeline: listing several readers before a single writer makes
# PDAL write all of their points into one merged file
pipeline = {
    "pipeline": las_files + [{
        "type": "writers.las",
        "filename": "merged_output.las"
    }]
}
# Write the pipeline to a file
with open('merge_pipeline.json', 'w') as f:
    json.dump(pipeline, f)
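# The generated pipeline is then executed with the PDAL CLI:
#   pdal pipeline merge_pipeline.json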
......@@ -12,7 +12,7 @@ backcall==0.2.0
Cerberus==1.3.4
certifi==2022.12.7
cffi==1.15.1
chamfer-distance==0.1
# chamfer-distance==0.1
charset-normalizer==3.0.1
circuitbreaker==1.4.0
click==8.1.3
......
import os

import pandas as pd
class GeneralMerger(object):
    def __init__(self, folder_path, file_name):
        self.folder_path = folder_path
        self.file_name = file_name

    def merge(self):
        # read files in the folder_path
        files = [os.path.join(self.folder_path, f) for f in os.listdir(self.folder_path) if f.endswith('.txt')]
        # create an empty list to hold dataframes
        dfs = []
        # all chunks share the same header row, so read the column names once
        header = pd.read_csv(files[0], sep=',', nrows=0).columns
        # read each file with header=0 so the header row of each chunk
        # does not end up duplicated in the data
        for file in files:
            dfs.append(pd.read_csv(file, sep=',', header=0))
        # concatenate all dataframes into a single one
        merged_df = pd.concat(dfs, ignore_index=True)
        # add the header
        merged_df.columns = header
        # write the merged dataframe to a csv file
        merged_df.to_csv(self.file_name, index=False)
# test the code
folder_path = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/output_text'
file_name = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/merged_output.txt'
merger = GeneralMerger(folder_path, file_name)
merger.merge()
import json
import os
import argparse
import laspy
import numpy as np
from sklearn.neighbors import KDTree
SPLIT_TEMPLATE = json.dumps({
    "pipeline": [
        "input.las",
        {
            "type": "filters.chipper",
            "capacity": "5000"
        },
        {
            "type": "writers.las",
            "filename": "output_#.las"
        }
    ]
})
LIST_OF_LAS_FIELDS = ['label', 'treeID']
class GeneralSplitter(object):
    def __init__(self, input_file, output_folder, capacity=5000):
        self.input_file = input_file
        self.output_folder = output_folder
        self.capacity = capacity

    def split(self):
        # create the output folder if it does not exist
        if not os.path.exists(self.output_folder):
            os.makedirs(self.output_folder)
        with open('split_pipeline.json', 'w') as f:
            f.write(
                SPLIT_TEMPLATE
                .replace('input.las', self.input_file)
                .replace('5000', str(self.capacity))
                .replace('output_#.las', self.output_folder + '/output_#.las')
            )
        # run the pipeline
        os.system('pdal pipeline split_pipeline.json')
        # remove the pipeline file
        os.remove('split_pipeline.json')
    def transfer_extra_fields(self):
        # read the original input file using laspy
        las = laspy.read(self.input_file)
        # get the point cloud and put it into a KDTree
        point_cloud = np.vstack((las.x, las.y, las.z)).transpose()
        tree = KDTree(point_cloud)
        # get the list of output files
        output_files = [os.path.join(self.output_folder, f) for f in os.listdir(self.output_folder) if f.endswith('.las')]
        for output_file in output_files:
            # read the output file
            target = laspy.read(output_file)
            target_xyz = target.xyz
            # find the nearest neighbor for each point in the target las file
            ind = tree.query(target_xyz, k=1, return_distance=False)
            selected_points_x = las.x[ind.ravel()]
            selected_points_y = las.y[ind.ravel()]
            selected_points_z = las.z[ind.ravel()]
            new_header = laspy.LasHeader(point_format=las.point_format.id, version=las.header.version)
            # add extra dimensions to the new las file
            for item in LIST_OF_LAS_FIELDS:
                new_header.add_extra_dim(laspy.ExtraBytesParams(name=item, type=np.int32))
            new_las = laspy.LasData(new_header)
            # copy x, y, z from the matched original points
            new_las.x = selected_points_x
            new_las.y = selected_points_y
            new_las.z = selected_points_z
            # copy contents of extra dimensions from the original file
            for item in new_header.point_format.dimension_names:
                if item in LIST_OF_LAS_FIELDS:
                    new_las[item] = las[item][ind.ravel()]
            # write the new las file
            new_las.write(output_file)
filepath = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/input.las'
output_folder = '/home/nibio/mutable-outside-world/code/nibio_graph_sem_seg/test_split/output'
splitter = GeneralSplitter(filepath, output_folder)
splitter.split()
splitter.transfer_extra_fields()
# if __name__ == '__main__':
# parser = argparse.ArgumentParser(description='Split a LAS file into multiple LAS files')
# parser.add_argument('--input_file', type=str, help='Input LAS file')
# parser.add_argument('--output_folder', type=str, help='Output folder')
# parser.add_argument('--capacity', type=int, default=5000, help='Capacity of each output LAS file')
# args = parser.parse_args()
# splitter = GeneralSplitter(args.input_file, args.output_folder, args.capacity)
# # splitter.split()
# splitter.transfer_extra_fields()