From cca432fa2e01d06d2cb668eb583b85027d03a822 Mon Sep 17 00:00:00 2001 From: Maciej Wielgosz <maciej.wielgosz@nibio.no> Date: Wed, 25 Jan 2023 10:54:59 +0100 Subject: [PATCH] update oracle run for multiple files reading --- run_oracle_wrapper.py | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/run_oracle_wrapper.py b/run_oracle_wrapper.py index 5c6a063..b2b7f8a 100644 --- a/run_oracle_wrapper.py +++ b/run_oracle_wrapper.py @@ -30,30 +30,43 @@ def run_oracle_wrapper(path_to_config_file): # get the bucket name bucket_name = 'bucket_lidar_data' - # get the object name - object_name = './geoslam/plot72_tile_-25_-25.las' + # folder name inside the bucket - # get the object - file = client.get_object(namespace, bucket_name, object_name) + input_folder_in_bucket = 'geoslam' - # write the object to a file - with open('plot_from_bucket.las', 'wb') as f: - for chunk in file.data.raw.stream(1024 * 1024, decode_content=False): - f.write(chunk) - - - # read the config file from config folder + # read the config file from config folder with open(path_to_config_file) as f: config_flow_params = yaml.load(f, Loader=yaml.FullLoader) + # copy all files from the bucket to the input folder + # get the list of objects in the bucket + objects = client.list_objects(namespace, bucket_name).data.objects + # create the input folder if it does not exist if not os.path.exists(config_flow_params['general']['input_folder']): os.mkdir(config_flow_params['general']['input_folder']) - # move the file to the input folder using shutil if it does not exist - if not os.path.exists(config_flow_params['general']['input_folder'] + '/plot_from_bucket.las'): - shutil.move('plot_from_bucket.las', config_flow_params['general']['input_folder']) + # download the files from the bucket to the input folder + for item in objects: + if item.name.split('/')[0] == input_folder_in_bucket: + if not (item.name.split('/')[1] == ''): + object_name = item.name.split('/')[1] + + print('Downloading the file ' + object_name + ' from the bucket ' + bucket_name) + path_to_object = os.path.join(input_folder_in_bucket, object_name) + # get the object + file = client.get_object(namespace, bucket_name, path_to_object) + + # write the object to a file + with open(object_name, 'wb') as f: + for chunk in file.data.raw.stream(1024 * 1024, decode_content=False): + f.write(chunk) + # check if the file already exists in the input folder and delete it if it does + if os.path.exists(config_flow_params['general']['input_folder'] + '/' + object_name): + os.remove(config_flow_params['general']['input_folder'] + '/' + object_name) + # move the file to the input folder and overwrite if it already exists + shutil.move(object_name, config_flow_params['general']['input_folder']) from run import main @@ -81,5 +94,6 @@ if __name__ == '__main__': args = parser.parse_args() # run the main function + print('Running the main function in run_oracle_wrapper.py') run_oracle_wrapper(args.path_to_config_file) -- GitLab