Skip to content
Snippets Groups Projects
Commit 8ac28ec9 authored by Lene Wasskog's avatar Lene Wasskog
Browse files

chore: Code cleanup, improve logging, ensure dirs exist

parent ddefb975
No related branches found
No related tags found
No related merge requests found
......@@ -22,16 +22,20 @@
# * GDAL >= v 3.4.3 built with Python support
# * For Python: See requirements.txt
import os, sys, subprocess,glob
from dotenv import load_dotenv
from datetime import datetime, timezone, timedelta
from jinja2 import Environment, FileSystemLoader
import os
import sys
import subprocess
import glob
import pytz
import netCDF4 as nc
import configparser
import logging
import shutil
from dotenv import load_dotenv
from datetime import datetime, timedelta
from jinja2 import Environment, FileSystemLoader
# Introduced to improve the logging of the script
def run_command(command, stdout=subprocess.PIPE):
logging.debug(f"{command}")
try:
......@@ -55,11 +59,17 @@ def run_command(command, stdout=subprocess.PIPE):
logging.error(f"{e}")
# Paths config
# Create a .env file from dotenv-sample
# Load environment variables (DEBUG, MODEL_ID, DATA_DIR, MAPFILE_DIR, MASK_FILE, ...)
# from a local .env file into os.environ.
load_dotenv()
# Get language stuff
# INI-style configuration (language strings etc.) read from a file next to the script.
config = configparser.ConfigParser()
config.read("SEPTREFHUM.cfg")
# Logging config
# DEBUG is True only when the DEBUG env var is set to something other than "false".
DEBUG = False if os.getenv("DEBUG") is None or os.getenv("DEBUG").lower() == "false" else True
# Drop any handlers pre-installed by imported libraries so basicConfig takes effect.
logging.getLogger().handlers.clear()
logging.basicConfig(
level=logging.DEBUG if DEBUG else logging.INFO,
format="%(asctime)s - %(levelname).4s - (%(filename)s:%(lineno)d) - %(message)s",
)
# Paths to weather data
# MODEL_ID names the model; it is also used to name the generated mapfile below.
model_id = os.getenv("MODEL_ID")
......@@ -87,22 +97,16 @@ weather_data_path = infile_path
# Historic years are read from the archive directory instead of the live one.
if year < today.year:
weather_data_path = f"{infile_archive_path}{year}/"
# Path to store intermediary calculations
# Path to store intermediary calculations and generated results
tmpfile_path = f"tmp/{year}/"
# Paths to store generated result files
# NOTE(review): the next two assignments are the removed (pre-change) side of
# this diff; result_tif_path / result_mapfile_path below replace them. Only
# one set should exist in the actual source file.
outfile_path = f"{os.getenv('DATA_DIR')}{year}/"
mapfile_outdir = f"{os.getenv('MAPFILE_DIR')}{year}/"
# Ensure all output directories exist before any command writes into them.
os.makedirs(tmpfile_path, exist_ok=True)
result_tif_path = f"{os.getenv('DATA_DIR')}{year}/"
os.makedirs(result_tif_path, exist_ok=True)
result_mapfile_path = f"{os.getenv('MAPFILE_DIR')}{year}/"
os.makedirs(result_mapfile_path, exist_ok=True)
# Model constant — presumably the wet-hour temperature cutoff used further
# down in the (elided) calculation; not referenced in the visible lines.
TEMPERATURE_THRESHOLD = 8.0
# NOTE(review): the remainder of this span is the removed (pre-change) copy of
# the DEBUG/logging setup, which this commit moved up next to the config
# loading. It should not appear twice in the actual source file.
DEBUG = False if os.getenv("DEBUG") is None or os.getenv("DEBUG").lower() == "false" else True
logging.getLogger().handlers.clear()
logging.basicConfig(
level=logging.DEBUG if DEBUG else logging.INFO,
format="%(asctime)s - %(levelname).4s - (%(filename)s:%(lineno)d) - %(message)s",
)
# Iterate the set of hourly weather data files
# 1. When's the latest wh_[DATE].nc file? - set earliest weather data file: start_date = [DATE]-2 days
# --> If no wh_DATE.nc file exists, set start_date = None
......@@ -152,7 +156,7 @@ for file_path in sorted(weatherdata_files):
# Concatenate daily files > one file with daily values
# Additive calculation - this file can be stored between calculations
# NOTE(review): the subprocess.run(...) line is the removed (pre-change) side
# of this diff; the run_command(...) line below replaces it to get logging.
subprocess.run(f'cdo -s -O mergetime {tmpfile_path}wh_2[0-9][0-9][0-9]-[01][0-9]-[0123][0-9].nc {tmpfile_path}wh_daysum.nc', shell=True)
run_command(command=f'cdo -s -O mergetime {tmpfile_path}wh_2[0-9][0-9][0-9]-[01][0-9]-[0123][0-9].nc {tmpfile_path}wh_daysum.nc')
# Check timesteps - that we have no missing data
# Open the merged file read-only to inspect its time axis.
wh_daysum = nc.Dataset(f'{tmpfile_path}wh_daysum.nc', 'r')
......@@ -163,7 +167,7 @@ for timestep in timesteps:
# (Body of the `for timestep in timesteps:` loop.)
# Consecutive daily timesteps must be exactly 86400 s (one day) apart;
# any larger gap means weather data is missing and downstream 3-day sums
# would be wrong, so abort.
if timestep - previous_timestep != 86400.0:
    timestep_str = datetime.fromtimestamp(timestep).astimezone(local_timezone).strftime("%Y-%m-%d")
    previous_timestep_str = datetime.fromtimestamp(previous_timestep).astimezone(local_timezone).strftime("%Y-%m-%d")
    # BUG FIX: logging.error() does not accept a `file=` keyword (that is a
    # print() argument); passing it raises TypeError exactly when a data gap
    # is found. logging already routes records via its handlers.
    logging.error(f"Missing weather data between {previous_timestep_str} and {timestep_str}. Exiting.")
    sys.exit(1)
previous_timestep = timestep
# After the loop: release the NetCDF handle.
wh_daysum.close()
......@@ -173,11 +177,11 @@ wh_daysum.close()
# Add sum of WH_DAYSUM[yesterday] + WH_DAYSUM[today] + WH_DAYSUM[tomorrow] into WHS[today]
# timselsum skips every 3 steps when summing 3 timesteps, so we must
# create three different files and then merge them
# NOTE(review): each subprocess.run(...) line is the removed (pre-change) side
# of this diff; the run_command(...) lines replace them one-for-one.
subprocess.run(f'cdo -s timselsum,3,0 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_0.nc', shell=True)
subprocess.run(f'cdo -s timselsum,3,1 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_1.nc', shell=True)
subprocess.run(f'cdo -s timselsum,3,2 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_2.nc', shell=True)
# Three offset 3-step sums (offsets 0, 1, 2) so every day gets a window.
run_command(command=f'cdo -s timselsum,3,0 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_0.nc')
run_command(command=f'cdo -s timselsum,3,1 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_1.nc')
run_command(command=f'cdo -s timselsum,3,2 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_2.nc')
subprocess.run(f'cdo -s -chname,WH_DAYSUM,WHS -mergetime {tmpfile_path}wh_3daysum_tmp_*.nc {tmpfile_path}wh_3daysum_tmp_merged.nc', shell=True)
# Merge the offset files back onto one time axis, renaming WH_DAYSUM -> WHS.
run_command(command=f'cdo -s -chname,WH_DAYSUM,WHS -mergetime {tmpfile_path}wh_3daysum_tmp_*.nc {tmpfile_path}wh_3daysum_tmp_merged.nc')
# the last timesteps are most likely wrong, due to lack of "tomorrows" when performing timselsum
# To remove the last ones:
......@@ -208,23 +212,23 @@ for time_bnd in time_bnds:
# (Body of the `for time_bnd in time_bnds:` loop: count trailing windows
# that lack a full 3 days of data.)
number_of_timesteps_to_remove = number_of_timesteps_to_remove + 1
wh_3daysum.close()
number_of_timesteps_to_keep = len(time_bnds) - number_of_timesteps_to_remove
# NOTE(review): subprocess.run(...) lines below are the removed (pre-change)
# side of this diff; the run_command(...) lines replace them one-for-one.
subprocess.run(f'cdo -s -seltimestep,1/{number_of_timesteps_to_keep} {tmpfile_path}wh_3daysum_tmp_merged.nc {tmpfile_path}wh_3daysum.nc', shell=True)
# Keep only the complete windows.
run_command(command=f'cdo -s -seltimestep,1/{number_of_timesteps_to_keep} {tmpfile_path}wh_3daysum_tmp_merged.nc {tmpfile_path}wh_3daysum.nc')
# Classifying warning status for the WHS model
# 0 =< WHS < 20 --> Green
# 20 <= WHS < 40 --> Yellow
# 40 <= WHS --> Red
subprocess.run(f'cdo -s -aexpr,"WARNING_STATUS = WHS < 0 ? 0 : -1; WARNING_STATUS = WHS < 20 && WARNING_STATUS == -1 ? 2 : WARNING_STATUS; WARNING_STATUS = WHS < 40 && WARNING_STATUS == -1 ? 3 : WARNING_STATUS; WARNING_STATUS = WHS >= 40 && WARNING_STATUS == -1 ? 4 : WARNING_STATUS" {tmpfile_path}wh_3daysum.nc {tmpfile_path}result_unmasked.nc', shell=True)
# Derive WARNING_STATUS (0/2/3/4) from WHS via a chained cdo expression.
run_command(command=f'cdo -s -aexpr,"WARNING_STATUS = WHS < 0 ? 0 : -1; WARNING_STATUS = WHS < 20 && WARNING_STATUS == -1 ? 2 : WARNING_STATUS; WARNING_STATUS = WHS < 40 && WARNING_STATUS == -1 ? 3 : WARNING_STATUS; WARNING_STATUS = WHS >= 40 && WARNING_STATUS == -1 ? 4 : WARNING_STATUS" {tmpfile_path}wh_3daysum.nc {tmpfile_path}result_unmasked.nc')
subprocess.run(f"rm {tmpfile_path}result.nc", shell=True)
# Remove any stale result before (re)creating it below.
run_command(command=f"rm {tmpfile_path}result.nc")
# Mask results using a CSV file with polygons
# Env variable MASK_FILE must be set
if os.getenv("MASK_FILE") is not None:
mask_file = os.getenv("MASK_FILE")
logging.info(f"Applying mask file {mask_file} to result.nc")
subprocess.run(f'cdo -P 6 -maskregion,{mask_file} {tmpfile_path}result_unmasked.nc {tmpfile_path}result.nc', shell=True)
run_command(command=f'cdo -P 6 -maskregion,{mask_file} {tmpfile_path}result_unmasked.nc {tmpfile_path}result.nc')
else:
# No mask configured: the unmasked result becomes the final result as-is.
os.rename(f"{tmpfile_path}result_unmasked.nc", f"{tmpfile_path}result.nc")
#"""
......@@ -243,19 +247,19 @@ for timestep in timesteps:
# (Body of the `for timestep in timesteps:` loop: emit one NetCDF + GeoTIFF
# pair per forecast day.)
timestep_dates.append(file_date)
# Create NetCDF result file
# NOTE(review): subprocess.run(...) lines below are the removed (pre-change)
# side of this diff; the run_command(...) lines (using result_tif_path
# instead of outfile_path) replace them one-for-one.
subprocess.run(f'cdo -s -seltimestep,{timestep_index}/{timestep_index} {tmpfile_path}result.nc {tmpfile_path}result_{file_date}.nc', shell=True)
run_command(command=f'cdo -s -seltimestep,{timestep_index}/{timestep_index} {tmpfile_path}result.nc {tmpfile_path}result_{file_date}.nc')
# Convert to GeoTIFF
# We only need WHS and WARNING_STATUS
# Merge the WARNING_STATUS and WHS variables into one GeoTIFF file with two bands.
# The WARNING_STATUS should always be band #1
# We must delete the GeoTIFF file before merging
subprocess.run(f'rm {outfile_path}result_*{file_date}*.tif', shell=True)
run_command(command=f'rm {result_tif_path}result_*{file_date}*.tif')
# gdal_translate chatter is discarded; extract each variable to its own
# GeoTIFF in the model's native (lcc) projection first.
with open("/dev/null", "w") as devnull:
subprocess.run(f'gdal_translate -ot Int16 -of GTiff NETCDF:"{tmpfile_path}result_{file_date}.nc":WARNING_STATUS {tmpfile_path}result_WARNING_STATUS_{file_date}_lcc.tif', shell=True, stdout=devnull)
subprocess.run(f'gdal_translate -ot Float32 -of GTiff NETCDF:"{tmpfile_path}result_{file_date}.nc":WHS {tmpfile_path}result_{file_date}_lcc.tif', shell=True, stdout=devnull)
run_command(command=f'gdal_translate -ot Int16 -of GTiff NETCDF:"{tmpfile_path}result_{file_date}.nc":WARNING_STATUS {tmpfile_path}result_WARNING_STATUS_{file_date}_lcc.tif', stdout=devnull)
run_command(command=f'gdal_translate -ot Float32 -of GTiff NETCDF:"{tmpfile_path}result_{file_date}.nc":WHS {tmpfile_path}result_{file_date}_lcc.tif', stdout=devnull)
# Need to reproject the files, to ensure we have the projection given in the generated mapfile. We always use EPSG:4326 for this
subprocess.run(f'gdalwarp -t_srs EPSG:4326 {tmpfile_path}result_WARNING_STATUS_{file_date}_lcc.tif {outfile_path}result_WARNING_STATUS_{file_date}.tif', shell=True, stdout=devnull)
subprocess.run(f'gdalwarp -t_srs EPSG:4326 {tmpfile_path}result_{file_date}_lcc.tif {outfile_path}result_{file_date}.tif', shell=True, stdout=devnull)
run_command(command=f'gdalwarp -t_srs EPSG:4326 {tmpfile_path}result_WARNING_STATUS_{file_date}_lcc.tif {result_tif_path}result_WARNING_STATUS_{file_date}.tif', stdout=devnull)
run_command(command=f'gdalwarp -t_srs EPSG:4326 {tmpfile_path}result_{file_date}_lcc.tif {result_tif_path}result_{file_date}.tif', stdout=devnull)
# 1-based index for cdo's seltimestep.
timestep_index = timestep_index + 1
......@@ -285,15 +289,16 @@ output = template.render({
"language_codes": language_codes
})
with open(f"{mapfile_outdir}{model_id}.map", 'w') as f:
with open(f"{result_mapfile_path}{model_id}.map", 'w') as f:
f.write(output)
# Copy query templates to outdir
query_template = os.path.join(home_dir, "mapfile/query_template.xml")
query_template_whs = os.path.join(home_dir, "mapfile/query_template_WHS.xml")
shutil.copy(query_template, mapfile_outdir)
shutil.copy(query_template_whs, mapfile_outdir)
shutil.copy(query_template, result_mapfile_path)
shutil.copy(query_template_whs, result_mapfile_path)
# Remove all temporary/intermediary files
subprocess.run(f"rm {tmpfile_path}wh_3daysum*.nc", shell=True)
subprocess.run(f"rm {tmpfile_path}result_*.nc", shell=True)
subprocess.run(f"rm {tmpfile_path}result*.tif", shell=True)
run_command(command=f"rm {tmpfile_path}wh_3daysum*.nc")
run_command(command=f"rm {tmpfile_path}result_*.nc")
run_command(command=f"rm {tmpfile_path}result*.tif")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment