From 0e07f060b9ae00c82cd85c8e3336c6e99c2718bf Mon Sep 17 00:00:00 2001
From: Lene Wasskog <lene.wasskog@nibio.no>
Date: Wed, 26 Feb 2025 14:23:22 +0100
Subject: [PATCH] feat: Add year as param with current as default, configure
 logging

---
 README.md         | 18 +++++++++++-------
 SEPTREFHUM.py     | 38 ++++++++++++++++++++++++--------------
 env-sample        | 29 +++++++++++++++--------------
 run_SEPTREFHUM.sh | 28 +++++++++++++++++++++++++---
 4 files changed, 75 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index bcb898b..12a8b58 100644
--- a/README.md
+++ b/README.md
@@ -56,15 +56,17 @@ It is required that you have set the following environment variables:
 
 ```bash
 # This is used to auto generate some variables and file names
-MODEL_ID="SEPTREFHUM"
+MODEL_ID="SEPTREFHUM_EU"
 # Where your application resides
-HOME_DIR=/home/foo/2023_vips_in_space/
+HOME_DIR=/disks/data01/gridmodels/SEPTREFHUM_EU/
 # Path to the weather data
 WEATHER_DATA_DIR=in/
+# Path to the archived weather data for previous years
+ARCHIVE_WEATHER_DATA_DIR=in/
 # Used for iterating the weather data files
-FILENAME_PATTERN="met_1_0km_nordic-*.nc"
+FILENAME_PATTERN="daily_archive_*.nc"
 # Used to extract date info from the filename
-FILENAME_DATEFORMAT="met_1_0km_nordic-%Y-%m-%d.nc"
+FILENAME_DATEFORMAT="daily_archive_%Y%m%d.nc"
 # Names of weather parameters in NetCDF files
 # Hourly precipitation
 RR="hourly_precipitation"
@@ -73,9 +75,9 @@ UM="relative_humidity_2m"
 # Relative humidity (2m)
 TM="air_temperature_2m"
 # Timezone for weather data/daily aggregations
-LOCAL_TIMEZONE="Europe/Oslo"
+LOCAL_TIMEZONE="CET"
 # Path to optional CSV file with polygons for masking result. 
-MASK_FILE=Norge_landomrader.csv
+MASK_FILE=europe_coastline.csv
 # Path to the output (GeoTIFF) files as seen from the running model code
 DATA_DIR=out/
 # Path to the generated mapfile as seen from the running model code
@@ -89,7 +91,9 @@ MAPSERVER_LOG_FILE=/foo/mapserver/log/SEPTREFHUM.log
 # Path to the temporary directory for writing temporary files and images. Must be writable by the user the web server is running as
 MAPSERVER_IMAGE_PATH=/foo/mapserver/tmp/
 # The value of the EXTENT parameter in Mapserver's mapfile. Units are DD (Decimal degrees)
-MAPSERVER_EXTENT="-1.5831861262936526 52.4465003983706595 39.2608060398730458 71.7683216082912736"
+MAPSERVER_EXTENT="-23.5 29.5 62.5 70.5"
+# Whether to enable debug logging. Defaults to False if not set.
+DEBUG=False
 ```
 
 ...this is the contents of the `env-sample` file
diff --git a/SEPTREFHUM.py b/SEPTREFHUM.py
index cad4c06..7bd9083 100755
--- a/SEPTREFHUM.py
+++ b/SEPTREFHUM.py
@@ -22,7 +22,6 @@
 # * GDAL >= v 3.4.3 built with Python support
 # * For Python: See requirements.txt
 
-
 import os, sys, subprocess,glob
 from dotenv import load_dotenv
 from datetime import datetime, timezone, timedelta
@@ -30,6 +29,7 @@ from jinja2 import Environment, FileSystemLoader
 import pytz
 import netCDF4 as nc
 import configparser
+import logging
 
 # Paths config
 # Create a .env file from dotenv-sample
@@ -38,27 +38,38 @@ load_dotenv()
 config = configparser.ConfigParser()
 config.read("SEPTREFHUM.cfg")
 
-# Path to weather data
+# Paths to weather data
 model_id = os.getenv("MODEL_ID")
 infile_path  = os.getenv("WEATHER_DATA_DIR")
+infile_archive_path  = os.getenv("ARCHIVE_WEATHER_DATA_DIR")
 # Used for iterating the weather data files
 filename_pattern = os.getenv("FILENAME_PATTERN")
 # Date format of weather data filenames
 filename_dateformat = os.getenv("FILENAME_DATEFORMAT")
 # Path to store generated GeoTIFF files
 outfile_path = os.getenv("DATA_DIR")
-# Where to store intermediary calculations
-tmpfile_path = "tmp/"
 # Names of weather parameters in NetCDF files
 RR = os.getenv("RR")
 UM = os.getenv("UM")
 TM = os.getenv("TM")
+
 local_timezone = pytz.timezone(os.getenv("LOCAL_TIMEZONE"))
+today = datetime.now(local_timezone)
+if len(sys.argv) > 1:
+    year = int(sys.argv[1])
+else:
+    year = today.year
+
+# Where to store intermediary calculations
+tmpfile_path = f"tmp/{year}/"
 
 TEMPERATURE_THRESHOLD = 8.0
 
 DEBUG = False if os.getenv("DEBUG") is None or os.getenv("DEBUG").lower() == "false" else True
-
+logging.basicConfig(
+    level=logging.DEBUG if DEBUG else logging.INFO,
+    format="%(asctime)s - %(levelname).4s - (%(filename)s:%(lineno)d) - %(message)s",
+)
 
 # Iterate the set of hourly weather data files
 # 1. When's the latest wh_[DATE].nc file? - set earliest weather data file: start_date = [DATE]-2 days
@@ -72,14 +83,13 @@ for wh_file in glob.glob(f"{tmpfile_path}wh_2[0-9][0-9][0-9]-[01][0-9]-[0123][0-
        last_wh_date = current_wh_file_date
 if last_wh_date is not None:
     start_date = last_wh_date - timedelta(days=2)
-    print(f"Last date of WH calculations is {last_wh_date}. Start date = {start_date}")
-
+    logging.info(f"Last date of WH calculations is {last_wh_date}. Start date = {start_date}")
 
 weatherdata_files = glob.glob(f"{infile_path}{filename_pattern}")
 if DEBUG:
-    print(f"{infile_path}{filename_pattern}")
-    print("What are the weatherdata files?")
-    print(weatherdata_files)
+    logging.debug(f"{infile_path}{filename_pattern}")
+    logging.debug("What are the weatherdata files?")
+    logging.debug(weatherdata_files)
 for file_path in sorted(weatherdata_files):
     # TODO: When filename/pattern is configurable: make the string search adaptable
     file_name = os.path.basename(file_path)
@@ -87,7 +97,7 @@ for file_path in sorted(weatherdata_files):
     try:
         wh_sum_date = local_timezone.localize(datetime.strptime(file_name, filename_dateformat))
     except ValueError as e:
-        print(e)
+        logging.info(e)
         continue
 
     # Only process files from the three last days (if this is not a work from scratch)
@@ -98,7 +108,7 @@ for file_path in sorted(weatherdata_files):
     with nc.Dataset(file_path, 'r') as weatherdata_file:
         file_timesteps = len(weatherdata_file.variables["time"])
         if  file_timesteps < 23:
-            print(f"{file_path} has {file_timesteps} timesteps. Skipping it.")
+            logging.info(f"{file_path} has {file_timesteps} timesteps. Skipping it.")
             continue
 
     # Produce daily files with WH_SUM, which is the number of "Wet hours" (WH) for a given day
@@ -123,7 +133,7 @@ for timestep in timesteps:
         if timestep - previous_timestep != 86400.0:
             timestep_str = datetime.fromtimestamp(timestep).astimezone(local_timezone).strftime("%Y-%m-%d")
             previous_timestep_str = datetime.fromtimestamp(previous_timestep).astimezone(local_timezone).strftime("%Y-%m-%d")
-            print(f"ERROR: Missing weather data between {previous_timestep_str} and {timestep_str}. Exiting.", file=sys.stderr)
+            logging.error(f"ERROR: Missing weather data between {previous_timestep_str} and {timestep_str}. Exiting.")
             exit(1)
     previous_timestep = timestep
 wh_daysum.close()
@@ -183,7 +193,7 @@ subprocess.run(f"rm {tmpfile_path}result.nc", shell=True)
 # Env variable MASK_FILE must be set
 if os.getenv("MASK_FILE") is not None:
     mask_file = os.getenv("MASK_FILE")
-    print(f"Applying mask file {mask_file} to result.nc")
+    logging.info(f"Applying mask file {mask_file} to result.nc")
     subprocess.run(f'cdo -P 6 -maskregion,{mask_file} {tmpfile_path}result_unmasked.nc {tmpfile_path}result.nc', shell=True)
 else:
     os.rename(f"{tmpfile_path}result_unmasked.nc", f"{tmpfile_path}result.nc")
diff --git a/env-sample b/env-sample
index 792be08..8303ca3 100644
--- a/env-sample
+++ b/env-sample
@@ -1,26 +1,28 @@
 # Use this as an example to create your own .env file
 
 # This is used to auto generate some variables and file names
-MODEL_ID="SEPTREFHUM"
+MODEL_ID="SEPTREFHUM_EU"
 # Where your application resides
-HOME_DIR=/home/foo/2023_vips_in_space/SEPTREFHUM/
+HOME_DIR=/disks/data01/gridmodels/SEPTREFHUM_EU/
 # Path to the weather data
 WEATHER_DATA_DIR=in/
+# Path to the archived weather data for previous years
+ARCHIVE_WEATHER_DATA_DIR=in/
 # Used for iterating the weather data files
-FILENAME_PATTERN="met_1_0km_nordic-*.nc"
+FILENAME_PATTERN="daily_archive_*.nc"
 # Used to extract date info from the filename
-FILENAME_DATEFORMAT="met_1_0km_nordic-%Y-%m-%d.nc"
+FILENAME_DATEFORMAT="daily_archive_%Y%m%d.nc"
 # Names of weather parameters in NetCDF files
 # Hourly precipitation
-RR="RR"
+RR="hourly_precipitation"
 # Relative humidity (2m)
-UM="UM"
-# Mean temperature (2m)
-TM="TM"
+UM="relative_humidity_2m"
+# Mean temperature (2m)
+TM="air_temperature_2m"
 # Timezone for weather data/daily aggregations
-LOCAL_TIMEZONE="Europe/Oslo"
-# Path to optional CSV file with polygons for masking result. 
-# MASK_FILE=Norge_landomrader.csv
+LOCAL_TIMEZONE="CET"
+# Path to optional CSV file with polygons for masking result.
+MASK_FILE=europe_coastline.csv
 # Path to the output (GeoTIFF) files as seen from the running model code
 DATA_DIR=out/
 # Path to the generated mapfile as seen from the running model code
@@ -34,7 +36,6 @@ MAPSERVER_LOG_FILE=/foo/mapserver/log/SEPTREFHUM.log
 # Path to the temporary directory for writing temporary files and images. Must be writable by the user the web server is running as
 MAPSERVER_IMAGE_PATH=/foo/mapserver/tmp/
 # The value of the EXTENT parameter in Mapserver's mapfile. Units are DD (Decimal degrees)
-MAPSERVER_EXTENT="-1.5831861262936526 52.4465003983706595 39.2608060398730458 71.7683216082912736"
-
+MAPSERVER_EXTENT="-23.5 29.5 62.5 70.5"
 # Default value is false
-#DEBUG=True
\ No newline at end of file
+DEBUG=False
diff --git a/run_SEPTREFHUM.sh b/run_SEPTREFHUM.sh
index fe0e4db..9be44c2 100755
--- a/run_SEPTREFHUM.sh
+++ b/run_SEPTREFHUM.sh
@@ -21,6 +21,24 @@
 # Defines HOME_DIR
 source .env
 
+validate_year() {
+    # Succeed (status 0) only when $1 consists of exactly four digits
+    if [[ ! $1 =~ ^[0-9]{4}$ ]]; then
+        return 1
+    fi
+    return 0
+}
+
+# If a year argument was given, accept it only when it is a 4-digit number
+if [ -n "$1" ]; then
+    if validate_year "$1"; then
+        year=$1
+    else
+        echo "Invalid year: $1. Please provide a valid 4-digit year." >&2
+        exit 1
+    fi
+fi
+
 # Check for HOME_DIR
 if [ -z "${HOME_DIR}" ]
 then
@@ -69,9 +87,13 @@ else
 fi
 
 # Run the model
-echo "==== `date`: Running model" &>> "$LOG_FILE"
-python3 ${HOME_DIR}SEPTREFHUM.py &>> "$LOG_FILE"
-echo "==== `date`: DONE running model" &>> "$LOG_FILE"
+if [ -z "${year}" ]; then
+    echo "==== $(date): Running model for current year" >> "$LOG_FILE" 2>&1
+    python3 ${HOME_DIR}SEPTREFHUM.py >> "$LOG_FILE" 2>&1
+else
+    echo "==== $(date): Running model for ${year}" >> "$LOG_FILE" 2>&1
+    python3 ${HOME_DIR}SEPTREFHUM.py "$year" >> "$LOG_FILE" 2>&1
+fi
 
 # Deactivate the virtual environment
 conda deactivate
-- 
GitLab