From 6035a8d07d5388ab5cd4308632ddcbd11b1a26f2 Mon Sep 17 00:00:00 2001
From: lewa <lene.wasskog@nibio.no>
Date: Sat, 20 Apr 2024 14:35:37 +0200
Subject: [PATCH] feat: Use year file, remove previous results before
 calculating new, simplify

---
 .gitignore         |   4 +-
 ADASMELIAE.py      | 240 +++++++++++++++++----------------------------
 run_ADASMELIAE.sh  |   4 +-
 test_ADASMELIAE.py |  77 ++++-----------
 4 files changed, 117 insertions(+), 208 deletions(-)

diff --git a/.gitignore b/.gitignore
index edf7d24..3025c3d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,6 @@ in
 out
 log
 tmp
-.pytest_cache
\ No newline at end of file
+.pytest_cache
+run_cleanup.sh
+.idea
\ No newline at end of file
diff --git a/ADASMELIAE.py b/ADASMELIAE.py
index 0b5d9e0..05f7dcc 100755
--- a/ADASMELIAE.py
+++ b/ADASMELIAE.py
@@ -22,7 +22,6 @@
 # * GDAL >= v 3.4.3 built with Python support
 # * For Python: See requirements.txt
 
-
 import os
 import time
 import subprocess
@@ -32,12 +31,13 @@ from datetime import datetime, timedelta
 from jinja2 import Environment, FileSystemLoader
 import logging
 import pytz
-import netCDF4 as nc
 import configparser
 import sys
 
 load_dotenv()
 
+# TODO Should start before the first European crops reach growth stage 51 and end when the northernmost regions have passed stage 59
+
 DEBUG = (
     False
     if os.getenv("DEBUG") is None or os.getenv("DEBUG").lower() == "false"
@@ -45,26 +45,18 @@ DEBUG = (
 )
 
 model_id = os.getenv("MODEL_ID")
-filename_pattern = os.getenv("WEATHER_DATA_FILENAME_PATTERN")
-filename_dateformat = os.getenv("WEATHER_DATA_FILENAME_DATEFORMAT")
-
-# TODO Should start before the first European crops reach growth stage 51 and end when the northernmost regions have passed stage 59
-current_year = datetime.now().year
-local_timezone = pytz.timezone(os.getenv("LOCAL_TIMEZONE"))
-
-MODEL_START_DATE = datetime.strptime(
-    f"{current_year}-{os.getenv('START_DATE')}", "%Y-%m-%d"
-).date()
-MODEL_END_DATE = datetime.strptime(
-    f"{current_year}-{os.getenv('END_DATE')}", "%Y-%m-%d"
-).date()
-
 home_dir = os.getenv("HOME_DIR")
 weather_data_dir = os.getenv("WEATHER_DATA_DIR")
+result_tif_dir = os.getenv("RESULT_TIF_DIR")
+result_mapfile_dir = os.getenv("RESULT_MAPFILE_DIR")
 tmp_dir = os.getenv("TMP_DIR")
-data_dir = os.getenv("DATA_DIR")
-template_dir = f"{os.getenv('HOME_DIR')}mapfile/"
-mapfile_outdir = os.getenv("MAPFILE_DIR")
+template_dir = f"{home_dir}mapfile/"
+
+weather_data_filename_pattern = os.getenv("WEATHER_DATA_FILENAME_DATEFORMAT")  # strftime format for daily weather file names
+local_timezone = pytz.timezone(os.getenv("LOCAL_TIMEZONE"))
+start_MM_DD = os.getenv("START_DATE")
+end_MM_DD = os.getenv("END_DATE")
+
 
 config_file_path = f"{os.getenv('HOME_DIR')}{model_id}.cfg"
 mask_file_path = (
@@ -83,8 +75,7 @@ logging.basicConfig(
     format="%(asctime)s - %(levelname).4s - (%(filename)s:%(lineno)d) - %(message)s",
 )
 
-TM = "air_temperature_2m"
-TM_MAX = "TM_MAX"
+tm_max = "air_temperature_2m_max"
 THRESHOLD = 15
 
 
@@ -96,7 +87,7 @@ def run_command(command, shell=True, stdout=subprocess.PIPE):
             command,
             shell=shell,
             stdout=stdout,
-            stderr=subprocess.PIPE,
+            stderr=stdout,
             text=True,
             check=True,
         )
@@ -109,154 +100,111 @@ def run_command(command, shell=True, stdout=subprocess.PIPE):
             for line in result.stderr.splitlines():
                 logging.error(line.strip())
     except subprocess.CalledProcessError as e:
-        logging.error(f"Command failed: '{command}'")
         logging.error(f"{e}")
         quit()
 
 
-# Iterate the set of previously calculated result files. Find latest result date,
-# and return result date + 1 day. If no files exist, return MODEL_START_DATE.
-def find_start_date(model_start_date):
-    result_file_names = os.listdir(data_dir)
-    result_file_names = [
-        file for file in result_file_names if file.startswith("result_2")
-    ]
-    try:
-        dates = [
-            datetime.strptime(file.split("_")[1].split(".")[0], "%Y-%m-%d").date()
-            for file in result_file_names
-        ]
-        logging.debug(
-            f"Found results for the following dates: {sorted([date.strftime('%Y-%m-%d') for date in dates])}"
-        )
-    except ValueError as e:
-        logging.error(f"Error parsing dates: {e}")
-        return MODEL_START_DATE
-    latest_result_date = max(dates, default=None)
-
-    # Quit if latest result date is today (script was already run today)
-    if latest_result_date and latest_result_date == datetime.now().date():
-        logging.error("Script already run for today. Quit.")
-        sys.exit()
+# Remove all temporary/intermediary files
+def remove_temporary_files():
+    if glob.glob(f"{tmp_dir}*.nc"):
+        run_command(command=f"rm {tmp_dir}*.nc")
+    if glob.glob(f"{tmp_dir}*.tif"):
+        run_command(command=f"rm {tmp_dir}*.tif")
 
-    return (
-        latest_result_date + timedelta(days=1)
-        if latest_result_date
-        else model_start_date
-    )
+
+# Remove previously calculated results
+def remove_old_results():
+    if glob.glob(f"{result_tif_dir}*.tif"):
+        run_command(command=f"rm {result_tif_dir}*.tif")
+    if glob.glob(f"{result_mapfile_dir}*.map"):
+        run_command(command=f"rm {result_mapfile_dir}*.map")
 
 
-def find_end_date(model_end_date):
+if __name__ == "__main__":
+    start_time = time.time()  # For logging execution time
     today = datetime.now().date()
-    return today if today < model_end_date else model_end_date
 
+    remove_old_results()
 
-if __name__ == "__main__":
-    logging.info(f"Start model {model_id}")
-    start_time = time.time()
+    year = today.year
+    model_start_date = datetime.strptime(f"{year}-{start_MM_DD}", "%Y-%m-%d").date()
+    model_end_date = datetime.strptime(f"{year}-{end_MM_DD}", "%Y-%m-%d").date()  # TODO(review): unused — should end_date be capped at model_end_date?
 
-    start_date = find_start_date(MODEL_START_DATE)
-    end_date = find_end_date(MODEL_END_DATE)
+    start_date = model_start_date
+    end_date = today + timedelta(days=2)
+
+    if start_date > end_date:
+        logging.error("Model period not started. Quit.")
+        sys.exit()
 
     logging.info(
-        f"Start running model for start date {start_date} and end date {end_date}"
+        f"Start running model {model_id} for start date {start_date} and end date {end_date}"
     )
 
-    weather_data_file_pattern = f"{weather_data_dir}{filename_pattern}"
-    weather_data_files = glob.glob(weather_data_file_pattern)
-    logging.debug(f"Find weather data files: {weather_data_file_pattern}")
-    logging.debug([os.path.basename(path) for path in weather_data_files])
-
-    timestep_dates = []
-    for weather_data_file_path in sorted(weather_data_files):
-        weather_data_file_name = os.path.basename(weather_data_file_path)
-
-        # Skip if we don't have a valid date
-        try:
-            file_date = datetime.strptime(
-                weather_data_file_name, filename_dateformat
-            ).date()
-        except ValueError as e:
-            logging.info(f"{weather_data_file_name} - Skip file due to invalid date")
-            logging.debug(e)
-            continue
-
-        # Set up file names based on date YYYY-MM-DD
-        date_YYYY_MM_DD = file_date.strftime("%Y-%m-%d")
-        max_temp_file_path = f"{tmp_dir}max_temp_{date_YYYY_MM_DD}.nc"
-        result_unmasked_path = f"{tmp_dir}result_unmasked_{date_YYYY_MM_DD}.nc"
-        result_path = f"{tmp_dir}result_{date_YYYY_MM_DD}.nc"
-
-        # Only process files from within valid interval
-        if file_date < start_date or file_date > end_date:
-            logging.debug(
-                f"{weather_data_file_name} - Skip file with date outside calculation period"
-            )
-            continue
-
-        # Check that the file has at least 23 timesteps
-        with nc.Dataset(weather_data_file_path, "r") as weatherdata_file:
-            timestep_count = len(weatherdata_file.variables["time"])
-            if timestep_count < 23:
-                logging.info(
-                    f"{weather_data_file_name} - Skip file with {timestep_count} timesteps"
-                )
-                continue
-
-        # Skip if result files for date already exist
-        if os.path.isfile(
-            f"{data_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}.tif"
-        ) and os.path.isfile(f"{data_dir}result_{date_YYYY_MM_DD}.tif"):
-            logging.info(f"Result files for {date_YYYY_MM_DD} already exist, skip")
-            continue
-
-        # Produce daily files with MAX_TEMP, which is the highest recorded temperature within the given day
-        run_command(
-            command=f"cdo -s -O -L -settime,00:00:00 -setdate,{date_YYYY_MM_DD} -chname,{TM},{TM_MAX} -timmax -selvar,{TM} {weather_data_file_path} {max_temp_file_path}"
-        )
-
-        # Classifying warning status for the model
-        # temperature < 15     --> 2 (Yellow)
-        # temperature >= 15    --> 4 (Red)
-        run_command(
-            command=f'cdo -s -aexpr,"WARNING_STATUS = {TM_MAX} < {THRESHOLD} ? 2 : 4" {max_temp_file_path} {result_unmasked_path}',
-        )
-
-        # Mask results using a CSV file with polygons. Env variable MASK_FILE must be set, or else we use the file as it is.
-        if mask_file_path is not None:
-            logging.info(f"Applying mask file {mask_file_path} to result file")
-            run_command(
-                command=f"cdo -maskregion,{mask_file_path} {result_unmasked_path} {result_path}",
-            )
-        else:
-            os.rename(result_unmasked_path, result_path)
+    weather_data_file = weather_data_dir + today.strftime(weather_data_filename_pattern)
+    max_temp_in_period_file = f"{tmp_dir}{year}_max_temp_for_period.nc"
+    unmasked_result_file = f"{tmp_dir}{year}_unmasked_result.nc"
+    final_result_file = f"{tmp_dir}{year}_result.nc"
 
-        # Convert to GeoTIFF
-        # We only need WHS and WARNING_STATUS
-        # Merge the WARNING_STATUS and WHS variables into one GeoTIFF file with two bands.
-        # The WARNING_STATUS should always be band #1
+    # Extract air_temperature_2m_max for the required period into max_temp_in_period_file
+    run_command(
+        command=f"cdo -L seldate,{start_date},{end_date} -selname,{tm_max} {weather_data_file} {max_temp_in_period_file}"
+    )
 
-        # We must delete the GeoTIFF file before merging WHY?
-        # run_command(command=f"rm {data_dir}result_*{file_date_str}*.tif")
+    # Classifying warning status for the model
+    # temperature < 15     --> 2 (Yellow)
+    # temperature >= 15    --> 4 (Red)
+    run_command(
+        command=f'cdo -s -aexpr,"WARNING_STATUS = {tm_max} < {THRESHOLD} ? 2 : 4" {max_temp_in_period_file} {unmasked_result_file}',
+    )
 
-        timestep_dates.append(date_YYYY_MM_DD)
+    # Mask results using a CSV file with polygons. Env variable MASK_FILE must be set, or else we use the file as it is.
+    if mask_file_path is not None:
+        run_command(
+            command=f"cdo -maskregion,{mask_file_path} {unmasked_result_file} {final_result_file}",
+        )
+    else:
+        os.rename(unmasked_result_file, final_result_file)
+
+    # Split result file into separate files for each timestep
+    prefix = f"{tmp_dir}timestep_"
+    run_command(command=f"cdo -s splitdate {final_result_file} {prefix}")
+
+    timestep_file_pattern = f"{prefix}*.nc"
+    logging.info(f"Find timestep files: {timestep_file_pattern}")
+    timestep_files = sorted(glob.glob(timestep_file_pattern))
+    timestep_filenames = [os.path.basename(file) for file in timestep_files]
+    logging.info(f"Timestep files sorted: {timestep_filenames}")
+    timestep_dates = [
+        filename.split("_")[1].split(".")[0] for filename in timestep_filenames
+    ]
 
+    for file in timestep_files:
+        filename = os.path.basename(file)
+        date_str = filename.split("_")[1].split(".")[0]
         with open("/dev/null", "w") as devnull:
+            warning_status_lcc = f"{tmp_dir}result_WARNING_STATUS_{date_str}_lcc.tif"
+            result_lcc = f"{tmp_dir}result_{date_str}_lcc.tif"
+            warning_status = f"{result_tif_dir}result_WARNING_STATUS_{date_str}.tif"
+            result = f"{result_tif_dir}result_{date_str}.tif"
+
+            # For warning status:
             run_command(
-                command=f'gdal_translate -ot Int16 -of GTiff  NETCDF:"{tmp_dir}result_{date_YYYY_MM_DD}.nc":WARNING_STATUS {tmp_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}_lcc.tif',
+                command=f'gdal_translate -ot Int16 -of GTiff  NETCDF:"{file}":WARNING_STATUS {warning_status_lcc}',
                 stdout=devnull,
             )
+            # For max temperature:
             run_command(
-                command=f'gdal_translate -ot Float32 -of GTiff  NETCDF:"{tmp_dir}result_{date_YYYY_MM_DD}.nc":{TM_MAX} {tmp_dir}result_{date_YYYY_MM_DD}_lcc.tif',
+                command=f'gdal_translate -ot Float32 -of GTiff  NETCDF:"{file}":{tm_max} {result_lcc}',
                 stdout=devnull,
             )
             # Need to reproject the files, to ensure we have the projection given in the generted mapfile. We always use EPSG:4326 for this
             run_command(
-                command=f"gdalwarp -t_srs EPSG:4326 {tmp_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}_lcc.tif {data_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}.tif",
+                command=f"gdalwarp -t_srs EPSG:4326 {warning_status_lcc} {warning_status}",
                 stdout=devnull,
             )
             run_command(
-                command=f"gdalwarp -t_srs EPSG:4326 {tmp_dir}result_{date_YYYY_MM_DD}_lcc.tif {data_dir}result_{date_YYYY_MM_DD}.tif",
+                command=f"gdalwarp -t_srs EPSG:4326 {result_lcc} {result}",
                 stdout=devnull,
             )
 
@@ -271,15 +219,14 @@ if __name__ == "__main__":
                 language[keyword] = config["i18n.%s" % language_code][keyword]
             languages.append(language)
 
-    # The paths should be set in a .env file
     env = Environment(loader=FileSystemLoader(template_dir))
     template = env.get_template(template_file_name)
     output = template.render(
         {
             "model_id": model_id,
             "timestep_dates": timestep_dates,
-            "mapserver_data_dir": os.getenv("MAPSERVER_DATA_DIR"),
-            "mapserver_mapfile_dir": os.getenv("MAPSERVER_MAPFILE_DIR"),
+            "mapserver_data_dir": os.getenv("MAPSERVER_RESULT_TIF_DIR"),
+            "mapserver_mapfile_dir": os.getenv("MAPSERVER_RESULT_MAPFILE_DIR"),
             "mapserver_log_file": os.getenv("MAPSERVER_LOG_FILE"),
             "mapserver_image_path": os.getenv("MAPSERVER_IMAGE_PATH"),
             "mapserver_extent": os.getenv("MAPSERVER_EXTENT"),
@@ -287,14 +234,11 @@ if __name__ == "__main__":
             "language_codes": language_codes,
         }
     )
-    with open(f"{mapfile_outdir}/{model_id}.map", "w") as f:
+    with open(f"{result_mapfile_dir}/{model_id}.map", "w") as f:
         f.write(output)
-
-    # Remove all temporary/intermediary files
-    # run_command(command=f"rm {tmp_dir}result*.nc")
-    # run_command(command=f"rm {tmp_dir}result*.tif")
+    remove_temporary_files()
 
     end_time = time.time()
     logging.info(
-        f"End model {model_id} - time spent: {'Execution time: {:.2f} seconds'.format(end_time - start_time)}"
+        f"End model {model_id} - Execution time: {'{:.2f} seconds'.format(end_time - start_time)}"
     )
diff --git a/run_ADASMELIAE.sh b/run_ADASMELIAE.sh
index fb91d32..caa9d0c 100755
--- a/run_ADASMELIAE.sh
+++ b/run_ADASMELIAE.sh
@@ -16,7 +16,6 @@
 #    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 # Configures environment and logging before running the model
-# @author: Tor-Einar Skog <tor-einar.skog@nibio.no>
 
 
 # First: Test that we have CDO and GDAL installed
@@ -47,8 +46,9 @@ fi
 LOG_FILE=${HOME_DIR}log/ADASMELIAE.log
 REQUIREMENTS=${HOME_DIR}requirements.txt
 
+echo "Start model. Follow log at $LOG_FILE"
+
 # Create and activate the virtual environment
-echo "Run model. Follow log at $LOG_FILE"
 echo "Activate virtual environment .venv" >> "$LOG_FILE"
 python3 -m venv .venv
 . .venv/bin/activate
diff --git a/test_ADASMELIAE.py b/test_ADASMELIAE.py
index 34fb857..fe002cb 100644
--- a/test_ADASMELIAE.py
+++ b/test_ADASMELIAE.py
@@ -1,67 +1,30 @@
 import pytest
-from datetime import datetime, timedelta
-from ADASMELIAE import find_start_date, find_end_date
+import logging
+from ADASMELIAE import run_command
 
 
-@pytest.fixture
-def example_result_files():
-    # Fixture for generating content of result dir
-    return [
-        "result_2023-04-15.nc",
-        "result_2023-04-16.nc",
-        "result_2023-04-17.nc",
-        "result_WARNING_STATUS_2023-04-15.nc",
-        "result_WARNING_STATUS_2023-04-16.nc",
-        "result_WARNING_STATUS_2023-04-17.nc",
-        "result_WARNING_STATUS_2023-04-18.nc",
-    ]
+# Define a fixture for setting up logging
+@pytest.fixture(autouse=True)
+def setup_logging(caplog):
+    caplog.set_level(logging.DEBUG)
 
 
-@pytest.fixture
-def today():
-    # Fixture for generating date for testing
-    return datetime.now().date()
+# Test case for successful command execution
+def test_run_command_success(caplog):
+    result = run_command("echo 'Hello, World!'")
+    assert result == ["Hello, World!"]
+    assert "Hello, World!" in caplog.text
 
 
-def test_find_start_date_with_previous_results(example_result_files, monkeypatch):
-    MODEL_START_DATE = datetime(2023, 3, 1)
-    monkeypatch.setenv("DATA_DIR", "out")
+# Test case for command failure
+def test_run_command_failure(caplog):
+    with pytest.raises(SystemExit):
+        run_command("nonexistent_command")
 
-    # Mock os.listdir to return the example result files
-    with monkeypatch.context() as m:
-        m.setattr("os.listdir", lambda _: example_result_files)
-        start_date = find_start_date(MODEL_START_DATE)
+    assert "nonexistent_command" in caplog.text
 
-    # Assert the expected start date
-    expected_start_date = datetime(2023, 4, 18)
-    assert start_date == expected_start_date
 
-
-def test_find_start_date_without_previous_results(monkeypatch):
-    MODEL_START_DATE = datetime(2023, 3, 1)
-    monkeypatch.setenv("DATA_DIR", "out")
-
-    # Mock os.listdir to return the example result files
-    with monkeypatch.context() as m:
-        m.setattr("os.listdir", lambda _: [])
-        start_date = find_start_date(MODEL_START_DATE)
-
-    # Assert the expected start date
-    assert start_date == MODEL_START_DATE
-
-
-def test_find_end_date_when_model_end_date_is_in_past(today):
-    # Test when model end date is in the past
-    model_end_date = datetime.now().date() - timedelta(days=1)
-    assert find_end_date(model_end_date=model_end_date) == model_end_date
-
-
-def test_find_end_date_when_model_end_date_is_in_future(today):
-    # Test when model end date is in the future
-    model_end_date = datetime.now().date() + timedelta(days=1)
-    assert find_end_date(model_end_date=model_end_date) == today
-
-
-def test_find_end_date_equal(today):
-    # Test when model end date is today
-    assert find_end_date(model_end_date=today) == today
+# Test case for handling stderr
+def test_run_command_stderr(caplog):
+    run_command("echo 'This is an error' >&2")
+    assert "This is an error" in caplog.text
-- 
GitLab