feat: Use year file, remove previous results before calculating new, simplify

6035a8d0 · Lene Wasskog · 4cfa992b · 6035a8d0 · 6035a8d0 · 6035a8d0
Commit 6035a8d0 authored 1 year ago by Lene Wasskog
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,6 @@ in
 out
 log
 tmp
 .pytest_cache
\ No newline at end of file
+run_cleanup.sh
+.idea
\ No newline at end of file
--- a/ADASMELIAE.py
+++ b/ADASMELIAE.py
@@ -22,7 +22,6 @@
 # * GDAL >= v 3.4.3 built with Python support
 # * For Python: See requirements.txt
 import os
 import time
 import subprocess
@@ -32,12 +31,13 @@ from datetime import datetime, timedelta
 from jinja2 import Environment, FileSystemLoader
 import logging
 import pytz
-import netCDF4 as nc
 import configparser
 import sys
 load_dotenv()
+# TODO Should start before the first European crops reach growth stage 51 and end when the northernmost regions have passed stage 59
 DEBUG = (
    False
    if os.getenv("DEBUG") is None or os.getenv("DEBUG").lower() == "false"
@@ -45,26 +45,18 @@ DEBUG = (
 )
 model_id = os.getenv("MODEL_ID")
-filename_pattern = os.getenv("WEATHER_DATA_FILENAME_PATTERN")
-filename_dateformat = os.getenv("WEATHER_DATA_FILENAME_DATEFORMAT")
-# TODO Should start before the first European crops reach growth stage 51 and end when the northernmost regions have passed stage 59
-current_year = datetime.now().year
-local_timezone = pytz.timezone(os.getenv("LOCAL_TIMEZONE"))
-MODEL_START_DATE = datetime.strptime(
-    f"{current_year}-{os.getenv('START_DATE')}", "%Y-%m-%d"
-).date()
-MODEL_END_DATE = datetime.strptime(
-    f"{current_year}-{os.getenv('END_DATE')}", "%Y-%m-%d"
-).date()
 home_dir = os.getenv("HOME_DIR")
 weather_data_dir = os.getenv("WEATHER_DATA_DIR")
+result_tif_dir = os.getenv("RESULT_TIF_DIR")
+result_mapfile_dir = os.getenv("RESULT_MAPFILE_DIR")
 tmp_dir = os.getenv("TMP_DIR")
-data_dir = os.getenv("DATA_DIR")
+template_dir = f"{home_dir}mapfile/"
-template_dir = f"{os.getenv('HOME_DIR')}mapfile/"
-mapfile_outdir = os.getenv("MAPFILE_DIR")
+weather_data_filename_pattern = os.getenv("WEATHER_DATA_FILENAME_DATEFORMAT")
+local_timezone = pytz.timezone(os.getenv("LOCAL_TIMEZONE"))
+start_MM_DD = os.getenv("START_DATE")
+end_MM_DD = os.getenv("END_DATE")
 config_file_path = f"{os.getenv('HOME_DIR')}{model_id}.cfg"
 mask_file_path = (
@@ -83,8 +75,7 @@ logging.basicConfig(
    format="%(asctime)s - %(levelname).4s - (%(filename)s:%(lineno)d) - %(message)s",
 )
-TM = "air_temperature_2m"
+tm_max = "air_temperature_2m_max"
-TM_MAX = "TM_MAX"
 THRESHOLD = 15
@@ -96,7 +87,7 @@ def run_command(command, shell=True, stdout=subprocess.PIPE):
            command,
            shell=shell,
            stdout=stdout,
-            stderr=subprocess.PIPE,
+            stderr=stdout,
            text=True,
            check=True,
        )
@@ -109,154 +100,111 @@ def run_command(command, shell=True, stdout=subprocess.PIPE):
            for line in result.stderr.splitlines():
                logging.error(line.strip())
    except subprocess.CalledProcessError as e:
-        logging.error(f"Command failed: '{command}'")
        logging.error(f"{e}")
        quit()
-# Iterate the set of previously calculated result files. Find latest result date,
+# Remove all temporary/intermediary files
-# and return result date + 1 day. If no files exist, return MODEL_START_DATE.
+def remove_temporary_files():
-def find_start_date(model_start_date):
+    if glob.glob(f"{tmp_dir}*.nc"):
-    result_file_names = os.listdir(data_dir)
+        run_command(command=f"rm {tmp_dir}*.nc")
-    result_file_names = [
+    if glob.glob(f"{tmp_dir}*.tif"):
-        file for file in result_file_names if file.startswith("result_2")
+        run_command(command=f"rm {tmp_dir}*.tif")
-    ]
-    try:
-        dates = [
-            datetime.strptime(file.split("_")[1].split(".")[0], "%Y-%m-%d").date()
-            for file in result_file_names
-        ]
-        logging.debug(
-            f"Found results for the following dates: {sorted([date.strftime('%Y-%m-%d') for date in dates])}"
-        )
-    except ValueError as e:
-        logging.error(f"Error parsing dates: {e}")
-        return MODEL_START_DATE
-    latest_result_date = max(dates, default=None)
-    # Quit if latest result date is today (script was already run today)
-    if latest_result_date and latest_result_date == datetime.now().date():
-        logging.error("Script already run for today. Quit.")
-        sys.exit()
-    return (
-        latest_result_date + timedelta(days=1)
+# Remove previously calculated results
-        if latest_result_date
+def remove_old_results():
-        else model_start_date
+    if glob.glob(f"{result_tif_dir}*.tif"):
-    )
+        run_command(command=f"rm {result_tif_dir}*.tif")
+    if glob.glob(f"rm {result_mapfile_dir}*.map"):
+        run_command(command=f"rm {result_mapfile_dir}*.map")
-def find_end_date(model_end_date):
+if __name__ == "__main__":
+    start_time = time.time()  # For logging execution time
    today = datetime.now().date()
-    return today if today < model_end_date else model_end_date
+    remove_old_results()
-if __name__ == "__main__":
+    year = today.year
-    logging.info(f"Start model {model_id}")
+    model_start_date = datetime.strptime(f"{year}-{start_MM_DD}", "%Y-%m-%d").date()
-    start_time = time.time()
+    model_end_date = datetime.strptime(f"{year}-{end_MM_DD}", "%Y-%m-%d").date()
-    start_date = find_start_date(MODEL_START_DATE)
+    start_date = model_start_date
-    end_date = find_end_date(MODEL_END_DATE)
+    end_date = today + timedelta(days=2)
+    if start_date > end_date:
+        logging.error("Model period not started. Quit.")
+        sys.exit()
    logging.info(
-        f"Start running model for start date {start_date} and end date {end_date}"
+        f"Start running model {model_id} for start date {start_date} and end date {end_date}"
    )
-    weather_data_file_pattern = f"{weather_data_dir}{filename_pattern}"
+    weather_data_file = weather_data_dir + today.strftime(weather_data_filename_pattern)
-    weather_data_files = glob.glob(weather_data_file_pattern)
+    max_temp_in_period_file = f"{tmp_dir}{year}_max_temp_for_period.nc"
-    logging.debug(f"Find weather data files: {weather_data_file_pattern}")
+    unmasked_result_file = f"{tmp_dir}{year}_unmasked_result.nc"
-    logging.debug([os.path.basename(path) for path in weather_data_files])
+    final_result_file = f"{tmp_dir}{year}_result.nc"
-    timestep_dates = []
-    for weather_data_file_path in sorted(weather_data_files):
-        weather_data_file_name = os.path.basename(weather_data_file_path)
-        # Skip if we don't have a valid date
-        try:
-            file_date = datetime.strptime(
-                weather_data_file_name, filename_dateformat
-            ).date()
-        except ValueError as e:
-            logging.info(f"{weather_data_file_name} - Skip file due to invalid date")
-            logging.debug(e)
-            continue
-        # Set up file names based on date YYYY-MM-DD
-        date_YYYY_MM_DD = file_date.strftime("%Y-%m-%d")
-        max_temp_file_path = f"{tmp_dir}max_temp_{date_YYYY_MM_DD}.nc"
-        result_unmasked_path = f"{tmp_dir}result_unmasked_{date_YYYY_MM_DD}.nc"
-        result_path = f"{tmp_dir}result_{date_YYYY_MM_DD}.nc"
-        # Only process files from within valid interval
-        if file_date < start_date or file_date > end_date:
-            logging.debug(
-                f"{weather_data_file_name} - Skip file with date outside calculation period"
-            )
-            continue
-        # Check that the file has at least 23 timesteps
-        with nc.Dataset(weather_data_file_path, "r") as weatherdata_file:
-            timestep_count = len(weatherdata_file.variables["time"])
-            if timestep_count < 23:
-                logging.info(
-                    f"{weather_data_file_name} - Skip file with {timestep_count} timesteps"
-                )
-                continue
-        # Skip if result files for date already exist
-        if os.path.isfile(
-            f"{data_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}.tif"
-        ) and os.path.isfile(f"{data_dir}result_{date_YYYY_MM_DD}.tif"):
-            logging.info(f"Result files for {date_YYYY_MM_DD} already exist, skip")
-            continue
-        # Produce daily files with MAX_TEMP, which is the highest recorded temperature within the given day
-        run_command(
-            command=f"cdo -s -O -L -settime,00:00:00 -setdate,{date_YYYY_MM_DD} -chname,{TM},{TM_MAX} -timmax -selvar,{TM} {weather_data_file_path} {max_temp_file_path}"
-        )
-        # Classifying warning status for the model
-        # temperature < 15     --> 2 (Yellow)
-        # temperature >= 15    --> 4 (Red)
-        run_command(
-            command=f'cdo -s -aexpr,"WARNING_STATUS = {TM_MAX} < {THRESHOLD} ? 2 : 4" {max_temp_file_path} {result_unmasked_path}',
-        )
-        # Mask results using a CSV file with polygons. Env variable MASK_FILE must be set, or else we use the file as it is.
-        if mask_file_path is not None:
-            logging.info(f"Applying mask file {mask_file_path} to result file")
-            run_command(
-                command=f"cdo -maskregion,{mask_file_path} {result_unmasked_path} {result_path}",
-            )
-        else:
-            os.rename(result_unmasked_path, result_path)
-        # Convert to GeoTIFF
+    # Extract air_temperature_2m_max for the required period into file MAX_TEMP_FOR_PERIOD_FILE_PATH
-        # We only need WHS and WARNING_STATUS
+    run_command(
-        # Merge the WARNING_STATUS and WHS variables into one GeoTIFF file with two bands.
+        command=f"cdo -L seldate,{start_date},{end_date} -selname,{tm_max} {weather_data_file} {max_temp_in_period_file}"
-        # The WARNING_STATUS should always be band #1
+    )
-        # We must delete the GeoTIFF file before merging WHY?
+    # Classifying warning status for the model
-        # run_command(command=f"rm {data_dir}result_*{file_date_str}*.tif")
+    # temperature < 15     --> 2 (Yellow)
+    # temperature >= 15    --> 4 (Red)
+    run_command(
+        command=f'cdo -s -aexpr,"WARNING_STATUS = {tm_max} < {THRESHOLD} ? 2 : 4" {max_temp_in_period_file} {unmasked_result_file}',
+    )
-        timestep_dates.append(date_YYYY_MM_DD)
+    # Mask results using a CSV file with polygons. Env variable MASK_FILE must be set, or else we use the file as it is.
+    if mask_file_path is not None:
+        run_command(
+            command=f"cdo -maskregion,{mask_file_path} {unmasked_result_file} {final_result_file}",
+        )
+    else:
+        os.rename(unmasked_result_file, final_result_file)
+    # Split result file into separate files for each timestep
+    prefix = f"{tmp_dir}timestep_"
+    run_command(command=f"cdo -s splitdate {final_result_file} {prefix}")
+    timestep_file_pattern = f"{prefix}*.nc"
+    logging.info(f"Find timestep files: {timestep_file_pattern}")
+    timestep_files = sorted(glob.glob(timestep_file_pattern))
+    timestep_filenames = [os.path.basename(file) for file in timestep_files]
+    logging.info(f"Timestep files sorted: {timestep_filenames}")
+    timestep_dates = [
+        filename.split("_")[1].split(".")[0] for filename in timestep_filenames
+    ]
+    for file in timestep_files:
+        filename = os.path.basename(file)
+        date_str = filename.split("_")[1].split(".")[0]
        with open("/dev/null", "w") as devnull:
+            warning_status_lcc = f"{tmp_dir}result_WARNING_STATUS_{date_str}_lcc.tif"
+            result_lcc = f"{tmp_dir}result_{date_str}_lcc.tif"
+            warning_status = f"{result_tif_dir}result_WARNING_STATUS_{date_str}.tif"
+            result = f"{result_tif_dir}result_{date_str}.tif"
+            # For warning status:
            run_command(
-                command=f'gdal_translate -ot Int16 -of GTiff  NETCDF:"{tmp_dir}result_{date_YYYY_MM_DD}.nc":WARNING_STATUS {tmp_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}_lcc.tif',
+                command=f'gdal_translate -ot Int16 -of GTiff  NETCDF:"{file}":WARNING_STATUS {warning_status_lcc}',
                stdout=devnull,
            )
+            # For max temperature:
            run_command(
-                command=f'gdal_translate -ot Float32 -of GTiff  NETCDF:"{tmp_dir}result_{date_YYYY_MM_DD}.nc":{TM_MAX} {tmp_dir}result_{date_YYYY_MM_DD}_lcc.tif',
+                command=f'gdal_translate -ot Float32 -of GTiff  NETCDF:"{file}":{tm_max} {result_lcc}',
                stdout=devnull,
            )
            # Need to reproject the files, to ensure we have the projection given in the generted mapfile. We always use EPSG:4326 for this
            run_command(
-                command=f"gdalwarp -t_srs EPSG:4326 {tmp_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}_lcc.tif {data_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}.tif",
+                command=f"gdalwarp -t_srs EPSG:4326 {warning_status_lcc} {warning_status}",
                stdout=devnull,
            )
            run_command(
-                command=f"gdalwarp -t_srs EPSG:4326 {tmp_dir}result_{date_YYYY_MM_DD}_lcc.tif {data_dir}result_{date_YYYY_MM_DD}.tif",
+                command=f"gdalwarp -t_srs EPSG:4326 {result_lcc} {result}",
                stdout=devnull,
            )
@@ -271,15 +219,14 @@ if __name__ == "__main__":
                language[keyword] = config["i18n.%s" % language_code][keyword]
            languages.append(language)
-    # The paths should be set in a .env file
    env = Environment(loader=FileSystemLoader(template_dir))
    template = env.get_template(template_file_name)
    output = template.render(
        {
            "model_id": model_id,
            "timestep_dates": timestep_dates,
-            "mapserver_data_dir": os.getenv("MAPSERVER_DATA_DIR"),
+            "mapserver_data_dir": os.getenv("MAPSERVER_RESULT_TIF_DIR"),
-            "mapserver_mapfile_dir": os.getenv("MAPSERVER_MAPFILE_DIR"),
+            "mapserver_mapfile_dir": os.getenv("MAPSERVER_RESULT_MAPFILE_DIR"),
            "mapserver_log_file": os.getenv("MAPSERVER_LOG_FILE"),
            "mapserver_image_path": os.getenv("MAPSERVER_IMAGE_PATH"),
            "mapserver_extent": os.getenv("MAPSERVER_EXTENT"),
@@ -287,14 +234,11 @@ if __name__ == "__main__":
            "language_codes": language_codes,
        }
    )
-    with open(f"{mapfile_outdir}/{model_id}.map", "w") as f:
+    with open(f"{result_mapfile_dir}/{model_id}.map", "w") as f:
        f.write(output)
+    remove_temporary_files()
-    # Remove all temporary/intermediary files
-    # run_command(command=f"rm {tmp_dir}result*.nc")
-    # run_command(command=f"rm {tmp_dir}result*.tif")
    end_time = time.time()
    logging.info(
-        f"End model {model_id} - time spent: {'Execution time: {:.2f} seconds'.format(end_time - start_time)}"
+        f"End model {model_id} - Execution time: {'{:.2f} seconds'.format(end_time - start_time)}"
    )
--- a/run_ADASMELIAE.sh
+++ b/run_ADASMELIAE.sh
@@ -16,7 +16,6 @@
 #    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 # Configures environment and logging before running the model
-# @author: Tor-Einar Skog <tor-einar.skog@nibio.no>
 # First: Test that we have CDO and GDAL installed
@@ -47,8 +46,9 @@ fi
 LOG_FILE=${HOME_DIR}log/ADASMELIAE.log
 REQUIREMENTS=${HOME_DIR}requirements.txt
+echo "Start model. Follow log at $LOG_FILE"
 # Create and activate the virtual environment
-echo "Run model. Follow log at $LOG_FILE"
 echo "Activate virtual environment .venv" >> "$LOG_FILE"
 python3 -m venv .venv
 . .venv/bin/activate

--- a/test_ADASMELIAE.py
+++ b/test_ADASMELIAE.py
 import pytest
-from datetime import datetime, timedelta
+import logging
-from ADASMELIAE import find_start_date, find_end_date
+from ADASMELIAE import run_command
-@pytest.fixture
+# Define a fixture for setting up logging
-def example_result_files():
+@pytest.fixture(autouse=True)
-    # Fixture for generating content of result dir
+def setup_logging(caplog):
-    return [
+    caplog.set_level(logging.DEBUG)
-        "result_2023-04-15.nc",
-        "result_2023-04-16.nc",
-        "result_2023-04-17.nc",
-        "result_WARNING_STATUS_2023-04-15.nc",
-        "result_WARNING_STATUS_2023-04-16.nc",
-        "result_WARNING_STATUS_2023-04-17.nc",
-        "result_WARNING_STATUS_2023-04-18.nc",
-    ]
-@pytest.fixture
+# Test case for successful command execution
-def today():
+def test_run_command_success(caplog):
-    # Fixture for generating date for testing
+    result = run_command("echo 'Hello, World!'")
-    return datetime.now().date()
+    assert result == ["Hello, World!"]
+    assert "Hello, World!" in caplog.text
-def test_find_start_date_with_previous_results(example_result_files, monkeypatch):
+# Test case for command failure
-    MODEL_START_DATE = datetime(2023, 3, 1)
+def test_run_command_failure(caplog):
-    monkeypatch.setenv("DATA_DIR", "out")
+    with pytest.raises(SystemExit):
+        run_command("nonexistent_command")
-    # Mock os.listdir to return the example result files
+    assert "nonexistent_command" in caplog.text
-    with monkeypatch.context() as m:
-        m.setattr("os.listdir", lambda _: example_result_files)
-        start_date = find_start_date(MODEL_START_DATE)
-    # Assert the expected start date
-    expected_start_date = datetime(2023, 4, 18)
-    assert start_date == expected_start_date
+# Test case for handling stderr
-def test_find_start_date_without_previous_results(monkeypatch):
+def test_run_command_stderr(caplog):
-    MODEL_START_DATE = datetime(2023, 3, 1)
+    run_command("echo 'This is an error' >&2")
-    monkeypatch.setenv("DATA_DIR", "out")
+    assert "This is an error" in caplog.text
-    # Mock os.listdir to return the example result files
-    with monkeypatch.context() as m:
-        m.setattr("os.listdir", lambda _: [])
-        start_date = find_start_date(MODEL_START_DATE)
-    # Assert the expected start date
-    assert start_date == MODEL_START_DATE
-def test_find_end_date_when_model_end_date_is_in_past(today):
-    # Test when model end date is in the past
-    model_end_date = datetime.now().date() - timedelta(days=1)
-    assert find_end_date(model_end_date=model_end_date) == model_end_date
-def test_find_end_date_when_model_end_date_is_in_future(today):
-    # Test when model end date is in the future
-    model_end_date = datetime.now().date() + timedelta(days=1)
-    assert find_end_date(model_end_date=model_end_date) == today
-def test_find_end_date_equal(today):
-    # Test when model end date is today
-    assert find_end_date(model_end_date=today) == today