From 6035a8d07d5388ab5cd4308632ddcbd11b1a26f2 Mon Sep 17 00:00:00 2001 From: lewa <lene.wasskog@nibio.no> Date: Sat, 20 Apr 2024 14:35:37 +0200 Subject: [PATCH] feat: Use year file, remove previous results before calculating new, simplify --- .gitignore | 4 +- ADASMELIAE.py | 240 +++++++++++++++++---------------------------- run_ADASMELIAE.sh | 4 +- test_ADASMELIAE.py | 77 ++++----------- 4 files changed, 117 insertions(+), 208 deletions(-) diff --git a/.gitignore b/.gitignore index edf7d24..3025c3d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,6 @@ in out log tmp -.pytest_cache \ No newline at end of file +.pytest_cache +run_cleanup.sh +.idea \ No newline at end of file diff --git a/ADASMELIAE.py b/ADASMELIAE.py index 0b5d9e0..05f7dcc 100755 --- a/ADASMELIAE.py +++ b/ADASMELIAE.py @@ -22,7 +22,6 @@ # * GDAL >= v 3.4.3 built with Python support # * For Python: See requirements.txt - import os import time import subprocess @@ -32,12 +31,13 @@ from datetime import datetime, timedelta from jinja2 import Environment, FileSystemLoader import logging import pytz -import netCDF4 as nc import configparser import sys load_dotenv() +# TODO Should start before the first European crops reach growth stage 51 and end when the northernmost regions have passed stage 59 + DEBUG = ( False if os.getenv("DEBUG") is None or os.getenv("DEBUG").lower() == "false" @@ -45,26 +45,18 @@ DEBUG = ( ) model_id = os.getenv("MODEL_ID") -filename_pattern = os.getenv("WEATHER_DATA_FILENAME_PATTERN") -filename_dateformat = os.getenv("WEATHER_DATA_FILENAME_DATEFORMAT") - -# TODO Should start before the first European crops reach growth stage 51 and end when the northernmost regions have passed stage 59 -current_year = datetime.now().year -local_timezone = pytz.timezone(os.getenv("LOCAL_TIMEZONE")) - -MODEL_START_DATE = datetime.strptime( - f"{current_year}-{os.getenv('START_DATE')}", "%Y-%m-%d" -).date() -MODEL_END_DATE = datetime.strptime( - f"{current_year}-{os.getenv('END_DATE')}", 
"%Y-%m-%d" -).date() - home_dir = os.getenv("HOME_DIR") weather_data_dir = os.getenv("WEATHER_DATA_DIR") +result_tif_dir = os.getenv("RESULT_TIF_DIR") +result_mapfile_dir = os.getenv("RESULT_MAPFILE_DIR") tmp_dir = os.getenv("TMP_DIR") -data_dir = os.getenv("DATA_DIR") -template_dir = f"{os.getenv('HOME_DIR')}mapfile/" -mapfile_outdir = os.getenv("MAPFILE_DIR") +template_dir = f"{home_dir}mapfile/" + +weather_data_filename_pattern = os.getenv("WEATHER_DATA_FILENAME_DATEFORMAT") +local_timezone = pytz.timezone(os.getenv("LOCAL_TIMEZONE")) +start_MM_DD = os.getenv("START_DATE") +end_MM_DD = os.getenv("END_DATE") + config_file_path = f"{os.getenv('HOME_DIR')}{model_id}.cfg" mask_file_path = ( @@ -83,8 +75,7 @@ logging.basicConfig( format="%(asctime)s - %(levelname).4s - (%(filename)s:%(lineno)d) - %(message)s", ) -TM = "air_temperature_2m" -TM_MAX = "TM_MAX" +tm_max = "air_temperature_2m_max" THRESHOLD = 15 @@ -96,7 +87,7 @@ def run_command(command, shell=True, stdout=subprocess.PIPE): command, shell=shell, stdout=stdout, - stderr=subprocess.PIPE, + stderr=stdout, text=True, check=True, ) @@ -109,154 +100,111 @@ def run_command(command, shell=True, stdout=subprocess.PIPE): for line in result.stderr.splitlines(): logging.error(line.strip()) except subprocess.CalledProcessError as e: - logging.error(f"Command failed: '{command}'") logging.error(f"{e}") quit() -# Iterate the set of previously calculated result files. Find latest result date, -# and return result date + 1 day. If no files exist, return MODEL_START_DATE. 
-def find_start_date(model_start_date): - result_file_names = os.listdir(data_dir) - result_file_names = [ - file for file in result_file_names if file.startswith("result_2") - ] - try: - dates = [ - datetime.strptime(file.split("_")[1].split(".")[0], "%Y-%m-%d").date() - for file in result_file_names - ] - logging.debug( - f"Found results for the following dates: {sorted([date.strftime('%Y-%m-%d') for date in dates])}" - ) - except ValueError as e: - logging.error(f"Error parsing dates: {e}") - return MODEL_START_DATE - latest_result_date = max(dates, default=None) - - # Quit if latest result date is today (script was already run today) - if latest_result_date and latest_result_date == datetime.now().date(): - logging.error("Script already run for today. Quit.") - sys.exit() +# Remove all temporary/intermediary files +def remove_temporary_files(): + if glob.glob(f"{tmp_dir}*.nc"): + run_command(command=f"rm {tmp_dir}*.nc") + if glob.glob(f"{tmp_dir}*.tif"): + run_command(command=f"rm {tmp_dir}*.tif") - return ( - latest_result_date + timedelta(days=1) - if latest_result_date - else model_start_date - ) + +# Remove previously calculated results +def remove_old_results(): + if glob.glob(f"{result_tif_dir}*.tif"): + run_command(command=f"rm {result_tif_dir}*.tif") + if glob.glob(f"{result_mapfile_dir}*.map"): + run_command(command=f"rm {result_mapfile_dir}*.map") -def find_end_date(model_end_date): +if __name__ == "__main__": + start_time = time.time() # For logging execution time today = datetime.now().date() - return today if today < model_end_date else model_end_date + remove_old_results() -if __name__ == "__main__": - logging.info(f"Start model {model_id}") - start_time = time.time() + year = today.year + model_start_date = datetime.strptime(f"{year}-{start_MM_DD}", "%Y-%m-%d").date() + model_end_date = datetime.strptime(f"{year}-{end_MM_DD}", "%Y-%m-%d").date() - start_date = find_start_date(MODEL_START_DATE) - end_date = find_end_date(MODEL_END_DATE) + 
start_date = model_start_date + end_date = today + timedelta(days=2) + + if start_date > end_date: + logging.error("Model period not started. Quit.") + sys.exit() logging.info( - f"Start running model for start date {start_date} and end date {end_date}" + f"Start running model {model_id} for start date {start_date} and end date {end_date}" ) - weather_data_file_pattern = f"{weather_data_dir}{filename_pattern}" - weather_data_files = glob.glob(weather_data_file_pattern) - logging.debug(f"Find weather data files: {weather_data_file_pattern}") - logging.debug([os.path.basename(path) for path in weather_data_files]) - - timestep_dates = [] - for weather_data_file_path in sorted(weather_data_files): - weather_data_file_name = os.path.basename(weather_data_file_path) - - # Skip if we don't have a valid date - try: - file_date = datetime.strptime( - weather_data_file_name, filename_dateformat - ).date() - except ValueError as e: - logging.info(f"{weather_data_file_name} - Skip file due to invalid date") - logging.debug(e) - continue - - # Set up file names based on date YYYY-MM-DD - date_YYYY_MM_DD = file_date.strftime("%Y-%m-%d") - max_temp_file_path = f"{tmp_dir}max_temp_{date_YYYY_MM_DD}.nc" - result_unmasked_path = f"{tmp_dir}result_unmasked_{date_YYYY_MM_DD}.nc" - result_path = f"{tmp_dir}result_{date_YYYY_MM_DD}.nc" - - # Only process files from within valid interval - if file_date < start_date or file_date > end_date: - logging.debug( - f"{weather_data_file_name} - Skip file with date outside calculation period" - ) - continue - - # Check that the file has at least 23 timesteps - with nc.Dataset(weather_data_file_path, "r") as weatherdata_file: - timestep_count = len(weatherdata_file.variables["time"]) - if timestep_count < 23: - logging.info( - f"{weather_data_file_name} - Skip file with {timestep_count} timesteps" - ) - continue - - # Skip if result files for date already exist - if os.path.isfile( - f"{data_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}.tif" - ) 
and os.path.isfile(f"{data_dir}result_{date_YYYY_MM_DD}.tif"): - logging.info(f"Result files for {date_YYYY_MM_DD} already exist, skip") - continue - - # Produce daily files with MAX_TEMP, which is the highest recorded temperature within the given day - run_command( - command=f"cdo -s -O -L -settime,00:00:00 -setdate,{date_YYYY_MM_DD} -chname,{TM},{TM_MAX} -timmax -selvar,{TM} {weather_data_file_path} {max_temp_file_path}" - ) - - # Classifying warning status for the model - # temperature < 15 --> 2 (Yellow) - # temperature >= 15 --> 4 (Red) - run_command( - command=f'cdo -s -aexpr,"WARNING_STATUS = {TM_MAX} < {THRESHOLD} ? 2 : 4" {max_temp_file_path} {result_unmasked_path}', - ) - - # Mask results using a CSV file with polygons. Env variable MASK_FILE must be set, or else we use the file as it is. - if mask_file_path is not None: - logging.info(f"Applying mask file {mask_file_path} to result file") - run_command( - command=f"cdo -maskregion,{mask_file_path} {result_unmasked_path} {result_path}", - ) - else: - os.rename(result_unmasked_path, result_path) + weather_data_file = weather_data_dir + today.strftime(weather_data_filename_pattern) + max_temp_in_period_file = f"{tmp_dir}{year}_max_temp_for_period.nc" + unmasked_result_file = f"{tmp_dir}{year}_unmasked_result.nc" + final_result_file = f"{tmp_dir}{year}_result.nc" - # Convert to GeoTIFF - # We only need WHS and WARNING_STATUS - # Merge the WARNING_STATUS and WHS variables into one GeoTIFF file with two bands. - # The WARNING_STATUS should always be band #1 + # Extract air_temperature_2m_max for the required period into file MAX_TEMP_FOR_PERIOD_FILE_PATH + run_command( + command=f"cdo -L seldate,{start_date},{end_date} -selname,{tm_max} {weather_data_file} {max_temp_in_period_file}" + ) - # We must delete the GeoTIFF file before merging WHY? 
- # run_command(command=f"rm {data_dir}result_*{file_date_str}*.tif") + # Classifying warning status for the model + # temperature < 15 --> 2 (Yellow) + # temperature >= 15 --> 4 (Red) + run_command( + command=f'cdo -s -aexpr,"WARNING_STATUS = {tm_max} < {THRESHOLD} ? 2 : 4" {max_temp_in_period_file} {unmasked_result_file}', + ) - timestep_dates.append(date_YYYY_MM_DD) + # Mask results using a CSV file with polygons. Env variable MASK_FILE must be set, or else we use the file as it is. + if mask_file_path is not None: + run_command( + command=f"cdo -maskregion,{mask_file_path} {unmasked_result_file} {final_result_file}", + ) + else: + os.rename(unmasked_result_file, final_result_file) + + # Split result file into separate files for each timestep + prefix = f"{tmp_dir}timestep_" + run_command(command=f"cdo -s splitdate {final_result_file} {prefix}") + + timestep_file_pattern = f"{prefix}*.nc" + logging.info(f"Find timestep files: {timestep_file_pattern}") + timestep_files = sorted(glob.glob(timestep_file_pattern)) + timestep_filenames = [os.path.basename(file) for file in timestep_files] + logging.info(f"Timestep files sorted: {timestep_filenames}") + timestep_dates = [ + filename.split("_")[1].split(".")[0] for filename in timestep_filenames + ] + for file in timestep_files: + filename = os.path.basename(file) + date_str = filename.split("_")[1].split(".")[0] with open("/dev/null", "w") as devnull: + warning_status_lcc = f"{tmp_dir}result_WARNING_STATUS_{date_str}_lcc.tif" + result_lcc = f"{tmp_dir}result_{date_str}_lcc.tif" + warning_status = f"{result_tif_dir}result_WARNING_STATUS_{date_str}.tif" + result = f"{result_tif_dir}result_{date_str}.tif" + + # For warning status: run_command( - command=f'gdal_translate -ot Int16 -of GTiff NETCDF:"{tmp_dir}result_{date_YYYY_MM_DD}.nc":WARNING_STATUS {tmp_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}_lcc.tif', + command=f'gdal_translate -ot Int16 -of GTiff NETCDF:"{file}":WARNING_STATUS {warning_status_lcc}', 
stdout=devnull, ) + # For max temperature: run_command( - command=f'gdal_translate -ot Float32 -of GTiff NETCDF:"{tmp_dir}result_{date_YYYY_MM_DD}.nc":{TM_MAX} {tmp_dir}result_{date_YYYY_MM_DD}_lcc.tif', + command=f'gdal_translate -ot Float32 -of GTiff NETCDF:"{file}":{tm_max} {result_lcc}', stdout=devnull, ) # Need to reproject the files, to ensure we have the projection given in the generted mapfile. We always use EPSG:4326 for this run_command( - command=f"gdalwarp -t_srs EPSG:4326 {tmp_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}_lcc.tif {data_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}.tif", + command=f"gdalwarp -t_srs EPSG:4326 {warning_status_lcc} {warning_status}", stdout=devnull, ) run_command( - command=f"gdalwarp -t_srs EPSG:4326 {tmp_dir}result_{date_YYYY_MM_DD}_lcc.tif {data_dir}result_{date_YYYY_MM_DD}.tif", + command=f"gdalwarp -t_srs EPSG:4326 {result_lcc} {result}", stdout=devnull, ) @@ -271,15 +219,14 @@ if __name__ == "__main__": language[keyword] = config["i18n.%s" % language_code][keyword] languages.append(language) - # The paths should be set in a .env file env = Environment(loader=FileSystemLoader(template_dir)) template = env.get_template(template_file_name) output = template.render( { "model_id": model_id, "timestep_dates": timestep_dates, - "mapserver_data_dir": os.getenv("MAPSERVER_DATA_DIR"), - "mapserver_mapfile_dir": os.getenv("MAPSERVER_MAPFILE_DIR"), + "mapserver_data_dir": os.getenv("MAPSERVER_RESULT_TIF_DIR"), + "mapserver_mapfile_dir": os.getenv("MAPSERVER_RESULT_MAPFILE_DIR"), "mapserver_log_file": os.getenv("MAPSERVER_LOG_FILE"), "mapserver_image_path": os.getenv("MAPSERVER_IMAGE_PATH"), "mapserver_extent": os.getenv("MAPSERVER_EXTENT"), @@ -287,14 +234,11 @@ if __name__ == "__main__": "language_codes": language_codes, } ) - with open(f"{mapfile_outdir}/{model_id}.map", "w") as f: + with open(f"{result_mapfile_dir}/{model_id}.map", "w") as f: f.write(output) - - # Remove all temporary/intermediary files - # 
run_command(command=f"rm {tmp_dir}result*.nc") - # run_command(command=f"rm {tmp_dir}result*.tif") + remove_temporary_files() end_time = time.time() logging.info( - f"End model {model_id} - time spent: {'Execution time: {:.2f} seconds'.format(end_time - start_time)}" + f"End model {model_id} - Execution time: {'{:.2f} seconds'.format(end_time - start_time)}" ) diff --git a/run_ADASMELIAE.sh b/run_ADASMELIAE.sh index fb91d32..caa9d0c 100755 --- a/run_ADASMELIAE.sh +++ b/run_ADASMELIAE.sh @@ -16,7 +16,6 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. # Configures environment and logging before running the model -# @author: Tor-Einar Skog <tor-einar.skog@nibio.no> # First: Test that we have CDO and GDAL installed @@ -47,8 +46,9 @@ fi LOG_FILE=${HOME_DIR}log/ADASMELIAE.log REQUIREMENTS=${HOME_DIR}requirements.txt +echo "Start model. Follow log at $LOG_FILE" + # Create and activate the virtual environment -echo "Run model. Follow log at $LOG_FILE" echo "Activate virtual environment .venv" >> "$LOG_FILE" python3 -m venv .venv . 
.venv/bin/activate diff --git a/test_ADASMELIAE.py b/test_ADASMELIAE.py index 34fb857..fe002cb 100644 --- a/test_ADASMELIAE.py +++ b/test_ADASMELIAE.py @@ -1,67 +1,30 @@ import pytest -from datetime import datetime, timedelta -from ADASMELIAE import find_start_date, find_end_date +import logging +from ADASMELIAE import run_command -@pytest.fixture -def example_result_files(): - # Fixture for generating content of result dir - return [ - "result_2023-04-15.nc", - "result_2023-04-16.nc", - "result_2023-04-17.nc", - "result_WARNING_STATUS_2023-04-15.nc", - "result_WARNING_STATUS_2023-04-16.nc", - "result_WARNING_STATUS_2023-04-17.nc", - "result_WARNING_STATUS_2023-04-18.nc", - ] +# Define a fixture for setting up logging +@pytest.fixture(autouse=True) +def setup_logging(caplog): + caplog.set_level(logging.DEBUG) -@pytest.fixture -def today(): - # Fixture for generating date for testing - return datetime.now().date() +# Test case for successful command execution +def test_run_command_success(caplog): + result = run_command("echo 'Hello, World!'") + assert result == ["Hello, World!"] + assert "Hello, World!" 
in caplog.text -def test_find_start_date_with_previous_results(example_result_files, monkeypatch): - MODEL_START_DATE = datetime(2023, 3, 1) - monkeypatch.setenv("DATA_DIR", "out") +# Test case for command failure +def test_run_command_failure(caplog): + with pytest.raises(SystemExit): + run_command("nonexistent_command") - # Mock os.listdir to return the example result files - with monkeypatch.context() as m: - m.setattr("os.listdir", lambda _: example_result_files) - start_date = find_start_date(MODEL_START_DATE) + assert "nonexistent_command" in caplog.text - # Assert the expected start date - expected_start_date = datetime(2023, 4, 18) - assert start_date == expected_start_date - -def test_find_start_date_without_previous_results(monkeypatch): - MODEL_START_DATE = datetime(2023, 3, 1) - monkeypatch.setenv("DATA_DIR", "out") - - # Mock os.listdir to return the example result files - with monkeypatch.context() as m: - m.setattr("os.listdir", lambda _: []) - start_date = find_start_date(MODEL_START_DATE) - - # Assert the expected start date - assert start_date == MODEL_START_DATE - - -def test_find_end_date_when_model_end_date_is_in_past(today): - # Test when model end date is in the past - model_end_date = datetime.now().date() - timedelta(days=1) - assert find_end_date(model_end_date=model_end_date) == model_end_date - - -def test_find_end_date_when_model_end_date_is_in_future(today): - # Test when model end date is in the future - model_end_date = datetime.now().date() + timedelta(days=1) - assert find_end_date(model_end_date=model_end_date) == today - - -def test_find_end_date_equal(today): - # Test when model end date is today - assert find_end_date(model_end_date=today) == today +# Test case for handling stderr +def test_run_command_stderr(caplog): + run_command("echo 'This is an error' >&2") + assert "This is an error" in caplog.text -- GitLab