Commit 66ac9d8d authored by Lene Wasskog

feat: First version of ADASMELIAE, with pipeline config

parent 4cd15b4e
@@ -3,15 +3,11 @@
languages=en,nb
[i18n.en]
no_risk = No infection risk
low_risk = Low infection risk
medium_risk = Medium infection risk
high_risk = High infection risk
whs = Wet Hour Sum (0-72)
temperature = Temperature
[i18n.nb]
no_risk = Ingen infeksjonsrisiko
low_risk = Lav infeksjonsrisiko
medium_risk = Middels infeksjonsrisiko
high_risk = Høy infeksjonsrisiko
whs = Sum fuktige timer (0-72)
\ No newline at end of file
temperature = Temperatur
\ No newline at end of file
@@ -24,7 +24,7 @@
import os
import sys
import time
import subprocess
import glob
from dotenv import load_dotenv
@@ -35,231 +35,243 @@ import pytz
import netCDF4 as nc
import configparser
# Paths config
# Create a .env file from dotenv-sample
load_dotenv()
DEBUG = (
False
if os.getenv("DEBUG") is None or os.getenv("DEBUG").lower() == "false"
else True
)
model_id = os.getenv("MODEL_ID")
filename_pattern = os.getenv("WEATHER_DATA_FILENAME_PATTERN")
filename_dateformat = os.getenv("WEATHER_DATA_FILENAME_DATEFORMAT")
# TODO Should start before the first European crops reach growth stage 51 and end when the northernmost regions have passed stage 59
current_year = datetime.now().year
local_timezone = pytz.timezone(os.getenv("LOCAL_TIMEZONE"))
MODEL_START_DATE = datetime.strptime(
f"{current_year}-{os.getenv('START_DATE')}", "%Y-%m-%d"
)
MODEL_END_DATE = datetime.strptime(
f"{current_year}-{os.getenv('END_DATE')}", "%Y-%m-%d"
)
weather_data_dir = os.getenv("WEATHER_DATA_DIR")
tmp_dir = os.getenv("TMP_DIR")
data_dir = os.getenv("DATA_DIR")
# Get language stuff
config = configparser.ConfigParser()
config.read("ADASMELIAE.cfg")
DEBUG = False if os.getenv("DEBUG") is None or os.getenv("DEBUG").lower() == "false" else True
logging.basicConfig(
level=logging.DEBUG if DEBUG else logging.INFO,
format="%(asctime)s - %(levelname).4s - (%(filename)s:%(lineno)d) - %(message)s",
)
# Path to weather data
model_id = os.getenv("MODEL_ID")
infile_path = os.getenv("WEATHER_DATA_DIR")
# Used for iterating the weather data files
filename_pattern = os.getenv("FILENAME_PATTERN")
# Date format of weather data filenames
filename_dateformat = os.getenv("FILENAME_DATEFORMAT")
# Path to store generated GeoTIFF files
outfile_path = os.getenv("DATA_DIR")
# Where to store intermediary calculations
tmpfile_path = "tmp/"
# Names of weather parameters in NetCDF files
RR = os.getenv("RR")
UM = os.getenv("UM")
local_timezone = pytz.timezone(os.getenv("LOCAL_TIMEZONE"))
TM = "air_temperature_2m"
TM_MAX = "TM_MAX"
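# Daily max temperature threshold (°C): below gives warning status 2 (yellow), at or above gives 4 (red)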
THRESHOLD = 15
# Iterate the set of hourly weather data files
# 1. When's the latest wh_[DATE].nc file? - set earliest weather data file: start_date = [DATE]-2 days
# --> If no wh_DATE.nc file exists, set start_date = None
start_date = None
last_wh_date = None
logging.info("Start running model")
logging.info(f"Read files from path pattern: {tmpfile_path}wh_2[0-9][0-9][0-9]-[01][0-9]-[0123][0-9].nc")
for wh_file in glob.glob(f"{tmpfile_path}wh_2[0-9][0-9][0-9]-[01][0-9]-[0123][0-9].nc"):
current_wh_file_date = local_timezone.localize(datetime.strptime(f"{wh_file[7:17]}", "%Y-%m-%d"))
if last_wh_date is None or last_wh_date < current_wh_file_date:
last_wh_date = current_wh_file_date
if last_wh_date is not None:
start_date = last_wh_date - timedelta(days=2)
logging.info(f"Last date of WH calculations is {last_wh_date}. Start date = {start_date}")
weatherdata_files = glob.glob(f"{infile_path}{filename_pattern}")
logging.debug(f"{infile_path}{filename_pattern}")
logging.debug("What are the weatherdata files?")
logging.debug(weatherdata_files)
for file_path in sorted(weatherdata_files):
# TODO: When filename/pattern is configurable: make the string search adaptable
file_name = os.path.basename(file_path)
# Skip if we don't have a complete date, which could indicate that we are looking at a yearly file of daily data
# Common method for running commands using subprocess.run. Handles logging.
def run_command(command, shell=True, stdout=subprocess.PIPE):
logging.debug(f"{command}")
try:
wh_sum_date = local_timezone.localize(datetime.strptime(file_name, filename_dateformat))
result = subprocess.run(
command,
shell=shell,
stdout=stdout,
stderr=subprocess.PIPE,
text=True,
check=True,
)
if result.stderr:
for line in result.stderr.splitlines():
logging.error(line.strip())
if result.stdout:
result_lines = result.stdout.splitlines()
for line in result_lines:
logging.debug(line.strip())
return result_lines
except subprocess.CalledProcessError as e:
logging.error(f"Command failed: '{command}'")
logging.error(f"{e}")
quit()
# Iterate the set of previously calculated result files. Find latest result date,
# and return result date + 1 day. If no files exist, return MODEL_START_DATE.
def find_start_date(default_date):
result_file_names = os.listdir(data_dir)
result_file_names = [
file for file in result_file_names if file.startswith("result_2")
]
try:
dates = [
datetime.strptime(file.split("_")[1].split(".")[0], "%Y-%m-%d")
for file in result_file_names
]
logging.debug(f"Found results for the following dates: {dates}")
except ValueError as e:
logging.info(e)
continue
# Only process files from the last three days (unless this is a run from scratch)
if start_date is not None and wh_sum_date < start_date:
continue
# Check that the file has at least 23 timesteps
with nc.Dataset(file_path, 'r') as weatherdata_file:
file_timesteps = len(weatherdata_file.variables["time"])
if file_timesteps < 23:
logging.info(f"{file_path} has {file_timesteps} timesteps. Skipping it.")
logging.error(f"Error parsing dates: {e}")
return MODEL_START_DATE
latest_result_date = max(dates, default=None)
return (
latest_result_date + timedelta(days=1) if latest_result_date else default_date
)
def find_end_date(default_date):
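# Cap the default end date at today, i.e. return min(today, default_date)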
today = datetime.now()
return today if today < default_date else default_date
if __name__ == "__main__":
logging.info(f"Start model {model_id}")
start_time = time.time()
start_date = find_start_date(MODEL_START_DATE)
end_date = find_end_date(MODEL_END_DATE)
logging.info(
f"Start running model for start date {start_date.date()} and end date {end_date.date()}"
)
weather_data_file_pattern = f"{weather_data_dir}{filename_pattern}"
weather_data_files = glob.glob(weather_data_file_pattern)
logging.debug(f"Find weather data files: {weather_data_file_pattern}")
logging.debug(weather_data_files)
timestep_dates = []
for weather_data_file_path in sorted(weather_data_files):
weather_data_file_name = os.path.basename(weather_data_file_path)
# Skip if we don't have a valid date
try:
file_date = datetime.strptime(weather_data_file_name, filename_dateformat)
except ValueError as e:
logging.info(f"{weather_data_file_name} - Skip file due to invalid date")
logging.debug(e)
continue
# Produce daily files with WH_SUM, which is the number of "Wet hours" (WH) for a given day
# WH is defined as RR > 0.2 || UM > 88.0
wh_sum_date_str = wh_sum_date.strftime("%Y-%m-%d")
wh_sum_hour_str = wh_sum_date.strftime("%H")
subprocess.run(
f'cdo -s -O -setdate,{wh_sum_date_str} -settime,{wh_sum_hour_str}:00:00 -chname,WH,WH_DAYSUM -timsum -selname,WH -aexpr,"WH = {RR} > 0.2 || {UM} > 88.0 ? 1 : 0;" {file_path} {tmpfile_path}wh_{wh_sum_date_str}.nc',
shell=True
)
# Set up file names based on date YYYY-MM-DD
date_YYYY_MM_DD = file_date.strftime("%Y-%m-%d")
max_temp_file_path = f"{tmp_dir}max_temp_{date_YYYY_MM_DD}.nc"
result_unmasked_path = f"{tmp_dir}result_unmasked_{date_YYYY_MM_DD}.nc"
result_path = f"{tmp_dir}result_{date_YYYY_MM_DD}.nc"
# Only process files from within valid interval
if file_date < start_date or file_date > end_date:
logging.debug(
f"{weather_data_file_name} - Skip file with date outside calculation period"
)
continue
# Concatenate daily files > one file with daily values
# Additive calculation - this file can be stored between calculations
subprocess.run(f'cdo -s -O mergetime {tmpfile_path}wh_2[0-9][0-9][0-9]-[01][0-9]-[0123][0-9].nc {tmpfile_path}wh_daysum.nc', shell=True)
# Check timesteps - that we have no missing data
wh_daysum = nc.Dataset(f'{tmpfile_path}wh_daysum.nc', 'r')
timesteps = wh_daysum.variables["time"][:]
previous_timestep = None
for timestep in timesteps:
if previous_timestep is not None:
if timestep - previous_timestep != 86400.0:
timestep_str = datetime.fromtimestamp(timestep).astimezone(local_timezone).strftime("%Y-%m-%d")
previous_timestep_str = datetime.fromtimestamp(previous_timestep).astimezone(local_timezone).strftime("%Y-%m-%d")
logging.error(f"Missing weather data between {previous_timestep_str} and {timestep_str}. Exiting.", file=sys.stderr)
exit(1)
previous_timestep = timestep
wh_daysum.close()
# From here, no additive calculation; we calculate from the beginning of the wh_daysum.nc file every time.
# Add sum of WH_DAYSUM[yesterday] + WH_DAYSUM[today] + WH_DAYSUM[tomorrow] into WHS[today]
# timselsum advances 3 timesteps at a time when summing over 3 timesteps, so we must
# create three offset files and then merge them
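# Worked example: with daily steps d1..d7, timselsum,3,0 sums (d1,d2,d3), (d4,d5,d6), (d7);
# offset 1 sums (d2,d3,d4), (d5,d6,d7); offset 2 sums (d3,d4,d5), (d6,d7).
# Merged by time, this yields a rolling 3-day sum for every day; the incomplete
# trailing sums are removed further down using time_bnds.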
subprocess.run(f'cdo -s timselsum,3,0 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_0.nc', shell=True)
subprocess.run(f'cdo -s timselsum,3,1 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_1.nc', shell=True)
subprocess.run(f'cdo -s timselsum,3,2 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_2.nc', shell=True)
subprocess.run(f'cdo -s -chname,WH_DAYSUM,WHS -mergetime {tmpfile_path}wh_3daysum_tmp_*.nc {tmpfile_path}wh_3daysum_tmp_merged.nc', shell=True)
# the last timesteps are most likely wrong, due to lack of "tomorrows" when performing timselsum
# To remove the last ones:
#
# The variable time_bnds (which has two dimensions: time and bnds. bnds is size 2) contains the
# time (in seconds) between the first and last timesteps used for summing up. e.g like this:
# $ ncdump -v time_bnds tmp/wh_3daysum.nc
# time_bnds =
# 1696111200, 1696284000,
# 1696197600, 1696370400,
# 1696284000, 1696456800,
# 1696370400, 1696543200,
# 1696456800, 1696629600,
# 1696543200, 1696716000,
# 1696629600, 1696716000,
# 1696716000, 1696716000;
#}
# The difference [1] - [0] should be 172800 seconds = 48 hours
# Timesteps with [1] - [0] != 172800 should be discarded
# Using netCDF4 to accomplish this
wh_3daysum = nc.Dataset(f'{tmpfile_path}wh_3daysum_tmp_merged.nc', 'r')
time_bnds = wh_3daysum.variables["time_bnds"][:]
# Assuming that wrong time bounds only exist at the end of the time series, this works
number_of_timesteps_to_remove = 0
for time_bnd in time_bnds:
if time_bnd[1]-time_bnd[0] != 172800.0:
number_of_timesteps_to_remove = number_of_timesteps_to_remove + 1
wh_3daysum.close()
number_of_timesteps_to_keep = len(time_bnds) - number_of_timesteps_to_remove
subprocess.run(f'cdo -s -seltimestep,1/{number_of_timesteps_to_keep} {tmpfile_path}wh_3daysum_tmp_merged.nc {tmpfile_path}wh_3daysum.nc', shell=True)
# Classifying warning status for the WHS model
# 0 == WHS --> Grey
# 0 < WHS < 20 --> Yellow
# 20 <= WHS < 40 --> Orange
# 40 <= WHS --> Red
subprocess.run(f'cdo -s -aexpr,"WARNING_STATUS = WHS <= 0 ? 0 : -1; WARNING_STATUS = WHS < 20 && WARNING_STATUS == -1 ? 2 : WARNING_STATUS; WARNING_STATUS = WHS < 40 && WARNING_STATUS == -1 ? 3 : WARNING_STATUS; WARNING_STATUS = WHS >= 40 && WARNING_STATUS == -1 ? 4 : WARNING_STATUS" {tmpfile_path}wh_3daysum.nc {tmpfile_path}result_unmasked.nc', shell=True)
# Mask results using a CSV file with polygons
# Env variable MASK_FILE must be set
if os.getenv("MASK_FILE") is not None:
mask_file = os.getenv("MASK_FILE")
logging.info(f"Applying mask file {mask_file} to result.nc")
subprocess.run(f'cdo -maskregion,{mask_file} {tmpfile_path}result_unmasked.nc {tmpfile_path}result.nc', shell=True)
else:
os.rename(f"{tmpfile_path}result_unmasked.nc", f"{tmpfile_path}result.nc")
#"""
# Split the combined file into daily .nc files again, with YYYY-MM-DD in the filename. Convert to corresponding GeoTIFF files
# Variables that need discrete classification must be integers for mapserver to work properly (don't ask!)
# Since we need WARNING_STATUS to be discretely classified, we create a separate GeoTIFF file for it
wh_3daysum = nc.Dataset(f'{tmpfile_path}wh_3daysum.nc', 'r')
timesteps = wh_3daysum.variables["time"][:]
timestep_index = 1
timestep_dates = [] # Used in the mapfile template
# TODO: We are (?) assuming all timesteps are equal. This might NOT be the case if there are holes in the result data
for timestep in timesteps:
timestep_date = datetime.fromtimestamp(timestep).astimezone(local_timezone)
file_date = timestep_date.astimezone(local_timezone).strftime("%Y-%m-%d")
timestep_dates.append(file_date)
# If start_date is set, we should only generate result files from that date on
# -- provided the GeoTIFF files for earlier dates already exist.
if start_date is not None and timestep_date < start_date:
if os.path.isfile(f'{outfile_path}result_WARNING_STATUS_{file_date}.tif') and os.path.isfile(f'{outfile_path}result_{file_date}.tif'):
# Check that the file has at least 23 timesteps
with nc.Dataset(weather_data_file_path, "r") as weatherdata_file:
timestep_count = len(weatherdata_file.variables["time"])
if timestep_count < 23:
logging.info(
f"{weather_data_file_name} - Skip file with {timestep_count} timesteps"
)
continue
# Skip if result files for date already exist
if os.path.isfile(
f"{data_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}.tif"
) and os.path.isfile(f"{data_dir}result_{date_YYYY_MM_DD}.tif"):
logging.info(f"Result files for {date_YYYY_MM_DD} already exist, skip")
continue
# Create NetCDF result file
subprocess.run(f'cdo -s -seltimestep,{timestep_index}/{timestep_index} {tmpfile_path}result.nc {tmpfile_path}result_{file_date}.nc', shell=True)
# Convert to GeoTIFF
# We only need WHS and WARNING_STATUS
# Merge the WARNING_STATUS and WHS variables into one GeoTIFF file with two bands.
# The WARNING_STATUS should always be band #1
# We must delete the GeoTIFF file before merging
subprocess.run(f'rm {outfile_path}result_*{file_date}*.tif', shell=True)
with open("/dev/null", "w") as devnull:
subprocess.run(f'gdal_translate -ot Int16 -of GTiff NETCDF:"{tmpfile_path}result_{file_date}.nc":WARNING_STATUS {tmpfile_path}result_WARNING_STATUS_{file_date}_lcc.tif', shell=True, stdout=devnull)
subprocess.run(f'gdal_translate -ot Float32 -of GTiff NETCDF:"{tmpfile_path}result_{file_date}.nc":WHS {tmpfile_path}result_{file_date}_lcc.tif', shell=True, stdout=devnull)
# We need to reproject the files to ensure they match the projection given in the generated mapfile. We always use EPSG:4326 for this
subprocess.run(f'gdalwarp -t_srs EPSG:4326 {tmpfile_path}result_WARNING_STATUS_{file_date}_lcc.tif {outfile_path}result_WARNING_STATUS_{file_date}.tif', shell=True, stdout=devnull)
subprocess.run(f'gdalwarp -t_srs EPSG:4326 {tmpfile_path}result_{file_date}_lcc.tif {outfile_path}result_{file_date}.tif', shell=True, stdout=devnull)
timestep_index = timestep_index + 1
# Generate mapfile
# Building data sets for language specific legends
languages = []
language_codes = config["i18n"]["languages"].split(",")
for language_code in language_codes:
language = {"language_code": language_code}
if ("i18n.%s" % language_code) in config:
for keyword in config["i18n.%s" % language_code]:
language[keyword] = config["i18n.%s" % language_code][keyword]
languages.append(language)
# The paths should be set in a .env file
env = Environment(loader=FileSystemLoader('.'))
template = env.get_template("mapfile/template.j2")
output = template.render({
"model_id":model_id,
"timestep_dates": timestep_dates,
"mapserver_data_dir": os.getenv("MAPSERVER_DATA_DIR"),
"mapserver_mapfile_dir": os.getenv("MAPSERVER_MAPFILE_DIR"),
"mapserver_log_file": os.getenv("MAPSERVER_LOG_FILE"),
"mapserver_image_path": os.getenv("MAPSERVER_IMAGE_PATH"),
"mapserver_extent": os.getenv("MAPSERVER_EXTENT"),
"languages": languages,
"language_codes": language_codes
})
mapfile_outdir = os.getenv("MAPFILE_DIR")
with open(f"{mapfile_outdir}/{model_id}.map", 'w') as f:
f.write(output)
# Remove all temporary/intermediary files
subprocess.run(f"rm {tmpfile_path}wh_3daysum*.nc", shell=True)
subprocess.run(f"rm {tmpfile_path}result*.nc", shell=True)
subprocess.run(f"rm {tmpfile_path}result*.tif", shell=True)
# Produce daily files with MAX_TEMP, which is the highest recorded temperature within the given day
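# The cdo chain reads right to left: -selvar extracts the temperature variable,
# -timmax takes its maximum over all timesteps in the file, -chname renames it
# to TM_MAX, and -setdate/-settime stamp the single output timestep with the file's date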
run_command(
command=f"cdo -s -O -L -settime,00:00:00 -setdate,{date_YYYY_MM_DD} -chname,{TM},{TM_MAX} -timmax -selvar,{TM} {weather_data_file_path} {max_temp_file_path}"
)
# Classifying warning status for the model
# temperature < 15 --> 2 (Yellow)
# temperature >= 15 --> 4 (Red)
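# -aexpr appends WARNING_STATUS as a new variable alongside TM_MAX, so the
# result file carries both the classification and the raw maximum temperature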
run_command(
command=f'cdo -s -aexpr,"WARNING_STATUS = {TM_MAX} < {THRESHOLD} ? 2 : 4" {max_temp_file_path} {result_unmasked_path}',
)
# Mask results using a CSV file with polygons. Env variable MASK_FILE must be set, or else we use the file as it is.
if os.getenv("MASK_FILE") is not None:
mask_file = os.getenv("MASK_FILE")
logging.info(f"Applying mask file {mask_file} to result file")
run_command(
command=f"cdo -maskregion,{mask_file} {result_unmasked_path} {result_path}",
)
else:
os.rename(result_unmasked_path, result_path)
# Convert to GeoTIFF
# We only need WHS and WARNING_STATUS
# Merge the WARNING_STATUS and WHS variables into one GeoTIFF file with two bands.
# The WARNING_STATUS should always be band #1
# We must delete the GeoTIFF file before merging WHY?
# run_command(command=f"rm {data_dir}result_*{file_date_str}*.tif")
timestep_dates.append(date_YYYY_MM_DD)
with open("/dev/null", "w") as devnull:
run_command(
command=f'gdal_translate -ot Int16 -of GTiff NETCDF:"{tmp_dir}result_{date_YYYY_MM_DD}.nc":WARNING_STATUS {tmp_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}_lcc.tif',
stdout=devnull,
)
run_command(
command=f'gdal_translate -ot Float32 -of GTiff NETCDF:"{tmp_dir}result_{date_YYYY_MM_DD}.nc":{TM_MAX} {tmp_dir}result_{date_YYYY_MM_DD}_lcc.tif',
stdout=devnull,
)
# We need to reproject the files to ensure they match the projection given in the generated mapfile. We always use EPSG:4326 for this
run_command(
command=f"gdalwarp -t_srs EPSG:4326 {tmp_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}_lcc.tif {data_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}.tif",
stdout=devnull,
)
run_command(
command=f"gdalwarp -t_srs EPSG:4326 {tmp_dir}result_{date_YYYY_MM_DD}_lcc.tif {data_dir}result_{date_YYYY_MM_DD}.tif",
stdout=devnull,
)
# Generate mapfile
# Building data sets for language specific legends
languages = []
language_codes = config["i18n"]["languages"].split(",")
for language_code in language_codes:
language = {"language_code": language_code}
if ("i18n.%s" % language_code) in config:
for keyword in config["i18n.%s" % language_code]:
language[keyword] = config["i18n.%s" % language_code][keyword]
languages.append(language)
# The paths should be set in a .env file
env = Environment(loader=FileSystemLoader("."))
template = env.get_template("mapfile/template.j2")
output = template.render(
{
"model_id": model_id,
"timestep_dates": timestep_dates,
"mapserver_data_dir": os.getenv("MAPSERVER_DATA_DIR"),
"mapserver_mapfile_dir": os.getenv("MAPSERVER_MAPFILE_DIR"),
"mapserver_log_file": os.getenv("MAPSERVER_LOG_FILE"),
"mapserver_image_path": os.getenv("MAPSERVER_IMAGE_PATH"),
"mapserver_extent": os.getenv("MAPSERVER_EXTENT"),
"languages": languages,
"language_codes": language_codes,
}
)
mapfile_outdir = os.getenv("MAPFILE_DIR")
with open(f"{mapfile_outdir}/{model_id}.map", "w") as f:
f.write(output)
# Remove all temporary/intermediary files
# run_command(command=f"rm {tmp_dir}result*.nc")
# run_command(command=f"rm {tmp_dir}result*.tif")
end_time = time.time()
logging.info(
f"End model {model_id} - execution time: {end_time - start_time:.2f} seconds"
)
@@ -6,18 +6,21 @@ This model is based on the work of Ferguson et al. (2015) to predict migration
Input data for the original model:
DSSInput_Parameters
- weatherData weatherData (required)
- int growthStage (required)
- optionalData optionalData
weatherData
- string timeStart
- string timeEnd
- int Interval
- List<int> weatherParameters
- List<locationWeatherData> LocationWeatherData
locationWeatherData
- double longitude
- double latitude
- double altitude
@@ -28,36 +31,39 @@ locationWeatherData
- int length
optionalData
- double temp_threshold
- DateTime? startDate
- DateTime? endDate
See MELIAEController.cs for application startup
See MELIAE_DSS.cs for running the model
## Technical description
The model has been implemented by [Tor-Einar Skog](https://nibio.no/en/employees/tor-einar-skog), [NIBIO](https://nibio.no/en). It is designed to fit into the gridded pest prediction models of [VIPS](https://nibio.no/en/services/vips).
### Software requirements
The model can only be run on Linux, as some of the tools mentioned below are only available on Linux. Development and testing were done on [Ubuntu Linux 22.04 LTS](https://ubuntu.com/).
#### CDO and GDAL
The heavy lifting in this model is done by the tools [CDO](https://code.mpimet.mpg.de/projects/cdo) and [GDAL](https://gdal.org/). These tools need to be installed and available. CDO is only available on Linux.
#### Python requirements
The Python requirements are specified in the `requirements.txt` file, and are installed in the virtualenv created by `run_ADASMELIAE.sh` (see below).
### Input data requirements
The model (as of 2023-10-25) assumes that weather data files named `met_1_0km_nordic-[YYYY-MM-DD].nc` are available in the `in/` folder. The files must contain hourly timesteps with the following weather parameters:
- RR (Rainfall in mm)
- UM (Relative humidity in %)
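A quick way to sanity-check an input file before a run is to open it with `netCDF4` and verify that the expected parameters and hourly timesteps are present. A minimal sketch (not part of the pipeline; the file name is an example, and the 23-timestep minimum mirrors the check in the model code):

```python
import netCDF4 as nc


def check_weather_file(path, params=("RR", "UM"), min_timesteps=23):
    """Return True if the file contains all parameters and enough hourly timesteps."""
    with nc.Dataset(path, "r") as ds:
        missing = [p for p in params if p not in ds.variables]
        timesteps = len(ds.variables["time"])
    if missing:
        print(f"{path}: missing parameters {missing}")
        return False
    if timesteps < min_timesteps:
        print(f"{path}: only {timesteps} timesteps")
        return False
    return True


check_weather_file("in/met_1_0km_nordic-2023-05-01.nc")
```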
### Running the model
It is required that you have set the following environment variables:
```bash
@@ -68,9 +74,9 @@ HOME_DIR=/home/foo/2023_vips_in_space/
# Path to the weather data
WEATHER_DATA_DIR=in/
# Used for iterating the weather data files
FILENAME_PATTERN="met_1_0km_nordic-*.nc"
WEATHER_DATA_FILENAME_PATTERN="met_1_0km_nordic-*.nc"
# Used to extract date info from the filename
FILENAME_DATEFORMAT="met_1_0km_nordic-%Y-%m-%d.nc"
WEATHER_DATA_FILENAME_DATEFORMAT="met_1_0km_nordic-%Y-%m-%d.nc"
# Names of weather parameters in NetCDF files
# Hourly precipitation
RR="RR"
@@ -78,7 +84,7 @@ RR="RR"
UM="UM"
# Timezone for weather data/daily aggregations
LOCAL_TIMEZONE="Europe/Oslo"
# Path to optional CSV file with polygons for masking result.
MASK_FILE=Norge_landomrader.csv
# Path to the output (GeoTIFF) files as seen from the running model code
DATA_DIR=out/
@@ -101,9 +107,11 @@ MAPSERVER_EXTENT="-1.5831861262936526 52.4465003983706595 39.2608060398730458 71
```bash
$ ./run_ADASMELIAE.sh
```
This creates a Python virtualenv, installs all the Python dependencies, runs the model and stores output in a log file.
Alternatively, primarily for development purposes, you can run the Python script `ADASMELIAE.py` directly:
```bash
$ ./ADASMELIAE.py
```
@@ -114,28 +122,23 @@ All intermediary files are stored in the `tmp/` folder, and they are all deleted
The model outputs GeoTIFF files, two per day in the season/period of calculation:
- `result_WARNING_STATUS_[YYYY-MM-DD].tif`, which indicates infection risk of Septoria.
- 0 = No infection risk (grey)
- 2 = Low infection risk (yellow)
- 3 = Medium infection risk (orange)
- 4 = High risk (red)
- `result_[YYYY-MM-DD].tif`, which contains the wet hour sum (WHS): the sum of wet hours for "yesterday", "today" and "tomorrow", relative to the current file date.
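The per-class pixel values in a WARNING_STATUS file can be spot-checked with the GDAL Python bindings. A minimal sketch (assumes the `osgeo` bindings are installed alongside the GDAL command-line tools the pipeline already requires; the file name is an example):

```python
import numpy as np
from osgeo import gdal

ds = gdal.Open("out/result_WARNING_STATUS_2023-05-01.tif")
band = ds.GetRasterBand(1)  # WARNING_STATUS is always band 1
values = band.ReadAsArray()
# Count pixels per warning class (0=grey, 2=yellow, 3=orange, 4=red)
classes, counts = np.unique(values, return_counts=True)
print(dict(zip(classes.tolist(), counts.tolist())))
```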
A [Jinja2](https://pypi.org/project/Jinja2/) template mapfile (for [Mapserver](https://mapserver.org/)) with separate layers (WARNING_STATUS and WHS) for each date is found in the `mapfile/` folder.
Examples of the two layers are shown below.
![WARNING_STATUS example. Showing Norway and surrounding area](./WARNING_STATUS_layer_example.png)
_WARNING_STATUS example. Showing Norway_
![WHS (Wet Hour Sum) example. Showing Norway and surrounding area](./WHS_layer_example.png)
_WHS (Wet Hour Sum) example. Showing Norway_
## Notes to self
File added
File added
File added
@@ -7,9 +7,9 @@ HOME_DIR=/home/foo/2023_vips_in_space/ADASMELIAE/
# Path to the weather data
WEATHER_DATA_DIR=in/
# Used for iterating the weather data files
FILENAME_PATTERN="met_1_0km_nordic-*.nc"
WEATHER_DATA_FILENAME_PATTERN="met_1_0km_nordic-*.nc"
# Used to extract date info from the filename
FILENAME_DATEFORMAT="met_1_0km_nordic-%Y-%m-%d.nc"
WEATHER_DATA_FILENAME_DATEFORMAT="met_1_0km_nordic-%Y-%m-%d.nc"
# Names of weather parameters in NetCDF files
# Hourly precipitation
RR="RR"
<!--mapserver template-->
<?xml version="1.0" encoding="UTF-8"?>
<vipsResult>
<modelName value="Referansefuktmodell"/>
<modelName value="Pollen beetle migration model"/>
<modelId value="ADASMELIAE"/>
<warningStatus value="[value_0]"/>
</vipsResult>
\ No newline at end of file
@@ -3,5 +3,5 @@
<vipsResult>
<modelName value="Pollen beetle migration model"/>
<modelId value="ADASMELIAE"/>
<parameter name="WHS" value="[value_0]"/>
<parameter name="air_temperature_2m" value="[value_0]"/>
</vipsResult>
\ No newline at end of file
@@ -35,54 +35,44 @@ WEB
# List of standard metadata: https://mapserver.org/ogc/wms_server.html#web-object-metadata
# i18n support: https://mapserver.org/ogc/inspire.html#inspire-multi-language-support
METADATA
"wms_keywordlist" "VIPS model Septoria Reference Humidity Model (ADASMELIAE)"
"wms_keywordlist" "Pollen Beetle Migration Model (ADASMELIAE)"
{% if languages %}
"wms_inspire_capabilities" "embed"
"wms_languages" "{{ language_codes|join(",")}}" # The first is the default
{% endif %}
"wms_abstract" "<div id='preamble'>
<p>The reference humidity model was developed as a supplement to
the Humidity model. In that model, 20 consecutive hours are
required to fulfil a risk period. One constraint of this method is
that you can have 19 consecutive risk hours, or e.g. 14 risk hours,
then one hour below the Rh threshold, then another 14 risk hours;
in neither situation will the model indicate a risk. The reference
model therefore introduced the definition of humid hours: the Rh
threshold is avoided, as humid hours do not need to be consecutive.
A higher running sum of humid hours across three days indicates a
higher Septoria risk than a sum across two days or one. The
operation of the model should include weather forecast data and
run 6 days ahead of the current day if a 7-day weather forecast is
included (60 hours from the national met office and up to 7 days from ECMWF)</p>
<p>Pollen beetle (Meligethes spp.) adults are approximately 2.5 mm,
metallic greenish-black. Females bite oilseed rape buds and lay their eggs
inside. Adults and larvae attack buds and flowers, and their feeding can
lead to bud abortion, withered buds and reduced pod set. However, damage
rarely results in reduced yields for winter crops. Spring crops are more
vulnerable, as the susceptible green/yellow bud stage often coincides with
beetle migration.</p>
</div>
<div id='body'>
<p>
The model was tested against the Humidity model in a Danish
Septoria project funded by the GUDP. The threshold of 40 was
defined as high risk, as this coincided with periods when the
humidity model recommended applying fungicide (if not already protected).
The humidity model includes a decision model about when to spray,
protection periods etc. The reference model was used to quality
control the recommendations: if the reference humid hours were
higher than 40 (no thresholds), the user should check the raw data
used for calculation of the Humidity model (threshold,
20 consecutive hours). If 2-3 periods of 17, 18, or 19 consecutive
hours appear, then one can consider protecting the crop based on the
reference model alone.</p>
<p>The Humidity model is considered a DSS with several components.
The reference humidity model is considered a weather-based submodel
for Septoria risk, easy to map and calculate from weather data alone.</p>
Oilseed rape is only vulnerable if large numbers of pollen
beetles migrate into the crop during the green bud stage. This DSS predicts
migration into crops based on air temperature, and so can be used to
evaluate the risk to the crop. Daily maximum air temperature is used to predict
migration risk. The default threshold of 15 degrees Celsius is used, as that is the
temperature advised in the UK at which pollen beetles will fly.
</p>
<p>This DSS was adapted from work carried out in the UK, and is
considered applicable, but not yet validated, in Belgium, Luxembourg,
the Netherlands, France, Germany, Rep. Ireland, and Denmark. It should only be used during oilseed rape growth stages 51-59. This
model is a simplification of a more detailed model described in Ferguson et al. (2015) Pest Management Science 72, 609-617.
<a href="https://doi.org/10.1002/ps.4069">https://doi.org/10.1002/ps.4069</a></p>
<h3>Explanation of parameters</h3>
<ul>
<li>WHS = <span itemprop=\"WHS\">Wet hour sum</span></li>
<li>TM_MAX = <span itemprop=\"TM_MAX\">Maximum Air Temperature at 2m</span></li>
</ul>
</div>
"
"wms_enable_request" "*"
"wms_title" "Septoria Reference Humidity Model"
"wms_title" "Pollen Beetle Migration Model"
"wms_getfeatureinfo_formatlist" "text/plain,text/html,text/xml"
"wms_accessconstraints" "none"
"wms_addresstype" ""
@@ -124,27 +114,17 @@ LAYER
STATUS ON
METADATA
"wms_title" "Reference humidity model {{ timestep_date }}"
"wms_title" "Pollen Beetle Migration Model {{ timestep_date }}"
{% for language in languages %}
"wms_abstract.{{language.language_code}}" "
{
\"isWarningStatus\": true,
\"legendItems\": [
{
\"classification\": 0,
\"legendLabel\": \"{{ language.no_risk }}\",
\"legendIconCSS\": \"width: 25px; background-color: #707070;\"
},
{
\"classification\": 2,
\"legendLabel\": \"{{ language.low_risk }}\",
\"legendIconCSS\": \"width: 25px; background-color: #FFCC00;\"
},
{
\"classification\": 3,
\"legendLabel\": \"{{ language.medium_risk }}\",
\"legendIconCSS\": \"width: 25px; background-color: #FFCC99;\"
},
{
\"classification\": 4,
\"legendLabel\": \"{{ language.high_risk }}\",
@@ -159,30 +139,16 @@ LAYER
# class using simple string comparison, equivalent to ([pixel] = 0)
CLASS
NAME "No infection risk"
EXPRESSION ([pixel] >= 0 AND [pixel] < 2)
STYLE
COLOR 112 112 112
END
END
CLASS
NAME "Low infection risk"
EXPRESSION ([pixel] >= 2 AND [pixel] < 3)
EXPRESSION ([pixel] < 3)
STYLE
COLOR 255 204 0
END
END
CLASS
NAME "Medium infection risk"
EXPRESSION ([pixel] >= 3 AND [pixel] < 4)
STYLE
COLOR 255 153 0
COLOR 112 112 112
END
END
CLASS
NAME "High infection risk"
EXPRESSION ([pixel] >= 4)
EXPRESSION ([pixel] >= 3)
STYLE
COLOR 255 0 0
END
@@ -190,25 +156,25 @@ LAYER
END # Layer
LAYER
NAME "{{model_id}}.WHS.{{ timestep_date }}"
NAME "{{model_id}}.temperature.{{ timestep_date }}"
DATA "{{mapserver_data_dir}}result_{{ timestep_date }}.tif"
TEMPLATE "{{mapserver_mapfile_dir}}query_template_WHS.xml" TOLERANCE 1 TOLERANCEUNITS PIXELS
TEMPLATE "{{mapserver_mapfile_dir}}query_template_temperature.xml" TOLERANCE 1 TOLERANCEUNITS PIXELS
TYPE RASTER
#PROCESSING "BANDS=1" # WHS band on top (others invisible, but band values are available in the query template)
#PROCESSING "BANDS=1" # Temperature band on top (others invisible, but band values are available in the query template)
#PROCESSING "SCALE=AUTO"
#PROCESSING "NODATA=-1"
STATUS ON
METADATA
"wms_title" "Reference humidity model WHS {{ timestep_date }}"
"wms_title" "Pollen Beetle Migration Model temperature {{ timestep_date }}"
{% for language in languages %}
"wms_abstract.{{language.language_code}}" "
{
\"isWarningStatus\": false,
\"legendItems\": [
{
\"legendLabel\": \"{{ language.whs }}\",
\"legendLabel\": \"{{ language.temperature }}\",
\"legendIconCSS\": \"width: 25px; background: linear-gradient(to right, #FFFF00, #0000FF);\"
}
]
@@ -218,10 +184,10 @@ LAYER
END
CLASSITEM "[pixel]"
CLASS
NAME "Wet hour sum (yesterday + today + tomorrow) [0-72]"
EXPRESSION ([pixel] >= 0 AND [pixel] <= 72)
NAME "Temperature range"
EXPRESSION ([pixel] >= -40 AND [pixel] <= 40)
STYLE
DATARANGE 0 72
DATARANGE -40 40
COLORRANGE 255 255 0 0 0 255
END
END
Jinja2
netCDF4
pytz
python-dotenv
pytest
\ No newline at end of file
import pytest
from datetime import datetime

from ADASMELIAE import find_start_date


@pytest.fixture
def example_result_files():
    return [
        "result_2023-04-15.nc",
        "result_2023-04-16.nc",
        "result_2023-04-17.nc",
        "result_WARNING_STATUS_2023-04-15.nc",
        "result_WARNING_STATUS_2023-04-16.nc",
        "result_WARNING_STATUS_2023-04-17.nc",
        "result_WARNING_STATUS_2023-04-18.nc",
    ]


def test_find_start_date_with_previous_results(example_result_files, monkeypatch):
    MODEL_START_DATE = datetime(2023, 3, 1)
    monkeypatch.setenv("DATA_DIR", "out")
    # Mock os.listdir to return the example result files
    with monkeypatch.context() as m:
        m.setattr("os.listdir", lambda _: example_result_files)
        start_date = find_start_date(MODEL_START_DATE)
        # Latest result is 2023-04-17, so the next start date is the day after
        expected_start_date = datetime(2023, 4, 18)
        assert start_date == expected_start_date


def test_find_start_date_without_previous_results(monkeypatch):
    MODEL_START_DATE = datetime(2023, 3, 1)
    monkeypatch.setenv("DATA_DIR", "out")
    # Mock os.listdir to return no result files
    with monkeypatch.context() as m:
        m.setattr("os.listdir", lambda _: [])
        start_date = find_start_date(MODEL_START_DATE)
        # With no previous results, the model start date is returned unchanged
        assert start_date == MODEL_START_DATE
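The tests can be run with pytest from the project root, assuming the `pytest` dependency from `requirements.txt` is installed in the active virtualenv:

```bash
$ pytest
```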