Skip to content
Snippets Groups Projects
Commit 6035a8d0 authored by Lene Wasskog's avatar Lene Wasskog
Browse files

feat: Use year file, remove previous results before calculating new, simplify

parent 4cfa992b
No related branches found
No related tags found
No related merge requests found
...@@ -8,4 +8,6 @@ in ...@@ -8,4 +8,6 @@ in
out out
log log
tmp tmp
.pytest_cache .pytest_cache
\ No newline at end of file run_cleanup.sh
.idea
\ No newline at end of file
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
# * GDAL >= v 3.4.3 built with Python support # * GDAL >= v 3.4.3 built with Python support
# * For Python: See requirements.txt # * For Python: See requirements.txt
import os import os
import time import time
import subprocess import subprocess
...@@ -32,12 +31,13 @@ from datetime import datetime, timedelta ...@@ -32,12 +31,13 @@ from datetime import datetime, timedelta
from jinja2 import Environment, FileSystemLoader from jinja2 import Environment, FileSystemLoader
import logging import logging
import pytz import pytz
import netCDF4 as nc
import configparser import configparser
import sys import sys
load_dotenv() load_dotenv()
# TODO Should start before the first European crops reach growth stage 51 and end when the northernmost regions have passed stage 59
DEBUG = ( DEBUG = (
False False
if os.getenv("DEBUG") is None or os.getenv("DEBUG").lower() == "false" if os.getenv("DEBUG") is None or os.getenv("DEBUG").lower() == "false"
...@@ -45,26 +45,18 @@ DEBUG = ( ...@@ -45,26 +45,18 @@ DEBUG = (
) )
model_id = os.getenv("MODEL_ID") model_id = os.getenv("MODEL_ID")
filename_pattern = os.getenv("WEATHER_DATA_FILENAME_PATTERN")
filename_dateformat = os.getenv("WEATHER_DATA_FILENAME_DATEFORMAT")
# TODO Should start before the first European crops reach growth stage 51 and end when the northernmost regions have passed stage 59
current_year = datetime.now().year
local_timezone = pytz.timezone(os.getenv("LOCAL_TIMEZONE"))
MODEL_START_DATE = datetime.strptime(
f"{current_year}-{os.getenv('START_DATE')}", "%Y-%m-%d"
).date()
MODEL_END_DATE = datetime.strptime(
f"{current_year}-{os.getenv('END_DATE')}", "%Y-%m-%d"
).date()
home_dir = os.getenv("HOME_DIR") home_dir = os.getenv("HOME_DIR")
weather_data_dir = os.getenv("WEATHER_DATA_DIR") weather_data_dir = os.getenv("WEATHER_DATA_DIR")
result_tif_dir = os.getenv("RESULT_TIF_DIR")
result_mapfile_dir = os.getenv("RESULT_MAPFILE_DIR")
tmp_dir = os.getenv("TMP_DIR") tmp_dir = os.getenv("TMP_DIR")
data_dir = os.getenv("DATA_DIR") template_dir = f"{home_dir}mapfile/"
template_dir = f"{os.getenv('HOME_DIR')}mapfile/"
mapfile_outdir = os.getenv("MAPFILE_DIR") weather_data_filename_pattern = os.getenv("WEATHER_DATA_FILENAME_DATEFORMAT")
local_timezone = pytz.timezone(os.getenv("LOCAL_TIMEZONE"))
start_MM_DD = os.getenv("START_DATE")
end_MM_DD = os.getenv("END_DATE")
config_file_path = f"{os.getenv('HOME_DIR')}{model_id}.cfg" config_file_path = f"{os.getenv('HOME_DIR')}{model_id}.cfg"
mask_file_path = ( mask_file_path = (
...@@ -83,8 +75,7 @@ logging.basicConfig( ...@@ -83,8 +75,7 @@ logging.basicConfig(
format="%(asctime)s - %(levelname).4s - (%(filename)s:%(lineno)d) - %(message)s", format="%(asctime)s - %(levelname).4s - (%(filename)s:%(lineno)d) - %(message)s",
) )
TM = "air_temperature_2m" tm_max = "air_temperature_2m_max"
TM_MAX = "TM_MAX"
THRESHOLD = 15 THRESHOLD = 15
...@@ -96,7 +87,7 @@ def run_command(command, shell=True, stdout=subprocess.PIPE): ...@@ -96,7 +87,7 @@ def run_command(command, shell=True, stdout=subprocess.PIPE):
command, command,
shell=shell, shell=shell,
stdout=stdout, stdout=stdout,
stderr=subprocess.PIPE, stderr=stdout,
text=True, text=True,
check=True, check=True,
) )
...@@ -109,154 +100,111 @@ def run_command(command, shell=True, stdout=subprocess.PIPE): ...@@ -109,154 +100,111 @@ def run_command(command, shell=True, stdout=subprocess.PIPE):
for line in result.stderr.splitlines(): for line in result.stderr.splitlines():
logging.error(line.strip()) logging.error(line.strip())
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
logging.error(f"Command failed: '{command}'")
logging.error(f"{e}") logging.error(f"{e}")
quit() quit()
# Iterate the set of previously calculated result files. Find latest result date, # Remove all temporary/intermediary files
# and return result date + 1 day. If no files exist, return MODEL_START_DATE. def remove_temporary_files():
def find_start_date(model_start_date): if glob.glob(f"{tmp_dir}*.nc"):
result_file_names = os.listdir(data_dir) run_command(command=f"rm {tmp_dir}*.nc")
result_file_names = [ if glob.glob(f"{tmp_dir}*.tif"):
file for file in result_file_names if file.startswith("result_2") run_command(command=f"rm {tmp_dir}*.tif")
]
try:
dates = [
datetime.strptime(file.split("_")[1].split(".")[0], "%Y-%m-%d").date()
for file in result_file_names
]
logging.debug(
f"Found results for the following dates: {sorted([date.strftime('%Y-%m-%d') for date in dates])}"
)
except ValueError as e:
logging.error(f"Error parsing dates: {e}")
return MODEL_START_DATE
latest_result_date = max(dates, default=None)
# Quit if latest result date is today (script was already run today)
if latest_result_date and latest_result_date == datetime.now().date():
logging.error("Script already run for today. Quit.")
sys.exit()
return (
latest_result_date + timedelta(days=1) # Remove previously calculated results
if latest_result_date def remove_old_results():
else model_start_date if glob.glob(f"{result_tif_dir}*.tif"):
) run_command(command=f"rm {result_tif_dir}*.tif")
if glob.glob(f"rm {result_mapfile_dir}*.map"):
run_command(command=f"rm {result_mapfile_dir}*.map")
def find_end_date(model_end_date): if __name__ == "__main__":
start_time = time.time() # For logging execution time
today = datetime.now().date() today = datetime.now().date()
return today if today < model_end_date else model_end_date
remove_old_results()
if __name__ == "__main__": year = today.year
logging.info(f"Start model {model_id}") model_start_date = datetime.strptime(f"{year}-{start_MM_DD}", "%Y-%m-%d").date()
start_time = time.time() model_end_date = datetime.strptime(f"{year}-{end_MM_DD}", "%Y-%m-%d").date()
start_date = find_start_date(MODEL_START_DATE) start_date = model_start_date
end_date = find_end_date(MODEL_END_DATE) end_date = today + timedelta(days=2)
if start_date > end_date:
logging.error("Model period not started. Quit.")
sys.exit()
logging.info( logging.info(
f"Start running model for start date {start_date} and end date {end_date}" f"Start running model {model_id} for start date {start_date} and end date {end_date}"
) )
weather_data_file_pattern = f"{weather_data_dir}{filename_pattern}" weather_data_file = weather_data_dir + today.strftime(weather_data_filename_pattern)
weather_data_files = glob.glob(weather_data_file_pattern) max_temp_in_period_file = f"{tmp_dir}{year}_max_temp_for_period.nc"
logging.debug(f"Find weather data files: {weather_data_file_pattern}") unmasked_result_file = f"{tmp_dir}{year}_unmasked_result.nc"
logging.debug([os.path.basename(path) for path in weather_data_files]) final_result_file = f"{tmp_dir}{year}_result.nc"
timestep_dates = []
for weather_data_file_path in sorted(weather_data_files):
weather_data_file_name = os.path.basename(weather_data_file_path)
# Skip if we don't have a valid date
try:
file_date = datetime.strptime(
weather_data_file_name, filename_dateformat
).date()
except ValueError as e:
logging.info(f"{weather_data_file_name} - Skip file due to invalid date")
logging.debug(e)
continue
# Set up file names based on date YYYY-MM-DD
date_YYYY_MM_DD = file_date.strftime("%Y-%m-%d")
max_temp_file_path = f"{tmp_dir}max_temp_{date_YYYY_MM_DD}.nc"
result_unmasked_path = f"{tmp_dir}result_unmasked_{date_YYYY_MM_DD}.nc"
result_path = f"{tmp_dir}result_{date_YYYY_MM_DD}.nc"
# Only process files from within valid interval
if file_date < start_date or file_date > end_date:
logging.debug(
f"{weather_data_file_name} - Skip file with date outside calculation period"
)
continue
# Check that the file has at least 23 timesteps
with nc.Dataset(weather_data_file_path, "r") as weatherdata_file:
timestep_count = len(weatherdata_file.variables["time"])
if timestep_count < 23:
logging.info(
f"{weather_data_file_name} - Skip file with {timestep_count} timesteps"
)
continue
# Skip if result files for date already exist
if os.path.isfile(
f"{data_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}.tif"
) and os.path.isfile(f"{data_dir}result_{date_YYYY_MM_DD}.tif"):
logging.info(f"Result files for {date_YYYY_MM_DD} already exist, skip")
continue
# Produce daily files with MAX_TEMP, which is the highest recorded temperature within the given day
run_command(
command=f"cdo -s -O -L -settime,00:00:00 -setdate,{date_YYYY_MM_DD} -chname,{TM},{TM_MAX} -timmax -selvar,{TM} {weather_data_file_path} {max_temp_file_path}"
)
# Classifying warning status for the model
# temperature < 15 --> 2 (Yellow)
# temperature >= 15 --> 4 (Red)
run_command(
command=f'cdo -s -aexpr,"WARNING_STATUS = {TM_MAX} < {THRESHOLD} ? 2 : 4" {max_temp_file_path} {result_unmasked_path}',
)
# Mask results using a CSV file with polygons. Env variable MASK_FILE must be set, or else we use the file as it is.
if mask_file_path is not None:
logging.info(f"Applying mask file {mask_file_path} to result file")
run_command(
command=f"cdo -maskregion,{mask_file_path} {result_unmasked_path} {result_path}",
)
else:
os.rename(result_unmasked_path, result_path)
# Convert to GeoTIFF # Extract air_temperature_2m_max for the required period into file MAX_TEMP_FOR_PERIOD_FILE_PATH
# We only need WHS and WARNING_STATUS run_command(
# Merge the WARNING_STATUS and WHS variables into one GeoTIFF file with two bands. command=f"cdo -L seldate,{start_date},{end_date} -selname,{tm_max} {weather_data_file} {max_temp_in_period_file}"
# The WARNING_STATUS should always be band #1 )
# We must delete the GeoTIFF file before merging WHY? # Classifying warning status for the model
# run_command(command=f"rm {data_dir}result_*{file_date_str}*.tif") # temperature < 15 --> 2 (Yellow)
# temperature >= 15 --> 4 (Red)
run_command(
command=f'cdo -s -aexpr,"WARNING_STATUS = {tm_max} < {THRESHOLD} ? 2 : 4" {max_temp_in_period_file} {unmasked_result_file}',
)
timestep_dates.append(date_YYYY_MM_DD) # Mask results using a CSV file with polygons. Env variable MASK_FILE must be set, or else we use the file as it is.
if mask_file_path is not None:
run_command(
command=f"cdo -maskregion,{mask_file_path} {unmasked_result_file} {final_result_file}",
)
else:
os.rename(unmasked_result_file, final_result_file)
# Split result file into separate files for each timestep
prefix = f"{tmp_dir}timestep_"
run_command(command=f"cdo -s splitdate {final_result_file} {prefix}")
timestep_file_pattern = f"{prefix}*.nc"
logging.info(f"Find timestep files: {timestep_file_pattern}")
timestep_files = sorted(glob.glob(timestep_file_pattern))
timestep_filenames = [os.path.basename(file) for file in timestep_files]
logging.info(f"Timestep files sorted: {timestep_filenames}")
timestep_dates = [
filename.split("_")[1].split(".")[0] for filename in timestep_filenames
]
for file in timestep_files:
filename = os.path.basename(file)
date_str = filename.split("_")[1].split(".")[0]
with open("/dev/null", "w") as devnull: with open("/dev/null", "w") as devnull:
warning_status_lcc = f"{tmp_dir}result_WARNING_STATUS_{date_str}_lcc.tif"
result_lcc = f"{tmp_dir}result_{date_str}_lcc.tif"
warning_status = f"{result_tif_dir}result_WARNING_STATUS_{date_str}.tif"
result = f"{result_tif_dir}result_{date_str}.tif"
# For warning status:
run_command( run_command(
command=f'gdal_translate -ot Int16 -of GTiff NETCDF:"{tmp_dir}result_{date_YYYY_MM_DD}.nc":WARNING_STATUS {tmp_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}_lcc.tif', command=f'gdal_translate -ot Int16 -of GTiff NETCDF:"{file}":WARNING_STATUS {warning_status_lcc}',
stdout=devnull, stdout=devnull,
) )
# For max temperature:
run_command( run_command(
command=f'gdal_translate -ot Float32 -of GTiff NETCDF:"{tmp_dir}result_{date_YYYY_MM_DD}.nc":{TM_MAX} {tmp_dir}result_{date_YYYY_MM_DD}_lcc.tif', command=f'gdal_translate -ot Float32 -of GTiff NETCDF:"{file}":{tm_max} {result_lcc}',
stdout=devnull, stdout=devnull,
) )
# Need to reproject the files, to ensure we have the projection given in the generted mapfile. We always use EPSG:4326 for this # Need to reproject the files, to ensure we have the projection given in the generted mapfile. We always use EPSG:4326 for this
run_command( run_command(
command=f"gdalwarp -t_srs EPSG:4326 {tmp_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}_lcc.tif {data_dir}result_WARNING_STATUS_{date_YYYY_MM_DD}.tif", command=f"gdalwarp -t_srs EPSG:4326 {warning_status_lcc} {warning_status}",
stdout=devnull, stdout=devnull,
) )
run_command( run_command(
command=f"gdalwarp -t_srs EPSG:4326 {tmp_dir}result_{date_YYYY_MM_DD}_lcc.tif {data_dir}result_{date_YYYY_MM_DD}.tif", command=f"gdalwarp -t_srs EPSG:4326 {result_lcc} {result}",
stdout=devnull, stdout=devnull,
) )
...@@ -271,15 +219,14 @@ if __name__ == "__main__": ...@@ -271,15 +219,14 @@ if __name__ == "__main__":
language[keyword] = config["i18n.%s" % language_code][keyword] language[keyword] = config["i18n.%s" % language_code][keyword]
languages.append(language) languages.append(language)
# The paths should be set in a .env file
env = Environment(loader=FileSystemLoader(template_dir)) env = Environment(loader=FileSystemLoader(template_dir))
template = env.get_template(template_file_name) template = env.get_template(template_file_name)
output = template.render( output = template.render(
{ {
"model_id": model_id, "model_id": model_id,
"timestep_dates": timestep_dates, "timestep_dates": timestep_dates,
"mapserver_data_dir": os.getenv("MAPSERVER_DATA_DIR"), "mapserver_data_dir": os.getenv("MAPSERVER_RESULT_TIF_DIR"),
"mapserver_mapfile_dir": os.getenv("MAPSERVER_MAPFILE_DIR"), "mapserver_mapfile_dir": os.getenv("MAPSERVER_RESULT_MAPFILE_DIR"),
"mapserver_log_file": os.getenv("MAPSERVER_LOG_FILE"), "mapserver_log_file": os.getenv("MAPSERVER_LOG_FILE"),
"mapserver_image_path": os.getenv("MAPSERVER_IMAGE_PATH"), "mapserver_image_path": os.getenv("MAPSERVER_IMAGE_PATH"),
"mapserver_extent": os.getenv("MAPSERVER_EXTENT"), "mapserver_extent": os.getenv("MAPSERVER_EXTENT"),
...@@ -287,14 +234,11 @@ if __name__ == "__main__": ...@@ -287,14 +234,11 @@ if __name__ == "__main__":
"language_codes": language_codes, "language_codes": language_codes,
} }
) )
with open(f"{mapfile_outdir}/{model_id}.map", "w") as f: with open(f"{result_mapfile_dir}/{model_id}.map", "w") as f:
f.write(output) f.write(output)
remove_temporary_files()
# Remove all temporary/intermediary files
# run_command(command=f"rm {tmp_dir}result*.nc")
# run_command(command=f"rm {tmp_dir}result*.tif")
end_time = time.time() end_time = time.time()
logging.info( logging.info(
f"End model {model_id} - time spent: {'Execution time: {:.2f} seconds'.format(end_time - start_time)}" f"End model {model_id} - Execution time: {'{:.2f} seconds'.format(end_time - start_time)}"
) )
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
# Configures environment and logging before running the model # Configures environment and logging before running the model
# @author: Tor-Einar Skog <tor-einar.skog@nibio.no>
# First: Test that we have CDO and GDAL installed # First: Test that we have CDO and GDAL installed
...@@ -47,8 +46,9 @@ fi ...@@ -47,8 +46,9 @@ fi
LOG_FILE=${HOME_DIR}log/ADASMELIAE.log LOG_FILE=${HOME_DIR}log/ADASMELIAE.log
REQUIREMENTS=${HOME_DIR}requirements.txt REQUIREMENTS=${HOME_DIR}requirements.txt
echo "Start model. Follow log at $LOG_FILE"
# Create and activate the virtual environment # Create and activate the virtual environment
echo "Run model. Follow log at $LOG_FILE"
echo "Activate virtual environment .venv" >> "$LOG_FILE" echo "Activate virtual environment .venv" >> "$LOG_FILE"
python3 -m venv .venv python3 -m venv .venv
. .venv/bin/activate . .venv/bin/activate
......
import pytest import pytest
from datetime import datetime, timedelta import logging
from ADASMELIAE import find_start_date, find_end_date from ADASMELIAE import run_command
@pytest.fixture # Define a fixture for setting up logging
def example_result_files(): @pytest.fixture(autouse=True)
# Fixture for generating content of result dir def setup_logging(caplog):
return [ caplog.set_level(logging.DEBUG)
"result_2023-04-15.nc",
"result_2023-04-16.nc",
"result_2023-04-17.nc",
"result_WARNING_STATUS_2023-04-15.nc",
"result_WARNING_STATUS_2023-04-16.nc",
"result_WARNING_STATUS_2023-04-17.nc",
"result_WARNING_STATUS_2023-04-18.nc",
]
@pytest.fixture # Test case for successful command execution
def today(): def test_run_command_success(caplog):
# Fixture for generating date for testing result = run_command("echo 'Hello, World!'")
return datetime.now().date() assert result == ["Hello, World!"]
assert "Hello, World!" in caplog.text
def test_find_start_date_with_previous_results(example_result_files, monkeypatch): # Test case for command failure
MODEL_START_DATE = datetime(2023, 3, 1) def test_run_command_failure(caplog):
monkeypatch.setenv("DATA_DIR", "out") with pytest.raises(SystemExit):
run_command("nonexistent_command")
# Mock os.listdir to return the example result files assert "nonexistent_command" in caplog.text
with monkeypatch.context() as m:
m.setattr("os.listdir", lambda _: example_result_files)
start_date = find_start_date(MODEL_START_DATE)
# Assert the expected start date
expected_start_date = datetime(2023, 4, 18)
assert start_date == expected_start_date
# Test case for handling stderr
def test_find_start_date_without_previous_results(monkeypatch): def test_run_command_stderr(caplog):
MODEL_START_DATE = datetime(2023, 3, 1) run_command("echo 'This is an error' >&2")
monkeypatch.setenv("DATA_DIR", "out") assert "This is an error" in caplog.text
# Mock os.listdir to return the example result files
with monkeypatch.context() as m:
m.setattr("os.listdir", lambda _: [])
start_date = find_start_date(MODEL_START_DATE)
# Assert the expected start date
assert start_date == MODEL_START_DATE
def test_find_end_date_when_model_end_date_is_in_past(today):
# Test when model end date is in the past
model_end_date = datetime.now().date() - timedelta(days=1)
assert find_end_date(model_end_date=model_end_date) == model_end_date
def test_find_end_date_when_model_end_date_is_in_future(today):
# Test when model end date is in the future
model_end_date = datetime.now().date() + timedelta(days=1)
assert find_end_date(model_end_date=model_end_date) == today
def test_find_end_date_equal(today):
# Test when model end date is today
assert find_end_date(model_end_date=today) == today
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment