#!/usr/bin/python3
"""
Copyright (C) 2023 NIBIO <https://www.nibio.no/>.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
# Requirements:
# * CDO >= v 2.0
# * GDAL >= v 3.4.3 built with Python support
# * For Python: See requirements.txt
import os, sys, subprocess, glob
from dotenv import load_dotenv
from datetime import datetime, timezone, timedelta
from jinja2 import Environment, FileSystemLoader
import pytz
import netCDF4 as nc
import configparser
import logging
# Paths config
# Create a .env file from dotenv-sample
load_dotenv()
# Get language stuff
config = configparser.ConfigParser()
config.read("SEPTREFHUM.cfg")
# Paths to weather data
model_id = os.getenv("MODEL_ID")
infile_path = os.getenv("WEATHER_DATA_DIR")
infile_archive_path = os.getenv("ARCHIVE_WEATHER_DATA_DIR")
# Used for iterating the weather data files
filename_pattern = os.getenv("FILENAME_PATTERN")
# Date format of weather data filenames
filename_dateformat = os.getenv("FILENAME_DATEFORMAT")
# Path to store generated GeoTIFF files
outfile_path = os.getenv("DATA_DIR")
# Names of weather parameters in NetCDF files
RR = os.getenv("RR")
UM = os.getenv("UM")
TM = os.getenv("TM")
local_timezone = pytz.timezone(os.getenv("LOCAL_TIMEZONE"))
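# Illustrative (hypothetical) .env values, shown only to document the expected shape of the
# configuration; the real values are set in the .env file created from dotenv-sample:
#   MODEL_ID=SEPTREFHUM
#   WEATHER_DATA_DIR=/path/to/weatherdata/
#   ARCHIVE_WEATHER_DATA_DIR=/path/to/weatherdata_archive/
#   FILENAME_PATTERN=met_*.nc
#   FILENAME_DATEFORMAT=met_%Y-%m-%d.nc
#   DATA_DIR=/path/to/output/
#   RR=RR
#   UM=UM
#   TM=TM
#   LOCAL_TIMEZONE=Europe/Oslo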
today = datetime.now(local_timezone)
if len(sys.argv) > 1:
    year = int(sys.argv[1])
else:
    year = today.year
# Where to store intermediary calculations
tmpfile_path = f"tmp/{year}/"
TEMPERATURE_THRESHOLD = 8.0
DEBUG = os.getenv("DEBUG", "false").lower() != "false"
logging.getLogger().handlers.clear()
logging.basicConfig(
    level=logging.DEBUG if DEBUG else logging.INFO,
    format="%(asctime)s - %(levelname).4s - (%(filename)s:%(lineno)d) - %(message)s",
)
# Iterate the set of hourly weather data files
# 1. Find the latest wh_[DATE].nc file and set the earliest weather data file to process: start_date = [DATE] - 2 days
# --> If no wh_DATE.nc file exists, set start_date = None
start_date = None
last_wh_date = None
#"""
for wh_file in glob.glob(f"{tmpfile_path}wh_2[0-9][0-9][0-9]-[01][0-9]-[0123][0-9].nc"):
    # The date sits at a fixed position in the path: tmp/<year>/wh_YYYY-MM-DD.nc
    current_wh_file_date = local_timezone.localize(datetime.strptime(wh_file[12:22], "%Y-%m-%d"))
    if last_wh_date is None or last_wh_date < current_wh_file_date:
        last_wh_date = current_wh_file_date
if last_wh_date is not None:
    start_date = last_wh_date - timedelta(days=2)
    logging.info(f"Last date of WH calculations is {last_wh_date}. Start date = {start_date}")
weatherdata_files = glob.glob(f"{infile_path}{filename_pattern}")
if DEBUG:
    logging.info(f"{infile_path}{filename_pattern}")
    logging.info("What are the weatherdata files?")
    logging.info(weatherdata_files)
for file_path in sorted(weatherdata_files):
    # TODO: When filename/pattern is configurable: make the string search adaptable
    file_name = os.path.basename(file_path)
    # Skip the file if its name does not contain a complete date, which could indicate
    # that we are looking at a yearly file of daily data
    try:
        wh_sum_date = local_timezone.localize(datetime.strptime(file_name, filename_dateformat))
    except ValueError as e:
        logging.info(e)
        continue
    # Only process files from the last three days (unless we are calculating from scratch)
    if start_date is not None and wh_sum_date < start_date:
        continue
    # Check that the file has at least 23 timesteps
    with nc.Dataset(file_path, 'r') as weatherdata_file:
        file_timesteps = len(weatherdata_file.variables["time"])
        if file_timesteps < 23:
            logging.info(f"{file_path} has {file_timesteps} timesteps. Skipping it.")
            continue
    # Produce daily files with WH_DAYSUM, the number of "wet hours" (WH) for a given day.
    # An hour counts as wet when TM >= TEMPERATURE_THRESHOLD and (RR > 0.2 or UM > 88.0)
    wh_sum_date_str = wh_sum_date.strftime("%Y-%m-%d")
    wh_sum_hour_str = wh_sum_date.strftime("%H")
    subprocess.run(
        f'cdo -s -O -setdate,{wh_sum_date_str} -settime,{wh_sum_hour_str}:00:00 -chname,WH,WH_DAYSUM -timsum -selname,WH -aexpr,"WH = ({TM} >= {TEMPERATURE_THRESHOLD} && ({RR} > 0.2 || {UM} > 88.0)) ? 1 : 0;" {file_path} {tmpfile_path}wh_{wh_sum_date_str}.nc',
        shell=True
    )
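# The chained cdo operators above are applied right to left: aexpr flags each hour as wet (1)
# or dry (0), selname keeps only the WH variable, timsum sums the flags over the file's
# timesteps, chname renames the result to WH_DAYSUM, and setdate/settime stamp the single
# output timestep with the file's date.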
# Concatenate daily files > one file with daily values
# Additive calculation - this file can be stored between calculations
subprocess.run(f'cdo -s -O mergetime {tmpfile_path}wh_2[0-9][0-9][0-9]-[01][0-9]-[0123][0-9].nc {tmpfile_path}wh_daysum.nc', shell=True)
# Check timesteps - that we have no missing data
wh_daysum = nc.Dataset(f'{tmpfile_path}wh_daysum.nc', 'r')
timesteps = wh_daysum.variables["time"][:]
previous_timestep = None
for timestep in timesteps:
    if previous_timestep is not None:
        if timestep - previous_timestep != 86400.0:
            timestep_str = datetime.fromtimestamp(timestep).astimezone(local_timezone).strftime("%Y-%m-%d")
            previous_timestep_str = datetime.fromtimestamp(previous_timestep).astimezone(local_timezone).strftime("%Y-%m-%d")
            logging.error(f"Missing weather data between {previous_timestep_str} and {timestep_str}. Exiting.")
            sys.exit(1)
    previous_timestep = timestep
wh_daysum.close()
# From here, no additive calculation; we calculate from the beginning of the wh_daysum.nc file every time.
# Add the sum of WH_DAYSUM[yesterday] + WH_DAYSUM[today] + WH_DAYSUM[tomorrow] into WHS[today]
# timselsum sums non-overlapping windows of 3 timesteps, so we must
# create three files with different offsets and then merge them
subprocess.run(f'cdo -s timselsum,3,0 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_0.nc', shell=True)
subprocess.run(f'cdo -s timselsum,3,1 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_1.nc', shell=True)
subprocess.run(f'cdo -s timselsum,3,2 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_2.nc', shell=True)
subprocess.run(f'cdo -s -chname,WH_DAYSUM,WHS -mergetime {tmpfile_path}wh_3daysum_tmp_*.nc {tmpfile_path}wh_3daysum_tmp_merged.nc', shell=True)
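# With timselsum,3,<offset>, offset 0 sums days (1,2,3), (4,5,6), ..., offset 1 sums
# (2,3,4), (5,6,7), ..., and offset 2 sums (3,4,5), (6,7,8), ... Merging the three files
# by time therefore yields the rolling 3-day sum described above for every day.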
# The last timesteps are most likely wrong, because the "tomorrows" they need are missing
# when timselsum is performed. To remove them:
#
# The variable time_bnds (dimensions: time and bnds, where bnds has size 2) contains the
# timestamps (in seconds) of the first and last timesteps used for summing up, e.g. like this:
# $ ncdump -v time_bnds tmp/wh_3daysum.nc
#  time_bnds =
#   1696111200, 1696284000,
#   1696197600, 1696370400,
#   1696284000, 1696456800,
#   1696370400, 1696543200,
#   1696456800, 1696629600,
#   1696543200, 1696716000,
#   1696629600, 1696716000,
#   1696716000, 1696716000;
# }
# The difference [1] - [0] should be 172800 seconds = 48 hours
# Timesteps with [1] - [0] != 172800 should be discarded
# Using netCDF4 to accomplish this
wh_3daysum = nc.Dataset(f'{tmpfile_path}wh_3daysum_tmp_merged.nc', 'r')
time_bnds = wh_3daysum.variables["time_bnds"][:]
# Assuming that wrong time bounds only exist at the end of the time series, this works
number_of_timesteps_to_remove = 0
for time_bnd in time_bnds:
    if time_bnd[1] - time_bnd[0] != 172800.0:
        number_of_timesteps_to_remove = number_of_timesteps_to_remove + 1
wh_3daysum.close()
number_of_timesteps_to_keep = len(time_bnds) - number_of_timesteps_to_remove
subprocess.run(f'cdo -s -seltimestep,1/{number_of_timesteps_to_keep} {tmpfile_path}wh_3daysum_tmp_merged.nc {tmpfile_path}wh_3daysum.nc', shell=True)
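# seltimestep,1/N above keeps only timesteps 1 through N, dropping the trailing timesteps
# whose time bounds do not span the full 48 hours.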
# Classifying warning status for the WHS model
# 0 =< WHS < 20 --> Green
# 20 <= WHS < 40 --> Yellow
# 40 <= WHS --> Red
subprocess.run(f'cdo -s -aexpr,"WARNING_STATUS = WHS < 0 ? 0 : -1; WARNING_STATUS = WHS < 20 && WARNING_STATUS == -1 ? 2 : WARNING_STATUS; WARNING_STATUS = WHS < 40 && WARNING_STATUS == -1 ? 3 : WARNING_STATUS; WARNING_STATUS = WHS >= 40 && WARNING_STATUS == -1 ? 4 : WARNING_STATUS" {tmpfile_path}wh_3daysum.nc {tmpfile_path}result_unmasked.nc', shell=True)
subprocess.run(f"rm {tmpfile_path}result.nc", shell=True)
# Mask results using a CSV file with polygons
# Env variable MASK_FILE must be set
if os.getenv("MASK_FILE") is not None:
mask_file = os.getenv("MASK_FILE")
logging.info(f"Applying mask file {mask_file} to result.nc")
subprocess.run(f'cdo -P 6 -maskregion,{mask_file} {tmpfile_path}result_unmasked.nc {tmpfile_path}result.nc', shell=True)
else:
os.rename(f"{tmpfile_path}result_unmasked.nc", f"{tmpfile_path}result.nc")
#"""
# Split the combined file into daily .nc files again, with YYYY-MM-DD in the filename. Convert to corresponding GeoTIFF files
# Variables that need discrete classification must be integers in order for mapserver to work properly (Don't ask!)
# Since WARNING_STATUS needs to be discretely classified, we create a separate GeoTIFF file for it
wh_3daysum = nc.Dataset(f'{tmpfile_path}wh_3daysum.nc', 'r')
timesteps = wh_3daysum.variables["time"][:]
timestep_index = 1
timestep_dates = [] # Used in the mapfile template
# TODO: We are (?) assuming all timesteps are equal. This might NOT be the case if there are holes in the result data
for timestep in timesteps:
    timestep_date = datetime.fromtimestamp(timestep).astimezone(local_timezone)
    file_date = timestep_date.strftime("%Y-%m-%d")
    timestep_dates.append(file_date)
    # Create NetCDF result file
    subprocess.run(f'cdo -s -seltimestep,{timestep_index}/{timestep_index} {tmpfile_path}result.nc {tmpfile_path}result_{file_date}.nc', shell=True)
    # Convert to GeoTIFF
    # We only need WHS and WARNING_STATUS
    # WARNING_STATUS needs discrete classification, so it is written to its own single-band
    # GeoTIFF, separate from the WHS file
    # We must delete any existing GeoTIFF files for this date before regenerating them
    subprocess.run(f'rm {outfile_path}result_*{file_date}*.tif', shell=True)
with open("/dev/null", "w") as devnull:
subprocess.run(f'gdal_translate -ot Int16 -of GTiff NETCDF:"{tmpfile_path}result_{file_date}.nc":WARNING_STATUS {tmpfile_path}result_WARNING_STATUS_{file_date}_lcc.tif', shell=True, stdout=devnull)
subprocess.run(f'gdal_translate -ot Float32 -of GTiff NETCDF:"{tmpfile_path}result_{file_date}.nc":WHS {tmpfile_path}result_{file_date}_lcc.tif', shell=True, stdout=devnull)
# Need to reproject the files, to ensure we have the projection given in the generted mapfile. We always use EPSG:4326 for this
subprocess.run(f'gdalwarp -t_srs EPSG:4326 {tmpfile_path}result_WARNING_STATUS_{file_date}_lcc.tif {outfile_path}result_WARNING_STATUS_{file_date}.tif', shell=True, stdout=devnull)
subprocess.run(f'gdalwarp -t_srs EPSG:4326 {tmpfile_path}result_{file_date}_lcc.tif {outfile_path}result_{file_date}.tif', shell=True, stdout=devnull)
timestep_index = timestep_index + 1
# Generate mapfile
# Building data sets for language specific legends
languages = []
language_codes = config["i18n"]["languages"].split(",")
for language_code in language_codes:
    language = {"language_code": language_code}
    if ("i18n.%s" % language_code) in config:
        for keyword in config["i18n.%s" % language_code]:
            language[keyword] = config["i18n.%s" % language_code][keyword]
    languages.append(language)
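# Hypothetical sketch of the [i18n] sections the loop above expects in SEPTREFHUM.cfg
# (the actual languages and legend keys are defined in the real config file):
#   [i18n]
#   languages = en,nb
#   [i18n.en]
#   whs = Wet hours sum
#   [i18n.nb]
#   whs = Sum fuktige timer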
# The paths should be set in a .env file
env = Environment(loader=FileSystemLoader('.'))
template = env.get_template("mapfile/template.j2")
output = template.render({
    "model_id": model_id,
    "timestep_dates": timestep_dates,
    "mapserver_data_dir": os.getenv("MAPSERVER_DATA_DIR"),
    "mapserver_mapfile_dir": os.getenv("MAPSERVER_MAPFILE_DIR"),
    "mapserver_log_file": os.getenv("MAPSERVER_LOG_FILE"),
    "mapserver_image_path": os.getenv("MAPSERVER_IMAGE_PATH"),
    "mapserver_extent": os.getenv("MAPSERVER_EXTENT"),
    "languages": languages,
    "language_codes": language_codes
})
mapfile_outdir = os.getenv("MAPFILE_DIR")
with open(f"{mapfile_outdir}/{model_id}.map", 'w') as f:
    f.write(output)
# Remove all temporary/intermediary files
subprocess.run(f"rm {tmpfile_path}wh_3daysum*.nc", shell=True)
subprocess.run(f"rm {tmpfile_path}result_*.nc", shell=True)
subprocess.run(f"rm {tmpfile_path}result*.tif", shell=True)