#!/usr/bin/python3
"""
Copyright (C) 2023 NIBIO <https://www.nibio.no/>.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
# Requirements:
# * CDO >= v 2.0
# * GDAL >= v 3.4.3 built with Python support
# * For Python: See requirements.txt
import glob
import os
import subprocess
import sys
from dotenv import load_dotenv
from datetime import datetime, timezone, timedelta
from jinja2 import Environment, FileSystemLoader
import pytz
import netCDF4 as nc
# Paths config
# Create a .env file from dotenv-sample
load_dotenv()
# Path to weather data
infile_path = os.getenv("WEATHER_DATA_DIR")
# Path to store generated GeoTIFF files
outfile_path = os.getenv("DATA_DIR")
# Where to store intermediary calculations
tmpfile_path = "tmp/"
# TODO: Make this configurable (.env)
local_timezone = pytz.timezone("Europe/Oslo")
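# For reference, an illustrative .env (all values below are hypothetical; see dotenv-sample
# in the repository for the authoritative template). NB: WEATHER_DATA_DIR and DATA_DIR are
# concatenated directly with file names below, so they must end with a slash:
#
#   WEATHER_DATA_DIR=/data/weather/
#   DATA_DIR=/data/geotiff/
#   MASK_FILE=/data/mask.csv            # optional, see below
#   MAPFILE_DIR=/data/mapfile
#   MAPSERVER_DATA_DIR=/var/mapserver/data
#   MAPSERVER_MAPFILE_DIR=/var/mapserver/mapfile
#   MAPSERVER_LOG_FILE=/var/log/mapserver.log
#   MAPSERVER_IMAGE_PATH=/var/mapserver/images
#   MAPSERVER_EXTENT=-1.5 52.5 45.0 73.5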
# Iterate the set of hourly weather data files
# 1. Find the date of the newest wh_[DATE].nc file and set the earliest weather data file to process: start_date = [DATE] - 2 days
# --> If no wh_[DATE].nc file exists, set start_date = None
start_date = None
last_wh_date = None
#"""
for wh_file in glob.glob(f"{tmpfile_path}wh_2[0-9][0-9][0-9]-[01][0-9]-[0123][0-9].nc"):
    # Extract YYYY-MM-DD from the file name (wh_YYYY-MM-DD.nc), independent of the length of tmpfile_path
    current_wh_file_date = local_timezone.localize(datetime.strptime(os.path.basename(wh_file)[3:13], "%Y-%m-%d"))
    if last_wh_date is None or last_wh_date < current_wh_file_date:
        last_wh_date = current_wh_file_date

if last_wh_date is not None:
    start_date = last_wh_date - timedelta(days=2)
    print(f"Last date of WH calculations is {last_wh_date}. Start date = {start_date}")
# TODO: let filename/pattern be configurable
weatherdata_files = glob.glob(f"{infile_path}met_1_0km_nordic-*.nc")
for file_path in weatherdata_files:
    # TODO: When filename/pattern is configurable: make the string search adaptable
    file_name = os.path.basename(file_path)
    file_date = file_name[file_name.index("nordic") + 7:file_name.index("nordic") + 17]
    # Skip if we don't have a complete date (10 characters), which could indicate
    # that we are looking at a yearly file of daily data
    if len(file_date) != 10:
        continue
    wh_sum_date = local_timezone.localize(datetime.strptime(file_date, "%Y-%m-%d"))
    # Only process files from the last three days (unless we are calculating from scratch)
    if start_date is not None and wh_sum_date < start_date:
        continue
    # Check that the file has at least 23 timesteps
    with nc.Dataset(file_path, 'r') as weatherdata_file:
        file_timesteps = len(weatherdata_file.variables["time"])
        if file_timesteps < 23:
            print(f"{file_path} has {file_timesteps} timesteps. Skipping it.")
            continue
    # Produce daily files with WH_DAYSUM, which is the number of "wet hours" (WH) for a given day.
    # A wet hour is one where RR > 0.2 || UM > 88.0
    wh_sum_date_str = wh_sum_date.strftime("%Y-%m-%d")
    wh_sum_hour_str = wh_sum_date.strftime("%H")
    subprocess.run(
        f'cdo -s -O -setdate,{wh_sum_date_str} -settime,{wh_sum_hour_str}:00:00 -chname,WH,WH_DAYSUM -timsum -selname,WH -aexpr,"WH = RR > 0.2 || UM > 88.0 ? 1 : 0;" {file_path} {tmpfile_path}wh_{file_date}.nc',
        shell=True
    )
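# For reference: cdo applies chained operators from right to left, so the command above
# 1) computes a binary WH flag per hour (aexpr), 2) selects that variable (selname),
# 3) sums it over the day (timsum), 4) renames WH to WH_DAYSUM (chname), and
# 5) stamps the result with the date/time of the source file (setdate/settime).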
# Concatenate the daily files into one file with daily values.
# This is an additive calculation: the merged file can be kept between runs.
subprocess.run(f'cdo -s -O mergetime {tmpfile_path}wh_2[0-9][0-9][0-9]-[01][0-9]-[0123][0-9].nc {tmpfile_path}wh_daysum.nc', shell=True)
# Check timesteps - that we have no missing data
wh_daysum = nc.Dataset(f'{tmpfile_path}wh_daysum.nc', 'r')
timesteps = wh_daysum.variables["time"][:]
previous_timestep = None
for timestep in timesteps:
    if previous_timestep is not None and timestep - previous_timestep != 86400.0:
        timestep_str = datetime.fromtimestamp(timestep).astimezone(local_timezone).strftime("%Y-%m-%d")
        previous_timestep_str = datetime.fromtimestamp(previous_timestep).astimezone(local_timezone).strftime("%Y-%m-%d")
        print(f"ERROR: Missing weather data between {previous_timestep_str} and {timestep_str}. Exiting.", file=sys.stderr)
        sys.exit(1)
    previous_timestep = timestep
wh_daysum.close()
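# NB: The gap check above assumes the "time" variable holds epoch seconds at exactly daily
# (86400 s) spacing, as in these files (cf. the time_bnds values further down); epoch
# seconds are unaffected by DST transitions.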
# From here on there is no additive calculation; we recalculate from the beginning of wh_daysum.nc every time.
# Add the sum WH_DAYSUM[yesterday] + WH_DAYSUM[today] + WH_DAYSUM[tomorrow] into WHS[today].
# timselsum advances three steps between sums when summing three timesteps, so we must
# create three differently offset files and then merge them.
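# For reference, cdo timselsum,nsets,noffset sums non-overlapping groups of nsets timesteps,
# skipping the first noffset steps. With daily steps d1, d2, d3, ...:
#   timselsum,3,0 -> sum(d1..d3), sum(d4..d6), ...
#   timselsum,3,1 -> sum(d2..d4), sum(d5..d7), ...
#   timselsum,3,2 -> sum(d3..d5), sum(d6..d8), ...
# Merging the three series yields a rolling 3-day sum for every day.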
subprocess.run(f'cdo -s timselsum,3,0 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_0.nc', shell=True)
subprocess.run(f'cdo -s timselsum,3,1 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_1.nc', shell=True)
subprocess.run(f'cdo -s timselsum,3,2 {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_2.nc', shell=True)
subprocess.run(f'cdo -s -chname,WH_DAYSUM,WHS -mergetime {tmpfile_path}wh_3daysum_tmp_*.nc {tmpfile_path}wh_3daysum_tmp_merged.nc', shell=True)
# The last timesteps are most likely wrong, since the "tomorrows" are missing when performing timselsum.
# To remove them:
#
# The variable time_bnds (dimensions: time and bnds, where bnds has size 2) contains the
# timestamps (epoch seconds) of the first and last timesteps used in each sum, e.g. like this:
# $ ncdump -v time_bnds tmp/wh_3daysum_tmp_merged.nc
# time_bnds =
# 1696111200, 1696284000,
# 1696197600, 1696370400,
# 1696284000, 1696456800,
# 1696370400, 1696543200,
# 1696456800, 1696629600,
# 1696543200, 1696716000,
# 1696629600, 1696716000,
# 1696716000, 1696716000;
# The difference [1] - [0] should be 172800 seconds = 48 hours
# Timesteps with [1] - [0] != 172800 should be discarded
# Using netCDF4 to accomplish this
wh_3daysum = nc.Dataset(f'{tmpfile_path}wh_3daysum_tmp_merged.nc', 'r')
time_bnds = wh_3daysum.variables["time_bnds"][:]
# Assuming that wrong time bounds only exist at the end of the time series, this works
number_of_timesteps_to_remove = 0
for time_bnd in time_bnds:
    if time_bnd[1] - time_bnd[0] != 172800.0:
        number_of_timesteps_to_remove += 1
wh_3daysum.close()
number_of_timesteps_to_keep = len(time_bnds) - number_of_timesteps_to_remove
subprocess.run(f'cdo -s -seltimestep,1/{number_of_timesteps_to_keep} {tmpfile_path}wh_3daysum_tmp_merged.nc {tmpfile_path}wh_3daysum.nc', shell=True)
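# cdo seltimestep,1/N keeps timesteps 1 through N (1-based, inclusive), so the command
# above drops only the incomplete 3-day sums at the end of the series.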
# Classifying warning status for the WHS model
# 0 == WHS --> Grey
# 0 < WHS < 20 --> Yellow
# 20 <= WHS < 40 --> Orange
# 40 <= WHS --> Red
subprocess.run(f'cdo -s -aexpr,"WARNING_STATUS = WHS <= 0 ? 0 : -1; WARNING_STATUS = WHS < 20 && WARNING_STATUS == -1 ? 2 : WARNING_STATUS; WARNING_STATUS = WHS < 40 && WARNING_STATUS == -1 ? 3 : WARNING_STATUS; WARNING_STATUS = WHS >= 40 && WARNING_STATUS == -1 ? 4 : WARNING_STATUS" {tmpfile_path}wh_3daysum.nc {tmpfile_path}result_unmasked.nc', shell=True)
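# Note: the chained ternaries above never assign the value 1, so WARNING_STATUS is always
# one of 0 (grey), 2 (yellow), 3 (orange) or 4 (red).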
# Mask results using a CSV file with polygons.
# The env variable MASK_FILE must be set for this to happen.
if os.getenv("MASK_FILE") is not None:
    mask_file = os.getenv("MASK_FILE")
    print(f"Applying mask file {mask_file} to result.nc")
    subprocess.run(f'cdo -maskregion,{mask_file} {tmpfile_path}result_unmasked.nc {tmpfile_path}result.nc', shell=True)
else:
    os.rename(f"{tmpfile_path}result_unmasked.nc", f"{tmpfile_path}result.nc")
#"""
# Split the combined file into daily .nc files again, with YYYY-MM-DD in the filenames, and convert them to corresponding GeoTIFF files.
# Variables that need discrete classification must be stored as integers for Mapserver to work properly (Don't ask!).
# Since WARNING_STATUS needs to be discretely classified, we create a separate GeoTIFF file for it.
wh_3daysum = nc.Dataset(f'{tmpfile_path}wh_3daysum.nc', 'r')
timesteps = wh_3daysum.variables["time"][:]
wh_3daysum.close()
timestep_dates = []  # Used in the mapfile template
# TODO: We are (?) assuming all timesteps are equal. This might NOT be the case if there are holes in the result data
# NB: enumerate from 1, since cdo seltimestep uses 1-based timestep numbers. This also keeps
# the index correct when a timestep is skipped below.
for timestep_index, timestep in enumerate(timesteps, start=1):
    timestep_date = datetime.fromtimestamp(timestep).astimezone(local_timezone)
    file_date = timestep_date.strftime("%Y-%m-%d")
    timestep_dates.append(file_date)
    # If start_date is set, we should only generate result files from that date on,
    # provided the GeoTIFF files for the earlier dates already exist
    if start_date is not None and timestep_date < start_date:
        if os.path.isfile(f'{outfile_path}result_WARNING_STATUS_{file_date}.tif') and os.path.isfile(f'{outfile_path}result_{file_date}.tif'):
            continue
    # Create the NetCDF result file for this date
    subprocess.run(f'cdo -s -seltimestep,{timestep_index}/{timestep_index} {tmpfile_path}result.nc {tmpfile_path}result_{file_date}.nc', shell=True)
    # Convert to GeoTIFF. We only need WHS and WARNING_STATUS.
    # WARNING_STATUS goes into a separate file, since it must be discretely classified (see above).
    # We must delete any previously generated GeoTIFF files for this date first.
    subprocess.run(f'rm {outfile_path}result_*{file_date}*.tif', shell=True)
    with open("/dev/null", "w") as devnull:
        subprocess.run(f'gdal_translate -ot Int16 -of GTiff NETCDF:"{tmpfile_path}result_{file_date}.nc":WARNING_STATUS {tmpfile_path}result_WARNING_STATUS_{file_date}_lcc.tif', shell=True, stdout=devnull)
        subprocess.run(f'gdal_translate -ot Float32 -of GTiff NETCDF:"{tmpfile_path}result_{file_date}.nc":WHS {tmpfile_path}result_{file_date}_lcc.tif', shell=True, stdout=devnull)
        # Reproject the files to ensure they match the projection given in the generated mapfile. We always use EPSG:4326 for this
        subprocess.run(f'gdalwarp -t_srs EPSG:4326 {tmpfile_path}result_WARNING_STATUS_{file_date}_lcc.tif {outfile_path}result_WARNING_STATUS_{file_date}.tif', shell=True, stdout=devnull)
        subprocess.run(f'gdalwarp -t_srs EPSG:4326 {tmpfile_path}result_{file_date}_lcc.tif {outfile_path}result_{file_date}.tif', shell=True, stdout=devnull)
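# Note: the "_lcc" suffix on the intermediate GeoTIFFs presumably refers to the Lambert
# Conformal Conic projection of the source weather data (an assumption based on the naming);
# the gdalwarp calls above reproject those files to EPSG:4326 before publishing.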
# Generate mapfile
# The paths should be set in a .env file
env = Environment(loader=FileSystemLoader('.'))
template = env.get_template("mapfile/template.j2")
output = template.render({
    "timestep_dates": timestep_dates,
    "mapserver_data_dir": os.getenv("MAPSERVER_DATA_DIR"),
    "mapserver_mapfile_dir": os.getenv("MAPSERVER_MAPFILE_DIR"),
    "mapserver_log_file": os.getenv("MAPSERVER_LOG_FILE"),
    "mapserver_image_path": os.getenv("MAPSERVER_IMAGE_PATH"),
    "mapserver_extent": os.getenv("MAPSERVER_EXTENT")
})
mapfile_outdir = os.getenv("MAPFILE_DIR")
with open(f"{mapfile_outdir}/SEPTREFHUM.map", 'w') as f:
    f.write(output)
# Remove all temporary/intermediary files
subprocess.run(f"rm {tmpfile_path}wh_3daysum*.nc", shell=True)
subprocess.run(f"rm {tmpfile_path}result*.nc", shell=True)
subprocess.run(f"rm {tmpfile_path}result*.tif", shell=True)