#!/usr/bin/python3

"""
    Copyright (C) 2023 NIBIO <https://www.nibio.no/>. 

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""

# Requirements: 
# * CDO >= v 2.0
# * GDAL >= v 3.4.3 built with Python support
# * For Python: See requirements.txt


import os, sys, subprocess,glob
from dotenv import load_dotenv
from datetime import datetime, timezone, timedelta
from jinja2 import Environment, FileSystemLoader
import pytz
import netCDF4 as nc

# Paths config
# Create a .env file from dotenv-sample
load_dotenv()
# Path to weather data (returns None if WEATHER_DATA_DIR is unset in the environment/.env)
infile_path  = os.getenv("WEATHER_DATA_DIR")
# Path to store generated GeoTIFF files (returns None if DATA_DIR is unset)
outfile_path = os.getenv("DATA_DIR")
# Where to store intermediary calculations (relative to the current working directory)
tmpfile_path = "tmp/"

# Timezone used for all date parsing/formatting below.
# TODO: Make this configurable (.env)
local_timezone = pytz.timezone("Europe/Oslo")

# Determine the start date for an incremental run.
# 1. When's the latest wh_[DATE].nc file? - set earliest weather data file: start_date = [DATE]-2 days
# --> If no wh_DATE.nc file exists, set start_date = None (process everything from scratch)
start_date = None
last_wh_date = None
for wh_file in glob.glob(f"{tmpfile_path}wh_2[0-9][0-9][0-9]-[01][0-9]-[0123][0-9].nc"):
    # Parse the date out of the file name itself ("wh_YYYY-MM-DD.nc") instead of
    # slicing the full path at a hard-coded offset (wh_file[7:17]), which silently
    # broke if tmpfile_path was ever changed from "tmp/".
    wh_file_date_str = os.path.basename(wh_file)[3:13]
    current_wh_file_date = local_timezone.localize(datetime.strptime(wh_file_date_str, "%Y-%m-%d"))
    if last_wh_date is None or last_wh_date < current_wh_file_date:
        last_wh_date = current_wh_file_date
if last_wh_date is not None:
    # Reprocess the last two days as well, since late-arriving weather data may have changed them
    start_date = last_wh_date - timedelta(days=2)
    print(f"Last date of WH calculations is {last_wh_date}. Start date = {start_date}")



# TODO: let filename/pattern be configurable
weatherdata_files = glob.glob(f"{infile_path}met_1_0km_nordic-*.nc")
for file_path in weatherdata_files:
    # TODO: When filename/pattern is configurable: make the string search adaptable
    file_name = os.path.basename(file_path)
    date_start = file_name.index("nordic") + 7
    file_date = file_name[date_start:date_start + 10]
    # A complete date is exactly 10 characters; anything shorter suggests we are
    # looking at a yearly file of daily data, which we skip
    if len(file_date) != 10:
        continue

    wh_sum_date = local_timezone.localize(datetime.strptime(file_date, "%Y-%m-%d"))
    # Incremental runs only consider files from the three last days
    if start_date is not None and wh_sum_date < start_date:
        continue

    # Require at least 23 timesteps, otherwise the day is incomplete
    with nc.Dataset(file_path, 'r') as weatherdata_file:
        file_timesteps = len(weatherdata_file.variables["time"])
        if file_timesteps < 23:
            print(f"{file_path} has {file_timesteps} timesteps. Skipping it.")
            continue

    # Produce a daily file with WH_SUM, the number of "Wet hours" (WH) for the day.
    # An hour counts as "wet" when RR > 0.2 || UM > 88.0
    wh_sum_date_str = wh_sum_date.strftime("%Y-%m-%d")
    wh_sum_hour_str = wh_sum_date.strftime("%H")
    subprocess.run(
        f'cdo -s -O -setdate,{wh_sum_date_str} -settime,{wh_sum_hour_str}:00:00 -chname,WH,WH_DAYSUM -timsum -selname,WH -aexpr,"WH = RR > 0.2 || UM > 88.0 ? 1 : 0;" {file_path} {tmpfile_path}wh_{file_date}.nc',
        shell=True
        )

# Concatenate daily files > one file with daily values
# Additive calculation - this file can be stored between calculations
subprocess.run(f'cdo -s -O mergetime {tmpfile_path}wh_2[0-9][0-9][0-9]-[01][0-9]-[0123][0-9].nc {tmpfile_path}wh_daysum.nc', shell=True)

# Check timesteps - that we have no missing data.
# A context manager guarantees the dataset is closed even on the error path;
# the previous open/close pair leaked the handle when exiting on a gap.
with nc.Dataset(f'{tmpfile_path}wh_daysum.nc', 'r') as wh_daysum:
    timesteps = wh_daysum.variables["time"][:]
previous_timestep = None
for timestep in timesteps:
    # Consecutive daily timesteps must be exactly 86400 seconds (24 h) apart
    if previous_timestep is not None and timestep - previous_timestep != 86400.0:
        timestep_str = datetime.fromtimestamp(timestep).astimezone(local_timezone).strftime("%Y-%m-%d")
        previous_timestep_str = datetime.fromtimestamp(previous_timestep).astimezone(local_timezone).strftime("%Y-%m-%d")
        print(f"ERROR: Missing weather data between {previous_timestep_str} and {timestep_str}. Exiting.", file=sys.stderr)
        # sys.exit instead of the site-builtin exit(), which is not guaranteed
        # to exist when the script is run without the site module
        sys.exit(1)
    previous_timestep = timestep

# From here, no additive calculation; we calculate from the beginning of the wh_daysum.nc file every time.

# Add sum of WH_DAYSUM[yesterday] + WH_DAYSUM[today] + WH_DAYSUM[tomorrow] into WHS[today]
# timselsum skips every 3 steps when summing 3 timesteps, so we must
# create three different files (offsets 0, 1 and 2) and then merge them
for offset in range(3):
    subprocess.run(f'cdo -s timselsum,3,{offset} {tmpfile_path}wh_daysum.nc {tmpfile_path}wh_3daysum_tmp_{offset}.nc', shell=True)

subprocess.run(f'cdo -s -chname,WH_DAYSUM,WHS -mergetime {tmpfile_path}wh_3daysum_tmp_*.nc {tmpfile_path}wh_3daysum_tmp_merged.nc', shell=True)

# The last timesteps are most likely wrong, due to lack of "tomorrows" when performing timselsum.
# To remove them:
#
# The variable time_bnds (which has two dimensions: time and bnds. bnds is size 2) contains the
# time (in seconds) between the first and last timesteps used for summing up. e.g like this:
# $ ncdump -v time_bnds tmp/wh_3daysum.nc
# time_bnds =
# 1696111200, 1696284000,
# 1696197600, 1696370400,
# 1696284000, 1696456800,
# 1696370400, 1696543200,
# 1696456800, 1696629600,
# 1696543200, 1696716000,
# 1696629600, 1696716000,
# 1696716000, 1696716000;
#
# The difference [1] - [0] should be 172800 seconds = 48 hours.
# Timesteps with [1] - [0] != 172800 should be discarded.
# Using netCDF4 to accomplish this

with nc.Dataset(f'{tmpfile_path}wh_3daysum_tmp_merged.nc', 'r') as wh_3daysum:
    time_bnds = wh_3daysum.variables["time_bnds"][:]
# Assuming that wrong time bounds only exist at the end of the time series, this works
number_of_timesteps_to_remove = sum(
    1 for time_bnd in time_bnds if time_bnd[1] - time_bnd[0] != 172800.0
)
number_of_timesteps_to_keep = len(time_bnds) - number_of_timesteps_to_remove
subprocess.run(f'cdo -s -seltimestep,1/{number_of_timesteps_to_keep} {tmpfile_path}wh_3daysum_tmp_merged.nc {tmpfile_path}wh_3daysum.nc', shell=True)


# Classifying warning status for the WHS model
# 0  == WHS            --> Grey   (0)
# 0  <  WHS <  20      --> Yellow (2)
# 20 <= WHS < 40       --> Orange (3)
# 40 <= WHS            --> Red    (4)
# (-1 is a sentinel for "not yet classified"; each successive aexpr clause
# claims the remaining unclassified cells)
subprocess.run(f'cdo -s -aexpr,"WARNING_STATUS = WHS <= 0 ? 0 : -1; WARNING_STATUS = WHS < 20 && WARNING_STATUS == -1 ? 2 : WARNING_STATUS; WARNING_STATUS = WHS < 40 && WARNING_STATUS == -1 ? 3 : WARNING_STATUS; WARNING_STATUS = WHS >= 40 && WARNING_STATUS == -1 ? 4 : WARNING_STATUS" {tmpfile_path}wh_3daysum.nc {tmpfile_path}result_unmasked.nc', shell=True)

# Mask results using a CSV file with polygons
# Env variable MASK_FILE must be set (read once instead of calling getenv twice)
mask_file = os.getenv("MASK_FILE")
if mask_file is not None:
    # NOTE(review): mask_file is interpolated into a shell command line;
    # MASK_FILE must be a trusted path without shell metacharacters
    print(f"Applying mask file {mask_file} to result.nc")
    subprocess.run(f'cdo -maskregion,{mask_file} {tmpfile_path}result_unmasked.nc {tmpfile_path}result.nc', shell=True)
else:
    os.rename(f"{tmpfile_path}result_unmasked.nc", f"{tmpfile_path}result.nc")

# Split the combined file into daily .nc files again, with YYYY-MM-DD in the filename. Convert to corresponding GeoTIFF files
# Variables that needs discrete classification, must be integers in order for mapserver to work properly (Don't ask!)
# Since we need WARNING_STATUS to discretely classified, we need to create a separate GeoTIFF file for it
with nc.Dataset(f'{tmpfile_path}wh_3daysum.nc', 'r') as wh_3daysum:
    timesteps = wh_3daysum.variables["time"][:]
timestep_dates = [] # Used in the mapfile template
# TODO: We are (?) assuming all timesteps are equal. This might NOT be the case if there are holes in the result data
# enumerate keeps timestep_index aligned with cdo's 1-based timestep numbering
# even for skipped dates. BUGFIX: the previous manual counter was only
# incremented at the end of the loop body, so every "continue" below made all
# subsequent -seltimestep calls extract the wrong day.
for timestep_index, timestep in enumerate(timesteps, start=1):
    timestep_date = datetime.fromtimestamp(timestep).astimezone(local_timezone)
    file_date = timestep_date.strftime("%Y-%m-%d")
    timestep_dates.append(file_date)
    # If start_date is set, that means that we should only generate result files from that date on
    # -- IF GeoTIFF files already exist before that.
    if start_date is not None and timestep_date < start_date:
        if os.path.isfile(f'{outfile_path}result_WARNING_STATUS_{file_date}.tif') and os.path.isfile(f'{outfile_path}result_{file_date}.tif'):
            continue

    # Create NetCDF result file for this single day
    subprocess.run(f'cdo -s -seltimestep,{timestep_index}/{timestep_index} {tmpfile_path}result.nc {tmpfile_path}result_{file_date}.nc', shell=True)
    # Convert to GeoTIFF
    # We only need WHS and WARNING_STATUS
    # Merge the WARNING_STATUS and WHS variables into one GeoTIFF file with two bands.
    # The WARNING_STATUS should always be band #1
    # We must delete the GeoTIFF file before merging
    subprocess.run(f'rm {outfile_path}result_*{file_date}*.tif', shell=True)
    with open("/dev/null", "w") as devnull:
        subprocess.run(f'gdal_translate -ot Int16 -of GTiff  NETCDF:"{tmpfile_path}result_{file_date}.nc":WARNING_STATUS {tmpfile_path}result_WARNING_STATUS_{file_date}_lcc.tif', shell=True, stdout=devnull)
        subprocess.run(f'gdal_translate -ot Float32 -of GTiff  NETCDF:"{tmpfile_path}result_{file_date}.nc":WHS {tmpfile_path}result_{file_date}_lcc.tif', shell=True, stdout=devnull)
        # Need to reproject the files, to ensure we have the projection given in the generated mapfile. We always use EPSG:4326 for this
        subprocess.run(f'gdalwarp -t_srs EPSG:4326 {tmpfile_path}result_WARNING_STATUS_{file_date}_lcc.tif {outfile_path}result_WARNING_STATUS_{file_date}.tif', shell=True, stdout=devnull)
        subprocess.run(f'gdalwarp -t_srs EPSG:4326 {tmpfile_path}result_{file_date}_lcc.tif {outfile_path}result_{file_date}.tif', shell=True, stdout=devnull)

# Generate mapfile from the Jinja2 template.
# The paths should be set in a .env file
template_context = {
    "timestep_dates": timestep_dates,
    "mapserver_data_dir": os.getenv("MAPSERVER_DATA_DIR"),
    "mapserver_mapfile_dir": os.getenv("MAPSERVER_MAPFILE_DIR"),
    "mapserver_log_file": os.getenv("MAPSERVER_LOG_FILE"),
    "mapserver_image_path": os.getenv("MAPSERVER_IMAGE_PATH"),
    "mapserver_extent": os.getenv("MAPSERVER_EXTENT"),
}
env = Environment(loader=FileSystemLoader('.'))
output = env.get_template("mapfile/template.j2").render(template_context)
mapfile_outdir = os.getenv("MAPFILE_DIR")
with open(f"{mapfile_outdir}/SEPTREFHUM.map", 'w') as f:
    f.write(output)


# Remove all temporary/intermediary files
for leftover_pattern in ("wh_3daysum*.nc", "result*.nc", "result*.tif"):
    subprocess.run(f"rm {tmpfile_path}{leftover_pattern}", shell=True)