diff --git a/.gitignore b/.gitignore
index dfea6207d201eeb1461a08877d209b51f94ffb6b..60e0b3efb905e9e28235bb57fb8069e1111fb052 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
-*.nc
\ No newline at end of file
+*.nc
+*.tif
+*.tif.aux.xml
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1eb6514a52d667f04d8e7f3a448a428c19638ce7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,57 @@
+# Septoria reference humidity model - spatial version
+The reference humidity model was developed as a supplement to the Humidity
+model. In the Humidity model, 20 consecutive hours are required to fulfil a
+risk period. One constraint of that method is that you can have 19
+consecutive risk hours, or e.g. 14 risk hours followed by one hour below the
+RH threshold and then perhaps another 14 risk hours, and in neither situation
+will the model indicate a risk. The reference model therefore introduces the
+definition of humid hours: the consecutive-hours requirement is avoided, as
+humid hours do not need to be consecutive. A running sum of humid hours
+across three days indicates a higher Septoria risk after three days with
+humid conditions than after two or one. The operational model should include
+weather forecast data and run 6 days ahead of the current day when a 7-day
+weather forecast is available (60 hours from the national met office, and up
+to 7 days from ECMWF).
+
+The model was tested against the Humidity model in a Danish Septoria project
+funded by the GUDP. The threshold of 40 was defined as high risk, as this
+coincided with periods when the Humidity model recommended applying fungicide
+(if the crop was not already protected). The Humidity model includes a
+decision model about when to spray, protection periods etc. The reference
+model was used to quality control the recommendations: if the reference
+humid hours were higher than 40 (no thresholds), the user should check the
+raw data behind the Humidity model calculation (threshold, 20 consecutive
+hours). If 2-3 periods of 17, 18 or 19 consecutive hours appear, one can
+consider protecting the crop based on the reference model alone.
+
+The Humidity model is considered a DSS with several components. The reference
+humidity model is considered a weather-based submodel for the risk of
+Septoria, easy to map and calculate based on weather data alone.
+
+## Notes to self
+
+
+
+### Converting variables in NetCDF to GeoTIFF
+Given the one-timestep weather datafile `analysis-2023-10-08_1.nc`, this command:
+
+```bash
+gdal_translate -of GTiff -sds analysis-2023-10-08_1.nc test.tif
+```
+
+...produces 7 GeoTIFF files (test_1.tif ... test_7.tif) that each represent
+one variable from the NetCDF file. The first two are latitude and longitude,
+then the weather data variables.
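+
+The same per-variable extraction can also be scripted. A minimal sketch using
+GDAL's Python bindings (assuming the file contains the relative humidity
+variable `UM`, as used by SEPTREFHUM.py):
+
+```python
+from osgeo import gdal
+
+# A single NetCDF variable is addressed as NETCDF:"<filename>":<variable>
+src = gdal.Open('NETCDF:"analysis-2023-10-08_1.nc":UM')
+gdal.Translate("test_UM.tif", src, format="GTiff")
+```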
+
+The files can be merged using gdal_merge.py, like this:
+
+```bash
+gdal_merge.py -separate in/test_3.tif in/test_4.tif in/test_5.tif in/test_6.tif in/test_7.tif
+```
+
+Note that without `-o <filename>`, gdal_merge.py writes the merged result to `out.tif`.
+
+**Could we live with x GeoTIFF files per timestamp, where x is the number of variables?**
+
+
+###
\ No newline at end of file
diff --git a/SEPTREFHUM.py b/SEPTREFHUM.py
index 3e3d01f19e7df262ca1d79c45042d3f1e6456ee7..0c6a49bbc0cc49353548375bb5a0dcce64dfe39d 100755
--- a/SEPTREFHUM.py
+++ b/SEPTREFHUM.py
@@ -1,50 +1,66 @@
 #!/usr/bin/python3
+# Requirements:
+# * CDO >= v 2.0
+# * GDAL >= v 3.4.3 built with Python support
+
+
 import subprocess,glob
-from datetime import datetime, timedelta
+from datetime import datetime, timezone
+import pytz
+import netCDF4 as nc
+
 
 # TODO: Figure out where files will be collected from and stored to.
 infile_path = "in/"
 outfile_path = "out/"
+tmpfile_path = "tmp/"
+
+# TODO: Make this truly independent of time zones/Norwegian conditions?
+utc_offset = "+02:00"
+local_timezone = pytz.timezone("Europe/Oslo")
 
-# Remove all out files
-subprocess.run("rm %s*.nc" % outfile_path, shell=True)
+
+#"""  # Change this to """ to skip (as a string literal) everything down to the matching #""" below
+# Empty the tmpfile folder
+subprocess.run("rm %s*" % tmpfile_path, shell=True)
 
 weatherdata_files = glob.glob("%sanalysis-*.nc" % infile_path)
 # Iterate the set of hourly weather data files
 for file in weatherdata_files:
     file_date = file[file.index("analysis")+9:file.index("analysis")+19]
-    wh_sum_date = datetime.fromisoformat(file_date) - timedelta(hours=2)
-    print(wh_sum_date)
+    # Assuming we're in the summer (CEST, UTC+02:00)
+    wh_sum_date = datetime.fromisoformat("%sT00:00:00%s" % (file_date, utc_offset))
+    #print(wh_sum_date)
     #print('cdo -setdate,%s -settime,22:00:00 -chname,WH,WH_SUM -timsum -selname,WH -aexpr,"WH = RR > 0.2 || UM > 88.0 ? 1 : 0;" %s %swh_%s.nc' % (file_date, file, file_path, file_date))
     # Produce daily files with WH_SUM, which is the number of "Wet hours" (WH) for a given day
     # WH is defined as RR > 0.2 || UM > 88.0
     subprocess.run(
-        'cdo -setdate,%s -settime,22:00:00 -chname,WH,WH_DAYSUM -timsum -selname,WH -aexpr,"WH = RR > 0.2 || UM > 88.0 ? 1 : 0;" %s %swh_%s.nc' % (wh_sum_date.strftime("%Y-%m-%d"), file, outfile_path, file_date),
+        'cdo -setdate,%s -settime,%s:00:00 -chname,WH,WH_DAYSUM -timsum -selname,WH -aexpr,"WH = RR > 0.2 || UM > 88.0 ? 1 : 0;" %s %swh_%s.nc' % (wh_sum_date.astimezone(timezone.utc).strftime("%Y-%m-%d"), wh_sum_date.astimezone(timezone.utc).strftime("%H"), file, tmpfile_path, file_date),
         shell=True
         )
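+# For illustration only: with file = "in/analysis-2023-10-08_1.nc", the cdo
+# command above expands to roughly
+#   cdo -setdate,2023-10-07 -settime,22:00:00 -chname,WH,WH_DAYSUM -timsum \
+#     -selname,WH -aexpr,"WH = RR > 0.2 || UM > 88.0 ? 1 : 0;" \
+#     in/analysis-2023-10-08_1.nc tmp/wh_2023-10-08.nc
+# i.e. the day's hourly timesteps are reduced to a single timestep stamped
+# 2023-10-07 22:00 UTC (= 2023-10-08 00:00 in Europe/Oslo during CEST).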
 
 # Concatenate daily files > one file with daily values
-subprocess.run('cdo -O mergetime %swh_*.nc %swh_daysum.nc' % (outfile_path, outfile_path), shell=True)
+subprocess.run('cdo -O mergetime %swh_*.nc %swh_daysum.nc' % (tmpfile_path, tmpfile_path), shell=True)
 
 # Add sum of WH_DAYSUM[yesterday] + WH_DAYSUM[today] + WH_DAYSUM[tomorrow] into WHS[today]
 # timselsum skips every 3 steps when summing 3 timesteps, so we must
 # create three different files and then merge them
-subprocess.run('cdo timselsum,3,0 out/wh_daysum.nc out/wh_3daysum_tmp_0.nc', shell=True)
-subprocess.run('cdo timselsum,3,1 out/wh_daysum.nc out/wh_3daysum_tmp_1.nc', shell=True)
-subprocess.run('cdo timselsum,3,2 out/wh_daysum.nc out/wh_3daysum_tmp_2.nc', shell=True)
+subprocess.run('cdo timselsum,3,0 %swh_daysum.nc %swh_3daysum_tmp_0.nc' % (tmpfile_path, tmpfile_path), shell=True)
+subprocess.run('cdo timselsum,3,1 %swh_daysum.nc %swh_3daysum_tmp_1.nc' % (tmpfile_path, tmpfile_path), shell=True)
+subprocess.run('cdo timselsum,3,2 %swh_daysum.nc %swh_3daysum_tmp_2.nc' % (tmpfile_path, tmpfile_path), shell=True)
 
-subprocess.run('cdo -chname,WH_DAYSUM,WHS -mergetime out/wh_3daysum_tmp_*.nc out/wh_3daysum_tmp_merged.nc', shell=True)
+subprocess.run('cdo -chname,WH_DAYSUM,WHS -mergetime %swh_3daysum_tmp_*.nc %swh_3daysum_tmp_merged.nc' % (tmpfile_path, tmpfile_path), shell=True)
 
 # The last timesteps are most likely wrong, due to lack of "tomorrows" when performing timselsum.
 # To remove the last ones:
 #
 # The variable time_bnds (which has two dimensions: time and bnds, where bnds has size 2) contains the
 # time (in seconds) between the first and last timesteps used for summing up, e.g. like this:
-# $ ncdump -v time_bnds out/wh_3daysum.nc
+# $ ncdump -v time_bnds tmp/wh_3daysum.nc
 # time_bnds =
 #  1696111200, 1696284000,
 #  1696197600, 1696370400,
@@ -59,11 +75,7 @@ subprocess.run('cdo -chname,WH_DAYSUM,WHS -mergetime out/wh_3daysum_tmp_*.nc out
 # Timesteps with [1] - [0] != 172800 should be discarded
 # Using netCDF4 to accomplish this
-import netCDF4 as nc
-import numpy as np
-
-
-wh_3daysum = nc.Dataset('out/wh_3daysum_tmp_merged.nc', 'r')
+wh_3daysum = nc.Dataset('%swh_3daysum_tmp_merged.nc' % tmpfile_path, 'r')
 time_bnds = wh_3daysum.variables["time_bnds"][:]
 # Assuming that wrong time bounds only exist at the end of the time series, this works
 number_of_timesteps_to_remove = 0
@@ -72,10 +84,39 @@ for time_bnd in time_bnds:
         number_of_timesteps_to_remove = number_of_timesteps_to_remove + 1
 wh_3daysum.close()
 number_of_timesteps_to_keep = len(time_bnds) - number_of_timesteps_to_remove
-subprocess.run('cdo -seltimestep,1/%s out/wh_3daysum_tmp_merged.nc out/wh_3daysum.nc' % number_of_timesteps_to_keep, shell=True)
+subprocess.run('cdo -seltimestep,1/%s %swh_3daysum_tmp_merged.nc %swh_3daysum.nc' % (number_of_timesteps_to_keep, tmpfile_path, tmpfile_path), shell=True)
+#"""
 
 # Classifying warning status for the WHS model
-# WHS == 0 --> Grey
-# 0 < WHS < 20 --> Green
+# 0 <= WHS < 20 --> Green
 # 20 <= WHS < 40 --> Orange
 # 40 <= WHS --> Red
+
+subprocess.run('cdo -aexpr,"WARNING_STATUS = WHS < 20 ? 2 : -1; WARNING_STATUS = WHS < 40 && WARNING_STATUS == -1 ? 3 : WARNING_STATUS; WARNING_STATUS = WHS >= 40 ? 4 : WARNING_STATUS" %swh_3daysum.nc %sresult.nc' % (tmpfile_path, tmpfile_path), shell=True)
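+# The chained aexpr above is equivalent to this per-cell logic (illustration):
+#   WARNING_STATUS = 2 if WHS < 20 else (3 if WHS < 40 else 4)
+# i.e. 2 = green, 3 = orange, 4 = red, matching the thresholds above.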
+
+
+# Split the combined file into daily .nc files again, with YYYY-MM-DD in the
+# filename. Convert to corresponding GeoTIFF files.
+wh_3daysum = nc.Dataset('%swh_3daysum.nc' % tmpfile_path, 'r')
+timesteps = wh_3daysum.variables["time"][:]
+timestep_index = 1
+for timestep in timesteps:
+    # The time variable is seconds since the epoch; make the timestamp
+    # explicitly UTC before converting it to the local time zone.
+    timestep_date = datetime.fromtimestamp(timestep, tz=timezone.utc)
+    file_date = timestep_date.astimezone(local_timezone).strftime("%Y-%m-%d")
+    # Create NetCDF result file
+    subprocess.run('cdo -seltimestep,%s/%s %sresult.nc %sresult_%s.nc' % (timestep_index, timestep_index, tmpfile_path, tmpfile_path, file_date), shell=True)
+    # Split all variables into separate GeoTIFF files, using GDAL
+    subprocess.run("gdal_translate -of GTiff -sds %sresult_%s.nc %stmp.tif" % (tmpfile_path, file_date, tmpfile_path), shell=True)
+    # The subdatasets are: 1 = time_bnds, 2 = lon, 3 = lat, 4 = WHS, 5 = WARNING_STATUS.
+    # We only need WHS and WARNING_STATUS.
+    # Merge the WARNING_STATUS and WHS GeoTIFF files into one file with two
+    # bands; WARNING_STATUS should always be band #1.
+    subprocess.run("gdal_merge.py -separate -o %sresult_%s.tif %stmp_5.tif %stmp_4.tif" % (outfile_path, file_date, tmpfile_path, tmpfile_path), shell=True)
+    subprocess.run("rm %stmp_*.tif" % tmpfile_path, shell=True)
+    timestep_index = timestep_index + 1
+
+
+# Remove all temporary/intermediary files
+subprocess.run("rm %s*" % tmpfile_path, shell=True)
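+
+# Quick sanity check of one day's output (the date is an example):
+#   $ gdalinfo out/result_2023-10-08.tif
+# should list two bands, where band 1 is WARNING_STATUS and band 2 is WHS,
+# per the gdal_merge.py band order above.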