Skip to content

Commit

Permalink
Assuring RDRS ends properly (#47)
Browse files Browse the repository at this point in the history
In this commit, two concerns are addressed: 1) for small domains,
sometimes the process ends without actually doing the job and 2) using ncks
and index-based hyperslab to extract a spatial subset.

For the former concern, a `until ... do` loop is used to assure each
process on each file is finished properly. Furthermore, proper error
messages are thrown at the user. For a simple case of `Bow River at
Banff`, where the final NetCDF files are in the magnitude of a few
kilobytes, the script was not behaving properly before. This behaviour
is as expected with this commit.

For the latter concern, the `ncks` is used to avoid using `cdo`. `ncks`
proves to be more stable and reliable.

Signed-off-by: Kasra Keshavarz <kasra.keshavarz1@ucalgary.ca>
  • Loading branch information
kasra-keshavarz authored Mar 17, 2024
1 parent a37e34d commit 479300d
Showing 1 changed file with 123 additions and 24 deletions.
147 changes: 123 additions & 24 deletions scripts/eccc-rdrs/rdrs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,14 @@ do
esac
done

# Emit a timestamp prefix for log lines, e.g. "(2024-03-17 12:00:00) ".
logDate () {
  local stamp
  stamp="$(date +"%Y-%m-%d %H:%M:%S")"
  printf '(%s) \n' "$stamp"
}

# check if $ensemble is provided
if [[ -n "$ensemble" ]] || \
[[ -n "$scenario" ]] || \
[[ -n "$model" ]]; then
echo "ERROR $(basename $0): redundant argument provided";
echo "$(logDate)$(basename $0): ERROR! redundant argument provided";
exit 1;
fi

Expand All @@ -108,6 +111,13 @@ alias date='TZ=UTC date'
# expand aliases for the one stated above
shopt -s expand_aliases

# paths
# NOTE: "$0" is quoted so the script keeps working when installed under a
# path that contains spaces (unquoted $0 would word-split in dirname).
datatoolPath="$(dirname "$0")/../../" # datatool's root path
# daymet index scripts work on RDRSv2.1 grids as well,
# and ESPO-G6-R2 has a similar grid system to RDRSv2.1
coordIdxScript="$datatoolPath/assets/ncl_scripts/coord_daymet_idx.ncl"
coordClosestIdxScript="$datatoolPath/assets/ncl_scripts/coord_closest_daymet_idx.ncl"


# ==========================
# Necessary global variables
Expand All @@ -117,19 +127,37 @@ rdrsFormat="%Y%m%d" # rdrs file date format
exportFormat="%Y%m%d" # exported file date format
fileStruct="" # source dataset files' prefix constant

# rotated-pole grid: coordinate variable and dimension names used by ncks
latVar="rlat";  latDim="rlat"   # latitude variable / dimension
lonVar="rlon";  lonDim="rlon"   # longitude variable / dimension


# ===================
# Necessary functions
# ===================
# Modules below available on Compute Canada (CC) Graham Cluster Server
# Modules below available on Digital Research Alliance of Canada's Graham HPC
## core modules
function load_core_modules () {
  # load the processing toolchain (cdo & nco on top of StdEnv/gcc), quietly
  local mod
  for mod in StdEnv/2020 gcc/9.3.0 cdo/2.0.4 nco/5.0.6; do
    module -q load "$mod"
  done
}
function unload_core_modules () {
  # WARNING: DO NOT USE IF YOU ARE NOT SURE HOW TO USE IT
  # drops cdo & nco so the (conflicting) ncl module can be loaded
  local mod
  for mod in cdo/2.0.4 nco/5.0.6; do
    module -q unload "$mod"
  done
}
## ncl modules
function load_ncl_module () {
  # load NCL (plus its StdEnv/gcc prerequisites) for grid-index extraction
  local mod
  for mod in StdEnv/2020 gcc/9.3.0 ncl/6.6.2; do
    module -q load "$mod"
  done
}
# Unload NCL once grid-index extraction is complete.
function unload_ncl_module () { module -q unload ncl/6.6.2; }

# load the core processing modules (cdo/nco toolchain) before any data handling
load_core_modules


Expand All @@ -156,14 +184,57 @@ lims_to_float () { IFS=',' read -ra l <<< $@; f_arr=(); for i in "${l[@]}"; do f
# Data processing
# ===============
# display info
echo "$(basename $0): processing ECCC RDRSv2.1..."
echo "$(logDate)$(basename $0): processing ECCC RDRSv2.1..."

# make the output directory
echo "$(basename $0): creating output directory under $outputDir"
echo "$(logDate)$(basename $0): creating output directory under $outputDir"
mkdir -p "$outputDir"
echo "$(basename $0): creating cache directory under $cache"
echo "$(logDate)$(basename $0): creating cache directory under $cache"
mkdir -p "$cache"


# ======================
# Extract domain extents
# ======================

# Parse the lower and upper bounds out of the comma-separated $latLims and
# $lonLims ("min,max"). Quoted here-less pipes avoid word-splitting; the
# previous version parsed these twice (duplicated block) — done once now.
minLat="$(echo "$latLims" | cut -d ',' -f 1)"
maxLat="$(echo "$latLims" | cut -d ',' -f 2)"
minLon="$(echo "$lonLims" | cut -d ',' -f 1)"
maxLon="$(echo "$lonLims" | cut -d ',' -f 2)"

# unload and load necessary modules (nco/cdo conflict with ncl on this HPC)
unload_core_modules
load_ncl_module

# choose a sample file as all files share the same grid
domainFile="$(find "${datasetDir}/" -type f -name "*.nc" | head -n 1)"

# extract the associated indices corresponding to $latLims and $lonLims
coordIdx="$(ncl -nQ 'coord_file='\"$domainFile\" 'minlat='"$minLat" 'maxlat='"$maxLat" 'minlon='"$minLon" 'maxlon='"$maxLon" "$coordIdxScript")"

# if the spatial index is out-of-bound, i.e., 'ERROR' is returned
if [[ "${coordIdx}" == "ERROR" ]]; then
  # fall back to the closest index values instead
  coordIdx="$(ncl -nQ 'coord_file='\"$domainFile\" 'minlat='"$minLat" 'maxlat='"$maxLat" 'minlon='"$minLon" 'maxlon='"$maxLon" "$coordClosestIdxScript")"
fi

# parse the output index ranges for longitude and latitude
# (plain '=' rather than '+=': these variables are first set here)
lonLimsIdx="$(echo "$coordIdx" | cut -d ' ' -f 1)"
latLimsIdx="$(echo "$coordIdx" | cut -d ' ' -f 2)"

# reload necessary modules for the processing steps below
unload_ncl_module
load_core_modules


# =====================
# Extract dataset files
# =====================
# define necessary dates — source files are organized in per-year folders
startYear=$(date --date="$startDate" +"%Y") # start year (first folder)
endYear=$(date --date="$endDate" +"%Y") # end year (last folder)
Expand All @@ -175,11 +246,12 @@ endDateUnix="$(unix_epoch "$endDate")"

for yr in $yearsRange; do
# creating yearly directory
mkdir -p "$outputDir/$yr" # making the output directory
mkdir -p "$cache/$yr" # making the cache directory
mkdir -p "$outputDir/$yr" # output directory
mkdir -p "$cache/$yr" # cache directory

# setting the end point, either the end of current year, or the $endDate
endOfCurrentYearUnix=$(date --date="$yr-01-01 +1year -1day" "+%s") # last time-step of the current year
# last time-step of the current year
endOfCurrentYearUnix=$(date --date="$yr-01-01 +1year -1day" "+%s")
if [[ $endOfCurrentYearUnix -le $endDateUnix ]]; then
endPointUnix=$endOfCurrentYearUnix
else
Expand All @@ -189,29 +261,56 @@ for yr in $yearsRange; do
# extract variables from the forcing data files
while [[ "$toDateUnix" -le "$endPointUnix" ]]; do
# date manipulations
toDateFormatted=$(date --date "$toDate" +"$rdrsFormat") # current timestamp formatted to conform to RDRS naming convention
# current timestamp formatted to conform to RDRS naming convention
toDateFormatted=$(date --date "$toDate" +"$rdrsFormat")

# creating file name
file="${toDateFormatted}12.nc" # current file name

# change lon values so the extents are from ~-180 to 0
ncap2 -s 'where(lon>0) lon=lon-360' "${datasetDir}/${yr}/${file}" "${cache}/${yr}/${file}"

# extracting variables from the files and spatial subsetting
cdo -z zip -s -L -sellonlatbox,"$lonLims","$latLims" \
-selvar,"$variables" \
"${cache}/${yr}/${file}" "${outputDir}/${yr}/${prefix}${file}"
# assuring the process finished using an `until` loop
until ncks -A -v ${variables} \
-d "$latDim","${latLimsIdx}" \
-d "$lonDim","${lonLimsIdx}" \
${datasetDir}/${yr}/${file} \
${cache}/${yr}/${file}; do
echo "$(logDate)$(basename $0): Process killed: restarting process in 10 sec" >&2
echo "NCKS [...] failed" >&2
sleep 10;
done # until ncks

# remove any left-over .tmp file
if [[ -e ${cache}/${yr}/${file}*.tmp ]]; then
rm -r "${cache}/${yr}/${file}*.tmp"
fi

# wait for any left-over processes to finish
wait

[ $( jobs | wc -l ) -ge $( nproc ) ] && wait # forking shell processes
# change lon values so the extents are from ~-180 to 0
# assuring the process finished using an `until` loop
until ncap2 -O -s 'where(lon>0) lon=lon-360' \
"${cache}/${yr}/${file}" \
"${outputDir}/${yr}/${prefix}${file}"; do
rm "${outputDir}/${yr}/${prefix}${file}"
echo "$(logDate)$(basename $0): Process killed: restarting process in 10 sec" >&2
echo "$(logDate)$(basename $0): NCAP2 -s [...] failed" >&2
sleep 10;
done

# remove any left-over .tmp file
if [[ -e ${cache}/${yr}/${file}*.tmp ]]; then
rm -r "${cache}/${yr}/${file}*.tmp"
fi

# wait for any left-over processes to finish
wait

# increment time-step by one unit
toDate="$(date --date "$toDate 1day")" # current time-step
toDateUnix="$(unix_epoch "$toDate")" # current timestamp in unix EPOCH time
done

# wait to make sure the while loop is finished
wait

# go to the next year if necessary
if [[ "$toDateUnix" == "$endOfCurrentYearUnix" ]]; then
toDate=$(date --date "$toDate 1day")
Expand All @@ -220,9 +319,9 @@ for yr in $yearsRange; do
done

mkdir "$HOME/empty_dir"
echo "$(basename $0): deleting temporary files from $cache"
echo "$(logDate)$(basename $0): deleting temporary files from $cache"
rsync -aP --delete "$HOME/empty_dir/" "$cache"
rm -r "$cache"
echo "$(basename $0): temporary files from $cache are removed"
echo "$(basename $0): results are produced under $outputDir"
echo "$(logDate)$(basename $0): temporary files from $cache are removed"
echo "$(logDate)$(basename $0): results are produced under $outputDir"

0 comments on commit 479300d

Please sign in to comment.