Skip to content

Instantly share code, notes, and snippets.

@bennyistanto
Created June 8, 2024 07:04
Show Gist options
  • Save bennyistanto/b8dfbd0d0c700d22c82f48ec5b334006 to your computer and use it in GitHub Desktop.
Save bennyistanto/b8dfbd0d0c700d22c82f48ec5b334006 to your computer and use it in GitHub Desktop.
Calculates the thresholds for extreme rainfall events using both percentile-based and Generalized Extreme Value (GEV) methods
import xarray as xr
import numpy as np
import os
import glob
import pandas as pd
from tqdm import tqdm
from scipy.stats import genextreme
# Define directories and filenames for threshold calculations
threshold_calculations = [
{"days": 1, "input_dir": "/mnt/d/temp/imerg/data/fddmaxm_1day", "output_dir": "/mnt/d/temp/imerg/data/threshold_fddmaxm_1day"},
{"days": 2, "input_dir": "/mnt/d/temp/imerg/data/fddmaxm_2day", "output_dir": "/mnt/d/temp/imerg/data/threshold_fddmaxm_2day"},
{"days": 3, "input_dir": "/mnt/d/temp/imerg/data/fddmaxm_3day", "output_dir": "/mnt/d/temp/imerg/data/threshold_fddmaxm_3day"},
{"days": 4, "input_dir": "/mnt/d/temp/imerg/data/fddmaxm_4day", "output_dir": "/mnt/d/temp/imerg/data/threshold_fddmaxm_4day"},
{"days": 5, "input_dir": "/mnt/d/temp/imerg/data/fddmaxm_5day", "output_dir": "/mnt/d/temp/imerg/data/threshold_fddmaxm_5day"},
]
# Threshold definitions
threshold_definitions = [
{"name": "moderate", "percentile": 50, "years": 2, "output_suffix": "02yr"},
{"name": "heavy", "percentile": 80, "years": 5, "output_suffix": "05yr"},
{"name": "intense", "percentile": None, "years": 10, "output_suffix": "10yr"},
{"name": "extreme", "percentile": None, "years": 25, "output_suffix": "25yr"},
]
# Global variable to store user's choice
user_choice = None
def set_user_decision():
"""
Prompt user for decision on existing files and store it globally.
This function prompts the user to decide whether to replace, skip, or abort
if an output file already exists. The decision is stored in a global variable
to be used throughout the script.
"""
global user_choice
if user_choice is None:
decision = input("An output file already exists. Choose an action - Replace (R), Skip (S), Abort (A): ").upper()
while decision not in ['R', 'S', 'A']:
print("Invalid choice. Please choose again.")
decision = input("Choose an action - Replace (R), Skip (S), Abort (A): ").upper()
user_choice = decision
def calculate_percentile_threshold(data, percentile):
"""
Calculate percentile threshold for given data.
Parameters:
data (numpy.ndarray): The input precipitation data.
percentile (int): The percentile to calculate (e.g., 50 for P50).
Returns:
numpy.ndarray: The calculated percentile threshold.
"""
return np.nanpercentile(data, percentile, axis=0)
def calculate_gev_threshold(data, return_period):
"""
Calculate GEV threshold using Maximum Likelihood Estimation (MLE) for fitting the GEV distribution.
Parameters:
data (numpy.ndarray): The input precipitation data.
return_period (int): The return period for the threshold (e.g., 10 years).
Returns:
float: The calculated GEV threshold.
"""
shape, loc, scale = genextreme.fit(data) # MLE is used by default
return genextreme.ppf(1 - 1 / return_period, shape, loc=loc, scale=scale)
# Process each accumulation period
for calc in threshold_calculations:
days = calc["days"]
input_dir = calc["input_dir"]
output_dir = calc["output_dir"]
# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)
# Create a list of files
file_list = sorted(glob.glob(os.path.join(input_dir, f"wld_cli_imerg_fddmax_{days}day_*.nc4")))
# Group files by month
files_by_month = {}
for file_path in file_list:
date_str = file_path.split('_')[-1].split('.')[0]
month = date_str[4:6]
if month not in files_by_month:
files_by_month[month] = []
files_by_month[month].append(file_path)
# Process each month
with tqdm(total=len(files_by_month), desc=f"Processing {days}-day data by month") as month_pbar:
for month, month_files in files_by_month.items():
with tqdm(total=len(threshold_definitions), desc=f"Calculating thresholds for month {month}", leave=False) as threshold_pbar:
for threshold_def in threshold_definitions:
name = threshold_def["name"]
percentile = threshold_def["percentile"]
years = threshold_def["years"]
output_suffix = threshold_def["output_suffix"]
output_filename = f"wld_cli_imerg_{days}day_extreme_threshold_{output_suffix}_{month}.nc4"
output_filepath = os.path.join(output_dir, output_filename)
if os.path.exists(output_filepath):
if user_choice is None:
set_user_decision()
if user_choice == 'S':
print(f"Skipping existing file: {output_filepath}")
continue # Skip to the next file
elif user_choice == 'A':
print("Aborting process.")
exit(0) # Exit the script
elif user_choice == 'R':
pass # Replace the file
try:
# Load data for the current month
datasets = [xr.open_dataset(f) for f in month_files]
combined_ds = xr.concat(datasets, dim='time')
data = combined_ds['precipitation'].values
# Calculate threshold
if percentile is not None:
threshold = calculate_percentile_threshold(data, percentile)
else:
threshold = calculate_gev_threshold(data.flatten(), years)
# Create an xarray DataArray for the threshold
threshold_da = xr.DataArray(threshold, dims=["lat", "lon"], coords={"lat": combined_ds["lat"], "lon": combined_ds["lon"]})
threshold_ds = threshold_da.to_dataset(name="threshold")
threshold_ds.attrs['Conventions'] = 'CF-1.8'
# Ensure CF conventions and compression
encoding = {var: {'zlib': True, 'complevel': 5} for var in threshold_ds.data_vars}
threshold_ds.to_netcdf(output_filepath, encoding=encoding)
print(f"Saved: {output_filepath}")
except Exception as e:
print(f"Error processing month {month} for {name} threshold: {e}")
finally:
for ds in datasets:
ds.close()
threshold_pbar.update(1)
month_pbar.update(1)
print("All thresholds calculated and saved.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment