|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from scipy.stats import iqr |
|
|
|
# Set the figure DPI to 300 |
|
plt.rcParams['figure.dpi'] = 300 |
|
|
|
# Seed for reproducibility |
|
np.random.seed(42) |
|
|
|
# Define the provided data with significant differences between months |
|
monthly_max = [ |
|
20, 27, 30, 25, 20, 5, 5, 12, 30, 15, 25, 16, # Current data list is inhomogeneous data |
|
35, 30, 45, 50, 55, 60, 55, 50, 10, 20, 25, 50 # <== Change 50 to 10 to get homogeneous |
|
] |
|
|
|
# Detect and handle outliers using IQR |
|
def handle_outliers(data): |
|
q1, q3 = np.percentile(data, [25, 75]) |
|
iqr_value = q3 - q1 |
|
lower_bound = q1 - 3 * iqr_value |
|
upper_bound = q3 + 3 * iqr_value |
|
return np.clip(data, lower_bound, upper_bound) |
|
|
|
adjusted_max = handle_outliers(monthly_max) |
|
|
|
# Homogeneity testing using Pettitt test |
|
def pettitt_test(data): |
|
n = len(data) |
|
k = np.zeros(n) |
|
for t in range(1, n): |
|
for i in range(t): |
|
for j in range(t, n): |
|
k[t] += np.sign(data[j] - data[i]) |
|
U = np.max(np.abs(k)) |
|
p_value = 2 * np.exp((-6 * U**2) / (n**3 + n**2)) |
|
return p_value > 0.05 # True if data is homogeneous |
|
|
|
is_homogeneous = pettitt_test(adjusted_max) |
|
|
|
# Adjust inhomogeneities if data is not homogeneous |
|
if not is_homogeneous: |
|
mean_value = np.mean(adjusted_max) |
|
adjusted_homogeneous_max = [mean_value if x > mean_value else x for x in adjusted_max] |
|
else: |
|
adjusted_homogeneous_max = adjusted_max |
|
|
|
# Function to generate precipitation data with more zero values and realistic gaps |
|
def generate_precipitation(max_value, days): |
|
precip = np.zeros(days) |
|
day = 0 |
|
zero_day_gaps = [3, 4, 5, 8, 2, 4, 7, 1, 4, 6] |
|
while day < days: |
|
gap_length = np.random.choice(zero_day_gaps) |
|
if day + gap_length >= days: |
|
gap_length = days - day |
|
day += gap_length |
|
if day >= days: |
|
break |
|
non_zero_length = np.random.randint(1, 5) |
|
if day + non_zero_length >= days: |
|
non_zero_length = days - day |
|
precip[day:day + non_zero_length] = np.random.gamma(2, max_value / 10, size=non_zero_length) |
|
day += non_zero_length |
|
return precip |
|
|
|
# Generate daily precipitation data based on monthly maxima |
|
days_in_month = [30] * len(monthly_max) |
|
data = [generate_precipitation(max_val, days) for max_val, days in zip(monthly_max, days_in_month)] |
|
|
|
# Concatenate daily data for plotting |
|
daily_precip = np.concatenate(data) |
|
days = np.arange(len(daily_precip)) |
|
|
|
# Plot the results |
|
x = np.arange(len(monthly_max)) |
|
|
|
# Plotting in XKCD style |
|
plt.xkcd() |
|
plt.figure(figsize=(15, 10)) |
|
|
|
# Panel 1: Original Daily Precipitation Data |
|
plt.subplot(231) |
|
plt.plot(days, daily_precip, color='skyblue') |
|
plt.title('Original Daily Precipitation Data') |
|
plt.xlabel('Day') |
|
plt.ylabel('Precipitation (mm)') |
|
|
|
# Panel 2: Monthly Maxima |
|
plt.subplot(232) |
|
bars = plt.bar(x, monthly_max, color='blue') |
|
plt.title('Monthly Maxima') |
|
plt.xlabel('Month') |
|
plt.ylabel('Precipitation (mm)') |
|
for bar, value in zip(bars, monthly_max): |
|
height = bar.get_height() |
|
plt.text(bar.get_x() + bar.get_width() / 2, height, f'{value:.1f}', ha='center', va='bottom', fontsize=10, color='black') |
|
|
|
# Panel 3: Homogeneity Test Result |
|
plt.subplot(233) |
|
bars = plt.bar(x, adjusted_max, color='green') |
|
plt.title('Homogeneity Test') |
|
plt.xlabel('Month') |
|
plt.ylabel('Precipitation (mm)') |
|
plt.axhline(np.mean(adjusted_max), color='red', linestyle='--', label='Mean') |
|
if not is_homogeneous: |
|
plt.title('Inhomogeneous Data Detected') |
|
else: |
|
plt.title('Data is Homogeneous') |
|
plt.legend() |
|
for bar, value in zip(bars, adjusted_max): |
|
height = bar.get_height() |
|
plt.text(bar.get_x() + bar.get_width() / 2, height, f'{value:.1f}', ha='center', va='bottom', fontsize=10, color='black') |
|
# Add text to indicate 'After Outlier Handling' |
|
plt.text(0.05, 0.75, 'After\nOutlier Handling', transform=plt.gca().transAxes, fontsize=14, ha='left', va='bottom') |
|
|
|
# Panel 4: Adjusted for Homogeneity |
|
plt.subplot(234) |
|
bars = plt.bar(x, adjusted_homogeneous_max, color='orange') |
|
plt.title('After Homogeneity Adjustment') |
|
plt.xlabel('Month') |
|
plt.ylabel('Precipitation (mm)') |
|
plt.axhline(np.mean(adjusted_homogeneous_max), color='red', linestyle='--', label='Mean') |
|
plt.legend() |
|
for bar, value in zip(bars, adjusted_homogeneous_max): |
|
height = bar.get_height() |
|
plt.text(bar.get_x() + bar.get_width() / 2, height, f'{value:.1f}', ha='center', va='bottom', fontsize=10, color='black') |
|
|
|
# Panel 5: Histogram and Normality Plot |
|
plt.subplot(235) |
|
plt.hist(adjusted_homogeneous_max, bins=10, color='purple', alpha=0.7, label='Histogram') |
|
plt.title('Histogram and Normality Plot') |
|
plt.xlabel('Precipitation (mm)') |
|
plt.ylabel('Frequency') |
|
plt.legend() |
|
|
|
# Panel 6: Final Adjusted Data |
|
plt.subplot(236) |
|
bars = plt.bar(x, adjusted_homogeneous_max, color='red') |
|
plt.title('Final Adjusted Data') |
|
plt.xlabel('Month') |
|
plt.ylabel('Precipitation (mm)') |
|
for bar, value in zip(bars, adjusted_homogeneous_max): |
|
height = bar.get_height() |
|
plt.text(bar.get_x() + bar.get_width() / 2, height, f'{value:.1f}', ha='center', va='bottom', fontsize=10, color='black') |
|
|
|
plt.tight_layout() |
|
plt.show() |