Download and process NOAA 5-min data
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 25 18:45:37 2019
@author: dan
https://www.ncdc.noaa.gov/crn/qcdatasets.html
Change `filebase` for your local storage location
Run setup() to download data and do all the necessary processing. About 4 GB of data.
Run plot() to plot comparison with 5-min mean. Options are 'mid', 'time',
and 'week'
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob
try:
    plt.style.use('weather')  # author's custom style sheet
except OSError:
    pass  # fall back to the default style if it is not installed
filebase = '/Users/dan/Documents/Python/tmp_compare/data/'
# non-CONUS stations and stations with incomplete 2006 data
exclude = ['AK_Fairbanks_11_NE', 'AK_Sitka_1_NE', 'AK_St._Paul_4_NE',
           'AK_Utqiagvik_formerly_Barrow_4_ENE', 'HI_Hilo_5_S',
           'HI_Mauna_Loa_5_NNE', 'AR_Batesville_8_WNW',
           'TX_Bronte_11_NNE', 'SD_Pierre_24_S',
           'ND_Northgate_5_ESE', 'OR_Corvallis_10_SSW', 'WA_Quinault_4_NE',
           'AL_Russellville_4_SSE', 'AL_Scottsboro_2_NE', 'AL_Fairhope_3_NE',
           'PA_Avondale_2_N', 'AL_Cullman_3_ENE',
           'AL_Courtland_2_WSW', 'AL_Valley_Head_1_SSW']

def get_names():
    names = pd.read_csv(filebase + 'names.txt', sep='\t', index_col=0)
    return names

def save_names(names):
    names.to_csv(filebase + 'names.txt', sep='\t')
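
# Inferred from the code: 'names.txt' is a tab-separated file indexed by
# station name, with WBANNO, Long, Lat and Weight columns. get_data() and
# set_weights() fill the columns in, so it can start as a bare one-column
# list of the station names to fetch.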

def get_data():
    '''Run once to download all the data. 'names.txt' starts as a simple
    list of stations to download.
    '''
    names = get_names()
    base = 'https://www1.ncdc.noaa.gov/pub/data/uscrn/products/subhourly01/'
    pre = '/CRNS0101-05-'
    sfx = '.txt'
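    # Assumed CRN subhourly column layout (see the README alongside the
    # data at the qcdatasets URL above): 0 WBANNO, 1 UTC_DATE, 2 UTC_TIME,
    # 6 LONGITUDE, 7 LATITUDE, 8 AIR_TEMPERATURE (°C, -9999 = missing)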
    for i in names.index:
        name = names.loc[i]  # view into names
        print(name.name + ' start')
        # check for already-downloaded files for this station
        files = glob.glob(filebase + f'{name.name}-*.txt')
        # files exist but station metadata is still missing: fetch it from 2006
        if (len(files) > 0) and (np.isnan(name.Lat)):
            url = base + '2006' + pre + '2006-' + name.name + sfx
            df = pd.read_csv(url,
                             sep=r'\s+',
                             header=None,
                             index_col=False,
                             usecols=[0, 6, 7],
                             names=['WBANNO', 'Long', 'Lat'],
                             nrows=2,
                             )
            name.WBANNO = df.WBANNO[0]
            name.Long = df.Long[0]
            name.Lat = df.Lat[0]
            save_names(names)
            continue
        for year in range(2006, 2019):
            print(year)
            furl = filebase + f'{name.name}-{year}.txt'
            if furl in files:
                continue
            url = base + f'{year}' + pre + f'{year}-' + name.name + sfx
            try:
                df = pd.read_csv(url,
                                 sep=r'\s+',
                                 header=None,
                                 index_col=False,
                                 usecols=[0, 1, 2, 6, 7, 8],
                                 names=['WBANNO', 'date', 'time',
                                        'Long', 'Lat', 't'],
                                 parse_dates={'dt': [1, 2]},
                                 na_values=-9999)
            except Exception:
                print(f'{year} not found')
                continue
            if np.isnan(name.Lat):
                name.WBANNO = df.WBANNO[0]
                name.Long = df.Long[0]
                name.Lat = df.Lat[0]
                save_names(names)
            df.to_csv(filebase + f'{name.name}-{year}.txt', sep='\t',
                      index=False, columns=['dt', 't'])
        print(name.name + ' end')

def list_data(names):
    for i in names.index:
        n = names.loc[i]
        files = glob.glob(filebase + f'{n.name}-*.txt')
        number = len(files)
        if number < 13:  # 2006-2018 inclusive is 13 files
            print(f'{n.name} {number}')

def read_data(name):
    files = glob.glob(filebase + f'{name}-*.txt')
    files.sort()
    df = pd.concat([pd.read_csv(f, sep='\t', index_col=0, parse_dates=True)
                    for f in files])
    df.interpolate(method='time', inplace=True)
    df = df.loc[df.index.year < 2019]
    return df

def process():
    ''' Run once to process data for faster plotting
    '''
    names = get_names()
    for name in names.index:
        print(name)
        mf = read_data(name)
        mf['y'] = mf.index.year
        mf['d'] = mf.index.dayofyear
        mgb = mf.groupby(['y', 'd'])
        df = mgb.mean()
        df['mid'] = (mgb['t'].max() + mgb['t'].min()) / 2.
        df['diff'] = df.mid - df.t
        # find the UTC sample time for 9 am local solar time,
        # rounded to the nearest 5 minutes
        lon = names.Long[name]
        hh = (9. - lon * 24. / 360.) % 24
        h = np.floor(hh)
        mm = (hh - h) * 60
        m = np.floor(mm / 5 + .5) * 5
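        # e.g. (hypothetical) Long = -105.0 gives hh = (9 + 7) % 24 = 16,
        # so the 5-min sample at 16:00 UTC is 9 am local solar time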
        sf = mf.loc[mf.index.hour == h, ['y', 't']]
        sf = sf.loc[sf.index.minute == m]
        wf = sf.loc[sf.index.dayofweek == 1]  # weekly sample (Tuesdays)
        yf = df.groupby('y').mean()
        yf['time'] = sf.groupby('y')['t'].mean()
        yf['week'] = wf.groupby('y')['t'].mean()
        yf.to_csv(filebase + f'{name}_annual.txt')

def read_annual(name):
    yf = pd.read_csv(filebase + f'{name}_annual.txt', index_col=0)
    return yf

def set_weights():
    ''' Run once to calculate weights for each station
    '''
    names = get_names()
    for i in names.index:
        n = names.loc[i]
        y1 = n.Lat
        x1 = n.Long * np.cos(y1 * np.pi / 180.)
        weights = []
        for j in names.index:
            nn = names.loc[j]
            y2 = nn.Lat
            x2 = nn.Long * np.cos(y1 * np.pi / 180.)
            weights.append((x1 - x2)**2 + (y1 - y2)**2)
        weights.sort()
        n.Weight = weights[1]  # 1st is always 0 (distance to itself)
        if n.Weight < .05:  # co-located stations: use next-nearest instead
            n.Weight = weights[2] / 2.
    save_names(names)
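
# A station's Weight is the squared flat-map distance (in degrees, with
# longitude scaled by cos(latitude)) to its nearest neighbour, so isolated
# stations count more and clustered stations count less, a cheap stand-in
# for area weighting. As a hypothetical example: a nearest neighbour
# 2 degrees away gives weight 4, while one 0.1 degrees away (squared
# distance 0.01) trips the co-location check and half the squared distance
# to the next-nearest station is used instead.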

def setup():
    get_data()
    set_weights()
    process()

def plot(sample='mid'):
    names = get_names()
    # exclude stations with incomplete data in 2006, plus non-CONUS
    names.drop(labels=exclude, inplace=True)
    fig = plt.figure(f'Average-{sample}', clear=True)
    ax1 = fig.add_subplot(211)
    ax2 = fig.add_subplot(212)
    af = pd.DataFrame(index=list(range(2006, 2019)))
    df = af.copy()
    wtscale = 0
    # plot stations, collect weights for scaling later
    for name in names.index:
        wtscale += names.Weight[name]
        yf = read_annual(name)
        af[name] = yf.t - yf.t[2006]
        df[name] = yf[sample] - yf[sample][2006]
        ax1.plot(af[name], color='k', alpha=0.1, lw=1.0)
        ax2.plot(df[name], color='k', alpha=0.1, lw=1.0)
    # weight each station
    for name in af.columns:
        af[name] *= names.Weight[name] / wtscale
        df[name] *= names.Weight[name] / wtscale
    # the sum of the weighted stations is the weighted mean
    for fr, ax in zip([af, df], [ax1, ax2]):
        fr['Avg'] = fr.sum(axis=1)
        ax.plot(fr.Avg, color='r', lw=3.0)
        x = fr.index.values
        slope, intercept = np.polyfit(x, fr.Avg.values, 1)
        y = slope * x + intercept
        ax.plot(x, y, color='b', lw=2.0)
        decade = np.around(slope * 10, decimals=3)
        text = f'Trend: {decade}°C/decade'
        ax.annotate(text, (x[7], y[7]),
                    xytext=(-20, 20), textcoords='offset points')
    ax1.set_title('Temperature Trend in CONUS Using Different Techniques')
    ax1.set_ylabel('Anomaly in °C')
    ax2.set_ylabel('Anomaly in °C')
    ax1.annotate('Daily Temperature from Mean of 5-min Values',
                 (.01, .9), xycoords='axes fraction')
    tdict = {'time': 'Daily Local 9 am Temperature',
             'week': 'Weekly Tuesday 9 am Temperature',
             'mid': 'Daily Temperature from Average of Hi/Lo Values'}
    ax2.annotate(tdict[sample], (.01, .9), xycoords='axes fraction')
    plt.show()
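
# Minimal usage sketch, assuming filebase exists and names.txt lists the
# stations to fetch:
if __name__ == '__main__':
    # setup()   # one-time: downloads ~4 GB and writes the processed files
    plot('mid')  # or plot('time'), or plot('week')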