Created
January 21, 2015 03:27
-
-
Save samuelschumacher/86a5bc6e40e611007dec to your computer and use it in GitHub Desktop.
ControlFinder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
GPS1448
GPS1536
GPS1563
GPS1751
GPS1846
GPS1874
GPS1875
GPS1998
GPS2213
GPS2471
GPS2474
GPS2518
GPS2626
GPS2829
GPS2870
GPS3009
L26028
L26041
L26191
L26401
L26624
L26768
L26874
L26993
L27016
L27110
L27162
L27163
L27186
L27187
L27253
L27314
L27316
L27614
L27615
L27930
L27931
L28089
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script automates the HMP download process. It downloads all the HMP shapefiles according to a text file containing the relevant project IDs. The script then unzips the files and merges the shapefiles.
# The hmp_download.py script depends on a great many internal and external settings to run smoothly.
# First, the computer running it should have a Python environment as well as an editor that can display script content in a readable format, like IDLE or Notepad++.
# Second, the script must be able to reach our internal servers as well as the external websites (NOAA.gov).
# Third, the script must have valid packages to decompress, and following this decompression each of the resulting shapefiles must contain a valid geometry in order to be merged.
# What this means is that if an error is encountered:
# * Before anything even seems to happen, the computer may not have BeautifulSoup installed.
#   Read the software requirements and try again.
# * During the download, look through the download directory to determine the final file downloaded.
#   Attempt a manual download of this file.
#   The site is given in the manual method, and is listed in the comments of this Python file.
#   If Python-savvy, you can then modify and rerun the script to resume downloading where you left off; otherwise continue manually.
#   hmp_unzipmerge may be run separately from the download script after all files have been downloaded.
# * Following a successful download, check the integrity of the .zip files.
#   If any cannot be unzipped manually, this is what is causing the error.
#   Attempt a re-download and, in the worst case, merge only the functional files.
# * Following successful unzipping, one shapefile may contain a corrupt or empty geometry.
#   Although QGIS can automatically merge only valid geometries, do examine the files individually in QGIS to determine the problem dataset so that it may be flagged and reported to Brenda for removal from the list.
#   At this point, if problems are being encountered, a manual merge is necessary.
#   In QGIS, select 'Merge shapefiles to one' under the 'Data Management' submenu in the 'Vector' menu and respond to the resulting prompt.
# * If, following unzipping, all shapefiles are valid but the merge still fails, the computer performing the operations may not have the correct path specified to 'shapefile.py.'
#   After ensuring that the correct version of the script is on the machine, move it to C:\PythonXX (where XX is the version, likely 27).
#   Add this directory to the system path by visiting the 'System' control panel item, selecting 'Advanced system settings,' then clicking on 'Environment Variables...' under the 'Advanced' tab.
#   Double-click on the 'Path' variable and, without deleting anything, append a semicolon and the directory's path to the end of the variable definition.
# Standard-library modules plus the two third-party dependencies:
# bs4 (BeautifulSoup) for HTML parsing and shapefile (pyshp) for the merge.
import urllib2, urllib, zipfile, os, glob, shapefile, time
from bs4 import BeautifulSoup
from urlparse import urlsplit
from time import localtime, strftime, gmtime

# Define NOAA website url
noaaUrl = 'http://www.ngs.noaa.gov/cgi-bin/ds_proj_sf.prl'

# Define output directories based on current month and year.
# Renamed from 'dir' so the builtin dir() is no longer shadowed.
baseDir = 'Z:/PROJECTS/controlfinder/NGS/'
year_dir = baseDir + 'NGS_' + strftime("%Y/", localtime())
month_dir = year_dir + 'NGS_' + strftime("%b_%Y/", localtime())

# Create the year/month and hmp zip/unzip directories as needed.
# os.makedirs also creates missing parent directories, so the script no
# longer fails when a brand-new year starts and the year directory does
# not yet exist.
for neededDir in (year_dir,
                  month_dir,
                  month_dir + 'hmp_zip',
                  month_dir + 'hmp_unzip'):
    if not os.path.isdir(neededDir):
        os.makedirs(neededDir)
# Create parameter dictionary to be encoded and sent to first page of NOAA url. | |
valueDict = { | |
'ProjBox': 'GPS1448', | |
'PREFIX': 'GPS1448', | |
'IncludeSelected': 'N', | |
'TypeSelected': 'MOD', | |
'StabilSelected': '0', | |
'MetaDataFormat': 'HTML', | |
'CompressType': 'Zipped'} | |
# Open text file with list of hmp project IDs. | |
projList = open('hmp_projIDs.txt').readlines() | |
# Print start time | |
print 'The START time is: ' + strftime("%A %c", localtime()) + '\n' | |
# Save the start time for calculation purpose | |
starttime = time.clock() | |
# Test with shortened project list | |
#projList = open('hmp_projIDs_short.txt').readlines() | |
# Create edited list to remove newline (\n) characters from each list item. Necessary to do so in this fashion because the last item does not have a trailing newline character. | |
projListEdit = [] | |
a = 0 | |
while a < len(projList)-1: | |
projListEdit.append(projList[a][:-1]) | |
a += 1 | |
projListEdit.append(projList[len(projList)-1]) | |
count = 0 | |
for each in projListEdit: | |
# Replace 'ProjBox' and 'PREFIX' value with new project ID and send post. Open html response and parse out station values using BeautifulSoup. Create list of station values. | |
projstarttime = time.clock() | |
valueDict['ProjBox'] = each | |
valueDict['PREFIX'] = each | |
projData = urllib.urlencode(valueDict) | |
req = urllib2.Request(noaaUrl, projData) | |
response = urllib2.urlopen(req) | |
soup = BeautifulSoup(response.read()) | |
# print 'Received response for project code: '+each+'\n' | |
optionList = [] | |
for options in soup.find_all('option'): | |
optionList.append(options.get('value')[:-1]) | |
del optionList[0:2] | |
# print 'Parsed out station values for project code: '+each+'\n' | |
# Url-encoded key/value pairs for parameters other than station values. | |
valueDict2 = { | |
'QueryHidden': each, | |
'MetaDataFormatHidden': 'HTML', | |
'DisplayActive': 'IsDisplayed', | |
'Get Shapefile': 'Get Shapefile', | |
'CompressTypeHidden': 'Zipped', | |
'SortSelected': 'Designation', | |
'PrefixHidden': each, | |
'ControlTypeHidden': 'MOD-0-0-0-0'} | |
paramString = urllib.urlencode(valueDict2) | |
# Append each encoded key (MarkSelected) - value (station) pair to the parameter string. | |
for station in optionList: | |
i = urllib.urlencode({'MarkSelected': station}) | |
paramString = paramString + '&' + i | |
# print 'Created parameter string including all station values for project code: '+each+'\n' | |
# Send request with NOAA url and parameter values encoded in string, open response and send filename into variable, open file in output directory, write contents of received file and close. | |
request = urllib2.Request(noaaUrl, paramString) | |
response = urllib2.urlopen(request) | |
filename = response.info()['Content-Disposition'].split('filename=')[1] | |
f = open(month_dir+'/hmp_zip/'+filename, 'wb') | |
f.write(response.read()) | |
f.close() | |
# Update progress | |
count += 1 | |
print 'Completed '+str(count)+' of '+str(len(projListEdit))+' projects. Download took ' + strftime("%M minutes and %S seconds", (gmtime(time.clock() - projstarttime))) | |
# print 'Saved zip file for project code: '+each+'\n' | |
print '\nFinished downloading hmp shapefiles!!!\n' | |
print '\nTotal download time: ' + strftime("%H hours, %M minutes and %S seconds", (gmtime(time.clock() - starttime))) | |
# Create list of folder contents | |
zipList = os.listdir(month_dir+'/hmp_zip') | |
# Unzip and output contents to output directory for all files in input folder | |
for i in zipList: | |
zip = zipfile.ZipFile(month_dir+'/hmp_zip/'+i) | |
zip.extractall(month_dir+'/hmp_unzip/') | |
fileList = glob.glob(month_dir+'/hmp_unzip/'+'*.shp') | |
w = shapefile.Writer() | |
# Create shapefile using pyshp (shapefile) library...no idea how, but it works! | |
for f in fileList: | |
r = shapefile.Reader(f) | |
w._shapes.extend(r.shapes()) | |
w.records.extend(r.records()) | |
w.fields = list(r.fields) | |
w.save(month_dir+'merged_shape') | |
# Create .prj file for GCS_North_American_1983_HARN | |
prj = open(month_dir+'merged_shape.prj', 'w') | |
epsg = 'GEOGCS["GCS_North_American_1983_HARN",DATUM["D_North_American_1983_HARN",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]' | |
prj.write(epsg) | |
prj.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment