Skip to content

Instantly share code, notes, and snippets.

@ingenieroariel
Created December 6, 2011 23:14
Show Gist options
  • Save ingenieroariel/1440520 to your computer and use it in GitHub Desktop.
Save ingenieroariel/1440520 to your computer and use it in GitHub Desktop.
Update metadata from a excel file in a GeoNode site.
pip install xlrd
python metadata.py mymetadata.xls
"""
Utilities for reading Microsoft Excel files.
"""
import xlrd
import datetime
class ExcelDictReader(object):
"""
Provides an API that lets you iterate over every row in an Excel worksheet,
much like csv.DictReader. This assumes that the worksheet is a simple table
with a single header row at the top.
header_row_num is the zero-indexed row number of the headers. (Note that
you can specify the headers manually by using the "custom_headers"
argument.)
start_row_num is the zero-indexed row number of where the data starts.
use_last_header_if_duplicate, either True or False, dictates the behavior
to use in the case of duplicate column headers. If True, then the *last*
column's value will be used. If False, then the *first* column's value will
be used. Note that there's no way to access the other column, either way.
custom_headers, if given, will be used instead of the values in
header_row_num. If you provide custom_headers, the value of header_row_num
will be ignored.
Example usage:
reader = ExcelDictReader('/path/to/my.xls', 0, 0, 1)
for row in reader:
print row
This yields dictionaries like:
{'header1': 'value1', 'header2': 'value2'}
"""
def __init__(self, filename, sheet_index=0, header_row_num=0, start_row_num=0,
use_last_header_if_duplicate=True, custom_headers=None):
self.workbook = xlrd.open_workbook(filename)
self.sheet_index = sheet_index
self.header_row, self.start_row = header_row_num, start_row_num
self.use_last_header_if_duplicate = use_last_header_if_duplicate
self.custom_headers = custom_headers
def __iter__(self):
worksheet = self.workbook.sheet_by_index(self.sheet_index)
if self.custom_headers:
headers = self.custom_headers
else:
headers = [v.value.strip() for v in worksheet.row(self.header_row)]
for row_num in xrange(self.start_row, worksheet.nrows):
data_dict = {}
for i, cell in enumerate(worksheet.row(row_num)):
value = cell.value
# Clean up the value. The xlrd library doesn't convert date
# values to Python objects automatically, so we have to do that
# here. Also, strip whitespace from any text field.
# cell.ctype is documented here:
# http://www.lexicon.net/sjmachin/xlrd.html#xlrd.Cell-class
if cell.ctype == 3:
try:
value = datetime.datetime(*xlrd.xldate_as_tuple(value, self.workbook.datemode))
except ValueError:
# The datetime module raises ValueError for invalid
# dates, like the year 0. Rather than skipping the
# value (which would lose data), we just keep it as
# a string.
pass
elif cell.ctype == 1:
value = value.strip()
# Only append the value to the dictionary if
if self.use_last_header_if_duplicate or headers[i] not in data_dict:
data_dict[headers[i]] = value
yield data_dict
from excel import ExcelDictReader
from geonode.maps.models import Layer, Contact, User
from decimal import Decimal
def contact(username, name, organization):
user, __ = User.objects.get_or_create(username=username)
contact, __ = Contact.objects.get_or_create(user=user, defaults={'name': name, 'organization': organization})
return contact
def update(filename):
# Modify the parameters depending on the offset, in this case the content starts
# at the 10th row and the 12th column (or viceversa) ;)
reader = ExcelDictReader(filename, 0, 10, 12)
layer_names = Layer.objects.values_list('name', flat=True)
for row in reader:
name = row['layer_name']
if row['layer_ref'] is None:
sys.exit()
if name in layer_names:
v = Layer.objects.get(name=name)
v.supplemental_information= row['attribut_values']
v.date = row['date']
c = contact(row['datasource_shortname'], row['poc_fullname'], row['datasource_fullname'])
v.poc = c
try:
v.save()
print '>> Updated layer: %s' % name
except Exception, e:
print 'Problem saving %s: %s' % ( name, str(e))
else:
print '## Not found: %s' % name
if __name__ == '__main__':
import sys
if len(sys.argv) != 2:
print 'Usage: python metadata.py mymetadata.xls'
sys.exit(-1)
update(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment