ingenieroariel · December 6, 2011 23:14
diff --git a/README b/README
 pip install xlrd
 python metadata.py mymetadata.xls
diff --git a/excel.py b/excel.py
 """
 Utilities for reading Microsoft Excel files.
 """

 import xlrd
 import datetime

 class ExcelDictReader(object):
    """
    Provides an API that lets you iterate over every row in an Excel worksheet,
    much like csv.DictReader. This assumes that the worksheet is a simple table
    with a single header row at the top.

    header_row_num is the zero-indexed row number of the headers. (Note that
    you can specify the headers manually by using the "custom_headers"
    argument.)

    start_row_num is the zero-indexed row number of where the data starts.

    use_last_header_if_duplicate, either True or False, dictates the behavior
    to use in the case of duplicate column headers. If True, then the *last*
    column's value will be used. If False, then the *first* column's value will
    be used. Note that there's no way to access the other column, either way.

    custom_headers, if given, will be used instead of the values in
    header_row_num. If you provide custom_headers, the value of header_row_num
    will be ignored.

    Example usage:
        reader = ExcelDictReader('/path/to/my.xls', 0, 0, 1)
        for row in reader:
            print row

    This yields dictionaries like:
        {'header1': 'value1', 'header2': 'value2'}
    """
    def __init__(self, filename, sheet_index=0, header_row_num=0, start_row_num=0,
            use_last_header_if_duplicate=True, custom_headers=None):
        self.workbook = xlrd.open_workbook(filename)
        self.sheet_index = sheet_index
        self.header_row, self.start_row = header_row_num, start_row_num
        self.use_last_header_if_duplicate = use_last_header_if_duplicate
        self.custom_headers = custom_headers

    def __iter__(self):
        worksheet = self.workbook.sheet_by_index(self.sheet_index)
        if self.custom_headers:
            headers = self.custom_headers
        else:
            headers = [v.value.strip() for v in worksheet.row(self.header_row)]
        for row_num in xrange(self.start_row, worksheet.nrows):
            data_dict = {}
            for i, cell in enumerate(worksheet.row(row_num)):
                value = cell.value

                # Clean up the value. The xlrd library doesn't convert date
                # values to Python objects automatically, so we have to do that
                # here. Also, strip whitespace from any text field.
                # cell.ctype is documented here:
                # http://www.lexicon.net/sjmachin/xlrd.html#xlrd.Cell-class
                if cell.ctype == 3:
                    try:
                        value = datetime.datetime(*xlrd.xldate_as_tuple(value, self.workbook.datemode))
                    except ValueError:
                        # The datetime module raises ValueError for invalid
                        # dates, like the year 0. Rather than skipping the
                        # value (which would lose data), we just keep it as
                        # a string.
                        pass
                elif cell.ctype == 1:
                    value = value.strip()

                # Only append the value to the dictionary if
                if self.use_last_header_if_duplicate or headers[i] not in data_dict:
                    data_dict[headers[i]] = value

            yield data_dict
diff --git a/metadata.py b/metadata.py
 from excel import ExcelDictReader
 from geonode.maps.models import Layer, Contact, User
 from decimal import Decimal

 def contact(username, name, organization):
    user, __ = User.objects.get_or_create(username=username)
    contact, __ = Contact.objects.get_or_create(user=user, defaults={'name': name, 'organization': organization})
    return contact

 def update(filename):
    # Modify the parameters depending on the offset, in this case the content starts
    # at the 10th row and the 12th column (or viceversa) ;)
    reader = ExcelDictReader(filename, 0, 10, 12)
    layer_names = Layer.objects.values_list('name', flat=True)
    for row in reader:
        name = row['layer_name']

        if row['layer_ref'] is None:
            sys.exit()

        if name in layer_names:
            v = Layer.objects.get(name=name)
            v.supplemental_information= row['attribut_values']
            v.date = row['date']
            c = contact(row['datasource_shortname'], row['poc_fullname'], row['datasource_fullname'])
            v.poc = c
            try:
                v.save()
                print '>> Updated layer: %s' % name
            except Exception, e:
                print 'Problem saving %s: %s' % ( name, str(e))
        else:
            print '## Not found: %s' % name

 if __name__ == '__main__':
    import sys
    if len(sys.argv) != 2:
        print 'Usage: python metadata.py mymetadata.xls'
        sys.exit(-1)
    update(sys.argv[1])
	"""
	Utilities for reading Microsoft Excel files.
	"""

	import xlrd
	import datetime

	class ExcelDictReader(object):
	"""
	Provides an API that lets you iterate over every row in an Excel worksheet,
	much like csv.DictReader. This assumes that the worksheet is a simple table
	with a single header row at the top.

	header_row_num is the zero-indexed row number of the headers. (Note that
	you can specify the headers manually by using the "custom_headers"
	argument.)

	start_row_num is the zero-indexed row number of where the data starts.

	use_last_header_if_duplicate, either True or False, dictates the behavior
	to use in the case of duplicate column headers. If True, then the last
	column's value will be used. If False, then the first column's value will
	be used. Note that there's no way to access the other column, either way.

	custom_headers, if given, will be used instead of the values in
	header_row_num. If you provide custom_headers, the value of header_row_num
	will be ignored.

	Example usage:
	reader = ExcelDictReader('/path/to/my.xls', 0, 0, 1)
	for row in reader:
	print row

	This yields dictionaries like:
	{'header1': 'value1', 'header2': 'value2'}
	"""
	def __init__(self, filename, sheet_index=0, header_row_num=0, start_row_num=0,
	use_last_header_if_duplicate=True, custom_headers=None):
	self.workbook = xlrd.open_workbook(filename)
	self.sheet_index = sheet_index
	self.header_row, self.start_row = header_row_num, start_row_num
	self.use_last_header_if_duplicate = use_last_header_if_duplicate
	self.custom_headers = custom_headers

	def __iter__(self):
	worksheet = self.workbook.sheet_by_index(self.sheet_index)
	if self.custom_headers:
	headers = self.custom_headers
	else:
	headers = [v.value.strip() for v in worksheet.row(self.header_row)]
	for row_num in xrange(self.start_row, worksheet.nrows):
	data_dict = {}
	for i, cell in enumerate(worksheet.row(row_num)):
	value = cell.value

	# Clean up the value. The xlrd library doesn't convert date
	# values to Python objects automatically, so we have to do that
	# here. Also, strip whitespace from any text field.
	# cell.ctype is documented here:
	# http://www.lexicon.net/sjmachin/xlrd.html#xlrd.Cell-class
	if cell.ctype == 3:
	try:
	value = datetime.datetime(*xlrd.xldate_as_tuple(value, self.workbook.datemode))
	except ValueError:
	# The datetime module raises ValueError for invalid
	# dates, like the year 0. Rather than skipping the
	# value (which would lose data), we just keep it as
	# a string.
	pass
	elif cell.ctype == 1:
	value = value.strip()

	# Only append the value to the dictionary if
	if self.use_last_header_if_duplicate or headers[i] not in data_dict:
	data_dict[headers[i]] = value

	yield data_dict
	from excel import ExcelDictReader
	from geonode.maps.models import Layer, Contact, User
	from decimal import Decimal

	def contact(username, name, organization):
	user, __ = User.objects.get_or_create(username=username)
	contact, __ = Contact.objects.get_or_create(user=user, defaults={'name': name, 'organization': organization})
	return contact

	def update(filename):
	# Modify the parameters depending on the offset, in this case the content starts
	# at the 10th row and the 12th column (or viceversa) ;)
	reader = ExcelDictReader(filename, 0, 10, 12)
	layer_names = Layer.objects.values_list('name', flat=True)
	for row in reader:
	name = row['layer_name']

	if row['layer_ref'] is None:
	sys.exit()

	if name in layer_names:
	v = Layer.objects.get(name=name)
	v.supplemental_information= row['attribut_values']
	v.date = row['date']
	c = contact(row['datasource_shortname'], row['poc_fullname'], row['datasource_fullname'])
	v.poc = c
	try:
	v.save()
	print '>> Updated layer: %s' % name
	except Exception, e:
	print 'Problem saving %s: %s' % ( name, str(e))
	else:
	print '## Not found: %s' % name

	if __name__ == '__main__':
	import sys
	if len(sys.argv) != 2:
	print 'Usage: python metadata.py mymetadata.xls'
	sys.exit(-1)
	update(sys.argv[1])