Skip to content

Instantly share code, notes, and snippets.

@myrddian
Last active November 18, 2018 11:34
Show Gist options
  • Save myrddian/d9ab8ddc5afa48a26b3cf5e76fe15d0a to your computer and use it in GitHub Desktop.
Save myrddian/d9ab8ddc5afa48a26b3cf5e76fe15d0a to your computer and use it in GitHub Desktop.
# Paths of the input workbook and of the CSV file the conversion writes.
excel_file = 'excel.xlsx'
out_file = 'excel.csv'
# Settings handed to read_country_data() by the call at the end of this file.
# Number of value columns per row (passed as col_size).
__col_size__ = 14
# Index of the first sheet row to convert (passed as index_start).
__i_start__ = 6
# Number of consecutive sheet rows to convert (passed as country_size).
__c_size__ = 202
def read_country_data(xls_file, country_size, index_start, output_file, col_size):
    """Convert a run of rows from the first sheet of a workbook into a CSV file.

    A header is written with ``write_header``; each selected sheet row is then
    passed to ``write_line``.

    Args:
        xls_file: Path of the workbook given to ``xlrd.open_workbook``.
        country_size: Number of consecutive rows to convert.
        index_start: Index of the first row to convert.
        output_file: Path of the CSV file to create; an existing file is
            overwritten.
        col_size: Number of value columns, forwarded unchanged to
            ``write_header`` and ``write_line``.
    """
    print('Conversion Starting')
    # NOTE(review): xlrd is not imported in the visible part of this file;
    # it must already be in scope for this call to work -- confirm.
    book = xlrd.open_workbook(xls_file)
    first_sheet = book.sheet_by_index(0)
    # The context manager flushes and closes the file even if a row fails to
    # convert. UTF-8 is set explicitly so non-ASCII text does not depend on
    # the platform's locale encoding.
    with open(output_file, "w", encoding="utf-8") as output_conversion:
        write_header(output_conversion, col_size)
        for offset in range(country_size):
            row_items = first_sheet.row_values(index_start + offset)
            write_line(output_conversion, row_items, col_size)
    print('Conversion completed')
def write_header(out_file, col_num):
    """Write the CSV header line to ``out_file``.

    The line is ``Country Name`` followed by the labels ``0`` to
    ``col_num - 1``, comma-separated and terminated by a newline.

    Args:
        out_file: Writable text stream; only its ``write`` method is used.
        col_num: Number of numbered value columns to label.
    """
    labels = ["Country Name"]
    labels.extend(str(index) for index in range(col_num))
    out_file.write(",".join(labels) + "\n")
def _quote_csv_field(text):
    """Return ``text`` CSV-quoted if it contains a comma, quote or line break."""
    if any(ch in text for ch in ',"\r\n'):
        return '"' + text.replace('"', '""') + '"'
    return text


def write_line(out_file, items, col_size):
    """Write one CSV line built from a sheet row.

    ``items[0]`` is ignored and ``items[1]`` is written as the first field.
    The remaining cells are scanned in order until ``col_size`` value fields
    have been written:

    * a string cell equal to the en dash ``"–"`` becomes an empty field;
    * any other string cell (the ``"x"`` marker, blanks, other text) is
      dropped and does not count as a value;
    * a numeric cell is written as an integer via ``round``.

    Args:
        out_file: Writable text stream; only its ``write`` method is used.
        items: Sequence of cell values for one row.
        col_size: Number of value fields to write before stopping.
    """
    rsv_str = "–"
    fields = []
    if len(items) > 1:
        # Quote the name when needed so an embedded comma cannot shift columns.
        fields.append(_quote_csv_field(items[1]))
    var_count = 0
    for value in items[2:]:
        if var_count == col_size:
            break
        if isinstance(value, str):
            if value != rsv_str:
                # Not a value cell: leave it out without consuming a column.
                continue
            fields.append("")
        else:
            # round() maps whole floats to the same int that int() gives, so
            # one call covers both cases. NOTE: Python rounds exact halves to
            # the nearest even integer.
            fields.append(str(round(value)))
        var_count += 1
    out_file.write(",".join(fields) + "\n")
# Run the conversion using the module-level settings defined at the top of the file.
read_country_data(excel_file,__c_size__,__i_start__,out_file,__col_size__)
# Load the generated CSV back and print its first rows.
# NOTE(review): `pd` is presumably pandas, imported outside the visible part of this file -- confirm.
df = pd.read_csv(out_file)
print(df.head())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment