johnconroy · November 8, 2010 12:07
diff --git a/iterate_thru_dic_csv_extract.py b/iterate_thru_dic_csv_extract.py
 #Assume we have a bunch of files in a directory.
 #Each file is CSV (comma separated), e.g. it has a bunch of lines like this: "henry_1, 9939393, 02/11/1991, ARTS2bc1, ..."
 #Assume the first comma-separated value on each line in each file is the name.
 #Assume we want a list of unique names in all the files in the directory.

 import os

 basedir="c://somedir//somesubdir//"
 outfile='c://somedir//somesubdir//unique_names.txt'


 def collect_unique_names(directory, skip=()):
     """Return the unique first-column values of every file under *directory*.

     The directory tree is walked recursively. Each line of each file is
     split on the first comma and the leading field is taken as the name.
     Surrounding whitespace (including the trailing newline of lines that
     contain no comma) is stripped, and empty names are ignored.

     directory -- root directory to scan.
     skip      -- iterable of file paths that must not be read, e.g. the
                  output file of a previous run living in the same directory.

     Returns a list of names in first-seen order. Directories and files are
     visited in sorted order so the result is reproducible between runs.
     """
     skip_paths = set(os.path.abspath(p) for p in skip)
     seen = set()        # O(1) membership test instead of scanning the list
     list_names = []     # keeps first-seen order for the output
     for root, dirs, files in os.walk(directory):
         dirs.sort()     # in-place sort steers os.walk's traversal order
         for filename in sorted(files):
             # join with root, not the top directory: os.walk also yields
             # files that live in sub-directories
             path = os.path.join(root, filename)
             if os.path.abspath(path) in skip_paths:
                 continue
             with open(path, 'r') as thisfile:
                 for line in thisfile:
                     # we're assuming 'name' is always the first element
                     # in the comma-separated line
                     name = line.split(',', 1)[0].strip()
                     if name and name not in seen:
                         seen.add(name)
                         list_names.append(name)
     return list_names


 def write_names(names, path):
     """Write each name in *names* on its own line to the file at *path*."""
     with open(path, 'w') as f_uniquenames:
         for name in names:
             f_uniquenames.write(name+'\n')


 if __name__ == '__main__':
     #finally, write your unique names to file
     #(the output file sits inside basedir, so exclude it from the scan)
     write_names(collect_unique_names(basedir, skip=[outfile]), outfile)
	#Assume we have a bunch of files in a directory.
	#Each file is CSV (comma separated), e.g. it has a bunch of lines like this: "henry_1, 9939393, 02/11/1991, ARTS2bc1, ..."
	#Assume the first comma-separated value on each line in each file is the name.
	#Assume we want a list of unique names in all the files in the directory.

	import os

	basedir="c://somedir//somesubdir//"
	outfile='c://somedir//somesubdir//unique_names.txt'


	def collect_unique_names(directory, skip=()):
	    """Return the unique first-column values of every file under *directory*.

	    The directory tree is walked recursively. Each line of each file is
	    split on the first comma and the leading field is taken as the name.
	    Surrounding whitespace (including the trailing newline of lines that
	    contain no comma) is stripped, and empty names are ignored.

	    directory -- root directory to scan.
	    skip      -- iterable of file paths that must not be read, e.g. the
	                 output file of a previous run living in the same directory.

	    Returns a list of names in first-seen order. Directories and files are
	    visited in sorted order so the result is reproducible between runs.
	    """
	    skip_paths = set(os.path.abspath(p) for p in skip)
	    seen = set()        # O(1) membership test instead of scanning the list
	    list_names = []     # keeps first-seen order for the output
	    for root, dirs, files in os.walk(directory):
	        dirs.sort()     # in-place sort steers os.walk's traversal order
	        for filename in sorted(files):
	            # join with root, not the top directory: os.walk also yields
	            # files that live in sub-directories
	            path = os.path.join(root, filename)
	            if os.path.abspath(path) in skip_paths:
	                continue
	            with open(path, 'r') as thisfile:
	                for line in thisfile:
	                    # we're assuming 'name' is always the first element
	                    # in the comma-separated line
	                    name = line.split(',', 1)[0].strip()
	                    if name and name not in seen:
	                        seen.add(name)
	                        list_names.append(name)
	    return list_names


	def write_names(names, path):
	    """Write each name in *names* on its own line to the file at *path*."""
	    with open(path, 'w') as f_uniquenames:
	        for name in names:
	            f_uniquenames.write(name+'\n')


	if __name__ == '__main__':
	    #finally, write your unique names to file
	    #(the output file sits inside basedir, so exclude it from the scan)
	    write_names(collect_unique_names(basedir, skip=[outfile]), outfile)