89465127 · June 6, 2013 18:10
diff --git a/open_hadoop.py b/open_hadoop.py
 import glob
 import os

 def filelist(path, _filter="part-*"):
     """Return the sorted paths inside *path* whose names match *_filter*.

     path is passed through os.path.expanduser and os.path.abspath before
     globbing, so the returned paths are absolute.  _filter is a glob
     pattern applied to the entries directly inside that directory.

     An empty list is returned when nothing matches.
     """
     basepath = os.path.abspath(os.path.expanduser(path))
     # glob.glob gives no ordering guarantee; sort so that part-00000,
     # part-00001, ... are always returned (and later read) in sequence.
     return sorted(glob.glob(basepath + '/' + _filter))

 def hfile(path, _filter="part-*"):
     """Generate every line of every file in *path* that matches *_filter*.

     The files are those returned by filelist(path, _filter) and are read
     one after another in the order it returns them.  Each line is yielded
     exactly as iterating the open file produces it, and each file is
     closed before the next one is opened.
     """
     matches = filelist(path, _filter)
     for part_path in matches:
         with open(part_path) as part:
             for record in part:
                 yield record


 ''' Usage example:

 from open_hadoop import hfile

 for line in hfile('./input/path/'):
    print line

 '''


 ''' Installation:

 - Place open_hadoop.py in your site-packages directory.
 - Your site-packages directory can be located by running:
 python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"

 '''
	import glob
	import os

	def filelist(path, _filter="part-*"):
	    """Return the sorted paths inside *path* whose names match *_filter*.

	    path is passed through os.path.expanduser and os.path.abspath before
	    globbing, so the returned paths are absolute.  _filter is a glob
	    pattern applied to the entries directly inside that directory.

	    An empty list is returned when nothing matches.
	    """
	    basepath = os.path.abspath(os.path.expanduser(path))
	    # glob.glob gives no ordering guarantee; sort so that part-00000,
	    # part-00001, ... are always returned (and later read) in sequence.
	    return sorted(glob.glob(basepath + '/' + _filter))

	def hfile(path, _filter="part-*"):
	    """Yield, line by line, the contents of the files matching *_filter*.

	    The files are those returned by filelist(path, _filter), read one
	    after another in the order it returns them.  Each line is yielded
	    as produced by iterating the open file, and every file is closed
	    before the next one is opened.
	    """
	    for filename in filelist(path, _filter):
	        with open(filename) as f:
	            for line in f:
	                yield line


	''' Usage example:

	from open_hadoop import hfile

	for line in hfile('./input/path/'):
	    print line

	'''


	''' Installation:

	- Place open_hadoop.py in your site-packages directory.
	- Your site-packages directory can be located by running:
	python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"

	'''