Created
July 8, 2015 07:57
-
-
Save kenttw/e68cbc00525358bd82c8 to your computer and use it in GitHub Desktop.
pyspark - ChiSqSelector Error
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.mllib.feature import ChiSqSelector | |
model = ChiSqSelector(5000).fit(sc.parallelize(lc)) | |
chi_l = l.mapValues(lambda x : model.transform (x)) | |
print chi_l.first() | |
The following error message appears: | |
--------------------------------------------------------------------------- | |
Exception Traceback (most recent call last) | |
<ipython-input-6-aff828b23220> in <module>() | |
2 model = ChiSqSelector(5000).fit(sc.parallelize(lc)) | |
3 chi_l = l.mapValues(lambda x : model.transform (x)) | |
----> 4 print chi_l.first() | |
/opt/spark/python/pyspark/rdd.py in first(self) | |
1281 ValueError: RDD is empty | |
1282 """ | |
-> 1283 rs = self.take(1) | |
1284 if rs: | |
1285 return rs[0] | |
/opt/spark/python/pyspark/rdd.py in take(self, num) | |
1263 | |
1264 p = range(partsScanned, min(partsScanned + numPartsToTry, totalParts)) | |
-> 1265 res = self.context.runJob(self, takeUpToNumLeft, p, True) | |
1266 | |
1267 items += res | |
/opt/spark/python/pyspark/context.py in runJob(self, rdd, partitionFunc, partitions, allowLocal) | |
878 # SparkContext#runJob. | |
879 mappedRDD = rdd.mapPartitions(partitionFunc) | |
--> 880 port = self._jvm.PythonRDD.runJob(self._jsc.sc(), mappedRDD._jrdd, partitions, | |
881 allowLocal) | |
882 return list(_load_from_socket(port, mappedRDD._jrdd_deserializer)) | |
/opt/spark/python/pyspark/rdd.py in _jrdd(self) | |
2349 command = (self.func, profiler, self._prev_jrdd_deserializer, | |
2350 self._jrdd_deserializer) | |
-> 2351 pickled_cmd, bvars, env, includes = _prepare_for_python_RDD(self.ctx, command, self) | |
2352 python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), | |
2353 bytearray(pickled_cmd), | |
/opt/spark/python/pyspark/rdd.py in _prepare_for_python_RDD(sc, command, obj) | |
2269 # the serialized command will be compressed by broadcast | |
2270 ser = CloudPickleSerializer() | |
-> 2271 pickled_command = ser.dumps(command) | |
2272 if len(pickled_command) > (1 << 20): # 1M | |
2273 # The broadcast will have same life cycle as created PythonRDD | |
/opt/spark/python/pyspark/serializers.py in dumps(self, obj) | |
425 | |
426 def dumps(self, obj): | |
--> 427 return cloudpickle.dumps(obj, 2) | |
428 | |
429 | |
/opt/spark/python/pyspark/cloudpickle.py in dumps(obj, protocol) | |
620 | |
621 cp = CloudPickler(file,protocol) | |
--> 622 cp.dump(obj) | |
623 | |
624 return file.getvalue() | |
/opt/spark/python/pyspark/cloudpickle.py in dump(self, obj) | |
105 self.inject_addons() | |
106 try: | |
--> 107 return Pickler.dump(self, obj) | |
108 except RuntimeError as e: | |
109 if 'recursion' in e.args[0]: | |
/usr/lib/python2.7/pickle.pyc in dump(self, obj) | |
222 if self.proto >= 2: | |
223 self.write(PROTO + chr(self.proto)) | |
--> 224 self.save(obj) | |
225 self.write(STOP) | |
226 | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/usr/lib/python2.7/pickle.pyc in save_tuple(self, obj) | |
560 write(MARK) | |
561 for element in obj: | |
--> 562 save(element) | |
563 | |
564 if id(obj) in memo: | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/opt/spark/python/pyspark/cloudpickle.py in save_function(self, obj, name) | |
197 klass = getattr(themodule, name, None) | |
198 if klass is None or klass is not obj: | |
--> 199 self.save_function_tuple(obj) | |
200 return | |
201 | |
/opt/spark/python/pyspark/cloudpickle.py in save_function_tuple(self, func) | |
234 # create a skeleton function object and memoize it | |
235 save(_make_skel_func) | |
--> 236 save((code, closure, base_globals)) | |
237 write(pickle.REDUCE) | |
238 self.memoize(func) | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/usr/lib/python2.7/pickle.pyc in save_tuple(self, obj) | |
546 if n <= 3 and proto >= 2: | |
547 for element in obj: | |
--> 548 save(element) | |
549 # Subtle. Same as in the big comment below. | |
550 if id(obj) in memo: | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/usr/lib/python2.7/pickle.pyc in save_list(self, obj) | |
598 | |
599 self.memoize(obj) | |
--> 600 self._batch_appends(iter(obj)) | |
601 | |
602 dispatch[ListType] = save_list | |
/usr/lib/python2.7/pickle.pyc in _batch_appends(self, items) | |
631 write(MARK) | |
632 for x in tmp: | |
--> 633 save(x) | |
634 write(APPENDS) | |
635 elif n: | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/opt/spark/python/pyspark/cloudpickle.py in save_function(self, obj, name) | |
197 klass = getattr(themodule, name, None) | |
198 if klass is None or klass is not obj: | |
--> 199 self.save_function_tuple(obj) | |
200 return | |
201 | |
/opt/spark/python/pyspark/cloudpickle.py in save_function_tuple(self, func) | |
234 # create a skeleton function object and memoize it | |
235 save(_make_skel_func) | |
--> 236 save((code, closure, base_globals)) | |
237 write(pickle.REDUCE) | |
238 self.memoize(func) | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/usr/lib/python2.7/pickle.pyc in save_tuple(self, obj) | |
546 if n <= 3 and proto >= 2: | |
547 for element in obj: | |
--> 548 save(element) | |
549 # Subtle. Same as in the big comment below. | |
550 if id(obj) in memo: | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/usr/lib/python2.7/pickle.pyc in save_list(self, obj) | |
598 | |
599 self.memoize(obj) | |
--> 600 self._batch_appends(iter(obj)) | |
601 | |
602 dispatch[ListType] = save_list | |
/usr/lib/python2.7/pickle.pyc in _batch_appends(self, items) | |
631 write(MARK) | |
632 for x in tmp: | |
--> 633 save(x) | |
634 write(APPENDS) | |
635 elif n: | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/opt/spark/python/pyspark/cloudpickle.py in save_function(self, obj, name) | |
197 klass = getattr(themodule, name, None) | |
198 if klass is None or klass is not obj: | |
--> 199 self.save_function_tuple(obj) | |
200 return | |
201 | |
/opt/spark/python/pyspark/cloudpickle.py in save_function_tuple(self, func) | |
234 # create a skeleton function object and memoize it | |
235 save(_make_skel_func) | |
--> 236 save((code, closure, base_globals)) | |
237 write(pickle.REDUCE) | |
238 self.memoize(func) | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/usr/lib/python2.7/pickle.pyc in save_tuple(self, obj) | |
546 if n <= 3 and proto >= 2: | |
547 for element in obj: | |
--> 548 save(element) | |
549 # Subtle. Same as in the big comment below. | |
550 if id(obj) in memo: | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/usr/lib/python2.7/pickle.pyc in save_list(self, obj) | |
598 | |
599 self.memoize(obj) | |
--> 600 self._batch_appends(iter(obj)) | |
601 | |
602 dispatch[ListType] = save_list | |
/usr/lib/python2.7/pickle.pyc in _batch_appends(self, items) | |
634 write(APPENDS) | |
635 elif n: | |
--> 636 save(tmp[0]) | |
637 write(APPEND) | |
638 # else tmp is empty, and we're done | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/opt/spark/python/pyspark/cloudpickle.py in save_function(self, obj, name) | |
191 if islambda(obj) or obj.__code__.co_filename == '<stdin>' or themodule is None: | |
192 #print("save global", islambda(obj), obj.__code__.co_filename, modname, themodule) | |
--> 193 self.save_function_tuple(obj) | |
194 return | |
195 else: | |
/opt/spark/python/pyspark/cloudpickle.py in save_function_tuple(self, func) | |
234 # create a skeleton function object and memoize it | |
235 save(_make_skel_func) | |
--> 236 save((code, closure, base_globals)) | |
237 write(pickle.REDUCE) | |
238 self.memoize(func) | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/usr/lib/python2.7/pickle.pyc in save_tuple(self, obj) | |
546 if n <= 3 and proto >= 2: | |
547 for element in obj: | |
--> 548 save(element) | |
549 # Subtle. Same as in the big comment below. | |
550 if id(obj) in memo: | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/usr/lib/python2.7/pickle.pyc in save_list(self, obj) | |
598 | |
599 self.memoize(obj) | |
--> 600 self._batch_appends(iter(obj)) | |
601 | |
602 dispatch[ListType] = save_list | |
/usr/lib/python2.7/pickle.pyc in _batch_appends(self, items) | |
634 write(APPENDS) | |
635 elif n: | |
--> 636 save(tmp[0]) | |
637 write(APPEND) | |
638 # else tmp is empty, and we're done | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/opt/spark/python/pyspark/cloudpickle.py in save_function(self, obj, name) | |
191 if islambda(obj) or obj.__code__.co_filename == '<stdin>' or themodule is None: | |
192 #print("save global", islambda(obj), obj.__code__.co_filename, modname, themodule) | |
--> 193 self.save_function_tuple(obj) | |
194 return | |
195 else: | |
/opt/spark/python/pyspark/cloudpickle.py in save_function_tuple(self, func) | |
239 | |
240 # save the rest of the func data needed by _fill_function | |
--> 241 save(f_globals) | |
242 save(defaults) | |
243 save(dct) | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/usr/lib/python2.7/pickle.pyc in save_dict(self, obj) | |
647 | |
648 self.memoize(obj) | |
--> 649 self._batch_setitems(obj.iteritems()) | |
650 | |
651 dispatch[DictionaryType] = save_dict | |
/usr/lib/python2.7/pickle.pyc in _batch_setitems(self, items) | |
684 k, v = tmp[0] | |
685 save(k) | |
--> 686 save(v) | |
687 write(SETITEM) | |
688 # else tmp is empty, and we're done | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
329 | |
330 # Save the reduce() output and finally memoize the object | |
--> 331 self.save_reduce(obj=obj, *rv) | |
332 | |
333 def persistent_id(self, obj): | |
/opt/spark/python/pyspark/cloudpickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj) | |
516 | |
517 if state is not None: | |
--> 518 save(state) | |
519 write(pickle.BUILD) | |
520 | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
284 f = self.dispatch.get(t) | |
285 if f: | |
--> 286 f(self, obj) # Call unbound method with explicit self | |
287 return | |
288 | |
/usr/lib/python2.7/pickle.pyc in save_dict(self, obj) | |
647 | |
648 self.memoize(obj) | |
--> 649 self._batch_setitems(obj.iteritems()) | |
650 | |
651 dispatch[DictionaryType] = save_dict | |
/usr/lib/python2.7/pickle.pyc in _batch_setitems(self, items) | |
679 for k, v in tmp: | |
680 save(k) | |
--> 681 save(v) | |
682 write(SETITEMS) | |
683 elif n: | |
/usr/lib/python2.7/pickle.pyc in save(self, obj) | |
304 reduce = getattr(obj, "__reduce_ex__", None) | |
305 if reduce: | |
--> 306 rv = reduce(self.proto) | |
307 else: | |
308 reduce = getattr(obj, "__reduce__", None) | |
/opt/spark/python/pyspark/context.py in __getnewargs__(self) | |
250 # This method is called when attempting to pickle SparkContext, which is always an error: | |
251 raise Exception( | |
--> 252 "It appears that you are attempting to reference SparkContext from a broadcast " | |
253 "variable, action, or transforamtion. SparkContext can only be used on the driver, " | |
254 "not in code that it run on workers. For more information, see SPARK-5063." | |
Exception: It appears that you are attempting to reference SparkContext from a broadcast variable, action, or transforamtion. SparkContext can only be used on the driver, not in code that it run on workers. For more information, see SPARK-5063. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment