How do I serialize a collection of objects into an RDD in PySpark?
For example, the simplest operation:
from pyspark import SparkContext

class test:
    data = 1

    def __init__(self):
        self.property = 0

    def test2(self):
        print("hello")

if __name__ == "__main__":
    p1 = test()
    p2 = test()
    a = [p1, p2]

    sc = SparkContext("local[2]", "test")
    rdd = sc.parallelize(a)                          # distribute the two objects
    print(rdd.map(lambda x: x.property).collect())
    sc.stop()
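
As far as I understand, PySpark pickles every element of the list on the driver, so the class also has to be importable inside the worker Python processes. A minimal sketch of the variant I have in mind, assuming the class is moved into its own module (a hypothetical testclass.py) that gets shipped to the executors:

# testclass.py (hypothetical helper module holding the class)
class test:
    def __init__(self):
        self.property = 0

# main driver script
from pyspark import SparkContext
from testclass import test

if __name__ == "__main__":
    sc = SparkContext("local[2]", "test")
    sc.addPyFile("testclass.py")     # make the module importable on the executors
    a = [test(), test()]
    rdd = sc.parallelize(a)          # each instance is pickled as testclass.test
    print(rdd.map(lambda x: x.property).collect())   # expecting [0, 0]
    sc.stop()

The separate module is only there so that unpickling on the workers can resolve the class by import; if the class stays only in the main script, my understanding is the worker processes may not be able to find it.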