#import urllib2
#response = urllib2.urlopen('http://www.gutenberg.org/files/30760/30760-0.txt')
#html = response.read()
#file("book_temp.txt","w").write(html)
#words=sc.textFile("book_temp.txt")

#file("book_temp.txt", "w").write(urllib2.urlopen("http://www.gutenberg.org/files/30760/30760-0.txt").read())
#words = sc.textFile("hdfs://localhost:9000/user//DailyFullModBusMeiDeviceID")
# Word-count job over a text file stored in HDFS.
# `sc` is not defined in this script; it is expected to already exist as a
# SparkContext (for example, the one the pyspark shell provides).
# NOTE: despite its name, `words` holds the file's lines (one element per
# line); the name is kept because later code may refer to it.
words = sc.textFile("hdfs://localhost:9000/Python/book.txt")

# Peek at up to five lines that begin with a space. The result is not bound to
# a name, so it is only visible when this runs in an interactive shell.
words.filter(lambda line: line.startswith(" ")).take(5)

# Split every line into words, pair each word with 1, then sum the 1s per word.
# split() with no argument splits on any run of whitespace and never yields
# empty strings; split(" ") produced "" for leading, trailing or repeated
# spaces, and those empty tokens were being counted as a word.
# cache(): `counts` feeds two actions below (save and collect), so keep the
# computed partitions around for the second one.
counts = words.flatMap(lambda line: line.split()) \
              .map(lambda word: (word, 1)) \
              .reduceByKey(lambda a, b: a + b) \
              .cache()

# Persist the (word, count) pairs to HDFS.
# NOTE(review): Spark normally refuses to write into an output directory that
# already exists -- confirm this path is cleared between runs.
counts.saveAsTextFile("hdfs://localhost:9000/Python/spark_output1")

# Pull every (word, count) pair back to the driver. The returned list is not
# bound to a name, so it is only visible in an interactive shell.
counts.collect()