fork download
  1. from pyspark import SparkContext
  2. from pyspark.sql import SQLContext
  3. from pyspark.sql.functions import year
  4.  
  5. from pyspark import SparkConf, SparkContext
  6. from pyspark.sql import HiveContext
  7.  
  8.  
  9.  
  10. emp = [(1,"Smith",-1,"2018","10","M",3000), \
  11. (2,"Rose",1,"2010","20","M",4000), \
  12. (3,"Williams",1,"2010","10","M",1000), \
  13. (4,"Jones",2,"2005","10","F",2000), \
  14. (5,"Brown",2,"2010","40","",-1), \
  15. (6,"Brown",2,"2010","50","",-1) \
  16. ]
  17. empColumns = ["emp_id","name","superior_emp_id","year_joined", \
  18. "emp_dept_id","gender","salary"]
  19.  
  20. empDF = spark.createDataFrame(data=emp, schema = empColumns)
  21. empDF.printSchema()
  22. empDF.show(truncate=False)
  23.  
  24. dept = [("Finance",10), \
  25. ("Marketing",20), \
  26. ("Sales",30), \
  27. ("IT",40) \
  28. ]
  29. deptColumns = ["dept_name","dept_id"]
  30. deptDF = spark.createDataFrame(data=dept, schema = deptColumns)
  31. deptDF.printSchema()
  32.  
Success #stdin #stdout #stderr 0.01s 5456KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Error: near line 1: near "from": syntax error