fork download
  1. import pandas as pd
  2. import numpy as np
  3. import time
  4. from io import StringIO # python3; python2: BytesIO
  5. import boto3
  6. import s3fs
  7. from botocore.exceptions import NoCredentialsError
  8.  
  9. def lambda_handler(event, context):
  10.  
  11. # Dataset 1
  12. # loading the data
  13. df1 = pd.read_csv("https://i...content-available-to-author-only...s.com/Minimum+Wage+Data.csv",encoding= 'unicode_escape')
  14.  
  15. # Renaming the columns.
  16. df1.rename(columns={'High.Value': 'min_wage_by_law', 'Low.Value': 'min_wage_real'}, inplace=True)
  17.  
  18. # Removing all unneeded values.
  19. df1 = df1.drop(['Table_Data','Footnote','High.2018','Low.2018'], axis=1)
  20. df1 = df1.loc[df1['Year']>1969].copy()
  21.  
  22. # ---------------------------------
  23.  
  24. # Dataset 2
  25. # Loading from the debt S3 bucket
  26. df2 = pd.read_csv("https://i...content-available-to-author-only...s.com/USGS_Final_File.csv")
  27.  
  28. #Filtering getting the range in between 1969 and 2018.
  29. df2 = df2.loc[df2['Year']>1969].copy()
  30. df2 = df2.loc[df2['Year']<2018].copy()
  31. df2.rename(columns={'Real State Growth %': 'Real State Growth','Population (million)':'Population Mil'}, inplace=True)
  32.  
  33. # Cleaning the data
  34. df2['State Debt'] = df2['State Debt'].str.replace(',', '')
  35. df2['Local Debt'] = df2['Local Debt'].str.replace(',', '')
  36. df2["State and Local Debt"] = df2["State and Local Debt"].str.replace(',', '')
  37. df2["Gross State Product"] = df2["Gross State Product"].str.replace(',', '')
  38.  
  39. # Cast to Floating
  40. df2[["State Debt","Local Debt","State and Local Debt","Gross State Product"]] = df2[[ "State Debt","Local Debt","State and Local Debt","Gross State Product"]].apply(pd.to_numeric)
  41.  
  42. # --------------------------------------------
  43. # Merge the data through an inner join.
  44. full = pd.merge(df1,df2,on=['State','Year'])
  45. #--------------------------------------------
  46. filename = '/tmp/'#specify location of s3:/{my-bucket}/
  47. file= 'debt_and_wage' #name of file
  48. datetime = time.strftime("%Y%m%d%H%M%S") #timestamp
  49. filenames3 = "%s%s%s.csv"%(filename,file,datetime) #name of the filepath and csv file
  50.  
  51. full.to_csv(filenames3, header = True)
  52.  
  53. ## Saving it on AWS
  54.  
  55. s3 = boto3.resource('s3',aws_access_key_id='accesskeycantshare',aws_secret_access_key= 'key')
  56.  
  57. s3.meta.client.upload_file(filenames3, 'information-arch',file+datetime+'.csv')
  58.  
Runtime error #stdin #stdout #stderr 0.04s 63596KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "prog.py", line 1, in <module>
    import pandas as pd
ImportError: No module named pandas