import pandas as pd
import numpy as np
import time
from io import StringIO # python3; python2: BytesIO
import boto3
import s3fs
from botocore.exceptions import NoCredentialsError
def lambda_handler( event, context) :
# Dataset 1
# loading the data
df1 = pd.read_csv ( "https://i...content-available-to-author-only...s.com/Minimum+Wage+Data.csv" , encoding= 'unicode_escape' )
# Renaming the columns.
df1.rename ( columns= { 'High.Value' : 'min_wage_by_law' , 'Low.Value' : 'min_wage_real' } , inplace= True )
# Removing all unneeded values.
df1 = df1.drop ( [ 'Table_Data' , 'Footnote' , 'High.2018' , 'Low.2018' ] , axis= 1 )
df1 = df1.loc [ df1[ 'Year' ] > 1969 ] .copy ( )
# ---------------------------------
# Dataset 2
# Loading from the debt S3 bucket
df2 = pd.read_csv ( "https://i...content-available-to-author-only...s.com/USGS_Final_File.csv" )
#Filtering getting the range in between 1969 and 2018.
df2 = df2.loc [ df2[ 'Year' ] > 1969 ] .copy ( )
df2 = df2.loc [ df2[ 'Year' ] < 2018 ] .copy ( )
df2.rename ( columns= { 'Real State Growth %' : 'Real State Growth' , 'Population (million)' :'Population Mil' } , inplace= True )
# Cleaning the data
df2[ 'State Debt' ] = df2[ 'State Debt' ] .str .replace ( ',' , '' )
df2[ 'Local Debt' ] = df2[ 'Local Debt' ] .str .replace ( ',' , '' )
df2[ "State and Local Debt" ] = df2[ "State and Local Debt" ] .str .replace ( ',' , '' )
df2[ "Gross State Product" ] = df2[ "Gross State Product" ] .str .replace ( ',' , '' )
# Cast to Floating
df2[ [ "State Debt" , "Local Debt" , "State and Local Debt" , "Gross State Product" ] ] = df2[ [ "State Debt" , "Local Debt" , "State and Local Debt" , "Gross State Product" ] ] .apply ( pd.to_numeric )
# --------------------------------------------
# Merge the data through an inner join.
full = pd.merge ( df1, df2, on= [ 'State' , 'Year' ] )
#--------------------------------------------
filename = '/tmp/' #specify location of s3:/{my-bucket}/
file = 'debt_and_wage' #name of file
datetime = time .strftime ( "%Y%m%d%H%M%S" ) #timestamp
filenames3 = "%s%s%s.csv" %( filename, file , datetime ) #name of the filepath and csv file
full.to_csv ( filenames3, header = True )
## Saving it on AWS
s3 = boto3.resource ( 's3' , aws_access_key_id= 'accesskeycantshare' , aws_secret_access_key= 'key' )
s3.meta .client .upload_file ( filenames3, 'information-arch' , file +datetime +'.csv' )
aW1wb3J0IHBhbmRhcyBhcyBwZAppbXBvcnQgbnVtcHkgYXMgbnAKaW1wb3J0IHRpbWUKZnJvbSBpbyBpbXBvcnQgU3RyaW5nSU8gIyBweXRob24zOyBweXRob24yOiBCeXRlc0lPIAppbXBvcnQgYm90bzMKaW1wb3J0IHMzZnMKZnJvbSBib3RvY29yZS5leGNlcHRpb25zIGltcG9ydCBOb0NyZWRlbnRpYWxzRXJyb3IKCmRlZiBsYW1iZGFfaGFuZGxlcihldmVudCwgY29udGV4dCk6CgogICAgIyBEYXRhc2V0IDEKICAgICMgbG9hZGluZyB0aGUgZGF0YQogICAgZGYxID0gcGQucmVhZF9jc3YoImh0dHBzOi8vaS4uLmNvbnRlbnQtYXZhaWxhYmxlLXRvLWF1dGhvci1vbmx5Li4ucy5jb20vTWluaW11bStXYWdlK0RhdGEuY3N2IixlbmNvZGluZz0gJ3VuaWNvZGVfZXNjYXBlJykKCiAgICAjIFJlbmFtaW5nIHRoZSBjb2x1bW5zLgogICAgZGYxLnJlbmFtZShjb2x1bW5zPXsnSGlnaC5WYWx1ZSc6ICdtaW5fd2FnZV9ieV9sYXcnLCAnTG93LlZhbHVlJzogJ21pbl93YWdlX3JlYWwnfSwgaW5wbGFjZT1UcnVlKQoKICAgICMgUmVtb3ZpbmcgYWxsIHVubmVlZGVkIHZhbHVlcy4KICAgIGRmMSA9IGRmMS5kcm9wKFsnVGFibGVfRGF0YScsJ0Zvb3Rub3RlJywnSGlnaC4yMDE4JywnTG93LjIwMTgnXSwgYXhpcz0xKQogICAgZGYxID0gZGYxLmxvY1tkZjFbJ1llYXInXT4xOTY5XS5jb3B5KCkKCiAgICAjIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQogICAgIAogICAgIyBEYXRhc2V0IDIKICAgICMgTG9hZGluZyBmcm9tIHRoZSBkZWJ0IFMzIGJ1Y2tldAogICAgZGYyID0gcGQucmVhZF9jc3YoImh0dHBzOi8vaS4uLmNvbnRlbnQtYXZhaWxhYmxlLXRvLWF1dGhvci1vbmx5Li4ucy5jb20vVVNHU19GaW5hbF9GaWxlLmNzdiIpIAoKICAgICNGaWx0ZXJpbmcgZ2V0dGluZyB0aGUgcmFuZ2UgaW4gYmV0d2VlbiAxOTY5IGFuZCAyMDE4LgogICAgZGYyID0gZGYyLmxvY1tkZjJbJ1llYXInXT4xOTY5XS5jb3B5KCkKICAgIGRmMiA9IGRmMi5sb2NbZGYyWydZZWFyJ108MjAxOF0uY29weSgpCiAgICBkZjIucmVuYW1lKGNvbHVtbnM9eydSZWFsIFN0YXRlIEdyb3d0aCAlJzogJ1JlYWwgU3RhdGUgR3Jvd3RoJywnUG9wdWxhdGlvbiAobWlsbGlvbiknOidQb3B1bGF0aW9uIE1pbCd9LCBpbnBsYWNlPVRydWUpCgogICAgIyBDbGVhbmluZyB0aGUgZGF0YQogICAgZGYyWydTdGF0ZSBEZWJ0J10gPSBkZjJbJ1N0YXRlIERlYnQnXS5zdHIucmVwbGFjZSgnLCcsICcnKQogICAgZGYyWydMb2NhbCBEZWJ0J10gPSBkZjJbJ0xvY2FsIERlYnQnXS5zdHIucmVwbGFjZSgnLCcsICcnKQogICAgZGYyWyJTdGF0ZSBhbmQgTG9jYWwgRGVidCJdID0gZGYyWyJTdGF0ZSBhbmQgTG9jYWwgRGVidCJdLnN0ci5yZXBsYWNlKCcsJywgJycpCiAgICBkZjJbIkdyb3NzIFN0YXRlIFByb2R1Y3QiXSA9IGRmMlsiR3Jvc3MgU3RhdGUgUHJvZHVjdCJdLnN0ci5yZXBsYWNlKCcsJywgJycpCgogICAgIyBDYXN0IHRvIEZsb2F0aW5nCiAgICBkZjJbWyJTdGF0ZSBEZWJ0IiwiTG9jYWwgRGVidCIsIlN0YXRlIGFuZCBMb2NhbCBEZWJ0IiwiR3Jvc3MgU3RhdGUgUHJvZHVjdCJdXSA9IGRmMltbICJTdGF0ZSBEZWJ0IiwiTG9jYWwgRGVidCIsIlN0YXRlIGFuZCBMb2NhbCBEZWJ0IiwiR3Jvc3MgU3RhdGUgUHJvZHVjdCJdXS5hcHBseShwZC50b19udW1lcmljKQoKICAgICMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KICAgICMgTWVyZ2UgdGhlIGRhdGEgdGhyb3VnaCBhbiBpbm5lciBqb2luLgogICAgZnVsbCA9IHBkLm1lcmdlKGRmMSxkZjIsb249WydTdGF0ZScsJ1llYXInXSkKICAgICMtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQogICAgZmlsZW5hbWUgPSAnL3RtcC8nI3NwZWNpZnkgbG9jYXRpb24gb2YgczM6L3tteS1idWNrZXR9LwogICAgZmlsZT0gJ2RlYnRfYW5kX3dhZ2UnICNuYW1lIG9mIGZpbGUKICAgIGRhdGV0aW1lID0gdGltZS5zdHJmdGltZSgiJVklbSVkJUglTSVTIikgI3RpbWVzdGFtcAogICAgZmlsZW5hbWVzMyA9ICIlcyVzJXMuY3N2IiUoZmlsZW5hbWUsZmlsZSxkYXRldGltZSkgI25hbWUgb2YgdGhlIGZpbGVwYXRoIGFuZCBjc3YgZmlsZQoKICAgIGZ1bGwudG9fY3N2KGZpbGVuYW1lczMsIGhlYWRlciA9IFRydWUpCgogICAgIyMgU2F2aW5nIGl0IG9uIEFXUwoKICAgIHMzID0gYm90bzMucmVzb3VyY2UoJ3MzJyxhd3NfYWNjZXNzX2tleV9pZD0nYWNjZXNza2V5Y2FudHNoYXJlJyxhd3Nfc2VjcmV0X2FjY2Vzc19rZXk9ICdrZXknKQoKICAgIHMzLm1ldGEuY2xpZW50LnVwbG9hZF9maWxlKGZpbGVuYW1lczMsICdpbmZvcm1hdGlvbi1hcmNoJyxmaWxlK2RhdGV0aW1lKycuY3N2JykKICAgIA==