import os
import time
from io import StringIO # python3; python2: BytesIO

import boto3
import numpy as np
import pandas as pd
import s3fs
from botocore.exceptions import NoCredentialsError

# Locations of the two input CSV files (read over HTTPS by pandas).
_MIN_WAGE_CSV_URL = "https://i...content-available-to-author-only...s.com/Minimum+Wage+Data.csv"
_DEBT_CSV_URL = "https://i...content-available-to-author-only...s.com/USGS_Final_File.csv"

# Debt columns that arrive as strings with thousands separators ("1,234").
_DEBT_NUMERIC_COLUMNS = (
    "State Debt",
    "Local Debt",
    "State and Local Debt",
    "Gross State Product",
)

# Output settings: local scratch directory, S3 bucket, and object-name prefix.
_LOCAL_DIR = '/tmp/'
_OUTPUT_BUCKET = 'information-arch'
_OUTPUT_BASENAME = 'debt_and_wage'


def _load_min_wage():
    """Load the minimum-wage CSV and keep the cleaned rows after 1969.

    Renames 'High.Value' / 'Low.Value' to 'min_wage_by_law' /
    'min_wage_real', drops the columns that are not needed downstream, and
    keeps only rows whose 'Year' is greater than 1969.

    Returns:
        A new DataFrame (a copy, so later edits do not touch the raw frame).
    """
    wages = pd.read_csv(_MIN_WAGE_CSV_URL, encoding='unicode_escape')
    wages = wages.rename(
        columns={'High.Value': 'min_wage_by_law', 'Low.Value': 'min_wage_real'}
    )
    wages = wages.drop(
        columns=['Table_Data', 'Footnote', 'High.2018', 'Low.2018']
    )
    return wages.loc[wages['Year'] > 1969].copy()


def _load_debt():
    """Load the debt CSV, restrict it to 1970-2017, and make debt columns numeric.

    Rows are kept when 1969 < Year < 2018 (both bounds exclusive). The
    columns listed in ``_DEBT_NUMERIC_COLUMNS`` have their comma thousands
    separators stripped and are then converted with ``pd.to_numeric``.

    Returns:
        A new DataFrame with the renamed and converted columns.
    """
    debt = pd.read_csv(_DEBT_CSV_URL)

    # Single mask instead of two successive filters with intermediate copies.
    in_range = (debt['Year'] > 1969) & (debt['Year'] < 2018)
    debt = debt.loc[in_range].copy()

    debt = debt.rename(
        columns={
            'Real State Growth %': 'Real State Growth',
            'Population (million)': 'Population Mil',
        }
    )

    for column in _DEBT_NUMERIC_COLUMNS:
        # regex=False: the comma is a literal character, not a pattern.
        without_commas = debt[column].str.replace(',', '', regex=False)
        debt[column] = pd.to_numeric(without_commas)

    return debt


def lambda_handler(event, context):
    """Build the merged wage/debt dataset and upload it to S3 as a CSV.

    Steps:
      1. Load and clean the minimum-wage data.
      2. Load and clean the debt data.
      3. Inner-join the two on ('State', 'Year').
      4. Write the result to a timestamped CSV under ``/tmp/`` and upload it
         to the ``information-arch`` bucket under the same file name.

    Args:
        event: Invocation payload; not read by this handler.
        context: Invocation context; not read by this handler.

    Returns:
        None.

    Raises:
        Any exception from pandas (download/parse) or boto3 (upload) is
        propagated unchanged.
    """
    merged = pd.merge(_load_min_wage(), _load_debt(), on=['State', 'Year'])

    # time.strftime with no time tuple formats the current local time.
    timestamp = time.strftime("%Y%m%d%H%M%S")
    object_key = f"{_OUTPUT_BASENAME}{timestamp}.csv"
    local_path = f"{_LOCAL_DIR}{object_key}"

    try:
        merged.to_csv(local_path, header=True)

        # SECURITY: no access keys in source. boto3 resolves credentials on
        # its own (environment variables, shared config, or an attached IAM
        # role), so the identity running this code must be allowed to put
        # objects into the bucket.
        s3_client = boto3.client('s3')
        s3_client.upload_file(local_path, _OUTPUT_BUCKET, object_key)
    finally:
        # Always remove the scratch file so repeated invocations that reuse
        # the same filesystem do not accumulate old CSVs.
        if os.path.exists(local_path):
            os.remove(local_path)
    