# Train a random-forest classifier on a few Titanic columns, predict the
# outcome for one hand-written passenger, and print the model's accuracy on
# the rows it was trained on.
#
# Reads 'titanic.csv' from the current working directory at import time.
# Module-level names (df, clf, model_columns, query, result, ...) are kept so
# anything importing this script still finds them.
#
# Every print call takes exactly one argument, so the output is the same under
# Python 2 (parenthesised print statement) and Python 3 (print function).

import pandas as pd
from sklearn.ensemble import RandomForestClassifier as rf
from sklearn.metrics import make_scorer, accuracy_score

df = pd.read_csv('titanic.csv')

# Columns used by the script; the last entry is taken as the target below.
include = ['Age', 'Sex', 'Embarked', 'Survived']
dependent_variable = include[-1]
print(dependent_variable)

# Take an explicit copy: filling NAs on a slice of `df` is chained assignment,
# which pandas warns about and which does nothing under copy-on-write.
df_ = df[include].copy()

categoricals = []  # object-dtype columns, one-hot encoded below

# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for col, col_type in df_.dtypes.items():
    if col_type == 'object':
        print(col)
        categoricals.append(col)
    else:
        # Fill NAs with 0 for ints/floats (too generic, but kept as-is).
        # Assign the result back instead of using inplace=True on a selection.
        df_[col] = df_[col].fillna(0)
print(categoricals)

# get_dummies effectively creates one-hot encoded variables; with
# dummy_na=False a missing category becomes all-zero indicator columns.
df_ohe = pd.get_dummies(df_, columns=categoricals, dummy_na=False)
print(df_ohe)

# Index.difference returns the remaining columns in sorted order; that order
# becomes the feature order the model is trained with.
x = df_ohe[df_ohe.columns.difference([dependent_variable])]
y = df_ohe[dependent_variable]

clf = rf()
clf.fit(x, y)

# Remember the training feature layout so new inputs can be aligned to it.
model_columns = list(x.columns)

# One example passenger in the shape of a JSON payload (list of records).
json_ = [{"Age": 85, "Sex": "male", "Embarked": "S"}]
query = pd.get_dummies(pd.DataFrame(json_))
# One-hot encoding a single record only yields the categories present in it;
# reindex adds the indicator columns seen in training (as 0) and fixes order.
query = query.reindex(columns=model_columns, fill_value=0)

result = clf.predict(query)
print(result)

# The original compared an undefined `training_data` against the single
# unlabeled prediction above, which raised NameError. Score the model against
# the labels it was fitted on instead.
# NOTE(review): this is training accuracy, so it is optimistic; a held-out
# split would be needed for an honest estimate.
print(accuracy_score(y, clf.predict(x)))
aW1wb3J0IHBhbmRhcyBhcyBwZApmcm9tIHNrbGVhcm4ubWV0cmljcyBpbXBvcnQgbWFrZV9zY29yZXIsIGFjY3VyYWN5X3Njb3JlCgpkZiA9IHBkLnJlYWRfY3N2KCd0aXRhbmljLmNzdicpCgppbmNsdWRlID0gWydBZ2UnLCAnU2V4JywgJ0VtYmFya2VkJywgJ1N1cnZpdmVkJ10KZGVwZW5kZW50X3ZhcmlhYmxlID0gaW5jbHVkZVstMV0KcHJpbnQgZGVwZW5kZW50X3ZhcmlhYmxlCgpmcm9tIHNrbGVhcm4uZW5zZW1ibGUgaW1wb3J0IFJhbmRvbUZvcmVzdENsYXNzaWZpZXIgYXMgcmYKCmRmXyA9IGRmW2luY2x1ZGVdCgpjYXRlZ29yaWNhbHMgPSBbXSAgIyBnb2luZyB0byBvbmUtaG90IGVuY29kZSBjYXRlZ29yaWNhbCB2YXJpYWJsZXMKCmZvciBjb2wsIGNvbF90eXBlIGluIGRmXy5kdHlwZXMuaXRlcml0ZW1zKCk6CiAgICBpZiBjb2xfdHlwZSA9PSAnb2JqZWN0JzoKICAgICAgICBwcmludCBjb2wKICAgICAgICBjYXRlZ29yaWNhbHMuYXBwZW5kKGNvbCkKICAgIGVsc2U6CiAgICAgICAgcGFzcwogICAgICAgIGRmX1tjb2xdLmZpbGxuYSgwLCBpbnBsYWNlPVRydWUpICAjIGZpbGwgTkEncyB3aXRoIDAgZm9yIGludHMvZmxvYXRzLCB0b28gZ2VuZXJpYwpwcmludCBjYXRlZ29yaWNhbHMKCiMgZ2V0X2R1bW1pZXMgZWZmZWN0aXZlbHkgY3JlYXRlcyBvbmUtaG90IGVuY29kZWQgdmFyaWFibGVzCmRmX29oZSA9IHBkLmdldF9kdW1taWVzKGRmXywgY29sdW1ucz1jYXRlZ29yaWNhbHMsIGR1bW15X25hPUZhbHNlKQpwcmludCBkZl9vaGUKCiMgcHJpbnQgcGQuCiMgZ2V0X2R1bW1pZXMoZGF0YT1kZiwgY29sdW1ucz1kZXBlbmRlbnRfdmFyaWFibGUpCgp4ID0gZGZfb2hlW2RmX29oZS5jb2x1bW5zLmRpZmZlcmVuY2UoW2RlcGVuZGVudF92YXJpYWJsZV0pXQoKeSA9IGRmX29oZVtkZXBlbmRlbnRfdmFyaWFibGVdCiMKIyBwcmludCAoeCkKIwpjbGYgPSByZigpCmNsZi5maXQoeCwgeSkKIwptb2RlbF9jb2x1bW5zID0gbGlzdCh4LmNvbHVtbnMpCiMKIyAjCmpzb25fID0gW3siQWdlIjogODUsICJTZXgiOiAibWFsZSIsICJFbWJhcmtlZCI6ICJTIn1dCnF1ZXJ5ID0gcGQuZ2V0X2R1bW1pZXMocGQuRGF0YUZyYW1lKGpzb25fKSkKcXVlcnkgPSBxdWVyeS5yZWluZGV4KGNvbHVtbnM9bW9kZWxfY29sdW1ucywgZmlsbF92YWx1ZT0wKQoKcmVzdWx0ID0gY2xmLnByZWRpY3QocXVlcnkpCnByaW50IHJlc3VsdAoKZnJvbSBza2xlYXJuLm1ldHJpY3MgaW1wb3J0IGFjY3VyYWN5X3Njb3JlCnByaW50IGFjY3VyYWN5X3Njb3JlKHRyYWluaW5nX2RhdGEsIHJlc3VsdCkJ