"""Compare several regressors on the ENB2012 energy-efficiency dataset.

Each model is fitted on the feature columns and scored (R^2) separately
against the two target columns Y1 and Y2; results are collected into a
DataFrame indexed by model name.
"""
from pandas import read_csv, DataFrame
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

dataset = read_csv('ENB2012_data.csv', sep=',')
# Drop two of the input features (X1, X4) before training.
dataset = dataset.drop(['X1', 'X4'], axis=1)

# Targets (Y1, Y2) and the remaining feature columns.
trg = dataset[['Y1', 'Y2']]
trn = dataset.drop(['Y1', 'Y2'], axis=1)

models = [LinearRegression(),  # ordinary least squares
          RandomForestRegressor(n_estimators=100, max_features='sqrt'),  # random forest
          KNeighborsRegressor(n_neighbors=6),  # k nearest neighbours
          SVR(kernel='linear'),  # support vector regression, linear kernel
          # NOTE: LogisticRegression was removed from this list — it is a
          # *classifier* and raises "Unknown label type: continuous" when
          # fitted on the continuous targets Y1/Y2.
          ]

Xtrn, Xtest, Ytrn, Ytest = train_test_split(trn, trg, test_size=0.4)

# Collect one row of R^2 scores per model.
rows = []
for model in models:
    # Model name without the constructor-argument noise.
    row = {'Model': type(model).__name__}
    # Fit and score the model separately for each target column.
    for i in range(Ytrn.shape[1]):
        # .iloc is required for positional column access on a DataFrame;
        # plain Ytrn[:, i] raises on pandas objects.
        model.fit(Xtrn, Ytrn.iloc[:, i])
        # BUG FIX: score against target column i, not always column 0
        # (the original compared every model's Y2 prediction to Y1).
        row['R2_Y%s' % (i + 1)] = r2_score(Ytest.iloc[:, i],
                                           model.predict(Xtest))
    rows.append(row)

# DataFrame.append was removed in pandas 2.0; build the frame in one shot.
TestModels = DataFrame(rows)
# Index the results table by model name.
TestModels.set_index('Model', inplace=True)
# (removed: a stray base64-encoded duplicate of this script was appended
#  here; it was not valid Python and broke the file's syntax)