import numpy as np
np.random.seed(123)
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.layers import *
from keras.models import Sequential, Model
from keras.callbacks import EarlyStopping
from keras import optimizers
import matplotlib.pyplot as plot
import time
# Hyperparameters shared by the data pipeline and the model builders.
MAXLEN = 500
BATCHSIZE = 16
EMBSIZE = 50
HIDDENSIZE = 50
KERNELSIZE = 5
VOCABSIZE = 10000
MAXEPOCHS = 20

# Load the IMDB data set using the vocabulary and length limits set above.
train_split, test_split = imdb.load_data(
    path="imdb.npz",
    num_words=VOCABSIZE,
    skip_top=0,
    maxlen=MAXLEN,
    start_char=1,
    oov_char=2,
    index_from=3,
)
x_train, y_train = train_split
x_test, y_test = test_split

# Append a trailing axis of size one to both label arrays.
y_train = np.expand_dims(y_train, axis=-1)
y_test = np.expand_dims(y_test, axis=-1)

# Keep only the first 1000 test samples; train_model passes them on as
# validation data.
validation_slice = slice(None, 1000)
x_test = x_test[validation_slice]
y_test = y_test[validation_slice]

summary_template = "# training samples: {}; # validation samples: {}"
print(summary_template.format(len(x_train), len(x_test)))

# Number of complete batches that fit into the training set.
BATCHES_PER_EPOCH = len(x_train) // BATCHSIZE
def generator(x, y, return_positions=False):
    """Yield padded mini-batches of ``(x, y)`` indefinitely.

    The data is walked in consecutive slices of ``BATCHSIZE`` items; once the
    end is reached the walk restarts from the beginning, so the generator
    never terminates on its own. The final slice of a pass may hold fewer than
    ``BATCHSIZE`` items.

    Args:
        x: Sliceable collection of sequences; each slice is handed to
            ``pad_sequences`` with its default arguments.
        y: Sliceable collection of labels aligned with ``x``.
        return_positions: Currently unused; kept so existing callers that
            pass it keep working.

    Yields:
        A tuple ``(pad_sequences(x_batch), y_batch)``.
    """
    while True:
        for start in range(0, len(x), BATCHSIZE):
            stop = start + BATCHSIZE
            # No per-batch printing here: the generator runs once per training
            # step, so a debug print would flood stdout.
            yield pad_sequences(x[start:stop]), y[start:stop]
def build_gru_model():
    """Build the bidirectional GRU classifier.

    Layer stack: embedding (with zero masking) -> dropout -> bidirectional GRU
    -> dropout -> single sigmoid unit.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    embedding_L = Embedding(input_dim=VOCABSIZE, output_dim=EMBSIZE, mask_zero=True)
    # Two separate Dropout instances: the original referenced an undefined
    # ``dropout_L`` and listed it twice. The 0.25 rate matches the other
    # builders in this file.
    embedding_dropout_L = Dropout(0.25)
    # Each direction gets half of HIDDENSIZE units.
    gru_L = Bidirectional(GRU(units=HIDDENSIZE // 2))
    gru_dropout_L = Dropout(0.25)
    output_L = Dense(units=1, activation="sigmoid")
    return Sequential([
        embedding_L,
        embedding_dropout_L,
        gru_L,
        gru_dropout_L,
        output_L,
    ])
def build_cnn_model():
    """Build the 1-D convolutional classifier.

    Layer stack: embedding -> dropout -> Conv1D -> global max pooling over the
    time axis -> dropout -> single sigmoid unit.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # Masking stays off here, as in the original embedding layer.
    embedding_L = Embedding(input_dim=VOCABSIZE, output_dim=EMBSIZE, mask_zero=False)
    embedding_dropout_L = Dropout(0.25)
    # The original referenced undefined names for the filter count and the
    # kernel length; the module constants HIDDENSIZE and KERNELSIZE are used
    # instead. Layers are left uncalled so Sequential can wire them up.
    conv_L = Conv1D(
        filters=HIDDENSIZE,
        kernel_size=KERNELSIZE,
        padding="same",
        activation="tanh",
    )
    # Pooling over the whole (variable-length) sequence yields one vector per
    # sample, which is what the final Dense layer needs.
    maxpool_L = GlobalMaxPooling1D()
    pooled_dropout_L = Dropout(0.25)
    output_L = Dense(units=1, activation="sigmoid")
    return Sequential([
        embedding_L,
        embedding_dropout_L,
        conv_L,
        maxpool_L,
        pooled_dropout_L,
        output_L,
    ])
def build_emb_model():
    """Build the averaged-embedding baseline classifier.

    Layer stack: embedding -> dropout -> global average pooling over the time
    axis -> dropout -> single sigmoid unit.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    embedding_L = Embedding(input_dim=VOCABSIZE, output_dim=EMBSIZE, mask_zero=False)
    embedding_dropout_L = Dropout(0.25)
    # A pooling window of 2 would keep the time axis, so the Dense layer would
    # emit one prediction per timestep. Global pooling collapses the sequence
    # to one vector per sample, matching the (batch, 1) labels.
    average_pool_L = GlobalAveragePooling1D()
    # A distinct Dropout instance; the original listed the same one twice.
    pooled_dropout_L = Dropout(0.25)
    output_L = Dense(units=1, activation="sigmoid")
    return Sequential([
        embedding_L,
        embedding_dropout_L,
        average_pool_L,
        pooled_dropout_L,
        output_L,
    ])
def train_model(model, x_train, y_train, x_test, y_test):
    """Compile ``model`` and train it with early stopping.

    Args:
        model: Uncompiled Keras model ending in a single sigmoid unit.
        x_train: Training sequences, fed batch-wise through ``generator``.
        y_train: Training labels aligned with ``x_train``.
        x_test: Validation sequences; padded in one go with ``pad_sequences``.
        y_test: Validation labels aligned with ``x_test``.

    Returns:
        The ``history`` dict of the Keras ``History`` object returned by
        ``fit_generator`` (per-epoch values such as ``"loss"``).
    """
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    # - binary_crossentropy is the loss that fits one sigmoid output with 0/1
    #   labels; categorical_crossentropy expects a one-hot class distribution.
    # - The configured optimizer instance is passed; the string "sgd" would
    #   build a default SGD and silently ignore the settings above.
    # - The metric is spelled "acc" so it is reported as "acc"/"val_acc",
    #   the key the early-stopping callback monitors.
    model.compile(loss="binary_crossentropy", optimizer=sgd, metrics=["acc"])

    earlystop = EarlyStopping(monitor="val_acc", patience=7)

    history = model.fit_generator(
        generator(x_train, y_train),
        steps_per_epoch=BATCHES_PER_EPOCH,
        validation_data=(pad_sequences(x_test), y_test),
        epochs=MAXEPOCHS,
        callbacks=[earlystop],
    )

    return history.history
# Registry of the models to train, keyed by a short display name.
models = {}

# Uncomment every model that has been implemented.
# NOTE: build_gru_attn_model is not defined in the visible part of this file.
#models["gru+attn"] = build_gru_attn_model()
models["gru"] = build_gru_model()
#models["cnn"] = build_cnn_model()
#models["emb"] = build_emb_model()

# Per-model training history and per-model timing figure.
histories = {}
traintimes = {}

# Train the registered models in alphabetical order of their names.
for name in sorted(models):
    print("Training", name)
    started = time.time()
    histories[name] = train_model(models[name], x_train, y_train, x_test, y_test)
    elapsed = time.time() - started
    # One "loss" entry is recorded per completed epoch.
    epochs_run = len(histories[name]["loss"])
    # Wall-clock time divided by epochs run and by batches per epoch.
    traintimes[name] = elapsed / epochs_run / BATCHES_PER_EPOCH
aW1wb3J0IG51bXB5IGFzIG5wCm5wLnJhbmRvbS5zZWVkKDEyMykKZnJvbSBrZXJhcy5kYXRhc2V0cyBpbXBvcnQgaW1kYgpmcm9tIGtlcmFzLnByZXByb2Nlc3Npbmcuc2VxdWVuY2UgaW1wb3J0IHBhZF9zZXF1ZW5jZXMKZnJvbSBrZXJhcy5sYXllcnMgaW1wb3J0ICoKZnJvbSBrZXJhcy5tb2RlbHMgaW1wb3J0IFNlcXVlbnRpYWwsIE1vZGVsCmZyb20ga2VyYXMuY2FsbGJhY2tzIGltcG9ydCBFYXJseVN0b3BwaW5nCmZyb20ga2VyYXMgaW1wb3J0IG9wdGltaXplcnMKaW1wb3J0IG1hdHBsb3RsaWIucHlwbG90IGFzIHBsb3QKaW1wb3J0IHRpbWUKCgpNQVhMRU4gPSA1MDAKQkFUQ0hTSVpFID0gMTYKRU1CU0laRSA9IDUwCkhJRERFTlNJWkUgPSA1MApLRVJORUxTSVpFID0gNQpWT0NBQlNJWkUgPSAxMDAwMAoKTUFYRVBPQ0hTID0gMjAKCgoKKHhfdHJhaW4sIHlfdHJhaW4pLCAoeF90ZXN0LCB5X3Rlc3QpID0gaW1kYi5sb2FkX2RhdGEocGF0aCA9ICJpbWRiLm5weiIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG51bV93b3JkcyA9IFZPQ0FCU0laRSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc2tpcF90b3AgPSAwLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtYXhsZW4gPSBNQVhMRU4sCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHN0YXJ0X2NoYXIgPSAxLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBvb3ZfY2hhciA9IDIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGluZGV4X2Zyb20gPSAzKQoKeV90cmFpbiA9IG5wLmV4cGFuZF9kaW1zKHlfdHJhaW4sIC0xKQp5X3Rlc3QgPSBucC5leHBhbmRfZGltcyh5X3Rlc3QsIC0xKQoKCgp4X3Rlc3QgPSB4X3Rlc3RbOjEwMDBdCnlfdGVzdCA9IHlfdGVzdFs6MTAwMF0KCnByaW50KCIjIHRyYWluaW5nIHNhbXBsZXM6IHt9OyAjIHZhbGlkYXRpb24gc2FtcGxlczoge30iLmZvcm1hdChsZW4oeF90cmFpbiksIGxlbih4X3Rlc3QpKSkKCkJBVENIRVNfUEVSX0VQT0NIID0gbGVuKHhfdHJhaW4pIC8vIEJBVENIU0laRQoKZGVmIGdlbmVyYXRvcih4LCB5LCByZXR1cm5fcG9zaXRpb25zID0gRmFsc2UpOgogICAgd2hpbGUgVHJ1ZToKICAgICAgICBmb3IgaSBpbiByYW5nZSgwLCBsZW4oeCksIEJBVENIU0laRSk6CiAgICAgICAgICAgIHhfYmF0Y2ggPSB4W2k6aStCQVRDSFNJWkVdICMgRE9ORQogICAgICAgICAgICBwcmludCh4X2JhdGNoKQogICAgICAgICAgICB5X2JhdGNoID15W2k6aStCQVRDSFNJWkVdICMgRE9ORQogICAgICAgICAgICAgICAgCiAgICAgICAgICAgIHlpZWxkKHBhZF9zZXF1ZW5jZXMoeF9iYXRjaCksIHlfYmF0Y2gpCgpkZWYgYnVpbGRfZ3J1X21vZGVs
KCk6CiAgICBlbWJlZGRpbmdfTCA9IEVtYmVkZGluZyhpbnB1dF9kaW0gPSBWT0NBQlNJWkUsIG91dHB1dF9kaW0gPSBFTUJTSVpFLCBtYXNrX3plcm8gPSBUcnVlKQogICAgZ3J1X0wgPSBCaWRpcmVjdGlvbmFsKEdSVSh1bml0cyA9IEhJRERFTlNJWkUgLy8gMikpCiAgICBvdXRwdXRfTCA9IERlbnNlKHVuaXRzID0gMSwgYWN0aXZhdGlvbiA9ICJzaWdtb2lkIikKICAgIHJldHVybiBTZXF1ZW50aWFsKFtlbWJlZGRpbmdfTCwgZHJvcG91dF9MLCBncnVfTCwgZHJvcG91dF9MLCBvdXRwdXRfTF0pCgpkZWYgYnVpbGRfY25uX21vZGVsKCk6CiAgICAjRE9ORQogICAgZW1iZWRkaW5nX0wgPSBFbWJlZGRpbmcoaW5wdXRfZGltID0gVk9DQUJTSVpFLCBvdXRwdXRfZGltID0gRU1CU0laRSwgbWFza196ZXJvID0gRmFsc2UpCiAgICBkcm9wb3V0X0wgPSBEcm9wb3V0KDAuMjUpCiAgICAjUG9zbW90cmV0CiAgICBjb252X0wgPSBDb252b2x1dGlvbjFEKGNvbnZvbHV0aW9uX251bV9maWx0ZXJzLCBjb252b2x1dGlvbl9maWx0ZXJfbGVuZ3RoLCBpbnB1dF9sZW5ndGg9RU1CU0laRSwKICAgICAgICAgICAgICAgICAgICAgICAgIGlucHV0X2RpbT1FTUJTSVpFLCBib3JkZXJfbW9kZT0nc2FtZScsIGFjdGl2YXRpb249J3RhbmgnKShpbnApCiAgICBtYXhwb29sX0wgPSBNYXhQb29saW5nMUQocG9vbF9sZW5ndGg9c2VxdWVuY2VfbGVuZ3RoKShjb252KQogICAgZHJvcG91dF9MID0gRHJvcG91dCgwLjI1KQogICAgb3V0cHV0X0wgPSBEZW5zZSh1bml0cyA9IDEsIGFjdGl2YXRpb24gPSAic2lnbW9pZCIpCiAgICByZXR1cm4gU2VxdWVudGlhbChbZW1iZWRkaW5nX0wsIGRyb3BvdXRfTCwgY29udl9MLCBtYXhwb29sX0wsIGRyb3BvdXRfTCwgb3V0cHV0X0xdKQoKCmRlZiBidWlsZF9lbWJfbW9kZWwoKToKICAgIGVtYmVkZGluZ19MID0gRW1iZWRkaW5nKGlucHV0X2RpbSA9IFZPQ0FCU0laRSwgb3V0cHV0X2RpbSA9IEVNQlNJWkUsIG1hc2tfemVybyA9IEZhbHNlKQogICAgZHJvcG91dF9MID0gRHJvcG91dCgwLjI1KQogICAgYXZhcmFnZXBvb2xfTCA9IEF2ZXJhZ2VQb29saW5nMUQocG9vbF9sZW5ndGg9Miwgc3RyaWRlPU5vbmUsIGJvcmRlcl9tb2RlPSd2YWxpZCcpCiAgICBkcm9wb3V0X0wgPSBEcm9wb3V0KDAuMjUpCiAgICBvdXRwdXRfTCA9IERlbnNlKHVuaXRzID0gMSwgYWN0aXZhdGlvbiA9ICJzaWdtb2lkIikKICAgIHJldHVybiBTZXF1ZW50aWFsKFtlbWJlZGRpbmdfTCwgZHJvcG91dF9MLCBhdmFyYWdlcG9vbF9MICwgZHJvcG91dF9MLCBvdXRwdXRfTF0pCiAgICAjIERvbmUKCmRlZiB0cmFpbl9tb2RlbChtb2RlbCwgeF90cmFpbiwgeV90cmFpbiwgeF90ZXN0LCB5X3Rlc3QpOgogICAgc2dkID0gb3B0aW1pemVycy5TR0QobHI9MC4wMSwgZGVjYXk9MWUtNiwgbW9tZW50dW09MC45LCBuZXN0ZXJvdj1UcnVlKQogICAgbW9kZWwuY29tcGlsZShsb3NzPSdjYXRlZ29yaWNhbF9jcm9zc2VudHJvcHknLCBvcHRpbWl6ZXI9InNnZCIsIG1l
dHJpY3M9WydhY2N1cmFjeSddKSAjIERvbmUKICAgIAogICAgZWFybHlzdG9wID0gRWFybHlTdG9wcGluZyhtb25pdG9yID0gInZhbF9hY2MiLCBwYXRpZW5jZSA9IDcpCiAgICAKICAgIGhpc3RvcnkgPSBtb2RlbC5maXRfZ2VuZXJhdG9yKGdlbmVyYXRvcih4X3RyYWluLCB5X3RyYWluKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHN0ZXBzX3Blcl9lcG9jaCA9IEJBVENIRVNfUEVSX0VQT0NILAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdmFsaWRhdGlvbl9kYXRhID0gKHBhZF9zZXF1ZW5jZXMoeF90ZXN0KSwgeV90ZXN0KSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGVwb2NocyA9IE1BWEVQT0NIUywgY2FsbGJhY2tzID0gW2Vhcmx5c3RvcF0pCiAgICAKICAgIHJldHVybiBoaXN0b3J5Lmhpc3RvcnkKCm1vZGVscyA9IHt9CgojdW5jb21tZW50IGFsbCBtb2RlbHMgdGhhdCB5b3UgaGF2ZSBpbXBsZW1lbnRlZDoKI21vZGVsc1siZ3J1K2F0dG4iXSA9IGJ1aWxkX2dydV9hdHRuX21vZGVsKCkKbW9kZWxzWyJncnUiXSA9IGJ1aWxkX2dydV9tb2RlbCgpCiNtb2RlbHNbImNubiJdID0gYnVpbGRfY25uX21vZGVsKCkKI21vZGVsc1siZW1iIl0gPSBidWlsZF9lbWJfbW9kZWwoKQoKaGlzdG9yaWVzID0ge30KdHJhaW50aW1lcyA9IHt9Cgpmb3IgbmFtZSBpbiBzb3J0ZWQobW9kZWxzLmtleXMoKSk6CiAgICBwcmludCgiVHJhaW5pbmciLCBuYW1lKQogICAgYmVmb3JlID0gdGltZS50aW1lKCkKICAgIGhpc3Rvcmllc1tuYW1lXSA9IHRyYWluX21vZGVsKG1vZGVsc1tuYW1lXSwgeF90cmFpbiwgeV90cmFpbiwgeF90ZXN0LCB5X3Rlc3QpCiAgICBkdXJhdGlvbiA9IHRpbWUudGltZSgpIC0gYmVmb3JlCiAgICB0cmFpbnRpbWVzW25hbWVdID0gZHVyYXRpb24gLyBsZW4oaGlzdG9yaWVzW25hbWVdWyJsb3NzIl0pIC8gQkFUQ0hFU19QRVJfRVBPQ0g=