import pickle
import argparse
import keras
import numpy as np
import tensorflow as tf

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import average_precision_score as auprc
from sklearn.metrics import roc_auc_score as auc_score
from keras.utils import multi_gpu_model
from keras.layers import Input, Dense, GRU, Lambda, Permute
from keras.models import Model
from interpolation_layer import single_channel_interp, cross_channel_interp

np.random.seed(10)
tf.set_random_seed(10)

# Loading dataset
"""
y : (N,) discrete labels for classification, real values for regression.
x : (N, D, tn) input multivariate time series, where N is the number of
    data cases, D is the dimension of the sparse and irregularly sampled
    time series, and tn is the union of the observed time stamps over all
    dimensions for data case n. Since each tn has variable length, the
    series are zero-padded to a common array shape.
m : (N, D, tn) observation mask; m[i, j, k] = 0 means x[i, j, k] is not observed.
T : (N, D, tn) the actual time stamps of the observations.
"""

"""To implement the autoencoder component of the loss, we introduce a set
of masking variables mr (and mr1) for each data point. If drop_mask = 0,
we remove the data point as an input to the interpolation network and
include the predicted value at this time point when assessing the
autoencoder loss. In practice, we randomly select 20% of the observed
data points to hold out from every input time series; see drop_mask() below."""


def drop_mask(mask, perc=0.2):
    """Randomly zero out `perc` of the observed entries in every channel.

    A zero in the returned array where mask == 1 marks a held-out point:
    hidden from the interpolation network and used as a reconstruction target.
    """
    new_mask = np.ones_like(mask)
    new_mask *= mask
    for i in range(mask.shape[0]):
        for j in range(mask.shape[1]):
            count = np.sum(mask[i, j], dtype='int')
            # hold out points only when at least two would be dropped
            if int(perc * count) > 1:
                index = 0
                r = np.ones((count, 1))
                b = np.random.choice(count, int(perc * count), replace=False)
                r[b] = 0
                for k in range(mask.shape[2]):
                    if mask[i, j, k] > 0:
                        new_mask[i, j, k] = r[index]
                        index += 1
    return new_mask
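
# Quick sanity check (illustration only, not in the original script; kept as a
# comment so the global NumPy random stream used below is left untouched):
#   toy = np.ones((2, 3, 10))          # 10 observed points in each channel
#   assert drop_mask(toy).sum() == 48  # 2 of 10 held out in all 6 channels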


x = np.concatenate((x, m, T, drop_mask(m)), axis=1)  # stack values, mask, time stamps, hold-out mask
print(x.shape, y.shape)

ap = argparse.ArgumentParser()
ap.add_argument("-g", "--gpus", type=int, default=4,
                help="# of GPUs to use for training")
ap.add_argument("-batch", "--batch_size", type=int, default=256,
                help="batch size to use for training")
ap.add_argument("-e", "--epochs", type=int, default=100,
                help="# of epochs for training")
ap.add_argument("-ref", "--reference_points", type=int,
                default=192, help="# of reference points")
ap.add_argument("-units", "--hidden_units", type=int,
                default=100, help="# of hidden units")
ap.add_argument("-hfadm", "--hours_from_adm", type=int,
                default=48, help="hours of record to look at")

args = vars(ap.parse_args())
gpu_num = args["gpus"]
num_epochs = args["epochs"]
hid = args["hidden_units"]
timestamp = x.shape[2]
num_features = x.shape[1] // 4  # x stacks 4 blocks: values, mask, time stamps, hold-out mask
ref_points = args["reference_points"]
hours_look_ahead = args["hours_from_adm"]
if gpu_num > 0:
    batch = args["batch_size"]*gpu_num
else:
    batch = args["batch_size"]

# Autoencoder reconstruction loss


def customloss(ytrue, ypred):
    # standard deviation of each feature, as reported in the paper for the
    # MIMIC-III data
    wc = np.array([3.33, 23.27, 5.69, 22.45, 14.75, 2.32,
                   3.75, 1.0, 98.1, 23.41, 59.32, 1.41])
    wc.shape = (1, num_features)
    y = ytrue[:, :num_features, :]
    m2 = ytrue[:, 3*num_features:4*num_features, :]
    m2 = 1 - m2  # held-out points (hold-out mask == 0) are reconstruction targets
    m1 = ytrue[:, num_features:2*num_features, :]
    m = m1*m2  # observed AND held out
    ypred = ypred[:, :num_features, :]
    x = (y - ypred)*(y - ypred)
    x = x*m
    count = tf.reduce_sum(m, axis=2)
    count = tf.where(count > 0, count, tf.ones_like(count))
    x = tf.reduce_sum(x, axis=2)/count
    x = x/(wc**2)  # normalize the squared error by the feature variance
    x = tf.reduce_sum(x, axis=1)/num_features
    return tf.reduce_mean(x)
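
# In symbols: with M_d the set of time points of feature d that are observed
# (m1 == 1) and held out (hold-out mask == 0), and sigma_d = wc[0, d],
#   loss = mean_n (1/D) sum_d (1 / (|M_d| sigma_d^2)) sum_{t in M_d} (y_dt - yhat_dt)^2,
# i.e. a variance-normalized MSE over only the held-out observations.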


seed = 0
results = {'loss': [], 'auc': [], 'acc': [], 'auprc': []}

# interpolation-prediction network


def interp_net():
    # with multiple GPUs, build the template model on the CPU so that
    # multi_gpu_model can replicate it across the GPUs
    if gpu_num > 1:
        dev = "/cpu:0"
    else:
        dev = "/gpu:0"
    with tf.device(dev):
        main_input = Input(shape=(4*num_features, timestamp), name='input')
        sci = single_channel_interp(ref_points, hours_look_ahead)
        cci = cross_channel_interp()
        interp = cci(sci(main_input))
        reconst = cci(sci(main_input, reconstruction=True),
                      reconstruction=True)
        aux_output = Lambda(lambda x: x, name='aux_output')(reconst)
        z = Permute((2, 1))(interp)  # (features, time) -> (time, features) for the GRU
        z = GRU(hid, activation='tanh', recurrent_dropout=0.2, dropout=0.2)(z)
        main_output = Dense(1, activation='sigmoid', name='main_output')(z)
        orig_model = Model([main_input], [main_output, aux_output])
    if gpu_num > 1:
        model = multi_gpu_model(orig_model, gpus=gpu_num)
    else:
        model = orig_model
    orig_model.summary()  # summary() prints itself and returns None
    return model


earlystop = keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0.0000, patience=20, verbose=0)
callbacks_list = [earlystop]

# 5-fold cross-validation

i = 0
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
for train, test in kfold.split(np.zeros(len(y)), y):
    print("Running Fold:", i+1)
    model = interp_net()  # re-initializing every fold
    model.compile(optimizer='adam',
                  loss={'main_output': 'binary_crossentropy',
                        'aux_output': customloss},
                  loss_weights={'main_output': 1., 'aux_output': 1.},
                  metrics={'main_output': 'accuracy'})
    model.fit({'input': x[train]},
              {'main_output': y[train], 'aux_output': x[train]},
              batch_size=batch, callbacks=callbacks_list,
              epochs=num_epochs, validation_split=0.20, verbose=2)
    y_pred = model.predict(x[test], batch_size=batch)
    y_pred = y_pred[0]  # keep only the main (classification) output
    total_loss, score, reconst_loss, acc = model.evaluate(
        {'input': x[test]},
        {'main_output': y[test], 'aux_output': x[test]},
        batch_size=batch, verbose=0)
    results['loss'].append(score)
    results['acc'].append(acc)
    results['auc'].append(auc_score(y[test], y_pred))
    results['auprc'].append(auprc(y[test], y_pred))
    print(results)
    i += 1
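
# Cross-validation summary (an added convenience, not part of the original
# script): mean and standard deviation of each metric across the five folds.
for metric in ('loss', 'acc', 'auc', 'auprc'):
    print(metric, np.mean(results[metric]), np.std(results[metric]))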