In [None]:
# Let's build a Deep Neural Network using TensorFlow!
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

In [None]:
# If we have a GPU we add this line
# with tf.device("/gpu:0"):

In [None]:
# Create the TensorFlow session; later cells use it (via sess.run) to execute graph operations
sess = tf.Session()

In [None]:
# Architecture parameters for the DNN
N = 100            # input neurons (number of predictors)
D = 4              # output neurons (number of classes)
H = [72, 48, 24]   # hidden layer sizes, in order
L = [N, *H, D]     # full list of layer sizes: input, hidden..., output

In [None]:
# Read in the data files
# Define 'basedir' as wherever your data files are!
# basedir = '/Users/will/Desktop/'

# 'training_files.par' is a whitespace-separated table with one row per data
# file: "<stdim> <filename>". Anything after '#' is ignored.
files = pd.read_table(basedir + 'training_files.par', sep=r'\s+', comment='#', names=['stdim', 'filename'])

# Each data file is a whitespace-separated table of integers with N columns.
tp = [pd.read_table(basedir + fname, sep=r'\s+', names=range(N), dtype=int) for fname in files['filename'].values]
ltp = [len(p) for p in tp]
training_predictors = pd.concat(tp).reset_index(drop=True)

# Build the one-hot class table: one column per distinct 'stdim' value
# (labelled -stdim, in order of first appearance). The rows contributed by a
# file get a 1 in the column of that file's stdim and 0 everywhere else.
# Columns are filled as plain numpy arrays so every write is guaranteed to
# land (no chained indexing on the DataFrame) and every column has exactly
# one entry per predictor row.
num_samples = len(training_predictors)
class_columns = {}
total = 0  # index of the first row belonging to the current file
for n_rows, stdim in zip(ltp, files['stdim'].values):
    if -stdim not in class_columns:
        class_columns[-stdim] = np.zeros(num_samples, dtype=np.int64)
    class_columns[-stdim][total:total + n_rows] = 1
    total += n_rows
training_classes = pd.DataFrame(class_columns, index=training_predictors.index)

In [None]:
# Preview the first ten predictor rows
print(training_predictors.iloc[:10])

In [None]:
# Preview the first ten one-hot class rows (before shuffling)
print(training_classes.iloc[:10])

In [None]:
# Shuffle predictors and classes together so every row keeps its own label.
# The two tables are glued side by side, the rows are permuted, and the
# result is split again: the first N columns are predictors, the last D
# columns are the one-hot classes.
shuffled_data = pd.concat([training_predictors, training_classes], axis=1)
shuffled_data = shuffled_data.sample(frac=1).reset_index(drop=True)
training_predictors = shuffled_data.loc[:, shuffled_data.columns[:N]]
training_classes = shuffled_data.loc[:, shuffled_data.columns[-D:]]

In [None]:
# Preview the first ten one-hot class rows (after shuffling)
print(training_classes.iloc[:10])

In [None]:
# Split point: the first 80% of the rows are used for training,
# the remaining rows (the last 20%) are held out for testing
num_samples = len(training_predictors)
training_size = int(0.8 * num_samples)
test_size = num_samples - training_size

In [None]:
# Helper that creates the trainable parameters of one layer
# Called once per pair of consecutive layers
def get_wb(num_inputs, num_outputs):
    """Create the weight and bias variables for a single layer.

    Args:
        num_inputs: Number of neurons feeding into the layer.
        num_outputs: Number of neurons in the layer.

    Returns:
        (weights, biases): a [num_inputs, num_outputs] variable initialized
        from a truncated normal with stddev 0.0001, and a [num_outputs]
        variable initialized to ones.
    """
    # Small random starting weights
    weight_init = tf.truncated_normal([num_inputs, num_outputs], stddev=0.0001)
    weights = tf.Variable(weight_init)
    # Every bias starts at 1
    bias_init = tf.ones([num_outputs])
    biases = tf.Variable(bias_init)
    return weights, biases

In [None]:
# Function to return hidden layers
# Hidden layers use a rectified linear unit (ReLU) rather than softmax
# This speeds up convergence
def get_hl(data, weights, biases):
    """Build the feed-forward network and return its output and hidden layers.

    Args:
        data: Input tensor fed to the first layer.
        weights: List of weight matrices, one per layer; the last entry
            belongs to the output layer.
        biases: List of bias vectors, one per layer, in the same order as
            `weights`.

    Returns:
        (model, layers): `model` is the softmax of the final linear layer, so
        it can be read as a set of class probabilities; `layers` is the list
        of ReLU hidden-layer activations, which is empty when only the output
        layer's parameters are supplied.
    """
    layers = []
    for i, layer_weights in enumerate(weights[:-1]):
        # Linear function at each hidden layer, passed through the ReLU;
        # its output becomes the input of the next layer
        data = tf.nn.relu(tf.matmul(data, layer_weights) + biases[i])
        layers.append(data)
    # `data` is now the last hidden activation, or the raw input when there
    # are no hidden layers (indexing layers[-1] would fail in that case).
    # The final output is sent to a softmax function
    model = tf.nn.softmax(tf.matmul(data, weights[-1]) + biases[-1])
    return model, layers

In [None]:
# Create one (weights, biases) pair for every pair of consecutive layers in L
W, b = [], []
for fan_in, fan_out in zip(L[:-1], L[1:]):
    layer_weights, layer_biases = get_wb(fan_in, fan_out)
    W.append(layer_weights)
    b.append(layer_biases)

In [None]:
# Graph inputs: placeholders are filled in through feed_dict on every sess.run.
# The leading None dimension leaves the number of samples per feed open.
predictors = tf.placeholder(dtype="float", shape=[None, N])
classes = tf.placeholder(dtype="float", shape=[None, D])

In [None]:
# Build the network on the input placeholder using the weight/bias lists W and b.
# Only the softmax output is kept; the list of hidden-layer activations is discarded.
model, _ = get_hl(predictors, W, b)

In [None]:
# Cross-entropy cost, summed over every sample and class that is fed in.
# The predicted probabilities are clipped to [1e-10, 1] so log never sees 0.
clipped_model = tf.clip_by_value(model, 1e-10, 1.0)
cost = -tf.reduce_sum(classes * tf.log(clipped_model))

In [None]:
# Training operation: one step of Adam (adaptive moment estimation) on the cost
optimizer = tf.train.AdamOptimizer(learning_rate=0.00005)
op = optimizer.minimize(cost)
# Alternative optimizers to experiment with:
#op = tf.train.GradientDescentOptimizer(0.0001).minimize(cost)
#op = tf.train.AdadeltaOptimizer(learning_rate=0.00005).minimize(cost)

In [None]:
# Accuracy: the fraction of fed samples whose most probable predicted class
# is the same as the class marked in the one-hot label
predicted_class = tf.argmax(model, 1)
actual_class = tf.argmax(classes, 1)
correct_prediction = tf.equal(predicted_class, actual_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

In [None]:
# Initialize all TensorFlow variables (the weights and biases) inside the session.
# This must run before any operation that reads those variables.
init = tf.global_variables_initializer()
sess.run(init)

In [None]:
# Train the DNN
T = 3000 # Number of training steps

# Batch size
# Each training step uses (at most) this many consecutive samples
B = 1024

# Never ask for more samples than the training split contains; otherwise the
# random offset below would be drawn from an empty range and raise ValueError
batch_size = min(B, training_size)

# Training-set accuracy, recorded once every 100 steps
acc_data = []

print('Epoch \t Cost \t\t Accuracy')
print('-----------------------------------')
for i in range(1, T + 1):
    # Pick a random window of consecutive samples from the training split.
    # randint's upper bound is exclusive, so the "+ 1" allows the window that
    # ends on the very last training sample.
    offset = np.random.randint(training_size - batch_size + 1)
    batch_predictors = training_predictors.iloc[offset:(offset + batch_size)]
    batch_classes = training_classes.iloc[offset:(offset + batch_size)]
    # One optimizer step on this batch; also fetch the batch cost for logging
    _, curr_cost = sess.run([op, cost], feed_dict={predictors: batch_predictors, classes: batch_classes})

    if i % 100 == 0:
        # Accuracy over the whole training split (not just the current batch)
        acc = sess.run(accuracy, feed_dict={predictors: training_predictors.iloc[:training_size],
                                            classes: training_classes.iloc[:training_size]})
        print(i, '\t', curr_cost, '\t', acc)
        acc_data.append(acc)

In [None]:
# Plot the training-set accuracy recorded during training.
# One point was stored every 100 training steps, so the x axis counts
# those checkpoints rather than individual steps.
plt.plot(acc_data)
plt.xlabel('Checkpoint (one per 100 training steps)')
plt.ylabel('Training accuracy')
plt.show()

In [None]:
# What's really happening?
# Look at a few individual training samples: print the softmax output, the
# one-hot prediction derived from it, the true one-hot label, and whether
# the two agree.

# Create the extra graph nodes once, outside the loop, so that running the
# loop does not keep adding new operations to the graph
predicted_index = tf.argmax(model, 1)
is_correct = tf.cast(correct_prediction, "float")

for i in range(1, 5):
    offset = np.random.randint(training_size)
    single_predictor = training_predictors[offset:offset+1]
    single_class = training_classes[offset:offset+1]
    # Evaluation only: the training op is deliberately not run here, so
    # inspecting samples does not change the trained weights
    curr_model, curr_cost, am, cp = sess.run([model, cost, predicted_index, is_correct],
        feed_dict={predictors: single_predictor, classes: single_class})

    # One slot per output class (D of them), with a 1 at the predicted class
    prediction = [0] * D
    prediction[am[0]] = 1
    print(curr_model, '-->', prediction, '==?', single_class.values, ' ', cp)

In [None]:
# Evaluate on the held-out rows (the last test_size rows), which were never
# used as training batches. Skipped when there is nothing held out.
if test_size > 0:
    held_out = slice(-test_size, None)
    feed_dict = {predictors: training_predictors[held_out], classes: training_classes[held_out]}
    test_accuracy = sess.run(accuracy, feed_dict)
    print('\nAccuracy:', test_accuracy)

In [None]:
# All done!
# sess.close()