Sandya Subramanian
HST (Health Sciences and Technology), 2nd year Ph.D.
using CSV, Plots, Images
using Knet, AutoGrad
using Knet: sigm_dot, tanh_dot
using StatsBase
Knet.gpu(0)
include("findpeaks.jl")
include("pan_tompkins.jl")
include("run_pan_tompkins.jl")
include("get_breaks.jl")
include("get_dataset.jl")
include("get_comb_dataset.jl")
include("get_traintest.jl")
After seeing that classification was very successful with just an MLP, I decided to try an LSTM to see whether it made a difference to have the time dependencies encoded directly in the model. Since ECG is a very regular signal in time, one would expect temporal dependencies to matter.
#Task 1: Create dictionary
function createVocabulary(mode)
    vocab = Dict{Int64,Int64}()
    # Depends on whether binary or multiclass
    if mode == "binary"
        for i = 1:2
            vocab[i] = i
        end
    else
        for i = 1:8
            vocab[i] = i
        end
    end
    return vocab
end
#LSTM Network Function
function lstm(weight,bias,hidden,cell,input)
    # weight is assumed to be (I+H) × 4H, with the four gates in consecutive column blocks of width H
    gates = hcat(input,hidden) * weight .+ bias
    hsize = size(hidden,2) # size of the hidden layer
    forget  = sigm_dot(gates[:,1:hsize])
    ingate  = sigm_dot(gates[:,1+hsize:2hsize])
    outgate = sigm_dot(gates[:,1+2hsize:3hsize])
    change  = tanh_dot(gates[:,1+3hsize:end])
    cell   = cell .* forget + ingate .* change
    hidden = outgate .* tanh_dot(cell)
    return (hidden,cell)
end
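As a quick sanity check on the gate layout, here is a minimal sketch with hypothetical toy sizes (ni, nh, and nb below are not part of the actual model); it builds KnetArrays directly since the notebook runs everything on the GPU. The weight matrix is (I+H) × 4H, with the forget, input, output, and change gates stored in consecutive column blocks of width H.
# Toy shape check for the lstm cell above (hypothetical sizes, GPU arrays)
ni, nh, nb = 5, 3, 2                       # toy input size, hidden size, batch size
w = KnetArray(rand(Float32, ni+nh, 4nh))   # gate weights: (I+H) × 4H
b = KnetArray(zeros(Float32, 1, 4nh))      # gate biases: 1 × 4H
h = KnetArray(zeros(Float32, nb, nh))      # hidden state: batch × H
c = KnetArray(zeros(Float32, nb, nh))      # cell state: batch × H
x = KnetArray(rand(Float32, nb, ni))       # one toy batch of inputs: batch × I
hnew, cnew = lstm(w, b, h, c, x)
@assert size(hnew) == (nb, nh) && size(cnew) == (nb, nh)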
#Task 2: Create Initial Weights
function initweights(lenhidden, lenvocab, embed, arraytype)
    init(d...) = xavier(d...)
    bias(d...) = zeros(d...)
    tmp_model = Vector{Array{Float32}}(2*length(lenhidden)+2) # 2*num layers + 2
    X = embed # size of the x input (721)
    for k = 1:length(lenhidden)
        # Gate weights and biases for layer k (each element of the Vector is a matrix)
        tmp_model[2k-1] = init(X+lenhidden[k],4*lenhidden[k])
        tmp_model[2k] = bias(1,4*lenhidden[k])
        X = lenhidden[k] # The next layer's input size is this layer's size
    end
    #INPUT (left commented out)
    #tmp_model[end-2] = init(lenvocab,embed)
    #OUTPUT
    tmp_model[end-1] = init(lenhidden[end],lenvocab) # last hidden layer size × number of classes
    tmp_model[end] = bias(1,lenvocab) # one bias per class
    if arraytype == "KnetArray"
        model = Vector{KnetArray{Float32}}(2*length(lenhidden)+2)
        for k = 1:length(tmp_model)
            model[k] = KnetArray{Float32}(tmp_model[k]);
        end
    else
        model = copy(tmp_model);
    end
    return model
end
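To make the layout of the returned model vector concrete, here is a small hedged check with a hypothetical configuration (one 128-unit layer, binary vocabulary, 721-sample inputs, plain CPU arrays); the sizes in the comments follow directly from the construction above.
# Hypothetical layout check: one 128-unit layer, 2 classes, 721-sample inputs, CPU arrays
m = initweights([128], 2, 721, "Array")
println(map(size, m))
# expected: (849, 512) gate weights for layer 1 ((721+128) × 4*128)
#           (1, 512)   gate biases for layer 1
#           (128, 2)   output projection (last hidden size × number of classes)
#           (1, 2)     output bias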
#Task 3: Create Initial State
function initstate(model, batch)
    # Check whether the model lives on the CPU (Array) or GPU (KnetArray)
    if isa(model[1], KnetArray)
        arraytype = "KnetArray"
    else
        arraytype = "Array"
    end
    nlayers = div(length(model)-2,2)
    state = Vector{Any}(2*nlayers)
    for k = 1:nlayers
        # Layer size H from the 4H-wide gate parameters
        H = div(size(model[2k],2),4)
        # Hidden and cell for each layer, each a batch × H matrix of zeros
        state[2k-1] = zeros(batch,H)
        state[2k] = zeros(batch,H)
    end
    if arraytype == "KnetArray"
        for k = 1:length(state)
            state[k] = KnetArray{Float32}(state[k]);
        end
    end
    return state
end
#Task 4: Create Predict function
function predict(model, state, input; pdrop=0)
    # The batch arrives as features × batch; transpose to batch × features and
    # move it to the GPU only when the model weights are KnetArrays
    if isa(AutoGrad.getval(model[1]), KnetArray)
        input = KnetArray(input');
    else
        input = input';
    end
    nlayers = div(length(model)-2,2)
    newstate = similar(state)
    for k = 1:nlayers
        # For layer k: state[2k-1] -> hidden, state[2k] -> cell,
        #              model[2k-1] -> weights, model[2k] -> bias
        hidden, cell = lstm(model[2k-1],model[2k],state[2k-1],state[2k],input)
        newstate[2k-1] = hidden
        newstate[2k] = cell
        input = hidden # the output of this layer feeds the next one
    end
    output = input * model[end-1] .+ model[end] # class scores for each instance in the batch
    return output,newstate
end
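And a hedged end-to-end shape check of a single forward pass, again with a hypothetical one-layer configuration; it builds the model as KnetArrays since the notebook assumes a GPU, and the input batch is just random numbers standing in for 50 windows of 721 samples each.
# Hypothetical forward pass: one 128-unit layer, 2 classes, 721-sample windows,
# batch of 50, model and state on the GPU
m = initweights([128], 2, 721, "KnetArray")
s = initstate(m, 50)                 # hidden and cell states, each 50 × 128
x = rand(Float32, 721, 50)           # stand-in minibatch: 721 samples × 50 windows
scores, snew = predict(m, s, x)
@assert size(scores) == (50, 2)      # one row of class scores per window in the batch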
function accuracy_LSTM(model,state,data,p; average=true)
    total = cnt = 0
    for (x,y) in data
        total += accuracy(p(model,state,x)[1]',y)
        cnt += 1
    end
    average ? total / cnt : total
end
#Task 5: Create Loss Function
function loss(model, state, x_batch, y; newstate=nothing, pdrop=0)
    preds, nextstate = predict(model,state,x_batch)
    if newstate != nothing
        # Carry the updated state back to the caller, stripped of gradient tracking
        copy!(newstate, map(AutoGrad.getval,nextstate))
    end
    return nll(preds',y)
end
#Task 6: Create Train Function
function train(model, dtrn, optim, lossgradient, options; pdrop=0)
    state = initstate(model, options["batchsize"])
    for (x,y) in dtrn
        #temp = model[1]
        grads = lossgradient(model, state, x, y; newstate=state, pdrop=pdrop)
        update!(model, grads, optim)
        #println(sum(model[1]-temp))
    end
    return model
end
function prep_dataset(subj_list,fs)
    # Build the dataset from the given subjects at sampling rate fs, then split
    # into train/test sets for both the binary and multiclass problems
    abn_dataset, full_dataset, abn_truth_cats, full_bin_cats, label_key = get_comb_dataset(subj_list,fs)
    println(countmap(abn_truth_cats))
    println(countmap(full_bin_cats))
    xtst_mc, ytst_mc, xtrn_mc, ytrn_mc, xtst_bin, ytst_bin, xtrn_bin, ytrn_bin = get_traintest(abn_dataset,full_dataset,abn_truth_cats,full_bin_cats,0.1)
    return xtst_mc, ytst_mc, xtrn_mc, ytrn_mc, xtst_bin, ytst_bin, xtrn_bin, ytrn_bin
end
function setup(numepochs,lenhidden,mode,arraytype)
    # Options (lenhidden gives the sizes of one or more LSTM layers)
    options1 = Dict{String,Any}()
    options1["togenerate"] = Int64(4000)  # If non-zero generate given number of characters.
    options1["epochs"] = Int64(numepochs) # Number of epochs for training.
    options1["embed"] = Int64(721)        # Size of each input vector (721 samples).
    options1["batchsize"] = Int64(50)     # Number of sequences to train on in parallel.
    # options1["seqlength"] = Int64(721)  # Maximum number of steps to unroll the network for bptt. Initial epochs will use the epoch number as bptt length for faster convergence.
    options1["seed"] = -1                 # Random number seed. -1 or 0 is no fixed seed.
    options1["lr"] = 0.001                # Initial learning rate.
    options1["gclip"] = 3.0               # Value to clip the gradient norm at.
    options1["dpout"] = 0.0               # Dropout probability.
    #options1["seed"] > 0 && srand(options1["seed"])
    # Prep dataset
    #xtst_mc, ytst_mc, xtrn_mc, ytrn_mc, xtst_bin, ytst_bin, xtrn_bin, ytrn_bin = prep_dataset([207,212,203,209,201],360)
    xtst_mc, ytst_mc, xtrn_mc, ytrn_mc, xtst_bin, ytst_bin, xtrn_bin, ytrn_bin = prep_dataset([207, 212, 203, 209, 201, 202, 205, 208, 210, 213, 220, 221, 222, 230, 111, 112, 113, 114, 115, 116, 117, 118, 119, 121, 122, 123, 124, 100, 101, 103, 104, 106, 108, 109, 232, 233, 234],360)
    signal = [xtst_mc, ytst_mc, xtrn_mc, ytrn_mc, xtst_bin, ytst_bin, xtrn_bin, ytrn_bin]
    vocab = createVocabulary(mode);
    info("$(length(vocab)) unique values.") # 2 for binary, 8 for multiclass
    # Initialize the model with random weights
    model = initweights(lenhidden, length(vocab), options1["embed"], arraytype);
    return options1, lenhidden, signal, vocab, model
end
function getbatches(options1,signal,mode)
    # Select the input matrices for the requested mode and minibatch them
    if mode == "binary"
        dtrn = minibatch(signal[7], signal[8], options1["batchsize"]);
        dtst = minibatch(signal[5], signal[6], options1["batchsize"]);
    else
        dtrn = minibatch(signal[3], signal[4], options1["batchsize"]);
        dtst = minibatch(signal[1], signal[2], options1["batchsize"]);
    end
    return dtrn,dtst
end
function setuptrain(options1, dtrn, dtst, model)
    # Knet magic: differentiate the loss with respect to the first argument (the model)
    lossgradient = grad(loss,1);
    # Print the train and test accuracy of the random initial model
    firststate = initstate(model, options1["batchsize"])
    println((:epoch,0,:train,accuracy_LSTM(model,firststate,dtrn,predict),:test,accuracy_LSTM(model,firststate,dtst,predict)))
    # Set up an Adam optimizer for each parameter array
    optim = map(x->Adam(lr=options1["lr"], gclip=options1["gclip"]), model);
    return lossgradient, optim
end
function trainloop(options1, vocab, dtrn, dtst, lossgradient, optim, model)
    # MAIN LOOP
    firststate = initstate(model, options1["batchsize"])
    for epoch=1:options1["epochs"]
        @time train(model, dtrn, optim, lossgradient, options1; pdrop=options1["dpout"])
        # Calculate and print the train and test accuracy after each epoch
        println((:epoch,epoch,:train,accuracy_LSTM(model,firststate,dtrn,predict),:test,accuracy_LSTM(model,firststate,dtst,predict)))
    end
    return model
end
function main(numepochs,lenhidden,mode,arraytype)
    options1, lenhidden, signal, vocab, model = setup(numepochs,lenhidden,mode,arraytype)
    dtrn,dtst = getbatches(options1, signal, mode)
    lossgradient, optim = setuptrain(options1, dtrn, dtst, model)
    model = trainloop(options1, vocab, dtrn, dtst, lossgradient, optim, model)
end
Binary Classification
Interestingly enough, I was able to achieve the same high accuracy but with far fewer layers. Perhaps the structure built into the LSTM reduces the number of layers needed.
main(100,[128],"binary","KnetArray")
Multi-class Classification
Here I needed 3 layers, but still achieved the same high accuracy as with the MLP. Perhaps the time dependencies between different types of arrhythmias are less straightforward, which makes sense.
main(100,[128,256,64],"multiclass","KnetArray")