Sandya Subramanian
HST (Health Sciences and Technology), 2nd year Ph.D.
First, import the packages needed to work with the data...
#Pkg.add("CSV")
#Pkg.add("Plots")
#Pkg.add("TimeSeries")
#Pkg.add("Images")
#Pkg.add("Plotly")
using CSV, Plots, Images #, TimeSeries
using Knet, AutoGrad
using Knet: sigm_dot, tanh_dot
using StatsBase
# Select GPU device 0 so KnetArray allocations/ops run on that card.
Knet.gpu(0)
And get all the helper code I wrote...
include("findpeaks.jl")
include("pan_tompkins.jl")
include("run_pan_tompkins.jl")
include("get_breaks.jl")
include("get_dataset.jl")
include("get_comb_dataset.jl")
include("get_traintest.jl")
I decided to start off with something simple. I know cardiologists can classify beats pretty easily by eye, and most students with a few hours of training can at least tell if something is abnormal, so it can't be THAT hard of a problem. However, I wasn't sure if it would be easy enough for a non-deep-learning network. I used an MLP, and played around with the number of layers, starting at 1 and increasing as I saw improving accuracy. In the end, I used 4 layers.
# Forward pass through the MLP.
#
# `w` is a flat vector alternating weight matrices and bias vectors
# (w[1],w[3],... weights; w[2],w[4],... biases), as built by `weights`.
# `x` is a minibatch of inputs; `mat` flattens it to a 2-D matrix.
# Applies relu after every layer except the last and returns the raw
# scores (logits) of the final layer — `nll` in `loss` handles softmax.
function predict(w,x)
    for i=1:2:length(w)
        xm = mat(x)
        # Move a CPU input onto the GPU only when the weights themselves live
        # there. The original unconditionally wrapped Array inputs in
        # KnetArray, which broke CPU-only runs (atype=Array{Float32}).
        if isa(w[i], KnetArray) && isa(xm, Array)
            xm = KnetArray(xm)
        end
        x = w[i]*xm .+ w[i+1]
        if i<length(w)-1
            x = relu.(x) # max(0,x)
        end
    end
    return x
end
# Negative log-likelihood of the model's scores against the gold labels
# (Knet's `nll` applies softmax internally, so `predict` returns raw logits).
loss(w,x,ygold) = nll(predict(w,x),ygold)
# Gradient of `loss` with respect to the weights `w`, via AutoGrad.
lossgradient = grad(loss)
"""
    train(w, dtrn; lr=.5, epochs=10)

Run plain SGD over the minibatched training data `dtrn` for `epochs`
full passes, updating the weights `w` in place via `update!`.
Returns `w`.
"""
function train(w, dtrn; lr=.5, epochs=10)
    for _ in 1:epochs
        for (xbatch, ybatch) in dtrn
            grads = lossgradient(w, xbatch, ybatch)
            update!(w, grads; lr=lr)
        end
    end
    return w
end
"""
    weights(h...; atype=Array{Float32}, winit=0.1, mode="binary", inputdim=721)

Build initial MLP parameters as a flat vector alternating weight matrices
and bias vectors. `h...` are the hidden-layer sizes; the output layer has
2 units when `mode == "binary"` and 8 otherwise. Weights are `winit*randn`,
biases zero, both converted to `atype` (e.g. `KnetArray{Float32}` for GPU).
`inputdim` is the length of one input window; the default `2*360+1 == 721`
matches one beat-centered window at a 360 Hz sampling rate.
"""
function weights(h...; atype=Array{Float32}, winit=0.1, mode="binary", inputdim=Int(2*360+1))
    w = Any[]
    x = inputdim
    # Output layer width: 2 classes for binary, 8 for multi-class.
    sizes = mode == "binary" ? [h..., 2] : [h..., 8]
    for y in sizes
        push!(w, convert(atype, winit*randn(y,x)))
        push!(w, convert(atype, zeros(y, 1)))
        x = y  # this layer's output feeds the next layer's input
    end
    return w
end
# Load, label, and split the beat dataset for the given subject IDs.
#
# `get_comb_dataset` builds both the abnormal-only (multi-class) and full
# (binary) datasets at sampling rate `fs`; the class distributions are
# printed, then `get_traintest` performs a 10% test split. Returns the
# eight arrays (xtst_mc, ytst_mc, xtrn_mc, ytrn_mc,
# xtst_bin, ytst_bin, xtrn_bin, ytrn_bin).
function prep_dataset(subj_list,fs)
    abn_data, full_data, abn_cats, bin_cats, label_key = get_comb_dataset(subj_list, fs)
    # Show the class distribution under each labeling scheme.
    println(countmap(abn_cats))
    println(countmap(bin_cats))
    return get_traintest(abn_data, full_data, abn_cats, bin_cats, 0.1)
end
"""
    main(numepochs, mode, arraytype)

Train the beat-classification MLP for `numepochs` epochs. `mode` is
`"binary"` or `"multiclass"`; `arraytype` is `"Array"` for CPU, anything
else selects `KnetArray` (GPU). Returns `(w, dtrn, dtst)` — the trained
weights and the train/test minibatch lists.
"""
function main(numepochs,mode,arraytype)
    args = Dict{String,Any}()
    args["mode"] = mode #binary or multiclass
    args["seed"] = -1 #random number seed: use a nonnegative int for repeatable results
    args["batchsize"] = 50 #minibatch size
    args["epochs"] = numepochs #number of epochs for training
    args["hidden"] = [128, 256, 128, 64] #sizes of hidden layers
    args["lr"] = 0.1 #learning rate
    args["winit"] = 0.1 #w initialized with winit*randn()
    args["fast"] = false #skip loss printing for faster run
    # Pick the parameter array type directly; the original round-tripped it
    # through a string and eval(parse(...)), which is fragile and unnecessary.
    if arraytype == "Array"
        atype = Array{Float32}
    else
        atype = KnetArray{Float32}
    end
    args["atype"] = string(atype) #kept as a string only for the opts printout
    args["gcheck"] = 0 #check N random gradients per parameter
    if !args["fast"]
        println("opts=",[(k,v) for (k,v) in args]...)
    end
    args["seed"] > 0 && srand(args["seed"])
    w = weights(args["hidden"]...; atype=atype, winit=args["winit"], mode=args["mode"])
    # Subject record IDs (MIT-BIH style), sampled at 360 Hz.
    xtst_mc, ytst_mc, xtrn_mc, ytrn_mc, xtst_bin, ytst_bin, xtrn_bin, ytrn_bin = prep_dataset([207, 212, 203, 209, 201, 202, 205, 208, 210, 213, 220, 221, 222, 230, 111, 112, 113, 114, 115, 116, 117, 118, 119, 121, 122, 123, 124, 100, 101, 103, 104, 106, 108, 109, 232, 233, 234],360)
    # Select the dataset matching the requested classification mode.
    if args["mode"] == "multiclass"
        println("Multi-class classification")
        dtrn = minibatch(xtrn_mc, ytrn_mc, args["batchsize"])
        dtst = minibatch(xtst_mc, ytst_mc, args["batchsize"])
    else
        println("Binary classification")
        dtrn = minibatch(xtrn_bin, ytrn_bin, args["batchsize"])
        dtst = minibatch(xtst_bin, ytst_bin, args["batchsize"])
    end
    # Print train/test accuracy for the given epoch number.
    report(epoch)=println((:epoch,epoch,:trn,accuracy(w,dtrn,predict),:tst,accuracy(w,dtst,predict)))
    if args["fast"]
        # Fast path: train silently, then sync the GPU so @time-style
        # measurements outside are meaningful.
        (train(w, dtrn; lr=args["lr"], epochs=args["epochs"]); gpu()>=0 && Knet.cudaDeviceSynchronize())
    else
        report(0)
        @time for epoch=1:args["epochs"]
            train(w, dtrn; lr=args["lr"], epochs=1)
            report(epoch)
            if args["gcheck"] > 0
                gradcheck(loss, w, first(dtrn)...; gcheck=args["gcheck"], verbose=true)
            end
        end
    end
    return w, dtrn, dtst
end
Multi-class classification
w, dtrn, dtst = main(50,"multiclass","KnetArray")
Whoohoo! The accuracy is over 98.5% on the test set after just 50 epochs!
Binary classification
w, dtrn, dtst = main(50,"binary","KnetArray")
Here as well, the accuracy is over 97% after 50 epochs. Both results are highly successful and promising.