#
# A simple routine to create a combination of several Gaussian distributions.
# It requires the 'clusterGeneration' library to create random covariance
# matrices and the 'mvtnorm' library to create random numbers from
# multi-dimensional Gaussians. Here we only generate 2D variables though.
#
show.simple.data = function(r, xlim, ylim, nx=100, ny=100) {
  require(mvtnorm)   # for dmvnorm()

  dims = dim(r$covars)
  nclass = dims[3]

  x = seq(xlim[1], xlim[2], length=nx)
  y = seq(ylim[1], ylim[2], length=ny)

  # Evaluate the 2D Gaussian density (zero mean, covariance sig) at (a, b).
  g = function(a, b, sig) { dmvnorm(x=cbind(a, b), sigma=sig) }

  # Sum the class densities over the grid.
  im = array(0, dim=c(nx, ny))
  for (i in seq(1, nclass, 1)) {
    z = outer(x - r$means[i, 1], y - r$means[i, 2], g, sig=r$covars[, , i])
    im = im + z
  }
  image(x, y, im)

  return(list(im=im, x=x, y=y))
}

simple.data = function (n=200, nclass=2) {
  #
  # Create simple data based on mixtures of Gaussians. n gives the number of
  # random points drawn per class (default 200) and nclass the number of
  # independent classes (Gaussians).
  #
  require(clusterGeneration)
  require(mvtnorm)

  # Centres of the Gaussians.
  xpos = seq(-nclass*2, nclass*2, length=nclass)
  ypos = runif(nclass, min=-2*nclass, max=2*nclass)

  # Create random covariance matrices.
  covars = array(0, dim=c(2, 2, nclass))
  for (i in 1:nclass) {
    # Create a random covariance matrix.
    cov = genPositiveDefMat(2, covMethod="eigen", rangeVar=c(1, 10),
                            lambdaLow=1, ratioLambda=10)
    # I divide the covariance matrix by 2 to reduce overlap a bit.
    covars[, , i] = cov$Sigma/2.

    # Draw random data. Use the stored (halved) covariance so that the data
    # actually match the covariances returned to the caller.
    x = rmvnorm(n=n, mean=c(xpos[i], ypos[i]), sigma=covars[, , i])

    # Assign a class.
    class.this = rep(i*1.0, n)
    if (i == 1) {
      data = x
      class = class.this
    } else {
      data = rbind(data, x)
      class = c(class, class.this)
    }
  }

  return(list(means=cbind(xpos, ypos), covars=covars, data=data, class=class))
}

get.grid = function(r, nx=100, ny=100) {
  # For the visualisation of the classification we need to create
  # a 2D grid of x & y.
  xlim = range(r$data[, 1])
  ylim = range(r$data[, 2])
  x = seq(from=xlim[1], to=xlim[2], length=nx)
  y = seq(from=ylim[1], to=ylim[2], length=ny)
  # All (x, y) combinations as a two-column matrix, with x varying fastest.
  z = cbind(rep(x, times=length(y)), rep(y, each=length(x)))
  return(list(x=x, y=y, z=z))
}

#
# The following routines classify based on a random sample from the
# above routine.
#

#
# First k nearest-neighbour methods.
#
knn.example = function (r, n=3, show.misclass=FALSE) {
  require(class)          # for knn()
  require(latticeExtra)   # only needed for the commented-out lattice code

  # Get a grid for visualisation.
  g = get.grid(r)
  # Run classification for the grid.
  k = knn(r$data, g$z, as.matrix(r$class), k=n)
  # Run classification for the input data.
  k.test = knn(r$data, r$data, as.matrix(r$class), k=n)

  # Quantify the mis-classification rate.
  diff.class = as.numeric(k.test) - as.numeric(r$class)
  i.misclass = which(abs(diff.class) > 0)
  n.misclass = length(i.misclass)
  f.misclass = n.misclass/length(r$class)

  z = matrix(as.numeric(k), length(g$x), length(g$y))

  # Plot data.
  plot(r$data, col=r$class)
  contour(g$x, g$y, z, add=TRUE)

  # Overplot mis-classified objects.
  if (show.misclass) {
    points(r$data[i.misclass, 1], r$data[i.misclass, 2],
           col=r$class[i.misclass], pch=19, cex=1.5)
  }

  # Create a visualisation of the results using lattice graphics:
  ## grid = expand.grid(x=g$x, y=g$y)
  ## grid$z = as.vector(k)
  ## p = levelplot(z ~ x+y, data=grid, col.regions=gray.colors(100))
  ## plot.data = data.frame(xx=as.vector(r$data[, 1]),
  ##                        yy=as.vector(r$data[, 2]), class=r$class)
  ## p = p + as.layer(xyplot(yy ~ xx, group=class, data=plot.data),
  ##                  x.same=TRUE, y.same=TRUE)
  ## print(p)

  return(list(inds=i.misclass, num=n.misclass, frac=f.misclass))
}
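#
# A minimal usage sketch for the routines above (an addition, not part of the
# original file; 'demo.knn' and the seed value are hypothetical, and knn() is
# assumed to come from the 'class' package, as loaded in knn.example()):
#
demo.knn = function() {
  set.seed(42)                        # arbitrary seed, for reproducibility
  r = simple.data(n=200, nclass=3)    # draw three Gaussian classes
  # Show the underlying mixture density...
  show.simple.data(r, xlim=range(r$data[, 1]), ylim=range(r$data[, 2]))
  # ...then classify with 5 nearest neighbours and report the error rate.
  res = knn.example(r, n=5, show.misclass=TRUE)
  print(res$frac)                     # fraction of mis-classified points
  invisible(res)
}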
#
# Carry out Linear Discriminant Analysis of the above dataset.
#
lda.example = function (r, show.misclass=FALSE, ...) {
  require(MASS)   # for lda()

  g = get.grid(r, ...)
  l = lda(r$data, r$class)

  # Get the predictions for the training data.
  l.test = predict(l, r$data)

  # Quantify the mis-classification rate.
  diff.class = as.numeric(l.test$class) - as.numeric(r$class)
  i.misclass = which(abs(diff.class) > 0)
  n.misclass = length(i.misclass)
  f.misclass = n.misclass/length(r$class)

  # Create predictions for the grid in x & y.
  im = predict(l, g$z)
  z = matrix(as.numeric(im$class), length(g$x), length(g$y))

  # Plot data.
  plot(r$data, col=r$class)
  contour(g$x, g$y, z, add=TRUE)

  # Overplot mis-classified objects.
  if (show.misclass) {
    points(r$data[i.misclass, 1], r$data[i.misclass, 2],
           col=r$class[i.misclass], pch=19, cex=1.5)
  }

  # If you want to use lattice graphics - uncomment below:
  # grid = expand.grid(x=g$x, y=g$y)
  # grid$z = as.numeric(im$class)
  # p = contourplot(z ~ x+y, data=grid, col.regions=gray.colors(100))
  # plot.data = data.frame(xx=as.vector(r$data[, 1]),
  #                        yy=as.vector(r$data[, 2]), class=r$class)
  # p = p + as.layer(xyplot(yy ~ xx, group=class, data=plot.data),
  #                  x.same=TRUE, y.same=TRUE)
  # print(p)

  return(list(inds=i.misclass, num=n.misclass, frac=f.misclass))
}

qda.example = function (r, show.misclass=FALSE, ...) {
  # Carry out Quadratic Discriminant Analysis of the above dataset.
  require(MASS)   # for qda()

  g = get.grid(r, ...)
  l = qda(r$data, r$class)

  # Get the predictions for the training data.
  l.test = predict(l, r$data)

  # Quantify the mis-classification rate.
  diff.class = as.numeric(l.test$class) - as.numeric(r$class)
  i.misclass = which(abs(diff.class) > 0)
  n.misclass = length(i.misclass)
  f.misclass = n.misclass/length(r$class)

  im = predict(l, g$z)
  z = matrix(as.numeric(im$class), length(g$x), length(g$y))

  # Plot data.
  plot(r$data, col=r$class)
  contour(g$x, g$y, z, add=TRUE)

  # Overplot mis-classified objects.
  if (show.misclass) {
    points(r$data[i.misclass, 1], r$data[i.misclass, 2],
           col=r$class[i.misclass], pch=19, cex=1.5)
  }

  # Lattice graphics below here again:
  ## grid = expand.grid(x=g$x, y=g$y)
  ## grid$z = as.numeric(im$class)
  ## p = contourplot(z ~ x+y, data=grid, col.regions=gray.colors(100))
  ## plot.data = data.frame(xx=as.vector(r$data[, 1]),
  ##                        yy=as.vector(r$data[, 2]), class=r$class)
  ## p = p + as.layer(xyplot(yy ~ xx, group=class, data=plot.data),
  ##                  x.same=TRUE, y.same=TRUE)
  ## print(p)

  return(list(inds=i.misclass, num=n.misclass, frac=f.misclass))
}
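#
# A minimal usage sketch contrasting the two discriminant fits above (an
# addition, not part of the original file; 'demo.discriminants' and the seed
# are hypothetical, and lda()/qda() are assumed to come from 'MASS'):
#
demo.discriminants = function() {
  set.seed(42)                        # arbitrary seed, for reproducibility
  r = simple.data(n=200, nclass=3)
  res.lda = lda.example(r, show.misclass=TRUE)
  res.qda = qda.example(r, show.misclass=TRUE)
  # QDA fits one covariance matrix per class, so on data like these, where
  # each Gaussian has its own covariance, it can bend its decision boundaries
  # in a way the shared-covariance LDA model cannot.
  print(c(lda=res.lda$frac, qda=res.qda$frac))
}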
ann.example = function (r, nx=100, ny=100, size=5, show.misclass=FALSE,
                        linout=TRUE, ...) {
  # Train a neural network to classify a dataset.
  require(nnet)   # for nnet()

  # Run the neural network.
  n = nnet(r$data, r$class, size=size, linout=linout, ...)

  # Calculate classifications across a grid of x & y values.
  g = get.grid(r, nx=nx, ny=ny)
  im = predict(n, g$z)
  z = matrix(as.numeric(im), length(g$x), length(g$y))
  class.z = round(z)

  # Classify the input data.
  new.class = predict(n, r$data)
  # I prefer integer classes.
  new.class = round(new.class)

  # Quantify the mis-classification rate.
  diff.class = as.numeric(new.class) - as.numeric(r$class)
  i.misclass = which(abs(diff.class) > 0)
  n.misclass = length(i.misclass)
  f.misclass = n.misclass/length(r$class)

  # Plot data.
  plot(r$data, col=round(r$class))
  contour(g$x, g$y, class.z, add=TRUE)

  # Overplot mis-classified objects.
  if (show.misclass) {
    points(r$data[i.misclass, 1], r$data[i.misclass, 2],
           col=r$class[i.misclass], pch=19, cex=1.5)
  }

  # Lattice graphics visualisation:
  ## p = contourplot(z ~ x+y, data=grid, col.regions=gray.colors(100), levels=0)
  ## plot.data = data.frame(xx=as.vector(r$data[, 1]),
  ##                        yy=as.vector(r$data[, 2]), class=r$class)
  ## p = p + as.layer(xyplot(yy ~ xx, group=class, data=plot.data),
  ##                  x.same=TRUE, y.same=TRUE)
  ## print(p)

  return(list(inds=i.misclass, num=n.misclass, frac=f.misclass))
}

ann.example.avg = function (r, nx=100, ny=100, size=5, n.average=10,
                            show.misclass=FALSE, linout=TRUE, trace=FALSE, ...) {
  # Train a neural network to classify a dataset. This time average
  # together n.average (default=10) neural networks.
  require(nnet)   # for nnet()

  ## We do the analysis once first to set variables. Then we repeat
  ## n.average-1 times.

  # Run the neural network once.
  n = nnet(r$data, r$class, size=size, linout=linout, trace=trace, ...)

  # Calculate classifications across a grid of x & y values.
  g = get.grid(r, nx=nx, ny=ny)
  im = predict(n, g$z)
  z = matrix(as.numeric(im), length(g$x), length(g$y))

  # Classify the input data.
  # Note that for the averaging we keep floating-point variables here.
  new.class = predict(n, r$data)

  for (i in seq(2, n.average, 1)) {
    ## Run the neural network once more.
    n = nnet(r$data, r$class, size=size, linout=linout, trace=trace, ...)

    ## Calculate classifications across the grid of x & y values.
    im = predict(n, g$z)

    ## Add up for the averaging.
    z = z + matrix(as.numeric(im), length(g$x), length(g$y))

    ## Classify the input data.
    new.class.this = predict(n, r$data)
    new.class = new.class + new.class.this
  }

  ## Average and create integers.
  z = round(z/n.average)
  new.class = round(new.class/n.average)

  # Quantify the mis-classification rate.
  diff.class = as.numeric(new.class) - as.numeric(r$class)
  i.misclass = which(abs(diff.class) > 0)
  n.misclass = length(i.misclass)
  f.misclass = n.misclass/length(r$class)

  # Plot data.
  plot(r$data, col=as.factor(r$class))
  contour(g$x, g$y, z, add=TRUE)

  # Overplot mis-classified objects.
  if (show.misclass) {
    points(r$data[i.misclass, 1], r$data[i.misclass, 2],
           col=r$class[i.misclass], pch=19, cex=1.5)
  }

  return(list(inds=i.misclass, num=n.misclass, frac=f.misclass))
}
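#
# A minimal usage sketch comparing a single network with the averaged
# ensemble above (an addition, not part of the original file; 'demo.ann' and
# the seed are hypothetical, and nnet() is assumed to come from 'nnet'):
#
demo.ann = function() {
  set.seed(42)                        # arbitrary seed, for reproducibility
  r = simple.data(n=200, nclass=2)
  res.one = ann.example(r, size=5)
  res.avg = ann.example.avg(r, size=5, n.average=10)
  # Averaging several independently trained networks smooths out the
  # run-to-run variation of individual nnet() fits and typically gives a
  # more stable decision boundary.
  print(c(single=res.one$frac, averaged=res.avg$frac))
}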