Here we select a random half of the synaptome data, different from the half we have been using.

The data are then z-scored and principal components are computed to the 3rd elbow by Z-G.

require(meda)
source("~/neurodata/synaptome-stats/Code/doidt.r")
load('~/neurodata/synaptome-stats/Code/cleanDataWithAttributes.RData')
### Setting a seed and creating an index vector
### to select half of the data
#set.seed(2^10)
set.seed(317)
half1 <- sample(dim(data01)[1],dim(data01)[1]/2)
half2 <- setdiff(1:dim(data01)[1],half1)

feat <- data01[half1,]
feat2 <- data01[half2,]

#set.seed(2^10)
set.seed(317)
ss <- sample(dim(data01)[1],1000)
small <- data01[ss, 1:24, with = FALSE]
dat <- small
zfeat <- 
  dat[, lapply(.SD, scale, center = TRUE, scale=TRUE)]

pr2 <- prcomp(zfeat)
cur <- rCUR::CUR(as.matrix(zfeat), k = 3)@C

(elb <- getElbows(pr2$x, 3, plot = FALSE))
## [1]  3 18 21
X <- pr2$x[, 1:elb[3]]

out <- doIDT(as.matrix(X),
             FUN="mclust",
             Dmax=ncol(X), ## max dim for clustering
             Kmax=9,  ## max K for clustering 
             maxsamp=nrow(X), ## max n for clustering
             samp=1, # 1: no sampling, else n/sample sampling
             maxdepth=3, # termination, maximum depth for idt
             minnum=100, # termination, minimum observations per branch
             verbose=FALSE)  
## number of leaves (clusters) =  22
idtlab <- out$class
pairs(X[,1:3], pch=19,col=idtlab)
title("FOTD 20161212: IDT on Synaptome Data")