Here we select a random half of the synaptome data, different from the half we have been using.

The data are then z-scored and principal components are computed to the 3rd elbow by Z-G.

supressMessages(require(meda))
source("~/neurodata/synaptome-stats/Code/doidt.r")
load('~/neurodata/synaptome-stats/Code/cleanDataWithAttributes.RData')
### Setting a seed and creating an index vector
### to select half of the data
#set.seed(2^10)
set.seed(317)
half1 <- sample(dim(data01)[1],dim(data01)[1]/2)
half2 <- setdiff(1:dim(data01)[1],half1)

feat <- data01[half1,]
feat2 <- data01[half2,]

#set.seed(2^10)
set.seed(317)
ss <- sample(dim(data01)[1],10000)
small <- data01[ss, 1:24, with = FALSE]
dat <- small
zfeat <- 
  dat[, lapply(.SD, scale, center = TRUE, scale=TRUE)]

pr2 <- prcomp(zfeat)
#cur <- rCUR::CUR(as.matrix(zfeat), k = 3)@C

(elb <- getElbows(pr2$x, 3, plot = FALSE))
## [1]  3 18 21
X <- pr2$x[, 1:elb[3]]

out <- doIDT(as.matrix(X),
             FUN="mclust",
             Dmax=ncol(X), ## max dim for clustering
             Kmax=2,  ## max K for clustering 
             maxsamp=nrow(X), ## max n for clustering
             samp=1, # 1: no sampling, else n/sample sampling
             maxdepth=2, # termination, maximum depth for idt
             minnum=100, # termination, minimum observations per branch
             verbose=TRUE)  
## ===============================================
## Working on branch  1 , depth =  1 ( 2 )
## n =  10000 , dim =  21 , dmax =  21 , Kmax =  2 
## Clustering in dim =  13, Khat:  2 ,  VVV 
## ===============================================
## Working on branch  11 , depth =  2 ( 2 )
## n =  1915 , dim =  13 , dmax =  13 , Kmax =  2 
## Clustering in dim =  16, Khat:  2 ,  VVV 
## ===============================================
## Working on branch  111 , depth =  3 ( 2 )
## n =  821 , dim =  16 , dmax =  16 , Kmax =  2 
## ***** LEAF:  111 : (pure,small,deep)=( FALSE , FALSE , TRUE )
## 
## ===============================================
## Working on branch  112 , depth =  3 ( 2 )
## n =  1094 , dim =  16 , dmax =  16 , Kmax =  2 
## ***** LEAF:  112 : (pure,small,deep)=( FALSE , FALSE , TRUE )
## 
## 
## ===============================================
## Working on branch  12 , depth =  2 ( 2 )
## n =  8085 , dim =  13 , dmax =  13 , Kmax =  2 
## Clustering in dim =  11, Khat:  2 ,  VVV 
## ===============================================
## Working on branch  121 , depth =  3 ( 2 )
## n =  4433 , dim =  11 , dmax =  11 , Kmax =  2 
## ***** LEAF:  121 : (pure,small,deep)=( FALSE , FALSE , TRUE )
## 
## ===============================================
## Working on branch  122 , depth =  3 ( 2 )
## n =  3652 , dim =  11 , dmax =  11 , Kmax =  2 
## ***** LEAF:  122 : (pure,small,deep)=( FALSE , FALSE , TRUE )
## 
## 
## number of leaves (clusters) =  4
save(X, out, file = "IDTrun20161214_2.RData")
idtlab <- out$class
idtall <- out$idtall
leaves <- which(sapply(idtall, function(x) x$isLeaf))

#sapply(idtall[leaves], function(x) {
#         pairs(X[x$ids, 1:3], col = out$classification[x$ids]); Sys.sleep(3)
#             })

pairs(X[1:5e3,1:3], pch=19,col= idtlab[1:5e3], main ="FOTD 20161214: IDT on Synaptome Data")

#n <- 5e3
#plot3d(X[1:n,1:3], col = idtlab[1:n], size = 15, alpha = 0.5)

idtall <- out$idtall
leaves <- which(sapply(idtall, function(x) x$isLeaf))
nleaves <- G <- length(leaves)
dend <- makeDendrogram(idtall)
plot(dend)