# Raw procedure for learning emergent models by computing cybernetic equilibria.
# Run in R statistical computing environment with 'source("pca.r")' in proper directory.
# The models phi_i will converge to principal components of data, or at least to the subspace spanned by them.
# This algorithm assumes intelligent coupling A between systems x_i, which separates the models.
# The process can be extended to self-organize and self-regulate also with simpler coupling Q
# when data is consumed for activity. With careful conjugations, also complex values can be implemented.
# The models summarize steady states of rapid gradient descents of cybernetic objective function 
# J(x,u) = 1/2 x^T A x - x^T B u, i.e. dx/dt = - G A x + G B u, i.e. xbar = A^-1 B u = phi^T u,
# where A = E{xx^T} and B = E{xu^T}.
# Heikki Hyötyniemi & Petri Lievonen -- http://neocybernetics.com/

#library('Matrix')                              # use Matrix library instead of standard 'matrix' command
                                                # for larger sparse data and matrices of specific form
# Set:
n <- 15                                         # number of models/features/systems/attractors x_i
lambda <- 0.3                                   # near zero: inverse of adaptation time scale in sample time
loadData <- TRUE                                # load data from file
normalizeSamples <- FALSE                       # center samples and normalize sample vectors to unity
centerDimensions <- FALSE                       # center data (recommended for numerical stability)
normalizeDimensions <- FALSE                    # normalize data variances to unity
initModels <- TRUE                              # initialize model matrices
useRays <- FALSE                                # enhance sparsity by cutting negative inner products to zero
maskA <- TRUE                                   # elicit actual principal components by structural constraints

if (loadData) {
    m <- 256                                    # number of data dimensions u_j
    k <- 500                                    # number of data samples u

    U <- matrix(scan("digits500.txt"), m, k)    # load data samples to columns of matrix U
                                                # try library 'matlab' or 'R.matlab' for .mat data files
    side <- 16                                  # image width and height for this special data

    if (normalizeSamples) {                     # optionally normalize sample vectors to unit length
        sampleMeans <- colMeans(U)              # calculate sample means
        U <- t(t(U)-sampleMeans)                # center samples by substracting the means
        sampleSqMeans <- colMeans(U^2)          # calculate means of sample squares (std.dev^2, RMS^2)
        sampleSqMeans[sampleSqMeans==0] <- 1    # prevent division by zero
        U <- t(t(U)/sqrt(sampleSqMeans))        # normalize sample vectors to unit length
    }
    if (centerDimensions) {                     # for numerical stability it is better to center the data
        meanU <- rowMeans(U)                    # calculate data dimension averages
        U <- U - meanU                          # center the data by substracting the mean (store for reversal)
    }
    if (normalizeDimensions) {                  # if nothing more specific is known about the data distribution
        varU <- rowMeans(U^2)                   # calculate data dimension variances
        varU[varU==0] <- 1                      # prevent division by zero
        U <- U/sqrt(varU)                       # normalize data by dividing by standard deviations
    }
}                                               # for natural positive data consider instead logarithms and
                                                # geometric means of data, for example dividing deviations
                                                # from means by those means.
                                                # for time series study vectorizing successive samples together

if(initModels) {
    ExuT <- 0.1*matrix(runif(n*m)-0.5,n,m)      # initialize matrix E{xu^T} with small random numbers
    ExxT <- 0.1*matrix(runif(n*n),n,n)          # initialize matrix E{xx^T} with small random numbers
                                                # should make more effort to generate different scenarios,
}                                               # for example to ensure symmetricity and positive-definiteness

jet.colors <- colorRampPalette(c("blue","white","red"), space="Lab")    # create custom color palette
bwr <- jet.colors(255)                          # palette ranges from blue (-) to white (0) to red (+)


for (p in 1:1000) {                             # iterate for some time or when Esc pressed
    par(mfrow=c(5,5), mar=c(1,1,1,1))           # create/clear a graphics window with 5x5=25 tiled image blocks

    Xbar <- solve(ExxT) %*% ExuT %*% U          # calculate steady state for each u

    if (useRays) {                              # one way to elicit sparse solutions:
        Xbar[Xbar<0] <- 0                       # cut negative inner products to zero
    }
                                                # update system/model matrices
    ExuT <- (1-lambda) * ExuT  +  lambda * Xbar %*% t(U) / k
    ExxT <- (1-lambda) * ExxT  +  lambda * Xbar %*% t(Xbar) / k
        
    if (maskA) {                                # one way to use structural constraints:
        ExxT[row(ExxT)<col(ExxT)] <- 0          # clearing upper triangle results in principal components
    }                                           # as the first model does not react to other cascading models
    
    phi <- t(solve(ExxT) %*% ExuT)              # models as columns: compare to first eigen(U%*%t(U)/k)$vectors
    maxColor <- max(abs(range(phi)))            # calculate value range to center zero to white color
 
    for (i in 1:n) {                            # draw all n models as images to observe convergence
        values <- matrix(phi[,i],side,side)     # remember that they are in preprocessed data space
        image(values, zlim=c(-maxColor,maxColor), col=bwr, axes=FALSE, ylim=c(1.05,-0.05))
    }

    phiTphi <- t(phi) %*% phi                   # calculate inner products between models (or crossprod(phi))
    maxColor <- max(abs(range(phiTphi)))        # show model inner product matrix as an image
    image(phiTphi, zlim=c(-maxColor,maxColor), col=bwr, axes=FALSE, ylim=c(1.05,-0.05))

    maxColor <- max(abs(range(ExxT)))           # show activity covariance matrix as an image
    image(t(ExxT), zlim=c(-maxColor,maxColor), col=bwr, axes=FALSE, ylim=c(1.05,-0.05))
                                                # could study the minimization of cybernetic objective function
}
                                                # after convergence, study Xbar, U, phi, Uest as phi%*%Xbar,
                                                # their covariances and eigenvalues, etc. 
                                                # remember to reverse preprocessing to use estimates 
                                                # in the original data space