from Numeric import * from MLab import * from LinearAlgebra import * #import os, string #import sys, re, time # Compute the regression model of data observations for the given index def compute_regression(data, index): b=[] if not len(data): #print "(local_search/compute_regression) Error: no data" return b else: i=0 tmpdata=[] y=[] # for obs in data: vect = obs.data()[:] y.append(vect[index]) vect[index] = 1.0 tmpdata.append(vect) u = array(tmpdata) a = matrixmultiply(transpose(u),u) c = matrixmultiply(transpose(u),array(y)) detA = determinant(a) if detA*detA > 1e-16: b = solve_linear_equations(a, c) else: if len(data) < len(data[0]): #print "(local_search/compute_regression) Singular matrix", detA, len(data) b = [] else: #print "(local_search/compute_regression) Singular matrix, partial model" eigen = eigenvectors(a) Pc = matrixmultiply(eigen[1],c).tolist() Pb = range(len(eigen[0])); # l = eigen[0].tolist() for i in range(len(eigen[0])): if abs(eigen[0][i]) < 1e-12: Pb[i] = 0 else: Pb[i] = Pc[i]/eigen[0][i] # d = MLab.diag(array(l)) # Pb = solve_linear_equations(d,array(Pc)) # Pb = Pb.tolist() # for i in range(len(eigen[0])): # if abs(eigen[0][i]) < 1e-12: # Pb[i:i] = [0] b = matrixmultiply(inverse(eigen[1]), array(Pb)) for i in range(len(b)): if type(b[i]) == type(1J): b[i] = b[i].real return b # Compute the regression model for all clusters def regression_model(data,index,oldB): K = len(data) B = {} for clusterI in data: b = compute_regression(data[clusterI],index) if len(b) != 0: B[clusterI] = b.tolist() else: B[clusterI] = oldB[clusterI] return B #Compute the quadratic error of a observation for a given regression model def compute_observation_error(obs,b,index): lobs = obs.data() e = lobs[index] - b[index] rng = range(len(lobs)) del rng[index] for i in rng: e = e - lobs[i]*b[i] return abs(e)*abs(e) #Move the observations in the cluster in which their quadratic error is minimal def move_observations(data,B,index): K = len(data) modelError = 0 identity = 1 data2 = {} for clusterI in data: data2[clusterI] = [] for clusterI in data: for obs in data[clusterI]: errormin = 1e24 imin = 0 for i in data: error = compute_observation_error(obs,B[i],index) if error < errormin : errormin = error imin = i data2[imin].append(obs) if imin != clusterI: identity = 0 modelError = modelError + errormin for clusterI in data: data[clusterI] = data2[clusterI][:] return [modelError,identity] #Perform a local search def local_search(data,index,maxIter=100): K=len(data) i = 0 fin = 0 error=1e2005 B = {} for clusterI in data: b = compute_regression(data[clusterI],index) if len(b) != 0: B[clusterI] = b.tolist() else: b = data[clusterI] if len(b) != 0: b = b[0].data() else: i=0 while len(data[i]) ==0: i+=1 b = [0]*len(data[i][0].data()) B[clusterI] = b while fin == 0 and i < maxIter: B = regression_model(data,index,B) retVal = move_observations(data,B,index) error = retVal[0] fin = retVal[1] i = i + 1 return [error,i,B]