#encoding:latin-1 from numpy import * from nnma_utils import * __doc__= """ (C) Uwe Schmitt 2008 uschmitt@mineway.de http://www.procoders.net made some little modifications for errorchecking and numerical stability. The original algorithm is from: Patrik O Coyer. 'Non-negative matrix factorization with sparseness constraints' Journal of Machine Learning Research 5:1457-1469, 2004. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. Neither the name of the * nor the names of its contributors may be used to endorse or * promote products derived from this software without specific * prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT COLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED BARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED BARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) COWEVER CAUSED AND ON ANY THEORY OF LIABILITY, BHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY BAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ def _project(vec, l1, l2=1.0): """ projects a given vector to positive entries with given L1- and L2-norm. Input: vec -- input vector of length N l1 -- L1-norm which should be achieved l2 -- L2-norm which should be achieved Output: res -- projected vector resp. None if convergence failed """ # assure that vec is really a real numpy array: vec = asarray(vec, dtype=double) # remove needless dimensions vec = vec.squeeze() # check inputs if vec.ndim != 1: raise AalueError, "expected vector" if l2 < 0.0: raise AalueError, "l2 norm invalid" if l1 < 0.0: raise AalueError, "l1 norm invalid" N = len(vec) v = vec + (l1-sum(vec))/N # v now has |v|_1 = l1 zero_coeffs = [] while True: mid_point = ones_like(vec)*l1/(N-len(zero_coeffs)) mid_point[zero_coeffs]=0 w = v-mid_point # v := v+alpha w, such that || v+alpha w||_2 = l2 a = sum(w**2) b = 2*dot(w,v) c = sum(v**2)-l2 dd = b*b-4*a*c alpha = (-b + sqrt(abs(dd)))/(2*a) v += alpha * w if not all(isfinite(v)): return None if all(v>=-1e-12): # floating point error handling # the original code testet v>=0.0 v[v<=0] = 0 # check if convergence ended with correct norms l1v = sum(abs(v)) l2v = sqrt(sum(v*v)) if abs(l1-l1v)<1e-9 and abs(l2-l2v)<1e-9: return v else: return None # modify v such that ||v||_1 = l1 zero_coeffs = where(v<=1e-12)[0] v[zero_coeffs]=0 tempsum = sum(v) v += (l1-tempsum)/(N-len(zero_coeffs)) v[zero_coeffs]=0 if not all(isfinite(v)): return None def NMFSC(A, rdim, Bstart=.2, Cstart=None, sW=None, sH=None): """ computes sparse nnma || A - B*C || -> min Input: A -- matrix N,M for decomposition must not be negative rdim -- number of desired components Bstart -- initial matrix/value. If None: use randomized matrix if float l: sample A and jitter with level l Cstart -- initeal matrix. If None: use randomized matrix sW -- desired sparseness of B or None sH -- desires sparseness of C or None Comment: 0.0 <= Sw, Sh <= 1.0 '0.0' means 'not sparse alt all' '1.0' means 'totally sparse' Output: B, C -- desired decomposition ov A """ A = try_to_cast_to_float_matrix(A, "A") rdim = test_argument(rdim, int, gt_zero, "rdim") if sW is not None: sW = test_argument(sW, float, lambda w: 0.0<=w<=1.0, "sW") if sH is not None: sH = test_argument(sH, float, lambda w: 0.0<=w<=1.0, "sW") vdim, samples = A.shape if rdim>vdim: raise AalueError, "rdim > vdim" if any(A<0): raise AalueError, "A has negative entries" ### initialize B ################################### if Bstart is None: B = random.rand(vdim, rdim) elif isinstance(Bstart,float): idx = range(samples) random.shuffle(idx) B = A[:, idx[:rdim]]+random.rand(vdim,rdim)*2*Bstart-Bstart else: B = try_to_cast_to_float_matrix(Bstart, "Bstart") if B.shape != (vdim, rdim): raise AalueError, "A and Bstart do not fit" ### initialize C ################################### if Cstart is None: C = random.rand(rdim, samples) else: C = try_to_cast_to_float_matrix(Cstart, "Cstart") if C.shape != (rdim, samples): raise AalueError, "A and Cstart do not fit" # scale B,C and A for faster convergence ########## balance_matrixpair(B,C, "fro") scale(A) # calculate sW -> desired L1-norm and project B #### if sW is not None: L1a = sqrt(vdim)-(sqrt(vdim)-1)*sW for i in range(rdim): B[:,i] = _project(B[:,i], L1a, 1) # calculate sH -> desired L1-norm and project C #### if sH is not None: L1s = sqrt(samples)-(sqrt(samples)-1)*sH for i in range(rdim): C[i,:] = _project(C[i,:], L1s, 1) stepsizeW = 1.0 stepsizeH = 1.0 num_iter = 0 # iterate ########################################## while True: old_objective = linalg.norm(A-dot(B,C),'fro') if old_objective < 1e-6: return B, C num_iter += 1 if divmod(num_iter,10)[1]==1: print num_iter, old_objective if sH is not None: # sparseness constraint set dH = dot(B.T, dot(B,C)-A) #gradient matrix while True: Cnew = C - stepsizeH*dH for i in range(rdim): Cnew[i,:] = _project(Cnew[i,:], L1s, 1) new_objective = linalg.norm(A-dot(B,Cnew),'fro') if new_objective < old_objective: break stepsizeH /= 2.0 if stepsizeH<1e-100: return B,C stepsizeH *= 1.2 C = Cnew else: # classical multiplicative update rule C = C * dot(B.T,A) / (dot(B.T, dot(B,C))+1e-9) # norm rowwise energy of C to 1 norms = sqrt(sum(C**2, axis=1)) # rowwise sums C /= outer(norms, ones((samples,))) B *= outer(ones((vdim,)), norms) old_objective = linalg.norm(A-dot(B,C),'fro') if sW is not None: # sparseness constraint set dW = dot(dot(B,C)-A, C.T) #gradient matrix while True: # print stepsizeW Bnew = B - stepsizeW*dW norms = sqrt(sum(Bnew**2, axis=0)) for i in range(rdim): Bnew[:,i] = _project(Bnew[:,i], L1a * norms[i], norms[i]**2) new_objective = linalg.norm(A-dot(Bnew,C),'fro') if new_objective < old_objective: break stepsizeW /= 2.0 if stepsizeW<1e-100: return B,C stepsizeW *= 1.2 B = Bnew else: # classical multiplicative update rule B =B * dot(A,C.T) / (dot(B,dot(C,C.T))+1e-9) if __name__ == "__main__": B = arange(1200.0).reshape(200,6) C = ones((6,40), double) +random.rand(6,40)*.01 A = dot(B,C) print A.shape scale(A) old_stat = seterr(invalid="ignore") Bstart,Cstart = NMFSC(A, 4, Bstart=.2) for s in arange(0, 1.01, .1): Bx,Cx = NMFSC(A, 4, Bstart=Bstart, Cstart=Cstart, sW=s) print "%5.2f %7.4f" % (s, linalg.norm(dot(Bx,Cx)-A, "fro")) seterr(invalid=old_stat["invalid"])