#encoding: latin-1
"""Unit tests for the ``liblinear`` Python binding (``LinearSVM`` and helpers).

Importing this module prints the version reported by the binding.
"""
import math
import pickle
import random
import unittest

import numpy as np

import liblinear as ll

# TODO: add get_w and get_bias to the tests

# Single pre-formatted argument: prints the same text on Python 2 and 3.
print("version is %s" % (ll.version,))


class LibLinearTest(unittest.TestCase):
    """Exercises training, prediction, serialization and cross-validation."""

    # Names of the solver constants on ``ll`` exercised by the SVM tests.
    MACH_NAMES = ("MCSVM_CS", "L2LOSS_SVM_DUAL", "L2LOSS_SVM",
                  "L1LOSS_SVM_DUAL")

    # Points per cloud; equals the default ``num`` of make_checkerboard().
    CLOUD_SIZE = 100

    def setUp(self):
        """Create the two sparse example vectors shared by several tests."""
        # Sparse vectors are lists of (index, value) tuples, one tuple per
        # entry of the vector. The indices must be sorted!
        # Direct construction of the sparse representation:
        self.good_vec = [(0, 0), (1, 1), (2, 2)]
        # Sparse representation built from the "full" vector (1, 2, 3):
        self.bad_vec = ll.vector2sparse([1, 2, 3])

    def _assert_predicts_own_class(self, clfr, datasets):
        """Assert that ``clfr`` predicts ``i`` for every row of ``datasets[i]``."""
        for label, rows in enumerate(datasets):
            for row in rows:
                self.assertEqual(label, clfr.predict(row))

    def testOne(self):
        """Train each solver on two classes; check predict and round trip."""
        data = [[self.good_vec, self.good_vec], [self.bad_vec, self.bad_vec]]
        for mach in self.MACH_NAMES:
            method = getattr(ll, mach)
            # train machine with weighted classes
            clfr = ll.LinearSVM.train(data, method, weights=[2, 1], verbose=0)
            self.assertEqual(0, clfr.predict(self.good_vec))
            self.assertEqual(1, clfr.predict(self.bad_vec))

            # The classifier must predict the same after a
            # tostring()/fromstring() round trip.
            clfr = ll.LinearSVM.fromstring(clfr.tostring())
            self.assertEqual(0, clfr.predict(self.good_vec))
            self.assertEqual(1, clfr.predict(self.bad_vec))

    def testExcepions(self):
        """train() must raise ValueError for a wrong number of weights."""
        data = [[self.good_vec], [self.bad_vec]]
        # Two classes but only one weight: the number of weights does not fit.
        self.assertRaises(ValueError, ll.LinearSVM.train, data, ll.L2_LR,
                          weights=[1])

    def testLR(self):
        """Check class probabilities and labels from an ``ll.L2_LR`` model."""
        # NOTE(review): L2_LR is presumably L2-regularized logistic
        # regression (the original comment said "linear regression"); it
        # yields class probabilities in addition to class labels.
        good_vec1 = [(0, 0), (1, 1), (2, 2)]
        good_vec2 = [(0, 0), (1, 1.1), (2, 2.2)]
        bad_vec1 = [(0, 1), (1, 2), (2, 3)]
        bad_vec2 = [(0, 1.1), (1, 2.2), (2, 3.3)]
        data = [[good_vec1, good_vec2], [bad_vec1, bad_vec2]]
        clfr = ll.LinearSVM.train(data, ll.L2_LR, weights=[1, 1])

        p1 = clfr.predict_probabilites(good_vec1)
        p2 = clfr.predict_probabilites(bad_vec1)
        # Reference values, compared to 5 decimal places.
        self.assertAlmostEqual(p1[0], .531221, 5)
        self.assertAlmostEqual(p1[1], .468778, 5)
        self.assertAlmostEqual(p2[0], .318536, 5)
        self.assertAlmostEqual(p2[1], .681463, 5)

        self.assertEqual(clfr.predict(good_vec1), 0)
        self.assertEqual(clfr.predict(good_vec2), 0)
        self.assertEqual(clfr.predict(bad_vec1), 1)
        self.assertEqual(clfr.predict(bad_vec2), 1)

    @staticmethod
    def make_checkerboard(num=100):
        """Return four random 2-D point clouds, one per unit-square corner.

        Cloud ``k`` is centred at ``divmod(k, 2)``, i.e. (0,0), (0,1), (1,0),
        (1,1), and holds ``num`` points offset by less than 0.1 per
        coordinate. Each cloud is converted with ``ll.matrix2sparse``.
        """
        ds = []
        for sektor in range(4):
            midpoint = np.array(divmod(sektor, 2))
            # random() is in [0, 1), so the offsets fall in [-0.1, 0.1).
            data = midpoint + np.random.random(size=(num, 2)) * .2 - .1
            ds.append(ll.matrix2sparse(data))
        return ds

    def testCheckBoard(self):
        """
        creates four point clouds around centers (0,0), (0,1), (1,0), (1,1)
        trains multiclass svms and tests them.
        """
        size = self.CLOUD_SIZE
        ds_learn = self.make_checkerboard(size)
        ds_test = self.make_checkerboard(size)
        # Expected confusion matrix: every training point on the diagonal.
        cmat_tobe = [[size if i == j else 0 for j in range(4)]
                     for i in range(4)]

        for mach in self.MACH_NAMES:
            method = getattr(ll, mach)
            clfr = ll.LinearSVM.train(ds_learn, method)
            self._assert_predicts_own_class(clfr, ds_test)
            self.assertEqual(clfr.build_confusion_matrix(ds_learn), cmat_tobe)

            # test pickling of svm classifier
            clfr = pickle.loads(pickle.dumps(clfr))
            self._assert_predicts_own_class(clfr, ds_test)

    def testCrossvalidation(self):
        """Check ll.crossvalidation on separable and non-separable data."""
        size = self.CLOUD_SIZE
        ds = self.make_checkerboard(size)
        cmat = ll.crossvalidation(ds, verbose=0)
        for i in range(4):
            for j in range(4):
                self.assertEqual(cmat[i][j], size if i == j else 0)

        # Set up an example that is not linearly separable: grid points are
        # split by whether they lie on/above or below the curve sin(2.5 * x).
        sin_data_pos = []
        sin_data_neg = []
        for x in np.arange(-3, 3, .5):
            for y in np.arange(-3, 3, .5):
                p = [(0, x), (1, y)]
                if y >= math.sin(2.5 * x):
                    sin_data_pos.append(p)
                else:
                    sin_data_neg.append(p)
        sin_data = [sin_data_pos, sin_data_neg]

        # liblinear's cross-validation function shuffles the data as a first
        # step, so results are only reproducible with leave-one-out
        # validation, that is: num_folds equals the number of examples.
        num_ex = len(sin_data[0]) + len(sin_data[1])
        c = ll.crossvalidation(sin_data, num_folds=num_ex)
        self.assertEqual(c[0][0], 62)
        self.assertEqual(c[0][1], 6)
        self.assertEqual(c[1][0], 8)
        self.assertEqual(c[1][1], 68)

    def testMultiClassSVM(self):
        """Train ``ll.MCSVM_CS`` on three classes; check the training points."""
        #       x        y           x         y
        d1 = [[(0, 1), (1, 1)], [(0, .9), (1, 1)]]
        d2 = [[(0, -1), (1, 1)], [(0, -.9), (1, 1)]]
        d3 = [[(0, 0), (1, -1)], [(0, 0), (1, -1.1)]]
        clfr = ll.LinearSVM.train([d1, d2, d3], ll.MCSVM_CS)
        self.assertEqual(clfr.predict(d1[0]), 0)
        self.assertEqual(clfr.predict(d1[1]), 0)
        self.assertEqual(clfr.predict(d2[0]), 1)
        self.assertEqual(clfr.predict(d2[1]), 1)
        self.assertEqual(clfr.predict(d3[0]), 2)
        self.assertEqual(clfr.predict(d3[1]), 2)

    def testPredictValues(self):
        """Compare predict_values() of each solver with reference values."""
        data = [[self.good_vec, self.good_vec], [self.bad_vec, self.bad_vec]]
        # One entry per solver, in the order of MACH_NAMES.
        good_values = []
        bad_values = []
        for mach in self.MACH_NAMES:
            method = getattr(ll, mach)
            # train machine with weighted classes
            clfr = ll.LinearSVM.train(data, method, weights=[2, 1], verbose=0)
            good_values.append(clfr.predict_values(self.good_vec))
            bad_values.append(clfr.predict_values(self.bad_vec))

        # Reference values, compared to 1 decimal place. Two values are
        # checked for the first solver (MCSVM_CS) and only the first value
        # for the others.
        self.assertAlmostEqual(good_values[0][0], .49, 1)
        self.assertAlmostEqual(good_values[0][1], -.49, 1)
        self.assertAlmostEqual(good_values[1][0], .75, 1)
        self.assertAlmostEqual(good_values[2][0], .75, 1)
        self.assertAlmostEqual(good_values[3][0], 1.0, 1)

        self.assertAlmostEqual(bad_values[0][0], -.5, 1)
        self.assertAlmostEqual(bad_values[0][1], .5, 1)
        self.assertAlmostEqual(bad_values[1][0], -.7, 1)
        self.assertAlmostEqual(bad_values[2][0], -.7, 1)
        self.assertAlmostEqual(bad_values[3][0], -1.0, 1)


if __name__ == "__main__":
    unittest.main()