【one vs one】基于svm的二分类器实现多分类

你的名字 2023-01-21 15:23 368阅读 0赞

文章目录

  • 一、教案版本代码解析
    • 1、整体代码
    • 2、逐段分析
      • (1)输入可视化
      • (2) train_OvO分段解析
      • (3)test_OvO 分段解析
      • (4)score_svm函数解析
  • 二、20-21题,加入score_svm后的分类
    • 1、整体代码
    • 2、逐段解析
      • (1)test_OvO分析
      • (2)对我作出的更改进行测试

一、教案版本代码解析

1、整体代码

  1. import numpy as np
  2. from sklearn.svm import LinearSVC
  3. from sklearn.datasets import load_digits
  4. from sklearn.model_selection import train_test_split
  5. from sklearn.metrics import confusion_matrix
  6. import matplotlib.pyplot as plt
  7. import numpy as np
  8. import scipy as sp
  9. import warnings
  10. warnings.filterwarnings('ignore')
  11. # unit test utilities: you can ignore these function
  def is_approximately_equal(test, target, eps=1e-2):
      """Return True when the mean absolute difference of test and target is below eps.

      Both inputs are converted with ``np.array`` and subtracted element-wise,
      so they must have equal or broadcast-compatible shapes.
      """
      return np.mean(np.fabs(np.array(test) - np.array(target))) < eps
  def assert_test_equality(test, target):
      """Assert that test approximately equals target.

      The comparison is delegated to is_approximately_equal; on failure an
      AssertionError is raised whose message shows target as the expected
      value and test as the value obtained.
      """
      assert is_approximately_equal(test, target), 'Expected:\n %s \nbut got:\n %s' % (target, test)
  def train_svm(X_train, y_train, param):
      """Fit a LinearSVC on (X_train, y_train) with C=param and return the fitted estimator."""
      return LinearSVC(C=param).fit(X_train, y_train)
  19. def test_svm(X_test, est):
  20. return est.predict(X_test)
  def score_svm(X_test, est):
      """Return the raw decision-function values of the fitted estimator est for X_test."""
      return est.decision_function(X_test)
  def train_OvO(X_train, y_train, train_func, param):
      """Train one binary classifier for every unordered pair of classes (one-vs-one).

      For each pair of class indices (i, j) with i < j, the samples labelled
      classes[i] or classes[j] are selected, relabelled +1 (classes[i]) and
      -1 (classes[j]), and passed to ``train_func(X, y, param)``.

      Returns a dict mapping (i, j) to whatever train_func returned, where i
      and j index the sorted list of distinct labels. Intermediate values are
      printed so the walkthrough output can be followed step by step.
      """
      X_train = np.asarray(X_train)
      y_train = np.asarray(y_train)
      classes = sorted(set(y_train.tolist()))
      print("classes:", classes)
      estimators = dict()
      for i, ci in enumerate(classes):
          print('-' * 20)
          print("i, ci:", i, ci)
          for j, cj in enumerate(classes):
              print('-' * 7)
              print("j, cj:", j, cj)
              if j > i:
                  # Compute the pair mask once and reuse it for both X and y.
                  pair_mask = np.logical_or(y_train == ci, y_train == cj)
                  X = X_train[pair_mask]
                  print("np.logical_or(y_train == ci, y_train == cj):", pair_mask)
                  print("X:", X)
                  y = y_train[pair_mask]
                  print("y:", y)
                  # np.where builds a fresh integer array, so the +1/-1 targets
                  # do not depend on the dtype of the original labels.
                  yp = np.where(y == ci, 1, -1)
                  print("yp:", yp)
                  est = train_func(X, yp, param)
                  estimators[(i, j)] = est
      return estimators
  48. def test_OvO(X_test, test_func, estimators):
  49. print("len(estimators):", len(estimators))
  50. all_scores = np.zeros((X_test.shape[0], len(estimators)))
  51. print("all_scores.shape:", all_scores.shape)
  52. for i, j in estimators:
  53. est = estimators[(i, j)]
  54. preds = test_func(X_test, est)
  55. print("preds:", preds)
  56. print("i:", i)
  57. print("preds == 1:", preds == 1)
  58. print('all_scores:', all_scores)
  59. all_scores[:, i][preds == 1] += 1
  60. print('all_scores:', all_scores)
  61. print('j:', j)
  62. print('preds == -1:', preds == -1)
  63. all_scores[:, j][preds == -1] += 1
  64. print('all_scores:', all_scores)
  65. # 为什么这里是以横轴判断类别。说明每一样代表一个X_test。
  66. preds = np.argmax(all_scores, axis=1)
  67. return preds
  # This cell is reserved for the unit tests. Do not consider this cell.
  ### BEGIN TESTS
  X_train = np.array([[10, 10], [8, 10], [-5, 5.5], [-5.4, 5.5], [-20, -20], [-15, -20]])
  y_train = np.array([0, 0, 1, 1, 2, 2])
  X_test = np.array([[11, 11], [-5, 5], [-15, -15]])
  y_test = np.array([0, 1, 2])
  # plt.scatter(X_train[:, 0], X_train[:, 1])
  # plt.show()
  est = train_OvO(X_train, y_train, train_svm, param=1)
  # print(est)
  print('=' * 40)
  preds = test_OvO(X_test, test_svm, est)
  print("preds:", preds)
  test_cm = confusion_matrix(y_test, preds)
  # print(test_cm)
  target_cm = np.eye(3)
  # Computed matrix first, expected matrix second, matching the (test, target)
  # signature so a failure message labels the two values correctly.
  assert_test_equality(test_cm, target_cm)
  ### END TESTS
  print('-' * 50)
  # Raw decision values of every pairwise classifier, in dict insertion order.
  for pair_est in est.values():
      print(score_svm(X_test, pair_est))
  • 控制台输出

    classes: [0, 1, 2]

    i, ci: 0 0

    j, cj: 0 0

    j, cj: 1 1
    np.logical_or(y_train == ci, y_train == cj): [ True True True True False False]
    X: [[10. 10. ]
    [ 8. 10. ]
    [-5. 5.5]
    [-5.4 5.5]]
    y: [0 0 1 1]

    yp: [ 1 1 -1 -1]

    j, cj: 2 2
    np.logical_or(y_train == ci, y_train == cj): [ True True False False True True]
    X: [[ 10. 10.]
    [ 8. 10.]
    [-20. -20.]
    [-15. -20.]]
    y: [0 0 2 2]

    yp: [ 1 1 -1 -1]

    i, ci: 1 1

    j, cj: 0 0

    j, cj: 1 1

    j, cj: 2 2
    np.logical_or(y_train == ci, y_train == cj): [False False True True True True]
    X: [[ -5. 5.5]
    [ -5.4 5.5]
    [-20. -20. ]
    [-15. -20. ]]
    y: [1 1 2 2]

    yp: [ 1 1 -1 -1]

    i, ci: 2 2

    j, cj: 0 0

    j, cj: 1 1

    j, cj: 2 2

    len(estimators): 3
    all_scores.shape: (3, 3)
    preds: [ 1 -1 -1]
    i: 0
    preds == 1: [ True False False]
    all_scores: [[0. 0. 0.]
    [0. 0. 0.]
    [0. 0. 0.]]
    all_scores: [[1. 0. 0.]
    [0. 0. 0.]
    [0. 0. 0.]]
    j: 1
    preds == -1: [False True True]
    all_scores: [[1. 0. 0.]
    [0. 1. 0.]
    [0. 1. 0.]]
    preds: [ 1 1 -1]
    i: 0
    preds == 1: [ True True False]
    all_scores: [[1. 0. 0.]
    [0. 1. 0.]
    [0. 1. 0.]]
    all_scores: [[2. 0. 0.]
    [1. 1. 0.]
    [0. 1. 0.]]
    j: 2
    preds == -1: [False False True]
    all_scores: [[2. 0. 0.]
    [1. 1. 0.]
    [0. 1. 1.]]
    preds: [ 1 1 -1]
    i: 1
    preds == 1: [ True True False]
    all_scores: [[2. 0. 0.]
    [1. 1. 0.]
    [0. 1. 1.]]
    all_scores: [[2. 1. 0.]
    [1. 2. 0.]
    [0. 1. 1.]]
    j: 2
    preds == -1: [False False True]
    all_scores: [[2. 1. 0.]
    [1. 2. 0.]
    [0. 1. 2.]]

    preds: [0 1 2]

    [ 1.45548543 -0.97524791 -2.01288786]
    [ 1.20241692 0.06646526 -1.62537764]
    [ 0.57513497 0.93275666 -0.74550495]

    Process finished with exit code 0

2、逐段分析

(1)输入可视化

  # This cell is reserved for the unit tests. Do not consider this cell.
  ### BEGIN TESTS
  X_train = np.array([[10, 10], [8, 10], [-5, 5.5], [-5.4, 5.5], [-20, -20], [-15, -20]])
  y_train = np.array([0, 0, 1, 1, 2, 2])
  X_test = np.array([[11, 11], [-5, 5], [-15, -15]])
  y_test = np.array([0, 1, 2])
  # Training points in red, test points in blue; the legend tells them apart.
  plt.scatter(X_train[:, 0], X_train[:, 1], color="r", label="train")
  plt.scatter(X_test[:, 0], X_test[:, 1], color="b", label="test")
  plt.legend()
  plt.show()

在这里插入图片描述
- 从输入可以看出训练集和测试集不同标签分得很开,能够很好分类

(2) train_OvO分段解析

  def train_OvO(X_train, y_train, train_func, param):
      """Train one binary classifier for every unordered pair of classes (one-vs-one).

      For each pair of class indices (i, j) with i < j, the samples labelled
      classes[i] or classes[j] are selected, relabelled +1 (classes[i]) and
      -1 (classes[j]), and passed to ``train_func(X, y, param)``.

      Returns a dict mapping (i, j) to whatever train_func returned, where i
      and j index the sorted list of distinct labels. Intermediate values are
      printed so the walkthrough output can be followed step by step.
      """
      X_train = np.asarray(X_train)
      y_train = np.asarray(y_train)
      classes = sorted(set(y_train.tolist()))
      print("classes:", classes)
      estimators = dict()
      for i, ci in enumerate(classes):
          print('-' * 20)
          print("i, ci:", i, ci)
          for j, cj in enumerate(classes):
              print('-' * 7)
              print("j, cj:", j, cj)
              if j > i:
                  # Compute the pair mask once and reuse it for both X and y.
                  pair_mask = np.logical_or(y_train == ci, y_train == cj)
                  X = X_train[pair_mask]
                  print("np.logical_or(y_train == ci, y_train == cj):", pair_mask)
                  print("X:", X)
                  y = y_train[pair_mask]
                  print("y:", y)
                  # np.where builds a fresh integer array, so the +1/-1 targets
                  # do not depend on the dtype of the original labels.
                  yp = np.where(y == ci, 1, -1)
                  print("yp:", yp)
                  est = train_func(X, yp, param)
                  estimators[(i, j)] = est
      return estimators
  # Train the pairwise classifiers on the toy data.
  est = train_OvO(X_train, y_train, train_svm, param=1)
  • 输出

    classes: [0, 1, 2]

    i, ci: 0 0

    j, cj: 0 0

    j, cj: 1 1
    np.logical_or(y_train == ci, y_train == cj): [ True True True True False False]
    X: [[10. 10. ]
    [ 8. 10. ]
    [-5. 5.5]
    [-5.4 5.5]]
    y: [0 0 1 1]

    yp: [ 1 1 -1 -1]

    j, cj: 2 2
    np.logical_or(y_train == ci, y_train == cj): [ True True False False True True]
    X: [[ 10. 10.]
    [ 8. 10.]
    [-20. -20.]
    [-15. -20.]]
    y: [0 0 2 2]

    yp: [ 1 1 -1 -1]

    i, ci: 1 1

    j, cj: 0 0

    j, cj: 1 1

    j, cj: 2 2
    np.logical_or(y_train == ci, y_train == cj): [False False True True True True]
    X: [[ -5. 5.5]
    [ -5.4 5.5]
    [-20. -20. ]
    [-15. -20. ]]
    y: [1 1 2 2]

    yp: [ 1 1 -1 -1]

    i, ci: 2 2

    j, cj: 0 0

    j, cj: 1 1

    j, cj: 2 2

可以看出遍历了i,j的所有情况,每一个i和j都代表了一种类别,但只有在j>i时才会训练,因此每一对不同的类别只训练一次。对类别i和j进行区分的过程就是一个二分类的过程(类别i标记为1,类别j标记为-1)。这样每一对(i,j)都产生了一个分类器 $C_{ij}$。返回值estimators就是一个key为(i,j),value为 $C_{ij}$ 的字典。

(3)test_OvO 分段解析

  1. def test_OvO(X_test, test_func, estimators):
  2. print("len(estimators):", len(estimators))
  3. all_scores = np.zeros((X_test.shape[0], len(estimators)))
  4. print("all_scores.shape:", all_scores.shape)
  5. for i, j in estimators:
  6. est = estimators[(i, j)]
  7. preds = test_func(X_test, est)
  8. print("preds:", preds)
  9. print("i:", i)
  10. print("preds == 1:", preds == 1)
  11. print('all_scores:', all_scores)
  12. all_scores[:, i][preds == 1] += 1
  13. print('all_scores:', all_scores)
  14. print('j:', j)
  15. print('preds == -1:', preds == -1)
  16. all_scores[:, j][preds == -1] += 1
  17. print('all_scores:', all_scores)
  18. # 为什么这里是以横轴判断类别。说明每一样代表一个X_test。
  19. preds = np.argmax(all_scores, axis=1)
  20. return preds
  # Predict the class index of every test sample by pairwise voting.
  preds = test_OvO(X_test, test_svm, est)
  • 输出

    len(estimators): 3
    all_scores.shape: (3, 3)
    preds: [ 1 -1 -1]
    i: 0
    preds == 1: [ True False False]
    all_scores: [[0. 0. 0.]
    [0. 0. 0.]
    [0. 0. 0.]]
    all_scores: [[1. 0. 0.]
    [0. 0. 0.]
    [0. 0. 0.]]
    j: 1
    preds == -1: [False True True]
    all_scores: [[1. 0. 0.]
    [0. 1. 0.]
    [0. 1. 0.]]
    preds: [ 1 1 -1]
    i: 0
    preds == 1: [ True True False]
    all_scores: [[1. 0. 0.]
    [0. 1. 0.]
    [0. 1. 0.]]
    all_scores: [[2. 0. 0.]
    [1. 1. 0.]
    [0. 1. 0.]]
    j: 2
    preds == -1: [False False True]
    all_scores: [[2. 0. 0.]
    [1. 1. 0.]
    [0. 1. 1.]]
    preds: [ 1 1 -1]
    i: 1
    preds == 1: [ True True False]
    all_scores: [[2. 0. 0.]
    [1. 1. 0.]
    [0. 1. 1.]]
    all_scores: [[2. 1. 0.]
    [1. 2. 0.]
    [0. 1. 1.]]
    j: 2
    preds == -1: [False False True]
    all_scores: [[2. 1. 0.]
    [1. 2. 0.]
    [0. 1. 2.]]

    preds: [0 1 2]

    [ 1.45548543 -0.97524791 -2.01288786]
    [ 1.20241692 0.06646526 -1.62537764]
    [ 0.57513497 0.93275666 -0.74550495]

    Process finished with exit code 0

这一段解释了test_OvO是如何工作的。首先构造投票矩阵all_scores:每一行对应一个测试样本,每一列对应一个类别(本例有3个类别,两两组合得到的分类器也恰好是3个,所以输出中len(estimators)与列数都是3)。每一个分类器都会对每一个输入进行一次分类,并给胜出的类别(preds为1时是类别i,为-1时是类别j)累加一票,最后得到一个投票矩阵。对投票矩阵按行求最大值的下标,就得到了每个输入的预测类别。

(4)score_svm函数解析

  print('-' * 50)
  # Raw decision values of every pairwise classifier, in dict insertion order.
  for pair_est in est.values():
      print(score_svm(X_test, pair_est))
  • 输出

    [ 1.45548543 -0.97524791 -2.01288786]
    [ 1.20241692 0.06646526 -1.62537764]
    [ 0.57513497 0.93275666 -0.74550495]

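此处score_svm的输出是decision_function的值,即每个测试样本相对于分类器(i,j)决策超平面的带符号得分(与到超平面的距离成正比):大于0判为类别i,小于0判为类别j,绝对值越大表示分类越有把握。三行输出依次对应(0,1)、(0,2)、(1,2)三个分类器,每行的三个数对应三个测试样本。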

二、20-21题,加入score_svm后的分类

在这里插入图片描述
在这里插入图片描述
根据上面老师的解释,这一题的关键是:使用test_svm函数得到的投票结果矩阵中,如果某一行出现了两个(或更多)并列的最大值,就要使用score_svm来区分谁最优。为此,这里再构造一个与投票矩阵维度相同的得分矩阵score_m。

1、整体代码

  1. import numpy as np
  2. from sklearn.svm import LinearSVC
  3. from sklearn.datasets import load_digits
  4. from sklearn.model_selection import train_test_split
  5. from sklearn.metrics import confusion_matrix
  6. import matplotlib.pyplot as plt
  7. import numpy as np
  8. import scipy as sp
  9. import warnings
  10. warnings.filterwarnings('ignore')
  11. # unit test utilities: you can ignore these function
  def is_approximately_equal(test, target, eps=1e-2):
      """Return True when the mean absolute difference of test and target is below eps.

      Both inputs are converted with ``np.array`` and subtracted element-wise,
      so they must have equal or broadcast-compatible shapes.
      """
      return np.mean(np.fabs(np.array(test) - np.array(target))) < eps
  def assert_test_equality(test, target):
      """Assert that test approximately equals target.

      The comparison is delegated to is_approximately_equal; on failure an
      AssertionError is raised whose message shows target as the expected
      value and test as the value obtained.
      """
      assert is_approximately_equal(test, target), 'Expected:\n %s \nbut got:\n %s' % (target, test)
  def train_svm(X_train, y_train, param):
      """Fit a LinearSVC on (X_train, y_train) with C=param and return the fitted estimator."""
      return LinearSVC(C=param).fit(X_train, y_train)
  19. def test_svm(X_test, est):
  20. return est.predict(X_test)
  def score_svm(X_test, est):
      """Return the raw decision-function values of the fitted estimator est for X_test."""
      return est.decision_function(X_test)
  def train_OvO(X_train, y_train, train_func, param):
      """Train one binary classifier for every unordered pair of classes (one-vs-one).

      For each pair of class indices (i, j) with i < j, the samples labelled
      classes[i] or classes[j] are selected, relabelled +1 (classes[i]) and
      -1 (classes[j]), and passed to ``train_func(X, y, param)``.

      Returns a dict mapping (i, j) to whatever train_func returned, where i
      and j index the sorted list of distinct labels.
      """
      X_train = np.asarray(X_train)
      y_train = np.asarray(y_train)
      classes = sorted(set(y_train.tolist()))
      estimators = dict()
      for i, ci in enumerate(classes):
          # Only pairs with j > i are trained, so each pair is fitted once.
          for j, cj in enumerate(classes[i + 1:], start=i + 1):
              pair_mask = np.logical_or(y_train == ci, y_train == cj)
              X = X_train[pair_mask]
              y = y_train[pair_mask]
              # np.where builds a fresh integer array, so the +1/-1 targets
              # do not depend on the dtype of the original labels.
              yp = np.where(y == ci, 1, -1)
              estimators[(i, j)] = train_func(X, yp, param)
      return estimators
  39. def test_OvO(X_test, test_func, score_func, estimators):
  40. all_scores = np.zeros((X_test.shape[0], len(estimators)))
  41. score_m = np.zeros((X_test.shape[0], len(estimators)))
  42. for i, j in estimators:
  43. est = estimators[(i, j)]
  44. preds = test_func(X_test, est)
  45. # the result of score_func
  46. preds_s = score_func(X_test, est)
  47. all_scores[:, i][preds == 1] += 1
  48. score_m[:, i][preds > 0] += preds_s[preds_s > 0]
  49. all_scores[:, j][preds == -1] += 1
  50. score_m[:, j][preds < 0] += preds_s[preds_s < 0]
  51. print("all_scores:\n", all_scores)
  52. print("score_m:\n",score_m)
  53. print("preds:\n", preds)
  54. # When two or more classifiers end in a tie, use score_func to break the tie
  55. preds = np.argmax(all_scores, axis=1)
  56. # judge whether two or more classifiers end in a tie
  57. for i in range(len(preds)):
  58. all_scores_sort = sorted(all_scores[i])
  59. # use score_func to break the tie
  60. if all_scores_sort[-1] == all_scores_sort[-2]:
  61. preds[i] = np.argmax(score_m, axis=1)[i]
  62. print("preds:\n", preds)
  63. return preds
  # This cell is reserved for the unit tests. Do not consider this cell.
  ### BEGIN TESTS
  X_train = np.array([[10, 10], [8, 10], [-5, 5.5], [-5.4, 5.5], [-20, -20], [-15, -20]])
  y_train = np.array([0, 0, 1, 1, 2, 2])
  X_test = np.array([[11, 11], [-5, 5], [-15, -15]])
  y_test = np.array([0, 1, 2])
  est = train_OvO(X_train, y_train, train_svm, param=1)
  preds = test_OvO(X_test, test_svm, score_svm, est)
  test_cm = confusion_matrix(y_test, preds)
  target_cm = np.eye(3)
  # Computed matrix first, expected matrix second, matching the (test, target)
  # signature so a failure message labels the two values correctly.
  assert_test_equality(test_cm, target_cm)
  ### END TESTS
  • 输出

    all_scores:
    [[2. 1. 0.]
    [1. 2. 0.]
    [0. 1. 2.]]
    score_m:
    [[ 2.65790263 0.57513497 0. ]
    [ 0.06646526 -0.04249243 0. ]
    [ 0. -2.0128883 -2.3708826 ]]
    preds:
    [ 1 1 -1]
    preds:
    [0 1 2]

    Process finished with exit code 0

2、逐段解析

(1)test_OvO分析

  1. def test_OvO(X_test, test_func, score_func, estimators):
  2. all_scores = np.zeros((X_test.shape[0], len(estimators)))
  3. score_m = np.zeros((X_test.shape[0], len(estimators)))
  4. for i, j in estimators:
  5. est = estimators[(i, j)]
  6. preds = test_func(X_test, est)
  7. # the result of score_func
  8. preds_s = score_func(X_test, est)
  9. all_scores[:, i][preds == 1] += 1
  10. score_m[:, i][preds > 0] += preds_s[preds_s > 0]
  11. all_scores[:, j][preds == -1] += 1
  12. score_m[:, j][preds < 0] += preds_s[preds_s < 0]
  13. print("all_scores:\n", all_scores)
  14. print("score_m:\n",score_m)
  15. print("preds:\n", preds)
  16. # When two or more classifiers end in a tie, use score_func to break the tie
  17. preds = np.argmax(all_scores, axis=1)
  18. # judge whether two or more classifiers end in a tie
  19. for i in range(len(preds)):
  20. all_scores_sort = sorted(all_scores[i])
  21. # use score_func to break the tie
  22. if all_scores_sort[-1] == all_scores_sort[-2]:
  23. preds[i] = np.argmax(score_m, axis=1)[i]
  24. print("preds:\n", preds)
  25. return preds
  # Predict with voting plus the score-based tie-break.
  preds = test_OvO(X_test, test_svm, score_svm, est)
  • 输出

    all_scores:
    [[2. 1. 0.]
    [1. 2. 0.]
    [0. 1. 2.]]
    score_m:
    [[ 2.65790263 0.57513497 0. ]
    [ 0.06646526 -0.04249243 0. ]
    [ 0. -2.0128883 -2.3708826 ]]
    preds:
    [ 1 1 -1]
    preds:
    [0 1 2]

由于train_OvO没有改变,故这里不再讲解。接着本节开始的分析,对test_func、score_func分别构造了矩阵all_scores、score_m,用于存放分类器的结果。代码中score_m[:, i][preds > 0] += preds_s[preds_s > 0]的意思是:取出score_func输出中大于零的部分,累加到类别i对应的列上。矩阵all_scores、score_m的构造过程相同。得到的结果如上面输出所示,可以发现score_m其实就是更加精细化的all_scores。代码的最后部分判断all_scores每一行中最大的两个数是否相同,如果相同,就使用score_m矩阵得到最优解。

(2)对我作出的更改进行测试

  import numpy as np

  # Hand-made vote counts: rows are samples, columns are classes.
  all_scores = np.array([
      [0, 2, 1],
      [1, 1, 0],
      [2, 0, 2],
  ])
  # Hand-made accumulated scores with the same layout as all_scores.
  score_m = np.array([
      [0, 2.1, 1.2],
      [1.6, 1.3, 0],
      [2.1, 0, 2.9],
  ])
  preds = np.argmax(all_scores, axis=1)
  print('preds:', preds)
  for i, votes in enumerate(all_scores):
      print('all_scores[i]:', votes)
      all_scores_sort = sorted(votes.tolist())
      print('all_scores_sort:', all_scores_sort)
      # The two largest vote counts being equal means the row ended in a tie.
      if all_scores_sort[-1] == all_scores_sort[-2]:
          # Only the tied classes may win the tie-break; use this row's scores.
          tied = votes == votes.max()
          preds[i] = np.argmax(np.where(tied, score_m[i], -np.inf))
  print('preds:', preds)
  • 输出

    preds: [1 0 0]
    all_scores[i]: [0 2 1]
    all_scores_sort: [0, 1, 2]
    all_scores[i]: [1 1 0]
    all_scores_sort: [0, 1, 1]
    all_scores[i]: [2 0 2]
    all_scores_sort: [0, 2, 2]
    preds: [1 0 2]

分析我给出的矩阵all_scores、score_m:all_scores中第二行和第三行的最大值各出现了两次(并列),直接取argmax得到的预测是preds: [1 0 0];再判断all_scores每一行中最大的两个数是否相同,如果相同,就使用score_m矩阵得到最优解。校正之后的预测为preds: [1 0 2],可见算法可行。

发表评论

表情:
评论列表 (有 0 条评论,368人围观)

还没有评论,来说两句吧...

相关阅读