sklearn实现多种机器学习中的集成算法。 包括Adaboost,随机森林,梯度提升回归等算法

àì夳堔傛蜴生んèń 2022-04-05 14:41 242阅读 0赞

求各位进来的老铁帮个忙。。帮我把最后自己写的那个提升算法完善一下。。测试集该怎么测试准确率??? 求大佬补充

  1. from sklearn.datasets import load_iris
  2. # 用决策树作为基础模型
  3. from sklearn.tree import DecisionTreeClassifier
  4. from sklearn.model_selection import train_test_split
  5. from sklearn.ensemble import GradientBoostingClassifier
  6. from sklearn.ensemble import AdaBoostClassifier
  7. from sklearn.ensemble import RandomForestClassifier
  8. import pandas as pd
  9. # bagging模型
  10. def RandomForestBagging(X, y):
  11. '''
  12. 随机森林
  13. :param X:
  14. :param y:
  15. :return:
  16. '''
  17. x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
  18. rd = RandomForestClassifier(
  19. n_estimators=100,
  20. criterion="gini",
  21. max_depth=4,
  22. )
  23. rd.fit(x_train, y_train)
  24. print("随机森林的测试集上的准确率:", rd.score(x_test, y_test))
  25. print("随机森林的训练集上的准确率:", rd.score(x_train, y_train))
  26. # boosting模型汇总
  27. def GardientBoosting(X, y):
  28. '''
  29. 梯度提升算法
  30. :return:
  31. '''
  32. x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
  33. gbrt = GradientBoostingClassifier(max_depth=2, n_estimators=3, learning_rate=1.0)
  34. gbrt.fit(x_train, y_train)
  35. print("梯度提升回归树的测试集上准确率:", gbrt.score(x_test, y_test))
  36. print("梯度提升回归树的训练集上准确率:", gbrt.score(x_train, y_train))
  37. def AdaBoosting(X, y):
  38. '''
  39. 自适应提升算法
  40. :param X:
  41. :param y:
  42. :return:
  43. '''
  44. x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
  45. ada = AdaBoostClassifier(
  46. base_estimator=DecisionTreeClassifier(max_depth=1),
  47. n_estimators=100,
  48. learning_rate=0.5,
  49. algorithm='SAMME.R',
  50. random_state=0
  51. )
  52. ada.fit(x_train, y_train)
  53. print("自适应提升算法的测试集上准确率:", ada.score(x_test, y_test))
  54. print("自适应提升算法的训练集上准确率:", ada.score(x_train, y_train))
  55. # stacking模型汇总
  56. def selfsuanfa(x_train, y_train):
  57. from sklearn.svm import SVC
  58. from sklearn.naive_bayes import GaussianNB
  59. from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
  60. from sklearn.linear_model import LogisticRegression
  61. from sklearn.neighbors import KNeighborsClassifier
  62. from sklearn.neural_network import MLPClassifier
  63. SEED = 1
  64. nb = GaussianNB()
  65. svc = SVC(C=100, probability=True)
  66. knn = KNeighborsClassifier(n_neighbors=3)
  67. lr = LogisticRegression(C=100, random_state=SEED)
  68. nn = MLPClassifier((80, 10), early_stopping=False, random_state=SEED)
  69. gb = GradientBoostingClassifier(n_estimators=100, random_state=SEED)
  70. rf = RandomForestClassifier(n_estimators=10, max_features=3, random_state=SEED)
  71. # 第一层定义好了
  72. nb.fit(x_train, y_train)
  73. data1 = nb.predict(x_train).reshape(-1, 1)
  74. svc.fit(x_train, y_train)
  75. data2 = svc.predict(x_train).reshape(-1, 1)
  76. knn.fit(x_train, y_train)
  77. data3 = knn.predict(x_train).reshape(-1, 1)
  78. data_level1 = pd.DataFrame(data1, columns=['nb'])
  79. data_level1['svc'] = data2
  80. data_level1['knn'] = data3
  81. data_level1['real'] = y_train
  82. # print(data_level1)
  83. # 第二层
  84. X1 = data_level1[['nb', 'svc', 'knn']]
  85. y1 = data_level1['real']
  86. x_train1 = X1
  87. y_train1 = y1
  88. nn.fit(x_train1, y_train1)
  89. data11 = nn.predict(x_train1).reshape(-1, 1)
  90. data_level2 = pd.DataFrame(data11, columns=['nn'])
  91. gb.fit(x_train1, y_train1)
  92. data22 = gb.predict(x_train1).reshape(-1, 1)
  93. data_level2['gb'] = data22
  94. lr.fit(x_train1, y_train1)
  95. data33 = lr.predict(x_train1).reshape(-1, 1)
  96. data_level2['lr'] = data33
  97. data_level2['real2'] = y_train1
  98. # print(data_level2)
  99. # 第三层
  100. X2 = data_level2[['nn', 'gb', 'lr']]
  101. y2 = data_level2[['real2']]
  102. x_train2 = X2
  103. y_train2 = y2
  104. rf.fit(x_train2, y_train2)
  105. print("最强集成算法的测试集上准确率:", rf.score(x_train2, y_train2))
  106. if __name__ == '__main__':
  107. iris = load_iris()
  108. X = iris.data
  109. y = iris.target
  110. # model1(X, y)
  111. RandomForestBagging(X, y) # 随机森林算法
  112. GardientBoosting(X, y) # 梯度提升算法
  113. AdaBoosting(X, y) # 自适应提升算法
  114. x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
  115. # 训练集上的准确率
  116. selfsuanfa(x_train, y_train) # 就这个算法,求老铁补充一下

发表评论

表情:
评论列表 (有 0 条评论,242人围观)

还没有评论,来说两句吧...

相关阅读

    相关 随机森林与梯度提升

    提升树模型: 提升方法实际采用加法模型(即基函数的线形组合)与前向分步算法。以决策树为基函数的提升方法称为提升树。对分类问题决策树是二叉分类树。提升树模型可以表示为决策树的加

    相关 随机森林与梯度提升

    提升树模型: 提升方法实际采用加法模型(即基函数的线形组合)与前向分步算法。以决策树为基函数的提升方法称为提升树。对分类问题决策树是二叉分类树。提升树模型可以表示为决策树的加