机器学习-逻辑回归算法推理+代码复现

ゝ一世哀愁。 2023-02-15 05:24 42阅读 0赞

# 1、算法推理: #

这里算法推理和代码复现原型参考:[https://blog.csdn.net/zouxy09/article/details/20319673][https_blog.csdn.net_zouxy09_article_details_20319673]

# 2、算法应用背景-课题设计: #

企业财务风险预警是企业风险预警系统的一个重要组成部分,它能有效地预知部分财务风险。本课题将风险公司记为ST,非风险公司记为非ST,ST判断标准如下:

1)连续两年年报显示净利润为负值

2)净资产收益率、总资产净利润率为负值。

其影响特征变量如下:

![watermark_type_ZmFuZ3poZW5naGVpdGk_shadow_10_text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0xpbnV4X2Jpbg_size_16_color_FFFFFF_t_70][]

注:

现金比率= 货币资金÷ 流动负债;

盈利现金比率= 经营活动的现金净流量÷ 净利润;

主营业务鲜明程度= 主营业务利润÷ |净利润|;

请利用2018年的财务指标相关数据,构建基于BP神经网络的财务风险预警模型。

# 3、代码复现 #

## 3.1 train & predict ##

```python
from numpy import *
import matplotlib.pyplot as plt
import time
import numpy as np
from data import Dataset
import pickle


# Algorithm derivation: https://blog.csdn.net/zouxy09/article/details/20319673
# Code is committed to https://github.com/Linby1103/Pre-warning.git
class LR(object):
    """Binary logistic-regression model trained with iterative weight updates.

    The training samples and labels are stored on the instance and used by
    ``train``. Matrix products below are written with ``*``, so both are
    expected to be ``np.matrix`` objects (one row per sample; labels as a
    column), as built by the script entry point.
    """

    def __init__(self, train_data, label_):
        """Store the training matrix and the label column."""
        self.traindata = train_data
        self.label = label_

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + e**-z)``, element-wise."""
        return 1.0 / (1 + np.exp(-z))

    def train(self, opts, weight_path="weights.pkl"):
        """Fit the weights and pickle them to ``weight_path``.

        Args:
            opts: dict with keys ``'alpha'`` (step size), ``'maxIter'``
                (number of passes) and ``'optimizeType'`` (one of
                ``'gradDescent'``, ``'stocGradDescent'``,
                ``'smoothStocGradDescent'``).
            weight_path: file that receives ``{"weight": weights}`` as a
                pickle.

        Raises:
            NameError: if ``opts['optimizeType']`` is not a supported name.

        A failure to write the weight file is reported on stdout and does not
        raise (saving is best-effort).
        """
        start_time = time.time()
        num_samples, num_features = np.shape(self.traindata)
        alpha = opts['alpha']
        max_iter = opts['maxIter']
        optimize_type = opts['optimizeType']
        weights = np.ones((num_features, 1))

        for k in range(max_iter):
            if optimize_type == 'gradDescent':
                # Batch update: every sample contributes to one step.
                output = self.sigmoid(self.traindata * weights)
                error = self.label - output
                weights = weights + alpha * self.traindata.transpose() * error
            elif optimize_type == 'stocGradDescent':
                # One update per sample, visiting the samples in order.
                for i in range(num_samples):
                    sample = self.traindata[i, :]
                    # traindata is an np.matrix, so `*` is a matrix product
                    # (the same as dot).
                    output = self.sigmoid(sample * weights)
                    error = self.label[i, 0] - output
                    weights = weights + alpha * sample.transpose() * error
            elif optimize_type == 'smoothStocGradDescent':
                # Visit every sample once per pass in random order, with a
                # step size that shrinks as training proceeds.
                # A list (not a range) is required so entries can be deleted.
                remaining = list(range(num_samples))
                for i in range(num_samples):
                    alpha = 4.0 / (1.0 + k + i) + 0.01
                    pick = int(np.random.uniform(0, len(remaining)))
                    # Map the random position to the actual sample index so
                    # that an already-used sample is never drawn again.
                    sample_index = remaining[pick]
                    sample = self.traindata[sample_index, :]
                    output = self.sigmoid(sample * weights)
                    error = self.label[sample_index, 0] - output
                    weights = weights + alpha * sample.transpose() * error
                    del remaining[pick]
            else:
                raise NameError('Not support optimize method type!')

        print('training finished! %fs!' % (time.time() - start_time))

        # Saving is best-effort: report the failure instead of raising.
        # `with` closes the file on every path and avoids touching an unbound
        # file variable when open() itself fails.
        try:
            with open(weight_path, 'wb') as pickf:
                print("Save weight to {}".format(weight_path))
                pickle.dump({"weight": weights}, pickf)
        except Exception as e:
            print("Write weight to {} fail! {}".format(weight_path, e))

    def predict(self, weights, test_x, test_y):
        """Classify each row of ``test_x`` and print the accuracy.

        Args:
            weights: column of weights, as produced by ``train``.
            test_x: matrix with one sample per row.
            test_y: column matrix of expected labels (truthy means class 1).

        Returns:
            list of 0/1 predictions, one per row of ``test_x``.
        """
        num_samples = np.shape(test_x)[0]
        match_count = 0
        result = []
        for i in range(num_samples):
            probability = self.sigmoid(test_x[i, :] * weights)[0, 0]
            predicted = 1 if probability > 0.5 else 0
            result.append(predicted)
            if predicted == int(bool(test_y[i, 0])):
                match_count += 1
        # Guard against an empty test set instead of dividing by zero.
        accuracy = float(match_count) / num_samples if num_samples else 0.0
        print("accuracy={}".format(accuracy))
        return result

    def load_weight(self, path):
        """Load the weights pickled by ``train``.

        Returns:
            the stored weight object, or an empty dict if loading failed
            (the failure is printed, not raised).
        """
        weight = {}
        try:
            # NOTE: pickle can execute arbitrary code while loading; only
            # load weight files that this program wrote itself.
            with open(path, 'rb') as weight_file:
                weight = pickle.load(weight_file)["weight"]
            print(weight.shape)
        except Exception as e:
            print("Load weight faild! {}".format(e))
        return weight


def loadData():
    """Read ``./data/testdata.txt`` into a feature matrix and a label column.

    Each non-blank line holds three whitespace-separated numbers: two
    features and the label. A constant 1.0 is prepended to every feature row.

    Returns:
        tuple ``(features, labels)`` of ``np.matrix`` objects; ``labels`` is
        a column.
    """
    train_x = []
    train_y = []
    with open('./data/testdata.txt') as file_in:
        for line in file_in:
            fields = line.strip().split()
            if not fields:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            train_x.append([1.0, float(fields[0]), float(fields[1])])
            train_y.append(float(fields[2]))
    # np.asmatrix is used because the `mat` alias was removed in NumPy 2.0.
    return np.asmatrix(train_x), np.asmatrix(train_y).transpose()


if __name__ == "__main__":
    STATUS = ['非ST', 'ST']
    save_model = "weights.pkl"
    train, label = Dataset.GetDatafromDict()
    trainarrayx = np.asmatrix(train)
    labelarray = np.asmatrix(label).transpose()
    logistreg = LR(trainarrayx, labelarray)
    opts = {"alpha": 0.001, "maxIter": 200, "optimizeType": "gradDescent"}
    logistreg.train(opts, save_model)
    weights = logistreg.load_weight(save_model)
    res = logistreg.predict(weights, trainarrayx, labelarray)
    for predicted, actual in zip(res, label):
        print("预测结果 :{}---原始数据:{}".format(STATUS[predicted], STATUS[actual]))
```

## 3.2 Create dataset-数据来源于excel ##

```python
import xlrd
import os
import numpy as np


class ExcelReade(object):
    """Read one worksheet of an Excel workbook into a list of row dicts."""

    def __init__(self, excel_name, sheet_name):
        """Open the workbook and select the worksheet.

        Args:
            excel_name: path of the workbook; it is passed to xlrd unchanged.
            sheet_name: name of the worksheet to read.

        NOTE(review): xlrd 2.0 and later read only ``.xls`` files; the
        ``.xlsx`` paths used in this script need xlrd < 2.0 - confirm the
        installed version.
        """
        self.excel_path = excel_name
        # Open the requested workbook.
        self.date = xlrd.open_workbook(self.excel_path)
        # Select the requested worksheet.
        self.table = self.date.sheet_by_name(sheet_name)
        self.rows = self.table.nrows  # total number of rows
        self.cols = self.table.ncols  # total number of columns

    def data_dict(self):
        """Return the data rows as dicts keyed by the header row.

        Cells equal to the string ``"NULL"`` are replaced by ``"0"``.

        Returns:
            list of dicts (one per row after the header), or ``False`` when
            the sheet has no data rows.
        """
        if self.rows <= 1:
            print("总行数小于等于1,路径:", end='')
            print(self.excel_path)
            return False

        # The first row supplies the dictionary keys.
        keys = self.table.row_values(0)
        data = []
        # Data starts on the second row.
        for i in range(1, self.rows):
            # Read the row once instead of once per cell.
            values = self.table.row_values(i)
            row = {}
            for j in range(self.cols):
                row[keys[j]] = values[j] if values[j] != "NULL" else "0"
            data.append(row)
        return data


# Sheet header: Stkcd 净利润2017 净利润2018 净资产收益率 总资产净利润率 x1 ... x14
# Columns copied into each feature vector, in this order.
FEATURE_COLUMNS = (
    '净利润2017',
    '净利润2018',
    '净资产收益率',
    '总资产净利润率',
) + tuple('x{}'.format(i) for i in range(1, 15))


def GetDatafromDict(data=None):
    """Build the feature matrix and label vector from the spreadsheet rows.

    Args:
        data: optional list of row dicts as returned by
            ``ExcelReade.data_dict``. When omitted, the rows are read from
            sheet ``Sheet1`` of ``./data/dataset.xlsx``.

    Returns:
        tuple ``(train, label)`` of ``np.ndarray``: one row of 18 floats per
        sample, and one 0/1 label per sample. Both are empty when there are
        no data rows.
    """
    if data is None:
        data = ExcelReade('./data/dataset.xlsx', 'Sheet1').data_dict()
    if not data:
        # data_dict() returns False for a sheet without data rows.
        return np.array([]), np.array([])

    train = []
    label = []
    for row in data:
        train.append([float(row[column]) for column in FEATURE_COLUMNS])
        # NOTE(review): the label is 0 only when the two yearly net profits
        # have opposite signs, so two profitable years are labelled 1 (ST)
        # as well. The article defines ST as two consecutive loss years -
        # confirm the intended rule before relying on these labels.
        profit_product = float(row['净利润2017']) * float(row['净利润2018'])
        label.append(0 if profit_product < 0 else 1)
    return np.array(train), np.array(label)


if __name__ == '__main__':
    start = ExcelReade('C:/Users/91324/Desktop/datasset.xlsx', 'Sheet1')
    data = start.data_dict()
    GetDatafromDict(data)
```

数据集,代码上传至:[https://github.com/Linby1103/Pre-warning.git][https_github.com_Linby1103_Pre-warning.git]

[https_blog.csdn.net_zouxy09_article_details_20319673]: https://blog.csdn.net/zouxy09/article/details/20319673
[watermark_type_ZmFuZ3poZW5naGVpdGk_shadow_10_text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0xpbnV4X2Jpbg_size_16_color_FFFFFF_t_70]: https://img-blog.csdnimg.cn/20200604170633765.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0xpbnV4X2Jpbg==,size_16,color_FFFFFF,t_70
[https_github.com_Linby1103_Pre-warning.git]: https://github.com/Linby1103/Pre-warning.git
还没有评论,来说两句吧...