我是靠谱客的博主 俏皮含羞草,这篇文章主要介绍第八章,现在分享给大家,希望可以做个参考。

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# 例题 (worked example): fiscal-revenue prediction.
# Pipeline: Pearson correlation -> Lasso feature selection -> GM(1,1) grey
# forecasts of each selected feature for 2014/2015 -> LinearSVR regression.
import pandas as pd
import numpy as np


def GM11(x0):
    """Grey prediction model GM(1,1).

    Parameters
    ----------
    x0 : 1-D numpy array
        The original data series (assumed positive -- TODO confirm upstream).

    Returns
    -------
    tuple ``(f, a, b, x0[0], C, P)`` where
        f : callable -- restored-value prediction function, ``f(k)`` for 1-based k
        a : float    -- development coefficient
        b : float    -- grey input
        C : float    -- posterior variance ratio (smaller is better)
        P : float    -- small-error probability (larger is better)
    """
    x1 = x0.cumsum()                                # 1-AGO accumulated series
    z1 = (x1[:len(x1) - 1] + x1[1:]) / 2.0          # adjacent-mean (MEAN) series
    z1 = z1.reshape((len(z1), 1))
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((len(x0) - 1, 1))
    # Least-squares estimate of parameters a and b.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)
    # Restored (inverse-AGO) prediction function.
    f = lambda k: (x0[0] - b / a) * np.exp(-a * (k - 1)) - (x0[0] - b / a) * np.exp(-a * (k - 2))
    delta = np.abs(x0 - np.array([f(i) for i in range(1, len(x0) + 1)]))
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P


def main():
    # sklearn imported lazily so the module stays importable without it.
    from sklearn.linear_model import Lasso
    from sklearn.svm import LinearSVR

    # NOTE: raw strings are required for these Windows paths.  The original
    # code's "C:\data\new_reg_data.csv" turned \n into a newline and \d into
    # an invalid escape -- a real path bug.
    data = pd.read_csv(r"C:\data\data8.csv", encoding='gbk')
    print(np.round(data.corr(method='pearson'), 2))

    # Lasso with a large alpha keeps only the strongest predictors.
    lasso = Lasso(1000).fit(data.iloc[:, 0:13], data['y'])
    print(np.round(lasso.coef_, 5))
    print(np.sum(lasso.coef_ != 0))
    mask = lasso.coef_ != 0
    new_reg_data = data.iloc[:, mask]
    new_reg_data.to_csv(r'C:\data\new_reg_data.csv')
    print(new_reg_data.shape)

    # Grey-forecast each selected feature two years ahead (2014, 2015).
    new_reg_data = pd.read_csv(r'C:\data\new_reg_data.csv')
    data = pd.read_csv(r"C:\data\data8.csv", encoding='gbk')
    new_reg_data.index = range(1994, 2014)
    new_reg_data.loc[2014] = None
    new_reg_data.loc[2015] = None
    features = ['x1', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x13']
    for col in features:
        # .values replaces DataFrame.as_matrix(), removed in pandas >= 1.0.
        f = GM11(new_reg_data.loc[range(1994, 2014), col].values)[0]
        new_reg_data.loc[2014, col] = f(len(new_reg_data) - 1)
        new_reg_data.loc[2015, col] = f(len(new_reg_data))
        new_reg_data[col] = new_reg_data[col].round(2)
    y = list(data['y'].values)
    y.extend([np.nan, np.nan])
    new_reg_data['y'] = y
    new_reg_data.to_excel(r'C:\data\new_reg_data_GM11.xlsx')

    # LinearSVR on standardized features; predictions de-standardized back.
    data = pd.read_excel(r'C:\data\new_reg_data_GM11.xlsx')
    data_train = data.loc[range(1994, 2014)].copy()
    data_mean = data_train.mean()
    data_std = data_train.std()
    data_train = (data_train - data_mean) / data_std
    x_train = data_train[features].values
    y_train = data_train['y'].values
    linearsvr = LinearSVR().fit(x_train, y_train)
    x = ((data[features] - data_mean[features]) / data_std[features]).values
    data['y_pred'] = linearsvr.predict(x) * data_std['y'] + data_mean['y']
    data.to_excel(r'C:\data\new_reg_data_GM11_revenue.xlsx')
    print(data[['y', 'y_pred']])
    data[['y', 'y_pred']].plot(subplots=True, style=['b-o', 'r-*'],
                               xticks=data.index[::2])


if __name__ == '__main__':
    main()
复制代码
1
2
3
4
5
# 操作题 (exercise): Pearson correlation matrix of the exercise data set.
# Fixes: the original snippet used `np` without importing numpy, and the
# path string "C:\data\data88.csv" relied on \d being passed through -- an
# invalid escape sequence.  A raw string makes the path explicit.
import pandas as pd
import numpy as np


def pearson_matrix(frame):
    """Return the Pearson correlation matrix of *frame*, rounded to 2 decimals."""
    return np.round(frame.corr(method='pearson'), 2)


if __name__ == '__main__':
    data = pd.read_csv(r"C:\data\data88.csv", encoding='gbk')
    print(pearson_matrix(data))
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# 实训 (practice) 1-3: income-tax prediction.
# Pearson correlation -> Lasso feature selection -> GM(1,1) forecasts for
# 2014/2015 -> LinearSVR model with regression-metric evaluation.
# The original post pasted two near-identical versions of this script
# back-to-back (the second behind a stray ```cpp fence); they are
# consolidated here into one coherent version.
import pandas as pd
import numpy as np


def GM11(x0):
    """Grey prediction model GM(1,1).

    Parameters
    ----------
    x0 : 1-D numpy array
        The original data series (assumed positive -- TODO confirm upstream).

    Returns
    -------
    tuple ``(f, a, b, x0[0], C, P)`` where
        f : callable -- restored-value prediction function, ``f(k)`` for 1-based k
        a : float    -- development coefficient
        b : float    -- grey input
        C : float    -- posterior variance ratio (smaller is better)
        P : float    -- small-error probability (larger is better)
    """
    x1 = x0.cumsum()                                # 1-AGO accumulated series
    z1 = (x1[:len(x1) - 1] + x1[1:]) / 2.0          # adjacent-mean (MEAN) series
    z1 = z1.reshape((len(z1), 1))
    B = np.append(-z1, np.ones_like(z1), axis=1)
    Yn = x0[1:].reshape((len(x0) - 1, 1))
    # Least-squares estimate of parameters a and b.
    [[a], [b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn)
    # Restored (inverse-AGO) prediction function.
    f = lambda k: (x0[0] - b / a) * np.exp(-a * (k - 1)) - (x0[0] - b / a) * np.exp(-a * (k - 2))
    delta = np.abs(x0 - np.array([f(i) for i in range(1, len(x0) + 1)]))
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, x0[0], C, P


def main():
    # sklearn imported lazily so the module stays importable without it.
    from sklearn.linear_model import Lasso
    from sklearn.svm import LinearSVR
    from sklearn.metrics import (explained_variance_score, mean_absolute_error,
                                 median_absolute_error, r2_score)

    # 实训1: Pearson correlation of the raw features.
    # Raw string: the original "C:\data\income_tax.csv" relied on invalid escapes.
    data = pd.read_csv(r"C:\data\income_tax.csv", encoding='gbk')
    data = data.iloc[:-2, 1:]  # drop the two tail rows and the year column
    print('相关系数为:\n', np.round(data.corr(method='pearson'), 2))

    # 实训2: Lasso feature selection.  NOTE: Lasso's first positional argument
    # is alpha (the regularization strength), NOT the iteration count as the
    # original comment ("1000是最大循环次数") claimed.
    lasso = Lasso(10000, random_state=12)
    lasso.fit(data.iloc[:, 0:10], data['y'])
    print('相关系数为:\n', np.round(lasso.coef_, 5))
    print('相关系数非零个数为:', np.sum(lasso.coef_ != 0))
    mask = lasso.coef_ != 0          # boolean mask of retained features
    print('相关系数是否为零:\n', mask)
    new_data = data.iloc[:, mask].copy()  # .copy() avoids chained-assignment warnings
    print('新数据的维度为:', new_data.shape)

    # 实训3 (1): grey-forecast each retained feature for 2014 and 2015.
    new_data.index = range(2004, 2014)
    new_data.loc[2014] = None
    new_data.loc[2015] = None
    features = ['x1', 'x2', 'x3', 'x4', 'x5', 'x7', 'x8', 'x9', 'x10']
    for col in features:
        # .values replaces DataFrame.as_matrix(), removed in pandas >= 1.0.
        f = GM11(new_data.loc[range(2004, 2014), col].values)[0]
        new_data.loc[2014, col] = f(len(new_data) - 1)
        new_data.loc[2015, col] = f(len(new_data))
        new_data[col] = new_data[col].round(2)
    y = list(data['y'].values)
    y.extend([np.nan, np.nan])       # targets for the forecast years are unknown
    new_data['y'] = y
    print('2014年和2015年预测结果为:\n', new_data.loc[2014:2015, :])

    # 实训3 (2): LinearSVR on standardized data; predictions de-standardized.
    data_train = new_data.loc[range(2004, 2014)].copy()
    data_mean = data_train.mean()
    data_std = data_train.std()
    data_train = (data_train - data_mean) / data_std
    x_train = data_train[features].values
    y_train = data_train['y'].values
    linearsvr = LinearSVR()
    linearsvr.fit(x_train, y_train)
    x = ((new_data[features] - data_mean[features]) / data_std[features]).values
    new_data['y_pred'] = linearsvr.predict(x) * data_std['y'] + data_mean['y']
    print('真实值与预测值分别为:\n', new_data[['y', 'y_pred']])

    # 实训3 (3): evaluate on the years with known targets only (drop the
    # two forecast rows, whose y is NaN).
    data_y = new_data['y'].iloc[:-2]
    data_y_pred = new_data['y_pred'].iloc[:-2]
    print('平均绝对误差为:', mean_absolute_error(data_y, data_y_pred))
    print('中值绝对误差为:', median_absolute_error(data_y, data_y_pred))
    print('可解释方差值为:', explained_variance_score(data_y, data_y_pred))
    print('R2值为:', r2_score(data_y, data_y_pred))


if __name__ == '__main__':
    main()

最后

以上就是俏皮含羞草最近收集整理的关于第八章的全部内容,更多相关第八章内容请搜索靠谱客的其他文章。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(99)

评论列表共有 0 条评论

立即
投稿
返回
顶部