用 Python 预测今后的北京高考人数
- # coding: utf-8
-
- # 作者:Wizard <github.com/wizardforcel>
- # 预测今后的北京高考人数
- # 假设 x 年的出生人数和 (x + 18) 年的高考人数是线性关系
-
- import numpy as np
- from matplotlib import pyplot as plt
-
def unary_linear_fit(x, y):
    """Fit the line ``y = k * x + b`` to paired samples by least squares.

    The slope is the covariance of ``x`` and ``y`` divided by the variance
    of ``x``. Both are computed with the same 1/n normalisation
    (``np.mean`` and ``ndarray.var`` with its default ``ddof=0``), so the
    ratio is the ordinary least-squares slope.

    Args:
        x: 1-D array-like of predictor values.
        y: 1-D array-like of responses, same length as ``x``.

    Returns:
        Tuple ``(k, b)``: slope and intercept of the fitted line.

    Raises:
        ValueError: if the inputs are not 1-D, differ in length, hold fewer
            than two samples, or ``x`` has zero variance (slope undefined).
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if x.ndim != 1 or y.ndim != 1 or len(x) != len(y):
        raise ValueError("x and y must be 1-D sequences of equal length")
    if x.size < 2:
        raise ValueError("at least two samples are required to fit a line")

    x_var = x.var()
    # A constant x would otherwise yield a silent 0/0 -> NaN slope.
    if x_var == 0:
        raise ValueError("x has zero variance; the slope is undefined")

    cov_x_y = np.mean((x - x.mean()) * (y - y.mean()))
    k = cov_x_y / x_var
    # The fitted line passes through the point (mean(x), mean(y)).
    b = y.mean() - k * x.mean()
    return k, b
-
-
def r_square(y, y_hat):
    """Return the coefficient of determination (R^2) of a prediction.

    Computed as ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of
    squared residuals ``(y - y_hat)`` and ``SS_tot`` is the sum of squared
    deviations of ``y`` from its own mean.

    Args:
        y: 1-D array-like of observed values.
        y_hat: 1-D array-like of predicted values, same length as ``y``.

    Returns:
        The R^2 score. If ``y`` is constant, ``SS_tot`` is zero and the
        result follows NumPy's division-by-zero semantics (nan or inf).

    Raises:
        ValueError: if the inputs are not 1-D or differ in length.
    """
    y = np.asarray(y)
    y_hat = np.asarray(y_hat)

    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if y.ndim != 1 or y_hat.ndim != 1 or len(y) != len(y_hat):
        raise ValueError("y and y_hat must be 1-D sequences of equal length")

    ss_res = np.sum((y - y_hat) ** 2)
    ss_tot = np.sum((y - y.mean()) ** 2)
    return 1 - ss_res / ss_tot
-
# Number of births per year (unit: 10,000 people) for 1988 through 2015.
x = np.asarray([
    15.3, 19, 14, 9.2, 8.3, 7.9, 8.5, 8.5, 7.8, 7.6, 6.7, 6.3, 7.2,
    6, 6, 4.5, 6.6, 7.7, 7.7, 9.9, 10.6, 10.9, 10.2, 12.5, 14.5, 13.6,
    17.2, 12.3,
])

# Exam-taker counts, recorded from 2006 onwards (i.e. the 1988 birth cohort).
y_train = np.asarray([12.6, 12.5, 11.8, 10.1, 8.1, 7.6, 7.3, 7.27, 7.05, 6.8, 6.12, 6])

# Birth years that already have a known exam count are used for fitting;
# the remaining birth years are the ones to predict.
x_train = x[:len(y_train)]
x_pred = x[len(y_train):]

k, b = unary_linear_fit(x_train, y_train)

y_train_hat = k * x_train + b

rsq = r_square(y_train, y_train_hat)
# print() with a single argument behaves the same on Python 2 and Python 3.
print(rsq)
# Recorded output: 0.870773027191
# The goodness of fit is fairly high.

y_pred = k * x_pred + b

# Exam year = birth year + 18.
years = np.arange(1988, 2015 + 1) + 18
yr_train = years[:len(y_train)]
yr_pred = years[len(y_train):]

plt.plot(yr_train, y_train, 'b', label='history value')
plt.plot(yr_pred, y_pred, 'r', label='prediction value')

plt.title('Beijing people for CEE')
plt.xlabel('year')
# The data is in units of 10,000 people, i.e. 1e4 (10e4 would be 100,000).
plt.ylabel('population / 1e4')
plt.legend(loc='best')
plt.show()