机器学习策略——基于逻辑回归的股市趋势预测(及SVM策略)

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

import seaborn

import matplotlib as mpl

# Use a serif font family for every figure drawn below.
mpl.rcParams['font.family'] = 'serif'

import warnings; warnings.simplefilter('ignore')  # Ignore any warnings that may be raised; warnings are not errors.

1. 数据获取

import pandas as pd

import numpy as np

import tushare as ts

# Training set: price data for the 'hs300' symbol as returned by
# tushare's get_k_data for 2013-01-01 .. 2016-06-25.
hs300 = ts.get_k_data('hs300', start='2013-01-01', end='2016-06-25')

# Index the frame by its 'date' column. This must run exactly once: after
# the call 'date' is no longer a column, so repeating set_index('date')
# raises KeyError.
hs300.set_index('date', inplace=True)

# Row-over-row percentage change of the closing price.
hs300['returns'] = hs300['close'].pct_change()

# Drop rows containing NaN (pct_change leaves the first row empty);
# NaN values would interfere with the machine-learning algorithms.
hs300.dropna(inplace=True)

2. 数据处理——特征工程处理

# Feature generation: for each lag i in 1..7 add a column 'close - <i>d'
# holding the closing price from i rows earlier.
for i in range(1, 8):
    hs300['close - ' + str(i) + 'd'] = hs300['close'].shift(i)

# Keep every column whose name contains 'close' (the close itself plus the
# seven lags) and skip the first 7 rows, whose lagged values are NaN.
hs_7d = hs300[[x for x in hs300.columns if 'close' in x]].iloc[7:]

import sklearn
# Import the submodule explicitly: a bare `import sklearn` does not by
# itself guarantee that `sklearn.preprocessing` is loaded as an attribute.
import sklearn.preprocessing

from sklearn import linear_model

X_train = hs_7d

# Standardize the features (sklearn.preprocessing.scale returns an ndarray).
X_train = sklearn.preprocessing.scale(X_train)

# Notebook display of the scaled feature matrix.
X_train

3. 逻辑回归预测股价趋势算法实现

lm = linear_model.LogisticRegression(C=1000)

# Training labels: the sign of the NEXT row's close-to-close change.
# shift(-1) moves each row's change one row up, so every row is labelled
# with what happens on the following row.
y_train = np.sign(hs_7d['close'].pct_change().shift(-1))

# The last row has no following row, so its label is NaN; map it to 0.
# NOTE(review): that final sample is therefore trained with an artificial
# 0 label -- confirm this is acceptable.
# `.values` yields the 1-D array scikit-learn expects; the previous
# `Series.reshape(-1, 1)` call no longer exists in current pandas.
y_train = y_train.fillna(0).values

# Notebook display of the last ten labels.
y_train[-10:]

lm.fit(X_train, y_train)

# In-sample accuracy (notebook display).
lm.score(X_train, y_train)

# `np.nan` is the spelling supported by every NumPy version
# (`np.NaN` was removed in NumPy 2.0).
hs300['prediction'] = np.nan

# Store the predicted training labels on rows 7 onward with one positional
# assignment. The previous chained `hs300['prediction'].ix[7:] = ...`
# relied on the removed `.ix` indexer and on writing through a temporary.
hs300.iloc[7:, hs300.columns.get_loc('prediction')] = lm.predict(X_train)

# Strategy equity curve: the previous row's prediction multiplied by this
# row's return, compounded.
hs300['strategy'] = (hs300['prediction'].shift(1) * hs300['returns'] + 1).cumprod()

# Compounded return of the raw 'returns' column, for comparison.
hs300['cum_ret'] = (hs300['returns'] + 1).cumprod()

hs300[['strategy', 'cum_ret']].dropna().plot(figsize=(10, 6))

图1:训练集上逻辑回归策略净值(strategy)与累计收益(cum_ret)的对比

4. 改变算法:SVM

# NOTE(review): the SVM is fitted on the raw (unscaled) close-price
# features, unlike the logistic regression above which uses standardized
# ones. Left as is because the SVM test section also feeds raw features.
X_train = hs_7d

from sklearn.svm import SVC

clf_SVC = SVC(kernel='linear')

clf_SVC.fit(X_train, y_train)

# In-sample accuracy (notebook display).
clf_SVC.score(X_train, y_train)

# `np.nan` is the spelling supported by every NumPy version
# (`np.NaN` was removed in NumPy 2.0).
hs300['prediction'] = np.nan

# Store the predicted training labels on rows 7 onward with one positional
# assignment instead of the removed `.ix` indexer in a chained assignment.
hs300.iloc[7:, hs300.columns.get_loc('prediction')] = clf_SVC.predict(X_train)

# Strategy equity curve: the previous row's prediction multiplied by this
# row's return, compounded.
hs300['strategy'] = (hs300['prediction'].shift(1) * hs300['returns'] + 1).cumprod()

# Compounded return of the raw 'returns' column, for comparison.
hs300['cum_ret'] = (hs300['returns'] + 1).cumprod()

hs300[['strategy', 'cum_ret']].dropna().plot(figsize=(10, 6))

图2:训练集上SVM策略净值(strategy)与累计收益(cum_ret)的对比

5. 逻辑回归算法在测试集的验证

# Test set: price data for the 'hs300' symbol as returned by tushare's
# get_k_data for 2016-07-01 .. 2017-06-30.
hs300_test = ts.get_k_data('hs300', start='2016-07-01', end='2017-06-30')

hs300_test.set_index('date', inplace=True)

# Row-over-row percentage change of the closing price.
hs300_test['returns'] = hs300_test['close'].pct_change()

# Drop rows containing NaN (pct_change leaves the first row empty).
hs300_test.dropna(inplace=True)

# Same lagged-close features as the training set: 'close - 1d' .. 'close - 7d'.
for i in range(1, 8):
    hs300_test['close - ' + str(i) + 'd'] = hs300_test['close'].shift(i)

# Keep the close-related columns and skip the first 7 rows, whose lagged
# values are NaN.
hs_7d_test = hs300_test[[x for x in hs300_test.columns if 'close' in x]].iloc[7:]

X_test = hs_7d_test

# NOTE(review): the test features are standardized with the test window's
# own mean/std, not the statistics of the training window -- confirm this
# is intended.
X_test = sklearn.preprocessing.scale(X_test)

# Notebook display of the scaled test features.
X_test

# `np.nan` is the spelling supported by every NumPy version
# (`np.NaN` was removed in NumPy 2.0).
hs300_test['prediction'] = np.nan

# Given the test-set features, predict() returns the predicted test-set
# labels. They are stored on rows 7 onward with one positional assignment
# instead of the removed `.ix` indexer in a chained assignment.
hs300_test.iloc[7:, hs300_test.columns.get_loc('prediction')] = lm.predict(X_test)

# Strategy equity curve: the previous row's prediction multiplied by this
# row's return, compounded.
hs300_test['strategy'] = (hs300_test['prediction'].shift(1) * hs300_test['returns'] + 1).cumprod()

# Compounded return of the raw 'returns' column, for comparison.
hs300_test['cum_ret'] = (hs300_test['returns'] + 1).cumprod()

hs300_test[['strategy', 'cum_ret']].dropna().plot(figsize=(10, 6))

图3:测试集上逻辑回归策略净值(strategy)与累计收益(cum_ret)的对比

6. SVM算法在测试集的验证

# The SVM is evaluated on the raw (unscaled) test features.
X_test = hs_7d_test

# `np.nan` is the spelling supported by every NumPy version
# (`np.NaN` was removed in NumPy 2.0).
hs300_test['prediction'] = np.nan

# Store the SVM's predicted test labels on rows 7 onward with one
# positional assignment instead of the removed `.ix` indexer in a chained
# assignment.
hs300_test.iloc[7:, hs300_test.columns.get_loc('prediction')] = clf_SVC.predict(X_test)

# Strategy equity curve: the previous row's prediction multiplied by this
# row's return, compounded.
hs300_test['strategy'] = (hs300_test['prediction'].shift(1) * hs300_test['returns'] + 1).cumprod()

# Compounded return of the raw 'returns' column, for comparison.
hs300_test['cum_ret'] = (hs300_test['returns'] + 1).cumprod()

hs300_test[['strategy', 'cum_ret']].dropna().plot(figsize=(10, 6))

图4:测试集上SVM策略净值(strategy)与累计收益(cum_ret)的对比

你可能感兴趣的:(机器学习策略——基于逻辑回归的股市趋势预测(及SVM策略))