ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

单因子——数据Wind

2019-08-16 17:00:58  阅读:837  来源: 互联网

标签:df 数据 Price Daily 因子 Adjust iloc data Wind


读取数据

import time
date = time.strftime("%Y-%m-%d") 
date = date.replace("'", "")
from WindPy import *
import pandas as pd
import numpy as np
from pymongo import MongoClient
w.start()
w.isconnected()
import time
from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor, as_completed
import json

连接数据库

host ='xxx.xxx.x.xx'
port='27017'
user='xxxx'
code='utf8utf8'
uri= "mongodb://"+user+":"+code+"@"+host+":"+port+"/"+"?authMechanism=SCRAM-SHA-1"
client=MongoClient(uri)
db = client['CN_Stock']
collection = db['Wind_Close_Price']

获取Wind中A股公司列表

A_list = w.wset("sectorconstituent","date=2019-07-30;sectorid=a001010100000000",usedf=True)
A_list = A_list[1]#取dataframe部分

多线程

df = pd.DataFrame(columns = ['wind_code','close','volume','amt','total_shares'] )
df['wind_code'] = A_list['wind_code']
df_list = A_list.copy()
#定义多线程函数
def applyParallel(dfGrouped, func):
        ppe = ThreadPoolExecutor(10)#设置线程池中最多能同时运行的线程数目
        futures = []
        results = []
        for name, group in dfGrouped:
            p = ppe.submit(func, group)#提交函数和变量,立即返回
            futures.append(p)
        for future in as_completed(futures):
            r = future.result()
            results.append(r)
        return pd.concat(results)#合并表
#定义获取股票价格函数
def get_price(df_list):
    stk = df_list['wind_code'].iloc[0]
    collection = db['Wind_Data_Raw_Price_Daily']
    Adjust_Price_Daily = pd.DataFrame()
    try:
        data2 = w.wsd(stk, "close,industry_csrc12_n", "2018-01-10", "2019-08-08","industryType=1;PriceAdj=F",usedf=True)
        data2 = data2[1]#前复权价格
        data2['wind_code']= stk
        Adjust_Price_Daily = pd.concat([Adjust_Price_Daily,data2])       
        time.sleep(0.5)
        print("done2_"+stk)
    except:
        print('error_no_record'+stk)
        df['state']= 'error_no_record'
    return Adjust_Price_Daily

df_list['rank'] = df_list.index
t1=time.time()
Adjust_Price_Daily_temp = applyParallel(df_list.groupby('rank'), get_price)
t2=time.time()
print(t2-t1)
Adjust_Price_Daily.index = range(len(Adjust_Price_Daily))
#某只股票的价格走势图
Adjust_Price_Daily[(Adjust_Price_Daily['wind_code'] == '000001.SZ')].plot(y=['CLOSE'])
collection.drop()
collection.insert_many(json.loads(Adjust_Price_Daily.T.to_json()).values())

清洗数据,标准化

Adjust_Price_Daily['date'] = Adjust_Price_Daily.index
Adjust_Price_Daily.dropna(axis=0, how='any', inplace=True)#去掉有空值的行
Adjust_Price = Adjust_Price_Daily.pivot( index='date',values='CLOSE', columns='wind_code')#将SQL格式转化为DataFrame格式
Adjust_Price.dropna(axis=1, how='any', inplace=True)
df_r = Adjust_Price.iloc[:,:]/Adjust_Price.iloc[:,:].shift(1)-1#获得每只股票每天的回报率
df_m = Adjust_Price.iloc/Adjust_Price.iloc.shift(30)#获得股票30天动量
df_m.dropna(axis=0, how='any', inplace=True)
df_r.dropna(axis=0, how='any', inplace=True)

将动量作为因子

data_all = pd.DataFrame()

for i in range(len(df_m)):
    print (i)
    df_1= df_m.iloc[i:i+1,:]
    df_1 = df_1.T
    df_1.columns = ['m_fz']
    q0,q1,q2,q3,q4,q5 = df_1['m_fz'].quantile([0,0.2,0.4,0.6,0.8,1.0])#获取分位点
    bins = [q0-1,q1, q2, q3, q4, q5+1]           
    labels=['1', '2', '3', '4', '5']
    df_1 = pd.cut(df_1.m_fz, bins,right=True,labels=labels)      #将股票分为五组
    df_2= df_r.iloc[i+15:i+16,:]
    df_2 = df_2.T
    df = pd.concat([df_1,df_2],axis=1)#获得一个df,有组别和return值
    data = df.groupby('m_fz').mean()
    #加入5-1,4-2
    data_51 = data.iloc[4]-data.iloc[0]
    data_42 = data.iloc[3]-data.iloc[1]
    data = data.append(data_51,ignore_index=True)
    data = data.append(data_42,ignore_index=True)      
    data_all = pd.concat([data_all,data],axis = 1)

data_all.index = ['1','2','3','4','5','5-1','4-2']   
data_all = data_all.T

pic = (1+data_all).cumprod().plot()#画图

接下来以vol波动率为因子

df_vol = df_r.rolling(30).std()*(252**0.5)#年化利率
df_vol.dropna(axis=0, how='any', inplace=True)

data_all = pd.DataFrame()
for i in range(len(df_vol)):
    print (i)
    df_1= df_vol.iloc[i+15:i+16,:]
    df_1 = df_1.T
    df_1.columns = ['v_fz']
    q0,q1,q2,q3,q4,q5 = df_1['v_fz'].quantile([0,0.2,0.4,0.6,0.8,1.0])
    bins = [q0-1,q1, q2, q3, q4, q5+1]           
    labels=['1', '2', '3', '4', '5']
    df_1 = pd.cut(df_1.v_fz, bins,right=True,labels=labels)      
    df_2= df_r.iloc[i:i+1,:]
    df_2 = df_2.T
    df = pd.concat([df_1,df_2],axis=1)
    data = df.groupby('v_fz').mean()
    data_51 = data.iloc[4]-data.iloc[0]
    data_42 = data.iloc[3]-data.iloc[1]
    data = data.append(data_51,ignore_index=True)
    data = data.append(data_42,ignore_index=True)      
    data_all = pd.concat([data_all,data],axis = 1)
    
data_all = data_all.T
data_all.columns = ['1','2','3','4','5','5-1','4-2']
data_all.head()

(1+data_all).cumprod().plot()#画图

加入行业对冲

df_Industry = pd.read_excel('首先有一个行业列表的excel.xlsx',encoding='utf-8')#读取行业,当然这个也是wind下的
df_Industry.columns=['wind_code','Industry']
df_Industry.dropna(axis=0, how='any', inplace=True)
#result = df_Industry.values.tolist()
Adjust_Price_Daily['date'] = Adjust_Price_Daily.index
Adjust_Price_Daily = pd.merge(Adjust_Price_Daily,df_Industry,on='wind_code')
Adjust_Price_Daily.dropna(axis=0, how='any', inplace=True)
#获取行业list
Industry = df_Industry['Industry'].unique() 
Industry = Industry.tolist()
#根据行业分组,选取每个行业表现最好的一组
data_all = pd.DataFrame()
data_al = pd.DataFrame()
for ind in Industry:
    print(ind)
    df = Adjust_Price_Daily[Adjust_Price_Daily['Industry']==ind]
    df = df.pivot( index='date',values='CLOSE', columns='wind_code')
    df.dropna(axis=1, how='any', inplace=True)
    df_r = df.iloc[:,:]/df.iloc[:,:].shift(1)-1
    df_m = df/df.shift(30)
    df_m.dropna(axis=0, how='any', inplace=True)
    df_r.dropna(axis=0, how='any', inplace=True)
    for i in range(len(df_m)):
        print (i)
        df_1= df_m.iloc[i:i+1,:]
        df_1 = df_1.T
        df_1.columns = ['m_fz']
        if len(df_1)<5:
            data = df_r.max
        else:            
            q0,q1,q2,q3,q4,q5 = df_1['m_fz'].quantile([0,0.2,0.4,0.6,0.8,1.0])
            bins = [q0-1,q1, q2, q3, q4, q5+1]           
            labels=['1', '2', '3', '4', '5']
            df_1 = pd.cut(df_1['m_fz'], bins,right=True,labels=labels)      
            df_2= df_r.iloc[i+15:i+16,:]
            df_2 = df_2.T
            df_3 = pd.concat([df_1,df_2],axis=1)
            data = df_3.groupby('m_fz').mean()
            data = data.iloc[4]
            data_al = pd.concat([data_all,data],axis=1)
        data_all = pd.concat([data_all,data_al],axis=1)
    
data_all.index = ['1','2','3','4','5','5-1','4-2']   
data_all = data_all.T
(1+data_all).cumprod().plot()

标签:df,数据,Price,Daily,因子,Adjust,iloc,data,Wind
来源: https://blog.csdn.net/weixin_42659943/article/details/99681117

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有