source code
This commit is contained in:
parent
4a5c1569d1
commit
fe44f5b4ff
1023
indicators.py
Normal file
1023
indicators.py
Normal file
File diff suppressed because it is too large
Load Diff
163
marketsim.py
Normal file
163
marketsim.py
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Wed Feb 5 21:56:42 2020
|
||||||
|
|
||||||
|
@author: cpan
|
||||||
|
"""
|
||||||
|
|
||||||
|
"""MC2-P1: Market simulator."""
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
# import datetime as dt
|
||||||
|
# import os
|
||||||
|
from util import get_data, plot_data
|
||||||
|
|
||||||
|
def normalize_data(df):
    """Rescale every column so that its first row equals 1.

    Each value is divided by the first-row value of its own column.
    """
    first_row = df.iloc[0, :]
    return df / first_row
|
||||||
|
|
||||||
|
def fill_missing_values(df_data):
    """Fill gaps in ``df_data`` in place: forward fill first, then backward fill.

    The forward pass carries the last known value across gaps; the backward
    pass then covers any leading gaps that had no earlier value to carry.
    The frame is modified in place and nothing is returned.
    """
    # DataFrame.fillna(method=...) is deprecated in pandas; the dedicated
    # ffill/bfill methods are the supported spelling.
    df_data.ffill(inplace=True)
    df_data.bfill(inplace=True)
|
||||||
|
|
||||||
|
def get_orders(orders_file):
    """Load an order book, drop incomplete rows and sort it by index.

    ``orders_file`` is either an already-built DataFrame, which is used as
    given, or anything ``pandas.read_csv`` can read. In the latter case the
    'Date' column becomes a parsed date index.
    """
    if not isinstance(orders_file, pd.DataFrame):
        orders_file = pd.read_csv(
            orders_file,
            index_col='Date',
            parse_dates=True,
            na_values=['nan'],
        )
    return orders_file.dropna().sort_index()
|
||||||
|
|
||||||
|
def compute_daily_returns(df):
    """Return the row-over-row fractional change of ``df``.

    The result has the same shape and labels as ``df``. Row ``i`` holds
    ``df[i] / df[i-1] - 1`` and the first row is set to zero. ``df`` itself
    is not modified.
    """
    returns = df.copy()
    # Divide by the raw values of the previous rows so the division is
    # positional and not aligned on index labels.
    previous_rows = df.iloc[:-1].values
    returns.iloc[1:] = (df.iloc[1:] / previous_rows) - 1
    returns.iloc[0, :] = 0
    return returns
|
||||||
|
|
||||||
|
def compute_portfolio_stats(price, allocs=(0.1, 0.2, 0.3, 0.4), rfr=0.0, sf=252.0):
    """Compute summary statistics for an allocated portfolio.

    Parameters
    ----------
    price : DataFrame
        Price history, one column per asset.
    allocs : sequence of float
        Weight applied to each column of ``price``, in column order. The
        default is four weights summing to 1.0. It was previously written
        as ``[0.1,0.2,0,3,0.4]``, a typo that produced five weights.
    rfr : float
        Risk-free rate subtracted from the average daily return.
    sf : float
        Sampling frequency used to annualise the Sharpe ratio.

    Returns
    -------
    cr, adr, sddr, sr
        Cumulative return, average daily return, standard deviation of
        daily returns and Sharpe ratio. Each is a Series indexed by the
        portfolio column name, because the portfolio value is kept as a
        one-column DataFrame.
    """
    norm_price = normalize_data(price)
    norm_positions_val = norm_price * allocs
    if len(norm_positions_val.columns) == 1:
        # A single asset is already the whole portfolio
        norm_portfolio_val = norm_positions_val
    else:
        norm_portfolio_val = norm_positions_val.sum(axis=1).to_frame('PORTFOLIO')

    cr = norm_portfolio_val.iloc[-1] / norm_portfolio_val.iloc[0] - 1
    daily_returns = compute_daily_returns(norm_portfolio_val)
    daily_returns = daily_returns[1:]  # remove first row (all zeros)
    adr = daily_returns.mean()
    sddr = daily_returns.std()
    sr = np.sqrt(sf) * (adr - rfr) / sddr
    return cr, adr, sddr, sr
|
||||||
|
|
||||||
|
def plot_against_SPY(df):
    """Plot ``df`` normalized alongside SPY and return the SPY prices.

    If ``df`` has no 'SPY' column, SPY prices are fetched with ``get_data``
    for the dates in ``df``'s index and added to a copy of ``df``. The copy
    is normalized to its first row and plotted. ``df`` is not modified.

    Returns
    -------
    DataFrame
        The SPY prices, always as a DataFrame. The branch where 'SPY' is
        already present used to return a Series, which the stats helpers
        in this file cannot process because they need ``.columns``.
    """
    df_temp = df.copy()
    if 'SPY' not in df_temp.columns:
        df_SPY = get_data(['SPY'], pd.to_datetime(df_temp.index.values))
        # NOTE(review): assumes get_data returns exactly one row per row
        # of df; confirm against get_data's date handling.
        df_temp['SPY'] = df_SPY.values
    else:
        # Double brackets keep this a DataFrame, matching the other branch
        df_SPY = df_temp[['SPY']]
    df_temp = normalize_data(df_temp)
    plot_data(df_temp)
    return df_SPY
|
||||||
|
|
||||||
|
def compute_portvals(orders_file="./orders/orders.csv", start_val=1000000,
                     commission=9.95, impact=0.005):
    """Simulate an order book and return the daily value of every position.

    Parameters
    ----------
    orders_file : str, file-like or DataFrame
        Orders indexed by date with 'Symbol', 'Order' (BUY/SELL, any case)
        and 'Shares' columns; passed to ``get_orders``.
    start_val : number
        Starting cash.
    commission : float
        Flat fee charged on every filled order.
    impact : float
        Market-impact fraction; each filled order additionally costs
        ``shares * price * impact``.

    Returns
    -------
    DataFrame
        One row per price date. The first column, 'Portfolio', is the row
        total; it is followed by the value held in each symbol and 'Cash'.

    Notes
    -----
    A BUY that cannot be paid for from current cash, including fees, is
    skipped. A SELL for more shares than currently held is skipped, so
    there is no short selling. Order types other than BUY/SELL are ignored.
    The price lookup uses the order's date directly, so an order dated on a
    day with no price row fails at that lookup.
    """
    orders_df = get_orders(orders_file)

    # Sorted so the column order of the result is reproducible; a bare
    # list(set(...)) depends on string hashing.
    symbols = sorted(set(orders_df['Symbol'].values))

    dates = pd.date_range(orders_df.index.values[0], orders_df.index.values[-1])

    # get_data() already fills missing values. Take an explicit copy of the
    # column subset so adding 'Cash' does not write into a slice.
    prices = get_data(symbols, dates)
    prices = prices[symbols].copy()
    # Cash is "priced" at 1.0 so prices * positions values it at face value
    prices['Cash'] = np.ones(prices.shape[0])

    # Units held per day: zero shares everywhere, starting cash on every
    # row. Rows after each order are overwritten by the propagation step.
    positions = prices * 0.0
    positions['Cash'] = float(start_val)

    for index, row in orders_df.iterrows():
        stock_sym = row['Symbol']
        order_price = prices.loc[index, stock_sym]
        order_shrs = row['Shares']
        order_type = row['Order'].upper()

        trade_value = order_shrs * order_price
        impact_cost = trade_value * impact

        if order_type == 'BUY':
            # Not enough cash to execute the order -> skip it
            if positions.loc[index, 'Cash'] >= trade_value + commission + impact_cost:
                positions.loc[index, stock_sym] += order_shrs
                positions.loc[index, 'Cash'] -= trade_value
                positions.loc[index, 'Cash'] -= commission
                positions.loc[index, 'Cash'] -= impact_cost
        elif order_type == 'SELL':
            # Not enough shares to fill the order -> skip it
            if positions.loc[index, stock_sym] >= order_shrs:
                positions.loc[index, stock_sym] -= order_shrs
                positions.loc[index, 'Cash'] += trade_value
                positions.loc[index, 'Cash'] -= commission
                positions.loc[index, 'Cash'] -= impact_cost

        # Carry the holdings of the order day forward to all later days
        start_row = positions.index.get_loc(index) + 1
        positions.iloc[start_row:, :] = positions.iloc[start_row - 1].values

    port_vals = prices * positions
    port_vals.insert(0, 'Portfolio', port_vals.sum(axis=1))

    return port_vals
|
||||||
|
|
||||||
|
def test_code():
    """Run the simulator on a sample order file and print a comparison with SPY.

    Reads "./orders/orders-05.csv", simulates it with one million in
    starting cash, plots the portfolio against SPY and prints the Sharpe
    ratio, cumulative return, standard deviation, average daily return and
    final portfolio value.
    """
    of = "./orders/orders-05.csv"
    sv = 1000000

    # Process orders
    portvals = compute_portvals(orders_file=of, start_val=sv)
    if not isinstance(portvals, pd.DataFrame):
        print("warning, code did not return a DataFrame")
    else:
        # keep only the first column (the portfolio total)
        first_col = portvals.columns[0]
        portvals = portvals[first_col].to_frame()

    # Date range covered by the simulation
    index_values = portvals.index.values
    start_date = pd.to_datetime(index_values[0])
    end_date = pd.to_datetime(index_values[-1])
    price_SPY = plot_against_SPY(portvals)

    # Portfolio stats, computed the same way for the portfolio and for SPY
    rfr, sf = 0, 252
    cr, adr, sddr, sr = compute_portfolio_stats(portvals, [1.0], rfr, sf)
    crSP, adrSP, sddrSP, srSP = compute_portfolio_stats(price_SPY, [1.0], rfr, sf)

    # Compare portfolio against SPY
    print("\nDate Range: {} to {}".format(start_date.date(), end_date.date()))
    print()
    report = (
        ("Sharpe Ratio: {}, {}", sr, srSP),
        ("Cumulative Return: {}, {}", cr, crSP),
        ("Standard Deviation: {}, {}", sddr, sddrSP),
        ("Average Daily Return: {}, {}", adr, adrSP),
    )
    for template, ours, benchmark in report:
        print(template.format(ours, benchmark))
        print()
    print("Final Portfolio Value: {:.2f}".format(portvals['Portfolio'].iloc[-1]))
|
||||||
|
|
||||||
|
# Run test_code() only when this module is executed directly, not on import.
if __name__ == "__main__":
    test_code()
|
371
util.py
Normal file
371
util.py
Normal file
@ -0,0 +1,371 @@
|
|||||||
|
"""
|
||||||
|
Use Yahoo Finance data
|
||||||
|
"""
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
# Suppress FutureWarnings
|
||||||
|
warnings.simplefilter(action='ignore', category=FutureWarning)
|
||||||
|
|
||||||
|
import datetime as dt
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import yfinance as yf
|
||||||
|
import requests
|
||||||
|
from lxml import html
|
||||||
|
from io import StringIO
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
|
WEBSITE = 'https://www.isolo.org/dokuwiki/knowledge_base/investing/watchlist'
|
||||||
|
BATCHSIZE = 20
|
||||||
|
TIMEGAP = 0.2
|
||||||
|
|
||||||
|
def fill_missing_data(df):
    """Return ``df`` with gaps forward-filled and then backward-filled.

    The input is left untouched; a new object is returned.
    """
    return df.ffill().bfill()
|
||||||
|
|
||||||
|
def symbol_to_path(symbol, base_dir=None):
    """Build the CSV file path for a ticker symbol.

    When ``base_dir`` is None the directory is read from the
    MARKET_DATA_DIR environment variable, defaulting to '../data/'.
    """
    directory = base_dir
    if directory is None:
        directory = os.environ.get("MARKET_DATA_DIR", '../data/')
    file_name = "{}.csv".format(str(symbol))
    return os.path.join(directory, file_name)
|
||||||
|
|
||||||
|
def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'):
    """
    Read stock data (adjusted close) for given symbols from Yahoo Finance
    from start_date to the latest date available (usually the current date).

    Parameters
    ----------
    symbols : list
        stock symbols.
    start_date : datetime or str
        first date to download; passed to yfinance as ``start``.
    addSPY : bool
        when True, 'SPY' is prepended to the symbols if it is absent.
    colname : str
        top-level column to select from the downloaded frame.

    Returns
    -------
    df : dataframe
        one column per symbol.
    """
    if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols = ['SPY'] + symbols

    df = yf.download(symbols, start=start_date)[colname]
    # Depending on the yfinance version a single-symbol download selects
    # either a Series or a one-column DataFrame here. Only a Series needs
    # converting; the old len(symbols) == 1 test called to_frame() on a
    # DataFrame in the latter case.
    if isinstance(df, pd.Series):
        df = df.to_frame(name=symbols[0])
    return df
|
||||||
|
|
||||||
|
# def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'):
|
||||||
|
"""
|
||||||
|
Read stock data (adjusted close) for given symbols from CSV files
|
||||||
|
from start_date to the latest date available in the CSV files.
|
||||||
|
"""
|
||||||
|
# df_temp = pd.read_csv(symbol_to_path('SPY'), index_col='Date',
|
||||||
|
# parse_dates=True, usecols=['Date', colname], na_values=['nan'])
|
||||||
|
# df_temp = df_temp.rename(columns={colname: 'SPY'})
|
||||||
|
# end_date = df_temp.index.values[-1]
|
||||||
|
# dates = pd.date_range(start_date, end_date)
|
||||||
|
# df = pd.DataFrame(index=dates)
|
||||||
|
# df = df.join(df_temp)
|
||||||
|
# df = df.dropna()
|
||||||
|
# # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent
|
||||||
|
# # symbols = ['SPY'] + symbols
|
||||||
|
# for symbol in symbols:
|
||||||
|
# df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
|
||||||
|
# parse_dates=True, usecols=['Date', colname], na_values=['nan'])
|
||||||
|
# df_temp = df_temp.rename(columns={colname: symbol})
|
||||||
|
# df = df.join(df_temp)
|
||||||
|
# # if symbol == 'SPY': # drop dates SPY did not trade
|
||||||
|
# # df = df.dropna(subset=["SPY"])
|
||||||
|
# if not addSPY:
|
||||||
|
# df = df[symbols]
|
||||||
|
# return df
|
||||||
|
|
||||||
|
def get_data_range(df, dates):
    """
    Return the rows of the full data set whose index labels appear in
    ``dates`` (an inner join of ``dates`` against ``df``'s index).
    """
    skeleton = pd.DataFrame(index=dates)
    return skeleton.join(df, how='inner')
|
||||||
|
|
||||||
|
def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'):
    """
    Read stock data (adjusted close) for given symbols from Yahoo Finance

    Parameters
    ----------
    symbols : list
        stock symbols.
    dates : sequence of datetime-like
        only the first and last elements are used, as the inclusive start
        and end of the download range.
    addSPY : bool
        SPY is always downloaded and used to drop the dates on which it
        has no value; when addSPY is False the result is then restricted
        to the symbols originally requested.
    colname : str
        top-level column to select from the downloaded frame.

    Returns
    -------
    df : dataframe
        one column per symbol, restricted to rows where SPY has a value,
        with remaining gaps filled forward and then backward.
    """
    org_sym = symbols
    sd = dates[0]
    # yfinance treats ``end`` as exclusive, so passing dates[-1] directly
    # drops the last requested day. Use the following midnight instead.
    ed = pd.Timestamp(dates[-1]).normalize() + pd.Timedelta(days=1)
    if 'SPY' not in symbols:
        symbols = ['SPY'] + symbols
    df = yf.download(symbols, start=sd, end=ed)[colname]
    # A single-symbol download may select a Series or a one-column frame
    # depending on the yfinance version; only the Series needs converting.
    if isinstance(df, pd.Series):
        df = df.to_frame(name=symbols[0])

    df = df.dropna(subset=['SPY'])
    df = fill_missing_data(df)

    if not addSPY:
        df = df[org_sym]

    return df
|
||||||
|
|
||||||
|
def yf_batch_download(symbols, start, end, batch_size, time_gap):
    """
    download in small batches to avoid connection closure by host

    Parameters
    ----------
    symbols : list
        stock symbols.
    start : datetime
        start date.
    end : datetime
        stop date.
    batch_size : integer
        maximum number of symbols requested per download call.
    time_gap : float
        pause between consecutive download calls, in seconds or fraction
        of seconds.

    Returns
    -------
    df : dataframe
        stock price volume information, the per-batch frames concatenated
        column-wise. An empty dataframe when ``symbols`` is empty.

    """
    frames = []
    # Step through the symbols in slices of at most batch_size. The
    # previous arithmetic let the final batch grow to 2*batch_size - 1.
    for offset in range(0, len(symbols), batch_size):
        if frames:
            # pause between requests only, not before the first one
            sleep(time_gap)
        batch = symbols[offset:offset + batch_size]
        frames.append(yf.download(batch, start, end))

    if not frames:
        # nothing requested: skip the network entirely
        return pd.DataFrame()
    return pd.concat(frames, axis=1)
|
||||||
|
|
||||||
|
def get_price_volume(symbols, dates, addSPY=False):
    """
    Return adjusted-close prices and volumes for the given symbols.

    This function was written around a local pickle cache with a
    _refresh.csv list of symbols to re-download. That code is commented
    out below, so as written every call fetches its data through
    yf_batch_download.

    Parameters
    ----------
    symbols : list
        stock symbols; 'SPY' is always added to the download if absent.
    dates : sequence of datetime-like
        only the first and last elements are used (start and end).
    addSPY : bool
        when False, both returned frames are restricted to the symbols
        originally passed in.

    Returns
    -------
    price, volume : dataframe, dataframe
        the 'Adj Close' part (gaps filled forward then backward) and the
        'Volume' part (gaps filled with 0) of the downloaded data, limited
        to rows where SPY's adjusted close is present.
    """
    # DATAFILE = "_stkdata.pickle"
    # REFRESH = "_refresh.csv"
    org_sym = symbols
    sd = dates[0]
    ed = dates[-1]
    # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent
    if 'SPY' not in symbols:
        symbols = ['SPY'] + symbols

    df = yf_batch_download(symbols, start=sd, end=ed, \
                           batch_size=BATCHSIZE, time_gap=TIMEGAP)
    if len(symbols) == 1:
        # Give a single-symbol download two-level (field, symbol) columns
        # so the rest of the function can index it like a multi-symbol one.
        tuples = list(zip(df.columns.values.tolist(), \
                          [symbols[0]]*len(df.columns.values)))
        df.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None])

    # Disabled cache path, kept for reference:
    # if not os.path.exists(DATAFILE):
    #     df = yf_batch_download(symbols, start=sd, end=ed, \
    #                            batch_size=BATCHSIZE, time_gap=TIMEGAP)
    #     if len(symbols) == 1:
    #         tuples = list(zip(df.columns.values.tolist(), \
    #                           [symbols[0]]*len(df.columns.values)))
    #         df.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None])
    # else:
    #     df = pd.read_pickle(DATAFILE)
    # exist_syms = df["Adj Close"].columns.values.tolist()
    # if os.path.exists(REFRESH):
    #     try:
    #         refresh_df = pd.read_csv(REFRESH, header=None)
    #         refresh_syms = refresh_df.values.tolist()
    #         refresh_syms = [x for sublist in refresh_syms for x in sublist]
    #         remove_syms = [x for x in exist_syms if x in refresh_syms]
    #         if remove_syms:
    #             df.drop(columns=remove_syms, axis=1, level=1, inplace=True)
    #             exist_syms = [x for x in exist_syms if x not in refresh_syms]
    #     except:
    #         pass

    # With the cache disabled no symbol counts as already present, so
    # intersect_syms below is always empty and every symbol lands in new_stks.
    exist_syms = []

    last_day = pd.to_datetime(df.index.values[-1])
    first_day = pd.to_datetime(df.index.values[0])
    intersect_syms = list(set(org_sym) & set(exist_syms))
    # reduce df to only contain intersect_syms
    # NOTE(review): with intersect_syms empty this presumably leaves df with
    # its row index but no columns - confirm.
    df = df.loc[:, (slice(None), intersect_syms)]

    if sd < first_day:
        # fill gap from online
        # NOTE(review): intersect_syms is empty here, so this asks
        # yf_batch_download for no symbols - confirm this is intended.
        tmp_df = yf_batch_download(intersect_syms, start=sd, end=first_day, \
                                   batch_size=BATCHSIZE, time_gap=TIMEGAP)
        df = pd.concat([tmp_df, df])

    if ed >= last_day:
        # fill gap from online incl last two days to get mkt close data
        # (ed.date() assumes the elements of dates are datetime-like)
        if ed.date() == last_day.date():
            tmp_df = yf_batch_download(intersect_syms, start=ed, end=ed, \
                                       batch_size=BATCHSIZE, time_gap=TIMEGAP)
        else:
            tmp_df = yf_batch_download(intersect_syms, start=last_day, end=ed, \
                                       batch_size=BATCHSIZE, time_gap=TIMEGAP)
        # The last existing row is dropped and replaced by whatever tmp_df
        # holds. NOTE(review): tmp_df comes from an empty symbol list, so
        # the final day may be lost from df's index - TODO confirm.
        df = pd.concat([df[:-1], tmp_df])

    # get data online when new stks were added
    # (np.setdiff1d returns the sorted unique symbols not in exist_syms)
    new_stks = np.setdiff1d(symbols, exist_syms).tolist()
    if not new_stks == []:
        tmp_df = yf_batch_download(new_stks, start=sd, end=ed, \
                                   batch_size=BATCHSIZE, time_gap=TIMEGAP)
        if len(new_stks) == 1:
            tuples = list(zip(tmp_df.columns.values.tolist(), \
                              [new_stks[0]]*len(tmp_df.columns.values)))
            tmp_df.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None])
        # Left join: only the dates already in df's index are kept
        df = df.join(tmp_df)

    # df.to_pickle(DATAFILE) # save to local, overwrite existing file
    # if os.path.exists(REFRESH):
    #     with open(REFRESH, 'w'):
    #         pass

    # Keep only the rows on which SPY has an adjusted close
    df = df.dropna(subset=[('Adj Close', 'SPY')])
    price = df['Adj Close']
    price = fill_missing_data(price)
    volume = df['Volume']
    volume = volume.fillna(0)

    # if len(symbols) == 1:
    #     price.name = symbols[0]
    #     volume.name = symbols[0]
    #     price = price.to_frame()
    #     volume = volume.to_frame()

    if addSPY==False:
        price = price[org_sym]
        volume = volume[org_sym]

    return price, volume
|
||||||
|
|
||||||
|
|
||||||
|
def get_price_volume_online(symbols, dates, addSPY=False):
    """
    Read stock data (adjusted close and volume) for given symbols from Yahoo
    Finance

    Parameters
    ----------
    symbols : list
        stock symbols; 'SPY' is always added to the download if absent.
    dates : sequence of datetime-like
        only the first and last elements are used, as the inclusive start
        and end of the download range.
    addSPY : bool
        when False, both returned frames are restricted to the symbols
        originally passed in.

    Returns
    -------
    price, volume : dataframe, dataframe
        adjusted close (gaps filled forward then backward) and volume
        (gaps filled with 0), limited to rows where SPY's adjusted close
        is present.
    """
    org_sym = symbols
    sd = dates[0]
    # yfinance treats ``end`` as exclusive, so passing dates[-1] directly
    # drops the last requested day. Use the following midnight instead.
    ed = pd.Timestamp(dates[-1]).normalize() + pd.Timedelta(days=1)
    if 'SPY' not in symbols:
        symbols = ['SPY'] + symbols
    df = yf.download(symbols, start=sd, end=ed)

    # The downloaded columns are either flat (field) or two-level
    # (field, symbol). Branch on the actual shape, not on len(symbols),
    # so both layouts are handled. Flat columns can only occur when SPY
    # is the sole symbol, because SPY is always part of the request.
    if isinstance(df.columns, pd.MultiIndex):
        spy_key = ('Adj Close', 'SPY')
    else:
        spy_key = 'Adj Close'
    df = df.dropna(subset=[spy_key])

    price = fill_missing_data(df['Adj Close'])
    volume = df['Volume'].fillna(0)

    if isinstance(price, pd.Series):
        price = price.to_frame(name=symbols[0])
        volume = volume.to_frame(name=symbols[0])

    if not addSPY:
        price = price[org_sym]
        volume = volume[org_sym]

    return price, volume
|
||||||
|
|
||||||
|
def get_watchlist(website: str = WEBSITE):
    """Download the watchlist page and parse its embedded CSV table.

    Parameters
    ----------
    website : str
        URL of the page holding the watchlist. The page is expected to
        contain the CSV text inside a <pre> element at a fixed XPath.

    Returns
    -------
    df : dataframe
        the watchlist indexed by its 'Symbol' column. Lines starting with
        '#' are treated as comments and empty fields are kept as empty
        strings (no NaN conversion).

    Raises
    ------
    IndexError
        if the expected <pre> element is not found in the page.
    """
    # Use the caller's URL (the parameter used to be ignored in favour of
    # the module constant) and bound the wait, since requests has no
    # timeout by default.
    page = requests.get(website, timeout=30)
    # page = requests.get(website, verify=False) # skip certificate check for https
    tree = html.fromstring(page.content)
    matches = tree.xpath('//*[@id="dokuwiki__content"]/div[1]/div/div[3]/div/pre/text()')
    if not matches:
        raise IndexError("watchlist block not found at {}".format(website))
    watchlist = matches[0]
    file_name = StringIO(watchlist)
    df = pd.read_csv(file_name, index_col = 'Symbol',
                     comment = '#', na_filter=False)
    return df
|
||||||
|
|
||||||
|
# def get_watchlist(file_name: str = 'watchlist.csv'):
|
||||||
|
# df = pd.read_csv(file_name, index_col = 'Symbol',
|
||||||
|
# comment = '#', na_filter=False)
|
||||||
|
# return df
|
||||||
|
|
||||||
|
# def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'):
|
||||||
|
# """
|
||||||
|
# Read stock data (adjusted close) for given symbols from CSV files.
|
||||||
|
|
||||||
|
# (done) TODO: there are nan values in the data when addSPY=False is passed. The
|
||||||
|
# strategy should be using SPY to clean the data first including fill
|
||||||
|
# forward and fill backward, then to drop the SPY if addSPY=False
|
||||||
|
# """
|
||||||
|
# org_sym = symbols
|
||||||
|
# df = pd.DataFrame(index=dates)
|
||||||
|
# # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent
|
||||||
|
# # symbols = ['SPY'] + symbols
|
||||||
|
# if 'SPY' not in symbols:
|
||||||
|
# symbols = ['SPY'] + symbols
|
||||||
|
# for symbol in symbols:
|
||||||
|
# df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
|
||||||
|
# parse_dates=True, usecols=['Date', colname], na_values=['nan'])
|
||||||
|
# df_temp = df_temp.rename(columns={colname: symbol})
|
||||||
|
# df = df.join(df_temp)
|
||||||
|
# if symbol == 'SPY': # drop dates SPY did not trade
|
||||||
|
# df = df.dropna(subset=["SPY"])
|
||||||
|
# # fill missing data
|
||||||
|
# df = fill_missing_data(df)
|
||||||
|
# if addSPY == False: # drop SPY
|
||||||
|
# # df = df.drop(columns=['SPY'])
|
||||||
|
# df = df[org_sym]
|
||||||
|
|
||||||
|
# return df
|
||||||
|
|
||||||
|
|
||||||
|
def plot_data(df, axs=[], title=[], xlabel='', ylabel=''):
    """Plot ``df`` with a title, axis labels and a grid.

    When ``axs`` is left at its default the plot goes on a new axes object
    created by ``df.plot``; otherwise it is drawn on the axes given.
    """
    # The defaults are only compared and passed on, never mutated.
    ax = df.plot(title = title) if axs == [] else df.plot(ax=axs, title=title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.grid()
|
||||||
|
|
||||||
|
|
||||||
|
# def plot_data(df, title=[], xlabel='', ylabel=''):
|
||||||
|
# import matplotlib.pyplot as plt
|
||||||
|
# """Plot stock prices with a custom title and meaningful axis labels."""
|
||||||
|
# ax = df.plot(title=title, fontsize=12, figsize=(10, 7))
|
||||||
|
# ax.set_xlabel(xlabel)
|
||||||
|
# ax.set_ylabel(ylabel)
|
||||||
|
# plt.grid()
|
||||||
|
# plt.show()
|
||||||
|
|
||||||
|
def get_orders_data_file(basefilename):
    """Open an orders file and return the file object.

    The directory comes from the ORDERS_DATA_DIR environment variable,
    defaulting to 'orders/'. The file is returned open, so the caller is
    responsible for closing it.
    """
    orders_dir = os.environ.get("ORDERS_DATA_DIR",'orders/')
    full_path = os.path.join(orders_dir, basefilename)
    return open(full_path)
|
||||||
|
|
||||||
|
def get_learner_data_file(basefilename):
    """Open a learner data file for reading and return the file object.

    The directory comes from the LEARNER_DATA_DIR environment variable,
    defaulting to 'Data/'. The file is returned open, so the caller is
    responsible for closing it.
    """
    learner_dir = os.environ.get("LEARNER_DATA_DIR",'Data/')
    full_path = os.path.join(learner_dir, basefilename)
    return open(full_path, 'r')
|
||||||
|
|
||||||
|
def get_robot_world_file(basefilename):
    """Open a robot world file and return the file object.

    The directory comes from the ROBOT_WORLDS_DIR environment variable,
    defaulting to 'testworlds/'. The file is returned open, so the caller
    is responsible for closing it.
    """
    worlds_dir = os.environ.get("ROBOT_WORLDS_DIR",'testworlds/')
    full_path = os.path.join(worlds_dir, basefilename)
    return open(full_path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_code():
    """Smoke test: fetch recent price and volume data for two symbols.

    Downloads GOOG and AMZN over a lookback window of 0.08 years (plus one
    day) ending today. The result is not inspected.
    """
    symbol = ['GOOG', 'AMZN']
    # lookback years
    lb_year = 0.08
    lookback = dt.timedelta(days = 365 * lb_year + 1)
    ed = dt.datetime.today()
    sd = ed - lookback
    # If ed or sd falls on a non-trading day, you might get warnings saying
    # "No data found for this date range, symbol may be delisted". This is
    # normal behavior.
    window = pd.date_range(sd, ed)
    prices, volume = get_price_volume(symbol, window, addSPY=False)
|
||||||
|
|
||||||
|
|
||||||
|
# Run test_code() only when this module is executed directly, not on import.
if __name__ == '__main__':
    test_code()
|
Loading…
x
Reference in New Issue
Block a user