WIP (need to figure out how to join a single index to a multi-index)

This commit is contained in:
George 2024-06-20 19:48:58 -07:00
parent fe44f5b4ff
commit c2d00ff69a
2 changed files with 135 additions and 121 deletions

View File

@ -21,8 +21,9 @@ Could also come up with a value that ties to the trading volume.
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import datetime as dt import datetime as dt
from util import get_data, get_price_volume, plot_data, get_watchlist # from util import get_data, get_price_volume, plot_data, get_watchlist
from marketsim import compute_portvals, compute_portfolio_stats, normalize_data from util import get_price_volume, plot_data, get_watchlist
# from marketsim import compute_portvals, compute_portfolio_stats, normalize_data
# import matplotlib.pyplot as plt # import matplotlib.pyplot as plt
# import matplotlib # import matplotlib
from numpy.fft import fft, ifft from numpy.fft import fft, ifft
@ -403,28 +404,28 @@ def simple_bollinger_strategy(stk):
return order_list return order_list
def plot_against_sym(df, sym=['SPY']): # def plot_against_sym(df, sym=['SPY']):
df_temp = df.copy() # df_temp = df.copy()
df_sym = get_data(sym, pd.to_datetime(df_temp.index.values), addSPY=False) # df_sym = get_data(sym, pd.to_datetime(df_temp.index.values), addSPY=False)
df_temp[sym[0]] = df_sym.values # df_temp[sym[0]] = df_sym.values
df_temp = normalize_data(df_temp) # df_temp = normalize_data(df_temp)
plot_data(df_temp) # plot_data(df_temp)
return df_sym # return df_sym
def test_bollinger_sell(): # def test_bollinger_sell():
sd = dt.datetime(2010,1,1) # sd = dt.datetime(2010,1,1)
# ed = dt.datetime.today() # # ed = dt.datetime.today()
ed = dt.datetime(2012,12,31) # ed = dt.datetime(2012,12,31)
symbol = ['XOM'] # symbol = ['XOM']
dates = dates = pd.date_range(sd, ed) # dates = dates = pd.date_range(sd, ed)
prices = get_data(symbol, dates, addSPY=False) # prices = get_data(symbol, dates, addSPY=False)
# prices = prices.dropna() # # prices = prices.dropna()
stk = security(prices) # stk = security(prices)
sell = bollinger_sell(stk) # sell = bollinger_sell(stk)
plot_data(sell) # plot_data(sell)
buy = bollinger_buy(stk, 190) # buy = bollinger_buy(stk, 190)
plot_data(buy) # plot_data(buy)
def get_crossing(stocks): def get_crossing(stocks):
""" """
@ -515,43 +516,43 @@ def modified_bollinger_strategy(stk):
return order_list return order_list
def test_get_orders(): # def test_get_orders():
sd = dt.datetime(2000,2,1) # sd = dt.datetime(2000,2,1)
# ed = dt.datetime.today() # # ed = dt.datetime.today()
ed = dt.datetime(2012,9,12) # ed = dt.datetime(2012,9,12)
symbol = ['INTC', 'XOM', 'MSFT'] # symbol = ['INTC', 'XOM', 'MSFT']
dates = dates = pd.date_range(sd, ed) # dates = dates = pd.date_range(sd, ed)
prices = get_data(symbol, dates, addSPY=False) # prices = get_data(symbol, dates, addSPY=False)
stk = security(prices) # stk = security(prices)
# order_list = simple_bollinger_strategy(stk) # # order_list = simple_bollinger_strategy(stk)
order_list = modified_bollinger_strategy(stk) # order_list = modified_bollinger_strategy(stk)
# print(order_list) # # print(order_list)
port_val = compute_portvals(order_list,100000,9.95,0.005) # port_val = compute_portvals(order_list,100000,9.95,0.005)
if isinstance(port_val, pd.DataFrame): # if isinstance(port_val, pd.DataFrame):
port_val = port_val[port_val.columns[0]].to_frame() # just get the first column # port_val = port_val[port_val.columns[0]].to_frame() # just get the first column
else: # else:
print("warning, code did not return a DataFrame") # print("warning, code did not return a DataFrame")
price_SPY = plot_against_sym(port_val) # price_SPY = plot_against_sym(port_val)
rfr=0 # rfr=0
sf=252 # sf=252
cr, adr, sddr, sr = compute_portfolio_stats(port_val, [1.0], rfr, sf) # cr, adr, sddr, sr = compute_portfolio_stats(port_val, [1.0], rfr, sf)
crSP,adrSP,sddrSP,srSP = compute_portfolio_stats(price_SPY, [1.0], rfr, sf) # crSP,adrSP,sddrSP,srSP = compute_portfolio_stats(price_SPY, [1.0], rfr, sf)
# Compare portfolio against $SPX # # Compare portfolio against $SPX
print("\nDate Range: {} to {}".format(sd.date(), ed.date())) # print("\nDate Range: {} to {}".format(sd.date(), ed.date()))
print() # print()
print("Sharpe Ratio: {}, {}".format(sr, srSP)) # print("Sharpe Ratio: {}, {}".format(sr, srSP))
print() # print()
print("Cumulative Return: {}, {}".format(cr, crSP)) # print("Cumulative Return: {}, {}".format(cr, crSP))
print() # print()
print("Standard Deviation: {}, {}".format(sddr, sddrSP)) # print("Standard Deviation: {}, {}".format(sddr, sddrSP))
print() # print()
print("Average Daily Return: {}, {}".format(adr, adrSP)) # print("Average Daily Return: {}, {}".format(adr, adrSP))
print() # print()
print("Final Portfolio Value: {:.2f}".format(port_val['Portfolio'].iloc[-1])) # print("Final Portfolio Value: {:.2f}".format(port_val['Portfolio'].iloc[-1]))
def plot_basic(stk, axs): def plot_basic(stk, axs):
data = stk.price.copy() data = stk.price.copy()
@ -880,7 +881,7 @@ if __name__ == "__main__":
Input(component_id='button', component_property='n_clicks'), Input(component_id='button', component_property='n_clicks'),
prevent_initial_call=True, prevent_initial_call=True,
) )
def get_data(clicks): def get_data_cb(clicks):
# global all_plot_sym, all_plot_ind, all_data, all_vol, all_macd, all_rsi # global all_plot_sym, all_plot_ind, all_data, all_vol, all_macd, all_rsi
# if clicks == 0: # if clicks == 0:
# return # no update # return # no update

143
util.py
View File

@ -11,7 +11,8 @@ import datetime as dt
import os import os
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import yfinance as yf # import yfinance as yf
import yahoo_fin.stock_info as si
import requests import requests
from lxml import html from lxml import html
from io import StringIO from io import StringIO
@ -32,19 +33,19 @@ def symbol_to_path(symbol, base_dir=None):
base_dir = os.environ.get("MARKET_DATA_DIR", '../data/') base_dir = os.environ.get("MARKET_DATA_DIR", '../data/')
return os.path.join(base_dir, "{}.csv".format(str(symbol))) return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'): # def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'):
""" # """
Read stock data (adjusted close) for given symbols from Yahoo Finance # Read stock data (adjusted close) for given symbols from Yahoo Finance
from start_date to the latest date available (usually the current date). # from start_date to the latest date available (usually the current date).
""" # """
if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent
symbols = ['SPY'] + symbols # symbols = ['SPY'] + symbols
df = yf.download(symbols, start = start_date)[colname] # df = yf.download(symbols, start = start_date)[colname]
if len(symbols) == 1: # if len(symbols) == 1:
df.name = symbols[0] # df.name = symbols[0]
df = df.to_frame() # df = df.to_frame()
return df # return df
# def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'): # def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'):
""" """
@ -80,30 +81,42 @@ def get_data_range(df, dates):
df_range = df_range.join(df, how='inner') df_range = df_range.join(df, how='inner')
return df_range return df_range
def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'): def yf_download(symbols, start, end):
""" df = pd.DataFrame(columns = pd.MultiIndex(levels=[["Adj Close", "Volume"],[]], codes=[[],[]], names=["adjclose", "volume"]))
Read stock data (adjusted close) for given symbols from Yahoo Finance for sym in symbols:
""" # tmp = si.get_data(sym, start_date=start)
org_sym = symbols tmp = si.get_data(sym, start_date=start)[["adjclose", "volume"]]
sd = dates[0] tuples = list(zip(tmp.columns.values.tolist(), \
ed = dates[-1] [symbols[0]]*len(tmp.columns.values)))
# if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent tmp.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None])
if 'SPY' not in symbols: df = df.join(tmp, how='outer')
symbols = ['SPY'] + symbols
df = yf.download(symbols, start=sd, end = ed)[colname]
if len(symbols) == 1:
df.name = symbols[0]
df = df.to_frame()
df = df.dropna(subset=['SPY'])
df = fill_missing_data(df)
if addSPY==False:
# df = df.drop(columns=['SPY'])
df = df[org_sym]
return df return df
# def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'):
# """
# Read stock data (adjusted close) for given symbols from Yahoo Finance
# """
# org_sym = symbols
# sd = dates[0]
# ed = dates[-1]
# # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent
# if 'SPY' not in symbols:
# symbols = ['SPY'] + symbols
# df = yf.download(symbols, start=sd, end = ed)[colname]
# if len(symbols) == 1:
# df.name = symbols[0]
# df = df.to_frame()
# df = df.dropna(subset=['SPY'])
# df = fill_missing_data(df)
# if addSPY==False:
# # df = df.drop(columns=['SPY'])
# df = df[org_sym]
# return df
def yf_batch_download(symbols, start, end, batch_size, time_gap): def yf_batch_download(symbols, start, end, batch_size, time_gap):
""" """
download in small batches to avoid connection closure by host download in small batches to avoid connection closure by host
@ -131,10 +144,10 @@ def yf_batch_download(symbols, start, end, batch_size, time_gap):
batches = n // batch_size batches = n // batch_size
df = pd.DataFrame() df = pd.DataFrame()
for i in range(batches - 1): for i in range(batches - 1):
tmp = yf.download(symbols[i*batch_size:(i+1)*batch_size], start, end) tmp = yf_download(symbols[i*batch_size:(i+1)*batch_size], start, end)
df = pd.concat([df, tmp], axis=1) df = pd.concat([df, tmp], axis=1)
sleep(time_gap) sleep(time_gap)
tmp = yf.download(symbols[(batches-1)*batch_size:n], start, end) tmp = yf_download(symbols[(batches-1)*batch_size:n], start, end)
df = pd.concat([df, tmp], axis=1) df = pd.concat([df, tmp], axis=1)
return df return df
@ -245,38 +258,38 @@ def get_price_volume(symbols, dates, addSPY=False):
return price, volume return price, volume
def get_price_volume_online(symbols, dates, addSPY=False): # def get_price_volume_online(symbols, dates, addSPY=False):
""" # """
Read stock data (adjusted close and volume) for given symbols from Yahoo # Read stock data (adjusted close and volume) for given symbols from Yahoo
Finance # Finance
""" # """
org_sym = symbols # org_sym = symbols
sd = dates[0] # sd = dates[0]
ed = dates[-1] # ed = dates[-1]
# if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent # # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent
if 'SPY' not in symbols: # if 'SPY' not in symbols:
symbols = ['SPY'] + symbols # symbols = ['SPY'] + symbols
df = yf.download(symbols, start=sd, end = ed) # df = yf.download(symbols, start=sd, end = ed)
if len(symbols) == 1: # if len(symbols) == 1:
df = df.dropna(subset = ['Adj Close']) # df = df.dropna(subset = ['Adj Close'])
else: # else:
df = df.dropna(subset=[('Adj Close', 'SPY')]) # df = df.dropna(subset=[('Adj Close', 'SPY')])
price = df['Adj Close'] # price = df['Adj Close']
price = fill_missing_data(price) # price = fill_missing_data(price)
volume = df['Volume'] # volume = df['Volume']
volume = volume.fillna(0) # volume = volume.fillna(0)
if len(symbols) == 1: # if len(symbols) == 1:
price.name = symbols[0] # price.name = symbols[0]
volume.name = symbols[0] # volume.name = symbols[0]
price = price.to_frame() # price = price.to_frame()
volume = volume.to_frame() # volume = volume.to_frame()
if addSPY==False: # if addSPY==False:
price = price[org_sym] # price = price[org_sym]
volume = volume[org_sym] # volume = volume[org_sym]
return price, volume # return price, volume
def get_watchlist(website: str = WEBSITE): def get_watchlist(website: str = WEBSITE):
page = requests.get(WEBSITE) page = requests.get(WEBSITE)