WIP (still need to figure out how to join a single-level column index to a MultiIndex)

This commit is contained in:
George 2024-06-20 19:48:58 -07:00
parent fe44f5b4ff
commit c2d00ff69a
2 changed files with 135 additions and 121 deletions

View File

@ -21,8 +21,9 @@ Could also come up with a value that ties to the trading volume.
import pandas as pd
import numpy as np
import datetime as dt
from util import get_data, get_price_volume, plot_data, get_watchlist
from marketsim import compute_portvals, compute_portfolio_stats, normalize_data
# from util import get_data, get_price_volume, plot_data, get_watchlist
from util import get_price_volume, plot_data, get_watchlist
# from marketsim import compute_portvals, compute_portfolio_stats, normalize_data
# import matplotlib.pyplot as plt
# import matplotlib
from numpy.fft import fft, ifft
@ -403,28 +404,28 @@ def simple_bollinger_strategy(stk):
return order_list
def plot_against_sym(df, sym=['SPY']):
    """
    Plot a normalized copy of df next to a reference symbol.

    df: DataFrame whose index values can be converted with pd.to_datetime
    sym: list of symbols passed to get_data; sym[0] names the added column

    Returns the DataFrame that get_data produced for sym.
    """
    # fetch the reference data over the same dates as df's index
    ref_dates = pd.to_datetime(df.index.values)
    reference = get_data(sym, ref_dates, addSPY=False)

    # add the reference column to a copy so the caller's frame is not modified
    combined = df.copy()
    combined[sym[0]] = reference.values

    plot_data(normalize_data(combined))
    return reference
# def plot_against_sym(df, sym=['SPY']):
# df_temp = df.copy()
# df_sym = get_data(sym, pd.to_datetime(df_temp.index.values), addSPY=False)
# df_temp[sym[0]] = df_sym.values
# df_temp = normalize_data(df_temp)
# plot_data(df_temp)
# return df_sym
def test_bollinger_sell():
    """
    Manual check: plot the bollinger_sell and bollinger_buy results for XOM
    between 2010-01-01 and 2012-12-31.
    """
    start = dt.datetime(2010, 1, 1)
    # alternative end date: dt.datetime.today()
    end = dt.datetime(2012, 12, 31)

    prices = get_data(['XOM'], pd.date_range(start, end), addSPY=False)
    # prices = prices.dropna()
    stk = security(prices)

    # plot the sell result first, then the buy result
    plot_data(bollinger_sell(stk))
    plot_data(bollinger_buy(stk, 190))
# def test_bollinger_sell():
# sd = dt.datetime(2010,1,1)
# # ed = dt.datetime.today()
# ed = dt.datetime(2012,12,31)
# symbol = ['XOM']
# dates = dates = pd.date_range(sd, ed)
# prices = get_data(symbol, dates, addSPY=False)
# # prices = prices.dropna()
# stk = security(prices)
# sell = bollinger_sell(stk)
# plot_data(sell)
# buy = bollinger_buy(stk, 190)
# plot_data(buy)
def get_crossing(stocks):
"""
@ -515,43 +516,43 @@ def modified_bollinger_strategy(stk):
return order_list
def test_get_orders():
sd = dt.datetime(2000,2,1)
# ed = dt.datetime.today()
ed = dt.datetime(2012,9,12)
symbol = ['INTC', 'XOM', 'MSFT']
dates = dates = pd.date_range(sd, ed)
prices = get_data(symbol, dates, addSPY=False)
stk = security(prices)
# def test_get_orders():
# sd = dt.datetime(2000,2,1)
# # ed = dt.datetime.today()
# ed = dt.datetime(2012,9,12)
# symbol = ['INTC', 'XOM', 'MSFT']
# dates = dates = pd.date_range(sd, ed)
# prices = get_data(symbol, dates, addSPY=False)
# stk = security(prices)
# order_list = simple_bollinger_strategy(stk)
order_list = modified_bollinger_strategy(stk)
# # order_list = simple_bollinger_strategy(stk)
# order_list = modified_bollinger_strategy(stk)
# print(order_list)
port_val = compute_portvals(order_list,100000,9.95,0.005)
if isinstance(port_val, pd.DataFrame):
port_val = port_val[port_val.columns[0]].to_frame() # just get the first column
else:
print("warning, code did not return a DataFrame")
price_SPY = plot_against_sym(port_val)
# # print(order_list)
# port_val = compute_portvals(order_list,100000,9.95,0.005)
# if isinstance(port_val, pd.DataFrame):
# port_val = port_val[port_val.columns[0]].to_frame() # just get the first column
# else:
# print("warning, code did not return a DataFrame")
# price_SPY = plot_against_sym(port_val)
rfr=0
sf=252
# rfr=0
# sf=252
cr, adr, sddr, sr = compute_portfolio_stats(port_val, [1.0], rfr, sf)
crSP,adrSP,sddrSP,srSP = compute_portfolio_stats(price_SPY, [1.0], rfr, sf)
# Compare portfolio against $SPX
print("\nDate Range: {} to {}".format(sd.date(), ed.date()))
print()
print("Sharpe Ratio: {}, {}".format(sr, srSP))
print()
print("Cumulative Return: {}, {}".format(cr, crSP))
print()
print("Standard Deviation: {}, {}".format(sddr, sddrSP))
print()
print("Average Daily Return: {}, {}".format(adr, adrSP))
print()
print("Final Portfolio Value: {:.2f}".format(port_val['Portfolio'].iloc[-1]))
# cr, adr, sddr, sr = compute_portfolio_stats(port_val, [1.0], rfr, sf)
# crSP,adrSP,sddrSP,srSP = compute_portfolio_stats(price_SPY, [1.0], rfr, sf)
# # Compare portfolio against $SPX
# print("\nDate Range: {} to {}".format(sd.date(), ed.date()))
# print()
# print("Sharpe Ratio: {}, {}".format(sr, srSP))
# print()
# print("Cumulative Return: {}, {}".format(cr, crSP))
# print()
# print("Standard Deviation: {}, {}".format(sddr, sddrSP))
# print()
# print("Average Daily Return: {}, {}".format(adr, adrSP))
# print()
# print("Final Portfolio Value: {:.2f}".format(port_val['Portfolio'].iloc[-1]))
def plot_basic(stk, axs):
data = stk.price.copy()
@ -880,7 +881,7 @@ if __name__ == "__main__":
Input(component_id='button', component_property='n_clicks'),
prevent_initial_call=True,
)
def get_data(clicks):
def get_data_cb(clicks):
# global all_plot_sym, all_plot_ind, all_data, all_vol, all_macd, all_rsi
# if clicks == 0:
# return # no update

143
util.py
View File

@ -11,7 +11,8 @@ import datetime as dt
import os
import pandas as pd
import numpy as np
import yfinance as yf
# import yfinance as yf
import yahoo_fin.stock_info as si
import requests
from lxml import html
from io import StringIO
@ -32,19 +33,19 @@ def symbol_to_path(symbol, base_dir=None):
base_dir = os.environ.get("MARKET_DATA_DIR", '../data/')
return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'):
"""
Read stock data (adjusted close) for given symbols from Yahoo Finance
from start_date to the latest date available (usually the current date).
"""
if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent
symbols = ['SPY'] + symbols
# def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'):
# """
# Read stock data (adjusted close) for given symbols from Yahoo Finance
# from start_date to the latest date available (usually the current date).
# """
# if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent
# symbols = ['SPY'] + symbols
df = yf.download(symbols, start = start_date)[colname]
if len(symbols) == 1:
df.name = symbols[0]
df = df.to_frame()
return df
# df = yf.download(symbols, start = start_date)[colname]
# if len(symbols) == 1:
# df.name = symbols[0]
# df = df.to_frame()
# return df
# def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'):
"""
@ -80,30 +81,42 @@ def get_data_range(df, dates):
df_range = df_range.join(df, how='inner')
return df_range
def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'):
"""
Read stock data (adjusted close) for given symbols from Yahoo Finance
"""
org_sym = symbols
sd = dates[0]
ed = dates[-1]
# if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent
if 'SPY' not in symbols:
symbols = ['SPY'] + symbols
df = yf.download(symbols, start=sd, end = ed)[colname]
if len(symbols) == 1:
df.name = symbols[0]
df = df.to_frame()
df = df.dropna(subset=['SPY'])
df = fill_missing_data(df)
if addSPY==False:
# df = df.drop(columns=['SPY'])
df = df[org_sym]
def yf_download(symbols, start, end):
    """
    Download adjusted close and volume for each symbol via yahoo_fin and
    combine them into one DataFrame with two-level columns (field, symbol).

    symbols: iterable of ticker symbols
    start: passed to si.get_data as start_date
    end: passed to si.get_data as end_date

    Returns a DataFrame whose columns are a MultiIndex of
    ("adjclose" | "volume", symbol); rows are the outer union of the
    per-symbol indexes. An empty symbols list gives an empty DataFrame with
    two-level columns.
    """
    fields = ["adjclose", "volume"]
    # NOTE(review): yfinance labelled these "Adj Close"/"Volume"; the yahoo_fin
    # names are kept here as before -- confirm what downstream code indexes on.

    frames = []
    for sym in symbols:
        tmp = si.get_data(sym, start_date=start, end_date=end)[fields]
        # label the columns with this symbol (not symbols[0]) so that several
        # symbols do not collide when combined
        tmp.columns = pd.MultiIndex.from_product([fields, [sym]])
        frames.append(tmp)

    if not frames:
        empty_cols = pd.MultiIndex(levels=[fields, []], codes=[[], []])
        return pd.DataFrame(columns=empty_cols)

    # one outer concat aligns all symbols on the union of their dates and
    # avoids joining onto an empty MultiIndex frame
    return pd.concat(frames, axis=1, join="outer")
# def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'):
# """
# Read stock data (adjusted close) for given symbols from Yahoo Finance
# """
# org_sym = symbols
# sd = dates[0]
# ed = dates[-1]
# # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent
# if 'SPY' not in symbols:
# symbols = ['SPY'] + symbols
# df = yf.download(symbols, start=sd, end = ed)[colname]
# if len(symbols) == 1:
# df.name = symbols[0]
# df = df.to_frame()
# df = df.dropna(subset=['SPY'])
# df = fill_missing_data(df)
# if addSPY==False:
# # df = df.drop(columns=['SPY'])
# df = df[org_sym]
# return df
def yf_batch_download(symbols, start, end, batch_size, time_gap):
"""
download in small batches to avoid connection closure by host
@ -131,10 +144,10 @@ def yf_batch_download(symbols, start, end, batch_size, time_gap):
batches = n // batch_size
df = pd.DataFrame()
for i in range(batches - 1):
tmp = yf.download(symbols[i*batch_size:(i+1)*batch_size], start, end)
tmp = yf_download(symbols[i*batch_size:(i+1)*batch_size], start, end)
df = pd.concat([df, tmp], axis=1)
sleep(time_gap)
tmp = yf.download(symbols[(batches-1)*batch_size:n], start, end)
tmp = yf_download(symbols[(batches-1)*batch_size:n], start, end)
df = pd.concat([df, tmp], axis=1)
return df
@ -245,38 +258,38 @@ def get_price_volume(symbols, dates, addSPY=False):
return price, volume
def get_price_volume_online(symbols, dates, addSPY=False):
"""
Read stock data (adjusted close and volume) for given symbols from Yahoo
Finance
"""
org_sym = symbols
sd = dates[0]
ed = dates[-1]
# if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent
if 'SPY' not in symbols:
symbols = ['SPY'] + symbols
df = yf.download(symbols, start=sd, end = ed)
if len(symbols) == 1:
df = df.dropna(subset = ['Adj Close'])
else:
df = df.dropna(subset=[('Adj Close', 'SPY')])
price = df['Adj Close']
price = fill_missing_data(price)
volume = df['Volume']
volume = volume.fillna(0)
# def get_price_volume_online(symbols, dates, addSPY=False):
# """
# Read stock data (adjusted close and volume) for given symbols from Yahoo
# Finance
# """
# org_sym = symbols
# sd = dates[0]
# ed = dates[-1]
# # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent
# if 'SPY' not in symbols:
# symbols = ['SPY'] + symbols
# df = yf.download(symbols, start=sd, end = ed)
# if len(symbols) == 1:
# df = df.dropna(subset = ['Adj Close'])
# else:
# df = df.dropna(subset=[('Adj Close', 'SPY')])
# price = df['Adj Close']
# price = fill_missing_data(price)
# volume = df['Volume']
# volume = volume.fillna(0)
if len(symbols) == 1:
price.name = symbols[0]
volume.name = symbols[0]
price = price.to_frame()
volume = volume.to_frame()
# if len(symbols) == 1:
# price.name = symbols[0]
# volume.name = symbols[0]
# price = price.to_frame()
# volume = volume.to_frame()
if addSPY==False:
price = price[org_sym]
volume = volume[org_sym]
# if addSPY==False:
# price = price[org_sym]
# volume = volume[org_sym]
return price, volume
# return price, volume
def get_watchlist(website: str = WEBSITE):
page = requests.get(WEBSITE)