From c2d00ff69aa4a2f5738402044867b6295c8f1d42 Mon Sep 17 00:00:00 2001 From: cpan Date: Thu, 20 Jun 2024 19:48:58 -0700 Subject: [PATCH] WIP (need to figure out join single index to multi-index) --- indicators.py | 113 +++++++++++++++++++-------------------- util.py | 143 +++++++++++++++++++++++++++----------------------- 2 files changed, 135 insertions(+), 121 deletions(-) diff --git a/indicators.py b/indicators.py index 8aaff22..11e5640 100644 --- a/indicators.py +++ b/indicators.py @@ -21,8 +21,9 @@ Could also come up with a value that ties to the trading volume. import pandas as pd import numpy as np import datetime as dt -from util import get_data, get_price_volume, plot_data, get_watchlist -from marketsim import compute_portvals, compute_portfolio_stats, normalize_data +# from util import get_data, get_price_volume, plot_data, get_watchlist +from util import get_price_volume, plot_data, get_watchlist +# from marketsim import compute_portvals, compute_portfolio_stats, normalize_data # import matplotlib.pyplot as plt # import matplotlib from numpy.fft import fft, ifft @@ -403,28 +404,28 @@ def simple_bollinger_strategy(stk): return order_list -def plot_against_sym(df, sym=['SPY']): - df_temp = df.copy() - df_sym = get_data(sym, pd.to_datetime(df_temp.index.values), addSPY=False) - df_temp[sym[0]] = df_sym.values - df_temp = normalize_data(df_temp) - plot_data(df_temp) - return df_sym +# def plot_against_sym(df, sym=['SPY']): +# df_temp = df.copy() +# df_sym = get_data(sym, pd.to_datetime(df_temp.index.values), addSPY=False) +# df_temp[sym[0]] = df_sym.values +# df_temp = normalize_data(df_temp) +# plot_data(df_temp) +# return df_sym -def test_bollinger_sell(): - sd = dt.datetime(2010,1,1) - # ed = dt.datetime.today() - ed = dt.datetime(2012,12,31) - symbol = ['XOM'] - dates = dates = pd.date_range(sd, ed) - prices = get_data(symbol, dates, addSPY=False) - # prices = prices.dropna() - stk = security(prices) - sell = bollinger_sell(stk) - plot_data(sell) - buy = bollinger_buy(stk, 190) - plot_data(buy) +# def test_bollinger_sell(): +# sd = dt.datetime(2010,1,1) +# # ed = dt.datetime.today() +# ed = dt.datetime(2012,12,31) +# symbol = ['XOM'] +# dates = dates = pd.date_range(sd, ed) +# prices = get_data(symbol, dates, addSPY=False) +# # prices = prices.dropna() +# stk = security(prices) +# sell = bollinger_sell(stk) +# plot_data(sell) +# buy = bollinger_buy(stk, 190) +# plot_data(buy) def get_crossing(stocks): """ @@ -515,43 +516,43 @@ def modified_bollinger_strategy(stk): return order_list -def test_get_orders(): - sd = dt.datetime(2000,2,1) - # ed = dt.datetime.today() - ed = dt.datetime(2012,9,12) - symbol = ['INTC', 'XOM', 'MSFT'] - dates = dates = pd.date_range(sd, ed) - prices = get_data(symbol, dates, addSPY=False) - stk = security(prices) +# def test_get_orders(): +# sd = dt.datetime(2000,2,1) +# # ed = dt.datetime.today() +# ed = dt.datetime(2012,9,12) +# symbol = ['INTC', 'XOM', 'MSFT'] +# dates = dates = pd.date_range(sd, ed) +# prices = get_data(symbol, dates, addSPY=False) +# stk = security(prices) - # order_list = simple_bollinger_strategy(stk) - order_list = modified_bollinger_strategy(stk) +# # order_list = simple_bollinger_strategy(stk) +# order_list = modified_bollinger_strategy(stk) - # print(order_list) - port_val = compute_portvals(order_list,100000,9.95,0.005) - if isinstance(port_val, pd.DataFrame): - port_val = port_val[port_val.columns[0]].to_frame() # just get the first column - else: - print("warning, code did not return a DataFrame") - price_SPY = plot_against_sym(port_val) +# # print(order_list) +# port_val = compute_portvals(order_list,100000,9.95,0.005) +# if isinstance(port_val, pd.DataFrame): +# port_val = port_val[port_val.columns[0]].to_frame() # just get the first column +# else: +# print("warning, code did not return a DataFrame") +# price_SPY = plot_against_sym(port_val) - rfr=0 - sf=252 +# rfr=0 +# sf=252 - cr, adr, sddr, sr = compute_portfolio_stats(port_val, [1.0], rfr, sf) - crSP,adrSP,sddrSP,srSP = compute_portfolio_stats(price_SPY, [1.0], rfr, sf) - # Compare portfolio against $SPX - print("\nDate Range: {} to {}".format(sd.date(), ed.date())) - print() - print("Sharpe Ratio: {}, {}".format(sr, srSP)) - print() - print("Cumulative Return: {}, {}".format(cr, crSP)) - print() - print("Standard Deviation: {}, {}".format(sddr, sddrSP)) - print() - print("Average Daily Return: {}, {}".format(adr, adrSP)) - print() - print("Final Portfolio Value: {:.2f}".format(port_val['Portfolio'].iloc[-1])) +# cr, adr, sddr, sr = compute_portfolio_stats(port_val, [1.0], rfr, sf) +# crSP,adrSP,sddrSP,srSP = compute_portfolio_stats(price_SPY, [1.0], rfr, sf) +# # Compare portfolio against $SPX +# print("\nDate Range: {} to {}".format(sd.date(), ed.date())) +# print() +# print("Sharpe Ratio: {}, {}".format(sr, srSP)) +# print() +# print("Cumulative Return: {}, {}".format(cr, crSP)) +# print() +# print("Standard Deviation: {}, {}".format(sddr, sddrSP)) +# print() +# print("Average Daily Return: {}, {}".format(adr, adrSP)) +# print() +# print("Final Portfolio Value: {:.2f}".format(port_val['Portfolio'].iloc[-1])) def plot_basic(stk, axs): data = stk.price.copy() @@ -880,7 +881,7 @@ if __name__ == "__main__": Input(component_id='button', component_property='n_clicks'), prevent_initial_call=True, ) - def get_data(clicks): + def get_data_cb(clicks): # global all_plot_sym, all_plot_ind, all_data, all_vol, all_macd, all_rsi # if clicks == 0: # return # no update diff --git a/util.py b/util.py index 1441aac..1a24b84 100644 --- a/util.py +++ b/util.py @@ -11,7 +11,8 @@ import datetime as dt import os import pandas as pd import numpy as np -import yfinance as yf +# import yfinance as yf +import yahoo_fin.stock_info as si import requests from lxml import html from io import StringIO @@ -32,19 +33,19 @@ def symbol_to_path(symbol, base_dir=None): base_dir = os.environ.get("MARKET_DATA_DIR", '../data/') return os.path.join(base_dir, "{}.csv".format(str(symbol))) -def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'): - """ - Read stock data (adjusted close) for given symbols from Yahoo Finance - from start_date to the latest date available (usually the current date). - """ - if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent - symbols = ['SPY'] + symbols +# def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'): +# """ +# Read stock data (adjusted close) for given symbols from Yahoo Finance +# from start_date to the latest date available (usually the current date). +# """ +# if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent +# symbols = ['SPY'] + symbols - df = yf.download(symbols, start = start_date)[colname] - if len(symbols) == 1: - df.name = symbols[0] - df = df.to_frame() - return df +# df = yf.download(symbols, start = start_date)[colname] +# if len(symbols) == 1: +# df.name = symbols[0] +# df = df.to_frame() +# return df # def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'): """ @@ -80,30 +81,42 @@ def get_data_range(df, dates): df_range = df_range.join(df, how='inner') return df_range -def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'): - """ - Read stock data (adjusted close) for given symbols from Yahoo Finance - """ - org_sym = symbols - sd = dates[0] - ed = dates[-1] - # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent - if 'SPY' not in symbols: - symbols = ['SPY'] + symbols - df = yf.download(symbols, start=sd, end = ed)[colname] - if len(symbols) == 1: - df.name = symbols[0] - df = df.to_frame() - - df = df.dropna(subset=['SPY']) - df = fill_missing_data(df) - - if addSPY==False: - # df = df.drop(columns=['SPY']) - df = df[org_sym] +def yf_download(symbols, start, end): + df = pd.DataFrame(columns = pd.MultiIndex(levels=[["Adj Close", "Volume"],[]], codes=[[],[]], names=["adjclose", "volume"])) + for sym in symbols: + # tmp = si.get_data(sym, start_date=start) + tmp = si.get_data(sym, start_date=start)[["adjclose", "volume"]] + tuples = list(zip(tmp.columns.values.tolist(), \ + [symbols[0]]*len(tmp.columns.values))) + tmp.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None]) + df = df.join(tmp, how='outer') return df +# def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'): +# """ +# Read stock data (adjusted close) for given symbols from Yahoo Finance +# """ +# org_sym = symbols +# sd = dates[0] +# ed = dates[-1] +# # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent +# if 'SPY' not in symbols: +# symbols = ['SPY'] + symbols +# df = yf.download(symbols, start=sd, end = ed)[colname] +# if len(symbols) == 1: +# df.name = symbols[0] +# df = df.to_frame() + +# df = df.dropna(subset=['SPY']) +# df = fill_missing_data(df) + +# if addSPY==False: +# # df = df.drop(columns=['SPY']) +# df = df[org_sym] + +# return df + def yf_batch_download(symbols, start, end, batch_size, time_gap): """ download in small batches to avoid connection closure by host @@ -131,10 +144,10 @@ def yf_batch_download(symbols, start, end, batch_size, time_gap): batches = n // batch_size df = pd.DataFrame() for i in range(batches - 1): - tmp = yf.download(symbols[i*batch_size:(i+1)*batch_size], start, end) + tmp = yf_download(symbols[i*batch_size:(i+1)*batch_size], start, end) df = pd.concat([df, tmp], axis=1) sleep(time_gap) - tmp = yf.download(symbols[(batches-1)*batch_size:n], start, end) + tmp = yf_download(symbols[(batches-1)*batch_size:n], start, end) df = pd.concat([df, tmp], axis=1) return df @@ -245,38 +258,38 @@ def get_price_volume(symbols, dates, addSPY=False): return price, volume -def get_price_volume_online(symbols, dates, addSPY=False): - """ - Read stock data (adjusted close and volume) for given symbols from Yahoo - Finance - """ - org_sym = symbols - sd = dates[0] - ed = dates[-1] - # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent - if 'SPY' not in symbols: - symbols = ['SPY'] + symbols - df = yf.download(symbols, start=sd, end = ed) - if len(symbols) == 1: - df = df.dropna(subset = ['Adj Close']) - else: - df = df.dropna(subset=[('Adj Close', 'SPY')]) - price = df['Adj Close'] - price = fill_missing_data(price) - volume = df['Volume'] - volume = volume.fillna(0) +# def get_price_volume_online(symbols, dates, addSPY=False): +# """ +# Read stock data (adjusted close and volume) for given symbols from Yahoo +# Finance +# """ +# org_sym = symbols +# sd = dates[0] +# ed = dates[-1] +# # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent +# if 'SPY' not in symbols: +# symbols = ['SPY'] + symbols +# df = yf.download(symbols, start=sd, end = ed) +# if len(symbols) == 1: +# df = df.dropna(subset = ['Adj Close']) +# else: +# df = df.dropna(subset=[('Adj Close', 'SPY')]) +# price = df['Adj Close'] +# price = fill_missing_data(price) +# volume = df['Volume'] +# volume = volume.fillna(0) - if len(symbols) == 1: - price.name = symbols[0] - volume.name = symbols[0] - price = price.to_frame() - volume = volume.to_frame() +# if len(symbols) == 1: +# price.name = symbols[0] +# volume.name = symbols[0] +# price = price.to_frame() +# volume = volume.to_frame() - if addSPY==False: - price = price[org_sym] - volume = volume[org_sym] +# if addSPY==False: +# price = price[org_sym] +# volume = volume[org_sym] - return price, volume +# return price, volume def get_watchlist(website: str = WEBSITE): page = requests.get(WEBSITE)