From fe44f5b4ffa9707de4c9a9d612e4752da1314a02 Mon Sep 17 00:00:00 2001 From: cpan Date: Wed, 19 Jun 2024 10:33:24 -0700 Subject: [PATCH] source code --- indicators.py | 1023 +++++++++++++++++++++++++++++++++++++++++++++++++ marketsim.py | 163 ++++++++ util.py | 371 ++++++++++++++++++ 3 files changed, 1557 insertions(+) create mode 100644 indicators.py create mode 100644 marketsim.py create mode 100644 util.py diff --git a/indicators.py b/indicators.py new file mode 100644 index 0000000..8aaff22 --- /dev/null +++ b/indicators.py @@ -0,0 +1,1023 @@ +# -*- coding: utf-8 -*- +""" + +TODO: given a list of symbols, for each stock, plot +Subplot1: +1. price +2. 50 and 200 day SMA lines +3. bollinger band (200 day) +Subplot2 +RSI +Subplot3 +MACD +TODO: validate the plots with online resource + +Created on Mon Feb 17 14:50:17 2020 +Use one as buy/sell trigger and verify a bag of indicators to make final decision. +Could also come up with a value that ties to the trading volume. +@author: thomwang +""" + +import pandas as pd +import numpy as np +import datetime as dt +from util import get_data, get_price_volume, plot_data, get_watchlist +from marketsim import compute_portvals, compute_portfolio_stats, normalize_data +# import matplotlib.pyplot as plt +# import matplotlib +from numpy.fft import fft, ifft +import scipy.signal as sig +import plotly.express as px +from plotly.subplots import make_subplots +# import plotly.graph_objects as go +from dash import Dash, html, dcc, callback, Output, Input, no_update +from waitress import serve +import json +import io +from flask_caching import Cache +from dash.exceptions import PreventUpdate + +# def fill_missing_data(df): +# df.ffill(inplace=True) +# df.bfilln(inplace=True) + +def fft_convolve(signal, window): + fft_signal = fft(signal) + fft_window = fft(window) + return ifft(fft_signal * fft_window) + +def zero_pad(array, n): + """Extends an array with zeros. 
def smooth(price, hsize=10, sigma=3):
    """Smooth every price column with a normalized Gaussian window.

    Parameters
    ----------
    price : pandas.DataFrame
        Time-indexed prices with no invalid values.
    hsize : int
        Window length in samples; like an SMA window it adds phase delay.
    sigma : float
        Standard deviation of the Gaussian; larger values smooth more.

    Returns
    -------
    pandas.DataFrame
        Copy of ``price`` with smoothed columns.  The first ``hsize - 1``
        rows are set to NaN.

    Notes
    -----
    Offers little practical benefit over ``sma``; kept for experiments with
    other smoothing kernels.
    """
    data = price.copy()
    # ``scipy.signal.gaussian`` was removed from the top-level namespace in
    # recent SciPy releases; the ``windows`` sub-module works on old and new.
    window = sig.windows.gaussian(M=hsize, std=sigma)
    window /= window.sum()
    padded = zero_pad(window, data.shape[0])
    for col in data.columns:
        smoothed = abs(fft_convolve(data[col].values, padded))
        smoothed[0:hsize - 1] = np.nan
        data[col] = smoothed

    return data


class security:
    """Indicator calculator for one or more securities.

    ``price`` is a pandas DataFrame with a datetime index sorted in
    chronological order and one column per symbol.  The stored frames are
    treated as read-only: no method modifies them.
    """

    def __init__(self, price, volume=None, rfr: float = 0.01, sf: float = 252.0):
        """
        Parameters
        ----------
        price : pandas.DataFrame
            Historical adjusted close prices of the securities.
        volume : pandas.DataFrame, optional
            Trading volume with the same layout as ``price``.
        rfr : float, optional
            Annualized risk free rate.  The default is 0.01.
        sf : float, optional
            Samples per year: 252 for daily data, 52 weekly, 12 monthly.
        """
        self._price = price
        self._volume = volume
        self._symbol = price.columns.values
        self._rfr = rfr
        self._sf = sf

    @property
    def symbol(self):
        return self._symbol

    @symbol.setter
    def symbol(self, value):
        raise AttributeError('security symbol is read only')

    @property
    def price(self):
        return self._price

    @price.setter
    def price(self, value):
        raise AttributeError('security price is read only')

    @property
    def volume(self):
        if self._volume is None:
            raise ValueError('trading volume information not available')
        return self._volume

    @volume.setter
    def volume(self, value):
        raise AttributeError('security volume is read only')

    @staticmethod
    def _seeded_ema(frame, first, window):
        """EMA seeded with the simple mean of the first ``window`` samples.

        ``frame`` must be a private float copy because it is modified here:
        rows ``first .. first+window-2`` become NaN and row
        ``first+window-1`` receives the mean of rows
        ``first .. first+window-1``.
        """
        last = first + window - 1
        seed = frame.iloc[first:last + 1].mean()
        frame.iloc[last] = seed
        frame.iloc[first:last] = np.nan
        return frame.ewm(span=window, adjust=False).mean()

    def sma(self, window):
        """Simple moving average of price over ``window`` samples."""
        return self.price.rolling(window).mean()

    def vwma(self, window):
        """Volume weighted moving average.

        Plotted against the SMA it gives an early indication: VWMA above
        SMA suggests a strong upward trend and vice versa.
        """
        price_vol = self.price * self.volume
        return price_vol.rolling(window).sum() / self.volume.rolling(window).sum()

    def vosma(self, window):
        """Simple moving average of volume over ``window`` samples."""
        return self.volume.rolling(window).mean()

    def ema(self, window):
        """Exponential moving average of price, seeded with an SMA.

        Works on a copy so ``self.price`` is left untouched (the previous
        implementation overwrote the first ``window`` rows of the stored
        prices on every call).
        """
        return self._seeded_ema(self.price.astype(float), 0, window)

    def voema(self, window):
        """Exponential moving average of volume, seeded with an SMA.

        Works on a copy so ``self.volume`` is left untouched.
        """
        return self._seeded_ema(self.volume.astype(float), 0, window)

    def rsi(self, window=14):
        """Relative Strength Index based on exponential averaging.

        Readings of 70 or above are traditionally read as overbought and
        readings of 30 or below as oversold.
        """
        d_chg = self.price.diff()
        d_up, d_dn = d_chg.copy(), d_chg.copy()
        d_up[d_up < 0] = 0
        d_dn[d_dn > 0] = 0

        # row 0 is NaN from diff(), so seeding starts at row 1
        avg_up = self._seeded_ema(d_up, 1, window)
        avg_dn = self._seeded_ema(d_dn, 1, window)
        rs = avg_up / abs(avg_dn.values)
        return 100 - 100 / (1 + rs)

    def volume_rsi(self, window=14):
        """RSI computed from up-volume and down-volume.

        Same formula as the price RSI, but the volume traded on days the
        price rose (up-volume) and fell (down-volume) replaces the price
        changes.  The indicator oscillates around a 50% centre line within
        0..100: readings above 50% mean bullish volume dominates, below 50%
        bearish volume dominates.  A simple rule is to buy when it moves
        above 50% after being below, and sell on the opposite crossing.
        """
        volume = self.volume
        # float copies: NaN and fractional seeds are written below
        up_vol, dn_vol = volume.astype(float), volume.astype(float)
        d_chg = self.price.diff()

        up_vol[d_chg < 0] = 0
        dn_vol[d_chg > 0] = 0
        up_vol.iloc[0] = np.nan
        dn_vol.iloc[0] = np.nan

        avg_up = self._seeded_ema(up_vol, 1, window)
        avg_dn = self._seeded_ema(dn_vol, 1, window)
        rs = avg_up / avg_dn.values
        return 100 - 100 / (1 + rs)

    def daily_returns(self):
        """Fractional change of price from one sample to the next."""
        return self.price.pct_change()

    @property
    def annualized_return(self):
        return self._sf * self.daily_returns().mean()

    @property
    def annualized_stdev(self):
        return np.sqrt(self._sf) * self.daily_returns().std()

    @property
    def sharpe(self):
        # was ``self.annualize_stdev`` (AttributeError on every access)
        return (self.annualized_return - self._rfr) / self.annualized_stdev

    def rolling_stdev(self, window):
        """Rolling standard deviation of price over ``window`` samples."""
        return self.price.rolling(window).std()

    def bollinger(self, window):
        """Bollinger bands.

        Parameters
        ----------
        window : int
            Averaging window in samples.

        Returns
        -------
        lower, upper : pandas.DataFrame
            SMA minus / plus two rolling standard deviations.
        """
        avg = self.sma(window)
        sdd2 = self.rolling_stdev(window).mul(2)
        lower = avg.sub(sdd2.values)
        upper = avg.add(sdd2.values)
        return lower, upper

    def macd(self, short_wd=12, long_wd=26, sig_wd=9):
        """Moving Average Convergence Divergence.

        MACD line      : ``short_wd`` EMA minus ``long_wd`` EMA
        Signal line    : ``sig_wd`` EMA of the MACD line
        MACD histogram : MACD line minus signal line

        A bullish crossover occurs when the MACD line crosses above the
        signal line, a bearish one when it crosses below.

        Returns
        -------
        macd_line, macd_sig, macd_hist, norm_hist : pandas.DataFrame
            ``norm_hist`` is the histogram divided by the long EMA.
        """
        macd_short = self.ema(short_wd)
        macd_long = self.ema(long_wd)
        macd_line = macd_short - macd_long.values
        macd_sig = macd_line.ewm(span=sig_wd, adjust=False).mean()
        macd_hist = macd_line - macd_sig.values
        norm_hist = macd_hist.div(macd_long.values)
        return macd_line, macd_sig, macd_hist, norm_hist
def bollinger_sell(stock, wd=200):
    """Position of price relative to the upper Bollinger band.

    Parameters
    ----------
    stock : security
        Object providing ``price`` and ``bollinger(wd)``.
    wd : int, optional
        Moving average window.  The default is 200.

    Returns
    -------
    pandas.DataFrame
        +1 while price is above the upper band, -1 while below.  The day of
        a crossing carries +3 (upward) or -3 (downward); -3 is the sell
        signal.
    """
    _, bol_up = stock.bollinger(wd)
    sell = np.sign(stock.price.sub(bol_up.values))
    return sell.add(sell.diff().values)


def bollinger_buy(stock, wd=200):
    """Position of price relative to the lower Bollinger band.

    Parameters
    ----------
    stock : security
        Object providing ``price`` and ``bollinger(wd)``.
    wd : int, optional
        Moving average window.  The default is 200.

    Returns
    -------
    pandas.DataFrame
        +1 while price is above the lower band, -1 while below.  The day of
        a crossing carries +3 (upward) or -3 (downward); +3 is the buy
        signal.
    """
    bol_low, _ = stock.bollinger(wd)
    buy = np.sign(stock.price.sub(bol_low.values))
    return buy.add(buy.diff().values)


def _signals_to_orders(signals):
    """Turn a frame of +3 / -3 signals (NaN elsewhere) into an order table."""
    records = []
    for date, row in signals.iterrows():
        for sym in signals.columns.values:
            if row[sym] > 2:
                side = 'BUY'
            elif row[sym] < -2:
                side = 'SELL'
            else:
                continue
            records.append({'Date': date, 'Symbol': sym,
                            'Order': side, 'Shares': 100})
    # Built in one go: DataFrame.append no longer exists in pandas >= 2.0.
    order_list = pd.DataFrame(records,
                              columns=['Date', 'Symbol', 'Order', 'Shares'])
    return order_list.set_index('Date')


def simple_bollinger_strategy(stk):
    """Generate 100-share orders from 190-day Bollinger band crossings.

    BUY when price crosses up through the lower band, SELL when it crosses
    down through the upper band.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Date' with columns 'Symbol', 'Order', 'Shares'.
    """
    buy = bollinger_buy(stk, 190)
    sell = bollinger_sell(stk, 190)

    # Keep only the actual signal cells.  Previously whole rows were kept, so
    # a -3 in the buy frame (price dropping through the LOWER band) was
    # turned into a SELL whenever another symbol had a buy on that date.
    buy_orders = buy.where(buy > 2).dropna(how='all')
    sell_orders = sell.where(sell < -2).dropna(how='all')

    # stable sort keeps buys ahead of sells that share a date
    orders = pd.concat([buy_orders, sell_orders]).sort_index(kind='mergesort')
    return _signals_to_orders(orders)


def plot_against_sym(df, sym=['SPY']):
    """Plot ``df`` normalized next to the first symbol in ``sym``.

    Returns the frame ``get_data`` produced for ``sym`` over ``df``'s dates.
    The default list is never modified.
    """
    df_temp = df.copy()
    df_sym = get_data(sym, pd.to_datetime(df_temp.index.values), addSPY=False)
    df_temp[sym[0]] = df_sym.values
    df_temp = normalize_data(df_temp)
    plot_data(df_temp)
    return df_sym
dt.datetime(2012,12,31) + symbol = ['XOM'] + dates = dates = pd.date_range(sd, ed) + prices = get_data(symbol, dates, addSPY=False) + # prices = prices.dropna() + stk = security(prices) + sell = bollinger_sell(stk) + plot_data(sell) + buy = bollinger_buy(stk, 190) + plot_data(buy) + +def get_crossing(stocks): + """ + Parameters + ---------- + stocks : TYPE instance of class 'security' + + Returns + ------- + cross : TYPE pandas DataFrame + DESCRIPTION - +1 when 50 day moving average is above 200 day moving + average. -1 when vice versa. transition days are of value +3 and -3 + respectively. + """ + sma50 = stocks.sma(50) + sma200 = stocks.sma(200) + cross = np.sign(sma50.sub(sma200.values)) + cross_diff = cross.diff() + cross = cross.add(cross_diff.values) + cross.columns = stocks.price.columns + + return cross + +def get_sma_slope(stocks, wd = 50): + """ + Parameters + ---------- + stocks : TYPE + DESCRIPTION. + wd : TYPE, optional + DESCRIPTION. The default is 50. + + Returns + ------- + slope : TYPE pandas DataFrame + DESCRIPTION - +1 when n day moving average is positive. -1 when + negative. transition days are of value +3 and -3 respectively. 
def modified_bollinger_strategy(stk):
    """Bollinger band orders filtered by RSI.

    A buy signal (+3 from ``bollinger_buy`` with a 190 sample window) is
    kept only when the RSI on that date is below 70; a sell signal (-3 from
    ``bollinger_sell``) only when the RSI is above 30.  Every order is for
    100 shares.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Date' with columns 'Symbol', 'Order', 'Shares'.
    """
    rsi = stk.rsi()

    btemp = pd.DataFrame()
    buy = bollinger_buy(stk, 190)
    buy_orders = buy[np.any(buy > 2, axis=1)]
    for col in buy_orders.columns:
        buy_stk = buy_orders[col]
        buy_stk = buy_stk[buy_stk > 2]
        buy_stk = buy_stk[rsi[col].loc[buy_stk.index] < 70]
        btemp = btemp.join(buy_stk, how='outer')

    stemp = pd.DataFrame()
    sell = bollinger_sell(stk, 190)
    sell_orders = sell[np.any(sell < -2, axis=1)]
    for col in sell_orders.columns:
        sell_stk = sell_orders[col]
        sell_stk = sell_stk[sell_stk < -2]
        sell_stk = sell_stk[rsi[col].loc[sell_stk.index] > 30]
        stemp = stemp.join(sell_stk, how='outer')

    # TODO - refine orders based on slope
    # TODO - refine further based on other conditions (RSI, MACD)
    # TODO - transaction shares determination
    orders = pd.concat([btemp, stemp]).sort_index()

    # Collect rows first and build the frame once: DataFrame.append no
    # longer exists in pandas >= 2.0.
    records = []
    for index, row in orders.iterrows():
        for sym in orders.columns.values:
            if row[sym] > 2:
                side = 'BUY'
            elif row[sym] < -2:
                side = 'SELL'
            else:
                continue
            records.append({'Date': index, 'Symbol': sym,
                            'Order': side, 'Shares': 100})
    order_list = pd.DataFrame(records,
                              columns=['Date', 'Symbol', 'Order', 'Shares'])
    return order_list.set_index('Date')


def test_get_orders():
    """Back-test the modified Bollinger strategy and print stats next to SPY."""
    sd = dt.datetime(2000, 2, 1)
    ed = dt.datetime(2012, 9, 12)
    symbol = ['INTC', 'XOM', 'MSFT']
    dates = pd.date_range(sd, ed)
    prices = get_data(symbol, dates, addSPY=False)
    stk = security(prices)

    order_list = modified_bollinger_strategy(stk)

    port_val = compute_portvals(order_list, 100000, 9.95, 0.005)
    if isinstance(port_val, pd.DataFrame):
        port_val = port_val[port_val.columns[0]].to_frame()  # first column only
    else:
        print("warning, code did not return a DataFrame")
    price_SPY = plot_against_sym(port_val)

    rfr = 0
    sf = 252

    cr, adr, sddr, sr = compute_portfolio_stats(port_val, [1.0], rfr, sf)
    crSP, adrSP, sddrSP, srSP = compute_portfolio_stats(price_SPY, [1.0], rfr, sf)
    # Compare portfolio against SPY
    print("\nDate Range: {} to {}".format(sd.date(), ed.date()))
    print()
    print("Sharpe Ratio: {}, {}".format(sr, srSP))
    print()
    print("Cumulative Return: {}, {}".format(cr, crSP))
    print()
    print("Standard Deviation: {}, {}".format(sddr, sddrSP))
    print()
    print("Average Daily Return: {}, {}".format(adr, adrSP))
    print()
    print("Final Portfolio Value: {:.2f}".format(port_val['Portfolio'].iloc[-1]))
def plot_basic(stk, axs):
    """Plot price with 200 sample Bollinger bands, SMA200, SMA50 and VWMA50.

    ``stk`` provides ``price``, ``bollinger``, ``sma`` and ``vwma``; ``axs``
    is handed to ``plot_data`` unchanged.
    """
    lower, upper = stk.bollinger(200)
    overlays = [
        (lower, '_BOL200L'),
        (upper, '_BOL200U'),
        (stk.sma(200), '_SMA200'),
        (stk.sma(50), '_SMA50'),
        (stk.vwma(50), '_WVMA50'),
    ]
    data = stk.price.copy()
    for frame, suffix in overlays:
        data = data.join(frame, rsuffix=suffix)
    plot_data(data, axs, ylabel='Price ($)')


def _plot_oscillator(osc, axs, ylabel):
    """Draw the first column of ``osc`` with shaded >=70 and <=30 zones."""
    poscol = 'green'
    negcol = 'red'
    dates = osc.index.values
    values = osc.iloc[:, 0]
    axs.plot(dates, values)
    axs.axhline(70, color=negcol, ls='dotted')
    axs.axhline(30, color=poscol, ls='dotted')
    axs.fill_between(dates, values, 70, where=values >= 70,
                     facecolor=negcol, edgecolor=negcol, alpha=0.5)
    axs.fill_between(dates, values, 30, where=values <= 30,
                     facecolor=poscol, edgecolor=poscol, alpha=0.5)
    axs.set_yticks([30, 50, 70])
    axs.set_ylabel(ylabel)
    axs.grid()


def plot_rsi(rsi, axs):
    """Plot the first column of ``rsi`` on ``axs`` with 30/70 guide lines."""
    _plot_oscillator(rsi, axs, 'RSI')


def plot_volume_rsi(rsi, axs):
    """Plot the first column of a volume RSI frame with 30/70 guide lines."""
    _plot_oscillator(rsi, axs, 'VoRSI')
def plot_macd(macd, macd_sig, macd_hist, axs):
    """Plot MACD line, signal line and a green/red shaded histogram on ``axs``.

    Only the first column of each frame is drawn.
    """
    poscol = 'green'
    negcol = 'red'
    dates = macd.index.values
    hist = macd_hist.iloc[:, 0]
    axs.plot(dates, macd.iloc[:, 0], label='MACD')
    axs.legend()
    axs.plot(dates, macd_sig.iloc[:, 0])
    axs.plot(dates, hist)
    axs.fill_between(dates, hist, 0, where=hist > 0,
                     facecolor=poscol, edgecolor=poscol, alpha=0.5)
    axs.fill_between(dates, hist, 0, where=hist < 0,
                     facecolor=negcol, edgecolor=negcol, alpha=0.5)
    axs.set_ylabel('MACD')
    axs.grid()


def _sign_flipped(series):
    """True when consecutive values of ``series`` go from <0 to >0 or back."""
    return bool((np.sign(series).diff().abs() > 1).any())


def intelligent_loop_plots():
    """Scan the watchlist and pack the symbols worth plotting into JSON.

    A symbol stands out when, within the last ``lb_trigger`` samples:

    1. RSI is at or above 70, or at or below 30
    2. VoRSI is at or above 70, or at or below 30
    3. the MACD histogram divided by the slow EMA is at or beyond +/-2%
    4. the MACD histogram changes sign
    5. price crosses the 50 sample SMA
    6. the 50 sample SMA crosses the 200 sample SMA (golden/death cross)
    7. price is on or outside the 200 sample Bollinger bands
    8. price crosses the 200 sample SMA
    9. volume exceeds the 50 sample mean by more than two standard deviations

    Symbols whose watchlist note is "skip", or that raise no trigger, are
    left out unless the note is "watch".

    Returns
    -------
    str
        JSON object with keys ``all_plot_sym`` (list), ``all_plot_ind``
        (symbol -> title string) and ``all_data``, ``all_vol``, ``all_macd``,
        ``all_rsi`` (each a column-oriented JSON dump of a DataFrame limited
        to the plotting period).
    """
    lb_year = 3     # years of stock data to retrieve
    plt_year = 2    # number of years data to plot
    lb_trigger = 5  # days to look back for triggering events
    ed = dt.datetime.today()
    sd = ed - dt.timedelta(days=365 * lb_year)
    plot_sd = ed - dt.timedelta(days=365 * plt_year)

    watchlist = get_watchlist()
    symbols = watchlist.index.values.tolist()

    prices, volumes = get_price_volume(symbols, pd.date_range(sd, ed), addSPY=True)

    all_plot_sym = []
    all_plot_ind = {}
    all_data = pd.DataFrame([])
    all_vol = pd.DataFrame([])
    all_macd = pd.DataFrame([])
    all_rsi = pd.DataFrame([])

    for sym in symbols:
        price = prices[sym].to_frame()
        vol = volumes[sym].to_frame()
        all_vol = all_vol.join(vol, how='outer')
        stk = security(price, vol)

        rsi = stk.rsi()
        vorsi = stk.volume_rsi()
        macd, macd_sig, macd_hist, norm_hist = stk.macd()
        sma50 = stk.sma(50)
        vwma50 = stk.vwma(50)
        sma200 = stk.sma(200)
        bol_low, bol_up = stk.bollinger(200)

        price_tail = price[sym].tail(lb_trigger)
        sma50_tail = sma50[sym].tail(lb_trigger)
        sma200_tail = sma200[sym].tail(lb_trigger)
        triggers = []

        # RSI outside window (over bought / over sold)
        rsi_tail = rsi[sym].tail(lb_trigger)
        if (rsi_tail >= 70).any() or (rsi_tail <= 30).any():
            triggers.append('RSI')

        # VoRSI outside window (over bought / over sold)
        vorsi_tail = vorsi[sym].tail(lb_trigger)
        if (vorsi_tail >= 70).any() or (vorsi_tail <= 30).any():
            triggers.append('VoRSI')

        # Normalized MACD histogram outside the +/-2% range
        if (norm_hist[sym].tail(lb_trigger).abs() >= 0.02).any():
            triggers.append('MACD/R')

        # MACD histogram zero crossing
        if _sign_flipped(macd_hist[sym].tail(lb_trigger)):
            triggers.append('MACD')

        # Stock price crosses SMA50
        if _sign_flipped(sma50_tail - price_tail):
            triggers.append('SMA50')

        # Death cross or golden cross - SMA50 vs SMA200
        if _sign_flipped(sma50_tail - sma200_tail.values):
            triggers.append('Golden/Death')

        # Price on or outside a Bollinger band
        price_high = price_tail - bol_up[sym].tail(lb_trigger).values
        price_low = price_tail - bol_low[sym].tail(lb_trigger).values
        if (price_high >= 0).any() or (price_low <= 0).any():
            triggers.append('Bollinger')

        # Price crosses the 200 day moving average
        if _sign_flipped(price_tail - sma200_tail.values):
            triggers.append('SMA200')

        # Large trading volume trigger
        recent_vol = vol[sym].tail(50)
        if ((vol[sym].tail(lb_trigger) - recent_vol.mean()
             - 2 * recent_vol.std()) > 0).any():
            triggers.append('HiVol')

        print('{:5}: {}-- {}'.format(sym, ''.join('--' + t for t in triggers),
                                     watchlist.loc[sym, 'Notes']))
        plot_indicator = '[' + ''.join(t + ', ' for t in triggers) + ']'
        note_field = watchlist.loc[sym, 'Notes'].strip().lower()
        if note_field != "watch" and (note_field == "skip"
                                      or plot_indicator == "[]"):
            continue  # skip plotting to save memory and time

        all_plot_sym.append(sym)
        all_plot_ind[sym] = (sym + ' - ' + watchlist.loc[sym, 'Segment']
                             + ' - ' + watchlist.loc[sym, 'Sub Segment']
                             + ' - ' + watchlist.loc[sym, 'Notes']
                             + ' - ' + plot_indicator)

        # Basic price info.  The lower band is blanked where it sits far
        # below the SMA so a log-scale axis is not stretched downwards.
        data = price.copy()
        bol_low.loc[sma200[sym].divide(bol_low[sym])
                    > bol_up[sym].divide(sma200[sym]).mul(3), sym] = np.nan
        data = data.join(bol_low, rsuffix='_BOL200L')
        data = data.join(bol_up, rsuffix='_BOL200U')
        data = data.join(sma200, rsuffix='_SMA200')
        data = data.join(sma50, rsuffix='_SMA50')
        data = data.join(vwma50, rsuffix='_WVMA50')
        all_data = all_data.join(data, how='outer')

        macd = macd.join(macd_sig, rsuffix='_SIG')
        macd = macd.join(macd_hist, rsuffix='_HIST')
        macd.rename(columns={sym: sym + '_MACD'}, inplace=True)
        all_macd = all_macd.join(macd, how='outer')

        rsi = rsi.join(vorsi, rsuffix='_VoRSI')
        rsi.rename(columns={sym: sym + '_RSI'}, inplace=True)
        all_rsi = all_rsi.join(rsi, how='outer')

    all_data = all_data[all_data.index >= plot_sd]
    all_vol = all_vol[all_vol.index >= plot_sd]
    all_macd = all_macd[all_macd.index >= plot_sd]
    all_rsi = all_rsi[all_rsi.index >= plot_sd]

    # pack into one json obj
    j_dict = {"all_plot_sym": all_plot_sym, "all_plot_ind": all_plot_ind,
              "all_data": all_data.to_json(orient='columns', index=True),
              "all_vol": all_vol.to_json(orient='columns', index=True),
              "all_macd": all_macd.to_json(orient='columns', index=True),
              "all_rsi": all_rsi.to_json(orient='columns', index=True)}

    return json.dumps(j_dict)
if __name__ == "__main__":
    # Dash front end: a refresh button, a symbol dropdown and one figure with
    # price/volume, MACD and RSI panels.  The scan result is cached
    # server-side; the dcc.Store only signals that fresh data is available.
    app = Dash()

    CACHE_CONFIG = {'CACHE_TYPE': 'SimpleCache'}
    cache = Cache()
    cache.init_app(app.server, config=CACHE_CONFIG)

    app.layout = [
        html.Button('Refresh Data', id='button', n_clicks=0),
        dcc.Dropdown(id='controls-and-radio-item'),
        dcc.Graph(
            figure={},
            id='controls-and-graph',
            style={'height': '85vh'}
        ),
        dcc.Store(id="signal"),
    ]

    @cache.memoize(timeout=14400)  # cache timeout set to 4 hours
    def global_store():
        """Return the (cached) JSON produced by ``intelligent_loop_plots``."""
        return intelligent_loop_plots()

    @app.callback(
        Output("button", "disabled", allow_duplicate=True),
        Input("button", "n_clicks"),
        prevent_initial_call=True,
    )
    def disable_btn(n):
        """Disable the refresh button as soon as it has been clicked."""
        if n:
            return True
        return no_update

    @callback(
        Output(component_id='signal', component_property='data'),
        Input(component_id='button', component_property='n_clicks'),
        prevent_initial_call=True,
    )
    def refresh_store(clicks):
        """Fill the cache, then signal completion via the store.

        Previously named ``get_data``, which rebound the module-level
        ``get_data`` imported from ``util``.
        """
        if not clicks:
            raise PreventUpdate

        print("get data")
        global_store()
        print("data retrieved")
        return clicks

    @callback(
        Output(component_id='controls-and-radio-item', component_property='options'),
        Output(component_id='button', component_property='disabled'),
        Input(component_id='signal', component_property='data'),
    )
    def update_dropdown(in_data):
        """List the flagged symbols in the dropdown and re-enable the button."""
        if not in_data:
            raise PreventUpdate
        print(f"input: {in_data}")
        json_data = json.loads(global_store())
        all_plot_sym = json_data["all_plot_sym"]
        print("dropdown menu options updated")

        return sorted(all_plot_sym), False

    @callback(
        Output(component_id='controls-and-graph', component_property='figure'),
        Input(component_id='controls-and-radio-item', component_property='value'),
    )
    def update_graph(col_chosen):
        """Build the three-row figure for the selected symbol."""
        if not col_chosen:
            raise PreventUpdate
        json_data = json.loads(global_store())
        all_plot_ind = json_data["all_plot_ind"]
        all_data = pd.read_json(io.StringIO(json_data['all_data']))
        all_vol = pd.read_json(io.StringIO(json_data['all_vol']))
        all_macd = pd.read_json(io.StringIO(json_data['all_macd']))
        all_rsi = pd.read_json(io.StringIO(json_data['all_rsi']))

        fig = make_subplots(
            rows=3,
            cols=1,
            shared_xaxes=True,
            row_heights=[0.6, 0.2, 0.2],
            vertical_spacing=0.02,
            specs=[[{"secondary_y": True}], [{"secondary_y": False}],
                   [{"secondary_y": False}]],
        )

        price_line = px.line(
            all_data,
            x=all_data.index,
            y=[col_chosen, col_chosen+'_BOL200L', col_chosen+'_BOL200U',
               col_chosen+'_SMA200', col_chosen+'_SMA50', col_chosen+'_WVMA50'],
        )

        volume_line = px.bar(
            all_vol,
            x=all_vol.index,
            y=col_chosen,
        )

        macd_line = px.line(
            all_macd,
            x=all_macd.index,
            y=[col_chosen+'_MACD', col_chosen+'_SIG'],
        )

        # split the histogram so each half can be filled in its own colour
        macd_neg = all_macd.copy()
        macd_pos = all_macd.copy()
        macd_neg[macd_neg > 0] = 0
        macd_pos[macd_pos < 0] = 0

        macd_hist_pos = px.line(
            macd_pos,
            x=macd_pos.index,
            y=[col_chosen+'_HIST'],
        )
        macd_hist_pos.update_traces(fill='tozeroy', line_color='rgba(0,100,0,0.5)',
                                    showlegend=False)

        macd_hist_neg = px.line(
            macd_neg,
            x=macd_neg.index,
            y=[col_chosen+'_HIST'],
        )
        macd_hist_neg.update_traces(fill='tozeroy', line_color='rgba(100,0,0,0.5)',
                                    showlegend=False)

        rsi_line = px.line(
            all_rsi,
            x=all_rsi.index,
            y=[col_chosen+'_RSI', col_chosen+'_VoRSI'],
        )
        # six price traces on the primary axis, volume on the secondary one
        fig.add_traces(price_line.data + volume_line.data, rows=1, cols=1,
                       secondary_ys=[False, False, False, False, False, False, True])
        fig.add_traces(macd_line.data + macd_hist_pos.data + macd_hist_neg.data,
                       rows=2, cols=1)
        fig.add_traces(rsi_line.data, rows=3, cols=1)

        fig.layout.yaxis.title = "Price"
        fig.layout.yaxis.type = "log"
        fig.layout.yaxis2.title = "Volume"
        fig.layout.yaxis3.title = "MACD"
        fig.layout.yaxis4.title = "RSI/VoRSI"

        fig.update_layout(title_text=all_plot_ind[col_chosen])
        fig.update_layout(margin=dict(l=30, r=20, t=50, b=20))

        return fig

    serve(app.server, host="0.0.0.0", port=8050, threads=7)
y=[col_chosen+'_HIST'], + ) + macd_hist_pos.update_traces(fill='tozeroy', line_color='rgba(0,100,0,0.5)', showlegend=False) + + macd_hist_neg = px.line( + macd_neg, + x=macd_neg.index, + y=[col_chosen+'_HIST'], + ) + macd_hist_neg.update_traces(fill='tozeroy', line_color='rgba(100,0,0,0.5)', showlegend=False) + + rsi_line = px.line( + all_rsi, + x=all_rsi.index, + y=[col_chosen+'_RSI', col_chosen+'_VoRSI'], + ) + fig.add_traces(price_line.data + volume_line.data, rows=1, cols=1, secondary_ys=[False, False, False, False, False, False, True]) + fig.add_traces(macd_line.data + macd_hist_pos.data + macd_hist_neg.data, rows=2, cols=1) + # fig.add_traces(macd_line.data, rows=2, cols=1) + fig.add_traces(rsi_line.data, rows=3, cols=1) + + # fig.update_traces(marker_color = 'rgba(0,0,250,0.5)', + # marker_line_width = 0, + # selector=dict(type="bar"), + # ) + # fig.update_layout(bargap=0, bargroupgap=0) + + # fig.layout.xaxis.title="Time" + fig.layout.yaxis.title="Price" + fig.layout.yaxis.type="log" + fig.layout.yaxis2.title="Volume" + fig.layout.yaxis3.title="MACD" + fig.layout.yaxis4.title="RSI/VoRSI" + + fig.update_layout(title_text=all_plot_ind[col_chosen]) + # fig.update_layout(showlegend=False) + fig.update_layout(margin=dict(l=30, r=20, t=50, b=20)) + + return fig + + serve(app.server, host="0.0.0.0", port=8050, threads=7) + # app.run(debug=True) diff --git a/marketsim.py b/marketsim.py new file mode 100644 index 0000000..91c0b38 --- /dev/null +++ b/marketsim.py @@ -0,0 +1,163 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Feb 5 21:56:42 2020 + +@author: cpan +""" + +"""MC2-P1: Market simulator.""" + +import pandas as pd +import numpy as np +# import datetime as dt +# import os +from util import get_data, plot_data + +def normalize_data(df): + return df/df.iloc[0,:] + +def fill_missing_values(df_data): + '''First fill forward and then fill backward''' + df_data.fillna(method="ffill", inplace=True) + df_data.fillna(method="bfill", inplace=True) + +def 
def get_orders(orders_file):
    """
    Load an order book and return it cleaned and in chronological order.

    Parameters
    ----------
    orders_file : TYPE pandas.DataFrame, or anything pandas.read_csv accepts
        DESCRIPTION. a DataFrame is used as given (its index is the sort
        key); otherwise the CSV is read with its 'Date' column parsed as
        the index.

    Returns
    -------
    TYPE pandas.DataFrame
        DESCRIPTION. orders with incomplete rows dropped, sorted by index.
    """
    if isinstance(orders_file, pd.DataFrame):
        orders_df = orders_file
    else:
        orders_df = pd.read_csv(orders_file, index_col='Date',
                                parse_dates=True, na_values=['nan'])
    return orders_df.dropna().sort_index()


def compute_daily_returns(df):
    """
    Return the row-over-row fractional change of every column.

    Parameters
    ----------
    df : TYPE pandas.DataFrame
        DESCRIPTION. values in chronological row order.

    Returns
    -------
    TYPE pandas.DataFrame
        DESCRIPTION. same shape as df; the first row is set to 0.
    """
    daily_returns = df.copy()
    # .values strips the index so the division is positional (row i / row i-1)
    daily_returns[1:] = (df[1:] / df[:-1].values) - 1
    daily_returns.iloc[0, :] = 0
    return daily_returns


def compute_portfolio_stats(price, allocs=(0.1, 0.2, 0.3, 0.4), rfr=0.0, sf=252.0):
    """
    Compute cumulative return, mean/std of periodic returns and Sharpe ratio.

    Parameters
    ----------
    price : TYPE pandas.DataFrame
        DESCRIPTION. one column per holding, rows in chronological order.
    allocs : TYPE sequence of float, optional
        DESCRIPTION. one weight per column of price. The default is
        (0.1, 0.2, 0.3, 0.4); it used to be a five-element list containing
        the typo "0,3".
    rfr : TYPE float, optional
        DESCRIPTION. risk free rate subtracted from the mean periodic
        return. The default is 0.0.
    sf : TYPE float, optional
        DESCRIPTION. samples per year used to annualize the Sharpe ratio.
        The default is 252.

    Returns
    -------
    cr, adr, sddr, sr : TYPE pandas.Series
        DESCRIPTION. cumulative return, average return, standard deviation
        of returns and Sharpe ratio, each indexed by the portfolio column
        ('PORTFOLIO' unless price has exactly one column).
    """
    if isinstance(allocs, tuple):
        # keep the list semantics the original default had in DataFrame * allocs
        allocs = list(allocs)
    norm_price = normalize_data(price)
    norm_positions_val = norm_price * allocs
    if len(norm_positions_val.columns) == 1:
        norm_portfolio_val = norm_positions_val
    else:
        norm_portfolio_val = norm_positions_val.sum(axis=1).to_frame('PORTFOLIO')
    cr = norm_portfolio_val.iloc[-1] / norm_portfolio_val.iloc[0] - 1
    # drop the first row, which compute_daily_returns forces to zero
    daily_returns = compute_daily_returns(norm_portfolio_val)[1:]
    adr = daily_returns.mean()
    sddr = daily_returns.std()
    sr = np.sqrt(sf) * (adr - rfr) / sddr
    return cr, adr, sddr, sr


def plot_against_SPY(df):
    """
    Plot df normalized together with SPY and return the SPY data used.

    Parameters
    ----------
    df : TYPE pandas.DataFrame
        DESCRIPTION. series to compare; if it has no 'SPY' column one is
        fetched with get_data for the dates in df's index.

    Returns
    -------
    TYPE pandas.DataFrame or pandas.Series
        DESCRIPTION. the get_data result when SPY had to be fetched,
        otherwise the existing (un-normalized) 'SPY' column.
    """
    df_temp = df.copy()
    if 'SPY' not in df_temp.columns:
        df_SPY = get_data(['SPY'], pd.to_datetime(df_temp.index.values))
        df_temp['SPY'] = df_SPY.values
    else:
        df_SPY = df_temp['SPY']
    plot_data(normalize_data(df_temp))
    return df_SPY
def compute_portvals(orders_file="./orders/orders.csv", start_val=1000000,
                     commission=9.95, impact=0.005):
    """
    Simulate an order book and return the daily value of every position.

    Parameters
    ----------
    orders_file : TYPE str or pandas.DataFrame
        DESCRIPTION. passed to get_orders; rows need 'Symbol', 'Order'
        (BUY/SELL, any case) and 'Shares'.
    start_val : TYPE float, optional
        DESCRIPTION. starting cash. The default is 1000000.
    commission : TYPE float, optional
        DESCRIPTION. fixed fee charged per executed order.
    impact : TYPE float, optional
        DESCRIPTION. fraction of the traded value charged per executed
        order.

    Returns
    -------
    TYPE pandas.DataFrame
        DESCRIPTION. 'Portfolio' (row sum) first, then the dollar value of
        each symbol (alphabetical) and 'Cash'.

    Notes
    -----
    A BUY that cash cannot cover (including commission and impact) and a
    SELL of more shares than held are skipped without any message. Every
    order date must be present in the price index or the lookup raises
    KeyError.
    """
    orders_df = get_orders(orders_file)
    # sorted(): a bare set gave an arbitrary, run-dependent column order
    symbols = sorted(set(orders_df['Symbol'].values))
    dates = pd.date_range(orders_df.index.values[0], orders_df.index.values[-1])
    # get_data already fills missing values
    # .copy(): adding a column to a column-subset frame can otherwise raise
    # SettingWithCopyWarning
    prices = get_data(symbols, dates)[symbols].copy()
    # cash is "priced" at 1 so that prices * positions yields dollars
    prices['Cash'] = np.ones(prices.shape[0])

    # holdings per day (units of stock, dollars of cash), all zero at first
    positions = prices * 0.0
    positions.loc[positions.index.values[0], ['Cash']] = start_val

    for index, row in orders_df.iterrows():
        stock_sym = row['Symbol']
        order_price = prices.loc[index, stock_sym]
        order_shrs = row['Shares']
        side = row['Order'].upper()

        if side == 'BUY':
            total_cost = order_shrs*order_price + commission \
                + order_shrs*order_price*impact
            if not positions.loc[index, 'Cash'] < total_cost:
                positions.loc[index, stock_sym] += order_shrs
                positions.loc[index, "Cash"] -= order_shrs*order_price
                positions.loc[index, "Cash"] -= commission
                positions.loc[index, "Cash"] -= order_shrs*order_price*impact
        elif side == 'SELL':
            if not positions.loc[index, stock_sym] < order_shrs:
                positions.loc[index, stock_sym] -= order_shrs
                positions.loc[index, "Cash"] += order_shrs*order_price
                positions.loc[index, "Cash"] -= commission
                positions.loc[index, "Cash"] -= order_shrs*order_price*impact

        # carry the holdings of the order date forward to all later dates
        start_row = positions.index.get_loc(index) + 1
        positions.iloc[start_row:, :] = positions.iloc[start_row-1].values

    port_vals = prices*positions
    port_vals.insert(0, 'Portfolio', port_vals.sum(axis=1))

    return port_vals


def test_code():
    """Run the simulator on a sample order file and print stats next to SPY."""
    of = "./orders/orders-05.csv"
    sv = 1000000

    portvals = compute_portvals(orders_file=of, start_val=sv)
    if isinstance(portvals, pd.DataFrame):
        # keep only the first column, i.e. the total portfolio value
        portvals = portvals[portvals.columns[0]].to_frame()
    else:
        print("warning, code did not return a DataFrame")

    start_date = pd.to_datetime(portvals.index.values[0])
    end_date = pd.to_datetime(portvals.index.values[-1])
    price_SPY = plot_against_SPY(portvals)
    rfr = 0
    sf = 252

    cr, adr, sddr, sr = compute_portfolio_stats(portvals, [1.0], rfr, sf)
    crSP, adrSP, sddrSP, srSP = compute_portfolio_stats(price_SPY, [1.0], rfr, sf)

    print("\nDate Range: {} to {}".format(start_date.date(), end_date.date()))
    print()
    print("Sharpe Ratio: {}, {}".format(sr, srSP))
    print()
    print("Cumulative Return: {}, {}".format(cr, crSP))
    print()
    print("Standard Deviation: {}, {}".format(sddr, sddrSP))
    print()
    print("Average Daily Return: {}, {}".format(adr, adrSP))
    print()
    print("Final Portfolio Value: {:.2f}".format(portvals['Portfolio'].iloc[-1]))


if __name__ == "__main__":
    test_code()
def fill_missing_data(df):
    """Return a forward-filled, then back-filled copy of df (df is untouched)."""
    return df.ffill().bfill()


def symbol_to_path(symbol, base_dir=None):
    """
    Return the CSV file path for a ticker symbol.

    When base_dir is None the directory comes from the MARKET_DATA_DIR
    environment variable, falling back to '../data/'.
    """
    directory = base_dir
    if directory is None:
        directory = os.environ.get("MARKET_DATA_DIR", '../data/')
    return os.path.join(directory, f"{symbol!s}.csv")


def get_data_full(symbols, start_date, addSPY=True, colname='Adj Close'):
    """
    Download one price column for the symbols from start_date onwards.

    Parameters
    ----------
    symbols : list
        ticker symbols; 'SPY' is prepended when addSPY is true and it is
        not already present.
    start_date : datetime
        first date requested from yf.download (no end date is passed).
    addSPY : bool, optional
        add SPY for reference. The default is True.
    colname : str, optional
        column selected from the download. The default is 'Adj Close'.

    Returns
    -------
    df : dataframe
        the selected column per symbol; in the single-symbol branch the
        result is named after the symbol and converted with to_frame().
    """
    tickers = symbols
    if addSPY and 'SPY' not in symbols:
        tickers = ['SPY'] + symbols

    df = yf.download(tickers, start=start_date)[colname]
    if len(tickers) == 1:
        df.name = tickers[0]
        df = df.to_frame()
    return df
def get_data_range(df, dates):
    """
    Extract the rows of df whose index values also appear in dates.

    An empty frame indexed by dates is inner-joined with df, so only
    index values present on both sides survive.
    """
    skeleton = pd.DataFrame(index=dates)
    return skeleton.join(df, how='inner')


def get_data(symbols, dates, addSPY=True, colname='Adj Close'):
    """
    Download one price column for the symbols between dates[0] and dates[-1].

    SPY is always downloaded (it is prepended when absent): rows without
    an SPY value are dropped, then remaining gaps are forward/back filled.

    Parameters
    ----------
    symbols : list
        ticker symbols.
    dates : sequence of datetime
        only the first and last entries are used, as start and end.
    addSPY : bool, optional
        when False the result is reduced to the symbols originally
        requested. The default is True.
    colname : str, optional
        column selected from the download. The default is 'Adj Close'.

    Returns
    -------
    df : dataframe
        cleaned prices, one column per symbol.
    """
    requested = symbols
    first_date, last_date = dates[0], dates[-1]
    tickers = symbols if 'SPY' in symbols else ['SPY'] + symbols

    df = yf.download(tickers, start=first_date, end=last_date)[colname]
    if len(tickers) == 1:
        df.name = tickers[0]
        df = df.to_frame()

    df = fill_missing_data(df.dropna(subset=['SPY']))

    if addSPY == False:  # noqa: E712 - equality kept: addSPY=None must keep SPY
        df = df[requested]

    return df
def yf_batch_download(symbols, start, end, batch_size, time_gap):
    """
    download in small batches to avoid connection closure by host

    Parameters
    ----------
    symbols : list
        stock symbols.
    start : datetime
        start date.
    end : datetime
        stop date.
    batch_size : integer
        batch size.
    time_gap : float
        pause after each full batch, in seconds or fraction of seconds.

    Returns
    -------
    df : dataframe
        the per-batch downloads concatenated column-wise.

    Notes
    -----
    The final request takes every symbol from index
    (n // batch_size - 1) * batch_size to the end, so it can hold up to
    2 * batch_size - 1 symbols. When there are fewer symbols than
    batch_size the start index is negative and the slice is the whole list.
    """
    total = len(symbols)
    full_batches = total // batch_size
    combined = pd.DataFrame()
    for k in range(full_batches - 1):
        chunk = symbols[k*batch_size:(k + 1)*batch_size]
        combined = pd.concat([combined, yf.download(chunk, start, end)], axis=1)
        sleep(time_gap)
    remainder = symbols[(full_batches - 1)*batch_size:total]
    combined = pd.concat([combined, yf.download(remainder, start, end)], axis=1)

    return combined


def _tag_columns_with_symbol(frame, symbol):
    """Replace frame's flat columns, in place, by (column, symbol) pairs."""
    pairs = list(zip(frame.columns.values.tolist(),
                     [symbol]*len(frame.columns.values)))
    frame.columns = pd.MultiIndex.from_tuples(pairs, names=[None, None])


def get_price_volume(symbols, dates, addSPY=False):
    """
    Download adjusted close and volume for the symbols from Yahoo Finance.

    The local cache described by earlier versions is disabled (the list of
    cached symbols is hard-wired to empty), so every symbol is treated as
    new and downloaded for the full range. SPY is always downloaded and
    used to drop rows on which it has no price.

    Parameters
    ----------
    symbols : list
        ticker symbols.
    dates : sequence of datetime
        only the first and last entries are used, as start and end.
    addSPY : bool, optional
        when False both results are reduced to the symbols originally
        requested. The default is False.

    Returns
    -------
    price : dataframe
        'Adj Close' per symbol, forward/back filled.
    volume : dataframe
        'Volume' per symbol, missing values replaced by 0.
    """
    requested = symbols
    range_start, range_end = dates[0], dates[-1]
    tickers = symbols if 'SPY' in symbols else ['SPY'] + symbols

    df = yf_batch_download(tickers, start=range_start, end=range_end,
                           batch_size=BATCHSIZE, time_gap=TIMEGAP)
    if len(tickers) == 1:
        _tag_columns_with_symbol(df, tickers[0])

    # cache disabled: nothing counts as already available locally
    cached_syms = []

    newest = pd.to_datetime(df.index.values[-1])
    oldest = pd.to_datetime(df.index.values[0])
    shared_syms = list(set(requested) & set(cached_syms))
    # with no shared symbols this keeps the date index but no columns
    df = df.loc[:, (slice(None), shared_syms)]

    if range_start < oldest:
        # NOTE(review): shared_syms is always empty here, so this asks
        # yf.download for an empty symbol list - confirm that is intended
        head = yf_batch_download(shared_syms, start=range_start, end=oldest,
                                 batch_size=BATCHSIZE, time_gap=TIMEGAP)
        df = pd.concat([head, df])

    if range_end >= newest:
        # re-fetch from the last known day to pick up market-close data
        gap_start = range_end if range_end.date() == newest.date() else newest
        tail = yf_batch_download(shared_syms, start=gap_start, end=range_end,
                                 batch_size=BATCHSIZE, time_gap=TIMEGAP)
        # NOTE(review): df[:-1] drops the last downloaded date; unless the
        # gap download returns rows for it, the join below (left join on
        # this index) leaves that date out of the result - confirm
        df = pd.concat([df[:-1], tail])

    # everything not cached is downloaded for the full range
    fresh_syms = np.setdiff1d(tickers, cached_syms).tolist()
    if fresh_syms:
        extra = yf_batch_download(fresh_syms, start=range_start, end=range_end,
                                  batch_size=BATCHSIZE, time_gap=TIMEGAP)
        if len(fresh_syms) == 1:
            _tag_columns_with_symbol(extra, fresh_syms[0])
        df = df.join(extra)

    df = df.dropna(subset=[('Adj Close', 'SPY')])
    price = fill_missing_data(df['Adj Close'])
    volume = df['Volume'].fillna(0)

    if addSPY == False:  # noqa: E712 - equality kept: addSPY=None must keep SPY
        price = price[requested]
        volume = volume[requested]

    return price, volume
def get_price_volume_online(symbols, dates, addSPY=False):
    """
    Read stock data (adjusted close and volume) for given symbols from Yahoo
    Finance in a single yf.download call.

    Parameters
    ----------
    symbols : list
        ticker symbols; SPY is prepended when absent.
    dates : sequence of datetime
        only the first and last entries are used, as start and end.
    addSPY : bool, optional
        when False both results are reduced to the symbols originally
        requested. The default is False.

    Returns
    -------
    price : dataframe
        'Adj Close' per symbol, forward/back filled.
    volume : dataframe
        'Volume' per symbol, missing values replaced by 0.
    """
    requested = symbols
    sd, ed = dates[0], dates[-1]
    tickers = symbols if 'SPY' in symbols else ['SPY'] + symbols
    single = len(tickers) == 1

    df = yf.download(tickers, start=sd, end=ed)
    # the single-symbol branch assumes flat columns, otherwise (field, symbol)
    if single:
        df = df.dropna(subset=['Adj Close'])
    else:
        df = df.dropna(subset=[('Adj Close', 'SPY')])
    price = fill_missing_data(df['Adj Close'])
    volume = df['Volume'].fillna(0)

    if single:
        price.name = tickers[0]
        volume.name = tickers[0]
        price = price.to_frame()
        volume = volume.to_frame()

    if addSPY == False:  # noqa: E712 - equality kept: addSPY=None must keep SPY
        price = price[requested]
        volume = volume[requested]

    return price, volume


def get_watchlist(website: str = WEBSITE, timeout: float = 30.0):
    """
    Download the watchlist CSV embedded in a wiki page.

    Parameters
    ----------
    website : str, optional
        page to fetch. The default is WEBSITE. (The argument used to be
        ignored: the module constant was always fetched.)
    timeout : float, optional
        seconds to wait for the server before giving up. The default is 30.

    Returns
    -------
    df : dataframe
        watchlist indexed by 'Symbol'; lines starting with '#' are skipped
        and empty fields stay empty strings (na_filter=False).

    Raises
    ------
    IndexError
        When the page has no <pre> block at the expected location.
    """
    page = requests.get(website, timeout=timeout)
    tree = html.fromstring(page.content)
    blocks = tree.xpath(
        '//*[@id="dokuwiki__content"]/div[1]/div/div[3]/div/pre/text()')
    if not blocks:
        # same exception type as the bare [0] lookup, with a usable message
        raise IndexError(f"no watchlist <pre> block found at {website}")
    return pd.read_csv(StringIO(blocks[0]), index_col='Symbol',
                       comment='#', na_filter=False)
The +# strategy should be using SPY to clean the data first including fill +# forward and fill backward, then to drop the SPY if addSPY=False +# """ +# org_sym = symbols +# df = pd.DataFrame(index=dates) +# # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent +# # symbols = ['SPY'] + symbols +# if 'SPY' not in symbols: +# symbols = ['SPY'] + symbols +# for symbol in symbols: +# df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date', +# parse_dates=True, usecols=['Date', colname], na_values=['nan']) +# df_temp = df_temp.rename(columns={colname: symbol}) +# df = df.join(df_temp) +# if symbol == 'SPY': # drop dates SPY did not trade +# df = df.dropna(subset=["SPY"]) +# # fill missing data +# df = fill_missing_data(df) +# if addSPY == False: # drop SPY +# # df = df.drop(columns=['SPY']) +# df = df[org_sym] + +# return df + + +def plot_data(df, axs=[], title=[], xlabel='', ylabel=''): + + """Plot stock prices with a custom title and meaningful axis labels.""" + if axs == []: + ax = df.plot(title = title) + else: + ax = df.plot(ax=axs, title=title) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.grid() + + +# def plot_data(df, title=[], xlabel='', ylabel=''): +# import matplotlib.pyplot as plt +# """Plot stock prices with a custom title and meaningful axis labels.""" +# ax = df.plot(title=title, fontsize=12, figsize=(10, 7)) +# ax.set_xlabel(xlabel) +# ax.set_ylabel(ylabel) +# plt.grid() +# plt.show() + +def get_orders_data_file(basefilename): + return open(os.path.join(os.environ.get("ORDERS_DATA_DIR",'orders/'),basefilename)) + +def get_learner_data_file(basefilename): + return open(os.path.join(os.environ.get("LEARNER_DATA_DIR",'Data/'),basefilename),'r') + +def get_robot_world_file(basefilename): + return open(os.path.join(os.environ.get("ROBOT_WORLDS_DIR",'testworlds/'),basefilename)) + + +def test_code(): + + symbol = ['GOOG', 'AMZN'] + # lookback years + lb_year = 0.08 + ed = dt.datetime.today() + sd = ed - dt.timedelta(days = 365 
* lb_year + 1) + # If ed or sd falls on to a non-trading day, you might get warnings saying + # "No data found for this date range, symbol may be delisted". This is + # normal behavior. + prices, volume = get_price_volume(symbol, pd.date_range(sd, ed), addSPY=False) + + +if __name__ == '__main__': + test_code() \ No newline at end of file