Use database - paving the road for dynamically adding/deleting watchlist items

2024-10-04 02:38:21 -07:00 · 2024-10-04 02:38:21 -07:00 · 4b49def86d
commit 4b49def86d
parent 216e40ddc8
3 changed files with 44 additions and 388 deletions
--- a/indicators.py
+++ b/indicators.py
@ -21,7 +21,7 @@ Could also come up with a value that ties to the trading volume.
 import pandas as pd
 import numpy as np
 import datetime as dt
-from util import get_watchlist
+# from util import get_watchlist
 from numpy.fft import fft, ifft
 import scipy.signal as sig
 import plotly.express as px
@ -33,8 +33,44 @@ from dash.exceptions import PreventUpdate
 import dash_auth
 import yahoo_fin.stock_info as si
 import hashlib
 from dotenv import load_dotenv
 import psycopg2
 import os
 import sys
 pd.options.mode.chained_assignment = None  # default='warn'
 load_dotenv()
 def connect_db():
    conn = None
    try:
        conn = psycopg2.connect(
                    host=os.environ['DB_PATH'],
                    database=os.environ['DB_NAME'],
                    user=os.environ['DB_USERNAME'],
                    password=os.environ['DB_PASSWORD'],
                )
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        sys.exit(1)
    return conn
 def sql_to_dataframe(conn, query):
    cursor = conn.cursor()
    try:
        cursor.execute(query)
    except (Exception, psycopg2.DatabaseError) as error:
        print(f"Error: {error}")
        cursor.close()
        return 1
    tuples_list = cursor.fetchall()
    cursor.close()
    df = pd.DataFrame(tuples_list)
    return df
 def get_watchlist():
    QUERY = '''select * from stock_watch_list'''
    return sql_to_dataframe(connect_db(), QUERY)
 def hash_password(password):
    # Encode the password as bytes
@ -485,7 +521,7 @@ def intelligent_loop_plots(sym, stk_data):
    volume_tail = vol.tail(LB_TRIGGER)
    vol_mean = vol.tail(50).mean()
    vol_std = vol.tail(50).std()
-    if ((volume_tail[1] - vol_mean - 2*vol_std) > 0).any():
+    if ((volume_tail - vol_mean - 2*vol_std) > 0).any():
        # print('--HiVol', end = '')
        plot_indicator += "HiVol, "
@ -536,7 +572,8 @@ dash_auth.BasicAuth(
 watchlist = get_watchlist()
 # symbols = watchlist.index.values.tolist()
-symbols = (watchlist.index.values + " - " + watchlist["Sub Segment"]).tolist()
+# symbols = (watchlist.index.values + " - " + watchlist["Sub Segment"]).tolist()
 symbols = (watchlist.iloc[:, 0] + " - " + watchlist.iloc[:, 1]).tolist()
 CACHE_CONFIG = {'CACHE_TYPE': 'SimpleCache'}
 cache = Cache()
@ -614,7 +651,8 @@ def start_cycle(n, value):
 def reload_syms(n):
    if n:
        watchlist = get_watchlist()
-        symbols = (watchlist.index.values + " - " + watchlist["Sub Segment"]).tolist()
+        # symbols = (watchlist.index.values + " - " + watchlist["Sub Segment"]).tolist()
        symbols = (watchlist.iloc[:, 0] + " - " + watchlist.iloc[:, 1]).tolist()
        return symbols, 0
    return no_update
--- a/requirements.txt
+++ b/requirements.txt
@ -30,10 +30,12 @@ packaging==24.1
 pandas==2.2.3
 parse==1.20.2
 plotly==5.24.1
 psycopg2==2.9.9
 pyee==11.1.1
 pyppeteer==2.0.0
 pyquery==2.0.1
 python-dateutil==2.9.0.post0
 python-dotenv==1.0.1
 pytz==2024.2
 requests==2.32.3
 requests-html==0.10.0
--- a/util.py
+++ b/util.py
@ -1,384 +0,0 @@
 """
 Use Yahoo Finance data
 """
 import warnings
 # Suppress FutureWarnings
 warnings.simplefilter(action='ignore', category=FutureWarning)
 import datetime as dt
 import os
 import pandas as pd
 import numpy as np
 # import yfinance as yf
 import yahoo_fin.stock_info as si
 import requests
 from lxml import html
 from io import StringIO
 from time import sleep
 WEBSITE = 'https://www.isolo.org/dokuwiki/knowledge_base/investing/watchlist'
 BATCHSIZE = 20
 TIMEGAP = 0.2
 def fill_missing_data(df):
    temp = df.ffill()
    temp = temp.bfill()
    return temp
 def symbol_to_path(symbol, base_dir=None):
    """Return CSV file path given ticker symbol."""
    if base_dir is None:
        base_dir = os.environ.get("MARKET_DATA_DIR", '../data/')
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))
 # def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'):
 #     """
 #     Read stock data (adjusted close) for given symbols from Yahoo Finance
 #     from start_date to the latest date available (usually the current date).
 #     """
 #     if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
 #         symbols = ['SPY'] + symbols
 #     df = yf.download(symbols, start = start_date)[colname]
 #     if len(symbols) == 1:
 #         df.name = symbols[0]
 #         df = df.to_frame()
 #     return df
 # def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'):
    """
    Read stock data (adjusted close) for given symbols from CSV files
    from start_date to the latest date available in the CSV files.
    """
 #     df_temp = pd.read_csv(symbol_to_path('SPY'), index_col='Date',
 #             parse_dates=True, usecols=['Date', colname], na_values=['nan'])
 #     df_temp = df_temp.rename(columns={colname: 'SPY'})
 #     end_date = df_temp.index.values[-1]
 #     dates = pd.date_range(start_date, end_date)
 #     df = pd.DataFrame(index=dates)
 #     df = df.join(df_temp)
 #     df = df.dropna()
 #     # if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
 #     #     symbols = ['SPY'] + symbols
 #     for symbol in symbols:
 #         df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
 #                 parse_dates=True, usecols=['Date', colname], na_values=['nan'])
 #         df_temp = df_temp.rename(columns={colname: symbol})
 #         df = df.join(df_temp)
 #         # if symbol == 'SPY':  # drop dates SPY did not trade
 #         #     df = df.dropna(subset=["SPY"])
 #     if not addSPY:
 #         df = df[symbols]
 #     return df
 def get_data_range(df, dates):
    """
    Extract sections of the data in the dates range from the full data set
    """
    df_range = pd.DataFrame(index=dates)
    df_range = df_range.join(df, how='inner')
    return df_range
 def yf_download(symbols, start, end):
    df = pd.DataFrame(columns = pd.MultiIndex(levels=[["Adj Close", "Volume"],[]], codes=[[],[]], names=["param", "tick"]))
    for sym in symbols:
        # tmp = si.get_data(sym, start_date=start)
        tmp = si.get_data(sym, start_date=start)[["adjclose", "volume"]]
        tmp.rename(columns={"adjclose": "Adj Close", "volume": "Volume"}, inplace=True)
        tmp.columns = pd.MultiIndex.from_product([list(tmp.columns)] + [[sym]], names=["param", "tick"])
        df = df.join(tmp, how='outer')
    return df
 # def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'):
 #     """
 #     Read stock data (adjusted close) for given symbols from Yahoo Finance
 #     """
 #     org_sym = symbols
 #     sd = dates[0]
 #     ed = dates[-1]
 #     # if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
 #     if 'SPY' not in symbols:
 #         symbols = ['SPY'] + symbols
 #     df = yf.download(symbols, start=sd, end = ed)[colname]
 #     if len(symbols) == 1:
 #         df.name = symbols[0]
 #         df = df.to_frame()
 #     df = df.dropna(subset=['SPY'])
 #     df = fill_missing_data(df)
 #     if addSPY==False:
 #         # df = df.drop(columns=['SPY'])
 #         df = df[org_sym]
 #     return df
 def yf_batch_download(symbols, start, end, batch_size, time_gap):
    """
    download in small batches to avoid connection closure by host
    Parameters
    ----------
    symbols : list
        stock symbols.
    start : datetime
        start date.
    end : datetime
        stop date.
    batch_size : integer
        batch size.
    time_gap : float
        in seconds or fraction of seconds.
    Returns
    -------
    df : dataframe
        stock price volume information.
    """
    n = len(symbols)
    batches = n // batch_size
    df = pd.DataFrame()
    for i in range(batches - 1):
        tmp = yf_download(symbols[i*batch_size:(i+1)*batch_size], start, end)
        df = pd.concat([df, tmp], axis=1)
        sleep(time_gap)
    tmp = yf_download(symbols[(batches-1)*batch_size:n], start, end)
    df = pd.concat([df, tmp], axis=1)
    return df
 def get_price_volume(symbols, dates, addSPY=False):
    """
    Read stock data (adjusted close and volume) for given symbols from local
    file unless data is not in local. It only gets date from Yahoo Finance
    when necessary to increase speed and reduce internet data.
    It will refresh local data if the symbols are on the _refresh.csv. This
    is necessary when stock splits, spins off or something else happens.
    """
    # DATAFILE = "_stkdata.pickle"
    # REFRESH = "_refresh.csv"
    org_sym = symbols
    sd = dates[0]
    ed = dates[-1]
    # if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
    if 'SPY' not in symbols:
        symbols = ['SPY'] + symbols
    df = yf_batch_download(symbols, start=sd, end=ed, \
                            batch_size=BATCHSIZE, time_gap=TIMEGAP)
    if len(symbols) == 1:
        tuples = list(zip(df.columns.values.tolist(), \
                            [symbols[0]]*len(df.columns.values)))
        df.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None])           
    # if not os.path.exists(DATAFILE):
    #     df = yf_batch_download(symbols, start=sd, end=ed, \
    #                            batch_size=BATCHSIZE, time_gap=TIMEGAP)
    #     if len(symbols) == 1:
    #         tuples = list(zip(df.columns.values.tolist(), \
    #                           [symbols[0]]*len(df.columns.values)))
    #         df.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None])            
    # else:
    #     df = pd.read_pickle(DATAFILE)
    #     exist_syms = df["Adj Close"].columns.values.tolist()
    #     if os.path.exists(REFRESH):
    #         try:
    #             refresh_df = pd.read_csv(REFRESH, header=None)
    #             refresh_syms = refresh_df.values.tolist()
    #             refresh_syms = [x for sublist in refresh_syms for x in sublist]
    #             remove_syms = [x for x in exist_syms if x in refresh_syms]
    #             if remove_syms:
    #                 df.drop(columns=remove_syms, axis=1, level=1, inplace=True)
    #                 exist_syms = [x for x in exist_syms if x not in refresh_syms]
    #         except:
    #             pass
        exist_syms = []
        last_day = pd.to_datetime(df.index.values[-1])
        first_day = pd.to_datetime(df.index.values[0])
        intersect_syms = list(set(org_sym) & set(exist_syms))
        # reduce df to only contain intersect_syms
        df = df.loc[:, (slice(None), intersect_syms)]
        if sd < first_day:
            # fill gap from online
            tmp_df = yf_batch_download(intersect_syms, start=sd, end=first_day, \
                               batch_size=BATCHSIZE, time_gap=TIMEGAP)
            df = pd.concat([tmp_df, df])
        if ed >= last_day:
            # fill gap from online incl last two days to get mkt close data
            if ed.date() == last_day.date():
                tmp_df = yf_batch_download(intersect_syms, start=ed, end=ed, \
                               batch_size=BATCHSIZE, time_gap=TIMEGAP)
            else:
                tmp_df = yf_batch_download(intersect_syms, start=last_day, end=ed, \
                               batch_size=BATCHSIZE, time_gap=TIMEGAP)
            df = pd.concat([df[:-1], tmp_df])
        # get data online when new stks were added    
        new_stks = np.setdiff1d(symbols, exist_syms).tolist()
        if not new_stks == []:
            tmp_df = yf_batch_download(new_stks, start=sd, end=ed, \
                               batch_size=BATCHSIZE, time_gap=TIMEGAP)
            if len(new_stks) == 1:
                tuples = list(zip(tmp_df.columns.values.tolist(), \
                                  [new_stks[0]]*len(tmp_df.columns.values)))
                tmp_df.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None])
            df = df.join(tmp_df)
    # df.to_pickle(DATAFILE) # save to local, overwrite existing file
    # if os.path.exists(REFRESH):
    #     with open(REFRESH, 'w'):
    #         pass
    df = df.dropna(subset=[('Adj Close', 'SPY')])
    price = df['Adj Close']
    price = fill_missing_data(price)
    volume = df['Volume']
    volume = volume.fillna(0)
    # if len(symbols) == 1:
    #     price.name = symbols[0]
    #     volume.name = symbols[0]
    #     price = price.to_frame()
    #     volume = volume.to_frame()
    if addSPY==False:
        price = price[org_sym]
        volume = volume[org_sym]
    return price, volume      
 # def get_price_volume_online(symbols, dates, addSPY=False):
 #     """
 #     Read stock data (adjusted close and volume) for given symbols from Yahoo
 #     Finance
 #     """
 #     org_sym = symbols
 #     sd = dates[0]
 #     ed = dates[-1]
 #     # if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
 #     if 'SPY' not in symbols:
 #         symbols = ['SPY'] + symbols
 #     df = yf.download(symbols, start=sd, end = ed)
 #     if len(symbols) == 1:
 #         df = df.dropna(subset = ['Adj Close'])
 #     else:
 #         df = df.dropna(subset=[('Adj Close', 'SPY')])
 #     price = df['Adj Close']
 #     price = fill_missing_data(price)
 #     volume = df['Volume']
 #     volume = volume.fillna(0)
 #     if len(symbols) == 1:
 #         price.name = symbols[0]
 #         volume.name = symbols[0]
 #         price = price.to_frame()
 #         volume = volume.to_frame()
 #     if addSPY==False:
 #         price = price[org_sym]
 #         volume = volume[org_sym]
 #     return price, volume
 def get_watchlist(website: str = WEBSITE):
    page = requests.get(WEBSITE)
    # page = requests.get(WEBSITE, verify=False) # skip certificate check for https
    tree = html.fromstring(page.content)
    watchlist = tree.xpath('//*[@id="dokuwiki__content"]/div[1]/div/div[3]/div/pre/text()')[0]
    file_name = StringIO(watchlist)
    df = pd.read_csv(file_name, index_col = 'Symbol',
                      comment = '#', na_filter=False)
    return df
 # def get_watchlist(file_name: str = 'watchlist.csv'):
 #     df = pd.read_csv(file_name, index_col = 'Symbol',
 #                      comment = '#', na_filter=False)
 #     return df
 # def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'):
 #     """
 #     Read stock data (adjusted close) for given symbols from CSV files.
 #     (done) TODO: there are nan values in the data when addSPY=False is passed. The
 #     strategy should be using SPY to clean the data first including fill
 #     forward and fill backward, then to drop the SPY if addSPY=False
 #     """
 #     org_sym = symbols
 #     df = pd.DataFrame(index=dates)
 #     # if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
 #     #     symbols = ['SPY'] + symbols
 #     if 'SPY' not in symbols:
 #         symbols = ['SPY'] + symbols
 #     for symbol in symbols:
 #         df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
 #                 parse_dates=True, usecols=['Date', colname], na_values=['nan'])
 #         df_temp = df_temp.rename(columns={colname: symbol})
 #         df = df.join(df_temp)
 #         if symbol == 'SPY':  # drop dates SPY did not trade
 #             df = df.dropna(subset=["SPY"])
 #     # fill missing data
 #     df = fill_missing_data(df)
 #     if addSPY == False: # drop SPY
 #         # df = df.drop(columns=['SPY'])
 #         df = df[org_sym]
 #     return df
 def plot_data(df, axs=[], title=[], xlabel='', ylabel=''):
    """Plot stock prices with a custom title and meaningful axis labels."""
    if axs == []:
        ax = df.plot(title = title)
    else:
        ax = df.plot(ax=axs, title=title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.grid()
 # def plot_data(df, title=[], xlabel='', ylabel=''):
 #     import matplotlib.pyplot as plt
 #     """Plot stock prices with a custom title and meaningful axis labels."""
 #     ax = df.plot(title=title, fontsize=12, figsize=(10, 7))
 #     ax.set_xlabel(xlabel)
 #     ax.set_ylabel(ylabel)
 #     plt.grid()
 #     plt.show()
 def get_orders_data_file(basefilename):
    return open(os.path.join(os.environ.get("ORDERS_DATA_DIR",'orders/'),basefilename))
 def get_learner_data_file(basefilename):
    return open(os.path.join(os.environ.get("LEARNER_DATA_DIR",'Data/'),basefilename),'r')
 def get_robot_world_file(basefilename):
    return open(os.path.join(os.environ.get("ROBOT_WORLDS_DIR",'testworlds/'),basefilename))
 def test_code():
    symbol = ['GOOG', 'AMZN']
    # lookback years
    lb_year = 0.08
    ed = dt.datetime.today()
    sd = ed - dt.timedelta(days = 365 * lb_year + 1)
    # If ed or sd falls on to a non-trading day, you might get warnings saying
    # "No data found for this date range, symbol may be delisted". This is 
    # normal behavior.
    prices, volume = get_price_volume(symbol, pd.date_range(sd, ed), addSPY=False)
 if __name__ == '__main__':
    test_code()