From 4b49def86df9505c7b419240d1460a2a18bc4920 Mon Sep 17 00:00:00 2001
From: Charlie <wang@post.com>
Date: Fri, 4 Oct 2024 02:38:21 -0700
Subject: [PATCH] Use database - paving the road for dynamically
 adding/deleting watchlist items

---
 indicators.py    |  46 +++++-
 requirements.txt |   2 +
 util.py          | 384 -----------------------------------------------
 3 files changed, 44 insertions(+), 388 deletions(-)
 delete mode 100644 util.py

diff --git a/indicators.py b/indicators.py
index 7d29670..2ef21b3 100644
--- a/indicators.py
+++ b/indicators.py
@@ -21,7 +21,7 @@ Could also come up with a value that ties to the trading volume.
 import pandas as pd
 import numpy as np
 import datetime as dt
-from util import get_watchlist
+# from util import get_watchlist
 from numpy.fft import fft, ifft
 import scipy.signal as sig
 import plotly.express as px
@@ -33,8 +33,44 @@ from dash.exceptions import PreventUpdate
 import dash_auth
 import yahoo_fin.stock_info as si
 import hashlib
+from dotenv import load_dotenv
+import psycopg2
+import os
+import sys
 
 pd.options.mode.chained_assignment = None  # default='warn'
+load_dotenv()
+
+def connect_db():
+    conn = None
+    try:
+        conn = psycopg2.connect(
+                    host=os.environ['DB_PATH'],
+                    database=os.environ['DB_NAME'],
+                    user=os.environ['DB_USERNAME'],
+                    password=os.environ['DB_PASSWORD'],
+                )
+    except (Exception, psycopg2.DatabaseError) as error:
+        print(error)
+        sys.exit(1)
+    return conn
+
+def sql_to_dataframe(conn, query):
+    cursor = conn.cursor()
+    try:
+        cursor.execute(query)
+    except (Exception, psycopg2.DatabaseError) as error:
+        print(f"Error: {error}")
+        cursor.close()
+        return 1
+    tuples_list = cursor.fetchall()
+    cursor.close()
+    df = pd.DataFrame(tuples_list)
+    return df
+
+def get_watchlist():
+    QUERY = '''select * from stock_watch_list'''
+    return sql_to_dataframe(connect_db(), QUERY)
 
 def hash_password(password):
     # Encode the password as bytes
@@ -485,7 +521,7 @@ def intelligent_loop_plots(sym, stk_data):
     volume_tail = vol.tail(LB_TRIGGER)
     vol_mean = vol.tail(50).mean()
     vol_std = vol.tail(50).std()
-    if ((volume_tail[1] - vol_mean - 2*vol_std) > 0).any():
+    if ((volume_tail - vol_mean - 2*vol_std) > 0).any():
         # print('--HiVol', end = '')
         plot_indicator += "HiVol, "
 
@@ -536,7 +572,8 @@ dash_auth.BasicAuth(
 
 watchlist = get_watchlist()
 # symbols = watchlist.index.values.tolist()
-symbols = (watchlist.index.values + " - " + watchlist["Sub Segment"]).tolist()
+# symbols = (watchlist.index.values + " - " + watchlist["Sub Segment"]).tolist()
+symbols = (watchlist.iloc[:, 0] + " - " + watchlist.iloc[:, 1]).tolist()
 
 CACHE_CONFIG = {'CACHE_TYPE': 'SimpleCache'}
 cache = Cache()
@@ -614,7 +651,8 @@ def start_cycle(n, value):
 def reload_syms(n):
     if n:
         watchlist = get_watchlist()
-        symbols = (watchlist.index.values + " - " + watchlist["Sub Segment"]).tolist()
+        # symbols = (watchlist.index.values + " - " + watchlist["Sub Segment"]).tolist()
+        symbols = (watchlist.iloc[:, 0] + " - " + watchlist.iloc[:, 1]).tolist()
         return symbols, 0
     return no_update
 
diff --git a/requirements.txt b/requirements.txt
index d2dc64e..fa1c06a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -30,10 +30,12 @@ packaging==24.1
 pandas==2.2.3
 parse==1.20.2
 plotly==5.24.1
+psycopg2==2.9.9
 pyee==11.1.1
 pyppeteer==2.0.0
 pyquery==2.0.1
 python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
 pytz==2024.2
 requests==2.32.3
 requests-html==0.10.0
diff --git a/util.py b/util.py
deleted file mode 100644
index fc2ce91..0000000
--- a/util.py
+++ /dev/null
@@ -1,384 +0,0 @@
-"""
-Use Yahoo Finance data
-"""
-
-import warnings
-
-# Suppress FutureWarnings
-warnings.simplefilter(action='ignore', category=FutureWarning)
-
-import datetime as dt
-import os
-import pandas as pd
-import numpy as np
-# import yfinance as yf
-import yahoo_fin.stock_info as si
-import requests
-from lxml import html
-from io import StringIO
-from time import sleep
-
-WEBSITE = 'https://www.isolo.org/dokuwiki/knowledge_base/investing/watchlist'
-BATCHSIZE = 20
-TIMEGAP = 0.2
-
-def fill_missing_data(df):
-    temp = df.ffill()
-    temp = temp.bfill()
-    return temp
-
-def symbol_to_path(symbol, base_dir=None):
-    """Return CSV file path given ticker symbol."""
-    if base_dir is None:
-        base_dir = os.environ.get("MARKET_DATA_DIR", '../data/')
-    return os.path.join(base_dir, "{}.csv".format(str(symbol)))
-
-# def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'):
-#     """
-#     Read stock data (adjusted close) for given symbols from Yahoo Finance
-#     from start_date to the latest date available (usually the current date).
-#     """
-#     if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
-#         symbols = ['SPY'] + symbols
-
-#     df = yf.download(symbols, start = start_date)[colname]
-#     if len(symbols) == 1:
-#         df.name = symbols[0]
-#         df = df.to_frame()
-#     return df
-
-# def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'):
-    """
-    Read stock data (adjusted close) for given symbols from CSV files
-    from start_date to the latest date available in the CSV files.
-    """
-#     df_temp = pd.read_csv(symbol_to_path('SPY'), index_col='Date',
-#             parse_dates=True, usecols=['Date', colname], na_values=['nan'])
-#     df_temp = df_temp.rename(columns={colname: 'SPY'})
-#     end_date = df_temp.index.values[-1]
-#     dates = pd.date_range(start_date, end_date)
-#     df = pd.DataFrame(index=dates)
-#     df = df.join(df_temp)
-#     df = df.dropna()
-#     # if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
-#     #     symbols = ['SPY'] + symbols
-#     for symbol in symbols:
-#         df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
-#                 parse_dates=True, usecols=['Date', colname], na_values=['nan'])
-#         df_temp = df_temp.rename(columns={colname: symbol})
-#         df = df.join(df_temp)
-#         # if symbol == 'SPY':  # drop dates SPY did not trade
-#         #     df = df.dropna(subset=["SPY"])
-#     if not addSPY:
-#         df = df[symbols]
-#     return df
-
-def get_data_range(df, dates):
-    """
-    Extract sections of the data in the dates range from the full data set
-    """
-    df_range = pd.DataFrame(index=dates)
-    df_range = df_range.join(df, how='inner')
-    return df_range
-
-def yf_download(symbols, start, end):
-    df = pd.DataFrame(columns = pd.MultiIndex(levels=[["Adj Close", "Volume"],[]], codes=[[],[]], names=["param", "tick"]))
-    for sym in symbols:
-        # tmp = si.get_data(sym, start_date=start)
-        tmp = si.get_data(sym, start_date=start)[["adjclose", "volume"]]
-        tmp.rename(columns={"adjclose": "Adj Close", "volume": "Volume"}, inplace=True)
-        tmp.columns = pd.MultiIndex.from_product([list(tmp.columns)] + [[sym]], names=["param", "tick"])
-
-        df = df.join(tmp, how='outer')
-
-    return df
-
-# def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'):
-#     """
-#     Read stock data (adjusted close) for given symbols from Yahoo Finance
-#     """
-#     org_sym = symbols
-#     sd = dates[0]
-#     ed = dates[-1]
-#     # if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
-#     if 'SPY' not in symbols:
-#         symbols = ['SPY'] + symbols
-#     df = yf.download(symbols, start=sd, end = ed)[colname]
-#     if len(symbols) == 1:
-#         df.name = symbols[0]
-#         df = df.to_frame()
-
-#     df = df.dropna(subset=['SPY'])
-#     df = fill_missing_data(df)
-
-#     if addSPY==False:
-#         # df = df.drop(columns=['SPY'])
-#         df = df[org_sym]
-
-#     return df
-
-def yf_batch_download(symbols, start, end, batch_size, time_gap):
-    """
-    download in small batches to avoid connection closure by host
-
-    Parameters
-    ----------
-    symbols : list
-        stock symbols.
-    start : datetime
-        start date.
-    end : datetime
-        stop date.
-    batch_size : integer
-        batch size.
-    time_gap : float
-        in seconds or fraction of seconds.
-
-    Returns
-    -------
-    df : dataframe
-        stock price volume information.
-
-    """
-    n = len(symbols)
-    batches = n // batch_size
-    df = pd.DataFrame()
-    for i in range(batches - 1):
-        tmp = yf_download(symbols[i*batch_size:(i+1)*batch_size], start, end)
-        df = pd.concat([df, tmp], axis=1)
-        sleep(time_gap)
-    tmp = yf_download(symbols[(batches-1)*batch_size:n], start, end)
-    df = pd.concat([df, tmp], axis=1)
-    
-    return df
-
-def get_price_volume(symbols, dates, addSPY=False):
-    """
-    Read stock data (adjusted close and volume) for given symbols from local
-    file unless data is not in local. It only gets date from Yahoo Finance
-    when necessary to increase speed and reduce internet data.
-    
-    It will refresh local data if the symbols are on the _refresh.csv. This
-    is necessary when stock splits, spins off or something else happens.
-    """
-    # DATAFILE = "_stkdata.pickle"
-    # REFRESH = "_refresh.csv"
-    org_sym = symbols
-    sd = dates[0]
-    ed = dates[-1]
-    # if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
-    if 'SPY' not in symbols:
-        symbols = ['SPY'] + symbols
-
-    df = yf_batch_download(symbols, start=sd, end=ed, \
-                            batch_size=BATCHSIZE, time_gap=TIMEGAP)
-    if len(symbols) == 1:
-        tuples = list(zip(df.columns.values.tolist(), \
-                            [symbols[0]]*len(df.columns.values)))
-        df.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None])           
-
-    # if not os.path.exists(DATAFILE):
-    #     df = yf_batch_download(symbols, start=sd, end=ed, \
-    #                            batch_size=BATCHSIZE, time_gap=TIMEGAP)
-    #     if len(symbols) == 1:
-    #         tuples = list(zip(df.columns.values.tolist(), \
-    #                           [symbols[0]]*len(df.columns.values)))
-    #         df.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None])            
-    # else:
-    #     df = pd.read_pickle(DATAFILE)
-    #     exist_syms = df["Adj Close"].columns.values.tolist()
-    #     if os.path.exists(REFRESH):
-    #         try:
-    #             refresh_df = pd.read_csv(REFRESH, header=None)
-    #             refresh_syms = refresh_df.values.tolist()
-    #             refresh_syms = [x for sublist in refresh_syms for x in sublist]
-    #             remove_syms = [x for x in exist_syms if x in refresh_syms]
-    #             if remove_syms:
-    #                 df.drop(columns=remove_syms, axis=1, level=1, inplace=True)
-    #                 exist_syms = [x for x in exist_syms if x not in refresh_syms]
-    #         except:
-    #             pass
-
-        exist_syms = []
-
-        last_day = pd.to_datetime(df.index.values[-1])
-        first_day = pd.to_datetime(df.index.values[0])
-        intersect_syms = list(set(org_sym) & set(exist_syms))
-        # reduce df to only contain intersect_syms
-        df = df.loc[:, (slice(None), intersect_syms)]
-        
-        if sd < first_day:
-            # fill gap from online
-            tmp_df = yf_batch_download(intersect_syms, start=sd, end=first_day, \
-                               batch_size=BATCHSIZE, time_gap=TIMEGAP)
-            df = pd.concat([tmp_df, df])
-            
-        if ed >= last_day:
-            # fill gap from online incl last two days to get mkt close data
-            if ed.date() == last_day.date():
-                tmp_df = yf_batch_download(intersect_syms, start=ed, end=ed, \
-                               batch_size=BATCHSIZE, time_gap=TIMEGAP)
-            else:
-                tmp_df = yf_batch_download(intersect_syms, start=last_day, end=ed, \
-                               batch_size=BATCHSIZE, time_gap=TIMEGAP)
-            df = pd.concat([df[:-1], tmp_df])
-
-        # get data online when new stks were added    
-        new_stks = np.setdiff1d(symbols, exist_syms).tolist()
-        if not new_stks == []:
-            tmp_df = yf_batch_download(new_stks, start=sd, end=ed, \
-                               batch_size=BATCHSIZE, time_gap=TIMEGAP)
-            if len(new_stks) == 1:
-                tuples = list(zip(tmp_df.columns.values.tolist(), \
-                                  [new_stks[0]]*len(tmp_df.columns.values)))
-                tmp_df.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None])
-            df = df.join(tmp_df)
-
-    # df.to_pickle(DATAFILE) # save to local, overwrite existing file
-    # if os.path.exists(REFRESH):
-    #     with open(REFRESH, 'w'):
-    #         pass
-
-    df = df.dropna(subset=[('Adj Close', 'SPY')])
-    price = df['Adj Close']
-    price = fill_missing_data(price)
-    volume = df['Volume']
-    volume = volume.fillna(0)
-
-    # if len(symbols) == 1:
-    #     price.name = symbols[0]
-    #     volume.name = symbols[0]
-    #     price = price.to_frame()
-    #     volume = volume.to_frame()
-
-    if addSPY==False:
-        price = price[org_sym]
-        volume = volume[org_sym]
-   
-    return price, volume      
-    
-
-# def get_price_volume_online(symbols, dates, addSPY=False):
-#     """
-#     Read stock data (adjusted close and volume) for given symbols from Yahoo
-#     Finance
-#     """
-#     org_sym = symbols
-#     sd = dates[0]
-#     ed = dates[-1]
-#     # if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
-#     if 'SPY' not in symbols:
-#         symbols = ['SPY'] + symbols
-#     df = yf.download(symbols, start=sd, end = ed)
-#     if len(symbols) == 1:
-#         df = df.dropna(subset = ['Adj Close'])
-#     else:
-#         df = df.dropna(subset=[('Adj Close', 'SPY')])
-#     price = df['Adj Close']
-#     price = fill_missing_data(price)
-#     volume = df['Volume']
-#     volume = volume.fillna(0)
-
-#     if len(symbols) == 1:
-#         price.name = symbols[0]
-#         volume.name = symbols[0]
-#         price = price.to_frame()
-#         volume = volume.to_frame()
-
-#     if addSPY==False:
-#         price = price[org_sym]
-#         volume = volume[org_sym]
-
-#     return price, volume
-
-def get_watchlist(website: str = WEBSITE):
-    page = requests.get(WEBSITE)
-    # page = requests.get(WEBSITE, verify=False) # skip certificate check for https
-    tree = html.fromstring(page.content)
-    watchlist = tree.xpath('//*[@id="dokuwiki__content"]/div[1]/div/div[3]/div/pre/text()')[0]
-    file_name = StringIO(watchlist)
-    df = pd.read_csv(file_name, index_col = 'Symbol',
-                      comment = '#', na_filter=False)
-    return df
-
-# def get_watchlist(file_name: str = 'watchlist.csv'):
-#     df = pd.read_csv(file_name, index_col = 'Symbol',
-#                      comment = '#', na_filter=False)
-#     return df
-
-# def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'):
-#     """
-#     Read stock data (adjusted close) for given symbols from CSV files.
-
-#     (done) TODO: there are nan values in the data when addSPY=False is passed. The
-#     strategy should be using SPY to clean the data first including fill
-#     forward and fill backward, then to drop the SPY if addSPY=False
-#     """
-#     org_sym = symbols
-#     df = pd.DataFrame(index=dates)
-#     # if addSPY and 'SPY' not in symbols:  # add SPY for reference, if absent
-#     #     symbols = ['SPY'] + symbols
-#     if 'SPY' not in symbols:
-#         symbols = ['SPY'] + symbols
-#     for symbol in symbols:
-#         df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
-#                 parse_dates=True, usecols=['Date', colname], na_values=['nan'])
-#         df_temp = df_temp.rename(columns={colname: symbol})
-#         df = df.join(df_temp)
-#         if symbol == 'SPY':  # drop dates SPY did not trade
-#             df = df.dropna(subset=["SPY"])
-#     # fill missing data
-#     df = fill_missing_data(df)
-#     if addSPY == False: # drop SPY
-#         # df = df.drop(columns=['SPY'])
-#         df = df[org_sym]
-
-#     return df
-
-
-def plot_data(df, axs=[], title=[], xlabel='', ylabel=''):
-
-    """Plot stock prices with a custom title and meaningful axis labels."""
-    if axs == []:
-        ax = df.plot(title = title)
-    else:
-        ax = df.plot(ax=axs, title=title)
-    ax.set_xlabel(xlabel)
-    ax.set_ylabel(ylabel)
-    ax.grid()
-
-
-# def plot_data(df, title=[], xlabel='', ylabel=''):
-#     import matplotlib.pyplot as plt
-#     """Plot stock prices with a custom title and meaningful axis labels."""
-#     ax = df.plot(title=title, fontsize=12, figsize=(10, 7))
-#     ax.set_xlabel(xlabel)
-#     ax.set_ylabel(ylabel)
-#     plt.grid()
-#     plt.show()
-
-def get_orders_data_file(basefilename):
-    return open(os.path.join(os.environ.get("ORDERS_DATA_DIR",'orders/'),basefilename))
-
-def get_learner_data_file(basefilename):
-    return open(os.path.join(os.environ.get("LEARNER_DATA_DIR",'Data/'),basefilename),'r')
-
-def get_robot_world_file(basefilename):
-    return open(os.path.join(os.environ.get("ROBOT_WORLDS_DIR",'testworlds/'),basefilename))
-
-
-def test_code():
-
-    symbol = ['GOOG', 'AMZN']
-    # lookback years
-    lb_year = 0.08
-    ed = dt.datetime.today()
-    sd = ed - dt.timedelta(days = 365 * lb_year + 1)
-    # If ed or sd falls on to a non-trading day, you might get warnings saying
-    # "No data found for this date range, symbol may be delisted". This is 
-    # normal behavior.
-    prices, volume = get_price_volume(symbol, pd.date_range(sd, ed), addSPY=False)
-
-
-if __name__ == '__main__':
-    test_code()
\ No newline at end of file