From 4b49def86df9505c7b419240d1460a2a18bc4920 Mon Sep 17 00:00:00 2001 From: Charlie Date: Fri, 4 Oct 2024 02:38:21 -0700 Subject: [PATCH] Use database - paving the road for dynamically adding/deleting watchlist items --- indicators.py | 46 +++++- requirements.txt | 2 + util.py | 384 ----------------------------------------------- 3 files changed, 44 insertions(+), 388 deletions(-) delete mode 100644 util.py diff --git a/indicators.py b/indicators.py index 7d29670..2ef21b3 100644 --- a/indicators.py +++ b/indicators.py @@ -21,7 +21,7 @@ Could also come up with a value that ties to the trading volume. import pandas as pd import numpy as np import datetime as dt -from util import get_watchlist +# from util import get_watchlist from numpy.fft import fft, ifft import scipy.signal as sig import plotly.express as px @@ -33,8 +33,44 @@ from dash.exceptions import PreventUpdate import dash_auth import yahoo_fin.stock_info as si import hashlib +from dotenv import load_dotenv +import psycopg2 +import os +import sys pd.options.mode.chained_assignment = None # default='warn' +load_dotenv() + +def connect_db(): + conn = None + try: + conn = psycopg2.connect( + host=os.environ['DB_PATH'], + database=os.environ['DB_NAME'], + user=os.environ['DB_USERNAME'], + password=os.environ['DB_PASSWORD'], + ) + except (Exception, psycopg2.DatabaseError) as error: + print(error) + sys.exit(1) + return conn + +def sql_to_dataframe(conn, query): + cursor = conn.cursor() + try: + cursor.execute(query) + except (Exception, psycopg2.DatabaseError) as error: + print(f"Error: {error}") + cursor.close() + return 1 + tuples_list = cursor.fetchall() + cursor.close() + df = pd.DataFrame(tuples_list) + return df + +def get_watchlist(): + QUERY = '''select * from stock_watch_list''' + return sql_to_dataframe(connect_db(), QUERY) def hash_password(password): # Encode the password as bytes @@ -485,7 +521,7 @@ def intelligent_loop_plots(sym, stk_data): volume_tail = vol.tail(LB_TRIGGER) vol_mean = vol.tail(50).mean() vol_std = vol.tail(50).std() - if ((volume_tail[1] - vol_mean - 2*vol_std) > 0).any(): + if ((volume_tail - vol_mean - 2*vol_std) > 0).any(): # print('--HiVol', end = '') plot_indicator += "HiVol, " @@ -536,7 +572,8 @@ dash_auth.BasicAuth( watchlist = get_watchlist() # symbols = watchlist.index.values.tolist() -symbols = (watchlist.index.values + " - " + watchlist["Sub Segment"]).tolist() +# symbols = (watchlist.index.values + " - " + watchlist["Sub Segment"]).tolist() +symbols = (watchlist.iloc[:, 0] + " - " + watchlist.iloc[:, 1]).tolist() CACHE_CONFIG = {'CACHE_TYPE': 'SimpleCache'} cache = Cache() @@ -614,7 +651,8 @@ def start_cycle(n, value): def reload_syms(n): if n: watchlist = get_watchlist() - symbols = (watchlist.index.values + " - " + watchlist["Sub Segment"]).tolist() + # symbols = (watchlist.index.values + " - " + watchlist["Sub Segment"]).tolist() + symbols = (watchlist.iloc[:, 0] + " - " + watchlist.iloc[:, 1]).tolist() return symbols, 0 return no_update diff --git a/requirements.txt b/requirements.txt index d2dc64e..fa1c06a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,10 +30,12 @@ packaging==24.1 pandas==2.2.3 parse==1.20.2 plotly==5.24.1 +psycopg2==2.9.9 pyee==11.1.1 pyppeteer==2.0.0 pyquery==2.0.1 python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 pytz==2024.2 requests==2.32.3 requests-html==0.10.0 diff --git a/util.py b/util.py deleted file mode 100644 index fc2ce91..0000000 --- a/util.py +++ /dev/null @@ -1,384 +0,0 @@ -""" -Use Yahoo Finance data -""" - -import warnings - -# Suppress FutureWarnings -warnings.simplefilter(action='ignore', category=FutureWarning) - -import datetime as dt -import os -import pandas as pd -import numpy as np -# import yfinance as yf -import yahoo_fin.stock_info as si -import requests -from lxml import html -from io import StringIO -from time import sleep - -WEBSITE = 'https://www.isolo.org/dokuwiki/knowledge_base/investing/watchlist' -BATCHSIZE = 20 -TIMEGAP = 0.2 - -def fill_missing_data(df): - temp = df.ffill() - temp = temp.bfill() - return temp - -def symbol_to_path(symbol, base_dir=None): - """Return CSV file path given ticker symbol.""" - if base_dir is None: - base_dir = os.environ.get("MARKET_DATA_DIR", '../data/') - return os.path.join(base_dir, "{}.csv".format(str(symbol))) - -# def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'): -# """ -# Read stock data (adjusted close) for given symbols from Yahoo Finance -# from start_date to the latest date available (usually the current date). -# """ -# if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent -# symbols = ['SPY'] + symbols - -# df = yf.download(symbols, start = start_date)[colname] -# if len(symbols) == 1: -# df.name = symbols[0] -# df = df.to_frame() -# return df - -# def get_data_full(symbols, start_date, addSPY=True, colname = 'Adj Close'): - """ - Read stock data (adjusted close) for given symbols from CSV files - from start_date to the latest date available in the CSV files. - """ -# df_temp = pd.read_csv(symbol_to_path('SPY'), index_col='Date', -# parse_dates=True, usecols=['Date', colname], na_values=['nan']) -# df_temp = df_temp.rename(columns={colname: 'SPY'}) -# end_date = df_temp.index.values[-1] -# dates = pd.date_range(start_date, end_date) -# df = pd.DataFrame(index=dates) -# df = df.join(df_temp) -# df = df.dropna() -# # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent -# # symbols = ['SPY'] + symbols -# for symbol in symbols: -# df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date', -# parse_dates=True, usecols=['Date', colname], na_values=['nan']) -# df_temp = df_temp.rename(columns={colname: symbol}) -# df = df.join(df_temp) -# # if symbol == 'SPY': # drop dates SPY did not trade -# # df = df.dropna(subset=["SPY"]) -# if not addSPY: -# df = df[symbols] -# return df - -def get_data_range(df, dates): - """ - Extract sections of the data in the dates range from the full data set - """ - df_range = pd.DataFrame(index=dates) - df_range = df_range.join(df, how='inner') - return df_range - -def yf_download(symbols, start, end): - df = pd.DataFrame(columns = pd.MultiIndex(levels=[["Adj Close", "Volume"],[]], codes=[[],[]], names=["param", "tick"])) - for sym in symbols: - # tmp = si.get_data(sym, start_date=start) - tmp = si.get_data(sym, start_date=start)[["adjclose", "volume"]] - tmp.rename(columns={"adjclose": "Adj Close", "volume": "Volume"}, inplace=True) - tmp.columns = pd.MultiIndex.from_product([list(tmp.columns)] + [[sym]], names=["param", "tick"]) - - df = df.join(tmp, how='outer') - - return df - -# def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'): -# """ -# Read stock data (adjusted close) for given symbols from Yahoo Finance -# """ -# org_sym = symbols -# sd = dates[0] -# ed = dates[-1] -# # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent -# if 'SPY' not in symbols: -# symbols = ['SPY'] + symbols -# df = yf.download(symbols, start=sd, end = ed)[colname] -# if len(symbols) == 1: -# df.name = symbols[0] -# df = df.to_frame() - -# df = df.dropna(subset=['SPY']) -# df = fill_missing_data(df) - -# if addSPY==False: -# # df = df.drop(columns=['SPY']) -# df = df[org_sym] - -# return df - -def yf_batch_download(symbols, start, end, batch_size, time_gap): - """ - download in small batches to avoid connection closure by host - - Parameters - ---------- - symbols : list - stock symbols. - start : datetime - start date. - end : datetime - stop date. - batch_size : integer - batch size. - time_gap : float - in seconds or fraction of seconds. - - Returns - ------- - df : dataframe - stock price volume information. - - """ - n = len(symbols) - batches = n // batch_size - df = pd.DataFrame() - for i in range(batches - 1): - tmp = yf_download(symbols[i*batch_size:(i+1)*batch_size], start, end) - df = pd.concat([df, tmp], axis=1) - sleep(time_gap) - tmp = yf_download(symbols[(batches-1)*batch_size:n], start, end) - df = pd.concat([df, tmp], axis=1) - - return df - -def get_price_volume(symbols, dates, addSPY=False): - """ - Read stock data (adjusted close and volume) for given symbols from local - file unless data is not in local. It only gets date from Yahoo Finance - when necessary to increase speed and reduce internet data. - - It will refresh local data if the symbols are on the _refresh.csv. This - is necessary when stock splits, spins off or something else happens. - """ - # DATAFILE = "_stkdata.pickle" - # REFRESH = "_refresh.csv" - org_sym = symbols - sd = dates[0] - ed = dates[-1] - # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent - if 'SPY' not in symbols: - symbols = ['SPY'] + symbols - - df = yf_batch_download(symbols, start=sd, end=ed, \ - batch_size=BATCHSIZE, time_gap=TIMEGAP) - if len(symbols) == 1: - tuples = list(zip(df.columns.values.tolist(), \ - [symbols[0]]*len(df.columns.values))) - df.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None]) - - # if not os.path.exists(DATAFILE): - # df = yf_batch_download(symbols, start=sd, end=ed, \ - # batch_size=BATCHSIZE, time_gap=TIMEGAP) - # if len(symbols) == 1: - # tuples = list(zip(df.columns.values.tolist(), \ - # [symbols[0]]*len(df.columns.values))) - # df.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None]) - # else: - # df = pd.read_pickle(DATAFILE) - # exist_syms = df["Adj Close"].columns.values.tolist() - # if os.path.exists(REFRESH): - # try: - # refresh_df = pd.read_csv(REFRESH, header=None) - # refresh_syms = refresh_df.values.tolist() - # refresh_syms = [x for sublist in refresh_syms for x in sublist] - # remove_syms = [x for x in exist_syms if x in refresh_syms] - # if remove_syms: - # df.drop(columns=remove_syms, axis=1, level=1, inplace=True) - # exist_syms = [x for x in exist_syms if x not in refresh_syms] - # except: - # pass - - exist_syms = [] - - last_day = pd.to_datetime(df.index.values[-1]) - first_day = pd.to_datetime(df.index.values[0]) - intersect_syms = list(set(org_sym) & set(exist_syms)) - # reduce df to only contain intersect_syms - df = df.loc[:, (slice(None), intersect_syms)] - - if sd < first_day: - # fill gap from online - tmp_df = yf_batch_download(intersect_syms, start=sd, end=first_day, \ - batch_size=BATCHSIZE, time_gap=TIMEGAP) - df = pd.concat([tmp_df, df]) - - if ed >= last_day: - # fill gap from online incl last two days to get mkt close data - if ed.date() == last_day.date(): - tmp_df = yf_batch_download(intersect_syms, start=ed, end=ed, \ - batch_size=BATCHSIZE, time_gap=TIMEGAP) - else: - tmp_df = yf_batch_download(intersect_syms, start=last_day, end=ed, \ - batch_size=BATCHSIZE, time_gap=TIMEGAP) - df = pd.concat([df[:-1], tmp_df]) - - # get data online when new stks were added - new_stks = np.setdiff1d(symbols, exist_syms).tolist() - if not new_stks == []: - tmp_df = yf_batch_download(new_stks, start=sd, end=ed, \ - batch_size=BATCHSIZE, time_gap=TIMEGAP) - if len(new_stks) == 1: - tuples = list(zip(tmp_df.columns.values.tolist(), \ - [new_stks[0]]*len(tmp_df.columns.values))) - tmp_df.columns = pd.MultiIndex.from_tuples(tuples, names=[None, None]) - df = df.join(tmp_df) - - # df.to_pickle(DATAFILE) # save to local, overwrite existing file - # if os.path.exists(REFRESH): - # with open(REFRESH, 'w'): - # pass - - df = df.dropna(subset=[('Adj Close', 'SPY')]) - price = df['Adj Close'] - price = fill_missing_data(price) - volume = df['Volume'] - volume = volume.fillna(0) - - # if len(symbols) == 1: - # price.name = symbols[0] - # volume.name = symbols[0] - # price = price.to_frame() - # volume = volume.to_frame() - - if addSPY==False: - price = price[org_sym] - volume = volume[org_sym] - - return price, volume - - -# def get_price_volume_online(symbols, dates, addSPY=False): -# """ -# Read stock data (adjusted close and volume) for given symbols from Yahoo -# Finance -# """ -# org_sym = symbols -# sd = dates[0] -# ed = dates[-1] -# # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent -# if 'SPY' not in symbols: -# symbols = ['SPY'] + symbols -# df = yf.download(symbols, start=sd, end = ed) -# if len(symbols) == 1: -# df = df.dropna(subset = ['Adj Close']) -# else: -# df = df.dropna(subset=[('Adj Close', 'SPY')]) -# price = df['Adj Close'] -# price = fill_missing_data(price) -# volume = df['Volume'] -# volume = volume.fillna(0) - -# if len(symbols) == 1: -# price.name = symbols[0] -# volume.name = symbols[0] -# price = price.to_frame() -# volume = volume.to_frame() - -# if addSPY==False: -# price = price[org_sym] -# volume = volume[org_sym] - -# return price, volume - -def get_watchlist(website: str = WEBSITE): - page = requests.get(WEBSITE) - # page = requests.get(WEBSITE, verify=False) # skip certificate check for https - tree = html.fromstring(page.content) - watchlist = tree.xpath('//*[@id="dokuwiki__content"]/div[1]/div/div[3]/div/pre/text()')[0] - file_name = StringIO(watchlist) - df = pd.read_csv(file_name, index_col = 'Symbol', - comment = '#', na_filter=False) - return df - -# def get_watchlist(file_name: str = 'watchlist.csv'): -# df = pd.read_csv(file_name, index_col = 'Symbol', -# comment = '#', na_filter=False) -# return df - -# def get_data(symbols, dates, addSPY=True, colname = 'Adj Close'): -# """ -# Read stock data (adjusted close) for given symbols from CSV files. - -# (done) TODO: there are nan values in the data when addSPY=False is passed. The -# strategy should be using SPY to clean the data first including fill -# forward and fill backward, then to drop the SPY if addSPY=False -# """ -# org_sym = symbols -# df = pd.DataFrame(index=dates) -# # if addSPY and 'SPY' not in symbols: # add SPY for reference, if absent -# # symbols = ['SPY'] + symbols -# if 'SPY' not in symbols: -# symbols = ['SPY'] + symbols -# for symbol in symbols: -# df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date', -# parse_dates=True, usecols=['Date', colname], na_values=['nan']) -# df_temp = df_temp.rename(columns={colname: symbol}) -# df = df.join(df_temp) -# if symbol == 'SPY': # drop dates SPY did not trade -# df = df.dropna(subset=["SPY"]) -# # fill missing data -# df = fill_missing_data(df) -# if addSPY == False: # drop SPY -# # df = df.drop(columns=['SPY']) -# df = df[org_sym] - -# return df - - -def plot_data(df, axs=[], title=[], xlabel='', ylabel=''): - - """Plot stock prices with a custom title and meaningful axis labels.""" - if axs == []: - ax = df.plot(title = title) - else: - ax = df.plot(ax=axs, title=title) - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) - ax.grid() - - -# def plot_data(df, title=[], xlabel='', ylabel=''): -# import matplotlib.pyplot as plt -# """Plot stock prices with a custom title and meaningful axis labels.""" -# ax = df.plot(title=title, fontsize=12, figsize=(10, 7)) -# ax.set_xlabel(xlabel) -# ax.set_ylabel(ylabel) -# plt.grid() -# plt.show() - -def get_orders_data_file(basefilename): - return open(os.path.join(os.environ.get("ORDERS_DATA_DIR",'orders/'),basefilename)) - -def get_learner_data_file(basefilename): - return open(os.path.join(os.environ.get("LEARNER_DATA_DIR",'Data/'),basefilename),'r') - -def get_robot_world_file(basefilename): - return open(os.path.join(os.environ.get("ROBOT_WORLDS_DIR",'testworlds/'),basefilename)) - - -def test_code(): - - symbol = ['GOOG', 'AMZN'] - # lookback years - lb_year = 0.08 - ed = dt.datetime.today() - sd = ed - dt.timedelta(days = 365 * lb_year + 1) - # If ed or sd falls on to a non-trading day, you might get warnings saying - # "No data found for this date range, symbol may be delisted". This is - # normal behavior. - prices, volume = get_price_volume(symbol, pd.date_range(sd, ed), addSPY=False) - - -if __name__ == '__main__': - test_code() \ No newline at end of file