Skip to content

Commit e003411

Browse files
author
Bob Colner
committed
elimiate BarBuilder class/ refactor
1 parent ed1de50 commit e003411

File tree

7 files changed

+327
-3121
lines changed

7 files changed

+327
-3121
lines changed

bar_features.py

Lines changed: 23 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,80 +1,27 @@
1+
import pandas as pd
12
from statsmodels.stats.weightstats import DescrStatsW
2-
33

4-
def state_to_bar(state: dict) -> dict:
5-
6-
bar = {}
7-
if state['stat']['tick_count'] == 0:
8-
return bar
94

10-
bar['bar_trigger'] = state['trigger_yet?!']
11-
# time
12-
bar['open_at'] = state['trades']['date_time'][0]
13-
bar['close_at'] = state['trades']['date_time'][-1]
14-
bar['duration_td'] = bar['close_at'] - bar['open_at']
15-
# price
16-
bar['price_open'] = state['trades']['price'][0]
17-
bar['price_close'] = state['trades']['price'][-1]
18-
bar['price_low'] = state['stat']['price_min']
19-
bar['price_high'] = state['stat']['price_max']
20-
bar['price_range'] = state['stat']['price_range']
21-
bar['price_return'] = state['stat']['price_return']
22-
# volume weighted price
23-
dsw = DescrStatsW(data=state['trades']['price'], weights=state['trades']['volume'])
24-
qtiles = dsw.quantile(probs=[0.1, 0.5, 0.9]).values
25-
bar['price_wq10'] = qtiles[0]
26-
bar['price_wq50'] = qtiles[1]
27-
bar['price_wq90'] = qtiles[2]
28-
bar['price_wq_range'] = bar['price_wq90'] - bar['price_wq10']
29-
bar['price_wmean'] = dsw.mean
30-
bar['price_wstd'] = dsw.std
31-
# jma
32-
bar['jma_open'] = state['trades']['jma'][0]
33-
bar['jma_close'] = state['trades']['jma'][-1]
34-
bar['jma_low'] = state['stat']['jma_min']
35-
bar['jma_high'] = state['stat']['jma_max']
36-
bar['jma_range'] = state['stat']['jma_range']
37-
bar['jma_return'] = state['stat']['jma_return']
38-
# volume weighted jma
39-
dsw = DescrStatsW(data=state['trades']['jma'], weights=state['trades']['volume'])
40-
qtiles = dsw.quantile(probs=[0.1, 0.5, 0.9]).values
41-
bar['jma_wq10'] = qtiles[0]
42-
bar['jma_wq50'] = qtiles[1]
43-
bar['jma_wq90'] = qtiles[2]
44-
bar['jma_wq_range'] = bar['jma_wq90'] - bar['jma_wq10']
45-
bar['jma_wmean'] = dsw.mean
46-
bar['jma_wstd'] = dsw.std
47-
# tick/vol/dollar/imbalance
48-
bar['tick_count'] = state['stat']['tick_count']
49-
bar['volume'] = state['stat']['volume']
50-
bar['dollars'] = state['stat']['dollars']
51-
bar['tick_imbalance'] = state['stat']['tick_imbalance']
52-
bar['volume_imbalance'] = state['stat']['volume_imbalance']
53-
bar['dollar_imbalance'] = state['stat']['dollar_imbalance']
54-
55-
return bar
565

57-
58-
def trades_to_bar(trades: dict, bar_trigger: str) -> dict:
6+
def trades_to_bar(ticks) -> dict:
7+
8+
if type(ticks) != pd.DataFrame:
9+
ticks = pd.DataFrame(ticks)
5910

6011
bar = {}
61-
if state['stat']['tick_count'] == 0:
62-
return bar
63-
64-
bar['bar_trigger'] = bar_trigger
6512
# time
66-
bar['open_at'] = trades['date_time'][0]
67-
bar['close_at'] = trades['date_time'][-1]
13+
bar['open_at'] = ticks.nyc_time.iloc[0]
14+
bar['close_at'] = ticks.nyc_time.iloc[-1]
6815
bar['duration_td'] = bar['close_at'] - bar['open_at']
6916
# price
70-
bar['price_open'] = trades['price'][0]
71-
bar['price_close'] = trades['price'][-1]
72-
bar['price_low'] = min(trades['price'])
73-
bar['price_high'] = max(trades['price'])
17+
bar['price_open'] = ticks.price.values[0]
18+
bar['price_close'] = ticks.price.values[-1]
19+
bar['price_low'] = ticks.price.min()
20+
bar['price_high'] = ticks.price.max()
7421
bar['price_range'] = bar['price_high'] - bar['price_low']
7522
bar['price_return'] = bar['price_close'] - bar['price_close']
7623
# volume weighted price
77-
dsw = DescrStatsW(data=trades['price'], weights=trades['volume'])
24+
dsw = DescrStatsW(data=ticks.price, weights=ticks.volume)
7825
qtiles = dsw.quantile(probs=[0.1, 0.5, 0.9]).values
7926
bar['price_wq10'] = qtiles[0]
8027
bar['price_wq50'] = qtiles[1]
@@ -83,14 +30,14 @@ def trades_to_bar(trades: dict, bar_trigger: str) -> dict:
8330
bar['price_wmean'] = dsw.mean
8431
bar['price_wstd'] = dsw.std
8532
# jma
86-
bar['jma_open'] = trades['jma'][0]
87-
bar['jma_close'] = trades['jma'][-1]
88-
bar['jma_low'] = min(trades['jma'])
89-
bar['jma_high'] = max(trades['jma'])
33+
bar['jma_open'] = ticks.jma.values[0]
34+
bar['jma_close'] = ticks.jma.values[-1]
35+
bar['jma_low'] = ticks.jma.min()
36+
bar['jma_high'] = ticks.jma.max()
9037
bar['jma_range'] = bar['jma_high'] - bar['jma_low']
9138
bar['jma_return'] = bar['jma_close'] - bar['jma_open']
9239
# volume weighted jma
93-
dsw = DescrStatsW(data=trades['jma'], weights=state['trades']['volume'])
40+
dsw = DescrStatsW(data=ticks.jma, weights=ticks.volume)
9441
qtiles = dsw.quantile(probs=[0.1, 0.5, 0.9]).values
9542
bar['jma_wq10'] = qtiles[0]
9643
bar['jma_wq50'] = qtiles[1]
@@ -99,11 +46,11 @@ def trades_to_bar(trades: dict, bar_trigger: str) -> dict:
9946
bar['jma_wmean'] = dsw.mean
10047
bar['jma_wstd'] = dsw.std
10148
# tick/vol/dollar/imbalance
102-
bar['tick_count'] = len(trades)
103-
bar['volume'] = sum(trades['volume'])
104-
bar['dollars'] = bar['volume'] * bar['jma_wmean']
105-
bar['tick_imbalance'] = sum(trades['side'])
106-
bar['volume_imbalance'] = bar['tick_imbalance'] * bar['volume']
107-
bar['dollar_imbalance'] = bar['volume_imbalance'] * bar['jma_wmean']
49+
bar['tick_count'] = ticks.shape[0]
50+
bar['volume'] = ticks.volume.sum()
51+
bar['dollars'] = (ticks.volume * ticks.price).sum()
52+
bar['tick_imbalance'] = ticks.side.sum()
53+
bar['volume_imbalance'] = (ticks.volume * ticks.side).sum()
54+
bar['dollar_imbalance'] = (ticks.volume * ticks.price * ticks.side).sum()
10855

10956
return bar

bar_labels.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,18 @@ def get_tb_outcome(reward_ratio: float, risk_level: float, side: str, label_pric
3232
if side=='long':
3333
if goal=='profit':
3434
target_price = first_price + (risk_level * reward_ratio)
35-
target_at = label_prices[label_prices[price_col] >= target_price].min()['date_time']
35+
target_at = label_prices[label_prices[price_col] >= target_price].min()['nyc_time']
3636
elif goal=='stop':
3737
target_price = first_price - risk_level
38-
target_at = label_prices[label_prices[price_col] < target_price].min()['date_time']
38+
target_at = label_prices[label_prices[price_col] < target_price].min()['nyc_time']
3939
reward_ratio = -1
4040
elif side=='short':
4141
if goal=='profit':
4242
target_price = first_price - (risk_level * reward_ratio)
43-
target_at = label_prices[label_prices[price_col] <= target_price].min()['date_time']
43+
target_at = label_prices[label_prices[price_col] <= target_price].min()['nyc_time']
4444
elif goal=='stop':
4545
target_price = first_price + risk_level
46-
target_at = label_prices[label_prices[price_col] > target_price].min()['date_time']
46+
target_at = label_prices[label_prices[price_col] > target_price].min()['nyc_time']
4747
reward_ratio = -1
4848

4949
reward_ratio = reward_ratio * -1
@@ -149,7 +149,7 @@ def get_concurrent_stats(lbars_df: pd.DataFrame) -> dict:
149149
def get_label_ticks(ticks_df: pd.DataFrame, label_start_at: pd._libs.tslibs.timestamps.Timestamp, horizon_mins: int) -> pd.DataFrame:
150150
delayed_label_start_at = label_start_at + pd.Timedelta(value=3, unit='seconds') # inference+network latency compensation
151151
label_end_at = label_start_at + pd.Timedelta(value=horizon_mins, unit='minutes')
152-
label_prices = ticks_df.loc[(ticks_df['date_time'] >= delayed_label_start_at) & (ticks_df['date_time'] < label_end_at)]
152+
label_prices = ticks_df.loc[(ticks_df['nyc_time'] >= delayed_label_start_at) & (ticks_df['nyc_time'] < label_end_at)]
153153
return label_prices, label_end_at
154154

155155

bar_samples.py

Lines changed: 35 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,7 @@
1-
from datetime import time
2-
import pandas as pd
3-
from bar_features import state_to_bar
4-
from utils_filters import TickRule, MADFilter, JMAFilter
1+
from bar_features import trades_to_bar
52

63

74

8-
class BarActor:
9-
10-
def __init__(self, thresh: dict):
11-
self.state = reset_state(thresh)
12-
self.bars = []
13-
14-
def update(self, tick: dict):
15-
self.bars, self.state = update_bar_state(tick, self.state, self.bars, self.state['thresh'])
16-
17-
185
def reset_state(thresh: dict={}) -> dict:
196
state = {}
207
state['thresh'] = thresh
@@ -37,24 +24,18 @@ def reset_state(thresh: dict={}) -> dict:
3724
state['stat']['dollar_imbalance'] = 0
3825
# copy of tick events
3926
state['trades'] = {}
40-
state['trades']['date_time'] = []
27+
state['trades']['nyc_time'] = []
4128
state['trades']['price'] = []
4229
state['trades']['volume'] = []
4330
state['trades']['side'] = []
4431
state['trades']['jma'] = []
4532
# trigger status
46-
state['trigger_yet?!'] = 'waiting'
47-
return state
48-
49-
50-
def imbalance_net(state: dict) -> dict:
51-
state['stat']['tick_imbalance'] += state['trades']['side'][-1]
52-
state['stat']['volume_imbalance'] += (state['trades']['side'][-1] * state['trades']['volume'][-1])
53-
state['stat']['dollar_imbalance'] += (state['trades']['side'][-1] * state['trades']['volume'][-1] * state['trades']['price'][-1])
33+
state['bar_trigger'] = 'waiting'
5434
return state
5535

5636

5737
def imbalance_runs(state: dict) -> dict:
38+
5839
if len(state['trades']['side']) >= 2:
5940
if state['trades']['side'][-1] == state['trades']['side'][-2]:
6041
state['stat']['tick_run'] += 1
@@ -64,7 +45,7 @@ def imbalance_runs(state: dict) -> dict:
6445
state['stat']['tick_run'] = 0
6546
state['stat']['volume_run'] = 0
6647
state['stat']['dollar_run'] = 0
67-
48+
6849
return state
6950

7051

@@ -91,45 +72,39 @@ def get_next_renko_thresh(renko_size: float, last_bar_return: float, reversal_mu
9172
state['thresh']['renko_bear'] = -state['thresh']['renko_size']
9273

9374
if state['stat'][state['thresh']['renko_return']] >= state['thresh']['renko_bull']:
94-
state['trigger_yet?!'] = 'renko_up'
75+
state['bar_trigger'] = 'renko_up'
9576
if state['stat'][state['thresh']['renko_return']] < state['thresh']['renko_bear']:
96-
state['trigger_yet?!'] = 'renko_down'
77+
state['bar_trigger'] = 'renko_down'
9778

9879
if 'volume_imbalance' in state['thresh'] and abs(state['stat']['volume_imbalance']) >= state['thresh']['volume_imbalance']:
99-
state['trigger_yet?!'] = 'volume_imbalance'
80+
state['bar_trigger'] = 'volume_imbalance'
10081

10182
if 'max_duration_td' in state['thresh'] and state['stat']['duration_td'] > state['thresh']['max_duration_td']:
102-
state['trigger_yet?!'] = 'duration'
83+
state['bar_trigger'] = 'duration'
10384

10485
# over-ride newbar trigger with 'minimum' thresholds
10586
if 'min_duration_td' in state['thresh'] and state['stat']['duration_td'] < state['thresh']['min_duration_td']:
106-
state['trigger_yet?!'] = 'waiting'
87+
state['bar_trigger'] = 'waiting'
10788

10889
if 'min_tick_count' in state['thresh'] and state['stat']['tick_count'] < state['thresh']['min_tick_count']:
109-
state['trigger_yet?!'] = 'waiting'
90+
state['bar_trigger'] = 'waiting'
11091

11192
return state
11293

11394

114-
def update_bar_state(tick: dict, state: dict, bars: list, thresh: dict={}) -> tuple:
115-
116-
state['trades']['date_time'].append(tick['date_time'])
95+
def update_bar_state(tick: dict, state: dict, bars: list=[], thresh: dict={}) -> tuple:
96+
# append tick
97+
state['trades']['nyc_time'].append(tick['nyc_time'])
11798
state['trades']['price'].append(tick['price'])
118-
state['trades']['jma'].append(tick['jma'])
11999
state['trades']['volume'].append(tick['volume'])
120100
state['trades']['side'].append(tick['side'])
121-
# if len(state['trades']['price']) >= 2:
122-
# tick_side = tick_rule(
123-
# latest_price=state['trades']['price'][-1],
124-
# prev_price=state['trades']['price'][-2],
125-
# last_side=state['trades']['side'][-1],
126-
# )
127-
# else:
128-
# tick_side = 0
129-
# state['trades']['side'].append(tick_side)
130-
state = imbalance_net(state)
131-
# state = imbalance_runs(state)
132-
state['stat']['duration_td'] = state['trades']['date_time'][-1] - state['trades']['date_time'][0]
101+
state['trades']['jma'].append(tick['jma'])
102+
# imbalances
103+
state['stat']['tick_imbalance'] += state['trades']['side'][-1]
104+
state['stat']['volume_imbalance'] += (state['trades']['side'][-1] * state['trades']['volume'][-1])
105+
state['stat']['dollar_imbalance'] += (state['trades']['side'][-1] * state['trades']['volume'][-1] * state['trades']['price'][-1])
106+
# other
107+
state['stat']['duration_td'] = state['trades']['nyc_time'][-1] - state['trades']['nyc_time'][0]
133108
state['stat']['tick_count'] += 1
134109
state['stat']['volume'] += tick['volume']
135110
state['stat']['dollars'] += tick['price'] * tick['volume']
@@ -146,71 +121,24 @@ def update_bar_state(tick: dict, state: dict, bars: list, thresh: dict={}) -> tu
146121
state['stat']['jma_return'] = tick['jma'] - state['trades']['jma'][0]
147122
# check state tirggered sample threshold
148123
state = check_bar_thresholds(state)
149-
if state['trigger_yet?!'] != 'waiting':
150-
new_bar = state_to_bar(state)
124+
125+
if state['bar_trigger'] != 'waiting':
126+
# new_bar = state_to_bar(state)
127+
new_bar = trades_to_bar(ticks=state['trades'])
151128
bars.append(new_bar)
152129
state = reset_state(thresh)
130+
else:
131+
new_bar = {'bar_trigger': 'waiting'}
153132

154-
return bars, state
155-
133+
return bars, state, new_bar
156134

157-
def filter_tick(tick: dict, mad_filter: MADFilter, jma_filter: JMAFilter, tick_rule: TickRule) -> dict:
158135

159-
tick['date_time'] = tick['sip_dt'].tz_localize('UTC').tz_convert('America/New_York')
160-
161-
mad_filter.update(next_value=tick['price']) # update mad filter
136+
class BarSampler:
162137

163-
irregular_conditions = [2, 5, 7, 10, 13, 15, 16, 20, 21, 22, 29, 33, 38, 52, 53]
164-
165-
if tick['volume'] < 1: # zero volume/size tick
166-
tick['status'] = 'zero_volume'
167-
elif pd.Series(tick['conditions']).isin(irregular_conditions).any(): # 'irrgular' tick condition
168-
tick['status'] = 'irregular_condition'
169-
elif abs(tick['sip_dt'] - tick['exchange_dt']) > pd.to_timedelta(2, unit='S'): # remove large ts deltas
170-
tick['status'] = 'ts_delta'
171-
elif mad_filter.status != 'mad_clean': # MAD filter outlier
172-
tick['status'] = 'mad_outlier'
173-
else: # 'clean' tick
174-
tick['status'] = 'clean'
175-
tick['jma'] = jma_filter.update(next_value=tick['price']) # update jma filter
176-
tick['side'] = tick_rule.update(next_price=tick['price']) # update tick rule
177-
if tick['date_time'].to_pydatetime().time() < time(hour=9, minute=30):
178-
tick['status'] = 'clean_pre_market'
179-
elif tick['date_time'].hour >= 16:
180-
tick['status'] = 'clean_after_hours'
181-
else:
182-
tick['status'] = 'clean_open_market'
183-
184-
# remove fields
185-
tick.pop('sip_dt', None)
186-
tick.pop('exchange_dt', None)
187-
tick.pop('conditions', None)
188-
189-
return tick
190-
138+
def __init__(self, thresh: dict):
139+
self.state = reset_state(thresh)
140+
self.bars = []
191141

192-
def build_bars(ticks_df: pd.DataFrame, thresh: dict) -> tuple:
193-
194-
mad_filter = MADFilter(thresh['mad_value_winlen'], thresh['mad_deviation_winlen'], thresh['mad_k'])
195-
jma_filter = JMAFilter(ticks_df['price'].values[0], thresh['jma_winlen'], thresh['jma_power'])
196-
tick_rule = TickRule()
197-
bar_actor = BarActor(thresh)
198-
ticks = []
199-
for t in ticks_df.itertuples():
200-
tick = {
201-
'sip_dt': t.sip_dt,
202-
'exchange_dt': t.exchange_dt,
203-
'price': t.price,
204-
'volume': t.size,
205-
'conditions': t.conditions,
206-
'status': 'raw',
207-
}
208-
tick = filter_tick(tick, mad_filter, jma_filter, tick_rule)
209-
210-
if tick['status'] == 'clean_open_market':
211-
# if tick['status'].startswith('clean'):
212-
bar_actor.update(tick)
213-
214-
ticks.append(tick)
215-
216-
return bar_actor.bars, ticks
142+
def update(self, tick: dict):
143+
self.bars, self.state, new_bar = update_bar_state(tick, self.state, self.bars, self.state['thresh'])
144+
return new_bar

0 commit comments

Comments
 (0)