Skip to content

Commit

Permalink
orderbook class perf improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Škoda committed Aug 9, 2024
1 parent 6d4821a commit 69a27bd
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 6 deletions.
4 changes: 4 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
History
=======

0.17.0 (2024-08-09)
-------------------

* orderbook class performance improvements

0.16.0 (2024-08-06)
-------------------

Expand Down
31 changes: 25 additions & 6 deletions lakeapi/orderbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,26 @@

class OrderBookUpdater:
''' Maintains order book snapshot while iterating over a dataframe with order book deltas. '''
def __init__(self, df: 'pd.DataFrame', depth_limit_min: int = 3000, depth_limit_max: int = 5000):
    '''
    Set up empty books and per-column arrays for replaying order book deltas.

    :param df: deltas dataframe; the columns ``received_time``, ``sequence_number``,
        ``side_is_bid``, ``price`` and ``size`` are read from it.
    :param depth_limit_min: number of best price levels kept on a side when that side is trimmed.
    :param depth_limit_max: number of price levels a side may hold before it gets trimmed.
    '''
    # Books are keyed by price (values presumably hold the size at that price -- confirm
    # against the update loop); float64 -> float64 typed dicts handed to the jitted updater.
    self.bid = Dict.empty(key_type = float64, value_type = float64)
    self.ask = Dict.empty(key_type = float64, value_type = float64)

    # Replay position and the metadata of the most recently applied batch.
    self.current_index = 0
    self.received_timestamp = None
    self.sequence_number = None

    # Each column is converted once and kept as its own 1-D array, so it can be passed
    # to the jitted updater directly instead of being sliced out of a stacked 2-D array.
    self._received_time = df['received_time'].astype('int64').values
    self._sequence_number = df['sequence_number'].astype('int64').values
    self._side_is_bid = df['side_is_bid'].astype('float64').values
    self._price = df['price'].astype('float64').values
    self._size = df['size'].astype('float64').values

    self.depth_limit_min = depth_limit_min
    self.depth_limit_max = depth_limit_max

    # Two-slot float cache shared with _update_more, seeded with zeros
    # (presumably best bid / best ask -- confirm against the update loop).
    self._bests_cache = List()
    self._bests_cache.append(0.)
    self._bests_cache.append(0.)

@staticmethod
@njit(cache = True)
def _update_more(side_is_bid, prices, sizes, received_time, sequence_number, current_index, bid_book, ask_book, bests_cache):
def _update_more(side_is_bid, prices, sizes, received_time, sequence_number, current_index, bid_book, ask_book, bests_cache, depth_limit_min, depth_limit_max):
starting_received_time = received_time[current_index]
while received_time[current_index] == starting_received_time:
price = prices[current_index]
Expand Down Expand Up @@ -50,17 +55,31 @@ def _update_more(side_is_bid, prices, sizes, received_time, sequence_number, cur
current_index += 1
if current_index >= prices.shape[0]:
break

# Cleanup: once a side holds more than depth_limit_max price levels, keep only its
# depth_limit_min best prices (highest bids, lowest asks) and drop the rest.
# The two sides are checked independently so both can be trimmed in the same call;
# with `elif` the ask side was skipped whenever the bid side had just been trimmed.
if len(bid_book) > depth_limit_max:
    sorted_bids = sorted(bid_book.keys(), reverse=True)
    for price in sorted_bids[depth_limit_min:]:
        del bid_book[price]
if len(ask_book) > depth_limit_max:
    sorted_asks = sorted(ask_book.keys())
    for price in sorted_asks[depth_limit_min:]:
        del ask_book[price]

return current_index, sequence_number[current_index-1], received_time[current_index-1]

def process_next_update(self, starting_row: Optional[int] = None) -> int:
    '''
    Apply the next batch of deltas (consecutive rows sharing one received_time) to the book.

    Updates ``self.bid`` / ``self.ask`` in place and stores the batch's sequence number
    and received timestamp on the instance.

    :param starting_row: optional row index to continue from instead of the stored position.
    :return: index of the first row that has not been processed yet, or 0 when the
        position is at or past the end of the data and nothing was processed.
    '''
    # Seek first, then validate: an explicit starting_row must also pass the bounds
    # check before it is used as an array index inside the jitted updater.
    if starting_row is not None:
        self.current_index = starting_row
    if self.current_index >= self._side_is_bid.shape[0]:
        return 0

    self.current_index, self.sequence_number, self.received_timestamp = \
        self._update_more(
            self._side_is_bid, self._price, self._size, self._received_time, self._sequence_number,
            self.current_index, self.bid, self.ask, self._bests_cache, self.depth_limit_min, self.depth_limit_max
        )

    return self.current_index

Expand Down

0 comments on commit 69a27bd

Please sign in to comment.