From 6ecdd830b5508ca61b29eadeaa7bdb944af5708f Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 12:20:27 -0500 Subject: [PATCH 01/35] organized parameters in Makefile --- Makefile | 59 +++++++++++++------------------------ precog/utils/general.py | 16 ++++++++++ precog/validators/reward.py | 17 +---------- 3 files changed, 37 insertions(+), 55 deletions(-) diff --git a/Makefile b/Makefile index cee96c2..fdf4fda 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,19 @@ -network = ws://127.0.0.1:9944 -netuid = 1 +## Network Parameters ## +finney = wss://entrypoint-finney.opentensor.ai:443 +testnet = wss://test.finney.opentensor.ai:443 +locanet = ws://127.0.0.1:9944 + +testnet_netuid = 256 +localnet_netuid = 1 logging_level = trace # options= ['info', 'debug', 'trace'] -coldkey = cm-owner + +netuid = $(testnet_netuid) +network = $(testnet) + +## User Parameters +coldkey = default +validator_hotkey = validator +miner_hotkey = miner metagraph: btcli subnet metagraph --netuid $(netuid) --subtensor.chain_endpoint $(network) @@ -16,28 +28,18 @@ validator: python start_validator.py \ --neuron.name validator \ --wallet.name $(coldkey) \ - --wallet.hotkey validator \ - --subtensor.chain_endpoint $(network) \ + --wallet.hotkey $(validator_hotkey) \ + --network $(network) \ --axon.port 30335 \ --netuid $(netuid) \ --logging.level $(logging_level) -validator2: - python start_validator.py \ - --neuron.name validator2 \ - --wallet.name $(coldkey) \ - --wallet.hotkey validator2 \ - --subtensor.chain_endpoint $(network) \ - --axon.port 30339 \ - --netuid $(netuid) \ - --logging.level $(logging_level) - miner: python start_miner.py \ --neuron.name miner \ --wallet.name $(coldkey) \ - --wallet.hotkey miner \ - --subtensor.chain_endpoint $(network) \ + --wallet.hotkey $(miner_hotkey) \ + --network $(network) \ --axon.port 30336 \ --netuid $(netuid) \ --logging.level $(logging_level) \ @@ -49,31 +51,10 @@ miner2: --neuron.name miner2 \ --wallet.name $(coldkey) \ --wallet.hotkey miner2 \ - --subtensor.chain_endpoint $(network) \ + --network $(network) \ --axon.port 30337 \ --netuid $(netuid) \ --logging.level $(logging_level) \ --timeout 16 \ --forward_function forward_bad -miner3: - python start_miner.py \ - --neuron.name miner3 \ - --wallet.name $(coldkey) \ - --wallet.hotkey miner3 \ - --subtensor.chain_endpoint $(network) \ - --axon.port 30338 \ - --netuid $(netuid) \ - --logging.level $(logging_level) \ - --timeout 16 \ - --forward_function forward - -setup_local: - btcli wallet faucet --wallet.name $(coldkey) --subtensor.chain_endpoint $(network) ;\ - btcli subnet create --wallet.name $(coldkey) --subtensor.chain_endpoint $(network) ;\ - btcli subnet register \ - --wallet.name $(coldkey) \ - --wallet.hotkey validator \ - --netuid $(netuid) - --subtensor.chain_endpoint $(network) ;\ - btcli stake add --wallet.name $(coldkey) --wallet.hotkey validator --amount 1024 ;\ diff --git a/precog/utils/general.py b/precog/utils/general.py index fbe3d5c..912b5f1 100644 --- a/precog/utils/general.py +++ b/precog/utils/general.py @@ -84,3 +84,19 @@ def get_version() -> Optional[str]: raise Exception("Version information not found") return version_match.group() + + +def rank(vector): + if vector is None or len(vector) <= 1: + return np.array([0]) + else: + # Sort the array and get the indices that would sort it + sorted_indices = np.argsort(vector) + sorted_vector = vector[sorted_indices] + # Create a mask for where each new unique value starts in the sorted array + unique_mask = np.concatenate(([True], sorted_vector[1:] != sorted_vector[:-1])) + # Use cumulative sum of the unique mask to get the ranks, then assign back in original order + ranks = np.cumsum(unique_mask) - 1 + rank_vector = np.empty_like(vector, dtype=int) + rank_vector[sorted_indices] = ranks + return rank_vector diff --git a/precog/validators/reward.py b/precog/validators/reward.py index 22e14c8..bad11fc 100644 --- a/precog/validators/reward.py +++ b/precog/validators/reward.py @@ -5,6 +5,7 @@ import numpy as np from precog.protocol import Challenge +from precog.utils.general import rank from precog.utils.timestamp import align_timepoints, get_now, mature_dictionary, round_minute_down @@ -49,22 +50,6 @@ def calc_rewards( return rewards -def rank(vector): - if vector is None or len(vector) <= 1: - return np.array([0]) - else: - # Sort the array and get the indices that would sort it - sorted_indices = np.argsort(vector) - sorted_vector = vector[sorted_indices] - # Create a mask for where each new unique value starts in the sorted array - unique_mask = np.concatenate(([True], sorted_vector[1:] != sorted_vector[:-1])) - # Use cumulative sum of the unique mask to get the ranks, then assign back in original order - ranks = np.cumsum(unique_mask) - 1 - rank_vector = np.empty_like(vector, dtype=int) - rank_vector[sorted_indices] = ranks - return rank_vector - - def interval_error(intervals, cm_prices): if intervals is None: return np.array([0]) From c1b016cf5184fd177b608631fd855a88d68ef420 Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 12:25:44 -0500 Subject: [PATCH 02/35] moved to more advanced loop handler --- precog/utils/general.py | 31 +++++++++++--- precog/validators/weight_setter.py | 69 +++++++++++------------------- 2 files changed, 52 insertions(+), 48 deletions(-) diff --git a/precog/utils/general.py b/precog/utils/general.py index 912b5f1..f0a343f 100644 --- a/precog/utils/general.py +++ b/precog/utils/general.py @@ -1,9 +1,11 @@ import argparse +import asyncio import re from typing import Optional import bittensor as bt import git +from numpy import argsort, array, concatenate, cumsum, empty_like import requests from precog.utils.classes import NestedNamespace @@ -88,15 +90,34 @@ def get_version() -> Optional[str]: def rank(vector): if vector is None or len(vector) <= 1: - return np.array([0]) + return array([0]) else: # Sort the array and get the indices that would sort it - sorted_indices = np.argsort(vector) + sorted_indices = argsort(vector) sorted_vector = vector[sorted_indices] # Create a mask for where each new unique value starts in the sorted array - unique_mask = np.concatenate(([True], sorted_vector[1:] != sorted_vector[:-1])) + unique_mask = concatenate(([True], sorted_vector[1:] != sorted_vector[:-1])) # Use cumulative sum of the unique mask to get the ranks, then assign back in original order - ranks = np.cumsum(unique_mask) - 1 - rank_vector = np.empty_like(vector, dtype=int) + ranks = cumsum(unique_mask) - 1 + rank_vector = empty_like(vector, dtype=int) rank_vector[sorted_indices] = ranks return rank_vector + + +async def loop_handler(self, func, sleep_time=120): + try: + while not self.stop_event.is_set(): + async with self.lock: + await func() + await asyncio.sleep(sleep_time) + except asyncio.CancelledError: + bt.logging.error(f"{func.__name__} cancelled") + raise + except KeyboardInterrupt: + raise + except Exception as e: + bt.logging.error(f"{func.__name__} raised error: {e}") + raise + finally: + async with self.lock: + self.stop_event.set() diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 7fcd768..54e3ab6 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -11,6 +11,7 @@ from precog.protocol import Challenge from precog.utils.bittensor import check_uid_availability, print_info, setup_bittensor_objects from precog.utils.classes import MinerHistory +from precog.utils.general import loop_handler from precog.utils.timestamp import elapsed_seconds, get_before, get_now, is_query_time, iso8601_to_datetime from precog.utils.wandb import log_wandb, setup_wandb from precog.validators.reward import calc_rewards @@ -51,10 +52,10 @@ def __init__(self, config=None, loop=None): self.stop_event = asyncio.Event() bt.logging.info("Setup complete, starting loop") self.loop.create_task( - self.loop_handler(self.scheduled_prediction_request, sleep_time=self.config.print_cadence) + loop_handler(self.scheduled_prediction_request, sleep_time=self.config.print_cadence) ) - self.loop.create_task(self.loop_handler(self.resync_metagraph, sleep_time=self.resync_metagraph_rate)) - self.loop.create_task(self.loop_handler(self.set_weights, sleep_time=self.set_weights_rate)) + self.loop.create_task(loop_handler(self.resync_metagraph, sleep_time=self.resync_metagraph_rate)) + self.loop.create_task(loop_handler(self.set_weights, sleep_time=self.set_weights_rate)) def __exit__(self, exc_type, exc_value, traceback): self.save_state() @@ -67,21 +68,6 @@ def __exit__(self, exc_type, exc_value, traceback): bt.logging.error(f"Error on __exit__ function: {e}") self.loop.stop() - async def loop_handler(self, func, sleep_time=120): - try: - while not self.stop_event.is_set(): - await func() - await asyncio.sleep(sleep_time) - except asyncio.exceptions.CancelledError: - raise - except KeyboardInterrupt: - raise - except Exception: - raise - finally: - async with self.lock: - self.stop_event.set() - self.__exit__(None, None, None) async def get_available_uids(self): miner_uids = [] @@ -93,23 +79,22 @@ async def get_available_uids(self): async def resync_metagraph(self, force=False): """Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph.""" - async with self.lock: - self.blocks_since_sync = self.current_block - self.last_sync - if self.blocks_since_sync >= self.resync_metagraph_rate or force: - bt.logging.info("Syncing Metagraph...") - self.metagraph.sync(subtensor=self.subtensor) - bt.logging.info("Metagraph updated, re-syncing hotkeys, dendrite pool and moving averages") - # Zero out all hotkeys that have been replaced. - self.available_uids = asyncio.run(self.get_available_uids()) - for uid, hotkey in enumerate(self.metagraph.hotkeys): - if (uid not in self.MinerHistory and uid in self.available_uids) or self.hotkeys[uid] != hotkey: - bt.logging.info(f"Replacing hotkey on {uid} with {self.metagraph.hotkeys[uid]}") - self.hotkeys[uid] = hotkey - self.scores[uid] = 0 # hotkey has been replaced - self.MinerHistory[uid] = MinerHistory(uid, timezone=self.timezone) - self.moving_average_scores[uid] = 0 - self.last_sync = self.subtensor.get_current_block() - self.save_state() + self.blocks_since_sync = self.current_block - self.last_sync + if self.blocks_since_sync >= self.resync_metagraph_rate or force: + bt.logging.info("Syncing Metagraph...") + self.metagraph.sync(subtensor=self.subtensor) + bt.logging.info("Metagraph updated, re-syncing hotkeys, dendrite pool and moving averages") + # Zero out all hotkeys that have been replaced. + self.available_uids = asyncio.run(self.get_available_uids()) + for uid, hotkey in enumerate(self.metagraph.hotkeys): + if (uid not in self.MinerHistory and uid in self.available_uids) or self.hotkeys[uid] != hotkey: + bt.logging.info(f"Replacing hotkey on {uid} with {self.metagraph.hotkeys[uid]}") + self.hotkeys[uid] = hotkey + self.scores[uid] = 0 # hotkey has been replaced + self.MinerHistory[uid] = MinerHistory(uid, timezone=self.timezone) + self.moving_average_scores[uid] = 0 + self.last_sync = self.subtensor.get_current_block() + self.save_state() def query_miners(self): timestamp = get_now().isoformat() @@ -133,9 +118,8 @@ def node_query(self, module, method, params): async def set_weights(self): if self.blocks_since_last_update >= self.set_weights_rate: - async with self.lock: - uids = array(self.available_uids) - weights = [self.moving_average_scores[uid] for uid in self.available_uids] + uids = array(self.available_uids) + weights = [self.moving_average_scores[uid] for uid in self.available_uids] for i, j in zip(weights, self.available_uids): bt.logging.debug(f"UID: {j} | Weight: {i}") if sum(weights) == 0: @@ -162,11 +146,10 @@ async def set_weights(self): "Failed to set weights this iteration with message:", msg, ) - async with self.lock: - self.current_block = self.subtensor.get_current_block() - self.blocks_since_last_update = ( - self.current_block - self.node_query("SubtensorModule", "LastUpdate", [self.config.netuid])[self.my_uid] - ) + self.current_block = self.subtensor.get_current_block() + self.blocks_since_last_update = ( + self.current_block - self.node_query("SubtensorModule", "LastUpdate", [self.config.netuid])[self.my_uid] + ) async def scheduled_prediction_request(self): if not hasattr(self, "timestamp"): From 3d87eb63ce208b0f0673cd65368958ad33ea47b2 Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 12:28:59 -0500 Subject: [PATCH 03/35] --network flag not appropriate for the parser --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index fdf4fda..aec52f8 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ validator: --neuron.name validator \ --wallet.name $(coldkey) \ --wallet.hotkey $(validator_hotkey) \ - --network $(network) \ + --subtensor.network $(network) \ --axon.port 30335 \ --netuid $(netuid) \ --logging.level $(logging_level) @@ -39,7 +39,7 @@ miner: --neuron.name miner \ --wallet.name $(coldkey) \ --wallet.hotkey $(miner_hotkey) \ - --network $(network) \ + --subtensor.network $(network) \ --axon.port 30336 \ --netuid $(netuid) \ --logging.level $(logging_level) \ @@ -51,7 +51,7 @@ miner2: --neuron.name miner2 \ --wallet.name $(coldkey) \ --wallet.hotkey miner2 \ - --network $(network) \ + --subtensor.network $(network) \ --axon.port 30337 \ --netuid $(netuid) \ --logging.level $(logging_level) \ From 8b428e0fbb030357dc305758dfd1f0db35e4bd9e Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 12:30:32 -0500 Subject: [PATCH 04/35] --network flag not appropriate for the parser --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index aec52f8..bf6383e 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ validator: --neuron.name validator \ --wallet.name $(coldkey) \ --wallet.hotkey $(validator_hotkey) \ - --subtensor.network $(network) \ + --subtensor.chain_endpoint $(network) \ --axon.port 30335 \ --netuid $(netuid) \ --logging.level $(logging_level) @@ -39,7 +39,7 @@ miner: --neuron.name miner \ --wallet.name $(coldkey) \ --wallet.hotkey $(miner_hotkey) \ - --subtensor.network $(network) \ + --subtensor.chain_endpoint $(network) \ --axon.port 30336 \ --netuid $(netuid) \ --logging.level $(logging_level) \ @@ -51,7 +51,7 @@ miner2: --neuron.name miner2 \ --wallet.name $(coldkey) \ --wallet.hotkey miner2 \ - --subtensor.network $(network) \ + --subtensor.chain_endpoint $(network) \ --axon.port 30337 \ --netuid $(netuid) \ --logging.level $(logging_level) \ From 7df5c8fe257c5302e355702bd17e15eb4b10cea4 Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 12:32:04 -0500 Subject: [PATCH 05/35] missing arg in loop handler --- precog/validators/weight_setter.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 54e3ab6..4986b03 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -51,11 +51,9 @@ def __init__(self, config=None, loop=None): setup_wandb(self) self.stop_event = asyncio.Event() bt.logging.info("Setup complete, starting loop") - self.loop.create_task( - loop_handler(self.scheduled_prediction_request, sleep_time=self.config.print_cadence) - ) - self.loop.create_task(loop_handler(self.resync_metagraph, sleep_time=self.resync_metagraph_rate)) - self.loop.create_task(loop_handler(self.set_weights, sleep_time=self.set_weights_rate)) + self.loop.create_task(loop_handler(self, self.scheduled_prediction_request, sleep_time=self.config.print_cadence)) + self.loop.create_task(loop_handler(self, self.resync_metagraph, sleep_time=self.resync_metagraph_rate)) + self.loop.create_task(loop_handler(self, self.set_weights, sleep_time=self.set_weights_rate)) def __exit__(self, exc_type, exc_value, traceback): self.save_state() From 121d4185da6cb38ef54b64a8719f4b477b1fec5c Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 12:40:20 -0500 Subject: [PATCH 06/35] reduced vpermit limit to 2 in makefile --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index bf6383e..7e4c62f 100644 --- a/Makefile +++ b/Makefile @@ -44,6 +44,7 @@ miner: --netuid $(netuid) \ --logging.level $(logging_level) \ --timeout 16 \ + --vpermit_tao_limit 2 \ --forward_function forward miner2: From b16f89d57054fe06878645768ecfcc8685ca2df5 Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 12:46:48 -0500 Subject: [PATCH 07/35] unnecessary asyncio lock blocks runtime --- precog/validators/weight_setter.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 4986b03..1199827 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -165,12 +165,11 @@ async def scheduled_prediction_request(self): except Exception as e: bt.logging.error(f"Failed to calculate rewards with error: {e}") # Adjust the scores based on responses from miners and update moving average. - async with self.lock: - for i, value in zip(self.available_uids, rewards): - self.moving_average_scores[i] = (1 - self.config.alpha) * self.moving_average_scores[ - i - ] + self.config.alpha * value - self.scores = list(self.moving_average_scores.values()) + for i, value in zip(self.available_uids, rewards): + self.moving_average_scores[i] = (1 - self.config.alpha) * self.moving_average_scores[ + i + ] + self.config.alpha * value + self.scores = list(self.moving_average_scores.values()) if self.config.wandb_on: log_wandb(responses, rewards, self.available_uids) else: From 4b68f67211a693486a895005dcd3be269756a748 Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 12:53:25 -0500 Subject: [PATCH 08/35] linters --- Makefile | 1 - precog/utils/general.py | 2 +- precog/validators/weight_setter.py | 5 +++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 7e4c62f..721c621 100644 --- a/Makefile +++ b/Makefile @@ -58,4 +58,3 @@ miner2: --logging.level $(logging_level) \ --timeout 16 \ --forward_function forward_bad - diff --git a/precog/utils/general.py b/precog/utils/general.py index f0a343f..59b9fc2 100644 --- a/precog/utils/general.py +++ b/precog/utils/general.py @@ -5,8 +5,8 @@ import bittensor as bt import git -from numpy import argsort, array, concatenate, cumsum, empty_like import requests +from numpy import argsort, array, concatenate, cumsum, empty_like from precog.utils.classes import NestedNamespace diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 1199827..067aa5b 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -51,7 +51,9 @@ def __init__(self, config=None, loop=None): setup_wandb(self) self.stop_event = asyncio.Event() bt.logging.info("Setup complete, starting loop") - self.loop.create_task(loop_handler(self, self.scheduled_prediction_request, sleep_time=self.config.print_cadence)) + self.loop.create_task( + loop_handler(self, self.scheduled_prediction_request, sleep_time=self.config.print_cadence) + ) self.loop.create_task(loop_handler(self, self.resync_metagraph, sleep_time=self.resync_metagraph_rate)) self.loop.create_task(loop_handler(self, self.set_weights, sleep_time=self.set_weights_rate)) @@ -66,7 +68,6 @@ def __exit__(self, exc_type, exc_value, traceback): bt.logging.error(f"Error on __exit__ function: {e}") self.loop.stop() - async def get_available_uids(self): miner_uids = [] for uid in range(len(self.metagraph.S)): From b8cb6bac0b0e203d5f056f0659c2bc72f2213ba5 Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 13:02:03 -0500 Subject: [PATCH 09/35] invalid ip error on weight setting --- precog/validators/weight_setter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 067aa5b..eb2a4a4 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -47,6 +47,7 @@ def __init__(self, config=None, loop=None): self.current_block - self.node_query("SubtensorModule", "LastUpdate", [self.config.netuid])[self.my_uid] ) self.tempo = self.node_query("SubtensorModule", "Tempo", [self.config.netuid]) + bt.logging.debug(f"Config:{self.config}") if self.config.wandb_on: setup_wandb(self) self.stop_event = asyncio.Event() @@ -135,7 +136,6 @@ async def set_weights(self): uids=uint_uids, weights=uint_weights, wait_for_inclusion=True, - wait_for_finalization=True, version_key=__spec_version__, ) if result: From c34f9f28573353cf88545963b9947005f198b7fe Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 13:15:54 -0500 Subject: [PATCH 10/35] moved from chain_endpoint to network --- Makefile | 8 ++++---- precog/validators/weight_setter.py | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 721c621..bb1c0fc 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ localnet_netuid = 1 logging_level = trace # options= ['info', 'debug', 'trace'] netuid = $(testnet_netuid) -network = $(testnet) +network = test ## User Parameters coldkey = default @@ -29,7 +29,7 @@ validator: --neuron.name validator \ --wallet.name $(coldkey) \ --wallet.hotkey $(validator_hotkey) \ - --subtensor.chain_endpoint $(network) \ + --network $(network) \ --axon.port 30335 \ --netuid $(netuid) \ --logging.level $(logging_level) @@ -39,7 +39,7 @@ miner: --neuron.name miner \ --wallet.name $(coldkey) \ --wallet.hotkey $(miner_hotkey) \ - --subtensor.chain_endpoint $(network) \ + --network $(network) \ --axon.port 30336 \ --netuid $(netuid) \ --logging.level $(logging_level) \ @@ -52,7 +52,7 @@ miner2: --neuron.name miner2 \ --wallet.name $(coldkey) \ --wallet.hotkey miner2 \ - --subtensor.chain_endpoint $(network) \ + --network $(network) \ --axon.port 30337 \ --netuid $(netuid) \ --logging.level $(logging_level) \ diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index eb2a4a4..86a0b89 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -47,7 +47,6 @@ def __init__(self, config=None, loop=None): self.current_block - self.node_query("SubtensorModule", "LastUpdate", [self.config.netuid])[self.my_uid] ) self.tempo = self.node_query("SubtensorModule", "Tempo", [self.config.netuid]) - bt.logging.debug(f"Config:{self.config}") if self.config.wandb_on: setup_wandb(self) self.stop_event = asyncio.Event() From a497a2701e8d28ae95fb74ee85fefefc2a19df8b Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 13:20:35 -0500 Subject: [PATCH 11/35] changed how network is handled in config --- precog/utils/bittensor.py | 10 ++++------ precog/utils/general.py | 6 ++---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/precog/utils/bittensor.py b/precog/utils/bittensor.py index 7903a33..2d976df 100644 --- a/precog/utils/bittensor.py +++ b/precog/utils/bittensor.py @@ -5,15 +5,13 @@ def setup_bittensor_objects(self): # if chain enpoint isn't set, use the network arg - if self.config.subtensor.chain_endpoint is None: - self.config.subtensor.chain_endpoint = bt.subtensor.determine_chain_endpoint_and_network( - self.config.subtensor.network - )[1] + if self.config.chain_endpoint is None: + self.config.chain_endpoint = bt.subtensor.determine_chain_endpoint_and_network(self.config.network)[1] else: # if chain endpoint is set, overwrite network arg - self.config.subtensor.network = self.config.subtensor.chain_endpoint + self.config.network = self.config.subtensor.chain_endpoint # Initialize subtensor. - self.subtensor = bt.subtensor(config=self.config, network=self.config.subtensor.network) + self.subtensor = bt.subtensor(config=self.config, network=self.config.network) self.metagraph = self.subtensor.metagraph(self.config.netuid) self.wallet = bt.wallet(name=self.config.wallet.name, hotkey=self.config.wallet.hotkey) self.dendrite = bt.dendrite(wallet=self.wallet) diff --git a/precog/utils/general.py b/precog/utils/general.py index 59b9fc2..af17e3e 100644 --- a/precog/utils/general.py +++ b/precog/utils/general.py @@ -27,11 +27,9 @@ def parse_arguments(parser: Optional[argparse.ArgumentParser] = None): """ if parser is None: parser = argparse.ArgumentParser(description="Configuration") + parser.add_argument("--chain_endpoint", type=str, default=None) # for testnet: wss://test.finney.opentensor.ai:443 parser.add_argument( - "--subtensor.chain_endpoint", type=str, default=None - ) # for testnet: wss://test.finney.opentensor.ai:443 - parser.add_argument( - "--subtensor.network", + "--network", choices=["finney", "test", "local"], default="finney", ) From 9313a46eb4998a41ac9449a18fcdd7e74f36a104 Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 13:21:46 -0500 Subject: [PATCH 12/35] typo --- precog/utils/bittensor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/precog/utils/bittensor.py b/precog/utils/bittensor.py index 2d976df..42790bc 100644 --- a/precog/utils/bittensor.py +++ b/precog/utils/bittensor.py @@ -9,7 +9,7 @@ def setup_bittensor_objects(self): self.config.chain_endpoint = bt.subtensor.determine_chain_endpoint_and_network(self.config.network)[1] else: # if chain endpoint is set, overwrite network arg - self.config.network = self.config.subtensor.chain_endpoint + self.config.network = self.config.chain_endpoint # Initialize subtensor. self.subtensor = bt.subtensor(config=self.config, network=self.config.network) self.metagraph = self.subtensor.metagraph(self.config.netuid) From 603c9fa3b966d5f0de24eb28cff76ecbf8bfd8d3 Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 13:27:22 -0500 Subject: [PATCH 13/35] reverted some changes --- Makefile | 6 +++--- precog/utils/bittensor.py | 10 ++++++---- precog/utils/general.py | 6 ++++-- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index bb1c0fc..44b4b24 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ localnet_netuid = 1 logging_level = trace # options= ['info', 'debug', 'trace'] netuid = $(testnet_netuid) -network = test +network = $(testnet) ## User Parameters coldkey = default @@ -29,7 +29,7 @@ validator: --neuron.name validator \ --wallet.name $(coldkey) \ --wallet.hotkey $(validator_hotkey) \ - --network $(network) \ + --subtensor.network $(network) \ --axon.port 30335 \ --netuid $(netuid) \ --logging.level $(logging_level) @@ -39,7 +39,7 @@ miner: --neuron.name miner \ --wallet.name $(coldkey) \ --wallet.hotkey $(miner_hotkey) \ - --network $(network) \ + --subtensor.network $(network) \ --axon.port 30336 \ --netuid $(netuid) \ --logging.level $(logging_level) \ diff --git a/precog/utils/bittensor.py b/precog/utils/bittensor.py index 42790bc..7903a33 100644 --- a/precog/utils/bittensor.py +++ b/precog/utils/bittensor.py @@ -5,13 +5,15 @@ def setup_bittensor_objects(self): # if chain enpoint isn't set, use the network arg - if self.config.chain_endpoint is None: - self.config.chain_endpoint = bt.subtensor.determine_chain_endpoint_and_network(self.config.network)[1] + if self.config.subtensor.chain_endpoint is None: + self.config.subtensor.chain_endpoint = bt.subtensor.determine_chain_endpoint_and_network( + self.config.subtensor.network + )[1] else: # if chain endpoint is set, overwrite network arg - self.config.network = self.config.chain_endpoint + self.config.subtensor.network = self.config.subtensor.chain_endpoint # Initialize subtensor. - self.subtensor = bt.subtensor(config=self.config, network=self.config.network) + self.subtensor = bt.subtensor(config=self.config, network=self.config.subtensor.network) self.metagraph = self.subtensor.metagraph(self.config.netuid) self.wallet = bt.wallet(name=self.config.wallet.name, hotkey=self.config.wallet.hotkey) self.dendrite = bt.dendrite(wallet=self.wallet) diff --git a/precog/utils/general.py b/precog/utils/general.py index af17e3e..59b9fc2 100644 --- a/precog/utils/general.py +++ b/precog/utils/general.py @@ -27,9 +27,11 @@ def parse_arguments(parser: Optional[argparse.ArgumentParser] = None): """ if parser is None: parser = argparse.ArgumentParser(description="Configuration") - parser.add_argument("--chain_endpoint", type=str, default=None) # for testnet: wss://test.finney.opentensor.ai:443 parser.add_argument( - "--network", + "--subtensor.chain_endpoint", type=str, default=None + ) # for testnet: wss://test.finney.opentensor.ai:443 + parser.add_argument( + "--subtensor.network", choices=["finney", "test", "local"], default="finney", ) From 6aa5ed1cebc3df36df81543786a5326d5cdebc5b Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 13:28:40 -0500 Subject: [PATCH 14/35] fixed Makefile error --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 44b4b24..721c621 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ validator: --neuron.name validator \ --wallet.name $(coldkey) \ --wallet.hotkey $(validator_hotkey) \ - --subtensor.network $(network) \ + --subtensor.chain_endpoint $(network) \ --axon.port 30335 \ --netuid $(netuid) \ --logging.level $(logging_level) @@ -39,7 +39,7 @@ miner: --neuron.name miner \ --wallet.name $(coldkey) \ --wallet.hotkey $(miner_hotkey) \ - --subtensor.network $(network) \ + --subtensor.chain_endpoint $(network) \ --axon.port 30336 \ --netuid $(netuid) \ --logging.level $(logging_level) \ @@ -52,7 +52,7 @@ miner2: --neuron.name miner2 \ --wallet.name $(coldkey) \ --wallet.hotkey miner2 \ - --network $(network) \ + --subtensor.chain_endpoint $(network) \ --axon.port 30337 \ --netuid $(netuid) \ --logging.level $(logging_level) \ From 64c486a26d44f1cc6d50e933920026b4ac716f0d Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 15:48:41 -0500 Subject: [PATCH 15/35] added try block for when node-query fails --- precog/utils/bittensor.py | 4 ++-- precog/validators/weight_setter.py | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/precog/utils/bittensor.py b/precog/utils/bittensor.py index 7903a33..ef406b8 100644 --- a/precog/utils/bittensor.py +++ b/precog/utils/bittensor.py @@ -13,9 +13,9 @@ def setup_bittensor_objects(self): # if chain endpoint is set, overwrite network arg self.config.subtensor.network = self.config.subtensor.chain_endpoint # Initialize subtensor. - self.subtensor = bt.subtensor(config=self.config, network=self.config.subtensor.network) + self.subtensor = bt.subtensor(config=self.config) self.metagraph = self.subtensor.metagraph(self.config.netuid) - self.wallet = bt.wallet(name=self.config.wallet.name, hotkey=self.config.wallet.hotkey) + self.wallet = bt.wallet(config=self.config) self.dendrite = bt.dendrite(wallet=self.wallet) self.axon = bt.axon(wallet=self.wallet, config=self.config, port=self.config.axon.port) # Connect the validator to the network. diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 86a0b89..d3f08fb 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -144,10 +144,13 @@ async def set_weights(self): "Failed to set weights this iteration with message:", msg, ) - self.current_block = self.subtensor.get_current_block() - self.blocks_since_last_update = ( - self.current_block - self.node_query("SubtensorModule", "LastUpdate", [self.config.netuid])[self.my_uid] - ) + try: + self.current_block = self.subtensor.get_current_block() + self.blocks_since_last_update = ( + self.current_block - self.node_query("SubtensorModule", "LastUpdate", [self.config.netuid])[self.my_uid] + ) + except Exception: + bt.logging.error("Failed to get current block, skipping block update") async def scheduled_prediction_request(self): if not hasattr(self, "timestamp"): From db6e44dac4c9e0606dc9640aa20bb7d0cdf0989a Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 15:50:21 -0500 Subject: [PATCH 16/35] missing network info from subtensor call --- precog/utils/bittensor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/precog/utils/bittensor.py b/precog/utils/bittensor.py index ef406b8..29256de 100644 --- a/precog/utils/bittensor.py +++ b/precog/utils/bittensor.py @@ -13,7 +13,7 @@ def setup_bittensor_objects(self): # if chain endpoint is set, overwrite network arg self.config.subtensor.network = self.config.subtensor.chain_endpoint # Initialize subtensor. - self.subtensor = bt.subtensor(config=self.config) + self.subtensor = bt.subtensor(config=self.config, chain_endpoint=self.config.subtensor.chain_endpoint) self.metagraph = self.subtensor.metagraph(self.config.netuid) self.wallet = bt.wallet(config=self.config) self.dendrite = bt.dendrite(wallet=self.wallet) From 3b086787f254aa4d208b0d245bd2f3217101a682 Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 15:56:39 -0500 Subject: [PATCH 17/35] set block update to before weight setting code in set_weights loop --- precog/utils/bittensor.py | 2 +- precog/validators/weight_setter.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/precog/utils/bittensor.py b/precog/utils/bittensor.py index 29256de..844ff70 100644 --- a/precog/utils/bittensor.py +++ b/precog/utils/bittensor.py @@ -13,7 +13,7 @@ def setup_bittensor_objects(self): # if chain endpoint is set, overwrite network arg self.config.subtensor.network = self.config.subtensor.chain_endpoint # Initialize subtensor. - self.subtensor = bt.subtensor(config=self.config, chain_endpoint=self.config.subtensor.chain_endpoint) + self.subtensor = bt.subtensor(config=self.config, network=self.config.subtensor.chain_endpoint) self.metagraph = self.subtensor.metagraph(self.config.netuid) self.wallet = bt.wallet(config=self.config) self.dendrite = bt.dendrite(wallet=self.wallet) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index d3f08fb..a704bd1 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -116,6 +116,13 @@ def node_query(self, module, method, params): return result async def set_weights(self): + try: + self.current_block = self.subtensor.get_current_block() + self.blocks_since_last_update = ( + self.current_block - self.node_query("SubtensorModule", "LastUpdate", [self.config.netuid])[self.my_uid] + ) + except Exception: + bt.logging.error("Failed to get current block, skipping block update") if self.blocks_since_last_update >= self.set_weights_rate: uids = array(self.available_uids) weights = [self.moving_average_scores[uid] for uid in self.available_uids] @@ -144,13 +151,6 @@ async def set_weights(self): "Failed to set weights this iteration with message:", msg, ) - try: - self.current_block = self.subtensor.get_current_block() - self.blocks_since_last_update = ( - self.current_block - self.node_query("SubtensorModule", "LastUpdate", [self.config.netuid])[self.my_uid] - ) - except Exception: - bt.logging.error("Failed to get current block, skipping block update") async def scheduled_prediction_request(self): if not hasattr(self, "timestamp"): From fff5587d36686bc9db0a7c6b946117a881a861f8 Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 15:58:31 -0500 Subject: [PATCH 18/35] added reinitializing subtensor to resync metagraph code --- precog/validators/weight_setter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index a704bd1..6981c7e 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -80,6 +80,7 @@ async def resync_metagraph(self, force=False): """Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph.""" self.blocks_since_sync = self.current_block - self.last_sync if self.blocks_since_sync >= self.resync_metagraph_rate or force: + self.subtensor = bt.subtensor(config=self.config, network=self.config.subtensor.chain_endpoint) bt.logging.info("Syncing Metagraph...") self.metagraph.sync(subtensor=self.subtensor) bt.logging.info("Metagraph updated, re-syncing hotkeys, dendrite pool and moving averages") From 781080f473125fdbb6f3350e5894b5baff62505f Mon Sep 17 00:00:00 2001 From: hscott Date: Thu, 5 Dec 2024 16:53:08 -0500 Subject: [PATCH 19/35] reduced set_weights and resync_metagraph cadence --- precog/validators/weight_setter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 6981c7e..61454dd 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -27,8 +27,8 @@ def __init__(self, config=None, loop=None): self.prediction_interval = self.config.prediction_interval # in minutes self.N_TIMEPOINTS = self.config.N_TIMEPOINTS # number of timepoints to predict self.last_sync = 0 - self.set_weights_rate = 100 # in blocks - self.resync_metagraph_rate = 20 # in blocks + self.set_weights_rate = 150 # in blocks + self.resync_metagraph_rate = 25 # in blocks bt.logging.info( f"Running validator for subnet: {self.config.netuid} on network: {self.config.subtensor.network}" ) From 2d616c34d01ea51a9670dae00595e6acd0eca7ce Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 10:40:46 -0500 Subject: [PATCH 20/35] trying to catch and restart websocket closed exceptions --- precog/validators/validator.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/precog/validators/validator.py b/precog/validators/validator.py index d09c2c2..79aae3c 100755 --- a/precog/validators/validator.py +++ b/precog/validators/validator.py @@ -2,6 +2,7 @@ from pathlib import Path import bittensor as bt +import websocket from precog.utils.classes import Config from precog.utils.general import parse_arguments @@ -27,6 +28,9 @@ async def main(self): except BrokenPipeError: bt.logging.error("Recieved a Broken Pipe substrate error") asyncio.run(self.reset_instance()) + except websocket._exceptions.WebSocketConnectionClosedException: + bt.logging.error("Recieved a websocket closed error, restarting validator") + asyncio.run(self.reset_instance()) except Exception as e: bt.logging.error(f"Unhandled exception: {e}") finally: From 813cc6c7d4e05b533e22bf29fc0d8af34e9c7212 Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 11:40:43 -0500 Subject: [PATCH 21/35] added return_exceptions to asyncio gather --- precog/utils/general.py | 2 +- precog/validators/weight_setter.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/precog/utils/general.py b/precog/utils/general.py index 59b9fc2..91bcfb7 100644 --- a/precog/utils/general.py +++ b/precog/utils/general.py @@ -117,7 +117,7 @@ async def loop_handler(self, func, sleep_time=120): raise except Exception as e: bt.logging.error(f"{func.__name__} raised error: {e}") - raise + raise e finally: async with self.lock: self.stop_event.set() diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 61454dd..8df0458 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -63,7 +63,7 @@ def __exit__(self, exc_type, exc_value, traceback): pending = asyncio.all_tasks(self.loop) for task in pending: task.cancel() - asyncio.gather(*pending) + asyncio.gather(*pending, return_exceptions=True) except Exception as e: bt.logging.error(f"Error on __exit__ function: {e}") self.loop.stop() From c9677f1dc1a3b3d4cb7d32aade201b24130c6839 Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 11:57:08 -0500 Subject: [PATCH 22/35] moved error handling to weight_setter --- precog/validators/validator.py | 15 --------------- precog/validators/weight_setter.py | 24 +++++++++++++++++++----- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/precog/validators/validator.py b/precog/validators/validator.py index 79aae3c..b8bf9c3 100755 --- a/precog/validators/validator.py +++ b/precog/validators/validator.py @@ -1,9 +1,6 @@ import asyncio from pathlib import Path -import bittensor as bt -import websocket - from precog.utils.classes import Config from precog.utils.general import parse_arguments from precog.validators.weight_setter import weight_setter @@ -23,18 +20,6 @@ def __init__(self): async def main(self): loop = asyncio.get_event_loop() self.weight_setter = weight_setter(config=self.config, loop=loop) - try: - loop.run_forever() - except BrokenPipeError: - bt.logging.error("Recieved a Broken Pipe substrate error") - asyncio.run(self.reset_instance()) - except websocket._exceptions.WebSocketConnectionClosedException: - bt.logging.error("Recieved a websocket closed error, restarting validator") - asyncio.run(self.reset_instance()) - except Exception as e: - bt.logging.error(f"Unhandled exception: {e}") - finally: - bt.logging.info("Exiting Validator") async def reset_instance(self): self.__init__() diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 8df0458..7f2b3b9 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -56,6 +56,11 @@ def __init__(self, config=None, loop=None): ) self.loop.create_task(loop_handler(self, self.resync_metagraph, sleep_time=self.resync_metagraph_rate)) self.loop.create_task(loop_handler(self, self.set_weights, sleep_time=self.set_weights_rate)) + try: + self.loop.run_forever() + except Exception as e: + bt.logging.error(f"Error on loop: {e}") + self.__reset_instance__() def __exit__(self, exc_type, exc_value, traceback): self.save_state() @@ -63,10 +68,15 @@ def __exit__(self, exc_type, exc_value, traceback): pending = asyncio.all_tasks(self.loop) for task in pending: task.cancel() - asyncio.gather(*pending, return_exceptions=True) except Exception as e: bt.logging.error(f"Error on __exit__ function: {e}") - self.loop.stop() + finally: + asyncio.gather(*pending, return_exceptions=True) + self.loop.stop() + + def __reset_instance__(self): + self.__exit__(None, None, None) + self.__init__(self.config, self.loop) async def get_available_uids(self): miner_uids = [] @@ -119,9 +129,7 @@ def node_query(self, module, method, params): async def set_weights(self): try: self.current_block = self.subtensor.get_current_block() - self.blocks_since_last_update = ( - self.current_block - self.node_query("SubtensorModule", "LastUpdate", [self.config.netuid])[self.my_uid] - ) + self.blocks_since_last_update = self.current_block - self.last_update except Exception: bt.logging.error("Failed to get current block, skipping block update") if self.blocks_since_last_update >= self.set_weights_rate: @@ -147,6 +155,12 @@ async def set_weights(self): ) if result: bt.logging.success("✅ Set Weights on chain successfully!") + try: + self.last_update = self.node_query("SubtensorModule", "LastUpdate", [self.config.netuid])[ + self.my_uid + ] + except Exception: + pass else: bt.logging.debug( "Failed to set weights this iteration with message:", From 0b3467ee9ec50e0a251212adcc0ce2f2371c7d8c Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 12:11:07 -0500 Subject: [PATCH 23/35] removed node_query function --- precog/validators/weight_setter.py | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 7f2b3b9..219f26a 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -3,9 +3,9 @@ import pickle import bittensor as bt +import websocket from numpy import array from pytz import timezone -from substrateinterface import SubstrateInterface from precog import __spec_version__ from precog.protocol import Challenge @@ -41,12 +41,10 @@ def __init__(self, config=None, loop=None): self.save_state() else: self.load_state() - self.node = SubstrateInterface(url=self.config.subtensor.chain_endpoint) self.current_block = self.subtensor.get_current_block() - self.blocks_since_last_update = ( - self.current_block - self.node_query("SubtensorModule", "LastUpdate", [self.config.netuid])[self.my_uid] + self.blocks_since_last_update = self.subtensor.blocks_since_last_update( + neutid=self.config.netuid, uid=self.my_uid ) - self.tempo = self.node_query("SubtensorModule", "Tempo", [self.config.netuid]) if self.config.wandb_on: setup_wandb(self) self.stop_event = asyncio.Event() @@ -58,9 +56,10 @@ def __init__(self, config=None, loop=None): self.loop.create_task(loop_handler(self, self.set_weights, sleep_time=self.set_weights_rate)) try: self.loop.run_forever() + except websocket._exceptions.WebSocketConnectionClosedException: + self.__reset_instance__() except Exception as e: bt.logging.error(f"Error on loop: {e}") - self.__reset_instance__() def __exit__(self, exc_type, exc_value, traceback): self.save_state() @@ -117,19 +116,12 @@ def query_miners(self): ) return responses, timestamp - def node_query(self, module, method, params): - try: - result = self.node.query(module, method, params).value - except Exception: - # reinitilize node - self.node = SubstrateInterface(url=self.subtensor.chain_endpoint) - result = self.node.query(module, method, params).value - return result - async def set_weights(self): try: self.current_block = self.subtensor.get_current_block() - self.blocks_since_last_update = self.current_block - self.last_update + self.blocks_since_last_update = self.subtensor.blocks_since_last_update( + neutid=self.config.netuid, uid=self.my_uid + ) except Exception: bt.logging.error("Failed to get current block, skipping block update") if self.blocks_since_last_update >= self.set_weights_rate: @@ -155,12 +147,6 @@ async def set_weights(self): ) if result: bt.logging.success("✅ Set Weights on chain successfully!") - try: - self.last_update = self.node_query("SubtensorModule", "LastUpdate", [self.config.netuid])[ - self.my_uid - ] - except Exception: - pass else: bt.logging.debug( "Failed to set weights this iteration with message:", From ca2b7b35d415b548314a20df7366b8737eab0ec8 Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 12:12:43 -0500 Subject: [PATCH 24/35] typo --- precog/validators/weight_setter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 219f26a..c8c4dff 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -43,7 +43,7 @@ def __init__(self, config=None, loop=None): self.load_state() self.current_block = self.subtensor.get_current_block() self.blocks_since_last_update = self.subtensor.blocks_since_last_update( - neutid=self.config.netuid, uid=self.my_uid + netuid=self.config.netuid, uid=self.my_uid ) if self.config.wandb_on: setup_wandb(self) @@ -120,7 +120,7 @@ async def set_weights(self): try: self.current_block = self.subtensor.get_current_block() self.blocks_since_last_update = self.subtensor.blocks_since_last_update( - neutid=self.config.netuid, uid=self.my_uid + netuid=self.config.netuid, uid=self.my_uid ) except Exception: bt.logging.error("Failed to get current block, skipping block update") From 4a4cb8c0fac6c4d27a428e6e1e51ffc27f82787d Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 12:19:05 -0500 Subject: [PATCH 25/35] simplified block handling code --- precog/validators/weight_setter.py | 38 +++++++++++++----------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index c8c4dff..c8de0eb 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -28,7 +28,7 @@ def __init__(self, config=None, loop=None): self.N_TIMEPOINTS = self.config.N_TIMEPOINTS # number of timepoints to predict self.last_sync = 0 self.set_weights_rate = 150 # in blocks - self.resync_metagraph_rate = 25 # in blocks + self.resync_metagraph_rate = 600 # in seconds bt.logging.info( f"Running validator for subnet: {self.config.netuid} on network: {self.config.subtensor.network}" ) @@ -41,7 +41,6 @@ def __init__(self, config=None, loop=None): self.save_state() else: self.load_state() - self.current_block = self.subtensor.get_current_block() self.blocks_since_last_update = self.subtensor.blocks_since_last_update( netuid=self.config.netuid, uid=self.my_uid ) @@ -57,6 +56,7 @@ def __init__(self, config=None, loop=None): try: self.loop.run_forever() except websocket._exceptions.WebSocketConnectionClosedException: + bt.logging.info("Caught websocket connection closed exception") self.__reset_instance__() except Exception as e: bt.logging.error(f"Error on loop: {e}") @@ -85,25 +85,22 @@ async def get_available_uids(self): miner_uids.append(uid) return miner_uids - async def resync_metagraph(self, force=False): + async def resync_metagraph(self): """Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph.""" - self.blocks_since_sync = self.current_block - self.last_sync - if self.blocks_since_sync >= self.resync_metagraph_rate or force: - self.subtensor = bt.subtensor(config=self.config, network=self.config.subtensor.chain_endpoint) - bt.logging.info("Syncing Metagraph...") - self.metagraph.sync(subtensor=self.subtensor) - bt.logging.info("Metagraph updated, re-syncing hotkeys, dendrite pool and moving averages") - # Zero out all hotkeys that have been replaced. - self.available_uids = asyncio.run(self.get_available_uids()) - for uid, hotkey in enumerate(self.metagraph.hotkeys): - if (uid not in self.MinerHistory and uid in self.available_uids) or self.hotkeys[uid] != hotkey: - bt.logging.info(f"Replacing hotkey on {uid} with {self.metagraph.hotkeys[uid]}") - self.hotkeys[uid] = hotkey - self.scores[uid] = 0 # hotkey has been replaced - self.MinerHistory[uid] = MinerHistory(uid, timezone=self.timezone) - self.moving_average_scores[uid] = 0 - self.last_sync = self.subtensor.get_current_block() - self.save_state() + self.subtensor = bt.subtensor(config=self.config, network=self.config.subtensor.chain_endpoint) + bt.logging.info("Syncing Metagraph...") + self.metagraph.sync(subtensor=self.subtensor) + bt.logging.info("Metagraph updated, re-syncing hotkeys, dendrite pool and moving averages") + # Zero out all hotkeys that have been replaced. + self.available_uids = asyncio.run(self.get_available_uids()) + for uid, hotkey in enumerate(self.metagraph.hotkeys): + if (uid not in self.MinerHistory and uid in self.available_uids) or self.hotkeys[uid] != hotkey: + bt.logging.info(f"Replacing hotkey on {uid} with {self.metagraph.hotkeys[uid]}") + self.hotkeys[uid] = hotkey + self.scores[uid] = 0 # hotkey has been replaced + self.MinerHistory[uid] = MinerHistory(uid, timezone=self.timezone) + self.moving_average_scores[uid] = 0 + self.save_state() def query_miners(self): timestamp = get_now().isoformat() @@ -118,7 +115,6 @@ def query_miners(self): async def set_weights(self): try: - self.current_block = self.subtensor.get_current_block() self.blocks_since_last_update = self.subtensor.blocks_since_last_update( netuid=self.config.netuid, uid=self.my_uid ) From ed937b4b5689fd613fc6d9a9ce57123cc113aa27 Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 12:22:16 -0500 Subject: [PATCH 26/35] now updates current block on calls to set_weights --- precog/validators/weight_setter.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index c8de0eb..7d07e20 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -27,7 +27,7 @@ def __init__(self, config=None, loop=None): self.prediction_interval = self.config.prediction_interval # in minutes self.N_TIMEPOINTS = self.config.N_TIMEPOINTS # number of timepoints to predict self.last_sync = 0 - self.set_weights_rate = 150 # in blocks + self.set_weights_rate = 100 # in blocks self.resync_metagraph_rate = 600 # in seconds bt.logging.info( f"Running validator for subnet: {self.config.netuid} on network: {self.config.subtensor.network}" @@ -41,6 +41,7 @@ def __init__(self, config=None, loop=None): self.save_state() else: self.load_state() + self.current_block = self.subtensor.get_current_block() self.blocks_since_last_update = self.subtensor.blocks_since_last_update( netuid=self.config.netuid, uid=self.my_uid ) @@ -118,6 +119,7 @@ async def set_weights(self): self.blocks_since_last_update = self.subtensor.blocks_since_last_update( netuid=self.config.netuid, uid=self.my_uid ) + self.current_block = self.subtensor.get_current_block() except Exception: bt.logging.error("Failed to get current block, skipping block update") if self.blocks_since_last_update >= self.set_weights_rate: From 9573841332bc62127cdc8852c6c764a63499704f Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 12:36:14 -0500 Subject: [PATCH 27/35] resets blocks_since_last_update on successful set weights --- precog/validators/weight_setter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 7d07e20..097cf3c 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -120,8 +120,8 @@ async def set_weights(self): netuid=self.config.netuid, uid=self.my_uid ) self.current_block = self.subtensor.get_current_block() - except Exception: - bt.logging.error("Failed to get current block, skipping block update") + except Exception as e: + bt.logging.error(f"Failed to get current block with error {e}, skipping block update") if self.blocks_since_last_update >= self.set_weights_rate: uids = array(self.available_uids) weights = [self.moving_average_scores[uid] for uid in self.available_uids] @@ -145,6 +145,7 @@ async def set_weights(self): ) if result: bt.logging.success("✅ Set Weights on chain successfully!") + self.blocks_since_last_update = 0 else: bt.logging.debug( "Failed to set weights this iteration with message:", From 2908deaf601d9957de7948bbb1738f4b8a7968ba Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 13:06:19 -0500 Subject: [PATCH 28/35] now pulls weights_rate_limit from subnet hyperparams instead of setting own value --- precog/utils/general.py | 19 +++++++++++++++++-- precog/validators/weight_setter.py | 9 +++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/precog/utils/general.py b/precog/utils/general.py index 91bcfb7..8e4f4e7 100644 --- a/precog/utils/general.py +++ b/precog/utils/general.py @@ -1,7 +1,7 @@ import argparse import asyncio import re -from typing import Optional +from typing import Any, Callable, Optional import bittensor as bt import git @@ -104,7 +104,7 @@ def rank(vector): return rank_vector -async def loop_handler(self, func, sleep_time=120): +async def loop_handler(self, func: Callable, sleep_time: float = 120): try: while not self.stop_event.is_set(): async with self.lock: @@ -121,3 +121,18 @@ async def loop_handler(self, func, sleep_time=120): finally: async with self.lock: self.stop_event.set() + + +def func_with_retry(func: Callable, max_attempts: int = 3, delay: float = 2, *args, **kwargs) -> Any: + attempt = 0 + while attempt < max_attempts: + try: + result = func(*args, **kwargs) + return result + except Exception as e: + attempt += 1 + print(f"Attempt {attempt} failed with error: {e}") + if attempt == max_attempts: + raise + else: + print(f"Retrying... (Attempt {attempt + 1})") diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 097cf3c..8cb881e 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -11,7 +11,7 @@ from precog.protocol import Challenge from precog.utils.bittensor import check_uid_availability, print_info, setup_bittensor_objects from precog.utils.classes import MinerHistory -from precog.utils.general import loop_handler +from precog.utils.general import func_with_retry, loop_handler from precog.utils.timestamp import elapsed_seconds, get_before, get_now, is_query_time, iso8601_to_datetime from precog.utils.wandb import log_wandb, setup_wandb from precog.validators.reward import calc_rewards @@ -26,8 +26,8 @@ def __init__(self, config=None, loop=None): self.timezone = timezone("UTC") self.prediction_interval = self.config.prediction_interval # in minutes self.N_TIMEPOINTS = self.config.N_TIMEPOINTS # number of timepoints to predict + self.hyperparams = func_with_retry(self.subtensor.get_subnet_hyperparameters, netuid=self.config.netuid) self.last_sync = 0 - self.set_weights_rate = 100 # in blocks self.resync_metagraph_rate = 600 # in seconds bt.logging.info( f"Running validator for subnet: {self.config.netuid} on network: {self.config.subtensor.network}" @@ -53,10 +53,11 @@ def __init__(self, config=None, loop=None): loop_handler(self, self.scheduled_prediction_request, sleep_time=self.config.print_cadence) ) self.loop.create_task(loop_handler(self, self.resync_metagraph, sleep_time=self.resync_metagraph_rate)) - self.loop.create_task(loop_handler(self, self.set_weights, sleep_time=self.set_weights_rate)) + self.loop.create_task(loop_handler(self, self.set_weights, sleep_time=self.hyperparameters.weights_rate_limit)) try: self.loop.run_forever() except websocket._exceptions.WebSocketConnectionClosedException: + # TODO: Exceptions are not being caught in this loop bt.logging.info("Caught websocket connection closed exception") self.__reset_instance__() except Exception as e: @@ -122,7 +123,7 @@ async def set_weights(self): self.current_block = self.subtensor.get_current_block() except Exception as e: bt.logging.error(f"Failed to get current block with error {e}, skipping block update") - if self.blocks_since_last_update >= self.set_weights_rate: + if self.blocks_since_last_update >= self.hyperparameters.weights_rate_limit: uids = array(self.available_uids) weights = [self.moving_average_scores[uid] for uid in self.available_uids] for i, j in zip(weights, self.available_uids): From 855cd94656cb3bcfb689026ba67c52e5bf05c306 Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 13:07:26 -0500 Subject: [PATCH 29/35] typo --- precog/validators/weight_setter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 8cb881e..0921933 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -26,7 +26,7 @@ def __init__(self, config=None, loop=None): self.timezone = timezone("UTC") self.prediction_interval = self.config.prediction_interval # in minutes self.N_TIMEPOINTS = self.config.N_TIMEPOINTS # number of timepoints to predict - self.hyperparams = func_with_retry(self.subtensor.get_subnet_hyperparameters, netuid=self.config.netuid) + self.hyperparameters = func_with_retry(self.subtensor.get_subnet_hyperparameters, netuid=self.config.netuid) self.last_sync = 0 self.resync_metagraph_rate = 600 # in seconds bt.logging.info( From 790e359113020d6ca8ded2c5bfc47c90a3c17158 Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 13:08:42 -0500 Subject: [PATCH 30/35] missed assignment in utils function --- precog/utils/bittensor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/precog/utils/bittensor.py b/precog/utils/bittensor.py index 844ff70..890e390 100644 --- a/precog/utils/bittensor.py +++ b/precog/utils/bittensor.py @@ -46,7 +46,7 @@ def setup_bittensor_objects(self): def print_info(self) -> None: if self.config.neuron.type == "Validator": - weight_timing = self.set_weights_rate - self.blocks_since_last_update + weight_timing = self.hyperparameters.weights_rate_limit - self.blocks_since_last_update if weight_timing <= 0: weight_timing = "a few" # hashtag aesthetic af log = ( From 756fda16e1763c3211ac38e9e3bb965516be5cec Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 13:10:08 -0500 Subject: [PATCH 31/35] unused variable removed --- precog/validators/weight_setter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 0921933..ecd0b80 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -27,7 +27,6 @@ def __init__(self, config=None, loop=None): self.prediction_interval = self.config.prediction_interval # in minutes self.N_TIMEPOINTS = self.config.N_TIMEPOINTS # number of timepoints to predict self.hyperparameters = func_with_retry(self.subtensor.get_subnet_hyperparameters, netuid=self.config.netuid) - self.last_sync = 0 self.resync_metagraph_rate = 600 # in seconds bt.logging.info( f"Running validator for subnet: {self.config.netuid} on network: {self.config.subtensor.network}" From 64d4841d8373d601e80d1a330c59331a508e33ba Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 13:21:35 -0500 Subject: [PATCH 32/35] added retry wrapper to get_block calls in set_weights --- precog/validators/weight_setter.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index ecd0b80..56de5ef 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -24,8 +24,6 @@ def __init__(self, config=None, loop=None): self.lock = asyncio.Lock() setup_bittensor_objects(self) self.timezone = timezone("UTC") - self.prediction_interval = self.config.prediction_interval # in minutes - self.N_TIMEPOINTS = self.config.N_TIMEPOINTS # number of timepoints to predict self.hyperparameters = func_with_retry(self.subtensor.get_subnet_hyperparameters, netuid=self.config.netuid) self.resync_metagraph_rate = 600 # in seconds bt.logging.info( @@ -61,6 +59,8 @@ def __init__(self, config=None, loop=None): self.__reset_instance__() except Exception as e: bt.logging.error(f"Error on loop: {e}") + finally: + self.__exit__(None, None, None) def __exit__(self, exc_type, exc_value, traceback): self.save_state() @@ -116,10 +116,10 @@ def query_miners(self): async def set_weights(self): try: - self.blocks_since_last_update = self.subtensor.blocks_since_last_update( - netuid=self.config.netuid, uid=self.my_uid + self.blocks_since_last_update = func_with_retry( + self.subtensor.blocks_since_last_update, netuid=self.config.netuid, uid=self.my_uid ) - self.current_block = self.subtensor.get_current_block() + self.current_block = func_with_retry(self.subtensor.get_current_block) except Exception as e: bt.logging.error(f"Failed to get current block with error {e}, skipping block update") if self.blocks_since_last_update >= self.hyperparameters.weights_rate_limit: From d4c58ec1e3ecaba66940a7694813d119c20c246e Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 13:24:03 -0500 Subject: [PATCH 33/35] added delay functionality to func_with_retry --- precog/utils/general.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/precog/utils/general.py b/precog/utils/general.py index 8e4f4e7..52352c6 100644 --- a/precog/utils/general.py +++ b/precog/utils/general.py @@ -1,6 +1,7 @@ import argparse import asyncio import re +import time from typing import Any, Callable, Optional import bittensor as bt @@ -123,7 +124,7 @@ async def loop_handler(self, func: Callable, sleep_time: float = 120): self.stop_event.set() -def func_with_retry(func: Callable, max_attempts: int = 3, delay: float = 2, *args, **kwargs) -> Any: +def func_with_retry(func: Callable, max_attempts: int = 3, delay: float = 1, *args, **kwargs) -> Any: attempt = 0 while attempt < max_attempts: try: @@ -135,4 +136,5 @@ def func_with_retry(func: Callable, max_attempts: int = 3, delay: float = 2, *ar if attempt == max_attempts: raise else: - print(f"Retrying... (Attempt {attempt + 1})") + print(f"Retrying... (Attempt {attempt + 1}) in {delay} seconds") + time.sleep(delay) From ef5f00b6c0aff39d56de8db288c8ceecec1ec979 Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 13:25:29 -0500 Subject: [PATCH 34/35] missing variable --- precog/validators/weight_setter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/precog/validators/weight_setter.py b/precog/validators/weight_setter.py index 56de5ef..32d2fb9 100755 --- a/precog/validators/weight_setter.py +++ b/precog/validators/weight_setter.py @@ -24,6 +24,8 @@ def __init__(self, config=None, loop=None): self.lock = asyncio.Lock() setup_bittensor_objects(self) self.timezone = timezone("UTC") + self.prediction_interval = self.config.prediction_interval # in seconds + self.N_TIMEPOINTS = self.config.N_TIMEPOINTS # number of timepoints to predict self.hyperparameters = func_with_retry(self.subtensor.get_subnet_hyperparameters, netuid=self.config.netuid) self.resync_metagraph_rate = 600 # in seconds bt.logging.info( From 189062be08cf1720be6782c019e3c8701e1b542b Mon Sep 17 00:00:00 2001 From: hscott Date: Fri, 6 Dec 2024 13:32:50 -0500 Subject: [PATCH 35/35] changed print statements to bt.logging statements --- precog/utils/general.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/precog/utils/general.py b/precog/utils/general.py index 52352c6..11e6485 100644 --- a/precog/utils/general.py +++ b/precog/utils/general.py @@ -132,9 +132,9 @@ def func_with_retry(func: Callable, max_attempts: int = 3, delay: float = 1, *ar return result except Exception as e: attempt += 1 - print(f"Attempt {attempt} failed with error: {e}") + bt.logging.debug(f"Function {func} failed: Attempt {attempt} of {max_attempts} with error: {e}") if attempt == max_attempts: + bt.logging.error(f"Function {func} failed {max_attempts} times, skipping.") raise else: - print(f"Retrying... (Attempt {attempt + 1}) in {delay} seconds") time.sleep(delay)