generated from opentensor/bittensor-subnet-template
-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathstart.sh
executable file
·393 lines (338 loc) · 13 KB
/
start.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
#!/bin/bash
#
# Start the requested set of Masa containers (validators, miners, bootnode,
# oracle workers, TEE workers) with Docker.
#
# Configuration is read from the environment and, when present, from a .env
# file in the current directory. Counts default to a single miner.
set -e

# Source .env if it exists
if [ -f .env ]; then
  # shellcheck disable=SC1091
  source .env
fi

# Basic setup: taken from the environment / .env; no default is applied.
SUBTENSOR_NETWORK=${SUBTENSOR_NETWORK:-}
NETUID=${NETUID:-}

# Set default counts if not provided
VALIDATOR_COUNT=${VALIDATOR_COUNT:-0}
MINER_COUNT=${MINER_COUNT:-1}
ENABLE_BOOTNODE=${ENABLE_BOOTNODE:-false}
ORACLE_WORKER_COUNT=${ORACLE_WORKER_COUNT:-0}
TEE_WORKER_COUNT=${TEE_WORKER_COUNT:-0}

# Every count is later used in integer tests ([ ... -gt 0 ]); a non-numeric
# value would make those tests error out and silently skip work, so reject it
# up front with a clear message.
for count_var in VALIDATOR_COUNT MINER_COUNT ORACLE_WORKER_COUNT TEE_WORKER_COUNT; do
  case "${!count_var}" in
    '' | *[!0-9]*)
      echo "Error: $count_var must be a non-negative integer (got '${!count_var}')" >&2
      exit 1
      ;;
  esac
done
unset count_var

# Image configuration
BITTENSOR_IMAGE=${BITTENSOR_IMAGE:-"masaengineering/masa-bittensor:latest"}
ORACLE_IMAGE=${ORACLE_IMAGE:-"masaengineering/oracle:latest"}
TEE_WORKER_IMAGE=${TEE_WORKER_IMAGE:-"masaengineering/tee-worker:latest"}

# Get the host IP address (first address reported by `hostname -I`)
HOST_IP=$(hostname -I | awk '{print $1}')

echo "Host IP address: $HOST_IP"
echo "Starting nodes for network: $SUBTENSOR_NETWORK (subnet $NETUID)"
echo "Validator count: $VALIDATOR_COUNT"
echo "Miner count: $MINER_COUNT"
echo "Enable bootnode: $ENABLE_BOOTNODE"
echo "Oracle worker count: $ORACLE_WORKER_COUNT"
echo "TEE Worker count: $TEE_WORKER_COUNT"
echo "Using Bittensor image: $BITTENSOR_IMAGE"
echo "Using Oracle image: $ORACLE_IMAGE"
echo "Using TEE Worker image: $TEE_WORKER_IMAGE"

# Pull latest images; the oracle and TEE images are only pulled when a
# container that uses them has been requested.
echo "Pulling latest images..."
docker pull "$BITTENSOR_IMAGE"
if [ "$ENABLE_BOOTNODE" = "true" ] || [ "$ORACLE_WORKER_COUNT" -gt 0 ]; then
  docker pull "$ORACLE_IMAGE"
fi
if [ "$TEE_WORKER_COUNT" -gt 0 ]; then
  docker pull "$TEE_WORKER_IMAGE"
fi

# Create necessary directories if they don't exist.
# NOTE(review): mode 777 is presumably so container users with a different
# UID can write to the bind mounts; confirm before tightening.
mkdir -p .bittensor
chmod 777 .bittensor
if [ "$ENABLE_BOOTNODE" = "true" ] || [ "$ORACLE_WORKER_COUNT" -gt 0 ]; then
  mkdir -p .masa-bootnode
  chmod 777 .masa-bootnode
  if [ "$ORACLE_WORKER_COUNT" -gt 0 ]; then
    mkdir -p .masa-worker
    chmod 777 .masa-worker
  fi
fi

# Base ports - use environment variables with defaults.
# Instance N of a role uses <base> + N - 1.
VALIDATOR_PORT=${VALIDATOR_PORT:-8091}
VALIDATOR_METRICS_PORT=${VALIDATOR_METRICS_PORT:-8881}
VALIDATOR_GRAFANA_PORT=${VALIDATOR_GRAFANA_PORT:-3001}
MINER_PORT=${MINER_PORT:-8092}
MINER_METRICS_PORT=${MINER_METRICS_PORT:-8882}
MINER_GRAFANA_PORT=${MINER_GRAFANA_PORT:-3002}
BOOTNODE_PORT=${BOOTNODE_PORT:-18201}
BOOTNODE_METRICS_PORT=${BOOTNODE_METRICS_PORT:-8893}
BOOTNODE_GRAFANA_PORT=${BOOTNODE_GRAFANA_PORT:-3103}
ORACLE_WORKER_PORT=${ORACLE_WORKER_PORT:-18202}
ORACLE_WORKER_METRICS_PORT=${ORACLE_WORKER_METRICS_PORT:-8894}
ORACLE_WORKER_GRAFANA_PORT=${ORACLE_WORKER_GRAFANA_PORT:-3104}
TEE_WORKER_PORT=${TEE_WORKER_PORT:-8095}
TEE_WORKER_METRICS_PORT=${TEE_WORKER_METRICS_PORT:-8885}
TEE_WORKER_GRAFANA_PORT=${TEE_WORKER_GRAFANA_PORT:-3005}
#######################################
# Report whether a local TCP port is free.
# Probes localhost:<port> with `nc -z` when nc is installed, otherwise by
# opening bash's /dev/tcp pseudo-device. A successful connection means
# something is already listening on the port.
# Arguments: $1 - TCP port number to probe
# Returns:   0 if the connection attempt failed (port available),
#            1 if the connection succeeded (port in use)
#######################################
check_port() {
  local port=$1

  # The probes run as `if` conditions (not as bare commands followed by a
  # `$?` test) so a refused connection - the "port is free" case - can never
  # trip `set -e` when this function is called outside a conditional.
  if command -v nc >/dev/null 2>&1; then
    if nc -z localhost "$port" >/dev/null 2>&1; then
      return 1 # Port is in use
    fi
  elif (echo >"/dev/tcp/localhost/$port") >/dev/null 2>&1; then
    # Fallback when nc is not available
    return 1 # Port is in use
  fi

  return 0 # Port is available
}
#######################################
# Start one bittensor-side container (validator, miner or TEE worker) with
# host networking.
# Globals (read): NETUID, SUBTENSOR_NETWORK, BITTENSOR_IMAGE,
#                 TEE_WORKER_IMAGE, MASA_BASE_URL, API_URL,
#                 COLDKEY_MNEMONIC, HOST_IP
# Arguments:
#   $1 - role: "validator" or "tee-worker"; any other value gets the miner
#        environment variables and the bittensor image
#   $2 - 1-based instance number
#   $3 - base port
#   $4 - base metrics port
#   $5 - base grafana port
# Outputs: progress messages on stdout
# Exits the script with status 1 if check_port reports any of the three
# computed ports as in use.
#######################################
start_node() {
  local role=$1
  local instance_num=$2
  local base_port=$3
  local base_metrics_port=$4
  local base_grafana_port=$5

  # Calculate ports for this instance: instance N uses <base> + N - 1
  local port=$((base_port + instance_num - 1))
  local metrics_port=$((base_metrics_port + instance_num - 1))
  local grafana_port=$((base_grafana_port + instance_num - 1))

  # Generate wallet and hotkey names for this instance
  local wallet_name="subnet_${NETUID}"
  local hotkey_name="${role}_${instance_num}"

  # Role-specific docker options live in a local array so they neither leak
  # into the global scope nor depend on unquoted word splitting.
  local -a role_env
  local image

  echo "Starting $role $instance_num with ports:"
  echo " Port: $port"
  echo " Metrics: $metrics_port"
  echo " Grafana: $grafana_port"
  echo " Using wallet: $wallet_name"
  echo " Using hotkey: $hotkey_name"

  # Check if ports are available
  if ! check_port "$port" || ! check_port "$metrics_port" || ! check_port "$grafana_port"; then
    echo "Error: One or more ports are already in use for $role $instance_num"
    exit 1
  fi

  # Set role-specific environment variables and image
  case "$role" in
    "validator")
      role_env=(
        -e "VALIDATOR_PORT=$port"
        -e "VALIDATOR_METRICS_PORT=$metrics_port"
        -e "VALIDATOR_GRAFANA_PORT=$grafana_port"
        -e "VALIDATOR_AXON_PORT=$port"
      )
      image=$BITTENSOR_IMAGE
      ;;
    "tee-worker")
      role_env=(
        -e "TEE_WORKER_PORT=$port"
        -e "TEE_WORKER_METRICS_PORT=$metrics_port"
        -e "TEE_WORKER_GRAFANA_PORT=$grafana_port"
      )
      image=$TEE_WORKER_IMAGE
      ;;
    *) # miner
      role_env=(
        -e "MINER_PORT=$port"
        -e "MINER_METRICS_PORT=$metrics_port"
        -e "MINER_GRAFANA_PORT=$grafana_port"
        -e "MINER_AXON_PORT=$port"
      )
      image=$BITTENSOR_IMAGE
      ;;
  esac

  # Launch bittensor nodes with host networking. Bind-mount sources are
  # quoted so a working directory containing whitespace stays one argument.
  docker run -d \
    --name "masa_${role}_${instance_num}" \
    --network host \
    --env-file .env \
    -e "ROLE=$role" \
    -e "NETUID=$NETUID" \
    -e "SUBTENSOR_NETWORK=$SUBTENSOR_NETWORK" \
    -e "REPLICA_NUM=$instance_num" \
    -e "WALLET_NAME=$wallet_name" \
    -e "HOTKEY_NAME=$hotkey_name" \
    -e "MASA_BASE_URL=${MASA_BASE_URL}" \
    -e "API_URL=${API_URL}" \
    -e "COLDKEY_MNEMONIC=$COLDKEY_MNEMONIC" \
    -e "HOST_IP=$HOST_IP" \
    "${role_env[@]}" \
    -v "$PWD/.env:/app/.env" \
    -v "$PWD/.bittensor:/root/.bittensor" \
    -v "$PWD/startup:/app/startup" \
    -v "$PWD/masa:/app/masa" \
    -v "$PWD/neurons:/app/neurons" \
    -v "$PWD/config.json:/app/config.json" \
    "$image" python -m startup
}
#######################################
# Start the single bootnode container on the masa_network bridge network.
# Globals (read): BOOTNODE_PORT, BOOTNODE_METRICS_PORT,
#                 BOOTNODE_GRAFANA_PORT, ORACLE_IMAGE
# Arguments: none
# Outputs:   progress messages on stdout
# Exits the script with status 1 if check_port reports any required port as
# in use.
#######################################
start_bootnode() {
  local port=$BOOTNODE_PORT
  local metrics_port=$BOOTNODE_METRICS_PORT
  local grafana_port=$BOOTNODE_GRAFANA_PORT
  local candidate

  echo "Starting bootnode with ports:"
  echo " Port: $port"
  echo " Metrics: $metrics_port"
  echo " Grafana: $grafana_port"

  # Check if ports are available. 4001 and 8080 are the host ports published
  # below; checking 8080 here turns a docker bind failure into the same clear
  # error message used for the other ports.
  for candidate in "$port" "$metrics_port" "$grafana_port" 4001 8080; do
    if ! check_port "$candidate"; then
      echo "Error: One or more ports are already in use for bootnode"
      exit 1
    fi
  done

  # Launch bootnode with bridge networking. Bind-mount sources are absolute
  # (as in start_node) because older Docker CLIs reject relative host paths.
  docker run -d \
    --name "masa_bootnode" \
    --hostname "bootnode" \
    --network masa_network \
    -p 4001:4001/udp \
    -p 8080:8080 \
    -v "$PWD/bootnode.env:/home/masa/.env" \
    -v "$PWD/.masa-bootnode:/home/masa/.masa" \
    "$ORACLE_IMAGE" \
    --masaDir=/home/masa/.masa \
    --env=hometest \
    --api-enabled \
    --logLevel=debug \
    --port="$port"
}
#######################################
# Start one oracle worker container on the masa_network bridge network.
# Globals (read): ORACLE_WORKER_PORT, ORACLE_WORKER_METRICS_PORT,
#                 ORACLE_WORKER_GRAFANA_PORT, ORACLE_IMAGE
# Arguments:
#   $1 - 1-based instance number
#   $2 - bootnode multiaddress (optional); must start with "/". When omitted
#        the DNS-based default is used; when present but malformed a warning
#        is printed and the default is used.
# Outputs: progress messages on stdout
# Exits the script with status 1 if check_port reports any required port as
# in use.
#######################################
start_oracle_worker() {
  local instance_num=$1
  local bootnodes=${2:-}
  local default_bootnode="/dns4/bootnode/udp/4001/quic-v1"
  local base_port=$ORACLE_WORKER_PORT
  local base_metrics_port=$ORACLE_WORKER_METRICS_PORT
  local base_grafana_port=$ORACLE_WORKER_GRAFANA_PORT

  # Calculate ports for this instance: instance N uses <base> + N - 1
  local port=$((base_port + instance_num - 1))
  local metrics_port=$((base_metrics_port + instance_num - 1))
  local grafana_port=$((base_grafana_port + instance_num - 1))

  # Host-side ports published for this instance
  local p2p_host_port=$((4002 + instance_num - 1))
  local api_host_port=$((8081 + instance_num - 1))

  echo "Starting oracle worker $instance_num with ports:"
  echo " Port: $port"
  echo " Metrics: $metrics_port"
  echo " Grafana: $grafana_port"

  # Validate bootnode address. An omitted address is not an error, so only
  # warn when the caller actually supplied something malformed.
  if [[ -z "$bootnodes" ]]; then
    bootnodes=$default_bootnode
  elif [[ "$bootnodes" != /* ]]; then
    echo "WARNING: Invalid bootnode address format: $bootnodes"
    echo "Using DNS-based bootnode address format"
    bootnodes=$default_bootnode
  fi
  # NOTE(review): the resolved address is only logged; it is not passed to
  # the container below. Presumably the worker takes its bootnode from
  # worker.env - confirm.
  echo " Using bootnode: $bootnodes"

  # Check if ports are available, including the published API host port so a
  # clash is reported here rather than as a docker bind failure.
  if ! check_port "$port" || ! check_port "$metrics_port" \
    || ! check_port "$grafana_port" || ! check_port "$api_host_port"; then
    echo "Error: One or more ports are already in use for oracle worker $instance_num"
    exit 1
  fi

  # Launch oracle worker with bridge networking. Bind-mount sources are
  # absolute because older Docker CLIs reject relative host paths.
  docker run -d \
    --name "masa_oracle_worker_${instance_num}" \
    --hostname "worker_${instance_num}" \
    --network masa_network \
    -p "${p2p_host_port}:4001/udp" \
    -p "${api_host_port}:8081" \
    --env-file worker.env \
    -v "$PWD/.masa-worker:/home/masa/.masa" \
    -v "$PWD/worker.env:/home/masa/.env" \
    "$ORACLE_IMAGE" \
    --masaDir=/home/masa/.masa \
    --env=hometest \
    --api-enabled \
    --logLevel=debug \
    --port="$port"
}
#######################################
# Helper: succeed only when a container with exactly the given name is
# currently running.
# Arguments: $1 - container name
# Returns:   0 if `docker ps` lists that exact name, non-zero otherwise
#######################################
_masa_container_running() {
  local running_names
  running_names=$(docker ps --format '{{.Names}}' 2>/dev/null) || return 1
  # Whole-line fixed-string match: "masa_oracle_worker_1" must not be
  # satisfied by "masa_oracle_worker_10".
  grep -qxF -- "$1" <<<"$running_names"
}

#######################################
# Print a status summary for miners, the bootnode, oracle workers and TEE
# workers. Validators are not reported.
# Globals (read): MINER_COUNT, ENABLE_BOOTNODE, ORACLE_WORKER_COUNT,
#                 TEE_WORKER_COUNT
# Arguments: none
# Outputs:   the summary on stdout
#######################################
display_node_info() {
  local i hotkey_line

  echo -e "\n============= Node Information =============\n"

  # Display miner info if any running
  if [ "$MINER_COUNT" -gt 0 ]; then
    echo -e "===== MINER NODES =====\n"
    for ((i = 1; i <= MINER_COUNT; i++)); do
      echo "Miner $i:"
      # Capture the last matching log line first: the exit status of a
      # `... | tail -1` pipeline is always tail's, so it cannot be used to
      # detect "no match".
      hotkey_line=$(docker logs "masa_miner_$i" 2>&1 | grep -i "hotkey" | tail -1) || true
      if [ -n "$hotkey_line" ]; then
        printf '%s\n' "$hotkey_line"
      else
        echo "No hotkey info found"
      fi
    done
    echo ""
  fi

  # Display bootnode info if running
  if [ "$ENABLE_BOOTNODE" = "true" ]; then
    echo -e "===== BOOTNODE =====\n"
    if _masa_container_running "masa_bootnode"; then
      echo "Bootnode: Running"
    else
      echo "Bootnode: Not running or failed to start"
    fi
    echo ""
  fi

  # Display oracle worker info if any running
  if [ "$ORACLE_WORKER_COUNT" -gt 0 ]; then
    echo -e "===== ORACLE WORKERS =====\n"
    for ((i = 1; i <= ORACLE_WORKER_COUNT; i++)); do
      if _masa_container_running "masa_oracle_worker_$i"; then
        echo "Oracle Worker $i: Running"
      else
        echo "Oracle Worker $i: Not running or failed to start"
      fi
    done
    echo ""
  fi

  # Display TEE worker info if any running
  if [ "$TEE_WORKER_COUNT" -gt 0 ]; then
    echo -e "===== TEE WORKERS =====\n"
    for ((i = 1; i <= TEE_WORKER_COUNT; i++)); do
      if _masa_container_running "masa_tee-worker_$i"; then
        echo "TEE Worker $i: Running"
      else
        echo "TEE Worker $i: Not running or failed to start"
      fi
    done
    echo ""
  fi

  echo -e "============= End Node Information =============\n"
}
#######################################
# Force-remove every container (running or stopped) whose name matches the
# "masa_" filter. Best-effort: a failed removal, including the case where
# there is nothing to remove, only prints a note.
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0
#######################################
cleanup() {
  local stale_ids

  printf '%s\n' "Cleaning up containers..."

  # A failing `docker ps` must not abort the caller; fall through with an
  # empty list, exactly as an inline command substitution would.
  stale_ids=$(docker ps -aq --filter "name=masa_") || true

  # Left unquoted on purpose: each container ID must become its own argument.
  # shellcheck disable=SC2086
  if ! docker rm -f $stale_ids 2>/dev/null; then
    printf '%s\n' "No containers to remove"
  fi

  printf '%s\n' "Done!"
}
# Clean up any existing containers so their names and ports are free again.
echo "Cleaning up existing containers..."
# First clean up all containers with masa_ in their name (everything this
# script creates is named masa_*).
docker ps -aq --filter "name=masa_" | xargs -r docker rm -f
# Also clean up any potential bootnode or oracle containers that might be
# running under other names. Match on the container NAME only: grepping whole
# `docker ps` lines would also hit image names and commands and could
# force-remove unrelated containers (e.g. an Oracle database image).
docker ps -a --format '{{.ID}} {{.Names}}' \
  | awk '$2 ~ /bootnode|oracle|worker/ {print $1}' \
  | xargs -r docker rm -f 2>/dev/null || true
# Ensure ports are released (give a little time for cleanup)
sleep 2

# Create masa_network if it doesn't exist
echo "Setting up Docker network..."
if ! docker network inspect masa_network >/dev/null 2>&1; then
  docker network create masa_network
fi

echo "Starting requested nodes:"
if [ "$VALIDATOR_COUNT" -gt 0 ]; then
  echo "- $VALIDATOR_COUNT validator(s)"
fi
if [ "$MINER_COUNT" -gt 0 ]; then
  echo "- $MINER_COUNT miner(s)"
fi
if [ "$ENABLE_BOOTNODE" = "true" ]; then
  echo "- 1 bootnode"
fi
# Oracle workers are only started together with the bootnode (see below), so
# only announce them when they will really be launched.
if [ "$ORACLE_WORKER_COUNT" -gt 0 ] && [ "$ENABLE_BOOTNODE" = "true" ]; then
  echo "- $ORACLE_WORKER_COUNT oracle worker(s)"
fi
if [ "$TEE_WORKER_COUNT" -gt 0 ]; then
  echo "- $TEE_WORKER_COUNT TEE worker(s)"
fi

# Start validators
for ((i = 1; i <= VALIDATOR_COUNT; i++)); do
  echo "Starting validator $i..."
  start_node "validator" "$i" "$VALIDATOR_PORT" "$VALIDATOR_METRICS_PORT" "$VALIDATOR_GRAFANA_PORT"
done

# Start miners
for ((i = 1; i <= MINER_COUNT; i++)); do
  echo "Starting miner $i..."
  start_node "miner" "$i" "$MINER_PORT" "$MINER_METRICS_PORT" "$MINER_GRAFANA_PORT"
done

if [ "$ENABLE_BOOTNODE" = "true" ]; then
  echo "Starting bootnode..."
  start_bootnode
fi

# Start oracle workers (requires the bootnode to be enabled)
if [ "$ORACLE_WORKER_COUNT" -gt 0 ]; then
  if [ "$ENABLE_BOOTNODE" = "true" ]; then
    for ((i = 1; i <= ORACLE_WORKER_COUNT; i++)); do
      echo "Starting oracle worker $i..."
      start_oracle_worker "$i"
    done
  else
    # Previously this case was skipped without any message.
    echo "WARNING: ORACLE_WORKER_COUNT=$ORACLE_WORKER_COUNT but ENABLE_BOOTNODE is not \"true\"; skipping oracle workers" >&2
  fi
fi

# Start TEE workers
for ((i = 1; i <= TEE_WORKER_COUNT; i++)); do
  echo "Starting TEE worker $i..."
  start_node "tee-worker" "$i" "$TEE_WORKER_PORT" "$TEE_WORKER_METRICS_PORT" "$TEE_WORKER_GRAFANA_PORT"
done

echo -e "\nActual running containers:"
# Filter inside docker instead of piping through grep: grep exits non-zero
# when nothing matches, which would abort the script under `set -e`, and it
# also strips the table header.
docker ps --filter "name=masa_" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"

# Wait a bit for logs to be available
sleep 5

# Display node information
display_node_info

echo "All nodes started. Check logs with:"
echo "docker logs -f masa_validator_N # where N is the validator number"
echo "docker logs -f masa_miner_N # where N is the miner number"
echo "docker logs -f masa_bootnode # for the bootnode"
echo "docker logs -f masa_oracle_worker_N # where N is the oracle worker number"
echo "docker logs -f masa_tee-worker_N # where N is the TEE worker number"