forked from genesys-neu/t-prime
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTprime_USRP_run.py
409 lines (353 loc) · 16.6 KB
/
Tprime_USRP_run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
#!/usr/bin/env python3
# Developer switch: when True, extend the module search path with a set of
# machine-specific Python 3.10 locations before the heavy imports run.
doAddPaths = False
if doAddPaths:
    import sys

    for _extra_path in (
        '/home/mauro/.local/bin',
        '/usr/lib/python310.zip',
        '/usr/lib/python3.10',
        '/usr/lib/python3.10/lib-dynload',
        '/home/mauro/.local/lib/python3.10/site-packages',
        '/usr/local/lib/python3.10/dist-packages',
        '/usr/lib/python3/dist-packages',
    ):
        sys.path.append(_extra_path)
# Import Packages
import numpy as np
import torch
import os
import SoapySDR
from SoapySDR import SOAPY_SDR_RX, SOAPY_SDR_CS16
from SoapySDR import * # SOAPY_SDR_ constants
from scipy.signal import resample_poly, firwin, bilinear, lfilter
import matplotlib.pyplot as plt
import time
import argparse
import threading
from TPrime_transformer.model_transformer import TransformerModel
from queue import Queue
from preprocessing.model_rmsnorm import RMSNorm
# Number of complex samples requested from the radio per readStream() call;
# the receive buffer holds 2 * N interleaved int16 values.
N = 12900 # number of complex samples needed
# Bounded hand-off queue: receiver() puts raw buffers, signalprocessing() gets them.
q = Queue(2)
# Bounded hand-off queue: signalprocessing() puts model inputs, the ML thread gets them.
q2 = Queue(2)
# our decisions will also be delayed by 206 ms once the buffer is full
# Default centre frequency; overridden by --frequency in the __main__ block.
freq = 2.457e9 # LO tuning frequency in Hz
# Polled by every worker loop; the __main__ block sets it to 1 to stop them.
exitFlag = 0
# Used both to configure the radio and to design the low-pass filter.
fs = 31.25e6 # Radio sample Rate
# Default run time in seconds before shutdown; overridden by --timeout.
t_out = 60
# producer task
def receiver(rx_driver, rx_type):
    """Producer thread: stream raw I/Q buffers from the SDR into ``q``.

    Opens a SoapySDR device, configures RX channel 0 with the module-level
    sample rate ``fs`` and LO frequency ``freq``, then repeatedly reads ``N``
    complex samples (interleaved int16) until the module-level ``exitFlag``
    becomes truthy. Each successfully read buffer is copied into ``q`` when
    there is room; otherwise it is dropped.

    Args:
        rx_driver: SoapySDR driver name, e.g. "uhd".
        rx_type: device type handed to the driver, e.g. "b200".
    """
    rx_chan = 0  # RX1 = 0, RX2 = 1
    use_agc = True  # Use or don't use the AGC
    timeout_us = int(5e6)  # readStream timeout, in microseconds
    time_avg = 0

    # enumerate devices
    # TODO UHD device don't show up, but it shows up in the normal terminal with ipython
    for result in SoapySDR.Device.enumerate():
        print(result)
    print("[RECEIVER] RX_DRIVER:", rx_driver, "RX_TYPE", rx_type)
    args_soapy = dict(driver=rx_driver, type=rx_type)  # e.g. driver="uhd", type="b200"
    sdr = SoapySDR.Device(args_soapy)
    sdr.setSampleRate(SOAPY_SDR_RX, rx_chan, fs)  # Set sample rate
    if use_agc:
        sdr.setGainMode(SOAPY_SDR_RX, rx_chan, use_agc)  # Set the gain mode
    sdr.setFrequency(SOAPY_SDR_RX, rx_chan, freq)  # Tune the LO

    # Create data buffer and start streaming samples to it
    rx_buff = np.empty(2 * N, np.int16)  # interleaved I/Q, reused for every read
    rx_stream = sdr.setupStream(SOAPY_SDR_RX, SOAPY_SDR_CS16, [rx_chan])  # Setup data stream
    sdr.activateStream(rx_stream)  # this turns the radio on
    file_cntr = 0
    restart_cntr = 0
    try:
        while not exitFlag:
            t1 = time.perf_counter()
            sr = sdr.readStream(rx_stream, [rx_buff], N, timeoutUs=timeout_us)
            file_cntr += 1
            # sr.ret is the number of samples read or an error code
            if sr.ret != N:
                # Short read or error: restart the stream and do not forward
                # the buffer, since its content is not a full fresh capture.
                sdr.deactivateStream(rx_stream)  # turn off the stream
                sdr.activateStream(rx_stream)  # turn on the stream again
                restart_cntr += 1
            elif not q.full():
                # Enqueue a copy: rx_buff is overwritten by the next
                # readStream() while the consumer may still be using it.
                q.put(rx_buff.copy())
            time_avg += time.perf_counter() - t1
    finally:
        # Always release the stream, even if the loop raised.
        sdr.deactivateStream(rx_stream)
        sdr.closeStream(rx_stream)
    time.sleep(1)
    print('Restarted {} times'.format(restart_cntr))
    # Guard against a shutdown that happened before the first read.
    avg_ms = 1000 * time_avg / file_cntr if file_cntr else 0.0
    print('Reciever takes {} ms on average to complete {} cycles'.format(avg_ms, file_cntr))
def signalprocessing():
    """Middle stage: turn raw int16 I/Q buffers from ``q`` into model inputs on ``q2``.

    For every buffer taken from ``q`` the samples are scaled to floats,
    combined into complex values, low-pass filtered, resampled by 16/25
    (31.25 MS/s -> 20 MS/s) and re-interleaved as real/imag floats. The result
    is put on ``q2`` when there is room, otherwise it is dropped. Runs until
    the module-level ``exitFlag`` becomes truthy.
    """
    from queue import Empty  # local import: the file only imports Queue

    rx_bits = 16  # The AIR-T's ADC is 16 bits
    full_scale = np.power(2.0, rx_bits - 1)
    taps = firwin(numtaps=101, cutoff=10e6, fs=fs)
    # MODEL_SIZE is fixed before the threads start, so resolve it once.
    small_model = MODEL_SIZE == 'sm'
    out_len = 3072 if small_model else 16384
    time_avg = 0
    cntr = 0
    while not exitFlag:
        # Block briefly instead of spinning on q.empty(); the timeout keeps
        # the loop responsive to exitFlag.
        try:
            item = q.get(timeout=0.1)
        except Empty:
            continue
        t1 = time.perf_counter()

        # Convert interleaved shorts (received signal) to complex values
        # normalized between [-1, 1]
        s0 = item.astype(float) / full_scale
        s = s0[::2] + 1j * s0[1::2]
        if small_model:
            s = s[:2500]

        # Low-Pass Filter
        lpf_samples = np.convolve(s, taps, 'valid')

        # Rational resample to 20e6:
        # 16*31.25=500, 20*25=500 (need LCM because input needs to be an int).
        # So we go up by factor of 16, then down by factor of 25.
        resampled_samples = resample_poly(lpf_samples, 16, 25)

        # convert to ML input: interleave real and imaginary parts
        s_final = np.empty(out_len)
        s_final[::2] = resampled_samples.real
        s_final[1::2] = resampled_samples.imag
        if not q2.full():
            q2.put(s_final)

        time_avg += time.perf_counter() - t1
        cntr += 1
    time.sleep(1)
    # Guard against a shutdown that happened before any buffer was processed.
    avg_ms = 1000 * time_avg / cntr if cntr else 0.0
    print('Signal processor takes {} ms on average to complete {} cycles'.format(avg_ms, cntr))
def machinelearning():
    """Consumer thread: classify each buffer from ``q2`` with the PyTorch model.

    Builds the small or large ``TransformerModel`` according to ``MODEL_SIZE``,
    loads the checkpoint at ``MODEL_PATH`` and then, until the module-level
    ``exitFlag`` becomes truthy, predicts one class per buffer. Each prediction
    is printed as a protocol name and written to ``output.txt`` as
    ``"<class index> <unix time>"``; the file is truncated every 500
    predictions.

    Raises:
        Exception: if the checkpoint cannot be loaded into the selected
            architecture. The original error is attached as ``__cause__``.
    """
    from queue import Empty  # local import: the file only imports Queue

    # Model configuration and loading
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    RMSNorm_layer = RMSNorm(model='Transformer') if RMSNORM else None
    if MODEL_SIZE == 'sm':
        seq_len, d_model = 24, 64 * 2
    else:  # lg architecture
        seq_len, d_model = 64, 128 * 2
    model = TransformerModel(classes=len(PROTOCOLS), d_model=d_model, seq_len=seq_len, nlayers=2, use_pos=False)
    try:
        model.load_state_dict(torch.load(MODEL_PATH, map_location=device)['model_state_dict'])
    except Exception as err:
        # Chain the cause so a missing file or a key mismatch stays visible.
        raise Exception("The model you provided does not correspond with the selected architecture. Please revise the path and try again.") from err
    model = model.float()
    model.to(device)
    model.eval()

    pred_cntr = 0
    time_avg = 0
    while not exitFlag:
        # Block briefly instead of spinning on q2.empty(); the timeout keeps
        # the loop responsive to exitFlag.
        try:
            samples = q2.get(timeout=0.1)
        except Empty:
            continue
        t1 = time.perf_counter()
        # split sequence into seq_len equally sized words and add a batch dimension
        words = samples.reshape(seq_len, -1)
        batch = torch.from_numpy(words).unsqueeze(0).to(device)
        # predict class
        with torch.no_grad():
            if RMSNorm_layer is not None:
                batch = RMSNorm_layer(batch)
            pred = model(batch.float()).argmax(1)
        pred_idx = pred.item()
        print(PROTOCOLS[pred_idx])
        # Write it in output file to pass it to the GUI;
        # every 500 predictions flush output content
        file_flag = 'w' if pred_cntr % 500 == 0 else 'a'
        with open('output.txt', file_flag) as file:
            file.write(f'{pred_idx} {time.time()}\n')
        pred_cntr += 1
        time_avg += time.perf_counter() - t1
    time.sleep(1)
    # Guard against a shutdown that happened before the first prediction.
    avg_ms = 1000 * time_avg / pred_cntr if pred_cntr else 0.0
    print("ML predictions takes {} ms on average to complete {} cycles".format(avg_ms, pred_cntr))
def machinelearning_tensorRT():
    """Consumer thread: classify each buffer from ``q2`` with a TensorRT engine.

    Builds the small or large ``TransformerModel`` according to ``MODEL_SIZE``,
    loads the checkpoint at ``MODEL_PATH``, exports it through
    ``generate_model_plan`` and runs inference with
    ``trt_utils.TrtInferFromPlan``. Until the module-level ``exitFlag`` becomes
    truthy, each prediction is printed as a protocol name and written to
    ``output.txt`` as ``"<class index> <unix time>"``; the file is truncated
    every 500 predictions.

    Raises:
        Exception: if the checkpoint cannot be loaded into the selected
            architecture. The original error is attached as ``__cause__``.
    """
    from queue import Empty  # local import: the file only imports Queue

    # Model configuration and loading
    batch_size = 1
    INPUT_NODE_NAME = 'input_buffer'  # (for TensorRT) User defined name of input node
    OUTPUT_NODE_NAME = 'output_buffer'  # User defined name of output node
    ONNX_VERSION = 14  # the ONNX version to export the model to newer GPU architecture
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    Nclasses = len(PROTOCOLS)

    # Setup the pyCUDA context
    trt_utils.make_cuda_context()

    RMSNorm_layer = RMSNorm(model='Transformer') if RMSNORM else None

    # The ONNX file is written next to the checkpoint, with '.pt' swapped for '.onnx'
    fname = os.path.basename(MODEL_PATH)
    if fname.endswith('.pt'):
        fname = fname[:-3]
    ONNX_FILE_NAME = os.path.join(os.path.dirname(MODEL_PATH), str(fname) + ".onnx")

    if MODEL_SIZE == 'sm':
        seq_len, sli_len = 24, 64 * 2
    else:  # lg architecture
        seq_len, sli_len = 64, 128 * 2
    model = TransformerModel(classes=Nclasses, d_model=sli_len, seq_len=seq_len, nlayers=2, use_pos=False)
    try:
        model.load_state_dict(torch.load(MODEL_PATH, map_location=device)['model_state_dict'])
    except Exception as err:
        # Chain the cause so a missing file or a key mismatch stays visible.
        raise Exception(
            "The model you provided does not correspond with the selected architecture. Please revise the path and try again.") from err

    # Random example input, used only to trace the model during export
    slice_in = np.random.random((batch_size, seq_len, sli_len))
    model = model.float()
    model.to(device)
    model.eval()
    plan_file = generate_model_plan(INPUT_NODE_NAME, ONNX_FILE_NAME, ONNX_VERSION, OUTPUT_NODE_NAME, model, slice_in, benchmark=False)

    input_dtype = np.float32
    # Use pyCUDA to create a shared memory buffer that will receive samples
    # to be fed into the neural network.
    batch_size, seq_len, cplx_samples = slice_in.shape
    buff_len = seq_len * cplx_samples * batch_size
    sample_buffer = trt_utils.MappedBuffer(buff_len, input_dtype)

    # Set up the inference engine. Note that the output buffers are created for
    # us when we create the inference object.
    dnn = trt_utils.TrtInferFromPlan(plan_file, batch_size,
                                     sample_buffer, verbose=False)

    pred_cntr = 0
    time_avg = 0
    while not exitFlag:
        # Block briefly instead of spinning on q2.empty(); the timeout keeps
        # the loop responsive to exitFlag.
        try:
            samples = q2.get(timeout=0.1)
        except Empty:
            continue
        t1 = time.perf_counter()
        # split sequence into seq_len equally sized words and add a batch dimension
        words = samples.reshape(seq_len, -1)
        batch = torch.from_numpy(words).unsqueeze(0).to(device)
        with torch.no_grad():
            if RMSNorm_layer is not None:
                batch = RMSNorm_layer(batch)  # NOTE: this should also be included in the .plan
        X = batch.cpu().numpy()
        # predict class
        dnn.input_buff.host[:] = X.flatten().astype(input_dtype)
        dnn.feed_forward()
        trt_out = dnn.output_buff.host.reshape((batch_size, Nclasses))
        pred_idx = trt_out.argmax(1).item()
        print(PROTOCOLS[pred_idx])
        # Write it in output file to pass it to the GUI;
        # every 500 predictions flush output content
        file_flag = 'w' if pred_cntr % 500 == 0 else 'a'
        with open('output.txt', file_flag) as file:
            file.write(f'{pred_idx} {time.time()}\n')
        pred_cntr += 1
        time_avg += time.perf_counter() - t1
    time.sleep(1)
    # Guard against a shutdown that happened before the first prediction.
    avg_ms = 1000 * time_avg / pred_cntr if pred_cntr else 0.0
    print("ML predictions takes {} ms on average to complete {} cycles".format(avg_ms, pred_cntr))
def generate_model_plan(INPUT_NODE_NAME, ONNX_FILE_NAME, ONNX_VERSION, OUTPUT_NODE_NAME, model, slice_in, benchmark=False):
    """Export ``model`` to ONNX, convert it with ``onnx2plan`` and return the plan path.

    Args:
        INPUT_NODE_NAME: name given to the ONNX input node.
        ONNX_FILE_NAME: destination path of the exported ONNX file.
        ONNX_VERSION: opset version passed to ``torch.onnx.export``.
        OUTPUT_NODE_NAME: name given to the ONNX output node.
        model: module to export; it must expose a ``device`` attribute.
        slice_in: example input array with three dimensions, used for
            tracing and for the sizes handed to ``onnx2plan``.
        benchmark: when true, also run ``plan_bench`` on the generated plan.

    Returns:
        ``ONNX_FILE_NAME`` with '.onnx' replaced by '.plan'.
    """
    example_input = torch.Tensor(slice_in).to(model.device.type)

    # Let's produce the ONNX schema of the current model; axis 0 of both the
    # input and the output is declared dynamic.
    torch.onnx.export(
        model,
        example_input,
        ONNX_FILE_NAME,
        export_params=True,
        opset_version=ONNX_VERSION,
        do_constant_folding=True,
        input_names=[INPUT_NODE_NAME],
        output_names=[OUTPUT_NODE_NAME],
        dynamic_axes={
            INPUT_NODE_NAME: {0: 'batch_size'},
            OUTPUT_NODE_NAME: {0: 'batch_size'},
        },
    )

    # Then create the relative plan file using TensorRT
    workspace_bytes = 1024 ** 3  # 1 GB for example
    onnx2plan(
        onnx_file_name=ONNX_FILE_NAME,
        nchan=slice_in.shape[1],
        input_len=slice_in.shape[2],
        logger=trt.Logger(trt.Logger.WARNING),
        MAX_BATCH_SIZE=slice_in.shape[0],
        MAX_WORKSPACE_SIZE=workspace_bytes,
        BENCHMARK=True,  # always True here, independent of `benchmark`
    )

    plan_path = ONNX_FILE_NAME.replace('.onnx', '.plan')
    if not benchmark:
        return plan_path

    print('Running Inference Benchmark')
    plan_bench(
        plan_file_name=plan_path,
        cplx_samples=slice_in.shape[2],
        num_chans=slice_in.shape[1],
        batch_size=slice_in.shape[0],
        num_batches=512,
        input_dtype=np.float32,
    )
    return plan_path
# TODO add GUI interface - will this require another threadsafe queue?
if __name__ == '__main__':
    # Entry point: parse the command line, publish the settings as module
    # globals read by the worker threads, run the three-stage pipeline for
    # t_out seconds and then shut it down.
    parser = argparse.ArgumentParser()
    parser.add_argument('-fq', '--frequency', help='center frequency, default is 2.457e9', type=float)
    parser.add_argument('-t', '--timeout', help='amount of time (in seconds) to run before graceful exit, '
                                                'default is 60s', type=int)
    parser.add_argument("--model_path", default='./', help='Path to the checkpoint to load the model for inference.')
    parser.add_argument("--model_size", default="lg", choices=["sm", "lg"], help="Define the use of the large or the small transformer.")
    parser.add_argument("--RMSNorm", default=False, action='store_true', help="If present, we apply RMS normalization on input signals while training and testing")
    parser.add_argument("--tensorRT", action="store_true", default=False, help='Use TensorRT model')
    # nargs='+' makes a command-line value a list like the default; without
    # it the value is a single string and len(PROTOCOLS) counts characters.
    parser.add_argument("--protocols", nargs='+', default=['802_11ax', '802_11b', '802_11n', '802_11g', 'noise'], help="Specify the list of classes")
    parser.add_argument("--rx_driver", default="uhd", choices=["uhd"], help="USRP driver type")
    parser.add_argument("--rx_type", default="x300", choices=["x300", "b200"], help="USRP device type")
    args = parser.parse_args()

    if args.frequency:
        freq = args.frequency
    if args.timeout:
        t_out = args.timeout
    MODEL_PATH = args.model_path
    MODEL_SIZE = args.model_size
    RMSNORM = args.RMSNorm
    if args.tensorRT:
        # TensorRT dependencies are imported only when requested.
        import tensorrt as trt
        from preprocessing.inference.onnx2plan import onnx2plan
        from preprocessing.inference.plan_bench import plan_bench
        import preprocessing.inference.trt_utils as trt_utils
        MODE = 'TensorRT'  # choices=['pytorch', 'TensorRT']
    else:
        MODE = 'pytorch'
    PROTOCOLS = args.protocols
    # if MODEL_SIZE == 'sm': #this causes more frequent buffer overflows in the receiver thread
    #     N = 2500
    RX_DRIVER = args.rx_driver
    RX_TYPE = args.rx_type

    rec = threading.Thread(target=receiver, args=(RX_DRIVER, RX_TYPE))
    rec.start()
    sp = threading.Thread(target=signalprocessing)
    sp.start()
    if MODE == 'pytorch':
        ml = threading.Thread(target=machinelearning)
    elif MODE == 'TensorRT':
        ml = threading.Thread(target=machinelearning_tensorRT)
    ml.start()

    # gracefully end program; the finally clause also covers an interrupt
    # during the sleep, which would otherwise leave the workers looping.
    try:
        time.sleep(t_out)
    finally:
        exitFlag = 1
        time.sleep(1)
        rec.join()
        sp.join()
        ml.join()