From 32c5254415606664b162ba003cd7bf887dd390a3 Mon Sep 17 00:00:00 2001 From: Maxime Desroches Date: Sat, 2 Nov 2024 21:19:05 -0700 Subject: [PATCH] ci: faster test_onroad (#33906) * 30s * ruff * remove * timeout * global * value * get value * better * format * cleaner --- Jenkinsfile | 4 +- selfdrive/test/test_onroad.py | 88 ++++++++++++++++++----------------- 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index b47a3884a03694..d6ebcb2589860c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -66,7 +66,7 @@ fi ln -snf ${env.TEST_DIR} /data/pythonpath cd ${env.TEST_DIR} || true -${cmd} +time ${cmd} END""" sh script: ssh_cmd, label: step_label @@ -198,7 +198,7 @@ node { //["build master-ci", "cd $SOURCE_DIR/release && TARGET_DIR=$TEST_DIR $SOURCE_DIR/scripts/retry.sh ./build_devel.sh"], step("build openpilot", "cd system/manager && ./build.py"), step("check dirty", "release/check-dirty.sh"), - step("onroad tests", "pytest selfdrive/test/test_onroad.py -s"), + step("onroad tests", "pytest selfdrive/test/test_onroad.py -s", [timeout: 60]), //["time to onroad", "pytest selfdrive/test/test_time_to_onroad.py"], ]) }, diff --git a/selfdrive/test/test_onroad.py b/selfdrive/test/test_onroad.py index 35338419de6be1..4908224b64f3fc 100644 --- a/selfdrive/test/test_onroad.py +++ b/selfdrive/test/test_onroad.py @@ -10,7 +10,6 @@ import numpy as np import zstandard as zstd from collections import Counter, defaultdict -from functools import cached_property from pathlib import Path from cereal import car, log @@ -33,6 +32,9 @@ should not exceed MAX_TOTAL_CPU """ +TEST_DURATION = 25 +LOG_OFFSET = 8 + MAX_TOTAL_CPU = 265. # total for all 8 cores PROCS = { # Baseline CPU usage by process @@ -58,12 +60,12 @@ "./proclogd": 1.54, "system.logmessaged": 0.2, "system.tombstoned": 0, - "./logcatd": 0, + "./logcatd": 1.0, "system.micd": 5.0, "system.timed": 0, "selfdrive.pandad.pandad": 0, "system.statsd": 0.4, - "system.loggerd.uploader": (0.5, 15.0), + "system.loggerd.uploader": (0.0, 15.0), "system.loggerd.deleter": 0.1, } @@ -98,6 +100,13 @@ "wideRoadCameraState": [1.5, 0.35], } +LOGS_SIZE_RATE = { + "qlog": 0.0083, + "rlog": 0.1528, + "qcamera.ts": 0.03828, +} +LOGS_SIZE_RATE.update(dict.fromkeys(['ecamera.hevc', 'fcamera.hevc'], 1.2740)) + def cputime_total(ct): return ct.cpuUser + ct.cpuSystem + ct.cpuChildrenUser + ct.cpuChildrenSystem @@ -124,7 +133,7 @@ def setup_class(cls): if os.path.exists(Paths.log_root()): shutil.rmtree(Paths.log_root()) - # start manager and run openpilot for a minute + # start manager and run openpilot for TEST_DURATION proc = None try: manager_path = os.path.join(BASEDIR, "system/manager/manager.py") @@ -135,26 +144,24 @@ def setup_class(cls): while sm.recv_frame['carState'] < 0: sm.update(1000) - # make sure we get at least two full segments route = None cls.segments = [] with Timeout(300, "timed out waiting for logs"): while route is None: route = params.get("CurrentRoute", encoding="utf-8") - time.sleep(0.1) + time.sleep(0.01) # test car params caching params.put("CarParamsCache", car.CarParams().to_bytes()) - while len(cls.segments) < 3: + while len(cls.segments) < 1: segs = set() if Path(Paths.log_root()).exists(): segs = set(Path(Paths.log_root()).glob(f"{route}--*")) cls.segments = sorted(segs, key=lambda s: int(str(s).rsplit('--')[-1])) - time.sleep(2) + time.sleep(0.01) - # chop off last, incomplete segment - cls.segments = cls.segments[:-1] + time.sleep(TEST_DURATION) finally: cls.gpu_procs = {psutil.Process(int(f.name)).name() for f in pathlib.Path('/sys/devices/virtual/kgsl/kgsl/proc/').iterdir() if f.is_dir()} @@ -166,9 +173,8 @@ def setup_class(cls): cls.lrs = [list(LogReader(os.path.join(str(s), "rlog"))) for s in cls.segments] - # use the second segment by default as it's the first full segment - cls.lr = list(LogReader(os.path.join(str(cls.segments[1]), "rlog"))) - cls.log_path = cls.segments[1] + cls.lr = list(LogReader(os.path.join(str(cls.segments[0]), "rlog"))) + cls.log_path = cls.segments[0] cls.log_sizes = {} for f in cls.log_path.iterdir(): @@ -178,16 +184,13 @@ def setup_class(cls): with open(f, 'rb') as ff: cls.log_sizes[f] = len(zstd.compress(ff.read(), LOG_COMPRESSION_LEVEL)) / 1e6 + cls.msgs = defaultdict(list) + for m in cls.lr: + cls.msgs[m.which()].append(m) - @cached_property - def service_msgs(self): - msgs = defaultdict(list) - for m in self.lr: - msgs[m.which()].append(m) - return msgs def test_service_frequencies(self, subtests): - for s, msgs in self.service_msgs.items(): + for s, msgs in self.msgs.items(): if s in ('initData', 'sentinel'): continue @@ -196,10 +199,10 @@ def test_service_frequencies(self, subtests): continue with subtests.test(service=s): - assert len(msgs) >= math.floor(SERVICE_LIST[s].frequency*55) + assert len(msgs) >= math.floor(SERVICE_LIST[s].frequency*int(TEST_DURATION*0.8)) def test_cloudlog_size(self): - msgs = [m for m in self.lr if m.which() == 'logMessage'] + msgs = self.msgs['logMessage'] total_size = sum(len(m.as_builder().to_bytes()) for m in msgs) assert total_size < 3.5e5 @@ -210,16 +213,10 @@ def test_cloudlog_size(self): def test_log_sizes(self): for f, sz in self.log_sizes.items(): - if f.name == "qcamera.ts": - assert 2.15 < sz < 2.6 - elif f.name == "qlog": - assert 0.4 < sz < 0.55 - elif f.name == "rlog": - assert 5 < sz < 50 - elif f.name.endswith('.hevc'): - assert 70 < sz < 80 - else: - raise NotImplementedError + rate = LOGS_SIZE_RATE[f.name] + minn = rate * TEST_DURATION * 0.8 + maxx = rate * TEST_DURATION * 1.2 + assert minn < sz < maxx def test_ui_timings(self): result = "\n" @@ -227,7 +224,7 @@ def test_ui_timings(self): result += "-------------- UI Draw Timing ------------------\n" result += "------------------------------------------------\n" - ts = [m.uiDebug.drawTimeMillis for m in self.service_msgs['uiDebug']] + ts = [m.uiDebug.drawTimeMillis for m in self.msgs['uiDebug']] result += f"min {min(ts):.2f}ms\n" result += f"max {max(ts):.2f}ms\n" result += f"std {np.std(ts):.2f}ms\n" @@ -250,7 +247,7 @@ def test_cpu_usage(self, subtests): result += "------------------------------------------------\n" plogs_by_proc = defaultdict(list) - for pl in self.service_msgs['procLog']: + for pl in self.msgs['procLog']: for x in pl.procLog.procs: if len(x.cmdline) > 0: n = list(x.cmdline)[0] @@ -258,7 +255,7 @@ def test_cpu_usage(self, subtests): print(plogs_by_proc.keys()) cpu_ok = True - dt = (self.service_msgs['procLog'][-1].logMonoTime - self.service_msgs['procLog'][0].logMonoTime) / 1e9 + dt = (self.msgs['procLog'][-1].logMonoTime - self.msgs['procLog'][0].logMonoTime) / 1e9 for proc_name, expected_cpu in PROCS.items(): err = "" @@ -290,7 +287,7 @@ def test_cpu_usage(self, subtests): result += "------------------------------------------------\n" # Ensure there's no missing procs - all_procs = {p.name for p in self.service_msgs['managerState'][0].managerState.processes if p.shouldBeRunning} + all_procs = {p.name for p in self.msgs['managerState'][0].managerState.processes if p.shouldBeRunning} for p in all_procs: with subtests.test(proc=p): assert any(p in pp for pp in PROCS.keys()), f"Expected CPU usage missing for {p}" @@ -308,7 +305,8 @@ def test_cpu_usage(self, subtests): assert cpu_ok def test_memory_usage(self): - mems = [m.deviceState.memoryUsagePercent for m in self.service_msgs['deviceState']] + offset = int(SERVICE_LIST['deviceState'].frequency * LOG_OFFSET) + mems = [m.deviceState.memoryUsagePercent for m in self.msgs['deviceState'][offset:]] print("Memory usage: ", mems) # check for big leaks. note that memory usage is @@ -324,7 +322,9 @@ def test_camera_processing_time(self): result += "-------------- ImgProc Timing ------------------\n" result += "------------------------------------------------\n" - ts = [getattr(m, m.which()).processingTime for m in self.lr if 'CameraState' in m.which()] + ts = [] + for s in ['roadCameraState', 'driverCameraState', 'wideCameraState']: + ts.extend(getattr(m, s).processingTime for m in self.msgs[s]) assert min(ts) < 0.025, f"high execution time: {min(ts)}" result += f"execution time: min {min(ts):.5f}s\n" result += f"execution time: max {max(ts):.5f}s\n" @@ -357,7 +357,7 @@ def test_mpc_execution_timings(self): cfgs = [("longitudinalPlan", 0.05, 0.05),] for (s, instant_max, avg_max) in cfgs: - ts = [getattr(m, s).solverExecutionTime for m in self.service_msgs[s]] + ts = [getattr(m, s).solverExecutionTime for m in self.msgs[s]] assert max(ts) < instant_max, f"high '{s}' execution time: {max(ts)}" assert np.mean(ts) < avg_max, f"high avg '{s}' execution time: {np.mean(ts)}" result += f"'{s}' execution time: min {min(ts):.5f}s\n" @@ -377,7 +377,7 @@ def test_model_execution_timings(self): ("driverStateV2", 0.050, 0.026), ] for (s, instant_max, avg_max) in cfgs: - ts = [getattr(m, s).modelExecutionTime for m in self.service_msgs[s]] + ts = [getattr(m, s).modelExecutionTime for m in self.msgs[s]] assert max(ts) < instant_max, f"high '{s}' execution time: {max(ts)}" assert np.mean(ts) < avg_max, f"high avg '{s}' execution time: {np.mean(ts)}" result += f"'{s}' execution time: min {min(ts):.5f}s\n" @@ -393,7 +393,8 @@ def test_timings(self): result += "----------------- Service Timings --------------\n" result += "------------------------------------------------\n" for s, (maxmin, rsd) in TIMINGS.items(): - msgs = [m.logMonoTime for m in self.service_msgs[s]] + offset = int(SERVICE_LIST[s].frequency * LOG_OFFSET) + msgs = [m.logMonoTime for m in self.msgs[s][offset:]] if not len(msgs): raise Exception(f"missing {s}") @@ -430,11 +431,12 @@ def test_startup(self): def test_engagable(self): no_entries = Counter() - for m in self.service_msgs['onroadEvents']: + for m in self.msgs['onroadEvents']: for evt in m.onroadEvents: if evt.noEntry: no_entries[evt.name] += 1 - eng = [m.selfdriveState.engageable for m in self.service_msgs['selfdriveState']] + offset = int(SERVICE_LIST['selfdriveState'].frequency * LOG_OFFSET) + eng = [m.selfdriveState.engageable for m in self.msgs['selfdriveState'][offset:]] assert all(eng), \ f"Not engageable for whole segment:\n- selfdriveState.engageable: {Counter(eng)}\n- No entry events: {no_entries}"