From 32c5254415606664b162ba003cd7bf887dd390a3 Mon Sep 17 00:00:00 2001
From: Maxime Desroches <desroches.maxime@gmail.com>
Date: Sat, 2 Nov 2024 21:19:05 -0700
Subject: [PATCH] ci: faster test_onroad (#33906)

* 30s

* ruff

* remove

* timeout

* global

* value

* get value

* better

* format

* cleaner
---
 Jenkinsfile                   |  4 +-
 selfdrive/test/test_onroad.py | 88 ++++++++++++++++++-----------------
 2 files changed, 47 insertions(+), 45 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index b47a3884a03694..d6ebcb2589860c 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -66,7 +66,7 @@ fi
 ln -snf ${env.TEST_DIR} /data/pythonpath
 
 cd ${env.TEST_DIR} || true
-${cmd}
+time ${cmd}
 END"""
 
     sh script: ssh_cmd, label: step_label
@@ -198,7 +198,7 @@ node {
           //["build master-ci", "cd $SOURCE_DIR/release && TARGET_DIR=$TEST_DIR $SOURCE_DIR/scripts/retry.sh ./build_devel.sh"],
           step("build openpilot", "cd system/manager && ./build.py"),
           step("check dirty", "release/check-dirty.sh"),
-          step("onroad tests", "pytest selfdrive/test/test_onroad.py -s"),
+          step("onroad tests", "pytest selfdrive/test/test_onroad.py -s", [timeout: 60]),
           //["time to onroad", "pytest selfdrive/test/test_time_to_onroad.py"],
         ])
       },
diff --git a/selfdrive/test/test_onroad.py b/selfdrive/test/test_onroad.py
index 35338419de6be1..4908224b64f3fc 100644
--- a/selfdrive/test/test_onroad.py
+++ b/selfdrive/test/test_onroad.py
@@ -10,7 +10,6 @@
 import numpy as np
 import zstandard as zstd
 from collections import Counter, defaultdict
-from functools import cached_property
 from pathlib import Path
 
 from cereal import car, log
@@ -33,6 +32,9 @@
   should not exceed MAX_TOTAL_CPU
 """
 
+TEST_DURATION = 25
+LOG_OFFSET = 8
+
 MAX_TOTAL_CPU = 265.  # total for all 8 cores
 PROCS = {
   # Baseline CPU usage by process
@@ -58,12 +60,12 @@
   "./proclogd": 1.54,
   "system.logmessaged": 0.2,
   "system.tombstoned": 0,
-  "./logcatd": 0,
+  "./logcatd": 1.0,
   "system.micd": 5.0,
   "system.timed": 0,
   "selfdrive.pandad.pandad": 0,
   "system.statsd": 0.4,
-  "system.loggerd.uploader": (0.5, 15.0),
+  "system.loggerd.uploader": (0.0, 15.0),
   "system.loggerd.deleter": 0.1,
 }
 
@@ -98,6 +100,13 @@
   "wideRoadCameraState": [1.5, 0.35],
 }
 
+LOGS_SIZE_RATE = {
+  "qlog": 0.0083,
+  "rlog": 0.1528,
+  "qcamera.ts": 0.03828,
+}
+LOGS_SIZE_RATE.update(dict.fromkeys(['ecamera.hevc', 'fcamera.hevc'], 1.2740))
+
 
 def cputime_total(ct):
   return ct.cpuUser + ct.cpuSystem + ct.cpuChildrenUser + ct.cpuChildrenSystem
@@ -124,7 +133,7 @@ def setup_class(cls):
     if os.path.exists(Paths.log_root()):
       shutil.rmtree(Paths.log_root())
 
-    # start manager and run openpilot for a minute
+    # start manager and run openpilot for TEST_DURATION
     proc = None
     try:
       manager_path = os.path.join(BASEDIR, "system/manager/manager.py")
@@ -135,26 +144,24 @@ def setup_class(cls):
         while sm.recv_frame['carState'] < 0:
           sm.update(1000)
 
-      # make sure we get at least two full segments
       route = None
       cls.segments = []
       with Timeout(300, "timed out waiting for logs"):
         while route is None:
           route = params.get("CurrentRoute", encoding="utf-8")
-          time.sleep(0.1)
+          time.sleep(0.01)
 
         # test car params caching
         params.put("CarParamsCache", car.CarParams().to_bytes())
 
-        while len(cls.segments) < 3:
+        while len(cls.segments) < 1:
           segs = set()
           if Path(Paths.log_root()).exists():
             segs = set(Path(Paths.log_root()).glob(f"{route}--*"))
           cls.segments = sorted(segs, key=lambda s: int(str(s).rsplit('--')[-1]))
-          time.sleep(2)
+          time.sleep(0.01)
 
-      # chop off last, incomplete segment
-      cls.segments = cls.segments[:-1]
+      time.sleep(TEST_DURATION)
 
     finally:
       cls.gpu_procs = {psutil.Process(int(f.name)).name() for f in pathlib.Path('/sys/devices/virtual/kgsl/kgsl/proc/').iterdir() if f.is_dir()}
@@ -166,9 +173,8 @@ def setup_class(cls):
 
     cls.lrs = [list(LogReader(os.path.join(str(s), "rlog"))) for s in cls.segments]
 
-    # use the second segment by default as it's the first full segment
-    cls.lr = list(LogReader(os.path.join(str(cls.segments[1]), "rlog")))
-    cls.log_path = cls.segments[1]
+    cls.lr = list(LogReader(os.path.join(str(cls.segments[0]), "rlog")))
+    cls.log_path = cls.segments[0]
 
     cls.log_sizes = {}
     for f in cls.log_path.iterdir():
@@ -178,16 +184,13 @@ def setup_class(cls):
         with open(f, 'rb') as ff:
           cls.log_sizes[f] = len(zstd.compress(ff.read(), LOG_COMPRESSION_LEVEL)) / 1e6
 
+    cls.msgs = defaultdict(list)
+    for m in cls.lr:
+      cls.msgs[m.which()].append(m)
 
-  @cached_property
-  def service_msgs(self):
-    msgs = defaultdict(list)
-    for m in self.lr:
-      msgs[m.which()].append(m)
-    return msgs
 
   def test_service_frequencies(self, subtests):
-    for s, msgs in self.service_msgs.items():
+    for s, msgs in self.msgs.items():
       if s in ('initData', 'sentinel'):
         continue
 
@@ -196,10 +199,10 @@ def test_service_frequencies(self, subtests):
         continue
 
       with subtests.test(service=s):
-        assert len(msgs) >= math.floor(SERVICE_LIST[s].frequency*55)
+        assert len(msgs) >= math.floor(SERVICE_LIST[s].frequency*int(TEST_DURATION*0.8))
 
   def test_cloudlog_size(self):
-    msgs = [m for m in self.lr if m.which() == 'logMessage']
+    msgs = self.msgs['logMessage']
 
     total_size = sum(len(m.as_builder().to_bytes()) for m in msgs)
     assert total_size < 3.5e5
@@ -210,16 +213,10 @@ def test_cloudlog_size(self):
 
   def test_log_sizes(self):
     for f, sz in self.log_sizes.items():
-      if f.name == "qcamera.ts":
-        assert 2.15 < sz < 2.6
-      elif f.name == "qlog":
-        assert 0.4 < sz < 0.55
-      elif f.name == "rlog":
-        assert 5 < sz < 50
-      elif f.name.endswith('.hevc'):
-        assert 70 < sz < 80
-      else:
-        raise NotImplementedError
+      rate = LOGS_SIZE_RATE[f.name]
+      minn = rate * TEST_DURATION * 0.8
+      maxx = rate * TEST_DURATION * 1.2
+      assert minn < sz <  maxx
 
   def test_ui_timings(self):
     result = "\n"
@@ -227,7 +224,7 @@ def test_ui_timings(self):
     result += "-------------- UI Draw Timing ------------------\n"
     result += "------------------------------------------------\n"
 
-    ts = [m.uiDebug.drawTimeMillis for m in self.service_msgs['uiDebug']]
+    ts = [m.uiDebug.drawTimeMillis for m in self.msgs['uiDebug']]
     result += f"min  {min(ts):.2f}ms\n"
     result += f"max  {max(ts):.2f}ms\n"
     result += f"std  {np.std(ts):.2f}ms\n"
@@ -250,7 +247,7 @@ def test_cpu_usage(self, subtests):
     result += "------------------------------------------------\n"
 
     plogs_by_proc = defaultdict(list)
-    for pl in self.service_msgs['procLog']:
+    for pl in self.msgs['procLog']:
       for x in pl.procLog.procs:
         if len(x.cmdline) > 0:
           n = list(x.cmdline)[0]
@@ -258,7 +255,7 @@ def test_cpu_usage(self, subtests):
     print(plogs_by_proc.keys())
 
     cpu_ok = True
-    dt = (self.service_msgs['procLog'][-1].logMonoTime - self.service_msgs['procLog'][0].logMonoTime) / 1e9
+    dt = (self.msgs['procLog'][-1].logMonoTime - self.msgs['procLog'][0].logMonoTime) / 1e9
     for proc_name, expected_cpu in PROCS.items():
 
       err = ""
@@ -290,7 +287,7 @@ def test_cpu_usage(self, subtests):
     result += "------------------------------------------------\n"
 
     # Ensure there's no missing procs
-    all_procs = {p.name for p in self.service_msgs['managerState'][0].managerState.processes if p.shouldBeRunning}
+    all_procs = {p.name for p in self.msgs['managerState'][0].managerState.processes if p.shouldBeRunning}
     for p in all_procs:
       with subtests.test(proc=p):
         assert any(p in pp for pp in PROCS.keys()), f"Expected CPU usage missing for {p}"
@@ -308,7 +305,8 @@ def test_cpu_usage(self, subtests):
     assert cpu_ok
 
   def test_memory_usage(self):
-    mems = [m.deviceState.memoryUsagePercent for m in self.service_msgs['deviceState']]
+    offset = int(SERVICE_LIST['deviceState'].frequency * LOG_OFFSET)
+    mems = [m.deviceState.memoryUsagePercent for m in self.msgs['deviceState'][offset:]]
     print("Memory usage: ", mems)
 
     # check for big leaks. note that memory usage is
@@ -324,7 +322,9 @@ def test_camera_processing_time(self):
     result += "-------------- ImgProc Timing ------------------\n"
     result += "------------------------------------------------\n"
 
-    ts = [getattr(m, m.which()).processingTime for m in self.lr if 'CameraState' in m.which()]
+    ts = []
+    for s in ['roadCameraState', 'driverCameraState', 'wideCameraState']:
+      ts.extend(getattr(m, s).processingTime for m in self.msgs[s])
     assert min(ts) < 0.025, f"high execution time: {min(ts)}"
     result += f"execution time: min  {min(ts):.5f}s\n"
     result += f"execution time: max  {max(ts):.5f}s\n"
@@ -357,7 +357,7 @@ def test_mpc_execution_timings(self):
 
     cfgs = [("longitudinalPlan", 0.05, 0.05),]
     for (s, instant_max, avg_max) in cfgs:
-      ts = [getattr(m, s).solverExecutionTime for m in self.service_msgs[s]]
+      ts = [getattr(m, s).solverExecutionTime for m in self.msgs[s]]
       assert max(ts) < instant_max, f"high '{s}' execution time: {max(ts)}"
       assert np.mean(ts) < avg_max, f"high avg '{s}' execution time: {np.mean(ts)}"
       result += f"'{s}' execution time: min  {min(ts):.5f}s\n"
@@ -377,7 +377,7 @@ def test_model_execution_timings(self):
       ("driverStateV2", 0.050, 0.026),
     ]
     for (s, instant_max, avg_max) in cfgs:
-      ts = [getattr(m, s).modelExecutionTime for m in self.service_msgs[s]]
+      ts = [getattr(m, s).modelExecutionTime for m in self.msgs[s]]
       assert max(ts) < instant_max, f"high '{s}' execution time: {max(ts)}"
       assert np.mean(ts) < avg_max, f"high avg '{s}' execution time: {np.mean(ts)}"
       result += f"'{s}' execution time: min  {min(ts):.5f}s\n"
@@ -393,7 +393,8 @@ def test_timings(self):
     result += "----------------- Service Timings --------------\n"
     result += "------------------------------------------------\n"
     for s, (maxmin, rsd) in TIMINGS.items():
-      msgs = [m.logMonoTime for m in self.service_msgs[s]]
+      offset = int(SERVICE_LIST[s].frequency * LOG_OFFSET)
+      msgs = [m.logMonoTime for m in self.msgs[s][offset:]]
       if not len(msgs):
         raise Exception(f"missing {s}")
 
@@ -430,11 +431,12 @@ def test_startup(self):
 
   def test_engagable(self):
     no_entries = Counter()
-    for m in self.service_msgs['onroadEvents']:
+    for m in self.msgs['onroadEvents']:
       for evt in m.onroadEvents:
         if evt.noEntry:
           no_entries[evt.name] += 1
 
-    eng = [m.selfdriveState.engageable for m in self.service_msgs['selfdriveState']]
+    offset = int(SERVICE_LIST['selfdriveState'].frequency * LOG_OFFSET)
+    eng = [m.selfdriveState.engageable for m in self.msgs['selfdriveState'][offset:]]
     assert all(eng), \
            f"Not engageable for whole segment:\n- selfdriveState.engageable: {Counter(eng)}\n- No entry events: {no_entries}"