From c2d5609799b16792355bc711ac1c8f3c9df9f3d9 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Fri, 15 Nov 2024 11:38:07 -0800
Subject: [PATCH] Add an adapter for benchmark.oss_ci_benchmark_v3 (#5921)

The schema comes from
https://github.com/pytorch/test-infra/blob/main/torchci/clickhouse_queries/oss_ci_benchmark_v3/query.sql.
An example S3 path is
`s3://ossci-benchmarks/v3/pytorch/pytorch/11850871071/33027181871/add_loop_eager_dynamic.json`

I think we should figure out how to test changes to these replicator
lambdas. Otherwise, we might lose some data if they break. Any thoughts?
---
 .../lambda_function.py                        | 68 +++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/aws/lambda/clickhouse-replicator-s3/lambda_function.py b/aws/lambda/clickhouse-replicator-s3/lambda_function.py
index cb8e712bf2..ea5043c0b6 100644
--- a/aws/lambda/clickhouse-replicator-s3/lambda_function.py
+++ b/aws/lambda/clickhouse-replicator-s3/lambda_function.py
@@ -379,6 +379,72 @@ def torchao_perf_stats_adapter(table, bucket, key) -> None:
     general_adapter(table, bucket, key, schema, ["none"], "CSV")
 
 
+def oss_ci_benchmark_v3_adapter(table, bucket, key) -> None:  # schema-only wrapper; see general_adapter
+    schema = """
+    `timestamp` UInt64,
+    `schema_version` String DEFAULT 'v3',
+    `name` String,
+    `repo` String DEFAULT 'pytorch/pytorch',
+    `head_branch` String,
+    `head_sha` String,
+    `workflow_id` UInt64,
+    `run_attempt` UInt32,
+    `job_id` UInt64,
+    `servicelab_experiment_id` UInt64 DEFAULT '0',
+    `servicelab_trial_id` UInt64 DEFAULT '0',
+    `s3_path` String,
+    `runners` Array(
+        Tuple(
+            name String,
+            type String,
+            cpu_info String,
+            cpu_count UInt32,
+            mem_info String,
+            avail_mem_in_gb UInt32,
+            gpu_info String,
+            gpu_count UInt32,
+            gpu_mem_info String,
+            avail_gpu_mem_in_gb UInt32,
+            extra_info Map(String, String)
+        )
+    ),
+    `benchmark` Tuple(
+        name String,
+        mode String,
+        dtype String,
+        extra_info Map(String, String)
+    ),
+    `model` Tuple (
+        name String,
+        type String,
+        backend String,
+        origins Array(String),
+        extra_info Map(String, String)
+    ),
+    `inputs` Map(
+        String,
+        Tuple(dtype String, extra_info Map(String, String))
+    ),
+    `dependencies` Map(
+        String,
+        Tuple(
+            `repo` String,
+            `branch` String,
+            `sha` String,
+            `version` String,
+            extra_info Map(String, String)
+        )
+    ),
+    `metric` Tuple(
+        name String,
+        benchmark_values Array(Float32),
+        target_value Float32,
+        extra_info Map(String, String)
+    ),
+    """
+    general_adapter(table, bucket, key, schema, ["gzip"], "JSONEachRow")  # NOTE(review): schema has quoted DEFAULTs and a trailing comma; confirm general_adapter accepts both
+
+
 def torchbench_userbenchmark_adapter(table, bucket, key):
     schema = """
     `environ` String,
@@ -434,6 +500,7 @@ def stable_pushes_adapter(table, bucket, key):
     "torchbench-userbenchmark": "benchmark.torchbench_userbenchmark",
     "ossci_uploaded_metrics": "misc.ossci_uploaded_metrics",
     "stable_pushes": "misc.stable_pushes",
+    "v3": "benchmark.oss_ci_benchmark_v3",
 }
 
 OBJECT_CONVERTER = {
@@ -450,6 +517,7 @@ def stable_pushes_adapter(table, bucket, key):
     "benchmark.torchbench_userbenchmark": torchbench_userbenchmark_adapter,
     "misc.ossci_uploaded_metrics": ossci_uploaded_metrics_adapter,
     "misc.stable_pushes": stable_pushes_adapter,
+    "benchmark.oss_ci_benchmark_v3": oss_ci_benchmark_v3_adapter,
 }