From aab94e1bcdd1eb8133ec61dd8708e34a14d59c8e Mon Sep 17 00:00:00 2001
From: Zain Rizvi <ZainRizvi@users.noreply.github.com>
Date: Fri, 1 Nov 2024 17:19:06 -0500
Subject: [PATCH] Migrate lf rollover percentage query to CH (#5847)

Removed references to the obsolete amz2023 runner prefixes

Validation: Ensured that both the CH and Rockset versions of the query
return the same data and that the charts look the same
---
 .../lf_rollover_health/params.json            |  3 +-
 .../lf_rollover_percentage/params.json        |  3 +
 .../lf_rollover_percentage/query.sql          | 90 +++++++++++++++++++
 torchci/pages/metrics.tsx                     | 19 ++--
 4 files changed, 106 insertions(+), 9 deletions(-)
 create mode 100644 torchci/clickhouse_queries/lf_rollover_percentage/params.json
 create mode 100644 torchci/clickhouse_queries/lf_rollover_percentage/query.sql

diff --git a/torchci/clickhouse_queries/lf_rollover_health/params.json b/torchci/clickhouse_queries/lf_rollover_health/params.json
index 76dfd33584..79575a60ff 100644
--- a/torchci/clickhouse_queries/lf_rollover_health/params.json
+++ b/torchci/clickhouse_queries/lf_rollover_health/params.json
@@ -1,4 +1,3 @@
 {
-  "days_ago": "Int64",
-  "granularity": "String"
+  "days_ago": "Int64"
 }
\ No newline at end of file
diff --git a/torchci/clickhouse_queries/lf_rollover_percentage/params.json b/torchci/clickhouse_queries/lf_rollover_percentage/params.json
new file mode 100644
index 0000000000..79575a60ff
--- /dev/null
+++ b/torchci/clickhouse_queries/lf_rollover_percentage/params.json
@@ -0,0 +1,3 @@
+{
+  "days_ago": "Int64"
+}
\ No newline at end of file
diff --git a/torchci/clickhouse_queries/lf_rollover_percentage/query.sql b/torchci/clickhouse_queries/lf_rollover_percentage/query.sql
new file mode 100644
index 0000000000..e8c46a7238
--- /dev/null
+++ b/torchci/clickhouse_queries/lf_rollover_percentage/query.sql
@@ -0,0 +1,90 @@
+WITH
+    normalized_jobs AS (
+        SELECT
+            l AS label,
+            extract(j.name, '[^,]*') AS job_name, -- Remove shard number and label from job names
+            j.workflow_name,
+            toStartOfInterval(j.started_at, INTERVAL 1 HOUR) AS bucket
+        FROM
+            -- Deliberately not adding FINAL to this workflow_job.
+            -- Risks of not using it:
+            --   - You may get duplicate records for rows that were updated corresponding to their
+            --     before/after states, but as long as there’s some mechanism in the query to account
+            --     for that it’s okay (we check for j.status = 'completed').
+            --   - In the worst case scenario, you may only see the ‘old’ version of the records for some rows
+            -- Costs of using it:
+            --   - Query processing time increases from ~5 -> 16 seconds
+            --   - Memory usage grows from 7.5 GB -> 32 GB
+            -- So the tradeoff is worth it for this query.
+            workflow_job AS j
+            ARRAY JOIN j.labels as l
+        WHERE
+            j.created_at > now() - INTERVAL {days_ago: Int64} DAY
+            AND j.status = 'completed'
+            AND l != 'self-hosted'
+            AND l NOT LIKE 'lf.c.%'
+            AND l NOT LIKE '%canary%'
+    ),
+    lf_jobs AS (
+        SELECT
+            DISTINCT j.job_name
+        FROM
+            normalized_jobs AS j
+        WHERE
+            j.label LIKE 'lf%'
+    ),
+    -- filter jobs down to the ones that ran in both
+    -- LF and Meta fleets
+    comparable_jobs AS (
+        SELECT
+            j.bucket,
+            j.label,
+            j.job_name,
+            -- job_name already has the shard number and label removed (see normalized_jobs)
+            j.workflow_name
+        FROM
+            normalized_jobs AS j
+        INNER JOIN
+            lf_jobs AS lfj ON j.job_name = lfj.job_name
+    ),
+    success_stats AS (
+        SELECT
+            bucket,
+            count(*) AS group_size,
+            job_name,
+            workflow_name,
+            label,
+            if(substring(label, 1, 3) = 'lf.', True, False) AS lf_fleet
+        FROM
+            comparable_jobs
+        GROUP BY
+            bucket, job_name, workflow_name, label
+    ),
+    comparison_stats AS (
+        SELECT
+            lf.bucket,
+            SUM(lf.group_size + m.group_size) AS total_jobs,
+            SUM(m.group_size) AS compliment_jobs,
+            SUM(lf.group_size) AS counted_jobs,
+            m.lf_fleet AS c_fleet,
+            lf.lf_fleet AS m_fleet,
+            CAST(SUM(lf.group_size) AS Float32) / SUM(lf.group_size + m.group_size) * 100 AS percentage,
+            IF(lf.lf_fleet, 'Linux Foundation', 'Meta') AS fleet
+        FROM
+            success_stats AS lf
+        INNER JOIN
+            success_stats AS m ON lf.bucket = m.bucket
+        WHERE
+            lf.job_name = m.job_name
+            AND lf.workflow_name = m.workflow_name
+            AND (
+                (lf.lf_fleet = 1 AND m.lf_fleet = 0)
+                OR (lf.lf_fleet = 0 AND m.lf_fleet = 1)
+            )
+            AND lf.group_size > 3
+            AND m.group_size > 3
+        GROUP BY
+            lf.bucket, lf.lf_fleet, m.lf_fleet
+    )
+SELECT * FROM comparison_stats
+ORDER BY  bucket DESC, fleet
\ No newline at end of file
diff --git a/torchci/pages/metrics.tsx b/torchci/pages/metrics.tsx
index 49b2810f5b..440b8b9d6c 100644
--- a/torchci/pages/metrics.tsx
+++ b/torchci/pages/metrics.tsx
@@ -1230,18 +1230,23 @@ export default function Page() {
             title={"Percentage of jobs rolled over to Linux Foundation"}
             queryName={"lf_rollover_percentage"}
             queryCollection={"metrics"}
-            queryParams={[
-              {
-                name: "days_ago",
-                type: "int",
-                value: timeRange,
-              },
-            ]}
+            queryParams={
+              useClickHouse
+                ? { ...timeParamsClickHouse, days_ago: timeRange }
+                : [
+                    {
+                      name: "days_ago",
+                      type: "int",
+                      value: timeRange,
+                    },
+                  ]
+            }
             granularity={"hour"}
             timeFieldName={"bucket"}
             yAxisFieldName={"percentage"}
             groupByFieldName={"fleet"}
             yAxisRenderer={(value) => value.toFixed(2).toString() + "%"}
+            useClickHouse={useClickHouse}
           />
         </Grid>
       </Grid>