[#26235] YSQL: Index scan cost model should expect disk blocks are fe…

…tched sequentially for primary index scan Summary: In the cost model, we try to predict the order in which the disk blocks need to be fetched. In case of a secondary index scan, we assume that the disk blocksfor primary table will be fetched in a random order. But for a primary index scan, we must assume that the disk blocks are fetched in sequential order. Before this change, the index scan was being costed higher than a sequential scan for the following example. With this change, the index scan is cheaper. ``` CREATE TABLE t1 (a INT, PRIMARY KEY (a ASC)); INSERT INTO t1 SELECT s FROM generate_series(1, 10) s; ANALYZE t1; SET yb_enable_base_scans_cost_model = ON; yugabyte=# explain select * from t1 where a < 5; QUERY PLAN -------------------------------------------------------------------- Index Scan using t1_pkey on t1 (cost=10.00..31.21 rows=3 width=4) Index Cond: (a < 5) (2 rows) yugabyte=# /*+ SeqScan(t1) */ explain select * from t1 where a < 5; QUERY PLAN ---------------------------------------------------- Seq Scan on t1 (cost=10.00..31.61 rows=3 width=4) Storage Filter: (a < 5) (2 rows) ``` Jira: DB-15582 Test Plan: ./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressPlanner' Reviewers: mtakahara, amartsinchyk Reviewed By: mtakahara Subscribers: yql Differential Revision: https://phorge.dev.yugabyte.com/D42188
yugabyte · Feb 28, 2025 · cc3c7c6 · cc3c7c6
1 parent 9abf529
commit cc3c7c6
Show file tree

Hide file tree

Showing 5 changed files with 80 additions and 15 deletions.
diff --git a/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgEstimatedDocdbResultWidth.java b/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgEstimatedDocdbResultWidth.java
@@ -146,20 +146,23 @@ private void helperTestsForFixedTypeSizes(Statement stmt, String table_name,
     testDocdbResultWidhEstimationHelper(stmt,
         String.format("SELECT 0 FROM %1$s", table_name),
         String.format("%1$s", table_name), ybctid_size);
+
     testDocdbResultWidhEstimationHelper(stmt,
-        String.format("SELECT 0 FROM %1$s WHERE k1 > %2$s", table_name, value),
-        String.format("%1$s", table_name), ybctid_size);
+        String.format("/*+ IndexScan(%1$s %1$s_pkey) */ SELECT 0 FROM %1$s WHERE k1 > %2$s",
+            table_name, value),
+        String.format("%1$s", table_name), value_size);
     testDocdbResultWidhEstimationHelper(stmt,
-        String.format("SELECT 0 FROM %1$s WHERE k1 > %2$s and k2 > %2$s", table_name, value),
-        String.format("%1$s", table_name), ybctid_size);
+        String.format("/*+ IndexScan(%1$s %1$s_pkey) */ SELECT 0 FROM %1$s WHERE k1 > %2$s "
+            + "and k2 > %2$s", table_name, value),
+        String.format("%1$s", table_name), 2 * value_size);
     testDocdbResultWidhEstimationHelper(stmt,
-        String.format("SELECT 0 FROM %1$s WHERE k1 > %2$s and k2 > %2$s and v1 > %2$s",
-                      table_name, value),
-        String.format("%1$s", table_name), ybctid_size);
+        String.format("/*+ IndexScan(%1$s %1$s_pkey) */ SELECT 0 FROM %1$s WHERE k1 > %2$s "
+            + "and k2 > %2$s and v1 > %2$s", table_name, value),
+        String.format("%1$s", table_name), 2 * value_size);
     testDocdbResultWidhEstimationHelper(stmt,
-        String.format("SELECT 0 FROM %1$s WHERE k1 > %2$s and k2 > %2$s and " +
-                      "v1 > %2$s and v2 > %2$s", table_name, value),
-        String.format("%1$s", table_name), ybctid_size);
+        String.format("/*+ IndexScan(%1$s %1$s_pkey) */ SELECT 0 FROM %1$s WHERE k1 > %2$s "
+            + "and k2 > %2$s and v1 > %2$s and v2 > %2$s", table_name, value),
+        String.format("%1$s", table_name), 2 * value_size);
 
     /* In case of Index Only Scan when no column is projected and no index conditions are present,
      * then the ybctid is returned. If no columns are projected, but index condition exists, then

diff --git a/src/postgres/src/backend/optimizer/path/costsize.c b/src/postgres/src/backend/optimizer/path/costsize.c
@@ -8160,7 +8160,16 @@ yb_cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 													baserel_tuple_width /
 													YB_DEFAULT_DOCDB_BLOCK_SIZE);
 
-		run_cost += num_docdb_blocks_fetched * yb_random_block_cost;
+		/*
+		 * If this is a primary index scan, pages from the disk will likely be
+		 * fetched in sequential order as they are sorted by the primary key.
+		 * If this is a secondary index scan, we assume that the pages are
+		 * fetched in random order.
+		 */
+		if (is_primary_index)
+			run_cost += num_docdb_blocks_fetched * yb_seq_block_cost;
+		else
+			run_cost += num_docdb_blocks_fetched * yb_random_block_cost;
 	}
 
 	/*

diff --git a/src/postgres/src/test/regress/expected/yb.orig.planner_base_scans_cost_model.out b/src/postgres/src/test/regress/expected/yb.orig.planner_base_scans_cost_model.out
@@ -146,3 +146,39 @@ SET yb_enable_base_scans_cost_model = ON;
    Estimated Nexts: 1333333334
    Estimated Docdb Result Width: 1
 (5 rows)
+
+--------------------------------------------------------------------------------
+-- #26235 : Primary Index scan cost higher than Sequential cost in small tables
+--------------------------------------------------------------------------------
+CREATE TABLE t_26235 (k1 INT, v1 INT, PRIMARY KEY (k1 ASC));
+INSERT INTO t_26235 (SELECT s, s FROM generate_series(1, 10) s);
+ANALYZE t_26235;
+SET yb_enable_base_scans_cost_model = ON;
+-- Following querie should pick primary index scan over seq scan
+EXPLAIN (COSTS OFF) SELECT * FROM t_26235 WHERE k1 < 1;
+                QUERY PLAN
+------------------------------------------
+ Index Scan using t_26235_pkey on t_26235
+   Index Cond: (k1 < 1)
+(2 rows)
+
+EXPLAIN (COSTS OFF) SELECT * FROM t_26235 WHERE k1 < 5;
+                QUERY PLAN
+------------------------------------------
+ Index Scan using t_26235_pkey on t_26235
+   Index Cond: (k1 < 5)
+(2 rows)
+
+EXPLAIN (COSTS OFF) SELECT * FROM t_26235 WHERE k1 < 10;
+                QUERY PLAN
+------------------------------------------
+ Index Scan using t_26235_pkey on t_26235
+   Index Cond: (k1 < 10)
+(2 rows)
+
+-- Without filter, seq scan should be preferred
+EXPLAIN (COSTS OFF) SELECT * FROM t_26235;
+     QUERY PLAN
+---------------------
+ Seq Scan on t_26235
+(1 row)
diff --git a/src/postgres/src/test/regress/expected/yb.orig.planner_taqo_seek_next_estimation.out b/src/postgres/src/test/regress/expected/yb.orig.planner_taqo_seek_next_estimation.out
@@ -284,10 +284,10 @@ EXPLAIN (COSTS OFF) SELECT * FROM t1 WHERE k1 IN (4, 8);
 
 -- Query Hash: 6a8ead9b8b122eea0e5460236bc6c19b
 EXPLAIN (COSTS OFF) SELECT * FROM t1 WHERE k1 IN (4, 8, 12);
-                      QUERY PLAN
-------------------------------------------------------
- Seq Scan on t1
-   Storage Filter: (k1 = ANY ('{4,8,12}'::integer[]))
+                    QUERY PLAN
+--------------------------------------------------
+ Index Scan using t1_pkey on t1
+   Index Cond: (k1 = ANY ('{4,8,12}'::integer[]))
 (2 rows)
 
 -- Query Hash: d7eedc71bffb36fa0152da8c08bf7808

diff --git a/src/postgres/src/test/regress/sql/yb.orig.planner_base_scans_cost_model.sql b/src/postgres/src/test/regress/sql/yb.orig.planner_base_scans_cost_model.sql
@@ -67,3 +67,20 @@ SET yb_enable_base_scans_cost_model = ON;
 /*+ IndexScan(t_25682 t_25682_pkey) */EXPLAIN (DEBUG, COSTS OFF) SELECT * FROM t_25682 WHERE k1 > 0;
 /*+ IndexScan(t_25682 t_25682_idx) */EXPLAIN (DEBUG, COSTS OFF) SELECT * FROM t_25682 WHERE v1 > 0;
 /*+ IndexOnlyScan(t_25682 t_25682_idx) */EXPLAIN (DEBUG, COSTS OFF) SELECT v1 FROM t_25682 WHERE v1 > 0;
+
+--------------------------------------------------------------------------------
+-- #26235 : Primary Index scan cost higher than Sequential cost in small tables
+--------------------------------------------------------------------------------
+
+CREATE TABLE t_26235 (k1 INT, v1 INT, PRIMARY KEY (k1 ASC));
+INSERT INTO t_26235 (SELECT s, s FROM generate_series(1, 10) s);
+ANALYZE t_26235;
+SET yb_enable_base_scans_cost_model = ON;
+
+-- Following querie should pick primary index scan over seq scan
+EXPLAIN (COSTS OFF) SELECT * FROM t_26235 WHERE k1 < 1;
+EXPLAIN (COSTS OFF) SELECT * FROM t_26235 WHERE k1 < 5;
+EXPLAIN (COSTS OFF) SELECT * FROM t_26235 WHERE k1 < 10;
+
+-- Without filter, seq scan should be preferred
+EXPLAIN (COSTS OFF) SELECT * FROM t_26235;