From b39ff485e059200f8aca81dc782148ffe99a68bc Mon Sep 17 00:00:00 2001 From: NikG Date: Thu, 30 Nov 2023 19:37:36 +0200 Subject: [PATCH] CX: Add new index to speed up query in "findByPublishedTitle" method This patch adds a new index on the translation target title column of the "cx_translations" table, to speed up the query by replacing a full table scan with a lookup of only the few rows that match the given published title. Bug: T351999 Change-Id: Ib0b800966c47287e4e4d19ede043596ade26b4f8 --- includes/SchemaHooks.php | 7 +- includes/Store/TranslationStore.php | 1 - ...ch-cx_translations-target-title-index.json | 246 ++++++++++++++++++ ...tch-cx_translations-target-title-index.sql | 5 + sql/mysql/tables-generated.sql | 1 + ...tch-cx_translations-target-title-index.sql | 5 + sql/postgres/tables-generated.sql | 2 + ...tch-cx_translations-target-title-index.sql | 87 +++++++ sql/sqlite/tables-generated.sql | 2 + sql/tables.json | 5 + 10 files changed, 359 insertions(+), 2 deletions(-) create mode 100644 sql/abstractSchemaChanges/patch-cx_translations-target-title-index.json create mode 100644 sql/mysql/patch-cx_translations-target-title-index.sql create mode 100644 sql/postgres/patch-cx_translations-target-title-index.sql create mode 100644 sql/sqlite/patch-cx_translations-target-title-index.sql diff --git a/includes/SchemaHooks.php b/includes/SchemaHooks.php index d9b64402d..478d1eb47 100644 --- a/includes/SchemaHooks.php +++ b/includes/SchemaHooks.php @@ -20,7 +20,7 @@ public function onLoadExtensionSchemaUpdates( $updater ) { global $wgContentTranslationCluster, $wgContentTranslationDatabase; // Following tables should only be created if both cluster and database are false. - // Otherwise they are not created in the place they are accesses, because + // Otherwise, they are not created in the place they are accessed, because // DatabaseUpdater does not support other databases other than main wiki schema. 
if ( $wgContentTranslationCluster !== false || $wgContentTranslationDatabase !== false ) { return; @@ -89,6 +89,11 @@ public function onLoadExtensionSchemaUpdates( $updater ) { 'cx_translation_translators', "$dir/sql/$dbType/patch-cx_translators-unique-to-pk.sql" ); + $updater->addExtensionIndex( + 'cx_translations', + 'cx_translation_target_title', + "$dir/sql/$dbType/patch-cx_translations-target-title-index.sql" + ); } } diff --git a/includes/Store/TranslationStore.php b/includes/Store/TranslationStore.php index 31c44cfe4..82ea8caa7 100644 --- a/includes/Store/TranslationStore.php +++ b/includes/Store/TranslationStore.php @@ -138,7 +138,6 @@ public function findByPublishedTitle( string $publishedTitle, string $targetLang LIST_OR ); - // TODO: Add index to improve performance for this read query $row = $dbr->newSelectQueryBuilder() ->select( ISQLPlatform::ALL_ROWS ) ->from( self::TRANSLATION_TABLE_NAME ) diff --git a/sql/abstractSchemaChanges/patch-cx_translations-target-title-index.json b/sql/abstractSchemaChanges/patch-cx_translations-target-title-index.json new file mode 100644 index 000000000..ab33b5a6b --- /dev/null +++ b/sql/abstractSchemaChanges/patch-cx_translations-target-title-index.json @@ -0,0 +1,246 @@ +{ + "comment": "Add index to make finding of published translation by title faster (T351999)", + "before": { + "name": "cx_translations", + "columns": [ + { + "name": "translation_id", + "comment": "translation id. Autogenerated.", + "type": "integer", + "options": { "autoincrement": true, "notnull": true } + }, + { + "name": "translation_source_title", + "comment": "Source title of the translation", + "type": "binary", + "options": { "notnull": true, "length": 512 } + }, + { + "name": "translation_target_title", + "comment": "Target title of the translation", + "type": "binary", + "options": { "notnull": true, "length": 512 } + }, + { + "name": "translation_source_language", + "comment": "Source language. 
language code", + "type": "binary", + "options": { "notnull": true, "length": 36 } + }, + { + "name": "translation_target_language", + "comment": "Target language. language code", + "type": "binary", + "options": { "notnull": true, "length": 36 } + }, + { + "name": "translation_source_revision_id", + "comment": "Revision id of source article", + "type": "integer", + "options": { "notnull": false, "unsigned": true } + }, + { + "name": "translation_target_revision_id", + "comment": "Revision id of published translation", + "type": "integer", + "options": { "notnull": false, "unsigned": true } + }, + { + "name": "translation_source_url", + "comment": "source of the page as full canonical url -- https://www.mediawiki.org/wiki/Help:CxIsPage", + "type": "blob", + "options": { "notnull": true, "length": 65535 } + }, + { + "name": "translation_target_url", + "comment": "link to the draft/published target", + "type": "blob", + "options": { "notnull": false, "length": 65535 } + }, + { + "name": "translation_status", + "comment": "Status of translation - Draft or published status. There is no final status. A published translation can be draft again to update again", + "type": "mwenum", + "options": { + "notnull": false, + "CustomSchemaOptions": { + "enum_values": [ "draft", "published", "deleted" ] + } + } + }, + { + "name": "translation_start_timestamp", + "comment": "Start date of this translation", + "type": "mwtimestamp", + "options": { "notnull": true } + }, + { + "name": "translation_last_updated_timestamp", + "comment": "Last updated date of this translation", + "type": "mwtimestamp", + "options": { "notnull": true } + }, + { + "name": "translation_progress", + "comment": "Progress of the translation - json dump", + "type": "blob", + "options": { "notnull": true, "length": 255 } + }, + { + "name": "translation_started_by", + "comment": "Who started this translation? 
User id", + "type": "integer", + "options": { "notnull": false } + }, + { + "name": "translation_last_update_by", + "comment": "Who did the last translation? It need not be the translator who started.", + "type": "integer", + "options": { "notnull": false } + }, + { + "name": "translation_cx_version", + "comment": "Field to indicate which version of CX", + "type": "mwtinyint", + "options": { "notnull": false, "unsigned": true, "default": 1 } + } + ], + "indexes": [ + { + "name": "cx_translation_ref", + "columns": [ "translation_source_title", "translation_source_language", "translation_target_language", "translation_started_by" ], + "unique": true + }, + { + "name": "cx_translation_languages", + "columns": [ "translation_source_language", "translation_target_language", "translation_status" ], + "unique": false + } + ], + "pk": [ "translation_id" ] + }, + "after": { + "name": "cx_translations", + "columns": [ + { + "name": "translation_id", + "comment": "translation id. Autogenerated.", + "type": "integer", + "options": { "autoincrement": true, "notnull": true } + }, + { + "name": "translation_source_title", + "comment": "Source title of the translation", + "type": "binary", + "options": { "notnull": true, "length": 512 } + }, + { + "name": "translation_target_title", + "comment": "Target title of the translation", + "type": "binary", + "options": { "notnull": true, "length": 512 } + }, + { + "name": "translation_source_language", + "comment": "Source language. language code", + "type": "binary", + "options": { "notnull": true, "length": 36 } + }, + { + "name": "translation_target_language", + "comment": "Target language. 
language code", + "type": "binary", + "options": { "notnull": true, "length": 36 } + }, + { + "name": "translation_source_revision_id", + "comment": "Revision id of source article", + "type": "integer", + "options": { "notnull": false, "unsigned": true } + }, + { + "name": "translation_target_revision_id", + "comment": "Revision id of published translation", + "type": "integer", + "options": { "notnull": false, "unsigned": true } + }, + { + "name": "translation_source_url", + "comment": "source of the page as full canonical url -- https://www.mediawiki.org/wiki/Help:CxIsPage", + "type": "blob", + "options": { "notnull": true, "length": 65535 } + }, + { + "name": "translation_target_url", + "comment": "link to the draft/published target", + "type": "blob", + "options": { "notnull": false, "length": 65535 } + }, + { + "name": "translation_status", + "comment": "Status of translation - Draft or published status. There is no final status. A published translation can be draft again to update again", + "type": "mwenum", + "options": { + "notnull": false, + "CustomSchemaOptions": { + "enum_values": [ "draft", "published", "deleted" ] + } + } + }, + { + "name": "translation_start_timestamp", + "comment": "Start date of this translation", + "type": "mwtimestamp", + "options": { "notnull": true } + }, + { + "name": "translation_last_updated_timestamp", + "comment": "Last updated date of this translation", + "type": "mwtimestamp", + "options": { "notnull": true } + }, + { + "name": "translation_progress", + "comment": "Progress of the translation - json dump", + "type": "blob", + "options": { "notnull": true, "length": 255 } + }, + { + "name": "translation_started_by", + "comment": "Who started this translation? User id", + "type": "integer", + "options": { "notnull": false } + }, + { + "name": "translation_last_update_by", + "comment": "Who did the last translation? 
It need not be the translator who started.", + "type": "integer", + "options": { "notnull": false } + }, + { + "name": "translation_cx_version", + "comment": "Field to indicate which version of CX", + "type": "mwtinyint", + "options": { "notnull": false, "unsigned": true, "default": 1 } + } + ], + "indexes": [ + { + "name": "cx_translation_ref", + "columns": [ "translation_source_title", "translation_source_language", "translation_target_language", "translation_started_by" ], + "unique": true + }, + { + "name": "cx_translation_languages", + "columns": [ "translation_source_language", "translation_target_language", "translation_status" ], + "unique": false + }, + { + "name": "cx_translation_target_title", + "columns": [ "translation_target_title" ], + "unique": false + } + ], + "pk": [ "translation_id" ] + } +} diff --git a/sql/mysql/patch-cx_translations-target-title-index.sql b/sql/mysql/patch-cx_translations-target-title-index.sql new file mode 100644 index 000000000..ed301e779 --- /dev/null +++ b/sql/mysql/patch-cx_translations-target-title-index.sql @@ -0,0 +1,5 @@ +-- This file is automatically generated using maintenance/generateSchemaChangeSql.php. +-- Source: extensions/ContentTranslation/sql/abstractSchemaChanges/patch-cx_translations-target-title-index.json +-- Do not modify this file directly. 
+-- See https://www.mediawiki.org/wiki/Manual:Schema_changes +CREATE INDEX cx_translation_target_title ON /*_*/cx_translations (translation_target_title); diff --git a/sql/mysql/tables-generated.sql b/sql/mysql/tables-generated.sql index 897e1f01f..650c2cacb 100644 --- a/sql/mysql/tables-generated.sql +++ b/sql/mysql/tables-generated.sql @@ -27,6 +27,7 @@ CREATE TABLE /*_*/cx_translations ( translation_source_language, translation_target_language, translation_status ), + INDEX cx_translation_target_title (translation_target_title), PRIMARY KEY(translation_id) ) /*$wgDBTableOptions*/; diff --git a/sql/postgres/patch-cx_translations-target-title-index.sql b/sql/postgres/patch-cx_translations-target-title-index.sql new file mode 100644 index 000000000..460dd99cd --- /dev/null +++ b/sql/postgres/patch-cx_translations-target-title-index.sql @@ -0,0 +1,5 @@ +-- This file is automatically generated using maintenance/generateSchemaChangeSql.php. +-- Source: extensions/ContentTranslation/sql/abstractSchemaChanges/patch-cx_translations-target-title-index.json +-- Do not modify this file directly. 
+-- See https://www.mediawiki.org/wiki/Manual:Schema_changes +CREATE INDEX cx_translation_target_title ON cx_translations (translation_target_title); diff --git a/sql/postgres/tables-generated.sql b/sql/postgres/tables-generated.sql index 5bb1c9b6e..5163c95a0 100644 --- a/sql/postgres/tables-generated.sql +++ b/sql/postgres/tables-generated.sql @@ -32,6 +32,8 @@ CREATE INDEX cx_translation_languages ON cx_translations ( translation_status ); +CREATE INDEX cx_translation_target_title ON cx_translations (translation_target_title); + CREATE TABLE cx_translators ( translator_user_id INT NOT NULL, diff --git a/sql/sqlite/patch-cx_translations-target-title-index.sql b/sql/sqlite/patch-cx_translations-target-title-index.sql new file mode 100644 index 000000000..7a826f13f --- /dev/null +++ b/sql/sqlite/patch-cx_translations-target-title-index.sql @@ -0,0 +1,87 @@ +-- This file is automatically generated using maintenance/generateSchemaChangeSql.php. +-- Source: extensions/ContentTranslation/sql/abstractSchemaChanges/patch-cx_translations-target-title-index.json +-- Do not modify this file directly. 
+-- See https://www.mediawiki.org/wiki/Manual:Schema_changes +CREATE TEMPORARY TABLE /*_*/__temp__cx_translations AS +SELECT + translation_id, + translation_source_title, + translation_target_title, + translation_source_language, + translation_target_language, + translation_source_revision_id, + translation_target_revision_id, + translation_source_url, + translation_target_url, + translation_status, + translation_start_timestamp, + translation_last_updated_timestamp, + translation_progress, + translation_started_by, + translation_last_update_by, + translation_cx_version +FROM /*_*/cx_translations; +DROP TABLE /*_*/cx_translations; + + +CREATE TABLE /*_*/cx_translations ( + translation_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + translation_source_title BLOB NOT NULL, + translation_target_title BLOB NOT NULL, + translation_source_language BLOB NOT NULL, + translation_target_language BLOB NOT NULL, + translation_source_revision_id INTEGER UNSIGNED DEFAULT NULL, + translation_target_revision_id INTEGER UNSIGNED DEFAULT NULL, + translation_source_url BLOB NOT NULL, + translation_target_url BLOB DEFAULT NULL, + translation_status TEXT DEFAULT NULL, + translation_start_timestamp BLOB NOT NULL, + translation_last_updated_timestamp BLOB NOT NULL, + translation_progress BLOB NOT NULL, + translation_started_by INTEGER DEFAULT NULL, + translation_last_update_by INTEGER DEFAULT NULL, + translation_cx_version SMALLINT UNSIGNED DEFAULT 1 + ); +INSERT INTO /*_*/cx_translations ( + translation_id, translation_source_title, + translation_target_title, translation_source_language, + translation_target_language, translation_source_revision_id, + translation_target_revision_id, + translation_source_url, translation_target_url, + translation_status, translation_start_timestamp, + translation_last_updated_timestamp, + translation_progress, translation_started_by, + translation_last_update_by, translation_cx_version + ) +SELECT + translation_id, + translation_source_title, + 
translation_target_title, + translation_source_language, + translation_target_language, + translation_source_revision_id, + translation_target_revision_id, + translation_source_url, + translation_target_url, + translation_status, + translation_start_timestamp, + translation_last_updated_timestamp, + translation_progress, + translation_started_by, + translation_last_update_by, + translation_cx_version +FROM + /*_*/__temp__cx_translations; +DROP TABLE /*_*/__temp__cx_translations; + +CREATE UNIQUE INDEX cx_translation_ref ON /*_*/cx_translations ( + translation_source_title, translation_source_language, + translation_target_language, translation_started_by + ); + +CREATE INDEX cx_translation_languages ON /*_*/cx_translations ( + translation_source_language, translation_target_language, + translation_status + ); + +CREATE INDEX cx_translation_target_title ON /*_*/cx_translations (translation_target_title); diff --git a/sql/sqlite/tables-generated.sql b/sql/sqlite/tables-generated.sql index c24eb8c9a..117ac6fd6 100644 --- a/sql/sqlite/tables-generated.sql +++ b/sql/sqlite/tables-generated.sql @@ -31,6 +31,8 @@ CREATE INDEX cx_translation_languages ON /*_*/cx_translations ( translation_status ); +CREATE INDEX cx_translation_target_title ON /*_*/cx_translations (translation_target_title); + CREATE TABLE /*_*/cx_translators ( translator_user_id INTEGER NOT NULL, diff --git a/sql/tables.json b/sql/tables.json index 41c668f61..13a4bb0b8 100644 --- a/sql/tables.json +++ b/sql/tables.json @@ -114,6 +114,11 @@ "name": "cx_translation_languages", "columns": [ "translation_source_language", "translation_target_language", "translation_status" ], "unique": false + }, + { + "name": "cx_translation_target_title", + "columns": [ "translation_target_title" ], + "unique": false } ], "pk": [ "translation_id" ]