Skip to content

Commit

Permalink
pkp#9810 check if the URL in the usage stats log file corresponds to …
Browse files Browse the repository at this point in the history
…the configured base_url
  • Loading branch information
bozana committed Jul 2, 2024
1 parent f817a7c commit 85e4c68
Showing 1 changed file with 102 additions and 16 deletions.
118 changes: 102 additions & 16 deletions classes/migration/upgrade/v3_4_0/PreflightCheckMigration.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
use APP\core\Application;
use APP\migration\upgrade\v3_4_0\MergeLocalesMigration;
use APP\statistics\StatisticsHelper;
use DateTime;
use Exception;
use Illuminate\Database\MySqlConnection;
use Illuminate\Database\PostgresConnection;
Expand All @@ -25,6 +26,7 @@
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Schema;
use PKP\config\Config;
use PKP\db\DAORegistry;
use Throwable;

Expand Down Expand Up @@ -94,6 +96,7 @@ protected function setFallbackVersion(): ?string

/**
* Check the contexts' contact details before upgrade
*
* @see https://github.com/pkp/pkp-lib/issues/8183
*
* @throws Exception
Expand Down Expand Up @@ -128,6 +131,7 @@ protected function checkContactSetting(): void

/**
* Ensures locale conflicts won't happen at a later stage of the migration
*
* @see https://github.com/pkp/pkp-lib/issues/8598
*
* @throws Exception
Expand Down Expand Up @@ -204,6 +208,7 @@ protected function checkLocaleConflicts(): void
protected function checkUsageStatsLogs(): void
{
$usageStatsDir = StatisticsHelper::getUsageStatsDirPath();
$filePathToCheck = '';
// check if there are usage stats log files older than yesterday
foreach (glob($usageStatsDir . '/usageEventLogs/*') as $usageStatsLogFile) {
if (!preg_match('/(\d{8})\.log$/', $usageStatsLogFile, $logFileDate)) {
Expand All @@ -214,6 +219,7 @@ protected function checkUsageStatsLogs(): void
if ($yesterday > $logFileDate) {
throw new Exception("There are unprocessed log files from more than 1 day ago in the directory {$usageStatsDir}/usageEventLogs/. This happens when the scheduled task to process usage stats logs is not being run daily. All logs in this directory older than {$yesterday} must be processed or removed before the upgrade can continue.");
}
$filePathToCheck = $usageStatsLogFile;
}
// check if there are old usage stats log files there that were not successfully processed
if (
Expand All @@ -223,10 +229,80 @@ protected function checkUsageStatsLogs(): void
) {
throw new Exception("There are one or more log files that were unable to finish processing. This happens when the scheduled task to process usage stats logs encounters a failure of some kind. These logs must be repaired and reprocessed or removed before the upgrade can continue. The logs can be found in the folders reject, processing and stage in {$usageStatsDir}.");
}

$this->checkUsageStatsLogFileUrl($filePathToCheck);
}

/**
* Check if the domains in the log file URLs are correct i.e. the same as the base_url in the config.inc.php.
* Only the first URL in the log file is checked (and the same assumed for all URLs in the log file).
*
* @throws Exception
*/
protected function checkUsageStatsLogFileUrl(string $filePathToCheck): void
{
    // The caller passes an empty string when it found no log file at all; there is
    // nothing to verify then. (An empty path must never reach fopen(): PHP 8 raises
    // a ValueError for it instead of returning false.)
    if ($filePathToCheck === '') {
        return;
    }

    $fhandle = fopen($filePathToCheck, 'r');
    if (!$fhandle) {
        throw new Exception("Can not open file {$filePathToCheck}.");
    }

    // URL of the first usable log entry, or null when the file contains none.
    $urlToCheck = null;
    try {
        // fgets() returns false at EOF or on a read error, which ends the scan.
        while (($rawLine = fgets($fhandle)) !== false) {
            $line = trim($rawLine);
            // Skip spacing or comment lines.
            if ($line === '' || $line[0] === '#') {
                continue;
            }

            $entryData = $this->getDataFromLogEntry($line);
            // Skip lines that do not look like a log entry.
            if (empty($entryData)) {
                continue;
            }
            // Avoid internal apache requests.
            if ($entryData['url'] === '*') {
                continue;
            }
            // Avoid non successful requests. The return code is parsed as a string,
            // so the comparison against the integer codes is intentionally loose.
            $successfulReturnCodes = [200, 304];
            if (!in_array($entryData['returnCode'], $successfulReturnCodes)) {
                continue;
            }

            // Only the first usable entry is inspected.
            $urlToCheck = $entryData['url'];
            break;
        }
    } finally {
        // Release the handle on every exit path, including exceptions.
        fclose($fhandle);
    }

    // No usable entry in the file: there is no URL to compare, so reporting a
    // base_url mismatch here would be misleading.
    if ($urlToCheck === null) {
        return;
    }

    // The URL is accepted when it contains the general base_url or any of the
    // context specific base URLs from the configuration.
    $configBaseUrl = Config::getVar('general', 'base_url');
    $baseUrls = array_merge(Config::getContextBaseUrls(), [$configBaseUrl]);
    foreach ($baseUrls as $baseUrl) {
        if (str_contains($urlToCheck, $baseUrl)) {
            return;
        }
    }

    throw new Exception('The base_url in config.inc.php should be the same as in URLs in the usage stats log file.');
}

/**
* Get data from the passed log entry.
*/
protected function getDataFromLogEntry(string $entry): array
{
    // The default regex that can parse the usageStats plugin's log files.
    // Expected layout: <ip> <x> <x> "<date>" <url> <returnCode> "<userAgent>"
    $parseRegex = '/^(?P<ip>\S+) \S+ \S+ "(?P<date>.*?)" (?P<url>\S+) (?P<returnCode>\S+) "(?P<userAgent>.*?)"/';
    if (!preg_match($parseRegex, $entry, $m)) {
        // Not a log entry (or the regex failed): callers treat an empty array as "skip this line".
        return [];
    }

    // createFromFormat() returns false when the date does not follow the expected
    // format; calling format() on that would be a fatal error, so such an entry is
    // reported as unparsable instead.
    $dateTime = DateTime::createFromFormat('Y-m-d H:i:s', $m['date']);
    if ($dateTime === false) {
        return [];
    }

    // Named groups are always present as string keys in $m after a successful match.
    return [
        'ip' => $m['ip'],
        'date' => $dateTime->format('Y-m-d H:i:s'),
        // The URL is returned decoded.
        'url' => urldecode($m['url']),
        // Kept as the raw string captured from the log line.
        'returnCode' => $m['returnCode'],
        'userAgent' => $m['userAgent'],
    ];
}

/**
* Ensures that contexts with section editor assignments have a section editor role
*
* @see classes\migration\upgrade\v3_4_0\I7191_EditorAssignments.php
*
* @throws Exception
Expand All @@ -236,14 +312,16 @@ protected function checkRequiredUserEditor(): void
$contextId = "c.{$this->getContextKeyField()}";
// Look for contexts that have section editor assignments, but no section editor role
$contextsWithoutSubEditor = DB::table($this->getContextTable(), 'c')
->whereNotExists(fn (Builder $q) => $q->from('user_groups', 'ug')
->whereColumn('ug.context_id', '=', $contextId)
->where('ug.role_id', '=', 17) // Role::ROLE_ID_SUB_EDITOR
->selectRaw('0')
->whereNotExists(
fn (Builder $q) => $q->from('user_groups', 'ug')
->whereColumn('ug.context_id', '=', $contextId)
->where('ug.role_id', '=', 17) // Role::ROLE_ID_SUB_EDITOR
->selectRaw('0')
)
->whereExists(fn (Builder $q) => $q->from('subeditor_submission_group', 'ssg')
->whereColumn('ssg.context_id', '=', $contextId)
->selectRaw('0')
->whereExists(
fn (Builder $q) => $q->from('subeditor_submission_group', 'ssg')
->whereColumn('ssg.context_id', '=', $contextId)
->selectRaw('0')
)
->pluck('c.path');
if ($contextsWithoutSubEditor->count()) {
Expand Down Expand Up @@ -298,8 +376,10 @@ protected function checkUniqueEmailAndUsername(): void

/**
* Apply some validations to the submission checklist and attempts to auto-fix a small issue within the JSON data
*
* @see https://github.com/pkp/pkp-lib/issues/7191
* @see About the fix attempt: https://github.com/pkp/pkp-lib/issues/8929#issuecomment-1519867805
*
* @throws Exception
*/
protected function checkSubmissionChecklist(): void
Expand Down Expand Up @@ -342,6 +422,7 @@ protected function checkSubmissionChecklist(): void

/**
* Checks whether the database is ready for the introduction of foreign keys (MySQL only)
*
* @see https://github.com/pkp/pkp-lib/issues/6732
*
* @throws Exception
Expand All @@ -368,7 +449,7 @@ protected function checkForeignKeySupport(): void
);

if (count($result) > 0) {
error_log(print_r($result,true));
error_log(print_r($result, true));
$tableNames = data_get($result, '*.table_name');
throw new Exception(
'Storage engine that doesn\'t support foreign key constraints detected in one or more tables: ' .
Expand Down Expand Up @@ -424,6 +505,7 @@ protected function checkAuthorsMissingUserGroup(): void
* - Rows with required, but invalid foreign keys (null/bad values) will be deleted
* - Rows with nullable/optional foreign keys will be inspected on a case-by-case basis (if possible they will be nulled, otherwise removed)
* - Consideration is given to bidirectional/direct dependencies and exceptional cases (e.g. submission.current_publication_id, which is nullable, but required)
*
* @see https://github.com/pkp/pkp-lib/issues/6093
*
* @throws Exception
Expand All @@ -434,7 +516,7 @@ protected function clearOrphanedEntities(): void
// Sort the tables by the number of dependent entities
uksort(
$this->tableProcessors,
fn(string $a, string $b) => count($this->getEntityRelationships()[$b] ?? []) <=> count($this->getEntityRelationships()[$a] ?? [])
fn (string $a, string $b) => count($this->getEntityRelationships()[$b] ?? []) <=> count($this->getEntityRelationships()[$a] ?? [])
);
// Start the processing
foreach (array_keys($this->tableProcessors) as $table) {
Expand All @@ -448,7 +530,7 @@ protected function clearOrphanedEntities(): void
*/
protected function processTable(string $tableName): void
{
$affectedRows = array_reduce($this->tableProcessors[$tableName] ?? [], fn(int $affectedRows, callable $processor): int => $affectedRows += $processor(), 0);
$affectedRows = array_reduce($this->tableProcessors[$tableName] ?? [], fn (int $affectedRows, callable $processor): int => $affectedRows += $processor(), 0);
if (!$affectedRows) {
return;
}
Expand Down Expand Up @@ -481,8 +563,8 @@ protected function buildOrphanedEntityProcessor(): void
->leftJoin('publications AS p', 'p.publication_id', '=', 's.current_publication_id')
->join(
'publications AS last',
fn(JoinClause $q) => $q->where(
fn(Builder $q) => $q->from('publications AS p2')
fn (JoinClause $q) => $q->where(
fn (Builder $q) => $q->from('publications AS p2')
->whereColumn('p2.submission_id', '=', 's.submission_id')
->orderByDesc('p2.publication_id')
->limit(1)
Expand Down Expand Up @@ -1004,6 +1086,7 @@ protected function buildOrphanedEntityProcessor(): void
/**
* Delete rows from the source table where the foreign key field contains either invalid values or NULL
* Used for NOT NULL/required relationships
*
* @param $filter callable(Builder): Builder
*/
protected function deleteRequiredReference(string $sourceTable, string $sourceColumn, string $referenceTable, string $referenceColumn, ?callable $filter = null): int
Expand All @@ -1012,7 +1095,7 @@ protected function deleteRequiredReference(string $sourceTable, string $sourceCo
return 0;
}

$filter ??= fn(Builder $q) => $q;
$filter ??= fn (Builder $q) => $q;
$ids = $filter(
DB::table("{$sourceTable} AS s")
->leftJoin("{$referenceTable} AS r", "s.{$sourceColumn}", '=', "r.{$referenceColumn}")
Expand Down Expand Up @@ -1040,6 +1123,7 @@ protected function deleteRequiredReference(string $sourceTable, string $sourceCo
/**
* Resets optional/nullable foreign key fields from the source table to NULL when the field contains invalid values
* Used for NULLABLE relationships
*
* @param $filter callable(Builder): Builder
*/
protected function cleanOptionalReference(string $sourceTable, string $sourceColumn, string $referenceTable, string $referenceColumn, ?callable $filter = null): int
Expand All @@ -1048,7 +1132,7 @@ protected function cleanOptionalReference(string $sourceTable, string $sourceCol
return 0;
}

$filter ??= fn(Builder $q) => $q;
$filter ??= fn (Builder $q) => $q;
$ids = $filter(
DB::table("{$sourceTable} AS s")
->leftJoin("{$referenceTable} AS r", "s.{$sourceColumn}", '=', "r.{$referenceColumn}")
Expand Down Expand Up @@ -1076,6 +1160,7 @@ protected function cleanOptionalReference(string $sourceTable, string $sourceCol
/**
* Deletes rows from the source table where the foreign key field contains invalid values
* Used for NULLABLE relationships, where the source record loses its meaning without its relationship
*
* @param $filter callable(Builder): Builder
*/
protected function deleteOptionalReference(string $sourceTable, string $sourceColumn, string $referenceTable, string $referenceColumn, ?callable $filter = null): int
Expand All @@ -1084,7 +1169,7 @@ protected function deleteOptionalReference(string $sourceTable, string $sourceCo
return 0;
}

$filter ??= fn(Builder $q) => $q;
$filter ??= fn (Builder $q) => $q;
$ids = $filter(
DB::table("{$sourceTable} AS s")
->leftJoin("{$referenceTable} AS r", "s.{$sourceColumn}", '=', "r.{$referenceColumn}")
Expand Down Expand Up @@ -1127,7 +1212,7 @@ protected function addTableProcessor(string $table, callable $processor): void
*/
protected function ignoreZero(string $sourceColumn): callable
{
return fn(Builder $q) => $q->where("s.{$sourceColumn}", '!=', 0);
return fn (Builder $q) => $q->where("s.{$sourceColumn}", '!=', 0);
}

/**
Expand All @@ -1150,6 +1235,7 @@ protected function validateColumns(string $sourceTable, string $sourceColumn, st
* Clears duplicated user_settings
* This method used to be a migration, it has been incorporated at the pre-flight to avoid issues with the checks introduced by the MergeLocalesMigration
* Given that it operates on duplicated entries, it should be ok to run it several times
*
* @see https://github.com/pkp/pkp-lib/issues/7167
*/
protected function clearDuplicatedUserSettings(): void
Expand Down

0 comments on commit 85e4c68

Please sign in to comment.