Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] Better index cleanup #3809

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions Classes/Task/CleanupIndexTask.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
<?php

declare(strict_types=1);

/*
* This file is part of the TYPO3 CMS project.
*
* It is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, either version 2
* of the License, or any later version.
*
* For the full copyright and license information, please read the
* LICENSE.txt file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/

namespace ApacheSolrForTypo3\Solr\Task;

use ApacheSolrForTypo3\Solr\ConnectionManager;
use Doctrine\DBAL\ConnectionException as DBALConnectionException;
use Doctrine\DBAL\Exception as DBALException;
use TYPO3\CMS\Core\Utility\GeneralUtility;

/**
 * Scheduler task that removes outdated documents from the Solr indexes of a site.
 *
 * For every Solr connection of the configured site, all documents carrying the
 * site's siteHash whose "indexed" date is older than the configured number of
 * days are deleted.
 *
 * @author Christoph Moeller <support@network-publishing.de>
 */
class CleanupIndexTask extends AbstractSolrTask
{
    /**
     * Age in days that is used when no usable value has been configured.
     */
    protected const DEFAULT_DELETE_OLDER_THAN_DAYS = 1;

    /**
     * Minimum age (in days) a document must have to be deleted; NULL means "use the default".
     */
    protected ?int $deleteOlderThanDays = null;

    /**
     * Returns the configured minimum document age in days, or NULL if none is configured.
     */
    public function getDeleteOlderThanDays(): ?int
    {
        return $this->deleteOlderThanDays;
    }

    /**
     * Sets the minimum document age in days; pass NULL to fall back to the default.
     */
    public function setDeleteOlderThanDays(?int $deleteOlderThanDays): void
    {
        $this->deleteOlderThanDays = $deleteOlderThanDays;
    }

    /**
     * Deletes old documents from index
     *
     * @return bool Returns TRUE on success, FALSE on failure.
     *
     * @throws DBALConnectionException
     * @throws DBALException
     *
     * @noinspection PhpMissingReturnTypeInspection See {@link \TYPO3\CMS\Scheduler\Task\AbstractTask::execute()}
     */
    public function execute()
    {
        $site = $this->getSite();
        if ($site === null) {
            // Without a resolvable site there is no siteHash to restrict the
            // delete query to, so report a failure instead of crashing.
            return false;
        }

        $solrConfiguration = $site->getSolrConfiguration();
        $solrServers = GeneralUtility::makeInstance(ConnectionManager::class)->getConnectionsBySite($site);
        $enableCommitsSetting = $solrConfiguration->getEnableCommits();

        // A zero or negative offset would either match every document
        // ("NOW-0DAYS") or produce invalid date math ("NOW--5DAYS"),
        // therefore anything below one day falls back to the default.
        $days = max(
            self::DEFAULT_DELETE_OLDER_THAN_DAYS,
            $this->deleteOlderThanDays ?? self::DEFAULT_DELETE_OLDER_THAN_DAYS
        );

        // The query is identical for all connections of the site, build it once.
        $deleteQuery = sprintf(
            'siteHash:%s AND indexed:[* TO NOW-%dDAYS]',
            $site->getSiteHash(),
            $days
        );

        foreach ($solrServers as $solrServer) {
            $writeService = $solrServer->getWriteService();
            $writeService->deleteByQuery($deleteQuery);

            if (!$enableCommitsSetting) {
                // Commits are disabled by configuration, nothing more to do for this connection.
                continue;
            }

            $response = $writeService->commit(false, false);
            if ((int)$response->getHttpStatus() !== 200) {
                // Stop at the first failing commit and report the task as failed.
                return false;
            }
        }

        return true;
    }

    /**
     * This method is designed to return some additional information about the task,
     * that may help to set it apart from other tasks from the same class
     * This additional information is used - for example - in the Scheduler's BE module
     * This method should be implemented in most task classes
     *
     * @return string Information to display
     *
     * @throws DBALException
     *
     * @noinspection PhpMissingReturnTypeInspection See {@link \TYPO3\CMS\Scheduler\Task\AbstractTask::getAdditionalInformation()}
     */
    public function getAdditionalInformation()
    {
        $site = $this->getSite();
        if ($site === null) {
            return 'Invalid site configuration for scheduler please re-create the task!';
        }

        return 'Site: ' . $site->getLabel();
    }
}
197 changes: 197 additions & 0 deletions Classes/Task/CleanupTaskAdditionalFieldProvider.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
<?php

declare(strict_types=1);

/*
* This file is part of the TYPO3 CMS project.
*
* It is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, either version 2
* of the License, or any later version.
*
* For the full copyright and license information, please read the
* LICENSE.txt file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/

namespace ApacheSolrForTypo3\Solr\Task;

use ApacheSolrForTypo3\Solr\Backend\SiteSelectorField;
use ApacheSolrForTypo3\Solr\Domain\Site\Exception\UnexpectedTYPO3SiteInitializationException;
use ApacheSolrForTypo3\Solr\Domain\Site\Site;
use ApacheSolrForTypo3\Solr\Domain\Site\SiteRepository;
use Doctrine\DBAL\Exception as DBALException;
use LogicException;
use TYPO3\CMS\Backend\Form\Exception as BackendFormException;
use TYPO3\CMS\Core\Page\PageRenderer;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Scheduler\AbstractAdditionalFieldProvider;
use TYPO3\CMS\Scheduler\Controller\SchedulerModuleController;
use TYPO3\CMS\Scheduler\Task\AbstractTask;
use TYPO3\CMS\Scheduler\Task\Enumeration\Action;

/**
 * Provides the additional scheduler form fields of the index cleanup task:
 * the site whose index is cleaned up and the minimum age (in days) of the
 * documents to delete.
 *
 * @author Christoph Moeller <support@network-publishing.de>
 */
class CleanupTaskAdditionalFieldProvider extends AbstractAdditionalFieldProvider
{
    /**
     * Value pre-filled into the "delete older than days" field when the task has none.
     */
    private const DEFAULT_DELETE_OLDER_THAN_DAYS = 1;

    protected array $taskInformation = [];

    /**
     * Scheduler task
     */
    protected ?CleanupIndexTask $task = null;

    protected ?SchedulerModuleController $schedulerModule = null;

    /**
     * Selected site
     */
    protected ?Site $site = null;

    protected SiteRepository $siteRepository;

    protected ?PageRenderer $pageRenderer = null;

    public function __construct()
    {
        $this->siteRepository = GeneralUtility::makeInstance(SiteRepository::class);
    }

    /**
     * Stores the passed context and, when an existing task is edited,
     * resolves the site the task is configured for.
     *
     * @throws DBALException
     */
    protected function initialize(
        array $taskInfo,
        ?CleanupIndexTask $task,
        SchedulerModuleController $schedulerModule
    ): void {
        $this->taskInformation = $taskInfo;
        $this->task = $task;
        $this->schedulerModule = $schedulerModule;

        // $task is nullable, so only resolve the site if there really is a task to read from.
        if ($task !== null && $schedulerModule->getCurrentAction()->equals(Action::EDIT)) {
            $this->site = $this->siteRepository->getSiteByRootPageId((int)$task->getRootPageId());
        }
    }

    /**
     * Used to define the fields for selecting the site and the document age
     * when adding or editing a task.
     *
     * @param array $taskInfo reference to the array containing the info used in the add/edit form
     * @param CleanupIndexTask $task when editing, reference to the current task object. Null when adding.
     * @param SchedulerModuleController $schedulerModule reference to the calling object (Scheduler's BE module)
     *
     * @return array Array containing all the information pertaining to the additional fields
     *               The array is keyed by field id; each entry provides the keys
     *               "code" (the field's HTML) and "label" (its language label reference).
     *
     * @throws BackendFormException
     * @throws UnexpectedTYPO3SiteInitializationException
     * @throws DBALException
     *
     * @noinspection PhpParameterByRefIsNotUsedAsReferenceInspection
     * @noinspection PhpMissingReturnTypeInspection
     */
    public function getAdditionalFields(
        array &$taskInfo,
        $task,
        SchedulerModuleController $schedulerModule
    ) {
        $additionalFields = [];

        if (!$this->isTaskInstanceofCleanupIndexTask($task)) {
            return $additionalFields;
        }

        $this->initialize($taskInfo, $task, $schedulerModule);
        $siteSelectorField = GeneralUtility::makeInstance(SiteSelectorField::class);

        $additionalFields['site'] = [
            'code' => $siteSelectorField->getAvailableSitesSelector(
                'tx_scheduler[site]',
                $this->site
            ),
            'label' => 'LLL:EXT:solr/Resources/Private/Language/locallang.xlf:field_site',
        ];

        // Only an edited task can provide a stored value, otherwise show the default.
        $deleteOlderThanDays = self::DEFAULT_DELETE_OLDER_THAN_DAYS;
        if ($task !== null && $schedulerModule->getCurrentAction()->equals(Action::EDIT)) {
            $deleteOlderThanDays = $task->getDeleteOlderThanDays() ?? self::DEFAULT_DELETE_OLDER_THAN_DAYS;
        }

        $additionalFields['deleteOlderThanDays'] = [
            // %d guarantees that nothing but an integer ends up in the HTML attribute.
            'code' => sprintf(
                '<input class="form-control" type="number" min="1" step="1" name="tx_scheduler[deleteOlderThanDays]" value="%d" />',
                $deleteOlderThanDays
            ),
            'label' => 'LLL:EXT:solr/Resources/Private/Language/locallang.xlf:task.cleanupIndex.deleteOlderThanDays',
        ];

        return $additionalFields;
    }

    /**
     * Checks any additional data that is relevant to this task. If the task
     * class is not relevant, the method is expected to return TRUE
     *
     * The submitted site must be one of the available sites. The number of days
     * may be left empty (the default is used then), otherwise it must be a
     * non-negative integer.
     *
     * @param array $submittedData reference to the array containing the data submitted by the user
     * @param SchedulerModuleController $schedulerModule reference to the calling object (Scheduler's BE module)
     *
     * @return bool True if validation was ok (or selected class is not relevant), FALSE otherwise
     *
     * @throws UnexpectedTYPO3SiteInitializationException
     *
     * @noinspection PhpParameterByRefIsNotUsedAsReferenceInspection
     */
    public function validateAdditionalFields(
        array &$submittedData,
        SchedulerModuleController $schedulerModule
    ): bool {
        // validate site
        $submittedSite = $submittedData['site'] ?? null;
        if (!is_int($submittedSite) && !is_string($submittedSite)) {
            return false;
        }
        $sites = $this->siteRepository->getAvailableSites();
        if (!array_key_exists($submittedSite, $sites)) {
            return false;
        }

        // validate the document age; empty input means "use the default"
        $submittedDays = $submittedData['deleteOlderThanDays'] ?? '';
        if ($submittedDays === '' || $submittedDays === null) {
            return true;
        }

        // Negative or non-numeric input would lead to a broken or far too
        // greedy delete query, so it is rejected here.
        return filter_var(
            $submittedDays,
            FILTER_VALIDATE_INT,
            ['options' => ['min_range' => 0]]
        ) !== false;
    }

    /**
     * Saves any additional input into the current task object if the task
     * class matches.
     *
     * @param array $submittedData array containing the data submitted by the user
     * @param CleanupIndexTask $task reference to the current task object
     */
    public function saveAdditionalFields(
        array $submittedData,
        CleanupIndexTask|AbstractTask $task
    ): void {
        if (!$this->isTaskInstanceofCleanupIndexTask($task)) {
            return;
        }

        $task->setRootPageId((int)$submittedData['site']);

        // Anything that is not a positive number of days is stored as NULL,
        // which makes the task fall back to its default.
        $deleteOlderThanDays = (int)($submittedData['deleteOlderThanDays'] ?? 0);
        $task->setDeleteOlderThanDays($deleteOlderThanDays > 0 ? $deleteOlderThanDays : null);
    }

    /**
     * Check that a task is an instance of CleanupIndexTask
     *
     * Returns TRUE for NULL (no task yet) and for CleanupIndexTask instances,
     * throws for any other task type.
     *
     * @throws LogicException if $task is a task of another class
     */
    protected function isTaskInstanceofCleanupIndexTask(?AbstractTask $task): bool
    {
        if ($task !== null && !($task instanceof CleanupIndexTask)) {
            // NOTE(review): the exception code is shared with the check for
            // ReIndexTask - consider assigning a unique one.
            throw new LogicException(
                '$task must be an instance of CleanupIndexTask, '
                . 'other instances are not supported.',
                1487500366
            );
        }
        return true;
    }
}
47 changes: 2 additions & 45 deletions Classes/Task/ReIndexTask.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

namespace ApacheSolrForTypo3\Solr\Task;

use ApacheSolrForTypo3\Solr\ConnectionManager;
use ApacheSolrForTypo3\Solr\Domain\Index\Queue\QueueInitializationService;
use Doctrine\DBAL\ConnectionException as DBALConnectionException;
use Doctrine\DBAL\Exception as DBALException;
Expand All @@ -37,7 +36,7 @@ class ReIndexTask extends AbstractSolrTask
protected array $indexingConfigurationsToReIndex = [];

/**
* Purges/commits all Solr indexes, initializes the Index Queue
* Initializes the Index Queue
* and returns TRUE if the execution was successful
*
* @return bool Returns TRUE on success, FALSE on failure.
Expand All @@ -49,55 +48,13 @@ class ReIndexTask extends AbstractSolrTask
*/
public function execute()
{
// clean up
$cleanUpResult = $this->cleanUpIndex();

// initialize for re-indexing
/** @var QueueInitializationService $indexQueueInitializationService */
$indexQueueInitializationService = GeneralUtility::makeInstance(QueueInitializationService::class);
$indexQueueInitializationResults = $indexQueueInitializationService
->initializeBySiteAndIndexConfigurations($this->getSite(), $this->indexingConfigurationsToReIndex);

return $cleanUpResult && !in_array(false, $indexQueueInitializationResults);
}

/**
* Removes documents of the selected types from the index.
*
* @return bool TRUE if clean up was successful, FALSE on error
*
* @throws DBALException
*/
protected function cleanUpIndex(): bool
{
$cleanUpResult = true;
$solrConfiguration = $this->getSite()->getSolrConfiguration();
$solrServers = GeneralUtility::makeInstance(ConnectionManager::class)->getConnectionsBySite($this->getSite());
$typesToCleanUp = [];
$enableCommitsSetting = $solrConfiguration->getEnableCommits();

foreach ($this->indexingConfigurationsToReIndex as $indexingConfigurationName) {
$type = $solrConfiguration->getIndexQueueTypeOrFallbackToConfigurationName($indexingConfigurationName);
$typesToCleanUp[] = $type;
}

foreach ($solrServers as $solrServer) {
$deleteQuery = 'type:(' . implode(' OR ', $typesToCleanUp) . ')' . ' AND siteHash:' . $this->getSite()->getSiteHash();
$solrServer->getWriteService()->deleteByQuery($deleteQuery);

if (!$enableCommitsSetting) {
// Do not commit
continue;
}

$response = $solrServer->getWriteService()->commit(false, false);
if ($response->getHttpStatus() != 200) {
$cleanUpResult = false;
break;
}
}

return $cleanUpResult;
return !in_array(false, $indexQueueInitializationResults);
}

/**
Expand Down
12 changes: 12 additions & 0 deletions Resources/Private/Language/de.locallang.xlf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@
<source>Forced webroot (only needed when webroot is not PATH_site)</source>
<target>Forced webroot (Nur notwendig wenn webroot von PATH_site abweicht)</target>
</trans-unit>
<trans-unit id="task.cleanupIndex.title">
<source>Index Cleanup</source>
<target>Index-Bereinigung</target>
</trans-unit>
<trans-unit id="task.cleanupIndex.description">
<source>Delete old documents from index</source>
<target>Löscht alte Dokumente aus dem Index</target>
</trans-unit>
<trans-unit id="task.cleanupIndex.deleteOlderThanDays">
<source>Delete documents older than # days</source>
<target>Lösche alle Dokumente älter als # Tage</target>
</trans-unit>
<trans-unit id="field_host" xml:space="preserve" approved="yes">
<source>Solr Host</source>
<target>Solr host</target>
Expand Down
Loading