Skip to content

Commit

Permalink
Add wildcards to each search token (#48)
Browse files Browse the repository at this point in the history
* Migrate phpunit config file

* Remove unused import in test file

* Add option to add wildcards to all tokens

resolves #47

* Refactor method name to match config

* Move new `SearchConfigutation` settings to constructor

* Prevent double % wildcard
  • Loading branch information
marijoo authored Jan 31, 2024
1 parent 4c2fe51 commit fba91e2
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 10 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ The options are described thoroughly in the file itself. By default, the package
and the [`PorterStemmer`](src/Stemmer/PorterStemmer.php) which is suitable for the English language. The search adds a trailing wildcard to the
last token and not all search terms need to be found in order for a document to show up in the results (there must be at least one match though).

You may also add a wildcard to each search token by enabling `wildcard_all_tokens` in the config file, although this is not recommended for performance reasons.

_A basic installation most likely does not require you to change any of these settings. Just to make sure, you should have a look at the
`connection` option though. If you want to change this, do so before running the migrations or the tables will be created using the wrong
database connection._
Expand Down
36 changes: 36 additions & 0 deletions config/scout-database.php
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,42 @@

'wildcard_last_token' => true,

/*
|--------------------------------------------------------------------------
| Use Wildcard for all Search Tokens
|--------------------------------------------------------------------------
|
| This setting controls whether all tokens of a search query should be
| handled using a wildcard instead of an exact match. This basically
| means that for a search input of "hell wor", the query will match
| documents containing "hell%" or "wor%" where % is the SQL wildcard of
| a "like" condition. The wildcard will only be applied for
| search tokens that have a minimum length of `wildcard_min_length`.
|
| Setting this to `true` will add a wildcard to the end of each search
| token. You may also set this to "both" to add wildcards to the beginning
| and the end of each search token, for example: "%hell%" and "%wor%".
|
| Note: Please note that changing this setting may negatively impact the
| performance of search queries. Also you might want to make sure that
| `require_match_for_all_tokens` is set to `false` when using this.
|
*/

'wildcard_all_tokens' => false,

/*
|--------------------------------------------------------------------------
| Minimum Token Length to apply Wildcards
|--------------------------------------------------------------------------
|
| If `wildcard_all_tokens` is enabled this setting defines the minimum
| length search tokens must have before wildcards are applied.
|
*/

'wildcard_min_length' => 3,

/*
|--------------------------------------------------------------------------
| Require a Match for all Tokens
Expand Down
6 changes: 3 additions & 3 deletions phpunit.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<phpunit
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.0/phpunit.xsd"
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.5/phpunit.xsd"
backupGlobals="false"
backupStaticProperties="false"
cacheDirectory=".phpunit.cache"
Expand Down Expand Up @@ -33,12 +33,12 @@
<directory suffix="Test.php">tests</directory>
</testsuite>
</testsuites>
<coverage>
<source>
<include>
<directory suffix=".php">src</directory>
</include>
<exclude>
<directory suffix=".php">src/Stemmer</directory>
</exclude>
</coverage>
</source>
</phpunit>
33 changes: 29 additions & 4 deletions src/DatabaseSeeker.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
use Namoshek\Scout\Database\Contracts\Stemmer;
use Namoshek\Scout\Database\Contracts\Tokenizer;
use Namoshek\Scout\Database\Support\DatabaseHelper;
use Illuminate\Support\Str;

/**
* The database seeker searches the database for collection items of a specific model,
Expand Down Expand Up @@ -75,10 +76,7 @@ public function search(Builder $builder, int $page = 1, int $pageSize = null): S
*/
private function performSearch(Builder $builder, array $keywords, int $page, ?int $limit): SearchResult
{
// Add a wildcard to the last search token if it is configured.
if ($this->searchConfiguration->lastTokenShouldUseWildcard()) {
$keywords[count($keywords) - 1] .= '%';
}
$keywords = $this->addWildcards($keywords);

// First, we retrieve the paginated results.
$results = $this->createSearchQuery($builder, $keywords)
Expand All @@ -104,6 +102,33 @@ private function performSearch(Builder $builder, array $keywords, int $page, ?in
return new SearchResult($builder, $results, $totalHits);
}

/**
 * Add wildcards to the given keywords if configured.
 *
 * If wildcards for all tokens are enabled, every token with at least the configured minimum
 * length is wrapped: a trailing `%` by default, or a leading and a trailing `%` when the
 * setting is the string 'both'. Shorter tokens are left untouched.
 *
 * If the wildcard for the last token is enabled, the last token is additionally capped with
 * `%` independent of its length. `Str::finish()` is used so a token that already received a
 * trailing `%` in the first step does not end up with a doubled wildcard.
 *
 * An empty keyword list is returned unchanged.
 *
 * @param string[] $keywords
 * @return string[]
 */
private function addWildcards(array $keywords): array
{
    // Without tokens there is nothing to decorate. Returning early also avoids reading and
    // writing the undefined offset -1 in the "last token" branch below.
    if ($keywords === []) {
        return $keywords;
    }

    // Read the setting once; it is either a boolean or the string 'both'.
    $wildcardAllTokens = $this->searchConfiguration->allTokensShouldUseWildcard();

    // Add a wildcard to each search token if it is configured.
    if ($wildcardAllTokens) {
        // The doubled %% is a literal percent sign for sprintf(), %s is the token itself.
        $pattern = $wildcardAllTokens === 'both' ? '%%%s%%' : '%s%%';
        $minLength = $this->searchConfiguration->minimumLengthForWildcard();

        $keywords = array_map(
            static fn ($token) => mb_strlen($token) >= $minLength
                ? sprintf($pattern, $token)
                : $token,
            $keywords
        );
    }

    // Add a wildcard to the last search token if it is configured.
    if ($this->searchConfiguration->lastTokenShouldUseWildcard()) {
        // array_key_last() also works if the keys are not a gapless 0..n-1 sequence.
        $lastKey = array_key_last($keywords);
        $keywords[$lastKey] = Str::finish($keywords[$lastKey], '%');
    }

    return $keywords;
}

/**
* Creates a new search query using the given builder. The query can be used to retrieve paginated results
* and also to count the total number of potential hits.
Expand Down
4 changes: 3 additions & 1 deletion src/ScoutDatabaseServiceProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ public function register(): void
$config->get('scout-database.search.term_frequency_weight', 1),
$config->get('scout-database.search.term_deviation_weight', 1),
$config->get('scout-database.search.wildcard_last_token', true),
$config->get('scout-database.search.require_match_for_all_tokens', false)
$config->get('scout-database.search.require_match_for_all_tokens', false),
$config->get('scout-database.search.wildcard_all_tokens', false),
$config->get('scout-database.search.wildcard_min_length', 3),
);
});
}
Expand Down
20 changes: 19 additions & 1 deletion src/SearchConfiguration.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ public function __construct(
private float $termFrequencyWeight,
private float $termDeviationWeight,
private bool $wildcardLastToken,
private bool $requireMatchForAllTokens
private bool $requireMatchForAllTokens,
private bool|string $wildcardAllTokens = false,
private int $wildcardMinLength = 3,
)
{
}
Expand Down Expand Up @@ -56,6 +58,22 @@ public function lastTokenShouldUseWildcard(): bool
return $this->wildcardLastToken;
}

/**
 * Returns whether all tokens of a search query shall use a wildcard.
 *
 * The value is handed back exactly as it was given to the constructor, so it is either a
 * boolean or a string. Callers that need to distinguish wildcard modes must therefore compare
 * the returned value themselves instead of relying on truthiness alone.
 *
 * @return string|bool the configured wildcard setting for all tokens
 */
public function allTokensShouldUseWildcard(): string|bool
{
return $this->wildcardAllTokens;
}

/**
 * Returns the minimum token length for using wildcards.
 *
 * The value is the one passed to the constructor (which defaults to 3).
 *
 * @return int the configured minimum token length
 */
public function minimumLengthForWildcard(): int
{
return $this->wildcardMinLength;
}

/**
* Returns whether search shall only return documents containing all searched tokens.
*/
Expand Down
59 changes: 58 additions & 1 deletion tests/ScopedDatabaseSeekerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
use Illuminate\Contracts\Config\Repository as ConfigRepository;
use Illuminate\Database\ConnectionInterface;
use Illuminate\Foundation\Testing\DatabaseMigrations;
use Namoshek\Scout\Database\DatabaseSeeker;
use Namoshek\Scout\Database\SearchResult;
use Namoshek\Scout\Database\Tests\Stubs\User;

Expand Down Expand Up @@ -285,4 +284,62 @@ public function test_builder_returned_by_raw_results_is_the_one_used_for_searchi

$this->assertEquals($builder, $result->getBuilder());
}

public function test_does_not_find_documents_if_wildcard_all_tokens_is_disabled_and_no_exact_match_is_given(): void
{
    // No configuration is changed here, so the search runs with the settings of the test setup.
    $search = User::search('eur ent')->where('tenant_id', self::TENANT_ID_1);

    $keys = $search->keys();

    // Neither of the partial tokens is expected to produce a hit.
    $this->assertEmpty($keys);
}

public function test_finds_documents_if_wildcard_all_tokens_is_enabled_and_no_exact_match_is_given(): void
{
    // Turn on wildcards for every search token.
    $config = $this->app->make('config');
    $config->set('scout-database.search.wildcard_all_tokens', true);

    $keys = User::search('eur ent')
        ->where('tenant_id', self::TENANT_ID_1)
        ->keys();

    $this->assertEquals([6, 7], $keys->toArray());
}

public function test_finds_documents_if_wildcard_all_tokens_is_set_to_both(): void
{
    // Use the 'both' mode of the wildcard setting for all tokens.
    $config = $this->app->make('config');
    $config->set('scout-database.search.wildcard_all_tokens', 'both');

    $keys = User::search('eur ent')
        ->where('tenant_id', self::TENANT_ID_1)
        ->keys();

    $this->assertEquals([8, 6, 7], $keys->toArray());
}

public function test_does_not_find_documents_by_wildcard_if_minimum_token_length_is_not_reached(): void
{
    // Both search tokens are three characters long, one less than the minimum configured here.
    $config = $this->app->make('config');
    $config->set('scout-database.search.wildcard_all_tokens', 'both');
    $config->set('scout-database.search.wildcard_min_length', 4);

    $keys = User::search('eur ent')
        ->where('tenant_id', self::TENANT_ID_1)
        ->keys();

    $this->assertEmpty($keys);
}

public function test_finds_documents_by_wildcard_if_minimum_token_length_is_reached(): void
{
    $config = $this->app->make('config');
    $config->set('scout-database.search.wildcard_all_tokens', 'both');
    $config->set('scout-database.search.wildcard_min_length', 4);

    // The same query is run for each tenant, in this order, with its own expected keys.
    $expectations = [
        [self::TENANT_ID_1, [2, 11, 1, 10]],
        [self::TENANT_ID_2, [3]],
    ];

    foreach ($expectations as [$tenantId, $expectedKeys]) {
        $keys = User::search('ello abc xamp')->where('tenant_id', $tenantId)->keys();

        $this->assertEquals($expectedKeys, $keys->toArray());
    }
}

public function test_adds_wildcard_to_last_token_even_if_minimum_length_is_not_reached(): void
{
    // The minimum length of 7 is longer than both search tokens, while the
    // wildcard for the last token is explicitly enabled.
    $config = $this->app->make('config');
    $config->set('scout-database.search.wildcard_all_tokens', 'both');
    $config->set('scout-database.search.wildcard_min_length', 7);
    $config->set('scout-database.search.wildcard_last_token', true);

    $keys = User::search('ello examp')
        ->where('tenant_id', self::TENANT_ID_2)
        ->keys();

    $this->assertEquals([3], $keys->toArray());
}
}

0 comments on commit fba91e2

Please sign in to comment.