Skip to content

Commit

Permalink
PyLucene >=9.12 required.
Browse files: browse the repository at this point in the history
PyLucene 10 compatibility.
  • Loading branch information
coady committed Oct 18, 2024
1 parent 2c50c3b commit 2a3ee77
Show file tree
Hide file tree
Showing 9 changed files with 19 additions and 33 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## Unreleased
* PyLucene >=9.12 required

## [3.2](https://pypi.org/project/lupyne/3.2/) - 2024-07-07
### Changed
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ PyLucene is not `pip` installable.
* [Homebrew](https://brew.sh) formula: `brew install coady/tap/pylucene`

## Dependencies
* PyLucene >=9.6
* PyLucene >=9.12
* strawberry-graphql (if graphql option)
* fastapi (if rest option)

Expand Down
2 changes: 1 addition & 1 deletion lupyne/engine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
from .indexers import IndexSearcher, MultiSearcher, IndexWriter, Indexer # noqa

version = tuple(map(int, lucene.VERSION.split('.')))
assert version >= (9, 6), version
assert version >= (9, 12), version
7 changes: 1 addition & 6 deletions lupyne/engine/analyzers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from org.apache.lucene import analysis, queryparser, search, util
from org.apache.lucene.search import uhighlight
from org.apache.pylucene.analysis import PythonAnalyzer, PythonTokenFilter
from org.apache.pylucene.queryparser.classic import PythonQueryParser


class TokenStream(analysis.TokenStream):
Expand Down Expand Up @@ -155,11 +154,7 @@ def parse(self, query: str, field='', op='', parser=None, **attrs) -> search.Que
setattr(parser, name, value)
if isinstance(parser, queryparser.classic.MultiFieldQueryParser):
return parser.parse(parser, query)
try:
return parser.parse(query)
finally:
if isinstance(parser, PythonQueryParser):
parser.finalize()
return parser.parse(query)

def highlight(self, query: search.Query, field: str, content: str, count: int = 1) -> str:
"""Return highlighted content.
Expand Down
10 changes: 7 additions & 3 deletions lupyne/engine/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ def __init__(self, searcher, scoredocs: Sequence, count=0, fields=None):
self.searcher, self.scoredocs = searcher, scoredocs
if hasattr(count, 'relation'):
cls = int if count.relation == search.TotalHits.Relation.EQUAL_TO else float
count = cls(count.value)
count = cls(count.value() if lucene.VERSION.startswith('10.') else count.value)
self.count, self.fields = count, fields

def select(self, *fields: str):
Expand Down Expand Up @@ -450,8 +450,12 @@ def __len__(self):
def __getitem__(self, index):
hits = groupdocs = self.groupdocs[index]
if isinstance(groupdocs, grouping.GroupDocs):
hits = Hits(self.searcher, groupdocs.scoreDocs, groupdocs.totalHits)
hits.value = convert(groupdocs.groupValue)
if lucene.VERSION.startswith('10.'): # pragma: no cover
hits = Hits(self.searcher, groupdocs.scoreDocs(), groupdocs.totalHits())
hits.value = convert(groupdocs.groupValue())
else:
hits = Hits(self.searcher, groupdocs.scoreDocs, groupdocs.totalHits)
hits.value = convert(groupdocs.groupValue)
hits.fields = self.fields
return hits

Expand Down
15 changes: 4 additions & 11 deletions lupyne/engine/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def positions(self, name: str, value, payloads=False, offsets=False) -> Iterator
yield doc, list(positions)

def vector(self, id, field):
terms = self.getTermVector(id, field)
terms = self.termVectors().get(id, field)
termsenum = terms.iterator() if terms else index.TermsEnum.EMPTY
terms = map(operator.methodcaller('utf8ToString'), util.BytesRefIterator.cast_(termsenum))
return termsenum, terms
Expand Down Expand Up @@ -402,12 +402,12 @@ def collector(self, count=None, sort=None, reverse=False, scores=False, mincount
count = min(count, self.maxDoc() or 1)
mincount = max(count, mincount)
if sort is None:
return search.TopScoreDocCollector.create(count, mincount)
return search.TopScoreDocCollectorManager(count, mincount).newCollector()
if isinstance(sort, str):
sort = self.sortfield(sort, reverse=reverse)
if not isinstance(sort, search.Sort):
sort = search.Sort(sort)
return search.TopFieldCollector.create(sort, count, mincount)
return search.TopFieldCollectorManager(sort, count, mincount).newCollector()

def search(
self,
Expand All @@ -417,7 +417,6 @@ def search(
reverse=False,
scores=False,
mincount=1000,
timeout=None,
**parser,
) -> Hits:
"""Run query and return [Hits][lupyne.engine.documents.Hits].
Expand All @@ -432,17 +431,11 @@ def search(
reverse: reverse flag used with sort
scores: compute scores for candidate results when sorting
mincount: total hit count accuracy threshold
timeout: stop search after elapsed number of seconds
**parser: [parse][lupyne.engine.analyzers.Analyzer.parse] options
"""
query = Query.alldocs() if query is None else self.parse(query, **parser)
results = cache = collector = self.collector(count, sort, reverse, scores, mincount)
counter = search.TimeLimitingCollector.getGlobalCounter()
if timeout is not None:
results = search.TimeLimitingCollector(collector, counter, int(timeout * 1000))
with suppress(search.TimeLimitingCollector.TimeExceededException):
super().search(query, results)
timeout = None
super().search(query, results)
if isinstance(cache, search.CachingCollector):
collector = search.TotalHitCountCollector()
cache.replay(collector)
Expand Down
6 changes: 3 additions & 3 deletions lupyne/engine/queries.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from collections.abc import Callable, Iterable, Iterator
from collections.abc import Callable, Iterable
import lucene # noqa
from java.lang import Double, Integer, Long
from java.util import Arrays
Expand Down Expand Up @@ -27,7 +27,7 @@ def term(cls, name: str, value) -> 'Query':
@classmethod
def terms(cls, name: str, values) -> 'Query':
"""Return lucene TermInSetQuery, optimizing a SHOULD BooleanQuery of many terms."""
return cls(search.TermInSetQuery, name, list(map(util.BytesRef, values)))
return cls(search.TermInSetQuery, name, Arrays.asList(list(map(util.BytesRef, values))))

@classmethod
def boolean(cls, occur, *queries, **terms):
Expand Down Expand Up @@ -274,8 +274,8 @@ def __getitem__(self, id: int):

class SortedSet(Sorted):
def __getitem__(self, id: int):
ords: Iterator = iter(self.docvalues.nextOrd, self.docvalues.NO_MORE_ORDS)
if self.docvalues.advanceExact(id):
ords = (self.docvalues.nextOrd() for _ in range(self.docvalues.docValueCount()))
return tuple(self.type(self.docvalues.lookupOrd(ord)) for ord in ords)


Expand Down
2 changes: 1 addition & 1 deletion lupyne/engine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def suppress(exception):
"""Suppress specific lucene exception."""
try:
yield
except lucene.JavaError as exc:
except lucene.JavaError as exc: # pragma: no cover
if not exception.instance_(exc.getJavaException()):
raise

Expand Down
7 changes: 0 additions & 7 deletions tests/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,6 @@ def test_grouping(tempdir, indexer, zipcodes):
assert all(grouping.search(indexer.indexSearcher, Q.alldocs()).facets.values())
assert len(grouping) == len(list(grouping)) > 100
assert set(grouping) > set(facets)
hits = indexer.search(query, timeout=-1)
assert not hits and not hits.count and math.isnan(hits.maxscore)
hits = indexer.search(query, timeout=10)
assert len(hits) == hits.count == indexer.count(query) and hits.maxscore == 1.0
directory = store.ByteBuffersDirectory()
query = Q.term('state', 'CA')
size = indexer.copy(directory, query)
Expand Down Expand Up @@ -465,9 +461,6 @@ def test_fields(indexer, constitution):
engine.Field('', stored='invalid')
with pytest.raises(AttributeError):
engine.Field('', invalid=None)
with pytest.raises(lucene.JavaError):
with engine.utils.suppress(search.TimeLimitingCollector.TimeExceededException):
document.Field('name', 'value', document.FieldType())
assert str(engine.Field.String('')) == str(
document.StringField('', '', document.Field.Store.NO).fieldType()
)
Expand Down

0 comments on commit 2a3ee77

Please sign in to comment.