-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathvector_field_type.xml
46 lines (35 loc) · 2.56 KB
/
vector_field_type.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
<!-- Example vector field type. To use the VectorQParser, your field name needs to contain the text 'vector' somewhere in the name. -->
<fieldType name="w2v_title_vectors" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
  <!--
    Field type whose tokens carry float payloads taken from title_vectors.txt.
    The query and index analyzers below are the same chain, except that the
    query analyzer ends with an extra PayloadQueryBoostTokenFilterFactory step.
  -->

  <!-- Query-time analysis -->
  <analyzer type="query">
    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    <!--
      Replace each listed punctuation character with a single space.
      The double quote in the character class is written as &quot; because the
      attribute itself is delimited by double quotes; XML has no backslash
      escaping, so a literal " here would end the attribute value early and
      make the file not well-formed. After entity decoding the regex handed to
      Solr is [;:\'\"\*/\),\(\-\|].
      '|' is among the replaced characters, so the '|' payload delimiter used
      further down cannot come from the raw field text.
    -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="[;:\'\&quot;\*/\),\(\-\|]"
                replacement=" "/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.TrimFilterFactory"/>
    <!-- map surface forms to the entries defined in jobs_titles.txt -->
    <filter class="solr.SynonymFilterFactory"
            synonyms="jobs_titles.txt"
            ignoreCase="true"
            tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
    <!-- keep only tokens whose type is listed in synonym_types.txt (whitelist mode) -->
    <filter class="solr.TypeTokenFilterFactory" types="synonym_types.txt" useWhitelist="true"/>
    <!-- expand to vectors -->
    <filter class="solr.SynonymFilterFactory" synonyms="title_vectors.txt" ignoreCase="true" expand="true"/>
    <!-- extract payloads: text after '|' in each token becomes a float payload -->
    <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/>
    <!-- collapse to mean vectors -->
    <filter class="org.dice.solrenhancements.tokenfilters.MeanPayloadTokenFilterFactory"/>
    <!-- NOTE(review): query-only step; presumably converts the payload into a query boost. Confirm against the plugin source. -->
    <filter class="org.dice.solrenhancements.tokenfilters.PayloadQueryBoostTokenFilterFactory"/>
  </analyzer>

  <!-- Index-time analysis: same chain as above, without the final query-boost filter -->
  <analyzer type="index">
    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    <!-- Same punctuation replacement as the query analyzer; the quote must be &quot; for the same reason. -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="[;:\'\&quot;\*/\),\(\-\|]"
                replacement=" "/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.TrimFilterFactory"/>
    <!-- map surface forms to the entries defined in jobs_titles.txt -->
    <filter class="solr.SynonymFilterFactory"
            synonyms="jobs_titles.txt"
            ignoreCase="true"
            tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
    <!-- keep only tokens whose type is listed in synonym_types.txt (whitelist mode) -->
    <filter class="solr.TypeTokenFilterFactory" types="synonym_types.txt" useWhitelist="true"/>
    <!-- expand to vectors -->
    <filter class="solr.SynonymFilterFactory" synonyms="title_vectors.txt" ignoreCase="true" expand="true"/>
    <!-- extract payloads: text after '|' in each token becomes a float payload -->
    <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/>
    <!-- collapse to mean vectors -->
    <filter class="org.dice.solrenhancements.tokenfilters.MeanPayloadTokenFilterFactory"/>
  </analyzer>

  <!-- Custom similarity class that recognizes payloads and ignores tf and idf for this field -->
  <!-- Define <similarity class="solr.SchemaSimilarityFactory"/> as the similarity class in your schema.xml to configure per field type similarity -->
  <similarity class="org.dice.solrenhancements.similarity.PayloadOnlySimilarity"/>
</fieldType>