Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
janvonde committed Dec 21, 2024
2 parents 90acf78 + 3e9dafd commit 30e670a
Show file tree
Hide file tree
Showing 14 changed files with 182 additions and 123 deletions.
12 changes: 6 additions & 6 deletions goobi-viewer-indexer/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>io.goobi.viewer</groupId>
<artifactId>viewer-indexer</artifactId>
<version>24.11</version>
<version>24.12-SNAPSHOT</version>


<name>Goobi viewer - Indexer</name>
Expand Down Expand Up @@ -51,20 +51,20 @@
<commons-io.version>2.18.0</commons-io.version>
<commons-jxpath.version>1.3</commons-jxpath.version>
<commons-lang3.version>3.17.0</commons-lang3.version>
<commons-text.version>1.12.0</commons-text.version>
<commons-text.version>1.13.0</commons-text.version>
<httpclient.version>4.5.14</httpclient.version>
<httpcore.version>4.4.16</httpcore.version>
<icu.version>76.1</icu.version>
<imageio-openjpeg.version>0.6.8</imageio-openjpeg.version>
<log4j.version>2.24.2</log4j.version>
<log4j.version>2.24.3</log4j.version>
<jackson.version>2.18.2</jackson.version>
<jaxen.version>2.0.0</jaxen.version>
<jai.version>1.4.0</jai.version>
<jakarta.mail-api.version>2.1.3</jakarta.mail-api.version>
<jdom2.version>2.0.6.1</jdom2.version>
<jsoup.version>1.18.3</jsoup.version>
<json.version>20240303</json.version>
<junit.version>5.11.3</junit.version>
<junit.version>5.11.4</junit.version>
<metadata-extractor.version>2.19.0</metadata-extractor.version>
<solr.version>9.7.0</solr.version>
<sf-geojson.version>3.3.3</sf-geojson.version>
Expand Down Expand Up @@ -319,7 +319,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.8.0</version>
<version>3.8.1</version>
<executions>
<execution>
<id>analyze</id>
Expand Down Expand Up @@ -480,7 +480,7 @@
<dependency>
<groupId>com.puppycrawl.tools</groupId>
<artifactId>checkstyle</artifactId>
<version>10.20.2</version>
<version>10.21.0</version>
</dependency>
</dependencies>
<executions>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,8 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,10 @@ public String[] index(Path dcFile, Map<String, Path> dataFolders, final ISolrWri
getNextIddoc(SolrIndexerDaemon.getInstance().getSearchIndex()));
if (doc != null) {
useWriteStrategy.addDoc(doc);
logger.debug("Created group document for {}: {}", groupIdField, indexObj.getGroupIds().get(groupIdField));
} else {
if (logger.isDebugEnabled()) {
logger.debug("Created group document for {}: {}", groupIdField, indexObj.getGroupIds().get(groupIdField));
}
} else if (logger.isDebugEnabled()) {
logger.debug("Group document already exists for {}: {}", groupIdField, indexObj.getGroupIds().get(groupIdField));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -429,20 +429,18 @@ public IndexObject indexChild(Element node, IndexObject parentIndexObject, int d
indexObj.writeAccessConditions(parentIndexObject);

// Generate thumbnail info and page docs for this docstruct. PI_TOPSTRUCT must be set at this point!
if (StringUtils.isNotEmpty(indexObj.getLogId())) {
if (StringUtils.isNotEmpty(indexObj.getLogId()) && indexObj.getNumPages() > 0) {
// Write number of pages and first/last page labels for this docstruct
if (indexObj.getNumPages() > 0) {
indexObj.addToLucene(SolrConstants.NUMPAGES, String.valueOf(indexObj.getNumPages()));
if (indexObj.getFirstPageLabel() != null) {
indexObj.addToLucene(SolrConstants.ORDERLABELFIRST, indexObj.getFirstPageLabel());
}
if (indexObj.getLastPageLabel() != null) {
indexObj.addToLucene(SolrConstants.ORDERLABELLAST, indexObj.getLastPageLabel());
}
if (indexObj.getFirstPageLabel() != null && indexObj.getLastPageLabel() != null) {
indexObj.addToLucene("MD_ORDERLABELRANGE",
new StringBuilder(indexObj.getFirstPageLabel()).append(" - ").append(indexObj.getLastPageLabel()).toString());
}
indexObj.addToLucene(SolrConstants.NUMPAGES, String.valueOf(indexObj.getNumPages()));
if (indexObj.getFirstPageLabel() != null) {
indexObj.addToLucene(SolrConstants.ORDERLABELFIRST, indexObj.getFirstPageLabel());
}
if (indexObj.getLastPageLabel() != null) {
indexObj.addToLucene(SolrConstants.ORDERLABELLAST, indexObj.getLastPageLabel());
}
if (indexObj.getFirstPageLabel() != null && indexObj.getLastPageLabel() != null) {
indexObj.addToLucene("MD_ORDERLABELRANGE",
new StringBuilder(indexObj.getFirstPageLabel()).append(" - ").append(indexObj.getLastPageLabel()).toString());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1444,8 +1444,10 @@ int addGroupedMetadataDocs(GroupedMetadata gmd, ISolrWriteStrategy writeStrategy
}

// Add access conditions
for (String s : indexObj.getAccessConditions()) {
doc.addField(SolrConstants.ACCESSCONDITION, s);
if (!doc.containsKey(SolrConstants.ACCESSCONDITION)) {
for (String s : indexObj.getAccessConditions()) {
doc.addField(SolrConstants.ACCESSCONDITION, s);
}
}

// Add DC values to metadata doc
Expand Down Expand Up @@ -2080,7 +2082,6 @@ protected void addFullTextToPageDoc(SolrInputDocument doc, Map<String, Path> dat
logger.warn("Could not read ALTO file '{}': {}", altoFile.getName(), e.getMessage());
}
}
// logger.info("regular alto " + altoFile.getAbsolutePath() + " written: " + altoWritten);
}

// If FULLTEXT is still empty, look for a plain full-text
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -518,8 +518,10 @@ public String[] index(Path metsFile, Map<String, Path> dataFolders, final ISolrW
getNextIddoc(SolrIndexerDaemon.getInstance().getSearchIndex()));
if (doc != null) {
writeStrategy.addDoc(doc);
logger.debug("Created group document for {}: {}", groupIdField, indexObj.getGroupIds().get(groupIdField));
} else {
if (logger.isDebugEnabled()) {
logger.debug("Created group document for {}: {}", groupIdField, indexObj.getGroupIds().get(groupIdField));
}
} else if (logger.isDebugEnabled()) {
logger.debug("Group document already exists for {}: {}", groupIdField, indexObj.getGroupIds().get(groupIdField));
}
}
Expand Down Expand Up @@ -1309,7 +1311,7 @@ PhysicalElement generatePageDocument(Element eleStructMapPhysical, String iddoc,
}
logger.debug("fileId: {}", fileId);

// If fileId is not null, use an XPath expression for the appropriate file element;
// If fileId is not null, use an XPath expression for the appropriate file element,
// otherwise get all file elements and get the one with the index of the page order
String fileIdXPathCondition = "";
if (fileId != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ public String[] index(Path mainFile, Map<String, Path> dataFolders, final ISolrW
indexObj.addToLucene(SolrConstants.FULLTEXTAVAILABLE, String.valueOf(recordHasFulltext));

// Add THUMBNAIL,THUMBPAGENO,THUMBPAGENOLABEL (must be done AFTER writeDateModified(),
// writeAccessConditions() and generatePageDocuments()!)
// writeAccessConditions() and generatePageDocuments()!
generateChildDocstructDocuments(indexObj, useWriteStrategy, dataFolders, workDepth);

// ISWORK only for non-anchors
Expand All @@ -351,7 +351,6 @@ public String[] index(Path mainFile, Map<String, Path> dataFolders, final ISolrW
// Add DEFAULT field
if (StringUtils.isNotEmpty(indexObj.getDefaultValue())) {
indexObj.addToLucene(SolrConstants.DEFAULT, cleanUpDefaultField(indexObj.getDefaultValue()));
// indexObj.getSuperDefaultBuilder().append(' ').append(indexObj.getDefaultValue().trim());
indexObj.setDefaultValue("");
}

Expand Down Expand Up @@ -395,8 +394,10 @@ public String[] index(Path mainFile, Map<String, Path> dataFolders, final ISolrW
getNextIddoc(SolrIndexerDaemon.getInstance().getSearchIndex()));
if (doc != null) {
useWriteStrategy.addDoc(doc);
logger.debug("Created group document for {}: {}", groupIdField, indexObj.getGroupIds().get(groupIdField));
} else {
if (logger.isDebugEnabled()) {
logger.debug("Created group document for {}: {}", groupIdField, indexObj.getGroupIds().get(groupIdField));
}
} else if (logger.isDebugEnabled()) {
logger.debug("Group document already exists for {}: {}", groupIdField, indexObj.getGroupIds().get(groupIdField));
}
}
Expand Down Expand Up @@ -560,14 +561,14 @@ private void generateChildDocstructDocuments(IndexObject rootIndexObj, ISolrWrit
sbDefaultValue.append(currentIndexObj.getDefaultValue());
String labelWithSpaces = new StringBuilder(" ").append(currentIndexObj.getLabel()).append(' ').toString();
if (StringUtils.isNotEmpty(currentIndexObj.getLabel()) && !sbDefaultValue.toString().contains(labelWithSpaces)) {
// logger.info("Adding own LABEL to DEFAULT: " + indexObj.getLabel());
// logger.info("Adding own LABEL to DEFAULT: {}", indexObj.getLabel()); //NOSONAR Debug
sbDefaultValue.append(labelWithSpaces);
}
if (SolrIndexerDaemon.getInstance().getConfiguration().isAddLabelToChildren()) {
for (String label : currentIndexObj.getParentLabels()) {
String parentLabelWithSpaces = new StringBuilder(" ").append(label).append(' ').toString();
if (StringUtils.isNotEmpty(label) && !sbDefaultValue.toString().contains(parentLabelWithSpaces)) {
// logger.info("Adding ancestor LABEL to DEFAULT: " + label);
// logger.info("Adding ancestor LABEL to DEFAULT: {}", label); //NOSONAR Debug
sbDefaultValue.append(parentLabelWithSpaces);
}
}
Expand All @@ -579,7 +580,6 @@ private void generateChildDocstructDocuments(IndexObject rootIndexObj, ISolrWrit
if (StringUtils.isNotEmpty(currentIndexObj.getDefaultValue())) {
currentIndexObj.addToLucene(SolrConstants.DEFAULT, cleanUpDefaultField(currentIndexObj.getDefaultValue()));
// Reset the default value now that it has been written to the DEFAULT field
// parentIndexObject.getSuperDefaultBuilder().append(' ').append(indexObj.getDefaultValue().trim());
currentIndexObj.setDefaultValue("");
}
}
Expand Down Expand Up @@ -815,9 +815,6 @@ PhysicalElement generatePageDocument(Element eleImage, String iddoc, String pi,
}

parseMimeType(ret.getDoc(), fileName);
} else {
// TODO placeholder
String placeholder = eleImage.getChildText("placeholder");
}

// FIELD_IMAGEAVAILABLE indicates whether this page has an image
Expand Down Expand Up @@ -847,9 +844,7 @@ PhysicalElement generatePageDocument(Element eleImage, String iddoc, String pi,
ret.getDoc().addField(SolrConstants.DOCSTRCT, "OtherDocStrct"); // TODO
}

if (dataFolders != null) {
addFullTextToPageDoc(ret.getDoc(), dataFolders, dataRepository, pi, useOrder, null);
}
addFullTextToPageDoc(ret.getDoc(), dataFolders, dataRepository, pi, useOrder, null);

return ret;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -474,10 +474,6 @@ public boolean scan() throws FatalIndexerException {
logger.debug("Queue full ({})", getHotfolderPath().getFileName());
}
}
// else {
// logger.info("Found file '{}' which is not in the re-index queue. This file will be deleted.", recordFile.getFileName());
// Files.delete(recordFile);
// }
}
} catch (IOException e) {
logger.error(e.getMessage(), e);
Expand Down
Loading

0 comments on commit 30e670a

Please sign in to comment.