Skip to content

Commit

Permalink
Update dependencies and update code accordingly. Fix configuration.
Browse files Browse the repository at this point in the history
  • Loading branch information
KonradHoeffner committed Apr 19, 2022
1 parent 5c49ed7 commit ccff7ba
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 32 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ The extracted classes and their relations as an RDF Turtle file.

# Windows Release

Download the latest portable Windows release [here](https://github.com/IMISE/snik-tag/releases/download/0.2.1/sniktag.zip).
Download the portable Windows release version 0.3.1 [here](https://github.com/IMISE/snik-tag/releases/download/0.3.1/sniktag.zip).

# Development

## Requirements
* Java 16 or higher
* Maven 3

## Build
## Run
* Run `mvn compile`
* Run `mvn javafx:run`

Expand Down
40 changes: 25 additions & 15 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,26 @@
<modelVersion>4.0.0</modelVersion>
<groupId>snik-tag</groupId>
<artifactId>snik-tag</artifactId>
<version>0.3.0-SNAPSHOT</version>
<version>22.05</version>
<properties>
<maven.compiler.release>16</maven.compiler.release>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<version.javafx>15</version.javafx>
<version.javafx>18</version.javafx>
</properties>
<build>
<sourceDirectory>src/main/eu.snik.tag</sourceDirectory>
<sourceDirectory>src/main/java</sourceDirectory>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
<version>3.10.1</version>
<configuration>
<release>${maven.compiler.release}</release>
</configuration>
</plugin>
<plugin>
<groupId>org.openjfx</groupId>
<artifactId>javafx-maven-plugin</artifactId>
<version>0.0.4</version>
<version>0.0.8</version>
<configuration>
<mainClass>eu.snik.tag.gui.Main</mainClass>
</configuration>
Expand Down Expand Up @@ -83,17 +83,17 @@
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>2.0.0-alpha2</version>
<version>2.0.0-alpha7</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>2.0.0-alpha2</version>
<version>2.0.0-alpha7</version>
</dependency>
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-JAXB-ReferenceImpl</artifactId>
<version>11.2.9</version>
<version>11.4.6</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
Expand All @@ -119,7 +119,7 @@
<dependency>
<groupId>org.apache.jena</groupId>
<artifactId>jena-core</artifactId>
<version>4.1.0</version>
<version>4.4.0</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
Expand Down Expand Up @@ -154,7 +154,7 @@
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20210307</version>
<version>20220320</version>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
Expand All @@ -165,18 +165,18 @@
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>5.7.2</version>
<version>5.8.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.controlsfx</groupId>
<artifactId>controlsfx</artifactId>
<version>11.1.0</version>
<version>11.1.1</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<version>11.0.5</version>
<version>11.0.9</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
Expand All @@ -187,8 +187,18 @@
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
<version>1.14.3</version>
</dependency>
<dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
<version>2.3.1</version>
</dependency>
<dependency>
<groupId>org.glassfish.jaxb</groupId>
<artifactId>jaxb-runtime</artifactId>
<version>3.0.2</version>
<scope>runtime</scope>
</dependency>
</dependencies>
</project>

30 changes: 16 additions & 14 deletions src/main/java/eu/snik/tag/DocxLoader.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
package eu.snik.tag;

import jakarta.xml.bind.JAXBElement;
import jakarta.xml.bind.JAXBException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.WordUtils;
import org.docx4j.Docx4J;
Expand Down Expand Up @@ -84,44 +84,46 @@ public String getText() {
}
}

record TagClass (String tag, String description, Subtop subtop) {};
record TagClass(String tag, String description, Subtop subtop) {}

/** @return all classes extracted from the tagged parts of the DOCX document */
@Override
public Collection<Clazz> getClasses() {
try {
var wordMLPackage = Docx4J.load(in());
var doc = wordMLPackage.getMainDocumentPart();
//List<Comment> comments = doc.getCommentsPart().getContents().getComment();

TagClass[] tagClasses = { new TagClass( "w:i", "Entity Type", Subtop.EntityType ), new TagClass( "w:b", "Role", Subtop.Role ),new TagClass( "w:u", "Function", Subtop.Function ) };

TagClass[] tagClasses = {
new TagClass("w:i", "Entity Type", Subtop.EntityType),
new TagClass("w:b", "Role", Subtop.Role),
new TagClass("w:u", "Function", Subtop.Function),
};

var classes = new LinkedHashSet<Clazz>();
var processedRuns = new HashSet<R>();
var processedLabels = new HashSet<String>();
var warnings = new HashSet<String>(); // prevent the same warning from showing multiple times

for (var tc : tagClasses) {
String xpath = "//w:r[w:rPr/" + tc.tag + "[not(@w:val='false')]]";
var runs = (List<R>) (List<?>) doc.getJAXBNodesViaXPath(xpath, false);
String xpath = "//w:r[w:rPr/" + tc.tag + "[not(@w:val='false')]]";
var runs = (List<R>) (List<?>) doc.getJAXBNodesViaXPath(xpath, false);
runs.removeAll(processedRuns); // we cannot handle overlapping tags right now



for (R run : runs) {
String text = TextUtils.getText(run);
String label = StringUtils.strip(text, "., ");
if(label.length()>80) {continue;} // too long texts seems to be erroneously detected
if (label.length() > 80) {
continue;
} // overly long texts seem to be erroneously detected
//label = label.replaceAll("[^A-Za-z0-9 ]", ""); // removing non-alphanumerical characters leads to missing matches in the text tab
String filterLabel = label.replaceAll("[^A-Za-z0-9 ]", "").replaceAll("(the)|(and)|(or)", "");
if (filterLabel.length() < 4 && !filterLabel.matches("[A-Z]{3}")) {
} // abbreviations with 3 letters are OK
if (filterLabel.length() < 4 && !filterLabel.matches("[A-Z]{3}")) {} // abbreviations with 3 letters are OK
if (filterLabel.length() < 3) {
continue;
} // abbreviations
processedRuns.add(run);


/*
Comment comment = factory.createCommentsComment();
comments.add(comment);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
exports eu.snik.tag ;

requires java.desktop;
requires java.xml.bind;
requires jakarta.xml.bind;
requires javafx.base;
requires javafx.controls;
requires javafx.graphics;
Expand Down

0 comments on commit ccff7ba

Please sign in to comment.