Skip to content

Commit

Permalink
Merge pull request #164 from kermitt2/grobid-0.8.1
Browse files Browse the repository at this point in the history
Update github actions and JDK 11
  • Loading branch information
lfoppiano authored Jan 6, 2025
2 parents 2a849b9 + defffe3 commit ac8bd03
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 16 deletions.
68 changes: 68 additions & 0 deletions .github/workflows/ci-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# CI workflow: builds GROBID 0.8.1, grobid-ner and entity-fishing with JDK 11,
# then checks that the Docker image can be built (the image is NOT pushed).
name: Build unstable

on: [push, workflow_dispatch]

# Every run shares the same concurrency group, so only one run executes at a time.
concurrency:
  group: gradle

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      # GROBID is pinned to the 0.8.1 tag; grobid-ner is checked out inside the
      # GROBID tree and entity-fishing (this repository) next to it.
      - name: Checkout grobid home
        uses: actions/checkout@v4
        with:
          repository: kermitt2/grobid
          ref: 0.8.1
          path: ./grobid
      - name: Checkout grobid-ner
        uses: actions/checkout@v4
        with:
          repository: kermitt2/grobid-ner
          path: ./grobid/grobid-ner
      - name: Checkout entity-fishing
        uses: actions/checkout@v4
        with:
          path: ./entity-fishing
      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          # 'adopt' is the legacy AdoptOpenJDK distribution and no longer receives
          # updates; Eclipse Temurin is its maintained successor.
          distribution: 'temurin'
          cache: 'gradle'
      - name: Build grobid
        working-directory: grobid
        run: ./gradlew install -x test
      - name: Build grobid-ner
        working-directory: grobid/grobid-ner
        run: ./gradlew install -x test
      - name: Install grobid-ner
        working-directory: grobid/grobid-ner
        run: ./gradlew copyModels
      - name: Build entity-fishing with Gradle
        working-directory: entity-fishing
        run: ./gradlew build -x test

  docker-build:
    needs: [ build ]
    runs-on: ubuntu-latest

    steps:
      # Remove large preinstalled toolchains to free disk space for the image build.
      - name: Create more disk space
        run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
      - uses: actions/checkout@v4
      - name: Build and push
        id: docker_build
        uses: mr-smithers-excellent/docker-build-push@v6
        with:
          dockerfile: Dockerfile
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
          image: lfoppiano/entity-fishing
          registry: docker.io
          pushImage: false
          tags: latest-develop
      # docker-build-push exposes imageFullName / imageName / tags (there is no
      # `digest` output), so report those. Values are passed through env instead
      # of being interpolated directly into the shell command.
      - name: Image name and tags
        env:
          IMAGE_FULL_NAME: ${{ steps.docker_build.outputs.imageFullName }}
          IMAGE_TAGS: ${{ steps.docker_build.outputs.tags }}
        run: echo "Built ${IMAGE_FULL_NAME} (tags ${IMAGE_TAGS})"
30 changes: 17 additions & 13 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ version = '0.0.6'

description = """entity recognition and disambiguation against Wikidata and Wikipedia in a raw text, partially-annotated text segment, PDF or weighted term vector"""

sourceCompatibility = 1.8
targetCompatibility = 1.8
// Target Java 11. Use the JavaVersion constant: "1.11" is not a real Java version
// string and is only accepted through Gradle's legacy "1.x" parsing.
sourceCompatibility = JavaVersion.VERSION_11
targetCompatibility = JavaVersion.VERSION_11

import org.apache.tools.ant.taskdefs.condition.Os

Expand Down Expand Up @@ -65,6 +65,10 @@ dependencies {
exclude(group: 'ch.qos.logback', module: 'logback-classic')
}

// Logging backend. An exclude rule matches on separate group/module coordinates;
// a combined 'group:name' string given as `module` never matches anything.
implementation('ch.qos.logback:logback-classic:1.2.3'){
exclude(group: 'com.google.guava', module: 'guava')
}

implementation 'black.ninia:jep:4.0.2'
implementation 'org.apache.opennlp:opennlp-tools:1.9.1'
implementation "joda-time:joda-time:2.9.9"
Expand All @@ -76,15 +80,15 @@ dependencies {
implementation group: 'org.wipo.analysers', name: 'wipo-analysers', version: '0.0.1'

// Apache commons
implementation 'org.apache.commons:commons-collections4:4.1'
implementation 'org.apache.commons:commons-collections4:4.3'
implementation 'org.apache.commons:commons-lang3:3.6'
implementation 'commons-logging:commons-logging:1.2'
implementation 'commons-io:commons-io:2.7'
implementation 'commons-pool:commons-pool:1.6'
implementation group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.13'
implementation group: 'org.apache.httpcomponents', name: 'httpmime', version: '4.5.13'
implementation group: 'org.apache.commons', name: 'commons-text', version: '1.1'
implementation group: 'com.google.guava', name: 'guava', version: '29.0-jre'
implementation "com.google.guava:guava:31.0.1-jre"

// json and yaml
implementation group: 'com.fasterxml.jackson.core', name: 'jackson-core', version: '2.10.1'
Expand Down Expand Up @@ -152,7 +156,7 @@ configurations.all {
force 'xml-apis:xml-apis:1.4.01'
}

exclude group: "ch.qos.logback", module: "logback-classic"
// exclude group: "ch.qos.logback", module: "logback-classic"
exclude group: 'org.slf4j', module: "slf4j-log4j12"
exclude group: 'org.slf4j', module: "slf4j-jdk14"
exclude group: 'log4j', module: "log4j"
Expand Down Expand Up @@ -211,7 +215,7 @@ task(train_corpus, dependsOn: 'classes', type: JavaExec, group: 'training') {
args getArg('corpus', ''), getArg('lang', 'en')
jvmArgs '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g'
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g', "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
jvmArgs '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g', "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g'
}
Expand All @@ -225,7 +229,7 @@ task(evaluation, dependsOn: 'classes', type: JavaExec, group: 'evaluation') {
classpath = sourceSets.main.runtimeClasspath
args getArg('corpus', '')
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
// Heap flags take a single dash (-Xms2g); each --add-opens is followed by exactly one module/package target.
jvmArgs '-Xms2g', '-Xmx8g', "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '-Xms2g', '-Xmx8g'
}
Expand All @@ -239,7 +243,7 @@ task(annotatedDataGeneration, dependsOn: 'classes', type: JavaExec, group: 'trai
classpath = sourceSets.main.runtimeClasspath
args getArg('corpus', '')
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
// Heap flags take a single dash (-Xms2g); each --add-opens is followed by exactly one module/package target.
jvmArgs '-Xms2g', '-Xmx8g', "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '-Xms2g', '-Xmx8g'
}
Expand All @@ -254,7 +258,7 @@ task(generate_entity_description, dependsOn: 'classes', type: JavaExec, group: '
classpath = sourceSets.main.runtimeClasspath
args 'data/embeddings/', getArg('lang', 'en')
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
// Heap flags take a single dash (-Xms2g); each --add-opens is followed by exactly one module/package target.
jvmArgs '-Xms2g', '-Xmx8g', "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '-Xms2g', '-Xmx8g'
}
Expand All @@ -269,7 +273,7 @@ task(quantize_word_embeddings, dependsOn: 'classes', type: JavaExec, group: 'emb
classpath = sourceSets.main.runtimeClasspath
args '-i', getArg('i', 'word.embeddings.vec'), '-o', getArg('o', 'word.embeddings.quantized'), '-error', getArg('e', '0.01'), '-hashheader'
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
// Heap flags take a single dash (-Xms2g); each --add-opens is followed by exactly one module/package target.
jvmArgs '-Xms2g', '-Xmx8g', "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '-Xms2g', '-Xmx8g'
}
Expand All @@ -282,7 +286,7 @@ task(generate_entity_embeddings, dependsOn: 'classes', type: JavaExec, group: 'e
classpath = sourceSets.main.runtimeClasspath
args '-in', getArg('in', 'entity.description'), '-v', getArg('v', 'word.embeddings.quantized'), '-out', getArg('out', 'entity.embeddings.vec'), '-n', getArg('n', '8')
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
// Heap flags take a single dash (-Xms2g); each --add-opens is followed by exactly one module/package target.
jvmArgs '-Xms2g', '-Xmx8g', "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '-Xms2g', '-Xmx8g'
}
Expand All @@ -295,7 +299,7 @@ task(quantize_entity_embeddings, dependsOn: 'classes', type: JavaExec, group: 'e
classpath = sourceSets.main.runtimeClasspath
args '-i', getArg('i', 'entity.embeddings.vec'), '-o', getArg('o', 'entity.embeddings.quantized'), '-error', getArg('e', '0.01'), '-hashheader'
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs '--Xms2g', '-Xmx8g', "--add-opens", "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
// Heap flags take a single dash (-Xms2g); each --add-opens is followed by exactly one module/package target.
jvmArgs '-Xms2g', '-Xmx8g', "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
} else {
jvmArgs '-Xms2g', '-Xmx8g'
}
Expand All @@ -308,7 +312,7 @@ application {

run {
if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
jvmArgs "--add-opens", "java.base/java.nio=ALL-UNNAMED", "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens", "java.base/java.text=ALL-UNNAMED", "--add-opens", "java.base/java.net=ALL-UNNAMED", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "--add-opens", "java.base/java.math=ALL-UNNAMED", "--add-opens", "java.base/java.util=ALL-UNNAMED", "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED"
}

args = ['server', 'data/config/service.yaml']
Expand Down
8 changes: 5 additions & 3 deletions doc/build.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ Install, build, run, and monitor
Install, build, and run
***********************

*entity-fishing* requires JDK 1.8 or higher. It supports Linux-64.
*entity-fishing* requires JDK 11 or higher.
The official supported architecture/OS is Linux-64.

Mac OS environments should work fine, but it is *officially* not supported.
Please use a Linux-64 environment for any production works. Below, we make available the up-to-date and full binary index data for Linux-64 architecture.
Mac OS is not officially supported: Intel-based Macs should nevertheless work fine, but ARM-based Macs do not work.
Please use a Linux-64 environment for any production work.
Below, we make available the up-to-date and full binary index data for Linux-64 architecture.

Running the service requires at least 3GB of RAM for processing text inputs, but more RAM will be exploited if available for speeding up access to the compiled Wikidata and Wikipedia data (including Wikidata statements associated to entities) and for enabling high rate parallel processing. If PDFs are processed, a minimum of 8GB is required due to additional PDF parsing and structuring requirements. For parallel processing of PDFs exploiting multithreading (e.g. 10 parallel threads), 16GB is recommended.

Expand Down

0 comments on commit ac8bd03

Please sign in to comment.