RdfImporter now works with 2.0 API.
mzattera committed May 3, 2021
1 parent 549279a commit c178648
Showing 5 changed files with 88 additions and 77 deletions.
Binary file modified Graql/RdfImporter.jar
15 changes: 8 additions & 7 deletions README.md
@@ -63,10 +63,10 @@ Notice that, in order to use the tool, you must have created the required RDF su
as explained in the next section below.

```
java -jar RdfImporter.jar io.github.mzattera.semanticweb.kraal.RdfImporter -k <arg> -f <arg> [-u <arg>] [-s <arg>] file1 [file2] ...
java -jar RdfImporter.jar -k <arg> -f <arg> [-u <arg>] [-s <arg>] file1 [file2] ...
-f <arg> Format of input file.
-k <arg> Key space to use for importing.
-k <arg> Database to use for importing.
-s <arg> "Batch" size; perform this many insertions before committing a
transaction. Higher values might speed up execution, as long
as you have enough memory.
@@ -76,17 +76,17 @@ java -jar RdfImporter.jar io.github.mzattera.semanticweb.kraal.RdfImporter -k <a
RDF: RDF/XML
NT: N-Triples file format (.nt)
TTL: Turtle file format (.ttl)
TTLS: Turtle* (TurtleStar) file format (.ttls)
N3: N3/Notation3 file format (.n3)
TRIX: TriX
TRIG: TriG file format (.trig)
TRIGS: TriG* (TriGStar) file format (.trigs)
BRF: A binary RDF format (.brf)
NQ: N-Quads file format (.nq)
JSONLD: JSON-LD file format (.jsonld)
RJ: RDF/JSON file format (.rj)
RDFA: RDFa file format (.xhtml)
HDT: HDT file format (.hdt)
```
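For instance, to import a Turtle file with a larger batch size and an explicit base URI (the file name, URI, and batch size below are illustrative; `-u` is assumed to set the base URI used to resolve relative IRIs):

```
java -jar RdfImporter.jar -k rdf -f TTL -s 3000 -u http://example.org/# my-vocabulary.ttl
```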
@@ -124,7 +124,7 @@ named `rdf` and import the schema there. Please refer to Grakn console documentat
In this example we assume `.jar` and vocabularies are in the same folder from where you
run the command.

```java -jar RdfImporter.jar io.github.mzattera.semanticweb.kraal.RdfImporter -k rdf -f TTL 22-rdf-syntax-ns.ttl rdf-schema.ttl```
```java -jar RdfImporter.jar -k rdf -f TTL 22-rdf-syntax-ns.ttl rdf-schema.ttl```
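If the `rdf` database does not exist yet, it can be created beforehand from the Grakn console; a minimal sketch, assuming the Grakn 2.0 console's `database create` command (see the console documentation for the authoritative syntax):

```
database create rdf
```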


## Graql Editor
@@ -136,4 +136,5 @@ The file `Notepad++/graql.xml` is a language definition that adds syntax colorin
## How to release

Export `RdfImporter` from Eclipse as an executable `.jar` file inside the `Graql` folder.
The main class is `io.github.mzattera.semanticweb.kraal.RdfImporter`.

4 changes: 2 additions & 2 deletions eclipse/kraal/pom.xml
@@ -23,7 +23,7 @@
<dependency>
<groupId>org.eclipse.rdf4j</groupId>
<artifactId>rdf4j-bom</artifactId>
<version>3.6.1</version>
<version>3.6.3</version>
<type>pom</type>
<scope>import</scope>
</dependency>
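For context: importing `rdf4j-bom` with `<scope>import</scope>` into `dependencyManagement` lets the individual RDF4J dependencies omit `<version>` and inherit 3.6.3 from the BOM. A sketch (the artifact below is illustrative, not necessarily one this project uses):

```
<dependency>
	<groupId>org.eclipse.rdf4j</groupId>
	<artifactId>rdf4j-rio-turtle</artifactId>
	<!-- version managed by the imported rdf4j-bom -->
</dependency>
```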
@@ -39,7 +39,7 @@
<dependency>
<groupId>io.grakn.client</groupId>
<artifactId>grakn-client</artifactId>
<version>2.0.0</version>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>org.eclipse.rdf4j</groupId>
eclipse/kraal/src/main/java/io/github/mzattera/semanticweb/kraal/RdfImporter.java
@@ -28,6 +28,8 @@
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.Rio;
import org.eclipse.rdf4j.rio.helpers.StatementCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import grakn.client.Grakn;
import grakn.client.api.GraknClient;
@@ -37,7 +39,6 @@
import grakn.client.api.answer.ConceptMap;
import graql.lang.Graql;
import graql.lang.pattern.variable.ThingVariable.Attribute;
import graql.lang.query.GraqlCompute.Path;
import graql.lang.query.GraqlInsert;
import io.github.mzattera.semanticweb.util.Utils;

@@ -49,16 +50,16 @@
*
*/
public final class RdfImporter implements Closeable {

private static final Logger logger = LoggerFactory.getLogger(RdfImporter.class);

private static final String DEFAULT_BASE_URI = "http://io.github.mzattera.semanticweb/#";

// We insert at most this number of triples before performing a commit()
private static final int DEFAULT_TRIPLES_PER_TRANSACTION = 1500;

// TODO check if it is OK to get core.
private static final GraknOptions QUERY_OPTIONS = GraknOptions.core().infer(false).explain(false).batchSize(1);

// Client connected to the host
// TODO Now supports only CORE.
private final GraknClient client;

// Opened session to the host
@@ -77,14 +78,28 @@ public RdfImporter(String db) {
}

/**
* Creates a client to given host (and port).
* Creates a CORE client to the given host (and port).
*
* @param host The Grakn host:port to connect to.
* @param db The database to connect to.
*/
public RdfImporter(String host, String db) {
client = Grakn.coreClient(host);
session = client.session(db, GraknSession.Type.DATA);
logger.info("Connected to fatabase " + db + " on " + host);
}

/**
* Creates a CORE client to the given host (and port).
*
* @param host The Grakn host:port to connect to.
* @param db The database to connect to.
* @param options Server options to use.
*/
public RdfImporter(String host, String db, GraknOptions options) {
client = Grakn.coreClient(host);
session = client.session(db, GraknSession.Type.DATA, options);
logger.info("Connected to fatabase " + db + " on " + host);
}
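
// Usage sketch (assumptions: a Grakn 2.0 server listening on localhost:1729 and
// a database "rdf" that already contains the required RDF schema):
//
//   try (RdfImporter importer = new RdfImporter("localhost:1729", "rdf")) {
//       importer.importFile("rdf-schema.ttl", RDFFormat.TURTLE,
//               DEFAULT_BASE_URI, DEFAULT_TRIPLES_PER_TRANSACTION);
//   }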

/**
@@ -100,7 +115,7 @@ public RdfImporter(String host, String db) {
public void importFile(String fileName, RDFFormat format, String baseUri, int batchSize)
throws FileNotFoundException, IOException, RDFParseException {

System.out.print(fileName + ": 0.. ");
logger.debug("IMPORTING TRIPLES from " + fileName + ": 0.. ");

createdResources.clear();

@@ -112,16 +127,17 @@ public void importFile(String fileName, RDFFormat format, String baseUri, int ba

// TODO Encoding?
File in = new File(fileName);
if (!in.canRead()) throw new FileNotFoundException("Cannot access file: " + in.getCanonicalPath());
if (!in.canRead())
throw new FileNotFoundException("Cannot access file: " + in.getCanonicalPath());
try (InputStream is = new FileInputStream(fileName)) {
rdfParser.parse(is, baseUri);
} // close input stream

int rows = 0;
int tot = 0;

// TODO can we reuse transaction? probably so....
GraknTransaction writeTransaction = commitAndReopenTransaction(session, null);
// TODO Make global....
GraknTransaction writeTransaction = commitAndReopenTransaction(null);

for (Statement s : statements) {

@@ -146,27 +162,26 @@ public void importFile(String fileName, RDFFormat format, String baseUri, int ba
.insert(Graql.var("t").rel("rdf-subject", "s").rel("rdf-predicate", "p").rel("rdf-object", "o")
.isa("rdf-triple"));

Stream<ConceptMap> inserted = writeTransaction.query().insert(query, QUERY_OPTIONS);
// A Stream can be consumed only once, so store the count before testing it.
long insertedCount = writeTransaction.query().insert(query).count();
if (insertedCount != 1) {
// TODO proper logging and handling
System.out.println("\tS:\t" + Utils.toString(sbj) + "\t" + sbj.getClass().getName());
System.out.println("\tP:\t" + Utils.toString(pred) + "\t" + pred.getClass().getName());
System.out.println("\tO:\t" + Utils.toString(obj) + "\t" + obj.getClass().getName());
// TODO proper handling
logger.error("\tS:\t" + Utils.toString(sbj) + "\t" + sbj.getClass().getName());
logger.error("\tP:\t" + Utils.toString(pred) + "\t" + pred.getClass().getName());
logger.error("\tO:\t" + Utils.toString(obj) + "\t" + obj.getClass().getName());
throw new RuntimeException(insertedCount + " rdf-triples were inserted.");
}

++tot;
if (++rows >= batchSize) {
writeTransaction = commitAndReopenTransaction(session, writeTransaction);
System.out.print(tot + "... ");
writeTransaction = commitAndReopenTransaction(writeTransaction);
logger.debug("IMPORTING TRIPLES from " + fileName + " " + tot + "...");
rows = 0;
}
} // for each RDF statement

// close writeTransaction
writeTransaction.commit();
writeTransaction.close();
System.out.println(tot + " <end>");
logger.debug("IMPORTING TRIPLES from " + fileName + " " + tot + " FINISHED!");
}

@Override
@@ -215,11 +230,9 @@ private void insert(Value v, GraknTransaction writeTransaction) {
throw new UnsupportedOperationException();
}

if (writeTransaction.query().insert(query, QUERY_OPTIONS).count() != 1)
if (writeTransaction.query().insert(query).count() != 1)
throw new RuntimeException(v.stringValue() + " not inserted.");

createdResources.add(v.stringValue());

if (v.isIRI()) {
// Special handling is needed for rdfs:ContainerMembershipProperty
String propertyURI = v.stringValue();
@@ -240,12 +253,14 @@
.insert(Graql.var("t").rel("rdf-subject", "s").rel("rdf-predicate", "p")
.rel("rdf-object", "o").isa("rdf-triple"));

if (writeTransaction.query().insert(query, QUERY_OPTIONS).count() != 1)
if (writeTransaction.query().insert(query).count() != 1)
throw new RuntimeException(v.stringValue() + " not properly marked as rdfs:member.");
} catch (NumberFormatException e) {
	// Not a numeric container-membership property (rdf:_1, rdf:_2, ...); nothing to add.
}
}
}

createdResources.add(v.stringValue());
}

/**
@@ -255,10 +270,9 @@ private void insert(Value v, GraknTransaction writeTransaction) {
* @param t Current transaction, if any, or null.
* @return A new, open write transaction.
*/
private GraknTransaction commitAndReopenTransaction(GraknSession session, GraknTransaction t) {
private GraknTransaction commitAndReopenTransaction(GraknTransaction t) {
if (t != null) {
t.commit();
t.close();
}

return session.transaction(GraknTransaction.Type.WRITE);
84 changes: 40 additions & 44 deletions eclipse/kraal/src/main/java/logback.xml
@@ -1,50 +1,46 @@
<!-- turn debug=true on for logback-test.xml to help debug logging configurations. -->
<configuration debug="false">

<!--
We prefer logging to console instead of a File. Its very easy
to pipe console output to a file and most organizations already
have a log rotation setup in place. It can also be faster to use this
approach vs using a FileAppender directly
-->
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<!-- encoders are by default assigned the type
ch.qos.logback.classic.encoder.PatternLayoutEncoder -->
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>
<!-- We prefer logging to console instead of a File. It's very easy to pipe
console output to a file and most organizations already have a log rotation
setup in place. It can also be faster to use this approach vs using a FileAppender
directly -->
<appender name="STDOUT"
class="ch.qos.logback.core.ConsoleAppender">
<!-- encoders are by default assigned the type ch.qos.logback.classic.encoder.PatternLayoutEncoder -->
<encoder>
<!-- <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36}
- %msg%n</pattern> -->
<pattern>%-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>

<!--
Async appenders can drastically speed up logging as well as your application's
response time but with some potential drawbacks. Read more at.
https://logback.qos.ch/manual/appenders.html#AsyncAppender
http://blog.takipi.com/how-to-instantly-improve-your-java-logging-with-7-logback-tweaks/
Always be sure to test different configurations for yourself. Every
application has different requirements.
-->
<appender name="ASYNC" class="ch.qos.logback.classic.AsyncAppender">
<appender-ref ref="STDOUT" />
<queueSize>1000</queueSize>
</appender>
<!-- Async appenders can drastically speed up logging as well as your application's
response time but with some potential drawbacks. Read more at: https://logback.qos.ch/manual/appenders.html#AsyncAppender
http://blog.takipi.com/how-to-instantly-improve-your-java-logging-with-7-logback-tweaks/
Always be sure to test different configurations for yourself. Every application
has different requirements. -->
<appender name="ASYNC"
class="ch.qos.logback.classic.AsyncAppender">
<appender-ref ref="STDOUT" />
<queueSize>1000</queueSize>
</appender>

<!--
We prefer a default setting of WARN and turn on logging explicitly for
any packages we care about. INFO is also a good choice. Going lower than INFO
may log sensitive data such as passwords or api tokens via HTTP or networking
libraries. Remember these defaults impact third party libraries as well.
Often times the cost of logging is overlooked. Try a simple benchmark of
logging in a tight loop a few million iterations vs not logging and see the difference.
There are a few ways you can change logging levels on the fly in a running app.
This could be a better solution than over logging.
-->
<root level="ERROR">
<!--
If you want async logging just use ref="ASYNC" instead.
We will favor synchronous logging for simplicity. -->
<appender-ref ref="STDOUT" />
</root>
<logger name="io.github.mzattera.semanticweb.kraal.RdfImporter"
level="DEBUG" />

<!-- We prefer a default setting of WARN and turn on logging explicitly
for any packages we care about. INFO is also a good choice. Going lower than
INFO may log sensitive data such as passwords or api tokens via HTTP or networking
libraries. Remember these defaults impact third party libraries as well.
Often times the cost of logging is overlooked. Try a simple benchmark of
logging in a tight loop a few million iterations vs not logging and see the
difference. There are a few ways you can change logging levels on the fly
in a running app. This could be a better solution than over logging. -->
<root level="ERROR">
<!-- If you want async logging just use ref="ASYNC" instead. We will favor
synchronous logging for simplicity. -->
<appender-ref ref="STDOUT" />
</root>

</configuration>
