Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
FlorianZipser committed Jun 24, 2015
2 parents 1dd625d + 8d1c625 commit a565a43
Show file tree
Hide file tree
Showing 11 changed files with 188 additions and 24 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
* artifactId: salt-saltSample *
***********************************************************************************

Version 2.1.1
=============
* this is just a bugfix release

Version 2.1.0
=============
* updated EMF version from 2.8 to 2.9.1-v20130827-0309
Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<groupId>de.hu_berlin.german.korpling.saltnpepper</groupId>
<artifactId>salt</artifactId>
<packaging>pom</packaging>
<version>2.1.0</version>
<version>2.1.1-SNAPSHOT</version>
<modules>
<module>salt-graph</module>
<module>salt-saltCore</module>
Expand All @@ -31,7 +31,7 @@
<connection>scm:git:git://github.com/korpling/salt.git</connection>
<developerConnection>scm:git:git@github.com:korpling/salt.git</developerConnection>
<url>https://github.com/korpling/salt</url>
<tag>salt-2.1.0</tag>
<tag>HEAD</tag>
</scm>
<distributionManagement>
<repository>
Expand Down
6 changes: 3 additions & 3 deletions salt-doc/src/main/docbkx/salt_modelGuide.xml
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,9 @@
a layer or another label as shown in <xref linkend="fig_labelAndGraph"/>. An
attribute-value-pair is a triple which consists of a namespace, a name and a value
(namespace:name=value). The combination of name and namespace is used to identify a
label and therefore must be unique. The namespace is an optional value, to ditinguish in
case of there are two labels having the same name. For instance a node etc. can have a
label <emphasis role="italic">stts:pos=VVFIN</emphasis> as well as the label <emphasis
label and therefore must be unique. The namespace is an optional value, to distinguish
in case of there are two labels having the same name. For instance a node etc. can have
a label <emphasis role="italic">stts:pos=VVFIN</emphasis> as well as the label <emphasis
role="italic">pos:VV</emphasis> to annotate it with two part-of-speech annotations
from different tagsets. <figure xml:id="fig_labelAndGraph">
<title>Label mechanism for graph, node, edge and layer (class diagram)</title>
Expand Down
2 changes: 1 addition & 1 deletion salt-graph/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<groupId>de.hu_berlin.german.korpling.saltnpepper</groupId>
<artifactId>salt</artifactId>
<version>2.1.0</version>
<version>2.1.1-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion salt-saltCommon/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>de.hu_berlin.german.korpling.saltnpepper</groupId>
<artifactId>salt</artifactId>
<version>2.1.0</version>
<version>2.1.1-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<properties>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sCorpusStructure.SDocument;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.SDocumentGraph;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCore.SElementId;
import java.util.LinkedList;

/**
* <!-- begin-user-doc -->
Expand Down Expand Up @@ -371,12 +372,12 @@ public synchronized void saveSaltProject(URI saltProjectURI)
(this.getSCorpusGraphs().size()> 0))
{//store all documents if exist
URI sDocumentFileURI= null;
for (SCorpusGraph sCorpusGraph: Collections.synchronizedList(this.getSCorpusGraphs()))
for (SCorpusGraph sCorpusGraph: new LinkedList<>(this.getSCorpusGraphs()))
{
if ( (sCorpusGraph.getSDocuments()!= null) &&
(sCorpusGraph.getSDocuments().size()> 0))
{
for (SDocument sDocument: Collections.synchronizedList(sCorpusGraph.getSDocuments()))
for (SDocument sDocument: new LinkedList<>(sCorpusGraph.getSDocuments()))
{
if (sDocument.getSDocumentGraph()!= null)
{//only store sDocument, when there is some content in sDocumentGraph corresponding to sDocument
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -782,8 +782,9 @@ public EList<SToken> tokenize()
{
if (sTextualDS!= null)
{
if (retVal== null)
if (retVal== null){
retVal= new BasicEList<SToken>();
}
retVal.addAll(tokenizer.tokenize(sTextualDS));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,24 @@
import org.eclipse.emf.common.util.EList;
import org.knallgrau.utils.textcat.TextCategorizer;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Range;
import com.google.common.collect.RangeMap;
import com.google.common.collect.TreeRangeMap;
import com.neovisionaries.i18n.LanguageCode;

import de.hu_berlin.german.korpling.saltnpepper.salt.SaltFactory;
import de.hu_berlin.german.korpling.saltnpepper.salt.graph.Edge;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.exceptions.SaltTokenizerException;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.SDataSourceSequence;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.SDocumentGraph;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.SSpan;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.SSpanningRelation;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.STextualDS;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.STextualRelation;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.SToken;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCore.SAnnotation;

/**
* The general task of this class is to tokenize a given text in the same order
Expand Down Expand Up @@ -327,6 +339,11 @@ else if (LanguageCode.de.equals(language))
* The general task of this class is to tokenize a given text in the same
* order as the tool TreeTagger will do. A list of tokenized text is
* returned with the text anchor (start and end position) in original text.
* If the {@link SDocumentGraph} already contains tokens, an existing token is preserved
* if it overlaps the same textual range as the new one. Otherwise a {@link SSpan} is
* created corresponding to the existing token. The span then overlaps all new tokens
* and contains all annotations the old token did. In case the span would overlap the same textual
* range as the old token did, no span is created.
*
* @param strInput
* original text
Expand All @@ -342,36 +359,134 @@ public EList<SToken> tokenizeToToken(STextualDS sTextualDS, LanguageCode languag
char[] chrText = strInput.toCharArray();
int tokenCntr = 0;

// check if tokens exist for passed span
List<SToken> tokens = null;
if ((startPos != 0) || (endPos != sTextualDS.getSText().length())
|| (getsDocumentGraph().getSTextualDSs().size() > 1)) {
SDataSourceSequence sequence = SaltFactory.eINSTANCE.createSDataSourceSequence();
sequence.setSSequentialDS(sTextualDS);
sequence.setSStart(startPos);
sequence.setSEnd(endPos);
tokens = getsDocumentGraph().getSTokensBySequence(sequence);
} else {
tokens = getsDocumentGraph().getSTokens();
}

RangeMap<Integer, SToken> oldTokens = null;
// create an organization structure for a tokens interval which
// corresponds to a token
if ((tokens != null) && (tokens.size() != 0)) {
if ((getsDocumentGraph().getSTextualRelations() != null)
&& (getsDocumentGraph().getSTextualRelations().size() > 0)) {
oldTokens = TreeRangeMap.create();
for (STextualRelation rel : getsDocumentGraph().getSTextualRelations()) {
oldTokens.put(Range.closed(rel.getSStart(), rel.getSEnd()), rel.getSToken());
}
}
}
// a map mapping new created tokens, to old already existing tokens.
// The old tokens should be removed later on and spans should be
// created instead
Multimap<SToken, SToken> old2newToken = ArrayListMultimap.create();

for (int i = 0; i < chrText.length; i++) {
if ((strTokens.get(tokenCntr).length() < 1) || (strTokens.get(tokenCntr).substring(0, 1).equals(String.valueOf(chrText[i])))) {// first
// letter
// matches
if ((strTokens.get(tokenCntr).length() < 1)
|| (strTokens.get(tokenCntr).substring(0, 1).equals(String.valueOf(chrText[i])))) {
// first letter matches
StringBuffer pattern = new StringBuffer();
for (int y = 0; y < strTokens.get(tokenCntr).length(); y++) {// compute
// pattern
// in
// text
for (int y = 0; y < strTokens.get(tokenCntr).length(); y++) {
// compute pattern in text
pattern.append(chrText[i + y]);
}// compute pattern in text
if (strTokens.get(tokenCntr).hashCode() == pattern.toString().hashCode()) {// pattern
// found
if (strTokens.get(tokenCntr).hashCode() == pattern.toString().hashCode()) {
// pattern found
int start = i + startPos;
int end = i + startPos + strTokens.get(tokenCntr).length();

if (this.getsDocumentGraph() == null)
if (this.getsDocumentGraph() == null) {
throw new SaltTokenizerException("Cannot add tokens to an empty SDocumentGraph object.");
}

SToken sTok = this.getsDocumentGraph().createSToken(sTextualDS, start, end);
if (retVal == null)
if (retVal == null) {
retVal = new BasicEList<SToken>();
}
retVal.add(sTok);
i = i + strTokens.get(tokenCntr).length() - 1;
tokenCntr++;
if (tokenCntr >= strTokens.size())
if (tokenCntr >= strTokens.size()) {
break;
}

/**
* check, if there is an old token, overlapping the same
* or a bigger span as the currently created one. If
* yes, remove the old one and create a span overlapping
* the new one.
**/
if (oldTokens != null) {
SToken oldToken = oldTokens.get(start);
if (oldToken != null) {
old2newToken.put(oldToken, sTok);
}
}

}// pattern found
}// first letter matches
}

if (old2newToken != null) {
for (SToken oldToken : old2newToken.keySet()) {
// create span for oldToken
EList<SToken> overlappedTokens = new BasicEList<SToken>(old2newToken.get(oldToken));
if (overlappedTokens.size() == 1) {
getsDocumentGraph().removeNode(overlappedTokens.get(0));
} else {

SSpan span = getsDocumentGraph().createSSpan(overlappedTokens);

// move all annotations from old token to span
for (SAnnotation sAnno : oldToken.getSAnnotations()) {
span.addSAnnotation(sAnno);
}

// redirect all relations to span
List<Edge> inEdges = new ArrayList<Edge>();
for (Edge edge : getsDocumentGraph().getInEdges(oldToken.getSId())) {
inEdges.add(edge);
}
for (Edge edge : inEdges) {
if (edge instanceof SSpanningRelation) {
// in case of edge is a SSpanningRelation remove
// it and create new ones for each token under
// the span
if (edge.getSource() instanceof SSpan) {
SSpan parentSpan = (SSpan) edge.getSource();
getsDocumentGraph().removeEdge(edge);
for (SToken overlappedToken : overlappedTokens) {
SSpanningRelation rel = SaltFactory.eINSTANCE.createSSpanningRelation();
rel.setSSource(parentSpan);
rel.setSTarget(overlappedToken);
getsDocumentGraph().addSRelation(rel);
}
}
} else {
edge.setTarget(span);
}
}
List<Edge> outEdges = new ArrayList<Edge>();
for (Edge edge : getsDocumentGraph().getOutEdges(oldToken.getSId())) {
if (!(edge instanceof STextualRelation)) {
outEdges.add(edge);
}
}
for (Edge edge : outEdges) {
edge.setSource(span);
}
getsDocumentGraph().removeNode(oldToken);
}
}
}
}
return (retVal);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1872,6 +1872,49 @@ else if (textRel.getSTarget().equals(sText1))
assertEquals(6, relTosText2);
}

/**
* Tests the tokenization of a pretokenized text and checks if the old tokens are removed correctly.
*/
public void testTokenize2()
{
	// Set up a primary text that is already covered by two coarse tokens.
	final STextualDS primaryText= getFixture().createSTextualDS("This is a sample.");
	getFixture().createSToken(primaryText, 0, 7);   // offsets 0-7: "This is"
	getFixture().createSToken(primaryText, 8, 16);  // offsets 8-16: "a sample"

	// Re-tokenize the pre-tokenized text.
	getFixture().tokenize();

	// Expected graph shape after tokenization: 5 tokens, 5 textual relations, 2 spans.
	final int tokenCount= getFixture().getSTokens().size();
	assertEquals(5, tokenCount);

	final int textualRelationCount= getFixture().getSTextualRelations().size();
	assertEquals(5, textualRelationCount);

	final int spanCount= getFixture().getSSpans().size();
	assertEquals(2, spanCount);
}

/**
* Tests the tokenization of a pretokenized text and checks if the old tokens are removed correctly. Further checks that
* all annotations are copied to the newly created span node.
*/
public void testTokenize3()
{
	// Set up a primary text covered by two coarse tokens, each carrying two annotations.
	final STextualDS primaryText= getFixture().createSTextualDS("This is a sample.");

	final SToken firstToken= getFixture().createSToken(primaryText, 0, 7);
	firstToken.createSAnnotation(null, "a", "b");
	firstToken.createSAnnotation(null, "c", "d");

	final SToken secondToken= getFixture().createSToken(primaryText, 8, 16);
	secondToken.createSAnnotation(null, "1", "2");
	secondToken.createSAnnotation(null, "3", "4");

	// Re-tokenize the pre-tokenized text.
	getFixture().tokenize();

	// Expected graph shape after tokenization: 5 tokens, 5 textual relations, 2 spans.
	final int tokenCount= getFixture().getSTokens().size();
	assertEquals(5, tokenCount);

	final int textualRelationCount= getFixture().getSTextualRelations().size();
	assertEquals(5, textualRelationCount);

	final int spanCount= getFixture().getSSpans().size();
	assertEquals(2, spanCount);

	// Each of the two spans is expected to carry exactly two annotations.
	for (int spanIdx= 0; spanIdx < 2; spanIdx++)
	{
		assertEquals(2, getFixture().getSSpans().get(spanIdx).getSAnnotations().size());
	}
}

/**
* Tests the '{@link de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.SDocumentGraph#createTokenizer() <em>Create Tokenizer</em>}' operation.
* <!-- begin-user-doc -->
Expand Down
2 changes: 1 addition & 1 deletion salt-saltCore/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>de.hu_berlin.german.korpling.saltnpepper</groupId>
<artifactId>salt</artifactId>
<version>2.1.0</version>
<version>2.1.1-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ public void testGetSValueType_SURI()
SDATATYPE dataType= null;

dataType= SDATATYPE.SURI;
this.getFixture().setSValue(URI.createFileURI(""));
this.getFixture().setSValue(URI.createFileURI("."));
assertEquals(dataType, this.getFixture().getSValueType());
}

Expand Down

0 comments on commit a565a43

Please sign in to comment.