Skip to content

Commit

Permalink
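Automatically add tokenize,ssplit to the start of the annotators list when the preTokenized option is set and no tokenizer annotators are present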
Browse files: browse the repository at this point in the history
  • Loading branch information
J38 committed Jan 20, 2022
1 parent f44e693 commit 7a24b58
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/edu/stanford/nlp/pipeline/StanfordCoreNLP.java
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,10 @@ public StanfordCoreNLP(Properties props, boolean enforceRequirements, AnnotatorP
logger.info("preTokenized option set: Changing annotators tokenize,ssplit,mwt to tokenize,ssplit");
} else if (oldAnnotators != null && oldAnnotators.startsWith("tokenize,ssplit")) {
logger.info("preTokenized option set: Annotators list starts with tokenize,ssplit, no change needed.");
} else if (oldAnnotators != null && !oldAnnotators.contains("tokenize") && !oldAnnotators.contains("mwt")
&& !oldAnnotators.contains("ssplit") && !oldAnnotators.contains("cdc_tokenize")) {
logger.info("preTokenized option set: Adding tokenize,ssplit to beginning.");
newAnnotators = "tokenize,ssplit," + oldAnnotators;
} else {
logger.warn("preTokenized option set: Non-standard annotators list, preTokenized may not work in this case.");
}
Expand Down

0 comments on commit 7a24b58

Please sign in to comment.