From 7a24b5874dcaf9a2189c3489fe913435c3332ae5 Mon Sep 17 00:00:00 2001 From: J38 Date: Thu, 20 Jan 2022 04:29:52 -0800 Subject: [PATCH] automatically add tokenize,ssplit --- src/edu/stanford/nlp/pipeline/StanfordCoreNLP.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/edu/stanford/nlp/pipeline/StanfordCoreNLP.java b/src/edu/stanford/nlp/pipeline/StanfordCoreNLP.java index 768d4b3543..1f1638d1bb 100644 --- a/src/edu/stanford/nlp/pipeline/StanfordCoreNLP.java +++ b/src/edu/stanford/nlp/pipeline/StanfordCoreNLP.java @@ -245,6 +245,10 @@ public StanfordCoreNLP(Properties props, boolean enforceRequirements, AnnotatorP logger.info("preTokenized option set: Changing annotators tokenize,ssplit,mwt to tokenize,ssplit"); } else if (oldAnnotators != null && oldAnnotators.startsWith("tokenize,ssplit")) { logger.info("preTokenized option set: Annotators list starts with tokenize,ssplit, no change needed."); + } else if (oldAnnotators != null && !oldAnnotators.contains("tokenize") && !oldAnnotators.contains("mwt") + && !oldAnnotators.contains("ssplit") && !oldAnnotators.contains("cdc_tokenize")) { + logger.info("preTokenized option set: Adding tokenize,ssplit to beginning."); + newAnnotators = "tokenize,ssplit," + oldAnnotators; } else { logger.warn("preTokenized option set: Non-standard annotators list, preTokenized may not work in this case."); }