From 0e950620b84ffade635e1da2980216ae0919fed1 Mon Sep 17 00:00:00 2001
From: Julien Odent
Date: Mon, 16 Oct 2017 11:51:44 -0700
Subject: [PATCH] Email: restrict domain extensions to letters when spelling out

Summary: We would parse things like "tonight at 6.40".

Reviewed By: blandinw

Differential Revision: D6066926

fbshipit-source-id: d18a8c6
---
 Duckling/Email/Corpus.hs         |  2 +-
 Duckling/Email/EN/Corpus.hs      | 10 ++++++++++
 Duckling/Email/EN/Rules.hs       |  6 +++---
 tests/Duckling/Email/EN/Tests.hs |  1 +
 4 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/Duckling/Email/Corpus.hs b/Duckling/Email/Corpus.hs
index 71a0a8a22..39a6a7df6 100644
--- a/Duckling/Email/Corpus.hs
+++ b/Duckling/Email/Corpus.hs
@@ -13,8 +13,8 @@ module Duckling.Email.Corpus
   , negativeCorpus
   ) where
 
-import Prelude
 import Data.String
+import Prelude
 
 import Duckling.Email.Types
 import Duckling.Testing.Types
diff --git a/Duckling/Email/EN/Corpus.hs b/Duckling/Email/EN/Corpus.hs
index 7370e808f..1fdbd6405 100644
--- a/Duckling/Email/EN/Corpus.hs
+++ b/Duckling/Email/EN/Corpus.hs
@@ -10,6 +10,7 @@
 
 module Duckling.Email.EN.Corpus
   ( corpus
+  , negativeCorpus
   ) where
 
 import Data.String
@@ -18,6 +19,15 @@ import Prelude
 import Duckling.Email.Types
 import Duckling.Testing.Types
 
+negativeCorpus :: NegativeCorpus
+negativeCorpus = (testContext, examples)
+  where
+    examples =
+      [ "fitness at 6.40"
+      , "class at 12.00"
+      , "tonight at 9.15"
+      ]
+
 corpus :: Corpus
 corpus = (testContext, allExamples)
 
diff --git a/Duckling/Email/EN/Rules.hs b/Duckling/Email/EN/Rules.hs
index 597ed57bd..800f7cd7c 100644
--- a/Duckling/Email/EN/Rules.hs
+++ b/Duckling/Email/EN/Rules.hs
@@ -13,20 +13,20 @@ module Duckling.Email.EN.Rules
   ( rules ) where
 
 import Data.String
-import qualified Data.Text as Text
 import Prelude
+import qualified Data.Text as Text
 
 import Duckling.Dimensions.Types
 import Duckling.Email.Types (EmailData (..))
-import qualified Duckling.Email.Types as TEmail
 import Duckling.Regex.Types
 import Duckling.Types
+import qualified Duckling.Email.Types as TEmail
 
 ruleEmailSpelledOut :: Rule
 ruleEmailSpelledOut = Rule
   { name = "email spelled out"
   , pattern =
-    [ regex "([\\w\\._+-]+) at ([\\w_-]+(\\.[\\w_-]+)+)"
+    [ regex "([\\w\\._+-]+) at ([\\w_-]+(\\.[a-zA-Z]+)+)"
     ]
   , prod = \xs -> case xs of
       (Token RegexMatch (GroupMatch (m1:m2:_)):_) ->
diff --git a/tests/Duckling/Email/EN/Tests.hs b/tests/Duckling/Email/EN/Tests.hs
index 27ca31368..8d1517db9 100644
--- a/tests/Duckling/Email/EN/Tests.hs
+++ b/tests/Duckling/Email/EN/Tests.hs
@@ -20,4 +20,5 @@ import Duckling.Testing.Asserts
 tests :: TestTree
 tests = testGroup "Email Tests"
   [ makeCorpusTest [This Email] corpus
+  , makeNegativeCorpusTest [This Email] negativeCorpus
   ]