Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add bracketing to lexer #68

Merged
merged 25 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
dd87240
refactor: Add bracketing; kill withFC
croyzor Jun 27, 2024
1ad6d60
kill withFC
croyzor Oct 1, 2024
a5b4df9
Revert pullAndJuxt changes
croyzor Oct 1, 2024
39d101b
Drive-by: Update kind printing and remove Row kind
croyzor Oct 1, 2024
81b49bc
test: Add extra let binding tests
croyzor Oct 1, 2024
a4c47dc
Update golden tests
croyzor Oct 1, 2024
93e43c6
Fix warnings
croyzor Oct 1, 2024
48612d4
Wee cleanup
croyzor Oct 1, 2024
e4842a6
drive-by: Fix karlheinz list application
croyzor Oct 2, 2024
3c2be6f
fix: Don't allow trailing tokens when parsing between brackets
croyzor Oct 2, 2024
16b1c85
Merge remote-tracking branch 'origin/main' into refactor/parser-wc
croyzor Nov 5, 2024
4e9ac06
Propogate errors from parsers when going under brackets
croyzor Nov 6, 2024
a4997dc
Merge remote-tracking branch 'origin/main' into refactor/parser-wc
croyzor Nov 6, 2024
06f671e
Fix vector FCs
croyzor Nov 6, 2024
063634f
Merge remote-tracking branch 'origin/main' into refactor/parser-wc
croyzor Dec 17, 2024
c7eab1f
apply lints
croyzor Dec 17, 2024
859a144
Review comments
croyzor Dec 19, 2024
937ef01
Merge remote-tracking branch 'origin/main' into refactor/parser-wc
croyzor Dec 19, 2024
4806481
[refactor] Combine `brackets` and `within` (#73)
acl-cqc Dec 23, 2024
48e231b
Merge branch 'main' into refactor/parser-wc
croyzor Dec 23, 2024
7df8f65
Merge branch 'main' into refactor/parser-wc
croyzor Dec 23, 2024
44e1c3e
Merge opener and closer into openClose
croyzor Dec 23, 2024
ebf139f
Remove comments in BToken show instance
croyzor Dec 23, 2024
625451d
Uncurry
croyzor Dec 23, 2024
6cef983
run hlint
croyzor Dec 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions brat/Brat/Error.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module Brat.Error (ParseError(..)
,LengthConstraintF(..), LengthConstraint
,BracketErrMsg(..)
,ErrorMsg(..)
,Error(..), showError
,SrcErr(..)
Expand All @@ -9,6 +10,7 @@
) where

import Brat.FC
import Data.Bracket
import Brat.Syntax.Port (PortName)

import Data.List (intercalate)
Expand All @@ -26,6 +28,28 @@

type LengthConstraint = LengthConstraintF Int

data BracketErrMsg
= EOFInBracket BracketType -- FC in enclosing `Err` should point to the open bracket
-- FC here is opening; closing FC in the enclosing `Err`
| OpenCloseMismatch (FC, BracketType) BracketType
| UnexpectedClose BracketType

instance Show BracketErrMsg where
show (EOFInBracket b) = "File ended before this " ++ showOpen b ++ " was closed"
show (OpenCloseMismatch (openFC, bOpen) bClose) = unwords ["This"
,showClose bClose
,"doesn't match the"
,showOpen bOpen
,"at"
,show openFC
]
show (UnexpectedClose b) = unwords ["There is no"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty much an OpenCloseMismatch, without an opening (or where the opening is beginning of file) - consider combining the two, i.e.OpenCloseMismatch (Maybe (FC, BracketType)) BracketType. Or not, the conceptual similarity could just be confusing, up to you...

,showOpen b
,"for this"
,showClose b
,"to close"
]

data ErrorMsg
= TypeErr String
-- Term, Expected type, Actual type
Expand Down Expand Up @@ -83,6 +107,7 @@
-- The argument is the row of unused connectors
| ThunkLeftOvers String
| ThunkLeftUnders String
| BracketErr BracketErrMsg

instance Show ErrorMsg where
show (TypeErr x) = "Type error: " ++ x
Expand Down Expand Up @@ -166,6 +191,7 @@
show UnreachableBranch = "Branch cannot be reached"
show (ThunkLeftOvers overs) = "Expected function to address all inputs, but " ++ overs ++ " wasn't used"
show (ThunkLeftUnders unders) = "Expected function to return additional values of type: " ++ unders
show (BracketErr msg) = show msg


data Error = Err { fc :: Maybe FC
Expand Down Expand Up @@ -213,8 +239,8 @@
ls = lines contents
in case endLineN - startLineN of
0 -> [ls!!startLineN, highlightSection startCol endCol]
n | n > 0 -> let (first:rest) = drop (startLineN - 1) $ take (endLineN + 1) ls

Check warning on line 242 in brat/Brat/Error.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive

Check warning on line 242 in brat/Brat/Error.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive
(last:rmid) = reverse rest

Check warning on line 243 in brat/Brat/Error.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive

Check warning on line 243 in brat/Brat/Error.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive
in [first, highlightSection startCol (length first)]
++ (reverse rmid >>= (\l -> [l, highlightSection 0 (length l)]))
++ [last, highlightSection 0 endCol]
Expand Down
6 changes: 6 additions & 0 deletions brat/Brat/FC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,9 @@ fcOf (WC fc _) = fc
-- TODO: Remove this
dummyFC :: a -> WC a
dummyFC = WC (FC (Pos 0 0) (Pos 0 0))

spanFC :: FC -> FC -> FC
spanFC afc bfc = FC (start afc) (end bfc)

spanFCOf :: WC a -> WC b -> FC
spanFCOf (WC afc _) (WC bfc _) = spanFC afc bfc
98 changes: 98 additions & 0 deletions brat/Brat/Lexer/Bracketed.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
module Brat.Lexer.Bracketed (BToken(..), brackets) where

import Data.Bracket
import Brat.Error (BracketErrMsg(..), Error(Err), ErrorMsg(..))
import Brat.FC
import Brat.Lexer.Token

import Data.List.NonEmpty (NonEmpty(..))
import Data.Bifunctor (first)
import Text.Megaparsec (PosState(..), SourcePos(..), TraversableStream(..), VisualStream(..))
import Text.Megaparsec.Pos (mkPos)

data OpenClose = Open BracketType | Close BracketType

openClose :: Tok -> Maybe OpenClose
openClose LParen = Just (Open Paren)
openClose LSquare = Just (Open Square)
openClose LBrace = Just (Open Brace)
openClose RParen = Just (Close Paren)
openClose RSquare = Just (Close Square)
openClose RBrace = Just (Close Brace)
openClose _ = Nothing

-- Well bracketed tokens
data BToken
= Bracketed FC BracketType [BToken]
| FlatTok Token
deriving (Eq, Ord)

btokLen :: BToken -> Int
btokLen (FlatTok tok) = length (show tok)
btokLen (Bracketed _ _ bs) = sum (btokLen <$> bs) + 2

instance Show BToken where
show (FlatTok t) = show t
show (Bracketed _ b ts) = showOpen b ++ concatMap show ts ++ showClose b

instance VisualStream [BToken] where
showTokens _ = concatMap show
tokensLength _ = sum . fmap btokLen

instance TraversableStream [BToken] where
reachOffsetNoLine i pos = let fileName = sourceName (pstateSourcePos pos)
(Pos line col, rest) = skipChars (i - pstateOffset pos + 1) (pstateInput pos)
in pos
{ pstateInput = rest
, pstateOffset = max (pstateOffset pos) i
, pstateSourcePos = SourcePos fileName (mkPos line) (mkPos col)
}
where
skipChars :: Int -> [BToken] -> (Pos, [BToken])
skipChars 0 inp@(Bracketed fc _ _:_) = (start fc, inp)

Check warning on line 52 in brat/Brat/Lexer/Bracketed.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive

Check warning on line 52 in brat/Brat/Lexer/Bracketed.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive
skipChars 0 inp@(FlatTok t:_) = (start (fc t), inp)
skipChars i ((Bracketed fc b bts):rest) =
let Pos closeLine closeCol = end fc
closeFC = FC (Pos closeLine (closeCol - 1)) (Pos closeLine closeCol)
in skipChars (i - 1) (bts ++ [FlatTok (Token closeFC (closeTok b))] ++ rest)
skipChars i (FlatTok t:rest)
| i >= tokenLen t = skipChars (i - tokenLen t) rest
| otherwise = (start (fc t), FlatTok t:rest)

closeTok Paren = RParen
closeTok Square = RSquare
closeTok Brace = RBrace

eofErr :: (FC, BracketType) -> Error
eofErr (fc, b) = Err (Just fc) (BracketErr (EOFInBracket b))

openCloseMismatchErr :: (FC, BracketType) -> (FC, BracketType) -> Error
openCloseMismatchErr open (fcClose, bClose)
= Err (Just fcClose) (BracketErr (OpenCloseMismatch open bClose))

unexpectedCloseErr :: (FC, BracketType) -> Error
unexpectedCloseErr (fc, b) = Err (Just fc) (BracketErr (UnexpectedClose b))

brackets :: [Token] -> Either Error [BToken]
brackets ts = helper ts >>= \case
(res, Nothing) -> pure res
(_, Just (b, t:|_)) -> Left $ unexpectedCloseErr (fc t, b)
where
-- Given a list of tokens, either
-- (success) return [BToken] consisting of the prefix of the input [Token] in which all opened brackets are closed,
-- and any remaining [Token] beginning with a closer that does not match any opener in the input
-- (either Nothing = no remaining tokens; or tokens with the BracketType that the first token closes)
-- (failure) return an error, if a bracket opened in the input, is either not closed (EOF) or does not match the closer
helper :: [Token] -> Either Error ([BToken], Maybe (BracketType, NonEmpty Token))
helper [] = pure ([], Nothing)
helper (t:ts) = case openClose (_tok t) of
Just (Open b) -> let openFC = fc t in helper ts >>= \case
(_, Nothing) -> Left $ eofErr (fc t, b)
(within, Just (b', r :| rs)) ->
let closeFC = fc r
enclosingFC = spanFC openFC closeFC
in if b == b'
then first (Bracketed enclosingFC b within:) <$> helper rs
else Left $ openCloseMismatchErr (openFC, b) (closeFC, b')
Just (Close b) -> pure ([], Just (b, t :| ts)) -- return closer for caller
Nothing -> first (FlatTok t:) <$> helper ts
4 changes: 2 additions & 2 deletions brat/Brat/Lexer/Flat.hs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ tok = try (char '(' $> LParen)
<|> try (char ')' $> RParen)
<|> try (char '{' $> LBrace)
<|> try (char '}' $> RBrace)
<|> try (char '[' $> LBracket)
<|> try (char ']' $> RBracket)
<|> try (char '[' $> LSquare)
<|> try (char ']' $> RSquare)
<|> try (Underscore <$ string "_")
<|> try (Quoted <$> (char '"' *> printChar `manyTill` char '"'))
<|> try (FloatLit <$> float)
Expand Down
15 changes: 9 additions & 6 deletions brat/Brat/Lexer/Token.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Brat.Lexer.Token (Tok(..), Token(..), Keyword(..)) where
module Brat.Lexer.Token (Tok(..), Token(..), Keyword(..), tokenLen) where

import Brat.FC

Expand All @@ -21,8 +21,8 @@ data Tok
| RParen
| LBrace
| RBrace
| LBracket
| RBracket
| LSquare
| RSquare
| Semicolon
| Into
| Comma
Expand Down Expand Up @@ -66,8 +66,8 @@ instance Show Tok where
show RParen = ")"
show LBrace = "{"
show RBrace = "}"
show LBracket = "["
show RBracket = "]"
show LSquare = "["
show RSquare = "]"
show Semicolon = ";"
show Into = "|>"
show Comma = ","
Expand Down Expand Up @@ -102,7 +102,8 @@ instance Eq Token where
(Token fc t) == (Token fc' t') = t == t' && fc == fc'

instance Show Token where
show (Token _ t) = show t ++ " "
show (Token _ t) = show t

instance Ord Token where
compare (Token (FC st nd) _) (Token (FC st' nd') _) = if st == st'
then compare nd nd'
Expand All @@ -128,6 +129,8 @@ instance Show Keyword where
tokLen :: Tok -> Int
tokLen = length . show

tokenLen = tokLen . _tok

instance VisualStream [Token] where
tokensLength _ = sum . fmap (\(Token _ t) -> tokLen t)
showTokens _ = concatMap show
Loading
Loading