Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bug Fix] Fix malformed test for Windows. #211

Closed
wants to merge 8 commits into from
Next Next commit
Refactoring of scan_line_break.
* Performance improvement of `scan_line_break`.
* Rename `scan_line_break` to `yaml_1_1_scan_line_break`.
* Add `yaml_1_2_scan_line_break`.
* Add better comments.
* Add a TODO comment about possible bugs.
  • Loading branch information
Paalon committed Jun 15, 2024
commit 1c9a97daf054c43cca3823c9ae243ba9a9496c40
108 changes: 80 additions & 28 deletions src/scanner.jl
Original file line number Diff line number Diff line change
@@ -757,31 +757,83 @@ end

# If the stream is at a line break, advance past it.
#
# Returns:
# '\r\n' : '\n'
# '\r' : '\n'
# '\n' : '\n'
# '\x85' : '\n'
# '\u2028' : '\u2028'
# '\u2029' : '\u2029'
# default : ''
# YAML 1.1
#
function scan_line_break(stream::TokenStream)
if in(peek(stream.input), "\r\n\u0085")
if prefix(stream.input, 2) == "\r\n"
# [22] b-line-feed ::= #xA /*LF*/
# [23] b-carriage-return ::= #xD /*CR*/
# [24] b-next-line ::= #x85 /*NEL*/
# [25] b-line-separator ::= #x2028 /*LS*/
# [26] b-paragraph-separator ::= #x2029 /*PS*/
# [28] b-specific ::= b-line-separator | b-paragraph-separator
# [29] b-generic ::= ( b-carriage-return b-line-feed) | b-carriage-return | b-line-feed | b-next-line
# [30] b-as-line-feed ::= b-generic
# [31] b-normalized ::= b-as-line-feed | b-specific
#
# U+000D U+000A → U+000A
# U+000D → U+000A
# U+000A → U+000A
# U+0085 → U+000A
# U+2028 → U+2028
# U+2029 → U+2029
# otherwise → (empty)
#
# Consume a single YAML 1.1 line break at the current position of `stream`, if any.
#
# Returns the normalized break as a `String`:
#   * "\u000a" for CR LF, a lone CR, LF and NEL (U+0085);
#   * the character itself for LS (U+2028) and PS (U+2029);
#   * "" when the next character is not a line break (nothing is forwarded).
function yaml_1_1_scan_line_break(stream::TokenStream)::String
    c = peek(stream.input)
    if c == '\u000d'
        # TODO:
        # Checking `peek(stream.input, 1) == '\u000a'` instead of building a
        # two-character prefix looks better for performance, but it gives errors
        # and the cause is not yet understood. Perhaps `prefix(stream.input, 2)`
        # modifies `stream` and that side effect happens to avoid the error.
        if prefix(stream.input, 2) == "\u000d\u000a"
            # CR LF is a single break: skip both characters.
            forwardchars!(stream, 2)
        else
            forwardchars!(stream)
        end
        return "\u000a"
    elseif c == '\u000a' || c == '\u0085'
        forwardchars!(stream)
        return "\u000a"
    elseif c == '\u2028' || c == '\u2029'
        # LS and PS are preserved as-is rather than folded into a line feed.
        forwardchars!(stream)
        return string(c)
    else
        return ""
    end
end
#
# YAML 1.2
#
# [24] b-line-feed ::= x0A
# [25] b-carriage-return ::= x0D
# [26] b-char ::= b-line-feed | b-carriage-return
# [27] nb-char ::= c-printable - b-char - c-byte-order-mark
# [28] b-break ::= ( b-carriage-return b-line-feed ) | b-carriage-return | b-line-feed
#
# U+000D U+000A → U+000A
# U+000D → U+000A
# U+000A → U+000A
# otherwise → (empty)
#
# Consume a single YAML 1.2 line break at the current position of `stream`, if any.
#
# Only CR LF, a lone CR and LF count as breaks here; each is normalized to "\u000a".
# Returns "" when the next character is not a line break (nothing is forwarded).
function yaml_1_2_scan_line_break(stream::TokenStream)::String
    c = peek(stream.input)
    if c == '\u000d'
        # CR LF is a single break: skip both characters; a lone CR skips one.
        if peek(stream.input, 1) == '\u000a'
            forwardchars!(stream, 2)
        else
            forwardchars!(stream)
        end
        return "\u000a"
    elseif c == '\u000a'
        forwardchars!(stream)
        return "\u000a"
    end
    return ""
end


# Scan past whitespace to the next token.
function scan_to_next_token(stream::TokenStream)
@@ -798,7 +850,7 @@ function scan_to_next_token(stream::TokenStream)
end
end

if scan_line_break(stream) != ""
if yaml_1_1_scan_line_break(stream) != ""
if stream.flow_level == 0
stream.allow_simple_key = true
end
@@ -948,7 +1000,7 @@ function scan_directive_ignored_line(stream::TokenStream, start_mark::Mark)
"expected a comment or a line break, but found '$(peek(stream.input))'",
get_mark(stream)))
end
scan_line_break(stream)
yaml_1_1_scan_line_break(stream)
end


@@ -1067,7 +1119,7 @@ function scan_block_scalar(stream::TokenStream, style::Char)
end
push!(chunks, prefix(stream.input, length))
forwardchars!(stream, length)
line_break = scan_line_break(stream)
line_break = yaml_1_1_scan_line_break(stream)
breaks, end_mark = scan_block_scalar_breaks(stream, indent)
if stream.column == indent && peek(stream.input) != '\0'
if folded && line_break == "\n" &&
@@ -1113,7 +1165,7 @@ function scan_block_scalar_ignored_line(stream::TokenStream, start_mark::Mark)
get_mark(stream)))
end

scan_line_break(stream)
yaml_1_1_scan_line_break(stream)
end


@@ -1166,7 +1218,7 @@ function scan_block_scalar_indentation(stream::TokenStream)
end_mark = get_mark(stream)
while in(peek(stream.input), " \r\n\u0085\u2028\u2029")
if peek(stream.input) != ' '
push!(chunks, scan_line_break(stream))
push!(chunks, yaml_1_1_scan_line_break(stream))
end_mark = get_mark(stream)
else
forwardchars!(stream)
@@ -1188,7 +1240,7 @@ function scan_block_scalar_breaks(stream::TokenStream, indent)
end

while in(peek(stream.input), "\r\n\u0085\u2028\u2029")
push!(chunks, scan_line_break(stream))
push!(chunks, yaml_1_1_scan_line_break(stream))
end_mark = get_mark(stream)
while stream.column < indent && peek(stream.input) == ' '
forwardchars!(stream)
@@ -1288,7 +1340,7 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool,
push!(chunks, Char(parse(Int, prefix(stream.input, length), base = 16)))
forwardchars!(stream, length)
elseif in(c, "\r\n\u0085\u2028\u2029")
scan_line_break(stream)
yaml_1_1_scan_line_break(stream)
append!(chunks, scan_flow_scalar_breaks(stream, double, start_mark))
else
throw(ScannerError("while scanning a double-quoted scalar",
@@ -1318,7 +1370,7 @@ function scan_flow_scalar_spaces(stream::TokenStream, double::Bool,
throw(ScannerError("while scanning a quoted scalar", start_mark,
"found unexpected end of stream", get_mark(stream)))
elseif in(c, "\r\n\u0085\u2028\u2029")
line_break = scan_line_break(stream)
line_break = yaml_1_1_scan_line_break(stream)
breaks = scan_flow_scalar_breaks(stream, double, start_mark)
if line_break != '\n'
push!(chunks, line_break)
@@ -1351,7 +1403,7 @@ function scan_flow_scalar_breaks(stream::TokenStream, double::Bool,
end

if in(peek(stream.input), "\r\n\u0085\u2028\u2029")
push!(chunks, scan_line_break(stream))
push!(chunks, yaml_1_1_scan_line_break(stream))
else
return chunks
end
@@ -1435,7 +1487,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer,
forwardchars!(stream, length)
c = peek(stream.input)
if in(c, "\r\n\u0085\u2028\u2029")
line_break = scan_line_break(stream)
line_break = yaml_1_1_scan_line_break(stream)
stream.allow_simple_key = true
if peek(stream.input) == '\uFEFF'
return Any[]
@@ -1451,7 +1503,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer,
if peek(stream.input) == ' '
forwardchars!(stream)
else
push!(breaks, scan_line_break(stream))
push!(breaks, yaml_1_1_scan_line_break(stream))
if peek(stream.input) == '\uFEFF'
return Any[]
end
Loading