Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new tests for syntax and ill-formed parser errors and fix... emm... errors #684

Merged
merged 12 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,39 @@ configuration is serializable.
- [#677]: Added methods `config()` and `config_mut()` to inspect and change the parser
configuration. Previous builder methods on `Reader` / `NsReader` was replaced by
direct access to fields of config using `reader.config_mut().<...>`.
- #[#684]: Added a method `Config::enable_all_checks` to turn on or off all
well-formedness checks.

### Bug Fixes

- [#622]: Fix wrong disregarding of not closed markup, such as lone `<`.
- [#684]: Fix incorrect position reported for `Error::IllFormed(DoubleHyphenInComment)`.
- [#684]: Fix incorrect position reported for `Error::IllFormed(MissingDoctypeName)`.

### Misc Changes

- [#675]: Minimum supported version of serde raised to 1.0.139
- [#675]: Rework the `quick_xml::Error` type to provide more accurate information:
- `Error::EndEventMismatch` replaced by `IllFormedError::MismatchedEnd` in some cases
- `Error::EndEventMismatch` replaced by `IllFormedError::UnmatchedEnd` in some cases
- `Error::EndEventMismatch` replaced by `IllFormedError::MismatchedEndTag` in some cases
- `Error::EndEventMismatch` replaced by `IllFormedError::UnmatchedEndTag` in some cases
- `Error::TextNotFound` was removed because not used
- `Error::UnexpectedBang` replaced by `SyntaxError`
- `Error::UnexpectedEof` replaced by `SyntaxError` in some cases
- `Error::UnexpectedEof` replaced by `IllFormedError` in some cases
- `Error::UnexpectedToken` replaced by `IllFormedError::DoubleHyphenInComment`
- `Error::XmlDeclWithoutVersion` replaced by `IllFormedError::MissingDeclVersion` (in [#684])
- `Error::EmptyDocType` replaced by `IllFormedError::MissingDoctypeName` (in [#684])
- [#684]: Changed positions reported for `SyntaxError`s: now they are always points
to the start of markup (i. e. to the `<` character) with error.
- [#684]: Now `<??>` parsed as `Event::PI` with empty content instead of raising
syntax error.
- [#684]: Now `<?xml?>` parsed as `Event::Decl` instead of `Event::PI`.

[#513]: https://github.com/tafia/quick-xml/issues/513
[#622]: https://github.com/tafia/quick-xml/issues/622
[#675]: https://github.com/tafia/quick-xml/pull/675
[#677]: https://github.com/tafia/quick-xml/pull/677
[#684]: https://github.com/tafia/quick-xml/pull/684


## 0.31.0 -- 2023-10-22
Expand Down
24 changes: 12 additions & 12 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2649,7 +2649,7 @@ where
/// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
/// |[`DeEvent::End`] |`</tag>` |Returns an empty slice. The reader guarantee that tag will match the open one
/// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, expects the `</tag>` after that
/// |[`DeEvent::Eof`] | |Emits [`InvalidXml(IllFormed(MissedEnd))`](DeError::InvalidXml)
/// |[`DeEvent::Eof`] | |Emits [`InvalidXml(IllFormed(MissingEndTag))`](DeError::InvalidXml)
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
Expand Down Expand Up @@ -3642,7 +3642,7 @@ mod tests {

match de.read_to_end(QName(b"tag")) {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::MissedEnd("tag".into()))
assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand All @@ -3661,7 +3661,7 @@ mod tests {

match de.read_to_end(QName(b"tag")) {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::MissedEnd("tag".into()))
assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -3756,7 +3756,7 @@ mod tests {
fn read_string() {
match from_str::<String>(r#"</root>"#) {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("root".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("root".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand All @@ -3770,7 +3770,7 @@ mod tests {
match from_str::<String>(r#"<root></other>"#) {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => assert_eq!(
cause,
IllFormedError::MismatchedEnd {
IllFormedError::MismatchedEndTag {
expected: "root".into(),
found: "other".into(),
}
Expand Down Expand Up @@ -4098,7 +4098,7 @@ mod tests {
assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag2".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag2".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -4241,7 +4241,7 @@ mod tests {
let mut de = make_de("</tag>");
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -4320,7 +4320,7 @@ mod tests {
assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -4352,7 +4352,7 @@ mod tests {
assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -4458,7 +4458,7 @@ mod tests {
assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -4488,7 +4488,7 @@ mod tests {
assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -4538,7 +4538,7 @@ mod tests {
assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down
72 changes: 36 additions & 36 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,36 @@ impl std::error::Error for SyntaxError {}
/// [well-formed]: https://www.w3.org/TR/xml11/#dt-wellformed
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IllFormedError {
/// A `version` attribute was not found in an XML declaration or is not the
/// first attribute.
///
/// According to the [specification], the XML declaration (`<?xml ?>`) MUST contain
/// a `version` attribute and it MUST be the first attribute. This error indicates,
/// that the declaration does not contain attributes at all (if contains `None`)
/// or either `version` attribute is not present or not the first attribute in
/// the declaration. In the last case it contains the name of the found attribute.
///
/// [specification]: https://www.w3.org/TR/xml11/#sec-prolog-dtd
MissingDeclVersion(Option<String>),
/// A document type definition (DTD) does not contain a name of a root element.
///
/// According to the [specification], document type definition (`<!DOCTYPE foo>`)
/// MUST contain a name which defines a document type (`foo`). If that name
/// is missed, this error is returned.
///
/// [specification]: https://www.w3.org/TR/xml11/#NT-doctypedecl
MissingDoctypeName,
/// The end tag was not found during reading of a sub-tree of elements due to
/// encountering an EOF from the underlying reader. This error is returned from
/// [`Reader::read_to_end`].
///
/// [`Reader::read_to_end`]: crate::reader::Reader::read_to_end
MissedEnd(String),
MissingEndTag(String),
/// The specified end tag was encountered without corresponding open tag at the
/// same level of hierarchy
UnmatchedEnd(String),
UnmatchedEndTag(String),
/// The specified end tag does not match the start tag at that nesting level.
MismatchedEnd {
MismatchedEndTag {
/// Name of open tag, that is expected to be closed
expected: String,
/// Name of actually closed tag
Expand All @@ -103,15 +122,25 @@ pub enum IllFormedError {
impl fmt::Display for IllFormedError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::MissedEnd(tag) => write!(
Self::MissingDeclVersion(None) => {
write!(f, "an XML declaration does not contain `version` attribute")
}
Self::MissingDeclVersion(Some(attr)) => {
write!(f, "an XML declaration must start with `version` attribute, but in starts with `{}`", attr)
}
Self::MissingDoctypeName => write!(
f,
"`<!DOCTYPE>` declaration does not contain a name of a document type"
),
Self::MissingEndTag(tag) => write!(
f,
"start tag not closed: `</{}>` not found before end of input",
tag,
),
Self::UnmatchedEnd(tag) => {
Self::UnmatchedEndTag(tag) => {
write!(f, "close tag `</{}>` does not match any open tag", tag)
}
Self::MismatchedEnd { expected, found } => write!(
Self::MismatchedEndTag { expected, found } => write!(
f,
"expected `</{}>`, but `</{}>` was found",
expected, found,
Expand Down Expand Up @@ -143,25 +172,6 @@ pub enum Error {
///
/// [`encoding`]: index.html#encoding
NonDecodable(Option<Utf8Error>),
/// A `version` attribute was not found in an XML declaration or is not the
/// first attribute.
///
/// According to the [specification], the XML declaration (`<?xml ?>`) MUST contain
/// a `version` attribute and it MUST be the first attribute. This error indicates,
/// that the declaration does not contain attributes at all (if contains `None`)
/// or either `version` attribute is not present or not the first attribute in
/// the declaration. In the last case it contains the name of the found attribute.
///
/// [specification]: https://www.w3.org/TR/xml11/#sec-prolog-dtd
XmlDeclWithoutVersion(Option<String>),
/// A document type definition (DTD) does not contain a name of a root element.
///
/// According to the [specification], document type definition (`<!doctype foo>`)
/// MUST contain a name which defines a document type. If that name is missed,
/// this error is returned.
///
/// [specification]: https://www.w3.org/TR/xml11/#NT-doctypedecl
EmptyDocType,
/// Attribute parsing error
InvalidAttr(AttrError),
/// Escape error
Expand Down Expand Up @@ -189,7 +199,7 @@ pub enum Error {
impl Error {
pub(crate) fn missed_end(name: QName, decoder: Decoder) -> Self {
match decoder.decode(name.as_ref()) {
Ok(name) => IllFormedError::MissedEnd(name.into()).into(),
Ok(name) => IllFormedError::MissingEndTag(name.into()).into(),
Err(err) => err.into(),
}
}
Expand Down Expand Up @@ -261,16 +271,6 @@ impl fmt::Display for Error {
Error::IllFormed(e) => write!(f, "ill-formed document: {}", e),
Error::NonDecodable(None) => write!(f, "Malformed input, decoding impossible"),
Error::NonDecodable(Some(e)) => write!(f, "Malformed UTF-8 input: {}", e),
Error::XmlDeclWithoutVersion(None) => {
write!(f, "an XML declaration does not contain `version` attribute")
}
Error::XmlDeclWithoutVersion(Some(attr)) => {
write!(f, "an XML declaration must start with `version` attribute, but in starts with `{}`", attr)
}
Error::EmptyDocType => write!(
f,
"`<!DOCTYPE>` declaration does not contain a name of a document type"
),
Error::InvalidAttr(e) => write!(f, "error while parsing attribute: {}", e),
Error::EscapeError(e) => write!(f, "{}", e),
Error::UnknownPrefix(prefix) => {
Expand Down
18 changes: 10 additions & 8 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use std::ops::Deref;
use std::str::from_utf8;

use crate::encoding::Decoder;
use crate::errors::{Error, Result};
use crate::errors::{Error, IllFormedError, Result};
use crate::escape::{escape, partial_escape, unescape_with};
use crate::name::{LocalName, QName};
use crate::reader::is_whitespace;
Expand Down Expand Up @@ -391,12 +391,12 @@ impl<'a> BytesDecl<'a> {
/// In case of multiple attributes value of the first one is returned.
///
/// If version is missed in the declaration, or the first thing is not a version,
/// [`Error::XmlDeclWithoutVersion`] will be returned.
/// [`IllFormedError::MissingDeclVersion`] will be returned.
///
/// # Examples
///
/// ```
/// use quick_xml::Error;
/// use quick_xml::errors::{Error, IllFormedError};
/// use quick_xml::events::{BytesDecl, BytesStart};
///
/// // <?xml version='1.1'?>
Expand All @@ -410,21 +410,21 @@ impl<'a> BytesDecl<'a> {
/// // <?xml encoding='utf-8'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
/// match decl.version() {
/// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding"),
/// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
/// _ => assert!(false),
/// }
///
/// // <?xml encoding='utf-8' version='1.1'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
/// match decl.version() {
/// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding"),
/// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
/// _ => assert!(false),
/// }
///
/// // <?xml?>
/// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
/// match decl.version() {
/// Err(Error::XmlDeclWithoutVersion(None)) => {},
/// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
/// _ => assert!(false),
/// }
/// ```
Expand All @@ -437,12 +437,14 @@ impl<'a> BytesDecl<'a> {
// first attribute was not "version"
Some(Ok(a)) => {
let found = from_utf8(a.key.as_ref())?.to_string();
Err(Error::XmlDeclWithoutVersion(Some(found)))
Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
found,
))))
}
// error parsing attributes
Some(Err(e)) => Err(e.into()),
// no attributes
None => Err(Error::XmlDeclWithoutVersion(None)),
None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
}
}

Expand Down
10 changes: 9 additions & 1 deletion src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ macro_rules! impl_buffered_source {
buf.push(b'!');
self $(.$reader)? .consume(1);

let bang_type = BangType::new(self.peek_one() $(.$await)? ?)?;
let bang_type = BangType::new(self.peek_one() $(.$await)? ?, position)?;

loop {
match self $(.$reader)? .fill_buf() $(.$await)? {
Expand Down Expand Up @@ -139,6 +139,10 @@ macro_rules! impl_buffered_source {
}
}

// <!....EOF
// ^^^^^ - `buf` does not contains `<`, but we want to report error at `<`,
// so we move offset to it (+1 for `<`)
*position -= 1;
Err(bang_type.to_err())
}

Expand Down Expand Up @@ -182,6 +186,10 @@ macro_rules! impl_buffered_source {
};
}

// <.....EOF
// ^^^^^ - `buf` does not contains `<`, but we want to report error at `<`,
// so we move offset to it (+1 for `<`)
*position -= 1;
Err(Error::Syntax(SyntaxError::UnclosedTag))
}

Expand Down
Loading