From a72431e4867ec7d5b38faa52860a45da6b183ff3 Mon Sep 17 00:00:00 2001 From: Abraham Egnor Date: Tue, 30 Jul 2024 13:13:33 -0400 Subject: [PATCH] RUST-1874 Add optional integration with `serde_path_to_error` (#488) --- Cargo.toml | 2 + README.md | 11 +++++ src/de/error.rs | 33 +++++++++++---- src/de/mod.rs | 28 ++++++++++--- src/document.rs | 4 +- src/lib.rs | 11 ++++- src/ser/error.rs | 43 +++++++++++++++++-- src/ser/mod.rs | 14 ++++++- src/tests/modules/ser.rs | 11 ++--- src/tests/serde.rs | 89 ++++++++++++++++++++++++++++++++++++++++ 10 files changed, 217 insertions(+), 29 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7c013ff7..5a1ca73f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,7 @@ chrono-0_4 = ["chrono"] uuid-1 = [] # if enabled, include API for interfacing with time 0.3 time-0_3 = [] +serde_path_to_error = ["dep:serde_path_to_error"] # if enabled, include serde_with interop. # should be used in conjunction with chrono-0_4 or uuid-0_8. # it's commented out here because Cargo implicitly adds a feature flag for @@ -69,6 +70,7 @@ serde_with = { version = "1.3.1", optional = true } serde_with-3 = { package = "serde_with", version = "3.1.0", optional = true } time = { version = "0.3.9", features = ["formatting", "parsing", "macros", "large-dates"] } bitvec = "1.0.1" +serde_path_to_error = { version = "0.1.16", optional = true } [target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies] js-sys = "0.3" diff --git a/README.md b/README.md index 993bcc36..196cf7de 100644 --- a/README.md +++ b/README.md @@ -50,8 +50,11 @@ Note that if you are using `bson` through the `mongodb` crate, you do not need t | `chrono-0_4` | Enable support for v0.4 of the [`chrono`](https://docs.rs/chrono/0.4) crate in the public API. | n/a | no | | `uuid-0_8` | Enable support for v0.8 of the [`uuid`](https://docs.rs/uuid/0.8) crate in the public API. | n/a | no | | `uuid-1` | Enable support for v1.x of the [`uuid`](https://docs.rs/uuid/1.0) crate in the public API. | n/a | no | +| `time-0_3` | Enable support for v0.3 of the [`time`](https://docs.rs/time/0.3) crate in the public API. | n/a | no | | `serde_with` | Enable [`serde_with`](https://docs.rs/serde_with/1.x) 1.x integrations for `bson::DateTime` and `bson::Uuid`.| serde_with | no | | `serde_with-3` | Enable [`serde_with`](https://docs.rs/serde_with/3.x) 3.x integrations for `bson::DateTime` and `bson::Uuid`.| serde_with | no | +| `serde_path_to_error` | Enable support for error paths via integration with [`serde_path_to_error`](https://docs.rs/serde_path_to_err/latest). This is an unstable feature and any breaking changes to `serde_path_to_error` may affect usage of it via this feature. | serde_path_to_error | no | + ## Overview of the BSON Format BSON, short for Binary JSON, is a binary-encoded serialization of JSON-like documents. @@ -208,6 +211,14 @@ separate the "business logic" that operates over the data from the (de)serializa translates the data to/from its serialized form. This can lead to more clear and concise code that is also less error prone. +When serializing values that cannot be represented in BSON, or deserialzing from BSON that does +not match the format expected by the type, the default error will only report the specific field +that failed. To aid debugging, enabling the [`serde_path_to_error`](#feature-flags) feature will +[augment errors](https://docs.rs/bson/latest/bson/de/enum.Error.html#variant.WithPath) with the +full field path from root object to failing field. This feature does incur a small CPU and memory +overhead during (de)serialization and should be enabled with care in performance-sensitive +environments. + ### Working with datetimes The BSON format includes a datetime type, which is modeled in this crate by the diff --git a/src/de/error.rs b/src/de/error.rs index 44a32131..8b3c7920 100644 --- a/src/de/error.rs +++ b/src/de/error.rs @@ -36,6 +36,17 @@ pub enum Error { /// A message describing the error. message: String, }, + + #[cfg(feature = "serde_path_to_error")] + #[cfg_attr(docsrs, doc(cfg(feature = "serde_path_to_error")))] + #[non_exhaustive] + WithPath { + /// The path to the error. + path: serde_path_to_error::Path, + + /// The original error. + source: Box, + }, } impl Error { @@ -44,6 +55,13 @@ impl Error { message: msg.to_string(), } } + + #[cfg(feature = "serde_path_to_error")] + pub(crate) fn with_path(err: serde_path_to_error::Error) -> Self { + let path = err.path().clone(); + let source = Box::new(err.into_inner()); + Self::WithPath { path, source } + } } impl From for Error { @@ -66,19 +84,18 @@ impl From for Error { impl fmt::Display for Error { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::Io(ref inner) => inner.fmt(fmt), - Error::InvalidUtf8String(ref inner) => inner.fmt(fmt), - Error::UnrecognizedDocumentElementType { - ref key, - element_type, - } => write!( + match self { + Error::Io(inner) => inner.fmt(fmt), + Error::InvalidUtf8String(inner) => inner.fmt(fmt), + Error::UnrecognizedDocumentElementType { key, element_type } => write!( fmt, "unrecognized element type for key \"{}\": `{:#x}`", key, element_type ), Error::EndOfStream => fmt.write_str("end of stream"), - Error::DeserializationError { ref message } => message.fmt(fmt), + Error::DeserializationError { message } => message.fmt(fmt), + #[cfg(feature = "serde_path_to_error")] + Error::WithPath { path, source } => write!(fmt, "error at {}: {}", path, source), } } } diff --git a/src/de/mod.rs b/src/de/mod.rs index b6a95a09..73c108d4 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -89,7 +89,14 @@ where T: DeserializeOwned, { let de = Deserializer::new(bson); - Deserialize::deserialize(de) + #[cfg(feature = "serde_path_to_error")] + { + return serde_path_to_error::deserialize(de).map_err(Error::with_path); + } + #[cfg(not(feature = "serde_path_to_error"))] + { + Deserialize::deserialize(de) + } } /// Deserialize a `T` from the provided [`Bson`] value, configuring the underlying @@ -201,8 +208,7 @@ pub fn from_slice<'de, T>(bytes: &'de [u8]) -> Result where T: Deserialize<'de>, { - let deserializer = raw::Deserializer::new(bytes, false)?; - T::deserialize(deserializer) + from_raw(raw::Deserializer::new(bytes, false)?) } /// Deserialize an instance of type `T` from a slice of BSON bytes, replacing any invalid UTF-8 @@ -215,6 +221,18 @@ pub fn from_slice_utf8_lossy<'de, T>(bytes: &'de [u8]) -> Result where T: Deserialize<'de>, { - let deserializer = raw::Deserializer::new(bytes, true)?; - T::deserialize(deserializer) + from_raw(raw::Deserializer::new(bytes, true)?) +} + +pub(crate) fn from_raw<'de, T: Deserialize<'de>>( + deserializer: raw::Deserializer<'de>, +) -> Result { + #[cfg(feature = "serde_path_to_error")] + { + serde_path_to_error::deserialize(deserializer).map_err(Error::with_path) + } + #[cfg(not(feature = "serde_path_to_error"))] + { + T::deserialize(deserializer) + } } diff --git a/src/document.rs b/src/document.rs index 1c18446a..fa408877 100644 --- a/src/document.rs +++ b/src/document.rs @@ -9,7 +9,6 @@ use std::{ use ahash::RandomState; use indexmap::IndexMap; -use serde::Deserialize; use crate::{ bson::{Array, Bson, Timestamp}, @@ -547,8 +546,7 @@ impl Document { fn decode(reader: &mut R, utf_lossy: bool) -> crate::de::Result { let buf = crate::de::reader_to_vec(reader)?; - let deserializer = crate::de::RawDeserializer::new(&buf, utf_lossy)?; - Document::deserialize(deserializer) + crate::de::from_raw(crate::de::RawDeserializer::new(&buf, utf_lossy)?) } /// Attempts to deserialize a [`Document`] from a byte stream. diff --git a/src/lib.rs b/src/lib.rs index 8e70d2fe..69ad9db0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,7 +65,9 @@ //! | `uuid-0_8` | Enable support for v0.8 of the [`uuid`](https://docs.rs/uuid/0.8) crate in the public API. | no | //! | `uuid-1` | Enable support for v1.x of the [`uuid`](https://docs.rs/uuid/1.x) crate in the public API. | no | //! | `time-0_3` | Enable support for v0.3 of the [`time`](https://docs.rs/time/0.3) crate in the public API. | no | -//! | `serde_with` | Enable [`serde_with`](https://docs.rs/serde_with/latest) integrations for [`DateTime`] and [`Uuid`]. | no | +//! | `serde_with` | Enable [`serde_with`](https://docs.rs/serde_with/1.x) 1.x integrations for [`DateTime`] and [`Uuid`]. | no | +//! | `serde_with-3` | Enable [`serde_with`](https://docs.rs/serde_with/3.x) 3.x integrations for [`DateTime`] and [`Uuid`]. | no | +//! | `serde_path_to_error` | Enable support for error paths via integration with [`serde_path_to_error`](https://docs.rs/serde_path_to_err/latest). This is an unstable feature and any breaking changes to `serde_path_to_error` may affect usage of it via this feature. | no | //! //! ## BSON values //! @@ -213,6 +215,13 @@ //! data from the (de)serialization logic that translates the data to/from its serialized form. This //! can lead to more clear and concise code that is also less error prone. //! +//! When serializing values that cannot be represented in BSON, or deserialzing from BSON that does +//! not match the format expected by the type, the default error will only report the specific field +//! that failed. To aid debugging, enabling the `serde_path_to_error` feature will +//! [augment errors](crate::de::Error::WithPath) with the full field path from root object to +//! failing field. This feature does incur a small CPU and memory overhead during (de)serialization +//! and should be enabled with care in performance-sensitive environments. +//! //! ## Working with datetimes //! //! The BSON format includes a datetime type, which is modeled in this crate by the diff --git a/src/ser/error.rs b/src/ser/error.rs index ed355ab8..6f3b8877 100644 --- a/src/ser/error.rs +++ b/src/ser/error.rs @@ -27,6 +27,39 @@ pub enum Error { /// An unsigned integer type could not fit into a signed integer type. UnsignedIntegerExceededRange(u64), + + #[cfg(feature = "serde_path_to_error")] + #[cfg_attr(docsrs, doc(cfg(feature = "serde_path_to_error")))] + #[non_exhaustive] + WithPath { + /// The path to the error. + path: serde_path_to_error::Path, + + /// The original error. + source: Box, + }, +} + +impl Error { + #[cfg(feature = "serde_path_to_error")] + pub(crate) fn with_path(err: serde_path_to_error::Error) -> Self { + let path = err.path().clone(); + let source = Box::new(err.into_inner()); + Self::WithPath { path, source } + } + + #[cfg(test)] + pub(crate) fn strip_path(self) -> Self { + #[cfg(feature = "serde_path_to_error")] + match self { + Self::WithPath { path: _, source } => *source, + _ => self, + } + #[cfg(not(feature = "serde_path_to_error"))] + { + self + } + } } impl From for Error { @@ -37,13 +70,13 @@ impl From for Error { impl fmt::Display for Error { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::Io(ref inner) => inner.fmt(fmt), - Error::InvalidDocumentKey(ref key) => write!(fmt, "Invalid map key type: {}", key), + match self { + Error::Io(inner) => inner.fmt(fmt), + Error::InvalidDocumentKey(key) => write!(fmt, "Invalid map key type: {}", key), Error::InvalidCString(ref string) => { write!(fmt, "cstrings cannot contain null bytes: {:?}", string) } - Error::SerializationError { ref message } => message.fmt(fmt), + Error::SerializationError { message } => message.fmt(fmt), Error::UnsignedIntegerExceededRange(value) => write!( fmt, "BSON does not support unsigned integers. @@ -51,6 +84,8 @@ impl fmt::Display for Error { size.", value ), + #[cfg(feature = "serde_path_to_error")] + Error::WithPath { path, source } => write!(fmt, "error at {}: {}", path, source), } } } diff --git a/src/ser/mod.rs b/src/ser/mod.rs index d2faf636..10fb8037 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -117,6 +117,11 @@ where T: Serialize + ?Sized, { let ser = Serializer::new(); + #[cfg(feature = "serde_path_to_error")] + { + serde_path_to_error::serialize(value, ser).map_err(Error::with_path) + } + #[cfg(not(feature = "serde_path_to_error"))] value.serialize(ser) } @@ -197,7 +202,14 @@ where T: Serialize, { let mut serializer = raw::Serializer::new(); - value.serialize(&mut serializer)?; + #[cfg(feature = "serde_path_to_error")] + { + serde_path_to_error::serialize(value, &mut serializer).map_err(Error::with_path)?; + } + #[cfg(not(feature = "serde_path_to_error"))] + { + value.serialize(&mut serializer)?; + } Ok(serializer.into_vec()) } diff --git a/src/tests/modules/ser.rs b/src/tests/modules/ser.rs index 48aa6596..aa95651c 100644 --- a/src/tests/modules/ser.rs +++ b/src/tests/modules/ser.rs @@ -109,11 +109,8 @@ fn uint64_u2i() { let deser_min: u64 = from_bson(obj_min).unwrap(); assert_eq!(deser_min, u64::MIN); - let obj_max: ser::Result = to_bson(&u64::MAX); - assert_matches!( - obj_max, - Err(ser::Error::UnsignedIntegerExceededRange(u64::MAX)) - ); + let err: ser::Error = to_bson(&u64::MAX).unwrap_err().strip_path(); + assert_matches!(err, ser::Error::UnsignedIntegerExceededRange(u64::MAX)); } #[test] @@ -161,11 +158,11 @@ fn cstring_null_bytes_error() { fn verify_doc(doc: Document) { let mut vec = Vec::new(); assert!(matches!( - doc.to_writer(&mut vec).unwrap_err(), + doc.to_writer(&mut vec).unwrap_err().strip_path(), ser::Error::InvalidCString(_) )); assert!(matches!( - to_vec(&doc).unwrap_err(), + to_vec(&doc).unwrap_err().strip_path(), ser::Error::InvalidCString(_) )); } diff --git a/src/tests/serde.rs b/src/tests/serde.rs index 1720d7dc..595cc312 100644 --- a/src/tests/serde.rs +++ b/src/tests/serde.rs @@ -1116,3 +1116,92 @@ fn fuzz_regression_00() { let buf: &[u8] = &[227, 0, 35, 4, 2, 0, 255, 255, 255, 127, 255, 255, 255, 47]; let _ = crate::from_slice::(buf); } + +#[cfg(feature = "serde_path_to_error")] +mod serde_path_to_error { + use super::*; + + #[derive(Serialize, Deserialize, Debug)] + struct Foo { + one: Bar, + two: Bar, + } + + #[derive(Serialize, Deserialize, Debug)] + struct Bar { + value: u64, + } + + #[test] + fn de() { + let src = doc! { + "one": { + "value": 42, + }, + "two": { + "value": "hello", + }, + }; + let result: Result = crate::from_document(src); + assert!(result.is_err()); + match result.unwrap_err() { + crate::de::Error::WithPath { source: _, path } => { + assert_eq!("two.value", path.to_string()) + } + e => panic!("unexpected error: {:?}", e), + } + } + + #[test] + fn de_raw() { + let src = rawdoc! { + "one": { + "value": 42, + }, + "two": { + "value": "hello", + }, + } + .into_bytes(); + let result: Result = crate::from_slice(&src); + assert!(result.is_err()); + match result.unwrap_err() { + crate::de::Error::WithPath { source: _, path } => { + assert_eq!("two.value", path.to_string()) + } + e => panic!("unexpected error: {:?}", e), + } + } + + #[test] + fn ser() { + let src = Foo { + one: Bar { value: 42 }, + two: Bar { value: u64::MAX }, + }; + let result = crate::to_bson(&src); + assert!(result.is_err()); + match result.unwrap_err() { + crate::ser::Error::WithPath { source: _, path } => { + assert_eq!("two.value", path.to_string()) + } + e => panic!("unexpected error: {:?}", e), + } + } + + #[test] + fn ser_raw() { + let src = Foo { + one: Bar { value: 42 }, + two: Bar { value: u64::MAX }, + }; + let result = crate::to_vec(&src); + assert!(result.is_err()); + match result.unwrap_err() { + crate::ser::Error::WithPath { source: _, path } => { + assert_eq!("two.value", path.to_string()) + } + e => panic!("unexpected error: {:?}", e), + } + } +}