From abe4ba0b2bd8b625fc50a16e7b7326939807ef73 Mon Sep 17 00:00:00 2001 From: David Overton Date: Wed, 27 Mar 2024 14:22:50 +1100 Subject: [PATCH 1/5] New default behaviour: attempt to use validator schema, fall back to sampling with default sample size of 10 --- CHANGELOG.md | 4 ++ .../src/introspection/validation_schema.rs | 69 ++++++++----------- crates/cli/src/lib.rs | 31 ++++----- 3 files changed, 48 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44872af3..3110224e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ This changelog documents the changes between release versions. ## [Unreleased] - Use separate schema files for each collection - Don't sample from collections that already have a schema +- New default behaviour for `update` CLI command: for each collection + - attempt to use validator schema if available + - if no validator schema, sample documents + - default sample size is 10 ## [0.0.2] - 2024-03-26 - Rename CLI plugin to ndc-mongodb ([PR #13](https://github.com/hasura/ndc-mongodb/pull/13)) diff --git a/crates/cli/src/introspection/validation_schema.rs b/crates/cli/src/introspection/validation_schema.rs index 9a276006..f52d0b6d 100644 --- a/crates/cli/src/introspection/validation_schema.rs +++ b/crates/cli/src/introspection/validation_schema.rs @@ -4,11 +4,10 @@ use configuration::{ schema::{self, Type}, Schema, WithName, }; -use futures_util::{StreamExt, TryStreamExt}; -use indexmap::IndexMap; +use futures_util::TryStreamExt; use mongodb::bson::from_bson; use mongodb_agent_common::schema::{get_property_description, Property, ValidatorSchema}; -use mongodb_support::{BsonScalarType, BsonType}; +use mongodb_support::BsonScalarType; use mongodb_agent_common::interface_types::{MongoAgentError, MongoConfig}; @@ -20,42 +19,34 @@ pub async fn get_metadata_from_validation_schema( config: &MongoConfig, ) -> Result, MongoAgentError> { let db = config.client.database(&config.database); - let collections_cursor = db.list_collections(None, None).await?; - - let schemas: Vec> = collections_cursor - .into_stream() - .map( - |collection_spec| -> Result, MongoAgentError> { - let collection_spec_value = collection_spec?; - let name = &collection_spec_value.name; - let schema_bson_option = collection_spec_value - .options - .validator - .as_ref() - .and_then(|x| x.get("$jsonSchema")); - - match schema_bson_option { - Some(schema_bson) => { - from_bson::(schema_bson.clone()).map_err(|err| { - MongoAgentError::BadCollectionSchema( - name.to_owned(), - schema_bson.clone(), - err, - ) - }) - } - None => Ok(ValidatorSchema { - bson_type: BsonType::Object, - description: None, - required: Vec::new(), - properties: IndexMap::new(), - }), - } - .map(|validator_schema| make_collection_schema(name, &validator_schema)) - }, - ) - .try_collect::>>() - .await?; + let mut collections_cursor = db.list_collections(None, None).await?; + + let mut schemas: Vec> = vec![]; + + while let Some(collection_spec) = collections_cursor.try_next().await? { + let name = &collection_spec.name; + let schema_bson_option = collection_spec + .options + .validator + .as_ref() + .and_then(|x| x.get("$jsonSchema")); + + match schema_bson_option { + Some(schema_bson) => { + let validator_schema = + from_bson::(schema_bson.clone()).map_err(|err| { + MongoAgentError::BadCollectionSchema( + name.to_owned(), + schema_bson.clone(), + err, + ) + })?; + let collection_schema = make_collection_schema(name, &validator_schema); + schemas.push(collection_schema); + } + None => {} + } + } Ok(WithName::into_map(schemas)) } diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index e8ce7838..89154140 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -10,8 +10,8 @@ use mongodb_agent_common::interface_types::MongoConfig; #[derive(Debug, Clone, Parser)] pub struct UpdateArgs { - #[arg(long = "sample-size", value_name = "N")] - sample_size: Option, + #[arg(long = "sample-size", value_name = "N", default_value = "10")] + sample_size: u32, } /// The command invoked by the user. @@ -36,19 +36,16 @@ pub async fn run(command: Command, context: &Context) -> anyhow::Result<()> { /// Update the configuration in the current directory by introspecting the database. async fn update(context: &Context, args: &UpdateArgs) -> anyhow::Result<()> { - let schemas = match args.sample_size { - None => introspection::get_metadata_from_validation_schema(&context.mongo_config).await?, - Some(sample_size) => { - let existing_schemas = configuration::list_existing_schemas(&context.path).await?; - introspection::sample_schema_from_db( - sample_size, - &context.mongo_config, - &existing_schemas, - ) - .await? - } - }; - configuration::write_schema_directory(&context.path, schemas).await?; - - Ok(()) + let schemas_from_json_validation = + introspection::get_metadata_from_validation_schema(&context.mongo_config).await?; + configuration::write_schema_directory(&context.path, schemas_from_json_validation).await?; + + let existing_schemas = configuration::list_existing_schemas(&context.path).await?; + let schemas_from_sampling = introspection::sample_schema_from_db( + args.sample_size, + &context.mongo_config, + &existing_schemas, + ) + .await?; + configuration::write_schema_directory(&context.path, schemas_from_sampling).await } From 5ad33cc717aa64d2241fbfe4ebdbcd26e0bb5076 Mon Sep 17 00:00:00 2001 From: David Overton Date: Wed, 27 Mar 2024 14:27:48 +1100 Subject: [PATCH 2/5] Add command line flag to disable using validator schema --- crates/cli/src/lib.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 89154140..1d371af2 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -10,8 +10,11 @@ use mongodb_agent_common::interface_types::MongoConfig; #[derive(Debug, Clone, Parser)] pub struct UpdateArgs { - #[arg(long = "sample-size", value_name = "N", default_value = "10")] + #[arg(long = "sample-size", value_name = "N", default_value_t = 10)] sample_size: u32, + + #[arg(long = "no-validator-schema", default_value_t = false)] + no_validator_schema: bool, } /// The command invoked by the user. @@ -36,9 +39,11 @@ pub async fn run(command: Command, context: &Context) -> anyhow::Result<()> { /// Update the configuration in the current directory by introspecting the database. async fn update(context: &Context, args: &UpdateArgs) -> anyhow::Result<()> { - let schemas_from_json_validation = - introspection::get_metadata_from_validation_schema(&context.mongo_config).await?; - configuration::write_schema_directory(&context.path, schemas_from_json_validation).await?; + if !args.no_validator_schema { + let schemas_from_json_validation = + introspection::get_metadata_from_validation_schema(&context.mongo_config).await?; + configuration::write_schema_directory(&context.path, schemas_from_json_validation).await?; + } let existing_schemas = configuration::list_existing_schemas(&context.path).await?; let schemas_from_sampling = introspection::sample_schema_from_db( From 17b2230b679ae599a1d1df73fd4eebc01c434036 Mon Sep 17 00:00:00 2001 From: David Overton Date: Wed, 27 Mar 2024 14:31:38 +1100 Subject: [PATCH 3/5] Update changelog --- CHANGELOG.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3110224e..b7c16553 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,11 +3,13 @@ This changelog documents the changes between release versions. ## [Unreleased] - Use separate schema files for each collection -- Don't sample from collections that already have a schema -- New default behaviour for `update` CLI command: for each collection - - attempt to use validator schema if available - - if no validator schema, sample documents - - default sample size is 10 +- Changes to `update` CLI command: + - new default behaviour: + - attempt to use validator schema if available + - if no validator schema then sample documents from the collection + - don't sample from collections that already have a schema + - if no --sample-size given on command line, default sample size is 10 + - new option --no-validator-schema to disable attempting to use validator schema ## [0.0.2] - 2024-03-26 - Rename CLI plugin to ndc-mongodb ([PR #13](https://github.com/hasura/ndc-mongodb/pull/13)) From 7d057a4c49457343d674ab285faa24fca89420e7 Mon Sep 17 00:00:00 2001 From: David Overton Date: Wed, 27 Mar 2024 14:47:50 +1100 Subject: [PATCH 4/5] Simplify code and make clippy happy --- .../src/introspection/validation_schema.rs | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/crates/cli/src/introspection/validation_schema.rs b/crates/cli/src/introspection/validation_schema.rs index f52d0b6d..f9f47724 100644 --- a/crates/cli/src/introspection/validation_schema.rs +++ b/crates/cli/src/introspection/validation_schema.rs @@ -31,20 +31,13 @@ pub async fn get_metadata_from_validation_schema( .as_ref() .and_then(|x| x.get("$jsonSchema")); - match schema_bson_option { - Some(schema_bson) => { - let validator_schema = - from_bson::(schema_bson.clone()).map_err(|err| { - MongoAgentError::BadCollectionSchema( - name.to_owned(), - schema_bson.clone(), - err, - ) - })?; - let collection_schema = make_collection_schema(name, &validator_schema); - schemas.push(collection_schema); - } - None => {} + if let Some(schema_bson) = schema_bson_option { + let validator_schema = + from_bson::(schema_bson.clone()).map_err(|err| { + MongoAgentError::BadCollectionSchema(name.to_owned(), schema_bson.clone(), err) + })?; + let collection_schema = make_collection_schema(name, &validator_schema); + schemas.push(collection_schema); } } From b0e27da95b1c415076dff5d2bd29e64b6891bde7 Mon Sep 17 00:00:00 2001 From: David Overton Date: Wed, 27 Mar 2024 16:21:50 +1100 Subject: [PATCH 5/5] Link to PRs in changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7c16553..8a1543cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,8 @@ This changelog documents the changes between release versions. ## [Unreleased] -- Use separate schema files for each collection -- Changes to `update` CLI command: +- Use separate schema files for each collection ([PR #14](https://github.com/hasura/ndc-mongodb/pull/14)) +- Changes to `update` CLI command ([PR #17](https://github.com/hasura/ndc-mongodb/pull/17)): - new default behaviour: - attempt to use validator schema if available - if no validator schema then sample documents from the collection