From 1dc65b3b7846ad5e1996baba3692cf3feac82bf3 Mon Sep 17 00:00:00 2001 From: Jan Schweizer Date: Tue, 7 Nov 2023 18:11:53 +0100 Subject: [PATCH 01/16] Add handling for unmanaged files to vacuum command --- .../deltalake-core/src/operations/vacuum.rs | 40 +++++++++++++++---- crates/deltalake-core/tests/command_vacuum.rs | 3 +- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/crates/deltalake-core/src/operations/vacuum.rs b/crates/deltalake-core/src/operations/vacuum.rs index 47f7c1d5c9..0f0a4e39ff 100644 --- a/crates/deltalake-core/src/operations/vacuum.rs +++ b/crates/deltalake-core/src/operations/vacuum.rs @@ -180,7 +180,6 @@ impl VacuumBuilder { }; let expired_tombstones = get_stale_files(&self.snapshot, retention_period, now_millis); - let valid_files = self.snapshot.file_paths_iter().collect::>(); let mut files_to_delete = vec![]; let mut file_sizes = vec![]; @@ -192,14 +191,35 @@ impl VacuumBuilder { .ok_or(DeltaTableError::NoMetadata)? .partition_columns; + let managed_files = self + .snapshot + .files() + .iter() + .map(|a| a.path.as_str()) + .chain( + self.snapshot + .all_tombstones() + .iter() + .map(|r| r.path.as_str()), + ) + .collect::>(); + while let Some(obj_meta) = all_files.next().await { // TODO should we allow NotFound here in case we have a temporary commit file in the list let obj_meta = obj_meta.map_err(DeltaTableError::from)?; - if valid_files.contains(&obj_meta.location) // file is still being tracked in table - || !expired_tombstones.contains(obj_meta.location.as_ref()) // file is not an expired tombstone - || is_hidden_directory(partition_columns, &obj_meta.location)? - { - continue; + let is_hidden = is_hidden_directory(partition_columns, &obj_meta.location)?; + + if managed_files.contains(obj_meta.location.as_ref()) { + if !expired_tombstones.contains(obj_meta.location.as_ref()) || is_hidden { + continue; + } + } else { + if now_millis - retention_period.num_milliseconds() + < obj_meta.last_modified.timestamp_millis() + || is_hidden + { + continue; + } } files_to_delete.push(obj_meta.location); @@ -357,8 +377,12 @@ impl VacuumPlan { /// deleted even if they'd normally be hidden. The _db_index directory contains (bloom filter) /// indexes and these must be deleted when the data they are tied to is deleted. fn is_hidden_directory(partition_columns: &[String], path: &Path) -> Result { - let path_name = path.to_string(); - Ok((path_name.starts_with('.') || path_name.starts_with('_')) + let is_hidden = path + .parts() + .any(|p| p.as_ref().starts_with('.') || p.as_ref().starts_with('_')); + + let path_name = path.as_ref(); + Ok(is_hidden && !path_name.starts_with("_delta_index") && !path_name.starts_with("_change_data") && !partition_columns diff --git a/crates/deltalake-core/tests/command_vacuum.rs b/crates/deltalake-core/tests/command_vacuum.rs index 0007f479d5..e21315e796 100644 --- a/crates/deltalake-core/tests/command_vacuum.rs +++ b/crates/deltalake-core/tests/command_vacuum.rs @@ -216,7 +216,6 @@ async fn test_partitions_included() { ); } -#[ignore] #[tokio::test] // files that are not managed by the delta log and have a last_modified greater // than the retention period should be deleted. 
Unmanaged files and directories @@ -276,7 +275,7 @@ async fn test_non_managed_files() { // Validate unmanaged files are deleted after the retention period let res = { - clock.tick(Duration::hours(1)); + clock.tick(Duration::days(7)); let (_, metrics) = DeltaOps(table) .vacuum() .with_clock(Arc::new(clock.clone())) From e59bb34587e85ca00024223fc61d7f28f884fe75 Mon Sep 17 00:00:00 2001 From: Jan Schweizer Date: Tue, 7 Nov 2023 18:55:40 +0100 Subject: [PATCH 02/16] collapse nested if block --- crates/deltalake-core/src/operations/vacuum.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/deltalake-core/src/operations/vacuum.rs b/crates/deltalake-core/src/operations/vacuum.rs index 0f0a4e39ff..5499c0be92 100644 --- a/crates/deltalake-core/src/operations/vacuum.rs +++ b/crates/deltalake-core/src/operations/vacuum.rs @@ -213,13 +213,11 @@ impl VacuumBuilder { if !expired_tombstones.contains(obj_meta.location.as_ref()) || is_hidden { continue; } - } else { - if now_millis - retention_period.num_milliseconds() - < obj_meta.last_modified.timestamp_millis() - || is_hidden - { - continue; - } + } else if now_millis - retention_period.num_milliseconds() + < obj_meta.last_modified.timestamp_millis() + || is_hidden + { + continue; } files_to_delete.push(obj_meta.location); From 90b774127d6933000a127fe62c77b68fa4dd03b5 Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Tue, 7 Nov 2023 08:35:26 -0800 Subject: [PATCH 03/16] chore: upgrade to the latest dynamodb-lock crate The new version of this crate properly sets a lease duration such that the locks can actually expire --- crates/deltalake-core/Cargo.toml | 2 +- crates/deltalake-core/src/storage/s3.rs | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/crates/deltalake-core/Cargo.toml b/crates/deltalake-core/Cargo.toml index ce1c7490ad..b3a6178203 100644 --- a/crates/deltalake-core/Cargo.toml +++ b/crates/deltalake-core/Cargo.toml @@ -117,7 +117,7 @@ sqlparser = { version = "0.38", optional = true } fs_extra = { version = "1.3.0", optional = true } tempdir = { version = "0", optional = true } -dynamodb_lock = { version = "0", default-features = false, optional = true } +dynamodb_lock = { version = "0.6.0", default-features = false, optional = true } [dev-dependencies] dotenvy = "0" diff --git a/crates/deltalake-core/src/storage/s3.rs b/crates/deltalake-core/src/storage/s3.rs index 23e091dea5..7594d1b06c 100644 --- a/crates/deltalake-core/src/storage/s3.rs +++ b/crates/deltalake-core/src/storage/s3.rs @@ -3,7 +3,7 @@ use super::utils::str_is_truthy; use crate::table::builder::{s3_storage_options, str_option}; use bytes::Bytes; -use dynamodb_lock::{DynamoError, LockClient, LockItem, DEFAULT_MAX_RETRY_ACQUIRE_LOCK_ATTEMPTS}; +use dynamodb_lock::{DynamoError, LockClient, LockItem}; use futures::stream::BoxStream; use object_store::path::Path; use object_store::{ @@ -23,6 +23,7 @@ use std::time::Duration; use tokio::io::AsyncWrite; const STORE_NAME: &str = "DeltaS3ObjectStore"; +const DEFAULT_MAX_RETRY_ACQUIRE_LOCK_ATTEMPTS: u32 = 1_000; /// Error raised by storage lock client #[derive(thiserror::Error, Debug)] @@ -535,10 +536,11 @@ fn try_create_lock_client(options: &S3StorageOptions) -> Result rusoto_dynamodb::DynamoDbClient::new(options.region.clone()), }; - let lock_client = dynamodb_lock::DynamoDbLockClient::new( - dynamodb_client, - dynamodb_lock::DynamoDbOptions::from_map(options.extra_opts.clone()), - ); + let lock_client = 
dynamodb_lock::DynamoDbLockClient::for_region(options.region.clone()) + .with_client(dynamodb_client) + .with_options(dynamodb_lock::DynamoDbOptions::from_map( + options.extra_opts.clone(), + )); Ok(Some(S3LockClient { lock_client: Box::new(lock_client), })) From 809f645347e87baa74559eadde37851e31a028bc Mon Sep 17 00:00:00 2001 From: Thomas Peiselt Date: Thu, 9 Nov 2023 08:31:34 +0100 Subject: [PATCH 04/16] feat: default logstore implementation (#1742) # Description Introduce a `LogStore` abstraction to channel all log store reads and writes through a single place. This is supposed to allow implementations with more sophisticated locking mechanisms that do not rely on atomic rename semantics for the underlying object store. This does not change any functionality - it reorganizes read operations and commits on the delta commit log to be funneled through the respective methods of `LogStore`. ## Rationale The goal is to align the implementation of multi-cluster writes for Delta Lake on S3 with the one provided by the original `delta` library, enabling multi-cluster writes with some writers using Spark / Delta library and other writers using `delta-rs` For an overview of how it's done in delta, please see: 1. Delta [blog post](https://delta.io/blog/2022-05-18-multi-cluster-writes-to-delta-lake-storage-in-s3/) (high-level concept) 2. Associated Databricks [design doc](https://docs.google.com/document/d/1Gs4ZsTH19lMxth4BSdwlWjUNR-XhKHicDvBjd2RqNd8/edit#heading=h.mjjuxw9mcz9h) (detailed read) 3. [S3DynamoDbLogStore.java](https://github.com/delta-io/delta/blob/master/storage-s3-dynamodb/src/main/java/io/delta/storage/S3DynamoDBLogStore.java)(content warning: Java code behind this link) This approach requires readers of a delta table to "recover" unfinished commits from writers - as a result, reading and writing is combined in a single interface, which in this PR is modeled after [LogStore.java](https://github.com/delta-io/delta/blob/master/storage/src/main/java/io/delta/storage/LogStore.java). Currently in `delta-rs`, read path for commits is implemented directly in `DeltaTable`, and there's no mechanism to implement storage-specific behavior like interacting with DynamoDb. 
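
To make the interaction concrete, here is a minimal sketch of how a writer drives the trait once a temporary commit file has been prepared. This is illustrative only: `publish_commit` is a made-up helper name, the sketch assumes it lives inside `deltalake-core` (hence the `crate::` paths), and the real retry and conflict-resolution logic is in `commit_with_retries` in `operations/transaction/mod.rs`.

```rust
use object_store::path::Path;

use crate::logstore::LogStore;
use crate::operations::transaction::TransactionError;

/// Hypothetical helper: publish an already-prepared temporary commit file,
/// bumping the target version on conflict, for up to `max_retries` attempts.
async fn publish_commit(
    log_store: &dyn LogStore,
    tmp_commit: &Path,
    read_version: i64,
    max_retries: i64,
) -> Result<i64, TransactionError> {
    let mut version = read_version + 1;
    loop {
        match log_store.write_commit_entry(version, tmp_commit).await {
            // The LogStore implementation guarantees the commit became visible atomically.
            Ok(()) => return Ok(version),
            // Another writer claimed this version; a real implementation would
            // inspect the winning commit for conflicts before retrying.
            Err(TransactionError::VersionAlreadyExists(_))
                if version < read_version + max_retries =>
            {
                version += 1;
            }
            // Out of retries or a hard failure: clean up the temporary file.
            Err(err) => {
                let _ = log_store.object_store().delete(tmp_commit).await;
                return Err(err);
            }
        }
    }
}
```

Because all commit reads and writes are funneled through `LogStore`, an S3-specific implementation can later override `write_commit_entry` to coordinate through DynamoDB instead of relying on atomic rename semantics, without changing the operations that call it.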
--------- Co-authored-by: Robert Pack <42610831+roeap@users.noreply.github.com> --- crates/deltalake-core/Cargo.toml | 2 +- .../src/delta_datafusion/mod.rs | 49 ++- crates/deltalake-core/src/lib.rs | 1 + .../src/logstore/default_logstore.rs | 89 +++++ crates/deltalake-core/src/logstore/mod.rs | 325 ++++++++++++++++ .../deltalake-core/src/operations/create.rs | 33 +- .../deltalake-core/src/operations/delete.rs | 28 +- .../src/operations/filesystem_check.rs | 24 +- crates/deltalake-core/src/operations/load.rs | 10 +- crates/deltalake-core/src/operations/merge.rs | 22 +- crates/deltalake-core/src/operations/mod.rs | 25 +- .../deltalake-core/src/operations/optimize.rs | 26 +- .../deltalake-core/src/operations/restore.rs | 29 +- .../src/operations/transaction/mod.rs | 90 +++-- .../src/operations/transaction/test_utils.rs | 22 +- .../deltalake-core/src/operations/update.rs | 24 +- .../deltalake-core/src/operations/vacuum.rs | 34 +- crates/deltalake-core/src/operations/write.rs | 23 +- .../deltalake-core/src/operations/writer.rs | 8 +- .../src/protocol/checkpoints.rs | 28 +- crates/deltalake-core/src/protocol/mod.rs | 12 +- crates/deltalake-core/src/storage/config.rs | 15 +- crates/deltalake-core/src/storage/mod.rs | 358 +----------------- crates/deltalake-core/src/storage/utils.rs | 2 +- crates/deltalake-core/src/table/builder.rs | 25 +- crates/deltalake-core/src/table/mod.rs | 120 +++--- crates/deltalake-core/src/table/state.rs | 4 +- crates/deltalake-core/src/test_utils.rs | 2 +- crates/deltalake-core/src/writer/json.rs | 8 +- crates/deltalake-core/src/writer/mod.rs | 9 +- .../deltalake-core/src/writer/record_batch.rs | 9 +- .../deltalake-core/src/writer/test_utils.rs | 2 +- .../deltalake-core/tests/command_optimize.rs | 12 +- crates/deltalake-core/tests/command_vacuum.rs | 2 +- .../tests/commit_info_format.rs | 2 +- crates/deltalake-core/tests/common/mod.rs | 16 +- crates/deltalake-core/tests/fs_common/mod.rs | 11 +- .../tests/integration_checkpoint.rs | 15 +- .../tests/integration_concurrent_writes.rs | 2 +- .../tests/integration_datafusion.rs | 17 +- .../tests/integration_object_store.rs | 9 +- .../deltalake-core/tests/integration_read.rs | 3 +- .../tests/repair_s3_rename_test.rs | 2 +- python/src/filesystem.rs | 3 +- python/src/lib.rs | 21 +- 45 files changed, 826 insertions(+), 747 deletions(-) create mode 100644 crates/deltalake-core/src/logstore/default_logstore.rs create mode 100644 crates/deltalake-core/src/logstore/mod.rs diff --git a/crates/deltalake-core/Cargo.toml b/crates/deltalake-core/Cargo.toml index b3a6178203..9fa259fa39 100644 --- a/crates/deltalake-core/Cargo.toml +++ b/crates/deltalake-core/Cargo.toml @@ -117,7 +117,7 @@ sqlparser = { version = "0.38", optional = true } fs_extra = { version = "1.3.0", optional = true } tempdir = { version = "0", optional = true } -dynamodb_lock = { version = "0.6.0", default-features = false, optional = true } +dynamodb_lock = { version = "0.6", default-features = false, optional = true } [dev-dependencies] dotenvy = "0" diff --git a/crates/deltalake-core/src/delta_datafusion/mod.rs b/crates/deltalake-core/src/delta_datafusion/mod.rs index 0147c250f9..38bf135739 100644 --- a/crates/deltalake-core/src/delta_datafusion/mod.rs +++ b/crates/deltalake-core/src/delta_datafusion/mod.rs @@ -71,8 +71,8 @@ use url::Url; use crate::errors::{DeltaResult, DeltaTableError}; use crate::kernel::{Add, DataType as DeltaDataType, Invariant, PrimitiveType}; +use crate::logstore::LogStoreRef; use crate::protocol::{self}; -use crate::storage::ObjectStoreRef; 
use crate::table::builder::ensure_table_uri; use crate::table::state::DeltaTableState; use crate::{open_table, open_table_with_storage_options, DeltaTable}; @@ -357,10 +357,10 @@ impl PruningStatistics for DeltaTable { // each delta table must register a specific object store, since paths are internally // handled relative to the table root. -pub(crate) fn register_store(store: ObjectStoreRef, env: Arc) { +pub(crate) fn register_store(store: LogStoreRef, env: Arc) { let object_store_url = store.object_store_url(); let url: &Url = object_store_url.as_ref(); - env.register_object_store(url, store); + env.register_object_store(url, store.object_store()); } pub(crate) fn logical_schema( @@ -467,7 +467,7 @@ pub struct DeltaScanConfig { #[derive(Debug)] pub(crate) struct DeltaScanBuilder<'a> { snapshot: &'a DeltaTableState, - object_store: ObjectStoreRef, + log_store: LogStoreRef, filter: Option, state: &'a SessionState, projection: Option<&'a Vec>, @@ -480,12 +480,12 @@ pub(crate) struct DeltaScanBuilder<'a> { impl<'a> DeltaScanBuilder<'a> { pub fn new( snapshot: &'a DeltaTableState, - object_store: ObjectStoreRef, + log_store: LogStoreRef, state: &'a SessionState, ) -> Self { DeltaScanBuilder { snapshot, - object_store, + log_store, filter: None, state, files: None, @@ -532,7 +532,7 @@ impl<'a> DeltaScanBuilder<'a> { Some(schema) => schema, None => { self.snapshot - .physical_arrow_schema(self.object_store.clone()) + .physical_arrow_schema(self.log_store.object_store()) .await? } }; @@ -632,7 +632,7 @@ impl<'a> DeltaScanBuilder<'a> { .create_physical_plan( self.state, FileScanConfig { - object_store_url: self.object_store.object_store_url(), + object_store_url: self.log_store.object_store_url(), file_schema, file_groups: file_groups.into_values().collect(), statistics: self.snapshot.datafusion_table_statistics(), @@ -647,9 +647,7 @@ impl<'a> DeltaScanBuilder<'a> { .await?; Ok(DeltaScan { - table_uri: ensure_table_uri(self.object_store.root_uri())? 
- .as_str() - .into(), + table_uri: ensure_table_uri(self.log_store.root_uri())?.as_str().into(), parquet_scan: scan, config, logical_schema, @@ -686,10 +684,10 @@ impl TableProvider for DeltaTable { filters: &[Expr], limit: Option, ) -> DataFusionResult> { - register_store(self.object_store(), session.runtime_env().clone()); + register_store(self.log_store(), session.runtime_env().clone()); let filter_expr = conjunction(filters.iter().cloned()); - let scan = DeltaScanBuilder::new(&self.state, self.object_store(), session) + let scan = DeltaScanBuilder::new(&self.state, self.log_store(), session) .with_projection(projection) .with_limit(limit) .with_filter(filter_expr) @@ -714,7 +712,7 @@ impl TableProvider for DeltaTable { /// A Delta table provider that enables additional metadata columns to be included during the scan pub struct DeltaTableProvider { snapshot: DeltaTableState, - store: ObjectStoreRef, + log_store: LogStoreRef, config: DeltaScanConfig, schema: Arc, } @@ -723,13 +721,13 @@ impl DeltaTableProvider { /// Build a DeltaTableProvider pub fn try_new( snapshot: DeltaTableState, - store: ObjectStoreRef, + log_store: LogStoreRef, config: DeltaScanConfig, ) -> DeltaResult { Ok(DeltaTableProvider { schema: logical_schema(&snapshot, &config)?, snapshot, - store, + log_store, config, }) } @@ -764,10 +762,10 @@ impl TableProvider for DeltaTableProvider { filters: &[Expr], limit: Option, ) -> DataFusionResult> { - register_store(self.store.clone(), session.runtime_env().clone()); + register_store(self.log_store.clone(), session.runtime_env().clone()); let filter_expr = conjunction(filters.iter().cloned()); - let scan = DeltaScanBuilder::new(&self.snapshot, self.store.clone(), session) + let scan = DeltaScanBuilder::new(&self.snapshot, self.log_store.clone(), session) .with_projection(projection) .with_limit(limit) .with_filter(filter_expr) @@ -1462,7 +1460,7 @@ fn join_batches_with_add_actions( /// Determine which files contain a record that statisfies the predicate pub(crate) async fn find_files_scan<'a>( snapshot: &DeltaTableState, - store: ObjectStoreRef, + log_store: LogStoreRef, state: &SessionState, expression: Expr, ) -> DeltaResult> { @@ -1489,7 +1487,7 @@ pub(crate) async fn find_files_scan<'a>( // Add path column used_columns.push(logical_schema.index_of(scan_config.file_column_name.as_ref().unwrap())?); - let scan = DeltaScanBuilder::new(snapshot, store.clone(), state) + let scan = DeltaScanBuilder::new(snapshot, log_store, state) .with_filter(Some(expression.clone())) .with_projection(Some(&used_columns)) .with_scan_config(scan_config) @@ -1580,7 +1578,7 @@ pub(crate) async fn scan_memory_table( /// Finds files in a snapshot that match the provided predicate. 
pub async fn find_files<'a>( snapshot: &DeltaTableState, - object_store: ObjectStoreRef, + log_store: LogStoreRef, state: &SessionState, predicate: Option, ) -> DeltaResult { @@ -1608,8 +1606,7 @@ pub async fn find_files<'a>( }) } else { let candidates = - find_files_scan(snapshot, object_store.clone(), state, predicate.to_owned()) - .await?; + find_files_scan(snapshot, log_store, state, predicate.to_owned()).await?; Ok(FindFiles { candidates, @@ -1924,7 +1921,8 @@ mod tests { .build(&table.state) .unwrap(); - let provider = DeltaTableProvider::try_new(table.state, table.storage, config).unwrap(); + let log_store = table.log_store(); + let provider = DeltaTableProvider::try_new(table.state, log_store, config).unwrap(); let ctx = SessionContext::new(); ctx.register_table("test", Arc::new(provider)).unwrap(); @@ -1983,7 +1981,8 @@ mod tests { let config = DeltaScanConfigBuilder::new().build(&table.state).unwrap(); - let provider = DeltaTableProvider::try_new(table.state, table.storage, config).unwrap(); + let log_store = table.log_store(); + let provider = DeltaTableProvider::try_new(table.state, log_store, config).unwrap(); let ctx = SessionContext::new(); ctx.register_table("test", Arc::new(provider)).unwrap(); diff --git a/crates/deltalake-core/src/lib.rs b/crates/deltalake-core/src/lib.rs index d683b906dd..644da2dcac 100644 --- a/crates/deltalake-core/src/lib.rs +++ b/crates/deltalake-core/src/lib.rs @@ -85,6 +85,7 @@ compile_error!( pub mod data_catalog; pub mod errors; pub mod kernel; +pub mod logstore; pub mod operations; pub mod protocol; pub mod schema; diff --git a/crates/deltalake-core/src/logstore/default_logstore.rs b/crates/deltalake-core/src/logstore/default_logstore.rs new file mode 100644 index 0000000000..715d810535 --- /dev/null +++ b/crates/deltalake-core/src/logstore/default_logstore.rs @@ -0,0 +1,89 @@ +//! Default implementation of [`LogStore`] for storage backends with atomic put-if-absent operation + +use std::sync::Arc; + +use bytes::Bytes; +#[cfg(feature = "datafusion")] +use datafusion::execution::object_store::ObjectStoreUrl; +use object_store::{path::Path, ObjectStore}; +use url::Url; + +use super::{LogStore, LogStoreConfig}; +use crate::{ + operations::transaction::TransactionError, + storage::{ + config::{self, StorageOptions}, + ObjectStoreRef, + }, + DeltaResult, +}; + +/// Default [`LogStore`] implementation +#[derive(Debug, Clone)] +pub struct DefaultLogStore { + pub(crate) storage: Arc, + config: LogStoreConfig, +} + +impl DefaultLogStore { + /// Create a new instance of [`DefaultLogStore`] + /// + /// # Arguments + /// + /// * `storage` - A shared reference to an [`object_store::ObjectStore`] with "/" pointing at delta table root (i.e. where `_delta_log` is located). + /// * `location` - A url corresponding to the storage location of `storage`. + pub fn new(storage: ObjectStoreRef, config: LogStoreConfig) -> Self { + Self { storage, config } + } + + /// Create log store + pub fn try_new(location: Url, options: impl Into + Clone) -> DeltaResult { + let mut options = options.into(); + let storage = config::configure_store(&location, &mut options)?; + Ok(Self { + storage: Arc::new(storage), + config: LogStoreConfig { location, options }, + }) + } +} + +#[async_trait::async_trait] +impl LogStore for DefaultLogStore { + async fn read_commit_entry(&self, version: i64) -> DeltaResult { + super::read_commit_entry(self.storage.as_ref(), version).await + } + + /// Tries to commit a prepared commit file. 
Returns [`TransactionError`] + /// if the given `version` already exists. The caller should handle the retry logic itself. + /// This is low-level transaction API. If user does not want to maintain the commit loop then + /// the `DeltaTransaction.commit` is desired to be used as it handles `try_commit_transaction` + /// with retry logic. + async fn write_commit_entry( + &self, + version: i64, + tmp_commit: &Path, + ) -> Result<(), TransactionError> { + super::write_commit_entry(self.storage.as_ref(), version, tmp_commit).await + } + + async fn get_latest_version(&self, current_version: i64) -> DeltaResult { + super::get_latest_version(self, current_version).await + } + + fn object_store(&self) -> Arc { + self.storage.clone() + } + + fn to_uri(&self, location: &Path) -> String { + super::to_uri(&self.config.location, location) + } + + #[cfg(feature = "datafusion")] + fn object_store_url(&self) -> ObjectStoreUrl { + super::object_store_url(&self.config.location) + } + + fn config(&self) -> &LogStoreConfig { + &self.config + } +} diff --git a/crates/deltalake-core/src/logstore/mod.rs b/crates/deltalake-core/src/logstore/mod.rs new file mode 100644 index 0000000000..7f1009b1de --- /dev/null +++ b/crates/deltalake-core/src/logstore/mod.rs @@ -0,0 +1,325 @@ +//! Delta log store. +use futures::StreamExt; +use lazy_static::lazy_static; +use regex::Regex; +use serde::{ + de::{Error, SeqAccess, Visitor}, + ser::SerializeSeq, + Deserialize, Serialize, +}; +use std::{cmp::max, collections::HashMap, sync::Arc}; +use url::Url; + +use crate::{ + errors::DeltaResult, + operations::transaction::TransactionError, + protocol::{get_last_checkpoint, ProtocolError}, + storage::{commit_uri_from_version, config::StorageOptions}, + DeltaTableError, +}; +use bytes::Bytes; +use log::debug; +use object_store::{ + path::Path, Error as ObjectStoreError, ObjectStore, Result as ObjectStoreResult, +}; + +#[cfg(feature = "datafusion")] +use datafusion::datasource::object_store::ObjectStoreUrl; + +pub mod default_logstore; + +/// Sharable reference to [`LogStore`] +pub type LogStoreRef = Arc; + +lazy_static! { + static ref DELTA_LOG_PATH: Path = Path::from("_delta_log"); +} + +/// Configuration parameters for a log store +#[derive(Debug, Clone)] +pub struct LogStoreConfig { + /// url corresponding to the storage location. + pub location: Url, + /// Options used for configuring backend storage + pub options: StorageOptions, +} + +/// Trait for critical operations required to read and write commit entries in Delta logs. +/// +/// The correctness is predicated on the atomicity and durability guarantees of +/// the implementation of this interface. Specifically, +/// +/// - Atomic visibility: Any commit created via `write_commit_entry` must become visible atomically. +/// - Mutual exclusion: Only one writer must be able to create a commit for a specific version. +/// - Consistent listing: Once a commit entry for version `v` has been written, any future call to +/// `get_latest_version` must return a version >= `v`, i.e. the underlying file system entry must +/// become visible immediately. +#[async_trait::async_trait] +pub trait LogStore: Sync + Send { + /// Read data for commit entry with the given version. + async fn read_commit_entry(&self, version: i64) -> DeltaResult; + + /// Write list of actions as delta commit entry for given version. + /// + /// This operation can be retried with a higher version in case the write + /// fails with [`TransactionError::VersionAlreadyExists`]. 
+ async fn write_commit_entry( + &self, + version: i64, + tmp_commit: &Path, + ) -> Result<(), TransactionError>; + + /// Find latest version currently stored in the delta log. + async fn get_latest_version(&self, start_version: i64) -> DeltaResult; + + /// Get underlying object store. + fn object_store(&self) -> Arc; + + /// [Path] to Delta log + fn to_uri(&self, location: &Path) -> String; + + /// Get fully qualified uri for table root + fn root_uri(&self) -> String { + self.to_uri(&Path::from("")) + } + + /// [Path] to Delta log + fn log_path(&self) -> &Path { + &DELTA_LOG_PATH + } + + /// Check if the location is a delta table location + async fn is_delta_table_location(&self) -> ObjectStoreResult { + // TODO We should really be using HEAD here, but this fails in windows tests + let object_store = self.object_store(); + let mut stream = object_store.list(Some(self.log_path())).await?; + if let Some(res) = stream.next().await { + match res { + Ok(_) => Ok(true), + Err(ObjectStoreError::NotFound { .. }) => Ok(false), + Err(err) => Err(err), + } + } else { + Ok(false) + } + } + + #[cfg(feature = "datafusion")] + /// Generate a unique enough url to identify the store in datafusion. + /// The DF object store registry only cares about the scheme and the host of the url for + /// registering/fetching. In our case the scheme is hard-coded to "delta-rs", so to get a unique + /// host we convert the location from this `LogStore` to a valid name, combining the + /// original scheme, host and path with invalid characters replaced. + fn object_store_url(&self) -> ObjectStoreUrl; + + /// Get configuration representing configured log store. + fn config(&self) -> &LogStoreConfig; +} + +// TODO: maybe a bit of a hack, required to `#[derive(Debug)]` for the operation builders +impl std::fmt::Debug for dyn LogStore + '_ { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "LogStore({})", self.root_uri()) + } +} + +impl Serialize for LogStoreConfig { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut seq = serializer.serialize_seq(None)?; + seq.serialize_element(&self.location.to_string())?; + seq.serialize_element(&self.options.0)?; + seq.end() + } +} + +impl<'de> Deserialize<'de> for LogStoreConfig { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct LogStoreConfigVisitor {} + + impl<'de> Visitor<'de> for LogStoreConfigVisitor { + type Value = LogStoreConfig; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("struct LogStoreConfig") + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + let location_str: String = seq + .next_element()? + .ok_or_else(|| A::Error::invalid_length(0, &self))?; + let options: HashMap = seq + .next_element()? + .ok_or_else(|| A::Error::invalid_length(0, &self))?; + let location = Url::parse(&location_str).unwrap(); + Ok(LogStoreConfig { + location, + options: options.into(), + }) + } + } + + deserializer.deserialize_seq(LogStoreConfigVisitor {}) + } +} + +lazy_static! 
{ + static ref DELTA_LOG_REGEX: Regex = Regex::new(r"(\d{20})\.(json|checkpoint).*$").unwrap(); +} + +fn to_uri(root: &Url, location: &Path) -> String { + match root.scheme() { + "file" => { + #[cfg(windows)] + let uri = format!( + "{}/{}", + root.as_ref().trim_end_matches('/'), + location.as_ref() + ) + .replace("file:///", ""); + #[cfg(unix)] + let uri = format!( + "{}/{}", + root.as_ref().trim_end_matches('/'), + location.as_ref() + ) + .replace("file://", ""); + uri + } + _ => { + if location.as_ref().is_empty() || location.as_ref() == "/" { + root.as_ref().to_string() + } else { + format!("{}/{}", root.as_ref(), location.as_ref()) + } + } + } +} + +#[cfg(feature = "datafusion")] +fn object_store_url(location: &Url) -> ObjectStoreUrl { + // we are certain, that the URL can be parsed, since + // we make sure when we are parsing the table uri + + use object_store::path::DELIMITER; + ObjectStoreUrl::parse(format!( + "delta-rs://{}-{}{}", + location.scheme(), + location.host_str().unwrap_or("-"), + location.path().replace(DELIMITER, "-").replace(':', "-") + )) + .expect("Invalid object store url.") +} + +/// Extract version from a file name in the delta log +pub fn extract_version_from_filename(name: &str) -> Option { + DELTA_LOG_REGEX + .captures(name) + .map(|captures| captures.get(1).unwrap().as_str().parse().unwrap()) +} + +async fn get_latest_version(log_store: &dyn LogStore, current_version: i64) -> DeltaResult { + let version_start = match get_last_checkpoint(log_store).await { + Ok(last_check_point) => last_check_point.version, + Err(ProtocolError::CheckpointNotFound) => { + // no checkpoint + -1 + } + Err(e) => { + return Err(DeltaTableError::from(e)); + } + }; + + debug!("latest checkpoint version: {version_start}"); + + let version_start = max(current_version, version_start); + + // list files to find max version + let version = async { + let mut max_version: i64 = version_start; + let prefix = Some(log_store.log_path()); + let offset_path = commit_uri_from_version(max_version); + let object_store = log_store.object_store(); + let mut files = object_store.list_with_offset(prefix, &offset_path).await?; + + while let Some(obj_meta) = files.next().await { + let obj_meta = obj_meta?; + if let Some(log_version) = extract_version_from_filename(obj_meta.location.as_ref()) { + max_version = max(max_version, log_version); + // also cache timestamp for version, for faster time-travel + // TODO: temporarily disabled because `version_timestamp` is not available in the [`LogStore`] + // self.version_timestamp + // .insert(log_version, obj_meta.last_modified.timestamp()); + } + } + + if max_version < 0 { + return Err(DeltaTableError::not_a_table(log_store.root_uri())); + } + + Ok::(max_version) + } + .await?; + Ok(version) +} + +async fn read_commit_entry(storage: &dyn ObjectStore, version: i64) -> DeltaResult { + let commit_uri = commit_uri_from_version(version); + let data = storage.get(&commit_uri).await?.bytes().await?; + Ok(data) +} + +async fn write_commit_entry( + storage: &dyn ObjectStore, + version: i64, + tmp_commit: &Path, +) -> Result<(), TransactionError> { + // move temporary commit file to delta log directory + // rely on storage to fail if the file already exists - + storage + .rename_if_not_exists(tmp_commit, &commit_uri_from_version(version)) + .await + .map_err(|err| -> TransactionError { + match err { + ObjectStoreError::AlreadyExists { .. 
} => { + TransactionError::VersionAlreadyExists(version) + } + _ => TransactionError::from(err), + } + })?; + Ok(()) +} + +#[cfg(feature = "datafusion")] +#[cfg(test)] +mod tests { + use url::Url; + + #[tokio::test] + async fn test_unique_object_store_url() { + for (location_1, location_2) in [ + // Same scheme, no host, different path + ("file:///path/to/table_1", "file:///path/to/table_2"), + // Different scheme/host, same path + ("s3://my_bucket/path/to/table_1", "file:///path/to/table_1"), + // Same scheme, different host, same path + ("s3://bucket_1/table_1", "s3://bucket_2/table_1"), + ] { + let url_1 = Url::parse(location_1).unwrap(); + let url_2 = Url::parse(location_2).unwrap(); + + assert_ne!( + super::object_store_url(&url_1).as_str(), + super::object_store_url(&url_2).as_str(), + ); + } + } +} diff --git a/crates/deltalake-core/src/operations/create.rs b/crates/deltalake-core/src/operations/create.rs index 1dc9fdf8b2..71398faf97 100644 --- a/crates/deltalake-core/src/operations/create.rs +++ b/crates/deltalake-core/src/operations/create.rs @@ -11,8 +11,8 @@ use super::transaction::commit; use super::{MAX_SUPPORTED_READER_VERSION, MAX_SUPPORTED_WRITER_VERSION}; use crate::errors::{DeltaResult, DeltaTableError}; use crate::kernel::{Action, DataType, Metadata, Protocol, StructField, StructType}; +use crate::logstore::{LogStore, LogStoreRef}; use crate::protocol::{DeltaOperation, SaveMode}; -use crate::storage::DeltaObjectStore; use crate::table::builder::ensure_table_uri; use crate::table::config::DeltaConfigKey; use crate::table::DeltaTableMetaData; @@ -55,7 +55,7 @@ pub struct CreateBuilder { partition_columns: Option>, storage_options: Option>, actions: Vec, - object_store: Option>, + log_store: Option, configuration: HashMap>, metadata: Option>, } @@ -78,7 +78,7 @@ impl CreateBuilder { partition_columns: None, storage_options: None, actions: Default::default(), - object_store: None, + log_store: None, configuration: Default::default(), metadata: Default::default(), } @@ -198,9 +198,9 @@ impl CreateBuilder { self } - /// Provide a [`DeltaObjectStore`] instance, that points at table location - pub fn with_object_store(mut self, object_store: Arc) -> Self { - self.object_store = Some(object_store); + /// Provide a [`LogStore`] instance, that points at table location + pub fn with_log_store(mut self, log_store: Arc) -> Self { + self.log_store = Some(log_store); self } @@ -219,12 +219,10 @@ impl CreateBuilder { return Err(CreateError::MissingSchema.into()); } - let (storage_url, table) = if let Some(object_store) = self.object_store { + let (storage_url, table) = if let Some(log_store) = self.log_store { ( - ensure_table_uri(object_store.root_uri())? - .as_str() - .to_string(), - DeltaTable::new(object_store, Default::default()), + ensure_table_uri(log_store.root_uri())?.as_str().to_string(), + DeltaTable::new(log_store, Default::default()), ) } else { let storage_url = ensure_table_uri(self.location.ok_or(CreateError::MissingLocation)?)?; @@ -293,7 +291,8 @@ impl std::future::IntoFuture for CreateBuilder { Box::pin(async move { let mode = this.mode.clone(); let (mut table, actions, operation) = this.into_table_and_actions()?; - let table_state = if table.object_store().is_delta_table_location().await? { + let log_store = table.log_store(); + let table_state = if log_store.is_delta_table_location().await? 
{ match mode { SaveMode::ErrorIfExists => return Err(CreateError::TableAlreadyExists.into()), SaveMode::Append => return Err(CreateError::AppendNotAllowed.into()), @@ -311,7 +310,7 @@ impl std::future::IntoFuture for CreateBuilder { }; let version = commit( - table.object_store().as_ref(), + table.log_store.as_ref(), &actions, operation, table_state, @@ -443,11 +442,11 @@ mod tests { assert_eq!(table.version(), 0); let first_id = table.get_metadata().unwrap().id.clone(); - let object_store = table.object_store(); + let log_store = table.log_store; // Check an error is raised when a table exists at location let table = CreateBuilder::new() - .with_object_store(object_store.clone()) + .with_log_store(log_store.clone()) .with_columns(schema.fields().clone()) .with_save_mode(SaveMode::ErrorIfExists) .await; @@ -455,7 +454,7 @@ mod tests { // Check current table is returned when ignore option is chosen. let table = CreateBuilder::new() - .with_object_store(object_store.clone()) + .with_log_store(log_store.clone()) .with_columns(schema.fields().clone()) .with_save_mode(SaveMode::Ignore) .await @@ -464,7 +463,7 @@ mod tests { // Check table is overwritten let table = CreateBuilder::new() - .with_object_store(object_store.clone()) + .with_log_store(log_store) .with_columns(schema.fields().iter().cloned()) .with_save_mode(SaveMode::Overwrite) .await diff --git a/crates/deltalake-core/src/operations/delete.rs b/crates/deltalake-core/src/operations/delete.rs index 98e4bd1ebe..bd361c9707 100644 --- a/crates/deltalake-core/src/operations/delete.rs +++ b/crates/deltalake-core/src/operations/delete.rs @@ -21,6 +21,7 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::{Instant, SystemTime, UNIX_EPOCH}; +use crate::logstore::LogStoreRef; use datafusion::execution::context::{SessionContext, SessionState}; use datafusion::physical_expr::create_physical_expr; use datafusion::physical_plan::filter::FilterExec; @@ -40,7 +41,6 @@ use crate::kernel::{Action, Add, Remove}; use crate::operations::transaction::commit; use crate::operations::write::write_execution_plan; use crate::protocol::DeltaOperation; -use crate::storage::{DeltaObjectStore, ObjectStoreRef}; use crate::table::state::DeltaTableState; use crate::DeltaTable; @@ -54,7 +54,7 @@ pub struct DeleteBuilder { /// A snapshot of the table's state snapshot: DeltaTableState, /// Delta object store for handling data files - store: Arc, + log_store: LogStoreRef, /// Datafusion session state relevant for executing the input plan state: Option, /// Properties passed to underlying parquet writer for when files are rewritten @@ -84,11 +84,11 @@ pub struct DeleteMetrics { impl DeleteBuilder { /// Create a new [`DeleteBuilder`] - pub fn new(object_store: ObjectStoreRef, snapshot: DeltaTableState) -> Self { + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { Self { predicate: None, snapshot, - store: object_store, + log_store, state: None, app_metadata: None, writer_properties: None, @@ -125,7 +125,7 @@ impl DeleteBuilder { async fn excute_non_empty_expr( snapshot: &DeltaTableState, - object_store: ObjectStoreRef, + log_store: LogStoreRef, state: &SessionState, expression: &Expr, metrics: &mut DeleteMetrics, @@ -144,7 +144,7 @@ async fn excute_non_empty_expr( .partition_columns .clone(); - let scan = DeltaScanBuilder::new(snapshot, object_store.clone(), state) + let scan = DeltaScanBuilder::new(snapshot, log_store.clone(), state) .with_files(rewrite) .build() .await?; @@ -167,7 +167,7 @@ async fn excute_non_empty_expr( 
state.clone(), filter.clone(), table_partition_cols.clone(), - object_store.clone(), + log_store.object_store(), Some(snapshot.table_config().target_file_size() as usize), None, writer_properties, @@ -187,7 +187,7 @@ async fn excute_non_empty_expr( async fn execute( predicate: Option, - object_store: ObjectStoreRef, + log_store: LogStoreRef, snapshot: &DeltaTableState, state: SessionState, writer_properties: Option, @@ -197,7 +197,7 @@ async fn execute( let mut metrics = DeleteMetrics::default(); let scan_start = Instant::now(); - let candidates = find_files(snapshot, object_store.clone(), &state, predicate.clone()).await?; + let candidates = find_files(snapshot, log_store.clone(), &state, predicate.clone()).await?; metrics.scan_time_ms = Instant::now().duration_since(scan_start).as_micros(); let predicate = predicate.unwrap_or(Expr::Literal(ScalarValue::Boolean(Some(true)))); @@ -208,7 +208,7 @@ async fn execute( let write_start = Instant::now(); let add = excute_non_empty_expr( snapshot, - object_store.clone(), + log_store.clone(), &state, &predicate, &mut metrics, @@ -254,7 +254,7 @@ async fn execute( predicate: Some(fmt_expr_to_sql(&predicate)?), }; version = commit( - object_store.as_ref(), + log_store.as_ref(), &actions, operation, snapshot, @@ -278,7 +278,7 @@ impl std::future::IntoFuture for DeleteBuilder { let session = SessionContext::new(); // If a user provides their own their DF state then they must register the store themselves - register_store(this.store.clone(), session.runtime_env()); + register_store(this.log_store.clone(), session.runtime_env()); session.state() }); @@ -295,7 +295,7 @@ impl std::future::IntoFuture for DeleteBuilder { let ((actions, version), metrics) = execute( predicate, - this.store.clone(), + this.log_store.clone(), &this.snapshot, state, this.writer_properties, @@ -305,7 +305,7 @@ impl std::future::IntoFuture for DeleteBuilder { this.snapshot .merge(DeltaTableState::from_actions(actions, version)?, true, true); - let table = DeltaTable::new_with_state(this.store, this.snapshot); + let table = DeltaTable::new_with_state(this.log_store, this.snapshot); Ok((table, metrics)) }) diff --git a/crates/deltalake-core/src/operations/filesystem_check.rs b/crates/deltalake-core/src/operations/filesystem_check.rs index 31b261c4cb..b79f22b1f4 100644 --- a/crates/deltalake-core/src/operations/filesystem_check.rs +++ b/crates/deltalake-core/src/operations/filesystem_check.rs @@ -14,7 +14,6 @@ use std::collections::HashMap; use std::fmt::Debug; -use std::sync::Arc; use std::time::SystemTime; use std::time::UNIX_EPOCH; @@ -27,9 +26,9 @@ use url::{ParseError, Url}; use crate::errors::{DeltaResult, DeltaTableError}; use crate::kernel::{Action, Add, Remove}; +use crate::logstore::LogStoreRef; use crate::operations::transaction::commit; use crate::protocol::DeltaOperation; -use crate::storage::DeltaObjectStore; use crate::table::state::DeltaTableState; use crate::DeltaTable; @@ -40,7 +39,7 @@ pub struct FileSystemCheckBuilder { /// A snapshot of the to-be-checked table's state snapshot: DeltaTableState, /// Delta object store for handling data files - store: Arc, + log_store: LogStoreRef, /// Don't remove actions to the table log. 
Just determine which files can be removed dry_run: bool, } @@ -56,7 +55,7 @@ pub struct FileSystemCheckMetrics { struct FileSystemCheckPlan { /// Delta object store for handling data files - store: Arc, + log_store: LogStoreRef, /// Files that no longer exists in undlying ObjectStore but have active add actions pub files_to_remove: Vec, } @@ -74,10 +73,10 @@ fn is_absolute_path(path: &str) -> DeltaResult { impl FileSystemCheckBuilder { /// Create a new [`FileSystemCheckBuilder`] - pub fn new(store: Arc, state: DeltaTableState) -> Self { + pub fn new(log_store: LogStoreRef, state: DeltaTableState) -> Self { FileSystemCheckBuilder { snapshot: state, - store, + log_store, dry_run: false, } } @@ -91,7 +90,7 @@ impl FileSystemCheckBuilder { async fn create_fsck_plan(&self) -> DeltaResult { let mut files_relative: HashMap<&str, &Add> = HashMap::with_capacity(self.snapshot.files().len()); - let store = self.store.clone(); + let log_store = self.log_store.clone(); for active in self.snapshot.files() { if is_absolute_path(&active.path)? { @@ -103,7 +102,8 @@ impl FileSystemCheckBuilder { } } - let mut files = self.store.list(None).await?; + let object_store = log_store.object_store(); + let mut files = object_store.list(None).await?; while let Some(result) = files.next().await { let file = result?; files_relative.remove(file.location.as_ref()); @@ -120,7 +120,7 @@ impl FileSystemCheckBuilder { Ok(FileSystemCheckPlan { files_to_remove, - store, + log_store, }) } } @@ -156,7 +156,7 @@ impl FileSystemCheckPlan { } commit( - self.store.as_ref(), + self.log_store.as_ref(), &actions, DeltaOperation::FileSystemCheck {}, snapshot, @@ -183,7 +183,7 @@ impl std::future::IntoFuture for FileSystemCheckBuilder { let plan = this.create_fsck_plan().await?; if this.dry_run { return Ok(( - DeltaTable::new_with_state(this.store, this.snapshot), + DeltaTable::new_with_state(this.log_store, this.snapshot), FileSystemCheckMetrics { files_removed: plan.files_to_remove.into_iter().map(|f| f.path).collect(), dry_run: true, @@ -192,7 +192,7 @@ impl std::future::IntoFuture for FileSystemCheckBuilder { } let metrics = plan.execute(&this.snapshot).await?; - let mut table = DeltaTable::new_with_state(this.store, this.snapshot); + let mut table = DeltaTable::new_with_state(this.log_store, this.snapshot); table.update().await?; Ok((table, metrics)) }) diff --git a/crates/deltalake-core/src/operations/load.rs b/crates/deltalake-core/src/operations/load.rs index 7baa59e3e1..1a4c5c4cc6 100644 --- a/crates/deltalake-core/src/operations/load.rs +++ b/crates/deltalake-core/src/operations/load.rs @@ -7,7 +7,7 @@ use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; use futures::future::BoxFuture; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::storage::DeltaObjectStore; +use crate::logstore::LogStoreRef; use crate::table::state::DeltaTableState; use crate::DeltaTable; @@ -16,17 +16,17 @@ pub struct LoadBuilder { /// A snapshot of the to-be-loaded table's state snapshot: DeltaTableState, /// Delta object store for handling data files - store: Arc, + log_store: LogStoreRef, /// A sub-selection of columns to be loaded columns: Option>, } impl LoadBuilder { /// Create a new [`LoadBuilder`] - pub fn new(store: Arc, snapshot: DeltaTableState) -> Self { + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { Self { snapshot, - store, + log_store, columns: None, } } @@ -46,7 +46,7 @@ impl std::future::IntoFuture for LoadBuilder { let this = self; Box::pin(async move { - let table = 
DeltaTable::new_with_state(this.store, this.snapshot); + let table = DeltaTable::new_with_state(this.log_store, this.snapshot); let schema = table.state.arrow_schema()?; let projection = this .columns diff --git a/crates/deltalake-core/src/operations/merge.rs b/crates/deltalake-core/src/operations/merge.rs index 57621cb316..d38ddf0efb 100644 --- a/crates/deltalake-core/src/operations/merge.rs +++ b/crates/deltalake-core/src/operations/merge.rs @@ -68,10 +68,10 @@ use super::transaction::commit; use crate::delta_datafusion::expr::{fmt_expr_to_sql, parse_predicate_expression}; use crate::delta_datafusion::{register_store, DeltaScanBuilder}; use crate::kernel::{Action, Remove}; +use crate::logstore::LogStoreRef; use crate::operations::datafusion_utils::MetricObserverExec; use crate::operations::write::write_execution_plan; use crate::protocol::{DeltaOperation, MergePredicate}; -use crate::storage::{DeltaObjectStore, ObjectStoreRef}; use crate::table::state::DeltaTableState; use crate::{DeltaResult, DeltaTable, DeltaTableError}; @@ -107,7 +107,7 @@ pub struct MergeBuilder { /// The source data source: DataFrame, /// Delta object store for handling data files - object_store: Arc, + log_store: LogStoreRef, /// Datafusion session state relevant for executing the input plan state: Option, /// Properties passed to underlying parquet writer for when files are rewritten @@ -122,7 +122,7 @@ pub struct MergeBuilder { impl MergeBuilder { /// Create a new [`MergeBuilder`] pub fn new>( - object_store: ObjectStoreRef, + log_store: LogStoreRef, snapshot: DeltaTableState, predicate: E, source: DataFrame, @@ -132,7 +132,7 @@ impl MergeBuilder { predicate, source, snapshot, - object_store, + log_store, source_alias: None, target_alias: None, state: None, @@ -561,7 +561,7 @@ pub struct MergeMetrics { async fn execute( predicate: Expression, source: DataFrame, - object_store: ObjectStoreRef, + log_store: LogStoreRef, snapshot: &DeltaTableState, state: SessionState, writer_properties: Option, @@ -590,7 +590,7 @@ async fn execute( // predicates also need to be considered when pruning let target = Arc::new( - DeltaScanBuilder::new(snapshot, object_store.clone(), &state) + DeltaScanBuilder::new(snapshot, log_store.clone(), &state) .with_schema(snapshot.input_schema()?) 
.build() .await?, @@ -1126,7 +1126,7 @@ async fn execute( state.clone(), projection.clone(), table_partition_cols.clone(), - object_store.clone(), + log_store.object_store().clone(), Some(snapshot.table_config().target_file_size() as usize), None, writer_properties, @@ -1188,7 +1188,7 @@ async fn execute( not_matched_by_source_predicates: not_match_source_operations, }; version = commit( - object_store.as_ref(), + log_store.as_ref(), &actions, operation, snapshot, @@ -1212,7 +1212,7 @@ impl std::future::IntoFuture for MergeBuilder { let session = SessionContext::new(); // If a user provides their own their DF state then they must register the store themselves - register_store(this.object_store.clone(), session.runtime_env()); + register_store(this.log_store.clone(), session.runtime_env()); session.state() }); @@ -1220,7 +1220,7 @@ impl std::future::IntoFuture for MergeBuilder { let ((actions, version), metrics) = execute( this.predicate, this.source, - this.object_store.clone(), + this.log_store.clone(), &this.snapshot, state, this.writer_properties, @@ -1236,7 +1236,7 @@ impl std::future::IntoFuture for MergeBuilder { this.snapshot .merge(DeltaTableState::from_actions(actions, version)?, true, true); - let table = DeltaTable::new_with_state(this.object_store, this.snapshot); + let table = DeltaTable::new_with_state(this.log_store, this.snapshot); Ok((table, metrics)) }) diff --git a/crates/deltalake-core/src/operations/mod.rs b/crates/deltalake-core/src/operations/mod.rs index 35301f067e..0406272a5b 100644 --- a/crates/deltalake-core/src/operations/mod.rs +++ b/crates/deltalake-core/src/operations/mod.rs @@ -107,60 +107,60 @@ impl DeltaOps { /// ``` #[must_use] pub fn create(self) -> CreateBuilder { - CreateBuilder::default().with_object_store(self.0.object_store()) + CreateBuilder::default().with_log_store(self.0.log_store) } /// Load data from a DeltaTable #[cfg(feature = "datafusion")] #[must_use] pub fn load(self) -> LoadBuilder { - LoadBuilder::new(self.0.object_store(), self.0.state) + LoadBuilder::new(self.0.log_store, self.0.state) } /// Write data to Delta table #[cfg(feature = "datafusion")] #[must_use] pub fn write(self, batches: impl IntoIterator) -> WriteBuilder { - WriteBuilder::new(self.0.object_store(), self.0.state).with_input_batches(batches) + WriteBuilder::new(self.0.log_store, self.0.state).with_input_batches(batches) } /// Vacuum stale files from delta table #[must_use] pub fn vacuum(self) -> VacuumBuilder { - VacuumBuilder::new(self.0.object_store(), self.0.state) + VacuumBuilder::new(self.0.log_store, self.0.state) } /// Audit active files with files present on the filesystem #[must_use] pub fn filesystem_check(self) -> FileSystemCheckBuilder { - FileSystemCheckBuilder::new(self.0.object_store(), self.0.state) + FileSystemCheckBuilder::new(self.0.log_store, self.0.state) } /// Audit active files with files present on the filesystem #[cfg(all(feature = "arrow", feature = "parquet"))] #[must_use] pub fn optimize<'a>(self) -> OptimizeBuilder<'a> { - OptimizeBuilder::new(self.0.object_store(), self.0.state) + OptimizeBuilder::new(self.0.log_store, self.0.state) } /// Delete data from Delta table #[cfg(feature = "datafusion")] #[must_use] pub fn delete(self) -> DeleteBuilder { - DeleteBuilder::new(self.0.object_store(), self.0.state) + DeleteBuilder::new(self.0.log_store, self.0.state) } /// Update data from Delta table #[cfg(feature = "datafusion")] #[must_use] pub fn update(self) -> UpdateBuilder { - UpdateBuilder::new(self.0.object_store(), self.0.state) + 
UpdateBuilder::new(self.0.log_store, self.0.state) } /// Restore delta table to a specified version or datetime #[must_use] pub fn restore(self) -> RestoreBuilder { - RestoreBuilder::new(self.0.object_store(), self.0.state) + RestoreBuilder::new(self.0.log_store, self.0.state) } /// Update data from Delta table @@ -171,12 +171,7 @@ impl DeltaOps { source: datafusion::prelude::DataFrame, predicate: E, ) -> MergeBuilder { - MergeBuilder::new( - self.0.object_store(), - self.0.state, - predicate.into(), - source, - ) + MergeBuilder::new(self.0.log_store, self.0.state, predicate.into(), source) } } diff --git a/crates/deltalake-core/src/operations/optimize.rs b/crates/deltalake-core/src/operations/optimize.rs index 7feecd1e56..0467d43a8b 100644 --- a/crates/deltalake-core/src/operations/optimize.rs +++ b/crates/deltalake-core/src/operations/optimize.rs @@ -42,6 +42,7 @@ use super::transaction::commit; use super::writer::{PartitionWriter, PartitionWriterConfig}; use crate::errors::{DeltaResult, DeltaTableError}; use crate::kernel::{Action, Remove}; +use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; use crate::storage::ObjectStoreRef; use crate::table::state::DeltaTableState; @@ -155,7 +156,7 @@ pub struct OptimizeBuilder<'a> { /// A snapshot of the to-be-optimized table's state snapshot: DeltaTableState, /// Delta object store for handling data files - store: ObjectStoreRef, + log_store: LogStoreRef, /// Filters to select specific table partitions to be optimized filters: &'a [PartitionFilter], /// Desired file size after bin-packing files @@ -177,10 +178,10 @@ pub struct OptimizeBuilder<'a> { impl<'a> OptimizeBuilder<'a> { /// Create a new [`OptimizeBuilder`] - pub fn new(store: ObjectStoreRef, snapshot: DeltaTableState) -> Self { + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { Self { snapshot, - store, + log_store, filters: &[], target_size: None, writer_properties: None, @@ -274,14 +275,14 @@ impl<'a> std::future::IntoFuture for OptimizeBuilder<'a> { )?; let metrics = plan .execute( - this.store.clone(), + this.log_store.clone(), &this.snapshot, this.max_concurrent_tasks, this.max_spill_size, this.min_commit_interval, ) .await?; - let mut table = DeltaTable::new_with_state(this.store, this.snapshot); + let mut table = DeltaTable::new_with_state(this.log_store, this.snapshot); table.update().await?; Ok((table, metrics)) }) @@ -584,7 +585,7 @@ impl MergePlan { /// Perform the operations outlined in the plan. 
pub async fn execute( mut self, - object_store: ObjectStoreRef, + log_store: LogStoreRef, snapshot: &DeltaTableState, max_concurrent_tasks: usize, #[allow(unused_variables)] // used behind a feature flag @@ -607,7 +608,7 @@ impl MergePlan { for file in files.iter() { debug!(" file {}", file.location); } - let object_store_ref = object_store.clone(); + let object_store_ref = log_store.object_store().clone(); let batch_stream = futures::stream::iter(files.clone()) .then(move |file| { let object_store_ref = object_store_ref.clone(); @@ -625,7 +626,7 @@ impl MergePlan { self.task_parameters.clone(), partition, files, - object_store.clone(), + log_store.object_store().clone(), futures::future::ready(Ok(batch_stream)), )); util::flatten_join_error(rewrite_result) @@ -635,13 +636,15 @@ impl MergePlan { #[cfg(not(feature = "datafusion"))] let exec_context = Arc::new(zorder::ZOrderExecContext::new( zorder_columns, - object_store.clone(), + log_store.object_store().clone(), // If there aren't enough bins to use all threads, then instead // use threads within the bins. This is important for the case where // the table is un-partitioned, in which case the entire table is just // one big bin. bins.len() <= num_cpus::get(), )); + let object_store = log_store.object_store().clone(); + #[cfg(feature = "datafusion")] let exec_context = Arc::new(zorder::ZOrderExecContext::new( zorder_columns, @@ -649,7 +652,6 @@ impl MergePlan { max_spill_size, )?); let task_parameters = self.task_parameters.clone(); - let object_store = object_store.clone(); futures::stream::iter(bins) .map(move |(partition, files)| { let batch_stream = Self::read_zorder(files.clone(), exec_context.clone()); @@ -671,7 +673,7 @@ impl MergePlan { let mut stream = stream.buffer_unordered(max_concurrent_tasks); - let mut table = DeltaTable::new_with_state(object_store.clone(), snapshot.clone()); + let mut table = DeltaTable::new_with_state(log_store.clone(), snapshot.clone()); // Actions buffered so far. These will be flushed either at the end // or when we reach the commit interval. @@ -720,7 +722,7 @@ impl MergePlan { //// TODO: Check for remove actions on optimized partitions. If a //// optimized partition was updated then abort the commit. Requires (#593). commit( - table.object_store().as_ref(), + table.log_store.as_ref(), &actions, self.task_parameters.input_parameters.clone().into(), table.get_state(), diff --git a/crates/deltalake-core/src/operations/restore.rs b/crates/deltalake-core/src/operations/restore.rs index a356b5b312..c391de6f04 100644 --- a/crates/deltalake-core/src/operations/restore.rs +++ b/crates/deltalake-core/src/operations/restore.rs @@ -10,7 +10,8 @@ //! 5) If ignore_missing_files option is false (default value) check availability of AddFile //! in file system. //! 6) Commit Protocol, all RemoveFile and AddFile actions -//! into delta log using `try_commit_transaction` (commit will be failed in case of parallel transaction) +//! into delta log using `LogStore::write_commit_entry` (commit will be failed in case of parallel transaction) +//! TODO: comment is outdated //! 7) If table was modified in parallel then ignore restore and raise exception. //! //! 
# Example @@ -31,9 +32,9 @@ use object_store::ObjectStore; use serde::Serialize; use crate::kernel::{Action, Add, Protocol, Remove}; -use crate::operations::transaction::{prepare_commit, try_commit_transaction, TransactionError}; +use crate::logstore::LogStoreRef; +use crate::operations::transaction::{prepare_commit, TransactionError}; use crate::protocol::DeltaOperation; -use crate::storage::ObjectStoreRef; use crate::table::state::DeltaTableState; use crate::{DeltaResult, DeltaTable, DeltaTableConfig, DeltaTableError, ObjectStoreError}; @@ -74,7 +75,7 @@ pub struct RestoreBuilder { /// A snapshot of the to-be-restored table's state snapshot: DeltaTableState, /// Delta object store for handling data files - store: ObjectStoreRef, + log_store: LogStoreRef, /// Version to restore version_to_restore: Option, /// Datetime to restore @@ -87,10 +88,10 @@ pub struct RestoreBuilder { impl RestoreBuilder { /// Create a new [`RestoreBuilder`] - pub fn new(store: ObjectStoreRef, snapshot: DeltaTableState) -> Self { + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { Self { snapshot, - store, + log_store, version_to_restore: None, datetime_to_restore: None, ignore_missing_files: false, @@ -125,7 +126,7 @@ impl RestoreBuilder { } async fn execute( - object_store: ObjectStoreRef, + log_store: LogStoreRef, snapshot: DeltaTableState, version_to_restore: Option, datetime_to_restore: Option>, @@ -138,7 +139,7 @@ async fn execute( { return Err(DeltaTableError::from(RestoreError::InvalidRestoreParameter)); } - let mut table = DeltaTable::new(object_store.clone(), DeltaTableConfig::default()); + let mut table = DeltaTable::new(log_store.clone(), DeltaTableConfig::default()); let version = match datetime_to_restore { Some(datetime) => { table.load_with_datetime(datetime).await?; @@ -195,7 +196,7 @@ async fn execute( .collect(); if !ignore_missing_files { - check_files_available(object_store.as_ref(), &files_to_add).await?; + check_files_available(log_store.object_store().as_ref(), &files_to_add).await?; } let metrics = RestoreMetrics { @@ -238,7 +239,7 @@ async fn execute( actions.extend(files_to_remove.into_iter().map(Action::Remove)); let commit = prepare_commit( - object_store.as_ref(), + log_store.object_store().as_ref(), &DeltaOperation::Restore { version: version_to_restore, datetime: datetime_to_restore.map(|time| -> i64 { time.timestamp_millis() }), @@ -249,13 +250,13 @@ async fn execute( ) .await?; let commit_version = snapshot.version() + 1; - match try_commit_transaction(object_store.as_ref(), &commit, commit_version).await { + match log_store.write_commit_entry(commit_version, &commit).await { Ok(_) => {} Err(err @ TransactionError::VersionAlreadyExists(_)) => { return Err(err.into()); } Err(err) => { - object_store.delete(&commit).await?; + log_store.object_store().delete(&commit).await?; return Err(err.into()); } } @@ -291,7 +292,7 @@ impl std::future::IntoFuture for RestoreBuilder { Box::pin(async move { let metrics = execute( - this.store.clone(), + this.log_store.clone(), this.snapshot.clone(), this.version_to_restore, this.datetime_to_restore, @@ -299,7 +300,7 @@ impl std::future::IntoFuture for RestoreBuilder { this.protocol_downgrade_allowed, ) .await?; - let mut table = DeltaTable::new_with_state(this.store, this.snapshot); + let mut table = DeltaTable::new_with_state(this.log_store, this.snapshot); table.update().await?; Ok((table, metrics)) }) diff --git a/crates/deltalake-core/src/operations/transaction/mod.rs 
b/crates/deltalake-core/src/operations/transaction/mod.rs index c31c349fd7..e5e808d2d5 100644 --- a/crates/deltalake-core/src/operations/transaction/mod.rs +++ b/crates/deltalake-core/src/operations/transaction/mod.rs @@ -10,8 +10,8 @@ use serde_json::Value; use crate::crate_version; use crate::errors::{DeltaResult, DeltaTableError}; use crate::kernel::{Action, CommitInfo}; +use crate::logstore::LogStore; use crate::protocol::DeltaOperation; -use crate::storage::commit_uri_from_version; use crate::table::state::DeltaTableState; mod conflict_checker; @@ -130,6 +130,7 @@ pub(crate) fn get_commit_bytes( /// Low-level transaction API. Creates a temporary commit file. Once created, /// the transaction object could be dropped and the actual commit could be executed /// with `DeltaTable.try_commit_transaction`. +/// TODO: comment is outdated now pub(crate) async fn prepare_commit<'a>( storage: &dyn ObjectStore, operation: &DeltaOperation, @@ -150,42 +151,26 @@ pub(crate) async fn prepare_commit<'a>( Ok(path) } -/// Tries to commit a prepared commit file. Returns [DeltaTableError::VersionAlreadyExists] -/// if the given `version` already exists. The caller should handle the retry logic itself. -/// This is low-level transaction API. If user does not want to maintain the commit loop then -/// the `DeltaTransaction.commit` is desired to be used as it handles `try_commit_transaction` -/// with retry logic. -pub(crate) async fn try_commit_transaction( - storage: &dyn ObjectStore, - tmp_commit: &Path, - version: i64, -) -> Result { - // move temporary commit file to delta log directory - // rely on storage to fail if the file already exists - - storage - .rename_if_not_exists(tmp_commit, &commit_uri_from_version(version)) - .await - .map_err(|err| match err { - ObjectStoreError::AlreadyExists { .. } => { - TransactionError::VersionAlreadyExists(version) - } - _ => TransactionError::from(err), - })?; - Ok(version) -} - /// Commit a transaction, with up to 15 retries. This is higher-level transaction API. /// /// Will error early if the a concurrent transaction has already been committed /// and conflicts with this transaction. pub async fn commit( - storage: &dyn ObjectStore, + log_store: &dyn LogStore, actions: &Vec, operation: DeltaOperation, read_snapshot: &DeltaTableState, app_metadata: Option>, ) -> DeltaResult { - commit_with_retries(storage, actions, operation, read_snapshot, app_metadata, 15).await + commit_with_retries( + log_store, + actions, + operation, + read_snapshot, + app_metadata, + 15, + ) + .await } /// Commit a transaction, with up configurable number of retries. This is higher-level transaction API. @@ -193,24 +178,35 @@ pub async fn commit( /// The function will error early if the a concurrent transaction has already been committed /// and conflicts with this transaction. 
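At call sites the change is mechanical: pass the table's log store where a raw object store was passed before. A sketch, assuming the `DeltaTable::log_store()` accessor added later in this patch; `table`, `actions`, and `operation` are placeholders:

    // Before this patch: commit(table.object_store().as_ref(), ...)
    let version = commit(
        table.log_store().as_ref(), // &dyn LogStore now owns the commit I/O
        &actions,
        operation,
        &table.state,
        None, // no additional app metadata
    )
    .await?;
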
pub async fn commit_with_retries( - storage: &dyn ObjectStore, + log_store: &dyn LogStore, actions: &Vec, operation: DeltaOperation, read_snapshot: &DeltaTableState, app_metadata: Option>, max_retries: usize, ) -> DeltaResult { - let tmp_commit = - prepare_commit(storage, &operation, actions, read_snapshot, app_metadata).await?; + let tmp_commit = prepare_commit( + log_store.object_store().as_ref(), + &operation, + actions, + read_snapshot, + app_metadata, + ) + .await?; let mut attempt_number = 1; while attempt_number <= max_retries { let version = read_snapshot.version() + attempt_number as i64; - match try_commit_transaction(storage, &tmp_commit, version).await { - Ok(version) => return Ok(version), + match log_store.write_commit_entry(version, &tmp_commit).await { + Ok(()) => return Ok(version), Err(TransactionError::VersionAlreadyExists(version)) => { - let summary = WinningCommitSummary::try_new(storage, version - 1, version).await?; + let summary = WinningCommitSummary::try_new( + log_store.object_store().as_ref(), + version - 1, + version, + ) + .await?; let transaction_info = TransactionInfo::try_new( read_snapshot, operation.read_predicate(), @@ -225,13 +221,13 @@ pub async fn commit_with_retries( attempt_number += 1; } Err(err) => { - storage.delete(&tmp_commit).await?; + log_store.object_store().delete(&tmp_commit).await?; return Err(TransactionError::CommitConflict(err).into()); } }; } Err(err) => { - storage.delete(&tmp_commit).await?; + log_store.object_store().delete(&tmp_commit).await?; return Err(err.into()); } } @@ -242,11 +238,16 @@ pub async fn commit_with_retries( #[cfg(all(test, feature = "parquet"))] mod tests { + use std::{collections::HashMap, sync::Arc}; + use self::test_utils::{create_remove_action, init_table_actions}; use super::*; - use crate::DeltaConfigKey; + use crate::{ + logstore::default_logstore::DefaultLogStore, storage::commit_uri_from_version, + DeltaConfigKey, + }; use object_store::memory::InMemory; - use std::collections::HashMap; + use url::Url; #[test] fn test_commit_uri_from_version() { @@ -290,18 +291,25 @@ mod tests { #[tokio::test] async fn test_try_commit_transaction() { - let store = InMemory::new(); + let store = Arc::new(InMemory::new()); + let url = Url::parse("mem://what/is/this").unwrap(); + let log_store = DefaultLogStore::new( + store.clone(), + crate::logstore::LogStoreConfig { + location: url, + options: HashMap::new().into(), + }, + ); let tmp_path = Path::from("_delta_log/tmp"); let version_path = Path::from("_delta_log/00000000000000000000.json"); store.put(&tmp_path, bytes::Bytes::new()).await.unwrap(); store.put(&version_path, bytes::Bytes::new()).await.unwrap(); + let res = log_store.write_commit_entry(0, &tmp_path).await; // fails if file version already exists - let res = try_commit_transaction(&store, &tmp_path, 0).await; assert!(res.is_err()); // succeeds for next version - let res = try_commit_transaction(&store, &tmp_path, 1).await.unwrap(); - assert_eq!(res, 1); + log_store.write_commit_entry(1, &tmp_path).await.unwrap(); } } diff --git a/crates/deltalake-core/src/operations/transaction/test_utils.rs b/crates/deltalake-core/src/operations/transaction/test_utils.rs index b52b1a1c7b..56b0894019 100644 --- a/crates/deltalake-core/src/operations/transaction/test_utils.rs +++ b/crates/deltalake-core/src/operations/transaction/test_utils.rs @@ -1,7 +1,7 @@ #![allow(unused)] use std::collections::HashMap; -use super::{prepare_commit, try_commit_transaction}; +use super::prepare_commit; use crate::kernel::{ Action, Add, 
CommitInfo, DataType, Metadata, PrimitiveType, Protocol, Remove, StructField, StructType, @@ -121,7 +121,7 @@ pub async fn create_initialized_table( partition_cols: &[String], configuration: Option>>, ) -> DeltaTable { - let storage = DeltaTableBuilder::from_uri("memory://") + let log_store = DeltaTableBuilder::from_uri("memory://") .build_storage() .unwrap(); let table_schema = StructType::new(vec![ @@ -161,11 +161,19 @@ pub async fn create_initialized_table( ), }; let actions = init_table_actions(None); - let prepared_commit = prepare_commit(storage.as_ref(), &operation, &actions, &state, None) - .await - .unwrap(); - try_commit_transaction(storage.as_ref(), &prepared_commit, 0) + let prepared_commit = prepare_commit( + log_store.object_store().as_ref(), + &operation, + &actions, + &state, + None, + ) + .await + .unwrap(); + + log_store + .write_commit_entry(0, &prepared_commit) .await .unwrap(); - DeltaTable::new_with_state(storage, state) + DeltaTable::new_with_state(log_store, state) } diff --git a/crates/deltalake-core/src/operations/update.rs b/crates/deltalake-core/src/operations/update.rs index bc458bdd53..559f28868d 100644 --- a/crates/deltalake-core/src/operations/update.rs +++ b/crates/deltalake-core/src/operations/update.rs @@ -49,8 +49,8 @@ use super::write::write_execution_plan; use crate::delta_datafusion::expr::fmt_expr_to_sql; use crate::delta_datafusion::{find_files, register_store, DeltaScanBuilder}; use crate::kernel::{Action, Remove}; +use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; -use crate::storage::{DeltaObjectStore, ObjectStoreRef}; use crate::table::state::DeltaTableState; use crate::{DeltaResult, DeltaTable, DeltaTableError}; @@ -64,7 +64,7 @@ pub struct UpdateBuilder { /// A snapshot of the table's state snapshot: DeltaTableState, /// Delta object store for handling data files - object_store: Arc, + log_store: LogStoreRef, /// Datafusion session state relevant for executing the input plan state: Option, /// Properties passed to underlying parquet writer for when files are rewritten @@ -95,12 +95,12 @@ pub struct UpdateMetrics { impl UpdateBuilder { /// Create a new ['UpdateBuilder'] - pub fn new(object_store: ObjectStoreRef, snapshot: DeltaTableState) -> Self { + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { Self { predicate: None, updates: HashMap::new(), snapshot, - object_store, + log_store, state: None, writer_properties: None, app_metadata: None, @@ -164,7 +164,7 @@ impl UpdateBuilder { async fn execute( predicate: Option, updates: HashMap, - object_store: ObjectStoreRef, + log_store: LogStoreRef, snapshot: &DeltaTableState, state: SessionState, writer_properties: Option, @@ -213,7 +213,7 @@ async fn execute( let table_partition_cols = current_metadata.partition_columns.clone(); let scan_start = Instant::now(); - let candidates = find_files(snapshot, object_store.clone(), &state, predicate.clone()).await?; + let candidates = find_files(snapshot, log_store.clone(), &state, predicate.clone()).await?; metrics.scan_time_ms = Instant::now().duration_since(scan_start).as_millis() as u64; if candidates.candidates.is_empty() { @@ -225,7 +225,7 @@ async fn execute( let execution_props = state.execution_props(); // For each rewrite evaluate the predicate and then modify each expression // to either compute the new value or obtain the old one then write these batches - let scan = DeltaScanBuilder::new(snapshot, object_store.clone(), &state) + let scan = DeltaScanBuilder::new(snapshot, log_store.clone(), &state) 
.with_files(&candidates.candidates) .build() .await?; @@ -357,7 +357,7 @@ async fn execute( state.clone(), projection.clone(), table_partition_cols.clone(), - object_store.clone(), + log_store.object_store().clone(), Some(snapshot.table_config().target_file_size() as usize), None, writer_properties, @@ -407,7 +407,7 @@ async fn execute( predicate: Some(fmt_expr_to_sql(&predicate)?), }; version = commit( - object_store.as_ref(), + log_store.as_ref(), &actions, operation, snapshot, @@ -430,7 +430,7 @@ impl std::future::IntoFuture for UpdateBuilder { let session = SessionContext::new(); // If a user provides their own their DF state then they must register the store themselves - register_store(this.object_store.clone(), session.runtime_env()); + register_store(this.log_store.clone(), session.runtime_env()); session.state() }); @@ -438,7 +438,7 @@ impl std::future::IntoFuture for UpdateBuilder { let ((actions, version), metrics) = execute( this.predicate, this.updates, - this.object_store.clone(), + this.log_store.clone(), &this.snapshot, state, this.writer_properties, @@ -449,7 +449,7 @@ impl std::future::IntoFuture for UpdateBuilder { this.snapshot .merge(DeltaTableState::from_actions(actions, version)?, true, true); - let table = DeltaTable::new_with_state(this.object_store, this.snapshot); + let table = DeltaTable::new_with_state(this.log_store, this.snapshot); Ok((table, metrics)) }) diff --git a/crates/deltalake-core/src/operations/vacuum.rs b/crates/deltalake-core/src/operations/vacuum.rs index 5499c0be92..2c4e00793c 100644 --- a/crates/deltalake-core/src/operations/vacuum.rs +++ b/crates/deltalake-core/src/operations/vacuum.rs @@ -37,8 +37,8 @@ use super::transaction::commit; use crate::crate_version; use crate::errors::{DeltaResult, DeltaTableError}; use crate::kernel::Action; +use crate::logstore::{LogStore, LogStoreRef}; use crate::protocol::DeltaOperation; -use crate::storage::DeltaObjectStore; use crate::table::state::DeltaTableState; use crate::DeltaTable; @@ -82,7 +82,7 @@ pub struct VacuumBuilder { /// A snapshot of the to-be-vacuumed table's state snapshot: DeltaTableState, /// Delta object store for handling data files - store: Arc, + log_store: LogStoreRef, /// Period of stale files allowed. 
retention_period: Option, /// Validate the retention period is not below the retention period configured in the table @@ -125,10 +125,10 @@ pub struct VacuumEndOperationMetrics { /// Methods to specify various vacuum options and to execute the operation impl VacuumBuilder { /// Create a new [`VacuumBuilder`] - pub fn new(store: Arc, snapshot: DeltaTableState) -> Self { + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { VacuumBuilder { snapshot, - store, + log_store, retention_period: None, enforce_retention_duration: true, dry_run: false, @@ -183,8 +183,11 @@ impl VacuumBuilder { let mut files_to_delete = vec![]; let mut file_sizes = vec![]; - let mut all_files = self.store.list(None).await.map_err(DeltaTableError::from)?; - + let object_store = self.log_store.object_store(); + let mut all_files = object_store + .list(None) + .await + .map_err(DeltaTableError::from)?; let partition_columns = &self .snapshot .current_metadata() @@ -245,7 +248,7 @@ impl std::future::IntoFuture for VacuumBuilder { let plan = this.create_vacuum_plan().await?; if this.dry_run { return Ok(( - DeltaTable::new_with_state(this.store, this.snapshot), + DeltaTable::new_with_state(this.log_store, this.snapshot), VacuumMetrics { files_deleted: plan.files_to_delete.iter().map(|f| f.to_string()).collect(), dry_run: true, @@ -253,9 +256,11 @@ impl std::future::IntoFuture for VacuumBuilder { )); } - let metrics = plan.execute(&this.store, &this.snapshot).await?; + let metrics = plan + .execute(this.log_store.as_ref(), &this.snapshot) + .await?; Ok(( - DeltaTable::new_with_state(this.store, this.snapshot), + DeltaTable::new_with_state(this.log_store, this.snapshot), metrics, )) }) @@ -280,7 +285,7 @@ impl VacuumPlan { /// Execute the vacuum plan and delete files from underlying storage pub async fn execute( self, - store: &DeltaObjectStore, + store: &dyn LogStore, snapshot: &DeltaTableState, ) -> Result { if self.files_to_delete.is_empty() { @@ -329,6 +334,7 @@ impl VacuumPlan { .boxed(); let files_deleted = store + .object_store() .delete_stream(locations) .map(|res| match res { Ok(path) => Ok(path.to_string()), @@ -417,7 +423,7 @@ mod tests { async fn vacuum_delta_8_0_table() { let table = open_table("./tests/data/delta-0.8.0").await.unwrap(); - let result = VacuumBuilder::new(table.object_store(), table.state.clone()) + let result = VacuumBuilder::new(table.log_store, table.state.clone()) .with_retention_period(Duration::hours(1)) .with_dry_run(true) .await; @@ -425,7 +431,7 @@ mod tests { assert!(result.is_err()); let table = open_table("./tests/data/delta-0.8.0").await.unwrap(); - let (table, result) = VacuumBuilder::new(table.object_store(), table.state) + let (table, result) = VacuumBuilder::new(table.log_store, table.state) .with_retention_period(Duration::hours(0)) .with_dry_run(true) .with_enforce_retention_duration(false) @@ -437,7 +443,7 @@ mod tests { vec!["part-00001-911a94a2-43f6-4acb-8620-5e68c2654989-c000.snappy.parquet"] ); - let (table, result) = VacuumBuilder::new(table.object_store(), table.state) + let (table, result) = VacuumBuilder::new(table.log_store, table.state) .with_retention_period(Duration::hours(169)) .with_dry_run(true) .await @@ -454,7 +460,7 @@ mod tests { .as_secs() / 3600; let empty: Vec = Vec::new(); - let (_table, result) = VacuumBuilder::new(table.object_store(), table.state) + let (_table, result) = VacuumBuilder::new(table.log_store, table.state) .with_retention_period(Duration::hours(retention_hours as i64)) .with_dry_run(true) .await diff --git 
a/crates/deltalake-core/src/operations/write.rs b/crates/deltalake-core/src/operations/write.rs index 45bdaaeff5..dec4b7ced7 100644 --- a/crates/deltalake-core/src/operations/write.rs +++ b/crates/deltalake-core/src/operations/write.rs @@ -44,8 +44,9 @@ use super::{transaction::commit, CreateBuilder}; use crate::delta_datafusion::DeltaDataChecker; use crate::errors::{DeltaResult, DeltaTableError}; use crate::kernel::{Action, Add, Remove, StructType}; +use crate::logstore::LogStoreRef; use crate::protocol::{DeltaOperation, SaveMode}; -use crate::storage::{DeltaObjectStore, ObjectStoreRef}; +use crate::storage::ObjectStoreRef; use crate::table::state::DeltaTableState; use crate::writer::record_batch::divide_by_partition_values; use crate::writer::utils::PartitionPath; @@ -90,7 +91,7 @@ pub struct WriteBuilder { /// A snapshot of the to-be-loaded table's state snapshot: DeltaTableState, /// Delta object store for handling data files - store: Arc, + log_store: LogStoreRef, /// The input plan input: Option>, /// Datafusion session state relevant for executing the input plan @@ -117,10 +118,10 @@ pub struct WriteBuilder { impl WriteBuilder { /// Create a new [`WriteBuilder`] - pub fn new(store: Arc, snapshot: DeltaTableState) -> Self { + pub fn new(log_store: LogStoreRef, snapshot: DeltaTableState) -> Self { Self { snapshot, - store, + log_store, input: None, state: None, mode: SaveMode::Append, @@ -210,7 +211,7 @@ impl WriteBuilder { } async fn check_preconditions(&self) -> DeltaResult> { - match self.store.is_delta_table_location().await? { + match self.log_store.is_delta_table_location().await? { true => { let min_writer = self.snapshot.min_writer_version(); if min_writer > MAX_SUPPORTED_WRITER_VERSION { @@ -218,7 +219,7 @@ impl WriteBuilder { } else { match self.mode { SaveMode::ErrorIfExists => { - Err(WriteError::AlreadyExists(self.store.root_uri()).into()) + Err(WriteError::AlreadyExists(self.log_store.root_uri()).into()) } _ => Ok(vec![]), } @@ -236,7 +237,7 @@ impl WriteBuilder { Err(WriteError::MissingData) }?; let mut builder = CreateBuilder::new() - .with_object_store(self.store.clone()) + .with_log_store(self.log_store.clone()) .with_columns(schema.fields().clone()); if let Some(partition_columns) = self.partition_columns.as_ref() { builder = builder.with_partition_columns(partition_columns.clone()) @@ -356,7 +357,7 @@ impl std::future::IntoFuture for WriteBuilder { let schema = batches[0].schema(); let table_schema = this .snapshot - .physical_arrow_schema(this.store.clone()) + .physical_arrow_schema(this.log_store.object_store().clone()) .await .or_else(|_| this.snapshot.arrow_schema()) .unwrap_or(schema.clone()); @@ -418,7 +419,7 @@ impl std::future::IntoFuture for WriteBuilder { state, plan, partition_columns.clone(), - this.store.clone(), + this.log_store.object_store().clone(), this.target_file_size, this.write_batch_size, this.writer_properties, @@ -468,7 +469,7 @@ impl std::future::IntoFuture for WriteBuilder { }; let version = commit( - this.store.as_ref(), + this.log_store.as_ref(), &actions, DeltaOperation::Write { mode: this.mode, @@ -492,7 +493,7 @@ impl std::future::IntoFuture for WriteBuilder { // TODO should we build checkpoints based on config? 
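Every builder touched by this patch follows the same split: hold a `LogStoreRef`, use the log store for table-level concerns (`root_uri`, `is_delta_table_location`, committing), and reach for `object_store()` only where raw byte I/O is needed. A sketch of that split, assuming the `build_storage()` change shown later in this patch; the URI is a placeholder:

    // build_storage() now yields a LogStoreRef instead of a DeltaObjectStore.
    let log_store = DeltaTableBuilder::from_uri("memory://").build_storage()?;

    // Table-level helpers stay on the log store ...
    if log_store.is_delta_table_location().await? {
        println!("table already exists at {}", log_store.root_uri());
    }

    // ... while data-file reads and writes go through its inner object store.
    let object_store = log_store.object_store();
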
- Ok(DeltaTable::new_with_state(this.store, this.snapshot)) + Ok(DeltaTable::new_with_state(this.log_store, this.snapshot)) }) } } diff --git a/crates/deltalake-core/src/operations/writer.rs b/crates/deltalake-core/src/operations/writer.rs index 0bba167e33..6d551ecb96 100644 --- a/crates/deltalake-core/src/operations/writer.rs +++ b/crates/deltalake-core/src/operations/writer.rs @@ -406,9 +406,10 @@ mod tests { #[tokio::test] async fn test_write_partition() { - let object_store = DeltaTableBuilder::from_uri("memory://") + let log_store = DeltaTableBuilder::from_uri("memory://") .build_storage() .unwrap(); + let object_store = log_store.object_store(); let batch = get_record_batch(None, false); // write single un-partitioned batch @@ -439,12 +440,13 @@ mod tests { let object_store = DeltaTableBuilder::from_uri("memory://") .build_storage() - .unwrap(); + .unwrap() + .object_store(); let properties = WriterProperties::builder() .set_max_row_group_size(1024) .build(); // configure small target file size and and row group size so we can observe multiple files written - let mut writer = get_writer(object_store.clone(), &batch, Some(properties), Some(10_000)); + let mut writer = get_writer(object_store, &batch, Some(properties), Some(10_000)); writer.write(&batch).await.unwrap(); // check that we have written more then once file, and no more then 1 is below target size diff --git a/crates/deltalake-core/src/protocol/checkpoints.rs b/crates/deltalake-core/src/protocol/checkpoints.rs index fc23c1d28b..f48fbfbd76 100644 --- a/crates/deltalake-core/src/protocol/checkpoints.rs +++ b/crates/deltalake-core/src/protocol/checkpoints.rs @@ -23,7 +23,7 @@ use crate::kernel::{ Action, Add as AddAction, DataType, Metadata, PrimitiveType, Protocol, StructField, StructType, Txn, }; -use crate::storage::DeltaObjectStore; +use crate::logstore::LogStore; use crate::table::state::DeltaTableState; use crate::table::{CheckPoint, CheckPointBuilder}; use crate::{open_table_with_version, DeltaTable}; @@ -70,7 +70,7 @@ pub const CHECKPOINT_RECORD_BATCH_SIZE: usize = 5000; /// Creates checkpoint at current table version pub async fn create_checkpoint(table: &DeltaTable) -> Result<(), ProtocolError> { - create_checkpoint_for(table.version(), table.get_state(), table.storage.as_ref()).await?; + create_checkpoint_for(table.version(), table.get_state(), table.log_store.as_ref()).await?; Ok(()) } @@ -81,7 +81,7 @@ pub async fn cleanup_metadata(table: &DeltaTable) -> Result Result<(), ProtocolError> { // TODO: checkpoints _can_ be multi-part... haven't actually found a good reference for // an appropriate split point yet though so only writing a single part currently. 
// See https://github.com/delta-io/delta-rs/issues/288 - let last_checkpoint_path = storage.log_path().child("_last_checkpoint"); + let last_checkpoint_path = log_store.log_path().child("_last_checkpoint"); debug!("Writing parquet bytes to checkpoint buffer."); let (checkpoint, parquet_bytes) = parquet_bytes_from_state(state)?; let file_name = format!("{version:020}.checkpoint.parquet"); - let checkpoint_path = storage.log_path().child(file_name); + let checkpoint_path = log_store.log_path().child(file_name); + let object_store = log_store.object_store(); debug!("Writing checkpoint to {:?}.", checkpoint_path); - storage.put(&checkpoint_path, parquet_bytes).await?; + object_store.put(&checkpoint_path, parquet_bytes).await?; let last_checkpoint_content: Value = serde_json::to_value(checkpoint)?; let last_checkpoint_content = bytes::Bytes::from(serde_json::to_vec(&last_checkpoint_content)?); debug!("Writing _last_checkpoint to {:?}.", last_checkpoint_path); - storage + object_store .put(&last_checkpoint_path, last_checkpoint_content) .await?; @@ -146,7 +147,7 @@ pub async fn create_checkpoint_for( /// and less than the specified version. pub async fn cleanup_expired_logs_for( until_version: i64, - storage: &DeltaObjectStore, + log_store: &dyn LogStore, cutoff_timestamp: i64, ) -> Result { lazy_static! { @@ -157,10 +158,11 @@ pub async fn cleanup_expired_logs_for( // Feed a stream of candidate deletion files directly into the delete_stream // function to try to improve the speed of cleanup and reduce the need for // intermediate memory. - let deleted = storage + let object_store = log_store.object_store(); + let deleted = object_store .delete_stream( - storage - .list(Some(storage.log_path())) + object_store + .list(Some(log_store.log_path())) .await? // This predicate function will filter out any locations that don't // match the given timestamp range diff --git a/crates/deltalake-core/src/protocol/mod.rs b/crates/deltalake-core/src/protocol/mod.rs index 47e24cd959..8a5cd9f858 100644 --- a/crates/deltalake-core/src/protocol/mod.rs +++ b/crates/deltalake-core/src/protocol/mod.rs @@ -26,7 +26,7 @@ use std::mem::take; use crate::errors::DeltaResult; use crate::kernel::{Add, CommitInfo, Metadata, Protocol, Remove}; -use crate::storage::ObjectStoreRef; +use crate::logstore::LogStore; use crate::table::CheckPoint; use crate::table::DeltaTableMetaData; @@ -601,14 +601,15 @@ pub enum OutputMode { } pub(crate) async fn get_last_checkpoint( - object_store: &ObjectStoreRef, + log_store: &dyn LogStore, ) -> Result { let last_checkpoint_path = Path::from_iter(["_delta_log", "_last_checkpoint"]); debug!("loading checkpoint from {last_checkpoint_path}"); + let object_store = log_store.object_store(); match object_store.get(&last_checkpoint_path).await { Ok(data) => Ok(serde_json::from_slice(&data.bytes().await?)?), Err(ObjectStoreError::NotFound { .. }) => { - match find_latest_check_point_for_version(object_store, i64::MAX).await { + match find_latest_check_point_for_version(log_store, i64::MAX).await { Ok(Some(cp)) => Ok(cp), _ => Err(ProtocolError::CheckpointNotFound), } @@ -618,7 +619,7 @@ pub(crate) async fn get_last_checkpoint( } pub(crate) async fn find_latest_check_point_for_version( - object_store: &ObjectStoreRef, + log_store: &dyn LogStore, version: i64, ) -> Result, ProtocolError> { lazy_static! 
{ @@ -629,7 +630,8 @@ pub(crate) async fn find_latest_check_point_for_version( } let mut cp: Option = None; - let mut stream = object_store.list(Some(object_store.log_path())).await?; + let object_store = log_store.object_store(); + let mut stream = object_store.list(Some(log_store.log_path())).await?; while let Some(obj_meta) = stream.next().await { // Exit early if any objects can't be listed. diff --git a/crates/deltalake-core/src/storage/config.rs b/crates/deltalake-core/src/storage/config.rs index 1cba57b579..1ddf7d9c8a 100644 --- a/crates/deltalake-core/src/storage/config.rs +++ b/crates/deltalake-core/src/storage/config.rs @@ -12,6 +12,8 @@ use url::Url; use super::file::FileStorageBackend; use super::utils::str_is_truthy; use crate::errors::{DeltaResult, DeltaTableError}; +use crate::logstore::default_logstore::DefaultLogStore; +use crate::logstore::LogStoreRef; #[cfg(any(feature = "s3", feature = "s3-native-tls"))] use super::s3::{S3StorageBackend, S3StorageOptions}; @@ -225,7 +227,18 @@ impl From> for StorageOptions { } } -pub(crate) fn configure_store( +/// Configure a [`LogStoreRef`] for the given url and configuration +pub fn configure_log_store( + url: Url, + options: impl Into + Clone, +) -> DeltaResult { + let mut options = options.into(); + let (_scheme, _prefix) = ObjectStoreScheme::parse(&url, &mut options)?; + Ok(Arc::new(DefaultLogStore::try_new(url, options)?)) +} + +/// Configure an instance of an [`ObjectStore`] for the given url and configuration +pub fn configure_store( url: &Url, options: &mut StorageOptions, ) -> DeltaResult> { diff --git a/crates/deltalake-core/src/storage/mod.rs b/crates/deltalake-core/src/storage/mod.rs index c7309531ea..b571905f8b 100644 --- a/crates/deltalake-core/src/storage/mod.rs +++ b/crates/deltalake-core/src/storage/mod.rs @@ -1,22 +1,8 @@ //! Object storage backend abstraction layer for Delta Table transaction logs and data -use std::collections::HashMap; -use std::fmt; -use std::ops::Range; use std::sync::Arc; -use bytes::Bytes; -use futures::{stream::BoxStream, StreamExt}; use lazy_static::lazy_static; -use object_store::GetOptions; -use serde::de::{Error, SeqAccess, Visitor}; -use serde::ser::SerializeSeq; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use tokio::io::AsyncWrite; -use url::Url; - -use self::config::StorageOptions; -use crate::errors::DeltaResult; pub mod config; pub mod file; @@ -25,9 +11,6 @@ pub mod utils; #[cfg(any(feature = "s3", feature = "s3-native-tls"))] pub mod s3; -#[cfg(feature = "datafusion")] -use datafusion::datasource::object_store::ObjectStoreUrl; - pub use object_store::path::{Path, DELIMITER}; pub use object_store::{ DynObjectStore, Error as ObjectStoreError, GetResult, ListResult, MultipartId, ObjectMeta, @@ -45,342 +28,5 @@ pub(crate) fn commit_uri_from_version(version: i64) -> Path { DELTA_LOG_PATH.child(version.as_str()) } -/// Sharable reference to [`DeltaObjectStore`] -pub type ObjectStoreRef = Arc; - -/// Object Store implementation for DeltaTable. -/// -/// The [DeltaObjectStore] implements the [object_store::ObjectStore] trait to facilitate -/// interoperability with the larger rust / arrow ecosystem. Specifically it can directly -/// be registered as store within datafusion. -/// -/// The table root is treated as the root of the object store. -/// All [Path] are reported relative to the table root. 
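With `DeltaObjectStore` removed below, the public entry point for building storage by hand is `configure_log_store` from `storage::config`. A sketch, assuming the signature added above; the URL and empty options are placeholders:

    use std::collections::HashMap;
    use url::Url;

    // Previously: DeltaObjectStore::try_new(location, options)?
    let location = Url::parse("memory:///")?;
    let options: HashMap<String, String> = HashMap::new();
    // Wraps the configured object store in a DefaultLogStore and returns a LogStoreRef.
    let log_store = configure_log_store(location, options)?;
    let _object_store = log_store.object_store();
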
-#[derive(Debug, Clone)] -pub struct DeltaObjectStore { - storage: Arc, - location: Url, - options: StorageOptions, -} - -impl std::fmt::Display for DeltaObjectStore { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "DeltaObjectStore({})", self.location.as_ref()) - } -} - -impl DeltaObjectStore { - /// Create a new instance of [`DeltaObjectStore`] - /// - /// # Arguments - /// - /// * `storage` - A shared reference to an [`object_store::ObjectStore`] with "/" pointing at delta table root (i.e. where `_delta_log` is located). - /// * `location` - A url corresponding to the storage location of `storage`. - pub fn new(storage: Arc, location: Url) -> Self { - Self { - storage, - location, - options: HashMap::new().into(), - } - } - - /// Try creating a new instance of [`DeltaObjectStore`] - /// - /// # Arguments - /// - /// * `location` - A url pointing to the root of the delta table. - /// * `options` - Options passed to underlying builders. See [`with_storage_options`](crate::table::builder::DeltaTableBuilder::with_storage_options) - pub fn try_new(location: Url, options: impl Into + Clone) -> DeltaResult { - let mut options = options.into(); - let storage = config::configure_store(&location, &mut options)?; - Ok(Self { - storage, - location, - options, - }) - } - - /// Get a reference to the underlying storage backend - pub fn storage_backend(&self) -> Arc { - self.storage.clone() - } - - /// Storage options used to initialize storage backend - pub fn storage_options(&self) -> &StorageOptions { - &self.options - } - - /// Get fully qualified uri for table root - pub fn root_uri(&self) -> String { - self.to_uri(&Path::from("")) - } - - #[cfg(feature = "datafusion")] - /// Generate a unique enough url to identify the store in datafusion. - /// The DF object store registry only cares about the scheme and the host of the url for - /// registering/fetching. In our case the scheme is hard-coded to "delta-rs", so to get a unique - /// host we convert the location from this `DeltaObjectStore` to a valid name, combining the - /// original scheme, host and path with invalid characters replaced. - pub fn object_store_url(&self) -> ObjectStoreUrl { - // we are certain, that the URL can be parsed, since - // we make sure when we are parsing the table uri - ObjectStoreUrl::parse(format!( - "delta-rs://{}-{}{}", - self.location.scheme(), - self.location.host_str().unwrap_or("-"), - self.location - .path() - .replace(DELIMITER, "-") - .replace(':', "-") - )) - .expect("Invalid object store url.") - } - - /// [Path] to Delta log - pub fn log_path(&self) -> &Path { - &DELTA_LOG_PATH - } - - /// [Path] to Delta log - pub fn to_uri(&self, location: &Path) -> String { - match self.location.scheme() { - "file" => { - #[cfg(windows)] - let uri = format!( - "{}/{}", - self.location.as_ref().trim_end_matches('/'), - location.as_ref() - ) - .replace("file:///", ""); - #[cfg(unix)] - let uri = format!( - "{}/{}", - self.location.as_ref().trim_end_matches('/'), - location.as_ref() - ) - .replace("file://", ""); - uri - } - _ => { - if location.as_ref().is_empty() || location.as_ref() == "/" { - self.location.as_ref().to_string() - } else { - format!("{}/{}", self.location.as_ref(), location.as_ref()) - } - } - } - } - - /// Deletes object by `paths`. - pub async fn delete_batch(&self, paths: &[Path]) -> ObjectStoreResult<()> { - for path in paths { - match self.delete(path).await { - Ok(_) => continue, - Err(ObjectStoreError::NotFound { .. 
}) => continue, - Err(e) => return Err(e), - } - } - Ok(()) - } - - /// Check if the location is a delta table location - pub async fn is_delta_table_location(&self) -> ObjectStoreResult { - // TODO We should really be using HEAD here, but this fails in windows tests - let mut stream = self.list(Some(self.log_path())).await?; - if let Some(res) = stream.next().await { - match res { - Ok(_) => Ok(true), - Err(ObjectStoreError::NotFound { .. }) => Ok(false), - Err(err) => Err(err), - } - } else { - Ok(false) - } - } -} - -#[async_trait::async_trait] -impl ObjectStore for DeltaObjectStore { - /// Save the provided bytes to the specified location. - async fn put(&self, location: &Path, bytes: Bytes) -> ObjectStoreResult<()> { - self.storage.put(location, bytes).await - } - - /// Return the bytes that are stored at the specified location. - async fn get(&self, location: &Path) -> ObjectStoreResult { - self.storage.get(location).await - } - - /// Perform a get request with options - /// - /// Note: options.range will be ignored if [`object_store::GetResultPayload::File`] - async fn get_opts(&self, location: &Path, options: GetOptions) -> ObjectStoreResult { - self.storage.get_opts(location, options).await - } - - /// Return the bytes that are stored at the specified location - /// in the given byte range - async fn get_range(&self, location: &Path, range: Range) -> ObjectStoreResult { - self.storage.get_range(location, range).await - } - - /// Return the metadata for the specified location - async fn head(&self, location: &Path) -> ObjectStoreResult { - self.storage.head(location).await - } - - /// Delete the object at the specified location. - async fn delete(&self, location: &Path) -> ObjectStoreResult<()> { - self.storage.delete(location).await - } - - /// List all the objects with the given prefix. - /// - /// Prefixes are evaluated on a path segment basis, i.e. `foo/bar/` is a prefix of `foo/bar/x` but not of - /// `foo/bar_baz/x`. - async fn list( - &self, - prefix: Option<&Path>, - ) -> ObjectStoreResult>> { - self.storage.list(prefix).await - } - - /// List all the objects with the given prefix and a location greater than `offset` - /// - /// Some stores, such as S3 and GCS, may be able to push `offset` down to reduce - /// the number of network requests required - async fn list_with_offset( - &self, - prefix: Option<&Path>, - offset: &Path, - ) -> ObjectStoreResult>> { - self.storage.list_with_offset(prefix, offset).await - } - - /// List objects with the given prefix and an implementation specific - /// delimiter. Returns common prefixes (directories) in addition to object - /// metadata. - /// - /// Prefixes are evaluated on a path segment basis, i.e. `foo/bar/` is a prefix of `foo/bar/x` but not of - /// `foo/bar_baz/x`. - async fn list_with_delimiter(&self, prefix: Option<&Path>) -> ObjectStoreResult { - self.storage.list_with_delimiter(prefix).await - } - - /// Copy an object from one path to another in the same object store. - /// - /// If there exists an object at the destination, it will be overwritten. - async fn copy(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { - self.storage.copy(from, to).await - } - - /// Copy an object from one path to another, only if destination is empty. - /// - /// Will return an error if the destination already has an object. - async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { - self.storage.copy_if_not_exists(from, to).await - } - - /// Move an object from one path to another in the same object store. 
- /// - /// Will return an error if the destination already has an object. - async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { - self.storage.rename_if_not_exists(from, to).await - } - - async fn put_multipart( - &self, - location: &Path, - ) -> ObjectStoreResult<(MultipartId, Box)> { - self.storage.put_multipart(location).await - } - - async fn abort_multipart( - &self, - location: &Path, - multipart_id: &MultipartId, - ) -> ObjectStoreResult<()> { - self.storage.abort_multipart(location, multipart_id).await - } -} - -impl Serialize for DeltaObjectStore { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let mut seq = serializer.serialize_seq(None)?; - seq.serialize_element(&self.location.to_string())?; - seq.serialize_element(&self.options.0)?; - seq.end() - } -} - -impl<'de> Deserialize<'de> for DeltaObjectStore { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct DeltaObjectStoreVisitor {} - - impl<'de> Visitor<'de> for DeltaObjectStoreVisitor { - type Value = DeltaObjectStore; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> fmt::Result { - formatter.write_str("struct DeltaObjectStore") - } - - fn visit_seq(self, mut seq: A) -> Result - where - A: SeqAccess<'de>, - { - let location_str: String = seq - .next_element()? - .ok_or_else(|| A::Error::invalid_length(0, &self))?; - let options: HashMap = seq - .next_element()? - .ok_or_else(|| A::Error::invalid_length(0, &self))?; - let location = Url::parse(&location_str).unwrap(); - let table = DeltaObjectStore::try_new(location, options) - .map_err(|_| A::Error::custom("Failed deserializing DeltaObjectStore"))?; - Ok(table) - } - } - - deserializer.deserialize_seq(DeltaObjectStoreVisitor {}) - } -} - -#[cfg(feature = "datafusion")] -#[cfg(test)] -mod tests { - use crate::storage::DeltaObjectStore; - use object_store::memory::InMemory; - use std::sync::Arc; - use url::Url; - - #[tokio::test] - async fn test_unique_object_store_url() { - // Just a dummy store to be passed for initialization - let inner_store = Arc::from(InMemory::new()); - - for (location_1, location_2) in [ - // Same scheme, no host, different path - ("file:///path/to/table_1", "file:///path/to/table_2"), - // Different scheme/host, same path - ("s3://my_bucket/path/to/table_1", "file:///path/to/table_1"), - // Same scheme, different host, same path - ("s3://bucket_1/table_1", "s3://bucket_2/table_1"), - ] { - let url_1 = Url::parse(location_1).unwrap(); - let url_2 = Url::parse(location_2).unwrap(); - let store_1 = DeltaObjectStore::new(inner_store.clone(), url_1); - let store_2 = DeltaObjectStore::new(inner_store.clone(), url_2); - - assert_ne!( - store_1.object_store_url().as_str(), - store_2.object_store_url().as_str(), - ); - } - } -} +/// Sharable reference to [`ObjectStore`] +pub type ObjectStoreRef = Arc; diff --git a/crates/deltalake-core/src/storage/utils.rs b/crates/deltalake-core/src/storage/utils.rs index 7e516c7217..768664b97b 100644 --- a/crates/deltalake-core/src/storage/utils.rs +++ b/crates/deltalake-core/src/storage/utils.rs @@ -28,7 +28,7 @@ pub async fn copy_table( .with_storage_options(to_options.unwrap_or_default()) .with_allow_http(allow_http) .build_storage()?; - sync_stores(from_store, to_store).await + sync_stores(from_store.object_store(), to_store.object_store()).await } /// Synchronize the contents of two object stores diff --git a/crates/deltalake-core/src/table/builder.rs b/crates/deltalake-core/src/table/builder.rs index 
92fc4851ad..2a4f8aca41 100644 --- a/crates/deltalake-core/src/table/builder.rs +++ b/crates/deltalake-core/src/table/builder.rs @@ -11,8 +11,9 @@ use url::Url; use super::DeltaTable; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::storage::config::StorageOptions; -use crate::storage::{DeltaObjectStore, ObjectStoreRef}; +use crate::logstore::default_logstore::DefaultLogStore; +use crate::logstore::{LogStoreConfig, LogStoreRef}; +use crate::storage::config::{self, StorageOptions}; #[allow(dead_code)] #[derive(Debug, thiserror::Error)] @@ -243,18 +244,24 @@ impl DeltaTableBuilder { } /// Build a delta storage backend for the given config - pub fn build_storage(self) -> DeltaResult { + pub fn build_storage(self) -> DeltaResult { match self.options.storage_backend { - Some((storage, location)) => Ok(Arc::new(DeltaObjectStore::new( - storage, - ensure_table_uri(location.as_str())?, - ))), + Some((storage, location)) => { + let location = ensure_table_uri(location.as_str())?; + Ok(Arc::new(DefaultLogStore::new( + Arc::new(storage), + LogStoreConfig { + location, + options: HashMap::new().into(), + }, + ))) + } None => { let location = ensure_table_uri(&self.options.table_uri)?; - Ok(Arc::new(DeltaObjectStore::try_new( + Ok(config::configure_log_store( location, self.storage_options(), - )?)) + )?) } } } diff --git a/crates/deltalake-core/src/table/mod.rs b/crates/deltalake-core/src/table/mod.rs index 2b011ff608..cd0f1808f5 100644 --- a/crates/deltalake-core/src/table/mod.rs +++ b/crates/deltalake-core/src/table/mod.rs @@ -5,7 +5,6 @@ use std::convert::TryFrom; use std::fmt; use std::fmt::Formatter; use std::io::{BufRead, BufReader, Cursor}; -use std::sync::Arc; use std::{cmp::max, cmp::Ordering, collections::HashSet}; use chrono::{DateTime, Utc}; @@ -26,10 +25,13 @@ use crate::kernel::{ Action, Add, CommitInfo, DataType, Format, Metadata, ReaderFeatures, Remove, StructType, WriterFeatures, }; +use crate::logstore::LogStoreConfig; +use crate::logstore::LogStoreRef; use crate::partitions::PartitionFilter; use crate::protocol::{ find_latest_check_point_for_version, get_last_checkpoint, ProtocolError, Stats, }; +use crate::storage::config::configure_log_store; use crate::storage::{commit_uri_from_version, ObjectStoreRef}; pub mod builder; @@ -249,8 +251,8 @@ pub struct DeltaTable { pub state: DeltaTableState, /// the load options used during load pub config: DeltaTableConfig, - /// object store to access log and data files - pub(crate) storage: ObjectStoreRef, + /// log store + pub(crate) log_store: LogStoreRef, /// file metadata for latest checkpoint last_check_point: Option, /// table versions associated with timestamps @@ -265,7 +267,7 @@ impl Serialize for DeltaTable { let mut seq = serializer.serialize_seq(None)?; seq.serialize_element(&self.state)?; seq.serialize_element(&self.config)?; - seq.serialize_element(self.storage.as_ref())?; + seq.serialize_element(self.log_store.config())?; seq.serialize_element(&self.last_check_point)?; seq.serialize_element(&self.version_timestamp)?; seq.end() @@ -296,9 +298,12 @@ impl<'de> Deserialize<'de> for DeltaTable { let config = seq .next_element()? .ok_or_else(|| A::Error::invalid_length(0, &self))?; - let storage = seq + let storage_config: LogStoreConfig = seq .next_element()? .ok_or_else(|| A::Error::invalid_length(0, &self))?; + let log_store = + configure_log_store(storage_config.location, storage_config.options) + .map_err(|_| A::Error::custom("Failed deserializing LogStore"))?; let last_check_point = seq .next_element()? 
.ok_or_else(|| A::Error::invalid_length(0, &self))?; @@ -309,7 +314,7 @@ impl<'de> Deserialize<'de> for DeltaTable { let table = DeltaTable { state, config, - storage: Arc::new(storage), + log_store, last_check_point, version_timestamp, }; @@ -326,10 +331,10 @@ impl DeltaTable { /// /// NOTE: This is for advanced users. If you don't know why you need to use this method, please /// call one of the `open_table` helper methods instead. - pub fn new(storage: ObjectStoreRef, config: DeltaTableConfig) -> Self { + pub fn new(log_store: LogStoreRef, config: DeltaTableConfig) -> Self { Self { state: DeltaTableState::with_version(-1), - storage, + log_store, config, last_check_point: None, version_timestamp: HashMap::new(), @@ -341,10 +346,10 @@ impl DeltaTable { /// /// NOTE: This is for advanced users. If you don't know why you need to use this method, /// please call one of the `open_table` helper methods instead. - pub(crate) fn new_with_state(storage: ObjectStoreRef, state: DeltaTableState) -> Self { + pub(crate) fn new_with_state(log_store: LogStoreRef, state: DeltaTableState) -> Self { Self { state, - storage, + log_store, config: Default::default(), last_check_point: None, version_timestamp: HashMap::new(), @@ -353,18 +358,23 @@ impl DeltaTable { /// get a shared reference to the delta object store pub fn object_store(&self) -> ObjectStoreRef { - self.storage.clone() + self.log_store.object_store() } /// The URI of the underlying data pub fn table_uri(&self) -> String { - self.storage.root_uri() + self.log_store.root_uri() + } + + /// get a shared reference to the log store + pub fn log_store(&self) -> LogStoreRef { + self.log_store.clone() } /// Return the list of paths of given checkpoint. pub fn get_checkpoint_data_paths(&self, check_point: &CheckPoint) -> Vec { let checkpoint_prefix = format!("{:020}", check_point.version); - let log_path = self.storage.log_path(); + let log_path = self.log_store.log_path(); let mut checkpoint_data_paths = Vec::new(); match check_point.parts { @@ -399,7 +409,8 @@ impl DeltaTable { let mut current_delta_log_ver = i64::MAX; // Get file objects from table. - let mut stream = self.storage.list(Some(self.storage.log_path())).await?; + let storage = self.object_store(); + let mut stream = storage.list(Some(self.log_store.log_path())).await?; while let Some(obj_meta) = stream.next().await { let obj_meta = obj_meta?; @@ -422,54 +433,7 @@ impl DeltaTable { } /// returns the latest available version of the table pub async fn get_latest_version(&mut self) -> Result { - let version_start = match get_last_checkpoint(&self.storage).await { - Ok(last_check_point) => last_check_point.version, - Err(ProtocolError::CheckpointNotFound) => { - // no checkpoint - -1 - } - Err(e) => { - return Err(DeltaTableError::from(e)); - } - }; - - debug!("latest checkpoint version: {version_start}"); - - let version_start = max(self.version(), version_start); - - lazy_static! 
{ - static ref DELTA_LOG_REGEX: Regex = - Regex::new(r"_delta_log/(\d{20})\.(json|checkpoint).*$").unwrap(); - } - - // list files to find max version - let version = async { - let mut max_version: i64 = version_start; - let prefix = Some(self.storage.log_path()); - let offset_path = commit_uri_from_version(max_version); - let mut files = self.storage.list_with_offset(prefix, &offset_path).await?; - - while let Some(obj_meta) = files.next().await { - let obj_meta = obj_meta?; - if let Some(captures) = DELTA_LOG_REGEX.captures(obj_meta.location.as_ref()) { - let log_version = captures.get(1).unwrap().as_str().parse().unwrap(); - // listing may not be ordered - max_version = max(max_version, log_version); - // also cache timestamp for version, for faster time-travel - self.version_timestamp - .insert(log_version, obj_meta.last_modified.timestamp()); - } - } - - if max_version < 0 { - return Err(DeltaTableError::not_a_table(self.table_uri())); - } - - Ok::(max_version) - } - .await?; - - Ok(version) + self.log_store.get_latest_version(self.version()).await } /// Currently loaded version of the table @@ -490,12 +454,12 @@ impl DeltaTable { current_version: i64, ) -> Result { let next_version = current_version + 1; - let commit_uri = commit_uri_from_version(next_version); - let commit_log_bytes = self.storage.get(&commit_uri).await; - let commit_log_bytes = match commit_log_bytes { - Err(ObjectStoreError::NotFound { .. }) => return Ok(PeekCommit::UpToDate), + let commit_log_bytes = match self.log_store.read_commit_entry(next_version).await { + Ok(bytes) => Ok(bytes), + Err(DeltaTableError::ObjectStore { + source: ObjectStoreError::NotFound { .. }, + }) => return Ok(PeekCommit::UpToDate), Err(err) => Err(err), - Ok(result) => result.bytes().await, }?; let actions = Self::get_actions(next_version, commit_log_bytes).await; @@ -529,7 +493,7 @@ impl DeltaTable { /// loading the last checkpoint and incrementally applying each version since. #[cfg(any(feature = "parquet", feature = "parquet2"))] pub async fn update(&mut self) -> Result<(), DeltaTableError> { - match get_last_checkpoint(&self.storage).await { + match get_last_checkpoint(self.log_store.as_ref()).await { Ok(last_check_point) => { debug!("update with latest checkpoint {last_check_point:?}"); if Some(last_check_point) == self.last_check_point { @@ -574,13 +538,12 @@ impl DeltaTable { let buf_size = self.config.log_buffer_size; - let store = self.storage.clone(); + let log_store = self.log_store.clone(); let mut log_stream = futures::stream::iter(self.version() + 1..max_version + 1) .map(|version| { - let store = store.clone(); - let loc = commit_uri_from_version(version); + let log_store = log_store.clone(); async move { - let data = store.get(&loc).await?.bytes().await?; + let data = log_store.read_commit_entry(version).await?; let actions = Self::get_actions(version, data).await?; Ok((version, actions)) } @@ -616,7 +579,7 @@ impl DeltaTable { pub async fn load_version(&mut self, version: i64) -> Result<(), DeltaTableError> { // check if version is valid let commit_uri = commit_uri_from_version(version); - match self.storage.head(&commit_uri).await { + match self.object_store().head(&commit_uri).await { Ok(_) => {} Err(ObjectStoreError::NotFound { .. }) => { return Err(DeltaTableError::InvalidVersion(version)); @@ -628,7 +591,7 @@ impl DeltaTable { // 1. find latest checkpoint below version #[cfg(any(feature = "parquet", feature = "parquet2"))] - match find_latest_check_point_for_version(&self.storage, version).await? 
{ + match find_latest_check_point_for_version(self.log_store.as_ref(), version).await? { Some(check_point) => { self.restore_checkpoint(check_point).await?; } @@ -652,7 +615,10 @@ impl DeltaTable { match self.version_timestamp.get(&version) { Some(ts) => Ok(*ts), None => { - let meta = self.storage.head(&commit_uri_from_version(version)).await?; + let meta = self + .object_store() + .head(&commit_uri_from_version(version)) + .await?; let ts = meta.last_modified.timestamp(); // also cache timestamp for version self.version_timestamp.insert(version, ts); @@ -746,7 +712,7 @@ impl DeltaTable { let files = self.get_files_by_partitions(filters)?; Ok(files .iter() - .map(|fname| self.storage.to_uri(fname)) + .map(|fname| self.log_store.to_uri(fname)) .collect()) } @@ -771,7 +737,7 @@ impl DeltaTable { pub fn get_file_uris(&self) -> impl Iterator + '_ { self.state .file_paths_iter() - .map(|path| self.storage.to_uri(&path)) + .map(|path| self.log_store.to_uri(&path)) } /// Returns statistics for files, in order diff --git a/crates/deltalake-core/src/table/state.rs b/crates/deltalake-core/src/table/state.rs index 26becd0703..8fa51c55fd 100644 --- a/crates/deltalake-core/src/table/state.rs +++ b/crates/deltalake-core/src/table/state.rs @@ -71,7 +71,7 @@ impl DeltaTableState { /// Construct a delta table state object from commit version. pub async fn from_commit(table: &DeltaTable, version: i64) -> Result { let commit_uri = commit_uri_from_version(version); - let commit_log_bytes = match table.storage.get(&commit_uri).await { + let commit_log_bytes = match table.object_store().get(&commit_uri).await { Ok(get) => Ok(get.bytes().await?), Err(ObjectStoreError::NotFound { .. }) => Err(ProtocolError::EndOfLog), Err(source) => Err(ProtocolError::ObjectStore { source }), @@ -161,7 +161,7 @@ impl DeltaTableState { let mut new_state = Self::with_version(check_point.version); for f in &checkpoint_data_paths { - let obj = table.storage.get(f).await?.bytes().await?; + let obj = table.object_store().get(f).await?.bytes().await?; new_state.process_checkpoint_bytes(obj, &table.config)?; } diff --git a/crates/deltalake-core/src/test_utils.rs b/crates/deltalake-core/src/test_utils.rs index 124ec0365b..ce121106c4 100644 --- a/crates/deltalake-core/src/test_utils.rs +++ b/crates/deltalake-core/src/test_utils.rs @@ -88,7 +88,7 @@ impl IntegrationContext { _ => DeltaTableBuilder::from_uri(store_uri) .with_allow_http(true) .build_storage()? - .storage_backend(), + .object_store(), }; Ok(Self { diff --git a/crates/deltalake-core/src/writer/json.rs b/crates/deltalake-core/src/writer/json.rs index 044ffc20e2..7fec11fad2 100644 --- a/crates/deltalake-core/src/writer/json.rs +++ b/crates/deltalake-core/src/writer/json.rs @@ -26,14 +26,14 @@ use crate::errors::DeltaTableError; use crate::kernel::{Add, StructType}; use crate::table::builder::DeltaTableBuilder; use crate::table::DeltaTableMetaData; +use crate::writer::utils::ShareableBuffer; use crate::DeltaTable; -use crate::{storage::DeltaObjectStore, writer::utils::ShareableBuffer}; type BadValue = (Value, ParquetError); /// Writes messages to a delta lake table. 
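As the struct below shows, the writers keep only a plain `Arc<dyn ObjectStore>`, taken from the log store rather than from a `DeltaObjectStore`. A sketch of obtaining that handle, assuming the builder change shown earlier; the URI is a placeholder:

    // From a table URI: build the log store, then borrow its object store.
    let storage: ObjectStoreRef = DeltaTableBuilder::from_uri("memory://")
        .build_storage()?
        .object_store();

    // From an already loaded table, the accessor does the same thing:
    // let storage = table.object_store();
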
pub struct JsonWriter { - storage: Arc, + storage: Arc, arrow_schema_ref: Arc, writer_properties: WriterProperties, partition_columns: Vec, @@ -195,7 +195,7 @@ impl JsonWriter { .build(); Ok(Self { - storage, + storage: storage.object_store(), arrow_schema_ref: schema, writer_properties, partition_columns: partition_columns.unwrap_or_default(), @@ -218,7 +218,7 @@ impl JsonWriter { .build(); Ok(Self { - storage: table.storage.clone(), + storage: table.object_store(), arrow_schema_ref, writer_properties, partition_columns, diff --git a/crates/deltalake-core/src/writer/mod.rs b/crates/deltalake-core/src/writer/mod.rs index 478a0b11f2..3b73fe2ef6 100644 --- a/crates/deltalake-core/src/writer/mod.rs +++ b/crates/deltalake-core/src/writer/mod.rs @@ -146,7 +146,14 @@ pub trait DeltaWriter { partition_by, predicate: None, }; - let version = commit(table.storage.as_ref(), &adds, operation, &table.state, None).await?; + let version = commit( + table.log_store.as_ref(), + &adds, + operation, + &table.state, + None, + ) + .await?; table.update().await?; Ok(version) } diff --git a/crates/deltalake-core/src/writer/record_batch.rs b/crates/deltalake-core/src/writer/record_batch.rs index b673146907..49b5dfebc9 100644 --- a/crates/deltalake-core/src/writer/record_batch.rs +++ b/crates/deltalake-core/src/writer/record_batch.rs @@ -29,11 +29,11 @@ use crate::errors::DeltaTableError; use crate::kernel::{Add, StructType}; use crate::table::builder::DeltaTableBuilder; use crate::table::DeltaTableMetaData; -use crate::{storage::DeltaObjectStore, DeltaTable}; +use crate::DeltaTable; /// Writes messages to a delta lake table. pub struct RecordBatchWriter { - storage: Arc, + storage: Arc, arrow_schema_ref: Arc, writer_properties: WriterProperties, partition_columns: Vec, @@ -56,7 +56,8 @@ impl RecordBatchWriter { ) -> Result { let storage = DeltaTableBuilder::from_uri(table_uri) .with_storage_options(storage_options.unwrap_or_default()) - .build_storage()?; + .build_storage()? 
+ .object_store(); // Initialize writer properties for the underlying arrow writer let writer_properties = WriterProperties::builder() @@ -89,7 +90,7 @@ impl RecordBatchWriter { .build(); Ok(Self { - storage: table.storage.clone(), + storage: table.object_store(), arrow_schema_ref, writer_properties, partition_columns, diff --git a/crates/deltalake-core/src/writer/test_utils.rs b/crates/deltalake-core/src/writer/test_utils.rs index d67931c096..1daf9e407b 100644 --- a/crates/deltalake-core/src/writer/test_utils.rs +++ b/crates/deltalake-core/src/writer/test_utils.rs @@ -323,7 +323,7 @@ pub mod datafusion { use std::sync::Arc; pub async fn get_data(table: &DeltaTable) -> Vec { - let table = DeltaTable::new_with_state(table.object_store(), table.state.clone()); + let table = DeltaTable::new_with_state(table.log_store.clone(), table.state.clone()); let ctx = SessionContext::new(); ctx.register_table("test", Arc::new(table)).unwrap(); ctx.sql("select * from test") diff --git a/crates/deltalake-core/tests/command_optimize.rs b/crates/deltalake-core/tests/command_optimize.rs index a923d0064d..14f9d4c410 100644 --- a/crates/deltalake-core/tests/command_optimize.rs +++ b/crates/deltalake-core/tests/command_optimize.rs @@ -298,7 +298,7 @@ async fn test_conflict_for_remove_actions() -> Result<(), Box> { let operation = DeltaOperation::Delete { predicate: None }; commit( - other_dt.object_store().as_ref(), + other_dt.log_store().as_ref(), &vec![Action::Remove(remove)], operation, &other_dt.state, @@ -306,9 +306,7 @@ async fn test_conflict_for_remove_actions() -> Result<(), Box> { ) .await?; - let maybe_metrics = plan - .execute(dt.object_store(), &dt.state, 1, 20, None) - .await; + let maybe_metrics = plan.execute(dt.log_store(), &dt.state, 1, 20, None).await; assert!(maybe_metrics.is_err()); assert_eq!(dt.version(), version + 1); @@ -357,9 +355,7 @@ async fn test_no_conflict_for_append_actions() -> Result<(), Box> { ) .await?; - let metrics = plan - .execute(dt.object_store(), &dt.state, 1, 20, None) - .await?; + let metrics = plan.execute(dt.log_store(), &dt.state, 1, 20, None).await?; assert_eq!(metrics.num_files_added, 1); assert_eq!(metrics.num_files_removed, 2); @@ -399,7 +395,7 @@ async fn test_commit_interval() -> Result<(), Box> { let metrics = plan .execute( - dt.object_store(), + dt.log_store(), &dt.state, 1, 20, diff --git a/crates/deltalake-core/tests/command_vacuum.rs b/crates/deltalake-core/tests/command_vacuum.rs index e21315e796..24644159fe 100644 --- a/crates/deltalake-core/tests/command_vacuum.rs +++ b/crates/deltalake-core/tests/command_vacuum.rs @@ -296,6 +296,6 @@ async fn test_non_managed_files() { async fn is_deleted(context: &mut TestContext, path: &Path) -> bool { let backend = context.get_storage(); - let res = backend.head(path).await; + let res = backend.object_store().head(path).await; matches!(res, Err(ObjectStoreError::NotFound { .. 
})) } diff --git a/crates/deltalake-core/tests/commit_info_format.rs b/crates/deltalake-core/tests/commit_info_format.rs index de69397e32..a9d05e4c11 100644 --- a/crates/deltalake-core/tests/commit_info_format.rs +++ b/crates/deltalake-core/tests/commit_info_format.rs @@ -22,7 +22,7 @@ async fn test_operational_parameters() -> Result<(), Box> { }; commit( - table.object_store().as_ref(), + table.log_store().as_ref(), &actions, operation, &table.state, diff --git a/crates/deltalake-core/tests/common/mod.rs b/crates/deltalake-core/tests/common/mod.rs index 80df899323..af2e6e1a7f 100644 --- a/crates/deltalake-core/tests/common/mod.rs +++ b/crates/deltalake-core/tests/common/mod.rs @@ -2,10 +2,10 @@ use bytes::Bytes; use deltalake_core::kernel::{Action, Add, Remove, StructType}; +use deltalake_core::logstore::LogStore; use deltalake_core::operations::create::CreateBuilder; use deltalake_core::operations::transaction::commit; use deltalake_core::protocol::{DeltaOperation, SaveMode}; -use deltalake_core::storage::DeltaObjectStore; use deltalake_core::DeltaTable; use deltalake_core::DeltaTableBuilder; use object_store::{path::Path, ObjectStore}; @@ -28,7 +28,7 @@ pub mod s3; pub struct TestContext { /// The main table under test pub table: Option, - pub backend: Option>, + pub backend: Option>, /// The configuration used to create the backend. pub config: HashMap, /// An object when it is dropped will clean up any temporary resources created for the test @@ -55,7 +55,7 @@ impl TestContext { } } - pub fn get_storage(&mut self) -> Arc { + pub fn get_storage(&mut self) -> Arc { if self.backend.is_none() { self.backend = Some(self.new_storage()) } @@ -63,7 +63,7 @@ impl TestContext { self.backend.as_ref().unwrap().clone() } - fn new_storage(&self) -> Arc { + fn new_storage(&self) -> Arc { let config = self.config.clone(); let uri = config.get("URI").unwrap().to_string(); DeltaTableBuilder::from_uri(uri) @@ -82,9 +82,9 @@ impl TestContext { .iter() .map(|s| s.to_string()) .collect::>(); - let backend = self.new_storage(); + let log_store = self.new_storage(); CreateBuilder::new() - .with_object_store(backend) + .with_log_store(log_store) .with_table_name("delta-rs_test_table") .with_comment("Table created by delta-rs tests") .with_columns(schema.fields().clone()) @@ -149,7 +149,7 @@ pub async fn add_file( }; let actions = vec![Action::Add(add)]; commit( - table.object_store().as_ref(), + table.log_store().as_ref(), &actions, operation, &table.state, @@ -187,7 +187,7 @@ pub async fn remove_file( let operation = DeltaOperation::Delete { predicate: None }; let actions = vec![Action::Remove(remove)]; commit( - table.object_store().as_ref(), + table.log_store().as_ref(), &actions, operation, &table.state, diff --git a/crates/deltalake-core/tests/fs_common/mod.rs b/crates/deltalake-core/tests/fs_common/mod.rs index dc9ec2547a..73593f26b1 100644 --- a/crates/deltalake-core/tests/fs_common/mod.rs +++ b/crates/deltalake-core/tests/fs_common/mod.rs @@ -5,7 +5,8 @@ use deltalake_core::kernel::{ use deltalake_core::operations::create::CreateBuilder; use deltalake_core::operations::transaction::commit; use deltalake_core::protocol::{DeltaOperation, SaveMode}; -use deltalake_core::storage::{DeltaObjectStore, GetResult, ObjectStoreResult}; +use deltalake_core::storage::config::configure_store; +use deltalake_core::storage::{GetResult, ObjectStoreResult}; use deltalake_core::DeltaTable; use object_store::path::Path as StorePath; use object_store::ObjectStore; @@ -13,6 +14,7 @@ use serde_json::Value; use 
std::collections::HashMap; use std::fs; use std::path::Path; +use std::sync::Arc; use url::Url; use uuid::Uuid; @@ -119,7 +121,7 @@ pub async fn commit_actions( operation: DeltaOperation, ) -> i64 { let version = commit( - table.object_store().as_ref(), + table.log_store().as_ref(), &actions, operation, &table.state, @@ -133,7 +135,7 @@ pub async fn commit_actions( #[derive(Debug)] pub struct SlowStore { - inner: DeltaObjectStore, + inner: Arc, } impl std::fmt::Display for SlowStore { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -147,8 +149,9 @@ impl SlowStore { location: Url, options: impl Into + Clone, ) -> deltalake_core::DeltaResult { + let mut options = options.into(); Ok(Self { - inner: DeltaObjectStore::try_new(location, options).unwrap(), + inner: configure_store(&location, &mut options).unwrap(), }) } } diff --git a/crates/deltalake-core/tests/integration_checkpoint.rs b/crates/deltalake-core/tests/integration_checkpoint.rs index 9b5b0a73ff..56d253eb85 100644 --- a/crates/deltalake-core/tests/integration_checkpoint.rs +++ b/crates/deltalake-core/tests/integration_checkpoint.rs @@ -59,15 +59,12 @@ async fn cleanup_metadata_hdfs_test() -> TestResult { // test to run longer but reliable async fn cleanup_metadata_test(context: &IntegrationContext) -> TestResult { let table_uri = context.root_uri(); - let object_store = DeltaTableBuilder::from_uri(table_uri) + let log_store = DeltaTableBuilder::from_uri(table_uri) .with_allow_http(true) .build_storage()?; + let object_store = log_store.object_store(); - let log_path = |version| { - object_store - .log_path() - .child(format!("{:020}.json", version)) - }; + let log_path = |version| log_store.log_path().child(format!("{:020}.json", version)); // we don't need to actually populate files with content as cleanup works only with file's metadata object_store @@ -98,7 +95,7 @@ async fn cleanup_metadata_test(context: &IntegrationContext) -> TestResult { assert!(retention_timestamp > v1time.timestamp_millis()); assert!(retention_timestamp < v2time.timestamp_millis()); - let removed = cleanup_expired_logs_for(3, object_store.as_ref(), retention_timestamp).await?; + let removed = cleanup_expired_logs_for(3, log_store.as_ref(), retention_timestamp).await?; assert_eq!(removed, 2); assert!(object_store.head(&log_path(0)).await.is_err()); @@ -142,7 +139,7 @@ async fn test_issue_1420_cleanup_expired_logs_for() -> DeltaResult<()> { // Should delete v1 but not v2 or v2.checkpoint.parquet cleanup_expired_logs_for( table.version(), - table.object_store().as_ref(), + table.log_store().as_ref(), ts.timestamp_millis(), ) .await?; @@ -184,7 +181,7 @@ async fn test_issue_1420_cleanup_expired_logs_for() -> DeltaResult<()> { cleanup_expired_logs_for( table.version(), - table.object_store().as_ref(), + table.log_store().as_ref(), ts.timestamp_millis(), ) .await?; diff --git a/crates/deltalake-core/tests/integration_concurrent_writes.rs b/crates/deltalake-core/tests/integration_concurrent_writes.rs index bef44d0693..90dba7659a 100644 --- a/crates/deltalake-core/tests/integration_concurrent_writes.rs +++ b/crates/deltalake-core/tests/integration_concurrent_writes.rs @@ -168,7 +168,7 @@ impl Worker { default_row_commit_version: None, })]; let version = commit( - self.table.object_store().as_ref(), + self.table.log_store().as_ref(), &actions, operation, &self.table.state, diff --git a/crates/deltalake-core/tests/integration_datafusion.rs b/crates/deltalake-core/tests/integration_datafusion.rs index 3476de6839..7a9c38463f 100644 --- 
a/crates/deltalake-core/tests/integration_datafusion.rs +++ b/crates/deltalake-core/tests/integration_datafusion.rs @@ -34,11 +34,11 @@ use deltalake_core::delta_datafusion::{DeltaPhysicalCodec, DeltaScan}; use deltalake_core::kernel::{DataType, MapType, PrimitiveType, StructField, StructType}; use deltalake_core::operations::create::CreateBuilder; use deltalake_core::protocol::SaveMode; -use deltalake_core::storage::DeltaObjectStore; use deltalake_core::writer::{DeltaWriter, RecordBatchWriter}; use deltalake_core::{ open_table, operations::{write::WriteBuilder, DeltaOps}, + storage::config::configure_log_store, DeltaTable, DeltaTableError, }; use std::error::Error; @@ -211,7 +211,7 @@ mod local { // Trying to execute the write from the input plan without providing Datafusion with a session // state containing the referenced object store in the registry results in an error. assert!( - WriteBuilder::new(target_table.object_store(), target_table.state.clone()) + WriteBuilder::new(target_table.log_store(), target_table.state.clone()) .with_input_execution_plan(source_scan.clone()) .await .unwrap_err() @@ -227,19 +227,18 @@ mod local { .table_uri .clone(); let source_location = Url::parse(&source_uri).unwrap(); - let source_store = DeltaObjectStore::try_new(source_location, HashMap::new()).unwrap(); + let source_store = configure_log_store(source_location, HashMap::new()).unwrap(); let object_store_url = source_store.object_store_url(); let source_store_url: &Url = object_store_url.as_ref(); state .runtime_env() - .register_object_store(source_store_url, Arc::from(source_store)); + .register_object_store(source_store_url, source_store.object_store()); // Execute write to the target table with the proper state - let target_table = - WriteBuilder::new(target_table.object_store(), target_table.state.clone()) - .with_input_execution_plan(source_scan) - .with_input_session_state(state) - .await?; + let target_table = WriteBuilder::new(target_table.log_store(), target_table.state.clone()) + .with_input_execution_plan(source_scan) + .with_input_session_state(state) + .await?; ctx.register_table("target", Arc::new(target_table))?; // Check results diff --git a/crates/deltalake-core/tests/integration_object_store.rs b/crates/deltalake-core/tests/integration_object_store.rs index 3988cbdb6d..9c5720db85 100644 --- a/crates/deltalake-core/tests/integration_object_store.rs +++ b/crates/deltalake-core/tests/integration_object_store.rs @@ -81,7 +81,8 @@ async fn read_write_test_onelake(integration: StorageIntegration, path: &Path) - let delta_store = DeltaTableBuilder::from_uri(&context.root_uri()) .with_allow_http(true) - .build_storage()?; + .build_storage()? + .object_store(); //println!("{:#?}",delta_store); @@ -103,7 +104,8 @@ async fn test_object_store(integration: StorageIntegration, skip_copy: bool) -> let context = IntegrationContext::new(integration)?; let delta_store = DeltaTableBuilder::from_uri(context.root_uri()) .with_allow_http(true) - .build_storage()?; + .build_storage()? + .object_store(); put_get_delete_list(delta_store.as_ref()).await?; list_with_delimiter(delta_store.as_ref()).await?; @@ -482,7 +484,8 @@ async fn test_object_store_prefixes(integration: StorageIntegration) -> TestResu let rooturi = format!("{}/{}", context.root_uri(), prefix); let delta_store = DeltaTableBuilder::from_uri(&rooturi) .with_allow_http(true) - .build_storage()?; + .build_storage()? 
+ .object_store(); let contents = Bytes::from("cats"); let path = Path::from("test"); diff --git a/crates/deltalake-core/tests/integration_read.rs b/crates/deltalake-core/tests/integration_read.rs index 0e17d34397..a15679a09c 100644 --- a/crates/deltalake-core/tests/integration_read.rs +++ b/crates/deltalake-core/tests/integration_read.rs @@ -155,7 +155,8 @@ async fn verify_store(integration: &IntegrationContext, root_path: &str) -> Test let table_uri = format!("{}/{}", integration.root_uri(), root_path); let storage = DeltaTableBuilder::from_uri(table_uri.clone()) .with_allow_http(true) - .build_storage()?; + .build_storage()? + .object_store(); let files = storage.list_with_delimiter(None).await?; assert_eq!( diff --git a/crates/deltalake-core/tests/repair_s3_rename_test.rs b/crates/deltalake-core/tests/repair_s3_rename_test.rs index 3157fab896..ecfba0fbbe 100644 --- a/crates/deltalake-core/tests/repair_s3_rename_test.rs +++ b/crates/deltalake-core/tests/repair_s3_rename_test.rs @@ -117,7 +117,7 @@ fn create_s3_backend( .with_allow_http(true) .build_storage() .unwrap() - .storage_backend(); + .object_store(); let delayed_store = DelayedObjectStore { inner: store, diff --git a/python/src/filesystem.rs b/python/src/filesystem.rs index 1f558b82fe..a8bfb6668a 100644 --- a/python/src/filesystem.rs +++ b/python/src/filesystem.rs @@ -51,7 +51,8 @@ impl DeltaFileSystemHandler { let storage = DeltaTableBuilder::from_uri(table_uri) .with_storage_options(options.clone().unwrap_or_default()) .build_storage() - .map_err(PythonError::from)?; + .map_err(PythonError::from)? + .object_store(); Ok(Self { inner: storage, rt: Arc::new(rt()?), diff --git a/python/src/lib.rs b/python/src/lib.rs index 923a06d159..a58d1f9aa6 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -274,7 +274,7 @@ impl RawDeltaTable { retention_hours: Option, enforce_retention_duration: bool, ) -> PyResult> { - let mut cmd = VacuumBuilder::new(self._table.object_store(), self._table.state.clone()) + let mut cmd = VacuumBuilder::new(self._table.log_store(), self._table.state.clone()) .with_enforce_retention_duration(enforce_retention_duration) .with_dry_run(dry_run); if let Some(retention_period) = retention_hours { @@ -296,7 +296,7 @@ impl RawDeltaTable { writer_properties: Option>, safe_cast: bool, ) -> PyResult { - let mut cmd = UpdateBuilder::new(self._table.object_store(), self._table.state.clone()) + let mut cmd = UpdateBuilder::new(self._table.log_store(), self._table.state.clone()) .with_safe_cast(safe_cast); if let Some(writer_props) = writer_properties { @@ -349,7 +349,7 @@ impl RawDeltaTable { max_concurrent_tasks: Option, min_commit_interval: Option, ) -> PyResult { - let mut cmd = OptimizeBuilder::new(self._table.object_store(), self._table.state.clone()) + let mut cmd = OptimizeBuilder::new(self._table.log_store(), self._table.state.clone()) .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get)); if let Some(size) = target_size { cmd = cmd.with_target_size(size); @@ -379,7 +379,7 @@ impl RawDeltaTable { max_spill_size: usize, min_commit_interval: Option, ) -> PyResult { - let mut cmd = OptimizeBuilder::new(self._table.object_store(), self._table.state.clone()) + let mut cmd = OptimizeBuilder::new(self._table.log_store(), self._table.state.clone()) .with_max_concurrent_tasks(max_concurrent_tasks.unwrap_or_else(num_cpus::get)) .with_max_spill_size(max_spill_size) .with_type(OptimizeType::ZOrder(z_order_columns)); @@ -446,7 +446,7 @@ impl RawDeltaTable { let source_df = 
ctx.read_table(table_provider).unwrap(); let mut cmd = MergeBuilder::new( - self._table.object_store(), + self._table.log_store(), self._table.state.clone(), predicate, source_df, @@ -608,7 +608,7 @@ impl RawDeltaTable { ignore_missing_files: bool, protocol_downgrade_allowed: bool, ) -> PyResult { - let mut cmd = RestoreBuilder::new(self._table.object_store(), self._table.state.clone()); + let mut cmd = RestoreBuilder::new(self._table.log_store(), self._table.state.clone()); if let Some(val) = target { if let Ok(version) = val.extract::() { cmd = cmd.with_version_to_restore(version) @@ -822,7 +822,7 @@ impl RawDeltaTable { partition_by: Some(partition_by), predicate: None, }; - let store = self._table.object_store(); + let store = self._table.log_store(); rt()? .block_on(commit( @@ -866,7 +866,7 @@ impl RawDeltaTable { /// Run the delete command on the delta table: delete records following a predicate and return the delete metrics. #[pyo3(signature = (predicate = None))] pub fn delete(&mut self, predicate: Option) -> PyResult { - let mut cmd = DeleteBuilder::new(self._table.object_store(), self._table.state.clone()); + let mut cmd = DeleteBuilder::new(self._table.log_store(), self._table.state.clone()); if let Some(predicate) = predicate { cmd = cmd.with_predicate(predicate); } @@ -881,9 +881,8 @@ impl RawDeltaTable { /// have been deleted or are malformed #[pyo3(signature = (dry_run = true))] pub fn repair(&mut self, dry_run: bool) -> PyResult { - let cmd = - FileSystemCheckBuilder::new(self._table.object_store(), self._table.state.clone()) - .with_dry_run(dry_run); + let cmd = FileSystemCheckBuilder::new(self._table.log_store(), self._table.state.clone()) + .with_dry_run(dry_run); let (table, metrics) = rt()? .block_on(cmd.into_future()) From 7559c2460055b7292bad2ec01d8071bb1f410d4f Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Thu, 9 Nov 2023 00:18:46 +0100 Subject: [PATCH 05/16] fix: use correct folder for auto assigned labels --- .github/workflows/dev_pr/labeler.yml | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml index 71c240950f..e90f3296d5 100644 --- a/.github/workflows/dev_pr/labeler.yml +++ b/.github/workflows/dev_pr/labeler.yml @@ -1,11 +1,3 @@ -rust: - - delta-inspect/**/* - - proofs/**/* - - rust/**/* - -binding/python: - - python/**/* - ci: - .github/**.* @@ -16,17 +8,23 @@ documentation: - CONTRIBUTING.md - python/docs/**/* -storage/aws: - - aws/**/* - delta-inspect: - delta-inspect/**/* -binding/rust: - - rust/**/* - proofs: - proofs/**/* tlaplus: - tlaplus/**/* + +binding/python: + - python/**/* + +binding/rust: + - crates/**/* + +crate/core: + - crates/deltalake-core/**/* + +crate/sql: + - crates/deltalake-sql/**/* From da6e4389e9ff8e86223ff749db92979f92a49c39 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Thu, 9 Nov 2023 14:37:08 +0100 Subject: [PATCH 06/16] fix: run integration tests in CI --- .github/workflows/build.yml | 4 ++-- crates/deltalake-core/tests/repair_s3_rename_test.rs | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 80dec2eaef..f216830327 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -139,11 +139,11 @@ jobs: - name: Run tests with rustls (default) run: | - cargo test -p deltalake --features integration_test,azure,s3,gcs,datafusion + cargo test --features integration_test,azure,s3,gcs,datafusion - name: Run tests with 
native-tls run: | - cargo test -p deltalake --no-default-features --features integration_test,s3-native-tls,datafusion + cargo test --no-default-features --features integration_test,s3-native-tls,datafusion parquet2_test: runs-on: ubuntu-latest diff --git a/crates/deltalake-core/tests/repair_s3_rename_test.rs b/crates/deltalake-core/tests/repair_s3_rename_test.rs index ecfba0fbbe..557ebeae92 100644 --- a/crates/deltalake-core/tests/repair_s3_rename_test.rs +++ b/crates/deltalake-core/tests/repair_s3_rename_test.rs @@ -21,6 +21,7 @@ use tokio::time::Duration; #[tokio::test(flavor = "multi_thread")] #[serial] +#[ignore = "currently tests are hanging"] async fn repair_when_worker_pauses_before_rename_test() { let err = run_repair_test_case("test_1", true).await.unwrap_err(); // here worker is paused before copy, @@ -31,6 +32,7 @@ async fn repair_when_worker_pauses_before_rename_test() { #[tokio::test(flavor = "multi_thread")] #[serial] +#[ignore = "currently tests are hanging"] async fn repair_when_worker_pauses_after_rename_test() { let err = run_repair_test_case("test_2", false).await.unwrap_err(); // here worker is paused after copy but before delete, From 140f949d8b9fd6de0d485b237ad45eeae765f933 Mon Sep 17 00:00:00 2001 From: Denny Lee Date: Thu, 9 Nov 2023 14:55:42 -0800 Subject: [PATCH 07/16] Update README.md Update the Slack link to https://go.delta.io/slack --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b3dd824b77..fc572cd467 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ Deltalake - + #delta-rs in the Delta Lake Slack workspace

@@ -105,7 +105,7 @@ You can also try Delta Lake docker at [DockerHub](https://go.delta.io/dockerhub) We encourage you to reach out, and are [committed](https://github.com/delta-io/delta-rs/blob/main/CODE_OF_CONDUCT.md) to provide a welcoming community. -- [Join us in our Slack workspace](https://join.slack.com/t/delta-users/shared_invite/zt-23h0xwez7-wDTm43ZVEW2ZcbKn6Bc8Fg) +- [Join us in our Slack workspace](https://go.delta.io/slack) - [Report an issue](https://github.com/delta-io/delta-rs/issues/new?template=bug_report.md) - Looking to contribute? See our [good first issues](https://github.com/delta-io/delta-rs/contribute). From a327fa80060bf5c1997ba59992c8110b2fafdac6 Mon Sep 17 00:00:00 2001 From: Jan Schweizer Date: Sat, 11 Nov 2023 17:52:29 +0100 Subject: [PATCH 08/16] Correctly handle hidden files in _change_data and _delta_index & deletion vector files --- .../deltalake-core/src/operations/vacuum.rs | 53 +++++++++++++++---- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/crates/deltalake-core/src/operations/vacuum.rs b/crates/deltalake-core/src/operations/vacuum.rs index 2c4e00793c..015af693a9 100644 --- a/crates/deltalake-core/src/operations/vacuum.rs +++ b/crates/deltalake-core/src/operations/vacuum.rs @@ -32,6 +32,7 @@ use object_store::Error; use object_store::{path::Path, ObjectStore}; use serde::Serialize; use serde_json::Value; +use url::Url; use super::transaction::commit; use crate::crate_version; @@ -198,27 +199,44 @@ impl VacuumBuilder { .snapshot .files() .iter() - .map(|a| a.path.as_str()) + .map(|a| a.path.clone()) .chain( self.snapshot .all_tombstones() .iter() - .map(|r| r.path.as_str()), + .map(|r| r.path.clone()), ) - .collect::>(); + .chain(self.snapshot.files().iter().filter_map(|a| { + return if let Some(deletion_vector) = &a.deletion_vector { + if let Ok(parent) = &Url::parse(self.log_store.root_uri().as_str()) { + if let Ok(dv_absolut_path) = deletion_vector.absolute_path(&parent) { + Some(dv_absolut_path?.to_string()) + } else { + None + } + } else { + None + } + } else { + None + }; + })) + .collect::>(); while let Some(obj_meta) = all_files.next().await { // TODO should we allow NotFound here in case we have a temporary commit file in the list let obj_meta = obj_meta.map_err(DeltaTableError::from)?; - let is_hidden = is_hidden_directory(partition_columns, &obj_meta.location)?; + + if is_hidden_file(partition_columns, &obj_meta.location)? { + continue; + } if managed_files.contains(obj_meta.location.as_ref()) { - if !expired_tombstones.contains(obj_meta.location.as_ref()) || is_hidden { + if !expired_tombstones.contains(obj_meta.location.as_ref()) { continue; } } else if now_millis - retention_period.num_milliseconds() < obj_meta.last_modified.timestamp_millis() - || is_hidden { continue; } @@ -380,15 +398,15 @@ impl VacuumPlan { /// Names of the form partitionCol=[value] are partition directories, and should be /// deleted even if they'd normally be hidden. The _db_index directory contains (bloom filter) /// indexes and these must be deleted when the data they are tied to is deleted. 
-fn is_hidden_directory(partition_columns: &[String], path: &Path) -> Result { +fn is_hidden_file(partition_columns: &[String], path: &Path) -> Result { + let path_name = path.as_ref(); + let skip = path_name.starts_with("_delta_index") || path_name.starts_with("_change_data"); let is_hidden = path .parts() + .skip(skip as usize) .any(|p| p.as_ref().starts_with('.') || p.as_ref().starts_with('_')); - let path_name = path.as_ref(); Ok(is_hidden - && !path_name.starts_with("_delta_index") - && !path_name.starts_with("_change_data") && !partition_columns .iter() .any(|partition_column| path_name.starts_with(partition_column))) @@ -468,4 +486,19 @@ mod tests { assert_eq!(result.files_deleted, empty); } + + #[tokio::test] + async fn vacuum_table_with_dv_small() { + let table = open_table("./tests/data/table-with-dv-small") + .await + .unwrap(); + + let (_table, result) = VacuumBuilder::new(table.log_store, table.state) + .with_dry_run(true) + .await + .unwrap(); + + let empty: Vec = Vec::new(); + assert_eq!(result.files_deleted, empty); + } } From 752773acbd06ad58561545b25b4e1f2e83a6ac92 Mon Sep 17 00:00:00 2001 From: Jan Schweizer Date: Wed, 15 Nov 2023 16:26:14 +0100 Subject: [PATCH 09/16] Fix paths for managed files --- .../deltalake-core/src/operations/vacuum.rs | 47 ++++++++++++++----- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/crates/deltalake-core/src/operations/vacuum.rs b/crates/deltalake-core/src/operations/vacuum.rs index 015af693a9..27408fe48c 100644 --- a/crates/deltalake-core/src/operations/vacuum.rs +++ b/crates/deltalake-core/src/operations/vacuum.rs @@ -195,22 +195,32 @@ impl VacuumBuilder { .ok_or(DeltaTableError::NoMetadata)? .partition_columns; + // A set containing the absolute paths to managed files let managed_files = self .snapshot .files() .iter() - .map(|a| a.path.clone()) - .chain( - self.snapshot - .all_tombstones() - .iter() - .map(|r| r.path.clone()), - ) + .map(|a| { + if is_absolute_path(&a.path) { + a.path.clone() + } else { + format!("{}{}", self.log_store.root_uri(), a.path) + } + }) + .chain(self.snapshot.all_tombstones().iter().map(|r| { + if is_absolute_path(&r.path) { + r.path.clone() + } else { + format!("{}{}", self.log_store.root_uri(), r.path) + } + })) .chain(self.snapshot.files().iter().filter_map(|a| { return if let Some(deletion_vector) = &a.deletion_vector { - if let Ok(parent) = &Url::parse(self.log_store.root_uri().as_str()) { - if let Ok(dv_absolut_path) = deletion_vector.absolute_path(&parent) { - Some(dv_absolut_path?.to_string()) + if let Ok(parent) = + &Url::parse(&format!("file://{}", self.log_store.root_uri().as_str())) + { + if let Ok(dv_absolute_path) = deletion_vector.absolute_path(&parent) { + Some(dv_absolute_path?.path().to_string()) } else { None } @@ -231,7 +241,7 @@ impl VacuumBuilder { continue; } - if managed_files.contains(obj_meta.location.as_ref()) { + if self.is_file_managed(&managed_files, &obj_meta.location) { if !expired_tombstones.contains(obj_meta.location.as_ref()) { continue; } @@ -253,6 +263,16 @@ impl VacuumBuilder { specified_retention_millis: Some(retention_period.num_milliseconds()), }) } + + /// Whether a file is contained within the set of managed files. 
+ fn is_file_managed(&self, managed_files: &HashSet, file: &Path) -> bool { + return if is_absolute_path(file.as_ref()) { + managed_files.contains(file.as_ref()) + } else { + let path = format!("{}{}", self.log_store.root_uri(), file.as_ref()); + managed_files.contains(&path) + }; + } } impl std::future::IntoFuture for VacuumBuilder { @@ -285,6 +305,11 @@ impl std::future::IntoFuture for VacuumBuilder { } } +fn is_absolute_path(path: &str) -> bool { + let path = std::path::Path::new(path); + return path.is_absolute(); +} + /// Encapsulate which files are to be deleted and the parameters used to make that decision struct VacuumPlan { /// What files are to be deleted From 48b4e3ca996d2f53ed97f74498abffc93bc3d0c8 Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Fri, 1 Dec 2023 18:16:56 +0100 Subject: [PATCH 10/16] docs: fix all examples and change overall structure (#1931) # Description I have made a bunch of improvements to fix the overall structure due to example sections not being consistent. I've also enabled some extra features. Fixed also the issue of some classes/functions not being shown properly. --- .github/workflows/python_build.yml | 5 - docs/api/catalog.md | 1 + docs/api/delta_table.md | 10 - docs/api/delta_table/delta_table_merger.md | 5 + docs/api/delta_table/delta_table_optimizer.md | 5 + docs/api/delta_table/index.md | 5 + docs/api/delta_table/metadata.md | 6 + docs/api/delta_writer.md | 7 + docs/api/exceptions.md | 6 + docs/api/schema.md | 23 +- docs/api/storage.md | 6 +- .../appending-overwriting-delta-lake-table.md | 2 +- .../small-file-compaction-with-optimize.md | 24 +- mkdocs.yml | 28 +- python/deltalake/_internal.pyi | 290 ++++++---- python/deltalake/table.py | 538 ++++++++++-------- python/deltalake/writer.py | 14 +- 17 files changed, 544 insertions(+), 431 deletions(-) create mode 100644 docs/api/catalog.md delete mode 100644 docs/api/delta_table.md create mode 100644 docs/api/delta_table/delta_table_merger.md create mode 100644 docs/api/delta_table/delta_table_optimizer.md create mode 100644 docs/api/delta_table/index.md create mode 100644 docs/api/delta_table/metadata.md create mode 100644 docs/api/delta_writer.md create mode 100644 docs/api/exceptions.md diff --git a/.github/workflows/python_build.yml b/.github/workflows/python_build.yml index 51626310be..bc2f20cc9a 100644 --- a/.github/workflows/python_build.yml +++ b/.github/workflows/python_build.yml @@ -127,11 +127,6 @@ jobs: python -m pytest -m "not pandas and not integration and not benchmark" pip install pandas - - name: Build Sphinx documentation - run: | - source venv/bin/activate - make build-documentation - benchmark: name: Python Benchmark runs-on: ubuntu-latest diff --git a/docs/api/catalog.md b/docs/api/catalog.md new file mode 100644 index 0000000000..d75dd648db --- /dev/null +++ b/docs/api/catalog.md @@ -0,0 +1 @@ +::: deltalake.data_catalog.DataCatalog \ No newline at end of file diff --git a/docs/api/delta_table.md b/docs/api/delta_table.md deleted file mode 100644 index 75284664fe..0000000000 --- a/docs/api/delta_table.md +++ /dev/null @@ -1,10 +0,0 @@ -# DeltaTable - -::: deltalake.table - options: - show_root_heading: false - -## Writing Delta Tables - -::: deltalake.write_deltalake - diff --git a/docs/api/delta_table/delta_table_merger.md b/docs/api/delta_table/delta_table_merger.md new file mode 100644 index 0000000000..7d707a16f2 --- /dev/null +++ b/docs/api/delta_table/delta_table_merger.md @@ -0,0 +1,5 @@ +# TableMerger + +::: 
deltalake.table.TableMerger + options: + show_root_heading: true \ No newline at end of file diff --git a/docs/api/delta_table/delta_table_optimizer.md b/docs/api/delta_table/delta_table_optimizer.md new file mode 100644 index 0000000000..2275cbd0ca --- /dev/null +++ b/docs/api/delta_table/delta_table_optimizer.md @@ -0,0 +1,5 @@ +# TableOptimizer + +::: deltalake.table.TableOptimizer + options: + show_root_heading: true \ No newline at end of file diff --git a/docs/api/delta_table/index.md b/docs/api/delta_table/index.md new file mode 100644 index 0000000000..46a65af8d3 --- /dev/null +++ b/docs/api/delta_table/index.md @@ -0,0 +1,5 @@ +# DeltaTable + +::: deltalake.DeltaTable + options: + show_root_heading: true \ No newline at end of file diff --git a/docs/api/delta_table/metadata.md b/docs/api/delta_table/metadata.md new file mode 100644 index 0000000000..92ff62370a --- /dev/null +++ b/docs/api/delta_table/metadata.md @@ -0,0 +1,6 @@ +# Metadata + +::: deltalake.Metadata + options: + show_root_heading: true + diff --git a/docs/api/delta_writer.md b/docs/api/delta_writer.md new file mode 100644 index 0000000000..71c31534b0 --- /dev/null +++ b/docs/api/delta_writer.md @@ -0,0 +1,7 @@ +# Writer +## Write to Delta Tables + +::: deltalake.write_deltalake + +## Convert to Delta Tables +::: deltalake.convert_to_deltalake \ No newline at end of file diff --git a/docs/api/exceptions.md b/docs/api/exceptions.md new file mode 100644 index 0000000000..afe99f92f1 --- /dev/null +++ b/docs/api/exceptions.md @@ -0,0 +1,6 @@ +# Exceptions + +::: deltalake.exceptions.DeltaError +::: deltalake.exceptions.DeltaProtocolError +::: deltalake.exceptions.TableNotFoundError +::: deltalake.exceptions.CommitFailedError diff --git a/docs/api/schema.md b/docs/api/schema.md index 9a91f61062..9a6ba7b2e6 100644 --- a/docs/api/schema.md +++ b/docs/api/schema.md @@ -1,28 +1,29 @@ -## Delta Lake Schemas - +## Schema and field Schemas, fields, and data types are provided in the ``deltalake.schema`` submodule. -::: deltalake.schema.Schema +::: deltalake.Schema options: show_root_heading: true show_root_toc_entry: true -::: deltalake.schema.PrimitiveType +::: deltalake.Field options: show_root_heading: true show_root_toc_entry: true -::: deltalake.schema.ArrayType + +## Data types +::: deltalake.schema.PrimitiveType options: show_root_heading: true show_root_toc_entry: true -::: deltalake.schema.MapType +::: deltalake.schema.ArrayType options: show_root_heading: true show_root_toc_entry: true -::: deltalake.schema.Field +::: deltalake.schema.MapType options: show_root_heading: true show_root_toc_entry: true @@ -30,10 +31,4 @@ Schemas, fields, and data types are provided in the ``deltalake.schema`` submodu ::: deltalake.schema.StructType options: show_root_heading: true - show_root_toc_entry: true - -::: deltalake.data_catalog - -## Delta Storage Handler - -::: deltalake.fs + show_root_toc_entry: true \ No newline at end of file diff --git a/docs/api/storage.md b/docs/api/storage.md index 77fd28c81a..ddb18250cf 100644 --- a/docs/api/storage.md +++ b/docs/api/storage.md @@ -1,3 +1,5 @@ -## Delta Storage Handler +# Storage -::: deltalake.fs +The delta filesystem handler for the pyarrow engine writer. 
+ +::: deltalake.fs.DeltaStorageHandler diff --git a/docs/usage/appending-overwriting-delta-lake-table.md b/docs/usage/appending-overwriting-delta-lake-table.md index 0930d8da1e..397edb9d0d 100644 --- a/docs/usage/appending-overwriting-delta-lake-table.md +++ b/docs/usage/appending-overwriting-delta-lake-table.md @@ -63,7 +63,7 @@ Here are the contents of the Delta table after the overwrite operation: Overwriting just performs a logical delete. It doesn't physically remove the previous data from storage. Time travel back to the previous version to confirm that the old version of the table is still accessable. -``` +```python dt = DeltaTable("tmp/some-table", version=1) +-------+----------+ diff --git a/docs/usage/optimize/small-file-compaction-with-optimize.md b/docs/usage/optimize/small-file-compaction-with-optimize.md index ece15deea4..78d8778ff5 100644 --- a/docs/usage/optimize/small-file-compaction-with-optimize.md +++ b/docs/usage/optimize/small-file-compaction-with-optimize.md @@ -16,7 +16,7 @@ Let’s start by creating a Delta table with a lot of small files so we can demo Start by writing a function that generates on thousand rows of random data given a timestamp. -``` +```python def record_observations(date: datetime) -> pa.Table: """Pulls data for a certain datetime""" nrows = 1000 @@ -31,7 +31,7 @@ def record_observations(date: datetime) -> pa.Table: Let’s run this function and observe the output: -``` +```python record_observations(datetime(2021, 1, 1, 12)).to_pandas() date timestamp value @@ -44,7 +44,7 @@ record_observations(datetime(2021, 1, 1, 12)).to_pandas() Let’s write 100 hours worth of data to the Delta table. -``` +```python # Every hour starting at midnight on 2021-01-01 hours_iter = (datetime(2021, 1, 1) + timedelta(hours=i) for i in itertools.count()) @@ -60,7 +60,7 @@ for timestamp in itertools.islice(hours_iter, 100): This data was appended to the Delta table in 100 separate transactions, so the table will contain 100 transaction log entries and 100 data files. You can see the number of files with the `files()` method. -``` +```python dt = DeltaTable("observation_data") len(dt.files()) # 100 ``` @@ -101,7 +101,7 @@ Each of these Parquet files are tiny - they’re only 10 KB. Let’s see how to Let’s run the optimize command to compact the existing small files into larger files: -``` +```python dt = DeltaTable("observation_data") dt.optimize() @@ -109,7 +109,7 @@ dt.optimize() Here’s the output of the command: -``` +```python {'numFilesAdded': 5, 'numFilesRemoved': 100, 'filesAdded': {'min': 39000, @@ -137,7 +137,7 @@ Let’s append some more data to the Delta table and see how we can selectively Let’s append another 24 hours of data to the Delta table: -``` +```python for timestamp in itertools.islice(hours_iter, 24): write_deltalake( dt, @@ -149,7 +149,7 @@ for timestamp in itertools.islice(hours_iter, 24): We can use `get_add_actions()` to introspect the table state. We can see that `2021-01-06` has only a few hours of data so far, so we don't want to optimize that yet. But `2021-01-05` has all 24 hours of data, so it's ready to be optimized. -``` +```python dt.get_add_actions(flatten=True).to_pandas()[ "partition.date" ].value_counts().sort_index() @@ -164,7 +164,7 @@ dt.get_add_actions(flatten=True).to_pandas()[ To optimize a single partition, you can pass in a `partition_filters` argument speficying which partitions to optimize. 
-``` +```python dt.optimize(partition_filters=[("date", "=", "2021-01-05")]) {'numFilesAdded': 1, @@ -188,7 +188,7 @@ dt.optimize(partition_filters=[("date", "=", "2021-01-05")]) This optimize operation tombstones 21 small data files and adds one file with all the existing data properly condensed. Let’s take a look a portion of the `_delta_log/00000000000000000125.json` file, which is the transaction log entry that corresponds with this incremental optimize command. -``` +```python { "remove": { "path": "date=2021-01-05/part-00000-41178aab-2491-488f-943d-8f03867295ee-c000.snappy.parquet", @@ -248,13 +248,13 @@ It’s normally a good idea to have a retention period of at least 7 days. For Let’s run the vacuum command: -``` +```python dt.vacuum(retention_hours=0, enforce_retention_duration=False, dry_run=False) ``` The command returns a list of all the files that are removed from storage: -``` +```python ['date=2021-01-02/39-a98680f2-0e0e-4f26-a491-18b183f9eb05-0.parquet', 'date=2021-01-02/41-e96bc8bb-c571-484c-b534-e897424fb7da-0.parquet', … diff --git a/mkdocs.yml b/mkdocs.yml index 97b6e91b0e..a86257c932 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -10,10 +10,14 @@ theme: features: - navigation.tracking - navigation.instant + - navigation.expand - navigation.tabs + - navigation.indexes - navigation.tabs.sticky - navigation.footer - content.tabs.link + - content.code.annotation + - content.code.copy nav: - Home: index.md - Usage: @@ -31,11 +35,17 @@ nav: - Small file compaction: usage/optimize/small-file-compaction-with-optimize.md - Z Order: usage/optimize/delta-lake-z-order.md - API Reference: - - api/delta_table.md + - api/delta_writer.md + - Table: + - api/delta_table/index.md + - api/delta_table/metadata.md + - api/delta_table/delta_table_merger.md + - api/delta_table/delta_table_optimizer.md - api/schema.md - api/storage.md + - api/catalog.md + - api/exceptions.md - Integrations: - - Arrow: integrations/delta-lake-arrow.md - pandas: integrations/delta-lake-pandas.md not_in_nav: | /_build/ @@ -61,7 +71,7 @@ plugins: show_source: false show_symbol_type_in_heading: true show_signature_annotations: true - show_root_heading: false + show_root_heading: true show_root_full_path: true separate_signature: true docstring_options: @@ -81,6 +91,11 @@ plugins: on_page_markdown: 'docs._build.hooks:on_page_markdown' markdown_extensions: + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite - admonition - pymdownx.details - attr_list @@ -97,4 +112,9 @@ markdown_extensions: - footnotes extra: - python_api_url: https://delta-io.github.io/delta-rs/api/ \ No newline at end of file + python_api_url: https://delta-io.github.io/delta-rs/api/ + generator: false + social: + - icon: fontawesome/brands/slack + link: https://go.delta.io/slack + name: Delta slack channel \ No newline at end of file diff --git a/python/deltalake/_internal.pyi b/python/deltalake/_internal.pyi index d7c0e1a8f9..e1f5288b81 100644 --- a/python/deltalake/_internal.pyi +++ b/python/deltalake/_internal.pyi @@ -210,36 +210,39 @@ class PrimitiveType: The JSON representation for a primitive type is just a quoted string: `PrimitiveType.from_json('"integer"')` Args: - json: A JSON string + json: a JSON string - Returns a [PrimitiveType][deltalake.schema.PrimitiveType] type + Returns: + a PrimitiveType type """ def to_pyarrow(self) -> pyarrow.DataType: """Get the equivalent PyArrow type (pyarrow.DataType)""" @staticmethod def from_pyarrow(type: pyarrow.DataType) -> 
PrimitiveType: - """Create a [PrimitiveType][deltalake.schema.PrimitiveType] from a PyArrow type + """Create a PrimitiveType from a PyArrow datatype Will raise `TypeError` if the PyArrow type is not a primitive type. Args: - type: A PyArrow [DataType][pyarrow.DataType] type + type: A PyArrow DataType - Returns: a [PrimitiveType][deltalake.schema.PrimitiveType] type + Returns: + a PrimitiveType """ class ArrayType: """An Array (List) DataType - Can either pass the element type explicitly or can pass a string - if it is a primitive type: - ``` - ArrayType(PrimitiveType("integer")) - # Returns ArrayType(PrimitiveType("integer"), contains_null=True) + Example: + Can either pass the element type explicitly or can pass a string + if it is a primitive type: + ```python + ArrayType(PrimitiveType("integer")) + # Returns ArrayType(PrimitiveType("integer"), contains_null=True) - ArrayType("integer", contains_null=False) - # Returns ArrayType(PrimitiveType("integer"), contains_null=False) - ``` + ArrayType("integer", contains_null=False) + # Returns ArrayType(PrimitiveType("integer"), contains_null=False) + ``` """ def __init__( @@ -269,23 +272,25 @@ class ArrayType: def from_json(json: str) -> "ArrayType": """Create an ArrayType from a JSON string - The JSON representation for an array type is an object with `type` (set to - `"array"`), `elementType`, and `containsNull`: - ``` - ArrayType.from_json( - '''{ - "type": "array", - "elementType": "integer", - "containsNull": false - }''' - ) - # Returns ArrayType(PrimitiveType("integer"), contains_null=False) - ``` - Args: - json: A JSON string + json: a JSON string + + Returns: + an ArrayType - Returns: an [ArrayType][deltalake.schema.ArrayType] type + Example: + The JSON representation for an array type is an object with `type` (set to + `"array"`), `elementType`, and `containsNull`. + ```python + ArrayType.from_json( + '''{ + "type": "array", + "elementType": "integer", + "containsNull": false + }''' + ) + # Returns ArrayType(PrimitiveType("integer"), contains_null=False) + ``` """ def to_pyarrow( self, @@ -298,9 +303,10 @@ class ArrayType: Will raise `TypeError` if a different PyArrow DataType is provided. Args: - type: The PyArrow [ListType][pyarrow.ListType] + type: The PyArrow ListType - Returns: an [ArrayType][deltalake.schema.ArrayType] type + Returns: + an ArrayType """ class MapType: @@ -310,13 +316,14 @@ class MapType: or [StructType][deltalake.schema.StructType]. A string can also be passed, which will be parsed as a primitive type: - ``` - MapType(PrimitiveType("integer"), PrimitiveType("string")) - # Returns MapType(PrimitiveType("integer"), PrimitiveType("string"), value_contains_null=True) + Example: + ```python + MapType(PrimitiveType("integer"), PrimitiveType("string")) + # Returns MapType(PrimitiveType("integer"), PrimitiveType("string"), value_contains_null=True) - MapType("integer", "string", value_contains_null=False) - # Returns MapType(PrimitiveType("integer"), PrimitiveType("string"), value_contains_null=False) - ``` + MapType("integer", "string", value_contains_null=False) + # Returns MapType(PrimitiveType("integer"), PrimitiveType("string"), value_contains_null=False) + ``` """ def __init__( @@ -352,29 +359,36 @@ class MapType: """ def to_json(self) -> str: - """Get JSON string representation of map type.""" + """Get JSON string representation of map type. 
+
+        Returns:
+            a JSON string
+        """
     @staticmethod
     def from_json(json: str) -> MapType:
         """Create a MapType from a JSON string
 
-        The JSON representation for a map type is an object with `type` (set to `map`),
-        `keyType`, `valueType`, and `valueContainsNull`:
-        ```
-        MapType.from_json(
-            '''{
-                "type": "map",
-                "keyType": "integer",
-                "valueType": "string",
-                "valueContainsNull": true
-            }'''
-        )
-        # Returns MapType(PrimitiveType("integer"), PrimitiveType("string"), value_contains_null=True)
-        ```
-
         Args:
-            json: A JSON string
+            json: a JSON string
 
-        Returns: a [MapType][deltalake.schema.MapType] type
+        Returns:
+            a MapType
+
+        Example:
+            The JSON representation for a map type is an object with `type` (set to `map`),
+            `keyType`, `valueType`, and `valueContainsNull`:
+
+            ```python
+            MapType.from_json(
+                '''{
+                    "type": "map",
+                    "keyType": "integer",
+                    "valueType": "string",
+                    "valueContainsNull": true
+                }'''
+            )
+            # Returns MapType(PrimitiveType("integer"), PrimitiveType("string"), value_contains_null=True)
+            ```
         """
     def to_pyarrow(self) -> pyarrow.MapType:
         """Get the equivalent PyArrow data type."""
@@ -387,25 +401,27 @@ class MapType:
         Args:
             type: the PyArrow MapType
 
-        Returns: a [MapType][deltalake.schema.MapType] type
+        Returns:
+            a MapType
         """
 
 class Field:
     """A field in a Delta StructType or Schema
 
-    Can create with just a name and a type:
-    ```
-    Field("my_int_col", "integer")
-    # Returns Field("my_int_col", PrimitiveType("integer"), nullable=True, metadata=None)
-    ```
+    Example:
+        Can create with just a name and a type:
+        ```python
+        Field("my_int_col", "integer")
+        # Returns Field("my_int_col", PrimitiveType("integer"), nullable=True, metadata=None)
+        ```
 
-    Can also attach metadata to the field. Metadata should be a dictionary with
-    string keys and JSON-serializable values (str, list, int, float, dict):
+        Can also attach metadata to the field. Metadata should be a dictionary with
+        string keys and JSON-serializable values (str, list, int, float, dict):
 
-    ```
-    Field("my_col", "integer", metadata={"custom_metadata": {"test": 2}})
-    # Returns Field("my_col", PrimitiveType("integer"), nullable=True, metadata={"custom_metadata": {"test": 2}})
-    ```
+        ```python
+        Field("my_col", "integer", metadata={"custom_metadata": {"test": 2}})
+        # Returns Field("my_col", PrimitiveType("integer"), nullable=True, metadata={"custom_metadata": {"test": 2}})
+        ```
     """
 
     def __init__(
@@ -440,10 +456,15 @@ class Field:
     def to_json(self) -> str:
         """Get the field as JSON string.
-        ```
-        Field("col", "integer").to_json()
-        # Returns '{"name":"col","type":"integer","nullable":true,"metadata":{}}'
-        ```
+
+        Returns:
+            a JSON string
+
+        Example:
+            ```python
+            Field("col", "integer").to_json()
+            # Returns '{"name":"col","type":"integer","nullable":true,"metadata":{}}'
+            ```
         """
     @staticmethod
     def from_json(json: str) -> Field:
@@ -452,25 +473,27 @@ class Field:
         Args:
             json: the JSON string.
 
-        Returns: Field
+        Returns:
+            Field
 
         Example:
-            ```
-            Field.from_json('''{
-                "name": "col",
-                "type": "integer",
-                "nullable": true,
-                "metadata": {}
-            }'''
-            )
-            # Returns Field(col, PrimitiveType("integer"), nullable=True)
-            ```
+            ```
+            Field.from_json('''{
+                "name": "col",
+                "type": "integer",
+                "nullable": true,
+                "metadata": {}
+            }'''
+            )
+            # Returns Field(col, PrimitiveType("integer"), nullable=True)
+            ```
         """
     def to_pyarrow(self) -> pyarrow.Field:
         """Convert to an equivalent PyArrow field
         Note: This currently doesn't preserve field metadata.
- Returns: a [pyarrow.Field][pyarrow.Field] type + Returns: + a pyarrow Field """ @staticmethod def from_pyarrow(field: pyarrow.Field) -> Field: @@ -478,21 +501,21 @@ class Field: Note: This currently doesn't preserve field metadata. Args: - field: a PyArrow Field type + field: a PyArrow Field - Returns: a [Field][deltalake.schema.Field] type + Returns: + a Field """ class StructType: """A struct datatype, containing one or more subfields Example: - - Create with a list of :class:`Field`: - ``` - StructType([Field("x", "integer"), Field("y", "string")]) - # Creates: StructType([Field(x, PrimitiveType("integer"), nullable=True), Field(y, PrimitiveType("string"), nullable=True)]) - ``` + Create with a list of :class:`Field`: + ```python + StructType([Field("x", "integer"), Field("y", "string")]) + # Creates: StructType([Field(x, PrimitiveType("integer"), nullable=True), Field(y, PrimitiveType("string"), nullable=True)]) + ``` """ def __init__(self, fields: List[Field]) -> None: ... @@ -503,33 +526,42 @@ class StructType: def to_json(self) -> str: """Get the JSON representation of the type. - ``` - StructType([Field("x", "integer")]).to_json() - # Returns '{"type":"struct","fields":[{"name":"x","type":"integer","nullable":true,"metadata":{}}]}' - ``` + + Returns: + a JSON string + + Example: + ```python + StructType([Field("x", "integer")]).to_json() + # Returns '{"type":"struct","fields":[{"name":"x","type":"integer","nullable":true,"metadata":{}}]}' + ``` """ @staticmethod def from_json(json: str) -> StructType: """Create a new StructType from a JSON string. - ``` - StructType.from_json( - '''{ - "type": "struct", - "fields": [{"name": "x", "type": "integer", "nullable": true, "metadata": {}}] - }''' - ) - # Returns StructType([Field(x, PrimitiveType("integer"), nullable=True)]) - ``` Args: json: a JSON string - Returns: a [StructType][deltalake.schema.StructType] type + Returns: + a StructType + + Example: + ```python + StructType.from_json( + '''{ + "type": "struct", + "fields": [{"name": "x", "type": "integer", "nullable": true, "metadata": {}}] + }''' + ) + # Returns StructType([Field(x, PrimitiveType("integer"), nullable=True)]) + ``` """ def to_pyarrow(self) -> pyarrow.StructType: """Get the equivalent PyArrow StructType - Returns: a PyArrow [StructType][pyarrow.StructType] type + Returns: + a PyArrow StructType """ @staticmethod def from_pyarrow(type: pyarrow.StructType) -> StructType: @@ -540,7 +572,8 @@ class StructType: Args: type: a PyArrow struct type. - Returns: a [StructType][deltalake.schema.StructType] type + Returns: + a StructType """ class Schema: @@ -553,38 +586,44 @@ class Schema: """ def to_json(self) -> str: """Get the JSON string representation of the Schema. - A schema has the same JSON format as a StructType. - ``` - Schema([Field("x", "integer")]).to_json() - # Returns '{"type":"struct","fields":[{"name":"x","type":"integer","nullable":true,"metadata":{}}]}' - ``` - Returns: a JSON string + + Returns: + a JSON string + + Example: + A schema has the same JSON format as a StructType. + ```python + Schema([Field("x", "integer")]).to_json() + # Returns '{"type":"struct","fields":[{"name":"x","type":"integer","nullable":true,"metadata":{}}]}' + ``` """ @staticmethod def from_json(json: str) -> Schema: """Create a new Schema from a JSON string. - A schema has the same JSON format as a StructType. 
- ``` - Schema.from_json('''{ - "type": "struct", - "fields": [{"name": "x", "type": "integer", "nullable": true, "metadata": {}}] - } - )''' - # Returns Schema([Field(x, PrimitiveType("integer"), nullable=True)]) - ``` - Args: json: a JSON string + + Example: + A schema has the same JSON format as a StructType. + ```python + Schema.from_json('''{ + "type": "struct", + "fields": [{"name": "x", "type": "integer", "nullable": true, "metadata": {}}] + } + )''' + # Returns Schema([Field(x, PrimitiveType("integer"), nullable=True)]) + ``` """ def to_pyarrow(self, as_large_types: bool = False) -> pyarrow.Schema: """Return equivalent PyArrow schema Args: - as_large_types: get schema with all variable size types (list, binary, string) as large variants (with int64 indices). This is for compatibility with systems like Polars that only support the large versions of Arrow types. + as_large_types: get schema with all variable size types (list, binary, string) as large variants (with int64 indices). + This is for compatibility with systems like Polars that only support the large versions of Arrow types. Returns: - a PyArrow [Schema][pyarrow.Schema] type + a PyArrow Schema """ @staticmethod def from_pyarrow(type: pyarrow.Schema) -> Schema: @@ -593,9 +632,10 @@ class Schema: Will raise `TypeError` if the PyArrow type is not a primitive type. Args: - type: A PyArrow [Schema][pyarrow.Schema] type + type: A PyArrow Schema - Returns: a [Schema][deltalake.schema.Schema] type + Returns: + a Schema """ class ObjectInputFile: diff --git a/python/deltalake/table.py b/python/deltalake/table.py index b238af7929..3ac28acf88 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -209,13 +209,13 @@ def _filters_to_expression(filters: FilterType) -> Expression: the op is in or not in, the value must be a collection such as a list, a set or a tuple. The supported type for value is str. Use empty string `''` for Null partition value. -Examples: -``` -("x", "=", "a") -("x", "!=", "a") -("y", "in", ["a", "b", "c"]) -("z", "not in", ["a","b"]) -``` +Example: + ``` + ("x", "=", "a") + ("x", "!=", "a") + ("y", "in", ["a", "b", "c"]) + ("z", "not in", ["a","b"]) + ``` """ @@ -329,13 +329,13 @@ def files( the op is in or not in, the value must be a collection such as a list, a set or a tuple. The supported type for value is str. Use empty string `''` for Null partition value. - Examples: - ``` - ("x", "=", "a") - ("x", "!=", "a") - ("y", "in", ["a", "b", "c"]) - ("z", "not in", ["a","b"]) - ``` + Example: + ``` + ("x", "=", "a") + ("x", "!=", "a") + ("y", "in", ["a", "b", "c"]) + ("z", "not in", ["a","b"]) + ``` """ return self._table.files(self.__stringify_partition_values(partition_filters)) @@ -366,13 +366,13 @@ def file_uris( the op is in or not in, the value must be a collection such as a list, a set or a tuple. The supported type for value is str. Use empty string `''` for Null partition value. 
- Examples: - ``` - ("x", "=", "a") - ("x", "!=", "a") - ("y", "in", ["a", "b", "c"]) - ("z", "not in", ["a","b"]) - ``` + Example: + ``` + ("x", "=", "a") + ("x", "!=", "a") + ("y", "in", ["a", "b", "c"]) + ("z", "not in", ["a","b"]) + ``` """ return self._table.file_uris( self.__stringify_partition_values(partition_filters) @@ -397,12 +397,12 @@ def load_with_datetime(self, datetime_string: str) -> None: Args: datetime_string: the identifier of the datetime point of the DeltaTable to load - Examples: - ``` - "2018-01-26T18:30:09Z" - "2018-12-19T16:39:57-08:00" - "2018-01-26T18:30:09.453+00:00" - ``` + Example: + ``` + "2018-01-26T18:30:09Z" + "2018-12-19T16:39:57-08:00" + "2018-01-26T18:30:09.453+00:00" + ``` """ self._table.load_with_datetime(datetime_string) @@ -511,7 +511,7 @@ def update( Args: updates: a mapping of column name to update SQL expression. new_values: a mapping of column name to python datatype. - predicate: a logical expression, defaults to None + predicate: a logical expression. writer_properties: Pass writer properties to the Rust parquet writer, see options https://arrow.apache.org/rust/parquet/file/properties/struct.WriterProperties.html, only the following fields are supported: `data_page_size_limit`, `dictionary_page_size_limit`, `data_page_row_count_limit`, `write_batch_size`, `max_row_group_size`. @@ -520,34 +520,43 @@ def update( Returns: the metrics from update - Examples: - - Update some row values with SQL predicate. This is equivalent to `UPDATE table SET deleted = true WHERE id = '3'` + Example: + **Update some row values with SQL predicate** + + This is equivalent to `UPDATE table SET deleted = true WHERE id = '3'` + ```py + from deltalake import write_deltalake, DeltaTable + import pandas as pd + df = pd.DataFrame( + {"id": ["1", "2", "3"], + "deleted": [False, False, False], + "price": [10., 15., 20.] + }) + write_deltalake("tmp", df) + dt = DeltaTable("tmp") + dt.update(predicate="id = '3'", updates = {"deleted": 'True'}) - >>> from deltalake import write_deltalake, DeltaTable - >>> import pandas as pd - >>> df = pd.DataFrame({"id": ["1", "2", "3"], "deleted": [False, False, False], "price": [10., 15., 20.]}) - >>> write_deltalake("tmp", df) - >>> dt = DeltaTable("tmp") - >>> dt.update(predicate="id = '3'", updates = {"deleted": 'True'}) - {'num_added_files': 1, 'num_removed_files': 1, 'num_updated_rows': 1, 'num_copied_rows': 2, 'execution_time_ms': ..., 'scan_time_ms': ...} + ``` + + **Update all row values** - Update all row values. This is equivalent to - ``UPDATE table SET deleted = true, id = concat(id, '_old')``. + This is equivalent to ``UPDATE table SET deleted = true, id = concat(id, '_old')``. + ```py + dt.update(updates = {"deleted": 'True', "id": "concat(id, '_old')"}) - >>> dt.update(updates = {"deleted": 'True', "id": "concat(id, '_old')"}) - - {'num_added_files': 1, 'num_removed_files': 1, 'num_updated_rows': 3, 'num_copied_rows': 0, 'execution_time_ms': ..., 'scan_time_ms': ...} + ``` - To use Python objects instead of SQL strings, use the `new_values` parameter - instead of the `updates` parameter. For example, this is equivalent to - ``UPDATE table SET price = 150.10 WHERE id = '1'`` + **Use Python objects instead of SQL strings** - >>> dt.update(predicate="id = '1_old'", new_values = {"price": 150.10}) - {'num_added_files': 1, 'num_removed_files': 1, 'num_updated_rows': 1, 'num_copied_rows': 2, 'execution_time_ms': ..., 'scan_time_ms': ...} + Use the `new_values` parameter instead of the `updates` parameter. 
For example, + this is equivalent to ``UPDATE table SET price = 150.10 WHERE id = '1'`` + ```py + dt.update(predicate="id = '1_old'", new_values = {"price": 150.10}) + {'num_added_files': 1, 'num_removed_files': 1, 'num_updated_rows': 1, 'num_copied_rows': 2, 'execution_time_ms': ..., 'scan_time_ms': ...} + ``` """ if updates is None and new_values is not None: updates = {} @@ -614,11 +623,11 @@ def merge( match the underlying table. Args: - source (pyarrow.Table | pyarrow.RecordBatch | pyarrow.RecordBatchReader ): source data - predicate (str): SQL like predicate on how to merge - source_alias (str): Alias for the source table - target_alias (str): Alias for the target table - error_on_type_mismatch (bool): specify if merge will return error if data types are mismatching :default = True + source: source data + predicate: SQL like predicate on how to merge + source_alias: Alias for the source table + target_alias: Alias for the target table + error_on_type_mismatch: specify if merge will return error if data types are mismatching :default = True Returns: TableMerger: TableMerger Object @@ -858,26 +867,27 @@ def get_add_actions(self, flatten: bool = False) -> pyarrow.RecordBatch: a PyArrow RecordBatch containing the add action data. Example: - - >>> from pprint import pprint - >>> from deltalake import DeltaTable, write_deltalake - >>> import pyarrow as pa - >>> data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) - >>> write_deltalake("tmp", data, partition_by=["x"]) - >>> dt = DeltaTable("tmp") - >>> df = dt.get_add_actions().to_pandas() - >>> df["path"].sort_values(ignore_index=True) - 0 x=1/0-... - 1 x=2/0-... - 2 x=3/0-... - ... - >>> df = dt.get_add_actions(flatten=True).to_pandas() - >>> df["partition.x"].sort_values(ignore_index=True) + ```python + from pprint import pprint + from deltalake import DeltaTable, write_deltalake + import pyarrow as pa + data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) + write_deltalake("tmp", data, partition_by=["x"]) + dt = DeltaTable("tmp") + df = dt.get_add_actions().to_pandas() + df["path"].sort_values(ignore_index=True) + 0 x=1/0 + 1 x=2/0 + 2 x=3/0 + ``` + + ```python + df = dt.get_add_actions(flatten=True).to_pandas() + df["partition.x"].sort_values(ignore_index=True) 0 1 1 2 2 3 - ... - + ``` """ return self._table.get_add_actions(flatten) @@ -911,16 +921,16 @@ def repair(self, dry_run: bool = False) -> Dict[str, Any]: Returns: The metrics from repair (FSCK) action. - Examples: - ``` - from deltalake import DeltaTable - dt = DeltaTable('TEST') - dt.repair(dry_run=False) - ``` - Results in - ``` - {'dry_run': False, 'files_removed': ['6-0d084325-6885-4847-b008-82c1cf30674c-0.parquet', 5-4fba1d3e-3e20-4de1-933d-a8e13ac59f53-0.parquet']} - ``` + Example: + ```python + from deltalake import DeltaTable + dt = DeltaTable('TEST') + dt.repair(dry_run=False) + ``` + Results in + ``` + {'dry_run': False, 'files_removed': ['6-0d084325-6885-4847-b008-82c1cf30674c-0.parquet', 5-4fba1d3e-3e20-4de1-933d-a8e13ac59f53-0.parquet']} + ``` """ metrics = self._table.repair(dry_run) return json.loads(metrics) @@ -969,11 +979,11 @@ def with_writer_properties( """Pass writer properties to the Rust parquet writer, see options https://arrow.apache.org/rust/parquet/file/properties/struct.WriterProperties.html: Args: - data_page_size_limit (int|None, Optional): Limit DataPage size to this in bytes. Defaults to None. - dictionary_page_size_limit (int|None, Optional): Limit the size of each DataPage to store dicts to this amount in bytes. Defaults to None. 
- data_page_row_count_limit (int|None, Optional): Limit the number of rows in each DataPage. Defaults to None. - write_batch_size (int|None, Optional): Splits internally to smaller batch size. Defaults to None. - max_row_group_size (int|None, Optional): Max number of rows in row group. Defaults to None. + data_page_size_limit: Limit DataPage size to this in bytes. + dictionary_page_size_limit: Limit the size of each DataPage to store dicts to this amount in bytes. + data_page_row_count_limit: Limit the number of rows in each DataPage. + write_batch_size: Splits internally to smaller batch size. + max_row_group_size: Max number of rows in row group. Returns: TableMerger: TableMerger Object @@ -995,36 +1005,39 @@ def when_matched_update( If a ``predicate`` is specified, then it must evaluate to true for the row to be updated. Args: - updates (dict): a mapping of column name to update SQL expression. - predicate (str | None, Optional): SQL like predicate on when to update. Defaults to None. + updates: a mapping of column name to update SQL expression. + predicate: SQL like predicate on when to update. Returns: TableMerger: TableMerger Object - Examples: - - >>> from deltalake import DeltaTable, write_deltalake - >>> import pyarrow as pa - >>> data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) - >>> write_deltalake("tmp", data) - >>> dt = DeltaTable("tmp") - >>> new_data = pa.table({"x": [1], "y": [7]}) - >>> ( - ... dt.merge( - ... source=new_data, - ... predicate="target.x = source.x", - ... source_alias="source", - ... target_alias="target") - ... .when_matched_update(updates={"x": "source.x", "y": "source.y"}) - ... .execute() - ... ) + Example: + ```python + from deltalake import DeltaTable, write_deltalake + import pyarrow as pa + + data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) + write_deltalake("tmp", data) + dt = DeltaTable("tmp") + new_data = pa.table({"x": [1], "y": [7]}) + + ( + dt.merge( + source=new_data, + predicate="target.x = source.x", + source_alias="source", + target_alias="target") + .when_matched_update(updates={"x": "source.x", "y": "source.y"}) + .execute() + ) {'num_source_rows': 1, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 1, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 2, 'num_output_rows': 3, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...} - >>> dt.to_pandas() + + dt.to_pandas() x y 0 1 7 1 2 5 2 3 6 - + ``` """ if isinstance(self.matched_update_updates, list) and isinstance( self.matched_update_predicate, list @@ -1041,35 +1054,38 @@ def when_matched_update_all(self, predicate: Optional[str] = None) -> "TableMerg If a ``predicate`` is specified, then it must evaluate to true for the row to be updated. Args: - predicate (str | None, Optional): SQL like predicate on when to update all columns. Defaults to None. + predicate: SQL like predicate on when to update all columns. Returns: TableMerger: TableMerger Object - Examples: - - >>> from deltalake import DeltaTable, write_deltalake - >>> import pyarrow as pa - >>> data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) - >>> write_deltalake("tmp", data) - >>> dt = DeltaTable("tmp") - >>> new_data = pa.table({"x": [1], "y": [7]}) - >>> ( - ... dt.merge( - ... source=new_data, - ... predicate="target.x = source.x", - ... source_alias="source", - ... target_alias="target") - ... .when_matched_update_all() - ... .execute() - ... 
) + Example: + ```python + from deltalake import DeltaTable, write_deltalake + import pyarrow as pa + + data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) + write_deltalake("tmp", data) + dt = DeltaTable("tmp") + new_data = pa.table({"x": [1], "y": [7]}) + + ( + dt.merge( + source=new_data, + predicate="target.x = source.x", + source_alias="source", + target_alias="target") + .when_matched_update_all() + .execute() + ) {'num_source_rows': 1, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 1, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 2, 'num_output_rows': 3, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...} - >>> dt.to_pandas() + + dt.to_pandas() x y 0 1 7 1 2 5 2 3 6 - + ``` """ src_alias = (self.source_alias + ".") if self.source_alias is not None else "" @@ -1096,54 +1112,59 @@ def when_matched_delete(self, predicate: Optional[str] = None) -> "TableMerger": true for the matched row. If not specified it deletes all matches. Args: - predicate (str | None, Optional): SQL like predicate on when to delete. Defaults to None. + predicate (str | None, Optional): SQL like predicate on when to delete. Returns: TableMerger: TableMerger Object - Examples: - - Delete on a predicate - - >>> from deltalake import DeltaTable, write_deltalake - >>> import pyarrow as pa - >>> data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) - >>> write_deltalake("tmp", data) - >>> dt = DeltaTable("tmp") - >>> new_data = pa.table({"x": [2, 3], "deleted": [False, True]}) - >>> ( - ... dt.merge( - ... source=new_data, - ... predicate='target.x = source.x', - ... source_alias='source', - ... target_alias='target') - ... .when_matched_delete( - ... predicate="source.deleted = true") - ... .execute() - ... ) + Example: + **Delete on a predicate** + + ```python + from deltalake import DeltaTable, write_deltalake + import pyarrow as pa + + data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) + write_deltalake("tmp", data) + dt = DeltaTable("tmp") + new_data = pa.table({"x": [2, 3], "deleted": [False, True]}) + + ( + dt.merge( + source=new_data, + predicate='target.x = source.x', + source_alias='source', + target_alias='target') + .when_matched_delete( + predicate="source.deleted = true") + .execute() + ) {'num_source_rows': 2, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 1, 'num_target_rows_copied': 2, 'num_output_rows': 2, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...} - >>> dt.to_pandas().sort_values("x", ignore_index=True) + + dt.to_pandas().sort_values("x", ignore_index=True) x y 0 1 4 1 2 5 - - Delete all records that were matched - - >>> dt = DeltaTable("tmp") - >>> ( - ... dt.merge( - ... source=new_data, - ... predicate='target.x = source.x', - ... source_alias='source', - ... target_alias='target') - ... .when_matched_delete() - ... .execute() - ... 
) + ``` + + **Delete all records that were matched** + ```python + dt = DeltaTable("tmp") + ( + dt.merge( + source=new_data, + predicate='target.x = source.x', + source_alias='source', + target_alias='target') + .when_matched_delete() + .execute() + ) {'num_source_rows': 2, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 1, 'num_target_rows_copied': 1, 'num_output_rows': 1, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...} - >>> dt.to_pandas() + + dt.to_pandas() x y 0 1 4 - + ``` """ if self.matched_delete_all is not None: raise ValueError( @@ -1168,40 +1189,43 @@ def when_not_matched_insert( Args: updates (dict): a mapping of column name to insert SQL expression. - predicate (str | None, Optional): SQL like predicate on when to insert. Defaults to None. + predicate (str | None, Optional): SQL like predicate on when to insert. Returns: TableMerger: TableMerger Object - Examples: - - >>> from deltalake import DeltaTable, write_deltalake - >>> import pyarrow as pa - >>> data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) - >>> write_deltalake("tmp", data) - >>> dt = DeltaTable("tmp") - >>> new_data = pa.table({"x": [4], "y": [7]}) - >>> ( - ... dt.merge( - ... source=new_data, - ... predicate='target.x = source.x', - ... source_alias='source', - ... target_alias='target') - ... .when_not_matched_insert( - ... updates = { - ... "x": "source.x", - ... "y": "source.y", - ... }) - ... .execute() - ... ) + Example: + ```python + from deltalake import DeltaTable, write_deltalake + import pyarrow as pa + + data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) + write_deltalake("tmp", data) + dt = DeltaTable("tmp") + new_data = pa.table({"x": [4], "y": [7]}) + + ( + dt.merge( + source=new_data, + predicate="target.x = source.x", + source_alias="source", + target_alias="target",) + .when_not_matched_insert( + updates={ + "x": "source.x", + "y": "source.y", + }) + .execute() + ) {'num_source_rows': 1, 'num_target_rows_inserted': 1, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 3, 'num_output_rows': 4, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...} - >>> dt.to_pandas().sort_values("x", ignore_index=True) + + dt.to_pandas().sort_values("x", ignore_index=True) x y 0 1 4 1 2 5 2 3 6 3 4 7 - + ``` """ if isinstance(self.not_matched_insert_updates, list) and isinstance( @@ -1223,36 +1247,39 @@ def when_not_matched_insert_all( the new row to be inserted. Args: - predicate (str | None, Optional): SQL like predicate on when to insert. Defaults to None. + predicate: SQL like predicate on when to insert. Returns: TableMerger: TableMerger Object - Examples: - - >>> from deltalake import DeltaTable, write_deltalake - >>> import pyarrow as pa - >>> data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) - >>> write_deltalake("tmp", data) - >>> dt = DeltaTable("tmp") - >>> new_data = pa.table({"x": [4], "y": [7]}) - >>> ( - ... dt.merge( - ... source=new_data, - ... predicate='target.x = source.x', - ... source_alias='source', - ... target_alias='target') - ... .when_not_matched_insert_all() - ... .execute() - ... 
) + Example: + ```python + from deltalake import DeltaTable, write_deltalake + import pyarrow as pa + + data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) + write_deltalake("tmp", data) + dt = DeltaTable("tmp") + new_data = pa.table({"x": [4], "y": [7]}) + + ( + dt.merge( + source=new_data, + predicate='target.x = source.x', + source_alias='source', + target_alias='target') + .when_not_matched_insert_all() + .execute() + ) {'num_source_rows': 1, 'num_target_rows_inserted': 1, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 3, 'num_output_rows': 4, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...} - >>> dt.to_pandas().sort_values("x", ignore_index=True) + + dt.to_pandas().sort_values("x", ignore_index=True) x y 0 1 4 1 2 5 2 3 6 3 4 7 - + ``` """ src_alias = (self.source_alias + ".") if self.source_alias is not None else "" @@ -1279,38 +1306,41 @@ def when_not_matched_by_source_update( If a ``predicate`` is specified, then it must evaluate to true for the row to be updated. Args: - updates (dict): a mapping of column name to update SQL expression. - predicate (str | None, Optional): SQL like predicate on when to update. Defaults to None. + updates: a mapping of column name to update SQL expression. + predicate: SQL like predicate on when to update. Returns: TableMerger: TableMerger Object - Examples: - - >>> from deltalake import DeltaTable, write_deltalake - >>> import pyarrow as pa - >>> data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) - >>> write_deltalake("tmp", data) - >>> dt = DeltaTable("tmp") - >>> new_data = pa.table({"x": [2, 3, 4]}) - >>> ( - ... dt.merge( - ... source=new_data, - ... predicate='target.x = source.x', - ... source_alias='source', - ... target_alias='target') - ... .when_not_matched_by_source_update( - ... predicate = "y > 3", - ... updates = {"y": "0"}) - ... .execute() - ... ) + Example: + ```python + from deltalake import DeltaTable, write_deltalake + import pyarrow as pa + + data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]}) + write_deltalake("tmp", data) + dt = DeltaTable("tmp") + new_data = pa.table({"x": [2, 3, 4]}) + + ( + dt.merge( + source=new_data, + predicate='target.x = source.x', + source_alias='source', + target_alias='target') + .when_not_matched_by_source_update( + predicate = "y > 3", + updates = {"y": "0"}) + .execute() + ) {'num_source_rows': 3, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 1, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 2, 'num_output_rows': 3, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...} - >>> dt.to_pandas().sort_values("x", ignore_index=True) + + dt.to_pandas().sort_values("x", ignore_index=True) x y 0 1 0 1 2 5 2 3 6 - + ``` """ if isinstance(self.not_matched_by_source_update_updates, list) and isinstance( @@ -1330,7 +1360,7 @@ def when_not_matched_by_source_delete( ``predicate`` (if specified) is true for the target row. Args: - predicate (str | None, Optional): SQL like predicate on when to delete when not matched by source. Defaults to None. + predicate: SQL like predicate on when to delete when not matched by source. Returns: TableMerger: TableMerger Object @@ -1354,7 +1384,7 @@ def execute(self) -> Dict[str, Any]: """Executes `MERGE` with the previously provided settings in Rust with Apache Datafusion query engine. 
Returns: - Dict[str, Any]: metrics + Dict: metrics """ metrics = self.table._table.merge_execute( source=self.source, @@ -1434,19 +1464,21 @@ def compact( Returns: the metrics from optimize - Examples: + Example: Use a timedelta object to specify the seconds, minutes or hours of the interval. + ```python + from deltalake import DeltaTable, write_deltalake + from datetime import timedelta + import pyarrow as pa - >>> from deltalake import DeltaTable, write_deltalake - >>> from datetime import timedelta - >>> import pyarrow as pa - >>> write_deltalake("tmp", pa.table({"x": [1], "y": [4]})) - >>> write_deltalake("tmp", pa.table({"x": [2], "y": [5]}), mode="append") - >>> dt = DeltaTable("tmp") - >>> time_delta = timedelta(minutes=10) - >>> dt.optimize.compact(min_commit_interval=time_delta) - {'numFilesAdded': 1, 'numFilesRemoved': 2, 'filesAdded': ..., 'filesRemoved': ..., 'partitionsOptimized': 1, 'numBatches': 2, 'totalConsideredFiles': 2, 'totalFilesSkipped': 0, 'preserveInsertionOrder': True} + write_deltalake("tmp", pa.table({"x": [1], "y": [4]})) + write_deltalake("tmp", pa.table({"x": [2], "y": [5]}), mode="append") + dt = DeltaTable("tmp") + time_delta = timedelta(minutes=10) + dt.optimize.compact(min_commit_interval=time_delta) + {'numFilesAdded': 1, 'numFilesRemoved': 2, 'filesAdded': ..., 'filesRemoved': ..., 'partitionsOptimized': 1, 'numBatches': 2, 'totalConsideredFiles': 2, 'totalFilesSkipped': 0, 'preserveInsertionOrder': True} + ``` """ if isinstance(min_commit_interval, timedelta): min_commit_interval = int(min_commit_interval.total_seconds()) @@ -1488,19 +1520,21 @@ def z_order( Returns: the metrics from optimize - Examples: + Example: Use a timedelta object to specify the seconds, minutes or hours of the interval. + ```python + from deltalake import DeltaTable, write_deltalake + from datetime import timedelta + import pyarrow as pa - >>> from deltalake import DeltaTable, write_deltalake - >>> from datetime import timedelta - >>> import pyarrow as pa - >>> write_deltalake("tmp", pa.table({"x": [1], "y": [4]})) - >>> write_deltalake("tmp", pa.table({"x": [2], "y": [5]}), mode="append") - >>> dt = DeltaTable("tmp") - >>> time_delta = timedelta(minutes=10) - >>> dt.optimize.z_order(["x"], min_commit_interval=time_delta) - {'numFilesAdded': 1, 'numFilesRemoved': 2, 'filesAdded': ..., 'filesRemoved': ..., 'partitionsOptimized': 0, 'numBatches': 1, 'totalConsideredFiles': 2, 'totalFilesSkipped': 0, 'preserveInsertionOrder': True} + write_deltalake("tmp", pa.table({"x": [1], "y": [4]})) + write_deltalake("tmp", pa.table({"x": [2], "y": [5]}), mode="append") + dt = DeltaTable("tmp") + time_delta = timedelta(minutes=10) + dt.optimize.z_order(["x"], min_commit_interval=time_delta) + {'numFilesAdded': 1, 'numFilesRemoved': 2, 'filesAdded': ..., 'filesRemoved': ..., 'partitionsOptimized': 0, 'numBatches': 1, 'totalConsideredFiles': 2, 'totalFilesSkipped': 0, 'preserveInsertionOrder': True} + ``` """ if isinstance(min_commit_interval, timedelta): min_commit_interval = int(min_commit_interval.total_seconds()) diff --git a/python/deltalake/writer.py b/python/deltalake/writer.py index 2b4814f98b..bb69fee457 100644 --- a/python/deltalake/writer.py +++ b/python/deltalake/writer.py @@ -21,7 +21,7 @@ ) from urllib.parse import unquote -from deltalake import Schema +from deltalake import Schema as DeltaSchema from deltalake.fs import DeltaStorageHandler from ._util import encode_partition_value @@ -82,7 +82,7 @@ def write_deltalake( RecordBatchReader, ], *, - schema: Optional[Union[pa.Schema, 
Schema]] = ..., + schema: Optional[Union[pa.Schema, DeltaSchema]] = ..., partition_by: Optional[Union[List[str], str]] = ..., filesystem: Optional[pa_fs.FileSystem] = None, mode: Literal["error", "append", "overwrite", "ignore"] = ..., @@ -116,7 +116,7 @@ def write_deltalake( RecordBatchReader, ], *, - schema: Optional[Union[pa.Schema, Schema]] = ..., + schema: Optional[Union[pa.Schema, DeltaSchema]] = ..., partition_by: Optional[Union[List[str], str]] = ..., mode: Literal["error", "append", "overwrite", "ignore"] = ..., max_rows_per_group: int = ..., @@ -143,7 +143,7 @@ def write_deltalake( RecordBatchReader, ], *, - schema: Optional[Union[pa.Schema, Schema]] = None, + schema: Optional[Union[pa.Schema, DeltaSchema]] = None, partition_by: Optional[Union[List[str], str]] = None, filesystem: Optional[pa_fs.FileSystem] = None, mode: Literal["error", "append", "overwrite", "ignore"] = "error", @@ -231,7 +231,9 @@ def write_deltalake( storage_options: options passed to the native delta filesystem. Unused if 'filesystem' is defined. predicate: When using `Overwrite` mode, replace data that matches a predicate. Only used in rust engine. partition_filters: the partition filters that will be used for partition overwrite. Only used in pyarrow engine. - large_dtypes: If True, the data schema is kept in large_dtypes, has no effect on pandas dataframe input + large_dtypes: If True, the data schema is kept in large_dtypes, has no effect on pandas dataframe input. + engine: writer engine to write the delta table. `Rust` engine is still experimental but you may + see up to 4x performance improvements over pyarrow. """ table, table_uri = try_get_table_and_table_uri(table_or_uri, storage_options) if table is not None: @@ -245,7 +247,7 @@ def write_deltalake( if isinstance(partition_by, str): partition_by = [partition_by] - if isinstance(schema, Schema): + if isinstance(schema, DeltaSchema): schema = schema.to_pyarrow() if isinstance(data, RecordBatchReader): From f90b48cfe09cdee24e7f78c6707dbed9903efbb8 Mon Sep 17 00:00:00 2001 From: haruband Date: Sat, 2 Dec 2023 09:41:06 +0900 Subject: [PATCH 11/16] fix: prune each merge bin with only 1 file (#1902) # Description This PR prunes each merge bin with only 1 file, even though there are multiple merge bins. # Related Issue(s) - closes #1901 # Documentation This PR adds test_idempotent_with_multiple_bins() for testing. 
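To make the new rule concrete, here is a small self-contained sketch of the behaviour the `retain` change in the diff below implements. The hypothetical `prune_single_file_bins` helper uses plain vectors and a counter rather than the crate's actual merge-bin and metrics types: within each partition, every single-file bin is dropped and counted as skipped, while multi-file bins are kept, so a second optimize pass finds nothing left to rewrite.

```rust
// Simplified stand-in for the pruning step in build_compaction_plan.
// Single-file bins have nothing to merge, so they are dropped and counted as
// skipped even when the partition produced several bins (the case the old
// `bins.len() == 1` check missed). Types here are illustrative only.
use std::collections::HashMap;

fn prune_single_file_bins(
    operations: &mut HashMap<String, Vec<Vec<&'static str>>>,
    total_files_skipped: &mut usize,
) {
    for bins in operations.values_mut() {
        bins.retain(|bin| {
            if bin.len() == 1 {
                *total_files_skipped += 1;
                false // drop the lone-file bin
            } else {
                true // keep bins that actually merge something
            }
        });
    }
    // Partitions left with no bins are removed entirely.
    operations.retain(|_, bins| !bins.is_empty());
}

fn main() {
    let mut ops = HashMap::from([(
        "date=2022-05-22".to_string(),
        vec![vec!["a.parquet", "b.parquet"], vec!["c.parquet"]],
    )]);
    let mut skipped = 0;
    prune_single_file_bins(&mut ops, &mut skipped);
    // The two-file bin survives; the lone file is counted as skipped.
    assert_eq!(ops["date=2022-05-22"].len(), 1);
    assert_eq!(skipped, 1);
}
```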
--- .../deltalake-core/src/operations/optimize.rs | 12 ++-- .../deltalake-core/tests/command_optimize.rs | 65 +++++++++++++++++++ 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/crates/deltalake-core/src/operations/optimize.rs b/crates/deltalake-core/src/operations/optimize.rs index ef8905e0c9..09f9de087e 100644 --- a/crates/deltalake-core/src/operations/optimize.rs +++ b/crates/deltalake-core/src/operations/optimize.rs @@ -900,10 +900,14 @@ fn build_compaction_plan( // Prune merge bins with only 1 file, since they have no effect for (_, bins) in operations.iter_mut() { - if bins.len() == 1 && bins[0].len() == 1 { - metrics.total_files_skipped += 1; - bins.clear(); - } + bins.retain(|bin| { + if bin.len() == 1 { + metrics.total_files_skipped += 1; + false + } else { + true + } + }) } operations.retain(|_, files| !files.is_empty()); diff --git a/crates/deltalake-core/tests/command_optimize.rs b/crates/deltalake-core/tests/command_optimize.rs index 14f9d4c410..b91558ce08 100644 --- a/crates/deltalake-core/tests/command_optimize.rs +++ b/crates/deltalake-core/tests/command_optimize.rs @@ -508,6 +508,71 @@ async fn test_idempotent_metrics() -> Result<(), Box> { Ok(()) } +#[tokio::test] +/// Validate that multiple bins packing is idempotent. +async fn test_idempotent_with_multiple_bins() -> Result<(), Box> { + //TODO: Compression makes it hard to get the target file size... + //Maybe just commit files with a known size + let context = setup_test(true).await?; + let mut dt = context.table; + let mut writer = RecordBatchWriter::for_table(&dt)?; + + write( + &mut writer, + &mut dt, + generate_random_batch(records_for_size(6_000_000), "2022-05-22")?, + ) + .await?; + write( + &mut writer, + &mut dt, + generate_random_batch(records_for_size(3_000_000), "2022-05-22")?, + ) + .await?; + write( + &mut writer, + &mut dt, + generate_random_batch(records_for_size(6_000_000), "2022-05-22")?, + ) + .await?; + write( + &mut writer, + &mut dt, + generate_random_batch(records_for_size(3_000_000), "2022-05-22")?, + ) + .await?; + write( + &mut writer, + &mut dt, + generate_random_batch(records_for_size(9_900_000), "2022-05-22")?, + ) + .await?; + + let version = dt.version(); + + let filter = vec![PartitionFilter::try_from(("date", "=", "2022-05-22"))?]; + + let optimize = DeltaOps(dt) + .optimize() + .with_filters(&filter) + .with_target_size(10_000_000); + let (dt, metrics) = optimize.await?; + assert_eq!(metrics.num_files_added, 2); + assert_eq!(metrics.num_files_removed, 4); + assert_eq!(dt.version(), version + 1); + + let optimize = DeltaOps(dt) + .optimize() + .with_filters(&filter) + .with_target_size(10_000_000); + let (dt, metrics) = optimize.await?; + assert_eq!(metrics.num_files_added, 0); + assert_eq!(metrics.num_files_removed, 0); + assert_eq!(dt.version(), version + 1); + + Ok(()) +} + #[tokio::test] /// Validate operation data and metadata was written async fn test_commit_info() -> Result<(), Box> { From d518f40c7bea2cd6cdecc123af28395dbd0e44b4 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Sat, 2 Dec 2023 00:24:41 -0800 Subject: [PATCH 12/16] chore: update python version (#1934) # Description Prepare for next release # Related Issue(s) # Documentation --- python/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/Cargo.toml b/python/Cargo.toml index 5194a2fc22..a9936a483c 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deltalake-python" -version = "0.13.0" +version = "0.14.0" authors = ["Qingping Hou ", 
"Will Jones "] homepage = "https://github.com/delta-io/delta-rs" license = "Apache-2.0" From 2733f3d34a593f4fb23f498969d73b0d83a1f05f Mon Sep 17 00:00:00 2001 From: bolkedebruin Date: Mon, 6 Nov 2023 15:39:47 +0100 Subject: [PATCH 13/16] Support os.PathLike for table references --- python/deltalake/table.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/deltalake/table.py b/python/deltalake/table.py index 3ac28acf88..f05a02fb85 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -1,5 +1,6 @@ import json import operator +import os import warnings from dataclasses import dataclass from datetime import datetime, timedelta @@ -225,7 +226,7 @@ class DeltaTable: def __init__( self, - table_uri: Union[str, Path], + table_uri: Union[str, Path, os.PathLike], version: Optional[int] = None, storage_options: Optional[Dict[str, str]] = None, without_files: bool = False, From f4b9e9178906563ae1e076fbfb50105594930b53 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 1 Dec 2023 18:51:27 -0800 Subject: [PATCH 14/16] add type param --- python/deltalake/table.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/deltalake/table.py b/python/deltalake/table.py index f05a02fb85..adf3ca92af 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -1,6 +1,5 @@ import json import operator -import os import warnings from dataclasses import dataclass from datetime import datetime, timedelta @@ -32,6 +31,8 @@ ) if TYPE_CHECKING: + import os + import pandas from deltalake._internal import DeltaDataChecker as _DeltaDataChecker @@ -226,7 +227,7 @@ class DeltaTable: def __init__( self, - table_uri: Union[str, Path, os.PathLike], + table_uri: Union[str, Path, "os.PathLike[str]"], version: Optional[int] = None, storage_options: Optional[Dict[str, str]] = None, without_files: bool = False, From 83f2f9905df9c9535b7088ed437fd8db42e95d77 Mon Sep 17 00:00:00 2001 From: Dmytro Suvorov Date: Sat, 2 Dec 2023 13:47:41 +0200 Subject: [PATCH 15/16] fix: get rid of panic in during table (#1928) # Description This is a continuation of the discussion in the https://github.com/delta-io/delta-rs/pull/1917 Getting rid of panic in the library crate in favor of returning an error so lib users could handle it in a way they see it Test changes accordingly Co-authored-by: Robert Pack <42610831+roeap@users.noreply.github.com> --- crates/deltalake-core/src/lib.rs | 27 +++++++++------------- crates/deltalake-core/src/table/builder.rs | 6 ++++- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/crates/deltalake-core/src/lib.rs b/crates/deltalake-core/src/lib.rs index a14a89c33e..fda54ccc01 100644 --- a/crates/deltalake-core/src/lib.rs +++ b/crates/deltalake-core/src/lib.rs @@ -710,28 +710,23 @@ mod tests { } #[tokio::test()] - #[should_panic(expected = "does not exist or you don't have access!")] async fn test_fail_fast_on_not_existing_path() { use std::path::Path as FolderPath; - let path_str = "./tests/data/folder_doesnt_exist"; + let non_existing_path_str = "./tests/data/folder_doesnt_exist"; // Check that there is no such path at the beginning - let path_doesnt_exist = !FolderPath::new(path_str).exists(); + let path_doesnt_exist = !FolderPath::new(non_existing_path_str).exists(); assert!(path_doesnt_exist); - match crate::open_table(path_str).await { - Ok(table) => Ok(table), - Err(e) => { - let path_still_doesnt_exist = !FolderPath::new(path_str).exists(); - assert!( - path_still_doesnt_exist, - "Path exists for some reason, but it 
shouldn't" - ); - - Err(e) - } - } - .unwrap(); + let error = crate::open_table(non_existing_path_str).await.unwrap_err(); + let _expected_error_msg = format!( + "Local path \"{}\" does not exist or you don't have access!", + non_existing_path_str + ); + assert!(matches!( + error, + DeltaTableError::InvalidTableLocation(_expected_error_msg), + )) } } diff --git a/crates/deltalake-core/src/table/builder.rs b/crates/deltalake-core/src/table/builder.rs index 89962ed518..f453c895f6 100644 --- a/crates/deltalake-core/src/table/builder.rs +++ b/crates/deltalake-core/src/table/builder.rs @@ -166,7 +166,11 @@ impl DeltaTableBuilder { if let UriType::LocalPath(path) = resolve_uri_type(table_uri)? { if !path.exists() { - panic!("Path \"{table_uri}\" does not exist or you don't have access!"); + let msg = format!( + "Local path \"{}\" does not exist or you don't have access!", + table_uri + ); + return Err(DeltaTableError::InvalidTableLocation(msg)); } } From b946d075d12a78e12a5d1a43debfd425ef34419f Mon Sep 17 00:00:00 2001 From: Jan Schweizer Date: Sat, 2 Dec 2023 13:29:00 +0100 Subject: [PATCH 16/16] Happify linter --- crates/deltalake-core/src/operations/vacuum.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/deltalake-core/src/operations/vacuum.rs b/crates/deltalake-core/src/operations/vacuum.rs index 27408fe48c..93ad44f153 100644 --- a/crates/deltalake-core/src/operations/vacuum.rs +++ b/crates/deltalake-core/src/operations/vacuum.rs @@ -219,7 +219,7 @@ impl VacuumBuilder { if let Ok(parent) = &Url::parse(&format!("file://{}", self.log_store.root_uri().as_str())) { - if let Ok(dv_absolute_path) = deletion_vector.absolute_path(&parent) { + if let Ok(dv_absolute_path) = deletion_vector.absolute_path(parent) { Some(dv_absolute_path?.path().to_string()) } else { None @@ -307,7 +307,7 @@ impl std::future::IntoFuture for VacuumBuilder { fn is_absolute_path(path: &str) -> bool { let path = std::path::Path::new(path); - return path.is_absolute(); + path.is_absolute() } /// Encapsulate which files are to be deleted and the parameters used to make that decision
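For library consumers, the practical payoff of the panic-to-error change in patch 15/16 above is that a missing local path can now be handled like any other failure instead of aborting the process. Below is a rough sketch of what that looks like from an application crate; it assumes the usual crate-root re-exports (`deltalake::open_table`, `deltalake::DeltaTableError`) and a tokio runtime, so adjust the paths to your dependency setup.

```rust
// Hypothetical downstream usage: match on the error that used to be a panic.
use deltalake::{open_table, DeltaTableError};

#[tokio::main]
async fn main() -> Result<(), DeltaTableError> {
    match open_table("./tests/data/folder_doesnt_exist").await {
        Ok(table) => println!("loaded table at version {}", table.version()),
        Err(DeltaTableError::InvalidTableLocation(msg)) => {
            // Recoverable: log it, fall back, or create the table instead of aborting.
            eprintln!("table location problem: {msg}");
        }
        Err(other) => return Err(other),
    }
    Ok(())
}
```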