Skip to content

Commit

Permalink
Implement unity catalog extension (#4890)
Browse files Browse the repository at this point in the history
  • Loading branch information
acquamarin authored Feb 14, 2025
1 parent 8a29797 commit edf39b9
Show file tree
Hide file tree
Showing 65 changed files with 415 additions and 11 deletions.
11 changes: 6 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ allconfig:
$(call config-cmake-release, \
-DBUILD_BENCHMARK=TRUE \
-DBUILD_EXAMPLES=TRUE \
-DBUILD_EXTENSIONS="httpfs;duckdb;json;postgres;sqlite;fts;delta;iceberg" \
-DBUILD_EXTENSIONS="httpfs;duckdb;json;postgres;sqlite;fts;delta;iceberg;unity_catalog" \
-DBUILD_JAVA=TRUE \
-DBUILD_NODEJS=TRUE \
-DBUILD_PYTHON=TRUE \
Expand All @@ -106,7 +106,7 @@ alldebug:
$(call run-cmake-debug, \
-DBUILD_BENCHMARK=TRUE \
-DBUILD_EXAMPLES=TRUE \
-DBUILD_EXTENSIONS="httpfs;duckdb;json;postgres;sqlite;fts;delta;iceberg" \
-DBUILD_EXTENSIONS="httpfs;duckdb;json;postgres;sqlite;fts;delta;iceberg;unity_catalog" \
-DBUILD_JAVA=TRUE \
-DBUILD_NODEJS=TRUE \
-DBUILD_PYTHON=TRUE \
Expand Down Expand Up @@ -195,7 +195,7 @@ example:

extension-test-build:
$(call run-cmake-relwithdebinfo, \
-DBUILD_EXTENSIONS="httpfs;duckdb;json;postgres;sqlite;fts;delta;iceberg" \
-DBUILD_EXTENSIONS="httpfs;duckdb;json;postgres;sqlite;fts;delta;iceberg;unity_catalog" \
-DBUILD_EXTENSION_TESTS=TRUE \
-DBUILD_TESTS=TRUE \
)
Expand All @@ -221,13 +221,13 @@ extension-json-test: extension-json-test-build

extension-debug:
$(call run-cmake-debug, \
-DBUILD_EXTENSIONS="httpfs;duckdb;json;postgres;sqlite;fts;delta;iceberg" \
-DBUILD_EXTENSIONS="httpfs;duckdb;json;postgres;sqlite;fts;delta;iceberg;unity_catalog" \
-DBUILD_KUZU=FALSE \
)

extension-release:
$(call run-cmake-release, \
-DBUILD_EXTENSIONS="httpfs;duckdb;json;postgres;sqlite;fts;delta;iceberg" \
-DBUILD_EXTENSIONS="httpfs;duckdb;json;postgres;sqlite;fts;delta;iceberg;unity_catalog" \
-DBUILD_KUZU=FALSE \
)

Expand Down Expand Up @@ -270,6 +270,7 @@ clean-extension:
cmake -E rm -rf extension/fts/build
cmake -E rm -rf extension/delta/build
cmake -E rm -rf extension/iceberg/build
cmake -E rm -rf extension/unity_catalog/build

clean-python-api:
cmake -E rm -rf tools/python_api/build
Expand Down
4 changes: 4 additions & 0 deletions extension/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,7 @@ endif ()
if ("iceberg" IN_LIST BUILD_EXTENSIONS)
add_subdirectory(iceberg)
endif ()

# Add the unity_catalog extension directory only when "unity_catalog" is listed in BUILD_EXTENSIONS.
if ("unity_catalog" IN_LIST BUILD_EXTENSIONS)
add_subdirectory(unity_catalog)
endif ()
5 changes: 0 additions & 5 deletions extension/delta/src/main/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@ add_library(kuzu_delta_extension
delta_extension.cpp
${PROJECT_SOURCE_DIR}/extension/httpfs/src/s3_download_options.cpp)

target_include_directories(
kuzu_delta_extension
PRIVATE
)

set(DELTA_EXTENSION_OBJECT_FILES
${DELTA_EXTENSION_OBJECT_FILES} $<TARGET_OBJECTS:kuzu_delta_extension>
PARENT_SCOPE)
45 changes: 45 additions & 0 deletions extension/unity_catalog/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Build script for the unity_catalog extension.
#
# Produces the `unity_catalog_extension` shared library from the object files
# collected by the src/* subdirectories and, on non-Windows platforms, a
# `unity_catalog_loader` shared library built from the DuckDB extension's loader.

# Static DuckDB libraries are requested on Windows only.
if (NOT WIN32)
    set(DuckDB_USE_STATIC_LIBS OFF)
else ()
    set(DuckDB_USE_STATIC_LIBS ON)
endif ()
find_package(DuckDB REQUIRED)

# Directory-scoped, so the targets declared in the subdirectories added below
# pick up these include paths as well.
include_directories(
    ${PROJECT_SOURCE_DIR}/src/include
    src/include
    ${PROJECT_SOURCE_DIR}/extension/duckdb/src/include
    ${DuckDB_INCLUDE_DIRS})

# Each subdirectory (other than the installer) appends its object files to
# UNITY_CATALOG_EXTENSION_OBJECT_FILES in this scope.
add_subdirectory(src/installer)
add_subdirectory(src/main)
add_subdirectory(src/connector)
add_subdirectory(src/storage)
add_subdirectory(src/options)

add_library(unity_catalog_extension SHARED ${UNITY_CATALOG_EXTENSION_OBJECT_FILES})
set_extension_properties(unity_catalog_extension unity_catalog unity_catalog)
target_link_libraries(unity_catalog_extension PRIVATE ${DuckDB_LIBRARIES})

if (WIN32)
    # See comments in extension/httpfs/CMakeLists.txt.
    target_link_libraries(unity_catalog_extension PRIVATE kuzu)
else ()
    # The loader library is not built on Windows.
    add_library(unity_catalog_loader
        SHARED
        ${PROJECT_SOURCE_DIR}/extension/duckdb/src/loader/duckdb_loader.cpp)
    set_extension_properties(unity_catalog_loader unity_catalog_loader unity_catalog)
endif ()

# set_apple_dynamic_lookup is a project helper defined outside this file.
if (APPLE)
    set_apple_dynamic_lookup(unity_catalog_extension)
    set_apple_dynamic_lookup(unity_catalog_loader)
endif ()
9 changes: 9 additions & 0 deletions extension/unity_catalog/src/connector/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Object library for the Unity Catalog connector. It also compiles the DuckDB
# extension's connector and result-converter sources.
add_library(kuzu_unity_catalog_connector OBJECT
    unity_catalog_connector.cpp
    ${PROJECT_SOURCE_DIR}/extension/duckdb/src/connector/duckdb_connector.cpp
    ${PROJECT_SOURCE_DIR}/extension/duckdb/src/connector/duckdb_result_converter.cpp)

# Append this target's objects to the list, then publish the list to the parent scope.
list(APPEND UNITY_CATALOG_EXTENSION_OBJECT_FILES $<TARGET_OBJECTS:kuzu_unity_catalog_connector>)
set(UNITY_CATALOG_EXTENSION_OBJECT_FILES ${UNITY_CATALOG_EXTENSION_OBJECT_FILES} PARENT_SCOPE)
24 changes: 24 additions & 0 deletions extension/unity_catalog/src/connector/unity_catalog_connector.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "connector/unity_catalog_connector.h"

#include "options/unity_catalog_options.h"

namespace kuzu {
namespace unity_catalog_extension {

// Connects to a Unity Catalog server through an embedded DuckDB instance.
//
// Steps, in order:
//   1. create an in-memory DuckDB instance and a connection to it;
//   2. install and load the DuckDB `uc_catalog` extension (from core_nightly) and `delta`;
//   3. create the DuckDB secret built from the current kuzu option values;
//   4. attach `dbPath` read-only under the alias `catalogName` with TYPE UC_CATALOG.
//
// The schema name parameter is part of the overridden interface but is not used here.
void UnityCatalogConnector::connect(const std::string& dbPath, const std::string& catalogName,
    const std::string& /*schemaName*/, main::ClientContext* context) {
    instance = std::make_unique<duckdb::DuckDB>(nullptr);
    connection = std::make_unique<duckdb::Connection>(*instance);
    executeQuery("install uc_catalog from core_nightly;");
    executeQuery("load uc_catalog;");
    // `delta` only needs to be installed once before it is loaded.
    executeQuery("install delta;");
    executeQuery("load delta;");
    executeQuery(DuckDBUnityCatalogSecretManager::getSecret(context));
    executeQuery(common::stringFormat("attach '{}' as {} (TYPE UC_CATALOG, read_only);", dbPath,
        catalogName));
}

} // namespace unity_catalog_extension
} // namespace kuzu
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

#include "connector/duckdb_connector.h"

namespace kuzu {
namespace unity_catalog_extension {

// Connector for the unity_catalog extension. Derives from the DuckDB extension's connector and
// overrides connect().
class UnityCatalogConnector : public duckdb_extension::DuckDBConnector {
public:
// Establishes the connection for the database at `dbPath`, exposed under `catalogName`.
// `context` is the kuzu client context; `schemaName` is part of the overridden signature.
void connect(const std::string& dbPath, const std::string& catalogName,
const std::string& schemaName, main::ClientContext* context) override;
};

} // namespace unity_catalog_extension
} // namespace kuzu
17 changes: 17 additions & 0 deletions extension/unity_catalog/src/include/main/unity_catalog_extension.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "extension/extension.h"

namespace kuzu {
namespace unity_catalog_extension {

// Entry point class of the unity_catalog extension.
class UnityCatalogExtension final : public extension::Extension {
public:
// Name under which the extension identifies itself.
static constexpr char EXTENSION_NAME[] = "UNITY_CATALOG";

public:
// Performs the extension's setup for the database reachable through `context`.
static void load(main::ClientContext* context);
};
} // namespace unity_catalog_extension
} // namespace kuzu
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#pragma once

#include "common/types/value/value.h"

namespace kuzu {
namespace main {
class Database;
class ClientContext;
} // namespace main

namespace unity_catalog_extension {

// Descriptor of the "uc_token" extension option: its name, logical type and default value.
struct UnityCatalogToken {
static constexpr const char* NAME = "uc_token";
static constexpr common::LogicalTypeID TYPE = common::LogicalTypeID::STRING;
// Default is the literal string "not-used".
// NOTE(review): presumably a placeholder for servers that do not check tokens -- confirm.
static common::Value getDefaultValue() { return common::Value{"not-used"}; }
};

// Descriptor of the "uc_endpoint" extension option: its name, logical type and default value.
struct UnityCatalogEndPoint {
static constexpr const char* NAME = "uc_endpoint";
static constexpr common::LogicalTypeID TYPE = common::LogicalTypeID::STRING;
// Default points at a server on the local machine, port 8080.
static common::Value getDefaultValue() { return common::Value{"http://127.0.0.1:8080"}; }
};

// Static helpers that wire the Unity Catalog options into a kuzu database / client context.
struct UnityCatalogOptions {
// Registers the extension's options on `db`.
static void registerExtensionOptions(main::Database* db);
// Reads the environment variables named after the options and applies any non-empty value
// through the client context's extension-option setter.
static void setEnvValue(main::ClientContext* context);
};

// Builds the DuckDB secret used to talk to Unity Catalog.
struct DuckDBUnityCatalogSecretManager {
// Returns a DuckDB `CREATE SECRET (...)` statement of TYPE UC populated from the current
// values of the Unity Catalog options in `context`.
static std::string getSecret(main::ClientContext* context);
};

} // namespace unity_catalog_extension
} // namespace kuzu
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#pragma once

#include "storage/storage_extension.h"

namespace kuzu {
namespace main {
class Database;
} // namespace main

namespace unity_catalog_extension {

// Storage extension type registered by the unity_catalog extension.
class UnityCatalogStorageExtension final : public storage::StorageExtension {
public:
// NOTE(review): presumably the database type string that canHandleDB() accepts; the
// definition is not in this header -- confirm.
static constexpr const char* DB_TYPE = "UC_CATALOG";

// NOTE(review): presumably the schema used when none is given -- confirm against the
// implementation file.
static constexpr const char* DEFAULT_SCHEMA_NAME = "default";

explicit UnityCatalogStorageExtension(main::Database* database);

// Reports whether this extension handles databases of type `dbType`.
bool canHandleDB(std::string dbType) const override;
};

} // namespace unity_catalog_extension
} // namespace kuzu
11 changes: 11 additions & 0 deletions extension/unity_catalog/src/installer/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# The installer shared library is only built on non-Windows platforms.
if (NOT WIN32)
    add_library(unity_catalog_installer SHARED
        unity_catalog_install_func.cpp
        ${PROJECT_SOURCE_DIR}/extension/duckdb/src/installer/duckdb_installer.cpp)
    set_extension_properties(unity_catalog_installer unity_catalog_installer unity_catalog)

    # APPLE implies NOT WIN32, so the macOS-only step can sit in the same branch.
    if (APPLE)
        set_apple_dynamic_lookup(unity_catalog_installer)
    endif ()
endif ()
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "installer/duckdb_installer.h"

extern "C" {
// The Windows build links against the static library and therefore inherits KUZU_STATIC_DEFINE,
// which cancels out any exporting; KUZU_API cannot be used, so the export attribute is spelled
// out by hand.
#ifdef _WIN32
#define INIT_EXPORT __declspec(dllexport)
#else
#define INIT_EXPORT __attribute__((visibility("default")))
#endif

// Exported C entry point: runs the DuckDB-based installer for "unity_catalog" against `context`.
INIT_EXPORT void install(kuzu::main::ClientContext* context) {
    using kuzu::duckdb_extension::DuckDBInstaller;
    DuckDBInstaller ucInstaller{"unity_catalog"};
    ucInstaller.install(context);
}
}
7 changes: 7 additions & 0 deletions extension/unity_catalog/src/main/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Object library holding the extension's entry points.
add_library(kuzu_unity_catalog_extension OBJECT unity_catalog_extension.cpp)

# Append this target's objects to the list, then publish the list to the parent scope.
list(APPEND UNITY_CATALOG_EXTENSION_OBJECT_FILES $<TARGET_OBJECTS:kuzu_unity_catalog_extension>)
set(UNITY_CATALOG_EXTENSION_OBJECT_FILES ${UNITY_CATALOG_EXTENSION_OBJECT_FILES} PARENT_SCOPE)
37 changes: 37 additions & 0 deletions extension/unity_catalog/src/main/unity_catalog_extension.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

#include "main/unity_catalog_extension.h"

#include "main/client_context.h"
#include "options/unity_catalog_options.h"
#include "storage/unity_catalog_storage.h"

namespace kuzu {
namespace unity_catalog_extension {

// Sets the extension up on the database behind `context`: registers the storage extension under
// EXTENSION_NAME, registers the extension options, then applies option values taken from the
// environment. Options are registered before the environment values are applied.
void UnityCatalogExtension::load(main::ClientContext* context) {
    auto& database = *context->getDatabase();
    auto* databasePtr = &database;

    database.registerStorageExtension(EXTENSION_NAME,
        std::make_unique<UnityCatalogStorageExtension>(databasePtr));

    UnityCatalogOptions::registerExtensionOptions(databasePtr);
    UnityCatalogOptions::setEnvValue(context);
}

} // namespace unity_catalog_extension
} // namespace kuzu

extern "C" {
// Because we link against the static library on windows, we implicitly inherit KUZU_STATIC_DEFINE,
// which cancels out any exporting, so we can't use KUZU_API.
#if defined(_WIN32)
#define INIT_EXPORT __declspec(dllexport)
#else
#define INIT_EXPORT __attribute__((visibility("default")))
#endif
INIT_EXPORT void init(kuzu::main::ClientContext* context) {
kuzu::unity_catalog_extension::UnityCatalogExtension::load(context);
}

INIT_EXPORT const char* name() {
return kuzu::unity_catalog_extension::UnityCatalogExtension::EXTENSION_NAME;
}
}
7 changes: 7 additions & 0 deletions extension/unity_catalog/src/options/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Object library holding the extension's option handling.
add_library(kuzu_unity_catalog_options OBJECT unity_catalog_options.cpp)

# Append this target's objects to the list, then publish the list to the parent scope.
list(APPEND UNITY_CATALOG_EXTENSION_OBJECT_FILES $<TARGET_OBJECTS:kuzu_unity_catalog_options>)
set(UNITY_CATALOG_EXTENSION_OBJECT_FILES ${UNITY_CATALOG_EXTENSION_OBJECT_FILES} PARENT_SCOPE)
48 changes: 48 additions & 0 deletions extension/unity_catalog/src/options/unity_catalog_options.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#include "options/unity_catalog_options.h"

#include "extension/extension.h"
#include "main/client_context.h"
#include "main/database.h"

namespace kuzu {
namespace unity_catalog_extension {

using namespace common;

// Registers the token and endpoint options via the ADD_EXTENSION_OPTION macro.
// NOTE(review): the macro is defined elsewhere and presumably refers to the parameter `db` by
// name -- keep the parameter name unchanged.
void UnityCatalogOptions::registerExtensionOptions(main::Database* db) {
ADD_EXTENSION_OPTION(UnityCatalogToken);
ADD_EXTENSION_OPTION(UnityCatalogEndPoint);
}

// Seeds the Unity Catalog extension options from the environment.
//
// For each option, the environment variable with the same name as the option ("uc_token",
// "uc_endpoint") is read through the client context; a non-empty value is stored as that
// option's value. Options whose variable is empty keep their current value.
void UnityCatalogOptions::setEnvValue(main::ClientContext* context) {
    auto token = context->getEnvVariable(UnityCatalogToken::NAME);
    auto endPoint = context->getEnvVariable(UnityCatalogEndPoint::NAME);
    if (!token.empty()) {
        context->setExtensionOption(UnityCatalogToken::NAME, Value::createValue(token));
    }
    if (!endPoint.empty()) {
        // The endpoint must be stored under the endpoint option, not the token option;
        // otherwise it would overwrite the token and the endpoint would never be set.
        context->setExtensionOption(UnityCatalogEndPoint::NAME, Value::createValue(endPoint));
    }
}

// Renders one field of the DuckDB secret as "<FIELD> '<value>',".
//
// `optionName` is the kuzu option name; it is mapped to the corresponding DuckDB secret field
// (TOKEN or ENDPOINT), and `.at()` throws std::out_of_range for any other name. The value is the
// option's current setting in `context`, converted to a string. Single quotes inside the value
// are doubled so the value cannot terminate the SQL string literal early.
static std::string getUnityCatalogOptions(main::ClientContext* context, std::string optionName) {
    // The table is never modified after construction, hence const.
    static const common::case_insensitive_map_t<std::string> UNITY_CATALOG_OPTIONS = {
        {UnityCatalogToken::NAME, "TOKEN"}, {UnityCatalogEndPoint::NAME, "ENDPOINT"}};
    auto optionNameInDuckDB = UNITY_CATALOG_OPTIONS.at(optionName);
    auto optionValueInKuzu = context->getCurrentSetting(optionName).toString();
    std::string escapedValue;
    escapedValue.reserve(optionValueInKuzu.size());
    for (char c : optionValueInKuzu) {
        if (c == '\'') {
            // SQL escapes a single quote inside a string literal by doubling it.
            escapedValue += '\'';
        }
        escapedValue += c;
    }
    return common::stringFormat("{} '{}',", optionNameInDuckDB, escapedValue);
}

// Builds the DuckDB `CREATE SECRET` statement of TYPE UC from the current option values.
std::string DuckDBUnityCatalogSecretManager::getSecret(main::ClientContext* context) {
    // Each helper call yields "<FIELD> '<value>'," (with the trailing comma), so the fragments
    // can be concatenated directly ahead of the TYPE clause. Token first, then endpoint.
    std::string tokenField = getUnityCatalogOptions(context, UnityCatalogToken::NAME);
    std::string endPointField = getUnityCatalogOptions(context, UnityCatalogEndPoint::NAME);
    std::string secretFields = tokenField + endPointField;

    std::string secretTemplate = R"(CREATE SECRET (
{}
TYPE UC
);)";
    return common::stringFormat(secretTemplate, secretFields);
}

} // namespace unity_catalog_extension
} // namespace kuzu
13 changes: 13 additions & 0 deletions extension/unity_catalog/src/storage/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Object library for the Unity Catalog storage extension. It also compiles the DuckDB
# extension's catalog, scan, clear-cache and type-converter sources.
add_library(kuzu_unity_catalog_storage OBJECT
    unity_catalog_storage.cpp
    ${PROJECT_SOURCE_DIR}/extension/duckdb/src/catalog/duckdb_catalog.cpp
    ${PROJECT_SOURCE_DIR}/extension/duckdb/src/function/duckdb_scan.cpp
    ${PROJECT_SOURCE_DIR}/extension/duckdb/src/function/clear_cache.cpp
    ${PROJECT_SOURCE_DIR}/extension/duckdb/src/connector/duckdb_type_converter.cpp
    ${PROJECT_SOURCE_DIR}/extension/duckdb/src/catalog/duckdb_table_catalog_entry.cpp)

# Append this target's objects to the list, then publish the list to the parent scope.
list(APPEND UNITY_CATALOG_EXTENSION_OBJECT_FILES $<TARGET_OBJECTS:kuzu_unity_catalog_storage>)
set(UNITY_CATALOG_EXTENSION_OBJECT_FILES ${UNITY_CATALOG_EXTENSION_OBJECT_FILES} PARENT_SCOPE)
Loading

0 comments on commit edf39b9

Please sign in to comment.