Terraform module used for Databricks Workspace configuration and Resources creation
This module configures a Databricks Workspace and creates resources in it, for example:
- Default Shared Autoscaling cluster
- ADLS Gen2 Mount
- Secret scope and its secrets
- Users for Standard SKU Workspaces
# Prerequisite resources
# Existing Databricks Workspace; its URL, resource ID and workspace ID are referenced below
data "azurerm_databricks_workspace" "example" {
  resource_group_name = "example-rg"
  name                = "example-workspace"
}
# Databricks Provider configuration
# Aliased provider bound to the example workspace; handed to the module through its `providers` map
provider "databricks" {
  alias                       = "main"
  azure_workspace_resource_id = data.azurerm_databricks_workspace.example.id
  host                        = data.azurerm_databricks_workspace.example.workspace_url
}
# Key Vault that holds the Service Principal credentials (App ID and Secret) used for mounting ADLS Gen 2
data "azurerm_key_vault" "example" {
  resource_group_name = "example-rg"
  name                = "example-key-vault"
}
# Storage Account whose name is passed to the module's `mountpoints` input
data "azurerm_storage_account" "example" {
  resource_group_name = "example-rg"
  name                = "examplestorage"
}
# Databricks Runtime module usage example
module "databricks_runtime_core" {
  source = "data-platform-hq/databricks-runtime/databricks"

  workspace_id = data.azurerm_databricks_workspace.example.workspace_id

  # Databricks users
  users = ["user1", "user2"]

  # Default cluster parameters
  custom_default_cluster_name  = "databricks_example_cluster"
  cluster_nodes_availability   = "SPOT_AZURE" # may require increasing the Regional Spot quotas
  cluster_log_conf_destination = "dbfs:/cluster-logs"

  # Parameters of the Service Principal used for the ADLS mount.
  # The App ID, Secret Key and tenant ID are imported from the target Key Vault.
  key_vault_id             = data.azurerm_key_vault.example.id
  sp_client_id_secret_name = "sp-client-id"        # name of the secret that stores the Service Principal App ID
  sp_key_secret_name       = "sp-key"              # name of the secret that stores the Service Principal Secret Key
  tenant_id_secret_name    = "infra-arm-tenant-id" # name of the secret that stores the tenant ID value

  # Additional Secret Scope
  secret_scope = [{
    scope_name = "extra-scope"
    acl        = null # Only group names are allowed. If left empty, only Workspace admins can access these keys
    secrets = [
      { key = "secret-name", string_value = "secret-value" }
    ]
  }]

  # ADLS Gen2 mount points. The input is a map of objects, so each mount
  # is declared as its own entry under a key of your choice.
  mountpoints = {
    example = {
      storage_account_name = data.azurerm_storage_account.example.name
      container_name       = "example-container" # container names allow only lowercase letters, digits and hyphens
    }
  }

  providers = {
    databricks = databricks.main
  }
}
Name | Version |
---|---|
terraform | >=1.0.0 |
azurerm | >= 4.0.1 |
databricks | >=1.9.2 |
Name | Version |
---|---|
azurerm | >= 4.0.1 |
databricks | >=1.9.2 |
No modules.
Name | Type |
---|---|
azurerm_role_assignment.this | resource |
databricks_cluster.this | resource |
databricks_mount.adls | resource |
databricks_secret.main | resource |
databricks_secret.this | resource |
databricks_secret_scope.main | resource |
databricks_secret_scope.this | resource |
databricks_token.pat | resource |
databricks_user.this | resource |
azurerm_key_vault_secret.sp_client_id | data source |
azurerm_key_vault_secret.sp_key | data source |
azurerm_key_vault_secret.tenant_id | data source |
Name | Description | Type | Default | Required |
---|---|---|---|---|
autotermination_minutes | Automatically terminate the cluster after being inactive for this time in minutes. If not set, Databricks won't automatically terminate an inactive cluster. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. | number |
15 |
no |
cluster_log_conf_destination | DBFS location to which all cluster logs are pushed | string |
"" |
no |
cluster_nodes_availability | Availability type used for all subsequent nodes past the first_on_demand ones: [SPOT_AZURE|SPOT_WITH_FALLBACK_AZURE|ON_DEMAND_AZURE] | string |
null |
no |
custom_default_cluster_name | Databricks cluster name, which does not have to be unique | string |
null |
no |
data_security_mode | Security features of the cluster | string |
"NONE" |
no |
first_on_demand | The first first_on_demand nodes of the cluster will be placed on on-demand instances: [[:number]] | number |
0 |
no |
key_vault_id | ID of the Key Vault instance where the Secret resides | string |
n/a | yes |
max_workers | The maximum number of workers to which the cluster can scale up when overloaded. max_workers must be strictly greater than min_workers. | number |
2 |
no |
min_workers | The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. | number |
1 |
no |
mountpoints | Mountpoints for databricks | map(object({ |
{} |
no |
node_type | Databricks node type ID | string |
"Standard_D3_v2" |
no |
pat_token_lifetime_seconds | The lifetime of the token, in seconds. If no lifetime is specified, the token remains valid indefinitely | number |
315569520 |
no |
permissions | Databricks Workspace permission maps | list(map(string)) |
[ |
no |
secret_scope | Provides an ability to create a custom Secret Scope, store secrets in it and assign ACLs for access management. scope_name - name of the Secret Scope to create; acl - list of objects, where 'principal' is a custom group name (this group is created in the 'Premium' module) and 'permission' is one of "READ", "WRITE", "MANAGE"; secrets - list of objects, where the object's 'key' param is the name of the key to create and 'string_value' is its value. |
list(object({ |
[ |
no |
single_user_name | single user cluster mode | string |
null |
no |
sp_client_id_secret_name | Name of the Azure Key Vault secret that contains the Client ID of the Service Principal | string |
n/a | yes |
sp_key_secret_name | Name of the Azure Key Vault secret that contains the client secret of the Service Principal | string |
n/a | yes |
spark_conf | Map with key-value pairs to fine-tune Spark clusters, where you can provide custom Spark configuration properties in a cluster configuration. | map(any) |
{} |
no |
spark_env_vars | Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers. | map(any) |
{} |
no |
spark_version | Runtime version | string |
"11.3.x-scala2.12" |
no |
spot_bid_max_price | The max price for Azure spot instances. Use -1 to specify lowest price. | number |
-1 |
no |
tenant_id_secret_name | Name of the Azure Key Vault secret that contains the tenant ID of the Service Principal | string |
n/a | yes |
users | List of users to access Databricks | list(string) |
[] |
no |
workspace_id | Databricks Workspace ID | string |
n/a | yes |
Name | Description |
---|---|
cluster_id | Databricks Cluster Id |
token | Databricks Personal Access Token |
Apache 2 Licensed. For more information please see LICENSE