Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Use IAM with RDS, increase number of backups and upgraded engine. #578

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions lib/chatbot-api/rest-api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import * as appsync from "aws-cdk-lib/aws-appsync";
import { parse } from "graphql";
import { readFileSync } from "fs";
import * as s3 from "aws-cdk-lib/aws-s3";
import { AURORA_DB_USERS } from "../rag-engines/aurora-pgvector";

export interface ApiResolversProps {
readonly shared: Shared;
Expand Down Expand Up @@ -75,8 +76,13 @@ export class ApiResolvers extends Construct {
CHATBOT_FILES_BUCKET_NAME: props.filesBucket.bucketName,
PROCESSING_BUCKET_NAME:
props.ragEngines?.processingBucket?.bucketName ?? "",
AURORA_DB_SECRET_ID: props.ragEngines?.auroraPgVector?.database
?.secret?.secretArn as string,
AURORA_DB_USER: AURORA_DB_USERS.READ_ONLY,
AURORA_DB_HOST:
props.ragEngines?.auroraPgVector?.database?.clusterEndpoint
?.hostname ?? "",
AURORA_DB_PORT:
props.ragEngines?.auroraPgVector?.database?.clusterEndpoint?.port +
"",
WORKSPACES_TABLE_NAME:
props.ragEngines?.workspacesTable.tableName ?? "",
WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME:
Expand Down Expand Up @@ -139,7 +145,10 @@ export class ApiResolvers extends Construct {
}

if (props.ragEngines?.auroraPgVector) {
props.ragEngines.auroraPgVector.database.secret?.grantRead(apiHandler);
props.ragEngines.auroraPgVector.database.grantConnect(
apiHandler,
AURORA_DB_USERS.READ_ONLY
);
props.ragEngines.auroraPgVector.database.connections.allowDefaultPortFrom(
apiHandler
);
Expand Down
15 changes: 11 additions & 4 deletions lib/model-interfaces/langchain/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import * as path from "path";
import { RagEngines } from "../../rag-engines";
import { Shared } from "../../shared";
import { SystemConfig } from "../../shared/types";
import { AURORA_DB_USERS } from "../../rag-engines/aurora-pgvector";

interface LangChainInterfaceProps {
readonly shared: Shared;
Expand Down Expand Up @@ -57,8 +58,13 @@ export class LangChainInterface extends Construct {
props.ragEngines?.workspacesTable.tableName ?? "",
WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME:
props.ragEngines?.workspacesByObjectTypeIndexName ?? "",
AURORA_DB_SECRET_ID: props.ragEngines?.auroraPgVector?.database?.secret
?.secretArn as string,
AURORA_DB_USER: AURORA_DB_USERS.READ_ONLY,
AURORA_DB_HOST:
props.ragEngines?.auroraPgVector?.database?.clusterEndpoint
?.hostname ?? "",
AURORA_DB_PORT:
props.ragEngines?.auroraPgVector?.database?.clusterEndpoint?.port +
"",
SAGEMAKER_RAG_MODELS_ENDPOINT:
props.ragEngines?.sageMakerRagModels?.model.endpoint
?.attrEndpointName ?? "",
Expand Down Expand Up @@ -110,8 +116,9 @@ export class LangChainInterface extends Construct {
}

if (props.ragEngines?.auroraPgVector) {
props.ragEngines?.auroraPgVector.database.secret?.grantRead(
requestHandler
props.ragEngines.auroraPgVector.database.grantConnect(
requestHandler,
AURORA_DB_USERS.READ_ONLY
);
props.ragEngines?.auroraPgVector.database.connections.allowDefaultPortFrom(
requestHandler
Expand Down
8 changes: 6 additions & 2 deletions lib/rag-engines/aurora-pgvector/create-aurora-workspace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import * as lambda from "aws-cdk-lib/aws-lambda";
import * as logs from "aws-cdk-lib/aws-logs";
import * as rds from "aws-cdk-lib/aws-rds";
import { RemovalPolicy } from "aws-cdk-lib";
import { AURORA_DB_USERS } from ".";

export interface CreateAuroraWorkspaceProps {
readonly config: SystemConfig;
Expand Down Expand Up @@ -42,7 +43,9 @@ export class CreateAuroraWorkspace extends Construct {
loggingFormat: lambda.LoggingFormat.JSON,
environment: {
...props.shared.defaultEnvironmentVariables,
AURORA_DB_SECRET_ID: props.dbCluster.secret?.secretArn as string,
AURORA_DB_USER: AURORA_DB_USERS.ADMIN,
AURORA_DB_HOST: props.dbCluster?.clusterEndpoint?.hostname ?? "",
AURORA_DB_PORT: props.dbCluster?.clusterEndpoint?.port + "",
WORKSPACES_TABLE_NAME:
props.ragDynamoDBTables.workspacesTable.tableName,
WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME:
Expand All @@ -51,7 +54,8 @@ export class CreateAuroraWorkspace extends Construct {
}
);

props.dbCluster.secret?.grantRead(createFunction);
// Process will create a new table and requires Admin permission on the SQL Schema
props.dbCluster.grantConnect(createFunction, AURORA_DB_USERS.ADMIN);
props.dbCluster.connections.allowDefaultPortFrom(createFunction);
props.ragDynamoDBTables.workspacesTable.grantReadWriteData(createFunction);

Expand Down
91 changes: 91 additions & 0 deletions lib/rag-engines/aurora-pgvector/functions/pg-setup/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import json
import boto3
import psycopg2
import cfnresponse
from aws_lambda_powertools import Logger
from aws_lambda_powertools.utilities.typing import LambdaContext
from pgvector.psycopg2 import register_vector

# Powertools logger shared by the module; it also provides the
# inject_lambda_context decorator applied to lambda_handler.
logger = Logger()
# Secrets Manager client created once at import time; lambda_handler uses it
# to read the database credentials secret.
secretsmanager_client = boto3.client("secretsmanager")


@logger.inject_lambda_context(log_event=True)
def lambda_handler(event, context: LambdaContext):
    """Custom-resource handler that prepares the Aurora PostgreSQL database.

    On ``Create`` and ``Update`` requests it connects with the credentials
    stored in the secret named by ``ResourceProperties.AURORA_DB_SECRET_ID``
    and then:

    * installs the ``vector`` extension (and attempts to upgrade it),
    * creates the IAM-authenticated users ``aurora_db_iam_admin``,
      ``aurora_db_iam_write`` and ``aurora_db_iam_read`` when missing,
    * grants ``rds_iam`` plus the per-user privileges listed below.

    User creation is checked per user and every GRANT is re-applied on each
    run, so a run that failed halfway (the session is in autocommit mode) is
    repaired by the next one instead of leaving some users without their
    privileges.

    Other request types (e.g. ``Delete``) perform no database work and do not
    read the secret.

    Args:
        event: CloudFormation custom resource event; must contain
            ``RequestType`` and ``ResourceProperties``.
        context: Lambda context object.

    Returns:
        ``{"ok": True}`` after a SUCCESS response has been sent through
        ``cfnresponse``.

    Raises:
        KeyError: if the event or the secret is missing an expected key.
        Exception: any Secrets Manager or database error propagates to the
            caller; no response is sent from this function in that case.
    """
    request_type = event["RequestType"]
    resource_properties = event["ResourceProperties"]
    AURORA_DB_SECRET_ID = resource_properties["AURORA_DB_SECRET_ID"]

    # IAM database users and the privileges each one receives.
    # Names are fixed constants, so interpolating them into DDL is safe.
    # Pre-defined roles:
    # https://www.postgresql.org/docs/current/predefined-roles.html
    iam_user_grants = (
        (
            # Step functions need to create/delete tables on workspace change
            "aurora_db_iam_admin",
            (
                "GRANT pg_read_all_data TO aurora_db_iam_admin;",
                "GRANT pg_write_all_data TO aurora_db_iam_admin;",
                "GRANT CREATE ON SCHEMA public TO aurora_db_iam_admin;",
            ),
        ),
        (
            # Adding documents requires write permissions
            "aurora_db_iam_write",
            (
                "GRANT pg_read_all_data TO aurora_db_iam_write;",
                "GRANT pg_write_all_data TO aurora_db_iam_write;",
            ),
        ),
        (
            # Querying the RAG only requires read operations
            "aurora_db_iam_read",
            ("GRANT pg_read_all_data TO aurora_db_iam_read;",),
        ),
    )

    if request_type in ("Create", "Update"):
        # The secret is only needed when we actually connect, so it is not
        # fetched for Delete requests.
        secret_response = secretsmanager_client.get_secret_value(
            SecretId=AURORA_DB_SECRET_ID
        )
        database_secrets = json.loads(secret_response["SecretString"])
        dbhost = database_secrets["host"]
        dbport = database_secrets["port"]
        dbuser = database_secrets["username"]
        dbpass = database_secrets["password"]

        dbconn = psycopg2.connect(
            host=dbhost, user=dbuser, password=dbpass, port=dbport, connect_timeout=10
        )
        try:
            # Each statement is committed on its own.
            dbconn.set_session(autocommit=True)

            cur = dbconn.cursor()
            try:
                cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
                register_vector(dbconn)

                cur.execute("SELECT typname FROM pg_type WHERE typname = 'vector';")
                rows = cur.fetchall()

                for row in rows:
                    logger.info(f"pg_type.typname: {row}")

                cur.execute(
                    "SELECT extname, extversion FROM pg_extension "
                    "WHERE extname = 'vector';"
                )
                rows = cur.fetchall()

                if len(rows) == 1:
                    logger.info("Attempt upgrading vector extension")
                    cur.execute("ALTER EXTENSION vector UPDATE;")

                # Set up IAM users
                # https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/UsingWithRDS.IAMDBAuth.DBAccounts.html#UsingWithRDS.IAMDBAuth.DBAccounts.PostgreSQL
                for user_name, grants in iam_user_grants:
                    # Role names are unique across users and roles, so look
                    # the name up in pg_roles before creating it.
                    cur.execute(
                        "SELECT 1 FROM pg_catalog.pg_roles WHERE rolname = %s;",
                        (user_name,),
                    )
                    if cur.fetchone() is None:
                        cur.execute(f"CREATE USER {user_name};")

                    # GRANT is a no-op when the privilege is already held, so
                    # re-applying it on every run is safe.
                    cur.execute(f"GRANT rds_iam TO {user_name};")
                    for grant_statement in grants:
                        cur.execute(grant_statement)
            finally:
                cur.close()
        finally:
            # Always release the connection, even when a statement failed.
            dbconn.close()

        logger.info("Created vector extension and users")

    cfnresponse.send(event, context, cfnresponse.SUCCESS, {"ok": True})

    return {"ok": True}
53 changes: 0 additions & 53 deletions lib/rag-engines/aurora-pgvector/functions/pgvector-setup/index.py

This file was deleted.

21 changes: 17 additions & 4 deletions lib/rag-engines/aurora-pgvector/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ export interface AuroraPgVectorProps {
readonly ragDynamoDBTables: RagDynamoDBTables;
}

/**
 * PostgreSQL user names used for IAM database authentication against the
 * Aurora cluster.
 *
 * The values must stay in sync with the users created by the database setup
 * function (functions/pg-setup/index.py), which creates each user and grants
 * it the `rds_iam` role.
 */
export enum AURORA_DB_USERS {
/** Granted `pg_read_all_data` by the setup function; used where only querying is needed. */
READ_ONLY = "aurora_db_iam_read",
/** Granted `pg_read_all_data` and `pg_write_all_data`; used by the data import jobs. */
WRITE = "aurora_db_iam_write",
/** Read/write plus `CREATE` on schema `public`; used by the workspace creation function. */
ADMIN = "aurora_db_iam_admin",
}

export class AuroraPgVector extends Construct {
readonly database: rds.DatabaseCluster;
public readonly createAuroraWorkspaceWorkflow: sfn.StateMachine;
Expand All @@ -28,13 +34,20 @@ export class AuroraPgVector extends Construct {

const dbCluster = new rds.DatabaseCluster(this, "AuroraDatabase", {
engine: rds.DatabaseClusterEngine.auroraPostgres({
version: rds.AuroraPostgresEngineVersion.VER_15_3,
// Extensions version per engine
// https://docs.aws.amazon.com/AmazonRDS/latest/AuroraPostgreSQLReleaseNotes/AuroraPostgreSQL.Extensions.html
version: rds.AuroraPostgresEngineVersion.VER_15_7,
}),
removalPolicy: cdk.RemovalPolicy.DESTROY,
writer: rds.ClusterInstance.serverlessV2("ServerlessInstance"),
vpc: props.shared.vpc,
vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_ISOLATED },
iamAuthentication: true,
backup: {
// 35 days is the max value
// https://docs.aws.amazon.com/prescriptive-guidance/latest/backup-recovery/rds.html
retention: cdk.Duration.days(35),
},
});

const databaseSetupFunction = new lambda.Function(
Expand All @@ -43,9 +56,9 @@ export class AuroraPgVector extends Construct {
{
vpc: props.shared.vpc,
code: props.shared.sharedCode.bundleWithLambdaAsset(
path.join(__dirname, "./functions/pgvector-setup")
path.join(__dirname, "./functions/pg-setup")
),
description: "PGVector setup",
description: "Users and PGVector setup",
runtime: props.shared.pythonRuntime,
architecture: props.shared.lambdaArchitecture,
handler: "index.lambda_handler",
Expand Down Expand Up @@ -73,7 +86,7 @@ export class AuroraPgVector extends Construct {

const dbSetupResource = new cdk.CustomResource(
this,
"DatabaseSetupResource",
"DatabaseSetupExtensionsAndUsers",
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename to force running the script on existing environments.

{
removalPolicy: cdk.RemovalPolicy.DESTROY,
serviceToken: databaseSetupProvider.serviceToken,
Expand Down
11 changes: 8 additions & 3 deletions lib/rag-engines/data-import/file-import-batch-job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import * as iam from "aws-cdk-lib/aws-iam";
import * as rds from "aws-cdk-lib/aws-rds";
import * as sagemaker from "aws-cdk-lib/aws-sagemaker";
import { NagSuppressions } from "cdk-nag";
import { AURORA_DB_USERS } from "../aurora-pgvector";

export interface FileImportBatchJobProps {
readonly config: SystemConfig;
Expand Down Expand Up @@ -79,8 +80,9 @@ export class FileImportBatchJob extends Construct {
AWS_DEFAULT_REGION: cdk.Stack.of(this).region,
CONFIG_PARAMETER_NAME: props.shared.configParameter.parameterName,
API_KEYS_SECRETS_ARN: props.shared.apiKeysSecret.secretArn,
AURORA_DB_SECRET_ID: props.auroraDatabase?.secret
?.secretArn as string,
AURORA_DB_USER: AURORA_DB_USERS.WRITE,
AURORA_DB_HOST: props.auroraDatabase?.clusterEndpoint?.hostname ?? "",
AURORA_DB_PORT: props.auroraDatabase?.clusterEndpoint?.port + "",
PROCESSING_BUCKET_NAME: props.processingBucket.bucketName,
WORKSPACES_TABLE_NAME:
props.ragDynamoDBTables.workspacesTable.tableName,
Expand Down Expand Up @@ -128,7 +130,10 @@ export class FileImportBatchJob extends Construct {
);

if (props.auroraDatabase) {
props.auroraDatabase.secret?.grantRead(fileImportJobRole);
props.auroraDatabase.grantConnect(
fileImportJobRole,
AURORA_DB_USERS.WRITE
);
props.auroraDatabase.connections.allowDefaultPortFrom(computeEnvironment);
}

Expand Down
11 changes: 8 additions & 3 deletions lib/rag-engines/data-import/web-crawler-batch-job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import * as iam from "aws-cdk-lib/aws-iam";
import * as rds from "aws-cdk-lib/aws-rds";
import * as sagemaker from "aws-cdk-lib/aws-sagemaker";
import { NagSuppressions } from "cdk-nag";
import { AURORA_DB_USERS } from "../aurora-pgvector";

export interface WebCrawlerBatchJobProps {
readonly config: SystemConfig;
Expand Down Expand Up @@ -78,8 +79,9 @@ export class WebCrawlerBatchJob extends Construct {
AWS_DEFAULT_REGION: cdk.Stack.of(this).region,
CONFIG_PARAMETER_NAME: props.shared.configParameter.parameterName,
API_KEYS_SECRETS_ARN: props.shared.apiKeysSecret.secretArn,
AURORA_DB_SECRET_ID: props.auroraDatabase?.secret
?.secretArn as string,
AURORA_DB_USER: AURORA_DB_USERS.WRITE,
AURORA_DB_HOST: props.auroraDatabase?.clusterEndpoint?.hostname ?? "",
AURORA_DB_PORT: props.auroraDatabase?.clusterEndpoint?.port + "",
PROCESSING_BUCKET_NAME: props.processingBucket.bucketName,
WORKSPACES_TABLE_NAME:
props.ragDynamoDBTables.workspacesTable.tableName,
Expand Down Expand Up @@ -126,7 +128,10 @@ export class WebCrawlerBatchJob extends Construct {
);

if (props.auroraDatabase) {
props.auroraDatabase.secret?.grantRead(webCrawlerJobRole);
props.auroraDatabase.grantConnect(
webCrawlerJobRole,
AURORA_DB_USERS.WRITE
);
props.auroraDatabase.connections.allowDefaultPortFrom(computeEnvironment);
}

Expand Down
Loading
Loading