diff --git a/locals.tf b/locals.tf
index 05adca6..d55b6d4 100644
--- a/locals.tf
+++ b/locals.tf
@@ -1,3 +1,64 @@
 locals {
   query_results_bucket_location = "s3://${var.query_results_bucket}/output/"
+  # Log prefix with leading/trailing "/" stripped, so the location below never
+  # contains "//" and always ends in exactly one "/".
+  alb_logs_prefix = trim(var.s3_log_prefix, "/")
+
+  # S3 location the Glue table reads from; an empty prefix means the bucket root.
+  alb_logs_bucket_location = local.alb_logs_prefix == "" ? "s3://${var.s3_bucket_name}/" : "s3://${var.s3_bucket_name}/${local.alb_logs_prefix}/"
+
+  # Input/output formats and the regex SerDe used for the table's storage.
+  input_format   = "org.apache.hadoop.mapred.TextInputFormat"
+  output_format  = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
+  ser_de_name    = "alb-logs-serde"
+  ser_de_library = "org.apache.hadoop.hive.serde2.RegexSerDe"
+  ser_de_parameters = {
+    "serialization.format" = "1"
+    # 34 capture groups, one per entry of table_columns, in the same order.
+    "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) (.*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^\\s]+?)\" \"([^\\s]+)\" \"([^ ]*)\" \"([^ ]*)\" ?([^ ]*)?"
+  }
+
+  table_type = "EXTERNAL_TABLE"
+  parameters = {
+    "classification" = "log"
+    "typeOfData"     = "awsALBLogs"
+  }
+
+  # Table schema; keep the order in sync with the capture groups of "input.regex".
+  table_columns = [
+    { name = "type", type = "string" },
+    { name = "time", type = "string" },
+    { name = "elb", type = "string" },
+    { name = "client_ip", type = "string" },
+    { name = "client_port", type = "int" },
+    { name = "target_ip", type = "string" },
+    { name = "target_port", type = "int" },
+    { name = "request_processing_time", type = "double" },
+    { name = "target_processing_time", type = "double" },
+    { name = "response_processing_time", type = "double" },
+    { name = "elb_status_code", type = "int" },
+    { name = "target_status_code", type = "string" },
+    { name = "received_bytes", type = "bigint" },
+    { name = "sent_bytes", type = "bigint" },
+    { name = "request_verb", type = "string" },
+    { name = "request_url", type = "string" },
+    { name = "request_proto", type = "string" },
+    { name = "user_agent", type = "string" },
+    { name = "ssl_cipher", type = "string" },
+    { name = "ssl_protocol", type = "string" },
+    { name = "target_group_arn", type = "string" },
+    { name = "trace_id", type = "string" },
+    { name = "domain_name", type = "string" },
+    { name = "chosen_cert_arn", type = "string" },
+    { name = "matched_rule_priority", type = "string" },
+    { name = "request_creation_time", type = "string" },
+    { name = "actions_executed", type = "string" },
+    { name = "redirect_url", type = "string" },
+    { name = "lambda_error_reason", type = "string" },
+    { name = "target_port_list", type = "string" },
+    { name = "target_status_code_list", type = "string" },
+    { name = "classification", type = "string" },
+    { name = "classification_reason", type = "string" },
+    { name = "conn_trace_id", type = "string" },
+  ]
 }
diff --git a/main.tf b/main.tf
index 97d10ba..70e404b 100644
--- a/main.tf
+++ b/main.tf
@@ -14,3 +14,33 @@ resource "aws_athena_workgroup" "this" {
 resource "aws_glue_catalog_database" "this" {
   name = var.database_name
 }
+
+# Glue catalog table that exposes the ALB access logs to Athena.
+resource "aws_glue_catalog_table" "this" {
+  name          = var.table_name
+  database_name = aws_glue_catalog_database.this.name
+  table_type    = local.table_type
+  parameters    = local.parameters
+
+  storage_descriptor {
+    location      = local.alb_logs_bucket_location
+    input_format  = local.input_format
+    output_format = local.output_format
+
+    ser_de_info {
+      name                  = local.ser_de_name
+      serialization_library = local.ser_de_library
+
+      parameters = local.ser_de_parameters
+    }
+
+    # One columns block per entry of local.table_columns, in list order.
+    dynamic "columns" {
+      for_each = local.table_columns
+      content {
+        name = columns.value.name
+        type = columns.value.type
+      }
+    }
+  }
+}
diff --git a/variables.tf b/variables.tf
index 558af55..94c87d6 100644
--- a/variables.tf
+++ b/variables.tf
@@ -12,3 +12,18 @@ variable "database_name" {
   description = "The name of the Athena database."
   type        = string
 }
+
+variable "table_name" {
+  description = "The name of the Athena table to query ALB logs."
+  type        = string
+}
+
+variable "s3_bucket_name" {
+  description = "The name of the S3 bucket where ALB logs are stored."
+  type        = string
+}
+
+variable "s3_log_prefix" {
+  description = "The prefix within the S3 bucket where ALB logs are stored (e.g., 'AWSLogs/')."
+  type        = string
+}