Skip to content

Commit

Permalink
add : aws glue catalog table with storage and ser_de configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
Rahul-4480 committed Aug 20, 2024
1 parent 95dee67 commit dbc9894
Show file tree
Hide file tree
Showing 3 changed files with 196 additions and 0 deletions.
153 changes: 153 additions & 0 deletions locals.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,156 @@
locals {
  # S3 locations derived from input variables.
  query_results_bucket_location = "s3://${var.query_results_bucket}/output/"
  alb_logs_bucket_location      = "s3://${var.s3_bucket_name}/${var.s3_log_prefix}"

  # Hadoop/Hive input and output format classes for the table's storage descriptor.
  input_format  = "org.apache.hadoop.mapred.TextInputFormat"
  output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"

  # SerDe configuration: each log line is split into fields by a regular expression.
  ser_de_name    = "alb-logs-serde"
  ser_de_library = "org.apache.hadoop.hive.serde2.RegexSerDe"
  ser_de_parameters = {
    "serialization.format" = "1"
    # The pattern has one capture group per entry in table_columns below, in the same order.
    # Keep the two in sync when adding or removing fields.
    "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) (.*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^\\s]+?)\" \"([^\\s]+)\" \"([^ ]*)\" \"([^ ]*)\" ?([^ ]*)?"
  }

  # Table-level metadata.
  table_type = "EXTERNAL_TABLE"
  parameters = {
    "classification" = "log"
    "typeOfData"     = "awsALBLogs"
  }

  # Column schema as objects with `name` and `type` attributes.
  # Declared as compact [name, type] pairs and expanded by the for-expression;
  # the order is significant because it mirrors the regex capture groups above.
  table_columns = [
    for pair in [
      ["type", "string"],
      ["time", "string"],
      ["elb", "string"],
      ["client_ip", "string"],
      ["client_port", "int"],
      ["target_ip", "string"],
      ["target_port", "int"],
      ["request_processing_time", "double"],
      ["target_processing_time", "double"],
      ["response_processing_time", "double"],
      ["elb_status_code", "int"],
      ["target_status_code", "string"],
      ["received_bytes", "bigint"],
      ["sent_bytes", "bigint"],
      ["request_verb", "string"],
      ["request_url", "string"],
      ["request_proto", "string"],
      ["user_agent", "string"],
      ["ssl_cipher", "string"],
      ["ssl_protocol", "string"],
      ["target_group_arn", "string"],
      ["trace_id", "string"],
      ["domain_name", "string"],
      ["chosen_cert_arn", "string"],
      ["matched_rule_priority", "string"],
      ["request_creation_time", "string"],
      ["actions_executed", "string"],
      ["redirect_url", "string"],
      ["lambda_error_reason", "string"],
      ["target_port_list", "string"],
      ["target_status_code_list", "string"],
      ["classification", "string"],
      ["classification_reason", "string"],
      ["conn_trace_id", "string"],
    ] : {
      name = pair[0]
      type = pair[1]
    }
  ]
}
28 changes: 28 additions & 0 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,31 @@ resource "aws_athena_workgroup" "this" {
# Glue Data Catalog database; its name is supplied by var.database_name.
resource "aws_glue_catalog_database" "this" {
  name = var.database_name
}

# Glue Data Catalog table registered in the database above.
# Format, SerDe and column settings all come from values defined in locals.
resource "aws_glue_catalog_table" "this" {
  database_name = aws_glue_catalog_database.this.name
  name          = var.table_name

  table_type = local.table_type
  parameters = local.parameters

  storage_descriptor {
    location      = local.alb_logs_bucket_location
    input_format  = local.input_format
    output_format = local.output_format

    ser_de_info {
      name                  = local.ser_de_name
      serialization_library = local.ser_de_library
      parameters            = local.ser_de_parameters
    }

    # Emit one `columns` block per entry of local.table_columns, preserving list order.
    dynamic "columns" {
      for_each = local.table_columns
      iterator = column

      content {
        name = column.value.name
        type = column.value.type
      }
    }
  }
}
15 changes: 15 additions & 0 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,18 @@ variable "database_name" {
description = "The name of the Athena database."
type = string
}

# Used as the name of aws_glue_catalog_table.this.
variable "table_name" {
  type        = string
  description = "The name of the Athena table to query ALB logs."
}

# Bucket component of the table's S3 location (local.alb_logs_bucket_location).
variable "s3_bucket_name" {
  type        = string
  description = "The name of the S3 bucket where ALB logs are stored."
}

# Key-prefix component of the table's S3 location; it is appended verbatim
# after "s3://<bucket>/" in local.alb_logs_bucket_location.
variable "s3_log_prefix" {
  type        = string
  description = "The prefix within the S3 bucket where ALB logs are stored (e.g., 'AWSLogs/')."
}

0 comments on commit dbc9894

Please sign in to comment.