diff --git a/config/schema.cue b/config/schema.cue
index 7cc0613b3..4fb7ace0c 100644
--- a/config/schema.cue
+++ b/config/schema.cue
@@ -509,6 +509,7 @@ import "path"
       sync_interval: string | *"10s"
     }
   }
+  max_buckets: uint | *10000
 }
 
 #analytics_output_name: "opensearch"
diff --git a/src/api-umbrella/web-app/actions/admin/stats.lua b/src/api-umbrella/web-app/actions/admin/stats.lua
index 806b6b06e..bf4568c73 100644
--- a/src/api-umbrella/web-app/actions/admin/stats.lua
+++ b/src/api-umbrella/web-app/actions/admin/stats.lua
@@ -286,6 +286,19 @@ function _M.search(self)
   search:aggregate_by_response_time_average()
 
   local results = search:fetch_results()
+
+  -- Optimization: Every request should have an IP, so we don't need to perform
+  -- extra aggregations to look for total counts and missing values, since we
+  -- can assume the total count matches the overall hit count, and the missing
+  -- IPs are zero. But we'll fake the structure needed for `aggregation_result`
+  -- below.
+  results["aggregations"]["value_count_request_ip"] = {
+    value = results["hits"]["_total_value"],
+  }
+  results["aggregations"]["missing_request_ip"] = {
+    doc_count = 0,
+  }
+
   local response = {
     stats = {
       total_hits = results["hits"]["_total_value"],
@@ -302,7 +315,7 @@ function _M.search(self)
 
   if results["aggregations"] then
     response["stats"]["total_users"] = results["aggregations"]["unique_user_email"]["value"]
-    response["stats"]["total_ips"] = results["aggregations"]["unique_request_ip"]["value"]
+    response["stats"]["total_ips"] = results["aggregations"]["sampled_ips"]["unique_request_ip"]["value"]
     response["stats"]["average_response_time"] = results["aggregations"]["response_time_average"]["value"]
   end
 
diff --git a/src/api-umbrella/web-app/models/analytics_search_opensearch.lua b/src/api-umbrella/web-app/models/analytics_search_opensearch.lua
index f76e9fdbd..cec45bb5c 100644
--- a/src/api-umbrella/web-app/models/analytics_search_opensearch.lua
+++ b/src/api-umbrella/web-app/models/analytics_search_opensearch.lua
@@ -402,8 +402,8 @@ function _M:aggregate_by_interval_for_summary()
     unique_user_ids = {
       terms = {
         field = "user_id",
-        size = 100000000,
-        shard_size = 100000000 * 4,
+        size = config["opensearch"]["max_buckets"],
+        shard_size = config["opensearch"]["max_buckets"] * 4,
       },
     },
     response_time_average = {
@@ -440,7 +440,7 @@ function _M:aggregate_by_cardinality(field)
   self.body["aggregations"]["unique_" .. field] = {
     cardinality = {
       field = field,
-      precision_threshold = 100,
+      precision_threshold = 3000,
     },
   }
 end
@@ -451,8 +451,31 @@ function _M:aggregate_by_users(size)
 end
 
 function _M:aggregate_by_request_ip(size)
-  self:aggregate_by_term("request_ip", size)
-  self:aggregate_by_cardinality("request_ip")
+  self.body["aggregations"]["top_request_ip"] = {
+    terms = {
+      field = "request_ip",
+      size = size,
+      shard_size = size * 4,
+    },
+  }
+
+  -- TODO: Getting unique IP counts currently not performing well and causing
+  -- timeouts. Might need to look into mapper-murmur3 field for this. See
+  -- https://github.com/opensearch-project/OpenSearch/issues/2820
+  -- In the meantime, perform sampling to at least return something.
+  self.body["aggregations"]["sampled_ips"] = {
+    sampler = {
+      shard_size = 1000000,
+    },
+    aggregations = {
+      unique_request_ip = {
+        cardinality = {
+          field = "request_ip",
+          precision_threshold = 3000,
+        },
+      },
+    },
+  }
 end
 
 function _M:aggregate_by_response_time_average()
@@ -506,7 +529,7 @@ function _M:aggregate_by_drilldown(prefix, size)
   end
 
   if not size then
-    size = 100000000
+    size = config["opensearch"]["max_buckets"]
   end
 
   self.body["aggregations"]["drilldown"] = {
@@ -592,7 +615,7 @@ function _M:aggregate_by_user_stats(order)
   self.body["aggregations"]["user_stats"] = {
     terms = {
       field = "user_id",
-      size = 100000000,
+      size = config["opensearch"]["max_buckets"],
     },
     aggregations = {
       last_request_at = {