Skip to content

Commit

Permalink
handle empty string case, eject from like optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
gtarpenning committed Feb 27, 2025
1 parent 150cc24 commit 2d6f863
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 3 deletions.
14 changes: 13 additions & 1 deletion tests/trace/test_client_trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3373,7 +3373,7 @@ def test_call_stream_query_heavy_query_batch(client):
parent_id=parent_id,
started_at=datetime.datetime.now(tz=datetime.timezone.utc)
- datetime.timedelta(seconds=1),
attributes={"a": 5},
attributes={"a": 5, "empty": "", "null": None},
inputs={"param": {"value1": "hello"}},
)
client.server.call_start(tsi.CallStartReq(start=start))
Expand Down Expand Up @@ -3454,3 +3454,15 @@ def test_call_stream_query_heavy_query_batch(client):
for call in res:
assert call.inputs["param"]["value1"] == "helslo"
assert call.output["d"] == 5

# now try to filter by the empty attribute string
query = {
"project_id": project_id,
"query": {
"$expr": {"$eq": [{"$getField": "attributes.empty"}, {"$literal": ""}]}
},
}
res = client.server.calls_query_stream(tsi.CallsQueryReq.model_validate(query))
assert len(list(res)) == 10
for call in res:
assert call.attributes["empty"] == ""
38 changes: 38 additions & 0 deletions tests/trace_server/test_calls_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1259,3 +1259,41 @@ def test_calls_query_with_combined_like_optimizations_and_op_filter() -> None:
"pb_12": '%"0.8"%',
},
)


def test_calls_query_filter_by_empty_str() -> None:
cq = CallsQuery(project_id="project")
cq.add_field("id")
cq.add_field("inputs")
cq.add_condition(
tsi_query.EqOperation.model_validate(
{"$eq": [{"$getField": "inputs.param.val"}, {"$literal": ""}]}
)
)
# Empty string is not a valid value for LIKE optimization, this test is to ensure that
# the query builder uses the JSON_VALUE function in the WHERE.
assert_sql(
cq,
"""
SELECT
calls_merged.id AS id,
any(calls_merged.inputs_dump) AS inputs_dump
FROM calls_merged
WHERE
calls_merged.project_id = {pb_2:String}
AND
((JSON_VALUE(calls_merged.inputs_dump, {pb_0:String}) = {pb_1:String})
OR calls_merged.inputs_dump IS NULL)
GROUP BY (calls_merged.project_id, calls_merged.id)
HAVING (
((JSON_VALUE(any(calls_merged.inputs_dump), {pb_0:String}) = {pb_1:String}))
AND ((any(calls_merged.deleted_at) IS NULL))
AND ((NOT ((any(calls_merged.started_at) IS NULL))))
)
""",
{
"pb_0": '$."param"."val"',
"pb_1": "",
"pb_2": "project",
},
)
12 changes: 10 additions & 2 deletions weave/trace_server/calls_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,9 @@ def _create_like_optimized_eq_condition(

field = get_field_by_name(operation.eq_[0].get_field_).field
literal_value = operation.eq_[1].literal_
if not literal_value:
# Empty string is not a valid value for LIKE optimization
return None
like_pattern = f'%"{literal_value}"%'

return _create_like_condition(field, like_pattern, pb, table_alias)
Expand All @@ -1098,6 +1101,9 @@ def _create_like_optimized_contains_condition(

field = get_field_by_name(operation.contains_.input.get_field_).field
substr_value = operation.contains_.substr.literal_
if not substr_value:
# Empty string is not a valid value for LIKE optimization
return None
case_insensitive = operation.contains_.case_insensitive or False
like_pattern = f'%"%{substr_value}%"%'

Expand Down Expand Up @@ -1130,8 +1136,10 @@ def _create_like_optimized_in_condition(
like_conditions: list[str] = []

for value_operand in operation.in_[1]:
if not isinstance(value_operand, tsi_query.LiteralOperation) or not isinstance(
value_operand.literal_, str
if (
not isinstance(value_operand, tsi_query.LiteralOperation)
or not isinstance(value_operand.literal_, str)
or not value_operand.literal_
):
return None

Expand Down

0 comments on commit 2d6f863

Please sign in to comment.