From 5e10d91799784d80a745ea2d0ecb949c056c0bb6 Mon Sep 17 00:00:00 2001
From: Andrew Truong
Date: Fri, 21 Feb 2025 17:25:06 -0500
Subject: [PATCH] feat(weave): Add option to export calls to pandas (#3737)

---
 .../pandas-test/test_calls_to_pandas.py       | 84 +++++++++++++++++++
 ...{test_pandas.py => test_dataset_pandas.py} |  0
 weave/trace/weave_client.py                   | 42 ++++++++
 3 files changed, 126 insertions(+)
 create mode 100644 tests/integrations/pandas-test/test_calls_to_pandas.py
 rename tests/integrations/pandas-test/{test_pandas.py => test_dataset_pandas.py} (100%)

diff --git a/tests/integrations/pandas-test/test_calls_to_pandas.py b/tests/integrations/pandas-test/test_calls_to_pandas.py
new file mode 100644
index 000000000000..5e50c2ad8430
--- /dev/null
+++ b/tests/integrations/pandas-test/test_calls_to_pandas.py
@@ -0,0 +1,84 @@
+import pandas as pd
+import pytest
+
+import weave
+
+
+@weave.op
+def func(name: str, age: int) -> str:
+    return f"Hello, {name}! You are {age} years old."
+
+
+@weave.op
+def raising_func(name: str, age: int) -> str:
+    raise ValueError("This is a test error")
+
+
+@pytest.fixture
+def logging_example(client):
+    """Log three calls: a plain one, one with attributes, and one that raises."""
+    func("Alice", 30)
+
+    with weave.attributes({"tag": "test", "version": "1.0"}):
+        func("Bob", 25)
+
+    try:
+        raising_func("Claire", 35)
+    except ValueError:
+        pass  # Expected: the failed call should still be logged.
+
+
+def test_calls_to_pandas_basic(logging_example, client):
+    """Every logged call becomes one DataFrame row equal to ``call.to_dict()``."""
+    calls = client.get_calls()
+    df = calls.to_pandas()
+
+    assert isinstance(df, pd.DataFrame)
+    assert len(df) == 3  # The three calls we made
+
+    dictified = df.to_dict(orient="records")
+    calls_as_dicts = [c.to_dict() for c in calls]
+
+    for d1, d2 in zip(dictified, calls_as_dicts):
+        assert d1 == d2
+
+
+def test_calls_to_pandas_with_limit(logging_example, client):
+    """A ``limit`` on the query carries through to the DataFrame."""
+    calls = client.get_calls(limit=1)
+    df = calls.to_pandas()
+
+    assert isinstance(df, pd.DataFrame)
+    assert len(df) == 1
+
+    dictified = df.to_dict(orient="records")
+
+    # Maintains insertion order
+    d = dictified[0]
+    assert d["inputs"]["name"] == "Alice"
+    assert d["inputs"]["age"] == 30
+
+
+@pytest.mark.asyncio
+async def test_calls_to_pandas_with_evaluations(client):
+    """Calls produced by an Evaluation run are all exported."""
+
+    @weave.op
+    def model(x: int, y: int) -> int:
+        return x + y
+
+    ev = weave.Evaluation(
+        dataset=[
+            {"x": 1, "y": 2},
+            {"x": 3, "y": 4},
+            {"x": 5, "y": 6},
+        ]
+    )
+    await ev.evaluate(model)
+
+    calls_df = client.get_calls().to_pandas()
+    assert len(calls_df) == (
+        1  # evaluate
+        + 3 * 2  # predict and score + model
+        + 1  # summarize
+    )
diff --git a/tests/integrations/pandas-test/test_pandas.py b/tests/integrations/pandas-test/test_dataset_pandas.py
similarity index 100%
rename from tests/integrations/pandas-test/test_pandas.py
rename to tests/integrations/pandas-test/test_dataset_pandas.py
diff --git a/weave/trace/weave_client.py b/weave/trace/weave_client.py
index 41bc8c3bf8d6..96204a864162 100644
--- a/weave/trace/weave_client.py
+++ b/weave/trace/weave_client.py
@@ -116,6 +116,8 @@
 from weave.trace_server_bindings.remote_http_trace_server import RemoteHTTPTraceServer
 
 if TYPE_CHECKING:
+    import pandas as pd
+
     from weave.flow.scorer import ApplyScorerResult, Scorer
 
 
@@ -253,6 +255,46 @@ def __len__(self) -> int:
             raise TypeError("This iterator does not support len()")
         return self.size_func()
 
+    def to_pandas(self) -> pd.DataFrame:
+        """Convert the iterator's contents to a pandas DataFrame.
+
+        Each item becomes one row: dicts are used as-is and any other item
+        is converted with its ``to_dict()`` method.
+
+        Returns:
+            A pandas DataFrame containing all the data from the iterator.
+
+        Raises:
+            ImportError: If pandas is not installed.
+            ValueError: If an item is neither a dict nor has a ``to_dict`` method.
+
+        Example:
+            ```python
+            calls = client.get_calls()
+            df = calls.to_pandas()
+            ```
+
+        Note:
+            This method will fetch all data from the iterator, which may involve
+            multiple network calls. For large datasets, consider using limits
+            or filters to reduce the amount of data fetched.
+        """
+        try:
+            import pandas as pd
+        except ImportError as e:
+            raise ImportError("pandas is required to use this method") from e
+
+        records = []
+        for item in self:
+            if isinstance(item, dict):
+                records.append(item)
+            elif hasattr(item, "to_dict"):
+                records.append(item.to_dict())
+            else:
+                raise ValueError(f"Unable to convert item to dict: {item}")
+
+        return pd.DataFrame(records)
+
 
 # TODO: should be Call, not WeaveObject
 CallsIter = PaginatedIterator[CallSchema, WeaveObject]