From ba83f62089297fcb8bef2090c360ad4ec19395ae Mon Sep 17 00:00:00 2001 From: michiboo Date: Thu, 5 Oct 2023 19:42:33 +0200 Subject: [PATCH] test: [RLOS2023] Add new e2e test framework for vw (#4644) * intro notebook * test: [RLOS_2023][WIP] updated test for regression weight (#4600) * test: add test for regression weight * test: make test more reusable by using json to specify pytest * test: minor fix on naming * test: add and option to python json test * test: [RLOS_2023] test for contextual bandit (#4612) * test: add basic cb test and configuration * test: add shared context data generation * add test for cb_explore_adf * test: dynamically create pytest test case * test: give fixed reward function signature * test: [RLOS_2023] [WIP] Support + and * expression for grids (#4618) * test: add basic cb test and configuration * test: add shared context data generation * add test for cb_explore_adf * test: dynamically create pytest test case * test: give fixed reward function signature * test: support + and * expression for grids * fix empty expression bugs * test: [RLOS2023] [WIP] add more arguments for reg&cb tests (#4619) * test: add more arguments for reg&cb tests * test: fix minor bug in generate expression & add loss funcs to tests * test: [RLOS2023] [WIP] add classification test (#4623) * test: add more arguments for reg&cb tests * test: fix minor bug in generate expression & add loss funcs to tests * test: add test for classification * test: organize test framework structure (#4624) * test: [RLOS2023][WIP] add option for storing output and grid language redefinition (#4627) * test: redesign grid lang * test: add option for store output * test: change list to dict for config vars * test: [RLOS2023] add test for slate (#4629) * test: add test for slate * test: test cleanup and slate test update * test: minor cleanup and change assert_loss function to equal instead of lower * test: [RLOS2023] add test for cb with continous action (#4630) * test: add test for slate 
* test: test cleanup and slate test update * test: minor cleanup and change assert_loss function to equal instead of lower * test: add test for cb with continous action * modify blocker testcase * test: [RLOS2023] clean for e2e testing framework v2 (#4633) * test: clean for e2e test v2 * test:change seed to same value for all tests * test: add datagen driver (#4638) * python black * python black 2 * minor demo cleanup --------- Co-authored-by: Alexey Taymanov Co-authored-by: Alexey Taymanov <41013086+ataymano@users.noreply.github.com> --- demo/cmd_getting_started/vw_intro.ipynb | 328 ++++++++++++++++++ python/tests/e2e_v2/assert_job.py | 121 +++++++ python/tests/e2e_v2/cb/data_generation.py | 67 ++++ python/tests/e2e_v2/cb/logging_policies.py | 6 + python/tests/e2e_v2/cb/reward_functions.py | 18 + .../tests/e2e_v2/cb_cont/data_generation.py | 62 ++++ .../tests/e2e_v2/cb_cont/logging_policies.py | 6 + .../tests/e2e_v2/cb_cont/reward_functions.py | 18 + .../classification_functions.py | 19 + .../e2e_v2/classification/data_generation.py | 29 ++ python/tests/e2e_v2/conftest.py | 16 + .../e2e_v2/regression/data_generation.py | 13 + python/tests/e2e_v2/slate/action_space.py | 5 + python/tests/e2e_v2/slate/assert_job.py | 42 +++ python/tests/e2e_v2/slate/data_generation.py | 69 ++++ python/tests/e2e_v2/slate/logging_policies.py | 2 + python/tests/e2e_v2/slate/reward_functions.py | 10 + python/tests/e2e_v2/test_configs/cb.json | 270 ++++++++++++++ python/tests/e2e_v2/test_configs/cb_cont.json | 195 +++++++++++ .../e2e_v2/test_configs/classification.json | 102 ++++++ .../tests/e2e_v2/test_configs/regression.json | 107 ++++++ python/tests/e2e_v2/test_configs/slate.json | 145 ++++++++ python/tests/e2e_v2/test_core.py | 129 +++++++ python/tests/e2e_v2/test_helper.py | 140 ++++++++ 24 files changed, 1919 insertions(+) create mode 100644 demo/cmd_getting_started/vw_intro.ipynb create mode 100644 python/tests/e2e_v2/assert_job.py create mode 100644 
python/tests/e2e_v2/cb/data_generation.py create mode 100644 python/tests/e2e_v2/cb/logging_policies.py create mode 100644 python/tests/e2e_v2/cb/reward_functions.py create mode 100644 python/tests/e2e_v2/cb_cont/data_generation.py create mode 100644 python/tests/e2e_v2/cb_cont/logging_policies.py create mode 100644 python/tests/e2e_v2/cb_cont/reward_functions.py create mode 100644 python/tests/e2e_v2/classification/classification_functions.py create mode 100644 python/tests/e2e_v2/classification/data_generation.py create mode 100644 python/tests/e2e_v2/conftest.py create mode 100644 python/tests/e2e_v2/regression/data_generation.py create mode 100644 python/tests/e2e_v2/slate/action_space.py create mode 100644 python/tests/e2e_v2/slate/assert_job.py create mode 100644 python/tests/e2e_v2/slate/data_generation.py create mode 100644 python/tests/e2e_v2/slate/logging_policies.py create mode 100644 python/tests/e2e_v2/slate/reward_functions.py create mode 100644 python/tests/e2e_v2/test_configs/cb.json create mode 100644 python/tests/e2e_v2/test_configs/cb_cont.json create mode 100644 python/tests/e2e_v2/test_configs/classification.json create mode 100644 python/tests/e2e_v2/test_configs/regression.json create mode 100644 python/tests/e2e_v2/test_configs/slate.json create mode 100644 python/tests/e2e_v2/test_core.py create mode 100644 python/tests/e2e_v2/test_helper.py diff --git a/demo/cmd_getting_started/vw_intro.ipynb b/demo/cmd_getting_started/vw_intro.ipynb new file mode 100644 index 00000000000..724697a475c --- /dev/null +++ b/demo/cmd_getting_started/vw_intro.ipynb @@ -0,0 +1,328 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Helpers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def read(path):\n", + " with open(path) as f:\n", + " print(\"\".join(f.readlines()))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "# Regression" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's generate data of the following form:
\n", + "Every example has a single namespace 'f' with a single feature 'x' in it
\n", + "Target function is $$\\hat{y} = 2x + 1$$\n", + "And we are learning weights $w$, $b$ for $$y=wx+b$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "with open(\"regression1.txt\", \"w\") as f:\n", + " for i in range(1000):\n", + " x = np.random.rand()\n", + " y = 2 * x + 1\n", + " f.write(f\"{y} |f x:{x}\\n\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Simplest execution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!vw -d regression1.txt" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Output more artifacts\n", + "-p - predictions
\n", + "--invert_hash - model in readable format
\n", + "-f - model in binary format (consumable by vw)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!vw -d regression1.txt -p regression1.pred --invert_hash regression1.model.txt -f regression1.model.bin" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can look at weights and see the $w$ and $b$ got close to expected 2 and 1 values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "read(\"regression1.model.txt\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Do only predictions, no learning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!vw -d regression1.txt -t" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!vw -d regression1.txt -t --learning_rate 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!vw -d regression1.txt -t -i regression1.model.bin" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interactions" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's generate another dataset of the following form:
\n", + "Every example has a single namespace 'f' with a single feature 'x' in it
\n", + "Target function is $$\\hat{y} = x^2 + 1$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "with open(\"regression2.txt\", \"w\") as f:\n", + " for i in range(1000):\n", + " x = np.random.rand() * 4\n", + " y = x**2 + 1\n", + " f.write(f\"{y} |f x:{x}\\n\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see loss being far from zero if we still try to learn $$y=wx+b$$ " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!vw -d regression2.txt --invert_hash regression2.model.txt" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So let's try to learn $$y=w_1 x^2 + w_2 x + b$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!vw -d regression2.txt --invert_hash regression2.model.txt --interactions ff" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "read(\"regression2.model.txt\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Contextual bandits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "env = {\"Tom\": {\"sports\": 0, \"politics\": 1}, \"Anna\": {\"sports\": 1, \"politics\": 0}}\n", + "\n", + "users = [\"Tom\", \"Anna\"]\n", + "content = [\"sports\", \"politics\"]\n", + "\n", + "with open(\"cb.txt\", \"w\") as f:\n", + " for i in range(1000):\n", + " user = users[np.random.randint(0, 2)]\n", + " chosen = np.random.randint(0, 2)\n", + " reward = env[user][content[chosen]]\n", + "\n", + " f.write(f\"shared |u {user}\\n\")\n", + " f.write(f\"0:{-reward}:0.5 |a {content[chosen]}\\n\")\n", + " f.write(f\"|a 
{content[(chosen + 1) % 2]}\\n\\n\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's try to learn to predict reward in the following form: $$r = w_1 I(user\\ is\\ Tom) + w_2 I(user\\ is\\ Anna) + w_3 I(topic\\ is\\ sports) + w_4 I(topic\\ is\\ politics) + b$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!vw --cb_explore_adf -d cb.txt --invert_hash cb.model.txt" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that average reward is still around 0.5 which is the same as we would get answering randomly. This is expected since personalization is not captured in this form.\n", + "Let's add interaction between 'u' and 'a' namespaces and try to learn function of the following form:\n", + "$$\\begin{aligned}r = w_1 I(user\\ is\\ Tom) I(topic\\ is\\ sports) + w_2 I(user\\ is\\ Tom) I(topic\\ is\\ politics) +\\\\+ w_3 I(user\\ is\\ Anna) I(topic\\ is\\ sports) + w_4 I(user\\ is\\ Anna) I(topic\\ is\\ politics) +\\\\+ w_5 I(user\\ is\\ Tom) + w_6 I(user\\ is\\ Anna) +\\\\+ w_7 I(topic\\ is\\ sports) + w_8 I(topic\\ is\\ politics) + b\\end{aligned}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!vw --cb_explore_adf -d cb.txt --invert_hash cb.model.txt --interactions ua" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "read(\"cb.model.txt\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/python/tests/e2e_v2/assert_job.py b/python/tests/e2e_v2/assert_job.py new file mode 100644 index 00000000000..4acc1389ec3 --- /dev/null +++ b/python/tests/e2e_v2/assert_job.py @@ -0,0 +1,121 @@ +import numpy as np +import os +from numpy.testing import assert_allclose, assert_almost_equal +from vw_executor.vw import ExecutionStatus +import vowpalwabbit as vw +from test_helper import get_function_object, datagen_driver + + +def remove_non_digits(string): + return "".join(char for char in string if char.isdigit() or char == ".") + + +def get_from_kwargs(kwargs, key, default=None): + if key in kwargs: + return kwargs[key] + else: + return default + + +def majority_close(arr1, arr2, rtol, atol, threshold): + # Check if the majority of elements are close + close_count = np.count_nonzero(np.isclose(arr1, arr2, rtol=rtol, atol=atol)) + return close_count >= len(arr1) * threshold + + +def assert_weight(job, **kwargs): + atol = get_from_kwargs(kwargs, "atol", 10e-8) + rtol = get_from_kwargs(kwargs, "rtol", 10e-5) + expected_weights = kwargs["expected_weights"] + assert job.status == ExecutionStatus.Success, f"{job.opts} job should be successful" + data = job.outputs["--invert_hash"] + with open(data[0], "r") as f: + data = f.readlines() + data = [i.strip() for i in data] + weights = job[0].model9("--invert_hash").weights.to_dict()["weight"] + for x in expected_weights: + assert_allclose( + [weights[x]], [expected_weights[x]], atol=atol, rtol=rtol + ), f"weights {x} should be {expected_weights[x]}" + + +def assert_prediction(job, **kwargs): + assert job.status == ExecutionStatus.Success, "job should be successful" + atol = kwargs.get("atol", 10e-8) + rtol = kwargs.get("rtol", 10e-5) + threshold = kwargs.get("threshold", 0.9) + expected_value = kwargs["expected_value"] + predictions = job.outputs["-p"] + with 
open(predictions[0], "r") as f: + prediction = [i.strip() for i in f.readlines()] + prediction = [i for i in prediction if i != ""] + if ":" in prediction[0]: + prediction = [[j.split(":")[1] for j in i.split(",")] for i in prediction] + elif "," in prediction[0]: + prediction = [[j for j in i.split(",")] for i in prediction] + if type(prediction[0]) == list: + prediction = [[float(remove_non_digits(j)) for j in i] for i in prediction] + else: + prediction = [float(remove_non_digits(i)) for i in prediction] + assert majority_close( + prediction, + [expected_value] * len(prediction), + rtol=rtol, + atol=atol, + threshold=threshold, + ), f"predicted value should be {expected_value}, \n actual values are {prediction}" + + +def assert_loss(job, **kwargs): + assert job.status == ExecutionStatus.Success, "job should be successful" + assert type(job[0].loss) == float, "loss should be an float" + decimal = kwargs.get("decimal", 2) + assert_almost_equal(job[0].loss, kwargs["expected_loss"], decimal=decimal) + + +def assert_loss_below(job, **kwargs): + assert job.status == ExecutionStatus.Success, "job should be successful" + assert type(job[0].loss) == float, "loss should be an float" + assert ( + job[0].loss <= kwargs["expected_loss"] + ), f"loss should be below {kwargs['expected_loss']}" + + +def assert_prediction_with_generated_data(job, **kwargs): + assert job.status == ExecutionStatus.Success, "job should be successful" + expected_class = [] + trained_model = vw.Workspace(f"-i {job[0].model9('-f').path} --quiet") + predictions = [] + folder_path = os.path.dirname(os.path.realpath(__file__)) + subdirectories = [ + os.path.join(folder_path, name) + for name in os.listdir(folder_path) + if os.path.isdir(os.path.join(folder_path, name)) + ] + for subdir in subdirectories: + try: + subdir_name = subdir.replace("\\", "/").split("/")[-1] + data_func_obj = get_function_object( + f"{subdir_name}.data_generation", kwargs["data_func"]["name"] + ) + if data_func_obj: + break + 
except: + pass + script_directory = os.path.dirname(os.path.realpath(__file__)) + dataFile = datagen_driver( + os.path.join(script_directory, subdir_name), + data_func_obj, + **kwargs["data_func"]["params"], + ) + with open(dataFile, "r") as f: + for line in f.readlines(): + expected_class.append(line.split("|")[0].strip()) + predicted_class = trained_model.predict(line.strip()) + predictions.append(predicted_class) + accuracy = sum( + [1 if int(yp) == int(ye) else 0 for yp, ye in zip(predictions, expected_class)] + ) / len(expected_class) + assert ( + accuracy >= kwargs["accuracy_threshold"] + ), f"Accuracy is {accuracy} and Threshold is {kwargs['accuracy_threshold']}" diff --git a/python/tests/e2e_v2/cb/data_generation.py b/python/tests/e2e_v2/cb/data_generation.py new file mode 100644 index 00000000000..d7c9bbb702f --- /dev/null +++ b/python/tests/e2e_v2/cb/data_generation.py @@ -0,0 +1,67 @@ +import random +import os +from test_helper import get_function_object + +script_directory = os.path.dirname(os.path.realpath(__file__)) + + +def random_number_items(items): + num_items_to_select = random.randint(1, len(items)) + return random.sample(items, num_items_to_select) + + +def generate_cb_data( + f, + num_examples, + num_features, + num_actions, + reward_function, + logging_policy, + context_name=["1"], + seed=random.randint(0, 100), +): + random.seed(seed) + + reward_function_obj = get_function_object( + "cb.reward_functions", reward_function["name"] + ) + logging_policy_obj = get_function_object( + "cb.logging_policies", logging_policy["name"] + ) + features = [f"feature{index}" for index in range(1, num_features + 1)] + for _ in range(num_examples): + no_context = len(context_name) + if no_context > 1: + context = random.randint(1, no_context) + else: + context = 1 + + def return_cost_probability(chosen_action, context=1): + cost = -reward_function_obj( + chosen_action, context, **reward_function["params"] + ) + if "params" not in logging_policy: + 
logging_policy["params"] = {} + logging_policy["params"]["chosen_action"] = chosen_action + logging_policy["params"]["num_actions"] = num_actions + probability = logging_policy_obj(**logging_policy["params"]) + return cost, probability + + chosen_action = random.randint(1, num_actions) + if no_context > 1: + f.write(f"shared | User s_{context_name[context-1]}\n") + for action in range(1, num_actions + 1): + cost, probability = return_cost_probability(action, context) + if action == chosen_action: + f.write( + f'{action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n' + ) + else: + f.write(f'| {" ".join(random_number_items(features))}\n') + + else: + cost, probability = return_cost_probability(chosen_action) + f.write( + f'{chosen_action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n' + ) + f.write("\n") diff --git a/python/tests/e2e_v2/cb/logging_policies.py b/python/tests/e2e_v2/cb/logging_policies.py new file mode 100644 index 00000000000..7f6f951fffa --- /dev/null +++ b/python/tests/e2e_v2/cb/logging_policies.py @@ -0,0 +1,6 @@ +def constant_probability(chosen_action): + return 1 + + +def even_probability(chosen_action, num_actions): + return round(1 / num_actions, 2) diff --git a/python/tests/e2e_v2/cb/reward_functions.py b/python/tests/e2e_v2/cb/reward_functions.py new file mode 100644 index 00000000000..a13a699d0a4 --- /dev/null +++ b/python/tests/e2e_v2/cb/reward_functions.py @@ -0,0 +1,18 @@ +def fixed_reward(chosen_action, context): + return 1 + + +def constant_reward(chosen_action, context, reward): + return reward[chosen_action - 1] + + +def fixed_reward_two_action(chosen_action, context): + if context == 1 and chosen_action == 2: + return 1 + elif context == 2 and chosen_action == 2: + return 0 + elif context == 1 and chosen_action == 1: + return 0 + elif context == 2 and chosen_action == 1: + return 1 + return 1 diff --git a/python/tests/e2e_v2/cb_cont/data_generation.py 
b/python/tests/e2e_v2/cb_cont/data_generation.py new file mode 100644 index 00000000000..df2fa0de536 --- /dev/null +++ b/python/tests/e2e_v2/cb_cont/data_generation.py @@ -0,0 +1,62 @@ +import random +import os +from test_helper import get_function_object + +script_directory = os.path.dirname(os.path.realpath(__file__)) + + +def random_number_items(items): + num_items_to_select = random.randint(1, len(items)) + return random.sample(items, num_items_to_select) + + +def generate_cb_data( + f, + num_examples, + num_features, + action_range, + reward_function, + logging_policy, + context_name=["1"], + seed=random.randint(0, 100), +): + random.seed(seed) + num_actions = int(abs(action_range[1] - action_range[0])) + + reward_function_obj = get_function_object( + "cb_cont.reward_functions", reward_function["name"] + ) + logging_policy_obj = get_function_object( + "cb_cont.logging_policies", logging_policy["name"] + ) + features = [f"feature{index}" for index in range(1, num_features + 1)] + + for _ in range(num_examples): + no_context = len(context_name) + if no_context > 1: + context = random.randint(1, no_context) + else: + context = 1 + + def return_cost_probability(chosen_action, context): + cost = -reward_function_obj( + chosen_action, context, **reward_function["params"] + ) + if "params" not in logging_policy: + logging_policy["params"] = {} + logging_policy["params"]["chosen_action"] = chosen_action + logging_policy["params"]["num_actions"] = num_actions + probability = logging_policy_obj(**logging_policy["params"]) + return cost, probability + + chosen_action = round(random.uniform(0, num_actions), 2) + cost, probability = return_cost_probability(chosen_action, context) + if no_context == 1: + f.write( + f'ca {chosen_action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n' + ) + else: + f.write( + f'ca {chosen_action}:{cost}:{probability} | {"s_" + context_name[context-1]} {" ".join(random_number_items(features))}\n' + ) + f.write("\n") diff 
--git a/python/tests/e2e_v2/cb_cont/logging_policies.py b/python/tests/e2e_v2/cb_cont/logging_policies.py new file mode 100644 index 00000000000..7f6f951fffa --- /dev/null +++ b/python/tests/e2e_v2/cb_cont/logging_policies.py @@ -0,0 +1,6 @@ +def constant_probability(chosen_action): + return 1 + + +def even_probability(chosen_action, num_actions): + return round(1 / num_actions, 2) diff --git a/python/tests/e2e_v2/cb_cont/reward_functions.py b/python/tests/e2e_v2/cb_cont/reward_functions.py new file mode 100644 index 00000000000..c865e666084 --- /dev/null +++ b/python/tests/e2e_v2/cb_cont/reward_functions.py @@ -0,0 +1,18 @@ +def fixed_reward(chosen_action, context): + return 1 + + +def piecewise_constant(chosen_action, context, reward): + return reward[int(chosen_action) - 1] + + +def fixed_reward_two_action(chosen_action, context): + if context == 1 and chosen_action >= 2: + return 1 + elif context == 2 and chosen_action < 2 and chosen_action >= 1: + return 0 + elif context == 1 and chosen_action < 1 and chosen_action >= 1: + return 0 + elif context == 2 and chosen_action < 1: + return 1 + return 1 diff --git a/python/tests/e2e_v2/classification/classification_functions.py b/python/tests/e2e_v2/classification/classification_functions.py new file mode 100644 index 00000000000..67aeb8a77d2 --- /dev/null +++ b/python/tests/e2e_v2/classification/classification_functions.py @@ -0,0 +1,19 @@ +def binary_classification_one_feature(input_vector): + if input_vector[0] > 0.5: + return 2 + return 1 + + +def multi_classification_two_features(input_vector): + # Define the number of divisions for each feature + divisions = 5 + + # Calculate the division size for each feature + division_size = 1 / divisions + + # Calculate the class index based on the input vector's position in the feature space + class_idx = int(input_vector[0] // division_size) * divisions + int( + input_vector[1] // division_size + ) + + return class_idx + 1 diff --git 
a/python/tests/e2e_v2/classification/data_generation.py b/python/tests/e2e_v2/classification/data_generation.py new file mode 100644 index 00000000000..c65e49b3350 --- /dev/null +++ b/python/tests/e2e_v2/classification/data_generation.py @@ -0,0 +1,29 @@ +import os, random +from test_helper import get_function_object + + +script_directory = os.path.dirname(os.path.realpath(__file__)) +random.seed(10) + + +def generate_classification_data( + f, + num_example, + num_features, + classify_func, + seed=random.randint(0, 100), + bounds=None, +): + random.seed(seed) + classify_func_obj = get_function_object( + "classification.classification_functions", classify_func["name"] + ) + if not bounds: + bounds = [[0, 1] for _ in range(num_features)] + for _ in range(num_example): + x = [ + random.uniform(bounds[index][0], bounds[index][1]) + for index in range(num_features) + ] + y = classify_func_obj(x, **classify_func["params"]) + f.write(f"{y} |f {' '.join([f'x{i}:{x[i]}' for i in range(num_features)])}\n") diff --git a/python/tests/e2e_v2/conftest.py b/python/tests/e2e_v2/conftest.py new file mode 100644 index 00000000000..5297e54cec2 --- /dev/null +++ b/python/tests/e2e_v2/conftest.py @@ -0,0 +1,16 @@ +# conftest.py +def pytest_addoption(parser): + parser.addoption( + "--store_output", + action="store", + default=False, + help="Store output file for tests.", + ) + + +def pytest_configure(config): + _store_output = config.getoption("--store_output") + # Store the custom_arg_value in a global variable or a custom configuration object. 
+ # For example, you can store it in a global variable like this: + global STORE_OUTPUT + STORE_OUTPUT = _store_output diff --git a/python/tests/e2e_v2/regression/data_generation.py b/python/tests/e2e_v2/regression/data_generation.py new file mode 100644 index 00000000000..cf6e8bca04c --- /dev/null +++ b/python/tests/e2e_v2/regression/data_generation.py @@ -0,0 +1,13 @@ +import random +import os + +script_directory = os.path.dirname(os.path.realpath(__file__)) + + +def constant_function( + f, no_sample, constant, x_lower_bound, x_upper_bound, seed=random.randint(0, 100) +): + random.seed(seed) + for _ in range(no_sample): + x = random.uniform(x_lower_bound, x_upper_bound) + f.write(f"{constant} |f x:{x}\n") diff --git a/python/tests/e2e_v2/slate/action_space.py b/python/tests/e2e_v2/slate/action_space.py new file mode 100644 index 00000000000..bd13b796357 --- /dev/null +++ b/python/tests/e2e_v2/slate/action_space.py @@ -0,0 +1,5 @@ +def new_action_after_threshold(iteration, threshold, before, after): + # before iteration 500, it is sunny and after it is raining + if iteration > threshold: + return after + return before diff --git a/python/tests/e2e_v2/slate/assert_job.py b/python/tests/e2e_v2/slate/assert_job.py new file mode 100644 index 00000000000..aff9061df6e --- /dev/null +++ b/python/tests/e2e_v2/slate/assert_job.py @@ -0,0 +1,42 @@ +from vw_executor.vw import ExecutionStatus +import numpy as np + + +def majority_close(arr1, arr2, rtol, atol, threshold): + # Check if the majority of elements are close + close_count = np.count_nonzero(np.isclose(arr1, arr2, rtol=rtol, atol=atol)) + return close_count >= len(arr1) * threshold + + +def assert_prediction(job, **kwargs): + assert job.status == ExecutionStatus.Success, "job should be successful" + atol = kwargs.get("atol", 10e-8) + rtol = kwargs.get("rtol", 10e-5) + threshold = kwargs.get("threshold", 0.9) + expected_value = kwargs["expected_value"] + predictions = job.outputs["-p"] + res = [] + with 
open(predictions[0], "r") as f: + exampleRes = [] + while True: + line = f.readline() + if not line: + break + if line.count(":") == 0: + res.append(exampleRes) + exampleRes = [] + continue + slotRes = [0] * line.count(":") + slot = line.split(",") + for i in range(len(slot)): + actionInd = int(slot[i].split(":")[0]) + slotRes[i] = float(slot[actionInd].split(":")[1]) + exampleRes.append(slotRes) + + assert majority_close( + res, + [expected_value] * len(res), + rtol=rtol, + atol=atol, + threshold=threshold, + ), f"predicted value should be {expected_value}, \n actual values are {res}" diff --git a/python/tests/e2e_v2/slate/data_generation.py b/python/tests/e2e_v2/slate/data_generation.py new file mode 100644 index 00000000000..d308b245d3d --- /dev/null +++ b/python/tests/e2e_v2/slate/data_generation.py @@ -0,0 +1,69 @@ +import random +import os +from test_helper import get_function_object + +script_directory = os.path.dirname(os.path.realpath(__file__)) + + +def generate_slate_data( + f, + num_examples, + reward_function, + logging_policy, + action_space, + context_name=["1"], + seed=random.randint(0, 100), +): + random.seed(seed) + action_space_obj = get_function_object("slate.action_space", action_space["name"]) + + reward_function_obj = get_function_object( + "slate.reward_functions", reward_function["name"] + ) + logging_policy_obj = get_function_object( + "slate.logging_policies", logging_policy["name"] + ) + + def return_cost_probability(chosen_action, chosen_slot, context): + cost = -reward_function_obj( + chosen_action, context, chosen_slot, **reward_function["params"] + ) + logging_policy["params"]["num_action"] = num_actions[chosen_slot - 1] + logging_policy["params"]["chosen_action"] = chosen_action + probability = logging_policy_obj(**logging_policy["params"]) + return cost, probability + + for i in range(num_examples): + action_space["params"]["iteration"] = i + action_spaces = action_space_obj(**action_space["params"]) + 
reward_function["params"]["iteration"] = i + num_slots = len(action_spaces) + num_actions = [len(slot) for slot in action_spaces] + slot_name = [f"slot_{index}" for index in range(1, num_slots + 1)] + chosen_actions = [] + num_context = len(context_name) + if num_context > 1: + context = random.randint(1, num_context) + else: + context = 1 + for s in range(num_slots): + chosen_actions.append(random.randint(1, num_actions[s])) + chosen_actions_cost_prob = [ + return_cost_probability(action, slot + 1, context) + for slot, action in enumerate(chosen_actions) + ] + total_cost = sum([cost for cost, _ in chosen_actions_cost_prob]) + + f.write(f"slates shared {total_cost} |User {context_name[context-1]}\n") + # write actions + for ind, slot in enumerate(action_spaces): + for a in slot: + f.write( + f"slates action {ind} |Action {a}\n", + ) + + for s in range(num_slots): + f.write( + f"slates slot {chosen_actions[s]}:{chosen_actions_cost_prob[s][1]} |Slot {slot_name[s]}\n" + ) + f.write("\n") diff --git a/python/tests/e2e_v2/slate/logging_policies.py b/python/tests/e2e_v2/slate/logging_policies.py new file mode 100644 index 00000000000..4222a514b1f --- /dev/null +++ b/python/tests/e2e_v2/slate/logging_policies.py @@ -0,0 +1,2 @@ +def even_probability(chosen_action, num_action): + return round(1 / num_action, 2) diff --git a/python/tests/e2e_v2/slate/reward_functions.py b/python/tests/e2e_v2/slate/reward_functions.py new file mode 100644 index 00000000000..2dc79a5100f --- /dev/null +++ b/python/tests/e2e_v2/slate/reward_functions.py @@ -0,0 +1,10 @@ +def fixed_reward(chosen_action, context, slot, reward): + return reward[slot - 1][chosen_action - 1] + + +def reverse_reward_after_threshold( + chosen_action, context, slot, reward, iteration, threshold +): + if iteration > threshold: + reward = [i[::-1] for i in reward] + return reward[slot - 1][chosen_action - 1] diff --git a/python/tests/e2e_v2/test_configs/cb.json b/python/tests/e2e_v2/test_configs/cb.json new file mode 
100644 index 00000000000..15fa7584fe2 --- /dev/null +++ b/python/tests/e2e_v2/test_configs/cb.json @@ -0,0 +1,270 @@ +[ + { + "test_name": "cb_two_action", + "data_func": { + "name": "generate_cb_data", + "params": { + "num_examples": 100, + "num_features": 1, + "num_actions": 2, + "seed": 1, + "reward_function": { + "name": "constant_reward", + "params": { + "reward": [ + 1, + 0 + ] + } + }, + "logging_policy": { + "name": "even_probability", + "params": {} + }, + "context_name": [ + "1", + "2" + ] + } + }, + "assert_functions": [ + { + "name": "assert_loss", + "params": { + "expected_loss": -1, + "decimal": 1 + } + }, + { + "name": "assert_prediction", + "params": { + "expected_value": [ + 1, + 0 + ], + "threshold": 0.8 + } + } + ], + "grids": { + "cb": { + "#base": [ + "--cb_explore 2" + ] + }, + "epsilon": { + "--epsilon": [ + 0.1, + 0.2, + 0.3 + ] + }, + "first": { + "--first": [ + 1, + 2 + ] + }, + "bag": { + "--bag": [ + 5, + 6, + 7 + ] + }, + "cover": { + "--cover": [ + 1, + 2, + 3 + ] + }, + "squarecb": { + "--squarecb": [ + "--gamma_scale 1000", + "--gamma_scale 10000" + ] + }, + "synthcover": { + "--synthcover": [ + "" + ] + }, + "regcb": { + "--regcb": [ + "" + ] + }, + "softmax": { + "--softmax": [ + "" + ] + } + }, + "grids_expression": "cb * (epsilon + first + bag + cover + squarecb + synthcover + regcb + softmax)", + "output": [ + "--readable_model", + "-p" + ] + }, + { + "test_name": "cb_one_action", + "data_func": { + "name": "generate_cb_data", + "params": { + "num_examples": 100, + "num_features": 1, + "num_actions": 1, + "seed": 1, + "reward_function": { + "name": "fixed_reward", + "params": {} + }, + "logging_policy": { + "name": "even_probability" + } + } + }, + "assert_functions": [ + { + "name": "assert_loss", + "params": { + "expected_loss": -1, + "decimal": 1 + } + }, + { + "name": "assert_prediction", + "params": { + "expected_value": 0, + "threshold": 0.1 + } + } + ], + "grids": { + "g0": { + "#base": [ + "--cb 1 
--preserve_performance_counters --save_resume" + ] + }, + "g1": { + "--cb_type": [ + "ips", + "mtr", + "dr", + "dm" + ] + } + }, + "grids_expression": "g0 * g1", + "output": [ + "--readable_model", + "-p" + ] + }, + { + "test_name": "cb_two_action_diff_context", + "data_func": { + "name": "generate_cb_data", + "params": { + "num_examples": 100, + "num_features": 2, + "num_actions": 2, + "seed": 1, + "reward_function": { + "name": "fixed_reward_two_action", + "params": {} + }, + "logging_policy": { + "name": "even_probability", + "params": {} + }, + "context_name": [ + "1", + "2" + ] + } + }, + "assert_functions": [ + { + "name": "assert_loss", + "params": { + "expected_loss": -0.5, + "decimal": 1 + } + }, + { + "name": "assert_prediction", + "params": { + "expected_value": [ + 0.975, + 0.025 + ], + "threshold": 0.1, + "atol": 0.1, + "rtol": 0.1 + } + } + ], + "grids": { + "cb": { + "#base": [ + "--cb_explore_adf" + ] + }, + "epsilon": { + "--epsilon": [ + 0.1, + 0.2, + 0.3 + ] + }, + "first": { + "--first": [ + 1, + 2 + ] + }, + "bag": { + "--bag": [ + 5, + 6, + 7 + ] + }, + "cover": { + "--cover": [ + 1, + 2, + 3 + ] + }, + "squarecb": { + "--squarecb": [ + "--gamma_scale 1000", + "--gamma_scale 10000" + ] + }, + "synthcover": { + "--synthcover": [ + "" + ] + }, + "regcb": { + "--regcb": [ + "" + ] + }, + "softmax": { + "--softmax": [ + "" + ] + } + }, + "grids_expression": "cb * (epsilon + first + bag + cover + squarecb + synthcover + regcb + softmax)", + "output": [ + "--readable_model", + "-p" + ] + } +] \ No newline at end of file diff --git a/python/tests/e2e_v2/test_configs/cb_cont.json b/python/tests/e2e_v2/test_configs/cb_cont.json new file mode 100644 index 00000000000..1a0d6e49066 --- /dev/null +++ b/python/tests/e2e_v2/test_configs/cb_cont.json @@ -0,0 +1,195 @@ +[ + { + "test_name": "cb_two_action", + "data_func": { + "name": "generate_cb_data", + "params": { + "num_examples": 100, + "num_features": 1, + "seed": 1, + "action_range": [ + 0, + 2 + ], + 
"reward_function": { + "name": "piecewise_constant", + "params": { + "reward": [ + 1, + 0 + ] + } + }, + "logging_policy": { + "name": "even_probability", + "params": {} + } + } + }, + "assert_functions": [ + { + "name": "assert_loss", + "params": { + "expected_loss": -1, + "decimal": 1 + } + }, + { + "name": "assert_prediction", + "params": { + "expected_value": [ + 1, + 0 + ], + "threshold": 0.8 + } + } + ], + "grids": { + "cb": { + "#base": [ + "--cats 2 --min_value 0 --max_value 2 --bandwidth 1" + ] + }, + "epsilon": { + "--epsilon": [ + 0.1, + 0.2, + 0.3 + ] + } + }, + "grids_expression": "cb * (epsilon)", + "output": [ + "--readable_model", + "-p" + ] + }, + { + "test_name": "cb_two_action_diff_context", + "data_func": { + "name": "generate_cb_data", + "params": { + "num_examples": 100, + "num_features": 2, + "seed": 1, + "action_range": [ + 0, + 2 + ], + "reward_function": { + "name": "fixed_reward_two_action", + "params": {} + }, + "logging_policy": { + "name": "even_probability", + "params": {} + }, + "context_name": [ + "1", + "2" + ] + } + }, + "assert_functions": [ + { + "name": "assert_loss", + "params": { + "expected_loss": -0.8, + "decimal": 1 + } + }, + { + "name": "assert_prediction", + "params": { + "expected_value": [ + 0.975, + 0.025 + ], + "threshold": 0.1, + "atol": 0.1, + "rtol": 0.1 + } + } + ], + "grids": { + "cb": { + "#base": [ + "--cats 2 --min_value 0 --max_value 2 --bandwidth 1" + ] + }, + "epsilon": { + "--epsilon": [ + 0.1, + 0.2, + 0.3 + ] + } + }, + "grids_expression": "cb * (epsilon)", + "output": [ + "--readable_model", + "-p" + ] + }, + { + "test_name": "cb_one_action", + "data_func": { + "name": "generate_cb_data", + "params": { + "num_examples": 10, + "num_features": 1, + "seed": 1, + "action_range": [ + 0, + 1 + ], + "reward_function": { + "name": "fixed_reward", + "params": {} + }, + "logging_policy": { + "name": "even_probability" + } + } + }, + "assert_functions": [ + { + "name": "assert_loss", + "params": { + 
"expected_loss": -1 + } + }, + { + "name": "assert_prediction", + "params": { + "expected_value": [ + 0, + 1 + ], + "threshold": 0.1 + } + } + ], + "grids": { + "g0": { + "#base": [ + "--cats 2 --min_value 0 --max_value 1 --bandwidth 1" + ] + }, + "g1": { + "--cb_type": [ + "ips", + "mtr", + "dr", + "dm" + ] + } + }, + "grids_expression": "g0 * g1", + "output": [ + "--readable_model", + "-p" + ] + } +] \ No newline at end of file diff --git a/python/tests/e2e_v2/test_configs/classification.json b/python/tests/e2e_v2/test_configs/classification.json new file mode 100644 index 00000000000..738e1981eda --- /dev/null +++ b/python/tests/e2e_v2/test_configs/classification.json @@ -0,0 +1,102 @@ +[ + { + "test_name": "binary_class", + "data_func": { + "name": "generate_classification_data", + "params": { + "num_example": 2000, + "num_features": 1, + "seed": 1, + "classify_func": { + "name": "binary_classification_one_feature", + "params": {} + }, + "bounds": [ + [ + 0, + 1 + ] + ] + } + }, + "assert_functions": [ + { + "name": "assert_prediction_with_generated_data", + "params": { + "data_func": { + "name": "generate_classification_data", + "params": { + "num_example": 100, + "num_features": 1, + "classify_func": { + "name": "binary_classification_one_feature", + "params": {} + }, + "seed": 1 + } + }, + "accuracy_threshold": 0.9 + } + } + ], + "grids": { + "g0": { + "#base": [ + "--oaa 3" + ] + } + }, + "grids_expression": "g0", + "output": [ + "--readable_model", + "-p" + ] + }, + { + "test_name": "multiclass_two_features", + "data_func": { + "name": "generate_classification_data", + "params": { + "num_example": 100000, + "num_features": 2, + "classify_func": { + "name": "multi_classification_two_features", + "params": {} + }, + "seed": 1 + } + }, + "assert_functions": [ + { + "name": "assert_prediction_with_generated_data", + "params": { + "data_func": { + "name": "generate_classification_data", + "params": { + "num_example": 500, + "num_features": 2, + "seed": 1, + 
"classify_func": { + "name": "multi_classification_two_features", + "params": {} + } + } + }, + "accuracy_threshold": 0.5 + } + } + ], + "grids": { + "g0": { + "#base": [ + "--oaa 25" + ] + } + }, + "grids_expression": "g0", + "output": [ + "--readable_model", + "-p" + ] + } +] \ No newline at end of file diff --git a/python/tests/e2e_v2/test_configs/regression.json b/python/tests/e2e_v2/test_configs/regression.json new file mode 100644 index 00000000000..ae4a5bcc973 --- /dev/null +++ b/python/tests/e2e_v2/test_configs/regression.json @@ -0,0 +1,107 @@ +[ + { + "data_func": { + "name": "constant_function", + "params": { + "no_sample": 2000, + "constant": 5, + "x_lower_bound": 1, + "x_upper_bound": 100, + "seed" : 1 + } + }, + "assert_functions": [ + { + "name": "assert_prediction", + "params": { + "expected_value": [ + 5 + ], + "threshold": 0.8 + } + }, + { + "name": "assert_weight", + "params": { + "expected_weights": { + "f^x": 0, + "Constant": 5 + }, + "atol": 0.1, + "rtol": 1 + } + } + ], + "grids": { + "g0": { + "#base": [ + "-P 50000 --preserve_performance_counters --save_resume" + ] + }, + "g1": { + "--learning_rate": [ + null, + 0.1, + 0.01, + 0.001 + ], + "--decay_learning_rate": [ + null, + 1.1, + 1, + 0.9 + ], + "--power_t": [ + null, + 0.5, + 0.6, + 0.4 + ] + }, + "g2": { + "#reg": [ + "--freegrad", + "--conjugate_gradient", + "--bfgs --passes 1 --cache" + ] + }, + "g3": { + "#reg": [ + "--ftrl", + "--coin", + "--pistol" + ], + "--ftrl_alpha": [ + null, + 0.1 + ], + "--ftrl_beta": [ + null, + 0.1 + ] + }, + "g4": { + "--loss_function": [ + null, + "poisson", + "quantile" + ] + }, + "g5": { + "--loss_function": [ + "expectile" + ], + "--expectile_q": [ + 0.25, + 0.5 + ] + } + }, + "grids_expression": "g0 * (g1 + g2 + g3) * (g5 + g4)", + "output": [ + "--readable_model", + "--invert_hash", + "-p" + ] + } +] \ No newline at end of file diff --git a/python/tests/e2e_v2/test_configs/slate.json b/python/tests/e2e_v2/test_configs/slate.json new file mode 
100644 index 00000000000..670b2992f40 --- /dev/null +++ b/python/tests/e2e_v2/test_configs/slate.json @@ -0,0 +1,145 @@ +[ + { + "test_name": "slates", + "data_func": { + "name": "generate_slate_data", + "params": { + "num_examples": 1000, + "seed" : 1, + "reward_function": { + "name": "reverse_reward_after_threshold", + "params": { + "reward": [ + [ + 1, + 0 + ], + [ + 0, + 1 + ] + ], + "threshold": 500 + } + }, + "logging_policy": { + "name": "even_probability", + "params": {} + }, + "action_space": { + "name": "new_action_after_threshold", + "params": { + "threshold": 500, + "before": [ + [ + "longshirt", + "tshirt" + ], + [ + "shorts", + "jeans" + ] + ], + "after": [ + [ + "rainshirt", + "buttonupshirt" + ], + [ + "formalpants", + "rainpants" + ] + ] + } + } + } + }, + "assert_functions": [ + { + "name": "assert_loss", + "params": { + "expected_loss": -1.9, + "decimal": 0.1 + } + }, + { + "name": "assert_prediction", + "params": { + "expected_value": [ + [ + 0.1, + 0.9 + ], + [ + 0.9, + 0.1 + ] + ], + "threshold": 0.8, + "atol": 0.01, + "rtol": 0.01 + } + } + ], + "grids": { + "slate": { + "#base": [ + "--slates" + ] + }, + "epsilon": { + "--epsilon": [ + 0.1, + 0.2, + 0.3 + ] + }, + "first": { + "--first": [ + 1, + 2 + ] + }, + "bag": { + "--bag": [ + 5, + 6, + 7 + ] + }, + "cover": { + "--cover": [ + 1, + 2, + 3 + ] + }, + "squarecb": { + "--squarecb": [ + "--gamma_scale 1000", + "--gamma_scale 10000" + ] + }, + "synthcover": { + "--synthcover": [ + "" + ] + }, + "regcb": { + "--regcb": [ + "" + ] + }, + "softmax": { + "--softmax": [ + "" + ] + } + }, + "grids_expression": "slate * (epsilon + first + bag + cover + squarecb + synthcover + regcb + softmax)", + "output": [ + "--readable_model", + "-p" + ] + } +] \ No newline at end of file diff --git a/python/tests/e2e_v2/test_core.py b/python/tests/e2e_v2/test_core.py new file mode 100644 index 00000000000..26d071de838 --- /dev/null +++ b/python/tests/e2e_v2/test_core.py @@ -0,0 +1,129 @@ +from vw_executor.vw 
import Vw
from vw_executor.vw_opts import Grid
import pytest
import os
import logging
from test_helper import (
    json_to_dict_list,
    evaluate_expression,
    copy_file,
    custom_sort,
    get_function_obj_with_dirs,
    datagen_driver,
)
from conftest import STORE_OUTPUT

# Directory containing this test module; configs, scenario folders and
# stored outputs are all resolved relative to it.
CURR_DICT = os.path.dirname(os.path.abspath(__file__))
# File names found in ``test_configs``; the part before the first "." is used
# as the scenario folder / package name.
TEST_CONFIG_FILES_NAME = os.listdir(os.path.join(CURR_DICT, "test_configs"))
# Parsed JSON content, index-aligned with TEST_CONFIG_FILES_NAME.
TEST_CONFIG_FILES = [json_to_dict_list(i) for i in TEST_CONFIG_FILES_NAME]
# Filled by core_test() with ``[callable, name]`` pairs that the module-level
# driver at the bottom of this file publishes as pytest test functions.
GENERATED_TEST_CASES = []
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
)


def cleanup_data_file():
    """Delete every ``.txt`` file from each scenario folder.

    A scenario folder is the directory next to this module named after a
    config file (its name up to the first "."). Folders that cannot be listed
    are skipped so that the remaining folders are still cleaned.
    """
    script_directory = os.path.dirname(os.path.realpath(__file__))
    for name in TEST_CONFIG_FILES_NAME:
        scenario_directory = os.path.join(script_directory, name.split(".")[0])
        try:
            files = os.listdir(scenario_directory)
        except OSError:
            # Missing or unreadable folder: nothing to clean here, but keep
            # going (returning here would leave later folders dirty).
            continue
        for file in files:
            if file.endswith(".txt"):
                os.remove(os.path.join(scenario_directory, file))


@pytest.fixture
def test_descriptions(request):
    """Yield the parametrized resource, then remove generated data files."""
    resource = request.param
    yield resource
    cleanup_data_file()


def _make_test_case(job_assert, job, job_assert_args):
    """Return a zero-argument test callable bound to this specific ``job``.

    Building the callable in its own scope gives each test its own ``job``
    binding. A lambda created directly in the training loop would look the
    loop variable up when called, i.e. after the loop has finished, so every
    generated test would check the last job only.
    """

    def generated_test():
        return job_assert(job, **job_assert_args)

    return generated_test


def core_test(files, grid, outputs, job_assert, job_assert_args):
    """Train on ``files`` for every option set in ``grid`` and queue one test per job.

    Args:
        files: Path of the generated data file passed to ``Vw.train``.
        grid: Option grid passed to ``Vw.train``.
        outputs: Output flags passed to ``Vw.train``.
        job_assert: Assertion function, called as ``job_assert(job, **job_assert_args)``.
        job_assert_args: Keyword arguments for ``job_assert``.

    Each queued test is appended to ``GENERATED_TEST_CASES`` together with a
    name built from the assertion name and the job options. When
    ``STORE_OUTPUT`` is truthy the job outputs are also copied to
    ``output/<test name>/``.
    """
    vw = Vw(CURR_DICT + "/.vw_cache", reset=True, handler=None)
    result = vw.train(files, grid, outputs)
    for j in result:
        # Options with "-" removed and spaces turned into "_".
        option_suffix = str(j.opts).replace("-", "").replace(" ", "_")
        test_name = job_assert.__name__ + "_" + option_suffix
        GENERATED_TEST_CASES.append(
            [_make_test_case(job_assert, j, job_assert_args), test_name]
        )
        if STORE_OUTPUT:
            output_directory = CURR_DICT + "/output/" + test_name
            # Creates "output" and the per-test folder in one race-free call.
            os.makedirs(output_directory, exist_ok=True)
            # Base name of the first output file; reused for every copy below.
            fileName = str(list(j.outputs.values())[0][0]).split("/")[-1]
            for key, value in j.outputs.items():
                copy_file(
                    value[0],
                    output_directory + "/" + f"{key}_" + fileName,
                )
            # NOTE(review): "cacheNone/" is presumably vw_executor's cache
            # sub-folder for this job - confirm against vw_executor.
            copy_file(
                os.path.join(j.cache.path, "cacheNone/" + fileName),
                output_directory + "/" + fileName,
            )


def get_options(grids, expression):
    """Wrap every entry of ``grids`` in ``Grid`` and evaluate ``expression`` over them.

    Args:
        grids: Mapping of grid name to grid definition.
        expression: Expression over the grid names (e.g. ``"g0 * g1"``).

    Returns:
        Whatever ``evaluate_expression`` produces for the expression.
    """
    final_variables = {key: Grid(value) for key, value in grids.items()}
    return evaluate_expression(expression, final_variables)


# NOTE(review): init_all is called directly at import time below, so this mark
# does not appear to influence it - confirm before relying on it.
@pytest.mark.usefixtures("test_descriptions", TEST_CONFIG_FILES)
def init_all(test_descriptions):
    """Generate data and queue test cases for every loaded configuration.

    Args:
        test_descriptions: Parsed config documents, index-aligned with
            ``TEST_CONFIG_FILES_NAME``. Each document is a test description
            or a list of them.
    """
    script_directory = os.path.dirname(os.path.realpath(__file__))
    for tIndex, tests in enumerate(test_descriptions):
        task_folder = TEST_CONFIG_FILES_NAME[tIndex].split(".")[0]
        # Module prefixes to try, in order, when resolving helper functions.
        package_name = custom_sort(task_folder, [task_folder + ".", ""])
        package_name.append(".")
        if not isinstance(tests, list):
            tests = [tests]
        scenario_directory = script_directory + f"/{task_folder}"
        for test_description in tests:
            options = get_options(
                test_description["grids"], test_description["grids_expression"]
            )
            data_func = get_function_obj_with_dirs(
                package_name,
                "data_generation",
                test_description["data_func"]["name"],
            )
            data = datagen_driver(
                scenario_directory, data_func, **test_description["data_func"]["params"]
            )
            for assert_func in test_description["assert_functions"]:
                assert_job = get_function_obj_with_dirs(
                    package_name, "assert_job", assert_func["name"]
                )
                core_test(
                    os.path.join(script_directory, data),
                    options,
                    test_description["output"],
                    assert_job,
                    assert_func["params"],
                )


# Import-time driver: run every configuration, then publish each queued
# callable as a module-level ``test_*`` function. The loop variable names are
# deliberately not ``test``-prefixed callables, because they stay in the
# module namespace after the loop.
try:
    init_all(TEST_CONFIG_FILES)
    for generated_test_case in GENERATED_TEST_CASES:
        test_name = f"test_{generated_test_case[1]}"
        generated_test_case[0].__name__ = test_name
        globals()[test_name] = generated_test_case[0]
finally:
    cleanup_data_file()
"""Helper utilities for the JSON-driven end-to-end tests."""
import json
import importlib
import os
import itertools
import inspect
import shutil

# Get the current directory
current_dir = os.path.dirname(os.path.abspath(__file__))


def json_to_dict_list(file):
    """Load a JSON document from the ``test_configs`` folder next to this module.

    Args:
        file: File name of the configuration inside ``test_configs``.

    Returns:
        The decoded JSON content.
    """
    config_path = os.path.join(current_dir, "test_configs", file)
    with open(config_path, "r", encoding="utf-8") as config_file:
        return json.load(config_file)


def evaluate_expression(expression, variables):
    """Evaluate ``expression`` with ``variables`` as the available names.

    Args:
        expression: Python expression, e.g. ``"g0 * (g1 + g2)"``.
        variables: Mapping of name to value usable inside the expression.

    Returns:
        The value of the expression.
    """
    # eval() adds "__builtins__" to the globals mapping it receives, so work
    # on a copy and leave the caller's mapping untouched.
    namespace = dict(variables)
    # NOTE(review): eval() runs arbitrary code; only pass expressions that
    # come from trusted configuration.
    return eval(expression, namespace)


def _resolve_function(module_name, function_name):
    """Import ``module_name`` and return its ``function_name`` attribute, or None.

    The import is performed relative to the package of the module that called
    the public wrapper, i.e. two frames above this helper. It must therefore
    only be called directly from ``dynamic_function_call`` or
    ``get_function_object``.
    """
    try:
        # context=0: only the frame object is needed, not source lines.
        calling_frame = inspect.stack(0)[2]
        calling_module = inspect.getmodule(calling_frame[0])
        # AttributeError here (module could not be determined) is treated as
        # "not found", like a missing module or attribute.
        calling_package = calling_module.__package__
        module = importlib.import_module(module_name, package=calling_package)
        return getattr(module, function_name)
    except (ImportError, AttributeError):
        return None


def dynamic_function_call(module_name, function_name, *args, **kwargs):
    """Call ``module_name.function_name(*args, **kwargs)`` if it can be resolved.

    Returns:
        The function's result, or None when the module or the attribute does
        not exist. Exceptions raised by the function itself propagate to the
        caller; only lookup failures are turned into None.
    """
    function = _resolve_function(module_name, function_name)
    if function is None:
        return None
    return function(*args, **kwargs)


def get_function_object(module_name, function_name):
    """Return the attribute ``function_name`` of ``module_name``, or None if missing."""
    return _resolve_function(module_name, function_name)


def generate_string_combinations(*lists):
    """Return the concatenation of every element of the Cartesian product of ``lists``."""
    return ["".join(combination) for combination in itertools.product(*lists)]


def copy_file(source_file, destination_file):
    """Copy ``source_file`` to ``destination_file``, reporting the outcome on stdout.

    A missing source or a permission problem is reported and otherwise
    ignored (best effort).
    """
    try:
        shutil.copy(source_file, destination_file)
        print(f"File copied successfully from '{source_file}' to '{destination_file}'.")
    except FileNotFoundError:
        print(f"Source file '{source_file}' not found.")
    except PermissionError:
        print(
            f"Permission denied. Unable to copy '{source_file}' to '{destination_file}'."
        )


def call_function_with_dirs(dirs, module_name, function_name, **kargs):
    """Call ``function_name`` from the first prefix in ``dirs`` that yields a result.

    Args:
        dirs: Module-name prefixes (e.g. ``"cb."`` or ``""``) tried in order.
        module_name: Module name appended to each prefix.
        function_name: Name of the function to call.
        **kargs: Keyword arguments forwarded to the function.

    Returns:
        The first result that is not None (falsy values such as ``0`` or
        ``{}`` count as results), or None if no prefix produced one.
    """
    for prefix in dirs:
        try:
            data = dynamic_function_call(
                prefix + module_name,
                function_name,
                **kargs,
            )
        except ModuleNotFoundError:
            # Raised from inside the called function: try the next prefix.
            continue
        if data is not None:
            return data
    return None


def get_function_obj_with_dirs(dirs, module_name, function_name):
    """Return ``function_name`` from the first prefix in ``dirs`` that provides it.

    Args:
        dirs: Module-name prefixes tried in order.
        module_name: Module name appended to each prefix.
        function_name: Attribute to fetch from the module.

    Raises:
        ModuleNotFoundError: If no prefix provides the attribute.
    """
    for prefix in dirs:
        try:
            obj = get_function_object(
                prefix + module_name,
                function_name,
            )
        except ModuleNotFoundError:
            continue
        if obj is not None:
            return obj
    raise ModuleNotFoundError(
        f"Module '{module_name}' not found in any of the directories {dirs}."
    )


def datagen_driver(script_directory, impl, **kwargs):
    """Run the data generator ``impl`` into a file named after its arguments.

    The file name is ``<impl name>_<parts>.txt`` where the parts are, in
    keyword order: the first value of each non-empty dict argument and every
    argument that is neither a dict nor a list. Lists and empty dicts
    contribute nothing.

    Args:
        script_directory: Existing directory in which the file is created.
        impl: Callable invoked as ``impl(file_object, **kwargs)``.
        **kwargs: Arguments forwarded to ``impl``.

    Returns:
        Path of the written file.
    """
    names = []
    for value in kwargs.values():
        if isinstance(value, dict):
            if value:
                # First value of the mapping.
                names.append(next(iter(value.values())))
        elif isinstance(value, list):
            continue
        else:
            names.append(value)

    data_file = f"{impl.__name__}_{'_'.join(str(name) for name in names)}.txt"
    data_path = os.path.join(script_directory, data_file)
    with open(data_path, "w") as f:
        impl(f, **kwargs)
    return data_path


def calculate_similarity(word, string):
    """Return how many characters of ``word`` (repeats included) occur in ``string``."""
    return sum(1 for char in word if char in string)


def custom_sort(word, strings):
    """Return ``strings`` ordered by descending similarity to ``word``.

    Entries with equal similarity keep their original relative order.
    """
    return sorted(
        strings, key=lambda string: calculate_similarity(word, string), reverse=True
    )