Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BFCL] Introducing custom handlers #756

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion berkeley-function-call-leaderboard/bfcl/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from datetime import datetime
from types import SimpleNamespace
from typing import List
import json
import os

import typer
from bfcl._llm_response_generation import main as generation_main
Expand All @@ -16,6 +18,7 @@
from bfcl.model_handler.handler_map import HANDLER_MAP
from dotenv import load_dotenv
from tabulate import tabulate
from bfcl.model_handler.handler_loader import HandlerLoader


class ExecutionOrderGroup(typer.core.TyperGroup):
Expand Down Expand Up @@ -58,8 +61,20 @@ def models():
"""
List available models.
"""
available_models = set(HANDLER_MAP.keys())

# If a custom handler config exists, add its models to the available set
handler_config_path = os.getenv("BFCL_HANDLER_CONFIG")
if handler_config_path and os.path.exists(handler_config_path):
try:
with open(handler_config_path) as f:
handler_config = json.load(f)
available_models.update(handler_config.keys())
except Exception as e:
print(f"Warning: Error loading custom handler config: {str(e)}")

Comment on lines +64 to +75
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, you could simply use dictionary mapping to get a handler.
From now on, HandlerLoader controls handler resolution.

You can set the environment variable BFCL_HANDLER_CONFIG.
This environment variable points to the path of a JSON file;
if it is set, the loader reads custom handler definitions from that file.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is example of json files

{
    "my-custom-model": {
        "module_path": "/path/to/my_handlers.py",
        "class_name": "MyCustomHandler"
    }
}

and example of setting env var.

# you can enter this on the command line
export BFCL_HANDLER_CONFIG=/path/to/handler_config.json

python -m bfcl generate --model my-custom-model --test-category all

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At /path/to/my_handlers.py you can define a custom handler class

from bfcl.model_handler.base_handler import BaseHandler
from bfcl.model_handler.model_style import ModelStyle

class MyCustomHandler(BaseHandler):
    def __init__(self, model_name, temperature):
        super().__init__(model_name, temperature)
        self.model_style = ...
        ...

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For detailed example, I've introduced my case.

berkeley-function-call-leaderboard/custom_handler.py

class MyCustomHandler(OpenAIHandler):
    def __init__(self, model_name, temperature):
        BaseHandler.__init__(self, model_name, temperature)
        self.model_style = ModelStyle.OpenAI
        self.client = OpenAI(
            api_key="EMPTY",
            base_url="..."
        )

berkeley-function-call-leaderboard/handler_config.json

{
    "allganize/Alpha-Ko-32B-GPTQ-Int8-202411": {
        "module_path": "custom_handler.py",
        "class_name": "MyCustomHandler"
    }
}
export BFCL_HANDLER_CONFIG=handler_config.json

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The result works well when I enter below command
python3 -m bfcl generate --model allganize/Alpha-Ko-32B-GPTQ-Int8-202411 --test-category multi_turn_base --num-threads 40

image

table = tabulate(
[[model] for model in HANDLER_MAP.keys()],
[[model] for model in sorted(available_models)],
tablefmt="plain",
colalign=("left",),
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
TEST_IDS_TO_GENERATE_PATH,
)
from bfcl.eval_checker.eval_runner_helper import load_file
from bfcl.model_handler.handler_map import HANDLER_MAP
from bfcl.model_handler.handler_loader import HandlerLoader
from bfcl.model_handler.model_style import ModelStyle
from bfcl.utils import (
is_executable,
Expand Down Expand Up @@ -54,8 +54,12 @@ def get_args():


def build_handler(model_name, temperature):
    """Create a handler instance for the given model.

    Resolution is delegated to ``HandlerLoader``, which first consults the
    custom handler config (the JSON file pointed to by the
    ``BFCL_HANDLER_CONFIG`` environment variable, if set) and then falls
    back to the built-in ``HANDLER_MAP``.

    Args:
        model_name: Name of the model to build a handler for.
        temperature: Sampling temperature passed through to the handler.

    Returns:
        An instantiated handler for ``model_name``.

    Raises:
        ValueError: If no handler is registered for ``model_name``.
    """
    handler_class = HandlerLoader.get_handler_class(model_name)
    if handler_class is None:
        raise ValueError(f"No handler found for model: {model_name}")

    return handler_class(model_name, temperature)


def parse_test_category_argument(test_category_args):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
)
from bfcl.eval_checker.multi_turn_eval.multi_turn_utils import is_empty_execute_response
from bfcl.model_handler.handler_map import HANDLER_MAP
from bfcl.model_handler.handler_loader import HandlerLoader
from bfcl.utils import *
from dotenv import load_dotenv
from tqdm import tqdm
Expand Down Expand Up @@ -655,9 +656,12 @@ def main(model, test_category, api_sanity_check, result_dir, score_dir):


def get_handler(model_name):
    """Create a handler instance for evaluation.

    Resolution is delegated to ``HandlerLoader`` (custom handler config
    first, then the built-in ``HANDLER_MAP``).

    Args:
        model_name: Name of the model to build a handler for.

    Returns:
        An instantiated handler for ``model_name``.

    Raises:
        ValueError: If no handler is registered for ``model_name``.
    """
    handler_class = HandlerLoader.get_handler_class(model_name)
    if handler_class is None:
        raise ValueError(f"No handler found for model: {model_name}")

    # Temperature doesn't matter for evaluation.
    return handler_class(model_name, temperature=0)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
write_list_of_dicts_to_file,
)
from tqdm import tqdm
from bfcl.eval_checker.metadata_loader import metadata_loader


def api_status_sanity_check_rest():
Expand Down Expand Up @@ -246,6 +247,9 @@ def get_cost_letency_info(model_name, cost_data, latency_data):
def generate_leaderboard_csv(
leaderboard_table, output_path, eval_models=None, eval_categories=None
):
# Load metadata at the beginning of the function
model_metadata, _, _ = metadata_loader.load_metadata()

Comment on lines +250 to +252
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For dynamic loading, we use model_metadata rather than MODEL_METADATA_MAPPING.

print("📈 Aggregating data to generate leaderboard score table...")
data_non_live = []
data_live = []
Expand Down Expand Up @@ -326,7 +330,7 @@ def generate_leaderboard_csv(
data_non_live.append(
[
"N/A",
MODEL_METADATA_MAPPING[model_name_escaped][0],
model_metadata[model_name_escaped][0],
overall_accuracy_non_live["accuracy"],
summary_ast_non_live["accuracy"],
summary_exec_non_live["accuracy"],
Expand Down Expand Up @@ -387,7 +391,7 @@ def generate_leaderboard_csv(
data_live.append(
[
"N/A",
MODEL_METADATA_MAPPING[model_name_escaped][0],
model_metadata[model_name_escaped][0],
overall_accuracy_live["accuracy"],
summary_ast_live["accuracy"],
python_simple_ast_live["accuracy"],
Expand Down Expand Up @@ -422,7 +426,7 @@ def generate_leaderboard_csv(
data_multi_turn.append(
[
"N/A",
MODEL_METADATA_MAPPING[model_name_escaped][0],
model_metadata[model_name_escaped][0],
overall_accuracy_multi_turn["accuracy"],
multi_turn_base["accuracy"],
multi_turn_miss_func["accuracy"],
Expand All @@ -448,8 +452,8 @@ def generate_leaderboard_csv(
[
"N/A",
total_overall_accuracy["accuracy"],
MODEL_METADATA_MAPPING[model_name_escaped][0],
MODEL_METADATA_MAPPING[model_name_escaped][1],
model_metadata[model_name_escaped][0],
model_metadata[model_name_escaped][1],
cost,
latency_mean,
latency_std,
Expand All @@ -476,8 +480,8 @@ def generate_leaderboard_csv(
multi_turn_long_context["accuracy"],
total_relevance["accuracy"],
total_irrelevance["accuracy"],
MODEL_METADATA_MAPPING[model_name_escaped][2],
MODEL_METADATA_MAPPING[model_name_escaped][3],
model_metadata[model_name_escaped][2],
model_metadata[model_name_escaped][3],
]
)

Expand Down Expand Up @@ -619,6 +623,7 @@ def generate_leaderboard_csv(

def check_model_category_status(score_path):
result_path = score_path.replace("score", "result")
model_metadata, _, _ = metadata_loader.load_metadata()

leaderboard_categories = [
"exec_simple",
Expand All @@ -643,8 +648,8 @@ def check_model_category_status(score_path):

category_status = {}

# Check for all models in MODEL_METADATA_MAPPING
for model_name in MODEL_METADATA_MAPPING.keys():
# Check for all models in metadata
for model_name in model_metadata.keys():
category_status[model_name] = {
category: {"generated": False, "evaluated": False}
for category in leaderboard_categories
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import json
import os
from typing import Dict, Any

from bfcl.model_handler.handler_map import local_inference_handler_map
from bfcl.eval_checker.model_metadata import (
MODEL_METADATA_MAPPING,
OUTPUT_PRICE_PER_MILLION_TOKEN,
NO_COST_MODELS,
)

class MetadataLoader:
    """Loads model metadata, merging the built-in tables with an optional
    user-supplied JSON config.

    The config path is read from the ``BFCL_MODEL_METADATA`` environment
    variable. The JSON file may contain three optional top-level keys:
    ``"metadata"`` (model name -> metadata list), ``"prices"`` (model name
    -> price per million tokens), and ``"no_cost_models"`` (list of model
    names with no associated cost).
    """

    @staticmethod
    def load_metadata() -> tuple[Dict[str, Any], Dict[str, float], list[str]]:
        """
        Load model metadata, pricing information, and list of no-cost models.

        Returns:
            tuple containing:
                - metadata: Dict mapping model names to their metadata
                - prices: Dict mapping model names to their prices
                - no_cost_models: List of model names that have no associated cost
        """
        # Start from copies of the built-in tables so the module-level
        # originals are never mutated by custom-config merging.
        metadata = dict(MODEL_METADATA_MAPPING)
        prices = dict(OUTPUT_PRICE_PER_MILLION_TOKEN)
        no_cost = list(NO_COST_MODELS)

        # Check for an additional metadata config file path in the environment.
        metadata_config_path = os.getenv("BFCL_MODEL_METADATA")
        if metadata_config_path and os.path.exists(metadata_config_path):
            try:
                with open(metadata_config_path) as f:
                    custom_config = json.load(f)

                # Add custom model metadata
                if "metadata" in custom_config:
                    metadata.update(custom_config["metadata"])

                # Add custom pricing information
                if "prices" in custom_config:
                    prices.update(custom_config["prices"])

                # Add additional no-cost models
                if "no_cost_models" in custom_config:
                    no_cost.extend(custom_config["no_cost_models"])

            except Exception as e:
                # Best-effort merge: a malformed custom config must not break
                # evaluation, so report the error and fall back to the
                # built-in tables (possibly partially merged).
                print(f"Error loading custom metadata config: {str(e)}")

        return metadata, prices, no_cost


# Global metadata loader instance
metadata_loader = MetadataLoader()
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import json
import importlib.util
import os
from pathlib import Path
from typing import Type, Optional

from bfcl.model_handler.base_handler import BaseHandler
from bfcl.model_handler.handler_map import HANDLER_MAP

class HandlerLoader:
    """Resolves model names to handler classes.

    A custom mapping can be supplied via the ``BFCL_HANDLER_CONFIG``
    environment variable, which points to a JSON file of the form
    ``{"model-name": {"module_path": "...", "class_name": "..."}}``.
    Models not found there fall back to the built-in ``HANDLER_MAP``.
    """

    @staticmethod
    def load_handler_class(module_path: str, class_name: str) -> Optional[Type[BaseHandler]]:
        """Dynamically load a handler class from a Python file.

        Args:
            module_path: Path to the Python file containing the handler.
            class_name: Name of the handler class inside that file.

        Returns:
            The handler class, or None if loading fails for any reason
            (the error is printed rather than raised so callers can fall
            back to the default handler map).
        """
        try:
            abs_path = str(Path(module_path).resolve())
            spec = importlib.util.spec_from_file_location("custom_module", abs_path)
            if spec is None or spec.loader is None:
                raise ImportError(f"Could not load spec for module: {module_path}")

            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)

            handler_class = getattr(module, class_name, None)
            if handler_class is None:
                raise AttributeError(f"Class {class_name} not found in {module_path}")

            # Checking for BaseHandler inheritance — custom handlers must
            # implement the BaseHandler interface to be usable downstream.
            if not issubclass(handler_class, BaseHandler):
                raise TypeError(f"Class {class_name} must inherit from BaseHandler")

            return handler_class

        except Exception as e:
            # Deliberately broad: any failure to load a custom handler is
            # reported and treated as "not found" so the caller can fall back.
            print(f"Error loading handler class {class_name} from {module_path}: {str(e)}")
            return None

    @staticmethod
    def get_handler_class(model_name: str) -> Optional[Type[BaseHandler]]:
        """Return the handler class corresponding to the model name.

        Custom handlers from the ``BFCL_HANDLER_CONFIG`` JSON file take
        precedence; the built-in ``HANDLER_MAP`` is the fallback.

        Returns:
            The handler class, or None if the model is unknown.
        """
        # Check the path to the handler mapping file in an environment variable.
        handler_config_path = os.getenv("BFCL_HANDLER_CONFIG")

        if handler_config_path and os.path.exists(handler_config_path):
            try:
                with open(handler_config_path) as f:
                    handler_config = json.load(f)

                if model_name in handler_config:
                    config = handler_config[model_name]
                    handler_class = HandlerLoader.load_handler_class(
                        config["module_path"],
                        config["class_name"]
                    )
                    # Explicit None check: a failed load falls through to
                    # the default handler map below.
                    if handler_class is not None:
                        return handler_class

            except Exception as e:
                # A broken config file must not break built-in models.
                print(f"Error loading custom handler config: {str(e)}")

        # Lookup in the default handler map.
        return HANDLER_MAP.get(model_name)