Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the logic for perplexity evaluation (Not enough kv_cache capacity to run generation. Please use a larger sequence_length or a shorter prompt) #1633

Closed
wants to merge 8 commits
7 changes: 4 additions & 3 deletions src/deepsparse/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,10 @@ def evaluate(

# if target is a string, turn it into an appropriate pipeline
# otherwise assume it is a pipeline
if isinstance(model, (Path, str)):
    # create_pipeline consumes the pipeline-construction kwargs
    # (task, sequence_length, batch_size, ...) and returns the remaining
    # ones, so downstream evaluation code does not receive arguments that
    # were meant only for pipeline creation
    pipeline, kwargs = create_pipeline(model, engine_type, **kwargs)
else:
    pipeline = model

eval_integration = EvaluationRegistry.resolve(pipeline, datasets, integration)

Expand Down
12 changes: 11 additions & 1 deletion src/deepsparse/evaluation/integrations/perplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
HumanEvalIteratorWrapper,
process_concatenated_datasets,
)
from deepsparse.transformers.utils.helpers import prepends_bos_token


"""
Expand Down Expand Up @@ -165,6 +166,7 @@ def run_perplexity(
return_input_tokens=True,
)
else:
print(len(pipeline.tokenizer(batch[0]).input_ids))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

debug?

out = pipeline(
prompt=batch,
output_scores=True,
Expand Down Expand Up @@ -252,7 +254,15 @@ def load_perplexity_dataset(
# fetch max_sequence_length from pipeline if not provided
max_sequence_length = kwargs.pop("max_sequence_length", None)
if max_sequence_length is None and pipeline is not None:
    # max_sequence_length for the dataset concatenation needs to be
    # smaller than the kv_cache capacity, i.e.
    # (pipeline.sequence_length - pipeline.prompt_sequence_length)
    max_sequence_length = (
        pipeline.sequence_length - pipeline.prompt_sequence_length - 1
    )
    # account for a potential additional BOS token prepended by the
    # tokenizer (prepends_bos_token returns a truthy value used as 0/1)
    # NOTE(review): removed leftover debug breakpoint() flagged in review
    max_sequence_length -= prepends_bos_token(pipeline.tokenizer)

# fetch model_path from pipeline if not provided
model_path = kwargs.pop("model_path", None)
Expand Down
19 changes: 11 additions & 8 deletions src/deepsparse/evaluation/utils.py
Original file line number Diff line number Diff line change
def create_pipeline(model_path, engine_type=None, **kwargs):
    """
    Create a text-generation pipeline for evaluation.

    :param model_path: The target path to initialize the
        text generation model from. This can be a local
        or remote path to the model or a sparsezoo stub
    :param engine_type: The engine type to initialize the model with;
        defaults to DEEPSPARSE_ENGINE when not provided
    :return: The initialized pipeline and the mutated
        (potentially reduced number of) kwargs
    """
    # NOTE(review): signature reconstructed from the diff context and call
    # sites — confirm parameter names/defaults against the full file
    engine_type = engine_type or DEEPSPARSE_ENGINE
    pipeline = Pipeline.create(
        task=kwargs.pop("task", "text-generation"),
        model_path=model_path,
        sequence_length=kwargs.pop("sequence_length", 2048),
        engine_type=engine_type,
        batch_size=kwargs.pop("batch_size", 1),
    )
    # the pops above consumed the pipeline-construction kwargs; return the
    # remaining ones so the caller can forward them onward
    return pipeline, kwargs
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,10 @@ def sequence_length(self) -> int:
"""
return self.ops["single_engine"].sequence_length

@property
def prompt_sequence_length(self) -> int:
    # NOTE(review): mirrors the multi-token engine's input_ids_length —
    # presumably the number of prompt tokens processed per prefill step;
    # confirm against the engine operator definition
    return self.ops["multi_engine"].input_ids_length

@property
def batch_size(self) -> int:
    """Batch size of the underlying single-token engine."""
    return self.ops["single_engine"].batch_size
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,7 @@ def batch_size(self) -> int:
@property
def engine_type(self) -> str:
    """Engine type string of the underlying engine operator."""
    return self.ops["engine_operator"]._engine_type

@property
def prompt_sequence_length(self) -> int:
    # NOTE(review): constant 1 — presumably this pipeline processes prompts
    # one token at a time (no multi-token prefill engine); confirm
    return 1
4 changes: 2 additions & 2 deletions tests/deepsparse/evaluation/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ def pipeline_target():


def test_initialize_model_from_target_pipeline_onnx(pipeline_target):
    # create_pipeline now returns (pipeline, remaining_kwargs);
    # unpack and discard the leftover kwargs
    model, _ = create_pipeline(pipeline_target, "onnxruntime")
    assert model.ops.get("single_engine")._engine_type == "onnxruntime"


def test_initialize_model_from_target_pipeline_with_kwargs(pipeline_target):
    # create_pipeline now returns (pipeline, remaining_kwargs);
    # unpack and discard the leftover kwargs
    model, _ = create_pipeline(pipeline_target, "deepsparse", sequence_length=64)
    assert model.ops.get("process_input").sequence_length == 64
Loading