
Commit fd8cfd2
Add the custom model and dataset loaders to prompt debugging notebook.
PiperOrigin-RevId: 675327487
bdu91 authored and LIT team committed Sep 16, 2024
1 parent ba4d975 commit fd8cfd2
Showing 5 changed files with 25 additions and 14 deletions.
8 changes: 8 additions & 0 deletions lit_nlp/examples/prompt_debugging/keras_lms.py
@@ -12,6 +12,7 @@
 from lit_nlp.api import types as lit_types
 from lit_nlp.examples.prompt_debugging import constants as pd_constants
 from lit_nlp.examples.prompt_debugging import utils as pd_utils
+from lit_nlp.lib import file_cache
 from lit_nlp.lib import utils as lit_utils
@@ -74,6 +75,13 @@ def __init__(
     if model is not None:
       self.model = model
     elif model_name_or_path is not None:
+      if model_name_or_path.endswith(".tar.gz") or file_cache.is_remote(
+          model_name_or_path
+      ):
+        model_name_or_path = file_cache.cached_path(
+            model_name_or_path,
+            extract_compressed_file=model_name_or_path.endswith(".tar.gz"),
+        )
       self.model = keras_models.CausalLM.from_preset(model_name_or_path)
     else:
       raise ValueError("Must provide either model or model_name_or_path.")
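
The effect of this hunk is that the Keras wrapper now resolves remote or compressed checkpoint paths itself, rather than relying on the shared logic in models.py, which the next file deletes. A minimal usage sketch, assuming the wrapper class in keras_lms.py is named KerasGenerationModel and using a purely hypothetical bucket path:

    from lit_nlp.examples.prompt_debugging import keras_lms

    # file_cache.cached_path() downloads a remote file (and extracts it when
    # the name ends in ".tar.gz") so that CausalLM.from_preset() only ever
    # sees a local path.
    model = keras_lms.KerasGenerationModel(
        model_name_or_path="gs://my-bucket/gemma_preset.tar.gz",  # hypothetical
    )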
7 changes: 0 additions & 7 deletions lit_nlp/examples/prompt_debugging/models.py
@@ -8,7 +8,6 @@
 from lit_nlp import app as lit_app
 from lit_nlp.api import model as lit_model
 from lit_nlp.api import types as lit_types
-from lit_nlp.lib import file_cache


 DEFAULT_BATCH_SIZE = 1
@@ -85,12 +84,6 @@ def get_models(
     model_name, path = model_string.split(":", 1)
     logging.info("Loading model '%s' from '%s'", model_name, path)

-    if path.endswith(".tar.gz") or file_cache.is_remote(path):
-      path = file_cache.cached_path(
-          path,
-          extract_compressed_file=path.endswith(".tar.gz"),
-      )
-
     if dl_framework == "kerasnlp":
       from lit_nlp.examples.prompt_debugging import keras_lms  # pylint: disable=g-import-not-at-top  # pytype: disable=import-error
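
Removing the block above avoids duplicating path handling now that both framework wrappers cache and extract checkpoints themselves; it also means model loaders invoked from the notebook UI get the same treatment. For context, a sketch of the name:path convention that get_models() parses, with illustrative values:

    # Each entry of the models config is "<name>:<path>", split on the first
    # colon only, so the path may itself contain colons (e.g. "gs://...").
    model_string = "gemma:gs://my-bucket/gemma_preset.tar.gz"  # illustrative
    model_name, path = model_string.split(":", 1)
    assert model_name == "gemma"
    assert path == "gs://my-bucket/gemma_preset.tar.gz"
    # The path is now passed through unchanged; caching and extraction happen
    # inside the framework-specific wrapper.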
7 changes: 7 additions & 0 deletions lit_nlp/examples/prompt_debugging/notebook.py
@@ -59,5 +59,12 @@ def make_notebook_widget(
       ),
       layouts=layouts.PROMPT_DEBUGGING_LAYOUTS,
       default_layout=layouts.LEFT_RIGHT,
+      model_loaders=models.get_model_loaders(
+          dl_framework=dl_framework,
+          dl_runtime=dl_runtime,
+          batch_size=batch_size,
+          max_length=models.DEFAULT_SEQUENCE_LENGTH,
+      ),
+      dataset_loaders=datasets.get_dataset_loaders(),
       **kwargs,
   )
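
With model_loaders and dataset_loaders wired in, models and datasets can be added from the widget UI after creation instead of only at construction time. A hedged usage sketch; the argument values and the render() call are assumptions based on the LIT notebook API, not shown in this diff:

    from lit_nlp.examples.prompt_debugging import notebook

    # dl_framework mirrors the branches in models.py ("kerasnlp" vs.
    # "transformers"); the runtime value here is an assumption.
    widget = notebook.make_notebook_widget(
        dl_framework="kerasnlp",
        dl_runtime="tensorflow",
    )
    widget.render()  # assumed LitWidget-style API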
15 changes: 9 additions & 6 deletions lit_nlp/examples/prompt_debugging/transformers_lms.py
@@ -116,9 +116,12 @@ def __init__(
     else:
       # Normally path is a directory; if it's an archive file, download and
       # extract to the transformers cache.
-      if model_name_or_path.endswith(".tar.gz"):
+      if model_name_or_path.endswith(".tar.gz") or file_cache.is_remote(
+          model_name_or_path
+      ):
         model_name_or_path = file_cache.cached_path(
-            model_name_or_path, extract_compressed_file=True
+            model_name_or_path,
+            extract_compressed_file=model_name_or_path.endswith(".tar.gz"),
         )

       # Note: we need to left-pad for generation to work properly.
@@ -227,11 +230,11 @@ def _postprocess(self, preds: Mapping[str, Any]) -> Mapping[str, Any]:
       a dict of the processed model outputs, including the response texts and
       embeddings of the input and output tokens (separated into two arrays).
     """
-    # TODO(b/324957491): return actual decoder scores for each generation.
-    # GeneratedTextCandidates should be a list[(text, score)]
+    # TODO(b/324957491): return actual decoder scores for each generation. For
+    # now, we only output GeneratedText.
     processed_preds = {}
-    processed_preds[pd_constants.FieldNames.RESPONSE] = [
-        (preds[pd_constants.FieldNames.RESPONSE], 1.0)
+    processed_preds[pd_constants.FieldNames.RESPONSE] = preds[
+        pd_constants.FieldNames.RESPONSE
     ]
     ntok_in = preds["ntok_in"]
     ntok_out = preds["ntok_out"]
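
The _postprocess() change switches the response field from a GeneratedTextCandidates-style list of (text, score) pairs to a plain GeneratedText string. A before/after sketch of the output shape, with illustrative values:

    # Before this commit: a single scored candidate.
    old_preds = {"response": [("The answer is 42.", 1.0)]}
    old_text = old_preds["response"][0][0]

    # After this commit: the bare generated string.
    new_preds = {"response": "The answer is 42."}
    new_text = new_preds["response"]  # no [0][0] indexing needed

    assert old_text == new_text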
2 changes: 1 addition & 1 deletion
@@ -77,7 +77,7 @@ def test_gpt2_generation_output(self, framework, model_path):
         text=cur_input["prompt"], model=model, framework=framework
     )
     expected_output_embeddings = _get_text_mean_embeddings(
-        text=cur_output["response"][0][0], model=model, framework=framework
+        text=cur_output["response"], model=model, framework=framework
     )
     np.testing.assert_array_almost_equal(
         expected_input_embeddings,
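
This test update mirrors the _postprocess() change above: with the response field now a plain string rather than a list of (text, score) pairs, the [0][0] indexing is no longer needed.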
