Skip to content

Commit

Permalink
Improve the schema view to be less overwhelming. (#726)
Browse files Browse the repository at this point in the history
Use the signal titles & concept names instead of the paths for signal
roots.

Definitely a lot more we can do, but this is a step in the right direction.

After:
<img width="435" alt="image"
src="https://github.com/lilacai/lilac/assets/1100749/507abdd7-6dc6-4400-9e8f-afc86dcaef04">

Hover chips show the embeddings & signal metadata:
<img width="430" alt="image"
src="https://github.com/lilacai/lilac/assets/1100749/b811c990-0fa5-4f99-89c1-634c62f8aa08">


Before:

<img width="435" alt="image"
src="https://github.com/lilacai/lilac/assets/1100749/965d0f0f-7680-4d94-9f63-a14dc3f0362b">
  • Loading branch information
nsthorat authored Sep 29, 2023
1 parent 067417a commit 5fb2101
Show file tree
Hide file tree
Showing 11 changed files with 94 additions and 50 deletions.
2 changes: 1 addition & 1 deletion docs/concepts/concept_metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ The human readable scores are based on this score:
- `Great`: 0.95 to 1.0
- `Very good`: 0.9 to 0.95
- `Good`: 0.8 to 0.9
- `OK`: 0.5 to 0.9
- `OK`: 0.5 to 0.8
- `Not good`: 0.0 to 0.5

## From the UI
Expand Down
2 changes: 1 addition & 1 deletion lilac/data/dataset_compute_signal_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ def test_concept_signal_with_select_groups(make_test_data: TestDataMaker) -> Non
embedding='test_embedding',
path='text')

concept_key = 'test_namespace/test_concept/test_embedding/v1'
concept_key = 'test_namespace/test_concept/test_embedding'
result = dataset.select_groups(f'text.{concept_key}.*.score')
assert result.counts == [('Not in concept', 2), ('In concept', 1)]

Expand Down
5 changes: 3 additions & 2 deletions lilac/data/dataset_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,6 @@ def _compute_signal_items(self,
data_schema: Schema,
num_items: int,
task_step_id: Optional[TaskStepId] = None) -> Iterable[Item]:
signal.setup()

source_path = normalize_path(path)

source_values = self._select_iterable_values(source_path, data_schema)
Expand Down Expand Up @@ -479,6 +477,7 @@ def compute_signal(self,
# Make a dummy task step so we report progress via tqdm.
task_step_id = ('', 0)

signal.setup()
output_items = self._compute_signal_items(signal, path, manifest.data_schema,
manifest.num_items, task_step_id)

Expand All @@ -498,6 +497,7 @@ def compute_signal(self,

signal_schema = create_signal_schema(signal, source_path, manifest.data_schema)

print('signal', signal.model_dump())
# Add progress.
if task_step_id is not None:
output_items = progress(
Expand Down Expand Up @@ -546,6 +546,7 @@ def compute_embedding(self,

signal = get_signal_by_type(embedding, TextEmbeddingSignal)()

signal.setup()
output_items = self._compute_signal_items(signal, path, manifest.data_schema,
manifest.num_items, task_step_id)

Expand Down
2 changes: 1 addition & 1 deletion lilac/data/dataset_select_rows_schema_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def test_search_concept_schema(make_test_data: TestDataMaker) -> None:
)
})
]),
'test_namespace/test_concept/labels': field(
'test_namespace/test_concept/labels/preview': field(
fields=[field('string_span', fields={
'label': 'boolean',
'draft': 'string'
Expand Down
28 changes: 14 additions & 14 deletions lilac/data/dataset_select_rows_search_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,15 +236,15 @@ def test_concept_search(make_test_data: TestDataMaker, mocker: MockerFixture) ->
'text': enriched_item(
'hello world2.', {
expected_signal_udf.key(): [lilac_span(0, 13, {'score': approx(0.75, abs=0.25)})],
'test_namespace/test_concept/labels': [lilac_span(0, 13, {'label': True})]
'test_namespace/test_concept/labels/preview': [lilac_span(0, 13, {'label': True})]
})
},
{
ROWID: '1',
'text': enriched_item(
'hello world.', {
expected_signal_udf.key(): [lilac_span(0, 12, {'score': approx(0.25, abs=0.25)})],
'test_namespace/test_concept/labels': [lilac_span(0, 12, {'label': False})]
'test_namespace/test_concept/labels/preview': [lilac_span(0, 12, {'label': False})]
})
},
]
Expand Down Expand Up @@ -291,17 +291,17 @@ def test_concept_search_without_rowid(make_test_data: TestDataMaker) -> None:
# Results are sorted by score desc.
{
'text': 'hello world2.',
'text.test_namespace/test_concept/test_embedding': [
'text.test_namespace/test_concept/test_embedding/preview': [
lilac_span(0, 13, {'score': approx(0.75, abs=0.25)})
],
'text.test_namespace/test_concept/labels': [lilac_span(0, 13, {'label': True})]
'text.test_namespace/test_concept/labels/preview': [lilac_span(0, 13, {'label': True})]
},
{
'text': 'hello world.',
'text.test_namespace/test_concept/test_embedding': [
'text.test_namespace/test_concept/test_embedding/preview': [
lilac_span(0, 12, {'score': approx(0.25, abs=0.25)})
],
'text.test_namespace/test_concept/labels': [lilac_span(0, 12, {'label': False})]
'text.test_namespace/test_concept/labels/preview': [lilac_span(0, 12, {'label': False})]
},
]

Expand All @@ -321,17 +321,17 @@ def test_concept_search_without_rowid(make_test_data: TestDataMaker) -> None:
{
'text': enriched_item(
'hello world2.', {
'test_namespace/test_concept/test_embedding':
'test_namespace/test_concept/test_embedding/preview':
[lilac_span(0, 13, {'score': approx(0.75, abs=0.25)})],
'test_namespace/test_concept/labels': [lilac_span(0, 13, {'label': True})]
'test_namespace/test_concept/labels/preview': [lilac_span(0, 13, {'label': True})]
})
},
{
'text': enriched_item(
'hello world.', {
'test_namespace/test_concept/test_embedding':
'test_namespace/test_concept/test_embedding/preview':
[lilac_span(0, 12, {'score': approx(0.25, abs=0.25)})],
'test_namespace/test_concept/labels': [lilac_span(0, 12, {'label': False})]
'test_namespace/test_concept/labels/preview': [lilac_span(0, 12, {'label': False})]
})
},
]
Expand Down Expand Up @@ -372,17 +372,17 @@ def test_concept_search_sort_by_rowid(make_test_data: TestDataMaker) -> None:
# Results are sorted by rowid.
{
'text': 'hello world.',
'text.test_namespace/test_concept/test_embedding': [
'text.test_namespace/test_concept/test_embedding/preview': [
lilac_span(0, 12, {'score': approx(0.25, abs=0.25)})
],
'text.test_namespace/test_concept/labels': [lilac_span(0, 12, {'label': False})]
'text.test_namespace/test_concept/labels/preview': [lilac_span(0, 12, {'label': False})]
},
{
'text': 'hello world2.',
'text.test_namespace/test_concept/test_embedding': [
'text.test_namespace/test_concept/test_embedding/preview': [
lilac_span(0, 13, {'score': approx(0.75, abs=0.25)})
],
'text.test_namespace/test_concept/labels': [lilac_span(0, 13, {'label': True})]
'text.test_namespace/test_concept/labels/preview': [lilac_span(0, 13, {'label': True})]
}
]

Expand Down
18 changes: 10 additions & 8 deletions lilac/signals/concept_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ class ConceptLabelsSignal(TextSignal):

namespace: str
concept_name: str
# This will get filled out during setup.
version: Optional[int] = None

# The draft version of the concept to use. If not provided, the latest version is used.
draft: str = DRAFT_MAIN
Expand All @@ -28,6 +30,12 @@ class ConceptLabelsSignal(TextSignal):
def fields(self) -> Field:
return field(fields=[field('string_span', fields={'label': 'boolean', 'draft': 'string'})])

@override
def setup(self) -> None:
concept = self._concept_db.get(self.namespace, self.concept_name, self._user)
if concept:
self.version = concept.version

@override
def compute(self, data: Iterable[RichData]) -> Iterable[Optional[Item]]:
concept = self._concept_db.get(self.namespace, self.concept_name, self._user)
Expand Down Expand Up @@ -74,11 +82,5 @@ def set_user(self, user: Optional[UserInfo]) -> None:

@override
def key(self, is_computed_signal: Optional[bool] = False) -> str:
version = ''
if is_computed_signal:
concept = self._concept_db.get(self.namespace, self.concept_name)
if not concept:
raise ValueError(f'Concept "{self.namespace}/{self.concept_name}" does not exist.')
version = f'/v{concept.version}'

return f'{self.namespace}/{self.concept_name}/labels{version}'
suffix = '/preview' if not is_computed_signal else ''
return f'{self.namespace}/{self.concept_name}/labels{suffix}'
4 changes: 2 additions & 2 deletions lilac/signals/concept_labels_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def test_concept_labels_key(concept_db_cls: Type[ConceptDB]) -> None:
concept_db.create(namespace=namespace, name=concept_name, type=SignalInputType.TEXT)

signal = ConceptLabelsSignal(namespace='test', concept_name='test_concept')
assert signal.key() == 'test/test_concept/labels'
assert signal.key() == 'test/test_concept/labels/preview'


@pytest.mark.parametrize('concept_db_cls', ALL_CONCEPT_DBS)
Expand All @@ -125,4 +125,4 @@ def test_concept_labels_compute_signal_key(concept_db_cls: Type[ConceptDB]) -> N
concept_db.create(namespace=namespace, name=concept_name, type=SignalInputType.TEXT)

signal = ConceptLabelsSignal(namespace='test', concept_name='test_concept')
assert signal.key(is_computed_signal=True) == 'test/test_concept/labels/v0'
assert signal.key(is_computed_signal=True) == 'test/test_concept/labels'
11 changes: 9 additions & 2 deletions lilac/signals/concept_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ class ConceptSignal(VectorSignal):

namespace: str
concept_name: str
# This will get filled out during setup.
version: Optional[int] = None

# The draft version of the concept to use. If not provided, the latest version is used.
draft: str = DRAFT_MAIN
Expand Down Expand Up @@ -67,6 +69,11 @@ def _compute_span_vector_batch(self, span_vectors: Iterable[SpanVector],
scores = concept_model.score_embeddings(self.draft, np.array(vectors)).tolist()
return [lilac_span(start, end, {'score': score}) for score, (start, end) in zip(scores, spans)]

@override
def setup(self) -> None:
concept_model = self._get_concept_model()
self.version = concept_model.version

@override
def compute(self, examples: Iterable[RichData]) -> Iterable[Optional[Item]]:
"""Get the scores for the provided examples."""
Expand Down Expand Up @@ -94,7 +101,7 @@ def vector_compute_topk(

@override
def key(self, is_computed_signal: Optional[bool] = False) -> str:
suffix = '/preview' if not is_computed_signal else ''
# NOTE: The embedding is a value so already exists in the path structure. This means we do not
# need to provide the name as part of the key, which still guarantees uniqueness.
version = f'/v{self._get_concept_model().version}' if is_computed_signal else ''
return f'{self.namespace}/{self.concept_name}/{self.embedding}{version}'
return f'{self.namespace}/{self.concept_name}/{self.embedding}{suffix}'
6 changes: 3 additions & 3 deletions lilac/signals/concept_scorer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,10 @@ def test_concept_model_draft(concept_db_cls: Type[ConceptDB], model_db_cls: Type
assert draft_scores != scores


def test_concept_score_key() -> None:
def test_concept_score_preview_key() -> None:
signal = ConceptSignal(
namespace='test', concept_name='test_concept', embedding=TestEmbedding.name)
assert signal.key() == 'test/test_concept/test_embedding'
assert signal.key() == 'test/test_concept/test_embedding/preview'


@pytest.mark.parametrize('concept_db_cls', ALL_CONCEPT_DBS)
Expand All @@ -271,4 +271,4 @@ def test_concept_score_compute_signal_key(concept_db_cls: Type[ConceptDB]) -> No

signal = ConceptSignal(
namespace='test', concept_name='test_concept', embedding=TestEmbedding.name)
assert signal.key(is_computed_signal=True) == 'test/test_concept/test_embedding/v0'
assert signal.key(is_computed_signal=True) == 'test/test_concept/test_embedding'
65 changes: 49 additions & 16 deletions web/blueprint/src/lib/components/schemaView/SchemaField.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import {DTYPE_TO_ICON, getSearches, isPreviewSignal} from '$lib/view_utils';
import {computeSignalMutation} from '$lib/queries/datasetQueries';
import {querySignals} from '$lib/queries/signalQueries';
import {
PATH_WILDCARD,
VALUE_KEY,
Expand All @@ -14,6 +15,7 @@
isSortableField,
pathIsEqual,
serializePath,
type ConceptSignal,
type LilacField,
type LilacSchema,
type TextEmbeddingSignal
Expand Down Expand Up @@ -49,8 +51,37 @@
$: expandedStats = $datasetViewStore.expandedStats[serializePath(path)] || false;
const signals = querySignals();
$: isRepeatedField = path.at(-1) === PATH_WILDCARD ? true : false;
$: fieldName = isRepeatedField ? path.at(-2) : path.at(-1);
$: fieldTitle = isRepeatedField ? path.at(-2) : path.at(-1);
let fieldHoverDetails: string | null = null;
// If the field is a signal root, use the signal name to define the title.
$: {
if (field.signal && $signals.data != null) {
if (field.signal.signal_name === 'concept_score') {
const conceptSignal = field.signal as ConceptSignal;
fieldTitle = `${conceptSignal.concept_name}`;
fieldHoverDetails =
`Concept '${conceptSignal.namespace}/${conceptSignal.concept_name}' arguments:\n\n` +
`embedding: '${conceptSignal.embedding}'` +
(conceptSignal.version != null ? `\nversion: ${conceptSignal.version}` : '');
} else {
const signalInfo = $signals.data.find(s => s.name === field.signal?.signal_name);
if (signalInfo != null) {
fieldTitle = signalInfo.json_schema.title;
const argumentDetails = Object.entries(field.signal || {})
.filter(([arg, _]) => arg != 'signal_name')
.map(([k, v]) => `${k}: ${v}`);
if (argumentDetails.length > 0) {
fieldHoverDetails = `Signal '${signalInfo.name}' arguments: \n\n${argumentDetails}`;
}
}
}
}
}
$: children = childDisplayFields(field);
Expand Down Expand Up @@ -139,22 +170,24 @@
<span class="font-mono">{'{}'}</span>
{/if}
</div>
<button
class="ml-2 grow cursor-pointer truncate whitespace-nowrap text-left text-gray-900"
class:cursor-default={!isExpandable}
disabled={!isExpandable}
on:click={() => {
if (isExpandable) {
if (expandedStats) {
datasetViewStore.removeExpandedColumn(path);
} else {
datasetViewStore.addExpandedColumn(path);
<div class="grow" use:hoverTooltip={{text: fieldHoverDetails || ''}}>
<button
class="ml-2 w-full cursor-pointer truncate whitespace-nowrap text-left text-gray-900"
class:cursor-default={!isExpandable}
disabled={!isExpandable}
on:click={() => {
if (isExpandable) {
if (expandedStats) {
datasetViewStore.removeExpandedColumn(path);
} else {
datasetViewStore.addExpandedColumn(path);
}
}
}
}}
>
{fieldName}
</button>
}}
>
{fieldTitle}
</button>
</div>
{#if isSortedBy}
<RemovableTag
interactive
Expand Down
1 change: 1 addition & 0 deletions web/lib/fastapi_client/models/ConceptSignal.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export type ConceptSignal = {
embedding: 'cohere' | 'sbert' | 'openai' | 'palm' | 'gte-small' | 'gte-base';
namespace: string;
concept_name: string;
version?: (number | null);
draft?: string;
};

0 comments on commit 5fb2101

Please sign in to comment.