Skip to content

Commit

Permalink
added support of two-letter language code (#159)
Browse files Browse the repository at this point in the history
* added support of two-letter language code

* fixed README (minor)

---------

Co-authored-by: Ofer Mendelevitch <ofermend@gmail.com>
  • Loading branch information
adeelehsan and ofermend authored May 30, 2024
1 parent 1978f58 commit 414d02e
Show file tree
Hide file tree
Showing 10 changed files with 108 additions and 88 deletions.
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ The way summarization works can be configured as follows:
ux: "summary"

# Default language for summary response (if not specified defaults to "auto").
# accepts two-letter code (en) or three-letter code (eng)
summary_default_language: "eng"

# Number of sentences before and after relevant text segment used for summarization.
Expand Down Expand Up @@ -272,14 +273,11 @@ reranker_name: normal | slingshot | mmr
rerank_num_results: 50
```

To use Vectara's MMR (Maximum Marginal Relevance) functionality please set the `reranker_name = mmr`.
To use Vectara's MMR (Maximum Marginal Relevance) functionality, set `reranker_name` to `mmr` and add an `mmr_diversity_bias` value.

```yaml
# Diversity bias factor (0..1) for MMR reranker. The higher the value, the more MMR is preferred over relevance.
mmr_diversity_bias: 0.3
# number of results to use for reranking
mmr_num_results: 50
```

### Authentication (optional)
Expand Down
8 changes: 4 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"@types/prismjs": "^1.26.0",
"@types/react": "^18.2.21",
"@types/react-dom": "^18.2.7",
"@vectara/stream-query-client": "^1.0.0",
"@vectara/stream-query-client": "^2.1.0",
"analytics": "^0.8.9",
"axios": "^0.27.2",
"classnames": "^2.3.2",
Expand Down
2 changes: 0 additions & 2 deletions server/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ app.post("/config", (req, res) => {
reranker_name,

// MMR
mmr_num_results,
mmr_diversity_bias,

// Hybrid search
Expand Down Expand Up @@ -159,7 +158,6 @@ app.post("/config", (req, res) => {
reranker_name,

// MMR
mmr_num_results,
mmr_diversity_bias,

// Search header
Expand Down
6 changes: 1 addition & 5 deletions src/contexts/ConfigurationContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ interface Config {


// MMR
config_mmr_num_results?: number;
config_mmr_diversity_bias?: number;

// questions
Expand Down Expand Up @@ -417,7 +416,6 @@ export const ConfigContextProvider = ({ children }: Props) => {

// MMR
config_mmr_diversity_bias,
config_mmr_num_results,

// hybrid search
config_hybrid_search_num_words,
Expand Down Expand Up @@ -549,9 +547,7 @@ export const ConfigContextProvider = ({ children }: Props) => {

setRerank({
isEnabled: isRankerEnabled(config_reranker_name),
numResults: config_reranker_name === "mmr"
? (config_mmr_num_results ?? 50)
: config_rerank_num_results ?? rerank.numResults,
numResults: config_rerank_num_results ?? rerank.numResults,
id: getRerankerId(config_reranker_name),
diversityBias: getRerankerDiversty(config_reranker_name),
});
Expand Down
14 changes: 6 additions & 8 deletions src/contexts/SearchContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ import {
retrieveHistory,
} from "./history";
import { deserializeSearchResponse } from "../utils/deserializeSearchResponse";
import {FactualConsistencyDetail, streamQuery} from "@vectara/stream-query-client";
import { StreamUpdate } from "@vectara/stream-query-client/lib/types";
import { streamQuery } from "@vectara/stream-query-client";

interface SearchContextType {
filterValue: string;
Expand Down Expand Up @@ -57,7 +57,7 @@ interface SearchContextType {
summarizationError: SearchError | undefined;
summarizationResponse: string | undefined;
summaryTime: number;
factualConsistencyScore: number;
factualConsistencyScore: number | undefined;
language: SummaryLanguage;
summaryNumResults: number;
summaryNumSentences: number;
Expand Down Expand Up @@ -117,7 +117,7 @@ export const SearchContextProvider = ({ children }: Props) => {
const [summarizationResponse, setSummarizationResponse] =
useState<string>();
const [summaryTime, setSummaryTime] = useState<number>(0);
const [factualConsistencyScore, setFactualConsistencyScore] = useState<number>(0);
const [factualConsistencyScore, setFactualConsistencyScore] = useState<number | undefined>();

// Citation selection
const searchResultsRef = useRef<HTMLElement[] | null[]>([]);
Expand Down Expand Up @@ -287,17 +287,15 @@ export const SearchContextProvider = ({ children }: Props) => {
const onStreamUpdate = (update: StreamUpdate) => {
// If we send multiple requests in rapid succession, we only want to
// display the results of the most recent request.
const fcsDetail = update.details?.find(
(detail) => detail.type === "factualConsistency") as
| FactualConsistencyDetail
const fcsDetail = update.details?.factualConsistency
if (searchId === searchCount) {
if (update.isDone) {
setIsSummarizing(false);
setSummaryTime(Date.now() - startTime);
}
setSummarizationError(undefined);
setSummarizationResponse(update.updatedText ?? undefined);
setFactualConsistencyScore(fcsDetail?.data?.score)
setFactualConsistencyScore(fcsDetail?.score)
}
};

Expand All @@ -317,7 +315,7 @@ export const SearchContextProvider = ({ children }: Props) => {
customerId: search.customerId!,
corpusIds: search.corpusId!.split(","),
endpoint: search.endpoint!,
apiKey: search.apiKey!
apiKey: search.apiKey!,
},
onStreamUpdate
);
Expand Down
4 changes: 4 additions & 0 deletions src/ui/components/list/_index.scss
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,8 @@
.vuiSummaryCitationColor {
background-color: $colorAccentLightShade !important;
color: $colorAccent !important;
}

.vuiSummaryWidth {
width: 100%;
}
2 changes: 1 addition & 1 deletion src/views/search/SummaryUx.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ export const SummaryUx = () => {

<VuiSpacer size="s" />

<VuiSummary summary={summary} SummaryCitation={SummaryCitation} />
<VuiSummary summary={summary} SummaryCitation={SummaryCitation} className="vuiSummaryWidth" />

<VuiSpacer size="s" />

Expand Down
11 changes: 7 additions & 4 deletions src/views/search/controls/OptionsDrawer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,13 @@ import {
import {SUMMARY_LANGUAGES, SummaryLanguage, humanizeLanguage, FCS_MODE, UiText, FcsMode} from "../types";
import { useConfigContext } from "../../../contexts/ConfigurationContext";

const languageOptions = SUMMARY_LANGUAGES.map((code) => ({
value: code,
label: humanizeLanguage(code),
}));
// Options for the summary-language selector. Several codes can humanize to the
// same label, so only the first code encountered for each label is kept.
const languageOptions: { value: string; label: string }[] = (() => {
  const seenLabels = new Set<string>();
  const options: { value: string; label: string }[] = [];
  for (const code of SUMMARY_LANGUAGES) {
    const label = humanizeLanguage(code);
    if (seenLabels.has(label)) {
      continue;
    }
    seenLabels.add(label);
    options.push({ value: code, label });
  }
  return options;
})();

const FcsOptions = FCS_MODE.map((code) => ({
value: code,
Expand Down
141 changes: 82 additions & 59 deletions src/views/search/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,69 +64,92 @@ export type DeserializedSearchResult = {

export const SUMMARY_LANGUAGES = [
"auto",
"eng",
"deu",
"fra",
"zho",
"kor",
"ara",
"rus",
"tha",
"nld",
"ita",
"por",
"spa",
"jpn",
"pol",
"tur",
"heb",
"vie",
"ind",
"ces",
"ukr",
"ell",
"fas",
"hin",
"urd",
"swe",
"ben",
"msa",
"ron",
"eng", "en",
"deu", "de",
"fra", "fr",
"zho", "zh",
"kor", "ko",
"ara", "ar",
"rus", "ru",
"tha", "th",
"nld", "nl",
"ita", "it",
"por", "pt",
"spa", "es",
"jpn", "ja",
"pol", "pl",
"tur", "tr",
"heb", "he",
"vie", "vi",
"ind", "id",
"ces", "cs",
"ukr", "uk",
"ell", "el",
"fas", "fa",
"hin", "hi",
"urd", "ur",
"swe", "sv",
"ben", "bn",
"msa", "ms",
"ron", "ro"
] as const;

export type SummaryLanguage = (typeof SUMMARY_LANGUAGES)[number];

const codeToLanguageMap: Record<SummaryLanguage, string> = {
auto: "Same as query",
eng: "English",
deu: "German",
fra: "French",
zho: "Chinese",
kor: "Korean",
ara: "Arabic",
rus: "Russian",
tha: "Thai",
nld: "Dutch",
ita: "Italian",
por: "Portugese",
spa: "Spanish",
jpn: "Japanese",
pol: "Polish",
tur: "Turkish",
heb: "Hebrew",
vie: "Vietnamese",
ind: "Indonesian",
ces: "Czech",
ukr: "Ukrainian",
ell: "Greek",
fas: "Farsi",
hin: "Hindi",
urd: "Urdu",
swe: "Swedish",
ben: "Bengali",
msa: "Malay",
ron: "Romanian",
} as const;
/**
 * Human-readable language name mapped to the language codes listed for it.
 * "Same as query" carries the single "auto" code; every other entry lists a
 * two-letter code followed by a three-letter code.
 */
const languageCodes: Record<string, string[]> = {
  "Same as query": ["auto"],
  "English": ["en", "eng"],
  "Spanish": ["es", "spa"],
  "French": ["fr", "fra"],
  "German": ["de", "deu"],
  "Chinese": ["zh", "zho"],
  "Japanese": ["ja", "jpn"],
  "Russian": ["ru", "rus"],
  "Portuguese": ["pt", "por"],
  "Italian": ["it", "ita"],
  "Korean": ["ko", "kor"],
  "Arabic": ["ar", "ara"],
  "Dutch": ["nl", "nld"],
  "Swedish": ["sv", "swe"],
  "Norwegian": ["no", "nor"],
  "Danish": ["da", "dan"],
  "Finnish": ["fi", "fin"],
  "Polish": ["pl", "pol"],
  "Czech": ["cs", "ces"],
  "Hungarian": ["hu", "hun"],
  "Romanian": ["ro", "ron"],
  "Turkish": ["tr", "tur"],
  "Bulgarian": ["bg", "bul"],
  "Greek": ["el", "ell"],
  "Hebrew": ["he", "heb"],
  "Thai": ["th", "tha"],
  "Ukrainian": ["uk", "ukr"],
  "Indonesian": ["id", "ind"],
  "Malay": ["ms", "msa"],
  "Vietnamese": ["vi", "vie"],
  "Hindi": ["hi", "hin"],
  "Bengali": ["bn", "ben"],
  "Tamil": ["ta", "tam"],
  "Telugu": ["te", "tel"],
  "Marathi": ["mr", "mar"],
  "Urdu": ["ur", "urd"],
  "Persian": ["fa", "fas"],
};
// Every language code listed in `languageCodes`, flattened into a single array.
const summaryLanguages: string[] = Object.values(languageCodes).flat();

// Reverse lookup built from `languageCodes`: language code -> human-readable name.
const codeToLanguageMap = {} as Record<SummaryLanguage, string>;
for (const [languageName, codes] of Object.entries(languageCodes)) {
  for (const code of codes) {
    // `languageCodes` holds plain strings, so each code is asserted to the key type.
    codeToLanguageMap[code as SummaryLanguage] = languageName;
  }
}

export const humanizeLanguage = (language: SummaryLanguage): string => {
return codeToLanguageMap[language];
Expand Down

0 comments on commit 414d02e

Please sign in to comment.