Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include already translated texts as additional context for translation #154

Merged
merged 6 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package com.composum.ai.aem.core.impl.autotranslate;

import static com.composum.ai.backend.base.service.chat.impl.GPTTranslationServiceImpl.LASTID;
import static com.composum.ai.backend.base.service.chat.impl.GPTTranslationServiceImpl.MULTITRANSLATION_SEPARATOR_END;
import static com.composum.ai.backend.base.service.chat.impl.GPTTranslationServiceImpl.MULTITRANSLATION_SEPARATOR_START;
import static java.util.Objects.requireNonNull;
import static org.apache.commons.lang3.StringUtils.isNotBlank;

Expand Down Expand Up @@ -155,17 +158,17 @@ public Stats translateLiveCopy(@Nonnull Resource resource,
additionalInstructions.replaceAll(MARKER_DEBUG_ADDITIONAL_INSTRUCTIONS, ""));
}

// We also insert texts that are already translated since they might guide the translation process
configuration = maybeIncludeAlreadyTranslatedTextAsExample(propertiesToTranslate, autoTranslateCaConfig, configuration);

propertiesToTranslate = reducePropertiesToTranslate(propertiesToTranslate, autoTranslateCaConfig);
List<String> valuesToTranslate = propertiesToTranslate.stream()
.filter(p -> autoTranslateConfigService.includeAlreadyTranslatedValues() || !p.isAlreadyCorrectlyTranslated)
.map(PropertyToTranslate::getSourceValue)
.collect(Collectors.toList());

List<String> translatedValues =
translationService.fragmentedTranslation(valuesToTranslate, languageName, configuration,
Collections.singletonList(GPTResponseCheck.KEEP_HREF_TRANSLATION_CHECK));
translatedValues = remapPaths(translatedValues, relationship.getLiveCopy().getBlueprintPath(), relationship.getLiveCopy().getPath()
);
translatedValues = remapPaths(translatedValues, relationship.getLiveCopy().getBlueprintPath(), relationship.getLiveCopy().getPath());

Map<String, LiveRelationship> relationships = new HashMap<>();

Expand Down Expand Up @@ -229,6 +232,85 @@ public Stats translateLiveCopy(@Nonnull Resource resource,
return stats;
}

/**
 * Narrows the list of properties that are handed to the translation.
 * An entry is kept if it is not yet correctly translated, or if full page retranslation is switched on
 * (in the service configuration or in the context-aware configuration). In addition, up to two entries
 * before and after each kept entry are kept as well, so that the translation has some surrounding context.
 *
 * @param propertiesToTranslate all candidate properties, in page order
 * @param autoTranslateCaConfig the context-aware configuration consulted for the full page switch
 * @return a new list with the kept properties in their original order
 */
protected List<PropertyToTranslate> reducePropertiesToTranslate(List<PropertyToTranslate> propertiesToTranslate, AutoTranslateCaConfig autoTranslateCaConfig) {
    final boolean wholePage = autoTranslateConfigService.includeFullPageInRetranslation()
            || trueTristateCaConfig(autoTranslateCaConfig.includeFullPageInRetranslation());

    // Mark everything that has to go to the translation on its own account.
    final int count = propertiesToTranslate.size();
    final boolean[] keep = new boolean[count];
    for (int pos = 0; pos < count; pos++) {
        PropertyToTranslate candidate = propertiesToTranslate.get(pos);
        keep[pos] = wholePage || !candidate.isAlreadyCorrectlyTranslated;
    }

    // Pull in two neighbours on each side of every marked entry.
    expandSelection(keep, 2);

    final List<PropertyToTranslate> kept = new ArrayList<>();
    for (int pos = 0; pos < count; pos++) {
        if (!keep[pos]) {
            continue;
        }
        kept.add(propertiesToTranslate.get(pos));
    }
    return kept;
}

/**
 * Widens a selection in place: for every entry that is set on entry to this method, the
 * {@code selectRange} entries before it and the {@code selectRange} entries after it are set as well,
 * so that selected items come with some surrounding context.
 *
 * @param includeIndizes the selection flags, modified in place; {@code null} is ignored
 * @param selectRange    how many neighbours to add on each side; values of 0 or less add nothing
 */
protected static void expandSelection(boolean[] includeIndizes, int selectRange) {
    if (includeIndizes == null || selectRange <= 0) {
        return;
    }
    // Distances are computed as index differences (always within array bounds) rather than as
    // "sentinel +/- selectRange", which overflows for extreme selectRange values.

    // Forward pass: extend each set entry to the right. Entries set by the extension do not
    // move lastSetIndex, so the range is always measured from an entry that was set before.
    int lastSetIndex = -1; // -1 means: no set entry seen yet
    for (int i = 0; i < includeIndizes.length; i++) {
        if (includeIndizes[i]) {
            lastSetIndex = i;
        } else if (lastSetIndex >= 0 && i - lastSetIndex <= selectRange) {
            includeIndizes[i] = true;
        }
    }

    // Backward pass: extend each set entry to the left.
    lastSetIndex = -1;
    for (int i = includeIndizes.length - 1; i >= 0; i--) {
        if (includeIndizes[i]) {
            lastSetIndex = i;
        } else if (lastSetIndex >= 0 && lastSetIndex - i <= selectRange) {
            includeIndizes[i] = true;
        }
    }
}

/**
 * Adds the target texts of the already correctly translated properties to the configuration as
 * contextual example, if that is switched on (in the service configuration or in the context-aware
 * configuration) and there is any such text.
 *
 * @param propertiesToTranslate the properties of the page; only those marked as already correctly translated contribute
 * @param autoTranslateCaConfig the context-aware configuration consulted for the switch
 * @param configuration         the configuration to extend
 * @return the merged configuration, or {@code configuration} itself if nothing was added
 */
protected GPTConfiguration maybeIncludeAlreadyTranslatedTextAsExample(
        List<PropertyToTranslate> propertiesToTranslate,
        AutoTranslateCaConfig autoTranslateCaConfig, GPTConfiguration configuration) {
    boolean enabled = autoTranslateConfigService.includeExistingTranslationsInRetranslation();
    if (!enabled) {
        enabled = trueTristateCaConfig(autoTranslateCaConfig.includeExistingTranslationsInRetranslation());
    }

    // One line per existing translation, in page order.
    String existingTranslations = propertiesToTranslate.stream()
            .filter(prop -> prop.isAlreadyCorrectlyTranslated)
            .map(PropertyToTranslate::getTargetValue)
            .collect(Collectors.joining("\n"));

    if (!enabled || StringUtils.isBlank(existingTranslations)) {
        return configuration;
    }

    // The example is framed like a real translation response, since a different format confuses the AI.
    String example = MULTITRANSLATION_SEPARATOR_START + LASTID + MULTITRANSLATION_SEPARATOR_END +
            existingTranslations +
            MULTITRANSLATION_SEPARATOR_START + LASTID + MULTITRANSLATION_SEPARATOR_END;
    GPTConfiguration exampleContext = GPTConfiguration.ofContext(
            "Retrieve the result of a previous translation of parts of the text. You don't need to translate this - this is just contextual information and you can draw on that for translation examples and context of the translation that is done later.",
            example);
    return configuration.merge(exampleContext);
}

/**
 * Evaluates a tri-state configuration value given as a boolean array.
 * It counts as true if the array contains at least one {@code true} value;
 * {@code null} and an empty array count as false.
 *
 * @param value the configured value, may be {@code null}
 * @return {@code true} if any element of {@code value} is {@code true}
 */
protected boolean trueTristateCaConfig(boolean[] value) {
    if (value == null) {
        return false;
    }
    // Deliberately a plain loop: Arrays.asList(boolean[]) wraps the whole primitive array as a single
    // list element, so a contains(true) check on it can never succeed.
    for (boolean flag : value) {
        if (flag) {
            return true;
        }
    }
    return false;
}

/**
* Checks whether there are href="path" in the translatedValues where path is within blueprintPath
* and replaces those with the according path in the live copy.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,17 @@
@Property(label = "Rules that give additional instructions for translation if certain words or phrases are present in the page.")
AutoTranslateRuleConfig[] rules() default {};

/**
 * Context-aware switch for giving the entire page to the AI when a page is re-translated.
 * Declared as an array so that it can be left unset.
 */
@Property(label = "Include Full Page during Retranslation",
        description = "If true we do not only provide changed texts to the AI during re-translating a page with some changes, " +
                "but give the entire page to provide better context. That is a bit slower and a bit more expensive, but likely " +
                "improves the result. This overrides the default from OSGI configuration.")
boolean[] includeFullPageInRetranslation();

/**
 * Context-aware switch for passing the existing translations of a page to the AI as additional
 * context when the page is re-translated.
 * NOTE(review): the other array-valued switch in this configuration declares no default - confirm that
 * defaulting this one to true is intended.
 */
@Property(label = "Include Existing Translations in Retranslation",
        description = "If true, when retranslating a page with some changes we provide " +
                "the existing translations of that page to the AI as well as additional context with examples. " +
                "That is a bit slower and a bit more expensive, but likely improves the result. " +
                "This overrides the default from OSGI configuration.")
boolean[] includeExistingTranslationsInRetranslation() default true;

}
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,16 @@
description = "If true, the translator will use the 'high-intelligence model' (see OpenAI config) for translation. Default: true.")
boolean useHighIntelligenceModel() default true;

/**
 * Whether source texts that need no translation are sent along as context when a page is re-translated.
 */
@AttributeDefinition(
        name = "Include Already Translated Values",
        description = "If a page is re-translated with only a few modified texts: If true we include "
                + "the source texts that do not have to be translated, too, to provide better context "
                + "to the translation; otherwise we only include the texts that have to be translated."
)
boolean includeAlreadyTranslatedValues() default true;
/**
 * Whether the entire page, not only the changed texts, is given to the AI when a page is re-translated.
 * Enabled by default.
 */
@AttributeDefinition(name = "Include Full Page during Retranslation",
        description = "If true we do not only provide changed texts to the AI during re-translating a page with some changes, " +
                "but give the entire page to provide better context. That is a bit slower and a bit more expensive, but likely " +
                "improves the result.")
boolean includeFullPageInRetranslation() default true;

/**
 * Whether the existing translations of a page are passed to the AI as additional context when the page
 * is re-translated. Enabled by default.
 */
@AttributeDefinition(name = "Include Existing Translations in Retranslation",
        description = "If true, when retranslating a page with some changes we provide " +
                "the existing translations of that page to the AI as well as additional context with examples. " +
                "That is a bit slower and a bit more expensive, but likely improves the result.")
boolean includeExistingTranslationsInRetranslation() default true;

}
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,17 @@ public interface AutoTranslateConfigService {
List<String> translateableAttributes(@Nullable Resource resource);

/**
 * If a page is re-translated with only a few modified texts:
 * If true we include the source texts that do not have to be translated, too,
 * to provide better context to the translation; otherwise
 * we only include the texts that have to be translated.
 */
boolean includeAlreadyTranslatedValues();
/**
 * If true, we do not only provide changed texts to the AI during re-translating a page with some changes,
 * but give the entire page to provide better context.
 * That is a bit slower and a bit more expensive, but likely improves the result.
 */
boolean includeFullPageInRetranslation();

/**
 * If true, when retranslating a page with some changes we provide the existing translations of that page
 * to the AI as well, as additional context with examples.
 * That is a bit slower and a bit more expensive, but likely improves the result.
 */
boolean includeExistingTranslationsInRetranslation();

}
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,13 @@ public List<String> translateableAttributes(@Nullable Resource resource) {
}

@Override
public boolean includeAlreadyTranslatedValues() {
return config == null || config.includeAlreadyTranslatedValues();
public boolean includeFullPageInRetranslation() {
return config == null || config.includeFullPageInRetranslation();
}

/**
 * Returns the configured value; counts as enabled when no configuration is present.
 */
@Override
public boolean includeExistingTranslationsInRetranslation() {
    if (config == null) {
        return true;
    }
    return config.includeExistingTranslationsInRetranslation();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@


import static com.composum.ai.aem.core.impl.autotranslate.AutoPageTranslateServiceImpl.compileContentPattern;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
Expand All @@ -11,6 +12,7 @@
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -166,4 +168,47 @@ public void testRemapPaths() {
service.remapPaths((String) null, "/content/blueprint", "/content/livecopy"));
}

/** A single selected entry in the middle pulls in two neighbours on each side. */
@Test
public void expandSelection_includesContextBeforeAndAfter() {
    final boolean[] selection = new boolean[]{false, false, true, false, false};
    final boolean[] expected = new boolean[]{true, true, true, true, true};

    AutoPageTranslateServiceImpl.expandSelection(selection, 2);

    assertArrayEquals(expected, selection);
}

/** Without any selected entry nothing gets selected. */
@Test
public void expandSelection_noInitialSelection() {
    final boolean[] selection = new boolean[]{false, false, false, false, false};
    final boolean[] expected = new boolean[]{false, false, false, false, false};

    AutoPageTranslateServiceImpl.expandSelection(selection, 2);

    assertArrayEquals(expected, selection);
}

/** A selected first entry only extends to the two entries after it. */
@Test
public void expandSelection_singleSelectionAtStart() {
    final boolean[] selection = new boolean[]{true, false, false, false, false};
    final boolean[] expected = new boolean[]{true, true, true, false, false};

    AutoPageTranslateServiceImpl.expandSelection(selection, 2);

    assertArrayEquals(expected, selection);
}

/** A selected last entry only extends to the two entries before it. */
@Test
public void expandSelection_singleSelectionAtEnd() {
    final boolean[] selection = new boolean[]{false, false, false, false, true};
    final boolean[] expected = new boolean[]{false, false, true, true, true};

    AutoPageTranslateServiceImpl.expandSelection(selection, 2);

    assertArrayEquals(expected, selection);
}

/** Overlapping ranges of two selected entries cover the whole array. */
@Test
public void expandSelection_multipleSelections() {
    final boolean[] selection = new boolean[]{false, true, false, true, false};
    final boolean[] expected = new boolean[]{true, true, true, true, true};

    AutoPageTranslateServiceImpl.expandSelection(selection, 2);

    assertArrayEquals(expected, selection);
}

/** In a longer array the entries further than two away from any selected entry stay unselected. */
@Test
public void expandSelection_long() {
    final boolean[] selection = new boolean[]{false, false, false, true, false, true, false, false, false};
    final boolean[] expected = new boolean[]{false, true, true, true, true, true, true, true, false};

    AutoPageTranslateServiceImpl.expandSelection(selection, 2);

    assertArrayEquals(expected, selection);
}


}
Loading
Loading