From 4ddd3c70a2aa3e6d5ea1f0a76a849d7fd7ccfa0a Mon Sep 17 00:00:00 2001 From: Petr Pucil Date: Mon, 26 Feb 2024 16:24:41 +0100 Subject: [PATCH] CanonicalizeEncodingNames: fix locale-sensitive `toUpperCase` Avoid the problem with the Turkish locale that we have encountered in the past, see https://github.com/kaitai-io/kaitai_struct/issues/708. --- .../precompile/CanonicalizeEncodingNames$Test.scala | 13 +++++++++++++ .../precompile/CanonicalizeEncodingNames.scala | 7 ++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/jvm/src/test/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames$Test.scala b/jvm/src/test/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames$Test.scala index 2b9d74e4d..52b0af68f 100644 --- a/jvm/src/test/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames$Test.scala +++ b/jvm/src/test/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames$Test.scala @@ -3,6 +3,7 @@ package io.kaitai.struct.precompile import io.kaitai.struct.problems._ import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers._ +import java.util.Locale class CanonicalizeEncodingNames$Test extends AnyFunSpec { describe("CanonicalizeEncodingNames.") { @@ -29,5 +30,17 @@ class CanonicalizeEncodingNames$Test extends AnyFunSpec { newEncoding should be("ISO-8859-1") problem should be(Some(EncodingNameWarning("ISO-8859-1", "iSo-8859-1"))) } + + it("reports warning and fixes bad capitalization for 'iSo-8859-1' even in Turkish locale") { + val oldLocale = Locale.getDefault + Locale.setDefault(new Locale("tr")) + try { + val (newEncoding, problem) = CanonicalizeEncodingNames.canonicalizeName("iSo-8859-1") + newEncoding should be("ISO-8859-1") + problem should be(Some(EncodingNameWarning("ISO-8859-1", "iSo-8859-1"))) + } finally { + Locale.setDefault(oldLocale) + } + } } } diff --git a/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala b/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala index 9406723c5..67f090937 100644 --- a/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala +++ b/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala @@ -5,6 +5,7 @@ import io.kaitai.struct.datatype.DataType.StrFromBytesType import io.kaitai.struct.format._ import io.kaitai.struct.precompile.CanonicalizeEncodingNames._ import io.kaitai.struct.problems._ +import io.kaitai.struct.Platform class CanonicalizeEncodingNames(specs: ClassSpecs) extends PrecompileStep { override def run(): Iterable[CompilationProblem] = specs.mapRec(canonicalize) @@ -48,7 +49,7 @@ object CanonicalizeEncodingNames { (original, None) } else { // See if any aliases match - aliasToCanonical.get(original.toUpperCase) match { + aliasToCanonical.get(Platform.toUpperLocaleInsensitive(original)) match { case Some(canonical) => ( canonical, @@ -65,7 +66,7 @@ object CanonicalizeEncodingNames { private val aliasToCanonical: Map[String, String] = EncodingList.canonicalToAlias.flatMap { case (canonical, aliases) => - aliases.map(alias => (alias.toUpperCase, canonical)) + aliases.map(alias => (Platform.toUpperLocaleInsensitive(alias), canonical)) } ++ - EncodingList.canonicalToAlias.keys.map(x => x.toUpperCase -> x) + EncodingList.canonicalToAlias.keys.map(x => Platform.toUpperLocaleInsensitive(x) -> x) }