Skip to content

Commit

Permalink
CanonicalizeEncodingNames: fix locale-sensitive toUpperCase
Browse files Browse the repository at this point in the history
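Avoid the problem with the Turkish locale that we have encountered in
the past (see kaitai-io/kaitai_struct#708): in that locale, the
locale-sensitive `toUpperCase` maps "i" to the dotted capital "İ" rather
than to "I".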
  • Loading branch information
generalmimon committed Feb 26, 2024
1 parent 29d37b8 commit 4ddd3c7
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package io.kaitai.struct.precompile
import io.kaitai.struct.problems._
import org.scalatest.funspec.AnyFunSpec
import org.scalatest.matchers.should.Matchers._
import java.util.Locale

class CanonicalizeEncodingNames$Test extends AnyFunSpec {
describe("CanonicalizeEncodingNames.") {
Expand All @@ -29,5 +30,17 @@ class CanonicalizeEncodingNames$Test extends AnyFunSpec {
newEncoding should be("ISO-8859-1")
problem should be(Some(EncodingNameWarning("ISO-8859-1", "iSo-8859-1")))
}

// Regression test for kaitai-io/kaitai_struct#708: the canonicalization must not depend on
// the JVM default locale. Under a Turkish default locale, the locale-sensitive
// String.toUpperCase maps 'i' to the dotted capital 'İ'.
it("reports warning and fixes bad capitalization for 'iSo-8859-1' even in Turkish locale") {
  // The default locale is JVM-wide state, so remember it and restore it afterwards.
  val oldLocale = Locale.getDefault
  // Locale.forLanguageTag is used instead of the Locale(String) constructor,
  // which is deprecated since Java 19; both yield the same "tr" locale.
  Locale.setDefault(Locale.forLanguageTag("tr"))
  try {
    val (newEncoding, problem) = CanonicalizeEncodingNames.canonicalizeName("iSo-8859-1")
    newEncoding should be("ISO-8859-1")
    problem should be(Some(EncodingNameWarning("ISO-8859-1", "iSo-8859-1")))
  } finally {
    // Restore even when an assertion above fails, so later tests see the original locale.
    Locale.setDefault(oldLocale)
  }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import io.kaitai.struct.datatype.DataType.StrFromBytesType
import io.kaitai.struct.format._
import io.kaitai.struct.precompile.CanonicalizeEncodingNames._
import io.kaitai.struct.problems._
import io.kaitai.struct.Platform

class CanonicalizeEncodingNames(specs: ClassSpecs) extends PrecompileStep {
override def run(): Iterable[CompilationProblem] = specs.mapRec(canonicalize)
Expand Down Expand Up @@ -48,7 +49,7 @@ object CanonicalizeEncodingNames {
(original, None)
} else {
// See if any aliases match
aliasToCanonical.get(original.toUpperCase) match {
aliasToCanonical.get(Platform.toUpperLocaleInsensitive(original)) match {
case Some(canonical) =>
(
canonical,
Expand All @@ -65,7 +66,7 @@ object CanonicalizeEncodingNames {

private val aliasToCanonical: Map[String, String] =
EncodingList.canonicalToAlias.flatMap { case (canonical, aliases) =>
aliases.map(alias => (alias.toUpperCase, canonical))
aliases.map(alias => (Platform.toUpperLocaleInsensitive(alias), canonical))
} ++
EncodingList.canonicalToAlias.keys.map(x => x.toUpperCase -> x)
EncodingList.canonicalToAlias.keys.map(x => Platform.toUpperLocaleInsensitive(x) -> x)
}

0 comments on commit 4ddd3c7

Please sign in to comment.