From 6e1f8153ad63214924920c2d7acbceb983553468 Mon Sep 17 00:00:00 2001 From: IKEDA Soji Date: Sun, 3 Nov 2024 12:31:59 +0900 Subject: [PATCH] Make Unicode::UTF8 mandatory --- .github/workflows/make-check.yml | 2 +- .gitlab-ci.yml | 2 +- .travis.yml | 2 +- cpanfile | 22 +++++++++++----------- src/cgi/wwsympa.fcgi.in | 18 ++---------------- src/lib/Sympa/Tools/Text.pm | 24 +++++++----------------- t/Tools_Text.t | 21 ++++++++------------- 7 files changed, 31 insertions(+), 60 deletions(-) diff --git a/.github/workflows/make-check.yml b/.github/workflows/make-check.yml index a0dacde35..d7621b4c9 100644 --- a/.github/workflows/make-check.yml +++ b/.github/workflows/make-check.yml @@ -61,7 +61,7 @@ jobs: --verbose --no-interactive --with-develop --with-feature=Data::Password --with-feature=ldap - --with-feature=safe-unicode --with-feature=smime + --with-feature=smime --with-feature=soap --with-feature=sqlite ${{ startsWith(matrix.os, 'macos') && '--with-feature=macos' || '' }} - name: Run tests diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index af24d8a83..4a71b5ea3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,7 +7,7 @@ - . ~/bashrc - coverage-install - coverage-setup - - cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite . + - cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite . - autoreconf -i - ./configure - cd src; make; cd .. diff --git a/.travis.yml b/.travis.yml index 494137317..c03b076ed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,7 +25,7 @@ before_install: install: - cpan-install --coverage - - cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite . + - cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite . before_script: - coverage-setup diff --git a/cpanfile b/cpanfile index 41409e677..66626b54f 100644 --- a/cpanfile +++ b/cpanfile @@ -150,6 +150,14 @@ requires 'Time::HiRes', '>= 1.29'; # Used to get Unix time from local time requires 'Time::Local', '>= 1.23'; +# Normalizes file names represented by Unicode. +# Note: Perl 5.8.1 bundles version 0.23. +# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0). +requires 'Unicode::Normalize', '>= 1.03'; + +# Sanitizes inputs with Unicode text. +requires 'Unicode::UTF8', '>= 0.58'; + # Used to create URI containing non URI-canonical characters. # Note: '3.28' is the version included in URI-1.35. requires 'URI::Escape', '>= 3.28'; @@ -190,13 +198,6 @@ recommends 'Net::DNS', '>= 0.65'; # This is required if you set "list_check_smtp" sympa.conf parameter, used to check existing aliases before mailing list creation. recommends 'Net::SMTP'; -# Normalizes file names represented by Unicode -# Note: Perl 5.8.1 bundles version 0.23. -# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0). -recommends 'Unicode::Normalize', '>= 1.03'; - -recommends 'Unicode::UTF8', '>= 0.60'; - ### Features ## @@ -324,10 +325,9 @@ feature 'soap', 'Required if you want to run the Sympa SOAP server that provides }; feature 'safe-unicode', 'Sanitizes inputs with Unicode text.' => sub { - # Note: Perl 5.8.1 bundles version 0.23. - # Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0). - requires 'Unicode::Normalize', '>= 1.03'; - requires 'Unicode::UTF8', '>= 0.60'; + # Note: These became required (>=6.2.73b). + #requires 'Unicode::Normalize', '>= 1.03'; + #requires 'Unicode::UTF8', '>= 0.58'; }; on 'test' => sub { diff --git a/src/cgi/wwsympa.fcgi.in b/src/cgi/wwsympa.fcgi.in index 7a45b12e3..a1e806ba7 100644 --- a/src/cgi/wwsympa.fcgi.in +++ b/src/cgi/wwsympa.fcgi.in @@ -39,6 +39,7 @@ use IO::File qw(); use MIME::EncWords; use MIME::Lite::HTML; use POSIX qw(); +use Unicode::UTF8; use URI; use Data::Dumper; # tentative @@ -52,10 +53,6 @@ BEGIN { and $Archive::Zip::SimpleZip::VERSION; } -BEGIN { - eval 'use Unicode::UTF8 qw()'; -} - use Sympa; use Sympa::Archive; use Conf; @@ -1063,18 +1060,7 @@ while ($query = Sympa::WWW::FastCGI->new) { while (my ($k, $v) = each %in) { next if ref $v; next if Encode::is_utf8($v); - - my $valid_utf8; - if ($Unicode::UTF8::VERSION) { - $valid_utf8 = Unicode::UTF8::valid_utf8($v); - } else { - eval { - my $u = $v; - Encode::decode('UTF-8', $u, Encode::FB_CROAK()); - $valid_utf8 = 1; - }; - } - unless ($valid_utf8) { + unless (Unicode::UTF8::valid_utf8($v)) { $log->syslog('err', 'Parameter in invalid UTF-8 %s="%s": Ignored', $k, sprintf("\\x%*v02X", "\\x", $v)); delete $in{$k}; diff --git a/src/lib/Sympa/Tools/Text.pm b/src/lib/Sympa/Tools/Text.pm index fc240fe32..e72b510fc 100644 --- a/src/lib/Sympa/Tools/Text.pm +++ b/src/lib/Sympa/Tools/Text.pm @@ -39,8 +39,8 @@ use MIME::EncWords; use Text::LineFold; use Unicode::GCString; use URI::Escape qw(); -BEGIN { eval 'use Unicode::Normalize qw()'; } -BEGIN { eval 'use Unicode::UTF8 qw()'; } +use Unicode::Normalize qw(); +use Unicode::UTF8; use Sympa::Language; use Sympa::Regexps; @@ -141,15 +141,11 @@ sub canonic_text { my $utext; if (Encode::is_utf8($text)) { $utext = $text; - } elsif ($Unicode::UTF8::VERSION) { + } else { no warnings 'utf8'; $utext = Unicode::UTF8::decode_utf8($text); - } else { - $utext = Encode::decode_utf8($text); - } - if ($Unicode::Normalize::VERSION) { - $utext = Unicode::Normalize::normalize('NFC', $utext); } + $utext = Unicode::Normalize::normalize('NFC', $utext); # Remove DOS linefeeds (^M) that cause problems with Outlook 98, AOL, # and EIMS: @@ -313,13 +309,8 @@ sub guessed_to_utf8 { and length $text and $text =~ /[^\x00-\x7F]/; - my $utf8; - if ($Unicode::UTF8::VERSION) { - $utf8 = Unicode::UTF8::decode_utf8($text) - if Unicode::UTF8::valid_utf8($text); - } else { - $utf8 = eval { Encode::decode_utf8($text, Encode::FB_CROAK()) }; - } + my $utf8 = Unicode::UTF8::decode_utf8($text) + if Unicode::UTF8::valid_utf8($text); unless (defined $utf8) { foreach my $charset (map { $_ ? @$_ : () } @legacy_charsets{@langs}) { $utf8 = @@ -332,8 +323,7 @@ sub guessed_to_utf8 { } # Apply NFC: e.g. for modified-NFD by Mac OS X. - $utf8 = Unicode::Normalize::normalize('NFC', $utf8) - if $Unicode::Normalize::VERSION; + $utf8 = Unicode::Normalize::normalize('NFC', $utf8); return Encode::encode_utf8($utf8); } diff --git a/t/Tools_Text.t b/t/Tools_Text.t index ff08480fd..642a65f29 100644 --- a/t/Tools_Text.t +++ b/t/Tools_Text.t @@ -46,18 +46,13 @@ is $dec, $unicode_email, 'decode_filesystem_safe, Unicode'; # ToDo: foldcase() # ToDo: wrap_text() -SKIP: { - skip 'Unicode::Normalize and Unicode::UTF8 required.' - unless $Unicode::Normalize::VERSION and $Unicode::UTF8::VERSION; - - # Noncharacters: U+D800, U+10FFE, U+110000, U+200000 - is Sympa::Tools::Text::canonic_text( - "\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n" - ), - Encode::encode_utf8( - "\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n" - ), - 'canonic_text'; -} +# Noncharacters: U+D800, U+10FFE, U+110000, U+200000 +is Sympa::Tools::Text::canonic_text( + "\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n" + ), + Encode::encode_utf8( + "\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n" + ), + 'canonic_text'; done_testing();