Patch: Fix UTF8 character encoding
Patrick Ben Koetter
p at sys4.de
Wed Jul 11 21:25:50 CEST 2018
The attached patch fixes a UTF-8 encoding error in amavis.
p at rick
--
[*] sys4 AG
https://sys4.de, +49 (89) 30 90 46 64
Schleißheimer Straße 26/MG,80333 München
Sitz der Gesellschaft: München, Amtsgericht München: HRB 199263
Vorstand: Patrick Ben Koetter, Marc Schiffbauer, Wolfgang Stief
Aufsichtsratsvorsitzender: Florian Kirstein
-------------- next part --------------
--- amavisd-new-2.11.0/amavisd 2016-04-26 21:24:33.000000000 +0200
+++ amavis-patched/amavisd 2018-07-11 16:38:46.631642227 +0200
@@ -5154,6 +5154,203 @@
1;
+#^L
+package Email::MIME::RFC2047::Encoder;
+$Email::MIME::RFC2047::Encoder::VERSION = '0.97';
+use strict;
+use utf8;
+
+# ABSTRACT: Encoding of non-ASCII MIME headers
+
+use Encode ();
+use MIME::Base64 (); # Not present, but not needed because we use this module only for Q encoding.
+
+# Characters whose presence forces a phrase to be emitted as a quoted-string.
+# The value is interpolated into a regex character class ([$rfc_specials]),
+# so it is written in regex syntax.  The backslash has to reach the regex
+# doubled, which takes four backslashes inside single quotes; with only two
+# the class received '\,' (an escaped comma) and never matched a backslash.
+my $rfc_specials = '()<>\[\]:;\@\\\\,."';
+
+# Constructor.  Accepts either a hash reference or a flat key/value list with
+# the optional keys 'encoding' and 'method'.
+#
+# Defaults: without an explicit encoding the object uses 'utf-8' and method
+# 'Q'; with an explicit encoding the method defaults to 'B'.  The method is
+# stored upper-cased.  Dies if Encode does not know the encoding.
+sub new {
+    my ($class, @args) = @_;
+
+    my $opts    = ref($args[0]) ? $args[0] : { @args };
+    my $charset = $opts->{encoding};
+    my $scheme  = $opts->{method};
+
+    # The default method depends on whether the caller chose an encoding,
+    # so decide it before the encoding itself is defaulted.
+    my $fallback_scheme = defined($charset) ? 'B' : 'Q';
+    $charset = 'utf-8'          unless defined($charset);
+    $scheme  = $fallback_scheme unless defined($scheme);
+
+    my $codec = Encode::find_encoding($charset)
+        or die("encoding '$charset' not found");
+
+    return bless(
+        {
+            encoding => $charset,
+            encoder  => $codec,
+            method   => uc($scheme),
+        },
+        $class,
+    );
+}
+
+# Encode $string in 'text' mode and return the result of _encode.
+sub encode_text {
+    my $self = shift;
+    my ($text) = @_;
+
+    return $self->_encode(text => $text);
+}
+
+# Encode $string in 'phrase' mode and return the result of _encode.
+sub encode_phrase {
+    my $self = shift;
+    my ($text) = @_;
+
+    return $self->_encode(phrase => $text);
+}
+
+# Encode $string for use in a MIME header and return the encoded string.
+#
+#   $mode   - 'phrase' makes plain ASCII words candidates for a quoted-string;
+#             any other value (callers pass 'text') copies them through as-is.
+#   $string - character string to encode; it is split on whitespace and runs
+#             of whitespace collapse to a single space in the result.
+#
+# Words containing characters >= U+0080, and words that look like an
+# encoded-word themselves ('=?...?='), are emitted as encoded-words using
+# $self->{method} ('B' or Q encoding) and $self->{encoding}.
+sub _encode {
+    my ($self, $mode, $string) = @_;
+
+    my $encoder = $self->{encoder};
+    my $result  = '';
+
+    # $string is split on whitespace. Each $word is categorized into
+    # 'mime', 'quoted' or 'text'. The intermediate result of the conversion of
+    # consecutive words of the same types is accumulated in $buffer.
+    # The type of the buffer is tracked in $buffer_type.
+    # The method _finish_buffer is called to finish the encoding of the
+    # buffered content and append to the result.
+    my $buffer = '';
+    my $buffer_type;
+
+    for my $word (split(/\s+/, $string)) {
+        $word =~ s/[\x00-\x1f\x7f]//g;    # better remove control chars
+
+        # Skip empty words.  This covers the empty field produced by leading
+        # white space and, because the check runs after the control chars
+        # were stripped, words that consisted of control chars only.  Letting
+        # such a word through would add a stray space to the output and
+        # needlessly split a run of encoded words.
+        next if $word eq '';
+
+        my $word_type;
+
+        if ($word =~ /[\x80-\x{10ffff}]|(^=\?.*\?=\z)/s) {
+            # also encode any word that starts with '=?' and ends with '?='
+            $word_type = 'mime';
+        }
+        elsif ($mode eq 'phrase') {
+            $word_type = 'quoted';
+        }
+        else {
+            $word_type = 'text';
+        }
+
+        # A change of word type ends the current buffer.
+        $self->_finish_buffer(\$result, $buffer_type, \$buffer)
+            if $buffer ne '' && $buffer_type ne $word_type;
+        $buffer_type = $word_type;
+
+        if ($word_type eq 'text') {
+            # Plain text bypasses the buffer and goes straight to the result.
+            $result .= ' ' if $result ne '';
+            $result .= $word;
+        }
+        elsif ($word_type eq 'quoted') {
+            $buffer .= ' ' if $buffer ne '';
+            $buffer .= $word;
+        }
+        else {
+            # RFC 2047 limits an encoded-word to 75 characters.  Seven of
+            # them are the delimiters '=?', '?', the method letter, '?' and
+            # '?='; the encoding name takes its own length.
+            my $max_len = 75 - 7 - length($self->{encoding});
+            # For 'B' the buffer holds raw bytes and Base64 turns every
+            # 3 bytes into 4 characters, so convert to a byte budget.
+            $max_len = 3 * ($max_len >> 2) if $self->{method} eq 'B';
+
+            my @chars;
+            # The space separating this word from the previous buffered word
+            # has to be carried inside the encoded-word.
+            push(@chars, ' ') if $buffer ne '';
+            push(@chars, split(//, $word));
+
+            for my $char (@chars) {
+                my $chunk;
+
+                if ($self->{method} eq 'B') {
+                    $chunk = $encoder->encode($char);
+                }
+                elsif ($char =~ /[()<>@,;:\\".\[\]=?_]/) {
+                    # special character; RFC 2047 asks for upper case
+                    # hexadecimal digits
+                    $chunk = sprintf('=%02X', ord($char));
+                }
+                elsif ($char =~ /[\x80-\x{10ffff}]/) {
+                    # non-ASCII character: escape every byte of its encoded
+                    # form, so a character is never split across two
+                    # encoded-words
+                    my $enc_char = $encoder->encode($char);
+                    $chunk = '';
+
+                    for my $byte (unpack('C*', $enc_char)) {
+                        $chunk .= sprintf('=%02X', $byte);
+                    }
+                }
+                elsif ($char eq ' ') {
+                    $chunk = '_';
+                }
+                else {
+                    $chunk = $char;
+                }
+
+                if (length($buffer) + length($chunk) <= $max_len) {
+                    $buffer .= $chunk;
+                }
+                else {
+                    # Buffer is full: emit it as one encoded-word and start
+                    # the next one with the current chunk.
+                    $self->_finish_buffer(\$result, 'mime', \$buffer);
+                    $buffer = $chunk;
+                }
+            }
+        }
+    }
+
+    $self->_finish_buffer(\$result, $buffer_type, \$buffer)
+        if $buffer ne '';
+
+    return $result;
+}
+
+# Append the content of $$buffer to $$result in its final form, then empty
+# $$buffer.  A single space is inserted first when $$result is not empty.
+#
+#   $result      - reference to the output string being built
+#   $buffer_type - 'quoted': emit the buffer verbatim, or as a quoted-string
+#                  (with '\' and '"' backslash-escaped) when it contains any
+#                  character from $rfc_specials;
+#                  'mime': emit the buffer as an encoded-word, Base64-encoding
+#                  it first when the method is 'B';
+#                  any other value: emit nothing but still clear the buffer
+#   $buffer      - reference to the accumulated words
+#
+# Returns nothing.
+sub _finish_buffer {
+    my ($self, $result, $buffer_type, $buffer) = @_;
+
+    $$result .= ' ' if $$result ne '';
+
+    if ($buffer_type eq 'quoted') {
+        if ($$buffer =~ /[$rfc_specials]/) {
+            # use quoted string if buffer contains special chars
+            # (a capture group is used rather than $&, which slows down every
+            # regex in the program on perls older than 5.20)
+            $$buffer =~ s/([\\"])/\\$1/g;
+
+            $$result .= qq("$$buffer");
+        }
+        else {
+            $$result .= $$buffer;
+        }
+    }
+    elsif ($buffer_type eq 'mime') {
+        $$result .= "=?$self->{encoding}?$self->{method}?";
+
+        if ($self->{method} eq 'B') {
+            # empty second argument: no line breaks in the Base64 output
+            $$result .= MIME::Base64::encode_base64($$buffer, '');
+        }
+        else {
+            # for Q encoding the buffer already holds the escaped text
+            $$result .= $$buffer;
+        }
+
+        $$result .= '?=';
+    }
+
+    $$buffer = '';
+
+    return;
+}
+
+1;
+
+#^L
+package Amavis::Custom::rfc2047_Tools;
+use strict;
+
+# Replacement for the original amavis q_encode, which breaks multibyte
+# characters.  Returns $octets as RFC 2047 encoded text produced by
+# Email::MIME::RFC2047::Encoder with its defaults (utf-8, Q encoding).
+#
+#   $octets   - header text as a byte string
+#   $encoding - accepted for call compatibility only; the result is always
+#               Q-encoded
+#   $charset  - character set the bytes in $octets are in.  It is used to
+#               decode them; decoding everything as UTF-8 would turn the
+#               octets of any other charset into U+FFFD replacement
+#               characters.  A missing, UTF-8 or unknown charset keeps the
+#               UTF-8 decoding.
+#               NOTE(review): assumes callers pass octets already encoded in
+#               $charset, as the replaced function's signature suggests.
+sub q_encode($$$) {
+  my($octets,$encoding,$charset) = @_;
+  my $codec;
+  $codec = Encode::find_encoding($charset)
+    if defined $charset && $charset !~ /\Autf-?8\z/i;
+  my $text = $codec ? $codec->decode($octets)
+                    : Encode::decode_utf8($octets);
+  my $encoder = Email::MIME::RFC2047::Encoder->new;
+  return $encoder->encode_text($text);
+}
+
+1;
+
#
package Amavis::rfc2821_2822_Tools;
use strict;
@@ -9375,7 +9572,7 @@
$field_body_is_utf8?'Y':'N', $chset,
$field_name, $field_body, $field_body_octets);
my $qb = c('hdr_encoding_qb');
- my $encoder_func = uc $qb eq 'Q' ? \&q_encode
+ my $encoder_func = uc $qb eq 'Q' ? \&Amavis::Custom::rfc2047_Tools::q_encode
: \&MIME::Words::encode_mimeword;
$field_body = join("\n", map { /^[\001-\011\013\014\016-\177]*\z/ ? $_
: &$encoder_func($_,$qb,$chset) }
More information about the amavis-users
mailing list