Patch: Fix UTF8 character encoding

Patrick Ben Koetter p at sys4.de
Wed Jul 11 21:25:50 CEST 2018


The attached patch fixes a UTF-8 encoding error in amavis: the original q_encode function breaks multibyte characters, so the patch replaces it.

p at rick



-- 
[*] sys4 AG
 
https://sys4.de, +49 (89) 30 90 46 64
Schleißheimer Straße 26/MG,80333 München
 
Sitz der Gesellschaft: München, Amtsgericht München: HRB 199263
Vorstand: Patrick Ben Koetter, Marc Schiffbauer, Wolfgang Stief
Aufsichtsratsvorsitzender: Florian Kirstein
 
-------------- next part --------------
--- amavisd-new-2.11.0/amavisd	2016-04-26 21:24:33.000000000 +0200
+++ amavis-patched/amavisd	2018-07-11 16:38:46.631642227 +0200
@@ -5154,6 +5154,203 @@
 
 1;
 
+#^L
+package Email::MIME::RFC2047::Encoder;
+$Email::MIME::RFC2047::Encoder::VERSION = '0.97';
+use strict;
+use utf8;
+
+# ABSTRACT: Encoding of non-ASCII MIME headers
+
+use Encode ();
+use MIME::Base64 (); # only called for the 'B' method (see _finish_buffer)
+# Body of the character class tested in _finish_buffer; '\\\\' yields an escaped backslash.
+my $rfc_specials = '()<>\[\]:;\@\\\\,."';
+
+sub new {
+    # Constructor; options arrive as one hash reference or as a flat
+    # key/value list. Recognised keys: 'encoding' and 'method'.
+    my ($class, @args) = @_;
+    my $opts = ref($args[0]) ? $args[0] : { @args };
+
+    my $encoding = $opts->{encoding};
+    my $scheme   = $opts->{method};
+
+    # No encoding given: utf-8 with 'Q'. Explicit encoding: 'B' by default.
+    my $default_scheme = defined($encoding) ? 'B' : 'Q';
+    $encoding = 'utf-8'         unless defined($encoding);
+    $scheme   = $default_scheme unless defined($scheme);
+
+    # Look the Encode object up once; an unknown name is fatal here.
+    my $codec = Encode::find_encoding($encoding);
+    die("encoding '$encoding' not found") unless $codec;
+
+    my %state = (
+        encoding => $encoding,
+        encoder  => $codec,
+        method   => uc($scheme),
+    );
+    return bless(\%state, $class);
+}
+
+sub encode_text {
+    # Run the given string through _encode in 'text' mode; returns its result.
+    my $self = shift;
+    return $self->_encode('text', shift);
+}
+
+sub encode_phrase {
+    # Run the given string through _encode in 'phrase' mode; returns its result.
+    my $self = shift;
+    return $self->_encode('phrase', shift);
+}
+
+sub _encode {
+    # Worker behind encode_text and encode_phrase. $mode is 'text' or 'phrase';
+    # returns the encoded header value built from the words of $string.
+    my ($self, $mode, $string) = @_;
+    my $encoder = $self->{encoder};
+    my $result = '';
+    # $string is split on whitespace. Each $word is categorized into
+    # 'mime', 'quoted' or 'text'. The intermediate result of the conversion of
+    # consecutive words of the same types is accumulated in $buffer.
+    # The type of the buffer is tracked in $buffer_type.
+    # The method _finish_buffer is called to finish the encoding of the
+    # buffered content and append to the result.
+    my $buffer = '';
+    my $buffer_type;
+
+    for my $word (split(/\s+/, $string)) {
+        $word =~ s/[\x00-\x1f\x7f]//g; # better remove control chars
+
+        # ignore leading white space and words made up of control chars only
+        next if $word eq '';
+        my $word_type;
+
+        if ($word =~ /[\x80-\x{10ffff}]|(^=\?.*\?=\z)/s) {
+            # also encode any word that starts with '=?' and ends with '?='
+            $word_type = 'mime';
+        }
+        elsif ($mode eq 'phrase') {
+            $word_type = 'quoted';
+        }
+        else {
+            $word_type = 'text';
+        }
+        # flush the buffer whenever the type of word changes
+        $self->_finish_buffer(\$result, $buffer_type, \$buffer)
+            if $buffer ne '' && $buffer_type ne $word_type;
+        $buffer_type = $word_type;
+
+        if ($word_type eq 'text') {
+            $result .= ' ' if $result ne '';
+            $result .= $word;
+        }
+        elsif ($word_type eq 'quoted') {
+            $buffer .= ' ' if $buffer ne '';
+            $buffer .= $word;
+        }
+        else {
+            # an encoded-word may be 75 chars long; 7 of them are '=?', '?X?', '?='
+            my $max_len = 75 - 7 - length($self->{encoding});
+            $max_len = 3 * ($max_len >> 2) if $self->{method} eq 'B';
+            my @chars;
+            push(@chars, ' ') if $buffer ne ''; # joins the previous mime word
+            push(@chars, split(//, $word));
+
+            for my $char (@chars) {
+                my $chunk;
+
+                if ($self->{method} eq 'B') {
+                    $chunk = $encoder->encode($char); # raw bytes; base64 is applied in _finish_buffer
+                }
+                elsif ($char =~ /[()<>@,;:\\".\[\]=?_]/) {
+                    # special character; hex in upper case as RFC 2047 recommends
+                    $chunk = sprintf('=%02X', ord($char));
+                }
+                elsif ($char =~ /[\x80-\x{10ffff}]/) {
+                    # non-ASCII character
+
+                    my $enc_char = $encoder->encode($char);
+                    $chunk = '';
+
+                    for my $byte (unpack('C*', $enc_char)) {
+                        $chunk .= sprintf('=%02X', $byte);
+                    }
+                }
+                elsif ($char eq ' ') {
+                    $chunk = '_';
+                }
+                else {
+                    $chunk = $char;
+                }
+                # start a new encoded-word once the current one is full
+                if (length($buffer) + length($chunk) <= $max_len) {
+                    $buffer .= $chunk;
+                }
+                else {
+                    $self->_finish_buffer(\$result, 'mime', \$buffer);
+                    $buffer = $chunk;
+                }
+            }
+        }
+    }
+
+    $self->_finish_buffer(\$result, $buffer_type, \$buffer)
+        if $buffer ne '';
+
+    return $result;
+}
+
+sub _finish_buffer {
+    # Append the content of $$buffer, finished according to $buffer_type
+    # ('quoted' or 'mime'), to $$result and then empty $$buffer.
+    my ($self, $result, $buffer_type, $buffer) = @_;
+    $$result .= ' ' if $$result ne '';
+
+    if ($buffer_type eq 'quoted') {
+        if ($$buffer =~ /[$rfc_specials]/) {
+            # use quoted string if buffer contains special chars; escape with a
+            # capture group because $& slows all regexes on perls before 5.20
+            $$buffer =~ s/([\\"])/\\$1/g;
+            $$result .= qq("$$buffer");
+        }
+        else {
+            $$result .= $$buffer;
+        }
+    }
+    elsif ($buffer_type eq 'mime') {
+        $$result .= "=?$self->{encoding}?$self->{method}?";
+        if ($self->{method} eq 'B') {
+            $$result .= MIME::Base64::encode_base64($$buffer, ''); # '' = no line breaks
+        }
+        else {
+            $$result .= $$buffer; # already Q-encoded by _encode
+        }
+
+        $$result .= '?=';
+    }
+
+    $$buffer = '';
+
+    return;
+}
+
+1;
+
+#^L
+package Amavis::Custom::rfc2047_Tools;
+use strict;
+# replace buggy q_encode function, original amavis code breaks multibyte characters;
+# $octets are decoded from $charset (UTF-8 if undef/unknown), $encoding is ignored
+sub q_encode($$$) {
+    my($octets,$encoding,$charset) = @_;
+    my $decoder = defined $charset ? Encode::find_encoding($charset) : undef;
+    my $string = $decoder ? $decoder->decode($octets) : Encode::decode_utf8($octets);
+    return Email::MIME::RFC2047::Encoder->new->encode_text($string);
+}
+1;
+
 #
 package Amavis::rfc2821_2822_Tools;
 use strict;
@@ -9375,7 +9572,7 @@
                       $field_body_is_utf8?'Y':'N', $chset,
                       $field_name, $field_body, $field_body_octets);
     my $qb = c('hdr_encoding_qb');
-    my $encoder_func = uc $qb eq 'Q' ? \&q_encode
+    my $encoder_func = uc $qb eq 'Q' ? \&Amavis::Custom::rfc2047_Tools::q_encode
                                      : \&MIME::Words::encode_mimeword;
     $field_body = join("\n", map { /^[\001-\011\013\014\016-\177]*\z/ ? $_
                                      : &$encoder_func($_,$qb,$chset) }


More information about the amavis-users mailing list