From f71abf8d535c494320afd24a157db5de9079970d Mon Sep 17 00:00:00 2001 From: IKEDA Soji Date: Wed, 11 Mar 2020 17:19:11 +0900 Subject: [PATCH 1/5] - WWSympa: Since CGI of some HTTP servers might split script-path and extra-path of script-URI inproperly, we'd be better to reconstruct them: SCRIPT_NAME and PATH_INFO. Note that we shouldn't use non-standard CGI environment varialbes such as REQUEST_URI. - Additional environment variable SYMPA_DOMAIN stands for available mail domain (a.k.a. "robot"). - If no robot providing web service was found according to client's request, error response will be returned. --- src/cgi/wwsympa.fcgi.in | 15 ++--- src/lib/Makefile.am | 1 + src/lib/Sympa/WWW/FastCGI.pm | 83 +++++++++++++++++++++++++ src/lib/Sympa/WWW/Tools.pm | 116 ++++++++++++++++++++--------------- 4 files changed, 155 insertions(+), 60 deletions(-) create mode 100644 src/lib/Sympa/WWW/FastCGI.pm diff --git a/src/cgi/wwsympa.fcgi.in b/src/cgi/wwsympa.fcgi.in index 2eb8aecb0..7689121ed 100644 --- a/src/cgi/wwsympa.fcgi.in +++ b/src/cgi/wwsympa.fcgi.in @@ -38,7 +38,6 @@ use strict; use lib split(/:/, $ENV{SYMPALIB} || ''), '--modulesdir--'; use Archive::Zip qw(); -use CGI::Fast qw(); use DateTime; use DateTime::Format::Mail; use Digest::MD5; @@ -92,6 +91,7 @@ use Sympa::Tools::Text; use Sympa::Tracking; use Sympa::User; use Sympa::WWW::Auth; +use Sympa::WWW::FastCGI; use Sympa::WWW::Marc::Search; use Sympa::WWW::Report; use Sympa::WWW::Session; @@ -1050,7 +1050,7 @@ $log->syslog('info', 'WWSympa started, process %d', $PID); # Main loop. my $loop_count = 0; my $start_time = time; -while ($query = CGI::Fast->new) { +while ($query = Sympa::WWW::FastCGI->new) { $loop_count++; undef $param; @@ -1111,13 +1111,10 @@ while ($query = CGI::Fast->new) { %in = $query->Vars; # Determin robot. - # N.B. As of 6.2.15, the http_host parameter will match with the host name - # and path locally detected by server. If remotely detected host name - # and / or path should be differ, the proxy must adjust them. - # N.B. As of 6.2.34, wwsympa_url parameter may be optional. - $robot = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url'); - unless (Conf::get_robot_conf($robot, 'wwsympa_url')) { - print "Status: 404 Not Found\n"; + $robot = $ENV{SYMPA_DOMAIN}; + unless ($robot) { + # No robot providing web service found. + print "Status: 421 Misdirected Request\n"; print "\n\n"; next; } diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am index d4a070347..9c547c48c 100644 --- a/src/lib/Makefile.am +++ b/src/lib/Makefile.am @@ -188,6 +188,7 @@ nobase_modules_DATA = \ Sympa/Upgrade.pm \ Sympa/User.pm \ Sympa/WWW/Auth.pm \ + Sympa/WWW/FastCGI.pm \ Sympa/WWW/Marc.pm \ Sympa/WWW/Marc/Search.pm \ Sympa/WWW/Report.pm \ diff --git a/src/lib/Sympa/WWW/FastCGI.pm b/src/lib/Sympa/WWW/FastCGI.pm new file mode 100644 index 000000000..a55e371af --- /dev/null +++ b/src/lib/Sympa/WWW/FastCGI.pm @@ -0,0 +1,83 @@ +# -*- indent-tabs-mode: nil; -*- +# vim:ft=perl:et:sw=4 + +# Sympa - SYsteme de Multi-Postage Automatique +# +# Copyright 2020 The Sympa Community. See the AUTHORS.md +# file at the top-level directory of this distribution and at +# . +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +package Sympa::WWW::FastCGI; + +use strict; +use warnings; + +use base qw(CGI::Fast); + +use Sympa::WWW::Tools; + +sub new { + my $class = shift; + my @args = @_; + + my $self = $class->SUPER::new(@args); + + # Determin mail domain (a.k.a. "robot") the request is dispatched. + # N.B. As of 6.2.15, the http_host parameter will match with the host name + # and path locally detected by server. If remotely detected host name + # and / or path should be differ, the proxy must adjust them. + # N.B. As of 6.2.34, wwsympa_url parameter may be optional. + my @vars = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url'); + if (@vars) { + @ENV{qw(SYMPA_DOMAIN SCRIPT_NAME PATH_INFO)} = @vars; + } else { + delete $ENV{SYMPA_DOMAIN}; + } + + $self; +} + +1; + +__END__ + +=encoding utf-8 + +=head1 NAME + +Sympa::WWW::FastCGI - CGI Interface for FastCGI of Sympa + +=head1 SYNOPOSIS + + TBD. + +=head1 DESCRIPTION + +TBD. + +=head1 SEE ALSO + +L. + +RFC 3875, The Common Gateway Interface (CGI) Version 1.1. +L. + +=head1 HISTORY + +L appeared on Sympa 6.2.55b. + +=cut + diff --git a/src/lib/Sympa/WWW/Tools.pm b/src/lib/Sympa/WWW/Tools.pm index 62eb7a1c6..4fadc71c6 100644 --- a/src/lib/Sympa/WWW/Tools.pm +++ b/src/lib/Sympa/WWW/Tools.pm @@ -33,6 +33,7 @@ use Digest::MD5; use English qw(-no_match_vars); use File::Path qw(); use URI; +use URI::Escape qw(); use Sympa; use Conf; @@ -227,65 +228,78 @@ sub get_my_url { sub get_robot { my @keys = @_; - my $request_host = _get_server_name(); - my $request_path = $ENV{'REQUEST_URI'} || ''; - my $robot_id; - - if (defined $request_host and length $request_host) { - my $selected_path = ''; - foreach my $rid (Sympa::List::get_robots()) { - my $local_url; - foreach my $key (@keys) { - $local_url = Conf::get_robot_conf($rid, $key); - last if $local_url; - } - next unless $local_url; - - if ($local_url =~ m{\A[-+\w]+:}) { - ; - } elsif ($local_url =~ m{\A//}) { - $local_url = 'http:' . $local_url; - } else { - $local_url = 'http://' . $local_url; - } + # Get host part of script-URI from standard CGI environment variable + # SERVER_NAME. + # NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Server:" request field + # is _no longer_ referred and this function returns only locally detected + # server name. + my $request_host = lc($ENV{SERVER_NAME} // ''); + return unless length $request_host; + my $ipv6_re = Sympa::Regexps::ipv6(); + if ($request_host =~ /\A$ipv6_re\z/) { # IPv6 address + $request_host = sprintf '[%s]', $request_host; + } + + # Since CGI of some HTTP servers might split script-path and extra-path of + # script-URI inproperly, we'd be better to reconstruct them from these + # standard CGI environment variables: + # - SCRIPT_NAME: a URI path which could identify the CGI script. + # - PATH_INFO: derived from the portion of the URI path hierarchy + # following the part that identifies the script itself. + # Note that they are not URL-encoded, unlike non-standard REQUEST_URI. + my $org_script_name = $ENV{SCRIPT_NAME} // ''; + my $org_path_info = $ENV{PATH_INFO} // ''; + return unless '' eq $org_script_name or 0 == index $org_script_name, '/'; + return unless '' eq $org_path_info or 0 == index $org_path_info, '/'; + my $request_path = $org_script_name . $org_path_info; + + # Find mail domain (a.k.a. "robot") of which web URL matches script-URI. + my ($robot_id, $script_path) = (undef, ''); + foreach my $rid (Sympa::List::get_robots()) { + my $local_url; + foreach my $key (@keys) { + $local_url = Conf::get_robot_conf($rid, $key); + last if $local_url; + } + next unless $local_url; - my $uri = URI->new($local_url); - next - unless $uri - and $uri->scheme - and grep { $uri->scheme eq $_ } qw(http https); - - my $host = lc($uri->host || ''); - my $path = $uri->path || '/'; - #FIXME:might need percent-decode hosts and/or paths - next - unless $request_host eq $host - and 0 == index $request_path, $path; - - # The longest path wins. - ($robot_id, $selected_path) = ($rid, $path) - if length $selected_path < length $path; + if ($local_url =~ m{\A[-+\w]+:}) { + ; + } elsif ($local_url =~ m{\A//}) { + $local_url = 'http:' . $local_url; + } else { + $local_url = 'http://' . $local_url; } - } - return (defined $robot_id) ? $robot_id : $Conf::Conf{'domain'}; -} + my $uri = URI->new($local_url); + next + unless $uri + and $uri->scheme + and grep { $uri->scheme eq $_ } qw(http https); -# Old name: (part of) get_header_field() in wwsympa.fcgi. -# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Server:" request field is -# _no longer_ referred and this function returns only locally detected server -# name. -sub _get_server_name { - my $server = $ENV{SERVER_NAME}; - return undef unless defined $server and length $server; + my $host = lc URI::Escape::uri_unescape($uri->host // ''); + my $path = URI::Escape::uri_unescape($uri->path // '/'); + next unless $request_host eq $host; + next + unless $request_path eq $path + or 0 == index($request_path, $path . '/'); - my $ipv6_re = Sympa::Regexps::ipv6(); - if ($server =~ /\A$ipv6_re\z/) { # IPv6 address - $server = "[$server]"; + # The longest path wins. + ($robot_id, $script_path) = ($rid, $path) + if length $script_path < length $path; } - return lc $server; + + return unless $robot_id; + return + wantarray + ? ($robot_id, $script_path, substr $request_path, length $script_path) + : $robot_id; } +# Old name: (part of) get_header_field() in wwsympa.fcgi. +# No longer used. +#sub _get_server_name; + # Old name: (part of) get_header_field() in wwsympa.fcgi. # NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Host:" request field is # _no longer_ referred and this function returns only locally detected host From 3761cb685280e12bc17353816b5ab8e969f554a8 Mon Sep 17 00:00:00 2001 From: IKEDA Soji Date: Sun, 8 Mar 2020 20:40:38 +0900 Subject: [PATCH 2/5] Refactoring. Don't use REQUEST_URI but use SCRIPT_NAME, PATH_INFO and QUERY_STRING. --- src/cgi/wwsympa.fcgi.in | 4 ++-- src/lib/Sympa/WWW/Tools.pm | 26 +++++++------------------- 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/src/cgi/wwsympa.fcgi.in b/src/cgi/wwsympa.fcgi.in index 7689121ed..e6f960645 100644 --- a/src/cgi/wwsympa.fcgi.in +++ b/src/cgi/wwsympa.fcgi.in @@ -3861,7 +3861,7 @@ sub do_help { # Strip extensions. $in{'help_topic'} =~ s/[.].*// if $in{'help_topic'}; # Given partial top URI, redirect to base. - unless ($in{'help_topic'} or $ENV{REQUEST_URI} =~ /\/\z/) { + unless ($in{'help_topic'} or ($ENV{PATH_INFO} // '') =~ m{/\z}) { $param->{'redirect_to'} = Sympa::get_url( $list, 'help', nomenu => $param->{'nomenu'}, @@ -8659,7 +8659,7 @@ sub do_arc { ); return 1; } - unless ($in{'arc_file'} or $ENV{REQUEST_URI} =~ /\/\z/) { + unless ($in{'arc_file'} or ($ENV{PATH_INFO} // '') =~ m{/\z}) { $param->{'redirect_to'} = Sympa::get_url( $list, 'arc', nomenu => $param->{'nomenu'}, diff --git a/src/lib/Sympa/WWW/Tools.pm b/src/lib/Sympa/WWW/Tools.pm index 4fadc71c6..1767bf2a9 100644 --- a/src/lib/Sympa/WWW/Tools.pm +++ b/src/lib/Sympa/WWW/Tools.pm @@ -202,26 +202,14 @@ sub get_my_url { my $robot = shift; my %options = @_; - my $original_path_info; - - # Try getting encoded PATH_INFO and query. - my $request_uri = $ENV{REQUEST_URI} || ''; - my $script_name = $ENV{SCRIPT_NAME} || ''; - if ( $request_uri eq $script_name - or 0 == index($request_uri, $script_name . '?') - or 0 == index($request_uri, $script_name . '/')) { - $original_path_info = substr($request_uri, length $script_name); - } else { - # Workaround: Encode PATH_INFO again and use it. - my $path_info = $ENV{PATH_INFO} || ''; - my $query_string = $ENV{QUERY_STRING}; - $original_path_info = - Sympa::Tools::Text::encode_uri($path_info, omit => '/') - . ($query_string ? ('?' . $query_string) : ''); - } + my $path_info = $ENV{PATH_INFO} // ''; + my $query_string = $ENV{QUERY_STRING} // ''; - return Sympa::get_url($robot, undef, authority => $options{authority}) - . $original_path_info; + return + Sympa::get_url($robot, undef, authority => $options{authority}) + . Sympa::Tools::Text::encode_uri($path_info, omit => '/') + . (length $query_string ? '?' : '') + . $query_string; } # Determine robot. From 4c1e9158d1c44e24d2f0b60929c5537e7798eb05 Mon Sep 17 00:00:00 2001 From: IKEDA Soji Date: Tue, 10 Mar 2020 16:14:48 +0900 Subject: [PATCH 3/5] Adding debug log for CGI variables, and tests. --- Makefile.am | 1 + src/cgi/wwsympa.fcgi.in | 11 ++++++-- src/lib/Sympa/WWW/FastCGI.pm | 2 ++ t/WWW_Tools.t | 53 ++++++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 t/WWW_Tools.t diff --git a/Makefile.am b/Makefile.am index db79137ad..4ba5313d6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -31,6 +31,7 @@ check_SCRIPTS = \ t/Language.t \ t/LockedFile.t \ t/Regexps.t \ + t/WWW_Tools.t \ t/compile_modules.t \ t/compile_executables.t \ t/compile_scenarios.t \ diff --git a/src/cgi/wwsympa.fcgi.in b/src/cgi/wwsympa.fcgi.in index e6f960645..e4ce1e63d 100644 --- a/src/cgi/wwsympa.fcgi.in +++ b/src/cgi/wwsympa.fcgi.in @@ -1107,6 +1107,15 @@ while ($query = Sympa::WWW::FastCGI->new) { ## Though I don't know why, __DIE__ handler is cleared after INIT. Sympa::Crash::register_handler(); + foreach my $envvar ( + qw(ORIG_PATH_INFO ORIG_SCRIPT_NAME + PATH_INFO QUERY_STRING REMOTE_ADDR REMOTE_HOST REQUEST_METHOD + SCRIPT_NAME SERVER_NAME SERVER_PORT + SYMPA_DOMAIN) + ) { + $log->syslog('debug', '%s=%s', $envvar, $ENV{$envvar}); + } + ## Get params in a hash %in = $query->Vars; @@ -1940,8 +1949,6 @@ sub _crash_handler { sub _split_params { my $args_string = shift; - $log->syslog('debug', "PATH_INFO: %s", $ENV{'PATH_INFO'}); - $args_string =~ s+^/++; my $ending_slash = 0; diff --git a/src/lib/Sympa/WWW/FastCGI.pm b/src/lib/Sympa/WWW/FastCGI.pm index a55e371af..8a16c9e1f 100644 --- a/src/lib/Sympa/WWW/FastCGI.pm +++ b/src/lib/Sympa/WWW/FastCGI.pm @@ -42,6 +42,8 @@ sub new { # N.B. As of 6.2.34, wwsympa_url parameter may be optional. my @vars = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url'); if (@vars) { + @ENV{qw(ORIG_SCRIPT_NAME ORIG_PATH_INFO)} = + @ENV{qw(SCRIPT_NAME PATH_INFO)}; @ENV{qw(SYMPA_DOMAIN SCRIPT_NAME PATH_INFO)} = @vars; } else { delete $ENV{SYMPA_DOMAIN}; diff --git a/t/WWW_Tools.t b/t/WWW_Tools.t new file mode 100644 index 000000000..9a137e204 --- /dev/null +++ b/t/WWW_Tools.t @@ -0,0 +1,53 @@ +# -*- indent-tabs-mode: nil; -*- +# vim:ft=perl:et:sw=4 + +use strict; +use warnings; +use English qw(-no_match_vars); +use File::Path qw(make_path rmtree); +use Test::More; + +BEGIN { + use_ok 'Sympa::WWW::Tools'; +} + +# get_robot() + +%Conf::Conf = ( + domain => 'mail.example.org', + listmaster => 'listmaster@example.org', + wwsympa_url => 'http://web.example.org/sym/pa', + etc => 't/tmp/etc', +); +make_path $Conf::Conf{'etc'} or die $ERRNO; + +$ENV{SERVER_NAME} = 'web.example.org'; +$ENV{SCRIPT_NAME} = '/sym/pa'; +$ENV{PATH_INFO} = undef; +is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], + ['mail.example.org', '/sym/pa', ''], + 'SCRIPT_NAME & empty PATH_INFO'; + +$ENV{SERVER_NAME} = 'web.example.org'; +$ENV{SCRIPT_NAME} = '/sym/pa'; +$ENV{PATH_INFO} = '/help'; +is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], + ['mail.example.org', '/sym/pa', '/help'], + 'SCRIPT_NAME & non-empty PATH_INFO'; + +$ENV{SERVER_NAME} = 'web.example.org'; +$ENV{SCRIPT_NAME} = '/sym'; +$ENV{PATH_INFO} = '/pa/help'; +is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], + ['mail.example.org', '/sym/pa', '/help'], + 'split script-path (e.g. mod_proxy_fcgi on httpd)'; + +$ENV{SERVER_NAME} = 'web.example.org'; +$ENV{SCRIPT_NAME} = '/sym/pa/help'; +$ENV{PATH_INFO} = undef; +is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], + ['mail.example.org', '/sym/pa', '/help'], + 'no PATH_INFO (e.g. nginx without fastcgi_split_path_info)'; + +done_testing(); +rmtree 't/tmp'; From ccb338804c1a46de3334ac6f8618ebef502ac77c Mon Sep 17 00:00:00 2001 From: IKEDA Soji Date: Thu, 12 Mar 2020 10:34:06 +0900 Subject: [PATCH 4/5] Adding tests more --- t/WWW_Tools.t | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/t/WWW_Tools.t b/t/WWW_Tools.t index 9a137e204..a7e1846f3 100644 --- a/t/WWW_Tools.t +++ b/t/WWW_Tools.t @@ -49,5 +49,29 @@ is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], ['mail.example.org', '/sym/pa', '/help'], 'no PATH_INFO (e.g. nginx without fastcgi_split_path_info)'; +$ENV{SERVER_NAME} = 'other.example.org'; +$ENV{SCRIPT_NAME} = '/sym/pa'; +$ENV{PATH_INFO} = '/help'; +is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], [], + 'mismatch SERVER_NAME'; + +$ENV{SERVER_NAME} = 'web.example.org'; +$ENV{SCRIPT_NAME} = '/sympa'; +$ENV{PATH_INFO} = '/help'; +is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], [], + 'mismatch SCRIPT_NAME'; + +$ENV{SERVER_NAME} = 'web.example.org'; +$ENV{SCRIPT_NAME} = 'sym/pa'; +$ENV{PATH_INFO} = '/help'; +is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], [], + 'dubious SCRIPT_NAME'; + +$ENV{SERVER_NAME} = 'web.example.org'; +$ENV{SCRIPT_NAME} = '/sym/pa/'; +$ENV{PATH_INFO} = 'help'; +is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], [], + 'dubious PATH_INFO'; + done_testing(); rmtree 't/tmp'; From c23bd70a2abfc2d9cd4c4e9de5d3c198b548a23d Mon Sep 17 00:00:00 2001 From: IKEDA Soji Date: Sat, 14 Mar 2020 09:41:37 +0900 Subject: [PATCH 5/5] Small fix. --- src/lib/Sympa/WWW/Tools.pm | 9 +++++---- t/WWW_Tools.t | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/lib/Sympa/WWW/Tools.pm b/src/lib/Sympa/WWW/Tools.pm index 1767bf2a9..d16460a44 100644 --- a/src/lib/Sympa/WWW/Tools.pm +++ b/src/lib/Sympa/WWW/Tools.pm @@ -242,7 +242,7 @@ sub get_robot { my $request_path = $org_script_name . $org_path_info; # Find mail domain (a.k.a. "robot") of which web URL matches script-URI. - my ($robot_id, $script_path) = (undef, ''); + my ($robot_id, $script_path) = (undef, undef); foreach my $rid (Sympa::List::get_robots()) { my $local_url; foreach my $key (@keys) { @@ -266,15 +266,16 @@ sub get_robot { and grep { $uri->scheme eq $_ } qw(http https); my $host = lc URI::Escape::uri_unescape($uri->host // ''); - my $path = URI::Escape::uri_unescape($uri->path // '/'); + my $path = URI::Escape::uri_unescape($uri->path // ''); next unless $request_host eq $host; next unless $request_path eq $path - or 0 == index($request_path, $path . '/'); + or 0 == index $request_path, $path . '/'; # The longest path wins. ($robot_id, $script_path) = ($rid, $path) - if length $script_path < length $path; + if not defined $script_path + or length $script_path < length $path; } return unless $robot_id; diff --git a/t/WWW_Tools.t b/t/WWW_Tools.t index a7e1846f3..126033c1c 100644 --- a/t/WWW_Tools.t +++ b/t/WWW_Tools.t @@ -73,5 +73,21 @@ $ENV{PATH_INFO} = 'help'; is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], [], 'dubious PATH_INFO'; +$Conf::Conf{wwsympa_url} = 'http://web.example.org'; + +$ENV{SERVER_NAME} = 'web.example.org'; +$ENV{SCRIPT_NAME} = ''; +$ENV{PATH_INFO} = '/help'; +is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], + ['mail.example.org', '', '/help'], + 'URL prefix on the top: (empty) SCRIPT_NAME & non-empty PATH_INFO'; + +$ENV{SERVER_NAME} = 'web.example.org'; +$ENV{SCRIPT_NAME} = '/help'; +$ENV{PATH_INFO} = undef; +is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], + ['mail.example.org', '', '/help'], + 'URL prefix on the top: no PATH_INFO'; + done_testing(); rmtree 't/tmp';