Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split request URI into SCRIPT_NAME and PATH_INFO by Sympa itself #910

Merged
merged 6 commits into from
Mar 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ check_SCRIPTS = \
t/Tools_SMIME.t \
t/Tools_Text.t \
t/Tools_Time.t \
t/WWW_Tools.t \
t/compile_executables.t \
t/compile_modules.t \
t/compile_scenarios.t \
Expand Down
30 changes: 17 additions & 13 deletions src/cgi/wwsympa.fcgi.in
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ use strict;
use lib split(/:/, $ENV{SYMPALIB} || ''), '--modulesdir--';

use Archive::Zip qw();
use CGI::Fast qw();
use DateTime;
use DateTime::Format::Mail;
use Digest::MD5;
Expand Down Expand Up @@ -92,6 +91,7 @@ use Sympa::Tools::Text;
use Sympa::Tracking;
use Sympa::User;
use Sympa::WWW::Auth;
use Sympa::WWW::FastCGI;
use Sympa::WWW::Marc::Search;
use Sympa::WWW::Report;
use Sympa::WWW::Session;
Expand Down Expand Up @@ -1049,7 +1049,7 @@ $log->syslog('info', 'WWSympa started, process %d', $PID);
# Main loop.
my $loop_count = 0;
my $start_time = time;
while ($query = CGI::Fast->new) {
while ($query = Sympa::WWW::FastCGI->new) {
$loop_count++;

undef $param;
Expand Down Expand Up @@ -1106,17 +1106,23 @@ while ($query = CGI::Fast->new) {
## Though I don't know why, __DIE__ handler is cleared after INIT.
Sympa::Crash::register_handler();

foreach my $envvar (
qw(ORIG_PATH_INFO ORIG_SCRIPT_NAME
PATH_INFO QUERY_STRING REMOTE_ADDR REMOTE_HOST REQUEST_METHOD
SCRIPT_NAME SERVER_NAME SERVER_PORT
SYMPA_DOMAIN)
) {
$log->syslog('debug', '%s=%s', $envvar, $ENV{$envvar});
}

## Get params in a hash
%in = $query->Vars;

# Determine robot.
# N.B. As of 6.2.15, the http_host parameter will match with the host name
# and path locally detected by server. If remotely detected host name
# and / or path differ, the proxy must adjust them.
# N.B. As of 6.2.34, wwsympa_url parameter may be optional.
$robot = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url');
unless (Conf::get_robot_conf($robot, 'wwsympa_url')) {
print "Status: 404 Not Found\n";
$robot = $ENV{SYMPA_DOMAIN};
unless ($robot) {
# No robot providing web service found.
print "Status: 421 Misdirected Request\n";
print "\n\n";
next;
}
Expand Down Expand Up @@ -1940,8 +1946,6 @@ sub _crash_handler {
sub _split_params {
my $args_string = shift;

$log->syslog('debug', "PATH_INFO: %s", $ENV{'PATH_INFO'});

$args_string =~ s+^/++;

my $ending_slash = 0;
Expand Down Expand Up @@ -3857,7 +3861,7 @@ sub do_help {
# Strip extensions.
$in{'help_topic'} =~ s/[.].*// if $in{'help_topic'};
# Given partial top URI, redirect to base.
unless ($in{'help_topic'} or $ENV{REQUEST_URI} =~ /\/\z/) {
unless ($in{'help_topic'} or ($ENV{PATH_INFO} // '') =~ m{/\z}) {
$param->{'redirect_to'} = Sympa::get_url(
$robot, 'help',
nomenu => $param->{'nomenu'},
Expand Down Expand Up @@ -8645,7 +8649,7 @@ sub do_arc {
);
return 1;
}
unless ($in{'arc_file'} or $ENV{REQUEST_URI} =~ /\/\z/) {
unless ($in{'arc_file'} or ($ENV{PATH_INFO} // '') =~ m{/\z}) {
$param->{'redirect_to'} = Sympa::get_url(
$list, 'arc',
nomenu => $param->{'nomenu'},
Expand Down
1 change: 1 addition & 0 deletions src/lib/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ nobase_modules_DATA = \
Sympa/Upgrade.pm \
Sympa/User.pm \
Sympa/WWW/Auth.pm \
Sympa/WWW/FastCGI.pm \
Sympa/WWW/Marc.pm \
Sympa/WWW/Marc/Search.pm \
Sympa/WWW/Report.pm \
Expand Down
85 changes: 85 additions & 0 deletions src/lib/Sympa/WWW/FastCGI.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4

# Sympa - SYsteme de Multi-Postage Automatique
#
# Copyright 2020 The Sympa Community. See the AUTHORS.md
# file at the top-level directory of this distribution and at
# <https://github.com/sympa-community/sympa.git>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

package Sympa::WWW::FastCGI;

use strict;
use warnings;

use base qw(CGI::Fast);

use Sympa::WWW::Tools;

# Constructor: obtains the next request from CGI::Fast and binds it to a
# mail domain (a.k.a. "robot").
#
# Arguments:
#   Any arguments are handed to the parent constructor unchanged.
#
# Returns:
#   The value returned by the parent constructor. A false value means no
#   request is available (the main loop of wwsympa.fcgi ends on it); in that
#   case %ENV is left untouched.
#
# Side effects on %ENV, only when a request was obtained:
#   - If Sympa::WWW::Tools::get_robot() yields a non-empty list, the current
#     SCRIPT_NAME and PATH_INFO are saved as ORIG_SCRIPT_NAME and
#     ORIG_PATH_INFO, then SYMPA_DOMAIN, SCRIPT_NAME and PATH_INFO are
#     replaced with the returned values.
#   - Otherwise SYMPA_DOMAIN is removed.
sub new {
    my $class = shift;
    my @args  = @_;

    my $self = $class->SUPER::new(@args);

    # Nothing was accepted: do not run robot detection again.  Otherwise
    # ORIG_SCRIPT_NAME / ORIG_PATH_INFO kept from the previous request would
    # be overwritten with the already rewritten SCRIPT_NAME / PATH_INFO.
    return $self unless $self;

    # Determine mail domain (a.k.a. "robot") the request is dispatched.
    # N.B. As of 6.2.15, the http_host parameter will match with the host name
    # and path locally detected by server. If remotely detected host name
    # and / or path differ, the proxy must adjust them.
    # N.B. As of 6.2.34, wwsympa_url parameter may be optional.
    my @vars = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url');
    if (@vars) {
        # Keep the values given by the HTTP server before overriding them.
        @ENV{qw(ORIG_SCRIPT_NAME ORIG_PATH_INFO)} =
            @ENV{qw(SCRIPT_NAME PATH_INFO)};
        # In list context get_robot() returns robot, script path, path info.
        @ENV{qw(SYMPA_DOMAIN SCRIPT_NAME PATH_INFO)} = @vars;
    } else {
        # No robot provides web service for this request.
        delete $ENV{SYMPA_DOMAIN};
    }

    return $self;
}

1;

__END__

=encoding utf-8

=head1 NAME

Sympa::WWW::FastCGI - CGI Interface for FastCGI of Sympa

=head1 SYNOPSIS

TBD.

=head1 DESCRIPTION

TBD.

=head1 SEE ALSO

L<CGI::Fast>.

RFC 3875, The Common Gateway Interface (CGI) Version 1.1.
L<https://tools.ietf.org/html/rfc3875>.

=head1 HISTORY

L<Sympa::WWW::FastCGI> appeared on Sympa 6.2.55b.

=cut

143 changes: 73 additions & 70 deletions src/lib/Sympa/WWW/Tools.pm
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use Digest::MD5;
use English qw(-no_match_vars);
use File::Path qw();
use URI;
use URI::Escape qw();

use Sympa;
use Conf;
Expand Down Expand Up @@ -201,91 +202,93 @@ sub get_my_url {
my $robot = shift;
my %options = @_;

my $original_path_info;
my $path_info = $ENV{PATH_INFO} // '';
my $query_string = $ENV{QUERY_STRING} // '';

# Try getting encoded PATH_INFO and query.
my $request_uri = $ENV{REQUEST_URI} || '';
my $script_name = $ENV{SCRIPT_NAME} || '';
if ( $request_uri eq $script_name
or 0 == index($request_uri, $script_name . '?')
or 0 == index($request_uri, $script_name . '/')) {
$original_path_info = substr($request_uri, length $script_name);
} else {
# Workaround: Encode PATH_INFO again and use it.
my $path_info = $ENV{PATH_INFO} || '';
my $query_string = $ENV{QUERY_STRING};
$original_path_info =
Sympa::Tools::Text::encode_uri($path_info, omit => '/')
. ($query_string ? ('?' . $query_string) : '');
}

return Sympa::get_url($robot, undef, authority => $options{authority})
. $original_path_info;
return
Sympa::get_url($robot, undef, authority => $options{authority})
. Sympa::Tools::Text::encode_uri($path_info, omit => '/')
. (length $query_string ? '?' : '')
. $query_string;
}

# Determine robot.
sub get_robot {
my @keys = @_;

my $request_host = _get_server_name();
my $request_path = $ENV{'REQUEST_URI'} || '';
my $robot_id;

if (defined $request_host and length $request_host) {
my $selected_path = '';
foreach my $rid (Sympa::List::get_robots()) {
my $local_url;
foreach my $key (@keys) {
$local_url = Conf::get_robot_conf($rid, $key);
last if $local_url;
}
next unless $local_url;

if ($local_url =~ m{\A[-+\w]+:}) {
;
} elsif ($local_url =~ m{\A//}) {
$local_url = 'http:' . $local_url;
} else {
$local_url = 'http://' . $local_url;
}
# Get host part of script-URI from standard CGI environment variable
# SERVER_NAME.
# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Server:" request field
# is _no longer_ referred and this function returns only locally detected
# server name.
my $request_host = lc($ENV{SERVER_NAME} // '');
return unless length $request_host;
my $ipv6_re = Sympa::Regexps::ipv6();
if ($request_host =~ /\A$ipv6_re\z/) { # IPv6 address
$request_host = sprintf '[%s]', $request_host;
}

# Since CGI of some HTTP servers might split script-path and extra-path of
# script-URI improperly, we had better reconstruct them from these
# standard CGI environment variables:
# - SCRIPT_NAME: a URI path which could identify the CGI script.
# - PATH_INFO: derived from the portion of the URI path hierarchy
# following the part that identifies the script itself.
# Note that they are not URL-encoded, unlike non-standard REQUEST_URI.
my $org_script_name = $ENV{SCRIPT_NAME} // '';
my $org_path_info = $ENV{PATH_INFO} // '';
return unless '' eq $org_script_name or 0 == index $org_script_name, '/';
return unless '' eq $org_path_info or 0 == index $org_path_info, '/';
my $request_path = $org_script_name . $org_path_info;

# Find mail domain (a.k.a. "robot") of which web URL matches script-URI.
my ($robot_id, $script_path) = (undef, undef);
foreach my $rid (Sympa::List::get_robots()) {
my $local_url;
foreach my $key (@keys) {
$local_url = Conf::get_robot_conf($rid, $key);
last if $local_url;
}
next unless $local_url;

my $uri = URI->new($local_url);
next
unless $uri
and $uri->scheme
and grep { $uri->scheme eq $_ } qw(http https);

my $host = lc($uri->host || '');
my $path = $uri->path || '/';
#FIXME:might need percent-decode hosts and/or paths
next
unless $request_host eq $host
and 0 == index $request_path, $path;

# The longest path wins.
($robot_id, $selected_path) = ($rid, $path)
if length $selected_path < length $path;
if ($local_url =~ m{\A[-+\w]+:}) {
;
} elsif ($local_url =~ m{\A//}) {
$local_url = 'http:' . $local_url;
} else {
$local_url = 'http://' . $local_url;
}
}

return (defined $robot_id) ? $robot_id : $Conf::Conf{'domain'};
}
my $uri = URI->new($local_url);
next
unless $uri
and $uri->scheme
and grep { $uri->scheme eq $_ } qw(http https);

# Old name: (part of) get_header_field() in wwsympa.fcgi.
# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Server:" request field is
# _no longer_ referred and this function returns only locally detected server
# name.
sub _get_server_name {
my $server = $ENV{SERVER_NAME};
return undef unless defined $server and length $server;
my $host = lc URI::Escape::uri_unescape($uri->host // '');
my $path = URI::Escape::uri_unescape($uri->path // '');
next unless $request_host eq $host;
next
unless $request_path eq $path
or 0 == index $request_path, $path . '/';

my $ipv6_re = Sympa::Regexps::ipv6();
if ($server =~ /\A$ipv6_re\z/) { # IPv6 address
$server = "[$server]";
# The longest path wins.
($robot_id, $script_path) = ($rid, $path)
if not defined $script_path
or length $script_path < length $path;
}
return lc $server;

return unless $robot_id;
return
wantarray
? ($robot_id, $script_path, substr $request_path, length $script_path)
: $robot_id;
}

# Old name: (part of) get_header_field() in wwsympa.fcgi.
# No longer used.
#sub _get_server_name;

# Old name: (part of) get_header_field() in wwsympa.fcgi.
# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Host:" request field is
# _no longer_ referred and this function returns only locally detected host
Expand Down
Loading