From 873522eee8aad0319015d74cc3fbb866f384ccae Mon Sep 17 00:00:00 2001 From: Joel Berger Date: Sat, 23 Apr 2016 17:39:15 +0100 Subject: [PATCH] Added script which fetches and indexes river data closes #460 --- .gitignore | 1 + lib/MetaCPAN/Document/Distribution.pm | 9 ++- lib/MetaCPAN/Role/Logger.pm | 3 +- lib/MetaCPAN/Role/Script.pm | 3 +- lib/MetaCPAN/Script/River.pm | 80 +++++++++++++++++++++++++++ lib/MetaCPAN/Types/Internal.pm | 4 ++ t/script/river.t | 61 ++++++++++++++++++++ t/var/river.json | 14 +++++ 8 files changed, 172 insertions(+), 3 deletions(-) create mode 100644 lib/MetaCPAN/Script/River.pm create mode 100644 t/script/river.t create mode 100644 t/var/river.json diff --git a/.gitignore b/.gitignore index f7bc970db..ad29cd523 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ /var /t/var/tmp/ /t/var/darkpan/ +/t/var/log/ /etc/metacpan_local.pl metacpan_server_local.conf diff --git a/lib/MetaCPAN/Document/Distribution.pm b/lib/MetaCPAN/Document/Distribution.pm index cafd174f3..4d6028453 100644 --- a/lib/MetaCPAN/Document/Distribution.pm +++ b/lib/MetaCPAN/Document/Distribution.pm @@ -7,7 +7,7 @@ use namespace::autoclean; use Moose; use ElasticSearchX::Model::Document; -use MetaCPAN::Types qw( ArrayRef BugSummary ); +use MetaCPAN::Types qw( ArrayRef BugSummary RiverSummary); has name => ( is => 'ro', @@ -22,6 +22,13 @@ has bugs => ( writer => '_set_bugs', ); +has river => ( + is => 'ro', + isa => RiverSummary, + dynamic => 1, + writer => '_set_river', +); + sub releases { my $self = shift; return $self->index->type("release") diff --git a/lib/MetaCPAN/Role/Logger.pm b/lib/MetaCPAN/Role/Logger.pm index 14e33a8ca..f9b1bdaa0 100644 --- a/lib/MetaCPAN/Role/Logger.pm +++ b/lib/MetaCPAN/Role/Logger.pm @@ -49,7 +49,8 @@ sub set_logger_once { # XXX This doesn't belong here. sub _build_logger { my ($config) = @_; - my $log = Log::Log4perl->get_logger( $ARGV[0] ); + my $log = Log::Log4perl->get_logger( $ARGV[0] + || 'this_would_have_been_argv_0_but_there_is_no_such_thing' ); foreach my $c (@$config) { my $layout = Log::Log4perl::Layout::PatternLayout->new( $c->{layout} || qq{%d %p{1} %c: %m{chomp}%n} ); diff --git a/lib/MetaCPAN/Role/Script.pm b/lib/MetaCPAN/Role/Script.pm index 0cd064c55..8d2004ea4 100644 --- a/lib/MetaCPAN/Role/Script.pm +++ b/lib/MetaCPAN/Role/Script.pm @@ -9,6 +9,7 @@ use Log::Contextual qw( :log :dlog ); use MetaCPAN::Model; use MetaCPAN::Types qw(:all); use Moose::Role; +use Carp (); has 'cpan' => ( is => 'ro', @@ -74,7 +75,7 @@ sub handle_error { log_fatal {$error}; # Die if configured (for the test suite). - die $error if $self->die_on_error; + Carp::croak $error if $self->die_on_error; } sub index { diff --git a/lib/MetaCPAN/Script/River.pm b/lib/MetaCPAN/Script/River.pm new file mode 100644 index 000000000..ebc98beb7 --- /dev/null +++ b/lib/MetaCPAN/Script/River.pm @@ -0,0 +1,80 @@ +package MetaCPAN::Script::River; + +use Moose; +use namespace::autoclean; + +use JSON::MaybeXS qw( decode_json ); +use Log::Contextual qw( :log :dlog ); +use LWP::UserAgent; +use MetaCPAN::Types qw( ArrayRef Str Uri); + +with 'MetaCPAN::Role::Script', 'MooseX::Getopt'; + +has river_url => ( + is => 'ro', + isa => Uri, + coerce => 1, + required => 1, + default => 'https://neilb.org/FIXME', +); + +has _ua => ( + is => 'ro', + isa => 'LWP::UserAgent', + default => sub { LWP::UserAgent->new }, +); + +sub run { + my $self = shift; + my $summaries = $self->retrieve_river_summaries; + $self->index_river_summaries($summaries); + + return 1; +} + +sub index_river_summaries { + my ( $self, $summaries ) = @_; + $self->index->refresh; + my $dists = $self->index->type('distribution'); + my $bulk = $self->index->bulk( size => 300 ); + for my $summary (@$summaries) { + my $dist = delete $summary->{dist}; + my $doc = $dists->get($dist); + $doc ||= $dists->new_document( { name => $dist } ); + $doc->_set_river($summary); + $bulk->put($doc); + } + $bulk->commit; +} + +sub retrieve_river_summaries { + my $self = shift; + my $resp = $self->_ua->get( $self->river_url ); + + $self->handle_error( $resp->status_line ) unless $resp->is_success; + + return decode_json $resp->content; +} + +__PACKAGE__->meta->make_immutable; + +1; + +=pod + +=head1 SYNOPSIS + + # bin/metacpan river + +=head1 DESCRIPTION + +Retrieves the CPAN river data from its source and +updates our ES information. + +This can then be accessed here: + +http://api.metacpan.org/distribution/Moose +http://api.metacpan.org/distribution/HTTP-BrowserDetect + +=cut + diff --git a/lib/MetaCPAN/Types/Internal.pm b/lib/MetaCPAN/Types/Internal.pm index 2b9bc7691..6416cac72 100644 --- a/lib/MetaCPAN/Types/Internal.pm +++ b/lib/MetaCPAN/Types/Internal.pm @@ -26,6 +26,7 @@ use MooseX::Types -declare => [ PerlMongers Tests BugSummary + RiverSummary ) ]; @@ -104,6 +105,9 @@ subtype BugSummary, source => Str ]; +subtype RiverSummary, + as Dict [ ( map { $_ => Optional [Int] } qw(total immediate bucket) ), ]; + subtype Resources, as Dict [ license => Optional [ ArrayRef [Str] ], diff --git a/t/script/river.t b/t/script/river.t new file mode 100644 index 000000000..b94640486 --- /dev/null +++ b/t/script/river.t @@ -0,0 +1,61 @@ +use strict; +use warnings; + +use lib 't/lib'; + +use Git::Helpers qw( checkout_root ); +use MetaCPAN::Script::River; +use MetaCPAN::Script::Runner; +use MetaCPAN::Server::Test; +use MetaCPAN::TestHelpers; +use Test::More; +use URI; + +my $config = MetaCPAN::Script::Runner::build_config; + +# local json file with structure from https://github.com/CPAN-API/cpan-api/issues/460 +my $root = checkout_root(); +my $file = URI->new('t/var/river.json')->abs("file://$root/"); +$config->{'river_url'} = "$file"; + +my $river = MetaCPAN::Script::River->new_with_options($config); +ok $river->run, 'runs and returns true'; + +my %expect = ( + 'System-Command' => { + total => 92, + immediate => 4, + bucket => 2, + }, + 'Text-Markdown' => { + total => 92, + immediate => 56, + bucket => 2, + } +); + +test_psgi app, sub { + my $cb = shift; + for my $dist ( keys %expect ) { + my $test = $expect{$dist}; + subtest "Check $dist" => sub { + my $url = "/distribution/$dist"; + ok( my $res = $cb->( GET $url ), "GET $url" ); + + # TRAVIS 5.18 + is( $res->code, 200, "code 200" ); + is( + $res->header('content-type'), + 'application/json; charset=utf-8', + 'Content-type' + ); + my $json = decode_json_ok($res); + + # TRAVIS 5.18 + is_deeply( $json->{river}, $test, + "$dist river summary roundtrip" ); + }; + } +}; + +done_testing(); diff --git a/t/var/river.json b/t/var/river.json new file mode 100644 index 000000000..2bbc6ea7e --- /dev/null +++ b/t/var/river.json @@ -0,0 +1,14 @@ +[ + { + "dist": "System-Command", + "total": 92, + "immediate": 4, + "bucket": 2 + }, + { + "dist": "Text-Markdown", + "total": 92, + "immediate": 56, + "bucket": 2 + } +]