Skip to content

Commit

Permalink
Fetch policy/alignment data from new location.
Browse files Browse the repository at this point in the history
  • Loading branch information
dracos committed Dec 15, 2023
1 parent 72b0d16 commit 788fb73
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 187 deletions.
1 change: 1 addition & 0 deletions conf/general-example
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ define ("RECESSFILE","https://www.theyworkforyou.com/pwdata/parl-recesses.txt");
// AND amend your global php.ini to 'allow_url_fopen = On'
//define ("RECESSFILE", RAWDATA . "/parl-recesses.txt");

define('TWFY_VOTES_URL', '');


// *******************************************************************************
Expand Down
243 changes: 65 additions & 178 deletions scripts/json2db.pl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#! /usr/bin/perl -w
#!/usr/bin/env perl

use strict;
use v5.14;
use warnings;
use utf8;

use FindBin;
Expand All @@ -11,8 +12,6 @@
use mySociety::Config;
mySociety::Config::set_file("$FindBin::Bin/../conf/general");

my $parldata = mySociety::Config::get('RAWDATA');

my $verbose = 0;
for( @ARGV ){
if( $_ eq "--verbose" ){
Expand All @@ -22,10 +21,10 @@
}

use DBI;
use File::Slurp::Unicode;
use JSON::XS;
use LWP::Simple;

use vars qw($motion_count $policy_count $vote_count %motions_seen @policyids);
use vars qw($motion_count $policy_count $align_count @policyids);

require 'policyids.pl';
my $json = JSON::XS->new->latin1;
Expand All @@ -36,34 +35,23 @@
my $policycheck = $dbh->prepare("SELECT policy_id from policies where policy_id = ?");
my $policyadd = $dbh->prepare("INSERT INTO policies (policy_id, title, description) VALUES (?, ?, ?)");

my $divisioncheck = $dbh->prepare("SELECT division_title, gid, yes_text, no_text, yes_total, no_total, absent_total, both_total, majority_vote FROM divisions WHERE division_id = ?");
my $divisionadd = $dbh->prepare("INSERT INTO divisions (division_id, house, division_title, yes_text, no_text, division_date, division_number, gid, yes_total, no_total, absent_total, both_total, majority_vote) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
my $divisionupdate = $dbh->prepare("UPDATE divisions SET gid = ?, division_title = ?, yes_text = ?, no_text = ?, yes_total = ?, no_total = ?, absent_total = ?, both_total = ?, majority_vote = ? WHERE division_id = ?");

my $motioncheck = $dbh->prepare("SELECT direction, policy_vote FROM policydivisions WHERE division_id = ? AND policy_id = ?");
my $motionadd = $dbh->prepare("INSERT INTO policydivisions (division_id, policy_id, direction, policy_vote) VALUES (?, ?, ?, ?)");
my $motionupdate = $dbh->prepare("UPDATE policydivisions SET direction = ?, policy_vote = ? WHERE division_id = ? AND policy_id = ?");

my $votecheck = $dbh->prepare("SELECT person_id, vote FROM persondivisionvotes WHERE division_id = ?");
my $voteadd = $dbh->prepare("INSERT INTO persondivisionvotes (person_id, division_id, vote) VALUES (?, ?, ?)");
my $voteupdate= $dbh->prepare("UPDATE persondivisionvotes SET vote = ? WHERE person_id = ? AND division_id = ?");

my $strong_vote_check = $dbh->prepare("SELECT data_value from personinfo where data_key = ? and person_id = ?");
my $personinfo_set = $dbh->prepare('INSERT INTO personinfo (person_id, data_key, data_value) VALUES(?, ?, ?) ON DUPLICATE KEY UPDATE data_value=?');
my $personinfo_check = $dbh->prepare("SELECT data_value from personinfo where data_key = ? and person_id = ?");
my $strong_for_policy_check = $dbh->prepare("SELECT count(*) as strong_votes FROM persondivisionvotes JOIN policydivisions USING (division_id) WHERE policy_id = ? AND person_id = ? AND policy_vote LIKE '%3'");
my $strong_vote_add = $dbh->prepare("INSERT into personinfo ( data_key, data_value, person_id ) VALUES ( ?, ?, ? )");
my $strong_vote_update = $dbh->prepare("UPDATE personinfo SET data_value = ? WHERE data_key = ? AND person_id = ?");

my $motionsdir = $parldata . "scrapedjson/policy-motions/";

$motion_count = $policy_count = $vote_count = 0;
$motion_count = $policy_count = $align_count = 0;

foreach my $dreamid ( @policyids ) {
my $policy_file = $motionsdir . $dreamid . ".json";
if ( ! -f $policy_file ) {
warn "no json file for policy $dreamid at $policy_file";
my $policy_url = mySociety::Config::get('TWFY_VOTES_URL') . '/twfy-compatible/popolo/' . $dreamid . '.json';
my $policy_json = get($policy_url);
if (!$policy_json) {
warn "no json file for policy $dreamid at $policy_url";
next;
}
my $policy_json = read_file($policy_file);
my $policy = $json->decode($policy_json);

my $curr_policy = $dbh->selectrow_hashref($policycheck, {}, $dreamid);
Expand All @@ -75,199 +63,98 @@

$policy_count++;

if ($verbose){
print("processing motions for $dreamid\n");
}
process_motions($policy, $dreamid);
}

# And recently changed ones
my $policy_file = $motionsdir . "recently-changed-divisions.json";
if (-f $policy_file) {
if ($verbose){
print("processing recently changed divisions\n");
}
my $policy_json = read_file($policy_file);
my $policy = $json->decode($policy_json);
process_motions($policy);
say "processing motions for $dreamid" if $verbose;
process_motions($policy->{aspects}, $dreamid);
say "processing alignments for $dreamid" if $verbose;
process_alignments($policy->{alignments}, $dreamid);
}

print "parsed $policy_count policies, $motion_count divisions and $vote_count votes from PW JSON\n";
print "parsed $policy_count policies, $motion_count divisions, and $align_count alignments from JSON\n";

sub process_motions {
my ($policy, $dreamid) = @_;
my ($aspects, $dreamid) = @_;
# Set AutoCommit off
$dbh->{AutoCommit} = 0;
for my $motion ( @{ $policy->{aspects} } ) {
for my $motion (@$aspects) {
$motion_count++;
if ($verbose && $motion_count % 10 == 0){
print("$motion_count\n");
};
say $motion_count if $verbose && $motion_count % 10 == 0;
my ($motion_num) = $motion->{motion}->{id} =~ /pw-\d+-\d+-\d+-(\d+)/;
my ($house) = $motion->{motion}->{organization_id} =~ /uk\.parliament\.(\w+)/;

my $sources = $motion->{motion}->{sources};
my $gid = '';
foreach my $source (@$sources) {
if ( defined $source->{gid} ) {
$gid = $source->{gid};
}
}

my $motion_id = $motion->{motion}->{id};
my $text = $motion->{motion}->{text};

my $curr_division = $dbh->selectrow_hashref($divisioncheck, {}, $motion_id);
if ( $curr_division ) {
$curr_division->{yes_text} ||= '';
$curr_division->{no_text} ||= '';
}

my $curr_motion;
if ($dreamid) {
$curr_motion = $dbh->selectrow_hashref($motioncheck, {}, $motion_id, $dreamid);
if ($curr_motion) {
$curr_motion->{direction} ||= '';
}
}

my $yes_text = '';
my $no_text = '';
if ( $motion->{motion}->{actions} ) {
$yes_text = $motion->{motion}->{actions}->{yes};
$no_text = $motion->{motion}->{actions}->{no};
}

my $totals = {
yes => 0,
no => 0,
absent => 0,
both => 0,
};
my $majority_vote = '';

if ( $motion->{motion}->{vote_events}->[0]->{counts} ) {
for my $count ( @{ $motion->{motion}->{vote_events}->[0]->{counts} } ) {
$totals->{$count->{option}} = $count->{value};
}

if ($totals->{yes} > $totals->{no}) {
$majority_vote = 'aye';
} else {
$majority_vote = 'no';
}
$curr_motion = $dbh->selectrow_hashref($motioncheck, {}, $motion_id, $dreamid);
if ($curr_motion) {
$curr_motion->{direction} ||= '';
}

# Ignore tellers in totals
$totals->{yes} -= grep { $_->{option} =~ /tellaye/ } @{ $motion->{motion}->{ vote_events }->[0]->{votes} };
$totals->{no} -= grep { $_->{option} =~ /tellno/ } @{ $motion->{motion}->{ vote_events }->[0]->{votes} };

if ( !defined $curr_division ) {
my $r = $divisionadd->execute($motion_id, $house, $motion->{motion}->{text}, $yes_text, $no_text, $motion->{motion}->{date}, $motion_num, $gid, $totals->{yes}, $totals->{no}, $totals->{absent}, $totals->{both}, $majority_vote);
if ( !defined $curr_motion ) {
my $r = $motionadd->execute($motion_id, $dreamid, $motion->{direction}, $motion->{motion}->{policy_vote});
unless ( $r > 0 ) {
warn "problem creating division $motion_id, skipping motions\n";
warn "problem creating policydivision for $motion_id / $dreamid, skipping motions\n";
next;
}
} elsif ( $curr_division->{division_title} ne $text ||
$curr_division->{gid} ne $gid ||
$curr_division->{yes_text} ne $yes_text ||
$curr_division->{no_text} ne $no_text ||
$curr_division->{yes_total} ne $totals->{yes} ||
$curr_division->{no_total} ne $totals->{no} ||
$curr_division->{absent_total} ne $totals->{absent} ||
$curr_division->{both_total} ne $totals->{both} ||
$curr_division->{majority_vote} ne $majority_vote
} elsif ( $motion->{direction} ne $curr_motion->{direction} ||
$motion->{motion}->{policy_vote} ne $curr_motion->{policy_vote}
) {
my $r = $divisionupdate->execute($gid, $text, $yes_text, $no_text, $totals->{yes}, $totals->{no}, $totals->{absent}, $totals->{both}, $majority_vote, $motion_id);
my $r = $motionupdate->execute($motion->{direction}, $motion->{motion}->{policy_vote}, $motion_id, $dreamid);
unless ( $r > 0 ) {
warn "problem updating division $motion_id from $curr_division->{division_title} to $text AND $curr_division->{gid} to $gid\n";
}
}

if ($dreamid) {
if ( !defined $curr_motion ) {
my $r = $motionadd->execute($motion_id, $dreamid, $motion->{direction}, $motion->{motion}->{policy_vote});
unless ( $r > 0 ) {
warn "problem creating policydivision for $motion_id / $dreamid, skipping motions\n";
next;
}
} elsif ( $motion->{direction} ne $curr_motion->{direction} ||
$motion->{motion}->{policy_vote} ne $curr_motion->{policy_vote}
) {
my $r = $motionupdate->execute($motion->{direction}, $motion->{motion}->{policy_vote}, $motion_id, $dreamid);
unless ( $r > 0 ) {
warn "problem updating policydivision $motion_id / $dreamid from $curr_motion->{direction} to $motion->{direction}\n";
}
warn "problem updating policydivision $motion_id / $dreamid from $curr_motion->{direction} to $motion->{direction}\n";
}
}

my $curr_votes = $dbh->selectall_hashref($votecheck, 'person_id', {}, $motion_id);

for my $vote ( @{ $motion->{motion}->{ vote_events }->[0]->{votes} } ) {
my $mp_id_num;
$mp_id_num = $vote->{id};
$mp_id_num =~ s:uk.org.publicwhip/person/::;
next unless $mp_id_num;
if ( $mp_id_num !~ /^[1-9]\d+$/ ) {
print "$mp_id_num doesn't look like a valid person id - skipping vote for $motion_id - " . ($dreamid || "") . "\n";
print "$mp_id_num doesn't look like a valid person id - skipping vote for $motion_id - " . $dreamid . "\n";
next;
}

# if we've seen this motion before then don't process it, however we want
# to make sure that the strong vote processing below happens so we still
# need to look at all the votes, just not update the details of them in
# the database
if ( !$motions_seen{$motion_id} ) {
$vote_count++;

if ( !defined $curr_votes->{$mp_id_num} ) {
$voteadd->execute($mp_id_num, $motion_id, $vote->{option});
$curr_votes->{$mp_id_num} = { vote => $vote->{option}};
} elsif ( $curr_votes->{$mp_id_num}->{vote} ne $vote->{option} ) {
# because we probably want to know if this ever happens
print "updating $motion_id vote for $mp_id_num from " . $curr_votes->{$mp_id_num}->{vote} . " to " . $vote->{option} . "\n";
my $r = $voteupdate->execute($vote->{option}, $mp_id_num, $motion_id);
unless ( $r > 0 ) {
warn "problem updating $motion_id vote for $mp_id_num from " . $curr_votes->{$mp_id_num}->{vote} . " to " . $vote->{option} . "\n"
. DBI->errstr . "\n";
}
}
# if it's a strong vote, i.e. yes3 or no3, then set mp has strong_vote attribute
my $pw_id = "public_whip_dreammp" . $dreamid . "_has_strong_vote";
if ( $motion->{motion}->{policy_vote} =~ /3/ ) {
$personinfo_set->execute($mp_id_num, $pw_id, 1, 1);
}

if ($dreamid) {
# if it's a strong vote, i.e. yes3 or no3, then set mp has strong_vote attribute
if ( $motion->{motion}->{policy_vote} =~ /3/ ) {
my $pw_id = "public_whip_dreammp" . $dreamid . "_has_strong_vote";
my $has_strong = $strong_vote_check->execute( $pw_id, $mp_id_num );
if ( $strong_vote_check->rows() < 1 ) {
$strong_vote_add->execute( $pw_id, 1, $mp_id_num);
}
}

# if the motion has been unset from strong -> weak then check if we need to unset
# the MP has strong vote attribute
if ( $curr_motion && $curr_motion->{policy_vote} =~ /3/ && $motion->{motion}->{policy_vote} !~ /3/ ) {
my $pw_id = "public_whip_dreammp" . $dreamid . "_has_strong_vote";
my $has_strong = $strong_vote_check->execute( $pw_id, $mp_id_num );
if ( $strong_vote_check->rows() > 0 ) {
my $has_strong_for_policy = $strong_for_policy_check->execute( $dreamid, $mp_id_num );
my $row = $strong_for_policy_check->fetchrow_hashref();
if ( $row->{strong_votes} == 0 ) {
$strong_vote_update->execute( 0, $pw_id, $mp_id_num);
}
# if the motion has been unset from strong -> weak then check if we need to unset
# the MP has strong vote attribute
if ( $curr_motion && $curr_motion->{policy_vote} =~ /3/ && $motion->{motion}->{policy_vote} !~ /3/ ) {
$personinfo_check->execute( $pw_id, $mp_id_num );
if ( $personinfo_check->rows() > 0 ) {
$strong_for_policy_check->execute( $dreamid, $mp_id_num );
my $row = $strong_for_policy_check->fetchrow_hashref();
if ( $row->{strong_votes} == 0 ) {
$personinfo_set->execute($mp_id_num, $pw_id, 0, 0);
}
}
}
}

# some divisions are in more than one policy and we want to take note of
# this so we can skip processing of them
if ( !$motions_seen{$motion_id} ) {
$motions_seen{$motion_id} = 1;
}

}
$dbh->commit();
# Set AutoCommit on
$dbh->{AutoCommit} = 1;
}

# Store per-person alignment figures for one policy in the personinfo table.
#
# Parameters:
#   $alignments - array ref of hash refs taken from the policy JSON; each
#                 entry is read for person_id, person_distance_from_policy,
#                 count_present and count_absent.
#   $dreamid    - policy ID, used to build the personinfo data_key.
#
# For every entry three rows are written through $personinfo_set (an
# INSERT ... ON DUPLICATE KEY UPDATE), keyed
# public_whip_dreammp<ID>_distance, _both_voted and _absent.
# The file-level $align_count is incremented once per entry, including
# entries that are skipped for lacking a usable person id.
sub process_alignments {
    my ($alignments, $dreamid) = @_;

    # personinfo key suffix => field name in the alignment JSON.
    # Loop-invariant, so built once rather than per alignment.
    my @terms = (
        [ distance   => 'person_distance_from_policy' ],
        [ both_voted => 'count_present' ],
        [ absent     => 'count_absent' ],
    );

    foreach my $alignment (@$alignments) {
        $align_count++;
        say $align_count if $verbose && $align_count % 100 == 0;

        # Strip the Public Whip URI prefix to leave the bare person id.
        # Dots are escaped so they only match literal dots.
        my $person_id = $alignment->{person_id} // '';
        $person_id =~ s{uk\.org\.publicwhip/person/}{};

        # Without a person id the rows would be stored against nobody;
        # skip the entry, as process_motions does for votes without an id.
        if ($person_id eq '') {
            warn "alignment without a person id for policy $dreamid, skipping\n";
            next;
        }

        foreach my $term (@terms) {
            my $pw_id = "public_whip_dreammp${dreamid}_$term->[0]";
            my $val = $alignment->{$term->[1]};
            # The value is bound twice: once for the INSERT and once for
            # the ON DUPLICATE KEY UPDATE clause.
            $personinfo_set->execute($person_id, $pw_id, $val, $val);
        }
    }
}
10 changes: 1 addition & 9 deletions scripts/mpinfoin.pl
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,6 @@
use JSON;
use File::Slurp;

use vars qw(@policyids);

require 'policyids.pl';

my %action;
my $verbose;
foreach (@ARGV) {
Expand Down Expand Up @@ -122,13 +118,9 @@

if ($action{'pw'}) {
my $ua = LWP::UserAgent->new( agent => 'mySociety/1.0 (TheyWorkForYou)' );
print "Parsing Public Whip attendance and policies\n" if $verbose;
print "Parsing Public Whip attendance\n" if $verbose;
$twig->parseurl("https://www.publicwhip.org.uk/feeds/mp-info.xml", $ua);
$twig->parseurl("https://www.publicwhip.org.uk/feeds/mp-info.xml?house=lords", $ua);
# Various policy IDs, see https://www.publicwhip.org.uk/policies.php for what they are
foreach my $dreamid (@policyids) {
$twig->parseurl("https://www.publicwhip.org.uk/feeds/mpdream-info.xml?id=$dreamid", $ua);
}
}

if ($action{'eu_ref_position'}) {
Expand Down

0 comments on commit 788fb73

Please sign in to comment.