Skip to content

Commit

Permalink
Massviews: Add option to include all subcategories
Browse files Browse the repository at this point in the history
Bug: T149334
  • Loading branch information
MusikAnimal committed May 23, 2017
1 parent 25e9d3b commit e0bcf38
Show file tree
Hide file tree
Showing 12 changed files with 225 additions and 87 deletions.
5 changes: 3 additions & 2 deletions javascripts/massviews/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,14 @@ const config = {
sourceInput: '#source_input',
formStates: ['initial', 'processing', 'complete', 'invalid'],
timestampFormat: 'YYYYMMDD00',
validateParams: ['source', 'subjectpage', 'platform', 'agent', 'direction', 'sort', 'view'],
validateParams: ['source', 'subjectpage', 'subcategories', 'platform', 'agent', 'direction', 'sort', 'view'],
validParams: {
direction: ['-1', '1'],
sort: ['title', 'views', 'original'],
source: ['pagepile', 'wikilinks', 'category', 'subpages', 'transclusions', 'quarry', 'hashtag', 'external-link', 'search'],
view: ['list', 'chart'],
subjectpage: ['0', '1']
subjectpage: ['0', '1'],
subcategories: ['0', '1']
}
};

Expand Down
90 changes: 36 additions & 54 deletions javascripts/massviews/massviews.js
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ class MassViews extends mix(Pv).with(ChartHelpers, ListHelpers) {
);

if (source === 'category') {
$('.category-subject-toggle').show();
$('.category-options').show();
} else {
$('.category-subject-toggle').hide();
$('.category-options').hide();
}

if (['quarry', 'external-link', 'search'].includes(source)) {
Expand Down Expand Up @@ -154,6 +154,7 @@ class MassViews extends mix(Pv).with(ChartHelpers, ListHelpers) {

if (params.source === 'category') {
params.subjectpage = $('.category-subject-toggle--input').is(':checked') ? '1' : '0';
params.subcategories = $('.subcategories-toggle--input').is(':checked') ? '1' : '0';
} else if (['quarry', 'external-link', 'search'].includes(params.source)) {
params.project = $('.project-input').val();
}
Expand Down Expand Up @@ -576,6 +577,10 @@ class MassViews extends mix(Pv).with(ChartHelpers, ListHelpers) {
$('.category-subject-toggle--input').prop('checked', true);
}

if (params.subcategories === '1') {
$('.subcategories-toggle--input').prop('checked', true);
}

/** start up processing if necessary params are present */
if (params.target) {
this.processInput();
Expand Down Expand Up @@ -704,66 +709,41 @@ class MassViews extends mix(Pv).with(ChartHelpers, ListHelpers) {
* given the label and link for the category and the pageviews data
*/
processCategory(project, category, cb) {
let requestData = {
list: 'categorymembers',
cmlimit: 500,
cmtitle: category,
prop: 'categoryinfo',
titles: category
};

const categoryLink = this.getPageLink(category, project);

$('.progress-counter').text($.i18n('fetching-data', 'Category API'));
this.massApi(requestData, project, 'cmcontinue', 'categorymembers').done(data => {
if (data.error) {
return this.apiErrorReset('Category API', data.error.info);
}

const pageObj = data.pages[0];
let url = `/massviews/api.php?project=${project}&category=${category.replace(/^.*?:/, '')}`
+ `&limit=${this.config.apiLimit}`;

if (pageObj.missing) {
return this.setState('initial', () => {
this.writeMessage($.i18n('api-error-no-data'));
});
}

const size = pageObj.categoryinfo.size,
// siteInfo is only populated if they've opted to see subject pages instead of talk pages
// Otherwise namespaces are not needed by this.mapCategoryPageNames
namespaces = this.getSiteInfo(project) ? this.getSiteInfo(project).namespaces : undefined;
let pages = data.categorymembers;
if ($('.subcategories-toggle--input').is(':checked')) {
url += '&recursive=1';
}

$.getJSON(url).done(pages => {
if (!pages.length) {
return this.setState('initial', () => {
this.writeMessage($.i18n('massviews-empty-set', categoryLink));
this.writeMessage($.i18n('api-error-no-data'));
});
}

if (size > this.config.apiLimit) {
if (pages.length >= this.config.apiLimit) {
this.writeMessage(
$.i18n('massviews-oversized-set', categoryLink, this.formatNumber(size), this.config.apiLimit)
$.i18n('massviews-oversized-set-unknown', categoryLink, this.config.apiLimit)
);

pages = pages.slice(0, this.config.apiLimit);
}

const pageNames = this.mapCategoryPageNames(pages, namespaces);
const useSubjectPage = $('.category-subject-toggle--input').is(':checked');
const pageTitles = this.mapCategoryPageNames(pages, this.getSiteInfo(project).namespaces, useSubjectPage);

this.getPageViewsData(pageNames, project).done(pageViewsData => {
this.getPageViewsData(pageTitles, project).done(pageViewsData => {
cb(category, categoryLink, pageViewsData);
});
}).fail(data => {
this.setState('initial');

/** structured error comes back as a string, otherwise we don't know what happened */
if (data && typeof data.error === 'string') {
this.writeMessage(
$.i18n('api-error', categoryLink + ': ' + data.error)
);
} else {
this.writeMessage($.i18n('api-error-unknown', categoryLink));
}
this.writeMessage($.i18n('api-error-unknown', categoryLink));
});
}

Expand Down Expand Up @@ -1311,22 +1291,28 @@ class MassViews extends mix(Pv).with(ChartHelpers, ListHelpers) {
}

/**
* Get subject pages of given talk pages in given namespace
* Get full page titles given array of objects with page title and namespace number
* You can optionally return the subject pages of any given talk pages
* @param {Array} pages - page names
* @param {Object} namespaces - as returned by the siteInfo
* @param {Boolean} useSubjectPage - whether to convert any talk pages
* to their corresponding subject page
* @return {Array} - mapped page names
*/
mapCategoryPageNames(pages, namespaces) {
mapCategoryPageNames(pages, namespaces, useSubjectPage) {
let pageNames = [];

pages.forEach(page => {
if (namespaces && page.ns % 2 === 1) {
const namespace = namespaces[page.ns].canonical;
const subjectNamespace = namespaces[page.ns - 1].canonical || '';
pageNames.push(page.title.replace(namespace, subjectNamespace).replace(/^\:/, ''));
const ns = parseInt(page.ns, 10);
let namespace;

if (ns % 2 === 1 && useSubjectPage) {
namespace = namespaces[ns - 1]['*'] || '';
} else {
pageNames.push(page.title);
namespace = namespaces[page.ns]['*'];
}

pageNames.push(`${namespace}${namespace === '' ? '' : ':'}${page.title}`);
});

return pageNames;
Expand Down Expand Up @@ -1399,14 +1385,10 @@ class MassViews extends mix(Pv).with(ChartHelpers, ListHelpers) {

switch (source) {
case 'category':
// fetch siteinfo to get namespaces if they've opted to use subject page instead of talk
if ($('.category-subject-toggle--input').is(':checked')) {
this.fetchSiteInfo(project).then(() => {
this.processCategory(project, target, cb);
});
} else {
// namespaces needed as internal Category API fetches from the replicas
this.fetchSiteInfo(project).then(() => {
this.processCategory(project, target, cb);
}
});
break;
case 'subpages':
// fetch namespaces first
Expand Down
1 change: 1 addition & 0 deletions messages/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@
"hover-to-exclude": "To exclude pages from view, hover over their names and click the ✖",
"images": "Images",
"info": "Info",
"include-subcategories": "Include all subcategories",
"invalid-category-url": "Invalid category! Please enter the full wiki URL of the category.",
"invalid-lang-project": "$1 is either invalid or not a multilingual project",
"invalid-page-url": "Invalid page! Please enter the full wiki URL of the page.",
Expand Down
1 change: 1 addition & 0 deletions messages/qqq.json
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@
"hover-to-exclude": "Placeholder in the input of the Topviews application, instructing to hover over entries and click on the X to exclude them from view",
"images": "The word 'images'.\n{{Identical|Image}}",
"info": "A link to the wiki page information page of the article. This appears in the 'info tiles' on Pageviews Analysis\n{{Identical|Info}}",
"include-subcategories": "Option shown on Massviews when the 'Category URL' source is selected. This option tells the tool to include all nested categories within the given category.",
"invalid-category-url": "Error message saying the user inputted wiki URL is not a valid category (as in the Category namespace).",
"invalid-lang-project": "Error message saying the given value is not a valid WMF multilingual project. $1 is the name of the project.",
"invalid-page-url": "Error message saying the user inputted wiki URL is not a valid page.",
Expand Down
122 changes: 122 additions & 0 deletions public_html/massviews/api.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
<?php

/**
 * Internal Massviews endpoint that lists the members of a category.
 *
 * Query parameters:
 *   - project   (required) project domain, with or without the trailing ".org"
 *   - category  (required) category title; spaces are converted to underscores
 *   - recursive (optional) any non-empty value other than "0" also includes
 *               the members of all nested subcategories
 *   - limit     (optional) maximum number of rows to return, defaults to 20000
 *
 * Responds with a JSON array of { title, ns } rows on success, or a JSON
 * object of the form { "errors": [ ... ] } when the request cannot be served.
 */

// config.php may be either in the parent directory or up two directories,
// depending on whether this is run on localhost or Tool Labs
if ( file_exists( __DIR__ . '/../config.php' ) ) {
    require_once __DIR__ . '/../config.php';
} else {
    require_once __DIR__ . '/../../config.php';
}

// set header as JSON
header( 'Content-type: application/json' );

$required_fields = [ 'category', 'project' ];
$errors = [];

foreach ( $required_fields as $field ) {
    // Array-valued parameters (e.g. "category[]=x") are rejected as well,
    // since the code below treats both values as plain strings.
    if ( !isset( $_GET[$field] ) || !is_string( $_GET[$field] ) ) {
        $errors[] = "The '$field' parameter is required";
    }
}

if ( count( $errors ) ) {
    echo json_encode( [
        'errors' => $errors
    ] );
    return;
}

// get database name given the project
// first add .org if not present
$project = $_GET['project'];
if ( !preg_match( '/\.org$/', $project ) ) {
    $project .= '.org';
}
// ROOTDIR and the DB_* constants are presumably defined by config.php
$site_map = (array) json_decode( file_get_contents( ROOTDIR . '/site_map.json' ) );
if ( !isset( $site_map[$project] ) ) {
    echo json_encode( [
        'errors' => [ "$project is not a valid project" ]
    ] );
    return;
}
$db = $site_map[$project] . '_p';

// connect to database
$client = new mysqli( DB_HOST, DB_USER, DB_PASSWORD, $db, DB_PORT );
if ( mysqli_connect_errno() ) {
    // Keep the body valid JSON (the content type was already announced) and
    // use an error status so that clients still treat this as a failed request.
    http_response_code( 500 );
    echo json_encode( [
        'errors' => [ 'Connect failed: ' . mysqli_connect_error() ]
    ] );
    exit();
}

$categories = [ str_replace( ' ', '_', $_GET['category'] ) ];

// "recursive=0" and an empty value both mean "do not recurse"
if ( !empty( $_GET['recursive'] ) ) {
    $categories = array_merge(
        recurseCategory( $client, $db, $categories ),
        $categories
    );
}

$limit = isset( $_GET['limit'] ) ? (int) $_GET['limit'] : 20000;

echo json_encode( getCategoryMembers( $client, $db, $categories, $limit ) );

/**
 * Collect the titles of every subcategory nested beneath the given categories.
 *
 * Each call looks up the direct subcategories of $searchCats, keeps the ones
 * not seen before and then recurses on just those, so category cycles end
 * once no unseen subcategory is left.
 *
 * @param object $client     Database connection; must provide query() and real_escape_string()
 * @param string $db         Database name used to qualify the table names
 * @param array  $searchCats Category titles (underscored, without namespace) to look beneath
 * @param array  $allCats    Subcategory titles collected so far
 * @param int    $count      Current recursion depth; no further levels are fetched beyond 50
 * @return array Unique subcategory titles; the initial $searchCats are only
 *               included when they are themselves reachable as subcategories
 */
function recurseCategory( $client, $db, $searchCats, $allCats = [], $count = 0 ) {
    // Nothing left to search. This also avoids sending an invalid "IN ( )" clause.
    if ( !count( $searchCats ) ) {
        return $allCats;
    }

    // Titles may contain quotes (and originate from user input), so each one
    // is escaped before it is placed in the query.
    $searchCatStr = implode( ',', array_map( function( $searchCat ) use ( $client ) {
        return "'" . $client->real_escape_string( $searchCat ) . "'";
    }, $searchCats ) );

    $sql = "SELECT page_title
            FROM $db.categorylinks
            JOIN $db.page ON page_id = cl_from
            WHERE cl_to IN ( $searchCatStr )
            AND cl_type = 'subcat'";

    $res = $client->query( $sql );

    if ( !$res ) {
        return $allCats;
    }

    // A subcategory can sit in several of the searched categories at once,
    // so the unseen titles are de-duplicated before being recorded.
    $newCats = array_values( array_unique( array_diff(
        array_column( $res->fetch_all(), 0 ),
        $allCats
    ) ) );

    $allCats = array_merge( $allCats, $newCats );

    if ( $count < 50 ) {
        // The recursive call already returns everything collected so far plus
        // whatever it found, so its result replaces $allCats rather than being
        // merged into it again.
        $allCats = recurseCategory( $client, $db, $newCats, $allCats, $count + 1 );
    }

    return $allCats;
}

/**
 * Fetch the pages and files that belong to any of the given categories.
 *
 * @param object $client     Database connection; must provide query() and real_escape_string()
 * @param string $db         Database name used to qualify the table names
 * @param array  $categories Category titles (underscored, without namespace)
 * @param int    $limit      Maximum number of rows to return
 * @return array Rows of the form [ 'title' => ..., 'ns' => ... ]; empty when
 *               there is nothing to look up or the query fails
 */
function getCategoryMembers( $client, $db, $categories, $limit ) {
    // The limit is placed in the query text, so force it to be an integer here
    // instead of relying on every caller having done so.
    $limit = (int) $limit;

    // An empty list would produce an invalid "IN ( )" clause, and a limit below
    // one can never yield rows, so skip the round trip in both cases.
    if ( !count( $categories ) || $limit < 1 ) {
        return [];
    }

    // Titles may contain quotes (and originate from user input), so each one
    // is escaped before it is placed in the query.
    $categoriesStr = implode( ',', array_map( function( $category ) use ( $client ) {
        return "'" . $client->real_escape_string( $category ) . "'";
    }, array_unique( $categories ) ) );

    // DISTINCT removes pages that sit in several of the categories before the
    // limit is applied. A truncated result therefore still contains $limit
    // rows, which callers can use to detect truncation.
    $sql = "SELECT DISTINCT page_title AS title, page_namespace AS ns
            FROM $db.categorylinks
            JOIN $db.page ON page_id = cl_from
            WHERE cl_to IN ( $categoriesStr )
            AND cl_type IN ('page', 'file')
            LIMIT $limit";

    $res = $client->query( $sql );

    if ( !$res ) {
        return [];
    }

    return $res->fetch_all( MYSQLI_ASSOC );
}
2 changes: 1 addition & 1 deletion public_html/massviews/application.css

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion public_html/massviews/application.js

Large diffs are not rendered by default.

24 changes: 16 additions & 8 deletions public_html/massviews/index.php
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,22 @@
<span class="glyphicon glyphicon-question-sign"></span>
</a>
</label>
<div class="checkbox pull-right category-subject-toggle">
<label>
<input class="category-subject-toggle--input" type="checkbox">
<?php echo $I18N->msg( 'category-subject-toggle' ); ?>
<a class="help-link" href="/massviews/faq#category_subject_toggle">
<span class="glyphicon glyphicon-question-sign"></span>
</a>
</label>
<div class="category-options">
<div class="checkbox pull-right category-subject-toggle">
<label>
<input class="category-subject-toggle--input" type="checkbox">
<?php echo $I18N->msg( 'category-subject-toggle' ); ?>
<a class="help-link" href="/massviews/faq#category_subject_toggle">
<span class="glyphicon glyphicon-question-sign"></span>
</a>
</label>
</div>
<div class="checkbox pull-right subcategories-toggle">
<label>
<input class="subcategories-toggle--input" type="checkbox">
<?php echo $I18N->msg( 'include-subcategories' ); ?>
</label>
</div>
</div>
<div class="input-group clearfix">
<div class="input-group-btn">
Expand Down
Loading

0 comments on commit e0bcf38

Please sign in to comment.