-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
OSM Multilingual data directly from wiki (wikibase data items) #5647
Merged
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
22e36be
WIP on replacing documentation with direct calls to OSM wikibase
bhousel b281010
Implement support for multilingual descriptions from wiki data items
nyurik aa78cd1
Data items - remove hardcoded locale IDs
nyurik 8b931f3
Use new message to encourage editing and translation
nyurik 9f7f4aa
Fix language fallback processing, caching bug.
nyurik File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
import _debounce from 'lodash-es/debounce'; | ||
import _forEach from 'lodash-es/forEach'; | ||
|
||
import { json as d3_json } from 'd3-request'; | ||
|
||
import { utilQsString } from '../util'; | ||
|
||
|
||
// Wikibase data items are fetched through the MediaWiki API of the OSM wiki.
// The exported `apibase()` accessor can point this somewhere else.
var apibase = 'https://wiki.openstreetmap.org/w/api.php';

// Module state:
//  _inflight      - pending requests, keyed by request URL
//  _wikibaseCache - entities already fetched, keyed by sitelink title
//  _localeIds     - per language code, the id of its locale entity
//                   (`false` means "looked up, no such entity")
var _inflight = {};
var _wikibaseCache = {};
var _localeIds = { en: false };

// Debounced flavour of `request`: with `leading: false` the call is only made
// once invocations have paused for 500 ms.
var debouncedRequest = _debounce(request, 500, { leading: false });
|
||
/**
 * Fetch `url` as JSON unless a request for the very same URL is already pending.
 * The pending request handle is kept in `_inflight` (keyed by URL) until the
 * response arrives. A call made while that URL is still pending returns
 * immediately and its callback is never invoked.
 * @param url string - full URL to fetch
 * @param callback function(err, data) - invoked once the request has finished
 */
function request(url, callback) {
    var alreadyPending = Boolean(_inflight[url]);

    if (!alreadyPending) {
        _inflight[url] = d3_json(url, function onResponse(err, data) {
            // Forget the request before handing over the result
            delete _inflight[url];
            callback(err, data);
        });
    }
}
|
||
|
||
/**
 * Pick the text to display out of a wikibase `labels` or `descriptions` map.
 * When MediaWiki does not recognize the requested language code it returns every
 * available language instead of a single entry, so the requested code may be
 * absent; English is used as the fallback in that case.
 * @param values object - either descriptions or labels, keyed by language code
 * @param langCode String - language code to look up
 * @returns the `value` of the chosen entry, or '' when there is no usable entry
 */
function localizedToString(values, langCode) {
    if (!values) return '';

    var entry = values[langCode] || values.en;
    if (!entry) return '';

    return entry.value;
}
|
||
|
||
/**
 * Turn an unsuccessful `wbgetentities` response into a displayable string.
 * Prefers the HTML of the `error.messages` entries (joined with `<br>`), then
 * falls back to `error.info`, then `error.code`, and finally to a generic text,
 * so that an error without a `messages` array or a response without an `error`
 * member never throws.
 * @param d object - parsed API response (may be null/undefined)
 * @returns string describing the failure
 */
function apiErrorToString(d) {
    var error = d && d.error;
    if (!error) return 'Unknown wikibase API error';

    if (Array.isArray(error.messages)) {
        var html = error.messages
            .map(function(v) { return v && v.html && v.html['*']; })
            .filter(Boolean);
        if (html.length) return html.join('<br>');
    }

    return error.info || error.code || 'Unknown wikibase API error';
}


export default {

    /**
     * Reset every piece of module state: pending requests, cached entities and
     * the known locale ids.
     * NOTE(review): this resets the locale ids to `{}`, whereas the module starts
     * out with `{ en: false }`, so after init() a request with langCode 'en'
     * will look up `Locale:en` once. Confirm which of the two is intended.
     */
    init: function() {
        _inflight = {};
        _wikibaseCache = {};
        _localeIds = {};
    },


    /**
     * Abort every pending request and forget them. Cached entities and locale
     * ids are kept.
     */
    reset: function() {
        _forEach(_inflight, function(req) { req.abort(); });
        _inflight = {};
    },


    /**
     * Get the best value for the property, or undefined if not found.
     * A statement limited to the requested language (qualifier P26 equal to the
     * locale id registered for `langCode`) wins; otherwise the first statement
     * with the "preferred" rank is used.
     * @param entity object from wikibase
     * @param property string e.g. 'P4' for image
     * @param langCode string e.g. 'fr' for French
     * @returns the entity id for 'wikibase-entityid' values, the raw value for
     *          anything else, or undefined when no statement qualifies
     */
    claimToValue: function(entity, property, langCode) {
        var statements = entity.claims && entity.claims[property];
        if (!statements) return undefined;

        var locale = _localeIds[langCode];
        var preferredPick, localePick;

        _forEach(statements, function(stmt) {
            if (!preferredPick && stmt.rank === 'preferred') {
                preferredPick = stmt;
            }
            // "no value" / "unknown value" snaks carry no `datavalue`, so it is
            // checked before being dereferenced.
            var limit = locale && stmt.qualifiers && stmt.qualifiers.P26 && stmt.qualifiers.P26[0];
            if (limit && limit.datavalue && limit.datavalue.value.id === locale) {
                localePick = stmt;
            }
        });

        var picked = localePick || preferredPick;
        var datavalue = picked && picked.mainsnak && picked.mainsnak.datavalue;
        if (!datavalue) return undefined;

        return datavalue.type === 'wikibase-entityid' ? datavalue.value.id : datavalue.value;
    },


    /**
     * Build the wiki page title used as sitelink for a key or a tag.
     * @param key string - tag key
     * @param value string - optional tag value
     * @returns 'Tag:key=value' when a value is given, 'Key:key' otherwise,
     *          with underscores replaced by spaces and surrounding spaces trimmed
     */
    toSitelink: function(key, value) {
        var result = value ? 'Tag:' + key + '=' + value : 'Key:' + key;
        return result.replace(/_/g, ' ').trim();
    },


    /**
     * Fetch the data items describing a key and, optionally, a key=value tag.
     * Entities already in the cache are not requested again. The first time a
     * language code is seen, its `Locale:<code>` entity is requested as well and
     * its id (or `false` if there is none) is remembered.
     * @param params object - `key`, optional `value`, `langCode`, and `debounce`
     *        (truthy to go through the debounced request function)
     * @param callback function(err, result) - `result.key` / `result.tag` hold the
     *        entities found, each with `label` and `description` filled in.
     *        On API failure `err` is a string. The callback is not invoked when
     *        an identical request is already pending.
     */
    getEntity: function(params, callback) {
        var doRequest = params.debounce ? debouncedRequest : request;
        var self = this;
        var titles = [];
        var result = {};
        var keySitelink = this.toSitelink(params.key);
        var tagSitelink = params.value ? this.toSitelink(params.key, params.value) : false;
        var localeSitelink;

        if (params.langCode && _localeIds[params.langCode] === undefined) {
            // If this is the first time we are asking about this locale,
            // fetch corresponding entity (if it exists), and cache it.
            // If there is no such entry, cache `false` value to avoid re-requesting it.
            localeSitelink = ('Locale:' + params.langCode).replace(/_/g, ' ').trim();
            titles.push(localeSitelink);
        }

        if (_wikibaseCache[keySitelink]) {
            result.key = _wikibaseCache[keySitelink];
        } else {
            titles.push(keySitelink);
        }

        if (tagSitelink) {
            if (_wikibaseCache[tagSitelink]) {
                result.tag = _wikibaseCache[tagSitelink];
            } else {
                titles.push(tagSitelink);
            }
        }

        if (!titles.length) {
            // Nothing to do, we already had everything in the cache
            return callback(null, result);
        }

        // Requesting just the user language code
        // If backend recognizes the code, it will perform proper fallbacks,
        // and the result will contain the requested code. If not, all values are returned:
        // {"zh-tw":{"value":"...","language":"zh-tw","source-language":"zh-hant"}
        // {"pt-br":{"value":"...","language":"pt","for-language":"pt-br"}}
        var obj = {
            action: 'wbgetentities',
            sites: 'wiki',
            titles: titles.join('|'),
            languages: params.langCode,
            languagefallback: 1,
            origin: '*',
            format: 'json',
            // There is an MW Wikibase API bug https://phabricator.wikimedia.org/T212069
            // We shouldn't use formatversion=2 until it gets fixed, but should switch to it afterwards
            // formatversion: 2,
        };

        var url = apibase + '?' + utilQsString(obj);
        doRequest(url, function(err, d) {
            if (err) {
                callback(err);
            } else if (!d || !d.success || d.error) {
                callback(apiErrorToString(d));
            } else {
                var localeId = false;
                _forEach(d.entities, function(res) {
                    // Titles that do not exist come back flagged with `missing: ''`
                    if (res.missing === '') return;

                    var title = res.sitelinks && res.sitelinks.wiki && res.sitelinks.wiki.title;
                    // Without a wiki sitelink the entity cannot be matched to a request;
                    // skipping it also keeps an undefined title from matching an
                    // undefined `localeSitelink` below.
                    if (!title) return;

                    // Simplify access to the localized values
                    res.description = localizedToString(res.descriptions, params.langCode);
                    res.label = localizedToString(res.labels, params.langCode);

                    if (title === keySitelink) {
                        _wikibaseCache[keySitelink] = res;
                        result.key = res;
                    } else if (title === tagSitelink) {
                        _wikibaseCache[tagSitelink] = res;
                        result.tag = res;
                    } else if (title === localeSitelink) {
                        localeId = res.id;
                    } else {
                        console.log('Unexpected title ' + title);
                    }
                });

                if (localeSitelink) {
                    // If locale ID is not found, store false to prevent repeated queries
                    self.addLocale(params.langCode, localeId);
                }

                callback(null, result);
            }
        });
    },


    /**
     * Register the locale entity id for a language code.
     * Exposed as a method because it makes unit testing easier.
     * @param langCode string e.g. 'fr'
     * @param qid entity id of the locale, or `false` when there is none
     */
    addLocale: function(langCode, qid) {
        _localeIds[langCode] = qid;
    },


    /**
     * Getter/setter for the API endpoint.
     * @param _ string - new endpoint; omit to read the current one
     * @returns the current endpoint when called without arguments, `this` otherwise
     */
    apibase: function(_) {
        if (!arguments.length) return apibase;
        apibase = _;
        return this;
    }

};
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this string used anywhere? I see it in the screenshots above, but my browser’s in-page find isn’t finding any references to it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch! It got lost in a merge accident :)