Skip to content

Commit

Permalink
wb data: add a --join option for the csv format
Browse files Browse the repository at this point in the history
as suggested by #148 (comment)
  • Loading branch information
maxlath committed May 6, 2021
1 parent 8cf328d commit e159ca7
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 13 deletions.
1 change: 1 addition & 0 deletions bin/wb-data
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ program
.option('-p, --props <props>', 'request only certain properties (info, sitelinks, aliases, labels, descriptions, claims, datatype)')
.option('-r, --revision <id>', 'request a specific revision')
.option('-f, --format <format>', 'Default: ndjson. Alternatives: ttl, csv')
.option('--join', 'when using the csv format, and selecting a single property, join values per entity, instead of creating one row per value')
.process('data')

const { isGuid, isPropertyClaimsId } = require('wikibase-sdk')
Expand Down
4 changes: 2 additions & 2 deletions lib/fetch_and_log_ids_data.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ const requestedPropsAndSubProps = parseProps(program.props)
const requestedProps = Object.keys(requestedPropsAndSubProps)

const options = require('../lib/parse_simplify_options')(program.keep)
const { simplify: simplifyOption, format } = program
const { simplify: simplifyOption, format, join } = program

module.exports = ids => {
if (format === 'ttl') return getTtlEntities(ids)
Expand All @@ -30,7 +30,7 @@ module.exports = ids => {
entities = entities.map(entity => simplify.entity(entity, options))
}
if (format === 'csv') {
logCsv(entities, requestedPropsAndSubProps)
logCsv({ entities, propsAndSubProps: requestedPropsAndSubProps, join })
} else {
logNdjson(entities)
}
Expand Down
42 changes: 31 additions & 11 deletions lib/log_csv.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@ const { unparse: convertToCsv } = require('papaparse')
const { exitMessage } = require('./errors')

let headers
let oneValuePerRow
let firstBatch = true

const logCsv = (entities, propsAndSubProps) => {
let csv
const formattedEntities = entities.map(format(propsAndSubProps))
const logCsv = ({ entities, propsAndSubProps, join }) => {
let csv, formattedEntities
if (join) oneValuePerRow = false
if (oneValuePerRow) {
formattedEntities = flatten(entities.map(format(propsAndSubProps)))
} else {
formattedEntities = entities.map(format(propsAndSubProps))
}
if (firstBatch) {
csv = convertToCsv({ fields: headers, data: formattedEntities })
firstBatch = false
Expand All @@ -17,25 +23,39 @@ const logCsv = (entities, propsAndSubProps) => {
}

// Builds the per-entity formatter passed to entities.map() by logCsv:
// given the requested props and subprops, returns a function turning an entity
// into CSV row data.
// NOTE(review): in this listing, lines from the previous and the new
// implementation appear interleaved; the comments below only describe the
// branches guarded by oneValuePerRow.
const format = propsAndSubProps => entity => {
const result = [ entity.id ]
for (const prop in propsAndSubProps) {
for (const subprop in propsAndSubProps[prop]) {
let value
if (entity[prop] && entity[prop][subprop]) {
value = entity[prop][subprop]
// One-value-per-row mode: only the first prop and its first subprop are read,
// and an array of [ id, value ] rows is returned (one row per value)
if (oneValuePerRow) {
const prop = Object.keys(propsAndSubProps)[0]
const subprop = Object.keys(propsAndSubProps[prop])[0]
const values = entity[prop] && entity[prop][subprop]
if (values && values.length > 0) {
return entity[prop][subprop].map(value => [ entity.id, value ])
} else {
// No value found: still return a single row holding the entity id
return [ [ entity.id, undefined ] ]
}
} else {
// Otherwise: a single row per entity, starting with the id, followed by
// one cell per requested prop/subprop pair (undefined when missing)
const result = [ entity.id ]
for (const prop in propsAndSubProps) {
for (const subprop in propsAndSubProps[prop]) {
let value
if (entity[prop] && entity[prop][subprop]) {
value = entity[prop][subprop]
}
result.push(value)
}
result.push(value)
}
return result
}
return result
}

// Flattens an array by one level: array items are expanded in place,
// any other item is kept as is.
// Implemented with a loop rather than `[].concat(...array)`, as spreading
// a very large array into function arguments can exceed the engine's
// argument limit and throw a RangeError.
const flatten = array => {
  const flattened = []
  for (const item of array) {
    if (Array.isArray(item)) {
      for (const value of item) flattened.push(value)
    } else {
      flattened.push(item)
    }
  }
  return flattened
}

const validatePropsAndInitCsvHeaders = propsAndSubProps => {
headers = [ 'id' ]
const props = Object.keys(propsAndSubProps)
if (props.length === 0) {
exitMessage('--props are required to output in csv format', example)
}
oneValuePerRow = props.length === 1
for (const prop of props) {
const subprops = Object.keys(propsAndSubProps[prop])
if (subprops.length === 0) {
Expand Down
16 changes: 16 additions & 0 deletions test/wb-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,22 @@ describe('wb data', function () {
lines[1].trim().should.startWith('Q123,septembre,')
})

it('should generate one row per value when there is only one property requested', async () => {
  const command = './bin/wd data Q10428420 --props P6375 --format csv'
  const { stdout } = await shellExec(command)
  // Ignore the first output line (expected to be the csv header row)
  const rows = stdout.split('\n').slice(1)
  rows.length.should.be.above(1)
  // Every remaining row should belong to the requested entity
  for (const row of rows) {
    row.should.startWith('Q10428420')
  }
})

// With --join, the values of the single requested property are expected
// to be joined in one row per entity, rather than one row per value
it('should generate one row per entity when there is only one property requested and --join is set', async () => {
  const { stdout } = await shellExec('./bin/wd data Q10428420 --props P6375 --format csv --join')
  // Ignore the first output line (expected to be the csv header row)
  const lines = stdout.split('\n').slice(1)
  lines.length.should.equal(1)
  lines[0].should.startWith('Q10428420')
})

it('should require props to return in csv format', async () => {
await shellExec('./bin/wd data Q123 --format csv')
.then(shouldNotBeCalled)
Expand Down

0 comments on commit e159ca7

Please sign in to comment.