Skip to content
This repository has been archived by the owner on May 9, 2022. It is now read-only.

2.0.5: use options.windowP instead of options.rawP when scraping a page #12

Merged
merged 12 commits into from
Feb 20, 2018
102 changes: 64 additions & 38 deletions getMedia.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,16 @@ const getPublisherFromProviders = (providers, mediaURL, options, firstErr, callb

parts = url.parse(provider.url + '?' + querystring.stringify({ format: 'json', url: mediaURL }))

retryTrip({
cachedTrip({
server: parts.protocol + '//' + parts.host,
path: parts.path,
timeout: options.timeout
}, options, (err, response, payload) => {
if (err) return next(providers, mediaURL, options, firstErr || err, callback)

if (options.verboseP) console.log('\nmediaURL=' + mediaURL + ' oembed=' + JSON.stringify(payload, null, 2))
if (!payload) return next(providers, mediaURL, options, firstErr || new Error('empty oembed result'), callback)

resolver(providers, mediaURL, options, payload, firstErr, callback)
})
}
Expand All @@ -113,18 +115,47 @@ const resolvers = {
paths = parts && parts.pathname.split('/')
if ((!paths) || (!payload._channel.validP(paths))) throw new Error('invalid author_url: ' + payload.author_url)

// Finishes a publisherInfo record and reports it through `callback`.
//
// Steps:
//   1. Adds the TLD/SLD/RLD/QLD/URL fields, derived from the record's own
//      publisher, providerValue and publisherURL (the record is mutated in place).
//   2. Calls getPropertiesForPublisher; a failure there is only logged (when
//      options.verboseP is set) and does not stop processing.
//   3. Tries the primary favicon (faviconURL), then the fallback (faviconURL2).
//
// `options` and `callback` are not parameters; they are taken from the
// surrounding resolver's scope.
//
// The favicon is treated as best-effort: if both attempts fail, the publisher
// record is still delivered. Previously that path returned without ever
// invoking `callback`, leaving the caller waiting forever.
const inner = (publisherInfo) => {
  underscore.extend(publisherInfo, {
    TLD: publisherInfo.publisher.split(':')[0],
    SLD: publisherInfo.publisher,
    RLD: publisherInfo.providerValue,
    QLD: '',
    URL: publisherInfo.publisherURL
  })

  getPropertiesForPublisher(publisherInfo, options, (err, result) => {
    if ((err) && (options.verboseP)) {
      console.log('\ngetPropertiesForPublisher=' + publisherInfo.publisher + ': ' + err.toString())
    }

    getFaviconForPublisher(publisherInfo, publisherInfo.faviconURL, options, (err, result) => {
      if (!err) return callback(null, publisherInfo)

      if (options.verboseP) console.log('\ngetFavIconforPublisher=' + publisherInfo.faviconURL + ': ' + err.toString())

      getFaviconForPublisher(publisherInfo, publisherInfo.faviconURL2, options, (err, result) => {
        if (!err) return callback(null, publisherInfo)

        // Log the URL that was actually attempted here (the fallback), not the primary one.
        if (options.verboseP) console.log('\ngetFavIconforPublisher=' + publisherInfo.faviconURL2 + ': ' + err.toString())

        // Both favicon sources failed: still complete the request exactly once.
        callback(null, publisherInfo)
      })
    })
  })
}

if (payload._channel.publisherInfo) return inner(payload._channel.publisherInfo)

cachedTrip({
server: parts.protocol + '//' + parts.host,
path: parts.path,
timeout: options.timeout
}, underscore.extend({ rawP: true }, options), (err, response, body) => {
timeout: options.timeouut
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

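I think this is a typo: `options.timeouut` should be `options.timeout`. As written, the property is undefined, so no timeout is passed for this request.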

}, underscore.extend({ scrapeP: true }, options), (err, response, body) => {
if (err) return next(providers, mediaURL, options, firstErr || err, callback)

metascraper.scrapeHtml(body).then((result) => {
const parts = url.parse(result.url)
const channel = payload._channel.get(paths, parts)
const publisherInfo = {
publisher: payload._channel.providerName + '#channel:' + channel,
publisher: payload._channel.providerName + '#channel:' + payload._channel.get(paths, parts),
publisherType: 'provider',
publisherURL: payload.author_url + '/videos',
providerName: provider.provider_name,
Expand All @@ -135,49 +166,41 @@ const resolvers = {
}

if (publisherInfo.faviconURL !== payload.thumbnail_url) publisherInfo.faviconURL2 = payload.thumbnail_url

if (options.verboseP) console.log('\nmediaURL=' + mediaURL + ' scraper=' + JSON.stringify(result, null, 2))
underscore.extend(publisherInfo, {
TLD: publisherInfo.publisher.split(':')[0],
SLD: publisherInfo.publisher,
RLD: publisherInfo.providerValue,
QLD: '',
URL: publisherInfo.publisherURL
})

getPropertiesForPublisher(publisherInfo, options, (err, result) => {
if ((err) && (options.verboseP)) {
console.log('\ngetPropertiesForPublisher=' + publisherInfo.publisher + ': ' + err.toString())
}

getFaviconForPublisher(publisherInfo, publisherInfo.faviconURL, options, (err, result) => {
if (!err) return callback(null, publisherInfo)

if (options.verboseP) console.log('\ngetFavIconforPublisher=' + publisherInfo.faviconURL + ': ' + err.toString())

getFaviconForPublisher(publisherInfo, publisherInfo.faviconURL2, options, (err, result) => {
if (!err) return callback(null, publisherInfo)

if (options.verboseP) console.log('\ngetFavIconforPublisher=' + publisherInfo.faviconURL + ': ' + err.toString())
})
})
})
inner(publisherInfo)
}).catch((err) => {
next(providers, mediaURL, options, firstErr || err, callback)
})
})
},

Twitch: (providers, mediaURL, options, payload, firstErr, callback) => {
const parts = url.parse(payload.author_url)
const paths = parts && parts.pathname.split('/')

const get = (paths, parts) => {
const cpaths = parts && parts.pathname.split('/')

return ((parts.pathname === parts.path) && (cpaths.length === 2) ? cpaths[1] : paths[1])
}

let providerValue = get(paths, parts)

resolvers._channel(providers, mediaURL, options, underscore.extend({
_channel: {
providerName: 'twitch',
param1: 2,
validP: (paths) => { return (paths.length === 2) },
get: (paths, parts) => {
const cpaths = parts && parts.pathname.split('/')

return ((parts.pathname === parts.path) && (cpaths.length === 2) ? cpaths[1] : paths[1])
get: get,
publisherInfo: {
publisher: 'twitch#author:' + providerValue,
publisherType: 'provider',
publisherURL: payload.author_url + '/videos',
providerName: 'twitch',
providerSuffix: 'author',
providerValue: providerValue,
faviconName: payload.author_name,
faviconURL: payload.author_thumbnail_url,
faviconURL2: payload.thumbnail_url
}
}
}, payload), firstErr, callback)
Expand Down Expand Up @@ -309,8 +332,11 @@ const retryTrip = (params, options, callback, retry) => {
method = method(retry.delay)

options.roundtrip(params, options, (err, response, payload) => {
const code = Math.floor(response.statusCode / 100)
let code

if (!response) return callback(err, response, payload)

code = Math.floor(response.statusCode / 100)
if ((!err) || (code !== 5) || (retry.retries-- < 0)) return callback(err, response, payload)

return setTimeout(() => { retryTrip(params, options, callback, retry) }, method(++retry.tries))
Expand All @@ -325,7 +351,7 @@ const roundTrip = (params, options, callback) => {

params = underscore.defaults(underscore.extend(underscore.pick(parts, 'protocol', 'hostname', 'port'), params),
{ method: params.payload ? 'POST' : 'GET' })
if (options.binaryP) options.rawP = true
if (options.binaryP || options.scrapeP) options.rawP = true
if (options.debugP) console.log('\nparams=' + JSON.stringify(params, null, 2))

request = client.request(underscore.omit(params, [ 'payload', 'timeout' ]), (response) => {
Expand Down
2 changes: 1 addition & 1 deletion media/providers.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"provider_url": "https://www.twitch.tv",
"schemes": [],
"domain": "api.twitch.tv",
"url": "https://api.twitch.tv/v4/oembed"
"url": "https://api.twitch.tv/v5/oembed"
},
{
"provider_name": "YouTube",
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "bat-publisher",
"version": "2.0.4",
"version": "2.0.5",
"description": "Routines to identify publishers for the BAT.",
"main": "index.js",
"scripts": {
Expand Down