Skip to content

add support for bulk oembed #178

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM node:5.8
#FROM node:5.8
FROM arm64v8/node:6.11.2

EXPOSE 8061

5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Knil Version
- `/oembed.bulk` POST endpoint to process up to 10 URLs asynchronously
- fix to include unshortened final url
- fix to use oEmbed type if available (didn't work for Facebook videos)

# Iframely API for Responsive Web Embeds

This is the self-hosted version of Iframely's APIs and parsers.
9 changes: 5 additions & 4 deletions app.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
var sysUtils = require('./utils');
var bodyParser = require('body-parser')

console.log("");
console.log("Starting Iframely...");
@@ -39,7 +40,7 @@ app.use(function(req, res, next) {
res.setHeader('X-Powered-By', 'Iframely');
next();
});

app.use(bodyParser.json());
app.use(sysUtils.cacheMiddleware);


@@ -126,16 +127,16 @@ function errorHandler(err, req, res, next) {
}
else if (code === 404) {
respondWithError(req, res, 404, 'Not found');
}
}
else if (code === 410) {
respondWithError(req, res, 410, 'Gone');
}
else if (code === 415) {
respondWithError(req, res, 415, 'Unsupported Media Type');
}
}
else if (code === 417) {
respondWithError(req, res, 417, 'Unsupported Media Type');
}
}
else {
respondWithError(req, res, code, 'Server error');
}
2 changes: 1 addition & 1 deletion config.js
Original file line number Diff line number Diff line change
@@ -33,7 +33,7 @@
CACHE_TTL_PAGE_OTHER_ERROR: 1 * 60,

CLUSTER_WORKER_RESTART_ON_PERIOD: 8 * 3600 * 1000, // 8 hours.
CLUSTER_WORKER_RESTART_ON_MEMORY_USED: 120 * 1024 * 1024, // 120 MB.
CLUSTER_WORKER_RESTART_ON_MEMORY_USED: 512 * 1024 * 1024, // 512 MB.

RESPONSE_TIMEOUT: 5 * 1000,

12 changes: 6 additions & 6 deletions lib/core.js
Original file line number Diff line number Diff line change
@@ -1035,7 +1035,7 @@
var link2 = canonical.replace(/\/+$/, '');

if (link1 === link2 && link.rel.indexOf(CONFIG.R.oembed) == -1) {
// allow the canonical links for oEmbeds, as such mistakes are usually made for OG and Twitter:
// allow the canonical links for oEmbeds, as such mistakes are usually made for OG and Twitter:
// if publisher has oEmbed, he is most likely to have the valid embed codes
link.error = "Removed canonical link";
}
@@ -1193,18 +1193,18 @@
}
}

function findResponseStatusCode(result) {
function findResponseStatusCode(result,uri) {
if (result) {
for(var i = 0; i < result.length; i++) {
var r = result[i];

if (r.error && r.error[SYS_ERRORS.responseStatusCode]) {
sysUtils.log(' -- response (by "' + r.method.pluginId + '")', r.error[SYS_ERRORS.responseStatusCode]);
sysUtils.log(' -- response (by "' + r.method.pluginId + '")', r.error[SYS_ERRORS.responseStatusCode], uri);
return r.error[SYS_ERRORS.responseStatusCode];
}

if (r.error && r.error === SYS_ERRORS.timeout) {
sysUtils.log(' -- response (by "' + r.method.pluginId + '")', SYS_ERRORS.timeout);
sysUtils.log(' -- response (by "' + r.method.pluginId + '")', SYS_ERRORS.timeout, uri);
return SYS_ERRORS.timeout;
}
}
@@ -1413,7 +1413,7 @@
}

// Abort on error response code.
var errorResponseCode = findResponseStatusCode(result);
var errorResponseCode = findResponseStatusCode(result,uri);
if (errorResponseCode) {
abortCurrentRequest();
aborted = true;
@@ -1506,4 +1506,4 @@

exports.getOembed = oembedUtils.getOembed;

})(exports);
})(exports);
68 changes: 68 additions & 0 deletions modules/api/views.js
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@ var whitelist = require('../../lib/whitelist');
var pluginLoader = require('../../lib/loader/pluginLoader');
var jsonxml = require('jsontoxml');


function prepareUri(uri) {

if (!uri) {
@@ -422,4 +423,71 @@ module.exports = function(app) {
});
});

/**
 * Resolve one URL of a bulk request to its oEmbed representation.
 *
 * Failures are never thrown: they are reported through the first callback
 * argument as a plain `{url, error, error_code}` object so that one bad entry
 * does not prevent the remaining URLs of the batch from being processed.
 *
 * @param {Object} req - Incoming request. The `ssl`, `html5`, `maxwidth` /
 *   `max-width`, `refresh`, `media` and `omit_css` parameters are read from
 *   it through `getBooleanParam` / `getIntParam`.
 * @param {*} url - URL exactly as supplied by the caller.
 * @param {Function} cb - Node-style callback, invoked as `cb(errorInfo, null)`
 *   on failure or `cb(null, oembed)` on success. On success `oembed.orgUrl`
 *   holds the supplied URL, and `oembed.url` falls back to it when the oEmbed
 *   data carries no URL of its own.
 */
const processUrlOEmbed = (req, url, cb) => {
    // Entries come straight from a JSON body. A truthy non-string would make
    // the string handling below (`uri.split`, or earlier inside prepareUri)
    // throw synchronously and abort the whole batch, so reject it per-URL.
    if (url && typeof url !== 'string') {
        return cb({url: url, error: "url must be a string", error_code: "INVALIDURL"}, null);
    }

    const uri = prepareUri(url);
    if (!uri) {
        return cb({url: url, error: "empty url", error_code: "EMPTYURI"}, null);
    }

    // Outside of debug mode, refuse hosts without a dot (e.g. "localhost").
    if (!CONFIG.DEBUG && uri.split('/')[2].indexOf('.') === -1) {
        return cb({url: url, error: "local domains not supported", error_code: "LOCALDOMAIN"}, null);
    }

    log(req, 'Loading /oembed for', uri);

    // Read the request flags once; they drive both the fetch and the filtering.
    const filterNonSSL = getBooleanParam(req, 'ssl');
    const filterNonHTML5 = getBooleanParam(req, 'html5');
    const maxWidth = getIntParam(req, 'maxwidth') || getIntParam(req, 'max-width');

    iframelyCore.run(uri, {
        getWhitelistRecord: whitelist.findWhitelistRecordFor,
        filterNonSSL: filterNonSSL,
        filterNonHTML5: filterNonHTML5,
        maxWidth: maxWidth,
        refresh: getBooleanParam(req, 'refresh')
    }, function(error, result) {

        if (error) {
            return cb({url: url, error: error, error_code: error.code || error}, null);
        }

        iframelyCore.sortLinks(result.links);

        iframelyUtils.filterLinks(result, {
            filterNonSSL: filterNonSSL,
            filterNonHTML5: filterNonHTML5,
            maxWidth: maxWidth
        });

        const oembed = oembedUtils.getOembed(uri, result, {
            mediaPriority: getBooleanParam(req, 'media'),
            omit_css: getBooleanParam(req, 'omit_css')
        });

        if (!oembed.url) {
            oembed.url = url;
        }
        // Always echo the URL as requested so callers can match results to inputs.
        oembed.orgUrl = url;

        cb(null, oembed);
    });
};

/**
 * POST /oembed.bulk
 *
 * Expects a JSON body of the form `{"urls": [...]}` and resolves at most the
 * first 10 entries. Each URL is processed independently: the response is an
 * array holding, per input, either the oEmbed object or the error object
 * produced by `processUrlOEmbed`.
 */
app.post('/oembed.bulk', function(req, res, next) {

    // `urls` must be a real array: a string also has `length` and `slice`
    // and would otherwise be mapped character by character.
    const urls = req.body && req.body.urls;
    if (!Array.isArray(urls) || urls.length === 0) {
        return next(new Error("'urls' post param expected"));
    }

    // Cap the amount of work a single request can trigger.
    const batch = urls.slice(0, 10);
    const curriedProcessUrlOEmbed = processUrlOEmbed.bind(undefined, req);

    // async.reflect turns per-URL failures into `{error}` results so one bad
    // URL does not short-circuit the rest of the batch.
    async.map(batch, async.reflect(curriedProcessUrlOEmbed), function(err, result) {
        if (err) {
            return handleIframelyError(err, res, next);
        }
        const response = _.map(result, r => r.error || r.value);
        res.jsonpCached(response);
    });

});

};
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@
},
"license": "MIT",
"dependencies": {
"body-parser": "^1.17.2",
"async": "2.4.1",
"cheerio": "0.22.0",
"chokidar": "1.7.0",
40 changes: 40 additions & 0 deletions test/stress/oembedbulk.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Load-test definition that POSTs batches of URLs to /oembed.bulk.
# Each payload row supplies one "batch" field, which is spliced into the
# JSON request body as the contents of the "urls" array.
config:
payload:
# path is relative to the location of the test script
# NOTE(review): the accompanying change adds urlsbatches_small.csv — confirm
# that urlsbatches.csv exists or point this at the file that was added.
path: "urlsbatches.csv"
fields:
- "batch"
environments:
# Local instance: one request per second for 10 seconds.
dev:
target: "http://localhost:8061"
phases:
- duration: 10
arrivalRate: 1
# Hosted instance, constant rate: one request per second for 30 seconds.
production-slow:
target: "https://api.knil.co"
phases:
- duration: 30
arrivalRate: 1
# Hosted instance, ramping from 1 to 4 requests per second over 90 seconds.
production:
target: "https://api.knil.co"
phases:
- duration: 90
arrivalRate: 1
rampTo: 4
scenarios:
- name: "load testing"
flow:
# - log: "batch: {{ batch }}"
- post:
url: "/oembed.bulk"
headers:
Content-Type: "application/json"
body: '{"urls":[{{ batch }}]}'
# NOTE(review): these captures assume a response object with "results" and
# "errors" keys; the /oembed.bulk handler appears to return a bare JSON
# array, in which case these paths would not match — confirm.
capture:
- json: "$.results.length"
as: "numres"
- json: "$.errors.length"
as: "numerr"
- json: "$.results"
as: "results"
- log: "results:{{ numres }} errors: {{ numerr }} {{ results }}"
1,000 changes: 1,000 additions & 0 deletions test/stress/urlsbatches_small.csv

Large diffs are not rendered by default.

11 changes: 10 additions & 1 deletion utils.js
Original file line number Diff line number Diff line change
@@ -104,6 +104,14 @@
var urlObj = urlLib.parse(req.url, true);

var query = urlObj.query;
//use slice to generate a copy of array so sort doesnt modify the original
var postUrls = []
if(req.body && req.body.urls)
{
postUrls = req.body.urls.slice(0)
if(postUrls)
postUrls.sort()
}

delete query.refresh;

@@ -124,6 +132,7 @@
keys.forEach(function(key) {
newQuery[key] = query[key];
});
newQuery['urls'] = postUrls

urlObj.query = newQuery;

@@ -188,7 +197,7 @@

if (head) {

log(req, "Using cache for", req.url.replace(/\?.+/, ''), req.query.uri || req.query.url);
log(req, "Using cache for", req.url.replace(/\?.+/, ''), req.query.uri || req.query.url || req.body.urls);

var requestedEtag = req.headers['if-none-match'];