Add a maximum feed size #906

Open
wants to merge 10 commits into main
6 changes: 5 additions & 1 deletion .eslintrc.js
@@ -6,6 +6,7 @@ module.exports = {
],
extends: [
'eslint:recommended',
'plugin:mocha/recommended',
'plugin:@typescript-eslint/recommended',
],
// eslint-config-preact needs a Jest version to be happy, even if Jest isn't used.
@@ -18,7 +19,10 @@ module.exports = {
"@typescript-eslint/no-explicit-any": "warn",
"@typescript-eslint/no-unused-vars": "warn",
"camelcase": ["error", { "properties": "never", "ignoreDestructuring": true }],
"no-console": "error"
"no-console": "error",
"mocha/no-exclusive-tests": "error",
// We do this everywhere.
"mocha/no-mocha-arrows": "off",
},
env: {
node: true,
1 change: 1 addition & 0 deletions changelog.d/906.feature
@@ -0,0 +1 @@
Add a maximum allowed payload size to RSS feeds. This prevents oversized feeds from potentially slowing down Hookshot.
3 changes: 3 additions & 0 deletions config.sample.yml
@@ -112,6 +112,9 @@ listeners:
# pollConcurrency: 4
# pollIntervalSeconds: 600
# pollTimeoutSeconds: 30
# # (Optional) The maximum response size of a feed on first load. Oversized responses will prevent a connection from being created.
# maximumFeedSizeMB: 25

#provisioning:
# # (Optional) Provisioning API for integration managers
2 changes: 1 addition & 1 deletion package.json
@@ -102,7 +102,7 @@
"chai": "^4.3.4",
"eslint": "^8.49.0",
"eslint-config-preact": "^1.3.0",
"eslint-plugin-mocha": "^10.1.0",
"eslint-plugin-mocha": "^10.4.2",
"homerunner-client": "^1.0.0",
"jest": "^29.7.0",
"mini.css": "^3.0.1",
6 changes: 4 additions & 2 deletions src/Connections/FeedConnection.ts
@@ -10,6 +10,7 @@
import { readFeed, sanitizeHtml } from "../libRs";
import UserAgent from "../UserAgent";
import { retry, retryMatrixErrorFilter } from "../PromiseUtil";
import { BridgeConfigFeeds } from "../config/Config";
const log = new Logger("FeedConnection");
const md = new markdown({
html: true,
@@ -57,14 +58,14 @@
static readonly ServiceCategory = "feeds";


public static createConnectionForState(roomId: string, event: StateEvent<any>, {config, intent}: InstantiateConnectionOpts) {

Check warning on line 61 in src/Connections/FeedConnection.ts (GitHub Actions / lint-node): Unexpected any. Specify a different type
if (!config.feeds?.enabled) {
throw Error('RSS/Atom feeds are not configured');
}
return new FeedConnection(roomId, event.stateKey, event.content, intent);
}

static async validateUrl(url: string): Promise<void> {
static async validateUrl(url: string, config: BridgeConfigFeeds): Promise<void> {
try {
new URL(url);
} catch (ex) {
@@ -75,6 +76,7 @@
await readFeed(url, {
userAgent: UserAgent,
pollTimeoutSeconds: VALIDATION_FETCH_TIMEOUT_S,
maximumFeedSizeMb: config.maximumFeedSizeMB,
});
} catch (ex) {
throw new ApiError(`Could not read feed from URL: ${ex.message}`, ErrCode.BadValue);
@@ -113,7 +115,7 @@
}

const state = this.validateState(data);
await FeedConnection.validateUrl(state.url);
await FeedConnection.validateUrl(state.url, config.feeds);
const connection = new FeedConnection(roomId, state.url, state, intent);
await intent.underlyingClient.sendStateEvent(roomId, FeedConnection.CanonicalEventType, state.url, state);

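For context: validateUrl now receives the feeds config and forwards the limit to the native reader. Below is a minimal sketch of what the underlying readFeed call looks like with the new option; the URL, the literal values, and the checkFeed wrapper are illustrative only, while the option names mirror the diff above.

```typescript
import { readFeed } from "../src/libRs";
import UserAgent from "../src/UserAgent";

// Sketch only: how the configured limit reaches the native feed reader.
// A response larger than maximumFeedSizeMb rejects with "Feed exceeded
// maximum size", which validateUrl wraps into an ApiError with ErrCode.BadValue.
async function checkFeed(url: string): Promise<void> {
    await readFeed(url, {
        userAgent: UserAgent,
        pollTimeoutSeconds: 30,  // VALIDATION_FETCH_TIMEOUT_S in the real code
        maximumFeedSizeMb: 25,   // taken from config.feeds.maximumFeedSizeMB
    });
}
```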
2 changes: 1 addition & 1 deletion src/Connections/SetupConnection.ts
@@ -312,7 +312,7 @@ export class SetupConnection extends CommandConnection {

// provisionConnection will check it again, but won't give us a nice CommandError on failure
try {
await FeedConnection.validateUrl(url);
await FeedConnection.validateUrl(url, this.config.feeds);
} catch (err: unknown) {
log.debug(`Feed URL '${url}' failed validation: ${err}`);
if (err instanceof ApiError) {
12 changes: 12 additions & 0 deletions src/config/Config.ts
@@ -251,6 +251,7 @@ export interface BridgeConfigFeedsYAML {
pollIntervalSeconds?: number;
pollConcurrency?: number;
pollTimeoutSeconds?: number;
maximumFeedSizeMB?: number;
}

export class BridgeConfigFeeds {
@@ -259,13 +260,24 @@ export class BridgeConfigFeeds {
public pollTimeoutSeconds: number;
public pollConcurrency: number;

@configKey("The maximum response size of a feed on first load. Oversized responses will prevent a connection from being created.", true)
public maximumFeedSizeMB: number;

constructor(yaml: BridgeConfigFeedsYAML) {
this.enabled = yaml.enabled;
this.pollConcurrency = yaml.pollConcurrency ?? 4;
this.pollIntervalSeconds = yaml.pollIntervalSeconds ?? 600;
assert.strictEqual(typeof this.pollIntervalSeconds, "number");
this.pollTimeoutSeconds = yaml.pollTimeoutSeconds ?? 30;
assert.strictEqual(typeof this.pollTimeoutSeconds, "number");
this.maximumFeedSizeMB = yaml.maximumFeedSizeMB ?? 25;
assert.strictEqual(typeof this.maximumFeedSizeMB, "number");
if (this.maximumFeedSizeMB < 1) {
throw new ConfigError('feeds.maximumFeedSizeMB', 'Must be at least 1MB');
}
if (!Number.isInteger(this.maximumFeedSizeMB)) {
throw new ConfigError('feeds.maximumFeedSizeMB', 'Must be a whole number');
}
}

@hideKey()
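A quick sketch of how the new constructor validation behaves, using the BridgeConfigFeeds class from the diff above; the assertions themselves are illustrative and not part of the PR.

```typescript
import { strict as assert } from "assert";
import { BridgeConfigFeeds } from "../src/config/Config";

// Omitting maximumFeedSizeMB falls back to the 25MB default.
const feeds = new BridgeConfigFeeds({ enabled: true });
assert.equal(feeds.maximumFeedSizeMB, 25);

// Values below 1MB, or non-integer values, are rejected with a ConfigError.
assert.throws(() => new BridgeConfigFeeds({ enabled: true, maximumFeedSizeMB: 0 }));
assert.throws(() => new BridgeConfigFeeds({ enabled: true, maximumFeedSizeMB: 1.5 }));
```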
1 change: 1 addition & 0 deletions src/config/Defaults.ts
@@ -124,6 +124,7 @@ export const DefaultConfigRoot: BridgeConfigRoot = {
pollIntervalSeconds: 600,
pollTimeoutSeconds: 30,
pollConcurrency: 4,
maximumFeedSizeMB: 25,
},
provisioning: {
secret: "!secretToken"
1 change: 1 addition & 0 deletions src/feeds/FeedReader.ts
@@ -215,6 +215,7 @@ export class FeedReader {
etag,
lastModified,
userAgent: UserAgent,
maximumFeedSizeMb: this.config.maximumFeedSizeMB,
});

// Store any entity tags/cache times.
45 changes: 31 additions & 14 deletions src/feeds/parser.rs
@@ -37,6 +37,7 @@ pub struct ReadFeedOptions {
pub etag: Option<String>,
pub poll_timeout_seconds: i64,
pub user_agent: String,
pub maximum_feed_size_mb: i64,
}

#[derive(Serialize, Debug, Deserialize)]
@@ -199,25 +200,41 @@ pub async fn js_read_feed(url: String, options: ReadFeedOptions) -> Result<FeedR
headers.append("If-None-Match", HeaderValue::from_str(&etag).unwrap());
}

let max_content_size = (options.maximum_feed_size_mb * 1024 * 1024) as u64;

match req.headers(headers).send().await {
Ok(res) => {
// Pre-emptive check
let content_length = res.content_length().unwrap_or(0);
if content_length > max_content_size {
return Err(JsError::new(Status::Unknown, "Feed exceeded maximum size"));
}

let res_headers = res.headers().clone();
match res.status() {
StatusCode::OK => match res.text().await {
Ok(body) => match js_parse_feed(body) {
Ok(feed) => Ok(FeedResult {
feed: Some(feed),
etag: res_headers
.get("ETag")
.map(|v| v.to_str().unwrap())
.map(|v| v.to_string()),
last_modified: res_headers
.get("Last-Modified")
.map(|v| v.to_str().unwrap())
.map(|v| v.to_string()),
}),
Err(err) => Err(err),
},
Ok(body) => {
// Check if we only got the length after loading the response.
match body.len() as u64 <= max_content_size {
Review comment (Contributor): Minor: I'd just write it as if body.len() as u64 <= max_content_size {..., since if is also an expression in Rust. match kinda makes it seem like there's more to it than that.

true => match js_parse_feed(body) {
Ok(feed) => Ok(FeedResult {
feed: Some(feed),
etag: res_headers
.get("ETag")
.map(|v| v.to_str().unwrap())
.map(|v| v.to_string()),
last_modified: res_headers
.get("Last-Modified")
.map(|v| v.to_str().unwrap())
.map(|v| v.to_string()),
}),
Err(err) => Err(err),
},
false => {
Err(JsError::new(Status::Unknown, "Feed exceeded maximum size"))
}
}
}
Err(err) => Err(JsError::new(Status::Unknown, err)),
},
StatusCode::NOT_MODIFIED => Ok(FeedResult {
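The Rust change enforces the limit in two places: a pre-emptive check against the Content-Length header before reading the body, and a second check on the downloaded body itself, since servers may omit or understate Content-Length. Below is a rough TypeScript analogue of that two-stage guard, purely for illustration; fetchWithSizeLimit and its error wording are hypothetical, not part of this PR.

```typescript
// Illustrative analogue of the two-stage size guard implemented in parser.rs.
async function fetchWithSizeLimit(url: string, maximumFeedSizeMb: number): Promise<string> {
    const maxBytes = maximumFeedSizeMb * 1024 * 1024;
    const res = await fetch(url);

    // Pre-emptive check: trust the Content-Length header when the server sends one.
    const contentLength = Number(res.headers.get("content-length") ?? 0);
    if (contentLength > maxBytes) {
        throw new Error("Feed exceeded maximum size");
    }

    // Post-download check: Content-Length may be missing or wrong, so measure
    // the body that was actually received before handing it to the parser.
    const body = await res.text();
    if (Buffer.byteLength(body, "utf8") > maxBytes) {
        throw new Error("Feed exceeded maximum size");
    }
    return body;
}
```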
76 changes: 64 additions & 12 deletions tests/FeedReader.spec.ts
@@ -1,6 +1,6 @@
import { expect } from "chai";
import EventEmitter from "events";
import { BridgeConfigFeeds } from "../src/config/Config";
import { BridgeConfigFeeds, BridgeConfigFeedsYAML } from "../src/config/Config";
import { ConnectionManager } from "../src/ConnectionManager";
import { IConnection } from "../src/Connections";
import { FeedEntry, FeedReader } from "../src/feeds/FeedReader";
@@ -39,14 +39,15 @@ class MockMessageQueue extends EventEmitter implements MessageQueue {
}
}

async function constructFeedReader(feedResponse: () => {headers: Record<string,string>, data: string}) {
async function constructFeedReader(feedResponse: () => {headers?: Record<string,string>, data: string}, extraConfig?: Partial<BridgeConfigFeedsYAML>) {

const httpServer = await new Promise<Server>(resolve => {
const srv = createServer((_req, res) => {
res.writeHead(200);
const { headers, data } = feedResponse();
Object.entries(headers).forEach(([key,value]) => {
Object.entries(headers ?? {}).forEach(([key,value]) => {
res.setHeader(key, value);
});
res.writeHead(200);
res.write(data);
res.end();
}).listen(0, '127.0.0.1', () => {
@@ -59,6 +60,7 @@ async function constructFeedReader(feedResponse: () => {headers: Record<string,s
enabled: true,
pollIntervalSeconds: 1,
pollTimeoutSeconds: 1,
...extraConfig,
});
const cm = new MockConnectionManager([{ feedUrl } as unknown as IConnection]) as unknown as ConnectionManager
const mq = new MockMessageQueue();
@@ -71,7 +73,12 @@ async function constructFeedReader(feedResponse: () => {headers: Record<string,s
const feedReader = new FeedReader(
config, cm, mq, storage,
);
after(() => httpServer.close());

after(() => {
httpServer.close();
feedReader.stop();
});

return {config, cm, events, feedReader, feedUrl, httpServer, storage};
}

@@ -94,12 +101,12 @@ describe("FeedReader", () => {
}));

await feedReader.pollFeed(feedUrl);
feedReader.stop();
expect(events).to.have.lengthOf(1);

expect(events[0].data.feed.title).to.equal(null);
expect(events[0].data.title).to.equal(null);
});

it("should handle RSS 2.0 feeds", async () => {
const { events, feedReader, feedUrl } = await constructFeedReader(() => ({
headers: {}, data: `
@@ -127,7 +134,6 @@
}));

await feedReader.pollFeed(feedUrl);
feedReader.stop();
expect(events).to.have.lengthOf(1);

expect(events[0].data.feed.title).to.equal('RSS Title');
Expand All @@ -137,6 +143,7 @@ describe("FeedReader", () => {
expect(events[0].data.link).to.equal('http://www.example.com/blog/post/1');
expect(events[0].data.pubdate).to.equal('Sun, 6 Sep 2009 16:20:00 +0000');
});

it("should handle RSS feeds with a permalink url", async () => {
const { events, feedReader, feedUrl } = await constructFeedReader(() => ({
headers: {}, data: `
@@ -163,7 +170,6 @@
}));

await feedReader.pollFeed(feedUrl);
feedReader.stop();
expect(events).to.have.lengthOf(1);

expect(events[0].data.feed.title).to.equal('RSS Title');
Expand All @@ -173,6 +179,7 @@ describe("FeedReader", () => {
expect(events[0].data.link).to.equal('http://www.example.com/blog/post/1');
expect(events[0].data.pubdate).to.equal('Sun, 6 Sep 2009 16:20:00 +0000');
});

it("should handle Atom feeds", async () => {
const { events, feedReader, feedUrl } = await constructFeedReader(() => ({
headers: {}, data: `
@@ -203,7 +210,6 @@
}));

await feedReader.pollFeed(feedUrl);
feedReader.stop();
expect(events).to.have.lengthOf(1);

expect(events[0].data.feed.title).to.equal('Example Feed');
Expand All @@ -213,6 +219,7 @@ describe("FeedReader", () => {
expect(events[0].data.link).to.equal('http://example.org/2003/12/13/atom03');
expect(events[0].data.pubdate).to.equal('Sat, 13 Dec 2003 18:30:02 +0000');
});

it("should not duplicate feed entries", async () => {
const { events, feedReader, feedUrl } = await constructFeedReader(() => ({
headers: {}, data: `
@@ -235,9 +242,9 @@
await feedReader.pollFeed(feedUrl);
await feedReader.pollFeed(feedUrl);
await feedReader.pollFeed(feedUrl);
feedReader.stop();
expect(events).to.have.lengthOf(1);
});

it("should always hash to the same value for Atom feeds", async () => {
const expectedHash = ['md5:d41d8cd98f00b204e9800998ecf8427e'];
const { feedReader, feedUrl, storage } = await constructFeedReader(() => ({
@@ -254,10 +261,10 @@
}));

await feedReader.pollFeed(feedUrl);
feedReader.stop();
const items = await storage.hasSeenFeedGuids(feedUrl, ...expectedHash);
expect(items).to.deep.equal(expectedHash);
});

it("should always hash to the same value for RSS feeds", async () => {
const expectedHash = [
'md5:98bafde155b931e656ad7c137cd7711e', // guid
@@ -288,8 +295,53 @@
}));

await feedReader.pollFeed(feedUrl);
feedReader.stop();
const items = await storage.hasSeenFeedGuids(feedUrl, ...expectedHash);
expect(items).to.deep.equal(expectedHash);
});

it("should fail to handle a feed which exceed the maximum size.", async () => {
// Create some data of the right length
const data = `
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>RSS Title</title>
<description>This is an example of an RSS feed</description>
${Array.from({length: 8000}).map((_, i) => `<item>
<title>Example entry</title>
<guid isPermaLink="true">http://www.example.com/blog/post/${i}</guid>
</item>`).join('')}
</channel>
</rss>`;
Review comment (Contributor): Note: this is fine, measuring at 1.2 MB. If we wanted to be extra careful, we could do an assert checking if it's longer than 1024*1024.

const { feedReader, feedUrl } = await constructFeedReader(() => ({
data, headers: { 'Content-Length': data.length.toString()}
}), {
maximumFeedSizeMB: 1
});
await feedReader.pollFeed(feedUrl);
expect(feedReader["feedsFailingParsing"]).to.contain(feedUrl);
});

it("should fail to handle a feed which exceed the maximum size which does NOT send a Content-Length.", async () => {
// Create some data of the right length
const data = `
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>RSS Title</title>
<description>This is an example of an RSS feed</description>
${Array.from({length: 8000}).map((_, i) => `<item>
<title>Example entry</title>
<guid isPermaLink="true">http://www.example.com/blog/post/${i}</guid>
</item>`).join('')}
</channel>
</rss>`;
const { feedReader, feedUrl } = await constructFeedReader(() => ({
data
}), {
maximumFeedSizeMB: 1
});
await feedReader.pollFeed(feedUrl);
expect(feedReader["feedsFailingParsing"]).to.contain(feedUrl);
});
});