Skip to content

Commit

Permalink
Improve atom parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
tyhopp committed Dec 7, 2024
1 parent cb61379 commit 95945fe
Show file tree
Hide file tree
Showing 9 changed files with 146 additions and 122 deletions.
24 changes: 24 additions & 0 deletions infra/entries-handler/src/lib/get-atom-entries.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import type { RssFeedEntries } from './types';
import type { XMLDocument } from 'linkedom/types/xml/document';

/**
 * Extracts the entries of an Atom feed document.
 *
 * For each `<entry>` element the result contains:
 * - `url`: the `href` of the entry's "alternate" link, i.e. the first `<link>`
 *   that has an `href` and whose `rel` is either absent or `alternate`
 *   (RFC 4287 treats a missing `rel` as `alternate`). If no such link exists,
 *   the `href` of the first `<link>` is used, and failing that the text of `<id>`.
 * - `title`: the text of `<title>`.
 * - `published`: the text of `<published>`, or of `<updated>` when there is no
 *   `<published>` element, independent of the order they appear in.
 *
 * Fields whose source elements are missing are left `undefined`.
 *
 * @param doc - Parsed XML document to read entries from.
 * @returns One entry per `<entry>` element found; an empty array if there are none.
 */
export function getAtomEntries(doc: XMLDocument): RssFeedEntries {
  const entries = [];

  for (const entryElement of doc.querySelectorAll('entry') || []) {
    const links = Array.from(entryElement.querySelectorAll('link') || []);

    // Entries often carry several links (self, replies, edit, enclosure, ...).
    // Taking the first one blindly can yield a non-article URL, so look for
    // the alternate link explicitly.
    const alternateLink = links.find((link) => {
      const rel = link.getAttribute('rel');
      return (rel == null || rel === 'alternate') && link.getAttribute('href') != null;
    });

    const url =
      alternateLink?.getAttribute('href') ??
      // Previous behavior, kept as a fallback: href of the first link, then <id>.
      links[0]?.getAttribute('href') ??
      entryElement.querySelector('id')?.textContent;

    const title = entryElement.querySelector('title')?.textContent;

    // A combined selector ('published, updated') returns whichever element
    // comes first in the document, so query them separately to make
    // <published> win whenever it is present.
    const published =
      entryElement.querySelector('published')?.textContent ??
      entryElement.querySelector('updated')?.textContent;

    entries.push({ url, title, published });
  }

  return entries;
}
51 changes: 0 additions & 51 deletions infra/entries-handler/src/lib/get-feed-entries.ts

This file was deleted.

24 changes: 24 additions & 0 deletions infra/entries-handler/src/lib/get-rss-entries.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import type { RssFeedEntries } from './types';
import type { XMLDocument } from 'linkedom/types/xml/document';

/**
 * Extracts the entries of an RSS feed document.
 *
 * Every `<item>` element is mapped to an entry whose `url`, `title` and
 * `published` fields hold the text of the item's `<link>`, `<title>` and
 * `<pubDate>` elements respectively. A field is `undefined` when the
 * corresponding element is missing.
 *
 * @param doc - Parsed XML document to read items from.
 * @returns One entry per `<item>` element found; an empty array if there are none.
 */
export function getRssEntries(doc: XMLDocument): RssFeedEntries {
  const items = Array.from(doc.querySelectorAll('item') || []);

  return items.map((item) => {
    // Text of the first matching descendant, or undefined when it is absent.
    const textOf = (selector: string) => item.querySelector(selector)?.textContent;

    return {
      url: textOf('link'),
      title: textOf('title'),
      published: textOf('pubDate')
    };
  });
}
9 changes: 6 additions & 3 deletions infra/entries-handler/src/lib/parse-feed.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { DOMParser } from 'linkedom';
import { getFeedFormat } from './get-feed-format';
import { getFeedEntries } from './get-feed-entries';
import type { RssFeedEntries } from './types';
import { getRssEntries } from './get-rss-entries';
import { getAtomEntries } from './get-atom-entries';
import { RssFeedFormat, type RssFeedEntries } from './types';

export function parseFeed(url: string, xml: string): RssFeedEntries {
const parser = new DOMParser();
Expand All @@ -18,7 +19,9 @@ export function parseFeed(url: string, xml: string): RssFeedEntries {
throw new Error(`Unable to determine feed format of RSS feed '${url}'.`);
}

let entries: RssFeedEntries = getFeedEntries(doc, format);
const entries: RssFeedEntries = format === RssFeedFormat.rss
? getRssEntries(doc)
: getAtomEntries(doc);

return entries;
}
5 changes: 5 additions & 0 deletions infra/entries-handler/test/fixture/atom.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
<entry>
<title>atom-title</title>
<published>atom-published</published>
<link rel="alternate" href="atom-url"/>
</entry>
<entry>
<id>atom-url</id>
<title>atom-title</title>
Expand Down
34 changes: 34 additions & 0 deletions infra/entries-handler/test/lib/get-atom-entries.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import * as path from 'path';
import * as fs from 'fs';
import { test, expect } from 'vitest';
import { DOMParser } from 'linkedom';
import { getAtomEntries } from '../../src/lib/get-atom-entries';

const xmlParser = new DOMParser();

// Parses an XML string into a document with the shared parser.
const parseXml = (xml: string) => xmlParser.parseFromString(xml, 'text/xml');

// Documents under test: the Atom fixture file, an Atom feed without entries,
// and an XML document that is not a feed.
const atomDoc = parseXml(fs.readFileSync(path.resolve(__dirname, '../fixture/atom.xml'), 'utf-8'));
const emptyAtomDoc = parseXml('<feed></feed>');
const emptyRandomDoc = parseXml('<hello></hello>');

// Entry shape expected for each entry in the fixture.
const expectedEntry = {
  url: 'atom-url',
  title: 'atom-title',
  published: 'atom-published'
};

test('should handle atom feeds', () => {
  expect(getAtomEntries(atomDoc)).toEqual([expectedEntry, expectedEntry]);
});

test('should return no atom feed entries if there are none', () => {
  expect(getAtomEntries(emptyAtomDoc)).toEqual([]);
});

test('should return no atom feed entries if the document is not an atom document', () => {
  expect(getAtomEntries(emptyRandomDoc)).toEqual([]);
});
63 changes: 0 additions & 63 deletions infra/entries-handler/test/lib/get-feed-entries.test.ts

This file was deleted.

34 changes: 34 additions & 0 deletions infra/entries-handler/test/lib/get-rss-entries.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import * as path from 'path';
import * as fs from 'fs';
import { test, expect } from 'vitest';
import { DOMParser } from 'linkedom';
import { getRssEntries } from '../../src/lib/get-rss-entries';

const xmlParser = new DOMParser();

// Parses an XML string into a document with the shared parser.
const parseXml = (xml: string) => xmlParser.parseFromString(xml, 'text/xml');

// Documents under test: the RSS fixture file, an RSS feed without items,
// and an XML document that is not a feed.
const rssDoc = parseXml(fs.readFileSync(path.resolve(__dirname, '../fixture/rss.xml'), 'utf-8'));
const emptyRssDoc = parseXml('<rss></rss>');
const emptyRandomDoc = parseXml('<hello></hello>');

// Entry shape expected for the item in the fixture.
const expectedEntry = {
  url: 'rss-url',
  title: 'rss-title',
  published: 'rss-published'
};

test('should handle rss feeds', () => {
  expect(getRssEntries(rssDoc)).toEqual([expectedEntry]);
});

test('should return no rss feed entries if there are none', () => {
  expect(getRssEntries(emptyRssDoc)).toEqual([]);
});

test('should return no rss feed entries if the document is not an rss document', () => {
  expect(getRssEntries(emptyRandomDoc)).toEqual([]);
});
24 changes: 19 additions & 5 deletions infra/entries-handler/test/lib/parse-feed.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { test, expect, vi } from 'vitest';
import { parseFeed } from '../../src/lib/parse-feed';
import { getFeedEntries } from '../../src/lib/get-feed-entries';
import { getRssEntries } from '../../src/lib/get-rss-entries';
import { getAtomEntries } from '../../src/lib/get-atom-entries';
import { getFeedFormat } from '../../src/lib/get-feed-format';
import { RssFeedFormat } from '../../src/lib/types';

Expand All @@ -16,8 +17,12 @@ vi.mock('../../src/lib/get-feed-format', () => ({
getFeedFormat: vi.fn()
}));

vi.mock('../../src/lib/get-feed-entries', () => ({
getFeedEntries: vi.fn()
vi.mock('../../src/lib/get-rss-entries', () => ({
getRssEntries: vi.fn()
}));

vi.mock('../../src/lib/get-atom-entries', () => ({
getAtomEntries: vi.fn()
}));

test('should error on feeds that are not a known format', () => {
Expand All @@ -26,9 +31,18 @@ test('should error on feeds that are not a known format', () => {
);
});

test('should return entries', () => {
test('should return rss entries', () => {
vi.mocked(getFeedFormat).mockReturnValueOnce(RssFeedFormat.rss);
vi.mocked(getRssEntries).mockReturnValueOnce([entry]);

const entries = parseFeed(url, xml);

expect(entries).toEqual([entry]);
});

test('should return atom entries', () => {
vi.mocked(getFeedFormat).mockReturnValueOnce(RssFeedFormat.atom);
vi.mocked(getFeedEntries).mockReturnValueOnce([entry]);
vi.mocked(getAtomEntries).mockReturnValueOnce([entry]);

const entries = parseFeed(url, xml);

Expand Down

0 comments on commit 95945fe

Please sign in to comment.