Skip to content

Commit

Permalink
Mass refactoring and stackoverflow users parser (#83)
Browse files Browse the repository at this point in the history
* refactor: public config

delete public config, replace with package.json. Update version to 1.6.0 for this pull request.

* fix: searx pagination

* refactor: type system for routes

* refactor: universal redirection

* fix: stackoverflow questions

add NoHandlerFoundError

* feat: stackoverflow users parser
  • Loading branch information
artegoser authored Feb 25, 2024
1 parent c9f9e48 commit b78da40
Show file tree
Hide file tree
Showing 23 changed files with 216 additions and 132 deletions.
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"name": "txtdot",
"version": "1.5.3",
"version": "1.6.0",
"private": true,
"description": "",
"description": "txtdot is an HTTP proxy that parses only text, links and pictures from pages reducing internet bandwidth usage, removing ads and heavy scripts",
"main": "dist/app.js",
"dependencies": {
"@fastify/static": "^6.12.0",
Expand Down
10 changes: 5 additions & 5 deletions src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ import proxyRoute from './routes/browser/proxy';
import parseRoute from './routes/api/parse';
import rawHtml from './routes/api/raw-html';

import publicConfig from './publicConfig';
import packageJSON from './package';
import errorHandler from './errors/handler';
import getConfig from './config/main';
import searchRoute from './routes/browser/search';
import redirectRoute from './routes/browser/redirect';

class App {
async init() {
Expand Down Expand Up @@ -46,8 +46,8 @@ class App {
swagger: {
info: {
title: 'TXTDot API',
description: publicConfig.description,
version: publicConfig.version,
description: packageJSON.description,
version: packageJSON.version,
},
},
});
Expand All @@ -58,7 +58,7 @@ class App {
fastify.register(getRoute);

if (config.search.enabled) {
fastify.register(searchRoute);
fastify.register(redirectRoute);
}

if (config.proxy_res) fastify.register(proxyRoute);
Expand Down
6 changes: 6 additions & 0 deletions src/errors/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ export class EngineParseError extends TxtDotError {
}
}

/**
 * Error raised when nothing can handle a request target.
 *
 * Passes status code 404 and the name `NoHandlerFoundError` to `TxtDotError`;
 * the description is the given text prefixed with "No handler found for: ".
 */
export class NoHandlerFoundError extends TxtDotError {
  /**
   * @param message - Text identifying what could not be handled; it is
   *   appended to the fixed "No handler found for: " prefix.
   */
  constructor(message: string) {
    const description = `No handler found for: ${message}`;
    super(404, 'NoHandlerFoundError', description);
  }
}

export class LocalResourceError extends TxtDotError {
constructor() {
super(403, 'LocalResourceError', 'Proxying local resources is forbidden.');
Expand Down
25 changes: 16 additions & 9 deletions src/handlers/engine.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
import Route from 'route-parser';
import { HandlerInput } from './handler-input';
import { IHandlerOutput } from './handler.interface';
import { EngineParseError } from '../errors/main';
import { EngineFunction } from '../types/handlers';
import { NoHandlerFoundError } from '../errors/main';
import { EngineFunction, RouteValues } from '../types/handlers';

interface IRoute {
interface IRoute<TParams extends RouteValues> {
route: Route;
handler: EngineFunction;
handler: EngineFunction<TParams>;
}

export class Engine {
name: string;
domains: string[];
routes: IRoute[] = [];
// eslint-disable-next-line @typescript-eslint/no-explicit-any
routes: IRoute<any>[] = [];
constructor(name: string, domains: string[] = []) {
this.domains = domains;
this.name = name;
}

route(path: string, handler: EngineFunction) {
this.routes.push({ route: new Route(path), handler: handler });
route<TParams extends RouteValues>(
path: string,
handler: EngineFunction<TParams>
) {
this.routes.push({ route: new Route<TParams>(path), handler });
}

async handle(input: HandlerInput): Promise<IHandlerOutput> {
Expand All @@ -29,10 +33,13 @@ export class Engine {
const match = route.route.match(path);

if (match) {
return await route.handler(input, match);
return await route.handler(input, {
q: match,
reverse: (req) => route.route.reverse(req),
});
}
}

throw new EngineParseError(`No handler for ${path}. [${this.name}]`);
throw new NoHandlerFoundError(`${path}. [${this.name}]`);
}
}
6 changes: 2 additions & 4 deletions src/handlers/engines/readability.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@ import { Engine } from '../engine';

const ReadabilityEngine = new Engine('Readability');

ReadabilityEngine.route('*path', async (input, req) => {
ReadabilityEngine.route('*path', async (input, ro) => {
const reader = new Readability(input.parseDom().window.document);
const parsed = reader.parse();

if (!parsed) {
throw new EngineParseError(
`Parse error (${req.path}). [${ReadabilityEngine.name}]`
);
throw new EngineParseError(`(${ro.q.path}). [${ReadabilityEngine.name}]`);
}

return {
Expand Down
25 changes: 14 additions & 11 deletions src/handlers/engines/searx.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
import { Route } from '../../types/handlers';
import { Engine } from '../engine';
import { HandlerInput } from '../handler-input';

const SearXEngine = new Engine('SearX', ['searx.*']);

SearXEngine.route('/search?q=:search', async (input, req) => {
async function search(
input: HandlerInput,
ro: Route<{ search: string; pageno?: string }>
) {
const document = input.parseDom().window.document;
const search = req.search;
const url = new URL(input.getUrl());
const page = parseInt(url.searchParams.get('pageno') || '1');
const search = ro.q.search;
const page = parseInt(ro.q.pageno || '1');

const page_footer = `${
page !== 1
? `<a href="${url.origin}${url.pathname}?q=${search}&pageno=${
page - 1
}">Previous </a>|`
? `<a href="${ro.reverse({ search, pageno: page - 1 })}">Previous </a>|`
: ''
}<a href="${url.origin}${url.pathname}?q=${search}&pageno=${
page + 1
}"> Next</a>`;
}<a href="${ro.reverse({ search, pageno: page + 1 })}"> Next</a>`;

const articles = Array.from(document.querySelectorAll('.result'));
const articles_parsed = articles.map((a) => {
Expand Down Expand Up @@ -49,6 +49,9 @@ SearXEngine.route('/search?q=:search', async (input, req) => {
title: `${search} - Searx - Page ${page}`,
lang: document.documentElement.lang,
};
});
}

SearXEngine.route('/search?q=:search&pageno=:pageno', search);
SearXEngine.route('/search?q=:search', search);

export default SearXEngine;
45 changes: 0 additions & 45 deletions src/handlers/engines/stackoverflow.ts

This file was deleted.

18 changes: 18 additions & 0 deletions src/handlers/engines/stackoverflow/main.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { Engine } from '../../engine';
import questions from './questions';
import users from './users';
// Domain patterns handed to the Engine constructor for this engine.
const stackExchangeDomains = [
  'stackoverflow.com',
  '*.stackoverflow.com',
  '*.stackexchange.com',
  'askubuntu.com',
  'stackapps.com',
  'mathoverflow.net',
  'superuser.com',
  'serverfault.com',
];

const soEngine = new Engine('StackOverflow', stackExchangeDomains);

// Route patterns are registered in this order: question pages first, then
// user profile pages. Each captures an `id` segment and a trailing `slug`.
soEngine.route('/questions/:id/*slug', questions);
soEngine.route('/users/:id/*slug', users);

export default soEngine;
49 changes: 49 additions & 0 deletions src/handlers/engines/stackoverflow/questions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { Route } from '../../../types/handlers';
import { HandlerInput } from '../../handler-input';

/**
 * Handler for question pages: renders the question followed by every answer.
 *
 * @param input - Handler input; its parsed DOM is the page being converted.
 * @param ro - Matched route; `ro.q.id` and `ro.q.slug` come from the URL.
 * @returns An object with the rendered HTML `content`, a placeholder
 *   `textContent`, the page `title` and the document language.
 */
async function questions(
  input: HandlerInput,
  ro: Route<{ id: string; slug: string }>
) {
  const { document } = input.parseDom().window;
  const { id, slug } = ro.q;

  // The question is the element with id "question"; answers carry the
  // "answer" class. Both go through the same post renderer.
  const questionHtml = postParser(document.getElementById('question'));
  const answerHtml = Array.from(
    document.querySelectorAll('.answer'),
    (answer) => postParser(answer)
  );

  const heading = document.querySelector('.question-hyperlink');

  return {
    content: `${questionHtml}<hr>${
      answerHtml.length
    } answers <hr>${answerHtml.join('<hr>')}`,
    // TODO: placeholder built from the route values, not the real page text.
    textContent: `${id}/${slug}\n`,
    title: heading?.innerHTML || '',
    lang: document.documentElement.lang,
  };
}

/**
 * Escapes the characters that are significant in HTML text and in quoted
 * attribute values.
 */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Renders a single post (the question or one answer) as an HTML fragment:
 * a vote-count heading, the post body, and one `<h4>` line per signature.
 *
 * @param el - The post element, or `null` when the page has no such element.
 * @returns The HTML fragment, or an empty string when `el` is `null`.
 */
function postParser(el: Element | null): string {
  if (!el) {
    return '';
  }

  // The body is taken as markup on purpose: it is the post content itself.
  const body = el.querySelector('.js-post-body')?.innerHTML || '';
  const voteCount = el.querySelector('.js-vote-count')?.textContent || '';

  const footer = [...el.querySelectorAll('.post-signature')].map(
    (signature) => {
      const userLink =
        signature.querySelector<HTMLAnchorElement>('.user-details a');
      const userName = userLink?.textContent || '';
      const userUrl = userLink?.href || '';
      const userTitle =
        signature.querySelector('.user-action-time')?.textContent || '';

      // textContent and href are entity-decoded values from an untrusted
      // page, so they must be re-escaped before being placed back into HTML.
      return `<h4>${escapeHtml(userTitle)}${
        userUrl
          ? ` by <a href="${escapeHtml(userUrl)}">${escapeHtml(userName)}</a>`
          : ''
      }</h4>`;
    }
  );

  return `<h3>${escapeHtml(voteCount)} votes</h3>${body}${footer.join('')}`;
}

export default questions;
37 changes: 37 additions & 0 deletions src/handlers/engines/stackoverflow/users.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { Route } from '../../../types/handlers';
import { HandlerInput } from '../../handler-input';

/**
 * Handler for user pages: renders the text of the profile header block
 * followed by a "Top Posts" list of links.
 *
 * @param input - Handler input; its parsed DOM is the page being converted.
 * @param ro - Matched route; `ro.q.id` and `ro.q.slug` come from the URL.
 * @returns An object with the rendered HTML `content`, a placeholder
 *   `textContent`, the page `title` and the document language.
 */
async function users(
  input: HandlerInput,
  ro: Route<{ id: string; slug: string }>
) {
  // textContent and href are entity-decoded values from an untrusted page,
  // so they must be re-escaped before being placed back into HTML.
  const escapeHtml = (text: string): string =>
    text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  const document = input.parseDom().window.document;

  const userInfo =
    document.querySelector('.md\\:ai-start > div:nth-child(2)')?.textContent ||
    '';

  // Children of the second div under #js-top-posts; each is rendered as one
  // list row. An absent container yields an empty list.
  const topPostRows = [
    ...(document.querySelector('#js-top-posts > div:nth-child(2)')?.children ||
      []),
  ];

  const topPosts = topPostRows
    .map((row) => {
      const link = row.querySelector('a');
      const title = link?.textContent || '';
      const url = link?.href || '';
      const votes = row.querySelector('.s-badge__votes')?.textContent || '';
      const type =
        row.querySelector('.iconAnswer, .iconQuestion')?.textContent || '';

      return `<strong>${escapeHtml(type)} (${escapeHtml(
        votes
      )}) </strong><a href="${escapeHtml(url)}">${escapeHtml(title)}</a>`;
    })
    .join('<br/>');

  return {
    content: `${escapeHtml(userInfo)}<hr><h3>Top Posts</h3>${topPosts}`,
    // TODO: placeholder built from the route values, not the real page text.
    textContent: `${ro.q.id}/${ro.q.slug}\n`,
    title: document.querySelector('title')?.textContent || '',
    lang: document.documentElement.lang,
  };
}

export default users;
2 changes: 1 addition & 1 deletion src/handlers/main.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Distributor } from './distributor';
import Readability from './engines/readability';
import SearX from './engines/searx';
import StackOverflow from './engines/stackoverflow';
import StackOverflow from './engines/stackoverflow/main';

const distributor = new Distributor();

Expand Down
3 changes: 3 additions & 0 deletions src/package.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Re-exports the contents of the root package.json as this module's default
// export, so other modules can import its fields from './package'.
import * as config from '../package.json';

export default config;
5 changes: 0 additions & 5 deletions src/publicConfig.ts

This file was deleted.

4 changes: 2 additions & 2 deletions src/routes/browser/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { FastifyInstance } from 'fastify';

import publicConfig from '../../publicConfig';
import packageJSON from '../../package';
import { engineList } from '../../handlers/main';
import { indexSchema } from '../../types/requests/browser';

Expand All @@ -9,7 +9,7 @@ import getConfig from '../../config/main';
export default async function indexRoute(fastify: FastifyInstance) {
fastify.get('/', { schema: indexSchema }, async (_, reply) => {
return reply.view('/templates/index.ejs', {
publicConfig,
packageJSON,
engineList,
config: getConfig(),
});
Expand Down
20 changes: 20 additions & 0 deletions src/routes/browser/redirect.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { FastifyInstance } from 'fastify';

import { redirectSchema, IRedirectSchema } from '../../types/requests/browser';

/**
 * Registers `GET /redirect`.
 *
 * Every query parameter other than `url` is folded into the query string of
 * `url`, and the client is redirected to `/get?url=<encoded target>`.
 *
 * Example: `/redirect?url=https://example.com/search&q=cats` redirects to
 * `/get?url=https%3A%2F%2Fexample.com%2Fsearch%3Fq%3Dcats`.
 *
 * @param fastify - The Fastify instance the route is registered on.
 */
export default async function redirectRoute(fastify: FastifyInstance) {
  fastify.get<IRedirectSchema>(
    '/redirect',
    { schema: redirectSchema },
    async (request, reply) => {
      const { url } = request.query;

      // Everything except `url` itself belongs to the target's query string.
      const params = new URLSearchParams(request.query);
      params.delete('url');
      const extraQuery = params.toString();

      let target = url;
      if (extraQuery) {
        // Start a query string with '?', but extend an existing one with '&'
        // so that a `url` which already has parameters stays well-formed.
        let separator = '?';
        if (url.includes('?')) {
          separator = url.endsWith('?') || url.endsWith('&') ? '' : '&';
        }
        target = url + separator + extraQuery;
      }

      // Returning the reply tells Fastify the async handler has already
      // produced the response.
      return reply.redirect(`/get?url=${encodeURIComponent(target)}`);
    }
  );
}
Loading

0 comments on commit b78da40

Please sign in to comment.