Skip to content

Commit

Permalink
feat(actions): add fetchWeb()
Browse files Browse the repository at this point in the history
  • Loading branch information
ivangabriele committed Oct 11, 2023
1 parent 1465b09 commit f74a102
Show file tree
Hide file tree
Showing 14 changed files with 4,235 additions and 950 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# End-to-end test workflow: installs dependencies and runs `yarn test:e2e`.
name: E2E

# Triggered on every push.
on: push

jobs:
test_e2e:
name: E2E Test
# Skip this job when the pushed ref starts with `refs/heads/ci-release-v`.
if: ${{ !startsWith(github.ref, 'refs/heads/ci-release-v') }}
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v3
with:
# Ask setup-node to cache Yarn dependencies.
cache: yarn
node-version: 20
- name: Install
run: yarn
- name: Test
run: yarn test:e2e
36 changes: 34 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,35 @@
# FirePT
<p align="center">
<img alt="FirePT Logo" height="128" src="public/images/logo.png" />
</p>
<h1 align="center">FirePT</h1>
<h3 align="center">A ChatGPT plugin server to give ChatGPT extra powers.</h3>

A ChatGPT plugin to give ChatGPT extra powers.
---

## Get started

### Install

```sh
npm i -g firept
```

### Run

```sh
firept
```

## Features

### Web Features

- [x] Searching (Brave Search)
- [x] Fetching web pages

### Current Working Directory Features

- [ ] Listing files
- [ ] Reading files
- [ ] Writing files
- [ ] Running bash scripts
32 changes: 32 additions & 0 deletions config/jest.e2e.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
* For a detailed explanation regarding each configuration property, visit:
* https://jestjs.io/docs/configuration
*/

/**
 * Jest configuration for the end-to-end suite (ESM + TypeScript through ts-jest).
 *
 * @type {import('jest').Config}
 */
const config = {
  // --- Project layout -------------------------------------------------------
  // `<rootDir>` placeholders below are resolved from the parent directory of this file.
  rootDir: '..',
  testMatch: ['<rootDir>/e2e/**/*.spec.ts'],

  // --- Suite lifecycle ------------------------------------------------------
  // Modules run once before / after the whole test run.
  globalSetup: '<rootDir>/config/jest.e2e.setup.ts',
  globalTeardown: '<rootDir>/config/jest.e2e.teardown.ts',

  // --- TypeScript / ESM handling --------------------------------------------
  preset: 'ts-jest/presets/default-esm',
  extensionsToTreatAsEsm: ['.ts'],
  moduleNameMapper: {
    // Drop the `.js` suffix from relative import specifiers (`./x.js` -> `./x`).
    '^(\\.{1,2}/.*)\\.js$': '$1',
  },
  transform: {
    '^.+\\.m?[tj]sx?$': [
      'ts-jest',
      {
        useESM: true,
      },
    ],
  },

  // --- Runner behaviour -----------------------------------------------------
  clearMocks: true,
  coverageProvider: 'v8',
  errorOnDeprecated: true,
  maxWorkers: '50%',
  silent: false,
}

export default config
17 changes: 17 additions & 0 deletions config/jest.e2e.setup.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import { spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'

const PORT = process.env.PORT ? Number(process.env.PORT) : 3333

/**
 * Jest global setup for the E2E suite.
 *
 * Launches the application through `yarn start` (output is inherited so build and
 * server logs show up in the test run) and resolves once something is listening on `PORT`.
 *
 * @throws If the server process cannot be spawned, exits before the port is in use,
 * or the port is still free once the wait times out.
 */
export default async (): Promise<void> => {
  console.info()

  console.info('[E2E]', 'Starting server...')
  const serverProcess = spawn('yarn', ['start'], {
    stdio: 'inherit',
    shell: true,
  })

  // Fail fast with a meaningful error when the launch itself breaks (spawn error, failed
  // build, crashing server) instead of only hitting the generic port-wait timeout.
  // A listener on 'error' also prevents the event from being thrown as an uncaught exception.
  const serverFailed = new Promise<never>((_resolve, reject) => {
    serverProcess.once('error', reject)
    serverProcess.once('exit', (code, signal) => {
      reject(new Error(`[E2E] Server process exited prematurely (code: ${code}, signal: ${signal}).`))
    })
  })

  console.info('[E2E]', 'Waiting for server to start...')
  // Poll the port every 250ms, for at most 5000ms. `Promise.race` subscribes to both promises,
  // so the rejection emitted when the server is later stopped does not surface as unhandled.
  await Promise.race([tcpPortUsed.waitUntilUsed(PORT, 250, 5000), serverFailed])
}
12 changes: 12 additions & 0 deletions config/jest.e2e.teardown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import killPort from 'kill-port'
import tcpPortUsed from 'tcp-port-used'

const PORT = process.env.PORT ? Number(process.env.PORT) : 3333

/**
 * Jest global teardown for the E2E suite.
 *
 * Kills whatever is bound to `PORT`, then waits until the port is reported free.
 */
export default async function stopServer(): Promise<void> {
  // Polling cadence and upper bound used while waiting for the port to be released.
  const retryIntervalInMs = 250
  const timeoutInMs = 5000

  console.info('[E2E]', 'Stopping server...')
  await killPort(PORT)

  console.info('[E2E]', 'Waiting for server to stop...')
  await tcpPortUsed.waitUntilFree(PORT, retryIntervalInMs, timeoutInMs)
}
3 changes: 3 additions & 0 deletions e2e/web/__snapshots__/fetchWeb.spec.ts.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`E2E tests should fetch and convert web page from /web/fetch 1`] = `"{"sourceAsMarkdown":"# Example Domain\\n\\nThis domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.\\n\\n[More information...](https://www.iana.org/domains/example)"}"`;
28 changes: 28 additions & 0 deletions e2e/web/fetchWeb.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import got from 'got'
// import nock from 'nock';
// import app from '../../src/index'

/**
 * End-to-end tests issuing real HTTP requests against a locally running server.
 *
 * NOTE: the suite and test titles are part of the stored snapshot key — keep them unchanged.
 */
describe('E2E tests', () => {
  // Resolve the port the same way the E2E setup/teardown scripts do, so overriding `PORT`
  // does not leave the tests pointing at a different port than the one being waited on.
  const port = process.env.PORT ? Number(process.env.PORT) : 3333
  const baseUrl = `http://localhost:${port}`

  // TODO: Re-enable once the Brave Search API is mocked.
  // Mock Brave Search API response
  // beforeAll(() => {
  //   nock('https://api.search.brave.com').get('/res/v1/web/search').query(true).reply(200, {
  //     // Your mock response here
  //   })
  // })

  // it('should fetch search results from /web/search', async () => {
  //   const query = 'test query';
  //   const res = await request(app.callback()).get(`/web/search?query=${query}`);

  //   expect(res.status).toBe(200);
  //   expect(res.body).toHaveProperty('searchResults');
  // });

  it('should fetch and convert web page from /web/fetch', async () => {
    const url = 'https://example.org'
    // The target URL is itself a query-string value: percent-encode it so characters such as
    // `&`, `?` or `#` in a fetched URL cannot be mistaken for part of the request URL.
    const res = await got.get(`${baseUrl}/web/fetch?url=${encodeURIComponent(url)}`)

    expect(res.ok).toBe(true)
    expect(res.body).toMatchSnapshot()
  })
})
24 changes: 19 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,44 @@
"yarn": "3"
},
"scripts": {
"build": "tsc -p ./tsconfig.json",
"copy": "cp -R ./public ./build",
"clean": "rm -Rf ./build && mkdir ./build",
"build": "rm -Rf ./build && mkdir ./build && cp -R ./public ./build && tsc -p ./tsconfig.build.json",
"dev": "ts-node-dev --respawn ./src/index.ts",
"start": "yarn clean && yarn build && yarn copy && node ./build/index.js"
"start": "yarn build && node ./build/index.js",
"test:e2e": "yarn node --experimental-vm-modules $(yarn bin jest) -c ./config/jest.e2e.config.js",
"test:unit": "yarn node --experimental-vm-modules $(yarn bin jest) -c ./config/jest.unit.config.js"
},
"dependencies": {
"@koa/bodyparser": "5.0.0",
"@koa/cors": "4.0.0",
"@koa/router": "12.0.0",
"cheerio": "1.0.0-rc.12",
"class-transformer": "0.5.1",
"dotenv": "16.3.1",
"got": "13.0.0",
"koa": "2.14.2",
"koa-static": "5.0.0"
"koa-static": "5.0.0",
"turndown": "7.1.2",
"turndown-plugin-gfm": "1.0.2"
},
"devDependencies": {
"@ivangabriele/prettier-config": "3.1.0",
"@swc/core": "1.3.92",
"@swc/jest": "0.2.29",
"@tsconfig/node20": "20.1.2",
"@types/jest": "29.5.5",
"@types/kill-port": "2.0.1",
"@types/koa": "2.13.9",
"@types/koa-static": "4.0.2",
"@types/koa__cors": "4.0.1",
"@types/koa__router": "12.0.1",
"@types/tcp-port-used": "1.0.2",
"@types/turndown": "5.0.2",
"concurrently": "8.2.1",
"jest": "29.7.0",
"kill-port": "2.0.1",
"prettier": "3.0.3",
"tcp-port-used": "1.0.2",
"ts-jest": "29.1.1",
"ts-node-dev": "2.0.0",
"typescript": "5.2.2"
},
Expand Down
46 changes: 45 additions & 1 deletion public/.well-known/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,34 @@ servers:
- url: http://localhost:3333

paths:
/web/fetch:
get:
operationId: getWebFetch
summary: Get web page content as Markdown from a URL
parameters:
- in: query
name: url
description: URL to fetch from
schema:
type: string
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/getWebFetchResponse'
'400':
description: KO
content:
application/json:
schema:
$ref: '#/components/schemas/errorResponse'

/web/search:
get:
operationId: getWebSearch
summary: Get web search engine results from a query.
summary: Get web search engine results from a query
parameters:
- in: query
name: query
Expand All @@ -24,9 +48,29 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/getWebSearchResponse'
'400':
description: KO
content:
application/json:
schema:
$ref: '#/components/schemas/errorResponse'

components:
schemas:
errorResponse:
type: object
properties:
error:
type: string
description: Error message

getWebFetchResponse:
type: object
properties:
sourceAsMarkdown:
type: string
description: Web page source as simplified Markdown

getWebSearchResponse:
type: object
properties:
Expand Down
89 changes: 89 additions & 0 deletions src/actions/fetchWeb.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import { load as cheerioLoad } from 'cheerio'
import got from 'got'
import TurndownService from 'turndown'
// @ts-ignore
import turndownPluginGfm from 'turndown-plugin-gfm'

// Module-wide HTML -> Markdown converter: ATX headings (`#`), `-` list bullets,
// fenced code blocks, with the GFM plugin enabled.
const turndownService = new TurndownService({
  headingStyle: 'atx',
  bulletListMarker: '-',
  codeBlockStyle: 'fenced',
}).use(turndownPluginGfm.gfm)

/** Cheerio document handle, as returned by `cheerioLoad()`. */
type CheerioRoot = ReturnType<typeof cheerioLoad>

/** Tells whether `url` targets github.com or one of its subdomains (checked on the hostname only). */
function isGithubUrl(url: string): boolean {
  try {
    const { hostname } = new URL(url)

    return hostname === 'github.com' || hostname.endsWith('.github.com')
  } catch {
    return false
  }
}

/** Removes the document head and the body elements that carry no textual content worth converting. */
function removeNonContentElements($: CheerioRoot): void {
  $('head').remove()

  const $body = $('body')
  $body.find('audio, button, footer, img, picture, style, template, video').remove()

  // In-page anchors (`href="#..."`) are meaningless once the page is flattened to Markdown.
  $body.find('a').each((_index, anchor) => {
    if ($(anchor).attr('href')?.startsWith('#')) {
      $(anchor).remove()
    }
  })

  $body.find('br').replaceWith(' ')
}

/** Replaces GitHub's source code viewers (embedded JSON payload, `data-hpc` tables) with `<pre><code>` blocks. */
function inlineGithubSourceCode($: CheerioRoot): void {
  const $body = $('body')

  $body.find('script[data-target="react-app.embeddedData"]').each((_index, script) => {
    let embeddedData: unknown
    try {
      embeddedData = JSON.parse($(script).text())
    } catch {
      // A malformed payload must not abort the whole conversion: skip this script only.
      return
    }

    const rawLines = (embeddedData as { payload?: { blob?: { rawLines?: unknown } } } | null)?.payload?.blob
      ?.rawLines
    if (Array.isArray(rawLines)) {
      $(script).replaceWith($('<pre>').append($('<code>').text(rawLines.join('\n'))))
    }
  })

  $body.find('table').each((_index, table) => {
    if ($(table).data('hpc') === undefined) {
      return
    }

    // One source line per table row, each terminated by a line break.
    const sourceCode = $(table)
      .find('tbody tr')
      .toArray()
      .map(row => `${$(row).find('td.blob-code').text()}\n`)
      .join('')

    $(table).replaceWith($('<pre>').append($('<code>').text(sourceCode)))
  })
}

/** Normalizes the Markdown produced by Turndown (empty links, padded link labels, list markers, blank lines). */
function cleanUpMarkdown(markdown: string): string {
  return (
    markdown
      // Links left without any label (e.g. their image was removed).
      .replace(/\[\s*\]\([^)]*\)/g, '')
      // Trim whitespace around link labels. The label is matched lazily so that each link on a
      // line is handled on its own and trailing whitespace is actually stripped.
      .replace(/\[\s*(.*?)\s*\]\(([^)]*)\)/g, '[$1]($2)')
      .replace(/^-\s{2,}/gm, '- ')
      .replace(/\n{2,}/gm, '\n\n')
      // Github
      .replace('You can’t perform that action at this time.', '')
      .trim()
  )
}

/**
 * Fetches a web page and converts its content to simplified Markdown.
 *
 * Non-textual elements (media, buttons, styles, scripts, footer, in-page anchors) are dropped before
 * conversion. On GitHub pages, source code viewers are turned into code blocks first.
 *
 * @param url URL of the page to fetch.
 * @returns The page content as Markdown, or `null` when fetching or converting failed
 * (the error is logged, never thrown).
 */
export async function fetchWeb(url: string): Promise<string | null> {
  try {
    const sourceAsHtml = await got.get(url).text()

    const $ = cheerioLoad(sourceAsHtml)
    removeNonContentElements($)

    if (isGithubUrl(url)) {
      inlineGithubSourceCode($)
    }

    // Scripts are only removed now because the GitHub extraction above reads one of them.
    $('body').find('script').remove()

    return cleanUpMarkdown(turndownService.turndown($.html()))
  } catch (error) {
    console.error(`Error fetching or converting web page: ${error}`)
    return null
  }
}
Loading

0 comments on commit f74a102

Please sign in to comment.