Skip to content

Commit

Permalink
Add linter
Browse files Browse the repository at this point in the history
  • Loading branch information
bidoubiwa committed Jun 22, 2023
1 parent 9d984a3 commit 6071ab0
Show file tree
Hide file tree
Showing 18 changed files with 1,421 additions and 547 deletions.
6 changes: 6 additions & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
node_modules
dist
examples
scripts
tests/env
coverage
61 changes: 61 additions & 0 deletions .eslintrc.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
module.exports = {
env: {
browser: true,
es6: true,
es2020: true,
'jest/globals': true,
node: true,
jasmine: true,
},
extends: [
'eslint:recommended',
'plugin:@typescript-eslint/recommended',
'plugin:@typescript-eslint/recommended-requiring-type-checking',
'plugin:prettier/recommended',
],
parser: '@typescript-eslint/parser',
parserOptions: {
ecmaVersion: 2019,
project: ['tsconfig.eslint.json'],
sourceType: 'module',
projectFolderIgnoreList: ['dist'],
},
plugins: ['@typescript-eslint', 'prettier', 'jest'],
rules: {
'no-dupe-class-members': 'off', // Off due to conflict with typescript overload functions
'prettier/prettier': [
'error',
{
singleQuote: true,
arrowParens: 'always',
semi: false,
bracketSpacing: true,
trailingComma: 'es5',
tsdoc: true,
printWidth: 80,
},
],
'@typescript-eslint/array-type': ['warn', { default: 'array-simple' }],
'@typescript-eslint/return-await': 'off',
'@typescript-eslint/no-explicit-any': 'off',
'@typescript-eslint/explicit-function-return-type': 'off',
'@typescript-eslint/member-delimiter-style': [
'error',
{
multiline: {
delimiter: 'none', // 'none' or 'semi' or 'comma'
requireLast: true,
},
singleline: {
delimiter: 'semi', // 'semi' or 'comma'
requireLast: false,
},
},
],
'comma-dangle': 'off',
'@typescript-eslint/ban-ts-ignore': 'off',
'@typescript-eslint/no-misused-promises': ['off'],
'@typescript-eslint/no-unsafe-member-access': ['off'],
'@typescript-eslint/no-unsafe-argument': 'off',
},
}
40 changes: 40 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# CI workflow: runs on every pull request and on pushes to the listed
# branches. Two independent jobs: ESLint, and a tsc build as a type check.
name: Tests

on:
  pull_request:
  push:
    # trying and staging branches are for BORS config
    branches:
      - trying
      - staging
      - main

jobs:
  # Runs the repository's `lint` script (ESLint) — see package.json.
  lint_tests:
    runs-on: ubuntu-latest
    name: lint tests
    steps:
      - uses: actions/checkout@v3
      - name: Setup node
        uses: actions/setup-node@v3
        with:
          node-version: 16
          # Cache yarn's package cache between runs to speed up installs.
          cache: 'yarn'
      - name: Install dependencies
        run: yarn
      - name: Run JS/TS linter
        run: yarn lint
  # Compiles the project with tsc via the `build` script; acts as the
  # type-check gate (job display name: types-check).
  build_test:
    runs-on: ubuntu-latest
    name: types-check
    steps:
      - uses: actions/checkout@v3
      - name: Setup node
        uses: actions/setup-node@v3
        with:
          node-version: 16
          cache: 'yarn'
      - name: Install dependencies
        run: yarn
      - name: Build project
        run: yarn build
7 changes: 7 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Run linting tests

```sh
yarn lint     # check for lint and formatting errors
yarn lint:fix # automatically fix the fixable errors
```

9 changes: 9 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
"yargs": "^17.7.2"
},
"scripts": {
"build": "tsc",
"start": "tsc && node dist/src/index.js",
"serve": "tsc && node dist/src/server.js",
"lint": "eslint .",
"lint:fix": "eslint . --fix",
"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
},
"author": "It's not you it's me",
Expand All @@ -29,6 +32,12 @@
"@types/prettier": "^2.7.3",
"@types/uuid": "^9.0.2",
"@types/yargs": "^17.0.24",
"@typescript-eslint/eslint-plugin": "^5.60.0",
"@typescript-eslint/parser": "^5.60.0",
"eslint": "^8.43.0",
"eslint-config-prettier": "^8.8.0",
"eslint-plugin-jest": "^27.2.2",
"eslint-plugin-prettier": "^4.2.1",
"typescript": "^5.1.3"
}
}
182 changes: 94 additions & 88 deletions src/crawler.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import { createPuppeteerRouter, PuppeteerCrawler, Router, PuppeteerCrawlingContext } from "crawlee";
import { minimatch } from "minimatch";
import DefaultScraper from "./scrapers/default.js";
import DocsearchScraper from "./scrapers/docsearch.js";
import CustomScraper from "./scrapers/custom.js";
import SchemaScraper from "./scrapers/schema.js";
import { Sender } from "./sender.js";
import { Config, Scraper } from "./types.js";


type DefaultHandler = Parameters<Parameters<Router<PuppeteerCrawlingContext>['addDefaultHandler']>[0]>[0]
import {
createPuppeteerRouter,
PuppeteerCrawler,
Router,
PuppeteerCrawlingContext,
} from 'crawlee'
import { minimatch } from 'minimatch'
import DefaultScraper from './scrapers/default.js'
import DocsearchScraper from './scrapers/docsearch.js'
import CustomScraper from './scrapers/custom.js'
import SchemaScraper from './scrapers/schema.js'
import { Sender } from './sender.js'
import { Config, Scraper } from './types.js'

type DefaultHandler = Parameters<
Parameters<Router<PuppeteerCrawlingContext>['addDefaultHandler']>[0]
>[0]

// Crawler class
// This class is responsible for crawling the urls and extract content to send to Meilisearch
Expand All @@ -23,26 +29,26 @@ export default class Crawler {
crawler: PuppeteerCrawler

constructor(sender: Sender, config: Config) {
console.info("Crawler::constructor");
this.sender = sender;
this.config = config;
this.urls = config.crawled_urls;
this.custom_crawler = config.custom_crawler;
console.info('Crawler::constructor')
this.sender = sender
this.config = config
this.urls = config.crawled_urls
this.custom_crawler = config.custom_crawler
// init the custome scraper depending on if config.strategy is docsearch, custom or default
this.scraper =
config.strategy == "docsearch"
config.strategy == 'docsearch'
? new DocsearchScraper(this.sender)
: config.strategy == "custom"
: config.strategy == 'custom'
? new CustomScraper(this.sender, config)
: config.strategy == "schema"
: config.strategy == 'schema'
? new SchemaScraper(this.sender, config)
: new DefaultScraper(this.sender, config);
: new DefaultScraper(this.sender, config)

//Create the router
let router = createPuppeteerRouter();
const router = createPuppeteerRouter()

// type DefaultHandler = Parameters<typeof router.addDefaultHandler>[0];
router.addDefaultHandler(this.defaultHandler.bind(this));
router.addDefaultHandler(this.defaultHandler.bind(this))

// create the crawler
this.crawler = new PuppeteerCrawler({
Expand All @@ -51,39 +57,39 @@ export default class Crawler {
launchContext: {
launchOptions: {
headless: config.headless || true,
args: ["--no-sandbox", "--disable-setuid-sandbox"],
ignoreDefaultArgs: ["--disable-extensions"],
args: ['--no-sandbox', '--disable-setuid-sandbox'],
ignoreDefaultArgs: ['--disable-extensions'],
},
},
});
})
}

async run() {
await this.crawler.run(this.urls);
await this.crawler.run(this.urls)
}

// Should we use `log`
async defaultHandler({ request , enqueueLinks, page }: DefaultHandler ) {
const title = await page.title();
console.log(`${title}`, { url: request.loadedUrl });
const crawled_globs = this.__generate_globs(this.urls);
async defaultHandler({ request, enqueueLinks, page }: DefaultHandler) {
const title = await page.title()
console.log(`${title}`, { url: request.loadedUrl })
const crawled_globs = this.__generate_globs(this.urls)
const excluded_crawled_globs = this.__generate_globs(
this.config.exclude_crawled_urls || []
);
)
const indexed_globs = this.__generate_globs(
this.config.indexed_urls || this.urls
);
)
const excluded_indexed_globs = this.__generate_globs(
this.config.exclude_indexed_urls || []
);
)

if (request.loadedUrl && !this.__is_paginated_url(request.loadedUrl)) {
//check if the url is in the list of urls to scrap
if (
this.__match_globs(request.loadedUrl, indexed_globs) &&
!this.__match_globs(request.loadedUrl, excluded_indexed_globs)
) {
await this.scraper.get(request.loadedUrl, page);
await this.scraper.get(request.loadedUrl, page)
}
}

Expand All @@ -93,78 +99,78 @@ export default class Crawler {
transformRequestFunction: (req) => {
// exclude all links that are files not parsable by puppeteer
if (this.__is_file_url(req.url)) {
return false;
return false
}
// remove all query params to avoid duplicates
const urlObject = new URL(req.url);
urlObject.search = "";
req.url = urlObject.toString();
const urlObject = new URL(req.url)
urlObject.search = ''
req.url = urlObject.toString()

return req;
return req
},
});
})
}

__generate_globs(urls: string[]) {
return urls.map((url) => {
if (url.endsWith("/")) {
return url + "**";
if (url.endsWith('/')) {
return url + '**'
}
return url + "/**";
});
return url + '/**'
})
}

__match_globs(url: string, globs: string[]) {
return globs.some((glob) => minimatch(url, glob));
return globs.some((glob) => minimatch(url, glob))
}

__is_file_url(url: string) {
const fileExtensions = [
".zip",
".pdf",
".doc",
".docx",
".xls",
".xlsx",
".ppt",
".pptx",
".rar",
".tar",
".gz",
".tgz",
".7z",
".bz2",
".jpg",
".jpeg",
".png",
".gif",
".svg",
".css",
".js",
".xml",
".txt",
".csv",
".rtf",
".mp3",
".wav",
".mp4",
".avi",
".mkv",
".mov",
".flv",
".wmv",
".m4v",
".ogg",
".mpg",
".mpeg",
".swf",
];
return fileExtensions.some((extension) => url.endsWith(extension));
'.zip',
'.pdf',
'.doc',
'.docx',
'.xls',
'.xlsx',
'.ppt',
'.pptx',
'.rar',
'.tar',
'.gz',
'.tgz',
'.7z',
'.bz2',
'.jpg',
'.jpeg',
'.png',
'.gif',
'.svg',
'.css',
'.js',
'.xml',
'.txt',
'.csv',
'.rtf',
'.mp3',
'.wav',
'.mp4',
'.avi',
'.mkv',
'.mov',
'.flv',
'.wmv',
'.m4v',
'.ogg',
'.mpg',
'.mpeg',
'.swf',
]
return fileExtensions.some((extension) => url.endsWith(extension))
}

__is_paginated_url(url: string) {
const urlObject = new URL(url);
const pathname = urlObject.pathname;
return /\/\d+\//.test(pathname);
const urlObject = new URL(url)
const pathname = urlObject.pathname
return /\/\d+\//.test(pathname)
}
}
Loading

0 comments on commit 6071ab0

Please sign in to comment.