From e52fb9c2dcbccbce87e63b0b5450eb788b2c289c Mon Sep 17 00:00:00 2001 From: Andy Hanson Date: Fri, 17 Mar 2017 19:16:53 -0700 Subject: [PATCH] Add 'encoding' rule --- src/rules/encodingRule.ts | 120 +++++++++++++++++++++++++++ src/test.ts | 14 ++-- test/rules/encoding/tslint.json | 8 ++ test/rules/encoding/utf16be.js.lint | Bin 0 -> 138 bytes test/rules/encoding/utf16le.js.lint | Bin 0 -> 144 bytes test/rules/encoding/utf8-bom.ts.lint | 2 + test/rules/encoding/utf8.ts.lint | 0 7 files changed, 137 insertions(+), 7 deletions(-) create mode 100644 src/rules/encodingRule.ts create mode 100644 test/rules/encoding/tslint.json create mode 100644 test/rules/encoding/utf16be.js.lint create mode 100644 test/rules/encoding/utf16le.js.lint create mode 100644 test/rules/encoding/utf8-bom.ts.lint create mode 100644 test/rules/encoding/utf8.ts.lint diff --git a/src/rules/encodingRule.ts b/src/rules/encodingRule.ts new file mode 100644 index 00000000000..f238babbabb --- /dev/null +++ b/src/rules/encodingRule.ts @@ -0,0 +1,120 @@ +/** + * @license + * Copyright 2017 Palantir Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import * as fs from "fs"; +import * as ts from "typescript"; + +import * as Lint from "../index"; + +export class Rule extends Lint.Rules.AbstractRule { + /* tslint:disable:object-literal-sort-keys */ + public static metadata: Lint.IRuleMetadata = { + ruleName: "encoding", + description: "Enforces UTF-8 file encoding.", + optionsDescription: "Not configurable.", + options: null, + optionExamples: ["true"], + type: "style", + typescriptOnly: false, + }; + /* tslint:enable:object-literal-sort-keys */ + + public static FAILURE_STRING(actual: Encoding): string { + return `This file is encoded as ${showEncoding(actual)} instead of UTF-8.`; + } + + public apply(sourceFile: ts.SourceFile): Lint.RuleFailure[] { + return this.applyWithFunction(sourceFile, walk); + } +} + +function walk(ctx: Lint.WalkContext): void { + const encoding = detectEncoding(ctx.sourceFile.fileName); + if (encoding !== "utf8") { + ctx.addFailure(0, 1, Rule.FAILURE_STRING(encoding)); + } +} + +function showEncoding(encoding: Encoding): string { + switch (encoding) { + case "utf8": + return "UTF-8"; + case "utf8-bom": + return "UTF-8 with byte-order marker (BOM)"; + case "utf16le": + return "UTF-16 (little-endian)"; + case "utf16be": + return "UTF-16 (big-endian)"; + } +} + +export type Encoding = "utf8" | "utf8-bom" | "utf16le" | "utf16be"; +function detectEncoding(fileName: string): Encoding { + const fd = fs.openSync(fileName, "r"); + const maxBytesRead = 3; // Only need 3 bytes to detect the encoding. + const buffer = new Buffer(maxBytesRead); + const bytesRead = fs.readSync(fd, buffer, /*offset*/ 0, /*length*/ maxBytesRead, /*position*/ 0); + return detectBufferEncoding(buffer, bytesRead); +} + +export function readBufferWithDetectedEncoding(buffer: Buffer): string { + switch (detectBufferEncoding(buffer)) { + case "utf8": + return buffer.toString(); + case "utf8-bom": + return buffer.toString("utf-8", 2); + case "utf16le": + return buffer.toString("utf16le", 2); + case "utf16be": + // Round down to nearest multiple of 2. + const len = buffer.length & ~1; // tslint:disable-line no-bitwise + // Flip all byte pairs, then read as little-endian. + for (let i = 0; i < len; i += 2) { + const temp = buffer[i]; + buffer[i] = buffer[i + 1]; + buffer[i + 1] = temp; + } + return buffer.toString("utf16le", 2); + } +} + +function detectBufferEncoding(buffer: Buffer, length = buffer.length): Encoding { + if (length < 2) { + return "utf8"; + } + + switch (buffer[0]) { + case 0xef: + if (buffer[1] === 0xbb && length >= 3 && buffer[2] === 0xbf) { + return "utf8-bom"; + } + break; + + case 0xfe: + if (buffer[1] === 0xff) { + return "utf16be"; + } + break; + + case 0xff: + if (buffer[1] === 0xfe) { + return "utf16le"; + } + } + + return "utf8"; +} diff --git a/src/test.ts b/src/test.ts index 22bb95af0c2..ca9b4bca8e8 100644 --- a/src/test.ts +++ b/src/test.ts @@ -25,6 +25,7 @@ import * as ts from "typescript"; import {Fix} from "./language/rule/rule"; import * as Linter from "./linter"; +import {readBufferWithDetectedEncoding} from "./rules/encodingRule"; import {LintError} from "./test/lintError"; import * as parse from "./test/parse"; @@ -86,9 +87,8 @@ export function runTest(testDirectory: string, rulesDirectory?: string | string[ const results: TestResult = { directory: testDirectory, results: {} }; for (const fileToLint of filesToLint) { - const fileBasename = path.basename(fileToLint, MARKUP_FILE_EXTENSION); - const fileCompileName = fileBasename.replace(/\.lint$/, ""); - let fileText = fs.readFileSync(fileToLint, "utf8"); + const fileCompileName = path.basename(fileToLint, MARKUP_FILE_EXTENSION); + let fileText = readBufferWithDetectedEncoding(fs.readFileSync(fileToLint)); const tsVersionRequirement = parse.getTypescriptVersionRequirement(fileText); if (tsVersionRequirement) { const tsVersion = new semver.SemVer(ts.version); @@ -123,12 +123,12 @@ export function runTest(testDirectory: string, rulesDirectory?: string | string[ getSourceFile(filenameToGet: string) { const target = compilerOptions.target === undefined ? ts.ScriptTarget.ES5 : compilerOptions.target; if (filenameToGet === ts.getDefaultLibFileName(compilerOptions)) { - const fileContent = fs.readFileSync(ts.getDefaultLibFilePath(compilerOptions)).toString(); + const fileContent = fs.readFileSync(ts.getDefaultLibFilePath(compilerOptions), "utf-8"); return ts.createSourceFile(filenameToGet, fileContent, target); } else if (filenameToGet === fileCompileName) { - return ts.createSourceFile(fileBasename, fileTextWithoutMarkup, target, true); + return ts.createSourceFile(fileToLint, fileTextWithoutMarkup, target, true); } else if (fs.existsSync(path.resolve(path.dirname(fileToLint), filenameToGet))) { - const text = fs.readFileSync(path.resolve(path.dirname(fileToLint), filenameToGet), {encoding: "utf-8"}); + const text = fs.readFileSync(path.resolve(path.dirname(fileToLint), filenameToGet), "utf-8"); return ts.createSourceFile(filenameToGet, text, target, true); } throw new Error(`Couldn't get source file '${filenameToGet}'`); @@ -150,7 +150,7 @@ export function runTest(testDirectory: string, rulesDirectory?: string | string[ rulesDirectory, }; const linter = new Linter(lintOptions, program); - linter.lint(fileBasename, fileTextWithoutMarkup, tslintConfig); + linter.lint(fileToLint, fileTextWithoutMarkup, tslintConfig); const failures = linter.getResult().failures; const errorsFromLinter: LintError[] = failures.map((failure) => { const startLineAndCharacter = failure.getStartPosition().getLineAndCharacter(); diff --git a/test/rules/encoding/tslint.json b/test/rules/encoding/tslint.json new file mode 100644 index 00000000000..6cd6f7e2d49 --- /dev/null +++ b/test/rules/encoding/tslint.json @@ -0,0 +1,8 @@ +{ + "jsRules": { + "encoding": true + }, + "rules": { + "encoding": true + } +} \ No newline at end of file diff --git a/test/rules/encoding/utf16be.js.lint b/test/rules/encoding/utf16be.js.lint new file mode 100644 index 0000000000000000000000000000000000000000..9e23e87aa89dcf5ac20167b9b0fc6d67b69f9617 GIT binary patch literal 138 zcmXwxyAD8507K90D~t>z7O`4<0h1Um*DDbQW}l9_VrqNR_PWXOIPs||nV8w-2kAmD w_vN0IjtwhjMU87!S`?Ax{p=;L)^IarhlJ*oe|UEJ>oGJWB>pF literal 0 HcmV?d00001 diff --git a/test/rules/encoding/utf16le.js.lint b/test/rules/encoding/utf16le.js.lint new file mode 100644 index 0000000000000000000000000000000000000000..cca0c813eb3f5bce94bd91658b40c8ed36a8a4c9 GIT binary patch literal 144 zcmXwzu?|2$5JX??E0h!`(TG;#3#ddPoVY|zLG9BqPGqy$*~!f8-d9D$NlZt>f@2q+ zHCL?Aw_7|T8=0C~95sfVLR=z;<50a$qxRA|X+3F3@^T&Ghmf)9O3a^8lF{pv E2cd5mzyJUM literal 0 HcmV?d00001 diff --git a/test/rules/encoding/utf8-bom.ts.lint b/test/rules/encoding/utf8-bom.ts.lint new file mode 100644 index 00000000000..e4366f08bd3 --- /dev/null +++ b/test/rules/encoding/utf8-bom.ts.lint @@ -0,0 +1,2 @@ +A +~ [This file is encoded as UTF-8 with byte-order marker (BOM) instead of UTF-8.] diff --git a/test/rules/encoding/utf8.ts.lint b/test/rules/encoding/utf8.ts.lint new file mode 100644 index 00000000000..e69de29bb2d