Skip to content

Commit

Permalink
fixes #21146
Browse files Browse the repository at this point in the history
  • Loading branch information
joaomoreno committed Mar 27, 2018
1 parent 2b11285 commit ab149ab
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 13 deletions.
3 changes: 2 additions & 1 deletion extensions/git/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -1113,6 +1113,7 @@
"byline": "^5.0.0",
"file-type": "^7.2.0",
"iconv-lite": "0.4.19",
"jschardet": "^1.6.0",
"vscode-extension-telemetry": "0.0.15",
"vscode-nls": "^3.2.1",
"which": "^1.3.0"
Expand All @@ -1125,4 +1126,4 @@
"@types/which": "^1.0.28",
"mocha": "^3.2.0"
}
}
}
81 changes: 81 additions & 0 deletions extensions/git/src/encoding.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/

'use strict';

import * as jschardet from 'jschardet';

// Overrides jschardet's MINIMUM_THRESHOLD constant once, at module load, so the
// value applies to every jschardet.detect() call made afterwards.
// NOTE(review): presumably this is the minimum confidence jschardet requires
// before it reports a guess - confirm against the jschardet documentation.
jschardet.Constants.MINIMUM_THRESHOLD = 0.2;

/**
 * Identifies an encoding from a byte order mark (BOM) at the start of `buffer`.
 *
 * Recognised marks:
 *  - `FE FF`    -> 'utf16be'
 *  - `FF FE`    -> 'utf16le'
 *  - `EF BB BF` -> 'utf8'
 *
 * NOTE(review): a UTF-32 LE BOM (`FF FE 00 00`) starts with the UTF-16 LE mark
 * and is therefore reported as 'utf16le'.
 *
 * @param buffer Raw bytes to inspect; only the first three bytes are read.
 * @returns The encoding name, or `null` when the buffer is missing, shorter
 * than the mark being tested, or does not start with a recognised mark.
 */
function detectEncodingByBOM(buffer: Buffer): string | null {
	if (!buffer || buffer.length < 2) {
		return null;
	}

	const b0 = buffer[0];
	const b1 = buffer[1];

	// UTF-16 BE
	if (b0 === 0xFE && b1 === 0xFF) {
		return 'utf16be';
	}

	// UTF-16 LE
	if (b0 === 0xFF && b1 === 0xFE) {
		return 'utf16le';
	}

	// UTF-8: the mark is three bytes long, so a two-byte buffer can never match
	if (buffer.length >= 3 && b0 === 0xEF && b1 === 0xBB && buffer[2] === 0xBF) {
		return 'utf8';
	}

	return null;
}

// Lower-cased jschardet encoding names that detectEncoding() discards
// (it returns null for them instead of reporting the guess).
const IGNORE_ENCODINGS = ['ascii', 'utf-8', 'utf-16', 'utf-32'];

// Lookup from a normalized jschardet name (lower-case, alphanumerics only) to
// the name detectEncoding() returns in its place.
const JSCHARDET_TO_ICONV_ENCODINGS: { [name: string]: string } = {
	big5: 'cp950',
	ibm866: 'cp866'
};

/**
 * Guesses the character encoding of `buffer`.
 *
 * A byte order mark, when present, decides the result. Otherwise the guess
 * comes from jschardet; names listed in IGNORE_ENCODINGS are rejected, and the
 * remaining name is lower-cased, stripped of non-alphanumeric characters and
 * translated through JSCHARDET_TO_ICONV_ENCODINGS when an entry exists.
 *
 * @param buffer Raw bytes to analyse.
 * @returns The encoding name, or `null` when no usable guess is available.
 */
export function detectEncoding(buffer: Buffer): string | null {
	const fromBOM = detectEncodingByBOM(buffer);
	if (fromBOM) {
		return fromBOM;
	}

	const guess = jschardet.detect(buffer);
	const rawName = guess && guess.encoding;
	if (!rawName) {
		return null;
	}

	// Ignore encodings that cannot guess correctly
	// (http://chardet.readthedocs.io/en/latest/supported-encodings.html)
	if (IGNORE_ENCODINGS.indexOf(rawName.toLowerCase()) !== -1) {
		return null;
	}

	const iconvKey = rawName.replace(/[^a-zA-Z0-9]/g, '').toLowerCase();
	return JSCHARDET_TO_ICONV_ENCODINGS[iconvKey] || iconvKey;
}
6 changes: 5 additions & 1 deletion extensions/git/src/git.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import iconv = require('iconv-lite');
import * as filetype from 'file-type';
import { assign, uniqBy, groupBy, denodeify, IDisposable, toDisposable, dispose, mkdirp, readBytes, detectUnicodeEncoding, Encoding, onceEvent } from './util';
import { CancellationToken } from 'vscode';
import { detectEncoding } from './encoding';

const readfile = denodeify<string, string | null, string>(fs.readFile);

Expand Down Expand Up @@ -661,7 +662,10 @@ export class Repository {

/**
 * Fetches the bytes of `object` via `this.buffer` and decodes them to a string.
 *
 * The encoding reported by `detectEncoding` for the content takes precedence
 * over the `encoding` argument. If the chosen name is not known to iconv-lite,
 * 'utf8' is used instead.
 *
 * @param object Object specifier, passed through unchanged to `this.buffer`.
 * @param encoding Encoding to use when detection yields nothing.
 * @returns The decoded text.
 */
async bufferString(object: string, encoding: string = 'utf8'): Promise<string> {
	const stdout = await this.buffer(object);

	// Detection must run before decoding: content-based detection wins over the caller's default.
	const preferred = detectEncoding(stdout) || encoding;
	const effective = iconv.encodingExists(preferred) ? preferred : 'utf8';

	return iconv.decode(stdout, effective);
}

async buffer(object: string): Promise<Buffer> {
Expand Down
16 changes: 5 additions & 11 deletions extensions/git/src/repository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -811,24 +811,18 @@ export class Repository implements Disposable {
}

/**
 * Returns the text of `filePath` as stored at `ref`.
 *
 * Runs as an `Operation.Show` operation. The file path is made relative to
 * the repository root with forward slashes, and the `files.encoding` setting
 * resolved for that file is handed to `bufferString` as the default encoding.
 *
 * @param ref Revision prefix of the `ref:path` object specifier.
 * @param filePath Path of the file; converted to a repository-relative path.
 * @returns The decoded file content.
 */
async show(ref: string, filePath: string): Promise<string> {
	return this.run(Operation.Show, () => {
		const relativePath = path.relative(this.repository.root, filePath).replace(/\\/g, '/');
		const configFiles = workspace.getConfiguration('files', Uri.file(filePath));
		const defaultEncoding = configFiles.get<string>('encoding');

		return this.repository.bufferString(`${ref}:${relativePath}`, defaultEncoding);
	});
}

/**
 * Returns the raw bytes of `filePath` as stored at `ref`.
 *
 * Runs as an `Operation.Show` operation. The file path is made relative to
 * the repository root with forward slashes; no decoding is applied.
 *
 * @param ref Revision prefix of the `ref:path` object specifier.
 * @param filePath Path of the file; converted to a repository-relative path.
 * @returns The undecoded content.
 */
async buffer(ref: string, filePath: string): Promise<Buffer> {
	return this.run(Operation.Show, () => {
		const relativePath = path.relative(this.repository.root, filePath).replace(/\\/g, '/');

		return this.repository.buffer(`${ref}:${relativePath}`);
	});
}

Expand Down
11 changes: 11 additions & 0 deletions extensions/git/src/typings/jschardet.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Minimal ambient typings for the parts of 'jschardet' used by the git extension.
 */
declare module 'jschardet' {
	/** Result object returned by `detect`. */
	export interface IDetectedMap {
		/**
		 * Name of the guessed character set.
		 * NOTE(review): the caller in encoding.ts guards against a falsy value
		 * here, so this may be empty/null at runtime - confirm against jschardet.
		 */
		encoding: string;
		/** Confidence of the guess. NOTE(review): presumably in the range 0..1 - confirm. */
		confidence: number;
	}

	/** Guesses the character set of the given bytes. */
	export function detect(buffer: Buffer): IDetectedMap;

	/** Library-wide tunables; these are assigned to at runtime by consumers. */
	export const Constants: {
		MINIMUM_THRESHOLD: number;
	};
}
4 changes: 4 additions & 0 deletions extensions/git/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ isexe@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"

jschardet@^1.6.0:
version "1.6.0"
resolved "https://registry.yarnpkg.com/jschardet/-/jschardet-1.6.0.tgz#c7d1a71edcff2839db2f9ec30fc5d5ebd3c1a678"

json3@3.3.2:
version "3.3.2"
resolved "https://registry.yarnpkg.com/json3/-/json3-3.3.2.tgz#3c0434743df93e2f5c42aee7b19bcb483575f4e1"
Expand Down

0 comments on commit ab149ab

Please sign in to comment.