Skip to content
This repository has been archived by the owner on Sep 6, 2021. It is now read-only.

#10071 bash detection for extensionless file #10141

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/document/Document.js
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ define(function (require, exports, module) {
*/
function Document(file, initialTimestamp, rawText) {
this.file = file;
this._updateLanguage();
this._updateLanguage(rawText);
this.refreshText(rawText, initialTimestamp, true);
}

Expand Down Expand Up @@ -675,10 +675,15 @@ define(function (require, exports, module) {

/**
* Updates the language to match the current mapping given by LanguageManager
*
* @param {!string} rawText Text content of the file.
*/
Document.prototype._updateLanguage = function () {
Document.prototype._updateLanguage = function (rawText) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is called from other places that won't pass rawText (e.g. _notifyFilePathChanged(), DocumentManager._handleLanguageAdded(), etc.). Will the behavior be correct then? It safer & simpler to remove the arg and just invoke this.getText() instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But also... since this is called in multiple cases, various actions will trigger a "re-check" of the document's text. If the user edits the document to add/remove the shebang, at what point would they expect the document to switch to/from Bash mode? We should make sure that feels predictable.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this.getText() won't exist cause the functions are called by Document constructor. Therefore, using rawText is probably the best way to pass the contents to LanguageManager. So far, the shebang detection happens only when user opens a file, I am not sure about the "re-check" triggers during editing. Could you or anybody help me for that?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... and yes rawText can be null, so _updateLanguage() can be called without rawText.

var oldLanguage = this.language;
this.language = LanguageManager.getLanguageForPath(this.file.fullPath);
if (this.language === LanguageManager.getLanguage("unknown")) {
this.language = LanguageManager.getLanguageForContent(rawText);
}
if (oldLanguage && oldLanguage !== this.language) {
this.trigger("languageChanged", oldLanguage, this.language);
}
Expand Down
31 changes: 27 additions & 4 deletions src/language/LanguageManager.js
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,28 @@ define(function (require, exports, module) {
function getLanguage(id) {
return _languages[id];
}

/**
* Resolves a file content to a Language object.
* @param {!string} rawText Text content of the file.
*
* @return {Language} The language detected based on file content or the fallback language
*/
function getLanguageForContent(rawText) {
// Shebang detection for script languages
if (rawText) {
//filter out #!/usr/bin/, #!/usr/local/bin/, #!/bin/, #!/usr/bin/env, and arguments in order to get language name
//for python, it filters out version number as well (python2 python3 -> python)
var lang = rawText.match(/^#!(?:\/usr\/bin\/env\ |(?:\/usr|\/usr\/local)?\/bin\/)(python(?=[23]\W)|(?!python)\w+)[^\r\n]*[\r\n]/m);
if (lang && lang[1] && _languages[lang[1]]) {
return _languages[lang[1]];
}
else if (lang && lang[1] && _languages[lang[1]] === "sh") { //workaround for bash
return _languages.bash;
}
}
return _fallbackLanguage;
}

/**
* Resolves a file extension to a Language object.
Expand Down Expand Up @@ -461,7 +483,7 @@ define(function (require, exports, module) {
* @type {{ prefix: string, suffix: string }}
*/
Language.prototype._blockCommentSyntax = null;

/**
* Whether or not the language is binary
* @type {boolean}
Expand Down Expand Up @@ -803,7 +825,7 @@ define(function (require, exports, module) {

return true;
};

/**
* Returns either a language associated with the mode or the fallback language.
* Used to disambiguate modes used by multiple languages.
Expand Down Expand Up @@ -863,7 +885,7 @@ define(function (require, exports, module) {
Language.prototype.isBinary = function () {
return this._isBinary;
};

/**
* Sets whether or not the language is binary
* @param {!boolean} isBinary
Expand Down Expand Up @@ -922,7 +944,7 @@ define(function (require, exports, module) {
}

language._setBinary(!!definition.isBinary);

// store language to language map
_languages[language.getId()] = language;
}
Expand Down Expand Up @@ -1128,6 +1150,7 @@ define(function (require, exports, module) {
exports.getLanguage = getLanguage;
exports.getLanguageForExtension = getLanguageForExtension;
exports.getLanguageForPath = getLanguageForPath;
exports.getLanguageForContent = getLanguageForContent;
exports.getLanguages = getLanguages;
exports.setLanguageOverrideForPath = setLanguageOverrideForPath;
});
31 changes: 31 additions & 0 deletions test/spec/LanguageManager-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,37 @@ define(function (require, exports, module) {
});
});

describe("auto detection for extensionless file", function () {

it("should return appropriate language", function () {
var text = "#!/bin/sh\n\necho 123";
var text2 = "#!/bin/bash\n\necho 123";
var text3 = "#!/usr/bin/env bash\n\necho 123";
var text4 = "123\n\necho 123";
var text5 = "#!/bin/shell\n\necho 123";
var text6 = "#!/usr/bin/python2\n\nprint 'hello world'";
var text7 = "#!/usr/bin/python3\n\nprint 'hello world'";
var text8 = "#!/usr/bin/python222\n\nprint 'hello world'";
var text9 = "#!/usr/bin/python -c\n\nprint 'hello world'";

var bash = LanguageManager.getLanguage("bash"),
python = LanguageManager.getLanguage("python"),
unknown = LanguageManager.getLanguage("unknown");

expect(LanguageManager.getLanguageForContent(text)).toBe(bash);
expect(LanguageManager.getLanguageForContent(text2)).toBe(bash);
expect(LanguageManager.getLanguageForContent(text3)).toBe(bash);
expect(LanguageManager.getLanguageForContent(text4)).toBe(unknown);
expect(LanguageManager.getLanguageForContent(text5)).toBe(unknown);
expect(LanguageManager.getLanguageForContent(text6)).toBe(python);
expect(LanguageManager.getLanguageForContent(text7)).toBe(python);
expect(LanguageManager.getLanguageForContent(text8)).toBe(unknown);
expect(LanguageManager.getLanguageForContent(text9)).toBe(python);

});

});

describe("defineLanguage", function () {

it("should create a basic language", function () {
Expand Down