Skip to content

Commit

Permalink
Handle special characters in markdown slugify (#44788)
Browse files (browse the repository at this point in the history)
Fixes #44779
  • Loading branch information
mjbvz authored Mar 2, 2018
1 parent 597576c commit 5a0e115
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 4 deletions.
13 changes: 9 additions & 4 deletions extensions/markdown/src/tableOfContentsProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,18 @@ import * as vscode from 'vscode';
import { MarkdownEngine } from './markdownEngine';

export class Slug {
/**
 * Lookup table used by `fromHeading` to fold individual characters before the
 * slug is cleaned up: accented Latin letters map to a plain ASCII letter and a
 * few separator characters map to `-`. Characters without an entry are left
 * unchanged by the lookup (`Slug.specialChars[c] || c`).
 *
 * Keys are single characters and letter keys are lowercase, matching the
 * per-character lookup that `fromHeading` performs after `toLowerCase()`.
 */
private static readonly specialChars: { [char: string]: string } = {
	// a
	'à': 'a', 'ä': 'a', 'ã': 'a', 'á': 'a', 'â': 'a', 'æ': 'a', 'å': 'a',
	// e
	'ë': 'e', 'è': 'e', 'é': 'e', 'ê': 'e',
	// i
	'î': 'i', 'ï': 'i', 'ì': 'i', 'í': 'i',
	// o
	'ò': 'o', 'ó': 'o', 'ö': 'o', 'ô': 'o', 'ø': 'o',
	// u ('ù' previously mapped to 'o', which was inconsistent with ú/ü/û)
	'ù': 'u', 'ú': 'u', 'ü': 'u', 'û': 'u',
	// other letters
	'ñ': 'n', 'ç': 'c', 'ß': 's', 'ÿ': 'y', 'œ': 'o',
	'ŕ': 'r', 'ś': 's', 'ń': 'n', 'ṕ': 'p', 'ẃ': 'w', 'ǵ': 'g', 'ǹ': 'n', 'ḿ': 'm', 'ǘ': 'u', 'ẍ': 'x', 'ź': 'z', 'ḧ': 'h',
	// separators become dashes
	'·': '-', '/': '-', '_': '-', ',': '-', ':': '-', ';': '-'
};

public static fromHeading(heading: string): Slug {
const slugifiedHeading = encodeURI(heading.trim()
.toLowerCase()
.replace(/[\]\[\!\"\#\$\%\&\'\(\)\*\+\,\.\/\:\;\<\=\>\?\@\\\^\_\{\|\}\~\`]/g, '')
.replace(/\s+/g, '-')
.replace(/^\-+/, '')
.replace(/\-+$/, ''));
.replace(/./g, c => Slug.specialChars[c] || c)
.replace(/[\]\[\!\'\#\$\%\&\'\(\)\*\+\,\.\/\:\;\<\=\>\?\@\\\^\_\{\|\}\~\`]/g, '')
.replace(/\s+/g, '-') // Replace whitespace with -
.replace(/[^\w\-]+/g, '') // Remove remaining non-word chars

This comment has been minimized.

Copy link
@yzhang-gh

yzhang-gh Apr 19, 2018

Contributor

Don't you think this is overkill? All Chinese characters are removed from the anchor... 😥

This comment has been minimized.

Copy link
@kishkin

kishkin Apr 24, 2018

Russian characters are removed as well; in fact, everything that does not have a special mapping is removed.

.replace(/^\-+/, '') // Remove leading -
.replace(/\-+$/, '') // Remove trailing -
);

return new Slug(slugifiedHeading);
}
Expand Down
7 changes: 7 additions & 0 deletions extensions/markdown/src/test/tableOfContentsProvider.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ suite('markdown.TableOfContentsProvider', () => {
assert.strictEqual(await provider.lookup('foo'), undefined);
assert.strictEqual(await provider.lookup('fo o'), undefined);
});

// Regression test for #44779: a heading containing accented characters must be
// reachable through its ASCII-folded slug.
test('should normalize special characters #44779', async () => {
const doc = new InMemoryDocument(testFileName, `# Indentação\n`);
const provider = new TableOfContentsProvider(new MarkdownEngine(), doc);

// Looking up the unaccented slug should resolve to the heading on line 0.
assert.strictEqual((await provider.lookup('indentacao'))!.line, 0);
});
});

class InMemoryDocument implements vscode.TextDocument {
Expand Down

0 comments on commit 5a0e115

Please sign in to comment.