From c52f1073e589b74198a160e0b1fb9f210c267b53 Mon Sep 17 00:00:00 2001 From: Chris Manson Date: Fri, 30 Jun 2023 14:13:37 +0100 Subject: [PATCH] don't find headers in code blocks for the on-this-page --- lib/markdown-to-jsonapi.js | 3 ++ lib/showdown-subparsers.js | 60 +++++++++++++++++++++++++++++++ package-lock.json | 23 ++++++++++++ test/attributes.js | 74 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+) create mode 100644 lib/showdown-subparsers.js diff --git a/lib/markdown-to-jsonapi.js b/lib/markdown-to-jsonapi.js index 662c85c..e1b3eb3 100644 --- a/lib/markdown-to-jsonapi.js +++ b/lib/markdown-to-jsonapi.js @@ -5,6 +5,9 @@ const _ = require('lodash'); const h2p = require('html2plaintext'); const { Serializer } = require('jsonapi-serializer'); const { JSDOM } = require('jsdom'); +const subparsers = require('./showdown-subparsers'); + +subparsers(showdown); const supportedContentTypes = ['content', 'html', 'description', 'toc']; diff --git a/lib/showdown-subparsers.js b/lib/showdown-subparsers.js new file mode 100644 index 0000000..729b256 --- /dev/null +++ b/lib/showdown-subparsers.js @@ -0,0 +1,60 @@ +/* eslint-disable */ +module.exports = function(showdown) { + /** + * This is a copy and paste of the exact subparser from showdown with one *very* subtle change. + * We need code blocks to work when they have "stuff" following the language definition + * e.g. ```html some-stuff-here + * + * This doesn't work by default so we had to update the regex from: + * + * /(?:^|\n)(?: {0,3})(```+|~~~+)(?: *)([^\s`~]*)\n([\s\S]*?)\n(?: {0,3})\1/g + * + * to + * + * /(?:^|\n)(?: {0,3})(```+|~~~+)(?: *)([^\s`~]*).*\n([\s\S]*?)\n(?: {0,3})\1/g + * + * if you look carefully we have added an extra .* just after the middle there. + * + * If you're thinking this all looks like gobbledegook then you are correct. If this + * doesn't work then https://regex101.com/ is your friend. 
Put the regex that you want + * to debug in there and some examples of a markdown file and just keep changing things + * until it starts working 🤷‍♀️ + * + */ + showdown.subParser('githubCodeBlocks', function (text, options, globals) { + 'use strict'; + + // early exit if option is not enabled + if (!options.ghCodeBlocks) { + return text; + } + + text = globals.converter._dispatch('githubCodeBlocks.before', text, options, globals); + + text += '¨0'; + + text = text.replace(/(?:^|\n)(?: {0,3})(```+|~~~+)(?: *)([^\s`~]*).*\n([\s\S]*?)\n(?: {0,3})\1/g, function (wholeMatch, delim, language, codeblock) { + var end = (options.omitExtraWLInCodeBlocks) ? '' : '\n'; + + // First parse the github code block + codeblock = showdown.subParser('encodeCode')(codeblock, options, globals); + codeblock = showdown.subParser('detab')(codeblock, options, globals); + codeblock = codeblock.replace(/^\n+/g, ''); // trim leading newlines + codeblock = codeblock.replace(/\n+$/g, ''); // trim trailing whitespace + + codeblock = '
' + codeblock + end + '
'; + + codeblock = showdown.subParser('hashBlock')(codeblock, options, globals); + + // Since GHCodeblocks can be false positives, we need to + // store the primitive text and the parsed text in a global var, + // and then return a token + return '\n\n¨G' + (globals.ghCodeBlocks.push({text: wholeMatch, codeblock: codeblock}) - 1) + 'G\n\n'; + }); + + // attacklab: strip sentinel + text = text.replace(/¨0/, ''); + + return globals.converter._dispatch('githubCodeBlocks.after', text, options, globals); + }); +} \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index fb0f869..0f1ab5d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12526,6 +12526,18 @@ "yaml-front-matter": "bin/js-yaml-front.js" } }, + "node_modules/yaml-front-matter/node_modules/js-yaml": { + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", + "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "dependencies": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, "node_modules/yargs": { "version": "16.2.0", "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", @@ -22554,6 +22566,17 @@ "requires": { "commander": "^2.14.1", "js-yaml": "^3.10.0" + }, + "dependencies": { + "js-yaml": { + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", + "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "requires": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + } + } } }, "yargs": { diff --git a/test/attributes.js b/test/attributes.js index a8c8fc5..0ceaab0 100644 --- a/test/attributes.js +++ b/test/attributes.js @@ -233,4 +233,78 @@ you're being silly now { text: 'Sub sub sub point', depth: '5', id: 'subsubsubpoint' }, ]); }); + + it('not include headers that are in code blocks', async function () { + const 
result = await buildSingleFile(`# Hello world + +This is the first part + +\`\`\`html face +
+ +
+

+ Tomster + their local time is 4:56pm +

+ +

+ Hey Zoey, have you had a chance to look at the EmberConf brainstorming doc + I sent you? +

+
+ + +
+

Zoey

+ +

Hey!

+ +

+ I love the ideas! I'm really excited about where this year's EmberConf is + going, I'm sure it's going to be the best one yet. Some quick notes: +

+ +
    +
  • + Definitely agree that we should double the coffee budget this year (it + really is impressive how much we go through!) +
  • +
  • + A blimp would definitely make the venue very easy to find, but I think + it might be a bit out of our budget. Maybe we could rent some spotlights + instead? +
  • +
  • + We absolutely will need more hamster wheels, last year's line was + way too long. Will get on that now before rental season hits + its peak. +
  • +
+ +

Let me know when you've nailed down the dates!

+
+ +
+ + + +
+
+\`\`\` + +`, { + contentTypes: ['toc'], + }); + + expect(result.attributes.toc).to.deep.equal([ + { text: 'Hello world', depth: '1', id: 'helloworld' }, + ]); + }); });