3131 * parse(value: string | null | undefined): NlcstRoot
3232 * tokenize(value: string | null | undefined): Array<NlcstSentenceContent>
3333 * }} ParserInstance
34+ * nlcst parser.
35+ *
36+ * For example, `parse-dutch`, `parse-english`, or `parse-latin`.
3437 * @typedef {new () => ParserInstance } ParserConstructor
38+ * Create a new parser.
3539 *
3640 * @typedef Options
37- * @property {Array<string> } [ignore]
41+ * Configuration.
42+ * @property {Array<string> | null | undefined } [ignore]
3843 * List of mdast node types to ignore.
39- * @property {Array<string> } [source]
44+ * @property {Array<string> | null | undefined } [source]
4045 * List of mdast node types to mark as `source`.
4146 *
42- * @typedef Context
47+ * @typedef State
48+ * Info passed around.
4349 * @property {string } doc
50+ * Whole document.
4451 * @property {Location } place
52+ * Location info.
4553 * @property {ParserInstance } parser
54+ * Parser.
4655 * @property {Array<string> } ignore
56+ * List of mdast node types to ignore.
4757 * @property {Array<string> } source
58+ * List of mdast node types to mark as source.
4859 */
4960
5061import { toString } from 'nlcst-to-string'
@@ -63,12 +74,19 @@ const terminalMarker = /^([!.?\u2026\u203D]+)$/
6374 * Transform a `tree` in mdast to nlcst.
6475 *
6576 * @param {MdastNode } tree
77+ * mdast tree to transform.
6678 * @param {VFile } file
67- * @param {ParserInstance|ParserConstructor } Parser
68- * @param {Options } [options]
79+ * Virtual file.
80+ * @param {ParserInstance | ParserConstructor } Parser
81+ * Parser to use.
82+ * @param {Options | null | undefined } [options]
83+ * Configuration.
84+ * @returns {NlcstRoot }
85+ * nlcst tree.
6986 */
70- // eslint-disable-next-line complexity
71- export function toNlcst ( tree , file , Parser , options = { } ) {
87+ export function toNlcst ( tree , file , Parser , options ) {
88+ const options_ = options || { }
89+
7290 // Crash on invalid parameters.
7391 if ( ! tree || ! tree . type ) {
7492 throw new Error ( 'mdast-util-to-nlcst expected node' )
@@ -83,140 +101,151 @@ export function toNlcst(tree, file, Parser, options = {}) {
83101 throw new Error ( 'mdast-util-to-nlcst expected parser' )
84102 }
85103
86- if (
87- ! tree . position ||
88- ! tree . position . start ||
89- ! tree . position . start . column ||
90- ! tree . position . start . line
91- ) {
104+ if ( ! pointStart ( tree ) . line || ! pointStart ( tree ) . column ) {
92105 throw new Error ( 'mdast-util-to-nlcst expected position on nodes' )
93106 }
94107
95- const parser = 'parse' in Parser ? Parser : new Parser ( )
96-
97- /** @type {Context } */
98- const context = {
108+ /** @type {State } */
109+ const state = {
99110 doc : String ( file ) ,
100111 place : location ( file ) ,
101- parser,
102- ignore : options . ignore
103- ? defaultIgnore . concat ( options . ignore )
112+ parser : 'parse' in Parser ? Parser : new Parser ( ) ,
113+ ignore : options_ . ignore
114+ ? [ ... defaultIgnore , ... options_ . ignore ]
104115 : defaultIgnore ,
105- source : options . source
106- ? defaultSource . concat ( options . source )
116+ source : options_ . source
117+ ? [ ... defaultSource , ... options_ . source ]
107118 : defaultSource
108119 }
109120
110- const result = one ( context , tree )
121+ return sentenceContentToRoot ( state , one ( state , tree ) || [ ] )
122+ }
111123
/**
 * Wrap a flat list of sentence content in a sentence, a paragraph, and
 * finally a root.
 *
 * Each level is built from the previous one through `splitNode`, and the
 * matching plugin list on `state.parser` (`tokenizeSentencePlugins`,
 * `tokenizeParagraphPlugins`, `tokenizeRootPlugins`) is run on the node of
 * that level right after it is created.
 *
 * @param {State} state
 *   State.
 * @param {Array<NlcstSentenceContent>} nodes
 *   Sentence content.
 * @returns {NlcstRoot}
 *   Root; without children and without position when `nodes` is empty.
 */
function sentenceContentToRoot(state, nodes) {
  // Nothing to wrap: hand back a bare root.
  if (nodes.length === 0) {
    return {type: 'RootNode', children: []}
  }

  const first = pointStart(nodes[0])
  const last = pointEnd(nodes[nodes.length - 1])

  // Positional info is only attached when both outer points carry a line.
  // Kept as a function so the check is evaluated at each use.
  const hasPosition = () => first && first.line && last && last.line

  // Level 1: one sentence holding all the content.
  /** @type {NlcstSentence} */
  const sentence = {type: 'SentenceNode', children: nodes}

  if (hasPosition()) {
    sentence.position = {start: first, end: last}
  }

  // Run plugins in order, stopping at the first empty slot.
  for (let at = 0; state.parser.tokenizeSentencePlugins[at]; at++) {
    state.parser.tokenizeSentencePlugins[at](sentence)
  }

  // Level 2: a paragraph, whose children are the sentence split on
  // `PunctuationNode`s matching `terminalMarker`.
  /** @type {NlcstParagraph} */
  const paragraph = {
    type: 'ParagraphNode',
    children: splitNode(sentence, 'PunctuationNode', terminalMarker)
  }

  if (hasPosition()) {
    // Copies of the points, so this node does not share them with the sentence.
    paragraph.position = {start: {...first}, end: {...last}}
  }

  for (let at = 0; state.parser.tokenizeParagraphPlugins[at]; at++) {
    state.parser.tokenizeParagraphPlugins[at](paragraph)
  }

  // Level 3: the root, whose children are the paragraph split on
  // `WhiteSpaceNode`s matching `newLine`.
  /** @type {NlcstRoot} */
  const root = {
    type: 'RootNode',
    children: splitNode(paragraph, 'WhiteSpaceNode', newLine)
  }

  if (hasPosition()) {
    root.position = {start: {...first}, end: {...last}}
  }

  for (let at = 0; state.parser.tokenizeRootPlugins[at]; at++) {
    state.parser.tokenizeRootPlugins[at](root)
  }

  return root
}
163187
/**
 * Transform a single mdast node into nlcst sentence content.
 *
 * Depending on the node this yields:
 *
 * - nothing, when its type is listed in `state.ignore`;
 * - one `SourceNode` holding the raw slice of the document, when its type is
 *   listed in `state.source` and both offsets are known;
 * - the combined content of its children, when it is a parent;
 * - the tokenized `alt` of an image or image reference;
 * - a line feed `WhiteSpaceNode` for a break;
 * - the tokenized value of a text node.
 *
 * @param {State} state
 *   State.
 * @param {MdastNode} node
 *   mdast node.
 * @returns {Array<NlcstSentenceContent> | undefined}
 *   nlcst sentence content, or `undefined` when the node yields nothing.
 */
function one(state, node) {
  if (state.ignore.includes(node.type)) {
    return
  }

  let offset = node.position ? node.position.start.offset : undefined
  const stop = node.position ? node.position.end.offset : undefined
  /** @type {Array<NlcstSentenceContent> | undefined} */
  let content

  if (state.source.includes(node.type)) {
    // Source nodes without both offsets cannot be sliced: they yield nothing.
    if (offset !== undefined && stop !== undefined) {
      content = [{type: 'SourceNode', value: state.doc.slice(offset, stop)}]
    }
  } else if ('children' in node) {
    // Parents are handled (and positioned) by `all`.
    return all(state, node)
  } else {
    switch (node.type) {
      case 'image':
      case 'imageReference': {
        if (node.alt) {
          content = state.parser.tokenize(node.alt)

          // Shift by two characters; presumably this skips the `![` in front
          // of the alt text (TODO confirm).
          if (typeof offset === 'number') {
            offset += 2
          }
        }

        break
      }

      case 'break': {
        content = [{type: 'WhiteSpaceNode', value: '\n'}]
        break
      }

      case 'text': {
        content = state.parser.tokenize(node.value)
        break
      }

      default:
    }
  }

  if (!content) {
    return
  }

  // Add positional info to the generated nodes, in place.
  patch(state, content, offset)
  return content
}
208233
209234/**
210235 * Transform all nodes in `parent`.
211- * @param {Context } config
236+ *
237+ * @param {State } state
238+ * State.
212239 * @param {MdastParent } parent
240+ * mdast parent node.
213241 * @returns {Array<NlcstSentenceContent> }
242+ * nlcst sentence content.
214243 */
215- function all ( config , parent ) {
244+ function all ( state , parent ) {
216245 let index = - 1
217246 /** @type {Array<NlcstSentenceContent> } */
218247 const results = [ ]
219- /** @type {Point| undefined } */
248+ /** @type {Point | undefined } */
220249 let end
221250
222251 while ( ++ index < parent . children . length ) {
@@ -234,16 +263,17 @@ function all(config, parent) {
234263 type : 'WhiteSpaceNode' ,
235264 value : '\n' . repeat ( start . line - end . line )
236265 }
237- patch ( config , [ lineEnding ] , end . offset )
266+ patch ( state , [ lineEnding ] , end . offset )
238267
268+ // Make sure it’ll be seen as a break between paragraphs.
239269 if ( lineEnding . value . length < 2 ) {
240270 lineEnding . value = '\n\n'
241271 }
242272
243273 results . push ( lineEnding )
244274 }
245275
246- const result = one ( config , child )
276+ const result = one ( state , child )
247277 if ( result ) results . push ( ...result )
248278 end = pointEnd ( child )
249279 }
@@ -253,40 +283,39 @@ function all(config, parent) {
253283
/**
 * Set `position` on every node in `nodes`, in place.
 *
 * `offset` is the offset in `file` this run of content starts at.
 * Each node is taken to span as many characters as its serialized form
 * (`toString(node)`), and the next node starts where the previous one ended.
 * Children of parent nodes are patched first, starting from the offset of
 * their parent.
 * When the offset is not a number, `position` is set to `undefined` on that
 * node and on every following node.
 *
 * @param {State} state
 *   State.
 * @param {Array<NlcstContent>} nodes
 *   nlcst sentence content.
 * @param {number | undefined} offset
 *   Offset.
 * @returns {void}
 *   Nothing.
 */
function patch(state, nodes, offset) {
  let cursor = offset

  for (const node of nodes) {
    if ('children' in node) {
      patch(state, node.children, cursor)
    }

    if (typeof cursor === 'number') {
      const stop = cursor + toString(node).length

      node.position = {
        start: state.place.toPoint(cursor),
        end: state.place.toPoint(stop)
      }
      cursor = stop
    } else {
      // No usable offset: clear the position here and for what follows.
      node.position = undefined
      cursor = undefined
    }
  }
}
291320
292321// Ported from:
0 commit comments