From 1600ca3fff95e76c39cabf23dd72ee24d8f7fb43 Mon Sep 17 00:00:00 2001 From: Ozan Onay Date: Sat, 12 Mar 2016 12:29:49 -0800 Subject: [PATCH 1/5] Move to generic naming --- build.js | 44 +++++++++++----------- litpy-plugin.js => literate-code-plugin.js | 11 ++++++ 2 files changed, 34 insertions(+), 21 deletions(-) rename litpy-plugin.js => literate-code-plugin.js (93%) diff --git a/build.js b/build.js index b6c0361e..44e3a6e4 100644 --- a/build.js +++ b/build.js @@ -8,7 +8,7 @@ const Metalsmith = require('metalsmith') const permalinks = require('metalsmith-permalinks') const { convertToKatex } = require('./katex-plugin') -const { incorporateLiteratePython } = require('./litpy-plugin') +const { incorporateLiterateCode } = require('./literate-code-plugin') const { highlightCode } = require('./prism-plugin') const { wrapFigures } = require('./captions-plugin') @@ -119,23 +119,25 @@ const removeNonPublicFiles = } } -Metalsmith(__dirname) -.source('book') -.destination(BUILD_DESTINATION) -// .use(debugSingleFile('graphs/knights-tour.md')) -.use(drafts()) -.use(incorporateLiteratePython) -.use(convertToKatex) -.use(highlightCode) -.use(markdown({ tables: true })) -.use(collections(collectionConfig)) -.use(bridgeLinksBetweenCollections) -.use(wrapFigures) -.use(removeNonPublicFiles) -.use(permalinks()) -.use(generateTableOfContents) -.use(layouts({ engine: 'ejs' })) -.build(err => { - console.log('Built') - if (err) { throw err } -}) +if (!process.env.TEST) { + Metalsmith(__dirname) + .source('book') + .destination(BUILD_DESTINATION) + // .use(debugSingleFile('graphs/knights-tour.md')) + .use(drafts()) + .use(incorporateLiterateCode) + .use(convertToKatex) + .use(highlightCode) + .use(markdown({ tables: true })) + .use(collections(collectionConfig)) + .use(bridgeLinksBetweenCollections) + .use(wrapFigures) + .use(removeNonPublicFiles) + .use(permalinks()) + .use(generateTableOfContents) + .use(layouts({ engine: 'ejs' })) + .build(err => { + console.log('Built') + if (err) 
{ throw err } + }) +} diff --git a/litpy-plugin.js b/literate-code-plugin.js similarity index 93% rename from litpy-plugin.js rename to literate-code-plugin.js index cd22dea9..78070432 100644 --- a/litpy-plugin.js +++ b/literate-code-plugin.js @@ -45,3 +45,14 @@ const incorporateLiteratePython = files => { } module.exports = { incorporateLiteratePython } + + +/* TESTS */ + +const test = () => { + +} + +if (process.env.TEST) test() + + From c4ba7a68e165dd9a416a8abda5b0b9b4fe432835 Mon Sep 17 00:00:00 2001 From: Ozan Onay Date: Sat, 12 Mar 2016 12:30:00 -0800 Subject: [PATCH 2/5] Add a simple way to add js tests --- run | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/run b/run index bed0f855..02419847 100755 --- a/run +++ b/run @@ -12,7 +12,12 @@ PORT=${PORT-"5000"} function test { + log-msg 'Running Python tests of code samples' python -m unittest discover -p "*_test.py" + log-msg 'Running JavaScript tests of book building code' + for file in *.js; do + TEST=1 node --harmony_destructuring $file + done } From cc3f41d4bf0cade339a3a11f49eb3c7eaeaf4444 Mon Sep 17 00:00:00 2001 From: Ozan Onay Date: Sat, 12 Mar 2016 14:24:13 -0800 Subject: [PATCH 3/5] Add js (and general language) support for literate code --- book/analysis/an-anagram-detection-example.md | 2 +- book/deques/palindrome-checker.md | 2 +- book/graphs/depth-first-search.md | 2 +- book/graphs/dijkstras-algorithm.md | 2 +- book/graphs/knights-tour.md | 2 +- book/graphs/prims-spanning-tree-algorithm.md | 2 +- book/graphs/word-ladder.md | 2 +- book/lists/implementing-an-ordered-list.md | 2 +- book/lists/implementing-an-unordered-list.md | 2 +- book/queues/implementation.md | 2 +- book/queues/simulating-hot-potato.md | 2 +- ...alculating-the-sum-of-a-list-of-numbers.md | 2 +- .../converting-an-integer-to-a-string.md | 2 +- book/recursion/dynamic-programming.md | 4 +- book/stacks/balanced-parentheses.md | 4 +- book/stacks/converting-number-bases.md | 4 +- book/stacks/implementation.md | 4 +- 
.../infix-prefix-and-postfix-expressions.md | 4 +- book/trees/avl-trees.md | 2 +- book/trees/binary-search-trees.md | 2 +- book/trees/parse-trees.md | 2 +- .../priority-queues-with-binary-heaps.md | 2 +- book/trees/tree-traversals.md | 2 +- literate-code-plugin.js | 193 ++++++++++++++---- 24 files changed, 182 insertions(+), 67 deletions(-) diff --git a/book/analysis/an-anagram-detection-example.md b/book/analysis/an-anagram-detection-example.md index b0fb2212..850d5485 100644 --- a/book/analysis/an-anagram-detection-example.md +++ b/book/analysis/an-anagram-detection-example.md @@ -14,4 +14,4 @@ made up of symbols from the set of 26 lowercase alphabetic characters. Our goal is to write a boolean function that will take two strings and return whether they are anagrams. - + diff --git a/book/deques/palindrome-checker.md b/book/deques/palindrome-checker.md index baf8435b..db405e1c 100644 --- a/book/deques/palindrome-checker.md +++ b/book/deques/palindrome-checker.md @@ -27,4 +27,4 @@ items, we will eventually either run out of characters or be left with a deque of size 1 depending on whether the length of the original string was even or odd. In either case, the string must be a palindrome. 
A complete implementation for this strategy may look like: - + diff --git a/book/graphs/depth-first-search.md b/book/graphs/depth-first-search.md index ed763085..0e8c852e 100644 --- a/book/graphs/depth-first-search.md +++ b/book/graphs/depth-first-search.md @@ -5,4 +5,4 @@ collection: graphs position: 8 --- - + diff --git a/book/graphs/dijkstras-algorithm.md b/book/graphs/dijkstras-algorithm.md index 20fc0c44..f8143648 100644 --- a/book/graphs/dijkstras-algorithm.md +++ b/book/graphs/dijkstras-algorithm.md @@ -5,4 +5,4 @@ collection: graphs position: 10 --- - + diff --git a/book/graphs/knights-tour.md b/book/graphs/knights-tour.md index 3fb69bf8..e717f875 100644 --- a/book/graphs/knights-tour.md +++ b/book/graphs/knights-tour.md @@ -5,4 +5,4 @@ collection: graphs position: 7 --- - + diff --git a/book/graphs/prims-spanning-tree-algorithm.md b/book/graphs/prims-spanning-tree-algorithm.md index 8273dfa1..14297262 100644 --- a/book/graphs/prims-spanning-tree-algorithm.md +++ b/book/graphs/prims-spanning-tree-algorithm.md @@ -91,7 +91,7 @@ The Python code to implement Prim’s algorithm is shown below. Prim’s algorithm is similar to Dijkstra’s algorithm in that they both use a priority queue to select the next vertex to add to the growing graph. - + The following sequence of diagrams shows the algorithm in operation on our sample tree. We begin with the starting vertex as A. Looking at the neighbors diff --git a/book/graphs/word-ladder.md b/book/graphs/word-ladder.md index cfbfbe04..6606a0a7 100644 --- a/book/graphs/word-ladder.md +++ b/book/graphs/word-ladder.md @@ -5,4 +5,4 @@ collection: graphs position: 6 --- - + diff --git a/book/lists/implementing-an-ordered-list.md b/book/lists/implementing-an-ordered-list.md index a31eef24..f2623b58 100644 --- a/book/lists/implementing-an-ordered-list.md +++ b/book/lists/implementing-an-ordered-list.md @@ -18,7 +18,7 @@ seen previously with unordered lists. 
We will subclass `UnorderedList` and leave the `__init__` method intact as once again, an empty list will be denoted by a `head` reference to `None`. - + Analysis of Linked Lists ------------------------ diff --git a/book/lists/implementing-an-unordered-list.md b/book/lists/implementing-an-unordered-list.md index 7822cdd0..a4b70e0a 100644 --- a/book/lists/implementing-an-unordered-list.md +++ b/book/lists/implementing-an-unordered-list.md @@ -28,4 +28,4 @@ first item can tell us where the second is, and so on. The external reference is often referred to as the **head** of the list. Similarly, the last item needs to know that there is no next item. - + diff --git a/book/queues/implementation.md b/book/queues/implementation.md index 11ec522a..0dc9d9aa 100644 --- a/book/queues/implementation.md +++ b/book/queues/implementation.md @@ -9,6 +9,6 @@ Just like with a stack, it is possible to “use a Python list as a queue”. Ag _Unlike_ with a stack, the performance implication of using a Python list as a queue is significant. The implementation shown below uses `insert(0, item)` to enqueue a new item, which will be an $$O(n)$$ operation. - + In practice, many Python programmers will use the standard library’s `collections.deque` class to achieve $$O(1)$$ enqueues and dequeues. We will cover deques in depth in the next chapter; for now consider deques to be a combination of a stack and a queue, enabling $$O(1)$$ pushing and popping from both ends. diff --git a/book/queues/simulating-hot-potato.md b/book/queues/simulating-hot-potato.md index 744e1b00..2c23c551 100644 --- a/book/queues/simulating-hot-potato.md +++ b/book/queues/simulating-hot-potato.md @@ -48,7 +48,7 @@ until only one name remains (the size of the queue is 1). A possible implementation of this simulation is: - + Note that in this example the value of the counting constant is greater than the number of names in the list. 
This is not a problem since the diff --git a/book/recursion/calculating-the-sum-of-a-list-of-numbers.md b/book/recursion/calculating-the-sum-of-a-list-of-numbers.md index 2826c463..10364175 100644 --- a/book/recursion/calculating-the-sum-of-a-list-of-numbers.md +++ b/book/recursion/calculating-the-sum-of-a-list-of-numbers.md @@ -5,4 +5,4 @@ collection: recursion position: 2 --- - + diff --git a/book/recursion/converting-an-integer-to-a-string.md b/book/recursion/converting-an-integer-to-a-string.md index 1d3a0f05..ea1ecf6b 100644 --- a/book/recursion/converting-an-integer-to-a-string.md +++ b/book/recursion/converting-an-integer-to-a-string.md @@ -56,7 +56,7 @@ side of the diagram. Below is a Python implementation of this algorithm for any base between 2 and 16. - + Notice that we check for the base case where `n` is less than the base we are converting to. When we detect the base case, we stop diff --git a/book/recursion/dynamic-programming.md b/book/recursion/dynamic-programming.md index cc0e4821..8a000b3e 100644 --- a/book/recursion/dynamic-programming.md +++ b/book/recursion/dynamic-programming.md @@ -479,7 +479,7 @@ Putting our base case and general case together, we obtain a succinct recursive solution:
- +
```javascript @@ -600,7 +600,7 @@ Below is a possible implementation of the dynamic programming strategy we have discussed.
- +
```javascript diff --git a/book/stacks/balanced-parentheses.md b/book/stacks/balanced-parentheses.md index 4dbf96e7..2c7d06ac 100644 --- a/book/stacks/balanced-parentheses.md +++ b/book/stacks/balanced-parentheses.md @@ -72,7 +72,7 @@ not balanced properly. At the end of the string, when all symbols have been processed, the stack should be empty. The Python code to implement this algorithm may look like this: - + This function, `is_balanced`, returns a boolean result as to whether the string of parentheses is balanced. If the current symbol is `(`, then it @@ -127,7 +127,7 @@ that we use a dictionary to ensure that symbols popped from the stack correctly match our expectations of pairing with the symbol being considered at the time. - + These two examples show that stacks are very important data structures for the processing of language constructs in computer science. Almost diff --git a/book/stacks/converting-number-bases.md b/book/stacks/converting-number-bases.md index 0fd06a07..aa1e3241 100644 --- a/book/stacks/converting-number-bases.md +++ b/book/stacks/converting-number-bases.md @@ -52,7 +52,7 @@ the division process reaches 0, a binary string is constructed in lines from the stack one at a time and appended to the right-hand end of the string. The binary string is then returned. - + The algorithm for binary conversion can easily be extended to perform the conversion for any base. In computer science it is common to use a @@ -82,7 +82,7 @@ simply use the remainders, as they are themselves represented as two- digit decimal numbers. Instead we need to create a set of digits that can be used to represent those remainders beyond 9. - + A solution to this problem is to extend the digit set to include some alphabet characters. 
For example, hexadecimal uses the ten decimal diff --git a/book/stacks/implementation.md b/book/stacks/implementation.md index 3d92f26d..7c313b9a 100644 --- a/book/stacks/implementation.md +++ b/book/stacks/implementation.md @@ -35,7 +35,7 @@ Such an abstraction is also illustrative of the distinction between concrete data structures and abstract data types, so we provide a possible implementation of a stack class here: - + It is important to note that we could have chosen to implement the stack using a list where the top is at the beginning instead of at the end. In @@ -43,7 +43,7 @@ this case, instead of using `pop` and `append` as above, instead we would pop from and insert into position 0 in the. Here is a possible implementation of that approach: - + This ability to change the physical implementation of an abstract data type while maintaining the logical characteristics is an example of diff --git a/book/stacks/infix-prefix-and-postfix-expressions.md b/book/stacks/infix-prefix-and-postfix-expressions.md index 918476af..99a20651 100644 --- a/book/stacks/infix-prefix-and-postfix-expressions.md +++ b/book/stacks/infix-prefix-and-postfix-expressions.md @@ -246,7 +246,7 @@ used the integers 3, 2, and 1). The left parenthesis will receive the lowest value possible. This way any operator that is compared against it will have higher precedence and will be placed on top of it. - + Postfix Evaluation ------------------ @@ -317,7 +317,7 @@ module from the Python standard library to specify functions that will take two arguments and return the result of the proper arithmetic operation. 
- + It is important to note that in both the postfix conversion and the postfix evaluation programs we assumed that there were no errors in the diff --git a/book/trees/avl-trees.md b/book/trees/avl-trees.md index 34b7aaed..adf9f6de 100644 --- a/book/trees/avl-trees.md +++ b/book/trees/avl-trees.md @@ -121,7 +121,7 @@ updating balance factors: should convince yourself that once a subtree has a balance factor of zero, then the balance of its ancestor nodes does not change. - + By keeping the tree in balance at all times, we can ensure that the `get` method will run in order $$O(\log_2{n})$$ time. But the question is diff --git a/book/trees/binary-search-trees.md b/book/trees/binary-search-trees.md index 6ace19eb..4f9f662e 100644 --- a/book/trees/binary-search-trees.md +++ b/book/trees/binary-search-trees.md @@ -66,7 +66,7 @@ level logic to construct and manipulate the tree itself, and `BinarySearchTree` to hold a reference to the root node and provide a map-like interface to the user. - + Analysis --- diff --git a/book/trees/parse-trees.md b/book/trees/parse-trees.md index fc0c37a0..1d0736e1 100644 --- a/book/trees/parse-trees.md +++ b/book/trees/parse-trees.md @@ -126,4 +126,4 @@ Using the rules described above, along with the stack and binary tree abstract data types, we are now ready to write a Python function to create a parse tree. The code for our parse tree builder is presented below. - + diff --git a/book/trees/priority-queues-with-binary-heaps.md b/book/trees/priority-queues-with-binary-heaps.md index 531924fb..8e5b9a2e 100644 --- a/book/trees/priority-queues-with-binary-heaps.md +++ b/book/trees/priority-queues-with-binary-heaps.md @@ -92,4 +92,4 @@ that has the heap order property. 
Heap Operations --------------- - + diff --git a/book/trees/tree-traversals.md b/book/trees/tree-traversals.md index 77408105..5c0c37c9 100644 --- a/book/trees/tree-traversals.md +++ b/book/trees/tree-traversals.md @@ -109,4 +109,4 @@ recursive call to the left subtree, and print a right parenthesis *after* the recursive call to the right subtree. The modified code is shown below. - + diff --git a/literate-code-plugin.js b/literate-code-plugin.js index 78070432..329cc95b 100644 --- a/literate-code-plugin.js +++ b/literate-code-plugin.js @@ -1,55 +1,170 @@ +/* +Quick and dirty literate coding style for multiple +languages. + +To use, in a markdown file reference a code sample as: + + + +Placeholders of this form in any markdown file will +be replaced with the contents of the referenced file, +following two rules: + + 1. Anything in a block comment will be concatenated + to the markdown, for further processing; and, + 2. Anything _not_ in a block comment will be wrapped + with markdown tags indicating that it is a section + of code to be formatted in
+ tags and syntax highlighted further down the + pipeline. + +For instance, a Python literate program like so: + + """ + A **comment** + """ + def foo(): pass + +Will be converted to the following markdown in the +calling file: + + A **comment** + + ```python + def foo(): pass + ``` +*/ + 'use strict' +const assert = require('assert') const _ = require('lodash') -const litpyMarker = //g - -const removePragmas = - content => - content.replace(/# -\*-.+-\*-\n/g, '') - -const invert = - content => - '```python\n' + - content.replace(/"""\n([\s\S]+?)\n^"""/gm, '```\n$1\n```python') + - '\n```' - -const removeEmptyCodeBlocks = - content => - content.replace(/```python\n+```/g, '') - -const stripNewlinesInCodeBlocks = - content => - content - .replace(/```python\n+/g, '```python\n') - .replace(/\n+```/g, '\n```') - -const replacer = - files => - (match, group) => - _.flow( - removePragmas, - invert, - removeEmptyCodeBlocks, - stripNewlinesInCodeBlocks - )(files[group].contents.toString('utf8')) - -const incorporateLiteratePython = files => { + +const placeholders = //g + + +const languages = { + py: { + blockComment: /"""\n([\s\S]+?)\n^"""/gm, + pragmas: /^# -\*-.+-\*-\n/g, + syntax: 'python', + }, + js: { + blockComment: /\/\*\n([\s\S]+?)\n^\*\//gm, + pragmas: /^'use strict';?\n/g, + syntax: 'javascript', + } +} + + +const removePragmas = (config, content) => + content.replace(config.pragmas, '') + + +const invert = (config, content) => + '```' + config.syntax + '\n' + + content.replace(config.blockComment, '```\n$1\n```' + config.syntax) + + '\n```' + + +const removeEmptyCodeBlocks = (config, content) => { + const emptyBlocks = new RegExp('```' + config.syntax + '\n+```', 'g') + return content.replace(emptyBlocks, '') +} + + +const stripNewlinesInCodeBlocks = (config, content) => { + const multipeNewlines = new RegExp('```' + config.syntax + '\n+', 'g') + return content + .replace(multipeNewlines, '```' + config.syntax + '\n') + .replace(/\n+```/g, '\n```') +} + + 
+const conversionSteps = (config) => + ([ + removePragmas, + invert, + removeEmptyCodeBlocks, + stripNewlinesInCodeBlocks + ].map(f => _.partial(f, config))) + + +const converted = (steps, fileContent) => + _.flow.apply(null, steps)(fileContent) + + +// Return a function which when called with `replace` +// returns the markdown form of the referenced literate +// code +const replacePlaceholders = (files) => + (m, targetFilename) => { + const ext = targetFilename.split('.').pop() + const fileContent = files[targetFilename].contents.toString('utf8') + const config = languages[ext] + const steps = conversionSteps(config) + + return converted(steps, fileContent) + } + + +// For each markdown file, find placeholders and replace +// with the converted form of the referenced files +const incorporateLiterateCode = (files) => { + const replacer = replacePlaceholders(files) for (let path in files) { - if (path.search('\.md$') !== -1) { - const file = files[path] - const replaced = file.contents.toString('utf8').replace(litpyMarker, replacer(files)) - file.contents = new Buffer(replaced, 'utf8') - } + if (path.search(/\.md$/) === -1) continue + + const file = files[path] + const fileContent = file.contents.toString('utf8') + const replaced = fileContent.replace(placeholders, replacer) + + // Unfortunately, the contract is that we mutate the existing + // file.contents + file.contents = new Buffer(replaced, 'utf8') } } -module.exports = { incorporateLiteratePython } +module.exports = { incorporateLiterateCode } /* TESTS */ const test = () => { + // Most of the literate code plugin is plumbing; the + // important thing to test is the conversion from + // literate form to markdown, as it uses a series + // of fiddly regexp + + const convertPython = _.partial(converted, conversionSteps(languages.py)) + const convertJS = _.partial(converted, conversionSteps(languages.js)) + + assert.equal(convertPython(` +""" +A comment with _markdown_ +""" +def foo_bar_baz(): + pass +`), + ` +A 
comment with _markdown_ +\`\`\`python +def foo_bar_baz(): + pass +\`\`\``) + + assert.equal(convertJS(` +/* +A comment with _markdown_ +*/ +const foo = () => {} +`), + ` +A comment with _markdown_ +\`\`\`javascript +const foo = () => {} +\`\`\``) } From 5c37adf179375f2b8e3b7fcf1bc97ecaf1f75974 Mon Sep 17 00:00:00 2001 From: Ozan Onay Date: Sat, 12 Mar 2016 15:12:59 -0800 Subject: [PATCH 4/5] Add language markers to enable markdown inside of literate code --- book/analysis/an-anagram-detection-example.md | 4 +++ book/recursion/dynamic-programming.md | 34 ++++++++++--------- build.js | 18 +++++----- language-switching-plugin.js | 18 ++++++++++ 4 files changed, 48 insertions(+), 26 deletions(-) create mode 100644 language-switching-plugin.js diff --git a/book/analysis/an-anagram-detection-example.md b/book/analysis/an-anagram-detection-example.md index 850d5485..0fbb4379 100644 --- a/book/analysis/an-anagram-detection-example.md +++ b/book/analysis/an-anagram-detection-example.md @@ -14,4 +14,8 @@ made up of symbols from the set of 26 lowercase alphabetic characters. Our goal is to write a boolean function that will take two strings and return whether they are anagrams. + + + + diff --git a/book/recursion/dynamic-programming.md b/book/recursion/dynamic-programming.md index 8a000b3e..b0878191 100644 --- a/book/recursion/dynamic-programming.md +++ b/book/recursion/dynamic-programming.md @@ -32,23 +32,23 @@ numbers” and recognizes the relationship $$f(n) = f(n-1) + f(n-2)$$. With 0 and 1 as our base cases, this leads to an implementation in code that looks very much like the mathematical definition of the sequence: -
+ ```python def fib(n): if n <= 1: return n # base cases: return 0 or 1 if n is 0 or 1, respectively return fib(n - 1) + fib(n - 2) ``` -
+ -
+ ```javascript const fib = (n) => { if (n <= 1) return n return fib(n - 1) + fib(n - 2) } ``` -
+ This is a correct solution, but it poses a problem evident to those who run `fib(50)` and wait for an answer. The running time of this implementation is @@ -136,7 +136,7 @@ calculations, and we never obtain the same sum twice. An implementation of this strategy might look like: -
+ ```python def fib(n): a, b = 0, 1 @@ -144,8 +144,8 @@ def fib(n): a, b = a + b, a return a ``` -
-
+ + ```javascript const fib = (n) => { let a = 0 @@ -158,7 +158,7 @@ const fib = (n) => { return a } ``` -
+ With this implementation, we sacrifice some of the elegance and readability of our recursive solution, but gain a much better $$O(n)$$ @@ -478,17 +478,18 @@ is simply `1`. Putting our base case and general case together, we obtain a succinct recursive solution: -
+ -
-
+ + + ```javascript const numPaths = (height, width) => { if (height === 0 || width === 0) return 1 return numPaths(height, width - 1) + numPaths(height - 1, width) } ``` -
+ Unfortunately, we find ourselves with another $$O(2^n)$$ solution (where $$n = max(H, W)$$) due @@ -599,10 +600,11 @@ This is what the memo looks like for `f(10, 10)`: Below is a possible implementation of the dynamic programming strategy we have discussed. -
+ -
-
+ + + ```javascript const numPathsDp = (height, width) { const memo = Array.from(Array(height + 1)).map( @@ -617,7 +619,7 @@ const numPathsDp = (height, width) { return memo[height][width] } ``` -
+ Both the time and space cost for this implementation are $$O(H \times W)$$, compared to $$2^{max(H, W)}$$ previously, making a big difference as $$H$$ diff --git a/build.js b/build.js index 44e3a6e4..c4a7b7f8 100644 --- a/build.js +++ b/build.js @@ -8,6 +8,7 @@ const Metalsmith = require('metalsmith') const permalinks = require('metalsmith-permalinks') const { convertToKatex } = require('./katex-plugin') +const { addLanguageMarkers } = require('./language-switching-plugin') const { incorporateLiterateCode } = require('./literate-code-plugin') const { highlightCode } = require('./prism-plugin') const { wrapFigures } = require('./captions-plugin') @@ -93,14 +94,6 @@ const generateTableOfContents = console.log(`Building to ${BUILD_DESTINATION} ..`) -// const debugSingleFile = -// (targetPath) => -// (files) => { -// for (let path in files) { -// if (path !== targetPath) delete files[path] -// } -// } - const EXCLUSION_FILE_PATTERNS = [ '\.pyc$', '\.py$', @@ -119,16 +112,21 @@ const removeNonPublicFiles = } } +const log = (filePath) => + (files) => + console.log(files[filePath].contents.toString('utf8')) + if (!process.env.TEST) { Metalsmith(__dirname) .source('book') .destination(BUILD_DESTINATION) - // .use(debugSingleFile('graphs/knights-tour.md')) .use(drafts()) .use(incorporateLiterateCode) .use(convertToKatex) .use(highlightCode) - .use(markdown({ tables: true })) + .use(markdown({ tables: true, pedantic: true })) + .use(addLanguageMarkers) + // .use(log('analysis/an-anagram-detection-example.html')) .use(collections(collectionConfig)) .use(bridgeLinksBetweenCollections) .use(wrapFigures) diff --git a/language-switching-plugin.js b/language-switching-plugin.js new file mode 100644 index 00000000..37198eb3 --- /dev/null +++ b/language-switching-plugin.js @@ -0,0 +1,18 @@ +'use strict' + + +const addLanguageMarkers = (files) => { + for (let path in files) { + if (path.search('\.html$') === -1) continue + + const file = files[path] + const replaced = + 
file.contents.toString('utf8') + .replace(//g, '
') + .replace(//g, '
') + + file.contents = new Buffer(replaced, 'utf8') + } +} + +module.exports = { addLanguageMarkers } From c973bc3bbba21d9a6f72889a41d3fb6dca1eacba Mon Sep 17 00:00:00 2001 From: Ozan Onay Date: Sat, 12 Mar 2016 15:19:38 -0800 Subject: [PATCH 5/5] Add Richie's anagram js example --- book/analysis/an-anagram-detection-example.md | 4 +- book/analysis/anagrams.js | 245 ++++++++++++++++++ 2 files changed, 248 insertions(+), 1 deletion(-) create mode 100644 book/analysis/anagrams.js diff --git a/book/analysis/an-anagram-detection-example.md b/book/analysis/an-anagram-detection-example.md index 0fbb4379..d7004b60 100644 --- a/book/analysis/an-anagram-detection-example.md +++ b/book/analysis/an-anagram-detection-example.md @@ -15,7 +15,9 @@ Our goal is to write a boolean function that will take two strings and return whether they are anagrams. - + + + diff --git a/book/analysis/anagrams.js b/book/analysis/anagrams.js new file mode 100644 index 00000000..62b13809 --- /dev/null +++ b/book/analysis/anagrams.js @@ -0,0 +1,245 @@ +/* +Solution 1: Checking Off +------------------------ + +Our first solution to the anagram problem will check to see that each +character in the first string actually occurs in the second. If it is +possible to “checkoff” each character, then the two strings must be +anagrams. Checking off a character will be accomplished by replacing it +with the special JavaScript value `null`. However, since strings in JavaScript +are immutable, the first step in the process will be to convert the +second string to an array. Each character from the first string +can be checked against the characters in the list and if found, checked off by +replacement. 
An implementation of this strategy may look like this: +*/ + +function anagramCheckingOff(string1, string2) { + if (string1.length !== string2.length) { + return false; + } + + var string2ToCheckOff = string2.split(""); + + for (var i = 0; i < string1.length; i++) { + var letterFound = false; + for (var j = 0; j < string2ToCheckOff.length; j++) { + if (string1[i] === string2ToCheckOff[j]) { + string2ToCheckOff[j] = null; + letterFound = true; + break; + } + } + if (!letterFound) { + return false; + } + } + + return true; +} + +anagramCheckingOff('abcd', 'dcba') // => true +anagramCheckingOff('abcd', 'abcc') // => false + +/* +To analyze this algorithm, we need to note that each of the `n` +characters in `string1` will cause an iteration through up to `n` characters +in the list from `string2`. Each of the `n` positions in the list will be +visited once to match a character from `string1`. The number of visits then +becomes the sum of the integers from 1 to `n`. We recognized earlier that +this can be written as + +$$ +\sum_{i=1}^{n} i = \frac {n(n+1)}{2} +$$ + +$$ + = \frac {1}{2}n^{2} + \frac {1}{2}n +$$ + +As $$n$$ gets large, the $$n^{2}$$ term will dominate the $$n$$ term and the +$$\frac {1}{2}$$ can be ignored. Therefore, this solution is $$O(n^{2})$$. + + +Solution 2: Sort and Compare +---------------------------- + +Another solution to the anagram problem will make use of the fact that +even though `string1` and `string2` are different, they are anagrams only if +they consist of exactly the same characters. So, if we begin by sorting each +string alphabetically, from a to z, we will end up with the same string +if the original two strings are anagrams. Below is a possible +implementation of this strategy. First, we convert each string to an array using +the string method `split`, and then we use the array method `sort` which +lexicographically sorts an array in place and then returns the array. 
Finally, we +loop through the first string, checking to make sure that both strings contain +the same letter at every index. +*/ + +function anagramSortAndCompare(string1, string2) { + if (string1.length !== string2.length) { + return false; + } + + var sortedString1 = string1.split("").sort(); + var sortedString2 = string2.split("").sort(); + + for (var i = 0; i < sortedString1.length; i++) { + if (sortedString1[i] !== sortedString2[i]) { + return false; + } + } + + return true; +} + +anagramSortAndCompare('abcde', 'edcba') // => true +anagramSortAndCompare('abcde', 'abcd') // => false + +/* +At first glance you may be tempted to think that this algorithm is +$$O(n)$$, since there is one simple iteration to compare the *n* +characters after the sorting process. However, the two calls to the +JavaScript `sort` method are not without their own cost. Sorting is +typically either $$O(n^{2})$$ or $$O(n\log n)$$, so the sorting +operations dominate the iteration. In the end, this algorithm will have +the same order of magnitude as that of the sorting process. + +Solution 3: Brute Force +----------------------- + +A *brute force* technique for solving a problem typically tries to +exhaust all possibilities. For the anagram detection problem, we can +simply generate a list of all possible strings using the characters from +`string1` and then see if `string2` occurs. However, there is a difficulty with +this approach. When generating all possible strings from `string1`, there are +$$n$$ possible first characters, $$n-1$$ possible characters for the second +position, $$n-2$$ for the third, and so on. The total number of candidate +strings is $$n*(n-1)*(n-2)*...*3*2*1$$, which is $$n!$$. Although some of +the strings may be duplicates, the program cannot know this ahead of +time and so it will still generate $$n!$$ different strings. + +It turns out that $$n!$$ grows even faster than $$2^{n}$$ as *n* gets large. 
+In fact, if `string1` were 20 characters long, there would be +$$20!$$ or 2,432,902,008,176,640,000 possible candidate strings. If we +processed one possibility every second, it would still take us +77,146,816,596 years to go through the entire list. This is probably not +going to be a good solution. + +Solution 4: Count and Compare +----------------------------- + +Our final solution to the anagram problem takes advantage of the fact +that any two anagrams will have the same number of a’s, the same number +of b’s, the same number of c’s, and so on. In order to decide whether +two strings are anagrams, we will first count the number of times each +character occurs. Since there are 26 possible characters, we can use an +array of 26 counters, one for each possible character. Each time we see a +particular character, we will increment the counter at that position. In +the end, if the two arrays of counters are identical, the strings must be +anagrams. Here is a possible implementation of the strategy: + +*/ + +function anagramCountCompare(string1, string2) { + + function getLetterPosition(letter) { + return letter.charCodeAt() - 'a'.charCodeAt(); + } + + // No "clean" way to prepopulate an array in JavaScript with 0's + var string1LetterCounts = new Array(26); + var string2LetterCounts = new Array(26); + + for (var i = 0; i < string1.length; i++) { + var letterPosition = getLetterPosition(string1[i]); + if (!string1LetterCounts[letterPosition]) { + string1LetterCounts[letterPosition] = 1; + } else { + string1LetterCounts[letterPosition]++; + } + } + + for (var i = 0; i < string2.length; i++) { + var letterPosition = getLetterPosition(string2[i]); + if (!string2LetterCounts[letterPosition]) { + string2LetterCounts[letterPosition] = 1; + } else { + string2LetterCounts[letterPosition]++; + } + } + + for (var i = 0; i < string1LetterCounts.length; i++) { + if (string1LetterCounts[i] !== string2LetterCounts[i]) { + return false; + } + } + + return true; +} + 
+anagramCountCompare('apple', 'pleap') // => True +anagramCountCompare('apple', 'applf') // => False + +/* +Again, the solution has a number of iterations. However, unlike the +first solution, none of them are nested. The first two iterations used +to count the characters are both based on $$n$$. The third iteration, +comparing the two lists of counts, always takes 26 steps since there are +26 possible characters in the strings. Adding it all up gives us +$$T(n)=2n+26$$ steps. That is $$O(n)$$. We have found a linear order of +magnitude algorithm for solving this problem. + +Those with greater familiarity with JavaScript may note that the counting +strategy we implemented in `anagramCountCompare` could be much more +succinctly approached using the `reduce` method of arrays. Note that we are +required to add an additional check that the strings are of the same length +since our dictionary comparison loop will not account for string2 having +additional characters. +*/ + +function anagramCountCompareWithReduce(string1, string2) { + + function letterCountReducer(letterCounts, letter) { + if (letterCounts[letter]) { + letterCounts[letter]++; + } else { + letterCounts[letter] = 0; + } + return letterCounts; + } + + var string1LetterCounts = string1.split('').reduce(letterCountReducer, {}); + var string2LetterCounts = string2.split('').reduce(letterCountReducer, {}); + + + for (var letter in string1LetterCounts) { + if (string1LetterCounts[letter] !== string2LetterCounts[letter]) { + return false; + } + } + + return string1.length === string2.length; +} + +anagramCountCompareWithReduce('apple', 'pleap') // => True +anagramCountCompareWithReduce('apple', 'applf') // => False + +/* +It is worth noting that `anagramCounterCompareWithReduce` is also $$O(n)$$, but +that it is impossible to determine this without understanding the +implementation of `Array.reduce` method. + +Before leaving this example, we need to say something about space +requirements. 
Although the last solution was able to run in linear time, +it could only do so by using additional storage to keep the two dictionaries of +character counts. In other words, this algorithm sacrificed space in +order to gain time. + +This is a common tradeoff. On many occasions you will need to make +decisions between time and space trade-offs. In this case, the amount of +extra space is not significant. However, if the underlying alphabet had +millions of characters, there would be more concern. As a software +engineer, when given a choice of algorithms, it will be up to you to +determine the best use of computing resources given a particular +problem. +*/