diff --git a/book/analysis/an-anagram-detection-example.md b/book/analysis/an-anagram-detection-example.md index b0fb2212..d7004b60 100644 --- a/book/analysis/an-anagram-detection-example.md +++ b/book/analysis/an-anagram-detection-example.md @@ -14,4 +14,10 @@ made up of symbols from the set of 26 lowercase alphabetic characters. Our goal is to write a boolean function that will take two strings and return whether they are anagrams. - + + + + + + + diff --git a/book/analysis/anagrams.js b/book/analysis/anagrams.js new file mode 100644 index 00000000..62b13809 --- /dev/null +++ b/book/analysis/anagrams.js @@ -0,0 +1,245 @@ +/* +Solution 1: Checking Off +------------------------ + +Our first solution to the anagram problem will check to see that each +character in the first string actually occurs in the second. If it is +possible to “checkoff” each character, then the two strings must be +anagrams. Checking off a character will be accomplished by replacing it +with the special JavaScript value `null`. However, since strings in JavaScript +are immutable, the first step in the process will be to convert the +second string to an array. Each character from the first string +can be checked against the characters in the list and if found, checked off by +replacement. 
An implementation of this strategy may look like this:
+*/
+
+function anagramCheckingOff(string1, string2) {
+  if (string1.length !== string2.length) {
+    return false;
+  }
+
+  var string2ToCheckOff = string2.split("");
+
+  for (var i = 0; i < string1.length; i++) {
+    var letterFound = false;
+    for (var j = 0; j < string2ToCheckOff.length; j++) {
+      if (string1[i] === string2ToCheckOff[j]) {
+        string2ToCheckOff[j] = null; // checked off: cannot be matched twice
+        letterFound = true;
+        break;
+      }
+    }
+    if (!letterFound) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+anagramCheckingOff('abcd', 'dcba') // => true
+anagramCheckingOff('abcd', 'abcc') // => false
+
+/*
+To analyze this algorithm, we need to note that each of the `n`
+characters in `string1` will cause an iteration through up to `n` characters
+in the list from `string2`. Each of the `n` positions in the list will be
+visited once to match a character from `string1`. The number of visits then
+becomes the sum of the integers from 1 to `n`. We recognized earlier that
+this can be written as
+
+$$
+\sum_{i=1}^{n} i = \frac {n(n+1)}{2}
+$$
+
+$$
+ = \frac {1}{2}n^{2} + \frac {1}{2}n
+$$
+
+As $$n$$ gets large, the $$n^{2}$$ term will dominate the $$n$$ term and the
+$$\frac {1}{2}$$ can be ignored. Therefore, this solution is $$O(n^{2})$$.
+
+
+Solution 2: Sort and Compare
+----------------------------
+
+Another solution to the anagram problem will make use of the fact that
+even though `string1` and `string2` are different, they are anagrams only if
+they consist of exactly the same characters. So, if we begin by sorting each
+string alphabetically, from a to z, we will end up with the same string
+if the original two strings are anagrams. Below is a possible
+implementation of this strategy. First, we convert each string to an array using
+the string method `split`, and then we use the array method `sort` which
+lexicographically sorts an array in place and then returns the array. 
Finally, we +loop through the first string, checking to make sure that both strings contain +the same letter at every index. +*/ + +function anagramSortAndCompare(string1, string2) { + if (string1.length !== string2.length) { + return false; + } + + var sortedString1 = string1.split("").sort(); + var sortedString2 = string2.split("").sort(); + + for (var i = 0; i < sortedString1.length; i++) { + if (sortedString1[i] !== sortedString2[i]) { + return false; + } + } + + return true; +} + +anagramSortAndCompare('abcde', 'edcba') // => true +anagramSortAndCompare('abcde', 'abcd') // => false + +/* +At first glance you may be tempted to think that this algorithm is +$$O(n)$$, since there is one simple iteration to compare the *n* +characters after the sorting process. However, the two calls to the +JavaScript `sort` method are not without their own cost. Sorting is +typically either $$O(n^{2})$$ or $$O(n\log n)$$, so the sorting +operations dominate the iteration. In the end, this algorithm will have +the same order of magnitude as that of the sorting process. + +Solution 3: Brute Force +----------------------- + +A *brute force* technique for solving a problem typically tries to +exhaust all possibilities. For the anagram detection problem, we can +simply generate a list of all possible strings using the characters from +`s1` and then see if `s2` occurs. However, there is a difficulty with +this approach. When generating all possible strings from `s1`, there are +$$n$$ possible first characters, $$n-1$$ possible characters for the second +position, $$n-2$$ for the third, and so on. The total number of candidate +strings is $$n*(n-1)*(n-2)*...*3*2*1$$, which is $$n!$$. Although some of +the strings may be duplicates, the program cannot know this ahead of +time and so it will still generate $$n!$$ different strings. + +It turns out that $$n!$$ grows even faster than $$2^{n}$$ as *n* gets large. 
+In fact, if `s1` were 20 characters long, there would be +$$20!$$ or 2,432,902,008,176,640,000 possible candidate strings. If we +processed one possibility every second, it would still take us +77,146,816,596 years to go through the entire list. This is probably not +going to be a good solution. + +Solution 4: Count and Compare +----------------------------- + +Our final solution to the anagram problem takes advantage of the fact +that any two anagrams will have the same number of a’s, the same number +of b’s, the same number of c’s, and so on. In order to decide whether +two strings are anagrams, we will first count the number of times each +character occurs. Since there are 26 possible characters, we can use an +array of 26 counters, one for each possible character. Each time we see a +particular character, we will increment the counter at that position. In +the end, if the two arrays of counters are identical, the strings must be +anagrams. Here is a possible implementation of the strategy: + +*/ + +function anagramCountCompare(string1, string2) { + + function getLetterPosition(letter) { + return letter.charCodeAt() - 'a'.charCodeAt(); + } + + // No "clean" way to prepopulate an array in JavaScript with 0's + var string1LetterCounts = new Array(26); + var string2LetterCounts = new Array(26); + + for (var i = 0; i < string1.length; i++) { + var letterPosition = getLetterPosition(string1[i]); + if (!string1LetterCounts[letterPosition]) { + string1LetterCounts[letterPosition] = 1; + } else { + string1LetterCounts[letterPosition]++; + } + } + + for (var i = 0; i < string2.length; i++) { + var letterPosition = getLetterPosition(string2[i]); + if (!string2LetterCounts[letterPosition]) { + string2LetterCounts[letterPosition] = 1; + } else { + string2LetterCounts[letterPosition]++; + } + } + + for (var i = 0; i < string1LetterCounts.length; i++) { + if (string1LetterCounts[i] !== string2LetterCounts[i]) { + return false; + } + } + + return true; +} + 
+anagramCountCompare('apple', 'pleap') // => true
+anagramCountCompare('apple', 'applf') // => false
+
+/*
+Again, the solution has a number of iterations. However, unlike the
+first solution, none of them are nested. The first two iterations used
+to count the characters are both based on $$n$$. The third iteration,
+comparing the two lists of counts, always takes 26 steps since there are
+26 possible characters in the strings. Adding it all up gives us
+$$T(n)=2n+26$$ steps. That is $$O(n)$$. We have found a linear order of
+magnitude algorithm for solving this problem.
+
+Those with greater familiarity with JavaScript may note that the counting
+strategy we implemented in `anagramCountCompare` could be much more
+succinctly approached using the `reduce` method of arrays. Note that we are
+required to add an additional check that the strings are of the same length
+since our dictionary comparison loop will not account for string2 having
+additional characters.
+*/
+
+function anagramCountCompareWithReduce(string1, string2) {
+
+  function letterCountReducer(letterCounts, letter) {
+    if (letterCounts[letter]) {
+      letterCounts[letter]++;
+    } else {
+      letterCounts[letter] = 1; // first sighting counts as one; 0 is falsy and would never increment
+    }
+    return letterCounts;
+  }
+
+  var string1LetterCounts = string1.split('').reduce(letterCountReducer, {});
+  var string2LetterCounts = string2.split('').reduce(letterCountReducer, {});
+
+
+  for (var letter in string1LetterCounts) {
+    if (string1LetterCounts[letter] !== string2LetterCounts[letter]) {
+      return false;
+    }
+  }
+
+  return string1.length === string2.length;
+}
+
+anagramCountCompareWithReduce('apple', 'pleap') // => true
+anagramCountCompareWithReduce('apple', 'applf') // => false
+
+/*
+It is worth noting that `anagramCountCompareWithReduce` is also $$O(n)$$, but
+that it is impossible to determine this without understanding the
+implementation of the `Array.prototype.reduce` method.
+
+Before leaving this example, we need to say something about space
+requirements. 
Although the last solution was able to run in linear time, +it could only do so by using additional storage to keep the two dictionaries of +character counts. In other words, this algorithm sacrificed space in +order to gain time. + +This is a common tradeoff. On many occasions you will need to make +decisions between time and space trade-offs. In this case, the amount of +extra space is not significant. However, if the underlying alphabet had +millions of characters, there would be more concern. As a software +engineer, when given a choice of algorithms, it will be up to you to +determine the best use of computing resources given a particular +problem. +*/ diff --git a/book/deques/palindrome-checker.md b/book/deques/palindrome-checker.md index baf8435b..db405e1c 100644 --- a/book/deques/palindrome-checker.md +++ b/book/deques/palindrome-checker.md @@ -27,4 +27,4 @@ items, we will eventually either run out of characters or be left with a deque of size 1 depending on whether the length of the original string was even or odd. In either case, the string must be a palindrome. 
A complete implementation for this strategy may look like: - + diff --git a/book/graphs/depth-first-search.md b/book/graphs/depth-first-search.md index ed763085..0e8c852e 100644 --- a/book/graphs/depth-first-search.md +++ b/book/graphs/depth-first-search.md @@ -5,4 +5,4 @@ collection: graphs position: 8 --- - + diff --git a/book/graphs/dijkstras-algorithm.md b/book/graphs/dijkstras-algorithm.md index 20fc0c44..f8143648 100644 --- a/book/graphs/dijkstras-algorithm.md +++ b/book/graphs/dijkstras-algorithm.md @@ -5,4 +5,4 @@ collection: graphs position: 10 --- - + diff --git a/book/graphs/knights-tour.md b/book/graphs/knights-tour.md index 3fb69bf8..e717f875 100644 --- a/book/graphs/knights-tour.md +++ b/book/graphs/knights-tour.md @@ -5,4 +5,4 @@ collection: graphs position: 7 --- - + diff --git a/book/graphs/prims-spanning-tree-algorithm.md b/book/graphs/prims-spanning-tree-algorithm.md index 8273dfa1..14297262 100644 --- a/book/graphs/prims-spanning-tree-algorithm.md +++ b/book/graphs/prims-spanning-tree-algorithm.md @@ -91,7 +91,7 @@ The Python code to implement Prim’s algorithm is shown below. Prim’s algorithm is similar to Dijkstra’s algorithm in that they both use a priority queue to select the next vertex to add to the growing graph. - + The following sequence of diagrams shows the algorithm in operation on our sample tree. We begin with the starting vertex as A. Looking at the neighbors diff --git a/book/graphs/word-ladder.md b/book/graphs/word-ladder.md index cfbfbe04..6606a0a7 100644 --- a/book/graphs/word-ladder.md +++ b/book/graphs/word-ladder.md @@ -5,4 +5,4 @@ collection: graphs position: 6 --- - + diff --git a/book/lists/implementing-an-ordered-list.md b/book/lists/implementing-an-ordered-list.md index a31eef24..f2623b58 100644 --- a/book/lists/implementing-an-ordered-list.md +++ b/book/lists/implementing-an-ordered-list.md @@ -18,7 +18,7 @@ seen previously with unordered lists. 
We will subclass `UnorderedList` and leave the `__init__` method intact as once again, an empty list will be denoted by a `head` reference to `None`. - + Analysis of Linked Lists ------------------------ diff --git a/book/lists/implementing-an-unordered-list.md b/book/lists/implementing-an-unordered-list.md index 7822cdd0..a4b70e0a 100644 --- a/book/lists/implementing-an-unordered-list.md +++ b/book/lists/implementing-an-unordered-list.md @@ -28,4 +28,4 @@ first item can tell us where the second is, and so on. The external reference is often referred to as the **head** of the list. Similarly, the last item needs to know that there is no next item. - + diff --git a/book/queues/implementation.md b/book/queues/implementation.md index 11ec522a..0dc9d9aa 100644 --- a/book/queues/implementation.md +++ b/book/queues/implementation.md @@ -9,6 +9,6 @@ Just like with a stack, it is possible to “use a Python list as a queue”. Ag _Unlike_ with a stack, the performance implication of using a Python list as a queue is significant. The implementation shown below uses `insert(0, item)` to enqueue a new item, which will be an $$O(n)$$ operation. - + In practice, many Python programmers will use the standard library’s `collections.deque` class to achieve $$O(1)$$ enqueues and dequeues. We will cover deques in depth in the next chapter; for now consider deques to be a combination of a stack and a queue, enabling $$O(1)$$ pushing and popping from both ends. diff --git a/book/queues/simulating-hot-potato.md b/book/queues/simulating-hot-potato.md index 744e1b00..2c23c551 100644 --- a/book/queues/simulating-hot-potato.md +++ b/book/queues/simulating-hot-potato.md @@ -48,7 +48,7 @@ until only one name remains (the size of the queue is 1). A possible implementation of this simulation is: - + Note that in this example the value of the counting constant is greater than the number of names in the list. 
This is not a problem since the diff --git a/book/recursion/calculating-the-sum-of-a-list-of-numbers.md b/book/recursion/calculating-the-sum-of-a-list-of-numbers.md index 2826c463..10364175 100644 --- a/book/recursion/calculating-the-sum-of-a-list-of-numbers.md +++ b/book/recursion/calculating-the-sum-of-a-list-of-numbers.md @@ -5,4 +5,4 @@ collection: recursion position: 2 --- - + diff --git a/book/recursion/converting-an-integer-to-a-string.md b/book/recursion/converting-an-integer-to-a-string.md index 1d3a0f05..ea1ecf6b 100644 --- a/book/recursion/converting-an-integer-to-a-string.md +++ b/book/recursion/converting-an-integer-to-a-string.md @@ -56,7 +56,7 @@ side of the diagram. Below is a Python implementation of this algorithm for any base between 2 and 16. - + Notice that we check for the base case where `n` is less than the base we are converting to. When we detect the base case, we stop diff --git a/book/recursion/dynamic-programming.md b/book/recursion/dynamic-programming.md index cc0e4821..b0878191 100644 --- a/book/recursion/dynamic-programming.md +++ b/book/recursion/dynamic-programming.md @@ -32,23 +32,23 @@ numbers” and recognizes the relationship $$f(n) = f(n-1) + f(n-2)$$. With 0 and 1 as our base cases, this leads to an implementation in code that looks very much like the mathematical definition of the sequence: -
+ tags and syntax highlighted further down the
+ pipeline.
+
+For instance, a Python literate program like so:
+
+ """
+ A **comment**
+ """
+ def foo(): pass
+
+Will be converted to the following markdown in the
+calling file:
+
+ A **comment**
+
+ ```python
+ def foo(): pass
+ ```
+*/
+
+'use strict'
+
+const assert = require('assert')
+const _ = require('lodash')
+
+
+// NOTE(review): the regex body appears to be missing here (possibly stripped
+// together with the HTML-comment placeholders in the markdown hunks), so as
+// written `//g` parses as a line comment and this declaration has no value.
+// `replacePlaceholders` reads one capture group as the referenced filename;
+// restore the original pattern before merging.
+const placeholders = //g
+
+
+// Per-language conversion settings, looked up by file extension.
+//   blockComment: a block comment whose opening delimiter is followed by a
+//                 newline and whose closing delimiter starts a line; capture
+//                 group 1 is the comment text
+//   pragmas:      a pragma line to drop from the output (there is no `m`
+//                 flag, so `^` only matches at the very start of the content)
+//   syntax:       language tag appended to the opening ``` fence
+const languages = {
+ py: {
+ blockComment: /"""\n([\s\S]+?)\n^"""/gm,
+ pragmas: /^# -\*-.+-\*-\n/g,
+ syntax: 'python',
+ },
+ js: {
+ blockComment: /\/\*\n([\s\S]+?)\n^\*\//gm,
+ pragmas: /^'use strict';?\n/g,
+ syntax: 'javascript',
+ }
+}
+
+
+// Strip whatever `config.pragmas` matches from the source text
+const removePragmas = (config, content) => {
+  const withoutPragmas = content.replace(config.pragmas, '')
+  return withoutPragmas
+}
+
+
+// Turn the literate source inside out: wrap the whole content in one fenced
+// code block, and make every block comment close that fence, emit its text
+// as plain markdown, then reopen the fence
+const invert = (config, content) => {
+  const openFence = '```' + config.syntax
+  const closeFence = '```'
+  const commentAsProse = closeFence + '\n$1\n' + openFence
+  const body = content.replace(config.blockComment, commentAsProse)
+  return [openFence, body, closeFence].join('\n')
+}
+
+
+// Delete fenced blocks of the configured syntax whose body is nothing but
+// newlines, i.e. an opening fence followed directly by a closing fence
+const removeEmptyCodeBlocks = (config, content) =>
+  content.replace(new RegExp('```' + config.syntax + '\n+```', 'g'), '')
+
+
+// Tidy fence boundaries: collapse the run of newlines after an opening
+// fence of the configured syntax to one, then collapse the run of newlines
+// in front of any ``` fence to one
+const stripNewlinesInCodeBlocks = (config, content) => {
+  const openFence = '```' + config.syntax
+  const newlinesAfterOpenFence = new RegExp(openFence + '\n+', 'g')
+  const trimmedAfterOpen = content.replace(newlinesAfterOpenFence, openFence + '\n')
+  return trimmedAfterOpen.replace(/\n+```/g, '\n```')
+}
+
+
+// The ordered conversion pipeline for one language: every step is
+// pre-bound to `config`, leaving functions that take only the content
+const conversionSteps = (config) => {
+  const pipeline = [
+    removePragmas,
+    invert,
+    removeEmptyCodeBlocks,
+    stripNewlinesInCodeBlocks
+  ]
+  return pipeline.map(step => _.partial(step, config))
+}
+
+
+// Feed `fileContent` through `steps` from first to last and return the
+// output of the final step
+const converted = (steps, fileContent) => {
+  const runAllSteps = _.flow.apply(null, steps)
+  return runAllSteps(fileContent)
+}
+
+
+// Return a function which, when passed to `String.prototype.replace`
+// together with the `placeholders` regex, returns the markdown form of
+// the referenced literate code. It receives the whole match (unused) and
+// the captured filename.
+//
+// Throws an Error naming the file when the placeholder references a path
+// that is not in `files`, or an extension with no entry in `languages`.
+const replacePlaceholders = (files) =>
+  (m, targetFilename) => {
+    const file = files[targetFilename]
+    if (!file) {
+      throw new Error(`Literate code file not found: ${targetFilename}`)
+    }
+
+    // Take the text after the LAST dot; `split('.')[1]` picks the wrong
+    // segment for names such as `./code/foo.py` or `foo.test.js`
+    const ext = targetFilename.split('.').pop()
+    const config = languages[ext]
+    if (!config) {
+      throw new Error(`No literate code configuration for: ${targetFilename}`)
+    }
+
+    const fileContent = file.contents.toString('utf8')
+    const steps = conversionSteps(config)
+
+    return converted(steps, fileContent)
+  }
+
+
+// For each markdown file in `files` (a map of path -> file object), find
+// placeholders and replace them with the converted form of the referenced
+// files. Returns nothing: each markdown file's `contents` is overwritten.
+const incorporateLiterateCode = (files) => {
+  const replacer = replacePlaceholders(files)
+  for (let path in files) {
+    // This must be a regex literal: in the string '\.md$' the backslash is
+    // dropped, leaving the pattern `.md$`, which also matches paths that
+    // merely end in "md" (e.g. `notes/readmd`)
+    if (!/\.md$/.test(path)) continue
+
+    const file = files[path]
+    const fileContent = file.contents.toString('utf8')
+    const replaced = fileContent.replace(placeholders, replacer)
+
+    // Unfortunately, the contract is that we mutate the existing
+    // file.contents
+    // NOTE(review): `new Buffer(...)` is deprecated in current Node
+    // releases; switch to `Buffer.from(replaced, 'utf8')` once the oldest
+    // supported Node version provides it
+    file.contents = new Buffer(replaced, 'utf8')
+  }
+}
+
+module.exports = { incorporateLiterateCode }
+
+
+/* TESTS */
+
+// Self-test for the literate-to-markdown conversion; it is invoked at the
+// bottom of this file only when the TEST environment variable is set
+const test = () => {
+ // Most of the literate code plugin is plumbing; the
+ // important thing to test is the conversion from
+ // literate form to markdown, as it uses a series
+ // of fiddly regexps
+
+ const convertPython = _.partial(converted, conversionSteps(languages.py))
+ const convertJS = _.partial(converted, conversionSteps(languages.js))
+
+ // Python: the docstring-style block becomes prose, the code after it is fenced
+ assert.equal(convertPython(`
+"""
+A comment with _markdown_
+"""
+def foo_bar_baz():
+ pass
+`),
+ `
+A comment with _markdown_
+\`\`\`python
+def foo_bar_baz():
+ pass
+\`\`\``)
+
+ // JavaScript: same expectation with a block comment and a javascript fence
+ assert.equal(convertJS(`
+/*
+A comment with _markdown_
+*/
+const foo = () => {}
+`),
+ `
+A comment with _markdown_
+\`\`\`javascript
+const foo = () => {}
+\`\`\``)
+
+}
+
+if (process.env.TEST) test()
+
+
diff --git a/litpy-plugin.js b/litpy-plugin.js
deleted file mode 100644
index cd22dea9..00000000
--- a/litpy-plugin.js
+++ /dev/null
@@ -1,47 +0,0 @@
-'use strict'
-
-const _ = require('lodash')
-
-const litpyMarker = //g
-
-const removePragmas =
- content =>
- content.replace(/# -\*-.+-\*-\n/g, '')
-
-const invert =
- content =>
- '```python\n' +
- content.replace(/"""\n([\s\S]+?)\n^"""/gm, '```\n$1\n```python') +
- '\n```'
-
-const removeEmptyCodeBlocks =
- content =>
- content.replace(/```python\n+```/g, '')
-
-const stripNewlinesInCodeBlocks =
- content =>
- content
- .replace(/```python\n+/g, '```python\n')
- .replace(/\n+```/g, '\n```')
-
-const replacer =
- files =>
- (match, group) =>
- _.flow(
- removePragmas,
- invert,
- removeEmptyCodeBlocks,
- stripNewlinesInCodeBlocks
- )(files[group].contents.toString('utf8'))
-
-const incorporateLiteratePython = files => {
- for (let path in files) {
- if (path.search('\.md$') !== -1) {
- const file = files[path]
- const replaced = file.contents.toString('utf8').replace(litpyMarker, replacer(files))
- file.contents = new Buffer(replaced, 'utf8')
- }
- }
-}
-
-module.exports = { incorporateLiteratePython }
diff --git a/run b/run
index bed0f855..02419847 100755
--- a/run
+++ b/run
@@ -12,7 +12,12 @@ PORT=${PORT-"5000"}
function test {
+  log-msg 'Running Python tests of code samples'
python -m unittest discover -p "*_test.py"
+  log-msg 'Running JavaScript tests of book building code'
+  for file in *.js; do
+    TEST=1 node --harmony_destructuring "$file"  # quoted so names with spaces or glob characters stay one argument
+  done
}