From 4f52c1ee009f9364aaf1707be83794d003e3b4c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A1s=20B=20Nagy?= <20251272+BNAndras@users.noreply.github.com> Date: Wed, 1 Jan 2025 22:13:28 -0800 Subject: [PATCH] add `word-count` --- config.json | 8 ++ .../practice/word-count/.docs/instructions.md | 47 ++++++++ .../practice/word-count/.docs/introduction.md | 8 ++ .../practice/word-count/.meta/config.json | 18 ++++ .../practice/word-count/.meta/src/example.art | 3 + .../practice/word-count/.meta/tests.toml | 57 ++++++++++ .../practice/word-count/src/word-count.art | 3 + exercises/practice/word-count/tester.art | 3 + .../word-count/tests/test-word-count.art | 102 ++++++++++++++++++ 9 files changed, 249 insertions(+) create mode 100644 exercises/practice/word-count/.docs/instructions.md create mode 100644 exercises/practice/word-count/.docs/introduction.md create mode 100644 exercises/practice/word-count/.meta/config.json create mode 100644 exercises/practice/word-count/.meta/src/example.art create mode 100644 exercises/practice/word-count/.meta/tests.toml create mode 100644 exercises/practice/word-count/src/word-count.art create mode 100644 exercises/practice/word-count/tester.art create mode 100644 exercises/practice/word-count/tests/test-word-count.art diff --git a/config.json b/config.json index b142827..7ba1d8b 100644 --- a/config.json +++ b/config.json @@ -389,6 +389,14 @@ "prerequisites": [], "difficulty": 2 }, + { + "slug": "word-count", + "name": "Word Count", + "uuid": "82b4b6f7-79eb-448f-bc4e-dea413da71e8", + "practices": [], + "prerequisites": [], + "difficulty": 2 + }, { "slug": "yacht", "name": "Yacht", diff --git a/exercises/practice/word-count/.docs/instructions.md b/exercises/practice/word-count/.docs/instructions.md new file mode 100644 index 0000000..064393c --- /dev/null +++ b/exercises/practice/word-count/.docs/instructions.md @@ -0,0 +1,47 @@ +# Instructions + +Your task is to count how many times each word occurs in a subtitle of a drama. + +The subtitles from these dramas use only ASCII characters. + +The characters often speak in casual English, using contractions like _they're_ or _it's_. +Though these contractions come from two words (e.g. _we are_), the contraction (_we're_) is considered a single word. + +Words can be separated by any form of punctuation (e.g. ":", "!", or "?") or whitespace (e.g. "\t", "\n", or " "). +The only punctuation that does not separate words is the apostrophe in contractions. + +Numbers are considered words. +If the subtitles say _It costs 100 dollars._ then _100_ will be its own word. + +Words are case insensitive. +For example, the word _you_ occurs three times in the following sentence: + +> You come back, you hear me? DO YOU HEAR ME? + +The ordering of the word counts in the results doesn't matter. + +Here's an example that incorporates several of the elements discussed above: + +- simple words +- contractions +- numbers +- case insensitive words +- punctuation (including apostrophes) to separate words +- different forms of whitespace to separate words + +`"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.` + +The mapping for this subtitle would be: + +```text +123: 1 +agent: 1 +cried: 1 +fled: 1 +i: 1 +password: 2 +so: 1 +special: 1 +that's: 1 +the: 2 +``` diff --git a/exercises/practice/word-count/.docs/introduction.md b/exercises/practice/word-count/.docs/introduction.md new file mode 100644 index 0000000..1654508 --- /dev/null +++ b/exercises/practice/word-count/.docs/introduction.md @@ -0,0 +1,8 @@ +# Introduction + +You teach English as a foreign language to high school students. + +You've decided to base your entire curriculum on TV shows. +You need to analyze which words are used, and how often they're repeated. + +This will let you choose the simplest shows to start with, and to gradually increase the difficulty as time passes. diff --git a/exercises/practice/word-count/.meta/config.json b/exercises/practice/word-count/.meta/config.json new file mode 100644 index 0000000..2796cc9 --- /dev/null +++ b/exercises/practice/word-count/.meta/config.json @@ -0,0 +1,18 @@ +{ + "authors": [ + "BNAndras" + ], + "files": { + "solution": [ + "src/word-count.art" + ], + "test": [ + "tests/test-word-count.art" + ], + "example": [ + ".meta/src/example.art" + ] + }, + "blurb": "Given a phrase, count the occurrences of each word in that phrase.", + "source": "This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour." +} diff --git a/exercises/practice/word-count/.meta/src/example.art b/exercises/practice/word-count/.meta/src/example.art new file mode 100644 index 0000000..118309f --- /dev/null +++ b/exercises/practice/word-count/.meta/src/example.art @@ -0,0 +1,3 @@ +countWords: function [sentence][ + tally match lower sentence {/\b[\w']+\b/} +] diff --git a/exercises/practice/word-count/.meta/tests.toml b/exercises/practice/word-count/.meta/tests.toml new file mode 100644 index 0000000..1be425b --- /dev/null +++ b/exercises/practice/word-count/.meta/tests.toml @@ -0,0 +1,57 @@ +# This is an auto-generated file. +# +# Regenerating this file via `configlet sync` will: +# - Recreate every `description` key/value pair +# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications +# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion) +# - Preserve any other key/value pair +# +# As user-added comments (using the # character) will be removed when this file +# is regenerated, comments can be added via a `comment` key. + +[61559d5f-2cad-48fb-af53-d3973a9ee9ef] +description = "count one word" + +[5abd53a3-1aed-43a4-a15a-29f88c09cbbd] +description = "count one of each word" + +[2a3091e5-952e-4099-9fac-8f85d9655c0e] +description = "multiple occurrences of a word" + +[e81877ae-d4da-4af4-931c-d923cd621ca6] +description = "handles cramped lists" + +[7349f682-9707-47c0-a9af-be56e1e7ff30] +description = "handles expanded lists" + +[a514a0f2-8589-4279-8892-887f76a14c82] +description = "ignore punctuation" + +[d2e5cee6-d2ec-497b-bdc9-3ebe092ce55e] +description = "include numbers" + +[dac6bc6a-21ae-4954-945d-d7f716392dbf] +description = "normalize case" + +[4185a902-bdb0-4074-864c-f416e42a0f19] +description = "with apostrophes" +include = false + +[4ff6c7d7-fcfc-43ef-b8e7-34ff1837a2d3] +description = "with apostrophes" +reimplements = "4185a902-bdb0-4074-864c-f416e42a0f19" + +[be72af2b-8afe-4337-b151-b297202e4a7b] +description = "with quotations" + +[8d6815fe-8a51-4a65-96f9-2fb3f6dc6ed6] +description = "substrings from the beginning" + +[c5f4ef26-f3f7-4725-b314-855c04fb4c13] +description = "multiple spaces not detected as a word" + +[50176e8a-fe8e-4f4c-b6b6-aa9cf8f20360] +description = "alternating word separators not detected as a word" + +[6d00f1db-901c-4bec-9829-d20eb3044557] +description = "quotation for word with apostrophe" diff --git a/exercises/practice/word-count/src/word-count.art b/exercises/practice/word-count/src/word-count.art new file mode 100644 index 0000000..4e04ba1 --- /dev/null +++ b/exercises/practice/word-count/src/word-count.art @@ -0,0 +1,3 @@ +countWords: function [sentence][ + panic "Please implement the countWords function" +] diff --git a/exercises/practice/word-count/tester.art b/exercises/practice/word-count/tester.art new file mode 100644 index 0000000..80f4a8f --- /dev/null +++ b/exercises/practice/word-count/tester.art @@ -0,0 +1,3 @@ +import {unitt}! + +runTests.failFast findTests "tests" diff --git a/exercises/practice/word-count/tests/test-word-count.art b/exercises/practice/word-count/tests/test-word-count.art new file mode 100644 index 0000000..95cdf92 --- /dev/null +++ b/exercises/practice/word-count/tests/test-word-count.art @@ -0,0 +1,102 @@ +import {unitt}! +import {src/word-count}! + +suite "Word Count" [ + test "count one word" [ + sentence: "word" + result: countWords sentence + expected: #["word": 1] + assert -> expected = result + ] + + test.skip "count one of each word" [ + sentence: "one of each" + result: countWords sentence + expected: #["one": 1 "of": 1 "each": 1] + assert -> expected = result + ] + + test.skip "multiple occurrences of a word" [ + sentence: "one fish two fish red fish blue fish" + result: countWords sentence + expected: #["one": 1 "fish": 4 "two": 1 "red": 1 "blue": 1] + assert -> expected = result + ] + + test.skip "handles cramped lists" [ + sentence: "one,two,three" + result: countWords sentence + expected: #["one": 1 "two": 1 "three": 1] + assert -> expected = result + ] + + test.skip "handles expanded lists" [ + sentence: "one,\ntwo,\nthree" + result: countWords sentence + expected: #["one": 1 "two": 1 "three": 1] + assert -> expected = result + ] + + test.skip "ignore punctuation" [ + sentence: "car: carpet as java: javascript!!&@$%^&" + result: countWords sentence + expected: #["car": 1 "carpet": 1 "as": 1 "java": 1 "javascript": 1] + assert -> expected = result + ] + + test.skip "include numbers" [ + sentence: "testing, 1, 2 testing" + result: countWords sentence + expected: #["testing": 2 "1": 1 "2": 1] + assert -> expected = result + ] + + test.skip "normalize case" [ + sentence: "go Go GO Stop stop" + result: countWords sentence + expected: #["go": 3 "stop": 2] + assert -> expected = result + ] + + test.skip "with apostrophes" [ + sentence: "'First: don't laugh. Then: don't cry. You're getting it.'" + result: countWords sentence + expected: #["first": 1 "don't": 2 "laugh": 1 "then": 1 "cry": 1 "you're": 1 "getting": 1 "it": 1] + assert -> expected = result + ] + + test.skip "with quotations" [ + sentence: "Joe can't tell between 'large' and large." + result: countWords sentence + expected: #["joe": 1 "can't": 1 "tell": 1 "between": 1 "large": 2 "and": 1] + assert -> expected = result + ] + + test.skip "substrings from the beginning" [ + sentence: "Joe can't tell between app, apple and a." + result: countWords sentence + expected: #["joe": 1 "can't": 1 "tell": 1 "between": 1 "app": 1 "apple": 1 "and": 1 "a": 1] + assert -> expected = result + ] + + test.skip "multiple spaces not detected as a word" [ + sentence: " multiple whitespaces" + result: countWords sentence + expected: #["multiple": 1 "whitespaces": 1] + assert -> expected = result + ] + + test.skip "alternating word separators not detected as a word" [ + sentence: ",\n,one,\n ,two \n 'three'" + result: countWords sentence + expected: #["one": 1 "two": 1 "three": 1] + assert -> expected = result + ] + + test.skip "quotation for word with apostrophe" [ + sentence: "can, can't, 'can't'" + result: countWords sentence + expected: #["can": 1 "can't": 2] + assert -> expected = result + ] +]