diff --git a/docs/reference/analysis/tokenfilters/cjk-bigram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/cjk-bigram-tokenfilter.asciidoc
index 8ad2403f38e0a..712538ec2786c 100644
--- a/docs/reference/analysis/tokenfilters/cjk-bigram-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/cjk-bigram-tokenfilter.asciidoc
@@ -1,18 +1,176 @@
[[analysis-cjk-bigram-tokenfilter]]
-=== CJK Bigram Token Filter
+=== CJK bigram token filter
+++++
+CJK bigram
+++++
-The `cjk_bigram` token filter forms bigrams out of the CJK
-terms that are generated by the <>
-or the `icu_tokenizer` (see {plugins}/analysis-icu-tokenizer.html[`analysis-icu` plugin]).
+Forms https://en.wikipedia.org/wiki/Bigram[bigrams] out of CJK (Chinese,
+Japanese, and Korean) tokens.
-By default, when a CJK character has no adjacent characters to form a bigram,
-it is output in unigram form. If you always want to output both unigrams and
-bigrams, set the `output_unigrams` flag to `true`. This can be used for a
-combined unigram+bigram approach.
+This filter is included in {es}'s built-in <<analysis-lang-analyzer,CJK language analyzer>>. It uses Lucene's
+https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/cjk/CJKBigramFilter.html[CJKBigramFilter].
-Bigrams are generated for characters in `han`, `hiragana`, `katakana` and
-`hangul`, but bigrams can be disabled for particular scripts with the
-`ignored_scripts` parameter. All non-CJK input is passed through unmodified.
+
+[[analysis-cjk-bigram-tokenfilter-analyze-ex]]
+==== Example
+
+The following <<indices-analyze,analyze API>> request demonstrates how the
+CJK bigram token filter works.
+
+[source,console]
+--------------------------------------------------
+GET /_analyze
+{
+ "tokenizer" : "standard",
+ "filter" : ["cjk_bigram"],
+ "text" : "東京都は、日本の首都であり"
+}
+--------------------------------------------------
+
+The filter produces the following tokens:
+
+[source,text]
+--------------------------------------------------
+[ 東京, 京都, 都は, 日本, 本の, の首, 首都, 都で, であ, あり ]
+--------------------------------------------------
+
+/////////////////////
+[source,console-result]
+--------------------------------------------------
+{
+ "tokens" : [
+ {
+ "token" : "東京",
+ "start_offset" : 0,
+ "end_offset" : 2,
+ "type" : "<DOUBLE>",
+ "position" : 0
+ },
+ {
+ "token" : "京都",
+ "start_offset" : 1,
+ "end_offset" : 3,
+ "type" : "<DOUBLE>",
+ "position" : 1
+ },
+ {
+ "token" : "都は",
+ "start_offset" : 2,
+ "end_offset" : 4,
+ "type" : "<DOUBLE>",
+ "position" : 2
+ },
+ {
+ "token" : "日本",
+ "start_offset" : 5,
+ "end_offset" : 7,
+ "type" : "<DOUBLE>",
+ "position" : 3
+ },
+ {
+ "token" : "本の",
+ "start_offset" : 6,
+ "end_offset" : 8,
+ "type" : "<DOUBLE>",
+ "position" : 4
+ },
+ {
+ "token" : "の首",
+ "start_offset" : 7,
+ "end_offset" : 9,
+ "type" : "<DOUBLE>",
+ "position" : 5
+ },
+ {
+ "token" : "首都",
+ "start_offset" : 8,
+ "end_offset" : 10,
+ "type" : "<DOUBLE>",
+ "position" : 6
+ },
+ {
+ "token" : "都で",
+ "start_offset" : 9,
+ "end_offset" : 11,
+ "type" : "<DOUBLE>",
+ "position" : 7
+ },
+ {
+ "token" : "であ",
+ "start_offset" : 10,
+ "end_offset" : 12,
+ "type" : "<DOUBLE>",
+ "position" : 8
+ },
+ {
+ "token" : "あり",
+ "start_offset" : 11,
+ "end_offset" : 13,
+ "type" : "<DOUBLE>",
+ "position" : 9
+ }
+ ]
+}
+--------------------------------------------------
+/////////////////////
+
+[[analysis-cjk-bigram-tokenfilter-analyzer-ex]]
+==== Add to an analyzer
+
+The following <<indices-create-index,create index API>> request uses the
+CJK bigram token filter to configure a new
+<<analysis-custom-analyzer,custom analyzer>>.
+
+[source,console]
+--------------------------------------------------
+PUT /cjk_bigram_example
+{
+ "settings" : {
+ "analysis" : {
+ "analyzer" : {
+ "standard_cjk_bigram" : {
+ "tokenizer" : "standard",
+ "filter" : ["cjk_bigram"]
+ }
+ }
+ }
+ }
+}
+--------------------------------------------------
+
+
+[[analysis-cjk-bigram-tokenfilter-configure-parms]]
+==== Configurable parameters
+
+`ignored_scripts`::
++
+--
+(Optional, array of character scripts)
+Array of character scripts for which to disable bigrams.
+Possible values:
+
+* `han`
+* `hangul`
+* `hiragana`
+* `katakana`
+
+All non-CJK input is passed through unmodified.
+--
+
+`output_unigrams`::
+(Optional, boolean)
+If `true`, emit tokens in both bigram and
+https://en.wikipedia.org/wiki/N-gram[unigram] form. If `false`, a CJK character
+is output in unigram form when it has no adjacent characters. Defaults to
+`false`.
+
+[[analysis-cjk-bigram-tokenfilter-customize]]
+==== Customize
+
+To customize the CJK bigram token filter, duplicate it to create the basis
+for a new custom token filter. You can modify the filter using its configurable
+parameters.
[source,console]
--------------------------------------------------
@@ -30,9 +188,9 @@ PUT /cjk_bigram_example
"han_bigrams_filter" : {
"type" : "cjk_bigram",
"ignored_scripts": [
+ "hangul",
"hiragana",
- "katakana",
- "hangul"
+ "katakana"
],
"output_unigrams" : true
}
diff --git a/docs/reference/analysis/tokenfilters/cjk-width-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/cjk-width-tokenfilter.asciidoc
index 21bde5509a6a1..83b3ba8dee776 100644
--- a/docs/reference/analysis/tokenfilters/cjk-width-tokenfilter.asciidoc
+++ b/docs/reference/analysis/tokenfilters/cjk-width-tokenfilter.asciidoc
@@ -1,12 +1,83 @@
[[analysis-cjk-width-tokenfilter]]
-=== CJK Width Token Filter
+=== CJK width token filter
+++++
+CJK width
+++++
-The `cjk_width` token filter normalizes CJK width differences:
+Normalizes width differences in CJK (Chinese, Japanese, and Korean) characters
+as follows:
-* Folds fullwidth ASCII variants into the equivalent basic Latin
-* Folds halfwidth Katakana variants into the equivalent Kana
+* Folds full-width ASCII character variants into the equivalent basic Latin
+characters
+* Folds half-width Katakana character variants into the equivalent Kana
+characters
-NOTE: This token filter can be viewed as a subset of NFKC/NFKD
-Unicode normalization. See the {plugins}/analysis-icu-normalization-charfilter.html[`analysis-icu` plugin]
-for full normalization support.
+This filter is included in {es}'s built-in <<analysis-lang-analyzer,CJK language analyzer>>. It uses Lucene's
+https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/cjk/CJKWidthFilter.html[CJKWidthFilter].
+NOTE: This token filter can be viewed as a subset of NFKC/NFKD Unicode
+normalization. See the
+{plugins}/analysis-icu-normalization-charfilter.html[`analysis-icu` plugin] for
+full normalization support.
+
+[[analysis-cjk-width-tokenfilter-analyze-ex]]
+==== Example
+
+[source,console]
+--------------------------------------------------
+GET /_analyze
+{
+ "tokenizer" : "standard",
+ "filter" : ["cjk_width"],
+ "text" : "シーサイドライナー"
+}
+--------------------------------------------------
+
+The filter produces the following token:
+
+[source,text]
+--------------------------------------------------
+シーサイドライナー
+--------------------------------------------------
+
+/////////////////////
+[source,console-result]
+--------------------------------------------------
+{
+ "tokens" : [
+ {
+ "token" : "シーサイドライナー",
+ "start_offset" : 0,
+ "end_offset" : 10,
+ "type" : "<KATAKANA>",
+ "position" : 0
+ }
+ ]
+}
+--------------------------------------------------
+/////////////////////
+
+[[analysis-cjk-width-tokenfilter-analyzer-ex]]
+==== Add to an analyzer
+
+The following <<indices-create-index,create index API>> request uses the
+CJK width token filter to configure a new
+<<analysis-custom-analyzer,custom analyzer>>.
+
+[source,console]
+--------------------------------------------------
+PUT /cjk_width_example
+{
+ "settings" : {
+ "analysis" : {
+ "analyzer" : {
+ "standard_cjk_width" : {
+ "tokenizer" : "standard",
+ "filter" : ["cjk_width"]
+ }
+ }
+ }
+ }
+}
+--------------------------------------------------