diff --git a/source/basic.tex b/source/basic.tex
index a2b6c8163c..b03d5a743a 100644
--- a/source/basic.tex
+++ b/source/basic.tex
@@ -2978,9 +2978,10 @@
 \indextext{memory model|(}%
 The fundamental storage unit in the \Cpp{} memory model is the
 \defn{byte}.
-A byte is at least large enough to contain any member of the basic
-\indextext{character set!basic execution}%
-execution character set\iref{lex.charset}
+A byte is at least large enough to contain
+the ordinary literal encoding of any element of the basic
+\indextext{character set!basic literal}%
+literal character set\iref{lex.charset}
 and the eight-bit code units of the Unicode
 \begin{footnote}
 Unicode\textregistered\ is a registered trademark of Unicode, Inc.
@@ -4880,8 +4881,6 @@
 Type \keyword{char} is a distinct type
 that has an \impldef{underlying type of \tcode{char}} choice of
 ``\tcode{\keyword{signed} \keyword{char}}'' or ``\tcode{\keyword{unsigned} \keyword{char}}'' as its underlying type.
-The values of type \keyword{char} can represent distinct codes
-for all members of the implementation's basic character set.
 The three types \keyword{char}, \tcode{\keyword{signed} \keyword{char}}, and \tcode{\keyword{unsigned} \keyword{char}}
 are collectively called
 \defnadjx{ordinary character}{types}{type}.
@@ -4942,10 +4941,10 @@
 
 \pnum
 \indextext{type!integral}%
-Types
-\keyword{bool},
-\keyword{char}, \keyword{wchar_t},
-\keyword{char8_t}, \keyword{char16_t}, \keyword{char32_t}, and
+The types \keyword{char}, \keyword{wchar_t},
+\keyword{char8_t}, \keyword{char16_t}, and \keyword{char32_t}
+are collectively called \defnadjx{character}{types}{type}.
+The character types, \keyword{bool},
 the signed and unsigned integer types,
 and cv-qualified versions\iref{basic.type.qualifier} thereof,
 are collectively termed
diff --git a/source/compatibility.tex b/source/compatibility.tex
index eab16c0e68..80b7776a5d 100644
--- a/source/compatibility.tex
+++ b/source/compatibility.tex
@@ -801,8 +801,8 @@
 semantics in this revision of \Cpp{}. Implementations may choose to
 translate trigraphs as specified in \CppXIV{} if they appear outside of a raw
 string literal, as part of the \impldef{mapping from physical source file characters
-to basic source character set} mapping from physical source file characters to
-the basic source character set.
+to translation character set} mapping from physical source file characters to
+the translation character set.
 
 \diffref{lex.ppnumber}
 \change
diff --git a/source/expressions.tex b/source/expressions.tex
index 4dfce6de52..6f1a448773 100644
--- a/source/expressions.tex
+++ b/source/expressions.tex
@@ -1181,10 +1181,10 @@
 \pnum
 \indextext{literal}%
 \indextext{constant}%
-A \grammarterm{literal} is a primary expression.
 The type of a \grammarterm{literal}
 is determined based on its form as specified in \ref{lex.literal}.
-A \grammarterm{string-literal} is an lvalue,
+A \grammarterm{string-literal} is an lvalue
+designating a corresponding string literal object\iref{lex.string},
 a \grammarterm{user-defined-literal}
 has the same value category
 as the corresponding operator call expression described in \ref{lex.ext},
diff --git a/source/intro.tex b/source/intro.tex
index 5f5e449d1d..97433869f5 100644
--- a/source/intro.tex
+++ b/source/intro.tex
@@ -420,13 +420,8 @@
 
 \indexdefn{character!multibyte}%
 \definition{multibyte character}{defns.multibyte}
-sequence of one or more bytes representing a member of the extended
-character set of either the source or the execution environment
-
-\begin{defnote}
-The extended character set is a superset of the basic character
-set\iref{lex.charset}.
-\end{defnote}
+sequence of one or more bytes representing
+the code unit sequence for an encoded character of the execution character set
 
 \definition{NTCTS}{defns.ntcts}
 \defncontext{library}
diff --git a/source/iostreams.tex b/source/iostreams.tex
index cd7215a638..07c873dfca 100644
--- a/source/iostreams.tex
+++ b/source/iostreams.tex
@@ -13167,7 +13167,7 @@
 for pathnames\iref{fs.class.path}.
 The \defn{native encoding} for wide character strings is
 the implementation-defined execution
-wide-character set encoding\iref{lex.charset}.
+wide-character set encoding\iref{character.seq}.
 
 \pnum
 For member function arguments that take character sequences representing
diff --git a/source/lex.tex b/source/lex.tex
index d705176b72..b3cf65d15c 100644
--- a/source/lex.tex
+++ b/source/lex.tex
@@ -68,22 +68,11 @@
 \indextext{character!source file}%
 \indextext{character set!basic source}%
 Physical source file characters are mapped, in an
-\impldef{mapping physical source file characters to basic source character set} manner,
-to the basic source character set (introducing new-line characters for end-of-line
-indicators) if necessary.
+\impldef{mapping physical source file characters to translation character set} manner,
+to the translation character set\iref{lex.charset}
+(introducing new-line characters for end-of-line indicators).
 The set of physical source file characters accepted is \impldef{physical source file
 characters}.
-Any
-source file character not in the basic source character
-set\iref{lex.charset} is replaced by the
-\indextext{universal character name}\grammarterm{universal-character-name} that
-designates that character. An implementation may use any internal
-encoding, so long as an actual extended character encountered in the
-source file, and the same extended character expressed in the source
-file as a \grammarterm{universal-character-name} (e.g., using the \tcode{\textbackslash
-uXXXX} notation), are handled equivalently
-except where this replacement is reverted\iref{lex.pptoken} in a raw string literal.
-
 \item
 \indextext{line splicing}%
 Each sequence of a backslash character (\textbackslash)
@@ -118,7 +107,19 @@
 Each comment is replaced by one space character. New-line characters are
 retained. Whether each nonempty sequence of whitespace characters other
 than new-line is retained or replaced by one space character is
-unspecified. The process of dividing a source file's
+unspecified.
+As characters from the source file are consumed
+to form the next preprocessing token
+(i.e., not being consumed as part of a comment or other forms of whitespace),
+except when matching a
+\grammarterm{c-char-sequence},
+\grammarterm{s-char-sequence},
+\grammarterm{r-char-sequence},
+\grammarterm{h-char-sequence}, or
+\grammarterm{q-char-sequence},
+\grammarterm{universal-character-name}s are recognized and
+replaced by the designated element of the translation character set.
+The process of dividing a source file's
 characters into preprocessing tokens is context-dependent.
 \begin{example}
 See the handling of \tcode{<} within a \tcode{\#include} preprocessing
@@ -127,29 +128,19 @@
 
 \item Preprocessing directives are executed, macro invocations are
 expanded, and \tcode{_Pragma} unary operator expressions are executed.
-If a character sequence that matches the syntax of a
-\grammarterm{universal-character-name} is produced by token
-concatenation\iref{cpp.concat}, the behavior is undefined. A
-\tcode{\#include} preprocessing directive causes the named header or
+A \tcode{\#include} preprocessing directive causes the named header or
 source file to be processed from phase 1 through phase 4, recursively.
 All preprocessing directives are then deleted.
 
 \item
-Each
-\grammarterm{basic-c-char},
-\grammarterm{basic-s-char}, and
-\grammarterm{r-char}
-in a \grammarterm{character-literal} or a \grammarterm{string-literal},
-as well as each
-\grammarterm{escape-sequence} and \grammarterm{universal-character-name}
-in a \grammarterm{character-literal} or a non-raw string literal,
-is encoded in the literal's associated character encoding as specified in
-\ref{lex.ccon} and \ref{lex.string}.
+For a sequence of two or more adjacent \grammarterm{string-literal} tokens,
+a common \grammarterm{encoding-prefix} is determined
+as specified in \ref{lex.string}.
+Each such \grammarterm{string-literal} token is then considered to have
+that common \grammarterm{encoding-prefix}.
 
 \item
-Adjacent \grammarterm{string-literal}s are concatenated
-and a null character is appended to the result
-as specified in \ref{lex.string}.
+Adjacent \grammarterm{string-literal} tokens are concatenated\iref{lex.string}.
 
 \item Whitespace characters separating tokens are no longer
 significant. Each preprocessing token is converted into a
@@ -224,25 +215,74 @@
 
 \pnum
 \indextext{character set|(}%
-The \defnx{basic source character set}{character set!basic source} consists of 96 characters: the space character,
-the control characters representing horizontal tab, vertical tab, form feed, and
-new-line, plus the following 91 graphical characters:
-\begin{footnote}
-The glyphs for
-the members of the basic source character set are intended to
-identify characters from the subset of ISO/IEC 10646 which corresponds to the ASCII
-character set. However, the mapping from source file characters to the source
-character set (described in translation phase 1) is specified as
-\impldef{mapping from physical source file characters to basic source character set},
-and therefore implementations must document how the basic source characters are
-represented in source files.
-\end{footnote}
-\begin{codeblock}
-a b c d e f g h i j k l m n o p q r s t u v w x y z
-A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
-0 1 2 3 4 5 6 7 8 9
-_ { } [ ] # ( ) < > % : ; . ? * + - / ^ & | ~ ! = , @\textbackslash@ " '
-\end{codeblock}
+The \defnadj{translation}{character set} consists of the following elements:
+\begin{itemize}
+\item
+each character named by ISO/IEC 10646,
+as identified by its unique UCS scalar value, and
+\item
+a distinct character for each UCS scalar value
+where no named character is assigned.
+\end{itemize}
+\begin{note}
+ISO/IEC 10646 code points are integers
+in the range $[0, \mathrm{10FFFF}]$ (hexadecimal).
+A surrogate code point is a value
+in the range $[\mathrm{D800}, \mathrm{DFFF}]$ (hexadecimal).
+A UCS scalar value is any code point that is not a surrogate code point.
+\end{note}
+
+\pnum
+The \defnadj{basic}{character set} is a subset of the translation character set,
+consisting of 96 characters as specified in \tref{lex.charset.basic}.
+\begin{note}
+Unicode short names are given only as a means of identifying the character;
+the numerical value has no other meaning in this context.
+\end{note}
+
+\begin{floattable}{Basic character set}{lex.charset.basic}{lll}
+\topline
+\lhdrx{2}{character} & \rhdr{glyph} \\ \capsep
+U+0009 & CHARACTER TABULATION & \\
+U+000B & LINE TABULATION & \\
+U+000C & FORM FEED (FF) & \\
+U+0020 & SPACE & \\
+U+000A & LINE FEED (LF) & new-line \\
+U+0021 & EXCLAMATION MARK & \tcode{!} \\
+U+0022 & QUOTATION MARK & \tcode{"} \\
+U+0023 & NUMBER SIGN & \tcode{\#} \\
+U+0025 & PERCENT SIGN & \tcode{\%} \\
+U+0026 & AMPERSAND  & \tcode{\&} \\
+U+0027 & APOSTROPHE & \tcode{'} \\
+U+0028 & LEFT PARENTHESIS & \tcode{(} \\
+U+0029 & RIGHT PARENTHESIS & \tcode{)} \\
+U+002A & ASTERISK & \tcode{*} \\
+U+002B & PLUS SIGN & \tcode{+} \\
+U+002C & COMMA & \tcode{,} \\
+U+002D & HYPHEN-MINUS & \tcode{-} \\
+U+002E & FULL STOP & \tcode{.} \\
+U+002F & SOLIDUS & \tcode{/} \\
+U+0030 .. U+0039 & DIGIT ZERO .. NINE & \tcode{0 1 2 3 4 5 6 7 8 9} \\
+U+003A & COLON & \tcode{:} \\
+U+003B & SEMICOLON & \tcode{;} \\
+U+003C & LESS-THAN SIGN & \tcode{<} \\
+U+003D & EQUALS SIGN & \tcode{=} \\
+U+003E & GREATER-THAN SIGN & \tcode{>} \\
+U+003F & QUESTION MARK & \tcode{?} \\
+U+0041 .. U+005A & LATIN CAPITAL LETTER A .. Z & \tcode{A B C D E F G H I J K L M} \\
+& & \tcode{N O P Q R S T U V W X Y Z} \\
+U+005B & LEFT SQUARE BRACKET & \tcode{[} \\
+U+005C & REVERSE SOLIDUS & \tcode{\textbackslash} \\
+U+005D & RIGHT SQUARE BRACKET & \tcode{]} \\
+U+005E & CIRCUMFLEX ACCENT & \tcode{\caret} \\
+U+005F & LOW LINE & \tcode{_} \\
+U+0061 .. U+007A & LATIN SMALL LETTER A .. Z & \tcode{a b c d e f g h i j k l m} \\
+ & & \tcode{n o p q r s t u v w x y z} \\
+U+007B & LEFT CURLY BRACKET & \tcode{\{} \\
+U+007C & VERTICAL LINE & \tcode{|} \\
+U+007D & RIGHT CURLY BRACKET & \tcode{\}} \\
+U+007E & TILDE & \tcode{\textasciitilde} \\
+\end{floattable}
 
 \pnum
 The \grammarterm{universal-character-name} construct provides a way to name
@@ -260,54 +300,74 @@
 \end{bnf}
 
 A \grammarterm{universal-character-name}
-designates the character in ISO/IEC 10646 (if any)
-whose code point is the hexadecimal number represented by
+designates the character in the translation character set
+whose UCS scalar value is the hexadecimal number represented by
 the sequence of \grammarterm{hexadecimal-digit}s
 in the \grammarterm{universal-character-name}.
-The program is ill-formed if that number is not a code point
-or if it is a surrogate code point.
-Noncharacter code points and reserved code points
-are considered to designate separate characters distinct from
-any ISO/IEC 10646 character.
+The program is ill-formed if that number is not a UCS scalar value.
 If a \grammarterm{universal-character-name} outside
 the \grammarterm{c-char-sequence}, \grammarterm{s-char-sequence}, or
 \grammarterm{r-char-sequence} of
 a \grammarterm{character-literal} or \grammarterm{string-literal}
 (in either case, including within a \grammarterm{user-defined-literal})
 corresponds to a control character or
-to a character in the basic
-source character set, the program is ill-formed.
-\begin{footnote}
+to a character in the basic character set, the program is ill-formed.
+\begin{note}
 A sequence of characters resembling a \grammarterm{universal-character-name} in an
 \grammarterm{r-char-sequence}\iref{lex.string} does not form a
 \grammarterm{universal-character-name}.
-\end{footnote}
-\begin{note}
-ISO/IEC 10646 code points are integers in the range $[0, \mathrm{10FFFF}]$ (hexadecimal).
-A surrogate code point is a value in the range $[\mathrm{D800}, \mathrm{DFFF}]$ (hexadecimal).
-A control character is a character whose code point is
-in either of the ranges $[0, \mathrm{1F}]$ or $[\mathrm{7F}, \mathrm{9F}]$ (hexadecimal).
 \end{note}
 
 \pnum
-The \defnx{basic execution character set}{character set!basic execution} and the
-\defnx{basic execution wide-character set}{wide-character set!basic execution}
-shall each contain all the members of the
-basic source character set, plus control characters representing alert,
-backspace, and carriage return, plus a \defnx{null character}{character!null}
-(respectively, \defnx{null wide character}{wide-character!null}), whose value is 0.
-For each basic execution character set, the values of the
-members shall be non-negative and distinct from one another. In both the
-source and execution basic character sets, the value of each character
-after \tcode{0} in the above list of decimal digits shall be one greater
-than the value of the previous. The \defnx{execution character set}{character set!execution}
-and the \defnx{execution wide-character set}{wide-character set!execution} are
-\impldef{execution character set and execution wide-character set}
-supersets of the
-basic execution character set and the basic execution wide-character
-set, respectively. The values of the members of the execution character sets
-and the sets of additional members
-are locale-specific.%
+The \defnadj{basic literal}{character set} consists of
+all characters of the basic character set,
+plus the control characters specified in \tref{lex.charset.literal}.
+\begin{floattable}{Additional control characters in the basic literal character set}{lex.charset.literal}{ll}
+\topline
+\ohdrx{2}{character} \\ \capsep
+U+0000 & NULL \\
+U+0007 & BELL \\
+U+0008 & BACKSPACE \\
+U+000D & CARRIAGE RETURN (CR) \\
+\end{floattable}
+
+\pnum
+A \defn{code unit} is an integer value
+of character type\iref{basic.fundamental}.
+Characters in a \grammarterm{character-literal}
+other than a multicharacter or non-encodable character literal or
+in a \grammarterm{string-literal} are encoded as
+a sequence of one or more code units, as determined
+by the \grammarterm{encoding-prefix} (\ref{lex.ccon}, \ref{lex.string});
+this is termed the respective \defnadj{literal}{encoding}.
+The \defnadj{ordinary literal}{encoding} is
+the encoding applied to an ordinary character or string literal.
+The \defnadj{wide literal}{encoding} is the encoding applied
+to a wide character or string literal.
+
+\pnum
+A literal encoding or a locale-specific encoding of one of
+the execution character sets\iref{character.seq}
+encodes each element of the basic literal character set as
+a single code unit with non-negative value,
+distinct from the code unit for any other such element.
+\begin{note}
+A character not in the basic literal character set
+can be encoded with more than one code unit;
+the value of such a code unit can be the same as
+that of a code unit for an element of the basic literal character set.
+\end{note}
+The U+0000 NULL character is encoded as the value \tcode{0}.
+No other element of the translation character set
+is encoded with a code unit of value \tcode{0}.
+The code unit value of each decimal digit character after the digit \tcode{0} (U+0030)
+shall be one greater than the value of the previous.
+The ordinary and wide literal encodings are otherwise
+\impldef{ordinary and wide literal encodings}.
+For a UTF-8, UTF-16, or UTF-32 literal,
+the UCS scalar value
+corresponding to each character of the translation character set
+is encoded as specified in ISO/IEC 10646 for the respective UCS encoding form.
 \indextext{character set|)}
 
 \rSec1[lex.pptoken]{Preprocessing tokens}
@@ -326,7 +386,6 @@
     string-literal\br
     user-defined-string-literal\br
     preprocessing-op-or-punc\br
-    \textnormal{each} universal-character-name \textnormal{that cannot be one of the above}\br
     \textnormal{each non-whitespace character that cannot be one of the above}
 \end{bnf}
 
@@ -337,25 +396,27 @@
 
 \pnum
 A preprocessing token is the minimal lexical element of the language in translation
-phases 3 through 6. The categories of preprocessing token are: header names,
+phases 3 through 6.
+In this document,
+glyphs are used to identify
+elements of the basic character set\iref{lex.charset}.
+The categories of preprocessing token are: header names,
 placeholder tokens produced by preprocessing \tcode{import} and \tcode{module} directives
 (\grammarterm{import-keyword}, \grammarterm{module-keyword}, and \grammarterm{export-keyword}),
 identifiers, preprocessing numbers, character literals (including user-defined character
 literals), string literals (including user-defined string literals), preprocessing
-operators and punctuators, and single \grammarterm{universal-character-name}s and non-whitespace characters that do not lexically
+operators and punctuators, and single non-whitespace characters that do not lexically
 match the other preprocessing token categories.
-If a single \grammarterm{universal-character-name}
-does not match any of the other preprocessing token categories,
+If a U+0027 APOSTROPHE or a U+0022 QUOTATION MARK character
+matches the last category, the behavior is undefined.
+If any character not in the basic character set matches the last category,
 the program is ill-formed.
-If a \tcode{'} or a \tcode{"} character
-matches the last category, the behavior is undefined. Preprocessing tokens can be
-separated by
+Preprocessing tokens can be separated by
 \indextext{whitespace}%
 whitespace;
 \indextext{comment}%
 this consists of comments\iref{lex.comment}, or whitespace
-characters (space, horizontal tab, new-line, vertical tab, and
-form-feed), or both. As described in \ref{cpp}, in certain
+characters (U+0020 SPACE, U+0009 CHARACTER TABULATION, new-line, U+000B LINE TABULATION, and U+000C FORM FEED), or both. As described in \ref{cpp}, in certain
 circumstances during translation phase 4, whitespace (or the absence
 thereof) serves as more than preprocessing token separation. Whitespace
 can appear within a preprocessing token only as part of a header name or
@@ -371,8 +432,8 @@
 If the next character begins a sequence of characters that could be the prefix
 and initial double quote of a raw string literal, such as \tcode{R"}, the next preprocessing
 token shall be a raw string literal. Between the initial and final
-double quote characters of the raw string, any transformations performed in phases
-1 and 2 (\grammarterm{universal-character-name}{s} and line splicing) are reverted; this reversion
+double quote characters of the raw string, any transformations performed in phase
+2 (line splicing) are reverted; this reversion
 shall apply before any \grammarterm{d-char}, \grammarterm{r-char}, or delimiting
 parenthesis is identified. The raw string literal is defined as the shortest sequence
 of characters that matches the raw-string pattern
@@ -559,7 +620,7 @@
 
 \begin{bnf}
 \nontermdef{h-char}\br
-    \textnormal{any member of the source character set except new-line and \terminal{>}}
+    \textnormal{any member of the translation character set except new-line and U+003E GREATER-THAN SIGN}
 \end{bnf}
 
 \begin{bnf}
@@ -570,7 +631,7 @@
 
 \begin{bnf}
 \nontermdef{q-char}\br
-    \textnormal{any member of the source character set except new-line and \terminal{"}}
+    \textnormal{any member of the translation character set except new-line and U+0022 QUOTATION MARK}
 \end{bnf}
 
 \pnum
@@ -642,14 +703,14 @@
 \begin{bnf}
 \nontermdef{identifier-start}\br
     nondigit\br
-    universal-character-name \textnormal{of class XID_Start}
+    \textnormal{an element of the translation character set of class XID_Start}
 \end{bnf}
 
 \begin{bnf}
 \nontermdef{identifier-continue}\br
     digit\br
     nondigit\br
-    universal-character-name \textnormal{of class XID_Continue}
+    \textnormal{an element of the translation character set of class XID_Continue}
 \end{bnf}
 
 \begin{bnf}
@@ -1207,7 +1268,8 @@
 
 \begin{bnf}
 \nontermdef{basic-c-char}\br
-    \textnormal{any member of the basic source character set except the single-quote \terminal{'}, backslash \terminal{\textbackslash}, or new-line character}
+    \textnormal{any member of the translation character set except the U+0027 APOSTROPHE,}\br
+    \bnfindent\textnormal{U+005C REVERSE SOLIDUS, or new-line character}
 \end{bnf}
 
 \begin{bnf}
@@ -1253,7 +1315,7 @@
 
 \begin{bnf}
 \nontermdef{conditional-escape-sequence-char}\br
-    \textnormal{any member of the basic source character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{u}, \terminal{U}, or \terminal{x}}
+    \textnormal{any member of the basic character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{u}, \terminal{U}, or \terminal{x}}
 \end{bnf}
 
 \pnum
@@ -1300,12 +1362,12 @@
 The examples in \tref{lex.ccon.literal}
 for non-encodable ordinary and wide character literals assume that
 the specified character lacks representation in
-the execution character set or execution wide-character set, respectively, or
+the ordinary literal encoding or wide literal encoding, respectively, or
 that encoding it would require more than one code unit.
 \end{note}
 
 \begin{floattable}{Character literals}{lex.ccon.literal}
-{lllll}
+{l|l|l|l|l}
 \topline
 Encoding & Kind & Type & Associated char- & Example \\
 prefix & & & acter encoding & \\
@@ -1313,32 +1375,32 @@
 none &
 \defnx{ordinary character literal}{literal!character!ordinary} &
 \keyword{char} &
-encoding of &
-\tcode{'v'} \\
+ordinary &
+\tcode{'v'} \\ \cline{2-3}\cline{5-5}
  &
 non-encodable ordinary character literal &
 \keyword{int} &
-the execution &
-\tcode{'\textbackslash U0001F525'} \\
+literal &
+\tcode{'\textbackslash U0001F525'} \\ \cline{2-3}\cline{5-5}
  &
 ordinary multicharacter literal &
 \keyword{int} &
-character set &
+encoding &
 \tcode{'abcd'} \\ \hline
 \tcode{L} &
 \defnx{wide character literal}{literal!character!wide} &
 \keyword{wchar_t} &
-encoding of &
-\tcode{L'w'} \\
+wide &
+\tcode{L'w'} \\ \cline{2-3}\cline{5-5}
  &
 non-encodable wide character literal &
 \keyword{wchar_t} &
-the execution &
-\tcode{L'\textbackslash U0001F32A'} \\
+literal &
+\tcode{L'\textbackslash U0001F32A'} \\ \cline{2-3}\cline{5-5}
  &
 wide multicharacter literal &
 \keyword{wchar_t} &
-wide-character set &
+encoding &
 \tcode{L'abcd'} \\ \hline
 \tcode{u8} &
 \defnx{UTF-8 character literal}{literal!character!UTF-8} &
@@ -1424,17 +1486,18 @@
 \begin{floattable}{Simple escape sequences}{lex.ccon.esc}
 {lll}
 \topline
-new-line        &   NL(LF)          &   \tcode{\textbackslash n}                \\
-horizontal tab  &   HT              &   \tcode{\textbackslash t}                \\
-vertical tab    &   VT              &   \tcode{\textbackslash v}                \\
-backspace       &   BS              &   \tcode{\textbackslash b}                \\
-carriage return &   CR              &   \tcode{\textbackslash r}                \\
-form feed       &   FF              &   \tcode{\textbackslash f}                \\
-alert           &   BEL             &   \tcode{\textbackslash a}                \\
-backslash       &   \textbackslash  &   \tcode{\textbackslash\textbackslash}    \\
-question mark   &   ?               &   \tcode{\textbackslash ?}                \\
-single quote    &   \tcode{'}       &   \tcode{\textbackslash '}                \\
-double quote    &   \tcode{"}       &   \tcode{\textbackslash "}                \\
+\lhdrx{2}{character} &  \rhdr{\grammarterm{simple-escape-sequence}} \\ \capsep
+U+000A & LINE FEED (LF)       & \tcode{\textbackslash n} \\
+U+0009 & CHARACTER TABULATION & \tcode{\textbackslash t} \\
+U+000B & LINE TABULATION      & \tcode{\textbackslash v} \\
+U+0008 & BACKSPACE            & \tcode{\textbackslash b} \\
+U+000D & CARRIAGE RETURN (CR) & \tcode{\textbackslash r} \\
+U+000C & FORM FEED (FF)       & \tcode{\textbackslash f} \\
+U+0007 & BELL                 & \tcode{\textbackslash a} \\
+U+005C & REVERSE SOLIDUS      & \tcode{\textbackslash\textbackslash} \\
+U+003F & QUESTION MARK        & \tcode{\textbackslash ?} \\
+U+0027 & APOSTROPHE           & \tcode{\textbackslash '} \\
+U+0022 & QUOTATION MARK       & \tcode{\textbackslash "} \\
 \end{floattable}
 
 \rSec2[lex.fcon]{Floating-point literals}
@@ -1586,7 +1649,8 @@
 
 \begin{bnf}
 \nontermdef{basic-s-char}\br
-    \textnormal{any member of the basic source character set except the double-quote \terminal{"}, backslash \terminal{\textbackslash}, or new-line character}
+    \textnormal{any member of the translation character set except the U+0022 QUOTATION MARK,}\br
+    \bnfindent\textnormal{U+005C REVERSE SOLIDUS, or new-line character}
 \end{bnf}
 
 \begin{bnf}
@@ -1602,8 +1666,8 @@
 
 \begin{bnf}
 \nontermdef{r-char}\br
-    \textnormal{any member of the source character set, except a right parenthesis \terminal{)} followed by}\br
-    \bnfindent\textnormal{the initial \grammarterm{d-char-sequence} (which may be empty) followed by a double quote \terminal{"}.}
+    \textnormal{any member of the translation character set, except a U+0029 RIGHT PARENTHESIS followed by}\br
+    \bnfindent\textnormal{the initial \grammarterm{d-char-sequence} (which may be empty) followed by a U+0022 QUOTATION MARK}
 \end{bnf}
 
 \begin{bnf}
@@ -1614,9 +1678,10 @@
 
 \begin{bnf}
 \nontermdef{d-char}\br
-    \textnormal{any member of the basic source character set except:}\br
-    \bnfindent\textnormal{space, the left parenthesis \terminal{(}, the right parenthesis \terminal{)}, the backslash \terminal{\textbackslash}, and the control characters}\br
-    \bnfindent\textnormal{representing horizontal tab, vertical tab, form feed, and newline.}
+    \textnormal{any member of the basic character set except:}\br
+    \bnfindent\textnormal{U+0020 SPACE, U+0028 LEFT PARENTHESIS, U+0029 RIGHT PARENTHESIS,}\br
+    \bnfindent\textnormal{U+005C REVERSE SOLIDUS, U+0009 CHARACTER TABULATION,}\br
+    \bnfindent\textnormal{U+000B LINE TABULATION, U+000C FORM FEED (FF), and new-line}
 \end{bnf}
 
 \pnum
@@ -1641,18 +1706,19 @@
 {llp{2.6cm}p{2.3cm}p{4.7cm}}
 \topline
 Encoding & Kind & Type & Associated & Examples \\
-prefix & & & character encoding & \\
+prefix   &      &      & character  & \\
+         &      &      & encoding   & \\
 \capsep
 none &
 \defnx{ordinary string literal}{literal!string!ordinary} &
 array of $n$\newline \tcode{\keyword{const} \keyword{char}} &
-encoding of the execution character set &
+ordinary literal encoding &
 \tcode{"ordinary string"}\newline
 \tcode{R"(ordinary raw string)"} \\
 \tcode{L} &
 \defnx{wide string literal}{literal!string!wide} &
 array of $n$\newline \tcode{\keyword{const} \keyword{wchar_t}} &
-encoding of the execution wide-character set &
+wide literal\newline encoding &
 \tcode{L"wide string"}\newline
 \tcode{LR"w(wide raw string)w"} \\
 \tcode{u8} &
@@ -1729,19 +1795,45 @@
 
 \pnum
 \indextext{concatenation!string}%
-In translation phase 6\iref{lex.phases}, adjacent \grammarterm{string-literal}{s} are concatenated. If
-both \grammarterm{string-literal}{s} have the same \grammarterm{encoding-prefix}, the resulting concatenated \grammarterm{string-literal} has
-that \grammarterm{encoding-prefix}. If one \grammarterm{string-literal} has no \grammarterm{encoding-prefix}, it is treated as a \grammarterm{string-literal} of
-the same \grammarterm{encoding-prefix} as the other operand. Any other concatenations are ill-formed.
+The common \grammarterm{encoding-prefix}
+for a sequence of adjacent \grammarterm{string-literal}s
+is determined pairwise as follows:
+If two \grammarterm{string-literal}{s} have
+the same \grammarterm{encoding-prefix},
+the common \grammarterm{encoding-prefix} is that \grammarterm{encoding-prefix}.
+If one \grammarterm{string-literal} has no \grammarterm{encoding-prefix},
+the common \grammarterm{encoding-prefix} is that
+of the other \grammarterm{string-literal}.
+Any other combinations are ill-formed.
 \begin{note}
-This concatenation is an interpretation, not a conversion.
-Because the interpretation happens in translation phase 6
-(after the string literal contents have been encoded in
-the \grammarterm{string-literal}'s associated character encoding),
-a \grammarterm{string-literal}'s initial rawness
-has no effect on the interpretation or well-formedness of the concatenation.
+A \grammarterm{string-literal}'s rawness has
+no effect on the determination of the common \grammarterm{encoding-prefix}.
 \end{note}
+
+\pnum
+In translation phase 6\iref{lex.phases},
+adjacent \grammarterm{string-literal}s are concatenated.
+The lexical structure and grouping of
+the contents of the individual \grammarterm{string-literal}s are retained.
+\begin{example}
+\begin{codeblock}
+"\xA" "B"
+\end{codeblock}
+represents
+the code unit \tcode{'\textbackslash xA'} and the character \tcode{'B'}
+after concatenation
+(and not the single code unit \tcode{'\textbackslash xAB'}).
+Similarly,
+\begin{codeblock}
+R"(\u00)" "41"
+\end{codeblock}
+represents six characters,
+starting with a backslash and ending with the digit \tcode{1}
+(and not the single character \tcode{'A'}
+specified by a \grammarterm{universal-character-name}).
+
 \tref{lex.string.concat} has some examples of valid concatenations.
+\end{example}
 
 \begin{floattable}{String literal concatenations}{lex.string.concat}
 {lll|lll|lll}
@@ -1763,17 +1855,6 @@
 \tcode{"a"}  & \tcode{L"b"} & \tcode{L"ab"} \\
 \end{floattable}
 
-Characters in concatenated strings are kept distinct.
-
-\begin{example}
-\begin{codeblock}
-"\xA" "B"
-\end{codeblock}
-contains the two characters \tcode{'\textbackslash xA'} and \tcode{'B'}
-after concatenation (and not the single hexadecimal character
-\tcode{'\textbackslash xAB'}).
-\end{example}
-
 \pnum
 \indextext{\idxcode{0}|seealso{zero, null}}%
 \indextext{\idxcode{0}!string terminator}%
@@ -1799,8 +1880,9 @@
 String literal objects are initialized with
 the sequence of code unit values
 corresponding to the \grammarterm{string-literal}'s sequence of
-\grammarterm{s-char}s (for a non-raw string literal) and
-\grammarterm{r-char}s (for a raw string literal)
+\grammarterm{s-char}s (originally from non-raw string literals) and
+\grammarterm{r-char}s (originally from raw string literals),
+plus a terminating U+0000 NULL character,
 in order as follows:
 \begin{itemize}
 \item
@@ -1811,18 +1893,14 @@
 \grammarterm{universal-character-name}s\iref{lex.charset}
 is encoded to a code unit sequence
 using the \grammarterm{string-literal}'s associated character encoding.
-If a character lacks representation in the associated character encoding, then:
-\begin{itemize}
-\item
-If the \grammarterm{string-literal}'s \grammarterm{encoding-prefix}
-is absent or \tcode{L},
+If a character lacks representation in the associated character encoding,
 then the \grammarterm{string-literal} is conditionally-supported and
 an
 \impldef{code unit sequence for non-representable \grammarterm{string-literal}}
 code unit sequence is encoded.
-\item
-Otherwise, the \grammarterm{string-literal} is ill-formed.
-\end{itemize}
+\begin{note}
+No character lacks representation in any of the UCS encoding forms.
+\end{note}
 When encoding a stateful character encoding,
 implementations should encode the first such sequence
 beginning with the initial encoding state and
@@ -1988,7 +2066,7 @@
 where \placeholder{n} is the source character sequence $c_1c_2...c_k$.
 \begin{note}
 The sequence
-$c_1c_2...c_k$ can only contain characters from the basic source character set.
+$c_1c_2...c_k$ can only contain characters from the basic character set.
 \end{note}
 
 \pnum
@@ -2014,7 +2092,7 @@
 where \placeholder{f} is the source character sequence $c_1c_2...c_k$.
 \begin{note}
 The sequence
-$c_1c_2...c_k$ can only contain characters from the basic source character set.
+$c_1c_2...c_k$ can only contain characters from the basic character set.
 \end{note}
 
 \pnum
diff --git a/source/lib-intro.tex b/source/lib-intro.tex
index a5d9179652..3f85b183ff 100644
--- a/source/lib-intro.tex
+++ b/source/lib-intro.tex
@@ -646,14 +646,30 @@
 
 \begin{itemize}
 \item
+Properties specified as \defn{locale-specific}
+may change during program execution
+by a call to \tcode{setlocale(int, const char*)}\iref{clocale.syn}, or
+by a change to a \tcode{locale} object,
+as described in \ref{locales} and \ref{input.output}.
+\item
+The \defnadj{execution}{character set} and
+the \defnadj{execution}{wide-character set}
+are supersets of the basic literal character set\iref{lex.charset}.
+The encodings of the execution character sets and
+the sets of additional elements (if any) are locale-specific.
+\begin{note}
+The encodings of the execution character sets can be unrelated
+to any literal encoding.
+\end{note}
+\item
 A \defn{letter} is any of the 26 lowercase or 26
 \indextext{lowercase}%
 \indextext{uppercase}%
-uppercase letters in the basic execution character set.
+uppercase letters in the basic character set.
 \item
 The
 \defnx{decimal-point character}{character!decimal-point}
-is the
+is the locale-specific
 (single-byte) character used by functions that convert between a (single-byte)
 character sequence and a value of one of the floating-point types.
 It is used
@@ -665,17 +681,7 @@
 \tcode{'.'},
 which is
 also its value in the \tcode{"C"}
-locale, but may change during program
-execution by a call to
-\tcode{setlocale(int, const char*)},
-\begin{footnote}
-declared in
-\libheaderref{clocale}.
-\indexlibraryglobal{setlocale}%
-\end{footnote}
-or by a change to a
-\tcode{locale}
-object, as described in \ref{locales} and \ref{input.output}.
+locale.
 \item
 A
 \defn{character sequence}
@@ -748,7 +754,7 @@
 shift state.
 \begin{footnote}
 An \ntbs{} that contains characters only from the
-basic execution character set is also an \ntmbs{}.
+basic literal character set is also an \ntmbs{}.
 Each multibyte character then
 consists of a single byte.
 \end{footnote}
diff --git a/source/locales.tex b/source/locales.tex
index b787bfc65d..e0f561fa68 100644
--- a/source/locales.tex
+++ b/source/locales.tex
@@ -1232,7 +1232,7 @@
 for conversion to the locale's encoding.
 \end{footnote}
 The only characters for which unique transformations are required
-are those in the basic source character set\iref{lex.charset}.
+are those in the basic character set\iref{lex.charset}.
 
 For any named \tcode{ctype} category with
 a \tcode{ctype<charT>} facet \tcode{ctc} and
@@ -1267,7 +1267,7 @@
 from a \tcode{charT} value or sequence of \tcode{charT} values
 to the corresponding \tcode{char} value or values.
 
-For any character \tcode{c} in the basic source character set\iref{lex.charset}
+For any character \tcode{c} in the basic character set\iref{lex.charset}
 the transformation is such that
 \begin{codeblock}
 do_widen(do_narrow(c, 0)) == c
diff --git a/source/preprocessor.tex b/source/preprocessor.tex
index e87ba67cae..fb03b852f9 100644
--- a/source/preprocessor.tex
+++ b/source/preprocessor.tex
@@ -1360,6 +1360,12 @@
 of two placemarkers results in a single placemarker preprocessing token, and
 concatenation of a placemarker with a non-placemarker preprocessing token results
 in the non-placemarker preprocessing token.
+If the result begins with a sequence matching the syntax of \grammarterm{universal-character-name},
+the behavior is undefined.
+\begin{note}
+This determination does not consider the replacement of
+\grammarterm{universal-character-name}s in translation phase 3\iref{lex.phases}.
+\end{note}
 If the result is not a valid preprocessing token,
 the behavior is undefined.
 The resulting token is available for further macro replacement.
diff --git a/source/time.tex b/source/time.tex
index 346cbeecfa..54db568d3e 100644
--- a/source/time.tex
+++ b/source/time.tex
@@ -11372,7 +11372,7 @@
 The time zone abbreviation or name.
 A single word is parsed.
 This word can only contain characters
-from the basic source character set\iref{lex.charset}
+from the basic character set\iref{lex.charset}
 that are alphanumeric, or one of
 \tcode{'_'}, \tcode{'/'}, \tcode{'-'}, or \tcode{'+'}.
 \\ \rowsep