cplusplus · AlisdairM · Oct 28, 2024 · jensmaurer · Oct 29, 2024 · AlisdairM
diff --git a/source/lex.tex b/source/lex.tex
@@ -110,9 +110,9 @@
 \indextext{line splicing}%
 If the first translation character is \unicode{feff}{byte order mark},
 it is deleted.
-Each sequence of a backslash character (\textbackslash)
+Each sequence of a backslash character (\unicode{005c}{reverse solidus})
 immediately followed by
-zero or more whitespace characters other than new-line followed by
+zero or more \grammarterm{whitespace-character}s other than new-line followed by
 a new-line character is deleted, splicing
 physical source lines to form \defnx{logical source lines}{source line!logical}. Only the last
 backslash on any physical source line shall be eligible for being part
@@ -126,9 +126,13 @@
 shall be processed as if an additional new-line character were appended
 to the file.
 
-\item The source file is decomposed into preprocessing
-tokens\iref{lex.pptoken} and sequences of whitespace characters
-(including comments). A source file shall not end in a partial
+\item
+\indextext{whitespace}%
+\indextext{comment}%
+\indextext{token!preprocessing}%
+The source file is decomposed into preprocessing
+tokens\iref{lex.pptoken} and whitespace\iref{lex.whitespace} (sequences of \grammarterm{whitespace-character}s
+and comments). A source file shall not end in a partial
 preprocessing token or in a partial comment.
 \begin{footnote}
 A partial preprocessing
@@ -140,9 +144,9 @@
 would arise from a source file ending with an unclosed \tcode{/*}
 comment.
 \end{footnote}
-Each comment\iref{lex.comment} is replaced by one space character. New-line characters are
-retained. Whether each nonempty sequence of whitespace characters other
-than new-line is retained or replaced by one space character is
+Each comment\iref{lex.comment} is replaced by one \unicode{0020}{space} character. New-line characters are
+retained. Whether each nonempty sequence of \grammarterm{whitespace-character}s other
+than new-line is retained or replaced by one \unicode{0020}{space} character is
 unspecified.
 As characters from the source file are consumed
 to form the next preprocessing token
@@ -178,10 +182,10 @@
 \item
 Adjacent \grammarterm{string-literal} tokens are concatenated\iref{lex.string}.
 
-\item Whitespace characters separating tokens are no longer
-significant. Each preprocessing token is converted into a
-token\iref{lex.token}. The resulting tokens
-constitute a \defn{translation unit} and
+\item
+Each preprocessing token is converted into a token\iref{lex.token}.
+Any \grammarterm{whitespace-character}s separating tokens are no longer significant.
+The resulting tokens constitute a \defn{translation unit} and
 are syntactically and
 semantically analyzed and translated.
 \begin{note}
@@ -467,7 +471,28 @@
 None of these names or aliases have leading or trailing spaces.
 \end{note}
 
-\rSec1[lex.comment]{Comments}
+\rSec1[lex.whitespace]{Whitespace}
+\indextext{whitespace|(}%
+
+\rSec2[lex.whitechar]{Whitespace characters}
+
+\indextext{character!whitespace|(}%
+\begin{bnf}
+\nontermdef{whitespace-character}\br
+    \unicode{0009}{character tabulation}\br
+    \textnormal{new-line}\br
+    \unicode{000b}{line tabulation}\br
+    \unicode{000c}{form feed}\br
+    \unicode{0020}{space}
+\end{bnf}
+
+\pnum
+\begin{note}
+Whitespace characters are used to separate elements of the \Cpp{} grammar.
+\end{note}
+\indextext{character!whitespace|)}
+
+\rSec2[lex.comment]{Comments}
 
 \pnum
 \indextext{comment|(}%
@@ -477,8 +502,8 @@
 characters \tcode{*/}. These comments do not nest.
 \indextext{comment!\tcode{//}}%
 The characters \tcode{//} start a comment, which terminates immediately before the
-next new-line character. If there is a form-feed or a vertical-tab
-character in such a comment, only whitespace characters shall appear
+next new-line character. If there is a \unicode{000c}{form feed} or a \unicode{000b}{line tabulation}
+character in such a comment, only \grammarterm{whitespace-character}s shall appear
 between it and the new-line that terminates the comment; no diagnostic
 is required.
 \begin{note}
@@ -489,6 +514,7 @@
 \tcode{/*} comment.
 \end{note}
 \indextext{comment|)}
+\indextext{whitespace|)}%
 
 \rSec1[lex.pptoken]{Preprocessing tokens}
 
@@ -506,7 +532,7 @@
     string-literal\br
     user-defined-string-literal\br
     preprocessing-op-or-punc\br
-    \textnormal{each non-whitespace character that cannot be one of the above}
+    \textnormal{each non-\grammarterm{whitespace-character} that cannot be one of the above}
 \end{bnf}
 
 \pnum
@@ -520,22 +546,17 @@
 (\grammarterm{import-keyword}, \grammarterm{module-keyword}, and \grammarterm{export-keyword}),
 identifiers, preprocessing numbers, character literals (including user-defined character
 literals), string literals (including user-defined string literals), preprocessing
-operators and punctuators, and single non-whitespace characters that do not lexically
+operators and punctuators, and single non-\grammarterm{whitespace-character}s that do not lexically
 match the other preprocessing token categories.
 If a \unicode{0027}{apostrophe} or a \unicode{0022}{quotation mark} character
 matches the last category, the program is ill-formed.
 If any character not in the basic character set matches the last category,
 the program is ill-formed.
 Preprocessing tokens can be separated by
 \indextext{whitespace}%
-whitespace;
+whitespace\iref{lex.whitespace};
 \indextext{comment}%
-this consists of comments\iref{lex.comment}, or whitespace characters
-(\unicode{0020}{space},
-\unicode{0009}{character tabulation},
-new-line,
-\unicode{000b}{line tabulation}, and
-\unicode{000c}{form feed}), or both.
+this consists of comments, \grammarterm{whitespace-character}s, or both.
 As described in \ref{cpp}, in certain
 circumstances during translation phase 4, whitespace (or the absence
 thereof) serves as more than preprocessing token separation. Whitespace
@@ -826,9 +847,7 @@
 \end{footnote}
 operators, and other separators.
 \indextext{whitespace}%
-Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments
-(collectively, ``whitespace''), as described below, are ignored except
-as they serve to separate tokens.
+Whitespace\iref{lex.whitespace} is ignored except as it serves to separate tokens.
 \begin{note}
 Whitespace can separate otherwise adjacent identifiers, keywords, numeric
 literals, and alternative tokens containing alphabetic characters.
@@ -1790,8 +1809,8 @@
 \begin{bnf}
 \nontermdef{d-char}\br
     \textnormal{any member of the basic character set except:}\br
-    \bnfindent\textnormal{\unicode{0020}{space}, \unicode{0028}{left parenthesis}, \unicode{0029}{right parenthesis}, \unicode{005c}{reverse solidus},}\br
-    \bnfindent\textnormal{\unicode{0009}{character tabulation}, \unicode{000b}{line tabulation}, \unicode{000c}{form feed}, and new-line}
+    \bnfindent\textnormal{a \grammarterm{whitespace-character}, \unicode{0028}{left parenthesis}, \unicode{0029}{right parenthesis},}\br
+    \bnfindent\textnormal{and \unicode{005c}{reverse solidus}}
 \end{bnf}
 
 \pnum