diff --git a/BUILD.bazel b/BUILD.bazel
index 30d53a401..cd509a0d2 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -28,6 +28,7 @@ cc_library(
         "src/pcre2_auto_possess.c",
         "src/pcre2_chkdint.c",
         "src/pcre2_compile.c",
+        "src/pcre2_compile_class.c",
         "src/pcre2_config.c",
         "src/pcre2_context.c",
         "src/pcre2_convert.c",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f3d6c3c2f..dd0fff469 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -618,6 +618,7 @@ SET(PCRE2_SOURCES
   ${PROJECT_BINARY_DIR}/pcre2_chartables.c
   src/pcre2_chkdint.c
   src/pcre2_compile.c
+  src/pcre2_compile_class.c
   src/pcre2_config.c
   src/pcre2_context.c
   src/pcre2_convert.c
diff --git a/Makefile.am b/Makefile.am
index 1595d00da..466edd8b8 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -374,6 +374,8 @@ COMMON_SOURCES = \
   src/pcre2_auto_possess.c \
   src/pcre2_chkdint.c \
   src/pcre2_compile.c \
+  src/pcre2_compile.h \
+  src/pcre2_compile_class.c \
   src/pcre2_config.c \
   src/pcre2_context.c \
   src/pcre2_convert.c \
diff --git a/NON-AUTOTOOLS-BUILD b/NON-AUTOTOOLS-BUILD
index d7becc974..0026606c1 100644
--- a/NON-AUTOTOOLS-BUILD
+++ b/NON-AUTOTOOLS-BUILD
@@ -105,6 +105,7 @@ example.
        pcre2_chkdint.c
        pcre2_chartables.c
        pcre2_compile.c
+       pcre2_compile_class.c
        pcre2_config.c
        pcre2_context.c
        pcre2_convert.c
diff --git a/PrepareRelease b/PrepareRelease
index be5989de5..dcbb8ca45 100755
--- a/PrepareRelease
+++ b/PrepareRelease
@@ -217,6 +217,8 @@ files="\
   src/pcre2_auto_possess.c \
   src/pcre2_chkdint.c \
   src/pcre2_compile.c \
+  src/pcre2_compile.h \
+  src/pcre2_compile_class.c \
   src/pcre2_config.c \
   src/pcre2_context.c \
   src/pcre2_convert.c \
diff --git a/README b/README
index 47d879d7f..75dcc2809 100644
--- a/README
+++ b/README
@@ -821,37 +821,38 @@ The distribution should contain the files listed below.
                            ASCII coding; unless --enable-rebuild-chartables is
                            specified, used by copying to pcre2_chartables.c
 
-  src/pcre2posix.c         )
-  src/pcre2_auto_possess.c )
-  src/pcre2_chkdint.c      )
-  src/pcre2_compile.c      )
-  src/pcre2_config.c       )
-  src/pcre2_context.c      )
-  src/pcre2_convert.c      )
-  src/pcre2_dfa_match.c    )
-  src/pcre2_error.c        )
-  src/pcre2_extuni.c       )
-  src/pcre2_find_bracket.c )
-  src/pcre2_jit_compile.c  )
-  src/pcre2_jit_match.c    ) sources for the functions in the library,
-  src/pcre2_jit_misc.c     )   and some internal functions that they use
-  src/pcre2_maketables.c   )
-  src/pcre2_match.c        )
-  src/pcre2_match_data.c   )
-  src/pcre2_newline.c      )
-  src/pcre2_ord2utf.c      )
-  src/pcre2_pattern_info.c )
-  src/pcre2_script_run.c   )
-  src/pcre2_serialize.c    )
-  src/pcre2_string_utils.c )
-  src/pcre2_study.c        )
-  src/pcre2_substitute.c   )
-  src/pcre2_substring.c    )
-  src/pcre2_tables.c       )
-  src/pcre2_ucd.c          )
-  src/pcre2_ucptables.c    )
-  src/pcre2_valid_utf.c    )
-  src/pcre2_xclass.c       )
+  src/pcre2posix.c           )
+  src/pcre2_auto_possess.c   )
+  src/pcre2_chkdint.c        )
+  src/pcre2_compile.c        )
+  src/pcre2_compile_class.c  )
+  src/pcre2_config.c         )
+  src/pcre2_context.c        )
+  src/pcre2_convert.c        )
+  src/pcre2_dfa_match.c      )
+  src/pcre2_error.c          )
+  src/pcre2_extuni.c         )
+  src/pcre2_find_bracket.c   )
+  src/pcre2_jit_compile.c    )
+  src/pcre2_jit_match.c      ) sources for the functions in the library,
+  src/pcre2_jit_misc.c       )   and some internal functions that they use
+  src/pcre2_maketables.c     )
+  src/pcre2_match.c          )
+  src/pcre2_match_data.c     )
+  src/pcre2_newline.c        )
+  src/pcre2_ord2utf.c        )
+  src/pcre2_pattern_info.c   )
+  src/pcre2_script_run.c     )
+  src/pcre2_serialize.c      )
+  src/pcre2_string_utils.c   )
+  src/pcre2_study.c          )
+  src/pcre2_substitute.c     )
+  src/pcre2_substring.c      )
+  src/pcre2_tables.c         )
+  src/pcre2_ucd.c            )
+  src/pcre2_ucptables.c      )
+  src/pcre2_valid_utf.c      )
+  src/pcre2_xclass.c         )
 
   src/pcre2_printint.c     debugging function that is used by pcre2test,
   src/pcre2_fuzzsupport.c  function for (optional) fuzzing support
@@ -859,6 +860,7 @@ The distribution should contain the files listed below.
   src/config.h.in          template for config.h, when built by "configure"
   src/pcre2.h.in           template for pcre2.h when built by "configure"
   src/pcre2posix.h         header for the external POSIX wrapper API
+  src/pcre2_compile.h      header for internal use
   src/pcre2_internal.h     header for internal use
   src/pcre2_intmodedep.h   a mode-specific internal header
   src/pcre2_jit_neon_inc.h header used by JIT
diff --git a/build.zig b/build.zig
index 1af8b6b54..7ccc0fc2b 100644
--- a/build.zig
+++ b/build.zig
@@ -48,6 +48,7 @@ pub fn build(b: *std.Build) !void {
             "src/pcre2_auto_possess.c",
             "src/pcre2_chkdint.c",
             "src/pcre2_compile.c",
+            "src/pcre2_compile_class.c",
             "src/pcre2_config.c",
             "src/pcre2_context.c",
             "src/pcre2_convert.c",
diff --git a/doc/html/NON-AUTOTOOLS-BUILD.txt b/doc/html/NON-AUTOTOOLS-BUILD.txt
index d7becc974..0026606c1 100644
--- a/doc/html/NON-AUTOTOOLS-BUILD.txt
+++ b/doc/html/NON-AUTOTOOLS-BUILD.txt
@@ -105,6 +105,7 @@ example.
        pcre2_chkdint.c
        pcre2_chartables.c
        pcre2_compile.c
+       pcre2_compile_class.c
        pcre2_config.c
        pcre2_context.c
        pcre2_convert.c
diff --git a/doc/html/README.txt b/doc/html/README.txt
index 47d879d7f..6a15c1254 100644
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@@ -821,37 +821,38 @@ The distribution should contain the files listed below.
                            ASCII coding; unless --enable-rebuild-chartables is
                            specified, used by copying to pcre2_chartables.c
 
-  src/pcre2posix.c         )
-  src/pcre2_auto_possess.c )
-  src/pcre2_chkdint.c      )
-  src/pcre2_compile.c      )
-  src/pcre2_config.c       )
-  src/pcre2_context.c      )
-  src/pcre2_convert.c      )
-  src/pcre2_dfa_match.c    )
-  src/pcre2_error.c        )
-  src/pcre2_extuni.c       )
-  src/pcre2_find_bracket.c )
-  src/pcre2_jit_compile.c  )
-  src/pcre2_jit_match.c    ) sources for the functions in the library,
-  src/pcre2_jit_misc.c     )   and some internal functions that they use
-  src/pcre2_maketables.c   )
-  src/pcre2_match.c        )
-  src/pcre2_match_data.c   )
-  src/pcre2_newline.c      )
-  src/pcre2_ord2utf.c      )
-  src/pcre2_pattern_info.c )
-  src/pcre2_script_run.c   )
-  src/pcre2_serialize.c    )
-  src/pcre2_string_utils.c )
-  src/pcre2_study.c        )
-  src/pcre2_substitute.c   )
-  src/pcre2_substring.c    )
-  src/pcre2_tables.c       )
-  src/pcre2_ucd.c          )
-  src/pcre2_ucptables.c    )
-  src/pcre2_valid_utf.c    )
-  src/pcre2_xclass.c       )
+  src/pcre2posix.c           )
+  src/pcre2_auto_possess.c   )
+  src/pcre2_chkdint.c        )
+  src/pcre2_compile.c        )
+  src/pcre2_compile_class.c  )
+  src/pcre2_config.c         )
+  src/pcre2_context.c        )
+  src/pcre2_convert.c        )
+  src/pcre2_dfa_match.c      )
+  src/pcre2_error.c          )
+  src/pcre2_extuni.c         )
+  src/pcre2_find_bracket.c   )
+  src/pcre2_jit_compile.c    )
+  src/pcre2_jit_match.c      ) sources for the functions in the library,
+  src/pcre2_jit_misc.c       )   and some internal functions that they use
+  src/pcre2_maketables.c     )
+  src/pcre2_match.c          )
+  src/pcre2_match_data.c     )
+  src/pcre2_newline.c        )
+  src/pcre2_ord2utf.c        )
+  src/pcre2_pattern_info.c   )
+  src/pcre2_script_run.c     )
+  src/pcre2_serialize.c      )
+  src/pcre2_string_utils.c   )
+  src/pcre2_study.c          )
+  src/pcre2_substitute.c     )
+  src/pcre2_substring.c      )
+  src/pcre2_tables.c         )
+  src/pcre2_ucd.c            )
+  src/pcre2_ucptables.c      )
+  src/pcre2_valid_utf.c      )
+  src/pcre2_xclass.c         )
 
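   src/pcre2_printint.c     debugging function that is used by pcre2test,
   src/pcre2_fuzzsupport.c  function for (optional) fuzzing support
@@ -859,6 +860,7 @@ The distribution should contain the files listed below.
   src/config.h.in          template for config.h, when built by "configure"
   src/pcre2.h.in           template for pcre2.h when built by "configure"
   src/pcre2posix.h         header for the external POSIX wrapper API
+  src/pcre2_compile.h      header for internal use
   src/pcre2_internal.h     header for internal use
   src/pcre2_intmodedep.h   a mode-specific internal header
   src/pcre2_jit_neon_inc.h header used by JIT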
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index bdf2e8c45..29b251c4e 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -47,7 +47,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define PSSTART start_pattern  /* Field containing processed string start */
 #define PSEND   end_pattern    /* Field containing processed string end */
 
-#include "pcre2_internal.h"
+#include "pcre2_compile.h"
 
 /* In rare error cases debugging might require calling pcre2_printint(). */
 
@@ -108,20 +108,8 @@ them will be able to (i.e. assume a 64-bit world). */
 #define SIZEOFFSET 2
 #endif
 
-/* Macros for manipulating elements of the parsed pattern vector. */
-
-#define META_CODE(x)   (x & 0xffff0000u)
-#define META_DATA(x)   (x & 0x0000ffffu)
-#define META_DIFF(x,y) ((x-y)>>16)
-
 /* Function definitions to allow mutual recursion */
 
-#ifdef SUPPORT_UNICODE
-static unsigned int
-  add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, uint32_t,
-    compile_block *, const uint32_t *, unsigned int);
-#endif
-
 static int
   compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *,
     uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *,
@@ -199,110 +187,6 @@ don't have to check them every time. */
 
 #define OFLOW_MAX (INT_MAX - 20)
 
-/* Code values for parsed patterns, which are stored in a vector of 32-bit
-unsigned ints. Values less than META_END are literal data values. The coding
-for identifying the item is in the top 16-bits, leaving 16 bits for the
-additional data that some of them need. The META_CODE, META_DATA, and META_DIFF
-macros are used to manipulate parsed pattern elements.
-
-NOTE: When these definitions are changed, the table of extra lengths for each
-code (meta_extra_lengths, just below) must be updated to remain in step. */
-
-#define META_END              0x80000000u  /* End of pattern */
-
-#define META_ALT              0x80010000u  /* alternation */
-#define META_ATOMIC           0x80020000u  /* atomic group */
-#define META_BACKREF          0x80030000u  /* Back ref */
-#define META_BACKREF_BYNAME   0x80040000u  /* \k'name' */
-#define META_BIGVALUE         0x80050000u  /* Next is a literal > META_END */
-#define META_CALLOUT_NUMBER   0x80060000u  /* (?C with numerical argument */
-#define META_CALLOUT_STRING   0x80070000u  /* (?C with string argument */
-#define META_CAPTURE          0x80080000u  /* Capturing parenthesis */
-#define META_CIRCUMFLEX       0x80090000u  /* ^ metacharacter */
-#define META_CLASS            0x800a0000u  /* start non-empty class */
-#define META_CLASS_EMPTY      0x800b0000u  /* empty class */
-#define META_CLASS_EMPTY_NOT  0x800c0000u  /* negative empty class */
-#define META_CLASS_END        0x800d0000u  /* end of non-empty class */
-#define META_CLASS_NOT        0x800e0000u  /* start non-empty negative class */
-#define META_COND_ASSERT      0x800f0000u  /* (?(?assertion)... */
-#define META_COND_DEFINE      0x80100000u  /* (?(DEFINE)... */
-#define META_COND_NAME        0x80110000u  /* (?(<name>)... */
-#define META_COND_NUMBER      0x80120000u  /* (?(digits)... */
-#define META_COND_RNAME       0x80130000u  /* (?(R&name)... */
-#define META_COND_RNUMBER     0x80140000u  /* (?(Rdigits)... */
-#define META_COND_VERSION     0x80150000u  /* (?(VERSION<op>x.y)... */
-#define META_SCS_NAME         0x80160000u  /* (*scan_substring:(<name>)... */
-#define META_SCS_NUMBER       0x80170000u  /* (*scan_substring:(digits)... */
-#define META_SCS_NEXT_NAME    0x80180000u  /* Next <name> of scan_substring */
-#define META_SCS_NEXT_NUMBER  0x80190000u  /* Next digits of scan_substring */
-#define META_DOLLAR           0x801a0000u  /* $ metacharacter */
-#define META_DOT              0x801b0000u  /* . metacharacter */
-#define META_ESCAPE           0x801c0000u  /* \d and friends */
-#define META_KET              0x801d0000u  /* closing parenthesis */
-#define META_NOCAPTURE        0x801e0000u  /* no capture parens */
-#define META_OPTIONS          0x801f0000u  /* (?i) and friends */
-#define META_POSIX            0x80200000u  /* POSIX class item */
-#define META_POSIX_NEG        0x80210000u  /* negative POSIX class item */
-#define META_RANGE_ESCAPED    0x80220000u  /* range with at least one escape */
-#define META_RANGE_LITERAL    0x80230000u  /* range defined literally */
-#define META_RECURSE          0x80240000u  /* Recursion */
-#define META_RECURSE_BYNAME   0x80250000u  /* (?&name) */
-#define META_SCRIPT_RUN       0x80260000u  /* (*script_run:...) */
-
-/* These must be kept together to make it easy to check that an assertion
-is present where expected in a conditional group. */
-
-#define META_LOOKAHEAD        0x80270000u  /* (?= */
-#define META_LOOKAHEADNOT     0x80280000u  /* (?! */
-#define META_LOOKBEHIND       0x80290000u  /* (?<= */
-#define META_LOOKBEHINDNOT    0x802a0000u  /* (?<! */
-
-/* These cannot be conditions */
-
-#define META_LOOKAHEAD_NA     0x802b0000u  /* (*napla: */
-#define META_LOOKBEHIND_NA    0x802c0000u  /* (*naplb: */
-
-/* These must be kept in this order, with consecutive values, and the _ARG
-versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
-versions. */
-
-#define META_MARK             0x802d0000u  /* (*MARK) */
-#define META_ACCEPT           0x802e0000u  /* (*ACCEPT) */
-#define META_FAIL             0x802f0000u  /* (*FAIL) */
-#define META_COMMIT           0x80300000u  /* These               */
-#define META_COMMIT_ARG       0x80310000u  /*   pairs             */
-#define META_PRUNE            0x80320000u  /*     must            */
-#define META_PRUNE_ARG        0x80330000u  /*       be            */
-#define META_SKIP             0x80340000u  /*         kept        */
-#define META_SKIP_ARG         0x80350000u  /*           in        */
-#define META_THEN             0x80360000u  /*             this    */
-#define META_THEN_ARG         0x80370000u  /*               order */
-
-/* These must be kept in groups of adjacent 3 values, and all together. */
-
-#define META_ASTERISK         0x80380000u  /* *  */
-#define META_ASTERISK_PLUS    0x80390000u  /* *+ */
-#define META_ASTERISK_QUERY   0x803a0000u  /* *? */
-#define META_PLUS             0x803b0000u  /* +  */
-#define META_PLUS_PLUS        0x803c0000u  /* ++ */
-#define META_PLUS_QUERY       0x803d0000u  /* +? */
-#define META_QUERY            0x803e0000u  /* ?  */
-#define META_QUERY_PLUS       0x803f0000u  /* ?+ */
-#define META_QUERY_QUERY      0x80400000u  /* ?? */
-#define META_MINMAX           0x80410000u  /* {n,m}  repeat */
-#define META_MINMAX_PLUS      0x80420000u  /* {n,m}+ repeat */
-#define META_MINMAX_QUERY     0x80430000u  /* {n,m}? repeat */
-
-#define META_FIRST_QUANTIFIER META_ASTERISK
-#define META_LAST_QUANTIFIER  META_MINMAX_QUERY
-
-/* This is a special "meta code" that is used only to distinguish (*asr: from
-(*sr: in the table of alphabetic assertions. It is never stored in the parsed
-pattern because (*asr: is turned into (*sr:(*atomic: at that stage. There is
-therefore no need for it to have a length entry, so use a high value. */
-
-#define META_ATOMIC_SCRIPT_RUN 0x8fff0000u
-
 /* Table of extra lengths for each of the meta codes. Must be kept in step with
 the definitions above. For some items these values are a basic length to which
 a variable amount has to be added. */
@@ -803,26 +687,6 @@ are allowed. */
     PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX| \
     PCRE2_EXTRA_ASCII_DIGIT|PCRE2_EXTRA_PYTHON_OCTAL|PCRE2_EXTRA_NO_BS0)
 
-/* Compile time error code numbers. They are given names so that they can more
-easily be tracked. When a new number is added, the tables called eint1 and
-eint2 in pcre2posix.c may need to be updated, and a new error text must be
-added to compile_error_texts in pcre2_error.c. Also, the error codes in
-pcre2.h.in must be updated - their values are exactly 100 greater than these
-values. */
-
-enum { ERR0 = COMPILE_ERROR_BASE,
-       ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  ERR10,
-       ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
-       ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
-       ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
-       ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
-       ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
-       ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
-       ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
-       ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
-       ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100,
-       ERR101 };
-
 /* This is a table of start-of-pattern options such as (*UTF) and settings such
 as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
 compatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is
@@ -5382,124 +5246,6 @@ for (;;)
 
 
 
-#ifdef SUPPORT_UNICODE
-/*************************************************
-*           Get othercase range                  *
-*************************************************/
-
-/* This function is passed the start and end of a class range in UCP mode. For
-single characters the range may be just one character long. The function
-searches up the characters, looking for ranges of characters in the "other"
-case. Each call returns the next one, updating the start address. A character
-with multiple other cases is returned on its own with a special return value.
-
-Arguments:
-  cptr        points to starting character value; updated
-  d           end value
-  ocptr       where to put start of othercase range
-  odptr       where to put end of othercase range
-  restricted  TRUE if caseless restriction applies
-
-Yield:        -1 when no more
-               0 when a range is returned
-              >0 the CASESET offset for char with multiple other cases;
-                 for this return, *ocptr contains the original
-*/
-
-static int
-get_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr,
-  uint32_t *odptr, BOOL restricted)
-{
-uint32_t c, othercase, next;
-unsigned int co;
-
-/* Find the first character that has an other case. If it has multiple other
-cases, return its case offset value. When CASELESS_RESTRICT is set, ignore the
-multi-case entries that begin with ASCII values. In 32-bit mode, a value
-greater than the Unicode maximum ends the range. */
-
-for (c = *cptr; c <= d; c++)
-  {
-#if PCRE2_CODE_UNIT_WIDTH == 32
-  if (c > MAX_UTF_CODE_POINT) return -1;
-#endif
-  if ((co = UCD_CASESET(c)) != 0 &&
-      (!restricted || PRIV(ucd_caseless_sets)[co] > 127))
-    {
-    *ocptr = c++;   /* Character that has the set */
-    *cptr = c;      /* Rest of input range */
-    return (int)co;
-    }
-
-   /* This is not a valid multiple-case character. Check that the single other
-   case is different to the original. We don't need to check "restricted" here
-   because the non-ASCII characters with multiple cases that include an ASCII
-   character don't have a different "othercase". */
-
-  if ((othercase = UCD_OTHERCASE(c)) != c) break;
-  }
-
-if (c > d) return -1;  /* Reached end of range */
-
-/* Found a character that has a single other case. Search for the end of the
-range, which is either the end of the input range, or a character that has zero
-or more than one other cases. */
-
-*ocptr = othercase;
-next = othercase + 1;
-
-for (++c; c <= d; c++)
-  {
-  if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break;
-  next++;
-  }
-
-*odptr = next - 1;     /* End of othercase range */
-*cptr = c;             /* Rest of input range */
-return 0;
-}
-
-
-
-/*************************************************
-*             Get nocase ranges                  *
-*************************************************/
-
-/* This function returns the next nocase range after a character
-using binary search. The character might be included in the range.
-
-Arguments:
-  c           current character
-
-Yield:        range (start/end pair)
-*/
-
-static const uint32_t*
-get_nocase_range(uint32_t c)
-{
-uint32_t left = 0;
-uint32_t right = PRIV(ucd_nocase_ranges_size);
-uint32_t middle;
-
-if (c > MAX_UTF_CODE_POINT) return PRIV(ucd_nocase_ranges) + right;
-
-while (TRUE)
-  {
-  /* Range end of the middle element. */
-  middle = ((left + right) >> 1) | 0x1;
-
-  if (PRIV(ucd_nocase_ranges)[middle] <= c)
-    left = middle + 1;
-  else if (middle > 1 && PRIV(ucd_nocase_ranges)[middle - 2] > c)
-    right = middle - 1;
-  else
-    return PRIV(ucd_nocase_ranges) + (middle - 1);
-  }
-}
-#endif  /* SUPPORT_UNICODE */
-
-
-
 /*************************************************
 * Add a character or range to a class (internal) *
 *************************************************/
@@ -5526,8 +5272,7 @@ Returns:        the number of < 256 characters added
 
 static unsigned int
 add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
-  uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start,
-  uint32_t end)
+  uint32_t options, compile_block *cb, uint32_t start, uint32_t end)
 {
 uint32_t c;
 uint32_t classbits_end = (end <= 0xff ? end : 0xff);
@@ -5540,86 +5285,15 @@ restriction is in force). Sometimes we can just extend the original range. */
 
 if ((options & PCRE2_CASELESS) != 0)
   {
-#ifdef SUPPORT_UNICODE
-  if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0)
-    {
-    int rc;
-    uint32_t oc, od, skip_start;
-    const uint32_t *skip_range;
-
-    options &= ~PCRE2_CASELESS;   /* Remove for recursive calls */
-    c = start;
-    skip_range = get_nocase_range(c);
-    skip_start = skip_range[0];
-    if (c > skip_start)
-      {
-      c = skip_range[1];
-      skip_range += 2;
-      skip_start = skip_range[0];
-      }
-
-    while ((rc = get_othercase_range(&c, end, &oc, &od,
-             (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)) >= 0)
+#ifndef SUPPORT_UNICODE
+  if ((options & (PCRE2_UTF|PCRE2_UCP)) == 0)
+#endif  /* SUPPORT_UNICODE */
+    /* Not UTF mode */
+    for (c = start; c <= classbits_end; c++)
       {
-      if (c > skip_start)
-        {
-        if (c < skip_range[1])
-          {
-          c = skip_range[1];
-          skip_range += 2;
-          skip_start = skip_range[0];
-          }
-        else
-          {
-          skip_range = get_nocase_range(c);
-          skip_start = skip_range[0];
-
-          if (c > skip_start)
-            {
-            c = skip_range[1];
-            skip_range += 2;
-            skip_start = skip_range[0];
-            }
-          }
-        }
-
-      /* Handle a single character that has more than one other case. */
-
-      if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr,
-        options, xoptions, cb, PRIV(ucd_caseless_sets) + rc, oc);
-
-      /* Do nothing if the other case range is within the original range. */
-
-      else if (oc >= cb->class_range_start && od <= cb->class_range_end)
-        continue;
-
-      /* Extend the original range if there is overlap, noting that if oc < c,
-      we can't have od > end because a subrange is always shorter than the
-      basic range. Otherwise, use a recursive call to add the additional range.
-      */
-
-      else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */
-      else if (od > end && oc <= end + 1)
-        {
-        end = od;       /* Extend upwards */
-        if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff);
-        }
-      else n8 += add_to_class_internal(classbits, uchardptr, options, xoptions,
-        cb, oc, od);
+      SETBIT(classbits, cb->fcc[c]);
+      n8++;
       }
-    }
-  else
-#else
-  (void)xoptions;   /* Avoid compiler warning */
-#endif  /* SUPPORT_UNICODE */
-
-  /* Not UTF mode */
-
-  for (c = start; c <= classbits_end; c++)
-    {
-    SETBIT(classbits, cb->fcc[c]);
-    n8++;
-    }
   }
 
 /* Now handle the originally supplied range. Adjust the final value according
@@ -5694,53 +5368,6 @@ return n8;    /* Number of 8-bit characters */
 
 
 
-#ifdef SUPPORT_UNICODE
-/*************************************************
-* Add a list of characters to a class (internal) *
-*************************************************/
-
-/* This function is used for adding a list of case-equivalent characters to a
-class when in UTF mode. This function is called only from within
-add_to_class_internal(), with which it is mutually recursive.
-
-Arguments:
-  classbits     the bit map for characters < 256
-  uchardptr     points to the pointer for extra data
-  options       the options bits
-  xoptions      the extra options bits
-  cb            contains pointers to tables etc.
-  p             points to row of 32-bit values, terminated by NOTACHAR
-  except        character to omit; this is used when adding lists of
-                  case-equivalent characters to avoid including the one we
-                  already know about
-
-Returns:        the number of < 256 characters added
-                the pointer to extra data is updated
-*/
-
-static unsigned int
-add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
-  uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p,
-  unsigned int except)
-{
-unsigned int n8 = 0;
-while (p[0] < NOTACHAR)
-  {
-  unsigned int n = 0;
-  if (p[0] != except)
-    {
-    while(p[n+1] == p[0] + n + 1) n++;
-    n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
-      p[0], p[n]);
-    }
-  p += n + 1;
-  }
-return n8;
-}
-#endif
-
-
-
 /*************************************************
 *   External entry point for add range to class  *
 *************************************************/
@@ -5763,12 +5390,11 @@ Returns:        the number of < 256 characters added
 
 static unsigned int
 add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
-  uint32_t xoptions, compile_block *cb, uint32_t start, uint32_t end)
+  compile_block *cb, uint32_t start, uint32_t end)
 {
 cb->class_range_start = start;
 cb->class_range_end = end;
-return add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
-  start, end);
+return add_to_class_internal(classbits, uchardptr, options, cb, start, end);
 }
 
 
@@ -5798,8 +5424,8 @@ Returns:        the number of < 256 characters added
 */
 
 static unsigned int
-add_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
-  uint32_t xoptions, compile_block *cb, const uint32_t *p, unsigned int except)
+add_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
+  uint32_t options, compile_block *cb, const uint32_t *p, unsigned int except)
 {
 unsigned int n8 = 0;
 while (p[0] < NOTACHAR)
@@ -5810,8 +5436,7 @@ while (p[0] < NOTACHAR)
     while(p[n+1] == p[0] + n + 1) n++;
     cb->class_range_start = p[0];
     cb->class_range_end = p[n];
-    n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
-      p[0], p[n]);
+    n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]);
     }
   p += n + 1;
   }
@@ -5841,16 +5466,16 @@ Returns:        the number of < 256 characters added
 
 static unsigned int
 add_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
-  uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p)
+  uint32_t options, compile_block *cb, const uint32_t *p)
 {
 BOOL utf = (options & PCRE2_UTF) != 0;
 unsigned int n8 = 0;
 if (p[0] > 0)
-  n8 += add_to_class(classbits, uchardptr, options, xoptions, cb, 0, p[0] - 1);
+  n8 += add_to_class(classbits, uchardptr, options, cb, 0, p[0] - 1);
 while (p[0] < NOTACHAR)
   {
   while (p[1] == p[0] + 1) p++;
-  n8 += add_to_class(classbits, uchardptr, options, xoptions, cb, p[0] + 1,
+  n8 += add_to_class(classbits, uchardptr, options, cb, p[0] + 1,
     (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
   p++;
   }
@@ -6019,6 +5644,8 @@ PCRE2_UCHAR *class_uchardata;
 #ifdef SUPPORT_WIDE_CHARS
 BOOL xclass;
 PCRE2_UCHAR *class_uchardata_base;
+uint32_t* class_ranges;
+size_t class_ranges_size;
 #endif
 
 /* Set up the default and non-default settings for greediness */
@@ -6331,6 +5958,22 @@ for (;; pptr++)
     might match. */
 
 #ifdef SUPPORT_WIDE_CHARS
+#if PCRE2_CODE_UNIT_WIDTH == 8
+    class_ranges = NULL;
+    class_ranges_size = 0;
+
+    if (utf)
+#endif
+      {
+      class_ranges = PRIV(optimize_class)(pptr, options, &class_ranges_size, cb);
+
+      if (class_ranges == NULL && class_ranges_size != 0)
+        {
+        *errorcodeptr = ERR21;
+        return 0;
+        }
+      }
+
     xclass = FALSE;
     class_uchardata = code + LINK_SIZE + 2;   /* For XCLASS items */
     class_uchardata_base = class_uchardata;   /* Save the start */
@@ -6545,24 +6188,24 @@ for (;; pptr++)
 
           case ESC_h:
           (void)add_list_to_class(classbits, &class_uchardata,
-            options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list),
+            options & ~PCRE2_CASELESS, cb, PRIV(hspace_list),
               NOTACHAR);
           break;
 
           case ESC_H:
           (void)add_not_list_to_class(classbits, &class_uchardata,
-            options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list));
+            options & ~PCRE2_CASELESS, cb, PRIV(hspace_list));
           break;
 
           case ESC_v:
           (void)add_list_to_class(classbits, &class_uchardata,
-            options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list),
+            options & ~PCRE2_CASELESS, cb, PRIV(vspace_list),
               NOTACHAR);
           break;
 
           case ESC_V:
           (void)add_not_list_to_class(classbits, &class_uchardata,
-            options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list));
+            options & ~PCRE2_CASELESS, cb, PRIV(vspace_list));
           break;
 
           /* If Unicode is not supported, \P and \p are not allowed and are
@@ -6630,6 +6273,11 @@ for (;; pptr++)
 
           if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
 
+#ifdef SUPPORT_WIDE_CHARS
+          /* Character ranges are ignored when class_ranges is present. */
+          if (class_ranges != NULL) continue;
+#endif
+
           /* In an EBCDIC environment, Perl treats alphabetic ranges specially
           because there are holes in the encoding, and simply using the range
           A-Z (for example) would include the characters in the holes. This
@@ -6648,7 +6296,7 @@ for (;; pptr++)
             if (C <= CHAR_i)
               {
               class_has_8bitchar +=
-                add_to_class(classbits, &class_uchardata, options, xoptions,
+                add_to_class(classbits, &class_uchardata, options,
                   cb, C + uc, ((D < CHAR_i)? D : CHAR_i) + uc);
               C = CHAR_j;
               }
@@ -6656,7 +6304,7 @@ for (;; pptr++)
             if (C <= D && C <= CHAR_r)
               {
               class_has_8bitchar +=
-                add_to_class(classbits, &class_uchardata, options, xoptions,
+                add_to_class(classbits, &class_uchardata, options,
                   cb, C + uc, ((D < CHAR_r)? D : CHAR_r) + uc);
               C = CHAR_s;
               }
@@ -6664,7 +6312,7 @@ for (;; pptr++)
             if (C <= D)
               {
               class_has_8bitchar +=
-                add_to_class(classbits, &class_uchardata, options, xoptions,
+                add_to_class(classbits, &class_uchardata, options,
                   cb, C + uc, D + uc);
               }
             }
@@ -6673,16 +6321,19 @@ for (;; pptr++)
           /* Not an EBCDIC special range */
 
           class_has_8bitchar += add_to_class(classbits, &class_uchardata,
-            options, xoptions, cb, c, d);
+            options, cb, c, d);
           goto CONTINUE_CLASS;   /* Go get the next char in the class */
           }  /* End of range handling */
 
+#ifdef SUPPORT_WIDE_CHARS
+        /* Character ranges are ignored when class_ranges is present. */
+        if (class_ranges != NULL) continue;
+#endif
 
         /* Handle a single character. */
 
         class_has_8bitchar +=
-          add_to_class(classbits, &class_uchardata, options, xoptions, cb,
-            meta, meta);
+          add_to_class(classbits, &class_uchardata, options, cb, meta, meta);
         }
 
       /* Continue to the next item in the class. */
@@ -6710,6 +6361,36 @@ for (;; pptr++)
       continue;  /* Needed to avoid error when not supporting wide chars */
       }   /* End of main class-processing loop */
 
+#ifdef SUPPORT_WIDE_CHARS
+    if (class_ranges != NULL)
+      {
+      uint32_t *range = class_ranges;
+      uint32_t *end = class_ranges + class_ranges_size;
+
+      do
+        {
+        class_has_8bitchar +=
+          add_to_class(classbits, &class_uchardata, options, cb,
+            range[0], range[1]);
+
+        if (class_uchardata > class_uchardata_base)
+          {
+          xclass = TRUE;
+          if (lengthptr != NULL)
+            {
+            *lengthptr += class_uchardata - class_uchardata_base;
+            class_uchardata = class_uchardata_base;
+            }
+          }
+
+        range += 2;
+        }
+      while (range < end);
+
+      cb->cx->memctl.free(class_ranges, cb->cx->memctl.memory_data);
+      }
+#endif
+
     /* If this class is the first thing in the branch, there can be no first
     char setting, whatever the repeat count. Any reqcu setting must remain
     unchanged after any kind of repeat. */
diff --git a/src/pcre2_compile.h b/src/pcre2_compile.h
new file mode 100644
index 000000000..3fff760bc
--- /dev/null
+++ b/src/pcre2_compile.h
@@ -0,0 +1,183 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE2 is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifndef PCRE2_COMPILE_H_IDEMPOTENT_GUARD
+#define PCRE2_COMPILE_H_IDEMPOTENT_GUARD
+
+#include "pcre2_internal.h"
+
+/* Compile time error code numbers. They are given names so that they can more
+easily be tracked. When a new number is added, the tables called eint1 and
+eint2 in pcre2posix.c may need to be updated, and a new error text must be
+added to compile_error_texts in pcre2_error.c. Also, the error codes in
+pcre2.h.in must be updated - their values are exactly 100 greater than these
+values. */
+
+enum { ERR0 = COMPILE_ERROR_BASE,
+       ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  ERR10,
+       ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
+       ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
+       ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
+       ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
+       ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
+       ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
+       ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
+       ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
+       ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100,
+       ERR101 };
+
+/* Code values for parsed patterns, which are stored in a vector of 32-bit
+unsigned ints. Values less than META_END are literal data values. The coding
+for identifying the item is in the top 16-bits, leaving 16 bits for the
+additional data that some of them need. The META_CODE, META_DATA, and META_DIFF
+macros are used to manipulate parsed pattern elements.
+
+NOTE: When these definitions are changed, the table of extra lengths for each
+code (meta_extra_lengths in pcre2_compile.c) must be updated to match. */
+
+#define META_END              0x80000000u  /* End of pattern */
+
+#define META_ALT              0x80010000u  /* alternation */
+#define META_ATOMIC           0x80020000u  /* atomic group */
+#define META_BACKREF          0x80030000u  /* Back ref */
+#define META_BACKREF_BYNAME   0x80040000u  /* \k'name' */
+#define META_BIGVALUE         0x80050000u  /* Next is a literal > META_END */
+#define META_CALLOUT_NUMBER   0x80060000u  /* (?C with numerical argument */
+#define META_CALLOUT_STRING   0x80070000u  /* (?C with string argument */
+#define META_CAPTURE          0x80080000u  /* Capturing parenthesis */
+#define META_CIRCUMFLEX       0x80090000u  /* ^ metacharacter */
+#define META_CLASS            0x800a0000u  /* start non-empty class */
+#define META_CLASS_EMPTY      0x800b0000u  /* empty class */
+#define META_CLASS_EMPTY_NOT  0x800c0000u  /* negative empty class */
+#define META_CLASS_END        0x800d0000u  /* end of non-empty class */
+#define META_CLASS_NOT        0x800e0000u  /* start non-empty negative class */
+#define META_COND_ASSERT      0x800f0000u  /* (?(?assertion)... */
+#define META_COND_DEFINE      0x80100000u  /* (?(DEFINE)... */
+#define META_COND_NAME        0x80110000u  /* (?(<name>)... */
+#define META_COND_NUMBER      0x80120000u  /* (?(digits)... */
+#define META_COND_RNAME       0x80130000u  /* (?(R&name)... */
+#define META_COND_RNUMBER     0x80140000u  /* (?(Rdigits)... */
+#define META_COND_VERSION     0x80150000u  /* (?(VERSION<op>x.y)... */
+#define META_SCS_NAME         0x80160000u  /* (*scan_substring:(<name>)... */
+#define META_SCS_NUMBER       0x80170000u  /* (*scan_substring:(digits)... */
+#define META_SCS_NEXT_NAME    0x80180000u  /* Next <name> of scan_substring */
+#define META_SCS_NEXT_NUMBER  0x80190000u  /* Next digits of scan_substring */
+#define META_DOLLAR           0x801a0000u  /* $ metacharacter */
+#define META_DOT              0x801b0000u  /* . metacharacter */
+#define META_ESCAPE           0x801c0000u  /* \d and friends */
+#define META_KET              0x801d0000u  /* closing parenthesis */
+#define META_NOCAPTURE        0x801e0000u  /* no capture parens */
+#define META_OPTIONS          0x801f0000u  /* (?i) and friends */
+#define META_POSIX            0x80200000u  /* POSIX class item */
+#define META_POSIX_NEG        0x80210000u  /* negative POSIX class item */
+#define META_RANGE_ESCAPED    0x80220000u  /* range with at least one escape */
+#define META_RANGE_LITERAL    0x80230000u  /* range defined literally */
+#define META_RECURSE          0x80240000u  /* Recursion */
+#define META_RECURSE_BYNAME   0x80250000u  /* (?&name) */
+#define META_SCRIPT_RUN       0x80260000u  /* (*script_run:...) */
+
+/* These must be kept together to make it easy to check that an assertion
+is present where expected in a conditional group. */
+
+#define META_LOOKAHEAD        0x80270000u  /* (?= */
+#define META_LOOKAHEADNOT     0x80280000u  /* (?! */
+#define META_LOOKBEHIND       0x80290000u  /* (?<= */
+#define META_LOOKBEHINDNOT    0x802a0000u  /* (?<! */
+
+/* These cannot be conditions */
+
+#define META_LOOKAHEAD_NA     0x802b0000u  /* (*napla: */
+#define META_LOOKBEHIND_NA    0x802c0000u  /* (*naplb: */
+
+/* These must be kept in this order, with consecutive values, and the _ARG
+versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
+versions. */
+
+#define META_MARK             0x802d0000u  /* (*MARK) */
+#define META_ACCEPT           0x802e0000u  /* (*ACCEPT) */
+#define META_FAIL             0x802f0000u  /* (*FAIL) */
+#define META_COMMIT           0x80300000u  /* These               */
+#define META_COMMIT_ARG       0x80310000u  /*   pairs             */
+#define META_PRUNE            0x80320000u  /*     must            */
+#define META_PRUNE_ARG        0x80330000u  /*       be            */
+#define META_SKIP             0x80340000u  /*         kept        */
+#define META_SKIP_ARG         0x80350000u  /*           in        */
+#define META_THEN             0x80360000u  /*             this    */
+#define META_THEN_ARG         0x80370000u  /*               order */
+
+/* These must be kept in groups of adjacent 3 values, and all together. */
+
+#define META_ASTERISK         0x80380000u  /* *  */
+#define META_ASTERISK_PLUS    0x80390000u  /* *+ */
+#define META_ASTERISK_QUERY   0x803a0000u  /* *? */
+#define META_PLUS             0x803b0000u  /* +  */
+#define META_PLUS_PLUS        0x803c0000u  /* ++ */
+#define META_PLUS_QUERY       0x803d0000u  /* +? */
+#define META_QUERY            0x803e0000u  /* ?  */
+#define META_QUERY_PLUS       0x803f0000u  /* ?+ */
+#define META_QUERY_QUERY      0x80400000u  /* ?? */
+#define META_MINMAX           0x80410000u  /* {n,m}  repeat */
+#define META_MINMAX_PLUS      0x80420000u  /* {n,m}+ repeat */
+#define META_MINMAX_QUERY     0x80430000u  /* {n,m}? repeat */
+
+#define META_FIRST_QUANTIFIER META_ASTERISK
+#define META_LAST_QUANTIFIER  META_MINMAX_QUERY
+
+/* This is a special "meta code" that is used only to distinguish (*asr: from
+(*sr: in the table of alphabetic assertions. It is never stored in the parsed
+pattern because (*asr: is turned into (*sr:(*atomic: at that stage. There is
+therefore no need for it to have a length entry, so use a high value. */
+
+#define META_ATOMIC_SCRIPT_RUN 0x8fff0000u
+
+/* Macros for manipulating elements of the parsed pattern vector. Arguments
+are parenthesized so that compound expressions can be passed safely. */
+#define META_CODE(x)   ((x) & 0xffff0000u)   /* item code: top 16 bits */
+#define META_DATA(x)   ((x) & 0x0000ffffu)   /* item data: low 16 bits */
+#define META_DIFF(x,y) (((x)-(y))>>16)       /* distance between two codes */
+
+/* Collect the literal characters and ranges of a parsed class into a sorted
+array of merged (start, end) pairs; *buffer_size gets its length in words. */
+uint32_t *PRIV(optimize_class)(uint32_t *start_ptr, uint32_t options,
+  size_t *buffer_size, compile_block* cb);
+
+#endif  /* PCRE2_COMPILE_H_IDEMPOTENT_GUARD */
+
+/* End of pcre2_compile.h */
diff --git a/src/pcre2_compile_class.c b/src/pcre2_compile_class.c
new file mode 100644
index 000000000..bbeae23fe
--- /dev/null
+++ b/src/pcre2_compile_class.c
@@ -0,0 +1,355 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_compile.h"
+
+#ifdef SUPPORT_WIDE_CHARS
+
+/* Heapsort sift-down. The buffer holds two-word items ordered by their first
+word; size and i are word offsets. The item at i sinks to its heap position. */
+static void do_heapify(uint32_t *buffer, size_t size, size_t i)
+{
+size_t max;
+size_t left;
+size_t right;
+uint32_t tmp1, tmp2;
+
+while (TRUE)
+  {
+  max = i;
+  left = (i << 1) + 2;             /* word offset of the first child item */
+  right = left + 2;                /* word offset of the second child item */
+
+  if (left < size && buffer[left] > buffer[max]) max = left;
+  if (right < size && buffer[right] > buffer[max]) max = right;
+  if (i == max) return;            /* neither child is larger: done */
+
+  /* Swap both words of the two items, then continue from the child. */
+  tmp1 = buffer[i];
+  tmp2 = buffer[i + 1];
+  buffer[i] = buffer[max];
+  buffer[i + 1] = buffer[max + 1];
+  buffer[max] = tmp1;
+  buffer[max + 1] = tmp2;
+  i = max;
+  }
+}
+
+#ifdef SUPPORT_UNICODE
+
+#define PARSE_CLASS_CASELESS_UTF      0x1
+#define PARSE_CLASS_RESTRICTED_UTF    0x2
+
+static const uint32_t*
+get_nocase_range(uint32_t c)     /* binary search in ucd_nocase_ranges */
+{
+uint32_t left = 0;
+uint32_t right = PRIV(ucd_nocase_ranges_size);   /* used as a word index */
+uint32_t middle;
+
+if (c > MAX_UTF_CODE_POINT) return PRIV(ucd_nocase_ranges) + right;
+
+while (TRUE)
+  {
+  /* Forced odd: the index of the second word of the middle pair. */
+  middle = ((left + right) >> 1) | 0x1;
+
+  if (PRIV(ucd_nocase_ranges)[middle] <= c)
+    left = middle + 1;             /* this pair ends at or before c */
+  else if (middle > 1 && PRIV(ucd_nocase_ranges)[middle - 2] > c)
+    right = middle - 1;            /* the previous pair also ends after c */
+  else
+    return PRIV(ucd_nocase_ranges) + (middle - 1);  /* first pair ending > c */
+  }
+}
+
+static size_t
+utf_caseless_extend(uint32_t start, uint32_t end, uint32_t options,
+  uint32_t *buffer)
+{
+uint32_t new_start = start;    /* [new_start, new_end] grows when a case */
+uint32_t new_end = end;        /* variant is directly adjacent to it */
+uint32_t c = start;
+const uint32_t *list;          /* NOTACHAR-terminated variants of c */
+uint32_t tmp[3];               /* c, optionally its other case, NOTACHAR */
+size_t result = 2;             /* words needed; 2 for the range itself */
+const uint32_t *skip_range = get_nocase_range(c);
+uint32_t skip_start = skip_range[0];
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+if (end > MAX_UTF_CODE_POINT) end = MAX_UTF_CODE_POINT;
+#endif
+
+while (c <= end)
+  {
+  if (c > skip_start)      /* NOTE(review): c appears to lie in a run of */
+    {                      /* code points without case; confirm vs. table */
+    c = skip_range[1];     /* resume at the second word of the pair */
+    skip_range += 2;
+    skip_start = skip_range[0];
+    continue;
+    }
+
+  /* Pick the variants of c: a caseless set, or c plus its other case. */
+  uint32_t co = UCD_CASESET(c);
+
+  if (co != 0 && (!(options & PARSE_CLASS_RESTRICTED_UTF)
+                  || PRIV(ucd_caseless_sets)[co] > 127))  /* 1st member */
+    list = PRIV(ucd_caseless_sets) + co;
+  else
+    {
+    co = UCD_OTHERCASE(c);
+    list = tmp;
+    tmp[0] = c;
+    tmp[1] = NOTACHAR;
+
+    if (co != c)
+      {
+      tmp[1] = co;
+      tmp[2] = NOTACHAR;
+      }
+    }
+  c++;
+
+  /* Record every variant outside the range; absorb directly adjacent ones. */
+  do
+    {
+    if (*list < new_start)
+      {
+      if (*list + 1 == new_start)
+        {
+        new_start--;
+        continue;              /* jumps to the while test below */
+        }
+      }
+    else if (*list > new_end)
+      {
+      if (*list - 1 == new_end)
+        {
+        new_end++;
+        continue;
+        }
+      }
+    else continue;             /* already inside the range */
+
+    result += 2;
+    if (buffer != NULL)        /* NULL buffer: only count */
+      {
+      buffer[0] = *list;       /* stored as a one-character range */
+      buffer[1] = *list;
+      buffer += 2;
+      }
+    }
+  while (*(++list) != NOTACHAR);
+  }
+
+  if (buffer != NULL)          /* the (possibly grown) range goes last */
+    {
+    buffer[0] = new_start;
+    buffer[1] = new_end;
+    buffer += 2;               /* (value not used afterwards) */
+    }
+  return result;               /* words written, or needed if buffer is NULL */
+}
+
+#endif
+
+static size_t
+parse_class(uint32_t *ptr, uint32_t options, uint32_t *buffer)
+{
+size_t total_size = 0;       /* words produced so far (two per range) */
+uint32_t meta_arg;
+uint32_t start_char;
+
+(void)options; /* Only read when SUPPORT_UNICODE is defined. */
+
+while (*ptr != META_CLASS_END)
+  {
+  switch (META_CODE(*ptr))
+    {
+    case META_ESCAPE:        /* not a character: skipped, nothing emitted */
+      meta_arg = META_DATA(*ptr);
+      if (meta_arg == ESC_P || meta_arg == ESC_p) ptr++;   /* extra word */
+      ptr++;
+      continue;
+    case META_POSIX:
+    case META_POSIX_NEG:
+      ptr += 2;              /* two-word item, skipped as well */
+      continue;
+    case META_BIGVALUE:
+      /* The literal itself is stored in the next word. */
+      ptr++;
+      break;
+    default:
+      PCRE2_ASSERT(*ptr < META_END);
+      break;
+    }
+
+    start_char = *ptr;       /* without a range marker the end is the same */
+
+    if (ptr[1] == META_RANGE_LITERAL || ptr[1] == META_RANGE_ESCAPED)
+      {
+      ptr += 2;              /* step over the marker to the range end */
+      PCRE2_ASSERT(*ptr < META_END || *ptr == META_BIGVALUE);
+
+      if (*ptr == META_BIGVALUE) ptr++;
+      }
+
+#ifdef SUPPORT_UNICODE
+    if (options & PARSE_CLASS_CASELESS_UTF)
+      {
+      size_t size = utf_caseless_extend(start_char, *ptr++, options, buffer);
+      if (buffer != NULL) buffer += size;
+      total_size += size;
+      continue;
+      }
+#endif
+
+    if (buffer != NULL)      /* NULL buffer: counting pass only */
+      {
+      buffer[0] = start_char;
+      buffer[1] = *ptr;
+      buffer += 2;
+      }
+
+    ptr++;
+    total_size += 2;
+  }
+
+  return total_size;         /* in uint32_t words, not in ranges */
+}
+
+uint32_t *PRIV(optimize_class)(uint32_t *start_ptr, uint32_t options,
+  size_t *buffer_size, compile_block* cb)
+{
+uint32_t *ptr = start_ptr + 1;   /* initial value is not read; see merging */
+uint32_t *buffer;
+uint32_t *dst;
+size_t size = 0, i;
+uint32_t tmp1, tmp2;
+
+PCRE2_ASSERT(*start_ptr == META_CLASS || *start_ptr == META_CLASS_NOT);
+
+#ifdef SUPPORT_UNICODE
+if ((options & PCRE2_CASELESS) && (options & (PCRE2_UTF|PCRE2_UCP)))
+  options = PARSE_CLASS_CASELESS_UTF;   /* from here on: PARSE_CLASS_* bits */
+else
+  options = 0;
+
+if (cb->cx->extra_options & PCRE2_EXTRA_CASELESS_RESTRICT)
+  options |= PARSE_CLASS_RESTRICTED_UTF;
+#endif
+
+/* First pass: count the words needed. When the class has no character items
+*buffer_size is set to 0 and NULL is returned. */
+size = parse_class(start_ptr + 1, options, NULL);
+
+*buffer_size = size;
+if (size == 0) return NULL;
+
+/* Allocate the buffer and fill it. On allocation failure NULL is returned
+while *buffer_size stays non-zero, unlike the empty case above. */
+buffer = (uint32_t*)
+  cb->cx->memctl.malloc(size * sizeof(uint32_t), cb->cx->memctl.memory_data);
+
+if (buffer == NULL) return NULL;
+
+parse_class(start_ptr + 1, options, buffer);
+
+if (size == 2) return buffer;    /* one range: nothing to sort or merge */
+
+/* In-place heapsort of the pairs by range start: build the heap from the
+last parent item down, then repeatedly move the largest item to the end. */
+i = (((size >> 2) - 1) << 1);    /* word offset of the last parent item */
+while (TRUE)
+  {
+  do_heapify(buffer, size, i);
+  if (i == 0) break;
+  i -= 2;
+  }
+
+i = size - 2;                    /* word offset of the last item */
+while (TRUE)
+  {
+  tmp1 = buffer[i];
+  tmp2 = buffer[i + 1];
+  buffer[i] = buffer[0];
+  buffer[i + 1] = buffer[1];
+  buffer[0] = tmp1;
+  buffer[1] = tmp2;
+
+  do_heapify(buffer, i, 0);      /* heap now covers the words before i */
+  if (i == 0) break;
+  i -= 2;
+  }
+
+/* Merge overlapping or directly adjacent ranges, compacting in place. */
+dst = buffer;
+ptr = buffer + 2;
+size -= 2;
+
+/* The second condition is a very rare corner case, where the end of the last
+range is the maximum character. This range cannot be extended further. */
+
+while (size > 0 && dst[1] != ~(uint32_t)0)
+  {
+  if (dst[1] + 1 < ptr[0])       /* a gap: start a new output range */
+    {
+    dst += 2;
+    dst[0] = ptr[0];
+    dst[1] = ptr[1];
+    }
+  else if (dst[1] < ptr[1]) dst[1] = ptr[1];
+
+  ptr += 2;
+  size -= 2;
+  }
+
+*buffer_size = (size_t)(dst + 2 - buffer);   /* merged length, in words */
+return buffer;                   /* allocated above and not freed here */
+}
+
+#endif /* SUPPORT_WIDE_CHARS */
+
+/* End of pcre2_compile_class.c */
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
index dd251ef8b..3cbb56bb3 100644
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -2050,6 +2050,7 @@ is available. */
 #define _pcre2_valid_utf             PCRE2_SUFFIX(_pcre2_valid_utf_)
 #define _pcre2_was_newline           PCRE2_SUFFIX(_pcre2_was_newline_)
 #define _pcre2_xclass                PCRE2_SUFFIX(_pcre2_xclass_)
+#define _pcre2_optimize_class        PCRE2_SUFFIX(_pcre2_optimize_class_)
 
 extern int          _pcre2_auto_possessify(PCRE2_UCHAR *,
                       const compile_block *);
diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h
index a798cdd4f..f2ed042b7 100644
--- a/src/pcre2_intmodedep.h
+++ b/src/pcre2_intmodedep.h
@@ -731,7 +731,7 @@ typedef struct compile_block {
   const uint8_t *cbits;            /* Points to character type table */
   const uint8_t *ctypes;           /* Points to table of type maps */
   PCRE2_UCHAR *start_workspace;    /* The start of working space */
-  PCRE2_UCHAR * start_code;        /* The start of the compiled code */
+  PCRE2_UCHAR *start_code;         /* The start of the compiled code */
   PCRE2_SPTR start_pattern;        /* The start of the pattern */
   PCRE2_SPTR end_pattern;          /* The end of the pattern */
   PCRE2_UCHAR *name_table;         /* The name/number table */
diff --git a/testdata/testinput5 b/testdata/testinput5
index 8a48cec6f..b240c7e41 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -2539,4 +2539,20 @@
 /abc/utf,substitute_extended,python_octal
     abc\=replace=\400
 
+# Character range merging tests
+
+/[\x{1200}\s\x{1202}\d\x{1201}]+/B,utf,ucp
+    \x{11ff}\x{1200}\x{1201}\x{1202}\x{1203}
+
+/[\x{2000}-\x{2500}\x{2100}-\x{2600}\d\x{1800}-\x{1fff}]+/B,utf,ucp
+    \x{17ff}\x{1800}\x{2600}\x{2601}
+
+/[\x{10008}\x{10003}\x{10006}\x{10004}\x{10007}]+/B,utf
+    \x{10002}\x{10005}\x{10003}\x{10004}\x{10006}\x{10007}\x{10008}\x{10009}
+
+/[\x{100}-\x{400}]+/Bi,utf
+    qS\x{ff}\x{100}\x{a7c5}\x{401}
+    \x{2c63}\x{2c64}\x{2c65}\x{2c66}\x{2c67}
+    \x{a7af}\x{a7b0}\x{a7b1}\x{a7b2}\x{a7b3}
+
 # End of testinput5
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index 1d1b7f09d..ddc8e3e37 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1443,7 +1443,7 @@ No match
 /[z-\x{100}]/IBi,utf
 ------------------------------------------------------------------
         Bra
-        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
+        [Zz-\xff\x{100}-\x{101}\x{178}\x{39c}\x{3bc}\x{1e9e}\x{212b}]
         Ket
         End
 ------------------------------------------------------------------
@@ -1480,7 +1480,7 @@ No match
 /[z-\x{100}]/IBi,utf
 ------------------------------------------------------------------
         Bra
-        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
+        [Zz-\xff\x{100}-\x{101}\x{178}\x{39c}\x{3bc}\x{1e9e}\x{212b}]
         Ket
         End
 ------------------------------------------------------------------
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index e7a63167d..11eda4cda 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1285,7 +1285,7 @@ No match
 /[z-\x{100}]/IBi,utf
 ------------------------------------------------------------------
         Bra
-        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
+        [Zz-\xff\x{100}-\x{101}\x{178}\x{39c}\x{3bc}\x{1e9e}\x{212b}]
         Ket
         End
 ------------------------------------------------------------------
@@ -1331,7 +1331,7 @@ No match
 /[z-\x{100}]/IBi,utf
 ------------------------------------------------------------------
         Bra
-        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
+        [Zz-\xff\x{100}-\x{101}\x{178}\x{39c}\x{3bc}\x{1e9e}\x{212b}]
         Ket
         End
 ------------------------------------------------------------------
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index ad01800ac..b4a64eab9 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1279,7 +1279,7 @@ No match
 /[z-\x{100}]/IBi,utf
 ------------------------------------------------------------------
         Bra
-        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
+        [Zz-\xff\x{100}-\x{101}\x{178}\x{39c}\x{3bc}\x{1e9e}\x{212b}]
         Ket
         End
 ------------------------------------------------------------------
@@ -1325,7 +1325,7 @@ No match
 /[z-\x{100}]/IBi,utf
 ------------------------------------------------------------------
         Bra
-        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
+        [Zz-\xff\x{100}-\x{101}\x{178}\x{39c}\x{3bc}\x{1e9e}\x{212b}]
         Ket
         End
 ------------------------------------------------------------------
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index dd8710424..f8ccaa654 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -3950,7 +3950,7 @@ Subject length lower bound = 1
 /[A-`]/iB,utf
 ------------------------------------------------------------------
         Bra
-        [A-z\x{212a}\x{17f}]
+        [A-z\x{17f}\x{212a}]
         Ket
         End
 ------------------------------------------------------------------
@@ -5103,7 +5103,7 @@ No match
 /[Ss]+/iB,utf
 ------------------------------------------------------------------
         Bra
-        [Ss\x{17f}\x{17f}]++
+        [Ss\x{17f}]++
         Ket
         End
 ------------------------------------------------------------------
@@ -5121,7 +5121,7 @@ No match
 /[S\x{17f}]/iB,utf
 ------------------------------------------------------------------
         Bra
-        [Ss\x{17f}\x{17f}]
+        [Ss\x{17f}]
         Ket
         End
 ------------------------------------------------------------------
@@ -5137,7 +5137,7 @@ No match
 /[\x{17f}s]/iB,utf
 ------------------------------------------------------------------
         Bra
-        [Ss\x{17f}\x{17f}]
+        [Ss\x{17f}]
         Ket
         End
 ------------------------------------------------------------------
@@ -5153,7 +5153,7 @@ No match
 /[\x{4b}\x{6b}]/iB,utf
 ------------------------------------------------------------------
         Bra
-        [Kk\x{212a}\x{212a}]
+        [Kk\x{212a}]
         Ket
         End
 ------------------------------------------------------------------
@@ -5537,4 +5537,50 @@ Failed: error 151 at offset 4: octal value is greater than \377 in 8-bit non-UTF
     abc\=replace=\400
 Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string
 
+# Character range merging tests
+
+/[\x{1200}\s\x{1202}\d\x{1201}]+/B,utf,ucp
+------------------------------------------------------------------
+        Bra
+        [\p{Xsp}\p{Nd}\x{1200}-\x{1202}]++
+        Ket
+        End
+------------------------------------------------------------------
+    \x{11ff}\x{1200}\x{1201}\x{1202}\x{1203}
+ 0: \x{1200}\x{1201}\x{1202}
+
+/[\x{2000}-\x{2500}\x{2100}-\x{2600}\d\x{1800}-\x{1fff}]+/B,utf,ucp
+------------------------------------------------------------------
+        Bra
+        [\p{Nd}\x{1800}-\x{2600}]++
+        Ket
+        End
+------------------------------------------------------------------
+    \x{17ff}\x{1800}\x{2600}\x{2601}
+ 0: \x{1800}\x{2600}
+
+/[\x{10008}\x{10003}\x{10006}\x{10004}\x{10007}]+/B,utf
+------------------------------------------------------------------
+        Bra
+        [\x{10003}-\x{10004}\x{10006}-\x{10008}]++
+        Ket
+        End
+------------------------------------------------------------------
+    \x{10002}\x{10005}\x{10003}\x{10004}\x{10006}\x{10007}\x{10008}\x{10009}
+ 0: \x{10003}\x{10004}\x{10006}\x{10007}\x{10008}
+
+/[\x{100}-\x{400}]+/Bi,utf
+------------------------------------------------------------------
+        Bra
+        [Ss\xb5\xff\x{100}-\x{400}\x{450}\x{1fbe}\x{2126}\x{2c62}\x{2c64}-\x{2c66}\x{2c6d}-\x{2c70}\x{2c7e}-\x{2c7f}\x{a78d}\x{a7aa}-\x{a7ae}\x{a7b0}-\x{a7b2}\x{a7c5}]++
+        Ket
+        End
+------------------------------------------------------------------
+    qS\x{ff}\x{100}\x{a7c5}\x{401}
+ 0: S\x{ff}\x{100}\x{a7c5}
+    \x{2c63}\x{2c64}\x{2c65}\x{2c66}\x{2c67}
+ 0: \x{2c64}\x{2c65}\x{2c66}
+    \x{a7af}\x{a7b0}\x{a7b1}\x{a7b2}\x{a7b3}
+ 0: \x{a7b0}\x{a7b1}\x{a7b2}
+
 # End of testinput5
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index 36b47e3c1..e1c45e559 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -3816,7 +3816,7 @@ No match
 /[Ss]+/iB,utf
 ------------------------------------------------------------------
         Bra
-        [Ss\x{17f}\x{17f}]++
+        [Ss\x{17f}]++
         Ket
         End
 ------------------------------------------------------------------
@@ -3834,7 +3834,7 @@ No match
 /[S\x{17f}]/iB,utf
 ------------------------------------------------------------------
         Bra
-        [Ss\x{17f}\x{17f}]
+        [Ss\x{17f}]
         Ket
         End
 ------------------------------------------------------------------
@@ -3850,7 +3850,7 @@ No match
 /[\x{17f}s]/iB,utf
 ------------------------------------------------------------------
         Bra
-        [Ss\x{17f}\x{17f}]
+        [Ss\x{17f}]
         Ket
         End
 ------------------------------------------------------------------
@@ -3866,7 +3866,7 @@ No match
 /[\x{4b}\x{6b}]/iB,utf
 ------------------------------------------------------------------
         Bra
-        [Kk\x{212a}\x{212a}]
+        [Kk\x{212a}]
         Ket
         End
 ------------------------------------------------------------------
diff --git a/vms/configure.com b/vms/configure.com
index c6024e87c..7e3bf1495 100644
--- a/vms/configure.com
+++ b/vms/configure.com
@@ -1040,6 +1040,9 @@ PCRE2_CHKDINT.OBJ : PCRE2_CHKDINT.C
 PCRE2_COMPILE.OBJ : PCRE2_COMPILE.C
          $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET)
 
+PCRE2_COMPILE_CLASS.OBJ : PCRE2_COMPILE_CLASS.C
+         $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET)
+
 PCRE2_CONFIG.OBJ : PCRE2_CONFIG.C
          $(CC) $(CFLAGS) $(MMS$SOURCE) /OBJ=$(MMS$TARGET)