@@ -693,6 +693,129 @@ mb_regex_groups_iter(const OnigUChar* name, const OnigUChar* name_end, int ngrou
693
693
}
694
694
/* }}} */
695
695
696
+ /*
697
+ * Helper for _php_mb_regex_ereg_replace_exec
698
+ */
699
+ /* {{{ mb_regex_substitute */
700
+ static inline void mb_regex_substitute (
701
+ smart_str * pbuf ,
702
+ char * subject ,
703
+ size_t subject_len ,
704
+ char * replace ,
705
+ size_t replace_len ,
706
+ php_mb_regex_t * regexp ,
707
+ OnigRegion * regs ,
708
+ const mbfl_encoding * enc
709
+ ) {
710
+ char * p , * sp , * eos ;
711
+ int no ; /* bakreference group number */
712
+ int clen ; /* byte-length of the current character */
713
+
714
+ p = replace ;
715
+ eos = replace + replace_len ;
716
+
717
+ while (p < eos ) {
718
+ clen = (int ) php_mb_mbchar_bytes_ex (p , enc );
719
+ if (clen != 1 || p == eos || p [0 ] != '\\' ) {
720
+ /* skip anything that's not an ascii backslash */
721
+ smart_str_appendl (pbuf , p , clen );
722
+ p += clen ;
723
+ continue ;
724
+ }
725
+ sp = p ; /* save position */
726
+ clen = (int ) php_mb_mbchar_bytes_ex (++ p , enc );
727
+ if (clen != 1 || p == eos ) {
728
+ /* skip escaped multibyte char */
729
+ p += clen ;
730
+ smart_str_appendl (pbuf , sp , p - sp );
731
+ continue ;
732
+ }
733
+ no = -1 ;
734
+ switch (p [0 ]) {
735
+ case '0' :
736
+ no = 0 ;
737
+ p ++ ;
738
+ break ;
739
+ case '1' : case '2' : case '3' : case '4' :
740
+ case '5' : case '6' : case '7' : case '8' : case '9' :
741
+ if (!onig_noname_group_capture_is_active (regexp )) {
742
+ /*
743
+ * FIXME:
744
+ * Oniguruma throws a compile error if numbered backrefs are used with named groups in the pattern.
745
+ * For now we just ignore them, but in the future we might want to raise a warning
746
+ * and abort the whole replace operation.
747
+ */
748
+ p ++ ;
749
+ smart_str_appendl (pbuf , sp , p - sp );
750
+ continue ;
751
+ }
752
+ no = p [0 ] - '0' ;
753
+ p ++ ;
754
+ break ;
755
+ case 'k' :
756
+ clen = (int ) php_mb_mbchar_bytes_ex (++ p , enc );
757
+ if (clen != 1 || p == eos || (p [0 ] != '<' && p [0 ] != '\'' )) {
758
+ /* not a backref delimiter */
759
+ p += clen ;
760
+ smart_str_appendl (pbuf , sp , p - sp );
761
+ continue ;
762
+ }
763
+ /* try to consume everything until next delimiter */
764
+ char delim = p [0 ] == '<' ? '>' : '\'' ;
765
+ char * name , * name_end ;
766
+ int maybe_num = 1 ;
767
+ name_end = name = p + 1 ;
768
+ while (name_end < eos ) {
769
+ clen = (int ) php_mb_mbchar_bytes_ex (name_end , enc );
770
+ if (clen != 1 ) {
771
+ name_end += clen ;
772
+ maybe_num = 0 ;
773
+ continue ;
774
+ }
775
+ if (name_end [0 ] == delim ) break ;
776
+ if (maybe_num && !isdigit (name_end [0 ])) {
777
+ maybe_num = 0 ;
778
+ }
779
+ name_end ++ ;
780
+ }
781
+ if (name_end < eos ) {
782
+ p = name_end + 1 ;
783
+ /* we have either a name or a number */
784
+ if (maybe_num ) {
785
+ /* check for NaN */
786
+ if (name [0 ] == '0' && name_end - name > 1 ) {
787
+ break ;
788
+ }
789
+ no = (int ) strtoul (name , NULL , 10 );
790
+ break ;
791
+ }
792
+ no = onig_name_to_backref_number (regexp , (OnigUChar * )name , (OnigUChar * )name_end , regs );
793
+ break ;
794
+ }
795
+ /* we failed to find a '>' */
796
+ smart_str_appendl (pbuf , sp , p - sp );
797
+ continue ;
798
+ default :
799
+ p += clen ;
800
+ smart_str_appendl (pbuf , sp , p - sp );
801
+ continue ;
802
+ }
803
+ if (no < 0 || no >= regs -> num_regs ) {
804
+ /* invalid group number reference, keep the escape sequence in the output */
805
+ smart_str_appendl (pbuf , sp , p - sp );
806
+ continue ;
807
+ }
808
+ if (regs -> beg [no ] >= 0 && regs -> beg [no ] < regs -> end [no ] && (size_t )regs -> end [no ] <= subject_len ) {
809
+ smart_str_appendl (pbuf , subject + regs -> beg [no ], regs -> end [no ] - regs -> beg [no ]);
810
+ }
811
+ }
812
+
813
+ if (p < eos ) {
814
+ smart_str_appendl (pbuf , p , eos - p );
815
+ }
816
+ }
817
+ /* }}} */
818
+
696
819
/*
697
820
* php functions
698
821
*/
@@ -859,14 +982,12 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
859
982
char * string ;
860
983
size_t string_len ;
861
984
862
- char * p ;
863
985
php_mb_regex_t * re ;
864
986
OnigSyntaxType * syntax ;
865
987
OnigRegion * regs = NULL ;
866
988
smart_str out_buf = {0 };
867
989
smart_str eval_buf = {0 };
868
990
smart_str * pbuf ;
869
- size_t i ;
870
991
int err , eval , n ;
871
992
OnigUChar * pos ;
872
993
OnigUChar * string_lim ;
@@ -976,38 +1097,11 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
976
1097
break ;
977
1098
}
978
1099
if (err >= 0 ) {
979
- #if moriyoshi_0
980
- if (regs -> beg [0 ] == regs -> end [0 ]) {
981
- php_error_docref (NULL , E_WARNING , "Empty regular expression" );
982
- break ;
983
- }
984
- #endif
985
1100
/* copy the part of the string before the match */
986
1101
smart_str_appendl (& out_buf , (char * )pos , (size_t )((OnigUChar * )(string + regs -> beg [0 ]) - pos ));
987
1102
988
1103
if (!is_callable ) {
989
- /* copy replacement and backrefs */
990
- i = 0 ;
991
- p = replace ;
992
- while (i < replace_len ) {
993
- int fwd = (int ) php_mb_mbchar_bytes_ex (p , enc );
994
- n = -1 ;
995
- if ((replace_len - i ) >= 2 && fwd == 1 &&
996
- p [0 ] == '\\' && p [1 ] >= '0' && p [1 ] <= '9' ) {
997
- n = p [1 ] - '0' ;
998
- }
999
- if (n >= 0 && n < regs -> num_regs ) {
1000
- if (regs -> beg [n ] >= 0 && regs -> beg [n ] < regs -> end [n ] && (size_t )regs -> end [n ] <= string_len ) {
1001
- smart_str_appendl (pbuf , string + regs -> beg [n ], regs -> end [n ] - regs -> beg [n ]);
1002
- }
1003
- p += 2 ;
1004
- i += 2 ;
1005
- } else {
1006
- smart_str_appendl (pbuf , p , fwd );
1007
- p += fwd ;
1008
- i += fwd ;
1009
- }
1010
- }
1104
+ mb_regex_substitute (pbuf , string , string_len , replace , replace_len , re , regs , enc );
1011
1105
}
1012
1106
1013
1107
if (eval ) {
@@ -1047,6 +1141,10 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
1047
1141
for (i = 0 ; i < regs -> num_regs ; i ++ ) {
1048
1142
add_next_index_stringl (& subpats , string + regs -> beg [i ], regs -> end [i ] - regs -> beg [i ]);
1049
1143
}
1144
+ if (onig_number_of_names (re ) > 0 ) {
1145
+ mb_regex_groups_iter_args args = {& subpats , string , string_len , regs };
1146
+ onig_foreach_name (re , mb_regex_groups_iter , & args );
1147
+ }
1050
1148
1051
1149
ZVAL_COPY_VALUE (& args [0 ], & subpats );
1052
1150
/* null terminate buffer */
0 commit comments