@@ -693,6 +693,136 @@ mb_regex_groups_iter(const OnigUChar* name, const OnigUChar* name_end, int ngrou
693
693
}
694
694
/* }}} */
695
695
696
+ /*
697
+ * Helper for _php_mb_regex_ereg_replace_exec
698
+ */
699
+ /* {{{ mb_regex_substitute */
700
+ static inline void mb_regex_substitute (
701
+ smart_str * pbuf ,
702
+ char * subject ,
703
+ size_t subject_len ,
704
+ char * replace ,
705
+ size_t replace_len ,
706
+ php_mb_regex_t * regexp ,
707
+ OnigRegion * regs ,
708
+ const mbfl_encoding * enc
709
+ ) {
710
+ char * p , * sp , * eos ;
711
+ int no ; /* bakreference group number */
712
+ int clen ; /* byte-length of the current character */
713
+
714
+ p = replace ;
715
+ eos = replace + replace_len ;
716
+
717
+ while (p < eos ) {
718
+ clen = (int ) php_mb_mbchar_bytes_ex (p , enc );
719
+ if (clen != 1 || p == eos || p [0 ] != '\\' ) {
720
+ /* skip anything that's not an ascii backslash */
721
+ smart_str_appendl (pbuf , p , clen );
722
+ p += clen ;
723
+ continue ;
724
+ }
725
+ sp = p ; /* save position */
726
+ clen = (int ) php_mb_mbchar_bytes_ex (++ p , enc );
727
+ if (clen != 1 || p == eos ) {
728
+ /* skip escaped multibyte char */
729
+ p += clen ;
730
+ smart_str_appendl (pbuf , sp , p - sp );
731
+ continue ;
732
+ }
733
+ no = -1 ;
734
+ switch (p [0 ]) {
735
+ case '0' :
736
+ no = 0 ;
737
+ p ++ ;
738
+ break ;
739
+ case '1' : case '2' : case '3' : case '4' :
740
+ case '5' : case '6' : case '7' : case '8' : case '9' :
741
+ if (!onig_noname_group_capture_is_active (regexp )) {
742
+ /*
743
+ * FIXME:
744
+ * Oniguruma throws a compile error if numbered backrefs are used with named groups in the pattern.
745
+ * For now we just ignore them, but in the future we might want to raise a warning
746
+ * and abort the whole replace operation.
747
+ */
748
+ p ++ ;
749
+ smart_str_appendl (pbuf , sp , p - sp );
750
+ continue ;
751
+ }
752
+ no = p [0 ] - '0' ;
753
+ p ++ ;
754
+ break ;
755
+ case 'k' :
756
+ clen = (int ) php_mb_mbchar_bytes_ex (++ p , enc );
757
+ if (clen != 1 || p == eos || (p [0 ] != '<' && p [0 ] != '\'' )) {
758
+ /* not a backref delimiter */
759
+ p += clen ;
760
+ smart_str_appendl (pbuf , sp , p - sp );
761
+ continue ;
762
+ }
763
+ /* try to consume everything until next delimiter */
764
+ char delim = p [0 ] == '<' ? '>' : '\'' ;
765
+ char * name , * name_end ;
766
+ int maybe_num = 1 ;
767
+ name_end = name = p + 1 ;
768
+ while (name_end < eos ) {
769
+ clen = (int ) php_mb_mbchar_bytes_ex (name_end , enc );
770
+ if (clen != 1 ) {
771
+ name_end += clen ;
772
+ maybe_num = 0 ;
773
+ continue ;
774
+ }
775
+ if (name_end [0 ] == delim ) break ;
776
+ if (maybe_num && !isdigit (name_end [0 ])) maybe_num = 0 ;
777
+ name_end ++ ;
778
+ }
779
+ p = name_end + 1 ;
780
+ if (name_end - name < 1 || name_end >= eos ) {
781
+ /* the backref was empty or we failed to find the end delimiter */
782
+ smart_str_appendl (pbuf , sp , p - sp );
783
+ continue ;
784
+ }
785
+ /* we have either a name or a number */
786
+ if (maybe_num ) {
787
+ if (!onig_noname_group_capture_is_active (regexp )) {
788
+ /* see above note on mixing numbered & named backrefs */
789
+ smart_str_appendl (pbuf , sp , p - sp );
790
+ continue ;
791
+ }
792
+ if (name_end - name == 1 ) {
793
+ no = name [0 ] - '0' ;
794
+ break ;
795
+ }
796
+ if (name [0 ] == '0' ) {
797
+ /* 01 is not a valid number */
798
+ break ;
799
+ }
800
+ no = (int ) strtoul (name , NULL , 10 );
801
+ break ;
802
+ }
803
+ no = onig_name_to_backref_number (regexp , (OnigUChar * )name , (OnigUChar * )name_end , regs );
804
+ break ;
805
+ default :
806
+ p += clen ;
807
+ smart_str_appendl (pbuf , sp , p - sp );
808
+ continue ;
809
+ }
810
+ if (no < 0 || no >= regs -> num_regs ) {
811
+ /* invalid group number reference, keep the escape sequence in the output */
812
+ smart_str_appendl (pbuf , sp , p - sp );
813
+ continue ;
814
+ }
815
+ if (regs -> beg [no ] >= 0 && regs -> beg [no ] < regs -> end [no ] && (size_t )regs -> end [no ] <= subject_len ) {
816
+ smart_str_appendl (pbuf , subject + regs -> beg [no ], regs -> end [no ] - regs -> beg [no ]);
817
+ }
818
+ }
819
+
820
+ if (p < eos ) {
821
+ smart_str_appendl (pbuf , p , eos - p );
822
+ }
823
+ }
824
+ /* }}} */
825
+
696
826
/*
697
827
* php functions
698
828
*/
@@ -859,14 +989,12 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
859
989
char * string ;
860
990
size_t string_len ;
861
991
862
- char * p ;
863
992
php_mb_regex_t * re ;
864
993
OnigSyntaxType * syntax ;
865
994
OnigRegion * regs = NULL ;
866
995
smart_str out_buf = {0 };
867
996
smart_str eval_buf = {0 };
868
997
smart_str * pbuf ;
869
- size_t i ;
870
998
int err , eval , n ;
871
999
OnigUChar * pos ;
872
1000
OnigUChar * string_lim ;
@@ -976,38 +1104,11 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
976
1104
break ;
977
1105
}
978
1106
if (err >= 0 ) {
979
- #if moriyoshi_0
980
- if (regs -> beg [0 ] == regs -> end [0 ]) {
981
- php_error_docref (NULL , E_WARNING , "Empty regular expression" );
982
- break ;
983
- }
984
- #endif
985
1107
/* copy the part of the string before the match */
986
1108
smart_str_appendl (& out_buf , (char * )pos , (size_t )((OnigUChar * )(string + regs -> beg [0 ]) - pos ));
987
1109
988
1110
if (!is_callable ) {
989
- /* copy replacement and backrefs */
990
- i = 0 ;
991
- p = replace ;
992
- while (i < replace_len ) {
993
- int fwd = (int ) php_mb_mbchar_bytes_ex (p , enc );
994
- n = -1 ;
995
- if ((replace_len - i ) >= 2 && fwd == 1 &&
996
- p [0 ] == '\\' && p [1 ] >= '0' && p [1 ] <= '9' ) {
997
- n = p [1 ] - '0' ;
998
- }
999
- if (n >= 0 && n < regs -> num_regs ) {
1000
- if (regs -> beg [n ] >= 0 && regs -> beg [n ] < regs -> end [n ] && (size_t )regs -> end [n ] <= string_len ) {
1001
- smart_str_appendl (pbuf , string + regs -> beg [n ], regs -> end [n ] - regs -> beg [n ]);
1002
- }
1003
- p += 2 ;
1004
- i += 2 ;
1005
- } else {
1006
- smart_str_appendl (pbuf , p , fwd );
1007
- p += fwd ;
1008
- i += fwd ;
1009
- }
1010
- }
1111
+ mb_regex_substitute (pbuf , string , string_len , replace , replace_len , re , regs , enc );
1011
1112
}
1012
1113
1013
1114
if (eval ) {
@@ -1047,6 +1148,10 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
1047
1148
for (i = 0 ; i < regs -> num_regs ; i ++ ) {
1048
1149
add_next_index_stringl (& subpats , string + regs -> beg [i ], regs -> end [i ] - regs -> beg [i ]);
1049
1150
}
1151
+ if (onig_number_of_names (re ) > 0 ) {
1152
+ mb_regex_groups_iter_args args = {& subpats , string , string_len , regs };
1153
+ onig_foreach_name (re , mb_regex_groups_iter , & args );
1154
+ }
1050
1155
1051
1156
ZVAL_COPY_VALUE (& args [0 ], & subpats );
1052
1157
/* null terminate buffer */
0 commit comments