diff --git a/docs/02.API-REFERENCE.md b/docs/02.API-REFERENCE.md index adf2fec181..8246ce368c 100644 --- a/docs/02.API-REFERENCE.md +++ b/docs/02.API-REFERENCE.md @@ -62,6 +62,16 @@ Possible compile time enabled feature types: - JERRY_FEATURE_LINE_INFO - line info available - JERRY_FEATURE_LOGGING - logging +## jerry_regexp_flags_t + +RegExp object optional flags: + + - JERRY_REGEXP_FLAG_GLOBAL - global match; find all matches rather than stopping after the first match + - JERRY_REGEXP_FLAG_IGNORE_CASE - ignore case + - JERRY_REGEXP_FLAG_MULTILINE - multiline; treat beginning and end characters (^ and $) as working over + multiple lines (i.e., match the beginning or end of each line (delimited by \n or \r), not only the + very beginning or end of the whole input string) + ## jerry_parse_opts_t Option bits for [jerry_parse](#jerry_parse) and @@ -3406,6 +3416,74 @@ jerry_create_string_sz (const jerry_char_t *str_p, - [jerry_create_string_from_utf8](#jerry_create_string_from_utf8) +## jerry_create_regexp + +**Summary** + +Returns a jerry_value_t RegExp object, or an error if the construction of the object fails. +Optional flags can be set using [jerry_regexp_flags_t](#jerry_regexp_flags_t). + +**Prototype** +```c +jerry_value_t +jerry_create_regexp (const jerry_char_t *pattern_p, jerry_regexp_flags_t flags); +``` + +- `pattern_p` - the RegExp pattern as a zero-terminated UTF-8 string +- `flags` - optional flags for the RegExp object +- return value - the RegExp object as a `jerry_value_t` + +**Example** + +```c +{ + jerry_char_t pattern_p[] = "[cgt]gggtaaa|tttaccc[acg]"; + jerry_regexp_flags_t pattern_flags = JERRY_REGEXP_FLAG_IGNORE_CASE; + + jerry_value_t regexp = jerry_create_regexp (pattern_p, pattern_flags); + + ... + + jerry_release_value (regexp); +} +``` + + +## jerry_create_regexp_sz + +**Summary** + +Returns a jerry_value_t RegExp object, or an error if the construction of the object fails. 
+Optional flags can be set using [jerry_regexp_flags_t](#jerry_regexp_flags_t). + +**Prototype** +```c +jerry_value_t +jerry_create_regexp_sz (const jerry_char_t *pattern_p, jerry_size_t pattern_size, jerry_regexp_flags_t flags); +``` + +- `pattern_p` - the RegExp pattern as a UTF-8 string of `pattern_size` bytes +- `pattern_size` - size of the pattern in bytes +- `flags` - optional flags for the RegExp object +- return value - the RegExp object as a `jerry_value_t` + +**Example** + +```c +{ + jerry_char_t pattern_p[] = "[cgt]gggtaaa|tttaccc[acg]"; + jerry_size_t pattern_size = sizeof (pattern_p) - 1; + jerry_regexp_flags_t pattern_flags = JERRY_REGEXP_FLAG_IGNORE_CASE; + + jerry_value_t regexp = jerry_create_regexp_sz (pattern_p, pattern_size, pattern_flags); + + ... + + jerry_release_value (regexp); +} +``` + + ## jerry_create_typedarray **Summary** diff --git a/jerry-core/api/jerry.c b/jerry-core/api/jerry.c index 7d6581b759..33cc8ab29f 100644 --- a/jerry-core/api/jerry.c +++ b/jerry-core/api/jerry.c @@ -31,6 +31,7 @@ #include "ecma-literal-storage.h" #include "ecma-objects.h" #include "ecma-objects-general.h" +#include "ecma-regexp-object.h" #include "ecma-promise-object.h" #include "ecma-typedarray-object.h" #include "jcontext.h" @@ -59,6 +60,13 @@ JERRY_STATIC_ASSERT ((int) ECMA_INIT_EMPTY == (int) JERRY_INIT_EMPTY && (int) ECMA_INIT_MEM_STATS == (int) JERRY_INIT_MEM_STATS, ecma_init_flag_t_must_be_equal_to_jerry_init_flag_t); +#ifndef CONFIG_DISABLE_REGEXP_BUILTIN +JERRY_STATIC_ASSERT ((int) RE_FLAG_GLOBAL == (int) JERRY_REGEXP_FLAG_GLOBAL + && (int) RE_FLAG_MULTILINE == (int) JERRY_REGEXP_FLAG_MULTILINE + && (int) RE_FLAG_IGNORE_CASE == (int) JERRY_REGEXP_FLAG_IGNORE_CASE, + re_flags_t_must_be_equal_to_jerry_regexp_flags_t); +#endif /* !CONFIG_DISABLE_REGEXP_BUILTIN */ + #if defined JERRY_DISABLE_JS_PARSER && !defined JERRY_ENABLE_SNAPSHOT_EXEC #error JERRY_ENABLE_SNAPSHOT_EXEC must be defined if JERRY_DISABLE_JS_PARSER is defined! 
#endif /* JERRY_DISABLE_JS_PARSER && !JERRY_ENABLE_SNAPSHOT_EXEC */ @@ -1482,6 +1490,52 @@ jerry_create_string_sz (const jerry_char_t *str_p, /**< pointer to string */ return ecma_make_string_value (ecma_str_p); } /* jerry_create_string_sz */ +/** + * Calculates the size of the given zero-terminated pattern and forwards it to jerry_create_regexp_sz. + * + * @return the value returned by jerry_create_regexp_sz for the pattern. + */ +jerry_value_t +jerry_create_regexp (const jerry_char_t *pattern_p, /**< zero-terminated UTF-8 string as RegExp pattern */ + jerry_regexp_flags_t flags) /**< optional RegExp flags */ +{ + return jerry_create_regexp_sz (pattern_p, lit_zt_utf8_string_size (pattern_p), flags); +} /* jerry_create_regexp */ + +/** + * Creates a RegExp object with the given pattern (of explicit size) and flags. + * + * @return result of ecma_op_create_regexp_object, or a thrown error for invalid UTF-8 / disabled RegExp. + */ +jerry_value_t +jerry_create_regexp_sz (const jerry_char_t *pattern_p, /**< UTF-8 string as RegExp pattern */ + jerry_size_t pattern_size, /**< size of the pattern */ + jerry_regexp_flags_t flags) /**< optional RegExp flags */ +{ + jerry_assert_api_available (); + +#ifndef CONFIG_DISABLE_REGEXP_BUILTIN + if (!lit_is_valid_utf8_string (pattern_p, pattern_size)) + { + return jerry_throw (ecma_raise_common_error (ECMA_ERR_MSG ("Input must be a valid utf8 string"))); + } + + ecma_string_t *ecma_pattern = ecma_new_ecma_string_from_utf8 (pattern_p, pattern_size); + + jerry_value_t ret_val = ecma_op_create_regexp_object (ecma_pattern, flags); + + ecma_deref_ecma_string (ecma_pattern); + return ret_val; + +#else /* CONFIG_DISABLE_REGEXP_BUILTIN */ + JERRY_UNUSED (pattern_p); + JERRY_UNUSED (pattern_size); + JERRY_UNUSED (flags); + + return jerry_throw (ecma_raise_type_error (ECMA_ERR_MSG ("RegExp is not supported."))); +#endif /* !CONFIG_DISABLE_REGEXP_BUILTIN */ +} /* jerry_create_regexp_sz */ + /** * Get length of an array object * diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.c b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.c 
index b897a0b667..d4c5857e50 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp.c @@ -110,9 +110,15 @@ ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /* ECMA_FINALIZE (flags_str_value); } + uint16_t flags = 0; + if (ecma_is_value_empty (ret_value) && (flags_string_p != NULL)) + { + ret_value = re_parse_regexp_flags (flags_string_p, &flags); + } + if (ecma_is_value_empty (ret_value)) { - ret_value = ecma_op_create_regexp_object (pattern_string_p, flags_string_p); + ret_value = ecma_op_create_regexp_object (pattern_string_p, flags); } if (pattern_string_p != NULL) diff --git a/jerry-core/ecma/operations/ecma-regexp-object.c b/jerry-core/ecma/operations/ecma-regexp-object.c index d618b8438a..c502323cb3 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.c +++ b/jerry-core/ecma/operations/ecma-regexp-object.c @@ -249,22 +249,10 @@ ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p) /**< */ ecma_value_t ecma_op_create_regexp_object (ecma_string_t *pattern_p, /**< input pattern */ - ecma_string_t *flags_str_p) /**< flags */ + uint16_t flags) /**< flags */ { JERRY_ASSERT (pattern_p != NULL); ecma_value_t ret_value = ECMA_VALUE_EMPTY; - uint16_t flags = 0; - - if (flags_str_p != NULL) - { - ECMA_TRY_CATCH (empty, re_parse_regexp_flags (flags_str_p, &flags), ret_value); - ECMA_FINALIZE (empty); - - if (!ecma_is_value_empty (ret_value)) - { - return ret_value; - } - } ecma_object_t *re_prototype_obj_p = ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE); diff --git a/jerry-core/ecma/operations/ecma-regexp-object.h b/jerry-core/ecma/operations/ecma-regexp-object.h index 97e94cb7eb..2035d4dbec 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.h +++ b/jerry-core/ecma/operations/ecma-regexp-object.h @@ -30,6 +30,8 @@ /** * RegExp flags + * Note: + * This enum has to be kept in sync with jerry_regexp_flags_t. 
*/ typedef enum { @@ -53,7 +55,7 @@ typedef struct } re_matcher_ctx_t; ecma_value_t ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p); -ecma_value_t ecma_op_create_regexp_object (ecma_string_t *pattern_p, ecma_string_t *flags_str_p); +ecma_value_t ecma_op_create_regexp_object (ecma_string_t *pattern_p, uint16_t flags); ecma_value_t ecma_regexp_exec_helper (ecma_value_t regexp_value, ecma_value_t input_string, bool ignore_global); ecma_value_t ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg, ecma_string_t **pattern_string_p); ecma_char_t re_canonicalize (ecma_char_t ch, bool is_ignorecase); diff --git a/jerry-core/include/jerryscript-core.h b/jerry-core/include/jerryscript-core.h index 2a65ee8e1a..1a29312d3b 100644 --- a/jerry-core/include/jerryscript-core.h +++ b/jerry-core/include/jerryscript-core.h @@ -115,6 +115,16 @@ typedef enum JERRY_GC_SEVERITY_HIGH /**< free as much memory as possible */ } jerry_gc_mode_t; +/** + * RegExp flags for jerry_create_regexp[_sz]; values must equal the internal RE_FLAG_* ones (static assert in jerry.c). + */ +typedef enum +{ + JERRY_REGEXP_FLAG_GLOBAL = (1u << 1), /**< Globally scan string */ + JERRY_REGEXP_FLAG_IGNORE_CASE = (1u << 2), /**< Ignore case */ + JERRY_REGEXP_FLAG_MULTILINE = (1u << 3) /**< Multiline string scan */ +} jerry_regexp_flags_t; + /** * Character type of JerryScript. 
*/ @@ -447,6 +457,9 @@ jerry_value_t jerry_create_number_nan (void); jerry_value_t jerry_create_null (void); jerry_value_t jerry_create_object (void); jerry_value_t jerry_create_promise (void); +jerry_value_t jerry_create_regexp (const jerry_char_t *pattern, jerry_regexp_flags_t flags); +jerry_value_t jerry_create_regexp_sz (const jerry_char_t *pattern, jerry_size_t pattern_size, + jerry_regexp_flags_t flags); jerry_value_t jerry_create_string_from_utf8 (const jerry_char_t *str_p); jerry_value_t jerry_create_string_sz_from_utf8 (const jerry_char_t *str_p, jerry_size_t str_size); jerry_value_t jerry_create_string (const jerry_char_t *str_p); diff --git a/tests/unit-core/test-regexp.c b/tests/unit-core/test-regexp.c new file mode 100644 index 0000000000..b33b7e26e4 --- /dev/null +++ b/tests/unit-core/test-regexp.c @@ -0,0 +1,71 @@ +/* Copyright JS Foundation and other contributors, http://js.foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "jerryscript.h" + +#include "test-common.h" + +int +main (void) +{ + TEST_INIT (); + jerry_init (JERRY_INIT_EMPTY); + + jerry_value_t global_obj_val = jerry_get_global_object (); + + jerry_char_t pattern[] = "[^.]+"; + jerry_regexp_flags_t flags = JERRY_REGEXP_FLAG_GLOBAL | JERRY_REGEXP_FLAG_MULTILINE; + jerry_value_t regex_obj = jerry_create_regexp (pattern, flags); + TEST_ASSERT (jerry_value_is_object (regex_obj)); + + const jerry_char_t func_resource[] = "unknown"; + const jerry_char_t func_arg_list[] = "regex"; + const jerry_char_t func_src[] = "return [regex.exec('something.domain.com'), regex.multiline, regex.global];"; + jerry_value_t func_val = jerry_parse_function (func_resource, + sizeof (func_resource) - 1, + func_arg_list, + sizeof (func_arg_list) - 1, + func_src, + sizeof (func_src) - 1, + JERRY_PARSE_NO_OPTS); + + jerry_value_t res = jerry_call_function (func_val, global_obj_val, &regex_obj, 1); /* bound to 'regex' */ + jerry_value_t regex_res = jerry_get_property_by_index (res, 0); /* the regex.exec () result */ + jerry_value_t regex_res_str = jerry_get_property_by_index (regex_res, 0); /* its first element */ + jerry_value_t is_multiline = jerry_get_property_by_index (res, 1); + jerry_value_t is_global = jerry_get_property_by_index (res, 2); + + jerry_size_t str_size = jerry_get_string_size (regex_res_str); + jerry_char_t res_buff[str_size]; + jerry_size_t res_size = jerry_string_to_char_buffer (regex_res_str, res_buff, str_size); + + const char expected_result[] = "something"; + TEST_ASSERT (res_size == (sizeof (expected_result) - 1)); + TEST_ASSERT (strncmp (expected_result, (const char *) res_buff, res_size) == 0); + TEST_ASSERT (jerry_get_boolean_value (is_multiline)); + TEST_ASSERT (jerry_get_boolean_value (is_global)); + + jerry_release_value (regex_obj); + jerry_release_value (res); + jerry_release_value (func_val); + jerry_release_value (regex_res); + jerry_release_value (regex_res_str); + jerry_release_value (is_multiline); + jerry_release_value (is_global); + jerry_release_value (global_obj_val); + + 
jerry_cleanup (); + return 0; +} /* main */