@@ -34,7 +34,11 @@ const LIFECYLE_TIMESTAMP_FILE = 'lifecycle.timestamp';
// Options object for config-fs calls — presumably makes a missing file non-fatal; confirm against the config-fs helpers.
3434const config_fs_options = { silent_if_missing : true } ;
// Paths under config.NC_LIFECYCLE_LOGS_DIR; the names suggest they hold temporary ILM policy files
// and ILM candidate files respectively — confirm against the code that writes them.
3535const ILM_POLICIES_TMP_DIR = path . join ( config . NC_LIFECYCLE_LOGS_DIR , 'lifecycle_ilm_policies' ) ;
3636const ILM_CANDIDATES_TMP_DIR = path . join ( config . NC_LIFECYCLE_LOGS_DIR , 'lifecycle_ilm_candidates' ) ;
37-
// ESCAPE clause appended after every LIKE pattern in the generated policy, declaring backslash as the escape character.
37+ const escape_backslash_str = "ESCAPE '\\'" ;
// Global regexes consumed by _escape_like_clause_ilm_policy to locate the characters that must be escaped
// in a LIKE pattern: the _ and % wildcards, the single quote and the backslash itself.
// NOTE(review): "precentage" is a misspelling of "percentage"; the identifier is referenced elsewhere, so it is left as is here.
38+ const underscore_wildcard_regex = / _ / g;
39+ const precentage_wildcard_regex = / % / g;
40+ const single_quote_regex = / ' / g;
41+ const backslash_regex = / \\ / g;
3842
3943const TIMED_OPS = Object . freeze ( {
4044 RUN_LIFECYLE : 'run_lifecycle' ,
@@ -1257,16 +1261,17 @@ class NCLifecycle {
12571261 convert_lifecycle_policy_to_gpfs_ilm_policy ( lifecycle_rule , bucket_json ) {
12581262 const bucket_path = bucket_json . path ;
12591263 const bucket_rule_id = this . get_lifecycle_ilm_candidate_file_suffix ( bucket_json . name , lifecycle_rule ) ;
1260- const in_bucket_path = path . join ( bucket_path , '/%' ) ;
1261- const in_bucket_internal_dir = path . join ( bucket_path , `/${ config . NSFS_TEMP_DIR_NAME } %/%` ) ;
1262- const in_versions_dir = path . join ( bucket_path , '/.versions/%' ) ;
1263- const in_nested_versions_dir = path . join ( bucket_path , '/%/.versions/%' ) ;
1264+ const escaped_bucket_path = this . _escape_like_clause_ilm_policy ( bucket_path ) ;
1265+ const in_bucket_path = path . join ( escaped_bucket_path , '/%' ) ;
1266+ const in_bucket_internal_dir = path . join ( escaped_bucket_path , `/${ config . NSFS_TEMP_DIR_NAME } %/%` ) ;
1267+ const in_versions_dir = path . join ( escaped_bucket_path , '/.versions/%' ) ;
1268+ const in_nested_versions_dir = path . join ( escaped_bucket_path , '/%/.versions/%' ) ;
12641269 const ilm_policy_helpers = { bucket_rule_id, in_bucket_path, in_bucket_internal_dir, in_versions_dir, in_nested_versions_dir } ;
12651270
12661271 const policy_base = this . _get_gpfs_ilm_policy_base ( ilm_policy_helpers ) ;
12671272 const expiry_string = this . convert_expiry_rule_to_gpfs_ilm_policy ( lifecycle_rule , ilm_policy_helpers ) ;
12681273 const non_current_days_string = this . convert_noncurrent_version_by_days_to_gpfs_ilm_policy ( lifecycle_rule , ilm_policy_helpers ) ;
1269- const filter_policy = this . convert_filter_to_gpfs_ilm_policy ( lifecycle_rule , bucket_json ) ;
1274+ const filter_policy = this . convert_filter_to_gpfs_ilm_policy ( lifecycle_rule , escaped_bucket_path ) ;
12701275 return policy_base + non_current_days_string + expiry_string + filter_policy ;
12711276 }
12721277
@@ -1280,12 +1285,29 @@ class NCLifecycle {
12801285 const mod_age_definition = `define( mod_age, (DAYS(CURRENT_TIMESTAMP) - DAYS(MODIFICATION_TIME)) )\n` ;
12811286 const change_age_definition = `define( change_age, (DAYS(CURRENT_TIMESTAMP) - DAYS(CHANGE_TIME)) )\n` ;
12821287 const rule_id_definition = `RULE '${ bucket_rule_id } ' LIST '${ bucket_rule_id } '\n` ;
1283- const policy_path_base = `WHERE PATH_NAME LIKE '${ in_bucket_path } '\n` +
1284- `AND PATH_NAME NOT LIKE '${ in_bucket_internal_dir } '\n` ;
1288+ const policy_path_base = `WHERE PATH_NAME LIKE '${ in_bucket_path } ' ${ escape_backslash_str } \n` +
1289+ `AND PATH_NAME NOT LIKE '${ in_bucket_internal_dir } ' ${ escape_backslash_str } \n` ;
12851290
12861291 return mod_age_definition + change_age_definition + rule_id_definition + policy_path_base ;
12871292 }
12881293
1294+ /**
1295+ * escape_like_clause_ilm_policy escapes the \ _ % and ' characters in the ILM policy string
1296+ * this is needed because GPFS ILM policies use _ and % as wildcards
1297+ * and we need to escape them to use them as normal characters
1298+ * since we are escaping using backslash we also need to escape the backslash itself
1299+ * IMPORTANT - escaping of the backslash must be done before escaping of the underscore and percentage
1300+ * @param {String } ilm_policy_string
1301+ * @returns String
1302+ */
1303+ _escape_like_clause_ilm_policy ( ilm_policy_string ) {
1304+ return ilm_policy_string
1305+ . replace ( backslash_regex , '\\\\' )
1306+ . replace ( underscore_wildcard_regex , '\\_' )
1307+ . replace ( precentage_wildcard_regex , '\\%' )
1308+ . replace ( single_quote_regex , `''` ) ;
1309+ }
1310+
12891311 /**
12901312 * convert_expiry_rule_to_gpfs_ilm_policy converts the expiry rule to GPFS ILM policy
12911313 * expiration rule works on latest version path (not inside .versions or in nested .versions)
@@ -1296,8 +1318,8 @@ class NCLifecycle {
12961318 convert_expiry_rule_to_gpfs_ilm_policy ( lifecycle_rule , { in_versions_dir, in_nested_versions_dir } ) {
12971319 const { expiration = undefined } = lifecycle_rule ;
12981320 if ( ! expiration ) return '' ;
1299- const current_path_policy = `AND PATH_NAME NOT LIKE '${ in_versions_dir } '\n` +
1300- `AND PATH_NAME NOT LIKE '${ in_nested_versions_dir } '\n` ;
1321+ const current_path_policy = `AND PATH_NAME NOT LIKE '${ in_versions_dir } ' ${ escape_backslash_str } \n` +
1322+ `AND PATH_NAME NOT LIKE '${ in_nested_versions_dir } ' ${ escape_backslash_str } \n` ;
13011323
13021324 const expiry_policy = expiration . days ? `AND mod_age > ${ expiration . days } \n` : '' ;
13031325 return current_path_policy + expiry_policy ;
@@ -1317,20 +1339,23 @@ class NCLifecycle {
13171339 /**
13181340 * convert_filter_to_gpfs_ilm_policy converts the filter to GPFS ILM policy
13191341 * @param {* } lifecycle_rule
1320- * @param {Object } bucket_json
1342+ * @param {String } escaped_bucket_path
13211343 * @returns {String }
13221344 */
1323- convert_filter_to_gpfs_ilm_policy ( lifecycle_rule , bucket_json ) {
1345+ convert_filter_to_gpfs_ilm_policy ( lifecycle_rule , escaped_bucket_path ) {
13241346 const { prefix = undefined , filter = { } } = lifecycle_rule ;
1325- const bucket_path = bucket_json . path ;
13261347 let filter_policy = '' ;
13271348 if ( prefix || Object . keys ( filter ) . length > 0 ) {
13281349 const { object_size_greater_than = undefined , object_size_less_than = undefined , tags = undefined } = filter ;
13291350 const rule_prefix = prefix || filter . prefix ;
1330- filter_policy += rule_prefix ? `AND PATH_NAME LIKE '${ path . join ( bucket_path , rule_prefix ) } %'\n` : '' ;
1351+ const escaped_prefix = this . _escape_like_clause_ilm_policy ( rule_prefix || '' ) ;
1352+ filter_policy += rule_prefix ? `AND PATH_NAME LIKE '${ path . join ( escaped_bucket_path , escaped_prefix ) } %' ${ escape_backslash_str } \n` : '' ;
13311353 filter_policy += object_size_greater_than === undefined ? '' : `AND FILE_SIZE > ${ object_size_greater_than } \n` ;
13321354 filter_policy += object_size_less_than === undefined ? '' : `AND FILE_SIZE < ${ object_size_less_than } \n` ;
1333- filter_policy += tags ? tags . map ( tag => `AND XATTR('user.noobaa.tag.${ tag . key } ') LIKE ${ tag . value } \n` ) . join ( '' ) : '' ;
1355+ filter_policy += tags ? tags . map ( tag => {
1356+ const escaped_tag_value = this . _escape_like_clause_ilm_policy ( tag . value ) ;
1357+ return `AND XATTR('user.noobaa.tag.${ tag . key } ') LIKE '${ escaped_tag_value } ' ${ escape_backslash_str } \n` ;
1358+ } ) . join ( '' ) : '' ;
13341359 }
13351360 return filter_policy ;
13361361 }
@@ -1493,16 +1518,21 @@ class NCLifecycle {
14931518 * example -
14941519 * 17460 1316236366 0 -- /mnt/gpfs0/account1_new_buckets_path/bucket1_storage/key1.txt
14951520 * if file is .folder (directory object) we need to return its parent directory
1496- * @param {* } entry
1521+ * Note that trim() is deliberately not used here: it would strip whitespace from the end of the line and could remove
1522+ * trailing spaces that are part of the file name. The file reader should trim the line before passing it to this function
1523+ * @param {Object } entry - entry from the candidates file
1524+ * @param {Object } bucket_json
14971525 */
14981526 _parse_key_from_line ( entry , bucket_json ) {
1499- const line_array = entry . path . split ( ' ' ) ;
1500- const file_path = line_array [ line_array . length - 1 ] ;
1527+ dbg . log1 ( `_parse_key_from_line entry=${ util . inspect ( entry ) } , bucket_json=${ util . inspect ( bucket_json ) } ` ) ;
1528+ const path_start_index = entry . path . indexOf ( bucket_json . path ) ;
1529+ const file_path = entry . path . slice ( path_start_index ) ;
15011530 let file_key = file_path . replace ( path . join ( bucket_json . path , '/' ) , '' ) ;
15021531 const basename = path . basename ( file_key ) ;
15031532 if ( basename . startsWith ( config . NSFS_FOLDER_OBJECT_NAME ) ) {
15041533 file_key = path . join ( path . dirname ( file_key ) , '/' ) ;
15051534 }
1535+ dbg . log1 ( `_parse_key_from_line file_path=${ util . inspect ( file_path ) } , file_key=${ util . inspect ( file_key ) } ` ) ;
15061536 return file_key ;
15071537 }
15081538}
0 commit comments