-
Notifications
You must be signed in to change notification settings - Fork 93
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(pii): Scrub arrays of 2 elements as key-value pairs #3639
Changes from 2 commits
0ef6862
b5c2d3e
7aedc7b
fd9a5b2
d1acc1c
0967300
0672c9c
4902859
901545f
e13348d
8b44603
a99c1ae
399814f
a2d7868
8bfc141
7a2c98e
0d40b4c
b3194c9
53e68be
684b095
48aef40
b96d3d7
123f6be
24d772b
d776b99
815a230
26842af
eff37fe
685cf83
d4af9b6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -111,33 +111,39 @@ impl<'a> Processor for PiiProcessor<'a> { | |||||
T: ProcessValue, | ||||||
{ | ||||||
if is_pairlist(array) { | ||||||
for annotated in array { | ||||||
let mut mapped = std::mem::take(annotated).map_value(T::into_value); | ||||||
for (index, annotated) in array.iter_mut().enumerate() { | ||||||
let mut mapped = mem::take(annotated).map_value(T::into_value); | ||||||
|
||||||
if let Some(Value::Array(ref mut pair)) = mapped.value_mut() { | ||||||
let mut value = std::mem::take(&mut pair[1]); | ||||||
let mut value = mem::take(&mut pair[1]); | ||||||
let value_type = ValueType::for_field(&value); | ||||||
|
||||||
let value_type = ValueType::for_field(&pair[1]); | ||||||
// BEGIN copied from process_pairlist | ||||||
if let Some(key_name) = &pair[0].as_str() { | ||||||
// if the pair has no key name, we skip over it for PII stripping. It is | ||||||
// still processed with index-based path in the invocation of | ||||||
// `process_child_values`. | ||||||
let entered = | ||||||
state.enter_borrowed(key_name, state.inner_attrs(), value_type); | ||||||
|
||||||
processor::process_value(&mut value, self, &entered)?; | ||||||
// We enter the index of the array. | ||||||
let index_state = state.enter_index(index, state.inner_attrs(), value_type); | ||||||
// We enter the key of the first element of the array, since we treat it | ||||||
// as a pair. | ||||||
let key_state = index_state.enter_borrowed( | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need to enter both by index and by key? That seems semantically incorrect, because we want the same semantics for See also Lines 28 to 29 in d812b63
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh yeah, that's actually a good point. I mistakenly implemented it before. Good catch |
||||||
key_name, | ||||||
index_state.inner_attrs(), | ||||||
value_type, | ||||||
); | ||||||
// We process the value with a state that "simulates" the first value of the | ||||||
// array as if it was the key of a dictionary. | ||||||
process_value(&mut value, self, &key_state)?; | ||||||
} | ||||||
// END copied from process_pairlist | ||||||
|
||||||
// Put value back into pair | ||||||
// Put value back into pair. | ||||||
pair[1] = value; | ||||||
} | ||||||
// Put pair back into array | ||||||
|
||||||
// Put pair back into array. | ||||||
*annotated = T::from_value(mapped); | ||||||
} | ||||||
|
||||||
Ok(()) | ||||||
} else { | ||||||
// default treatment. | ||||||
// If we didn't find a pairlist, we can process child values as normal. | ||||||
array.process_child_values(self, state) | ||||||
} | ||||||
} | ||||||
|
@@ -244,12 +250,20 @@ impl<'a> Processor for PiiProcessor<'a> { | |||||
} | ||||||
|
||||||
#[derive(Default)] | ||||||
struct IsObjectPair { | ||||||
struct PairArrayChecker { | ||||||
is_pair: bool, | ||||||
has_string_key: bool, | ||||||
} | ||||||
|
||||||
impl Processor for IsObjectPair { | ||||||
impl PairArrayChecker { | ||||||
/// Returns true if the visitor identified the supplied data as an array composed of | ||||||
/// a key (string) and a value. | ||||||
fn is_pair_array(&self) -> bool { | ||||||
self.is_pair && self.has_string_key | ||||||
} | ||||||
} | ||||||
|
||||||
impl Processor for PairArrayChecker { | ||||||
fn process_array<T>( | ||||||
&mut self, | ||||||
value: &mut relay_protocol::Array<T>, | ||||||
|
@@ -259,9 +273,8 @@ impl Processor for IsObjectPair { | |||||
where | ||||||
T: ProcessValue, | ||||||
{ | ||||||
self.is_pair = value.len() == 2; | ||||||
self.is_pair = state.depth() == 0 && value.len() == 2; | ||||||
if self.is_pair { | ||||||
// Visit only key: | ||||||
let value_type = ValueType::for_field(&value[0]); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
process_value( | ||||||
&mut value[0], | ||||||
|
@@ -287,9 +300,9 @@ impl Processor for IsObjectPair { | |||||
|
||||||
fn is_pairlist<T: ProcessValue>(array: &mut Array<T>) -> bool { | ||||||
for element in array.iter_mut() { | ||||||
let mut visitor = IsObjectPair::default(); | ||||||
let mut visitor = PairArrayChecker::default(); | ||||||
process_value(element, &mut visitor, ProcessingState::root()).ok(); | ||||||
if !visitor.is_pair || !visitor.has_string_key { | ||||||
if !visitor.is_pair_array() { | ||||||
return false; | ||||||
} | ||||||
} | ||||||
|
@@ -1899,29 +1912,8 @@ mod tests { | |||||
[ | ||||||
Array( | ||||||
[ | ||||||
Annotated( | ||||||
String( | ||||||
"", | ||||||
), | ||||||
Meta { | ||||||
remarks: [ | ||||||
Remark { | ||||||
ty: Removed, | ||||||
rule_id: "@password:remove", | ||||||
range: Some( | ||||||
( | ||||||
0, | ||||||
0, | ||||||
), | ||||||
), | ||||||
}, | ||||||
], | ||||||
errors: [], | ||||||
original_length: Some( | ||||||
13, | ||||||
), | ||||||
original_value: None, | ||||||
}, | ||||||
String( | ||||||
"authorization", | ||||||
), | ||||||
Meta { | ||||||
remarks: [ | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ expression: data | |
"request": { | ||
"headers": [ | ||
[ | ||
"[Filtered]", | ||
"AuthToken", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The first element is now kept since we treat is as a key. |
||
"[Filtered]" | ||
] | ||
] | ||
|
@@ -36,19 +36,6 @@ expression: data | |
"request": { | ||
"headers": { | ||
"0": { | ||
"0": { | ||
"": { | ||
"rem": [ | ||
[ | ||
"@password:filter", | ||
"s", | ||
0, | ||
10 | ||
] | ||
], | ||
"len": 9 | ||
} | ||
}, | ||
"1": { | ||
"": { | ||
"rem": [ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I prefer a design where the check already returns you a prepared datastructure to visit:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We would just pipe through the array since this check is not doing anything with the data structure itself.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same here, this was the original design, but I didn't manage to create a lightweight (i.e. borrowing) pairlist from the visitor because the visitor (
process_string
) gets a string reference without a lifetime parameter. I'm still open to going that route though, either by changing theProcessor
framework to allow such a thing, or by simply accepting the cost and creating an ownedPairList
tentatively.