Skip to content

Commit

Permalink
Merge branch 'master' into feat/redirect-dsn
Browse files Browse the repository at this point in the history
* master:
  feat: Scrub filenames in the minidump modules list (#784)
  • Loading branch information
jan-auer committed Sep 28, 2020
2 parents 86e6566 + 34af98b commit e16488c
Show file tree
Hide file tree
Showing 14 changed files with 762 additions and 109 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

**Features**:

- Add support for scrubbing UTF-16 data in attachments ([#742](https://github.com/getsentry/relay/pull/742))
- Add support for scrubbing UTF-16 data in attachments ([#742](https://github.com/getsentry/relay/pull/742), [#784](https://github.com/getsentry/relay/pull/784))

**Bug Fixes**:

Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions relay-general/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ relay-common = { path = "../relay-common" }
relay-general-derive = { path = "derive" }
relay-wstring = { path = "../relay-wstring" }
schemars = { version = "0.8.0-alpha-4", features = ["uuid", "chrono"], optional = true }
scroll = "0.9.0"
serde = { version = "1.0.114", features = ["derive"] }
serde_json = "1.0.55"
serde_urlencoded = "0.5.5"
Expand Down
120 changes: 90 additions & 30 deletions relay-general/src/pii/attachments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,13 @@ pub struct PiiAttachmentsProcessor<'a> {
root_state: ProcessingState<'static>,
}

/// Which encodings to scrub for `scrub_bytes`.
pub enum ScrubEncodings {
Utf8,
Utf16Le,
All,
}

impl<'a> PiiAttachmentsProcessor<'a> {
/// Creates a new `PiiAttachmentsProcessor` from the given PII config.
pub fn new(compiled_config: &'a CompiledPiiConfig) -> Self {
Expand Down Expand Up @@ -396,7 +403,12 @@ impl<'a> PiiAttachmentsProcessor<'a> {
/// Applies PII rules to a plain buffer.
///
/// Returns `true`, if the buffer was modified.
pub(crate) fn scrub_bytes(&self, data: &mut [u8], state: &ProcessingState<'_>) -> bool {
pub(crate) fn scrub_bytes(
&self,
data: &mut [u8],
state: &ProcessingState<'_>,
encodings: ScrubEncodings,
) -> bool {
let mut changed = false;

for (selector, rules) in &self.compiled_config.applications {
Expand All @@ -414,33 +426,50 @@ impl<'a> PiiAttachmentsProcessor<'a> {
for (_pattern_type, regex, replace_behavior) in
get_regex_for_rule_type(&rule.ty)
{
let matches =
apply_regex_to_utf8_bytes(data, rule, regex, &replace_behavior);
changed |= !(matches.is_empty());

// Only scrub regions with the UTF-16 scrubber if they haven't been
// scrubbed yet.
let windowed_matches = matches
.into_iter()
.chain(std::iter::once((data.len(), 0)))
.scan((0usize, 0usize), |previous, current| {
let start = if previous.1 % 2 == 0 {
previous.1
} else {
previous.1 + 1
};
let item = (start, current.0);
*previous = current;
Some(item)
})
.filter(|(start, end)| end > start);
for (start, end) in windowed_matches {
changed |= apply_regex_to_utf16le_bytes(
&mut data[start..end],
rule,
regex,
&replace_behavior,
);
match encodings {
ScrubEncodings::Utf8 => {
let matches =
apply_regex_to_utf8_bytes(data, rule, regex, &replace_behavior);
changed |= !(matches.is_empty());
}
ScrubEncodings::Utf16Le => {
changed |= apply_regex_to_utf16le_bytes(
data,
rule,
regex,
&replace_behavior,
);
}
ScrubEncodings::All => {
let matches =
apply_regex_to_utf8_bytes(data, rule, regex, &replace_behavior);
changed |= !(matches.is_empty());

// Only scrub regions with the UTF-16 scrubber if they haven't been
// scrubbed yet.
let unscrubbed_ranges = matches
.into_iter()
.chain(std::iter::once((data.len(), 0)))
.scan((0usize, 0usize), |previous, current| {
let start = if previous.1 % 2 == 0 {
previous.1
} else {
previous.1 + 1
};
let item = (start, current.0);
*previous = current;
Some(item)
})
.filter(|(start, end)| end > start);
for (start, end) in unscrubbed_ranges {
changed |= apply_regex_to_utf16le_bytes(
&mut data[start..end],
rule,
regex,
&replace_behavior,
);
}
}
}
}
}
Expand All @@ -455,7 +484,38 @@ impl<'a> PiiAttachmentsProcessor<'a> {
/// Returns `true`, if the attachment was modified.
pub fn scrub_attachment(&self, filename: &str, data: &mut [u8]) -> bool {
let state = self.state(filename, ValueType::Binary);
self.scrub_bytes(data, &state)
self.scrub_bytes(data, &state, ScrubEncodings::All)
}

/// Scrub a filepath, preserving the basename.
pub fn scrub_utf8_filepath(&self, path: &mut str, state: &ProcessingState<'_>) -> bool {
if let Some(index) = path.rfind(|c| c == '/' || c == '\\') {
let data = unsafe { &mut path.as_bytes_mut()[..index] };
self.scrub_bytes(data, state, ScrubEncodings::Utf8)
} else {
false
}
}

/// Scrub a filepath, preserving the basename.
pub fn scrub_utf16_filepath(&self, path: &mut WStr, state: &ProcessingState<'_>) -> bool {
let index =
path.char_indices().rev().find_map(
|(i, c)| {
if c == '/' || c == '\\' {
Some(i)
} else {
None
}
},
);

if let Some(index) = index {
let data = unsafe { &mut path.as_bytes_mut()[..index] };
self.scrub_bytes(data, state, ScrubEncodings::Utf16Le)
} else {
false
}
}
}

Expand Down Expand Up @@ -545,7 +605,7 @@ mod tests {
let mut data = input.to_owned();
let processor = PiiAttachmentsProcessor::new(&compiled);
let state = processor.state(filename, value_type);
let has_changed = processor.scrub_bytes(&mut data, &state);
let has_changed = processor.scrub_bytes(&mut data, &state, ScrubEncodings::All);

assert_eq_bytes_str!(data, output);
assert_eq!(changed, has_changed);
Expand Down
5 changes: 5 additions & 0 deletions relay-general/src/pii/legacy.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
//! Legacy datascrubbing coniguration
//!
//! All these configuration options are ignored by the new data scrubbers which operate
//! solely from the [PiiConfig] rules for the project.
use serde::{Deserialize, Serialize};

use relay_common::{LazyCellRef, UpsertingLazyCell};
Expand Down
Loading

0 comments on commit e16488c

Please sign in to comment.