Skip to content

Commit

Permalink
Support dynamic charset change on meta tags in HtmlRewriter.
Browse files Browse the repository at this point in the history
  • Loading branch information
orium committed Mar 17, 2023
1 parent a0c3fb2 commit c262135
Show file tree
Hide file tree
Showing 14 changed files with 263 additions and 34 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ safemem = "0.3.3"
selectors = "0.22.0"
thiserror = "1.0.2"
hashbrown = "0.13.1"
mime = "0.3.16"

[dev-dependencies]
criterion = "0.4.0"
Expand Down
2 changes: 2 additions & 0 deletions c-api/src/rewriter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ pub extern "C" fn lol_html_rewriter_build(
memory_settings,
strict,
enable_esi_tags: false,
adjust_charset_on_meta_tag: false,
};

let output_sink = ExternOutputSink::new(output_sink, output_sink_user_data);
Expand Down Expand Up @@ -88,6 +89,7 @@ pub extern "C" fn unstable_lol_html_rewriter_build_with_esi_tags(
memory_settings,
strict,
enable_esi_tags: true,
adjust_charset_on_meta_tag: false,
};

let output_sink = ExternOutputSink::new(output_sink, output_sink_user_data);
Expand Down
1 change: 1 addition & 0 deletions fuzz/test_case/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ fn run_rewriter_iter(data: &[u8], selector: &str, encoding: &'static Encoding) -
encoding: encoding.try_into().unwrap(),
memory_settings: MemorySettings::default(),
strict: false,
adjust_charset_on_meta_tag: false,
},
|_: &[u8]| {},
);
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ cfg_if! {
if #[cfg(feature = "integration_test")] {
pub mod selectors_vm;

pub use self::rewriter::SharedEncoding;

pub use self::transform_stream::{
StartTagHandlingResult, TransformController, TransformStream,
TransformStreamSettings
Expand Down
1 change: 0 additions & 1 deletion src/rewritable_units/document_end.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use super::mutations::content_to_bytes;
use super::ContentType;

use encoding_rs::Encoding;

use crate::transform_stream::OutputSink;
Expand Down
11 changes: 5 additions & 6 deletions src/rewritable_units/tokens/capturer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ mod to_token;
use self::text_decoder::TextDecoder;
use super::*;
use crate::parser::Lexeme;
use crate::rewriter::RewritingError;
use crate::rewriter::{RewritingError, SharedEncoding};
use bitflags::bitflags;
use encoding_rs::Encoding;

pub use self::to_token::{ToToken, ToTokenResult};

Expand All @@ -30,15 +29,15 @@ pub enum TokenCapturerEvent<'i> {
/// Mutable borrow (lifetime `'h`) of a callback that receives each `TokenCapturerEvent`
/// and may abort processing by returning a `RewritingError`.
type CapturerEventHandler<'h> = &'h mut dyn FnMut(TokenCapturerEvent) -> Result<(), RewritingError>;

/// State used to turn lexemes into tokens: the document encoding, a `TextDecoder`,
/// and the current `TokenCaptureFlags`.
///
/// NOTE(review): this is a diff rendering without +/- markers, so both the removed and
/// the added `encoding` field lines appear below; only the second exists after the commit.
pub struct TokenCapturer {
// Removed by this commit: the encoding was a fixed `&'static Encoding`.
encoding: &'static Encoding,
// Added by this commit: a `SharedEncoding` handle; use sites read it through `.get()`.
encoding: SharedEncoding,
// Text decoder; `new` constructs it from the same encoding value.
text_decoder: TextDecoder,
// Capture flags; handed as `&mut` to `Lexeme::to_token`.
capture_flags: TokenCaptureFlags,
}

impl TokenCapturer {
pub fn new(capture_flags: TokenCaptureFlags, encoding: &'static Encoding) -> Self {
pub fn new(capture_flags: TokenCaptureFlags, encoding: SharedEncoding) -> Self {
TokenCapturer {
encoding,
encoding: SharedEncoding::clone(&encoding),
text_decoder: TextDecoder::new(encoding),
capture_flags,
}
Expand Down Expand Up @@ -70,7 +69,7 @@ impl TokenCapturer {
where
Lexeme<'i, T>: ToToken,
{
match lexeme.to_token(&mut self.capture_flags, self.encoding) {
match lexeme.to_token(&mut self.capture_flags, self.encoding.get()) {
ToTokenResult::Token(token) => {
self.flush_pending_text(&mut event_handler)?;
event_handler(TokenCapturerEvent::LexemeConsumed)?;
Expand Down
10 changes: 5 additions & 5 deletions src/rewritable_units/tokens/capturer/text_decoder.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
use super::*;
use crate::html::TextType;
use crate::rewriter::RewritingError;
use encoding_rs::{CoderResult, Decoder, Encoding};
use encoding_rs::{CoderResult, Decoder};

// NOTE: this can't be refactored into a method, because we hold a mutable reference to `self`
// during the decoding loop in `feed_text`.
//
// Builds a `TextChunk` token from `$text`, the decoder's `last_text_type`, the `$last` flag
// and the decoder's encoding, then passes it boxed to `$event_handler` as
// `TokenCapturerEvent::TokenProduced`. The macro evaluates to the handler's return value.
macro_rules! emit {
($self:tt, $text:expr, $last:ident, $event_handler:ident) => {{
// Removed by this commit: the encoding field was passed directly.
let token = TextChunk::new_token($text, $self.last_text_type, $last, $self.encoding);
// Added by this commit: the encoding is read through `.get()` on the field.
let token = TextChunk::new_token($text, $self.last_text_type, $last, $self.encoding.get());

$event_handler(TokenCapturerEvent::TokenProduced(Box::new(token)))
}};
}

/// State of the text decoder that produces `TextChunk` tokens via `emit!`.
///
/// NOTE(review): this is a diff rendering without +/- markers, so both the removed and
/// the added `encoding` field lines appear below; only the second exists after the commit.
pub struct TextDecoder {
// Removed by this commit: the encoding was a fixed `&'static Encoding`.
encoding: &'static Encoding,
// Added by this commit: a `SharedEncoding` handle; `feed_text` reads it through `.get()`.
encoding: SharedEncoding,
// Optional `encoding_rs::Decoder`; initialised to `None` in `new`.
// NOTE(review): presumably kept alive across chunks of the same text run — confirm in `feed_text`.
pending_text_streaming_decoder: Option<Decoder>,
// String buffer; the decoding code borrows it as `&mut str` via `as_mut_str()`.
text_buffer: String,
// Text type that `emit!` forwards to `TextChunk::new_token`.
last_text_type: TextType,
}

impl TextDecoder {
pub fn new(encoding: &'static Encoding) -> Self {
pub fn new(encoding: SharedEncoding) -> Self {
TextDecoder {
encoding,
pending_text_streaming_decoder: None,
Expand Down Expand Up @@ -49,7 +49,7 @@ impl TextDecoder {
last: bool,
event_handler: CapturerEventHandler,
) -> Result<(), RewritingError> {
let encoding = self.encoding;
let encoding = self.encoding.get();
let buffer = self.text_buffer.as_mut_str();

let decoder = self
Expand Down
Loading

0 comments on commit c262135

Please sign in to comment.