Skip to content

Commit

Permalink
Mark buffers created from stdin as modified (helix-editor#7431)
Browse files Browse the repository at this point in the history
This resolves some confusing behavior where a scratch document created
by piping into hx is discarded when navigating away from that document.

We discard any scratch documents that are not modified and the original
`Editor::new_file_from_stdin` would create unmodified documents. We
refactor this function to create an empty document first and then to
apply the text from stdin as a change.
  • Loading branch information
the-mikedavis authored and mtoohey31 committed Jun 2, 2024
1 parent 3c6f7ba commit abb31a0
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 31 deletions.
105 changes: 79 additions & 26 deletions helix-view/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -397,33 +397,11 @@ pub fn from_reader<R: std::io::Read + ?Sized>(
let mut buf_out = [0u8; BUF_SIZE];
let mut builder = RopeBuilder::new();

// By default, the encoding of the text is auto-detected by
// `encoding_rs` for_bom, and if it fails, from `chardetng`
// crate which requires sample data from the reader.
// As a manual override to this auto-detection is possible, the
// same data is read into `buf` to ensure symmetry in the upcoming
// loop.
let (encoding, has_bom, mut decoder, mut slice, mut is_empty) = {
let read = reader.read(&mut buf)?;
let is_empty = read == 0;
let (encoding, has_bom) = encoding
.map(|encoding| (encoding, false))
.or_else(|| {
encoding::Encoding::for_bom(&buf).map(|(encoding, _bom_size)| (encoding, true))
})
.unwrap_or_else(|| {
let mut encoding_detector = chardetng::EncodingDetector::new();
encoding_detector.feed(&buf, is_empty);
(encoding_detector.guess(None, true), false)
});

let decoder = encoding.new_decoder();
let (encoding, has_bom, mut decoder, read) =
read_and_detect_encoding(reader, encoding, &mut buf)?;

// If the amount of bytes read from the reader is less than
// `buf.len()`, it is undesirable to read the bytes afterwards.
let slice = &buf[..read];
(encoding, has_bom, decoder, slice, is_empty)
};
let mut slice = &buf[..read];
let mut is_empty = read == 0;

// `RopeBuilder::append()` expects a `&str`, so this is the "real"
// output buffer. When decoding, the number of bytes in the output
Expand Down Expand Up @@ -493,6 +471,81 @@ pub fn from_reader<R: std::io::Read + ?Sized>(
Ok((rope, encoding, has_bom))
}

pub fn read_to_string<R: std::io::Read + ?Sized>(
reader: &mut R,
encoding: Option<&'static Encoding>,
) -> Result<(String, &'static Encoding, bool), Error> {
let mut buf = [0u8; BUF_SIZE];

let (encoding, has_bom, mut decoder, read) =
read_and_detect_encoding(reader, encoding, &mut buf)?;

let mut slice = &buf[..read];
let mut is_empty = read == 0;
let mut buf_string = String::with_capacity(buf.len());

loop {
let mut total_read = 0usize;

loop {
let (result, read, ..) =
decoder.decode_to_string(&slice[total_read..], &mut buf_string, is_empty);

total_read += read;

match result {
encoding::CoderResult::InputEmpty => {
debug_assert_eq!(slice.len(), total_read);
break;
}
encoding::CoderResult::OutputFull => {
debug_assert!(slice.len() > total_read);
buf_string.reserve(buf.len())
}
}
}

if is_empty {
debug_assert_eq!(reader.read(&mut buf)?, 0);
break;
}

let read = reader.read(&mut buf)?;
slice = &buf[..read];
is_empty = read == 0;
}
Ok((buf_string, encoding, has_bom))
}

/// Reads the first chunk from a Reader into the given buffer
/// and detects the encoding.
///
/// By default, the encoding of the text is auto-detected by
/// `encoding_rs` for_bom, and if it fails, from `chardetng`
/// crate which requires sample data from the reader.
/// As a manual override to this auto-detection is possible, the
/// same data is read into `buf` to ensure symmetry in the upcoming
/// loop.
fn read_and_detect_encoding<R: std::io::Read + ?Sized>(
reader: &mut R,
encoding: Option<&'static Encoding>,
buf: &mut [u8],
) -> Result<(&'static Encoding, bool, encoding::Decoder, usize), Error> {
let read = reader.read(buf)?;
let is_empty = read == 0;
let (encoding, has_bom) = encoding
.map(|encoding| (encoding, false))
.or_else(|| encoding::Encoding::for_bom(buf).map(|(encoding, _bom_size)| (encoding, true)))
.unwrap_or_else(|| {
let mut encoding_detector = chardetng::EncodingDetector::new();
encoding_detector.feed(buf, is_empty);
(encoding_detector.guess(None, true), false)
});
let decoder = encoding.new_decoder();

Ok((encoding, has_bom, decoder, read))
}

// The documentation and implementation of this function should be up-to-date with
// its sibling function, `from_reader()`.
//
Expand Down
21 changes: 16 additions & 5 deletions helix-view/src/editor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1386,11 +1386,22 @@ impl Editor {
}

pub fn new_file_from_stdin(&mut self, action: Action) -> Result<DocumentId, Error> {
let (rope, encoding, has_bom) = crate::document::from_reader(&mut stdin(), None)?;
Ok(self.new_file_from_document(
action,
Document::from(rope, Some((encoding, has_bom)), self.config.clone()),
))
let (stdin, encoding, has_bom) = crate::document::read_to_string(&mut stdin(), None)?;
let doc = Document::from(
helix_core::Rope::default(),
Some((encoding, has_bom)),
self.config.clone(),
);
let doc_id = self.new_file_from_document(action, doc);
let doc = doc_mut!(self, &doc_id);
let view = view_mut!(self);
doc.ensure_view_init(view.id);
let transaction =
helix_core::Transaction::insert(doc.text(), doc.selection(view.id), stdin.into())
.with_selection(Selection::point(0));
doc.apply(&transaction, view.id);
doc.append_changes_to_history(view);
Ok(doc_id)
}

// ??? possible use for integration tests
Expand Down

0 comments on commit abb31a0

Please sign in to comment.