diff --git a/crates/ruff_notebook/src/cell.rs b/crates/ruff_notebook/src/cell.rs new file mode 100644 index 0000000000000..2251df13311d9 --- /dev/null +++ b/crates/ruff_notebook/src/cell.rs @@ -0,0 +1,170 @@ +use std::fmt; + +use crate::schema::{Cell, SourceValue}; + +impl fmt::Display for SourceValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SourceValue::String(string) => f.write_str(string), + SourceValue::StringArray(string_array) => { + for string in string_array { + f.write_str(string)?; + } + Ok(()) + } + } + } +} + +impl Cell { + /// Return the [`SourceValue`] of the cell. + pub(crate) fn source(&self) -> &SourceValue { + match self { + Cell::Code(cell) => &cell.source, + Cell::Markdown(cell) => &cell.source, + Cell::Raw(cell) => &cell.source, + } + } + + /// Update the [`SourceValue`] of the cell. + pub(crate) fn set_source(&mut self, source: SourceValue) { + match self { + Cell::Code(cell) => cell.source = source, + Cell::Markdown(cell) => cell.source = source, + Cell::Raw(cell) => cell.source = source, + } + } + + /// Return `true` if it's a valid code cell. + /// + /// A valid code cell is a cell where the cell type is [`Cell::Code`] and the + /// source doesn't contain a cell magic. + pub(crate) fn is_valid_code_cell(&self) -> bool { + let source = match self { + Cell::Code(cell) => &cell.source, + _ => return false, + }; + // Ignore cells containing cell magic as they act on the entire cell + // as compared to line magic which acts on a single line. + !match source { + SourceValue::String(string) => Self::is_magic_cell(string.lines()), + SourceValue::StringArray(string_array) => { + Self::is_magic_cell(string_array.iter().map(String::as_str)) + } + } + } + + /// Returns `true` if a cell should be ignored due to the use of cell magics. + fn is_magic_cell<'a>(lines: impl Iterator<Item = &'a str>) -> bool { + let mut lines = lines.peekable(); + + // Detect automatic line magics (automagic), which aren't supported by the parser. 
If a line + // magic uses automagic, Jupyter doesn't allow following it with non-magic lines anyway, so + // we aren't missing out on any valid Python code. + // + // For example, this is valid: + // ```jupyter + // cat /path/to/file + // cat /path/to/file + // ``` + // + // But this is invalid: + // ```jupyter + // cat /path/to/file + // x = 1 + // ``` + // + // See: https://ipython.readthedocs.io/en/stable/interactive/magics.html + if lines + .peek() + .and_then(|line| line.split_whitespace().next()) + .is_some_and(|token| { + matches!( + token, + "alias" + | "alias_magic" + | "autoawait" + | "autocall" + | "automagic" + | "bookmark" + | "cd" + | "code_wrap" + | "colors" + | "conda" + | "config" + | "debug" + | "dhist" + | "dirs" + | "doctest_mode" + | "edit" + | "env" + | "gui" + | "history" + | "killbgscripts" + | "load" + | "load_ext" + | "loadpy" + | "logoff" + | "logon" + | "logstart" + | "logstate" + | "logstop" + | "lsmagic" + | "macro" + | "magic" + | "mamba" + | "matplotlib" + | "micromamba" + | "notebook" + | "page" + | "pastebin" + | "pdb" + | "pdef" + | "pdoc" + | "pfile" + | "pinfo" + | "pinfo2" + | "pip" + | "popd" + | "pprint" + | "precision" + | "prun" + | "psearch" + | "psource" + | "pushd" + | "pwd" + | "pycat" + | "pylab" + | "quickref" + | "recall" + | "rehashx" + | "reload_ext" + | "rerun" + | "reset" + | "reset_selective" + | "run" + | "save" + | "sc" + | "set_env" + | "sx" + | "system" + | "tb" + | "time" + | "timeit" + | "unalias" + | "unload_ext" + | "who" + | "who_ls" + | "whos" + | "xdel" + | "xmode" + ) + }) + { + return true; + } + + // Detect cell magics (which operate on multiple lines). 
+ lines.any(|line| line.trim_start().starts_with("%%")) + } +} diff --git a/crates/ruff_notebook/src/lib.rs b/crates/ruff_notebook/src/lib.rs index 0d0bb5dc0a9e0..03271682f85ab 100644 --- a/crates/ruff_notebook/src/lib.rs +++ b/crates/ruff_notebook/src/lib.rs @@ -4,6 +4,7 @@ pub use index::*; pub use notebook::*; pub use schema::*; +mod cell; mod index; mod notebook; mod schema; diff --git a/crates/ruff_notebook/src/notebook.rs b/crates/ruff_notebook/src/notebook.rs index 61994e7702648..3deb08be83708 100644 --- a/crates/ruff_notebook/src/notebook.rs +++ b/crates/ruff_notebook/src/notebook.rs @@ -1,5 +1,4 @@ use std::cmp::Ordering; -use std::fmt::Display; use std::fs::File; use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write}; use std::path::Path; @@ -35,173 +34,6 @@ pub fn round_trip(path: &Path) -> anyhow::Result<String> { Ok(String::from_utf8(writer)?) } -impl Display for SourceValue { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - SourceValue::String(string) => f.write_str(string), - SourceValue::StringArray(string_array) => { - for string in string_array { - f.write_str(string)?; - } - Ok(()) - } - } - } -} - -impl Cell { - /// Return the [`SourceValue`] of the cell. - fn source(&self) -> &SourceValue { - match self { - Cell::Code(cell) => &cell.source, - Cell::Markdown(cell) => &cell.source, - Cell::Raw(cell) => &cell.source, - } - } - - /// Update the [`SourceValue`] of the cell. - fn set_source(&mut self, source: SourceValue) { - match self { - Cell::Code(cell) => cell.source = source, - Cell::Markdown(cell) => cell.source = source, - Cell::Raw(cell) => cell.source = source, - } - } - - /// Return `true` if it's a valid code cell. - /// - /// A valid code cell is a cell where the cell type is [`Cell::Code`] and the - /// source doesn't contain a cell magic. 
- fn is_valid_code_cell(&self) -> bool { - let source = match self { - Cell::Code(cell) => &cell.source, - _ => return false, - }; - // Ignore cells containing cell magic as they act on the entire cell - // as compared to line magic which acts on a single line. - !match source { - SourceValue::String(string) => Self::is_magic_cell(string.lines()), - SourceValue::StringArray(string_array) => { - Self::is_magic_cell(string_array.iter().map(String::as_str)) - } - } - } - - /// Returns `true` if a cell should be ignored due to the use of cell magics. - fn is_magic_cell<'a>(lines: impl Iterator<Item = &'a str>) -> bool { - let mut lines = lines.peekable(); - - // Detect automatic line magics (automagic), which aren't supported by the parser. If a line - // magic uses automagic, Jupyter doesn't allow following it with non-magic lines anyway, so - // we aren't missing out on any valid Python code. - // - // For example, this is valid: - // ```jupyter - // cat /path/to/file - // cat /path/to/file - // ``` - // - // But this is invalid: - // ```jupyter - // cat /path/to/file - // x = 1 - // ``` - // - // See: https://ipython.readthedocs.io/en/stable/interactive/magics.html - if lines - .peek() - .and_then(|line| line.split_whitespace().next()) - .is_some_and(|token| { - matches!( - token, - "alias" - | "alias_magic" - | "autoawait" - | "autocall" - | "automagic" - | "bookmark" - | "cd" - | "code_wrap" - | "colors" - | "conda" - | "config" - | "debug" - | "dhist" - | "dirs" - | "doctest_mode" - | "edit" - | "env" - | "gui" - | "history" - | "killbgscripts" - | "load" - | "load_ext" - | "loadpy" - | "logoff" - | "logon" - | "logstart" - | "logstate" - | "logstop" - | "lsmagic" - | "macro" - | "magic" - | "mamba" - | "matplotlib" - | "micromamba" - | "notebook" - | "page" - | "pastebin" - | "pdb" - | "pdef" - | "pdoc" - | "pfile" - | "pinfo" - | "pinfo2" - | "pip" - | "popd" - | "pprint" - | "precision" - | "prun" - | "psearch" - | "psource" - | "pushd" - | "pwd" - | "pycat" - | "pylab" - | 
"quickref" - | "recall" - | "rehashx" - | "reload_ext" - | "rerun" - | "reset" - | "reset_selective" - | "run" - | "save" - | "sc" - | "set_env" - | "sx" - | "system" - | "tb" - | "time" - | "timeit" - | "unalias" - | "unload_ext" - | "who" - | "who_ls" - | "whos" - | "xdel" - | "xmode" - ) - }) - { - return true; - } - - // Detect cell magics (which operate on multiple lines). - lines.any(|line| line.trim_start().starts_with("%%")) - } -} - /// An error that can occur while deserializing a Jupyter Notebook. #[derive(Error, Debug)] pub enum NotebookError {