Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract some private submodules from 'bat::assets' #1850

Merged
merged 4 commits into from
Sep 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 21 additions & 121 deletions src/assets.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::collections::HashMap;
use std::ffi::OsStr;
use std::fs;
use std::path::{Path, PathBuf};
use std::path::Path;

use lazycell::LazyCell;

Expand All @@ -15,17 +14,26 @@ use crate::error::*;
use crate::input::{InputReader, OpenedInput, OpenedInputKind};
use crate::syntax_mapping::{MappingTarget, SyntaxMapping};

use ignored_suffixes::*;
use minimal_assets::*;
use serialized_syntax_set::*;

#[cfg(feature = "build-assets")]
pub use crate::assets::build_assets::*;

pub(crate) mod assets_metadata;
#[cfg(feature = "build-assets")]
mod build_assets;
mod ignored_suffixes;
mod minimal_assets;
mod serialized_syntax_set;

#[derive(Debug)]
pub struct HighlightingAssets {
syntax_set_cell: LazyCell<SyntaxSet>,
serialized_syntax_set: SerializedSyntaxSet,

minimal_syntaxes: MinimalSyntaxes,

/// Lazily load serialized [SyntaxSet]s from [Self.minimal_syntaxes]. The
/// index in this vec matches the index in
/// [Self.minimal_syntaxes.serialized_syntax_sets]
deserialized_minimal_syntaxes: Vec<LazyCell<SyntaxSet>>,
minimal_assets: MinimalAssets,

theme_set: ThemeSet,
fallback_theme: Option<&'static str>,
Expand All @@ -37,22 +45,6 @@ pub struct SyntaxReferenceInSet<'a> {
pub syntax_set: &'a SyntaxSet,
}

/// Stores and allows lookup of minimal [SyntaxSet]s. The [SyntaxSet]s are
/// stored in serialized form, and are deserialized on-demand. This gives good
/// startup performance since only the necessary [SyntaxReference]s needs to be
/// deserialized.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub(crate) struct MinimalSyntaxes {
/// Lookup the index into `serialized_syntax_sets` of a [SyntaxSet] by the
/// name of any [SyntaxReference] inside the [SyntaxSet]
/// (We will later add `by_extension`, `by_first_line`, etc.)
pub(crate) by_name: HashMap<String, usize>,

/// Serialized [SyntaxSet]s. Whether or not this data is compressed is
/// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES]
pub(crate) serialized_syntax_sets: Vec<Vec<u8>>,
}

// Compress for size of ~700 kB instead of ~4600 kB at the cost of ~30% longer deserialization time
pub(crate) const COMPRESS_SYNTAXES: bool = true;

Expand All @@ -70,41 +62,16 @@ pub(crate) const COMPRESS_SERIALIZED_MINIMAL_SYNTAXES: bool = true;
// efficient byte-by-byte copy of `serialized_syntax_sets`.
pub(crate) const COMPRESS_MINIMAL_SYNTAXES: bool = false;

const IGNORED_SUFFIXES: [&str; 13] = [
// Editor etc backups
"~",
".bak",
".old",
".orig",
// Debian and derivatives apt/dpkg/ucf backups
".dpkg-dist",
".dpkg-old",
".ucf-dist",
".ucf-new",
".ucf-old",
// Red Hat and derivatives rpm backups
".rpmnew",
".rpmorig",
".rpmsave",
// Build system input/template files
".in",
];

impl HighlightingAssets {
fn new(
serialized_syntax_set: SerializedSyntaxSet,
minimal_syntaxes: MinimalSyntaxes,
theme_set: ThemeSet,
) -> Self {
// Prepare so we can lazily load minimal syntaxes without a mut reference
let deserialized_minimal_syntaxes =
vec![LazyCell::new(); minimal_syntaxes.serialized_syntax_sets.len()];

HighlightingAssets {
syntax_set_cell: LazyCell::new(),
serialized_syntax_set,
deserialized_minimal_syntaxes,
minimal_syntaxes,
minimal_assets: MinimalAssets::new(minimal_syntaxes),
theme_set,
fallback_theme: None,
}
Expand Down Expand Up @@ -167,37 +134,12 @@ impl HighlightingAssets {
/// tries to find a minimal [SyntaxSet]. If none is found, returns the
/// [SyntaxSet] that contains all syntaxes.
fn get_syntax_set_by_name(&self, name: &str) -> Result<&SyntaxSet> {
let minimal_syntax_set = self
.minimal_syntaxes
.by_name
.get(&name.to_ascii_lowercase())
.and_then(|index| self.get_minimal_syntax_set_with_index(*index));

match minimal_syntax_set {
match self.minimal_assets.get_syntax_set_by_name(name) {
Some(syntax_set) => Ok(syntax_set),
None => self.get_syntax_set(),
}
}

fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result<SyntaxSet> {
let serialized_syntax_set = &self.minimal_syntaxes.serialized_syntax_sets[index];
asset_from_contents(
&serialized_syntax_set[..],
&format!("minimal syntax set {}", index),
COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
)
.map_err(|_| format!("Could not parse minimal syntax set {}", index).into())
}

fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> {
self.deserialized_minimal_syntaxes
.get(index)
.and_then(|cell| {
cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index))
.ok()
})
}

/// Use [Self::get_syntax_for_file_name] instead
#[deprecated]
pub fn syntax_for_file_name(
Expand Down Expand Up @@ -314,7 +256,9 @@ impl HighlightingAssets {
syntax = self.find_syntax_by_file_name_extension(file_name)?;
}
if syntax.is_none() {
syntax = self.get_extension_syntax_with_stripped_suffix(file_name)?;
syntax = try_with_stripped_suffix(file_name, |stripped_file_name| {
self.get_extension_syntax(stripped_file_name) // Note: recursion
})?;
}
Ok(syntax)
}
Expand Down Expand Up @@ -342,25 +286,6 @@ impl HighlightingAssets {
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
}

/// If we find an ignored suffix on the file name, e.g. '~', we strip it and
/// then try again to find a syntax without it. Note that we do this recursively.
fn get_extension_syntax_with_stripped_suffix(
&self,
file_name: &OsStr,
) -> Result<Option<SyntaxReferenceInSet>> {
let file_path = Path::new(file_name);
let mut syntax = None;
if let Some(file_str) = file_path.to_str() {
for suffix in &IGNORED_SUFFIXES {
if let Some(stripped_filename) = file_str.strip_suffix(suffix) {
syntax = self.get_extension_syntax(OsStr::new(stripped_filename))?;
break;
}
}
}
Ok(syntax)
}

fn get_first_line_syntax(
&self,
reader: &mut InputReader,
Expand All @@ -373,31 +298,6 @@ impl HighlightingAssets {
}
}

#[cfg(feature = "build-assets")]
pub use crate::build_assets::build_assets as build;

/// A SyntaxSet in serialized form, i.e. bincoded and flate2 compressed.
/// We keep it in this format since we want to load it lazily.
#[derive(Debug)]
enum SerializedSyntaxSet {
/// The data comes from a user-generated cache file.
FromFile(PathBuf),

/// The data to use is embedded into the bat binary.
FromBinary(&'static [u8]),
}

impl SerializedSyntaxSet {
fn deserialize(&self) -> Result<SyntaxSet> {
match self {
SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data, COMPRESS_SYNTAXES)),
SerializedSyntaxSet::FromFile(ref path) => {
asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES)
}
}
}
}

/// Returns the bytes of `../assets/syntaxes.bin`, which `include_bytes!`
/// embeds into the compiled binary at build time.
pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] {
include_bytes!("../assets/syntaxes.bin")
}
Expand Down
File renamed without changes.
3 changes: 1 addition & 2 deletions src/build_assets.rs → src/assets/build_assets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use syntect::parsing::syntax_definition::{
use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder};

use crate::assets::*;
use crate::error::*;

type SyntaxName = String;

Expand All @@ -27,7 +26,7 @@ enum Dependency {
ByScope(Scope),
}

pub fn build_assets(
pub fn build(
source_dir: &Path,
include_integrated_assets: bool,
target_dir: &Path,
Expand Down
42 changes: 42 additions & 0 deletions src/assets/ignored_suffixes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
use std::ffi::OsStr;
use std::path::Path;

use crate::error::*;

/// File name suffixes that `try_with_stripped_suffix` strips from a file name
/// before retrying a lookup. The entries are tried in the order listed here.
const IGNORED_SUFFIXES: [&str; 13] = [
// Editor etc backups
"~",
".bak",
".old",
".orig",
// Debian and derivatives apt/dpkg/ucf backups
".dpkg-dist",
".dpkg-old",
".ucf-dist",
".ucf-new",
".ucf-old",
// Red Hat and derivatives rpm backups
".rpmnew",
".rpmorig",
".rpmsave",
// Build system input/template files
".in",
];

/// Retries a lookup with an ignored suffix (e.g. `~` or `.bak`) removed from
/// `file_name`.
///
/// The first entry of `IGNORED_SUFFIXES` that `file_name` ends with is
/// stripped, and `func` is then invoked once with the shortened name. Only a
/// single suffix is removed per call; `func` may call back into this function
/// if further suffixes should be stripped as well.
///
/// Returns `Ok(None)` without calling `func` when `file_name` is not valid
/// Unicode or does not end with any of the ignored suffixes.
///
/// # Errors
///
/// Propagates any error returned by `func`.
pub fn try_with_stripped_suffix<T, F>(file_name: &OsStr, func: F) -> Result<Option<T>>
where
    // `func` is called at most once, so `FnOnce` is the least restrictive
    // bound; every closure accepted by the former `Fn` bound still qualifies.
    F: FnOnce(&OsStr) -> Result<Option<T>>,
{
    let stripped_file_name = Path::new(file_name).to_str().and_then(|file_str| {
        IGNORED_SUFFIXES
            .iter()
            .find_map(|suffix| file_str.strip_suffix(*suffix))
    });

    match stripped_file_name {
        Some(stripped_file_name) => func(OsStr::new(stripped_file_name)),
        None => Ok(None),
    }
}
72 changes: 72 additions & 0 deletions src/assets/minimal_assets.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
use std::collections::HashMap;

use lazycell::LazyCell;

use syntect::parsing::SyntaxSet;

use super::*;

/// Holds the minimal [SyntaxSet]s in serialized form together with a cache of
/// the ones that have already been deserialized.
#[derive(Debug)]
pub(crate) struct MinimalAssets {
/// The serialized minimal [SyntaxSet]s and the by-name lookup table for them.
minimal_syntaxes: MinimalSyntaxes,

/// Lazily load serialized [SyntaxSet]s from [Self::minimal_syntaxes]. The
/// index in this vec matches the index in
/// [MinimalSyntaxes::serialized_syntax_sets]
deserialized_minimal_syntaxes: Vec<LazyCell<SyntaxSet>>,
}

/// Stores and allows lookup of minimal [SyntaxSet]s. The [SyntaxSet]s are
/// stored in serialized form, and are deserialized on-demand. This gives good
/// startup performance since only the necessary [SyntaxReference]s need to be
/// deserialized.
// NOTE(review): this type derives serde's `Serialize`/`Deserialize`; if it is
// stored with a positional format, the field order is part of that format —
// confirm before reordering or inserting fields.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub(crate) struct MinimalSyntaxes {
/// Lookup the index into `serialized_syntax_sets` of a [SyntaxSet] by the
/// name of any [SyntaxReference] inside the [SyntaxSet]
/// (We will later add `by_extension`, `by_first_line`, etc.)
pub(crate) by_name: HashMap<String, usize>,

/// Serialized [SyntaxSet]s. Whether or not this data is compressed is
/// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES]
pub(crate) serialized_syntax_sets: Vec<Vec<u8>>,
}

impl MinimalAssets {
    /// Wraps `minimal_syntaxes` and allocates one empty cache cell per
    /// serialized syntax set, so that the sets can later be deserialized
    /// lazily through a shared (non-`mut`) reference.
    pub(crate) fn new(minimal_syntaxes: MinimalSyntaxes) -> Self {
        let set_count = minimal_syntaxes.serialized_syntax_sets.len();

        Self {
            deserialized_minimal_syntaxes: vec![LazyCell::new(); set_count],
            minimal_syntaxes,
        }
    }

    /// Looks up the minimal [SyntaxSet] registered under `name`. The name is
    /// ASCII-lowercased before the lookup.
    ///
    /// Returns `None` if no minimal set is registered under that name, or if
    /// the matching set could not be deserialized.
    pub fn get_syntax_set_by_name(&self, name: &str) -> Option<&SyntaxSet> {
        let lookup_key = name.to_ascii_lowercase();
        let index = *self.minimal_syntaxes.by_name.get(&lookup_key)?;
        self.get_minimal_syntax_set_with_index(index)
    }

    /// Deserializes the minimal syntax set stored at `index`.
    ///
    /// Indexes `serialized_syntax_sets` directly, so `index` must be in
    /// bounds. Any failure reported by `asset_from_contents` is replaced by a
    /// "Could not parse minimal syntax set" error.
    fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result<SyntaxSet> {
        let serialized = self.minimal_syntaxes.serialized_syntax_sets[index].as_slice();
        let description = format!("minimal syntax set {}", index);

        asset_from_contents(
            serialized,
            &description,
            COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
        )
        .map_err(|_| format!("Could not parse minimal syntax set {}", index).into())
    }

    /// Returns the deserialized minimal syntax set at `index`, loading it into
    /// its cache cell if that cell is still empty.
    ///
    /// Yields `None` both for an out-of-range `index` and when loading fails;
    /// the load error itself is intentionally discarded.
    fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> {
        let cell = self.deserialized_minimal_syntaxes.get(index)?;
        cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index))
            .ok()
    }
}
27 changes: 27 additions & 0 deletions src/assets/serialized_syntax_set.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use std::path::PathBuf;

use syntect::parsing::SyntaxSet;

use super::*;

/// A SyntaxSet in serialized form, i.e. bincoded and flate2 compressed.
/// We keep it in this format since we want to load it lazily.
#[derive(Debug)]
pub enum SerializedSyntaxSet {
/// The data comes from a user-generated cache file.
/// `deserialize` reads it from this path via `asset_from_cache`.
FromFile(PathBuf),

/// The data to use is embedded into the bat binary.
/// `deserialize` decodes these bytes via `from_binary`.
FromBinary(&'static [u8]),
}

impl SerializedSyntaxSet {
    /// Turns the serialized data into a [SyntaxSet].
    ///
    /// Data embedded in the binary is decoded with `from_binary` and always
    /// yields `Ok`; data from a cache file is loaded with `asset_from_cache`,
    /// whose result is returned as-is. Both paths pass `COMPRESS_SYNTAXES` as
    /// the compression flag.
    pub fn deserialize(&self) -> Result<SyntaxSet> {
        match self {
            Self::FromFile(path) => asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES),
            Self::FromBinary(data) => {
                let syntax_set = from_binary(data, COMPRESS_SYNTAXES);
                Ok(syntax_set)
            }
        }
    }
}
6 changes: 3 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
mod macros;

pub mod assets;
pub mod assets_metadata;
#[cfg(feature = "build-assets")]
mod build_assets;
pub mod assets_metadata {
pub use super::assets::assets_metadata::*;
}
pub mod config;
pub mod controller;
mod decorations;
Expand Down