Skip to content

Commit

Permalink
WiP: Remove syntaxes.bin, all syntaxes in minimal_syntaxes.bin
Browse files Browse the repository at this point in the history
Each syntax is stored in minimal_syntaxes.bin exactly once, so the file is only
slightly bigger than syntaxes.bin.
  • Loading branch information
Enselic committed Sep 26, 2021
1 parent fe2e591 commit be1a62e
Show file tree
Hide file tree
Showing 11 changed files with 277 additions and 248 deletions.
Binary file modified assets/minimal_syntaxes.bin
Binary file not shown.
119 changes: 25 additions & 94 deletions src/assets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ use std::ffi::OsStr;
use std::fs;
use std::path::Path;

use lazycell::LazyCell;

use syntect::highlighting::{Theme, ThemeSet};
use syntect::parsing::{SyntaxReference, SyntaxSet};

Expand All @@ -16,7 +14,6 @@ use crate::syntax_mapping::{MappingTarget, SyntaxMapping};

use ignored_suffixes::*;
use minimal_assets::*;
use serialized_syntax_set::*;

#[cfg(feature = "build-assets")]
pub use crate::assets::build_assets::*;
Expand All @@ -26,13 +23,9 @@ pub(crate) mod assets_metadata;
mod build_assets;
mod ignored_suffixes;
mod minimal_assets;
mod serialized_syntax_set;

#[derive(Debug)]
pub struct HighlightingAssets {
syntax_set_cell: LazyCell<SyntaxSet>,
serialized_syntax_set: SerializedSyntaxSet,

minimal_assets: MinimalAssets,

theme_set: ThemeSet,
Expand All @@ -45,9 +38,6 @@ pub struct SyntaxReferenceInSet<'a> {
pub syntax_set: &'a SyntaxSet,
}

/// Compress for size of ~700 kB instead of ~4600 kB at the cost of ~30% longer deserialization time
pub(crate) const COMPRESS_SYNTAXES: bool = true;

/// Compress for size of ~20 kB instead of ~200 kB at the cost of ~30% longer deserialization time
pub(crate) const COMPRESS_THEMES: bool = true;

Expand All @@ -63,14 +53,8 @@ pub(crate) const COMPRESS_SERIALIZED_MINIMAL_SYNTAXES: bool = true;
pub(crate) const COMPRESS_MINIMAL_SYNTAXES: bool = false;

impl HighlightingAssets {
fn new(
serialized_syntax_set: SerializedSyntaxSet,
minimal_syntaxes: MinimalSyntaxes,
theme_set: ThemeSet,
) -> Self {
fn new(minimal_syntaxes: MinimalSyntaxes, theme_set: ThemeSet) -> Self {
HighlightingAssets {
syntax_set_cell: LazyCell::new(),
serialized_syntax_set,
minimal_assets: MinimalAssets::new(minimal_syntaxes),
theme_set,
fallback_theme: None,
Expand All @@ -83,7 +67,6 @@ impl HighlightingAssets {

pub fn from_cache(cache_path: &Path) -> Result<Self> {
Ok(HighlightingAssets::new(
SerializedSyntaxSet::FromFile(cache_path.join("syntaxes.bin")),
asset_from_cache(
&cache_path.join("minimal_syntaxes.bin"),
"minimal syntax sets",
Expand All @@ -94,32 +77,15 @@ impl HighlightingAssets {
}

pub fn from_binary() -> Self {
HighlightingAssets::new(
SerializedSyntaxSet::FromBinary(get_serialized_integrated_syntaxset()),
get_integrated_minimal_syntaxes(),
get_integrated_themeset(),
)
HighlightingAssets::new(get_integrated_minimal_syntaxes(), get_integrated_themeset())
}

/// Records `theme` as the fallback theme name.
///
/// The value is stored in `self.fallback_theme` as `Some(theme)`, replacing
/// whatever was stored there before.
pub fn set_fallback_theme(&mut self, theme: &'static str) {
self.fallback_theme = Some(theme);
}

pub(crate) fn get_syntax_set(&self) -> Result<&SyntaxSet> {
self.syntax_set_cell
.try_borrow_with(|| self.serialized_syntax_set.deserialize())
}

/// Use [Self::get_syntaxes] instead
#[deprecated]
pub fn syntaxes(&self) -> &[SyntaxReference] {
self.get_syntax_set()
.expect(".syntaxes() is deprecated, use .get_syntaxes() instead")
.syntaxes()
}

pub fn get_syntaxes(&self) -> Result<&[SyntaxReference]> {
Ok(self.get_syntax_set()?.syntaxes())
pub fn syntaxes_iter(&self) -> impl Iterator<Item = &SyntaxReference> {
self.minimal_assets.syntaxes_iter()
}

fn get_theme_set(&self) -> &ThemeSet {
Expand All @@ -130,16 +96,6 @@ impl HighlightingAssets {
self.get_theme_set().themes.keys().map(|s| s.as_ref())
}

/// Finds a [SyntaxSet] that contains a [SyntaxReference] by its name. First
/// tries to find a minimal [SyntaxSet]. If none is found, returns the
/// [SyntaxSet] that contains all syntaxes.
fn get_syntax_set_by_name(&self, name: &str) -> Result<&SyntaxSet> {
match self.minimal_assets.get_syntax_set_by_name(name) {
Some(syntax_set) => Ok(syntax_set),
None => self.get_syntax_set(),
}
}

/// Use [Self::get_syntax_for_file_name] instead
#[deprecated]
pub fn syntax_for_file_name(
Expand All @@ -162,8 +118,12 @@ impl HighlightingAssets {
let file_name = file_name.as_ref();
Ok(match mapping.get_syntax_for(file_name) {
Some(MappingTarget::MapToUnknown) => None,
Some(MappingTarget::MapTo(syntax_name)) => self.find_syntax_by_name(syntax_name)?,
None => self.get_extension_syntax(file_name.as_os_str())?,
Some(MappingTarget::MapTo(syntax_name)) => {
self.minimal_assets.find_syntax_by_name(syntax_name)?
}
None => self
.minimal_assets
.get_extension_syntax(file_name.as_os_str())?,
})
}

Expand All @@ -184,18 +144,20 @@ impl HighlightingAssets {
}
}

/// Looks up the syntax matching the token `language` in the minimal assets.
///
/// # Errors
///
/// Propagates any error returned by the underlying
/// `minimal_assets.find_syntax_by_token` lookup, and returns
/// `Error::UnknownSyntax` carrying an owned copy of `language` when the
/// lookup succeeds but finds no match.
pub(crate) fn find_syntax_by_token(&self, language: &str) -> Result<SyntaxReferenceInSet> {
self.minimal_assets
.find_syntax_by_token(language)?
.ok_or_else(|| Error::UnknownSyntax(language.to_owned()))
}

pub(crate) fn get_syntax(
&self,
language: Option<&str>,
input: &mut OpenedInput,
mapping: &SyntaxMapping,
) -> Result<SyntaxReferenceInSet> {
if let Some(language) = language {
let syntax_set = self.get_syntax_set_by_name(language)?;
return syntax_set
.find_syntax_by_token(language)
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set })
.ok_or_else(|| Error::UnknownSyntax(language.to_owned()));
return self.find_syntax_by_token(language);
}

let path = input.path();
Expand All @@ -209,12 +171,14 @@ impl HighlightingAssets {
}

Some(MappingTarget::MapTo(syntax_name)) => self
.minimal_assets
.find_syntax_by_name(syntax_name)?
.ok_or_else(|| Error::UnknownSyntax(syntax_name.to_owned())),

None => {
let file_name = path.file_name().unwrap_or_default();
self.get_extension_syntax(file_name)?
self.minimal_assets
.get_extension_syntax(file_name)?
.ok_or_else(|| Error::UndetectedSyntax(path.to_string_lossy().into()))
}
}
Expand All @@ -232,55 +196,22 @@ impl HighlightingAssets {
}
}

fn find_syntax_by_name(&self, syntax_name: &str) -> Result<Option<SyntaxReferenceInSet>> {
let syntax_set = self.get_syntax_set()?;
Ok(syntax_set
.find_syntax_by_name(syntax_name)
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
}

fn find_syntax_by_extension(&self, e: Option<&OsStr>) -> Result<Option<SyntaxReferenceInSet>> {
let syntax_set = self.get_syntax_set()?;
let extension = e.and_then(|x| x.to_str()).unwrap_or_default();
Ok(syntax_set
.find_syntax_by_extension(extension)
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
}

fn get_extension_syntax(&self, file_name: &OsStr) -> Result<Option<SyntaxReferenceInSet>> {
let mut syntax = self.find_syntax_by_extension(Some(file_name))?;
if syntax.is_none() {
syntax = self.find_syntax_by_extension(Path::new(file_name).extension())?;
}
if syntax.is_none() {
syntax = try_with_stripped_suffix(file_name, |stripped_file_name| {
self.get_extension_syntax(stripped_file_name) // Note: recursion
})?;
}
Ok(syntax)
}

fn get_first_line_syntax(
&self,
reader: &mut InputReader,
) -> Result<Option<SyntaxReferenceInSet>> {
let syntax_set = self.get_syntax_set()?;
Ok(String::from_utf8(reader.first_line.clone())
.ok()
.and_then(|l| syntax_set.find_syntax_by_first_line(&l))
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
match String::from_utf8(reader.first_line.clone()).ok() {
Some(line) => self.minimal_assets.find_syntax_by_first_line(&line),
None => Ok(None),
}
}
}

pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] {
include_bytes!("../assets/syntaxes.bin")
}

/// Returns the theme set built from `assets/themes.bin`.
///
/// The file's bytes are embedded at compile time via `include_bytes!` and
/// handed to `from_binary` together with the `COMPRESS_THEMES` flag.
pub(crate) fn get_integrated_themeset() -> ThemeSet {
from_binary(include_bytes!("../assets/themes.bin"), COMPRESS_THEMES)
}

fn get_integrated_minimal_syntaxes() -> MinimalSyntaxes {
pub(crate) fn get_integrated_minimal_syntaxes() -> MinimalSyntaxes {
from_binary(
include_bytes!("../assets/minimal_syntaxes.bin"),
COMPRESS_MINIMAL_SYNTAXES,
Expand Down
2 changes: 2 additions & 0 deletions src/assets/assets_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ impl AssetsMetadata {
Ok(serde_yaml::from_reader(file)?)
}

/// TODO: Update the code below to account for minimal_syntaxes.bin.
///
/// Load metadata about the stored cache file from the given folder.
///
/// There are several possibilities:
Expand Down
Loading

0 comments on commit be1a62e

Please sign in to comment.