From b764ae0e62444818114eb71b294c22b7cc8c4093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Toma=C5=9Bko?= Date: Thu, 22 Aug 2024 19:00:07 +0200 Subject: [PATCH] #79 add `UniCase` for faster case-insensitive comparisons, faster `find` fns --- Cargo.lock | 16 +++++++++ Cargo.toml | 1 + benches/search.rs | 80 +++++++++++++++++------------------------ endsong_ui/Cargo.lock | 1 + endsong_ui/src/trace.rs | 6 ++-- src/aspect.rs | 61 ++++++++++++++++++++----------- src/entry.rs | 6 ++-- src/find.rs | 12 +++---- src/gather.rs | 8 +++-- src/parse.rs | 5 +-- 10 files changed, 112 insertions(+), 84 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 281d4ab..859a904 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -234,6 +234,7 @@ dependencies = [ "serde_json", "thiserror", "tracing", + "unicase", ] [[package]] @@ -634,12 +635,27 @@ dependencies = [ "once_cell", ] +[[package]] +name = "unicase" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" +dependencies = [ + "version_check", +] + [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index f064741..1efec1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ chrono = "0.4" itertools = "0.13" tracing = "0.1" thiserror = "1.0" +unicase = "2.7" [dev-dependencies] criterion = "0.5" diff --git a/benches/search.rs b/benches/search.rs index a16b5ef..c3a5523 100644 --- a/benches/search.rs +++ b/benches/search.rs @@ -91,6 +91,16 @@ fn parse(c: &mut Criterion) { }) }); + // let a = SongEntries::new(&paths) + // .unwrap() + // .sum_different_capitalization() + // .filter(30, TimeDelta::seconds(10)); + // let b = SongEntries::new(&paths) + // .unwrap() + // .new_sum_different_capitalization() + // .filter(30, TimeDelta::seconds(10)); + // assert!(a.iter().eq(b.iter())); + c.bench_function("parse, sum and filter", |c| { c.iter(|| { black_box( @@ -101,6 +111,17 @@ fn parse(c: &mut Criterion) { ); }) }); + + // c.bench_function("new parse, sum and filter", |c| { + // c.iter(|| { + // black_box( + // SongEntries::new(&paths) + // .unwrap() + // .new_sum_different_capitalization() + // .filter(30, TimeDelta::seconds(10)), + // ); + // }) + // }); } // not related to this at all but just curious xd @@ -213,52 +234,14 @@ fn capitalization(c: &mut Criterion) { #[allow(dead_code)] fn find(c: &mut Criterion) { let entries = black_box(SongEntries::new(&paths(0, 9)).unwrap()); - let usr_song_one = Song::new("MUKANJYO", "MUKANJYO", "Survive Said The Prophet"); - let usr_song_two = Song::new("MUKANJYO", "Mukanjyo", "Survive Said The Prophet"); - - let one = entries - .iter() - .find(|entry| usr_song_one.is_entry_lowercase(entry)) - .map(Song::from) - .unwrap(); - let two = entries - .iter() - .find(|entry| usr_song_two.is_entry_lowercase(entry)) - .map(Song::from) - .unwrap(); - dbg!(one, two); - - let (song_name, artist_name) = ( - usr_song_one.name.to_lowercase(), - usr_song_one.album.artist.name.to_lowercase(), - ); - - let onetwo = entries - .iter() - .filter(|entry| { - entry.track.to_lowercase() == song_name && entry.artist.to_lowercase() == artist_name - }) - .unique() - .map(Song::from) - .collect_vec(); - dbg!(onetwo); c.bench_function("find v1", |c| { c.iter(|| { - entries - .iter() - .find(|entry| usr_song_one.is_entry_lowercase(entry)) - .map(Song::from) - }) - }); - c.bench_function("find v2", |c| { - c.iter(|| { - entries - .iter() - .filter(|entry| usr_song_one.is_entry_lowercase(entry)) - .map(Song::from) - .unique() - .collect_vec() + black_box(entries.find().song_from_album( + "MUKANJYO", + "MUKANJYO", + "Survive Said The Prophet", + )); }) }); @@ -267,7 +250,7 @@ fn find(c: &mut Criterion) { c.iter(|| { entries .iter() - .find(|entry| art.is_entry_lowercase(entry)) + .find(|entry| art.is_entry_ignore_case(entry)) .map(Artist::from) }) }); @@ -275,19 +258,22 @@ fn find(c: &mut Criterion) { c.iter(|| { entries .iter() - .filter(|entry| art.is_entry_lowercase(entry)) + .filter(|entry| art.is_entry_ignore_case(entry)) .map(Artist::from) .unique() .collect_vec() }) }); + c.bench_function("find song", |c| { + c.iter(|| black_box(entries.find().song("mukanjyo", "survive said THE PROPHET"))) + }); } // criterion_group!(benches, lol); // criterion_group!(benches, kekw); -criterion_group!(benches, parse); +// criterion_group!(benches, parse); // criterion_group!(benches, unique_sum); -// criterion_group!(benches, gather); +criterion_group!(benches, gather); // criterion_group!(benches, capitalization); // criterion_group!(benches, find); criterion_main!(benches); diff --git a/endsong_ui/Cargo.lock b/endsong_ui/Cargo.lock index f72e598..8e77a79 100644 --- a/endsong_ui/Cargo.lock +++ b/endsong_ui/Cargo.lock @@ -372,6 +372,7 @@ dependencies = [ "serde_json", "thiserror", "tracing", + "unicase", ] [[package]] diff --git a/endsong_ui/src/trace.rs b/endsong_ui/src/trace.rs index 13240e0..f47470c 100644 --- a/endsong_ui/src/trace.rs +++ b/endsong_ui/src/trace.rs @@ -70,7 +70,7 @@ pub fn absolute_ignore_album(entries: &SongEntries, song: &Song) -> TraceType { for entry in entries .iter() - .filter(|entry| song.is_entry_lowercase_ignore_album(entry)) + .filter(|entry| song.is_entry_ignore_album_and_case(entry)) { song_plays += 1; times.push(format_date(&entry.timestamp)); @@ -224,7 +224,7 @@ pub mod relative { for entry in entries.iter() { all_plays += 1.0; - if song.is_entry_lowercase_ignore_album(entry) { + if song.is_entry_ignore_album_and_case(entry) { aspect_found = true; aspect_plays += 1.0; } @@ -263,7 +263,7 @@ pub mod relative { for entry in entries.iter().filter(|entry| artist.is_entry(entry)) { artist_plays += 1.0; - if song.is_entry_lowercase_ignore_album(entry) { + if song.is_entry_ignore_album_and_case(entry) { aspect_found = true; aspect_plays += 1.0; } diff --git a/src/aspect.rs b/src/aspect.rs index 7f68a35..051678a 100644 --- a/src/aspect.rs +++ b/src/aspect.rs @@ -49,6 +49,8 @@ use std::cmp::Ordering; use std::fmt::Display; use std::rc::Rc; +use unicase::UniCase; + use crate::entry::SongEntry; /// Used for functions that accept either @@ -57,10 +59,10 @@ pub trait Music: Display + Clone + Eq + Ord { /// Checks if a [`SongEntry`] is a [`Music`] fn is_entry(&self, entry: &SongEntry) -> bool; - /// Checks if a [`SongEntry`] is a [`Music`] but case insensitive + /// Checks if a [`SongEntry`] is a [`Music`] but case-insensitive /// - /// Performs `.to_lowercase()` on both `entry` and on [`self`]. - fn is_entry_lowercase(&self, entry: &SongEntry) -> bool; + /// Uses [`UniCase`] for fast case-insensitive comparison `entry` between [`self`]. + fn is_entry_ignore_case(&self, entry: &SongEntry) -> bool; } /// Trait used to accept only [`Artist`] and [`Album`] @@ -150,8 +152,8 @@ impl Music for Artist { fn is_entry(&self, entry: &SongEntry) -> bool { entry.artist == self.name } - fn is_entry_lowercase(&self, entry: &SongEntry) -> bool { - entry.artist.to_lowercase() == self.name.to_lowercase() + fn is_entry_ignore_case(&self, entry: &SongEntry) -> bool { + UniCase::new(&entry.artist) == UniCase::new(&self.name) } } impl HasSongs for Artist {} @@ -256,11 +258,11 @@ impl AsRef for Album { } impl Music for Album { fn is_entry(&self, entry: &SongEntry) -> bool { - entry.artist == self.artist.name && entry.album == self.name + self.artist.is_entry(entry) && entry.album == self.name } - fn is_entry_lowercase(&self, entry: &SongEntry) -> bool { - entry.artist.to_lowercase() == self.artist.name.to_lowercase() - && entry.album.to_lowercase() == self.name.to_lowercase() + fn is_entry_ignore_case(&self, entry: &SongEntry) -> bool { + self.artist.is_entry_ignore_case(entry) + && UniCase::new(&entry.album) == UniCase::new(&self.name) } } impl HasSongs for Album {} @@ -292,11 +294,10 @@ impl Song { } /// Checks if a [`SongEntry`] is this song, but only regarding artist and track name, - /// ignoring album name + /// ignoring album name and capitalization for track name #[must_use] - pub fn is_entry_lowercase_ignore_album(&self, entry: &SongEntry) -> bool { - entry.artist.to_lowercase() == self.album.artist.name.to_lowercase() - && entry.track.to_lowercase() == self.name.to_lowercase() + pub fn is_entry_ignore_album_and_case(&self, entry: &SongEntry) -> bool { + self.album.artist.is_entry(entry) && UniCase::new(&entry.track) == UniCase::new(&self.name) } } impl Clone for Song { @@ -380,14 +381,11 @@ impl AsRef for Song { } impl Music for Song { fn is_entry(&self, entry: &SongEntry) -> bool { - entry.artist == self.album.artist.name - && entry.album == self.album.name - && entry.track == self.name + self.album.is_entry(entry) && entry.track == self.name } - fn is_entry_lowercase(&self, entry: &SongEntry) -> bool { - entry.artist.to_lowercase() == self.album.artist.name.to_lowercase() - && entry.album.to_lowercase() == self.album.name.to_lowercase() - && entry.track.to_lowercase() == self.name.to_lowercase() + fn is_entry_ignore_case(&self, entry: &SongEntry) -> bool { + self.album.is_entry_ignore_case(entry) + && UniCase::new(&entry.track) == UniCase::new(&self.name) } } @@ -547,6 +545,29 @@ mod tests { ); } + #[test] + fn aspect_equality() { + let entry = SongEntry { + timestamp: crate::parse_date("now").unwrap(), + time_played: chrono::TimeDelta::zero(), + track: Rc::from("White Death"), + album: Rc::from("Coat of Arms"), + artist: Rc::from("Sabaton"), + }; + + assert!(Artist::new("Sabaton").is_entry(&entry)); + assert!(!Artist::new("SABATON").is_entry(&entry)); + assert!(Artist::new("SABATON").is_entry_ignore_case(&entry)); + + assert!(Album::new("Coat of Arms", "Sabaton").is_entry(&entry)); + assert!(!Album::new("Coat Of Arms", "SABATON").is_entry(&entry)); + assert!(Album::new("Coat Of Arms", "SABATON").is_entry_ignore_case(&entry)); + + assert!(Song::new("White Death", "Coat of Arms", "Sabaton").is_entry(&entry)); + assert!(!Song::new("white death", "Coat Of Arms", "SABATON").is_entry(&entry)); + assert!(Song::new("white death", "Coat Of Arms", "SABATON").is_entry_ignore_case(&entry)); + } + #[test] fn test_dates() { // MAYBE RATHER INTEGRATION TEST THAN UNIT TEST?! diff --git a/src/entry.rs b/src/entry.rs index 8e7eb12..3689743 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -50,8 +50,8 @@ pub struct SongEntry { pub album: Rc, /// name of the artist pub artist: Rc, - /// Spotify URI - pub id: String, + // /// Spotify URI + // pub id: String, } /// Equal if `artist`, `album` and `track` name are the same impl PartialEq for SongEntry { @@ -174,7 +174,7 @@ impl SongEntries { // the last album in the vector is the one that will be kept // cause it's the most recent one - // key: albym, value: newest album name + // key: album, value: newest album name let mut album_mappings: HashMap> = HashMap::new(); for alb in albums { diff --git a/src/find.rs b/src/find.rs index 65f09e5..90511d5 100644 --- a/src/find.rs +++ b/src/find.rs @@ -37,7 +37,7 @@ pub fn artist(entries: &[SongEntry], artist_name: &str) -> Option> { let artists = entries .iter() - .filter(|entry| usr_artist.is_entry_lowercase(entry)) + .filter(|entry| usr_artist.is_entry_ignore_case(entry)) .map(Artist::from) .unique() .collect_vec(); @@ -65,7 +65,7 @@ pub fn album(entries: &[SongEntry], album_name: &str, artist_name: &str) -> Opti let albums = entries .iter() - .filter(|entry| usr_album.is_entry_lowercase(entry)) + .filter(|entry| usr_album.is_entry_ignore_case(entry)) .map(Album::from) .unique() .collect_vec(); @@ -99,7 +99,7 @@ pub fn song_from_album( let songs = entries .iter() - .filter(|entry| usr_song.is_entry_lowercase(entry)) + .filter(|entry| usr_song.is_entry_ignore_case(entry)) .map(Song::from) .unique() .collect_vec(); @@ -119,13 +119,11 @@ pub fn song_from_album( /// /// See #2 pub fn song(entries: &[SongEntry], song_name: &str, artist_name: &str) -> Option> { - let (song_name, artist_name) = (song_name.to_lowercase(), artist_name.to_lowercase()); + let song_bogus_album = Song::new(song_name, "", artist_name); let song_versions = entries .iter() - .filter(|entry| { - entry.track.to_lowercase() == song_name && entry.artist.to_lowercase() == artist_name - }) + .filter(|entry| song_bogus_album.is_entry_ignore_album_and_case(entry)) .map(Song::from) .unique() .collect_vec(); diff --git a/src/gather.rs b/src/gather.rs index 2c8653c..f8a7cfc 100644 --- a/src/gather.rs +++ b/src/gather.rs @@ -64,7 +64,9 @@ pub fn songs(entries: &[SongEntry]) -> HashMap { /// and artist name are the same -> ignores the album and capitalization /// /// It matters because oftentimes the same song will be in many albums (or singles). -/// It's case-INSENSITIVE! +/// It's case-INSENSITIVE in regards to the song name! +/// +/// The [`Album`] of the returned [`Song`] is the one the song has the most plays from. /// /// # Panics /// @@ -139,7 +141,9 @@ pub fn songs_from(entries: &[SongEntry], aspect: &Asp) -> HashMap /// Returns a map with all [`Songs`][Song] from the `artist` with their playcount /// while ignoring the album the song is in and its capitalization /// -/// It's case-INSENSITIVE +/// It's case-INSENSITIVE in regards to the song name! +/// +/// The [`Album`] of the returned [`Song`] is the one the song has the most plays from. /// /// See [`songs_summed_across_albums`] for similar /// diff --git a/src/parse.rs b/src/parse.rs index ac692e6..5ed01bd 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -82,7 +82,8 @@ struct Entry { /// Option because the field will be empty if it's a podcast master_metadata_album_album_name: Option, /// Spotify URI (ID) - spotify_track_uri: Option, + #[serde(skip_deserializing)] + _spotify_track_uri: Option, /// TBD: Podcast stuff #[serde(skip_deserializing)] _episode_name: (), @@ -222,7 +223,7 @@ fn entry_to_songentry( track, album, artist, - id: entry.spotify_track_uri?, + // id: entry.spotify_track_uri?, }) }