Skip to content

Commit

Permalink
Use Spotify for lyrics source
Browse files Browse the repository at this point in the history
  • Loading branch information
JingYenLoh committed Jun 26, 2023
1 parent c69186c commit 6908ed6
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 182 deletions.
4 changes: 1 addition & 3 deletions lyric_finder/examples/lyric-finder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,9 @@ async fn main() -> anyhow::Result<()> {
let result = client.get_lyric(&args[1]).await?;
match result {
lyric_finder::LyricResult::Some {
track,
artists,
lyric,
} => {
println!("{track} by {artists}'s lyric:\n{lyric}");
println!("lyric:\n{lyric}");
}
lyric_finder::LyricResult::None => {
println!("lyric not found!");
Expand Down
191 changes: 31 additions & 160 deletions lyric_finder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,15 @@
//! # }
//! ```
const SEARCH_BASE_URL: &str = "https://genius.com/api/search";
use crate::search::LyricsEntity;

/// A lyric-fetching client that performs its requests over HTTP.
pub struct Client {
/// The `reqwest` client used to send every HTTP request made by this client.
http: reqwest::Client,
}

#[derive(Debug)]
pub enum LyricResult {
Some {
track: String,
artists: String,
lyric: String,
},
Some { lyric: String },
None,
}

Expand All @@ -56,77 +52,31 @@ impl Client {
Self { http: http.clone() }
}

/// Search songs satisfying a given `query`.
pub async fn search_songs(&self, query: &str) -> anyhow::Result<Vec<search::Result>> {
log::debug!("search songs: query={query}");
/// Get the lyric of the track identified by the given `id`.
pub async fn get_lyric(&self, id: &str) -> anyhow::Result<LyricResult> {
let url = format!("https://api.lyricstify.vercel.app/v1/lyrics/{id}");

log::debug!("fetching for {}", &id);

let body = self
.http
.get(format!("{SEARCH_BASE_URL}?q={query}"))
.get(url)
.send()
.await?
.json::<search::Body>()
.json::<LyricsEntity>()
.await?;

if body.meta.status != 200 {
let message = match body.meta.message {
Some(m) => m,
None => format!("request failed with status code: {}", body.meta.status),
};
anyhow::bail!(message);
}

Ok(body
.response
.map(|r| {
r.hits
.into_iter()
.filter(|hit| hit.ty == "song")
.map(|hit| hit.result)
.collect::<Vec<_>>()
})
.unwrap_or_default())
}

/// Download the page at a "genius.com" `url` and extract the song's lyric from it.
///
/// The extracted lyric is returned with leading and trailing whitespace trimmed.
///
/// # Errors
///
/// Fails if the request cannot be sent, the response body cannot be read as text,
/// or the HTML cannot be parsed.
pub async fn retrieve_lyric(&self, url: &str) -> anyhow::Result<String> {
    let response = self.http.get(url).send().await?;
    let html = response.text().await?;
    log::debug!("retrieve lyric from url={url}: html={html}");

    parse::parse(html).map(|lyric| lyric.trim().to_string())
}

/// Normalize the spacing in front of section headers of a crawled lyric.
///
/// Lyrics scraped from the [Genius](https://genius.com) website may separate sections
/// (which often start with `[`) with either one or two newlines. This ad-hoc fix
/// rewrites both forms so that a section header is preceded by a blank line.
fn process_lyric(lyric: String) -> String {
    // Step 1: shrink an existing blank-line separator down to a single newline so that
    // both spacing variants look the same.
    let single_spaced = lyric.replace("\n\n[", "\n[");

    // Step 2: widen every single-newline separator back out to a blank line.
    single_spaced.replace("\n[", "\n\n[")
}

/// Get the lyric of a song satisfying a given `query`.
pub async fn get_lyric(&self, query: &str) -> anyhow::Result<LyricResult> {
// The function first searches songs satisfying the query
// then it retrieves the song's lyric by crawling the "genius.com" website.

let result = {
let mut results = self.search_songs(query).await?;
log::debug!("search results: {results:?}");
if results.is_empty() {
return Ok(LyricResult::None);
}
results.remove(0)
};

let lyric = self.retrieve_lyric(&result.url).await?;
Ok(LyricResult::Some {
track: result.title,
artists: result.artist_names,
lyric: Self::process_lyric(lyric),
})
let lyric = body
.lyrics
.lines
.iter()
.fold(String::new(), |mut acc, line| {
acc.push_str(&line.words);
acc.push('\n');
acc
});

Ok(LyricResult::Some { lyric })
}
}

Expand All @@ -136,104 +86,25 @@ impl Default for Client {
}
}

mod parse {
    use html5ever::tendril::TendrilSink;
    use html5ever::*;
    use markup5ever_rcdom::{Handle, NodeData, RcDom};

    /// Name of the attribute that marks an element as a lyric container.
    const LYRIC_CONTAINER_ATTR: &str = "data-lyrics-container";

    /// Extract the lyric text from the HTML of a "genius.com" lyric page.
    ///
    /// The document is parsed into a DOM tree, then the text found under every element
    /// carrying the `data-lyrics-container` attribute is concatenated, with each `<br>`
    /// element turned into a newline.
    ///
    /// # Errors
    ///
    /// Fails if reading the HTML bytes into the DOM parser fails.
    pub fn parse(html: String) -> anyhow::Result<String> {
        /// Whether a node is an element that has the lyric container attribute.
        fn is_lyric_container(data: &NodeData) -> bool {
            match data {
                NodeData::Element { attrs, .. } => attrs
                    .borrow()
                    .iter()
                    .any(|attr| attr.name.local.to_string() == LYRIC_CONTAINER_ATTR),
                _ => false,
            }
        }

        // build the DOM tree from the raw HTML bytes
        let dom = parse_document(RcDom::default(), Default::default())
            .from_utf8()
            .read_from(&mut (html.as_bytes()))?;

        Ok(parse_dom_node(dom.document, &Some(is_lyric_container), false))
    }

    /// Walk a DOM subtree and gather the text of the nodes that should be parsed.
    ///
    /// A node should be parsed when `should_parse` is already `true` (an ancestor
    /// qualified) or when the optional `filter` accepts the node's data. Once a node
    /// qualifies, all of its descendants qualify as well.
    fn parse_dom_node<F>(node: Handle, filter: &Option<F>, mut should_parse: bool) -> String
    where
        F: Fn(&NodeData) -> bool,
    {
        log::debug!("parse dom node: node={node:?}, should_parse={should_parse}");

        // the filter is only consulted while no ancestor has qualified yet
        should_parse = should_parse || matches!(filter, Some(f) if f(&node.data));

        let mut text = String::new();

        match &node.data {
            NodeData::Text { contents } if should_parse => {
                text.push_str(&contents.borrow().to_string());
            }
            NodeData::Element { ref name, .. } if should_parse => {
                // a line break element contributes a newline to the output
                if let expanded_name!(html "br") = name.expanded() {
                    text.push('\n');
                }
            }
            _ => {}
        }

        // descend into the children, propagating the current parsing decision
        for child in node.children.borrow().iter() {
            text.push_str(&parse_dom_node(child.clone(), filter, should_parse));
        }

        text
    }
}

mod search {
use serde::Deserialize;

#[derive(Debug, Deserialize)]
pub struct Body {
pub meta: Metadata,
pub response: Option<Response>,
}

#[derive(Debug, Deserialize)]
pub struct Metadata {
pub status: u16,
pub message: Option<String>,
}

#[derive(Debug, Deserialize)]
pub struct Response {
pub hits: Vec<Hit>,
pub struct LyricsEntity {
pub lyrics: Lyrics,
}

#[derive(Debug, Deserialize)]
pub struct Hit {
#[serde(rename(deserialize = "type"))]
pub ty: String,
pub result: Result,
pub struct Lyrics {
#[serde(rename = "syncType")]
pub sync_type: String,
pub lines: Vec<Line>,
pub language: String,
}

#[derive(Debug, Deserialize)]
pub struct Result {
pub url: String,
pub title: String,
pub artist_names: String,
pub struct Line {
#[serde(rename = "startTimeMs")]
pub start_time_ms: u64,
pub words: String,
}
}
6 changes: 4 additions & 2 deletions spotify_player/src/client/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ pub async fn start_player_event_watchers(
track,
artists,
scroll_offset,
..
} => {
if let Some(current_track) = state.player.read().current_playing_track() {
if current_track.name != *track {
Expand All @@ -199,10 +200,11 @@ pub async fn start_player_event_watchers(
*artists = map_join(&current_track.artists, |a| &a.name, ", ");
*scroll_offset = 0;

let track_id = current_track.id.clone().unwrap().to_string();

client_pub
.send(ClientRequest::GetLyric {
track: track.clone(),
artists: artists.clone(),
track_id,
})
.unwrap_or_default();
}
Expand Down
25 changes: 17 additions & 8 deletions spotify_player/src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,16 +202,25 @@ impl Client {
.insert(category.id, playlists);
}
#[cfg(feature = "lyric-finder")]
ClientRequest::GetLyric { track, artists } => {
ClientRequest::GetLyric { track_id } => {
let client = lyric_finder::Client::from_http_client(&self.http);
let query = format!("{track} {artists}");

if !state.data.read().caches.lyrics.contains(&query) {
let result = client.get_lyric(&query).await.context(format!(
"failed to get lyric for track {track} - artists {artists}"
))?;
// let query = format!("{track} {artists}");

if !state.data.read().caches.lyrics.contains(&track_id) {
let stripped = track_id.strip_prefix("spotify:track:").unwrap_or(&track_id);
let result = client
.get_lyric(&stripped)
.await
.context(format!("failed to get lyric for track TODO - artists TODO"))?;

match result {
lyric_finder::LyricResult::None => {
tracing::info!("no lyric found for track {}", stripped)
}
_ => tracing::info!("found lyric for track {}", stripped),
}

state.data.write().caches.lyrics.put(query, result);
state.data.write().caches.lyrics.put(track_id, result);
}
}
ClientRequest::ConnectDevice(id) => {
Expand Down
7 changes: 3 additions & 4 deletions spotify_player/src/event/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ pub enum ClientRequest {
GetCurrentUserQueue,
#[cfg(feature = "lyric-finder")]
GetLyric {
track: String,
artists: String,
track_id: String,
},
#[cfg(feature = "streaming")]
NewStreamingConnection,
Expand Down Expand Up @@ -350,13 +349,13 @@ fn handle_global_command(
let artists = map_join(&track.artists, |a| &a.name, ", ");
ui.create_new_page(PageState::Lyric {
track: track.name.clone(),
track_id: track.id.clone().unwrap().to_string(),
artists: artists.clone(),
scroll_offset: 0,
});

client_pub.send(ClientRequest::GetLyric {
track: track.name.clone(),
artists,
track_id: track.id.clone().unwrap().to_string(),
})?;
}
}
Expand Down
1 change: 1 addition & 0 deletions spotify_player/src/state/ui/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pub enum PageState {
#[cfg(feature = "lyric-finder")]
Lyric {
track: String,
track_id: String,
artists: String,
scroll_offset: usize,
},
Expand Down
10 changes: 5 additions & 5 deletions spotify_player/src/ui/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -501,16 +501,18 @@ pub fn render_lyric_page(
.split(rect);

// 3. Construct the app's widgets
let (track, artists, scroll_offset) = match ui.current_page_mut() {
let (track, track_id, artists, scroll_offset) = match ui.current_page_mut() {
PageState::Lyric {
track,
track_id,
artists,
scroll_offset,
} => (track, artists, scroll_offset),
} => (track, track_id, artists, scroll_offset),
s => anyhow::bail!("expect a lyric page state, found {s:?}"),
};

let (desc, lyric) = match data.caches.lyrics.peek(&format!("{track} {artists}")) {
// TODO query lru cache properly
let (desc, lyric) = match data.caches.lyrics.peek(track_id) {
None => {
frame.render_widget(Paragraph::new("Loading..."), rect);
return Ok(());
Expand All @@ -520,8 +522,6 @@ pub fn render_lyric_page(
return Ok(());
}
Some(lyric_finder::LyricResult::Some {
track,
artists,
lyric,
}) => (format!("{track} by {artists}"), format!("\n{lyric}")),
};
Expand Down

0 comments on commit 6908ed6

Please sign in to comment.