Skip to content

Commit

Permalink
Added lyrics processing to split lines
Browse files Browse the repository at this point in the history
  • Loading branch information
beveradb committed Dec 19, 2023
1 parent 2d988de commit ac774d9
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 5 deletions.
60 changes: 56 additions & 4 deletions karaoke_prep/karaoke_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def __init__(
}

self.persistent_artist = None

self.logger.debug(f"KaraokePrep output_format: {self.output_format}")

if not os.path.exists(self.output_dir):
Expand Down Expand Up @@ -171,6 +171,7 @@ def write_lyrics_from_genius(self, artist, title, filename):
f.write(lyrics)

self.logger.info("Lyrics for %s by %s fetched successfully", title, artist)
return lyrics.split('\n')
else:
self.logger.warning("Could not find lyrics for %s by %s", title, artist)

Expand All @@ -187,6 +188,52 @@ def clean_genius_lyrics(self, lyrics):
# add any additional cleaning rules here
return lyrics

def find_best_split_point(self, line):
    """
    Find the character index at which a long lyrics line should be split in two.

    Candidates are tried in order of preference:

    1. The comma closest to the word-based midpoint of the line, provided it
       is within 20 characters of that midpoint and is followed by more
       text. The comma stays on the first half.
    2. The " and " closest to the character midpoint of the line. The word
       "and" stays on the first half.
    3. The boundary just before the middle word.

    Args:
        line: The lyrics line to split.

    Returns:
        An index ``i`` such that ``line[:i]`` and ``line[i:]`` are the two
        halves. May be 0 when the line contains fewer than two words.
    """

    self.logger.debug(f"Finding best_split_point for line: {line}")
    words = line.split()
    mid_word_index = len(words) // 2
    # Length of the text preceding the middle word (words re-joined by single spaces)
    words_mid_point = len(" ".join(words[:mid_word_index]))

    # Check for a comma within one or two words of the middle word
    max_comma_distance = 20  # Roughly the length of two average words
    # A comma that ends the line is not a usable split point: the second half would be empty
    last_char_index = len(line.rstrip()) - 1
    comma_indices = [
        i
        for i, char in enumerate(line)
        if char == "," and i < last_char_index and abs(words_mid_point - i) < max_comma_distance
    ]
    if comma_indices:
        # Prefer the comma nearest the midpoint, not merely the first one in range
        index = min(comma_indices, key=lambda i: abs(words_mid_point - i))
        mid_point = words_mid_point
        self.logger.debug(f"Found comma at index {index} which is within 20 characters of mid_point {mid_point}, accepting as split point")
        return index + 1  # Include the comma in the first line

    # Check for 'and'
    if " and " in line:
        mid_point = len(line) // 2
        and_indices = [m.start() for m in re.finditer(" and ", line)]
        split_point = min(and_indices, key=lambda x: abs(x - mid_point))
        self.logger.debug(f"Found 'and' at index {split_point} which is close to mid_point {mid_point}, accepting as split point")
        return split_point + len(" and ")  # Include 'and' in the first line

    # Split at the middle word
    self.logger.debug("No comma or suitable 'and' found, using middle word as split point")
    return words_mid_point

def write_processed_lyrics(self, lyrics, processed_lyrics_file, max_line_length=40):
    """
    Write lyrics to a file, splitting over-long lines in two.

    Each line is stripped of surrounding whitespace. Lines longer than
    ``max_line_length`` characters are split once, at the index returned by
    ``self.find_best_split_point``, and written as two lines; an empty half
    is skipped rather than written as a blank line. All other lines are
    written unchanged.

    Args:
        lyrics: Iterable of lyrics lines, or None when no lyrics are
            available (in which case nothing is written).
        processed_lyrics_file: Path of the output file to create/overwrite.
        max_line_length: Longest line, in characters, written without splitting.
    """
    if lyrics is None:
        # The Genius fetch yields no line list when lyrics could not be found
        self.logger.warning(f"No lyrics available, skipping processed lyrics file: {processed_lyrics_file}")
        return

    self.logger.debug(f"Writing processed lyrics to {processed_lyrics_file}")

    # Explicit UTF-8: lyrics commonly contain characters outside the platform default encoding
    with open(processed_lyrics_file, "w", encoding="utf-8") as outfile:
        for line in lyrics:
            line = line.strip()
            if len(line) > max_line_length:
                self.logger.debug(f"Line is longer than {max_line_length} characters, splitting at best split point: {line}")
                split_point = self.find_best_split_point(line)
                halves = (line[:split_point].strip(), line[split_point:].strip())
                # Skip an empty half (e.g. split point at 0 or at the end of the line)
                outfile.writelines(half + "\n" for half in halves if half)
            else:
                self.logger.debug(f"Line is shorter than {max_line_length} characters, writing as-is: {line}")
                outfile.write(line + "\n")

def sanitize_filename(self, filename):
"""Replace or remove characters that are unsafe for filenames."""
# Replace problematic characters with underscores
Expand Down Expand Up @@ -298,10 +345,12 @@ def create_intro_video(self, artist, title, format, output_image_filepath, outpu
artist_font, _ = self.calculate_text_size_and_position(
draw, artist, format["artist_font"], initial_font_size, resolution, artist_padding
)

# Calculate vertical positions with consistent gap
title_text_position, title_height = self.calculate_text_position(draw, title, title_font, resolution, top_padding)
artist_text_position, _ = self.calculate_text_position(draw, artist, artist_font, resolution, title_text_position[1] + title_height + fixed_gap)
artist_text_position, _ = self.calculate_text_position(
draw, artist, artist_font, resolution, title_text_position[1] + title_height + fixed_gap
)

draw.text(title_text_position, title, fill=format["title_color"], font=title_font)
draw.text(artist_text_position, artist, fill=format["artist_color"], font=artist_font)
Expand Down Expand Up @@ -400,13 +449,16 @@ def prep_single_track(self):
self.logger.warning(f"Skipping {title} by {artist} due to missing YouTube ID.")

lyrics_file = os.path.join(track_output_dir, f"{artist_title} (Lyrics).txt")
processed_lyrics_file = os.path.join(track_output_dir, f"{artist_title} (Lyrics Processed).txt")
if os.path.exists(lyrics_file):
self.logger.debug(f"Lyrics file already exists, skipping fetch: {lyrics_file}")
else:
self.logger.info("Fetching lyrics from Genius...")
self.write_lyrics_from_genius(artist, title, lyrics_file)
lyrics = self.write_lyrics_from_genius(artist, title, lyrics_file)
self.write_processed_lyrics(lyrics, processed_lyrics_file)

processed_track["lyrics"] = lyrics_file
processed_track["processed_lyrics"] = processed_lyrics_file

self.logger.info(f"Separating audio twice for track: {title} by {artist}")

Expand Down
1 change: 1 addition & 0 deletions karaoke_prep/utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ def main():
logger.info(f" YouTube Audio: {track['youtube_audio']}")
logger.info(f" YouTube Still Image: {track['youtube_still_image']}")
logger.info(f" Lyrics: {track['lyrics']}")
logger.info(f" Processed Lyrics: {track['processed_lyrics']}")
logger.info(f" Instrumental: {track['instrumental_audio']}")
logger.info(f" Vocals: {track['vocals_audio']}")

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "karaoke-prep"
version = "0.4.9"
version = "0.5.0"
description = "Prepare for karaoke video creation, by downloading audio and lyrics for a specified song or youtube playlist and separating audio stems."
authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
license = "MIT"
Expand Down

0 comments on commit ac774d9

Please sign in to comment.