-
Notifications
You must be signed in to change notification settings - Fork 142
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base setup for mangadex import; handling base cases #550
base: the-future
Are you sure you want to change the base?
Changes from all commits
65754d8
d9f342b
5553671
0f25337
37450b2
73e8efb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
class MangadexImport | ||
attr_reader :file_location | ||
|
||
LANGUAGES = { | ||
'arabic' => 'ar', | ||
'bulgarian' => 'bg', | ||
'burmese' => 'my', | ||
'catalan' => 'ca', | ||
'chinese (trad)' => 'zh_Hant', | ||
'chinese (simp)' => 'zh_Hans', | ||
'english' => 'en', | ||
'filipino' => 'fil', | ||
'french' => 'fr', | ||
'german' => 'de', | ||
'hungarian' => 'hu', | ||
'indonesian' => 'id_in', | ||
'italian' => 'it', | ||
'japanese' => 'en_jp', | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this correct? |
||
# 'japanese' => 'ja_jp', | ||
'korean' => 'ko', | ||
'malay' => 'ms', | ||
'persian' => 'fa', | ||
'polish' => 'pl', | ||
'portuguese (br)' => 'pt_br', | ||
'portuguese (pt)' => 'pt', | ||
'romanian' => 'ro', | ||
'russian' => 'ru', | ||
'spanish' => 'es', | ||
'spanish (es)' => 'es', | ||
'spanish (latem)' => 'es', | ||
'thai' => 'th', | ||
'turkish' => 'tr', | ||
'vietnamese' => 'vi' | ||
}.freeze | ||
|
||
# Builds an importer for a single MangaDex NDJSON dump.
#
# @param file_location [String, nil] path to the NDJSON file; when omitted
#   (or falsy) the hard-coded temp batch path below is used instead.
#   TODO: ideally the caller always passes the location in.
def initialize(file_location = nil)
  fallback_path = 'tmp/mangadex_import/manga-batch-1-temp.ndjson'
  @file_location = file_location || fallback_path
end
|
||
# Walks every entry of the MangaDex dump, resolves the Kitsu manga id for it
# and hands the (Kitsu record, MangaDex payload) pair to Row for merging.
#
# Resolution order:
#   1. the MyAnimeList mapping, when the entry carries a MAL id;
#   2. a title-based guess (followed by the #validate_kitsu_id hook).
# #kitsu_data is called with whatever id came out of that, blank or not.
def import!
  each_mangadex_entry do |data, name, mal_id|
    kitsu_id = nil
    kitsu_id = kitsu_id_by_mal_id(mal_id) unless mal_id.blank?

    unless kitsu_id.present?
      kitsu_id = kitsu_id_by_name(name)
      validate_kitsu_id(kitsu_id)
    end

    puts "Kitsu Id: #{kitsu_id}"

    Row.new(kitsu_data(kitsu_id), data).create_or_update
  end
end
|
||
# Streams the NDJSON dump at #file_location one line at a time and yields
# each parsed entry together with the two values used to match it on Kitsu.
#
# Blank lines are skipped: they are not valid JSON and would otherwise abort
# the whole import with a JSON::ParserError.
#
# @yieldparam entry [Hash] the parsed JSON document for one manga
# @yieldparam name [Object] entry['title']['name']
# @yieldparam mal_id [String, nil] result of #formatted_mal_id for the
#   entry's external_links -> MyAnimeList value
# @return [Enumerator] when called without a block
def each_mangadex_entry
  return enum_for(:each_mangadex_entry) unless block_given?

  File.foreach(file_location) do |raw_line|
    next if raw_line.strip.empty?

    entry = JSON.parse(raw_line)
    name = entry['title']['name']
    mal_id = formatted_mal_id(entry.dig('external_links', 'MyAnimeList'))

    yield entry, name, mal_id
  end
end
|
||
# Extracts the MyAnimeList manga id from an external link.
#
# The numeric segment following "/manga/" is preferred, so links that carry
# a trailing title slug, query string or fragment still resolve to the id.
# When no such segment exists, the last path segment is returned (the
# previous behaviour).
#
# @param url [String, nil] MyAnimeList link taken from the MangaDex payload
# @return [String, nil] the id as a string, or nil when url is blank
def formatted_mal_id(url)
  return nil if url.blank?

  # Drop "?query" / "#fragment" so they cannot end up inside the id.
  path = url.to_s.sub(/[?#].*\z/m, '')
  path[%r{/manga/(\d+)}, 1] || path.split('/').last
end
|
||
# Placeholder hook invoked after a title-based id guess.
#
# It performs no checks yet and always returns nil: it is still undecided
# what a guessed id should be validated against, given that the subtype is
# already known to be correct at this point.
#
# @param kitsu_id [Integer, nil] id produced by the title guess (unused)
# @return [nil]
def validate_kitsu_id(kitsu_id)
  nil
end
|
||
private | ||
|
||
# Looks up the Kitsu item id through the 'myanimelist/manga' mapping.
#
# @param mal_id [String] MyAnimeList id extracted from the MangaDex entry
# @return [Object, nil] item_id of the first matching Mapping, or nil when
#   no mapping matches
def kitsu_id_by_mal_id(mal_id)
  criteria = { external_site: 'myanimelist/manga', external_id: mal_id }
  mapping = Mapping.where(criteria).first
  mapping&.item_id
end
|
||
# Attempts to find the Kitsu manga by title.
#
# This is somewhat inefficient: the guessed record is reduced to its id here
# and then fetched again via Manga.where in #kitsu_data(kitsu_id).
#
# @param name [String] title of the manga from the MangaDex data
# @return [Integer, nil] Kitsu id when the guess finds something, else nil
def kitsu_id_by_name(name)
  guess = Mapping.guess('manga', title: name, subtype: 'NOT subtype:novel')
  guess&.id
end
|
||
# Fetches the Manga with the given id, or initializes an unsaved one when
# nothing matches.
#
# @param kitsu_id [Integer, nil] id resolved for the MangaDex entry
# @return [Manga] result of first_or_initialize on the id-scoped relation
def kitsu_data(kitsu_id)
  scope = Manga.where(id: kitsu_id)
  scope.first_or_initialize
end
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
class MangadexImport | ||
class Chapter1 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @NuckChorris I am having naming conflicts with ActiveRecord |
||
# Pairs a Kitsu chapter record with the MangaDex chapter payload that should
# be merged into it.
#
# @param kitsu_chapter [Object] chapter record that will be updated and saved
# @param mangadex_chapter [Hash] chapter data read with string keys
#   ('volume', 'alt_titles')
def initialize(kitsu_chapter, mangadex_chapter)
  @mangadex_chapter = mangadex_chapter
  @kitsu_chapter = kitsu_chapter
end
|
||
# Merges the MangaDex chapter into the Kitsu chapter and saves it.
#
# - volume and canonical_title are only filled when currently unset (||=);
# - titles is always replaced with the merged title hash;
# - canonical_title is left alone when the merged titles are blank.
#
# @return [Object] whatever save! returns on the chapter record
def create_or_update!
  chapter = @kitsu_chapter

  chapter.volume ||= mangadex_volume
  chapter.titles = mangadex_chapter_titles
  unless chapter.titles.blank?
    chapter.canonical_title ||= mangadex_canonical_title
  end

  chapter.save!
end
|
||
# Resolves the Volume the MangaDex chapter belongs to.
#
# @return [Volume, nil] nil when the payload has no 'volume' value; otherwise
#   the existing Volume for this manga and number, or a new unsaved one
def mangadex_volume
  number = @mangadex_chapter['volume']
  return nil if number.blank?

  lookup = { manga_id: @kitsu_chapter.manga_id, number: number }
  Volume.where(lookup).first_or_initialize
end
|
||
# Merges the MangaDex alternative titles into the chapter's existing titles.
#
# Existing Kitsu titles win (||=). MangaDex language names are matched
# case-insensitively against MangadexImport::LANGUAGES; titles whose
# language has no mapping, or whose value is nil, are skipped so that
# nothing is ever stored under a nil locale key.
#
# @return [Hash] a new hash of locale code => title (the record's own titles
#   hash is not mutated)
def mangadex_chapter_titles
  merged = (@kitsu_chapter.titles || {}).compact

  (@mangadex_chapter['alt_titles'] || {}).each do |language, title|
    next if title.nil?

    locale = MangadexImport::LANGUAGES[language.to_s.downcase]
    next if locale.nil?

    merged[locale] ||= title
  end

  merged
end
|
||
# Picks the canonical title for the chapter: the first key of its titles
# hash (a key of the hash, not the title text itself).
#
# @return [Object, nil] first key of @kitsu_chapter.titles, nil when empty
def mangadex_canonical_title
  locales = @kitsu_chapter.titles.keys
  locales.first
end
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,190 @@ | ||
class MangadexImport | ||
class Row | ||
# Translates normalised MangaDex genre tags (spaces replaced by hyphens,
# lower-cased — see #mapped_mangadex_category) into Kitsu category slugs.
# Tags that are absent from this table are used as the slug unchanged.
#
# NOTE(review): the empty-string values look like placeholders for tags
# without a Kitsu counterpart yet — confirm.
# NOTE(review): confirm that 'shoujou' matches the actual Category slug.
CATEGORY_MAPPINGS = {
  '4-koma' => '',
  'award-winning' => '',
  'doujinshi' => '',
  'drama' => '',
  'game' => '',
  'isekai' => '',
  'medical' => '',
  'oneshot' => '',
  'sci-fi' => 'science-fiction',
  'shoujo' => 'shoujou',
  'slice-of-life' => '',
  'smut' => '',
  'webtoon' => ''
}.freeze

# Raw MangaDex genre tags that are never imported as categories.
#
# Written as a plain array on purpose: %w[[no chapters]] splits on the space
# and yields ["[no", "chapters]"], which never matches the real tag.
CATEGORY_SKIPS = ['[no chapters]'].freeze
|
||
attr_reader :kitsu_data, :mangadex_data | ||
|
||
# Pairs a Kitsu manga record with the MangaDex payload to merge into it.
#
# @param kitsu_data [Object] manga record that is updated and saved
# @param mangadex_data [Hash] one parsed MangaDex entry (string keys)
def initialize(kitsu_data, mangadex_data)
  @mangadex_data = mangadex_data
  @kitsu_data = kitsu_data
end
|
||
# Merges the MangaDex payload into the Kitsu manga and persists it.
#
# Steps, in order:
#   1. generic fields: record[field] ||= mangadex_<field>;
#   2. custom fields: mangadex_<field> is called and updates the record
#      itself;
#   3. the record is saved, so that step 4 has an id to reference;
#   4. after-save fields: mangadex_<field> is called for each;
#   5. the record is saved again if step 4 left unsaved attribute changes
#      (e.g. the poster image, which is only assigned there and would
#      otherwise never be persisted).
#
# @return [Array<String>] the after-save field list (unchanged return value)
# @raise whatever save! raises when the record is invalid
def create_or_update
  kitsu_generic_fields.each do |kitsu_field|
    @kitsu_data[kitsu_field] ||= public_send("mangadex_#{kitsu_field}")
  end

  kitsu_custom_fields.each do |kitsu_field|
    public_send("mangadex_#{kitsu_field}")
  end

  @kitsu_data.save!

  after_save_fields = kitsu_after_manga_save_fields.each do |kitsu_field|
    public_send("mangadex_#{kitsu_field}")
  end

  # Persist anything the after-save hooks assigned directly on the record.
  @kitsu_data.save! if @kitsu_data.changed?

  after_save_fields
end
|
||
# Attributes that #create_or_update fills with `record[field] ||=
# mangadex_<field>`, i.e. only when the Kitsu value is currently nil/false.
#
# @return [Array<String>] attribute names, freshly allocated on every call
def kitsu_generic_fields
  [
    'age_rating',
    'canonical_title',
    'end_date',
    'original_locale',
    'serialization',
    'slug',
    'start_date',
    'synopsis',
    'volume_count'
  ]
end
|
||
# Attributes whose mangadex_<field> method updates the record itself; they
# are processed by #create_or_update before the record is saved.
#
# @return [Array<String>] attribute names, freshly allocated on every call
def kitsu_custom_fields
  [
    'abbreviated_titles',
    'chapter_count',
    'titles',
    'categories',
    'subtype'
  ]
end
|
||
# Fields whose mangadex_<field> method is run by #create_or_update only
# after the manga record has been saved.
#
# @return [Array<String>] field names, freshly allocated on every call
def kitsu_after_manga_save_fields
  [
    'author',
    'artist',
    'chapters',
    'poster_image'
  ]
end
|
||
# Derives the age rating from the MangaDex 'hentai' flag.
#
# @return [String, nil] 'R18' when the flag is truthy, otherwise nil
def mangadex_age_rating
  return 'R18' if mangadex_data['hentai']

  nil
end
|
||
# Adds the MangaDex alternative titles to the manga's abbreviated titles.
#
# Existing titles keep their position, duplicates are dropped, and a missing
# 'alt_titles' key is treated as an empty list instead of raising. A new
# array is assigned rather than mutating the attribute in place.
#
# @return [Array] the merged, de-duplicated list now set on the record
def mangadex_abbreviated_titles
  existing = kitsu_data.abbreviated_titles || []
  incoming = Array(mangadex_data['alt_titles'])

  # Array#| is an order-preserving union, i.e. (existing + incoming).uniq.
  kitsu_data.abbreviated_titles = existing | incoming
end
|
||
# Determines the canonical title key from the manga's origin language.
#
# The origin is lower-cased and looked up in MangadexImport::LANGUAGES.
#
# @return [String, nil] the mapped locale code; nil when the origin is nil
#   or has no entry in LANGUAGES
def mangadex_canonical_title
  origin = mangadex_original_locale
  language = origin&.downcase
  MangadexImport::LANGUAGES[language]
end
|
||
# Sets the manga's chapter count to the larger of the Kitsu value and the
# MangaDex 'total_chapters' value, treating a missing value on either side
# as 0.
#
# Side effect: a missing 'total_chapters' is written back into the MangaDex
# payload as 0.
#
# @return [Integer] the chapter count assigned to the record
def mangadex_chapter_count
  # A Kitsu count of 0 is reset to nil before the comparison.
  zero_on_kitsu = kitsu_data.chapter_count&.zero?
  kitsu_data.chapter_count = nil if zero_on_kitsu

  known_on_kitsu = kitsu_data.chapter_count || 0

  mangadex_data['total_chapters'] ||= 0
  known_on_mangadex = mangadex_data['total_chapters']

  @kitsu_data.chapter_count = [known_on_kitsu, known_on_mangadex].max
end
|
||
# Reads the origin language from the MangaDex title block.
#
# @return [Object, nil] the value of mangadex_data['title']['origin']
def mangadex_original_locale
  title_block = mangadex_data['title']
  title_block['origin']
end
|
||
# Assigns the MangaDex thumbnail as poster image when the manga has none.
# The value is only assigned on the record here; this method does not save.
#
# @return [Object, nil] the assigned thumbnail, or nil when a poster image
#   was already present
def mangadex_poster_image
  return unless @kitsu_data.poster_image.blank?

  @kitsu_data.poster_image = mangadex_data['thumbnail']
end
|
||
# No MangaDex value is wired up for serialization yet.
#
# @return [nil] always
def mangadex_serialization
  # Intentionally empty: an empty method body evaluates to nil.
end
|
||
# Reads the slug from the MangaDex title block.
#
# @return [Object, nil] the value of mangadex_data['title']['slug']
def mangadex_slug
  title_block = mangadex_data['title']
  title_block['slug']
end
|
||
# Forces the subtype of newly initialized manga records to 'manga'.
#
# Per the original author's note, a record built through
# first_or_initialize comes back with 'novel' as its default subtype, which
# is wrong for this import. Records that already exist are left untouched.
#
# @return [String, nil] 'manga' when assigned, nil for persisted records
def mangadex_subtype
  return unless kitsu_data.new_record?

  @kitsu_data.subtype = 'manga'
end
|
||
# Returns the MangaDex description for use as the synopsis.
#
# TODO: decide whether the text needs sanitizing before it is stored.
#
# @return [Object, nil] the value of mangadex_data['description']
def mangadex_synopsis
  description = mangadex_data['description']
  description
end
|
||
# Fills the 'en_jp' title from the MangaDex title name when the manga has
# none yet. The record's titles hash is updated in place.
#
# @return [Object] the 'en_jp' title after the call
def mangadex_titles
  titles = @kitsu_data.titles
  titles['en_jp'] ||= mangadex_data['title']['name']
end
|
||
# No MangaDex value is wired up for the volume count yet.
#
# @return [nil] always
def mangadex_volume_count
  # Intentionally empty: an empty method body evaluates to nil.
end
|
||
# No MangaDex value is wired up for the start date yet.
#
# @return [nil] always
def mangadex_start_date
  # Intentionally empty: an empty method body evaluates to nil.
end
|
||
# No MangaDex value is wired up for the end date yet.
#
# @return [nil] always
def mangadex_end_date
  # Intentionally empty: an empty method body evaluates to nil.
end
|
||
# Attaches the Kitsu categories that correspond to the MangaDex genre tags.
#
# A tag is ignored when it is listed in CATEGORY_SKIPS, when it maps to a
# blank slug, when the manga already has a category with that slug, or when
# no Category with that slug exists (appending nil to the association would
# raise). A missing 'genre_tags' key is treated as an empty list.
#
# @return [Array] the genre tags that were iterated
def mangadex_categories
  Array(mangadex_data['genre_tags']).each do |category_name|
    next if CATEGORY_SKIPS.include?(category_name)

    category_slug = mapped_mangadex_category(category_name)
    # A blank slug means the tag has no Kitsu counterpart; skip the lookup.
    next if category_slug.blank?

    # Skip if this category already exists in the association array.
    next if kitsu_data.categories.any? { |cat| cat.slug == category_slug }

    category = Category.find_by(slug: category_slug)
    next if category.nil?

    # NOTE: the original intent was for the association to be written by the
    # single save at the end. Review feedback on this line: `<<` "saves
    # right away" — confirm whether deferring is still wanted.
    @kitsu_data.categories << category
  end
end
|
||
# Links the MangaDex author to the manga as staff.
#
# @return [Object, nil] result of #mangadex_staff, or nil when the payload
#   has no author
def mangadex_author
  author_name = mangadex_data['author']
  mangadex_staff(author_name) unless author_name.blank?
end
|
||
# Links the MangaDex artist to the manga as staff.
#
# @return [Object, nil] result of #mangadex_staff, or nil when the payload
#   has no artist
def mangadex_artist
  artist_name = mangadex_data['artist']
  mangadex_staff(artist_name) unless artist_name.blank?
end
|
||
# Finds or creates the Person with the given name and links it to the manga
# through MangaStaff.
#
# A Person created here gets 'en' as canonical_name and the given name
# under the 'en' key of names.
#
# @param name [String] staff member name from the MangaDex payload
# @return [Object] result of MangaStaff.find_or_create_by for this pair
def mangadex_staff(name)
  creation_defaults = { canonical_name: 'en', names: { 'en' => name } }
  person = Person.create_with(creation_defaults).find_or_create_by(name: name)

  MangaStaff.find_or_create_by(person_id: person.id, manga_id: kitsu_data.id)
end
|
||
# Creates or updates one Kitsu chapter per MangaDex chapter.
#
# Each MangaDex chapter is matched to the manga's chapter with the same
# number (or a new unsaved one) and handed to MangadexImport::Chapter1.
# Nothing happens when the payload has no chapters.
#
# NOTE: everything chapter related may later move to another class.
#
# @return [Array, nil] the iterated chapter list, nil when there was none
def mangadex_chapters
  return if mangadex_data['chapters'].blank?

  mangadex_data['chapters'].each do |mangadex_chapter|
    # Chain split across lines to stay within the 100-column limit that the
    # linter flagged on the original single-line lookup.
    kitsu_chapter = kitsu_data.chapters
                              .where(number: mangadex_chapter['chapter'])
                              .first_or_initialize

    chapter = MangadexImport::Chapter1.new(kitsu_chapter, mangadex_chapter)
    chapter.create_or_update!
  end
end
|
||
private | ||
|
||
# Converts a MangaDex genre tag into a Kitsu category slug.
#
# The tag is normalised (spaces become hyphens, then lower-cased). If the
# normalised tag is a key of CATEGORY_MAPPINGS the mapped value is returned,
# even when that value is an empty string; otherwise the normalised tag
# itself is returned.
#
# @param category [String] raw MangaDex genre tag
# @return [String] Kitsu category slug (possibly empty)
def mapped_mangadex_category(category)
  slug = category.tr(' ', '-').downcase
  CATEGORY_MAPPINGS.fetch(slug, slug)
end
end | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should this also be
en_jp
because when saving with Chapter it will fail validation...