-
Notifications
You must be signed in to change notification settings - Fork 142
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base setup for mangadex import; handling base cases #550
base: the-future
Are you sure you want to change the base?
Changes from 5 commits
65754d8
d9f342b
5553671
0f25337
37450b2
73e8efb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
# Imports manga metadata from a MangaDex NDJSON dump into Kitsu records.
#
# Every line of the dump is one JSON document describing a single manga. For
# each entry the importer looks up the matching Kitsu manga (first through the
# MyAnimeList mapping, then by a title guess) and hands both sides to
# MangadexImport::Row, which merges and saves them.
class MangadexImport
  # Dump that is read when no explicit location is passed to #initialize.
  DEFAULT_FILE_LOCATION = 'tmp/mangadex_import/manga-batch-1-temp.ndjson'.freeze

  # Lower-cased MangaDex language name => locale code used as a title key.
  LANGUAGES = {
    'arabic' => 'ar',
    'bulgarian' => 'bg',
    'burmese' => 'my',
    'catalan' => 'ca',
    'chinese (trad)' => 'zh_Hant',
    'chinese (simp)' => 'zh_Hans',
    'english' => 'en',
    'filipino' => 'fil',
    'french' => 'fr',
    'german' => 'de',
    'hungarian' => 'hu',
    'indonesian' => 'id_in',
    'italian' => 'it',
    'japanese' => 'ja_jp',
    'korean' => 'ko',
    'malay' => 'ms',
    'persian' => 'fa',
    'polish' => 'pl',
    'portuguese (br)' => 'pt_br',
    'portuguese (pt)' => 'pt',
    'romanian' => 'ro',
    'russian' => 'ru',
    'spanish' => 'es',
    'spanish (es)' => 'es',
    'spanish (latem)' => 'es',
    'thai' => 'th',
    'turkish' => 'tr',
    'vietnamese' => 'vi'
  }.freeze

  # Captures the numeric id that follows "/manga/" in a MyAnimeList URL.
  MAL_ID_PATTERN = %r{/manga/(\d+)}.freeze

  attr_reader :file_location

  # @param file_location [String] path of the NDJSON dump to import; defaults
  #   to the location that used to be hard-coded here
  def initialize(file_location = DEFAULT_FILE_LOCATION)
    @file_location = file_location
  end

  # Walks the dump and creates or updates one Kitsu manga per entry.
  def import!
    each_mangadex_entry do |data, name, mal_id|
      kitsu_id = kitsu_id_by_mal_id(mal_id) if mal_id.present?

      # No MyAnimeList mapping: fall back to guessing by title.
      if kitsu_id.blank?
        kitsu_id = kitsu_id_by_name(name)
        validate_kitsu_id(kitsu_id)
      end

      puts "Kitsu Id: #{kitsu_id}"

      Row.new(kitsu_data(kitsu_id), data).create_or_update
    end
  end

  # Yields every entry of the dump together with its title and MyAnimeList id.
  #
  # Blank lines are skipped so a trailing newline or an empty separator line
  # does not abort the import with a JSON::ParserError.
  #
  # @yieldparam data [Hash] the parsed MangaDex entry
  # @yieldparam name [String, nil] value of title.name
  # @yieldparam mal_id [String, nil] id extracted from the MyAnimeList link
  # @return [Enumerator] when called without a block
  def each_mangadex_entry
    return enum_for(:each_mangadex_entry) unless block_given?

    File.foreach(file_location) do |line|
      next if line.strip.empty?

      entry = JSON.parse(line)
      name = entry.dig('title', 'name')
      mal_id = formatted_mal_id(entry.dig('external_links', 'MyAnimeList'))

      yield entry, name, mal_id
    end
  end

  # Extracts the MyAnimeList id from a MyAnimeList manga URL.
  #
  # The id is the number following "/manga/", so URLs that carry a title slug
  # after the id still resolve to the id. Anything that does not look like
  # such a URL falls back to its last path segment.
  #
  # @param url [String, nil]
  # @return [String, nil] nil when the url is blank
  def formatted_mal_id(url)
    return nil if url.blank?

    url = url.to_s
    url[MAL_ID_PATTERN, 1] || url.split('/').last
  end

  # Placeholder: nothing is validated yet.
  #
  # Original author's note: not sure what to validate this on; the subtype is
  # already known to be correct at this point.
  def validate_kitsu_id(kitsu_id)
  end

  private

  # @return [Object, nil] item_id of the 'myanimelist/manga' mapping, if any
  def kitsu_id_by_mal_id(mal_id)
    Mapping.find_by(
      external_site: 'myanimelist/manga',
      external_id: mal_id
    )&.item_id
  end

  # TODO: check what this actually returns
  # NOTE(review): the result is used as an id by #kitsu_data - confirm that
  # Mapping.guess really returns an id.
  def kitsu_id_by_name(name)
    Mapping.guess(
      'manga',
      title: name,
      subtype: 'NOT subtype:novel'
    )
  end

  # @return [Manga] the existing record with that id, or a new unsaved one
  def kitsu_data(kitsu_id)
    Manga.where(id: kitsu_id).first_or_initialize
  end
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
class MangadexImport
  # Merges one MangaDex chapter payload into a Kitsu chapter record.
  #
  # NOTE(review): the class is named Chapter1 because the author reported
  # naming conflicts with ActiveRecord when using the name Chapter.
  class Chapter1
    # @param kitsu_chapter [Object] chapter record; must respond to volume,
    #   volume=, titles, titles=, manga_id and save!
    # @param mangadex_chapter [Hash] parsed MangaDex chapter payload
    def initialize(kitsu_chapter, mangadex_chapter)
      @kitsu_chapter = kitsu_chapter
      @mangadex_chapter = mangadex_chapter
    end

    # Fills in the volume (only when none is set yet) and the titles, then
    # saves the chapter.
    #
    # Uses save! so that a chapter that cannot be saved raises, as the bang in
    # the method name promises, instead of being dropped silently.
    def create_or_update!
      @kitsu_chapter.volume ||= mangadex_volume
      @kitsu_chapter.titles = mangadex_chapter_titles

      @kitsu_chapter.save!
    end

    # @return [Volume, nil] the manga's volume with the payload's volume
    #   number (existing or newly initialized); nil when the payload has none
    def mangadex_volume
      volume_number = @mangadex_chapter['volume']
      return if volume_number.blank?

      Volume.where(manga_id: @kitsu_chapter.manga_id, number: volume_number).first_or_initialize
    end

    # Builds the chapter's titles: the non-nil titles already on the Kitsu
    # chapter plus the MangaDex alt titles for locales that have no title yet.
    #
    # Language names are lower-cased before the LANGUAGES lookup; entries with
    # an unknown language or a nil title are skipped so that no nil key or
    # value ends up in the result.
    #
    # @return [Hash] locale code => title
    def mangadex_chapter_titles
      kitsu_titles = (@kitsu_chapter.titles || {}).compact

      # NOTE(review): assumes alt_titles maps language name => title - confirm
      # against a payload that actually contains chapters.
      (@mangadex_chapter['alt_titles'] || {}).each do |language, title|
        next if title.nil?

        locale = MangadexImport::LANGUAGES[language.to_s.downcase]
        next if locale.nil?

        kitsu_titles[locale] ||= title
      end

      puts kitsu_titles

      kitsu_titles
    end
  end
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
class MangadexImport
  # Merges one MangaDex manga payload into a Kitsu manga record.
  #
  # For every Kitsu field there is a public mangadex_<field> method. "Generic"
  # fields only return a value, which is written when the Kitsu column is
  # still empty; "custom" fields update the record (or its associations)
  # themselves.
  class Row
    # Dasherized, lower-cased MangaDex genre tag => Kitsu category slug.
    # A blank slug means no Kitsu category is mapped; such tags are skipped.
    # NOTE(review): 'shoujou' looks like a typo for 'shoujo' - confirm the slug.
    CATEGORY_MAPPINGS = {
      '4-koma' => '',
      'award-winning' => '',
      'doujinshi' => '',
      'drama' => '',
      'game' => '',
      'isekai' => '',
      'medical' => '',
      'oneshot' => '',
      'sci-fi' => 'science-fiction',
      'shoujo' => 'shoujou',
      'slice-of-life' => '',
      'smut' => '',
      'webtoon' => ''
    }.freeze

    # Genre tags that are never imported. This must be a plain array literal:
    # %w[[no chapters]] splits on the space and yields "[no" and "chapters]".
    CATEGORY_SKIPS = ['[no chapters]'].freeze

    # Custom fields that write to other tables and therefore need the manga to
    # be saved (to have an id) before they run.
    ASSOCIATION_FIELDS = %w[categories author artist chapters].freeze

    attr_reader :kitsu_data, :mangadex_data

    # @param kitsu_data [Manga] existing or newly initialized Kitsu record
    # @param mangadex_data [Hash] parsed MangaDex entry
    def initialize(kitsu_data, mangadex_data)
      @kitsu_data = kitsu_data
      @mangadex_data = mangadex_data
    end

    # Copies the MangaDex data onto the Kitsu record and saves it.
    #
    # Plain attributes are filled in first and the manga is saved before any
    # association-backed field runs, so staff and chapter rows are always
    # created against a manga that has an id.
    #
    # @return [true]
    # @raise whatever kitsu_data.save! raises when the record cannot be saved
    def create_or_update
      kitsu_generic_fields.each do |field|
        @kitsu_data[field] ||= public_send("mangadex_#{field}")
      end

      association_fields, attribute_fields =
        kitsu_custom_fields.partition { |field| ASSOCIATION_FIELDS.include?(field) }

      attribute_fields.each { |field| public_send("mangadex_#{field}") }
      @kitsu_data.save!
      association_fields.each { |field| public_send("mangadex_#{field}") }

      true
    end

    # @return [Array<String>] fields that are only filled in when still empty
    def kitsu_generic_fields
      %w[
        age_rating canonical_title
        end_date original_locale poster_image_file_name
        serialization slug start_date synopsis
        volume_count
      ]
    end

    # @return [Array<String>] fields whose mangadex_<field> method updates
    #   the record itself
    def kitsu_custom_fields
      %w[
        abbreviated_titles chapter_count titles categories author artist
        chapters subtype
      ]
    end

    # @return [String, nil] 'R18' for entries flagged as hentai, otherwise nil
    def mangadex_age_rating
      'R18' if mangadex_data['hentai']
    end

    # Adds the MangaDex alt titles to the abbreviated titles, without
    # duplicates. A missing alt_titles key is treated as an empty list.
    def mangadex_abbreviated_titles
      existing = kitsu_data.abbreviated_titles || []
      kitsu_data.abbreviated_titles = existing | Array(mangadex_data['alt_titles'])
    end

    # @return [String, nil] locale code of the entry's language of origin
    def mangadex_canonical_title
      MangadexImport::LANGUAGES[mangadex_original_locale&.downcase]
    end

    # Sets the chapter count to the larger of the Kitsu and MangaDex counts; a
    # missing count on either side counts as 0.
    def mangadex_chapter_count
      @kitsu_data.chapter_count = [
        kitsu_data.chapter_count.to_i,
        mangadex_data['total_chapters'].to_i
      ].max
    end

    # NOTE(review): this is the language name as MangaDex spells it (e.g.
    # "Japanese"), not a locale code - confirm that is what should be stored.
    def mangadex_original_locale
      mangadex_data.dig('title', 'origin')
    end

    # NOTE(review): the sample payload carries a full URL here, not a bare
    # file name - confirm this is the intended value for the column.
    def mangadex_poster_image_file_name
      mangadex_data['thumbnail']
    end

    # Not provided by MangaDex.
    def mangadex_serialization
      nil
    end

    def mangadex_slug
      mangadex_data.dig('title', 'slug')
    end

    # Per the original author's note, first_or_initialize leaves a new record
    # with the default subtype 'novel', so new records are set to 'manga'.
    def mangadex_subtype
      @kitsu_data.subtype = 'manga' if kitsu_data.new_record?
    end

    # TODO: do we need to sanitize?
    def mangadex_synopsis
      mangadex_data['description']
    end

    # Stores the MangaDex title under the canonical title key unless a title
    # is already present there. Does nothing when no canonical title is set,
    # so the title is never filed under a nil key.
    def mangadex_titles
      locale = kitsu_data.canonical_title
      return if locale.blank?

      titles = (kitsu_data.titles || {}).dup
      titles[locale] ||= mangadex_data.dig('title', 'name')
      @kitsu_data.titles = titles
    end

    # Not provided by MangaDex.
    def mangadex_volume_count
      nil
    end

    # Not provided by MangaDex.
    def mangadex_start_date
      nil
    end

    # Not provided by MangaDex.
    def mangadex_end_date
      nil
    end

    # Attaches the Kitsu category for every MangaDex genre tag that maps to an
    # existing category and is not attached yet. Tags in CATEGORY_SKIPS, tags
    # mapped to a blank slug and slugs without a Category row are ignored.
    def mangadex_categories
      Array(mangadex_data['genre_tags']).each do |category_name|
        next if CATEGORY_SKIPS.include?(category_name)

        category_slug = mapped_mangadex_category(category_name)
        next if category_slug.blank?

        # skip if this category already exists in the association array.
        next if kitsu_data.categories.any? { |cat| cat.slug == category_slug }

        category = Category.find_by(slug: category_slug)
        next if category.nil?

        # NOTE(review): per the PR discussion, << saves the association right
        # away rather than waiting for the record's next save.
        @kitsu_data.categories << category
      end
    end

    # Links the entry's author to the manga, if one is given.
    def mangadex_author
      author = mangadex_data['author']
      return if author.blank?

      mangadex_staff(author)
    end

    # Links the entry's artist to the manga, if one is given.
    def mangadex_artist
      artist = mangadex_data['artist']
      return if artist.blank?

      mangadex_staff(artist)
    end

    # Finds or creates the Person with that name and links it to the manga.
    #
    # @param name [String]
    def mangadex_staff(name)
      staff = Person.create_with(
        canonical_name: 'en',
        names: { 'en' => name }
      ).find_or_create_by(name: name)

      MangaStaff.find_or_create_by(person_id: staff.id, manga_id: kitsu_data.id)
    end

    # Creates or updates one Kitsu chapter per MangaDex chapter.
    # Original author's note: everything chapter related may move to another
    # class.
    def mangadex_chapters
      return if mangadex_data['chapters'].blank?

      mangadex_data['chapters'].each do |mangadex_chapter|
        kitsu_chapter = kitsu_data.chapters
                                  .where(number: mangadex_chapter['chapter'])
                                  .first_or_initialize

        # The chapter importer is defined as Chapter1, not Chapter.
        MangadexImport::Chapter1.new(kitsu_chapter, mangadex_chapter).create_or_update!
      end
    end

    private

    # @param category [String] MangaDex genre tag
    # @return [String] mapped Kitsu slug (possibly blank), or the dasherized,
    #   lower-cased tag itself when no mapping exists
    def mapped_mangadex_category(category)
      slug = category.tr(' ', '-').downcase
      CATEGORY_MAPPINGS.fetch(slug, slug)
    end
  end
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
{ | ||
"id": 6899, | ||
"slug": "kyou-wa-kaisha-yasumimasu", | ||
"synopsis": "Aoishi Hanae is a 33-year-old woman who has a job, but no boyfriend. In fact, she's also still a virgin. It's not that she never had a chance to lose her virginity, but she let the chance pass by. At a drinking party with her colleagues, Hanae ends up confiding in Tanokura Yuuto, a handsome young 21-year-old part timer at her company who also attends college, and who has recently broken up with his girlfriend.rnrnThe next morning, she wakes up in a hotel next to him, and learns that they apparently agreed to start dating and then had sex that night. She has only hazy memories, and has no idea how to deal with the situation. Yuuto is shocked that she doesn't remember, but hopes that they can still date. How will Hanae, who is older but much less experienced in romance, handle her surprising new relationship with the younger and more experienced Yuuto?rnrn(Source: MangaHelpers)", | ||
"poster_image_file_name": "img_19898787914c27979786892c7a2b160f206771.jpg", | ||
"poster_image_content_type": "image/jpeg", | ||
"poster_image_file_size": 185685, | ||
"poster_image_updated_at": "2017-10-23T07:42:58.421Z", | ||
"cover_image_file_name": "image64.jpg", | ||
"cover_image_content_type": "image/jpeg", | ||
"cover_image_file_size": 29937, | ||
"cover_image_updated_at": "2017-10-23T07:37:14.698Z", | ||
"start_date": "2011-11-28", | ||
"end_date": "2017-01-28", | ||
"serialization": "Cocohana", | ||
"created_at": "2013-12-18T13:52:36.637Z", | ||
"updated_at": "2019-08-11T12:09:34.810Z", | ||
"cover_image_top_offset": 0, | ||
"volume_count": 13, | ||
"chapter_count": 51, | ||
"subtype": "manga", | ||
"average_rating": null, | ||
"rating_frequencies": { | ||
"2": "0", | ||
"3": "0", | ||
"4": "0", | ||
"5": "0", | ||
"6": "0", | ||
"7": "0", | ||
"8": "0", | ||
"9": "0", | ||
"10": "3", | ||
"11": "0", | ||
"12": "3", | ||
"13": "0", | ||
"14": "7", | ||
"15": "0", | ||
"16": "5", | ||
"17": "0", | ||
"18": "1", | ||
"19": "0", | ||
"20": "0" | ||
}, | ||
"titles": { | ||
"en": "Today I Will Take a Break From the Company.", | ||
"en_jp": "Kyou wa Kaisha Yasumimasu.", | ||
"ja_jp": "きょうは会社休みます。" | ||
}, | ||
"canonical_title": "Kyou wa Kaisha Yasumimasu.", | ||
"abbreviated_titles": [ | ||
"I'm Taking off Work Today.", | ||
"Allez-je vais pas au travail aujourd'hui.", | ||
"Today I Will Take a Break From the Company." | ||
], | ||
"user_count": 152, | ||
"popularity_rank": 2969, | ||
"rating_rank": null, | ||
"age_rating": "PG", | ||
"age_rating_guide": "", | ||
"favorites_count": 1, | ||
"cover_image_processing": false, | ||
"tba": "", | ||
"chapter_count_guess": null, | ||
"poster_image_meta": "BAh7CjoNb3JpZ2luYWx7CDoKd2lkdGhpAvcCOgtoZWlnaHRpAk8EOglzaXplnaQNV1QI6CXRpbnl7CDsGaXM7B2kBnDsIaQJgIToKc21hbGx7CDsGaQIcATsHnaQKSATsIaQL3ZjoLbWVkaXVtewg7BmkChgE7B2kCKgI7CGkCV546Cmxhcmdlnewg7BmkCJgI7B2kCDAM7CGkC7uk=n", | ||
"cover_image_meta": "BAh7CToNb3JpZ2luYWx7CDoKd2lkdGhpAmwCOgtoZWlnaHRpAl0BOglzaXplnaQLxdDoJdGlueXsIOwZpAkgDOwdpAcg7CGkCwnY6Cmxhcmdlewg7BmkCIA07nB2kCIAM7CGkDdCABOgpzbWFsbHsIOwZpApAGOwdpApABOwhpAkewn", | ||
"release_schedule": null, | ||
"original_locale": null, | ||
"chapters": [] | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"id":12091,"title":{"name":"Sayonara Sorcier","slug":"sayonara-sorcier","origin":"Japanese"},"related":[],"alt_titles":["Bon au revoir","Elveda Büyücü","Goodbye Sorcier","Sayonara Sorushie","Sorcier","さよならソルシエ","再见了魔法师"],"genre_tags":["Drama","Historical","[no chapters]"],"thumbnail":"https://mangadex.org/images/manga/12091.jpg","publish_status":"completed","demographic":"Josei","author":"Hozumi","artist":"Hozumi","description":"In the late 19th century in Paris, Theodorus van Gogh, famous art dealer in Paris and the branch manager of the prestigious Goupil & Cie patroned exclusively by Bourgeoisie clients, seeks to embrace new art talents and techniques. However, the period is full of the prestigious and conservative who think that art belongs solely to the upper echelon of the society whereas commoners are considered as unable to appreciate art.\r\n\r\nStating that \"Destroying the system from within is more interesting\", Thedorus struggles to overcome the obstacle by bring forth works which depict the truth and daily lives of people which are not acknowledged by the academy.\r\n\r\nRank #1 in Kono Manga ga Sugoi 2014 for Female Voters.","total_chapters":3,"hentai":false,"external_links":{"MangaUpdates":"https://www.mangaupdates.com/series.html?id=82421","MyAnimeList":"https://myanimelist.net/manga/59191"},"chapters":[]} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should this also be
en_jp
because when saving with Chapter it will fail validation...