From 4a24a5edfeff9387c0695c6bc57b16b0c6b54c33 Mon Sep 17 00:00:00 2001 From: Zach Stultz Date: Tue, 4 Jun 2024 09:50:23 -0500 Subject: [PATCH] v2.5.13 --- komga_cover_extractor.py | 151 +++++++++++++++++++++------------------ 1 file changed, 82 insertions(+), 69 deletions(-) diff --git a/komga_cover_extractor.py b/komga_cover_extractor.py index 89ea266..85d6687 100644 --- a/komga_cover_extractor.py +++ b/komga_cover_extractor.py @@ -46,7 +46,7 @@ import settings as settings_file # Version of the script -script_version = (2, 5, 12) +script_version = (2, 5, 13) script_version_text = "v{}.{}.{}".format(*script_version) # Paths = existing library @@ -590,6 +590,17 @@ def get_sort_key(index_number): return index_number +# Sorts the volumes by the index number if they're all numbers, +# otherwise it sorts the volumes alphabetically by the file name. +def sort_volumes(volumes): + if any(isinstance(item.index_number, str) for item in volumes): + # sort alphabetically by the file name + return sorted(volumes, key=lambda x: x.name) + else: + # sort by the index number + return sorted(volumes, key=lambda x: get_sort_key(x.index_number)) + + # Path Class class Path: def __init__( @@ -1722,7 +1733,7 @@ def send_discord_message( if embeds: # Limit the number of embeds to 10 - for index, embed in enumerate(embeds[:10]): + for index, embed in enumerate(embeds[:10], start=1): if script_version_text: embed.embed.set_footer(text=script_version_text) @@ -1736,9 +1747,7 @@ def send_discord_message( embed.embed.set_image(url=image) elif embed.file: file_name = ( - "cover.jpg" - if len(embeds) == 1 - else f"cover_{index + 1}.jpg" + "cover.jpg" if len(embeds) == 1 else f"cover_{index}.jpg" ) webhook_obj.add_file(file=embed.file, filename=file_name) embed.embed.set_image(url=f"attachment://{file_name}") @@ -2545,6 +2554,7 @@ def get_series_name_from_volume(name, root, test_mode=False, second=False): os.path.basename(root) not in str(download_folders) or not download_folders ) and (os.path.basename(root) not in str(paths) or not paths) + and not contains_keyword(os.path.basename(root)) ): # Get the series namne from the root folder # EX: "Kindaichi 37-sai no Jikenbo -v01-v12-"" -> "Kindaichi 37-sai no Jikenbo" @@ -2694,6 +2704,7 @@ def get_series_name_from_chapter(name, root, chapter_number="", second=False): and not second and root and os.path.basename(root) not in str(download_folders + paths) + and not contains_keyword(os.path.basename(root)) ): root_number = get_release_number_cache(os.path.basename(root)) @@ -3007,12 +3018,12 @@ def get_release_year(name, metadata=None): if match: result = int(re.sub(r"(\(|\[|\{)|(\)|\]|\})", "", match.group())) - if metadata and not result: + if not result and metadata: release_year_from_file = None - if "Year" in metadata: + if "Summary" in metadata and "Year" in metadata: release_year_from_file = metadata["Year"] - elif "dc:date" in metadata: + elif "dc:description" in metadata and "dc:date" in metadata: release_year_from_file = metadata["dc:date"].strip() release_year_from_file = re.search(r"\d{4}", release_year_from_file) release_year_from_file = ( @@ -6027,13 +6038,8 @@ def get_matching_volumes(file, img_volumes): else: volumes = test_mode - # check that all volumes' index numbers aren't strings - if any(isinstance(item.index_number, str) for item in volumes): - # sort alphabetically by the file name - volumes = sorted(volumes, key=lambda x: x.name) - else: - # sort by the index number - volumes = sorted(volumes, key=lambda x: get_sort_key(x.index_number)) + # Sort the volumes + volumes = sort_volumes(volumes) exclude = None @@ -6172,7 +6178,7 @@ def get_matching_volumes(file, img_volumes): # 2 - Use the cached paths if cached_paths: print("\n\tChecking path types...") - for cached_path_index, p in enumerate(cached_paths[:]): + for cached_path_index, p in enumerate(cached_paths[:], start=1): if ( not os.path.exists(p) or not os.path.isdir(p) @@ -6218,7 +6224,7 @@ def get_matching_volumes(file, img_volumes): ) print( - f"\n\t\t-(CACHE)- {cached_path_index+1} of {len(cached_paths)} - " + f"\n\t\t-(CACHE)- {cached_path_index} of {len(cached_paths)} - " f'"{file.name}"\n\t\tCHECKING: {downloaded_file_series_name}\n\t\tAGAINST: {successful_series_name}\n\t\tSCORE: {successful_similarity_score}' ) if successful_similarity_score >= required_similarity_score: @@ -6262,7 +6268,7 @@ def get_matching_volumes(file, img_volumes): directories_found = [] matched_ids = [] - for path_position, path in enumerate(paths): + for path_position, path in enumerate(paths, start=1): if done or not os.path.exists(path) or path in download_folders: continue @@ -6358,7 +6364,7 @@ def get_matching_volumes(file, img_volumes): print(f"\n\tLooking for: {file.series_name}") for dir_position, inner_dir in enumerate( - folder_accessor.dirs + folder_accessor.dirs, start=1 ): if done: break @@ -6380,7 +6386,7 @@ def get_matching_volumes(file, img_volumes): ) print( - f'\n\t\t-(NOT CACHE)- {dir_position+1} of {len(folder_accessor.dirs)} - path {path_position+1} of {len(paths)} - "{file.name}"\n\t\tCHECKING: {downloaded_file_series_name}\n\t\tAGAINST: {existing_series_folder_from_library}\n\t\tSCORE: {similarity_score}' + f'\n\t\t-(NOT CACHE)- {dir_position} of {len(folder_accessor.dirs)} - path {path_position} of {len(paths)} - "{file.name}"\n\t\tCHECKING: {downloaded_file_series_name}\n\t\tAGAINST: {existing_series_folder_from_library}\n\t\tSCORE: {similarity_score}' ) file_root = os.path.join( folder_accessor.root, inner_dir @@ -8240,7 +8246,7 @@ def is_blank_image(image_data): # Returns the highest volume number and volume part number of a release in a list of volume releases @lru_cache(maxsize=None) def get_highest_release(releases, is_chapter_directory=False): - highest_index_number = "" + highest_num = "" if use_latest_volume_cover_as_series_cover and not is_chapter_directory: contains_empty_or_tuple_index_number = any( @@ -8253,16 +8259,16 @@ def get_highest_release(releases, is_chapter_directory=False): number = item if isinstance(number, (int, float)): - if highest_index_number == "" or number > highest_index_number: - highest_index_number = number + if highest_num == "" or number > highest_num: + highest_num = number elif isinstance(number, (tuple, list)): max_number = max(number) - if highest_index_number == "" or max_number > highest_index_number: - highest_index_number = max_number + if highest_num == "" or max_number > highest_num: + highest_num = max_number else: - highest_index_number = max(releases) + highest_num = max(releases) - return highest_index_number + return highest_num # Series covers that have been checked and can be skipped. @@ -8763,40 +8769,48 @@ def filter_series_by_first_word(filtered_series, first_word): ): continue - volumes = upgrade_to_file_class( - [ - f - for f in [ - entry.name - for entry in os.scandir(folder_path) - if entry.is_file() - ] - ], - folder_path, - clean=True, + volumes = upgrade_to_volume_class( + upgrade_to_file_class( + [ + f + for f in [ + entry.name + for entry in os.scandir(folder_path) + if entry.is_file() + ] + ], + folder_path, + clean=True, + ), + skip_release_year=True, + skip_release_group=True, + skip_extras=True, + skip_publisher=True, + skip_premium_content=True, + skip_subtitle=True, + skip_multi_volume=True, ) if not volumes: continue # sort the volumes by name - volumes = sorted(volumes, key=lambda x: x.name) - - volume_one = next( - ( - x - for x in volumes - if x.volume_number == 1 - or ( - isinstance(x.volume_number, list) - and 1 in x.volume_number - ) - ), - None, - ) + volumes = sort_volumes(volumes) - if not volume_one: - continue + volume_one = ( + next( + ( + x + for x in volumes + if x.volume_number == 1 + or ( + isinstance(x.volume_number, list) + and 1 in x.volume_number + ) + ), + None, + ) + ) or volumes[0] # find the image cover cover_path = next( @@ -9373,8 +9387,6 @@ def search_bookwalker( bookwalker_light_novel_category = "&qcat=3" bookwalker_intll_manga_category = "&qcat=11" - start_time = datetime.now() - done = False search_type = type count = 0 @@ -9384,6 +9396,7 @@ def search_bookwalker( search = urllib.parse.quote(query) base_url = "https://global.bookwalker.jp/search/?word=" + chapter_exclusion_url = "&np=1&qnot%5B%5D=Chapter&x=13&y=16" series_only = "&np=0" series_url = f"{base_url}{search}{series_only}" original_similarity_score = required_similarity_score @@ -9404,7 +9417,6 @@ def search_bookwalker( print(f"{keyword}{query}\n\t\tCategory: {category} {series_info}") - chapter_exclusion_url = "&np=1&qnot%5B%5D=Chapter&x=13&y=16" series_page = scrape_url( series_url, cookies=default_cookies, @@ -9425,7 +9437,6 @@ def search_bookwalker( while page_count < total_pages_to_scrape + 1: page_count_url = f"&page={page_count}" - alternate_url = "" url = f"{base_url}{search}{page_count_url}" category = "" @@ -9547,12 +9558,6 @@ def search_bookwalker( ul_tag_box = o_tile_book_info.find("ul", class_="m-tile-tag-box") li_tag_item = ul_tag_box.find_all("li", class_="m-tile-tag") - a_tag_chapter = None - a_tag_simulpub = None - a_tag_manga = None - a_tag_light_novel = None - a_tag_other = None - tag_dict = { "a-tag-manga": None, "a-tag-light-novel": None, @@ -9566,11 +9571,11 @@ def search_bookwalker( if i.find("div", class_=tag_name): tag_dict[tag_name] = i.find("div", class_=tag_name) + a_tag_chapter = tag_dict["a-tag-chapter"] + a_tag_simulpub = tag_dict["a-tag-simulpub"] a_tag_manga = tag_dict["a-tag-manga"] a_tag_light_novel = tag_dict["a-tag-light-novel"] a_tag_other = tag_dict["a-tag-other"] - a_tag_chapter = tag_dict["a-tag-chapter"] - a_tag_simulpub = tag_dict["a-tag-simulpub"] book_type = a_tag_manga or a_tag_light_novel or a_tag_other @@ -9803,7 +9808,6 @@ def search_bookwalker( # find table class="product-detail" product_detail = soup_two.find("table", class_="product-detail") - # print(f"{datetime.now() - start_time}") # find all inside of product-detail product_detail_td = product_detail.find_all("td") @@ -10139,7 +10143,7 @@ def sort_and_log_releases(released, pre_orders): # Remove any paths that are in the download folders list paths_clean = [p for p in paths if p not in download_folders] - for path_index, path in enumerate(paths_clean): + for path_index, path in enumerate(paths_clean, start=1): if not os.path.exists(path): print(f"\n\tPath does not exist: {path}") continue @@ -10165,7 +10169,7 @@ def sort_and_log_releases(released, pre_orders): base_name = os.path.basename(root) print( - f"\n\t[Folder {dir_index} of {len(folders)} - Path {path_index + 1} of {len(paths_clean)}]" + f"\n\t[Folder {dir_index} of {len(folders)} - Path {path_index} of {len(paths_clean)}]" ) print(f"\tPath: {root}") @@ -11208,6 +11212,15 @@ def correct_file_extensions(): print("\t\t\tSkipped") +# Checks if the file string contains a chapter/volume keyword +def contains_keyword(file_string, chapter=False): + return re.search( + rf"(\b({chapter_regex_keywords if chapter else volume_regex_keywords})([-_.]|)(([0-9]+)((([-_.]|)([0-9]+))+|))(\s|{file_extensions_regex}))", + file_string, + re.IGNORECASE, + ) + + # Optional features below, use at your own risk. # Activate them in settings.py def main():