diff --git a/docs/supportedsites.md b/docs/supportedsites.md index bfe280bb55..6e47db38cf 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -160,7 +160,7 @@ Consider all listed sites to potentially be NSFW. Civitai https://www.civitai.com/ - Models, Search Results, Tag Searches, User Profiles + individual Images, Models, Search Results, Tag Searches (Images), Tag Searches (Models), User Profiles, User Images, User Models diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py index 6787724d12..30f7cff278 100644 --- a/gallery_dl/extractor/civitai.py +++ b/gallery_dl/extractor/civitai.py @@ -10,26 +10,56 @@ from .common import Extractor, Message from .. import text +import functools import itertools import re BASE_PATTERN = r"(?:https?://)?civitai\.com" +USER_PATTERN = BASE_PATTERN + r"/user/([^/?#]+)" class CivitaiExtractor(Extractor): """Base class for civitai extractors""" category = "civitai" root = "https://civitai.com" + directory_fmt = ("{category}", "{username}", "images") + filename_fmt = "{id}.{extension}" + archive_fmt = "{hash}" request_interval = (0.5, 1.5) def _init(self): self.api = CivitaiAPI(self) def items(self): - data = {"_extractor": CivitaiModelExtractor} - for model in self.models(): - url = "{}/models/{}".format(self.root, model["id"]) - yield Message.Queue, url, data + models = self.models() + if models: + data = {"_extractor": CivitaiModelExtractor} + for model in models: + url = "{}/models/{}".format(self.root, model["id"]) + yield Message.Queue, url, data + return + + images = self.images() + if images: + for image in images: + url = self._orig(image["url"]) + image["date"] = text.parse_datetime( + image["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") + text.nameext_from_url(url, image) + yield Message.Directory, image + yield Message.Url, url, image + return + + def models(self): + return () + + def images(self): + return () + + def _orig(self, url): + sub_width = functools.partial(re.compile(r"/width=\d*/").sub, "/w/") + CivitaiExtractor._orig = sub_width + return sub_width(url) class CivitaiModelExtractor(CivitaiExtractor): @@ -43,7 +73,6 @@ class CivitaiModelExtractor(CivitaiExtractor): def items(self): model_id, version_id = self.groups - self._sub = re.compile(r"/width=\d*/").sub model = self.api.model(model_id) creator = model["creator"] @@ -114,7 +143,7 @@ def _extract_files_image(self, model, version): text.nameext_from_url(file["url"], { "num" : num, "file": file, - "url" : self._sub("/w/", file["url"]), + "url" : self._orig(file["url"]), }) for num, file in enumerate(version["images"], 1) ] @@ -129,7 +158,7 @@ def _extract_files_gallery(self, model, version): yield text.nameext_from_url(file["url"], { "num" : num, "file": file, - "url" : self._sub("/w/", file["url"]), + "url" : self._orig(file["url"]), }) def _validate_file_model(self, response): @@ -146,9 +175,18 @@ def _validate_file_model(self, response): return True -class CivitaiTagExtractor(CivitaiExtractor): - subcategory = "tag" - pattern = BASE_PATTERN + r"/tag/([^?/#]+)" +class CivitaiImageExtractor(CivitaiExtractor): + subcategory = "image" + pattern = BASE_PATTERN + r"/images/(\d+)" + example = "https://civitai.com/images/12345" + + def images(self): + return self.api.images({"imageId": self.groups[0]}) + + +class CivitaiTagModelsExtractor(CivitaiExtractor): + subcategory = "tag-models" + pattern = BASE_PATTERN + r"/(?:tag/|models\?tag=)([^/?&#]+)" example = "https://civitai.com/tag/TAG" def models(self): @@ -156,6 +194,16 @@ def models(self): return self.api.models({"tag": tag}) +class CivitaiTagImagesExtractor(CivitaiExtractor): + subcategory = "tag-images" + pattern = BASE_PATTERN + r"/images\?tags=([^&#]+)" + example = "https://civitai.com/images?tags=12345" + + def images(self): + tag = text.unquote(self.groups[0]) + return self.api.images({"tag": tag}) + + class CivitaiSearchExtractor(CivitaiExtractor): subcategory = "search" pattern = BASE_PATTERN + r"/search/models\?([^#]+)" @@ -168,14 +216,42 @@ def models(self): class CivitaiUserExtractor(CivitaiExtractor): subcategory = "user" - pattern = BASE_PATTERN + r"/user/([^/?#]+)(?:/models)?/?(?:$|\?|#)" + pattern = USER_PATTERN + r"/?(?:$|\?|#)" + example = "https://civitai.com/user/USER" + + def initialize(self): + pass + + def items(self): + base = "{}/user/{}/".format(self.root, self.groups[0]) + return self._dispatch_extractors(( + (CivitaiUserModelsExtractor, base + "models"), + (CivitaiUserImagesExtractor, base + "images"), + ), ("user-models", "user-images")) + + +class CivitaiUserModelsExtractor(CivitaiExtractor): + subcategory = "user-models" + pattern = USER_PATTERN + r"/models/?(?:\?([^#]+))?" example = "https://civitai.com/user/USER/models" def models(self): - params = {"username": text.unquote(self.groups[0])} + params = text.parse_query(self.groups[1]) + params["username"] = text.unquote(self.groups[0]) return self.api.models(params) +class CivitaiUserImagesExtractor(CivitaiExtractor): + subcategory = "user-images" + pattern = USER_PATTERN + r"/images/?(?:\?([^#]+))?" + example = "https://civitai.com/user/USER/images" + + def images(self): + params = text.parse_query(self.groups[1]) + params["username"] = text.unquote(self.groups[0]) + return self.api.images(params) + + class CivitaiAPI(): """Interface for the Civitai Public REST API diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 764a6f9dde..c5bf3d7c16 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -194,6 +194,12 @@ "bluesky": { "posts": "", }, + "civitai": { + "tag-models": "Tag Searches (Models)", + "tag-images": "Tag Searches (Images)", + "user-models": "User Models", + "user-images": "User Images", + }, "coomerparty": { "discord" : "", "discord-server": "", diff --git a/test/results/civitai.py b/test/results/civitai.py index c28f4fc575..2470c4db81 100644 --- a/test/results/civitai.py +++ b/test/results/civitai.py @@ -10,7 +10,6 @@ __tests__ = ( { "#url" : "https://civitai.com/models/703211/maid-classic", - "#category": ("", "civitai", "model"), "#class" : civitai.CivitaiModelExtractor, "#urls" : [ "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/5c4efa68-bb58-47c5-a716-98cd0f51f047/w/26962950.jpeg", @@ -49,7 +48,6 @@ { "#url" : "https://civitai.com/models/703211?modelVersionId=786644", - "#category": ("", "civitai", "model"), "#class" : civitai.CivitaiModelExtractor, "#urls" : [ "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/52b6efa7-801c-4901-90b4-fa3964d23480/w/26887862.jpeg", @@ -88,4 +86,77 @@ "num" : range(1, 3), }, +{ + "#url" : "https://civitai.com/images/26962948", + "#class" : civitai.CivitaiImageExtractor, + "#urls" : "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/69bf3279-df2c-4ec8-b795-479e9cd3db1b/w/69bf3279-df2c-4ec8-b795-479e9cd3db1b.jpeg", + "#sha1_content": "a9a9d08f5fcdbc1e1eec7f203717f9df97b7a671", + + "browsingLevel": 1, + "createdAt": "2024-08-31T01:11:47.021Z", + "date" : "dt:2024-08-31 01:11:47", + "extension": "jpeg", + "filename" : "69bf3279-df2c-4ec8-b795-479e9cd3db1b", + "hash" : "ULN0-w?b4nRjxGM{-;t7M_t7NGae~qRjMyt7", + "height" : 1536, + "id" : 26962948, + "meta": { + "Denoising strength": "0.4", + "Model": "boleromix_XL_V1.3", + "Model hash": "afaf521da2", + "Size": "1152x1536", + "TI hashes": { + "negativeXL_D": "fff5d51ab655" + }, + "Tiled Diffusion scale factor": "1.5", + "Tiled Diffusion upscaler": "R-ESRGAN 4x+ Anime6B", + "VAE": "sdxl_vae.safetensors", + "Version": "v1.7.0", + "cfgScale": 7, + "hashes": { + "lora:add-detail-xl": "9c783c8ce46c", + "lora:classic maid_XL_V1.0": "e8f6e4297112", + "model": "afaf521da2", + "vae": "735e4c3a44", + }, + "negativePrompt": "negativeXL_D,(worst quality,extra legs,extra arms,extra ears,bad fingers,extra fingers,bad anatomy, missing fingers, lowres,username, artist name, text,pubic hair,bar censor,censored,multipul angle,split view,realistic,3D:1)", + "prompt": "masterpiece,ultra-detailed,best quality,8K,illustration,cute face,clean skin ,shiny hair,girl,ultra-detailed-eyes,simple background, maid, maid apron, maid headdress, long sleeves,tray,tea,cup,skirt lift", + "resources": [ + { + "hash": "9c783c8ce46c", + "name": "add-detail-xl", + "type": "lora", + "weight": 1, + }, + { + "hash": "e8f6e4297112", + "name": "classic maid_XL_V1.0", + "type": "lora", + }, + { + "hash": "afaf521da2", + "name": "boleromix_XL_V1.3", + "type": "model", + }, + ], + "sampler": "DPM++ 2M Karras", + "seed": 3150861441, + "steps": 20, + }, + "nsfw": False, + "nsfwLevel": "None", + "postId": 6030721, + "stats": { + "commentCount": int, + "cryCount" : int, + "dislikeCount": int, + "heartCount" : int, + "laughCount" : int, + "likeCount" : int, + }, + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/69bf3279-df2c-4ec8-b795-479e9cd3db1b/width=1152/69bf3279-df2c-4ec8-b795-479e9cd3db1b.jpeg", + "username": "bolero537", + "width": 1152, +}, + )