Skip to content

Commit

Permalink
[civitai] add 'image' extractors (#3706, #6220)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Sep 23, 2024
1 parent 933dc56 commit 92bbb9b
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 15 deletions.
2 changes: 1 addition & 1 deletion docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>Civitai</td>
<td>https://www.civitai.com/</td>
<td>Models, Search Results, Tag Searches, User Profiles</td>
<td>individual Images, Models, Search Results, Tag Searches (Images), Tag Searches (Models), User Profiles, User Images, User Models</td>
<td></td>
</tr>
<tr>
Expand Down
100 changes: 88 additions & 12 deletions gallery_dl/extractor/civitai.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,56 @@

from .common import Extractor, Message
from .. import text
import functools
import itertools
import re

BASE_PATTERN = r"(?:https?://)?civitai\.com"
USER_PATTERN = BASE_PATTERN + r"/user/([^/?#]+)"


class CivitaiExtractor(Extractor):
    """Base class for civitai extractors"""
    category = "civitai"
    root = "https://civitai.com"
    directory_fmt = ("{category}", "{username}", "images")
    filename_fmt = "{id}.{extension}"
    archive_fmt = "{hash}"
    request_interval = (0.5, 1.5)

    def _init(self):
        self.api = CivitaiAPI(self)

    def items(self):
        """Yield messages for the results of models() or images()

        If models() returns a truthy value, each model is queued for
        CivitaiModelExtractor and images() is not consulted.
        Otherwise each image from images() is emitted as a
        Directory message followed by a Url message.
        """
        models = self.models()
        if models:
            data = {"_extractor": CivitaiModelExtractor}
            for model in models:
                url = "{}/models/{}".format(self.root, model["id"])
                yield Message.Queue, url, data
            return

        images = self.images()
        if images:
            for image in images:
                url = self._orig(image["url"])
                image["date"] = text.parse_datetime(
                    image["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
                text.nameext_from_url(url, image)
                yield Message.Directory, image
                yield Message.Url, url, image
            return

    def models(self):
        """Return an iterable of model objects; empty by default"""
        return ()

    def images(self):
        """Return an iterable of image objects; empty by default"""
        return ()

    def _orig(self, url):
        """Return 'url' with its '/width=N/' segment replaced by '/w/'"""
        sub_width = functools.partial(re.compile(r"/width=\d*/").sub, "/w/")
        # Replace this method with the precompiled substitution so the
        # regex is only built on the first call. staticmethod() prevents
        # the partial object from being bound to the instance on Python
        # versions where partial objects behave as method descriptors.
        CivitaiExtractor._orig = staticmethod(sub_width)
        return sub_width(url)


class CivitaiModelExtractor(CivitaiExtractor):
Expand All @@ -43,7 +73,6 @@ class CivitaiModelExtractor(CivitaiExtractor):

def items(self):
model_id, version_id = self.groups
self._sub = re.compile(r"/width=\d*/").sub

model = self.api.model(model_id)
creator = model["creator"]
Expand Down Expand Up @@ -114,7 +143,7 @@ def _extract_files_image(self, model, version):
text.nameext_from_url(file["url"], {
"num" : num,
"file": file,
"url" : self._sub("/w/", file["url"]),
"url" : self._orig(file["url"]),
})
for num, file in enumerate(version["images"], 1)
]
Expand All @@ -129,7 +158,7 @@ def _extract_files_gallery(self, model, version):
yield text.nameext_from_url(file["url"], {
"num" : num,
"file": file,
"url" : self._sub("/w/", file["url"]),
"url" : self._orig(file["url"]),
})

def _validate_file_model(self, response):
Expand All @@ -146,16 +175,35 @@ def _validate_file_model(self, response):
return True


class CivitaiTagExtractor(CivitaiExtractor):
subcategory = "tag"
pattern = BASE_PATTERN + r"/tag/([^?/#]+)"
class CivitaiImageExtractor(CivitaiExtractor):
    """Extractor for a civitai image addressed by its numeric ID"""
    subcategory = "image"
    pattern = BASE_PATTERN + r"/images/(\d+)"
    example = "https://civitai.com/images/12345"

    def images(self):
        """Request the image whose ID was captured from the URL"""
        image_id = self.groups[0]
        params = {"imageId": image_id}
        return self.api.images(params)


class CivitaiTagModelsExtractor(CivitaiExtractor):
    """Extractor for civitai models looked up by tag"""
    subcategory = "tag-models"
    pattern = BASE_PATTERN + r"/(?:tag/|models\?tag=)([^/?&#]+)"
    example = "https://civitai.com/tag/TAG"

    def models(self):
        """Request models for the URL-decoded tag taken from the URL"""
        params = {"tag": text.unquote(self.groups[0])}
        return self.api.models(params)


class CivitaiTagImagesExtractor(CivitaiExtractor):
    """Extractor for civitai images looked up by tag"""
    subcategory = "tag-images"
    pattern = BASE_PATTERN + r"/images\?tags=([^&#]+)"
    example = "https://civitai.com/images?tags=12345"

    def images(self):
        """Request images for the URL-decoded 'tags' value from the URL"""
        # NOTE(review): the URL parameter is named 'tags' but the value is
        # forwarded under the key 'tag' -- confirm against the API.
        params = {"tag": text.unquote(self.groups[0])}
        return self.api.images(params)


class CivitaiSearchExtractor(CivitaiExtractor):
subcategory = "search"
pattern = BASE_PATTERN + r"/search/models\?([^#]+)"
Expand All @@ -168,14 +216,42 @@ def models(self):

class CivitaiUserExtractor(CivitaiExtractor):
    """Extractor that dispatches a civitai user URL to sub-extractors"""
    subcategory = "user"
    pattern = USER_PATTERN + r"/?(?:$|\?|#)"
    example = "https://civitai.com/user/USER"

    def initialize(self):
        # Deliberately empty override.
        # NOTE(review): presumably skips base-class setup because this
        # extractor only forwards to other extractors -- confirm.
        pass

    def items(self):
        """Delegate to the user's models and images extractors"""
        base = "{}/user/{}/".format(self.root, self.groups[0])
        # NOTE(review): the second argument looks like the default set of
        # sub-extractors to run -- confirm against _dispatch_extractors().
        return self._dispatch_extractors((
            (CivitaiUserModelsExtractor, base + "models"),
            (CivitaiUserImagesExtractor, base + "images"),
        ), ("user-models", "user-images"))


class CivitaiUserModelsExtractor(CivitaiExtractor):
    """Extractor for the models listed under a civitai user"""
    subcategory = "user-models"
    pattern = USER_PATTERN + r"/models/?(?:\?([^#]+))?"
    example = "https://civitai.com/user/USER/models"

    def models(self):
        """Request the user's models, forwarding any URL query parameters

        The optional query string captured from the URL is parsed into
        the request parameters; 'username' is then set from the URL path,
        replacing any 'username' given in the query string.
        """
        params = text.parse_query(self.groups[1])
        params["username"] = text.unquote(self.groups[0])
        return self.api.models(params)


class CivitaiUserImagesExtractor(CivitaiExtractor):
    """Extractor for the images listed under a civitai user"""
    subcategory = "user-images"
    pattern = USER_PATTERN + r"/images/?(?:\?([^#]+))?"
    example = "https://civitai.com/user/USER/images"

    def images(self):
        """Request the user's images, forwarding any URL query parameters"""
        username, query = self.groups
        api_params = text.parse_query(query)
        # the username from the URL path takes precedence over the query
        api_params["username"] = text.unquote(username)
        return self.api.images(api_params)


class CivitaiAPI():
"""Interface for the Civitai Public REST API
Expand Down
6 changes: 6 additions & 0 deletions scripts/supportedsites.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,12 @@
"bluesky": {
"posts": "",
},
"civitai": {
"tag-models": "Tag Searches (Models)",
"tag-images": "Tag Searches (Images)",
"user-models": "User Models",
"user-images": "User Images",
},
"coomerparty": {
"discord" : "",
"discord-server": "",
Expand Down
75 changes: 73 additions & 2 deletions test/results/civitai.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
__tests__ = (
{
"#url" : "https://civitai.com/models/703211/maid-classic",
"#category": ("", "civitai", "model"),
"#class" : civitai.CivitaiModelExtractor,
"#urls" : [
"https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/5c4efa68-bb58-47c5-a716-98cd0f51f047/w/26962950.jpeg",
Expand Down Expand Up @@ -49,7 +48,6 @@

{
"#url" : "https://civitai.com/models/703211?modelVersionId=786644",
"#category": ("", "civitai", "model"),
"#class" : civitai.CivitaiModelExtractor,
"#urls" : [
"https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/52b6efa7-801c-4901-90b4-fa3964d23480/w/26887862.jpeg",
Expand Down Expand Up @@ -88,4 +86,77 @@
"num" : range(1, 3),
},

{
"#url" : "https://civitai.com/images/26962948",
"#class" : civitai.CivitaiImageExtractor,
"#urls" : "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/69bf3279-df2c-4ec8-b795-479e9cd3db1b/w/69bf3279-df2c-4ec8-b795-479e9cd3db1b.jpeg",
"#sha1_content": "a9a9d08f5fcdbc1e1eec7f203717f9df97b7a671",

"browsingLevel": 1,
"createdAt": "2024-08-31T01:11:47.021Z",
"date" : "dt:2024-08-31 01:11:47",
"extension": "jpeg",
"filename" : "69bf3279-df2c-4ec8-b795-479e9cd3db1b",
"hash" : "ULN0-w?b4nRjxGM{-;t7M_t7NGae~qRjMyt7",
"height" : 1536,
"id" : 26962948,
"meta": {
"Denoising strength": "0.4",
"Model": "boleromix_XL_V1.3",
"Model hash": "afaf521da2",
"Size": "1152x1536",
"TI hashes": {
"negativeXL_D": "fff5d51ab655"
},
"Tiled Diffusion scale factor": "1.5",
"Tiled Diffusion upscaler": "R-ESRGAN 4x+ Anime6B",
"VAE": "sdxl_vae.safetensors",
"Version": "v1.7.0",
"cfgScale": 7,
"hashes": {
"lora:add-detail-xl": "9c783c8ce46c",
"lora:classic maid_XL_V1.0": "e8f6e4297112",
"model": "afaf521da2",
"vae": "735e4c3a44",
},
"negativePrompt": "negativeXL_D,(worst quality,extra legs,extra arms,extra ears,bad fingers,extra fingers,bad anatomy, missing fingers, lowres,username, artist name, text,pubic hair,bar censor,censored,multipul angle,split view,realistic,3D:1)",
"prompt": "masterpiece,ultra-detailed,best quality,8K,illustration,cute face,clean skin ,shiny hair,girl,ultra-detailed-eyes,simple background, <lora:add-detail-xl:1> <lora:classic maid_XL_V1.0:1> maid, maid apron, maid headdress, long sleeves,tray,tea,cup,skirt lift",
"resources": [
{
"hash": "9c783c8ce46c",
"name": "add-detail-xl",
"type": "lora",
"weight": 1,
},
{
"hash": "e8f6e4297112",
"name": "classic maid_XL_V1.0",
"type": "lora",
},
{
"hash": "afaf521da2",
"name": "boleromix_XL_V1.3",
"type": "model",
},
],
"sampler": "DPM++ 2M Karras",
"seed": 3150861441,
"steps": 20,
},
"nsfw": False,
"nsfwLevel": "None",
"postId": 6030721,
"stats": {
"commentCount": int,
"cryCount" : int,
"dislikeCount": int,
"heartCount" : int,
"laughCount" : int,
"likeCount" : int,
},
"url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/69bf3279-df2c-4ec8-b795-479e9cd3db1b/width=1152/69bf3279-df2c-4ec8-b795-479e9cd3db1b.jpeg",
"username": "bolero537",
"width": 1152,
},

)

0 comments on commit 92bbb9b

Please sign in to comment.