Skip to content

Commit

Permalink
updated some of the broken extractors
Browse files Browse the repository at this point in the history
  • Loading branch information
gan-of-culture committed Jan 31, 2024
1 parent 247a801 commit 70bfd34
Show file tree
Hide file tree
Showing 17 changed files with 283 additions and 50 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ media
*token
*exhentai_test.go
*muchohentai_test.go
*webdriver
get-sauce*
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ The following links will direct you to adult content. Please keep that in mind!
| [booruproject (ex. rule34, gelbooru)](https://booru.org/top) | :heavy_check_mark: |:heavy_check_mark:|
| [booru.io](https://booru.io/) | :heavy_check_mark: | ? |
| [comicporn.xxx](https://comicporn.xxx) | :heavy_check_mark: | ? |
| [danbooru.donmai.us](https://danbooru.donmai.us) | :heavy_check_mark: | ? |
| [danbooru.donmai.us](https://danbooru.donmai.us) | :heavy_check_mark: | ? |:car:|
| [doujin.sexy](https://doujin.sexy) | :heavy_check_mark: | ? |
| [e-hentai.org](http://e-hentai.org/) | :heavy_check_mark: | ? |
| [exhentai.org](http://exhentai.org/) | :heavy_check_mark: | ? |:closed_lock_with_key:|
Expand Down Expand Up @@ -335,6 +335,9 @@ user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,
get-sauce -un "MyUserName" -up "MyUserPassword" http...
```

🚗
--> requires [geckodriver](https://github.com/mozilla/geckodriver) to work around DDoS protection

## Credit

- Thanks to [lux](https://github.com/iawia002/lux) for the great template
Expand Down
2 changes: 1 addition & 1 deletion downloader/downloader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func TestDownload(t *testing.T) {
Type: static.DataTypeImage,
URLs: []*static.URL{
{
URL: "https://lotus.paheal.net/_images/cf21c36b64db166b1e1aac9f3243d3ec/4698365%20-%20Ahri%20Cian_Yo%20League_of_Legends.jpg",
URL: "https://r34i.paheal-cdn.net/cf/21/cf21c36b64db166b1e1aac9f3243d3ec",
Ext: "jpg",
},
},
Expand Down
33 changes: 19 additions & 14 deletions extractors/danbooru/danbooru.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package danbooru

import (
"errors"
"fmt"
"log"
"regexp"
"strings"

"github.com/gan-of-culture/get-sauce/config"
"github.com/gan-of-culture/get-sauce/request"
"github.com/gan-of-culture/get-sauce/request/webdriver"
"github.com/gan-of-culture/get-sauce/static"
"github.com/gan-of-culture/get-sauce/utils"
)
Expand All @@ -25,10 +25,7 @@ func New() static.Extractor {

// Extract for danbooru pages
func (e *extractor) Extract(URL string) ([]*static.Data, error) {
config.FakeHeaders["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1)"
defer func() {
config.FakeHeaders["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36"
}()
config.FakeHeaders["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0"

posts, err := parseURL(URL)
if err != nil {
Expand Down Expand Up @@ -63,7 +60,13 @@ func parseURL(URL string) ([]string, error) {
return []string{linkToPost}, nil
}

htmlString, err := request.Get(URL)
wd, err := webdriver.New()
if err != nil {
return nil, err
}
defer wd.Close()

htmlString, err := wd.Get(URL)
if err != nil {
return nil, err
}
Expand All @@ -80,22 +83,24 @@ func parseURL(URL string) ([]string, error) {
}

func extractData(postURL string) (*static.Data, error) {
htmlString, err := request.Get(postURL)
wd, err := webdriver.New()
if err != nil {
return nil, err
}
defer wd.Close()

htmlString, err := wd.Get(postURL)
if err != nil {
return nil, err
}

matchedImgData := reIMGData.FindStringSubmatch(htmlString)
if len(matchedImgData) != 5 {
log.Println(htmlString)
return nil, static.ErrDataSourceParseFailed
}
// [1] = img original width [2] image original height [3] image name [4] src URL

size, err := request.Size(matchedImgData[4], postURL)
if err != nil {
return nil, errors.New("no image size not found")
}

return &static.Data{
Site: site,
Title: matchedImgData[3],
Expand All @@ -110,7 +115,7 @@ func extractData(postURL string) (*static.Data, error) {
},
},
Quality: fmt.Sprintf("%s x %s", matchedImgData[1], matchedImgData[2]),
Size: size,
Size: 0,
},
},
URL: postURL,
Expand Down
2 changes: 1 addition & 1 deletion extractors/danbooru/danbooru_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func TestExtract(t *testing.T) {
URL: "https://danbooru.donmai.us/posts/3749687",
Title: "konpaku youmu and konpaku youmu (touhou) drawn by niwashi_(yuyu)",
Quality: "1782 x 2048",
Size: 157584,
Size: 0,
},
},
}
Expand Down
4 changes: 3 additions & 1 deletion extractors/extractors.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func init() {
ninehentaiExtractor := ninehentai.New()
vravenExtractor := vraven.New()
nhgroupExtractor := nhgroup.New()
iwaraExtractor := iwara.New()

extractorsMap = map[string]static.Extractor{
"": universal.New(),
Expand Down Expand Up @@ -93,7 +94,8 @@ func init() {
"hentaizap.com": htdoujinExtractor,
"hitomi.la": hitomi.New(),
"imhentai.xxx": htdoujinExtractor,
"iwara.tv": iwara.New(),
"iwara.tv": iwaraExtractor,
"www.iwara.tv": iwaraExtractor,
"latesthentai.com": nhgroupExtractor,
"miohentai.com": miohentai.New(),
"muchohentai.com": muchohentai.New(),
Expand Down
2 changes: 1 addition & 1 deletion extractors/hentaimoon/hentaimoon.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func extractData(URL string) (*static.Data, error) {
}

data[0].Site = site
data[0].Title = utils.GetH1(&htmlString, -1)
data[0].Title = utils.GetH1(&htmlString, 0)

matchedSubtitleURL := reSubtitles.FindString(htmlString)
if matchedSubtitleURL != "" {
Expand Down
9 changes: 2 additions & 7 deletions extractors/hentaipulse/hentaipulse_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
package hentaipulse

import (
"testing"

"github.com/gan-of-culture/get-sauce/test"
)

/*
func TestParseURL(t *testing.T) {
tests := []struct {
Name string
Expand Down Expand Up @@ -54,4 +49,4 @@ func TestExtract(t *testing.T) {
test.Check(t, tt.Args, data[0])
})
}
}
}*/
9 changes: 0 additions & 9 deletions extractors/hstream/hstream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,6 @@ func TestExtract(t *testing.T) {
Size: 804660690,
},
},
{
Name: "Single Episode 4k legacy",
Args: test.Args{
URL: "https://hstream.moe/hentai/aku-no-onna-kanbu-full-moon-night-r-1",
Title: "Aku no Onna Kanbu: Full Moon Night R - 1",
Quality: "av1.2160p.webm",
Size: 397722506,
},
},
{
Name: "Single Episode",
Args: test.Args{
Expand Down
4 changes: 2 additions & 2 deletions extractors/iwara/iwara_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ func TestExtract(t *testing.T) {
Args: test.Args{
URL: "https://iwara.tv/image/x6hVrNaf0WVdLE/nico-tomoare-provocation-dance-preview-mmdd",
Title: "【コイカツ】 Nico Thick Tomoare Provocation Dance Preview 【MMDD】",
Quality: "1280x720",
Size: 294562,
Quality: "1920x1080",
Size: 454545,
},
},
}
Expand Down
2 changes: 1 addition & 1 deletion extractors/kvsplayer/kvsplayer.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func ExtractFromHTML(htmlString *string) ([]*static.Data, error) {
}

switch matchedKVSPlayer[0][2] {
case "4", "5", "8", "9", "11", "12", "13", "15":
case "4", "5", "6", "8", "9", "11", "12", "13", "15":
break
default:
fmt.Printf("Untested major version (%s) in player engine--Download may fail.", matchedKVSPlayer[0][2])
Expand Down
14 changes: 9 additions & 5 deletions extractors/rule34/rule34.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ var reParsePostID = regexp.MustCompile(`data-post-id=["']([^"']+)`)
var rePostID = regexp.MustCompile(`[0-9]{3,}`)
var reSourceURL = regexp.MustCompile(`id='main_image' src='([^']+)`)
var reVideoSourceURL = regexp.MustCompile(`<source src='([^']+)`)
var reTagBox = regexp.MustCompile(`tag_edit__tags' value='([^']+)`)
var reTagBox = regexp.MustCompile(`tag'[^>]+>([^<]+)`)
var reQuality = regexp.MustCompile(`data-(width|height)='([0-9]+)`)
var reVideoQuality = regexp.MustCompile(`id='main_image'.+\n[^0-9]+([0-9]+)[^0-9]+([0-9]+)`)
var reVideoQuality = regexp.MustCompile(`id='main_image'.+?: ([0-9]+)[^0-9]+([0-9]+)`)

type extractor struct{}

Expand Down Expand Up @@ -107,12 +107,16 @@ func extractData(URL string) (*static.Data, error) {

postSrcURL := matchedPostSrcURL[1]

matchedTagBox := reTagBox.FindStringSubmatch(htmlString)
if len(matchedTagBox) != 2 {
matchedTagBox := reTagBox.FindAllStringSubmatch(htmlString, -1)
if len(matchedTagBox) < 1 {
return nil, errors.New("couldn't extract tags for post")
}
tags := []string{}
for _, tag := range matchedTagBox {
tags = append(tags, tag[1])
}

title := fmt.Sprintf("%s %s", matchedTagBox[1], id[0])
title := fmt.Sprintf("%s %s", strings.Join(tags, " "), id[0])

var size int64
if config.Amount == 0 {
Expand Down
4 changes: 2 additions & 2 deletions extractors/rule34/rule34_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func TestExtract(t *testing.T) {
Name: "Single video",
Args: test.Args{
URL: "https://rule34.paheal.net/post/view/3464181",
Title: "animated audiodude blender Hv54rDSL Nier_(series) Nier_Automata sound webm YoRHa_No.2_Type_B 3464181",
Title: "Hv54rDSL Nier_(series) Nier_Automata YoRHa_No.2_Type_B animated audiodude blender sound webm 3464181",
Quality: "540 x 1280",
Size: 7503936,
},
Expand All @@ -76,7 +76,7 @@ func TestExtract(t *testing.T) {
Name: "Single GIF",
Args: test.Args{
URL: "https://rule34.paheal.net/post/view/3461411",
Title: "animated blood_elf World_of_Warcraft 3461411",
Title: "World_of_Warcraft animated blood_elf 3461411",
Quality: "480 x 854",
Size: 7811055,
},
Expand Down
2 changes: 1 addition & 1 deletion extractors/rule34video/rule34video.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func parseURL(URL string) []string {
return []string{URL}
}

re := regexp.MustCompile(site + `videos/\d+/[^"]+`)
re := regexp.MustCompile(site + `video/\d+/[^?"]+`)
return re.FindAllString(htmlString, -1)
}

Expand Down
Loading

0 comments on commit 70bfd34

Please sign in to comment.