From 2e610b3fd79e1ac0945b694385edd88028f821ce Mon Sep 17 00:00:00 2001 From: theblackturtle Date: Tue, 29 Dec 2020 08:50:03 +0700 Subject: [PATCH] fix output --- core/crawler.go | 22 +++++++++-------- core/linkfinder.go | 38 ++++++++++++++--------------- core/utils.go | 59 +++++++++++++++++++++++++--------------------- core/utils_test.go | 12 +++++++--- 4 files changed, 72 insertions(+), 59 deletions(-) diff --git a/core/crawler.go b/core/crawler.go index 0525b38..0f0a2f2 100644 --- a/core/crawler.go +++ b/core/crawler.go @@ -443,17 +443,19 @@ func (crawler *Crawler) setupLinkFinder() { } for _, relPath := range paths { // JS Regex Result - outputFormat := fmt.Sprintf("[linkfinder] - [from: %s] - %s", response.Request.URL.String(), relPath) if !crawler.quiet { - outputFormat = fmt.Sprintf("%s%s", response.Request.URL.String(), relPath) + outputFormat := fmt.Sprintf("[linkfinder] - [from: %s] - %s", response.Request.URL.String(), relPath) fmt.Println(outputFormat) } - urlWithMainSite := FixUrl(crawler.site, relPath) - if urlWithMainSite != "" { - outputFormat = fmt.Sprintf("[linkfinder] - %s", urlWithMainSite) - if !crawler.quiet { - fmt.Println(outputFormat) - } + + rebuildURL := FixUrl(crawler.site, relPath) + if rebuildURL == "" { + continue + } + + outputFormat := fmt.Sprintf("[linkfinder] - %s", rebuildURL) + if !crawler.quiet { + fmt.Println(outputFormat) } if crawler.Output != nil { @@ -463,8 +465,8 @@ func (crawler *Crawler) setupLinkFinder() { // Try to request JS path // Try to generate URLs with main site - if urlWithMainSite != "" { - _ = crawler.C.Visit(urlWithMainSite) + if rebuildURL != "" { + _ = crawler.C.Visit(rebuildURL) } // Try to generate URLs with the site where Javascript file host in (must be in main or sub domain) diff --git a/core/linkfinder.go b/core/linkfinder.go index 680953b..1eba998 100644 --- a/core/linkfinder.go +++ b/core/linkfinder.go @@ -1,29 +1,29 @@ package core import ( - "regexp" - "strings" + "regexp" + "strings" ) var linkFinderRegex = regexp.MustCompile(`(?:"|')(((?:[a-zA-Z]{1,10}://|//)[^"'/]{1,}\.[a-zA-Z]{2,}[^"']{0,})|((?:/|\.\./|\./)[^"'><,;| *()(%%$^/\\\[\]][^"'><,;|()]{1,})|([a-zA-Z0-9_\-/]{1,}/[a-zA-Z0-9_\-/]{1,}\.(?:[a-zA-Z]{1,4}|action)(?:[\?|#][^"|']{0,}|))|([a-zA-Z0-9_\-/]{1,}/[a-zA-Z0-9_\-/]{3,}(?:[\?|#][^"|']{0,}|))|([a-zA-Z0-9_\-]{1,}\.(?:php|asp|aspx|jsp|json|action|html|js|txt|xml)(?:[\?|#][^"|']{0,}|)))(?:"|')`) func LinkFinder(source string) ([]string, error) { - var links []string - //source = strings.ToLower(source) - if len(source) > 1000000 { - source = strings.ReplaceAll(source, ";", ";\r\n") - source = strings.ReplaceAll(source, ",", ",\r\n") - } - source = DecodeChars(source) + var links []string + // source = strings.ToLower(source) + if len(source) > 1000000 { + source = strings.ReplaceAll(source, ";", ";\r\n") + source = strings.ReplaceAll(source, ",", ",\r\n") + } + source = DecodeChars(source) - match := linkFinderRegex.FindAllStringSubmatch(source, -1) - for _, m := range match { - matchGroup1 := FilterNewLines(m[1]) - if matchGroup1 == "" { - continue - } - links = append(links, matchGroup1) - } - links = Unique(links) - return links, nil + match := linkFinderRegex.FindAllStringSubmatch(source, -1) + for _, m := range match { + matchGroup1 := FilterNewLines(m[1]) + if matchGroup1 == "" { + continue + } + links = append(links, matchGroup1) + } + links = Unique(links) + return links, nil } diff --git a/core/utils.go b/core/utils.go index 07d26ac..098303a 100644 --- a/core/utils.go +++ b/core/utils.go @@ -33,35 +33,40 @@ func GetDomain(site *url.URL) string { return domain } -// func FixUrl(url string, site *url.URL) string { -// var newUrl string -// if strings.HasPrefix(url, "//") { -// // //google.com/example.php -// newUrl = site.Scheme + ":" + url +// func FixUrl(site *url.URL, nextLoc string) string { +// var newUrl string +// if strings.HasPrefix(nextLoc, "//") { +// // //google.com/example.php +// newUrl = site.Scheme + ":" + nextLoc // -// } else if strings.HasPrefix(url, "http") { -// // http://google.com || https://google.com -// newUrl = url +// } else if strings.HasPrefix(nextLoc, "http") { +// // http://google.com || https://google.com +// newUrl = nextLoc // -// } else if !strings.HasPrefix(url, "//") { -// if strings.HasPrefix(url, "/") { -// // Ex: /?thread=10 -// newUrl = site.Scheme + "://" + site.Host + url -// -// } else { -// if strings.HasPrefix(url, ".") { -// if strings.HasPrefix(url, "..") { -// newUrl = site.Scheme + "://" + site.Host + url[2:] -// } else { -// newUrl = site.Scheme + "://" + site.Host + url[1:] -// } -// } else { -// // "console/test.php" -// newUrl = site.Scheme + "://" + site.Host + "/" + url -// } -// } -// } -// return newUrl +// } else if !strings.HasPrefix(nextLoc, "//") { +// // if strings.HasPrefix(nextLoc, "/") { +// // // Ex: /?thread=10 +// // newUrl = site.Scheme + "://" + site.Host + nextLoc +// // +// // } else { +// // if strings.HasPrefix(nextLoc, ".") { +// // if strings.HasPrefix(nextLoc, "..") { +// // newUrl = site.Scheme + "://" + site.Host + nextLoc[2:] +// // } else { +// // newUrl = site.Scheme + "://" + site.Host + nextLoc[1:] +// // } +// // } else { +// // // "console/test.php" +// // newUrl = site.Scheme + "://" + site.Host + "/" + nextLoc +// // } +// // } +// nextLocUrl, err := url.Parse(nextLoc) +// if err != nil { +// return "" +// } +// newUrl = site.ResolveReference(nextLocUrl).String() +// } +// return newUrl // } func FixUrl(mainSite *url.URL, nextLoc string) string { diff --git a/core/utils_test.go b/core/utils_test.go index 814ab3b..8776685 100644 --- a/core/utils_test.go +++ b/core/utils_test.go @@ -1,8 +1,14 @@ package core -import "testing" +import ( + "testing" +) func TestGetExtType(t *testing.T) { - url := "https://domain.com/data/avatars/m/123/12312312.jpg?1562846649" - t.Log(GetExtType(url)) + url := "https://domain.com/data/avatars/m/123/12312312.jpg?1562846649" + t.Log(GetExtType(url)) +} + +func TestFixUrl(t *testing.T) { + // }