From 335ee5c8d1453b8fd62a513261efaf1fcfa6d3ca Mon Sep 17 00:00:00 2001 From: theblackturtle Date: Tue, 11 Feb 2020 12:05:16 +0700 Subject: [PATCH] add alienvault.com source and new version --- core/crawler.go | 30 +++++++++++++--------------- core/othersource.go | 43 ++++++++++++++++++++++++++++++++++++++++ core/othersource_test.go | 9 +++++++++ core/version.go | 2 +- 4 files changed, 67 insertions(+), 17 deletions(-) diff --git a/core/crawler.go b/core/crawler.go index e86b92c..2db5743 100644 --- a/core/crawler.go +++ b/core/crawler.go @@ -256,25 +256,23 @@ func (crawler *Crawler) Start() { } fileExt := GetExtType(jsFileUrl) - if fileExt != ".js" || fileExt != ".xml" || fileExt != ".json" { - return - } + if fileExt == ".js" || fileExt == ".xml" || fileExt == ".json" { + if !crawler.jsSet.Duplicate(jsFileUrl) { + outputFormat := fmt.Sprintf("[javascript] - %s", jsFileUrl) + fmt.Println(outputFormat) + if crawler.Output != nil { + crawler.Output.WriteToFile(outputFormat) + } - if !crawler.jsSet.Duplicate(jsFileUrl) { - outputFormat := fmt.Sprintf("[javascript] - %s", jsFileUrl) - fmt.Println(outputFormat) - if crawler.Output != nil { - crawler.Output.WriteToFile(outputFormat) - } + // If JS file is minimal format. Try to find original format + if strings.Contains(jsFileUrl, ".min.js") { + originalJS := strings.ReplaceAll(jsFileUrl, ".min.js", ".js") + crawler.linkFinder(originalJS) + } - // If JS file is minimal format. Try to find original format - if strings.Contains(jsFileUrl, ".min.js") { - originalJS := strings.ReplaceAll(jsFileUrl, ".min.js", ".js") - crawler.linkFinder(originalJS) + // Request and Get JS link + crawler.linkFinder(jsFileUrl) } - - // Request and Get JS link - crawler.linkFinder(jsFileUrl) } }) diff --git a/core/othersource.go b/core/othersource.go index 764079d..ac26b86 100644 --- a/core/othersource.go +++ b/core/othersource.go @@ -21,6 +21,7 @@ func OtherSources(domain string, includeSubs bool) []string { getWaybackURLs, getCommonCrawlURLs, getVirusTotalURLs, + getOtxUrls, } var wg sync.WaitGroup @@ -170,3 +171,45 @@ func getVirusTotalURLs(domain string, noSubs bool) ([]wurl, error) { return out, nil } + +func getOtxUrls(domain string, noSubs bool) ([]wurl, error) { + var urls []wurl + page := 0 + for { + r, err := http.Get(fmt.Sprintf("https://otx.alienvault.com/api/v1/indicators/hostname/%s/url_list?limit=50&page=%d", domain, page)) + if err != nil { + return []wurl{}, err + } + bytes, err := ioutil.ReadAll(r.Body) + if err != nil { + return []wurl{}, err + } + r.Body.Close() + + wrapper := struct { + HasNext bool `json:"has_next"` + ActualSize int `json:"actual_size"` + URLList []struct { + Domain string `json:"domain"` + URL string `json:"url"` + Hostname string `json:"hostname"` + Httpcode int `json:"httpcode"` + PageNum int `json:"page_num"` + FullSize int `json:"full_size"` + Paged bool `json:"paged"` + } `json:"url_list"` + }{} + err = json.Unmarshal(bytes, &wrapper) + if err != nil { + return []wurl{}, err + } + for _, url := range wrapper.URLList { + urls = append(urls, wurl{url: url.URL}) + } + if !wrapper.HasNext { + break + } + page++ + } + return urls, nil +} diff --git a/core/othersource_test.go b/core/othersource_test.go index 4fbcc2b..45f6501 100644 --- a/core/othersource_test.go +++ b/core/othersource_test.go @@ -36,3 +36,12 @@ func TestGetWaybackURLs(t *testing.T) { t.Log(len(urls)) t.Log(urls) } + +func TestGetOtxUrls(t *testing.T) { + urls, err := getOtxUrls(domain, false) + if err != nil { + t.Fatal(err) + } + t.Log(len(urls)) + t.Log(urls) +} diff --git a/core/version.go b/core/version.go index b8a6c7c..9d3d840 100644 --- a/core/version.go +++ b/core/version.go @@ -3,5 +3,5 @@ package core const ( CLIName = "gospider" AUTHOR = "@theblackturtle" - VERSION = "v1.0.7" + VERSION = "v1.0.8" )