Commit 4e0d242

fix bugs and new version

theblackturtle committed Feb 8, 2020
1 parent ac22388 commit 4e0d242
Showing 6 changed files with 45 additions and 35 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -21,7 +21,7 @@ go get -u github.com/theblackturtle/gospider

## Usage
```
A Simple Web Spider - v1.0 by @theblackturtle
A Simple Web Spider - v1.0.4 by @theblackturtle
Usage:
gospider [flags]
@@ -49,7 +49,9 @@ Flags:
-a, --other-source Find URLs from 3rd party (Archive.org, CommonCrawl.org, VirusTotal.com)
-w, --include-subs Include subdomains crawled from 3rd party. Default is main domain
--debug Turn on debug mode
-v, --verbose Turn on verbose
--no-redirect Disable redirect
--version Check version
-h, --help help for gospider
```
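
The flag list above now includes `-v/--verbose`, `--no-redirect`, and `--version`. A couple of illustrative invocations (the target URL is a placeholder, and `-s` is the site flag from the full help text, which is truncated in this excerpt):

```
# crawl quietly, pull URLs from 3rd-party sources, and don't follow redirects
gospider -s "https://example.com" -a -w --no-redirect

# print the installed version and exit
gospider --version
```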

48 changes: 25 additions & 23 deletions core/crawler.go
@@ -9,6 +9,7 @@ import (
"github.com/spf13/cobra"
"github.com/theblackturtle/gospider/stringset"
"io/ioutil"
"net"
"net/http"
"net/url"
"os"
@@ -48,8 +49,18 @@ func NewCrawler(site string, cmd *cobra.Command) *Crawler {

// Setup http client
tr := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
DialContext: (&net.Dialer{
Timeout: 60 * time.Second,
KeepAlive: 30 * time.Second,
DualStack: true,
}).DialContext,
MaxIdleConns: 0,
IdleConnTimeout: 5 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 5 * time.Second,
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}

// Set proxy
proxy, _ := cmd.Flags().GetString("proxy")
if proxy != "" {
@@ -143,16 +154,15 @@ func NewCrawler(site string, cmd *cobra.Command) *Crawler {
}

// Set url whitelist regex
domainRegex := "^(https?|mms|mssx|mmsh|rtsp|pnm)://([^/]+[.])?(?i:" + strings.ReplaceAll(domain, ".", "[.]") + ")(/.*)?$"
domainRe := regexp.MustCompile(domainRegex)
domainRe := regexp.MustCompile(domain)
c.URLFilters = append(c.URLFilters, domainRe)

// Set Limit Rule
err := c.Limit(&colly.LimitRule{
DomainRegexp: domainRegex,
Parallelism: concurrent,
Delay: time.Duration(delay) * time.Second,
RandomDelay: time.Duration(randomDelay) * time.Second,
DomainGlob: domain,
Parallelism: concurrent,
Delay: time.Duration(delay) * time.Second,
RandomDelay: time.Duration(randomDelay) * time.Second,
})
if err != nil {
Logger.Errorf("Failed to set Limit Rule: %s", err)
@@ -205,7 +215,6 @@ func (crawler *Crawler) Start() {
if !formSet.Duplicate(formUrl) {
if crawler.domainRe.MatchString(formUrl) {
outputFormat := fmt.Sprintf("[form] - %s", formUrl)
//Logger.Info(outputFormat + "\n")
fmt.Println(outputFormat)
if crawler.Output != nil {
crawler.Output.WriteToFile(outputFormat)
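
formSet, jsFileSet, subSet, and awsSet come from the project's stringset package, which is not part of this diff. A rough sketch of the Duplicate behaviour these handlers rely on (a seen-before check that also records the value), with assumed names and internals:

```
package main

import (
	"fmt"
	"sync"
)

// Illustrative stand-in for the stringset package used above; the real
// implementation may differ, but the handlers only need this contract:
// Duplicate reports whether the value was seen before and marks it as seen.
type StringFilter struct {
	mu   sync.Mutex
	seen map[string]bool
}

func NewStringFilter() *StringFilter {
	return &StringFilter{seen: make(map[string]bool)}
}

func (s *StringFilter) Duplicate(v string) bool {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.seen[v] {
		return true
	}
	s.seen[v] = true
	return false
}

func main() {
	set := NewStringFilter()
	fmt.Println(set.Duplicate("https://example.com/login")) // false: first time
	fmt.Println(set.Duplicate("https://example.com/login")) // true: already seen
}
```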
@@ -229,7 +238,6 @@

if !jsFileSet.Duplicate(jsFileUrl) {
outputFormat := fmt.Sprintf("[javascript] - %s", jsFileUrl)
//Logger.Info(outputFormat + "\n")
fmt.Println(outputFormat)
if crawler.Output != nil {
crawler.Output.WriteToFile(outputFormat)
@@ -249,7 +257,6 @@
for _, sub := range subs {
if !subSet.Duplicate(sub) {
outputFormat := fmt.Sprintf("[subdomains] - %s", sub)
//Logger.Info(outputFormat + "\n")
fmt.Println(outputFormat)
if crawler.Output != nil {
crawler.Output.WriteToFile(outputFormat)
@@ -262,31 +269,27 @@
for _, e := range aws {
if !awsSet.Duplicate(e) {
outputFormat := fmt.Sprintf("[aws-s3] - %s", e)
//Logger.Info(outputFormat + "\n")
fmt.Println(outputFormat)
if crawler.Output != nil {
crawler.Output.WriteToFile(outputFormat)
}
}
}

// Skip 404 Not Found and 429 Too Many Requests responses
if response.StatusCode == 404 || response.StatusCode == 429 {
return
}

// Verify which links are working
u := response.Request.URL.String()
if crawler.domainRe.MatchString(u) {
outputFormat := fmt.Sprintf("[url] - [code-%d] - %s", response.StatusCode, u)
fmt.Println(outputFormat)
if crawler.Output != nil {
crawler.Output.WriteToFile(outputFormat)
}
outputFormat := fmt.Sprintf("[url] - [code-%d] - %s", response.StatusCode, u)
fmt.Println(outputFormat)
if crawler.Output != nil {
crawler.Output.WriteToFile(outputFormat)
}
})

crawler.C.OnError(func(response *colly.Response, err error) {
// Status == 0 means "The server IP address could not be found."
if response.StatusCode == 404 || response.StatusCode == 429 || response.StatusCode == 0 {
return
}
u := response.Request.URL.String()
outputFormat := fmt.Sprintf("[url] - [code-%d] - %s", response.StatusCode, u)
fmt.Println(outputFormat)
@@ -336,7 +339,6 @@ func (crawler *Crawler) linkFinder(site string, jsUrl string) {

// JS Regex Result
outputFormat := fmt.Sprintf("[linkfinder] - [from: %s] - %s", jsUrl, link)
//Logger.Info(outputFormat + "\n")
fmt.Println(outputFormat)
if crawler.Output != nil {
crawler.Output.WriteToFile(outputFormat)
4 changes: 4 additions & 0 deletions core/output.go
@@ -28,3 +28,7 @@ func (o *Output) WriteToFile(msg string) {
defer o.mu.Unlock()
_, _ = o.f.WriteString(msg + "\n")
}

func (o *Output) Close() error {
return o.f.Close()
}
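
The new Close method completes the Output type's lifecycle. The struct definition sits outside this hunk; judging from the mutex and file handle used above, it is presumably something like the sketch below (field names are assumptions):

```
package main

import (
	"os"
	"sync"
)

// Rough sketch of the Output type implied by the methods in this diff;
// the real field names and definition live elsewhere in core/output.go.
type Output struct {
	mu sync.Mutex
	f  *os.File
}

func (o *Output) WriteToFile(msg string) {
	o.mu.Lock()
	defer o.mu.Unlock()
	_, _ = o.f.WriteString(msg + "\n")
}

func (o *Output) Close() error {
	return o.f.Close()
}

func main() {
	f, err := os.Create("out.txt")
	if err != nil {
		return
	}
	o := &Output{f: f}
	o.WriteToFile("[url] - [code-200] - https://example.com")
	_ = o.Close()
}
```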
3 changes: 1 addition & 2 deletions core/sitemap.go
@@ -18,12 +18,11 @@ func ParseSiteMap(site string, depth int, output *Output, c *colly.Collector, wg
Logger.Infof("Trying to find %s", site+path)
_ = sitemap.ParseFromSite(site+path, func(entry sitemap.Entry) error {
outputFormat := fmt.Sprintf("[sitemap] - %s", entry.GetLocation())
//Logger.Infof(outputFormat + "\n")
fmt.Println(outputFormat)
if output != nil {
output.WriteToFile(outputFormat)
}
c.Visit(entry.GetLocation())
_ = c.Visit(entry.GetLocation())
return nil
})
}
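
The functional change here is only that the error from c.Visit is now explicitly discarded. Visit returns an error for URLs that are already visited, filtered out, or malformed, so the alternative is to log it, as in this small sketch (colly v1 import path and URL are placeholders):

```
package main

import (
	"log"

	"github.com/gocolly/colly"
)

func main() {
	c := colly.NewCollector()

	// The diff discards the error with `_ =`; logging it instead looks like this.
	if err := c.Visit("https://example.com/"); err != nil {
		log.Printf("visit skipped: %v", err) // e.g. already visited, filtered, or invalid URL
	}
}
```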
2 changes: 1 addition & 1 deletion core/version.go
@@ -3,5 +3,5 @@ package core
const (
CLIName = "gospider"
AUTHOR = "@theblackturtle"
VERSION = "v1.0.3"
VERSION = "v1.0.4"
)
19 changes: 11 additions & 8 deletions main.go
@@ -62,21 +62,23 @@ func run(cmd *cobra.Command, args []string) {
}

version, _ := cmd.Flags().GetBool("version")
if version{
fmt.Printf("Version: %s",core.VERSION)
if version {
fmt.Printf("Version: %s\n", core.VERSION)
os.Exit(0)
}
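
The version check above now prints with a trailing newline and gofmt-clean spacing before exiting. The flag itself is registered elsewhere in the command setup; a self-contained sketch of how that wiring might look with cobra (the flag definitions here are assumptions matching the README help text, not part of this diff):

```
package main

import (
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

func main() {
	// Assumed flag registration; the real definitions live in gospider's
	// command setup, outside this diff.
	cmd := &cobra.Command{
		Use: "gospider",
		Run: func(cmd *cobra.Command, args []string) {
			version, _ := cmd.Flags().GetBool("version")
			if version {
				fmt.Printf("Version: %s\n", "v1.0.4")
				os.Exit(0)
			}
		},
	}
	cmd.Flags().Bool("version", false, "Check version")
	cmd.Flags().BoolP("verbose", "v", false, "Turn on verbose")
	cmd.Flags().Bool("no-redirect", false, "Disable redirect")
	_ = cmd.Execute()
}
```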

verbose, _ := cmd.Flags().GetBool("verbose")
if !verbose {
core.Logger.SetOutput(ioutil.Discard)
}

isDebug, _ := cmd.Flags().GetBool("debug")
if isDebug {
core.Logger.SetLevel(logrus.DebugLevel)
core.Logger.SetOutput(os.Stdout)
} else {
core.Logger.SetLevel(logrus.InfoLevel)
}

verbose, _ := cmd.Flags().GetBool("verbose")
if !verbose {
core.Logger.SetOutput(ioutil.Discard)
core.Logger.SetOutput(os.Stdout)
}
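
The logger setup is reordered so the --debug check runs first, raising the level and sending output to stdout, while a missing --verbose flag discards log output. The flattened diff makes the exact before/after hard to read, so the sketch below is one plausible reading, with hard-coded flag values:

```
package main

import (
	"io/ioutil"
	"os"

	"github.com/sirupsen/logrus"
)

func main() {
	// Illustrative only: flag values are hard-coded and the exact ordering in
	// gospider's main.go may differ slightly from this reading of the diff.
	isDebug, verbose := false, true

	logger := logrus.New()
	if isDebug {
		logger.SetLevel(logrus.DebugLevel)
		logger.SetOutput(os.Stdout) // route debug logging to stdout
	} else {
		logger.SetLevel(logrus.InfoLevel)
	}

	if !verbose {
		logger.SetOutput(ioutil.Discard) // silence logging unless verbose is requested
	} else {
		logger.SetOutput(os.Stdout)
	}

	logger.Info("logger configured")
}
```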

// Create output folder when save file option selected
@@ -141,6 +143,7 @@

crawler := core.NewCrawler(site, cmd)
site = strings.TrimSuffix(u.String(), "/")

siteWg.Add(1)
go func() {
crawler.Start()
@@ -169,7 +172,6 @@
continue
}
outputFormat := fmt.Sprintf("[other-sources] - %s", url)
//core.Logger.Info(outputFormat + "\n")
fmt.Println(outputFormat)
if crawler.Output != nil {
crawler.Output.WriteToFile(outputFormat)
@@ -180,6 +182,7 @@
}
siteWg.Wait()
crawler.C.Wait()
_ = crawler.Output.Close()
}
}()
}
