From 187a3edecf78c38f2260d330c20a3b43e1aa8087 Mon Sep 17 00:00:00 2001 From: pingc0y Date: Thu, 11 May 2023 14:57:29 +0800 Subject: [PATCH] =?UTF-8?q?2023/5/11=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 35 +++--- cmd/cmd.go | 19 ++- config/config.go | 63 ++++++---- crawler/crawler.go | 62 +++++---- crawler/filter.go | 8 +- crawler/find.go | 174 +++++++++++++++++-------- crawler/{fuzz => }/jsFuzz.go | 2 +- crawler/run.go | 230 +++++++++++++++++++++++----------- crawler/state.go | 121 +++++++++--------- crawler/{fuzz => }/urlFuzz.go | 23 +--- main.go | 5 + mode/mode.go | 25 ++-- result/result.go | 8 +- util/utils.go | 43 ++++++- 14 files changed, 507 insertions(+), 311 deletions(-) rename crawler/{fuzz => }/jsFuzz.go (98%) rename crawler/{fuzz => }/urlFuzz.go (90%) diff --git a/README.md b/README.md index 7886aa6..fc48ef1 100644 --- a/README.md +++ b/README.md @@ -12,27 +12,15 @@ URLFinder是一款快速、全面、易用的页面信息提取工具 有什么需求或bug欢迎各位师傅提交lssues -#### 注意: +## 功能说明: fuzz功能是基于抓到的404目录和路径。将其当作字典,随机组合并碰撞出有效路径,从而解决路径拼接错误的问题 -为了更好的兼容和防止漏抓链接,放弃了低误报率,错误的链接会变多但漏抓概率变低,可通过 ‘-s 200’ 筛选状态码过滤无效的链接(但不推荐只看200状态码) - +结果会优先显示输入的url顶级域名,其他域名不做区分显示在 other -## 功能说明 -1.提取页面与JS中的JS、URL链接和敏感信息 -2.提取到的链接会显示状态码、响应大小、标题等(带cookie操作时请使用-m 3 安全模式,防止误操作) -3.提取批量URL -4.yml配置 自定义Headers请求头、代理、抓取规则、黑名单等 -5.结果导出到csv、json、html -6.记录抓取来源,便于手动分析 -7.指定抓取域名(支持正则表达式) -8.指定baseurl路径(指定目录拼接) -9.使用代理ip -10.对404链接Fuzz(测试版,有问题提issue) +结果会优先显示200,按从小到大排序(输入的域名最优先,就算是404也会排序在其他子域名的200前面) -结果会优先显示输入的url顶级域名,其他域名不做区分显示在 other -结果会优先显示200,按从小到大排序(输入的域名最优先,就算是404也会排序在其他子域名的200前面) +为了更好的兼容和防止漏抓链接,放弃了低误报率,错误的链接会变多但漏抓概率变低,可通过 ‘-s 200’ 筛选状态码过滤无效的链接(但不推荐只看200状态码) ## 使用截图 @@ -54,7 +42,7 @@ URLFinder.exe -u http://www.baidu.com -s 200,403 -m 2 ``` URLFinder.exe -s all -m 2 -f url.txt -o d:/ ``` -参数: +参数(更多参数使用 -i 配置): ``` -a 自定义user-agent请求头 -b 自定义baseurl路径 @@ -63,14 +51,16 @@ URLFinder.exe -s all -m 2 -f url.txt -o d:/ -f 批量url抓取,需指定url文本路径 -ff 与-f区别:全部抓取的数据,视为同一个url的结果来处理(只打印一份结果 | 只会输出一份结果) -h 帮助信息 --i 加载yaml配置文件,可自定义请求头 抓取规则等(不存在时,会在当前目录创建一个默认yaml配置文件) +-i 加载yaml配置文件,可自定义请求头、抓取规则等(不存在时,会在当前目录创建一个默认yaml配置文件) -m 抓取模式: 1 正常抓取(默认) 2 深入抓取 (URL深入一层 JS深入三层 防止抓偏) 3 安全深入抓取(过滤delete,remove等敏感路由) +-max 最大抓取数 -o 结果导出到csv、json、html文件,需指定导出文件目录(.代表当前目录) -s 显示指定状态码,all为显示全部 -t 设置线程数(默认50) +-time 设置超时时间(默认5,单位秒) -u 目标URL -x 设置代理,格式: http://username:password@127.0.0.1:8877 -z 提取所有目录对404链接进行fuzz(只对主域名下的链接生效,需要与-s一起使用) @@ -117,7 +107,14 @@ SET GOOS=darwin SET GOARCH=arm64 go build -ldflags "-s -w" -o ./URLFinder-macos-arm64 ``` -## 更新说明 +## 更新说明 +2023/5/11 +变化 -i 配置文件可自定义:线程数、抓取深度、敏感路由、超时时间、最大抓取数 +新增 -time 设置超时时间 +新增 -max 设置最大抓取数 +新增 添加版本更新提示 +修复 已知bug + 2023/5/5 修复 多个任务时html结果混乱 新增 结果添加302跳转信息 diff --git a/cmd/cmd.go b/cmd/cmd.go index a1236e7..57214aa 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -7,6 +7,9 @@ import ( "os" ) +var Update = "2023.5.11" +var XUpdate string + var ( H bool I bool @@ -22,6 +25,8 @@ var ( O string X string T = 50 + TI = 5 + MA = 99999 Z int ) @@ -34,19 +39,21 @@ func init() { flag.StringVar(&FF, "ff", "", "set urlFile one\n与-f区别:全部抓取的数据,视为同一个url的结果来处理(只打印一份结果 | 只会输出一份结果)") flag.BoolVar(&H, "h", false, "this help\n帮助信息") flag.BoolVar(&I, "i", false, "set configFile\n加载yaml配置文件(不存在时,会在当前目录创建一个默认yaml配置文件)") - flag.IntVar(&M, "m", 1, "set mode\n抓取模式 \n 1 normal\n 正常抓取(默认) \n 2 thorough\n 深入抓取 (url深入一层,js深入三层,防止抓偏) \n 3 security\n 安全深入抓取(过滤delete,remove等敏感路由) \n ") + flag.IntVar(&M, "m", 1, "set mode\n抓取模式 \n 1 normal\n 正常抓取(默认) \n 2 thorough\n 深入抓取(默认url深入一层,js深入三层,-i可以自定义) \n 3 security\n 安全深入抓取(过滤delete,remove等敏感路由.-i可自定义) ") + flag.IntVar(&MA, "max", 99999, "set maximum\n最大抓取链接数") flag.StringVar(&O, "o", "", "set outFile\n结果导出到csv、json、html文件,需指定导出文件目录(.代表当前目录)") flag.StringVar(&S, "s", "", "set Status\n显示指定状态码,all为显示全部(多个状态码用,隔开)") - flag.IntVar(&T, "t", 50, "set thread\n设置线程数(默认50)\n") + flag.IntVar(&T, "t", 50, "set Thread\n设置线程数(默认50)") + flag.IntVar(&TI, "time", 5, "set Timeout\n设置超时时间(默认5,单位秒)") flag.StringVar(&U, "u", "", "set Url\n目标URL") - flag.StringVar(&X, "x", "", "set httpProxy\n设置代理,格式: http://username:password@127.0.0.1:8809") - flag.IntVar(&Z, "z", 0, "set Fuzz\n对404链接进行fuzz(只对主域名下的链接生效,需要与-s一起使用) \n 1 decreasing\n 目录递减fuzz \n 2 2combination\n 2级目录组合fuzz(适合少量链接使用) \n 3 3combination\n 3级目录组合fuzz(适合少量链接使用) \n") + flag.StringVar(&X, "x", "", "set Proxy\n设置代理,格式: http://username:password@127.0.0.1:8809") + flag.IntVar(&Z, "z", 0, "set Fuzz\n对404链接进行fuzz(只对主域名下的链接生效,需要与 -s 一起使用) \n 1 decreasing\n 目录递减fuzz \n 2 2combination\n 2级目录组合fuzz(适合少量链接使用) \n 3 3combination\n 3级目录组合fuzz(适合少量链接使用) ") // 改变默认的 Usage flag.Usage = usage } func usage() { - fmt.Fprintf(os.Stderr, `Usage: URLFinder [-a user-agent] [-b baseurl] [-c cookie] [-d domainName] [-f urlFile] [-ff urlFile one] [-h help] [-i configFile] [-m mode] [-o outFile] [-s Status] [-t thread] [-u Url] [-x httpProxy] [-z fuzz] + fmt.Fprintf(os.Stderr, `Usage: URLFinder [-a user-agent] [-b baseurl] [-c cookie] [-d domainName] [-f urlFile] [-ff urlFile one] [-h help] [-i configFile] [-m mode] [-max maximum] [-o outFile] [-s Status] [-t thread] [-time timeout] [-u url] [-x proxy] [-z fuzz] Options: `) @@ -54,6 +61,6 @@ Options: } func Parse() { - color.LightCyan.Println(" __ __ ___ _ _ \n /\\ /\\ /__\\ / / / __(_)_ __ __| | ___ _ __ \n/ / \\ \\/ \\/// / / _\\ | | '_ \\ / _` |/ _ \\ '__|\n\\ \\_/ / _ \\ /___ / | | | | | (_| | __/ | \n \\___/\\/ \\_\\____\\/ |_|_| |_|\\__,_|\\___|_| \n\nBy: pingc0y\nUpdateTime: 2023/5/5\nGithub: https://github.com/pingc0y/URLFinder \n") + color.LightCyan.Printf(" __ __ ___ _ _ \n /\\ /\\ /__\\ / / / __(_)_ __ __| | ___ _ __ \n/ / \\ \\/ \\/// / / _\\ | | '_ \\ / _` |/ _ \\ '__|\n\\ \\_/ / _ \\ /___ / | | | | | (_| | __/ | \n \\___/\\/ \\_\\____\\/ |_|_| |_|\\__,_|\\___|_| \n\nBy: pingc0y\nUpdate: %s | %s\nGithub: https://github.com/pingc0y/URLFinder \n\n", Update, XUpdate) flag.Parse() } diff --git a/config/config.go b/config/config.go index 78ad088..8fb23f0 100644 --- a/config/config.go +++ b/config/config.go @@ -59,29 +59,39 @@ var ( Other = []string{"(access.{0,1}key|access.{0,1}Key|access.{0,1}Id|access.{0,1}id|.{0,5}密码|.{0,5}账号|默认.{0,5}|加密|解密|password:.{0,10}|username:.{0,10})"} ) +var ( + UrlSteps = 1 + JsSteps = 3 +) + var ( Lock sync.Mutex Wg sync.WaitGroup Mux sync.Mutex Ch = make(chan int, 50) - Jsch = make(chan int, 50/2) - Urlch = make(chan int, 50/2) + Jsch = make(chan int, 50/10*3) + Urlch = make(chan int, 50/10*7) ) // 读取配置文件 func GetConfig(path string) { - con := &mode.Config{} if f, err := os.Open(path); err != nil { if strings.Contains(err.Error(), "The system cannot find the file specified") || strings.Contains(err.Error(), "no such file or directory") { - con.Headers = map[string]string{"Cookie": cmd.C, "User-Agent": `Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0`, "Accept": "*/*"} - con.Proxy = "" - con.JsFind = JsFind - con.UrlFind = UrlFind - con.JsFiler = JsFiler - con.UrlFiler = UrlFiler - con.JsFuzzPath = JsFuzzPath - con.InfoFind = map[string][]string{"Phone": Phone, "Email": Email, "IDcard": IDcard, "Jwt": Jwt, "Other": Other} - data, err2 := yaml.Marshal(con) + Conf.Headers = map[string]string{"Cookie": cmd.C, "User-Agent": `Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0`, "Accept": "*/*"} + Conf.Proxy = "" + Conf.JsFind = JsFind + Conf.UrlFind = UrlFind + Conf.JsFiler = JsFiler + Conf.UrlFiler = UrlFiler + Conf.JsFuzzPath = JsFuzzPath + Conf.JsSteps = JsSteps + Conf.UrlSteps = UrlSteps + Conf.Risks = Risks + Conf.Timeout = cmd.TI + Conf.Thread = cmd.T + Conf.Max = cmd.MA + Conf.InfoFind = map[string][]string{"Phone": Phone, "Email": Email, "IDcard": IDcard, "Jwt": Jwt, "Other": Other} + data, err2 := yaml.Marshal(Conf) err2 = os.WriteFile(path, data, 0644) if err2 != nil { fmt.Println(err) @@ -94,18 +104,23 @@ func GetConfig(path string) { } os.Exit(1) } else { - yaml.NewDecoder(f).Decode(con) - Conf = *con - JsFind = con.JsFind - UrlFind = con.UrlFind - JsFiler = con.JsFiler - UrlFiler = con.UrlFiler - JsFuzzPath = con.JsFuzzPath - Phone = con.InfoFind["Phone"] - Email = con.InfoFind["Email"] - IDcard = con.InfoFind["IDcard"] - Jwt = con.InfoFind["Jwt"] - Other = con.InfoFind["Other"] + yaml.NewDecoder(f).Decode(&Conf) + JsFind = Conf.JsFind + UrlFind = Conf.UrlFind + JsFiler = Conf.JsFiler + UrlFiler = Conf.UrlFiler + JsFuzzPath = Conf.JsFuzzPath + Phone = Conf.InfoFind["Phone"] + Email = Conf.InfoFind["Email"] + IDcard = Conf.InfoFind["IDcard"] + Jwt = Conf.InfoFind["Jwt"] + Other = Conf.InfoFind["Other"] + JsSteps = Conf.JsSteps + UrlSteps = Conf.UrlSteps + Risks = Conf.Risks + cmd.T = Conf.Thread + cmd.MA = Conf.Max + cmd.TI = Conf.Timeout } } diff --git a/crawler/crawler.go b/crawler/crawler.go index bb8f30f..d439453 100644 --- a/crawler/crawler.go +++ b/crawler/crawler.go @@ -2,7 +2,6 @@ package crawler import ( "compress/gzip" - "crypto/tls" "fmt" "github.com/pingc0y/URLFinder/cmd" "github.com/pingc0y/URLFinder/config" @@ -11,24 +10,22 @@ import ( "io" "net/http" "net/url" - "os" "regexp" "strings" - "time" ) // 蜘蛛抓取页面内容 func Spider(u string, num int) { - var is bool + is := true defer func() { config.Wg.Done() - if !is { + if is { <-config.Ch } - }() - fmt.Printf("\rStart %d Spider...", config.Progress) + }() config.Mux.Lock() + fmt.Printf("\rStart %d Spider...", config.Progress) config.Progress++ config.Mux.Unlock() //标记完成 @@ -48,23 +45,6 @@ func Spider(u string, num int) { } } AppendEndUrl(u) - - tr := &http.Transport{ - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, - } - //配置代理 - if cmd.X != "" { - proxyUrl, parseErr := url.Parse(cmd.X) - if parseErr != nil { - fmt.Println("代理地址错误: \n" + parseErr.Error()) - os.Exit(1) - } - tr.Proxy = http.ProxyURL(proxyUrl) - } else if cmd.I { - //加载yaml配置 - util.SetProxyConfig(tr) - } - client := &http.Client{Timeout: 10 * time.Second, Transport: tr} request, err := http.NewRequest("GET", u, nil) if err != nil { return @@ -83,13 +63,32 @@ func Spider(u string, num int) { } //处理返回结果 + //tr := &http.Transport{ + // TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + //} + //client = &http.Client{Timeout: time.Duration(cmd.TI) * time.Second, + // Transport: tr, + // CheckRedirect: func(req *http.Request, via []*http.Request) error { + // if len(via) >= 10 { + // return fmt.Errorf("Too many redirects") + // } + // if len(via) > 0 { + // if via[0] != nil && via[0].URL != nil { + // result.Redirect[via[0].URL.String()] = true + // } else { + // result.Redirect[req.URL.String()] = true + // } + // + // } + // return nil + // }, + //} response, err := client.Do(request) if err != nil { return - } else { - defer response.Body.Close() - } + defer response.Body.Close() + result := "" //解压 if response.Header.Get("Content-Encoding") == "gzip" { @@ -129,13 +128,12 @@ func Spider(u string, num int) { path = "/" } } + is = false <-config.Ch - is = true - //提取js - jsFind(result, host, scheme, path, source, num) + jsFind(result, host, scheme, path, u, num) //提取url - urlFind(result, host, scheme, path, source, num) + urlFind(result, host, scheme, path, u, num) //提取信息 infoFind(result, source) @@ -143,9 +141,9 @@ func Spider(u string, num int) { // 打印Validate进度 func PrintProgress() { + config.Mux.Lock() num := len(result.ResultJs) + len(result.ResultUrl) fmt.Printf("\rValidate %.0f%%", float64(config.Progress+1)/float64(num+1)*100) - config.Mux.Lock() config.Progress++ config.Mux.Unlock() } diff --git a/crawler/filter.go b/crawler/filter.go index 41c930a..e8fc1d9 100644 --- a/crawler/filter.go +++ b/crawler/filter.go @@ -13,14 +13,16 @@ func jsFilter(str [][]string) [][]string { //对不需要的数据过滤 for i := range str { str[i][0], _ = url.QueryUnescape(str[i][1]) + str[i][0] = strings.TrimSpace(str[i][0]) str[i][0] = strings.Replace(str[i][0], " ", "", -1) str[i][0] = strings.Replace(str[i][0], "\\/", "/", -1) str[i][0] = strings.Replace(str[i][0], "%3A", ":", -1) str[i][0] = strings.Replace(str[i][0], "%2F", "/", -1) - + str[i][0] = strings.Replace(str[i][0], "./", "/", -1) //去除不是.js的链接 if !strings.HasSuffix(str[i][0], ".js") && !strings.Contains(str[i][0], ".js?") { str[i][0] = "" + continue } //过滤配置的黑名单 @@ -44,15 +46,17 @@ func urlFilter(str [][]string) [][]string { //对不需要的数据过滤 for i := range str { str[i][0], _ = url.QueryUnescape(str[i][1]) + str[i][0] = strings.TrimSpace(str[i][0]) str[i][0] = strings.Replace(str[i][0], " ", "", -1) str[i][0] = strings.Replace(str[i][0], "\\/", "/", -1) str[i][0] = strings.Replace(str[i][0], "%3A", ":", -1) str[i][0] = strings.Replace(str[i][0], "%2F", "/", -1) - + str[i][0] = strings.Replace(str[i][0], "./", "/", -1) //去除不存在字符串和数字的url,判断为错误数据 match, _ := regexp.MatchString("[a-zA-Z]+|[0-9]+", str[i][0]) if !match { str[i][0] = "" + continue } //对抓到的域名做处理 diff --git a/crawler/find.go b/crawler/find.go index d88fa29..3e06e2e 100644 --- a/crawler/find.go +++ b/crawler/find.go @@ -32,41 +32,61 @@ func jsFind(cont, host, scheme, path, source string, num int) { continue } if strings.HasPrefix(js[0], "https:") || strings.HasPrefix(js[0], "http:") { - AppendJs(js[0], source) - if num < 5 && (cmd.M == 2 || cmd.M == 3) { - config.Wg.Add(1) - config.Ch <- 1 - go Spider(js[0], num+1) + switch AppendJs(js[0], source) { + case 0: + if num <= config.JsSteps && (cmd.M == 2 || cmd.M == 3) { + config.Wg.Add(1) + config.Ch <- 1 + go Spider(js[0], num+1) + } + case 1: + return + case 2: + continue } + } else if strings.HasPrefix(js[0], "//") { - AppendJs(scheme+":"+js[0], source) - if num < 5 && (cmd.M == 2 || cmd.M == 3) { - config.Wg.Add(1) - config.Ch <- 1 - go Spider(scheme+":"+js[0], num+1) + switch AppendJs(scheme+":"+js[0], source) { + case 0: + if num <= config.JsSteps && (cmd.M == 2 || cmd.M == 3) { + config.Wg.Add(1) + config.Ch <- 1 + go Spider(scheme+":"+js[0], num+1) + } + case 1: + return + case 2: + continue } } else if strings.HasPrefix(js[0], "/") { - AppendJs(host+js[0], source) - if num < 5 && (cmd.M == 2 || cmd.M == 3) { - config.Wg.Add(1) - config.Ch <- 1 - go Spider(host+js[0], num+1) - } - } else if strings.HasPrefix(js[0], "./") { - AppendJs(host+"/"+js[0], source) - if num < 5 && (cmd.M == 2 || cmd.M == 3) { - config.Wg.Add(1) - config.Ch <- 1 - go Spider(host+"/"+js[0], num+1) + switch AppendJs(host+js[0], source) { + case 0: + if num <= config.JsSteps && (cmd.M == 2 || cmd.M == 3) { + config.Wg.Add(1) + config.Ch <- 1 + go Spider(host+js[0], num+1) + } + case 1: + return + case 2: + continue } + } else { - AppendJs(host+cata+js[0], source) - if num < 5 && (cmd.M == 2 || cmd.M == 3) { - config.Wg.Add(1) - config.Ch <- 1 - go Spider(host+cata+js[0], num+1) + switch AppendJs(host+cata+js[0], source) { + case 0: + if num <= config.JsSteps && (cmd.M == 2 || cmd.M == 3) { + config.Wg.Add(1) + config.Ch <- 1 + go Spider(host+cata+js[0], num+1) + } + case 1: + return + case 2: + continue } + } } @@ -100,19 +120,32 @@ func urlFind(cont, host, scheme, path, source string, num int) { continue } if strings.HasPrefix(url[0], "https:") || strings.HasPrefix(url[0], "http:") { - AppendUrl(url[0], source) - if num < 2 && (cmd.M == 2 || cmd.M == 3) { - config.Wg.Add(1) - config.Ch <- 1 - go Spider(url[0], num+1) + switch AppendUrl(url[0], source) { + case 0: + if num <= config.UrlSteps && (cmd.M == 2 || cmd.M == 3) { + config.Wg.Add(1) + config.Ch <- 1 + go Spider(url[0], num+1) + } + case 1: + return + case 2: + continue } } else if strings.HasPrefix(url[0], "//") { - AppendUrl(scheme+":"+url[0], source) - if num < 2 && (cmd.M == 2 || cmd.M == 3) { - config.Wg.Add(1) - config.Ch <- 1 - go Spider(scheme+":"+url[0], num+1) + switch AppendUrl(scheme+":"+url[0], source) { + case 0: + if num <= config.UrlSteps && (cmd.M == 2 || cmd.M == 3) { + config.Wg.Add(1) + config.Ch <- 1 + go Spider(scheme+":"+url[0], num+1) + } + case 1: + return + case 2: + continue } + } else if strings.HasPrefix(url[0], "/") { urlz := "" if cmd.B != "" { @@ -120,11 +153,17 @@ func urlFind(cont, host, scheme, path, source string, num int) { } else { urlz = host + url[0] } - AppendUrl(urlz, source) - if num < 2 && (cmd.M == 2 || cmd.M == 3) { - config.Wg.Add(1) - config.Ch <- 1 - go Spider(urlz, num+1) + switch AppendUrl(urlz, source) { + case 0: + if num <= config.UrlSteps && (cmd.M == 2 || cmd.M == 3) { + config.Wg.Add(1) + config.Ch <- 1 + go Spider(urlz, num+1) + } + case 1: + return + case 2: + continue } } else if !strings.HasSuffix(source, ".js") { urlz := "" @@ -137,19 +176,50 @@ func urlFind(cont, host, scheme, path, source string, num int) { } else { urlz = host + cata + url[0] } - AppendUrl(urlz, source) - if num < 2 && (cmd.M == 2 || cmd.M == 3) { - config.Wg.Add(1) - config.Ch <- 1 - go Spider(urlz, num+1) + switch AppendUrl(urlz, source) { + case 0: + if num <= config.UrlSteps && (cmd.M == 2 || cmd.M == 3) { + config.Wg.Add(1) + config.Ch <- 1 + go Spider(urlz, num+1) + } + case 1: + return + case 2: + continue } + } else if strings.HasSuffix(source, ".js") { - AppendUrl(result.Jsinurl[host+path]+url[0], source) - if num < 2 && (cmd.M == 2 || cmd.M == 3) { - config.Wg.Add(1) - config.Ch <- 1 - go Spider(result.Jsinurl[host+path]+url[0], num+1) + urlz := "" + if cmd.B != "" { + if strings.HasSuffix(cmd.B, "/") { + urlz = cmd.B + url[0] + } else { + urlz = cmd.B + "/" + url[0] + } + } else { + config.Lock.Lock() + su := result.Jsinurl[source] + config.Lock.Unlock() + if strings.HasSuffix(su, "/") { + urlz = su + url[0] + } else { + urlz = su + "/" + url[0] + } } + switch AppendUrl(urlz, source) { + case 0: + if num <= config.UrlSteps && (cmd.M == 2 || cmd.M == 3) { + config.Wg.Add(1) + config.Ch <- 1 + go Spider(urlz, num+1) + } + case 1: + return + case 2: + continue + } + } } } diff --git a/crawler/fuzz/jsFuzz.go b/crawler/jsFuzz.go similarity index 98% rename from crawler/fuzz/jsFuzz.go rename to crawler/jsFuzz.go index 80fb61b..e486234 100644 --- a/crawler/fuzz/jsFuzz.go +++ b/crawler/jsFuzz.go @@ -1,4 +1,4 @@ -package fuzz +package crawler import ( "github.com/pingc0y/URLFinder/config" diff --git a/crawler/run.go b/crawler/run.go index 9892dab..6079a84 100644 --- a/crawler/run.go +++ b/crawler/run.go @@ -2,80 +2,28 @@ package crawler import ( "bufio" + "crypto/tls" "flag" "fmt" "github.com/gookit/color" "github.com/pingc0y/URLFinder/cmd" "github.com/pingc0y/URLFinder/config" - "github.com/pingc0y/URLFinder/crawler/fuzz" "github.com/pingc0y/URLFinder/mode" "github.com/pingc0y/URLFinder/result" "github.com/pingc0y/URLFinder/util" "io" + "net" + "net/http" + "net/url" "os" "regexp" "strings" "time" ) -func start(u string) { - fmt.Println("Target URL: " + color.LightBlue.Sprintf(u)) - config.Wg.Add(1) - config.Ch <- 1 - go Spider(u, 1) - config.Wg.Wait() - config.Progress = 1 - fmt.Printf("\r\nSpider OK \n") - result.ResultUrl = util.RemoveRepeatElement(result.ResultUrl) - result.ResultJs = util.RemoveRepeatElement(result.ResultJs) - if cmd.S != "" { - fmt.Printf("Start %d Validate...\n", len(result.ResultUrl)+len(result.ResultJs)) - fmt.Printf("\r ") - fuzz.JsFuzz() - //验证JS状态 - for i, s := range result.ResultJs { - config.Wg.Add(1) - config.Jsch <- 1 - go JsState(s.Url, i, result.ResultJs[i].Source) - } - //验证URL状态 - for i, s := range result.ResultUrl { - config.Wg.Add(1) - config.Urlch <- 1 - go UrlState(s.Url, i) - } - config.Wg.Wait() - - time.Sleep(1 * time.Second) - fmt.Printf("\r ") - fmt.Printf("\rValidate OK \n\n") - - if cmd.Z != 0 { - fuzz.UrlFuzz() - time.Sleep(1 * time.Second) - } - } - AddSource() - -} - -func Res() { - if len(result.ResultJs) == 0 && len(result.ResultUrl) == 0 { - fmt.Println("未获取到数据") - return - } - //打印还是输出 - if len(cmd.O) > 0 { - result.OutFileJson() - result.OutFileCsv() - result.OutFileHtml() - } else { - UrlToRedirect() - result.Print() - } -} +var client *http.Client -func Run() { +func load() { if cmd.O != "" { if !util.IsDir(cmd.O) { return @@ -92,16 +40,66 @@ func Run() { fmt.Println("至少使用 -u -f -ff 指定一个url") os.Exit(0) } - if cmd.U != "" && !regexp.MustCompile("https{0,1}://").MatchString(cmd.U) { + u, ok := url.Parse(cmd.U) + if cmd.U != "" && ok != nil { fmt.Println("url格式错误,请填写正确url") os.Exit(0) } + cmd.U = u.String() if cmd.T != 50 { - config.Ch = make(chan int, cmd.T+1) - config.Jsch = make(chan int, cmd.T/2+1) - config.Urlch = make(chan int, cmd.T/2+1) + config.Ch = make(chan int, cmd.T) + config.Jsch = make(chan int, cmd.T/10*3) + config.Urlch = make(chan int, cmd.T/10*7) + } + + tr := &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: time.Second * 30, + KeepAlive: time.Second * 30, + }).DialContext, + MaxIdleConns: cmd.T / 2, + MaxIdleConnsPerHost: cmd.T + 10, + IdleConnTimeout: time.Second * 90, + TLSHandshakeTimeout: time.Second * 90, + ExpectContinueTimeout: time.Second * 10, + } + + if cmd.X != "" { + proxyUrl, parseErr := url.Parse(cmd.X) + if parseErr != nil { + fmt.Println("代理地址错误: \n" + parseErr.Error()) + os.Exit(1) + } + tr.Proxy = http.ProxyURL(proxyUrl) } + if cmd.I { + util.SetProxyConfig(tr) + } + client = &http.Client{Timeout: time.Duration(cmd.TI) * time.Second, + Transport: tr, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 10 { + return fmt.Errorf("Too many redirects") + } + if len(via) > 0 { + if via[0] != nil && via[0].URL != nil { + AddRedirect(via[0].URL.String()) + } else { + AddRedirect(req.URL.String()) + } + + } + return nil + }, + } + +} + +func Run() { + load() if cmd.F != "" { // 创建句柄 fi, err := os.Open(cmd.F) @@ -161,38 +159,110 @@ func Run() { Res() } -func AppendJs(url string, urltjs string) { +func start(u string) { + fmt.Println("Target URL: " + color.LightBlue.Sprintf(u)) + config.Wg.Add(1) + config.Ch <- 1 + go Spider(u, 1) + config.Wg.Wait() + config.Progress = 1 + fmt.Printf("\r\nSpider OK \n") + result.ResultUrl = util.RemoveRepeatElement(result.ResultUrl) + result.ResultJs = util.RemoveRepeatElement(result.ResultJs) + if cmd.S != "" { + fmt.Printf("Start %d Validate...\n", len(result.ResultUrl)+len(result.ResultJs)) + fmt.Printf("\r ") + JsFuzz() + //验证JS状态 + for i, s := range result.ResultJs { + config.Wg.Add(1) + config.Jsch <- 1 + go JsState(s.Url, i, result.ResultJs[i].Source) + } + //验证URL状态 + for i, s := range result.ResultUrl { + config.Wg.Add(1) + config.Urlch <- 1 + go UrlState(s.Url, i) + } + config.Wg.Wait() + + time.Sleep(1 * time.Second) + fmt.Printf("\r ") + fmt.Printf("\rValidate OK \n\n") + + if cmd.Z != 0 { + UrlFuzz() + time.Sleep(1 * time.Second) + } + } + AddSource() + +} + +func Res() { + if len(result.ResultJs) == 0 && len(result.ResultUrl) == 0 { + fmt.Println("未获取到数据") + return + } + //打印还是输出 + if len(cmd.O) > 0 { + result.OutFileJson() + result.OutFileCsv() + result.OutFileHtml() + } else { + UrlToRedirect() + result.Print() + } +} + +func AppendJs(ur string, urltjs string) int { config.Lock.Lock() defer config.Lock.Unlock() - url = strings.Replace(url, "/./", "/", -1) + if len(result.ResultUrl)+len(result.ResultJs) >= cmd.MA { + return 1 + } + _, err := url.Parse(ur) + if err != nil { + return 2 + } for _, eachItem := range result.ResultJs { - if eachItem.Url == url { - return + if eachItem.Url == ur { + return 0 } } - result.ResultJs = append(result.ResultJs, mode.Link{Url: url}) + result.ResultJs = append(result.ResultJs, mode.Link{Url: ur}) if strings.HasSuffix(urltjs, ".js") { - result.Jsinurl[url] = result.Jsinurl[urltjs] + result.Jsinurl[ur] = result.Jsinurl[urltjs] } else { re := regexp.MustCompile("[a-zA-z]+://[^\\s]*/|[a-zA-z]+://[^\\s]*") u := re.FindAllStringSubmatch(urltjs, -1) - result.Jsinurl[url] = u[0][0] + result.Jsinurl[ur] = u[0][0] } - result.Jstourl[url] = urltjs + result.Jstourl[ur] = urltjs + return 0 } -func AppendUrl(url string, urlturl string) { +func AppendUrl(ur string, urlturl string) int { config.Lock.Lock() defer config.Lock.Unlock() - url = strings.Replace(url, "/./", "/", -1) + if len(result.ResultUrl)+len(result.ResultJs) >= cmd.MA { + return 1 + } + _, err := url.Parse(ur) + if err != nil { + return 2 + } for _, eachItem := range result.ResultUrl { - if eachItem.Url == url { - return + if eachItem.Url == ur { + return 0 } } - result.ResultUrl = append(result.ResultUrl, mode.Link{Url: url}) - result.Urltourl[url] = urlturl + url.Parse(ur) + result.ResultUrl = append(result.ResultUrl, mode.Link{Url: ur}) + result.Urltourl[ur] = urlturl + return 0 } func AppendInfo(info mode.Info) { @@ -225,6 +295,12 @@ func GetEndUrl(url string) bool { } +func AddRedirect(url string) { + config.Lock.Lock() + defer config.Lock.Unlock() + result.Redirect[url] = true +} + func AddSource() { for i := range result.ResultJs { result.ResultJs[i].Source = result.Jstourl[result.ResultJs[i].Url] @@ -259,4 +335,6 @@ func Initialization() { result.Jsinurl = make(map[string]string) result.Jstourl = make(map[string]string) result.Urltourl = make(map[string]string) + result.Redirect = make(map[string]bool) + } diff --git a/crawler/state.go b/crawler/state.go index 66e1741..e78495f 100644 --- a/crawler/state.go +++ b/crawler/state.go @@ -1,8 +1,6 @@ package crawler import ( - "crypto/tls" - "fmt" "github.com/pingc0y/URLFinder/cmd" "github.com/pingc0y/URLFinder/config" "github.com/pingc0y/URLFinder/mode" @@ -11,15 +9,14 @@ import ( "io" "net/http" "net/url" - "os" "regexp" "strconv" "strings" - "time" ) // 检测js访问状态码 func JsState(u string, i int, sou string) { + defer func() { config.Wg.Done() <-config.Jsch @@ -38,32 +35,14 @@ func JsState(u string, i int, sou string) { } } - tr := &http.Transport{ - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, - } - //配置代理 - if cmd.X != "" { - proxyUrl, parseErr := url.Parse(cmd.X) - if parseErr != nil { - fmt.Println("代理地址错误: \n" + parseErr.Error()) - os.Exit(1) - } - tr.Proxy = http.ProxyURL(proxyUrl) - } //加载yaml配置(proxy) - if cmd.I { - util.SetProxyConfig(tr) - } + //配置代理 var redirect string - client := &http.Client{Timeout: 15 * time.Second, Transport: tr, - CheckRedirect: func(req *http.Request, via []*http.Request) error { - if len(via) > 0 { - redirect = req.URL.String() - } - return nil - }, + ur, err2 := url.Parse(u) + if err2 != nil { + return } - request, err := http.NewRequest("GET", u, nil) + request, err := http.NewRequest("GET", ur.String(), nil) if err != nil { result.ResultJs[i].Url = "" return @@ -78,7 +57,26 @@ func JsState(u string, i int, sou string) { if cmd.I { util.SetHeadersConfig(&request.Header) } - + //tr := &http.Transport{ + // TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + //} + //client = &http.Client{Timeout: time.Duration(cmd.TI) * time.Second, + // Transport: tr, + // CheckRedirect: func(req *http.Request, via []*http.Request) error { + // if len(via) >= 10 { + // return fmt.Errorf("Too many redirects") + // } + // if len(via) > 0 { + // if via[0] != nil && via[0].URL != nil { + // result.Redirect[via[0].URL.String()] = true + // } else { + // result.Redirect[req.URL.String()] = true + // } + // + // } + // return nil + // }, + //} //处理返回结果 response, err := client.Do(request) if err != nil { @@ -89,9 +87,8 @@ func JsState(u string, i int, sou string) { result.ResultJs[i].Url = "" } return - } else { - defer response.Body.Close() } + defer response.Body.Close() code := response.StatusCode if strings.Contains(cmd.S, strconv.Itoa(code)) || cmd.S == "all" && (sou != "Fuzz" && code == 200) { @@ -102,9 +99,12 @@ func JsState(u string, i int, sou string) { } else { length = len(dataBytes) } - if redirect != "" { + config.Lock.Lock() + if result.Redirect[ur.String()] { code = 302 + redirect = response.Request.URL.String() } + config.Lock.Unlock() result.ResultJs[i] = mode.Link{Url: u, Status: strconv.Itoa(code), Size: strconv.Itoa(length), Redirect: redirect} } else { result.ResultJs[i].Url = "" @@ -131,33 +131,12 @@ func UrlState(u string, i int) { } } - tr := &http.Transport{ - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, - } - //配置代理 - if cmd.X != "" { - proxyUrl, parseErr := url.Parse(cmd.X) - if parseErr != nil { - fmt.Println("代理地址错误: \n" + parseErr.Error()) - os.Exit(1) - } - tr.Proxy = http.ProxyURL(proxyUrl) - } - - //加载yaml配置(proxy) - if cmd.I { - util.SetProxyConfig(tr) - } var redirect string - client := &http.Client{Timeout: 15 * time.Second, Transport: tr, - CheckRedirect: func(req *http.Request, via []*http.Request) error { - if len(via) > 0 { - redirect = req.URL.String() - } - return nil - }, + ur, err2 := url.Parse(u) + if err2 != nil { + return } - request, err := http.NewRequest("GET", u, nil) + request, err := http.NewRequest("GET", ur.String(), nil) if err != nil { result.ResultUrl[i].Url = "" return @@ -174,9 +153,28 @@ func UrlState(u string, i int) { if cmd.I { util.SetHeadersConfig(&request.Header) } + //tr := &http.Transport{ + // TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + //} + //client = &http.Client{Timeout: time.Duration(cmd.TI) * time.Second, + // Transport: tr, + // CheckRedirect: func(req *http.Request, via []*http.Request) error { + // if len(via) >= 10 { + // return fmt.Errorf("Too many redirects") + // } + // if len(via) > 0 { + // if via[0] != nil && via[0].URL != nil { + // result.Redirect[via[0].URL.String()] = true + // } else { + // result.Redirect[req.URL.String()] = true + // } + // + // } + // return nil + // }, + //} //处理返回结果 response, err := client.Do(request) - if err != nil { if strings.Contains(err.Error(), "Client.Timeout") && cmd.S == "all" { result.ResultUrl[i] = mode.Link{Url: u, Status: "timeout", Size: "0"} @@ -184,9 +182,8 @@ func UrlState(u string, i int) { result.ResultUrl[i].Url = "" } return - } else { - defer response.Body.Close() } + defer response.Body.Close() code := response.StatusCode if strings.Contains(cmd.S, strconv.Itoa(code)) || cmd.S == "all" { @@ -200,9 +197,13 @@ func UrlState(u string, i int) { body := string(dataBytes) re := regexp.MustCompile("<[tT]itle>(.*?)") title := re.FindAllStringSubmatch(body, -1) - if redirect != "" { + config.Lock.Lock() + if result.Redirect[ur.String()] { code = 302 + redirect = response.Request.URL.String() } + config.Lock.Unlock() + if len(title) != 0 { result.ResultUrl[i] = mode.Link{Url: u, Status: strconv.Itoa(code), Size: strconv.Itoa(length), Title: title[0][1], Redirect: redirect} } else { diff --git a/crawler/fuzz/urlFuzz.go b/crawler/urlFuzz.go similarity index 90% rename from crawler/fuzz/urlFuzz.go rename to crawler/urlFuzz.go index d1299d0..2697c38 100644 --- a/crawler/fuzz/urlFuzz.go +++ b/crawler/urlFuzz.go @@ -1,7 +1,6 @@ -package fuzz +package crawler import ( - "crypto/tls" "fmt" "github.com/pingc0y/URLFinder/cmd" "github.com/pingc0y/URLFinder/config" @@ -10,12 +9,9 @@ import ( "github.com/pingc0y/URLFinder/util" "io" "net/http" - "net/url" - "os" "regexp" "strconv" "strings" - "time" ) // Fuzz @@ -54,23 +50,6 @@ func fuzzGet(u string) { } } } - tr := &http.Transport{ - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, - } - //配置代理 - if cmd.X != "" { - proxyUrl, parseErr := url.Parse(cmd.X) - if parseErr != nil { - fmt.Println("代理地址错误: \n" + parseErr.Error()) - os.Exit(1) - } - tr.Proxy = http.ProxyURL(proxyUrl) - } - //加载yaml配置(proxy) - if cmd.I { - util.SetProxyConfig(tr) - } - client := &http.Client{Timeout: 10 * time.Second, Transport: tr} request, err := http.NewRequest("GET", u, nil) if err != nil { return diff --git a/main.go b/main.go index 88fd258..3070435 100644 --- a/main.go +++ b/main.go @@ -3,9 +3,14 @@ package main import ( "github.com/pingc0y/URLFinder/cmd" "github.com/pingc0y/URLFinder/crawler" + "github.com/pingc0y/URLFinder/util" + "io" + "log" ) func main() { + log.SetOutput(io.Discard) + util.GetUpdate() cmd.Parse() crawler.Run() } diff --git a/mode/mode.go b/mode/mode.go index 5bcb6e6..300e702 100644 --- a/mode/mode.go +++ b/mode/mode.go @@ -1,17 +1,20 @@ package mode type Config struct { - Headers map[string]string `yaml:"headers"` - Proxy string `yaml:"proxy"` - - JsFind []string `yaml:"jsFind"` - UrlFind []string `yaml:"urlFind"` - InfoFind map[string][]string `yaml:"infoFiler"` - - JsFiler []string `yaml:"jsFiler"` - UrlFiler []string `yaml:"urlFiler"` - - JsFuzzPath []string `yaml:"jsFuzzPath"` + Proxy string `yaml:"proxy"` + Timeout int `yaml:"timeout"` + Thread int `yaml:"thread"` + UrlSteps int `yaml:"urlSteps"` + JsSteps int `yaml:"jsSteps"` + Max int `yaml:"max"` + Headers map[string]string `yaml:"headers"` + JsFind []string `yaml:"jsFind"` + UrlFind []string `yaml:"urlFind"` + InfoFind map[string][]string `yaml:"infoFiler"` + Risks []string `yaml:"risks"` + JsFiler []string `yaml:"jsFiler"` + UrlFiler []string `yaml:"urlFiler"` + JsFuzzPath []string `yaml:"jsFuzzPath"` } type Link struct { diff --git a/result/result.go b/result/result.go index 302adf6..dba32c2 100644 --- a/result/result.go +++ b/result/result.go @@ -10,7 +10,6 @@ import ( "github.com/pingc0y/URLFinder/cmd" "github.com/pingc0y/URLFinder/mode" "github.com/pingc0y/URLFinder/util" - "log" "net/url" "os" "regexp" @@ -32,6 +31,7 @@ var ( Jstourl map[string]string Urltourl map[string]string Domains []string + Redirect map[string]bool ) func outHtmlString(link mode.Link) string { @@ -308,7 +308,7 @@ func OutFileJson() { file, err := os.OpenFile(fileName, os.O_CREATE|os.O_WRONLY, 0644) if err != nil { - log.Printf("创建失败:%s", err) + fmt.Printf("创建失败:%s", err) return } if cmd.D == "" { @@ -334,7 +334,7 @@ func OutFileJson() { data, err := json.Marshal(jsons) if err != nil { - log.Printf("json化失败:%s", err) + fmt.Printf("json化失败:%s", err) return } buf := bufio.NewWriter(file) @@ -343,7 +343,7 @@ func OutFileJson() { // 将缓冲中的数据写入 err = buf.Flush() if err != nil { - log.Println("json保存失败:", err) + fmt.Println("json保存失败:", err) } fmt.Println(strconv.Itoa(len(ResultJsHost)+len(ResultJsOther))+"JS + "+strconv.Itoa(len(ResultUrlHost)+len(ResultUrlOther))+"URL --> ", file.Name()) return diff --git a/util/utils.go b/util/utils.go index 2c5d642..59991ad 100644 --- a/util/utils.go +++ b/util/utils.go @@ -1,10 +1,12 @@ package util import ( + "encoding/json" "fmt" "github.com/pingc0y/URLFinder/cmd" "github.com/pingc0y/URLFinder/config" "github.com/pingc0y/URLFinder/mode" + "io" "math/rand" "net/http" "net/url" @@ -12,6 +14,7 @@ import ( "regexp" "strconv" "strings" + "time" ) // 判断所给路径是否为文件夹 @@ -112,7 +115,6 @@ func SetProxyConfig(tr *http.Transport) *http.Transport { // 提取顶级域名 func GetHost(u string) string { - re := regexp.MustCompile("([a-z0-9\\-]+\\.)*([a-z0-9\\-]+\\.[a-z0-9\\-]+)(:[0-9]+)?") var host string hosts := re.FindAllString(u, 1) @@ -179,8 +181,8 @@ func RemoveRepeatElement(list []mode.Link) []mode.Link { // 打印Fuzz进度 func PrintFuzz() { - fmt.Printf("\rFuzz %.0f%%", float64(config.Progress+1)/float64(config.FuzzNum+1)*100) config.Mux.Lock() + fmt.Printf("\rFuzz %.0f%%", float64(config.Progress+1)/float64(config.FuzzNum+1)*100) config.Progress++ config.Mux.Unlock() } @@ -366,3 +368,40 @@ func GetUserAgent() string { } return cmd.A } + +func GetUpdate() { + + url := fmt.Sprintf("https://api.github.com/repos/pingc0y/URLFinder/releases/latest") + client := &http.Client{ + Timeout: time.Second * 2, + } + resp, err := client.Get(url) + if err != nil { + cmd.XUpdate = "更新检测失败" + return + } + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + if err != nil { + cmd.XUpdate = "更新检测失败" + return + } + var release struct { + TagName string `json:"tag_name"` + } + err = json.Unmarshal(body, &release) + if err != nil { + cmd.XUpdate = "更新检测失败" + return + } + if release.TagName == "" { + cmd.XUpdate = "更新检测失败" + return + } + if cmd.Update != release.TagName { + cmd.XUpdate = "有新版本可用: " + release.TagName + } else { + cmd.XUpdate = "已是最新版本" + } + +}