
2023/5/11 update
pingc0y committed May 11, 2023
1 parent 3b00d5b commit 187a3ed
Showing 14 changed files with 507 additions and 311 deletions.
35 changes: 16 additions & 19 deletions README.md
@@ -12,27 +12,15 @@ URLFinder is a fast, comprehensive, and easy-to-use page information extraction tool

Feature requests and bug reports are welcome; please submit an issue

## Features

The fuzz feature treats directories and paths harvested from 404 responses as a dictionary, combining them at random to probe for valid paths, which works around incorrectly joined paths (see the sketch at the end of this section)

1. Extracts JS links, URLs, and sensitive information from pages and JS files
2. Shows status code, response size, title, and more for each extracted link (when sending a cookie, use -m 3 safe mode to avoid destructive requests)
3. Extracts URLs in batch
4. yml configuration for custom request headers, proxy, crawl rules, blacklist, and more
5. Exports results to csv, json, and html
6. Records where each link was found, for manual analysis
7. Restricts crawling to specified domains (regular expressions supported)
8. Custom baseurl path (joined as a directory prefix)
9. Proxy IP support
10. Fuzzing of 404 links (beta; please open an issue for problems)

Results show the input URL's top-level domain first; other domains are not distinguished and appear under other
Results are sorted by status code in ascending order, 200 first (the input domain always sorts first, so even its 404s are listed before the 200s of other subdomains)
For better compatibility and to avoid missing links, a low false-positive rate was traded away: more bad links will appear, but fewer are missed. Use '-s 200' to filter out invalid links by status code (relying on 200 alone is not recommended)
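The "combination" fuzz modes can be pictured as follows. This is a minimal sketch of the idea in Go, with hypothetical segment names; it illustrates the technique rather than URLFinder's actual implementation:

```go
package main

import "fmt"

// combine2 sketches 2-level combination fuzzing: directory segments
// harvested from 404 paths are used as a dictionary, and every ordered
// pair is joined into a new candidate path.
func combine2(segments []string) []string {
	var candidates []string
	for _, a := range segments {
		for _, b := range segments {
			if a == b {
				continue // identical pairs rarely yield new paths
			}
			candidates = append(candidates, "/"+a+"/"+b)
		}
	}
	return candidates
}

func main() {
	// Directory names that appeared in paths returning 404.
	segments := []string{"api", "v2", "admin"}
	for _, p := range combine2(segments) {
		fmt.Println(p) // e.g. /api/v2, /admin/api, ...
	}
}
```

Each candidate would then be requested, and any non-404 response reported as a recovered path.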

## Screenshots

@@ -54,7 +42,7 @@ URLFinder.exe -u http://www.baidu.com -s 200,403 -m 2
```
URLFinder.exe -s all -m 2 -f url.txt -o d:/
```
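A combined run exercising the new flags from this update might look like the following (hypothetical values; all flags are documented in the parameter list below):
```
URLFinder.exe -u http://www.baidu.com -m 2 -t 100 -time 10 -max 5000 -o .
```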
Parameters (more parameters can be set via the -i config file)
```
-a     custom user-agent request header
-b     custom baseurl path
@@ -63,14 +51,16 @@ URLFinder.exe -s all -m 2 -f url.txt -o d:/
-f     batch URL crawl; takes the path to a text file of URLs
-ff    unlike -f, all crawled data is treated as the result of a single URL (only one set of results is printed and exported)
-h     this help message
-i     load a yaml config file to customize request headers, crawl rules, and more (if it does not exist, a default yaml config file is created in the current directory)
-m     crawl mode:
       1 normal crawl (default)
       2 thorough crawl (URLs one level deep, JS three levels deep, to avoid drifting off target)
       3 safe thorough crawl (filters sensitive routes such as delete and remove)
-max   maximum number of links to crawl
-o     export results to csv, json, and html files; takes the output directory (. means the current directory)
-s     show only the given status codes; all shows everything
-t     number of threads (default 50)
-time  timeout (default 5, in seconds)
-u     target URL
-x     proxy, format: http://username:[email protected]:8877
-z     fuzz 404 links using all extracted directories (only applies to links under the main domain; must be used with -s)
@@ -117,7 +107,14 @@ SET GOOS=darwin
SET GOARCH=arm64
go build -ldflags "-s -w" -o ./URLFinder-macos-arm64
```
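The same recipe targets other platforms by swapping the two variables; for example, a 64-bit Linux build (standard Go cross-compilation behavior, not specific to this project):
```
SET GOOS=linux
SET GOARCH=amd64
go build -ldflags "-s -w" -o ./URLFinder-linux-amd64
```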
## Changelog
2023/5/11
Changed: the -i config file can now customize thread count, crawl depth, sensitive routes, timeout, and maximum crawl count
Added: -time to set the timeout
Added: -max to set the maximum crawl count
Added: version update notice
Fixed: known bugs

2023/5/5
Fixed: jumbled html results when running multiple tasks
Added: 302 redirect information in results
19 changes: 13 additions & 6 deletions cmd/cmd.go
@@ -7,6 +7,9 @@ import (
"os"
)

var Update = "2023.5.11"
var XUpdate string

var (
H bool
I bool
@@ -22,6 +25,8 @@ var (
O string
X string
T = 50
TI = 5
MA = 99999
Z int
)

@@ -34,26 +39,28 @@ func init() {
flag.StringVar(&FF, "ff", "", "set urlFile one\n与-f区别:全部抓取的数据,视为同一个url的结果来处理(只打印一份结果 | 只会输出一份结果)")
flag.BoolVar(&H, "h", false, "this help\n帮助信息")
flag.BoolVar(&I, "i", false, "set configFile\n加载yaml配置文件(不存在时,会在当前目录创建一个默认yaml配置文件)")
flag.IntVar(&M, "m", 1, "set mode\n抓取模式 \n 1 normal\n 正常抓取(默认) \n 2 thorough\n 深入抓取(默认url深入一层,js深入三层,-i可以自定义) \n 3 security\n 安全深入抓取(过滤delete,remove等敏感路由.-i可自定义) ")
flag.IntVar(&MA, "max", 99999, "set maximum\n最大抓取链接数")
flag.StringVar(&O, "o", "", "set outFile\n结果导出到csv、json、html文件,需指定导出文件目录(.代表当前目录)")
flag.StringVar(&S, "s", "", "set Status\n显示指定状态码,all为显示全部(多个状态码用,隔开)")
flag.IntVar(&T, "t", 50, "set Thread\n设置线程数(默认50)")
flag.IntVar(&TI, "time", 5, "set Timeout\n设置超时时间(默认5,单位秒)")
flag.StringVar(&U, "u", "", "set Url\n目标URL")
flag.StringVar(&X, "x", "", "set Proxy\n设置代理,格式: http://username:[email protected]:8809")
flag.IntVar(&Z, "z", 0, "set Fuzz\n对404链接进行fuzz(只对主域名下的链接生效,需要与 -s 一起使用\n 1 decreasing\n 目录递减fuzz \n 2 2combination\n 2级目录组合fuzz(适合少量链接使用) \n 3 3combination\n 3级目录组合fuzz(适合少量链接使用) ")

// Override the default Usage
flag.Usage = usage
}
func usage() {
fmt.Fprintf(os.Stderr, `Usage: URLFinder [-a user-agent] [-b baseurl] [-c cookie] [-d domainName] [-f urlFile] [-ff urlFile one] [-h help] [-i configFile] [-m mode] [-max maximum] [-o outFile] [-s Status] [-t thread] [-time timeout] [-u url] [-x proxy] [-z fuzz]
Options:
`)
flag.PrintDefaults()
}

func Parse() {
color.LightCyan.Printf(" __ __ ___ _ _ \n /\\ /\\ /__\\ / / / __(_)_ __ __| | ___ _ __ \n/ / \\ \\/ \\/// / / _\\ | | '_ \\ / _` |/ _ \\ '__|\n\\ \\_/ / _ \\ /___ / | | | | | (_| | __/ | \n \\___/\\/ \\_\\____\\/ |_|_| |_|\\__,_|\\___|_| \n\nBy: pingc0y\nUpdate: %s | %s\nGithub: https://github.com/pingc0y/URLFinder \n\n", Update, XUpdate)
flag.Parse()
}
63 changes: 39 additions & 24 deletions config/config.go
@@ -59,29 +59,39 @@ var (
Other = []string{"(access.{0,1}key|access.{0,1}Key|access.{0,1}Id|access.{0,1}id|.{0,5}密码|.{0,5}账号|默认.{0,5}|加密|解密|password:.{0,10}|username:.{0,10})"}
)

var (
UrlSteps = 1
JsSteps = 3
)

var (
Lock sync.Mutex
Wg sync.WaitGroup
Mux sync.Mutex
Ch = make(chan int, 50)
Jsch = make(chan int, 50/10*3)
Urlch = make(chan int, 50/10*7)
)

// Read the configuration file
func GetConfig(path string) {
if f, err := os.Open(path); err != nil {
if strings.Contains(err.Error(), "The system cannot find the file specified") || strings.Contains(err.Error(), "no such file or directory") {
Conf.Headers = map[string]string{"Cookie": cmd.C, "User-Agent": `Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0`, "Accept": "*/*"}
Conf.Proxy = ""
Conf.JsFind = JsFind
Conf.UrlFind = UrlFind
Conf.JsFiler = JsFiler
Conf.UrlFiler = UrlFiler
Conf.JsFuzzPath = JsFuzzPath
Conf.JsSteps = JsSteps
Conf.UrlSteps = UrlSteps
Conf.Risks = Risks
Conf.Timeout = cmd.TI
Conf.Thread = cmd.T
Conf.Max = cmd.MA
Conf.InfoFind = map[string][]string{"Phone": Phone, "Email": Email, "IDcard": IDcard, "Jwt": Jwt, "Other": Other}
data, err2 := yaml.Marshal(Conf)
err2 = os.WriteFile(path, data, 0644)
if err2 != nil {
fmt.Println(err)
@@ -94,18 +104,23 @@
}
os.Exit(1)
} else {
yaml.NewDecoder(f).Decode(&Conf)
JsFind = Conf.JsFind
UrlFind = Conf.UrlFind
JsFiler = Conf.JsFiler
UrlFiler = Conf.UrlFiler
JsFuzzPath = Conf.JsFuzzPath
Phone = Conf.InfoFind["Phone"]
Email = Conf.InfoFind["Email"]
IDcard = Conf.InfoFind["IDcard"]
Jwt = Conf.InfoFind["Jwt"]
Other = Conf.InfoFind["Other"]
JsSteps = Conf.JsSteps
UrlSteps = Conf.UrlSteps
Risks = Conf.Risks
cmd.T = Conf.Thread
cmd.MA = Conf.Max
cmd.TI = Conf.Timeout
}

}
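The buffered channels declared above (Ch, Jsch, Urlch) work as counting semaphores: the buffer size caps how many goroutines hold a slot at once, and the 50/10*3 versus 50/10*7 capacities split the slots roughly 30/70 between JS and URL crawling. A minimal, self-contained sketch of that pattern, with illustrative names and sizes rather than the project's real values:

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	// A buffered channel used as a counting semaphore: at most cap(sem)
	// goroutines run the guarded section at once, mirroring config.Ch.
	sem := make(chan int, 3)
	var wg sync.WaitGroup

	for i := 0; i < 10; i++ {
		wg.Add(1)
		sem <- 1 // acquire a slot (blocks while 3 workers are active)
		go func(n int) {
			defer wg.Done()
			defer func() { <-sem }() // release the slot
			fmt.Println("crawling task", n)
		}(i)
	}
	wg.Wait()
}
```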
62 changes: 30 additions & 32 deletions crawler/crawler.go
@@ -2,7 +2,6 @@ package crawler

import (
"compress/gzip"
"crypto/tls"
"fmt"
"github.com/pingc0y/URLFinder/cmd"
"github.com/pingc0y/URLFinder/config"
@@ -11,24 +10,22 @@ import (
"io"
"net/http"
"net/url"
"os"
"regexp"
"strings"
"time"
)

// Spider crawls page content
func Spider(u string, num int) {
is := true
defer func() {
config.Wg.Done()
if is {
<-config.Ch
}
}()
config.Mux.Lock()
fmt.Printf("\rStart %d Spider...", config.Progress)
config.Progress++
config.Mux.Unlock()
// Mark as done
@@ -48,23 +45,6 @@ func Spider(u string, num int) {
}
}
AppendEndUrl(u)
request, err := http.NewRequest("GET", u, nil)
if err != nil {
return
@@ -83,13 +63,32 @@ func Spider(u string, num int) {
}

// Handle the response
//tr := &http.Transport{
// TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
//}
//client = &http.Client{Timeout: time.Duration(cmd.TI) * time.Second,
// Transport: tr,
// CheckRedirect: func(req *http.Request, via []*http.Request) error {
// if len(via) >= 10 {
// return fmt.Errorf("Too many redirects")
// }
// if len(via) > 0 {
// if via[0] != nil && via[0].URL != nil {
// result.Redirect[via[0].URL.String()] = true
// } else {
// result.Redirect[req.URL.String()] = true
// }
//
// }
// return nil
// },
//}
response, err := client.Do(request)
if err != nil {
return
}
defer response.Body.Close()

result := ""
// Decompress
if response.Header.Get("Content-Encoding") == "gzip" {
@@ -129,23 +128,22 @@ func Spider(u string, num int) {
path = "/"
}
}
is = false
<-config.Ch

// Extract JS
jsFind(result, host, scheme, path, u, num)
// Extract URLs
urlFind(result, host, scheme, path, u, num)
// Extract sensitive information
infoFind(result, source)

}

// Print validation progress
func PrintProgress() {
config.Mux.Lock()
num := len(result.ResultJs) + len(result.ResultUrl)
fmt.Printf("\rValidate %.0f%%", float64(config.Progress+1)/float64(num+1)*100)
config.Progress++
config.Mux.Unlock()
}
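Spider checks the Content-Encoding response header and un-gzips the body before scanning it. The step can be reproduced standalone with just the standard library; the target URL below is a placeholder, and note that setting Accept-Encoding by hand disables Go's automatic transparent decompression, which is what makes the manual gzip.NewReader necessary:

```go
package main

import (
	"compress/gzip"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Placeholder target; any server that returns gzip-encoded bodies works.
	req, _ := http.NewRequest("GET", "http://example.com", nil)
	req.Header.Set("Accept-Encoding", "gzip")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return
	}
	defer resp.Body.Close()

	var body io.Reader = resp.Body
	// Mirror Spider's check: only wrap when the server says the body is gzip.
	if resp.Header.Get("Content-Encoding") == "gzip" {
		gz, err := gzip.NewReader(resp.Body)
		if err != nil {
			return
		}
		defer gz.Close()
		body = gz
	}
	data, _ := io.ReadAll(body)
	fmt.Println(len(data), "bytes after decompression")
}
```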
8 changes: 6 additions & 2 deletions crawler/filter.go
@@ -13,14 +13,16 @@ func jsFilter(str [][]string) [][]string {
// Filter out unwanted data
for i := range str {
str[i][0], _ = url.QueryUnescape(str[i][1])
str[i][0] = strings.TrimSpace(str[i][0])
str[i][0] = strings.Replace(str[i][0], " ", "", -1)
str[i][0] = strings.Replace(str[i][0], "\\/", "/", -1)
str[i][0] = strings.Replace(str[i][0], "%3A", ":", -1)
str[i][0] = strings.Replace(str[i][0], "%2F", "/", -1)
str[i][0] = strings.Replace(str[i][0], "./", "/", -1)
// Drop links that are not .js
if !strings.HasSuffix(str[i][0], ".js") && !strings.Contains(str[i][0], ".js?") {
str[i][0] = ""
continue
}

// Apply the configured blacklist filter
@@ -44,15 +46,17 @@ func urlFilter(str [][]string) [][]string {
// Filter out unwanted data
for i := range str {
str[i][0], _ = url.QueryUnescape(str[i][1])
str[i][0] = strings.TrimSpace(str[i][0])
str[i][0] = strings.Replace(str[i][0], " ", "", -1)
str[i][0] = strings.Replace(str[i][0], "\\/", "/", -1)
str[i][0] = strings.Replace(str[i][0], "%3A", ":", -1)
str[i][0] = strings.Replace(str[i][0], "%2F", "/", -1)
str[i][0] = strings.Replace(str[i][0], "./", "/", -1)
// Drop URLs containing no letters or digits; treated as bad data
match, _ := regexp.MatchString("[a-zA-Z]+|[0-9]+", str[i][0])
if !match {
str[i][0] = ""
continue
}

// Process captured domains
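Both jsFilter and urlFilter open with the same normalization chain. Pulled out as a helper, the chain would read roughly as below; this is a sketch, since the project keeps these calls inline:

```go
package main

import (
	"fmt"
	"net/url"
	"strings"
)

// normalize applies the cleanup chain shared by jsFilter and urlFilter:
// percent-decoding, whitespace stripping, and un-escaping of separators.
func normalize(raw string) string {
	s, _ := url.QueryUnescape(raw)
	s = strings.TrimSpace(s)
	s = strings.ReplaceAll(s, " ", "")
	s = strings.ReplaceAll(s, "\\/", "/")
	s = strings.ReplaceAll(s, "%3A", ":")
	s = strings.ReplaceAll(s, "%2F", "/")
	s = strings.ReplaceAll(s, "./", "/")
	return s
}

func main() {
	// Prints "https://example.com/api/v1".
	fmt.Println(normalize(` https:\/\/example.com%2Fapi\/v1 `))
}
```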

