diff --git a/README.md b/README.md index 4f86442..fb9d48e 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,10 @@ SET GOARCH=arm64 go build -ldflags "-s -w" -o ./URLFinder-macos-arm64 ``` ## 更新说明 +2023/2/21 +修复 已知bug + + 2023/2/3 新增 域名信息展示 变化 -i配置文件可配置抓取规则等 diff --git a/URLFinder-linux-386 b/URLFinder-linux-386 deleted file mode 100644 index 545147d..0000000 Binary files a/URLFinder-linux-386 and /dev/null differ diff --git a/URLFinder-linux-amd64 b/URLFinder-linux-amd64 deleted file mode 100644 index 3b1d26c..0000000 Binary files a/URLFinder-linux-amd64 and /dev/null differ diff --git a/URLFinder-linux-arm64 b/URLFinder-linux-arm64 deleted file mode 100644 index 1424135..0000000 Binary files a/URLFinder-linux-arm64 and /dev/null differ diff --git a/URLFinder-macos-amd64 b/URLFinder-macos-amd64 deleted file mode 100644 index 3b1d26c..0000000 Binary files a/URLFinder-macos-amd64 and /dev/null differ diff --git a/URLFinder-macos-arm64 b/URLFinder-macos-arm64 deleted file mode 100644 index 7a547ee..0000000 Binary files a/URLFinder-macos-arm64 and /dev/null differ diff --git a/URLFinder-windows-386.exe b/URLFinder-windows-386.exe deleted file mode 100644 index a07e8bf..0000000 Binary files a/URLFinder-windows-386.exe and /dev/null differ diff --git a/URLFinder-windows-amd64.exe b/URLFinder-windows-amd64.exe deleted file mode 100644 index 84b6e64..0000000 Binary files a/URLFinder-windows-amd64.exe and /dev/null differ diff --git a/cmd/cmd.go b/cmd/cmd.go index ebcd7f2..ffb7a3f 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -16,7 +16,7 @@ var ( D string C string A string - b string + B string F string O string X string @@ -26,7 +26,7 @@ var ( func init() { flag.StringVar(&A, "a", "", "set user-agent\n设置user-agent请求头") - flag.StringVar(&b, "b", "", "set baseurl\n设置baseurl路径") + flag.StringVar(&B, "b", "", "set baseurl\n设置baseurl路径") flag.StringVar(&C, "c", "", "set cookie\n设置cookie") flag.StringVar(&D, "d", "", "set domainName\n指定获取的域名") flag.StringVar(&F, "f", "", "set urlFile\n批量抓取url,指定文件路径") @@ -52,7 +52,7 @@ Options: } func Parse() { - color.LightCyan.Println(" __ __ ___ _ _ \n /\\ /\\ /__\\ / / / __(_)_ __ __| | ___ _ __ \n/ / \\ \\/ \\/// / / _\\ | | '_ \\ / _` |/ _ \\ '__|\n\\ \\_/ / _ \\ /___ / | | | | | (_| | __/ | \n \\___/\\/ \\_\\____\\/ |_|_| |_|\\__,_|\\___|_| \n\nBy: pingc0y\nUpdateTime: 2023/2/3\nGithub: https://github.com/pingc0y/URLFinder \n") + color.LightCyan.Println(" __ __ ___ _ _ \n /\\ /\\ /__\\ / / / __(_)_ __ __| | ___ _ __ \n/ / \\ \\/ \\/// / / _\\ | | '_ \\ / _` |/ _ \\ '__|\n\\ \\_/ / _ \\ /___ / | | | | | (_| | __/ | \n \\___/\\/ \\_\\____\\/ |_|_| |_|\\__,_|\\___|_| \n\nBy: pingc0y\nUpdateTime: 2023/2/21\nGithub: https://github.com/pingc0y/URLFinder \n") flag.Parse() if h || (U == "" && F == "") { flag.Usage() diff --git a/config/config.go b/config/config.go index 9e2aca8..e131986 100644 --- a/config/config.go +++ b/config/config.go @@ -49,7 +49,7 @@ var ( } UrlFiler = []string{ "\\.js\\?|\\.css\\?|\\.jpeg\\?|\\.jpg\\?|\\.png\\?|.gif\\?|www\\.w3\\.org|example\\.com|\\<|\\>|\\{|\\}|\\[|\\]|\\||\\^|;|/js/|\\.src|\\.replace|\\.url|\\.att|\\.href|location\\.href|javascript:|location:|application/x-www-form-urlencoded|\\.createObject|:location|\\.path|\\*#__PURE__\\*|\\*\\$0\\*|\\n", - ".*\\.js$|.*\\.css$|.*\\.scss$|.*,$|.*\\.jpeg$|.*\\.jpg$|.*\\.png&|.*\\.gif&|.*\\.ico$|.*\\.svg$|.*\\.vue$|.*\\.ts$", + ".*\\.js$|.*\\.css$|.*\\.scss$|.*,$|.*\\.jpeg$|.*\\.jpg$|.*\\.png$|.*\\.gif$|.*\\.ico$|.*\\.svg$|.*\\.vue$|.*\\.ts$", } Phone = []string{"['\"](1(3([0-35-9]\\d|4[1-8])|4[14-9]\\d|5([\\d]\\d|7[1-79])|66\\d|7[2-35-8]\\d|8\\d{2}|9[89]\\d)\\d{7})['\"]"} diff --git a/crawler/crawler.go b/crawler/crawler.go index b548b08..e5af757 100644 --- a/crawler/crawler.go +++ b/crawler/crawler.go @@ -1,6 +1,7 @@ package crawler import ( + "compress/gzip" "crypto/tls" "fmt" "github.com/pingc0y/URLFinder/cmd" @@ -50,18 +51,16 @@ func Spider(u string, num int) { tr := &http.Transport{ TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, } - //配置代理 if cmd.X != "" { - proxyUrl, parseErr := url.Parse(config.Conf.Proxy) + proxyUrl, parseErr := url.Parse(cmd.X) if parseErr != nil { fmt.Println("代理地址错误: \n" + parseErr.Error()) os.Exit(1) } tr.Proxy = http.ProxyURL(proxyUrl) - } - //加载yaml配置(proxy) - if cmd.I { + } else if cmd.I { + //加载yaml配置 util.SetProxyConfig(tr) } client := &http.Client{Timeout: 10 * time.Second, Transport: tr} @@ -69,14 +68,19 @@ func Spider(u string, num int) { if err != nil { return } - //增加header选项 - request.Header.Add("Cookie", cmd.C) + + request.Header.Add("Accept-Encoding", "gzip") //使用gzip压缩传输数据让访问更快 request.Header.Add("User-Agent", util.GetUserAgent()) request.Header.Add("Accept", "*/*") - //加载yaml配置(headers) + //增加header选项 + if cmd.C != "" { + request.Header.Add("Cookie", cmd.C) + } + //加载yaml配置(headers) if cmd.I { util.SetHeadersConfig(&request.Header) } + //处理返回结果 response, err := client.Do(request) if err != nil { @@ -85,19 +89,32 @@ func Spider(u string, num int) { defer response.Body.Close() } - - //提取url用于拼接其他url或js - dataBytes, err := io.ReadAll(response.Body) - if err != nil { - return + result := "" + //解压 + if response.Header.Get("Content-Encoding") == "gzip" { + reader, err := gzip.NewReader(response.Body) // gzip解压缩 + if err != nil { + return + } + defer reader.Close() + con, err := io.ReadAll(reader) + if err != nil { + return + } + result = string(con) + } else { + //提取url用于拼接其他url或js + dataBytes, err := io.ReadAll(response.Body) + if err != nil { + return + } + //字节数组 转换成 字符串 + result = string(dataBytes) } path := response.Request.URL.Path host := response.Request.URL.Host scheme := response.Request.URL.Scheme source := scheme + "://" + host + path - - //字节数组 转换成 字符串 - result := string(dataBytes) //处理base标签 re := regexp.MustCompile("base.{1,5}href.{1,5}(http.+?//[^\\s]+?)[\",',‘,“]") base := re.FindAllStringSubmatch(result, -1) diff --git a/crawler/filter.go b/crawler/filter.go index 2c42f3a..b44c199 100644 --- a/crawler/filter.go +++ b/crawler/filter.go @@ -29,6 +29,7 @@ func jsFilter(str [][]string) [][]string { is := re.MatchString(str[i][0]) if is { str[i][0] = "" + break } } @@ -66,6 +67,7 @@ func urlFilter(str [][]string) [][]string { is := re.MatchString(str[i][0]) if is { str[i][0] = "" + break } } diff --git a/crawler/find.go b/crawler/find.go index d04abf8..3d44977 100644 --- a/crawler/find.go +++ b/crawler/find.go @@ -115,8 +115,8 @@ func urlFind(cont, host, scheme, path, source string, num int) { } } else if strings.HasPrefix(url[0], "/") { urlz := "" - if cmd.D != "" { - urlz = cmd.D + url[0] + if cmd.B != "" { + urlz = cmd.B + url[0] } else { urlz = host + url[0] } @@ -128,11 +128,11 @@ func urlFind(cont, host, scheme, path, source string, num int) { } } else if !strings.HasSuffix(source, ".js") { urlz := "" - if cmd.D != "" { - if strings.HasSuffix(cmd.D, "/") { - urlz = cmd.D + url[0] + if cmd.B != "" { + if strings.HasSuffix(cmd.B, "/") { + urlz = cmd.B + url[0] } else { - urlz = cmd.D + "/" + url[0] + urlz = cmd.B + "/" + url[0] } } else { urlz = host + cata + url[0] diff --git a/result/result.go b/result/result.go index 2bee2d6..81a1197 100644 --- a/result/result.go +++ b/result/result.go @@ -10,6 +10,7 @@ import ( "github.com/pingc0y/URLFinder/mode" "github.com/pingc0y/URLFinder/util" "log" + "net/url" "os" "regexp" "strconv" @@ -523,6 +524,7 @@ func Print() { } for _, u := range ResultUrlHost { + u.Url, _ = url.QueryUnescape(u.Url) if cmd.S != "" && len(u.Title) != 0 { if u.Status == "疑似危险路由" { fmt.Printf(color.LightBlue.Sprintf("%-"+ulen+"s", u.Url) + color.LightGreen.Sprintf(" [ %s ]\n", u.Status))