Skip to content

Commit

Permalink
Enhance BotDetector with Custom Rules and Caching
Browse files Browse the repository at this point in the history
Updated the BotDetector README with improved usage instructions, including examples for adding custom detection rules and a caching mechanism. Enhanced the bot detection rules list and upgraded dependencies in the `go.mod` file to ensure compatibility with Go 1.22.
  • Loading branch information
logocomune committed Oct 4, 2024
1 parent 52a03f6 commit 00ab937
Show file tree
Hide file tree
Showing 12 changed files with 147,286 additions and 146,711 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# This workflow will build a golang project
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go

name: Go

on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

jobs:

build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.22.8'

- name: Build
run: go build -v ./...

- name: Test
run: go test -race -coverprofile=coverage.txt -covermode=atomic

- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
slug: logocomune/botdetector
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
language: go

go:
- 1.13.x
- 1.14.x
- 1.21.x
- 1.22.x

script:
- go test -tags what -race -coverprofile=coverage.txt -covermode=atomic ./...
Expand Down
71 changes: 59 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,28 +1,75 @@
# BotDetector

[![Build Status](https://app.travis-ci.com/logocomune/botdetector.svg?branch=master)](https://app.travis-ci.com/logocomune/botdetector)
[![Go Report Card](https://goreportcard.com/badge/github.com/logocomune/botdetector)](https://goreportcard.com/report/github.com/logocomune/botdetector)
[![codecov](https://codecov.io/gh/logocomune/botdetector/branch/master/graph/badge.svg)](https://codecov.io/gh/logocomune/botdetector)

BotDetector is a golang library that detects Bot/Spider/Crawler from user agent
BotDetector is a Go library that detects bots, spiders, and crawlers from user agents.

## Installation

`go get -u github.com/logocomune/botdetector`
`go get -u github.com/logocomune/botdetector/v2`

## Usage

### Simple usage

```go
userAgent := req.Header.Get("User-Agent")
detector := botdetector.New()
isBot := detector.IsBot(userAgent)
if isBot {
log.Println("Bot, Spider or Crawler detected")
}
userAgent := req.Header.Get("User-Agent")

detector, _ := botdetector.New()
isBot := detector.IsBot(userAgent)

if isBot {
log.Println("Bot, Spider or Crawler detected")
}

```


### Adding Custom Rules

You can add custom detection rules with the `WithRules` method. For example:

```go
userAgent := req.Header.Get("User-Agent")

detector, _ := botdetector.New(botdetector.WithRules([]string{"my rule", "^test"}))
isBot := detector.IsBot(userAgent)

if isBot {
log.Println("Bot, Spider or Crawler detected")
}

```

Custom Rule Patterns:

| pattern | description |
|---------|-----------------------------------------------------------|
| "..." | Checks if the string contains the specified pattern. |
| "^..." | Checks if the string starts with the specified pattern. |
| "...$" | Checks if the string ends with the specified pattern. |
| "^...$" | Checks if the string strictly matches the entire pattern. |

In this example, the custom rules "my rule" and "^test" are added to the existing detection rules.

### Adding Cache
You can enable an LRU cache of detection results with the `WithCache` method. For example:

```go
userAgent := req.Header.Get("User-Agent")

detector, _ := botdetector.New(botdetector.WithCache(1000))
isBot := detector.IsBot(userAgent)

if isBot {
log.Println("Bot, Spider or Crawler detected")
}

```



### Example

[Simple example](_example/main.go)
Expand Down
17 changes: 14 additions & 3 deletions _example/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,27 @@ import (
"fmt"
"net/http"

"github.com/logocomune/botdetector"
"github.com/logocomune/botdetector/v2"
)

var detector = botdetector.New()
var detector *botdetector.BotDetector

// init builds the package-level detector with the default configuration and
// aborts the program if construction fails.
func init() {
	d, err := botdetector.New()
	if err != nil {
		panic(err)
	}
	detector = d
}

// main registers userAgentHandler for "/" and serves HTTP on port 8080,
// panicking if the server returns an error.
func main() {

http.HandleFunc("/", userAgentHandler)

http.ListenAndServe(":8080", nil)
err := http.ListenAndServe(":8080", nil)
if err != nil {
panic(err)
}
}

func userAgentHandler(w http.ResponseWriter, r *http.Request) {
Expand Down
122 changes: 65 additions & 57 deletions detector.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package botdetector

import (
"appliedgo.net/what"
lru "github.com/hashicorp/golang-lru/v2"
"strings"
)

Expand All @@ -19,102 +19,110 @@ type expressionInfo struct {
}

// BotDetector matches user-agent strings against a table of parsed detection
// expressions keyed by their original rule text. When cache is non-nil, IsBot
// stores and reuses its results per normalized user agent.
type BotDetector struct {
expression map[string]expressionInfo
debugMode bool
expressions map[string]expressionInfo
cache *lru.Cache[string, bool]
}

func New() *BotDetector {
return newDetector(rules)
}

func newDetector(rules []string) *BotDetector {
uBot := BotDetector{expression: make(map[string]expressionInfo)}
// New creates a BotDetector loaded with the package's predefined rules and
// then applies each Option in the order given. Every option receives the
// detector and returns the detector to continue with. If an option returns an
// error, New stops and returns nil together with that error.
func New(opt ...Option) (*BotDetector, error) {
b := &BotDetector{}
b.importRules(rules)

for _, s := range rules {
uBot.addExpression(s)
var err error
for i := range opt {
b, err = opt[i](b)
if err != nil {
return nil, err
}
}

return &uBot
return b, nil
}

func (b *BotDetector) addExpression(original string) {
e := expressionInfo{
source: original,
}

s := strings.ToLower(original)
if strings.HasPrefix(s, "^") && strings.HasSuffix(s, "$") {
b.expression[original] = expressionInfo{
source: original,
expressionType: strict,
detector: s[1 : len(s)-1],
}

// importRules replaces the detector's expression table with one built from r.
// Each rule is parsed and stored by addExpression. A nil or empty slice yields
// an empty, non-nil table, so later insertions never write to a nil map.
func (b *BotDetector) importRules(r []string) {
	// len of a nil slice is 0 and make accepts a zero size hint, so the empty
	// case needs no separate branch.
	b.expressions = make(map[string]expressionInfo, len(r))
	for _, s := range r {
		b.addExpression(s)
	}
}

if strings.HasPrefix(s, "^") {
b.expression[original] = expressionInfo{
source: original,
expressionType: startWith,
detector: s[1:],
}
// NewWithRules builds a BotDetector whose expression table starts empty and is
// filled only from the supplied rules, each parsed by addExpression. It sets
// no cache and cannot fail.
func NewWithRules(rules []string) *BotDetector {
uBot := BotDetector{expressions: make(map[string]expressionInfo)}

return
for _, s := range rules {
uBot.addExpression(s)
}

if strings.HasSuffix(s, "$") {
b.expression[original] = expressionInfo{
source: original,
expressionType: endWith,
detector: s[:len(s)-1],
}
return &uBot
}

return
// addExpression parses one rule and stores it in the expression table.
//
// The rule is lower-cased and its anchors select the match mode: "^...$" is
// strict, "^..." is startWith, "...$" is endWith, and anything else is
// contains. Anchors are stripped from the stored detector text; the table key
// is the rule exactly as supplied.
func (b *BotDetector) addExpression(original string) {
	pattern := strings.ToLower(original)
	anchoredStart := strings.HasPrefix(pattern, "^")
	anchoredEnd := strings.HasSuffix(pattern, "$")

	// Default to a substring match and narrow it according to the anchors.
	kind, text := contains, pattern
	switch {
	case anchoredStart && anchoredEnd:
		kind, text = strict, pattern[1:len(pattern)-1]
	case anchoredStart:
		kind, text = startWith, pattern[1:]
	case anchoredEnd:
		kind, text = endWith, pattern[:len(pattern)-1]
	}

	b.addExpressionInfo(original, kind, text)
}

e.expressionType = contains
b.expression[original] = expressionInfo{
source: original,
expressionType: contains,
detector: s,
// addExpressionInfo records a parsed rule in the expression table, keyed by
// its original source text. An existing entry with the same source is
// overwritten.
func (b *BotDetector) addExpressionInfo(source string, exprType int, detector string) {
	info := expressionInfo{
		detector:       detector,
		expressionType: exprType,
		source:         source,
	}
	b.expressions[source] = info
}

// IsBot reports whether the user agent ua matches any detection expression,
// i.e. whether it looks like a bot, crawler or spider.
//
// The user agent is normalized first. When a cache is configured, a cached
// result for the normalized string is returned directly; otherwise the result
// is computed and then stored under that string.
//
// NOTE(review): the cache-aware loop sets ret = true on a match but does not
// appear to stop iterating, so every expression is still evaluated — confirm
// whether an early exit was intended.
func (b *BotDetector) IsBot(ua string) bool {
uaNormalized := normalize(ua)

for _, exp := range b.expression {
if b.cache != nil {
if ret, ok := b.cache.Get(uaNormalized); ok {
return ret
}
}
ret := false
for _, exp := range b.expressions {
switch exp.expressionType {
case strict:
if uaNormalized == exp.detector {
what.If(b.debugMode, "%s === %s", exp.detector, uaNormalized)

return true
ret = true
}
case startWith:
if strings.HasPrefix(uaNormalized, exp.detector) {
what.If(b.debugMode, "%s .== %s", exp.detector, uaNormalized)

return true
ret = true
}
case endWith:
if strings.HasSuffix(uaNormalized, exp.detector) {
what.If(b.debugMode, "%s ==. %s", exp.detector, uaNormalized)

return true
ret = true
}
case contains:
if strings.Contains(uaNormalized, exp.detector) {
what.If(b.debugMode, "%s =.= %s", exp.detector, uaNormalized)

return true
ret = true
}
}
}
if b.cache != nil {
b.cache.Add(uaNormalized, ret)
}

return false
return ret
}

func normalize(userAgent string) string {
Expand Down
Loading

0 comments on commit 00ab937

Please sign in to comment.