Transform EZPaarse to anonymized JSON with department/role
joecorall committed Nov 22, 2024
1 parent d4f6f18 commit 9b206a1
Showing 5 changed files with 206 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
ezproxy-log-analyzer
logs/*
!logs/.keep
ezproxy.json
8 changes: 4 additions & 4 deletions README.md
@@ -7,7 +7,7 @@ Get usage from your EZProxy log files
Start an [ezpaarse](https://github.com/ezpaarse-project/ezpaarse) instance

```
git clone https://github.com/ezpaarse-project/ezpaarse`
git clone https://github.com/ezpaarse-project/ezpaarse
cd ezpaarse
docker compose pull
docker compose up -d
@@ -17,12 +17,12 @@ cd ..
Aggregate and process ezproxy log files

```
scp ezproxy.host:/path/to/ezproxy/logs/ .
scp -r ezproxy.host:/path/to/ezproxy/logs .
./process.sh
```

See usage by platform
Process a clean JSON file you can upload into an analytics tool

```
jq -r '.[]| "\(.platform_name) \(.publisher_name)"' logs/ezpaarse.json|sort|uniq -c|sort -n
go run main.go
```
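
`main.go` reads `logs/ezpaarse.json`, looks up each login against the service named in the `LDAP_SEARCH_SERVER` environment variable, and writes the anonymized records to `ezproxy.json`. A minimal sketch of a full run, assuming a hypothetical LDAP search endpoint:

```
# hypothetical endpoint; main.go POSTs each login's email to it as "cn"
export LDAP_SEARCH_SERVER=https://ldap-search.example.edu/search
go run main.go
# spot-check the first anonymized record
jq '.[0]' ezproxy.json
```
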
3 changes: 3 additions & 0 deletions go.mod
@@ -0,0 +1,3 @@
module github.com/lehigh-university-libraries/ezproxy-log-analyzer

go 1.22.5
194 changes: 194 additions & 0 deletions main.go
@@ -0,0 +1,194 @@
package main

import (
"encoding/json"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"regexp"
"strconv"
"strings"
"time"
)

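// EzpaarseLog models a single record from the ezPAARSE output in logs/ezpaarse.json; only a subset of these fields is carried forward.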
type EzpaarseLog struct {
Date string `json:"date"`
Datetime string `json:"datetime"`
Domain string `json:"domain"`
City string `json:"geoip-city"`
Coordinates string `json:"geoip-coordinates"`
Country string `json:"geoip-country"`
Latitude string `json:"geoip-latitude"`
Longitude string `json:"geoip-longitude"`
Region string `json:"geoip-region"`
Host string `json:"host"`
Identd string `json:"identd"`
LogID string `json:"log_id"`
Login string `json:"login"`
Middlewares string `json:"middlewares"`
MiddlewaresDate string `json:"middlewares_date"`
MiddlewaresVersion string `json:"middlewares_version"`
OnCampus string `json:"on_campus"`
Platform string `json:"platform"`
PlatformName string `json:"platform_name"`
PlatformsDate string `json:"platforms_date"`
PlatformsVersion string `json:"platforms_version"`
PublisherName string `json:"publisher_name"`
Robot string `json:"robot"`
Size string `json:"size"`
Status string `json:"status"`
Timestamp int `json:"timestamp"`
URL string `json:"url"`
}

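// Quicksight is the anonymized shape written to ezproxy.json: the EZProxy login, host, and full URL are dropped, keeping the GeoIP location, platform, publisher, and the LDAP-derived department and role.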
type Quicksight struct {
Date string `json:"date"`
Domain string `json:"domain"`
City string `json:"city"`
Country string `json:"country"`
Latitude float64 `json:"lat"`
Longitude float64 `json:"lng"`
Region string `json:"region"`
Platform string `json:"platform"`
PlatformName string `json:"platform_name"`
PublisherName string `json:"publisher_name"`
Department string `json:"department"`
Role string `json:"role"`
}

// fileExists reports whether the given file already exists on disk
func fileExists(filename string) bool {
_, err := os.Stat(filename)
return !os.IsNotExist(err)
}

// saveToFile writes the response body to the named file
func saveToFile(filename string, data io.Reader) error {
file, err := os.Create(filename)
if err != nil {
return err
}
defer file.Close()
_, err = io.Copy(file, data)
return err
}

// extractDescription parses the cached LDAP HTML and splits its Description value into a category (used as the role) and a description (used as the department).
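// For example (hypothetical values), a cached row such as
// <tr><td>Description</td><td>Staff - Library Technology</td></tr>
// yields category "Staff" and description "Library Technology".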
func extractDescription(filename string) (string, string, error) {
data, err := os.ReadFile(filename)
if err != nil {
return "", "", err
}
// Use regex to capture everything in the second <td> tag
re := regexp.MustCompile(`<tr><td>Description</td><td>([^<]+)</td></tr>`)
matches := re.FindStringSubmatch(string(data))
if len(matches) < 2 {
return "", "", fmt.Errorf("description not found")
}
// Now, we have everything inside the second <td>
fullDescription := matches[1]

// Try to split it on " - " to separate category and description
parts := strings.SplitN(fullDescription, " - ", 2)
category := parts[0]
description := "unknown"
if len(parts) == 2 {
description = parts[1]
}

return category, description, nil
}

func main() {
ldapSearchServer := os.Getenv("LDAP_SEARCH_SERVER")

file, err := os.Open("logs/ezpaarse.json")
if err != nil {
slog.Error("Failed to open ezpaarse JSON. Did you run process.sh?", "err", err)
os.Exit(1)
}
defer file.Close()

// Decode the JSON file into a slice of EzpaarseLog
var logs []EzpaarseLog
if err := json.NewDecoder(file).Decode(&logs); err != nil {
slog.Error("Failed to decode JSON", "err", err)
os.Exit(1)
}

// Build an anonymized Quicksight record for each log entry
visitors := []Quicksight{}
for _, log := range logs {
visitor := Quicksight{
Date: log.Date,
Domain: log.Domain,
City: log.City,
Country: log.Country,
Region: log.Region,
Platform: log.Platform,
PlatformName: log.PlatformName,
PublisherName: log.PublisherName,
}
lat, err := strconv.ParseFloat(log.Latitude, 64)
if err != nil {
slog.Warn("Error converting string to float", "err", err, "lng", log.Latitude)
continue
}
lng, err := strconv.ParseFloat(log.Longitude, 64)
if err != nil {
slog.Warn("Error converting string to float", "err", err, "lat", log.Latitude)
continue
}

visitor.Latitude = lat
visitor.Longitude = lng

email := fmt.Sprintf("%s@lehigh.edu", strings.ToLower(log.Login))
filename := fmt.Sprintf("/tmp/%s.html", log.Login)
// Check if the file already exists
if !fileExists(filename) {
slog.Info("Querying LDAP")
// If not, make the HTTP POST request
resp, err := http.PostForm(ldapSearchServer, map[string][]string{
"cn": {email},
})
if err != nil {
slog.Error("Error making request", "err", err)
return
}
defer resp.Body.Close()

// Cache the LDAP response HTML for this login
err = saveToFile(filename, resp.Body)
if err != nil {
slog.Error("Error saving file", "err", err)
return
}
time.Sleep(2 * time.Second)
}

// Extract and parse the description
role, department, err := extractDescription(filename)
if err != nil {
slog.Warn("Error extracting description", "login", log.Login, "err", err)
}
// Attach the LDAP-derived role and department to the visitor record
visitor.Department = department
visitor.Role = role
visitors = append(visitors, visitor)
}

f, err := os.Create("ezproxy.json")
if err != nil {
slog.Error("failed to create ezproxy JSON", "err", err)
os.Exit(1)
}
defer f.Close()

// Encode the anonymized visitors slice to ezproxy.json
encoder := json.NewEncoder(f)
if err := encoder.Encode(visitors); err != nil {
slog.Error("failed to encode JSON", "err", err)
}
}
6 changes: 4 additions & 2 deletions process.sh
@@ -7,8 +7,10 @@ if [ -f aggregated.log ]; then
rm aggregated.log
fi

for LOG in $(find . -name "ezproxy.log*" | sort -t. -k3 -rn); do
grep -v ezproxy.lib.lehigh.edu "$LOG" >> aggregated.log
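# Concatenate each directory's log files in reverse version order, excluding hits to the proxy host itself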
find . -type d -mindepth 1 | sort | while read dir; do
find "$dir" -maxdepth 1 -type f | sort -V -r | while read LOG; do
grep -v ezproxy.lib.lehigh.edu "$LOG" >> aggregated.log
done
done

curl -v -XPOST \