Skip to content

Commit

Permalink
Revert "updated URL enrichment to match more pattterns"
Browse files Browse the repository at this point in the history
t pull
Wrong branch
This reverts commit 6890ffd.
  • Loading branch information
brian-grabau committed Aug 19, 2024
1 parent 6890ffd commit 3258d47
Showing 1 changed file with 10 additions and 21 deletions.
31 changes: 10 additions & 21 deletions config/enrichments/06_url.conf
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,21 @@ filter {
}
} else {
### URL enrichment
if [url][original] and [url][original] != "" and ![url][full] {
mutate {
copy => { "[url][full]" => "[url][original]" }
}
}
if [url][full] and [url][full] != "" {
mutate {
remove_field => [ "[url][domain]", "[url][extension]", "[url][fragment]", "[url][full][text]", "[url][original][text]", "[url][password]", "[url][path]", "[url][port]", "[url][query]", "[url][registered_domain]", "[url][scheme]", "[url][top_level_domain]", "[url][username]" ]
remove_field => [ "[url][domain]", "[url][extension]", "[url][fragment]", "[url][full][text]", "[url][original]", "[url][original][text]", "[url][password]", "[url][path]", "[url][port]", "[url][query]", "[url][registered_domain]", "[url][scheme]", "[url][top_level_domain]", "[url][username]" ]
tag_on_failure => "_mutate_error_url_en_1"
}
### mutate order of operation has lowercase before copy
if ![url][original] {
mutate {
copy => { "[url][full]" => "[url][original]" }
tag_on_failure => "_mutate_error_url_en_2"
}
mutate {
copy => { "[url][full]" => "[url][original]" }
tag_on_failure => "_mutate_error_url_en_2"
}
mutate {
lowercase => [ "[url][full]" ]
}
grok {
match => {"[url][full]" => "^((?<[url][scheme]>.*?)://)?((?<[user][name]>.*?):(?<[user][password]>.*?)@)?(?<[url][domain]>\w+((\.\w+){1,})|\d+\.\d+\.\d+\.\d+)(:(?<[url][port]>\d+))?(/|$)((?<[url][path]>.*?))?(\?(?<[url][query]>.*?))?(\#(?<[url][fragment]>.*?))?$" }
match => {"[url][full]" => "^((?<[url][scheme]>[A-Za-z]+(\+[A-Za-z+]+)?):\/\/)?((?<[url][username]>.*?):(?<[url][password]>.*?)@)?(?<[url][domain]>\w+(\.|\-+)\w+(\.|\-?\w+)+)(:(?<[url][port]>\d+))?(\/(?<[url][path]>.*?))?(\?(?<[url][query]>.*?))?(\#(?<[url][fragment]>.*?))?$"}
timeout_millis => 500
tag_on_failure => "_groktimeout_url_en_1"
}
Expand All @@ -55,16 +48,12 @@ filter {
# https://github.com/logstash-plugins/logstash-filter-tld/issues/8
tld {
source => "[url][domain]"
target => "tmp"
}
target => "tld_object"
}
mutate {
convert => { "[url][port]" => "integer" }
rename => {
"[tmp][domain]" => "[url][registered_domain]"
"[tmp][sld]" => "[url][subdomain]"
"[tmp][tld]" => "[url][top_level_domain]"
}
"remove_field" => [ "tmp" ]
rename => {"[tld_object][tld]" => "[url][top_level_domain]"}
rename => {"[tld_object][domain]" => "[url][registered_domain]"}
remove_field => ["tld_object"]
}
}
}
Expand Down

0 comments on commit 3258d47

Please sign in to comment.