Getting Started
Installation
Usage
Basic Syntax
Data Types
Expressions
Variables
Conditions
Commands
Arrays
Loops
Functions
Importing
Advanced Syntax
As Cast
Builtins
Type Condition
Compiler Flags
Standard Library
Array
Date
Environment
FileSystem
HTTP
Math
Text
Contributing
How to
Guide
Compiler structure
Amber by Example
ShellCheck tester
Ubuntu Updater
Bot Detector
LSP Installer
Bot Detector
This script is meant for periodic execution and must be run as root. It scans an Nginx webserver log file for large volumes of requests from automated bots not identifying themselves as bots. If a bad bot makes more than 1000 requests per hour then the IP address is added to a blocklist file that can be picked up by firewall block software such as ipset.
// Script for detecting unwanted bots on our sites and blocking their IPs.
// Usage: ./bot-detector.sh <LOG_FILE_PATH>
// The script is triggered by a cronjob every 10 minutes.
import * from "std"
main (args) {
if len(args) < 1 {
echo "Path to log file missing."
echo "Usage: bot-detector.sh <logfile>"
echo " bot-detector.sh /var/log/nginx/access.log"
exit(1)
}
let logfile = args[0]
$ test -r {logfile} $ failed {
echo "File not found or not readable: {logfile}"
exit(1)
}
let start = parse($ date +%s $?)?
// Get server IP address for excluding.
let server_ip = $ hostname -i $?
// We want to check the previous hour and the current hour.
let timeframes = ["1 hour ago", "now"]
loop timeframe in timeframes {
if timeframe == "1 hour ago" {
echo "Checking the previous hour..."
} else {
echo "Checking the current hour..."
}
let hour_timestamp = $ date "+%d/%b/%Y:%H" -d "{timeframe}" $?
// Get the top 20 IP addresses that accessed job pages for the given hour
// timestamp. Includes a count per IP address in the format: "<count> <ip>".
// Only check GET requests to certain job-related paths.
// Ignore requests from well-behaved bots that send a bot user agent.
// Never block requests from Google in the user agent.
// Exclude requests to /files/ paths.
// Check the top 20 results.
let ip_log = unsafe $ grep -e "{hour_timestamp}" "{logfile}" | \
grep \
-e "GET /job/" \
-e "GET /jobs/" \
-e "GET /stelle/" \
-e "GET /stellen/" \
-e "GET /stellenangebot/" \
-e "GET /de/stelle/" \
-e "GET /de/stellen/" \
-e "GET /de/stellenangebot/" | \
grep -i -v "bot" | \
grep -v "Google" | \
grep -v /files/ | \
awk '\{print \$1}' | sort | uniq -c | sort -nr | \
grep -v "{server_ip}" | \
head -n 20 $
loop line in lines(ip_log) {
let parts = split(line, " ")
let count = parse(parts[0])?
// Skip IP addresses that sent less than 1000 requests.
if count < 1000 {
continue
}
let ip = parts[1]
silent unsafe $ grep "{ip}" /etc/ipblocklist.txt $
if status == 0 {
echo "IP address {ip} is already blocked."
continue
}
silent unsafe $ grep "{ip}" /etc/ipexcludedlist.txt $
if status == 0 {
echo "IP address {ip} is allow-listed and will not be blocked."
continue
}
echo "Blocking IP address: {ip} ({count} requests)"
$ echo "{ip}" >> /etc/ipblocklist.txt $?
$ echo "\$(date) | IP addess {ip} added to the block list, RPH={count}" >> /var/log/bot-detector.log $?
}
}
let end = parse($ date +%s $?)?
let duration = end - start
echo "Execution time: {duration} seconds"
}
Getting Started
Installation
Usage
Basic Syntax
Data Types
Expressions
Variables
Conditions
Commands
Arrays
Loops
Functions
Importing
Advanced Syntax
As Cast
Builtins
Type Condition
Compiler Flags
Standard Library
Array
Date
Environment
FileSystem
HTTP
Math
Text
Contributing
How to
Guide
Compiler structure
Amber by Example
ShellCheck tester
Ubuntu Updater
Bot Detector
LSP Installer