#!/usr/bin/perl
use strict;
use warnings;
use Fcntl qw(:DEFAULT :flock);
use Net::SMTP;
# Script Variables
# Address and host name of the requesting client, as supplied by the web
# server through the CGI environment.
my $ip = $ENV{'REMOTE_ADDR'};

# Regex-escaped copy of the address; used to build the SetEnvIf pattern.
my $escIP = quotemeta(defined $ip ? $ip : '');

my $host = $ENV{'REMOTE_HOST'};

# When the server did not resolve the host name (REMOTE_HOST is empty or is
# itself a dotted-quad address), attempt a reverse lookup here and fall back
# to the bare IP address if the lookup yields nothing.
if (defined($ip) && ((!$host) || ($host =~ m!^\d+\.\d+\.\d+\.\d+$!))) {
    if ($ip =~ m!^(\d+)\.(\d+)\.(\d+)\.(\d+)$!) {
        # The second argument is the address family (2 is AF_INET on
        # common platforms).
        $host = (gethostbyaddr(pack('C4', $1, $2, $3, $4), 2))[0] || $ip;
    }
}

# Addresses that are not dotted-quad skip the lookup above; still give the
# rest of the script something printable for the host name.
$host = $ip if defined($ip) && !$host;

# Accept DOCUMENT_ROOT only when it consists of word characters, dots,
# hyphens, slashes, backslashes and colons. The hyphen sits last in the
# class so that it is a literal hyphen and not a range operator.
my $document_root = defined $ENV{'DOCUMENT_ROOT'} ? $ENV{'DOCUMENT_ROOT'} : '';
my ($rootdir) = $document_root =~ m/^([\w.\/\\:-]+)\z/;

my $domain = $ENV{'HTTP_HOST'};

# E-mail settings
my $smtp_server       = "mail.example.com";
my $smtp_sender_email = "webmaster\@example.com";
my $sender_email      = "webmaster\@example.com";
my $recipient_email   = "webmaster\@example.com";

# Ignore List. May be any number of quoted hostnames or IP addresses
# separated by a comma.
# Pattern matching for REMOTE_ADDR: Left to right
# Pattern matching for REMOTE_HOST: Right to left
my @ignore_list = ("255.255.255.255", ".googlebot.com", ".google.com");

# 0 == Don't send E-mail notifications
# 1 == Send E-mail notifications
my $sendmail = 1;

# 0 == Don't use filelock
# 1 == Use filelock
my $filelock = 1;

# 0 == Don't use Ignore List
# 1 == Use Ignore List
my $use_ignore_list = 1;

# Creates WHOIS link in E-mail notification
my $whois = "http://samspade.org/whois/" . (defined $ip ? $ip : '');

# Most users will not need to modify this setting outside of testing
# File in quotes should match AccessFileName in httpd.conf
my ($htaccess) = "htaccess.txt" =~ m/^([\w\.-]+)$/;

# First line of the CGI response: every visitor to the trap gets a 404.
print "Status: 404 Not Found\n";
# Finish the CGI header block; the empty line ends the headers.
print "Content-Type: text/html\n\n";

# Human-readable timestamp used in the ban entry, the notification e-mail
# and the response page.
my $serverTime = getTime();

# Number of ignore-list entries matching this client: entries are compared
# against the start of the IP address and against the end of the host name.
my $scan = grep {
    (defined($ip) && $ip =~ m/^\Q$_\E/)
        || (defined($host) && $host =~ m/\Q$_\E$/)
} @ignore_list;

if (!defined($ip) || $ip eq '') {
    # Without an address there is nothing meaningful to ban.
    print "<h3>Unable to determine the remote address; no action taken.</h3>\n";
}
elsif (($use_ignore_list == 1) && ($scan > 0)) {
    print "<h3>Script access ignored for $ip</h3>\n";
}
else {
    buildHtaccess();
    print "<h3>Site access denied to $ip on $serverTime.</h3>\n";
}

# Prepend a "SetEnvIf Remote_Addr ... ban" entry for the current client to
# the access file unless an identical entry is already present, then send
# the notification e-mail when $sendmail is enabled.
#
# The file is opened once for read/write and, when $filelock is enabled,
# held under an exclusive lock for the whole read-modify-write cycle so
# that concurrent requests cannot truncate or overwrite each other's work.
# Dies when the file cannot be opened, locked, rewritten or closed.
sub buildHtaccess {
    defined($rootdir)
        or die "DOCUMENT_ROOT is missing or contains unexpected characters\n";

    my $path     = "$rootdir/$htaccess";
    my $ban_line = "SetEnvIf Remote_Addr ^$escIP\$ ban\n";

    # '+<' opens for update without truncating, so the file is never empty
    # while another process might be reading it.
    open(my $fh, '+<', $path) or die "Can't open $path for update: $!\n";
    if ($filelock == 1) {
        flock($fh, LOCK_EX) or die "Can't set exclusive flock: $!\n";
    }

    my @list = <$fh>;
    if (grep { $_ eq $ban_line } @list) {
        # Already banned: leave the file untouched and send no mail.
        close($fh) or die "Can't close file after read: $!\n";
        return;
    }

    # The host name comes from a reverse DNS lookup, so keep only characters
    # that cannot break out of the comment line written to the access file.
    my $safe_host = defined($host) ? $host : '';
    $safe_host =~ s/[^\w.:-]/_/g;

    unshift(@list, "# Host: $safe_host\n", $ban_line, "# Banned on $serverTime\n\n");

    # Rewind to the start (whence 0) and rewrite the whole file in place.
    seek($fh, 0, 0)       or die "Can't rewind $path: $!\n";
    truncate($fh, 0)      or die "Can't truncate $path: $!\n";
    print {$fh} @list     or die "Can't write to $path: $!\n";
    close($fh)            or die "Can't close file after write: $!\n";

    sendMail() if $sendmail == 1;
    return;
}

# Send the ban notification through the configured SMTP server. The message
# lists the client's IP, host, ban time, a WHOIS link and a dump of the CGI
# environment. Notification is best-effort: if the SMTP connection cannot be
# established a warning is emitted and the sub returns without dying, so the
# ban that was already written still results in a complete response.
sub sendMail {
    my $smtp = Net::SMTP->new($smtp_server, Hello => $smtp_server);
    unless ($smtp) {
        warn "Can't connect to SMTP server $smtp_server\n";
        return;
    }

    # HTTP_HOST is client-supplied; keep line breaks out of the Subject header.
    my $subject_domain = defined($domain) ? $domain : '';
    $subject_domain =~ s/[\r\n]+/ /g;
    my $host_text = defined($host) ? $host : '';

    $smtp->mail($smtp_sender_email);
    $smtp->to($recipient_email);
    $smtp->data();
    $smtp->datasend("To: $recipient_email\n");
    $smtp->datasend("From: $sender_email\n");
    $smtp->datasend("Subject: Spider Trap Alert \@ $subject_domain!\n");
    $smtp->datasend("X-Priority: 1 (Highest)\n\n\n");
    $smtp->datasend("The following user session initiated an automated spider trap ban:\n\n");
    $smtp->datasend("IP: $ip\n");
    $smtp->datasend("Host: $host_text\n");
    $smtp->datasend("Time: $serverTime\n\n");
    $smtp->datasend("Whois: $whois\n\n\n");
    $smtp->datasend("Additional Details:\n\n");
    foreach my $key (sort keys(%ENV)) {
        $smtp->datasend("$key : $ENV{$key}\n");
    }
    $smtp->datasend("\n\n");
    # Terminate the message body explicitly before closing the session.
    $smtp->dataend();
    $smtp->quit;
    return;
}

# Return the current local time as a string such as
# "Wed, Dec 24, 2008 at 09:05:03". Hour, minute and second are always
# zero-padded to two digits; the day of the month is not padded.
sub getTime {
    my @months   = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    my @weekDays = qw(Sun Mon Tue Wed Thu Fri Sat);
    my ($sec, $min, $hour, $mday, $mon, $yoset, $wday) = localtime();

    # localtime reports the year as an offset from 1900.
    return sprintf(
        "%s, %s %d, %d at %02d:%02d:%02d",
        $weekDays[$wday], $months[$mon], $mday, 1900 + $yoset,
        $hour, $min, $sec
    );
}
Improvements Made
File Permissions
trap.pl File Instructions
The user-defined variables are pretty self-explanatory. If you are unsure of the E-mail settings, your host should be able to tell you what they need to be. More than likely, they'll be the same settings you used to set up the POP3 mail account for use with your site.
.htaccess File Instructions
For testing purposes, I suggest you first create a copy of your .htaccess file and upload it to your server as htaccess.txt. Then temporarily change $htaccess in the script above to htaccess.txt to safely test this script and work out any potential problems beforehand.
If you do not already have a similar directive within your .htaccess file, you will need to include:
SetEnvIfNoCase Request_URI \.ht(access|passwd)$ ban
Followed by:
<Files *>
order allow,deny
allow from all
deny from env=ban
</Files>
Install this code within the proper area of your .htaccess file.
Simple Spider Trap Link Instructions
I do not recommend the old method of wrapping the spider trap link around a one pixel image. Instead, I suggest they be constructed similar to:
<a href="http://www.example.com/cgi-bin/trap.cgi" rel="nofollow"></a>
It is also good practice to disallow any spider trap link in your robots.txt file.
Other Uses
I will not get into specifics because there are many examples to be found in the archives, but using mod_rewrite in conjunction with a script of this type greatly improves the webmaster's ability to control access to files on their site.
Questions, Suggestions, Comments?
Post them here and I'll try to answer them all. In the meantime, Merry Christmas and Happy Holidays. :)
[edited by: Key_Master at 9:56 am (utc) on Dec. 24, 2008]
[edited by: coopster at 1:20 pm (utc) on Dec. 26, 2008]
[edit reason] updated code upon request by Key_Master [/edit]
no comments since i haven't digested your post yet but i just wanted to add a reference to your original post for "V1.0":
[webmasterworld.com...]
As for the old trap.pl, I wouldn't recommend anybody use that script anymore. The new script is smarter, safer, and should be easier to install and troubleshoot. Also, if you're looking for a performance boost, you can run this script in mod_perl.
# Return the current local time formatted as "Mon DD, YYYY at HH:MM:SS".
# The scalar form of localtime (e.g. "Thu Sep  9 23:12:01 2008") splits into
# weekday, month, day, hour, minute, second, year; the slice picks month,
# day, year, hour, minute, second so the clock fields print in that order.
sub getTime {
    return sprintf "%s %02d, %s at %02d:%02d:%02d",
        (split(/\W+/, scalar localtime))[1, 2, 6, 3, 4, 5];
}
Although I am pretty sure localtime returns the same format on all major operating systems, it might not. Maybe someone else knows. Of course, POSIX can be used too.
# Return the current local time formatted as "Mon DD, YYYY at HH:MM:SS".
# The scalar form of localtime (e.g. "Thu Sep  9 23:12:01 2008") splits into
# weekday, month, day, hour, minute, second, year; the slice picks month,
# day, year, hour, minute, second so the clock fields print in that order.
sub getTime {
    return sprintf "%s %02d, %s at %02d:%02d:%02d",
        (split(/\W+/, scalar localtime))[1, 2, 6, 3, 4, 5];
}
I tested it on a server and I got some weird results with that code. It wouldn't display the day and it would skip ahead an hour or more with each refresh of the page.
Thanks for the input though. Keeps me thinking. :)
If you weren't concerned with losing the ability to customize the date/time format to your liking, you could delete subroutine getTime and replace line my $serverTime = &getTime(); with my $serverTime = localtime();.
[edited by: Key_Master at 8:00 am (utc) on Dec. 27, 2008]
split(/[\s+:]/
One final thought: if you copied and pasted the code I posted from the forum, the forum may have broken it somehow. I see this forum alters posted code a little, so any code posted here needs to be checked for syntax problems before it is used in a real script.