#!/bin/perl # -*- perl -*- # # Copyright (c) 1997, 2000 DJ Delorie, All Rights Reserved. NO WARRANTEE. # # Note: headers.cgi is used by a Firefox extension. #if ($ENV{'HTTP_REFERER'} # && $ENV{'HTTP_REFERER'} !~ m@^http://www.delorie.com/web/@) { # print "Location: http://www.delorie.com/web/\n\n"; # exit 0; #} if ($ENV{'HTTP_USER_AGENT'} =~ /robot|spider|wget|crawl|slurp|googlebot|openfind/i) { exit 0; } sub rebase_url { local($tag, $uri, $rest) = @_; if ($uri =~ /^http:/) { $u = $uri; } elsif ($uri =~ /^[a-z]+:/) { if ($tag) { return "$tag=\"$uri\"$rest"; } else { return $uri; } } elsif ($uri =~ m@^/@) { $u = $urlhead . $uri; } else { $u = $urlpath . $uri; 0 while $u =~ s@/\./@/@g; 0 while $u =~ s@/[^/]+/\.\./@/@g; 0 while $u =~ s@/[^/]+/\.\.$@/@g; $u = $urlhead . $u; } if ($u !~ /\.(pdf|zip|gz|exe|gif|jpg|png)/) { $u =~ s/([^a-zA-Z0-9\._\#-])/sprintf("%%%02x", ord($1));/ge; $u = $me . $u; } if ($tag) { return "$tag=\"$u\"$rest"; } else { return $u; } } sub canonical_url { local($base, $uri) = @_; local($urlhead, $urlpath) = $base =~ m@([^/]+/+[^/]+)(.*)@; $urlpath =~ s@/[^/]+$@/@; $urlpath = "/" unless $urlpath; if ($uri =~ /^[a-z]+:/) { return $uri; } elsif ($uri =~ m@^/@) { $u = $urlhead . $uri; } else { $u = $urlpath . $uri; 0 while $u =~ s@/\./@/@g; 0 while $u =~ s@/[^/]+/\.\./@/@g; 0 while $u =~ s@/[^/]+/\.\.$@/@g; $u = $urlhead . $u; } return $u; } #----------------------------------------------------------------------------- sub try_one { my ($url, $ref) = @_; $attempts .= "
Attempted URL: $url \n"; $try_access_ok = &webget($url, $ref); $try_access_ok .= ""; if ($webstatus eq "404") { $attempts .= " (not found)"; } elsif ($webstatus eq "403") { $attempts .= " (access denied)"; } else { $access_ok .= $try_access_ok; } return $webstatus =~ /^[123]/; } sub check_delorie_htm { my ($url) = @_; my ($ref) = "http://$ENV{'SERVER_NAME'}/web/howto-allow.html (on behalf of $ENV{'REMOTE_ADDR'})"; my ($ourl) = $url; $attempts = ''; ($host) = $url =~ m@[a-z]*://([^/:]+)@; print STDERR "url `$url` host `$host`\n"; if ($host =~ m/delorie.com|thomii.com|localhost|^[0-9\.]+$/) { print "Please don't scan my own hosts.\n"; exit 0; } #return if $ENV{'REMOTE_ADDR'} eq "207.22.48.171"; $url =~ s@\?.*@@; $url =~ s@/[^/]*$@/delorie.htm@; return if &try_one($url, $ref); if ($url =~ m@/\~@) { $url =~ s@(/\~[^/]+)/.*$@$1/delorie.htm@; return if &try_one($url, $ref); } if ($url !~ m@//[^/]+/delorie.htm$@) { $url =~ s@(//[^/]+)/.*$@$1/delorie.htm@; return if &try_one($url, $ref); } $url = $ourl; $url =~ s@\?.*@@; $url =~ s@/[^/]*$@/delorie.html@; return if &try_one($url, $ref); if ($url =~ m@/\~@) { $url =~ s@(/\~[^/]+)/.*$@$1/delorie.html@; return if &try_one($url, $ref); } if ($url !~ m@//[^/]+/delorie.htm$@) { $url =~ s@(//[^/]+)/.*$@$1/delorie.html@; return if &try_one($url, $ref); } $url = $ourl; $url =~ s@\?.*@@; $url =~ s@/[^/]*$@/delorie.gif@; return if &try_one($url, $ref); if ($url =~ m@/\~@) { $url =~ s@(/\~[^/]+)/.*$@$1/delorie.gif@; return if &try_one($url, $ref); } if ($url !~ m@//[^/]+/delorie.htm$@) { $url =~ s@(//[^/]+)/.*$@$1/delorie.gif@; return if &try_one($url, $ref); } print "\n"; print "
Sorry, but due to abuse, this service cannot access\n"; print "sites that do not explicitly\n"; print "allow it.
\n"; print "You need to create a file called delorie.htm or\n"; print "delorie.gif on your\n"; print "web server to prove you're the webmaster. When I see this file (it can be empty)\n"; print "I'll allow my tools to access your site.
\n"; $attempts =~ s/\r//g; $attempts =~ s@\n\n+@\n@g; print $attempts, "\n"; if ($access_ok) { print "Here is a copy of the error messages I got from the server\n"; print "you were attempting to look at, in case that helps you.\n"; print "Note that these are not errors with my tools, so pleaes\n"; print "do not tell me my tools are broken because of these.
\n"; $access_ok =~ s/ |
"; $resp .= "Request: GET $uri HTTP/1.0\n"; $resp .= $line; while () { $resp .= $_; } close(S); if ($webstatus eq '302' || $webstatus eq '301') { ($loc) = $resp =~ m@Location:\s*(.*\S)@; $loc = &canonical_url($url, $loc); return &webget($loc); } return $resp; } # Read the response header while () { $web_header .= $_ unless /Content-length/i || /^\s/; last unless /\S/; } $resp = ""; # Read the data while () { s/meta http-equiv=/nometa x=/gi; $resp .= $_; } close(S); print STDERR "normal return\n"; return $resp; } 1;