#!/usr/bin/perl
#
# MK CHECKLINKS v0.1
# by Michael Khler
# mikoehler@vossnet.de
#
# This script checks local or http files for HTTP links.
# (That means only links with <a href="<link>">mylink</a>)
#
# USAGE:
# EX. ./mkcl mylinks.html or ./mkcl http://www.mysite.com/mylinks.html
#
# NOTE:
# This script is using LYNX to fetch HTTP headers and documents.
# You have to install LYNX on your *nix system, before you can
# use this script.
# Only one HTTP link reference per line is allowed in the file that
# should be checked with mkcl. EMail references are ignored.
# The autor of this script wants to receive an email, if u use it.

$fetch_hdr_cmd = "lynx -source -head";
$fetch_src_cmd = "lynx -source";
@LINES =();

# compare strings
sub cstr{
my $a = uc($_[0]);
my $b = uc($_[1]);
  return (index($a,$b)==0);
}

# index strings
sub cidx{
my $a = uc($_[0]);
my $b = uc($_[1]);
   return (index($a,$b)!=-1);
}

# Checking Link, used by checkLink
sub chkL{
my $link = $_[0];
my @cmd  = `$fetch_hdr_cmd $link`;
my $l = $cmd[0];
  # checking HTML header return codes and return boolean value
  return ((index($l,"HTTP/")==0) && ((index($l,"200")==9) || (index($l,"302")==9)));
}

# Checking Link
sub checkLink{
my $link = $_[0];
   if (chkL($link)){
     print "     OK";
   } else {
     print "INVALID";
   }
   print " $link\n";
}

# cuts the Http Reference from a string
sub cutHREF{
my $l = $_[0];
   $l = substr($l,index(uc($l),'HREF="')+6,255);
   $l = substr($l,0,index($l,'"'));
   return $l;
}

# process checking links
sub procLinks{
my $x=0;
   # Count links first
   foreach(@LINES){ 
     $l = $_;
     if ((cidx($l,'href="')) && (!cidx($l,"mailto:"))){
       $x=$x+1;
     }
   }
   # Then check links
   print "Processing $x HTTP links\n";
   foreach (@LINES){
     $l = $_;
     chop($l);
     if ((cidx($l,'href="')) && (!cidx($l,"mailto:"))){
 	$cr = cutHREF($l);
 	checkLink($cr);
     }
   }
}

# process a local file with links
sub procFile{
my $f = $_[0];
   open (DATA, "<$f") || failure("File: $f not found");
   @LINES=<DATA>;
   close(DATA);
   procLinks;
}

# process a http file with links
sub procHTTP{
my $f = $_[0];
   print "Fetching HTTP file (please stand by)\n";
   @LINES = `$fetch_src_cmd $f`;
   procLinks;
}

# failure procedure
sub failure{
   print "\nFAILURE\n$_[0]\nAborting..\n";
   exit;
}

# main routine
sub mymain{
   if (cidx($ARGV[0],"http://")){
     procHTTP($ARGV[0]);
   } else {
     procFile($ARGV[0]);
   }
   print "\nThank you for using MKCheckLinks from mikoehler\@vossnet.de\n";
}

mymain;