/* vim: set sw=8 ts=8 si : */
/* Author: Guido Socher, Copyright: GPL */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include <string.h>
#include "htag.h"
#include "config.h"
/*
 * File-scope state shared by the functions below.
 * MAXTAGLEN is not defined in this file; presumably it comes from htag.h
 * or config.h -- TODO confirm.
 */
static char *filename; /* the file name currently read */
/* href value of the current tag; filled in by copy_file_path() and
 * printed by print_result() when -p is given */
static char pathstr[MAXTAGLEN+1];
/* scratch copy of the current tag; print_result() copies the tag here
 * so that it can be cut after the first '>' without touching the
 * caller's string */
static char tag[MAXTAGLEN+1];

/* option parsing */
/* -a: when 0, print_result() cuts the tag after its first '>' */
int opt_a=0;
/* -s: short format, print_result() omits the "file:line: " prefix */
int opt_s=0;
/* -p: print_result() prints pathstr instead of the tag */
int opt_p=0;
/*end global data*/

/*
 * Print the usage text on stdout, followed by the version string when
 * VERINFO is defined, and terminate the program with exit status 0.
 * This function does not return.
 */
void help(void)
{
	/* The tabs in the last two lines are part of the output; they are
	 * spelled as \t so that editors cannot silently convert them. */
	printf("hrefgrep -- search html pages for <a href=\"...\">\n"
	       "tag and print it in a nice format\n"
	       "\n"
	       "USAGE: hrefgrep [-ahps] html-files\n"
	       "\n"
	       "OPTIONS: -h this help\n"
	       "         -a If this is an anchor tag then print the whole tag until </a>\n"
	       "         -s print in short format; do not print the file names\n"
	       "\t -p print only the path of the href without the surrounding tag.\n"
	       "\t    \n");
#ifdef VERINFO
	puts(VERINFO);
#endif
	exit(0);
}
/*
 * Print one match on stdout.
 *
 * wholetag: text of the tag that matched.
 * l:        line number shown in the "file:line: " prefix.
 *
 * The tag is first copied into the file-scope buffer `tag`; unless -a
 * was given (opt_a), the copy is cut right after its first '>'.
 * With -p (opt_p) the path stored in pathstr is printed instead of the
 * tag, and with -s (opt_s) the "file:line: " prefix is left out.
 *
 * Always returns 0.
 */
int print_result(char *wholetag, int l){
	const char *text;
	char *closing;

	/* Bounded copy: a tag longer than the buffer is truncated instead
	 * of overflowing `tag`. snprintf always NUL-terminates. */
	snprintf(tag, sizeof tag, "%s", wholetag);
	if (opt_a == 0){
		closing = strchr(tag, '>');
		if (closing) {
			/* '>' is a string character, so the byte after it
			 * is still inside the buffer */
			closing[1] = '\0';
		}
	}

	text = opt_p ? pathstr : tag;
	if (opt_s){
		printf("%s\n", text);
	}else{
		printf("%s:%d: %s\n", filename, l, text);
	}
	return(0);
}
/*
 * Copy the href value into the pathstr variable.
 *
 * pathstartptr must point to the start of the value, which may be
 * enclosed in double or single quotes.
 * Example: pathstartptr pointing to "index.html"> results in pathstr
 * being set to index.html with the quotes removed.
 * pathstartptr pointing to xx.html>xxxx results in pathstr being set
 * to xx.html
 *
 * A quoted value ends at the matching quote character. An unquoted
 * value ends at the first whitespace character or '>'. In both cases
 * the end of the input string also ends the value. A value that does
 * not fit into pathstr is truncated; pathstr is always NUL-terminated.
 */
void copy_file_path(char *pathstartptr){
	char quote = '\0'; /* opening quote character, '\0' if unquoted */
	char *dest = pathstr;
	/* last usable byte of pathstr, reserved for the terminating NUL */
	char *const last = pathstr + sizeof pathstr - 1;

	if (*pathstartptr == '"' || *pathstartptr == '\''){
		quote = *pathstartptr;
		pathstartptr++;
	}
	while (*pathstartptr && dest < last){
		if (quote){
			if (*pathstartptr == quote){
				/* found closing quote */
				break;
			}
		}else if (*pathstartptr == '>' ||
			  isspace((unsigned char)*pathstartptr)){
			/* cast: passing a negative char to isspace is
			 * undefined behavior */
			break;
		}
		*dest = *pathstartptr;
		dest++;
		pathstartptr++;
	}
	*dest = '\0';
}
/*
 * Callback handed to findtag(): checks whether the tag contains an
 * href attribute and, if it does, stores the href value in pathstr and
 * prints the match.
 *
 * wholetag:   text of the tag to examine.
 * linenumber: line number passed on to print_result().
 * is_anchor:  not used by this function.
 *
 * Always returns 0.
 */
int evaltag(char *wholetag,int linenumber,int is_anchor){
	char *href_value;

	/* NOTE(review): matchpat() is declared elsewhere; presumably on a
	 * match it leaves href_value pointing just behind the matched
	 * text, i.e. at the start of the attribute value -- confirm. */
	if (!matchpat(wholetag," href *= *",&href_value)){
		return(0);
	}
	copy_file_path(href_value);
	print_result(wholetag,linenumber);
	return(0);
}


/*
 * Entry point: parse the command line options, then run findtag() with
 * evaltag() as callback on every file named on the command line.
 *
 * An unknown option prints an error on stderr and exits with status 1.
 * With -h, or when no file is given, help() is called, which prints the
 * usage text and exits with status 0. Otherwise returns 0.
 */
int main(int argc, char *argv[])
{
	/* The following things are used for getopt: */
	extern int optind;
	extern int opterr;
	int ch;

	/* keep getopt quiet; the '?' case below reports the error */
	opterr = 0;
	while ((ch = getopt(argc, argv, "ahps")) != -1) {
		switch (ch) {
		case 'a':
			opt_a=1;
			break;
		case 'p':
			opt_p=1;
			break;
		case 's':
			opt_s=1;
			break;
		case 'h':
			help(); /* does not return */
			break;
		case '?':
			fprintf(stderr, "ERROR: No such option. -h for help.\n");
			exit(1);
		default:
			/* no other value is expected for this option string */
			break;
		}
	}
	if (optind == argc){
		/* no file given */
		help();
	}
	/* href looks like: <A HREF="http://www.xxx/"><IMG SRC="xxxx"
	 * WIDTH="150" HEIGHT="100" BORDER="0" ALT="xxxx">text string</A>
	 * area is <area shape=rect coords="1,51,149,299" href="datei.htm"> */
	for (; optind < argc; optind++){
		/* search for html tags and call the function evaltag;
		 * filename is also read by print_result() for the
		 * "file:line: " prefix */
		filename=argv[optind];
		findtag(evaltag,filename,1);
	}
	return(0);
}
