/*
 * FILE:
 * parse.h
 *
 * FUNCTION:
 * Parse URIs into their component pieces.
 * Vaguely similar to the function provided by Chris Blizzard's ghttp, 
 * but formulated in a fashion more suitable for the current webload tools.
 * Vaguely similar to the function provided by libwww.HTParse, 
 * but simpler, more efficient, easier to use.
 *
 * Result of parse is in the form
 * access :// host : port / path # anchor
 * The input URI does not need to be absolute; it can be partial.
 * No memory is allocated, instead, pointers into the uri string,
 * together with lengths are set up.
 *
 * The constructor routine sets the fields of the uri structure
 *   to initial values of NULL, 0, or -1 as appropriate.
 *
 * The Parse() routine parses the string pointed at by
 *    uri->uri, and returns the access method, the host name,
 *    the port number, and the path.  It leaves the respective
 *    fields unchanged if any of these are not found.
 *
 * HISTORY:
 * Created by Linas Vepstas linas@linas.org November 1998
 */

#ifndef __WL_PARSE_H__
#define __WL_PARSE_H__

#include <stdlib.h>
#include "super.h"

/* wlURI holds a URI string together with the component pieces that
 * Parse() locates inside it.  Per the file header, no memory is
 * allocated for the components: each component pointer points into
 * the string at `uri` and is paired with a length field.
 * NOTE(review): since the components are (pointer, length) pairs into
 * one string, they are presumably not individually NUL-terminated --
 * confirm against the implementation before treating them as C strings.
 */
class wlURI {
   public:
      // Sets the fields to initial values of NULL, 0, or -1 as
      // appropriate (per the file header).
      wlURI (void);

      // Parses the string pointed at by `uri` and fills in the access
      // method, host name, port number and path.  Fields belonging to
      // components that are not found are left unchanged.
      void Parse (void);
   public:
      char * uri;     // the string that Parse() works on
                      // (presumably assigned by the caller beforehand)
      char * access;  // access scheme, e.g. http:, news:, telnet:, ftp:
      size_t access_len; // length of the access scheme text
      int encrypt;    // true if access method implies encryption e.g. https:

      char * host;    // e.g. www.w3.org
      size_t host_len;   // length of the host name text

      char * port;    // e.g. 80 (the port as text)
      size_t port_len;   // length of the port text
      unsigned short portno; // numeric value of port, or appropriate default

      char * path;    // e.g. /slash/dot.html
      size_t path_len;   // length of the path text

      int error;      // has an error occurred ?
                      // NOTE(review): the values used are not visible in this header
}; 

/* The wl_extract_link() function returns the start and end points
 *    of a URL reference.  A non-zero return value indicates failure.
 *    Basically, it searches the string pointed at by *st for a pair
 *    of balancing quotes, parentheses, or angle brackets.  If balancing
 *    quotes, etc. are not found, then whitespace (including newlines,
 *    tabs, and carriage returns) is used as a delimiter.
 *
 *    NOTE(review): presumably the start and end points are handed back
 *    through *st and *en respectively -- confirm against the
 *    implementation, which is not visible in this header.
 */

int wl_extract_link (char **st, char **en);

/* wl_scan_for_links will search for token "toka", and optionally,
 * tokb and then tokc in the indicated buffer.  If it finds any of
 * these, it will call the callback function.
 *
 * The text that will be searched is in the first argument,
 * wlString &page.  The search will begin at buffstart, which
 * must be a pointer into page, and will continue for bufflen bytes.
 *
 * The link, once found, will be given to the callback; the second
 * and third arguments to the callback will point at the start and
 * end of the link; whereas the first argument is the page in which
 * the link was found.
 *
 * NOTE(review): the meaning of the char * return value is not
 * described in this header -- check the implementation before
 * relying on it.
 */


char * wl_scan_for_links (wlString &page, 
                   char *buffstart, size_t bufflen, 
			       char *toka, char * tokb, char * tokc,
                   void (*callback)(wlString&, char *, char *));

#endif /* __WL_PARSE_H__ */
