/* 
   Higher Level Interface to XML Parsers.
   Copyright (C) 1999-2000, Joe Orton <joe@orton.demon.co.uk>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.
  
   Alternatively, you can redistribute it and/or modify it under the
   terms of the GNU Library General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at
   your option) any later version.

   This software is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   License for more details.

   You should have received copies of the GNU Library General Public
   License and the GNU General Public License along with this software; 
   if not, write to the Free Software Foundation, Inc., 675 Mass Ave, 
   Cambridge, MA 02139, USA.

   $Id: hip_xml.c,v 1.13 2000/03/08 19:52:18 joe Exp $
*/

#include <config.h>

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#include "string_utils.h"
#include "http_utils.h"
#include "hip_xml.h"

/* Human-readable name of an element, for debug and error messages:
 * "root" for the root placeholder, "unknown" for the unknown-element
 * handler entry, otherwise the element's own name, falling back to
 * "unspecified" when the name is NULL.
 * NB: the argument is evaluated more than once. */
#define PRETTY_NAME(elm) \
    ( (elm)->id == HIP_ELM_root ? "root" \
      : (elm)->id == HIP_ELM_unknown ? "unknown" \
      : (elm)->name != NULL ? (elm)->name \
      : "unspecified" )
  
/* TODO: 
 * could move 'valid' into state, maybe allow optional
 * continuation past an invalid branch.
 */

/* Element description given to the placeholder state which sits above
 * the document's real root element (see hip_xml_init). */
static const struct hip_xml_elm root_element = {
    "@<root>@", HIP_ELM_root, 0
};

/* The callback handlers registered with the underlying XML parser.
 * In each case 'userdata' is the struct hip_xml_parser the parser was
 * created for:
 *   start_element: an element has been opened; 'atts' is a
 *                  NULL-terminated list of name, value pairs (and may
 *                  itself be NULL).
 *   end_element:   an element has been closed.
 *   char_data:     'len' bytes of character data were found.
 */
static void start_element( void *userdata, const hip_xml_char *name, const hip_xml_char **atts );
static void end_element( void *userdata, const hip_xml_char *name );
static void char_data( void *userdata, const hip_xml_char *cdata, int len );

#ifdef HIP_XML_HANDLE_NSPACES

/* Linked list of namespace scopes.
 * One node is created for each "xmlns:<name>" attribute found on an
 * element, and is owned by that element's state. */
struct hip_xml_nspace {
    hip_xml_char *name;  /* the part of the attribute name after "xmlns:" */
    hip_xml_char *value; /* the attribute value, used as the name prefix */
    struct hip_xml_nspace *next; /* next scope declared on the same element */
};

/* And an auxiliary: records namespace declarations from 'atts' in
 * 'state', and returns the namespace-qualified form of 'tag' as an
 * allocated string, or NULL on error. */
static hip_xml_char * parse_element( struct hip_xml_parser *p, 
				    struct hip_xml_state *state,
				    const hip_xml_char *tag, const hip_xml_char **atts );

#endif

#ifdef HAVE_LIBXML

/* The SAX callback table handed to libxml: only element start/end and
 * character data are of interest, every other event is ignored.
 * Could be const as far as we care, but libxml doesn't want that.
 * NOTE(review): the slot names in the comments follow libxml's
 * xmlSAXHandler field order - confirm against the libxml version in
 * use if fields are ever added here. */
static xmlSAXHandler sax_handler = {
    NULL, /* internalSubset */
    NULL, /* isStandalone */
    NULL, /* hasInternalSubset */
    NULL, /* hasExternalSubset */
    NULL, /* resolveEntity */
    NULL, /* getEntity */
    NULL, /* entityDecl */
    NULL, /* notationDecl */
    NULL, /* attributeDecl */
    NULL, /* elementDecl */
    NULL, /* unparsedEntityDecl */
    NULL, /* setDocumentLocator */
    NULL, /* startDocument */
    NULL, /* endDocument */
    start_element, /* startElement */
    end_element, /* endElement */
    NULL, /* reference */
    char_data, /* characters */
    NULL, /* ignorableWhitespace */
    NULL, /* processingInstruction */
    NULL, /* comment */
    NULL, /* warning (libxml's own would be xmlParserWarning) */
    NULL, /* error (libxml's own would be xmlParserError) */
    NULL, /* fatalError (libxml's own would be xmlParserError) */
    NULL, /* getParameterEntity */
};

#endif /* HAVE_LIBXML */

/* Returns the line number which the underlying XML parser has
 * currently reached in its input. */
int hip_xml_currentline( struct hip_xml_parser *p ) {
    int line;
#ifdef HAVE_EXPAT
    line = XML_GetCurrentLineNumber(p->parser);
#else
    line = p->parser->input->line;
#endif
    return line;
}

static const struct hip_xml_elm *
find_element( struct hip_xml_elmlist *list, const hip_xml_char *name,
	      struct hip_xml_elmlist **which ) {
    struct hip_xml_elmlist *cur, *unk_which = NULL;
    const struct hip_xml_elm *unk = NULL;
    int n;
    for( cur = list; cur != NULL; cur = cur->next ) {
	for( n = 0; cur->elements[n].name != NULL; n++ ) {
	    if( strcasecmp( cur->elements[n].name, name ) == 0 ) {
		*which = cur;
		return &cur->elements[n];
	    }
	    if( !unk && cur->elements[n].id == HIP_ELM_unknown ) {
		unk = &cur->elements[n];
		unk_which = cur;
	    }
	}
    }
    if( !cur && unk ) {
	/* Give them the unknown handler */
	*which = unk_which;
	return unk;
    } else {
	return NULL;
    }
}

/* Returns the predefined XML entity which must be used in place of
 * the character 'ch' in escaped text, or NULL if 'ch' can be copied
 * through unchanged. */
static const char *escape_entity( char ch )
{
    switch( ch ) {
    case '&': return "&amp;";
    case '<': return "&lt;";
    case '>': return "&gt;";
    case '"': return "&quot;";
    case '\'': return "&apos;";
    default: return NULL;
    }
}

/* Returns a copy of 'text' in which the five characters with
 * predefined XML entities (& < > " ') have been replaced by those
 * entities, making it safe to use as element content or as a quoted
 * attribute value.
 *
 * The result is allocated with malloc() and must be free()d by the
 * caller.  Returns NULL if 'text' is NULL or memory cannot be
 * allocated.
 */
char *hip_xml_escape( const char *text )
{
    const char *pnt, *entity;
    char *ret, *out;
    size_t len = 0;

    if( text == NULL ) return NULL;

    /* First pass: find out how long the escaped string will be. */
    for( pnt = text; *pnt != '\0'; pnt++ ) {
	entity = escape_entity( *pnt );
	len += ( entity != NULL ) ? strlen( entity ) : 1;
    }

    ret = malloc( len + 1 );
    if( ret == NULL ) return NULL;

    /* Second pass: copy, substituting entities as we go. */
    out = ret;
    for( pnt = text; *pnt != '\0'; pnt++ ) {
	entity = escape_entity( *pnt );
	if( entity != NULL ) {
	    size_t elen = strlen( entity );
	    memcpy( out, entity, elen );
	    out += elen;
	} else {
	    *out++ = *pnt;
	}
    }
    *out = '\0';
    return ret;
}

/* Called with the start of a new element. */
static void 
start_element( void *userdata, const hip_xml_char *name, const hip_xml_char **atts ) 
{
    struct hip_xml_parser *p = (struct hip_xml_parser *)userdata;
    struct hip_xml_state *s;
#ifdef HIP_XML_HANDLE_NSPACES
    hip_xml_char *full_name;
#else
    const hip_xml_char *full_name;
#endif
    if( !p->valid ) {
	/* We've stopped parsing */
	DEBUG( DEBUG_XML, "Ignoring start of element: %s\n", name );
	return;
    }
    if( p->collect ) {
	/* In Collect Mode.
	 * TODO: this doesn't handle namespaces at all. */
	sbuffer_concat( p->buffer, "<", name, NULL );
	if( atts != NULL ) {
	    int n;
	    for( n = 0; atts[n] != NULL; n+=2 ) {
		sbuffer_concat( p->buffer, " ", atts[n], "=", atts[n+1],
				NULL );
	    }
	}
	sbuffer_zappend( p->buffer, ">" );
	/* One deeper */
	p->collect_depth++;
	return;
    }
    /* Set the new state */
    s = malloc( sizeof(struct hip_xml_state) );
    memset( s, 0, sizeof(struct hip_xml_state) );
    s->parent = p->current;
    p->current = s;
#ifdef HIP_XML_HANDLE_NSPACES
    /* We need to handle namespaces ourselves */
    full_name = parse_element( p, s, name, atts );
    if( !full_name ) {
	/* it bombed. */
	p->valid = 0;
	snprintf( p->error, BUFSIZ, "Could not parse element name `%s'",
		  name );
	return;
    }
#else
    full_name = name;
#endif
    /* Map the name to a tag */
    DEBUG( DEBUG_XMLPARSE, "Mapping tag name %s... ", full_name );
    s->elm = find_element( p->elm_lists, full_name, &s->context );
    if( !s->elm ) {
	DEBUG( DEBUG_XMLPARSE, "Unhandled unknown element!\n" );
	snprintf( p->error, BUFSIZ, "Unknown XML element `%s'", full_name );
	p->valid = 0;
	return;
    }
    
    DEBUG( DEBUG_XMLPARSE, "mapped to id %d\n", s->elm->id );

    /* Do we want cdata? */
    p->want_cdata = ((p->current->elm->flags & HIP_XML_CDATA) 
		     == HIP_XML_CDATA);
    p->collect = ((p->current->elm->flags & HIP_XML_COLLECT)
		  == HIP_XML_COLLECT);
    
    /* expat is not a validating parser - check the new element
     * is valid in the current context.
     */
    DEBUG( DEBUG_XML, "Checking context of element %s (parent: %s)\n", 
	   PRETTY_NAME(s->elm), PRETTY_NAME(s->parent->elm) );
    if( (*s->context->validate_cb)( s->parent->elm->id, s->elm->id ) ) {
	DEBUG( DEBUG_XML, "Invalid context.\n" );
	snprintf( p->error, BUFSIZ, "Element %s found in invalid parent %s",
		  PRETTY_NAME(s->elm), PRETTY_NAME(s->parent->elm) );
	p->valid = 0;
    } else {
	DEBUG( DEBUG_XML, "Valid context.\n" );
	if( s->context->startelm_cb ) {
	    if( (*s->context->startelm_cb)( s->context->userdata, s,
					    name, atts ) ) {
		DEBUG( DEBUG_XML, "Startelm callback failed.\n" );
		p->valid = 0;
	    }
	} else {
	    DEBUG( DEBUG_XML, "No startelm handler.\n" );
	}
    }
    
#ifdef HIP_XML_HANDLE_NSPACES
    free( full_name );
#endif

}

/* Destroys the given state: frees the state itself and, when we are
 * handling namespaces ourselves, the default namespace and every
 * namespace scope which was declared on it. */
static void destroy_state( struct hip_xml_state *s ) {
#ifdef HIP_XML_HANDLE_NSPACES
    struct hip_xml_nspace *ns, *following;
    DEBUG( DEBUG_XMLPARSE, "Freeing namespaces...\n" );
    if( s->default_ns != NULL ) {
	free( s->default_ns );
    }
    /* Walk the list, taking the next pointer before each node goes */
    for( ns = s->nspaces; ns != NULL; ns = following ) {
	following = ns->next;
	free( ns->name );
	free( ns->value );
	free( ns );
    }
    DEBUG( DEBUG_XMLPARSE, "Finished freeing namespaces.\n" );
#endif /* HIP_XML_HANDLE_NSPACES */
    free( s );
}

/* Called with a run of character data.
 *
 * userdata: the struct hip_xml_parser being driven.
 * data:     the character data; NOT null-terminated.
 * len:      number of bytes at 'data'.
 *
 * The data is appended to the parser's cdata buffer, but only if the
 * parse is still valid and the current element wants cdata.  While the
 * buffer is still empty, leading whitespace is discarded.  The parse
 * is marked invalid if the buffer cannot be extended.
 */
static void char_data( void *userdata, const hip_xml_char *data, int len ) {
    struct hip_xml_parser *p = userdata;
    
    if( !p->want_cdata || !p->valid ) return;
    DEBUG( DEBUG_XMLPARSE, "Given %d bytes of cdata.\n", len );
    if( len <= 0 ) {
	/* Nothing to add; also stops a bogus negative length from
	 * being treated as a huge unsigned one below. */
	DEBUG( DEBUG_XMLPARSE, "Zero bytes of content.\n" );
	return;
    }
    /* First, if this is the beginning of the CDATA, skip all
     * leading whitespace, we don't want it. */
    if( sbuffer_size(p->buffer) == 0 ) {
	/* An int, like len: keeps the comparison signed and matches
	 * the %d used to print it. */
	int wslen = 0;
	/* Ignore any leading whitespace */
	while( wslen < len && 
	       ( data[wslen] == ' ' || data[wslen] == '\r' ||
		 data[wslen] == '\n' || data[wslen] == '\t' ) ) {
	    wslen++;
	}
	data += wslen;
	len -= wslen;
	DEBUG( DEBUG_XMLPARSE, "Skipped %d bytes of leading whitespace.\n", 
	       wslen );
	if( len == 0 ) {
	    DEBUG( DEBUG_XMLPARSE, "Zero bytes of content.\n" );
	    return;
	}
    }
    if( sbuffer_append( p->buffer, data, len ) ) {
	/* OOM */
	p->valid = 0;
    }
}

/* Called with the end of an element.
 *
 * userdata: the struct hip_xml_parser being driven.
 * name:     the element name exactly as given by the XML parser.
 *
 * Does nothing once the parse has been marked invalid.  While inside
 * a collected element, the closing tag is just appended to the cdata
 * buffer.  Otherwise the context's end-element callback (if any) is
 * run for the current state - being passed the buffered cdata if the
 * element wanted it, else NULL - and the state is then popped off the
 * stack and destroyed.
 */
static void end_element( void *userdata, const hip_xml_char *name ) {
    struct hip_xml_parser *p = userdata;
    struct hip_xml_state *s = p->current;
    if( !p->valid ) {
	/* We've stopped parsing */
	DEBUG( DEBUG_XML, "Parse died. Ignoring end of element: %s\n", name );
	return;
    }
    if( p->collect && (p->collect_depth > 0) ) {
	/* Still inside the collected element: echo the closing tag */
	sbuffer_concat( p->buffer, "</", name, ">", NULL );
	p->collect_depth--;
	return;
    }
	
    /* process it */
    if( s->context->endelm_cb ) {
	DEBUG( DEBUG_XMLPARSE, "Calling endelm callback for %s.\n", s->elm->name );
	if( (*s->context->endelm_cb)( s->context->userdata, s,
				      p->want_cdata?sbuffer_data(p->buffer):
				      NULL ) ) {
	    DEBUG( DEBUG_XML, "Endelm callback failed.\n" );
	    p->valid = 0;
	}
    }
    /* If we were collecting, this was the end of the collected element
     * itself (depth zero), so collect mode is over.  Without this,
     * every element which follows would be swallowed into the buffer
     * by start_element rather than being parsed. */
    p->collect = 0;
    /* Move the current pointer up the branch */
    p->current = s->parent;
    DEBUG( DEBUG_XML, "Back in element: %s\n", p->current->elm->name );
    /* NOTE(review): want_cdata is left as the closed element had it,
     * it is not restored to the parent's setting. */
    if( p->want_cdata ) {
	sbuffer_clear( p->buffer );
    } 
    destroy_state( s );
}

#ifdef HIP_XML_HANDLE_NSPACES

/* Parses the tag attributes, and handles XML namespaces. 
 * With a little bit of luck.
 *
 * Any "xmlns" or "xmlns:<prefix>" attributes in 'atts' are recorded
 * in 'state'.  The tag name is then resolved: a "prefix:local" name
 * has its prefix looked up in the scopes of this state and of its
 * ancestors; an unprefixed name takes the nearest default namespace,
 * or none if there isn't one.
 *
 * Returns:
 *   the element name on success: the namespace value followed 
 *   directly by the local name, as an allocated string which the 
 *   caller must free;
 *   or NULL on error, with a message left in p->error.
 */
static hip_xml_char *parse_element( struct hip_xml_parser *p,
				    struct hip_xml_state *state,
				    const hip_xml_char *tag, const hip_xml_char **atts ) {
    struct hip_xml_nspace *ns;
    int attn;
    const hip_xml_char *pnt;
    hip_xml_char *ret;
    const hip_xml_char *tag_prefix, *tag_suffix;
    struct hip_xml_state *xmlt;

    DEBUG( DEBUG_XMLPARSE, "Parsing elm of name: [%s]\n", tag );
    /* Parse the atts for namespace declarations... if we have any atts.
     * expat will never pass us atts == NULL, but libxml will. */
    if( atts != NULL ) {
	for( attn = 0; atts[attn]!=NULL; attn+=2 ) {
	    DEBUG( DEBUG_XMLPARSE, "Got attribute: [%s] = [%s]\n", atts[attn], atts[attn+1] );
	    if( strcasecmp( atts[attn], "xmlns" ) == 0 ) {
		/* New default namespace */
		state->default_ns = strdup( atts[attn+1] );
		if( state->default_ns == NULL ) goto oom;
		DEBUG( DEBUG_XMLPARSE, "New default namespace: %s\n", 
		       state->default_ns );
	    } else if( strncasecmp( atts[attn], "xmlns:", 6 ) == 0 ) {
		/* New namespace scope */
		ns = malloc( sizeof *ns );
		if( ns == NULL ) goto oom;
		ns->name = strdup( atts[attn]+6 ); /* skip the xmlns: */
		ns->value = strdup( atts[attn+1] );
		if( ns->name == NULL || ns->value == NULL ) {
		    free( ns->name );
		    free( ns->value );
		    free( ns );
		    goto oom;
		}
		/* Only link in the scope once it is complete */
		ns->next = state->nspaces;
		state->nspaces = ns;
		DEBUG( DEBUG_XMLPARSE, "New namespace scope: %s -> %s\n",
		       ns->name, ns->value );
	    }
	}
    }
    /* Now check the tag name for a namespace scope */
    pnt = strchr( tag, ':' );
    tag_prefix = NULL;
    tag_suffix = NULL;
    if( pnt == NULL ) {
	/* No scope - have we got a default? */
	DEBUG( DEBUG_XMLPARSE, "No scope found, searching for default.\n" );
	for( xmlt = state; xmlt!=NULL; xmlt=xmlt->parent ) {
	    if( xmlt->default_ns != NULL ) {
		tag_prefix = xmlt->default_ns;
		break;
	    }
	}
	if( tag_prefix != NULL ) {
	    DEBUG( DEBUG_XMLPARSE, "Found default namespace [%s]\n", tag_prefix );
	} else {
	    DEBUG( DEBUG_XMLPARSE, "No default namespace, using empty.\n" );
	    tag_prefix = "";
	}
	tag_suffix = tag;
    } else {
	/* pnt points to the colon, so this is the length of the bit
	 * before it. */
	size_t prefix_len = (size_t)( pnt - tag );
	DEBUG( DEBUG_XMLPARSE, "Got namespace scope. Trying to resolve..." );
	/* Have a scope - resolve it, innermost declaration first */
	for( xmlt = state; tag_prefix==NULL && xmlt!=NULL; xmlt=xmlt->parent ) {
	    for( ns = xmlt->nspaces; ns!=NULL; ns=ns->next ) {
		/* The whole of the declared name must match the whole
		 * of the tag's prefix: comparing only prefix_len bytes
		 * would let a declared "DAV" match the tag "D:foo". */
		if( strlen( ns->name ) == prefix_len &&
		    strncasecmp( ns->name, tag, prefix_len ) == 0 ) {
		    /* Scope matched! Hoorah */
		    tag_prefix = ns->value;
		    /* end the search */
		    break;
		}
	    }
	}
	if( tag_prefix != NULL ) {
	    DEBUG( DEBUG_XMLPARSE, "Resolved scope to [%s]\n", tag_prefix );
	    /* The suffix is everything after the ':' */
	    tag_suffix = pnt+1;
	    if( *tag_suffix == '\0' ) {
		/* FIXME: presumes expat */
		snprintf( p->error, BUFSIZ, 
			  "Element name missing in '%s' at line %d of response.",
			  tag, hip_xml_currentline(p) );

		DEBUG( DEBUG_XMLPARSE, "No element name after ':'. Failed.\n" );
		return NULL;
	    }
	} else {
	    DEBUG( DEBUG_XMLPARSE, "Undeclared namespace.\n" );
	    /* FIXME: presumes expat */
	    snprintf( p->error, BUFSIZ, 
		      "Undeclared namespace in '%s' at line %d of response.",
		      tag, hip_xml_currentline(p) );
	    return NULL;
	}
    }
    /* here, we have tag_suffix and tag_prefix */
    DEBUG( DEBUG_XMLPARSE, "prefix: [%s], suffix: [%s]\n", tag_prefix, tag_suffix );
    CONCAT2( ret, tag_prefix, tag_suffix );
    DEBUG( DEBUG_XMLPARSE, "You gave me: %s, and I gave you this: %s\n", tag,
	   ret );
    return ret;

oom:
    /* Anything already stored in the state is freed along with it */
    DEBUG( DEBUG_XMLPARSE, "Out of memory.\n" );
    snprintf( p->error, BUFSIZ, 
	      "Out of memory handling namespaces of '%s'", tag );
    return NULL;
}

#endif /* HIP_XML_HANDLE_NSPACES */

/* Initializes the parser 'p' to recognise the elements in the lists
 * 'elists': sets up the placeholder root state, the cdata buffer and
 * the underlying XML parser.
 *
 * Returns 0 on success.  Returns -1 on failure, in which case
 * everything allocated here has been released again and p must not
 * be used (nor passed to hip_xml_finish).
 */
int hip_xml_init( struct hip_xml_parser *p, struct hip_xml_elmlist *elists ) {
    memset( p, 0, sizeof *p );
    p->valid = 1;
    p->elm_lists = elists;
    /* Placeholder for the root element */
    p->current = p->root = malloc( sizeof *p->root );
    if( p->root == NULL ) {
	goto fail;
    }
    memset( p->root, 0, sizeof *p->root );
    p->root->elm = &root_element;
    /* Initialize the cdata buffer */
    p->buffer = sbuffer_create();
    if( p->buffer == NULL ) {
	goto fail_root;
    }
    /* Initialize the underlying parser */
#ifdef HAVE_EXPAT
    p->parser = XML_ParserCreate( NULL );
    if( p->parser == NULL ) {
	goto fail_buffer;
    }
    XML_SetElementHandler( p->parser, start_element, end_element );
    XML_SetCharacterDataHandler( p->parser, char_data );
    XML_SetUserData( p->parser, (void *) p );
#else
    p->parser = xmlCreatePushParserCtxt( &sax_handler, (void *)p, NULL, 0, NULL );
    if( p->parser == NULL ) {
	goto fail_buffer;
    }
#endif
    return 0;

    /* Unwind in reverse order, leaving no dangling pointers behind */
fail_buffer:
    sbuffer_destroy( p->buffer );
    p->buffer = NULL;
fail_root:
    free( p->root );
    p->current = p->root = NULL;
fail:
    /* Makes hip_xml_parse refuse to touch the half-built parser */
    p->valid = 0;
    return -1;
}   

/* As hip_xml_parse, but with a generic signature: 'userdata' is the
 * struct hip_xml_parser, and the block is given as plain chars. */
void hip_xml_parse_v( void *userdata, const char *block, size_t len ) {
    /* FIXME: The two XML parsers break all our nice abstraction by
     * choosing different char *'s. The swine. This may kill us some
     * day.  For now, just cast the block to whichever it is. */
    hip_xml_parse( (struct hip_xml_parser *) userdata, 
		   (const hip_xml_char *) block, len );
}

/* Parse the given block of input of length len.
 *
 * A zero-length block marks the end of the document, and tells the
 * underlying parser to finish up.  Does nothing if the parse has
 * already been marked invalid.  If the underlying parser reports an
 * error, a message is left in p->error and the parse is marked
 * invalid.
 */
void hip_xml_parse( struct hip_xml_parser *p, const hip_xml_char *block, size_t len ) {
    int ret, flag;
    /* duck out if it's broken */
    if( !p->valid ) {
	/* len is a size_t: cast it to match the format */
	DEBUG( DEBUG_XML, "Not parsing %lu bytes.\n", (unsigned long) len );
	return;
    }
    if( len == 0 ) {
	/* Non-zero flag: this is the final chunk */
	flag = -1;
	block = "";
	DEBUG( DEBUG_XML, "Got 0-length buffer, end of response.\n" );
    } else {	
	DEBUG( DEBUG_XML, "Parsing %lu length buffer.\n", 
	       (unsigned long) len );
	flag = 0;
    }
#ifdef HAVE_EXPAT
    ret = XML_Parse( p->parser, block, (int) len, flag );
    DEBUG( DEBUG_XMLPARSE, "XML_Parse returned %d\n", ret );
    if( ret == 0 ) {
	/* Go through hip_xml_currentline, so that the line number is
	 * an int whatever type this expat uses for it. */
	snprintf( p->error, BUFSIZ,
		  "XML parse error at line %d of response: %s", 
		  hip_xml_currentline(p),
		  XML_ErrorString(XML_GetErrorCode(p->parser)) );
	p->valid = 0;
    }
#else
    ret = xmlParseChunk( p->parser, block, (int) len, flag );
    DEBUG( DEBUG_XMLPARSE, "xmlParseChunk returned %d\n", ret );
    if( p->parser->errNo ) {
	/* FIXME: error handling */
	snprintf( p->error, BUFSIZ, "XML parse error at line %d of response.", 
		  hip_xml_currentline(p) );
	p->valid = 0;
    }
#endif
}

/* Releases everything held by the parser: the cdata buffer, every
 * state still on the stack, and the underlying XML parser.
 * Returns zero if the parse was valid, non-zero if it was not. */
int hip_xml_finish( struct hip_xml_parser *p ) {
    struct hip_xml_state *state = p->current;

    sbuffer_destroy( p->buffer );
    /* Clean up any states which may remain.
     * If p.valid, then this should be only the root element. */
    while( state != NULL ) {
	struct hip_xml_state *up = state->parent;
	destroy_state( state );
	state = up;
    }
#ifdef HAVE_EXPAT
    XML_ParserFree( p->parser );
#else
    xmlFreeParserCtxt( p->parser );
#endif
    return p->valid ? 0 : 1;
}

