/* Copyright 2009, UCAR/Unidata and OPeNDAP, Inc.
   See the COPYRIGHT file for more information. */

#include "dapparselex.h"

/* When defined, daplex() decodes a '%' followed by two hex digits inside a
   bare word into the single byte those digits denote. */
#define URLCVT

/* Forward declaration: debug helper that prints the most recently scanned
   token to stderr; daplex() calls it when ocdebug >= 2. */
static void dumptoken(Lexstate* lexstate);

/****************************************************/
/* Characters that may start a bare word (identifier or number) in both
   DDS and DAS text. */
static char* wordchars1 =
  "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-+_/%.\\*";
/* Single-character tokens: daplex() returns the character itself as the
   token value when one of these is seen at the start of a token. */
static char* worddelims =
  "{}[]:;=,";
/* Characters that may continue a bare word when lexing a DDS; this is the
   default installed by daplexinit(). Adds '#' to the first-character set. */
static char* ddswordcharsn =
  "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-+_/%.\\*#";
/* Characters that may continue a bare word when lexing a DAS; installed by
   dassetup(). Same as the DDS set plus ':'. */
static char* daswordcharsn =
  "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-+_/%.\\*:#";

/*
Simple lexer
*/

/*
 * Switch an already initialised lexer over to the DAS rules for the
 * characters that may continue a bare word. The first-character set and
 * the delimiter set are shared by DDS and DAS, so only wordcharsn changes.
 */
void
dassetup(DAPparsestate* state)
{
    Lexstate* lex = state->lexstate;

    lex->wordcharsn = daswordcharsn;
}

/*
 * Allocate and initialise a lexer state for the given input text.
 *
 * input:     NUL-terminated text to scan; it is copied, so the caller keeps
 *            ownership of its buffer. A NULL input is treated as "".
 * lexstatep: receives the new state, or NULL if allocation failed. If
 *            lexstatep itself is NULL nothing is allocated, because there
 *            would be no way to hand the state back (it would leak).
 *
 * The state starts out with the DDS word-character set; call dassetup()
 * to switch to the DAS set. Release the state with daplexcleanup().
 */
void
daplexinit(char* input, Lexstate** lexstatep)
{
    Lexstate* lexstate;

    if(lexstatep == NULL) return;
    *lexstatep = NULL;

    lexstate = (Lexstate*)malloc(sizeof(Lexstate));
    if(lexstate == NULL) return;
    memset((void*)lexstate,0,sizeof(Lexstate));

    lexstate->input = strdup(input != NULL ? input : "");
    if(lexstate->input == NULL) {
        /* Without a private copy of the input there is nothing to scan */
        free(lexstate);
        return;
    }
    lexstate->next = lexstate->input;
    lexstate->yytext = ocbytesnew();
    lexstate->reclaim = oclistnew();
    lexstate->wordchars1 = wordchars1;
    lexstate->wordcharsn = ddswordcharsn;
    lexstate->worddelims = worddelims;

    *lexstatep = lexstate;
}

/*
 * Release a lexer state created by daplexinit() and set *lexstatep to NULL.
 *
 * Frees every string recorded on the reclaim list (the token texts that
 * daplex() handed to the parser), the reclaim list itself, the private copy
 * of the input, the yytext buffer and the saved text of the last token.
 * Safe to call with a NULL lexstatep or with *lexstatep == NULL.
 */
void
daplexcleanup(Lexstate** lexstatep)
{
    unsigned int i;
    Lexstate* lexstate;

    if(lexstatep == NULL) return;
    lexstate = *lexstatep;
    if(lexstate == NULL) return;

    for(i=0;i<oclistlength(lexstate->reclaim);i++)
        ocfree((void*)oclistget(lexstate->reclaim,i));
    oclistfree(lexstate->reclaim);
    if(lexstate->input != NULL) ocfree(lexstate->input);
    if(lexstate->yytext != NULL) ocbytesfree(lexstate->yytext);
    if(lexstate->lasttoken.text != NULL) ocfree(lexstate->lasttoken.text);
    free(lexstate);
    *lexstatep = NULL;
}

/* Hex digits */
static char hexdigits[] = "0123456789abcdefABCDEF";

/*
 * Convert one hexadecimal digit character to its numeric value.
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', or -1 for anything else.
 */
static int
tohex(int c)
{
    int value = -1;

    if('0' <= c && c <= '9')
        value = c - '0';
    else if('a' <= c && c <= 'f')
        value = 10 + (c - 'a');
    else if('A' <= c && c <= 'F')
        value = 10 + (c - 'A');
    return value;
}

/* Number of entries in each of the two parallel tables below */
#define NKEYWORDS 20

/* Reserved words, listed in alphabetical order. daplex() compares a scanned
   bare word against each entry with strcasecmp, so matching is
   case-insensitive. Entry i corresponds to keytokens[i]. */
static char* keywords[NKEYWORDS] = {
"alias",
"array",
"attributes",
"byte",
"code",
"dataset",
"error",
"float32",
"float64",
"grid",
"int16",
"int32",
"maps",
"message",
"sequence",
"string",
"structure",
"uint16",
"uint32",
"url"
};

/* Token value returned by daplex() for each reserved word; must stay in the
   same order as keywords[] because the two tables share an index. */
static int keytokens[NKEYWORDS] = {
SCAN_ALIAS,
SCAN_ARRAY,
SCAN_ATTR,
SCAN_BYTE,
SCAN_CODE,
SCAN_DATASET,
SCAN_ERROR,
SCAN_FLOAT32,
SCAN_FLOAT64,
SCAN_GRID,
SCAN_INT16,
SCAN_INT32,
SCAN_MAPS,
SCAN_MESSAGE,
SCAN_SEQUENCE,
SCAN_STRING,
SCAN_STRUCTURE,
SCAN_UINT16,
SCAN_UINT32,
SCAN_URL
};

/*
 * Scan the next token from the lexer input.
 *
 * lvalp: receives the token text for the parser: a heap copy of the
 *        collected text, which is also pushed on lexstate->reclaim so that
 *        daplexcleanup() can free it, or NULL when no text was collected
 *        (delimiters, keywords, an empty quoted string, end of input).
 * state: parser state; state->lexstate holds the input cursor and buffers.
 *
 * Returns 0 at end of input, the character itself for one of the
 * lexstate->worddelims characters, the keytokens[] entry for a reserved
 * word, or SCAN_WORD for a quoted string or any other bare word.
 *
 * The cursor (lexstate->next) is never advanced past the terminating NUL,
 * so calling daplex() again after end of input keeps returning 0.
 */
int
daplex(YYSTYPE* lvalp, DAPparsestate* state)
{
    Lexstate* lexstate = state->lexstate;
    int token = 0;
    int c;
    unsigned int i;
    char* p = lexstate->next;

    ocbytesclear(lexstate->yytext);
    /* invariant: p points at the next unconsumed char; every branch
       advances p itself and stops at (never beyond) the NUL */
    while(token == 0 && (c = *p) != '\0') {
        if(c == '\n') {
            lexstate->lineno++;
            p++;
        } else if(c <= ' ' || c == '\177') {
            /* whitespace: ignore */
            p++;
        } else if(c == '#') {
            /* single line comment: stop on the newline without consuming
               it so that the next iteration still counts the line */
            while(*p != '\0' && *p != '\n') p++;
        } else if(strchr(lexstate->worddelims,c) != NULL) {
            /* don't put in lexstate->yytext to avoid memory leak */
            token = c;
            p++;
        } else if(c == '"') {
            /* We have a string token; will be reported as SCAN_WORD */
            p++; /* skip the opening quote */
            for(;;) {
                c = *p;
                if(c == '\0') break; /* unterminated string: stay on NUL */
                p++;
                if(c == '"') break; /* closing quote consumed */
                if(c == '\\') {
                    c = *p;
                    if(c == '\0') break; /* backslash at end of input */
                    p++;
                    switch (c) {
                    case 'r': c = '\r'; break;
                    case 'n': c = '\n'; break;
                    case 'f': c = '\f'; break;
                    case 't': c = '\t'; break;
                    case 'x': {
                        /* \xDD: consume a digit only once it has been
                           validated, so a bad escape neither swallows the
                           following character nor runs past the NUL */
                        int d1,d2;
                        c = '?';
                        d1 = tohex(*p);
                        if(d1 < 0) {
                            daperror(state,"Illegal \\xDD in TOKEN_STRING");
                        } else {
                            p++;
                            d2 = tohex(*p);
                            if(d2 < 0) {
                                daperror(state,"Illegal \\xDD in TOKEN_STRING");
                            } else {
                                p++;
                                c=(((unsigned int)d1)<<4) | (unsigned int)d2;
                            }
                        }
                    } break;
                    default: break; /* any other escaped char stands for itself */
                    }
                }
                ocbytesappend(lexstate->yytext,c);
            }
            token=SCAN_WORD;
        } else if(strchr(lexstate->wordchars1,c) != NULL) {
            /* we have a SCAN_WORD */
            ocbytesappend(lexstate->yytext,c);
            p++;
            while((c = *p) != '\0') {
#ifdef URLCVT
                if(c == '%' && p[1] != 0 && p[2] != 0
                            && strchr(hexdigits,p[1]) != NULL
                            && strchr(hexdigits,p[2]) != NULL) {
                    /* %XX escape: replace by the byte it denotes */
                    int d1 = tohex(p[1]);
                    int d2 = tohex(p[2]);
                    if(d1 >= 0 && d2 >= 0) {
                        c=(((unsigned int)d1)<<4) | (unsigned int)d2;
                        p+=2;
                    }
                } else {
                    /* leave the terminating char for the next token */
                    if(strchr(lexstate->wordcharsn,c) == NULL) break;
                }
#else
                /* leave the terminating char for the next token */
                if(strchr(lexstate->wordcharsn,c) == NULL) break;
#endif
                ocbytesappend(lexstate->yytext,c);
                p++;
            }
            token=SCAN_WORD; /* assume */
            ocbytesappend(lexstate->yytext,'\0');
            /* check for keyword */
            for(i=0;i<NKEYWORDS;i++) {
                if(strcasecmp(keywords[i],ocbytescontents(lexstate->yytext))==0) {
                    token=keytokens[i];
                    /* don't put in lexstate->yytext to avoid memory leak */
                    ocbytesclear(lexstate->yytext);
                    break;
                }
            }
        } else { /* illegal character: skip it */
            p++;
        }
    }
    lexstate->next = p;
    free(lexstate->lasttoken.text); /* free(NULL) is a no-op */
    lexstate->lasttoken.text = ocbytesdup(lexstate->yytext);
    lexstate->lasttoken.token = token;
    if(ocdebug >= 2) dumptoken(lexstate);

    /* Prepare return value: put the token text onto the Bison stack. */

    /* Note: this was a bad idea because it puts malloc'd strings */
    /* on the Bison stack, which makes it hard to reclaim them. */
    /* Bad (but usable) solution: capture all these strings in a */
    /* list in the lexstate and reclaim them at the end of the parse. */
    if(ocbyteslength(lexstate->yytext) == 0)
        *lvalp = NULL;
    else {
        *lvalp = ocbytesdup(lexstate->yytext);
        oclistpush(lexstate->reclaim,(ocelem)*lvalp); /* save for reclamation*/
    }

    return token;      /* Return the type of the token.  */
}

/*
 * Debug helper: print the most recently scanned token
 * (lexstate->lasttoken) to stderr.
 *
 * SCAN_WORD tokens are shown with their text between '|' marks, keyword
 * tokens by name, end of input (token 0) as EOF, and single-character
 * delimiter tokens as the quoted character. Any other value is printed as
 * a number instead of being truncated to a char.
 */
static void
dumptoken(Lexstate* lexstate)
{
    const char* stoken;
    int token = lexstate->lasttoken.token;

    if(token == SCAN_WORD) {
        const char* text = lexstate->lasttoken.text;
        fprintf(stderr,"TOKEN = |%s|\n",(text != NULL ? text : ""));
        return;
    }
    if(token == 0) {
        fprintf(stderr,"TOKEN = EOF\n");
        return;
    }
    switch (token) {
    case SCAN_ALIAS : stoken = "alias"; break;
    case SCAN_ARRAY: stoken = "array"; break;
    case SCAN_ATTR: stoken = "attr"; break;
    case SCAN_BYTE: stoken = "byte"; break;
    case SCAN_CODE: stoken = "code"; break;
    case SCAN_DATASET: stoken = "dataset"; break;
    case SCAN_ERROR: stoken = "error"; break;
    case SCAN_FLOAT32: stoken = "float32"; break;
    case SCAN_FLOAT64: stoken = "float64"; break;
    case SCAN_GRID: stoken = "grid"; break;
    case SCAN_INT16: stoken = "int16"; break;
    case SCAN_INT32: stoken = "int32"; break;
    case SCAN_MAPS : stoken = "maps"; break;
    case SCAN_MESSAGE: stoken = "message"; break;
    case SCAN_SEQUENCE: stoken = "sequence"; break;
    case SCAN_STRING: stoken = "string"; break;
    case SCAN_STRUCTURE: stoken = "structure"; break;
    case SCAN_UINT16: stoken = "uint16"; break;
    case SCAN_UINT32: stoken = "uint32"; break;
    case SCAN_URL : stoken = "url"; break;
    default:
        if(token > ' ' && token < '\177') {
            /* single-character delimiter token */
            fprintf(stderr,"TOKEN = '%c'\n",token);
        } else {
            fprintf(stderr,"TOKEN = <%d>\n",token);
        }
        return;
    }
    fprintf(stderr,"TOKEN = %s\n",stoken);
}

