// QWeb - An SGML Web Browser
// Copyright (C) 1997  Sean Vyain
// svyain@mail.tds.net
// smvyain@softart.com
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#include <stdio.h>
#include <qstack.h>
#include "DtdParser.h"

//=============================================================================
// Public methods.
//-----------------------------------------------------------------------------
// Creates a parser that populates `dtd`, together with the SgmlLexer that
// feeds it.
//
// Data flow set up here:
//   this.data()/endOfData() signals  -->  lexer data()/endOfData() slots
//   lexer token()/done() signals     -->  this token()/done() slots
// The parser starts in the Content state with no declaration in progress.
DtdParser::DtdParser( Dtd* dtd )
        : _declDepth( 0 ),
          _dtd( dtd ),
          _currentEntity( 0 ),
          _currentNode( 0 ),
          _state( Content )
{
    // The lexer is handed the same Dtd object the parser fills in.
    _lexer = new SgmlLexer;
    _lexer->dtd( _dtd );

    // The per-declaration name lists delete their items when cleared.
    _exclNames.setAutoDelete( TRUE );
    _inclNames.setAutoDelete( TRUE );
    _elementNames.setAutoDelete( TRUE );

    // Parser -> lexer: raw bytes and end-of-input notification.
    connect( this, SIGNAL( data( const char*, int ) ),
             _lexer, SLOT( data( const char*, int ) ) );
    connect( this, SIGNAL( endOfData() ),
             _lexer, SLOT( endOfData() ) );

    // Lexer -> parser: token stream and completion notification.
    connect( _lexer, SIGNAL( token( SgmlLexer::Token, const char* ) ),
             this, SLOT( token( SgmlLexer::Token, const char* ) ) );
    connect( _lexer, SIGNAL( done() ),
             this, SLOT( done() ) );
}

// Destructor.  The body is empty: nothing the parser allocated is released
// here.
// NOTE(review): _lexer is created with `new` in the constructor and is not
// deleted here or anywhere else in this file -- confirm whether something else
// owns it or whether it leaks.  done() executes `delete this` from a slot
// connected to the lexer's done() signal, so deleting the lexer from this
// destructor would have to account for that.
DtdParser::~DtdParser()
{
//    printf( "DtdParser::~DtdParser()\n" );
}

//=============================================================================
// Public slots.
//-----------------------------------------------------------------------------
// Slot: passes a chunk of raw DTD input on by re-emitting it as the data()
// signal, which the constructor connected to the lexer's data() slot.
//   bytes  - pointer to the chunk
//   length - number of bytes in the chunk
void DtdParser::fwdData( const char* bytes, int length )
{
    emit data( bytes, length );
}

// Slot: signals that no more input will arrive by re-emitting endOfData(),
// which the constructor connected to the lexer's endOfData() slot.
void DtdParser::fwdEndOfData()
{
    emit endOfData();
}

void DtdParser::done()
{
    SgmlElement* e;
    int i;
	
    // Locate the starting element.
    for ( e = _dtd->elements().first(); e; e = _dtd->elements().next() ) {
        if ( ( i = _contentElements.find( e->name() ) ) == -1 ) {
//            printf( "DtdParser::done() -- start element = '%s'\n", e->name() );
            _dtd->start( e );
        }
    }

    emit done( _dtd );
    delete this;
}

// Slot: receives one token from the lexer and routes it to the handler for
// the parser's current state.  Tokens arriving in a state that has no handler
// are silently dropped.
//   token - token kind reported by the lexer
//   text  - text associated with the token
void DtdParser::token( SgmlLexer::Token token, const char* text )
{
    switch ( _state ) {
        // <!ATTLIST ...> declarations.
        case Attlist:              stateAttlist( token, text );              break;
        case AttlistAttrName:      stateAttlistAttrName( token, text );      break;
        case AttlistAttrValueType: stateAttlistAttrValueType( token, text ); break;
        case AttlistAttrType:      stateAttlistAttrType( token, text );      break;
        case AttlistAttrValue:     stateAttlistAttrValue( token, text );     break;

        // Outside of any declaration / inside a declaration subset.
        case Content:              stateContent( token, text );              break;
        case DeclSubset:           stateDeclSubset( token, text );           break;

        // <!ELEMENT ...> declarations.
        case ElementDeclNames:     stateElementDeclNames( token, text );     break;
        case ElementDeclSTag:      stateElementDeclSTag( token, text );      break;
        case ElementDeclETag:      stateElementDeclETag( token, text );      break;
        case ElementDeclModel:     stateElementDeclModel( token, text );     break;
        case ElementDeclIncl:      stateElementDeclIncl( token, text );      break;
        case ElementDeclExcl:      stateElementDeclExcl( token, text );      break;

        // <!ENTITY ...> declarations.
        case EntityDecl:           stateEntityDecl( token, text );           break;

        // Start of a markup declaration / declarations that are skipped.
        case MarkupDeclStart:      stateMarkupDeclStart( token, text );      break;
        case MarkupDeclSkip:       stateMarkupDeclSkip( token, text );       break;

        default:
            // No handler for this state: ignore the token.
            break;
    }
}

//=============================================================================
// Private methods.
//-----------------------------------------------------------------------------
// Attlist state: collects the element name(s) an <!ATTLIST ...> declaration
// applies to.  Accepts either a single name or a parenthesised, '|'-separated
// group of names; names are stored upper-cased in _elementNames.  Once the
// name (or the closing parenthesis of the group) has been seen the parser
// moves on to AttlistAttrName.  A '>' ends the declaration.
void DtdParser::stateAttlist( SgmlLexer::Token token, const char* text )
{
    if ( token == SgmlLexer::Identifier ) {
        _elementNames.append( QString( text ).upper() );
        if ( _elementList ) {
            // Inside "( ... )": more names may follow.
            return;
        }
        _state = AttlistAttrName;
    } else if ( token == SgmlLexer::GroupOpen ) {
        _elementList = TRUE;
    } else if ( token == SgmlLexer::GroupClose ) {
        _elementList = FALSE;
        _state = AttlistAttrName;
    } else if ( token == SgmlLexer::MarkupClose ) {
        _state = Content;
        _lexer->mode( SgmlLexer::PcdataMode );
    }
    // The '|' separator and every other token are ignored.
}

// ---------------------------------------------------------------------------
// <!ATTLIST ...> attribute definitions.
//
// After the element names have been collected (stateAttlist), the declaration
// is a sequence of attribute definitions, each parsed by the cycle
//
//     AttlistAttrName -> AttlistAttrValueType -> AttlistAttrType
//                                                 [-> AttlistAttrValue]
//                     -> AttlistAttrName -> ...
//
// Each definition is attached to the named elements as soon as it is complete
// (see stateAttlistEnd()).  Previously a single Attribute was created only
// when '>' was reached, built from scratch members that every definition
// overwrites, so only the last attribute of a declaration was recorded.
// ---------------------------------------------------------------------------

// AttlistAttrName state: expects the name of the next attribute, or '>' which
// ends the declaration.
void DtdParser::stateAttlistAttrName( SgmlLexer::Token token, const char* text )
{
    switch ( token ) {
        case SgmlLexer::MarkupClose:
            _state = Content;
            _lexer->mode( SgmlLexer::PcdataMode );
            // Every completed attribute has already been attached; only the
            // per-declaration scratch lists remain to be released.
            _elementNames.clear();
            _attrEnums.clear();
            break;

        case SgmlLexer::Identifier:
        case SgmlLexer::Name:
            _attrName = text;
            _attrName.detach();
            // Start the new definition from a clean slate so that nothing
            // left over from an earlier (possibly aborted) definition leaks
            // into it.
            _attrEnums.clear();
            _attrDefaultValue = "";
            _state = AttlistAttrValueType;
            break;

        default:
            break;
    }
}

// AttlistAttrValueType state: expects the declared value of the attribute --
// one of the keywords handled below, or a parenthesised, '|'-separated list
// of enumerated values, which is collected into _attrEnums.  A '>' abandons
// the unfinished definition and returns to Content.
void DtdParser::stateAttlistAttrValueType( SgmlLexer::Token token, const char* text )
{
    switch ( token ) {
        case SgmlLexer::MarkupClose:
            _state = Content;
            _lexer->mode( SgmlLexer::PcdataMode );
            break;

        case SgmlLexer::Cdata:
            _attrValueType = Attribute::Cdata;
            _state = AttlistAttrType;
            break;

        case SgmlLexer::Name:
            _attrValueType = Attribute::Name;
            _state = AttlistAttrType;
            break;

        case SgmlLexer::Names:
            _attrValueType = Attribute::Names;
            _state = AttlistAttrType;
            break;

        case SgmlLexer::Number:
            _attrValueType = Attribute::Number;
            _state = AttlistAttrType;
            break;

        case SgmlLexer::GroupOpen:
            // Start of an enumerated value list.
            _attrValueType = Attribute::Enum;
            _attrEnums.clear();
            break;

        case SgmlLexer::OrOperator:
            break;

        case SgmlLexer::Identifier:
            // One enumerated value.
            _attrEnums.append( text );
            break;

        case SgmlLexer::GroupClose:
            _state = AttlistAttrType;
            break;

        default:
            break;
    }
}

// AttlistAttrType state: expects the default-value part of the definition:
// #FIXED (the value follows in AttlistAttrValue), #IMPLIED, #REQUIRED, or a
// literal default value.  Except for #FIXED the definition is complete at
// this point and is attached to the elements immediately.  A '>' abandons the
// unfinished definition and returns to Content.
void DtdParser::stateAttlistAttrType( SgmlLexer::Token token, const char* text )
{
    switch ( token ) {
        case SgmlLexer::MarkupClose:
            _state = Content;
            _lexer->mode( SgmlLexer::PcdataMode );
            break;

        case SgmlLexer::Fixed:
            _attrType = Attribute::Fixed;
            _state = AttlistAttrValue;
            break;

        case SgmlLexer::Implied:
            _attrType = Attribute::Implied;
            stateAttlistEnd();
            _state = AttlistAttrName;
            break;

        case SgmlLexer::Required:
            _attrType = Attribute::Required;
            stateAttlistEnd();
            _state = AttlistAttrName;
            break;

        case SgmlLexer::StringLiteral:
        case SgmlLexer::Identifier:
            _attrType = Attribute::Default;
            _attrDefaultValue = text;
            _attrDefaultValue.detach();
            stateAttlistEnd();
            _state = AttlistAttrName;
            break;

        default:
            break;
    }
}

// AttlistAttrValue state: the token following #FIXED supplies the fixed
// value.  Any token other than '>' is taken as that value and completes the
// definition.
void DtdParser::stateAttlistAttrValue( SgmlLexer::Token token, const char* text )
{
    switch ( token ) {
        case SgmlLexer::MarkupClose:
            _state = Content;
            _lexer->mode( SgmlLexer::PcdataMode );
            break;

        default:
            _attrValue = text;
            _attrValue.detach();
            // The Attribute constructor has a single value argument, so the
            // fixed value is handed over through it; before, the value parsed
            // here was never passed on at all.
            // NOTE(review): confirm Attribute treats this argument as the
            // fixed value when the type is Attribute::Fixed.
            _attrDefaultValue = text;
            _attrDefaultValue.detach();
            stateAttlistEnd();
            _state = AttlistAttrName;
            break;
    }
}

// Completes the attribute definition currently held in the _attr* scratch
// members: a new Attribute is appended to every element named by the
// enclosing <!ATTLIST ...> declaration.  Called once per finished attribute
// definition (the name is historical and kept for header compatibility); the
// element name list is left intact for the definitions that follow and is
// released when the declaration's '>' is reached.
void DtdParser::stateAttlistEnd()
{
    for ( char* name = _elementNames.first(); name; name = _elementNames.next() ) {
        SgmlElement* e = _dtd->getElement( name );
        if ( !e ) {
            // Guard against a null result (e.g. an ATTLIST naming an element
            // that has not been declared) instead of dereferencing it.
            continue;
        }
        e->attrs().append( new Attribute( _attrName, _attrValueType, _attrType, _attrEnums, _attrDefaultValue ) );
    }

    _attrEnums.clear();
}

// Content state: everything is skipped until a markup declaration opens, at
// which point the lexer is switched to markup mode and the parser waits for
// the declaration keyword.
void DtdParser::stateContent( SgmlLexer::Token token, const char* )
{
    if ( token != SgmlLexer::MarkupDeclOpen ) {
        return;
    }

    _state = MarkupDeclStart;
    _lexer->mode( SgmlLexer::MarkupMode );
}

// DeclSubset state: entered after a declaration subset has been opened.
//   - subset close : decrements the nesting counter and returns to Content
//   - INCLUDE      : returns to Content
//   - IGNORE and every other token: stay in this state
void DtdParser::stateDeclSubset( SgmlLexer::Token token, const char* )
{
    if ( token == SgmlLexer::DeclSubsetClose ) {
        _declDepth--;
    } else if ( token != SgmlLexer::Include ) {
        // Nothing to do for IGNORE or any unrecognised token.
        return;
    }

    _state = Content;
    _lexer->mode( SgmlLexer::PcdataMode );
}

// ElementDeclNames state: collects the name(s) being declared by an
// <!ELEMENT ...> declaration -- a single name or a parenthesised,
// '|'-separated group.  Names are stored upper-cased in _elementNames.  After
// the name (or the group's closing parenthesis) the parser expects the
// start-tag minimisation flag.
void DtdParser::stateElementDeclNames( SgmlLexer::Token token, const char* text )
{
    if ( token == SgmlLexer::Identifier ) {
        _elementNames.append( QString( text ).upper() );
        if ( _elementList ) {
            // Still inside "( ... )".
            return;
        }
        _state = ElementDeclSTag;
    } else if ( token == SgmlLexer::GroupOpen ) {
        _elementList = TRUE;
    } else if ( token == SgmlLexer::GroupClose ) {
        _elementList = FALSE;
        _state = ElementDeclSTag;
    }
    // The '|' separator and every other token are ignored.
}

// ElementDeclSTag state: reads the start-tag minimisation flag of an element
// declaration.  A minus sign marks the start tag as required, the
// optional-tag token marks it as omissible; anything else is ignored.
void DtdParser::stateElementDeclSTag( SgmlLexer::Token token, const char* )
{
    if ( token == SgmlLexer::MinusSign ) {
        _stagRequired = TRUE;
    } else if ( token == SgmlLexer::OptionalTag ) {
        _stagRequired = FALSE;
    } else {
        return;
    }

    _state = ElementDeclETag;
}

// ElementDeclETag state: reads the end-tag minimisation flag of an element
// declaration.  A minus sign marks the end tag as required, the optional-tag
// token marks it as omissible; anything else is ignored.  The content model
// tree is reset before the model itself is parsed.
void DtdParser::stateElementDeclETag( SgmlLexer::Token token, const char* )
{
    if ( token == SgmlLexer::MinusSign ) {
        _etagRequired = TRUE;
    } else if ( token == SgmlLexer::OptionalTag ) {
        _etagRequired = FALSE;
    } else {
        return;
    }

    _currentNode = 0;
    _state = ElementDeclModel;
}

// ElementDeclModel state: builds the content model tree of an element
// declaration one token at a time.  _currentNode always points at the node
// most recently added (or, after a ')', at the enclosing Group node).
//
//   operands  (name, EMPTY, CDATA, #PCDATA) become new leaves hung below
//             _currentNode (left slot first, then right);
//   operators (* ? + | & ,) are inserted *above* _currentNode, taking its
//             place in its parent and adopting it as their left child;
//   '('       opens a Group node, ')' climbs back up to the innermost Group;
//   inclusion/exclusion list starts switch state, '>' finishes the
//   declaration.
//
// Changes from the previous version:
//   - an operator with no preceding operand, or a ')' without a matching '(',
//     is ignored instead of dereferencing a null node;
//   - names recorded in _contentElements are upper-cased, matching the
//     upper-cased element names they are later compared against in done().
void DtdParser::stateElementDeclModel( SgmlLexer::Token token, const char* text )
{
    SgmlNode* node;
    SgmlNode* group;

    switch ( token ) {
        case SgmlLexer::Identifier:
        case SgmlLexer::Empty:
        case SgmlLexer::Cdata:
        case SgmlLexer::Pcdata:
            switch ( token ) {
                case SgmlLexer::Identifier: {
                    QString name = QString( text ).upper();
                    node = new SgmlNode( SgmlNode::Element, _currentNode );
                    node->element( name );
                    // Remember that this element occurs inside some content
                    // model; done() uses the list to find the start element.
                    if ( _contentElements.find( name ) == -1 ) {
                        _contentElements.append( name );
                    }
                    break;
                }
                case SgmlLexer::Empty:
                    node = new SgmlNode( SgmlNode::Empty, _currentNode );
                    break;
                case SgmlLexer::Cdata:
                    //%%% Make CDATA optional.
                    node = new SgmlNode( SgmlNode::Qmark, _currentNode );
                    node->left( new SgmlNode( SgmlNode::Cdata, node ) );
                    break;
                case SgmlLexer::Pcdata:
                    //%%% Make PCDATA optional.
                    node = new SgmlNode( SgmlNode::Qmark, _currentNode );
                    node->left( new SgmlNode( SgmlNode::Pcdata, node ) );
                    break;
                default: // -Wall
                    node = 0;
                    break;
            }
            // Attach the new operand below the current node.
            if ( _currentNode ) {
                if ( !_currentNode->left() ) {
                    _currentNode->left( node );
                } else {
                    _currentNode->right( node );
                }
            }
            _currentNode = node;
            break;

        case SgmlLexer::Star:
        case SgmlLexer::OrOperator:
        case SgmlLexer::Ampersand:
        case SgmlLexer::QuestionMark:
        case SgmlLexer::PlusSign:
        case SgmlLexer::Comma:
            if ( !_currentNode ) {
                // Operator with nothing to apply it to (malformed model).
                break;
            }
            switch ( token ) {
                case SgmlLexer::Star:
                    node = new SgmlNode( SgmlNode::Star, _currentNode->parent() );
                    node->element( "*" );
                    break;
                case SgmlLexer::OrOperator:
                    // In "a | b | c" the operand just read is the right child
                    // of the previous Or; chain onto that Or instead.
                    if ( ( _currentNode->parent() ) && ( _currentNode->parent()->type() == SgmlNode::Or ) ) {
                        _currentNode = _currentNode->parent();
                    }
                    node = new SgmlNode( SgmlNode::Or, _currentNode->parent() );
                    break;
                case SgmlLexer::Ampersand:
                    if ( ( _currentNode->parent() ) && ( _currentNode->parent()->type() == SgmlNode::And ) ) {
                        _currentNode = _currentNode->parent();
                    }
                    node = new SgmlNode( SgmlNode::And, _currentNode->parent() );
                    break;
                case SgmlLexer::QuestionMark:
                    node = new SgmlNode( SgmlNode::Qmark, _currentNode->parent() );
                    break;
                case SgmlLexer::PlusSign:
                    node = new SgmlNode( SgmlNode::Plus, _currentNode->parent() );
                    break;
                case SgmlLexer::Comma:
                    if ( ( _currentNode->parent() ) && ( _currentNode->parent()->type() == SgmlNode::Seq ) ) {
                        _currentNode = _currentNode->parent();
                    }
                    node = new SgmlNode( SgmlNode::Seq, _currentNode->parent() );
                    node->element( "," );
                    break;
                default: // -Wall
                    node = 0;
                    break;
            }
            if ( !node ) {
                break;
            }

            // Splice the operator in above the current node: it adopts the
            // current node as its left child and replaces it in the parent.
            node->left( _currentNode );
            _currentNode->parent( node );
            if ( node->parent() ) {
                if ( node->parent()->left() == _currentNode ) {
                    node->parent()->left( node );
                } else {
                    node->parent()->right( node );
                }
            }
            _currentNode = node;
            break;

        case SgmlLexer::GroupOpen:
            node = new SgmlNode( SgmlNode::Group, _currentNode );
            if ( _currentNode ) {
                if ( !_currentNode->left() ) {
                    _currentNode->left( node );
                } else {
                    _currentNode->right( node );
                }
            }
            _currentNode = node;
            break;

        case SgmlLexer::GroupClose:
            // Climb to the innermost enclosing Group.  If there is none (an
            // unbalanced ')'), leave the current position untouched.
            group = _currentNode;
            while ( group && ( group->type() != SgmlNode::Group ) ) {
                group = group->parent();
            }
            if ( group ) {
                _currentNode = group;
            }
            break;

        case SgmlLexer::InclusionListStart:
            _state = ElementDeclIncl;
            break;

        case SgmlLexer::ExclusionListStart:
            _state = ElementDeclExcl;
            break;

        case SgmlLexer::MarkupClose:
            stateElementDeclEnd();
            break;

        default:
            break;
    }
}

// ElementDeclIncl state: collects the names of an element declaration's
// inclusion list.  Each name is added to _inclNames and is also recorded in
// _contentElements, so an included element is never chosen as the DTD's start
// element by done().
//
// The _contentElements entry is now stored upper-cased and only once, the
// same way stateElementDeclModel() records names and matching the upper-cased
// element names done() looks up.
// NOTE(review): _inclNames still receives the name exactly as written;
// confirm whether its consumer expects upper-cased names as well.
void DtdParser::stateElementDeclIncl( SgmlLexer::Token token, const char* text )
{
    switch ( token ) {
        case SgmlLexer::Identifier: {
            _inclNames.append( text );

            QString name = QString( text ).upper();
            if ( _contentElements.find( name ) == -1 ) {
                _contentElements.append( name );
            }
            break;
        }

        case SgmlLexer::OrOperator:
            break;

        case SgmlLexer::GroupClose:
            break;

        case SgmlLexer::ExclusionListStart:
            _state = ElementDeclExcl;
            break;

        case SgmlLexer::MarkupClose:
            stateElementDeclEnd();
            break;

        default:
            break;
    }
}

// ElementDeclExcl state: collects the names of an element declaration's
// exclusion list into _exclNames.  An inclusion list may follow; '>' finishes
// the declaration.  Separators, the closing parenthesis and all other tokens
// are ignored.
void DtdParser::stateElementDeclExcl( SgmlLexer::Token token, const char* text )
{
    if ( token == SgmlLexer::Identifier ) {
        _exclNames.append( text );
    } else if ( token == SgmlLexer::InclusionListStart ) {
        _state = ElementDeclIncl;
    } else if ( token == SgmlLexer::MarkupClose ) {
        stateElementDeclEnd();
    }
}

// Finishes an <!ELEMENT ...> declaration.
//
// Steps:
//   1. Climb to the root of the content model tree built by
//      stateElementDeclModel().
//   2. Rewrite the tree so that only a reduced set of node kinds remains:
//        Group    -> replaced by its child
//        a & b    -> ( a , b ) | ( b , a )
//        a+       -> a , a*
//        a?       -> a | EMPTY
//   3. Append an Accept end marker:  ( model , # ).
//   4. Run SgmlNode::calculate() on the result and create one SgmlElement per
//      declared name from it.
//   5. Release the tree and the per-declaration name lists, and return to the
//      Content state.
//
// Changes from the previous version: a declaration that reaches this point
// without any content model, and an empty group "()", no longer dereference a
// null node.
void DtdParser::stateElementDeclEnd()
{
    // Nothing was parsed for the content model (malformed declaration): there
    // is no tree to process, so drop the declaration and resynchronise.
    if ( !_currentNode ) {
        _elementNames.clear();
        _inclNames.clear();
        _exclNames.clear();

        _state = Content;
        _lexer->mode( SgmlLexer::PcdataMode );
        return;
    }

    // Prune the content model tree.
    SgmlNode*        node;
    SgmlNode*        node1;
    SgmlNode*        node2;
    SgmlNode*        node3;
    QStack<SgmlNode> stack;
    while ( _currentNode->parent() ) {
        _currentNode = _currentNode->parent();
    }

    // Work list of nodes still to be examined; rewritten nodes are pushed
    // again so that the nodes produced by a rewrite are processed in turn.
    stack.push( _currentNode );
    while ( !stack.isEmpty() ) {
        node = stack.pop();

        if ( node->type() == SgmlNode::Group ) {
            if ( !node->left() ) {
                // Empty group "()": there is no child to splice in.  Keep the
                // node as an Empty leaf rather than dereferencing null.
                // NOTE(review): treating "()" as EMPTY is an assumption.
                node->type( SgmlNode::Empty );
                continue;
            }
            stack.push( node->left() );

            // Remove the Group node by linking its child directly to the
            // Group's parent (or making the child the new root).
            if ( node->parent() ) {
                node->left()->parent( node->parent() );
                if ( node->parent()->left() == node ) {
                    node->parent()->left( node->left() );
                } else {
                    node->parent()->right( node->left() );
                }
            } else {
                _currentNode = node->left();
            }
            // Detach the child before deleting the Group node.
            node->left( 0 );
            delete node;
        } else if ( node->type() == SgmlNode::And ) {
            // a & b  ->  ( a , b ) | ( b , a )
            // node1 is the new Or taking the And's place; the And itself
            // becomes the first sequence, node2 is built from it with its
            // children swapped.
            node1 = new SgmlNode( SgmlNode::Or, node->parent() );
            stack.push( node1 );
            if ( node->parent() ) {
                if ( node->parent()->left() == node ) {
                    node->parent()->left( node1 );
                } else {
                    node->parent()->right( node1 );
                }
            }
            node->parent( node1 );
            node->type( SgmlNode::Seq );
            node->element( "," );
            node2 = new SgmlNode( node, node1 );
            node3 = node2->left();
            node2->left( node2->right() );
            node2->right( node3 );
            node1->left( node );
            node1->right( node2 );
        } else if ( node->type() == SgmlNode::Plus ) {
            // a+  ->  a , a*   (the Star operates on a node built from a)
            node->type( SgmlNode::Seq );
            node->element( "," );
            stack.push( node );
            node1 = new SgmlNode( SgmlNode::Star, node );
            node1->element( "*" );
            node->right( node1 );
            node1->left( new SgmlNode( node->left(), node1 ) );
        } else if ( node->type() == SgmlNode::Qmark ) {
            // a?  ->  a | EMPTY
            node->type( SgmlNode::Or );
            stack.push( node );
            node->right( new SgmlNode( SgmlNode::Empty, node ) );
        } else {
            // Any other node kind is kept; descend into its children.
            if ( node->left() ) {
                stack.push( node->left() );
            }
            if ( node->right() ) {
                stack.push( node->right() );
            }
        }
    }

    // Add the end token.
    node = new SgmlNode( SgmlNode::Seq );
    node->element( "," );
    node1 = new SgmlNode( SgmlNode::Accept, node );
    node1->element( "#" );
    node->left( _currentNode );
    _currentNode->parent( node );
    node->right( node1 );
    _currentNode = node;

    // Create the DFA.
    _currentNode->calculate();

    // One SgmlElement per declared name, all built from the same model tree.
    for ( char* name = _elementNames.first(); name; name = _elementNames.next() ) {
        _dtd->elements().append( new SgmlElement( name, _stagRequired, _etagRequired, _inclNames, _exclNames, _currentNode ) );
    }
    delete _currentNode;
    _elementNames.clear();
    _inclNames.clear();
    _exclNames.clear();

    _state = Content;
    _lexer->mode( SgmlLexer::PcdataMode );
}

// EntityDecl state: fills in the SgmlEntity allocated when the <!ENTITY
// keyword was seen.
//   - '%'                    : marks the entity as a parameter entity
//   - PUBLIC / SYSTEM        : records the external identifier kind
//   - identifier or literal  : the first one is the entity name, every later
//                              one replaces the entity text
//   - '>'                    : hands the entity to the DTD and returns to
//                              Content
// Comments and all other tokens are ignored.
void DtdParser::stateEntityDecl( SgmlLexer::Token token, const char* text )
{
    if ( ( token == SgmlLexer::StringLiteral ) || ( token == SgmlLexer::Identifier ) ) {
        if ( !_currentEntity->name.isNull() ) {
            _currentEntity->text = text;
        } else {
            _currentEntity->name = text;
        }
    } else if ( token == SgmlLexer::Percent ) {
        _currentEntity->type = SgmlEntity::Parametric;
    } else if ( token == SgmlLexer::Public ) {
        _currentEntity->parmType = SgmlEntity::Public;
    } else if ( token == SgmlLexer::System ) {
        _currentEntity->parmType = SgmlEntity::System;
    } else if ( token == SgmlLexer::MarkupClose ) {
        _state = Content;
        _lexer->mode( SgmlLexer::PcdataMode );
        // The entity list of the DTD receives the finished entity.
        _dtd->entities().append( _currentEntity );
        _currentEntity = 0;
    }
}

// MarkupDeclSkip state: discards every token of a declaration the parser does
// not handle, up to and including its closing '>'.
void DtdParser::stateMarkupDeclSkip( SgmlLexer::Token token, const char* )
{
    if ( token != SgmlLexer::MarkupClose ) {
        return;
    }

    _state = Content;
    _lexer->mode( SgmlLexer::PcdataMode );
}

// MarkupDeclStart state: the token after a declaration opener decides what
// kind of declaration follows.
//   - ELEMENT / ATTLIST : reset the element name list and start collecting
//                         the element names
//   - ENTITY            : allocate a fresh SgmlEntity to fill in
//   - subset open       : bump the nesting counter, enter DeclSubset
//   - comment           : stay here
//   - '>'               : empty declaration, back to Content
//   - anything else     : unsupported declaration, skip to its '>'
void DtdParser::stateMarkupDeclStart( SgmlLexer::Token token, const char* )
{
    switch ( token ) {
        case SgmlLexer::Comment:
            break;

        case SgmlLexer::MarkupClose:
            _state = Content;
            _lexer->mode( SgmlLexer::PcdataMode );
            break;

        case SgmlLexer::DeclSubsetOpen:
            _declDepth++;
            _state = DeclSubset;
            break;

        case SgmlLexer::Entity:
            _currentEntity = new SgmlEntity;
            _state = EntityDecl;
            break;

        case SgmlLexer::Element:
        case SgmlLexer::Attlist:
            // Both declarations begin with a (group of) element name(s).
            _elementList = FALSE;
            _elementNames.clear();
            if ( token == SgmlLexer::Element ) {
                _state = ElementDeclNames;
            } else {
                _state = Attlist;
            }
            break;

        default:
            _state = MarkupDeclSkip;
            break;
    }
}
