/* File "sym_compiler.c":
 * Compiles malaga symbol files. */

/* This file is part of Malaga, a system for Left Associative Grammars.
 * Copyright (C) 1995-1998 Bjoern Beutel
 *
 * Bjoern Beutel
 * Universitaet Erlangen-Nuernberg
 * Abteilung fuer Computerlinguistik
 * Bismarckstrasse 12
 * D-91054 Erlangen
 * e-mail: malaga@linguistik.uni-erlangen.de 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "scanner.h"
#include "files.h"
#include "malaga_files.h"
#include "sym_type.h"
#include "symbols.h"

#undef GLOBAL
#define GLOBAL

#include "sym_compiler.h"

/*---------------------------------------------------------------------------*/

/* Maximum number of symbols in a symbol list. "parse_symbol_list" collects
 * the parsed symbols in an array of this size and reports "atom list too
 * long" when a list would exceed it. */
#define SYMBOL_LIST_MAX 100

/* A node of the symbol table. The table is a binary search tree whose nodes
 * are ordered by name, using "strcmp_no_case" for the comparison. */
typedef struct SYMBOL_NODE_T
{
  struct SYMBOL_NODE_T *left_son;  /* nodes whose name sorts before <name> */
  struct SYMBOL_NODE_T *right_son; /* nodes whose name sorts after <name> */
  string_t name;                   /* name of the node, kept in <string_pool> */
  symbol_t symbol;                 /* symbol which this node represents */
  symbol_entry_t *symbol_ptr;      /* this symbol's entry in <symbol_pool> */
} symbol_node_t;

LOCAL symbol_node_t *symbol_tree = NULL; /* root node of the symbol tree */

LOCAL pool_t symbol_pool; /* one <symbol_entry_t> for each symbol */
LOCAL pool_t value_pool;  /* atom lists of the non-atomic symbols */
LOCAL pool_t string_pool; /* symbol names */

/*---------------------------------------------------------------------------*/

LOCAL void free_symbols (symbol_node_t *node)
/* Release all nodes of the symbol (sub)tree whose root is <node>.
 * Nothing happens if <node> is NULL. */
{
  while (node != NULL)
  {
    /* Remember the right subtree before the node itself is released. */
    symbol_node_t *right_subtree = node->right_son;

    /* Only the left subtree needs a recursive call; the right subtree is
     * handled by the next turn of this loop. */
    free_symbols (node->left_son);
    free (node);
    node = right_subtree;
  }
}

/*---------------------------------------------------------------------------*/

LOCAL symbol_node_t *find_symbol_node (string_t name, bool_t new_symbol)
/* Look up <name> in the symbol tree; names are compared by "strcmp_no_case".
 * If <new_symbol> == TRUE, <name> must not be in the tree yet: a fresh node
 * is allocated, linked into the tree and returned. Only its son links are
 * initialised here; the remaining fields are left to the caller.
 * If <new_symbol> == FALSE, the existing node for <name> is returned.
 * An error is reported if <name> exists although a new symbol was asked for,
 * or if it is missing although an old symbol was asked for. */
{
  symbol_node_t **link; /* the link that is currently being examined */
  symbol_node_t *node;
  short_t order;

  /* Walk down the tree until <name> is found or an empty link is reached. */
  link = &symbol_tree;
  for (;;)
  {
    node = *link;
    if (node == NULL)
      break;

    order = strcmp_no_case (name, node->name);
    if (order == 0)
    {
      /* There already is a node for <name>. */
      if (new_symbol)
        error ("symbol \"%s\" is already defined", name);
      return node;
    }

    if (order < 0)
      link = &node->left_son;
    else
      link = &node->right_son;
  }

  /* <name> is not in the tree; <link> is the place where it belongs. */
  if (! new_symbol)
    error ("symbol \"%s\" is not defined", name);

  /* Create a leaf node and hang it into the tree. */
  node = (symbol_node_t *) new_mem (sizeof (symbol_node_t));
  node->left_son = NULL;
  node->right_son = NULL;
  *link = node;
  return node;
}

/*---------------------------------------------------------------------------*/

LOCAL symbol_t find_atomic_symbol (string_t name)
/* Return the symbol that has been defined with name <name>.
 * It is an error if there is no such symbol or if the symbol has got an
 * atom list, i.e. if it is not atomic. */
{
  symbol_node_t *found = find_symbol_node (name, FALSE);
  symbol_entry_t *entry = found->symbol_ptr;

  /* An entry without an atom list has -1 as its <atoms> index. */
  if (entry->atoms != -1)
    error ("symbol \"%s\" is not atomic", name);

  return found->symbol;
}

/*---------------------------------------------------------------------------*/

GLOBAL void enter_symbol (string_t name, value_t atoms)
/* Define a new symbol with name <name>: add it to the symbol tree and
 * store its name, its atom list and its symbol entry in the pools.
 * <atoms> is the atom list of the symbol, or NULL for an atomic symbol.
 * It is an error if the symbol pool already holds SYMBOL_MAX entries or if
 * a symbol called <name> has already been defined. */
{
  symbol_entry_t entry;
  symbol_node_t *new_node;
  long_t string_index;
  long_t list_index;
  long_t entry_index;

  if (pool_items (symbol_pool) >= SYMBOL_MAX)
    error ("too many symbols");

  /* The atom list goes into <value_pool> first, since the symbol entry
   * refers to it by its pool index. -1 stands for "no atom list". */
  if (atoms == NULL)
    list_index = -1;
  else
    copy_value_to_pool (value_pool, atoms, &list_index);

  /* Create the tree node and let it use the pooled copy of the name. */
  new_node = find_symbol_node (name, TRUE);
  new_node->name = copy_string_to_pool (string_pool, name, &string_index);

  /* Build the symbol entry and append it to <symbol_pool>; its index in
   * the pool is the value of the new symbol. */
  entry.name = string_index;
  entry.atoms = list_index;
  new_node->symbol_ptr = (symbol_entry_t *) copy_to_pool (symbol_pool,
                                                          &entry, 1,
                                                          &entry_index);
  new_node->symbol = entry_index;
}

/*---------------------------------------------------------------------------*/

LOCAL void parse_symbol_list (value_t *symbol_list)
/* Parse an atom list, i.e. "<", identifiers separated by ",", and ">".
 * The list is returned as a canonised Malaga value in *<symbol_list>.
 * Each identifier must name an atomic symbol and may occur only once.
 * The list must have at least 2 and at most SYMBOL_LIST_MAX atoms, and it
 * must differ from the atom lists of all symbols defined so far. */
{
  symbol_t atoms[SYMBOL_LIST_MAX]; /* the atoms that have been parsed */
  long_t atom_count;               /* number of valid items in <atoms> */
  long_t k;
  long_t entry_index;

  atom_count = 0;
  clear_value_heap ();
  test_token ('<');
  for (;;)
  {
    symbol_t atom;

    read_next_token (); /* Read after "<" or ",". */

    if (atom_count >= SYMBOL_LIST_MAX)
      error ("atom list too long");

    test_token (TOK_IDENT);
    atom = find_atomic_symbol (token_name);

    /* No atom may be listed more than once. */
    for (k = 0; k < atom_count; k++)
    {
      if (atoms[k] == atom)
        error ("symbol \"%s\" twice in symbol list", token_name);
    }

    atoms[atom_count] = atom;
    atom_count++;

    read_next_token (); /* Read after the identifier. */
    if (next_token != ',')
      break;
  }

  parse_token ('>');

  if (atom_count < 2)
    error ("atom list must contain at least 2 atoms");

  *symbol_list = canonise_atom_list (symbols_to_list (atom_count, atoms));

  /* Compare the new list with the atom list of every non-atomic symbol. */
  for (entry_index = 0; entry_index < pool_items (symbol_pool); entry_index++)
  {
    symbol_entry_t *entry;
    value_t old_list;

    entry = (symbol_entry_t *) pool_item (symbol_pool, entry_index);
    if (entry->atoms == -1) /* Atomic symbols have no atom list. */
      continue;

    old_list = (value_t) pool_item (value_pool, entry->atoms);
    if (values_equal (*symbol_list, old_list))
      error ("atom list is same as for \"%s\"",
             (string_t) pool_item (string_pool, entry->name));
  }
}

/*---------------------------------------------------------------------------*/

GLOBAL void parse_symbols (void)
/* Parse symbol definitions and include statements until the end of the
 * current input is reached. Every statement is terminated by ";".
 * A definition is an identifier, optionally followed by an assignment
 * token and an atom list. An include statement is followed by a string
 * with the name of the file to be parsed at this point. */
{
  while (next_token != EOF)
  {
    if (next_token != TOK_INCLUDE)
    {
      /* A symbol definition. */
      string_t name;
      value_t atoms = NULL; /* remains NULL if no atom list is given */

      test_token (TOK_IDENT);
      name = new_string (token_name);

      read_next_token ();
      if (next_token == TOK_ASSIGN)
      {
        read_next_token ();
        parse_symbol_list (&atoms);
      }

      enter_symbol (name, atoms);
      free (name);
    }
    else
    {
      /* An include statement. */
      string_t include_name;

      read_next_token ();
      test_token (TOK_STRING);

      /* The file name is resolved against the name of the current file. */
      include_name = new_string (absolute_path (token_name,
                                                current_file_name ()));
      include_file (include_name, -1);
      parse_symbols ();
      end_include ();
      free (include_name);
    }

    parse_token (';');
  }
}

/*---------------------------------------------------------------------------*/

GLOBAL void write_symbols (string_t file_name)
/* Write the compiled symbol table to the binary file <file_name>.
 * The file consists of a symbol file header, followed by the contents of
 * the symbol pool, the value pool and the string pool, in this order. */
{
  symbol_header_t header;
  FILE *out = fopen_save (file_name, "wb");

  /* Fill in the symbol file header: file type, code version and the
   * number of items in each of the three pools. */
  set_header (&header.common_header, SYMBOL_FILE, SYMBOL_CODE_VERSION);
  header.symbols_size = pool_items (symbol_pool);
  header.values_size = pool_items (value_pool);
  header.strings_size = pool_items (string_pool);

  /* The header comes first ... */
  fwrite_save (&header, sizeof (header), 1, out, file_name);

  /* ... and the pools follow in the order that the header lists them. */
  write_pool (symbol_pool, out, file_name);
  write_pool (value_pool, out, file_name);
  write_pool (string_pool, out, file_name);

  fclose_save (out, file_name);
}

/*---------------------------------------------------------------------------*/

GLOBAL void init_sym_compiler (void)
/* Initialise this module: create the three empty pools that collect the
 * symbol entries, the atom lists and the symbol names.
 * The pools are released again by "terminate_sym_compiler". */
{
  /* The item size of each pool is the size of what is copied into it. */
  symbol_pool = new_pool (sizeof (symbol_entry_t));
  value_pool = new_pool (sizeof (symbol_t));
  string_pool = new_pool (sizeof (char));
}
  
/*---------------------------------------------------------------------------*/

GLOBAL void terminate_sym_compiler (void)
/* Terminate this module: release the three pools and the symbol tree. */
{
  /* Free the pools created by "init_sym_compiler". */
  free_pool (symbol_pool);
  free_pool (value_pool);
  free_pool (string_pool);

  /* Free all tree nodes and reset the root, so that the tree is empty
   * and no pointer to a released node is kept. */
  free_symbols (symbol_tree);
  symbol_tree = NULL;
}
  
/*---------------------------------------------------------------------------*/

GLOBAL void compile_symbol_file (string_t source_file, 
				 string_t object_file,
				 string_t old_symbol_file)
/* Compile the symbol file <source_file> and write the result to
 * <object_file>.
 * If <old_symbol_file> != NULL, the symbols of that compiled symbol file
 * are entered first, in their original order. Otherwise the standard
 * symbols are entered first. */
{
  init_sym_compiler ();

  if (old_symbol_file == NULL)
  {
    /* The standard symbols, in the same order as in "values.h". */
    static string_t standard_symbols[] =
    {
      "nil", "yes", "no", "symbol", "string", "number", "list", "record",
      NULL
    };
    string_t *name_ptr;

    for (name_ptr = standard_symbols; *name_ptr != NULL; name_ptr++)
      enter_symbol (*name_ptr, NULL);
  }
  else
  {
    symbol_t old_symbol;

    read_symbol_file (old_symbol_file);

    /* Take over all symbols of the old symbol file. */
    old_symbol = 0;
    while (old_symbol < number_of_symbols ())
    {
      enter_symbol (symbol_name (old_symbol), atoms_of_symbol (old_symbol));
      old_symbol++;
    }

    /* The old symbol table is not needed any longer. */
    free_symbol_table ();
  }

  /* Parse the source file up to its end. */
  include_file (source_file, -1);
  parse_symbols ();
  if (next_token != EOF)
    error ("symbol definition expected");

  end_include ();

  write_symbols (object_file);

  terminate_sym_compiler ();
}
