#!/usr/bin/perl

package makeLRSets;

use strict;
use Getopt::Std;
my %Options;
getopts('hf:o:', \%Options);

my $progname = "makeLRSets";
my $usage =<<EOU;
usage: $progname -h -f <cfgfile> -o <fsmfile>

-h prints this message

-f <cfgfile> is a CFG specification in a text format shown below:

start A start B
start

corresponds to the CFG: start -> A start B | empty-string

-o <fsmfile> This program takes as input a file containing a CFG spec
 in the above format and prints out the finite-state machine to
 <fsmfile> based on LR(0) items. This FSM can then be used to build a
 shift/reduce table.

In addition, the program prints out the LR(0) items and the grammar
 rules and other information that was used to create the FSM. This
 file is useful to verify or debug the FSM, but is not directly useful
 in building the shift/reduce table.

EOU

my $cfgfile;
my $fsmfile;

if (defined $Options{'h'}) {
    die $usage;
}

if (defined $Options{'f'}) {
    $cfgfile = $Options{'f'};
}
die $usage if (!defined $cfgfile);

if (defined $Options{'o'}) {
    $fsmfile = $Options{'o'};
}
die $usage if (!defined $fsmfile);

## storing the context-free grammar
## CFG data structure is:
## CFGRule[$ruleNumber] = [ $ruleNumber, $lhs, $rhs ]
## rhs is a reference to a list of symbols

my @CFGrules = ();
my %LHSht = ();
my %token = ();
my %nonTerminal = ();
my $startSym = '';

my $startRuleNumber = 0;
my $augmentedStart = "top";
my $dotString = "\\.";

my $shiftString = "shift";
my $gotoString = "goto";
my $reduceString = "reduce";
my $acceptString = "accept";

## item data structure is:
## $item = [ $ruleNumber, $dotIndex ]
## symbol after dot is:
## $CFGrules[$item->[0]]->[2]->[$item->[1]]

sub readCFG {
    my ($cfgfile) = @_;
    my $fh;
    open($fh, $cfgfile) or die "$progname: could not find $cfgfile\n";
    my $ruleNumber = $startRuleNumber;
    my $line = '';
    while ($line = <$fh>) {
	chomp($line);
	next if length $line == 0;
	my ($lhs, $rhs_str) = split(/\s+/, $line, 2);
	my @rhs = split(/\s+/, $rhs_str);

	if ($ruleNumber == $startRuleNumber) {
	    $CFGrules[$ruleNumber++] = [ 0, $augmentedStart, [ $lhs ] ];
	    $startSym = $lhs;
	}

	my $ruleInfo = [ $ruleNumber, $lhs, \@rhs ];
	$CFGrules[$ruleNumber++] = $ruleInfo;

	# this assumes no duplicates in grammar file
	push( @{ $LHSht{$lhs} } , $ruleInfo); 

	$nonTerminal{$lhs} = 1;
    }
    close($fh);
    map { findTokens($_), 1 } @CFGrules;
}

sub findTokens {
    my ($rule) = @_;
    if (!defined $rule) {
	die "$progname: undefined rule in CFG table\n";
    }
    for my $rhsSymbol (@{$rule->[2]}) {
	if (!defined $nonTerminal{$rhsSymbol}) {
	    $token{$rhsSymbol} = 1;
	}
    }
}

sub printCFG {
    print "= tokens ", join(" ", keys %token), "\n";
    print "= nonterminals ", join(" ", keys %nonTerminal), "\n";
    print "= start $startSym\n";
    map { print "= "; printCFGRule($_), 1 } @CFGrules;
}

sub printCFGRule {
    my ($rule) = @_;
    print CFGRuleToString($rule), "\n";
}

sub CFGRuleToString {
    my ($rule) = @_;
    if (!defined $rule) {
	die "$progname: undefined rule in CFG table\n";
    }
    return "$rule->[0] $rule->[1] @{$rule->[2]}";
}

sub printItem {
    my ($item) = @_;
    my $ruleNumber = $CFGrules[$item->[0]]->[0];
    my $lhs = $CFGrules[$item->[0]]->[1];
    my $rhs = $CFGrules[$item->[0]]->[2];
    my @newrhs = ();
    push(@newrhs, @{$rhs}[0..($item->[1]-1)]);
    push(@newrhs, $dotString);
    push(@newrhs, @{$rhs}[$item->[1]..@{$rhs}]);
    printCFGRule([ $ruleNumber, $lhs, \@newrhs]);
}

sub symAfterDot {
    my ($item) = @_;
    return $CFGrules[$item->[0]]->[2]->[$item->[1]];
}

sub computeClosure {
    my ($item) = @_;
    my %closure = ();
    $closure{"@$item"} = 1;
    my $closureSize = 0; 
    do {
	$closureSize = scalar keys %closure;
	for my $item_str (keys %closure) {
	    my ($rule, $dot) = split(' ', $item_str);
	    my $item = [$rule, $dot];
	    my $sym = symAfterDot($item);
	    if (defined $LHSht{$sym}) {
		for my $rule (@{ $LHSht{$sym} }) {
		    my $newItem = [ $rule->[0], 0 ];
		    $closure{"@$newItem"} = 1;
		}
	    }
	}
    } while ($closureSize < scalar keys %closure);

    my @configSet = ();
    for my $item_str (keys %closure) {
	my ($rule, $dot) = split(' ', $item_str);
	my $item = [$rule, $dot];
	push(@configSet, $item);
    }
    return \@configSet;
}

sub printConfigSet {
    my ($configSet) = @_;
    for my $item (sort { $a->[0] cmp $b->[0] } @$configSet) {
	printItem($item);
    }
}

sub computeSuccessor {
    my ($configSet, $sym) = @_;
    my %memo = ();
    for my $item (@$configSet) {
	if (symAfterDot($item) eq $sym) {
	    my $newItem = [ $item->[0], $item->[1] +1 ];
	    my $closure = computeClosure($newItem);
	    for my $item (@$closure) {
		$memo{"@$item"} = 1;
	    }
	}
    }

    my @newConfigSet = ();
    for my $item_str(keys %memo) {
	my ($rule, $dot) = split(' ', $item_str);
	my $item = [$rule, $dot];
	push(@newConfigSet, $item);
    }
    return \@newConfigSet;
}

sub subsetConfigSets {
    my ($configSet1, $configSet2) = @_;
    my %memo = ();
    for my $item (@$configSet1) {
	$memo{"@$item"} = 1;
    }
    for my $item (@$configSet2) {
	if (!defined $memo{"@$item"}) {
	    return 0;
	}
    }
    return 1;
}

sub equalConfigSets {
    my ($configSet1, $configSet2) = @_;
    return subsetConfigSets( $configSet1, $configSet2) &&
	subsetConfigSets( $configSet2, $configSet1);
}

sub checkSetofItems {
    my ($setofItems, $configSet2) = @_;
    my $i = 0;
    for my $configSet1 (@$setofItems) {
	if (equalConfigSets($configSet1, $configSet2) == 1) {
	    return $i;
	}
	$i++;
    }
    return -1;
}

sub printSetofItems {
    my ($setofItems) = @_;
    my $i=0;
    for my $configSet (@$setofItems) {
	print "% ", $i++, "\n"; 
	printConfigSet($configSet);
    }
}

sub computeSetofItems {
    my $startItem = [$startRuleNumber, 0];
    my @setofItems = ( computeClosure($startItem) );
    my @fsaTable = ();
    my $setofItemsSize;

    do {
	$setofItemsSize = scalar @setofItems;
	my $fromIndex = 0;
	my $toIndex;
	for my $configSet (@setofItems) {
	    for my $tokenSym (keys %token) {
		my $newConfigSet = computeSuccessor($configSet, $tokenSym);
		if ((scalar @$newConfigSet) <= 0) { next; }
		if (($toIndex = checkSetofItems(\@setofItems, $newConfigSet)) == -1) {
		    push(@setofItems, $newConfigSet);
		    $toIndex = (scalar @setofItems) - 1;
		    $fsaTable[$fromIndex][$toIndex]->{"$shiftString $tokenSym"} = 1;
		} else {
		    $fsaTable[$fromIndex][$toIndex]->{"$shiftString $tokenSym"} = 1;
		}
	    }
	    for my $nonTerminalSym (keys %nonTerminal) {
		my $newConfigSet = computeSuccessor($configSet, $nonTerminalSym);
		if ((scalar @$newConfigSet) <= 0) { next; }
		if (($toIndex = checkSetofItems(\@setofItems, $newConfigSet)) == -1) {
		    push(@setofItems, $newConfigSet);
		    $toIndex = (scalar @setofItems) - 1;
		    $fsaTable[$fromIndex][$toIndex]->{"$gotoString $nonTerminalSym"} = 1;
		} else {
		    $fsaTable[$fromIndex][$toIndex]->{"$gotoString $nonTerminalSym"} = 1;
		}
	    }
	    $fromIndex++;
	}
    } while ($setofItemsSize < scalar @setofItems);

    return (\@setofItems, \@fsaTable, $setofItemsSize);
}

sub printfsaTable {
    my ($fsaTable, $setofItemsSize, $file) = @_;
    my $fh;
    open($fh, ">$file") or die "$progname: could not find $file\n";
    for (my $i=0; $i<$setofItemsSize; $i++) {
	for (my $j=0; $j<$setofItemsSize; $j++) {
	    for my $transition (keys %{$fsaTable->[$i]->[$j]}) {
		print $fh "$i $j $transition\n";
	    }
	}
    }
    close($fh);
}

sub printReduce {
    my ($setofItems, $file) = @_;
    my $fh;
    open($fh, ">>$file") or die "$progname: could not find $file\n";
    my $i=0;
    for my $configSet (@$setofItems) {
	for my $item (sort { $a->[0] cmp $b->[0] } @$configSet) {
	    if (!defined symAfterDot($item)) {
		if ($item->[0] == $startRuleNumber) {
		    print $fh "% $i $acceptString $item->[0]\n";
		} else {
		    print $fh "% $i $reduceString $item->[0]\n";
		}
	    }
	}
	$i++;
    }
    close($fh);
}

sub main {
    my ($cfgfile, $fsmfile) = @_;
    readCFG($cfgfile);
    printCFG();
    my ($setofItems, $fsaTable, $sz) = computeSetofItems();
    printSetofItems($setofItems);
    printfsaTable($fsaTable, $sz, $fsmfile);
    printReduce($setofItems, $fsmfile);
}

main($cfgfile, $fsmfile);

1;

