#!/bin/perl5
# -*- perl -*-

$| = 1;

%aval_regex = ("NAME", "[a-zA-Z].*",
	       "NAMEs", "[a-zA-Z].*",
	       "NUMBER", "[0-9]+",
	       "NUMBERS", "[0-9\s]+"
	       );

$dtd = join('', <>);

$dtd =~ s/[\n\t ]+/ /g;
$dtd =~ s@<!\[[^\[]+\[\s*@@g;
$dtd =~ s@]]>@@g;
#$dtd =~ s@(<!([^<>"]+|"[^"]+")+>)@&nl2sp($1)@ge;
#$dtd =~ s@\n\n+@\n@g;
#$dtd =~ s@  +@ @g;
$dtd =~ s@[ \n]+>@>@g;

while ($dtd =~ m@<!(([^\"<>]|\"[^\"]+\")+)>@g) { #"
    $one = $1;
    #print "ONE `$one'\n";
    0 while $one =~ s/%([a-zA-Z0-9\._-]+);?/$entities{$1}/e;
    #print "ONE `$one'\n";
    0 while $one =~ s@^(([^\"]|\"[^\"]\")*)(--.*?--\s*)@$1 @g;
    #print "ONE `$one'\n";
    ($name, $parms) = $one =~ m@(\S+)\s*(.*)@;
    print "SGML $name = $parms\n";
    @parm = ();
    while ($parms =~ m@\s*(\"([^\"]+)\"|\'([^\']+)\'|(\([^\)]+\)[\+\?\*]?)|([^\'\"<>\(\) ]+))@g) { #"
	$p = $2.$3.$4.$5;
	push(@parm, $p);
	#print "  parm: `$p'\n";
    }

    if ($name eq "ENTITY") {
	if ($parm[0] eq "%") {
	    $parm[2] =~ s/--.*?--//g;
	    $entities{$parm[1]} = $parm[2];
	    #print "\$entities{$parm[1]} = `$parm[2]'\n";;
	}
    }

    if ($name eq "ELEMENT") {
	$name = $parm[0];
	if ($name =~ m@^\(\s*(.*\S)\s*\)@) {
	    @names = split(/\s*\|\s*/, $1);
	} else {
	    @names = $name;
	}
	for $name (@names) {
	    #print "Approved element: <$name>\n";
	    $name =~ tr/A-Z/a-z/;
	    $tags{$name} = 1;
	}
    }

    if ($name eq "ATTLIST") {
	$element = shift @parm;
        $element =~ tr/A-Z/a-z/;
	if ($element =~ m@^\(\s*(.*\S)\s*\)@) {
	    @elements = split(/\s*\|\s*/, $1);
	} else {
	    @elements = $element;
	}
	while ($#parm >= 0) {
	    $attr = shift @parm;
	    $type = shift @parm;
	    $default = shift @parm;
	    if ($default eq "#FIXED") {
		$default .= " ";
		$default .= (shift @parm);
	    }
	    $regex = ".*";
	    if ($type eq "($attr)") {
		$regex = "";
	    } elsif ($type =~ m@^\(@) {
		$regex = $type;
		$regex =~ s/ //g;
	    } elsif ($aval_regex{$type}) {
		$regex = $aval_regex{$type};
	    } else {
		$regex = ".*";
	    }
	    for $element (@elements) {
		#print "Approved attribute: <$element $attr> type `$type' match `$regex'\n";
		$attr =~ tr/A-Z/a-z/;
		$attrvals{"$element.$attr"} = $regex;
	    }
	    #print "end of while\n";
	}
	#print "end of if attlist\n";
    }
    #print "end of dtd parse\n";
}

sub entities {
    local($e) = @_;
    local($v) = $entities{$e};
    print "ENT($e) = `$v'\n";
    return $v;
}

sub nl2sp {
    local($v) = @_;
    $v =~ s/\n/ /g;
    #print "nl2sp( $v )\n";
    return $v;
}

print "\@tags = (\n";
for $t (sort keys %tags) {
    print "\t\"$t\",\n";
}
print "\t);\n\n";

print "%alias = ();\n\n";

print "%attrvals = (\n";
for $k (sort keys %attrvals) {
    print "\t\"$k\", \"$attrvals{$k}\",\n";
}
print "\t);\n\n";

print "1;\n";
