#!/bin/perl5 # -*- perl -*- $| = 1; %aval_regex = ("NAME", "[a-zA-Z].*", "NAMEs", "[a-zA-Z].*", "NUMBER", "[0-9]+", "NUMBERS", "[0-9\s]+" ); $dtd = join('', <>); $dtd =~ s/[\n\t ]+/ /g; $dtd =~ s@@@g; #$dtd =~ s@("]+|"[^"]+")+>)@&nl2sp($1)@ge; #$dtd =~ s@\n\n+@\n@g; #$dtd =~ s@ +@ @g; $dtd =~ s@[ \n]+>@>@g; while ($dtd =~ m@]|\"[^\"]+\")+)>@g) { #" $one = $1; #print "ONE `$one'\n"; 0 while $one =~ s/%([a-zA-Z0-9\._-]+);?/$entities{$1}/e; #print "ONE `$one'\n"; 0 while $one =~ s@^(([^\"]|\"[^\"]\")*)(--.*?--\s*)@$1 @g; #print "ONE `$one'\n"; ($name, $parms) = $one =~ m@(\S+)\s*(.*)@; print "SGML $name = $parms\n"; @parm = (); while ($parms =~ m@\s*(\"([^\"]+)\"|\'([^\']+)\'|(\([^\)]+\)[\+\?\*]?)|([^\'\"<>\(\) ]+))@g) { #" $p = $2.$3.$4.$5; push(@parm, $p); #print " parm: `$p'\n"; } if ($name eq "ENTITY") { if ($parm[0] eq "%") { $parm[2] =~ s/--.*?--//g; $entities{$parm[1]} = $parm[2]; #print "\$entities{$parm[1]} = `$parm[2]'\n";; } } if ($name eq "ELEMENT") { $name = $parm[0]; if ($name =~ m@^\(\s*(.*\S)\s*\)@) { @names = split(/\s*\|\s*/, $1); } else { @names = $name; } for $name (@names) { #print "Approved element: <$name>\n"; $name =~ tr/A-Z/a-z/; $tags{$name} = 1; } } if ($name eq "ATTLIST") { $element = shift @parm; $element =~ tr/A-Z/a-z/; if ($element =~ m@^\(\s*(.*\S)\s*\)@) { @elements = split(/\s*\|\s*/, $1); } else { @elements = $element; } while ($#parm >= 0) { $attr = shift @parm; $type = shift @parm; $default = shift @parm; if ($default eq "#FIXED") { $default .= " "; $default .= (shift @parm); } $regex = ".*"; if ($type eq "($attr)") { $regex = ""; } elsif ($type =~ m@^\(@) { $regex = $type; $regex =~ s/ //g; } elsif ($aval_regex{$type}) { $regex = $aval_regex{$type}; } else { $regex = ".*"; } for $element (@elements) { #print "Approved attribute: <$element $attr> type `$type' match `$regex'\n"; $attr =~ tr/A-Z/a-z/; $attrvals{"$element.$attr"} = $regex; } #print "end of while\n"; } #print "end of if attlist\n"; } #print "end of dtd parse\n"; } sub entities { local($e) = @_; local($v) = $entities{$e}; print "ENT($e) = `$v'\n"; return $v; } sub nl2sp { local($v) = @_; $v =~ s/\n/ /g; #print "nl2sp( $v )\n"; return $v; } print "\@tags = (\n"; for $t (sort keys %tags) { print "\t\"$t\",\n"; } print "\t);\n\n"; print "%alias = ();\n\n"; print "%attrvals = (\n"; for $k (sort keys %attrvals) { print "\t\"$k\", \"$attrvals{$k}\",\n"; } print "\t);\n\n"; print "1;\n";