Commit d22d556b authored by Alexandru Dura's avatar Alexandru Dura
Browse files

Trivial production remover

Intended to simplify the SPPF by removing the productions that have
no semantic action associated with them.
parent fd6b0f25
package se.lth.sep;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import se.lth.sep.SPPFNode.FamilyNode;
/**
 * SPPF visitor that splices out "trivial" productions: single-child families
 * {@code head -> body} for which {@link #isTrivialProduction(Category, Category)}
 * returns {@code true}.
 *
 * <p>For every family of a visited node, each child whose families are all
 * trivial is replaced by the single child of those families. The rewrite is
 * applied to the set returned by {@code SPPFNode.getChildren()}.
 * NOTE(review): this assumes that set is the node's live, mutable family set —
 * confirm against {@code SPPFNode}.
 *
 * <p>The default {@link #isTrivialProduction(Category, Category)} returns
 * {@code false}, so an un-subclassed instance only traverses the forest.
 *
 * <p>Instances keep track of the nodes they have already visited and are
 * therefore meant to be used for a single traversal.
 */
public class SPPFTrivialProductionRemover implements SPPFNodeVisitor {
    /** Symbol nodes already processed; prevents repeated work and endless recursion. */
    private final Set<SPPFNode> visitedNodes = new HashSet<>();
    /** Family nodes already traversed. */
    private final Set<SPPFNode.FamilyNode> visitedFamNodes = new HashSet<>();
    /** Grammar used to resolve node labels to categories. */
    private final Grammar grammar;

    /**
     * @param grammar grammar passed to {@code SymbolLabel.getSymbol} when
     *                resolving the head and body of a candidate production
     */
    public SPPFTrivialProductionRemover(Grammar grammar) {
        this.grammar = grammar;
    }

    /**
     * Traverses every child of {@code familyNode} once.
     *
     * @param familyNode the family whose children are to be visited
     */
    @Override
    public void visit(FamilyNode familyNode) {
        if (!visitedFamNodes.add(familyNode)) {
            // node already visited
            return;
        }
        for (int i = 0; i < familyNode.getNumChildren(); ++i) {
            familyNode.getChild(i).accept(this);
        }
    }

    /**
     * Simplifies the families of {@code n}.
     *
     * <p>Each family is traversed first (so its descendants are simplified
     * before it is inspected). Then, for every child position, the child is
     * either kept or replaced by the bodies of its trivial productions. If at
     * least one position is replaced, the family is dropped and one new family
     * is added for every combination of per-position choices. Building the
     * full combination matters: replacing one position at a time would leave
     * several partially simplified families behind and make the node look
     * ambiguous when it is not.
     *
     * @param n the node whose families are rewritten
     */
    @Override
    public void visit(SPPFNode n) {
        if (!visitedNodes.add(n)) {
            // node already visited
            return;
        }

        Set<SPPFNode.FamilyNode> children = n.getChildren();
        Set<SPPFNode.FamilyNode> newChildren = new HashSet<>();
        Set<SPPFNode.FamilyNode> childrenToRemove = new HashSet<>();

        for (SPPFNode.FamilyNode f : children) {
            f.accept(this);

            int arity = f.getNumChildren();
            if (arity == 0) {
                continue;
            }

            // options[i] holds the nodes that may stand at position i.
            SPPFNode[][] options = new SPPFNode[arity][];
            boolean anyReplaced = false;
            for (int i = 0; i < arity; ++i) {
                SPPFNode child = f.getChild(i);
                SPPFNode[] bodies = trivialBodies(child);
                if (bodies == null) {
                    options[i] = new SPPFNode[] {child};
                } else {
                    options[i] = bodies;
                    anyReplaced = true;
                }
            }

            if (anyReplaced) {
                childrenToRemove.add(f);
                addCombinations(options, newChildren);
            }
        }

        // Mutate only after the iteration over 'children' has finished.
        children.removeAll(childrenToRemove);
        children.addAll(newChildren);
    }

    /**
     * Decides whether the production {@code head -> body} carries no semantic
     * action and may be spliced out. This default implementation never removes
     * anything; subclasses override it.
     *
     * @param head left-hand side of the production
     * @param body the single right-hand side symbol
     * @return {@code true} if the production is trivial; always {@code false} here
     */
    public boolean isTrivialProduction(Category head, Category body) {
        return false;
    }

    /**
     * Returns the replacement nodes for {@code ruleNode}, or {@code null} if it
     * must be kept.
     *
     * <p>A node is replaceable only if it has at least one family and every
     * family has exactly one child forming a trivial production with the
     * node's own symbol as head.
     */
    private SPPFNode[] trivialBodies(SPPFNode ruleNode) {
        assert ruleNode.getLabel() instanceof SymbolLabel;
        Category head = ((SymbolLabel) ruleNode.getLabel()).getSymbol(grammar);

        Set<SPPFNode.FamilyNode> families = ruleNode.getChildren();
        if (families.isEmpty()) {
            // Nothing to substitute (e.g. a node without derivations below it).
            return null;
        }

        SPPFNode[] bodies = new SPPFNode[families.size()];
        int next = 0;
        for (SPPFNode.FamilyNode g : families) {
            if (g.getNumChildren() != 1) {
                return null;
            }
            SPPFNode bodyNode = g.getChild(0);
            Category body = ((SymbolLabel) bodyNode.getLabel()).getSymbol(grammar);
            if (!isTrivialProduction(head, body)) {
                return null;
            }
            bodies[next++] = bodyNode;
        }
        return bodies;
    }

    /**
     * Adds to {@code out} one family for every way of picking one node per
     * position from {@code options}. Every row of {@code options} must be
     * non-empty.
     */
    private static void addCombinations(SPPFNode[][] options, Set<SPPFNode.FamilyNode> out) {
        int[] pick = new int[options.length];
        while (true) {
            SPPFNode[] childArray = new SPPFNode[options.length];
            for (int j = 0; j < options.length; ++j) {
                childArray[j] = options[j][pick[j]];
            }
            out.add(new SPPFNode.FamilyNode(childArray));

            // Advance the rightmost counter, carrying to the left on overflow.
            int pos = options.length - 1;
            while (pos >= 0 && ++pick[pos] == options[pos].length) {
                pick[pos] = 0;
                --pos;
            }
            if (pos < 0) {
                return;
            }
        }
    }
}
......@@ -176,6 +176,29 @@ public class EarleyParserTest {
Java14Grammar.t_SEMICOLON};
assertTrue(parser.recognize(str, Java14Grammar.n_statement));
SPPFNode root = parser.parse(str, Java14Grammar.n_statement);
assertNotNull(root);
Util.dumpParseResult("testJava1-bt.dot", root, g);
SPPFDebinarizeVisitor dbv = new SPPFDebinarizeVisitor();
dbv.visit(root);
Util.dumpParseResult("testJava1.dot", root, g);
// remove trivial productions
SPPFTrivialProductionRemover tpr = new SPPFTrivialProductionRemover(g) {
@Override public boolean isTrivialProduction(Category head, Category body) {
String sig = head.getName() + "." + body.getName();
if (body.getName().equals("METAVARID"))
return true;
if (body.getName().equals("GAP"))
return true;
if (Java14Grammar.isTrivialRule(sig))
return true;
return false;
}
};
tpr.visit(root);
Util.dumpParseResult("testJava1-notr.dot", root, g);
}
@Test public void testJava2() {
......@@ -200,8 +223,70 @@ public class EarleyParserTest {
SPPFNode root = parser.parse(str, Java14Grammar.n_class_declaration);
assertNotNull(root);
Util.dumpParseResult("testJava2-bt.dot", root, g);
// debinarize
SPPFDebinarizeVisitor dbv = new SPPFDebinarizeVisitor();
dbv.visit(root);
Util.dumpParseResult("testJava2.dot", root, g);
// remove trivial productions
SPPFTrivialProductionRemover tpr = new SPPFTrivialProductionRemover(g) {
@Override public boolean isTrivialProduction(Category head, Category body) {
String sig = head.getName() + "." + body.getName();
if (body.getName().equals("METAVARID"))
return true;
if (body.getName().equals("GAP"))
return true;
if (Java14Grammar.isTrivialRule(sig))
return true;
return false;
}
};
tpr.visit(root);
Util.dumpParseResult("testJava2-notr.dot", root, g);
}
/**
 * Parses a metavariable assignment statement, dumps the raw parse forest,
 * then dumps it again after debinarization and after trivial-production
 * removal.
 */
@Test
public void testJava3() {
    Grammar g = new Grammar();
    Java14Grammar.addRules(g);
    EarleyParser parser = new EarleyParser(g);

    // Token stream for: `a = `b.`c + `e.`f.`g() ;
    Category[] tokens = {
        Java14Grammar.t_METAVARID,
        Java14Grammar.t_EQ,
        Java14Grammar.t_METAVARID,
        Java14Grammar.t_DOT,
        Java14Grammar.t_METAVARID,
        Java14Grammar.t_PLUS,
        Java14Grammar.t_METAVARID,
        Java14Grammar.t_DOT,
        Java14Grammar.t_METAVARID,
        Java14Grammar.t_DOT,
        Java14Grammar.t_METAVARID,
        Java14Grammar.t_LPAREN,
        Java14Grammar.t_RPAREN,
        Java14Grammar.t_SEMICOLON
    };

    assertTrue(parser.recognize(tokens, Java14Grammar.n_statement));

    SPPFNode root = parser.parse(tokens, Java14Grammar.n_statement);
    assertNotNull(root);
    Util.dumpParseResult("testJava3-bt.dot", root, g);

    // debinarize
    SPPFDebinarizeVisitor debinarizer = new SPPFDebinarizeVisitor();
    debinarizer.visit(root);
    Util.dumpParseResult("testJava3.dot", root, g);

    // remove trivial productions
    SPPFTrivialProductionRemover remover = new SPPFTrivialProductionRemover(g) {
        @Override
        public boolean isTrivialProduction(Category head, Category body) {
            String sig = head.getName() + "." + body.getName();
            String bodyName = body.getName();
            // Metavariable and gap terminals are always spliced out; everything
            // else is looked up in the grammar's list of trivial rules.
            return bodyName.equals("METAVARID")
                || bodyName.equals("GAP")
                || Java14Grammar.isTrivialRule(sig);
        }
    };
    remover.visit(root);
    Util.dumpParseResult("testJava3-notr.dot", root, g);
}
}
......@@ -917,4 +917,175 @@ class Java14Grammar {
g.addRule(new Rule(n_META_OR_CONCRETE_wrapped_name, n_META_OR_CONCRETE_NO_GAP_wrapped_name));
g.addRule(new Rule(n_META_OR_CONCRETE_wrapped_name, t_GAP));
}
/**
 * Tells whether {@code signature} is one of the rule signatures hard-coded
 * below. The listed signatures are symbol names joined by {@code '.'}, head
 * first. Matching is exact and case-sensitive.
 *
 * @param signature the rule signature to look up
 * @return {@code true} if the signature is listed, {@code false} otherwise
 * @throws NullPointerException if {@code signature} is {@code null}
 */
public static boolean isTrivialRule(final String signature) {
    switch (signature) {
    // Signatures whose head is a plain grammar symbol.
    case "class_body_declaration.constructor_declaration":
    case "class_body_declaration.static_initializer":
    case "class_body_declaration.instance_initializer":
    case "class_body_declaration.class_member_declaration":
    case "statement.for_statement":
    case "statement.while_statement":
    case "statement.if_then_else_statement":
    case "statement.if_then_statement":
    case "statement.labeled_statement":
    case "statement.statement_without_trailing_substatement":
    case "goal.compilation_unit":
    case "type.reference_type":
    case "type.primitive_type":
    case "primitive_type.numeric_type":
    case "numeric_type.META_OR_CONCRETE_floating_point_type":
    case "numeric_type.integral_type":
    case "reference_type.array_type":
    case "reference_type.class_or_interface_type":
    case "class_or_interface_type.name":
    case "class_type.class_or_interface_type":
    case "interface_type.class_or_interface_type":
    case "name.qualified_name":
    case "name.META_OR_CONCRETE_simple_name":
    case "doc_comment_list.doc_comment":
    case "package_with_comment.META_OR_CONCRETE_package_declaration":
    case "package_with_comment.doc_comment_list.META_OR_CONCRETE_package_declaration":
    case "name_decl.META_OR_CONCRETE_qualified_name_decl":
    case "name_decl.simple_name_decl":
    case "import_with_comment.import_declaration":
    case "import_with_comment.doc_comment_list.import_declaration":
    case "import_declaration.META_OR_CONCRETE_type_import_on_demand_declaration":
    case "import_declaration.single_type_import_declaration":
    case "type_with_comment.type_declaration":
    case "type_declaration.META_OR_CONCRETE_interface_declaration":
    case "type_declaration.META_OR_CONCRETE_class_declaration":
    case "body_decl_with_comment.class_body_declaration":
    case "class_member_declaration.method_declaration":
    case "class_member_declaration.META_OR_CONCRETE_field_declaration":
    case "field_declarator.META_OR_CONCRETE_field_declarator_id":
    case "variable_initializer.META_OR_CONCRETE_array_initializer":
    case "variable_initializer.expression":
    case "interface_member_with_comment.interface_member_declaration":
    case "interface_member_declaration.abstract_method_declaration":
    case "interface_member_declaration.constant_declaration":
    case "constant_declaration.META_OR_CONCRETE_field_declaration":
    case "block_statement.statement":
    case "block_statement.local_variable_declaration_statement":
    case "variable_declarator.META_OR_CONCRETE_variable_declarator_id":
    case "statement_without_trailing_substatement.assert_statement":
    case "statement_without_trailing_substatement.try_statement":
    case "statement_without_trailing_substatement.throw_statement":
    case "statement_without_trailing_substatement.synchronized_statement":
    case "statement_without_trailing_substatement.return_statement":
    case "statement_without_trailing_substatement.continue_statement":
    case "statement_without_trailing_substatement.break_statement":
    case "statement_without_trailing_substatement.do_statement":
    case "statement_without_trailing_substatement.switch_statement":
    case "statement_without_trailing_substatement.expression_statement":
    case "statement_without_trailing_substatement.empty_statement":
    case "statement_without_trailing_substatement.META_OR_CONCRETE_block":
    case "statement_no_short_if.for_statement_no_short_if":
    case "statement_no_short_if.while_statement_no_short_if":
    case "statement_no_short_if.if_then_else_statement_no_short_if":
    case "statement_no_short_if.labeled_statement_no_short_if":
    case "statement_no_short_if.statement_without_trailing_substatement":
    case "for_init.statement_expression_list":
    case "for_update.statement_expression_list":
    case "primary.array_creation_uninit":
    case "primary.array_creation_init":
    case "primary.primary_no_new_array":
    case "primary_no_new_array.array_access":
    case "primary_no_new_array.META_OR_CONCRETE_method_invocation":
    case "primary_no_new_array.field_access":
    case "primary_no_new_array.META_OR_CONCRETE_class_instance_creation_expression":
    case "primary_no_new_array.literal":
    case "postfix_expression.META_OR_CONCRETE_postdecrement_expression":
    case "postfix_expression.META_OR_CONCRETE_postincrement_expression":
    case "postfix_expression.name":
    case "postfix_expression.primary":
    case "unary_expression.unary_expression_not_plus_minus":
    case "unary_expression.META_OR_CONCRETE_predecrement_expression":
    case "unary_expression.META_OR_CONCRETE_preincrement_expression":
    case "unary_expression_not_plus_minus.cast_expression":
    case "unary_expression_not_plus_minus.postfix_expression":
    case "multiplicative_expression.unary_expression":
    case "additive_expression.multiplicative_expression":
    case "shift_expression.additive_expression":
    case "relational_expression.shift_expression":
    case "equality_expression.relational_expression":
    case "and_expression.equality_expression":
    case "exclusive_or_expression.and_expression":
    case "inclusive_or_expression.exclusive_or_expression":
    case "conditional_and_expression.inclusive_or_expression":
    case "conditional_or_expression.conditional_and_expression":
    case "conditional_expression.conditional_or_expression":
    case "assignment_expression.META_OR_CONCRETE_assignment":
    case "assignment_expression.conditional_expression":
    case "expression.assignment_expression":
    case "constant_expression.expression":
    // Signatures whose head is a META_OR_CONCRETE_* symbol.
    case "META_OR_CONCRETE_NO_GAP_array_initializer.array_initializer":
    case "META_OR_CONCRETE_array_initializer.META_OR_CONCRETE_NO_GAP_array_initializer":
    case "META_OR_CONCRETE_NO_GAP_assignment.assignment":
    case "META_OR_CONCRETE_assignment.META_OR_CONCRETE_NO_GAP_assignment":
    case "META_OR_CONCRETE_NO_GAP_block.block":
    case "META_OR_CONCRETE_block.META_OR_CONCRETE_NO_GAP_block":
    case "META_OR_CONCRETE_NO_GAP_catch_clause.catch_clause":
    case "META_OR_CONCRETE_catch_clause.META_OR_CONCRETE_NO_GAP_catch_clause":
    case "META_OR_CONCRETE_NO_GAP_class_declaration.class_declaration":
    case "META_OR_CONCRETE_class_declaration.META_OR_CONCRETE_NO_GAP_class_declaration":
    case "META_OR_CONCRETE_NO_GAP_class_instance_creation_expression.class_instance_creation_expression":
    case "META_OR_CONCRETE_class_instance_creation_expression.META_OR_CONCRETE_NO_GAP_class_instance_creation_expression":
    case "META_OR_CONCRETE_NO_GAP_class_literal.class_literal":
    case "META_OR_CONCRETE_class_literal.META_OR_CONCRETE_NO_GAP_class_literal":
    case "META_OR_CONCRETE_NO_GAP_dim_expr.dim_expr":
    case "META_OR_CONCRETE_dim_expr.META_OR_CONCRETE_NO_GAP_dim_expr":
    case "META_OR_CONCRETE_NO_GAP_explicit_constructor_invocation.explicit_constructor_invocation":
    case "META_OR_CONCRETE_explicit_constructor_invocation.META_OR_CONCRETE_NO_GAP_explicit_constructor_invocation":
    case "META_OR_CONCRETE_NO_GAP_field_declaration.field_declaration":
    case "META_OR_CONCRETE_field_declaration.META_OR_CONCRETE_NO_GAP_field_declaration":
    case "META_OR_CONCRETE_NO_GAP_field_declarator_id.field_declarator_id":
    case "META_OR_CONCRETE_field_declarator_id.META_OR_CONCRETE_NO_GAP_field_declarator_id":
    case "META_OR_CONCRETE_NO_GAP_finally.finally":
    case "META_OR_CONCRETE_finally.META_OR_CONCRETE_NO_GAP_finally":
    case "META_OR_CONCRETE_NO_GAP_floating_point_type.floating_point_type":
    case "META_OR_CONCRETE_floating_point_type.META_OR_CONCRETE_NO_GAP_floating_point_type":
    case "META_OR_CONCRETE_NO_GAP_formal_parameter.formal_parameter":
    case "META_OR_CONCRETE_formal_parameter.META_OR_CONCRETE_NO_GAP_formal_parameter":
    case "META_OR_CONCRETE_NO_GAP_interface_declaration.interface_declaration":
    case "META_OR_CONCRETE_interface_declaration.META_OR_CONCRETE_NO_GAP_interface_declaration":
    case "META_OR_CONCRETE_NO_GAP_local_variable_declaration.local_variable_declaration":
    case "META_OR_CONCRETE_local_variable_declaration.META_OR_CONCRETE_NO_GAP_local_variable_declaration":
    case "META_OR_CONCRETE_NO_GAP_method_header.method_header":
    case "META_OR_CONCRETE_method_header.META_OR_CONCRETE_NO_GAP_method_header":
    case "META_OR_CONCRETE_NO_GAP_method_invocation.method_invocation":
    case "META_OR_CONCRETE_method_invocation.META_OR_CONCRETE_NO_GAP_method_invocation":
    case "META_OR_CONCRETE_NO_GAP_modifier.modifier":
    case "META_OR_CONCRETE_modifier.META_OR_CONCRETE_NO_GAP_modifier":
    case "META_OR_CONCRETE_NO_GAP_package_declaration.package_declaration":
    case "META_OR_CONCRETE_package_declaration.META_OR_CONCRETE_NO_GAP_package_declaration":
    case "META_OR_CONCRETE_NO_GAP_postdecrement_expression.postdecrement_expression":
    case "META_OR_CONCRETE_postdecrement_expression.META_OR_CONCRETE_NO_GAP_postdecrement_expression":
    case "META_OR_CONCRETE_NO_GAP_postincrement_expression.postincrement_expression":
    case "META_OR_CONCRETE_postincrement_expression.META_OR_CONCRETE_NO_GAP_postincrement_expression":
    case "META_OR_CONCRETE_NO_GAP_predecrement_expression.predecrement_expression":
    case "META_OR_CONCRETE_predecrement_expression.META_OR_CONCRETE_NO_GAP_predecrement_expression":
    case "META_OR_CONCRETE_NO_GAP_preincrement_expression.preincrement_expression":
    case "META_OR_CONCRETE_preincrement_expression.META_OR_CONCRETE_NO_GAP_preincrement_expression":
    case "META_OR_CONCRETE_NO_GAP_qualified_name_decl.qualified_name_decl":
    case "META_OR_CONCRETE_qualified_name_decl.META_OR_CONCRETE_NO_GAP_qualified_name_decl":
    case "META_OR_CONCRETE_NO_GAP_simple_name.simple_name":
    case "META_OR_CONCRETE_simple_name.META_OR_CONCRETE_NO_GAP_simple_name":
    case "META_OR_CONCRETE_NO_GAP_statement_expression.statement_expression":
    case "META_OR_CONCRETE_statement_expression.META_OR_CONCRETE_NO_GAP_statement_expression":
    case "META_OR_CONCRETE_NO_GAP_super.super":
    case "META_OR_CONCRETE_super.META_OR_CONCRETE_NO_GAP_super":
    case "META_OR_CONCRETE_NO_GAP_switch_block.switch_block":
    case "META_OR_CONCRETE_switch_block.META_OR_CONCRETE_NO_GAP_switch_block":
    case "META_OR_CONCRETE_NO_GAP_switch_label.switch_label":
    case "META_OR_CONCRETE_switch_label.META_OR_CONCRETE_NO_GAP_switch_label":
    case "META_OR_CONCRETE_NO_GAP_type_import_on_demand_declaration.type_import_on_demand_declaration":
    case "META_OR_CONCRETE_type_import_on_demand_declaration.META_OR_CONCRETE_NO_GAP_type_import_on_demand_declaration":
    case "META_OR_CONCRETE_NO_GAP_variable_declarator_id.variable_declarator_id":
    case "META_OR_CONCRETE_variable_declarator_id.META_OR_CONCRETE_NO_GAP_variable_declarator_id":
    case "META_OR_CONCRETE_NO_GAP_wrapped_name.wrapped_name":
    case "META_OR_CONCRETE_wrapped_name.META_OR_CONCRETE_NO_GAP_wrapped_name":
        // Every label above falls through to this single result.
        return true;
    default:
        return false;
    }
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment