Commit 9f45ed65 authored by Alexandru Dura
Browse files

WIP: Implement "SPPF-Style Parsing From Earley Recognizers"

parent 1663fe8e
/**
 * A grammar rule together with a dot position inside it.
 *
 * <p>Two dotted rules are equal when they are of the same class, have the same
 * dot position and their rules are equal according to {@code EarleyRule.equals}.
 */
public class DottedRule {
    EarleyRule r;
    int dot;

    /**
     * @param r   the rule; checked to be non-null only when assertions are enabled
     * @param dot the dot position inside {@code r}
     */
    public DottedRule(EarleyRule r, int dot) {
        assert r != null;
        this.r = r;
        this.dot = dot;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + dot;
        result = prime * result + ((r == null) ? 0 : r.hashCode());
        return result;
    }

    /**
     * Null-safe on {@code r}, mirroring {@link #hashCode()}: the constructor's
     * assert is a no-op when assertions are disabled and the field is not final,
     * so a null rule must not make equality checks throw.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        DottedRule other = (DottedRule) obj;
        if (dot != other.dot) {
            return false;
        }
        return (r == null) ? other.r == null : r.equals(other.r);
    }
}
......@@ -2,11 +2,25 @@ public class EarleyItem {
int dot; // 0 means before the first element in the rule
final int start; // 0 means beginning of input
final EarleyRule rule;
// SPPF node attached to this item; the constructor initialises it to null and
// setSPPF replaces it afterwards.
SPPFNode sppf;
/**
 * Creates an item for {@code rule} with the dot at position 0 and no SPPF
 * node attached.
 *
 * @param rule  the rule this item refers to
 * @param start the start position recorded for this item
 */
public EarleyItem(EarleyRule rule, int start) {
    this.rule = rule;
    this.start = start;
    this.dot = 0;
    this.sppf = null;
}
/**
 * Replaces the SPPF node attached to this item.
 *
 * @param n the node to attach; stored as-is
 */
public void setSPPF(SPPFNode n) {
    this.sppf = n;
}
/**
 * Builds a new {@code DottedRule} from this item's rule and current dot position.
 *
 * @return a freshly allocated dotted rule
 */
public DottedRule getDottedRule() {
    DottedRule snapshot = new DottedRule(this.rule, this.dot);
    return snapshot;
}
/**
 * @return the SPPF node currently attached to this item, or {@code null} if none has been set
 */
public SPPFNode getSPPF() {
    return this.sppf;
}
public int afterDot() {
......@@ -30,10 +44,10 @@ public class EarleyItem {
return false;
EarleyItem e = (EarleyItem) other;
return dot == e.dot && start == e.start
&& rule == e.rule; // reference equality here!
&& rule == e.rule && sppf == e.sppf; // reference equality here!
}
/**
 * Hash over {@code rule}, {@code dot} and {@code start}.
 *
 * <p>The stale pre-change {@code return} that preceded this expression has
 * been dropped; with both present the second statement was unreachable and the
 * method did not compile.
 */
@Override public int hashCode() {
    // sppf is deliberately not hashed: it is assigned after construction via
    // setSPPF, so including it would change an item's hash after it has been
    // stored. Items that compare equal still agree on rule, dot and start, so
    // the equals/hashCode contract holds.
    return ((rule.hashCode() + (dot * 31)) * 31 + start) * 31;
}
}
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
public class EarleyParser {
......@@ -109,6 +111,24 @@ public class EarleyParser {
}
/**
 * A hash set of Earley items that can also be drained one element at a time.
 */
class StateSet extends HashSet<EarleyItem> {
    /** Creates an empty set. */
    public StateSet() {
        super();
    }

    /** Creates a set holding the elements of {@code c}. */
    public StateSet(Collection<EarleyItem> c) {
        super(c);
    }

    /**
     * Removes and returns whichever element the set's iterator yields first.
     *
     * @return the removed item, or {@code null} when the set is empty
     */
    public EarleyItem pickOne() {
        Iterator<EarleyItem> cursor = iterator();
        if (!cursor.hasNext()) {
            return null;
        }
        EarleyItem picked = cursor.next();
        // Removing the element is what lets callers use this set as a worklist.
        cursor.remove();
        return picked;
    }
}
/**
......@@ -207,8 +227,125 @@ public class EarleyParser {
int start = cat2int.get(startSymbol);
StateSet[] state = internalParse(symbols, start);
}
/**
 * Earley recogniser that threads SPPF nodes through its items, structured after
 * the algorithm in "SPPF-Style Parsing From Earley Recognizers" (the numbered
 * comments 1, 1.1, ... mark the corresponding steps).
 *
 * <p>Indexing convention: {@code symbols} is 0-based, so the token consumed when
 * moving from set {@code i} to set {@code i + 1} is {@code symbols[i]}. The outer
 * loop also visits {@code i == symbols.length} so that prediction and completion
 * run on the last set; no scanning happens there.
 *
 * @param symbols     the input tokens, already mapped to integer categories
 * @param startSymbol the integer category of the start symbol
 * @return {@code true} if the last set contains a complete item for
 *         {@code startSymbol} that starts at position 0
 */
private boolean internalParseScott(int[] symbols, int startSymbol) {
    final int n = symbols.length;
    StateSet[] state = new StateSet[n + 1];
    // Allocate every set up front: the scanner writes into state[i + 1] before
    // iteration i + 1 begins, and each iteration copies state[i] into its worklist.
    for (int k = 0; k <= n; ++k) {
        state[k] = new StateSet();
    }
    StateSet Q_next = new StateSet();
    HashMap<NodeLabel, SPPFNode> V = new HashMap<>();

    for (EarleyRule r : rules.get(startSymbol)) {
        state[0].add(new EarleyItem(r, 0));
        // Guard against empty input before looking at the first token.
        if (n > 0 && !r.isEmpty() && r.body[0] == symbols[0]) {
            Q_next.add(new EarleyItem(r, 0));
        }
    }

    for (int i = 0; i <= n; ++i) {
        // False only for the final set, where there is no token left to scan.
        final boolean hasNext = i < n;
        HashMap<Integer, SPPFNode> H = new HashMap<>();
        StateSet R = new StateSet(state[i]); // worklist
        StateSet Q = Q_next;
        Q_next = new StateSet();

        while (!R.isEmpty()) {
            // for hash sets this is not deterministic, this may be a problem...
            EarleyItem Lambda = R.pickOne();

            if (!Lambda.isComplete() && !isTerminal(Lambda.afterDot())) { // 1
                for (EarleyRule r : rules.get(Lambda.afterDot())) { // 1.1
                    EarleyItem C = new EarleyItem(r, i);
                    if (r.startsWithNonTerminal()) { // 1.1.1 (also covers empty rules)
                        if (state[i].add(C)) {
                            R.add(C);
                        }
                    } else if (hasNext && r.body[0] == symbols[i]) { // 1.1.2
                        // Reached only for non-empty rules starting with a terminal,
                        // so body[0] is safe to read.
                        Q.add(C);
                    }
                }
                SPPFNode v = H.get(Lambda.afterDot()); // TODO: check that we never insert null
                if (v != null) { // 1.2
                    EarleyItem LambdaNext = Lambda.advance();
                    SPPFNode y = makeNode(LambdaNext.getDottedRule(), LambdaNext.start, i,
                            Lambda.getSPPF(), v, V);
                    LambdaNext.setSPPF(y);
                    if (LambdaNext.isComplete() || !isTerminal(LambdaNext.afterDot())) { // 1.2.1
                        if (state[i].add(LambdaNext)) {
                            R.add(LambdaNext);
                        }
                    } else if (hasNext && LambdaNext.afterDot() == symbols[i]) { // 1.2.2
                        Q.add(LambdaNext);
                    }
                }
            }

            if (Lambda.isComplete()) { // 2
                if (Lambda.getSPPF() == null) { // 2.1
                    NodeLabel vLabel = new SymbolLabel(Lambda.rule.head, i, i);
                    SPPFNode v = V.get(vLabel); // 2.1.1
                    if (v == null) {
                        v = new SPPFNode(vLabel);
                        V.put(vLabel, v);
                    }
                    Lambda.setSPPF(v);
                    // TODO: if w does not have family (eps) add one? 2.1.2
                }
                if (Lambda.start == i) { // 2.2
                    H.put(Lambda.rule.head, Lambda.getSPPF());
                }
                // 2.3: iterate over a snapshot, because when Lambda.start == i the
                // loop body adds to the very set being traversed.
                for (EarleyItem item : new ArrayList<EarleyItem>(state[Lambda.start])) {
                    // Only items waiting for the completed non-terminal can advance.
                    if (item.isComplete() || item.afterDot() != Lambda.rule.head) {
                        continue;
                    }
                    // NOTE(review): assumes advance() returns a new item and leaves
                    // `item` untouched - confirm against EarleyItem.advance().
                    EarleyItem itemNext = item.advance();
                    SPPFNode y = makeNode(itemNext.getDottedRule(), itemNext.start, i,
                            item.getSPPF(), Lambda.getSPPF(), V);
                    // Use the advanced item itself; constructing a fresh EarleyItem
                    // would reset its dot to 0.
                    itemNext.setSPPF(y);
                    if (itemNext.isComplete() || !isTerminal(itemNext.afterDot())) { // 2.3.1
                        if (state[i].add(itemNext)) {
                            R.add(itemNext);
                        }
                    } else if (hasNext && itemNext.afterDot() == symbols[i]) { // 2.3.2
                        Q.add(itemNext);
                    }
                }
            }
        }

        V.clear();
        if (hasNext) {
            SPPFNode v = new SPPFNode(new SymbolLabel(symbols[i], i, i + 1));
            while (!Q.isEmpty()) { // 3
                EarleyItem Lambda = Q.pickOne();
                assert Lambda.afterDot() == symbols[i];
                EarleyItem LambdaNext = Lambda.advance();
                SPPFNode y = makeNode(LambdaNext.getDottedRule(), LambdaNext.start, i + 1,
                        Lambda.getSPPF(), v, V);
                LambdaNext.setSPPF(y);
                if (LambdaNext.isComplete() || !isTerminal(LambdaNext.afterDot())) { // 3.1
                    state[i + 1].add(LambdaNext);
                } else if (i + 1 < n && LambdaNext.afterDot() == symbols[i + 1]) { // 3.2
                    Q_next.add(LambdaNext);
                }
            }
        }
    }

    StateSet finalState = state[n];
    for (EarleyItem item : finalState) {
        if (item.isComplete() && item.start == 0 && item.rule.head == startSymbol) {
            return true;
        }
    }
    return false;
}
/**
 * Placeholder for SPPF node construction: every argument is currently ignored
 * and {@code null} is always returned.
 *
 * NOTE(review): presumably intended to create or look up, in {@code v}, the node
 * for {@code dottedRule} over the span {@code start..i} with {@code sppf} and
 * {@code sppf2} as children - not implemented yet, confirm the intended contract.
 */
private SPPFNode makeNode(DottedRule dottedRule, int start, int i, SPPFNode sppf, SPPFNode sppf2,
HashMap<NodeLabel, SPPFNode> v) {
return null;
}
}
......@@ -7,6 +7,14 @@ public class EarleyRule implements Comparable<EarleyRule> {
this.body = body;
}
/**
 * @return {@code true} when this rule's body has no symbols
 */
public boolean isEmpty() {
    return body.length < 1;
}
/**
 * Tells whether the body does not begin with a terminal.
 *
 * @return {@code true} for an empty body, otherwise the negation of
 *         {@code EarleyParser.isTerminal} applied to the first body symbol
 */
public boolean startsWithNonTerminal() {
    if (isEmpty()) {
        return true;
    }
    int first = body[0];
    return !EarleyParser.isTerminal(first);
}
@Override
public int compareTo(EarleyRule other) {
for (int i = 0; i < Math.min(this.body.length, other.body.length); ++i) {
......
public class NodeLabel {
int start, end;
protected NodeLabel(int start, int end) {
this.start = start;
this.end = end;
}
@Override public boolean equals(Object other) {
if (!(other instanceof NodeLabel))
return false;
NodeLabel o = (NodeLabel) other;
return o.start == start && o.end == end;
}
@Override public int hashCode() {
return 31 * start + end;
}
}
/**
 * Label made of a dotted rule plus the span inherited from {@code NodeLabel}.
 */
class ItemLabel extends NodeLabel {
    DottedRule item;

    /**
     * @param item  the dotted rule carried by this label
     * @param start start position of the span
     * @param end   end position of the span
     */
    public ItemLabel(DottedRule item, int start, int end) {
        super(start, end);
        this.item = item;
    }

    /** Equal when the span matches, the other object is an ItemLabel and the dotted rules are equal. */
    @Override public boolean equals(Object other) {
        if (!(other instanceof ItemLabel)) {
            return false;
        }
        if (!super.equals(other)) {
            return false;
        }
        ItemLabel that = (ItemLabel) other;
        return item.equals(that.item);
    }

    @Override public int hashCode() {
        int spanHash = super.hashCode();
        return 31 * spanHash + item.hashCode();
    }
}
/**
 * Label made of a grammar symbol (as an int) plus the span inherited from {@code NodeLabel}.
 */
class SymbolLabel extends NodeLabel {
    int symbol;

    /**
     * @param symbol the symbol carried by this label
     * @param start  start position of the span
     * @param end    end position of the span
     */
    public SymbolLabel(int symbol, int start, int end) {
        super(start, end);
        this.symbol = symbol;
    }

    /** Equal when the span matches, the other object is a SymbolLabel and the symbols are the same. */
    @Override public boolean equals(Object other) {
        if (!(other instanceof SymbolLabel)) {
            return false;
        }
        if (!super.equals(other)) {
            return false;
        }
        SymbolLabel that = (SymbolLabel) other;
        return that.symbol == symbol;
    }

    @Override public int hashCode() {
        int spanHash = super.hashCode();
        return 31 * spanHash + symbol;
    }
}
import java.util.ArrayList;
import java.util.List;
/**
 * A node holding a label and a list of child nodes. The child list starts out
 * empty; nothing in this class adds to it yet.
 */
public class SPPFNode {
    private List<SPPFNode> children = new ArrayList<>();
    private NodeLabel label;

    /** Creates a node without a label. */
    public SPPFNode() {
        this(null);
    }

    /**
     * @param label the label for this node; may be {@code null}
     */
    public SPPFNode(NodeLabel label) {
        this.label = label;
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment