Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Alexandru Dura
SPPF Earley Parser
Commits
8f32b7fe
Commit
8f32b7fe
authored
Jan 26, 2020
by
Alexandru Dura
Browse files
WIP: parser in infinite loop
parent
9f45ed65
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/main/java/DottedRule.java
View file @
8f32b7fe
public
class
DottedRule
{
EarleyRule
r
;
int
dot
;
final
EarleyRule
r
;
final
int
dot
;
public
DottedRule
(
EarleyRule
r
,
int
dot
)
{
assert
r
!=
null
;
...
...
@@ -8,6 +8,19 @@ public class DottedRule {
this
.
dot
=
dot
;
}
public
DottedRule
advance
()
{
return
new
DottedRule
(
r
,
dot
+
1
);
}
public
boolean
isComplete
()
{
return
r
.
body
.
length
==
dot
;
}
public
int
afterDot
()
{
assert
!
isComplete
();
return
r
.
body
[
dot
];
}
@Override
public
int
hashCode
()
{
final
int
prime
=
31
;
...
...
src/main/java/EarleyItem.java
View file @
8f32b7fe
public
class
EarleyItem
{
int
dot
;
// 0 means before the first element in the rule
final
int
start
;
// 0 means beginning of input
final
Earley
Rule
rule
;
final
Dotted
Rule
rule
;
SPPFNode
sppf
;
public
EarleyItem
(
EarleyRule
rule
,
int
start
)
{
this
.
dot
=
0
;
this
.
start
=
start
;
this
.
rule
=
rule
;
this
.
rule
=
new
DottedRule
(
rule
,
0
)
;
this
.
sppf
=
null
;
}
public
EarleyItem
(
DottedRule
rule
,
int
start
)
{
this
.
start
=
start
;
this
.
rule
=
rule
;
}
public
void
setSPPF
(
SPPFNode
n
)
{
sppf
=
n
;
}
public
DottedRule
getDottedRule
()
{
return
new
DottedRule
(
rule
,
dot
)
;
return
rule
;
}
public
SPPFNode
getSPPF
()
{
...
...
@@ -24,30 +27,26 @@ public class EarleyItem {
}
public
int
afterDot
()
{
assert
!
isComplete
();
return
rule
.
body
[
dot
];
return
rule
.
afterDot
();
}
public
boolean
isComplete
()
{
return
this
.
rule
.
body
.
length
==
this
.
dot
;
return
rule
.
isComplete
()
;
}
public
EarleyItem
advance
()
{
assert
!
isComplete
();
EarleyItem
ret
=
new
EarleyItem
(
rule
,
start
);
ret
.
dot
=
dot
+
1
;
return
ret
;
return
new
EarleyItem
(
rule
.
advance
(),
start
);
}
@Override
public
boolean
equals
(
Object
other
)
{
if
(!(
other
instanceof
EarleyItem
))
return
false
;
EarleyItem
e
=
(
EarleyItem
)
other
;
return
dot
==
e
.
dot
&&
start
==
e
.
start
&&
rule
==
e
.
rule
&&
sppf
==
e
.
sppf
;
// reference equality here!
return
start
==
e
.
start
&&
rule
.
equals
(
e
.
rule
)
&&
sppf
==
e
.
sppf
;
// reference equality here!
}
@Override
public
int
hashCode
()
{
return
(
(
rule
.
hashCode
()
+
(
dot
*
31
))
*
31
+
start
)
*
31
;
//
+ (sppf == null ? 0 : sppf.hashCode());
return
(
rule
.
hashCode
()
*
31
+
start
)
*
31
+
(
sppf
==
null
?
0
:
sppf
.
hashCode
());
}
}
src/main/java/EarleyParser.java
View file @
8f32b7fe
import
javax.swing.plaf.nimbus.State
;
import
java.util.ArrayList
;
import
java.util.Collection
;
import
java.util.HashMap
;
...
...
@@ -80,16 +81,16 @@ public class EarleyParser {
}
private
String
asString
(
EarleyItem
item
)
{
String
s
=
int2cat
.
get
(
item
.
rule
.
head
).
toString
()
+
" -> "
;
for
(
int
j
=
0
;
j
<
item
.
rule
.
body
.
length
;
++
j
)
{
if
(
j
==
item
.
dot
)
{
String
s
=
int2cat
.
get
(
item
.
rule
.
r
.
head
).
toString
()
+
" -> "
;
for
(
int
j
=
0
;
j
<
item
.
rule
.
r
.
body
.
length
;
++
j
)
{
if
(
j
==
item
.
rule
.
dot
)
{
s
+=
"\u2022 "
;
}
int
symbol
=
item
.
rule
.
body
[
j
];
int
symbol
=
item
.
rule
.
r
.
body
[
j
];
s
+=
int2cat
.
get
(
symbol
).
toString
()
+
" "
;
}
if
(
item
.
dot
==
item
.
rule
.
body
.
length
)
{
if
(
item
.
rule
.
dot
==
item
.
rule
.
r
.
body
.
length
)
{
s
+=
"\u2022"
;
}
...
...
@@ -156,7 +157,7 @@ public class EarleyParser {
// We can improve this by storing the set as a tree set, which would give
// a complexity of O(log(n_items)) for this iteration and also for insertion.
for
(
EarleyItem
jtem
:
state
[
item
.
start
])
{
if
(!
jtem
.
isComplete
()
&&
jtem
.
afterDot
()
==
item
.
rule
.
head
)
{
if
(!
jtem
.
isComplete
()
&&
jtem
.
afterDot
()
==
item
.
rule
.
r
.
head
)
{
EarleyItem
newItem
=
jtem
.
advance
();
if
(
currentSet
.
add
(
newItem
))
{
worklist
.
addLast
(
newItem
);
...
...
@@ -196,7 +197,7 @@ public class EarleyParser {
symbols
[
s
.
length
]
=
0
;
int
start
=
cat2int
.
get
(
startSymbol
);
StateSet
[]
state
=
internalParse
(
symbols
,
start
);
StateSet
[]
state
=
internalParse
Scott
(
symbols
,
start
);
if
(
DEBUG
)
{
for
(
int
i
=
0
;
i
<
s
.
length
+
1
;
++
i
)
{
...
...
@@ -211,7 +212,7 @@ public class EarleyParser {
StateSet
finalState
=
state
[
s
.
length
];
System
.
out
.
println
(
"==========================="
);
for
(
EarleyItem
item
:
finalState
)
{
if
(
item
.
isComplete
()
&&
item
.
start
==
0
&&
item
.
rule
.
head
==
start
)
{
if
(
item
.
isComplete
()
&&
item
.
start
==
0
&&
item
.
rule
.
r
.
head
==
start
)
{
return
true
;
}
}
...
...
@@ -229,9 +230,10 @@ public class EarleyParser {
StateSet
[]
state
=
internalParse
(
symbols
,
start
);
}
private
boolean
internalParseScott
(
int
[]
symbols
,
int
startSymbol
)
{
private
StateSet
[]
internalParseScott
(
int
[]
symbols
,
int
startSymbol
)
{
StateSet
[]
state
=
new
StateSet
[
symbols
.
length
+
1
];
state
[
0
]
=
new
StateSet
();
for
(
int
i
=
0
;
i
<
state
.
length
;
++
i
)
state
[
i
]
=
new
StateSet
();
StateSet
Q_next
=
new
StateSet
();
HashMap
<
NodeLabel
,
SPPFNode
>
V
=
new
HashMap
<>();
...
...
@@ -275,7 +277,7 @@ public class EarleyParser {
if
(
state
[
i
].
add
(
LambdaNext
))
{
// 1.2.1
R
.
add
(
LambdaNext
);
}
}
else
if
(
LambdaNext
.
afterDot
()
==
symbols
[
i
+
1
])
{
// 1.2.2
}
else
if
(
LambdaNext
.
afterDot
()
==
symbols
[
i
])
{
// 1.2.2
Q
.
add
(
LambdaNext
);
}
}
...
...
@@ -283,7 +285,7 @@ public class EarleyParser {
if
(
Lambda
.
isComplete
())
{
// 2
if
(
Lambda
.
getSPPF
()
==
null
)
{
// 2.1
NodeLabel
vLabel
=
new
SymbolLabel
(
Lambda
.
rule
.
head
,
i
,
i
);
NodeLabel
vLabel
=
new
SymbolLabel
(
Lambda
.
rule
.
r
.
head
,
i
,
i
);
SPPFNode
v
;
if
(
V
.
containsKey
(
vLabel
))
{
// 2.1.1
v
=
V
.
get
(
vLabel
);
...
...
@@ -295,7 +297,7 @@ public class EarleyParser {
// TODO: if w does not have family (eps) add one? 2.1.2
}
if
(
Lambda
.
start
==
i
)
{
// 2.2
H
.
put
(
Lambda
.
rule
.
head
,
Lambda
.
getSPPF
());
H
.
put
(
Lambda
.
rule
.
r
.
head
,
Lambda
.
getSPPF
());
}
for
(
EarleyItem
item
:
state
[
Lambda
.
start
])
{
// 2.3
...
...
@@ -307,7 +309,8 @@ public class EarleyParser {
if
(
state
[
i
].
add
(
newItem
))
{
// 2.3.1
R
.
add
(
newItem
);
}
}
else
if
(
itemNext
.
afterDot
()
==
symbols
[
i
+
1
])
{
// 2.3.2
}
else
if
(
itemNext
.
afterDot
()
==
symbols
[
i
])
{
// 2.3.2
assert
itemNext
.
afterDot
()
==
newItem
.
afterDot
();
Q
.
add
(
newItem
);
}
}
...
...
@@ -316,30 +319,23 @@ public class EarleyParser {
V
.
clear
();
SPPFNode
v
=
new
SPPFNode
(
new
SymbolLabel
(
symbols
[
i
+
1
],
i
,
i
+
1
));
SPPFNode
v
=
new
SPPFNode
(
new
SymbolLabel
(
symbols
[
i
],
i
,
i
+
1
));
while
(!
Q
.
isEmpty
())
{
// 3
EarleyItem
Lambda
=
Q
.
pickOne
();
assert
Lambda
.
afterDot
()
==
symbols
[
i
+
1
];
assert
Lambda
.
afterDot
()
==
symbols
[
i
];
EarleyItem
LambdaNext
=
Lambda
.
advance
();
SPPFNode
y
=
makeNode
(
LambdaNext
.
getDottedRule
(),
LambdaNext
.
start
,
i
+
1
,
Lambda
.
getSPPF
(),
v
,
V
);
EarleyItem
newItem
=
new
EarleyItem
(
LambdaNext
.
rule
,
LambdaNext
.
start
);
newItem
.
setSPPF
(
y
);
if
(
LambdaNext
.
isComplete
()
||
!
isTerminal
(
LambdaNext
.
afterDot
()))
{
// 3.1
state
[
i
+
1
].
add
(
newItem
);
}
else
if
(
i
+
2
<
symbols
.
length
&&
LambdaNext
.
afterDot
()
==
symbols
[
i
+
2
])
{
// 3.2
}
else
if
(
LambdaNext
.
afterDot
()
==
symbols
[
i
+
1
])
{
// 3.2
Q_next
.
add
(
newItem
);
}
}
}
StateSet
finalState
=
state
[
symbols
.
length
];
for
(
EarleyItem
item
:
finalState
)
{
if
(
item
.
isComplete
()
&&
item
.
start
==
0
&&
item
.
rule
.
head
==
startSymbol
)
{
return
true
;
}
}
return
false
;
return
state
;
}
private
SPPFNode
makeNode
(
DottedRule
dottedRule
,
int
start
,
int
i
,
SPPFNode
sppf
,
SPPFNode
sppf2
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment