-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchart_parser
211 lines (172 loc) · 6.66 KB
/
chart_parser
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
<?xml version="1.0"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:rdfe="http://redfoot.net/3.0/rdf#"
xmlns:code="http://redfoot.net/3.0/code#"
>
<rdfe:Namespace rdf:about="#">
<rdfs:label>Chart Parser</rdfs:label>
<rdfs:comment></rdfs:comment>
</rdfe:Namespace>
<code:Module rdf:ID="module">
<rdfs:label>Chart Parser</rdfs:label>
<code:python rdf:datatype="http://redfoot.net/3.0/redfoot#Python">
<![CDATA[
import logging
from rdflib import URIRef, BNode
from rdflib.Namespace import Namespace
_logger = logging.getLogger(__uri__)
CHART_PARSER = Namespace("#", base=__uri__) # TODO: convert to redfoot.namespace(...)
# ChartParser.py, version 1.0
#
# Released to the public domain 10 August 1999 by Oliver Steele.
#"""Module ChartParser -- a simple chart parser
#"""
#__author__ = 'Oliver Steele', 'steele@cs.brandeis.edu'
#__version__ = '1.0'
class Rule:
"""A rule represents a phrase-structure production rule of the form:
A => B C
where A expands to B followed by C (and therefore, B followed by
C can be composed into an A). In this example, the left-hand-side
(lhs) is A, and the right-hand-side (rhs) is [B, C]."""
def __init__(self, spec_or_uri, action=None, check_cache=True):
"""spec is of the form [A, B, C], where A is the lhs and the remaining
items are the rhs. In other words, [A, B, C] represents A => B C"""
if isinstance(spec_or_uri, (URIRef, BNode)):
uri = spec_or_uri
lhs = redfoot.value(uri, CHART_PARSER.lhs)
spec = []
spec.append(lhs)
rhs = redfoot.value(uri, CHART_PARSER.rhs)
if (rhs, RDF.type, RDF.Seq) in redfoot:
s = redfoot.seq(rhs)
else:
s = redfoot.items(rhs)
for item in s:
spec.append(item)
action_module = redfoot.value(uri, CHART_PARSER.action)
if action_module:
action = redfoot.module(action_module, check_cache=check_cache).action
else:
action = None
else:
spec = spec_or_uri
self.lhs = spec[0]
self.rhs = spec[1:]
assert self.rhs[0], "Must have at least one element on rhs"
self.action = action
def __repr__(self):
return '%r => %r : %r' % (self.lhs, self.rhs, self.action)
def matches(self, category):
return self.rhs[0] == category
class Constituent:
def __init__(self, type, children, left, right, rule=None):
self.type = type
self.children = children
self.left = left
self.right = right
self.rule = rule
def __repr__(self):
return '%r%r' % (self.type, `self.children`)
def tokens(self):
for c in self.children:
for t in c.tokens():
yield t
def terminals(self):
for c in self.children:
for terminal in c.terminals():
yield terminal
def action(self, *args, **keyword_args):
if self.rule:
if self.rule.action:
return self.rule.action(self.children, *args, **keyword_args)
class PreTerminal(Constituent):
def __init__(self, tag, token, left):
Constituent.__init__(self, tag, None, left, left+1)
self.token = token
def __repr__(self):
return '%r(%r)' % (self.type, self.token)
def tokens(self):
yield self.token
def terminals(self):
yield self
class Edge:
def __init__(self, rule, left, right=None, index=0, children=None):
self.rule = rule
self.left = left
self.right = right or left
self.index = index
self.children = children or []
def __repr__(self):
str = []
for i in range(len(self.rule.rhs)):
if i == self.index:
str.append('^')
str.append(self.rule.rhs[i] + ' ')
return '<%r => %r at %r:%r>' % (self.rule.lhs, "".join(str)[:-1], self.left, self.right)
def advanceOver(self, chart, constituent):
rule = self.rule
if self.right == constituent.left and rule.rhs[self.index] == constituent.type:
chart.addEdge(Edge(rule, self.left, constituent.right, self.index + 1, self.children + [constituent]))
def active(self):
return self.index < len(self.rule.rhs)
class Chart:
def __init__(self, uri_or_rules=[], check_cache=True):
if isinstance(uri_or_rules, (URIRef, BNode)):
uri = uri_or_rules
rules = []
for rule_uri in redfoot.items(redfoot.value(uri, CHART_PARSER.rules)):
rule = Rule(rule_uri, check_cache=check_cache)
rules.append(Rule(spec, action))
_logger.debug("rules: %r" % rules)
else:
rules = uri_or_rules
self._rules = rules
self.unknown_tokens = set()
def tokenize(self, s):
for token in s.split(" "):
yield token
def parse(self, string_or_tokens):
if isinstance(string_or_tokens, basestring):
tokens = list(self.tokenize(string_or_tokens))
else:
tokens = list(string_or_tokens)
n = len(tokens)
if n>0:
self.n = n
self.edges = []
self.constituents = []
for i in range(n):
self.edges.append(list())
self.constituents.append(list())
for i in range(n):
token = tokens[i]
self.addToken(token, i)
for c in self.constituents[0]:
assert c.left==0
if c.right==self.n:
yield c
def addToken(self, token, position):
for tag in self.tags(token):
self.addConstituent(PreTerminal(tag, token, position))
def addConstituent(self, constituent):
self.constituents[constituent.left].append(constituent)
for edge in self.edges[constituent.left]:
edge.advanceOver(self, constituent)
for rule in self._rules:
if rule.matches(constituent.type):
Edge(rule, constituent.left).advanceOver(self, constituent)
def addEdge(self, edge):
if edge.active():
if edge.right < self.n:
self.edges[edge.right].append(edge)
for constituent in self.constituents[edge.right]:
edge.advanceOver(self, constituent)
else:
self.addConstituent(Constituent(edge.rule.lhs, edge.children, edge.left, edge.right, edge.rule))
]]>
</code:python>
</code:Module>
</rdf:RDF>