Skip to content

Commit f99b5ab

Browse files
committed
Remove an optimization in semgrex which may be causing crashes
The optimization removed - if it even does provide a noticeable time benefit - can have repeated IdentityHashMap hashCodes for SemanticGraphs over a long run, leading to crashes when a stale object is retrieved for a new graph. The directed graph now throws a specific exception type when a cyclic graph occurs. Catch exactly that exception in SemgrexMatcher (previous versions caught Exception, which is horrifying)
1 parent 755edcf commit f99b5ab

File tree

4 files changed

+39
-54
lines changed

4 files changed

+39
-54
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package edu.stanford.nlp.graph;
2+
3+
public class CyclicGraphException extends IllegalStateException {
4+
public final String error;
5+
public final DirectedMultiGraph<?, ?> graph;
6+
7+
public CyclicGraphException(String error, DirectedMultiGraph<?, ?> graph) {
8+
this.error = error;
9+
this.graph = graph;
10+
}
11+
12+
public String toString() {
13+
return super.toString() + ": " + error + "\nGraph:\n" + graph;
14+
}
15+
}

src/edu/stanford/nlp/graph/DirectedMultiGraph.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ public void remove() {
613613
* Topological sorting only works if the graph is acyclic.
614614
*
615615
* @return A sorted list of the vertices
616-
* @throws IllegalStateException if this graph is not a DAG
616+
* @throws CyclicGraphException (a subtype of IllegalStateException) if this graph is not a DAG
617617
*/
618618
public List<V> topologicalSort() {
619619
List<V> result = Generics.newArrayList();
@@ -637,7 +637,7 @@ private void topologicalSortHelper(V vertex, Set<V> temporary, Set<V> permanent,
637637
continue;
638638
}
639639
if (temporary.contains(neighbor)) {
640-
throw new IllegalStateException("This graph has cycles. Topological sort not possible: " + this.toString());
640+
throw new CyclicGraphException("This graph has cycles. Topological sort not possible", this);
641641
}
642642
topologicalSortHelper(neighbor, temporary, permanent, result);
643643
}

src/edu/stanford/nlp/semgraph/SemanticGraph.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,7 @@ public void setRoots(Collection<IndexedWord> words) {
899899
/**
900900
*
901901
* @return A sorted list of the vertices
902-
* @throws IllegalStateException if this graph is not a DAG
902+
* @throws CyclicGraphException (a subtype of IllegalStateException) if this graph is not a DAG
903903
*/
904904
public List<IndexedWord> topologicalSort() {
905905
return graph.topologicalSort();

src/edu/stanford/nlp/semgraph/semgrex/SemgrexMatcher.java

Lines changed: 21 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package edu.stanford.nlp.semgraph.semgrex;
22

3+
import edu.stanford.nlp.graph.CyclicGraphException;
34
import edu.stanford.nlp.semgraph.SemanticGraph;
45
import edu.stanford.nlp.ling.*;
56
import edu.stanford.nlp.util.logging.Redwood;
@@ -117,67 +118,36 @@ public boolean matchesAt(IndexedWord node) {
117118

118119

119120
/**
120-
* Topological sorting actually takes a rather large amount of time, if you call multiple
121-
* patterns on the same tree.
122-
* This is a weak cache that stores all the trees sorted since the garbage collector last kicked in.
123-
* The key on this map is the identity hash code (i.e., memory address) of the semantic graph; the
124-
* value is the sorted list of vertices.
125-
* <p>
126-
* Note that this optimization will cause strange things to happen if you mutate a semantic graph between
127-
* calls to Semgrex.
121+
* Find the next match of the pattern in the graph.
122+
*
123+
* @return whether there is a match somewhere in the graph
128124
*/
129-
private static final WeakHashMap<Integer, List<IndexedWord>> topologicalSortCache = new WeakHashMap<>();
130-
131-
private void setupFindIterator() {
132-
try {
125+
public boolean find() {
126+
// log.info("hyp: " + hyp);
127+
// there was a cache of the topological sorts to reuse across
128+
// SemgrexPatterns which used IdentityHashMap to remember
129+
// SemanticGraphs, but it was apparently the cause of various
130+
// thread safety bugs when the results were used for an old
131+
// SemanticGraph
132+
if (findIterator == null) {
133133
if (hyp) {
134-
synchronized (topologicalSortCache) {
135-
List<IndexedWord> topoSort = topologicalSortCache.get(System.identityHashCode(sg));
136-
if (topoSort == null || topoSort.size() != sg.size()) { // size check to mitigate a stale cache
137-
topoSort = sg.topologicalSort();
138-
topologicalSortCache.put(System.identityHashCode(sg), topoSort);
139-
}
140-
findIterator = topoSort.iterator();
134+
try {
135+
findIterator = sg.topologicalSort().iterator();
136+
} catch (CyclicGraphException e) {
137+
findIterator = sg.vertexSet().iterator();
141138
}
142139
} else if (sg_aligned == null) {
143-
return;
140+
return false;
144141
} else {
145-
synchronized (topologicalSortCache) {
146-
List<IndexedWord> topoSort = topologicalSortCache.get(System.identityHashCode(sg_aligned));
147-
if (topoSort == null || topoSort.size() != sg_aligned.size()) { // size check to mitigate a stale cache
148-
topoSort = sg_aligned.topologicalSort();
149-
topologicalSortCache.put(System.identityHashCode(sg_aligned), topoSort);
150-
}
151-
findIterator = topoSort.iterator();
142+
try {
143+
findIterator = sg_aligned.topologicalSort().iterator();
144+
} catch (CyclicGraphException e) {
145+
findIterator = sg_aligned.vertexSet().iterator();
152146
}
153147
}
154-
} catch (Exception ex) {
155-
if (hyp) {
156-
findIterator = sg.vertexSet().iterator();
157-
} else if (sg_aligned == null) {
158-
return;
159-
} else {
160-
findIterator = sg_aligned.vertexSet().iterator();
161-
}
162148
}
163-
}
164149

165-
/**
166-
* Find the next match of the pattern in the graph.
167-
*
168-
* @return whether there is a match somewhere in the graph
169-
*/
170-
public boolean find() {
171-
// log.info("hyp: " + hyp);
172-
if (findIterator == null) {
173-
setupFindIterator();
174-
}
175-
if (findIterator == null) {
176-
return false;
177-
}
178-
// System.out.println("first");
179150
if (findCurrent != null && matches()) {
180-
// log.info("find first: " + findCurrent.word());
181151
return true;
182152
}
183153
//log.info("here");

0 commit comments

Comments
 (0)