001 package org.maltparser.transform.pseudo;
002
003 import java.util.Vector;
004
005 import org.apache.log4j.Logger;
006 import org.maltparser.core.exception.MaltChainedException;
007 import org.maltparser.core.symbol.SymbolTable;
008 import org.maltparser.core.symbol.SymbolTableHandler;
009 import org.maltparser.core.syntaxgraph.DependencyStructure;
010 import org.maltparser.core.syntaxgraph.node.DependencyNode;
011
012 /**
013 * This class contains methods for projectivizing and deprojectivizing
014 *
015 * @author Jens Nilsson
016 * @since 1.0
017 */
018 public class PseudoProjectivity {
019 static int id = 0;
020
021 private enum PseudoProjectiveEncoding {
022 NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE
023 };
024
025 private enum CoveredRootAttachment {
026 NONE, LEFT, RIGHT, HEAD
027 };
028
029 private enum LiftingOrder {
030 SHORTEST, DEEPEST
031 };
032
033 private PseudoProjectiveEncoding markingStrategy;
034 private CoveredRootAttachment rootAttachment;
035 private LiftingOrder liftingOrder;
036 private Logger configLogger;
037
038 private SymbolTable deprelSymbolTable;
039 private SymbolTable pppathSymbolTable;
040 private SymbolTable ppliftedSymbolTable;
041 private SymbolTable ppcoveredRootSymbolTable;
042 private Vector<Boolean> nodeLifted;
043 private Vector<Vector<DependencyNode>> nodeTrace;
044 private Vector<DependencyNode> headDeprel;
045 private Vector<Boolean> nodePath;
046 private Vector<Boolean> isCoveredRoot;
047 private Vector<Integer> nodeRelationLength;
048 private Vector<String> synacticHeadDeprel;
049
050
051 public PseudoProjectivity() { }
052
053 public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger,
054 SymbolTableHandler symbolTables) throws MaltChainedException {
055 nodeLifted = new Vector<Boolean>();
056 nodeTrace = new Vector<Vector<DependencyNode>>();
057 headDeprel = new Vector<DependencyNode>();
058 nodePath = new Vector<Boolean>();
059 isCoveredRoot = new Vector<Boolean>();
060 nodeRelationLength = new Vector<Integer>();
061 synacticHeadDeprel = new Vector<String>();
062
063 this.configLogger = configLogger;
064 if (markingStrategyString.equalsIgnoreCase("none")) {
065 markingStrategy = PseudoProjectiveEncoding.NONE;
066 } else if (markingStrategyString.equalsIgnoreCase("baseline")) {
067 markingStrategy = PseudoProjectiveEncoding.BASELINE;
068 } else if (markingStrategyString.equalsIgnoreCase("head")) {
069 markingStrategy = PseudoProjectiveEncoding.HEAD;
070 } else if (markingStrategyString.equalsIgnoreCase("path")) {
071 markingStrategy = PseudoProjectiveEncoding.PATH;
072 } else if (markingStrategyString.equalsIgnoreCase("head+path")) {
073 markingStrategy = PseudoProjectiveEncoding.HEADPATH;
074 } else if (markingStrategyString.equalsIgnoreCase("trace")) {
075 markingStrategy = PseudoProjectiveEncoding.TRACE;
076 }
077
078 this.deprelSymbolTable = symbolTables.getSymbolTable("DEPREL");
079 if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH
080 || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
081 this.ppliftedSymbolTable = symbolTables.addSymbolTable("PPLIFTED", deprelSymbolTable);
082 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
083 ppliftedSymbolTable.addSymbol("#true#");
084 ppliftedSymbolTable.addSymbol("#false#");
085 } else {
086 ppliftedSymbolTable.addSymbol("#false#");
087 }
088 }
089
090 if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
091 pppathSymbolTable = symbolTables.addSymbolTable("PPPATH", deprelSymbolTable);
092 pppathSymbolTable.addSymbol("#true#");
093 pppathSymbolTable.addSymbol("#false#");
094 }
095
096 if (coveredRoot.equalsIgnoreCase("none")) {
097 this.rootAttachment = CoveredRootAttachment.NONE;
098 } else if (coveredRoot.equalsIgnoreCase("left")) {
099 this.rootAttachment = CoveredRootAttachment.LEFT;
100 } else if (coveredRoot.equalsIgnoreCase("right")) {
101 this.rootAttachment = CoveredRootAttachment.RIGHT;
102 } else if (coveredRoot.equalsIgnoreCase("head")) {
103 this.rootAttachment = CoveredRootAttachment.HEAD;
104 }
105
106 if (this.rootAttachment != CoveredRootAttachment.NONE) {
107 this.ppcoveredRootSymbolTable = symbolTables.addSymbolTable("PPCOVERED", deprelSymbolTable);
108 ppcoveredRootSymbolTable.addSymbol("#true#");
109 ppcoveredRootSymbolTable.addSymbol("#false#");
110 }
111 if (liftingOrder.equalsIgnoreCase("shortest")) {
112 this.liftingOrder = LiftingOrder.SHORTEST;
113 } else if (liftingOrder.equalsIgnoreCase("deepest")) {
114 this.liftingOrder = LiftingOrder.DEEPEST;
115 }
116 }
117
118 private void initProjectivization(DependencyStructure pdg) throws MaltChainedException {
119 nodeLifted.clear();
120 nodeTrace.clear();
121 headDeprel.clear();
122 nodePath.clear();
123 isCoveredRoot.clear();
124 nodeRelationLength.clear();
125
126 for (int index : pdg.getDependencyIndices()) {
127 nodeLifted.add(false);
128 nodeTrace.add(new Vector<DependencyNode>());
129 headDeprel.add(null);
130 nodePath.add(false);
131 isCoveredRoot.add(false);
132 if (ppliftedSymbolTable != null && index != 0) {
133 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#"));
134 }
135 if (pppathSymbolTable != null && index != 0) {
136 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#"));
137 }
138 if (ppcoveredRootSymbolTable != null && index != 0) {
139 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#"));
140 }
141 }
142 computeRelationLength(pdg);
143 }
144
145 public void projectivize(DependencyStructure pdg) throws MaltChainedException {
146 id++;
147 if (!pdg.isTree()) {
148 configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n");
149 return;
150 }
151 DependencyNode deepestNonProjectiveNode;
152 initProjectivization(pdg);
153
154 if (markingStrategy != PseudoProjectiveEncoding.NONE) {
155 while (!pdg.isProjective()) {
156 if (liftingOrder == LiftingOrder.DEEPEST) {
157 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
158 } else {
159 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
160 }
161 if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) {
162 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
163 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
164 setPath(deepestNonProjectiveNode.getHead());
165 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
166 }
167 }
168 if (rootAttachment == CoveredRootAttachment.NONE) {
169 deattachCoveredRootsForProjectivization(pdg);
170 }
171 } else if (rootAttachment != CoveredRootAttachment.NONE) {
172 for (int index : pdg.getTokenIndices()) {
173 attachCoveredRoots(pdg, pdg.getTokenNode(index));
174 }
175 }
176 // collectTraceStatistics(pdg);
177 assignPseudoProjectiveDeprels(pdg);
178 }
179
180
181 public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException {
182 assignPseudoProjectiveDeprelsForMerge(pdg);
183 }
184
185 public void splitArclabels(DependencyStructure pdg) throws MaltChainedException {
186 int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex;
187 String label;
188 initDeprojeciviztion(pdg);
189 for (int index : pdg.getTokenIndices()) {
190 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
191 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
192 if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) {
193 label = label.substring(0, pathLabelIndex);
194 setLabel(pdg.getTokenNode(index), label);
195 pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#"));
196 }
197 if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) {
198 if (movedLabelIndex + 1 < label.length()) {
199 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1)));
200 } else {
201 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#"));
202 }
203 label = label.substring(0, movedLabelIndex);
204 setLabel(pdg.getTokenNode(index), label);
205 }
206 }
207 }
208 for (int index : pdg.getTokenIndices()) {
209 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
210 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
211 if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) {
212 label = label.substring(0, coveredArcLabelIndex);
213 setLabel(pdg.getTokenNode(index), label);
214 pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
215 }
216 }
217 }
218 }
219
220 private void setHeadDeprel(DependencyNode node, DependencyNode parent) {
221 if (headDeprel.get(node.getIndex()) == null) {
222 headDeprel.set(node.getIndex(), parent);
223 }
224 nodeTrace.set(node.getIndex(), headDeprel);
225 }
226
227 private void setPath(DependencyNode node) {
228 nodePath.set(node.getIndex(), true);
229 }
230
231 private boolean isCoveredRoot(DependencyNode node) {
232 return isCoveredRoot.get(node.getIndex());
233 }
234
235 private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException {
236 for (int index : pdg.getTokenIndices()) {
237 if (isCoveredRoot(pdg.getTokenNode(index))) {
238 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex());
239 }
240 }
241 }
242
243 private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException {
244 int i;
245 boolean foundCoveredRoot = false;
246 DependencyNode coveredRootHead;
247 for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead()
248 .getIndex()); i++) {
249 int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex();
250 if (leftMostIndex == -1) {
251 leftMostIndex = i;
252 }
253 int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex();
254 if (rightMostIndex == -1) {
255 rightMostIndex = i;
256 }
257 // if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot()
258 // && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < pdg.getDependencyNode(i).getLeftmostDescendantIndex()
259 // && pdg.getDependencyNode(i).getRightmostDescendantIndex() < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) {
260 if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot()
261 && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex
262 && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) {
263 if (rootAttachment == CoveredRootAttachment.LEFT) {
264 if (deepest.getHead().getIndex() < deepest.getIndex()) {
265 coveredRootHead = deepest.getHead();
266 } else {
267 coveredRootHead = deepest;
268 }
269 } else if (rootAttachment == CoveredRootAttachment.RIGHT) {
270 if (deepest.getIndex() < deepest.getHead().getIndex()) {
271 coveredRootHead = deepest.getHead();
272 } else {
273 coveredRootHead = deepest;
274 }
275 } else {
276 coveredRootHead = deepest.getHead();
277 }
278 pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex());
279 setCoveredRoot(pdg.getDependencyNode(i));
280 foundCoveredRoot = true;
281 }
282 }
283 return foundCoveredRoot;
284 }
285
286 private void setCoveredRoot(DependencyNode node) {
287 isCoveredRoot.set(node.getIndex(), true);
288 }
289
290 private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
291 DependencyNode deepestNonProjectiveNode = null;
292 for (int index : pdg.getDependencyIndices()) {
293 if (!pdg.getDependencyNode(index).isProjective()
294 && (deepestNonProjectiveNode == null
295 || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) {
296 deepestNonProjectiveNode = pdg.getDependencyNode(index);
297 }
298 }
299
300 return deepestNonProjectiveNode;
301 }
302
303 private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
304 DependencyNode shortestNonProjectiveNode = null;
305 for (int index : pdg.getDependencyIndices()) {
306 if (!pdg.getDependencyNode(index).isProjective()
307 && (shortestNonProjectiveNode == null
308 || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex())
309 || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) {
310 shortestNonProjectiveNode = pdg.getDependencyNode(index);
311 }
312 }
313 return shortestNonProjectiveNode;
314 }
315
316
317 private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException {
318 nodeRelationLength.add(0);
319 for (int index : pdg.getTokenIndices()) {
320 nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex()));
321 }
322 }
323
324 private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException {
325 int newLabelCode;
326 for (int index : pdg.getTokenIndices()) {
327 if (!isCoveredRoot(pdg.getDependencyNode(index))) {
328 if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH
329 || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
330 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
331 if (nodeLifted.get(index)) {
332 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#");
333 } else {
334 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
335 }
336 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
337 } else {
338 if (nodeLifted.get(index)) {
339 newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(
340 headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable)));
341 } else {
342 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
343 }
344 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
345 }
346 }
347
348 if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
349 if (nodePath.get(index)) {
350 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#");
351 } else {
352 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#");
353 }
354 pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode);
355 }
356
357 // if (markingStrategy == PseudoProjectiveEncoding.TRACE) {
358 // if (nodeLifted.get(i)) {
359 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getNode(i).getLabelCode(deprelSymbolTable)) + "|";
360 // addArc(pdg, pdg.getNode(i), pdg.getNode(i).getHead(), newLabel);
361 // }
362 // }
363 } else if (rootAttachment != CoveredRootAttachment.NONE) {
364 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
365 }
366 }
367 }
368
369 private void setLabel(DependencyNode node, String label) throws MaltChainedException {
370 // node.getLabelCode().clear();
371 node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label));
372 }
373
374 private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException {
375 Vector<String> originalDeprel = new Vector<String>();
376 String newLabel;
377 originalDeprel.add(null);
378 for (int index : pdg.getTokenIndices()) {
379 originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)));
380 }
381 for (int index : pdg.getTokenIndices()) {
382 newLabel = null;
383 if (!isCoveredRoot(pdg.getDependencyNode(index))) {
384 if (markingStrategy == PseudoProjectiveEncoding.HEAD) {
385 if (nodeLifted.get(index)) {
386 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
387 + originalDeprel.get(headDeprel.get(index).getIndex());
388 // } else {
389 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
390 }
391 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
392 if (nodeLifted.get(index) && nodePath.get(index)) {
393 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%";
394 } else if (nodeLifted.get(index) && !nodePath.get(index)) {
395 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
396 } else if (!nodeLifted.get(index) && nodePath.get(index)) {
397 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%";
398 }
399 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
400 if (nodeLifted.get(index) && nodePath.get(index)) {
401 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
402 + originalDeprel.get(headDeprel.get(index).getIndex()) + "%";
403 } else if (nodeLifted.get(index) && !nodePath.get(index)) {
404 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
405 + originalDeprel.get(headDeprel.get(index).getIndex());
406 } else if (!nodeLifted.get(index) && nodePath.get(index)) {
407 newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%";
408 }
409 } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) {
410 if (nodeLifted.get(index)) {
411 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
412 }
413 }
414 } else if (rootAttachment != CoveredRootAttachment.NONE) {
415 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null";
416 }
417 if (newLabel != null) {
418 setLabel(pdg.getDependencyNode(index), newLabel);
419 }
420 }
421 }
422
423 public void deprojectivize(DependencyStructure pdg) throws MaltChainedException {
424 initDeprojeciviztion(pdg);
425
426 for (int index : pdg.getTokenIndices()) {
427 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
428 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable)
429 && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) {
430 setPath(pdg.getDependencyNode(index));
431 }
432 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable)
433 && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) {
434 nodeLifted.set(index, true);
435 if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) {
436 synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge()
437 .getLabelCode(ppliftedSymbolTable)));
438 }
439 }
440 }
441 }
442 deattachCoveredRootsForDeprojectivization(pdg);
443 if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) {
444 deprojectivizeWithHead(pdg, pdg.getDependencyRoot());
445 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
446 deprojectivizeWithPath(pdg, pdg.getDependencyRoot());
447 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
448 deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot());
449 // } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) {
450 // deprojectivizeWithTrace(pdg, pdg.getRoot());
451 }
452 }
453
454 private void initDeprojeciviztion(DependencyStructure pdg) {
455 nodeLifted.clear();
456 nodePath.clear();
457 synacticHeadDeprel.clear();
458 for (int index : pdg.getDependencyIndices()) {
459 nodeLifted.add(false);
460 nodePath.add(false);
461 synacticHeadDeprel.add(null);
462 }
463 }
464
465 private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException {
466 for (int index : pdg.getTokenIndices()) {
467 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
468 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable)
469 && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals(
470 "#true#")) {
471 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex());
472 }
473 }
474 }
475 }
476
477 // Check whether there is at least one node in the specified dependency structure that can be lifted.
478 // If this is not the case, there is no need to call deprojectivizeWithHead.
479
480 private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException {
481 for (int index : pdg.getDependencyIndices()) {
482 if (nodeLifted.get(index)) {
483 DependencyNode node = pdg.getDependencyNode(index);
484 if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) {
485 return true;
486 }
487 }
488 }
489 return false;
490 }
491
492 private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
493 boolean success = true, childSuccess = false;
494 int i, childAttempts = 2;
495 DependencyNode child, possibleSyntacticHead;
496 String syntacticHeadDeprel;
497 if (nodeLifted.get(node.getIndex())) {
498 syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex());
499 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel);
500 if (possibleSyntacticHead != null) {
501 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
502 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet());
503 nodeLifted.set(node.getIndex(), false);
504 } else {
505 success = false;
506 }
507 }
508 while (!childSuccess && childAttempts > 0) {
509 childSuccess = true;
510 Vector<DependencyNode> children = new Vector<DependencyNode>();
511 i = 0;
512 while ((child = node.getLeftDependent(i)) != null) {
513 children.add(child);
514 i++;
515 }
516 i = 0;
517 while ((child = node.getRightDependent(i)) != null) {
518 children.add(child);
519 i++;
520 }
521 for (i = 0; i < children.size(); i++) {
522 child = children.get(i);
523 if (!deprojectivizeWithHead(pdg, child)) {
524 childSuccess = false;
525 }
526 }
527 childAttempts--;
528 }
529 return childSuccess && success;
530 }
531
532 private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel)
533 throws MaltChainedException {
534 DependencyNode dependent;
535 String dependentDeprel;
536 Vector<DependencyNode> nodes = new Vector<DependencyNode>();
537 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false));
538 while (nodes.size() > 0) {
539 dependent = nodes.remove(0);
540 if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) {
541 dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable));
542 if (dependentDeprel.equals(syntacticHeadDeprel)) {
543 return dependent;
544 }
545 }
546 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false));
547 }
548 return null;
549 }
550
551 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
552 boolean percentOnly) {
553 int i, j;
554 Vector<DependencyNode> dependents = new Vector<DependencyNode>();
555 DependencyNode leftChild, rightChild;
556
557 i = governor.getLeftDependentCount() - 1;
558 j = 0;
559 leftChild = governor.getLeftDependent(i--);
560 rightChild = governor.getRightDependent(j++);
561
562 while (leftChild != null && rightChild != null) {
563 if (leftChild == avoid) {
564 leftChild = governor.getLeftDependent(i--);
565 } else if (rightChild == avoid) {
566 rightChild = governor.getRightDependent(j++);
567 } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) {
568 if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) {
569 dependents.add(leftChild);
570 }
571 leftChild = governor.getLeftDependent(i--);
572 } else {
573 if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) {
574 dependents.add(rightChild);
575 }
576 rightChild = governor.getRightDependent(j++);
577 }
578 }
579 while (leftChild != null) {
580 if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) {
581 dependents.add(leftChild);
582 }
583 leftChild = governor.getLeftDependent(i--);
584 }
585 while (rightChild != null) {
586 if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) {
587 dependents.add(rightChild);
588 }
589 rightChild = governor.getRightDependent(j++);
590 }
591 return dependents;
592 }
593
594 private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
595 boolean success = true, childSuccess = false;
596 int i, childAttempts = 2;
597 DependencyNode child, possibleSyntacticHead;
598 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
599 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
600 if (possibleSyntacticHead != null) {
601 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
602 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet());
603 nodeLifted.set(node.getIndex(), false);
604 } else {
605 success = false;
606 }
607 }
608 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
609 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
610 if (possibleSyntacticHead != null) {
611 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
612 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet());
613 nodeLifted.set(node.getIndex(), false);
614 } else {
615 success = false;
616 }
617 }
618 while (!childSuccess && childAttempts > 0) {
619 childSuccess = true;
620 Vector<DependencyNode> children = new Vector<DependencyNode>();
621 i = 0;
622 while ((child = node.getLeftDependent(i)) != null) {
623 children.add(child);
624 i++;
625 }
626 i = 0;
627 while ((child = node.getRightDependent(i)) != null) {
628 children.add(child);
629 i++;
630 }
631 for (i = 0; i < children.size(); i++) {
632 child = children.get(i);
633 if (!deprojectivizeWithPath(pdg, child)) {
634 childSuccess = false;
635 }
636 }
637 childAttempts--;
638 }
639 return childSuccess && success;
640 }
641
642 private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) {
643 DependencyNode dependent;
644 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes;
645 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
646 while (nodes.size() > 0) {
647 dependent = nodes.remove(0);
648 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) {
649 return dependent;
650 }
651 nodes.addAll(newNodes);
652 }
653 return null;
654 }
655
656 private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
657 boolean success = true, childSuccess = false;
658 int i, childAttempts = 2;
659 DependencyNode child, possibleSyntacticHead;
660 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
661 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
662 .getIndex()));
663 if (possibleSyntacticHead != null) {
664 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
665 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet());
666 nodeLifted.set(node.getIndex(), false);
667 } else {
668 success = false;
669 }
670 }
671 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
672 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
673 .getIndex()));
674 if (possibleSyntacticHead != null) {
675 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
676 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet());
677 nodeLifted.set(node.getIndex(), false);
678 } else {
679 success = false;
680 }
681 }
682 while (!childSuccess && childAttempts > 0) {
683 childSuccess = true;
684 Vector<DependencyNode> children = new Vector<DependencyNode>();
685 i = 0;
686 while ((child = node.getLeftDependent(i)) != null) {
687 children.add(child);
688 i++;
689 }
690 i = 0;
691 while ((child = node.getRightDependent(i)) != null) {
692 children.add(child);
693 i++;
694 }
695 for (i = 0; i < children.size(); i++) {
696 child = children.get(i);
697 if (!deprojectivizeWithHeadAndPath(pdg, child)) {
698 childSuccess = false;
699 }
700 }
701 childAttempts--;
702 }
703 return childSuccess && success;
704 }
705
706 private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode)
707 throws MaltChainedException {
708 DependencyNode dependent;
709 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>();
710 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
711 while (nodes.size() > 0) {
712 dependent = nodes.remove(0);
713 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0
714 && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) {
715 return dependent;
716 }
717 nodes.addAll(newNodes);
718 if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)
719 && newNodes.size() != 0) {
720 secondChance.add(dependent);
721 }
722 }
723 if (secondChance.size() > 0) {
724 return secondChance.firstElement();
725 }
726 return null;
727 }
728 }