// $Id: OrphanedWordOutfilter.java 4699 2008-10-16 14:19:53Z nigelw $
// (c) 2007 DeltaXML Ltd. All rights reserved.

package com.deltaxml.pipe.filters;

import java.util.Iterator;
import java.util.ArrayList;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import com.deltaxml.pipe.XMLFilterImpl2;

/**
 * <p>Converts unchanged <code>deltaxml:word</code> elements into modified <code>deltaxml:word</code>
 * elements if they are surrounded by a large sequence of changed <code>deltaxml:word</code> elements.
 * </p>
 * <p><strong>Example</strong></p>
 * <p>The following delta (pretty printed) contains a single unchanged word, "common-word". Even though this
 * word has been correctly matched, in most cases the word is only coincidentally the same (i.e. in a large
 * change the word happens to appear in both the new and old versions).</p>
 * <pre>
 * &lt;root deltaxml:delta="WFmodify"&gt;
 * &nbsp;&nbsp;&lt;deltaxml:word deltaxml:delta="WFmodify"&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAmodify&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAold&gt;word1&lt;/deltaxml:PCDATAold&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAnew&gt;word3&lt;/deltaxml:PCDATAnew&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&lt;/deltaxml:PCDATAmodify&gt;
 * &nbsp;&nbsp;&lt;/deltaxml:word&gt;
 * &nbsp;&nbsp;&lt;deltaxml:space deltaxml:delta="unchanged"&gt; &lt;/deltaxml:space&gt;
 * &nbsp;&nbsp;&lt;deltaxml:word deltaxml:delta="WFmodify"&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAmodify&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAold&gt;word2&lt;/deltaxml:PCDATAold&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAnew&gt;word8&lt;/deltaxml:PCDATAnew&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&lt;/deltaxml:PCDATAmodify&gt;
 * &nbsp;&nbsp;&lt;/deltaxml:word&gt;
 * &nbsp;&nbsp;&lt;deltaxml:space deltaxml:delta="unchanged"&gt; &lt;/deltaxml:space&gt;
 * &nbsp;&nbsp;&lt;deltaxml:word deltaxml:delta="unchanged"&gt;common-word&lt;/deltaxml:word&gt;
 * &nbsp;&nbsp;&lt;deltaxml:space deltaxml:delta="unchanged"&gt; &lt;/deltaxml:space&gt;
 * &nbsp;&nbsp;&lt;deltaxml:word deltaxml:delta="WFmodify"&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAmodify&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAold&gt;word4&lt;/deltaxml:PCDATAold&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAnew&gt;word6&lt;/deltaxml:PCDATAnew&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&lt;/deltaxml:PCDATAmodify&gt;
 * &nbsp;&nbsp;&lt;/deltaxml:word&gt;
 * &nbsp;&nbsp;&lt;deltaxml:space deltaxml:delta="unchanged"&gt; &lt;/deltaxml:space&gt;
 * &nbsp;&nbsp;&lt;deltaxml:word deltaxml:delta="WFmodify"&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAmodify&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAold&gt;word5&lt;/deltaxml:PCDATAold&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAnew&gt;word7&lt;/deltaxml:PCDATAnew&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&lt;/deltaxml:PCDATAmodify&gt;
 * &nbsp;&nbsp;&lt;/deltaxml:word&gt;
 * &lt;/root&gt;
 * </pre>
 * <p>The filter identifies the orphaned word, and changes it into a modified word. So our common word
 * becomes:</p>
 * <pre>
 * &lt;deltaxml:word deltaxml:delta="WFmodify"&gt;
 * &nbsp;&nbsp;&lt;deltaxml:PCDATAmodify&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAold&gt;common-word&lt;/deltaxml:PCDATAold&gt;
 * &nbsp;&nbsp;&nbsp;&nbsp;&lt;deltaxml:PCDATAnew&gt;common-word&lt;/deltaxml:PCDATAnew&gt;
 * &nbsp;&nbsp;&lt;/deltaxml:PCDATAmodify&gt;
 * &lt;/deltaxml:word&gt;
 * </pre>
 * <p><strong>Properties</strong></p>
 * <p>The filter has two properties, as follows:</p>
 * <p><strong>orphanedLengthLimit</strong> sets a hard limit on the number of
 * unchanged words that would make up an "orphaned" section.</p>
 * <p><i>Default value is 2.</i></p>
 * <p><strong>orphanedThresholdPercentage</strong> sets the percentage threshold for calculating
 * whether a group is orphaned. If the equation:</p>
 * <p> <code>unchanged words / (changed words before + unchanged words + changed words after) * 100</code></p>
 * <p>is less than or equal to the value of orphanedThresholdPercentage (and the number of unchanged words does
 * not exceed orphanedLengthLimit) then the orphaned words are incorporated into the changed words.</p>
 * <p><i>Default value is 20.</i></p>
 * @version $Id: OrphanedWordOutfilter.java 4699 2008-10-16 14:19:53Z nigelw $
 * @author Christopher Cormack
 * @see WordByWordInfilter
 *
 */
public final class OrphanedWordOutfilter extends XMLFilterImpl2
{
  // DeltaXML namespace URI and prefix, the element local names / qualified names this filter
  // recognises, and the values of the deltaxml:delta attribute it reads and writes
  private static final String DELTAXML_NS= "http://www.deltaxml.com/ns/well-formed-delta-v1";
  private static final String DXML_PREFIX= "deltaxml";
  private static final String SPACE_LOCAL_NAME= "space";
  private static final String WORD_LOCAL_NAME= "word";
  private static final String PUNCTUATION_LOCAL_NAME= "punctuation";
  private static final String PCDATA_MODIFY_LOCAL= "PCDATAmodify";
  private static final String PCDATA_MODIFY= DXML_PREFIX + ":" + PCDATA_MODIFY_LOCAL;
  private static final String PCDATA_OLD_LOCAL= "PCDATAold";
  private static final String PCDATA_OLD= DXML_PREFIX + ":" + PCDATA_OLD_LOCAL;
  private static final String PCDATA_NEW_LOCAL= "PCDATAnew";
  private static final String PCDATA_NEW= DXML_PREFIX + ":" + PCDATA_NEW_LOCAL;
  private static final String DELTA_LOCAL_NAME= "delta";
  private static final String MODIFY= "WFmodify";
  private static final String ADD= "add";
  private static final String DELETE= "delete";
  
  // Default threshold values
  /**
   * The default value for orphanedLengthLimit, set at 2.
   * @see #setorphanedLengthLimit(String)
   */
  public static final int DEFAULT_ORPHANED_WORD_LIMIT= 2;
  /**
   * The default value for orphanedThresholdPercentage, set at 20.
   * @see #setorphanedThresholdPercentage(String)
   */
  public static final int DEFAULT_ORPHANED_THRESHOLD_PERCENTAGE= 20;

  // count of changed words before the possibly orphaned section; judgeOrphaned() copies
  // followingOrphanedWordCount into it so consecutive orphaned sections can be judged
  private int preceedingOrphanedWordCount= 0;
  // count of unchanged words/punctuations in the possibly orphaned section
  private int countOrphaned= 0;
  // count of changed words seen since the last judgement (i.e. following the possibly orphaned section)
  private int followingOrphanedWordCount= 0;
  
  // temporary storage of the current word/punctuation/space element - is output in outputCurrent()
  private String currentLocalName;
  private Attributes currentAttributes;
  // which value the current element's deltaxml:delta attribute had (WFmodify / add / delete)
  private boolean currentModified;
  private boolean currentAdd;
  private boolean currentDelete;
  // text collected for the current element (unchanged text is collected in currentOldPCData)
  private String currentNewPCData;
  private String currentOldPCData;
  // set to currentModified when a PCDATAmodify child of the current element starts
  private boolean currentPCData;

  // list for storing possibly orphaned words/punctuations/spaces - output in outputBuffer()
  private ArrayList orphanedList= new ArrayList();
  // list for storing words/punctuations/spaces in a section following a possibly orphaned section - output in outputBuffer()
  private ArrayList pendingList= new ArrayList();
  
  // is the current element being output straight away (set by startElement()/endElement()
  // for elements this filter does not buffer)
  private boolean outputCurrentElement= false;
  
  // model of the current element's state
  private boolean inWord= false;
  private boolean inSpace= false;
  private boolean inPunctuation= false;
  private boolean inPCDataModified= false;
  private boolean inPCDataNew= false;
  private boolean inPCDataOld= false;
  
  // was the previous word modified
  private boolean previousWordModified= false;
  // is the current section possibly orphaned (remains true when we're following the possibly orphaned section)
  private boolean possiblyOrphaned= false;
  // is the current section following a possibly orphaned section
  private boolean followingPossiblyOrphaned= false;
  
  // set the initial values for the threshold detection (protected to aid testing);
  // an orphanedLengthLimit of 0 makes the SAX callbacks pass events straight through
  protected int orphanedLengthLimit= DEFAULT_ORPHANED_WORD_LIMIT;
  protected int orphanedThresholdPercentage= DEFAULT_ORPHANED_THRESHOLD_PERCENTAGE;
  
  /**
   * Creates a new instance of <code>OrphanedWordOutfilter</code>.
   */
  public OrphanedWordOutfilter() {
    super();
  }

  /**
   * Forwards the prefix mapping to the superclass unchanged; this filter does not
   * buffer or alter namespace declarations.
   * @param prefix the namespace prefix being declared
   * @param uri the namespace URI the prefix is mapped to
   * @throws SAXException the superclass may throw an exception during processing.
   * @see XMLFilterImpl#startPrefixMapping(String, String)
   */
  public void startPrefixMapping(String prefix, String uri)
      throws SAXException
  {
    super.startPrefixMapping(prefix, uri);
  }

  /**
   * Handles the start of an element.
   * <p>When <code>orphanedLengthLimit</code> is 0 the event is passed straight through. Otherwise
   * <code>deltaxml:word</code>, <code>deltaxml:punctuation</code> and <code>deltaxml:space</code>
   * start tags are not forwarded; their name, attributes and change type are recorded so the element
   * can be emitted later. <code>PCDATAmodify</code>, <code>PCDATAold</code> and <code>PCDATAnew</code>
   * start tags only update the state flags. Any other element first flushes the buffered
   * words via {@link #judgeOrphaned()} and is then forwarded unchanged.</p>
   * @throws SAXException the superclass may throw an exception during processing.
   * @see XMLFilterImpl#startElement(String, String, String, Attributes)
   */
  public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
    if (orphanedLengthLimit == 0) {
      // a limit of zero disables the filter
      super.startElement(uri, localName, qName, atts);
      return;
    }
    
    outputCurrentElement= false;
    if (DELTAXML_NS.equals(uri)) {
      if (localName.equals(WORD_LOCAL_NAME) || localName.equals(PUNCTUATION_LOCAL_NAME) || localName.equals(SPACE_LOCAL_NAME)) {
        currentLocalName= localName;
        // copy the attributes: the parser may reuse the Attributes object after this call
        currentAttributes= new AttributesImpl(atts);
        currentNewPCData= "";
        currentOldPCData= "";
        currentPCData= false;
        // Derive all three change flags from this element's own delta attribute. When the
        // attribute is absent every flag must be false; previously currentAdd/currentDelete
        // kept the values of the preceding element in that case.
        String delta= atts.getValue(DELTAXML_NS, DELTA_LOCAL_NAME);
        currentModified= MODIFY.equals(delta);
        currentAdd= ADD.equals(delta);
        currentDelete= DELETE.equals(delta);
        inWord= localName.equals(WORD_LOCAL_NAME);
        inPunctuation= localName.equals(PUNCTUATION_LOCAL_NAME);
        inSpace= localName.equals(SPACE_LOCAL_NAME);
      } else if (localName.equals(PCDATA_MODIFY_LOCAL)) {
        inPCDataModified= true;
        currentPCData= currentModified;
      } else if (localName.equals(PCDATA_OLD_LOCAL) || localName.equals(PCDATA_NEW_LOCAL)) {
        inPCDataNew= localName.equals(PCDATA_NEW_LOCAL);
        inPCDataOld= localName.equals(PCDATA_OLD_LOCAL);
      } else /* conditions: exchange, exchange new and exchange old */ {
        outputCurrentElement= true;
      }
    } else /* conditions: non-deltaXML elements */ {
      outputCurrentElement= true;
    }
    
    if (outputCurrentElement) {
      // this outputs the buffers - this is needed to output the buffer when a possibly 
      // orphaned/following possibly orphaned section is ended by either a non-deltaxml element
      // or an exchange element (test 28)
      judgeOrphaned();
      super.startElement(uri, localName, qName, atts);
    }
  }

  /**
   * Handles the end of an element.
   * <p>With <code>orphanedLengthLimit</code> set to 0 the event is passed straight through.
   * Otherwise the end of a word/punctuation/space element emits the stored element (unless it
   * belongs to a possibly orphaned section, in which case it has been buffered instead), the end
   * of a PCDATA wrapper only clears the matching state flags, and the end of any other element
   * flushes the buffers before being forwarded.</p>
   * @throws SAXException the superclass may throw an exception during processing.
   * @see XMLFilterImpl#endElement(String, String, String)
   */
  public void endElement(String uri, String localName, String qName) throws SAXException {
    if (orphanedLengthLimit == 0) {
      super.endElement(uri, localName, qName);
      return;
    }

    outputCurrentElement= false;
    if (!uri.equals(DELTAXML_NS)) {
      // non-deltaXML elements are forwarded
      outputCurrentElement= true;
    } else if (localName.equals(WORD_LOCAL_NAME) || localName.equals(PUNCTUATION_LOCAL_NAME) || localName.equals(SPACE_LOCAL_NAME)) {
      if (!possiblyOrphaned) {
        outputCurrent();
      }
      inWord= inSpace= inPunctuation= false;
    } else if (localName.equals(PCDATA_MODIFY_LOCAL)) {
      inPCDataModified= false;
    } else if (localName.equals(PCDATA_NEW_LOCAL) || localName.equals(PCDATA_OLD_LOCAL)) {
      inPCDataNew= false;
      inPCDataOld= false;
    } else {
      // exchange, exchange new and exchange old are forwarded as well
      outputCurrentElement= true;
    }

    if (!outputCurrentElement) {
      return;
    }
    // Flush the buffers first: a possibly orphaned / following possibly orphaned section
    // ends when a non-deltaxml element or an exchange element is closed.
    judgeOrphaned();
    super.endElement(uri, localName, qName);
  }

  /**
   * Handles character data.
   * <p>With <code>orphanedLengthLimit</code> set to 0 the event is passed straight through.
   * Otherwise the filter state is advanced first (an unchanged word/punctuation after a modified
   * one opens a possibly orphaned section; a changed word/punctuation inside such a section opens
   * the following section) and the text is then either forwarded or handed to the collector that
   * matches the current state.</p>
   * @throws SAXException the superclass may throw an exception during processing.
   * @see XMLFilterImpl#characters(char[], int, int)
   */
  public void characters(char [] ch, int start, int length) throws SAXException {
    if (orphanedLengthLimit == 0) {
      super.characters(ch, start, length);
      return;
    }

    final boolean inToken= inWord || inPunctuation;
    final boolean changedText= inPCDataModified || currentAdd || currentDelete;
    if (inToken) {
      if (changedText) {
        if (possiblyOrphaned) {
          // we have entered a section following a possibly orphaned section
          followingPossiblyOrphaned= true;
          previousWordModified= true;
        }
      } else if (previousWordModified && (followingPossiblyOrphaned || !possiblyOrphaned)) {
        // we have entered a possibly orphaned section; judge whatever was buffered before it
        judgeOrphaned();
        possiblyOrphaned= true;
        followingPossiblyOrphaned= false;
        previousWordModified= false;
      }
    }

    if (outputCurrentElement) {
      super.characters(ch, start, length);
      return;
    }
    if (!possiblyOrphaned) {
      notPossiblyOrphanedCharacters(ch, start, length);
    } else if (followingPossiblyOrphaned) {
      followingPossiblyOrphanedCharacters(ch, start, length);
    } else {
      possiblyOrphanedCharacters(ch, start, length);
    }
  }
  
  /**
   * Collects text for the current element while the filter is neither in a possibly orphaned
   * section nor in the section following one. Old and deleted text, as well as unchanged text,
   * is appended to <code>currentOldPCData</code>; new and added text is appended to
   * <code>currentNewPCData</code>. For words and punctuation the <code>previousWordModified</code>
   * flag is updated, and changed text other than PCDATAnew also increments
   * <code>followingOrphanedWordCount</code> (PCDATAnew is not counted again because its PCDATAold
   * partner has already been counted).
   */
  private void notPossiblyOrphanedCharacters(char [] ch, int start, int length) {
    final boolean newSide= !inPCDataOld && inPCDataNew;
    final boolean changed= inPCDataOld || inPCDataNew || currentDelete || currentAdd;
    // branch priority of the original chain: PCDATAold, PCDATAnew, delete, add, unchanged
    final boolean storeAsNew= !inPCDataOld && (inPCDataNew || (!currentDelete && currentAdd));

    if (inWord || inPunctuation) {
      if (changed && !newSide) {
        followingOrphanedWordCount++;
      }
      previousWordModified= changed;
    }

    final String text= new String(ch, start, length);
    if (storeAsNew) {
      currentNewPCData+= text;
    } else {
      currentOldPCData+= text;
    }
  }
  
  /**
   * This method is called by the characters method when the current element is in a section
   * following a section that is possibly orphaned. Text of a PCDataOld element creates a modified
   * DeltaElement that is appended to the pending list; text of a PCDataNew element is stored as
   * the new value of the last DeltaElement in the pending list (the old PCData value appears
   * before the new one in the delta file). Added and deleted elements create a DeltaElement
   * holding the new or old value respectively, and an unchanged space creates a DeltaElement
   * holding the unchanged value.
   * <p>Text that arrives while the filter is not inside a word, space or punctuation element is
   * ignored, as it is in the other two collecting states; previously a <code>null</code> entry
   * was added to the pending list and a NullPointerException followed.</p>
   */
  private void followingPossiblyOrphanedCharacters(char [] ch, int start, int length) {
    String text= new String(ch, start, length);
    if (inPCDataOld) {
      DeltaElement current= newFollowingElement(DeltaElement.MODIFY);
      if (current != null) {
        current.setOldValue(text);
        pendingList.add(current);
      }
    } else if (inPCDataNew) {
      // NOTE(review): assumes the PCDATAold text of the same element was buffered just before - confirm
      ((DeltaElement)pendingList.get(pendingList.size() - 1)).setNewValue(text);
    } else if (currentAdd) {
      DeltaElement current= newFollowingElement(DeltaElement.ADD);
      if (current != null) {
        current.setNewValue(text);
        pendingList.add(current);
      }
    } else if (currentDelete) {
      DeltaElement current= newFollowingElement(DeltaElement.DELETE);
      if (current != null) {
        current.setOldValue(text);
        pendingList.add(current);
      }
    } else if (inSpace) {
      // An unchanged space. An unchanged word or punctuation cannot reach this point:
      // characters() moves the filter into a new possibly orphaned section first.
      DeltaElement current= new DeltaSpaceElement(DeltaElement.UNCHANGED, currentAttributes);
      current.setUnchangedValue(text);
      pendingList.add(current);
    }
  }

  /**
   * Creates the buffer entry for the element currently being read in the following section,
   * counting it towards <code>followingOrphanedWordCount</code> when it is a word (punctuation
   * and spaces are not counted here).
   * @param type one of the <code>DeltaElement</code> type constants
   * @return the new entry, or <code>null</code> when the filter is not inside a word, space or
   *         punctuation element
   */
  private DeltaElement newFollowingElement(byte type) {
    if (inWord) {
      followingOrphanedWordCount++;
      return new DeltaWordElement(type, currentAttributes);
    }
    if (inSpace) {
      return new DeltaSpaceElement(type, currentAttributes);
    }
    if (inPunctuation) {
      return new DeltaPunctuationElement(type, currentAttributes);
    }
    return null;
  }
  
  /**
   * Buffers text that belongs to a possibly orphaned section. A word or punctuation is always
   * unchanged here (a changed one would already have moved the filter into the following
   * section), so it is counted in <code>countOrphaned</code> and stored as an unchanged
   * DeltaElement. A space may be in any state: PCDataOld text creates a modified DeltaElement,
   * PCDataNew text is stored as the new value of the last DeltaElement in the orphaned list (the
   * old PCData value comes first in the delta file), and added, deleted or unchanged text creates
   * a DeltaElement of the matching type. Text outside those three elements is ignored.
   */
  private void possiblyOrphanedCharacters(char [] ch, int start, int length) {
    if (inWord || inPunctuation) {
      countOrphaned++;
      DeltaElement token= inWord
          ? (DeltaElement)new DeltaWordElement(DeltaElement.UNCHANGED, currentAttributes)
          : (DeltaElement)new DeltaPunctuationElement(DeltaElement.UNCHANGED, currentAttributes);
      token.setUnchangedValue(new String(ch, start, length));
      orphanedList.add(token);
      return;
    }
    if (!inSpace) {
      return;
    }

    if (!inPCDataOld && inPCDataNew) {
      // completes the entry that was created for the matching PCDataOld text
      DeltaElement previous= (DeltaElement)orphanedList.get(orphanedList.size() - 1);
      previous.setNewValue(new String(ch, start, length));
      return;
    }

    DeltaElement space;
    if (inPCDataOld) {
      space= new DeltaSpaceElement(DeltaElement.MODIFY, currentAttributes);
      space.setOldValue(new String(ch, start, length));
    } else if (currentAdd) {
      space= new DeltaSpaceElement(DeltaElement.ADD, currentAttributes);
      space.setNewValue(new String(ch, start, length));
    } else if (currentDelete) {
      space= new DeltaSpaceElement(DeltaElement.DELETE, currentAttributes);
      space.setOldValue(new String(ch, start, length));
    } else {
      // unchanged space in a possibly orphaned section
      space= new DeltaSpaceElement(DeltaElement.UNCHANGED, currentAttributes);
      space.setUnchangedValue(new String(ch, start, length));
    }
    orphanedList.add(space);
  }
  
  /**
   * Emits the element recorded by startElement()/characters() to the superclass. A modified
   * element for which a PCDATAmodify child was seen is written with PCDATAmodify/PCDATAold/PCDATAnew
   * wrappers; an added element is written with its new text; every other element (deleted or
   * unchanged) is written with the text collected in <code>currentOldPCData</code>. Empty text
   * produces no characters event.
   * @throws SAXException the superclass may throw an exception during processing.
   */
  private void outputCurrent () throws SAXException {
    final String qualifiedName= DXML_PREFIX + ":" + currentLocalName;
    super.startElement(DELTAXML_NS, currentLocalName, qualifiedName, currentAttributes);

    if (currentModified && currentPCData) {
      super.startElement(DELTAXML_NS, PCDATA_MODIFY_LOCAL, PCDATA_MODIFY, new AttributesImpl());

      super.startElement(DELTAXML_NS, PCDATA_OLD_LOCAL, PCDATA_OLD, new AttributesImpl());
      if (currentOldPCData.length() != 0) {
        super.characters(currentOldPCData.toCharArray(), 0, currentOldPCData.length());
      }
      super.endElement(DELTAXML_NS, PCDATA_OLD_LOCAL, PCDATA_OLD);

      super.startElement(DELTAXML_NS, PCDATA_NEW_LOCAL, PCDATA_NEW, new AttributesImpl());
      if (currentNewPCData.length() != 0) {
        super.characters(currentNewPCData.toCharArray(), 0, currentNewPCData.length());
      }
      super.endElement(DELTAXML_NS, PCDATA_NEW_LOCAL, PCDATA_NEW);

      super.endElement(DELTAXML_NS, PCDATA_MODIFY_LOCAL, PCDATA_MODIFY);
    } else {
      // only an added element carries its text on the "new" side
      final String text= currentAdd ? currentNewPCData : currentOldPCData;
      if (text.length() != 0) {
        super.characters(text.toCharArray(), 0, text.length());
      }
    }

    super.endElement(DELTAXML_NS, currentLocalName, qualifiedName);
  }

  /**
   * This method judges whether the current buffered data (i.e. orphanedList and pendingList) represents
   * an orphaned situation. The section is orphaned when it contains at least one word, has changed words
   * both before and after it, contains no more than <code>orphanedLengthLimit</code> words, and
   * <code>unchanged / (before + unchanged + after) * 100</code> is less than or equal to
   * <code>orphanedThresholdPercentage</code>. If it is deemed as orphaned, then the elements in
   * orphanedList are output with the words flagged PCDataModified (using the same unchanged PCData as
   * both the new and old PCData). If the possibly orphaned words are not deemed orphaned, then they are
   * outputted as they were initially encountered. The pendingList is always outputted as the elements
   * were initially encountered. Afterwards the state flags, counters and both buffers are reset.
   * @throws SAXException the superclass may throw an exception while the buffers are output.
   */
  private void judgeOrphaned() throws SAXException {
    long totalCount= (long)preceedingOrphanedWordCount + followingOrphanedWordCount + countOrphaned;

    boolean surrounded= countOrphaned > 0 && preceedingOrphanedWordCount > 0 && followingOrphanedWordCount > 0;
    // Exact form of "countOrphaned / totalCount * 100 <= threshold". Truncating the percentage to an
    // int first would let e.g. 20.83% pass a threshold of 20, and it also avoids dividing by zero.
    boolean withinThreshold= countOrphaned * 100L <= (long)orphanedThresholdPercentage * totalCount;
    boolean orphaned= surrounded && withinThreshold && countOrphaned <= orphanedLengthLimit;
    
    outputBuffer(orphaned);
    previousWordModified= false;
    possiblyOrphaned= false;
    followingPossiblyOrphaned= false;
    
    // shift the following word count to the preceding word count - this adds
    // support for multiple orphaned sections
    preceedingOrphanedWordCount= followingOrphanedWordCount;
    countOrphaned= 0;
    followingOrphanedWordCount= 0;

    pendingList= new ArrayList();
    orphanedList= new ArrayList();
  }
  
  /**
   * Writes out both buffers in document order: first every entry of the orphaned list (converted
   * to modified words when <code>orphaned</code> is true), then every entry of the pending list.
   * The lists themselves are left untouched.
   * @param orphaned whether the buffered section was judged to be orphaned
   * @throws SAXException the superclass may throw an exception during processing.
   * @see #judgeOrphaned()
   */
  private void outputBuffer(boolean orphaned) throws SAXException {
    for (Iterator it= orphanedList.iterator(); it.hasNext(); ) {
      DeltaElement buffered= (DeltaElement)it.next();
      outputOrphaned(buffered, orphaned);
    }

    for (Iterator it= pendingList.iterator(); it.hasNext(); ) {
      DeltaElement buffered= (DeltaElement)it.next();
      outputPending(buffered);
    }
  }

  /**
   * Emits one buffered element of the section following a possibly orphaned section exactly as
   * it was encountered: the delta attribute is set to the value matching the element's type
   * (left alone for unchanged elements) and the content is written as PCDATAmodify/old/new
   * wrappers, new text, old text or unchanged text respectively.
   * @param element the buffered element to write
   * @throws SAXException the superclass may throw an exception during processing.
   * @see #judgeOrphaned()
   */
  private void outputPending(DeltaElement element) throws SAXException {
    final AttributesImpl atts= element.getAttributes();
    final String localName= element.getLocalName();
    final String qualifiedName= DXML_PREFIX + ":" + localName;
    final byte type= element.getType();

    String deltaValue= null;
    switch (type) {
      case DeltaElement.MODIFY:
        deltaValue= MODIFY;
        break;
      case DeltaElement.ADD:
        deltaValue= ADD;
        break;
      case DeltaElement.DELETE:
        deltaValue= DELETE;
        break;
      default:
        break;
    }
    if (deltaValue != null) {
      atts.setValue(atts.getIndex(DELTAXML_NS, DELTA_LOCAL_NAME), deltaValue);
    }

    super.startElement(DELTAXML_NS, localName, qualifiedName, atts);
    switch (type) {
      case DeltaElement.MODIFY: {
        final String oldText= element.getOldValue();
        final String newText= element.getNewValue();

        super.startElement(DELTAXML_NS, PCDATA_MODIFY_LOCAL, PCDATA_MODIFY, new AttributesImpl());
        super.startElement(DELTAXML_NS, PCDATA_OLD_LOCAL, PCDATA_OLD, new AttributesImpl());
        if (oldText != null && oldText.length() != 0) {
          super.characters(oldText.toCharArray(), 0, oldText.length());
        }
        super.endElement(DELTAXML_NS, PCDATA_OLD_LOCAL, PCDATA_OLD);
        super.startElement(DELTAXML_NS, PCDATA_NEW_LOCAL, PCDATA_NEW, new AttributesImpl());
        if (newText != null && newText.length() != 0) {
          super.characters(newText.toCharArray(), 0, newText.length());
        }
        super.endElement(DELTAXML_NS, PCDATA_NEW_LOCAL, PCDATA_NEW);
        super.endElement(DELTAXML_NS, PCDATA_MODIFY_LOCAL, PCDATA_MODIFY);
        break;
      }
      case DeltaElement.ADD: {
        final String newText= element.getNewValue();
        super.characters(newText.toCharArray(), 0, newText.length());
        break;
      }
      case DeltaElement.DELETE: {
        final String oldText= element.getOldValue();
        super.characters(oldText.toCharArray(), 0, oldText.length());
        break;
      }
      default: {
        final String unchangedText= element.getUnchangedValue();
        if (unchangedText.length() != 0) {
          super.characters(unchangedText.toCharArray(), 0, unchangedText.length());
        }
        break;
      }
    }
    super.endElement(DELTAXML_NS, localName, qualifiedName);
  }

  /**
   * Emits one buffered element of a possibly orphaned section. When the section was judged
   * orphaned, words and punctuation are converted to modified elements whose old and new PCData
   * are both the unchanged text; every other element is written as it was encountered.
   * <p>The delta attribute is added if the buffered element did not carry one, and missing
   * old/new values of a modified element are treated as empty (as outputPending does), so a
   * buffered element can no longer abort the output with an index or null pointer error.</p>
   * @param element the buffered element to write
   * @param orphaned whether the section the element belongs to was judged to be orphaned
   * @throws SAXException the superclass may throw an exception during processing.
   * @see #judgeOrphaned()
   */
  private void outputOrphaned(DeltaElement element, boolean orphaned) throws SAXException {
    AttributesImpl atts= element.getAttributes();
    String localName= element.getLocalName();
    String qualifiedName= DXML_PREFIX + ":" + localName;
    byte type= element.getType();
    // only words and punctuation are converted; spaces keep their original state
    boolean promote= orphaned && (localName.equals(WORD_LOCAL_NAME) || localName.equals(PUNCTUATION_LOCAL_NAME));
    boolean writeAsModify= type == DeltaElement.MODIFY || promote;

    String deltaValue= null;
    if (writeAsModify) {
      deltaValue= MODIFY;
    } else if (type == DeltaElement.ADD) {
      deltaValue= ADD;
    } else if (type == DeltaElement.DELETE) {
      deltaValue= DELETE;
    }
    if (deltaValue != null) {
      int deltaIndex= atts.getIndex(DELTAXML_NS, DELTA_LOCAL_NAME);
      if (deltaIndex >= 0) {
        atts.setValue(deltaIndex, deltaValue);
      } else {
        // getIndex returns -1 for a missing attribute and setValue rejects that index
        atts.addAttribute(DELTAXML_NS, DELTA_LOCAL_NAME, DXML_PREFIX + ":" + DELTA_LOCAL_NAME, "CDATA", deltaValue);
      }
    }

    super.startElement(DELTAXML_NS, localName, qualifiedName, atts);
    if (writeAsModify) {
      String oldString;
      String newString;
      if (promote) {
        oldString= element.getUnchangedValue();
        newString= oldString;
      } else {
        oldString= element.getOldValue();
        newString= element.getNewValue();
      }
      
      super.startElement(DELTAXML_NS, PCDATA_MODIFY_LOCAL, PCDATA_MODIFY, new AttributesImpl());
      super.startElement(DELTAXML_NS, PCDATA_OLD_LOCAL, PCDATA_OLD, new AttributesImpl());
      if (oldString != null && oldString.length() != 0) {
        super.characters(oldString.toCharArray(), 0, oldString.length());
      }
      super.endElement(DELTAXML_NS, PCDATA_OLD_LOCAL, PCDATA_OLD);
      super.startElement(DELTAXML_NS, PCDATA_NEW_LOCAL, PCDATA_NEW, new AttributesImpl());
      // the new value is only set when PCDATAnew text was seen, so it may be absent
      if (newString != null && newString.length() != 0) {
        super.characters(newString.toCharArray(), 0, newString.length());
      }
      super.endElement(DELTAXML_NS, PCDATA_NEW_LOCAL, PCDATA_NEW);
      super.endElement(DELTAXML_NS, PCDATA_MODIFY_LOCAL, PCDATA_MODIFY);
    } else if (type == DeltaElement.ADD) {
      String newString= element.getNewValue();
      super.characters(newString.toCharArray(), 0, newString.length());
    } else if (type == DeltaElement.DELETE) {
      String oldString= element.getOldValue();
      super.characters(oldString.toCharArray(), 0, oldString.length());
    } else {
      String unchangedString= element.getUnchangedValue();
      if (unchangedString.length() != 0) {
        super.characters(unchangedString.toCharArray(), 0, unchangedString.length());
      }
    }
    super.endElement(DELTAXML_NS, localName, qualifiedName);
  }

  /**
   * Sets the value for the maximum number of words that can be included in an orphaned
   * section. A value of 0 disables the filter (all events are passed straight through).
   * If the supplied value is not a non-negative integer the limit is reset to
   * {@link #DEFAULT_ORPHANED_WORD_LIMIT} before the exception is thrown.
   * 
   * @param orphanedLengthLimit the limit to set in maximum number of words
   * @throws SAXException is thrown when the supplied value is invalid; for an unparsable value
   *         the <code>NumberFormatException</code> is attached as the cause
   */
  public void setorphanedLengthLimit(String orphanedLengthLimit) throws SAXException {
    int orphanedLengthLimitInt;
    try {
      orphanedLengthLimitInt= Integer.parseInt(orphanedLengthLimit);
    } catch (NumberFormatException nfe) {
      this.orphanedLengthLimit= DEFAULT_ORPHANED_WORD_LIMIT;
      // The constructor already records nfe as the cause. Calling initCause() as well is
      // redundant and throws IllegalStateException on JDKs where SAXException hands the
      // cause to Throwable, which would hide the SAXException from the caller.
      throw new SAXException("Invalid value for orphanedLengthLimit, using default of 2", nfe);
    }
    if (orphanedLengthLimitInt < 0) {
      this.orphanedLengthLimit= DEFAULT_ORPHANED_WORD_LIMIT;
      throw new SAXException("Invalid value for orphanedLengthLimit (it can't be less than 0), using default of 2");
    }
    this.orphanedLengthLimit= orphanedLengthLimitInt;
  }
  
  /**
   * Sets the percentage value for the calculating of orphaned sections. If the percentage value for a possibly
   * orphaned section is less than or equal to this value, then it is classified as orphaned (unless there are
   * more words than the orphanedLengthLimit allows). The percentage value for a possibly orphaned section is
   * calculated as follows:
   * <code>possibly_orphaned_words_count / (preceding_changed_words_count + possibly_orphaned_words_count + following_changed_words_count) * 100</code>
   * <p>If the supplied value is not a non-negative integer the threshold is reset to
   * {@link #DEFAULT_ORPHANED_THRESHOLD_PERCENTAGE} before the exception is thrown.</p>
   * 
   * @param orphanedThresholdPercentage the percentage threshold that is used to decide whether words
   * orphaned between two changed sections are indeed orphaned.
   * @throws SAXException is thrown when the supplied value is invalid; for an unparsable value
   *         the <code>NumberFormatException</code> is attached as the cause
   */
  public void setorphanedThresholdPercentage(String orphanedThresholdPercentage) throws SAXException {
    int orphanedThresholdPercentageInt;
    try {
      orphanedThresholdPercentageInt= Integer.parseInt(orphanedThresholdPercentage);
    } catch (NumberFormatException nfe) {
      this.orphanedThresholdPercentage= DEFAULT_ORPHANED_THRESHOLD_PERCENTAGE;
      throw new SAXException("Invalid value for orphanedThresholdPercentage, using default of 20%", nfe);
    }
    if (orphanedThresholdPercentageInt < 0) {
      this.orphanedThresholdPercentage= DEFAULT_ORPHANED_THRESHOLD_PERCENTAGE;
      // the message used to name orphanedLengthLimit and its default of 2
      throw new SAXException("Invalid value for orphanedThresholdPercentage (it can't be less than 0), using default of 20%");
    }
    this.orphanedThresholdPercentage= orphanedThresholdPercentageInt;
  }
  
  /**
   * A buffered word, space or punctuation element: its change type, a private copy of its
   * attributes and whichever of the old, new and unchanged text values have been set for it.
   * Subclasses supply the element's local name.
   */
  private abstract class DeltaElement {
    /** Type of an element whose text is unchanged. */
    public static final byte UNCHANGED= 0;
    /** Type of an element holding both an old and a new text value. */
    public static final byte MODIFY= 1;
    /** Type of an added element (new text only). */
    public static final byte ADD= 2;
    /** Type of a deleted element (old text only). */
    public static final byte DELETE= 3;

    private byte type;
    private AttributesImpl attributes;
    private String unchangedValue;
    private String oldValue;
    private String newValue;

    /**
     * @param type one of the type constants of this class
     * @param attributes the element's attributes; they are copied, so the caller's object
     *        is never modified through this instance
     */
    public DeltaElement (byte type, Attributes attributes) {
      this.attributes= new AttributesImpl(attributes);
      this.type= type;
    }

    /** @return the local name of the element this entry stands for */
    public abstract String getLocalName();

    public byte getType() {
      return this.type;
    }

    public void setType(byte type) {
      this.type= type;
    }

    /** @return the attribute copy held by this entry (not a fresh copy) */
    public AttributesImpl getAttributes() {
      return this.attributes;
    }

    public void setAttributes(AttributesImpl attributes) {
      this.attributes= attributes;
    }

    /** @return the unchanged text, or <code>null</code> if none has been set */
    public String getUnchangedValue() {
      return this.unchangedValue;
    }

    public void setUnchangedValue(String unchangedValue) {
      this.unchangedValue= unchangedValue;
    }

    /** @return the old text, or <code>null</code> if none has been set */
    public String getOldValue() {
      return this.oldValue;
    }

    public void setOldValue(String oldValue) {
      this.oldValue= oldValue;
    }

    /** @return the new text, or <code>null</code> if none has been set */
    public String getNewValue() {
      return this.newValue;
    }

    public void setNewValue(String newValue) {
      this.newValue= newValue;
    }
  }
  
  /** Buffered <code>deltaxml:word</code> element. */
  private class DeltaWordElement extends DeltaElement {

    public DeltaWordElement(byte type, Attributes attributes) {
      super(type, attributes);
    }

    /** @return the local name <code>word</code> */
    public String getLocalName() {
      return WORD_LOCAL_NAME;
    }

  }
  
  /** Buffered <code>deltaxml:space</code> element. */
  private class DeltaSpaceElement extends DeltaElement {

    public DeltaSpaceElement(byte type, Attributes attributes) {
      super(type, attributes);
    }

    /** @return the local name <code>space</code> */
    public String getLocalName() {
      return SPACE_LOCAL_NAME;
    }

  }
  
  /** Buffered <code>deltaxml:punctuation</code> element. */
  private class DeltaPunctuationElement extends DeltaElement {

    public DeltaPunctuationElement(byte type, Attributes attributes) {
      super(type, attributes);
    }

    /** @return the local name <code>punctuation</code> */
    public String getLocalName() {
      return PUNCTUATION_LOCAL_NAME;
    }

  }
}
