// $Id: DoctypePreserver.java,v 1.2 2004/11/15 01:52:03 nigelw Exp $
//  2004 Monsell EDM Ltd. All rights reserved.

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.XMLFilter;
import org.xml.sax.XMLReader;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.ext.LexicalHandler;
import javax.xml.transform.OutputKeys;
import java.io.IOException;

/**
 * This class demonstrates a technique for recording the
 * DOCTYPE present in an input file and reinstating
 * it at the output stage of a processing pipeline.
 * The processing pipeline in question is an artificial
 * one consisting of 3 XSLT based stages, driven by the
 * stylesheets stage1.xsl, stage2.xsl and stage3.xsl.
 */
public class DoctypePreserver {

    /**
     * Applies a 3 stage pipeline to XHTML files.
     *
     * @param args args[0] is the input file,
     *             args[1] is the output file
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws Error if the TransformerFactory does not report the SAX
     *         features used here, or if the lexical handler cannot be
     *         set on the XMLReader
     */
    public static void main(String[] args)
        throws TransformerException, TransformerConfigurationException,
        ParserConfigurationException, SAXException, IOException
    {
        // Fail early with a usable message rather than with an
        // ArrayIndexOutOfBoundsException part way through the set-up.
        if (args == null || args.length < 2)
            throw new IllegalArgumentException(
                "Usage: java DoctypePreserver <input-file> <output-file>");
        String inputFile= args[0];
        String outputFile= args[1];

        SAXParserFactory spf= SAXParserFactory.newInstance();
        // JAXP parser factories are not namespace aware by default, but
        // the reader created below feeds an XSLT filter, which needs
        // namespace information in the SAX events it receives.
        spf.setNamespaceAware(true);

        TransformerFactory tf= TransformerFactory.newInstance();
        if (!tf.getFeature(SAXSource.FEATURE) ||
            !tf.getFeature(SAXResult.FEATURE) ||
            !tf.getFeature(SAXTransformerFactory.FEATURE) ||
            !tf.getFeature(SAXTransformerFactory.FEATURE_XMLFILTER))
            throw new Error("SAXTransformerFactory not supported");
        SAXTransformerFactory stf= (SAXTransformerFactory) tf;

        XMLReader reader= spf.newSAXParser().getXMLReader();

        // The reporter receives the startDTD() callback and so learns
        // the public and system identifiers of the input DOCTYPE.
        DoctypeReporter reporter= new DoctypeReporter();
        try {
            reader.setProperty("http://xml.org/sax/properties/lexical-handler",
                               reporter);
        } catch (SAXNotRecognizedException snre) {
            throw new Error("Cannot associate lexical handler", snre);
        } catch (SAXNotSupportedException snse) {
            throw new Error("Cannot associate lexical handler", snse);
        }

        // A pre-parse is needed in addition to the parse performed as
        // part of the pipeline below (the one triggered by transform()).
        // The reporter only learns the DOCTYPE while a parse is running,
        // but the setOutputProperty() calls must be made before
        // transform() starts.
        reader.parse(new InputSource(inputFile));

        XMLFilter stage1= stf.newXMLFilter(new StreamSource("stage1.xsl"));
        Transformer stage2= stf.newTransformer(new StreamSource("stage2.xsl"));
        TransformerHandler stage3= stf.newTransformerHandler(new StreamSource("stage3.xsl"));

        // Wire the pipeline: reader -> stage1 -> stage2 -> stage3 -> output file.
        stage1.setParent(reader);
        stage3.setResult(new StreamResult(outputFile));

        // Reinstate whatever DOCTYPE identifiers the pre-parse recorded;
        // identifiers that were not declared are left unset.
        Transformer outputStage= stage3.getTransformer();
        if (reporter.getPublicId() != null)
            outputStage.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, reporter.getPublicId());
        if (reporter.getSystemId() != null)
            outputStage.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, reporter.getSystemId());

        stage2.transform(new SAXSource(stage1, new InputSource(inputFile)),
                         new SAXResult(stage3));
    }
}


/**
 * A LexicalHandler that remembers the public and system identifiers
 * passed to the most recent startDTD() callback so that they can be
 * queried afterwards.  Every other lexical event is ignored.
 */
class DoctypeReporter implements LexicalHandler {

    // Identifiers taken from the last startDTD() call; both remain
    // null until such a call has been made.
    String publicId= null;
    String systemId= null;

    /**
     * Records the identifiers of the DOCTYPE declaration being reported.
     * The name argument is not retained.
     */
    public void startDTD(String name, String publicId, String systemId) {
        this.systemId= systemId;
        this.publicId= publicId;
    }

    /**
     * Provides access to any publicId
     * @return The public identifier passed to the last startDTD() call, or null if none has been recorded
     */
    public String getPublicId() {
        return this.publicId;
    }

    /**
     * Provides access to any systemId
     * @return The system identifier passed to the last startDTD() call, or null if none has been recorded
     */
    public String getSystemId() {
        return this.systemId;
    }

    // The remaining LexicalHandler callbacks carry nothing this class
    // needs, so they are deliberately empty.

    public void endDTD() {
    }

    public void startEntity(String name) {
    }

    public void endEntity(String name) {
    }

    public void startCDATA() {
    }

    public void endCDATA() {
    }

    public void comment(char[] cg, int start, int length) {
    }

}
