View Javadoc

1   //-------------------------------------------------------------------------
2   // FILE: XmlValidatorXercesImpl.java
3   // PACKAGE: org.astrogrid.xmlutils
4   //
5   // DATE       AUTHOR    NOTES
6   // ----       ------    -----
7   // 16/10/02   KEA       Initial prototype
8   // 02/12/02   KEA       Update for supporting feature-setting, and
9   //   reporting of Schema validation errors (rather than just well-
10  //   formedness errors) using Exceptions.
11  //-------------------------------------------------------------------------
12  
13  package org.astrogrid.xmlutils;
14  
15  // FOR XERCES PARSER
16  import org.apache.xerces.parsers.SAXParser;
17  
18  import org.xml.sax.Attributes;
19  import org.xml.sax.InputSource;
20  import org.xml.sax.SAXException;
21  import org.xml.sax.SAXNotRecognizedException;
22  import org.xml.sax.SAXNotSupportedException;
23  import org.xml.sax.SAXParseException;
24  
25  import org.xml.sax.helpers.DefaultHandler;
26  
27  import java.io.IOException;
28  import java.io.Reader;
29  
30  import java.util.Map;
31  import java.util.Iterator;
32  
33  import org.astrogrid.xmlutils.XmlValidatorIfc;
34  
35  import org.astrogrid.log.Log;
36  
37  /***
38   * <p>Implementation class for syntactic and semantic validation of XML
39   * input documents, based on the Xerces Java XML parser.
40   * <p>See Validate.java for a command-line harness.
41   *
42   * <p> See XmlValidatorIfc documentation for usage example with a
43   * namespace map.
44   *
45   *
46   * <p>TO DO:  <ul>
47   * <li>In schema-validation mode, the Xerces parser can produce pretty
48   * cryptic parse-error messages - is there an alternative parser we
49   * can use?  XML4j seems not much better (v. similar to Xerces).
50   * <li>Nicer / more informative exception-throwing?
51   * </ul>
52   *
53   *
54   * @see org.astrogrid.xmlutils.XmlValidatorIfc
55   * @see org.astrogrid.xmlutils.Validate
56   * @see org.apache.xerces.parsers.SAXParser
57   * @see org.xml.sax.helpers.DefaultHandler
58   *
59   * @author Kona Andrews,
60   * <a href="mailto:kea@ast.cam.ac.uk">kea@ast.cam.ac.uk</a>
61   * @version 1.0
62   *
63   *
64   * (c) Copyright Astrogrid 2002; all rights reserved.
65   * See http://www.astrogrid.org/code_licence.html for terms of usage.
66   */
67  public class XmlValidatorXercesImpl extends DefaultHandler
68  {
69  
70     /***
71      * Configuration flag: If true, deeply validate referenced schema,
72      * as well as referencing XML document.
73      * Defaults to false.
74      */
75     protected boolean validateSchema = false;
76  
77     /***
78      * Configuration flag: If true, don't print parse warnings to stderr -
79      * silently pass over them.
80      * Defaults to false.
81      */
82     protected boolean suppressWarnings = false;
83  
84     /***
85      * Configuration flag: If true, use a namespace-aware parser.
86      * Defaults to true.
87      */
88     protected boolean useNamespaces = true;
89  
90     /***
91      * Internal flag: Validate using supplied namespace map.
92      */
93     private boolean useNamespaceMap = false;
94  
95     /***
96      * Namespace map (namespaces -> schema locations) supplied by
97      * the user when validate() method is called.  If present,
98      * validation of XML document will use schema at location
99      * specified for that document's namespace.  If the namespace
100     * in the document is not found in the map, a validation error
101     * will be generated.
102     */
103    private Map currNamespaceMap;
104 
105    /***
106     * Internal flag used for checking document namespace during SAX parsing.
107     */
108    private boolean isRoot;
109 
110 
111    /***
112     * Dummy constructor - does nothing.
113     */
114    public XmlValidatorXercesImpl()
115    {
116    }
117 
118 
119    /***
120     * Syntactically and semantically validates an input XML file
121     * against its schema(s), the location of which must be specified
122     * in the <tt>xsi:schemalocation</tt> attribute of the toplevel
123     * document element.
124     *
125     * @param reader  A (pre-initialised) reader for the input XML.
126     *
127     * @throws org.xml.sax.SAXException if input document is invalid.
128     */
129    public void validate(Reader reader) throws SAXException, IOException
130    {
131       useNamespaceMap = false;
132       currNamespaceMap = null;
133       parse(new InputSource(reader));
134    }
135 
136    /***
137     * Syntactically and semantically validates an input XML file
138     * against its schema(s), the location of which must be specified
139     * in the supplied Map matching namespaces to schema locations.
140     *
141     * @param reader  A (pre-initialised) reader for the input XML.
142     * @param namespaceMap  A Map type containing string:string pairs,
143     * the key being a namespace URI, the value being the location of
144     * the schema for that namespace.
145     *
146     * @throws org.xml.sax.SAXException if input document is invalid.
147     */
148    public void validate(Reader reader, Map namespaceMap) throws SAXException,IOException
149    {
150       useNamespaceMap = true;
151       currNamespaceMap = namespaceMap;
152       parse(new InputSource(reader));
153    }
154 
155 
156    /***
157     * Syntactically and semantically validates an input XML file
158     * against its schema(s), the location of which must be specified
159     * in the <tt>xsi:schemalocation</tt> attribute of the toplevel
160     * document element.
161     *
162     * @param systemResource  The specified system resource from which to
163     * read the input XML (e.g. a local filename).
164     *
165     * @throws org.xml.sax.SAXException if input document is invalid.
166     */
167    public void validate(String systemResource) throws SAXException, IOException
168    {
169       useNamespaceMap = false;
170       currNamespaceMap = null;
171       parse(new InputSource(systemResource));
172    }
173 
174    /***
175     * Syntactically and semantically validates an input XML file
176     * against its schema, the location of which must be specified
177     * in the supplied Map matching namespaces to schema locations.
178     *
179     * @param systemResource  The specified system resource from which to
180     * read the input XML (e.g. a local filename).
181     *
182     * @param namespaceMap  A Map type containing string:string pairs,
183     * the key being a namespace URI, the value being the location of
184     * the schema for that namespace.
185     *
186     * @throws org.xml.sax.SAXException if input document is invalid.
187     */
188    public void validate(String systemResource, Map namespaceMap)
189          throws SAXException, IOException
190    {
191       useNamespaceMap = true;
192       currNamespaceMap = namespaceMap;
193       parse(new InputSource(systemResource));
194    }
195 
196 
197    /***
198     * Allows named features to be switched on or off.
199     *
200     * @param feature  A string identifying a feature by name.
201     * @param value  The boolean value to which the named feature
202     * should be set.
203     *
204     * @throws  org.xml.sax.SAXNotRecognizedException if the named
205     * feature is not recognised.
206     */
207    public void setFeature(String feature, boolean val)
208          throws org.xml.sax.SAXNotRecognizedException
209    {
210       if (feature.equals("ValidateSchema"))
211       {
212          validateSchema = val;
213       }
214       else if (feature.equals("SuppressWarnings"))
215       {
216          suppressWarnings = val;
217       }
218       else if (feature.equals("UseNamespaces"))
219       {
220          useNamespaces = val;
221       }
222       else
223       {
224          throw new SAXNotRecognizedException(
225                "Unrecognised validator feature " + feature);
226       }
227    }
228 
229 
230    /***
231     * Allows named features to be interrogated for their current value.
232     *
233     * @param feature  A string identifying a feature by name.
234     *
235     * @return  The current boolean value of the named feature.
236     *
237     * @throws  org.xml.sax.SAXNotRecognizedException if the named
238     * feature is not recognised.
239     */
240    public boolean getFeature(String feature)
241          throws org.xml.sax.SAXNotRecognizedException
242    {
243       if (feature.equals("ValidateSchema"))
244       {
245          return validateSchema;
246       }
247       else if (feature.equals("SuppressWarnings"))
248       {
249          return suppressWarnings;
250       }
251       else if (feature.equals("UseNamespaces"))
252       {
253          return useNamespaces;
254       }
255       else
256       {
257          throw new SAXNotRecognizedException(
258                "Unrecognised validator feature " + feature);
259       }
260    }
261 
262 
263    /***
264     * Element-handling function supplied to parser.
265     * Only invoked when a namespace map is in use.
266     * Checks the root document element to ensure that the
267     * document namespace URI is a known namespace according
268     * to the current map.
269     *
270     * @param e   An exception containing the warning.
271     *
272     * @throws  org.xml.sax.SAXException when the document namespace is not
273     *   found in the current namespace map.
274     */
275    public void startElement(
276                String namespaceURI,
277                String localName,
278                String qName,
279                Attributes atts)
280                throws SAXException
281    {
282       // Only need to check root element - want document namespace
283       //
284       if (isRoot)
285       {
286          isRoot = false;   // No more checks after this one
287          boolean goodNamespace = false;
288          Iterator iterator;
289          for (iterator = currNamespaceMap.entrySet().iterator();
290                iterator.hasNext(); )
291          {
292             Map.Entry me = (Map.Entry)iterator.next();
293             String namespace = (String)me.getKey();
294             if (namespaceURI.equals(namespace))
295             {
296                goodNamespace = true;
297             }
298          }
299          if (!goodNamespace)
300          {
301             throw new SAXException(
302             "\nThe document does not validate successfully: \n"
303             + "- Unrecognised document namespace " + namespaceURI);
304          }
305       }
306    }
307 
308 
309    /***
310     * Warning-handling function supplied to parser.
311     * Either prints warning to stderr or silently ignores exception,
312     * depending on value of suppressWarnings flag.
313     * Never actually raises an exception.
314     *
315     * @param e   An exception containing the warning.
316     *
317     * @throws  org.xml.sax.SAXException
318     */
319    public void warning(SAXParseException e) throws SAXException
320    {
321       if (!suppressWarnings)
322       {
323          org.astrogrid.log.Log.logWarning(null, "Parse warning", e);
324       }
325    }
326 
327 
328    /***
329     * Error-handling function supplied to parser.
330     * Always raises a SAXException describing the error.
331     *
332     * @param e   An exception containing the error.
333     *
334     * @throws  org.xml.sax.SAXException (always)
335     */
336    public void error(SAXParseException e) throws SAXException
337    {
338       throw new SAXException(
339          "Document contains invalid XML: \n"
340                + "SAXParseException: "+ e.getMessage());
341    }
342 
343 
344    /***
345     * Fatal error-handling function supplied to parser.
346     * Always raises a SAXException describing the fatal error.
347     *
348     * @param e   An exception containing the fatal error.
349     *
350     * @throws  org.xml.sax.SAXException (always)
351     */
352    public void fatalError(SAXParseException e) throws SAXException
353    {
354       throw new SAXException(
355          "Document contains an error that stops validation: \n"
356                + "SAXParseException: "+ e.getMessage());
357    }
358 
359 
360    /***
361     * Provides the actual functionality to syntactically and semantically
362     * validate an input XML file against its schema(s), using the
363     * Xerces Java XML parser.
364     *
365     * @param inputSource A (pre-initialised) SAX input source for the
366     *    input XML.
367     *
368     * @throws org.xml.sax.SAXException if input document is invalid.
369     */
370    protected void parse(InputSource inputSource) throws SAXException, IOException
371    {
372       Log.trace("Parsing "+inputSource.getPublicId()+" ("+inputSource.getSystemId()+")...");
373 
374       // Create a Xerces SAX Parser
375       //
376       SAXParser parser = new SAXParser();
377       parser.setErrorHandler(this);
378 
379       // Do we need to validate the document namespace?
380       // If so, we need our custom element handler
381       //
382       if (useNamespaceMap)
383       {
384          parser.setContentHandler(this);
385       }
386 
387       // Turn requested features on
388       //
389       try
390       {
391          // Switch validation on
392          //
393          parser.setFeature(
394             "http://xml.org/sax/features/validation", true);
395 
396          // Support full Schema validation (not just DTD)
397          //
398          parser.setFeature(
399             "http://apache.org/xml/features/validation/schema", true);
400 
401          if (validateSchema)
402          {
403             // Validate the schema itself in detail
404             //
405             parser.setFeature(
406                "http://apache.org/xml/features/validation/schema-full-checking",
407                true);
408          }
409 
410          // Switch on namespaces if required
411          //
412          if (useNamespaces)
413          {
414             parser.setFeature("http://xml.org/sax/features/namespaces", true);
415 
416             // Use known namespace and schema if this has been requested
417             //
418             if (useNamespaceMap)
419             {
420                String canonicalSchemas = "";
421                Iterator iterator;
422                for (iterator = currNamespaceMap.entrySet().iterator();
423                      iterator.hasNext(); )
424                {
425                   Map.Entry me = (Map.Entry)iterator.next();
426                   canonicalSchemas = canonicalSchemas
427                         + (String)me.getKey() + " "
428                         + (String)me.getValue() + " ";
429                }
430                parser.setProperty(
431                      "http://apache.org/xml/properties/schema/external-schemaLocation",
432                      canonicalSchemas);
433             }
434          }
435          else
436          {
437             parser.setFeature("http://xml.org/sax/features/namespaces", false);
438          }
439 
440          // This one only reports schema errors if a schema is given
441          // (if "false" (default), if no schema is referenced in the XML
442          // document then an error is raised.)  Do we want this?
443          //
444          /*
445          parser.setFeature(
446                "http://apache.org/xml/features/validation/dynamic", true);
447          */
448 
449       }
450       catch (SAXNotRecognizedException e)
451       {
452          throw new SAXException(
453                "\nThis parser does not recognise a requested feature: \n"
454                   + "SAXNotRecognizedException: "+ e.getMessage());
455       }
456       catch (SAXNotSupportedException e)
457       {
458          throw new SAXException(
459             "\nThis parser does not support a requested feature: \n"
460                   + "SAXNotSupportedException: "+ e.getMessage());
461       }
462 
463       // Parse the document (thereby validating it)
464       //
465 //    try let exceptions propogate up
466 //    {
467       isRoot = true;
468       parser.parse(inputSource);
469 //    }
470 //    catch (IOException e)
471 //    {
472 //       throw new SAXException(
473 //          "\nThe document does not validate successfully: \n"
474 //                + "IOException: "+ e.getMessage());
475 //    }
476    }
477 }
478 //-------------------------------------------------------------------------