1
2
3
4
5
6
7
8
9
10
11
12
13 package org.astrogrid.xmlutils;
14
15
16 import org.apache.xerces.parsers.SAXParser;
17
18 import org.xml.sax.Attributes;
19 import org.xml.sax.InputSource;
20 import org.xml.sax.SAXException;
21 import org.xml.sax.SAXNotRecognizedException;
22 import org.xml.sax.SAXNotSupportedException;
23 import org.xml.sax.SAXParseException;
24
25 import org.xml.sax.helpers.DefaultHandler;
26
27 import java.io.IOException;
28 import java.io.Reader;
29
30 import java.util.Map;
31 import java.util.Iterator;
32
33 import org.astrogrid.xmlutils.XmlValidatorIfc;
34
35 import org.astrogrid.log.Log;
36
37 /***
38 * <p>Implementation class for syntactic and semantic validation of XML
39 * input documents, based on the Xerces Java XML parser.
40 * <p>See Validate.java for a command-line harness.
41 *
42 * <p> See XmlValidatorIfc documentation for usage example with a
43 * namespace map.
44 *
45 *
46 * <p>TO DO: <ul>
47 * <li>In schema-validation mode, the Xerces parser can produce pretty
48 * cryptic parse-error messages - is there an alternative parser we
49 * can use? XML4j seems not much better (v. similar to Xerces).
50 * <li>Nicer / more informative exception-throwing?
51 * </ul>
52 *
53 *
54 * @see org.astrogrid.xmlutils.XmlValidatorIfc
55 * @see org.astrogrid.xmlutils.Validate
56 * @see org.apache.xerces.parsers.SAXParser
57 * @see org.xml.sax.helpers.DefaultHandler
58 *
59 * @author Kona Andrews,
60 * <a href="mailto:kea@ast.cam.ac.uk">kea@ast.cam.ac.uk</a>
61 * @version 1.0
62 *
63 *
64 * (c) Copyright Astrogrid 2002; all rights reserved.
65 * See http://www.astrogrid.org/code_licence.html for terms of usage.
66 */
67 public class XmlValidatorXercesImpl extends DefaultHandler
68 {
69
70 /***
71 * Configuration flag: If true, deeply validate referenced schema,
72 * as well as referencing XML document.
73 * Defaults to false.
74 */
75 protected boolean validateSchema = false;
76
77 /***
78 * Configuration flag: If true, don't print parse warnings to stderr -
79 * silently pass over them.
80 * Defaults to false.
81 */
82 protected boolean suppressWarnings = false;
83
84 /***
85 * Configuration flag: If true, use a namespace-aware parser.
86 * Defaults to true.
87 */
88 protected boolean useNamespaces = true;
89
90 /***
91 * Internal flag: Validate using supplied namespace map.
92 */
93 private boolean useNamespaceMap = false;
94
95 /***
96 * Namespace map (namespaces -> schema locations) supplied by
97 * the user when validate() method is called. If present,
98 * validation of XML document will use schema at location
99 * specified for that document's namespace. If the namespace
100 * in the document is not found in the map, a validation error
101 * will be generated.
102 */
103 private Map currNamespaceMap;
104
105 /***
106 * Internal flag used for checking document namespace during SAX parsing.
107 */
108 private boolean isRoot;
109
110
111 /***
112 * Dummy constructor - does nothing.
113 */
114 public XmlValidatorXercesImpl()
115 {
116 }
117
118
119 /***
120 * Syntactically and semantically validates an input XML file
121 * against its schema(s), the location of which must be specified
122 * in the <tt>xsi:schemalocation</tt> attribute of the toplevel
123 * document element.
124 *
125 * @param reader A (pre-initialised) reader for the input XML.
126 *
127 * @throws org.xml.sax.SAXException if input document is invalid.
128 */
129 public void validate(Reader reader) throws SAXException, IOException
130 {
131 useNamespaceMap = false;
132 currNamespaceMap = null;
133 parse(new InputSource(reader));
134 }
135
136 /***
137 * Syntactically and semantically validates an input XML file
138 * against its schema(s), the location of which must be specified
139 * in the supplied Map matching namespaces to schema locations.
140 *
141 * @param reader A (pre-initialised) reader for the input XML.
142 * @param namespaceMap A Map type containing string:string pairs,
143 * the key being a namespace URI, the value being the location of
144 * the schema for that namespace.
145 *
146 * @throws org.xml.sax.SAXException if input document is invalid.
147 */
148 public void validate(Reader reader, Map namespaceMap) throws SAXException,IOException
149 {
150 useNamespaceMap = true;
151 currNamespaceMap = namespaceMap;
152 parse(new InputSource(reader));
153 }
154
155
156 /***
157 * Syntactically and semantically validates an input XML file
158 * against its schema(s), the location of which must be specified
159 * in the <tt>xsi:schemalocation</tt> attribute of the toplevel
160 * document element.
161 *
162 * @param systemResource The specified system resource from which to
163 * read the input XML (e.g. a local filename).
164 *
165 * @throws org.xml.sax.SAXException if input document is invalid.
166 */
167 public void validate(String systemResource) throws SAXException, IOException
168 {
169 useNamespaceMap = false;
170 currNamespaceMap = null;
171 parse(new InputSource(systemResource));
172 }
173
174 /***
175 * Syntactically and semantically validates an input XML file
176 * against its schema, the location of which must be specified
177 * in the supplied Map matching namespaces to schema locations.
178 *
179 * @param systemResource The specified system resource from which to
180 * read the input XML (e.g. a local filename).
181 *
182 * @param namespaceMap A Map type containing string:string pairs,
183 * the key being a namespace URI, the value being the location of
184 * the schema for that namespace.
185 *
186 * @throws org.xml.sax.SAXException if input document is invalid.
187 */
188 public void validate(String systemResource, Map namespaceMap)
189 throws SAXException, IOException
190 {
191 useNamespaceMap = true;
192 currNamespaceMap = namespaceMap;
193 parse(new InputSource(systemResource));
194 }
195
196
197 /***
198 * Allows named features to be switched on or off.
199 *
200 * @param feature A string identifying a feature by name.
201 * @param value The boolean value to which the named feature
202 * should be set.
203 *
204 * @throws org.xml.sax.SAXNotRecognizedException if the named
205 * feature is not recognised.
206 */
207 public void setFeature(String feature, boolean val)
208 throws org.xml.sax.SAXNotRecognizedException
209 {
210 if (feature.equals("ValidateSchema"))
211 {
212 validateSchema = val;
213 }
214 else if (feature.equals("SuppressWarnings"))
215 {
216 suppressWarnings = val;
217 }
218 else if (feature.equals("UseNamespaces"))
219 {
220 useNamespaces = val;
221 }
222 else
223 {
224 throw new SAXNotRecognizedException(
225 "Unrecognised validator feature " + feature);
226 }
227 }
228
229
230 /***
231 * Allows named features to be interrogated for their current value.
232 *
233 * @param feature A string identifying a feature by name.
234 *
235 * @return The current boolean value of the named feature.
236 *
237 * @throws org.xml.sax.SAXNotRecognizedException if the named
238 * feature is not recognised.
239 */
240 public boolean getFeature(String feature)
241 throws org.xml.sax.SAXNotRecognizedException
242 {
243 if (feature.equals("ValidateSchema"))
244 {
245 return validateSchema;
246 }
247 else if (feature.equals("SuppressWarnings"))
248 {
249 return suppressWarnings;
250 }
251 else if (feature.equals("UseNamespaces"))
252 {
253 return useNamespaces;
254 }
255 else
256 {
257 throw new SAXNotRecognizedException(
258 "Unrecognised validator feature " + feature);
259 }
260 }
261
262
263 /***
264 * Element-handling function supplied to parser.
265 * Only invoked when a namespace map is in use.
266 * Checks the root document element to ensure that the
267 * document namespace URI is a known namespace according
268 * to the current map.
269 *
270 * @param e An exception containing the warning.
271 *
272 * @throws org.xml.sax.SAXException when the document namespace is not
273 * found in the current namespace map.
274 */
275 public void startElement(
276 String namespaceURI,
277 String localName,
278 String qName,
279 Attributes atts)
280 throws SAXException
281 {
282
283
284 if (isRoot)
285 {
286 isRoot = false;
287 boolean goodNamespace = false;
288 Iterator iterator;
289 for (iterator = currNamespaceMap.entrySet().iterator();
290 iterator.hasNext(); )
291 {
292 Map.Entry me = (Map.Entry)iterator.next();
293 String namespace = (String)me.getKey();
294 if (namespaceURI.equals(namespace))
295 {
296 goodNamespace = true;
297 }
298 }
299 if (!goodNamespace)
300 {
301 throw new SAXException(
302 "\nThe document does not validate successfully: \n"
303 + "- Unrecognised document namespace " + namespaceURI);
304 }
305 }
306 }
307
308
309 /***
310 * Warning-handling function supplied to parser.
311 * Either prints warning to stderr or silently ignores exception,
312 * depending on value of suppressWarnings flag.
313 * Never actually raises an exception.
314 *
315 * @param e An exception containing the warning.
316 *
317 * @throws org.xml.sax.SAXException
318 */
319 public void warning(SAXParseException e) throws SAXException
320 {
321 if (!suppressWarnings)
322 {
323 org.astrogrid.log.Log.logWarning(null, "Parse warning", e);
324 }
325 }
326
327
328 /***
329 * Error-handling function supplied to parser.
330 * Always raises a SAXException describing the error.
331 *
332 * @param e An exception containing the error.
333 *
334 * @throws org.xml.sax.SAXException (always)
335 */
336 public void error(SAXParseException e) throws SAXException
337 {
338 throw new SAXException(
339 "Document contains invalid XML: \n"
340 + "SAXParseException: "+ e.getMessage());
341 }
342
343
344 /***
345 * Fatal error-handling function supplied to parser.
346 * Always raises a SAXException describing the fatal error.
347 *
348 * @param e An exception containing the fatal error.
349 *
350 * @throws org.xml.sax.SAXException (always)
351 */
352 public void fatalError(SAXParseException e) throws SAXException
353 {
354 throw new SAXException(
355 "Document contains an error that stops validation: \n"
356 + "SAXParseException: "+ e.getMessage());
357 }
358
359
360 /***
361 * Provides the actual functionality to syntactically and semantically
362 * validate an input XML file against its schema(s), using the
363 * Xerces Java XML parser.
364 *
365 * @param inputSource A (pre-initialised) SAX input source for the
366 * input XML.
367 *
368 * @throws org.xml.sax.SAXException if input document is invalid.
369 */
370 protected void parse(InputSource inputSource) throws SAXException, IOException
371 {
372 Log.trace("Parsing "+inputSource.getPublicId()+" ("+inputSource.getSystemId()+")...");
373
374
375
376 SAXParser parser = new SAXParser();
377 parser.setErrorHandler(this);
378
379
380
381
382 if (useNamespaceMap)
383 {
384 parser.setContentHandler(this);
385 }
386
387
388
389 try
390 {
391
392
393 parser.setFeature(
394 "http://xml.org/sax/features/validation", true);
395
396
397
398 parser.setFeature(
399 "http://apache.org/xml/features/validation/schema", true);
400
401 if (validateSchema)
402 {
403
404
405 parser.setFeature(
406 "http://apache.org/xml/features/validation/schema-full-checking",
407 true);
408 }
409
410
411
412 if (useNamespaces)
413 {
414 parser.setFeature("http://xml.org/sax/features/namespaces", true);
415
416
417
418 if (useNamespaceMap)
419 {
420 String canonicalSchemas = "";
421 Iterator iterator;
422 for (iterator = currNamespaceMap.entrySet().iterator();
423 iterator.hasNext(); )
424 {
425 Map.Entry me = (Map.Entry)iterator.next();
426 canonicalSchemas = canonicalSchemas
427 + (String)me.getKey() + " "
428 + (String)me.getValue() + " ";
429 }
430 parser.setProperty(
431 "http://apache.org/xml/properties/schema/external-schemaLocation",
432 canonicalSchemas);
433 }
434 }
435 else
436 {
437 parser.setFeature("http://xml.org/sax/features/namespaces", false);
438 }
439
440
441
442
443
444
445
446
447
448
449 }
450 catch (SAXNotRecognizedException e)
451 {
452 throw new SAXException(
453 "\nThis parser does not recognise a requested feature: \n"
454 + "SAXNotRecognizedException: "+ e.getMessage());
455 }
456 catch (SAXNotSupportedException e)
457 {
458 throw new SAXException(
459 "\nThis parser does not support a requested feature: \n"
460 + "SAXNotSupportedException: "+ e.getMessage());
461 }
462
463
464
465
466
467 isRoot = true;
468 parser.parse(inputSource);
469
470
471
472
473
474
475
476 }
477 }
478