xref: /OpenGrok/dev/validate-xref (revision b9e7c7d3ba0a80391d83f3fefad9a334f37e4e03)
1#!/bin/bash
2
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# See LICENSE.txt included in this distribution for the specific
10# language governing permissions and limitations under the License.
11#
12# When distributing Covered Code, include this CDDL HEADER in each
13# file and include the License file at LICENSE.txt.
14# If applicable, add the following below this CDDL HEADER, with the
15# fields enclosed by brackets "[]" replaced with your own identifying
16# information: Portions Copyright [yyyy] [name of copyright owner]
17#
18# CDDL HEADER END
19
20#
21# Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
22#
23
#
# A script that scans through the generated xrefs in an OpenGrok data
# directory and checks that each of them is valid XHTML 1.0 Strict.
#
# The script should be run with one argument, which should be the path
# to the OpenGrok data directory (the parent of the xref directory).
# If no problems are found, the script will be silent. Otherwise, it
# will print which files are ill-formed and what the problems are.
#
# The script has only been tested on Debian systems with the
# libxml2-utils and w3c-sgml-lib packages installed.
#
36
# Require exactly one argument that names a directory containing an "xref"
# subdirectory. Usage and setup errors go to stderr so that stdout carries
# nothing but the validation report.
if [ $# -ne 1 ] || ! [ -d "$1"/xref ]; then
    # ${0##*/} strips the directory part of the script path without
    # word-splitting it, so paths containing spaces are handled.
    echo "Usage: ${0##*/} opengrok-data-dir" >&2
    exit 1
fi

# DTD handed to xmllint --dtdvalid by the validation loop.
dtd=/usr/share/xml/w3c-sgml-lib/schema/dtd/REC-xhtml1-20020801/xhtml1-strict.dtd

if ! [ -f "$dtd" ]; then
    echo "Cannot find $dtd." >&2
    echo "Please install w3c-sgml-lib." >&2
    exit 1
fi

# command -v is a shell builtin, so this check does not depend on the
# external "which" utility being installed.
if ! command -v xmllint > /dev/null 2>&1; then
    echo "Cannot find the xmllint executable." >&2
    echo "Please install libxml2-utils." >&2
    exit 1
fi
55
# Write to stdout the opening part of a minimal XHTML 1.0 Strict document:
# the XML declaration, the doctype, an empty <head>, and the start tags up
# to and including <div><pre>. The caller is expected to emit the xref
# fragment next and then close the document with xml_end.
xml_begin() {
    printf '%s\n' \
        '<?xml version="1.0" encoding="UTF-8"?>' \
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"' \
        '    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' \
        '<html xmlns="http://www.w3.org/1999/xhtml">' \
        '<head><title></title></head>' \
        '<body>' \
        '<div><pre>'
}
67
# Write to stdout the closing tags that match the elements opened by
# xml_begin: </pre></div>, </body> and </html>, one group per line.
xml_end() {
    printf '%s\n' \
        '</pre></div>' \
        '</body>' \
        '</html>'
}
75
# Scratch file that receives xmllint's diagnostics for the xref currently
# being checked; it is overwritten on every iteration.
tmp=$(mktemp) || {
    echo "Cannot create a temporary file." >&2
    exit 1
}

# Remove the scratch file when the script exits, so that leaving the loop
# early does not leave it behind.
trap 'rm -f -- "$tmp"' EXIT

# Let ** in the pattern below descend into subdirectories of xref.
shopt -s globstar

for i in "$1"/xref/**/*.gz
do
    # When nothing matches, the pattern itself is passed through unexpanded.
    # Skip it (and anything else that is not a regular file) instead of
    # handing a nonexistent path to zcat.
    [ -f "$i" ] || continue

    # Wrap the decompressed xref fragment in a minimal XHTML document and
    # validate it against the DTD. Only xmllint's own output is captured in
    # the scratch file, and it is shown solely when xmllint reports failure.
    if ! (xml_begin; zcat -- "$i"; xml_end) \
            | xmllint --noout --valid --dtdvalid "$dtd" - > "$tmp" 2>&1
    then
        printf '%s\n' "*** ILL-FORMED DATA IN $i ***"
        echo
        cat -- "$tmp"
        echo
    fi
done
93