xref: /OpenGrok/dev/validate-xref (revision b9e7c7d3ba0a80391d83f3fefad9a334f37e4e03)
#!/bin/bash

# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# See LICENSE.txt included in this distribution for the specific
# language governing permissions and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at LICENSE.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END

#
# Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
#

#
# A script that scans through the generated xrefs in an OpenGrok data
# directory and checks if they contain valid XHTML 1.0 data.
#
# The script should be run with one argument, which should be the path
# to the OpenGrok data directory (the parent of the xref directory).
# If no problems are found, the script will be silent. Otherwise, it
# will print which files are ill-formed and what the problems are.
#
# The script has only been tested on Debian systems with the
# libxml2-utils and w3c-sgml-lib packages installed.
#
36*b9e7c7d3SVladimir Kotal
# Validate the command line: exactly one argument is required, and it must
# name a directory that contains an "xref" subdirectory.
if [ $# -ne 1 ] || ! [ -d "$1"/xref ]; then
    # Diagnostics go to stderr so they never mix with validation output.
    echo "Usage: ${0##*/} opengrok-data-dir" >&2
    exit 1
fi

# XHTML 1.0 Strict DTD that the xrefs are validated against. The path is
# where the w3c-sgml-lib package installs it.
dtd=/usr/share/xml/w3c-sgml-lib/schema/dtd/REC-xhtml1-20020801/xhtml1-strict.dtd

if ! [ -f "$dtd" ]; then
    echo "Cannot find $dtd." >&2
    echo "Please install w3c-sgml-lib." >&2
    exit 1
fi

# "command -v" is a shell builtin; unlike "which" it is always available
# and its exit status reliably tells whether xmllint can be run.
if ! command -v xmllint > /dev/null 2>&1; then
    echo "Cannot find the xmllint executable." >&2
    echo "Please install libxml2-utils." >&2
    exit 1
fi
55*b9e7c7d3SVladimir Kotal
# Write to stdout the fixed XHTML 1.0 Strict prologue (XML declaration,
# DOCTYPE, <html>, <head>, and the opening <body><div><pre>) that is put
# in front of an xref fragment so it forms a complete document.
xml_begin() {
    # One argument per output line; printf reuses the format for each.
    printf '%s\n' \
        '<?xml version="1.0" encoding="UTF-8"?>' \
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"' \
        '    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' \
        '<html xmlns="http://www.w3.org/1999/xhtml">' \
        '<head><title></title></head>' \
        '<body>' \
        '<div><pre>'
}
67*b9e7c7d3SVladimir Kotal
# Write to stdout the closing tags that match the prologue emitted by
# xml_begin: </pre></div>, </body> and </html>, one per line.
xml_end() {
    printf '%s\n' \
        '</pre></div>' \
        '</body>' \
        '</html>'
}
75*b9e7c7d3SVladimir Kotal
# Scratch file holding xmllint's output for the file currently being checked.
tmp=$(mktemp) || {
    echo "Cannot create a temporary file." >&2
    exit 1
}

# Remove the scratch file on every exit path. The signal traps turn an
# interruption into a normal exit so that the EXIT trap also runs then.
trap 'rm -f -- "$tmp"' EXIT
trap 'exit 1' HUP INT TERM

# globstar: let ** match files at any depth below xref/.
# nullglob: expand to nothing when no *.gz file exists, so the loop body is
# skipped instead of running once on the literal, unmatched pattern.
shopt -s globstar nullglob

for i in "$1"/xref/**/*.gz
do
    # Wrap the decompressed xref fragment in a minimal XHTML document and
    # validate it against the DTD. xmllint's messages are captured in $tmp
    # and shown only when validation fails, keeping a clean run silent.
    if ! (xml_begin; zcat "$i"; xml_end) \
            | xmllint --noout --valid --dtdvalid "$dtd" - > "$tmp" 2>&1
    then
        echo "*** ILL-FORMED DATA IN $i ***"
        echo
        cat "$tmp"
        echo
    fi
done
93