#!/bin/bash

# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# See LICENSE.txt included in this distribution for the specific
# language governing permissions and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at LICENSE.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END

#
# Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
#

#
# A script that scans through the generated xrefs in an OpenGrok data
# directory and checks if they contain valid XHTML 1.0 data.
#
# The script should be run with one argument, which should be the path
# to the OpenGrok data directory (the parent of the xref directory).
# If no problems are found, the script will be silent. Otherwise, it
# will print which files are ill-formed and what the problems are.
#
# Usage errors and missing prerequisites are reported on stderr with
# exit status 1; the ill-formed data report itself goes to stdout.
#
# The script has only been tested on Debian systems with the
# libxml2-utils and w3c-sgml-lib packages installed.
#

# Exactly one argument is required, and it must contain an xref directory.
if [[ $# -ne 1 || ! -d "$1/xref" ]]; then
  echo "Usage: ${0##*/} opengrok-data-dir" >&2
  exit 1
fi

# Local copy of the XHTML 1.0 Strict DTD (shipped by w3c-sgml-lib).
readonly dtd=/usr/share/xml/w3c-sgml-lib/schema/dtd/REC-xhtml1-20020801/xhtml1-strict.dtd

if ! [[ -f "$dtd" ]]; then
  {
    echo "Cannot find $dtd."
    echo "Please install w3c-sgml-lib."
  } >&2
  exit 1
fi

# 'command -v' is the portable way to probe for an executable in PATH.
if ! command -v xmllint > /dev/null 2>&1; then
  {
    echo "Cannot find the xmllint executable."
    echo "Please install libxml2-utils."
  } >&2
  exit 1
fi

#######################################
# Print the XHTML boilerplate that must precede an xref fragment so
# that the fragment can be validated as a complete document.
# Arguments: none
# Outputs:   document prologue up to and including the opening <pre>
#######################################
xml_begin() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title></title></head>
<body>
<div><pre>
EOF
}

#######################################
# Print the closing tags that match the prologue from xml_begin.
# Arguments: none
# Outputs:   closing </pre>, </div>, </body> and </html> tags
#######################################
xml_end() {
  cat <<'EOF'
</pre></div>
</body>
</html>
EOF
}

# Scratch file that holds the xmllint diagnostics for the current xref.
tmp=$(mktemp) || {
  echo "Cannot create a temporary file." >&2
  exit 1
}
# Remove the scratch file on every exit path, including interruption.
trap 'rm -f -- "$tmp"' EXIT

# globstar: let ** descend into subdirectories of xref.
# nullglob: if there are no *.gz files at all, iterate zero times instead
#           of once over the literal, unmatched pattern.
shopt -s globstar nullglob

for i in "$1"/xref/**/*.gz; do
  # Wrap the decompressed fragment in a full XHTML document and validate
  # it. The status tested is xmllint's (the last stage of the pipeline);
  # its diagnostics are captured and shown only when validation fails.
  if ! { xml_begin; zcat -- "$i"; xml_end; } \
    | xmllint --noout --valid --dtdvalid "$dtd" - > "$tmp" 2>&1
  then
    echo "*** ILL-FORMED DATA IN $i ***"
    echo
    cat "$tmp"
    echo
  fi
done