#!/bin/bash

# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# See LICENSE.txt included in this distribution for the specific
# language governing permissions and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at LICENSE.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END

#
# Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
#

#
# A script that scans through the generated xrefs in an OpenGrok data
# directory and checks if they contain valid XHTML 1.0 data.
#
# The script should be run with one argument, which should be the path
# to the OpenGrok data directory (the parent of the xref directory).
# If no problems are found, the script will be silent. Otherwise, it
# will print which files are ill-formed and what the problems are.
#
# The script has only been tested on Debian systems with the
# libxml2-utils and w3c-sgml-lib packages installed.
#

# Exactly one argument is required, and it must contain an xref directory.
if [ $# -ne 1 ] || ! [ -d "$1"/xref ] ; then
    echo "Usage: $(basename -- "$0") opengrok-data-dir"
    exit 1
fi

# Local copy of the XHTML 1.0 Strict DTD, passed to xmllint --dtdvalid below.
dtd=/usr/share/xml/w3c-sgml-lib/schema/dtd/REC-xhtml1-20020801/xhtml1-strict.dtd

if ! [ -f "$dtd" ]; then
    echo "Cannot find $dtd."
    echo "Please install w3c-sgml-lib."
    exit 1
fi

# command -v is the shell builtin way to test for a command; unlike
# which(1) it does not depend on an external program being installed.
if ! command -v xmllint > /dev/null 2>&1; then
    echo "Cannot find the xmllint executable."
    echo "Please install libxml2-utils."
    exit 1
fi

# Print the XHTML prologue that is placed in front of each xref fragment
# so that the fragment can be validated as a complete document.
# The delimiter is quoted so the text is emitted literally.
xml_begin() {
    cat <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title></title></head>
<body>
<div><pre>
EOF
}

# Print the closing tags matching the elements opened by xml_begin.
xml_end() {
    cat <<'EOF'
</pre></div>
</body>
</html>
EOF
}

# Scratch file that holds the xmllint output for the file being checked.
if ! tmp=$(mktemp); then
    echo "Cannot create a temporary file."
    exit 1
fi

# Remove the scratch file when the script exits, not only after a
# complete run of the loop below.
trap 'rm -f -- "$tmp"' EXIT

# globstar: let ** match any depth of subdirectories.
# nullglob: if no *.gz file exists, run the loop zero times instead of
#           once with the unexpanded pattern.
shopt -s globstar nullglob

for i in "$1"/xref/**/*.gz
do
    # Wrap the decompressed xref in the XHTML scaffolding and validate it.
    # Only the output of xmllint is captured; it is shown on failure.
    if ! (xml_begin; zcat -- "$i"; xml_end) \
        | xmllint --noout --valid --dtdvalid "$dtd" - > "$tmp" 2>&1
    then
        echo "*** ILL-FORMED DATA IN $i ***"
        echo
        cat "$tmp"
        echo
    fi
done