/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
 */

/*
 * The macros in this file follow the ABNF of RFC-3986; the rule(s) that a
 * macro renders are quoted in the comment preceding it. See
 * org.opengrok.indexer.util.StringUtils URI_CHARS_PAT where a regex
 * in accordance with the following definition is maintained.
 *
 * URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 *
 * BrowseableURI is a scheme from BrowseableURI_scheme followed by URI_tail,
 * and URI_tail is everything from the ":" onward.
 */
BrowseableURI = {BrowseableURI_scheme} {URI_tail}
URI_tail = ":" {URI_hier_part}
    ("?" {URI_query})?
    ("#" {URI_fragment})?

/*
 * hier-part     = "//" authority path-abempty
 *               / path-absolute
 *               / path-rootless
 *               / path-empty ; N.b. not used in OpenGrok
 *
 * NOTE(review): both path alternatives below are prefixed with a literal "/"
 * that the ABNF quoted above does not have -- confirm this is intentional
 * before changing it, since it affects what is matched.
 */
URI_hier_part = (
    "//" {URI_authority} {URI_path_abempty} |
    "/" ({URI_path_absolute} | {URI_path_rootless})
    )

/*
 * scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 *
 * Only ftp, http and https (letters in either case) are accepted here rather
 * than the general scheme rule.
 */
BrowseableURI_scheme = ([Ff][Tt][Pp] | [Hh][Tt][Tt][Pp][Ss]?)

/*
 * authority     = [ userinfo "@" ] host [ ":" port ]
 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 * host          = IP-literal / IPv4address / reg-name
 * port          = *DIGIT
 */
URI_authority = ({URI_userinfo} "@")?
    {URI_host}
    (":" {URI_port})?
/*
 * Parts of the authority: zero or more userinfo characters, one of the three
 * host forms, and a possibly-empty run of digits for the port.
 */
URI_userinfo = (
    {URI_unreserved} |
    {URI_pct_encoded} |
    {URI_sub_delims} |
    ":"
    )*
URI_host = (
    {URI_IP_literal} |
    {URI_IPv4address} |
    {URI_reg_name}
    )
URI_port = {DIGIT}*

/*
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 */
URI_IP_literal = "[" ({URI_IPv6address} | {URI_IPvFuture}) "]"

/*
 * IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
 */
URI_IPvFuture = "v" {HEXDIG}+ "."
    ({URI_unreserved} | {URI_sub_delims} | ":")+

/*
 * IPv6address   =                            6( h16 ":" ) ls32
 *               /                       "::" 5( h16 ":" ) ls32
 *               / [               h16 ] "::" 4( h16 ":" ) ls32
 *               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
 *               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
 *               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
 *               / [ *4( h16 ":" ) h16 ] "::"              ls32
 *               / [ *5( h16 ":" ) h16 ] "::"              h16
 *               / [ *6( h16 ":" ) h16 ] "::"
 *
 * One alternative per ABNF line, in the same order.
 */
URI_IPv6address = (
    ({URI_h16} ":"){6} {URI_ls32} |
    "::" ({URI_h16} ":"){5} {URI_ls32} |
    {URI_h16}? "::" ({URI_h16} ":"){4} {URI_ls32} |
    (({URI_h16} ":")? {URI_h16})? "::" ({URI_h16} ":"){3} {URI_ls32} |
    (({URI_h16} ":"){0,2} {URI_h16})? "::" ({URI_h16} ":"){2} {URI_ls32} |
    (({URI_h16} ":"){0,3} {URI_h16})? "::" {URI_h16} ":" {URI_ls32} |
    (({URI_h16} ":"){0,4} {URI_h16})? "::" {URI_ls32} |
    (({URI_h16} ":"){0,5} {URI_h16})? "::" {URI_h16} |
    (({URI_h16} ":"){0,6} {URI_h16})? "::"
    )

/*
 * h16           = 1*4HEXDIG
 * ls32          = ( h16 ":" h16 ) / IPv4address
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 */
URI_h16 = {HEXDIG}{1,4}
URI_ls32 = ({URI_h16} ":" {URI_h16} | {URI_IPv4address})
URI_IPv4address = ({URI_dec_octet} ("." {URI_dec_octet}){3})

/*
 * dec-octet     = DIGIT                 ; 0-9
 *               / %x31-39 DIGIT         ; 10-99
 *               / "1" 2DIGIT            ; 100-199
 *               / "2" %x30-34 DIGIT     ; 200-249
 *               / "25" %x30-35          ; 250-255
 */
URI_dec_octet = (
    {DIGIT} |                   // 0-9
    [1-9] {DIGIT} |             // 10-99
    "1" {DIGIT}{2} |            // 100-199
    "2" [0-4] {DIGIT} |         // 200-249
    "25" [0-5]                  // 250-255
    )

/*
 * reg-name      = *( unreserved / pct-encoded / sub-delims )
 */
URI_reg_name = (
    {URI_unreserved} |
    {URI_pct_encoded} |
    {URI_sub_delims}
    )*

/*
 * path          = path-abempty    ; begins with "/" or is empty
 *               / path-absolute   ; begins with "/" but not "//"
 *               / path-noscheme   ; begins with a non-colon segment
 *               / path-rootless   ; begins with a segment
 *               / path-empty      ; zero characters
 *
 * path-abempty  = *( "/" segment )
 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
 * path-noscheme = segment-nz-nc *( "/" segment ) ; N.b. not used in OpenGrok
 * path-rootless = segment-nz *( "/" segment )
 * path-empty    = 0<pchar> ; N.b. not used in OpenGrok
 */
URI_path_abempty = ("/" {URI_segment})*
URI_path_absolute = "/" ({URI_segment_nz} ("/" {URI_segment})*)?
URI_path_rootless = {URI_segment_nz} ("/" {URI_segment})*

/*
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *               ; N.b. not used in OpenGrok
 * pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
URI_segment = {URI_pchar}*
URI_segment_nz = {URI_pchar}+
URI_pchar = (
    {URI_unreserved} |
    {URI_pct_encoded} |
    {URI_sub_delims} |
    [:@]
    )

/*
 * query         = *( pchar / "/" / "?" )
 */
URI_query = ({URI_pchar} | [/\?])*

/*
 * fragment      = *( pchar / "/" / "?" )
 */
URI_fragment = ({URI_pchar} | [/\?])*

/*
 * pct-encoded   = "%" HEXDIG HEXDIG
 */
URI_pct_encoded = "%" {HEXDIG}{2}

/*
 * unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 * reserved      = gen-delims / sub-delims ; N.b. not used in OpenGrok
 * gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 *               ; N.b. not used in OpenGrok
 * sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *               / "*" / "+" / "," / ";" / "="
 */
URI_unreserved = ({ASCII_ALPHA} | {DIGIT} | [\-\._~])
URI_sub_delims = [\!\$&\'\(\)\*\+,;=]

/*
 * Terminal character classes used by the macros above.
 */
ASCII_ALPHA = [A-Za-z]
HEXDIG = [0-9A-Fa-f]
DIGIT = [0-9]