1# Copyright (c) 2015, 2016, 2017 dbohdan 2# 3# Permission is hereby granted, free of charge, to any person obtaining a copy 4# of this software and associated documentation files (the "Software"), to deal 5# in the Software without restriction, including without limitation the rights 6# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7# copies of the Software, and to permit persons to whom the Software is 8# furnished to do so, subject to the following conditions: 9# 10# The above copyright notice and this permission notice shall be included in 11# all copies or substantial portions of the Software. 12# 13# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19# THE SOFTWARE. 20 21# Sqawk, an SQL Awk. 22# Copyright (C) 2015, 2016, 2017 dbohdan 23# License: MIT 24 25namespace eval ::sqawk {} 26 27# Performs SQL queries on files and channels. 28::snit::type ::sqawk::sqawk { 29 # Internal object state. 30 variable tables {} 31 variable defaultTableNames [split abcdefghijklmnopqrstuvwxyz ""] 32 variable formatToParser 33 variable formatToSerializer 34 35 # Options. 36 option -database 37 option -ofs 38 option -ors 39 40 option -destroytables -default true 41 option -outputformat -default awk 42 option -parsers -default {} -configuremethod Set-and-update-format-list 43 option -serializers -default {} -configuremethod Set-and-update-format-list 44 45 # Methods. 46 constructor {} { 47 # Register parsers and serializers. 48 $self configure -parsers [namespace children ::sqawk::parsers] 49 $self configure -serializers [namespace children ::sqawk::serializers] 50 } 51 52 destructor { 53 if {[$self cget -destroytables]} { 54 dict for {_ tableObj} $tables { 55 $tableObj destroy 56 } 57 } 58 } 59 60 # Update the related format dictionary when the parser or the serializer 61 # list option is set. 62 method Set-and-update-format-list {option value} { 63 set optToDict { 64 -parsers formatToParser 65 -serializers formatToSerializer 66 } 67 set possibleOpts [dict keys $optToDict] 68 if {$option ni $possibleOpts} { 69 error "Set-and-update-format-list can't set the option \"$option\"" 70 } 71 set options($option) $value 72 73 set dictName [dict get $optToDict $option] 74 set $dictName {} 75 # For each parser/serializer... 76 foreach ns $value { 77 foreach format [set ${ns}::formats] { 78 dict set $dictName $format $ns 79 } 80 } 81 } 82 83 # Parse $data from $format into a list of rows. 84 method Parse {format data fileOptions} { 85 set error [catch { 86 set ns [dict get $formatToParser $format] 87 }] 88 if {$error} { 89 error "unknown input format: \"$format\"" 90 } 91 set parseOptions [set ${ns}::options] 92 return [${ns}::parse $data \ 93 [::sqawk::override-keys $parseOptions $fileOptions]] 94 } 95 96 # Serialize a list of rows into text in the format $format. 97 method Serialize {format data sqawkOptions} { 98 # Parse $format. 99 set splitFormat [split $format ,] 100 set formatName [lindex $splitFormat 0] 101 set formatOptions {} 102 foreach option [lrange $splitFormat 1 end] { 103 lassign [split $option =] key value 104 lappend formatOptions $key $value 105 } 106 set error [catch { 107 set ns [dict get $formatToSerializer $formatName] 108 }] 109 if {$error} { 110 error "unknown output format: \"$formatName\"" 111 } 112 113 # Get the dict containing the options the serializer accepts with their 114 # default values. 115 set so [set ${ns}::options] 116 # Set the two main options for the "awk" serializer. "awk" is a special 117 # case: its options are set based on separate command line arguments 118 # whose values are passed to us in $sqawkOptions. 119 if {$formatName eq {awk}} { 120 if {[dict exists $formatOptions ofs]} { 121 error {to set the field separator for the "awk" output format\ 122 please use the command line option "-OFS" instead of\ 123 the format option "ofs"} 124 } 125 if {[dict exists $formatOptions ors]} { 126 error {to set the record separator for the "awk" output format\ 127 please use the command line option "-OFS" instead of\ 128 the format option "ofs"} 129 } 130 dict set so ofs [dict get $sqawkOptions -ofs] 131 dict set so ors [dict get $sqawkOptions -ors] 132 } 133 # Check if all the serializer options we have been given in $format are 134 # valid. Replace the default values with the actual values. 135 foreach {key value} $formatOptions { 136 if {[dict exists $so $key]} { 137 dict set so $key $value 138 } else { 139 error "unknown option for output format\ 140 \"$formatName\":\ \"$key\"" 141 } 142 } 143 return [${ns}::serialize $data $so] 144 } 145 146 # Read data from a file or a channel into a new database table. The filename 147 # or channel to read from and the options for how to read and store the data 148 # are in all set in the dictionary $fileOptions. 149 method read-file fileOptions { 150 # Set the default table name ("a", "b", "c", ..., "z"). 151 set defaultTableName [lindex $defaultTableNames [dict size $tables]] 152 # Set the default column name prefix equal to the table name. 153 ::sqawk::dict-ensure-default fileOptions table $defaultTableName 154 ::sqawk::dict-ensure-default fileOptions F0 1 155 ::sqawk::dict-ensure-default fileOptions csvquote \" 156 ::sqawk::dict-ensure-default fileOptions csvsep , 157 ::sqawk::dict-ensure-default fileOptions format awk 158 ::sqawk::dict-ensure-default fileOptions merge {} 159 ::sqawk::dict-ensure-default fileOptions prefix \ 160 [dict get $fileOptions table] 161 162 array set metadata $fileOptions 163 164 # Read the data. 165 if {[info exists metadata(channel)]} { 166 set ch $metadata(channel) 167 } elseif {$metadata(filename) eq "-"} { 168 set ch stdin 169 } else { 170 set ch [open $metadata(filename)] 171 } 172 set raw [read $ch] 173 close $ch 174 175 set rows [$self Parse $metadata(format) $raw $fileOptions] 176 unset raw 177 178 # Create and configure a new table object. 179 set newTable [::sqawk::table create %AUTO%] 180 $newTable configure \ 181 -database [$self cget -database] \ 182 -dbtable $metadata(table) \ 183 -columnprefix $metadata(prefix) \ 184 -f0 $metadata(F0) \ 185 -maxnf $metadata(NF) \ 186 -modenf $metadata(MNF) 187 # Configure datatypes. 188 if {[info exists metadata(datatypes)]} { 189 $newTable configure -datatypes [split $metadata(datatypes) ,] 190 } 191 # Configure column names. 192 set header {} 193 if {[info exists metadata(header)] && $metadata(header)} { 194 # Remove the header from $rows. 195 set rows [lassign $rows headerF0] 196 # Strip the first field (a0/b0/...) from the header. 197 set header [lrange $headerF0 1 end] 198 } 199 # Override the header with custom column names. 200 if {[info exists metadata(columns)]} { 201 set customColumnNames [split $metadata(columns) ,] 202 set header [list \ 203 {*}[lrange $customColumnNames \ 204 0 [llength $customColumnNames]-1] \ 205 {*}[lrange $header \ 206 [llength $customColumnNames] end]] 207 } 208 $newTable configure -header $header 209 210 $newTable initialize 211 212 # Insert rows in the table. 213 $newTable insert-rows $rows 214 215 dict set tables $metadata(table) $newTable 216 return $newTable 217 } 218 219 # Perform query $query and output the result to $channel. 220 method perform-query {query {channel stdout}} { 221 # For each row returned... 222 set outputRecords {} 223 [$self cget -database] eval $query results { 224 set outputRecord {} 225 set keys $results(*) 226 foreach key $keys { 227 lappend outputRecord $key $results($key) 228 } 229 lappend outputRecords $outputRecord 230 } 231 set sqawkOptions {} 232 foreach option [$self info options] { 233 dict set sqawkOptions $option [$self cget $option] 234 } 235 set output [$self Serialize [$self cget -outputformat] $outputRecords \ 236 $sqawkOptions] 237 puts -nonewline $channel $output 238 } 239} 240