1*eeb7e5b3SAdam Hornáček# Copyright (c) 2015, 2016, 2017 dbohdan 2*eeb7e5b3SAdam Hornáček# 3*eeb7e5b3SAdam Hornáček# Permission is hereby granted, free of charge, to any person obtaining a copy 4*eeb7e5b3SAdam Hornáček# of this software and associated documentation files (the "Software"), to deal 5*eeb7e5b3SAdam Hornáček# in the Software without restriction, including without limitation the rights 6*eeb7e5b3SAdam Hornáček# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7*eeb7e5b3SAdam Hornáček# copies of the Software, and to permit persons to whom the Software is 8*eeb7e5b3SAdam Hornáček# furnished to do so, subject to the following conditions: 9*eeb7e5b3SAdam Hornáček# 10*eeb7e5b3SAdam Hornáček# The above copyright notice and this permission notice shall be included in 11*eeb7e5b3SAdam Hornáček# all copies or substantial portions of the Software. 12*eeb7e5b3SAdam Hornáček# 13*eeb7e5b3SAdam Hornáček# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14*eeb7e5b3SAdam Hornáček# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15*eeb7e5b3SAdam Hornáček# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16*eeb7e5b3SAdam Hornáček# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17*eeb7e5b3SAdam Hornáček# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18*eeb7e5b3SAdam Hornáček# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19*eeb7e5b3SAdam Hornáček# THE SOFTWARE. 20*eeb7e5b3SAdam Hornáček 21*eeb7e5b3SAdam Hornáček# Sqawk, an SQL Awk. 22*eeb7e5b3SAdam Hornáček# Copyright (C) 2015, 2016, 2017 dbohdan 23*eeb7e5b3SAdam Hornáček# License: MIT 24*eeb7e5b3SAdam Hornáček 25*eeb7e5b3SAdam Hornáčeknamespace eval ::sqawk {} 26*eeb7e5b3SAdam Hornáček 27*eeb7e5b3SAdam Hornáček# Performs SQL queries on files and channels. 28*eeb7e5b3SAdam Hornáček::snit::type ::sqawk::sqawk { 29*eeb7e5b3SAdam Hornáček # Internal object state. 30*eeb7e5b3SAdam Hornáček variable tables {} 31*eeb7e5b3SAdam Hornáček variable defaultTableNames [split abcdefghijklmnopqrstuvwxyz ""] 32*eeb7e5b3SAdam Hornáček variable formatToParser 33*eeb7e5b3SAdam Hornáček variable formatToSerializer 34*eeb7e5b3SAdam Hornáček 35*eeb7e5b3SAdam Hornáček # Options. 36*eeb7e5b3SAdam Hornáček option -database 37*eeb7e5b3SAdam Hornáček option -ofs 38*eeb7e5b3SAdam Hornáček option -ors 39*eeb7e5b3SAdam Hornáček 40*eeb7e5b3SAdam Hornáček option -destroytables -default true 41*eeb7e5b3SAdam Hornáček option -outputformat -default awk 42*eeb7e5b3SAdam Hornáček option -parsers -default {} -configuremethod Set-and-update-format-list 43*eeb7e5b3SAdam Hornáček option -serializers -default {} -configuremethod Set-and-update-format-list 44*eeb7e5b3SAdam Hornáček 45*eeb7e5b3SAdam Hornáček # Methods. 46*eeb7e5b3SAdam Hornáček constructor {} { 47*eeb7e5b3SAdam Hornáček # Register parsers and serializers. 48*eeb7e5b3SAdam Hornáček $self configure -parsers [namespace children ::sqawk::parsers] 49*eeb7e5b3SAdam Hornáček $self configure -serializers [namespace children ::sqawk::serializers] 50*eeb7e5b3SAdam Hornáček } 51*eeb7e5b3SAdam Hornáček 52*eeb7e5b3SAdam Hornáček destructor { 53*eeb7e5b3SAdam Hornáček if {[$self cget -destroytables]} { 54*eeb7e5b3SAdam Hornáček dict for {_ tableObj} $tables { 55*eeb7e5b3SAdam Hornáček $tableObj destroy 56*eeb7e5b3SAdam Hornáček } 57*eeb7e5b3SAdam Hornáček } 58*eeb7e5b3SAdam Hornáček } 59*eeb7e5b3SAdam Hornáček 60*eeb7e5b3SAdam Hornáček # Update the related format dictionary when the parser or the serializer 61*eeb7e5b3SAdam Hornáček # list option is set. 62*eeb7e5b3SAdam Hornáček method Set-and-update-format-list {option value} { 63*eeb7e5b3SAdam Hornáček set optToDict { 64*eeb7e5b3SAdam Hornáček -parsers formatToParser 65*eeb7e5b3SAdam Hornáček -serializers formatToSerializer 66*eeb7e5b3SAdam Hornáček } 67*eeb7e5b3SAdam Hornáček set possibleOpts [dict keys $optToDict] 68*eeb7e5b3SAdam Hornáček if {$option ni $possibleOpts} { 69*eeb7e5b3SAdam Hornáček error "Set-and-update-format-list can't set the option \"$option\"" 70*eeb7e5b3SAdam Hornáček } 71*eeb7e5b3SAdam Hornáček set options($option) $value 72*eeb7e5b3SAdam Hornáček 73*eeb7e5b3SAdam Hornáček set dictName [dict get $optToDict $option] 74*eeb7e5b3SAdam Hornáček set $dictName {} 75*eeb7e5b3SAdam Hornáček # For each parser/serializer... 76*eeb7e5b3SAdam Hornáček foreach ns $value { 77*eeb7e5b3SAdam Hornáček foreach format [set ${ns}::formats] { 78*eeb7e5b3SAdam Hornáček dict set $dictName $format $ns 79*eeb7e5b3SAdam Hornáček } 80*eeb7e5b3SAdam Hornáček } 81*eeb7e5b3SAdam Hornáček } 82*eeb7e5b3SAdam Hornáček 83*eeb7e5b3SAdam Hornáček # Parse $data from $format into a list of rows. 84*eeb7e5b3SAdam Hornáček method Parse {format data fileOptions} { 85*eeb7e5b3SAdam Hornáček set error [catch { 86*eeb7e5b3SAdam Hornáček set ns [dict get $formatToParser $format] 87*eeb7e5b3SAdam Hornáček }] 88*eeb7e5b3SAdam Hornáček if {$error} { 89*eeb7e5b3SAdam Hornáček error "unknown input format: \"$format\"" 90*eeb7e5b3SAdam Hornáček } 91*eeb7e5b3SAdam Hornáček set parseOptions [set ${ns}::options] 92*eeb7e5b3SAdam Hornáček return [${ns}::parse $data \ 93*eeb7e5b3SAdam Hornáček [::sqawk::override-keys $parseOptions $fileOptions]] 94*eeb7e5b3SAdam Hornáček } 95*eeb7e5b3SAdam Hornáček 96*eeb7e5b3SAdam Hornáček # Serialize a list of rows into text in the format $format. 97*eeb7e5b3SAdam Hornáček method Serialize {format data sqawkOptions} { 98*eeb7e5b3SAdam Hornáček # Parse $format. 99*eeb7e5b3SAdam Hornáček set splitFormat [split $format ,] 100*eeb7e5b3SAdam Hornáček set formatName [lindex $splitFormat 0] 101*eeb7e5b3SAdam Hornáček set formatOptions {} 102*eeb7e5b3SAdam Hornáček foreach option [lrange $splitFormat 1 end] { 103*eeb7e5b3SAdam Hornáček lassign [split $option =] key value 104*eeb7e5b3SAdam Hornáček lappend formatOptions $key $value 105*eeb7e5b3SAdam Hornáček } 106*eeb7e5b3SAdam Hornáček set error [catch { 107*eeb7e5b3SAdam Hornáček set ns [dict get $formatToSerializer $formatName] 108*eeb7e5b3SAdam Hornáček }] 109*eeb7e5b3SAdam Hornáček if {$error} { 110*eeb7e5b3SAdam Hornáček error "unknown output format: \"$formatName\"" 111*eeb7e5b3SAdam Hornáček } 112*eeb7e5b3SAdam Hornáček 113*eeb7e5b3SAdam Hornáček # Get the dict containing the options the serializer accepts with their 114*eeb7e5b3SAdam Hornáček # default values. 115*eeb7e5b3SAdam Hornáček set so [set ${ns}::options] 116*eeb7e5b3SAdam Hornáček # Set the two main options for the "awk" serializer. "awk" is a special 117*eeb7e5b3SAdam Hornáček # case: its options are set based on separate command line arguments 118*eeb7e5b3SAdam Hornáček # whose values are passed to us in $sqawkOptions. 119*eeb7e5b3SAdam Hornáček if {$formatName eq {awk}} { 120*eeb7e5b3SAdam Hornáček if {[dict exists $formatOptions ofs]} { 121*eeb7e5b3SAdam Hornáček error {to set the field separator for the "awk" output format\ 122*eeb7e5b3SAdam Hornáček please use the command line option "-OFS" instead of\ 123*eeb7e5b3SAdam Hornáček the format option "ofs"} 124*eeb7e5b3SAdam Hornáček } 125*eeb7e5b3SAdam Hornáček if {[dict exists $formatOptions ors]} { 126*eeb7e5b3SAdam Hornáček error {to set the record separator for the "awk" output format\ 127*eeb7e5b3SAdam Hornáček please use the command line option "-OFS" instead of\ 128*eeb7e5b3SAdam Hornáček the format option "ofs"} 129*eeb7e5b3SAdam Hornáček } 130*eeb7e5b3SAdam Hornáček dict set so ofs [dict get $sqawkOptions -ofs] 131*eeb7e5b3SAdam Hornáček dict set so ors [dict get $sqawkOptions -ors] 132*eeb7e5b3SAdam Hornáček } 133*eeb7e5b3SAdam Hornáček # Check if all the serializer options we have been given in $format are 134*eeb7e5b3SAdam Hornáček # valid. Replace the default values with the actual values. 135*eeb7e5b3SAdam Hornáček foreach {key value} $formatOptions { 136*eeb7e5b3SAdam Hornáček if {[dict exists $so $key]} { 137*eeb7e5b3SAdam Hornáček dict set so $key $value 138*eeb7e5b3SAdam Hornáček } else { 139*eeb7e5b3SAdam Hornáček error "unknown option for output format\ 140*eeb7e5b3SAdam Hornáček \"$formatName\":\ \"$key\"" 141*eeb7e5b3SAdam Hornáček } 142*eeb7e5b3SAdam Hornáček } 143*eeb7e5b3SAdam Hornáček return [${ns}::serialize $data $so] 144*eeb7e5b3SAdam Hornáček } 145*eeb7e5b3SAdam Hornáček 146*eeb7e5b3SAdam Hornáček # Read data from a file or a channel into a new database table. The filename 147*eeb7e5b3SAdam Hornáček # or channel to read from and the options for how to read and store the data 148*eeb7e5b3SAdam Hornáček # are in all set in the dictionary $fileOptions. 149*eeb7e5b3SAdam Hornáček method read-file fileOptions { 150*eeb7e5b3SAdam Hornáček # Set the default table name ("a", "b", "c", ..., "z"). 151*eeb7e5b3SAdam Hornáček set defaultTableName [lindex $defaultTableNames [dict size $tables]] 152*eeb7e5b3SAdam Hornáček # Set the default column name prefix equal to the table name. 153*eeb7e5b3SAdam Hornáček ::sqawk::dict-ensure-default fileOptions table $defaultTableName 154*eeb7e5b3SAdam Hornáček ::sqawk::dict-ensure-default fileOptions F0 1 155*eeb7e5b3SAdam Hornáček ::sqawk::dict-ensure-default fileOptions csvquote \" 156*eeb7e5b3SAdam Hornáček ::sqawk::dict-ensure-default fileOptions csvsep , 157*eeb7e5b3SAdam Hornáček ::sqawk::dict-ensure-default fileOptions format awk 158*eeb7e5b3SAdam Hornáček ::sqawk::dict-ensure-default fileOptions merge {} 159*eeb7e5b3SAdam Hornáček ::sqawk::dict-ensure-default fileOptions prefix \ 160*eeb7e5b3SAdam Hornáček [dict get $fileOptions table] 161*eeb7e5b3SAdam Hornáček 162*eeb7e5b3SAdam Hornáček array set metadata $fileOptions 163*eeb7e5b3SAdam Hornáček 164*eeb7e5b3SAdam Hornáček # Read the data. 165*eeb7e5b3SAdam Hornáček if {[info exists metadata(channel)]} { 166*eeb7e5b3SAdam Hornáček set ch $metadata(channel) 167*eeb7e5b3SAdam Hornáček } elseif {$metadata(filename) eq "-"} { 168*eeb7e5b3SAdam Hornáček set ch stdin 169*eeb7e5b3SAdam Hornáček } else { 170*eeb7e5b3SAdam Hornáček set ch [open $metadata(filename)] 171*eeb7e5b3SAdam Hornáček } 172*eeb7e5b3SAdam Hornáček set raw [read $ch] 173*eeb7e5b3SAdam Hornáček close $ch 174*eeb7e5b3SAdam Hornáček 175*eeb7e5b3SAdam Hornáček set rows [$self Parse $metadata(format) $raw $fileOptions] 176*eeb7e5b3SAdam Hornáček unset raw 177*eeb7e5b3SAdam Hornáček 178*eeb7e5b3SAdam Hornáček # Create and configure a new table object. 179*eeb7e5b3SAdam Hornáček set newTable [::sqawk::table create %AUTO%] 180*eeb7e5b3SAdam Hornáček $newTable configure \ 181*eeb7e5b3SAdam Hornáček -database [$self cget -database] \ 182*eeb7e5b3SAdam Hornáček -dbtable $metadata(table) \ 183*eeb7e5b3SAdam Hornáček -columnprefix $metadata(prefix) \ 184*eeb7e5b3SAdam Hornáček -f0 $metadata(F0) \ 185*eeb7e5b3SAdam Hornáček -maxnf $metadata(NF) \ 186*eeb7e5b3SAdam Hornáček -modenf $metadata(MNF) 187*eeb7e5b3SAdam Hornáček # Configure datatypes. 188*eeb7e5b3SAdam Hornáček if {[info exists metadata(datatypes)]} { 189*eeb7e5b3SAdam Hornáček $newTable configure -datatypes [split $metadata(datatypes) ,] 190*eeb7e5b3SAdam Hornáček } 191*eeb7e5b3SAdam Hornáček # Configure column names. 192*eeb7e5b3SAdam Hornáček set header {} 193*eeb7e5b3SAdam Hornáček if {[info exists metadata(header)] && $metadata(header)} { 194*eeb7e5b3SAdam Hornáček # Remove the header from $rows. 195*eeb7e5b3SAdam Hornáček set rows [lassign $rows headerF0] 196*eeb7e5b3SAdam Hornáček # Strip the first field (a0/b0/...) from the header. 197*eeb7e5b3SAdam Hornáček set header [lrange $headerF0 1 end] 198*eeb7e5b3SAdam Hornáček } 199*eeb7e5b3SAdam Hornáček # Override the header with custom column names. 200*eeb7e5b3SAdam Hornáček if {[info exists metadata(columns)]} { 201*eeb7e5b3SAdam Hornáček set customColumnNames [split $metadata(columns) ,] 202*eeb7e5b3SAdam Hornáček set header [list \ 203*eeb7e5b3SAdam Hornáček {*}[lrange $customColumnNames \ 204*eeb7e5b3SAdam Hornáček 0 [llength $customColumnNames]-1] \ 205*eeb7e5b3SAdam Hornáček {*}[lrange $header \ 206*eeb7e5b3SAdam Hornáček [llength $customColumnNames] end]] 207*eeb7e5b3SAdam Hornáček } 208*eeb7e5b3SAdam Hornáček $newTable configure -header $header 209*eeb7e5b3SAdam Hornáček 210*eeb7e5b3SAdam Hornáček $newTable initialize 211*eeb7e5b3SAdam Hornáček 212*eeb7e5b3SAdam Hornáček # Insert rows in the table. 213*eeb7e5b3SAdam Hornáček $newTable insert-rows $rows 214*eeb7e5b3SAdam Hornáček 215*eeb7e5b3SAdam Hornáček dict set tables $metadata(table) $newTable 216*eeb7e5b3SAdam Hornáček return $newTable 217*eeb7e5b3SAdam Hornáček } 218*eeb7e5b3SAdam Hornáček 219*eeb7e5b3SAdam Hornáček # Perform query $query and output the result to $channel. 220*eeb7e5b3SAdam Hornáček method perform-query {query {channel stdout}} { 221*eeb7e5b3SAdam Hornáček # For each row returned... 222*eeb7e5b3SAdam Hornáček set outputRecords {} 223*eeb7e5b3SAdam Hornáček [$self cget -database] eval $query results { 224*eeb7e5b3SAdam Hornáček set outputRecord {} 225*eeb7e5b3SAdam Hornáček set keys $results(*) 226*eeb7e5b3SAdam Hornáček foreach key $keys { 227*eeb7e5b3SAdam Hornáček lappend outputRecord $key $results($key) 228*eeb7e5b3SAdam Hornáček } 229*eeb7e5b3SAdam Hornáček lappend outputRecords $outputRecord 230*eeb7e5b3SAdam Hornáček } 231*eeb7e5b3SAdam Hornáček set sqawkOptions {} 232*eeb7e5b3SAdam Hornáček foreach option [$self info options] { 233*eeb7e5b3SAdam Hornáček dict set sqawkOptions $option [$self cget $option] 234*eeb7e5b3SAdam Hornáček } 235*eeb7e5b3SAdam Hornáček set output [$self Serialize [$self cget -outputformat] $outputRecords \ 236*eeb7e5b3SAdam Hornáček $sqawkOptions] 237*eeb7e5b3SAdam Hornáček puts -nonewline $channel $output 238*eeb7e5b3SAdam Hornáček } 239*eeb7e5b3SAdam Hornáček} 240