1# Copyright (c) 2015, 2016, 2017 dbohdan
2#
3# Permission is hereby granted, free of charge, to any person obtaining a copy
4# of this software and associated documentation files (the "Software"), to deal
5# in the Software without restriction, including without limitation the rights
6# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7# copies of the Software, and to permit persons to whom the Software is
8# furnished to do so, subject to the following conditions:
9#
10# The above copyright notice and this permission notice shall be included in
11# all copies or substantial portions of the Software.
12#
13# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19# THE SOFTWARE.
20
21# Sqawk, an SQL Awk.
22# Copyright (C) 2015, 2016, 2017 dbohdan
23# License: MIT
24
25namespace eval ::sqawk {}
26
27# Performs SQL queries on files and channels.
28::snit::type ::sqawk::sqawk {
29 # Internal object state.
30 variable tables {}
31 variable defaultTableNames [split abcdefghijklmnopqrstuvwxyz ""]
32 variable formatToParser
33 variable formatToSerializer
34
35 # Options.
36 option -database
37 option -ofs
38 option -ors
39
40 option -destroytables -default true
41 option -outputformat -default awk
42 option -parsers -default {} -configuremethod Set-and-update-format-list
43 option -serializers -default {} -configuremethod Set-and-update-format-list
44
45 # Methods.
46 constructor {} {
47 # Register parsers and serializers.
48 $self configure -parsers [namespace children ::sqawk::parsers]
49 $self configure -serializers [namespace children ::sqawk::serializers]
50 }
51
52 destructor {
53 if {[$self cget -destroytables]} {
54 dict for {_ tableObj} $tables {
55 $tableObj destroy
56 }
57 }
58 }
59
60 # Update the related format dictionary when the parser or the serializer
61 # list option is set.
62 method Set-and-update-format-list {option value} {
63 set optToDict {
64 -parsers formatToParser
65 -serializers formatToSerializer
66 }
67 set possibleOpts [dict keys $optToDict]
68 if {$option ni $possibleOpts} {
69 error "Set-and-update-format-list can't set the option \"$option\""
70 }
71 set options($option) $value
72
73 set dictName [dict get $optToDict $option]
74 set $dictName {}
75 # For each parser/serializer...
76 foreach ns $value {
77 foreach format [set ${ns}::formats] {
78 dict set $dictName $format $ns
79 }
80 }
81 }
82
83 # Parse $data from $format into a list of rows.
84 method Parse {format data fileOptions} {
85 set error [catch {
86 set ns [dict get $formatToParser $format]
87 }]
88 if {$error} {
89 error "unknown input format: \"$format\""
90 }
91 set parseOptions [set ${ns}::options]
92 return [${ns}::parse $data \
93 [::sqawk::override-keys $parseOptions $fileOptions]]
94 }
95
96 # Serialize a list of rows into text in the format $format.
97 method Serialize {format data sqawkOptions} {
98 # Parse $format.
99 set splitFormat [split $format ,]
100 set formatName [lindex $splitFormat 0]
101 set formatOptions {}
102 foreach option [lrange $splitFormat 1 end] {
103 lassign [split $option =] key value
104 lappend formatOptions $key $value
105 }
106 set error [catch {
107 set ns [dict get $formatToSerializer $formatName]
108 }]
109 if {$error} {
110 error "unknown output format: \"$formatName\""
111 }
112
113 # Get the dict containing the options the serializer accepts with their
114 # default values.
115 set so [set ${ns}::options]
116 # Set the two main options for the "awk" serializer. "awk" is a special
117 # case: its options are set based on separate command line arguments
118 # whose values are passed to us in $sqawkOptions.
119 if {$formatName eq {awk}} {
120 if {[dict exists $formatOptions ofs]} {
121 error {to set the field separator for the "awk" output format\
122 please use the command line option "-OFS" instead of\
123 the format option "ofs"}
124 }
125 if {[dict exists $formatOptions ors]} {
126 error {to set the record separator for the "awk" output format\
127 please use the command line option "-OFS" instead of\
128 the format option "ofs"}
129 }
130 dict set so ofs [dict get $sqawkOptions -ofs]
131 dict set so ors [dict get $sqawkOptions -ors]
132 }
133 # Check if all the serializer options we have been given in $format are
134 # valid. Replace the default values with the actual values.
135 foreach {key value} $formatOptions {
136 if {[dict exists $so $key]} {
137 dict set so $key $value
138 } else {
139 error "unknown option for output format\
140 \"$formatName\":\ \"$key\""
141 }
142 }
143 return [${ns}::serialize $data $so]
144 }
145
146 # Read data from a file or a channel into a new database table. The filename
147 # or channel to read from and the options for how to read and store the data
148 # are in all set in the dictionary $fileOptions.
149 method read-file fileOptions {
150 # Set the default table name ("a", "b", "c", ..., "z").
151 set defaultTableName [lindex $defaultTableNames [dict size $tables]]
152 # Set the default column name prefix equal to the table name.
153 ::sqawk::dict-ensure-default fileOptions table $defaultTableName
154 ::sqawk::dict-ensure-default fileOptions F0 1
155 ::sqawk::dict-ensure-default fileOptions csvquote \"
156 ::sqawk::dict-ensure-default fileOptions csvsep ,
157 ::sqawk::dict-ensure-default fileOptions format awk
158 ::sqawk::dict-ensure-default fileOptions merge {}
159 ::sqawk::dict-ensure-default fileOptions prefix \
160 [dict get $fileOptions table]
161
162 array set metadata $fileOptions
163
164 # Read the data.
165 if {[info exists metadata(channel)]} {
166 set ch $metadata(channel)
167 } elseif {$metadata(filename) eq "-"} {
168 set ch stdin
169 } else {
170 set ch [open $metadata(filename)]
171 }
172 set raw [read $ch]
173 close $ch
174
175 set rows [$self Parse $metadata(format) $raw $fileOptions]
176 unset raw
177
178 # Create and configure a new table object.
179 set newTable [::sqawk::table create %AUTO%]
180 $newTable configure \
181 -database [$self cget -database] \
182 -dbtable $metadata(table) \
183 -columnprefix $metadata(prefix) \
184 -f0 $metadata(F0) \
185 -maxnf $metadata(NF) \
186 -modenf $metadata(MNF)
187 # Configure datatypes.
188 if {[info exists metadata(datatypes)]} {
189 $newTable configure -datatypes [split $metadata(datatypes) ,]
190 }
191 # Configure column names.
192 set header {}
193 if {[info exists metadata(header)] && $metadata(header)} {
194 # Remove the header from $rows.
195 set rows [lassign $rows headerF0]
196 # Strip the first field (a0/b0/...) from the header.
197 set header [lrange $headerF0 1 end]
198 }
199 # Override the header with custom column names.
200 if {[info exists metadata(columns)]} {
201 set customColumnNames [split $metadata(columns) ,]
202 set header [list \
203 {*}[lrange $customColumnNames \
204 0 [llength $customColumnNames]-1] \
205 {*}[lrange $header \
206 [llength $customColumnNames] end]]
207 }
208 $newTable configure -header $header
209
210 $newTable initialize
211
212 # Insert rows in the table.
213 $newTable insert-rows $rows
214
215 dict set tables $metadata(table) $newTable
216 return $newTable
217 }
218
219 # Perform query $query and output the result to $channel.
220 method perform-query {query {channel stdout}} {
221 # For each row returned...
222 set outputRecords {}
223 [$self cget -database] eval $query results {
224 set outputRecord {}
225 set keys $results(*)
226 foreach key $keys {
227 lappend outputRecord $key $results($key)
228 }
229 lappend outputRecords $outputRecord
230 }
231 set sqawkOptions {}
232 foreach option [$self info options] {
233 dict set sqawkOptions $option [$self cget $option]
234 }
235 set output [$self Serialize [$self cget -outputformat] $outputRecords \
236 $sqawkOptions]
237 puts -nonewline $channel $output
238 }
239}
240