xref: /OpenGrok/opengrok-indexer/src/test/resources/analysis/tcl/sample.tcl (revision eeb7e5b33d1bcc524fcc9d1d560447b044e286a4)
1# Copyright (c) 2015, 2016, 2017 dbohdan
2#
3# Permission is hereby granted, free of charge, to any person obtaining a copy
4# of this software and associated documentation files (the "Software"), to deal
5# in the Software without restriction, including without limitation the rights
6# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7# copies of the Software, and to permit persons to whom the Software is
8# furnished to do so, subject to the following conditions:
9#
10# The above copyright notice and this permission notice shall be included in
11# all copies or substantial portions of the Software.
12#
13# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19# THE SOFTWARE.
20
21# Sqawk, an SQL Awk.
22# Copyright (C) 2015, 2016, 2017 dbohdan
23# License: MIT
24
25namespace eval ::sqawk {}
26
27# Performs SQL queries on files and channels.
28::snit::type ::sqawk::sqawk {
29    # Internal object state.
30    variable tables {}
31    variable defaultTableNames [split abcdefghijklmnopqrstuvwxyz ""]
32    variable formatToParser
33    variable formatToSerializer
34
35    # Options.
36    option -database
37    option -ofs
38    option -ors
39
40    option -destroytables -default true
41    option -outputformat -default awk
42    option -parsers -default {} -configuremethod Set-and-update-format-list
43    option -serializers -default {} -configuremethod Set-and-update-format-list
44
45    # Methods.
46    constructor {} {
47        # Register parsers and serializers.
48        $self configure -parsers [namespace children ::sqawk::parsers]
49        $self configure -serializers [namespace children ::sqawk::serializers]
50    }
51
52    destructor {
53        if {[$self cget -destroytables]} {
54            dict for {_ tableObj} $tables {
55                $tableObj destroy
56            }
57        }
58    }
59
60    # Update the related format dictionary when the parser or the serializer
61    # list option is set.
62    method Set-and-update-format-list {option value} {
63        set optToDict {
64            -parsers formatToParser
65            -serializers formatToSerializer
66        }
67        set possibleOpts [dict keys $optToDict]
68        if {$option ni $possibleOpts} {
69            error "Set-and-update-format-list can't set the option \"$option\""
70        }
71        set options($option) $value
72
73        set dictName [dict get $optToDict $option]
74        set $dictName {}
75        # For each parser/serializer...
76        foreach ns $value {
77            foreach format [set ${ns}::formats] {
78                dict set $dictName $format $ns
79            }
80        }
81    }
82
83    # Parse $data from $format into a list of rows.
84    method Parse {format data fileOptions} {
85        set error [catch {
86            set ns [dict get $formatToParser $format]
87        }]
88        if {$error} {
89            error "unknown input format: \"$format\""
90        }
91        set parseOptions [set ${ns}::options]
92        return [${ns}::parse $data \
93                [::sqawk::override-keys $parseOptions $fileOptions]]
94    }
95
96    # Serialize a list of rows into text in the format $format.
97    method Serialize {format data sqawkOptions} {
98        # Parse $format.
99        set splitFormat [split $format ,]
100        set formatName [lindex $splitFormat 0]
101        set formatOptions {}
102        foreach option [lrange $splitFormat 1 end] {
103            lassign [split $option =] key value
104            lappend formatOptions $key $value
105        }
106        set error [catch {
107            set ns [dict get $formatToSerializer $formatName]
108        }]
109        if {$error} {
110            error "unknown output format: \"$formatName\""
111        }
112
113        # Get the dict containing the options the serializer accepts with their
114        # default values.
115        set so [set ${ns}::options]
116        # Set the two main options for the "awk" serializer. "awk" is a special
117        # case: its options are set based on separate command line arguments
118        # whose values are passed to us in $sqawkOptions.
119        if {$formatName eq {awk}} {
120            if {[dict exists $formatOptions ofs]} {
121                error {to set the field separator for the "awk" output format\
122                        please use the command line option "-OFS" instead of\
123                        the format option "ofs"}
124            }
125            if {[dict exists $formatOptions ors]} {
126                error {to set the record separator for the "awk" output format\
127                        please use the command line option "-OFS" instead of\
128                        the format option "ofs"}
129            }
130            dict set so ofs [dict get $sqawkOptions -ofs]
131            dict set so ors [dict get $sqawkOptions -ors]
132        }
133        # Check if all the serializer options we have been given in $format are
134        # valid. Replace the default values with the actual values.
135        foreach {key value} $formatOptions {
136            if {[dict exists $so $key]} {
137                dict set so $key $value
138            } else {
139                error "unknown option for output format\
140                        \"$formatName\":\ \"$key\""
141            }
142        }
143        return [${ns}::serialize $data $so]
144    }
145
146    # Read data from a file or a channel into a new database table. The filename
147    # or channel to read from and the options for how to read and store the data
148    # are in all set in the dictionary $fileOptions.
149    method read-file fileOptions {
150        # Set the default table name ("a", "b", "c", ..., "z").
151        set defaultTableName [lindex $defaultTableNames [dict size $tables]]
152        # Set the default column name prefix equal to the table name.
153        ::sqawk::dict-ensure-default fileOptions table $defaultTableName
154        ::sqawk::dict-ensure-default fileOptions F0 1
155        ::sqawk::dict-ensure-default fileOptions csvquote \"
156        ::sqawk::dict-ensure-default fileOptions csvsep ,
157        ::sqawk::dict-ensure-default fileOptions format awk
158        ::sqawk::dict-ensure-default fileOptions merge {}
159        ::sqawk::dict-ensure-default fileOptions prefix \
160                [dict get $fileOptions table]
161
162        array set metadata $fileOptions
163
164        # Read the data.
165        if {[info exists metadata(channel)]} {
166            set ch $metadata(channel)
167        } elseif {$metadata(filename) eq "-"} {
168            set ch stdin
169        } else {
170            set ch [open $metadata(filename)]
171        }
172        set raw [read $ch]
173        close $ch
174
175        set rows [$self Parse $metadata(format) $raw $fileOptions]
176        unset raw
177
178        # Create and configure a new table object.
179        set newTable [::sqawk::table create %AUTO%]
180        $newTable configure \
181                -database [$self cget -database] \
182                -dbtable $metadata(table) \
183                -columnprefix $metadata(prefix) \
184                -f0 $metadata(F0) \
185                -maxnf $metadata(NF) \
186                -modenf $metadata(MNF)
187        # Configure datatypes.
188        if {[info exists metadata(datatypes)]} {
189            $newTable configure -datatypes [split $metadata(datatypes) ,]
190        }
191        # Configure column names.
192        set header {}
193        if {[info exists metadata(header)] && $metadata(header)} {
194            # Remove the header from $rows.
195            set rows [lassign $rows headerF0]
196            # Strip the first field (a0/b0/...) from the header.
197            set header [lrange $headerF0 1 end]
198        }
199        # Override the header with custom column names.
200        if {[info exists metadata(columns)]} {
201            set customColumnNames [split $metadata(columns) ,]
202            set header [list \
203                    {*}[lrange $customColumnNames \
204                            0 [llength $customColumnNames]-1] \
205                    {*}[lrange $header \
206                            [llength $customColumnNames] end]]
207        }
208        $newTable configure -header $header
209
210        $newTable initialize
211
212        # Insert rows in the table.
213        $newTable insert-rows $rows
214
215        dict set tables $metadata(table) $newTable
216        return $newTable
217    }
218
219    # Perform query $query and output the result to $channel.
220    method perform-query {query {channel stdout}} {
221        # For each row returned...
222        set outputRecords {}
223        [$self cget -database] eval $query results {
224            set outputRecord {}
225            set keys $results(*)
226            foreach key $keys {
227                lappend outputRecord $key $results($key)
228            }
229            lappend outputRecords $outputRecord
230        }
231        set sqawkOptions {}
232        foreach option [$self info options] {
233            dict set sqawkOptions $option [$self cget $option]
234        }
235        set output [$self Serialize [$self cget -outputformat] $outputRecords \
236                $sqawkOptions]
237        puts -nonewline $channel $output
238    }
239}
240