xref: /OpenGrok/opengrok-indexer/src/test/resources/analysis/tcl/sample.tcl (revision eeb7e5b33d1bcc524fcc9d1d560447b044e286a4)
1*eeb7e5b3SAdam Hornáček# Copyright (c) 2015, 2016, 2017 dbohdan
2*eeb7e5b3SAdam Hornáček#
3*eeb7e5b3SAdam Hornáček# Permission is hereby granted, free of charge, to any person obtaining a copy
4*eeb7e5b3SAdam Hornáček# of this software and associated documentation files (the "Software"), to deal
5*eeb7e5b3SAdam Hornáček# in the Software without restriction, including without limitation the rights
6*eeb7e5b3SAdam Hornáček# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7*eeb7e5b3SAdam Hornáček# copies of the Software, and to permit persons to whom the Software is
8*eeb7e5b3SAdam Hornáček# furnished to do so, subject to the following conditions:
9*eeb7e5b3SAdam Hornáček#
10*eeb7e5b3SAdam Hornáček# The above copyright notice and this permission notice shall be included in
11*eeb7e5b3SAdam Hornáček# all copies or substantial portions of the Software.
12*eeb7e5b3SAdam Hornáček#
13*eeb7e5b3SAdam Hornáček# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14*eeb7e5b3SAdam Hornáček# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15*eeb7e5b3SAdam Hornáček# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16*eeb7e5b3SAdam Hornáček# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17*eeb7e5b3SAdam Hornáček# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18*eeb7e5b3SAdam Hornáček# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19*eeb7e5b3SAdam Hornáček# THE SOFTWARE.
20*eeb7e5b3SAdam Hornáček
21*eeb7e5b3SAdam Hornáček# Sqawk, an SQL Awk.
22*eeb7e5b3SAdam Hornáček# Copyright (C) 2015, 2016, 2017 dbohdan
23*eeb7e5b3SAdam Hornáček# License: MIT
24*eeb7e5b3SAdam Hornáček
25*eeb7e5b3SAdam Hornáčeknamespace eval ::sqawk {}
26*eeb7e5b3SAdam Hornáček
27*eeb7e5b3SAdam Hornáček# Performs SQL queries on files and channels.
28*eeb7e5b3SAdam Hornáček::snit::type ::sqawk::sqawk {
29*eeb7e5b3SAdam Hornáček    # Internal object state.
30*eeb7e5b3SAdam Hornáček    variable tables {}
31*eeb7e5b3SAdam Hornáček    variable defaultTableNames [split abcdefghijklmnopqrstuvwxyz ""]
32*eeb7e5b3SAdam Hornáček    variable formatToParser
33*eeb7e5b3SAdam Hornáček    variable formatToSerializer
34*eeb7e5b3SAdam Hornáček
35*eeb7e5b3SAdam Hornáček    # Options.
36*eeb7e5b3SAdam Hornáček    option -database
37*eeb7e5b3SAdam Hornáček    option -ofs
38*eeb7e5b3SAdam Hornáček    option -ors
39*eeb7e5b3SAdam Hornáček
40*eeb7e5b3SAdam Hornáček    option -destroytables -default true
41*eeb7e5b3SAdam Hornáček    option -outputformat -default awk
42*eeb7e5b3SAdam Hornáček    option -parsers -default {} -configuremethod Set-and-update-format-list
43*eeb7e5b3SAdam Hornáček    option -serializers -default {} -configuremethod Set-and-update-format-list
44*eeb7e5b3SAdam Hornáček
45*eeb7e5b3SAdam Hornáček    # Methods.
46*eeb7e5b3SAdam Hornáček    constructor {} {
47*eeb7e5b3SAdam Hornáček        # Register parsers and serializers.
48*eeb7e5b3SAdam Hornáček        $self configure -parsers [namespace children ::sqawk::parsers]
49*eeb7e5b3SAdam Hornáček        $self configure -serializers [namespace children ::sqawk::serializers]
50*eeb7e5b3SAdam Hornáček    }
51*eeb7e5b3SAdam Hornáček
52*eeb7e5b3SAdam Hornáček    destructor {
53*eeb7e5b3SAdam Hornáček        if {[$self cget -destroytables]} {
54*eeb7e5b3SAdam Hornáček            dict for {_ tableObj} $tables {
55*eeb7e5b3SAdam Hornáček                $tableObj destroy
56*eeb7e5b3SAdam Hornáček            }
57*eeb7e5b3SAdam Hornáček        }
58*eeb7e5b3SAdam Hornáček    }
59*eeb7e5b3SAdam Hornáček
60*eeb7e5b3SAdam Hornáček    # Update the related format dictionary when the parser or the serializer
61*eeb7e5b3SAdam Hornáček    # list option is set.
62*eeb7e5b3SAdam Hornáček    method Set-and-update-format-list {option value} {
63*eeb7e5b3SAdam Hornáček        set optToDict {
64*eeb7e5b3SAdam Hornáček            -parsers formatToParser
65*eeb7e5b3SAdam Hornáček            -serializers formatToSerializer
66*eeb7e5b3SAdam Hornáček        }
67*eeb7e5b3SAdam Hornáček        set possibleOpts [dict keys $optToDict]
68*eeb7e5b3SAdam Hornáček        if {$option ni $possibleOpts} {
69*eeb7e5b3SAdam Hornáček            error "Set-and-update-format-list can't set the option \"$option\""
70*eeb7e5b3SAdam Hornáček        }
71*eeb7e5b3SAdam Hornáček        set options($option) $value
72*eeb7e5b3SAdam Hornáček
73*eeb7e5b3SAdam Hornáček        set dictName [dict get $optToDict $option]
74*eeb7e5b3SAdam Hornáček        set $dictName {}
75*eeb7e5b3SAdam Hornáček        # For each parser/serializer...
76*eeb7e5b3SAdam Hornáček        foreach ns $value {
77*eeb7e5b3SAdam Hornáček            foreach format [set ${ns}::formats] {
78*eeb7e5b3SAdam Hornáček                dict set $dictName $format $ns
79*eeb7e5b3SAdam Hornáček            }
80*eeb7e5b3SAdam Hornáček        }
81*eeb7e5b3SAdam Hornáček    }
82*eeb7e5b3SAdam Hornáček
83*eeb7e5b3SAdam Hornáček    # Parse $data from $format into a list of rows.
84*eeb7e5b3SAdam Hornáček    method Parse {format data fileOptions} {
85*eeb7e5b3SAdam Hornáček        set error [catch {
86*eeb7e5b3SAdam Hornáček            set ns [dict get $formatToParser $format]
87*eeb7e5b3SAdam Hornáček        }]
88*eeb7e5b3SAdam Hornáček        if {$error} {
89*eeb7e5b3SAdam Hornáček            error "unknown input format: \"$format\""
90*eeb7e5b3SAdam Hornáček        }
91*eeb7e5b3SAdam Hornáček        set parseOptions [set ${ns}::options]
92*eeb7e5b3SAdam Hornáček        return [${ns}::parse $data \
93*eeb7e5b3SAdam Hornáček                [::sqawk::override-keys $parseOptions $fileOptions]]
94*eeb7e5b3SAdam Hornáček    }
95*eeb7e5b3SAdam Hornáček
96*eeb7e5b3SAdam Hornáček    # Serialize a list of rows into text in the format $format.
97*eeb7e5b3SAdam Hornáček    method Serialize {format data sqawkOptions} {
98*eeb7e5b3SAdam Hornáček        # Parse $format.
99*eeb7e5b3SAdam Hornáček        set splitFormat [split $format ,]
100*eeb7e5b3SAdam Hornáček        set formatName [lindex $splitFormat 0]
101*eeb7e5b3SAdam Hornáček        set formatOptions {}
102*eeb7e5b3SAdam Hornáček        foreach option [lrange $splitFormat 1 end] {
103*eeb7e5b3SAdam Hornáček            lassign [split $option =] key value
104*eeb7e5b3SAdam Hornáček            lappend formatOptions $key $value
105*eeb7e5b3SAdam Hornáček        }
106*eeb7e5b3SAdam Hornáček        set error [catch {
107*eeb7e5b3SAdam Hornáček            set ns [dict get $formatToSerializer $formatName]
108*eeb7e5b3SAdam Hornáček        }]
109*eeb7e5b3SAdam Hornáček        if {$error} {
110*eeb7e5b3SAdam Hornáček            error "unknown output format: \"$formatName\""
111*eeb7e5b3SAdam Hornáček        }
112*eeb7e5b3SAdam Hornáček
113*eeb7e5b3SAdam Hornáček        # Get the dict containing the options the serializer accepts with their
114*eeb7e5b3SAdam Hornáček        # default values.
115*eeb7e5b3SAdam Hornáček        set so [set ${ns}::options]
116*eeb7e5b3SAdam Hornáček        # Set the two main options for the "awk" serializer. "awk" is a special
117*eeb7e5b3SAdam Hornáček        # case: its options are set based on separate command line arguments
118*eeb7e5b3SAdam Hornáček        # whose values are passed to us in $sqawkOptions.
119*eeb7e5b3SAdam Hornáček        if {$formatName eq {awk}} {
120*eeb7e5b3SAdam Hornáček            if {[dict exists $formatOptions ofs]} {
121*eeb7e5b3SAdam Hornáček                error {to set the field separator for the "awk" output format\
122*eeb7e5b3SAdam Hornáček                        please use the command line option "-OFS" instead of\
123*eeb7e5b3SAdam Hornáček                        the format option "ofs"}
124*eeb7e5b3SAdam Hornáček            }
125*eeb7e5b3SAdam Hornáček            if {[dict exists $formatOptions ors]} {
126*eeb7e5b3SAdam Hornáček                error {to set the record separator for the "awk" output format\
127*eeb7e5b3SAdam Hornáček                        please use the command line option "-OFS" instead of\
128*eeb7e5b3SAdam Hornáček                        the format option "ofs"}
129*eeb7e5b3SAdam Hornáček            }
130*eeb7e5b3SAdam Hornáček            dict set so ofs [dict get $sqawkOptions -ofs]
131*eeb7e5b3SAdam Hornáček            dict set so ors [dict get $sqawkOptions -ors]
132*eeb7e5b3SAdam Hornáček        }
133*eeb7e5b3SAdam Hornáček        # Check if all the serializer options we have been given in $format are
134*eeb7e5b3SAdam Hornáček        # valid. Replace the default values with the actual values.
135*eeb7e5b3SAdam Hornáček        foreach {key value} $formatOptions {
136*eeb7e5b3SAdam Hornáček            if {[dict exists $so $key]} {
137*eeb7e5b3SAdam Hornáček                dict set so $key $value
138*eeb7e5b3SAdam Hornáček            } else {
139*eeb7e5b3SAdam Hornáček                error "unknown option for output format\
140*eeb7e5b3SAdam Hornáček                        \"$formatName\":\ \"$key\""
141*eeb7e5b3SAdam Hornáček            }
142*eeb7e5b3SAdam Hornáček        }
143*eeb7e5b3SAdam Hornáček        return [${ns}::serialize $data $so]
144*eeb7e5b3SAdam Hornáček    }
145*eeb7e5b3SAdam Hornáček
146*eeb7e5b3SAdam Hornáček    # Read data from a file or a channel into a new database table. The filename
147*eeb7e5b3SAdam Hornáček    # or channel to read from and the options for how to read and store the data
148*eeb7e5b3SAdam Hornáček    # are in all set in the dictionary $fileOptions.
149*eeb7e5b3SAdam Hornáček    method read-file fileOptions {
150*eeb7e5b3SAdam Hornáček        # Set the default table name ("a", "b", "c", ..., "z").
151*eeb7e5b3SAdam Hornáček        set defaultTableName [lindex $defaultTableNames [dict size $tables]]
152*eeb7e5b3SAdam Hornáček        # Set the default column name prefix equal to the table name.
153*eeb7e5b3SAdam Hornáček        ::sqawk::dict-ensure-default fileOptions table $defaultTableName
154*eeb7e5b3SAdam Hornáček        ::sqawk::dict-ensure-default fileOptions F0 1
155*eeb7e5b3SAdam Hornáček        ::sqawk::dict-ensure-default fileOptions csvquote \"
156*eeb7e5b3SAdam Hornáček        ::sqawk::dict-ensure-default fileOptions csvsep ,
157*eeb7e5b3SAdam Hornáček        ::sqawk::dict-ensure-default fileOptions format awk
158*eeb7e5b3SAdam Hornáček        ::sqawk::dict-ensure-default fileOptions merge {}
159*eeb7e5b3SAdam Hornáček        ::sqawk::dict-ensure-default fileOptions prefix \
160*eeb7e5b3SAdam Hornáček                [dict get $fileOptions table]
161*eeb7e5b3SAdam Hornáček
162*eeb7e5b3SAdam Hornáček        array set metadata $fileOptions
163*eeb7e5b3SAdam Hornáček
164*eeb7e5b3SAdam Hornáček        # Read the data.
165*eeb7e5b3SAdam Hornáček        if {[info exists metadata(channel)]} {
166*eeb7e5b3SAdam Hornáček            set ch $metadata(channel)
167*eeb7e5b3SAdam Hornáček        } elseif {$metadata(filename) eq "-"} {
168*eeb7e5b3SAdam Hornáček            set ch stdin
169*eeb7e5b3SAdam Hornáček        } else {
170*eeb7e5b3SAdam Hornáček            set ch [open $metadata(filename)]
171*eeb7e5b3SAdam Hornáček        }
172*eeb7e5b3SAdam Hornáček        set raw [read $ch]
173*eeb7e5b3SAdam Hornáček        close $ch
174*eeb7e5b3SAdam Hornáček
175*eeb7e5b3SAdam Hornáček        set rows [$self Parse $metadata(format) $raw $fileOptions]
176*eeb7e5b3SAdam Hornáček        unset raw
177*eeb7e5b3SAdam Hornáček
178*eeb7e5b3SAdam Hornáček        # Create and configure a new table object.
179*eeb7e5b3SAdam Hornáček        set newTable [::sqawk::table create %AUTO%]
180*eeb7e5b3SAdam Hornáček        $newTable configure \
181*eeb7e5b3SAdam Hornáček                -database [$self cget -database] \
182*eeb7e5b3SAdam Hornáček                -dbtable $metadata(table) \
183*eeb7e5b3SAdam Hornáček                -columnprefix $metadata(prefix) \
184*eeb7e5b3SAdam Hornáček                -f0 $metadata(F0) \
185*eeb7e5b3SAdam Hornáček                -maxnf $metadata(NF) \
186*eeb7e5b3SAdam Hornáček                -modenf $metadata(MNF)
187*eeb7e5b3SAdam Hornáček        # Configure datatypes.
188*eeb7e5b3SAdam Hornáček        if {[info exists metadata(datatypes)]} {
189*eeb7e5b3SAdam Hornáček            $newTable configure -datatypes [split $metadata(datatypes) ,]
190*eeb7e5b3SAdam Hornáček        }
191*eeb7e5b3SAdam Hornáček        # Configure column names.
192*eeb7e5b3SAdam Hornáček        set header {}
193*eeb7e5b3SAdam Hornáček        if {[info exists metadata(header)] && $metadata(header)} {
194*eeb7e5b3SAdam Hornáček            # Remove the header from $rows.
195*eeb7e5b3SAdam Hornáček            set rows [lassign $rows headerF0]
196*eeb7e5b3SAdam Hornáček            # Strip the first field (a0/b0/...) from the header.
197*eeb7e5b3SAdam Hornáček            set header [lrange $headerF0 1 end]
198*eeb7e5b3SAdam Hornáček        }
199*eeb7e5b3SAdam Hornáček        # Override the header with custom column names.
200*eeb7e5b3SAdam Hornáček        if {[info exists metadata(columns)]} {
201*eeb7e5b3SAdam Hornáček            set customColumnNames [split $metadata(columns) ,]
202*eeb7e5b3SAdam Hornáček            set header [list \
203*eeb7e5b3SAdam Hornáček                    {*}[lrange $customColumnNames \
204*eeb7e5b3SAdam Hornáček                            0 [llength $customColumnNames]-1] \
205*eeb7e5b3SAdam Hornáček                    {*}[lrange $header \
206*eeb7e5b3SAdam Hornáček                            [llength $customColumnNames] end]]
207*eeb7e5b3SAdam Hornáček        }
208*eeb7e5b3SAdam Hornáček        $newTable configure -header $header
209*eeb7e5b3SAdam Hornáček
210*eeb7e5b3SAdam Hornáček        $newTable initialize
211*eeb7e5b3SAdam Hornáček
212*eeb7e5b3SAdam Hornáček        # Insert rows in the table.
213*eeb7e5b3SAdam Hornáček        $newTable insert-rows $rows
214*eeb7e5b3SAdam Hornáček
215*eeb7e5b3SAdam Hornáček        dict set tables $metadata(table) $newTable
216*eeb7e5b3SAdam Hornáček        return $newTable
217*eeb7e5b3SAdam Hornáček    }
218*eeb7e5b3SAdam Hornáček
219*eeb7e5b3SAdam Hornáček    # Perform query $query and output the result to $channel.
220*eeb7e5b3SAdam Hornáček    method perform-query {query {channel stdout}} {
221*eeb7e5b3SAdam Hornáček        # For each row returned...
222*eeb7e5b3SAdam Hornáček        set outputRecords {}
223*eeb7e5b3SAdam Hornáček        [$self cget -database] eval $query results {
224*eeb7e5b3SAdam Hornáček            set outputRecord {}
225*eeb7e5b3SAdam Hornáček            set keys $results(*)
226*eeb7e5b3SAdam Hornáček            foreach key $keys {
227*eeb7e5b3SAdam Hornáček                lappend outputRecord $key $results($key)
228*eeb7e5b3SAdam Hornáček            }
229*eeb7e5b3SAdam Hornáček            lappend outputRecords $outputRecord
230*eeb7e5b3SAdam Hornáček        }
231*eeb7e5b3SAdam Hornáček        set sqawkOptions {}
232*eeb7e5b3SAdam Hornáček        foreach option [$self info options] {
233*eeb7e5b3SAdam Hornáček            dict set sqawkOptions $option [$self cget $option]
234*eeb7e5b3SAdam Hornáček        }
235*eeb7e5b3SAdam Hornáček        set output [$self Serialize [$self cget -outputformat] $outputRecords \
236*eeb7e5b3SAdam Hornáček                $sqawkOptions]
237*eeb7e5b3SAdam Hornáček        puts -nonewline $channel $output
238*eeb7e5b3SAdam Hornáček    }
239*eeb7e5b3SAdam Hornáček}
240