Index: main.mk ================================================================== --- main.mk +++ main.mk @@ -155,19 +155,24 @@ SFLAGS = $(TCLINC) -DSQLITE_THREADSAFE=0 -DSQLITE_ENABLE_FTS5 -DSQLITE_TCLMD5 -DTCLSH -Dmain=xmain $(TCLSH): $(SSRC) $(CC) -O2 -o $@ -I. $(SFLAGS) $(SSRC) $(TCLFLAGS) -searchdb: $(TCLSH) - mkdir -p doc/search.d/ - ./$(TCLSH) $(DOC)/search/buildsearchdb.tcl - cp $(DOC)/document_header.tcl doc/document_header.tcl - cp $(DOC)/document_header.tcl doc/search.d/document_header.tcl - cp $(DOC)/search/search.tcl doc/search +doc/search: $(TCLSH) $(DOC)/search/mkscript.tcl $(DOC)/search/search.tcl.in $(DOC)/search/wapp.tcl $(DOC)/document_header.tcl + ./$(TCLSH) $(DOC)/search/mkscript.tcl $(DOC)/search/search.tcl.in >doc/search chmod +x doc/search - cp $(DOC)/search/search.tcl doc/search.d/admin + +doc/search.d/admin: $(TCLSH) $(DOC)/search/mkscript.tcl $(DOC)/search/admin.tcl.in $(DOC)/search/wapp.tcl $(DOC)/document_header.tcl + mkdir -p doc/search.d/ + ./$(TCLSH) $(DOC)/search/mkscript.tcl $(DOC)/search/admin.tcl.in >doc/search.d/admin chmod +x doc/search.d/admin + +searchdb: $(TCLSH) doc/search doc/search.d/admin + ./$(TCLSH) $(DOC)/search/buildsearchdb.tcl + +# cp $(DOC)/search/search.tcl doc/search.d/admin +# chmod +x doc/search.d/admin fts5ext.so: $(DOC)/search/fts5ext.c gcc -shared -fPIC -I. -DSQLITE_EXT \ $(DOC)/search/fts5ext.c -o fts5ext.so ADDED search/admin.tcl.in Index: search/admin.tcl.in ================================================================== --- /dev/null +++ search/admin.tcl.in @@ -0,0 +1,124 @@ +#!/usr/bin/tclsh.docsrc +#### Import of wapp.tcl +INCLUDE wapp.tcl +#### End of wapp.tcl + +# Generate all header content for the output document +# +proc search_header {} { + wapp-trim { +DOCHEAD {Search SQLite Stats} {../} + } +} + +proc wapp-default {} { + # When running using the built-in webserver in Wapp (in other words, + # when not running as CGI) any filename that contains a "." loads + # directly from the filesystem. + if {[string match *//127.0.0.1:* [wapp-param BASE_URL]] + && [string match *.* [wapp-param PATH_INFO]] + } { + set altfile [file dir [wapp-param SCRIPT_FILENAME]]/../[wapp-param PATH_INFO] + set fd [open $altfile rb] + fconfigure $fd -translation binary + wapp-unsafe [read $fd] + close $fd + switch -glob -- $altfile { + *.html { + wapp-mimetype text/html + } + *.css { + wapp-mimetype text/css + } + *.gif { + wapp-mimetype image/gif + } + } + return + } + wapp-page-admin +} +proc wapp-page-admin {} { + wapp-allow-xorigin-params + wapp-content-security-policy off + if {[wapp-param-exists env]} { + search_header + wapp-trim { +

Environment

+
%html([wapp-debug-env])
+ } + return + } + sqlite3 db2 [file dir [wapp-param SCRIPT_FILENAME]]/searchlog.db + set where "" + set res "" + + set ipfilter "" + set ipaddr [wapp-param ip] + if {$ipaddr!=""} { + set where {WHERE ip = $ipaddr} + set ipfilter $ipaddr + } + + set checked "" + set isUnique [expr {[wapp-param unique 0]+0}] + if {$isUnique} { + set checked "checked" + } + + set limit [wapp-param limit 10] + set s10 "" + set s100 "" + set s1000 "" + if {$limit==10} {set s10 selected} + if {$limit==100} {set s100 selected} + if {$limit==1000} {set s1000 selected} + + search_header + set self [wapp-param PATH_HEAD] + wapp-trim { +
+
+
+ Results: + IP: + Unique: + +
+
+
+ +
IP Query Results Timestamp + } + + set i 0 + db2 eval " + SELECT rowid, ip, query, nres, timestamp FROM log $where + ORDER BY rowid DESC + " { + + if {$isUnique} { + if {[info exists seen($query)]} continue + set seen($query) 1 + } + + wapp-trim { +
%html($rowid) + %html($ip) + %html($query) + %html($nres)%html($timestamp) + } + incr i + if {$i >= $limit} break + } + wapp-subst { element, or -# c) A "." character. -# -# 3. 8 characters have been parsed and a
tag or "." character is -# encountered -# -proc docparse_callback {tag details} { - global P - set tag [string tolower $tag] - switch -glob -- $tag { - "" { - append P(text) " $details" - if {$P(isTitle)} { append P(title) $details } - if {[llength $P(fragments)]} { - append P(ftext) " $details" - } - } - - "title" { set P(isTitle) 1 } - "/title" { set P(isTitle) 0 } - - "a" { - array set D $details - if {[info exists D(name)]} { - if {[llength $P(fragments)]} { - lappend P(fragments) $P(ftitle) $P(ftext) - } - lappend P(fragments) $D(name) - set P(ftext) "" - set P(ftitle) "" - catch { unset P(ftitleclose) } - } - } - "h*" { - array set D $details - if {[info exists D(id)]} { - if {[llength $P(fragments)]} { - lappend P(fragments) $P(ftitle) $P(ftext) - } - lappend P(fragments) $D(id) - set P(ftext) "" - set P(ftitle) "" - } - } - - div { - array set D $details - if {[info exists D(class)] && $D(class) == "startsearch"} { - set P(text) "" - } - } - } - - set ftext [string trim $P(ftext) " \v\n"] - if {[string length $ftext]>4 && $P(ftitle) == ""} { - set blocktags [list \ - br td /td th /th p /p \ - h1 h2 h3 h4 h5 h /h1 /h2 /h3 /h4 /h5 /h - ] - if {[lsearch $blocktags $tag]>=0} { - set P(ftitle) $ftext - set P(ftext) "" - } elseif {[string length $ftext]>80} { - set idx [string last " " [string range $ftext 0 79]] - if {$idx<0} { set idx 80 } - set P(ftitle) [string range $ftext 0 [expr $idx-1]] - set P(ftext) [string range $ftext $idx end] - } - } -} - -proc findlinks_callback {tag details} { - global P - set doc $P(doc) - - set tag [string tolower $tag] - switch -glob -- $tag { - a { - array set D $details - if {[info exists D(href)]} { - if { [string range $D(href) 0 0]=="#" } { - set url "${doc}$D(href)" - } else { - set url "$D(href)" - } - - set P(url) $url - set P(link) "" - } - } - /a { - if {$P(url)!=""} { - db eval { UPDATE pagedata SET links = links || ' ' || $P(link) WHERE url=$P(url) } - } - set P(url) "" - set P(link) "" - } - - "" { - append P(link) " $details" - } - } -} - -proc trim {a} { - set L [split $a] - return [lsort -uniq $L] -} - -#========================================================================= -# Build the database. -# -proc rebuild_database {} { - - db transaction { - db eval { - DROP TABLE IF EXISTS pagedata; - CREATE TABLE pagedata( - url TEXT PRIMARY KEY, -- Relative URL for this document - links, -- Text of all links to this URI - title, -- Document or fragment title - content -- Document or fragment content - ); - } - - # Scan the file-system for HTML documents. Add each document found to - # the page and pagedata tables. - foreach file [document_list] { - set zHtml [readfile $file] - - array unset ::P - set ::P(text) "" ;# The full document text - set ::P(isTitle) 0 ;# True while parsing contents of - set ::P(fragments) [list] ;# List of document fragments parsed - set ::P(ftext) "" ;# Text of current document fragment - - parsehtml $zHtml docparse_callback - if {[info exists ::P(ftitle)]} { - lappend ::P(fragments) $::P(ftitle) $::P(ftext) - } - - set keyword "" - catch { set keyword $::K($file) } - if {![info exists ::P(title)]} {set ::P(title) "No Title"} - db eval { REPLACE INTO pagedata VALUES($file, '', $::P(title), $::P(text)) } - - foreach {name title text} $::P(fragments) { - set url "$file#$name" - puts $url - db eval { REPLACE INTO pagedata VALUES($url, '', $title, $text) } - } - } - - foreach file [document_list] { - set zHtml [readfile $file] - - array unset ::P - set ::P(url) "" - set ::P(doc) $file - parsehtml $zHtml findlinks_callback - } - - db func trim trim - #db eval { UPDATE pagedata SET links = trim(links) } - db eval { CREATE INDEX ft ON pagedata USING fts5() } - } -} - -sqlite4 db search4.db -rebuild_database - ADDED search/mkscript.tcl Index: search/mkscript.tcl ================================================================== --- /dev/null +++ search/mkscript.tcl @@ -0,0 +1,54 @@ +#!/usr/bin/tclsh +# +# Use this script to build the TCL scripts that implement the search +# functions on the SQLite website. +# +# Usage example: +# +# tclsh mkscript.tcl search.tcl.in >search.tcl +# +# The input to this script is a template script file, named something +# like "search.tcl.in". This script reads the template line-by-line and +# applies some minor transformations: +# +# INCLUDE filename Lines match this pattern are replaced +# by the complete text of filename. This +# is used (for example) to insert the +# complete text of wapp.tcl in the appropriate +# place CGI scripts +# +# DOCHEADER title path Lines matching this pattern invoke the +# document_header function contained in the +# ../document_header.tcl file to generate +# header text for the document, and then +# insert that header text in place of the +# line +# +# Other than these transformations, the input is copied through into +# the output. +# +if {[llength $argv]!=1} { + puts stderr "Usage: $argv0 TEMPLATE >OUTPUT" + exit 1 +} +set infile [lindex $argv 0] +set ROOT [file dir [file dir [file normalize $argv0]]] +set HOME [file dir [file normalize $infile]] +set in [open $infile rb] +while {1} { + set line [gets $in] + if {[eof $in]} break + if {[regexp {^INCLUDE (.*)} $line all path]} { + set in2 [open $HOME/$path rb] + puts [read $in2] + close $in2 + continue + } + if {[regexp {^DOCHEAD } $line] && [llength $line]==3} { + source $ROOT/document_header.tcl + puts [document_header [lindex $line 1] [lindex $line 2]] + continue + } + puts $line +} +close $in DELETED search/search.tcl Index: search/search.tcl ================================================================== --- search/search.tcl +++ /dev/null @@ -1,500 +0,0 @@ -#!/usr/bin/tclsh.docsrc - -source [file dirname [info script]]/document_header.tcl - -# Decode an HTTP %-encoded string -# -proc percent_decode {str} { - # rewrite "+" back to space - # protect \ and [ and ] by quoting with '\' - set str [string map [list + { } "\\" "\\\\" \[ \\\[ \] \\\]] $str] - - # prepare to process all %-escapes - regsub -all -- {%([A-Fa-f][A-Fa-f0-9])%([A-Fa-f89][A-Fa-f0-9])} \ - $str {[encoding convertfrom utf-8 [binary decode hex \1\2]]} str - regsub -all -- {%([0-7][A-Fa-f0-9])} $str {\\u00\1} str - - # process %-escapes - return [subst -novar $str] -} - -#========================================================================= -# This proc is called to parse the arguments passed to this invocation of -# the CGI program (via either the GET or POST method). It returns a -# key/value list containing the arguments suitable for passing to [array -# set]. For example, if the CGI is invoked via a GET request on the URI: -# -# http://www.sqlite.org/search?query=fts3+table&results=10 -# -# then the returned list value is: -# -# {query {fts3 table} results 10} -# -proc cgi_parse_args {} { - global env A - - if {$env(REQUEST_METHOD) == "GET"} { - foreach q [split $env(QUERY_STRING) &] { - if {[regexp {([a-z0-9]*)=(.*)} $q all var value]} { - set A($var) [percent_decode $value] - } - } - } elseif {$env(REQUEST_METHOD) == "POST"} { - set qstring [read stdin $env(CONTENT_LENGTH)] - foreach q [split $qstring &] { - if {[regexp {([a-z0-9]*)=(.*)} $q all var value]} { - set A($var) [percent_decode $value] - } - } - } else { - error "Unrecognized method: $env(REQUEST_METHOD)" - } -} - - -#========================================================================= -# The argument contains a key value list. The values in the list are -# transformed to an HTTP query key value list. For example: -# -# % cgi_encode_args {s "search string" t "search \"type\""} -# s=search+string&t=search+%22type%22 -# -proc cgi_encode_args {list} { - set reslist [list] - foreach {key value} $list { - set value [string map { - \x20 + \x21 %21 \x2A %2A \x22 %22 \x27 %27 \x28 %28 \x29 %29 \x3B %3B - \x3A %3A \x40 %40 \x26 %26 \x3D %3D \x2B %2B \x24 %24 \x2C %2C \x2F %2F - \x3F %3F \x25 %25 \x23 %23 \x5B %5B \x5D %5D - } $value] - - lappend reslist "$key=$value" - } - join $reslist & -} - -proc htmlize {str} { string map {< < > >} $str } -proc attrize {str} { string map {< < > > \x22 "} $str } - -#========================================================================= - -proc cgi_env_dump {} { - - set ret "<h1>Arguments</h1><table>" - foreach {key value} [array get ::A] { - append ret "<tr><td>[htmlize $key]<td>[htmlize $value]" - } - append ret "</table>" - - append ret "<h1>Environment</h1><table>" - foreach {key value} [array get ::env] { - append ret "<tr><td>[htmlize $key]<td>[htmlize $value]" - } - append ret "</table>" - return $ret -} - -#------------------------------------------------------------------------- -# Add an entry to the log database for the current query. Which -# returns $nRes results. -# -proc search_add_log_entry {nRes} { - if {[info exists ::A(donotlog)]} return - - sqlite3 db2 search.d/searchlog.db - db2 timeout 10000 - - set ip $::env(REMOTE_ADDR) - set query $::A(q) - - db2 eval { - PRAGMA synchronous=OFF; - PRAGMA journal_mode=OFF; - BEGIN; - CREATE TABLE IF NOT EXISTS log( - ip, -- IP query was made from - query, -- Fts5 query string - nres, -- Number of results - timestamp DEFAULT CURRENT_TIMESTAMP - ); - INSERT INTO log(ip, query, nres) VALUES($ip, $query, $nRes); - COMMIT; - } - - db2 close -} - -proc sqlize {text} { - return "'[string map [list ' ''] $text]'" -} - -proc admin_list {} { - sqlite3 db2 searchlog.db - - set where "" - set res "" - - set ipfilter "" - if {[info exists ::A(ip)] && $::A(ip)!=""} { - set where "WHERE ip = [sqlize $::A(ip)]" - set ipfilter $::A(ip) - } - - set checked "" - if {[info exists ::A(unique)] && $::A(unique)} { - set checked "checked" - } - - set limit 10 - if {[info exists ::A(limit)]} { - set limit $::A(limit) - } - set s10 "" - set s100 "" - set s1000 "" - if {$limit==10} {set s10 selected} - if {$limit==100} {set s100 selected} - if {$limit==1000} {set s1000 selected} - - append res " - <div style=\"margin:2em\"> - <center> - <form action=admin method=get> - Results: <select name=limit onChange=\"this.form.submit()\"> - <option $s10 value=\"10\">10</option> - <option $s100 value=\"100\">100</option> - <option $s1000 value=\"1000\">1000</option> - </select> - IP: <input type=input name=ip value=\"[attrize $ipfilter]\"> - Unique: <input - type=checkbox name=unique value=1 - $checked - onChange=\"this.form.submit()\" - > - <input type=submit> - </form> - </center> - </div> - " - - set i 0 - append res "<table border=1 cellpadding=10 align=center>\n" - append res "<tr><td><th>IP <th>Query <th> Results <th> Timestamp\n" - db2 eval " - SELECT rowid, ip, query, nres, timestamp FROM log $where - ORDER BY rowid DESC - " { - - if {[info exists ::A(unique)] && $::A(unique)} { - if {[info exists seen($query)]} continue - set seen($query) 1 - } - - set querylink "<a href=\"../search?q=[attrize $query]&donotlog=1\">$query</a>" - set iplink "<a href=\"?admin=1&ip=$ip\">$ip</a>" - - append res " <tr> <td> $rowid <td> $iplink <td> $querylink" - append res " <td> $nres <td> $timestamp\n" - - incr i - if {$i >= $limit} break - } - append res "</table>\n" - - return $res -} - - -#------------------------------------------------------------------------- -# This command is similar to the builtin Tcl [time] command, except that -# it only ever runs the supplied script once. Also, instead of returning -# a string like "xxx microseconds per iteration", it returns "x.yy ms" or -# "x.yy s", depending on the magnitude of the time spent running the -# command. For example: -# -# % ttime {after 1500} -# 1.50 s -# % ttime {after 45} -# 45.02 ms -# -proc ttime {script} { - set t [lindex [time [list uplevel $script]] 0] - if {$t>1000000} { return [format "%.2f s" [expr {$t/1000000.0}]] } - return [format "%.2f ms" [expr {$t/1000.0}]] -} - -proc searchchanges {} { - global A - if {![info exists A(q)]} return "" - - set open {<span style="background-color:#d9f2e6">} - set close {</span>} - set query { - SELECT url, version, idx, highlight(change, 3, $open, $close) AS text - FROM change($A(q)) ORDER BY rowid ASC - } - - set ret [subst { - <p>Change log entries mentioning: <b>[htmlize $::A(q)]</b> - <table border=0> - }] - - set s2 "style=\"margin-top:0\"" - set s1 "style=\"font-size:larger; text-align:left\" class=nounderline" - set prev "" - db eval $query { - if {$prev!=$version} { - append ret [subst { - <tr> <td $s1 valign=top> <a href=$url>$version</a> <td> <ul $s2> - }] - set prev $version - } - append ret [subst { <li value=$idx> ($idx) $text }] - } - - append ret "</table>" - append ret "<center><p>You can also see the <a href=changes.html>entire" - append ret " changelog as a single page</a> if you wish.</center>" - - return $ret -} - -proc searchresults {} { - if {![info exists ::A(q)]} return "" - #set ::A(q) [string map {' ''} $A(q)] - #regsub -all {[^-/"A-Za-z0-9]} $::A(q) { } ::A(q) - - # Count the '"' characters in $::A(q). If there is an odd number of - # occurences, add a " to the end of the query so that fts5 can parse - # it without error. - if {[regexp -all \x22 $::A(q)] % 2} { append ::A(q) \x22 } - - # Set iStart to the index of the first result to display. Results are - # indexed starting at zero from most to least relevant. - # - set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}] - - # Grab a list of rowid results. - # - set q { - SELECT rowid FROM page WHERE page MATCH $::A(q) - ORDER BY srank(page) DESC, - rank * COALESCE( - (SELECT percent FROM weight WHERE id=page.rowid), 100 - ); - } - if {[catch { set lRowid [db eval $q] }]} { - set x "" - foreach word [split $::A(q) " "] { - append x " \"[string map [list "\"" "\"\""] $word]\"" - } - set ::A(q) [string trim $x] - set lRowid [db eval $q] - } - - set lRes [list] - foreach rowid $lRowid { - if {$rowid > 1000} { - set parent [expr $rowid / 1000] - lappend subsections($parent) $rowid - } else { - lappend lRes $rowid - } - } - - set nRes [llength $lRes] - set lRes [lrange $lRes $iStart [expr $iStart+9]] - - # Add an entry to the log database. - # - search_add_log_entry $nRes - - # If there are no results, return a message to that effect. - # - if {[llength $lRes] == 0} { - return [subst { No results for: <b>[htmlize $::A(q)]</b> }] - } - - # HTML markup used to highlight keywords within FTS5 generated snippets. - # - set open {<span style="background-color:#d9f2e6">} - set close {</span>} - set ellipsis {<b> ... </b>} - - # Grab the required data - # - db eval [string map [list %LIST% [join $lRowid ,]] { - SELECT - rowid AS parentid, - snippet(page, 0, $open, $close, $ellipsis, 6) AS s_apis, - snippet(page, 2, $open, $close, '', 40) AS s_title1, - snippet(page, 3, $open, $close, $ellipsis, 40) AS s_title2, - snippet(page, 4, $open, $close, $ellipsis, 40) AS s_content, - url, rank - FROM page($::A(q)) - WHERE rowid IN (%LIST%) - }] X { - foreach k [array names X] { set data($X(parentid),$k) [set X($k)] } - } - - set ret [subst { - <table border=0> - <p>Search results - [expr $iStart+1]..[expr {($nRes < $iStart+10) ? $nRes : $iStart+10}] - of $nRes for: <b>[htmlize $::A(q)]</b> - }] - - foreach rowid $lRes { - - foreach a {parentid s_apis s_title1 s_content url rank} { - set $a $data($rowid,$a) - } - - if {[info exists subsections($parentid)]} { - set childid [lindex $subsections($parentid) 0] - set link $data($childid,url) - set hdr $data($childid,s_title2) - - if {$hdr==""} { - set s_content "" - } else { - set s_content [subst { - <b><a style=color:#044a64 href=$link>$hdr</a></b> - }] - } - - append s_content " $data($childid,s_content)" - } - - append ret [subst -nocommands {<tr> - <td valign=top style="line-height:150%"> - <div style="white-space:wrap;font-size:larger" class=nounderline> - <a href="$url">$s_title1 </a> - <div style="float:right;font-size:smaller;color:#BBB">($url)</div> - </div> - <div style="margin-left: 10ex; font:larger monospace">$s_apis</div> - <div style="margin-left: 4ex; margin-bottom:1.5em"> - $s_content - </div> - </td> - }] - } - append ret { </table> } - - - # If the query returned more than 10 results, add up to 10 links to - # each set of 10 results (first link to results 1-10, second to 11-20, - # third to 21-30, as required). - # - if {$nRes>10} { - set s(0) {border:solid #044a64 1px;padding:1ex;margin:1ex;line-height:300%;} - set s(1) "$s(0);background:#044a64;color:white" - append ret <center><p> - for {set i 0} {$i < 10 && ($i*10)<$nRes} {incr i} { - append ret [subst { - <a style="$s([expr {($iStart/10)==$i}])" - href="search?[cgi_encode_args [list q $::A(q) s $::A(s) i $i]]">[expr $i+1]</a> - }] - } - append ret </center> - } - - return $ret -} - -proc main {} { - global A - cgi_parse_args - - # If "env=1" is specified, dump the environment variables instead - # of running any search. - if {[info exists ::A(env)]} { return [cgi_env_dump] } - - # If "admin=1" is specified, jump to the admin screen. - if {[string match *admin* $::env(REQUEST_URI)]} { - set ::PATH ../ - return [admin_list] - } - - sqlite3 db search.d/search.db - - set cmd searchresults - if {[info exists A(s)] && $A(s)=="c"} { - set cmd searchchanges - } - - db transaction { - set t [ttime { - if {[catch $cmd srchout]} { - set A(q) [string tolower $A(q)] - set srchout [$cmd] - } - set doc $srchout - }] - } - append doc "<center>" - append doc "<p>Page generated by <a href='fts5.html'>FTS5</a> in about $t." - append doc "</center>" - return $doc - - # return [cgi_env_dump] -} - -#========================================================================= - -source [file dirname [info script]]/document_header.tcl - -if {![info exists env(REQUEST_METHOD)]} { - set env(REQUEST_METHOD) GET - set env(QUERY_STRING) rebuild=1 - set ::HEADER "" - - #set env(QUERY_STRING) {q="one+two+three+four"+eleven} - set env(QUERY_STRING) {q=windows} - set ::HEADER "" -} - -set ::PATH "" -if {0==[catch main res]} { - set title "Search SQLite Documentation" - if {[info exists ::A(q)]} { - set initsearch [attrize $::A(q)] - append title " - [htmlize $::A(q)]" - } else { - set initsearch {} - } - set document [document_header $title $::PATH $initsearch] - if {$::A(s)!="d" && $::A(s)!="c"} {set ::A(s) d} - append document [subst { - <script> - window.addEventListener('load', function() { - var w = document.getElementById("searchmenu"); - w.style.display = "block"; - - document.getElementById("searchtype").value = "$::A(s)" - - setTimeout(function(){ - var s = document.getElementById("searchbox"); - s.focus(); - s.select(); - }, 30); - }); - </script> - }] - append document $res -} else { - set document "<pre>" - append document "Error: $res\n\n" - append document $::errorInfo - append document "</pre>" -} - -puts "Content-type: text/html" -puts "Content-Length: [string length $document]" -puts "" -puts $document -puts "" -flush stdout -close stdout - -exit ADDED search/search.tcl.in Index: search/search.tcl.in ================================================================== --- /dev/null +++ search/search.tcl.in @@ -0,0 +1,309 @@ +#!/usr/bin/tclsh.docsrc +#### Import of wapp.tcl +INCLUDE wapp.tcl +#### End of wapp.tcl + +# Generate all header content for the output document +# +proc search_header {} { + wapp-trim { +DOCHEAD {Search SQLite Documentation} {} + } +} + +#------------------------------------------------------------------------- +# Add an entry to the log database for the current query. Which +# returns $nRes results. +# +proc search_add_log_entry {nRes} { + if {[wapp-param-exists donotlog]} return + sqlite3 db2 [file dir [wapp-param SCRIPT_FILENAME]]/search.d/searchlog.db + db2 timeout 10000 + set ip [wapp-param REMOTE_ADDR] + set query [wapp-param q] + db2 eval { + PRAGMA synchronous=OFF; + PRAGMA journal_mode=OFF; + BEGIN; + CREATE TABLE IF NOT EXISTS log( + ip, -- IP query was made from + query, -- Fts5 query string + nres, -- Number of results + timestamp DEFAULT CURRENT_TIMESTAMP + ); + INSERT INTO log(ip, query, nres) VALUES($ip, $query, $nRes); + COMMIT; + } + db2 close +} + +#------------------------------------------------------------------------- +# This command is similar to the builtin Tcl [time] command, except that +# it only ever runs the supplied script once. Also, instead of returning +# a string like "xxx microseconds per iteration", it returns "x.yy ms" or +# "x.yy s", depending on the magnitude of the time spent running the +# command. For example: +# +# % ttime {after 1500} +# 1.50 s +# % ttime {after 45} +# 45.02 ms +# +proc ttime {script} { + set t [lindex [time [list uplevel $script]] 0] + if {$t>1000000} { return [format "%.2f s" [expr {$t/1000000.0}]] } + return [format "%.2f ms" [expr {$t/1000.0}]] +} + +#----------------------------------------------------------------------- +# Do a search of the change log +# +proc searchchanges {} { + set q [wapp-param q] + if {$q==""} {return {}} + set open {<span style="background-color:#d9f2e6">} + set close {</span>} + set query { + SELECT url, version, idx, highlight(change, 3, $open, $close) AS text + FROM change($q) ORDER BY rowid ASC + } + wapp-trim { + <p>Change log entries mentioning: <b>%html($q)</b> + <table border=0> + } + set s2 "style=\"margin-top:0\"" + set s1 "style=\"font-size:larger; text-align:left\" class=nounderline" + set prev "" + db eval $query { + if {$prev!=$version} { + wapp-trim { + <tr> <td %unsafe($s1) valign=top> <a href='%url($url)'>%html($version)</a> + <td> <ul %unsafe($s2)> + } + set prev $version + } + wapp-subst {<li value=%html($idx)> (%html($idx)) %unsafe($text)\n} + } + wapp-trim { + </table> + <center><p>You can also see the <a href=changes.html>entire + changelog as a single page</a> if you wish.</center> + } +} + +#----------------------------------------------------------------------- +# Do a search over all documentation other than the change log +# +proc searchresults {} { + set q [wapp-param q] + if {$q==""} {return ""} + + # Count the '"' characters in $::A(q). If there is an odd number of + # occurences, add a " to the end of the query so that fts5 can parse + # it without error. + if {[regexp -all \x22 $q] % 2} { append q \x22 } + + # Set iStart to the index of the first result to display. Results are + # indexed starting at zero from most to least relevant. + # + set iStart 0 + catch {set iStart [expr {[wapp-param i 0]*10}]} + + # Grab a list of rowid results. + # + set sql { + SELECT rowid FROM page WHERE page MATCH $q + ORDER BY srank(page) DESC, + rank * COALESCE( + (SELECT percent FROM weight WHERE id=page.rowid), 100 + ); + } + if {[catch { set lRowid [db eval $sql] }]} { + set x "" + foreach word [split $q " "] { + append x " \"[string map [list "\"" "\"\""] $word]\"" + } + set q [string trim $x] + set lRowid [db eval $sql] + } + + set lRes [list] + foreach rowid $lRowid { + if {$rowid > 1000} { + set parent [expr $rowid / 1000] + lappend subsections($parent) $rowid + } else { + lappend lRes $rowid + } + } + + set nRes [llength $lRes] + set lRes [lrange $lRes $iStart [expr $iStart+9]] + + # Add an entry to the log database. + # + search_add_log_entry $nRes + + # If there are no results, return a message to that effect. + # + if {[llength $lRes] == 0} { + wapp-subst {<p>No Results for: <b>%html($q)</b>\n} + } + + # HTML markup used to highlight keywords within FTS5 generated snippets. + # + set open {<span style="background-color:#d9f2e6">} + set close {</span>} + set ellipsis {<b> ... </b>} + + # Grab the required data + # + db eval [string map [list %LIST% [join $lRowid ,]] { + SELECT + rowid AS parentid, + snippet(page, 0, $open, $close, $ellipsis, 6) AS s_apis, + snippet(page, 2, $open, $close, '', 40) AS s_title1, + snippet(page, 3, $open, $close, $ellipsis, 40) AS s_title2, + snippet(page, 4, $open, $close, $ellipsis, 40) AS s_content, + url, rank + FROM page($q) + WHERE rowid IN (%LIST%) + }] X { + foreach k [array names X] { set data($X(parentid),$k) [set X($k)] } + } + + set i1 [expr {$iStart+1}] + set i2 [expr {($nRes < $iStart+10) ? $nRes : $iStart+10}] + wapp-trim { + <table border=0> + <p>Search results %html($i1)..%html($i2) of %html($nRes) for: <b>%html($q)</b> + } + + foreach rowid $lRes { + + foreach a {parentid s_apis s_title1 s_content url rank} { + set $a $data($rowid,$a) + } + + if {[info exists subsections($parentid)]} { + set childid [lindex $subsections($parentid) 0] + set link $data($childid,url) + set hdr $data($childid,s_title2) + + if {$hdr==""} { + set s_content "" + } else { + set s_content [subst { + <b><a style=color:#044a64 href=$link>$hdr</a></b> + }] + } + + append s_content " $data($childid,s_content)" + } + + wapp-trim {<tr> + <td valign=top style="line-height:150%"> + <div style="white-space:wrap;font-size:larger" class=nounderline> + <a href="%url($url)">%unsafe($s_title1)</a> + <div style="float:right;font-size:smaller;color:#BBB">(%url($url))</div> + </div> + <div style="margin-left: 10ex; font:larger monospace">%unsafe($s_apis)</div> + <div style="margin-left: 4ex; margin-bottom:1.5em"> + %unsafe($s_content) + </div> + </td> + } + } + wapp-subst {</table>\n} + + + # If the query returned more than 10 results, add up to 10 links to + # each set of 10 results (first link to results 1-10, second to 11-20, + # third to 21-30, as required). + # + if {$nRes>10} { + set s(0) {border:solid #044a64 1px;padding:1ex;margin:1ex;line-height:300%;} + set s(1) "$s(0);background:#044a64;color:white" + wapp-subst {<center><p>\n} + for {set i 0} {$i < 10 && ($i*10)<$nRes} {incr i} { + set style $s([expr {($iStart/10)==$i}]) + wapp-trim { + <a style="%html($style)" + href="search?q=%qp($q)&i=%qp($i)">%html([expr $i+1])</a> + } + } + wapp-subst {</center>\n} + } +} + +# This is the main entry point into the search result page generator +# +proc wapp-default {} { + wapp-content-security-policy off + wapp-allow-xorigin-params + if {[wapp-param-exists env]} { + search_header + wapp-trim { + <h1>Environment Dump For Debugging</h1> + <pre>%html([wapp-debug-env])</pre> + } + return + } + + # When running using the built-in webserver in Wapp (in other words, + # when not running as CGI) any filename that contains a "." loads + # directly from the filesystem. + if {[string match *//127.0.0.1:* [wapp-param BASE_URL]] + && [string match *.* [wapp-param PATH_INFO]] + } { + set altfile [file dir [wapp-param SCRIPT_FILENAME]][wapp-param PATH_INFO] + set fd [open $altfile rb] + fconfigure $fd -translation binary + wapp-unsafe [read $fd] + close $fd + switch -glob -- $altfile { + *.html { + wapp-mimetype text/html + } + *.css { + wapp-mimetype text/css + } + *.gif { + wapp-mimetype image/gif + } + } + return + } + + search_header + sqlite3 db [file dir [wapp-param SCRIPT_FILENAME]]/search.d/search.db + set searchType [wapp-param s d] + if {$searchType=="c"} { + set cmd searchchanges + } else { + set cmd searchresults + } + db transaction { + set t [ttime {$cmd}] + } + wapp-trim { + <center> + <p>Page generated by <a href='fts5.html'>FTS5</a> in about %html($t). + </center> + <script> + window.addEventListener('load', function() { + var w = document.getElementById("searchmenu"); + w.style.display = "block"; + w = document.getElementById("searchtype"); + w.value = "%string($searchType)" + setTimeout(function(){ + var s = document.getElementById("searchbox"); + s.value = "%string([wapp-param q])" + s.focus(); + s.select(); + }, 30); + }); + </script> + } +} +wapp-start $argv DELETED search/search4.tcl Index: search/search4.tcl ================================================================== --- search/search4.tcl +++ /dev/null @@ -1,464 +0,0 @@ -#!/usr/bin/tclsqlite4 - -#========================================================================= -# Decode an HTTP %-encoded string -# -proc percent_decode {str} { - # rewrite "+" back to space - # protect \ and [ and ] by quoting with '\' - set str [string map [list + { } "\\" "\\\\" \[ \\\[ \] \\\]] $str] - - # prepare to process all %-escapes - regsub -all -- {%([A-Fa-f][A-Fa-f0-9])%([A-Fa-f89][A-Fa-f0-9])} \ - $str {[encoding convertfrom utf-8 [binary decode hex \1\2]]} str - regsub -all -- {%([0-7][A-Fa-f0-9])} $str {\\u00\1} str - - # process %-escapes - return [subst -novar $str] -} - -#========================================================================= -# This proc is called to parse the arguments passed to this invocation of -# the CGI program (via either the GET or POST method). It returns a -# key/value list containing the arguments suitable for passing to [array -# set]. For example, if the CGI is invoked via a GET request on the URI: -# -# http://www.sqlite.org/search?query=fts3+table&results=10 -# -# then the returned list value is: -# -# {query {fts3 table} results 10} -# -proc cgi_parse_args {} { - global env A - - if {$env(REQUEST_METHOD) == "GET"} { - foreach q [split $env(QUERY_STRING) &] { - if {[regexp {([a-z0-9]*)=(.*)} $q all var value]} { - set A($var) [percent_decode $value] - } - } - } elseif {$env(REQUEST_METHOD) == "POST"} { - set qstring [read stdin $env(CONTENT_LENGTH)] - foreach q [split $qstring &] { - if {[regexp {([a-z0-9]*)=(.*)} $q all var value]} { - set A($var) [percent_decode $value] - } - } - } else { - error "Unrecognized method: $env(REQUEST_METHOD)" - } -} - - -#========================================================================= -# Redirect the web-browser to URL $url. This command does not return. -# -proc cgi_redirect {url} { - set server $::env(SERVER_NAME) - set path [file dirname $::env(REQUEST_URI)] - if {[string range $path end end]!="/"} { - append path / - } - - puts "Status: 302 Redirect" - puts "Location: http://${server}${path}${url}" - puts "Content-Length: 0" - puts "" - exit -} - -#========================================================================= -# The argument contains a key value list. The values in the list are -# transformed to an HTTP query key value list. For example: -# -# % cgi_encode_args {s "search string" t "search \"type\""} -# s=search+string&t=search+%22type%22 -# -proc cgi_encode_args {list} { - set reslist [list] - foreach {key value} $list { - set value [string map { - \x20 + \x21 %21 \x2A %2A \x22 %22 \x27 %27 \x28 %28 \x29 %29 \x3B %3B - \x3A %3A \x40 %40 \x26 %26 \x3D %3D \x2B %2B \x24 %24 \x2C %2C \x2F %2F - \x3F %3F \x25 %25 \x23 %23 \x5B %5B \x5D %5D - } $value] - - lappend reslist "$key=$value" - } - join $reslist & -} - -proc htmlize {str} { string map {< < > >} $str } -proc attrize {str} { string map {< < > > \x22 \x5c\x22} $str } - -#========================================================================= - -proc cgi_env_dump {} { - - set ret "<h1>Arguments</h1><table>" - foreach {key value} [array get ::A] { - append ret "<tr><td>[htmlize $key]<td>[htmlize $value]" - } - append ret "</table>" - - append ret "<h1>Environment</h1><table>" - foreach {key value} [array get ::env] { - append ret "<tr><td>[htmlize $key]<td>[htmlize $value]" - } - append ret "</table>" - return $ret -} - -proc searchform {} { - return {} - set initial "Enter search term:" - catch { set initial $::A(q) } - return [subst { - <table style="margin: 1em auto"> <tr><td>Search SQLite docs for:<td> - <form name=f method=GET action=search4> - <input name=q type=text width=35 value="[attrize $initial]"></input> - <input name=s type=submit value="Search"></input> - <input name=s type=submit value="Lucky"></input> - </form> - </table> - <script> - document.forms.f.q.focus() - document.forms.f.q.select() - </script> - }] -} - -proc footer {} { - return { - <hr> - <table align=right> - <td> - <i>Powered by <a href="http://www.sqlite.org/src4">FTS5</a>.</i> - </table> - } -} - - -#------------------------------------------------------------------------- -# This command is similar to the builtin Tcl [time] command, except that -# it only ever runs the supplied script once. Also, instead of returning -# a string like "xxx microseconds per iteration", it returns "x.yy ms" or -# "x.yy s", depending on the magnitude of the time spent running the -# command. For example: -# -# % ttime {after 1500} -# 1.50 s -# % ttime {after 45} -# 45.02 ms -# -proc ttime {script} { - set t [lindex [time [list uplevel $script]] 0] - if {$t>1000000} { return [format "%.2f s" [expr {$t/1000000.0}]] } - return [format "%.2f ms" [expr {$t/1000.0}]] -} - -proc rank {matchinfo args} { - binary scan $matchinfo i* I - - set nPhrase [lindex $I 0] - set nCol [lindex $I 1] - - set G [lrange $I 2 [expr {1+$nCol*$nPhrase}]] - set L [lrange $I [expr {2+$nCol*$nPhrase}] end] - - foreach a $args { lappend log [expr {log10(100+$a)}] } - - set score 0.0 - set i 0 - foreach l $L g $G { - if {$l > 0} { - set div [lindex $log [expr $i%3]] - set score [expr {$score + (double($l) / double($g)) / $div}] - } - incr i - } - - return $score -} -proc erank {matchinfo args} { - eval rank [list $matchinfo] $args -} - - -proc searchresults {} { - if {![info exists ::A(q)]} return "" - #set ::A(q) [string map {' ''} $A(q)] - #regsub -all {[^-/"A-Za-z0-9]} $::A(q) { } ::A(q) - - # Count the '"' characters in $::A(q). If there is an odd number of - # occurences, add a " to the end of the query so that fts3 can parse - # it without error. - if {[regexp -all \x22 $::A(q)] % 2} { append ::A(q) \x22 } - - set ::TITLE "Results for: \"[htmlize $::A(q)]\"" - - #db func rank rank - #db func erank erank - - set score 0 - catch {set score $::A(score)} - - # Set nRes to the total number of documents that the users query matches. - # If nRes is 0, then the users query returned zero results. Return a short - # message to that effect. - # - set nRes [db one { SELECT count(*) FROM pagedata WHERE pagedata MATCH $::A(q) }] - if {$nRes == 0} { - return [subst { No results for: <b>[htmlize $::A(q)]</b> }] - } - - # Set iStart to the index of the first result to display. Results are - # indexed starting at zero from most to least relevant. - # - set iStart [expr {([info exists ::A(i)] ? $::A(i) : 0)*10}] - - # HTML markup used to highlight keywords within FTS3 generated snippets. - # - set open {<span style="font-weight:bold; color:navy">} - set close {</span>} - set ellipsis {<b> ... </b>} - - set ret [subst { - <table border=0> - <p>Search results - [expr $iStart+1]..[expr {($nRes < $iStart+10) ? $nRes : $iStart+10}] - of $nRes for: <b>[htmlize $::A(q)]</b> - }] - - set open {<span style="font-weight:bold; color:navy">} - set close {</span>} - set ellipsis {<b> ... </b>} - - if {0==[info exists ::A(e)]} { - set sqlquery { - SELECT url, title, - snippet(pagedata, $open, $close, $ellipsis, 3, 40) AS snippet, - '' AS report - FROM pagedata WHERE pagedata MATCH $::A(q) - ORDER BY rankc(pagedata, 1.0, 5.0, 10.0, 1.0) DESC - LIMIT 10 OFFSET $iStart - } - } else { - set sqlquery { - SELECT url, title, - snippet(pagedata, $open, $close, $ellipsis, 3, 40) AS snippet, - erankc(pagedata, 1.0, 5.0, 10.0, 1.0) AS report - FROM pagedata WHERE pagedata MATCH $::A(q) - ORDER BY rankc(pagedata, 1.0, 5.0, 10.0, 1.0) DESC - LIMIT 10 OFFSET $iStart - } - } - - set resnum $iStart - db eval $sqlquery { - incr resnum - - append ret [subst -nocommands {<tr> - <td valign=top>${resnum}.</td> - <td valign=top> - <div style="white-space:wrap"> - <a href="$url">$title</a> - </div> - <div style="font-size:small;margin-left: 2ex"> - <div style="width:80ex"> $snippet </div> - <div style="margin-bottom:1em"><a href="$url">$url</a></div> - </div> - </td> - - <td width=100%> - <td valign=top style="font-size:70%;white-space:nowrap;color:darkgreen"> $report </td> - }] - } - append ret { </table> } - - - # If the query returned more than 10 results, add up to 10 links to - # each set of 10 results (first link to results 1-10, second to 11-20, - # third to 21-30, as required). - # - if {$nRes>10} { - set s(0) {border: solid #044a64 1px ; padding: 1ex ; margin: 1ex} - set s(1) "$s(0);background:#044a64;color:white" - append ret <center><p> - for {set i 0} {$i < 10 && ($i*10)<$nRes} {incr i} { - append ret [subst { - <a style="$s([expr {($iStart/10)==$i}])" - href="search4?[cgi_encode_args [list q $::A(q) i $i]]">[expr $i+1]</a> - }] - } - append ret </center> - } - - return $ret -} - -proc main {} { - global A - sqlite4 db search4.db - cgi_parse_args - - db transaction { - set t [ttime { set doc "[searchform] [searchresults] [footer]" }] - } - append doc "<p>Page generated in $t." - return $doc - - # return [cgi_env_dump] -} - -#========================================================================= - -set ::HEADER { - <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> - <html><head> - <title>$TITLE - - - - - -
- - - -
-
Small. Fast. Reliable.
Choose any three.
- - - - -} - -if {![info exists env(REQUEST_METHOD)]} { - set env(REQUEST_METHOD) GET - set env(QUERY_STRING) rebuild=1 - set ::HEADER "" - - set env(QUERY_STRING) {q=cache+size} - set ::HEADER "" -} - - -set TITLE "Search SQLite Documentation (fts5)" - -if {0==[catch main res]} { - if {[info exists ::A(q)]} { - set ::INITSEARCH \"[attrize $::A(q)]\" - } else { - set ::INITSEARCH \"\" - } - set document [subst -nocommands $::HEADER] - append document $res -} else { - set document "
"
-  append document "Error: $res\n\n"
-  append document $::errorInfo
-  append document "
" -} - -puts "Content-type: text/html" -puts "Content-Length: [string length $document]" -puts "" -puts $document -puts "" -flush stdout -close stdout - -exit ADDED search/wapp.tcl Index: search/wapp.tcl ================================================================== --- /dev/null +++ search/wapp.tcl @@ -0,0 +1,879 @@ +# Copyright (c) 2017 D. Richard Hipp +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the Simplified BSD License (also +# known as the "2-Clause License" or "FreeBSD License".) +# +# This program is distributed in the hope that it will be useful, +# but without any warranty; without even the implied warranty of +# merchantability or fitness for a particular purpose. +# +#--------------------------------------------------------------------------- +# +# Design rules: +# +# (1) All identifiers in the global namespace begin with "wapp" +# +# (2) Indentifiers intended for internal use only begin with "wappInt" +# + +# Add text to the end of the HTTP reply. No interpretation or transformation +# of the text is performs. The argument should be enclosed within {...} +# +proc wapp {txt} { + global wapp + dict append wapp .reply $txt +} + +# Add text to the page under construction. Do no escaping on the text. +# +# Though "unsafe" in general, there are uses for this kind of thing. +# For example, if you want to return the complete, unmodified content of +# a file: +# +# set fd [open content.html rb] +# wapp-unsafe [read $fd] +# close $fd +# +# You could do the same thing using ordinary "wapp" instead of "wapp-unsafe". +# The difference is that wapp-safety-check will complain about the misuse +# of "wapp", but it assumes that the person who write "wapp-unsafe" understands +# the risks. +# +# Though occasionally necessary, the use of this interface should be minimized. +# +proc wapp-unsafe {txt} { + global wapp + dict append wapp .reply $txt +} + +# Add text to the end of the reply under construction. The following +# substitutions are made: +# +# %html(...) Escape text for inclusion in HTML +# %url(...) Escape text for use as a URL +# %qp(...) Escape text for use as a URI query parameter +# %string(...) Escape text for use within a JSON string +# %unsafe(...) No transformations of the text +# +# The %unsafe substitution should be avoided whenever possible, obviously. +# In addition to the substitutions above, the text also does backslash +# escapes. +# +proc wapp-subst {txt} { + global wapp + regsub -all {%(html|url|qp|string|unsafe)\(([^)]+)\)} $txt \ + {[wappInt-enc-\1 "\2"]} txt + dict append wapp .reply [uplevel 1 [list subst -novariables $txt]] +} + +# There must be a wappInt-enc-NAME routine for each possible substitution +# in wapp-subst. Thus there are routines for "html", "url", "qp", and "unsafe". +# +# wappInt-enc-html Escape text so that it is safe to use in the +# body of an HTML document. +# +# wappInt-enc-url Escape text so that it is safe to pass as an +# argument to href= and src= attributes in HTML. +# +# wappInt-enc-qp Escape text so that it is safe to use as the +# value of a query parameter in a URL or in +# post data or in a cookie. +# +# wappInt-enc-string Escape ", ', and \ for using inside of a +# javascript string literal. +# +# wappInt-enc-unsafe Perform no encoding at all. Unsafe. +# +proc wappInt-enc-html {txt} { + return [string map {& & < < > >} $txt] +} +proc wappInt-enc-unsafe {txt} { + return $txt +} +proc wappInt-enc-url {s} { + if {[regsub -all {[^-{}@~?=#_.:/a-zA-Z0-9]} $s {[wappInt-%HHchar {&}]} s]} { + set s [subst -novar -noback $s] + } + if {[regsub -all {[{}]} $s {[wappInt-%HHchar \\&]} s]} { + set s [subst -novar -noback $s] + } + return $s +} +proc wappInt-enc-qp {s} { + if {[regsub -all {[^-{}_.a-zA-Z0-9]} $s {[wappInt-%HHchar {&}]} s]} { + set s [subst -novar -noback $s] + } + if {[regsub -all {[{}]} $s {[wappInt-%HHchar \\&]} s]} { + set s [subst -novar -noback $s] + } + return $s +} +proc wappInt-enc-string {s} { + return [string map {\\ \\\\ \" \\\" ' \\'} $s] +} + +# Works like wapp-subst, but also removes whitespace from the beginning +# of lines. +# +proc wapp-trim {txt} { + global wapp + regsub -all {\n\s+} [string trim $txt] \n txt + regsub -all {%(html|url|qp|string|unsafe)\(([^)]+)\)} $txt \ + {[wappInt-enc-\1 "\2"]} txt + dict append wapp .reply [uplevel 1 [list subst -novariables $txt]] +} + +# This is a helper routine for wappInt-enc-url and wappInt-enc-qp. It returns +# an appropriate %HH encoding for the single character c. If c is a unicode +# character, then this routine might return multiple bytes: %HH%HH%HH +# +proc wappInt-%HHchar {c} { + if {$c==" "} {return +} + return [regsub -all .. [binary encode hex [encoding convertto utf-8 $c]] {%&}] +} + + +# Undo the www-url-encoded format. +# +# HT: This code stolen from ncgi.tcl +# +proc wappInt-decode-url {str} { + set str [string map [list + { } "\\" "\\\\" \[ \\\[ \] \\\]] $str] + regsub -all -- \ + {%([Ee][A-Fa-f0-9])%([89ABab][A-Fa-f0-9])%([89ABab][A-Fa-f0-9])} \ + $str {[encoding convertfrom utf-8 [binary decode hex \1\2\3]]} str + regsub -all -- \ + {%([CDcd][A-Fa-f0-9])%([89ABab][A-Fa-f0-9])} \ + $str {[encoding convertfrom utf-8 [binary decode hex \1\2]]} str + regsub -all -- {%([0-7][A-Fa-f0-9])} $str {\\u00\1} str + return [subst -novar $str] +} + +# Reset the document back to an empty string. +# +proc wapp-reset {} { + global wapp + dict set wapp .reply {} +} + +# Change the mime-type of the result document. +# +proc wapp-mimetype {x} { + global wapp + dict set wapp .mimetype $x +} + +# Change the reply code. +# +proc wapp-reply-code {x} { + global wapp + dict set wapp .reply-code $x +} + +# Set a cookie +# +proc wapp-set-cookie {name value} { + global wapp + dict lappend wapp .new-cookies $name $value +} + +# Unset a cookie +# +proc wapp-clear-cookie {name} { + wapp-set-cookie $name {} +} + +# Add extra entries to the reply header +# +proc wapp-reply-extra {name value} { + global wapp + dict lappend wapp .reply-extra $name $value +} + +# Specifies how the web-page under construction should be cached. +# The argument should be one of: +# +# no-cache +# max-age=N (for some integer number of seconds, N) +# private,max-age=N +# +proc wapp-cache-control {x} { + wapp-reply-extra Cache-Control $x +} + +# Redirect to a different web page +# +proc wapp-redirect {uri} { + wapp-reply-code {307 Redirect} + wapp-reply-extra Location $uri +} + +# Return the value of a wapp parameter +# +proc wapp-param {name {dflt {}}} { + global wapp + if {![dict exists $wapp $name]} {return $dflt} + return [dict get $wapp $name] +} + +# Return true if a and only if the wapp parameter $name exists +# +proc wapp-param-exists {name} { + global wapp + return [dict exists $wapp $name] +} + +# Set the value of a wapp parameter +# +proc wapp-set-param {name value} { + global wapp + dict set wapp $name $value +} + +# Return all parameter names that match the GLOB pattern, or all +# names if the GLOB pattern is omitted. +# +proc wapp-param-list {{glob {*}}} { + global wapp + return [dict keys $wapp $glob] +} + +# By default, Wapp does not decode query parameters and POST parameters +# for cross-origin requests. This is a security restriction, designed to +# help prevent cross-site request forgery (CSRF) attacks. +# +# As a consequence of this restriction, URLs for sites generated by Wapp +# that contain query parameters will not work as URLs found in other +# websites. You cannot create a link from a second website into a Wapp +# website if the link contains query planner, by default. +# +# Of course, it is sometimes desirable to allow query parameters on external +# links. For URLs for which this is safe, the application should invoke +# wapp-allow-xorigin-params. This procedure tells Wapp that it is safe to +# go ahead and decode the query parameters even for cross-site requests. +# +# In other words, for Wapp security is the default setting. Individual pages +# need to actively disable the cross-site request security if those pages +# are safe for cross-site access. +# +proc wapp-allow-xorigin-params {} { + global wapp + if {![dict exists $wapp .qp] && ![dict get $wapp SAME_ORIGIN]} { + wappInt-decode-query-params + } +} + +# Set the content-security-policy. +# +# The default content-security-policy is very strict: "default-src 'self'" +# The default policy prohibits the use of in-line javascript or CSS. +# +# Provide an alternative CSP as the argument. Or use "off" to disable +# the CSP completely. +# +proc wapp-content-security-policy {val} { + global wapp + if {$val=="off"} { + dict unset wapp .csp + } else { + dict set wapp .csp $val + } +} + +# Examine the bodys of all procedures in this program looking for +# unsafe calls to various Wapp interfaces. Return a text string +# containing warnings. Return an empty string if all is ok. +# +# This routine is advisory only. It misses some constructs that are +# dangerous and flags others that are safe. +# +proc wapp-safety-check {} { + set res {} + foreach p [info procs] { + set ln 0 + foreach x [split [info body $p] \n] { + incr ln + if {[regexp {^[ \t]*wapp[ \t]+([^\n]+)} $x all tail] + && [string index $tail 0]!="\173" + && [regexp {[[$]} $tail] + } { + append res "$p:$ln: unsafe \"wapp\" call: \"[string trim $x]\"\n" + } + if {[regexp {^[ \t]*wapp-(subst|trim)[ \t]+[^\173]} $x all cx]} { + append res "$p:$ln: unsafe \"wapp-$cx\" call: \"[string trim $x]\"\n" + } + } + } + return $res +} + +# Return a string that descripts the current environment. Applications +# might find this useful for debugging. +# +proc wapp-debug-env {} { + global wapp + set out {} + foreach var [lsort [dict keys $wapp]] { + if {[string index $var 0]=="."} continue + append out "$var = [list [dict get $wapp $var]]\n" + } + append out "\[pwd\] = [list [pwd]]\n" + return $out +} + +# Start up the wapp framework. Parameters are a list passed as the +# single argument. +# +# -server $PORT Listen for HTTP requests on this TCP port $PORT +# +# -scgi $PORT Listen for SCGI requests on TCP port $PORT +# +# -cgi Handle a single CGI request +# +# With no arguments, the behavior is called "auto". In "auto" mode, +# if the GATEWAY_INTERFACE environment variable indicates CGI, then run +# as CGI. Otherwise, start an HTTP server bound to the loopback address +# only, on an arbitrary TCP port, and automatically launch a web browser +# on that TCP port. +# +# Additional options: +# +# -trace "puts" each request URL as it is handled, for +# debugging +# +# -lint Run wapp-safety-check on the application instead +# of running the application itself +# +# -Dvar=value Set TCL global variable "var" to "value" +# +# +proc wapp-start {arglist} { + global env + set mode auto + set port 0 + set n [llength $arglist] + for {set i 0} {$i<$n} {incr i} { + set term [lindex $arglist $i] + if {[string match --* $term]} {set term [string range $term 1 end]} + switch -glob -- $term { + -server { + incr i; + set mode "server" + set port [lindex $arglist $i] + } + -scgi { + incr i; + set mode "scgi" + set port [lindex $arglist $i] + } + -cgi { + set mode "cgi" + } + -trace { + proc wappInt-trace {} { + set q [wapp-param QUERY_STRING] + set uri [wapp-param BASE_URL][wapp-param PATH_INFO] + if {$q!=""} {append uri ?$q} + puts $uri + } + } + -lint { + set res [wapp-safety-check] + if {$res!=""} { + puts "Potential problems in this code:" + puts $res + exit 1 + } else { + exit + } + } + -D*=* { + if {[regexp {^.D([^=]+)=(.*)$} $term all var val]} { + set ::$var $val + } + } + default { + error "unknown option: $term" + } + } + } + if {($mode=="auto" + && [info exists env(GATEWAY_INTERFACE)] + && $env(GATEWAY_INTERFACE)=="CGI/1.0") + || $mode=="cgi" + } { + wappInt-handle-cgi-request + return + } + if {$mode=="scgi"} { + wappInt-start-listener $port 1 0 1 + } elseif {$mode=="server"} { + wappInt-start-listener $port 0 0 0 + } else { + wappInt-start-listener $port 1 1 0 + } + vwait ::forever +} + +# Tracing function for each HTTP request. This is overridden by wapp-start +# if tracing is enabled. +# +proc wappInt-trace {} {} + +# Start up a listening socket. Arrange to invoke wappInt-new-connection +# for each inbound HTTP connection. +# +# localonly - If true, listen on 127.0.0.1 only +# +# browser - If true, launch a web browser pointing to the new server +# +proc wappInt-start-listener {port localonly browser scgi} { + if {$scgi} { + set type SCGI + set server [list wappInt-new-connection wappInt-scgi-readable] + } else { + set type HTTP + set server [list wappInt-new-connection wappInt-http-readable] + } + if {$localonly} { + set x [socket -server $server -myaddr 127.0.0.1 $port] + } else { + set x [socket -server $server $port] + } + set coninfo [chan configure $x -sockname] + set port [lindex $coninfo 2] + if {$browser} { + wappInt-start-browser http://127.0.0.1:$port/ + } else { + puts "Listening for $type requests on TCP port $port" + } +} + +# Start a web-browser and point it at $URL +# +proc wappInt-start-browser {url} { + global tcl_platform + if {$tcl_platform(platform)=="windows"} { + exec cmd /c start $url & + } elseif {$tcl_platform(os)=="Darwin"} { + exec open $url & + } elseif {[catch {exec xdg-open $url}]} { + exec firefox $url & + } +} + +# Accept a new inbound HTTP request +# +proc wappInt-new-connection {callback chan ip port} { + upvar #0 wappInt-$chan W + set W [dict create REMOTE_ADDR $ip REMOTE_PORT $port .header {}] + fconfigure $chan -blocking 0 -translation binary + fileevent $chan readable [list $callback $chan] +} + +# Close an input channel +# +proc wappInt-close-channel {chan} { + if {$chan=="stdout"} { + # This happens after completing a CGI request + exit 0 + } else { + unset ::wappInt-$chan + close $chan + } +} + +# Process new text received on an inbound HTTP request +# +proc wappInt-http-readable {chan} { + if {[catch [list wappInt-http-readable-unsafe $chan] msg]} { + puts stderr "$msg\n$::errorInfo" + wappInt-close-channel $chan + } +} +proc wappInt-http-readable-unsafe {chan} { + upvar #0 wappInt-$chan W wapp wapp + if {![dict exists $W .toread]} { + # If the .toread key is not set, that means we are still reading + # the header + set line [string trimright [gets $chan]] + set n [string length $line] + if {$n>0} { + if {[dict get $W .header]=="" || [regexp {^\s+} $line]} { + dict append W .header $line + } else { + dict append W .header \n$line + } + if {[string length [dict get $W .header]]>100000} { + error "HTTP request header too big - possible DOS attack" + } + } elseif {$n==0} { + # We have reached the blank line that terminates the header. + global argv0 + set a0 [file normalize $argv0] + dict set W SCRIPT_FILENAME $a0 + dict set W DOCUMENT_ROOT [file dir $a0] + if {[wappInt-parse-header $chan]} { + catch {close $chan} + return + } + set len 0 + if {[dict exists $W CONTENT_LENGTH]} { + set len [dict get $W CONTENT_LENGTH] + } + if {$len>0} { + # Still need to read the query content + dict set W .toread $len + } else { + # There is no query content, so handle the request immediately + set wapp $W + wappInt-handle-request $chan 0 + } + } + } else { + # If .toread is set, that means we are reading the query content. + # Continue reading until .toread reaches zero. + set got [read $chan [dict get $W .toread]] + dict append W CONTENT $got + dict set W .toread [expr {[dict get $W .toread]-[string length $got]}] + if {[dict get $W .toread]<=0} { + # Handle the request as soon as all the query content is received + set wapp $W + wappInt-handle-request $chan 0 + } + } +} + +# Decode the HTTP request header. +# +# This routine is always running inside of a [catch], so if +# any problems arise, simply raise an error. +# +proc wappInt-parse-header {chan} { + upvar #0 wappInt-$chan W + set hdr [split [dict get $W .header] \n] + if {$hdr==""} {return 1} + set req [lindex $hdr 0] + dict set W REQUEST_METHOD [set method [lindex $req 0]] + if {[lsearch {GET HEAD POST} $method]<0} { + error "unsupported request method: \"[dict get $W REQUEST_METHOD]\"" + } + set uri [lindex $req 1] + set split_uri [split $uri ?] + set uri0 [lindex $split_uri 0] + if {![regexp {^/[-.a-z0-9_/]*$} $uri0]} { + error "invalid request uri: \"$uri0\"" + } + dict set W REQUEST_URI $uri0 + dict set W PATH_INFO $uri0 + set uri1 [lindex $split_uri 1] + dict set W QUERY_STRING $uri1 + set n [llength $hdr] + for {set i 1} {$i<$n} {incr i} { + set x [lindex $hdr $i] + if {![regexp {^(.+): +(.*)$} $x all name value]} { + error "invalid header line: \"$x\"" + } + set name [string toupper $name] + switch -- $name { + REFERER {set name HTTP_REFERER} + USER-AGENT {set name HTTP_USER_AGENT} + CONTENT-LENGTH {set name CONTENT_LENGTH} + CONTENT-TYPE {set name CONTENT_TYPE} + HOST {set name HTTP_HOST} + COOKIE {set name HTTP_COOKIE} + ACCEPT-ENCODING {set name HTTP_ACCEPT_ENCODING} + default {set name .hdr:$name} + } + dict set W $name $value + } + return 0 +} + +# Decode the QUERY_STRING parameters from a GET request or the +# application/x-www-form-urlencoded CONTENT from a POST request. +# +# This routine sets the ".qp" element of the ::wapp dict as a signal +# that query parameters have already been decoded. +# +proc wappInt-decode-query-params {} { + global wapp + dict set wapp .qp 1 + if {[dict exists $wapp QUERY_STRING]} { + foreach qterm [split [dict get $wapp QUERY_STRING] &] { + set qsplit [split $qterm =] + set nm [lindex $qsplit 0] + if {[regexp {^[a-z][a-z0-9]*$} $nm]} { + dict set wapp $nm [wappInt-decode-url [lindex $qsplit 1]] + } + } + } + if {[dict exists $wapp CONTENT_TYPE] + && [dict get $wapp CONTENT_TYPE]=="application/x-www-form-urlencoded" + && [dict exists $wapp CONTENT] + } { + foreach qterm [split [string trim [dict get $wapp CONTENT]] &] { + set qsplit [split $qterm =] + set nm [lindex $qsplit 0] + if {[regexp {^[a-z][-a-z0-9_]*$} $nm]} { + dict set wapp $nm [wappInt-decode-url [lindex $qsplit 1]] + } + } + } + # To-Do: Perhaps add support for multipart/form-data decoding. + # Alternatively, perhaps multipart/form-data decoding can be done + # by application code using a separate helper function, like + # "wapp_decode_multipart_formdata" or somesuch. +} + +# Invoke application-supplied methods to generate a reply to +# a single HTTP request. +# +# This routine always runs within [catch], so handle exceptions by +# invoking [error]. +# +proc wappInt-handle-request {chan useCgi} { + global wapp + dict set wapp .reply {} + dict set wapp .mimetype {text/html; charset=utf-8} + dict set wapp .reply-code {200 Ok} + dict set wapp .csp {default-src 'self'} + + # Set up additional CGI environment values + # + if {![dict exists $wapp HTTP_HOST]} { + dict set wapp BASE_URL {} + } elseif {[dict exists $wapp HTTPS]} { + dict set wapp BASE_URL https://[dict get $wapp HTTP_HOST] + } else { + dict set wapp BASE_URL http://[dict get $wapp HTTP_HOST] + } + if {![dict exists $wapp REQUEST_URI]} { + dict set wapp REQUEST_URI / + } elseif {[regsub {\?.*} [dict get $wapp REQUEST_URI] {} newR]} { + # Some servers (ex: nginx) append the query parameters to REQUEST_URI. + # These need to be stripped off + dict set wapp REQUEST_URI $newR + } + if {[dict exists $wapp SCRIPT_NAME]} { + dict append wapp BASE_URL [dict get $wapp SCRIPT_NAME] + } else { + dict set wapp SCRIPT_NAME {} + } + if {![dict exists $wapp PATH_INFO]} { + # If PATH_INFO is missing (ex: nginx) the construct it + set URI [dict get $wapp REQUEST_URI] + set skip [string length [dict get $wapp SCRIPT_NAME]] + dict set wapp PATH_INFO [string range $URI $skip end] + } + if {[regexp {^/([^/]+)(.*)$} [dict get $wapp PATH_INFO] all head tail]} { + dict set wapp PATH_HEAD $head + dict set wapp PATH_TAIL [string trimleft $tail /] + } else { + dict set wapp PATH_INFO {} + dict set wapp PATH_HEAD {} + dict set wapp PATH_TAIL {} + } + dict set wapp SELF_URL [dict get $wapp BASE_URL]/[dict get $wapp PATH_HEAD] + + # Parse query parameters from the query string, the cookies, and + # POST data + # + if {[dict exists $wapp HTTP_COOKIE]} { + foreach qterm [split [dict get $wapp HTTP_COOKIE] {;}] { + set qsplit [split [string trim $qterm] =] + set nm [lindex $qsplit 0] + if {[regexp {^[a-z][-a-z0-9_]*$} $nm]} { + dict set wapp $nm [wappInt-decode-url [lindex $qsplit 1]] + } + } + } + if {[dict exists $wapp HTTP_REFERER] + && [string match [dict get $wapp BASE_URL]/* [dict get $wapp HTTP_REFERER]] + } { + set same_origin 1 + } else { + set same_origin 0 + } + dict set wapp SAME_ORIGIN $same_origin + if {$same_origin} { + wappInt-decode-query-params + } + + # Invoke the application-defined handler procedure for this page + # request. If an error occurs while running that procedure, generate + # an HTTP reply that contains the error message. + # + wapp-before-dispatch-hook + wappInt-trace + set mname [dict get $wapp PATH_HEAD] + if {[catch { + if {$mname!="" && [llength [info proc wapp-page-$mname]]>0} { + wapp-page-$mname + } else { + wapp-default + } + } msg]} { + wapp-reset + wapp-reply-code "500 Internal Server Error" + wapp-mimetype text/html + wapp-trim { +

Wapp Application Error

+
%html($::errorInfo)
+ } + dict unset wapp .new-cookies + } + + # Transmit the HTTP reply + # + if {$chan=="stdout"} { + puts $chan "Status: [dict get $wapp .reply-code]\r" + } else { + puts $chan "HTTP/1.1 [dict get $wapp .reply-code]\r" + puts $chan "Server: wapp\r" + puts $chan "Connection: close\r" + } + if {[dict exists $wapp .reply-extra]} { + foreach {name value} [dict get $wapp .reply-extra] { + puts $chan "$name: $value\r" + } + } + if {[dict exists $wapp .csp]} { + puts $chan "Content-Security-Policy: [dict get $wapp .csp]\r" + } + set mimetype [dict get $wapp .mimetype] + puts $chan "Content-Type: $mimetype\r" + if {[dict exists $wapp .new-cookies]} { + foreach {nm val} [dict get $wapp .new-cookies] { + if {[regexp {^[a-z][-a-z0-9_]*$} $nm]} { + if {$val==""} { + puts $chan "Set-Cookie: $nm=; HttpOnly; Path=/; Max-Age=1\r" + } else { + set val [wappInt-enc-url $val] + puts $chan "Set-Cookie: $nm=$val; HttpOnly; Path=/\r" + } + } + } + } + if {[string match text/* $mimetype]} { + set reply [encoding convertto utf-8 [dict get $wapp .reply]] + if {[regexp {\ygzip\y} [wapp-param HTTP_ACCEPT_ENCODING]]} { + catch { + set x [zlib gzip $reply] + set reply $x + puts $chan "Content-Encoding: gzip\r" + } + } + } else { + set reply [dict get $wapp .reply] + } + puts $chan "Content-Length: [string length $reply]\r" + puts $chan \r + puts $chan $reply + flush $chan + wappInt-close-channel $chan +} + +# This routine runs just prior to request-handler dispatch. The +# default implementation is a no-op, but applications can override +# to do additional transformations or checks. +# +proc wapp-before-dispatch-hook {} {return} + +# Process a single CGI request +# +proc wappInt-handle-cgi-request {} { + global wapp env + foreach key { + CONTENT_LENGTH + CONTENT_TYPE + DOCUMENT_ROOT + HTTP_ACCEPT_ENCODING + HTTP_COOKIE + HTTP_HOST + HTTP_REFERER + HTTP_USER_AGENT + HTTPS + PATH_INFO + QUERY_STRING + REMOTE_ADDR + REQUEST_METHOD + REQUEST_URI + REMOTE_USER + SCRIPT_FILENAME + SCRIPT_NAME + SERVER_NAME + SERVER_PORT + SERVER_PROTOCOL + } { + if {[info exists env($key)]} { + dict set wapp $key $env($key) + } + } + set len 0 + if {[dict exists $wapp CONTENT_LENGTH]} { + set len [dict get $wapp CONTENT_LENGTH] + } + if {$len>0} { + fconfigure stdin -translation binary + dict set wapp CONTENT [read stdin $len] + } + fconfigure stdout -translation binary + wappInt-handle-request stdout 1 +} + +# Process new text received on an inbound SCGI request +# +proc wappInt-scgi-readable {chan} { + if {[catch [list wappInt-scgi-readable-unsafe $chan] msg]} { + puts stderr "$msg\n$::errorInfo" + wappInt-close-channel $chan + } +} +proc wappInt-scgi-readable-unsafe {chan} { + upvar #0 wappInt-$chan W wapp wapp + if {![dict exists $W .toread]} { + # If the .toread key is not set, that means we are still reading + # the header. + # + # An SGI header is short. This implementation assumes the entire + # header is available all at once. + # + set req [read $chan 15] + set n [string length $req] + scan $req %d:%s len hdr + incr len [string length "$len:,"] + append hdr [read $chan [expr {$len-15}]] + foreach {nm val} [split $hdr \000] { + if {$nm==","} break + dict set W $nm $val + } + set len 0 + if {[dict exists $W CONTENT_LENGTH]} { + set len [dict get $W CONTENT_LENGTH] + } + if {$len>0} { + # Still need to read the query content + dict set W .toread $len + } else { + # There is no query content, so handle the request immediately + set wapp $W + wappInt-handle-request $chan 0 + } + } else { + # If .toread is set, that means we are reading the query content. + # Continue reading until .toread reaches zero. + set got [read $chan [dict get $W .toread]] + dict append W CONTENT $got + dict set W .toread [expr {[dict get $W .toread]-[string length $got]}] + if {[dict get $W .toread]<=0} { + # Handle the request as soon as all the query content is received + set wapp $W + wappInt-handle-request $chan 0 + } + } +} + +# Call this version 1.0 +package provide wapp 1.0