tclbdd

Check-in [a24b8ef7c3]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Added the Datalog compiler (under construction - only the parser and the stratification pass are implemented so far).
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:a24b8ef7c341e1ebdd5a49f41410648e7b85dfb1
User & Date: kbk 2013-12-28 05:06:17
Context
2013-12-28
05:33
added corovar notation rather than the fugly 'upvar #1' check-in: adf349c020 user: kbk tags: trunk
05:06
Added the Datalog compiler (under construction - only the parser and the stratification pass are implemented so far). check-in: a24b8ef7c3 user: kbk tags: trunk
2013-12-23
03:18
Major code cleanup and commentary in tclfddd.tcl. check-in: ba20005fb0 user: kbk tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Show Whitespace Changes Patch

Added library/coroutine_iterator.tcl.

            1  +# coroutine_iterator.tcl --
            2  +#
            3  +#	Implements a 'foreach' loop that uses a coroutine to manage the
            4  +#	iteration, and cleans up properly on unusual terminations.
            5  +#
            6  +# Copyright (c) 2013 by Kevin B. Kenny
            7  +#
            8  +# See the file "license.terms" for information on usage and redistribution
            9  +# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
           10  +#
           11  +#------------------------------------------------------------------------------
           12  +
           13  +package require Tcl 8.6
           14  +
           15  +namespace eval coroutine {
           16  +    namespace eval iterator {
           17  +	variable gensym 0;	# Sequence number for generated symbols
           18  +	namespace export foreach
           19  +    }
           20  +}
           21  +
           22  +# coroutine::iterator::foreach --
           23  +#
           24  +#	Iterate over the results of a coroutine
           25  +#
           26  +# Usage:
           27  +#	coroutine::iterator::foreach var initCommand script
           28  +#
           29  +# Parameters:
           30  +#	var         - Name of the variable in caller's scope that
           31  +#		      will hold the values the procedure is iterating over.
           32  +#	initCommand - Command and arguments that will be the main
           33  +#		      procedure of the coroutine. The procedure is
           34  +#		      expected to yield each of the iteration results
           35  +#		      in turn, and then return to indicate the end of
           36  +#		      the loop.
           37  +#	script      - Script to execute for each [yield]ed value, with
           38  +#		      the [yield]ed value in $var.
           39  +#
           40  +# Results:
           41  +#	None.
           42  +#
           43  +# Side effects:
           44  +#	Launches a coroutine with the given 'initCommand' and runs
           45  +#	it to completion, executing the given script one on each
           46  +#	[yield]ed result.
           47  +
           48  +proc coroutine::iterator::foreach {var initCommand script} {
           49  +    variable gensym
           50  +    set coro [namespace current]::coro[incr gensym]
           51  +    upvar 1 $var value
           52  +    try {
           53  +	for {set value [coroutine $coro {*}$initCommand]} \
           54  +	    {[namespace which $coro] ne {}} \
           55  +	    {set value [$coro]} {
           56  +		try {
           57  +		    uplevel 1 $script
           58  +		} on error {message options} {
           59  +		    dict incr options -level 1
           60  +		    return -options $options $message
           61  +		} on return {retval options} {
           62  +		    dict incr options -level 1
           63  +		    return -options $options $retval
           64  +		} on break {} {
           65  +		    break
           66  +		} on continue {} {
           67  +		    continue
           68  +		}
           69  +	    }
           70  +    } finally {
           71  +	catch {rename $coro {}}
           72  +    }
           73  +}
           74  +
           75  +package provide coroutine::iterator 1.0
           76  +
           77  +if {![info exists ::argv0] || $::argv0 ne [info script]} {
           78  +    return
           79  +}
           80  +
           81  +# Example:
           82  +
           83  +proc doit {n {i 0}} {
           84  +    if {$n == 0} {
           85  +	return
           86  +    }
           87  +    yield $i
           88  +    incr n -1
           89  +    doit $n [expr {2*$i + 1}]
           90  +    doit $n [expr {2*$i + 2}]
           91  +    return
           92  +}
           93  +
           94  +coroutine::iterator::foreach x {doit 4} {puts $x}

Added library/datalog.tcl.

            1  +# datalog.tcl --
            2  +#
            3  +#	Datalog compiler, implemented in Tcl for a Tcl-hosted runtime
            4  +#	engine based on BDD's.
            5  +#
            6  +# Copyright (c) 2013 by Kevin B. Kenny
            7  +#
            8  +# See the file "license.terms" for information on usage and redistribution
            9  +# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
           10  +#
           11  +#------------------------------------------------------------------------------
           12  +
           13  +source [file dirname [info script]]/coroutine_iterator.tcl; # TEMP
           14  +
           15  +package require Tcl 8.6
           16  +package require coroutine::iterator 1.0
           17  +package require grammar::aycock 1.0
           18  +
           19  +namespace eval bdd {
           20  +    namespace eval datalog {
           21  +	namespace export lex parse compile
           22  +    }
           23  +}
           24  +
           25  +# bdd::datalog::lex --
           26  +#
           27  +#	Lexical analysis for the Datalog compiler.
           28  +#
           29  +# Parameters:
           30  +#	string - Program text
           31  +#
           32  +# Results:
           33  +#	Returns two lists. The first is a list of token types, drawn from
           34  +#	!, (, ,, ), =, ., ~, ?, :-, IDENTIFIER, TCLVAR, STRING and INTEGER.
           35  +#	The second is a list of token values. Values of most tokens are
           36  +#	their text. Quoted strings are backslash-substituted, and Tcl
           37  +#	variables have the $ sigil stripped.
           38  +
           39  +proc bdd::datalog::lex {string} {
           40  +    set i 0
           41  +    regsub -all {\\\n} $string {} string
           42  +    set tokens [regexp -all -inline -expanded -- {
           43  +	| \s+			# whitespace - ignore
           44  +	| %[^\n]*\n?            # % comment - ignore
           45  +	| [!(,)=.~?\"]		# punctuation
           46  +        | :-			# implicant
           47  +        | \$?[^\s!(,)=:.~?\"%]+	# identifier
           48  +	| \"(?:[^\"\\]|\\.)+\"  # quoted string
           49  +    } $string]
           50  +    set types {}
           51  +    set values {}
           52  +    foreach token $tokens {
           53  +	set sigil [string index $token 0]
           54  +	if {$sigil in {% { } \t \n}} {
           55  +	    continue
           56  +	} elseif {$sigil in {! ( , ) : = . ~ ?}} {
           57  +	    lappend types $token
           58  +	    lappend values $token
           59  +	} elseif {$sigil eq "\$"} {
           60  +	    lappend types TCLVAR
           61  +	    lappend values [list [string range $token 1 end]]
           62  +	} elseif {$sigil eq "\""} {
           63  +	    lappend types STRING
           64  +	    lappend values [subst -novariables -nocommands \
           65  +				[string range $token 1 end-1]]
           66  +	} elseif {[string is integer -strict $token]} {
           67  +	    lappend types INTEGER
           68  +	    lappend values $token
           69  +	} else {
           70  +	    lappend types IDENTIFIER
           71  +	    lappend values $token
           72  +	}
           73  +    }
           74  +    return [list $types $values]
           75  +}
           76  +
           77  +# Grammar for the Datalog parser
           78  +
           79  +set bdd::datalog::parser \
           80  +    [::grammar::aycock::parser \
           81  +	 [regsub -all -- {\#[^\n]*\n} {
           82  +
           83  +    # A program comprises a list of statements followed optionally by
           84  +    # a query
           85  +
           86  +    program	::=	statements 
           87  +    {
           88  +	linsert [lindex $_ 0] 0 PROGRAM
           89  +    }
           90  +    program	::=	statements query
           91  +    {
           92  +	linsert [linsert [lindex $_ 0] end [lindex $_ 1]] 0 PROGRAM
           93  +    }
           94  +    statements	::=	statements statement
           95  +    {
           96  +	linsert [lindex $_ 0] end [lindex $_ 1]
           97  +    }
           98  +    statements	::=
           99  +    {
          100  +	concat
          101  +    }
          102  +
          103  +    # A statement is either an assertion or retraction of a clause
          104  +
          105  +    statement	::=	assertion		{}
          106  +    statement	::=	retraction		{}
          107  +    assertion	::=	clause .
          108  +    {
          109  +	list ASSERTION [lindex $_ 0]
          110  +    }
          111  +    retraction	::=	clause ~
          112  +    {
          113  +	list RETRACTION [lindex $_ 0]
          114  +    }
          115  +
          116  +    # A query gives a literal to match
          117  +
          118  +    query	::=	pliteral ?
          119  +    {
          120  +	list QUERY [lindex $_ 0]
          121  +    }
          122  +
          123  +    # A clause asserts a fact or gives a rule for deducing a fact
          124  +
          125  +    clause	::=	equivalence		{}
          126  +    clause	::=	fact			{}
          127  +    clause	::=	rule			{}
          128  +
          129  +    # A fact is just a non-negated literal
          130  +
          131  +    fact	::=	pliteral
          132  +    {
          133  +	list FACT [lindex $_ 0]
          134  +    }
          135  +
          136  +    # A rule comprises a head (a fact to be deduced) and a body
          137  +    # (the facts to deduce it from)
          138  +
          139  +    rule	::=	head :- body
          140  +    {
          141  +	linsert [lindex $_ 2] 0 RULE [lindex $_ 0]
          142  +    }
          143  +    head	::=	pliteral		{}
          144  +
          145  +    # The body is a set of comma-separated, possibly negated conditions
          146  +
          147  +    body	::=	condition
          148  +    {
          149  +	set _
          150  +    }
          151  +    body	::=	body , condition
          152  +    {
          153  +	linsert [lindex $_ 0] end [lindex $_ 2]
          154  +    }
          155  +
          156  +    # A condition is either a literal, or else an equality constraint
          157  +
          158  +    condition	::=	literal			{}
          159  +    condition	::=	equality		{}
          160  +
          161  +    # A literal is a predicate symbol optionally followed by a list of terms
          162  +
          163  +    literal	::=	pliteral		{}
          164  +    literal	::=	! pliteral
          165  +    {
          166  +	list NOT [lindex $_ 1]
          167  +    }
          168  +    pliteral	::=	predicate_symbol
          169  +    {
          170  +	list LITERAL [lindex $_ 0]
          171  +    }
          172  +    pliteral	::=	predicate_symbol ( termlist )
          173  +    {
          174  +	linsert [lindex $_ 2] 0 LITERAL [lindex $_ 0]
          175  +    }
          176  +    termlist	::=	term
          177  +    {
          178  +	set _
          179  +    }
          180  +    termlist	::=	termlist , term
          181  +    {
          182  +	linsert [lindex $_ 0] end [lindex $_ 2]
          183  +    }
          184  +
          185  +    equality	::=	variable = variable
          186  +    {
          187  +	list EQUALITY [list VARIABLE [lindex $_ 0]] \
          188  +	    [list VARIABLE [lindex $_ 2]]
          189  +    }
          190  +
          191  +    # A predicate symbol is either a bare identifier or a quoted string
          192  +
          193  +    predicate_symbol ::= IDENTIFIER			{}
          194  +    predicate_symbol ::= STRING				{}
          195  +
          196  +    # A term is either a variable or a constant
          197  +
          198  +    term	::=	variable
          199  +    {
          200  +	linsert $_ 0 VARIABLE
          201  +    }
          202  +    term	::=	constant
          203  +    {
          204  +	linsert $_ 0 CONSTANT
          205  +    }
          206  +
          207  +    # A variable is an identifier or quoted string
          208  +
          209  +    variable	::=	IDENTIFIER 			{}
          210  +    variable	::=	STRING				{}
          211  +
          212  +    # A constant is a reference to a Tcl variable or a number
          213  +
          214  +    constant	::=	TCLVAR
          215  +    {
          216  +	linsert $_ 0 TCLVAR
          217  +    }
          218  +    constant	::=	INTEGER
          219  +    {
          220  +	linsert $_ 0 INTEGER
          221  +    }
          222  +} {}]]
          223  +
          224  +# bdd::datalog::getrules --
          225  +#
          226  +#	Walk the parse tree of a Datalog program and extract only the rules
          227  +#
          228  +# Usage:
          229  +#	bdd::datalog::getrules $parseTree
          230  +#
          231  +# Parameters:
          232  +#	parseTree - Parse tree resulting from running the Datalog parser.
          233  +#
          234  +# Results:
          235  +#	Returns a list of the rules, with the RULE flag stripped.
          236  +#	Each rule is therefore a list of literals and equalities, with
          237  +#	the first element representing the head and the rest representing
          238  +#	the body.
          239  +
          240  +proc bdd::datalog::getrules {parseTree} {
          241  +    switch -exact -- [lindex $parseTree 0] {
          242  +	PROGRAM -
          243  +	ASSERTION {
          244  +	    set results {}
          245  +	    foreach part [lrange $parseTree 1 end] {
          246  +		set sub [getrules $part]
          247  +		lappend results {*}$sub
          248  +	    }
          249  +	    return $results
          250  +	}
          251  +	RULE {
          252  +	    return [list [lrange $parseTree 1 end]]
          253  +	}
          254  +	RETRACTION {
          255  +	    error "Retractions are not currently supported."
          256  +	}
          257  +	default {		# Nothing else may contain a rule
          258  +	    return {}
          259  +	}
          260  +    }
          261  +}
          262  +
          263  +# bdd::datalog::pdg --
          264  +#
          265  +#	Forms the predicate dependency graph of a Datalog program,
          266  +#	expressed as a list of edges whose head and tail labels are
          267  +#	predicate names.
          268  +#
          269  +# Parameters:
          270  +#	rules - Rules contained in the program
          271  +#
          272  +# Results:
          273  +#	Returns a list of tuples: each tuple consists of
          274  +#	  {dependent dependency negated rule} 
          275  +#	where
          276  +#	  * dependent is the name of the dependent predicate
          277  +#	  * dependency is the name of the dependency
          278  +#	  * negated is 1 if the dependency appears in the rule in negated form
          279  +#	  * rule is a copy of the input rule
          280  +
          281  +proc bdd::datalog::pdg {rules} {
          282  +    # extract the predicate dependency graph as an edge list from
          283  +    # the rules of a Datalog program
          284  +    set results {}
          285  +    foreach rule $rules {
          286  +	set body [lassign $rule head]
          287  +	set dependent [lindex $head 1]; # LITERAL name term term ...
          288  +	foreach condition $body {
          289  +	    switch -exact -- [lindex $condition 0] {
          290  +		EQUALITY {		# does not create a dependency
          291  +		}
          292  +		LITERAL {
          293  +		    lappend result \
          294  +			[list $dependent [lindex $condition 1] 0 $rule]
          295  +		}		
          296  +		NOT {		# {NOT {LITERAL foo ...}}
          297  +		    lappend result \
          298  +			[list $dependent [lindex $condition 1 1] 1 $rule]
          299  +		}
          300  +	    }
          301  +	}
          302  +    }
          303  +    return $result
          304  +}
          305  +
          306  +# bdd::datalog::pdg-dicts
          307  +#
          308  +#	Inverts the edge set of the predicate dependency graph of a
          309  +#	Datalog program into individual adjacency lists
          310  +#
          311  +# Parameters:
          312  +#	pdg - Edge list of the predicate dependency graph
          313  +#
          314  +# Results:
          315  +#
          316  +#	Returns a list two dictionaries. The first is keyed by the
          317  +#	dependent, and its values are lists of the edges that join it
          318  +#	to its dependencies. The second is keyed by dependency, and
          319  +#	its values are lists of edges joining the dependnts to it.
          320  +
          321  +proc bdd::datalog::pdg-dicts {pdg} {
          322  +    set outedges {}
          323  +    set inedges {}
          324  +    foreach edge $pdg {
          325  +	lassign $edge from to not
          326  +	if {![dict exists $inedges $from]} {
          327  +	    dict set inedges $from {}
          328  +	}
          329  +	dict lappend outedges $from $edge
          330  +	if {![dict exists $outedges $to]} {
          331  +	    dict set outedges $to {}
          332  +	}
          333  +	dict lappend inedges $to $edge
          334  +    }
          335  +    return [list $outedges $inedges]
          336  +}
          337  +
          338  +# bdd::datalog::scc --
          339  +#
          340  +#	Partiton the predicate dependency graph into strongly connected 
          341  +#	components.
          342  +#
          343  +#
          344  +# Usage:
          345  +#	bdd::datalog::scc v $outedges script
          346  +#
          347  +# Parameters:
          348  +#	v        - Name of a variable in the caller's scope that will
          349  +#		   receive each component in turn. The components are
          350  +#		   lists of the predicate names.
          351  +#	outedges - Dictionary containing the adjacency lists. The keys
          352  +#		   of the dictionary are the names of predicates. The
          353  +#		   values are lists of edges. Each edge is a tuple.
          354  +#		   The first two elements of the tuple are the from-predicate
          355  +#		   and to-predicate. The remaining elements are not used
          356  +#		   in this procedure.
          357  +#	script   - Script that will be executed once on each strongly
          358  +#		   connected component, with 'v' set to the list of 
          359  +#		   names of predicates that belong to the component.
          360  +#
          361  +# Results:
          362  +#	None.
          363  +#
          364  +# Side effects:
          365  +#	Partitions the dependency graph into strongly connected components
          366  +#	and runs the given script on each, with 'v' set to the list of nodes.
          367  +
          368  +proc bdd::datalog::scc {v outedges script} {
          369  +    tailcall coroutine::iterator::foreach $v \
          370  +	[list bdd::datalog::SCC_coro $outedges] \
          371  +	$script
          372  +}
          373  +
          374  +# bdd::datalog::SCC_coro --
          375  +#
          376  +#	Main procedure of the coroutine that partitions the dependency
          377  +#	graph into strongly connected components.
          378  +#
          379  +# Usage:
          380  +#	coroutine $name bdd::datalog::SCC_coro $outedges
          381  +#
          382  +# Parameters:
          383  +#	outedges - Dictionary containing the adjacency lists. The keys
          384  +#		   of the dictionary are the names of predicates. The
          385  +#		   values are lists of edges. Each edge is a tuple.
          386  +#		   The first two elements of the tuple are the from-predicate
          387  +#		   and to-predicate. The remaining elements are not used
          388  +#		   in this procedure.
          389  +#
          390  +# Results:
          391  +#	Yields the sets of nodes that form strongly connected components.
          392  +
          393  +proc bdd::datalog::SCC_coro {outedges} {
          394  +    # outedges is coroutine-global
          395  +    set index 0;		# Coroutine global: Current node's index
          396  +    set S {};			# Coroutine global: Stack of nodes on the
          397  +    				# path from a root to the current node
          398  +    set vindex {};		# Coroutine global: Dictionary whose keys are
          399  +    				# node names, and whose values are node indices
          400  +    set lowlink {};		# Coroutine global: Dictionary whose keys are
          401  +    				# node names, and whose values are node indices
          402  +    				# of backward edges in the graph
          403  +
          404  +    # Visit each node and run Tarjan's algorithm recursively on it.
          405  +    # When every node is visited, all components will have been listed.
          406  +    dict for {v edges} $outedges {
          407  +	if {![dict exists $vindex $v]} {
          408  +	    SCC_coro_worker $v $edges
          409  +	}
          410  +    }
          411  +
          412  +    return
          413  +}
          414  +
          415  +# bdd::datalog::SCC_coro_worker --
          416  +#
          417  +#	Visits a single node in Tarjan's algorithm for strongly
          418  +#	connected components.
          419  +#
          420  +# Parameters:
          421  +#	v     - Name of the node being visited.
          422  +#	edges - List of edges whose origin is the node
          423  +#
          424  +# Results:
          425  +#	None.
          426  +#
          427  +# This procedure performs a depth-first traversal, identifying back edges
          428  +# and strongly connected components.
          429  +
          430  +proc bdd::datalog::SCC_coro_worker {v edges} {
          431  +    upvar #1 outedges outedges;	# Coroutine global: adjacency lists
          432  +    upvar #1 index index;	# Coroutine global: index for the next
          433  +    				# unexamined node
          434  +    upvar #1 S S;		# Coroutine global: stack of nodes traversed
          435  +    				# from a root
          436  +    upvar #1 vindex vindex;	# Coroutine global: dictionary whose keys are
          437  +				# names of nodes and whose values are node
          438  +    				# indices
          439  +    upvar #1 lowlink lowlink;	# Coroutine global: dictionary whose keys are
          440  +				# names of nodes and whose values are the
          441  +    				# indices of the other end of back edges
          442  +
          443  +    # Set the index and lowlink of the node to point to itself, and put
          444  +    # the node on the stack.
          445  +    dict set vindex $v $index
          446  +    dict set lowlink $v $index
          447  +    incr index
          448  +    lappend S $v
          449  +
          450  +    # Examine the successor nodes, testing whether they have yet been visited.
          451  +    foreach edge $edges {
          452  +	lassign $edge from w 
          453  +	if {![dict exists $vindex $w]} {
          454  +
          455  +	    # Successor has not been visited. Visit it now.
          456  +	    SCC_coro_worker $w [dict get $outedges $w]
          457  +	    dict set lowlink $v \
          458  +		[expr {min([dict get $lowlink $v],
          459  +			   [dict get $lowlink $w])}]
          460  +
          461  +	} elseif {[lsearch -exact $S $w] >= 0} {
          462  +
          463  +	    # Successor has been visited, is stacked and hence must 
          464  +	    # belong to the current component. Keep track of the earliest
          465  +	    # visited successor.
          466  +	    dict set lowlink $v \
          467  +		[expr {min([dict get $lowlink $v],
          468  +			   [dict get $vindex $w])}]
          469  +	}
          470  +    }
          471  +
          472  +    if {[dict get $lowlink $v] == [dict get $vindex $v]} {
          473  +	# v is a root node of a strongly connected component.
          474  +	# Unstack the component out to the root, and yield it.
          475  +
          476  +	set component {}
          477  +	while {1} {
          478  +	    set w [lindex $S end]
          479  +	    set S [lrange $S[set S {}] 0 end-1]
          480  +	    lappend component $w
          481  +	    if {$w eq $v} break
          482  +	}
          483  +	yield $component
          484  +
          485  +    }
          486  +    return
          487  +}
          488  +
          489  +# bdd::datalog::indexSCCs --
          490  +#
          491  +#	Partitions the dependency graph of a Datalog program into strongly
          492  +#	connected components, and partitions the adjacency lists by
          493  +#	dependent component.
          494  +#
          495  +# Parameters:
          496  +#	outedges - Dictionary containing the adjacency lists. The keys
          497  +#		   of the dictionary are the names of predicates. The
          498  +#		   values are lists of edges. Each edge is a tuple.
          499  +#		   The first two elements of the tuple are the dependent
          500  +#		   predicate and the dependency predicate. The remaining 
          501  +#		   elements are not used in this procedure.
          502  +#
          503  +# Results:
          504  +#	Returns two lists. The first is a list of the strongly connected
          505  +#	components, each of which is represented as a sublist containing the
          506  +#	names of predicates in the component. The second is a dictionary
          507  +#	which for each predicate gives the predicate's position in the list
          508  +#	of components.
          509  +
          510  +proc bdd::datalog::indexSCCs {outedges} {
          511  +    # make the index of predicate->component index: output is componentList and
          512  +    # componentIndex.
          513  +    
          514  +    set i 0
          515  +    set componentIndex {}
          516  +    set componentList {}
          517  +    bdd::datalog::scc c $outedges {
          518  +	foreach predicate $c {
          519  +	    dict set componentIndex $predicate $i
          520  +	}
          521  +	puts "component $i: $c"
          522  +	lappend componentList $c
          523  +	incr i
          524  +    }
          525  +    return [list $componentList $componentIndex]
          526  +}
          527  +
          528  +# bdd::datalog::makeComponentEdges --
          529  +#
          530  +#	Given the partition of the predicate dependency graph into strongly
          531  +#	connected components, makes adjacency lists between the components,
          532  +#	each of which gives the predicate dependencies that relate
          533  +#	the component pair.
          534  +#
          535  +# Parameters:
          536  +#	componentList - List of the components, each of which is a list of
          537  +#		        predicate names.
          538  +#	componentIndex - Dictionary whose keys are predicate names and whose
          539  +#		         values are the positions of the predicates' components
          540  +#			 in componentList
          541  +#	outEdges - Edge list for the predicate dependency graph.
          542  +#
          543  +# Results:
          544  +#	Returns a list of two items, componentEdges and componentEdgeNegated.
          545  +#	componentEdges is a list whose positions are component numbers,
          546  +#	and whose values are dictionaries whose keys dependent component
          547  +#	numbers and whose values are lists of the edges that introduce
          548  +#	the dependency. componentEdgeNegated is a two-level dictionary
          549  +#	whose keys are the component numbers of dependent and dependency,
          550  +#	and whose values exist if at least one dependency predicate
          551  +#	appears in a dependent rule in negated form.
          552  +#
          553  +# Side effects:
          554  +#	Throws an error if the Datalog program is not stratifiable.
          555  +
          556  +proc bdd::datalog::makeComponentEdges {componentList componentIndex outedges} {
          557  +
          558  +    set componentEdges {}
          559  +    set componentEdgeNegated {}
          560  +    set i 0
          561  +    foreach component $componentList {
          562  +	set curComponentEdges {}
          563  +	foreach predicate $component {
          564  +	    foreach edge [dict get $outedges $predicate] {
          565  +		lassign $edge from to not rule
          566  +		set destComponent [dict get $componentIndex $to]
          567  +		if {$not && ($from == $to)} {
          568  +		    # TODO - better error reporting
          569  +		    error "The program is not stratifiable\
          570  +                           because of rule $rule"
          571  +		}
          572  +		dict lappend curComponentEdges $destComponent $edge
          573  +		if {$not} {
          574  +		    dict set componentEdgeNegated $i $destComponent 1
          575  +		}
          576  +	    }
          577  +	}
          578  +	lappend componentEdges $curComponentEdges
          579  +	incr i
          580  +    }
          581  +    return [list $componentEdges $componentEdgeNegated]
          582  +}
          583  +
          584  +# bdd::datalog::stratify --
          585  +#
          586  +#	Given the strongly connected components of a Datalog program
          587  +#	and the dependency relations between components, stratifies the
          588  +#	program.
          589  +#
          590  +# Parameters:
          591  +#	componentEdges - List whose positions are component numbers,
          592  +#			 and whose values are dictionaries whose keys are the
          593  +#                        component numbers of dependencies and whose
          594  +#			 values are lists of edges that introduce the
          595  +#			 dependency
          596  +#	componentEdgeNegated - Two level dictionary whose values are dependent
          597  +#			       component index and dependency component index,
          598  +#			       whose values exist if at least one predicate
          599  +#			       of the dependency appears in the dependent in
          600  +#			       negated form.
          601  +#
          602  +# Results:
          603  +#	Returns a dictionary whose keys are component indices and
          604  +#	whose values are the stratum numbers to which they belong.
          605  +
          606  +proc bdd::datalog::stratify {componentEdges componentEdgeNegated} {
          607  +    set stratum {}
          608  +    set c 0
          609  +    foreach e $componentEdges {
          610  +	stratify1 stratum $componentEdges $componentEdgeNegated $c
          611  +	incr c
          612  +    }
          613  +    return $stratum
          614  +}
          615  +
          616  +# bdd::datalog::stratify1 --
          617  +#
          618  +#	Service procedure for the stratification pass
          619  +#
          620  +# Parameters:
          621  +#	stratumVar - Name of the dictionary in caller's scope where the
          622  +#		     stratum information is being accumulated. Keys
          623  +#		     are component indices; values are strata.
          624  +#	componentEdges - List whose positions are component numbers,
          625  +#			 and whose values are dictionaries whose keys are the
          626  +#                        component numbers of dependencies and whose
          627  +#			 values are lists of edges that introduce the
          628  +#			 dependency
          629  +#	componentEdgeNegated - Two level dictionary whose values are dependent
          630  +#			       component index and dependency component index,
          631  +#			       whose values exist if at least one predicate
          632  +#			       of the dependency appears in the dependent in
          633  +#			       negated form.
          634  +#	c - Component index being examined
          635  +#
          636  +# Results:
          637  +#	Returns the stratum of component 'c'
          638  +#
          639  +# Side effects:
          640  +#	Computes strata for the component and its descendants. Utilizes
          641  +#	'stratumVar' as a cache so that each component is visited only
          642  +#	once.
          643  +#
          644  +# If component A depends on component B, then stratum[A] >= stratum[B].
          645  +# If at least one rule introducing the dependency has the dependent
          646  +# predicate appearing in negated form, then stratum[A] > stratum[B].
          647  +# The resulting strata are the ones that give stratum[X]>=0 for all X,
          648  +# for which the values of stratum[X] are minimized for all X subject to
          649  +# the constraints above.
          650  +
          651  +proc bdd::datalog::stratify1 {stratumVar 
          652  +			      componentEdges
          653  +			      componentEdgeNegated
          654  +			      c} {
          655  +    upvar 1 $stratumVar stratum
          656  +    if {[dict exists $stratum $c]} {
          657  +	return [dict get $stratum $c]
          658  +    } else {
          659  +	set edgeSet [lindex $componentEdges $c]
          660  +	if {[dict size $edgeSet] == 0} {
          661  +	    set s 0
          662  +	} else {
          663  +	    set s 1
          664  +	    dict set stratum $c 1
          665  +	    dict for {next edges} $edgeSet {
          666  +		if {$next == $c} continue
          667  +		set t [stratify1 stratum \
          668  +			   $componentEdges $componentEdgeNegated $next]
          669  +		incr t [dict exists $componentEdgeNegated $c $next]
          670  +		if {$t > $s} {
          671  +		    set s $t
          672  +		}
          673  +	    }
          674  +	}
          675  +	dict set stratum $c $s
          676  +	return $s
          677  +    }
          678  +}
          679  +
          680  +# bdd::datalog::compsByStratum --
          681  +#
          682  +#	Distributes the strongly connected components of the predicate
          683  +#	dependency graph by stratum after the program is stratified.
          684  +#
          685  +# Parameters:
          686  +#	stratum - Dictionary whose keys are component numbers and whose
          687  +#	          values are stratum numbers for the components
          688  +#
          689  +# Results:
          690  +#	Returns a list with one element per stratum, in order by stratum
          691  +#	number. The elements are the lists of components at each of the
          692  +#	strata.
          693  +
          694  +proc bdd::datalog::compsByStratum {stratum} {
          695  +    set bystratum {}
          696  +    dict for {c s} $stratum {
          697  +	while {$s >= [llength $bystratum]} {
          698  +	    lappend bystratum {}
          699  +	}
          700  +	set comps [lindex $bystratum $s]; lset bystratum $s {}
          701  +	lappend comps $c; lset bystratum $s $comps
          702  +    }
          703  +    return $bystratum
          704  +}
          705  +
          706  +# bdd::datalog::sortComponents --
          707  +#
          708  +#	Topologically sorts the components at each stratum of the predicate 
          709  +#	dependency graph to give the components' order of evaluation in
          710  +#	the final generated code.
          711  +#
          712  +# Parameters:
          713  +#	stratum - Dictionary whose keys are component numbers and whose
          714  +#	          values are the stratum numbers for the components
          715  +#	bystratum - List of lists. The sublists are the component indices
          716  +#	            at each stratum.
          717  +#	componentEdges - List of dictionaries. The positions in the
          718  +#		         list are component indices. The keys of the
          719  +#			 dictionaries are dependency component indices,
          720  +#			 and the values are lists of edges inducing the
          721  +#			 dependencies.
          722  +#
          723  +# Results:
          724  +#	Returns 'bystratum' with the component numbers reordered into
          725  +#	topologic numbering.
          726  +
          727  +proc bdd::datalog::sortComponents {stratum bystratum componentEdges} {
          728  +    set s 0
          729  +    foreach clist $bystratum {
          730  +	set e {}
          731  +	foreach comp $clist {
          732  +	    dict for {comp2 edges} [lindex $componentEdges $comp] {
          733  +		if {($comp2 != $comp) && [dict get $stratum $comp2] == $s} {
          734  +		    puts "same-stratum edge $comp->$comp2"
          735  +		    dict lappend e $comp2 [list $comp2 $comp]
          736  +		}
          737  +	    }
          738  +	}
          739  +	lset bystratum $s [bdd::datalog::topsort $clist $e]
          740  +	incr s
          741  +    }
          742  +    return $bystratum
          743  +}
          744  +
          745  +# bdd::datalog::topsort --
          746  +#
          747  +#	Topologic sort.
          748  +#
          749  +# Parameters:
          750  +#	v - List of vertices in a DAG.
          751  +#	e - Dictionary of edges in the graph. The keys are origin nodes.
          752  +#	    The values are lists of edges. The edges are tuples, the first
          753  +#	    two elements of which are the origin and destination vertices.
          754  +#
          755  +# Results:
          756  +#	Returns a topologic ordering of v. If (v,w) is in E, then v
          757  +#	precedes w in the ordering.
          758  +#
          759  +# This procedure separates the DAG into connected commponents, and
          760  +# for each component, adds the vertices of the component to the
          761  +# output in reverse postorder of the component's minimum spanning tree.
          762  +
          763  +proc bdd::datalog::topsort {v e} {
          764  +    set result {}
          765  +    set visited {}
          766  +    foreach vertex $v {
          767  +	topsort1 result visited $e $vertex
          768  +    }
          769  +    return [lreverse $result]
          770  +}
          771  +
          772  +# bdd::datalog::topsort1 --
          773  +#
          774  +#	Service procedure for topologic sort.
          775  +#
          776  +# Parameters:
          777  +#	resultVar - Name of a variable in the caller's scope where
          778  +#		    the topologic order is being accumulated.
          779  +#	visitedVar - Name of a dictionary in the caller's scope whose
          780  +#		     keys are the names of vertices already visited.
          781  +#	e - Dictionary of edges in the graph. The keys are origin nodes.
          782  +#	    The values are lists of edges. The edges are tuples, the first
          783  +#	    two elements of which are the origin and destination vertices.
          784  +#	v - Name of the current vertex being visited.
          785  +#
          786  +# Results:
          787  +#	None
          788  +#
          789  +# This procedure is called at least once for every vertex. It walks
          790  +# the vertex's outbound edges recursively until it comes to either an
          791  +# vertex already visited or to a sink. The vertices that it visits are
          792  +# accumulated in postorder. (The calling procedure will reverse them).
          793  +
          794  +proc bdd::datalog::topsort1 {resultVar visitedVar e v} {
          795  +    upvar 1 $resultVar result
          796  +    upvar 1 $visitedVar visited
          797  +    if {[dict exists $visited $v]} {
          798  +	return
          799  +    }
          800  +    dict set visited $v {}
          801  +    if {[dict exists $e $v]} {
          802  +	foreach edge [dict get $e $v] {
          803  +	    topsort1 result visited $e [lindex $edge 1]
          804  +	}
          805  +    }
          806  +    lappend result $v
          807  +    return
          808  +}
          809  +
          810  +proc bdd::datalog::compile {program} {
          811  +    # Do lexical analysis of the program
          812  +    lassign [::bdd::datalog::lex $program] tokens values
          813  +    
          814  +    # Parse the program
          815  +    set parseTree [$::bdd::datalog::parser parse $tokens $values]
          816  +
          817  +    # Extract the rules from the parse tree
          818  +    set rules [bdd::datalog::getrules $parseTree]
          819  +    
          820  +    # Form the predicate dependency graph of the rules
          821  +    set pdg [bdd::datalog::pdg $rules]
          822  +    
          823  +    # Distribute the edges of the graph into separate adjacency lists
          824  +    lassign [bdd::datalog::pdg-dicts $pdg] outedges inedges
          825  +    
          826  +    # Find the stongly connected components of the predicate dependency graph
          827  +    lassign [bdd::datalog::indexSCCs $outedges] componentList componentIndex
          828  +    
          829  +    # Determine whether the predicate dependency graph is stratifiable, and 
          830  +    # distribute the edges of the component graph into separate adjacency lists.
          831  +    lassign [bdd::datalog::makeComponentEdges \
          832  +		 $componentList $componentIndex $outedges] \
          833  +	componentEdges componentEdgeNegated
          834  +    
          835  +    # TEMP - report component-component edges
          836  +    set i 0
          837  +    foreach edgeSet $componentEdges {
          838  +	dict for {j edges} $edgeSet {
          839  +	    puts "$i -> $j ([dict exists $componentEdgeNegated $i $j])"
          840  +	}
          841  +	incr i
          842  +    }
          843  +
          844  +    # Stratify the predicate dependency graph
          845  +    set stratum [bdd::datalog::stratify $componentEdges $componentEdgeNegated]
          846  +    
          847  +    # Distribute the components by stratum
          848  +    set bystratum [bdd::datalog::compsByStratum $stratum]
          849  +    
          850  +    # Within each stratum, topologically sort the components so that
          851  +    # dependencies precede their dependents
          852  +    
          853  +    set bystratum [bdd::datalog::sortComponents \
          854  +		       $stratum $bystratum $componentEdges]
          855  +
          856  +    # TEMP - Report the components in final order of processing
          857  +    set s 0
          858  +    foreach clist $bystratum {
          859  +	puts "stratum $s"
          860  +	foreach c $clist {
          861  +	    puts "component $c: [lindex $componentList $c]"
          862  +	}
          863  +	incr s
          864  +    }
          865  +    
          866  +    # TEMP - Report the intra-component dependencies for the components 
          867  +    if 0 {
          868  +	set s 0
          869  +	foreach clist $bystratum {
          870  +	    foreach c $clist {
          871  +		puts "component $c at stratum $s"
          872  +		set edges [lindex $componentEdges $c]
          873  +		puts "edges to [dict keys $edges]"
          874  +		if {[dict exists $edges $c]} {
          875  +		    foreach edge [dict get $edges $c] {
          876  +			puts $edge
          877  +		    }
          878  +		}
          879  +	    }
          880  +	    incr s
          881  +	}
          882  +    }
          883  +}
          884  +
          885  +package provide tclbdd::datalog 0.1
          886  +
          887  +##############################################################################
          888  +
          889  +if {![info exists ::argv0] || [string compare $::argv0 [info script]]} return
          890  +
          891  +# TEMP - lexer stuff - maybe work out better unit tests!
          892  +if 0 {
          893  +namespace import bdd::datalog::lex
          894  +
          895  +lassign [lex {
          896  +    flowspast(V, St, St2) :- seq(St, St2).
          897  +    flowspast(V, St, St2) :- flowspast(V, St, St3),
          898  +                             !writes(St3, V),
          899  +                             flowspast(V, St3, St).
          900  +    reaches(V, St, St2) :- writes(St, V), flowspast(V, St, St2), reads(St2, V).
          901  +}] types values
          902  +foreach t $types v $values {
          903  +    puts "$t: $v"
          904  +}
          905  +
          906  +lassign [lex {
          907  +    reaches(V, $i, St2)?
          908  +}] types values
          909  +foreach t $types v $values {
          910  +    puts "$t: $v"
          911  +}
          912  +}
          913  +
          914  +if 0 {
          915  +# TEMP parser stuff - need to do better unit testing!
          916  +
          917  +set parseTree [$::bdd::datalog::parser parse {*}[bdd::datalog::lex {
          918  + 
          919  +    % flowspast(v,st,st2) means that control passes from the exit of st
          920  +    % to the entry of st2 without altering the value of v
          921  +
          922  +    flowspast(v, st, st2) :- seq(st, st2).
          923  +    flowspast(v, st, st2) :- flowspast(v, st, st3),
          924  +                             !writes(st3, v),
          925  +                             flowspast(v, st3, st).
          926  +
          927  +    % reaches(v,st,st2) means that st assigns a value to v, which
          928  +    % reaches st2, which reads the value of v : that is, st is a
          929  +    % reaching definition for the use of v at st2.
          930  +
          931  +    reaches(v, st, st2) :- writes(st, v), flowspast(v, st, st2), reads(st2, v).
          932  +}]]
          933  +
          934  +puts $parseTree
          935  +
          936  +set parseTree2 [$bdd::datalog::parser parse {*}[bdd::datalog::lex {
          937  +    reaches(v, $i, st2)?
          938  +}]]
          939  +
          940  +puts $parseTree2
          941  +
          942  +
          943  +# Parse tree structure
          944  +# PROGRAM statements
          945  +# statement:
          946  +#   ASSERTION clause
          947  +#   RETRACTION clause
          948  +#   QUERY literal
          949  +# clause:
          950  +#   FACT literal
          951  +#   RULE Name conditions
          952  +# condition:
          953  +#   literal
          954  +#   EQUALITY variable variable
          955  +# literal:
          956  +#   NOT literal
          957  +#   LITERAL Name terms
          958  +# term:
          959  +#   CONSTANT const
          960  +#   variable
          961  +# variable:
          962  +#   VARIABLE name
          963  +
          964  +}
          965  +
          966  +# Try compiling a program
          967  +
          968  +bdd::datalog::compile {
          969  + 
          970  +    % A false entry node (node 0) sets every variable and flows
          971  +    % to node 1. If any of its variables are reachable, those are
          972  +    % variables possibly used uninitialized in the program.
          973  +
          974  +    writes(0, _).
          975  +    seq(0, 1).
          976  +
          977  +    % flowspast(v,st,st2) means that control passes from the exit of st
          978  +    % to the entry of st2 without altering the value of v
          979  +
          980  +    flowspast(v, st, st2) :- seq(st, st2).
          981  +    flowspast(v, st, st2) :- flowspast(v, st, st3),
          982  +                             !writes(st3, v),
          983  +                             flowspast(v, st3, st).
          984  +
          985  +    % reaches(v,st,st2) means that st assigns a value to v, which
          986  +    % reaches st2, which reads the value of v : that is, st is a
          987  +    % reaching definition for the use of v at st2.
          988  +
          989  +    reaches(v, st, st2) :- writes(st, v), flowspast(v, st, st2), reads(st2, v).
          990  +
          991  +    % A variable read that is reachable from the entry is a read of a
          992  +    % possibly uninitialized variable
          993  +
          994  +    uninitRead(st, v) :- reaches(v, 0, st).
          995  +
          996  +    % A variable write that reaches nowhere else is dead code
          997  +
          998  +    deadWrite(st, v) :- writes(st, v), !reaches(v, st, _).
          999  +
         1000  +    % Also do the bddbddb example. Only 1 stratum, but 2 loops in the larger SCC
         1001  +
         1002  +    vP(v, h) :- vP0(v,h).
         1003  +    vP(v1,h) :- assign(v1,v2).
         1004  +    hP(h1,f,h2) :- store(v1,f,v2), vP(v1,h1), vP(v2,h2).
         1005  +    vP(v2,h2) :- load(v1,f,v2), vP(v1,h1), hP(h1,f,h2).
         1006  +
         1007  +}
         1008  +