-
Notifications
You must be signed in to change notification settings - Fork 1
/
tfp.tex
1199 lines (1061 loc) · 46 KB
/
tfp.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass[sigplan,balance=true,pbalance=true,natbib=false]{acmart}
%% ∞
%% ⊂
%% …
%% ₀
%% ₁
%% ₂
%% ₃
%% ₁₂₃₄₅₆
\usepackage{fontspec,newunicodechar}
\usepackage{hyphenat} %% electromagnetic\hyp{}endioscopy
\usepackage[shortcuts]{extdash}
\usepackage{microtype} %% provides tighter text formatting
\usepackage{flushend}
\usepackage{balance}
\usepackage{polyglossia} %% for xelatex use polyglossia, needed for csquotes
\setdefaultlanguage[variant=american]{english}
\usepackage{fancybox}
\PassOptionsToPackage{cache=false,newfloat=true}{minted}
\PassOptionsToPackage{unicode}{hyperref}
\usepackage[capitalize,nameinlink]{cleveref} %% adds \crefrange{}{} and \cpagerefrange{}
%% \BibTeX command to typeset BibTeX logo in the docs
\AtBeginDocument{%
\providecommand\BibTeX{{%
Bib\TeX}}}
\RequirePackage[
backend=biber,
natbib=true,
datamodel=acmdatamodel,
%% style=acmnumeric,
]{biblatex}
%% Declare bibliography sources (one \addbibresource command per source)
\addbibresource{tfp.bib}
\overfullrule=0pt
\usepackage{nag} %% should complain about old and outdated commands
\usepackage{minted}
\usemintedstyle{bw}
\usepackage[strict,autostyle]{csquotes} %% enquote &c that doesn't interfere w/cdlatex usage
%% http://ftp.lyx.org/pub/tex-archive/macros/latex/contrib/csquotes/csquotes.pdf
%% \usepackage[xspace]{ellipsis} they suggest using xetex instead
%% \usepackage{setspace} %% set the spacing (single double, etc)
\setmonofont{DejaVuSansMono}[Scale=MatchLowercase]
\tracinglostchars=2
\usepackage[defaultlines=3,all]{nowidow} %% Eliminates widow and orphans
\setlength {\marginparwidth }{2cm}
\usepackage{todonotes}
\newmintinline[rackinline]{racket}{}
\newmintinline{prolog}{}
\setcopyright{acmcopyright}
\copyrightyear{2023}
\acmYear{2023}
\acmDOI{XXXXXXX.XXXXXXX}
\acmConference[TFP '23]{Symposium on Trends in Functional
Programming}{January 13-15, 2023}{Boston, Massachusetts}
\acmPrice{15.00}
\acmISBN{978-1-4503-XXXX-X/18/06}
\begin{document}
\title{Nearly Macro-free microKanren}
\author{Jason Hemann}
\email{jason.hemann@shu.edu}
\affiliation{%
\institution{Seton Hall University}
\country{USA}
}
\orcid{0000-0002-5405-2936}
\author{Daniel P. Friedman}
\email{dfried@indiana.edu}
\affiliation{%
\institution{Indiana University}
\country{USA}
}
\orcid{0000-0001-9992-1675}
\renewcommand{\shortauthors}{Hemann and Friedman}
\begin{abstract}
We describe changes to the microKanren implementation that make it
more practical to use in a host language without macros. With
some modest runtime features common to most languages, we
show how an implementer lacking macros can come closer to the
expressive power that macros usually provide---with varying degrees
of success. The result is a still functional microKanren that
invites slightly shorter programs, and is relevant even to
implementers that enjoy macro support. For those without it, we
address some pragmatic concerns that necessarily occur without
macros so they can better weigh their options.
\end{abstract}
\begin{CCSXML}
<ccs2012>
<concept>
<concept_id>10011007.10011006.10011008.10011009.10011015</concept_id>
<concept_desc>Software and its engineering~Constraint and logic languages</concept_desc>
<concept_significance>500</concept_significance>
</concept>
<concept>
<concept_id>10003752.10003790.10003795</concept_id>
<concept_desc>Theory of computation~Constraint and logic programming</concept_desc>
<concept_significance>300</concept_significance>
</concept>
</ccs2012>
\end{CCSXML}
\ccsdesc[500]{Software and its engineering~Constraint and logic languages}
%%
%% Keywords. The author(s) should pick words that accurately describe
%% the work being presented. Separate the keywords with commas.
\keywords{logic programming, miniKanren, DSLs, embedding, macros}
\maketitle
\section{Introduction}
Initially we designed microKanren~\cite{hemann2013muKanren} as a
compact relational language kernel to undergird a miniKanren
implementation. Macros implement the surrounding higher-level
miniKanren operators and surface syntax.\@ microKanren is often used
as a tool for understanding the guts of a relational language through
studying its implementation. By re-implementing miniKanren as separate
surface syntax over a macro-less and side-effect free
microKanren kernel, we hoped to simultaneously aid implementers when
studying the source code, and also to make the language easier to port
to other hosts that support programming with functions. To support
both of those efforts, we also chose to program in a deliberately
small and workaday set of Scheme primitives.
The sum of those implementation restrictions, however, seemed to force
some awkward choices including: binary logical operators, one at a
time local variable introduction, and leaks in the stream
abstractions. These made the surface syntax macros seem almost
required, and were far enough from our goals that \emph{The Reasoned
Schemer, 2nd Ed}~\cite{friedman2018reasoned} did not use a
macro-less functional kernel. It also divided host languages into
those that used macros and those that did not. We bridge some of that
split by re-implementing parts of the kernel using some modest runtime
features common to many languages.
Here we:
\begin{itemize}
\item show how to functionally implement more general logical
operators, cleanly obviating some of the surface macros,
\item survey the design space of macro-less, mostly side-effect free
implementation alternatives for the remaining macros in the
\emph{TRS2e} core language implementation, and weigh the trade-offs
and real-world consequences, and
\item suggest practical solutions for completely eliminating the
macros in those places where the purest microKanren
implementations seemed impractical.
\end{itemize}
This resulted in some higher-level (variadic rather than just binary)
operators, a more succinct kernel language, and enabled some
performance improvement. Around half of the changes are applicable to
any microKanren implementation, and the more concise goal combinators
of \cref{sec:conde} may also be of interest to implementers who embed
goal-oriented languages like Icon~\cite{griswold1983icon}. The other
half are necessarily awkward yet practical strategies for those
platforms lacking macro support. The source code for both our
re-implementation and our experimental results is available
at~\url{https://github.com/jasonhemann/tfp-2023/}. Our preferred
definitions are in \cref{mnt:disj-reimplementation} and
\cref{mnt:conda-implementation}.
In \cref{sec:all-aboard}, we illustrate by example what seemed to
force surface syntax macros. In \cref{sec:conde}, we implement
conjunction and disjunction, and in \cref{sec:impure} we discuss the
re-implementation of the impure operators. We discuss the remaining
macros in \cref{sec:functional}. We close with a discussion of the
performance impacts of these implementation choices, and consider how
implementers of Kanren languages in other hosts might benefit from
these alternatives.
\section{The problem}\label{sec:all-aboard}
We briefly reprise here some basics of programming in the miniKanren
implementation of \emph{TRS2e}. Although based on microKanren, the
\emph{TRS2e} implementation makes some concessions to efficiency and
safety and uses a few macros in the language kernel itself. In
addition to that implementation, in this paper we make occasional
references to earlier iterations such as
\citet{hemann2016small}, an expanded
archival version of the 2013 paper~\cite{hemann2013muKanren}.
\subsection{Background}
Programs consist of a database of relation definitions and queries
executed in the context of those definitions. A relation definition
consists of a name for the relation and a number of parameters equal
to the relations arity, and then a body. This body states the
condition under which the relationship holds between the parameters.
Parameters range over a domain of terms, and the programmer can
introduce local auxiliary variables as needed. The core \emph{TRS2e}
language implementation relies on macros \rackinline{fresh},
\rackinline{defrel}, and \rackinline{run} to introduce new logic
variables, globally define relations, and execute queries. Relations
can refer to themselves or one another in their definitions; the whole
database of relations is mutually-recursively defined. The programmer
states the relation body as a logical expression, built up with
conjunctions and disjunctions, over a class of equations and primitive
constraints on terms. Take the \rackinline{carmelit-subway} relation
of \cref{mnt:carmelit} as a concrete example. The Carmelit in Haifa is
the world's shortest subway system with only six stations on its line;
\rackinline{carmelit-subway} is a six-place relation modeling a
passenger riding that subway end to end. \@
\rackinline{carmel-center}, \rackinline{golomb}, \rackinline{masada},
etc.\ are all term constants in the program; each is the name of a
stop on the line. The sub-expression
\rackinline{(== a 'carmel-center)} is an equation between the
parameter \rackinline{a} and the term \rackinline{carmel-center}. The
\rackinline{conde} operator (the \rackinline{e} is for
\enquote{\rackinline{e}very line}) takes any number of parenthesized
sequences of expressions. Each parenthesized sequence represents the
conjunction of those expressions, and the meaning of the
\rackinline{conde} expression represents the disjunction of those
conjunctions. Altogether, this program states that there are two ways
to ride the subway end to end: a passenger can either start at
\rackinline{carmel-center} and travel five stops to
\rackinline{downtown}, or start at \rackinline{downtown} and travel
five stops to \rackinline{carmel-center}.
Each query against the database contains some expression the
programmer wants to satisfy and introduces some number of variables
against which the language should express the answer. Each query asks
either for all, or some bounded number of answers. The
\rackinline{run} expression of \cref{mnt:carmelit} is a query---it
asks for at most three ways a passenger could have ridden the
Carmelit, printed as a list of six values for variables
$\rackinline{s₀} \ldots \rackinline{s₅}$.\@ miniKanren returns a list
of two answers, the only two ways a passenger could ride the whole
subway end to end.
\subsection{Limitations of microKanren}
A six station subway line is an example sufficiently small that
modeling it should be painless. Without the higher-level operators
that the miniKanren macros implement, however, that same relation
requires 11 logical operator nodes, because microKanren only provides
\emph{binary} conjunctions and disjunctions. In our view, this makes
the superficial syntax macros practically mandatory, and impedes host
languages without a macro system. For a logic programming language,
solely binary logical operators is too low level.
\begin{listing}
\begin{minted}[autogobble,stripall]{racket}
(defrel (carmelit-subway a b c d e f)
(conde
((== a 'carmel-center)
(== b 'golomb)
(== c 'masada)
(== d 'haneviim)
(== e 'hadar-city-hall)
(== f 'downtown))
((== a 'downtown)
(== b 'hadar-city-hall)
(== c 'haneviim)
(== d 'masada)
(== e 'golomb)
(== f 'carmel-center))))
> (run 3 (s₀ s₁ s₂ s₃ s₄ s₅)
(carmelit-subway s₀ s₁ s₂ s₃ s₄ s₅))
'((carmel-center golomb masada
haneviim hadar-city-hall downtown)
(downtown hadar-city-hall haneviim
masada golomb carmel-center))
\end{minted}
\caption{The Carmelit subway relation and a query. Results reformatted for clarity and space.}
\label{mnt:carmelit}
\end{listing}
Moreover, the microKanren language doesn't offer the programmer
sufficient guidance in using that fine-grained control. With $n$
goals, the programmer can associate to the left, to the right, or some
mixtures of the two. The syntax does not obviously encourage any one
choice. Subtle changes in program structure can have profound effects
on performance, and mistakes are easy to make.
Similarly, the soft-cut operator \rackinline{ifte} in the \emph{TRS2e}
language kernel is also low level. It permits a single test, a single
consequent, and a single alternative. To construct an if-then-else
cascade, a microKanren programmer without the \rackinline{conda}
surface macro would need to code that unrolled conditional expression
by hand.
In earlier renditions of related work, the strictures of pure
functional shallow embedding---without macros---seemed to force
implementations of variable introduction, relation definition, and the
query interface that suffered from harsh downsides. We will revisit
those earlier implementations and their trade-offs, survey the
available options, and suggest compromises for those
truly without macros, thus increasing microKanren's \emph{practical}
portability.
\section{The \textmd{\rackinline{disj}}unction and \textmd{\rackinline{conj}}unction
goal constructors}\label{sec:conde}
microKanren's binary \rackinline{disj₂} and \rackinline{conj₂}
operators, shown in \cref{mnt:disj2-conj2}, are goal combinators: they
each take two goals, and produce a new goal. A \emph{goal} is what the
program attempts to achieve: it can fail or succeed (and it can
succeed multiple times). A goal executes with respect to a
\emph{state}, here the curried parameter \rackinline{s}, and the
result is a \emph{stream} of states, usually denoted \rackinline{s∞}
as each entry in the stream is a state that results from achieving
that goal in the given state. Disjunction and conjunction work
slightly differently. The \rackinline{append∞} function used in
\rackinline{disj₂} is a kernel primitive that combines two streams
into one, with an interleave mechanism to prevent starvation; the
result is a stream of the ways to achieve the two goals' disjunction.
The \rackinline{append-map∞} function used in \rackinline{conj₂} is to
\rackinline{append∞} what \rackinline{append-map} is to
\rackinline{append}. The ways to achieve the conjunction of two goals
are all the ways to achieve the second goal in a state that results
from achieving the first goal.\@ \rackinline{append-map∞} runs the
second goal over the stream of results from the first goal, and
combines together the results of mapping into a single stream. That
stream represents the conjunction of the two goals, again with special
attention to interleaving and starvation.
\begin{listing}
\begin{minted}[autogobble,stripall]{racket}
(define ((disj₂ g₁ g₂) s)
(append∞ (g₁ s) (g₂ s)))
(define ((conj₂ g₁ g₂) s)
(append-map∞ g₂ (g₁ s)))
(define-syntax disj
(syntax-rules ()
((disj g) g)
((disj g₁ g₂ g* ...)
(disj₂ g₁ (disj g₂ g* ...)))))
(define-syntax conj
(syntax-rules ()
((conj g) g)
((conj g₁ g₂ g* ...)
(conj (conj₂ g₁ g₂) g* ...))))
\end{minted}
\caption{microKanren \rackinline{disj₂}, \rackinline{conj₂}, and macros that use them}\label{mnt:disj2-conj2}
\end{listing}
We want to implement disjunction and conjunction as functions taking
arbitrary quantities of goals. These implementations should subsume
the binary \rackinline{disj₂} and \rackinline{conj₂} and they also
should not use \rackinline{apply}. Nor should our implementations
induce a tortured program and extraneous closures to \emph{avoid}
using \rackinline{apply}; we address this somewhat-cryptic requirement
in \cref{sec:whats-left}. This re-implementation requires a host that
supports variable arity functions, a widely available feature included
in such languages as JavaScript, Ruby, Java, and Python. These
languages do not generally support macros, and so we advise
implementers working in such languages to use the present approach.
\subsection{Calculating the solutions}
A developer might derive these definitions as follows. We start with
the definition of a recursive \rackinline{disj} macro like one might
define as surface syntax over the microKanren \rackinline{disj₂}. As
this is not part of the microKanren language, we would like to
dispense with the macro and implement this behavior functionally. At
the cost of an \rackinline{apply}, we can build the corresponding
explicitly recursive \rackinline{disj} function. Since
\rackinline{disj} produces and consumes goals, we can η-expand that
first functional definition by a curried parameter \rackinline{s}. We
then split \rackinline{disj} into two mutually-recursive functions. We
use the symbol \rackinline{⊂} here to indicate that the newer
\rackinline{disj} produces the same values as the old one, although it
now does so by calling a new globally-defined function
\rackinline{D}.\@ This new help function will only ever be called from
\rackinline{disj} so does not \emph{need} to be global; we do so for
space and clarity. In this paper, every such help function we
introduce with \rackinline{⊂} could have instead been local and the
relationship actually an equality.
\begin{minted}[autogobble,stripall,escapeinside=μμ]{racket}
(define (disj g . g*)
(cond
((null? g*) g)
(else (disj₂ g (apply disj g*)))))
= { μ\text{η-expansion}μ }
(define ((disj g . g*) s)
(cond
((null? g*) (g s))
(else ((disj₂ g (apply disj g*)) s))))
⊂
(define ((disj g . g*) s)
(D g g* s))
(define (D g g* s)
(cond
((null? g*) (g s))
(else ((disj₂ g (apply disj g*)) s))))
\end{minted}
In that version of \rackinline{D}, we can replace the call to
\rackinline{disj₂} by its definition in terms of \rackinline{append∞}
and perform a trivial β-reduction. The explicit \rackinline{s}
argument suggests removing the call to \rackinline{apply} and making
\rackinline{D} self-recursive. The definition of \rackinline{disj}
remains unchanged from before.
\begin{minted}[autogobble,stripall,escapeinside=μμ]{racket}
= { μ\text{by definition of disj₂}μ }
(define (D g g* s)
(cond
((null? g*) (g s))
(else
((λ (s)
(append∞ (g s) ((apply disj g*) s)))
s))))
= { μ\text{β-reduction}μ }
(define (D g g* s)
(cond
((null? g*) (g s))
(else
(append∞ (g s) ((apply disj g*) s)))))
= { μ\text{by definition of disj}μ }
(define (D g g* s)
(cond
((null? g*) (g s))
(else
(append∞ (g s) (D (car g*) (cdr g*) s)))))
\end{minted}
In both clauses of \rackinline{D} we combine \rackinline{g} and
\rackinline{s}, this suggests constructing that stream in
\rackinline{disj} and passing it along. Adding the trivial base case
to that \rackinline{disj} yields the definition
in~\cref{mnt:disj-reimplementation}.
We can derive the definition of \rackinline{conj} from
\cref{mnt:disj-reimplementation} via a similar process. Starting with
the variadic function based on the macro in \cref{mnt:disj2-conj2}, we
first η-expand and split the definition.
\begin{minted}[autogobble,stripall,escapeinside=μμ]{racket}
(define (conj g . g*)
(cond
((null? g*) g)
(else
(apply conj
(cons (conj₂ g (car g*)) (cdr g*))))))
= { μ\text{η-expansion}μ }
(define ((conj g . g*) s)
(cond
((null? g*) (g s))
(else
((apply conj
(cons (conj₂ g (car g*)) (cdr g*))) s))))
⊂
(define ((conj g . g*) s)
(C g g* s))
(define (C g g* s)
(cond
((null? g*) (g s))
(else
((apply conj
(cons (conj₂ g (car g*)) (cdr g*)))
s))))
\end{minted}
\noindent We next substitute for the definitions of \rackinline{conj}
and \rackinline{conj₂}.
\begin{minted}[autogobble,stripall,escapeinside=μμ]{racket}
(define (C g g* s)
(cond
((null? g*) (g s))
(else
((apply conj
(cons (conj₂ g (car g*)) (cdr g*)))
s))))
= { μ\text{by the definition of conj}μ }
(define (C g g* s)
(cond
((null? g*) (g s))
(else
(C (conj₂ g (car g*)) (cdr g*) s))))
= { μ\text{by the definition of conj₂}μ }
(define (C g g* s)
(cond
((null? g*) (g s))
(else
(C (λ (s)
(append-map∞ (car g*) (g s)))
(cdr g*)
s))))
\end{minted}
This definition of \rackinline{C} sequences the invocations of goal
and state in the order they appear. \rackinline{append-map∞} acts like
a non-deterministic \rackinline{compose} operator. In each recursive
call, we accumulate by mapping, using \rackinline{append-map∞}'s
special delaying implementation of Kanren-language streams, the next
goal in the list.
\begin{minted}[autogobble,stripall,escapeinside=μμ]{racket}
(C g (list g₁ g₂) s)
=
(C (λ (s)
(append-map∞ g₁
(g s)))
(list g₂)
s)
=
(C (λ (s)
(append-map∞ g₂
((λ (s)
(append-map∞ g₁
(g s)))
s)))
'()
s)
=
((λ (s)
(append-map∞ g₂
((λ (s)
(append-map∞ g₁
(g s)))
s)))
s)
\end{minted}
The state does not change in the recursion: \rackinline{C} only needs
\rackinline{s} to \emph{build} the stream. Therefore we can assemble
the stream on the way in---instead of passing in \rackinline{g} and
\rackinline{s} separately, we pass in their combination as a stream.
The function is tail recursive; we can change the signature in the one
and only external call and the recursive call. Adding the trivial base
case to \rackinline{conj}, yields the version shown in
\cref{mnt:disj-reimplementation}.
\begin{listing}
\begin{minted}[autogobble,stripall,frame=single]{racket}
(define ((disj . g*) s)
(cond
((null? g*) '())
(else (D ((car g*) s) (cdr g*) s))))
(define (D s∞ g* s)
(cond
((null? g*) s∞)
(else
(append∞ s∞
(D ((car g*) s) (cdr g*) s)))))
(define ((conj . g*) s)
(cond
((null? g*) (cons s '()))
(else (C (cdr g*) ((car g*) s)))))
(define (C g* s∞)
(cond
((null? g*) s∞)
(else
(C (cdr g*)
(append-map∞ (car g*) s∞)))))
\end{minted}
\caption{Final re-definitions of \rackinline{disj} and \rackinline{conj}}\label{mnt:disj-reimplementation}
\end{listing}
Both of these new versions are shallow wrappers over simple folds. The
first steps are to dispense with the trivial case, and then to call a
recursive help function that makes no use of variadic arguments. The
focus is on recurring over the list \rackinline{g*}. Unlike
\rackinline{D}, the function \rackinline{C} does not take in the state
\rackinline{s}; the help function does not need the state for
conjunction.
\subsection{What's left?}\label{sec:whats-left}
More importantly, while both the functional and the macro based
versions of \rackinline{disj} use a right fold, the implementation of
conjunctions in \cref{mnt:disj-reimplementation} uses a left fold over
the goals. This left-fold implementation of conjunctions therefore
left-associates the conjuncts. This is not an accident.
%
Folklore suggests left associating conjunctions tends to improve
performance of miniKanren's interleaving search. The authors know of
no thorough algorithmic proof of such claims, but see for instance
discussions and implementation in
\citet{rosenblatt2019first} for some of
the related work so far. We have generally, however, resorted to small
step visualizations of the search tree to explain the performance
impact. It is worth considering if we can make an equally compelling
argument for this preference through equational reasoning and
comparing the implementations of functions.
% Folklore suggests that left associating conjunctions tends to improve
% the performance of miniKanren's interleaving search. The authors know
% of no thorough algorithmic proof of such claims, but see for instance
% discussions and implementation in~\cite{rosenblatt2019first} for some
% of the related work so far. In \cref{tab:???}, we display the results
% of some micro benchmarks that suggest the same. We have generally,
% however, resorted to small step visualizations of the search tree to
% explain the performance impact. The authors believe it is worth
% considering if we can make an equally compelling argument for this
% preference through equational reasoning and comparing the
% implementations of functions.
Compare the preceding derivation from a left-fold over conjunctions
with the following attempted derivation from a right-fold
implementation. We η-expand and unfold to a recursive help
function like before.
\begin{minted}[autogobble,stripall,escapeinside=μμ]{racket}
(define (conj g . g*)
(cond
((null? g*) g)
(else (conj₂ g (apply conj g*)))))
= { μ\text{η-expansion}μ }
(define ((conj g . g*) s)
(cond
((null? g*) (g s))
(else ((conj₂ g (apply conj g*)) s))))
⊂
(define ((conj g . g*) s)
(C g g* s))
(define (C g g* s)
(cond
((null? g*) (g s))
(else ((conj₂ g (apply conj g*)) s))))
\end{minted}
In \rackinline{C}, we can substitute in the definition of
\rackinline{conj₂} and β-reduce. We once again η-expand the call to
\rackinline{(apply conj g*)} to substitute with the definition of
\rackinline{conj} and eliminate the \rackinline{apply}. There,
however, we get stuck.
\begin{minted}[autogobble,stripall,escapeinside=μμ]{racket}
(define (C g g* s)
(cond
((null? g*) (g s))
(else ((conj₂ g (apply conj g*)) s))))
= { μ\text{by definition of conj₂ and β-reduction}μ }
(define (C g g* s)
(cond
((null? g*) (g s))
(else
(append-map∞
(apply conj g*)
(g s)))))
= { μ\text{η-expand to use the definition of conj}μ }
(define (C g g* s)
(cond
((null? g*) (g s))
(else
(append-map∞
(λ (s)
(C (car g*) (cdr g*) s))
(g s)))))
\end{minted}
At this point in the left-fold derivation, these calls are
accumulating in a stack-like discipline, so we can simplify and pass
\rackinline{g} and \rackinline{s} together as a stream. The equivalent
right-fold implementation, however, is in a kind of
continuation-passing style for non-deterministic computations. Each
goal in the list seems to require a closure for every recursive call.
Building these closures is expensive. Similar behavior shows up in the
right-fold variant of \rackinline{disj}. Basic programming horse sense
suggests the more elegant variants from
\cref{mnt:disj-reimplementation}, and could partly explain the
performance gap.
\begin{minted}[autogobble,stripall,escapeinside=μμ]{racket}
(C g (list g₁ g₂) s)
=
(append-map∞
(λ (s)
(C g₁ (list g₂) s))
(g s))
=
(append-map∞
(λ (s)
(append-map∞
(λ (s)
(C g₂ '() s))
(g₁ s)))
(g s))
=
(append-map∞
(λ (s)
(append-map∞
(λ (s)
(g₂ s))
(g₁ s)))
(g s))
\end{minted}
The new \rackinline{disj} and \rackinline{conj} functions are, we
believe, sufficiently high-level for programmers in implementations
without macros. Though this note mainly concerns working towards an
internal macro-less kernel language, it may also have something to say
about the miniKanren-level surface syntax, namely that even the
miniKanren language could do without its \rackinline{conde} syntax (a
disjunction of conjunctions that looks superficially like
Scheme's \rackinline{cond}) and have the programmer use these new
underlying logical primitives. In \cref{mnt:new-carmelit}, we
implement \rackinline{carmelit-subway} as an example, and it reads
much better than the 11 binary logical operators the programmer would
have needed in the earlier version.
\begin{listing}[h]
\begin{minted}[autogobble,stripall]{racket}
(defrel (carmelit-subway a b c d e f)
(disj
(conj (== a 'carmel-center)
(== b 'golomb)
(== c 'masada)
(== d 'haneviim)
(== e 'hadar-city-hall)
(== f 'downtown))
(conj (== a 'downtown)
(== b 'hadar-city-hall)
(== c 'haneviim)
(== d 'masada)
(== e 'golomb)
(== f 'carmel-center))))
\end{minted}
\caption{A new Carmelit subway without \rackinline{conde}}\label{mnt:new-carmelit}
\end{listing}
\section{Tidying up the impure operators}\label{sec:impure}
The \rackinline{conda} of \emph{TRS2e} provides nested
\enquote{if-then-else} behavior (the \rackinline{a} is because
\rackinline{a}t most one line succeeds). It relies on microKanren's
underlying \rackinline{ifte}. That \rackinline{conda} requires one or
more conjuncts per clause and one or more clauses. Once again, we
would like to have an equivalently expressive feature without
resorting to macros. Unlike the \emph{TRS2e} implementation, the
versions of \rackinline{conda} in \cref{mnt:conda-macro} consume a
sequence of goals. They consume those goals in \enquote{if-then}
pairs, perhaps followed by a final \enquote{else}; we have no choice
if we want to proceed without using a macro.
\begin{listing}
\begin{minted}[autogobble,stripall]{racket}
(define-syntax conda
(syntax-rules ()
((conda g) g)
((conda g₁ g₂) (conj g₁ g₂))
((conda g₁ g₂ g₃ g* ...)
(ifte g₁ g₂ (conda g₃ g* ...)))))
(define (conda g . g*)
(cond
((null? g*) g)
((null? (cdr g*)) (conj g (car g*)))
(else
(ifte g (car g*) (apply conda (cdr g*))))))
\end{minted}
\caption{A re-implemented \rackinline{conda} macro and its functional equivalent}\label{mnt:conda-macro}
\end{listing}
We defer the derivation of our new \rackinline{conda} solution to
\cref{sec:conda-derivation}. Rather than building a largely redundant
implementation of \rackinline{condu}, we expose the higher-order goal
\rackinline{once} to the user. The programmer can simulate
\rackinline{condu} by wrapping \rackinline{once} around every test
goal. We do however take this opportunity to also lift the inner
function \rackinline{loop} to a top-level definition.
\begin{listing}[h]
\begin{minted}[autogobble,stripall,frame=single]{racket}
(define ((conda . g*) s)
(cond
((null? g*) '())
(else (A (cdr g*) ((car g*) s) s))))
(define (A g* s∞ s)
(cond
((null? g*) s∞)
((null? (cdr g*)) (append-map∞ (car g*) s∞))
(else (ifs∞te s∞ (car g*) (cdr g*) s))))
(define (ifs∞te s∞ g g+ s)
(cond
((null? s∞) (A (cdr g+) ((car g+) s) s))
((pair? s∞) (append-map∞ g s∞))
(else (λ () (ifs∞te (s∞) g g+ s)))))
(define ((once g) s)
(O (g s)))
(define (O s∞)
(cond
((null? s∞) '())
((pair? s∞) (cons (car s∞) '()))
(else (λ () (O (s∞))))))
\end{minted}
\caption{A functional \rackinline{conda}, \rackinline{ifte}, and \rackinline{once}}\label{mnt:conda-implementation}
\end{listing}
\section{Removing more macros}\label{sec:functional}
The \citeyear{hemann2013muKanren} microKanren paper demonstrates how
to implement a side-effect free macro-less Kanren language in an eager
host. In \cref{mnt:call-fresh-and-call-initial-state} we display these
alternative mechanisms for introducing fresh logic variables,
executing queries, and introducing delay and interleave. The versions
in \cref{mnt:call-fresh-and-call-initial-state} are slightly adjusted
to be consistent with this presentation.
\begin{listing}
\begin{minted}[autogobble,stripall]{racket}
(define ((call/fresh f) s)
(let ((v (state->newvar s)))
((f v) (incr-var-ct s))))
(define (call/initial-state n g)
(reify/1st
(take∞ n (pull (g initial-state)))))
(define (((Zzz g) s))
(g s))
> (call/initial-state 3
(call/fresh
(λ (x)
(== x 'cat))))
\end{minted}
\caption{Definition and use of functional microKanren equivalents of \emph{TRS2e} kernel macros}\label{mnt:call-fresh-and-call-initial-state}
\end{listing}
Each of these has drawbacks that compelled the \emph{TRS2e} authors to
instead use macro-based alternatives in the kernel layer. In this
section, we explicitly address those drawbacks and point out some
other non-macro alternatives that may demand more from a host language
than the original microKanren choices, and make some recommendations.
\subsection{Logic variables}
Many of the choices for these last options hinge on a representation
of logic variables. Every implementation must have a mechanism to
produce the next fresh logic variable. The choice of variable
representation will affect the implementation of unification and
constraint solving, the actual introduction of fresh variables, as
well as answer projection, the formatting and presentation of a
query's results. Depending on the implementation, the variables may
also need additional functions to support them. In a shallow
embedding, designing a set of logic variables means either using a
\rackinline{struct}-like mechanism to custom-build a datatype hidden
from the microKanren programmer, or designating some subset of the
host language's values for use as logic variables. Using
\rackinline{struct}s and limiting the visibility of the constructors
and accessors is a nice option for languages that support it.
The choice of which host language values to take for logic variables
divides roughly into the structurally equal and the referentially
equal. For an example of the latter, consider representing each
variable using a vector, and identifying vectors by their unique
memory location. This latter approach models logic variables as a
single global pool rather than reused separately across each disjunct,
and so requires more logic variables overall. The microKanren approach
uses natural numbers as an indexed set of variables, which
necessitates removing numbers from the user's term language.
\subsection{\rackinline{fresh}}
There are numerous ways to represent variables, and so too are there
many ways to introduce fresh variables. In the microKanren approach,
the current variable index is one of the fields of the state threaded
through the computation; to go from index to variable is the identity
function, and the \rackinline{state->newvar} function we use can be
just an accessor. The function \rackinline{incr-var-ct} can
reconstruct that state with the variable count incremented. The
\rackinline{call/fresh} function, shown in
\cref{mnt:call-fresh-and-call-initial-state}, takes as its first
argument a goal parameterized by the name of a fresh variable.
\rackinline{call/fresh} then applies that function with the newly
created logic variable, thereby associating that host-language lexical
variable with the DSL's logic variable. This lets the logic language
\enquote{piggyback} on the host's lexical scoping and variable lookup,
as shown in \cref{mnt:call-fresh-and-call-initial-state}.
This approach also means, however, that absent some additional
machinery the user must introduce those new logic variables one at a
time, once each per \rackinline{call/fresh} expression, as though
manually currying all functions in a functional language. This made
programs larger than the relational \rackinline{append} difficult to
write and to read, and that amount of threading and re-threading state
for each variable is costly. We could easily support, say instead,
three variables at a time---force the user to provide a three-argument
function and always supply three fresh variables at a time. Though
practically workable the choice of some arbitrary quantity $k$ of
variables at a time, or choices $k_{1}$ and $k_{2}$ for that matter,
seems unsatisfactory. It could make sense to inspect a procedure for
its arity at runtime and introduce exactly that many variables, in
languages that support that ability. In many languages, a procedure's
arity is more complex than a single number. Variadic functions and
keyword arguments all complicate the story of a procedure's arity. A
form like \rackinline{case-lambda} means that a single procedure may
have several disjoint arities. The arity inspection approach could be
a partial solution where the implementer restricts the programmer to
using functions with fixed arity.
One last approach is to directly expose to the user a mechanism to
create a new variable, and allow the programmer to use something like
a \rackinline{let} binding to do their own variable introduction and
name binding. Under any referentially transparent representation of
variables, this would mean that the programmer would be responsible
for tracking the next lexical variable. This last approach pairs best
with referentially opaque variables where the operation to produce a
new variable allocates some formerly unused memory location so the
programmer does not need to track the next logic variable. See
sokuza-kanren~\cite{kiselyov2006taste} for an example of this style.
With this latter approach, however, we can expose \rackinline{var}
directly to the programmer who can use \rackinline{let} bindings to
introduce several logic variables simultaneously.
\subsection{\rackinline{run}}
\Cref{mnt:call-fresh-and-call-initial-state} also shows how we have
implemented a \rackinline{run}-like behavior without using macros.
Using a referentially transparent implementation of logic variables,
we can accomplish the job of \rackinline{run} and \rackinline{run*} by
a \rackinline{call/initial-state}-like function. The query is itself
expressed as a goal that introduces the first logic variable
\rackinline{q}. A \rackinline{run}-like operator displays the result
with respect to the first variable introduced. This means pruning
superfluous variables from the answer, producing a single value from
the accumulated equations, and numbering the fresh variables. When
logic variables are only identified by reference equality, the
language implementation must pass the same \emph{pointer-identical}
logic variable into both the query and into the answer projection,
called \rackinline{reify}. The pointer-based logic variable approach
forces the programmer to explicitly invoke \rackinline{reify} as
though it were a goal as the last step of executing the query, as in
the first example in \cref{mnt:run-query}, or create a special
variable introduction mechanism for the first variable, scoped over
both the query and the answer projection, as in the second example.
\begin{listing}[h]
\begin{minted}[autogobble,stripall]{racket}
(call/initial-state 1
(let ((q (var 'q)))
(conj
(let ((x (var 'x)))
(== q x))
(reify q))))
(define (call/initial-state n f)
(let ((q (var 'q)))
(map (reify q)
(take∞ n ((f q) initial-state)))))
\end{minted}
\caption{Several approaches to reifying variables in \rackinline{call/initial-state}. Here \rackinline{initial-state} is a representation of an initially empty set of equations}\label{mnt:run-query}
\end{listing}
\subsection{\rackinline{define}}
The microKanren programmer can use their host language's
\rackinline{define} feature to construct relations as host-language
functions, and manually introduce the delays in relations using a help
function like \rackinline{Zzz}
(\cref{mnt:call-fresh-and-call-initial-state}) to introduce delays, as
in the original implementation.~\cite{hemann2013muKanren} This may be
a larger concession than it looks, since it exposes the delay and
interleave mechanism to the user, and both correct interleaving and,
in an eager host language, even the termination of relation
\emph{definitions} rely on a whole-program correctness property of
relation definitions having a delay. \rackinline{Zzz} \emph{if always
used correctly} would be sufficient to address that problem, but
forgetting it just once could cause the entire program to loop.
Turning the delaying and interleaving into a user-level operation
means giving the programmer some explicit control over the search, and
that in turn could transform a logic language into an imperative one.
Another downside of relying on a host-language \rackinline{define} is
that the programmer must now take extra care not to provide multiple