(ns foundations.computational.linguistics
(:require [reagent.core :as r]
[reagent.dom :as rd]
[clojure.zip :as z]
[clojure.pprint :refer [pprint]]
[clojure.string :refer [index-of]]
;[clojure.string :as str]
))
(enable-console-print!)
(defn log [a-thing]
(.log js/console a-thing))
(defn render-vega [spec elem]
(when spec
(let [spec (clj->js spec)
opts {:renderer "canvas"
:mode "vega"
:actions {
:export true,
:source true,
:compiled true,
:editor true}}]
(-> (js/vegaEmbed elem spec (clj->js opts))
(.then (fn [res]
(. js/vegaTooltip (vega (.-view res) spec))))
(.catch (fn [err]
(log err)))))))
(defn vega
"Reagent component that renders vega"
[spec]
(r/create-class
{:display-name "vega"
:component-did-mount (fn [this]
(render-vega spec (rd/dom-node this)))
:component-will-update (fn [this [_ new-spec]]
(render-vega new-spec (rd/dom-node this)))
:reagent-render (fn [spec]
[:div#vis])}))
;making a histogram from a list of observations
(defn list-to-hist-data-lite [l]
""" takes a list and returns a record
in the right format for vega data,
with each list element the label to a field named 'x'"""
(defrecord rec [category])
{:values (into [] (map ->rec l))})
(defn makehist-lite [data]
{
:$schema "https://vega.github.io/schema/vega-lite/v4.json",
:data data,
:mark "bar",
:encoding {
:x {:field "category",
:type "ordinal"},
:y {:aggregate "count",
:type "quantitative"}
}
})
(defn list-to-hist-data [l]
""" takes a list and returns a record
in the right format for vega data,
with each list element the label to a field named 'x'"""
(defrecord rec [category])
[{:name "raw",
:values (into [] (map ->rec l))}
{:name "aggregated"
:source "raw"
:transform
[{:as ["count"]
:type "aggregate"
:groupby ["category"]}]}
{:name "agg-sorted"
:source "aggregated"
:transform
[{:type "collect"
:sort {:field "category"}}]}
])
(defn makehist [data]
(let [n (count (distinct ((data 0) :values)))
h 200
pad 5
w (if (< n 20) (* n 35) (- 700 (* 2 pad)))]
{
:$schema "https://vega.github.io/schema/vega/v5.json",
:width w,
:height h,
:padding pad,
:data data,
:signals [
{:name "tooltip",
:value {},
:on [{:events "rect:mouseover", :update "datum"},
{:events "rect:mouseout", :update "{}"}]}
],
:scales [
{:name "xscale",
:type "band",
:domain {:data "agg-sorted", :field "category"},
:range "width",
:padding 0.05,
:round true},
{:name "yscale",
:domain {:data "agg-sorted", :field "count"},
:nice true,
:range "height"}
],
:axes [
{ :orient "bottom", :scale "xscale" },
{ :orient "left", :scale "yscale" }
],
:marks [
{:type "rect",
:from {:data "agg-sorted"},
:encode {
:enter {
:x {:scale "xscale", :field "category"},
:width {:scale "xscale", :band 1},
:y {:scale "yscale", :field "count"},
:y2 {:scale "yscale", :value 0}
},
:update {:fill {:value "steelblue"}},
:hover {:fill {:value "green"}}
}
},
{:type "text",
:encode {
:enter {
:align {:value "center"},
:baseline {:value "bottom"},
:fill {:value "#333"}
},
:update {
:x {:scale "xscale", :signal "tooltip.category", :band 0.5},
:y {:scale "yscale", :signal "tooltip.count", :offset -2},
:text {:signal "tooltip.count"},
:fillOpacity [
{:test "isNaN(tooltip.count)", :value 0},
{:value 1}
]
}
}
}
]
}))
(defn hist [l]
(-> l
list-to-hist-data
makehist
vega))
; for making bar plots
(defn list-to-barplot-data-lite [l m]
""" takes a list and returns a record
in the right format for vega data,
with each list element the label to a field named 'x'"""
(defrecord rec [category amount])
{:values (into [] (map ->rec l m))})
(defn makebarplot-lite [data]
{
:$schema "https://vega.github.io/schema/vega-lite/v4.json",
:data data,
:mark "bar",
:encoding {
:x {:field "element", :type "ordinal"},
:y {:field "value", :type "quantitative"}
}
})
(defn list-to-barplot-data [l m]
""" takes a list and returns a record
in the right format for vega data,
with each list element the label to a field named 'x'"""
(defrecord rec [category amount])
{:name "table",
:values (into [] (map ->rec l m))})
(defn makebarplot [data]
(let [n (count (data :values))
h 200
pad 5
w (if (< n 20) (* n 35) (- 700 (* 2 pad)))]
{
:$schema "https://vega.github.io/schema/vega/v5.json",
:width w,
:height h,
:padding pad,
:data data,
:signals [
{:name "tooltip",
:value {},
:on [{:events "rect:mouseover", :update "datum"},
{:events "rect:mouseout", :update "{}"}]}
],
:scales [
{:name "xscale",
:type "band",
:domain {:data "table", :field "category"},
:range "width",
:padding 0.05,
:round true},
{:name "yscale",
:domain {:data "table", :field "amount"},
:nice true,
:range "height"}
],
:axes [
{ :orient "bottom", :scale "xscale" },
{ :orient "left", :scale "yscale" }
],
:marks [
{:type "rect",
:from {:data "table"},
:encode {
:enter {
:x {:scale "xscale", :field "category"},
:width {:scale "xscale", :band 1},
:y {:scale "yscale", :field "amount"},
:y2 {:scale "yscale", :value 0}
},
:update {:fill {:value "steelblue"}},
:hover {:fill {:value "green"}}
}
},
{:type "text",
:encode {
:enter {
:align {:value "center"},
:baseline {:value "bottom"},
:fill {:value "#333"}
},
:update {
:x {:scale "xscale", :signal "tooltip.category", :band 0.5},
:y {:scale "yscale", :signal "tooltip.amount", :offset -2},
:text {:signal "tooltip.amount"},
:fillOpacity [
{:test "isNaN(tooltip.amount)", :value 0},
{:value 1}
]
}
}
}
]
}))
(defn barplot [l m]
(vega (makebarplot (list-to-barplot-data l m))))
; now, for tree making
;(thanks to Taylor Wood's answer in this thread on stackoverflow:
; https://stackoverflow.com/questions/57911965)
(defn count-up-to-right [loc]
(if (z/up loc)
(loop [x loc, pops 0]
(if (z/right x)
pops
(recur (z/up x) (inc pops))))
0))
(defn list-to-tree-spec [l]
""" takes a list and walks through it (with clojure.zip library)
and builds the record format for the spec needed to for vega"""
(loop [loc (z/seq-zip l), next-id 0, parent-ids [], acc []]
(cond
(z/end? loc) acc
(z/end? (z/next loc))
(conj acc
{:id (str next-id)
:name (str (z/node loc))
:parent (when (seq parent-ids)
(str (peek parent-ids)))})
(and (z/node loc) (not (z/branch? loc)))
(recur
(z/next loc)
(inc next-id)
(cond
(not (z/right loc))
(let [n (count-up-to-right loc)
popn (apply comp (repeat n pop))]
(some-> parent-ids not-empty popn))
(not (z/left loc))
(conj parent-ids next-id)
:else parent-ids)
(conj acc
{:id (str next-id)
:name (str (z/node loc))
:parent (when (seq parent-ids)
(str (peek parent-ids)))}))
:else
(recur (z/next loc) next-id parent-ids acc))))
(defn maketree [w h tree-spec]
""" makes vega spec for a tree given tree-spec in the right json-like format """
{:$schema "https://vega.github.io/schema/vega/v5.json"
:data [{:name "tree"
:transform [{:key "id" :parentKey "parent" :type "stratify"}
{:as ["x" "y" "depth" "children"]
:method {:signal "layout"}
:size [{:signal "width"} {:signal "height"}]
:type "tree"}]
:values tree-spec
}
{:name "links"
:source "tree"
:transform [{:type "treelinks"}
{:orient "horizontal"
:shape {:signal "links"}
:type "linkpath"}]}]
:height h
:marks [{:encode {:update {:path {:field "path"} :stroke {:value "#ccc"}}}
:from {:data "links"}
:type "path"}
{:encode {:enter {:size {:value 50} :stroke {:value "#fff"}}
:update {:fill {:field "depth" :scale "color"}
:x {:field "x"}
:y {:field "y"}}}
:from {:data "tree"}
:type "symbol"}
{:encode {:enter {:baseline {:value "bottom"}
:font {:value "Courier"}
:fontSize {:value 14}
:angle {:value 0}
:text {:field "name"}}
:update {:align {:signal "datum.children ? 'center' : 'center'"}
:dy {:signal "datum.children ? -6 : -6"}
:opacity {:signal "labels ? 1 : 0"}
:x {:field "x"}
:y {:field "y"}}}
:from {:data "tree"}
:type "text"}]
:padding 5
:scales [{:domain {:data "tree" :field "depth"}
:name "color"
:range {:scheme "magma"}
:type "linear"
:zero true}]
:signals [{:bind {:input "checkbox"} :name "labels" :value true}
{:bind {:input "radio" :options ["tidy" "cluster"]}
:name "layout"
:value "tidy"}
{:name "links"
:value "line"}]
:width w}
)
(defn tree-depth
"get the depth of a tree (list)"
[list]
(if (seq? list)
(inc (apply max 0 (map tree-depth list)))
0))
(defn tree
"plot tree using vega"
[list]
(let [spec (list-to-tree-spec list)
h (* 30 (tree-depth list))]
(vega (maketree 700 h spec))))
In the last chapter, we saw how lists can be built using the list
constructor or by quoting. But there is a more useful and fundamental
way to think about the structure of lists which we will make extensive
use of in this course.
We can represent a list as a sequence of applications of the
cons
procedure, starting from the empty list: (cons 1
(cons 2 (cons 3 '()))
(list 1 2 3)
(cons 1 (cons 2 (cons 3 '())))
So (list 1 2 3)
can be built by applying cons
to the
value 1
and another list (2 3)
; this latter list can
be built by applying cons
to the value 2
and another
list (3)
, and so on, with the last element in the sequence
being the empty or null list ()
.
We can use first
and rest
to navigate around this list.
(first (list 1 2 3))
(rest (list 1 2 3))
The structure of lists is important because it goes hand in hand with
the most important code design pattern used in the
\(\lambda\)-calculus and functional programming: recursion. We can
process a list by defining a function which does something to the
first element of the list, and calls itself recursively on the rest of
the list. The base case of such a recursive list processing procedure
is the empty list ()
.
Recursion is best explained with an example. One extremely important
and basic list-processing function that exists in all functional
languages is known as map
. map
is a higher-order function which
takes two arguments: (map f l)
. Its first argument is a one-place
(single argument) procedure f
and its second second is a list
l
. The function map
returns the list that results from applying
f
to each element of l
.
(map (fn [x] (+ x 1) ) '(1 2 3 4 5 6))
In the example above, we called map
with two arguments. First, we
passed in an anonymous function (fn [x] (+ x 1))
which simply adds
one to whatever it is passed as an argument. Second, we passed in the
list (1 2 3 4 5 6)
. map
applied the anonymous function to each
element of the list and returned the resulting list (2 3 4 5 6 7)
.
map
is a standard function in all functional programming
languages because it is a common use case for iteration. But it is
easy to define it ourselves using recursion.
(defn my-map [f l]
(if (empty? l)
'()
(cons (f (first l))
(my-map f (rest l)))))
(my-map (fn [x] (+ 1 x)) '(1 2 3 4 5 6))
Let’s go through this function definition. First, the function takes
two arguments: a function f
and a list l
. The body
of the procedure is a conditional. This conditional first checks if
the list l
is the empty list using the built in procedure
empty?
. If the list passed to my-map
is empty, then
the result must also be empty, so we set the value of the conditional
to just the empty list. On the other hand, if there are elements in
the list l
, we need to do more work.
The alternative to the conditional first gets the first elements of
l
with first
, i.e., (first l)
and it then
applies the function f
to this element. Second, it calls
itself on the remainder of the list which it gets using rest
.
Finally, it takes the result of these two operations and creates a
pair from these using cons
.
Let’s look at another example of recursion.
(defn sum [l]
(if (empty? l)
0
(+ (first l)
(sum (rest l)))))
(sum '(1 2 3 3 4))
This function takes a list of numbers, and returns the sum of those
numbers. The recursion in this definition says something very simple
(but important) about summing a list: the sum of a list is equal to
the first number in the list plus the sum of the rest of the list.
In both of these examples, we saw the same pattern for recursing on
the structure of a list.
(defn rec-fun [l]
(if (empty? l)
base-case
(accumulate (do-something (first l))
(rec-fun (rest l))))) ; <- recursive call
This is a very general pattern. The conditional has two parts, when
there is nothing in the input list, it returns some base case. For
instance, in the map
example this is ()
and in the sum
example
it is 0
. Otherwise, the function peels off the first element of the
list, does something to it, recurses on the rest of the list, and
then accumulates the two results.
We can use this observation to write a higher order function that
generates a such recursive functions, given the base case,
accumulator, and an operation to apply to each element of the list
before recursing.
(defn make-rec-fn [base-case accumulate operation]
(fn rec-fun [l]
(if (empty? l)
base-case
(accumulate (operation (first l))
(rec-fun (rest l))))))
Recursion and trees
How are recursively defined functions executed? One helpful way to
visualize recursion is using a tree.
(sum '(1 2 3))
When first working through recursive functions it is often helpful to
explicitly walk through the tree structure of the computation. In
linguistics, we usually make extensive use of such trees which we
usually call derivation trees or parse trees. Although they are
not usually presented in terms of computation traces, they are often
best thought of as such.
Two ways to think about recursion
The discussion above illustrates two different but complementary ways
to think about recursion.
-
The procedural view considers how a recursive function is
computed step-by-step and is best understood by thinking of the
tree of recursive calls, like the one we drew above.
-
The declarative view considers how the recursive function is
defined “by contract”—we break the definition into cases and
define the correct behavior for each case. In the base case we
define the output behavior directly in terms of the input. In the
inductive case, we make the assumption that the function
(e.g.,sum
or map
), is defined correctly on smaller/simpler
inputs (i.e., (sum (rest l))
or (map f (rest l))
) and define
the relationship between those smaller/simpler inputs and the full
input (i.e., (sum l)
or (map f l)
).
The declarative view can be unintuitive at first, since it relies on a
sort of leap-of-faith-style reasoning process. The inductive cases
require that we call our function as if we believe that it was already
correctly defined. However, this kind of inductive reasoning is a
powerful tool in programming (and mathematics more generally); so, it
is worth understanding deeply.
← 2 Models of Computation
4 Well-Formed Sentences and Grammaticality →