(ns foundations.computational.linguistics (:require [reagent.core :as r] [reagent.dom :as rd] [clojure.zip :as z] [clojure.pprint :refer [pprint]] [clojure.string :refer [index-of]] ;[clojure.string :as str] )) (enable-console-print!) (defn log [a-thing] (.log js/console a-thing))

(defn render-vega [spec elem] (when spec (let [spec (clj->js spec) opts {:renderer "canvas" :mode "vega" :actions { :export true, :source true, :compiled true, :editor true}}] (-> (js/vegaEmbed elem spec (clj->js opts)) (.then (fn [res] (. js/vegaTooltip (vega (.-view res) spec)))) (.catch (fn [err] (log err))))))) (defn vega "Reagent component that renders vega" [spec] (r/create-class {:display-name "vega" :component-did-mount (fn [this] (render-vega spec (rd/dom-node this))) :component-will-update (fn [this [_ new-spec]] (render-vega new-spec (rd/dom-node this))) :reagent-render (fn [spec] [:div#vis])})) ;making a histogram from a list of observations (defn list-to-hist-data-lite [l] """ takes a list and returns a record in the right format for vega data, with each list element the label to a field named 'x'""" (defrecord rec [category]) {:values (into [] (map ->rec l))}) (defn makehist-lite [data] { :$schema "https://vega.github.io/schema/vega-lite/v4.json", :data data, :mark "bar", :encoding { :x {:field "category", :type "ordinal"}, :y {:aggregate "count", :type "quantitative"} } }) (defn list-to-hist-data [l] """ takes a list and returns a record in the right format for vega data, with each list element the label to a field named 'x'""" (defrecord rec [category]) [{:name "raw", :values (into [] (map ->rec l))} {:name "aggregated" :source "raw" :transform [{:as ["count"] :type "aggregate" :groupby ["category"]}]} {:name "agg-sorted" :source "aggregated" :transform [{:type "collect" :sort {:field "category"}}]} ]) (defn makehist [data] (let [n (count (distinct ((data 0) :values))) h 200 pad 5 w (if (< n 20) (* n 35) (- 700 (* 2 pad)))] { :$schema "https://vega.github.io/schema/vega/v5.json", :width w, :height h, :padding pad, :data data, :signals [ {:name "tooltip", :value {}, :on [{:events "rect:mouseover", :update "datum"}, {:events "rect:mouseout", :update "{}"}]} ], :scales [ {:name "xscale", :type "band", :domain {:data "agg-sorted", :field "category"}, :range "width", :padding 0.05, :round true}, {:name "yscale", :domain {:data "agg-sorted", :field "count"}, :nice true, :range "height"} ], :axes [ { :orient "bottom", :scale "xscale" }, { :orient "left", :scale "yscale" } ], :marks [ {:type "rect", :from {:data "agg-sorted"}, :encode { :enter { :x {:scale "xscale", :field "category"}, :width {:scale "xscale", :band 1}, :y {:scale "yscale", :field "count"}, :y2 {:scale "yscale", :value 0} }, :update {:fill {:value "steelblue"}}, :hover {:fill {:value "green"}} } }, {:type "text", :encode { :enter { :align {:value "center"}, :baseline {:value "bottom"}, :fill {:value "#333"} }, :update { :x {:scale "xscale", :signal "tooltip.category", :band 0.5}, :y {:scale "yscale", :signal "tooltip.count", :offset -2}, :text {:signal "tooltip.count"}, :fillOpacity [ {:test "isNaN(tooltip.count)", :value 0}, {:value 1} ] } } } ] })) (defn hist [l] (-> l list-to-hist-data makehist vega)) ; for making bar plots (defn list-to-barplot-data-lite [l m] """ takes a list and returns a record in the right format for vega data, with each list element the label to a field named 'x'""" (defrecord rec [category amount]) {:values (into [] (map ->rec l m))}) (defn makebarplot-lite [data] { :$schema "https://vega.github.io/schema/vega-lite/v4.json", :data data, :mark "bar", :encoding { :x {:field "element", :type "ordinal"}, :y {:field "value", :type "quantitative"} } }) (defn list-to-barplot-data [l m] """ takes a list and returns a record in the right format for vega data, with each list element the label to a field named 'x'""" (defrecord rec [category amount]) {:name "table", :values (into [] (map ->rec l m))}) (defn makebarplot [data] (let [n (count (data :values)) h 200 pad 5 w (if (< n 20) (* n 35) (- 700 (* 2 pad)))] { :$schema "https://vega.github.io/schema/vega/v5.json", :width w, :height h, :padding pad, :data data, :signals [ {:name "tooltip", :value {}, :on [{:events "rect:mouseover", :update "datum"}, {:events "rect:mouseout", :update "{}"}]} ], :scales [ {:name "xscale", :type "band", :domain {:data "table", :field "category"}, :range "width", :padding 0.05, :round true}, {:name "yscale", :domain {:data "table", :field "amount"}, :nice true, :range "height"} ], :axes [ { :orient "bottom", :scale "xscale" }, { :orient "left", :scale "yscale" } ], :marks [ {:type "rect", :from {:data "table"}, :encode { :enter { :x {:scale "xscale", :field "category"}, :width {:scale "xscale", :band 1}, :y {:scale "yscale", :field "amount"}, :y2 {:scale "yscale", :value 0} }, :update {:fill {:value "steelblue"}}, :hover {:fill {:value "green"}} } }, {:type "text", :encode { :enter { :align {:value "center"}, :baseline {:value "bottom"}, :fill {:value "#333"} }, :update { :x {:scale "xscale", :signal "tooltip.category", :band 0.5}, :y {:scale "yscale", :signal "tooltip.amount", :offset -2}, :text {:signal "tooltip.amount"}, :fillOpacity [ {:test "isNaN(tooltip.amount)", :value 0}, {:value 1} ] } } } ] })) (defn barplot [l m] (vega (makebarplot (list-to-barplot-data l m)))) ; now, for tree making ;(thanks to Taylor Wood's answer in this thread on stackoverflow: ; https://stackoverflow.com/questions/57911965) (defn count-up-to-right [loc] (if (z/up loc) (loop [x loc, pops 0] (if (z/right x) pops (recur (z/up x) (inc pops)))) 0)) (defn list-to-tree-spec [l] """ takes a list and walks through it (with clojure.zip library) and builds the record format for the spec needed to for vega""" (loop [loc (z/seq-zip l), next-id 0, parent-ids [], acc []] (cond (z/end? loc) acc (z/end? (z/next loc)) (conj acc {:id (str next-id) :name (str (z/node loc)) :parent (when (seq parent-ids) (str (peek parent-ids)))}) (and (z/node loc) (not (z/branch? loc))) (recur (z/next loc) (inc next-id) (cond (not (z/right loc)) (let [n (count-up-to-right loc) popn (apply comp (repeat n pop))] (some-> parent-ids not-empty popn)) (not (z/left loc)) (conj parent-ids next-id) :else parent-ids) (conj acc {:id (str next-id) :name (str (z/node loc)) :parent (when (seq parent-ids) (str (peek parent-ids)))})) :else (recur (z/next loc) next-id parent-ids acc)))) (defn maketree [w h tree-spec] """ makes vega spec for a tree given tree-spec in the right json-like format """ {:$schema "https://vega.github.io/schema/vega/v5.json" :data [{:name "tree" :transform [{:key "id" :parentKey "parent" :type "stratify"} {:as ["x" "y" "depth" "children"] :method {:signal "layout"} :size [{:signal "width"} {:signal "height"}] :type "tree"}] :values tree-spec } {:name "links" :source "tree" :transform [{:type "treelinks"} {:orient "horizontal" :shape {:signal "links"} :type "linkpath"}]}] :height h :marks [{:encode {:update {:path {:field "path"} :stroke {:value "#ccc"}}} :from {:data "links"} :type "path"} {:encode {:enter {:size {:value 50} :stroke {:value "#fff"}} :update {:fill {:field "depth" :scale "color"} :x {:field "x"} :y {:field "y"}}} :from {:data "tree"} :type "symbol"} {:encode {:enter {:baseline {:value "bottom"} :font {:value "Courier"} :fontSize {:value 14} :angle {:value 0} :text {:field "name"}} :update {:align {:signal "datum.children ? 'center' : 'center'"} :dy {:signal "datum.children ? -6 : -6"} :opacity {:signal "labels ? 1 : 0"} :x {:field "x"} :y {:field "y"}}} :from {:data "tree"} :type "text"}] :padding 5 :scales [{:domain {:data "tree" :field "depth"} :name "color" :range {:scheme "magma"} :type "linear" :zero true}] :signals [{:bind {:input "checkbox"} :name "labels" :value true} {:bind {:input "radio" :options ["tidy" "cluster"]} :name "layout" :value "tidy"} {:name "links" :value "line"}] :width w} ) (defn tree-depth "get the depth of a tree (list)" [list] (if (seq? list) (inc (apply max 0 (map tree-depth list))) 0)) (defn tree "plot tree using vega" [list] (let [spec (list-to-tree-spec list) h (* 30 (tree-depth list))] (vega (maketree 700 h spec))))

← →

In the last chapter, we saw how lists can be built using the list constructor or by quoting. But there is a more useful and fundamental way to think about the structure of lists which we will make extensive use of in this course.

We can represent a list as a sequence of applications of the cons procedure, starting from the empty list: (cons 1 (cons 2 (cons 3 '()))

(list 1 2 3)

(cons 1 (cons 2 (cons 3 '())))

So (list 1 2 3) can be built by applying cons to the value 1 and another list (2 3); this latter list can be built by applying cons to the value 2 and another list (3), and so on, with the last element in the sequence being the empty or null list ().

We can use first and rest to navigate around this list.

(first (list 1 2 3))

(rest (list 1 2 3))

The structure of lists is important because it goes hand in hand with the most important code design pattern used in the $\lambda$-calculus and functional programming: recursion. We can process a list by defining a function which does something to the first element of the list, and calls itself recursively on the rest of the list. The base case of such a recursive list processing procedure is the empty list ().

Recursion is best explained with an example. One extremely important and basic list-processing function that exists in all functional languages is known as map. map is a higher-order function which takes two arguments: (map f l). Its first argument is a one-place (single argument) procedure f and its second second is a list l. The function map returns the list that results from applying f to each element of l.

(map (fn [x] (+ x 1) ) '(1 2 3 4 5 6))

In the example above, we called map with two arguments. First, we passed in an anonymous function (fn [x] (+ x 1)) which simply adds one to whatever it is passed as an argument. Second, we passed in the list (1 2 3 4 5 6). map applied the anonymous function to each element of the list and returned the resulting list (2 3 4 5 6 7).

map is a standard function in all functional programming languages because it is a common use case for iteration. But it is easy to define it ourselves using recursion.

(defn my-map [f l]
  (if (empty? l)
    '()
    (cons (f (first l))
          (my-map f (rest l)))))

(my-map (fn [x] (+ 1 x)) '(1 2 3 4 5 6))

Let’s go through this function definition. First, the function takes two arguments: a function f and a list l. The body of the procedure is a conditional. This conditional first checks if the list l is the empty list using the built in procedure empty?. If the list passed to my-map is empty, then the result must also be empty, so we set the value of the conditional to just the empty list. On the other hand, if there are elements in the list l, we need to do more work.

The alternative to the conditional first gets the first elements of l with first, i.e., (first l) and it then applies the function f to this element. Second, it calls itself on the remainder of the list which it gets using rest. Finally, it takes the result of these two operations and creates a pair from these using cons.

Let’s look at another example of recursion.

(defn sum [l]
  (if (empty? l)
    0
    (+ (first l) 
       (sum (rest l)))))

(sum '(1 2 3 3 4))

This function takes a list of numbers, and returns the sum of those numbers. The recursion in this definition says something very simple (but important) about summing a list: the sum of a list is equal to the first number in the list plus the sum of the rest of the list.

In both of these examples, we saw the same pattern for recursing on the structure of a list.

(defn rec-fun [l]
  (if (empty? l)
    base-case
    (accumulate (do-something (first l))
                (rec-fun (rest l))))) ; <- recursive call

This is a very general pattern. The conditional has two parts, when there is nothing in the input list, it returns some base case. For instance, in the map example this is () and in the sum example it is 0. Otherwise, the function peels off the first element of the list, does something to it, recurses on the rest of the list, and then accumulates the two results.

We can use this observation to write a higher order function that generates a such recursive functions, given the base case, accumulator, and an operation to apply to each element of the list before recursing.

(defn make-rec-fn [base-case accumulate operation]
  (fn rec-fun [l]
    (if (empty? l)
      base-case
      (accumulate (operation (first l))
                  (rec-fun (rest l))))))

Recursion and trees

How are recursively defined functions executed? One helpful way to visualize recursion is using a tree.

(sum '(1 2 3))

trace

When first working through recursive functions it is often helpful to explicitly walk through the tree structure of the computation. In linguistics, we usually make extensive use of such trees which we usually call derivation trees or parse trees. Although they are not usually presented in terms of computation traces, they are often best thought of as such.

Two ways to think about recursion

The discussion above illustrates two different but complementary ways to think about recursion.

The procedural view considers how a recursive function is computed step-by-step and is best understood by thinking of the tree of recursive calls, like the one we drew above.
The declarative view considers how the recursive function is defined “by contract”—we break the definition into cases and define the correct behavior for each case. In the base case we define the output behavior directly in terms of the input. In the inductive case, we make the assumption that the function (e.g.,sum or map), is defined correctly on smaller/simpler inputs (i.e., (sum (rest l)) or (map f (rest l))) and define the relationship between those smaller/simpler inputs and the full input (i.e., (sum l) or (map f l)).

The declarative view can be unintuitive at first, since it relies on a sort of leap-of-faith-style reasoning process. The inductive cases require that we call our function as if we believe that it was already correctly defined. However, this kind of inductive reasoning is a powerful tool in programming (and mathematics more generally); so, it is worth understanding deeply.

← 2 Models of Computation 4 Well-Formed Sentences and Grammaticality →

3 Recursion

Recursion and trees

Two ways to think about recursion