(ns foundations.computational.linguistics
(:require [reagent.core :as r]
[reagent.dom :as rd]
[clojure.zip :as z]
[clojure.pprint :refer [pprint]]
[clojure.string :refer [index-of]]
;[clojure.string :as str]
))
(enable-console-print!)
(defn log [a-thing]
(.log js/console a-thing))
(defn render-vega [spec elem]
(when spec
(let [spec (clj->js spec)
opts {:renderer "canvas"
:mode "vega"
:actions {
:export true,
:source true,
:compiled true,
:editor true}}]
(-> (js/vegaEmbed elem spec (clj->js opts))
(.then (fn [res]
(. js/vegaTooltip (vega (.-view res) spec))))
(.catch (fn [err]
(log err)))))))
(defn vega
"Reagent component that renders vega"
[spec]
(r/create-class
{:display-name "vega"
:component-did-mount (fn [this]
(render-vega spec (rd/dom-node this)))
:component-will-update (fn [this [_ new-spec]]
(render-vega new-spec (rd/dom-node this)))
:reagent-render (fn [spec]
[:div#vis])}))
;making a histogram from a list of observations
(defn list-to-hist-data-lite
  "Takes a list and returns a map in the right format for vega-lite data,
  with each list element the value of a field named 'category'."
  [l]
  {:values (into [] (map (fn [x] {:category x}) l))})
(defn makehist-lite [data]
{
:$schema "https://vega.github.io/schema/vega-lite/v4.json",
:data data,
:mark "bar",
:encoding {
:x {:field "category",
:type "ordinal"},
:y {:aggregate "count",
:type "quantitative"}
}
})
(defn list-to-hist-data
  "Takes a list and returns the data records in the right format for vega,
  with each list element the value of a field named 'category'."
  [l]
  [{:name "raw",
    :values (into [] (map (fn [x] {:category x}) l))}
{:name "aggregated"
:source "raw"
:transform
[{:as ["count"]
:type "aggregate"
:groupby ["category"]}]}
{:name "agg-sorted"
:source "aggregated"
:transform
[{:type "collect"
:sort {:field "category"}}]}
])
(defn makehist [data]
(let [n (count (distinct ((data 0) :values)))
h 200
pad 5
w (if (< n 20) (* n 35) (- 700 (* 2 pad)))]
{
:$schema "https://vega.github.io/schema/vega/v5.json",
:width w,
:height h,
:padding pad,
:data data,
:signals [
{:name "tooltip",
:value {},
:on [{:events "rect:mouseover", :update "datum"},
{:events "rect:mouseout", :update "{}"}]}
],
:scales [
{:name "xscale",
:type "band",
:domain {:data "agg-sorted", :field "category"},
:range "width",
:padding 0.05,
:round true},
{:name "yscale",
:domain {:data "agg-sorted", :field "count"},
:nice true,
:range "height"}
],
:axes [
{ :orient "bottom", :scale "xscale" },
{ :orient "left", :scale "yscale" }
],
:marks [
{:type "rect",
:from {:data "agg-sorted"},
:encode {
:enter {
:x {:scale "xscale", :field "category"},
:width {:scale "xscale", :band 1},
:y {:scale "yscale", :field "count"},
:y2 {:scale "yscale", :value 0}
},
:update {:fill {:value "steelblue"}},
:hover {:fill {:value "green"}}
}
},
{:type "text",
:encode {
:enter {
:align {:value "center"},
:baseline {:value "bottom"},
:fill {:value "#333"}
},
:update {
:x {:scale "xscale", :signal "tooltip.category", :band 0.5},
:y {:scale "yscale", :signal "tooltip.count", :offset -2},
:text {:signal "tooltip.count"},
:fillOpacity [
{:test "isNaN(tooltip.count)", :value 0},
{:value 1}
]
}
}
}
]
}))
(defn hist [l]
(-> l
list-to-hist-data
makehist
vega))
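; Usage sketch (hypothetical observations, not from the text): `hist` takes a
; raw list of observations and renders a vega histogram of their counts.
;(hist '(N V N Adj N V stop))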
; for making bar plots
(defn list-to-barplot-data-lite
  "Takes a list of categories and a list of amounts, and returns a map in the
  right format for vega-lite data, with fields 'category' and 'amount'."
  [l m]
  {:values (into [] (map (fn [x y] {:category x :amount y}) l m))})
(defn makebarplot-lite [data]
{
:$schema "https://vega.github.io/schema/vega-lite/v4.json",
:data data,
:mark "bar",
:encoding {
:x {:field "element", :type "ordinal"},
:y {:field "value", :type "quantitative"}
}
})
(defn list-to-barplot-data
  "Takes a list of categories and a list of amounts, and returns the data record
  in the right format for vega, with fields 'category' and 'amount'."
  [l m]
  {:name "table",
   :values (into [] (map (fn [x y] {:category x :amount y}) l m))})
(defn makebarplot [data]
(let [n (count (data :values))
h 200
pad 5
w (if (< n 20) (* n 35) (- 700 (* 2 pad)))]
{
:$schema "https://vega.github.io/schema/vega/v5.json",
:width w,
:height h,
:padding pad,
:data data,
:signals [
{:name "tooltip",
:value {},
:on [{:events "rect:mouseover", :update "datum"},
{:events "rect:mouseout", :update "{}"}]}
],
:scales [
{:name "xscale",
:type "band",
:domain {:data "table", :field "category"},
:range "width",
:padding 0.05,
:round true},
{:name "yscale",
:domain {:data "table", :field "amount"},
:nice true,
:range "height"}
],
:axes [
{ :orient "bottom", :scale "xscale" },
{ :orient "left", :scale "yscale" }
],
:marks [
{:type "rect",
:from {:data "table"},
:encode {
:enter {
:x {:scale "xscale", :field "category"},
:width {:scale "xscale", :band 1},
:y {:scale "yscale", :field "amount"},
:y2 {:scale "yscale", :value 0}
},
:update {:fill {:value "steelblue"}},
:hover {:fill {:value "green"}}
}
},
{:type "text",
:encode {
:enter {
:align {:value "center"},
:baseline {:value "bottom"},
:fill {:value "#333"}
},
:update {
:x {:scale "xscale", :signal "tooltip.category", :band 0.5},
:y {:scale "yscale", :signal "tooltip.amount", :offset -2},
:text {:signal "tooltip.amount"},
:fillOpacity [
{:test "isNaN(tooltip.amount)", :value 0},
{:value 1}
]
}
}
}
]
}))
(defn barplot [l m]
(vega (makebarplot (list-to-barplot-data l m))))
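; Usage sketch (hypothetical values, not from the text): unlike `hist`, bar
; heights are taken directly from the second list rather than aggregated.
;(barplot '(the whale sea) '(12 7 3))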
; now, for tree making
;(thanks to Taylor Wood's answer in this thread on stackoverflow:
; https://stackoverflow.com/questions/57911965)
(defn count-up-to-right [loc]
(if (z/up loc)
(loop [x loc, pops 0]
(if (z/right x)
pops
(recur (z/up x) (inc pops))))
0))
(defn list-to-tree-spec
  "Takes a list, walks through it (with the clojure.zip library),
  and builds the records needed for the vega tree spec."
  [l]
(loop [loc (z/seq-zip l), next-id 0, parent-ids [], acc []]
(cond
(z/end? loc) acc
(z/end? (z/next loc))
(conj acc
{:id (str next-id)
:name (str (z/node loc))
:parent (when (seq parent-ids)
(str (peek parent-ids)))})
(and (z/node loc) (not (z/branch? loc)))
(recur
(z/next loc)
(inc next-id)
(cond
(not (z/right loc))
(let [n (count-up-to-right loc)
popn (apply comp (repeat n pop))]
(some-> parent-ids not-empty popn))
(not (z/left loc))
(conj parent-ids next-id)
:else parent-ids)
(conj acc
{:id (str next-id)
:name (str (z/node loc))
:parent (when (seq parent-ids)
(str (peek parent-ids)))}))
:else
(recur (z/next loc) next-id parent-ids acc))))
(defn maketree
  "Makes a vega spec for a tree, given tree-spec in the right json-like format."
  [w h tree-spec]
{:$schema "https://vega.github.io/schema/vega/v5.json"
:data [{:name "tree"
:transform [{:key "id" :parentKey "parent" :type "stratify"}
{:as ["x" "y" "depth" "children"]
:method {:signal "layout"}
:size [{:signal "width"} {:signal "height"}]
:type "tree"}]
:values tree-spec
}
{:name "links"
:source "tree"
:transform [{:type "treelinks"}
{:orient "horizontal"
:shape {:signal "links"}
:type "linkpath"}]}]
:height h
:marks [{:encode {:update {:path {:field "path"} :stroke {:value "#ccc"}}}
:from {:data "links"}
:type "path"}
{:encode {:enter {:size {:value 50} :stroke {:value "#fff"}}
:update {:fill {:field "depth" :scale "color"}
:x {:field "x"}
:y {:field "y"}}}
:from {:data "tree"}
:type "symbol"}
{:encode {:enter {:baseline {:value "bottom"}
:font {:value "Courier"}
:fontSize {:value 14}
:angle {:value 0}
:text {:field "name"}}
:update {:align {:value "center"}
         :dy {:value -6}
:opacity {:signal "labels ? 1 : 0"}
:x {:field "x"}
:y {:field "y"}}}
:from {:data "tree"}
:type "text"}]
:padding 5
:scales [{:domain {:data "tree" :field "depth"}
:name "color"
:range {:scheme "magma"}
:type "linear"
:zero true}]
:signals [{:bind {:input "checkbox"} :name "labels" :value true}
{:bind {:input "radio" :options ["tidy" "cluster"]}
:name "layout"
:value "tidy"}
{:name "links"
:value "line"}]
:width w}
)
(defn tree-depth
"get the depth of a tree (list)"
[list]
(if (seq? list)
(inc (apply max 0 (map tree-depth list)))
0))
(defn tree
"plot tree using vega"
[list]
(let [spec (list-to-tree-spec list)
h (* 30 (tree-depth list))]
(vega (maketree 700 h spec))))
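; Usage sketch (hypothetical parse, not from the text): plot a small
; constituency tree given as a nested list.
;(tree '(S (NP Ishmael) (VP (V call) (NP me))))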
(def categories '(N V Adj Adv P stop))
(def vocabulary '(Call me Ishmael))
(defn logsumexp [& log-vals]
(let [mx (apply max log-vals)]
(+ mx
(Math/log2
(apply +
(map (fn [z] (Math/pow 2 z))
(map (fn [x] (- x mx))
log-vals)))))))
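; Sanity check (assumed values): log-space addition in base 2,
; e.g. log2(2^-3 + 2^-3) = -2.
;(logsumexp -3 -3) ; => -2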
(defn flip [p]
  (< (rand 1) p))
(declare normalize)
(defn sample-categorical [outcomes params]
  (if (flip (first params))
    (first outcomes)
    (sample-categorical (rest outcomes)
                        (normalize (rest params)))))
(defn score-categorical [outcome outcomes params]
(if (empty? params)
(throw "no matching outcome")
(if (= outcome (first outcomes))
(Math/log2 (first params))
(score-categorical outcome (rest outcomes) (rest params)))))
(defn normalize [params]
(let [sum (apply + params)]
(map (fn [x] (/ x sum)) params)))
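; Example (assumed weights): sample an outcome from a normalized categorical
; over `categories`, then score an outcome as a log (base 2) probability.
;(sample-categorical categories (normalize '(2 1 1 1 1 1)))
;(score-categorical 'V categories (normalize '(2 1 1 1 1 1)))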
(defn sample-gamma
  "Samples from a gamma distribution with integer shape, as a sum of
  exponential draws; the scale argument is not used by this sampler."
  [shape scale]
  (apply + (repeatedly
            shape
            (fn [] (- (Math/log2 (rand)))))))
(defn sample-dirichlet [pseudos]
(let [gammas (map (fn [sh]
(sample-gamma sh 1))
pseudos)]
(normalize gammas)))
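; Example (assumed pseudocounts): draw one probability vector over
; `categories` from a symmetric Dirichlet prior.
;(sample-dirichlet (repeat (count categories) 1))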
(defn update-context [order old-context new-symbol]
(if (>= (count old-context) order)
(throw "Context too long!")
(if (= (count old-context) (- order 1))
(concat (rest old-context) (list new-symbol))
(concat old-context (list new-symbol)))))
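; Example (assumed trigram setting): with order 3 the context keeps at most
; the two most recent symbols.
;(update-context 3 '(N V) 'Adj) ; => (V Adj)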
(defn hmm-unfold [transition observation order context current stop?]
(if (stop? current)
(list current)
(let [new-context (update-context
order
context
current)
nxt (transition new-context)]
(cons [current (observation current)]
(hmm-unfold
transition
observation
order
new-context
nxt
stop?)))))
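; Usage sketch (hypothetical uniform HMM, not from the text): transition and
; observation distributions are uniform over `categories` and `vocabulary`,
; and unfolding stops when the category 'stop is reached.
;(defn uniform-transition [context]
;  (sample-categorical categories (normalize (repeat (count categories) 1))))
;(defn uniform-observation [category]
;  (sample-categorical vocabulary (normalize (repeat (count vocabulary) 1))))
;(hmm-unfold uniform-transition uniform-observation 2 '() 'N #(= % 'stop))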
(defn all-but-last [l]
(cond (empty? l) (throw (js/Error. "cannot take all-but-last of an empty list"))
(empty? (rest l)) '()
:else (cons (first l) (all-but-last (rest l)))))
In the preceding chapters, we considered an approximation to the HMM parameter
estimation problem using the expectation maximization algorithm. Now
we turn to the Bayesian approach to parameter learning.
In Bayesian terms, our problem is to estimate the posterior
distribution over parameters, given the data as well as the
hyperparameters of the model.
\[\begin{align}
\Pr(\boldsymbol{\theta}_T, \boldsymbol{\theta}_O \mid \mathbf{C}, \boldsymbol{\alpha}_T, \boldsymbol{\alpha}_O) &=&
\frac{
\Pr(\boldsymbol{\theta}_O \mid
\boldsymbol{\alpha}_O)\Pr(\boldsymbol{\theta}_T \mid
\boldsymbol{\alpha}_T) \prod_{\vec{w} \in \mathbf{C} } \sum_{\vec{c}} \prod_{i=1}^{|\vec{w}|}
\Pr(c^{(i)} | c^{(i-1)}, \vec{\theta}_{T,c^{(i-1)}}) \Pr(w^{(i)} | c^{(i)}, \vec{\theta}_{O,c^{(i)}})
}
{\int_{\boldsymbol{\theta}_T}
\int_{\boldsymbol{\theta}_O}
\Pr(\boldsymbol{\theta}_O \mid
\boldsymbol{\alpha}_O)\Pr(\boldsymbol{\theta}_T \mid
\boldsymbol{\alpha}_T) \prod_{\vec{w} \in \mathbf{C} } \sum_{\vec{c}} \prod_{i=1}^{|\vec{w}|}
\Pr(c^{(i)} | c^{(i-1)}, \vec{\theta}_{T,c^{(i-1)}}) \Pr(w^{(i)} | c^{(i)}, \vec{\theta}_{O,c^{(i)}})
\,d\vec{\theta}_O\,d\vec{\theta}_T
} \\
\end{align}\]
As has become familiar at this stage, we have the problem that summing
over all possible derivations for each observed sentence is
intractable. We now also have the additional problem that we do not
know how to solve the integrals over the parameters which appear
outside of these sums in closed form. How can we approximate this
complex posterior distribution?
It is useful to recall the two representations of probability
distributions that we have been using throughout the course:
samplers and scorers. While implementing an exact scorer for this
distribution is non-trivial, we can implement an approximate
sampler.
What does the strange phrase approximate sampler mean? If we can
sample directly from some distribution, this is called exact
sampling. Often, however, we cannot take exact i.i.d. samples from our
distribution of interest. Instead, we take samples that are either
drawn from a different distribution or not i.i.d., and use these in
some way to approximate the true distribution.
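For intuition, here is a minimal sketch using the sample-categorical and normalize helpers defined in the preamble above: a collection of exact samples approximates the true distribution through its empirical frequencies. The helper empirical-frequencies and the weights in the commented example are made up for illustration.
(defn empirical-frequencies
  "Approximate a categorical distribution by the relative frequencies
  observed in n exact samples."
  [outcomes params n]
  (let [samples (repeatedly n #(sample-categorical outcomes (normalize params)))]
    (into {} (map (fn [[outcome cnt]] [outcome (/ cnt n)])
                  (frequencies samples)))))
;(empirical-frequencies categories '(2 1 1 1 1 1) 1000)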
The first such algorithm we will examine is called Gibbs sampling.
The idea of Gibbs sampling is to sample a subset of the latent random
variables in a model, conditioned on fixed values for the others.
In the case of HMMs, this leads to an algorithm which is similar to a
sampling version of the Baum-Welch algorithm. We alternate between
sampling from the conditional posterior distribution of the
parameters given some fixed set of derivations,
\[\begin{align}
\Pr(\boldsymbol{\theta}_T, \boldsymbol{\theta}_O \mid \mathbf{C}, \mathbf{D}, \boldsymbol{\alpha}_T, \boldsymbol{\alpha}_O) &=&
\frac{
\Pr(\boldsymbol{\theta}_O \mid
\boldsymbol{\alpha}_O)\Pr(\boldsymbol{\theta}_T \mid
\boldsymbol{\alpha}_T) \prod_{\vec{w} \in \mathbf{C} } \prod_{i=1}^{|\vec{w}|}
\Pr(c^{(i)} | c^{(i-1)}, \vec{\theta}_{T,c^{(i-1)}}) \Pr(w^{(i)} | c^{(i)}, \vec{\theta}_{O,c^{(i)}})
}
{\int_{\boldsymbol{\theta}_T}
\int_{\boldsymbol{\theta}_O}
\Pr(\boldsymbol{\theta}_O \mid
\boldsymbol{\alpha}_O)\Pr(\boldsymbol{\theta}_T \mid
\boldsymbol{\alpha}_T) \prod_{\vec{w} \in \mathbf{C} } \prod_{i=1}^{|\vec{w}|}
\Pr(c^{(i)} | c^{(i-1)}, \vec{\theta}_{T,c^{(i-1)}}) \Pr(w^{(i)} | c^{(i)}, \vec{\theta}_{O,c^{(i)}})
\,d\vec{\theta}_O\,d\vec{\theta}_T
} \\
\end{align}\]
and sampling from the conditional posterior over derivations given
some fixed set of parameter values:
\[\begin{align}
\Pr(\mathbf{D} \mid \boldsymbol{\theta}_T, \boldsymbol{\theta}_O, \mathbf{C}, \boldsymbol{\alpha}_T, \boldsymbol{\alpha}_O) &=&
\frac{
\Pr(\boldsymbol{\theta}_O \mid
\boldsymbol{\alpha}_O)\Pr(\boldsymbol{\theta}_T \mid
\boldsymbol{\alpha}_T) \prod_{\vec{w} \in \mathbf{C} } \prod_{i=1}^{|\vec{w}|}
\Pr(c^{(i)} | c^{(i-1)}, \vec{\theta}_{T,c^{(i-1)}}) \Pr(w^{(i)} | c^{(i)}, \vec{\theta}_{O,c^{(i)}})
}
{
\Pr(\boldsymbol{\theta}_O \mid
\boldsymbol{\alpha}_O)\Pr(\boldsymbol{\theta}_T \mid
\boldsymbol{\alpha}_T) \prod_{\vec{w} \in \mathbf{C} } \sum_{\vec{c}} \prod_{i=1}^{|\vec{w}|}
\Pr(c^{(i)} | c^{(i-1)}, \vec{\theta}_{T,c^{(i-1)}}) \Pr(w^{(i)} | c^{(i)}, \vec{\theta}_{O,c^{(i)}})
} \\
\end{align}\]
We have just studied the latter distribution in depth and know how to
work with it. The former, of course, has a closed-form solution, as we
saw in Inference using Conditionalization.
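To make the parameter half of this alternation concrete, here is a minimal sketch in the style of the preamble above, assuming (as that closed-form solution implies) that each row of the transition or observation matrix is resampled from a Dirichlet whose parameters are the prior pseudocounts plus the counts gathered from the currently fixed derivations. Only sample-dirichlet comes from the preamble; the helper name and the counts in the commented example are hypothetical.
(defn resample-parameter-row
  "One Gibbs update for a single row of the transition or observation
  matrix: draw from the Dirichlet posterior whose parameters are the
  pseudocounts plus the counts under the currently fixed derivations."
  [pseudocounts observed-counts]
  (sample-dirichlet (map + pseudocounts observed-counts)))
; e.g., resample the transition distribution out of category N, supposing it
; was followed 4, 1, 0, 0, 0, and 2 times by each category in `categories`
;(resample-parameter-row (repeat (count categories) 1) '(4 1 0 0 0 2))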