はしくれエンジニアもどきのメモ

情報系技術・哲学・デザインなどの勉強メモ・備忘録です。

多次元正規分布の周辺分布と条件付分布を計算する

多次元正規分布の周辺分布と条件付分布を計算する

多次元正規分布の周辺分布と条件付分布もまた正規分布になる. そのときのパラメータ(平均,分散)を導出する.

D次元正規分布に従う確率変数ベクトルを$\vec{x}$, 平均ベクトルを$\vec{\mu}$, 共分散行列を$\mathbf{\Sigma}$とおいて, 確率密度関数を以下のようにおく.


\begin{eqnarray}
f(\vec{x} | \vec{\mu},\Sigma)
=\frac{1}{(2\pi)^{\frac{D}{2}} |\Sigma|^{\frac{1}{2}}}
\exp{\left(-\frac{1}{2}(\vec{x}-\vec{\mu})^{T}\Sigma^{-1}(\vec{x}-\vec{\mu})\right)}
\end{eqnarray}

参考:

周辺分布の導出

2次元の場合

参考:http://www.ae.keio.ac.jp/lab/soc/takeuchi/lectures/1_Norm.pdf

以下の2次元正規分布を考える.


\begin{eqnarray}
\vec{x} =
\begin{pmatrix}
x_{1}\\
x_{2}
\end{pmatrix},
\vec{\mu} =
\begin{pmatrix}
\mu_{1}\\
\mu_{2}
\end{pmatrix},
\mathbf{\Sigma}_{2\times 2} =
\begin{pmatrix}
\sigma_{1}^{2} & \sigma_{12} \\
\sigma_{12} & \sigma_{2}^{2}
\end{pmatrix}
\end{eqnarray}

\begin{eqnarray}
f(\vec{x} | \vec{\mu},\Sigma)
=\frac{1}{(2\pi)^{\frac{2}{2}} |\Sigma|^{\frac{1}{2}}}
\exp{\left(-\frac{1}{2}(\vec{x}-\vec{\mu})^{T}\Sigma^{-1}(\vec{x}-\vec{\mu})\right)}
\end{eqnarray}

この同時分布の式変形を考える.

式変形には,共分散行列の対角化を考える. 以下の行列を定義する.


\begin{eqnarray}
\mathbf{E} =
\begin{pmatrix}
1 & 0 \\
-\sigma_{12}(\sigma_{1}^{2})^{-1} & 1
\end{pmatrix},
\mathbf{E}^{-1} =
\begin{pmatrix}
1 & 0 \\
\sigma_{12} (\sigma_{1}^{2})^{-1} & 1
\end{pmatrix} \\
\mathbf{E}^{T} =
\begin{pmatrix}
1 & -\sigma_{12}(\sigma_{1}^{2})^{-1} \\
0 & 1
\end{pmatrix},
(\mathbf{E}^{-1})^{T} =
\begin{pmatrix}
1 & \sigma_{12} (\sigma_{1}^{2})^{-1}  \\
0 & 1
\end{pmatrix}
\end{eqnarray}

$E^{T}(E^{T})^{-1} = E^{-1}E = \mathbf{I}$ である. これを両側からかけて共分散行列の対角化をしていく.


\begin{eqnarray}
E^{T} (E^{T})^{-1}\mathbf{\Sigma}^{-1}E^{-1}E
&=& E^{T} ( ( E^{T})^{-1}\mathbf{\Sigma}^{-1}E^{-1}) E \\
&=& E^{T} ( ( \mathbf{\Sigma}E^{T} )^{-1} E^{-1} ) E \\
&=& E^{T} ( E\mathbf{\Sigma}E^{T} )^{-1} E \\
&=& E^{T} \left(
\begin{pmatrix}
1 & 0 \\
- \sigma_{12}(\sigma_{1}^{2})^{-1} & 1
\end{pmatrix}
\begin{pmatrix}
\sigma_{1}^{2} & \sigma_{12} \\
\sigma_{12} & \sigma_{2}^{2}
\end{pmatrix}
\begin{pmatrix}
1 & - \sigma_{12}(\sigma_{1}^{2})^{-1} \\
0 & 1
\end{pmatrix}
\right)^{-1} E \\
&=&
E^{T} \left(
\begin{pmatrix}
\sigma_{1}^{2} & \sigma_{12} \\
0 & \sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1}
\end{pmatrix}
\begin{pmatrix}
1 & - \sigma_{12}(\sigma_{1}^{2})^{-1} \\
0 & 1
\end{pmatrix}
\right)^{-1} E \\
&=&
E^{T} \left(
\begin{pmatrix}
\sigma_{1}^{2} & 0 \\
0 & \sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1}
\end{pmatrix}
\right)^{-1} E \\
&=&
E^{T} \left(
\begin{pmatrix}
(\sigma_{1}^{2})^{-1} & 0 \\
0 & (\sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1})^{-1}
\end{pmatrix}
\right) E
\end{eqnarray}

平均との差のベクトルが$E$で変換されると


\begin{eqnarray}
E(\vec{x} - \vec{\mu}) =
\begin{pmatrix}
1 & 0 \\\\
-\sigma_{12}(\sigma_{1}^{2})^{-1} & 1
\end{pmatrix}
\begin{pmatrix}
x_{1} - \mu_{1}  \\\\
x_{2} - \mu_{2}
\end{pmatrix}
=
\begin{pmatrix}
x_{1} - \mu_{1}  \\\\
x_{2} - (\mu_{2} + \sigma_{12}(\sigma_{1}^{2})^{-1}(x_{1} - \mu_{1}))
\end{pmatrix}
\end{eqnarray}

(ちなみに, $\sigma_{12}(\sigma_{1}^{2})^{-1}$ は,$x_{2}$を$x_{1}$で表した回帰直線の回帰係数より,$x_{2}$から$x_{1}$のもつ回帰成分を取り除いている.)

よって,expの中身は,


\begin{eqnarray}
(\vec{x} - \vec{\mu})^{T}E^{T}(E^{T})^{-1}\mathbf{\Sigma}^{-1}E^{-1}E(\vec{x} - \vec{\mu})
&=& (\vec{x} - \vec{\mu})^{T} E^{T}
\begin{pmatrix}
(\sigma_{1}^{2})^{-1} & 0 \\
0 & (\sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1})^{-1}
\end{pmatrix}
E (\vec{x} - \vec{\mu})\\
&=& \begin{pmatrix}
x_{1} - \mu_{1} & x_{2} - (\mu_{2} + \sigma_{12}(\sigma_{1}^{2})^{-1}(x_{1} - \mu_{1}))
\end{pmatrix}
\begin{pmatrix}
(\sigma_{1}^{2})^{-1} & 0 \\
0 & (\sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1})^{-1}
\end{pmatrix}
\begin{pmatrix}
x_{1} - \mu_{1}  \\
x_{2} - (\mu_{2} + \sigma_{12}(\sigma_{1}^{2})^{-1}(x_{1} - \mu_{1}))
\end{pmatrix}\\
&=&
\frac{(x_{1} - \mu_{1})^{2}}{\sigma_{1}^{2}}
+ \frac{\left\{ x_{2} - (\mu_{2} + \sigma_{12}(\sigma_{1}^{2})^{-1}(x_{1} - \mu_{1})) \right\}^{2} }{\sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1}}
\end{eqnarray}

expを2つに分けることができる.

ここで,同時分布の係数に出てくる行列式を変形すると


\begin{eqnarray}
|\mathbf{\Sigma}|
&=& \sigma_{1}^{2} \sigma_{2}^{2} - \sigma_{12}^{2} \\
&=& \sigma_{1}^{2} \cdot ( \sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1} )
\end{eqnarray}

これで2つの正規分布に分けることができる.

よって,


\begin{eqnarray}
\text{Norm}(\vec{x} | \vec{\mu}, \mathbf{\Sigma}_{2\times2})
&=& \frac{1}{(2\pi)^{\frac{2}{2}} |\Sigma|^{\frac{1}{2}}}
\exp{\left(-\frac{1}{2}(\vec{x}-\vec{\mu})^{T}\Sigma^{-1}(\vec{x}-\vec{\mu})\right)} \\\\
&=& \frac{1}{(2\pi)^{\frac{1}{2}} (\sigma_{1}^{2})^{\frac{1}{2}}} \exp{\left(-\frac{1}{2}\frac{(x_{1}-\mu_{1})^{2}}{\sigma_{1}^{2}} \right)}
\cdot
\frac{1}{(2\pi)^{\frac{1}{2}} (\sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1} )^{\frac{1}{2}}} \exp{\left(-\frac{1}{2}\frac{\left\{ x_{2} - (\mu_{2} + \sigma_{12}(\sigma_{1}^{2})^{-1}(x_{1} - \mu_{1})) \right\}^{2}}{\sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1} } \right)} \\\\
&=& \text{Norm}(x_{1} | \mu_{1}, \sigma_{1}^{2})
\cdot \text{Norm}(x_{2} | \mu_{2} + \sigma_{12}(\sigma_{1}^{2})^{-1}(x_{1} - \mu_{1}), \sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1} )
\end{eqnarray}

$x_{2}$で周辺化すると


\begin{eqnarray}
\int_{-\infty}^{\infty} \text{Norm}(\vec{x} | \vec{\mu}, \mathbf{\Sigma}) dx_{2}
&=& \text{Norm}(x_{1} | \mu_{1}, \sigma_{1}^{2})
\cdot \int_{-\infty}^{\infty} \text{Norm}(x_{2} | \mu_{2} + \sigma_{12}(\sigma_{1}^{2})^{-1}(x_{1} - \mu_{1}), \sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1} ) dx_{2} \\\\
&=& \text{Norm}(x_{1} | \mu_{1}, \sigma_{1}^{2})
\cdot 1 \\\\
&=& \text{Norm}(x_{1} | \mu_{1}, \sigma_{1}^{2})
\end{eqnarray}

$x_{1}$で周辺化する場合も同様に,積の形に変形することができ, 多次元正規分布を$N(x_{2}|\mu_{2}, \sigma_{2}^{2})$が残るように分解する.


\begin{eqnarray}
\text{Norm}(\vec{x} | \vec{\mu}, \mathbf{\Sigma}_{2\times2})
&=& \text{Norm}(x_{2} | \mu_{2}, \sigma_{2}^{2})
\cdot \text{Norm}(x_{1} | \mu_{1} + \sigma_{12}(\sigma_{2}^{2})^{-1}(x_{2} - \mu_{2}), \sigma_{1}^{2} - \sigma_{12}^{2}(\sigma_{2}^{2})^{-1} )
\end{eqnarray}

$x_{1}$で周辺化する.


\begin{eqnarray}
\int_{-\infty}^{\infty} \text{Norm}(\vec{x} | \vec{\mu}, \mathbf{\Sigma}_{2\times2}) dx_{1}
&=& \text{Norm}(x_{2} | \mu_{2}, \sigma_{2}^{2})
\cdot \int_{-\infty}^{\infty} \text{Norm}(x_{1} | \mu_{1} + \sigma_{12}(\sigma_{2}^{2})^{-1}(x_{2} - \mu_{2}), \sigma_{1}^{2} - \sigma_{12}^{2}(\sigma_{2}^{2})^{-1} ) dx_{1} \\\\
&=& \text{Norm}(x_{2} | \mu_{2}, \sigma_{2}^{2}) \\\\
\end{eqnarray}

多次元の場合

多次元の場合も同様に変形できる.

D次元の変数ベクトル$\vec{x}$をp個とq個の2つに分ける. $p+q = D$

この時の多次元正規分布を考える.


\begin{eqnarray}
\vec{x}_{D\times 1} =
\begin{pmatrix}
x_{1}\\
\vdots \\
x_{D}
\end{pmatrix}
=
\begin{pmatrix}
\vec{x}_{p}\\
\vec{x}_{q}
\end{pmatrix},
\vec{\mu}_{D\times 1} =
\begin{pmatrix}
\mu_{1}\\
\vdots \\
\mu_{D}
\end{pmatrix}
=
\begin{pmatrix}
\vec{\mu}_{p}\\
\vec{\mu}_{q}
\end{pmatrix}
\end{eqnarray}

共分散行列は以下のブロック行列とおく.


\begin{eqnarray}
\mathbf{\Sigma}_{D\times D}
&=&
\begin{pmatrix}
\mathbf{\Sigma}_{11, p\times p} & \mathbf{\Sigma}_{12, p\times q} \\\\
\mathbf{\Sigma}_{21, q\times p} & \mathbf{\Sigma}_{22, q\times q}
\end{pmatrix}
\end{eqnarray}

$\mathbf{\Sigma}$は対称行列より, $\mathbf{\Sigma}_{12}^{T} = \mathbf{\Sigma}_{21}$ である.

密度関数を以下とする.


\begin{eqnarray}
\text{Norm}(\vec{x} | \vec{\mu},\mathbf{\Sigma} )
= \frac{1}{(2\pi)^{\frac{D}{2}} |\mathbf{\Sigma}|^{\frac{1}{2}}}
\exp{\left(-\frac{1}{2}(\vec{x}-\vec{\mu})^{T}\mathbf{\Sigma}^{-1}(\vec{x}-\vec{\mu})\right)}
\end{eqnarray}

2次元の時同様に,共分散行列の対角化を考える.

対角化のために以下の行列を考える.


\begin{eqnarray}
\mathbf{E} =
\begin{pmatrix}
I_{p \times p} & \mathbf{O}_{p\times q} \\
-\mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1} & \mathbf{I}_{q\times q}
\end{pmatrix},
\mathbf{E}^{-1} =
\begin{pmatrix}
I_{p \times p} & \mathbf{O}_{p\times q} \\
\mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1} & \mathbf{I}_{q\times q}
\end{pmatrix} \\
\mathbf{E}^{T} =
\begin{pmatrix}
I_{p \times p} &  -\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12} \\
\mathbf{O}_{q\times p} & \mathbf{I}_{q\times q}
\end{pmatrix},
(\mathbf{E}^{-1})^{T} =
\begin{pmatrix}
I_{p \times p} &  \mathbf{\Sigma}_{11}^{-1} \mathbf{\Sigma}_{12} \\
\mathbf{O}_{q\times p} & \mathbf{I}_{q\times q}
\end{pmatrix}
\end{eqnarray}

共分散行列の両側から$E^{T}(E^{T})^{-1} = E^{-1}E = \mathbf{I}$をかけて対角化していく.

これを両側からかけて共分散行列の対角化をしていく.


\begin{eqnarray}
E^{T}(E^{T})^{-1}\mathbf{\Sigma}^{-1}E^{-1}E
&=& E^{T} ( ( E^{T})^{-1}\mathbf{\Sigma}^{-1}E^{-1}) E \\
&=& E^{T} ( ( \mathbf{\Sigma}E^{T} )^{-1} E^{-1} ) E \\
&=& E^{T} ( E\mathbf{\Sigma}E^{T} )^{-1} E \\
&=& E^{T} \left(
\begin{pmatrix}
\mathbf{I}_{p} & O \\
-\mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1} & \mathbf{I}_{q}
\end{pmatrix}
\begin{pmatrix}
\mathbf{\Sigma}_{11} & \mathbf{\Sigma}_{12} \\
\mathbf{\Sigma}_{21} & \mathbf{\Sigma}_{22}
\end{pmatrix}
\begin{pmatrix}
\mathbf{I}_{p} & - \mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12} \\
O & \mathbf{I}_{q}
\end{pmatrix}
\right)^{-1} E \\
&=& E^{T} \left(
\begin{pmatrix}
\mathbf{\Sigma}_{11} & \mathbf{\Sigma}_{12} \\
O & \mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12}
\end{pmatrix}
\begin{pmatrix}
\mathbf{I}_{p} & -\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12} \\
O & \mathbf{I}_{q}
\end{pmatrix}
\right)^{-1} E \\
&=& E^{T}
\begin{pmatrix}
\mathbf{\Sigma}_{11} & O \\
O & \mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12}
\end{pmatrix}^{-1} E \\
&=& E^{T}
\begin{pmatrix}
\mathbf{\Sigma}_{11}^{-1} & O \\
O & (\mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12})^{-1}
\end{pmatrix} E
\end{eqnarray}

平均との差ベクトルのEでの行列変換を考える.


\begin{eqnarray}
E(\vec{x} - \vec{\mu}) = \begin{pmatrix}
I_{p \times p} & \mathbf{O}_{p\times q} \\\\
- \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1} & \mathbf{I}_{q\times q} \\\\
\end{pmatrix}
\begin{pmatrix}
\vec{x}_{p} - \vec{\mu}_{p}  \\\\
\vec{x}_{q} - \vec{\mu}_{q}
\end{pmatrix}
=
\begin{pmatrix}
\vec{x}_{p} - \vec{\mu}_{p}  \\\\
\vec{x}_{q} - (\vec{\mu}_{q} + \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p} - \vec{\mu}_{p}))
\end{pmatrix}
\end{eqnarray}

よって,expの中身は,


\begin{eqnarray}
(\vec{x} - \vec{\mu})^{T}E^{T}(E^{T})^{-1}\mathbf{\Sigma}^{-1}E^{-1}E(\vec{x} - \vec{\mu})
&=& (\vec{x} - \vec{\mu})^{T} E^{T}
\begin{pmatrix}
\mathbf{\Sigma}_{11}^{-1} & O \\\\
O & (\mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12})^{-1}
\end{pmatrix}
E (\vec{x} - \vec{\mu}) \\\\
&=& \begin{pmatrix}
(\vec{x}_{p} - \vec{\mu}_{p})^{T} & \left( \vec{x}_{q} - (\vec{\mu}_{q} + \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p} - \vec{\mu}_{p})) \right)^{T}
\end{pmatrix}
\begin{pmatrix}
\mathbf{\Sigma}_{11}^{-1} & O \\\\
O & (\mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12})^{-1}
\end{pmatrix}
\begin{pmatrix}
\vec{x}_{p} - \vec{\mu}_{p}  \\\\
\vec{x}_{q} - (\vec{\mu}_{q} + \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p} - \vec{\mu}_{p})) \\\\
\end{pmatrix} \\\\
&=&
(\vec{x}_{p} - \vec{\mu}_{p})^{T}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p} - \vec{\mu}_{p})
+ \left( \vec{x}_{q} - (\vec{\mu}_{q} + \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p} - \vec{\mu}_{p}))\right)^{T}(\mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12})^{-1} \left( \vec{x}_{q} - (\vec{\mu}_{q} + \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p} - \vec{\mu}_{p})) \right)
\end{eqnarray}

expを2つに分けることができる

係数の行列式をみる. ブロック行列の行列式より,


\begin{eqnarray}
|\mathbf{\Sigma}|
&=& |\mathbf{\Sigma}_{11}| \cdot |\mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12}|
\end{eqnarray}

これで2つの正規分布に分けることができる.

よって,


\begin{eqnarray}
\text{Norm}(\vec{x} | \vec{\mu}, \mathbf{\Sigma})
&=& \frac{1}{(2\pi)^{\frac{D}{2}} |\Sigma|^{\frac{1}{2}}}
\exp{\left(-\frac{1}{2}(\vec{x}-\vec{\mu})^{T}\Sigma^{-1}(\vec{x}-\vec{\mu})\right)} \\\\
&=& \frac{1}{(2\pi)^{\frac{p}{2}} |\mathbf{\Sigma}_{11}|^{\frac{1}{2}}} \exp{\left(-\frac{1}{2}(\vec{x}_{p}-\vec{\mu}_{p})^{T}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p}-\vec{\mu}_{p}) \right) }
\cdot
\frac{1}{(2\pi)^{\frac{q}{2}} |\mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12}|^{\frac{1}{2}}} \exp{\left(-\frac{1}{2} \left( \vec{x}_{q} - (\vec{\mu}_{q} + \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p} - \vec{\mu}_{p}))\right)^{T}(\mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12})^{-1} \left( \vec{x}_{q} - (\vec{\mu}_{q} + \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p} - \vec{\mu}_{p})) \right) \right)} \\\\
&=& \text{Norm}(\vec{x}_{p} | \vec{\mu}_{p}, \mathbf{\Sigma}_{11})
\cdot \text{Norm}(\vec{x}_{q} | \vec{\mu}_{q} + \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p} - \vec{\mu}_{p}), \mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12} ) \\\\
\end{eqnarray}

$\vec{x}_{q}$で周辺化すると


\begin{eqnarray}
\int_{ - \infty}^{\infty} \text{Norm}(\vec{x}| \vec{\mu}, \mathbf{\Sigma}) d \vec{x}_{q}
&=& \text{Norm}(\vec{x}_{p} | \vec{\mu}_{p}, \mathbf{\Sigma}_{11})
\cdot \int_{ - \infty}^{\infty} \text{Norm}(\vec{x}_{q} | \vec{\mu}_{q} + \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p} - \vec{\mu}_{p}), \mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12} ) d\vec{x}_{q} \\\\
&=& \text{Norm}(\vec{x}_{p} | \vec{\mu}_{p}, \mathbf{\Sigma}_{11})
\cdot 1 \\\\
&=& \text{Norm}(\vec{x}_{p} | \vec{\mu}_{p}, \mathbf{\Sigma}_{11})
\end{eqnarray}

同様にして,多次元正規分布を $\text{Norm}(\vec{x}_{q} | \vec{\mu}_{q}, \mathbf{\Sigma}_{22})$ が残るように分解する.


\begin{eqnarray}
\text{Norm}(\vec{x} | \vec{\mu}, \mathbf{\Sigma})
&=& \text{Norm}(\vec{x}_{q} | \vec{\mu}_{q}, \mathbf{\Sigma}_{22})
\cdot \text{Norm}(\vec{x}_{p} | \vec{\mu}_{p} + \mathbf{\Sigma}_{12}\mathbf{\Sigma}_{22}^{-1}(\vec{x}_{q} - \vec{\mu}_{q}), \mathbf{\Sigma}_{11} - \mathbf{\Sigma}_{12}\mathbf{\Sigma}_{22}^{-1}\mathbf{\Sigma}_{21} )
\end{eqnarray}

$\vec{x}_{p}$で周辺化する.


\begin{eqnarray}
\int_{-\infty}^{\infty} \text{Norm}(\vec{x} | \vec{\mu}, \mathbf{\Sigma}) d\vec{x}_{p}
&=& \text{Norm}(\vec{x}_{q} | \vec{\mu}_{q}, \mathbf{\Sigma}_{22})
\cdot \int_{-\infty}^{\infty} \text{Norm}(\vec{x}_{p} | \vec{\mu}_{p} + \mathbf{\Sigma}_{12}\mathbf{\Sigma}_{22}^{-1}(\vec{x}_{q} - \vec{\mu}_{q}), \mathbf{\Sigma}_{11} - \mathbf{\Sigma}_{12}\mathbf{\Sigma}_{22}^{-1}\mathbf{\Sigma}_{21} ) d\vec{x}_{p} \\\\
&=& \text{Norm}(\vec{x}_{q} | \vec{\mu}_{q}, \mathbf{\Sigma}_{22})
\end{eqnarray}

条件付分布の導出

周辺化する際,同時分布を積の形に変形した. 同時分布は,条件付分布とその条件になっている分布との積で以下の式で表される.

$$ Pr( x_{1}, x_{2} ) = Pr(x_{2} | x_{1}) \cdot Pr(x_{1}) $$

つまり,周辺化の際に出てきたもう1つの分布が条件付分布になっている.

2次元の場合

周辺化の式変形より,同時分布は以下の積に変形できる.


\begin{eqnarray}
\text{Norm}(x_{1}, x_{2} | \vec{\mu}, \mathbf{\Sigma}_{2\times2})
&=& \text{Norm}(x_{1} | \mu_{1}, \sigma_{1}^{2})
\cdot \text{Norm}(x_{2} | \mu_{2} + \sigma_{12}(\sigma_{1}^{2})^{-1}(x_{1} - \mu_{1}), \sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1} )
\end{eqnarray}

確率の乗法定理より,条件付確率は同時確率をつかって書ける. 固定値$x_{1o}$として


\begin{eqnarray}
Pr(x_{2} | x_{1}=x_{1o}) = \frac{Pr(x_{1}, x_{2})}{Pr(x_{1})}
&=& \frac{Pr(x_{1}, x_{2})}{\int Pr(x_{1}, x_{2})dx_{2}} \\\\
&=& \frac{\text{Norm}(x_{1}, x_{2} | \vec{\mu}, \mathbf{\Sigma}_{2\times2})}{\int \text{Norm}(x_{1}, x_{2} | \vec{\mu}, \mathbf{\Sigma}_{2\times2}) dx_{2}} \\\\
&=& \frac{\text{Norm}(x_{1} | \mu_{1}, \sigma_{1}^{2})
\cdot \text{Norm}(x_{2} | \mu_{2} + \sigma_{12}(\sigma_{1}^{2})^{-1}(x_{1} - \mu_{1}), \sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1} )}{\text{Norm}(x_{1} | \mu_{1}, \sigma_{1}^{2})} \\\\
&=& \text{Norm}(x_{2} | \mu_{2} + \sigma_{12}(\sigma_{1}^{2})^{-1}(x_{1o} - \mu_{1}), \sigma_{2}^{2} - \sigma_{12}^{2}(\sigma_{1}^{2})^{-1} ) \\\\
\end{eqnarray}

条件を逆にしても同様に求めることができる. $x_{2o}$を固定値として


\begin{eqnarray}
Pr(x_{1} | x_{2}=x_{2o})
&=& \frac{Pr(x_{1}, x_{2})}{Pr(x_{2})} \\\\
&=& \frac{\text{Norm}(x_{2} | \mu_{2}, \sigma_{2}^{2})
\cdot \text{Norm}(x_{1} | \mu_{1} + \sigma_{12}(\sigma_{2}^{2})^{-1}(x_{2} - \mu_{2}), \sigma_{1}^{2} - \sigma_{12}^{2}(\sigma_{2}^{2})^{-1} )}{\text{Norm}(x_{2} | \mu_{2}, \sigma_{2}^{2})} \\\\
&=& \text{Norm}(x_{1} | \mu_{1} + \sigma_{12}(\sigma_{2}^{2})^{-1}(x_{2o} - \mu_{2}), \sigma_{1}^{2} - \sigma_{12}^{2}(\sigma_{2}^{2})^{-1} ) \\\\
\end{eqnarray}

多次元の場合

同様に,多次元正規分布も積の形に変形できる


\begin{eqnarray}
\text{Norm}(\vec{x} | \vec{\mu}, \mathbf{\Sigma})
&=& \text{Norm}(\vec{x}_{p} | \vec{\mu}_{p}, \mathbf{\Sigma}_{11})
\cdot \text{Norm}(\vec{x}_{q} | \vec{\mu}_{q} + \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p} - \vec{\mu}_{p}), \mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12} )
\end{eqnarray}

\begin{eqnarray}
Pr(\vec{x}_{q} | \vec{x}_{p}= \vec{x}_{po})
= \frac{Pr(\vec{x}_{p}, \vec{x}_{q})}{Pr(\vec{x}_{p})}
&=& \frac{Pr(\vec{x}_{p}, \vec{x}_{q})}{\int Pr(\vec{x}_{p}, \vec{x}_{q})d \vec{x}_{q} } \\\\
&=& \frac{\text{Norm}(\vec{x}_{p}, \vec{x}_{q} | \vec{\mu}, \mathbf{\Sigma})}{\int \text{Norm}(\vec{x}_{p}, \vec{x}_{q} | \vec{\mu}, \mathbf{\Sigma}) d \vec{x}_{q}} \\\\
&=& \frac{\text{Norm}(\vec{x}_{p} | \vec{\mu}_{p}, \mathbf{\Sigma}_{11})
\cdot \text{Norm}(\vec{x}_{q} | \vec{\mu}_{q} + \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{p} - \vec{\mu}_{p}), \mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12} )}{\text{Norm}(\vec{x}_{p} | \vec{\mu}_{p}, \mathbf{\Sigma}_{11})} \\\\
&=& \text{Norm}(\vec{x}_{q} | \vec{\mu}_{q} + \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}(\vec{x}_{po} - \vec{\mu}_{p}), \mathbf{\Sigma}_{22} - \mathbf{\Sigma}_{21}\mathbf{\Sigma}_{11}^{-1}\mathbf{\Sigma}_{12} ) \\\\
\end{eqnarray}

条件を逆にしても同様に求めることができる.


\begin{eqnarray}
Pr(\vec{x}_{p} | \vec{x}_{q}=\vec{x}_{qo})
&=& \frac{Pr(\vec{x}_{p}, \vec{x}_{q})}{Pr(\vec{x}_{q})} \\\\
&=& \frac{\text{Norm}(\vec{x}_{q} | \vec{\mu}_{q}, \mathbf{\Sigma}_{22})
\cdot \text{Norm}(\vec{x}_{p} | \vec{\mu}_{p} + \mathbf{\Sigma}_{12}\mathbf{\Sigma}_{22}^{-1}(\vec{x}_{q} - \vec{\mu}_{q}), \mathbf{\Sigma}_{11} - \mathbf{\Sigma}_{12}\mathbf{\Sigma}_{22}^{-1}\mathbf{\Sigma}_{21} ) }{\text{Norm}(\vec{x}_{q} | \vec{\mu}_{q}, \mathbf{\Sigma}_{22})} \\\\
&=& \text{Norm}(\vec{x}_{p} | \vec{\mu}_{p} + \mathbf{\Sigma}_{12}\mathbf{\Sigma}_{22}^{-1}(\vec{x}_{qo} - \vec{\mu}_{q}), \mathbf{\Sigma}_{11} - \mathbf{\Sigma}_{12}\mathbf{\Sigma}_{22}^{-1}\mathbf{\Sigma}_{21} ) \\\\
\end{eqnarray}