index.html

<!DOCTYPE html>
<!--[if lt IE 7]><html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
<!--[if (IE 7)&!(IEMobile)]><html class="no-js lt-ie9 lt-ie8" lang="en"><![endif]-->
<!--[if (IE 8)&!(IEMobile)]><html class="no-js lt-ie9" lang="en"><![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en"><!--<![endif]-->
<head>
<meta charset="utf-8">
<title>Runner</title>
<!-- Search-engine description: reuses the site tagline that the social cards below already publish. -->
<meta name="description" content="Time and pressure ...">

<!-- Twitter card metadata. -->
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:title" content="Runner">
<meta name="twitter:description" content="Time and pressure ...">
<!-- NOTE(review): twitter:image, og:image and og:url are site-relative; crawlers generally expect absolute URLs. Set them once the production base URL is confirmed. -->
<meta name="twitter:image" content="/images/geiranger-1508630_1920.jpg">

<!-- Open Graph metadata. This page is the site index, so the object type is "website" rather than "article". -->
<meta property="og:type" content="website">
<meta property="og:title" content="Runner">
<meta property="og:description" content="Time and pressure ...">
<meta property="og:url" content="/">
<meta property="og:site_name" content="Runner">
<meta property="og:image" content="/images/geiranger-1508630_1920.jpg">
<meta property="og:updated_time" content="2019-11-04T00:00:00+00:00">

<!-- highlight.js theme (9.12.0). Served from cdnjs over explicit HTTPS, the same CDN this page already uses for highlight.js 9.15.10 further down. -->
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/monokai-sublime.min.css" rel="stylesheet">

<!-- Canonical URL and RSS feed discovery. -->
<link rel="canonical" href="/">
<link href="/index.xml" rel="alternate" type="application/rss+xml" title="Runner">
<link href="/index.xml" rel="feed" type="application/rss+xml" title="Runner">

<!-- Mobile rendering hints. -->
<meta name="HandheldFriendly" content="True">
<meta name="MobileOptimized" content="320">
<meta name="viewport" content="width=device-width, initial-scale=1.0">

<!-- Theme stylesheets. -->
<link rel="stylesheet" href="/css/main.css">
<link rel="stylesheet" href="/css/note.css">

<meta http-equiv="cleartype" content="on">

<meta name="generator" content="Hugo 0.58.3">

<script src="/js/vendor/modernizr-2.6.2.custom.min.js"></script>

<link rel="shortcut icon" href="/favicon.png">

<!-- Second highlight.js theme plus the library itself; highlighting is started once the page has loaded. -->
<link rel="stylesheet"
      href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.15.10/styles/monokai.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.15.10/highlight.min.js"></script>
<script>hljs.initHighlightingOnLoad();</script>

<script>
    // Font families requested from Google Fonts. The configuration is attached
    // to window explicitly so the global also exists if this script is ever run
    // in strict mode (presumably webfont.js reads it once loaded - confirm).
    window.WebFontConfig = {"google":{"families":["Lora:r:latin,latin-ext","Lora:r,i,b,bi:latin,latin-ext"]}};
    (function() {
      // Build the loader tag and insert it ahead of the first script element in
      // the document so it downloads without blocking parsing.
      var wf = document.createElement('script');
      wf.src = 'https://s0.wp.com/wp-content/plugins/custom-fonts/js/webfont.js';
      wf.async = true; // boolean DOM property, so assign a boolean rather than the string 'true'
      var s = document.getElementsByTagName('script')[0];
      s.parentNode.insertBefore(wf, s);
    })();
</script>

</head>

<body id="post-index" class="feature">
<nav id="dl-menu" class="dl-menuwrapper" role="navigation" style="display:inline-block">
	<!-- Site menu. The dl-* classes and the nested list structure are presumably hooks for the theme's menu script, so they are left untouched. -->
	<button type="button" class="dl-trigger">Open Menu</button>
	<ul class="dl-menu">
		<li><a href="/">Home</a></li>
		<li>
			<a href="#">About</a>
			<ul class="dl-submenu">
				<li>
					<img src="/images/shengxue.jpg" alt="Sheng Xue's photo" class="author-photo">
					<h4>Sheng Xue</h4>
					<p>Working as C&#43;&#43;/C# developer, while actively learning node.js/AngularJs/Data Science/Machine Learning</p>
				</li>
				<li><a href="/about/"><span class="btn btn-inverse">Learn More</span></a></li>
				<li>
					<!-- The icon is decorative; the link text already names the destination. -->
					<a href="https://github.com/shengxue/shengxue.github.io"><i class="fa fa-fw fa-github" aria-hidden="true"></i> GitHub</a>
				</li>
			</ul>
		</li>
		<li>
			<a href="#">Posts</a>
			<ul class="dl-submenu">
				<li><a href="/posts/">All Posts</a></li>
				<li><a href="/tags/">All Tags</a></li>
			</ul>
		</li>
		<!-- Opens in a new tab; rel="noopener noreferrer" keeps that tab from getting a window.opener handle back to this page. -->
		<li><a href="https://www.linkedin.com/in/sheng-xue-24550b28/" target="_blank" rel="noopener noreferrer">Linkedin</a></li>
	</ul>
</nav>

<div class="entry-header">
  <!-- Page banner: photo credit, the feature image, then the site title and tagline. -->
  <!-- NOTE(review): the credit links to an iOS 7 wallpaper pack while the image file is geiranger-1508630_1920.jpg; confirm the attribution still matches the image. -->
  

	<div class="image-credit">Image credit: <a href="http://www.dargadgetz.com/ios-7-abstract-wallpaper-pack-for-iphone-5-and-ipod-touch-retina/">dargadgetz</a></div>
    <div class="entry-image">
      <!-- Empty alt: the image is treated as decorative. -->
      <img src="/images/geiranger-1508630_1920.jpg" alt="">
    </div>
  
  <div class="header-title">
    <div class="header-title-wrap">
		<h1><a href="/" title="Go to the homepage">Runner</a></h1>
	  <h2>
            Time and pressure ...
          </h2>
    </div>
  </div>
</div>

<div id="main" role="main">

<article class="hentry">
  <header>
    
    <div class="entry-meta">
         
		<span class="entry-date date published updated"><time datetime="2019-11-04T00:00:00+00:00"><a href="/posts/2019-11-04-matrix-cookbook-46/">Nov 4, 2019</a></time></span>
        
      <span class="entry-reading-time">
        <i class="fa fa-clock-o"></i>
        Reading time ~3 minutes
      </span>
    </div>
    
	<h1 class="entry-title"><a href="/posts/2019-11-04-matrix-cookbook-46/" rel="bookmark" title="Jacobi&#39;s formula" itemprop="url">Jacobi&#39;s formula</a></h1>
    
  </header>
  <div class="entry-content">
    

<p class="boxed">
<span class="math display">\[
\tag{46}
\frac{\partial  \det \left( \mathbf{Y} \right)}{\partial x}=\,\,\det \left( \mathbf{Y} \right) Tr\left[ \mathbf{Y}^{-1}\frac{\partial \mathbf{Y}}{\partial x} \right] 
\]</span>
</p>
<p>Formula <span class="math inline">\((46)\)</span> is actually Jacobi’s formula. <a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a></p>
<p><strong>Analogy in functions</strong></p>
<p>For a differentiable function <span class="math inline">\(f: D\subseteq R\rightarrow R\)</span>, for all <span class="math inline">\(x\)</span> in some neighborhood of <span class="math inline">\(a\)</span>, <span class="math inline">\(f\)</span> can be written as: <a href="#fn2" class="footnote-ref" id="fnref2"><sup>2</sup></a>
<span class="math display">\[f(x)=f(a)+f^{\prime}(a) (x−a)+R(x−a) \]</span>
and, <span class="math inline">\(L(x)=f(a)+f^{\prime}(a)(x−a)\)</span> is the best affine approximation of the function <span class="math inline">\(f\)</span> at <span class="math inline">\(a\)</span>.</p>
<p>Or, the idea could be expressed in another way:
<span class="math display">\[f(x+\epsilon)=f(x)+f^{\prime}(x) \epsilon +R(\epsilon) \]</span></p>
<p>It comes from <strong>Taylor approximation</strong> at <span class="math inline">\(x+\epsilon\)</span>:
<span class="math display">\[f(x+\epsilon)=f(x)+f^{\prime}(x) \epsilon +f^{\prime\prime}(x) \epsilon^2 /2 + \cdots  \]</span></p>
<p><strong>Lemma 1</strong> <a href="#fn3" class="footnote-ref" id="fnref3"><sup>3</sup></a></p>
<p class="boxed">
<span class="math display">\[
\det \left( \mathbf{I}+\epsilon \mathbf{A} \right) =\,\,1+\epsilon Tr\left( \mathbf{A} \right) +O\left( \epsilon^2 \right) 
\]</span>
</p>
<p>Let <span class="math inline">\(A_1,A_2, \cdots,A_N\)</span> be the column vectors of the matrix <span class="math inline">\(A\)</span>. Let <span class="math inline">\(e_1,e_2, \cdots,e_N\)</span> be the standard basis; note that these basis vectors form the columns of the identity matrix <span class="math inline">\(I\)</span>. Then we recall that the determinant is an alternating multi-linear map of the columns.</p>
<p><span class="math display">\[det(I+ϵA)=det(e_1+ϵA_1,e_2+ϵA_2,…,e_N+ϵA_N) \\
=det(e_1,e_2,…,e_N)+\epsilon \left\{ det(A_1,e_2,…,e_N)+det(e_1,A_2,…,e_N) +\cdots \\
 +det(e_1,e_2,…,A_N) \right\} + O(\epsilon^2)\]</span></p>
<p>The first term is just the determinant of the identity matrix which is 1. The term proportional to ϵ is a sum of expressions like <span class="math inline">\(det(e_1,e_2,…,A_j,…,e_N)\)</span> where the j’th column of the identity matrix is replaced with the j’th column of A. Expanding the determinant along the j’th row we see that <span class="math inline">\(det(e_1,e_2,…,A_j,…,e_N)=A_{jj}\)</span>.</p>
<p><span class="math display">\[det(I+ϵA)=1+ϵ\sum_{j=1}^N A_{jj}+O(ϵ^2)=1+ϵTr(A)+O(ϵ^2)\]</span></p>
<p>Particularly when <span class="math inline">\(n=2\)</span>,
<span class="math display">\[\begin{align}
\det \left( I+\epsilon A \right) &amp;=\det \left( \begin{matrix}{}
    1+\varepsilon a_{11}&amp;       \varepsilon a_{12}\\
    \varepsilon a_{21}&amp;     1+\varepsilon a_{22}\\
\end{matrix} \right) \,\, \\
&amp;=\,\,1+\varepsilon \left( a_{11}+a_{22} \right) +\varepsilon ^2\left( a_{11}a_{22}-a_{12}a_{21} \right) \,\,\\
&amp;=\,\,1+\varepsilon Tr\left( A \right) +\varepsilon ^2\det \left( A \right) 
\end{align}\]</span></p>
<p><strong>Lemma 2</strong>. <a href="#fn4" class="footnote-ref" id="fnref4"><sup>4</sup></a></p>
<p class="boxed">
<span class="math display">\[ det^{\prime}(I)=\mathrm {Tr} \]</span>
</p>
<p>where <span class="math inline">\(det^{\prime}(I)\)</span> is the differential of <span class="math inline">\({\displaystyle \det }\)</span> evaluated at the identity matrix <span class="math inline">\(I\)</span>.</p>
<p>This equation means that the differential of <span class="math inline">\({\displaystyle \det }\)</span>, evaluated at the identity matrix, is equal to the trace. The differential <span class="math inline">\({\displaystyle \det &#39;(I)}\)</span> is a linear operator that maps an n × n matrix to a real number.</p>
<p>Using the definition of a directional derivative together with one of its basic properties for differentiable functions, we have
<span class="math display">\[\begin{equation}
 \operatorname{det}^{\prime}(I)(T)=\nabla_{T} \operatorname{det}(I)=\lim _{\varepsilon \rightarrow 0} \frac{\operatorname{det}(I+\varepsilon T)-\operatorname{det} I}{\varepsilon} \\
 = \lim_{\varepsilon \rightarrow 0} \frac{1+\varepsilon Tr(T)+O(\varepsilon^2)-1}{\varepsilon} \\
 = Tr(T)
\end{equation}\]</span></p>
<p><em>Alternative proof of lemma 2</em>: <a href="#fn5" class="footnote-ref" id="fnref5"><sup>5</sup></a></p>
<p><span class="math inline">\(det\)</span> is a function <span class="math inline">\(M_{n×n}→R\)</span> where <span class="math inline">\(M_{n×n}\)</span> is the space of <span class="math inline">\(n×n\)</span> square matrices. Therefore, a matrix is the equivalent of a point for real functions. The best linear approximation to <span class="math inline">\(det\)</span> near the identity is given by:
<span class="math display">\[det(\mathbf{I}+\mathbf{M})=det(\mathbf{I})+d(det(\mathbf{I}))\mathbf{M}+R(\mathbf{I},\mathbf{M})\]</span>
where the remainder satisfies
<span class="math display">\[\lim_{||\mathbf{M} ||\rightarrow 0}\frac{R(\mathbf{I},\mathbf{M})}{||\mathbf{M} ||}=0\]</span></p>
<p><span class="math inline">\(det^{\prime}(I)=\mathrm {Tr}\)</span> is equivalent to the following:</p>
<p class="boxed">
<span class="math display">\[\begin{equation}
 \left.\frac{\mathrm{d}}{\mathrm{d} t}[\operatorname{det}(\mathbf{I}+t \mathbf{B})]\right|_{t=0}=\operatorname{Tr}(\mathbf{B}) 
\end{equation}\]</span>
</p>
<p><strong>Lemma 3</strong> <a href="#fn6" class="footnote-ref" id="fnref6"><sup>6</sup></a></p>
<p>For an invertible matrix <span class="math inline">\(A\)</span>, we have:</p>
<p class="boxed">
<span class="math display">\[\begin{equation}
 \operatorname{det}^{\prime}(A)(T)=\operatorname{det} A \operatorname{tr}\left(A^{-1} T\right) 
\end{equation}\]</span>
</p>
<p>proof: <a href="#fn7" class="footnote-ref" id="fnref7"><sup>7</sup></a></p>
<p>Remember that, if <span class="math inline">\(f:E→F\)</span> is a differentiable map, a way to compute <span class="math inline">\(df(a)(v)\)</span> is to find a curve <span class="math inline">\(γ:R→E\)</span> with <span class="math inline">\(γ(0)=a\)</span> and <span class="math inline">\(γ′(0)=v\)</span>, and then <span class="math inline">\(df(a)(v)=\frac{d}{dt}|_0f(γ(t))\)</span> (this is the chain rule). Here, find a curve <span class="math inline">\(γ:R→M_n(R)\)</span> with <span class="math inline">\(γ(0)=A\)</span> and <span class="math inline">\(γ′(0)=T\)</span>. Then note that</p>
<p><span class="math display">\[\begin{equation}
 \begin{aligned} d \operatorname{det}(A)(T)=\left.\frac{d}{d t}\right|_{0}(\operatorname{det}(\gamma(t))) &amp;=\left.\frac{d}{d t}\right|_{0}\left(\operatorname{det}\left(A A^{-1} \gamma(t)\right)\right) \\ =\left.\operatorname{det}(A) \frac{d}{d t}\right|_{0}\left(\operatorname{det}\left(A^{-1} \gamma(t)\right)\right)  &amp;=\operatorname{det}(A) d \operatorname{det}(\operatorname{I})\left(A^{-1} T\right) \end{aligned} 
\end{equation}\]</span>
(since <span class="math inline">\(t↦A^{−1}γ(t)\)</span> is a curve that equals <span class="math inline">\(I\)</span> at 0 and whose derivative at 0 is <span class="math inline">\(A^{−1}T\)</span>).</p>
<div class="footnotes">
<hr />
<ol>
<li id="fn1"><p><a href="https://en.wikipedia.org/wiki/Jacobi%27s_formula">Jacobi’s formula</a><a href="#fnref1" class="footnote-back">↩</a></p></li>
<li id="fn2"><p><a href="https://math.stackexchange.com/questions/1784262/how-is-the-derivative-truly-literally-the-best-linear-approximation-near-a-po">Best affine approximation</a><a href="#fnref2" class="footnote-back">↩</a></p></li>
<li id="fn3"><p><a href="https://math.stackexchange.com/questions/1174639/series-expansion-of-the-determinant-for-a-matrix-near-the-identity">Determinant for a matrix near the identity</a><a href="#fnref3" class="footnote-back">↩</a></p></li>
<li id="fn4"><p><a href="https://en.wikipedia.org/wiki/Jacobi%27s_formula">Jacobi’s formula</a><a href="#fnref4" class="footnote-back">↩</a></p></li>
<li id="fn5"><p><a href="https://math.stackexchange.com/questions/2393932/lemma-for-jacobis-formula-proof">Lemma for Jacobi’s formula proof</a><a href="#fnref5" class="footnote-back">↩</a></p></li>
<li id="fn6"><p><a href="https://en.wikipedia.org/wiki/Jacobi%27s_formula">Jacobi’s formula</a><a href="#fnref6" class="footnote-back">↩</a></p></li>
<li id="fn7"><p><a href="https://math.stackexchange.com/questions/3427241/how-to-understand-the-proof-of-lemma2-of-jacobis-formula-at-wikipedia/3427259#3427259">How to understand the proof of lemma2 of Jacobi’s formula at wikipedia</a><a href="#fnref7" class="footnote-back">↩</a></p></li>
</ol>
</div>

  </div>
</article>

<article class="hentry">
  <header>
    
    <div class="entry-meta">
         
		<span class="entry-date date published updated"><time datetime="2019-10-30T00:00:00+00:00"><a href="/posts/2019-10-30-matrix-cookbook-43/">Oct 30, 2019</a></time></span>
        
      <span class="entry-reading-time">
        <i class="fa fa-clock-o"></i>
        Reading time ~2 minutes
      </span>
    </div>
    
	<h1 class="entry-title"><a href="/posts/2019-10-30-matrix-cookbook-43/" rel="bookmark" title="Derivative of log of determinant" itemprop="url">Derivative of log of determinant</a></h1>
    
  </header>
  <div class="entry-content">
    

<p><span class="math display">\[\begin{equation}
\tag{43}
\partial(\ln (\operatorname{det}(\mathbf{X})))=\operatorname{Tr}\left(\mathbf{X}^{-1} \partial \mathbf{X}\right)
\end{equation}\]</span></p>
<p><strong>Lemma 1</strong></p>
<p><span class="math display">\[\begin{equation}
\sum_{i} \sum_{j} \mathbf{A}^{\mathrm{T}}_{i j} \mathbf{B}_{i j} = \operatorname{Tr}\left(\mathbf{A} \mathbf{B}\right)
\end{equation}\]</span></p>
<p><strong>Lemma 2</strong> <a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a></p>
<p>(Credit to <a href="https://statisticaloddsandends.wordpress.com/2018/05/24/derivative-of-log-det-x/" class="uri">https://statisticaloddsandends.wordpress.com/2018/05/24/derivative-of-log-det-x/</a>)</p>
<p><span class="math display">\[\begin{equation}
\frac{\partial(\operatorname{det} \mathbf{X})}{\partial \mathbf{X}_{i j}}=\mathbf{C}_{i j}
\end{equation}\]</span></p>
<p>For a matrix <span class="math inline">\(X\)</span>, we define some terms:</p>
<ul>
<li><p>The <span class="math inline">\((i,j)\)</span> minor of <span class="math inline">\(X\)</span>, denoted <span class="math inline">\(M_{ij}\)</span>, is the determinant of the <span class="math inline">\((n-1) \times (n-1)\)</span> matrix that remains after removing the <span class="math inline">\(i\)</span>th row and <span class="math inline">\(j\)</span>th column from <span class="math inline">\(X\)</span>.</p></li>
<li><p>The cofactor matrix of <span class="math inline">\(X\)</span>, denoted <span class="math inline">\(C\)</span>, is an <span class="math inline">\(n \times n\)</span> matrix such that <span class="math inline">\(C_{ij} = (-1)^{i+j} M_{ij}\)</span>.</p></li>
<li><p>The adjugate matrix of <span class="math inline">\(X\)</span>, denoted <span class="math inline">\(\operatorname{adj } X\)</span>, is simply the transpose of <span class="math inline">\(C\)</span>.</p></li>
</ul>
<p>These terms are useful because they relate to both matrix determinants and inverses. If <span class="math inline">\(X\)</span> is invertible, then <span class="math inline">\(X^{-1}=\frac{1}{\operatorname{det} X}(\operatorname{adj} X)\)</span>, so</p>
<p><span class="math display">\[\begin{equation}
\left(\textbf{X}^{-1}\right)^T_{ij} = \frac{1}{\operatorname{det} X} C_{ij}
\end{equation}\]</span></p>
<p>On the other hand, by the cofactor expansion of the determinant, <span class="math inline">\(\det X=\,\,\underset{k=1}{\overset{n}{\varSigma}}X_{ik}C_{ik}\)</span>, so by the product rule,</p>
<p><span class="math display">\[
\frac{\partial \left( \det  X \right)}{\partial X_{ij}}=\,\,\underset{k=1}{\overset{n}{\varSigma}}\frac{\partial X_{ik}}{\partial X_{ij}}C_{ik}\,\,+\,\,\underset{k=1}{\overset{n}{\varSigma}}X_{ik}\frac{\partial C_{ik}}{\partial X_{ij}}
\]</span></p>
<p>If <span class="math inline">\(k \neq j\)</span>, then <span class="math inline">\(\dfrac{\partial X_{ik}}{\partial X_{ij}} = 0\)</span>, otherwise it is equal to 1. This means that the first term above reduces to <span class="math inline">\(C_{ij}\)</span>. For any <span class="math inline">\(k\)</span>, the elements of <span class="math inline">\(X\)</span> which affect <span class="math inline">\(C_{ik}\)</span> are those which do not lie on row <span class="math inline">\(i\)</span> or column <span class="math inline">\(k\)</span>. Hence, <span class="math inline">\(\dfrac{\partial C_{ik}}{\partial X_{ij}} = 0\)</span> for all k! So,</p>
<p><span class="math display">\[\frac{\partial \left( \det  X \right)}{\partial X_{ij}}=C_{ij}\]</span></p>
<p><strong>Proof</strong></p>
<p>Putting all this together with an application of the chain rule, we get</p>
<p><span class="math display">\[\left(\ln (\det X)\right)_{ij}&#39; = \dfrac{1}{\det X} \dfrac{\partial (\det X)}{\partial X_{ij}} = \dfrac{1}{\det X} C_{ij} = (X^{-1})^T_{ij}\]</span></p>
<p>So,</p>
<p><span class="math display">\[\begin{align}
\partial(\ln (\operatorname{det}(\mathbf{X})))&amp;=\sum_{i} \sum_{j} \left(\ln (\det X)\right)_{ij}&#39; dX_{ij} \\
&amp;= \sum_{i} \sum_{j}(\mathbf{X}^{-1})^T_{ij} dX_{ij} \\
&amp;= \operatorname{Tr}\left(\mathbf{X}^{-1} \partial \mathbf{X}\right)
\end{align}\]</span></p>
<p>where
<span class="math display">\[
\partial X=\left( \begin{matrix}{}
    dX_{11}&amp;        \cdots&amp;     dX_{1n}\\
    \vdots&amp;     \ddots&amp;     \vdots\\
    dX_{n1}&amp;        \cdots&amp;     dX_{nn}\\
\end{matrix} \right) 
\]</span></p>
<div class="footnotes">
<hr />
<ol>
<li id="fn1"><p><a href="https://statisticaloddsandends.wordpress.com/2018/05/24/derivative-of-log-det-x/" class="uri">https://statisticaloddsandends.wordpress.com/2018/05/24/derivative-of-log-det-x/</a><a href="#fnref1" class="footnote-back">↩</a></p></li>
</ol>
</div>

  </div>
</article>

<article class="hentry">
  <header>
    
    <div class="entry-meta">
         
		<span class="entry-date date published updated"><time datetime="2019-10-24T00:00:00+00:00"><a href="/posts/2019-10-24-matrix-derivative/">Oct 24, 2019</a></time></span>
        
      <span class="entry-reading-time">
        <i class="fa fa-clock-o"></i>
        Reading time ~1 minute
      </span>
    </div>
    
	<h1 class="entry-title"><a href="/posts/2019-10-24-matrix-derivative/" rel="bookmark" title="Derivative of inverse matrix" itemprop="url">Derivative of inverse matrix</a></h1>
    
  </header>
  <div class="entry-content">
    

<p><span class="math display">\[\begin{equation}
\tag{40}
\partial\left(\mathbf{X}^{-1}\right)=-\mathbf{X}^{-1}(\partial \mathbf{X}) \mathbf{X}^{-1}
\end{equation}\]</span></p>
<blockquote>
<p><strong>Explanation</strong>: <a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a></p>
</blockquote>
<p><span class="math display">\[\begin{equation}
\underbrace{(I)^{\prime}}_{=0}=\left(\mathbf{X} \mathbf{X}^{-1}\right)^{\prime}=\mathbf{X}^{\prime} \mathbf{X}^{-1}+\mathbf{X}\left(\mathbf{X}^{-1}\right)^{\prime} \Rightarrow
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\mathbf{X}\left(\mathbf{X}^{-1}\right)^{\prime}=-\mathbf{X}^{\prime} \mathbf{X}^{-1} \quad \Rightarrow
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\left(\mathbf{X}^{-1}\right)^{\prime}=-\mathbf{X}^{-1} \mathbf{X}^{\prime} \mathbf{X}^{-1}
\end{equation}\]</span></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{41}
\partial(\operatorname{det}(\mathbf{X}))=\operatorname{Tr}(\operatorname{adj}(\mathbf{X}) \partial \mathbf{X})
\end{equation}\]</span></p>
<div id="background" class="section level2">
<h2>Background</h2>
<div id="adjugate-matrix" class="section level3">
<h3>Adjugate Matrix</h3>
<p>The adjugate of <span class="math inline">\(X\)</span> is the transpose of the cofactor matrix <span class="math inline">\(C\)</span> of <span class="math inline">\(X\)</span>,
<span class="math display">\[\begin{equation}
\operatorname{adj}(\mathbf{X})=\mathbf{C}^{\top}
\end{equation}\]</span></p>
<p>and,
<span class="math display">\[\begin{equation}
\mathbf{X}^{-1}=\operatorname{det}(\mathbf{X})^{-1} \operatorname{adj}(\mathbf{X}) \quad \Rightarrow
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\operatorname{det}(\mathbf{X}) \mathbf{I} = \operatorname{adj}(\mathbf{X}) \mathbf{X}
\end{equation}\]</span></p>
</div>
<div id="characteristic-polynomial" class="section level3">
<h3>Characteristic Polynomial</h3>
<p class="notes">
The characteristic polynomial of a square matrix is a polynomial which is invariant under matrix similarity and has the eigenvalues as roots. It has the determinant and the trace of the matrix as coefficients.
</p>
<p>The characteristic polynomial of a square matrix <span class="math inline">\(A\)</span> is defined by
<span class="math display">\[\begin{equation}
p_{A}(t)=\operatorname{det}(t I-A)
\end{equation}\]</span></p>
</div>
</div>
<div id="proof-2" class="section level2">
<h2>Proof <a href="#fn2" class="footnote-ref" id="fnref2"><sup>2</sup></a></h2>
<div id="via-matrix-computation" class="section level3">
<h3>Via Matrix Computation</h3>
<p><span class="math display">\[\begin{equation}
\frac{\partial \operatorname{det}(\mathbf{X})}{\partial \mathbf{X}_{i j}}=\sum_{k} \operatorname{adj}^{\mathrm{T}}(\mathbf{X})_{i k} \delta_{j k}=\operatorname{adj}^{\mathrm{T}}(\mathbf{X})_{i j} \quad \Rightarrow
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
d(\operatorname{det}(\mathbf{X}))=\sum_{i} \sum_{j} \operatorname{adj}^{\mathrm{T}}(\mathbf{X})_{i j} d \mathbf{X}_{i j} \quad \Rightarrow
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
d(\operatorname{det}(\mathbf{X}))=\operatorname{tr}(\operatorname{adj}(\mathbf{X}) d \mathbf{X})
\end{equation}\]</span></p>
</div>
<div id="via-chain-rule" class="section level3">
<h3>Via Chain Rule</h3>
<p><strong>Lemma 1.</strong> <span class="math inline">\(\operatorname{det}^{\prime}(I)=\operatorname{tr}\)</span>, where <span class="math inline">\(\operatorname{det}^{\prime}\)</span> is the differential of <span class="math inline">\(\operatorname{det}\)</span>.</p>
<p><strong>Lemma 2.</strong> For an invertible matrix <span class="math inline">\(\mathbf{A}\)</span>, we have: <span class="math inline">\(\operatorname{det}^{\prime}(\mathbf{A})(\mathbf{T})=\operatorname{det} \mathbf{A} \operatorname{tr}(\mathbf{A}^{-1}\mathbf{T})\)</span></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{42}
\partial(\operatorname{det}(\mathbf{X}))=\operatorname{det}(\mathbf{X}) \operatorname{Tr}\left(\mathbf{X}^{-1} \partial \mathbf{X}\right)
\end{equation}\]</span></p>
</div>
</div>
<div class="footnotes">
<hr />
<ol>
<li id="fn1"><p><a href="https://math.stackexchange.com/questions/1471825/derivative-of-the-inverse-of-a-matrix" class="uri">https://math.stackexchange.com/questions/1471825/derivative-of-the-inverse-of-a-matrix</a><a href="#fnref1" class="footnote-back">↩</a></p></li>
<li id="fn2"><p><a href="https://en.wikipedia.org/wiki/Jacobi%27s_formula#Derivation" class="uri">https://en.wikipedia.org/wiki/Jacobi%27s_formula#Derivation</a><a href="#fnref2" class="footnote-back">↩</a></p></li>
</ol>
</div>

  </div>
</article>

<article class="hentry">
  <header>
    
    <div class="entry-meta">
         
		<span class="entry-date date published updated"><time datetime="2019-10-11T00:00:00+00:00"><a href="/posts/2019-10-17-matrix-cookbook-1.2-determinant/">Oct 11, 2019</a></time></span>
        
      <span class="entry-reading-time">
        <i class="fa fa-clock-o"></i>
        Reading time ~1 minute
      </span>
    </div>
    
	<h1 class="entry-title"><a href="/posts/2019-10-17-matrix-cookbook-1.2-determinant/" rel="bookmark" title="Matrix cookbook - determinant" itemprop="url">Matrix cookbook - determinant</a></h1>
    
  </header>
  <div class="entry-content">
    

<p><span class="math display">\[\begin{equation}
\tag{18}
\operatorname{det}(\mathbf{A})=\prod_{i} \lambda_{i} \quad \lambda_{i}=\operatorname{eig}(\mathbf{A})
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\tag{19}
\operatorname{det}(c \mathbf{A})=c^{n} \operatorname{det}(\mathbf{A}), \quad \text { if } \mathbf{A} \in \mathbb{R}^{n \times n}
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\tag{20}
\operatorname{det}\left(\mathbf{A}^{T}\right)=\operatorname{det}(\mathbf{A})
\end{equation}\]</span></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{21}
\operatorname{det}(\mathbf{A B})=\operatorname{det}(\mathbf{A}) \operatorname{det}(\mathbf{B})
\end{equation}\]</span></p>
<p>The determinant of a transformation matrix is the scale of area/volume of the shape before and after the transformation. <span class="math inline">\(\mathbf{A B}\)</span> are two consecutive transformations, therefore its determinant is the product of two scales.</p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{22}
\operatorname{det}\left(\mathbf{A}^{-1}\right)=1 / \operatorname{det}(\mathbf{A})
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\tag{23}
\operatorname{det}\left(\mathbf{A}^{n}\right)=\operatorname{det}(\mathbf{A})^{n}
\end{equation}\]</span></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{24}
\operatorname{det}\left(\mathbf{I}+\mathbf{u v}^{T}\right)=1+\mathbf{u}^{T} \mathbf{v}
\end{equation}\]</span></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{25}
\begin{array}{l}{\text { For } n=2:} \\ {\qquad \operatorname{det}(\mathbf{I}+\mathbf{A})=1+\operatorname{det}(\mathbf{A})+\operatorname{Tr}(\mathbf{A})}\end{array}
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\tag{26}
\begin{array}{l}{\text { For } n=3:} \\ {\qquad \operatorname{det}(\mathbf{I}+\mathbf{A})=1+\operatorname{det}(\mathbf{A})+\operatorname{Tr}(\mathbf{A})+\frac{1}{2} \operatorname{Tr}(\mathbf{A})^{2}-\frac{1}{2} \operatorname{Tr}\left(\mathbf{A}^{2}\right)}\end{array}
\end{equation}\]</span></p>

  </div>
</article>

<article class="hentry">
  <header>
    
    <div class="entry-meta">
         
		<span class="entry-date date published updated"><time datetime="2019-10-11T00:00:00+00:00"><a href="/posts/2019-10-11-matrix-cookbook-trace/">Oct 11, 2019</a></time></span>
        
      <span class="entry-reading-time">
        <i class="fa fa-clock-o"></i>
        Reading time ~2 minutes
      </span>
    </div>
    
	<h1 class="entry-title"><a href="/posts/2019-10-11-matrix-cookbook-trace/" rel="bookmark" title="Matrix cookbook - Trace" itemprop="url">Matrix cookbook - Trace</a></h1>
    
  </header>
  <div class="entry-content">
    

<p><span class="math display">\[\begin{equation}
\tag{11}
\operatorname{Tr}(\mathbf{A})=\sum_{i} A_{i i}
\end{equation}\]</span></p>
<blockquote>
<p><span style="color:blue"> Let’s write the trace in a more convenient way. We have: <a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a>
<span class="math display">\[\begin{equation}
A e_{i}=\left[\begin{array}{ccc}{a_{11}} &amp; {\cdots} &amp; {a_{1 n}} \\ {\vdots} &amp; {\ddots} &amp; {\vdots} \\ {a_{n 1}} &amp; {\cdots} &amp; {a_{n n}}\end{array}\right]\left[\begin{array}{c}{0} \\ {\vdots} \\ {1} \\ {\vdots} \\ {0}\end{array}\right]=\left[\begin{array}{c}{a_{1 i}} \\ {\vdots} \\ {a_{n i}}\end{array}\right]
\end{equation}\]</span>
where the <span class="math inline">\(1\)</span> is in the <span class="math inline">\(i\)</span>-th entry. This way:
<span class="math display">\[\begin{equation}
\left\langle e_{i}, A e_{i}\right\rangle= e_{i}^{t} A e_{i}=A_{i i}
\end{equation}\]</span>
So <span class="math inline">\(\operatorname{Tr}(\mathbf{A})=\sum_{i}A_{ii}\)</span></span>.</p>
</blockquote>
<p>Intuitive explanation <a href="#fn2" class="footnote-ref" id="fnref2"><sup>2</sup></a></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{12}
\operatorname{Tr}(\mathbf{A})=\sum_{i} \lambda_{i}, \quad \lambda_{i}=\operatorname{eig}(\mathbf{A})
\end{equation}\]</span></p>
<blockquote>
<p>If eigendecomposition of matrix <span class="math inline">\(\mathbf{A}\)</span> is <span class="math inline">\(\mathbf{A}=\mathbf{Q} \mathbf{\Lambda} \mathbf{Q}^{-1}\)</span>, then according to equation (16):
<span class="math display">\[\begin{align}
\operatorname{Tr}(\mathbf{A})&amp;=\operatorname{Tr}(\mathbf{Q} \mathbf{\Lambda} \mathbf{Q}^{-1}) \\
&amp;=\operatorname{Tr}(\mathbf{\Lambda} \mathbf{Q}^{-1} \mathbf{Q}) \\
&amp;=\operatorname{Tr}(\mathbf{\Lambda}) \\
&amp;=\sum_{i} \lambda_{i}
\end{align}\]</span></p>
</blockquote>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{13}
\operatorname{Tr}(\mathbf{A})=\operatorname{Tr}\left(\mathbf{A}^{T}\right)
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\tag{14}
\operatorname{Tr}(\mathbf{A B})=\operatorname{Tr}(\mathbf{B A})
\end{equation}\]</span></p>
<blockquote>
<p>Now: <span class="math inline">\((\mathbf{A B})_{ij}=\sum_{k}A_{ik}B_{kj}\)</span>, and: <a href="#fn3" class="footnote-ref" id="fnref3"><sup>3</sup></a>
<span class="math display">\[\begin{equation}
\operatorname{tr}(A B)=\sum_{i} \sum_{k} A_{i k} B_{k i}
\end{equation}\]</span></p>
<p>On the other hand, <span class="math inline">\((\mathbf{B A})_{ij}=\sum_{k}B_{ik}A_{kj}\)</span>. So:
<span class="math display">\[\begin{equation}
\operatorname{tr}(B A)=\sum_{i} \sum_{k} B_{i k} A_{k i}
\end{equation}\]</span>
They are the same quantity, up to renaming indices <span class="math inline">\((i \leftrightarrow k)\)</span></p>
</blockquote>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{15}
\operatorname{Tr}(\mathbf{A}+\mathbf{B})=\operatorname{Tr}(\mathbf{A})+\operatorname{Tr}(\mathbf{B})
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\tag{16}
\operatorname{Tr}(\mathbf{A B C})=\operatorname{Tr}(\mathbf{B C A})=\operatorname{Tr}(\mathbf{C A B})
\end{equation}\]</span></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{17}
\mathbf{a}^{T} \mathbf{a}=\operatorname{Tr}\left(\mathbf{a a}^{T}\right)
\end{equation}\]</span></p>
<blockquote>
<p><span class="math display">\[\begin{align}
\mathbf{a a}^{T}&amp;=\left[\begin{array}{c}{a_{1}} \\ {\vdots} \\ {a_{n}}\end{array}\right]\left[{a_{1}}, {\cdots}, {a_{n}}\right] \\
&amp;=\left[\begin{array}{ccc}{a_{1}}^{2} &amp; {\cdots} &amp; {a_{1}a_{n}} \\ {\vdots} &amp; {\ddots} &amp; {\vdots} \\ {a_{n}a_{1}} &amp; {\cdots} &amp; {a_{n}}^{2}\end{array}\right]
\end{align}\]</span></p>
<p>So,
<span class="math display">\[\begin{equation}
\operatorname{Tr}\left(\mathbf{a a}^{T}\right) = a_{1}^{2}+\cdots+a_{n}^{2} = \mathbf{a}^{T} \mathbf{a}
\end{equation}\]</span></p>
</blockquote>
<div class="footnotes">
<hr />
<ol>
<li id="fn1"><p><a href="https://math.stackexchange.com/questions/1314142/trace-of-ab-trace-of-ba/1314150" class="uri">https://math.stackexchange.com/questions/1314142/trace-of-ab-trace-of-ba/1314150</a><a href="#fnref1" class="footnote-back">↩</a></p></li>
<li id="fn2"><p><a href="https://github.com/shengxue/Matrix-Cookbook-Notes/blob/master/1.1%20Trace/1.1%20Trace.ipynb">jupyter notebook</a><a href="#fnref2" class="footnote-back">↩</a></p></li>
<li id="fn3"><p><a href="https://math.stackexchange.com/questions/1314142/trace-of-ab-trace-of-ba/1314150" class="uri">https://math.stackexchange.com/questions/1314142/trace-of-ab-trace-of-ba/1314150</a><a href="#fnref3" class="footnote-back">↩</a></p></li>
</ol>
</div>

  </div>
</article>


<div class="pagination" role="navigation" aria-label="Pagination">
  <!-- Page list for the post index: the current page is plain text, the other pages are links. -->
  <!-- The wrapper stays a div (theme CSS hook) and is exposed as a labelled navigation landmark instead. -->
  <ul class="inline-list">
    <li><strong class="current-page" aria-current="page">1</strong></li>
    <li><a href="/page/2/">2</a></li>
    <li><a href="/page/3/">3</a></li>
    <li><a href="/page/4/">4</a></li>
    <li><a href="/page/2/" class="btn" rel="next">Next</a></li>
  </ul>
</div>


</div>

<div class="footer-wrapper">
  <footer role="contentinfo">
    <span> Powered by <a href="https://gohugo.io/" rel="nofollow">Hugo</a> and blogdown using the <a href="https://github.com/dldx/hpstr-hugo-theme" rel="nofollow">HPSTR</a> theme.</span>

    
<script type="application/javascript">
// Google Analytics bootstrap. Everything below is skipped when doNotTrack is true;
// here it is hard-coded to false, so the snippet always runs.
// NOTE(review): analytics.js is the Universal Analytics loader and "UA-" properties
// reportedly stopped processing hits in 2023 - confirm whether this still records anything.
var doNotTrack = false;
if (!doNotTrack) {
	// Loader: defines a stub function named ga that queues its arguments, then inserts an
	// async script element for analytics.js before the first script element in the document.
	(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
	(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
	m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
	})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
	// Register the property id, then send a pageview for this page.
	ga('create', 'UA-111479944-1', 'auto');
	
	ga('send', 'pageview');
}
</script>


<!-- Disqus comment thread: the embed script renders into #disqus_thread. -->
<div id="disqus_thread"></div>
<script type="application/javascript">
    // Global configuration hook for the Disqus embed; it currently sets nothing.
    var disqus_config = function () {
    };
    (function () {
        // On a local preview, show a notice instead of loading the embed.
        if (["localhost", "127.0.0.1"].indexOf(window.location.hostname) !== -1) {
            // The message is plain text, so textContent is enough and avoids HTML parsing.
            document.getElementById('disqus_thread').textContent = 'Disqus comments not available by default when the website is previewed locally.';
            return;
        }
        var d = document, s = d.createElement('script'); s.async = true;
        // Explicit https: a protocol-relative URL inherits http: on insecure pages
        // and cannot resolve at all when the page is opened from file://.
        s.src = 'https://' + "algorithmist" + '.disqus.com/embed.js';
        s.setAttribute('data-timestamp', +new Date());
        (d.head || d.body).appendChild(s);
    })();
</script>
<noscript>Please enable JavaScript to view the <a href="https://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
<a href="https://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>


<!-- highlight.js 9.12.0 core plus the R grammar, fetched over explicit HTTPS from cdnjs
     (the CDN the <head> already uses) instead of a protocol-relative third-party URL.
     NOTE(review): the <head> also loads highlight.js 9.15.10; this later copy replaces the
     global hljs. Consider consolidating on a single version. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/r.min.js"></script>

<script>
// Empty language list: presumably turns off auto-detection so that only code blocks with an
// explicit language class are highlighted. Confirm against the highlight.js docs.
hljs.configure({languages: []});
// Highlight code blocks once the page has loaded.
hljs.initHighlightingOnLoad();
</script>

<!-- Math rendering. math-code.js loads synchronously ahead of the async MathJax loader
     (same order as before). Both are fetched over explicit HTTPS rather than
     protocol-relative URLs, and MathJax 2.7.1 now comes from cdnjs. -->
<script src="https://yihui.org/js/math-code.js"></script>
<script async
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_CHTML">
</script>
  </footer>
</div>

<!-- jQuery 1.9.1 from Google's CDN over explicit HTTPS (a protocol-relative URL inherits
     http: on insecure pages and fails under file://). If the CDN copy did not define
     window.jQuery, the bundled local copy is written in as a fallback. The theme's own
     scripts load afterwards. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
<script>window.jQuery || document.write('<script src="\/js\/vendor\/jquery-1.9.1.min.js"><\/script>')</script>
<script src="/js/scripts.min.js"></script>

<script type="application/javascript">
// Google Analytics (analytics.js) bootstrap.
// The footer of this page contains the same snippet. Running both inserts analytics.js
// twice and queues two pageview hits per page load, so this copy only runs when no
// earlier bootstrap has happened.
var doNotTrack = false;
// The loader sets window.GoogleAnalyticsObject the first time it runs; if it is already
// set, the tracker and the pageview have been queued and there is nothing left to do.
if (!doNotTrack && !window.GoogleAnalyticsObject) {
	// Minified loader: stores the global name 'ga', defines window.ga as a stub that queues
	// its arguments on ga.q, stamps ga.l with the current time, and inserts an async
	// <script> for analytics.js before the first <script> element in the document.
	(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
	(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
	m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
	})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
	// Queue creation of the tracker for property UA-111479944-1.
	ga('create', 'UA-111479944-1', 'auto');

	// Queue one pageview hit for this page load.
	ga('send', 'pageview');
}
</script>


</body>
</html>