-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
732 lines (570 loc) · 35.7 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
<!DOCTYPE html>
<!--[if lt IE 7]><html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
<!--[if (IE 7)&!(IEMobile)]><html class="no-js lt-ie9 lt-ie8" lang="en"><![endif]-->
<!--[if (IE 8)&!(IEMobile)]><html class="no-js lt-ie9" lang="en"><![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en"><!--<![endif]-->
<head>
<meta charset="utf-8">
<title>Runner</title>
<!-- Description mirrors the tagline already used by the Twitter / Open Graph tags below; it was previously empty. -->
<meta name="description" content="Time and pressure ...">
<meta name="twitter:title" content="Runner">
<meta name="twitter:description" content="Time and pressure ...">
<meta name="twitter:card" content="summary_large_image">
<!-- NOTE(review): twitter:image, og:url and og:image are site-relative. Social crawlers generally expect absolute URLs; confirm the site's baseURL setting and regenerate. -->
<meta name="twitter:image" content="/images/geiranger-1508630_1920.jpg">
<meta property="og:type" content="article">
<meta property="og:title" content="Runner">
<meta property="og:description" content="Time and pressure ...">
<meta property="og:url" content="/">
<meta property="og:site_name" content="Runner">
<meta property="og:image" content="/images/geiranger-1508630_1920.jpg" />
<meta property="og:updated_time" content="2019-11-04T00:00:00+00:00"/>
<!-- highlight.js theme (monokai-sublime, 9.12.0). Served from cdnjs over HTTPS instead of the protocol-relative cdn.bootcss.com URL. -->
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/monokai-sublime.min.css" rel="stylesheet">
<link rel="canonical" href="/">
<link href="/index.xml" rel="alternate" type="application/rss+xml" title="Runner" />
<link href="/index.xml" rel="feed" type="application/rss+xml" title="Runner" />
<meta name="HandheldFriendly" content="True">
<meta name="MobileOptimized" content="320">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="/css/main.css">
<link rel="stylesheet" href="/css/note.css">
<meta http-equiv="cleartype" content="on">
<meta name="generator" content="Hugo 0.58.3" />
<script src="/js/vendor/modernizr-2.6.2.custom.min.js"></script>
<link rel="shortcut icon" href="/favicon.png">
<!-- NOTE(review): a second highlight.js theme (monokai, 9.15.10) is loaded after monokai-sublime above, so it wins wherever the two overlap. Consider keeping only one. -->
<link rel="stylesheet"
href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.15.10/styles/monokai.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.15.10/highlight.min.js"></script>
<!-- Highlights code blocks once the page has loaded, using the highlight.js build loaded on the previous line. -->
<script>hljs.initHighlightingOnLoad();</script>
<script>
// Font configuration: the Lora family in regular and regular/italic/bold/bold-italic
// variants, latin and latin-ext subsets. Declared explicitly on `window` (it was an
// implicit global before); presumably read by the webfont.js script injected below.
window.WebFontConfig = {"google":{"families":["Lora:r:latin,latin-ext","Lora:r,i,b,bi:latin,latin-ext"]}};
(function () {
  // Create the loader script element and insert it ahead of the first <script>
  // in the document.
  var loader = document.createElement('script');
  loader.src = 'https://s0.wp.com/wp-content/plugins/custom-fonts/js/webfont.js';
  // `async` is a boolean property; the previous code assigned the string 'true'.
  loader.async = true;
  var firstScript = document.getElementsByTagName('script')[0];
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
</head>
<body id="post-index" class="feature">
<!-- Primary site menu: a trigger button plus a nested list with "About" and "Posts" submenus. -->
<nav id="dl-menu" class="dl-menuwrapper" role="navigation" style="display:inline-block">
  <!-- Explicit type so the button can never act as a form submit control. -->
  <button type="button" class="dl-trigger">Open Menu</button>
  <ul class="dl-menu">
    <li><a href="/">Home</a></li>
    <li>
      <!-- NOTE(review): href="#" is kept because the menu script presumably binds to these anchors to open the submenu; confirm before converting to a button. -->
      <a href="#">About</a>
      <ul class="dl-submenu">
        <li>
          <img src="/images/shengxue.jpg" alt="Sheng Xue's photo" class="author-photo">
          <h4>Sheng Xue</h4>
          <p>Working as C++/C# developer, while actively learning node.js/AngularJs/Data Science/Machine Learning</p>
        </li>
        <li><a href="/about/"><span class="btn btn-inverse">Learn More</span></a></li>
        <li>
          <!-- The icon is decorative; the visible text "GitHub" already names the link. -->
          <a href="https://github.com/shengxue/shengxue.github.io"><i class="fa fa-fw fa-github" aria-hidden="true"></i> GitHub</a>
        </li>
      </ul>
    </li>
    <li>
      <a href="#">Posts</a>
      <ul class="dl-submenu">
        <li><a href="/posts/">All Posts</a></li>
        <li><a href="/tags/">All Tags</a></li>
      </ul>
    </li>
    <!-- Opens in a new tab; rel="noopener noreferrer" stops the opened page from reaching back through window.opener. -->
    <li><a href="https://www.linkedin.com/in/sheng-xue-24550b28/" target="_blank" rel="noopener noreferrer">Linkedin</a></li>
  </ul>
</nav>
<!-- Page banner: image credit, feature image, then the site title and tagline. -->
<div class="entry-header">
  <!-- NOTE(review): the credit links to a wallpaper pack while the image file is named geiranger-1508630_1920.jpg; confirm the attribution matches the image. -->
  <div class="image-credit">Image credit: <a href="http://www.dargadgetz.com/ios-7-abstract-wallpaper-pack-for-iphone-5-and-ipod-touch-retina/">dargadgetz</a></div>

  <!-- Empty alt: the banner image is treated as decorative. -->
  <div class="entry-image">
    <img src="/images/geiranger-1508630_1920.jpg" alt="">
  </div>

  <!-- Site name (links to the homepage) followed by the tagline. -->
  <div class="header-title">
    <div class="header-title-wrap">
      <h1><a href="/" title="Go to the homepage">Runner</a></h1>
      <h2>
        Time and pressure ...
      </h2>
    </div>
  </div>
</div>
<div id="main" role="main">
<article class="hentry">
<header>
<div class="entry-meta">
<span class="entry-date date published updated"><time datetime="2019-11-04T00:00:00+00:00"><a href="/posts/2019-11-04-matrix-cookbook-46/">Nov 4, 2019</a></time></span>
<span class="entry-reading-time">
<i class="fa fa-clock-o"></i>
Reading time ~3 minutes
</span>
</div>
<h1 class="entry-title"><a href="/posts/2019-11-04-matrix-cookbook-46/" rel="bookmark" title="Jacobi's formula" itemprop="url">Jacobi's formula</a></h1>
</header>
<div class="entry-content">
<p class="boxed">
<span class="math display">\[
\tag{46}
\frac{\partial \det \left( \mathbf{Y} \right)}{\partial x}=\,\,\det \left( \mathbf{Y} \right) Tr\left[ \mathbf{Y}^{-1}\frac{\partial \mathbf{Y}}{\partial x} \right]
\]</span>
</p>
<p>Formula <span class="math inline">\((46)\)</span> is actually Jacobi’s formula. <a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a></p>
<p><strong>Analogy in functions</strong></p>
<p>For a differentiable function <span class="math inline">\(f: D\subseteq R\rightarrow R\)</span>, for all <span class="math inline">\(x\)</span> in some neighborhood of <span class="math inline">\(a\)</span>, <span class="math inline">\(f\)</span> can be written as: <a href="#fn2" class="footnote-ref" id="fnref2"><sup>2</sup></a>
<span class="math display">\[f(x)=f(a)+f^{\prime}(a) (x−a)+R(x−a) \]</span>
and, <span class="math inline">\(L(x)=f(a)+f^{\prime}(a)(x−a)\)</span> is the best affine approximation of the function <span class="math inline">\(f\)</span> at <span class="math inline">\(a\)</span>.</p>
<p>Or, the same idea can be expressed another way:
<span class="math display">\[f(x+\epsilon)=f(x)+f^{\prime}(x) \epsilon +R(\epsilon) \]</span></p>
<p>It comes from <strong>Taylor approximation</strong> at <span class="math inline">\(x+\epsilon\)</span>:
<span class="math display">\[f(x+\epsilon)=f(x)+f^{\prime}(x) \epsilon +f^{\prime\prime}(x) \epsilon^2 /2 + \cdots \]</span></p>
<p><strong>Lemma1</strong> <a href="#fn3" class="footnote-ref" id="fnref3"><sup>3</sup></a></p>
<p class="boxed">
<span class="math display">\[
\det \left( \mathbf{I}+\epsilon \mathbf{A} \right) =\,\,1+\epsilon Tr\left( \mathbf{A} \right) +O\left( \epsilon^2 \right)
\]</span>
</p>
<p>Let <span class="math inline">\(A_1,A_2, \cdots,A_N\)</span> be the column vectors of the matrix <span class="math inline">\(A\)</span>. Let <span class="math inline">\(e_1,e_2, \cdots,e_N\)</span> be the standard basis; note that these basis vectors form the columns of the identity matrix <span class="math inline">\(I\)</span>. Then we recall that the determinant is an alternating multi-linear map on the column space.</p>
<p><span class="math display">\[det(I+ϵA)=det(e_1+ϵA_1,e_2+ϵA_2,…,e_N+ϵA_N) \\
=det(e_1,e_2,…,e_N)+\epsilon \left\{ det(A_1,e_2,…,e_N)+det(e_1,A_2,…,e_N) +\cdots \\
+det(e_1,e_2,…,A_N) \right\} + O(\epsilon^2)\]</span></p>
<p>The first term is just the determinant of the identity matrix which is 1. The term proportional to ϵ is a sum of expressions like <span class="math inline">\(det(e_1,e_2,…,A_j,…,e_N)\)</span> where the j’th column of the identity matrix is replaced with the j’th column of A. Expanding the determinant along the j’th row we see that <span class="math inline">\(det(e_1,e_2,…,A_j,…,e_N)=A_{jj}\)</span>.</p>
<p><span class="math display">\[det(I+ϵA)=1+ϵ\sum_{j=1}^N A_{jj}+O(ϵ^2)=1+ϵTr(A)+O(ϵ^2)\]</span></p>
<p>Particularly when <span class="math inline">\(n=2\)</span>,
<span class="math display">\[\begin{align}
\det \left( I+\epsilon A \right) &=\det \left( \begin{matrix}{}
1+\varepsilon a_{11}& \varepsilon a_{12}\\
\varepsilon a_{21}& 1+\varepsilon a_{22}\\
\end{matrix} \right) \,\, \\
&=\,\,1+\varepsilon \left( a_{11}+a_{22} \right) +\varepsilon ^2\left( a_{11}a_{22}-a_{12}a_{21} \right) \,\,\\
&=\,\,1+\varepsilon Tr\left( A \right) +\varepsilon ^2\det \left( A \right)
\end{align}\]</span></p>
<p><strong>Lemma 2</strong>. <a href="#fn4" class="footnote-ref" id="fnref4"><sup>4</sup></a></p>
<p class="boxed">
<span class="math display">\[ det^{\prime}(I)=\mathrm {Tr} \]</span>
</p>
<p>where <span class="math inline">\(det^{\prime}(I)\)</span> is the differential of <span class="math inline">\({\displaystyle \det }\)</span> evaluated at the identity matrix <span class="math inline">\(I\)</span>.</p>
<p>This equation means that the differential of <span class="math inline">\({\displaystyle \det }\)</span>, evaluated at the identity matrix, is equal to the trace. The differential <span class="math inline">\({\displaystyle \det '(I)}\)</span> is a linear operator that maps an n × n matrix to a real number.</p>
<p>Using the definition of a directional derivative together with one of its basic properties for differentiable functions, we have
<span class="math display">\[\begin{equation}
\operatorname{det}^{\prime}(I)(T)=\nabla_{T} \operatorname{det}(I)=\lim _{\varepsilon \rightarrow 0} \frac{\operatorname{det}(I+\varepsilon T)-\operatorname{det} I}{\varepsilon} \\
= \lim_{\varepsilon \rightarrow 0} \frac{1+ϵTr(T)+O(ϵ^2)-1}{\varepsilon} \\
= Tr(T)
\end{equation}\]</span></p>
<p><em>Alternative proof of lemma 2</em>: <a href="#fn5" class="footnote-ref" id="fnref5"><sup>5</sup></a></p>
<p><span class="math inline">\(det\)</span> is a function <span class="math inline">\(M_{n×n}→R\)</span> where <span class="math inline">\(M_{n×n}\)</span> is the space of <span class="math inline">\(n×n\)</span> square matrices. Therefore, a matrix is the equivalent of a point for real functions. The best linear approximation to <span class="math inline">\(det\)</span> near the identity is given by:
<span class="math display">\[det(\mathbf{I}+\mathbf{M})=det(\mathbf{I})+d(det(\mathbf{I}))M+R(\mathbf{I},\mathbf{M})\]</span>
<span class="math display">\[\underset{||\mathbf{M} ||\rightarrow 0}{\lim}\frac{R(\mathbf{I},\mathbf{M})}{||\mathbf{M} ||}=0\]</span></p>
<p><span class="math inline">\(det^{\prime}(I)=\mathrm {Tr}\)</span> is equivalent to the following:</p>
<p class="boxed">
<span class="math display">\[\begin{equation}
\left.\frac{\mathrm{d}}{\mathrm{d} t}[\operatorname{det}(\mathbf{I}+t \mathbf{B})]\right|_{t=0}=\operatorname{Tr}(\mathbf{B})
\end{equation}\]</span>
</p>
<p><strong>Lemma 3</strong> <a href="#fn6" class="footnote-ref" id="fnref6"><sup>6</sup></a></p>
For an invertible matrix <span class="math inline">\(A\)</span>, we have:
<p class="boxed">
<span class="math display">\[\begin{equation}
\operatorname{det}^{\prime}(A)(T)=\operatorname{det} A \operatorname{tr}\left(A^{-1} T\right)
\end{equation}\]</span>
</p>
<p>proof: <a href="#fn7" class="footnote-ref" id="fnref7"><sup>7</sup></a></p>
<p>Remember that, if <span class="math inline">\(f:E→F\)</span> is a differentiable map, a way to compute <span class="math inline">\(df(a)(v)\)</span> is to find a curve <span class="math inline">\(γ:R→E\)</span> with <span class="math inline">\(γ(0)=a\)</span> and <span class="math inline">\(γ′(0)=v\)</span>, and then <span class="math inline">\(df(a)(v)=\frac{d}{dt}|_0f(γ(t))\)</span> (this is the chain rule). Here, find a curve <span class="math inline">\(γ:R→M_n(R)\)</span> with <span class="math inline">\(γ(0)=A\)</span> and <span class="math inline">\(γ′(0)=T\)</span>. Then note that</p>
<p><span class="math display">\[\begin{equation}
\begin{aligned} d \operatorname{det}(A)(T)=\left.\frac{d}{d t}\right|_{0}(\operatorname{det}(\gamma(t))) &=\left.\frac{d}{d t}\right|_{0}\left(\operatorname{det}\left(A A^{-1} \gamma(t)\right)\right) \\ =\left.\operatorname{det}(A) \frac{d}{d t}\right|_{0}\left(\operatorname{det}\left(A^{-1} \gamma(t)\right)\right) &=\operatorname{det}(A) d \operatorname{det}(\operatorname{I})\left(A^{-1} T\right) \end{aligned}
\end{equation}\]</span>
(since <span class="math inline">\(t↦A^{−1}γ(t)\)</span> is a curve that equals <span class="math inline">\(I\)</span> at 0 and whose derivative at 0 is <span class="math inline">\(A^{−1}T\)</span>).</p>
<div class="footnotes">
<hr />
<ol>
<li id="fn1"><p><a href="https://en.wikipedia.org/wiki/Jacobi%27s_formula">Jacobi’s formula</a><a href="#fnref1" class="footnote-back">↩</a></p></li>
<li id="fn2"><p><a href="https://math.stackexchange.com/questions/1784262/how-is-the-derivative-truly-literally-the-best-linear-approximation-near-a-po">Best affine approximation</a><a href="#fnref2" class="footnote-back">↩</a></p></li>
<li id="fn3"><p><a href="https://math.stackexchange.com/questions/1174639/series-expansion-of-the-determinant-for-a-matrix-near-the-identity">Determinant for a matrix near the identity</a><a href="#fnref3" class="footnote-back">↩</a></p></li>
<li id="fn4"><p><a href="https://en.wikipedia.org/wiki/Jacobi%27s_formula">Jacobi’s formula</a><a href="#fnref4" class="footnote-back">↩</a></p></li>
<li id="fn5"><p><a href="https://math.stackexchange.com/questions/2393932/lemma-for-jacobis-formula-proof">Lemma for Jacobi’s formula proof</a><a href="#fnref5" class="footnote-back">↩</a></p></li>
<li id="fn6"><p><a href="https://en.wikipedia.org/wiki/Jacobi%27s_formula">Jacobi’s formula</a><a href="#fnref6" class="footnote-back">↩</a></p></li>
<li id="fn7"><p><a href="https://math.stackexchange.com/questions/3427241/how-to-understand-the-proof-of-lemma2-of-jacobis-formula-at-wikipedia/3427259#3427259">How to understand the proof of lemma2 of Jacobi’s formula at wikipedia</a><a href="#fnref7" class="footnote-back">↩</a></p></li>
</ol>
</div>
</div>
</article>
<article class="hentry">
<header>
<div class="entry-meta">
<span class="entry-date date published updated"><time datetime="2019-10-30T00:00:00+00:00"><a href="/posts/2019-10-30-matrix-cookbook-43/">Oct 30, 2019</a></time></span>
<span class="entry-reading-time">
<i class="fa fa-clock-o"></i>
Reading time ~2 minutes
</span>
</div>
<h1 class="entry-title"><a href="/posts/2019-10-30-matrix-cookbook-43/" rel="bookmark" title="Derivative of log of determinant" itemprop="url">Derivative of log of determinant</a></h1>
</header>
<div class="entry-content">
<p><span class="math display">\[\begin{equation}
\tag{43}
\partial(\ln (\operatorname{det}(\mathbf{X})))=\operatorname{Tr}\left(\mathbf{X}^{-1} \partial \mathbf{X}\right)
\end{equation}\]</span></p>
<p><strong>Lemma 1</strong></p>
<p><span class="math display">\[\begin{equation}
\sum_{i} \sum_{j} \mathbf{A}^{\mathrm{T}}_{i j} \mathbf{B}_{i j} = \operatorname{Tr}\left(\mathbf{A} \mathbf{B}\right)
\end{equation}\]</span></p>
<p><strong>Lemma 2</strong> <a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a></p>
<p>(Credit to <a href="https://statisticaloddsandends.wordpress.com/2018/05/24/derivative-of-log-det-x/" class="uri">https://statisticaloddsandends.wordpress.com/2018/05/24/derivative-of-log-det-x/</a>)</p>
<p><span class="math display">\[\begin{equation}
\frac{\partial(\operatorname{det} \mathbf{X})}{\partial \mathbf{X}_{i j}}=\mathbf{C}_{i j}
\end{equation}\]</span></p>
<p>For a matrix <span class="math inline">\(X\)</span>, we define some terms:</p>
<ul>
<li><p>The <span class="math inline">\((i,j)\)</span> minor of <span class="math inline">\(X\)</span>, denoted <span class="math inline">\(M_{ij}\)</span>, is the determinant of the <span class="math inline">\((n-1) \times (n-1)\)</span> matrix that remains after removing the <span class="math inline">\(i\)</span>th row and <span class="math inline">\(j\)</span>th column from <span class="math inline">\(X\)</span>.</p></li>
<li><p>The cofactor matrix of <span class="math inline">\(X\)</span>, denoted <span class="math inline">\(C\)</span>, is an <span class="math inline">\(n \times n\)</span> matrix such that <span class="math inline">\(C_{ij} = (-1)^{i+j} M_{ij}\)</span>.</p></li>
<li><p>The adjugate matrix of <span class="math inline">\(X\)</span>, denoted <span class="math inline">\(\operatorname{adj } X\)</span>, is simply the transpose of <span class="math inline">\(C\)</span>.</p></li>
</ul>
<p>These terms are useful because they relate to both matrix determinants and inverses. If <span class="math inline">\(X\)</span> is invertible, then <span class="math inline">\(X^{-1}=\frac{1}{\operatorname{det} X}(\operatorname{adj} X)\)</span>, so</p>
<p><span class="math display">\[\begin{equation}
\left(\textbf{X}^{-1}\right)^T_{ij} = \frac{1}{\operatorname{det} X} C_{ij}
\end{equation}\]</span></p>
<p>On the other hand, by the cofactor expansion of the determinant, <span class="math inline">\(\det X=\,\,\underset{k=1}{\overset{n}{\varSigma}}X_{ik}C_{ik}\)</span>, so by the product rule,</p>
<p><span class="math display">\[
\frac{\partial \left( \det X \right)}{\partial X_{ij}}=\,\,\underset{k=1}{\overset{n}{\varSigma}}\frac{\partial X_{ik}}{\partial X_{ij}}C_{ik}\,\,+\,\,\underset{k=1}{\overset{n}{\varSigma}}X_{ik}\frac{\partial C_{ik}}{\partial X_{ij}}
\]</span></p>
<p>If <span class="math inline">\(k \neq j\)</span>, then <span class="math inline">\(\dfrac{\partial X_{ik}}{\partial X_{ij}} = 0\)</span>, otherwise it is equal to 1. This means that the first term above reduces to <span class="math inline">\(C_{ij}\)</span>. For any <span class="math inline">\(k\)</span>, the elements of <span class="math inline">\(X\)</span> which affect <span class="math inline">\(C_{ik}\)</span> are those which do not lie on row <span class="math inline">\(i\)</span> or column <span class="math inline">\(k\)</span>. Hence, <span class="math inline">\(\dfrac{\partial C_{ik}}{\partial X_{ij}} = 0\)</span> for all k! So,</p>
<p><span class="math display">\[\frac{\partial \left( \det X \right)}{\partial X_{ij}}=C_{ij}\]</span></p>
<p><strong>Proof</strong></p>
<p>Putting all this together with an application of the chain rule, we get</p>
<p><span class="math display">\[\left(\ln (\det X)\right)_{ij}' = \dfrac{1}{\det X} \dfrac{\partial (\det X)}{\partial X_{ij}} = \dfrac{1}{\det X} C_{ij} = (X^{-1})^T_{ij}\]</span></p>
<p>So,</p>
<p><span class="math display">\[\begin{align}
\partial(\ln (\operatorname{det}(\mathbf{X})))&=\sum_{i} \sum_{j} \left(\ln (\det X)\right)_{ij}' dX_{ij} \\
&= \sum_{i} \sum_{j}(\mathbf{X}^{-1})^T_{ij} dX_{ij} \\
&= \operatorname{Tr}\left(\mathbf{X}^{-1} \partial \mathbf{X}\right)
\end{align}\]</span></p>
<p>where
<span class="math display">\[
\partial X=\left( \begin{matrix}{}
dX_{11}& \cdots& dX_{1n}\\
\vdots& \ddots& \vdots\\
dX_{n1}& \cdots& dX_{nn}\\
\end{matrix} \right)
\]</span></p>
<div class="footnotes">
<hr />
<ol>
<li id="fn1"><p><a href="https://statisticaloddsandends.wordpress.com/2018/05/24/derivative-of-log-det-x/" class="uri">https://statisticaloddsandends.wordpress.com/2018/05/24/derivative-of-log-det-x/</a><a href="#fnref1" class="footnote-back">↩</a></p></li>
</ol>
</div>
</div>
</article>
<article class="hentry">
<header>
<div class="entry-meta">
<span class="entry-date date published updated"><time datetime="2019-10-24T00:00:00+00:00"><a href="/posts/2019-10-24-matrix-derivative/">Oct 24, 2019</a></time></span>
<span class="entry-reading-time">
<i class="fa fa-clock-o"></i>
Reading time ~1 minute
</span>
</div>
<h1 class="entry-title"><a href="/posts/2019-10-24-matrix-derivative/" rel="bookmark" title="Derivative of inverse matrix" itemprop="url">Derivative of inverse matrix</a></h1>
</header>
<div class="entry-content">
<p><span class="math display">\[\begin{equation}
\tag{40}
\partial\left(\mathbf{X}^{-1}\right)=-\mathbf{X}^{-1}(\partial \mathbf{X}) \mathbf{X}^{-1}
\end{equation}\]</span></p>
<blockquote>
<p><strong>Explanation</strong>: <a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a></p>
</blockquote>
<p><span class="math display">\[\begin{equation}
\underbrace{(I)^{\prime}}_{=0}=\left(\mathbf{X} \mathbf{X}^{-1}\right)^{\prime}=\mathbf{X}^{\prime} \mathbf{X}^{-1}+\mathbf{X}\left(\mathbf{X}^{-1}\right)^{\prime} \Rightarrow
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\mathbf{X}\left(\mathbf{X}^{-1}\right)^{\prime}=-\mathbf{X}^{\prime} \mathbf{X}^{-1} \quad \Rightarrow
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\left(\mathbf{X}^{-1}\right)^{\prime}=-\mathbf{X}^{-1} \mathbf{X}^{\prime} \mathbf{X}^{-1}
\end{equation}\]</span></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{41}
\partial(\operatorname{det}(\mathbf{X}))=\operatorname{Tr}(\operatorname{adj}(\mathbf{X}) \partial \mathbf{X})
\end{equation}\]</span></p>
<div id="background" class="section level2">
<h2>Background</h2>
<div id="adjugate-matrix" class="section level3">
<h3>Adjugate Matrix</h3>
<p>The adjugate of <span class="math inline">\(X\)</span> is the transpose of the cofactor matrix <span class="math inline">\(C\)</span> of <span class="math inline">\(X\)</span>,
<span class="math display">\[\begin{equation}
\operatorname{adj}(\mathbf{X})=\mathbf{C}^{\top}
\end{equation}\]</span></p>
<p>and,
<span class="math display">\[\begin{equation}
\mathbf{X}^{-1}=\operatorname{det}(\mathbf{X})^{-1} \operatorname{adj}(\mathbf{X}) \quad \Rightarrow
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\operatorname{det}(\mathbf{X}) \mathbf{I} = \operatorname{adj}(\mathbf{X}) \mathbf{X}
\end{equation}\]</span></p>
</div>
<div id="characteristic-polynomial" class="section level3">
<h3>Characteristic Polynomial</h3>
<p class="notes">
The characteristic polynomial of a square matrix is a polynomial which is invariant under matrix similarity and has the eigenvalues as roots. It has the determinant and the trace of the matrix as coefficients.
</p>
<p>The characteristic polynomial of a square matrix <span class="math inline">\(A\)</span> is defined by
<span class="math display">\[\begin{equation}
p_{A}(t)=\operatorname{det}(t I-A)
\end{equation}\]</span></p>
</div>
</div>
<div id="proof-2" class="section level2">
<h2>Proof <a href="#fn2" class="footnote-ref" id="fnref2"><sup>2</sup></a></h2>
<div id="via-matrix-computation" class="section level3">
<h3>Via Matrix Computation</h3>
<p><span class="math display">\[\begin{equation}
\frac{\partial \operatorname{det}(\mathbf{X})}{\partial \mathbf{X}_{i j}}=\sum_{k} \operatorname{adj}^{\mathrm{T}}(\mathbf{X})_{i k} \delta_{j k}=\operatorname{adj}^{\mathrm{T}}(\mathbf{X})_{i j} \quad \Rightarrow
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
d(\operatorname{det}(\mathbf{X}))=\sum_{i} \sum_{j} \operatorname{adj}^{\mathrm{T}}(\mathbf{X})_{i j} d \mathbf{X}_{i j} \quad \Rightarrow
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
d(\operatorname{det}(\mathbf{X}))=\operatorname{tr}(\operatorname{adj}(\mathbf{X}) d \mathbf{X})
\end{equation}\]</span></p>
</div>
<div id="via-chain-rule" class="section level3">
<h3>Via Chain Rule</h3>
<p><strong>Lemma 1.</strong> <span class="math inline">\(\operatorname{det}^{\prime}(I)=\operatorname{tr}\)</span>, where <span class="math inline">\(\operatorname{det}^{\prime}\)</span> is the differential of <span class="math inline">\(\operatorname{det}\)</span>.</p>
<p><strong>Lemma 2.</strong> For an invertible matrix <span class="math inline">\(\mathbf{A}\)</span>, we have: <span class="math inline">\(\operatorname{det}^{\prime}(\mathbf{A})(\mathbf{T})=\operatorname{det} \mathbf{A} \operatorname{tr}(\mathbf{A}^{-1}\mathbf{T})\)</span></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{42}
\partial(\operatorname{det}(\mathbf{X}))=\operatorname{det}(\mathbf{X}) \operatorname{Tr}\left(\mathbf{X}^{-1} \partial \mathbf{X}\right)
\end{equation}\]</span></p>
</div>
</div>
<div class="footnotes">
<hr />
<ol>
<li id="fn1"><p><a href="https://math.stackexchange.com/questions/1471825/derivative-of-the-inverse-of-a-matrix" class="uri">https://math.stackexchange.com/questions/1471825/derivative-of-the-inverse-of-a-matrix</a><a href="#fnref1" class="footnote-back">↩</a></p></li>
<li id="fn2"><p><a href="https://en.wikipedia.org/wiki/Jacobi%27s_formula#Derivation" class="uri">https://en.wikipedia.org/wiki/Jacobi%27s_formula#Derivation</a><a href="#fnref2" class="footnote-back">↩</a></p></li>
</ol>
</div>
</div>
</article>
<article class="hentry">
<header>
<div class="entry-meta">
<span class="entry-date date published updated"><time datetime="2019-10-11T00:00:00+00:00"><a href="/posts/2019-10-17-matrix-cookbook-1.2-determinant/">Oct 11, 2019</a></time></span>
<span class="entry-reading-time">
<i class="fa fa-clock-o"></i>
Reading time ~1 minute
</span>
</div>
<h1 class="entry-title"><a href="/posts/2019-10-17-matrix-cookbook-1.2-determinant/" rel="bookmark" title="Matrix cookbook - determinant" itemprop="url">Matrix cookbook - determinant</a></h1>
</header>
<div class="entry-content">
<p><span class="math display">\[\begin{equation}
\tag{18}
\operatorname{det}(\mathbf{A})=\prod_{i} \lambda_{i} \quad \lambda_{i}=\operatorname{eig}(\mathbf{A})
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\tag{19}
\operatorname{det}(c \mathbf{A})=c^{n} \operatorname{det}(\mathbf{A}), \quad \text { if } \mathbf{A} \in \mathbb{R}^{n \times n}
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\tag{20}
\operatorname{det}\left(\mathbf{A}^{T}\right)=\operatorname{det}(\mathbf{A})
\end{equation}\]</span></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{21}
\operatorname{det}(\mathbf{A B})=\operatorname{det}(\mathbf{A}) \operatorname{det}(\mathbf{B})
\end{equation}\]</span></p>
<p>The determinant of a transformation matrix is the factor by which the transformation scales the area/volume of a shape. <span class="math inline">\(\mathbf{A B}\)</span> is the composition of two consecutive transformations, therefore its determinant is the product of the two scale factors.</p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{22}
\operatorname{det}\left(\mathbf{A}^{-1}\right)=1 / \operatorname{det}(\mathbf{A})
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\tag{23}
\operatorname{det}\left(\mathbf{A}^{n}\right)=\operatorname{det}(\mathbf{A})^{n}
\end{equation}\]</span></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{24}
\operatorname{det}\left(\mathbf{I}+\mathbf{u v}^{T}\right)=1+\mathbf{u}^{T} \mathbf{v}
\end{equation}\]</span></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{25}
\begin{array}{l}{\text { For } n=2:} \\ {\qquad \operatorname{det}(\mathbf{I}+\mathbf{A})=1+\operatorname{det}(\mathbf{A})+\operatorname{Tr}(\mathbf{A})}\end{array}
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\tag{26}
\begin{array}{l}{\text { For } n=3:} \\ {\qquad \operatorname{det}(\mathbf{I}+\mathbf{A})=1+\operatorname{det}(\mathbf{A})+\operatorname{Tr}(\mathbf{A})+\frac{1}{2} \operatorname{Tr}(\mathbf{A})^{2}-\frac{1}{2} \operatorname{Tr}\left(\mathbf{A}^{2}\right)}\end{array}
\end{equation}\]</span></p>
</div>
</article>
<article class="hentry">
<header>
<div class="entry-meta">
<span class="entry-date date published updated"><time datetime="2019-10-11T00:00:00+00:00"><a href="/posts/2019-10-11-matrix-cookbook-trace/">Oct 11, 2019</a></time></span>
<span class="entry-reading-time">
<i class="fa fa-clock-o"></i>
Reading time ~2 minutes
</span>
</div>
<h1 class="entry-title"><a href="/posts/2019-10-11-matrix-cookbook-trace/" rel="bookmark" title="Matrix cookbook - Trace" itemprop="url">Matrix cookbook - Trace</a></h1>
</header>
<div class="entry-content">
<p><span class="math display">\[\begin{equation}
\tag{11}
\operatorname{Tr}(\mathbf{A})=\sum_{i} A_{i i}
\end{equation}\]</span></p>
<blockquote>
<p><span style="color:blue"> Let’s write the trace in a more convenient way. We have: <a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a>
<span class="math display">\[\begin{equation}
A e_{i}=\left[\begin{array}{ccc}{a_{11}} & {\cdots} & {a_{1 n}} \\ {\vdots} & {\ddots} & {\vdots} \\ {a_{n 1}} & {\cdots} & {a_{n n}}\end{array}\right]\left[\begin{array}{c}{0} \\ {\vdots} \\ {1} \\ {\vdots} \\ {0}\end{array}\right]=\left[\begin{array}{c}{a_{1 i}} \\ {\vdots} \\ {a_{n i}}\end{array}\right]
\end{equation}\]</span>
where the <span class="math inline">\(1\)</span> is in the <span class="math inline">\(i\)</span>-th entry. This way:
<span class="math display">\[\begin{equation}
\left\langle e_{i}, A e_{i}\right\rangle= e_{i}^{t} A e_{i}=A_{i i}
\end{equation}\]</span>
So <span class="math inline">\(\operatorname{Tr}(\mathbf{A})=\sum_{i}A_{ii}\)</span></span>.</p>
</blockquote>
<p>Intuitive explanation <a href="#fn2" class="footnote-ref" id="fnref2"><sup>2</sup></a></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{12}
\operatorname{Tr}(\mathbf{A})=\sum_{i} \lambda_{i}, \quad \lambda_{i}=\operatorname{eig}(\mathbf{A})
\end{equation}\]</span></p>
<blockquote>
<p>If eigendecomposition of matrix <span class="math inline">\(\mathbf{A}\)</span> is <span class="math inline">\(\mathbf{A}=\mathbf{Q} \mathbf{\Lambda} \mathbf{Q}^{-1}\)</span>, then according to equation (16):
<span class="math display">\[\begin{align}
\operatorname{Tr}(\mathbf{A})&=\operatorname{Tr}(\mathbf{Q} \mathbf{\Lambda} \mathbf{Q}^{-1}) \\
&=\operatorname{Tr}(\mathbf{\Lambda} \mathbf{Q}^{-1} \mathbf{Q}) \\
&=\operatorname{Tr}(\mathbf{\Lambda}) \\
&=\sum_{i} \lambda_{i}
\end{align}\]</span></p>
</blockquote>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{13}
\operatorname{Tr}(\mathbf{A})=\operatorname{Tr}\left(\mathbf{A}^{T}\right)
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\tag{14}
\operatorname{Tr}(\mathbf{A B})=\operatorname{Tr}(\mathbf{B A})
\end{equation}\]</span></p>
<blockquote>
<p>Now: <span class="math inline">\((\mathbf{A B})_{ij}=\sum_{k}A_{ik}B_{kj}\)</span>, and: <a href="#fn3" class="footnote-ref" id="fnref3"><sup>3</sup></a>
<span class="math display">\[\begin{equation}
\operatorname{tr}(A B)=\sum_{i} \sum_{k} A_{i k} B_{k i}
\end{equation}\]</span></p>
<p>On the other hand, <span class="math inline">\((\mathbf{B A})_{ij}=\sum_{k}B_{ik}A_{kj}\)</span>. So:
<span class="math display">\[\begin{equation}
\operatorname{tr}(B A)=\sum_{i} \sum_{k} B_{i k} A_{k i}
\end{equation}\]</span>
They are the same quantity, up to renaming indices <span class="math inline">\((i \leftrightarrow k)\)</span></p>
</blockquote>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{15}
\operatorname{Tr}(\mathbf{A}+\mathbf{B})=\operatorname{Tr}(\mathbf{A})+\operatorname{Tr}(\mathbf{B})
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\tag{16}
\operatorname{Tr}(\mathbf{A B C})=\operatorname{Tr}(\mathbf{B C A})=\operatorname{Tr}(\mathbf{C A B})
\end{equation}\]</span></p>
<hr />
<p><span class="math display">\[\begin{equation}
\tag{17}
\mathbf{a}^{T} \mathbf{a}=\operatorname{Tr}\left(\mathbf{a a}^{T}\right)
\end{equation}\]</span></p>
<blockquote>
<p><span class="math display">\[\begin{align}
\mathbf{a a}^{T}&=\left[\begin{array}{c}{a_{1}} \\ {\vdots} \\ {a_{n}}\end{array}\right]\left[{a_{1}}, {\cdots}, {a_{n}}\right] \\
&=\left[\begin{array}{ccc}{a_{1}}^{2} & {\cdots} & {a_{1}a_{n}} \\ {\vdots} & {\ddots} & {\vdots} \\ {a_{n}a_{1}} & {\cdots} & {a_{n}}^{2}\end{array}\right]
\end{align}\]</span></p>
<p>So,
<span class="math display">\[\begin{equation}
\operatorname{Tr}\left(\mathbf{a a}^{T}\right) = a_{1}^{2}+\cdots+a_{n}^{2} = \mathbf{a}^{T} \mathbf{a}
\end{equation}\]</span></p>
</blockquote>
<div class="footnotes">
<hr />
<ol>
<li id="fn1"><p><a href="https://math.stackexchange.com/questions/1314142/trace-of-ab-trace-of-ba/1314150" class="uri">https://math.stackexchange.com/questions/1314142/trace-of-ab-trace-of-ba/1314150</a><a href="#fnref1" class="footnote-back">↩</a></p></li>
<li id="fn2"><p><a href="https://github.com/shengxue/Matrix-Cookbook-Notes/blob/master/1.1%20Trace/1.1%20Trace.ipynb">jupyter notebook</a><a href="#fnref2" class="footnote-back">↩</a></p></li>
<li id="fn3"><p><a href="https://math.stackexchange.com/questions/1314142/trace-of-ab-trace-of-ba/1314150" class="uri">https://math.stackexchange.com/questions/1314142/trace-of-ab-trace-of-ba/1314150</a><a href="#fnref3" class="footnote-back">↩</a></p></li>
</ol>
</div>
</div>
</article>
<!-- Pager for the post list. Exposed as a labelled navigation landmark; the element stays a div so existing theme selectors on .pagination keep matching. -->
<div class="pagination" role="navigation" aria-label="Pagination">
  <ul class="inline-list">
    <!-- aria-current marks the page being viewed for assistive technology. -->
    <li><strong class="current-page" aria-current="page">1</strong></li>
    <li><a href="/page/2/">2</a></li>
    <li><a href="/page/3/">3</a></li>
    <li><a href="/page/4/">4</a></li>
    <!-- rel="next" identifies this link as the next page in the series. -->
    <li><a href="/page/2/" class="btn" rel="next">Next</a></li>
  </ul>
</div>
</div>
<div class="footer-wrapper">
<footer role="contentinfo">
<span> Powered by <a href="https://gohugo.io/" rel="nofollow">Hugo</a> and blogdown using the <a href="https://github.com/dldx/hpstr-hugo-theme" rel="nofollow">HPSTR</a> theme.</span>
<script type="application/javascript">
// Google Analytics (analytics.js) bootstrap.
// `doNotTrack` is hard-coded to false in this page, so the branch below always runs.
var doNotTrack = false;
if (!doNotTrack) {
  (function (win, doc, tagName, scriptUrl, commandName) {
    // Record which global name holds the analytics command function.
    win['GoogleAnalyticsObject'] = commandName;
    // Keep an existing function if one is present; otherwise install a stub
    // that buffers each call's arguments in its `.q` array.
    win[commandName] = win[commandName] || function () {
      (win[commandName].q = win[commandName].q || []).push(arguments);
    };
    // Timestamp (ms since epoch) taken when this snippet ran.
    win[commandName].l = 1 * new Date();
    // Insert an async script element for the library before the first
    // script element already in the document.
    var loaderTag = doc.createElement(tagName);
    var firstTag = doc.getElementsByTagName(tagName)[0];
    loaderTag.async = 1;
    loaderTag.src = scriptUrl;
    firstTag.parentNode.insertBefore(loaderTag, firstTag);
  })(window, document, 'script', 'https://www.google-analytics.com/analytics.js', 'ga');
  ga('create', 'UA-111479944-1', 'auto');
  ga('send', 'pageview');
}
</script>
<div id="disqus_thread"></div>
<script type="application/javascript">
// Disqus configuration hook; this page sets no overrides in it.
var disqus_config = function () {
};
// Loads the Disqus embed script into the page, unless the page is being
// served from a local preview host, in which case a notice is shown instead.
(function() {
  var localHosts = ["localhost", "127.0.0.1"];
  if (localHosts.indexOf(window.location.hostname) !== -1) {
    document.getElementById('disqus_thread').innerHTML = 'Disqus comments not available by default when the website is previewed locally.';
    return;
  }
  var d = document, s = d.createElement('script'); s.async = true;
  // Explicit https: a protocol-relative URL inherits the page's scheme, so it
  // cannot load when the page is opened from a non-HTTP scheme.
  s.src = 'https://' + "algorithmist" + '.disqus.com/embed.js';
  s.setAttribute('data-timestamp', +new Date());
  (d.head || d.body).appendChild(s);
})();
</script>
<noscript>Please enable JavaScript to view the <a href="https://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
<a href="https://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
<script src="//cdn.bootcss.com/highlight.js/9.12.0/highlight.min.js"></script>
<script src="//cdn.bootcss.com/highlight.js/9.12.0/languages/r.min.js"></script>
<script>
// Configure highlight.js, then register highlighting to run on page load.
// NOTE(review): the empty `languages` list presumably turns off language
// auto-detection so only explicitly tagged code blocks are coloured; confirm
// against the highlight.js 9.x documentation.
(function (highlighter) {
  highlighter.configure({languages: []});
  highlighter.initHighlightingOnLoad();
})(hljs);
</script>
<script src="//yihui.name/js/math-code.js"></script>
<script async
src="//cdn.bootcss.com/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_CHTML">
</script>
</footer>
</div>
<script src="//ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
<script>
  // Fallback: when the preceding remote jQuery script did not define
  // window.jQuery, write a script tag for the copy bundled with the site.
  if (!window.jQuery) {
    document.write('<script src="\/js\/vendor\/jquery-1.9.1.min.js"><\/script>');
  }
</script>
<script src="/js/scripts.min.js"></script>
<script type="application/javascript">
// Google Analytics (analytics.js) bootstrap.
// The page footer already contains this exact snippet with the same property
// ID. Running it a second time injects analytics.js again and sends a second
// pageview for every page load, so this copy only runs when no `ga` command
// function has been installed yet.
var doNotTrack = false;
if (!doNotTrack && typeof window.ga !== 'function') {
// Standard loader: installs a queueing `ga` stub, then inserts an async
// script element for analytics.js before the first script in the document.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-111479944-1', 'auto');
ga('send', 'pageview');
}
</script>
</body>
</html>