notebooks/check-pdf.ipynb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "fad5b45a-158a-4484-a34e-453f1b3316cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "from importlib.util import find_spec\n",
    "from pathlib import Path\n",
    "\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "# Load variables from a .env file before any project module is imported.\n",
    "load_dotenv()\n",
    "\n",
    "# Fall back to the parent directory when `rag` is not importable as-is\n",
    "# (presumably the repo root, since this notebook lives in notebooks/ -- TODO confirm).\n",
    "if find_spec(\"rag\") is None:\n",
    "    sys.path.append('..')\n",
    "\n",
    "from rag.parser.pdf import PDFParser"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6762495a-fc4e-4022-963e-e302e095b0cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parser instance shared by the cells below; built with no constructor arguments.\n",
    "p = PDFParser()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1eeef01f-497b-4069-8cde-fa018e99ce52",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample PDF, located relative to the current user's home directory rather than\n",
    "# through one machine's hard-coded absolute /home/<user> prefix.\n",
    "path = Path.home() / \"projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9ac72c90-fea7-4f96-a648-691a01e5b38b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Two explicit steps: read the file into a blob, then let the parser turn\n",
    "# that blob into the indexable result `d`.\n",
    "pdf_blob = p.from_path(path)\n",
    "d = p.from_data(pdf_blob)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "24d3edd0-8430-4f54-b03a-4dcce67d2cff",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the raw result of from_path around so its type can be inspected.\n",
    "b = p.from_path(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0a2c5f13-17f9-4674-b524-4bba7e05754a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "langchain_core.document_loaders.blob_loaders.Blob"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show which class from_path returned.\n",
    "type(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8496ab61-dfa8-45be-bea0-4a31fc5476da",
   "metadata": {},
   "outputs": [],
   "source": [
    "# File name (without directories) recorded as the source of the second element of `d`.\n",
    "second_doc = d[1]\n",
    "Path(second_doc.metadata[\"source\"]).name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "00df538f-2d15-47c8-87dd-639582c41cbb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(page_content='A Signal Processing\\nPerspective on Financial\\nEngineering\\nYiyong Feng\\nDept. of Electronic and Computer Engineering\\nThe Hong Kong University of Science and Technology\\nClear Water Bay, Kowloon\\nHong Kong\\nyiyong@connect.ust.hk\\nDaniel P. Palomar\\nDept. of Electronic and Computer Engineering\\nThe Hong Kong University of Science and Technology\\nClear Water Bay, Kowloon\\nHong Kong\\npalomar@ust.hk\\nBoston — Delft', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 0}),\n",
       " Document(page_content='Foundations and TrendsR⃝in Signal Processing\\nPublished, sold and distributed by:\\nnow Publishers Inc.\\nPO Box 1024\\nHanover, MA 02339\\nUnited States\\nTel. +1-781-985-4510\\nwww.nowpublishers.com\\nsales@nowpublishers.com\\nOutside North America:\\nnow Publishers Inc.\\nPO Box 179\\n2600 AD Delft\\nThe Netherlands\\nTel. +31-6-51115274\\nThe preferred citation for this publication is\\nY. Feng and D. P. Palomar. A Signal Processing Perspective on Financial\\nEngineering . Foundations and TrendsR⃝in Signal Processing, vol. 9, no. 1-2,\\npp. 1–231, 2015.\\nThis Foundations and TrendsR⃝issue was typeset in LATEX using a class ﬁle designed\\nby Neal Parikh. Printed on acid-free paper.\\nISBN: 978-1-68083-119-1\\nc⃝2016 Y. Feng and D. P. Palomar\\nAll rights reserved. No part of this publication may be reproduced, stored in a retrieval\\nsystem, or transmitted in any form or by any means, mechanical, photocopying, recording\\nor otherwise, without prior written permission of the publishers.\\nPhotocopying. In the USA: This journal is registered at the Copyright Clearance Cen-\\nter, Inc., 222 Rosewood Drive, Danvers, MA 01923. Authorization to photocopy items for\\ninternal or personal use, or the internal or personal use of speciﬁc clients, is granted by\\nnow Publishers Inc for users registered with the Copyright Clearance Center (CCC). The\\n‘services’ for users can be found on the internet at: www.copyright.com\\nFor those organizations that have been granted a photocopy license, a separate system\\nof payment has been arranged. Authorization does not extend to other kinds of copy-\\ning, such as that for general distribution, for advertising or promotional purposes, for\\ncreating new collective works, or for resale. In the rest of the world: Permission to pho-\\ntocopy must be obtained from the copyright owner. Please apply to now Publishers Inc.,\\nPO Box 1024, Hanover, MA 02339, USA; Tel. 
+1 781 871 0245; www.nowpublishers.com;\\nsales@nowpublishers.com\\nnow Publishers Inc. has an exclusive license to publish this material worldwide. Permission\\nto use this content must be obtained from the copyright license holder. Please apply to\\nnow Publishers, PO Box 179, 2600 AD Delft, The Netherlands, www.nowpublishers.com;\\ne-mail: sales@nowpublishers.com', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 1}),\n",
       " Document(page_content='Foundations and TrendsR⃝in Signal Processing\\nVolume 9, Issue 1-2, 2015\\nEditorial Board\\nEditor-in-Chief\\nYonina Eldar\\nTechnion - Israel Institute of Technology\\nIsrael\\nEditors\\nRobert M. Gray\\nFounding Editor-in-Chief\\nStanford University\\nPao-Chi Chang\\nNCU, Taiwan\\nPamela Cosman\\nUC San Diego\\nMichelle Eﬀros\\nCaltech\\nYariv Ephraim\\nGMU\\nAlfonso Farina\\nSelex ES\\nSadaoki Furui\\nTokyo Tech\\nGeorgios Giannakis\\nUniversity of Minnesota\\nVivek Goyal\\nBoston University\\nSinan Gunturk\\nCourant Institute\\nChristine Guillemot\\nINRIA\\nRobert W. Heath, Jr.\\nUT AustinSheila Hemami\\nNortheastern University\\nLina Karam\\nArizona State U\\nNick Kingsbury\\nUniversity of Cambridge\\nAlex Kot\\nNTU, Singapore\\nJelena Kovacevic\\nCMU\\nGeert Leus\\nTU Delft\\nJia Li\\nPenn State\\nHenrique Malvar\\nMicrosoft Research\\nB.S. Manjunath\\nUC Santa Barbara\\nUrbashi Mitra\\nUSC\\nBjörn Ottersten\\nKTH Stockholm\\nVincent Poor\\nPrinceton UniversityAnna Scaglione\\nUC Davis\\nMihaela van der Shaar\\nUCLA\\nNicholas D. Sidiropoulos\\nTU Crete\\nMichael Unser\\nEPFL\\nP. P. Vaidyanathan\\nCaltech\\nAmi Wiesel\\nHebrew U\\nMin Wu\\nUniversity of Maryland\\nJosiane Zerubia\\nINRIA', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 2}),\n",
       " Document(page_content='Editorial Scope\\nTopics\\nFoundations and TrendsR⃝in Signal Processing publishes survey and\\ntutorial articles in the following topics:\\n•Adaptive signal processing\\n•Audio signal processing\\n•Biological and biomedical signal\\nprocessing\\n•Complexity in signal processing\\n•Digital signal processing\\n•Distributed and network signal\\nprocessing\\n•Image and video processing\\n•Linear and nonlinear ﬁltering\\n•Multidimensional signal\\nprocessing\\n•Multimodal signal processing\\n•Multirate signal processing\\n•Multiresolution signal processing\\n•Nonlinear signal processing\\n•Randomized algorithms in signal\\nprocessing\\n•Sensor and multiple source signal\\nprocessing, source separation•Signal decompositions, subband\\nand transform methods, sparse\\nrepresentations\\n•Signal processing for\\ncommunications\\n•Signal processing for security and\\nforensic analysis, biometric signal\\nprocessing\\n•Signal quantization, sampling,\\nanalog-to-digital conversion,\\ncoding and compression\\n•Signal reconstruction,\\ndigital-to-analog conversion,\\nenhancement, decoding and\\ninverse problems\\n•Speech/audio/image/video\\ncompression\\n•Speech and spoken language\\nprocessing\\n•Statistical/machine learning\\n•Statistical signal processing\\nInformation for Librarians\\nFoundationsandTrendsR⃝inSignalProcessing,2015,Volume9,4issues.ISSN\\npaper version 1932-8346. ISSN online version 1932-8354. Also available as a\\ncombined paper and online subscription.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 3}),\n",
       " Document(page_content='Foundations and TrendsR⃝in Signal Processing\\nVol. 9, No. 1-2 (2015) 1–231\\nc⃝2016 Y. Feng and D. P. Palomar\\nDOI: 10.1561/2000000072\\nA Signal Processing Perspective on Financial\\nEngineering\\nYiyong Feng\\nDept. of Electronic and Computer Engineering\\nThe Hong Kong University of Science and Technology\\nClear Water Bay, Kowloon\\nHong Kong\\nyiyong@connect.ust.hk\\nDaniel P. Palomar\\nDept. of Electronic and Computer Engineering\\nThe Hong Kong University of Science and Technology\\nClear Water Bay, Kowloon\\nHong Kong\\npalomar@ust.hk', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 4}),\n",
       " Document(page_content='Contents\\n1 Introduction 2\\n1.1 A Signal Processing Perspective on Financial Engineering . 5\\n1.2 Connections between Fin. Eng. and Signal Process. . . . . 9\\n1.3 Outline . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12\\nI Financial Modeling & Order Execution 16\\n2 Modeling of Financial Time Series 17\\n2.1 Asset Returns . . . . . . . . . . . . . . . . . . . . . . . . 18\\n2.2 General Structure of a Model . . . . . . . . . . . . . . . . 21\\n2.3 I.I.D. Model . . . . . . . . . . . . . . . . . . . . . . . . . 22\\n2.4 Factor Model . . . . . . . . . . . . . . . . . . . . . . . . 23\\n2.5 VARMA Model . . . . . . . . . . . . . . . . . . . . . . . 27\\n2.6 VECM . . . . . . . . . . . . . . . . . . . . . . . . . . . . 31\\n2.7 Conditional Volatility Models . . . . . . . . . . . . . . . . 34\\n2.8 Summary of Diﬀerent Models and Their Limitations . . . . 42\\n3 Modeling Fitting: Mean and Covariance Matrix Estimators 47\\n3.1 Fitting Process, Types of Estimators, and Main Focus . . . 47\\n3.2 Warm Up: Large Sample Regime . . . . . . . . . . . . . . 50\\n3.3 Small Sample Regime: Shrinkage Estimators . . . . . . . . 59\\nii', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 5}),\n",
       " Document(page_content='iii\\n3.4 Heavy Tail Issue: Robust Estimators . . . . . . . . . . . . 70\\n3.5 Small Sample Regime & Heavy Tail Issue . . . . . . . . . 74\\n3.6 Summary of Diﬀerent Estimators . . . . . . . . . . . . . . 83\\n4 Order Execution 85\\n4.1 Limit Order Book and Market Impact . . . . . . . . . . . 85\\n4.2 Price Model and Execution Cost . . . . . . . . . . . . . . 91\\n4.3 Minimizing Expected Execution Cost . . . . . . . . . . . . 94\\n4.4 Minimizing Mean-Variance Trade-oﬀ of Execution Cost . . 94\\n4.5 Minimizing CVaR of Execution Cost . . . . . . . . . . . . 95\\nII Portfolio Optimization (Risk-Return Trade-oﬀ) 101\\n5 Portfolio Optimization with Known Parameters 102\\n5.1 Markowitz Mean-Variance Portfolio Optimization . . . . . 103\\n5.2 Drawbacks of Markowitz Framework . . . . . . . . . . . . 111\\n5.3 Black-Litterman Model . . . . . . . . . . . . . . . . . . . 114\\n6 Robust Portfolio Optimization 120\\n6.1 Robust Mean-Variance Trade-oﬀ Portfolio Optimization . . 121\\n6.2 Robust Sharpe ratio Optimization . . . . . . . . . . . . . 128\\n6.3 Connections with Robust Beamforming . . . . . . . . . . . 131\\n7 Multi-Portfolio Optimization 135\\n7.1 From Single-Portfolio to Multi-Portfolio . . . . . . . . . . 136\\n7.2 Multi-Portfolio Problems . . . . . . . . . . . . . . . . . . 139\\n7.3 Eﬃcient Solving Methods . . . . . . . . . . . . . . . . . . 142\\n8 Index Tracking 148\\n8.1 Diﬀerent Index Tracking Methods . . . . . . . . . . . . . 149\\n8.2 Sparse Index Tracking: Two-Step Approach . . . . . . . . 151\\n8.3 Sparse Index Tracking: Joint Optimization Approach . . . 154\\n9 Risk Parity Portfolio Optimization 161\\n9.1 What is a Risk Parity Portfolio? . . . . . . . . . . . . . . . 162\\n9.2 Risk Parity Portfolio Formulations . . . . . . . . . . . . . 165', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 6}),\n",
       " Document(page_content='iv\\n9.3 SCRIP: An Eﬃcient Numerical Solving Approach . . . . . 169\\nIII Statistical Arbitrage (Mean-Reversion) 172\\n10 Statistical Arbitrage 173\\n10.1 Cointegration versus Correlation . . . . . . . . . . . . . . 174\\n10.2 Pairs Selection . . . . . . . . . . . . . . . . . . . . . . . . 181\\n10.3 Cointegration Test . . . . . . . . . . . . . . . . . . . . . . 184\\n10.4 Investing in Cointegrated Pairs . . . . . . . . . . . . . . . 192\\n10.5 From Pairs Trading to Statistical Arbitrage . . . . . . . . 198\\n11 Conclusions 201\\nAppendices 203\\nA MATLAB Code of Example 3.1 204\\nB MATLAB Code of Figure 5.1 207\\nC MATLAB Code of Example 10.4 209\\nAbbreviations 211\\nNotation 213\\nReferences 216', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 7}),\n",
       " Document(page_content='Abstract\\nFinancial engineering and electrical engineering are seemingly diﬀer-\\nent areas that share strong underlying connections. Both areas rely\\non statistical analysis and modeling of systems; either modeling the\\nﬁnancial markets or modeling, say, wireless communication channels.\\nHaving a model of reality allows us to make predictions and to optimize\\nthe strategies. It is as important to optimize our investment strategies\\nin a ﬁnancial market as it is to optimize the signal transmitted by an\\nantenna in a wireless link.\\nThis monograph provides a survey of ﬁnancial engineering from a\\nsignal processing perspective, that is, it reviews ﬁnancial modeling, the\\ndesign of quantitative investment strategies, and order execution with\\ncomparison to seemingly diﬀerent problems in signal processing and\\ncommunication systems, such as signal modeling, ﬁlter/beamforming\\ndesign, network scheduling, and power allocation.\\nY. Feng and D. P. Palomar. A Signal Processing Perspective on Financial\\nEngineering . Foundations and TrendsR⃝in Signal Processing, vol. 9, no. 1-2,\\npp. 1–231, 2015.\\nDOI: 10.1561/2000000072.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 8}),\n",
       " Document(page_content='1\\nIntroduction\\nDespitethediﬀerentnaturesofﬁnancialengineeringandelectricalengi-\\nneering, both areas are intimately connected on a mathematical level.\\nThe foundations of ﬁnancial engineering lie on the statistical analy-\\nsis of numerical time series and the modeling of the behavior of the\\nﬁnancial markets in order to perform predictions and systematically\\noptimize investment strategies. Similarly, the foundations of electrical\\nengineering, for instance, wireless communication systems, lie on statis-\\ntical signal processing and the modeling of communication channels in\\norder to perform predictions and systematically optimize transmission\\nstrategies. Both foundations are the same in disguise.\\nThisobservationimmediatelypromptsthequestionofwhetherboth\\nareas can beneﬁt from each other. It is often the case in science that the\\nsame or very similar methodologies are developed and applied indepen-\\ndently in diﬀerent areas. The purpose of this monograph is to explore\\nsuch connections and to capitalize on the existing mathematical tools\\ndeveloped in wireless communications and signal processing to solve\\nreal-life problems arising in the ﬁnancial markets in an unprecedented\\nway.\\n2', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 9}),\n",
       " Document(page_content='3\\nThus,thismonographisaboutinvestmentinﬁnancialassetstreated\\nas a signal processing and optimization problem. An investment is the\\ncurrent commitment of resources in the expectation of reaping future\\nbeneﬁts. In ﬁnancial markets, such resources usually take the form of\\nmoney and thus the investment is the present commitment of money\\nin order to reap (hopefully more) money later [27]. The carriers of\\nmoney in ﬁnancial markets are usually referred to as ﬁnancial assets.\\nThere are various classes of ﬁnancial assets, namely, equity securities\\n(e.g., common stocks), exchange-traded funds (ETFs), market indexes,\\ncommodities, exchanges rates, ﬁxed-income securities, derivatives (e.g.,\\noptions and futures), etc. A detailed description of each kind of asset\\nis well documented, e.g., [27, 103]. For diﬀerent kinds of assets, the key\\nquantitiesofinterestarenotthesame;forexample,forequitysecurities\\nthe quantities of interest are the compounded returns or log-returns;\\nfor ﬁxed-income securities they are the changes in yield to maturity;\\nand for options they are changes in the rolling at-the-money forward\\nimplied volatility [143].\\nRoughly speaking, there are three families of investment philoso-\\nphies: fundamental analysis, technical analysis, and quantitative analy-\\nsis. Fundamental analysis uses ﬁnancial and economical measures, such\\nas earnings, dividend yields, expectations of future interest rates, and\\nmanagement, to determine the value of each share of the company’s\\nstocks and then recommends purchasing the stocks if the estimated\\nvalue exceeds the current stock price [88, 89]. Warren Buﬀett of Berk-\\nshire Hathaway is probably the most famous practitioner of fundamen-\\ntal analysis [91]. 
Technical analysis, also known as “charting,” is essen-\\ntially the search for patterns in one dimensional charts of the prices of a\\nstock.Inaway,itpretendstobeascientiﬁcanalysisofpatterns(similar\\nto machine learning) but generally implemented in an unscientiﬁc and\\nanecdotal way with a low predictive power, as detailed in [132]. Quanti-\\ntative analysis applies quantitative (namely scientiﬁc or mathematical)\\ntools to discover the predictive patterns from ﬁnancial data [128]. To\\nput this in perspective with the previous approach, technical analysis\\nis to quantitative analysis what astrology is to astronomy. The pioneer\\nof the quantitative investment approach is Edward O. Thorp, who used', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 10}),\n",
       " Document(page_content='4 Introduction\\nhis knowledge of probability and statistics in the stock markets and has\\nmade a signiﬁcant fortune since the late 1960s [193]. Quantitative anal-\\nysis has become more and more widely used since advanced computer\\nscience technology has enabled practitioners to apply complex quan-\\ntitative techniques to reap many more rewards more eﬃciently and\\nmore frequently in practice [4]. In fact, one could even go further to\\nsay that algorithmic trading has been one of the main driving forces in\\nthe technological advancement of computers. Some institutional hedge\\nfund ﬁrms that rely on quantitative analysis include Renaissance Tech-\\nnologies, AQR Capital, Winton Capital Management, and D. E. Shaw\\n& Co., to name a few.\\nIn this monograph, we will focus on the quantitative analysis of eq-\\nuity securities since they are the simplest and easiest accessible assets.\\nAs we will discover, many quantitative techniques employed in signal\\nprocessing methods may be applicable in quantitative investment. Nev-\\nertheless, the discussion in this monograph can be easily extended to\\nsome other tradeable assets such as commodities, ETFs, and futures.\\nThus, to explore the multiple connections between quantitative in-\\nvestment in ﬁnancial engineering and areas in signal processing and\\ncommunications, we will show how to capitalize on existing mathemat-\\nical tools and methodologies that have been developed and are widely\\napplied in the context of signal processing applications to solve prob-\\nlems in the ﬁeld of portfolio optimization and investment management\\nin quantitative ﬁnance. 
In particular, we will explore ﬁnancial engineer-\\ning in several respects: i) we will provide the fundamentals of market\\ndata modeling and asset return predictability, as well as outline state-\\nof-the-art methodologies for the estimation and forecasting of portfolio\\ndesign parameters in realistic, non-frictionless ﬁnancial markets; ii) we\\nwill present the problem of optimal portfolio construction, elaborate\\non advanced optimization issues, and make the connections between\\nportfolio optimization and ﬁlter/beamforming design in signal process-\\ning; iii) we will reveal the theoretical mechanisms underlying the design\\nand evaluation of statistical arbitrage trading strategies from a signal\\nprocessing perspective based on multivariate data analysis and time\\nseries modeling; and iv) we will discuss the optimal order execution', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 11}),\n",
       " Document(page_content='1.1. A Signal Processing Perspective on Financial Engineering 5\\nand compare it with network scheduling in sensor networks and power\\nallocation in communication systems.\\nWe hope this monograph can provide more straightforward and sys-\\ntematic access to ﬁnancial engineering for researchers in signal process-\\ning and communication societies1so that they can understand prob-\\nlems in ﬁnancial engineering more easily and may even apply signal\\nprocessing techniques to handle ﬁnancial problems.\\nIn the following content of this introduction, we ﬁrst introduce ﬁ-\\nnancial engineering from a signal processing perspective and then make\\nconnectionsbetweenproblemsarisinginﬁnancialengineeringandthose\\narising in diﬀerent areas of signal processing and communication sys-\\ntems. At the end, the outline of the monograph is detailed.\\n1.1 A Signal Processing Perspective on Financial Engineer-\\ning\\nFigure 1.1 summarizes the procedure of quantitative investment.\\nRoughly speaking and oversimplifying, there are three main steps\\n(shown in Figure 1.1):\\n•ﬁnancial modeling: modeling a very noisy ﬁnancial time series to\\ndecompose it into trend and noise components;\\n•portfolio design: designing quantitative investment strategies\\nbased on the estimated ﬁnancial models to optimize some pre-\\nferred criterion; and\\n•order execution: properly executing the orders to establish or un-\\nwind positions of the designed portfolio in an optimal way.\\nIn the following, we will further elaborate the above three steps from\\na signal processing perspective.\\n1There have been some initiatives in Signal Processing journals on the ﬁnancial\\nengineering topic, namely, the 2011 IEEE Signal Processing Magazine - Special Issue\\non Signal Processing for Financial Applications, the 2012 IEEE Journal of Selected\\nTopics in Sginal Processing - Special Issue on Signal Processing Methods in Finance\\nand Electronic Trading, and the 2016 IEEE 
Journal of Selected Topics in Signal\\nProcessing - Special Issue on Financial Signal Processing and Machine Learning for\\nElectronic Trading.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 12}),\n",
       " Document(page_content='6 Introduction\\nFinancial Modeling\\nPortfolio Optimization\\n(Risk-Return Trade-Off)\\nOrder ExecutionStatistical Arbitrage\\n(Mean-Reversion)Investment Strategies\\nFigure 1.1: Block diagram of quantitative investment in ﬁnancial engineering.\\n1.1.1 Financial Modeling\\nFor equity securities, the log-prices (i.e., the logarithm of the prices)\\nand the compounded returns or log-returns (i.e., the diﬀerences of the\\nlog-prices) are the quantities of interest. From a signal processing per-\\nspective, a log-price sequence can be decomposed into two parts: trend\\nandnoisecomponents,whicharealsoreferredtoasmarketandidiosyn-\\ncratic components, respectively. The purpose of ﬁnancial modeling or\\nsignal modeling is to decompose the trend components from the noisy\\nﬁnancial series. Then based on the constructed ﬁnancial models, one\\ncan properly design some quantitative investment strategies for future\\nbeneﬁts [196, 129, 143].\\nFor instance, a simple and popular ﬁnancial model of the log-price\\nseries is the following random walk with drift:\\nyt=µ+yt−1+wt, (1.1)\\nwhereytis the log-price at discrete-time t,{wt}is a zero-mean white\\nnoise series, and the constant term µrepresents the time trend of the', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 13}),\n",
       " Document(page_content='1.1. A Signal Processing Perspective on Financial Engineering 7\\nJan−10 Jan−11 Jan−12 Jan−13 Jan−14 Jan−15−0.3−0.2−0.100.10.20.30.40.50.60.7Log−priceS&P 500\\n  \\nLog−price\\nTrend\\nAccum. noise\\nFigure 1.2: The decomposition of the log-price sequence of the S&P 500 Index into\\ntimetrendcomponent,andthecomponentwithouttimetrend(i.e.,theaccumulative\\nnoise).\\nlog-priceytsince E[yt−yt−1] =µ, which is usually referred to as drift.\\nBased on model (1.1), we can see the trend signal and noise com-\\nponents in the log-prices more clearly by rewriting ytas follows:\\nyt=µt+y0+t\\uf8fa\\ni=1wi, (1.2)\\nwhere the term µtdenotes the trend (e.g., uptrend if µ>0, downtrend\\nifµ < 0, or no trend if µ= 0), and the term∑t\\ni=1widenotes the\\naccumulative noise as time evolves.\\nFigure 1.2 shows the weekly log-prices of the S&P 500 index from\\n04-Jan-2010 to 04-Feb-2015 (the log-prices are shifted down so that\\nthe initial log-price is zero, i.e., y0= 0), where the estimated drift is\\nµ= 0.0022. Obviously, we observe two patterns: ﬁrst, there exists a\\nsigniﬁcant uptrend since 2010 in the US market (see the dashed red', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 14}),\n",
       " Document(page_content='8 Introduction\\nlineµt); and second, the accumulative noise in the log-prices is not\\nsteady and looks like a random walk (see the solid gray line for the\\naccumulative noise∑t\\ni=1wi=yt−µt).\\n1.1.2 Quantitative Investment\\nOnce the speciﬁc ﬁnancial model is calibrated from the ﬁnancial time\\nseries, the next question is how to utilize such a calibrated ﬁnancial\\nmodel to invest. As mentioned before, one widely employed approach\\nis to apply quantitative techniques to design the investment strategies,\\ni.e., the quantitative investment [65, 128, 64, 143].\\nFigure 1.2 shows that there are two main components in a ﬁnan-\\ncial series: trend and noise. Correspondingly, there are two main types\\nof quantitative investment strategies based on the two components: a\\ntrend-based approach, termed risk-return trade-oﬀ investment; and a\\nnoise-based approach, termed mean-reversion investment.\\nThe trend-based risk-return trade-oﬀ investment tends to maximize\\nthe expected portfolio return while keeping the risk low; however, this\\nis easier said than done because of the sensitivity to the imperfect\\nestimation of the drift component and the covariance matrix of the\\nnoise component of multiple assets. In practice, one needs to consider\\nthe parameter estimation errors in the problem formulation to design\\nthe portfolio in a robust way. Traditionally, the variance of the portfolio\\nreturn is taken as a measure of risk, and the method is thus referred\\nto as “mean-variance portfolio optimization” in the ﬁnancial literature\\n[135, 137, 138]. From the signal processing perspective, interestingly,\\nthe design of a mean-variance portfolio is mathematically identical to\\nthe design of a ﬁlter in signal processing or the design of beamforming\\nin wireless multi-antenna communication systems [123, 149, 213].\\nThe noise-based mean-reversion investment aims at seeking prof-\\nitability based on the noise component. 
For clarity of presentation, let\\nus use a simple example of only two stocks to illustrate the rough idea.\\nSuppose the log-price sequences of the two stocks are cointegrated (i.e.,\\nthey share the same stochastic drift), at some point in time if one stock\\nmoves up while the other moves down, then people can short-sell the\\nﬁrst overperforming stock and long/buy the second underperforming', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 15}),\n",
       " Document(page_content='1.2. Connections between Fin. Eng. and Signal Process. 9\\nstock2, betting that the deviation between the two stocks will eventu-\\nally diminish. This idea can be generalized from only two stocks to a\\nlarger number of stocks to create more proﬁtable opportunities. This\\ntype of quantitative investment is often referred to as “pairs trading”,\\nor more generally, “statistical arbitrage” in the literature [160, 203].\\n1.1.3 Order Execution\\nIdeally, after one has made a prediction and designed a portfolio, the\\nexecutionshouldbeaseamlesspartoftheprocess.However,inpractice,\\nthe process of executing the orders aﬀects the original predictions in\\nthe wrong way, i.e., the achieved prices of the executed orders will\\nbe worse than what they should have been. This detrimental eﬀect\\nis called market impact. Since it has been shown that smaller orders\\nhave a much smaller market impact, a natural idea to execute a large\\norder is to partition it into many small pieces and then execute them\\nsequentially [8, 18, 78, 146].\\nInterestingly, the order execution problem is close to many other\\nscheduling and optimization problems in signal processing and com-\\nmunication systems. From a dynamic control point of view, the order\\nexecutionproblemisquitesimilartosensorschedulingindynamicwire-\\nlesssensornetworks[180,181,208].Fromanoptimizationpointofview,\\ndistributing a large order into many smaller sized orders over a certain\\ntime window [8, 79] corresponds to allocating total power over diﬀer-\\nent communication channels in broadcasting networks [198] or wireless\\nsensor networks [214].\\n1.2 Connections between Financial Engineering and Areas in\\nSignal Processing and Communication Systems\\nWe have already brieﬂy introduced the main components of ﬁnancial\\nengineering from a signal processing perspective. 
In the following we\\nmake several speciﬁc connections between ﬁnancial engineering and\\nareas in signal processing and communication systems.\\n2In ﬁnancial engineering, to “long” means simply to buy ﬁnancial instruments,\\nto “short-sell” (or simply, to “short”) means to sell ﬁnancial instruments that are\\nnot currently owned.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 16}),\n",
       " Document(page_content='10 Introduction\\nModeling. Oneofthemostpopularmodelsusedinﬁnancialengineer-\\ning is the autoregressive moving average (ARMA) model. It models the\\ncurrent observation (e.g., today’s return) as the weighted summation\\nof a linear combination of previous observations (e.g., several previous\\ndays’ returns) and a moving average of the current and several previ-\\nous noise components [196]. Actually, this model is also widely used\\nin signal processing and it is referred to as a rational model because\\nitsz-transform is a rational function, or as a pole-zero model because\\nthe roots of the numerator polynomial of the z-transform are known as\\nzeros and the roots of the denominator polynomial of the z-transform\\nare known as poles [133].\\nRobust Covariance Matrix Estimation. After a speciﬁc model has\\nbeen selected, the next step is to estimate or calibrate its parameters\\nfrom the empirical data. In general, a critical parameter to be esti-\\nmatedisthecovariancematrixofthereturnsofmultiplestocks.Usually\\nthe empirical data contains noise and some robust estimation methods\\nare needed in practice. One popular idea in ﬁnancial engineering is\\nto shrink the sample covariance matrix to the identity matrix as the\\nrobust covariance matrix estimator [120]. Interestingly, this is mathe-\\nmaticallythesameasthediagonalloadingmatrix(i.e.,theadditionofa\\nscaled identity matrix to the sample interference-plus-noise covariance\\nmatrix) derived more than thirty years ago for robust adaptive beam-\\nformingin signalprocessing andcommunication systems[1, 38,45]. For\\nlarge-dimensional data, the asymptotic performance of the covariance\\nmatrix estimators is important. 
The mathematical tool for the asymp-\\ntotic analysis is referred to as general asymptotics or large-dimensional\\ngeneral asymptotics in ﬁnancial engineering [121, 122], or as random\\nmatrix theory (RMT) in information theory and communications [199].\\nPortfolioOptimizationvsFilter/BeamformingDesign. Onepopular\\nportfolio optimization problem is the minimum variance problem:\\nminimizewwTΣw\\nsubject to wT1= 1,(1.3)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 17}),\n",
       " Document(page_content='1.2. Connections between Fin. Eng. and Signal Process. 11\\nwhere w∈RNis the portfolio vector variable representing the nor-\\nmalized dollars invested in Nstocks, wT1= 1is the capital budget\\nconstraint, and Σ∈RN×Nis the (estimated in advance) positive deﬁ-\\nnite covariance matrix of the stock returns.\\nThe above problem (1.3) is really mathematically identical to the\\nﬁlter/beamforming design problem in signal processing [149]:\\nminimizewwHRw\\nsubject to wHa= 1,(1.4)\\nwhere w∈CNis the complex beamforming vector variable denoting\\nthe weights of Narray observations and a∈CNandR∈CN×N(es-\\ntimated in advance) are the signal steering vector (also known as the\\ntransmission channel) and the positive deﬁnite interference-plus-noise\\ncovariance matrix, respectively. The similarity between problems (1.3)\\nand(1.4)showssomepotentialconnectionsbetweenportfoliooptimiza-\\ntion and ﬁlter/beamforming design, and we will explore more related\\nformulations in detail later in the monograph.\\nIndex Tracking vs Sparse Signal Recovery. Index tracing is a widely\\nused quantitative investment that aims at mimicking the market index\\nbut with much fewer stocks. 
That is, suppose that a benchmark index\\nis composed of Nstocks and let rb= [rb\\n1,...,rb\\nT]T∈RTandX=\\n[r1,...,rT]T∈RT×Ndenote the returns of the benchmark index and\\ntheNstocks in the past Tdays, respectively, index tracking intends\\nto ﬁnd a sparse portfolio wto minimize the tracking error between the\\ntracking portfolio and benchmark index [106]:\\nminimizew1\\nT∥Xw−rb∥2\\n2+λ∥w∥0\\nsubject to 1Tw= 1,w≥0,(1.5)\\nwhereλ≥0is a predeﬁned trade-oﬀ parameter.\\nMathematically speaking, the above problem (1.5) is identical to\\nthe sparse signal recovery problem [37] and compressive sensing [51] in\\nsignal processing:\\nminimizew1\\nT∥Φw−y∥2\\n2+λ∥w∥0 (1.6)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 18}),\n",
       " Document(page_content='12 Introduction\\nTable 1.1: Connections between ﬁnancial engineering and signal processing.\\nFinancial Engineer-\\ningSignal Processing\\nModeling ARMA model [196]rational or pole-zero\\nmodel [133]\\nCovariance\\nMatrix\\nEstimationshrinkage sample co-\\nvariance matrix estima-\\ntor [120]diagonal loading in\\nbeamforming [1, 38, 45]\\nAsymptotic\\nAnalysis(large-dimensional)\\ngeneral asymptotics\\n[121, 122]random matrix theory\\n[199]\\nOptimizationportfolio optimization\\n[135, 137, 179, 213]ﬁlter/beamforming de-\\nsign [149, 213]\\nSparsity index tracking [106]sparse signal recovery\\n[37, 51]\\nwhereλ≥0is a predeﬁned trade-oﬀ parameter, Φ∈RT×Nis a dic-\\ntionary matrix with T≪N,y∈RTis a measurement vector, and\\nw∈RNis a sparse signal to be recovered. Again, the similarity be-\\ntween the two problems (1.5) and (1.6) shows that the quantitative\\ntechniques dealing with sparsity may be useful for both index tracking\\nand sparse signal recovery.\\nTable 1.1 summarizes the above comparisons in a more compact\\nway and it is interesting to see so many similarities and connections\\nbetween ﬁnancial engineering and signal processing.\\n1.3 Outline\\nThe abbreviations and notations used throughout the monograph are\\nprovided on pages 211 and 213, respectively.\\nFigure 1.3 shows the outline of the monograph and provides the\\nrecommended reading order for the reader’s convenience. The detailed\\norganization is as follows.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 19}),\n",
       " Document(page_content='1.3. Outline 13\\nPart I mainly focuses on ﬁnancial modeling (Chapters 2 and 3) and\\norder execution (Chapter 4).\\nChapter 2 starts with some basic ﬁnancial concepts and then in-\\ntroduces several models, such as the i.i.d. model, factor model, ARMA\\nmodel, autoregressive conditional heteroskedasticity (ARCH) model,\\ngeneralized ARCH (GARCH) model, and vector error correction model\\n(VECM), which will be used in the later chapters. Thus, this chapter\\nprovides a foundation for the following chapters in the monograph.\\nChapter 3 deals with the model parameter estimation issues. In\\nparticular, it focuses on the estimation of the mean vector and the co-\\nvariance matrix of the returns of multiple stocks. Usually, these two\\nparameters are not easy to estimate in practice, especially under two\\nscenarios: when the number of samples is small, and when there exists\\noutliers. This chapter reviews the start-of-the-art robust estimation of\\nthe mean vector and the covariance matrix from both ﬁnancial engi-\\nneering and signal processing.\\nChapter 4 formulates the order execution as optimization problems\\nand presents the eﬃcient solving approaches.\\nOnce ﬁnancial modeling and order execution have been introduced\\nin Part I, we move to the design of quantitative investment strate-\\ngies. As shown in Figure 1.1 there are two main types of investment\\nstrategies,namelyrisk-returntrade-oﬀinvestmentstrategiesandmean-\\nreversion investment strategies, which are documented in Parts II and\\nIII, respectively.\\nPart II entitled “Portfolio Optimization” focuses on the risk-return\\ntrade-oﬀ investment. It contains Chapters 5-9 and is organized as fol-\\nlows.\\nChapter 5 reviews the most basic Markowitz mean-variance portfo-\\nlio framework, that is, the objective is to optimize a trade-oﬀ between\\nthe mean and the variance of the portfolio return. 
However, this frame-\\nwork is not practical due to two reasons: ﬁrst, the optimized strategy\\nis extremely sensitive to the estimated mean vector and covariance\\nmatrix of the stock returns; and second, the variance is not an ap-\\npropriate risk measurement in ﬁnancial engineering. To overcome the\\nsecond drawback, some more practical single side risk measurements,', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 20}),\n",
       " Document(page_content='14 Introduction\\ne.g.,Value-at-Risk(VaR)andConditionalVaR(CVaR),areintroduced\\nas the alternatives to the variance.\\nChapter 6 presents the robust portfolio optimization to deal with\\nparameter estimation errors. The idea is to employ diﬀerent uncer-\\ntainty sets to characterize diﬀerent estimation errors and then derive\\nthe corresponding worst-case robust formulations.\\nChapter 7, diﬀerent from previous Chapters 5 and 6 that consider\\neach portfolio individually, designs multiple portfolios corresponding to\\ndiﬀerent clients jointly via a game theoretic approach by modeling a\\nﬁnancial market as a game and each portfolio as a player in the game.\\nThis approach is important in practice because multiple investment\\ndecisions may aﬀect each other.\\nChapter 8 considers a passive quantitative investment method\\nnamed index tracking. It aims at designing a portfolio that mimics a\\npreferred benchmark index as closely as possible but with much fewer\\ninstruments.\\nChapter 9 considers a newly developed approach to the portfolio\\ndesign aiming at diversifying the risk, instead of diversifying the capital\\nas usually done, among the available assets, which is called a “risk\\nparity portfolio” in the literature.\\nPart III, containing Chapter 10, explores the mean-reversion in-\\nvestment that utilizes the noise component in the log-price sequences\\nof multiple assets.\\nChapter 10 introduces the idea of constructing a pair of two stocks\\nvia cointegration and optimizes the threshold for trading to achieve a\\npreferred criterion. Then it extends further from pairs trading based\\non only two stocks to statistical arbitrage for multiple stocks.\\nAfter covering the main content of the three parts, Chapter 11\\nconcludes the monograph.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 21}),\n",
       " Document(page_content='1.3. Outline 15\\nChapter 1:\\n“Introduction ”\\nChapter 2:\\n“Basic Models ”Chapter 3:\\n“Mean/Cov \\nEstimaton ”\\nChapter 4:\\n“Order \\nExecution ”\\nChapter 5:\\n“MV Portfolio ”\\nChapter 9:\\n“Risk Parity \\nPortfolio”Chapter 7:\\n“Multiple \\nPortfolio”\\nChapter 8:\\n“Index \\nTracking ”Chapter 10:\\n“Statistical \\nArbitrage ”\\nChapter 11:\\n“Conclusion ”Part I:\\nFinancial \\nModeling\\nPart II:\\nPortfolio\\nOptimization\\nRecommended reading orderPart III:\\nStatistical\\nArbitrageChapter 6:\\n“Robust Portfolio \\nOptimization ”\\nFigure 1.3: Outline of the monograph.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 22}),\n",
       " Document(page_content='Part I\\nFinancial Modeling &\\nOrder Execution', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 23}),\n",
       " Document(page_content='2\\nModeling of Financial Time Series\\nModeling of ﬁnancial time series provides the quantitative tools to ex-\\ntract useful (or predictable) information for future investments. There\\nare two main philosophies of modeling like then are in signal process-\\ning and control theory [98]: continuous-time and discrete-time systems.\\nContinuous-time modeling, using the Black-Scholes model, for exam-\\nple, involves stochastic calculus and concepts like the Brownian motion\\nthat are at the core of many fundamental results. For computational\\npurposes, however, discrete-time modeling is more convenient. In addi-\\ntion, practical investment strategies are usually naturally discretized,\\ni.e., daily or monthly investments.\\nTherefore, this chapter focuses on discrete-time modeling of ﬁnan-\\ncial time series, i.e., the interested time series quantities (mainly the\\nlog-returns) of some interested assets (say Nassets) given the past\\ninformation (i.e., the past log-returns of the Nassets).\\nThe detailed organization is as follows. Section 2.1 starts with some\\nbasic ﬁnancial concepts, i.e., prices and returns. Then Section 2.2 in-\\ntroduces the general structure of modeling and Sections 2.3-2.7 explain\\nseveralspeciﬁcmodels,suchasthei.i.d.model,factormodel,vectorau-\\ntoregressive moving average (VARMA) model, vector error correction\\n17', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 24}),\n",
       " Document(page_content='18 Modeling of Financial Time Series\\nmodel (VECM), autoregressive conditional heteroskedasticity (ARCH)\\nmodel, generalized ARCH (GARCH) model, and multivariate ARCH\\nand GARCH models, which will be used in the later chapters. At the\\nend, Section 2.8 summarizes all the models brieﬂy.\\nThis chapter focuses on the models themselves but leaves the ﬁtting\\nof the models with real data or parameter estimation to Chapter 3. All\\nthe models are introduced in their vector/multivariate cases.\\n2.1 Asset Returns\\nFor simplicity, let us focus on a single asset. Let ptbe the price of an\\nasset at (discrete) time index t.\\n2.1.1 Returns Based on Prices\\nSuppose the asset pays no dividends1, the simple return (a.k.a. linear\\nreturn or net return) over one interval from time t−1totis\\nRt≜pt−pt−1\\npt−1=pt\\npt−1−1. (2.1)\\nThe numerator pt−pt−1is the proﬁt (or the loss in case of a negative\\nproﬁt)duringtheholdingperiodandthedenominator pt−1istheinitial\\ninvestment at time t−1. Thus the simple return can be regarded as\\nthe proﬁt rate.\\nThen the quantity\\nRt+ 1 =pt\\npt−1(2.2)\\ndenotes the ratio between the end capital and the initial investment,\\nthus it is referred to as total return or gross return.\\nBased on the above deﬁnitions for only one investment period, the\\ngross return on the most recent kperiods is the product of the past k\\nsingle period gross returns\\n1 +Rt(k) =pt\\npt−k=pt\\npt−1×pt−1\\npt−2×···×pt−k+1\\npt−k\\n= (1 +Rt)×···× (1 +Rt−k+1),(2.3)\\n1If there exists dividend dtat timet, then the simple return in (2.1) can be\\nadjusted as Rt=pt−pt−1+dt\\npt−1.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 25}),\n",
       " Document(page_content='2.1. Asset Returns 19\\nand the corresponding net return is\\nRt(k) =pt\\npt−k−1. (2.4)\\n2.1.2 Returns Based on Log-prices\\nThe log-return (a.k.a. continuously compounded return) at time tis\\ndeﬁned as follows:\\nrt≜log(1 +Rt) = logpt\\npt−1=yt−yt−1, (2.5)\\nwhereyt≜logptis the log-price and log denotes the natural logarithm.\\nSince the function f(x) = log(1 + x)has the ﬁrst order Taylor\\napproximation f(x) = log(1 + x)≈xat point 0, we can see rt=\\nlog(1 +Rt)is approximately equal to the net return Rtin (2.1), i.e.,\\nrt≈Rt, especially when Rtis small around zero (which is the case for\\nthe usual intervals).\\nThe log-return on the most recent kperiods is\\nrt(k)≜log(1 +Rt(k)) = log[(1 + Rt)×···× (1 +Rt−k+1)]\\n= log(1 +Rt) + log(1 + Rt−1) +···+ log(1 +Rt−k+1)\\n=rt+rt−1+···+rt−k+1,(2.6)\\nwhich has a nice additive property over periods (recall that the linear\\nmulti-period net return Rt(k)in (2.4) does not have such a property).\\n2.1.3 Portfolio Returns\\nFor a portfolio composing of Nassets, let w∈RNbe a vector with wi\\ndenoting normalized capital invested into the i-th asset. Then the net\\nreturn of the portfolio over a single period tisRp\\nt=∑N\\ni=1wiRitwhere\\nRitis the net return of the i-th asset.\\nThe log-return of a portfolio, however, does not have the above ad-\\nditivity property. If the simple returns Ritare all small in magnitude,\\ntheycanbeapproximatedbythelog-returns ritandtheportfolionetre-\\nturn can be approximated as Rp\\nt=∑N\\ni=1wiRit≈∑N\\ni=1wirit. However,\\nwhen some Ritare signiﬁcantly diﬀerent from zero, using∑N\\ni=1wiritto\\napproximate∑N\\ni=1wiRitmay introduce some serious errors [144].', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 26}),\n",
       " Document(page_content='20 Modeling of Financial Time Series\\nModels for Financial \\nTime SeriesAdditivity Over Periods Log-Returns\\nPortfolio OptimizationAdditivity Over Assets Simple Returns\\nFigure 2.1: Simple returns versus log-returns.\\n2.1.4 Comparisons: Simple Returns versus Log-returns\\nFigure 2.1 provides a summary of the comparisons between simple re-\\nturns and log-returns.\\nFirst, the simple returns have the advantage of additivity over as-\\nsets. Because of that, it is the simple returns that will be used in port-\\nfolio optimization later in Part II.\\nSecond, the log-returns have the advantage of additivity over assets\\nperiods. This makes the distribution of the log-returns in the future\\neasier to compute and predict.\\nThird, the statistical properties of the log-returns are relatively\\nmore tractable. For example, from (2.1) we can see that simple returns\\nare highly asymmetric because they are bounded below by −1and un-\\nbounded above. Instead, the log-returns are relatively more symmetric\\nand this makes the corresponding distributions easier to model.\\nIt is the additivity over periods and statistical simplicity that are\\nneeded for modeling purposes and thus we focus on the log-returns in\\nthis chapter. However, as shown in Figure 2.1, either simple returns or\\nlog-returns should be used depending on the investor’s speciﬁc goal.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 27}),\n",
       " Document(page_content='2.2. General Structure of a Model 21\\n2.2 General Structure of a Model\\nMost of the existing ﬁnancial time series models aim at modeling the\\nlog-returns of Nassets jointly denoted by rt∈RN. In particular, they\\nmodel the log-returns at time tbased on the previous historical data\\ndenoted byFt−1. However, modeling an N-dimensional random vari-\\nable may be a daunting task not just because of the estimation aspect\\nbut also the storage issue. For this reason, most models simplify the\\ntask by modeling only the mean and covariance matrix.\\nConditional onFt−1, we can decompose rt∈RNas follows:\\nrt=µt+wt, (2.7)\\nwhereµtis the conditional mean\\nµt=E[rt|Ft−1] (2.8)\\nandwtis a white noise with zero mean and conditional covariance\\nΣt=E[(rt−µt)(rt−µt)T|Ft−1]. (2.9)\\nHere,µtandΣt(or equivalently Σ1/2\\nt) are the two main components\\nto be modeled, and they are usually referred to as conditional mean\\nand conditional covariance matrix (or more often conditional volatility\\nforΣ1/2\\nt), respectively, in the literature.\\nIn the literature, the underlying distribution wtis always assumed\\nto be Gaussian (or sometimes a more general elliptical distribution) for\\nmathematical simplicity even though reality does not ﬁt the thin tails\\nof the Gaussian distribution [143].\\nIn the following, we ﬁrst provide general models for both µtand\\nΣtand then explore several diﬀerent types of speciﬁc models. Sections\\n2.3 and 2.4 model both conditional mean and covariance as constants,\\nSections 2.5 and 2.6 explore various models of the conditional mean\\nbut leave the conditional covariance matrix as a constant, and Section\\n2.7 focuses on modeling the conditional covariance matrix only. 
All the\\nspeciﬁc models can be regarded as special cases of the general models,\\nand we summarize them in Section 2.8.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 28}),\n",
       " Document(page_content='22 Modeling of Financial Time Series\\n2.2.1 General Model for Conditional Mean µt\\nFor most log-return series, the following model is enough to model the\\nconditional mean µt:\\nµt=φ0+Πxt+p\\uf8fa\\ni=1Φirt−i−q\\uf8fa\\nj=1Θjwt−j, (2.10)\\nwhereφ0∈RNdenotes a constant vector, xt∈RKdenotes a vector\\nof exogenous variables, Π∈RN×Kis a loading matrix, pandqare\\nnonnegative integers, Φi,Θj∈RN×Nare matrix parameters, and rt−i\\nandwt−jare past log-returns and temporally white noise.\\n2.2.2 General Model for Conditional Covariance Matrix Σt\\nFor a multivariate case, there exist many diﬀerent models of the con-\\nditional covariance matrix Σt, and, in general, there does not exist a\\ngeneral model formulation that captures all the existing ones as special\\ncases, e.g., see [16, 182, 196, 129]. Nevertheless, for the consistency of\\npresentation, let us introduce the following model [62]:\\nΣt=A0AT\\n0+m\\uf8fa\\ni=1Ai(wt−iwT\\nt−i)AT\\ni+s\\uf8fa\\nj=1BjΣt−jBT\\nj,(2.11)\\nwheremandsare nonnegative integers and Ai,Bj∈RN×Nare pa-\\nrameters. This model ensures a positive deﬁnite matrix provided that\\nA0AT\\n0is positive deﬁnite. The above model is referred to as the Baba-\\nEngle-Kraft-Kroner (BEKK) model in the literature.\\nIn practice, most models simply assume a constant covariance ma-\\ntrixΣt=Σw, i.e., a special case of (2.11) with m= 0ands= 0.\\n2.3 I.I.D. Model\\nPerhaps the simplest model for rtis that it follows an i.i.d. distribution\\nwith ﬁxed mean and covariance matrix, i.e.,\\nrt=µ+wt, (2.12)\\nwhere wt∈RNis a white noise series with zero mean and constant\\ncovariance matrix Σw.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 29}),\n",
       " Document(page_content='2.4. Factor Model 23\\nComparing the i.i.d. model (2.12) with the general model (2.7)-\\n(2.11), obviously we can see it is the simplest special case with µ=φ0,\\nΠ=0,p= 0,q= 0,Σw=A0AT\\n0,m= 0, ands= 0. And the\\nconditional mean and covariance matrix are both constant:\\nµt=µ, (2.13)\\nΣt=Σw. (2.14)\\nThis i.i.d. model assumption may look simple, however, it is one\\nof the most fundamental assumptions for many important works. One\\nexample is the Nobel prize-winning Markowitz portfolio theory [135,\\n136, 137, 138, 179] that will be covered in Chapter 5.\\n2.4 Factor Model\\nIf we look at (2.12) carefully, we may think that the dimension of the\\nmarketalwaysequalsthenumberofassets N.However,thismaynotbe\\ntrue in practice. In general, the market is composed of a large number\\nofassets(i.e., Nislarge),butitisusuallyobservedthatitsdimensionis\\nrelatively small, that is, the market is only driven by a limited number\\nof factors, say Kfactors with K≪N.\\nThe general factor model is\\nrt=φ0+h(ft) +wt, (2.15)\\nwhereφ0denotes a constant vector; ft∈RKwithK≪Nis a vector\\nof a few factors that are responsible for most of the randomness in\\nthe market, the vector function h:RK↦→RNdenotes how the low\\ndimensional factors aﬀect the higher dimensional market; and a resid-\\nual vector wtof (possibly independent) perturbations that has only a\\nmarginal eﬀect. In general, the function his assumed to be linear.\\nThis approach of modeling enjoys a wide popularity; refer to [42,\\n66, 67, 68, 69, 70, 118] for some typical references.\\nIn the following, we consider two speciﬁc models of (2.15) with\\neither explicit or hidden factors.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 30}),\n",
       " Document(page_content='24 Modeling of Financial Time Series\\n2.4.1 Explicit Factors\\nThe explicit factor model is\\nrt=φ0+Πft+wt, (2.16)\\nwhich is a speciﬁc case of (2.15) with h(ft) =Πft,ft∈RKbeing\\nexplicitly observable market variables, and Π∈RN×Kbeing the factor\\nloading matrix.\\nSome popular explicit factors include returns on the market port-\\nfolio2, growth rate of the GDP, interest rate on short term Treasury\\nbills, inﬂation rate, unemployment, etc. [171].\\nObviously, the factor model with explicit factors (2.16) is a special\\ncase of the general model (2.7)-(2.11) with exogenous input being the\\nfactors xt=ft,p= 0, andq= 0.\\nIn general, it is assumed that ftfollows an i.i.d. distribution with\\nconstant mean µfand constant covariance matrix Σf,wtfollows an\\ni.i.d. distribution with zero mean and (possibly diagonal) constant co-\\nvariance matrix Σw, and ftandwtare uncorrelated. Then the con-\\nditional mean and covariance matrix are both constant and can be\\ncomputed as follows:\\nµt=E[rt|Ft−1] =E[rt] =φ0+Πµf (2.17)\\nΣt=E[(rt−µt)(rt−µt)T|Ft−1],\\n=ΠΣfΠT+Σw. (2.18)\\nCapital Asset Pricing Model (CAPM)\\nOne of the most popular factor models is the CAPM with the returns\\non the market portfolio being the only factor [70]. The i-th stock return\\nat timetis\\nri,t−rf=βi(rM,t−rf) +wi,t, (2.19)\\nwhererfis the risk-free rate, rM,tis the return of the market portfolio,\\nandwi,tis a stock-speciﬁc white noise with zero mean and constant\\nvariance.\\n2The market portfolio is a portfolio consisting of all equities with the normalized\\nportfolio weights being proportional to the market values of the equities.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 31}),\n",
       " Document(page_content='2.4. Factor Model 25\\nTaking the expectation on both sides of (2.19) results in the so-\\ncalled CAPM:\\nE[ri,t]−rf=βi(E[rM,t]−rf). (2.20)\\nBased on (2.20)\\n•E[rM,t]−rfmeasures the diﬀerence between the expected market\\nreturn and risk-free rate, which is known as the market premium;\\n•E[ri,t]−rfmeasures the diﬀerence between the expected stock\\nreturn and risk-free rate, which is known as the risk premium;\\nand\\n•βiin general is given by\\nβi=Cov(ri,t,rM,t)\\nVar(rM,t)(2.21)\\nwhich measures how sensitive the risk premium is to the market\\npremium, that is, the risk premium equals the market premium\\ntimesβi.\\nNote that the conditional mean E[ri,t|Ft−1]is the same as the un-\\nconditional mean E[ri,t] =rf+βi(E[rM,t]−rf).\\nTaking the variance on both sides of (2.19) gives us the following\\nrelationship:\\nVar[ri,t] =β2\\niVar[rM,t] +Var[wi,t], (2.22)\\nwhich is decomposed into two parts:\\n•β2\\niVar[rM,t]measures the risk associated with the market and it\\nis referred to as systematic risk, and\\n•Var[wi,t]is speciﬁc to each stock and it is called nonsystematic\\nrisk.\\nAlso, the conditional variance Var[ri,t|Ft−1]equals the unconditional\\nvariance Var[ri,t].', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 32}),\n",
       " Document(page_content='26 Modeling of Financial Time Series\\n2.4.2 Hidden Factors\\nThe assumption of a linear model of (2.15) with hidden factors is that\\nthe factors are not explicit market variables but are functions of rtthat\\nsummarize as much information as possible.\\nOne method is to deﬁne the hidden factors as aﬃne transformations\\nofrtas follows:\\nft=d+ΥTrt, (2.23)\\nwhere d∈RKandΥ∈RN×Kare parameters to be estimated.\\nThen the hidden factor model can be expressed as follows:\\nrt=φ0+Π(d+ΥTrt) +wt, (2.24)\\nwhich is a speciﬁc case of (2.15) with h(ft) =Πft,ft∈RKbeing\\nthe hidden variables deﬁned in (2.23); Π∈RN×Kbeing the factor\\nloading matrix; and wtfollows an i.i.d. distribution with zero mean\\nand a (possibly diagonal) constant covariance matrix Σw.\\nThe model (2.24) can be further simpliﬁed as follows:\\nrt=m+ΠΥTrt+wt, (2.25)\\nwhere m=φ0+Πdis an newly deﬁned parameter.\\nThe parameters m,Π, and Υcan be estimated by the following\\nnonlinear least-square (LS) regression:\\nminimize\\nm,Π,ΥE\\ued79\\ued79\\ued79rt−m−ΠΥTrt\\ued79\\ued79\\ued792\\n2. (2.26)\\nRecall that Π,Υ∈RN×K, then ΠΥT∈RN×Nwith rank(ΠΥT)≤\\nK≪N, then intuitively problem (2.26) is projecting rtonto a lower\\nK-dimensional subspace with variations being captured as much as\\npossible. Indeed, this technique is usually referred to as principal com-\\nponent analysis (PCA) [109] in the literature, the optimal solution of\\nwhich can be stated in closed-form as follows [143]:\\nΠ=Υ=EK, (2.27)\\nm=(\\nI−EKET\\nK)\\nE[rt], (2.28)\\nwhere EK∈RN×Kwith thek-th column vector being the k-th largest\\neigenvector of the covariance matrix Cov[rt],k= 1,...,K., and it can\\nbe shown that the white noise wtis uncorrelated of the hidden factors.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 33}),\n",
       " Document(page_content='2.5. VARMA Model 27\\nThen combining (2.25), (2.27) and (2.28) together, we can ﬁnd the\\nconditional mean and covariance matrix as follows:\\nµt=E[rt|Ft−1] =E[rt], (2.29)\\nΣt=E[(rt−µt)(rt−µt)T|Ft−1]\\n=EKΛKET\\nK+Σw, (2.30)\\nwhere ΛK= Diag([λ1,...,λK])is aK-by-Kdiagonal matrix with λk\\nbeing thek-th largest eigenvalue of Cov[rt], and we can see both the\\nconditional mean and covariance matrix are constant and independent\\nof time.\\n2.4.3 Comparisons: Explicit Factors versus Hidden Factors\\nBased on (2.17)-(2.18) or (2.29)-(2.30), we can see that the factor mod-\\nels,i.e.,(2.16)and(2.25),decomposetheconditionalcovariance Σtinto\\ntwo parts: low dimensional factors and marginal noise. The key is the\\nway to choose or construct the factors, and the comparisons between\\nthe explicit and hidden factor models are as follows:\\n•The explicit factor model tends to explain the log-returns with a\\nsmaller number of fundamental or macroeconomic variables and\\nthus it is easier to interpret. However, in general there is no sys-\\ntematic method to choose the right factors.\\n•The hidden factor model employs PCA to explore the structure\\nof the covariance matrix and locate a low-dimensional subspace\\nthat captures most of the variation in the log-returns. It is a more\\nsystematical approach and thus it may provide a better explana-\\ntory power. One drawback of the hidden factors compared with\\nthe explicit factors is that they do not have explicit econometric\\ninterpretations.\\n2.5 VARMA Model\\nThe previous i.i.d. and factor models, while commonly employed, do\\nnot incorporate any time-dependency in the model for rt. In other\\nwords, the conditional mean and covariance matrix are constant and', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 34}),\n",
       " Document(page_content='28 Modeling of Financial Time Series\\npast information is not explicitly used (it can still be used implicitly\\nvia the estimation of the parameters).\\nThe VARMA model can incorporate the past information into the\\nmodel of conditional mean, although still not in the conditional covari-\\nance matrix.\\nStationarity is an important characteristic for time series analysis\\nwhich describes the time-invariant behavior of a time series. A mul-\\ntivariate time series rtis said to be weakly stationary if its ﬁrst and\\nsecond moments are time-invariant. In general, a stationary time series\\nis much easier to model, estimate, and analyze.\\n2.5.1 VAR( 1) Model\\nLet us start with the vector autoregressive (VAR) model of order 1,\\ndenoted as VAR( 1), as follows:\\nrt=φ0+Φ1rt−1+wt, (2.31)\\nwhereφ0∈RNisaconstantvector, Φ1∈RN×Nisamatrixparameter,\\nandwtdenotes a serially uncorrelated noise series with zero mean\\nand constant covariance matrix Σw. We can see that the term Φ1rt−1\\nmodels the serial correlation of the time series rt.\\nAlso, compared with the general model (2.7)-(2.11), the VAR( 1)\\nmodel (2.31) is a special case with Π=0,p= 1,q=m=s= 0, and\\nΣt=Σw, and it is straightforward to obtain the conditional mean and\\ncovariance matrix based on (2.31) as follows:\\nµt=φ0+Φ1rt−1, (2.32)\\nΣt=Σw. (2.33)\\nObviously, the conditional covariance matrix Σtis constant.\\n2.5.2 VAR( p) Model\\nThep-th order autoregressive process, denoted as VAR( p), extends the\\nVAR( 1) model by including more previous observations into the model\\nas follows:\\nrt=φ0+p\\uf8fa\\ni=1Φirt−i+wt, (2.34)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 35}),\n",
       " Document(page_content='2.5. VARMA Model 29\\nwherepis a nonnegative integer, φ0∈RNis a constant vector, Φi∈\\nRN×Nare matrix parameters, and wtdenotes a serially uncorrelated\\nwhite noise series with zero mean and constant covariance matrix Σw.\\nClearly we can see that the time series rtis serially correlated via\\nthe term∑p\\ni=1Φirt−iwhich contains more previous observations than\\nthe AR( 1) model (2.31). Similar to (2.32) and (2.33), the conditional\\nmean and covariance matrix based on (2.34) are\\nµt=φ0+p\\uf8fa\\ni=1Φirt−i, (2.35)\\nΣt=Σw, (2.36)\\nwhere the conditional covariance matrix is constant.\\n2.5.3 VMA( q) Model\\nEven though the VAR model models the serial correlations, it imposes\\nsuch correlations with all the past observations. We can observe this\\neasily by substituting the VAR( 1) model (2.31) recursively and we have\\nthatrtis serially correlated to all the past observations r0,...,rt−1,\\nespecially when the eigenvalues of Ψ1are close to 1.\\nFor some realistic cases, the time series rtshould only have serial\\ncorrelation up to a small lag qsuch that rtis serially uncorrelated to\\nrt−ℓfor allℓ > q. Unfortunately, the VAR model does not have this\\nproperty.\\nA useful alternative to the VAR model is a vector moving average\\n(VMA) model. The VMA model of order q, denoted as VMA( q), is\\nrt=µ+wt−q\\uf8fa\\nj=1Θjwt−j, (2.37)\\nwhereqis a nonnegative integer, µ∈RNis a constant vector, Θj∈\\nRN×Nare matrix parameters, and wtdenotes a serially uncorrelated\\nwhite noise series with zero mean and constant covariance matrix Σw.\\nBased on (2.37), it is easy to check that rtis serially uncorrelated\\ntort−ℓfor allℓ > q. Also, the VMA( q) model (2.37) is a special case\\nof the general model (2.7)-(2.11) with Π=0andp=m=s= 0, and', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 36}),\n",
       " Document(page_content='30 Modeling of Financial Time Series\\nwe have the conditional mean and covariance matrix as follows:\\nµt=µ−q\\uf8fa\\nj=1Θjwt−j, (2.38)\\nΣt=Σw, (2.39)\\nwhere the conditional covariance matrix is constant.\\n2.5.4 VARMA Model\\nSometimes, using simply a VAR model or a VMA model only is not\\nenough to ﬁt the data and it is helpful to combine them together. The\\ncombination of VAR( p) and VMA( q), referred to as VARMA( p,q), is\\ngiven by\\nrt=φ0+p\\uf8fa\\ni=1Φirt−i+wt−q\\uf8fa\\nj=1Θjwt−j, (2.40)\\nwherepandqare nonnegative integers, φ0∈RNis a constant vector,\\nthe matrices Φi,Θj∈RN×Nare parameters, and wtis a white noise\\nseries with zero mean and constant covariance matrix Σw. Directly, the\\nconditional mean and covariance matrix based on (2.40) are\\nµt=φ0+p\\uf8fa\\ni=1Φirt−i−q\\uf8fa\\nj=1Θjwt−j, (2.41)\\nΣt=Σw, (2.42)\\nwhere the conditional covariance matrix is still constant.\\nRemark 2.1. The VARMA model is a powerful model of conditional\\nmean, however, it also has some drawbacks that need to be dealt with\\ncarefully.\\nThe identiﬁability issue, i.e., two VARMA( p,q) models with diﬀer-\\nent coeﬃcient matrices can be rewritten as the same VMA( ∞) model,\\nis one of the most important ones. This issue is important because the\\nlikelihood function of the VARMA( p,q) model may not be uniquely\\ndeﬁned and thus the parameters cannot be estimated. To overcome\\nthis drawback, some model structural speciﬁcations are needed. There', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 37}),\n",
       " Document(page_content='2.6. VECM 31\\nare two main approaches namely the Kronecker index, and the scalar\\ncomponent model in the literature [197].\\nAnother issue is that, for a causal and invertible VARMA model,\\nthe conditional maximum likelihood estimation may not result in a\\ncausal and invertible estimated VARMA model, especially when the\\nnumber of samples is small [129, 197]. The solving approach is to either\\naddmoreconstraintsintheconditionalmaximumlikelihoodestimation\\n[169] or switch to the unconditional maximum likelihood estimation\\n[197]. However, both of them require more intensive computation. ■\\n2.6 VECM\\nUntil now we have focused on modeling directly the log-return series\\nrtinstead of the log-price series yt(recall that rt= ∆yt=yt−yt−1).\\nThisisbecauseingeneralthelog-priceseries ytisnotweaklystationary\\n(think for example of Apple stock whose log-prices keep increasing) and\\nthus is not easy to model, while its diﬀerence series, i.e., the log-return\\nseries rt, is weakly stationary and is easier to model and analyze.\\nHowever, it turns out that diﬀerencing may destroy part of the\\nrelationship among the log-prices which may be invaluable for a proper\\nmodeling with forecast power. It is therefore also important to analyze\\nthe original (probably non-stationary) time series directly [129].\\nInterestingly, it turns out that in fact a (probably non-stationary)\\nVAR model may be enough. For example, one can always ﬁt the log-\\nprice series ytwith a VAR model, say, the following VAR( p):\\nyt=φ0+Φ1yt−1+···+Φt−pyt−p+wt, (2.43)\\nwherepis a nonnegative integer, φ0∈RNis a constant vector, Φi∈\\nRN×Nare matrix parameters, and wtdenotes a serially uncorrelated\\nwhite noise series with zero mean and constant covariance matrix Σw.\\nHere (2.43) models the log-price series and ytis not necessarily\\nstationary. 
The standard results for a stationary VAR model may not\\nbe useful.\\nIntheliterature,atimeseriesiscalledintegratedoforder p,denoted\\nasI(p),ifthetimeseriesobtainedbydiﬀerencingthetimeseries ptimes\\nis weakly stationary, while by diﬀerencing the time series p−1times is', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 38}),\n",
       " Document(page_content='32 Modeling of Financial Time Series\\nnot weakly stationary [196, 129]. A multivariate time series is said to be\\ncointegrated if it has at least one linear combination being integrated of\\na lower order. To illustrate the concepts visually, we consider a slightly\\nmodiﬁed example from [196] with only two dimensions as follows.\\nExample 2.1. Suppose the log-price series ytfollows\\nyt=Φ1yt−1+wt, (2.44)\\nwhere Φ1=)\\n0.5−1\\n−0.25 0.5[\\n, and wtfollows an i.i.d. distribution with\\nzero mean and constant covariance matrix Σw. The model (2.44) (or\\nyt) is not stationary because the eigenvalues of Φ1are0and1(recall\\nfor stationarity the modulus of the eigenvalues need to be less than\\none).\\nTo check the integration order of yt, rewriting (2.44) as\\n)\\n1−0.5B B\\n0.25B 1−0.5B[\\nyt=wt, (2.45)\\nwhereBis the backshift operator, and premultiplying both sides of\\n(2.45) by)\\n1−0.5B−B\\n−0.25B1−0.5B[\\nyields\\n)\\n1−B 0\\n0 1−B[\\nyt=)\\n1−0.5B−B\\n−0.25B1−0.5B[\\nwt. (2.46)\\nSince the right hand side of (2.46) is stationary, so is the ﬁrst order\\ndiﬀerence of yton the left hand side of (2.46). This implies that ytis\\nintegrated of order one, i.e., it is I(1).\\nTocheckwhether ytiscointegratedornot,wedeﬁne L≜)\\n1−2\\n0.5 1[\\nand premultiply (2.44) by L, then we have\\nLyt=LΦ1L−1Lyt−1+Lwt, (2.47)\\nwhich can be rewritten more explicitly as\\n)\\ny1t−2y2t\\n0.5y1t+y2t[\\n=)\\n1 0\\n0 0[)\\ny1,t−1−2y2,t−1\\n0.5y1,t−1+y2,t−1[\\n+Lwt.(2.48)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 39}),\n",
       " Document(page_content='2.6. VECM 33\\nSince Lwtis always stationary, so is the linear combination 0.5y1t+\\ny2t, and thus ytis cointegrated. This derived cointegration result in\\nfact is very important and can be utilized to design very proﬁtable\\nquantitative trading strategies (which will be shown later in Part III).\\nNow we can observe that if we diﬀerence the log-price series directly\\nand reach the model (2.46), we cannot obtain the cointegration result\\nthat 0.5y1t+y2tis stationary any more. Therefore, it is important to\\nstudy the log-price series ytdirectly as mentioned before. ■\\nThe above Example 2.1 shows a speciﬁc example of cointegration.\\nIn practice, a systematic way to ﬁnd the cointegrated components (if\\nthey exist) is via a vector error correction model (VECM) [61].\\nLet us assume the log-price series ytis at most I(1), that is, at\\nleast its diﬀerence series rtor the log-return series is always weakly\\nstationary. Using the relation yt=yt−1+rt, the VAR(p) model (2.43)\\ncan always be rewritten as\\nrt=φ0+Πyt−1+˜Φ1rt−1+···+˜Φp−1rt−p+1+wt,(2.49)\\nwhere\\nΠ=−(I−Φ1−···− Φp) =−Φ(1) (2.50)\\n˜Φj=−p\\uf8fa\\ni=j+1Φi, j = 1,...,p−1. (2.51)\\nInterestingly, the above model (2.49) can also be regarded as a\\nspecial case of the general model (2.10) with the exogenous variables\\nbeing the previous log-prices, i.e., xt=yt−1. And the conditional mean\\nand covariance matrix are\\nµt=φ0+Πyt−1+p−1\\uf8fa\\ni=1˜Φirt−i, (2.52)\\nΣt=Σw, (2.53)\\nwhere the conditional covariance matrix is constant.\\nUnder the assumption that ytis at mostI(1), it is straightforward\\ntoconcludethattheterm Πyt−1intheabovemodel(2.49)isstationary,\\ntherefore, some linear combinations of ytmay be stationary. The term', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 40}),\n",
       " Document(page_content='34 Modeling of Financial Time Series\\nΠyt−1is usually referred to as an error correction term and thus the\\nmodel is called a VECM. There are three interesting cases of Πyt−1:\\n1.rank(Π) = 0. This implies Π=0andytis not cointegrated since\\nthere is no linear combination of ytbeing stationary. Then the\\nVECM (2.49) reduces to a VAR( p−1) for the log-return time\\nseries rt.\\n2.rank(Π) =N. This implies Πis invertible. Then ytmust be\\nstationary already since rtandwtare both stationary and yt\\ncan be rewritten as a linear combination of rtandwtby left\\nmultiplying both sides of (2.49) by Πinverse. Thus, one can\\nstudy ytdirectly.\\n3.0<rank(Π) =r<N. This is the interesting case and Πcan be\\ndecomposed as\\nΠ=αβT, (2.54)\\nwhereα,β∈RN×rwith full column rank, i.e., rank(α) =\\nrank(β) =r. Then the VECM (2.49) becomes\\nrt=φ0+αβTyt−1+˜Φ1rt−1+···+˜Φp−1rt−p+1+wt.(2.55)\\nThis means that the log-price time series ythasrlinearly in-\\ndependent cointegrated components, i.e., βTyt. This interesting\\nproperty can be used to design mean-reversion statistical arbi-\\ntrage investment strategies, e.g., pairs trading strategies, as we\\nwill cover later in Part III.\\n2.7 Conditional Volatility Models\\nThe previous models only model the conditional mean while always\\nkeeping the conditional volatility as a constant, e.g., see (2.14), (2.18),\\n(2.30), (2.33), (2.36), (2.42), and (2.53). In the real market, usually\\ntime-varying rather than constant volatility is observed. For example,\\na well-known phenomenon is that high volatility is more likely followed\\nby high volatility rather than low volatility and it ishence referred to as\\n“volatility clustering”. Let us illustrate the concept with the following\\nExample 2.2.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 41}),\n",
       " Document(page_content='2.7. Conditional Volatility Models 35\\nJun−10 Jun−11 Jun−12 Jun−13 Jun−14 Jun−15−0.1−0.0500.050.1Synthetic Normal White Noise\\n  \\nWhite noise\\nConditional sample volatility\\nJun−10 Jun−11 Jun−12 Jun−13 Jun−14 Jun−15−0.1−0.0500.050.1APPLE\\n  \\nLog−returns\\nConditional sample volatility\\nFigure 2.2: White noise versus APPLE log-returns. The conditional sample volatil-\\nity is the sample standard deviation of the most recent 22 days observations (i.e.,\\nwhite noise observations or log-returns).\\nExample 2.2. We study the daily log-returns of Apple Inc. from 01-\\nJan-2010 to 08-Jul-2015. The sample volatility is σ= 1.659×10−2.\\nWe then synthetically simulate a Gaussian white noise series with zero\\nmean and variance σ2.\\nThe top panel of Figure 2.2 shows a simulated realization of the\\nGaussian white noise series and the conditional sample volatility, and\\nthe bottom panel shows that of the log-returns series of Apple Inc.\\nHere the conditional sample volatility is the sample standard deviation\\nof the most recent 22 days observations (i.e., white noise observations\\nor log-returns). Clearly, we can see that the synthetic Gaussian white\\nnoise series has quite stable conditional sample volatility while the log-\\nreturn series of Apple Inc. has volatile conditional sample volatility and\\nthere exist some volatility clusters. ■\\nIn this subsection, we mainly focus on reviewing the models of con-\\nditional volatility. Since there are many diﬀerent multivariate models', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 42}),\n",
       " Document(page_content='36 Modeling of Financial Time Series\\nof conditional volatility extending from the same univariate models, we\\nwill start with the univariate models ﬁrst and then discuss the multi-\\nvariate models.\\n2.7.1 Univariate ARCH Model\\nRecall that previously the white noise wtin the general model (2.7)\\nhas always been modeled as a zero mean noise with constant variance.\\nSince the conditional mean µtin (2.7) has been well explored in the\\nprevious parts of this chapter, without loss of generality, we focus now\\non models for conditional volatility. The autoregressive conditional het-\\neroskedasticity (ARCH) model is the ﬁrst one that focuses on modeling\\nthe conditional volatility [59]. The ARCH( m) model is\\nwt=σtzt, (2.56)\\nwhere{zt}is a white noise series with zero mean and unit variance and\\nthe conditional variance σ2\\ntis modeled by\\nσ2\\nt=α+m\\uf8fa\\ni=1αiw2\\nt−i. (2.57)\\nHere,mis a nonnegative integer, α > 0, andαi≥0for alli >0.\\nThe coeﬃcients αimust satisfy some regularity conditions so that the\\nunconditional variance of wtis ﬁnite. Also, the white noise with zero\\nmean and constant variance in model (2.7) can be regarded as a special\\ncase of (2.57) with αi= 0for alli > 0. 
We can see that the past\\ninformation is incorporated into the model by using∑m\\ni=1αiw2\\nt−ito\\nmodel the variance (or equivalently, the square of the volatility).\\nEven though the ARCH model can model the conditional het-\\neroskedasticity, it has several disadvantages [196]:\\n•positive and negative noise have the same eﬀects on volatility\\nbecause volatility modeled by (2.57) depends on the square of the\\nprevious noise; however, it is well known that they have diﬀerent\\nimpacts on the ﬁnancial assets;\\n•the ARCH model is too restrictive to capture some patterns, e.g.,\\nexcess kurtosis;', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 43}),\n",
       " Document(page_content='2.7. Conditional Volatility Models 37\\n•the ARCH model does not provide any new insight for under-\\nstanding the source of variations and only provides a mechanical\\nway to describe the behavior of the conditional variance; and\\n•ARCH models tend to overpredict the volatility because they\\nrespond slowly to large isolated noise to the return series.\\n2.7.2 Univariate GARCH Model\\nA limitation of the ARCH model is that the high volatility is not per-\\nsistent enough and it often requires many parameters to describe the\\nvolatility process. An extension called Generalized ARCH (GARCH)\\nwas proposed to overcome this drawback [28]. The GARCH( m,s)\\nmodel is\\nwt=σtzt, (2.58)\\nwhere{zt}is a white noigse series with zero mean and constant unit\\nvariance, and the conditional variance σ2\\ntis modeled by\\nσ2\\nt=α+m\\uf8fa\\ni=1αiw2\\nt−i+s\\uf8fa\\nj=1βjσ2\\nt−j. (2.59)\\nHere,mandsare nonnegative integers, α >0,αi≥0,βj≥0for all\\ni>0andj >0and∑m\\ni=1αi+∑s\\nj=1βj≤1.\\nWe can see the GARCH model (2.59) in fact is obtained by adding\\nthe term∑s\\nj=1βjσ2\\nt−jto the previous ARCH model (2.59), there-\\nfore, the volatility is more persistent and the volatility clustering phe-\\nnomenon can be modeled better. For illustrative purposes, a numerical\\nexample is provided as follows.\\nExample 2.3. We consider an ARCH( 1) model with α= 0.01andα1=\\n0.2, an ARCH( 9) model with α= 0.01andα1= 0.2/2i−1,i= 1,..., 9\\nand a GARCH( 1,1) model with α= 0.01,α1= 0.2, andβ1= 0.7.\\nFigure 2.3 shows the realization path and conditional volatilities of\\neach model. The volatility clusters of the ARCH( 1) are quite sharp and\\nthus not persistent enough. 
The higher order ARCH( 9) model over-\\ncomes the drawback to some degree however, it requires many more\\nparameters (i.e., ten parameters compared to two of the ARCH( 1)).', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 44}),\n",
       " Document(page_content='38 Modeling of Financial Time Series\\nComparatively, the GARCH( 1,1) model captures the volatility cluster-\\ning relatively more persistently and requires much less (i.e., only three)\\nparameters. ■\\n0100 200 300 400 500 600 700 800 900 1000−0.500.5Synthetic ARCH(1) Noise\\n  \\nARCH(1) Noise\\nARCH(1) σt\\n0100 200 300 400 500 600 700 800 900 1000−0.4−0.200.20.40.6Synthetic ARCH(9) Noise\\n  \\nARCH(9) Noise\\nARCH(9) σt\\n0100 200 300 400 500 600 700 800 900 1000−1−0.500.511.5Synthetic GARCH(1,1) Noise\\n  \\nGARCH(1,1) Noise\\nGARCH(1,1) σt\\nFigure 2.3: The conditional volatility of GARCH is more persistent.\\n2.7.3 Multivariate GARCH Model\\nThe multivariate noise vector is modeled as\\nwt=Σ1/2\\ntzt, (2.60)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 45}),\n",
       " Document(page_content='2.7. Conditional Volatility Models 39\\nwhere zt∈RNis an i.i.d. white noise series with zero mean and con-\\nstant covariance matrix I. Then the key part is to model Σtconditional\\non the past information Ft−1.\\nSince the ARCH model is a special case of the GARCH model, we\\nfocusontheGARCHmodelonly.Therearemanydiﬀerentmultivariate\\nextensions of the univariate GARCH model, e.g., see [16, 182]. Here we\\nfocus on introducing several popular models.\\nVEC Model\\nOne of the ﬁrst extensions is the vector (VEC) GARCH model where\\nthe conditional covariance matrix linearly depends on some past con-\\nditional covariance matrices and the cross-products of some past noise\\nas follows [30]:\\nvech(Σt) =a0+m\\uf8fa\\ni=1˜Aivech(wt−iwT\\nt−i) +s\\uf8fa\\nj=1˜Bjvech(Σt−j),(2.61)\\nwheremandsare nonnegative integers, the half-vectorization operator\\nvech(·)denotes an N(N+ 1)/2dimensional vector by vectorizing only\\nthe lower triangular part of its argument N-by-Nsquare matrix, a0is\\nanN(N+1)/2dimensional vector, and ˜Aiand˜BiareN(N+1)/2-by-\\nN(N+ 1)/2parameter matrices. This model is very ﬂexible; however,\\nin general it does not guarantee a positive deﬁnite covariance matrix\\nΣtat each time and the number of parameters is very large unless N\\nis small.\\nDiagonal VEC Model\\nA more parameter parsimonious model is to assume that ˜Aiand ˜Bi\\nare diagonal, and the model is referred to as a diagonal VEC (DVEC)\\nmodel [30], which can be simpliﬁed as\\nΣt=A0+m\\uf8fa\\ni=1Ai⊙(wt−iwT\\nt−i) +s\\uf8fa\\nj=1Bj⊙Σt−j,(2.62)\\nwhere Ai,Bj∈RN×Nare symmetric matrix parameters. Here, the op-\\nerator⊙denotestheHadamardproduct,i.e.,theelement-wiseproduct,', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 46}),\n",
       " Document(page_content='40 Modeling of Financial Time Series\\nandAiandBjcan be interpreted as moving weight matrices. However,\\nthe DVEC model still may not guarantee a positive deﬁnite covariance\\nmatrix Σtat each time.\\nBEKK Model\\nLater, the BEKK model is proposed to guarantee the conditional co-\\nvariance matrix Σtto be positive deﬁnite [62]:\\nΣt=A0AT\\n0+m\\uf8fa\\ni=1Ai(wt−iwT\\nt−i)AT\\ni+s\\uf8fa\\nj=1BjΣt−jBT\\nj,(2.63)\\nwheremandsare nonnegative integers, Ai,Bj∈RN×Nare matrix\\nparameters, and A0is lower triangular. Clearly, this model ensures a\\npositive deﬁnite matrix Σtprovided that A0AT\\n0is positive deﬁnite;\\nhowever, now the parameters AiandBjdo not have direct interpre-\\ntations.\\nCCC Model\\nAnother model that restricts the number of model parameters and\\nguaranteesthepositivedeﬁniteconditionalvarianceestimateisthecon-\\nstant conditional correlation (CCC) model [29]. The underlying idea is\\nto assume that the conditional heteroskedasticity only exists in each as-\\nset and their correlations are constant. Mathematically, the conditional\\ncovariance matrix Σtis decomposed as follows:\\nΣt=DtCDt (2.64)\\nwhere Dt= Diag([σ1,t,...,σN,t])is the time-varying conditional\\nvolatilities of each stock and Cis the CCC matrix of the standard-\\nized noise vector.\\nThen the conditional volatilities and correlations are modeled sep-\\narately. For example, the conditional volatilities are modeled by N\\nunivariate GARCH models. Regarding the CCC matrix C, it simply\\nequals the conditional covariance of the following deﬁned standardized\\nnoise vector:\\nηt≜D−1\\ntwt, (2.65)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 47}),\n",
       " Document(page_content='2.7. Conditional Volatility Models 41\\nthat is,\\nE\\uf8f3\\nηtηT\\nt|Ft−1\\uf8f2\\n=D−1\\ntΣtD−1\\nt=C. (2.66)\\nIn practice, the CCC matrix Cis modeled as the covariance matrix\\nof the estimated standardized noise ˆηt≜ˆD−1\\ntwtwhere ˆDtis the esti-\\nmated conditional volatilities of each asset [29].\\nDCC Model\\nThe main limit of the CCC model is that the correlation is constant\\nand there are no spillover and feedback eﬀects across the conditional\\nvolatilities. To overcome this drawback, a dynamic conditional correla-\\ntion (DCC) model is proposed [60]:\\nΣt=DtCtDt. (2.67)\\nCompared with the CCC model (2.64), the only diﬀerence is that now\\nthe conditional correlation matrix Ctis time-dependent.\\nTo ensure that the estimate of the DCC matrix Ctis a matrix\\ncontaining correlation coeﬃcients, e.g., diagonal elements equal to 1,\\nEngle [60] modeled it as follows. The ij-th element of DCC matrix is\\nmodeled as\\nρij,t=qij,t√qii,tqjj,t(2.68)\\nand then each qij,tis modeled by a simple GARCH( 1,1) model:\\nqij,t=α(ηi,t−1ηT\\nj,t−1) + (1−α)qij,t−1. (2.69)\\nModel (2.69) admits a compact matrix notation as follows:\\nQt=α(ηt−1ηT\\nt−1) + (1−α)Qt−1 (2.70)\\nand thus\\nCt= Diag−1/2(Qt)QtDiag−1/2(Qt). (2.71)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 48}),\n",
       " Document(page_content='42 Modeling of Financial Time Series\\n2.8 Summary of Diﬀerent Models and Their Limitations\\n2.8.1 Summary\\nUntilnowwehavebrieﬂyreviewedmostofthebasicmodelsforthetime\\nseries of ﬁnancial markets, i.e., I.I.D. model, VARMA model, VECM,\\nand multivariate ARCH/GARCH model.\\nTable 2.1 provides a compact summary of all the models. In prac-\\ntice, the models of conditional mean and covariance matrix can always\\nbe combined together, for example, VARMA and GARCH can be used\\nto model the conditional mean and volatility jointly to ﬁt the real ﬁ-\\nnancial data better.\\n2.8.2 Limitations\\nThe previous covered models mainly work for daily, weekly, monthly,\\nor yearly investments and they also have some limitations.\\nNot Valid for High-Frequency Trading\\nWhen the investment interval becomes very small, say several minutes,\\nseveral seconds or even shorter, the previous models become invalid\\nand one reaches like a “quantum regime” where things are not ﬂuid\\nanymore but quantized into a limit order book. The limit order book\\ncontains the list of all kinds of orders with the information of order sign\\n(buy or sell), price, quantity, and timestamp at any given time point,\\nand the records of the dynamics of the limit order book in general are\\nreferred to as high-frequency data or tick data. For investments based\\non high-frequency data, not only do the models (for high-frequency\\ndata) matter [97] but also the practical computer and internet commu-\\nnication technologies [4].\\nFact 2.1. For high-frequency trading, the computer and internet com-\\nmunication technologies are extremely important. For example, high-\\nfrequency trading strategies require the execution of the orders with\\nextremely low latency because high latency may push the price in\\nthe adverse direction and reduce the proﬁtability signiﬁcantly. 
To re-', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 49}),\n",
       " Document(page_content='2.8. Summary of Diﬀerent Models and Their Limitations 43\\nTable 2.1: Summary of diﬀerent ﬁnancial models.\\nModel\\nStructurert=µt+wt\\nGeneral\\nCond.\\nMeanµt=φ0+Πxt+∑p\\ni=1Φirt−i−∑q\\nj=1Θjwt−j\\nGeneral\\nCond.\\nVolatilityΣt=A0AT\\n0+m\\uf8fa\\ni=1Ai(wt−iwT\\nt−i)AT\\ni\\n+s\\uf8fa\\nj=1BjΣt−jBT\\nj\\nModelsCond. Mean\\nModelCond. Volatility\\nModel\\nI.I.D.\\nModelconst.: Π=0, p=\\nq= 0const.:m=s= 0\\nFactor\\nModelsxt=ft, p=q= 0 const.:m=s= 0\\nVAR Model Π=0, q= 0 const.:m=s= 0\\nVMA\\nModelΠ=0, p= 0 const.:m=s= 0\\nVARMA\\nModelΠ=0 const.:m=s= 0\\nVECM xt=yt−1 const.:m=s= 0\\nARCH\\nModelconst.: Π=0, p=\\nq= 0s= 0\\nGARCH\\nModelconst.: Π=0, p=\\nq= 0General Cond.\\nVolatility Model', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 50}),\n",
       " Document(page_content='44 Modeling of Financial Time Series\\nduce such latency, nowadays many stock exchanges, e.g., NASDAQ3,\\nHKEx4, etc., provide a “co-location” service that oﬀers all customers\\nthe opportunity to co-locate their servers and equipment within the\\ndata centers of the stock exchanges. ■\\nHeavy Tails Issue\\nAnother limitation is that most models implicitly assume a Gaussian\\ndistribution for mathematical simplicity [143]. However, for ﬁnancial\\ndata it is known that the ﬁnancial distributions have heavy tails in\\npractice and the Gaussian assumption may totally fail simply because\\nit predicts the large price changes much less likely than the actual case;\\nsee the following Fact 2.2 and the illustrative Example 2.4.\\nFact2.2. TheBlack-Scholesmodel[25]isasimplemathematicalmodel\\nand is famous for describing the option prices. However, it completely\\nfailed in practice because it assumed a Brownian model which trans-\\nlated into a Gaussian assumption, and underestimated the very possi-\\nbility of a global crisis. In fact, the abuse of this model led to the crash\\nin October 1987 during which the US market dropped 23%in a single\\nday [31]. ■\\nExample 2.4. We study the daily log-returns of the S&P500 index\\nfrom 04-Jan-2010 to 04-Feb-2015. The sample mean and variance are\\n4.5966×10−4and1.0199×10−4, respectively.\\nFigure 2.4 shows the empirical quantiles the log-returns of the\\nS&P500 index versus the theoretical quantiles of the Gaussian distri-\\nbutionN(4.5966×10−4,1.0199×10−4). The ﬁgure is plotted using the\\nMATLAB function qqplotwhich uses symbol ’+’ to denote the sam-\\nple data and superimposes a line joining the ﬁrst and third quartiles\\nof each distribution (this is a robust linear ﬁt of the order statistics of\\nthe two samples). 
We can see that the empirical data has much heavier\\ntails than the Gaussian distribution since the values of the small em-\\npirical quantiles are much smaller than the theoretical Gaussian ones\\n3http://www.nasdaqomx.com/transactions/technicalinformation/connectivity\\n4http://www.hkex.com.hk/eng/prod/hosting/hostingservices.htm', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 51}),\n",
       " Document(page_content='2.8. Summary of Diﬀerent Models and Their Limitations 45\\n−0.04 −0.03 −0.02 −0.01 0 0.01 0.02 0.03 0.04−0.08−0.06−0.04−0.0200.020.040.06\\nQuantiles of Gaussian distributionQuantiles of the log−returns of S&P500\\nFigure 2.4: Quantile-Quantile plot of the daily log-returns of the S&P500 index\\nversus the Gaussian distribution with the same mean and standard deviation.\\nand the values of the large empirical quantiles are much larger than the\\ntheoretical Gaussian ones. This practical issue is very important as it\\nis very diﬀerent from signal processing and communications where the\\nnoise is typically assumed to be Gaussian. ■\\nPart of this issue can be overcome by changing the Gaussian as-\\nsumption to some other distribution with heavier tails, see parameter\\nestimation in Chapter 3.\\nLack of Stationarity of Real Data\\nThe lack of stationarity of real data is also a critical limitation. Even if\\nthe models were accurate, the parameters deﬁning them would change\\nover time at a pace faster than one can properly estimate. Thus the\\ncalibrated models would always be prone to many estimation errors or,\\neven worse, the regime of the market may change and the previously\\nﬁtted models may be totally wrong [11].', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 52}),\n",
       " Document(page_content='46 Modeling of Financial Time Series\\nSmall Sample Regime\\nAnother limitation, which arises in part from the lack of stationarity\\nof data, is the lack of enough supply of historical data for ﬁtting and\\nestimationpurposes,especiallywhenthemodeldimensionislarge.This\\nﬁts into the realm of a small sample regime for high-dimensional data\\nthat appears in some big data problems. Some methods to overcome\\nthis limitation will be discussed in parameter estimation in Chapter 3.\\nOther Practical Limitations\\nApart from the above limitations, there are many other limitations due\\nto small practical details. For example, some stocks may have a longer\\nhistory than others, some stocks may not trade exactly the same days\\nas others, and for the daily period it is not clear whether one should\\nuse the open price, close price, maximum price, or minimum price of\\neach day. Some sophisticated methods involving diﬀerent prices have\\nbeen proposed [80, 84, 209].\\nAnother practical issue the above models do not consider is the\\nliquidity of the asset. This is important for exactly when to execute\\nan order in the market and this will be covered in Chapter 4 order\\nexecution.\\n2.8.3 Concluding Remarks\\nPractical implementations are more complicated than the nice and\\nclean mathematical models covered in this chapter; however, it is still\\nmeaningful to understand them because in principal “all models are\\nfalse but some models are useful” [171]. It is always necessary to inves-\\ntigate various models with their limitations and thus one can pick up\\nthe most useful model for his/her own purposes of investment.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 53}),\n",
       " Document(page_content='3\\nModeling Fitting: Mean and Covariance Matrix\\nEstimators\\nModels need to be ﬁtted to real data before being used in practice.\\nThe previous chapter introduced various time series market models.\\nThis chapter focuses on the estimation of the model parameters, more\\nspeciﬁcally, the mean vector and covariance matrix.\\nSection 3.1 brieﬂy introduces the practical ﬁtting process and dif-\\nferent types of estimation methods. Section 3.2 considers some speciﬁc\\nexamples for the large sample regime as a warm up, and it is followed\\nby several practical challenges, i.e., the small sample regime in Section\\n3.3, the heavy tail issue in Section 3.4, and their combination in Sec-\\ntion 3.5. At the end, Section 3.6 brieﬂy summarizes all the estimation\\nmethods.\\n3.1 Fitting Process, Types of Estimators, and Main Focus\\n3.1.1 Fitting Process\\nFigure 3.1 shows the practical ﬁtting process, roughly speaking, it can\\nbe decomposed into two parts: in-sample training and out-of-sample\\ntesting [95].\\n47', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 54}),\n",
       " Document(page_content='48 Modeling Fitting: Mean and Covariance Matrix Estimators\\nTrainingCross-\\nValidationTestingIn-Sample DataOut-Of-\\nSample Data\\nFigure 3.1: Fitting process.\\nA naive example of portfolio optimization is that, at the end of each\\nmonth,onecanalwaysusethesamplecovariancematrixofthepastone\\nyear daily returns of multiple stocks as the covariance matrix estimate\\nand then compute the minimum-variance portfolio and hold it in the\\nupcoming month to investigate the out-of-sample performance. The\\ndata of the past one year daily returns is the in-sample data and the\\ndata of the daily returns in the upcoming month is the out-of-sample\\ndata.\\nTheaboveexampleisonlyanoversimpliﬁedexample;inpractice,to\\nimprove the out-of-sample testing results, there may exist some tuning\\nparameters (e.g., see the shrinkage trade-oﬀ parameters in the shrink-\\nage estimators in Section 3.3 later) in the estimators and the in-sample\\ntrainingcanbefurtherdecomposedintotwosteps:i)splitthein-sample\\ndata into training data and cross-validation data and ﬁt the training\\ndata to the model with diﬀerent (discretely sampled) tuning parame-\\nters, and ii) ﬁnd out the parameter that gives the best cross-validation\\ncriterion of interest and then ﬁt the in-sample data as a whole (i.e., the\\ntraining and validation data together) to the model with the selected\\ntuning parameter. Step i) of choosing the optimal tuning parameter\\nis usually referred to as the cross-validation method in the literature\\n[95]. After the in-sample training, one can conduct the out-of-sample\\ntesting.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 55}),\n",
       " Document(page_content='3.1. Fitting Process, Types of Estimators, and Main Focus 49\\n3.1.2 Diﬀerent Types of Estimators\\nIn statistics, an estimator is simply a function of the current informa-\\ntion (i.e., the observations) that computes a quantity of interest (e.g.,\\nthe mean, the covariance matrix, or the other model parameters). The\\ncomputed (or estimated) value is referred to as the estimate. Roughly\\nspeaking, there are three main types of estimators [143].\\nNon-parametric Estimators. Non-parametric estimators do not as-\\nsume the observations follow any speciﬁc distribution but estimate the\\nquantity of interest from the observations based on the law of large\\nnumbers. For example, the sample mean and the sample covariance\\nmatrix are two typical non-parametric estimators. In general, a large\\nnumber of samples is required to ensure a low estimation error.\\nMaximum Likelihood Estimators (MLEs). In practice, the number\\nof samples may not be large enough, and non-parametric methods may\\nnot provide satisfactory estimates. An alternative method is the para-\\nmetricapproach,thatis,weﬁrstassumethattheobservationsfollowan\\nunderlying distribution with some unknown parameters and then de-\\nﬁne the estimates as the maximizer of the likelihood of the observations\\novertheunknownparameters.Forobviousreasons,theseestimatorsare\\nreferred to as maximum likelihood estimators.\\nShrinkage-Bayesian Estimators. For some applications, the number\\nof samples may be too small compared to the data dimension and both\\nthe non-parametric and maximum likelihood (ML) estimators may not\\nprovide reliable estimates. In the literature, there are two (related)\\nways to improve the estimates. The ﬁrst approach is to shrink the es-\\ntimate to a given ﬁxed target to get a new estimate. Some examples\\nare the shrinkage covariance matrix in ﬁnancial engineering and the\\ndiagonally loaded interference-plus-noise covariance matrix in signal\\nprocessing. 
The second approach is to incorporate some Bayesian prior\\ninformation into an estimator by adding a proper regularization term\\nto the selected likelihood function (since combining the likelihood with\\nthe prior information simply results in a posterior distribution, this', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 56}),\n",
       " Document(page_content='50 Modeling Fitting: Mean and Covariance Matrix Estimators\\nmethod is also referred to as the maximum a posterior (MAP) estima-\\ntion method). These two methods are closely related in the sense that\\na shrinkage estimator can usually be alternatively derived by adding a\\nproper regularization term to a speciﬁc likelihood function.\\nComparison on Diﬀerent Types of Estimators\\nIn general, when the number of samples is large enough, the non-\\nparametric estimators already perform well due to the law of large\\nnumbers, the MLEs also work ﬁne assuming the distribution of the\\nobservations is not far away from the underlying true one, and the\\nShrinkage-Bayesian may underperform if the observed data does not\\nquite ﬁt the assumed prior or shrinkage target. When the number\\nof samples is medium, then the non-parametric estimators degener-\\nate too much and MLEs are still relatively reliable. When the number\\nof samples becomes too small, neither of the estimators are reliable and\\nshrinking to some target or incorporating some prior information into\\nthe estimator usually improves the estimation quality to some degree.\\n3.2 Warm Up: Large Sample Regime\\nIn this section we start with the large sample regime for the I.I.D.\\nmodel under diﬀerent distribution assumptions as a warm up and then\\nwe point out the real challenges faced in practice.\\n3.2.1 I.I.D. Model\\nLet us start with the simplest I.I.D. model, i.e.,\\nrt=µ+wt, (3.1)\\nwhereµ∈RNis the mean and wt∈RNis a white noise series with\\nzero mean and constant covariance matrix Σ.\\nSuppose we have Tobservations of the log-returns rt,t= 1,...,T\\nand they are drawn according to (3.1). Then estimating the model\\nsimply means estimating µandΣ.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 57}),\n",
       " Document(page_content='3.2. Warm Up: Large Sample Regime 51\\nSample Mean and Sample Covariance Matrix\\nIntuitively, the most straightforward estimators are the sample aver-\\nages. The sample mean is\\nˆµ=1\\nTT\\uf8fa\\nt=1rt, (3.2)\\nand the sample covariance is\\nˆΣ=1\\nTT\\uf8fa\\nt=1(rt−ˆµ)(rt−ˆµ)T. (3.3)\\nThe popularity of such estimators comes from the law of large num-\\nbers (LLN) that under fairly general conditions the sample average\\nestimates approximate the true expectations and the approximation\\naccuracy increases as the number of samples increases, e.g., ˆµ→µ\\nand ˆΣ→ΣasT→+∞[101, 140].\\nLeast-Square (LS) Estimation\\nWe can ﬁrst estimate the mean via minimizing the least-square error\\nin theTobserved i.i.d. samples, that is,\\nminimizeµ1\\nTT\\uf8fa\\nt=1∥rt−µ∥2\\n2. (3.4)\\nSetting the derivative of the objective w.r.t. µyields as the optimal\\nsolution the same as the sample mean stated in (3.2). Then, the sample\\ncovariance matrix of the residuals coincides with the sample covariance\\nmatrix stated in (3.3).\\nNote that both the sample average and LS estimation methods do\\nnot assume the speciﬁc distribution of rt, thus they belong to the non-\\nparametric approach.\\nML Estimation\\nIf we assume the underlying distribution is known, then the MLE can\\nbe employed. Here, we assume rtare i.i.d. and follow an elliptical dis-\\ntribution [141, 140, 101]:\\nrt∼EL(µ,Σ,g), (3.5)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 58}),\n",
       " Document(page_content='52 Modeling Fitting: Mean and Covariance Matrix Estimators\\nwhereµ∈RNis a mean vector, Σ∈RN×Nis a positive deﬁnite\\ndispersion (or scatter) matrix and gis a probability density generator\\nthat mainly determines the thickness of the tails. The corresponding\\npdf function is given as follows:\\nf(r) =1√\\n|Σ|g(\\n(r−µ)TΣ−1(r−µ))\\n. (3.6)\\nThe problem of model estimation consists of estimating the param-\\netersµandΣfrom the observations by maximizing the likelihood (3.6)\\nas a function of the parameters µandΣfor given observations.\\nGiven the Ti.i.d. samples rt,t= 1,...,T, the negative log-\\nlikelihood of such Tsamples is\\nℓ(µ,Σ) =−logT\\uf8f1\\nt=1f(rt) (3.7)\\n=T\\n2log|Σ|−T\\uf8fa\\nt=1log(\\ng(\\n(rt−µ)TΣ−1(rt−µ)))\\n(3.8)\\nand then the estimates are the minimizer of ℓ(µ,Σ), i.e.,\\n(µ,Σ)∈arg min\\nµ,Σ≻0ℓ(µ,Σ). (3.9)\\nFor clarity of presentation, ﬁrst denote\\ndt≜(rt−µ)TΣ−1(rt−µ). (3.10)\\nThen ﬁnding the derivative of ℓ(µ,Σ)w.r.t.µandΣ−1yields\\n∂ℓ\\n∂µ=−T\\uf8fa\\nt=1∂log (g(dt))\\n∂µ=−T\\uf8fa\\nt=1g′(dt)\\ng(dt)∂dt\\n∂µ(3.11)\\n=−T\\uf8fa\\nt=12g′(dt)\\ng(dt)Σ−1(rt−µ) (3.12)\\n∂ℓ\\n∂Σ−1=−T\\n2Σ−T\\uf8fa\\nt=1g′(dt)\\ng(dt)(rt−µ) (rt−µ)T(3.13)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 59}),\n",
       " Document(page_content='3.2. Warm Up: Large Sample Regime 53\\nand setting both (3.12) and (3.13) to zero yields1\\nµ=T\\uf8fa\\nt=1w(dt)\\n∑T\\ni=1w(di)rt (3.15)\\nΣ=1\\nTT\\uf8fa\\nt=1w(dt) (rt−µ) (rt−µ)T(3.16)\\nwhere\\nw(x)≜−2g′(x)\\ng(x)= (−2 logg(x))′. (3.17)\\nNote that Σsatisfying (3.16) must be positive deﬁnite (with proba-\\nbility one) when Tis large enough (i.e., T≥N+ 1), thus the solutions\\nof (3.15) and (3.16) are the minimizers of ℓ(µ,Σ).\\nGaussian Distribution. Note that the Gaussian distribution is a spe-\\ncial case of the elliptical distribution with\\ngG(x)≜e−x/2\\n(2π)N/2, (3.18)\\nand from (3.17) we have\\nwG(x) = 1. (3.19)\\nInterestingly, we can see that the relationships (3.15) and (3.16) re-\\nduce to the sample averages (3.2) and (3.3), respectively. Thus, when\\nestimating mean and covariance, both the non-parametric least-square\\nestimation and the parametric MLE under Gaussian assumption coin-\\ncide with the sample average estimations.\\nStudent-tDistribution. As mentioned before, the ﬁnancial noise usu-\\nally have heavier tails than the Gaussian assumption. In practice,\\n1An implicit expression of (3.15) is\\n0=T\\uf8fa\\nt=1w(dt) (rt−µ). (3.14)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 60}),\n",
       " Document(page_content='54 Modeling Fitting: Mean and Covariance Matrix Estimators\\nthis characteristic can be captured by an elliptical distribution called\\nStudent-tdistribution and the density generator function reads\\ngS(x)≜Γ(\\nν+N\\n2)\\nΓ(ν\\n2)(νπ)N/2(1 +x/ν)−1+N\\n2, (3.20)\\nand from (3.17) we have\\nwS(x) =ν+N\\nν+x. (3.21)\\nHere, the parameter ν >0is the degree of freedom: the smaller νis,\\nthe heavier the tails are. It can be shown that the Student- tconverges\\nto the Gaussian distribution, i.e., gS(x)→gG(x)asν→+∞[141].\\nCauchy Distribution. A special case of the Student- tdistribution is\\nν= 1and the density generator function is\\ngC(x)≜Γ(\\n1+N\\n2)\\nΓ(\\n1\\n2)\\n(π)N/2(1 +x)−1+N\\n2, (3.22)\\nand from (3.17) we have\\nwC(x) =1 +N\\n1 +x. (3.23)\\nSinceν= 1<+∞, the Cauchy distribution usually has much heavier\\ntails than the Gaussian distribution.\\nGaussian versus Cauchy Distributions. Now we compare the MLE\\nof Gaussian and Cauchy distributions. Figure 3.2 shows the one-\\ndimensional pdfs of the standard Gaussian and the standard Cauchy\\ndistributions. We can see that the standard Gaussian distribution is\\nthin-tailed and the standard Cauchy distribution is heavy-tailed. Then\\nwe can interpret their weights in (3.19) and (3.23) as follows.\\nThe Gaussian weights wG(dt)are constant from (3.19), this is be-\\ncause the Gaussian distribution is very thin-tailed and the observations\\nwith largedt(recall that dt= (rt−µ)TΣ−1(rt−µ)given in (3.10)) are\\nrelatively rare. So if an observation is far away from the mean position', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 61}),\n",
       " Document(page_content='3.2. Warm Up: Large Sample Regime 55\\n−5 −4 −3 −2 −1 0 1 2 3 4 500.050.10.150.20.250.30.350.4\\n  \\nNormal\\nCauchy\\nFigure 3.2: Comparison of one-dimensional Gaussian and Cauchy distributions.\\n(i.e.,dtis large), the only reason is that the dispersion is large and thus\\nthe Gaussian MLE assigns all the samples the same weights.\\nCompared with the Gaussian weights wG(dt), the Cauchy weights\\nwC(dt)are smaller for the extreme events (i.e., the observations with\\nlargedt) from (3.23). This is because the Cauchy distribution is heavy-\\ntailed and the observations with large dtare relatively more frequent\\nand then the Cauchy MLE tends to give the extreme events smaller\\nweights to diminish their negative eﬀect that would otherwise distort\\nthe estimates.\\nFrom the above comparison, we can see that the MLE of the Gaus-\\nsian distribution is more easily aﬀected by extreme observations or out-\\nliers (because the outliers usually have large dtas well) and the MLE\\nof the Cauchy distribution is more robust to the extreme observations\\nand outliers. This is an important property and will be explored in\\nSections 3.4 and 3.5 later. Nevertheless, here we still use an illustrative\\nexample to show the importance of the robust estimation.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 62}),\n",
       " Document(page_content='56 Modeling Fitting: Mean and Covariance Matrix Estimators\\nExample 3.1. Suppose the dimension is N= 2and we draw T= 40\\ni.i.d. samples rt∼N (0,Σ)where\\nΣ=)\\n1 0.8\\n0.8 1[\\n, (3.24)\\nand four i.i.d. outliers from N()\\n−2\\n2[\\n,Σ(\\n. We assume we know the\\nunderlying distribution is Gaussian with the mean known and we aim\\nto estimate the covariance only.\\nFigure 3.3 shows the result of a speciﬁc realization. We can see that\\nthe Gaussian MLE is too sensitive to the outliers while the Cauchy\\nMLE is more robust and provides much better estimation.\\nGaussian MLE: The Gaussian MLE is computed from the sample\\ncovariance (3.3) with given mean µ=0:\\nˆΣG=)\\n1.4407 0.4552\\n0.4552 1.1807[\\n. (3.25)\\nCauchy MLE: First we can ﬁnd the solution of (3.16) with given\\nmeanµ=0and weight functions deﬁned as (3.23), and we denote\\nit as ˆΣShape. Then the estimated covariance matrix of the underlying\\nGaussian distribution is ˆΣC=ˆΣShape/cwherecis the size parameter\\ndeﬁned as the solution of (3.55)2. We will see this procedure more\\nclearly in Section 3.4.1. Here, we have\\nˆΣC=)\\n1.0351 0.7584\\n0.7584 1.0127[\\n. (3.26)\\nNumerically, the Cauchy MLE ˆΣCis much closer to the true covariance\\nΣthan the Gaussian MLE ˆΣG. It veriﬁes the result in Figure 3.3.\\nThe MATLAB code of this example is included in Appendix A. ■\\n3.2.2 Other Models and Main Focus\\nThe estimations of the other models more or less follow a similar pro-\\ncedure: ﬁrst always rewrite the noise in terms of observations and pa-\\n2For this example, given N= 2,w2(x) =wC(x) =N+1\\n1+x, solving (3.55), i.e.,\\uf8f6+∞\\n0wC(x\\nc)x\\ncχ2\\nN(x)dx=N, yieldsc= 0.4944.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 63}),\n",
       " Document(page_content='3.2. Warm Up: Large Sample Regime 57\\n−4 −3 −2 −1 0 1 2 3 4−4−3−2−101234\\n  \\nSamples\\nOutliers\\nOracle\\nMLE: Cauchy\\nMLE: Normal\\nFigure 3.3: Comparison of Gaussian and Cauchy MLEs.\\nrameters, then if the pdf of the noise is known employ MLE, otherwise\\nuse a simple LS estimation. For example, the LS estimation method\\nis used to estimate the parameters of an i.i.d. model, i.e., the condi-\\ntional mean vector and covariance matrix. It is also widely used to\\nestimate the linear coeﬃcients of a VAR model and is based on which\\nthe conditional mean vector and covariance matrix can be computed\\ndirectly [129, 197]. And interestingly, as shown before, the LS estima-\\ntion methods coincides with the maximum likelihood estimation under\\nthe Gaussian noise assumption. For the estimation of diﬀerent multi-\\nvariate time series models, i.e., VAR, VARMA, VECM, and GARCH,\\nthe book [129] provides a good and comprehensive summary where the\\nLS estimation and/or MLE of each model are explained in detail.\\nAlso, to overcome the possible over-ﬁtting or over-parameterization', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 64}),\n",
       " Document(page_content='58 Modeling Fitting: Mean and Covariance Matrix Estimators\\nissues due to outliers or lack of samples, the same idea of shrinkage-\\nBayesian is employed for the parameter estimation of diﬀerent models,\\ne.g., the i.i.d. model [53, 105, 187, 120, 121, 122], the factor model [40,\\n71], the VAR/VMA/VARMA model [15, 116, 126, 47, 186, 152, 129],\\nthe ARCH/GARCH model [72], etc.\\nTherefore, for clarity of presentation, the scope of this chapter is\\nnot to restate the existing well-developed estimation procedures for all\\nthe models but is to focus on the state-of-the-art estimation of the\\nmean and covariance matrix that lies at the heart of all ﬁtting meth-\\nods. In fact, the estimation of the covariance matrix is of paramount\\nimportance in the ﬁnancial engineering industry, as illustrated by the\\nfollowing fact.\\nFact 3.1. Estimating the covariance or correlation between diﬀerent\\nassets is very important in practice. Accurate covariance enables one\\nto make optimal portfolio optimization decisions and to control the risk\\nbetter. In industry there are even some ﬁnancial ﬁrms consulting on\\nestimating the covariance. For example, see Studdridge International3\\nwhich “is a high-end consulting ﬁrm specialized in estimating large-\\ndimensionalcovariancematrices,andinexploitingtheinformationthey\\ncontain to make optimal decisions”. ■\\n3.2.3 Real Challenges\\nSo far we have introduced the sample average estimators and the MLE.\\nUnfortunately, those two estimation methods are not reliable in prac-\\ntice due to the following real challenges.\\n•Small sample regime: when the number of samples is not enough\\ncomparedtothedimensionofthelog-returnvector(i.e.,thenum-\\nber of stocks considered), the sample covariance may not even\\nbe invertible. Even if it is invertible, it may still not be well-\\nconditioned and taking matrix inversion will amplify any tiny\\nerrors in the estimated covariance matrix. 
This becomes a prac-\\ntical challenge because of the rise of big data analysis in various\\napplications, including ﬁnancial engineering, signal processing,\\n3http://studdridge.com/what-we-do/', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 65}),\n",
       " Document(page_content='3.3. Small Sample Regime: Shrinkage Estimators 59\\nbioinformatics, etc. In our context, one manifestation of big data\\nrefers to the high dimensionality or large size of the universe of\\nstocks.\\n•Heavy tails issue: another critical issue in ﬁnancial engineering is\\nthat the distributions of the log-returns are always heavy-tailed\\n(cf.Section3.4).Thus,thewidelyusedGaussianassumptiondoes\\nnot hold in practice and it does not lead to a proper ﬁtting of real\\ndata. The traditional estimators based on the Gaussian assump-\\ntion are too sensitive to extreme events and outliers, and as a\\nconsequence the estimates are distorted too much to be reliable.\\nIn the following, we will explore and connect the recent diﬀerent\\nmethods developed in both ﬁnancial engineering and signal processing\\nthat deal with the above two issues.\\n3.3 Small Sample Regime: Shrinkage Estimators\\nWhen the number of samples is small compared with the data dimen-\\nsion, the total mean squared errors (MSEs) of the sample average es-\\ntimators are mainly from the variances rather than the biases of the\\nestimators [143]. It is well-known that lower MSEs can be achieved by\\nallowing for some biases [56]. This can be implemented by shrinking\\nthe sample estimators to some known target values.\\nGenerally speaking, the shrinkage estimator has the following form:\\n˜θ=ρT+ (1−ρ)ˆθ, (3.27)\\nwhere ˆθis the sample average estimator (i.e., it can be either sample\\nmean or sample covariance matrix), Tis a target which can either be\\ngiven or have a speciﬁc structure (e.g., it can be an identity matrix\\nup to a scalar for the covariance matrix estimation), ρis the shrinkage\\ntrade-oﬀ parameter, and ˜θis the shrinkage estimator.\\nNow the critical problem is how to choose the shrinkage trade-oﬀ\\nparameterρ(and sometimes the target Tas well) so that the MSE or\\nsome other criteria of interest is minimized. 
In general, there are two\\ndiﬀerent approaches of ﬁnding the optimal shrinkage trade-oﬀ param-\\neter: random matrix theory (RMT) and cross-validation.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 66}),\n",
       " Document(page_content='60 Modeling Fitting: Mean and Covariance Matrix Estimators\\nRMT.This is a theoretical approach and the idea is to ﬁrst assume\\nthe true parameter (to be estimated) is known and formulate a problem\\nthat minimizes the ideal criterion of interest. However, in practice the\\ntrue parameter is never known and then under some technical assump-\\ntions and conditions the RMT is employed to either get the asymptot-\\nically optimal trade-oﬀ parameter in closed-form expression or derive\\nan easy to solve numerical optimization problem. The advantage of this\\napproach is that the (in-sample) asymptotically optimal trade-oﬀ pa-\\nrameter can be computed directly and eﬃciently. However, we need to\\npoint out that the (in-sample) asymptotically optimal trade-oﬀ param-\\neter does not guarantee the best out-of-sample test result. Due to the\\nmathematical simplicity the RMT has also found various applications\\nin signal processing and wireless communication ﬁelds, e.g., see [199].\\nCross-Validation. ThisisthenumericalmethodintroducedinSection\\n3.1.1. This approach tends to provide better out-of-sample results since\\nthe cross-validation is used to exhaustively search for the best trade-oﬀ\\nparameter. However, it is also computationally more intensive since it\\nrequires to compute the estimates with many diﬀerent shrinkage trade-\\noﬀ parameter values in the cross-validation step.\\n3.3.1 Shrinkage Mean\\nIt is well-known from the central limit theorem that\\nˆµ∼N(\\nµ,Σ\\nT)\\n(3.28)\\nand thus the MSE of ˆµis\\nE∥ˆµ−µ∥2\\n2=1\\nTTr(Σ). (3.29)\\nSharing the same form as (3.27), the James-Stein shrinkage estima-\\ntor [53, 105, 187] for the mean aims at shrinking the sample mean to a\\ntarget b:\\n˜µ=ρb+ (1−ρ)ˆµ. (3.30)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 67}),\n",
       " Document(page_content='3.3. Small Sample Regime: Shrinkage Estimators 61\\nIt is shown that a choice of ρso that E∥˜µ−µ∥2\\n2≤E∥ˆµ−µ∥2\\n2is [143]\\n˜ρ=1\\nTN˜λ−2λ1\\n∥ˆµ−b∥2\\n2(3.31)\\nif it is positive, otherwise it is zero. Here, ˜λandλ1are the average\\nand the largest value of the eigenvalues of Σ, respectively. Intuitively,\\n˜ρvanishes as Tincreases, and the shrinkage estimator gets closer to\\nthe sample mean.\\nApart from any ﬁxed bindependent of the observations, some other\\nexamples of bare1Tˆµ\\nN1, which is the scenario-dependent grand mean\\n[143]; and1TˆΣ−1ˆµ\\n1TˆΣ−111, which is a volatility-weighted grand mean where\\nˆΣis an estimator of the covariance matrix [110].\\nExample 3.2. We setN= 40and drawT= 10,20,..., 80i.i.d. sam-\\nples fromN(0,Σ)where Σij= 0.8|i−j|. Suppose Σis known exactly,\\nwe compare the sample mean with three shrinkage estimators: i) the\\nconstant target b= 0.2×1, ii) the scenario-dependent (SD) target\\nb=1Tˆµ\\nN1, and iii) the volatility-weighted (VW) target b=1TΣ−1ˆµ\\n1TΣ−111.\\nFigure 3.4 shows the numerical results where the MSE is averaged\\nover 200realizations. We can see that the sample mean has a numer-\\nical MSE close to the theoretical one1\\nTTr(Σ), and all the shrinkage\\nestimators outperform the sample mean. Among them, the shrinkage\\nestimator with the scenario-dependent and volatility-weighted targets\\noutperforms the one with the constant target. ■\\n3.3.2 Shrinkage Scatter Matrix Based on RMT\\nNow we assume the mean is known exactly and the goal is to estimate\\nthe scatter (or dispersion, or covariance if it exists) matrix. 
For the\\nshrinkage scatter matrix, the identity matrix in general is selected as\\nthetarget,andthereexistmanyworksaimingatselectingtheshrinkage\\ntrade-oﬀ parameter according to diﬀerent criteria.\\nMSE\\nFor example, Ledoit and Wolf [121] aimed at ﬁnding the linear com-\\nbination of the sample covariance matrix ˆΣand the identity matrix', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 68}),\n",
       " Document(page_content='62 Modeling Fitting: Mean and Covariance Matrix Estimators\\n10 20 30 40 50 60 70 8000.511.522.533.544.55\\nNumber of samplesMSE\\n  \\nTr(Σ)/T\\nSample mean\\nConstant b\\nSD b\\nVW b\\nFigure 3.4: Shrinkage mean estimations.\\nsuch that the expected quadratic loss between the estimation and the\\n(unknown) true covariance Σwas minimized:\\nminimizeρ1,ρ2E\\ued79\\ued79\\ued79˜Σ−Σ\\ued79\\ued79\\ued792\\nF\\nsubject to ˜Σ=ρ1I+ρ2ˆΣ.(3.32)\\nSuppose the true covariance matrix Σis known, problem (3.32) is a\\nquadratic programming and the variables are two scalars. The optimal\\nsolution admits a closed-form as follows.\\nTheorem 3.1 ([121, Theorem 2.1]) .Problem (3.32) admits the optimal\\nsolution\\n˜Σ⋆= ˜ρ˜λI+ (1−˜ρ)ˆΣ, (3.33)\\nwhere\\n˜λ=Tr(Σ)\\nNand ˜ρ=E\\ued79\\ued79\\ued79ˆΣ−Σ\\ued79\\ued79\\ued792\\nF\\nE\\ued79\\ued79\\ued79ˆΣ−˜λI\\ued79\\ued79\\ued792\\nF. (3.34)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 69}),\n",
       " Document(page_content='3.3. Small Sample Regime: Shrinkage Estimators 63\\nUnfortunately, the true covariance Σis not known in practice,\\nhence, ˜λand ˜ρare not directly computable. Ledoit and Wolf further\\nproposed the consistent estimators of ˜λand˜ρas follows:\\nˆ˜λ=Tr(ˆΣ)\\nN, (3.35)\\nˆ˜ρ= min\\uf8eb\\n\\uf8ed1\\nT1\\nT∑T\\nt=1Tr(rtrT\\nt−ˆΣ)2\\nTr(ˆΣ−ˆ˜λI)2,1(\\n(. (3.36)\\nThen simply replacing ˜λand˜ρin (3.33) withˆ˜λandˆ˜ρyields a consistent\\nestimator of ˜Σ⋆. Intuitively, ˆ˜ρvanishes as Tincreases, and the shrink-\\nage estimator becomes closer to the sample covariance estimator. The\\nresults of (3.35) and (3.36) were derived based on the RMT, which\\nis also a popular quantitative tool in signal processing and wireless\\ncommunication, e.g., see [199].\\nInterestingly, the idea of shrinking the sample covariance matrix to\\nthe identity matrix has also been widely used in array signal processing\\nand is referred to as diagonal loading, e.g., see [1, 45, 38]. However, the\\ntrade-oﬀparameterisusuallychoseninanadhocway,e.g.,thediagonal\\nloading matrix may be chosen as ˆΣ+10σ2Iwhereσ2is the noise power\\nin a single sensor [204]. Here, (3.33) provides a more sensible way to\\nselect the trade-oﬀ parameter.\\nExample 3.3. We use the same parameter settings as Example 3.2,\\nbut now we assume the mean is known and we want to estimate the\\ncovariance matrix. For the Gaussian case, it can be shown that [143,\\n121]\\nE\\ued79\\ued79\\ued79ˆΣ−Σ\\ued79\\ued79\\ued792\\nF=1\\nT(\\nTr(Σ2) +(\\n1−1\\nT)\\n(Tr(Σ))2)\\n,(3.37)\\nE\\ued79\\ued79\\ued79ˆΣ−˜λI\\ued79\\ued79\\ued792\\nF=E\\ued79\\ued79\\ued79ˆΣ−Σ\\ued79\\ued79\\ued792\\nF+\\ued79\\ued79\\ued79Σ−˜λI\\ued79\\ued79\\ued792\\nF. (3.38)\\nIf we knew Σ, we could obtain ˜Σ⋆in (3.33) directly. It is referred\\nto as the “oracle” estimator since Σis never known in practice. 
The\\npractical estimator obtained by replacing ˜λand˜ρin (3.33) withˆ˜λand\\nˆ˜ρis referred to as the Ledoit-Wolf (LW) estimator.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 70}),\n",
       " Document(page_content='64 Modeling Fitting: Mean and Covariance Matrix Estimators\\n10 20 30 40 50 60 70 80050100150200250300\\nNumber of samplesMSE\\n  \\n(Tr(Σ2)+(1−1/T)(Tr( Σ))2)/T\\nSCM\\nLW\\nOracle\\nFigure 3.5: Shrinkage covariance estimation.\\nFigure 3.5 shows the numerical results where the MSEs are aver-\\naged over 200realizations. We can see that the sample covariance has a\\nnumerical MSE close to the theoretical one, i.e., (3.37) and the shrink-\\nage LW estimator outperforms the sample covariance. Interestingly, the\\nLW estimator performs closely to the oracle estimator. ■\\nQuadratic Loss of Precision Matrix\\nFor many cases, it is the precision matrix (i.e., the inverse of the dis-\\npersion or covariance matrix) that is used in practice, e.g., see the\\nminimum variance (MV) portfolio\\nwMV=Σ−11\\n1TΣ−11, (3.39)\\nwhich is the optimal solution of (1.3) introduced before.\\nSince the inversion operation can dramatically amplify the estima-\\ntion error, for applications similar to the minimum variance portfolio,\\nit is more sensible to minimize the estimation error in the precision', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 71}),\n",
       " Document(page_content='3.3. Small Sample Regime: Shrinkage Estimators 65\\nmatrix directly instead of minimizing the estimation error in the co-\\nvariance matrix.\\nBased on the shrinkage structure, Zhang et al. [212] considered\\nthe problem of minimizing the quadratic loss of the precision matrix\\ndirectly as follows:\\nminimize\\nρ≥0,T∈D+1\\nN\\ued79\\ued79\\ued79˜Σ−1−Σ−1\\ued79\\ued79\\ued792\\nF\\nsubject to ˜Σ=ρI+1\\nTRTRT,(3.40)\\nwhere R= [r1,...,rT]∈RN×Tis the data matrix of Tobservations,\\nandT∈D +is aT-by-Tnonnegative and diagonal weight matrix.\\nEven if the true covariance matrix Σis known, problem (3.40) is\\nmuch harder than problem (3.32) because the objective of problem\\n(3.40) cannot be explicitly computed anymore.\\nUnder some technical conditions, Zhang et al. [212] showed that\\nasymptoticallythereexistsaglobaloptimalsolutionoftheform (ρ,T=\\nαI)and derived the following asymptotic problem:\\nminimize\\nρ≥0,α≥01\\nN\\ued79\\ued79\\ued79˜Σ−1−ˆΣ−1\\ued79\\ued79\\ued792\\nF\\n+2\\nNTr(\\nρ−1(ˆ˜δ˜Σ−1−(1−cN)ˆΣ−1)\\n+ˆΣ−1˜Σ−1)\\n−(2cN−c2\\nN)1\\nNTr(ˆΣ−2)\\n−(cN−c2\\nN)(1\\nNTr(ˆΣ−1))2\\nsubject to ˜Σ=ρI+αˆΣ,\\nˆ˜δ=α(\\n1−1\\nTTr(\\nαˆΣ˜Σ−1))\\n,(3.41)\\nwherecN≜N\\nTandˆ˜δare intermediate parameters.\\nWe can understand (3.41) as thus, it replaces the unknown true\\ncovariance matrix Σwith the explicitly computable sample covariance\\nmatrix ˆΣand then adds some correction terms to increase the approx-\\nimation accuracy.\\nProblem (3.41) is nonconvex but it can be solved via exhaustive\\nsearch since there are only two scalar variables ρ≥0andα≥0.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 72}),\n",
       " Document(page_content='66 Modeling Fitting: Mean and Covariance Matrix Estimators\\n60 70 80 90 100 110 120101520253035\\nNumber of samplesQuadratic loss in db\\n  \\nSCM\\nLW\\nZRP\\nOracle\\nFigure 3.6: Shrinkage precision matrix estimations.\\nExample 3.4. SupposeN= 40and the i.i.d. samples are drawn from\\nN(0,Σ)where Σij= 0.9|i−j|. The number of samples Tvaries as\\n60,70,..., 120. We compare the following four estimators: i) the inverse\\nof the sample covariance matrix, ii) the inverse of the LW covariance\\nestimator, iii) the exhaustive search solution of problem (3.41) over\\n(ρ,α)≥0, which is referred to as the ZRP estimator, and iv) the ex-\\nhaustive search solution of problem (3.40) with the structure (ρ,αI)\\nover (ρ,α)≥0assuming that Σis known, is referred to as the “oracle”\\nestimator since Σis never known in practice.\\nFigure 3.6 shows the numerical results where the quadratic loss is\\naveraged over 200realizations. We can see that estimating the precision\\nmatrix directly provides lower quadratic losses. ■\\nRemark 3.1. Note that problem (3.41) requires the sample covariance\\nmatrix to be invertible. For the singular case, Zhang et al. [212] studied\\nan alternative loss function called Stein’s loss. For simplicity, we have\\nonly included the results of quadratic loss here. ■', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 73}),\n",
       " Document(page_content='3.3. Small Sample Regime: Shrinkage Estimators 67\\nSharpe Ratio\\nAll the previous works focus on selecting the shrinkage trade-oﬀ pa-\\nrameter to improve the covariance (or precision) estimation accuracy,\\nand recall that the target of an investor is always to achieve better\\nout-of-sample result (e.g., higher realized Sharpe ratio). Even though\\nan accurate covariance (or precision) estimator necessarily leads to a\\nbetter out-of-sample result, a more sensible approach is to select the\\nshrinkage trade-oﬀ parameter so that the out-of-sample criterion of\\ninterest is optimized directly.\\nHere we take the Sharpe ratio (with risk-free return being zero)\\nSR =wTµ√\\nwTΣw(3.42)\\nas the criterion of interest and an optimal solution is given by [65] (also\\nsee (5.15) later)\\nw⋆\\nSR∝Σ−1µ. (3.43)\\nIn practice the true values of µandΣare never known and the esti-\\nmates from the training samples are used instead. In [213], the sample\\nmean ˆµand the shrinkage covariance matrix\\n˜Σ=ρ1I+ρ2ˆΣ, (3.44)\\nwhere ˆΣis the sample covariance matrix, are used and the resulted\\nportfolio is\\nˆwSR∝˜Σ−1ˆµ. (3.45)\\nNote that the Sharpe ratio (3.42) is scale invariant in w, thusρ2\\ncan be arbitrarily set to 1and the more sensible approach proposed\\nin [213] is to ﬁnd the shrinkage trade-oﬀ parameter ρ1such that the\\nrealized out-of-sample Sharpe ratio of the portfolio ˆwSRis maximized:\\nmaximize\\nρ1≥0µT˜Σ−1ˆµ∑\\nˆµT˜Σ−1Σ˜Σ−1ˆµ\\nsubject to ˜Σ=ρ1I+ˆΣ.(3.46)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 74}),\n",
       " Document(page_content='68 Modeling Fitting: Mean and Covariance Matrix Estimators\\nHowever, the objective of (3.46) is not computable since the true values\\nofµandΣare unknown. Under some technical conditions, the authors\\nof [213] derived an asymptotically equivalent problem based on RMT\\nas follows:\\nmaximize\\nρ1≥0ˆµT˜Σ−1ˆµ−ˆδ∑\\nbˆµT˜Σ−1ˆΣ˜Σ−1ˆµ\\nsubject to ˜Σ=ρ1I+ˆΣ\\nD=1\\nTTr(ˆΣ˜Σ−1)\\nˆδ=D/(1−D)\\nb=T\\nTr(\\nW(I+ˆδW)−2)(3.47)\\nwhere W≜I−1\\nT11T∈RT×Tis a predeﬁned parameter, and D,ˆδ,\\nandbare intermediate parameters.\\nThe problem (3.41) can be understood as follows: the unknown true\\nmeanµand covariance matrix Σare replaced by the explicitly com-\\nputablesamplemean ˆµandsamplecovariancematrix ˆΣandthensome\\ncorrection terms, i.e., ˆδin the numerator and bin the denominator, are\\nincorporated to increase the approximation accuracy. This problem is\\nstill nonconvex but it can be solved via exhaustive search since there\\nis only one scalar variable ρ1≥0.\\nTo investigate the performance of (3.47), let us now consider a real\\nexperiment conducted in [213] as follows.\\nExample 3.5. Let us consider the daily returns of the 45stocks under\\nHang Seng Index from 03-Jun-2009 to 31-Jul-2011. The portfolio is up-\\ndated at each 10days and the past T= 75,76,..., 95observations are\\nusedtodesigntheportfoliosateachupdateperiod.Thecomparedport-\\nfolios are: i) the method (3.47) based on RMT (referred to as RMT), ii)\\nthe portfolio (3.42) based on the Ledoit-Wolf (LW) estimator (referred\\nto as LW), iii) the portfolio (3.42) based on the SCM (referred to as\\nSCM), and iv) the uniform portfolio (referred to as Uniform).\\nFigure 3.7 shows the out-of-sample Sharpe ratio of the four com-\\npared methods. It can be observed that when Tchanges from 75to81,', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 75}),\n",
       " Document(page_content='3.3. Small Sample Regime: Shrinkage Estimators 69\\n75 80 85 90 95−0.04−0.0200.020.040.060.08\\nT (number of observations)Sharpe ratio\\n  \\nRMT\\nLW\\nSCM\\nUniformStationary Nonstationary\\nFigure 3.7: Out-of-sample Sharpe ratio of RMT, LW, SCM and Uniform portfolios.\\n75 80 85 90 95−0.0500.050.10.15\\nT (number of observations)Sharpe ratio\\n  \\nSparse RMT\\nLW\\nSCM\\nUniform\\nFigure 3.8: Out-of-sample Sharpe ratio of sparse RMT, LW, SCM and Uniform\\nportfolios.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 76}),\n",
       " Document(page_content='70 Modeling Fitting: Mean and Covariance Matrix Estimators\\nthe RMT method outperforms the others, but when T >81, its perfor-\\nmance becomes unstable. This is mainly because the mean return and\\ncovariance matrix cannot be stationary in a long period (e.g., T >81)\\n[213]. Later an improved RMT portfolio was proposed by setting the\\nweights whose absolute values are less than 5%of the summed absolute\\nvalues of all the weights to zeros, and this portfolio is referred to as a\\nsparse RMT portfolio. Figure 3.8 shows the out-of-sample Sharpe ratio\\nof the diﬀerent methods, and it can be seen that the sparse RMT port-\\nfolio outperforms all the other methods signiﬁcantly when Tchanges\\nfrom 75to90. ■\\nRemark 3.2. For simplicity we have only considered the Sharpe ratio\\nhere.Someothercriteriaarealsostudiedintheliteratureinthecontent\\nof both beamforming design and portfolio optimization, e.g., variance,\\nMSE and SNR for beamforming design [213], and portfolio variance for\\nportfolio optimization [170, 213]. ■\\nRemark 3.3. There now exist some recent works on including spar-\\nsity in the estimates, e.g., the covariance [20, 21, 117] or the precision\\nmatrix [82, 99, 211]. In general, some regularization terms are added\\nto propose sparsity (or group sparsity). For example, one widely used\\nregularization is ℓ1-norm and the technique is usually referred to as\\nLASSO (least absolute shrinkage and selection operator). The book\\n[96] serves as a good summary reference on various topics related to\\nsparsity. Apart from adding the sparsity in the estimation parameters,\\nincluding sparsity in portfolio optimization is also of interest in some\\nﬁnancial problems. Chapter 8 in the later part will demonstrate some\\nwidely used techniques to impose sparsity in portfolio optimization. 
■\\n3.4 Heavy Tail Issue: Robust Estimators\\nFrom the previous Example 3.1 (see also Figure 3.3) we have already\\nseen that the traditional sample average estimators (or equivalently,\\nthe MLE under Gaussian distribution assumption) are very sensitive\\nto the extreme events and outliers; instead, the MLE under heavy-tail\\nassumption (e.g., the Cauchy distribution) provides more robust esti-\\nmations. In this part, we will explore more general robust estimators.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 77}),\n",
       " Document(page_content='3.4. Heavy Tail Issue: Robust Estimators 71\\n3.4.1M-Estimators\\nMultivariate M-estimators can be deﬁned as a generalization of the\\nMLEs of elliptical distributions [140, 101]. Given the i.i.d. samples rt,\\nt= 1,...,T, theM-estimatesµandΣare deﬁned as the solutions to\\nthe ﬁxed-point equations\\n0=T\\uf8fa\\nt=1w1(dt) (rt−µ) (3.48)\\nΣ=1\\nTT\\uf8fa\\nt=1w2(dt) (rt−µ) (rt−µ)T, (3.49)\\nwheredt= (rt−µ)TΣ−1(rt−µ)and the weight functions w1(x)and\\nw2(x)are both nonnegative, nonincreasing, and continuous functions\\ninx∈(0,+∞), and they are not necessarily equal. The existence and\\nuniqueness of solutions can be guaranteed under some technical condi-\\ntions, and the uniqueness requires that xw2(x)is a strictly increasing\\nfunction of x∈(0,+∞)[139, 192]. Suppose the solution to (3.48) and\\n(3.49) exists and is unique, and let us denote it as (ˆµ,ˆΣ).\\nObserve that the elliptical MLE given by (3.15) and (3.16) can be\\nregarded as a special case with w1(x) =w2(x) =−2g′(x)\\ng(x), wheregis a\\ndensity generating function.\\nAsymptotics\\nAssume the i.i.d. samples rt∼EL(µo,Σo,g)where the superscript “o”\\nstands for “oracle”. Then as T→∞, the solution to (3.48) and (3.49),\\ni.e.,(ˆµ,ˆΣ), will converge with probability one to the unique solution,\\ndenoted as (ˆµ∞,ˆΣ∞), to the ﬁxed-point equations\\n0=E[w1(d) (r−µ)] (3.50)\\nΣ=E\\uf8f3\\nw2(d) (r−µ) (r−µ)T\\uf8f2\\n, (3.51)\\nwhered= (r−µ)TΣ−1(r−µ), and the following relationships hold\\nˆµ∞=µo(3.52)\\nˆΣ∞=cΣo(3.53)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 78}),\n",
       " Document(page_content='72 Modeling Fitting: Mean and Covariance Matrix Estimators\\nregardless of w1(d)andw2(d)as long as they satisfy some technical\\nassumptions such that the solution to (3.48) and (3.49) exists and is\\nunique [140].\\nHere, the size parameter c>0is given by\\nE)\\nw2(\\n∥x∥2\\n2\\nc(\\n∥x∥2\\n2\\nc[\\n=N (3.54)\\nwithx∼EL(0,I,g)sharing the same density generating function as r.\\nFor the Gaussian case, since x∼N(0,I)implies∥x∥2\\n2∼χ2\\nN, then the\\nrelationship (3.54) can be simpliﬁed as\\n\\uf8fc+∞\\n0w2(x\\nc)x\\ncχ2\\nN(x)dx=N. (3.55)\\nNumerical Algorithm\\nAlgorithm 1 is a numerical iterative method that converges to the\\nunique solution (if it exists and is unique) and the initial values only\\naﬀect the number of iterations [12].\\nAlgorithm 1 M-Estimator\\nInput:anyµ,Σ0≻0.\\nOutput: the solution to (3.48) and (3.49).\\n1:repeat\\n2:dkt= 1 + ( rt−µk)Σ−1\\nk(rt−µk)T\\n3:µk+1=∑T\\nt=1w1(dkt)rt∑T\\nt=1w1(dkt)\\n4: Σk+1=1\\nT∑T\\nt=1w2(dkt)(rt−µk+1)(rt−µk+1)T\\n5:k←k+ 1\\n6:untilconvergence\\n3.4.2 Tyler’s Estimator\\nTyler’s estimator was proposed to ﬁnd the right balance between ef-\\nﬁciency and robustness [201]. It assumes zero mean and focuses on\\nestimating the scatter matrix only. Tyler’s estimate is deﬁned as the', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 79}),\n",
       " Document(page_content='3.4. Heavy Tail Issue: Robust Estimators 73\\nsolution to the-ﬁxed point equation\\nΣ=N\\nTT\\uf8fa\\nt=1rtrT\\nt\\nrT\\ntΣ−1rt. (3.56)\\nNote that here xw2(x) =Kis not strictly increasing and the results of\\ntheM-estimator do not apply. Tyler established the conditions (e.g.,\\none condition is T≥N+ 1) for existence and uniqueness (up to a\\npositive scalar) of a solution to the ﬁxed-point equation (3.56), and\\nproposedthefollowingiterativeAlgorithm2toachievetheuniquetrace\\nnormalized solution.\\nAlgorithm 2 Tyler’s Estimator\\nInput: Σ0≻0\\nOutput: solution to (3.56)\\n1:repeat\\n2: ˜Σk+1=N\\nT∑T\\nt=1rtrT\\nt\\nrT\\ntΣ−1\\nkrt\\n3: Σk+1=˜Σk+1\\nTr(˜Σk+1)\\n4:k←k+ 1\\n5:untilconvergence\\nWe have previously seen that M-estimators can be regarded as\\ngeneralized MLEs. Interestingly, Tyler’s estimator can be derived from\\nan MLE perspective as well.\\nIt is known that if r∼El(0,Σ,g), then the normalized samples\\ns=r\\n∥r∥2follow [202, 113, 81]\\nf(s) =Γ(\\nN\\n2)\\n2πN/21√\\n|Σ|(\\nsTΣ−1s)−N/2, (3.57)\\nwhich is independent of the density generating function g. Then the\\nMLE of Σcan be obtained by minimizing the scale-invariant negative\\nlog-likelihood function\\nL(Σ) =T\\n2log|Σ|+T\\uf8fa\\nt=1N\\n2log(\\nsT\\ntΣ−1st)\\n(3.58)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 80}),\n",
       " Document(page_content='74 Modeling Fitting: Mean and Covariance Matrix Estimators\\nor, equivalently,\\nLTyler(Σ) =T\\n2log|Σ|+T\\uf8fa\\nt=1N\\n2log(\\nrT\\ntΣ−1rt)\\n.(3.59)\\nFinally, setting the derivative of LTyler(Σ)w.r.t. to Σ−1to zero yields\\nthe ﬁxed-point equation (3.56).\\n3.5 Small Sample Regime & Heavy Tail Issue: Regularized\\nRobust Estimators\\nOne regularity condition for the previous mentioned robust estimators\\nis that the number of samples is at least T≥N+ 1. In practice, the\\nuniverse of stocks may be large and the number available samples for\\nthe ﬁtting may be scarce in comparison (e.g., N= 500stocks of the\\nS&P500 and less than two years of daily data, say, T≈400). Thus\\nwhenT≥N+ 1is violated and the ordinary robust estimators cannot\\nbe applied anymore, or even when it is satisﬁed, regularization still\\nhelps ifTis not suﬃciently large.\\nIn this part, we mainly study the recent advances on robust esti-\\nmators with regularizations so that the reliable statistical inference can\\nstill be conducted even when the data contains extreme events and/or\\noutliers and the number of samples is limited compared to the data\\ndimension.\\n3.5.1 Regularized Robust Estimation of Scatter Estimator\\nThis subsection contains the most recent advances on the regularized\\nTyler’s estimator.\\nDiagonally Loaded Estimator\\nSimilar to the idea of shrinkage covariance or diagonal loading, the\\nauthorsof[2,39]proposedtoshrinktheTylerupdatecovariancematrix\\n(i.e., step 2 of Algorithm 2) to the identity matrix.\\nAlgorithm 3 summarizes the iterative computing procedure where\\nα≥0is a scalar parameter.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 81}),\n",
       " Document(page_content='3.5. Small Sample Regime & Heavy Tail Issue 75\\nAlgorithm 3 Tyler’s Estimator with shrinkage\\nInput: Σ0≻0\\nOutput: A unique positive deﬁnite matrix\\n1:repeat\\n2: ˜Σk+1=1\\n1+αN\\nT∑T\\nt=1rtrT\\nt\\nrT\\ntΣ−1\\nkrt+α\\n1+αI\\n3: Σk+1=˜Σk+1\\nTr(˜Σk+1)\\n4:k←k+ 1\\n5:untilconvergence\\nChen et al. [39] proved that for any α > 0Algorithm 3 converges\\nto a unique point and they proposed a systematic way to select α.\\nEven though this estimator is widely used and performs well in\\npractice, it is still considered to be heuristic and does not have an\\ninterpretation based on minimizing a cost function.\\nKullback-Leibler Divergence Regularized Estimator\\nInterestingly, the heuristic regularization in Algorithm 3 can be for-\\nmallyinterpretedasthesolutiontoaKullback-Leibler(KL)regularized\\nTyler’s loss function (3.59) [190].\\nFor two multivariate Gaussian distributions, e.g., NΣ(0,Σ)and\\nNT(0,T), the KL divergence is deﬁned as [44]\\nDKL(NT||NΣ) =1\\n2(\\nTr(Σ−1T)−K−log(|T|\\n|Σ|))\\n,(3.60)\\nwhere the positive deﬁnite matrix Tcan be interpreted as the target\\nthat represents some prior information.\\nRecallLTyler(Σ)in (3.59), then ignoring the constant terms results\\nin the following KL divergence regularized LTyler(Σ):\\nLKL(Σ) = log|Σ|+N\\nTT\\uf8fa\\nt=1log(rT\\ntΣrt)\\n+α(\\nTr(Σ−1T) + log|Σ|)\\n, (3.61)\\nwhereα≥0is the regularize parameter.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 82}),\n",
       " Document(page_content='76 Modeling Fitting: Mean and Covariance Matrix Estimators\\nMinimizing (3.61) leads to the following ﬁxed-point equation:\\nΣ=1\\n1 +αN\\nTT\\uf8fa\\nt=1rtrT\\nt\\nrT\\ntΣ−1rt+α\\n1 +αT. (3.62)\\nNote that T=Irecovers the regularization in Algorithm 3.\\nInterestingly, almost at the same time, three independent works,\\ni.e., [190], [158], and [157], achieved the same result as follows.\\nTheorem 3.2. Suppose rtare drawn i.i.d. from a zero mean ellipti-\\ncal distribution, then the ﬁxed-point equation (3.62) admits a unique\\nsolution if and only if T >N\\n1+α. ■\\nThe following Algorithm 4 computes the unique solution.\\nAlgorithm 4 Tyler’s Estimator with KL divergence penalty\\nInput: Σ0≻0\\nOutput: the unique solution to (3.62)\\n1:repeat\\n2: Σk+1=1\\n1+αN\\nT∑T\\nt=1rtrT\\nt\\nrT\\ntΣ−1\\nkrt+α\\n1+αT\\n3:k←k+ 1\\n4:untilconvergence\\nWiesel’s Penalty. There also exist some other regularizations. One\\nexample is the Wiesel’s penalty [207]:\\nh(Σ) =Klog(Tr( Σ−1T)) + log|Σ|, (3.63)\\nand minimizing the Wiesel’s penalty regularized Tyler’s loss function\\nresults in solving the following ﬁxed-point equation:\\nΣ=1\\n1 +αN\\nTT\\uf8fa\\nt=1rtrT\\nt\\nrT\\ntΣ−1rt+α\\n1 +αNT\\nTr(Σ−1T),(3.64)\\nwhereα>0is the regularization parameter.\\nExample 3.6. Now we set N= 39and draw i.i.d. samples from a\\nStudent-tdistribution tν(µo,Σo)withν= 3,µo=0, and Σo\\nij=', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 83}),\n",
       " Document(page_content='3.5. Small Sample Regime & Heavy Tail Issue 77\\n0.8|i−j|. We assume the mean is known and focus on estimating the\\nnormalizedscattermatrixonly.Notethatnowthedistributionisheavy-\\ntailed. The number of samples T= 20,30,..., 100. The performance\\nmetric is the normalized MSE (NMSE) [207]:\\nNMSE =E\\uf8f3\\ued79\\ued79\\ued79ˆΣ−Σo\\ued79\\ued79\\ued792\\nF\\uf8f2\\n∥Σo∥2\\nF, (3.65)\\nwhere all matrices are normalized by their traces.\\nWe simulate the following ﬁve estimators: i) the sample covariance\\nmatrix, ii) the LW covariance estimator, iii) the Tyler’s estimator (i.e.,\\nAlgorithm 2), and iv) two KL divergence regularized Tyler’s estimators\\n(i.e., Algorithm 4) with noninformative identity target T=Iand infor-\\nmativetarget Twhere Tij= 0.7|i−j|.Fortuningtheparameter αofthe\\nKL regularized Tyler’s estimators, a standard cross-validation method\\nis in [207] and a method based on random matrix theory is in [43]. For\\nsimulation simplicity, we simulate αsuch thatρ(α) =α\\n1+αis each of\\nthe ten uniform grid points of the interval (max(0,1−T/N) + 0.01,1)\\nand we report the best result. Nevertheless, this experiment aims at\\nproviding an illustrative example to reveal the ideas behind diﬀerent\\nestimators and for more intensive numerical experiments, please refer\\nto [39, 207, 190, 157, 158].\\nFigure 3.9 shows the numerical results where the NMSEs are av-\\neraged over 200realizations. 
We have several interesting observations:\\ni) the sample covariance matrix and the LW estimator (recall that the\\nLW estimator also relies on the sample covariance matrix) both per-\\nform badly since the underlying distribution is heavy-tailed and the\\nextreme events distort the estimation, ii) Tyler’s estimator is robust\\nsince it uses the weightsN\\nrT\\ntΣ−1rtto eliminate the eﬀect of the extreme\\nevents, however it only works when the number of samples is larger\\nthan the data dimension since Σneeds to be invertible, iv) the regu-\\nlarized Tyler’s estimator with noninformative identity target improves\\nthe estimation quality, and v) the informative prior target furthermore\\nimproves the estimation quality. All these observations coincide with\\nthe ideas behind the diﬀerent estimators. ■', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 84}),\n",
       " Document(page_content='78 Modeling Fitting: Mean and Covariance Matrix Estimators\\n20 30 40 50 60 70 80 90 10000.20.40.60.811.2\\nNumber of samplesNMSE\\n  \\nSCM\\nLW\\nTyler\\nKL: identity target\\nKL: info target\\nFigure 3.9: Regularized robust covariance estimations.\\n3.5.2 Regularized Robust Estimation of Mean and Covariance\\nPreviously we reviewed the robust estimation of covariance matrix only\\nassuming the mean was known. Now we consider the joint estimation\\nof the mean and covariance matrix.\\nSuppose the samples rt,t= 1,...,Tare drawn i.i.d. from an ellip-\\ntical distribution El(µo,Σo,g)where the density generating function\\ngis assumed unknown. Since the speciﬁc elliptical distribution is as-\\nsumed unknown, we will do the ﬁtting under a conservative heavy-tail\\ndistribution (note that the estimation will work for any elliptical distri-\\nbution). In particular, it is convenient to use the Cauchy distribution\\nsince it has very heavy tails and still the scatter matrix exists (the\\ncovariance matrix does not exist).', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 85}),\n",
       " Document(page_content='3.5. Small Sample Regime & Heavy Tail Issue 79\\nConsider the Cauchy MLE, that is, the estimates of the mean and\\nthe scatter are the minimizers of the following negative log-likelihood:\\nL(µ,Σ) =T\\n2log|Σ|+N+ 1\\n2T\\uf8fa\\nt=1log(\\n1 + (rt−µ)TΣ−1(rt−µ))\\n,\\n(3.66)\\nor, equivalently, the solutions of the following ﬁxed-point equations:\\n0=N+ 1\\nTT\\uf8fa\\nt=1rt−µ\\n1 + (rt−µ)TΣ−1(rt−µ), (3.67)\\nΣ=N+ 1\\nTT\\uf8fa\\nt=1(rt−µ)(rt−µ)T\\n1 + (rt−µ)TΣ−1(rt−µ). (3.68)\\nAsymptotics\\nRecall theM-estimators in Section 3.4.1. Note that (3.67) and (3.68)\\nare the same as (3.15) and (3.16) with weights given by wC(x) =N+1\\n1+x\\nin (3.23), and they are a special case of the M-estimators (3.48) and\\n(3.49) with w1(x) =w2(x) =wC(x).\\nThis implies the asymptotics of the M-estimators applies. That is,\\nunder some technical conditions, as T→∞the asymptotic solution of\\n(3.67) and (3.68) converges to a unique point, denoted as (ˆµ∞,ˆΣ∞).\\nSimilar to (3.52) and (3.53), we have (ˆµ∞,ˆΣ∞) = (µo,cΣo)where the\\nsize parameter cis unknown since now the density generating function\\ngis unknown.\\nIn other words, asymptotically the Cauchy MLE can estimate the\\nmean and the shape well. Therefore, it is more sensible to regularize\\nonly the shape but not the size if a regularization is necessary.\\nSmall Sample Regime\\nWhen the number of samples is limited, say less than the data dimen-\\nsion, the regularized Cauchy MLE is more reliable. Sun et al. [191]\\nproposed the following penalty function:', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 86}),\n",
       " Document(page_content='80 Modeling Fitting: Mean and Covariance Matrix Estimators\\nh(µ,Σ) =α(\\nNlog(Tr( Σ−1T)) + log|Σ|)\\n+γlog(\\n1 + (t−µ)TΣ−1(t−µ))\\n,(3.69)\\nwhereα≥0andγ≥0are regularization parameters. It is easy to\\nverify that the minimizer of (3.69) is (t,cT)for anyc>0[191]. That\\nis, (3.69) shrinks the mean to tand scatter to the shape of Tonly.\\nThis justiﬁes that h(µ,Σ)is a proper penalty function since it only\\npenalizes the shape but not the size of the scatter matrix.\\nThen the regularized Cauchy MLE problem is\\nminimize\\nµ,Σ≻0N+ 1\\n2T\\uf8fa\\nt=1log(\\n1 + (rt−µ)TΣ−1(rt−µ))\\n+T\\n2log|Σ|+α(\\nNlog(Tr( Σ−1T)) + log|Σ|)\\n+γlog(\\n1 + (t−µ)TΣ−1(t−µ))\\n.(3.70)\\nSetting the derivatives of the objective w.r.t µandΣ−1to zeros yields\\nthe following ﬁxed-point equations:\\nµ=(N+ 1)∑T\\nt=1wt(µ,Σ)rt+ 2γwt(µ,Σ)t\\n(N+ 1)∑T\\nt=1wt(µ,Σ) + 2γwt(µ,Σ)(3.71)\\nΣ=N+ 1\\nT+ 2αT\\uf8fa\\nt=1wt(µ,Σ)(rt−µ)(rt−µ)T\\n+2γ\\nT+ 2αwt(µ,Σ)(t−µ)(t−µ)T\\n+2αN\\nT+ 2αT\\nTr(Σ−1T), (3.72)\\nwhere\\nwt(µ,Σ) =1\\n1 + (rt−µ)TΣ−1(rt−µ), (3.73)\\nwt(µ,Σ) =1\\n1 + (t−µ)TΣ−1(t−µ). (3.74)\\nThe properties of the problem (3.70) are as follows.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 87}),\n",
       " Document(page_content='3.5. Small Sample Regime & Heavy Tail Issue 81\\nα\\nγγ2α1\\nγ1α2(γ)existence\\nuniqueness\\nFigure 3.10: Values that the regularization parameters αandγcan take for the\\nexistence and uniqueness of the regularized Cauchy MLE.\\nTheorem 3.3. Assume the underlying distribution of the samples is\\ncontinuous, T≻0,T >1,α≥0andγ≥0, then we have\\nExistence: Problem (3.70) has a minimizer if either of the following\\nconditions are satisﬁed:\\n(i)γ >γ 1andα>α 1\\n(ii)γ2<γ≤γ1andα>α 2(γ)\\nwhereγ1=N/2,γ2= (N+ 1−T)/2,α1= (N−T)/2, and\\nα2(γ) =1\\n2(\\nN+ 1−T−2γ+T−N−1\\nT−1)\\n.\\nUniqueness: The solution is unique if γ≥α. ■\\nProof.See [191, Theorem 2, Corollary 3, and Theorem 4].\\nFigure 3.10 shows the regions of regularization parameter values for\\nthe existences and uniqueness of the regularized Cauchy MLE (3.70).\\nTo compute the unique solution, Sun et al. [191] also proposed\\nseveral iterative algorithms, including the following Algorithm 5, with\\nconvergence guaranteed based on the majorization-minimization (MM)\\ntheory.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 88}),\n",
       " Document(page_content='82 Modeling Fitting: Mean and Covariance Matrix Estimators\\nAlgorithm 5 Iterative Regularized Cauchy MLE\\nInput:µ0,Σ0≻0\\nOutput: The unique solution of (3.70)\\n1:repeat\\n2:µk+1=(N+ 1)∑T\\nt=1wt(µk,Σk)rt+ 2γwt(µk,Σk)t\\n(N+ 1)∑T\\nt=1wt(µk,Σk) + 2γwt(µk,Σk)\\n3:Σ=N+ 1\\nT+ 2αT\\uf8fa\\nt=1wt(µk,Σk)(rt−µk+1)(rt−µk+1)T\\n+2γ\\nT+ 2αwt(µk,Σk)(t−µk+1)(t−µk+1)T\\n+2αN\\nT+ 2αT\\nTr(Σ−1\\nkT)\\n4:k←k+ 1\\n5:untilconvergence\\nExample 3.7. In this example, we study the robustness of diﬀerent\\noutliers. We ﬁx N= 100and draw i.i.d. samples from N(µo,Σo)with\\nµo=1, and Σo\\nij= 0.8|i−j|. Then we draw outliers as routlier∼µ+rs\\nwhere sis uniformly distributed on a sphere such that ∥s∥2= 1andr∼\\nUniform[2l,2l+1]wherel≜max\\nt{∥rt∥2}. The total number of samples\\nisT= 120and the fraction of outliers varies as 0.02,0.05,..., 0.2. Since\\nwe are now estimating both the mean and the covariance matrix we\\nneed a combined measure of performance. We use the symmetric KL\\ndivergence distance [191]:\\nKL distance = E\\uf8f3DKL(N(ˆµ,ˆΣ)||N(µo,Σo))\\n+DKL(N(µo,Σo)||N(ˆµ,ˆΣ))\\uf8f2,(3.75)\\nwhere all matrices are normalized by their traces.\\nWe simulate the following six estimators: i) the sample covariance\\nmatrix, ii) the LW covariance estimator, iii) the Cauchy MLE estima-\\ntor, and iv) three regularized Cauchy MLE estimators (i.e., Algorithm\\n5) with: iv-a) tbeing the sample mean and noninformative identity co-\\nvariance target T=I, iv-b) tbeing the sample mean and informative\\ncovariance target Twhere Tij= 0.7|i−j|, and iv-c) informative mean\\ntarget t= 0.9µoand informative covariance target with Tij= 0.7|i−j|.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 89}),\n",
       " Document(page_content='3.6. Summary of Diﬀerent Estimators 83\\n0.02 0.05 0.08 0.11 0.14 0.17 0.2101102103104\\nPercentage of outliersKL distance\\n  \\nSCM\\nLW\\nCauchy\\nRegularized Cauchy: mean + identity targets\\nRegularized Cauchy: mean + info cov targets\\nRegularized Cauchy: info mean & cov targets\\nFigure 3.11: Regularized robust mean and covariance estimations.\\nFor the regularized Cauchy MLEs, for simulation simplicity we set\\nα=γand simulate αsuch thatρ(α) =T\\nT+2α∈{0.1,0.2,..., 1}and\\nreport the best result. A more practical but complicated way is cross-\\nvalidation [207].\\nFigure 3.11 shows the numerical results where the KL distances are\\naveraged over 200realizations. We can see similar observations to that\\nof Example 3.6. Brieﬂy speaking, the regularized Cauchy MLE (even\\nwith a noninformative target) does improve the estimation quality in\\na small sample regime and the improvement becomes more signiﬁcant\\nwhen the percentage of outliers increases. For more intensive numerical\\nexperiments, the interested reader can refer to [191]. ■\\n3.6 Summary of Diﬀerent Estimators\\nIn this chapter we have reviewed diﬀerent types of estimators: non-\\nparametric estimators (e.g., sample mean/covariance, LS estimator),\\nML estimators, and shrinkage/regularized estimators mainly based on\\nthe I.I.D. model. Table 3.1 provides a brief and compact summary.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 90}),\n",
       " Document(page_content='84 Modeling Fitting: Mean and Covariance Matrix Estimators\\nTable 3.1: Summary of diﬀerent estimators.\\nType NameFixed-Point Eqs.\\nor Expression or\\nProblemScenario\\nNon-\\nparametricSample\\naverages(3.2) and (3.3)Large sample;\\nSame as\\nGaussian MLELeast\\nsquare\\nMLEElliptical(3.15) and (3.16)\\nwith weight (3.17)\\nLarge or medium\\nsampleGaussian(3.15) and (3.16)\\nwith weight (3.19)\\nCauchy(3.15) and (3.16)\\nwith weight (3.23)\\nRegularized\\nor\\nShrinkageMean (3.30)-(3.31) Small sample\\nwithout extreme\\nevents or outliersCovariance (3.33), (3.35)-(3.36)\\nPrecision Problem (3.41)\\nRobustM-\\nestimator(3.48)-(3.49)Generalized\\nMLE for large or\\nmedium sample\\nwith extreme\\nevents or outliersTyler (3.56)\\nRegularized\\nRobustKL reg-\\nularized\\nTylerMinimizing (3.61) or\\nsolving (3.62)A combination\\nof shrinkage idea\\nand robust\\nestimators;\\nSmall sample\\nwith extreme\\nevents and/or\\noutliersWiesel\\nregu-\\nlarized\\nTylerSolving (3.64)\\nRegularized\\nCauchyMinimizing (3.70) or\\nsolving (3.71)-(3.72)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 91}),\n",
       " Document(page_content='4\\nOrder Execution\\nOrderexecutionbridgesadesiredidealtargetandtherealworld:oncea\\nportfolio has been designed, it needs to be executed in the real markets.\\nThis chapter studies the order execution problem and how to optimally\\nexecute such orders.\\nSection 4.1 brieﬂy reviews the limit order book system and intro-\\nduces the concept of market impact. Section 4.2 further presents the\\nprice model and execution cost. Section 4.3 focuses on minimizing the\\nexpected execution cost, and Section 4.4 considers an extension of min-\\nimizing the mean-variance trade-oﬀ of execution cost. Finally, Section\\n4.5considersminimizingamorepracticalcriterion,i.e.,theConditional\\nValue-at-Risk (CVaR), of the execution cost.\\n4.1 Limit Order Book and Market Impact\\n4.1.1 Limit Order Book\\nOnce a buy (respectively, sell) order has been submitted, it will not\\nbe executed immediately. Instead, it will be checked for whether it can\\nbe matched by the previously submitted sell (respectively, buy) orders.\\nA limit order book at a speciﬁc time is the snapshot of all the active\\noutstanding orders at that time [87].\\n85', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 92}),\n",
       " Document(page_content='86 Order Execution\\nbuy limit ordersell limit order\\nask-pricebid-price\\nPricemid-priceQuantity\\nFigure 4.1: Limit order book and two limit orders.\\nOrders that do not cause an immediate matching upon submission\\nbut become active orders in a limit order book are known as limit\\norders. Figure 4.1 shows an illustrative example of a limit order book\\nand two new limit orders. The limit orders that ask for sell are called\\nsell limit orders. They may have diﬀerent ask prices and the lowest ask\\nprice is referred to as ask-price. Respectively, the limit orders that bid\\nfor buy are called buy limit orders. They have diﬀerent bid prices and\\nthe highest bid price is referred to as bid-price. The average of the bid-\\nprice and the ask-price is referred to as mid-price and the diﬀerence\\nbetween the ask-price and the bid-price is called a bid-ask spread.\\nOrders that cross the bid-ask spread cause an immediate matching\\nupon submission. This can happen, for example, when a small buy\\norder is submitted with a bid price equal to the current ask-price (and', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 93}),\n",
       " Document(page_content='4.1. Limit Order Book and Market Impact 87\\nask-pricebid-price\\nPricemid-priceQuantity\\nmatched ordersmarket sell order\\nFigure 4.2: A new market sell order and matched trades.\\nthe amount of that order can be absorbed). Also, there is a type of\\nexecutionorder,calledamarketorder,thatdoesnothaveanassociated\\nprice, instead it will be matched to the best existing price in the order\\nbook and executed immediately. However, the execution of large orders\\nfollows a diﬀerent pattern.\\nFigure 4.2 shows a submission of a large market sell order that\\nits owner simply wants to sell at whatever price the limit order book\\ncan provide immediately. Once the large market sell order has been\\nsubmitted, it matches some limit buy orders in the limit order book\\n(in order from high to low price). Figure 4.2 also shows the matched\\ntrades with diﬀerent quantities at diﬀerent prices.\\nRight after the trades have been executed, they are eliminated from\\nthe limit order book. Figure 4.3 shows the updated limit order book.\\nClearly, we can see that the overall average trade price is lower than\\nthe initial bid-price and the new bid-price also becomes much lower.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 94}),\n",
       " Document(page_content='88 Order Execution\\nask-pricebid-price\\nPricemid-priceQuantity\\nFigure 4.3: Market impact: the large market sell order moves prices in the opposite\\ndirection.\\nThat is, a large market sell order moves the prices in the opposite\\ndirection, i.e., when one wants to sell, he/she actually sells lower. Sim-\\nilarly, when one wants to buy, he/she actually buys higher. Such an\\neﬀect is known as market impact and will be explained next.\\n4.1.2 Market Impact\\nIn the practice of quantitativeinvestment, portfolio allocation decisions\\nand trading strategies are realized through the execution of buy and\\nsell orders in organized exchanges via brokers through the limit order\\nbook systems. Due to practical limitations concerning market liquidity,\\ni.e., availability of required volume levels matching the size of an out-\\nstanding order for a speciﬁc asset, executing transactions in the market\\nhas an eﬀect on the prices of assets: buying pushes the prices upward\\nand selling pushes the prices downward, as shown in the previous illus-\\ntrative Figures 4.2 and 4.3. This market impact is reﬂected on the cost\\nincurred when implementing trades [94, 156].\\nFigure 4.41illustratively shows how the price evolves versus time\\nwhen a large market sell order, say sshares, is executed directly.\\nThis market order must be matched immediately and represents a de-\\nmand for liquidity. Similar to Figure 4.2, selling this large amount of s\\n1Figures 4.4 and 4.5 are reproduced based on [65, Figures 12.1 and 12.2].', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 95}),\n",
       " Document(page_content='4.1. Limit Order Book and Market Impact 89\\npre-trade equilibrium\\npost-trade equilibrium\\nTimePrice\\ntrade price of selling s sharespermanent \\nimpact\\ntemporary \\nimpactmarket impact \\nof selling s \\nshares\\nFigure 4.4: Market impact of a single large order.\\nshares incurs signiﬁcant market impact and the executed price is much\\nlower than the pre-trade equilibrium price. As time goes by, liquidity\\nproviders replenish the bid side and the limit order book reaches a\\npost-trade equilibrium; however, the price is still lower than the pre-\\ntrade equilibrium. The diﬀerence between the pre-trade and post-trade\\nequilibrium is due to the information that an investor has decided to\\nsellsshares and it is referred to as permanent impact. The remaining\\nimpact is called temporary and it is because the investor wants to sell\\nthe order immediately regardless of price. In practice, the temporary\\nmarket impact is more signiﬁcant than the permanent one [115]. An-\\nother observation is that the permanent impact propagates with time\\nwhile the temporary impact diminishes after some time period.\\nSmall orders in general have much smaller market impacts. Intu-\\nitively, a large order can be partitioned into many small orders to be\\nexecuted sequentially to reduce the overall market impact. Figure 4.5\\nshows the example of partitioning a large order of sshares into two\\nequalsmallordersexecutedsequentially.Wecanobserveseveralthings:\\n1) the market impact of selling s/2shares is much smaller than that\\nof sellingsdirectly and 2) executing them sequentially helps to reduce\\nthe overall market impact since the price may be recovered from the', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 96}),\n",
       " Document(page_content='90 Order Execution\\npre-trade equilibrium\\npost-trade equilibrium\\nTimePrice\\npermanent \\nimpact\\ntemporary \\nimpactstrade price of selling s/2 \\nshares\\ntrade price of selling s/2 shares\\ntrade price of selling s shares\\nFigure 4.5: Market impact of two sequential small orders.\\ntemporary impact caused by the ﬁrst trade before the second trade\\nhappens. Obviously, the average of the two trade prices of selling s/2\\nis much higher than that of selling sshares at once directly. That is,\\noverall, the average trade price of the total sshares achieved by exe-\\ncuting small orders sequentially is much higher than that of executing\\nthe large order once.\\nNaturally, the idea of optimal order execution is to partition a large\\norder into many small pieces and execute them sequentially. The min-\\nimization of the execution cost through optimal order execution al-\\ngorithms is crucial for preserving in practice the proﬁt structure of\\ntheoretically sound investment processes [65]. Otherwise, one may ex-\\npect to make a certain proﬁt with a carefully designed portfolio that\\nwill vanish or even become negative. Interestingly, this order execution\\nproblem is close to many other scheduling and optimization problems\\nin signal processing. From a dynamic control point of view, the order\\nexecution problem of ﬁnding an optimal order execution strategy to\\nminimize the mean-variance trade-oﬀ of the execution cost [18] is quite\\nsimilar to the problem of ﬁnding an optimal sensor scheduling strategy\\nto minimize the state estimation error in dynamic wireless sensor net-\\nworks [208, 180, 181]. From an optimization point of view, distributing', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 97}),\n",
       " Document(page_content='4.2. Price Model and Execution Cost 91\\na large order into smaller size orders over a certain time window to\\nminimize the execution cost [8, 79] is similar to allocating total power\\nover diﬀerent channels to achieve the capacity region for parallel Gaus-\\nsian broadcast channels [198], or to minimize the J-divergence between\\nthe distributions of the detection statistic in wireless sensor networks\\n[214].\\nFact 4.1. One usually focuses on minimizing the market impact and\\ntherefore reducing execution cost. However, every coin has two sides\\nand in practice it is possible to use the market impact to make money\\nas well [83]. Perhaps the most famous example is “Black Wednesday”.\\nIn 1992, there was a devaluation trend of pound sterling and Geogre\\nSoros’ Quantum fund began to massively short-sell pounds on Tuesday,\\nSeptember 15, 1992 and triggered a more intensive trend of devaluation\\nofthepound. OnWednesday,September16,1992,theBankofEngland\\nwas not able to protect the pound anymore and the British Conserva-\\ntive government was forced to withdraw the pound sterling from the\\nEuropean Exchange Rate Mechanism. During that period, Soros ﬁrst\\nheld a total of US$ 10billion short positions on GBP and later closed\\nthe position at a lower value so that he made US$ 1billion. Because of\\nthat, he has since been known as “The Man Who Broke the Bank of\\nEngland”. ■\\n4.2 Price Model and Execution Cost\\n4.2.1 Price Model\\nBefore introducing a price model, let us deﬁne the notation. The buy\\nand the sell problems are similar to each other and for the sake of\\nnotation we focus on the sell problem. Assume we hold Nstocks with\\nan initial price p0≜[p10,...,,pN0]Twith the number of shares to\\nsell denoted by s≜[s1,...,sN]Tand we want to completely execute\\nthem before time T. 
Assume there is no short-selling, and denote the\\nnumber of shares for the Nstocks executed over the t-th period as\\nnt≜[n1t,...,nNt]T≥0,t= 1,...,T. We write the order execution\\nsequence{n1,...,nT}as anN-by-Tmatrix N= [n1,...,nT], such\\nthatN1=s, where 1is aT-dimensional vector having all entries equal', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 98}),\n",
       " Document(page_content='92 Order Execution\\np0=~p0\\n~p1\\n~p2\\n~pT¡1\\np1\\n¡Ã(n1)\\n¡ª(n1)+§»1\\np2\\n¡Ã(n2)\\np3\\n¡Ã(n3)\\n¡ª(n2)+§»2\\npT\\n¡Ã(nT)\\nexecutionprice\\n`0=s\\n`1\\n`2\\n`T=0\\n(a)\\n(b)\\n¡n1\\n¡n2\\nFigure 4.6: Illustrations of (a) trading trajectory and (b) price model.\\nto 1. We deﬁne such a matrix Nas an execution strategy. The number\\nofremainingsharesafterthe t-thperiodisℓt,withℓt=ℓt−1−nt,initial\\nconditionℓ0=s, and end condition ℓT= 0. Then, L= [ℓ1,...,ℓT]is\\na trading trajectory, and it evolves as shown in Figure 4.6a.\\nWethenconsideramodelforthepricedynamicstakingintoaccount\\nthe market impact incurred when executing the order. Speciﬁcally, the\\nexecution price in the t-th period pt(n1,...,nt)is a random variable\\ndepending on past executions and also the current execution. In order\\nto characterize how it evolves over time, a number of diﬀerent price\\nmodels have been proposed [102, 8, 18]. Here, we consider the quite\\ngeneral price model in [8] with both linear permanent and temporary\\nmarket impact components. More speciﬁcally, prices evolve, for t=\\n1,...,T, as\\n˜pt=˜pt−1−Ψ(nt) +Σξt, (4.1)\\npt=˜pt−1−ψ(nt), (4.2)\\nwhere ˜pt≜[˜p1t,..., ˜pNt]Tis a hidden variable denoting the permanent\\nimpact prices with initial value ˜p0=p0,pt≜[p1t,...,pNt]Tis the\\nactual execution price, ξt≜[ξ1t,...,ξrt]Tis the random noise with\\nall the elements being i.i.d. random variables with zero mean and unit\\nvariance, Σ∈RN×ris the volatility matrix, and Ψ(·)andψ(·)are\\npermanent and temporary, respectively, linear market impact functions\\nthat take the form\\nΨ(nt) =Θnt, (4.3)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 99}),\n",
       " Document(page_content='4.2. Price Model and Execution Cost 93\\nψ(nt) =Ωnt. (4.4)\\nIn the above price model, the parameters Σ,Θ, and Ωrepresent the\\nlinear coeﬃcient matrices of noise, permanent market impact, and tem-\\nporarymarketimpact.Theyareusuallyﬁxedandcalibratedinadvance\\nbyusingdataonthebid-askspread,thevolatility,andthedailytrading\\nvolume. Similar to [8], we assume that Ω∈RN×Nis positive deﬁnite2,\\nand for simplicity, we assume that the matrices ΘandΩare both\\nsymmetric. Figure 4.6(b) summarizes price model.\\n4.2.2 Execution Cost\\nLetP≜[p1,...,pT]andΞ≜[ξ1,...,ξT]. The ideal value in the ab-\\nsence of market impact and market noise would be pT\\n0sbut in practice\\nit becomes∑T\\nt=1nT\\ntpt= Tr(\\nPTN)\\n, and the gap between them is de-\\nﬁnedastheexecutioncost(i.e.,theimplementationshortfall)asfollows\\n[159]:\\nX(N) =\\uf8f1\\n}\\n\\uf8f3pT\\n0s−Tr(\\nPTN)\\n,sell program\\nTr(\\nPTN)\\n−pT\\n0s,buy program .(4.5)\\nBased on the above price model for a sell program, plugging (4.1)-(4.4)\\ninto (4.5) and after some mathematical manipulations, we obtain\\nX(N) =1\\n2sTΘs+ Tr(\\nNT˜ΩN)\\n−Tr(\\nLTΣΞ)\\n,(4.6)\\nwhere ˜Ω≜Ω−1\\n2Θ, and the mean and the variance are\\nE[X(N)] =1\\n2sTΘs+ Tr(\\nNT˜ΩN)\\n(4.7)\\nVar[X(N)] = Tr(\\nLTΣΣTL)\\n. (4.8)\\nRecall that Lis a function of N:ℓt=ℓt−1−ntandℓ0=s.\\nIn practice, it is also assumed that ˜Ω=Ω−1\\n2Θ≻0, as in [145].\\nThismakessensebecauseusuallythetemporarymarketimpactismuch\\nhigher than the permanent market impact in ﬁnancial markets. Indeed,\\n2Becauseif nT\\ntΩnt≤0,itwouldmeanthetemporarymarketimpactofexecuting\\nntin fact would beneﬁt the trading or at least would lose nothing, which would go\\nagainst the goal of reducing the market impact.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 100}),\n",
       " Document(page_content='94 Order Execution\\nthe amount caused by the permanent impact is a relatively small per-\\ncentage of the pure cost component, and Kissell et. al. estimate it to\\nbe5%[115, pp. 182]. Then both the mean (4.7) and the variance (4.8)\\nare quadratic convex in N.\\n4.3 Minimizing Expected Execution Cost\\nThe ﬁrst problem formulation was proposed in [18] for the single asset\\ncase, and it aims at minimizing the expected execution cost\\nminimize\\nNE[X(N)]\\nsubject to N1=s,N≥0.(4.9)\\nSince ˜Ω≻0, the problem is already quadratic convex and thus can\\nbe eﬃciently and numerically solved. When ˜Ωis diagonal, it is not\\nhard to show that the problem (4.9) of Nassets can be decomposed\\nintoNsmall problems of a single asset, and following the derivation in\\n[18], the optimal execution strategy is to uniformly distribute the large\\norder among the Texecution periods, that is, N=1\\nTs1T.\\n4.4 Minimizing Mean-Variance Trade-oﬀ of Execution Cost\\nAn obvious disadvantage of the problem (4.9) is that it does not con-\\nsider the risk of the execution cost. By taking the variance as the risk\\nmeasurement, Almgren and Chriss [8] extended (4.9) by minimizing a\\nmean-variance trade-oﬀ of the execution cost as follows:\\nminimize\\nNE[X(N)] +λVar[X(N)]\\nsubject to N1=s,N≥0,(4.10)\\nwhereλ≥0is a ﬁxed parameter modeling an investor’s risk aversion\\nlevel. The larger the value of λ, the more risk averse the investor ( λ= 0\\nmeans the investor is risk neutral and it corresponds to problem (4.9)).\\nFor obvious reasons, such an approach is commonly referred to in the\\nliterature as the mean-variance optimization approach. Note that since\\n˜Ω≻0, the problem is already convex.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 101}),\n",
       " Document(page_content='4.5. Minimizing CVaR of Execution Cost 95\\n4.5 Minimizing CVaR of Execution Cost\\nHowever, it is well known that the variance used in (4.9) is not an ap-\\npropriate risk measure when dealing with ﬁnancial returns from non-\\nnormal, negatively skewed, and leptokurtic distributions [141]. In order\\nto overcome the inadequacy of variance, CVaR (also known in the liter-\\nature as Expected Shortfall, Expected Tail Loss, Tail Conditional Ex-\\npectation, and Tail VaR) has been proposed as a single side alternative\\nrisk measurement [166] and it has been employed signiﬁcantly in ﬁnan-\\ncial engineering, see [7, 63, 172, 100], for portfolio or risk management.\\nInterestingly, such a single side risk measurement technique has also\\nfound some applications in signal processing recently, see [124, 183],\\nfor chance constrained communication systems.\\n4.5.1 CVaR and Problem Formulation\\nTheCVaRisdeﬁnedastheconditionalmeanvalueofarandomvariable\\nexceeding a particular percentile. This precisely measures the risky re-\\nalizations, as opposed to the variance that simply measures how spread\\nthe distribution is and mixes together both tails.\\nFor illustrative purposes, Figure 4.7 shows the deﬁnition of the\\nCVaR of a random variable. Mathematically, given an random vari-\\nableZ, the CVaR of the execution cost at the 1−εconﬁdence level\\ncan be expressed as\\nCVaR 1−ε(Z) =E\\uf8f3\\nZ⏐⏐⏐Z > VaR 1−ε(Z)\\uf8f2\\n, (4.11)\\nwhere the Value-at-Risk of the execution cost at the 1−εconﬁdence\\nlevel, denoted as VaR 1−ε(Z), is the (1−ε)-quantile of Z:\\nVaR 1−ε(Z) = inf\\nζ∈R{ζ|P(Z >ζ )≤ε}. 
(4.12)\\nNote that given an execution strategy N, the execution cost X(N)\\nis a random variable and the problem of minimizing the CVaR of the\\nexecution cost turns out to be [79, 77, 78]:\\nminimize\\nNCVaR 1−ε(X(N))\\nsubject to N1=s,N≥0.(4.13)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 102}),\n",
       " Document(page_content='96 Order Execution\\nRandom variable ZFrequency\\nVaR1−ε(Z)\\nCVaR1−ε(Z)Maximum\\nProbability ε\\nFigure 4.7: The VaR and CVaR of a random variable.\\nAt ﬁrst glance, CVaR 1−ε(X(N))is hard to deal with because it\\ncontains a conditional expectation exceeding a threshold that is not\\nﬁxed. To proceed, we will make use of the following auxiliary function.\\nAuxiliary Function. Following the approach in [166], we can deﬁne an\\nauxiliary function of CVaR 1−ε(X(N))as follows:\\nFε(N,ζ) =ζ+ε−1E[X(N)−ζ]+(4.14)\\nwhere [x]+= max (x,0).Observethat(4.14)isconvexw.r.t.both ζand\\nN, sinceX(N)is convex quadratic in N, and additionally, we further\\nhave [166]:\\nCVaR 1−ε(X(N)) = min\\nζFε(N,ζ). (4.15)\\nThen, the original problem (4.13) can be more eﬃciently optimized by\\nusing the property in (4.15). To that eﬀect, notice that we need to\\ncompute the expectation E[X(N)−ζ]+.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 103}),\n",
       " Document(page_content='4.5. Minimizing CVaR of Execution Cost 97\\n4.5.2 Sample Average Approximation\\nThe ﬁrst idea is to use the sample average approximation (SAA) to\\napproximate E[X(N)−ζ]+, and the CVaR problem (4.13) is approxi-\\nmated by\\nminimize\\nN,z,ζζ+ε−1M−1∑M\\ni=1zi\\nsubject to 0≤zi≥1\\n2sTΘs+ Tr(\\nNT˜ΩN)\\n−Tr(\\nLTΣΞi)\\n−ζ,\\n∀i= 1,...,M\\nN1=s,N≥0,\\n(4.16)\\nwhere Ξiisthei-threalizationofnoisesampledfromthedistributionof\\nξit’s, andMis the number of noise realizations. As pointed out in [142]\\nalthough the SAA method can provide an accurate execution strategy\\nfor a very large number of realizations, such a method is impaired by\\nlarge storage requirements and high computational complexity, espe-\\ncially when Mis large.\\n4.5.3 Analytical Approach\\nTo overcome the drawback of the SAA method, an analytical approach\\ntohandling CVaR 1−ε(X(N))andsolving (4.13) forbothGaussianand\\nNon-Gaussian noise was proposed in [79, 77, 78]. The idea is to either\\nﬁnd the explicit expression of E[X(N)−ζ]+for the Gaussian cases or\\nconstructasaveconvexapproximationof E[X(N)−ζ]+forthegeneral\\nnon-Gaussian noise.\\nGaussian Noise\\nFor the Gaussian case, the following analytical equivalent formulation\\nof the problem (4.13) was derived in [78].\\nLemma 4.1. If all theξitare i.i.d. and ξit∼N (0,1), and ˜Ω≻0in the\\nprice model (4.1)-(4.2), we have that (4.13) is equivalent to the convex\\nproblem:\\nminimize\\nN1\\n2sTΘs+ Tr(\\nNT˜ΩN)\\n+κ(ε)\\ued79\\ued79\\ued79ΣTL\\ued79\\ued79\\ued79\\nF\\nsubject to N1=s,N≥0,(4.17)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 104}),\n",
       " Document(page_content='98 Order Execution\\nwhereκ(ε) = exp(\\n−(Q−1(ε))2/2)\\n/√\\n2πεandQ−1(x)is the inverse\\nQ-function.3. ■\\nInterestingly, the results in Lemma 4.1 can be extended to more\\ngeneral elliptical distributions [78].\\nGeneral Non-Gaussian Noise\\nThe elliptical distributions are only appropriate in situations where\\nreturns are symmetric or not strongly asymmetric, but fail to success-\\nfully model highly asymmetric or, equivalently, skewed returns [141].\\nFor such cases, CVaR 1−ε(X(N))admits no explicit expression, and an\\nalternative way to approach the CVaR execution problem is to solve a\\nsafe tractable convex approximation of CVaR 1−ε(X(N))instead. The\\nfollowing technical assumption is needed.\\nAssumption 4.1. The moment generating function of the random vari-\\nableξit, i.e.,Mit(z) =E\\uf8f3\\nezξit\\uf8f2\\n, is ﬁnite-valued for all z∈Rand can\\nbe computed eﬃciently. ■\\nThen one can have the following result [78].\\nProposition 4.1 (Bernstein’s Approximation) .If all theξitare i.i.d. sat-\\nisfying Assumption 4.1, and ˜Ω≻0in the price model (4.1)-(4.2), a\\nsafe tractable convex approximation of (4.13) is\\nminimize\\nN,z>01\\n2sTΘs+ Tr(\\nNT˜ΩN)\\n+∑T\\nt=1∑r\\ni=1zlogMit(z−1git(N))−zlogε\\nsubject to N1=s,N≥0,(4.19)\\nwheregit(N) =−∑m\\nj=1ℓjtΣji. ■\\nNote thatzlogMit(z−1git(N))withz>0is the perspective func-\\ntion of the convex log-sum-exp function logMit(git(N))and thus is\\njointly convex in (N,z)[32]. Given that ˜Ω≻0, we are able to con-\\nclude that problem (4.19) is convex.\\n3The Q-function is deﬁned as\\nQ(x) =1√\\n2π\\uf8fc∞\\nxe−u2\\n2du. (4.18)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 105}),\n",
       " Document(page_content='4.5. Minimizing CVaR of Execution Cost 99\\n1 2 3 4 500.20.40.60.81asset 1normalized execution order\\nt\\n(a)  \\nE+λVar, λ=0\\nE+λVar, λ=10−6\\nCVaR − small order\\nCVaR − medium order\\nCVaR − large order\\n1 2 3 4 500.20.40.60.81normalized execution orderasset 2\\nt\\n(b)\\nFigure 4.8: Order execution with T= 5: small order = 0.2×s, medium order\\ns=\\uf8f3\\n106,106\\uf8f2T, and large order = 5×s. (a) asset 1. (b) asset 2.\\nLet us consider an illustrative example to understand diﬀerent\\nmethods of order execution based on the Gaussian noise.\\nExample 4.1. Suppose there are N= 2assets,r= 2noise sources,\\nand i.i.d. noise ξit∼N (0,1). The parameter matrices are\\nΩ=)\\n5 0\\n0 1[\\n×10−6,Θ=)\\n2.5 0\\n0 0.5[\\n×10−7,(4.20)\\nΣ=)\\n0.6191 0.1292\\n0.1292 0.6191[\\n. (4.21)\\nWe consider three kinds of sizes of the initial order: the medium initial\\norder size is s=\\uf8f3106,106\\uf8f2T, small order = 0.2×sand large order\\n= 5×s. We simulate three diﬀerent methods: i) the problem (4.9) of\\nminimizing the expected execution cost (or equivalently, the problem\\n(4.10) with λ= 0), ii) the problem of minimizing the mean-variance\\ntrade-oﬀ of the execution cost with λ= 10−6, and iii) the closed-form\\nCVaR formulation of (4.17) for the Gaussian case with ε= 0.05.\\nFigure 4.8 shows the normalized order execution strategies of Ex-\\nample 4.1 with diﬀerent initial order sizes. First, we ﬁnd that minimiz-\\ning the expected execution cost always distributes the order uniformly\\namong the execution periods, which veriﬁes the results in Section 4.3.\\nSecond, the mean-variance approach with ﬁxed λ= 10−6adjusts the', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 106}),\n",
       " Document(page_content='100 Order Execution\\nexecution strategies according to the variance of the execution cost;\\nhowever, it always gives the same normalized execution order strategy\\nno matter what the initial order size is. While the CVaR approach exe-\\ncutes the small initial order faster to reduce the risk of not completing\\nthe execution, it spreads the large initial order more to avoid the huge\\nmarket impact caused by one single large order. Another interesting\\nobservation is that asset 2is executed faster than asset 1because it\\nhas a smaller market impact (see (4.20)) and thus is more liquid. The\\nresults show that the CVaR approach can adjust the execution strate-\\ngies depending on the initial order size but the mean-variance approach\\n(including the case λ= 0) cannot. Thus, the CVaR approach is more\\nappropriate for the order execution problem. ■\\nRemark 4.1. Apart from the above reviewed non-robust cases, there\\nare also some other related works appearing simultaneously and inde-\\npendently. For example, for the robust mean-variance order execution\\nproblem see [146, 77, 78], for a numerical Monte Carlo simulation based\\nCVaR formulation of the order execution see [147], and for the robust\\nCVaR formulation of the order execution see [77, 78]. ■', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 107}),\n",
       " Document(page_content='Part II\\nPortfolio Optimization\\n(Risk-Return Trade-oﬀ)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 108}),\n",
       " Document(page_content='5\\nPortfolio Optimization with Known Parameters\\nModeling of time series (overviewed in Part I) is at the core of and is a\\npreliminary step in quantitative investment. The design of investment\\nstrategies is the natural next step and will be explored in the form\\nof portfolio optimization (in Part II) and statistical arbitrage (in Part\\nIII).\\nAs a start, Part II, this chapter introduces the most basic frame-\\nwork of Markowitz portfolio optimization under the assumption that\\nthe model parameters, i.e., the expected return µand the covariance\\nmatrix Σof the asset net returns, are perfectly known. We need to\\npoint out that in practice µandΣneed to be estimated from the past\\nobservations as discussed in the previous Chapter 3.\\nThe organization of this chapter is as follows. Section 5.1 reviews\\nthe Markowitz mean-variance portfolio optimization. Section 5.2 points\\nout two serious drawbacks of the Markowitz framework: variance as a\\nrisk measurement is not appropriate, and the mean-variance framework\\nis very sensitive to parameter estimation errors. To overcome the ﬁrst\\ndrawback, Section 5.2.1 covers the works on a single side risk measure-\\nment instead of variance. The literature results dealing with the second\\ndrawback are left to the next chapter, robust portfolio optimization.\\n102', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 109}),\n",
       " Document(page_content='5.1. Markowitz Mean-Variance Portfolio Optimization 103\\n5.1 Markowitz Mean-Variance Portfolio Optimization\\nThe Markowitz mean-variance framework, introduced by Harry\\nMarkowitz [135] in 1952, provides a ﬁrst quantitative approach to con-\\nstructportfolios,whichisthefoundationofthenowadaysModernPort-\\nfolio Theory (for a comprehensive review, see [58]). Because of this fun-\\ndamental contribution, Harry Markowitz shared the Nobel prize with\\nanother two researchers, Merton Miller and William Sharpe, in 1990.\\nThe idea of the Markowitz framework is to ﬁnd a trade-oﬀ between\\nthe expected return and the risk of the portfolio measured by the vari-\\nance. Given that the expected return µand the positive deﬁnite covari-\\nance matrix Σof the assets are perfectly known, the expected return\\nand variance of a portfolio warewTµandwTΣw, respectively.\\nRemark 5.1. Recall from Section 2.1.4 that it is the mean vector and\\ncovariance matrix for simple returns that are used for portfolio opti-\\nmization. However, Part I mainly focuses on modeling log-returns since\\nits statistical properties are more tractable. The good thing is that one\\ncan have the mean vector and covariance matrix for simple returns\\nbased on that for log-returns directly under the Gaussian assumption.\\nThat is, suppose the log-returns of Nassets follow a multivariate Gaus-\\nsian distribution N(¯µ,¯Σ), the mean vector and covariance matrix for\\nthe simple returns are\\nµ=e¯µ+¯σ/2−1 (5.1)\\nΣ=(\\n(µ+1) (µ+1)T)\\n⊙(\\ne¯Σ−1N×N)\\n, (5.2)\\nwhere ¯σ= [Σ11,...,ΣNN]Tis the vector of the variances of the N\\nstocks, 1is aNdimensional all one vector, 1N×Nis aN-by-Nall one\\nmatrix, and eXis an elementwise exponential operator, i.e., [eX]ij=\\neXij. 
■\\n5.1.1 Mean-Variance Trade-Oﬀ Optimization\\nThere are three alternative but equivalent formulations, i.e., the risk\\nminimizationproblem,returnmaximizationproblem,andrisk-adjusted\\nreturn maximization problem, and all of them are useful in practical\\napplications.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 110}),\n",
       " Document(page_content='104 Portfolio Optimization with Known Parameters\\nRisk Minimization Problem\\nThe risk minimization formulation aims at minimizing the portfolio\\nvariance with the expected portfolio return being above a given target:\\nminimizewwTΣw\\nsubject to wTµ≥µ0,\\nwT1= 1,(5.3)\\nwhereµ0is a expected return target parameter. The constraint wT1=\\n1is the capital budget constraint. Note that the above problem is\\nconvex given that Σis positive deﬁnite and thus it can always be\\nsolved eﬃciently.\\nAn interesting case of problem (5.3) that achieves the minimum\\nvariance regardless of the expected portfolio return is\\nminimizewwTΣw\\nsubject to wT1= 1,(5.4)\\nwhich for obvious reasons is referred to as a global minimum variance\\nportfolio (GMVP). Since the GMVP is a convex QP with only one lin-\\near equality constraint, solving the Karush-Kuhn-Tucker (KKT) opti-\\nmality conditions [32] directly yields the closed-form solution expressed\\nas follows:\\nwGMVP =1\\n1TΣ−11Σ−11. (5.5)\\nThen the portfolio mean and variance of the GMVP are easily com-\\nputed by\\nµGMVP =µTwGMVP =µTΣ−11\\n1TΣ−11, (5.6)\\nσ2\\nGMVP =wT\\nGMVP Σw GMVP =1\\n1TΣ−11. (5.7)\\nReturn Maximization Problem\\nInstead of seeking the minimum variance, an alternative problem is\\nto search for the maximum expected return with the variance under', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 111}),\n",
       " Document(page_content='5.1. Markowitz Mean-Variance Portfolio Optimization 105\\ncontrol, say, less than a given target. This problem is referred to as a\\nreturn maximization problem and has the following form:\\nmaximizewwTµ\\nsubject to wTΣw≤σ2\\n0,\\nwT1= 1,(5.8)\\nwhereσ2\\n0is the parameter that controls the variance target. Again,\\nsince the covariance matrix Σis positive deﬁnite, the above problem\\nhas a linear objective with linear and convex quadratic constraints, and\\nthus it is eﬃciently computable.\\nRisk-Adjusted Return Maximization Problem\\nThe third problem formulation is to maximize a risk-adjusted return\\nas follows:\\nmaximizewwTµ−λwTΣw\\nsubject to wT1= 1,(5.9)\\nwhereλ≥0is a given trade-oﬀ parameter between the portfolio ex-\\npected return and variance. When λ>0, it is a convex QP with only\\none linear constraint which admits a closed-form solution as follows:\\nw⋆=1\\n2λΣ−1(µ+ν⋆1), (5.10)\\nwhereν⋆is the optimal dual variable\\nν⋆=2λ−1TΣ−1µ\\n1TΣ−11. (5.11)\\nEﬃcient Frontier\\nEach of the above three problem formulations, i.e., (5.3), (5.8), and\\n(5.9),hasonecontrollingparameterandtheyareequivalentinthesense\\nthat when the parameters change (i.e., µ0changes from µGMVPto+∞,\\nσ2\\n0changes from σ2\\nGMVPto+∞, andλchanges from 0to+∞), they\\nresult in the same mean-variance1trade-oﬀ curve (Pareto curve), which\\n1In the ﬁnancial literature, it is standard deviation instead of variance that is\\nused for illustrative purposes.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 112}),\n",
       " Document(page_content='106 Portfolio Optimization with Known Parameters\\nrf\\nStandard deviationExpected return\\nGlobal minimum varianceMaximum Sharpe ratioEfficient frontierCapital market line\\nFeasible portfolios\\nFigure 5.1: Illustration of the eﬃcient frontier, capital market line, and global\\nminimum variance and maximum Sharpe ratio portfolios.\\nis usually referred to as an eﬃcient frontier in the ﬁnancial literature,\\ne.g., see [65, 58]. For example, when λ→+∞, the portfolio (5.10) goes\\nto the GMVP (5.5).\\nFigure 5.1 shows the shape of an eﬃcient frontier (see the black\\nsolid curve) and all the other feasible portfolios fall below the eﬃcient\\nfrontier (see that all the red square points fall below the back solid\\ncurve).TheGMVPistheleftmostpointthathastheminimumvariance\\namong all the feasible portfolios (see the black round dot). A simpliﬁed\\nversion code of Figure 5.1 is included in Appendix B.\\n5.1.2 Sharpe Ratio Optimization\\nAll the portfolios on the eﬃcient frontier are optimal depending on the\\ninvestor’s risk proﬁle, that is, the choice of the parameters µ0,σ0, orλ.\\nHowever, one may still ask which portfolio may be the most meaningful\\nin practice. Precisely, Sharpe [179] ﬁrst proposed the optimization of', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 113}),\n",
       " Document(page_content='5.1. Markowitz Mean-Variance Portfolio Optimization 107\\nthe following problem:\\nmaximizewwTµ−rf√\\nwTΣw\\nsubject to wT1= 1,(5.12)\\nwhererfis the return of a risk-free asset2. The objective of (5.12),\\nis usually referred to as the Sharpe ratio, which measures the excess\\nreturn (i.e., wTµ−rf) normalized by the risk (i.e.,√\\nwTΣw), and the\\nproblem is thus called the Sharpe ratio maximization problem.\\nSincetheSharperatioisnonconcave,theSharperatiomaximization\\nproblem is not a convex problem. Fortunately, it can be reformulated\\nin convex form as follows. First, note that wT1= 1, then the problem\\n(5.12) can be rewritten as\\nmaximizewwT(µ−rf1)√\\nwTΣw\\nsubject to wT1= 1.(5.13)\\nObservethattheobjectiveof (5.13)nowisscaleinvariantw.r.t. w,thus\\nthe constraint wT1= 1can be relaxed to wT1>0and then one can\\narbitrarily set wT(µ−rf1) = 1and minimize wTΣwinstead. Thus,\\nthe problem (5.13) can be further reformulated into a convex form:\\nminimizewwTΣw\\nsubject to wT(µ−rf1) = 1,\\nwT1>0.(5.14)\\nAny normalized solution of (5.14) so that the summation of all the\\nportfolio weight values being one is an optimal solution of (5.13).\\nThe problem (5.14) without wT1>0is a convex QP with only one\\nlinear equality constraint and thus admits a closed-form solution:\\nwSR=1\\n(µ−rf1)TΣ−1(µ−rf1)Σ−1(µ−rf1), (5.15)\\n2Usually a risk-free asset is assumed to have zero risk or variance. In practice,\\nfor example, the US Treasuries, especially T-bills, are considered as risk-free assets\\nbecause they are backed by the U.S. government.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 114}),\n",
       " Document(page_content='108 Portfolio Optimization with Known Parameters\\nthenwSRis also an optimal solution of the problem (5.14) if wT\\nSR1>0\\n(which is always observed in practice); otherwise, one can always ﬁnd\\nan optimal solution of (5.14) eﬃciently via a standard optimization\\nsolver since it is a convex QP.\\nFigure 5.1 shows the Sharpe ratio point on the eﬃcient frontier\\n(see the blue round point) that has the maximum Sharpe ratio (or\\nequivalently, the maximum slope between the points on the eﬃcient\\nfrontier and the risk-free point). If one is allowed to borrow or lend the\\nrisk-free asset, then he/she can have a portfolio that falls on the solid\\nblue line, which is usually referred to as the capital market line in the\\nﬁnancial literature [58].\\nAnother interesting observation is that when rf= 0and all the\\nassets have the same expected return, i.e., µ=α1for someα>0, the\\nSharpe ratio solution (5.15) coincides with the GMVP in (5.5).\\n5.1.3 Connections between Portfolio and Beamforming\\nLet us ﬁrst start with introducing the formulation of beamforming. The\\noutput of a narrowband beamformer is given by\\ny(t) =wHx(t), (5.16)\\nwheretis the time index, x(t)∈CNis the complex vector of array\\nobservations (i.e., measurements at diﬀerent antennas), w∈CNis the\\ncomplex vector of beamformer weights, and Nis the number of array\\nsensors.\\nThe observation vector is modeled as\\nx(t) =s(t)a\\ued19\\ued18\\ued17\\ued1a\\n≜s(t)+i(t) +n(t), (5.17)\\nwhere s(t),i(t), and n(t)are the desired signal, interference, and noise\\ncomponents, respectively. The signal s(t)is the temporal waveform and\\nais the spatial steering vector.\\nThen the goal of beamforming design is to design a weight vector\\nor beamvector wthat maximizes the SINR [149]:\\nmaximizewσ2\\ns|wHa|2\\nwHRw(5.18)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 115}),\n",
       " Document(page_content='5.1. Markowitz Mean-Variance Portfolio Optimization 109\\nwhereσ2\\nsis the signal power, |·|denotes the magnitude of a complex\\nnumber, and\\nR=E\\uf8f3\\n(i(t) +n(t))(i(t) +n(t))H\\uf8f2\\n(5.19)\\nis theN×Ninterference-plus-noise covariance matrix.\\nNote that the objective of (5.18) is invariant to the magnitude and\\nthe phase of w, thus one can arbitrarily set the complex number wHa\\nto be real and equal to one, i.e., wHa= 1, and then the problem (5.18)\\ncan be reformulated as [149]:\\nminimizewwHRw\\nsubject to wHa= 1,(5.20)\\nwhich is the problem (1.4) mentioned in the introduction of Chapter 1.\\nThe solution is found in closed-form as\\nw=1\\naHR−1aR−1a, (5.21)\\nwhich shares the same mathematical form as the GMVP in (5.5) with\\nthe real-valued net returns covariance matrix Σbeing replaced by the\\ncomplex-valued interference-plus-noise covariance matrix and the con-\\nstant vector 1being replaced by the complex-valued signal steering\\nvector a.\\n5.1.4 Practical Constraints\\nIn practice, the optimization problems are not as clean as stated above\\nand there are always some additional constraints due to market regu-\\nlarizations, capital budgets, investors’ preferences, etc. (some of which\\nare not even convex) [65, 63].\\nLong-Only Constraints\\nThis is the most natural constraint and models the fact that one cannot\\nsell what one does not have:\\nw≥0. (5.22)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 116}),\n",
       " Document(page_content='110 Portfolio Optimization with Known Parameters\\nThis is a usual constraint since many funds and institutional investors\\nare not allowed to short-sell in the market, which means selling what\\none does not have, and would translate into a negative weight (since\\nthat value is owed rather than owned).\\nTurnover Constraints\\nIf we denote the current portfolio as w0, and the target portfolio to\\nbe designed as w, then ∆w≜w−w0denotes the turnover, i.e., the\\ncapital to be traded. Usually, the smaller the turnover is, the lower the\\ntransaction cost is. Thus, we can limit the turnover either on each asset\\n|∆wi|≤Ui (5.23)\\nor on the whole portfolio:\\n∥∆w∥1≤U. (5.24)\\nFor example, it is practical to restrict the turnover of an asset to be\\nless than 5%of the average daily volume of the asset.\\nHolding Constraints\\nIt is also common in practice to limit the weights in each asset, that is,\\nLi≤wi≤Ui, (5.25)\\nwhereLiandUiare lower and upper bounds of the holdings of asset i.\\nAnother issue is that one has to pay a ﬁxed minimum brokerage\\nfee no matter how small the order is. Thus too small holdings are not\\ndesired in practice and they can be avoided by adding the following\\n(nonconvex) constraints:\\n|wi|≥Li 1{wi̸=0}, (5.26)\\nwhereLiis the smallest holding size of asset i.\\nCardinality Constraints\\nIt is also suggested to restrict the number of assets in some scenarios,\\ne.g., it is practical to use only a few stocks to track the market index.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 117}),\n",
       " Document(page_content='5.2. Drawbacks of Markowitz Framework 111\\nMathematically speaking, this constraint reads\\n∥w∥0≤K. (5.27)\\n5.2 Drawbacks of Markowitz Framework\\nEven though the Markowitz framework is quantitatively easy to under-\\nstand, it has two serious drawbacks that have made the framework not\\nused in practice for many years.\\n5.2.1 Variance Is Not Appropriate\\nAs motivated in Section 4.5 for an order execution problem, variance\\nis not a good risk measurement in practice since it penalizes both the\\nunwanted high transaction costs and the desired low transaction costs\\n(for short-selling it is the opposite).\\nThis argument indeed applies to the portfolio optimization since\\nonly the high portfolio losses3are unwanted and it is thus more\\npractical to penalize these only but not the low portfolio losses, see\\n[7, 63, 65, 172, 100].\\nTo overcome this drawback, there are many single side risk mea-\\nsurements, e.g., Roy’s safety-ﬁrst, semi-variance, lower partial moment,\\nVaR,CVaR,etc.[65],proposedintheﬁnancialliterature.Amongthem,\\nCVaR enjoys the widest popularity due to its mathematical tractabil-\\nity, thus in the next subsection we mainly review the application of\\nCVaR in portfolio optimization.\\nCVaR Portfolio Optimization\\nActually, one of the ﬁrst popular single side risk measurements was\\nValue-at-Risk (VaR) initially proposed by J.P. Morgan.4Denote ras\\na multivariate random variable of the asset returns, and the portfolio\\nloss is−wTr. Rockafellar and Uryasev [166] ﬁrst proposed to minimize\\n3The portfolio loss is the negative portfolio return. Thus high portfolio losses\\nmean low portfolio returns.\\n4See http://www.value-at-risk.net/riskmetrics/.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 118}),\n",
       " Document(page_content='112 Portfolio Optimization with Known Parameters\\nthe CVaR of the portfolio loss as follows:\\nminimizewCVaR 1−ε(−wTr)\\nsubject to wT1= 1,(5.28)\\nwhere the deﬁnition of CVaR has been introduced in Section 4.5.1.\\nAgain, the objective of the problem (5.28) contains a conditional\\nexpectation exceeding a threshold that is not ﬁxed, which in general is\\nnot easy to deal with.\\nFollowing the technique in [166] (which has been introduced in Sec-\\ntion 4.5.1) and given the past observations rt,t= 1,...,T, ofr, one\\nhas the sample average approximation (SAA) of (5.28) as follows:\\nminimize\\nw,z,ζζ+1\\nεTT\\uf8fa\\nt=1zt\\nsubject to 0≤zt≥−wTrt−ζ, t = 1,...,T\\nwT1= 1.(5.29)\\nRemark 5.2. Similar to the order execution problem in Section 4.4,\\none can have either an equivalent convex formulation for a Gaussian\\ndistribution (e.g., see [166]) or a safe approximation convex approxi-\\nmation for general non-Gaussian distributions satisfying Assumption\\n4.1. Since it is straightforward, we omit it here. ■\\nRemark 5.3. Now we have seen that CVaR as a single side risk mea-\\nsurementhasbeenappliedinbothorderexecutionandportfoliodesign.\\nInterestingly, researchers in signal processing and wireless communica-\\ntion communities have become aware of this useful mathematical tech-\\nnique. Recently, it has been used to design some chance-constrained\\nwireless communication networks for more reliable communications\\neven under some extreme events, e.g., see [124, 183, 112]. ■\\n5.2.2 Markowitz Framework Is Too Sensitive (Lack of Robustness)\\nThe second drawback is that the Markowitz framework is very sensitive\\nto the parameters, i.e., the mean vector µand the covariance matrix\\nΣ, but especially the mean vector [63]. For illustrative purposes, here', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 119}),\n",
       " Document(page_content='5.2. Drawbacks of Markowitz Framework 113\\nTable 5.1: Performance of the maximum Sharpe ratio portfolios under diﬀerent\\nparameter perturbations. The optimal portfolio w⋆is the portfolio of the case of\\nNo. Err.\\nParam. Err. w∥w−w⋆∥2\\n∥w⋆∥2SR\\nNo Err. [ 0.9909,0.4088,−0.3997]T0 0.2551\\nMean Err. [ 0.1341,0.5976,0.2683]T0.9639 0.2377\\nCov. Err. [ 0.1103,0.6140,0.2757]T0.9865 0.2363\\nMean&Cov. Err. [−0.2572,0.6576,0.5996]T1.4144 0.2057\\nwe use a simple numerical example to show how a slightly insigniﬁcant\\nerror can dramatically distort the optimal portfolio.\\nExample 5.1. Suppose there are three assets with µ1=µ2= 8%and\\nµ3= 5%and volatilities of the three assets are σ1= 20%,σ2= 22%,\\nσ3= 10%and the correlations are ρij= 0.8.\\nLet us focus on solving the maximum Sharpe ratio problem (5.12)\\nwithrf= 3%under four scenarios: i) all the parameters are known\\nexactly (referred to as No Err.), ii) there is a slight error in µ1so that\\nthe estimated value is ˆµ1= 7%(referred to as Mean Err.), iii) there\\nis an error in σ1such that the estimated value is ˆσ1= 25%(referred\\nto as Cov. Err.), and iv) the combination of ii) and iii) (referred to as\\nMean&Cov. Err.).\\nTable 5.1 shows the numerical results of the solved portfolios, the\\nrelative diﬀerences, and the Sharpe ratios (SR). For example, if we\\ncompare Mean Err. with No Err., we can see that changing the mean\\nof the ﬁrst asset from 8%to7%dramatically changes the portfolio\\nweights vector: the relative diﬀerence is 0.9639. Similar results can be\\nobtained if we compare Cov. Err. with No Err., and the diﬀerence\\nbecomes even larger if there are both errors in the mean vector and\\ncovariance matrix, see Mean&Cov. Err. versus No Err. 
■\\nThere are many works, e.g., [55, 200, 86, 63], that focus on over-\\ncoming this drawback fully and we will review them separately from\\nthis chapter in the upcoming Chapter 6.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 120}),\n",
       " Document(page_content='114 Portfolio Optimization with Known Parameters\\n5.3 Black-Litterman Model\\nThe Black-Litterman model is an alternative approach dealing with the\\nsensitivity issue in expected excess returns to some degree. It combines\\nmarket equilibrium and investors’ views to result in a more robust\\nexpected return estimate, based on which the optimized portfolio is\\nrelatively more stable [22, 23, 24, 104].\\nFor simplicity, let us suppose for the Black-Litterman model the\\ntrue covariance Σis known and the goal is to produce a stable estimate\\nof the expected excess returns µ.\\nLet us ﬁrst start with the two information sources based on which\\nthe Black-Litterman model can be built, i.e., market equilibrium and\\ninvestors’ views.\\nMarket Equilibrium. The ﬁrst important assumption is that a market\\nequilibrium can provide an estimate of the expected excess returns,\\ndenoted asπ, close to the true unknown expected excess returns µ.\\nMathematically, it can be expressed as follows:\\nπ=µ+wπ,wπ∼N(0,τΣ) (5.30)\\nwhere the parameter τ > 0, which measures the uncertainty in the\\nestimateπ, and the smaller τis, the less uncertain the estimate is. A\\nspeciﬁc is provided later in Example .\\nInvestors’ View. Suppose there are Kviews summarized from some\\ninvestors, the Black-Litterman model quantiﬁes them via a linear sys-\\ntem:\\nq=Pµ+wq,wq∼N(0,Ω), (5.31)\\nwhere P∈RK×Nandq∈RKcharacterize the absolute or relative K\\nviews and Ω∈RK×Kmeasures the uncertainty in the views. A speciﬁc\\nexample is provided later in Example .\\nThe expected excess returns based on the market equilibrium (5.30)\\nand the investors’ views (5.31) actually can be written together in a', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 121}),\n",
       " Document(page_content='5.3. Black-Litterman Model 115\\nmore compact form:\\ny=Xµ+wBL, (5.32)\\nwhere wBL∼N(0,V)and\\ny≜)\\nπ\\nq[\\n,X≜)\\nI\\nP[\\n,V≜)\\nτΣ 0\\n0 Ω[\\n. (5.33)\\nObviously, (5.32) is a standard linear model for the true expected\\nexcess returns with white Gaussian noise. The Gaussian ML estimator,\\ni.e., the minimizer of the following problem\\nminimizeµ(y−Xµ)TV−1(y−Xµ), (5.34)\\nis a better estimate since it combines the market equilibrium and in-\\nvestors views. Easily, setting the derivative of (5.34) to zero yields the\\nclosed-form solution:\\nˆµBL= (XTV−1X)−1XTV−1y (5.35)\\n=(\\uf8f3\\nI PT\\uf8f2)\\n(τΣ)−10\\n0 Ω−1[)\\nI\\nP[(−1\\uf8f3\\nI PT\\uf8f2)\\n(τΣ)−10\\n0 Ω−1[)\\nπ\\nq[\\n(5.36)\\n=(\\n(τΣ)−1+PTΩ−1P)−1(\\n(τΣ)−1π+PTΩ−1q)\\n. (5.37)\\nWe can further understand the above solution (5.37) deeper as fol-\\nlows. Since the objective of (5.34) can be rewritten as\\n(y−Xµ)TV−1(y−Xµ) (5.38)\\n=()\\nπ\\nq[\\n−)\\nµ\\nPµ[(T)\\n(τΣ)−10\\n0 Ω−1[()\\nπ\\nq[\\n−)\\nµ\\nPµ[(\\n=1\\nτ(π−µ)TΣ−1(π−µ) + (q−Pµ)TΩ−1(q−Pµ),(5.39)\\nproblem (5.34) actually equals\\nminimizeµ(π−µ)TΣ−1(π−µ) +τ(q−Pµ)TΩ−1(q−Pµ).\\n(5.40)\\nThe objective combines the market equilibrium towards the investors’\\nviews with τbeing the trade-oﬀ parameter. There are two extreme\\ncases', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 122}),\n",
       " Document(page_content='116 Portfolio Optimization with Known Parameters\\n•whenτ= 0, the objective does not consider any view and the\\noptimal solution is only based on the market equilibrium:\\nˆµme=π; (5.41)\\n•whenτ→+∞, the objective emphasizes on the investors’ views\\nonly and the optimal solution goes to\\nˆµview= (PTΩ−1P)−1PTΩ−1q. (5.42)\\nInterestingly, the general Black-Litterman estimate (5.37) can be\\nrewritten as follows:\\nˆµBL=(\\n(τΣ)−1+PTΩ−1P)−1(\\n(τΣ)−1π+PTΩ−1q)\\n(5.43)\\n=(\\n(τΣ)−1+PTΩ−1P)−1(\\n(τΣ)−1ˆµme+PTΩ−1Pˆµview)\\n(5.44)\\n=(\\n(τΣ)−1+PTΩ−1P)−1(τΣ)−1\\n\\ued19 \\ued18\\ued17 \\ued1a\\nWme≜ˆµme\\n+(\\n(τΣ)−1+PTΩ−1P)−1PTΩ−1P\\n\\ued19 \\ued18\\ued17 \\ued1a\\nWview≜ˆµview, (5.45)\\nwhich is simply a linear weighted combination of the two extreme so-\\nlutions ˆµmeandˆµviewand the weight matrices satisfy\\nWme+Wview=I. (5.46)\\nClearly, the Black-Litterman expected excess returns (5.45) shrinks\\nthe market equilibrium towards the investors’ views. This idea of the\\nBlack-Litterman model indeed is similar to the previous James-Stein\\nshrinkage estimator (3.30) with three diﬀerences:\\n•the sample mean estimate in (3.30) is replaced by the expected\\nexcess returns estimated based on the market equilibrium ˆµme;\\n•the speciﬁc target in (3.30) is replaced by the estimate of the\\nexpected excess returns investors’ view ˆµview; and', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 123}),\n",
       " Document(page_content='5.3. Black-Litterman Model 117\\n•the scalar trade-oﬀ (or shrinkage) parameter in (3.30) is changed\\nto a matrix instead.\\nThus, we can see that the Black-Litterman model is a more precise\\nmodel for producing stable and reliable expected excess returns (or\\nequivalently expected returns since the risk-free rate is almost always\\nknown).\\nThe above models of market equilibrium (5.30) and investors’ views\\n(5.31) are quite general. This generality enables the popularity of the\\nBlack-Litterman model. In the following we consider some speciﬁc ex-\\namples for both of them.\\nExample 5.2. One of the most popular models for market equilibrium\\nis the CAPM5(2.20)\\nE[ri]−rf=βi(E[rM]−rf), (5.47)\\nwhere E[ri],E[rM], andrfare the expected returns on the i-stock,\\nthe expected return on the market portfolio, and the risk-free rate,\\nrespectively. The sensitivity of the expected excess return of the stock\\nto that of the market is captured by the beta (2.21):\\nβi=Cov(ri,rM)\\nVar(rM)(5.48)\\nandβ≜[β1,...,βN]T.\\nLetwM≜[w1M,...,wNM]T∈RNdenote the market portfolio of\\ntheNstocks, thus the market return is\\nrM=rTwM, (5.49)\\nwhere r≜[r1,...,rN]Tcontains the returns of the Nstocks.\\nSubstituting (5.48) and (5.49) into (5.47), the estimated expected\\nexcess returns of the Nstocks are as follows:\\nπ≜)\\n]])E[r1]−rf\\n...\\nE[rN]−rf(\\n\\uf8fa\\uf8fa[=β(E[rM]−rf) (5.50)\\n5Actually, the CAPM model was used in the initial derivaiton of the Black-\\nLitterman model [22, 23, 24]. For simplicity, we drop the time index tin this section.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 124}),\n",
       " Document(page_content='118 Portfolio Optimization with Known Parameters\\n=E[rM]−rf\\nVar(rM))\\n]])Cov(r1,rM)\\n...\\nCov(rN,rM)(\\n\\uf8fa\\uf8fa[=E[rM]−rf\\nVar(rM))\\n]])Cov(r1,rTwM)\\n...\\nCov(rN,rTwM)(\\n\\uf8fa\\uf8fa[\\n(5.51)\\n=E[rM]−rf\\nVar(rM)\\ued19\\ued18\\ued17\\ued1a\\nδ≜)\\n]])Cov(r1,r1)... Cov(r1,rN)\\n.........\\nCov(rN,r1)... Cov(rN,rN)(\\n\\uf8fa\\uf8fa[\\n\\ued19 \\ued18\\ued17 \\ued1a\\nΣ≜wM (5.52)\\n=δΣwM. (5.53)\\nThat is,πin (5.30) is replaced by the quantity δΣwM.■\\nExample 5.3. Let us consider an example from [65] to understand how\\nthe model (5.31) expresses the views. Suppose there are N= 5stocks\\nand two independent views on them:\\n•Stock 1 will have excess return of 1.5%with standard deviation\\n1%;\\n•Stock 3 will outperform Stock 2 by 4%with a standard deviation\\n1%.\\nMathematically, the above two independent views can be expressed as\\n)\\n1.5%\\n4%[\\n=)\\n1 0 0 0 0\\n0−1 1 0 0[)\\n]]]]]])µ1\\nµ2\\nµ3\\nµ4\\nµ5(\\n\\uf8fa\\uf8fa\\uf8fa\\uf8fa\\uf8fa\\uf8fa[+wq, (5.54)\\nwhere wq∼N(0,Ω)andΩ=)\\n1%20\\n0 1%2[\\n. ■\\nOnce a Black-Litterman expected excess returns ˆµBLhas been es-\\ntimated, we can further plug it and the known true covariance matrix6\\nΣinto the previously mentioned mean-variance portfolio optimization\\nframework to achieve some desired portfolios.\\n6Keep in mind that the covariance matrix also needs to be estimated in practice.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 125}),\n",
       " Document(page_content='5.3. Black-Litterman Model 119\\nRemark 5.4. The Black-Litterman expected excess returns (5.37) re-\\nquires the trade-oﬀ parameter τ, investors’ view Pandqand the\\nconﬁdence parameter Ω. In general, they are diﬃcult to specify. For\\nexample, diﬀerent researchers have diﬀerent views on selecting the pa-\\nrameterτ: some experience researchers generally set τ∈[0.01,0.05]\\n[104], some prefers to use τ= 1directly [174], while some suggest the\\nvalue 1dividedbythenumberofobservations[26].Hereweonlyoutline\\nthe idea of Black-Litterman model but do not explore these diﬃculties.\\nThe interested readers may please refer to [104] and references therein\\nfor more detailed discussions. ■', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 126}),\n",
       " Document(page_content='6\\nRobust Portfolio Optimization\\nMarkowitz portfolio optimization requires knowledge of the mean re-\\nturn vector and covariance matrix parameters. As it turns out, the\\nresulting optimized portfolio is so highly sensitive to small estimation\\nerrors in such parameters that it is unusable in practice (indeed practi-\\ntioners seldom use such a naive design). One step towards the solution\\nis to make the portfolio design robust to uncertainties in the parame-\\nters.\\nThis chapter reviews the robust portfolio optimization that uses\\nsome uncertainty sets to capture the estimation errors and then takes\\nsuch uncertainty sets into problem formulations.\\nThe organization of this chapter is as follows. Section 6.1 reviews\\nthe robust mean-variance portfolio optimization and Section 6.2 con-\\ncentrates on the robust Sharpe ratio maximization. At the end, Section\\n6.3 makes some speciﬁc connections between robust portfolio optimiza-\\ntion in ﬁnancial engineering and robust beamforming in signal process-\\ning.\\n120', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 127}),\n",
       " Document(page_content='6.1. Robust Mean-Variance Trade-oﬀ Portfolio Optimization 121\\n6.1 Robust Mean-Variance Trade-oﬀ Portfolio Optimization\\nRecall that in Section 5.1.1 there are three alternative mean-variance\\ntrade-oﬀ optimization formulations, i.e., (5.4), (5.8), and (5.9). Since\\nthe formulations are equivalent in the sense that they give the same\\neﬃcient frontier, for simplicity, we focus on (5.9) which is restated as\\nfollows:\\nmaximizewwTµ−λwTΣw\\nsubject to wT1= 1,w∈W,(6.1)\\nwhereλ≥0is the trade-oﬀ parameter, Wdenotes the set of other\\nconvex constraints, and we further deﬁne W≜{w|wT1= 1}∩Wand\\nassumeWis convex and compact.\\nTo design the robust counterpart of (6.1), here we assume that\\nthe uncertainty sets of the mean return µand covariance matrix Σ\\nare separable, convex, and compact, and they are denoted as Uµand\\nUΣ, respectively. A conservative and practical investment approach is\\nto optimize the worst-case objective over the uncertainty sets, which\\nleads to the following robust counterpart of (6.1):\\nmaximizewmin\\nµ∈UµwTµ−λmax\\nΣ∈UΣwTΣw\\nsubject to wT1= 1,w∈W.(6.2)\\n6.1.1 Minimax or Maximin\\nIt is obvious that the objective of (6.2) is concave in wand is linear\\n(and thus convex) in both µandΣ. Under the condition that W,Uµ,\\nandUΣare convex and compact sets, one can easily get that\\nmax\\nw∈Wmin\\nµ∈Uµ,Σ∈UΣ{wTµ−λwTΣw}= min\\nµ∈Uµ,Σ∈UΣmax\\nw∈W{wTµ−λwTΣw}\\n(6.3)\\nbased on the minimax theory [165]. Therefore, one can equivalently\\nsolve either the minimax or maximin formulations, whichever is com-\\nputationally cheaper in practice. Some speciﬁc examples of numerical\\niterative algorithms can be found in [127, 200].\\nHowever, instead of solving a double-layered minimax or maximin\\nproblem numerically, which in general is computationally costly, one', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 128}),\n",
       " Document(page_content='122 Robust Portfolio Optimization\\nmay either ﬁnd the worst-case mean and variance in closed-form di-\\nrectly or reformulate the worst-case formulation as some simpler max-\\nimization problem so that (6.2) reduces into a single-layered convex\\nmaximization problem (e.g., QP, QCQP, or SDP). In the following, we\\nwill review diﬀerent types of uncertainty sets such that (6.2) can be\\nreformulated to a simple single-layered convex problem.\\n6.1.2 Worst-Case Mean\\nLet us start with the worst-case mean ﬁrst. We consider two types of\\nthe uncertainty set up for the mean vector Uµ, i.e., box and elliptical\\nsets.\\nBox Uncertainty Set\\nThe box uncertainty set is given by\\nUb\\nµ={µ|−δ≤µ−ˆµ≤δ}, (6.4)\\nwhere the predeﬁned parameters ˆµandδdenote the location and size\\nof the box uncertainty set, respectively.\\nWe can easily derive the worst-case mean as\\nmin\\nµ∈UbµwTµ=wTˆµ+ min\\n−δ≤γ≤δwTγ=wTˆµ−|w|Tδ,(6.5)\\nwhere|w|denotes elementwise absolute value of w.\\nElliptical Uncertainty Set\\nThe elliptical uncertainty set1is\\nUe\\nµ={µ|(µ−ˆµ)TS−1\\nµ(µ−ˆµ)≤δ2\\nµ}, (6.6)\\nwhere the predeﬁned parameters ˆµ,δµ>0, and Sµ≻0denote the\\nlocation, size, and the shape of the uncertainty set, respectively. The\\nworst-case mean is\\nmin\\nµ∈UeµwTµ= min\\ued79\\ued79\\ued79S−1/2\\nµγ\\ued79\\ued79\\ued79\\n2≤δµwT(ˆµ+γ) =wTˆµ+ min\\ued79\\ued79\\ued79S−1/2\\nµγ\\ued79\\ued79\\ued79\\n2≤δµwTγ\\n1A special case is S=Iand the uncertainty set becomes a sphere.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 129}),\n",
       " Document(page_content='6.1. Robust Mean-Variance Trade-oﬀ Portfolio Optimization 123\\n=wTˆµ+ min\\n∥˜γ∥2≤δµwTS1/2\\nµ˜γ=wTˆµ−δµ\\ued79\\ued79\\ued79S1/2\\nµw\\ued79\\ued79\\ued79\\n2.(6.7)\\nItiseasytocheckthatboththeworst-casevalues(6.5)and(6.7)are\\nconcave in w, which is desired since (6.2) is a maximization problem.\\n6.1.3 Worst-Case Variance Based on ΣDirectly\\nNow let us focus on the worst-case variance and we start by incorpo-\\nrating the uncertainty into the covariance matrix Σdirectly.\\nBox Uncertainty Set\\nAgain, let us elementwise ﬁrst consider the box type uncertainty set as\\nfollows:\\nUb\\nΣ={Σ|Σ≤Σ≤Σ,Σ⪰0}, (6.8)\\nwhere ΣandΣare as lower and upper bounds.\\nA special case is that if Σ⪰0andw≥0holds, the worst-case\\nvariance can be found directly [200]:\\nmax\\nΣ∈Ub\\nΣwTΣw=wTΣw. (6.9)\\nHowever, when either Σ⪰0orw≥0may not hold, the worst-case\\nvariance does not have a closed-form expression anymore. Fortunately,\\nan equivalent formulation can be found as follows. First note that the\\nworst-case value max\\nΣ∈Ub\\nΣwTΣwis given by the convex problem\\nmaximize\\nΣwTΣw\\nsubject to Σ≤Σ≤Σ,\\nΣ⪰0.(6.10)\\nThenitiseasytohavetheequivalentdualproblemof (6.10)as[127,63]\\nminimize\\nΛ,ΛTr(ΛΣ)−Tr(ΛΣ)\\nsubject to)\\nΛ−Λw\\nwT1[\\n⪰0,\\nΛ≥0,Λ≥0,(6.11)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 130}),\n",
       " Document(page_content='124 Robust Portfolio Optimization\\nwhich is a convex SDP, and in fact the constraints are jointly convex\\nin the inner dual variable variables ΛandΛand the outer variable w.\\nNow we can easily have a speciﬁc equivalent formulation of (6.2) as\\nfollows. Given the uncertainty sets Ub\\nµandUb\\nΣ, (6.2) equals the convex\\nProblem I in Table 6.1. In fact, Table 6.1 summarizes all the convex\\nproblems for all the possible combinations of uncertainty sets.\\nElliptical Uncertainty Set\\nThe elliptical uncertainty set of the covariance matrix can be deﬁned\\nas [127]\\nUe\\nΣ={\\nΣ|(\\nvec(Σ)−vec(ˆΣ))TS−1\\nΣ(\\nvec(Σ)−vec(ˆΣ))\\n≤δ2\\nΣ,Σ⪰0}\\n,\\n(6.12)\\nwhere the predeﬁned parameters ˆΣ⪰0,δΣ>0, and SΣ≻0denote\\nthe location, size, and the shape of the uncertainty set.\\nTo proceed, we consider a reformulation of (6.2) as follows:\\nmaximize\\nw,Xmin\\nµ∈UµwTµ−λmax\\nΣ∈UΣTr(XΣ)\\nsubject to wT1= 1,w∈W,\\n)\\nX w\\nwT1[\\n⪰0.(6.13)\\nSince Σ⪰0, the last constraint implies X⪰wwTwhich in turn is\\nsatisﬁed with equality at an optimal solution and thus (6.13) is equal\\nto (6.2) and an optimal portfolio wof (6.13) is also optimal for (6.2).\\nThe advantage of (6.13) over (6.2) is that it allows us to derive a ﬁnal\\nequivalent convex problem.\\nSimilar to (6.10), the inner worst-case variance in (6.13) over the\\nelliptical uncertainty set is given by the following problem:\\nmaximize\\nΣTr(XΣ)\\nsubject to(\\nvec(Σ)−vec(ˆΣ))TS−1\\nΣ(\\nvec(Σ)−vec(ˆΣ))\\n≤δ2\\nΣ,\\nΣ⪰0.\\n(6.14)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 131}),\n",
       " Document(page_content='6.1. Robust Mean-Variance Trade-oﬀ Portfolio Optimization 125\\nProblem (6.14) is convex and equals its dual problem:\\nminimize\\nZTr(ˆΣ(X+Z))\\n+δΣ\\ued79\\ued79\\ued79S1/2\\nΣ(vec(X) + vec( Z))\\ued79\\ued79\\ued79\\n2\\nsubject to Z⪰0.(6.15)\\nNote that the objective is jointly convex in XandZ, and the formu-\\nlations III and IV in Table 6.1 are the resulting convex problems over\\nthe elliptical uncertainty set (6.12).\\n6.1.4 Worst-Case Variance Based on Factor Model\\nInstead of incorporating the uncertainty into the covariance matrix\\ndirectly, it is may be more accurate to explore the structure of the\\ncovariance matrix and thus the uncertainty can be incorporated in\\na more proper way. Recall from Chapter 2 that one example of the\\nﬁnancial time series modeling is the explicit factor model:\\nrt=µ+ΠTft+wt. (6.16)\\nHere, for simplicity we assume µ∈RNis the vector of mean returns,\\nf∼N (0,F)∈RKis the vector of returns of the factors that drive the\\nmarket, Π∈RK×Nis the matrix of factor loadings, wt∼N (0,D)\\nis the residual noise, and Dis diagonal, i.e., D= Diag( d). Then the\\ncovariance has the following structure:\\nΣ=ΠTFΠ+D. (6.17)\\nFor this structure (6.17), we assume Fis known exactly and ΠandD\\ncontain some estimation errors.\\nSimilar to the previous cases, we assume the uncertainty sets of Π\\nandDare separable, convex, and compact, and they are denoted as\\nUΠandUD, respectively. Now, the worst-case variance turns out to be\\nmax\\nΠ∈UΠ,D∈UDwTΣw= max\\nΠ∈UΠ,D∈UDwT(\\nΠTFΠ+D)\\nw\\n= max\\nΠ∈UΠwTΠTFΠw + max\\nD∈UDwTDw.(6.18)\\nNow, the expression in (6.18) is not concave in the uncertainty param-\\neters any more and the results in Section 6.1.1 cannot be used. Here', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 132}),\n",
       " Document(page_content='126 Robust Portfolio Optimization\\nwe can consider the worst-case terms in (6.18) one by one and the goal\\nis to ﬁnd the worst-case variance either in a closed-form or given by an\\neﬃciently solvable convex problem.\\nLet us start with the second one which is simpler. Since Dis the\\ncovariance for the residual noise and is assumed to be diagonal, the\\nfollowing uncertainty set is considered in practice [86]\\nUD={D|D= Diag( d),d≤d≤d}. (6.19)\\nDenoting D= Diag( d), we have\\nmax\\nD∈UDwTDw=wTDw. (6.20)\\nFor the ﬁrst worst-case term in (6.18), i.e., max\\nΠ∈UΠwTΠTFΠw, note\\nthat the objective is convex in Π; however, the goal is to maximize the\\nobjective and thus it is nonconvex. In general, it is not easy to compute\\nthe worst-case value eﬃciently.\\nIn the following, we will review some uncertainty sets so that the\\nworst-case value max\\nΠ∈UΠwTΠTFΠwcan be either computed in a closed-\\nform or given by solving a convex problem.\\nSphere Uncertainty Set\\nThe uncertainty set of Πis assumed to be a sphere2and is given by\\nUs\\nΠ={Π|Π=ˆΠ+∆,∥∆∥F≤δΠ}. (6.21)\\nWithout loss of generality and for simplicity, we set F=Iso that\\nmax\\nΠ∈Us\\nΠ√\\nwTΠTΠw= max\\nΠ∈Us\\nΠ∥Πw∥2, (6.22)\\nwhich is the square root of max\\nΠ∈UΠwTΠTΠw. One can upper bound the\\nworst-case value in (6.22) as follows [54]:\\nmax\\nΠ∈Us\\nΠ∥Πw∥2= max\\n∥∆∥F≤δΠ\\ued79\\ued79\\ued79ˆΠw+∆w\\ued79\\ued79\\ued79\\n2\\n≤\\ued79\\ued79\\ued79ˆΠw\\ued79\\ued79\\ued79\\n2+ max\\n∥∆∥F≤δΠ∥∆w∥2\\n2This can be easily extended to an elliptical uncertainty set.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 133}),\n",
       " Document(page_content='6.1. Robust Mean-Variance Trade-oﬀ Portfolio Optimization 127\\n≤\\ued79\\ued79\\ued79ˆΠw\\ued79\\ued79\\ued79\\n2+ max\\n∥∆∥F≤δΠ∥∆∥F∥w∥2\\n=\\ued79\\ued79\\ued79ˆΠw\\ued79\\ued79\\ued79\\n2+δΠ∥w∥2. (6.23)\\nIn fact, this upper bound is achievable by ∆=δΠuwT\\n∥w∥2where\\nu=\\uf8f1\\n\\uf8f4}\\n\\uf8f4\\uf8f3ˆΠw\\ued79\\ued79\\ued79ˆΠTw\\ued79\\ued79\\ued79\\n2, ifˆΠw̸=0,\\nany unitary vector ,otherwise.(6.24)\\nThe Problem V in Table 6.1 shows the equivalent convex formula-\\ntion when only worst-case variance is considered and the uncertainty\\nsets areUDin (6.19) andUs\\nΠin (6.21). Similar to Problems I-IV in\\nTable 6.1, it is easy to combine the worst-case means over diﬀerent\\nuncertainty sets to get more equivalent convex formulations. They are\\nquite straightforward and thus are omitted.\\nColumn-Wise Elliptical Uncertainty Set\\nAnother type of uncertainty is the column-wise elliptical uncertainty\\nset [86]\\nUce\\nΠ={Π=ˆΠ+∆,∥∆i∥g≤δΠ,i, i= 1,...,N},(6.25)\\nwhere ∆iis thei-th column of ∆,∥x∥g=√\\nxTGxandGis a given\\npositive deﬁnite weight matrix, and δΠ= [δΠ,1,...,δ Π,N]Trepresent\\nthe sizes of the elliptical uncertainty sets.\\nEventhoughtheworst-casevalue max\\nΠ∈UΠwTΠTFΠwindeedisanon-\\nconvex problem, it is shown in [86] that the following inequality\\nmax\\nΠ∈UΠwTΠTFΠw≤v (6.26)\\nholds if and only if there exist σ > 0,τ≥0, and t≥0∈RKthat\\nsatisfy the following convex constraints:\\nτ+1Tt≤v, (6.27)\\n|w|TδΠ≤r, (6.28)\\nσ≤1\\nλmax(H), (6.29)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 134}),\n",
       " Document(page_content='128 Robust Portfolio Optimization\\n\\ued79\\ued79\\ued79\\ued79\\ued79)\\n2r\\nσ−τ[\\ued79\\ued79\\ued79\\ued79\\ued79\\n2≤σ+τ, (6.30)\\n\\ued79\\ued79\\ued79\\ued79\\ued79)\\n2si\\n1−σλi−ti[\\ued79\\ued79\\ued79\\ued79\\ued79\\n2≤1−σλi+ti, i= 1,...,K, (6.31)\\nwhere UΛUTis the spectral decomposition of H=G−1/2FG−1/2,\\nΛ= Diag([λ1,...,λK])ands=UTH1/2G1/2ˆΠw.\\nProblem VI in Table 6.1 presents the resulted equivalent convex\\nformulation when the uncertainty sets are UDin (6.19) andUce\\nΠin\\n(6.25). Again, we omit the cases of considering worst-case mean and\\nworst-case variance together since the derivations of equivalent convex\\nformulations can be obtained straightforwardly based on the previous\\nderivations.\\n6.1.5 Summary of Diﬀerent Equivalent Formulations\\nTable 6.1 summarizes all the previously reviewed cases and, as men-\\ntioned before, straightforwardly, we can have many more diﬀerent\\nequivalent convex formulations for diﬀerent combinations of the un-\\ncertainty sets of the mean vector and variance matrix.\\n6.2 Robust Sharpe ratio Optimization\\nLet us ﬁrst recall the convex reformulation of Sharpe ratio maximiza-\\ntion with only the capital budget constraint wT1= 1, i.e., (5.14) as\\nfollows:\\nminimizewwTΣw\\nsubject to wT(µ−rf1) = 1,\\nwT1>0.(6.32)\\nActually, the equality constraint wT(µ−rf1) = 1in (6.32) can be\\nrelaxed as the inequality wT(µ−rf1)≥1since optimality is always\\nachieved at the equality. Then the robust Sharpe ratio problem can be', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 135}),\n",
       " Document(page_content='6.2. Robust Sharpe ratio Optimization 129\\nTable 6.1: Diﬀerent robustiﬁcations of the problem (6.2).\\nmaximizewmin\\nµ∈UµwTµ−λmax\\nΣ∈UΣwTΣw\\nsubject to wT1= 1,w∈W.\\nUncertainty\\nSetsEquivalent Convex Formulations\\nIUb\\nµ=(6.4)\\nUb\\nΣ=(6.8)maximize\\nw,Λ,ΛwTˆµ−|w|Tδ\\n−λ(\\nTr(ΛΣ)−Tr(ΛΣ))\\nsubject to wT1= 1,w∈W,\\n)\\nΛ−Λw\\nwT1[\\n⪰0,\\nΛ≥0,Λ≥0.\\nIIUe\\nµ=(6.6)\\nUb\\nΣ=(6.8)maximize\\nw,Λ,ΛwTˆµ−δµ\\ued79\\ued79\\ued79S1/2w\\ued79\\ued79\\ued79\\n2\\n−λ(\\nTr(ΛΣ)−Tr(ΛΣ))\\nsubject to wT1= 1,w∈W,\\n)\\nΛ−Λw\\nwT1[\\n⪰0,\\nΛ≥0,Λ≥0.\\nIIIUb\\nµ=(6.4)\\nUe\\nΣ=(6.12)maximize\\nw,X,ZwTˆµ−|w|Tδ−λTr(ˆΣ(X+Z))\\n−λδΣ\\ued79\\ued79\\ued79S1/2\\nΣ(vec(X) + vec( Z))\\ued79\\ued79\\ued79\\n2\\nsubject to wT1= 1,w∈W,\\n)\\nX w\\nwT1[\\n⪰0,Z⪰0.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 136}),\n",
       " Document(page_content='130 Robust Portfolio Optimization\\nmaximizewwTµ−λ max\\nΠ∈UΠ,D∈UDwT(ΠTFΠ+D)w\\nsubject to wT1= 1,w∈W.\\nUncertainty\\nSetsEquivalent Convex Formulations\\nIVUe\\nµ=(6.6)\\nUe\\nΣ=(6.12)maximize\\nw,X,ZwTˆµ−δµ\\ued79\\ued79\\ued79S1/2w\\ued79\\ued79\\ued79\\n2\\n−λTr(ˆΣ(X+Z))\\n−λδΣ\\ued79\\ued79\\ued79S1/2\\nΣ(vec(X) + vec( Z))\\ued79\\ued79\\ued79\\n2\\nsubject to wT1= 1,w∈W,\\n)\\nX w\\nwT1[\\n⪰0,Z⪰0.\\nVUD=(6.19)\\nUs\\nΠ=(6.21)maximizew,ywTµ−λ(\\nwTDw+y2)\\nsubject to wT1= 1,w∈W,\\n\\ued79\\ued79\\ued79ˆΠTw\\ued79\\ued79\\ued79\\n2+δΠ∥w∥2≤y.\\nVIUD=(6.19)\\nUce\\nΠ=(6.25)maximize\\nw,v,σ,τ, t,swTµ−λ(\\nwTDw+v)\\nsubject to wT1= 1,w∈W,\\nτ+1Tt≤v,t≥0,\\n|w|TδΠ≤r\\nσ≤1\\nλmax(H),\\n\\ued79\\ued79\\ued79\\ued79\\ued79)\\n2r\\nσ−τ[\\ued79\\ued79\\ued79\\ued79\\ued79\\n2≤σ+τ,\\ns=UTH1/2G1/2ˆΠw,\\n\\ued79\\ued79\\ued79\\ued79\\ued79)\\n2si\\n1−σλi−ti[\\ued79\\ued79\\ued79\\ued79\\ued79\\n2≤1−σλi+ti,\\ni= 1,...,K.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 137}),\n",
       " Document(page_content='6.3. Connections with Robust Beamforming 131\\nformulated based on (6.32) as follows:\\nminimizew,κmax\\nΣ∈UΣwTΣw\\nsubject to min\\nµ∈UµwT(µ−rf1)≥1,\\nwT1>0,(6.33)\\nwhereUµandUΣdenote some general uncertainty sets for µandΣ,\\nrespectively, and the robust techniques stated in the previous Section\\n6.1 can be directly used to obtain some equivalent convex formulations.\\nWhen there exist some other convex constraints apart from the\\ncapital budge constraint, the robust formulation is not to simply add\\nthem into (6.33) but becomes more complicated. The detailed deriva-\\ntion approach can be found in [200]. Nevertheless, for the derived ro-\\nbust formulation in [200], the robust techniques in Section 6.1 are still\\napplicable.\\n6.3 Connections with Robust Beamforming\\nLet us ﬁrst recall the receive beamforming problem (5.18):\\nmaximizewσ2\\ns|wHa|2\\nwHRw(6.34)\\nwhere w∈CNis the complex beamforming vector variable denoting\\nthe weights of Narray observations and a∈CNandR∈CN×N\\n(estimated in advance) are the signal steering vector (also known as\\nthe transmission channel) and the positive deﬁnite interference-plus-\\nnoise covariance matrix, respectively.\\nSimilartothe(real-valued)parameters µandΣforportfoliodesign,\\nthe (complex-valued) parameters aandRneed to be estimated ﬁrst\\nand may contain some estimation errors. Since the objective in (6.34)\\nis invariant to the magnitude of wHa, the robust counterpart of (6.34)\\nhas the following general form [204, 205]:\\nminimizewmax\\nR∈URwHRw\\nsubject to min\\na∈Ua⏐⏐⏐wHa⏐⏐⏐≥1,(6.35)\\nwhereUaandURdenote the uncertainty sets of aandR, respectively.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 138}),\n",
       " Document(page_content='132 Robust Portfolio Optimization\\n6.3.1 Worst-Case Signal Power Constraint\\nIn this subsection, we deal with the worst-case signal power constraint\\nin (6.35), i.e., mina∈Ua⏐⏐⏐wHa⏐⏐⏐≥1.\\nThe authors of [204] considered a sphere uncertainty set\\nUs\\na={a|(a−ˆa)H(a−ˆa)≤δ2\\na}, (6.36)\\nwhere the predeﬁned parameters ˆaand (usually very small) δa>0\\ndeﬁne the location and size of the uncertainty set, respectively.\\nDenotingγ≜a−ˆa, we have∥γ∥2≤δaand\\n⏐⏐⏐wHa⏐⏐⏐=⏐⏐⏐wH(ˆa+γ)⏐⏐⏐≥⏐⏐⏐wHˆa⏐⏐⏐−⏐⏐⏐wHγ⏐⏐⏐≥⏐⏐⏐wHˆa⏐⏐⏐−δa∥w∥2.(6.37)\\nIt can be shown that if δais small enough such that⏐⏐⏐wHˆa⏐⏐⏐>δa∥w∥2\\nalways holds, then the inequalities in (6.37) are achieved with equality\\nby [204]\\nγ=−w\\n∥w∥2δaej∠(wHˆa). (6.38)\\nThat is to say,\\nmin\\na∈Ua⏐⏐⏐wHa⏐⏐⏐=⏐⏐⏐wHˆa⏐⏐⏐−δa∥w∥2. (6.39)\\nHowever, then the worst-case signal power constraint mina∈Ua⏐⏐⏐wHa⏐⏐⏐≥\\n1turns out to be\\n⏐⏐⏐wHˆa⏐⏐⏐−δa∥w∥2≥1, (6.40)\\nwhich is still nonconvex.\\nFortunately, the objective of (6.35) is unchanged under any arbi-\\ntrary phase rotation of w, and one can always rotate wproperly so that\\nwHˆais real and positive. That is, (6.40) can be further equivalently\\nreformulated as the following convex constraints:\\nwHˆa−δa∥w∥2≥1, (6.41)\\nIm{wHˆa}= 0. (6.42)\\nInterestingly, we can see that the derivations for the (complex-\\nvalued) worst-case signal power here are very similar to that for the', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 139}),\n",
       " Document(page_content='6.3. Connections with Robust Beamforming 133\\n(real-valued) worst-case mean under the elliptical uncertainty set in\\nSection 6.1.2. For example, the (complex-valued) worst-case signal\\npower wHˆa−δa∥w∥2in (6.41) looks the same as the (real-valued)\\nworst-case mean wTˆµ−δµ\\ued79\\ued79\\ued79S1/2\\nµw\\ued79\\ued79\\ued79\\n2in (6.7) with Sµ=I.\\n6.3.2 Worst-Case Interference-Plus-Noise Power\\nNow let us consider the worst-case interference-plus-noise power in\\n(6.35), i.e., max R∈URwHRw.\\nTheauthorsof[205]consideredreplacingtheinterference-plus-noise\\ncovariance matrix with the SCM:\\nRSCM=1\\nTˆXHˆX, (6.43)\\nwhere ˆX∈CT×Nis the observation matrix such that the t-th row of ˆX\\nis the transpose of the t-th observation x(t), andTis the total number\\nof observations. Then they considered a spherical uncertainty set for\\nthe underlying true observations Xas follows:\\nUs\\nX={X|X=ˆX+∆,∥∆∥F≤δX}. (6.44)\\nInstead of studying the worst-case interference-plus-noise power,\\none can study its square root value\\nmax\\nX∈Us\\nX√\\nwHXHXw= max\\nX∈Us\\nX∥Xw∥2. (6.45)\\nGiven the uncertainty set (6.44), the worst-case value admits a closed-\\nform expression [205]\\nmax\\nX∈Us\\nX∥Xw∥2=\\ued79\\ued79\\ued79ˆXw\\ued79\\ued79\\ued79\\n2+δX∥w∥2. (6.46)\\nActually, the derivation procedure of (6.46) is exactly the same as that\\nof (6.23) for worst-case portfolio variance and thus it is omitted.\\n6.3.3 Robust Beamforming Formulation\\nFinally we can see that with the uncertainties are considered in (6.36)\\nand (6.44), the worst-case robust problem formulation (6.35) can be', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 140}),\n",
       " Document(page_content='134 Robust Portfolio Optimization\\nreformulated in a convex form as follows:\\nminimizew\\ued79\\ued79\\ued79ˆXw\\ued79\\ued79\\ued79\\n2+δX∥w∥2\\nsubject to wHˆa−δa∥w∥2≥1,\\nIm{wHˆa}= 0.(6.47)\\nThus, it is interesting to see that both robust portfolio optimiza-\\ntion and robust beamforming can be dealt with using almost the same\\ntechniques.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 141}),\n",
       " Document(page_content='7\\nMulti-Portfolio Optimization\\nPortfolio managers usually manage multiple accounts corresponding to\\ndiﬀerent clients, and the portfolios associated with diﬀerent accounts\\nare pooled together for execution, amplifying the level of the so-called\\nmarketimpact(cf.Chapter4)onallaccounts.InthepreviousChapters\\n5 and 6, each portfolio is considered and optimized individually disre-\\ngarding the eﬀect or impact on other portfolio, however, if this aggre-\\ngate market eﬀect is not considered when each account is individually\\noptimized, the actual market impact can be severely underestimated.\\nThus, a more realistic way is to analyze and optimize the multi-\\nple portfolios jointly while adhering to both the account-speciﬁc con-\\nstraints and also some global constraints present on all accounts. The\\nholistic approach is termed multi-portfolio optimization.\\nThe detailed organization of this chapter is as follows. Section 7.1\\nreviews some basic concepts and deﬁnitions. Section 7.2 states some\\ntypical problem formulations and Section 7.3 presents a solving ap-\\nproach based on game theory.\\n135', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 142}),\n",
       " Document(page_content='136 Multi-Portfolio Optimization\\nFigure 7.1: Multiple accounts and market impact.\\n7.1 From Single-Portfolio to Multi-Portfolio\\nIn the real markets, portfolio managers always manage multiple ac-\\ncounts and each account is in fact eﬀected by all the others as shown\\nin Figure 7.1. In practice, such an impact usually is undesired, e.g., the\\nimpact on account 1 given by account 2 and the other accounts always\\ntends to weaken the proﬁtability of account 1, and it is referred to as\\nmarket impact.\\nSuppose there are Nassets with mean vector and covariance matrix\\ngiven byµ∈RNandΣ∈RN×N. Now we consider multiple, say K,\\naccounts, and their corresponding investment portfolios are denoted as\\nwk∈RN,k= 1,...,K. So now we have multiple portfolios to optimize\\nat the sample instead of only a single portfolio. In the following we will\\nﬁrst quantify the market impact and then consider the utility function\\nand diﬀerent types of constraints.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 143}),\n",
       " Document(page_content='7.1. From Single-Portfolio to Multi-Portfolio 137\\n7.1.1 Market Impact Cost Function\\nA key concept of the extension from single-portfolio to multi-portfolio\\nistounderstandhowoneaccountwillbeaﬀectedbytheotheraccounts,\\ni.e., the market impact among portfolios of diﬀerent accounts.\\nRecallthereare Kportfolios: wk∈RN,k= 1,...,K.Letusdenote\\nw−k≜(wl)l̸=kandw≜(wk)K\\nk=1as the other portfolios (i.e., all the\\nportfolios except portfolio k) and all the portfolios, respectively. For\\nsimplicity,⟨x,y⟩andxTyare used interchangeably to denote the inner\\nproduct of vectors xandy.\\nA popular market impact on the portfolio wkcaused by itself and\\nthe other ones w−kis [210]\\nTC(wk,w)≜1\\n2(]\\n[wk−w0\\nk]+,c+(w)⟩\\n+]\\n[wk−w0\\nk]−,c−(w)⟩)\\n,\\n(7.1)\\nwhere\\n[wk−w0\\nk]+≜max(0,wk−w0\\nk) (7.2)\\n[wk−w0\\nk]−≜max(0,−(wk−w0\\nk)) (7.3)\\nrepresent the buy and sell trades of the k-th account, respectively, and\\nc+(w)≜Ω+K\\uf8fa\\nl=1[wl−w0\\nl]+(7.4)\\nc−(w)≜Ω−K\\uf8fa\\nl=1[wl−w0\\nl]−(7.5)\\nare the linear market impact costs of buy and sell trades of all the\\naccounts, respectively, [8, 18]. Here, Ω+andΩ−are positive diago-\\nnal matrices representing the market impact of buy and sell trades,\\nrespectively.\\n7.1.2 Mean-Variance Utility Function\\nInstead of ignoring the market impact in single-portfolio optimization\\n(cf. Chapter 5), the utility function of each portfolio is composed of\\nboth the mean-variance trade-oﬀ (cf. Section 5.1.1) and the market', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 144}),\n",
       " Document(page_content='138 Multi-Portfolio Optimization\\nimpact cost in multi-portfolio optimization, that is, the utility function\\nof accountkis\\nuk(wk,w−k)≜µTwk−1\\n2ρkwT\\nkΣwk−TC(wk,w),(7.6)\\nwhere the ﬁrst two terms together are the mean-variance trade-oﬀ that\\ndepends on the portfolio of account k(i.e.,wk) only with ρk>0being\\nthe trade-oﬀ parameter, and the third term TC(wk,w)as deﬁned in\\n(7.1) is the market impact cost function that measures the impact\\nquantitatively among the portfolios of all accounts.\\nSubstituting (7.1)-(7.5) into (7.6), the utility function uk(wk,w−k)\\ncan be rewritten more explicitly as follows:\\nuk(wk,w−k) =µTwk−1\\n2ρkwT\\nkΣwk\\n−1\\n2⟨\\n[wk−w0\\nk]+,Ω+K\\uf8fa\\nl=1[wl−w0\\nl]+⟩\\n−1\\n2⟨\\n[wk−w0\\nk]−,Ω−K\\uf8fa\\nl=1[wl−w0\\nl]−⟩\\n.(7.7)\\n7.1.3 Individual and Global Constraints\\nFor multi-portfolio optimization, there are two types of constraints:\\nindividual constraints that apply to each speciﬁc account and global\\nconstraints that apply to all (or a group of) accounts.\\nIndividual Constraints\\nThe individual constraints are similar to the constraints stated in Sec-\\ntion 5.1.4, e.g., holding constraint lk≤wk≤uk, long-only constraint\\nwk≥0, etc., for each account kwherek= 1,...,K, and they are\\nreferred to as individual constraints.\\nFor the multi-portfolio optimization, since each account may have\\ndiﬀerent capital budgets, the capital budget constraints can be math-\\nematically represented as 1Twk≤bk, wherebk≥0,k= 1,...,K, are\\nthe capital budget bounds for the corresponding accounts.\\nFor clarity of presentation, we use Wkto denote all the individual\\nconstraints on account k, and in general we assume it is non-empty,', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 145}),\n",
       " Document(page_content='7.2. Multi-Portfolio Problems 139\\nclosed, and convex. We further use W≜W1×···×W Kto denote their\\nCartesian product set.\\nGlobal Constraints\\nAs to the global constraints, one example is that the total traded vol-\\nume of each asset over all the accounts must be less than a threshold\\n(e.g., 10%of the average daily trading volume). Mathematically, these\\nglobal constraints on all the accounts are\\nK\\uf8fa\\nk=1|wk,i−w0\\nk,i|≤Di, i= 1,...,N. (7.8)\\nThese constraints can be extended so that the traded volume of some\\ngroups of assets (e.g., industries, sectors, countries, asset classes, etc.)\\nshould be limited, that is,\\nK\\uf8fa\\nk=1\\uf8fa\\ni∈Gl|wk,i−w0\\nk,i|≤Ui, l= 1,...,L, (7.9)\\nwhereGldenotes the l-th group of assets and there are Lgroups.\\nIt is easy to see from (7.8) and (7.9) that one account’s portfolio\\ndesign,say wk,alsodependsonotheraccounts’actions w−k.Therefore,\\nthe presence of global constraints couple all the portfolios together.\\nThe global constraints (7.8) and (7.9) can be rewritten in a more\\ncompact form. We ﬁrst deﬁne a multivariate function\\ng(w) =)\\n])(∑K\\nk=1|wk,i−w0\\nk,i|−Di)N\\ni=1 (∑K\\nk=1∑\\ni∈Gl|wk,i−w0\\nk,i|−Ul)L\\nl=1(\\n\\uf8fa[,(7.10)\\nthen (7.8) and (7.9) can be simply rewritten as g(w)≤0(or more\\noften g(wk,w−k)≤0for the consistency of notation).\\n7.2 Multi-Portfolio Problems\\nFor the multi-portfolio case, there exist many diﬀerent formulations.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 146}),\n",
       " Document(page_content='140 Multi-Portfolio Optimization\\n7.2.1 Naive Formulation\\nOne of the most direct formulations is to ignore the market impact\\namong diﬀerent accounts, do not consider the global constraints, and\\nsimply optimize each account individually (but include the market im-\\npact of the individual account) as follows [175]:\\nmaximizewkµTwk−1\\n2ρkwT\\nkΣwk\\n−1\\n2]\\n[wk−w0\\nk]+,Ω+[wk−w0\\nk]+⟩\\n−1\\n2]\\n[wk−w0\\nk]−,Ω−[wk−w0\\nk]−⟩\\nsubject to wk∈Wk\\uf8fc\\n\\uf8f4\\uf8f4\\uf8f4\\uf8f4\\uf8f4\\uf8f4\\uf8f4\\uf8f4\\uf8f4\\uf8fd\\n\\uf8f4\\uf8f4\\uf8f4\\uf8f4\\uf8f4\\uf8f4\\uf8f4\\uf8f4\\uf8f4{∀k,(7.11)\\nwhere the objective contains a mean-variance trade-oﬀ with ρk>0\\nbeing the trade-oﬀ parameter and a market impact cost caused only\\nby itself.\\nWe can see both the objectives and the constraints of the problems\\nin (7.11) depend on each individual account portfolio wkand the prob-\\nlems can be optimized separately. In other words, (7.11) represents K\\ndiﬀerent single-portfolio optimization problems.\\n7.2.2 Total Social Welfare Maximization Problem\\nWhenthemarketimpactamongdiﬀerentaccountsisconsidered,allthe\\naccounts are coupled together. Then one direct formulation is simply\\nmaximize the summation of all the utilities of all the accounts, i.e., the\\ntotal social welfare maximization problem [154]\\nmaximizewK\\uf8fa\\nk=1uk(wk,w−k)\\nsubject to w∈W,(7.12)\\nwhereuk(wk,w−k)are deﬁned in (7.7), w= (wk)K\\nk=1,W=W1×···×\\nWK, and walso needs to satisfy the global constraints g(w)≤0if\\nthey are present.\\nEven though the central problem (7.12) can achieve the maximum\\nsocial welfare, it may not result in fair enough portfolios: smaller ac-', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 147}),\n",
       " Document(page_content='7.2. Multi-Portfolio Problems 141\\ncountssuﬀerfromashortageofliquidityandtheyareforcedtosacriﬁce\\ntheir own beneﬁts to achieve social optimality [175, 173].\\n7.2.3 Game Theoretical Formulation Under Individual Constraints\\nAmorefairformulationproposedin[210]isthateachaccountcompetes\\nagainsttheothersandchoosesaportfoliothatmaximizesitsownutility\\nunder individual constraints. Mathematically, it can be formulated as\\na Nash Equilibrium Problem (NEP): given the other strategies w−k,\\naccountkaims at solving\\nmaximizewkuk(wk,w−k)\\nsubject to wk∈Wk\\uf8fc\\n\\uf8fd\\n{∀k, (7.13)\\nwhereuk(wk,w−k)is deﬁned in (7.7).\\nCompared with the naive individual formulation in (7.11), the main\\ndiﬀerence is that the objectives in (7.13) depend on not only the portfo-\\nlioofeachaccount wkbutalsotheportfoliosoftheotheraccounts w−k.\\nThus all the problems given by (7.13) are coupled via their objectives.\\nWith the NEP formulation, a solution of interest is the well-known\\nnotion of the Nash Equilibrium (NE) point from which no account has\\nan incentive to deviate from unilaterally. That is, a solution wne=\\n(w⋆\\nk)K\\nk=1is an NE of the NEP (7.13) if\\nuk(w⋆\\nk,w⋆\\n−k)≥uk(wk,w⋆\\n−k),∀wk∈Wk,∀k. (7.14)\\n7.2.4 Game Theoretical Formulation Under Global Constraints\\nWhen there are global constraints, incorporating them into (7.13) re-\\nsults in the following Generalized NEP (GNEP) [210]:\\nmaximizewkuk(wk,w−k)\\nsubject to wk∈Wk\\ng(wk,w−k)≤0\\uf8fc\\n\\uf8f4\\uf8f4\\uf8f4\\uf8fd\\n\\uf8f4\\uf8f4\\uf8f4{∀k, (7.15)\\nwhere there is coupling in both utility and constraint sets.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 148}),\n",
       " Document(page_content='142 Multi-Portfolio Optimization\\nSimilar to an NE of (7.13), a solution of interest of (7.15) is referred\\nto as Generalized NE (GNE) such that wgne= (w⋆\\nk)K\\nk=1and\\nuk(w⋆\\nk,w⋆\\n−k)≥uk(wk,w⋆\\n−k),∀wk∈Wk,g(wk,w⋆\\n−k)≤0,∀k.\\n(7.16)\\nThe extra coupling in the constrain sets caused by the global con-\\nstraints makes the GNEP (7.15) much more diﬃcult to analyze than\\nthe NEP (7.13).\\n7.2.5 Diﬃculties\\nFor the above naive and total social welfare maximization problems,\\ni.e., (7.11) and (7.12), the main diﬃculty is that the objectives in gen-\\neral are nonconcave and nondiﬀerentiable due to the projections in the\\nutilities [·]+and[·]−.\\nFor the NEP (7.13) and GNEP (7.15), apart from the above non-\\nconcave and nondiﬀerentiable objectives, the coupling in the objectives\\nand constraints sets of the multi-account problems further complicates\\nthe analysis.\\n7.3 Eﬃcient Solving Methods\\nIn this section, some reformulation techniques are considered to deal\\nwith the diﬃculty caused the projections [·]+and[·]−and then the\\neﬃcient solving methods for all the problems, i.e., (7.11), (7.12), (7.13),\\nand GNEP (7.15) are reviewed.\\n7.3.1 Reformulations of Objectives and Constraints\\nTo deal with the projections [·]+and[·]−, one can ﬁrst introduce some\\nnew variables,∀k,\\n˜wk≜)\\n˜w+\\nk\\n˜w−\\nk[\\n≥0 (7.17)\\nsuch that\\n[wk−w0\\nk]+=˜w+\\nk, (7.18)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 149}),\n",
       " Document(page_content='7.3. Eﬃcient Solving Methods 143\\n[wk−w0\\nk]−=˜w−\\nk, (7.19)\\nwk−w0\\nk=˜w+\\nk−˜w−\\nk, (7.20)\\n0 =]\\n˜w+\\nk,˜w−\\nk⟩\\n. (7.21)\\nThen the utility function in (7.7) can be rewritten as (some constants\\nare added)\\n˜u(˜wk,˜w−k) =)\\nµ−ρkΣw0\\nk\\n−µ+ρkΣw0\\nk[T\\n\\ued19\\ued18\\ued17\\ued1a\\n≜˜µT\\nk˜wk−1\\n2ρk˜wT\\nk)\\nΣ−Σ\\n−Σ Σ[\\n\\ued19\\ued18\\ued17\\ued1a\\n≜˜Σ˜wk\\n−1\\n2˜wT\\nk)\\nΩ+\\nΩ−[\\n\\ued19\\ued18\\ued17\\ued1a\\n≜˜Ω(K\\uf8fa\\nl=1˜wl(\\n=˜µT\\nk˜wk−1\\n2ρk˜wT\\nk˜Σ˜wk−1\\n2˜wT\\nk˜Ω(K\\uf8fa\\nl=1˜wl(\\n,(7.22)\\nwhich now is a diﬀerentiable function.\\nFor the introduced variable ˜wk, relaxing the nonconvex constraint\\n(7.21), one can deﬁne the following individual set based on (7.17)-\\n(7.20):\\n⟩Wk≜{\\n˜wk⏐⏐⏐\\uf8f3\\nI−I\\uf8f2\\n˜wk+w0\\nk∈Wk,˜wk≥0}\\n,(7.23)\\nwhich is convex in ˜wk.\\n7.3.2 Naive Solution\\nForeachk,theobjectiveofthenaiveformulation(7.11)canbeobtained\\nby ignoring the market impact terms caused by the other accounts in\\nuk(wk,w−k)(cf.(7.7)).Thus,similarto(7.22),arelaxationofthenaive\\nformulation (7.11) is\\nmaximize\\n˜wk˜µT\\nk˜wk−1\\n2ρk˜wT\\nk˜Σ˜wk−1\\n2˜wT\\nk˜Ω˜wk\\nsubject to ˜wk∈⟩Wk\\uf8fc\\n\\uf8f4\\uf8fd\\n\\uf8f4{∀k,(7.24)\\nwhich is convex since theobjective is quadratic concave and thefeasible\\nis convex for each given k, and thus it is eﬃciently solvable.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 150}),\n",
       " Document(page_content='144 Multi-Portfolio Optimization\\nSimilar to (7.11), the relaxation (7.24) actually represents Kindi-\\nvidual convex problems and, fortunately, it is shown that each optimal\\n˜wksatisﬁes (7.21) for all kand thus the relaxation (7.24) is tight for\\nand therefore equivalent to (7.11) [210]. An optimal solution of (7.24)\\nis referred to as a naive solution.\\n7.3.3 Total Social Welfare Maximization\\nFor the total welfare maximization problem (7.12), replacing\\nuk(wk,w−k)with ˜u(˜wk,˜w−k)in (7.22) and rearranging the terms, one\\ncan have the following relaxation [210]:\\nmaximize\\n˜wPso(˜w)≜˜µT˜w−1\\n2˜wTMso˜w\\nsubject to ˜w∈⟩W1×···×⟩WK(7.25)\\nwhere ˜µ≜(˜µk)K\\nk=1,˜w≜(˜wk)K\\nk=1,\\nMso= Diag(ρ)⊗˜Σ+J⊗˜Ω (7.26)\\nandJis aK×Kmatrix with all entries being 1.\\nAgain, it is shown in [210] that (7.25) is convex and the optimal ˜wk\\nsatisﬁes (7.21) for all kand thus the relaxation (7.25) is tight.\\n7.3.4 Multi-Portfolio Optimization with Individual Constraints\\nReplacinguk(wk,w−k)in the NEP (7.13) with ˜u(˜wk,˜w−k)results in\\nthe following relaxation NEP:\\nmaximize\\n˜wk˜u(˜wk,˜w−k)\\nsubject to ˜wk∈⟩Wk\\uf8fc\\n\\uf8fd\\n{∀k. (7.27)\\nAnd it is shown that (7.21) is satisﬁed by an NE of (7.27), thus the\\nNEP (7.27) indeed equals the NEP (7.13).\\nSince the constraint sets of ˜wkare decoupled, based on potential\\ngame theory [148], it is further shown in [210] that the NEP is equal\\nto the following optimization problem:\\nmaximize\\n˜wPne(˜w)≜˜µT˜w−1\\n2˜wTMne˜w\\nsubject to ˜w∈⟩W1×···×⟩WK,(7.28)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 151}),\n",
       " Document(page_content='7.3. Eﬃcient Solving Methods 145\\nwhere\\nMne= Diag(ρ)⊗˜Σ+1\\n2(I+J)⊗˜Ω, (7.29)\\nin the sense that ˜wis an NE of (7.27) if and only if it is optimal\\nto (7.28). Later, the authors of [210] showed that (7.28) is strongly\\nconvex and thus its optimal solution, or equivalently, the NE of (7.27)\\nor (7.13), is unique.\\n7.3.5 Multi-Portfolio Optimization with Global Constraints\\nEven when there exist global constraints, one can still show that the\\nGNEP (7.15) is equal to the following GNEP [210]:\\nmaximize\\n˜wk˜u(˜wk,˜w−k)\\nsubject to ˜wk∈⟩Wk\\n˜g(˜w)≤0\\uf8fc\\n\\uf8f4\\uf8f4\\uf8f4\\uf8fd\\n\\uf8f4\\uf8f4\\uf8f4{∀k, (7.30)\\nwhere\\n˜g(˜w)≜K\\uf8fa\\nk=1)\\n])(\\n˜w+\\nk,i+ ˜w−\\nk,i)N\\ni=1 (∑\\ni∈Gl(\\n˜w+\\nk,i+ ˜w−\\nk,i)\\n−Ul)L\\nl=1(\\n\\uf8fa[−)\\n(Di)N\\ni=1\\n(Ul)L\\nl=1[\\n.(7.31)\\nSimilar to (7.28), one can construct the following convex problem\\nwith global constraints:\\nmaximize\\n˜wPne(˜w) =˜µT˜w−1\\n2˜wTMne˜w\\nsubject to ˜w∈⟩W1×···×⟩WK,\\n˜g(˜w)≤0.(7.32)\\nHowever, now the constraint sets of all the ˜wkare coupled and one\\ncan only conclude that an optimal solution to (7.32) is a GNE of the\\nGNEP (7.30), but not vice versa [210]. An optimal solution to (7.32) is\\nreferred to as a Variational Equilibrium (VE), and it is actually unique\\nsince (7.32) is strongly convex.\\nExample 7.1. Let us now consider some numerical experiments. The\\nmean vector µ, covariance matrix Σ, and market impact coeﬃcient', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 152}),\n",
       " Document(page_content='146 Multi-Portfolio Optimization\\nYANG et al. : MULTI-PORTFOLIO OPTIMIZATION: A POTENTIAL GAME APPROACH 5599\\nFig. 1. Utility improvement of the NE and socially optimal solution against the\\nnaive approach.\\nVI. N UMERICAL RESULTS AND DISCUSSIONS\\nIn this section, we provide some numerical results to illus-\\ntrate the ef ﬁcacy of our multi-portfolio o ptimization framework,\\nalong with the convergence behavior of the proposed distributediterative algorithms. In our simulations, we consider syntheticdata such that\\n,,a n d model annual expected values from\\nto 5% and volatility values given in annualized terms in\\nthe range of 20% to 30%. We assume that the number of assetsis\\n.\\n1) Utility Improvement: Weﬁrst compare each account’s\\nutility improvement achieved b y NE and socially optimal so-\\nlutions over the naive approach measured by:\\nwhere is the optimal solution of (8) with ,a n d\\nis deﬁned in (4). We assume that there are\\naccounts and they are subject to the long-only constraint and\\nbudget constraint. The result is plotted in Fig. 1. We can see\\nfrom the red bar that the performance of the NE outperformsthe naive design, because the market impact cost incurred fromtransactions of other accoun ts are properly counted.\\nWe also compare the NE (red bar on the left) and the socially\\noptimal solution (black bar on the right). We can see that thesocial optimality is at the price of accounts 1, 3 and 4. This con-solidates again what has been observed in [9]: some accountscan probably get better payoff by acting alone than staying inthe socially optimal solution. The unilateral optimality and theuniqueness makes the NE a meaningful outcome that can be pre-dicted by all accounts.\\nTo compare the NE and the socially optimal solution from\\nthe perspective of total welfare, we also plot in dashed lines the\\nfollowing metric:\\nFig. 2. 
Convergence of Algorithm 1: potential function versus iteration.\\nAs expected, socially optimal solutions can achieve a highertotal welfare than NE.\\n2) Convergence of Algorithm 1: We assume that the number\\nof accounts is 5, 10 and 20, resp ectively, and each account is\\nsubject to the long-only constraint. The results are illustrated in\\nFig. 2, where we update the portfolio in each iteration and theresulting value of the potential function\\nis plotted. We\\ncan see that the algorithm converges reasonably fast for bothsequential and simultaneous upd ate, with the convergence speed\\ndepending as expected upon the number of accounts.\\n3) Global Constraint: We assume that each account is sub-\\nject to the long-only constraint. In Fig. 3, we can see that, as the\\nnumber of accounts increases, the global transact ion-size con-\\nstraint may be violated if it is not properly considered. Moti-vated by liquidity problems for a speci ﬁc asset in practice, the\\nissue is specially aggravated du e to the aggregate effect over ac-\\ncounts.\\n4) Convergence of Outer Loop of Algorithm 3: We assume\\nthat the number of accounts is 5 and 10, respectively. Each ac-count is subject to the long-only constraint, and the accounts\\nare also subject to the global transaction-size constraint as (5).\\nThe convergence behavior of the outer loop of Algorithm 3 isillustrated in Fig. 4, where in each iteration we generate the\\nNE for a ﬁxed\\nand the corresponding duality gap (de ﬁned as\\nwhere is ob-\\ntained a priori from solving (27) by [36]) is plotted. We see\\nthat the asymptotic convergence speed of is fast and indepen-\\ndent of the number of accounts, since the GNEP (25) is solved inits dual domain and the dimension of the dual variable is equalto the number of global constraints.\\nVII. 
C\\nONCLUDING REMARKS\\nIn this paper, we have studied the multi-portfolio optimiza-\\ntion problem where multiple accounts are coupled through themarket impact cost, which is modeled as an af ﬁne function of\\nthe aggregate trades from all accounts. The analysis is from\\nt h ep e r s p e c t i v eo fn o n - c o o p e r a t i v eg a m et h e o r y ,a n dw eh a v eshown that there always exists a unique NE, and moreover de-vised (synchronous and asynchr onous) distributed algorithms\\nFigure 7.2: Utility improvement of the NE, and socially optimal solution against\\nthe naive solution.\\nmatrices Ω+andΩ−are randomly generated. Suppose there are N= 5\\nassets.\\nFor the moment, the number of accounts is ﬁxed to K= 5with\\nindividual constraints. We compare three methods, i.e., i) the naive\\nproblem(7.11),ii)thetotalsocialwelfaremaximizationproblem(7.12),\\nand iii) the NEP (7.13), in terms of two criteria:\\n•the relative utility improvement of each account:\\nuk(w)−uk(wnaive)\\nuk(wnaive)(7.33)\\n•the relative utility improvement of all the accounts:\\n∑K\\nk=1uk(w)−∑K\\nk=1uk(wnaive)\\n∑K\\nk=1uk(wnaive)(7.34)\\nwhere wis either wneandwsoandw=0.\\nFigure 7.2 shows the numerical results measured by (7.33) and\\n7.34. We can see that the social welfare maximization problem (7.12)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 153}),\n",
       " Document(page_content='7.3. Eﬃcient Solving Methods 147\\n5600 IEEE TRANSACTIONS ON SIGNAL PROCESSING, VOL. 61, NO. 22, NOVEMBER 15, 2013\\nFig. 3. Global transaction-size versus number of accounts.\\nFig. 4. Convergence of Algorithm 3: duality gap versus iteration.\\nwith satisfactory convergence properties. Then we have ana-\\nlyzed the NEP with global constraints imposed on all accounts,resulting in a GNEP. We have shown as well that there alwaysexists a unique VE which can be computed in a distributedmanner. Finally, we have considered the maximization of thetotal welfare along with distributed schemes.\\nA\\nPPENDIX A\\nPROOF OF LEMMA 5\\nProof: In (17), the utility function of account is\\n(34)and the constraint is . The former two terms\\nof (34) depend only on the difference between and .W e\\nuse contradiction to show that at the optimal solution, and\\nare orthogonal.\\nFirst assume that there exists such that\\n. It is easy to see that the variable\\nis feasible since\\n. Consider a new function\\nwith deﬁned as\\nw h i c hi sc o n v e xi n . The convexity of infers that\\nminimizes over iff :\\nwhere we have made use of the fact that are positive\\ndiagonal matrices and . This establishes that\\nminimizes over ,a n d is the maximizing\\nvariable of in (34), contradicting the optimality\\nof . 
This completes the proof.\\nAPPENDIX B\\nPROOF OF THEOREM 15\\nProof: Av a r i a b l e is a VE of the GNEP (30) if and only\\nif it solves the following optimization problem:\\n(35)\\nSince (35) is a convex optimization problem, the optimal so-\\nlution of (35) can be equally ach ieved from its dual problem,\\nprovided Slater’s condition is satis ﬁed [2]:\\n(36)\\nwhere and is\\nthe Lagrange multiplier associated with .\\nFor a ﬁxed , the inner maximization problem in (36) is a\\npotential game equivalent to the following NEP:\\n(37)\\nSince is a saddle point of the minimax problem (36)\\n[32], can be obtained by solving (37) with while\\nare primal feasible, dual feasible and satisfy the com-\\nplementary slackness condition.\\nFigure 7.3: Global transaction size versus number of accounts.\\nachieves the best total social welfare (see the horizontal dashed black\\nline),butatthepriceofsacriﬁcingaccounts 1,3,andespecially,account\\n4(see the vertical black bars). The NEP improves the total welfare sig-\\nniﬁcantly (see the horizontal dashed red line) albeit below the social\\nsolution; however, opposed to the social solution, it does not sacriﬁce\\nindividual accounts as much as the social formulation (see the red bars\\nvs the black bars).\\nLater we also considered to include some global constraints, e.g.,\\na global transaction size constraint. Figure 7.3 shows the total trans-\\naction size versus of the number of accounts. Clearly, we see that the\\nglobal transaction size constraint may be violated if it is not properly\\nconsidered. ■\\nRemark 7.1. In this chapter, we have focused on the reformulation\\nof diﬀerent nonconvex problems in convex form, but without going\\ninto the details of the speciﬁc algorithms to solve such problems. It is\\npossible to derive highly eﬃcient parallel and distributed algorithms\\nfor the above convex problems [210]. 
■', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 154}),\n",
       " Document(page_content='8\\nIndex Tracking\\nActive investment strategies assume that the markets are not perfectly\\neﬃcient and fund managers can identify mispriced stocks and/or make\\nsuperior predictions and then collect (hopefully positive) proﬁts based\\non them (cf. Chapters 5-7).\\nPassive investment strategies, on the other hand, assume the mar-\\nkets are eﬃcient enough and cannot be beaten in the long run, there-\\nfore, the investment philosophy is to directly follow the markets.\\nThischapterreviewsoneofthemostpopularandimportantpassive\\ninvestment strategies: index tracking. The goal of index tracking is to\\nconstruct a tracking portfolio whose value follows a market index (or\\nsome preferred benchmark index).\\nThe detailed organization of this chapter is as follows. Section 8.1\\nreviews diﬀerent methods of index tracking, i.e., full index tracking,\\nsynthetic index tracking, and sparse index tracking. Sections 8.2 and\\n8.3 focus on two approaches of sparse index tracking, i.e., the two-step\\napproach and joint optimization approach, separately.\\n148', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 155}),\n",
       " Document(page_content='8.1. Diﬀerent Index Tracking Methods 149\\n8.1 Diﬀerent Index Tracking Methods\\nSuppose that a benchmark index is composed of Nstocks, let rb=\\n[rb\\n1,...,rb\\nT]T∈RTandX= [r1,...,rT]T∈RT×Ndenote the returns of\\nthe benchmark index and the Nstocks in the past Tdays, respectively.\\nLetb∈RNdenote the (normalized) benchmark index weights such\\nthatb>0,bT1= 1, and Xb=rb. Further, let wdenote the tracking\\nportfolio to be designed, which must satisfy w≥0andwT1= 1.\\n8.1.1 Tracking Performance\\nTracking Error\\nGiven the covariance matrix of the benchmark stocks Σand the bench-\\nmark index weight vector b, the theoretical tracking error is deﬁned\\nas\\nTTE( w) = (w−b)TΣ(w−b). (8.1)\\nSince Σneeds to be estimated ﬁrst and bmay not be available, the\\nempirical tracking error, deﬁned as\\nTE(w) =1\\nT∥Xw−rb∥2\\n2, (8.2)\\nis more popular in practice [134, 177]. It measures how closely the\\ntracking portfolio mimics the benchmark index empirically. In princi-\\npal, the smaller, the better. Note that the daily stock returns are in\\ngeneral very small and if we suppose E[rt] =0, the expected value of\\nTE equals TEE:\\nE[TE(w)] =1\\nTE\\uf8f3\\n∥Xw−rb∥2\\n2\\uf8f2\\n, (8.3)\\n= (w−b)TE\\uf8f31\\nTXTX\\uf8f2\\n(w−b), (8.4)\\n= (w−b)TΣ(w−b). (8.5)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 156}),\n",
       " Document(page_content='150 Index Tracking\\nExcess Return\\nApart from tracking error, another important criterion is excess return\\n(ER):\\nER(w) =1\\nT1T(Xw−rb), (8.6)\\nIt represents how much the tracking portfolio outperforms the bench-\\nmark index, and the larger, the better.\\nCombined Criterion\\nTo achieve a trade-oﬀ between the tracking error (8.2) and the excess\\nreturn (8.6), a combined objective is thus considered [17, 19]:\\nU(w) =αTE(w)−(1−α)ER(w), (8.7)\\nwhereα∈[0,1]is a predeﬁned trade-oﬀ parameter.\\nGoal\\nSince the excess return (8.6) is linear in the tracking portfolio w, with-\\nout loss of generality and for clarity of presentation, we focus on the\\ntracking error (8.2) only. The goal of index tracking is to construct a\\nportfolio w⋆(or a derivative like future contract) to track the perfor-\\nmance of the benchmark index with the tracking error (8.2) being small\\nor, even better, minimized.\\n8.1.2 Full Index Tracking\\nThe most straightforward tracking method, referred to as full index\\ntracking, is to purchase all the index constituents in appropriate quan-\\ntities to perfectly track the index, i.e., w⋆=band∥Xw⋆−rb∥2\\n2= 0.\\nHowever, it has several signiﬁcant disadvantages [17, 134], for example:\\n•including all the stocks may not be practical especially when the\\nindex contains some illiquid stocks and it is hard to purchase such\\nstocks; and\\n•allocating capital in all assets would also incur signiﬁcant trading\\ncost.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 157}),\n",
       " Document(page_content='8.2. Sparse Index Tracking: Two-Step Approach 151\\n8.1.3 Synthetic Index Tracking\\nThe second method is to use derivatives, like future contracts, to track\\ntheindex(e.g.,E-miniS&PfuturecontractisusedtotracktheS&P500\\nIndex). The advantage of future contracts is that the trading cost is\\nrelatively lower than stocks, however, dynamically tracking the index\\nby rolling the contracts can be both expensive and risky because of the\\ncounterparty risk and illiquidity of contracts. These drawbacks make\\nfuture contracts less attractive in tracking the index [19, 106].\\n8.1.4 Sparse Index Tracking\\nTo make the index tracking more practical (i.e., relatively lower trading\\ncost and less risky), a third method was proposed: to use a subset of\\nstocks to track the index (i.e., ∥w⋆∥0≪N) with only a small sacriﬁce\\nin tracking error (i.e., ∥Xw⋆−rb∥2is still close to 0) [17]. This method\\nis referred to as sparse index tracking and in fact it is the core business\\nof ETFs, which now have been very popular in the markets1.\\nIn the following content of this chapter, we will focus on two main\\napproachesofsparseindextracking,namely,thetwo-stepapproachand\\njoint optimization approach.\\n8.2 Sparse Index Tracking: Two-Step Approach\\nThe ﬁrst approach of sparse index tracking is to decompose the task\\ninto two steps [19, 52, 155]:\\n•stock selection: selecting a subset of K(K≪N) stocks; and\\n•capital allocation: distributing the capital among the selected\\nstocks.\\n8.2.1 Stock Selection\\nLet us ﬁrst introduce diﬀerent stock selection methods.\\n1Many funds provide some ETF products and some of them have very large\\nassets under management (AUM) even at the magnitude of $10billion USD, e.g.,\\nsee http://etfdb.com/type/size/large-cap/.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 158}),\n",
       " Document(page_content='152 Index Tracking\\nRandom Selection\\nOne simple and naive idea is to randomly select Kstocks from the N\\nindex stocks [52]. This method in general is used as a benchmark.\\nSelection Based Market Capitals\\nA widely used stock selection method, especially for a market capital\\nweighted index, is to select the largest Kstocks according to their\\nmarket capitals (e.g., the product of outstanding shares2and prices)\\n[155]. For the market capital weighted index, if the index weight vector\\nbis available, one can select the stocks with Klargest weights bi.\\nSelection Based on Correlation\\nAnother idea is to select the stocks that have similar return perfor-\\nmancesastheindex[19,52].Forexample,giventhecorrelationbetween\\nthei-th stock and the benchmark index\\nρib=Cov(X·i,rb), (8.8)\\nthis method selects the stocks with Klargest correlations ρib.\\nSelection Based on Cointegration\\nThe idea is to select Kstocks so that there exists a linear combination\\nof their log-prices cointegrated well with the value of the benchmark\\nindex [5, 19]. Mathematically, based on the following model:\\nIt=N\\uf8fa\\ni=1siβipi,t+wt, (8.9)\\nwheresi∈{0,1}, one needs to ﬁnd the optimal swithsT1=K(i.e.,\\nselection of Kstocks) and the weights βisuch thatwtis most likely\\nstationary (e.g., resulting the smallest p-value of the stationary test).\\nThis problem itself is NP-hard. Exhaustive search can be employed\\n2Outstanding shares refer to a company’s stock currently held by all its share-\\nholders, including share blocks held by institutional investors and restricted shares\\nowned by the company’s oﬃcers and insiders.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 159}),\n",
       " Document(page_content='8.2. Sparse Index Tracking: Two-Step Approach 153\\nwhenNis small; some heuristic method, e.g., genetic algorithm, is\\nneeded otherwise [19].\\n8.2.2 Capital Allocation\\nOnce a subset of Kstocks has been selected, the second step is to\\ndesign the capital allocation among them. Before we proceed, let us\\nuse the binary vector s⋆∈RN:\\ns⋆\\ni=\\uf8f1\\n}\\n\\uf8f31,if stockiis selected\\n0,otherwise(8.10)\\nwith1Ts⋆=Kto represent the selected Kstocks.\\nNaive Allocation\\nWhenthebenchmarkportfolioweightvector bisknown,anaivealloca-\\ntion is to distribute the capital among the selected stocks proportional\\nto the original weights with their summation equal to 1. That is, the\\nnaive allocation weight vector is\\nw⋆=b⊙s⋆\\n1T(b⊙s⋆), (8.11)\\nwhere⊙means Hadamard product.\\nOptimization Allocation\\nThenaiveallocationweight(8.11)issimpleenough,however,thetrack-\\ning error is not optimized and sometimes the benchmark weight vec-\\ntorbmay not be available. The optimization allocation overcomes this\\ndrawback by minimizing the tracking error based on the selected stocks\\ndirectly as follows [155]3:\\nminimizew1\\nT∥X(w⊙s⋆)−rb∥2\\n2\\nsubject to 1T(w⊙s⋆) = 1,\\nw≥0.(8.12)\\n3The authors of [155] considered a more complicated nonconvex objective and\\nthey employed a genetic algorithm to solve their nonconvex problem.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 160}),\n",
       " Document(page_content='154 Index Tracking\\nProblem (8.12) is convex and can be solved eﬃciently. The optimal\\nallocation simply is the optimal solution of (8.12).\\n8.3 Sparse Index Tracking: Joint Optimization Approach\\nThe previous approach executes the two steps of stock selection and\\ncapital allocation sequentially; however, it is not clear how optimal the\\nresulting tracking portfolio is. A better approach may be to conduct\\nthese two steps jointly and systematically.\\n8.3.1 Problem Formulation\\nA direct way is to regularize the cardinality of the tracking portfolio\\nweights [106]:\\nminimizew1\\nT∥Xw−rb∥2\\n2+λ∥w∥0\\nsubject to 1Tw= 1,\\nw≥0,(8.13)\\nwhereλ≥0is a predeﬁned parameter.\\n8.3.2ℓ1-norm Approximation\\nGenerally, problem (8.13) is hard to solve due to the nonconvex and\\ndiscontinuous cardinality term ∥w∥0(note that∥w∥0=∑N\\ni=1 1{wi̸=0}).\\nFigure8.1showstheindicatorfunction 1{x̸=0}(seethesolidblackline).\\nApopularapproximationof ∥w∥0thatisconvexandpromotesspar-\\nsity is theℓ1-norm function∥w∥1as indicated by the dashed red line\\nin Figure 8.1, i.e., the LASSO (least absolute shrinkage and selection\\noperator) technique [96]. LASSO has indeed been used in portfolio op-\\ntimization [3, 33, 48, 73, 74].\\nUnfortunately, this technique does not work for index tracking with\\nlong only constraints (i.e., 1Tw= 1andw≥0) since\\n∥w∥1=N\\uf8fa\\ni=1|wi|=N\\uf8fa\\ni=1wi=1Tw= 1 (8.14)\\nis constant.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 161}),\n",
       " Document(page_content='8.3. Sparse Index Tracking: Joint Optimization Approach 155\\n−2−1.5 −1−0.5 00.5 11.5 200.20.40.60.811.21.4\\n  \\n||x||1\\nlog(1+|x|/p)/log(1+1/p)\\nIndicator\\nFigure 8.1: Indicator function and approximations.\\n8.3.3 Reweighted ℓ1-norm Approximation\\nSince the convex ℓ1-norm approximation does not work for an in-\\ndex tracking problem, a better (possibly nonconvex) approximation\\nis needed. An example is [37]\\nρp(x) =log(1 +|x|/p)\\nlog(1 + 1/p), (8.15)\\nwherep >0is a parameter and ρp(x)→ 1{x̸=0}asp→0. Figure 8.1\\nshows an illustrative example of p= 0.2, i.e., the dashed-dotted blue\\nline.\\nReplacing the indicator function 1{x̸=0}by the approximation func-\\ntionρp(x)results in the following problem:\\nminimizew1\\nT∥Xw−rb∥2\\n2+λ∑N\\ni=1ρp(wi)\\nsubject to 1Tw= 1,\\nw≥0.(8.16)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 162}),\n",
       " Document(page_content='156 Index Tracking\\nIn fact, there are also some other approximations for the indicator\\nfunction, e.g., see [185] and references therein. For example, the |x|p\\nwith 0< p < 1is used in [75, 106] and a smoothed version of |x|pis\\nused in [41]. However, there does not exist either eﬃcient algorithms\\n[41, 106] or heuristic algorithms that can guarantee the quality of the\\nsolution [75].\\nFollowing [37], we will present an iterative algorithm that interest-\\ningly turns out to replace ∥w∥0with a sequence of reweighted ℓ1-norm\\napproximations. The idea also applies to the problems in [41, 106].\\nThe idea is, at each iteration point, say x0, to approximate ρp(x)\\nwith its ﬁrst-order Taylor approximation, as follows:\\nρp(x) =log (1 +|x|/p)\\nlog (1 + 1/p)(8.17)\\n≈1\\nlog (1 + 1/p))\\n|x|\\np+|x0|+ log(\\n1 +⏐⏐⏐x0⏐⏐⏐/p)\\n−⏐⏐x0⏐⏐\\np+|x0|[\\n(8.18)\\n=1\\n(p+|x0|) log (1 + 1/p)\\ued19\\ued18\\ued17\\ued1a\\n≜d(x0)|x|+ const (8.19)\\n=d(x0)|x|+ const (8.20)\\n≜u(x,x0). (8.21)\\nFigure 8.2 shows an illustrative example of u(x,x0)at pointx0= 1\\n(see the dashed magenta line).\\nThen at the k-th iteration point wk, one can solve the follow-\\ning reweighted approximation problem to get the next iteration point\\nwk+1:\\nminimizew1\\nT∥Xw−rb∥2\\n2+λ\\ued79\\ued79\\ued79D(\\nwk)\\nw\\ued79\\ued79\\ued79\\n1\\nsubject to 1Tw= 1,\\nw≥0,(8.22)\\nwhere\\nD(\\nwk)\\n= Diag(\\nd(\\nwk\\n1)\\n,...,d(\\nwk\\nN))\\n. (8.23)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 163}),\n",
       " Document(page_content='8.3. Sparse Index Tracking: Joint Optimization Approach 157\\n−2−1.5 −1−0.5 00.5 11.5 200.20.40.60.811.21.4\\n  \\nu(x,x0)\\nρp(x)\\nIndicator\\nFigure 8.2: Reweighted ℓ1-norm approximation.\\nAlgorithm 6 summarizes the iterative procedure. It can be easily shown\\nthat Algorithm 6 converges to a stationary point of problem (8.16)\\nfollowing [164].\\nAlgorithm 6 Reweighted ℓ1-norm approximation for index tracking.\\nInput: w0\\nOutput: a stationary point of problem (8.16)\\n1:repeat\\n2:Computed(\\nwk\\ni)\\naccording to (8.19)\\n3:Compute D(\\nwk)\\naccording to (8.23)\\n4:Solve (8.22) and set the optimal solution as wk+1\\n5:k←k+ 1\\n6:untilconvergence\\nExample 8.1. For illustration purposes, here we conduct some syn-\\nthetic experiments in MATLAB.\\nThe data is synthetically generated as follows. We consider', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 164}),\n",
       " Document(page_content='158 Index Tracking\\n0 20 40 60 80 100 120 140 160 180 20000.0050.010.0150.020.0250.030.0350.040.045\\nNumber of selected stocksSquare root of tracking error\\n  \\nTwo−step approach: Naive allocation\\nTwo−step approach: Optimization allocation\\nJoint optimization: Reweighted L1−norm approx.\\nFigure 8.3: Comparisons of diﬀerent sparse index tracking methods.\\nN= 200 stocks and draw T= 1000 i.i.d. samples, de-\\nnoted as r1,...,r1000, from the multivariate Gaussian distribution\\nN(µ,Σ), whereµ=randn (N,1)/252andΣ=DCDwith D=\\n2∗diag (rand (N,1))/sqrt (252)andCij= 0.7|i−j|. The data matrix\\nisX=\\uf8f3\\nrT\\n1,...,rT\\n1000\\uf8f2T∈R1000×200.\\nNext, we construct an artiﬁcial index. We ﬁrst randomly generate\\na temporary vector t=rand (N,1)and then set the artiﬁcial index\\nweights by normalizing tso that the summation of the weights equals\\none, i.e., b=t\\n1Tt. The historical returns of the constructed benchmark\\nindex are rb=Xb∈RT.\\nWe compare the following sparse index tracking methods:\\n•two-step approach: we select the stocks with Klargest corre-\\nlations (8.8) and then consider both the naive allocation (i.e.,\\n(8.11)) and the optimization allocation (i.e., (8.12)); and\\n•joint optimization approach: Algorithm 6.\\nFigure8.3showsthesquarerootoftrackingerrorversusthenumber\\nof selected stocks. We can clearly see that: i) for the two-step method,', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 165}),\n",
       " Document(page_content='8.3. Sparse Index Tracking: Joint Optimization Approach 159\\n0 100 200 300 400 500 600 700 800 900 1000−0.3−0.25−0.2−0.15−0.1−0.0500.050.10.15\\nDayCumulative return\\n  \\nIndex\\nTracking portfolio: N = 15\\nTracking portfolio: N = 78\\nFigure 8.4: Tracking performances of some sparse index tracking portfolios.\\nthe optimization allocation method outperforms the naive allocation\\nmethod, e.g., the square root of the tracking error is reduced from\\n4.45%to2.58%whenK= 10; and ii) the joint optimization approach\\noutperforms the methods of the two-step approach, e.g., the joint opti-\\nmization approach even achieves a much lower square root of tracking\\nerror at 0.94%with fewer stocks K= 8compared with the results\\n4.45%and2.58%of the two-step approach with K= 10.\\nFigure8.4showsthetrackingperformancesofthejointoptimization\\napproach: the tracking path deviates from the index path signiﬁcantly\\nwhenK= 15(see the dashed red line) and the tracking path mimics\\nthe index path very closely when K= 78(see the dashed-dotted blue\\nline). ■\\n8.3.4 Nonconvex Constraints\\nFor simplicity, we imposed only the long only constraints (i.e., 1Tw= 1\\nandw≥0) in the previous parts of this chapter. In practice, some fund\\nmanagers may also impose some holding constraints (see Section 5.1.4)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 166}),\n",
       " Document(page_content='160 Index Tracking\\nand the joint optimization problem becomes\\nminimizew,s1\\nT∥Xw−rb∥2\\n2+λ1Ts\\nsubject to 1Tw= 1,\\nw≥0,\\nsiLi≤wi≤siUi,∀i\\nsi∈{0,1},∀i(8.24)\\nwhereLiandUiare the holding lower and upper bound for the i-th\\nstock, respectively, only if it is selected, and 0≤Li≤Ui.\\nThe binary variable scomplicates problem (8.24). There are several\\ndiﬀerent methods to deal with it. In the following, we brieﬂy explain\\neach method and list the corresponding references:\\n•thresholding method: a practical heuristic is to solve the problem\\n(8.24) without the binary variable sand then select the stocks\\nwith weights larger than a certain threshold (i.e., decide sbased\\non the optimized w) and then optimize (8.24) with sﬁxed. To\\nmake the solution more robust, one can remove a few stocks each\\ntime and apply the idea several times to achieve enough sparsity\\nin the portfolio [106];\\n•mixed-integer programming (MIP): problem (8.24) indeed is an\\nMIP and there are some commercial solvers like GUROBI4and\\nCPLEX5that can solve MIPs with small and medium sizes ef-\\nﬁciently. Thus, one can directly apply such standard solvers to\\nsolve small and medium size MIP type index tracking problems\\n[36, 177];\\n•heuristic algorithms: for MIP with a large size, standard solvers\\nmay fail, and some heuristic algorithms, e.g., genetic algorithms\\n[10, 17, 177], and diﬀerential evolution [10, 134], are used in prac-\\ntice. However, the solution in general may be far from optimal.\\n4http://www.gurobi.com/\\n5http://www-01.ibm.com/software/commerce/optimization/cplex-\\noptimizer/index.html', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 167}),\n",
       " Document(page_content='9\\nRisk Parity Portfolio Optimization\\nThe Markowitz portfolio (cf. Chapters 5-7) has never been embraced\\nby practitioners, among other reasons because it only considers the risk\\nof the portfolio as a whole and ignores the risk diversiﬁcation.\\nRecently, an alternative risk parity portfolio design has been receiv-\\ning signiﬁcant attention from both the theoretical and practical sides\\ndue to its advantage in diversiﬁcation of (ex-ante) risk contributions\\namong assets. Such risk contributions can be deemed good predictors\\nfor the (ex-post) loss contributions, especially when there exist huge\\nlosses. The main goal of this chapter is to introduce the concepts of\\nrisk parity portfolio, review diﬀerent existing formulations, and study\\ndiﬀerent eﬃcient solving algorithms.\\nThe detailed organization is as follows. Section 9.1 introduces the\\nconcepts of risk contribution and risk parity portfolio. Section 9.2 lists\\nseveral existing speciﬁc risk parity formulations and presents a general\\nrisk parity portfolio problem formulation that can ﬁt most of the listed\\nspeciﬁc risk parity formulations. To solve the risk parity problems, Sec-\\ntion 9.3 details an eﬃcient numerical solving approach for the general\\nrisk parity portfolio problem formulation based on successive convex\\noptimization methods.\\n161', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 168}),\n",
       " Document(page_content='162 Risk Parity Portfolio Optimization\\n9.1 What is a Risk Parity Portfolio?\\nLetusﬁrststartwithintroducingtheconceptofriskcontributionbased\\non which we can deﬁne the risk parity portfolio.\\n9.1.1 Risk Contribution\\nSuppose there are Nassets and the mean vector and (positive deﬁnite)\\ncovariance matrix of the returns are denoted as µ∈RNandΣ∈\\nRN×N, respectively. For a portfolio w∈RN, to study the risk parity\\nportfolio, we need some well deﬁned risk measurements f(w)so that\\nthe “risk contribution” of each asset to the risk of the whole portfolio\\ncan be quantiﬁed. We start with the following desired property as it\\nwill be the key to quantify the risk parity.\\nTheorem 9.1 (Euler’s Theorem) .Let a continuous and diﬀerentiable\\nfunctionf:RN↦→Rbe a positively homogeneous function of degree\\none1. Then\\nf(w) =N\\uf8fa\\ni=1wi∂f\\n∂wi. (9.1)\\nOne observation from property (9.1) is that the component wi∂f\\n∂wi\\ncan be regarded as the risk contribution from asset ito the total risk\\nf(w).\\nInterestingly and fortunately, most of the existing risk measure-\\nments do satisfy the Euler property (9.1) either directly (VaR and\\nCVaR) or indirectly (variance) as we show next.\\nVolatility\\nNote that variance σ2(w) =wTΣwdoes not satisfy (9.1) directly.\\nFortunately, it is easy to check that volatility σ(w) =√\\nwTΣwdoes\\nsatisfy (9.1):\\nN\\uf8fa\\ni=1wi∂σ\\n∂wi=N\\uf8fa\\ni=1wi(Σw√\\nwTΣw)\\ni=1√\\nwTΣwN\\uf8fa\\ni=1wi(Σw)i\\n1A function f(w)is a positively homogeneous function of degree one if f(cw) =\\ncf(w)holds for any constant c>0.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 169}),\n",
       " Document(page_content='9.1. What is a Risk Parity Portfolio? 163\\nFigure 9.1: One example that satisﬁes the Euler property (9.1).\\n=1√\\nwTΣwwTΣw=σ(w). (9.2)\\nThus variance ﬁts (9.1) indirectly via volatility. Figure 9.1 shows an\\nexample of σ(w)and we can see that the function is linear along any\\ndirection starting from the origin.\\nVaR and CVaR\\nFor simplicity, we consider the Gaussian case VaR and CVaR in this\\nchapter. For the Gaussian distribution, VaR and CVaR can be ex-\\npressed explicitly as [141]\\nVaR 1−ε(w) =−µTw+κ1(ε)√\\nwTΣw, (9.3)\\nCVaR 1−ε(w) =−µTw+κ2(ε)√\\nwTΣw, (9.4)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 170}),\n",
       " Document(page_content='164 Risk Parity Portfolio Optimization\\nwhereκ1(ε)≜Q−1(ε)andκ2(ε)≜1√\\n2πεe−(Q−1(ε))2\\n2, andQ−1(·)is the\\ninverse of the Q-function (see (4.18)). Here, we implicitly assume that\\nεis small (e.g., ε≤20%) andκ1(ε)andκ2(ε)are both positive.\\nFrom (9.3) and (9.4) we can see that if µ∝1, ignoring the constant\\nterms, the volatility, VaR, and CVaR are equal up to a positive scalar.\\nMore generally, the Gaussian distribution can be extended to ellip-\\ntical distributions [119] for which VaR and CVaR both are mean and\\nstandard deviation trade-oﬀ expressions.\\nRemark 9.1. For the more general non-Gaussian VaR and CVaR, it\\ncan be shown that they both satisfy (9.1), however, they do not have\\nclosed-form expressions and some approximations are needed. For more\\ndiscussions, please refer to [76] and references therein. ■\\n9.1.2 Risk Parity Portfolio\\nThe risk parity portfolio is a portfolio such that each asset has the same\\nrisk contribution. That is, given the risk measurement f(w), the risk\\nparity portfolio should satisfy [162, 163, 131]\\nwi∂f(w)\\n∂wi=wj∂f(w)\\n∂wj,∀i,j. (9.5)\\nRisk budgeting portfolio is a more general concept. Given a budget\\nvector b= [b1,...,bN]T>0, and bT1= 1, where budget bis inter-\\npreted as a perdetermined percentage risk contribution target for all\\nthe assets, the risk budgeting portfolio should satisfy\\nwi∂f(w)\\n∂wi=bif(w),∀i. (9.6)\\nObviously, the risk parity portfolio is a special case of the risk bud-\\ngeting portfolio with b=1/N.\\nDue to the popularity of the terminology “risk parity”, it is always\\nused to refer to a broad portfolio allocation method of risk diversiﬁ-\\ncation (e.g., including both risk parity and risk budgeting portfolios)\\n[167]. 
We take the broad concept of “risk parity” unless speciﬁed oth-\\nerwise in this chapter.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 171}),\n",
       " Document(page_content='9.2. Risk Parity Portfolio Formulations 165\\n9.2 Risk Parity Portfolio Formulations\\nThere are many diﬀerent existing speciﬁc formulations on risk parity\\nportfolios due to diﬀerent risk measurements used or diﬀerent proﬁles\\nof investors. In this section, we ﬁrst review some speciﬁc formulations\\nand then consider a general risk parity portfolio problem formulation.\\n9.2.1 Some Speciﬁc Formulations\\nRecall that the risk contribution of asset iiswi(Σw)i√\\nwΣw, then the risk\\nparity (9.5) and risk budgeting (9.6) relationships turn out to be\\nrisk parity : wi(Σw)i=wj(Σw)j, (9.7)\\nrisk budgeting : wi(Σw)i=biwTΣw, (9.8)\\nrespectively, where b= [b1,...,bN]T>0is the given risk budgeting\\nfornassets and bT1= 1. Actually, relationship (9.7) is a special case\\nof relationship (9.8) with bi= 1/Nfor alli.\\nAgain, we denote the feasible set as W≜{w|wT1= 1}∩Wwhere\\nwT1= 1denotes the capital budget constraint and Wis a convex set\\nthat denotes the other constraints.\\nOnly when Σis diagonal and there exists a long-only constraint,\\ni.e.,W={w|w≥0}, the nonlinear equation systems (9.8) admit a\\nunique solution [167]:\\nwi=√bi/√Σii∑n\\nk=1√bk/√Σkk, i= 1,...,N. (9.9)\\nHowever, for non-diagonal Σor when there are some additional\\nconstraints, the closed-form solution does not exist anymore and some\\noptimization problems are constructed instead.\\nPaper [131] is one of the ﬁrst few papers that focuses on ﬁnding\\nthe risk parity portfolio via optimization. The proposed problem for-\\nmulation is to penalize the summation of squared diﬀerences among\\nrisk contributions:\\nminimizew∑N\\ni,j=1(\\nwi(Σw)i−wj(Σw)j)2\\nsubject to wT1= 1,w∈W.(9.10)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 172}),\n",
       " Document(page_content='166 Risk Parity Portfolio Optimization\\nMotivated by problem (9.10), Bai et al. [14] simpliﬁed the objective\\nof (9.10) to solve:\\nminimize\\nw,θ∑N\\ni=1(wi(Σw)i−θ)2\\nsubject to wT1= 1,w∈W.(9.11)\\nTo ﬁnd a portfolio that meets the risk budgeting targets bas much\\nas possible, Bruder and Roncalli proposed to solve [34]:\\nminimizew∑N\\ni=1(wi(Σw)i\\nwTΣw−bi)2\\nsubject to wT1= 1,w∈W.(9.12)\\nSimilarly, it is easy to have some other alternative (but diﬀerent) prob-\\nlem formulations, e.g.,\\nminimizew∑N\\ni,j=1(\\nwi(Σw)i\\nbi−wj(Σw)j\\nbj)2\\nsubject to wT1= 1,w∈W,(9.13)\\nand\\nminimizew∑N\\ni=1(\\nwi(Σw)i−biwTΣw)2\\nsubject to wT1= 1,w∈W,(9.14)\\nand\\nminimizew∑N\\ni=1(wi(Σw)i√\\nwTΣw−bi√\\nwTΣw)2\\nsubject to wT1= 1,w∈W,(9.15)\\nand\\nminimize\\nw,θ∑N\\ni=1(wi(Σw)i\\nbi−θ)2\\nsubject to wT1= 1,w∈W.(9.16)\\nNote that all the above formulations are nonconvex and they are\\nonly some examples. Actually, there are many more speciﬁc formula-\\ntions; for more a comprehensive summary, please see [76, Table I].\\nUnfortunately, all the above problem formulations are generally\\nnonconvex in general. In the following we review a numerical approach\\nthat attacks all of them in a uniﬁed way.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 173}),\n",
       " Document(page_content='9.2. Risk Parity Portfolio Formulations 167\\n9.2.2 General Risk Parity Portfolio Problem\\nLet us start with a general risk parity formulation proposed in [76] that\\ncan ﬁt all the previously stated speciﬁc formulations. The general risk\\nparity formulation can be expressed as\\nminimizewU(w)≜R(w) +λF(w)\\nsubject to wT1= 1,w∈W,(9.17)\\nwhere\\n•R(w)measures the risk concentration and has the form\\nR(w)≜N\\uf8fa\\ni=1(gi(w))2(9.18)\\ninwhicheach gi(w)isasmoothdiﬀerentiablenonconvexfunction\\nthatmeasurestheriskconcentrationofthe i-thasset.Thesmaller\\nthe quantity R(w)is, the more uniform the risk is distributed\\namongnassets;2\\n•F(w)is a convex function that represents some traditional pref-\\nerences on the portfolio. For example, it can be the expected\\nportfolio loss (e.g., F(w) =−µTw), the mean-variance trade-oﬀ\\nof the portfolio loss (e.g., F(w) =−µTw+νwTΣwwhereν >0\\nis the trade-oﬀ parameter), or F(w) = 0when the goal is to\\ndistribute the risk only;\\n•λ≥0is some trade-oﬀ parameter between the portfolio prefer-\\nence and risk concentration; and\\n•wT1= 1denotes the capital budget constraint and Wis a convex\\nsetthatdenotestheinvestor’sproﬁles,capitallimitations,market\\nregulations, etc.\\nThis problem formulation is quite general to ﬁt the previously stated\\nspeciﬁcformulations,forexample,setting gi(w) =wi(Σw)i−biwTΣw\\nandλ= 0recovers the problem (9.14).\\n2In some problem formulations, the deﬁnition∑N\\ni,j=1(gij(w))2is used where\\ngij(w)measures the diﬀerence between the risk contributions of assets iandj, for\\nwhich the analytical approach derived in this paper still applies.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 174}),\n",
       " Document(page_content='168 Risk Parity Portfolio Optimization\\n0 20 40 60 80 10010−1210−1010−810−610−410−2100102\\nCPU time (seconds)Objective\\n  \\nfmincon−SQP\\nfmincon−IPM\\nFigure 9.2: Performances of SQP and IPM.\\nSince each function gi(w)is highly nonconvex, the problem (9.17)\\nis also nonconvex and hard to solve. In the literature, usually tra-\\nditional oﬀ-the-shelf nonlinear optimization methods, like sequential\\nquadratic programming (SQP) [153] and interior point methods (IPM)\\n[35] built in the MATLAB function fmincon, are used in practice\\n[14, 34, 131, 168, 90, 184]. However, for the nonconvex risk parity prob-\\nlem, in general they are time consuming and sometimes may not even\\nconvergeglobally[14,90,184].Thiscanbeshownbyasimplenumerical\\nexample as follows.\\nExample 9.1. We setN= 500and simulate the problem (9.14). The\\ncovariance matrix is randomly generated as Σ=VVTwhere V=\\nrand (N,N). For simplicity and for illustrative purposes, we consider the\\nlong-only constraints, e.g., wT1= 1andw≥0, and for this special\\ncase it is known that the optimal objective is zero [167].3\\nFigure9.2showsonetypicalrealizationoftheperformanceofobjec-\\ntive vs CPU time of the SQP and IPM methods built in the MATLAB\\n3More comprehensive numerical experiments based on both synthetic and real\\ndata can be found in [76].', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 175}),\n",
       " Document(page_content='9.3. SCRIP: An Eﬃcient Numerical Solving Approach 169\\nfunction fmincon, and we have similar results for all the realizations.\\nBasically, we observe that the SQP and IPM methods may either not\\neven converge or converge to a unsatisfactory point very slowly. ■\\n9.3 SCRIP: An Eﬃcient Numerical Solving Approach\\nJust as shown before, the general standard oﬀ-the-shelf numerical non-\\nconvex nonlinear optimization methods, like SQP and IPM, are not\\neﬃcient for nonconvex problems like (9.17).\\nTo overcome this drawback, the authors of [76] explored the struc-\\nture of the nonconvex part of U(w), e.g.,R(w) =∑N\\ni=1(gi(w))2, as\\nfollows. At the k-th iteration, the proposed method aims to solve\\nminimizewP(w;wk)≜\\n\\ued17 \\ued1a\\ued19 \\ued18\\nN\\uf8fa\\ni=1(\\ngi(\\nwk)\\n+(\\n∇gi(\\nwk))T(\\nw−wk))2\\n+τ\\n2\\ued79\\ued79\\ued79w−wk\\ued79\\ued79\\ued792\\n2+λF(w)\\nsubject to wT1= 1,w∈W,(9.19)\\nwhereτ > 0is the parameter for the regularization term. Here, the\\nnonconvex term R(w)is convexiﬁed by linearizing each gi(w)inside\\nthe square operation. The added proximal term\\ued79\\ued79\\ued79w−wk\\ued79\\ued79\\ued792\\n2is for con-\\nvergence reasons [178].\\nThe beauty of the approximation P(\\nw;wk)\\nis that it is an easily\\ncomputable quadratic convex function and has the same gradient as\\nR(w)at each iteration point wk:\\n∇P(\\nw;wk)\\n|w=wk=∇R(w)|w=wk, (9.20)\\nwhere∇P(\\nw;wk)\\ndenotes the partial gradient of P(\\nw;wk)\\nwith re-\\nspect to the ﬁrst argument w.\\nNote thatP(\\nw;wk)\\ncan be rewritten more compactly as\\nP(\\nw;wk)\\n=\\ued79\\ued79\\ued79Ak(\\nw−wk)\\n+g(\\nwk)\\ued79\\ued79\\ued792\\n2(9.21)\\nwhere\\nAk≜\\uf8f3\\n∇g1(\\nwk)\\n,...,∇gN(\\nwk)\\uf8f2T, (9.22)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 176}),\n",
       " Document(page_content='170 Risk Parity Portfolio Optimization\\ng(\\nwk)\\n≜\\uf8f3\\ng1(\\nwk)\\n,...,gN(\\nwk)\\uf8f2T. (9.23)\\nThen the problem (9.19) can be further rewritten as\\nminimizew1\\n2wTQkw+wTqk+λF(w)\\nsubject to wT1= 1,w∈W,(9.24)\\nwhere\\nQk≜2(\\nAk)TAk+τI, (9.25)\\nqk≜2(\\nAk)Tg(\\nwk)\\n−Qkwk. (9.26)\\nIn general, under the assumption that F(w)is convex, for\\nnonempty convex set W(recall thatW={\\nw|wT1= 1}\\n∩W) and\\nτ > 0, problem (9.24) is strongly convex and can be solved by the\\nexisting eﬃcient solvers (e.g., MOSEK [150], SeDuMi [189], SDPT3\\n[194], etc.). Moreover, if F(w)is linear or convex quadratic, and W\\nonly contains linear constraints, problem (9.24) reduces to a QP.\\nAlgorithm 7 summarizes the sequential solving approach and it is\\nreferred to as SCRIP (Successive Convex optimization for RIsk Parity\\nportfolio) since it is based on a successive convex optimization method.\\nAlgorithm 7 Successive Convex optimization for RIsk Parity portfolio\\n(SCRIP).\\nInput:k= 0,w0∈W1,τ >0,{γk}>0\\nOutput: a stationary point of problem (9.17)\\n1:repeat\\n2:Solve (9.24) to get the optimal solution ˆwk\\n3: wk+1=wk+γk(\\nˆwk−wk)\\n4:k←k+ 1\\n5:untilconvergence\\nBased on the result of [178, Theorem 3], it can be shown [76] that\\nunder some technical assumptions and τ > 0,γk∈(0,1],γk→0,\\n∑\\nkγk= +∞, and∑\\nk(\\nγk)2<+∞, then either Algorithm 7 converges\\nin a ﬁnite number of iterations to a stationary point of (9.17) or every', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 177}),\n",
       " Document(page_content='9.3. SCRIP: An Eﬃcient Numerical Solving Approach 171\\n0 20 40 60 80 10010−1210−1010−810−610−410−2100102\\nCPU time (seconds)Objective\\n  \\nfmincon−SQP\\nfmincon−IPM\\nSCRIP\\n00.20.40.60.810−1010−5100\\n  \\nFigure 9.3: Performances of SQP, IPM, and SCRIP.\\nlimit of wk(at least one such point exists) is a stationary point of\\n(9.17). There are also some interesting remarks on Algorithm 7.\\nRemark 9.2. Actually, one can easily have more algorithms by explor-\\ning two ideas: i) constructing a simpler QP approximation at each iter-\\nation, e.g., the quadratic coeﬃcient matrix can be even diagonal, and\\nii) deriving some fast numerical updates when solving the inner QP\\napproximation for some speciﬁc constraints. Here we do not explore\\nthem; however the interested reader is referred to [76, Algorithms 2-5]\\nfor detailed information. ■\\nLet us now revisit the previous Example 9.1 to conclude this chap-\\nter. Figure 9.3 shows the performance of objective vs CPU time of\\nthe existing SQP and IPM methods and the iterative SCRIP method.\\nClearly, we can see that SCRIP converges much more quickly and\\nachieves a better objective value. This is also observed in more com-\\nprehensive numerical experiments, cf. [76].', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 178}),\n",
       " Document(page_content='Part III\\nStatistical Arbitrage\\n(Mean-Reversion)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 179}),\n",
       " Document(page_content='10\\nStatistical Arbitrage\\nMarkowitz mean-variance portfolio optimization mainly follows the\\ntrendsoftheprices,i.e.,themeanvectorofthereturns,tomaximizethe\\nportfolio return with the portfolio risk under control, i.e., the portfolio\\nvariance is under a given threshold (cf. Part II, i.e., Chapters 5-9).\\nConversely, the mean-reversion type of quantitative investment\\nstrategies aims at making proﬁt based on the noisy ﬂuctuations in the\\nmarket prices regardless of the trends. This will be covered in this part,\\nPart III, which contains only this chapter, Chapter 10. The underly-\\ning rough idea is to short-sell the (relatively) overvalued stocks and\\nbuy the (relatively) undervalued stocks, and hopefully a positive proﬁt\\nwill be generated if their values converge. Such a type of quantitative\\ninvestment strategy is referred to as “statistical arbitrage”.\\nThe detailed organization of this chapter is as follows. Section 10.1\\nexplains the concept of cointegration and compares it with correlation.\\nSections 10.2-10.4 focus on introducing the “ancestor” of statistical ar-\\nbitrage, that is, pairs trading. Section 10.2 studies diﬀerent methods\\nof discovering the potential pairs, once the potential pairs have been\\ndetected, Section 10.3 then tests whether they are indeed cointegrated\\nor not, and Section 10.4 proceeds to checking the tractability and de-\\n173', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 180}),\n",
       " Document(page_content='174 Statistical Arbitrage\\nsigning trading rules. At the end, Section 10.5 generalizes pairs trading\\nto statistical arbitrage.\\n10.1 Cointegration versus Correlation\\nTo begin with, let us ﬁrst recall the deﬁnition of cointegration intro-\\nduced in Section 2.6: a time series is called integrated of order p, de-\\nnoted asI(p), if the time series obtained by diﬀerencing the time series\\nptimes is weakly stationary, while by diﬀerencing the time series p−1\\ntimes is not weakly stationary, and a multivariate time series is said to\\nbecointegratedifithasatleastonelinearcombinationbeingintegrated\\nof a lower order.\\nUnlike correlation, which generally characterizes (relatively short-\\nterm) co-movements in log-returns, cointegration refers to (relatively\\nlong-term) co-movements in log-prices [61]. Correlation and cointegra-\\ntion are two related but diﬀerent concepts. High correlation of log-\\nreturns does not necessarily imply high cointegration in log-prices, and\\nneither does high cointegration in log-prices imply high correlation of\\nlog-returns [6].\\nSince cointegration is the key concept for statistical arbitrage and\\nit is often confused with correlation, let us use some simple numerical\\nexamples to introduce cointegration and illustrate its relationship with\\ncorrelation.\\n10.1.1 Log-Price Series with High Cointegration\\nRecall that in Example 2.1 in Section 2.6, we introduced cointegration\\nbased on a VECM model. 
Here, to understand the relationship between\\ncointegrationandcorrelation,weconsideramoredirectstochasticcom-\\nmon trend model of two stocks, as follows [188]:\\ny1t=γxt+w1t (10.1)\\ny2t=xt+w2t (10.2)\\nxt=xt−1+wt, (10.3)\\nwherey1tandy2tare the log-prices, xtis the stochastic common trend\\n(which is a random walk), γis a (positive) loading coeﬃcient, and w1t,', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 181}),\n",
       " Document(page_content='10.1. Cointegration versus Correlation 175\\nw2t, andwtare i.i.d. errors that are independent of each other. For\\nsimplicity, suppose w1t,w2t, andwtfollow Gaussian distributions and\\ntheir means are zero and variances are σ2\\n1,σ2\\n2, andσ2, respectively.\\nW.l.o.g., we also assume γ= 1in this section.\\nBased on the model (10.1)-(10.3) and according to the deﬁnition of\\ncointegration, we can ﬁrst conclude that y1tandy2tare always cointe-\\ngrated because the following linear combination\\nzt≜y1t−y2t=w1t−w2t (10.4)\\nis stationary regardless of the values of σ2\\n1,σ2\\n2, andσ2. In the literature,\\nthe above obtained stationary process ztis referred to as “spread”.1\\nHowever, the correlation between the ﬁrst order diﬀerences of y1tand\\ny2t, i.e., the log-returns of the two stocks, is\\nρ=σ2\\n∑\\nσ2+ 2σ2\\n1∑\\nσ2+ 2σ2\\n2=1∑\\n1 +2σ2\\n1\\nσ2∑\\n1 +2σ2\\n2\\nσ2,(10.5)\\nwhich depends on the value of σ2\\n1,σ2\\n2, andσ2. Ifσ2\\n1≫σ2and/orσ2\\n2≫\\nσ2,ρisveryclosetozeroandthecorrelationisverylow.Therefore,high\\ncointegration in log-prices does not necessarily imply high correlation\\nin log-returns.\\nExample10.1. Letusnowstudyanillustrativenumericalexample.We\\nsetσ1=σ2= 0.2andσ= 0.1and randomly generate a sample path\\nwith 200observations for each random process in the model (10.1)-\\n(10.3).\\nFigure 10.1 shows the realization paths of y1t(the blue curve), y2t\\n(the red curve), and their diﬀerence y1t−y2t(the black curve). Clearly,\\nwe can see that y1tandy2thave a co-movement and indeed they are\\ncointegrated since y1t−y2tis stationary, as shown by the black curve.\\nHowever, the empirical correlation coeﬃcient is 0.1124(the theo-\\nretical value based on (10.5) is 0.1111) which means the correlation in\\nthe log-return series is quite low. 
Figure 10.2 shows the log-returns of\\nstock 2 versus that of stock 1 and it veriﬁes the low correlation since\\nthe points spread out in all directions.\\n1The spread ztin (10.4) happens to have zero mean because the means of w1t\\nandw2tare assumed to be zero. Generally, the spread mean is diﬀerent from zero\\nin practice.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 182}),\n",
       " Document(page_content='176 Statistical Arbitrage\\n0 20 40 60 80 100 120 140 160 180 200−3.5−3−2.5−2−1.5−1−0.500.51\\n  \\ny1ty2ty1t−y2t\\nFigure 10.1: Some sample paths of the log-price series of the two stocks: the\\ncointegration is high.\\n−1−0.8−0.6−0.4−0.2 00.20.40.60.81−1−0.8−0.6−0.4−0.200.20.40.60.81\\nLog−returns of stock 1Log−returns of stock 2\\nFigure 10.2: Log-returns of stock 2 versus that of stock 1: the correlation is low.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 183}),\n",
       " Document(page_content='10.1. Cointegration versus Correlation 177\\n−1−0.8−0.6−0.4−0.2 00.20.40.60.81−1−0.8−0.6−0.4−0.200.20.40.60.81\\nLog−returns of stock 1Log−returns of stock 2\\nFigure 10.3: The correlation between the log-return series is high.\\nTherefore, we can conclude that high cointegration in log-prices\\nseries does not necessarily imply high correlation in log-return series. ■\\n10.1.2 Log-Return Series with High Correlation\\nLetusstillfocusonthepreviousstochasticcommontrendmodel(10.1)-\\n(10.3), but further consider a log-price series ˜y1tas follows:\\n˜y1t=y1t+c0t, (10.6)\\nthat is, we add a temporal trend in the log-price series y1t.\\nThe correlation between the ﬁrst order diﬀerences of ˜y1tandy2tis\\nstill given by (10.5); however, ˜y1tandy2tare no longer cointegrated\\nsince they will diverge increasingly as time goes by. In fact, this rela-\\ntionship is called cointegration with deterministic trend.\\nExample 10.2. Let us now consider a modiﬁcation of Example 10.1.\\nWe setσ1=σ2= 0.05, andσ= 0.3and generate 200samples of y1t', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 184}),\n",
       " Document(page_content='178 Statistical Arbitrage\\n0 20 40 60 80 100 120 140 160 180 200−1012345\\n  \\n˜y1ty2t\\n˜y1t−y2t\\nFigure 10.4: The log-price series are not cointegrated.\\nandy2taccording to the common stochastic model (10.1)-(10.3) and\\n˜y1taccording to (10.6) with c0= 0.01.\\nThe empirical correlation between the ﬁrst order diﬀerences of ˜y1t\\nandy2tis0.9504(the theoretical value based on (10.5) is 0.9474), which\\nmeans the corresponding log-return series are highly correlated. Figure\\n10.3showsthelog-returnsofstock2versusthatofstock1anditveriﬁes\\nthe high correlation since the points fall closely along a straight line.\\nAs to the cointegration, Figure 10.4 shows the realization paths of\\n˜y1t(see the blue curve) and y2t(see the red curve). Clearly, we can\\nsee that ˜y1tandy2tare not tied together by a stationary spread and\\nindeed they are diverging increasingly since the spread ˜y1t−y2tkeeps\\ngrowing.\\nThus,highcorrelationinlog-returnseriesdoesnotnecessarilyimply\\nhigh cointegration in log-prices series. ■\\n10.1.3 The Idea of Statistical Arbitrage Based on Cointegration\\nThe idea behind statistical arbitrage is to short-sell the overvalued\\nspread, and buy the undervalued stocks and unwind the position when\\nthe spread converges to its normal stage.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 185}),\n",
       " Document(page_content='10.1. Cointegration versus Correlation 179\\ns0\\n0\\n−s0\\nt  \\nzt\\nBuy to unwindSell\\nBuy to unwindSell\\nBuySell to unwind\\nFigure 10.5: Investing on the mean-reversion spread.\\nFigure 10.5 pictorially shows the idea of investing on the mean-\\nreversion spread. For example, suppose the stationary spread zt=y1t−\\nγy2thas mean zero, then one can buy the spread when it low at zt=\\n−s0, i.e., buy one dollar stock 1 and short-sell γdollar stock 2 as\\nindicated in Figure 10.5 by the red point, and unwind the positions\\nwhen the spread reverts to zero after itime steps, i.e., zt+i= 0as\\nindicated in Figure 10.5 by the red circle. The resulting log-return of\\nthe strategy is zt+i−zt=s0. Similarly, one can sell the spread when\\nthe spread is high at zt=s0, i.e., short-sell one dollar stock 1 and buy γ\\ndollar stock 2 as indicated in Figure 10.5 by the red point, and unwind\\nthe positions when the spread reverts to zero again. The resulting log-\\nreturn is also zt−zt+i=s0.2\\nFor illustrative purposes, let us revisit Example 10.1 and set\\ns0= 0.25. Figure 10.6(a) shows the resulting mean-reversion stationary\\nspreadztand the buy and sell thresholds ±s0, Figure 10.6(b) reports\\nthe raw signaling for buying or shorting the spread, and Figure 10.6(c)\\n2For simplicity, we ignore the trading costs, e.g., brokerage fee, stamp fee, slip-\\npage, etc. in this chapter.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 186}),\n",
       " Document(page_content='180 Statistical Arbitrage\\n0 20 40 60 80 100 120 140 160 180 200−0.500.5Spread\\n(a)\\n0 20 40 60 80 100 120 140 160 180 200−1−0.500.51Position\\n(b)\\n0 20 40 60 80 100 120 140 160 180 200010203040P&L\\n(c)\\nFigure 10.6: A simple example of statistical arbitrage: (a) the mean-reversion\\nspread and the thresholds ±0.25; (b) the positions, +1and−1means buy and sell\\nthe spread, respectively; (c) the cumulative proﬁt and loss (P&L).\\nstates the cumulative proﬁt and loss (P&L)3. We can see that the sta-\\ntistical arbitrage does generate consistent positive proﬁt from Figure\\n10.6(c). However, note that this is the in-sample result of a synthetic\\nexperiment without accounting for any trading costs. In practice, one\\nneeds to focus on the out-of-sample results and take the trading costs\\ninto consideration as well. Still, statistical arbitrage has generated sig-\\nniﬁcant positive proﬁts in the real markets.\\nFact 10.1. Pairs trading probably is the ﬁrst practically implemented\\nstatistical arbitrage trading strategy. It was ﬁrst invented in industry\\nby a quantitative trading team led by the quant Nunzio Tartaglia in\\nMorgan Stanley around the mid 1980s. Tartaglia’s team enjoyed signif-\\n3We invest one dollar in each asset whenever we buy or sell the spread, and the\\nP&L is computed as the cumulative summation of the proﬁts and losses.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 187}),\n",
       " Document(page_content='10.2. Pairs Selection 181\\nicant success in pairs trading in 1987. The team was disbanded in 1989\\nand the members joined various other trading ﬁrms. However, pairs\\ntrading became widely known. Until now, pairs trading has generated\\nhundreds of millions of dollars in proﬁts for large institutions or hedge\\nfunds, e.g., Morgan Stanley, Renaissance Technologies, D. E. Shaw &\\nCo., etc. ■\\nIn the following Sections 10.2-10.4, we ﬁrst focus on pairs trading\\nas the example to introduce the main steps of statistical arbitrage. In\\npractice,pairstradingcanbemainlydecomposedintothreesteps[203]:\\n•Pairs selection: identify stock pairs that could potentially be coin-\\ntegrated.\\n•Cointegration test: test whether the identiﬁed stock pairs are in-\\ndeed cointegrated or not.\\n•Trading strategy design: study the spread dynamics and design\\nproper trading rules.\\nIn the literature, the papers focusing on pairs trading are usually\\ncategorized into diﬀerent approaches [92, 161], namely minimum dis-\\ntance approach [85, 151, 9], stochastic approach [57, 50, 195], and coin-\\ntegration approach [203, 125, 6]. However, most of them mainly focus\\non only one (or two) of the above three steps and do not conduct the\\nother steps properly. Here, we prefer to review diﬀerent papers follow-\\ning the above three steps structure.\\nLater in Section 10.5, we will consider more general statistical ar-\\nbitrage among multiple stocks.\\n10.2 Pairs Selection\\nThe markets usually contain thousands of stocks which can form mil-\\nlions of pairs. It is too computationally costly to check whether each\\npair is cointegrated or not. 
A more practical way is to deﬁne an easy\\nand straightforward measure to preliminarily identify the most poten-\\ntially cointegrated pairs and then focus on testing the cointegration of\\nsuch identiﬁed pairs only.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 188}),\n",
       " Document(page_content='182 Statistical Arbitrage\\n10.2.1 Normalized Price Distance\\nProbably the most simple and straightforward measurement is the nor-\\nmalized price distance (NPD) [85, 9]:\\nNPD≜T\\uf8fa\\nt=1(˜p1t−˜p2t)2(10.7)\\nwhere the normalized price ˜p1tof stock 1 is given by\\n˜p1t=t\\uf8f1\\ni=1(1 +R1i) (10.8)\\nwithR1ibeing thei-th simple return of stock 1. Actually, this criterion\\nimplicitly assume the cointegration coeﬃcient between the log-price of\\ntwo stocks equals 1, i.e., γ= 1. The normalized prices of the other\\nstocks are deﬁned similarly. Then one can easily compute the NPDs\\nfor all the possible pairs and select some pairs with the smallest NPDs\\nas the potentially cointegrated pairs.\\nThe authors of [85] conduct pairs trading as follows. First, they\\nuse the past 12 months daily data to construct pairs with the smallest\\nNPDs. Once the pairs are formed, they simply buy one dollar in the\\nundervalued stock and short-sell one dollar in the overvalued stock\\nwhen the normalized prices diverge more than two standard deviations,\\nand unwind the positions when the normalized prices cross later. After\\n6 months, the positions are forced to unwind regardless of whether the\\nprices have crossed or not.\\nLater, a following paper [9] provides more out-of-sample numerical\\nresults and another one [151] incorporates a stop-loss trigger if the\\ndistance diverges too much to limit the potential huge losses. Also,\\nsince theℓ2-norm distance in (10.7) is too sensitive to outliers, it is\\nalso suggested to consider some robust distance measurements, e.g.,\\nℓ1-norm distance [92].\\nActually, the methods in [85, 9, 151] are just some speciﬁc practical\\nimplementations and the cointegration test and trading strategy design\\nsteps are either ignored or not properly conducted.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 189}),\n",
       " Document(page_content='10.2. Pairs Selection 183\\n10.2.2 Measurements Based on Stochastic Common Trend Model\\nand Factor Model\\nNow let us revisit the stochastic common trend model (10.1)-(10.3) and\\ndo not assume γ= 1. Then, the log-returns can be decomposed into\\ntwo components as follows:\\nr1t=y1t−y1,t−1=γwt\\ued19\\ued18\\ued17\\ued1a\\n≜rc\\n1t+ (w1t−w1,t−1)\\ued19\\ued18\\ued17\\ued1a\\n≜rs\\n1t(10.9)\\nr2t=y2t−y2,t−1=wt\\ued19\\ued18\\ued17\\ued1a\\n≜rc\\n2t+ (w2t−w2,t−1)\\ued19\\ued18\\ued17\\ued1a\\n≜rs\\n2t(10.10)\\nwhererc\\n1tandrc\\n2tarethelog-returnsduetothenonstationarystochastic\\ncommon trend with rc\\n1t=γrc\\n2t, andrs\\n1tandrs\\n2tare the log-returns due\\nto the stationary components (and thus the cumulative summations of\\nrs\\n1tandrs\\n2tare stationary).\\nNote that the factor model for stock iat timethas the form:\\nrit=πT\\nift+εit, (10.11)\\nwhere ftis the factor which is the same for all the stocks, πiis the\\nvector of loading coeﬃcients, and εitis the idiosyncratic noise.\\nThe factor model (10.11) is a more general approach than the trend\\nmodel (10.9)-(10.10) in the sense that if we further assume π1=γπ2\\nand the cumulative summations of the speciﬁc noise component εit,\\ni.e.,∑t\\nk=1εik, are stationary for all the stocks, then rc\\n1t,rc\\n2t,rs\\n1t, and\\nrs\\n2tare modeled by πT\\n1ftandπT\\n2ft,ε1t, andε1t, respectively.\\nBased on the above connection, one can always ﬁrst estimate the\\nfactor model parameters, e.g., the factor loading coeﬃcient estimates\\nˆπi, the factor covariance matrix estimates ˆΣf, and then deﬁne diﬀerent\\nmeasurements to eﬃciently select the potentially cointegrated pairs.\\nFor simplicity, we arbitrarily study the pair of stocks 1 and 2.\\nNormalized Factor Loadings Diﬀerence\\nThe ﬁrst idea is that, for a cointegrated pair, the log-returns due to the\\ncommon trend should be proportional to each other, which means the', metadata={'source': 
'/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 190}),\n",
       " Document(page_content='184 Statistical Arbitrage\\nfactor loading coeﬃcients should be proportional to each other. There-\\nfore, one can deﬁne the normalized factor loadings diﬀerence (NFLD)\\nas follows [203]:\\nNFLD≜\\ued79\\ued79\\ued79\\ued79\\ued79ˆπ1\\n∥ˆπ1∥2−ˆπ2\\n∥ˆπ2∥2\\ued79\\ued79\\ued79\\ued79\\ued79\\n2(10.12)\\nand then identify the pairs with the smallest NFLDs as the potentially\\ncointegrated ones.\\nCorrelation Between Log-Returns due to Common Trend\\nSince the log-returns due to the common trend should be proportional\\nto each other, i.e., they should share the same direction, an alternative\\nidea is to compute the correlation coeﬃcient between them [203]:\\n|ρ|=⏐⏐⏐⏐⏐⏐Cov(rc\\n1t,rc\\n2t)∑\\nVar(rc\\n1t)Var(rc\\n2t)⏐⏐⏐⏐⏐⏐=⏐⏐⏐⏐⏐⏐Cov(ˆπT\\n1ft,ˆπT\\n2ft)∑\\nVar(ˆπT\\n1ft)Var(ˆπT\\n2ft)⏐⏐⏐⏐⏐⏐\\n=⏐⏐⏐⏐⏐⏐ˆπT\\n1ˆΣfˆπ2∑\\n(ˆπT\\n1ˆΣfˆπ1)(ˆπT\\n2ˆΣfˆπ2)⏐⏐⏐⏐⏐⏐=|cosθ|, (10.13)\\nwhereθis the angle between the log-return series, and the potentially\\ncointegrated pairs are the ones with θbeing close to zero or, equiva-\\nlently,|ρ|being close to one.\\nRemark 10.1. Note that it is the log-returns due to the common trend\\nonly that are used to compute the absolute value of the correlation\\nin (10.13). We should not use the overall log-returns, i.e., diﬀerence\\nof log-prices, here because high correlation in the log-returns does not\\nnecessarily imply high cointegration in the log-prices, as we have shown\\nin Section 10.1. ■\\n10.3 Cointegration Test\\nOnce a potentially cointegrated pair, e.g., stocks 1 and 2, has been\\nselected, the next step is to check whether they are cointegrated or', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 191}),\n",
       " Document(page_content='10.3. Cointegration Test 185\\nnot. That is, we need to ﬁnd out whether or not there exists a value of\\nγso that the spread\\nzt=y1t−γy2t (10.14)\\nis stationary. Note that in practice the mean of spread ztis not neces-\\nsarily zero and in general γmay not be one.\\nTo test for cointegration of two stocks, one of the most simple and\\ndirect methods is the Engle and Granger test [61] which usually con-\\ntains two steps:\\n1. linearlyregressthelog-pricesofonestockagainstthatoftheother\\nstock and use the LS to compute the linear regression parameter;\\nand\\n2. test whether the estimated residuals of the linear regression are\\nstationary or not.\\n10.3.1 Linear Relationship\\nIfztin (10.14) is stationary, it can be rewritten into the following form:\\nzt=y1t−γy2t=µ+εt, (10.15)\\nwhereµrepresents the equilibrium value and εtis a zero mean station-\\nary process that can be interpreted as the disturbance in the equilib-\\nrium [203]. The relationship (10.15) can be further rearranged as\\ny1t=µ+γy2t+εt, (10.16)\\nwhich has the same expression as a linear regression. Then naturally\\nthe LS is employed to estimate the cointegration coeﬃcient γand the\\nequilibrium value µ, and in fact, if y1tandy2tareI(1)and are cointe-\\ngrated, the estimates converge to the true values at the rate of number\\nof observations [61].\\nRemark 10.2. In the literature, once the pairs are selected, many pa-\\npers, e.g., [85, 151, 9, 57, 195], always long one and short the other\\nwith equal dollars so that the strategy is dollar neutral. Actually, this\\nisequivalenttoartiﬁciallyﬁxing γtobeoneandhopingthat thespread\\nzt=y1t−y2t (10.17)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 192}),\n",
       " Document(page_content='186 Statistical Arbitrage\\nis stationary. Based on (10.17), we can have the following relationship\\nzt−zt−1=r1t−r2t, (10.18)\\nwhich implies that the two stocks should have the same average return\\ntoensureztisstationary.Thenumberofpairssharingthesameaverage\\nreturn in the real markets may be too few. Therefore, ﬁxing γ= 1may\\nreduce the chance of identifying truly cointegrated pairs. ■\\nRemark 10.3. Please note that the log-prices are used for construct-\\ning cointegrated pairs here and the cointegration coeﬃcients (i.e., the 1\\nandγin (10.15) in front of log-prices) mean the invested dollars in each\\nstock. To keep the invested dollar in each stock constant requires daily\\nrebalancing since the price change may deviate the invested value from\\nthe constant level. One drawback of this method is that daily rebalanc-\\ning may incur signiﬁcant transaction costs and thus reduce total proﬁt.\\nOne way to avoid this daily rebalancing via constructing cointegration\\npairs based on price series directly (as opposed to log-prices), in which\\ncase the cointegration coeﬃcients (i.e., the estimated linar coeﬃcients\\nin front of prices) mean the numbers of shares invested in each stock.\\nHowever, using prices directly may reduce the chance of cointegration\\nsince the noise in price is less symmetric than that in log-prices and\\ntheresultingcointegrationspreadmaybelessstationarycomparedthat\\none obtained based on log-prices. Since the two approaches can be an-\\nalyzed almost in the same way, for clarity of presentation and without\\nloss of generality, we focus on pairs trading using log-prices only. ■\\n10.3.2 Cointegration and Strength of Mean-Reversion\\nThe spread ztis stationary if and only if the true residual series is sta-\\ntionary. In practice, we do not know the true values of the cointegration\\ncoeﬃcientγand the equilibrium value µ, and we cannot know the true\\nresiduals. 
However, the parameters γandµcan be estimated by LS as\\nshown before and we denote their estimates as ˆγandˆµ, respectively.\\nThen we can use the estimated residuals\\nˆεt=y1t−ˆγy2t−ˆµ (10.19)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 193}),\n",
       " Document(page_content='10.3. Cointegration Test 187\\nas the approximations of the true ones and test the stationarity of the\\nestimated residuals instead.\\nIntuitively, without stepping into any statistical hypothesis test, an\\nad hoc method may be to use a high mean crossing rate as an indicator\\nof mean-reversion: the higher the mean crossing rate is, the stronger\\nthe strength of mean-reversion is [203]. Even though it is simple and\\nstraightforward, it is not clear how to set the corresponding testing\\ncritical value for a given statistical signiﬁcance value. This actually can\\nbe overcome by some statistical hypothesis tests as follows.\\nDickey-Fuller (SF) Test\\nThe DF test [49] is a hypothesis test for unit root nonstationarity. For\\nany given time series xt, the DF test ﬁrst ﬁts it to the following model:\\n∆xt=φ0+c0t+φ1xt−1+et, (10.20)\\nwhereetdenotes white noise, and then consider the null hypothesis\\nH0:φ1= 0versus the the alternative hypothesis Ha:φ1<0. Here the\\nnull hypothesis means the time series xtis a random walk, thus unit\\nroot nonstationary. The intuition here is that, if xtis stationary, that\\nisφ1<0, then it tends to revert to its long term mean; for example,\\nsupposingφ0=c0= 0, a large value (or a small value) tends to be\\nfollowed by a smaller value, that is, a negative change (or a large value,\\nthat is, a positive change, respectively).\\nThe DF statistic is deﬁned as the t-statistic of the LS estimate of\\nφ1under the null hypothesis\\nDF =ˆφ1\\nstd(ˆφ1), (10.21)\\nwhere ˆφ1is the (expected) LS estimate and std(ˆφ1)is the standard\\ndeviation of the estimate [196]. Then given the statistical signiﬁcance\\nvalue, the null hypothesis is rejected if the DF statistic is less than a\\ncritical value.\\nIdeally, it is the true residuals εtthat should be used in the above\\nDF test to test it is stationary and thus the cointegration between the\\nlog-prices series y1tandy2t. 
However, as we mentioned, we can only', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 194}),\n",
       " Document(page_content='188 Statistical Arbitrage\\nuse the estimated residuals instead in practice and the critical value in\\nthe above DF test should be adjusted accordingly [130].\\nAugmented DF (ADF) Test\\nThe ADF test is an extension by removing all the structural eﬀects\\n(autocorrelation) in the time series as follows:\\n∆xt=φ0+c0t+φ1xt−1+p\\uf8fa\\ni=1φi+1∆xt−i+et, (10.22)\\nwhere the null and alternative hypotheses are the same as that of the\\nDFtest.TheremainingprocedureoftheADFtestforthecointegration\\ntest is the same as that of the DF test.\\nRemark 10.4. Earlier we investigated the Engle and Granger cointe-\\ngration test based on two stocks. However, such a cointegration test\\nhas several drawbacks: the two-step cointegration test is sensitive to\\nthe ordering of variables in the regression; the ﬁrst step “cointegration\\nregression” may lead to spurious estimators if the bivariate series are\\nnot cointegrated, and it is not suitable for more than two stocks. An\\nalternativemethodistheJohansentest,whichteststherankofthema-\\ntrixΠ(recall (2.49)) and obtains the corresponding MLE estimate in\\nthe VECM [107, 108] so one can get all the possible cointegration vec-\\ntors. For more detailed discussions on diﬀerent tests for cointegration,\\nplease refer to [93]. The good thing is that there already exist highly\\ndeveloped functions for the diﬀerent tests, e.g., egcitest for the Engle\\nand Granger test and jcitest for the Johansen test in MATLAB or\\npackages egcmfortheEngleandGrangertestand urcafortheJohansen\\ntest in R programming language. ■\\nFor illustrative purposes, let us revisit previous Examples 10.1 and\\n10.2 to see how the simple Engle and Granger cointegration test works.\\nExample 10.3. Consider the generated sample paths of y1tandy2tin\\nExample 10.1. We simply use the MATLAB function egcitest with\\ndefault settings to test the cointegration. 
The estimated values are\\nˆµ=−0.0521andˆγ= 0.9492, which are close to their true values µ= 0', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 195}),\n",
       " Document(page_content='10.3. Cointegration Test 189\\n0 20 40 60 80 100 120 140 160 180 200−0.8−0.6−0.4−0.200.20.40.60.8\\n  \\ny1t−y2t\\ny1t−ˆγy2t\\nFigure 10.7: Engle and Granger cointegration test of Example 10.1.\\nandγ= 1. Then egcitest uses the ADF test to test the stationarity of\\nthe estimated residuals. Here, the ADF statistic computed by (10.21)\\nis−14.009, less than the 5%signiﬁcance level critical value −3.3669,\\nthus the ADF test is to reject the null hypothesis and y1tandy2tare\\ncointegrated. Figure 10.7 shows that the true and estimated residuals\\nlook close to each other and they look stationary. As for Example\\n10.2, the default settings of egcitest do not contain the time trend\\nand it fails to reject the null hypothesis (note that the null hypothesis\\nis that ˜y1tandy2tare not cointegrated). However, if we allow the time\\ntrend component estimation in egcitest , it produces the estimates\\nof the parameters ˆµ= 0.0037,ˆγ= 0.9945, and ˆc0= 1, which again\\nare close to their true values µ= 0,γ= 1, andc0= 1, respectively.\\nNow the ADF statistic computed by (10.21) is −15.1876, less than\\nthe5%signiﬁcance level critical value −3.8283. Thus the ADF test\\nrejects the null hypothesis and ˜y1tandy2tare not cointegrated but\\ncointegrated with a deterministic trend. Similarly, Figure 10.8 shows\\nthat the true and estimated residuals look close to each other and they\\nlook stationary with a deterministic trend. ■', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 196}),\n",
       " Document(page_content='190 Statistical Arbitrage\\n0 20 40 60 80 100 120 140 160 180 200−0.500.511.522.5\\n  \\ny1t−y2t\\ny1t−ˆγy2t\\nFigure 10.8: Engle and Granger cointegration test of the modiﬁcation Example\\n10.2.\\nFurthermore, we consider one more simple example based on real\\ndata to show how to retrieve real data and how the cointegration test\\nperforms in practice.\\nExample 10.4. We focus on two main Chinese banks listed in the Hong\\nKong Stock Exchange, i.e., Industrial and Commercial Bank of China\\n(ICBC, Code: 1398.HK) and China Construction Bank (CCB, Code:\\n0939.HK).\\nFigure 10.9 shows their adjusted log-prices from 01-Jan-2013 to 31-\\nDec-2015. The data is retrieved from Yahoo! Finance using the MAT-\\nLAB function hist_stock_data4. We can see the two paths look really\\nclose to each other.\\nIndeed, the cointegration test shows that they are cointegrated and\\nFigure 10.10 shows the (in-sample) spread (as indicated by the solid\\nblack line), its mean level (as indicated by the dashed blue line), and\\nthe thresholds deviating from the mean by one standard deviation (as\\n4http://www.mathworks.com/matlabcentral/ﬁleexchange/18458-historical-\\nstock-data-downloader/content/hist_stock_data.m', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 197}),\n",
       " Document(page_content='10.3. Cointegration Test 191\\n2013 2014 20151.31.41.51.61.71.81.92Log−price\\n  \\nICBC\\nCCB\\nFigure 10.9: Log-prices of ICBC and CCB.\\n2013 2014 2015−0.13−0.12−0.11−0.1−0.09−0.08−0.07−0.06−0.05−0.04−0.03In−sample spread\\nFigure 10.10: Log-prices of ICBC and CCB.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 198}),\n",
       " Document(page_content='192 Statistical Arbitrage\\nindicated by the two solid magenta lines). The MATLAB code is in-\\ncluded in Appendix C. ■\\n10.4 Investing in Cointegrated Pairs\\nOnce cointegrated pairs have been identiﬁed, there are diﬀerent trading\\nstrategies that can be employed, for example, one can short the spread\\nztwhen it is larger than its long term mean by a signiﬁcant value (i.e.,\\nentry threshold) and unwind the position when the spread converges\\nto a smaller value (i.e., exit threshold). The analysis of the optimal\\nentry and exit thresholds for diﬀerent rules is similar. For simplicity\\nof presentation and w.l.o.g., we take the following trading rule: buy or\\nsell the spread when it diverges from its long-term mean by s0and\\nunwind the position when it passes through its mean. Thus, the key\\nproblem now is how to design the value of s0such that the total proﬁt\\nis maximized.\\n10.4.1 Optimal Threshold Value\\nIntuitively,alargethresholdprovidesalargeproﬁtforeachtrade,albeit\\nat a lower frequency, and a small threshold results in more frequent\\ntrades but a smaller proﬁt for each trade. Both of these two extremes\\nmay not give the best total proﬁt and an optimal threshold must be\\nfound between them. To compute the total proﬁt, one needs to know\\ntwo things: the proﬁt of each single trade and the trading frequency.\\nThe former is simply the threshold value s0(based on the previous\\ntrading rule and note that log-prices are used here) and the latter is\\na monotonically decreasing function of the threshold value s0which is\\nthe key issue. Both the parametric and nonparametric approaches for\\ncomputing the trading frequency function are introduced next.\\nParametric Approach\\nThe idea of the parametric approach is to ﬁt the spread dynamic with\\na speciﬁc model based on which the trading frequency can be either\\ntheoretically or numerically eﬃciently computed. 
There are several dif-\\nferent parametric models that satisfy the above requirement, e.g., the', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 199}),\n",
       " Document(page_content='10.4. Investing in Cointegrated Pairs 193\\nwhite Gaussian noise model, mixture Gaussian model, ARMA model,\\nand hidden Markov ARMA model [203].\\nFor illustrative purposes, let us focus on the white Gaussian noise\\nmodel and we further arbitrarily assume the noise is i.i.d. following a\\nstandard Normal distribution since otherwise we can always standard-\\nize the noise ﬁrst.\\nThe probability that a white Gaussian noise process at any time\\ndeviates above from the mean by s0or more is 1−Φ(s0), where Φ(·)\\nis the c.d.f. of the standard Normal distribution. Therefore, in Tsteps\\nwe expect to have T(1−Φ(s0))events greater than s0and the number\\nof shorts is one half of that, i.e., T(1−Φ(s0))/2, since the spread\\nmay need to cross the threshold s0again before it reverts to the mean\\nlevel (cf. Figure 10.5). Similarly, we can get the number of buys is also\\nT(1−Φ(s0))/2, and the total number of trades is T(1−Φ(s0)). For\\neach trade, the proﬁt is s0and then the total proﬁt is s0T(1−Φ(s0)).\\nExample 10.5. Let us use a simple numerical example to illustrate the\\nidea. We ﬁrst randomly generate T= 70samples from the standard\\nNormal distribution. The sample mean and variance are 0.1752and\\n0.9928, respectively.\\nFigure 10.11(a) shows the true theoretical function (1−Φ(s0))and\\nthe estimated one which is computed based on the sample mean and\\nsample variance. Figure 10.11(b) shows the proﬁt of each single trade,\\nand Figure 10.11(c) shows the total proﬁt. The maximum of the esti-\\nmated total proﬁt is achieved at the threshold s0= 0.8which is close\\nto the optimal threshold, i.e., the maximizer of the theoretical total\\nproﬁt, ats0= 0.75. ■\\nNonparametric Approach\\nFor the previous parametric approach, one always needs to calibrate\\na predeﬁned model from the spread samples and then compute the\\ntrading frequency for any given trading threshold either theoretically\\nor numerically. 
Is there an alternative way to ﬁnd the trading frequency\\ndirectly from an observed spread path? The answer is aﬃrmative and\\nit is the nonparametric approach [203].', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 200}),\n",
       " Document(page_content='194 Statistical Arbitrage\\n0 1 2 300.10.20.30.40.50.60.7\\ns0\\n(a)Probability of trades\\n  \\nTheoretical\\nParametric\\n0 1 2 300.511.522.53\\ns0\\n(b)Profit of each trade\\n0 0.5 1 1.5 2 2.5 300.050.10.150.20.25\\ns0\\n(c)Total profit\\n  \\nTheoretical\\nParametric\\nFigure 10.11: The computation of the total proﬁt: parametric approach.\\nThe idea is as follows: given a sample path of the spread realization,\\none can always compute the empirical trading frequency for any given\\nthreshold. That is, suppose the observed sample path has length T,\\nand it is denoted as z1, z2,...,zT. We consider Jdiscretized threshold\\nvalues ass0∈{s01,s02,...,s 0J}and the empirical trading frequency\\nfor the threshold s0jis\\n¯fj=∑T\\nt=1 1{zt>s0j}\\nT. (10.23)\\nHowever,inpractice,theempiricalvaluesmaynotbeasmoothfunction\\nin the discretized thresholds and the resulted total proﬁt function may\\nbe not accurate enough. To overcome this issue and obtain a smoother\\ntrading frequency function, one can employ the regularization idea,', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 201}),\n",
       " Document(page_content='10.4. Investing in Cointegrated Pairs 195\\nwhich has been heavily used in Chapter 3, as follows:\\nminimize\\nfJ\\uf8fa\\nj=1(¯fj−fj) +λJ−1\\uf8fa\\nj=1(fj−fj+1)2, (10.24)\\nwhere the second term is the regularization to induce smoothness and\\nλ > 0is the regularization parameter which can be chosen according\\nto the rule in [203]. We can see that fis a smoothed version of the\\nempirical trading frequency ¯f. The problem (10.24) can be rewritten\\nas a unconstrained convex QP:\\nminimize\\nf∥¯f−f∥2\\n2+λ∥Df∥2\\n2, (10.25)\\nwhere\\nD=)\\n]]]])1−1\\n1−1\\n......\\n1−1(\\n\\uf8fa\\uf8fa\\uf8fa\\uf8fa[∈R(J−1)×J(10.26)\\nis the ﬁrst order diﬀerence matrix. Setting the derivative of the objec-\\ntive of (10.25) w.r.t. fto zero yields the optimal solution\\nf= (I+λDTD)−1¯f. (10.27)\\nSimilar to the parametric approach, let us use a simple example to\\nillustrate the idea of the nonparametric approach.\\nExample 10.6. We use the same observations as Example 10.5. Figure\\n10.12(a) shows the empirical and regularized trading frequencies and\\nFigure 10.12(c) shows the resulting total proﬁt functions. We can see\\nthe total proﬁt function given by the nonparametric approach is not\\nsmooth and is sensitive to the errors, for example, it gives the maxi-\\nmizer at the threshold s0= 0.6. This issue indeed is overcome by the\\nregularizationapproachwith λ= 24.5,asshownbytheredstarcurvein\\nFigure 10.12(c). The new maximizer now is s0= 0.75, which is exactly\\nthe same as the optimal theoretical threshold. ■', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 202}),\n",
       " Document(page_content='196 Statistical Arbitrage\\n0 1 2 300.10.20.30.40.5\\ns0\\n(a)Probability of trades\\n  \\nTheoretical\\nNonParam: empirical\\nNonParam: regularized\\n0 1 2 300.511.522.53\\ns0\\n(b)Profit of each trade\\n0 0.5 1 1.5 2 2.5 300.050.10.150.2\\ns0\\n(c)Total profit\\n  \\nTheoretical\\nNonParam: empirical\\nNonParam: regularized\\nFigure 10.12: The computation of the total proﬁt: nonparametric approach.\\n10.4.2 Holding Time\\nThe above contents focused on designing the optimal threshold. Once\\nthe threshold has been designed, investors may also be interested in\\nthe corresponding holding time. For this purpose, we need to resort to\\nsome continuous-time mean-reversion models ﬁrst.\\nOneofthemostwidelyusedmean-reversionmodelsistheOrnstein-\\nUhlenbeck process [114]:\\ndXt=κ(µ−Xt)dt+σdWt, (10.28)\\nwhereµdenotes long term mean, κ > 0represents the strength of\\nreversion,σ > 0is the conditional volatility, and {Wt|t≥0}is a\\nstandardBrownianmotion.Itcanbeshownthatthelongtermvariance\\nisσ2\\n2κwhich depends on both the conditional volatility σand also the\\nstrength of reversion κ.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 203}),\n",
       " Document(page_content='10.4. Investing in Cointegrated Pairs 197\\nIntuitively, if the current value Xtis larger (or smaller) than the\\nlong term mean, i.e., µ−Xt<0(orµ−Xt>0, respectively) since\\nκ>0, the change has a higher probability to be negative (or positive,\\nrespectively)andthustheprocesstendstoreverttoitslongtermmean.\\nFor example, if X0=µ+cσ√\\n2κ, then the most likely time Tit reverts\\nto the long term mean µis [57]\\nT=1\\nκlog\\uf8f3\\n1 +1\\n2(∑\\n(c2−3)2+ 4c2+c2−3)\\uf8f2\\n.(10.29)\\nThus, we can see that the larger κis, the faster the process reverts from\\nthe deviation cσ√\\n2κ(note that this deviation is measured as cmultiples\\nof the long term standard deviationσ√\\n2κ) to its long term mean.\\nIn practice, the discretized model of (10.28) may be more useful\\nand it turns out to be\\nxt+1−xt=κ(µ−xt)τ+σ√τεt+1, (10.30)\\nwhereτ >0is the discretization period and εtis i.i.d. and follows the\\nstandard Normal distribution. It can be easily shown [57] that given\\nx0∼N(µ0,σ2\\n0), the distribution of xtisxt∼N(µt,σ2\\nt)where\\nµt=µ+ (µ0−µ)(1−bτ)t, (10.31)\\nσ2\\nt=σ2τ\\n1−(1−κτ)2[1−(1−κτ)2t] +σ2\\n0(1−κτ)2t,(10.32)\\nandµt→µandσ2\\nt→σ2τ\\n1−(1−κτ)2provided that the discretization period\\nτ > 0is small enough so that |1−κτ|<1. Note that when the dis-\\ncretization period τgoes to 0, the long term variance of the discretized\\nmodelσ2τ\\n1−(1−κτ)2=σ2\\n2κ−κ2τgoes toσ2\\n2κ, which is the long term variance\\nfor the continuous model.\\nThe relationship (10.30) can be rewritten as:\\nxt+1=A+Bxt+Cεt+1, (10.33)\\nwhereA=µ,0< B = 1−κτ < 1, andC=σ√τ. Actually, model\\n(10.33) is also a univariate AR( 1) model introduced in Section 2.5.1.\\nTo infer a relatively smoother spread dynamic procedure, instead\\nof using (10.33) to model a spread process directly, the authors of [57]', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 204}),\n",
       " Document(page_content='198 Statistical Arbitrage\\nmodeled the practical observed spread ztas the underlying true spread\\nxtplus an observation noise, as follows:\\nzt=xt+Dwt, (10.34)\\nwhereD > 0is a model parameter and the i.i.d. noise wtfollows\\nthe standard Normal distribution and are independent of the noise in\\n(10.33).\\nIn fact, the model (10.33)-(10.34) is a homogeneous Kalman ﬁlter,\\nwhich has been widely used in various ﬁelds, including system con-\\ntrol [111], signal processing [176], ﬁnancial engineering [206], etc., its\\nparameters can be easily estimated via the Expectation-Maximization\\nalgorithm, and the ﬁltering procedure admits closed-form update steps\\nunder the Gaussian assumption.\\nLater, the paper [195] extends the work of [57] by considering a\\ntime varying Kalman ﬁlter since the market regime may change with\\ntime.\\nAgain, let us consider a simple example to see how the above\\nKalmanﬁltercanhelptoimprovethemodelingofthespreaddynamics.\\nExample 10.7. Here, for the state transition process (10.33) we arti-\\nﬁcially set τ= 1/252,κ= 150,µ= 0, andσ= 0.02, which means\\nA=µ= 0,B= 1−κτ= 1−150/252, andC=σ√τ= 0.02/√\\n252.\\nFor the observation process (10.34) we set D= 2C.\\nFigure 10.13 shows the randomly generated realization paths of\\nthe underlying true spread xt, the noisy observed spread zt, and the\\nKalman ﬁltering spread ˆxt. Compared with zt, we can see that the\\nﬁltering spread ˆxtis relatively not as noisy and is closer to xt. This\\nis because, in principle, the Kalman ﬁlter can ﬁlter out the noise in\\nthe observed spread to some degree and the trading threshold designed\\nbased on the ﬁltered spread process is relatively more reliable. ■\\n10.5 From Pairs Trading to Statistical Arbitrage\\nNow, let us move one step further from pairs trading based on only\\ntwo stocks to statistical arbitrage for multiple stocks. 
The idea is still\\nbased on cointegration: try to construct some linear combinations of', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 205}),\n",
       " Document(page_content='10.5. From Pairs Trading to Statistical Arbitrage 199\\n0 10 20 30 40 50 60−8−6−4−20246x 10−3\\nt  xtzt\\nˆxt\\nFigure 10.13: A realization of a spread based on the Kalman ﬁlter model (10.33)-\\n(10.33):xtis the underlying hidden spread, ztis the observed noisy spread, and ˆxt\\nis the Kalman ﬁltering spread.\\nthelog-pricesofmultiple(morethantwo)stockssuchthattheresulting\\nspread series are mean-reversion processes.\\n10.5.1 Statistical Arbitrage Based on VECM\\nUntil now we have explained the cointegration of only two stocks. As\\nwe have introduced the VECM before in Section 2.6, it is possible to\\nﬁnd some cointegration components among multiple stocks. Actually,\\nif we look at the VECM model (2.49) which is stated as follows:\\nrt=φ0+Πyt−1+˜Φ1rt−1+···+˜Φp−1rt−p+1+wt.(10.35)\\nIf0<rank(Π) =r<N,Πcan be decomposed as\\nΠ=αβT(10.36)\\nand then each of the rcomponents of βTytis stationary and thus a\\nmean-reversion process.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 206}),\n",
       " Document(page_content='200 Statistical Arbitrage\\nThus, following the procedure in Section 10.4, one can study the\\nspread and ﬁnd the optimal trading threshold for each component.\\nAmong all the cointegrated components, usually the one with strongest\\nstrength of mean-reversion is preferred in practice [46].\\n10.5.2 Statistical Arbitrage Based on Factor Models\\nLet us now introduce the second method based on factor models. First\\nrecall the factor model (10.11) for stock iat timetused in Section\\n10.2.2 as follows:\\nrit=πT\\nift+εit, (10.37)\\nwhere ftis the factor which is the same for all the stocks, πiis the\\nvector of loading coeﬃcients, and εitis the speciﬁc noise.\\nThen the idea of trading the mean-reversion pattern based on\\n(10.37) is to ﬁrst properly select some tradeable factors and then test\\nwhether the cumulative summations of the resulted speciﬁc noise εit\\nare stationary. If positive, then one can deﬁne zit=∑t\\nj=0(rij−πT\\nifj)\\nas a spread. Some tradeable examples of ftare the log-returns of the\\nsector ETFs and/or that of several largest eigen-portfolios5[13].\\nAgain,foreachconstructedcointegrationcomponent,onecanstudy\\nthe spread and ﬁnd the optimal trading threshold following the proce-\\ndure in Section 10.4.\\n5An eigen-portfolio is a portfolio whose weight is a eigenvector of the covariance\\n(or correlation) matrix of the stock returns.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 207}),\n",
       " Document(page_content='11\\nConclusions\\nThis monograph has discussed the underlying connections between ﬁ-\\nnancial engineering and signal processing.\\nPart I has focused on ﬁnancial modeling and order execution. The\\nidea of decomposing a ﬁnancial time series into a trend and noise com-\\nponents is the same as that of decomposing discrete-time signal series\\ninto useful signal and noise components; ﬁnancial time series modeling\\nis similar to ﬁlter modeling in signal processing, e.g., the ARMA model\\nin ﬁnancial engineering is the same as the pole-zero model in signal pro-\\ncessing; the order execution problem of minimizing the execution cost\\nis also similar to sensor scheduling in dynamic wireless sensor networks\\nand power allocation problems in broadcast channels.\\nPart II has mainly explored the (robust) portfolio optimization.\\nIn fact, portfolio optimization is mathematically identical to beam-\\nforming/ﬁlter design and the robust techniques to handling those two\\nproblems are also the same, e.g., the shrinkage technique in ﬁnancial\\nengineering is exactly diagonal loading in beamforming design.\\nPart III has reviewed statistical arbitrage with three steps: pairs se-\\nlection, cointegration test, and trading strategy design. It is interesting\\nto see that some quantitative tools familiar to researchers in signal pro-\\n201', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 208}),\n",
       " Document(page_content='202 Conclusions\\ncessing and control theory, e.g., the Kalman ﬁlter, have been applied in\\nﬁnancial engineering to improve the statistical arbitrage trading strat-\\negy.\\nBased on the detailed explorations in this entire monograph and\\nin the above brief summary, we believe this monograph may serve as\\na comprehensive tutorial on ﬁnancial engineering from a signal pro-\\ncessing perspective. We hope it can help researchers in signal process-\\ning and communication societies as a starting point to access ﬁnancial\\nengineering problems more straightforwardly and systematically, and\\napply signal processing techniques to deal with appropriate ﬁnancial\\nproblems.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 209}),\n",
       " Document(page_content='Appendices', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 210}),\n",
       " Document(page_content='A\\nMATLAB Code of Example 3.1\\nclear all; clc; close all;\\n%% settings\\n% N: dim; T: # samples ; OutT : # outliers\\nN = 2;\\nT = 40;\\nOutT = 4;\\nCovMatrix = zeros (N,N);\\nMeanVec = zeros (1, N);\\nOutMeanVec = [-2, 2];\\nfor i = 1:N\\nfor j = 1:N\\nCovMatrix (i,j) = (0.8) ^abs(i-j);\\nend\\nend\\n%% generate samples and outliers\\nSamPoints = mvnrnd ( MeanVec , CovMatrix , T);\\nOutPoints = mvnrnd ( OutMeanVec , CovMatrix , OutT );\\n%% data : samples + outliers\\nX = [ SamPoints ; OutPoints ];\\n%% sample covariance matrix , or equivalently , the Gaussian\\nMLE\\nCovNormal = X ’*X./(T+ OutT );\\n%% Cauchy MLE\\nCovCauchy = eye(N);\\nCovCauchyInv = inv ( CovMatrix );\\ncvg = 0;\\nwhile (~ cvg)\\nw = (N+1) ./ (1 + diag (X* CovCauchyInv *X ’));\\ntmpCov = (X ’* diag (w)*X) ./ (T + OutT );\\n204', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 211}),\n",
       " Document(page_content='205\\nif ( norm ( CovCauchy - tmpCov , ’fro ’) ./ norm (tmpCov , ’fro\\n’) < 1e -8)\\ncvg = 1;\\nelse\\nCovCauchy = tmpCov ;\\nCovCauchyInv = inv ( CovCauchy );\\nend\\nend\\n% get size c: solving Eq. (3.56) yields c = 0.4944.\\nfun = @(x,c,N) ((N+1) ./(1+ x./c) .* (x./c) .* chi2pdf (x,N));\\ncmin = 0.01; cmax = 20;\\nTol_c = 1e -6;\\nwhile 1\\ncc = ( cmin + cmax ) ./ 2;\\nq = integral (@(x)fun(x,cc ,N), 0, Inf);\\nif q > N + Tol_c\\ncmin = cc;\\nelseif q < N - Tol_c\\ncmax = cc;\\nelse\\nbreak ;\\nend\\nend\\nCovCauchy = CovCauchy ./ cc;\\n%% plot results\\nRG = 4;\\nx1 = -RG :.2: RG;\\nx2 = -RG :.2: RG;\\n[X1 ,X2] = meshgrid (x1 ,x2);\\nfigure ()\\n% plot the sample points\\nhsam = plot ( SamPoints (: ,1) ,SamPoints (: ,2) , ’k+’);\\nhold on;\\n% plot the outliers\\nhout = plot ( OutPoints (: ,1) ,OutPoints (: ,2) , ’rs ’, ’\\nMarkerFaceColor ’, ’r’);\\n% plot the true shape\\nALL_Points = mvnpdf ([ X1 (:) X2 (:)], MeanVec , CovMatrix );\\nALL_points = reshape ( ALL_Points , length (x2),length (x1));', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 212}),\n",
       " Document(page_content='206 MATLAB Code of Example 3.1\\n[c, hTrue ] = contour (x1 , x2 , ALL_points ,[0.01] , ’LineWidth ’\\n, 2, ’Color ’, ’k’, ’LineStyle ’, ’:’);\\n% plot the shape based on Gaussian MLE\\nALL_Points = mvnpdf ([ X1 (:) X2 (:)], MeanVec , CovNormal );\\nALL_points = reshape ( ALL_Points , length (x2),length (x1));\\n[c, hNormal ] = contour (x1 ,x2 , ALL_points ,[0.01] , ’LineWidth ’\\n, 2, ’Color ’, ’r’, ’LineStyle ’, ’-’);\\n% plot the shape based on Cauchy MLE\\nALL_Points = mvnpdf ([ X1 (:) X2 (:)], MeanVec , CovCauchy );\\nALL_points = reshape ( ALL_Points , length (x2),length (x1));\\n[c, hCauchy ] = contour (x1 ,x2 , ALL_points ,[0.01] , ’LineWidth ’,\\n2, ’Color ’, ’b’, ’LineStyle ’,’ -.’);\\naxis square\\nxlim ([-RG , RG ])\\nylim ([-RG , RG ])\\nlegend ([ hsam , hout , hTrue , hCauchy , hNormal ], ’Samples ’, ’\\nOutliers ’, ’Oracle ’, ’MLE: Cauchy ’, ’MLE: Gaussian ’, ’\\nLocation ’, ’SouthEast ’)\\nprint (’-depsc ’, [’ Normal_vs_Cauchy_Cov ’])', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 213}),\n",
       " Document(page_content='B\\nMATLAB Code of Figure 5.1\\nclear all; clc; close all;\\n%% initial settings\\nNumStocks = 3;\\nSigma = eye( NumStocks );\\nmu = 0.5*[1 2 3] ’;\\nSigmaInv = inv( Sigma );\\n%% portfolio optimization : solution (5.8)\\nLams = 2.^[ -2:0.1:10];\\nNumLams = length ( Lams );\\nmeanVec = NaN ( NumLams , 1);\\nstdVec = NaN ( NumLams , 1);\\nonesNumStocks = ones ( NumStocks ,1);\\nfor whichLam = 1: NumLams\\nlam = Lams ( whichLam );\\nnu = (2* lam - onesNumStocks ’ * SigmaInv * mu) / (\\nonesNumStocks ’ * SigmaInv * onesNumStocks );\\nw = SigmaInv * (mu + nu * onesNumStocks ) / (2* lam);\\nmeanVec ( whichLam ) = w ’* mu;\\nstdVec ( whichLam ) = sqrt (w ’* Sigma *w);\\nend\\n%% Sharpe ratio portofilo , (5.13)\\nrf = 0.4;\\nwm = SigmaInv *( mu - rf* onesNumStocks );\\nwm = wm ./ sum (wm);\\nmeanm = wm ’* mu;\\nstdm = sqrt (wm ’* Sigma *wm);\\nxx = linspace (0, 2, 400) ;\\nslope = ( meanm - rf) ./ stdm ;\\nyy = slope .* xx + rf;\\n207', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 214}),\n",
       " Document(page_content='208 MATLAB Code of Figure 5.1\\n%% plot the results\\nfigure ()\\nplot (stdVec , meanVec , ’k-’,’LineWidth ’ ,1.5);\\nhold on;\\nplot (xx , yy , ’b-’,’LineWidth ’ ,1.5);\\nxlim ([0 1.7028]) ;\\nylim ([0 2.2172]) ;\\ntext ( -0.05 ,rf ,’r_f ’)\\n% GMVP\\nscatter (min ( stdVec ), min ( meanVec ) ,25 ,’k’,’filled ’)\\nannotation (’textarrow ’ ,[0.72 ,0.62]/(1.5480)\\n,[1 -0.2 ,1 -0.05]/(2.0156) ,...\\n’String ’,’Global minimum variance ’)\\n% Sharpe ratio\\nscatter (stdm , meanm ,25 ,’b’,’filled ’)\\nannotation (’textarrow ’ ,[ stdm +0.1 , stdm +0.005]/(1.5480) ,[\\nmeanm -0.28 , meanm -0.125]/(2.0156) ,...\\n’String ’,’Maximum Sharpe ratio ’)\\n% Efficient frontier\\nannotation (’textarrow ’ ,[1.2 ,1.05]/(1.7028)\\n,[1.35 ,1.55]/(2.2172) ,...\\n’String ’,’Efficient frontier ’)\\n% Capital market line\\nannotation (’textarrow ’ ,[0.5 , 0.57]/(1.7028) ,[1.5 ,\\n1.05]/(2.2172) ,...\\n’String ’,’Capital market line ’)\\nxlabel (’Standard deviation ’)\\nylabel (’Expected return ’)\\n% remove ticks\\nset(gca ,’xtick ’ ,[])\\nset(gca ,’xticklabel ’ ,[])\\nset(gca ,’ytick ’ ,[])\\nset(gca ,’yticklabel ’ ,[])\\nprint (’-depsc ’,’ Efficient_Frontier ’)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 215}),\n",
       " Document(page_content='C\\nMATLAB Code of Example 10.4\\nclear all; clc; close all;\\n%% retrive data\\n% the hist_stock_data function is available at\\n% http :// www. mathworks .com/ matlabcentral / fileexchange\\n/18458 - historical -stock -data - downloader / content //\\nhist_stock_data .m\\nRealData = hist_stock_data (’01012013 ’, ’31122015 ’, ’1398. HK\\n’, ’0939. HK ’, ’frequency ’, ’d’);\\n%% process data\\nAllDates = sort ( intersect ( RealData (1).Date , RealData (2).\\nDate ));\\n[~, DateIdx ] = intersect ( RealData (1) .Date , AllDates );\\nLogPrice1 = log( RealData (1). AdjClose ( DateIdx ));\\n[~, DateIdx ] = intersect ( RealData (2) .Date , AllDates );\\nLogPrice2 = log( RealData (2). AdjClose ( DateIdx ));\\n% plot the log prices\\nfigure ()\\nNumDays = length ( LogPrice1 );\\nh1 = plot (1: NumDays , LogPrice1 , ’b’, ’LineWidth ’, 1.5) ;\\nhold on;\\nh2 = plot (1: NumDays , LogPrice2 , ’r--’, ’LineWidth ’, 1.5) ;\\ngrid on;\\nlegend ([h1 , h2], {’ICBC ’, ’CCB ’}, ’location ’, ’NorthWest ’)\\nylabel (’Log - price ’)\\nDateIdxShow = find ([1; diff ( year ( AllDates ))]);\\nset(gca ,’XTick ’, DateIdxShow )\\nset(gca , ’XTickLabel ’, datestr ({ AllDates { DateIdxShow }}, ’\\nyyyy ’))\\nprint (’-depsc ’,’ Real_log_prices ’)\\n209', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 216}),\n",
       " Document(page_content='210 MATLAB Code of Example 10.4\\n%% cointegration test\\nY = [ LogPrice1 LogPrice2 ];\\n[h, pValue , stat , cValue , reg] = egcitest (Y);\\n%% plot the in - sample spread\\nfigure ()\\nspread = Y * [1; -reg . coeff (2) ];\\nhspread = plot (1: NumDays , spread , ’k-’,’LineWidth ’, 1.5) ;\\nhold on;\\nplot (1: NumDays , mean ( spread ) + std ( spread ) .* ones ( NumDays\\n,1) , ’m’, ’LineWidth ’, 1.5)\\nplot (1: NumDays , mean ( spread ) - std ( spread ) .* ones ( NumDays\\n,1) , ’m’, ’LineWidth ’, 1.5)\\nplot (1: NumDays , mean ( spread ) .* ones ( NumDays ,1) , ’b--’, ’\\nLineWidth ’, 1.5)\\nylabel (’In - sample spread ’)\\nDateIdxShow = find ([1; diff ( year ( AllDates ))]);\\nset(gca ,’XTick ’, DateIdxShow )\\nset(gca , ’XTickLabel ’, datestr ({ AllDates { DateIdxShow }}, ’\\nyyyy ’))\\nprint (’-depsc ’,’ Real_in_sample_spread ’)', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 217}),\n",
       " Document(page_content='Abbreviations\\nAR Autoregressive.\\nARCH Autoregressive Conditional\\nHeteroskedasticity.\\nARMA Autoregressive Moving Average.\\nCVaR Conditional Value-at-Risk.\\nGARCH Generalized Autoregressive\\nConditional Heteroskedasticity.\\nGMVP Global Minimum Variance Portfolio.\\nGNE Generalized Nash Equilibrium.\\nGNEP Generalized Nash Equilibrium\\nProblem.\\ni.i.d./I.I.D. Independent and Identically\\nDistributed.\\nIPM Interior Point Methods.\\nLS Least-Square.\\nMA Moving Average.\\nMAP Maximum A Posterior.\\nML Maximum Likelihood.\\nMLE Maximum Likelihood Estimator.\\nMSE Mean Squared Error.\\nMV Minimum Variance.\\n211', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 218}),\n",
       " Document(page_content='212 MATLAB Code of Example 10.4\\nNE Nash Equilibrium.\\nNEP Nash EquilibriumE Problem.\\nPCA Principal Component Analysis.\\nPSD Positive Semideﬁnite.\\nQCQP Quadratically Constrained\\nQuadratic Programming.\\nQP Quadratic Programming.\\nRMT Random Matrix Theory.\\nSAA Sample Average Approximation.\\nSCA Successive Convex Approximation.\\nSCM Sample Covariance Matrix.\\nSCRIP Successive Convex optimization for\\nRIsk Parity portfolio.\\nSDP Semideﬁnite Programming.\\nSDR Semideﬁnite Programming\\nRelaxation.\\nSINR Signal-to-Interference-plus-Noise\\nRatio.\\nSNR Signal-to-Noise Ratio.\\nSQP Sequential Quadratic Programming.\\nSR Sharpe Ratio.\\nVaR Value-at-Risk.\\nVAR Vector Autoregressive.\\nVARMA Vector Autoregressive Moving\\nAverage.\\nVECM Vector Error Correction Model.\\nVMA Vector Moving Average.\\nw.r.t. With Respect To.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 219}),\n",
       " Document(page_content='Notation\\nBoldface lower-case letters denote column vectors, boldface upper-case\\nlettersdenotematrices,lower-caseitalicsdenotescalars,andupper-case\\nitalics denote random scalar variables. For the ﬁnancial time series, at\\ntimet, we useptto denote the price, Rt≜pt−pt−1\\npt−1to denote net return,\\nyt≜logptto denote the log-price, rt≜yt−yt−1= logpt−logpt−1=\\nlog(1 +Rt)to denote the compound return or log-return, and wtto\\ndenote the white noise.\\n∝ Proportional to.\\n≜ Deﬁned as.\\nAT,AHTranspose, conjugate transpose (Hermitian) of\\nthe matrix A, respectively.\\nA−1Inverse of the matrix A.\\nA†Matrix Moore-Penrose pseudoinverse of the\\nmatrix A.\\nai Thei-th entry of the vector a.\\nAij The element of matrix Aat thei-th row and\\nj-th column.\\nA1/2The principal square root of the matrix A, i.e.,\\nA1/2A1/2=A.\\nDiag ( A) A diagonal matrix with diagonal elements equal\\nto that of A.\\n|A|ordet (A)Determinant of the matrix A.\\n213', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 220}),\n",
       " Document(page_content='214 MATLAB Code of Example 10.4\\nTr (A) Trace of the matrix A.\\n|a| Absolute value of the scalar a.\\n∥a∥1 ℓ1-norm of the vector a, i.e.,∥a∥1≜∑\\ni|ai|.\\n∥a∥2 Euclidean norm (i.e., ℓ2-norm) of the vector a,\\ni.e.,∥a∥1≜√\\naTa.\\n∥A∥F Frobenius norm of matrix A, i.e.,\\n∥A∥F≜∑\\nTr (ATA).\\nI Identity matrix with proper size. A subscript\\ncan be used to indicate the dimension as well.\\na≥b Elementwise relation ai≥bi.\\nA⪰B A −Bis a positive semideﬁnite matrix.\\nA≻B A −Bis a positive deﬁnite matrix.\\nR,C The set of real and complex numbers,\\nrespectively.\\nRm×n,Cm×nThe setm-by-nmatrices with real- and\\ncomplex-valued entries, respectively.\\nSnThe set of symmetric n-by-nmatrices\\nSn≜{\\nX∈Rn×n|X=XT}\\n.\\nSn\\n+ The set of positive semideﬁnite n-by-nmatrices\\nSn\\n+≜{\\nX∈Rn×n|X=XT⪰0}\\n.\\nx⋆The optimal solution xto a problem. The\\nnotation xdenotes the vector form of all the\\nvariables of the problem.\\nv⋆((·)) The optimal value of problem (·).\\n∼ Distributed according to.\\nN(µ,Σ) Multivariate Gaussian distribution with mean µ\\nand covariance matrix Σ.\\nlog (·) Natural logarithm.\\nE[·] Statistical expectation.\\nVar[·] Statistical variance.\\nCov[·] Statistical covariance.\\n[a]+Positive part of a, i.e., [a]+≜max (0,a).\\nsup,inf Supremum and inﬁmum.\\n∪,∩ Union and intersection.\\n∇xf(x) Gradient of function f(x)with respect to x.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 221}),\n",
       " Document(page_content='Acknowledgments\\nThe work of Yiyong Feng and Daniel P. Palomar was supported by the\\nHong Kong Research Grants Council under research grants 16207814\\nand 16206315. Both the authors would like to thank the anonymous\\nreviewer, whose comments have signiﬁcantly contributed to improve\\nthe quality of this monograph.\\n215', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 222}),\n",
       " Document(page_content='References\\n[1] Y. Abramovich. Controlled method for adaptive optimization of ﬁlters\\nusing the criterion of maximum SNR. Radio Engineering and Electronic\\nPhysics, 26(3):87–95, 1981.\\n[2] Y. Abramovich and N. K. Spencer. Diagonally loaded normalised sam-\\nple matrix inversion (LNSMI) for outlier-resistant adaptive ﬁltering. In\\nIEEE International Conference on Acoustics, Speech and Signal Pro-\\ncessing, volume 3, pages III–1105. IEEE, 2007.\\n[3] A. N. Akansu, S. R. Kulkarni, and D. M. Malioutov, editors. Financial\\nSignal Processing and Machine Learning . Wiley-IEEE Press, 2016.\\n[4] I. Aldridge. High-Frequency Trading: A Practical Guide to Algorithmic\\nStrategies and Trading Systems . John Wiley & Sons, 2013.\\n[5] C. Alexander. Optimal hedging using cointegration. Philosophical\\nTransactions of the Royal Society of London A: Mathematical, Phys-\\nical and Engineering Sciences , 357(1758):2039–2058, 1999.\\n[6] C. Alexander, I. Giblin, and W. Weddington. Cointegration and asset\\nallocation: A new active hedge fund strategy. ISMA Centre Discussion\\nPapers in Finance Series , 2002.\\n[7] S. Alexander, T. F. Coleman, and Y. Li. Minimizing CVaR and VaR for\\na portfolio of derivatives. Journal of Banking & Finance , 30(2):583–605,\\n2006.\\n[8] R. Almgren and N. Chriss. Optimal execution of portfolio transactions.\\nJournal of Risk , 3:5–40, 2001.\\n216', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 223}),\n",
       " Document(page_content='References 217\\n[9] S. Andrade, V. Di Pietro, and M. Seasholes. Understanding the prof-\\nitability of pairs trading. Unpublished working paper, UC Berkeley,\\nNorthwestern University , 2005.\\n[10] K.Andriosopoulos,M.Doumpos,N.C.Papapostolou,andP.K.Poulia-\\nsis. Portfolio optimization and index tracking for the shipping stock and\\nfreight markets using evolutionary algorithms. Transportation Research\\nPart E: Logistics and Transportation Review , 52:16–34, 2013.\\n[11] A. Ang and A. Timmermann. Regime changes and ﬁnancial markets.\\nTechnical report, National Bureau of Economic Research, 2011.\\n[12] O. Arslan. Convergence behavior of an iterative reweighting algorithm\\nto compute multivariate m-estimates for location and scatter. Journal\\nof Statistical Planning and Inference , 118(1):115–128, 2004.\\n[13] M. Avellaneda and J.-H. Lee. Statistical arbitrage in the US equities\\nmarket.Quantitative Finance , 10(7):761–782, 2010.\\n[14] X. Bai, K. Scheinberg, and R. Tutuncu. Least-squares approach to risk\\nparity in portfolio selection. Available at SSRN 2343406 , 2013.\\n[15] M. Bańbura, D. Giannone, and L. Reichlin. Large Bayesian vector auto\\nregressions. Journal of Applied Econometrics , 25(1):71–92, 2010.\\n[16] L. Bauwens, S. Laurent, and J.V.K. Rombouts. Multivariate GARCH\\nmodels: A survey. Journal of Applied Econometrics , 21(1):79–109, 2006.\\n[17] J. E. Beasley, N. Meade, and T.-J. Chang. An evolutionary heuristic for\\nthe index tracking problem. European Journal of Operational Research ,\\n148(3):621–643, 2003.\\n[18] D. Bertsimas and A. W. Lo. Optimal control of execution costs. Journal\\nof Financial Markets , 1:1–50, 1998.\\n[19] D. Bianchi and A. Gargano. High-dimensional index tracking with coin-\\ntegrated assets using an hybrid genetic algorithm. Available at SSRN ,\\n1785908, 2011.\\n[20] P. J. Bickel and E. Levina. Regularized estimation of large covariance\\nmatrices. 
The Annals of Statistics , pages 199–227, 2008.\\n[21] J. Bien and R. J. Tibshirani. Sparse estimation of a covariance matrix.\\nBiometrika , 98(4):807–820, 2011.\\n[22] F. Black and R. Litterman. Asset allocation: combining investor views\\nwith market equilibrium. The Journal of Fixed Income , 1(2):7–18, 1991.\\n[23] F. Black and R. Litterman. Global asset allocation with equities, bonds,\\nand currencies. Fixed Income Research , 2:15–28, 1991.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 224}),\n",
       " Document(page_content='218 References\\n[24] F. Black and R. Litterman. Global portfolio optimization. Financial\\nAnalysts Journal , 48(5):28–43, 1992.\\n[25] F.BlackandM.Scholes. Thepricingofoptionsandcorporateliabilities.\\nThe Journal of Political Economy , pages 637–654, 1973.\\n[26] D. Blamont and N. Firoozy. Asset allocation model. Global Markets\\nResearch: Fixed Income Research , 2003.\\n[27] Z. Bodie, A. Kane, and A. J. Marcus. Investments . Tata McGraw-Hill\\nEducation, 10th edition, 2013.\\n[28] T. Bollerslev. Generalized autoregressive conditional heteroskedasticity.\\nJournal of Econometrics , 31(3):307–327, 1986.\\n[29] T. Bollerslev. Modelling the coherence in short-run nominal exchange\\nrates: a multivariate generalized arch model. The Review of Economics\\nand Statistics , pages 498–505, 1990.\\n[30] T. Bollerslev, R. F. Engle, and J. M. Wooldridge. A capital asset pricing\\nmodelwithtime-varyingcovariances. The Journal of Political Economy ,\\npages 116–131, 1988.\\n[31] J.-P. Bouchaud. Economics needs a scientiﬁc revolution. Nature,\\n455(7217):1181–1181, 2008.\\n[32] S. P. Boyd and L. Vandenberghe. Convex Optimization . Cambridge\\nUniversity Press, 2004.\\n[33] J. Brodie, I. Daubechies, C. De Mol, D. Giannone, and I. Loris. Sparse\\nand stable Markowitz portfolios. Proceedings of the National Academy\\nof Sciences , 106(30):12267–12272, 2009.\\n[34] B. Bruder and T. Roncalli. Managing risk exposures using the risk\\nbudgeting approach. Technical report, University Library of Munich,\\nGermany, 2012.\\n[35] R. H. Byrd, M. E. Hribar, and J. Nocedal. An interior point algorithm\\nfor large-scale nonlinear programming. SIAM Journal on Optimization ,\\n9(4):877–900, 1999.\\n[36] N. A. Canakgoz and J. E. Beasley. Mixed-integer programming ap-\\nproaches for index tracking and enhanced indexation. European Journal\\nof Operational Research , 196(1):384–399, 2009.\\n[37] E. J. Candes, M. B. Wakin, and S. P. Boyd. 
Enhancing sparsity by\\nreweighted ℓ1minimization. Journal of Fourier Analysis and Applica-\\ntions, 14(5-6):877–905, 2008.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 225}),\n",
       " Document(page_content='References 219\\n[38] B.D.Carlson. Covariancematrixestimationerrorsanddiagonalloading\\nin adaptive arrays. IEEE Transactions on Aerospace and Electronic\\nSystems, 24(4):397–401, 1988.\\n[39] Y. Chen, A. Wiesel, and A. O. Hero III. Robust shrinkage estimation\\nof high-dimensional covariance matrices. IEEE Transactions on Signal\\nProcessing , 59(9):4097–4107, 2011.\\n[40] X. Cheng, Z. Liao, and F. Schorfheide. Shrinkage estimation of high-\\ndimensional factor models with structural instabilities. The Review of\\nEconomic Studies , 2016.\\n[41] T. F. Coleman, Y. Li, and J. Henniger. Minimizing tracking error while\\nrestricting the number of assets. Journal of Risk , 8(4):33, 2006.\\n[42] G. Connor. The three types of factor models: A comparison of their\\nexplanatory power. Financial Analysts Journal , 51(3):42–46, 1995.\\n[43] R.CouilletandM.McKay. Largedimensionalanalysisandoptimization\\nof robust shrinkage covariance matrix estimators. Journal of Multivari-\\nate Analysis , 131:99–120, 2014.\\n[44] T. M. Cover and J. A. Thomas. Elements of Information Theory . John\\nWiley & Sons, 2012.\\n[45] H. Cox, R. M. Zeskind, and M. M. Owen. Robust adaptive beamform-\\ning.IEEE Transactions on Acoustics, Speech and Signal Processing ,\\n35(10):1365–1376, 1987.\\n[46] A. d’Aspremont. Identifying small mean-reverting portfolios. Quanti-\\ntative Finance , 11(3):351–364, 2011.\\n[47] R. A. Davis, P. Zang, and T. Zheng. Sparse vector autoregressive mod-\\neling.Journal of Computational and Graphical Statistics , 0:1–53, 2015.\\n[48] V. DeMiguel, L. Garlappi, F. J. Nogales, and R. Uppal. A general-\\nized approach to portfolio optimization: Improving performance by con-\\nstraining portfolio norms. Management Science , 55(5):798–812, 2009.\\n[49] D.A.DickeyandW.A.Fuller. Distributionoftheestimatorsforautore-\\ngressive time series with a unit root. Journal of the American statistical\\nassociation , 74(366a):427–431, 1979.\\n[50] B. Do, R. 
Faﬀ, and K. Hamza. A new approach to modeling and esti-\\nmation for pairs trading. In Proceedings of 2006 Financial Management\\nAssociation European Conference , 2006.\\n[51] D. L. Donoho. Compressed sensing. IEEE Transactions on Information\\nTheory, 52(4):1289–1306, 2006.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 226}),\n",
       " Document(page_content='220 References\\n[52] C. Dose and S. Cincotti. Clustering of ﬁnancial time series with ap-\\nplication to index and enhanced index tracking portfolio. Physica A:\\nStatistical Mechanics and its Applications , 355(1):145–151, 2005.\\n[53] B. Efron and C. Morris. Stein’s estimation rule and its competitors-an\\nempirical Bayes approach. Journal of the American Statistical Associa-\\ntion, 68(341):117–130, 1973.\\n[54] L. El Ghaoui and H. Lebret. Robust solutions to least-squares problems\\nwith uncertain data. SIAM Journal on Matrix Analysis and Applica-\\ntions, 18:1035–1064, 1997.\\n[55] L. El Ghaoui, M. Oks, and F. Oustry. Worst-case value-at-risk and ro-\\nbust portfolio optimization: A conic programming approach. Operations\\nResearch , pages 543–556, 2003.\\n[56] Y. C. Eldar. Rethinking biased estimation: Improving maximum likeli-\\nhood and the Cramér–Rao bound. Foundations and TrendsR⃝in Signal\\nProcessing , 1(4):305–449, 2008.\\n[57] R. J. Elliott, J. Van Der Hoek, and W. P. Malcolm. Pairs trading.\\nQuantitative Finance , 5(3):271–276, 2005.\\n[58] E. J. Elton, M. J. Gruber, S. J. Brown, and W. N. Goetzmann. Modern\\nPortfolio Theory and Investment Analysis . John Wiley & Sons, 2009.\\n[59] R. F. Engle. Autoregressive conditional heteroscedasticity with esti-\\nmates of the variance of United Kingdom inﬂation. Econometrica: Jour-\\nnal of the Econometric Society , pages 987–1007, 1982.\\n[60] R. F. Engle. Dynamic conditional correlation: A simple class of mul-\\ntivariate generalized autoregressive conditional heteroskedasticity mod-\\nels.Journal of Business & Economic Statistics , 20(3):339–350, 2002.\\n[61] R. F. Engle and C. W. J. Granger. Co-integration and error correction:\\nrepresentation, estimation, and testing. Econometrica: Journal of the\\nEconometric Society , pages 251–276, 1987.\\n[62] R. F. Engle and K. F. Kroner. 
Multivariate simultaneous generalized\\nARCH.Econometric Theory , 11(01):122–150, 1995.\\n[63] F. J. Fabozzi. Robust Portfolio Optimization and Management . Wiley,\\n2007.\\n[64] F. J. Fabozzi, S. M. Focardi, and P. N. Kolm. Financial Modeling of the\\nEquity Market: from CAPM to Cointegration , volume 146. John Wiley\\n& Sons, 2006.\\n[65] F. J. Fabozzi, S. M. Focardi, and P. N. Kolm. Quantitative Equity\\nInvesting: Techniques and Strategies . Wiley, 2010.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 227}),\n",
       " Document(page_content='References 221\\n[66] E. F. Fama and K. R. French. The cross-section of expected stock\\nreturns. Journal of Finance , 47(2):427–465, 1992.\\n[67] E. F. Fama and K. R. French. Common risk factors in the returns on\\nstocks and bonds. Journal of Financial Economics , 33(1):3–56, 1993.\\n[68] E. F. Fama and K. R. French. Size and book-to-market factors in\\nearnings and returns. Journal of Finance , 50(1):131–155, 1995.\\n[69] E. F. Fama and K. R. French. Multifactor explanations of asset pricing\\nanomalies. Journal of Finance , 51(1):55–84, 1996.\\n[70] E. F. Fama and K. R. French. The capital asset pricing model: Theory\\nand evidence. Journal of Economic Perspectives , 18:25–46, 2004.\\n[71] J. Fan, Y. Fan, and J. Lv. High dimensional covariance matrix esti-\\nmation using a factor model. Journal of Econometrics , 147(1):186–197,\\n2008.\\n[72] J. Fan, L. Qi, and D. Xiu. Quasi-maximum likelihood estimation of\\ngarch models with heavy-tailed likelihoods. Journal of Business & Eco-\\nnomic Statistics , 32(2):178–191, 2014.\\n[73] J. Fan, J. Zhang, and K. Yu. Vast portfolio selection with gross-\\nexposure constraints. Journal of the American Statistical Association ,\\n107(498):592–606, 2012.\\n[74] B. Fastrich, S. Paterlini, and P. Winker. Constructing optimal sparse\\nportfolios using regularization methods. Computational Management\\nScience, pages 1–18, 2013.\\n[75] B. Fastrich, S. Paterlini, and P. Winker. Cardinality versus q-norm\\nconstraints for index tracking. Quantitative Finance , 14(11):2019–2032,\\n2014.\\n[76] Y. Feng and D. P. Palomar. SCRIP: Successive convex optimization\\nmethods for risk parity portfolios design. IEEE Transactions on Signal\\nProcessing , 63(19):5285–5300, Oct. 2015.\\n[77] Y. Feng, D. P. Palomar, and F. Rubio. Robust order execution under\\nbox uncertainty sets. In Proceedings of the Asilomar Conference on\\nSignals Systems, and Computers , pages 44–48, Paciﬁc Grove, CA, Nov.\\n2013.\\n[78] Y. 
Feng, D. P. Palomar, and F. Rubio. Robust optimization of order\\nexecution. IEEE Transactions on Signal Processing ,63(4):907–920,Feb.\\n2015.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 228}),\n",
       " Document(page_content='222 References\\n[79] Y. Feng, F. Rubio, and D. P. Palomar. Optimal order execution for\\nalgorithmic trading: A CVaR approach. In Proceedings of the IEEE\\nWorkshop on Signal Processing Advances in Wireless Communications ,\\npages 480–484, Jun. 2012.\\n[80] C. Floros. Modelling volatility using high, low, open and closing prices:\\nevidence from four S&P indices. International Research Journal of Fi-\\nnance and Economics , 28:198–206, 2009.\\n[81] G. Frahm. Generalized elliptical distributions: theory and applications .\\nPhD thesis, Universität zu Köln, 2004.\\n[82] J. Friedman, T. Hastie, and R. Tibshirani. Sparse inverse covariance\\nestimation with the graphical lasso. Biostatistics , 9(3):432–441, 2008.\\n[83] W. Fung and D. A. Hsieh. Measuring the market impact of hedge funds.\\nJournal of Empirical Finance , 7(1):1–36, 2000.\\n[84] M. B. Garman and M. J. Klass. On the estimation of security price\\nvolatilities from historical data. Journal of Business , pages 67–78, 1980.\\n[85] E. Gatev, W. N. Goetzmann, and K. G. Rouwenhorst. Pairs trad-\\ning: Performance of a relative-value arbitrage rule. Review of Financial\\nStudies, 19(3):797–827, 2006.\\n[86] D. Goldfarb and G. Iyengar. Robust portfolio selection problems. Math-\\nematics of Operations Research , 28(1):1–38, 2003.\\n[87] M. D. Gould, M. A. Porter, S. Williams, M. McDonald, D. J. Fenn, and\\nS. D. Howison. Limit order books. Quantitative Finance , 13(11):1709–\\n1742, 2013.\\n[88] B. Graham and D. L. Dodd. Security Analysis: Principles and Tech-\\nnique. McGraw-Hill, 1934.\\n[89] B. Graham, J. Zweig, and W. E. Buﬀett. The Intelligent Investor: A\\nBook of Practical Counsel . Harper & Row, 1973.\\n[90] T. Griveau-Billion, J.-C. Richard, and T. Roncalli. A fast algorithm\\nfor computing high-dimensional risk parity portfolios. arXiv preprint\\narXiv:1311.4057 , 2013.\\n[91] R. G. Hagstrom. 
The Warren Buﬀett Way: Investment Strategies of the\\nWorld’s Greatest Investor . John Wiley & Sons, 1997.\\n[92] M. Harlacher. Cointegration based statistical arbitrage. Department\\nof Mathematics, Swiss Federal Institute of Technology, Zurich, Switzer-\\nland, 2012.\\n[93] R. I. D. Harris. Using Cointegration Analysis in Econometric Modelling .\\nHarvester Wheatsheaf, Prentice Hall, 1995.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 229}),\n",
       " Document(page_content='References 223\\n[94] J. Hasbrouck. Empirical Market Microstructure: The Institutions, Eco-\\nnomics and Econometrics of Securities Trading . Oxford University\\nPress, USA, 2007.\\n[95] T. Hastie, R. Tibshirani, and J. Friedman. The Elements of Statistical\\nLearning . Springer, New York, 2009.\\n[96] T. Hastie, R. Tibshirani, and M. Wainwright. Statistical Learning with\\nSparsity: The Lasso and Generalizations . CRC Press, 2015.\\n[97] N. Hautsch. Econometrics of Financial High-Frequency Data . Springer\\nScience & Business Media, 2011.\\n[98] S. Haykin and B. Van Veen. Signals and Systems . John Wiley & Sons,\\n2007.\\n[99] C.-J. Hsieh, I. S. Dhillon, P. K. Ravikumar, and M. A. Sustik. Sparse\\ninverse covariance matrix estimation using quadratic approximation. In\\nAdvances in Neural Information Processing Systems , pages 2330–2338,\\n2011.\\n[100] D. Huang, S. Zhu, F. J. Fabozzi, and M. Fukushima. Portfolio selec-\\ntionunderdistributionaluncertainty:ArelativerobustCVaRapproach.\\nEuropean Journal of Operational Research , 203(1):185–194, 2010.\\n[101] P. J. Huber. Robust Statistics . Springer, 2011.\\n[102] G. Huberman and W. Stanzl. Optimal liquidity trading. Review of\\nFinance, 9(2):165–200, 2005.\\n[103] J. C. Hull. Options, Futures, and Other Derivatives . Pearson Education\\nIndia, 9th edition, 2014.\\n[104] T. M. Idzorek. A step-by-step guide to the Black-Litterman model.\\nForecasting Expected Returns in the Financial Markets , page 17, 2002.\\n[105] W. James and C. Stein. Estimation with quadratic loss. In Proceed-\\nings of the Fourth Berkeley Symposium on Mathematical Statistics and\\nProbability , volume 1, pages 361–379, 1961.\\n[106] R. Jansen and R. Van Dijk. Optimal benchmark tracking with small\\nportfolios. The Journal of Portfolio Management , 28(2):33–39, 2002.\\n[107] S. Johansen. Estimation and hypothesis testing of cointegration vectors\\nin Gaussian vector autoregressive models. 
Econometrica: Journal of the\\nEconometric Society , pages 1551–1580, 1991.\\n[108] S. Johansen. Likelihood-based inference in cointegrated vector autore-\\ngressive models. Oxford University Press Catalogue , 1995.\\n[109] I. Jolliﬀe. Principal Component Analysis . Wiley Online Library, 2002.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 230}),\n",
       " Document(page_content='224 References\\n[110] P. Jorion. Bayes-stein estimation for portfolio analysis. Journal of\\nFinancial and Quantitative Analysis , 21(03):279–292, 1986.\\n[111] T. Kailath. Linear Systems , volume 1. Prentice-Hall Englewood Cliﬀs,\\nNJ, 1980.\\n[112] A. Kammerdiner, A. Sprintson, E. Pasiliao, and V. Boginski. Optimiza-\\ntion of discrete broadcast under uncertainty using conditional value-at-\\nrisk.Optimization Letters , 8(1):45–59, 2014.\\n[113] J. T. Kent and D. E. Tyler. Maximum likelihood estimation for the\\nwrapped cauchy distribution. Journal of Applied Statistics , 15(2):247–\\n254, 1988.\\n[114] Masaaki Kijima. Stochastic Processes with Applications to Finance .\\nCRC Press, 2013.\\n[115] R. Kissell, M. Glantz, R. Malamut, and N.A. Chriss. Optimal Trading\\nStrategies: Quantitative Approaches for Managing Market Impact and\\nTrading Risk . Amacom, 2003.\\n[116] G. M. Koop. Forecasting with medium and large bayesian VARs. Jour-\\nnal of Applied Econometrics , 28(2):177–203, 2013.\\n[117] C. Lam and J. Fan. Sparsistency and rates of convergence in large\\ncovariance matrix estimation. The Annals of Statistics , 37(6B):4254,\\n2009.\\n[118] C. Lam, Q. Yao, and N. Bathia. Factor modeling for high dimensional\\ntimeseries. In Recent Advances in Functional Data Analysis and Related\\nTopics, pages 203–207. Springer, 2011.\\n[119] Z. M. Landsman and E. A. Valdez. Tail conditional expectations for\\nellipticaldistributions. The North American Actuarial Journal ,7(4):55–\\n71, 2003.\\n[120] O. Ledoit and M. Wolf. Improved estimation of the covariance matrix\\nof stock returns with an application to portfolio selection. Journal of\\nEmpirical Finance , 10(5):603–621, 2003.\\n[121] O. Ledoit and M. Wolf. A well-conditioned estimator for large-\\ndimensional covariance matrices. Journal of multivariate analysis ,\\n88(2):365–411, 2004.\\n[122] O. Ledoit and M. Wolf. 
Nonlinear shrinkage estimation of large-\\ndimensional covariance matrices. The Annals of Statistics , 40(2):1024–\\n1060, 2012.\\n[123] J. Li and P. Stoica. Robust Adaptive Beamforming . Wiley, 2006.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 231}),\n",
       " Document(page_content='References 225\\n[124] W.-L. Li, Y. Zhang, A. M.-C. So, and M. Z. Win. Slow adaptive\\nOFDMA systems through chance constrained programming. IEEE\\nTransactions on Signal Processing , 58(7):3858–3869, 2010.\\n[125] Y.-X. Lin, M. McCrae, and C. Gulati. Loss protection in pairs trading\\nthrough minimum proﬁt bounds: A cointegration approach. Advances\\nin Decision Sciences , 2006.\\n[126] R. B. Litterman. Forecasting with bayesian vector autoregressions–ﬁve\\nyearsofexperience. Journal of Business & Economic Statistics ,4(1):25–\\n38, 1986.\\n[127] M. S. Lobo and S. Boyd. The worst-case risk of a portfolio. Technical\\nreport, 2000.\\n[128] D. G. Luenberger. Investment Science . Oxford University Press, New\\nYork, 1998.\\n[129] H. Lütkepohl. New Introduction to Multiple Time Series Analysis .\\nSpringer Science & Business Media, 2007.\\n[130] J. G. MacKinnon. Critical values for cointegration tests. Technical\\nreport, Queen’s Economics Department Working Paper, 2010.\\n[131] S. Maillard, T. Roncalli, and J. Teïletche. The properties of equally\\nweighted risk contribution portfolios. Journal of Portfolio Management ,\\n36(4):60–70, 2010.\\n[132] B. G. Malkiel. A Random Walk Down Wall Street: The Time-tested\\nStrategy for Successful Investing . WW Norton & Company, 9th edition,\\n2007.\\n[133] D. G. Manolakis, V. K. Ingle, and S. M. Kogon. Statistical and adaptive\\nsignal processing: spectral estimation, signal modeling, adaptive ﬁltering,\\nand array processing , volume 46. Artech House Norwood, 2005.\\n[134] D. Maringer and O. Oyewumi. Index tracking with constrained port-\\nfolios.Intelligent Systems in Accounting, Finance and Management ,\\n15(1-2):57–71, 2007.\\n[135] H. M. Markowitz. Portfolio selection. Journal of Finance , 7(1):77–91,\\n1952.\\n[136] H. M. Markowitz. The optimization of a quadratic function subject to\\nlinear constraints. Naval Research Logistics Quarterly , 3(1-2):111–133,\\n1956.\\n[137] H. M. Markowitz. 
Portfolio Selection: Eﬃcient Diversiﬁcation of In-\\nvestments . Yale University Press, 1968.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 232}),\n",
       " Document(page_content='226 References\\n[138] H. M. Markowitz, G. P. Todd, and W. F. Sharpe. Mean-Variance Anal-\\nysis in Portfolio Choice and Capital Markets , volume 66. Wiley, 2000.\\n[139] R. A. Maronna. Robust M-Estimators of multivariate location and\\nscatter.The Annals of Statistics , 4(1):51–67, 01 1976.\\n[140] R. A. Maronna, D. Martin, and V. Yohai. Robust Statistics: Theory and\\nMethods. John Wiley & Sons, Chichester., 2006.\\n[141] A. J. McNeil, R. Frey, and P. Embrechts. Quantitative Risk Manage-\\nment: Concepts, Techniques and Tools . Princeton University Press,\\n2005.\\n[142] F.W.Meng,J.Sun,andM.Goh. Stochasticoptimizationproblemswith\\nCVaR risk measure and their sample average approximation. Journal\\nof Optimization Theory and Applications , 146(2):399–418, 2010.\\n[143] A. Meucci. Risk and Asset Allocation . Springer Science & Business\\nMedia, 2009.\\n[144] A. Meucci. Quant nugget 2: Linear vs. compounded returns–common\\npitfallsinportfoliomanagement. GARP Risk Professional ,pages49–51,\\n2010.\\n[145] S. Moazeni, T. F. Coleman, and Y. Li. Optimal portfolio execution\\nstrategies and sensitivity to price impact parameters. SIAM Journal on\\nOptimization , 20(3):1620–1654, 2010.\\n[146] S. Moazeni, T. F. Coleman, and Y. Li. Regularized robust optimization:\\nthe optimal portfolio execution case. Computational Optimization and\\nApplications , 55(2):341–377, 2013.\\n[147] S. Moazeni, T. F. Coleman, and Y. Li. Smoothing and parametric\\nrules for stochastic mean-CVaR optimal execution strategy. Annals of\\nOperations Research , pages 1–22, 2013.\\n[148] D. Monderer and L. S. Shapley. Potential games. Games and Economic\\nBehavior , 14(1):124–143, 1996.\\n[149] R. A. Monzingo and T. W. Miller. Introduction to Adaptive Arrays .\\nSciTech Publishing, 1980.\\n[150] MOSEK. The MOSEK optimization toolbox for MATLAB manual.\\nTechnical report, 2013.\\n[151] P. Nath. 
High frequency pairs trading with US treasury securities: Risks\\nand rewards for hedge funds. Available at SSRN 565441 , 2003.\\n[152] W. B. Nicholson, J. Bien, and D. S. Matteson. Hierarchical vector\\nautoregression. arXiv preprint arXiv:1412.5250 , 2014.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 233}),\n",
       " Document(page_content='References 227\\n[153] J. Nocedal and S. J. Wright. Numerical Optimization . Springer Series\\nin Operations Research. Springer Verlag, second edition, 2006.\\n[154] C. O’Cinneide, B. Scherer, and X. Xu. Pooling trades in a quantita-\\ntive investment process. Journal of Portfolio Management , 32(4):33–43,\\n2006.\\n[155] K. J. Oh, T. Y. Kim, and S. Min. Using genetic algorithm to support\\nportfolio optimization for index fund management. Expert Systems with\\nApplications , 28(2):371–379, 2005.\\n[156] M.O’Hara. Market Microstructure Theory ,volume108. BlackwellCam-\\nbridge, MA, 1995.\\n[157] E. Ollila and D. E. Tyler. Regularized m-estimators of scatter matrix.\\nIEEE Transactions on Signal Processing , 62(22):6059–6070, Nov 2014.\\n[158] F. Pascal, Y. Chitour, and Y. Quek. Generalized robust shrinkage esti-\\nmatoranditsapplicationtostapdetectionproblem. IEEE Transactions\\non Signal Processing , 62(21):5640–5651, 2014.\\n[159] A. F. Perold. The implementation shortfall: Paper versus reality. Jour-\\nnal of Portfolio Management , 14(3):4–9, 1988.\\n[160] A. Pole. Statistical Arbitrage: Algorithmic Trading Insights and Tech-\\nniques, volume 411. John Wiley & Sons, 2011.\\n[161] H. Puspaningrum. Pairs Trading Using Cointegration Approach . PhD\\nthesis, 2012.\\n[162] E. Qian. Risk parity portfolios: Eﬃcient portfolios through true diver-\\nsiﬁcation. Panagora Asset Management , Sept. 2005.\\n[163] E. Qian. On the ﬁnancial interpretation of risk contribution: Risk bud-\\ngets do add up. Journal of Investment Management , 4(4):41, 2006.\\n[164] M.Razaviyayn,M.Hong,andZ.-Q.Luo. Auniﬁedconvergenceanalysis\\nof block successive minimization methods for nonsmooth optimization.\\nSIAM Journal on Optimization , 23(2):1126–1153, 2013.\\n[165] R. T. Rockafellar. Convex Analysis . Princeton University Press, 1997.\\n[166] R. T. Rockafellar and S. Uryasev. 
Optimization of conditional value-\\nat-risk.Journal of Risk , 2:21–42, 2000.\\n[167] T. Roncalli. Introduction to Risk Parity and Budgeting . CRC Press,\\n2013.\\n[168] T. Roncalli and G. Weisang. Risk parity portfolios with risk factors.\\nAvailable at SSRN 2155159 , 2012.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 234}),\n",
       " Document(page_content='228 References\\n[169] A. Roy, T. S. McElroy, and P. Linton. Estimation of causal invertible\\nvarma models. arXiv preprint arXiv:1406.4584 , 2014.\\n[170] F. Rubio, X. Mestre, and D. P. Palomar. Performance analysis and\\noptimal selection of large minimum variance portfolios under estimation\\nrisk.IEEE Journal of Selected Topics in Signal Processing , 6(4):337–\\n350, 2012.\\n[171] D. Ruppert. Statistics and Data Analysis for Financial Engineering .\\nSpringer, 2010.\\n[172] S. Sarykalin, G. Serraino, and S. Uryasev. Value-at-risk vs. conditional\\nvalue-at-risk in risk management and optimization. Tutorials in Oper-\\nations Research. INFORMS, Hanover, MD , 2008.\\n[173] S. E. Satchell and B. Scherer. Fairness in trading: A microeconomic\\ninterpretation. Journal of Trading , 5:40–47, 2010.\\n[174] S. E. Satchell and A. Scowcroft. A demystiﬁcation of the black–\\nlitterman model: Managing quantitative and traditional portfolio con-\\nstruction. Journal of Asset Management , 1(2):138–150, 2000.\\n[175] M. W. P. Savelsbergh, R. A. Stubbs, and D. Vandenbussche. Multi-\\nportfolio optimization: A natural next step. In Handbook of Portfolio\\nConstruction , pages 565–581. Springer, 2010.\\n[176] L. L. Scharf. Statistical Signal Processing , volume 98. Addison-Wesley\\nReading, MA, 1991.\\n[177] Andrea Scozzari, Fabio Tardella, Sandra Paterlini, and Thiemo Krink.\\nExact and heuristic approaches for the index tracking problem with\\nucits constraints. Annals of Operations Research , 205(1):235–250, 2013.\\n[178] G. Scutari, F. Facchinei, Peiran Song, D. P. Palomar, and Jong-Shi\\nPang. Decomposition by partial linearization: Parallel optimization\\nof multi-agent systems. IEEE Transactions on Signal Processing ,\\n62(3):641–656, Feb. 2014.\\n[179] W. F. Sharpe. The sharpe ratio. Streetwise–the Best of the Journal of\\nPortfolio Management , pages 169–185, 1998.\\n[180] L. Shi and L. Xie. 
Optimal sensor power scheduling for state estima-\\ntion of Gauss–Markov systems over a packet-dropping network. IEEE\\nTransactions on Signal Processing , 60(5):2701–2705, May 2012.\\n[181] L. Shi and H. Zhang. Scheduling two Gauss–Markov systems: An op-\\ntimal solution for remote state estimation under bandwidth constraint.\\nIEEE Transactions on Signal Processing , 60(4):2038–2042, Apr. 2012.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 235}),\n",
       " Document(page_content='References 229\\n[182] A. Silvennoinen and T. Teräsvirta. Multivariate GARCH models. In\\nHandbook of Financial Time Series , pages 201–229. Springer, 2009.\\n[183] N. Y. Soltani, S.-J. Kim, and G. B. Giannakis. Chance-constrained\\noptimization of OFDMA cognitive radio uplinks. IEEE Transactions\\non Wireless Communications , 12(3):1098–1107, 2013.\\n[184] I. Song. New Quantitative Approaches to Asset Selection and Portfolio\\nConstruction . PhD thesis, Columbia University, 2014.\\n[185] J. Song, P. Babu, and D. P. Palomar. Sparse generalized eigenvalue\\nproblem via smooth optimization. IEEE Transactions on Signal Pro-\\ncessing, 63(7):1627–1642, April 2015.\\n[186] S. Song and P. J. Bickel. Large vector auto regressions. arXiv preprint\\narXiv:1106.3915 , 2011.\\n[187] C. Stein. Inadmissibility of the usual estimator for the mean of a\\nmultivariate normal distribution. In Proceedings of the Third Berke-\\nley Symposium on Mathematical Statistics and Probability , volume 1,\\npages 197–206, 1956.\\n[188] J. H. Stock and M. W. Watson. Testing for common trends. Journal of\\nthe American statistical Association , 83(404):1097–1107, 1988.\\n[189] J. F. Sturm. Using SeDuMi 1.02, a MATLAB toolbox for optimization\\noversymmetriccones. Optimization Methods and Software ,11(1-4):625–\\n653, 1999.\\n[190] Y. Sun, P. Babu, and D. P. Palomar. Regularized Tyler’s scatter esti-\\nmator: Existence, uniqueness, and algorithms. IEEE Transactions on\\nSignal Processing , 62(19):5143–5156, 2014.\\n[191] Y. Sun, P. Babu, and D. P. Palomar. Regularized robust estimation\\nof mean and covariance matrix under heavy-tailed distributions. IEEE\\nTransactions on Signal Processing , 63(12):3096–3109, June 2015.\\n[192] K. S. Tatsuoka and D. E. Tyler. On the uniqueness of S-functionals and\\nm-functionals under nonelliptical distributions. The Annals of Statis-\\ntics, pages 1219–1243, 2000.\\n[193] E. O. Thorp and S. T. Kassouf. 
Beat the Market: A Scientiﬁc Stock\\nMarket System . Random House New York, 1967.\\n[194] K.-C.Toh,M.J.Todd,andR.Tütüncü. Ontheimplementationandus-\\nage of SDPT3–a MATLAB software package for semideﬁnite-quadratic-\\nlinear programming, version 4.0. In Handbook on Semideﬁnite, Conic\\nand Polynomial Optimization , pages 715–754. Springer, 2012.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 236}),\n",
       " Document(page_content='230 References\\n[195] K. Triantafyllopoulos and G. Montana. Dynamic modeling of mean-\\nreverting spreads for statistical arbitrage. Computational Management\\nScience, 8(1-2):23–49, 2011.\\n[196] R. S. Tsay. Analysis of Financial Time Series , volume 543. Wiley-\\nInterscience, 3rd edition, 2010.\\n[197] R. S. Tsay. Multivariate Time Series Analysis: With R and Financial\\nApplications . John Wiley & Sons, 2013.\\n[198] D.N.C.Tse. OptimalpowerallocationoverparallelGaussianbroadcast\\nchannels. In Proceedings of the International Symposium on Information\\nTheory, page 27, 1997.\\n[199] A. M. Tulino and S. Verdú. Random matrix theory and wireless com-\\nmunications. Foundations and TrendsR⃝in Communications and Infor-\\nmation theory , 1(1):1–182, 2004.\\n[200] R. H. Tütüncü and M. Koenig. Robust asset allocation. Annals of\\nOperations Research , 132(1):157–187, 2004.\\n[201] D. E. Tyler. A distribution-free m-estimator of multivariate scatter.\\nThe Annals of Statistics , pages 234–251, 1987.\\n[202] D. E. Tyler. Statistical analysis for the angular central gaussian distri-\\nbution on the sphere. Biometrika , 74(3):579–589, 1987.\\n[203] G. Vidyamurthy. Pairs Trading: Quantitative Methods and Analysis ,\\nvolume 217. John Wiley & Sons, 2004.\\n[204] S. A. Vorobyov, A. B. Gershman, and Z. Q. Luo. Robust adaptive\\nbeamforming using worst-case performance optimization: A solution to\\nthe signal mismatch problem. IEEE Transactions on Signal Processing ,\\n51(2):313–324, 2003.\\n[205] S.A.Vorobyov,A.B.Gershman,Z.Q.Luo,andN.Ma. Adaptivebeam-\\nforming with joint robustness against mismatched signal steering vec-\\ntor and interference nonstationarity. Signal Processing Letters, IEEE ,\\n11(2):108–111, 2004.\\n[206] C. Wells. The Kalman Filter in Finance , volume 32. Springer Science\\n& Business Media, 1996.\\n[207] A. Wiesel. Uniﬁed framework to regularized covariance estimation\\nin scaled gaussian models. 
IEEE Transactions on Signal Processing ,\\n60(1):29–38, 2012.\\n[208] C. Yang and L. Shi. Deterministic sensor data scheduling under lim-\\nited communication resource. IEEE Transactions on Signal Processing ,\\n59(10):5050–5056, Oct. 2011.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 237}),\n",
       " Document(page_content='References 231\\n[209] D.YangandQ.Zhang. Drift-independentvolatilityestimationbasedon\\nhigh, low, open, and close prices. The Journal of Business , 73(3):477–\\n492, 2000.\\n[210] Y. Yang, F. Rubio, G. Scutari, and D. P. Palomar. Multi-portfolio\\noptimization: A potential game approach. IEEE Transactions on Signal\\nProcessing , 61(22):5590–5602, Nov. 2013.\\n[211] M.Yuan. Highdimensionalinversecovariancematrixestimationvialin-\\near programming. The Journal of Machine Learning Research , 11:2261–\\n2286, 2010.\\n[212] M. Zhang, F. Rubio, and D. P. Palomar. Improved calibration of high-\\ndimensional precision matrices. IEEE Transactions on Signal Process-\\ning, 61(6):1509–1519, 2013.\\n[213] M. Zhang, F. Rubio, D. P. Palomar, and X. Mestre. Finite-sample linear\\nﬁlter optimization in wireless communications and ﬁnancial systems.\\nIEEE Transactions on Signal Processing , 61(20):5014–5025, 2013.\\n[214] X. Zhang, H. V. Poor, and M. Chiang. Optimal power allocation for\\ndistributed detection over MIMO channels in wireless sensor networks.\\nIEEE Transactions on Signal Processing , 56(9):4124–4140, Sept. 2008.', metadata={'source': '/home/aktersnurra/projects/library/quant/math/a-signal-processing-perspective-on-financial-engineering.pdf', 'page': 238})]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p.parser.parse(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "9c040abf-089c-4aaf-a71e-ab3ebf4174f4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'abc.pdf'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Path(\"abc.pdf\").name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "85ca78a2-f772-4dfc-a5d8-3d6c6af6d843",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}