Load up our packages
install.packages("tidyverse")
install.packages("tidyquant")
install.packages("timetk")
install.packages("tibbletime")
install.packages("broom")
install.packages("dygraphs")
devtools::install_github("jbkunst/highcharter")
library(tidyverse)
library(tidyquant)
library(timetk)
library(tibbletime)
library(scales)
library(highcharter)
library(broom)
library(PerformanceAnalytics)
library(dygraphs)
Introducing R
+ Statistical programming language -> by data scientists, for data scientists
+ Base R + 17,000 packages
+ RStudio
+ Shiny
+ sparklyr
+ tensorflow
+ Rmarkdown
+ database connectors
+ htmlwidgets
Packages for finance
library(PerformanceAnalytics)
library(PortfolioAnalytics)
library(TTR)
library(tidyquant)
library(quantmod)
library(xts)
List of packages for finance here: https://cran.r-project.org/web/views/Finance.html
Packages for data visualization
library(ggplot2)
library(dygraphs)
library(highcharter)
library(shiny)
An example project
- Import data for 5 ETFs
- Visualize prices and returns
- Calculate some stats of interest
- Create an SMA 50 v. SMA 200
Run a rolling linear model, chart some results
- SPY (S&P500 fund)
- EFA (a non-US equities fund)
- IJS (a small-cap value fund)
- EEM (an emerging-mkts fund)
- AGG (a bond fund)
Import the Data
+ from excel using `read_excel`
+ from csv using `read_csv`
+ from Yahoo! Finance using `getSymbols` or `tq_get`
+ from myssql, postgres etc using `dbConnect`
+ via API with `httr` and `jsonlite` (important for Alternative Data)
Get data
# The symbols vector holds our tickers.
symbols <- c("SPY",
"EFA",
"IJS",
"EEM",
"AGG")
# data imported from Yahoo! Finance
etf_prices<-
tq_get(symbols, from = "2012-12-31") %>%
select(symbol, date, adjusted) %>%
spread(symbol, adjusted) %>%
tk_xts(date_var = date)
Inspect the data
head(etf_prices)
AGG EEM EFA IJS SPY
2012-12-31 97.30676 39.63340 48.20629 74.55437 127.1599
2013-01-02 97.19288 40.41088 48.95237 76.60001 130.4191
2013-01-03 96.94758 40.12492 48.47759 76.49866 130.1244
2013-01-04 97.05273 40.20534 48.72346 77.09759 130.6959
2013-01-07 97.00017 39.90150 48.51151 76.64607 130.3387
2013-01-08 97.08776 39.54404 48.24020 76.35121 129.9637
Visualize
dygraph(etf_prices)
Another Visualization
highchart(type = "stock") %>%
hc_add_series(etf_prices[,1]) %>%
hc_add_series(etf_prices[,2]) %>%
hc_add_series(etf_prices[,3]) %>%
hc_add_series(etf_prices[,4]) %>%
hc_add_series(etf_prices[,5]) %>%
hc_title(text = "Highcharting 5 ETFS") %>%
hc_yAxis(opposite = FALSE,
labels = list(format = "${value}")) %>%
hc_legend(enabled = TRUE) %>%
hc_navigator(enabled = FALSE) %>%
hc_exporting(enabled = TRUE)
Performance Analytics: getting started
etf_returns <-
Return.calculate(etf_prices, method = "log")
head(etf_returns)
AGG EEM EFA IJS
2012-12-31 NA NA NA NA
2013-01-02 -0.0011710460 0.019426883 0.015358255 0.027068430
2013-01-03 -0.0025270480 -0.007101493 -0.009746094 -0.001323970
2013-01-04 0.0010840087 0.002002185 0.005058969 0.007798811
2013-01-07 -0.0005416977 -0.007585831 -0.004359509 -0.005873638
2013-01-08 0.0009026116 -0.008998854 -0.005608370 -0.003854452
SPY
2012-12-31 NA
2013-01-02 0.025307299
2013-01-03 -0.002261673
2013-01-04 0.004382005
2013-01-07 -0.002736513
2013-01-08 -0.002881389
table.Stats(etf_returns)
AGG EEM EFA IJS SPY
Observations 1448.0000 1448.0000 1448.0000 1448.0000 1448.0000
NAs 1.0000 1.0000 1.0000 1.0000 1.0000
Minimum -0.0110 -0.0626 -0.0898 -0.0386 -0.0427
Quartile 1 -0.0011 -0.0069 -0.0040 -0.0048 -0.0028
Median 0.0001 0.0005 0.0005 0.0008 0.0007
Arithmetic Mean 0.0001 0.0001 0.0002 0.0005 0.0006
Geometric Mean 0.0001 0.0000 0.0002 0.0005 0.0005
Quartile 3 0.0013 0.0074 0.0053 0.0065 0.0048
Maximum 0.0084 0.0479 0.0327 0.0339 0.0390
SE Mean 0.0001 0.0003 0.0002 0.0002 0.0002
LCL Mean (0.95) 0.0000 -0.0005 -0.0002 0.0001 0.0002
UCL Mean (0.95) 0.0002 0.0007 0.0007 0.0010 0.0010
Variance 0.0000 0.0001 0.0001 0.0001 0.0001
Stdev 0.0020 0.0117 0.0089 0.0094 0.0077
Skewness -0.3176 -0.2652 -1.0628 -0.2618 -0.5979
Kurtosis 1.6349 1.3469 8.4793 0.9114 3.5034
table.CAPM(etf_returns, etf_returns$SPY)
AGG to SPY EEM to SPY EFA to SPY IJS to SPY SPY to SPY
Alpha 0.0001 -0.0006 -0.0003 0.0000 0.0000
Beta -0.0422 1.1464 0.9690 1.0166 1.0000
Beta+ -0.0297 1.1341 0.8953 0.9174 1.0000
Beta- -0.0412 1.0600 0.9930 0.9401 1.0000
R-squared 0.0266 0.5649 0.6962 0.6872 1.0000
Annualized Alpha 0.0199 -0.1408 -0.0769 -0.0091 0.0000
Correlation -0.1630 0.7516 0.8344 0.8290 1.0000
Correlation p-value 0.0000 0.0000 0.0000 0.0000 0.0000
Tracking Error 0.1307 0.1238 0.0781 0.0836 0.0000
Active Premium -0.1336 -0.1503 -0.0956 -0.0119 0.0000
Information Ratio -1.0226 -1.2134 -1.2251 -0.1422 NaN
Treynor Ratio -0.3139 -0.0029 0.0529 0.1328 0.1469
Scatter returns
etf_returns %>%
tk_tbl(rename_index = "date") %>%
select(-date) %>%
gather(symbol, return, -SPY) %>%
ggplot(aes(x = SPY, y = return, color = symbol)) +
geom_point(alpha = .5) +
facet_wrap(~symbol)
Add regression line
etf_returns %>%
tk_tbl(rename_index = "date") %>%
dplyr::select(-date) %>%
gather(symbol, return, -SPY) %>%
ggplot(aes(x = SPY, y = return, color = symbol)) +
geom_point(alpha = .5) +
geom_smooth(formula = y ~ x, se = TRUE) +
facet_wrap(~symbol)
Interactive Scatter
etf_returns %>%
tk_tbl(rename_index = "date") %>%
hchart(., type = "scatter", hcaes(x = SPY, y = EEM, date = date)) %>%
hc_xAxis(title = list(text = "Market Returns"),
labels = list(format = "{value}%")) %>%
hc_yAxis(title = list(text = "EEM Returns"),
labels = list(format = "{value}%")) %>%
hc_title(text = "Emerging Market v. SPY") %>%
hc_tooltip(pointFormat = "date: {point.date} <br>
EEM return: {point.y:.4f} <br>
mkt return: {point.x:.4f}")
Grab beta or slope of regression line.
slope <- table.CAPM(etf_returns, etf_returns$SPY)[2, 2]
Add the regression line to the original scatter
etf_returns_tibble <-
etf_returns %>%
tk_tbl(rename_index = "date")
hchart(etf_returns_tibble, type = "scatter",
hcaes(x = SPY, y = EEM, date = date)) %>%
hc_add_series(etf_returns_tibble, "line",
hcaes(x = SPY,
y = SPY * slope)) %>%
hc_xAxis(title = list(text = "Market Returns"),
labels = list(format = "{value}%")) %>%
hc_yAxis(title = list(text = "EEM Returns"),
labels = list(format = "{value}%")) %>%
hc_title(text = "Scatter with Beta Line")
Other Nice Functions (too many to list)
table.DownsideRisk(etf_returns, Rf= .0003)
AGG EEM EFA IJS SPY
Semi Deviation 0.0015 0.0085 0.0067 0.0069 0.0057
Gain Deviation 0.0012 0.0070 0.0053 0.0057 0.0049
Loss Deviation 0.0014 0.0080 0.0071 0.0064 0.0061
Downside Deviation (MAR=210%) 0.0085 0.0137 0.0118 0.0118 0.0107
Downside Deviation (Rf=7.56%) 0.0016 0.0087 0.0068 0.0067 0.0056
Downside Deviation (0%) 0.0014 0.0085 0.0066 0.0066 0.0055
Maximum Drawdown 0.0518 0.3794 0.2476 0.2148 0.1399
Historical VaR (95%) -0.0033 -0.0193 -0.0149 -0.0151 -0.0128
Historical ES (95%) -0.0045 -0.0268 -0.0219 -0.0209 -0.0190
Modified VaR (95%) -0.0033 -0.0197 -0.0154 -0.0154 -0.0128
Modified ES (95%) -0.0049 -0.0284 -0.0360 -0.0216 -0.0225
table.Drawdowns(etf_returns$EEM)
From Trough To Depth Length To Trough Recovery
1 2014-09-08 2016-01-20 2017-09-18 -0.3794 764 345 419
2 2018-01-29 2018-09-10 <NA> -0.2158 172 156 NA
3 2013-01-03 2013-06-24 2014-07-24 -0.1948 392 119 273
4 2017-11-24 2017-12-06 2018-01-02 -0.0533 26 9 17
5 2014-07-29 2014-08-07 2014-08-19 -0.0396 16 8 8
SharpeRatio(etf_returns, Rf = .0003)
AGG EEM EFA
StdDev Sharpe (Rf=0%, p=95%): -0.12391054 -0.020919066 -0.006924686
VaR Sharpe (Rf=0%, p=95%): -0.07409206 -0.012404382 -0.004008990
ES Sharpe (Rf=0%, p=95%): -0.04986439 -0.008618933 -0.001713013
IJS SPY
StdDev Sharpe (Rf=0%, p=95%): 0.02623972 0.03563936
VaR Sharpe (Rf=0%, p=95%): 0.01599121 0.02144298
ES Sharpe (Rf=0%, p=95%): 0.01141275 0.01215960
InformationRatio(etf_returns, etf_returns$SPY)
AGG EEM EFA IJS SPY
Information Ratio: SPY -1.022615 -1.213369 -1.225114 -0.1422194 NaN
SemiDeviation(etf_returns)
AGG EEM EFA IJS SPY
Semi-Deviation 0.001450925 0.008533412 0.006743213 0.006862133 0.005725545
Standard Deviation of each asset
StdDev(na.omit(etf_returns))
AGG EEM EFA IJS SPY
StdDev 0.001984284 0.01170326 0.008911274 0.009409863 0.007673235
Portfolio Standard Deviation
StdDev(na.omit(etf_returns),
weights = c(.1, .2, .2, .2, .3))
[,1]
[1,] 0.007505251
Contribution to Portfolio Standard Deviation
StdDev(na.omit(etf_returns),
weights = c(.1, .2, .2, .2, .3),
portfolio_method = "component")
$StdDev
[1] 0.007505251
$contribution
AGG EEM EFA IJS SPY
-1.886601e-05 2.090247e-03 1.637633e-03 1.625859e-03 2.170378e-03
$pct_contrib_StdDev
AGG EEM EFA IJS SPY
-0.002513708 0.278504648 0.218198318 0.216629513 0.289181229
Visualize Contribution
StdDev(na.omit(etf_returns),
weights = c(.1, .2, .2, .2, .3),
portfolio_method = "component") %>%
as.tibble() %>%
add_column(symbols = sort(symbols)) %>%
ggplot(aes(x = symbols, y = pct_contrib_StdDev, fill = symbols)) +
geom_col(width = .5) +
labs(y = "percent contribution to vol", x = "", title = "Asset Contribution to Vol") +
scale_y_continuous(labels = scales::percent)
Rolling Mean Calculations and Visualization: our own functions
sma_50 <- rollify(mean, window = 50)
sma_200 <- rollify(mean, window = 200)
sd_50 <- rollify(sd, window = 50)
sd_200 <- rollify(sd, window = 200)
etf_rolling_calculations <-
etf_prices %>%
tk_tbl(rename_index = "date") %>%
select(date, SPY) %>%
mutate(sma50 = sma_50(SPY),
sma200 = sma_200(SPY),
sd200_lower = sma200 - sd_200(SPY),
sd200_upper = sma200 + sd_200(SPY),
signal = ifelse(sma50 > sma200, 1, 0)
) %>%
select(-SPY) %>%
na.omit()
highchart()%>%
hc_add_series(etf_rolling_calculations, type = "line",
hcaes(x = date, y = sma200),
name = "sma200"
) %>%
hc_add_series(etf_rolling_calculations, type = "line",
hcaes(x = date, y = sma50),
name = "sma50",
color = "green") %>%
hc_add_series(etf_rolling_calculations,
type = "arearange",
hcaes(x = date,
low = sd200_lower,
high = sd200_upper),
color = "pink",
alpha = .25,
fillOpacity = 0.3,
showInLegend = FALSE
) %>%
hc_title(text = "SMA 50 v. SMA 200") %>%
hc_xAxis(type = 'datetime') %>%
hc_navigator(enabled = FALSE) %>%
hc_scrollbar(enabled = FALSE) %>%
hc_legend(enabled = TRUE) %>%
hc_exporting(enabled = TRUE)
Calculate growth of dollar
SMA trend strategy versus buy and hold
etf_trend <-
etf_prices %>%
tk_tbl(rename_index = "date") %>%
select(date, SPY) %>%
mutate(sma50 = sma_50(SPY),
sma200 = sma_200(SPY),
returns = log(SPY) - log(lag(SPY)),
signal = ifelse(sma50 > sma200, 1, 0),
daily_treas = (1 + (2/100)) ^ (1/252) - 1,
buy_hold_returns = (.9 * returns) + (.1 * daily_treas),
trend_returns = if_else(lag(signal) == 1, (signal * returns), daily_treas)
) %>%
na.omit() %>%
mutate(
trend_growth = accumulate(1 + trend_returns, `*`),
buy_hold_growth = accumulate(1 + buy_hold_returns, `*`)) %>%
select(date, trend_growth, buy_hold_growth) %>%
tk_xts(date_var = date)
Visualize Strategy versus Buy and Hold
highchart(type = "stock") %>%
hc_title(text = "Growth") %>%
hc_add_series(etf_trend$trend_growth, color = "cornflowerblue", name = "trend") %>%
hc_add_series(etf_trend$buy_hold_growth, color = "green", name = "buy_hold") %>%
hc_add_theme(hc_theme_flat()) %>%
hc_navigator(enabled = FALSE) %>%
hc_scrollbar(enabled = FALSE) %>%
hc_legend(enabled = TRUE)
Rolling Models
rolling_lm <- rollify(.f = function(EEM, SPY) {
lm(EEM ~ SPY)
},
window = 100,
unlist = FALSE)
etf_returns_tibble %>%
select(date, EEM, SPY) %>%
na.omit() %>%
mutate(rolling_beta = rolling_lm(EEM, SPY)) %>%
slice(-1:-99) %>%
mutate(glanced = map(rolling_beta,
glance)) %>%
unnest(glanced)
# A tibble: 1,349 x 15
date EEM SPY rolling_beta r.squared adj.r.squared
<date> <dbl> <dbl> <list> <dbl> <dbl>
1 2013-05-24 -0.00871 -0.000846 <S3: lm> 0.641 0.638
2 2013-05-28 0.00543 0.00597 <S3: lm> 0.623 0.619
3 2013-05-29 -0.0128 -0.00652 <S3: lm> 0.627 0.623
4 2013-05-30 0 0.00369 <S3: lm> 0.627 0.623
5 2013-05-31 -0.0183 -0.0145 <S3: lm> 0.641 0.637
6 2013-06-03 0.0147 0.00549 <S3: lm> 0.635 0.631
7 2013-06-04 -0.0123 -0.00482 <S3: lm> 0.638 0.634
8 2013-06-05 -0.0181 -0.0141 <S3: lm> 0.648 0.645
9 2013-06-06 0.00835 0.00901 <S3: lm> 0.655 0.651
10 2013-06-07 -0.00515 0.0126 <S3: lm> 0.628 0.624
# ... with 1,339 more rows, and 9 more variables: sigma <dbl>,
# statistic <dbl>, p.value <dbl>, df <int>, logLik <dbl>, AIC <dbl>,
# BIC <dbl>, deviance <dbl>, df.residual <int>
rolling_model_results <-
etf_returns_tibble %>%
select(date, EEM, SPY) %>%
na.omit() %>%
mutate(rolling_beta = rolling_lm(EEM, SPY)) %>%
slice(-1:-99) %>%
mutate(glanced = map(rolling_beta,
glance)) %>%
unnest(glanced) %>%
select(date, r.squared, adj.r.squared, p.value)
rolling_model_results %>%
hchart(., hcaes(x = date, y = r.squared), type = "line") %>%
hc_title(text = "Rolling R-Squared")
Other Packages of Interest
library(forecast) # Good out of the box forecasting tools. Useful for macro trends.
library(h2o) # machine learning libraries
library(keras) # deep learning tensorflow.rstudio.com
library(lime) # for ML white-boxing
library(ranger) # random forest
library(recipes) # for ML preprocessing
library(rsample) # for resampling
library(caret) # classification and regression
library(tidytext) # parse text and mining
library(tidyposterior) # posthoc after resampling
Get Started
Download R: https://cloud.r-project.org/
Download RStudio: www.rstudio.com/products/rstudio/download/#download
datacamp course: www.datacamp.com/tracks/applied-finance-with-r