by Jonathan Regenstein
Scrape etf closures
etf_closures_url <- "https://www.etf.com/etf-watch-tables/etf-closures"
etf_closures_html <- read_html(etf_closures_url)
etf_closures_tibble <-
etf_closures_html %>%
html_nodes(xpath = '//*[@id="article-body-content"]/table') %>%
html_table(fill = TRUE) %>%
.[[1]] %>%
rename(close_date = X1, fund = X2, ticker = X3) %>%
slice(-1:-2) %>%
filter(!(ticker == "Source: FactSet") | !(nchar(close_date) < 2)) %>%
filter(!(ticker %in% c("2019", "2018", "2017", "2016"))) %>%
mutate(close_date = str_replace(close_date, "20177", "2017"),
ticker = case_when(nchar(fund) < 8 ~ fund,
TRUE ~ ticker),
fund = case_when(nchar(close_date) > 10 ~ close_date,
TRUE ~ fund),
close_date = case_when(between(nchar(close_date), 3, 10) ~ close_date)) %>%
fill(close_date) %>%
filter(nchar(fund) > 4) %>%
mutate(close_date = case_when(nchar(close_date) > 4 ~ lubridate::ymd(lubridate::parse_date_time(close_date, "%m/%d/%Y")),
nchar(close_date) == 4 ~ lubridate::ymd(close_date, truncated = 2L) + months(6)))
## Warning: 564 failed to parse.
## Warning: 539 failed to parse.
etf_closures_tibble %>%
head()
## close_date fund
## 1 2019-11-12 Xtrackers MSCI South Korea Hedged Equity ETF
## 2 2019-11-12 Xtrackers FTSE Emerging Comprehensive Factor ETF
## 3 2019-11-12 Xtrackers Russell 2000 Comprehensive Factor ETF
## 4 2019-11-12 Xtrackers Barclays International Corporate Bond Hedged ETF
## 5 2019-11-12 Xtrackers Barclays International Treasury Bond Hedged ETF
## 6 2019-10-22 Reality Shares Fundstrat DQM Long ETF
## ticker
## 1 DBKO
## 2 DEMG
## 3 DESC
## 4 IFIX
## 5 IGVT
## 6 DQML
Scrape ETF launches
etf_launches_url <- "https://www.etf.com/etf-watch-tables/etf-launches"
etf_launches_html <- read_html(etf_launches_url)
etf_launches_scraper_fun <- function(table = 1){
xpath = paste('//*[@id="article-body-content"]/table[', table, ']', sep = "")
etf_launches_html %>%
html_nodes(xpath = xpath) %>%
html_table(fill = TRUE) %>%
.[[1]] %>%
rename(launch_date = X1, fund = X2, ticker = X3, exchange = X4) %>%
slice(-1) %>%
mutate(launch_date = lubridate::ymd(lubridate::parse_date_time(launch_date, "%m/%d/%Y")))
}
etf_launches_tibble <-
map_dfr(1:8, etf_launches_scraper_fun)
etf_launches_tibble %>%
left_join(etf_closures_tibble %>% select(-fund), by = "ticker") %>%
select(launch_date, close_date, ticker, fund) %>%
filter(!is.na(close_date) & close_date > launch_date) %>%
mutate(survival_time = close_date - launch_date) %>%
arrange(desc(survival_time)) %>%
mutate(launch_year = year(launch_date)) %>%
head()
## launch_date close_date ticker
## 1 2012-08-15 2019-06-14 BBRC
## 2 2012-03-28 2018-12-27 BSCI
## 3 2013-02-12 2019-10-18 YMLI
## 4 2012-04-25 2018-12-27 BSJI
## 5 2012-04-13 2018-09-06 USAG
## 6 2014-01-23 2019-11-12 DBKO
## fund survival_time
## 1 EGShares Beyond BRICs ETF 2494 days
## 2 Guggenheim BulletShares 2018 Corporate Bond ETF 2465 days
## 3 Yorkville High Income Infrastructure MLP ETF 2439 days
## 4 Guggenheim BulletShares 2018 High Yield Corporate Bond 2437 days
## 5 U.S. Agricultural Index Fund 2337 days
## 6 db X-trackers MSCI South Korea Hedged Equity Fund 2119 days
## launch_year
## 1 2012
## 2 2012
## 3 2013
## 4 2012
## 5 2012
## 6 2014
chosen_year <- 2014
etf_launch_close_tibble <-
etf_launches_tibble %>%
left_join(etf_closures_tibble %>% select(-fund), by = "ticker") %>%
select(launch_date, close_date, ticker, fund) %>%
filter(!is.na(close_date) & close_date > launch_date) %>%
mutate(survival_time = close_date - launch_date) %>%
# arrange(desc(survival_time)) %>%
mutate(launch_year = year(launch_date)) %>%
group_by(launch_year)
write_rds(etf_launch_close_tibble, "etf_launch_close_tibble.RDS")
etf_launch_close_tibble %>%
top_n(-10, survival_time) %>%
filter(launch_year == chosen_year) %>%
ggplot(aes(x = ticker, y = survival_time, fill = ticker)) +
geom_bar(stat='identity', width = .7) +
coord_flip() +
geom_text(
aes(label = survival_time),
color = "white",
size = 5,
hjust = 1
) +
cowplot::theme_minimal_vgrid(16) +
ylab("days survived") +
ggtitle(paste("10 Fastest Funds to Die, launched in ", chosen_year, sep = "")) +
theme(
axis.title.y = element_blank(),
legend.position = "none"
)
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.