ETF Survival Scraper

by Jonathan Regenstein

Scrape etf closures

etf_closures_url <- "https://www.etf.com/etf-watch-tables/etf-closures"

etf_closures_html <- read_html(etf_closures_url)

etf_closures_tibble <-
etf_closures_html  %>% 
    html_nodes(xpath = '//*[@id="article-body-content"]/table') %>%
  html_table(fill = TRUE) %>% 
    .[[1]] %>%
    rename(close_date = X1, fund = X2, ticker = X3) %>% 
    slice(-1:-2) %>% 
    filter(!(ticker == "Source: FactSet") | !(nchar(close_date) < 2)) %>% 
    filter(!(ticker %in% c("2019", "2018", "2017", "2016"))) %>% 
    mutate(close_date = str_replace(close_date, "20177", "2017"),
        ticker = case_when(nchar(fund) < 8 ~ fund,
                                          TRUE ~ ticker),
           fund = case_when(nchar(close_date) > 10 ~ close_date,
                                          TRUE ~ fund),
           close_date = case_when(between(nchar(close_date), 3, 10) ~ close_date)) %>%
    fill(close_date) %>% 
    filter(nchar(fund) > 4) %>% 
    mutate(close_date = case_when(nchar(close_date) > 4 ~ lubridate::ymd(lubridate::parse_date_time(close_date, "%m/%d/%Y")),
                                  nchar(close_date) == 4 ~ lubridate::ymd(close_date, truncated = 2L) + months(6)))
## Warning: 564 failed to parse.
## Warning: 539 failed to parse.
etf_closures_tibble %>% 
  head()
##   close_date                                                       fund
## 1 2019-11-12               Xtrackers MSCI South Korea Hedged Equity ETF
## 2 2019-11-12           Xtrackers FTSE Emerging Comprehensive Factor ETF
## 3 2019-11-12            Xtrackers Russell 2000 Comprehensive Factor ETF
## 4 2019-11-12 Xtrackers Barclays International Corporate Bond Hedged ETF
## 5 2019-11-12  Xtrackers Barclays International Treasury Bond Hedged ETF
## 6 2019-10-22                      Reality Shares Fundstrat DQM Long ETF
##   ticker
## 1   DBKO
## 2   DEMG
## 3   DESC
## 4   IFIX
## 5   IGVT
## 6   DQML

Scrape ETF launches

etf_launches_url <- "https://www.etf.com/etf-watch-tables/etf-launches"


etf_launches_html <- read_html(etf_launches_url)


etf_launches_scraper_fun <- function(table = 1){
    xpath = paste('//*[@id="article-body-content"]/table[', table, ']', sep = "")
    
    etf_launches_html  %>% 
        html_nodes(xpath = xpath) %>%
        html_table(fill = TRUE) %>% 
        .[[1]] %>% 
        rename(launch_date = X1, fund = X2, ticker = X3, exchange = X4) %>% 
        slice(-1) %>% 
        mutate(launch_date = lubridate::ymd(lubridate::parse_date_time(launch_date, "%m/%d/%Y")))
}

etf_launches_tibble <- 
map_dfr(1:8, etf_launches_scraper_fun)
etf_launches_tibble %>% 
    left_join(etf_closures_tibble %>% select(-fund), by = "ticker") %>% 
    select(launch_date, close_date, ticker, fund) %>% 
    filter(!is.na(close_date) & close_date > launch_date) %>% 
    mutate(survival_time = close_date - launch_date) %>% 
    arrange(desc(survival_time)) %>% 
  mutate(launch_year = year(launch_date)) %>% 
  head()
##   launch_date close_date ticker
## 1  2012-08-15 2019-06-14   BBRC
## 2  2012-03-28 2018-12-27   BSCI
## 3  2013-02-12 2019-10-18   YMLI
## 4  2012-04-25 2018-12-27   BSJI
## 5  2012-04-13 2018-09-06   USAG
## 6  2014-01-23 2019-11-12   DBKO
##                                                     fund survival_time
## 1                              EGShares Beyond BRICs ETF     2494 days
## 2        Guggenheim BulletShares 2018 Corporate Bond ETF     2465 days
## 3           Yorkville High Income Infrastructure MLP ETF     2439 days
## 4 Guggenheim BulletShares 2018 High Yield Corporate Bond     2437 days
## 5                           U.S. Agricultural Index Fund     2337 days
## 6      db X-trackers MSCI South Korea Hedged Equity Fund     2119 days
##   launch_year
## 1        2012
## 2        2012
## 3        2013
## 4        2012
## 5        2012
## 6        2014
chosen_year <- 2014

etf_launch_close_tibble <- 
etf_launches_tibble %>% 
    left_join(etf_closures_tibble %>% select(-fund), by = "ticker") %>% 
    select(launch_date, close_date, ticker, fund) %>% 
    filter(!is.na(close_date) & close_date > launch_date) %>% 
    mutate(survival_time = close_date - launch_date) %>% 
    # arrange(desc(survival_time)) %>% 
  mutate(launch_year = year(launch_date)) %>% 
  group_by(launch_year) 

write_rds(etf_launch_close_tibble, "etf_launch_close_tibble.RDS")

etf_launch_close_tibble %>% 
  top_n(-10, survival_time) %>%
  filter(launch_year == chosen_year) %>% 
  ggplot(aes(x = ticker, y = survival_time, fill = ticker)) +
  geom_bar(stat='identity', width = .7) +
  coord_flip() +
  geom_text(
    aes(label = survival_time), 
    color = "white", 
    size = 5, 
    hjust = 1
  )  +
  cowplot::theme_minimal_vgrid(16) +
    ylab("days survived") +
  ggtitle(paste("10 Fastest Funds to Die, launched in ", chosen_year, sep = "")) +
  theme(
    axis.title.y = element_blank(), 
    legend.position = "none"
  ) 
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.

Share Comments ·