by Jonathan Regenstein
Let’s load some R packages
library(reticulate)
library(tidyverse)
library(highcharter)
library(lubridate)
Let’s load the Python libraries we’ll need
import pandas as pd
import numpy as np
import sklearn
import datetime
We can import data via Python by inserting a Python chunk.
# Download the Freddie Mac workbook, skipping the first 5 rows of the
# sheet and reading at most 532 rows after that.
freddie = pd.read_excel(
    "http://www.freddiemac.com/fmac-resources/research/docs/State_and_US_SA.xls",
    skiprows=5,
    nrows=532,
)  # look up nrows

# `Month` holds strings in "%YM%m" form (e.g. "1990M01"); parse each one
# into a datetime and keep only the parsed column.
freddie["date"] = [
    datetime.datetime.strptime(month, "%YM%m") for month in freddie["Month"]
]
freddie = freddie.drop(columns=["Month"])

# Keep observations dated after 1989-12-31.
freddie = freddie.loc[freddie["date"] > "1989-12-31"]
# Reshape from wide (one column per location) to long: one row per
# (date, location) pair, with the index value stored in `hpi`.
freddie_tidy = pd.melt(
    freddie,
    id_vars=["date"],
    var_name="location",
    value_name="hpi",
)

# Rolling statistics over a 10-row window, computed separately for each
# location. `transform` returns a result aligned to freddie_tidy's own index,
# so it can be assigned straight back as a column. (`apply` prepends the
# group key to the index on pandas >= 2.0, which makes the assignment fail
# with an index mismatch.) The first 9 rows of every location are NaN
# because their window is incomplete.
freddie_tidy["sma"] = freddie_tidy.groupby("location")["hpi"].transform(
    lambda x: x.rolling(window=10, center=False).mean()
)
freddie_tidy["roll_sd"] = freddie_tidy.groupby("location")["hpi"].transform(
    lambda x: x.rolling(window=10, center=False).std()
)
freddie_tidy["roll_max"] = freddie_tidy.groupby("location")["hpi"].transform(
    lambda x: x.rolling(window=10, center=False).max()
)
freddie_tidy["roll_min"] = freddie_tidy.groupby("location")["hpi"].transform(
    lambda x: x.rolling(window=10, center=False).min()
)
Now we can pass those Python objects to R chunks by prefixing the data frame name that we chose in the Python chunk with py$ (for example, py$freddie_tidy).
We’ll create a few toy examples below but, in short, we can take advantage of any Python functions or models in previous chunks and then leverage data visualization libraries from R.
library(ggplot2)
# Rolling maximum of the index over time, drawn as one line per location.
# `group = location` keeps each location's series separate; without it
# geom_line() joins the points of every location into a single zigzag path.
# The grouping lives in the base aesthetics so that layers added to this
# plot later inherit it.
roll_max_ggplot <-
  py$freddie_tidy %>%
  # Drop rows containing any NA (e.g. incomplete rolling windows).
  na.omit() %>%
  ggplot(aes(x = date, y = roll_max, group = location)) +
  geom_line(color = "cornflowerblue")

# Print the plot.
roll_max_ggplot
# Overlay the rolling minimum in red on top of the rolling-maximum plot.
roll_max_ggplot +
  geom_line(mapping = aes(y = roll_min), colour = "red")
# Same max/min overlay, split into one panel per location (10 panels per row).
roll_max_ggplot +
  geom_line(mapping = aes(y = roll_min), colour = "red") +
  facet_wrap(vars(location), ncol = 10)
# Rolling standard deviation over time, coloured by location and drawn in
# one panel per location; the legend keys use the "timeseries" glyph.
ggplot(
  py$freddie_tidy,
  aes(x = date, y = roll_sd, colour = location)
) +
  geom_line(key_glyph = "timeseries") +
  facet_wrap(vars(location), ncol = 10)
# Scatter of rolling standard deviation against rolling mean for three
# states, with a linear fit per panel.
py$freddie_tidy %>%
  filter(location %in% c("FL", "NY", "CA")) %>%
  ggplot(aes(x = sma, y = roll_sd, color = location)) +
  geom_point(alpha = .5) +
  geom_smooth(method = "lm", se = TRUE, color = "purple", size = 1) +
  facet_wrap(~location, nrow = 3) +
  # Axis labels name the variables actually mapped: x is `sma` (rolling
  # mean of hpi) and y is `roll_sd` (rolling standard deviation of hpi).
  xlab("HPI rolling mean (sma)") +
  ylab("HPI rolling standard deviation (roll_sd)") +
  theme_minimal()
# Histogram of the rolling mean of the index (`sma`) for three states.
py$freddie_tidy %>%
  filter(location %in% c("FL", "NY", "CA")) %>%
  ggplot(aes(x = sma)) +
  geom_histogram(color = "blue", fill = "pink", bins = 35) +
  # theme_minimal() is a complete theme and replaces every theme element,
  # so the axis-text tweak must be added after it to take effect.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  # The smoothed index is on the x axis; the y axis is the bin count.
  xlab("Housing Price Index Smoothed") +
  ylab("Count")
# Interactive line chart of the rolling standard deviation, one series per
# location. The legend is hidden and the tooltip shows the location and the
# value formatted to two decimals.
py$freddie_tidy %>%
  mutate(date = ymd(date)) %>%
  hchart(
    hcaes(x = date, y = roll_sd, group = location),
    type = "line"
  ) %>%
  hc_tooltip(pointFormat = "{point.location}: ${point.roll_sd: .2f}") %>%
  hc_legend(enabled = FALSE)