Freddie Mac Python

by Jonathan Regenstein

Let’s load some R packages

library(reticulate)
library(tidyverse)
library(highcharter)
library(lubridate)

Let’s load some Python libraries

import pandas as pd
import numpy as np
import sklearn
import datetime

We can import data via Python by inserting a Python chunk.

# Download the Freddie Mac workbook, skipping the first 5 rows and reading
# 532 rows.
# NOTE(review): the original author flagged nrows as needing a lookup -- confirm 532.
freddie = pd.read_excel(
    "http://www.freddiemac.com/fmac-resources/research/docs/State_and_US_SA.xls",
    skiprows=5,
    nrows=532,
)

# Parse each 'Month' string with the "%YM%m" format into a new 'date' column.
freddie['date'] = freddie['Month'].map(
    lambda month_label: datetime.datetime.strptime(month_label, "%YM%m")
)

# 'Month' is redundant once 'date' exists.
freddie = freddie.drop(columns=['Month'])

# Keep only rows dated after 1989-12-31.
freddie = freddie.loc[freddie['date'] > '1989-12-31']
#
# Reshape to long form: 'date' is kept as the identifier, every other column
# name becomes a 'location' value and its cell value goes into 'hpi'.
freddie_tidy = pd.melt(freddie,
                       ["date"],
                       var_name = "location",
                       value_name = "hpi")

# Rolling statistics of 'hpi' over a 10-row trailing window, computed
# separately for each location.
# transform() returns a result aligned to freddie_tidy's own index, so it can
# be assigned straight back as a column. groupby(...).apply() with pandas >= 2.0
# prepends the group key to the result's index, which makes the column
# assignment fail.
roll_window = 10
hpi_by_location = freddie_tidy.groupby('location')['hpi']

freddie_tidy['sma'] = hpi_by_location.transform(
    lambda hpi: hpi.rolling(center=False, window=roll_window).mean())

freddie_tidy['roll_sd'] = hpi_by_location.transform(
    lambda hpi: hpi.rolling(center=False, window=roll_window).std())

freddie_tidy['roll_max'] = hpi_by_location.transform(
    lambda hpi: hpi.rolling(center=False, window=roll_window).max())

freddie_tidy['roll_min'] = hpi_by_location.transform(
    lambda hpi: hpi.rolling(center=False, window=roll_window).min())

Now we can pass those Python objects to R chunks by prefixing the data frame name that we chose in the Python chunk with py$ (for example, py$freddie_tidy). We’ll create a few toy examples below but, in short, we can take advantage of any Python functions or models in previous chunks and then leverage data visualization libraries from R.

library(ggplot2)
# Rolling-max HPI over time, drawn as one line per location. Rows containing
# any NA are dropped first.
# `group = location` keeps each location's series as its own path; without it
# the un-faceted plots join every location's points into one zig-zag line.
roll_max_ggplot <- 
py$freddie_tidy %>% 
  na.omit() %>% 
  ggplot(aes(x = date, y = roll_max, group = location)) + 
  geom_line(color = "cornflowerblue")

roll_max_ggplot

# Overlay the rolling minimum in red; built once and reused for the faceted
# version below.
roll_max_min_ggplot <-
  roll_max_ggplot +
  geom_line(aes(y = roll_min), color = "red")

roll_max_min_ggplot

# Same overlay, split into one panel per location (10 panels per row).
roll_max_min_ggplot +
  facet_wrap(~location, ncol = 10)
# Rolling standard deviation over time, coloured by location and split into
# one panel per location (10 panels per row).
ggplot(
  data = py$freddie_tidy,
  mapping = aes(x = date, y = roll_sd, color = location)
) +
  geom_line(key_glyph = "timeseries") +
  facet_wrap(vars(location), ncol = 10)
# Rolling SD plotted against the rolling mean (sma) for FL, NY and CA, with a
# linear fit and its confidence band in each panel.
py$freddie_tidy %>% 
  filter(location %in% c("FL", "NY", "CA")) %>% 
  ggplot(aes(x = sma, 
             y = roll_sd, 
             color = location)) +
  theme_minimal() +
  geom_point(alpha = .5) +
  geom_smooth(method = "lm", se = TRUE, color = "purple", size = 1) +
  # Axis labels name the variables actually mapped (x = sma, y = roll_sd).
  ylab("HPI rolling standard deviation") +
  xlab("HPI rolling mean (sma)") +
  facet_wrap(~location, nrow = 3)
# Histogram (35 bins) of the rolling-mean HPI (sma) for FL, NY and CA combined.
py$freddie_tidy %>% 
  filter(location %in% c("FL", "NY", "CA")) %>% 
  ggplot(aes(x = sma)) +
  # theme_minimal() is a complete theme and replaces earlier theme() settings,
  # so the axis-text rotation must be added after it to take effect.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  geom_histogram(color = "blue", fill = "pink", bins = 35) +
  # sma is on the x axis; the y axis of a histogram is the bin count.
  xlab("Housing Price Index Smoothed")
# Interactive highcharter line chart of the rolling SD: one series per
# location, a custom tooltip format, and the legend switched off.
roll_sd_chart_data <- mutate(py$freddie_tidy, date = ymd(date))

hchart(
  roll_sd_chart_data,
  hcaes(x = date, y = roll_sd, group = location),
  type = "line"
) %>%
  hc_tooltip(pointFormat = "{point.location}: ${point.roll_sd: .2f}") %>%
  hc_legend(enabled = FALSE)
Share Comments