How We Predict Today’s NBA Scores
daniel_tokarz
March 20, 2018
The first thing we do is scrape the entire schedule for this month from Basketball Reference. Then we find the current date (the date of the first game on the schedule that does not have a score yet) and eliminate every game that is not taking place on that date.
library(XML)
library(RCurl)
# Month whose schedule page is scraped (lower-case, as used in the URL).
cur_mon <- "march"
u <- paste0("https://www.basketball-reference.com/leagues/NBA_2018_games-", cur_mon, ".html")
newu <- getURL(u)
raw <- readHTMLTable(newu, as.is = TRUE)
#Once we have the full month, we'll take out all the unnecessary games
today <- raw$schedule
if (is.null(today)) {
  stop("No 'schedule' table found at ", u, call. = FALSE)
}
# Rows whose PTS entry is an empty string are treated as games not yet played.
unplayed <- which(today$PTS == "")
if (length(unplayed) == 0) {
  # Fail here with a clear message instead of carrying an NA date forward.
  stop("No unplayed games left in the ", cur_mon, " schedule", call. = FALSE)
}
# which() returns indices in increasing order, so the first one is the minimum.
index <- unplayed[1]
cur_date <- today$Date[index]
cur_date
## [1] Tue, Mar 20, 2018
## 31 Levels: Fri, Mar 16, 2018 Fri, Mar 2, 2018 ... Wed, Mar 7, 2018
# Schedule rows whose date matches the current date.
slate <- which(today$Date == cur_date)
# One row per game, with both team names stored as character columns.
predictions <- data.frame(
  Home = as.character(today$`Home/Neutral`[slate]),
  Away = as.character(today$`Visitor/Neutral`[slate]),
  stringsAsFactors = FALSE
)
# Every game on the slate is given the location code "H".
locs <- rep_len("H", length(slate))
#Let's take a look at today's games
predictions
## Home Away
## 1 Boston Celtics Oklahoma City Thunder
## 2 Minnesota Timberwolves Los Angeles Clippers
## 3 New Orleans Pelicans Dallas Mavericks
## 4 Orlando Magic Toronto Raptors
## 5 Phoenix Suns Detroit Pistons
## 6 Portland Trail Blazers Houston Rockets
## 7 Utah Jazz Atlanta Hawks
Now we define a function that takes in a team, an opponent, and a location, then uses our two linear models to output the predicted home score differential and home win probability. Be sure to check out our guide on how we make those models!
#' Predict the home score differential and home win probability for one game.
#'
#' Reads three objects from the enclosing environment: `rankings` (uses
#' `rankings$team` and `rankings$yusag_coeff`), `lm.NBAhoops` and
#' `glm.pointspread` (uses their coefficients).
#'
#' @param home Name of the home team; must appear in `rankings$team`.
#' @param away Name of the away team; must appear in `rankings$team`.
#' @param HN Location code: "H" (home game) or "N" (neutral site).
#' @return Numeric vector of length 2: the predicted point differential,
#'   then the win probability derived from it.
ptdif_call <- function(home, away, HN) {
  HN <- as.character(HN)
  if (length(HN) != 1L || is.na(HN) || !(HN %in% c("H", "N"))) {
    stop("HN must be \"H\" or \"N\", got: ", paste(HN, collapse = ", "),
         call. = FALSE)
  }

  # match() gives the first matching row, or NA when the team is unknown.
  home_row <- match(home, rankings$team)
  away_row <- match(away, rankings$team)
  if (is.na(home_row)) {
    stop("Team not found in rankings: ", home, call. = FALSE)
  }
  if (is.na(away_row)) {
    stop("Team not found in rankings: ", away, call. = FALSE)
  }
  r1 <- rankings$yusag_coeff[home_row]
  r2 <- rankings$yusag_coeff[away_row]

  if (HN == "H") {
    # Home games also subtract the first coefficient of lm.NBAhoops.
    # NOTE(review): presumably a home-court term; confirm the sign against
    # how lm.NBAhoops is fit.
    pt_dif <- r1 - r2 - coefficients(lm.NBAhoops)[[1]]
  } else {
    pt_dif <- r1 - r2
  }

  # Logistic transform using only the second coefficient of glm.pointspread;
  # its first coefficient is not used here.
  prob <- 1 / (1 + exp(-coefficients(glm.pointspread)[[2]] * pt_dif))

  unname(c(pt_dif, prob))
}
Now we call the function for each game that’s happening today.
# Fill in the predicted differential and home win probability for each game.
n_games <- length(slate)
predictions$pt_dif <- numeric(n_games)
predictions$home_prob <- numeric(n_games)
# seq_len() gives an empty loop when there are no games (1:0 would not).
for (i in seq_len(n_games)) {
  # Call the model once per game and reuse both returned values.
  game <- ptdif_call(predictions$Home[i], predictions$Away[i], locs[i])
  predictions$pt_dif[i] <- game[1]
  predictions$home_prob[i] <- game[2]
}
The last thing we do is clean up our final data frame by rounding and sorting the games. Then we’re good to go!
# Round both prediction columns to two decimals before sorting and export.
predictions$pt_dif <- round(predictions$pt_dif, digits = 2)
predictions$home_prob <- round(predictions$home_prob, digits = 2)
# Highest home win probability first; the sort uses the rounded values.
predictions <- predictions[order(predictions$home_prob, decreasing = TRUE), ]
# NOTE(review): assumes predictions has exactly these four columns, in this
# order, at this point.
names(predictions) <- c("Home", "Away", "home_pt_dif", "home_win_prob")
write.csv(predictions, "NBA_TODAY_PREDICTIONS.csv", row.names = FALSE)
#Let's take a look at our predictions for today
predictions
## Home Away home_pt_dif home_win_prob
## 7 Utah Jazz Atlanta Hawks 11.10 0.85
## 3 New Orleans Pelicans Dallas Mavericks 4.87 0.68
## 2 Minnesota Timberwolves Los Angeles Clippers 3.90 0.65
## 1 Boston Celtics Oklahoma City Thunder 2.64 0.60
## 6 Portland Trail Blazers Houston Rockets -4.07 0.35
## 5 Phoenix Suns Detroit Pistons -5.33 0.30
## 4 Orlando Magic Toronto Raptors -10.35 0.16
Follow @YUSAG_NBA on Twitter for the latest updates!