East vs. West
Alex Kane
In this script, I adapted Daniel’s NBA rankings model to compare how teams in a given year performed against teams in their own conference with how they performed against teams in the other conference.
The general premise was to use the linear model Daniel developed for the YUSAG coefficient, and determine a team’s expected point differential for both intra- and inter-conference competition.
# Seasons to analyze
years <- 2003:2019

# Accumulators for the per-team conference strength tables. Each will hold,
# for every season, the teams in the conference, their expected point
# differential against their own conference, their expected point differential
# against the other conference, and the difference between the two.
west_strength_all <- data.frame()
east_strength_all <- data.frame()

# One row per season holding the mean difference in expected points for the
# West and the East. A negative difference means that competition against the
# other conference was easier; a positive difference means that competition
# within the conference was easier.
differences <- data.frame(
  year = years,
  west = numeric(length(years)),
  east = numeric(length(years))
)
# Download one month of a season's schedule and return only the rows that
# already have a visitor score.
#   year:  season identifier used in the basketball-reference URL
#   month: lower-case month name used in the basketball-reference URL
# Returns the schedule table with numeric PTSV / PTSH columns added.
read_month_schedule <- function(year, month) {
  u <- paste0("https://www.basketball-reference.com/leagues/NBA_", year, "_games-", month, ".html")
  schedule <- readHTMLTable(getURL(u))$schedule
  if (is.null(schedule)) {
    stop("No schedule table found at ", u, call. = FALSE)
  }
  schedule$`Visitor/Neutral` <- as.character(schedule$`Visitor/Neutral`)
  schedule$`Home/Neutral` <- as.character(schedule$`Home/Neutral`)
  # The two score columns are picked by position (4th and 6th column)
  schedule$PTSV <- as.numeric(as.character(schedule[[4]]))
  schedule$PTSH <- as.numeric(as.character(schedule[[6]]))
  # Rows without a numeric visitor score (e.g. games not yet played) are dropped
  schedule[!is.na(schedule$PTSV), ]
}

# Mean predicted point differential of one team over the rows of `games_long`
# whose conference code equals `matchup` ("WW", "WE", "EW" or "EE").
# Returns NaN when the team has no such rows.
mean_predicted_diff <- function(games_long, team_name, matchup) {
  rows <- which(games_long$team == team_name & games_long$conference == matchup)
  mean(games_long$predscore[rows])
}

# For each year
for (j in seq_along(years)) {
  season <- years[j]

  # Months to download for the given season:
  # - 2011-2012 (lockout season): December through March
  # - 2019-2020 (ongoing): October only
  # - 1999-2000, 2004-2005 and 2005-2006: November through March
  # - all other seasons: October through March
  # NOTE(review): "april" is not in any of these lists, so games played in
  # April are never downloaded -- confirm this is intended.
  if (season == 2012) {
    months <- c("december", "january", "february", "march")
  } else if (season == 2020) {
    months <- c("october")
  } else if (season %in% c(2000, 2005, 2006)) {
    months <- c("november", "december", "january", "february", "march")
  } else {
    months <- c("october", "november", "december", "january", "february", "march")
  }

  # Stack the monthly schedules into one table of played games
  frame <- do.call(
    rbind,
    lapply(months, function(m) read_month_schedule(season, m))
  )

  # One row per game, with the point differential from the home team's view
  games <- data.frame(frame$`Visitor/Neutral`, frame$PTSV, frame$`Home/Neutral`, frame$PTSH)
  games$frame..Visitor.Neutral. <- as.character(games$frame..Visitor.Neutral.)
  games$frame..Home.Neutral. <- as.character(games$frame..Home.Neutral.)
  games$pt_dif <- games$frame.PTSH - games$frame.PTSV

  # Two rows per game: first all games from the home team's perspective ("H"),
  # then the same games from the visitor's perspective ("A") with the sign of
  # the point differential flipped.
  n_games <- nrow(games)
  clean <- data.frame(
    team = c(games$frame..Home.Neutral., games$frame..Visitor.Neutral.),
    opponent = c(games$frame..Visitor.Neutral., games$frame..Home.Neutral.),
    location = rep(c("H", "A"), each = n_games),
    ptdif = c(games$pt_dif, -games$pt_dif),
    stringsAsFactors = FALSE
  )

  # Western and Eastern conference team names.
  # NOTE(review): the same fixed name lists are applied to every season from
  # 2003 on. Any team whose scraped name in a given season is not in these
  # lists gets conference NA and NaN means below (which the na.rm = TRUE calls
  # further down then drop) -- verify against the scraped names for the
  # earlier seasons.
  west <- c(
    "New Orleans Pelicans", "Denver Nuggets", "Houston Rockets", "Memphis Grizzlies",
    "Oklahoma City Thunder", "Sacramento Kings", "Phoenix Suns", "Los Angeles Lakers",
    "Minnesota Timberwolves", "Los Angeles Clippers", "Golden State Warriors", "Portland Trail Blazers",
    "Dallas Mavericks", "Utah Jazz", "San Antonio Spurs"
  )
  east <- c(
    "Milwaukee Bucks", "Philadelphia 76ers", "Boston Celtics", "Brooklyn Nets", "Miami Heat",
    "Indiana Pacers", "Orlando Magic", "Washington Wizards", "Detroit Pistons", "New York Knicks",
    "Toronto Raptors", "Chicago Bulls", "Charlotte Hornets", "Cleveland Cavaliers", "Atlanta Hawks"
  )

  # Code each row by the conference of the team (first letter) and of its
  # opponent (second letter): "WW", "WE", "EW" or "EE". Rows involving a team
  # that is in neither list stay NA.
  team_in_west <- clean$team %in% west
  team_in_east <- clean$team %in% east
  opp_in_west <- clean$opponent %in% west
  opp_in_east <- clean$opponent %in% east
  clean$conference <- rep(NA, nrow(clean))
  clean$conference[team_in_west & opp_in_west] <- "WW"
  clean$conference[team_in_west & opp_in_east] <- "WE"
  clean$conference[team_in_east & opp_in_west] <- "EW"
  clean$conference[team_in_east & opp_in_east] <- "EE"

  # Linear model predicting point differential from team, opponent, and location
  lm.NBAhoops <- lm(ptdif ~ team + opponent + location, data = clean)
  # Predicted point differential for every row, using the linear model
  clean$predscore <- predict(lm.NBAhoops, newdata = clean)

  # West table: year, team, average predicted point differential vs. West
  # teams, vs. East teams, and (vs. West - vs. East).
  west_vs_west <- vapply(
    west, function(tm) mean_predicted_diff(clean, tm, "WW"),
    numeric(1), USE.NAMES = FALSE
  )
  west_vs_east <- vapply(
    west, function(tm) mean_predicted_diff(clean, tm, "WE"),
    numeric(1), USE.NAMES = FALSE
  )
  west_strength <- data.frame(
    year = season,
    team = west,
    vsWest = west_vs_west,
    vsEast = west_vs_east,
    difference = west_vs_west - west_vs_east,
    stringsAsFactors = FALSE
  )

  # East table: year, team, average predicted point differential vs. East
  # teams, vs. West teams, and (vs. East - vs. West).
  east_vs_east <- vapply(
    east, function(tm) mean_predicted_diff(clean, tm, "EE"),
    numeric(1), USE.NAMES = FALSE
  )
  east_vs_west <- vapply(
    east, function(tm) mean_predicted_diff(clean, tm, "EW"),
    numeric(1), USE.NAMES = FALSE
  )
  east_strength <- data.frame(
    year = season,
    team = east,
    vsEast = east_vs_east,
    vsWest = east_vs_west,
    difference = east_vs_east - east_vs_west,
    stringsAsFactors = FALSE
  )

  # Mean own-minus-other difference per conference for this season.
  # A negative value means the expected point differential vs. the own
  # conference was lower than vs. the other conference, i.e. competition
  # within the own conference was more difficult.
  # A positive value means the expected point differential vs. the own
  # conference was higher, i.e. competition within the own conference was
  # easier.
  differences$west[j] <- mean(west_strength$difference, na.rm = TRUE)
  differences$east[j] <- mean(east_strength$difference, na.rm = TRUE)

  # Append inside the loop so that seasons already processed are kept if a
  # later download fails.
  west_strength_all <- rbind(west_strength_all, west_strength)
  east_strength_all <- rbind(east_strength_all, east_strength)
}
In order to show how I determined conference strength, let’s take a look at the 2018-2019 season. Here we have the expected average point differential against teams in their own conference:
And here we have expected average point differential against teams in the other conference:
Now, we take the difference of the two to see how a team would fare on average against a team in its own conference vs. against a team in the other conference.
We let Difference = Expected Point Differential vs. Own Conference - Expected Point Differential vs. Other Conference.
A negative difference in expected points implies that a team faced more difficult competition in its own conference, because its expected point differential is lower there than against the other conference. A positive difference in expected points implies that a team faced easier competition in its own conference, because its expected point differential is higher there than against the other conference.
Let’s look at the data table to show this concept:
# Print the first five columns of strength_all_2019 (created outside this
# excerpt): year, team, vsOwn, vsOther, difference.
strength_all_2019[,1:5]
## year team vsOwn vsOther difference
## 241 2019 New Orleans Pelicans -1.8527653 -0.007396633 -1.8453686
## 242 2019 Denver Nuggets 3.4858291 5.121728754 -1.6358997
## 243 2019 Houston Rockets 2.9609285 4.581911401 -1.6209829
## 244 2019 Memphis Grizzlies -3.5514147 -1.087313598 -2.4641011
## 245 2019 Oklahoma City Thunder 2.1765919 4.476678390 -2.3000864
## 246 2019 Sacramento Kings -1.2119143 0.281789129 -1.4937034
## 247 2019 Phoenix Suns -10.1601314 -7.803920393 -2.3562110
## 248 2019 Los Angeles Lakers -2.2308399 -0.271684112 -1.9591558
## 249 2019 Minnesota Timberwolves -1.9729019 0.274974701 -2.2478766
## 250 2019 Los Angeles Clippers 0.9446929 2.755158008 -1.8104651
## 251 2019 Golden State Warriors 5.7691572 6.891365973 -1.1222088
## 252 2019 Portland Trail Blazers 3.1620253 5.184894551 -2.0228693
## 253 2019 Dallas Mavericks -2.5429930 0.155885220 -2.6988782
## 254 2019 Utah Jazz 4.3925416 5.984501607 -1.5919600
## 255 2019 San Antonio Spurs 0.9731412 2.271960703 -1.2988195
## 2411 2019 Milwaukee Bucks 10.4607522 7.478754919 2.9819973
## 2421 2019 Philadelphia 76ers 4.0220732 1.826295208 2.1957780
## 2431 2019 Boston Celtics 5.3143684 2.707489535 2.6068788
## 2441 2019 Brooklyn Nets 0.7385882 -1.890454839 2.6290430
## 2451 2019 Miami Heat 0.9319921 -1.441504448 2.3734966
## 2461 2019 Indiana Pacers 4.5489063 2.073380155 2.4755261
## 2471 2019 Orlando Magic 0.9202070 -1.308324341 2.2285314
## 2481 2019 Washington Wizards -1.9361107 -4.004502676 2.0683920
## 2491 2019 Detroit Pistons 0.7909131 -1.320136766 2.1110499
## 2501 2019 New York Knicks -9.1879255 -9.660948364 0.4730229
## 2511 2019 Toronto Raptors 6.8011293 4.294682592 2.5064467
## 2521 2019 Chicago Bulls -7.8006265 -9.012351879 1.2117254
## 2531 2019 Charlotte Hornets -1.0091915 -2.591386039 1.5821946
## 2541 2019 Cleveland Cavaliers -8.8497857 -9.986958897 1.1371732
## 2551 2019 Atlanta Hawks -5.1625182 -7.006866508 1.8443484
We can see that in the 2018-2019 season, all teams in the West had negative differences and all teams in the East had positive differences, so every single team performed better against East teams, showing that the West was a more difficult conference.
Now, let’s look at conference difficulty over the years. To do so, we take the average of each team’s expected point difference, and report the values for the East and the West.
# Print the per-season mean own-minus-other difference for each conference.
differences
## year west east
## 1 2003 -3.6787882 3.4525185
## 2 2004 -4.2195707 4.3333932
## 3 2005 -2.1854581 2.1851562
## 4 2006 -1.4733855 1.4206663
## 5 2007 -2.3783676 2.4167772
## 6 2008 -1.8230313 1.9964151
## 7 2009 1.5880051 -1.6340583
## 8 2010 -0.8129574 1.0145512
## 9 2011 -1.4279316 1.3828644
## 10 2012 -1.0526572 0.4537844
## 11 2013 -2.5089158 2.4511754
## 12 2014 -4.3261536 4.2249408
## 13 2015 -3.1113872 3.0942225
## 14 2016 -0.7016765 0.7710714
## 15 2017 -2.1153684 2.0371947
## 16 2018 -1.0434534 1.0181025
## 17 2019 -1.8979058 2.0283736
We can see that in every single season except 2008-2009 the Western conference has been “stronger” than the East. Now, let’s look at a scatterplot over the seasons: