East vs West Strength




East vs. West

In this script, I adapted Daniel’s NBA rankings model to compare how teams in a given year performed against teams in their own conference with how they performed against teams in the other conference.

The general premise was to use the linear model Daniel developed for the YUSAG coefficient, and determine a team’s expected point differential for both intra- and inter-conference competition.

# Seasons to analyse (each season is referred to by a single year value)
years <- 2003:2019

# Accumulators for the per-team results of every season, one for each
# conference. Once filled they hold, per team and season: the team name, its
# expected point differential against its own conference, its expected point
# differential against the other conference, and the difference between the two.
west_strength_all <- data.frame()
east_strength_all <- data.frame()

# One row per season with the mean team-level difference for each conference
# (own-conference expectation minus other-conference expectation).
#   negative -> playing the other conference was easier
#   positive -> playing within the own conference was easier
# Both columns start at 0 and are overwritten season by season.
differences <- data.frame(
  year = years,
  west = numeric(length(years)),
  east = numeric(length(years))
)

# ---- Helpers ---------------------------------------------------------------

# Download one month of a season's schedule page and return only the games
# that already have a score.
#
# year  : season identifier used in the page URL
# month : lower-case month name used in the page URL
# Returns the page's `schedule` table with team names as character and two
# added numeric columns, PTSV (visitor points) and PTSH (home points).
# Stops with an explicit message when the page has no `schedule` table.
read_month_schedule <- function(year, month) {
  u <- paste0("https://www.basketball-reference.com/leagues/NBA_", year, "_games-", month, ".html")
  schedule <- readHTMLTable(getURL(u))$schedule
  if (is.null(schedule)) {
    stop("No schedule table found at ", u, call. = FALSE)
  }

  schedule$`Visitor/Neutral` <- as.character(schedule$`Visitor/Neutral`)
  schedule$`Home/Neutral` <- as.character(schedule$`Home/Neutral`)

  # Scores are taken by column position (4 = visitor, 6 = home).
  # NOTE(review): assumes the table layout is the same for every season -
  # TODO confirm.
  schedule$PTSV <- as.numeric(as.character(schedule[[4]]))
  schedule$PTSH <- as.numeric(as.character(schedule[[6]]))

  # Rows without a numeric visitor score are dropped.
  schedule[!is.na(schedule$PTSV), ]
}

# Mean predicted point differential of each team in `teams`, restricted to the
# rows of `clean` whose `conference` code equals `matchup` ("WW", "WE", "EW" or
# "EE"). A team with no matching rows yields NaN.
mean_predicted_margin <- function(clean, teams, matchup) {
  vapply(
    teams,
    function(team_name) {
      rows <- which(clean$team == team_name & clean$conference == matchup)
      mean(clean$predscore[rows])
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}

# ---- Conference membership -------------------------------------------------

# NOTE(review): these are fixed lists of team names. Any team name in the
# scraped data that is not listed here (for example a franchise appearing under
# a different name in an earlier season) gets conference NA below and is left
# out of every average - confirm the lists against the names that actually
# appear for each analysed season.

# West conference team names
west <- c("New Orleans Pelicans", "Denver Nuggets","Houston Rockets","Memphis Grizzlies",
          "Oklahoma City Thunder", "Sacramento Kings", "Phoenix Suns", "Los Angeles Lakers",
          "Minnesota Timberwolves","Los Angeles Clippers","Golden State Warriors","Portland Trail Blazers",
          "Dallas Mavericks","Utah Jazz","San Antonio Spurs")

# East conference team names
east <- c("Milwaukee Bucks", "Philadelphia 76ers","Boston Celtics","Brooklyn Nets","Miami Heat",
          "Indiana Pacers", "Orlando Magic", "Washington Wizards", "Detroit Pistons", "New York Knicks",
          "Toronto Raptors", "Chicago Bulls", "Charlotte Hornets", "Cleveland Cavaliers", "Atlanta Hawks")

# ---- Per-season analysis ---------------------------------------------------

for (j in seq_along(years)) {

  # Months to scrape for this season:
  #   2012 (2011-2012 lockout season) starts in December,
  #   2020 (ongoing when this was written) only has October,
  #   2000, 2005 and 2006 start in November,
  #   every other season starts in October.
  # NOTE(review): the original note says seasons run through April, but no
  # list below includes "april", so games on the April pages are never
  # scraped - confirm this is intentional.
  if (years[j] == 2012) {
    months <- c("december", "january", "february", "march")
  } else if (years[j] == 2020) {
    months <- c("october")
  } else if (years[j] %in% c(2000, 2005, 2006)) {
    months <- c("november", "december", "january", "february", "march")
  } else {
    months <- c("october", "november", "december", "january", "february", "march")
  }

  # Scraping and reshaping follow Daniel's original ranking script.
  # Months are fetched in order and stacked pairwise, exactly as an
  # incremental rbind would do.
  monthly <- lapply(months, function(m) read_month_schedule(years[j], m))
  frame <- Reduce(rbind, monthly)

  # One row per game; column names are the ones data.frame() derives from the
  # expressions, so they are kept exactly as before.
  games <- data.frame(frame$`Visitor/Neutral`,frame$PTSV,frame$`Home/Neutral`,frame$PTSH)
  games$frame..Visitor.Neutral. <- as.character(games$frame..Visitor.Neutral.)
  games$frame..Home.Neutral. <- as.character(games$frame..Home.Neutral.)

  # Point differential from the home team's point of view
  games$pt_dif <- games$frame.PTSH - games$frame.PTSV

  # Two rows per game: first every game from the home side ("H"), then every
  # game from the visiting side ("A") with the sign of the margin flipped.
  clean <- data.frame(
    team = c(games$frame..Home.Neutral., games$frame..Visitor.Neutral.),
    opponent = c(games$frame..Visitor.Neutral., games$frame..Home.Neutral.),
    location = rep(c("H", "A"), each = nrow(games)),
    ptdif = c(games$pt_dif, -games$pt_dif),
    stringsAsFactors = FALSE
  )

  # Label every row with the conference of the team followed by the conference
  # of its opponent: "WW", "WE", "EW" or "EE". Rows involving a team that is in
  # neither list stay NA.
  team_in_west <- clean$team %in% west
  team_in_east <- clean$team %in% east
  opp_in_west <- clean$opponent %in% west
  opp_in_east <- clean$opponent %in% east

  clean$conference <- rep(NA, nrow(clean))
  clean$conference[team_in_west & opp_in_west] <- "WW"
  clean$conference[team_in_west & opp_in_east] <- "WE"
  clean$conference[team_in_east & opp_in_west] <- "EW"
  clean$conference[team_in_east & opp_in_east] <- "EE"

  # Linear model predicting point differential from team, opponent and location
  lm.NBAhoops <- lm(ptdif ~ team + opponent + location, data = clean)

  # Predicted point differential for every row, using the fitted model
  clean$predscore <- predict(lm.NBAhoops, newdata = clean)

  # West teams: year, team, mean predicted differential vs. West teams, mean
  # predicted differential vs. East teams, and own-conference minus
  # other-conference.
  west_strength <- data.frame(
    year = rep(years[j], length(west)),
    team = west,
    vsWest = mean_predicted_margin(clean, west, "WW"),
    vsEast = mean_predicted_margin(clean, west, "WE"),
    stringsAsFactors = FALSE
  )
  west_strength$difference <- west_strength$vsWest - west_strength$vsEast

  # East teams: same layout, with the own-conference column (vsEast) first.
  east_strength <- data.frame(
    year = rep(years[j], length(east)),
    team = east,
    vsEast = mean_predicted_margin(clean, east, "EE"),
    vsWest = mean_predicted_margin(clean, east, "EW"),
    stringsAsFactors = FALSE
  )
  east_strength$difference <- east_strength$vsEast - east_strength$vsWest

  # Season-level summary: mean team difference per conference. Teams with no
  # matching games (NaN) are ignored.
  #   negative -> expected differential vs. own conference is lower than vs.
  #               the other conference, i.e. the own conference was the more
  #               difficult competition
  #   positive -> expected differential vs. own conference is higher than vs.
  #               the other conference, i.e. the own conference was the easier
  #               competition
  differences$west[j] <- mean(west_strength$difference, na.rm = TRUE)
  differences$east[j] <- mean(east_strength$difference, na.rm = TRUE)

  # Append this season's team-level tables to the running totals
  west_strength_all <- rbind(west_strength_all, west_strength)
  east_strength_all <- rbind(east_strength_all, east_strength)
}

In order to show how I determined conference strength, let’s take a look at the 2018-2019 season. Here we have the expected average point differential against teams in their own conference:

And here we have expected average point differential against teams in the other conference:

Now, we take the difference of the two to see how a team would fare on average against a team in its own conference vs. against a team in the other conference.

We let Difference = Expected Point Differential vs. Own Conference - Expected Point Differential vs. Other Conference.

A negative difference in expected points implies that a team faced more difficult competition in its own conference, because its expected point differential there is lower than against the other conference. A positive difference implies that a team faced easier competition in its own conference, because its expected point differential there is higher than against the other conference.

Let’s look at the data table to show this concept:

strength_all_2019[,1:5]
##      year                   team       vsOwn      vsOther difference
## 241  2019   New Orleans Pelicans  -1.8527653 -0.007396633 -1.8453686
## 242  2019         Denver Nuggets   3.4858291  5.121728754 -1.6358997
## 243  2019        Houston Rockets   2.9609285  4.581911401 -1.6209829
## 244  2019      Memphis Grizzlies  -3.5514147 -1.087313598 -2.4641011
## 245  2019  Oklahoma City Thunder   2.1765919  4.476678390 -2.3000864
## 246  2019       Sacramento Kings  -1.2119143  0.281789129 -1.4937034
## 247  2019           Phoenix Suns -10.1601314 -7.803920393 -2.3562110
## 248  2019     Los Angeles Lakers  -2.2308399 -0.271684112 -1.9591558
## 249  2019 Minnesota Timberwolves  -1.9729019  0.274974701 -2.2478766
## 250  2019   Los Angeles Clippers   0.9446929  2.755158008 -1.8104651
## 251  2019  Golden State Warriors   5.7691572  6.891365973 -1.1222088
## 252  2019 Portland Trail Blazers   3.1620253  5.184894551 -2.0228693
## 253  2019       Dallas Mavericks  -2.5429930  0.155885220 -2.6988782
## 254  2019              Utah Jazz   4.3925416  5.984501607 -1.5919600
## 255  2019      San Antonio Spurs   0.9731412  2.271960703 -1.2988195
## 2411 2019        Milwaukee Bucks  10.4607522  7.478754919  2.9819973
## 2421 2019     Philadelphia 76ers   4.0220732  1.826295208  2.1957780
## 2431 2019         Boston Celtics   5.3143684  2.707489535  2.6068788
## 2441 2019          Brooklyn Nets   0.7385882 -1.890454839  2.6290430
## 2451 2019             Miami Heat   0.9319921 -1.441504448  2.3734966
## 2461 2019         Indiana Pacers   4.5489063  2.073380155  2.4755261
## 2471 2019          Orlando Magic   0.9202070 -1.308324341  2.2285314
## 2481 2019     Washington Wizards  -1.9361107 -4.004502676  2.0683920
## 2491 2019        Detroit Pistons   0.7909131 -1.320136766  2.1110499
## 2501 2019        New York Knicks  -9.1879255 -9.660948364  0.4730229
## 2511 2019        Toronto Raptors   6.8011293  4.294682592  2.5064467
## 2521 2019          Chicago Bulls  -7.8006265 -9.012351879  1.2117254
## 2531 2019      Charlotte Hornets  -1.0091915 -2.591386039  1.5821946
## 2541 2019    Cleveland Cavaliers  -8.8497857 -9.986958897  1.1371732
## 2551 2019          Atlanta Hawks  -5.1625182 -7.006866508  1.8443484

We can see that in the 2018-2019 season, all teams in the West had negative differences and all teams in the East had positive differences, so every single team performed better against East teams, showing that the West was a more difficult conference.

Now, let’s look at conference difficulty over the years. To do so, for each season we average the teams’ differences in expected points within each conference, and report one value for the West and one for the East.

differences
##    year       west       east
## 1  2003 -3.6787882  3.4525185
## 2  2004 -4.2195707  4.3333932
## 3  2005 -2.1854581  2.1851562
## 4  2006 -1.4733855  1.4206663
## 5  2007 -2.3783676  2.4167772
## 6  2008 -1.8230313  1.9964151
## 7  2009  1.5880051 -1.6340583
## 8  2010 -0.8129574  1.0145512
## 9  2011 -1.4279316  1.3828644
## 10 2012 -1.0526572  0.4537844
## 11 2013 -2.5089158  2.4511754
## 12 2014 -4.3261536  4.2249408
## 13 2015 -3.1113872  3.0942225
## 14 2016 -0.7016765  0.7710714
## 15 2017 -2.1153684  2.0371947
## 16 2018 -1.0434534  1.0181025
## 17 2019 -1.8979058  2.0283736

We can see that in every single season except 2008-2009 the Western conference has been “stronger” than the East. Now, let’s look at a scatterplot over the seasons: