.Rhistory

dev_phantom <- data.frame('AtHome'=c(dev[,'AtHome'], rep(0, n_teams)))
dev_phantom$y <- cbind(c(dev$y, rep(1, n_teams)), c(1-dev$y, rep(1, n_teams))) # need 64 games
dev_phantom$X <- X
w <- c(rep(1, nrow(dev)), rep(rho, n_teams))
model = glm(y~X+AtHome+0, family="binomial", weights=w, data=dev_phantom)
return (model)
}
# Cross-validation: pick the phantom-game weight rho that maximises the summed
# validation log-likelihood, refit on the full data, then compare the resulting
# estimates with the pseudo-game estimates.
K <- 10
ls.rho <- 5:17 # candidate phantom-game weights
phantom.plr.cv.log.likelihood <- rep(0, length(ls.rho))
for (i in seq_len(K)) {
  print(paste("K =", i))
  # presumably dev.val.split() returns the (development, validation) pair for
  # fold i -- its definition is not visible here
  split <- dev.val.split(i, nfl)
  dev1 <- as.data.frame(split[1])
  val <- as.data.frame(split[2])
  # The validation design matrix does not depend on rho, so build it once per
  # fold rather than once per candidate weight.
  val <- val[, c("Home", "Away", "AtHome", "y")]
  val$X <- outer(val$Home, Teams, "==") - outer(val$Away, Teams, "==")
  for (j in seq_along(ls.rho)) {
    model <- phantom.player.regularization(dev1, ls.rho[j])
    p <- predict(model, val, type = "response")
    # Bernoulli log-likelihood of the held-out outcomes
    ll <- sum(val$y * log(p) + (1 - val$y) * log(1 - p))
    phantom.plr.cv.log.likelihood[j] <- phantom.plr.cv.log.likelihood[j] + ll
  }
}
best.rho <- ls.rho[which.max(phantom.plr.cv.log.likelihood)] # calculate best rho
# refit model with best rho
ability.phantom.plr.logit.best <- phantom.player.regularization(nfl, best.rho)
Teams.ability.logit$Phantom.rho.opt <-
  ability.phantom.plr.logit.best$coefficients
oo <- order(Teams.ability.logit$Phantom.rho.opt, decreasing = TRUE)
Teams.ability.order <- Teams.ability.logit[oo, ]
# Validation log-likelihood as a function of rho, with the winner marked
plot(ls.rho, phantom.plr.cv.log.likelihood, type = "b", col = "blue", lwd = 3,
     xlab = "rho", ylab = "Validation log-likelihood")
abline(v = best.rho, lwd = 3, col = "red")
cat("The optimized weight is", best.rho, "\n")
# Scatter of phantom vs pseudo-game estimates on a common axis range, with the
# identity line for reference
xylim <- range(c(Teams.ability.logit$Phantom.rho.opt,
                 Teams.ability.logit$Pseudo.delta.opt))
plot(Teams.ability.logit$Phantom.rho.opt, Teams.ability.logit$Pseudo.delta.opt,
     pch = 19, col = "red",
     xlim = xylim, ylim = xylim,
     xlab = "Phantom Estimates",
     ylab = "Pseudo-game Estimates")
abline(0, 1, lwd = 2, col = "blue")
par(pty = "m")
# Regularization with phantom player
#
# Fits a binomial GLM (no intercept) on the games in `dev`, augmented with one
# extra row per team whose response is one success and one failure and whose
# prior weight is `rho`.
#   dev : data frame with columns Home, Away, AtHome and y
#   rho : weight given to each phantom row
# Reads the globals `Teams` and `n_teams`. Returns the fitted glm object.
phantom.player.regularization <- function(dev, rho) {
  n_games <- nrow(dev)
  # +1 for the home team, -1 for the away team, one column per entry of Teams
  home_ind <- outer(dev$Home, Teams, "==")
  away_ind <- outer(dev$Away, Teams, "==")

  # Phantom rows carry AtHome = 0
  dev_phantom <- data.frame(AtHome = c(dev[, "AtHome"], rep(0, n_teams)))
  # Two-column (successes, failures) response; every phantom row is (1, 1)
  successes <- c(dev$y, rep(1, n_teams))
  failures <- c(1 - dev$y, rep(1, n_teams))
  dev_phantom$y <- cbind(successes, failures, deparse.level = 0)
  # Each phantom row touches exactly one team (identity block)
  dev_phantom$X <- rbind(home_ind - away_ind, diag(n_teams))

  w <- c(rep(1, n_games), rep(rho, n_teams))
  glm(y ~ X + AtHome + 0, family = "binomial", weights = w, data = dev_phantom)
}
# Cross-validation: pick the phantom-game weight rho that maximises the summed
# validation log-likelihood, refit on the full data, then compare the resulting
# estimates with the pseudo-game estimates.
K <- 10
ls.rho <- 5:17 # candidate phantom-game weights
phantom.plr.cv.log.likelihood <- rep(0, length(ls.rho))
for (i in seq_len(K)) {
  print(paste("K =", i))
  # presumably dev.val.split() returns the (development, validation) pair for
  # fold i -- its definition is not visible here
  split <- dev.val.split(i, nfl)
  dev1 <- as.data.frame(split[1])
  val <- as.data.frame(split[2])
  # The validation design matrix does not depend on rho, so build it once per
  # fold rather than once per candidate weight.
  val <- val[, c("Home", "Away", "AtHome", "y")]
  val$X <- outer(val$Home, Teams, "==") - outer(val$Away, Teams, "==")
  for (j in seq_along(ls.rho)) {
    model <- phantom.player.regularization(dev1, ls.rho[j])
    p <- predict(model, val, type = "response")
    # Bernoulli log-likelihood of the held-out outcomes
    ll <- sum(val$y * log(p) + (1 - val$y) * log(1 - p))
    phantom.plr.cv.log.likelihood[j] <- phantom.plr.cv.log.likelihood[j] + ll
  }
}
best.rho <- ls.rho[which.max(phantom.plr.cv.log.likelihood)] # calculate best rho
# refit model with best rho
ability.phantom.plr.logit.best <- phantom.player.regularization(nfl, best.rho)
Teams.ability.logit$Phantom.rho.opt <-
  ability.phantom.plr.logit.best$coefficients
oo <- order(Teams.ability.logit$Phantom.rho.opt, decreasing = TRUE)
Teams.ability.order <- Teams.ability.logit[oo, ]
# Validation log-likelihood as a function of rho, with the winner marked
plot(ls.rho, phantom.plr.cv.log.likelihood, type = "b", col = "blue", lwd = 3,
     xlab = "rho", ylab = "Validation log-likelihood")
abline(v = best.rho, lwd = 3, col = "red")
cat("The optimized weight is", best.rho, "\n")
# Scatter of phantom vs pseudo-game estimates on a common axis range, with the
# identity line for reference
xylim <- range(c(Teams.ability.logit$Phantom.rho.opt,
                 Teams.ability.logit$Pseudo.delta.opt))
plot(Teams.ability.logit$Phantom.rho.opt, Teams.ability.logit$Pseudo.delta.opt,
     pch = 19, col = "red",
     xlim = xylim, ylim = xylim,
     xlab = "Phantom Estimates",
     ylab = "Pseudo-game Estimates")
abline(0, 1, lwd = 2, col = "blue")
par(pty = "m")
# Regularization with phantom player
#
# Fits a binomial GLM (no intercept) on the games in `dev`, augmented with one
# extra row per team whose response is one success and one failure and whose
# prior weight is `rho`.
#   dev : data frame with columns Home, Away, AtHome and y
#   rho : weight given to each phantom row
# Reads the globals `Teams` and `n_teams`. Returns the fitted glm object.
phantom.player.regularization <- function(dev, rho) {
  n_games <- nrow(dev)
  # +1 for the home team, -1 for the away team, one column per entry of Teams
  home_ind <- outer(dev$Home, Teams, "==")
  away_ind <- outer(dev$Away, Teams, "==")

  # Phantom rows carry AtHome = 0
  dev_phantom <- data.frame(AtHome = c(dev[, "AtHome"], rep(0, n_teams)))
  # Two-column (successes, failures) response; every phantom row is (1, 1)
  successes <- c(dev$y, rep(1, n_teams))
  failures <- c(1 - dev$y, rep(1, n_teams))
  dev_phantom$y <- cbind(successes, failures, deparse.level = 0)
  # Each phantom row touches exactly one team (identity block)
  dev_phantom$X <- rbind(home_ind - away_ind, diag(n_teams))

  w <- c(rep(1, n_games), rep(rho, n_teams))
  glm(y ~ X + AtHome + 0, family = "binomial", weights = w, data = dev_phantom)
}
# Cross-validation: pick the phantom-game weight rho that maximises the summed
# validation log-likelihood, refit on the full data, then compare the resulting
# estimates with the pseudo-game estimates.
K <- 10
ls.rho <- 5:17 # candidate phantom-game weights
phantom.plr.cv.log.likelihood <- rep(0, length(ls.rho))
for (i in seq_len(K)) {
  print(paste("K =", i))
  # presumably dev.val.split() returns the (development, validation) pair for
  # fold i -- its definition is not visible here
  split <- dev.val.split(i, nfl)
  dev1 <- as.data.frame(split[1])
  val <- as.data.frame(split[2])
  # The validation design matrix does not depend on rho, so build it once per
  # fold rather than once per candidate weight.
  val <- val[, c("Home", "Away", "AtHome", "y")]
  val$X <- outer(val$Home, Teams, "==") - outer(val$Away, Teams, "==")
  for (j in seq_along(ls.rho)) {
    model <- phantom.player.regularization(dev1, ls.rho[j])
    p <- predict(model, val, type = "response")
    # Bernoulli log-likelihood of the held-out outcomes
    ll <- sum(val$y * log(p) + (1 - val$y) * log(1 - p))
    phantom.plr.cv.log.likelihood[j] <- phantom.plr.cv.log.likelihood[j] + ll
  }
}
best.rho <- ls.rho[which.max(phantom.plr.cv.log.likelihood)] # calculate best rho
# refit model with best rho
ability.phantom.plr.logit.best <- phantom.player.regularization(nfl, best.rho)
Teams.ability.logit$Phantom.rho.opt <-
  ability.phantom.plr.logit.best$coefficients
oo <- order(Teams.ability.logit$Phantom.rho.opt, decreasing = TRUE)
Teams.ability.order <- Teams.ability.logit[oo, ]
# Validation log-likelihood as a function of rho, with the winner marked
plot(ls.rho, phantom.plr.cv.log.likelihood, type = "b", col = "blue", lwd = 3,
     xlab = "rho", ylab = "Validation log-likelihood")
abline(v = best.rho, lwd = 3, col = "red")
cat("The optimized weight is", best.rho, "\n")
# Scatter of phantom vs pseudo-game estimates on a common axis range, with the
# identity line for reference
xylim <- range(c(Teams.ability.logit$Phantom.rho.opt,
                 Teams.ability.logit$Pseudo.delta.opt))
plot(Teams.ability.logit$Phantom.rho.opt, Teams.ability.logit$Pseudo.delta.opt,
     pch = 19, col = "red",
     xlim = xylim, ylim = xylim,
     xlab = "Phantom Estimates",
     ylab = "Pseudo-game Estimates")
abline(0, 1, lwd = 2, col = "blue")
par(pty = "m")
# Phantom-game estimates against the `Est` column on a shared axis range.
# The range is computed from the two plotted series before drawing (the first
# attempt reused limits left over from a different comparison), and the y axis
# is labelled for the series actually plotted.
xylim <- range(c(Teams.ability.logit$Phantom.rho.opt, Teams.ability.logit$Est))
plot(Teams.ability.logit$Phantom.rho.opt, Teams.ability.logit$Est,
     pch = 19, col = "red",
     xlim = xylim, ylim = xylim,
     xlab = "Phantom Estimates",
     ylab = "Unregularized Estimates")
abline(0, 1, lwd = 2, col = "blue") # identity line
par(pty = "m")
# Three scatter plots of the phantom-game estimates against another column of
# Teams.ability.logit. Each uses a common x/y range taken from the two plotted
# series and adds the identity line.

# 1. Phantom vs pseudo-game estimates
xylim <- range(c(Teams.ability.logit$Phantom.rho.opt,
                 Teams.ability.logit$Pseudo.delta.opt))
plot(Teams.ability.logit$Phantom.rho.opt, Teams.ability.logit$Pseudo.delta.opt,
     pch = 19, col = "red",
     xlim = xylim, ylim = xylim,
     xlab = "Phantom Estimates",
     ylab = "Pseudo-game Estimates")
abline(0, 1, lwd = 2, col = "blue")
par(pty = "m")

# 2. Phantom vs the Est column
xylim <- range(c(Teams.ability.logit$Phantom.rho.opt,
                 Teams.ability.logit$Est))
plot(Teams.ability.logit$Phantom.rho.opt, Teams.ability.logit$Est,
     pch = 19, col = "red",
     xlim = xylim, ylim = xylim,
     xlab = "Phantom Estimates",
     ylab = "Unregularized Estimates")
abline(0, 1, lwd = 2, col = "blue")
par(pty = "m")

# 3. Phantom vs the RidgePF.Est column
xylim <- range(c(Teams.ability.logit$Phantom.rho.opt,
                 Teams.ability.logit$RidgePF.Est))
plot(Teams.ability.logit$Phantom.rho.opt, Teams.ability.logit$RidgePF.Est,
     pch = 19, col = "red",
     xlim = xylim, ylim = xylim,
     xlab = "Phantom Estimates",
     ylab = "Ridge Regularized Estimates")
abline(0, 1, lwd = 2, col = "blue")
par(pty = "m")
chess_98 <- vienna1898.played
knitr::opts_chunk$set(echo = TRUE)
# Read the data before using it (the history assigned `nba` once before the
# CSV had been loaded).
nba_score_2022 <- read.csv("~/Desktop/Harvard/STAT143/HW/HW1/nba_score_2022.csv")
View(nba_score_2022)
nba <- nba_score_2022
nba <- nba[1:1230, ] # Want the first 1230 games of the data
Teams <- unique(nba$Home) # Get list of team names
# Team count derived from the data instead of hard-coding 29/30/31
n.nba.teams <- length(Teams)
# Construct design matrix (schedule): +1 home team, -1 visiting team
X <- outer(nba$Home, Teams, "==") - outer(nba$Visitor, Teams, "==")
# W maps the (n - 1) free parameters to n team effects; the last team is the
# negative sum of the others
W <- rbind(diag(n.nba.teams - 1), rep(-1, n.nba.teams - 1))
X_star <- X %*% W
nba$X_star <- X_star
# Fit a linear model with home field indicator
ability.hfa.lm <- lm(PtsDiff ~ X_star + AtHome + 0, data = nba)
# W_h extends W with one extra row/column that passes the AtHome coefficient
# through unchanged
W_h <- rbind(cbind(W, 0), c(rep(0, n.nba.teams - 1), 1))
# Covariance of the team effects and home advantage; computed once and reused
Teams.ability.hfa.cov <- W_h %*% vcov(ability.hfa.lm) %*% t(W_h)
V <- Teams.ability.hfa.cov
dimnames(V) <- list(c(Teams, "HFA"), c(Teams, "HFA"))
Teams.ability.hfa.est <- W_h %*% ability.hfa.lm$coefficients # estimates for teams
Teams.ability.hfa.stderr <- sqrt(diag(Teams.ability.hfa.cov)) # std errors
Teams.ability.hfa <- data.frame(Est = Teams.ability.hfa.est,
                                StdErr = Teams.ability.hfa.stderr)
row.names(Teams.ability.hfa) <- c(Teams, "Home Advantage")
# Sort the teams by estimate (decreasing) and keep the home-advantage row last
oo <- order(Teams.ability.hfa$Est[seq_len(n.nba.teams)], decreasing = TRUE)
Teams.ability.hfa <- Teams.ability.hfa[c(oo, n.nba.teams + 1), ]
Teams.ability.hfa <- round(Teams.ability.hfa, 3)
Teams.ability.hfa
# Difference between two teams' estimates and the standard error of that
# difference.
#   team1, team2     : row names in model.summary.df / dimnames in model.cov.mat
#   model.summary.df : table with "Est" and "StdErr" columns
#   model.cov.mat    : covariance matrix indexed by the same team names
# Returns c(difference, standard error), where the standard error is
# sqrt(StdErr1^2 + StdErr2^2 - 2 * cov(team1, team2)).
pairwise_compare <-
  function(team1, team2, model.summary.df, model.cov.mat) {
    est_gap <- model.summary.df[team1, "Est"] - model.summary.df[team2, "Est"]
    var_first <- model.summary.df[team1, "StdErr"]^2
    var_second <- model.summary.df[team2, "StdErr"]^2
    gap_var <- var_first + var_second - 2 * model.cov.mat[team1, team2]
    c(est_gap, sqrt(gap_var))
  }
# Estimated point-differential gap between the Lakers and the Hawks, with a 95%
# t-based confidence interval.
lakers_vs_hawks <- pairwise_compare("Los Angeles Lakers", "Atlanta Hawks",
                                    Teams.ability.hfa, V)
score_diff <- lakers_vs_hawks[1]
std_err <- lakers_vs_hawks[2]
# Create 95% confidence interval
# Residual degrees of freedom taken from the fitted model rather than the
# hand-computed 1230 - 29 - 1 = 1200, so it stays right if the data change
deg_free <- df.residual(ability.hfa.lm)
alpha <- 0.05 # 1-.95
crit_value <- qt(1 - alpha / 2, deg_free) # two tailed test
confidence_int <- score_diff + c(-1, 1) * std_err * crit_value
cat("95% Confidence Interval for Lakers & Hawks\n", confidence_int)
# Expected point differential (Bucks minus Heat) when Milwaukee hosts:
# theta_Bucks - theta_Heat + beta_home
bucks_home_mean <-
  Teams.ability.hfa["Milwaukee Bucks", "Est"] -
  Teams.ability.hfa["Miami Heat", "Est"] +
  Teams.ability.hfa["Home Advantage", "Est"]
# Expected point differential when Milwaukee is the visitor:
# theta_Bucks - theta_Heat - beta_home
bucks_away_mean <-
  Teams.ability.hfa["Milwaukee Bucks", "Est"] -
  Teams.ability.hfa["Miami Heat", "Est"] -
  Teams.ability.hfa["Home Advantage", "Est"]

# Single-game win probability = P(point differential > 0) under a normal
# distribution with the model's residual standard error
resid.stderr <- summary(ability.hfa.lm)$sigma
bucks_home <- 1 - pnorm(0, mean = bucks_home_mean, sd = resid.stderr)
bucks_away <- 1 - pnorm(0, mean = bucks_away_mean, sd = resid.stderr)
cat("The probability the Bucks will win at Fiserv Forum is ", bucks_home)
cat("\nThe probability the Bucks will win at Kaseya Center is ", bucks_away)

# Probability of at least 4 wins out of 7 games (4 home, 3 away), treating home
# and away win counts as independent binomials. Each term below is
# P(h home wins) * P(a away wins) for one (h, a) split of the win total.

# 7 wins: (4 home, 3 away)
prob_7_wins <- dbinom(4, 4, bucks_home) * dbinom(3, 3, bucks_away)
# 6 wins: (3, 3) or (4, 2)
prob_6_wins <-
  dbinom(3, 4, bucks_home) * dbinom(3, 3, bucks_away) +
  dbinom(4, 4, bucks_home) * dbinom(2, 3, bucks_away)
# 5 wins: (4, 1), (3, 2) or (2, 3)
prob_5_wins <-
  dbinom(4, 4, bucks_home) * dbinom(1, 3, bucks_away) +
  dbinom(3, 4, bucks_home) * dbinom(2, 3, bucks_away) +
  dbinom(2, 4, bucks_home) * dbinom(3, 3, bucks_away)
# 4 wins: (4, 0), (3, 1), (2, 2) or (1, 3)
prob_4_wins <-
  dbinom(4, 4, bucks_home) * dbinom(0, 3, bucks_away) +
  dbinom(3, 4, bucks_home) * dbinom(1, 3, bucks_away) +
  dbinom(2, 4, bucks_home) * dbinom(2, 3, bucks_away) +
  dbinom(1, 4, bucks_home) * dbinom(3, 3, bucks_away)
# Total probability of winning at least 4 games
prob_bucks_win_at_least_4 <- prob_7_wins + prob_6_wins + prob_5_wins + prob_4_wins
cat("The probability that the Bucks win the series is", prob_bucks_win_at_least_4)
# I think this number makes sense because they were 'heavily favored' to win
# Model with a separate home-court advantage per team, compared against the
# single shared home-advantage model by a partial F test, followed by an
# attempt to map the fitted coefficients back to per-team strengths and
# per-team home advantages.
# If AtHome is 1, it means the game was played at a team's home court. If AtHome is not 1 (0), it was neutral territory
#nba$HomeFactorVar <- factor(ifelse(nba$AtHome == 1, nba$Home, "0"))
#nba$HomeFactorVar <- factor(nba$Home)
# HomeFactor: home team encoded as an integer code (order of first appearance in nba$Home), stored as a factor with levels 1..30
nba$HomeFactor <- factor(as.integer(factor(nba$Home, levels = unique(nba$Home))), levels = 1:30) # I used chatgpt to help me create a 'factor' variable in R (I normally use Python so I was not familiar with this command)
# NOTE(review): outer() of two vectors of length nrow(nba) yields an
# nrow(nba) x nrow(nba) matrix; neither model formula below references
# HomeFactorVar -- confirm whether this column is still needed.
nba$HomeFactorVar <- outer(nba$AtHome, as.integer(nba$HomeFactor), "*") #* outer(as.integer(nba$HomeFactor), Teams, "*")
# Fit model with 30 home-court advantages (different for each team)
ability.hfa30.lm <- lm(PtsDiff ~ X_star + as.factor(AtHome) * (HomeFactor) + 0, data = nba)
# Fit model with only 1 home-court advantage (same for each team)
ability.lm <- lm(PtsDiff ~ X_star + AtHome  + 0, data = nba)
# Partial F test with anova
partial_F_test <- anova(ability.lm, ability.hfa30.lm)
partial_F_test
# Extend W_h (31 x 30) to a 60 x 59 block matrix: W_h in the top-left, a
# 29 x 29 identity in the bottom-right, zeros elsewhere.
m1 <- matrix(0,nrow=31,ncol=29) # change W_h matrix to account for 30 more predictors
m2 <- matrix(0,nrow=29, ncol=30)
identity <- diag(29)
m2_identity <- cbind(m2,identity)
W_h_test <- cbind(W_h,m1)
W_h_test <- rbind(W_h_test, m2_identity)
# NOTE(review): only the first 59 coefficients are used; whether their order
# lines up with the columns of W_h_test depends on how lm() orders the terms
# of the formula above -- TODO confirm against names(coef(ability.hfa30.lm)).
newTeams.ability.hfa30.est <- W_h_test %*% ability.hfa30.lm$coefficients[1:59] # estimates for team strengths and home advantages
# put team strength estimates and home field advantages into dataframe
teamsHFA <- Teams
newTeams.ability.hfa30 <- data.frame(Est=newTeams.ability.hfa30.est)
# NOTE(review): the HFA rows are labelled with sort(teamsHFA) while HomeFactor
# levels follow first-appearance order -- verify the labels match the rows.
row.names(newTeams.ability.hfa30) <- c(teamsHFA, paste(sort(teamsHFA),"HFA"))
# Expected point differential (Bucks minus Heat) when Milwaukee hosts:
# Bucks strength - Heat strength + Bucks home-court advantage
bucks_home_mean_hfa30 <-
  newTeams.ability.hfa30["Milwaukee Bucks", 1] -
  newTeams.ability.hfa30["Miami Heat", 1] +
  newTeams.ability.hfa30["Milwaukee Bucks HFA", 1]
# Expected point differential when Milwaukee is the visitor:
# Bucks strength - Heat strength - Heat home-court advantage
bucks_away_mean_hfa30 <-
  newTeams.ability.hfa30["Milwaukee Bucks", 1] -
  newTeams.ability.hfa30["Miami Heat", 1] -
  newTeams.ability.hfa30["Miami Heat HFA", 1]

# Single-game win probability = P(point differential > 0) under a normal
# distribution with the team-specific-HFA model's residual standard error
resid.stderr <- summary(ability.hfa30.lm)$sigma
bucks_home <- 1 - pnorm(0, mean = bucks_home_mean_hfa30, sd = resid.stderr)
bucks_away <- 1 - pnorm(0, mean = bucks_away_mean_hfa30, sd = resid.stderr)
cat("The probability the Bucks will win at Fiserv Forum is ", bucks_home)
cat("\nThe probability the Bucks will win at Kaseya Center is ", bucks_away)

# Probability of at least 4 wins out of 7 games (4 home, 3 away), treating home
# and away win counts as independent binomials. Each term below is
# P(h home wins) * P(a away wins) for one (h, a) split of the win total.

# 7 wins: (4 home, 3 away)
prob_7_wins <- dbinom(4, 4, bucks_home) * dbinom(3, 3, bucks_away)
# 6 wins: (3, 3) or (4, 2)
prob_6_wins <-
  dbinom(3, 4, bucks_home) * dbinom(3, 3, bucks_away) +
  dbinom(4, 4, bucks_home) * dbinom(2, 3, bucks_away)
# 5 wins: (4, 1), (3, 2) or (2, 3)
prob_5_wins <-
  dbinom(4, 4, bucks_home) * dbinom(1, 3, bucks_away) +
  dbinom(3, 4, bucks_home) * dbinom(2, 3, bucks_away) +
  dbinom(2, 4, bucks_home) * dbinom(3, 3, bucks_away)
# 4 wins: (4, 0), (3, 1), (2, 2) or (1, 3)
prob_4_wins <-
  dbinom(4, 4, bucks_home) * dbinom(0, 3, bucks_away) +
  dbinom(3, 4, bucks_home) * dbinom(1, 3, bucks_away) +
  dbinom(2, 4, bucks_home) * dbinom(2, 3, bucks_away) +
  dbinom(1, 4, bucks_home) * dbinom(3, 3, bucks_away)
# Total probability of winning at least 4 games
prob_bucks_win_at_least_42 <- prob_7_wins + prob_6_wins + prob_5_wins + prob_4_wins
cat("\nThe probability that the Bucks win the series is", prob_bucks_win_at_least_42)
# Second attempt at a model with team-specific home advantages: refit the
# shared-HFA model, then widen the design matrix with per-team home indicators
# and refit.
X = outer(nba$Home, Teams, "==") - outer(nba$Visitor, Teams, "==") # Construct design matrix (schedule)
# W maps 29 free parameters to 30 team effects (last row is all -1)
W = rbind(diag(29), rep(-1, 29))
X_star = X %*% W
nba$X_star = X_star
# Fit a linear model with home field indicator
ability.hfa.lm = lm(PtsDiff~X_star + AtHome + 0, data=nba) # Fits the model
W_h = rbind(cbind(W,0),c(rep(0,29),1))  # cbind adds another col of 0s, c(rep) gives row of 0s and 1 at the end, rbind combines them
V = W_h %*%
vcov(ability.hfa.lm) %*%
t(W_h) # Creates covariance matrix
dimnames(V) = list(c(Teams,"HFA"), c(Teams,"HFA"))
# Home team where AtHome == 1, otherwise 0; the second call re-creates the
# factor with 0 listed in `exclude`
nba$HomeFactorVar <- factor(ifelse(nba$AtHome == 1, nba$Home, 0))
nba$HomeFactorVar <- factor(nba$HomeFactorVar, exclude = c(0))
# Per-team home indicator columns, appended to the schedule matrix
X2 <- outer(nba$HomeFactorVar, Teams, "==") * outer(nba$Home, Teams, "==")
X <- cbind(X,X2)
#W = rbind(diag(59), rep(-1, 59))
# Same 60 x 59 block matrix as before: W_h top-left, 29 x 29 identity
# bottom-right, zeros elsewhere
m1 <- matrix(0,nrow=31,ncol=29)
m2 <- matrix(0,nrow=29, ncol=30)
identity <- diag(29)
m2_identity <- cbind(m2,identity)
W_h_test <- cbind(W_h,m1)
W_h_test <- rbind(W_h_test, m2_identity)
# NOTE(review): this updates only the global X_star; nba$X_star still holds the
# earlier 29-column matrix, and lm(..., data = nba) looks in `nba` first --
# confirm which X_star the fit below is meant to use.
X_star = X %*% W_h_test
ability.hfa302.lm <- lm(PtsDiff ~ X_star + HomeFactorVar + 0, data = nba)
# NOTE(review): W_h_test has 59 columns; this product is only conformable if
# vcov(ability.hfa302.lm) is 59 x 59 -- TODO confirm.
V = W_h_test %*%
vcov(ability.hfa302.lm) %*%
t(W_h_test) # Creates covariance matrix
newTeams.ability.hfa302.est <- W_h_test %*% ability.hfa302.lm$coefficients
row.names(newTeams.ability.hfa302.est) <- c(teamsHFA,paste(teamsHFA, "1"))
# NOTE(review): W_h_test has 60 rows, so V above would be 60 x 60, but only
# length(Teams) + 1 names are supplied here -- looks like a leftover from the
# shared-HFA model; verify.
dimnames(V) = list(c(Teams,"HFA"), c(Teams,"HFA"))
## Read/process in data
# Load the 2024 WNBA game table and derive the list of teams. Stray markdown
# code fences pasted into the console (not valid R) and the repeated re-loads
# of the same file have been removed.
wnba_2024 <- read.csv("~/Desktop/git_projects/wnba_2024/WNBA-2024-Predictions/wnba_2024.csv")
View(wnba_2024)
knitr::opts_chunk$set(echo = TRUE)
wnba <- wnba_2024
# Get team data
teams <- unique(wnba$HomeTeam)
teams
num_teams <- length(teams)
# Normal (linear) model for WNBA point differentials with one shared
# home-court effect. The failed attempts in the history (a garbled "~" in the
# formula, an undefined `n.team`, a stray console "+" continuation) are
# replaced by the single working sequence.

# Construct design matrix for normal model calculation:
# +1 for the home team, -1 for the away team
X <- outer(wnba$HomeTeam, teams, "==") - outer(wnba$AwayTeam, teams, "==")
# W maps (num_teams - 1) free parameters to num_teams team effects; the last
# team is the negative sum of the others
W <- rbind(diag(num_teams - 1), rep(-1, num_teams - 1))
X_star <- X %*% W # schedule matrix
wnba$X_star <- X_star
ability.hfa.lm <- lm(PtsDiff ~ X_star + AtHome + 0, data = wnba)
# W_h extends W so the AtHome coefficient passes through unchanged
W_h <- rbind(cbind(W, 0), c(rep(0, num_teams - 1), 1))
resid.stderr <- summary(ability.hfa.lm)$sigma
Teams.ability.hfa.est <- W_h %*% ability.hfa.lm$coefficients
V <- W_h %*% vcov(ability.hfa.lm) %*% t(W_h)
dimnames(V) <- list(c(teams, "HFA"), c(teams, "HFA"))
Teams.ability.hfa <- data.frame(Est = Teams.ability.hfa.est,
                                StdErr = sqrt(diag(V)))
row.names(Teams.ability.hfa) <- c(teams, "Home Advantage")
# Ranking teams by strength estimates
Teams.rank <- Teams.ability.hfa[order(-Teams.ability.hfa$Est), ]
print(Teams.rank)
# Normal model for WNBA point differentials: team effects plus a single
# home-advantage coefficient, reported as a ranked table.

# Schedule indicators: +1 for the home team, -1 for the away team
X <- outer(wnba$HomeTeam, teams, "==") - outer(wnba$AwayTeam, teams, "==")
# Contrast matrix: the last team's effect is minus the sum of the others
W <- rbind(diag(num_teams - 1), rep(-1, num_teams - 1))
# Reduced design matrix used in the regression
X_star <- X %*% W
wnba$X_star <- X_star
ability_model <- lm(PtsDiff ~ X_star + AtHome + 0, data = wnba)
# Extended contrast matrix: passes the AtHome coefficient through unchanged
W_h <- rbind(cbind(W, 0), c(rep(0, num_teams - 1), 1))
resid_stderr <- summary(ability_model)$sigma
# Map coefficients and their covariance back to one row per team plus HFA
team_ability_estimates <- W_h %*% ability_model$coefficients
V <- W_h %*% vcov(ability_model) %*% t(W_h)
dimnames(V) <- list(c(teams, "HFA"), c(teams, "HFA")) # Home field advantage
estimates <- data.frame(Estimate = team_ability_estimates,
                        StdErr = sqrt(diag(V)))
row.names(estimates) <- c(teams, "Home Advantage")
# Teams sorted from largest to smallest estimate
team_ranks <- estimates[order(-estimates$Estimate), ]
print(team_ranks)
# Re-run of the WNBA normal model after sourcing the analysis file.
# NOTE(review): source() evaluates its input as plain R, but the target here is
# an .Rmd file -- confirm this call is intended.
source("~/Desktop/git_projects/wnba_2024/WNBA-2024-Predictions/wnba_analysis.Rmd", encoding = "UTF-8")
knitr::opts_chunk$set(echo = TRUE)
wnba <- wnba_2024
teams <- unique(wnba$HomeTeam)
num_teams <- length(teams)

# Schedule indicators: +1 for the home team, -1 for the away team
X <- outer(wnba$HomeTeam, teams, "==") - outer(wnba$AwayTeam, teams, "==")
# Contrast matrix: the last team's effect is minus the sum of the others
W <- rbind(diag(num_teams - 1), rep(-1, num_teams - 1))
# Reduced design matrix used in the regression
X_star <- X %*% W
wnba$X_star <- X_star
ability_model <- lm(PtsDiff ~ X_star + AtHome + 0, data = wnba)
# Extended contrast matrix: passes the AtHome coefficient through unchanged
W_h <- rbind(cbind(W, 0), c(rep(0, num_teams - 1), 1))
resid_stderr <- summary(ability_model)$sigma
# Map coefficients and their covariance back to one row per team plus HFA
team_ability_estimates <- W_h %*% ability_model$coefficients
V <- W_h %*% vcov(ability_model) %*% t(W_h)
dimnames(V) <- list(c(teams, "HFA"), c(teams, "HFA")) # Home field advantage
estimates <- data.frame(Estimate = team_ability_estimates,
                        StdErr = sqrt(diag(V)))
row.names(estimates) <- c(teams, "Home Advantage")
# Teams sorted from largest to smallest estimate
team_ranks <- estimates[order(-estimates$Estimate), ]
print(team_ranks)
# Fit the WNBA normal model and write the ranked estimates to CSV.
# Two lines damaged by a find/replace of "=" with "<-" (`echo <- TRUE` inside
# opts_chunk$set, and "<-<-" as the outer() comparison) have been dropped; the
# corrected forms that followed them are kept.

# The relative path passed to write.csv() below resolves against this directory
setwd("/Users/michellehewson/Desktop/git_projects/wnba_2024/WNBA-2024-Predictions")
knitr::opts_chunk$set(echo = TRUE)
# Construct design matrix for normal model calculation:
# +1 for the home team, -1 for the away team
X <- outer(wnba$HomeTeam, teams, "==") - outer(wnba$AwayTeam, teams, "==")
# Contrast matrix: the last team's effect is minus the sum of the others
W <- rbind(diag(num_teams - 1), rep(-1, num_teams - 1))
# Create schedule matrix
X_star <- X %*% W
wnba$X_star <- X_star
ability_model <- lm(PtsDiff ~ X_star + AtHome + 0, data = wnba)
# Extended contrast matrix: passes the AtHome coefficient through unchanged
W_h <- rbind(cbind(W, 0), c(rep(0, num_teams - 1), 1))
resid_stderr <- summary(ability_model)$sigma
team_ability_estimates <- W_h %*% ability_model$coefficients
V <- W_h %*% vcov(ability_model) %*% t(W_h)
dimnames(V) <- list(c(teams, "HFA"), c(teams, "HFA")) # Home field advantage
estimates <- data.frame(Estimate = team_ability_estimates,
                        StdErr = sqrt(diag(V)))
row.names(estimates) <- c(teams, "Home Advantage")
# Ranking WNBA teams by normal strength estimates
team_ranks <- estimates[order(-estimates$Estimate), ]
print(team_ranks)
write.csv(team_ranks, "normal_estimates_wnba2024.csv")
View(team_ranks)
View(wnba)
# Thurstone-Mosteller model (probit): binomial GLM of HomeWin on the reduced
# schedule matrix plus the AtHome indicator, no intercept. The first attempt in
# the history labelled the rows with `Teams` (the NBA team vector) instead of
# `teams`; only the corrected sequence is kept.
tm_model <- glm(HomeWin ~ X_star + AtHome + 0,
                family = binomial(link = "probit"), data = wnba)
# Extended contrast matrix: passes the AtHome coefficient through unchanged
W_h <- rbind(cbind(W, 0), c(rep(0, num_teams - 1), 1))
# Team strength estimates (one row per team plus the home-advantage row)
thurstone_estimates <- data.frame("Est" = W_h %*% tm_model$coefficients)
# Covariance of the mapped estimates, from the GLM's unscaled covariance
V.probit <- W_h %*% summary(tm_model)$cov.unscaled %*% t(W_h)
thurstone_estimates$StdErr <- sqrt(diag(V.probit))
row.names(thurstone_estimates) <- c(teams, "HFA")
dimnames(V.probit) <- list(c(teams, "HFA"), c(teams, "HFA"))
# Sort from largest to smallest estimate and display
thurstone_estimates <- thurstone_estimates[order(-thurstone_estimates$Est), ]
thurstone_estimates