# Setup 

In [None]:
import os, json, time 

import pandas as pd 
import numpy as np 

from scipy.stats import norm 

# silence pandas' chained-assignment warning for the whole notebook
pd.options.mode.chained_assignment = None

# base directory that every input file below is read from
folder_path = "C://Users//YOUR_FOLDER_PATH"

## Current Week 

In [None]:
# the league's opening day (day 1 of week 1)
first_date = pd.to_datetime("2024-10-22")

# whole days elapsed since opening day, turned into a 1-based week number
days_in = (pd.Timestamp.now() - first_date).days
week_number = 1 + days_in // 7

# showcase the current week
print(week_number)

# Data Prep 

## Raw Stats 

In [None]:
# load the raw stats and parse the game date into a datetime column
df_stats = (
    pd.read_csv(f"{folder_path}//all_stats.csv")
    .assign(GAME_DATE = lambda d: pd.to_datetime(d["GAME_DATE"]))
)

# showcase the data
df_stats.head()

## Players 

In [None]:
# raw players, with an upper-cased full name used as the join key
df_players = pd.read_csv(f"{folder_path}//raw_players.csv")
df_players["FULL_NAME"] = (df_players["FIRST_NAME"] + " " + df_players["LAST_NAME"]).str.upper()

# Sleeper players and rosters, with the name upper-cased to match the other data
df_splayers = pd.read_csv(f"{folder_path}//sleeper_players.csv")
df_rosters = pd.read_csv(f"{folder_path}//sleeper_rosters.csv")
df_splayers["FULL_NAME"] = df_splayers["PLAYER_NAME"].str.upper()

# attach roster information and keep only the Sleeper columns we need
sleeper_cols = ["FULL_NAME", "ROSTER_ID", "IS_STARTER", "SEARCH_RANK"]
df_splayers = df_splayers.merge(df_rosters, how = "left", on = "PLAYER_ID_SLEEPER")[sleeper_cols]

# per-player mean and standard deviation of fantasy points
agg_stats = df_stats.groupby("PLAYER_ID")["FANTASY_POINTS"].agg(
    PTS_MEAN = "mean",
    PTS_SD = "std"
)

# columns carried forward into the final players table
player_cols = [
    "PLAYER_ID", "FULL_NAME", "TEAM_ID", "PTS_MEAN", "PTS_SD",
    "ROSTER_ID", "IS_STARTER", "SEARCH_RANK"
]

# Sleeper players -> matched raw players -> aggregated stats
df_players = (
    df_splayers.merge(df_players, how = "left", on = "FULL_NAME")
    # drop Sleeper players whose name found no match in the raw players
    .dropna(subset = ["PLAYER_ID"])
    .merge(agg_stats, how = "left", on = "PLAYER_ID")
    [player_cols]
    # the left join leaves PLAYER_ID as float; cast it back to an integer
    .assign(PLAYER_ID = lambda d: d["PLAYER_ID"].astype(int))
)

# showcase the data
df_players.head()

## Individual Player Schedules 

In [None]:
# load the team schedules and parse the game date into a datetime column
df_games = pd.read_csv(f"{folder_path}//all_games.csv")
df_games["GAME_DATE"] = pd.to_datetime(df_games["GAME_DATE"])

# one row per (team, game): home rows first, then guest rows
game_cols = ["GAME_ID", "WEEK_NUMBER", "GAME_DATE"]
df_pgames = pd.concat([
    df_games[[side_col] + game_cols].rename(columns = {side_col: "TEAM_ID"})
    for side_col in ("HOME_ID", "GUEST_ID")
])

# fan each team game out to the players on that team
player_keys = df_players[["TEAM_ID", "PLAYER_ID", "FULL_NAME"]]
df_pgames = df_pgames.merge(player_keys, on = "TEAM_ID", how = "inner")

# showcase the data
df_pgames.head()

## Locked Points 

In [None]:
# load the locked points and keep only the current week
df_locks = pd.read_csv(f"{folder_path}//locked_points.csv")
df_locks = df_locks.loc[df_locks["WEEK_NUMBER"] == week_number]

# a row counts as locked once LOCKED_POINTS holds a value
is_locked = df_locks["LOCKED_POINTS"].notna()

# split into players still open and players already locked
df_open = df_locks.loc[~is_locked]
df_locks = df_locks.loc[is_locked]

# showcase the data
df_locks.head()

# Optimize Lock-ins

## Better Score Opportunity 

In [None]:
def calc_prob_better(current, games_left, mean, sd):
    """Probability that at least one remaining game beats the current score.

    Each remaining game is treated as an independent draw from a normal
    distribution with the given mean and standard deviation.

    Parameters
    ----------
    current : score to beat
    games_left : number of games still to be played
    mean : mean of the per-game score distribution
    sd : standard deviation of the per-game score distribution

    Returns
    -------
    1 - P(single game <= current) ** games_left
    """
    # chance that one game lands at or below the current score
    p_not_better = norm.cdf(current, loc = mean, scale = sd)

    # the current score stands only if every remaining game falls short
    return 1 - (p_not_better ** games_left)
 
# try the function on one example: a 48.1 with three games still to play
example_inputs = dict(current = 48.1, games_left = 3, mean = 42.3, sd = 14.6)
pbetter = calc_prob_better(**example_inputs)
print(f"{pbetter:.1%}")

## Flag Potential Locks 

In [None]:
# filter to just the players that we can potentially lock-in
df_options = df_open.loc[df_open["TEAM"] == "My Team"][["PLAYER_NAME", "PLAYER_ID"]]

# count the games each of those players still has left this week
dfg2 = (
    df_pgames.loc[
        (df_pgames["PLAYER_ID"].isin(df_options["PLAYER_ID"])) &
        (df_pgames["WEEK_NUMBER"] == week_number) &
        (df_pgames["GAME_DATE"] > pd.Timestamp.now())
    ][["PLAYER_ID", "GAME_ID"]]
    .groupby("PLAYER_ID").agg(
        GAMES = ("GAME_ID", "count")
    ).reset_index()
)

# get the most recent fantasy points: the latest game by date this week.
# (Ranking by FANTASY_POINTS picked the best game instead of the latest one,
# and dropped a player entirely when the top scores tied at rank 1.5.)
dfs2 = df_stats.loc[
    (df_stats["PLAYER_ID"].isin(df_options["PLAYER_ID"])) &
    (df_stats["WEEK_NUMBER"] == week_number)
][["PLAYER_ID", "GAME_DATE", "FANTASY_POINTS"]]
dfs2 = dfs2.sort_values("GAME_DATE").groupby("PLAYER_ID").tail(1)
dfs2 = dfs2[["PLAYER_ID", "FANTASY_POINTS"]]

# join everything together
df_options = (
    df_options.merge(dfg2, on = "PLAYER_ID", how = "left")
    .merge(dfs2, on = "PLAYER_ID", how = "left")
    .merge(df_players[["PLAYER_ID", "PTS_MEAN", "PTS_SD"]], on = "PLAYER_ID", how = "left")
)

# a player absent from dfg2 has no games left, which is 0 rather than unknown
df_options["GAMES"] = df_options["GAMES"].fillna(0).astype(int)

# probability of a better score, computed for all players at once
# (also works on an empty frame, where a row loop never creates the column)
has_score = df_options["FANTASY_POINTS"].notna()
df_options["PROB_BETTER"] = calc_prob_better(
    current = df_options["FANTASY_POINTS"].to_numpy(dtype = float),
    games_left = df_options["GAMES"].to_numpy(),
    mean = df_options["PTS_MEAN"].to_numpy(dtype = float),
    sd = df_options["PTS_SD"].to_numpy(dtype = float)
)

# without a score this week there is nothing to compare against
# (nan ** 0 evaluates to 1, which would otherwise show up as a 0% probability)
df_options["PROB_BETTER"] = df_options["PROB_BETTER"].where(has_score)

# sort by the lock probability
df_options = df_options.sort_values("PROB_BETTER").reset_index(drop = True)

# flag whether or not we should lock (only players that have a score to lock)
df_options["LOCK"] = df_options["FANTASY_POINTS"].notna() & (df_options["PROB_BETTER"] < 0.5)

# showcase the data
df_options

# Adjusted Projections 

## Simulation Function 

In [None]:
def simulate_locked_points(df_players, nreps = 100, seedval = 4):
    """Simulate which game score each player ends up locking in.

    For every player, game and repetition a score is drawn from
    Normal(PTS_MEAN, PTS_SD). Within each (player, repetition) the first game
    whose score has less than a 50% chance of being beaten by a remaining game
    (per ``calc_prob_better``) is kept. The last game has no games after it,
    so its probability of being beaten is 0.

    Parameters
    ----------
    df_players : DataFrame
        Needs PLAYER_ID, PTS_MEAN, PTS_SD and GAMES; other columns are carried
        through unchanged.
    nreps : int
        Number of simulation repetitions per player.
    seedval : int
        Seed passed to ``np.random.seed`` (sets NumPy's global random state).

    Returns
    -------
    DataFrame
        The kept rows, with GAME_NUM, REP_NUM, SIM_POINTS, GAMES_LEFT and
        PROB_BETTER added. GAMES is returned as an integer column. Players
        whose GAMES is missing or zero have nothing to simulate and produce
        no rows.
    """

    # set the random seed for reproducibility
    np.random.seed(seedval)

    # a missing GAMES value (e.g. from a left merge) means no games; such rows
    # used to crash np.arange, so treat them as zero and drop them up front
    df = df_players.copy()
    df["GAMES"] = df["GAMES"].fillna(0).astype(int)
    df = df.loc[df["GAMES"] > 0].reset_index(drop = True)

    # expand to one row per game, numbered 1..GAMES within each player row
    df = (
        df.loc[df.index.repeat(df["GAMES"].to_numpy())]
        .assign(GAME_NUM = lambda d: d.groupby(level = 0).cumcount() + 1)
        .reset_index(drop = True)
    )

    # expand each game row by the simulation repetition, numbered 1..nreps
    n_game_rows = len(df)
    df = (
        df.loc[df.index.repeat(nreps)]
        .assign(REP_NUM = np.tile(np.arange(1, nreps + 1), n_game_rows))
    )

    # simulate the points values in a single vectorized draw
    df["SIM_POINTS"] = np.random.normal(
        loc = df["PTS_MEAN"].to_numpy(dtype = float),
        scale = df["PTS_SD"].to_numpy(dtype = float)
    )

    # calculate the number of games left after this one
    df["GAMES_LEFT"] = df["GAMES"] - df["GAME_NUM"]

    # probability that a later game beats this simulated score
    df["PROB_BETTER"] = calc_prob_better(
        current = df["SIM_POINTS"].to_numpy(),
        games_left = df["GAMES_LEFT"].to_numpy(),
        mean = df["PTS_MEAN"].to_numpy(dtype = float),
        sd = df["PTS_SD"].to_numpy(dtype = float)
    )

    # keep the lockable games, then the earliest one per player and repetition
    df = df.loc[df["PROB_BETTER"] < 0.5]
    df = df.sort_values(by = ["REP_NUM", "GAME_NUM"]).groupby(["PLAYER_ID", "REP_NUM"]).head(1)

    return df
 
# try the simulation on two identical scorers that differ only in games available
demo_sim_players = pd.DataFrame({
    "PLAYER_ID": [1, 2],
    "PTS_MEAN": [30, 30],
    "PTS_SD": [10, 10],
    "GAMES": [4, 3]
})
simulate_locked_points(demo_sim_players)

## Adjusted Projection 

In [None]:
def compute_adjusted_projections(df_players):
    """Average the simulated locked-in score for each input player row.

    Runs ``simulate_locked_points`` on ``df_players`` and takes the mean of
    SIM_POINTS over the repetitions, grouped by every column of ``df_players``.

    Parameters
    ----------
    df_players : DataFrame
        Passed straight to ``simulate_locked_points``; all of its columns are
        used as grouping keys and are returned alongside the projection.

    Returns
    -------
    DataFrame
        The columns of ``df_players`` plus SIM_POINTS, the mean simulated
        locked-in score.
    """

    # simulate the locked points
    df_sims = simulate_locked_points(df_players)

    # dropna = False keeps players that have NaN in a descriptive column
    # (e.g. no ROSTER_ID); groupby would otherwise drop those groups silently
    df_adjusted = (
        df_sims.groupby(list(df_players.columns), dropna = False)
        .agg({"SIM_POINTS": "mean"})
        .reset_index()
    )

    return df_adjusted
 
# try the adjusted projections on two identical scorers with different game counts
demo_projection_players = pd.DataFrame({
    "PLAYER_NAME": ["Player 1", "Player 2"],
    "PLAYER_ID": [1, 2],
    "PTS_MEAN": [30, 30],
    "PTS_SD": [10, 10],
    "GAMES": [4, 3]
})
compute_adjusted_projections(demo_projection_players)

# Player Comparisons 

In [None]:
compare_players = ["JONAS VALANCIUNAS", "DYSON DANIELS"]
compare_week = 12

# filter down to the players we want to compare
df_players2 = df_players.loc[df_players["FULL_NAME"].isin(compare_players)]

# get the number of games in the week
agg_games = df_pgames.loc[df_pgames["WEEK_NUMBER"] == compare_week].groupby(["PLAYER_ID"]).agg(
    GAMES = ("GAME_ID", "count")
)

# join in the number of games
df_players2 = df_players2.merge(agg_games, on = "PLAYER_ID", how = "left")

# a player with no games that week comes back from the left join as NaN;
# count that as zero games so the simulation does not fail on it
df_players2["GAMES"] = df_players2["GAMES"].fillna(0).astype(int)

# calculate the adjusted projections
df_adjusted = compute_adjusted_projections(df_players2)

# showcase each of the projections
for i, row in df_adjusted.iterrows():
    print(f"\n{row['FULL_NAME']} is projected to score {row['SIM_POINTS']:.1f} points this week ")
    print(f"  ({row['GAMES']} games with mean = {row['PTS_MEAN']:.1f} and sd = {row['PTS_SD']:.1f})")

# call out requested players that ended up without a projection, instead of
# letting them drop out of the comparison silently
no_projection = df_players2.loc[
    ~df_players2["PLAYER_ID"].isin(df_adjusted["PLAYER_ID"]), "FULL_NAME"
]
for name in no_projection:
    print(f"\n{name} has no projection for week {compare_week}")

# Matchup Projections 

## Simulate Remaining Games 

In [None]:
# filter to the players that we need to simulate
sim_players = df_players.merge(
    df_open[["PLAYER_ID", "TEAM"]],
    on = "PLAYER_ID",
    how = "inner"
)

# `current_date` was never assigned anywhere in the notebook, so this cell
# raised a NameError on a fresh kernel; use the same "now" cutoff as the
# remaining-games filter in the "Flag Potential Locks" cell
current_date = pd.Timestamp.now()

# filter to the games left this week
current_games = df_pgames.loc[
    (df_pgames["WEEK_NUMBER"] == week_number) &
    (df_pgames["GAME_DATE"] > current_date)
]

# aggregate by player
current_games = current_games.groupby("PLAYER_ID").agg(
    GAMES = ("GAME_ID", "count")
)

# join in the number of games left
sim_players = sim_players.merge(current_games, on = "PLAYER_ID", how = "left")

# players with no games left come back from the left join as NaN;
# count that as zero games so the simulation does not fail on it
sim_players["GAMES"] = sim_players["GAMES"].fillna(0).astype(int)

# simulate the locked points
df_sims = simulate_locked_points(sim_players)

# showcase the data
df_sims.head()

## Repetition Totals 

In [None]:
# aggregate the simulation reps by team 
df_reps = df_sims.groupby(["TEAM", "REP_NUM"]).agg( 
    SIM_TOTAL = ("SIM_POINTS", "sum")
).reset_index() 
 
# summarize the totals that are already locked in 
locked_totals = df_locks.groupby("TEAM").agg(
    LOCKED_POINTS = ("LOCKED_POINTS", "sum")
) 
 
# join in the locked points and add to the totals 
df_reps = df_reps.merge(locked_totals, on = "TEAM", how = "left") 
df_reps["LOCKED_POINTS"] = df_reps["LOCKED_POINTS"].fillna(0) 
df_reps["TOTAL_POINTS"] = df_reps["SIM_TOTAL"] + df_reps["LOCKED_POINTS"] 
 
# showcase the data 
df_reps.head() 

## Matchup Totals 

In [None]:
# calculate the overall totals 
df_totals = df_reps.groupby("TEAM").agg(
    LOCKED_POINTS = ("LOCKED_POINTS", "mean"), 
    EST_TOTAL = ("TOTAL_POINTS", "mean") 
).reset_index() 
 
# calculate the players left to lock 
dfr = df_open.groupby("TEAM").agg(
    PLAYERS_LEFT = ("PLAYER_ID", "count")
).reset_index() 
 
# join in the players left to lock 
df_totals = df_totals.merge(dfr, on = "TEAM", how = "left") 
df_totals["PLAYERS_LEFT"] = df_totals["PLAYERS_LEFT"].fillna(0) 
 
# showcase the data 
df_totals 

## Prediction Intervals 

In [None]:
# calculate the overall totals 
df_totals = df_reps.groupby("TEAM").agg(
    PCT05 = ("TOTAL_POINTS", lambda x: np.percentile(x, 5)), 
    AVG = ("TOTAL_POINTS", "mean"),
    PCT95 = ("TOTAL_POINTS", lambda x: np.percentile(x, 95))
).sort_values("TEAM").reset_index() 
 
# print the overall projections 
for i, row in df_totals.iterrows():
    print(f"\n{row['TEAM']}: {row['AVG']:,.1f} total points" ) 
    print(f"    (between {row['PCT05']:.1f} and {row['PCT95']:.1f})") 

## Win Probability

In [None]:
# get the rep totals for my team 
dfr1 = (
    df_reps.loc[df_reps["TEAM"] == "My Team"] 
    [["REP_NUM", "TOTAL_POINTS"]]
    .rename(columns = {"TOTAL_POINTS": "MY_TEAM"}) 
) 
 
# get the rep totals for the opponent 
dfr2 = (
    df_reps.loc[df_reps["TEAM"] == "Opponent"] 
    [["REP_NUM", "TOTAL_POINTS"]] 
    .rename(columns = {"TOTAL_POINTS": "OPPONENT"}) 
) 
 
# join the two together and calcuate the result 
df_match = dfr1.merge(dfr2, on = "REP_NUM", how = "inner") 
df_match["RESULT"] = np.where(df_match["MY_TEAM"] > df_match["OPPONENT"], 1, 0) 
 
# calculate the win probability 
win_prob = df_match["RESULT"].mean() 
print(f"My team has a {win_prob:.1%} chance of winning this week") 