# Setup 

In [None]:
import os, json, time, requests 

import pandas as pd 
import numpy as np 

# root directory that every csv produced by this notebook is written beneath
folder_path = "C://Users//YOUR_FILE_PATH"

## API Authentication 

In [2]:
# read the API key from the NBA_API_KEY environment variable
# (os.getenv gives None when the variable is not set)
api_key = os.getenv("NBA_API_KEY")

# the key is the only header sent along with each request
headers = {"x-rapidapi-key": api_key}

## call_get_endpoint 

In [3]:
def call_get_endpoint(endpoint, params, headers, timeout = 30):
    """
    Send a GET request to an API-NBA endpoint and return the decoded JSON body.

    Parameters
    ----------
    endpoint : str
        Path appended to the base url, e.g. "teams" or "players/statistics".
    params : dict
        Query-string parameters passed to the request.
    headers : dict
        HTTP headers passed to the request (this is where the API key travels).
    timeout : float or tuple, optional
        Handed straight to requests.get so a stalled connection cannot block
        the notebook forever. Defaults to 30 seconds.

    Returns
    -------
    The decoded JSON payload when the status code is 200. For any other
    status code the code is printed and None is returned, so callers must
    check for None before indexing the result.

    Raises
    ------
    Nothing is caught here: any exception raised by requests.get (for example
    when the timeout elapses) or by response.json() propagates to the caller.
    """

    # build the url
    url = f"https://api-nba-v1.p.rapidapi.com/{endpoint}"

    # make the request; without a timeout requests would wait indefinitely
    response = requests.get(url, headers = headers, params = params, timeout = timeout)

    # anything other than a 200 is reported to the user and signalled with None
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    return response.json()

# Create Base Tables 

## Teams 

In [4]:
# get the teams data from the endpoint
data = call_get_endpoint(
    endpoint = "teams",
    params = {},
    headers = headers
)

# call_get_endpoint returns None when the request fails, so stop here with a
# clear message instead of failing on data["response"] below
if data is None:
    raise RuntimeError("The teams request failed; see the error printed above")

# output columns, in the order they are written to the csv
team_columns = ["TEAM_ID", "TEAM_NAME", "TEAM_CODE", "NICKNAME", "CITY", "NBA_FRANCHISE"]

# build one record per team, then create the dataframe in a single step
# (growing a dataframe with pd.concat inside the loop re-copies it every pass)
team_records = [
    {
        "TEAM_ID": rsp["id"],
        "TEAM_NAME": rsp["name"],
        "TEAM_CODE": rsp["code"],
        "NICKNAME": rsp["nickname"],
        "CITY": rsp["city"],
        "NBA_FRANCHISE": rsp["nbaFranchise"]
    }
    for rsp in data["response"]
]

# naming the columns keeps the filter below working even for an empty response
df_teams = pd.DataFrame(team_records, columns = team_columns)

# filter to only the NBA teams and renumber the rows from zero
df_teams = df_teams[df_teams["NBA_FRANCHISE"].eq(True)].reset_index(drop = True)

# save to a csv
df_teams.to_csv(f"{folder_path}//raw_teams.csv", index = False)

# showcase the data
df_teams.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,TEAM_CODE,NICKNAME,CITY,NBA_FRANCHISE
0,1,Atlanta Hawks,ATL,Hawks,Atlanta,True
1,2,Boston Celtics,BOS,Celtics,Boston,True
2,4,Brooklyn Nets,BKN,Nets,Brooklyn,True
3,5,Charlotte Hornets,CHA,Hornets,Charlotte,True
4,6,Chicago Bulls,CHI,Bulls,Chicago,True


## Players 

In [None]:
# one record per player is collected here; the dataframe is built once after
# the loop (growing it with pd.concat on every player re-copies it each time)
player_records = []

# loop through each team and get the players
for _, row in df_teams.iterrows():
    print(f"Getting players for team {row['TEAM_NAME']}")

    # get the team id
    team_id = row["TEAM_ID"]

    # make the request to get the players
    data = call_get_endpoint(
        endpoint = "players",
        params = {"team": team_id, "season": "2024"},
        headers = headers
    )

    # call_get_endpoint returns None when the request fails; stop with a clear
    # message rather than silently saving a roster file that is missing a team
    if data is None:
        raise RuntimeError(f"The players request failed for team {team_id}")

    # loop through the responses and add each player
    for rsp in data["response"]:

        # a player whose payload lacks one of these fields (missing key, or a
        # None where a nested object is expected) is reported and skipped;
        # anything else, including Ctrl-C, is no longer swallowed
        try:
            player_records.append({
                "PLAYER_ID": rsp["id"],
                "TEAM_ID": team_id,
                "FIRST_NAME": rsp["firstname"],
                "LAST_NAME": rsp["lastname"],
                "POSITION": rsp["leagues"]["standard"]["pos"],
                "ACTIVE": rsp["leagues"]["standard"]["active"],
                "JERSEY": rsp["leagues"]["standard"]["jersey"],
                "BIRTH_DATE": rsp["birth"]["date"],
                "HEIGHT_FT": rsp["height"]["feets"],
                "HEIGHT_IN": rsp["height"]["inches"]
            })

        except (KeyError, TypeError):
            print(f"Error with player from team {team_id}")

    # sleep for a bit to avoid rate limiting
    time.sleep(8)

# build the dataframe in one step; the rows are already numbered from zero
df_players = pd.DataFrame(player_records)

# save to a csv
df_players.to_csv(f"{folder_path}//raw_players.csv", index = False)

# showcase the data
df_players.head()

Getting players for team Atlanta Hawks
Error with player from team 1
Getting players for team Boston Celtics


Unnamed: 0,PLAYER_ID,TEAM_ID,FIRST_NAME,LAST_NAME,POSITION,ACTIVE,JERSEY,BIRTH_DATE,HEIGHT_FT,HEIGHT_IN
0,385,1,Larry,Nance Jr.,F-C,True,22,1993-01-01,6,7
1,317,1,Caris,LeVert,G,True,3,1994-08-25,6,6
2,391,1,Georges,Niang,F,True,20,1993-06-17,6,7
3,1046,1,Trae,Young,G,True,11,1998-09-19,6,1
4,1877,1,Terance,Mann,G-F,True,14,1996-10-18,6,5


## Games 

In [6]:
# function to turn a date into its 1-based week of the season
def calculate_week_number(dt, first_date):
    """
    Return the week number of `dt`, counted in 7-day blocks from `first_date`.

    The first seven days starting at `first_date` are week 1, the next seven
    are week 2, and so on. Both arguments must support subtraction that
    yields an object with a `.days` attribute.
    """

    # whole days elapsed since the first day of week 1
    elapsed = dt - first_date

    # floor-divide into full weeks, then shift so that numbering starts at 1
    return elapsed.days // 7 + 1

In [7]:
# request every game of the season in one call (there is no date filter)
params = {
    "season": "2024"
}

# call the API
data = call_get_endpoint(
    endpoint = "games",
    params = params,
    headers = headers
)

# call_get_endpoint returns None when the request fails, so stop here with a
# clear message instead of failing on data["response"] below
if data is None:
    raise RuntimeError("The games request failed; see the error printed above")

# build one record per game, then create the dataframe in a single step
# (growing a dataframe with pd.concat inside the loop re-copies it every pass)
game_records = [
    {
        "GAME_ID": game["id"],
        "GAME_DATE": game["date"]["start"],
        "HOME_ID": game["teams"]["home"]["id"],
        "HOME_TEAM": game["teams"]["home"]["name"],
        "GUEST_ID": game["teams"]["visitors"]["id"],
        "GUEST_TEAM": game["teams"]["visitors"]["name"],
        "LEAGUE": game["league"]
    }
    for game in data["response"]
]
df_games = pd.DataFrame(game_records)

# convert to Mountain time and drop the timezone so the dates are naive
df_games["GAME_DATE"] = pd.to_datetime(df_games["GAME_DATE"]) \
    .dt.tz_convert("US/Mountain").dt.tz_localize(None)

# first day of the first week of the season
first_date = pd.to_datetime("2024-10-21")

# keep only the games from first_date onward (meant to leave the regular
# season; note this is purely a date filter). The explicit copy makes the
# result an independent frame, so adding a column below is unambiguous.
df_games = df_games.loc[df_games["GAME_DATE"] >= first_date].copy()

# calculate the week number
df_games["WEEK_NUMBER"] = df_games["GAME_DATE"].apply(
    lambda x: calculate_week_number(x, first_date)
)

# save the data
df_games.to_csv(f"{folder_path}//all_games.csv", index = False)

# showcase the data
df_games.head()

Unnamed: 0,GAME_ID,GAME_DATE,HOME_ID,HOME_TEAM,GUEST_ID,GUEST_TEAM,LEAGUE,WEEK_NUMBER
70,14115,2024-10-22 17:30:00,2,Boston Celtics,24,New York Knicks,standard,1
71,14116,2024-10-22 20:00:00,17,Los Angeles Lakers,22,Minnesota Timberwolves,standard,1
72,14117,2024-10-23 17:00:00,10,Detroit Pistons,15,Indiana Pacers,standard,1
73,14118,2024-10-23 17:30:00,1,Atlanta Hawks,4,Brooklyn Nets,standard,1
74,14119,2024-10-23 17:30:00,20,Miami Heat,26,Orlando Magic,standard,1


# Pull Daily Stats 

## get_daily_stats 

In [None]:
# function to get and save the stats for all games on a given date
def get_daily_stats(game_date, df_games):
    """
    Download the player statistics of every game played on `game_date`,
    save them to "<folder_path>//Stats//stats <game_date>.csv" and return them.

    Relies on the notebook-level names `headers`, `folder_path` and
    `call_get_endpoint`.

    Parameters
    ----------
    game_date : str
        Date formatted as "YYYY-MM-DD"; it is compared against the formatted
        GAME_DATE column and used in the output file name.
    df_games : pandas.DataFrame
        Games table with a datetime GAME_DATE column and a GAME_ID column.
        It is only read, never modified.

    Returns
    -------
    pandas.DataFrame
        One row per player per game. An empty dataframe (still written to
        disk) when no game matches the date.

    Raises
    ------
    RuntimeError
        When a statistics request fails. Nothing is written in that case, so
        a partially downloaded date is never saved.
    """

    # get the dataframe of daily games
    on_game_date = df_games["GAME_DATE"].dt.strftime("%Y-%m-%d") == game_date
    daily_games = df_games.loc[on_game_date].reset_index(drop = True)

    # sleep for a bit to avoid rate limiting
    time.sleep(8)

    # output column -> key in the API payload, in csv column order
    stat_fields = [
        ("POINTS", "points"),
        ("POS", "pos"),
        ("MIN", "min"),
        ("FGM", "fgm"),
        ("FGA", "fga"),
        ("FGP", "fgp"),
        ("FTM", "ftm"),
        ("FTA", "fta"),
        ("FTP", "ftp"),
        ("TPM", "tpm"),
        ("TPA", "tpa"),
        ("TPP", "tpp"),
        ("OFFREB", "offReb"),
        ("DEFREB", "defReb"),
        ("TOTREB", "totReb"),
        ("ASSISTS", "assists"),
        ("PFOULS", "pFouls"),
        ("STEALS", "steals"),
        ("TURNOVERS", "turnovers"),
        ("BLOCKS", "blocks"),
        ("PLUSMINUS", "plusMinus")
    ]

    # loop through each game and collect one record per player; the dataframe
    # is built once at the end instead of being re-copied by pd.concat per row
    stat_records = []
    for i, row in daily_games.iterrows():
        print(f"Getting stats for game {i + 1} out of {len(daily_games.index)}")

        # get the game stats
        params = {
            'game': row["GAME_ID"]
        }

        # call the API
        data = call_get_endpoint(
            endpoint = "players/statistics",
            params = params,
            headers = headers
        )

        # call_get_endpoint returns None when the request fails; raising here
        # keeps an incomplete file from being saved for this date
        if data is None:
            raise RuntimeError(
                f"The statistics request failed for game {row['GAME_ID']} on {game_date}"
            )

        # loop through and add the stats to the records
        for stats in data["response"]:
            record = {
                "GAME_ID": row["GAME_ID"],
                "PLAYER_ID": stats["player"]["id"],
                "TEAM_ID": stats["team"]["id"]
            }
            for column, key in stat_fields:
                record[column] = stats[key]
            stat_records.append(record)

        # sleep for a bit to avoid rate limiting
        time.sleep(8)

    # a date without any rows still produces an (empty) dataframe and file
    df_stats = pd.DataFrame(stat_records) if stat_records else pd.DataFrame()

    # save the data
    df_stats.to_csv(f"{folder_path}//Stats//stats {game_date}.csv", index = False)

    return df_stats

## Pull Missing Dates 

In [9]:
# first day of the season
league_start = "2024-10-22"

# get yesterday's date
yesterday = pd.Timestamp.now().normalize() - pd.Timedelta(days = 1)

# dates between the start and end date
all_dates = pd.date_range(league_start, yesterday, freq = "D").strftime("%Y-%m-%d")

# make sure the folder for the daily files exists before listing / writing to it
stats_folder = f"{folder_path}//Stats"
os.makedirs(stats_folder, exist_ok = True)

# read in the dates that have already been processed; only files named like
# "stats YYYY-MM-DD.csv" are considered, so stray files in the folder
# (without a space in their name) no longer raise an IndexError
files = os.listdir(stats_folder)
dates_processed = [
    file.split(" ")[1].split(".")[0]
    for file in files
    if file.startswith("stats ") and file.endswith(".csv")
]

# filter out the dates that have already been processed
pull_dates = [date for date in all_dates if date not in dates_processed]

# loop through each date and get the stats
for game_date in pull_dates:
    print(f"\nGetting stats for {game_date}...")
    df_stats = get_daily_stats(game_date, df_games)


Getting stats for 2025-02-14...
Getting games for 2025-02-14...

Getting stats for 2025-02-15...
Getting games for 2025-02-15...

Getting stats for 2025-02-16...
Getting games for 2025-02-16...

Getting stats for 2025-02-17...
Getting games for 2025-02-17...

Getting stats for 2025-02-18...
Getting games for 2025-02-18...


# Put Everything Together 

## calculate_fantasy_points 

In [10]:
# function to calculate fantasy points based on stats
def calculate_fantasy_points(df):
    """
    Add the fantasy scoring columns to a player-stats dataframe.

    The dataframe is modified in place and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns POINTS, TOTREB, ASSISTS, STEALS, BLOCKS, TURNOVERS,
        FGM, FGA, FTM, FTA, TPM, TPA, OFFREB and DEFREB.

    Returns
    -------
    pandas.DataFrame
        `df` with these columns added: DD, TD (double / triple double flags),
        FGMI, FTMI, TPMI (missed shots), PB40, PB50, AB15, RB20 (0/1 threshold
        flags) and FANTASY_POINTS.
    """

    # count, per row, how many of the five categories reached 10 or more;
    # one vectorized pass replaces two row-by-row apply calls
    double_columns = ['POINTS', 'TOTREB', 'ASSISTS', 'STEALS', 'BLOCKS']
    double_counts = (df[double_columns] >= 10).sum(axis = 1)
    df['DD'] = double_counts >= 2
    df['TD'] = double_counts >= 3

    # miss calculations
    df["FGMI"] = df["FGA"] - df["FGM"]
    df["FTMI"] = df["FTA"] - df["FTM"]
    df["TPMI"] = df["TPA"] - df["TPM"]

    # threshold columns
    df["PB40"] = np.where(df["POINTS"] >= 40, 1, 0)
    df["PB50"] = np.where(df["POINTS"] >= 50, 1, 0)
    df["AB15"] = np.where(df["ASSISTS"] >= 15, 1, 0)
    df["RB20"] = np.where(df["TOTREB"] >= 20, 1, 0)

    # calculate the fantasy points; the bonuses stack, so a triple double also
    # earns the double double bonus and 50+ points also earns the 40+ bonus
    df["FANTASY_POINTS"] = (
        (df["POINTS"] * 1) + # +1 for each point scored
        (df["TOTREB"] * 1) + # +1 for each rebound
        (df["ASSISTS"] * 1) + # +1 for each assist
        (df["STEALS"] * 1.5) + # +1.5 for each steal
        (df["BLOCKS"] * 1.5) + # +1.5 for each block
        (df["TURNOVERS"] * -1) + # -1 for each turnover
        (df["DD"] * 5) + # +5 for double doubles
        (df["TD"] * 10) + # +10 for triple doubles
        (df["FGM"] * 0.5) + # +0.5 for field goals made
        ((df["FGMI"]) * -0.5) + # -0.5 for two or three point misses
        (df["FTM"] * 1) + # +1 for free throws made
        (df["FTMI"] * -1) + # -1 for free throw misses
        (df["TPM"] * 2) + # +2 for three pointers made
        (df["TPMI"] * -1) + # -1 for three point misses
        (df["OFFREB"] * 1.5) + # +1.5 for offensive rebounds
        (df["DEFREB"] * 1) + # +1 for defensive rebounds
        (df["PB40"] * 2) + # +2 for scoring 40+ points
        (df["PB50"] * 3) + # +3 for scoring 50+ points
        (df["AB15"] * 2) + # +2 for 15+ assists
        (df["RB20"] * 2) # +2 for 20+ rebounds
    )

    return df

## Combine Daily Files 

In [11]:
# get a list of all the daily stats files (sorted so the row order of the
# combined file does not depend on the order the file system lists them in)
stats_folder = f"{folder_path}//Stats"
files = sorted(os.listdir(stats_folder))

# read in each file; the frames are combined once after the loop instead of
# re-copying the growing dataframe with pd.concat on every file
daily_frames = []
for file in files:

    # anything in the folder that is not a csv is not a daily stats file
    if not file.endswith(".csv"):
        continue

    try:
        df_new = pd.read_csv(f"{stats_folder}//{file}")

    # we get this error if the file is empty (i.e. no games on that date);
    # any other read failure is no longer hidden
    except pd.errors.EmptyDataError:
        continue

    # keep it if there are actual rows in it
    if len(df_new.index) > 0:
        daily_frames.append(df_new)

# combine everything and number the rows from zero
if daily_frames:
    df_stats = pd.concat(daily_frames, ignore_index = True)
else:
    df_stats = pd.DataFrame()

# calculate the fantasy points
df_stats = calculate_fantasy_points(df_stats)

# join in some basic game info
df_stats = df_stats.merge(
    df_games[["GAME_ID", "GAME_DATE", "WEEK_NUMBER"]],
    on = "GAME_ID",
    how = "left"
)

# save the final dataframe
df_stats.to_csv(f"{folder_path}//all_stats.csv", index = False)

# showcase the data
df_stats.head()

Unnamed: 0,GAME_ID,PLAYER_ID,TEAM_ID,POINTS,POS,MIN,FGM,FGA,FGP,FTM,...,FGMI,FTMI,TPMI,PB40,PB50,AB15,RB20,FANTASY_POINTS,GAME_DATE,WEEK_NUMBER
0,14115,882,2,37,,30,14,18,77.8,1,...,4,1,3,0,0,0,0,80.0,2024-10-22 17:30:00,1
1,14115,248,2,11,,26,4,7,57.1,0,...,3,0,2,0,0,0,0,29.5,2024-10-22 17:30:00,1
2,14115,242,2,18,,31,7,9,77.8,0,...,2,0,2,0,0,0,0,41.0,2024-10-22 17:30:00,1
3,14115,897,2,24,,27,8,13,61.5,2,...,5,0,4,0,0,0,0,47.0,2024-10-22 17:30:00,1
4,14115,75,2,23,,30,7,18,38.9,4,...,11,0,4,0,0,0,0,47.5,2024-10-22 17:30:00,1
