# Setup 

In [46]:
import json 
import os 

import pandas as pd 
import requests 
from geopy.distance import geodesic 

# Folder that every JSON/CSV output of this notebook is written to (and that the
# formatting cells later read the saved activity JSON files back from).
# Change this to your own folder path; the cells below open files inside it
# without creating the folder, so it has to exist already.
folder_path = "C://Users//YOUR_FOLDER_PATH_HERE" 

# Calling the API 

## Authentication 

In [47]:
# Read the API key from the NIKE_API_KEY environment variable so the secret does
# not have to be saved inside the notebook. If the variable is not set, the
# value pasted below is used instead.
api_key = os.environ.get("NIKE_API_KEY", "PASTE_YOUR_API_KEY_HERE")

# create the headers object that is sent with every API request (bearer token)
headers = {
    "Authorization": f"Bearer {api_key}"
}

## Activity Summaries 

In [48]:
# fetch the first result group
# - the timeout keeps a stalled connection from hanging the kernel forever
# - raise_for_status() reports HTTP errors (for example an expired token) right
#   here instead of as a confusing KeyError on "activities" further down
response = requests.get(
    "https://api.nike.com/sport/v3/me/activities/after_time/0",
    headers = headers,
    timeout = 30
)
response.raise_for_status()
results = response.json()

# start the list of activities (copied so the response payload is not mutated)
all_activities = list(results["activities"])

# keep following the paging cursor until the API stops returning one;
# a missing or null paging section is treated as the last page
while "after_id" in (results.get("paging") or {}):

    # get the after_id
    after_id = results["paging"]["after_id"]

    # get the next set of results
    response = requests.get(
        f"https://api.nike.com/sport/v3/me/activities/after_id/{after_id}",
        headers = headers,
        timeout = 30
    )
    response.raise_for_status()
    results = response.json()

    # append to the list of activities
    all_activities += results["activities"]

# add to the final dictionary
combined_data = {
    "activities": all_activities
}

# save the result as a json file
with open(f"{folder_path}//activity_summaries.json", "w") as file:
    json.dump(combined_data, file, indent = 4)

## Activity Details 

In [49]:
# download the full detail payload of every activity found in the summaries
for activity in combined_data["activities"]:

    # get the activity id
    activity_id = activity["id"]

    # put together the url
    url = f"https://api.nike.com/sport/v3/me/activity/{activity_id}?metrics=ALL"

    # call the API; the timeout prevents a stalled request from hanging the loop
    response = requests.get(url, headers = headers, timeout = 30)

    # stop on an HTTP error instead of saving the error payload to disk as if it
    # were an activity (it would only fail later, when the file is read back)
    response.raise_for_status()
    res = response.json()

    # save the result as a JSON file
    with open(f"{folder_path}//full_{activity_id}.json", "w") as file:
        json.dump(res, file, indent = 4)

# Formatting the Data

## Activity Summaries 

### Functions 

In [50]:
# function to convert to the desired date/time formats 
def convert_unix_column(tscol, tz = "US/Central"):
    """Convert a Series of Unix epoch milliseconds to timezone-naive local datetimes.

    Parameters
    ----------
    tscol : pandas.Series
        Timestamps expressed as milliseconds since the Unix epoch.
    tz : str, default "US/Central"
        Name of the time zone the timestamps are converted to before the zone
        information is dropped. The default keeps the original behavior.

    Returns
    -------
    pandas.Series
        Datetimes showing the wall-clock time in ``tz``, with no timezone attached.
    """

    # convert the epoch ms to a UTC datetime object
    tscol = pd.to_datetime(tscol, unit = "ms", utc = True)

    # convert from UTC to the requested zone, then drop the timezone portion
    return tscol.dt.tz_convert(tz).dt.tz_localize(None)

In [51]:
# function for getting each of the summary attributes 
def get_summary_value(activity, metric, summary):
    """Return the value of the first summary entry matching ``metric`` and ``summary``.

    Parameters
    ----------
    activity : dict
        One activity record; its "summaries" entry is expected to be a list of
        dicts with "metric", "summary" and "value" keys.
    metric : str
        Metric name to look for (e.g. "pace").
    summary : str
        Summary kind to look for (e.g. "mean" or "total").

    Returns
    -------
    object or None
        The "value" of the first matching entry, or None when nothing matches
        or the activity has no (or a null) "summaries" section.
    """

    # an activity without summaries is treated the same as "no match"
    summaries = activity.get("summaries") or []

    # stop at the first entry that matches both the metric and the summary kind
    return next(
        (
            sm.get("value") for sm in summaries
            if sm.get("metric") == metric and sm.get("summary") == summary
        ),
        None
    )

### Creating the dataframe 

In [52]:
# columns of the summary table, in output order (passing them explicitly keeps
# the columns in place even when there are no activities at all)
run_columns = [
    "ACTIVITY_ID", "START_TIME", "END_TIME", "AVG_PACE", "AVG_SPEED",
    "TOTAL_DISTANCE", "TOTAL_ASCENT", "TOTAL_DESCENT", "TOTAL_CALORIES"
]

# build one record per activity; the dataframe is created once from the full
# list instead of being grown with pd.concat on every iteration
run_records = [
    {
        "ACTIVITY_ID": activity["id"],
        "START_TIME": activity["start_epoch_ms"],
        "END_TIME": activity["end_epoch_ms"],
        "AVG_PACE": get_summary_value(activity, "pace", "mean"),
        "AVG_SPEED": get_summary_value(activity, "speed", "mean"),
        "TOTAL_DISTANCE": get_summary_value(activity, "distance", "total"),
        "TOTAL_ASCENT": get_summary_value(activity, "ascent", "total"),
        "TOTAL_DESCENT": get_summary_value(activity, "descent", "total"),
        "TOTAL_CALORIES": get_summary_value(activity, "calories", "total")
    }
    for activity in combined_data["activities"]
]

# one row per activity, with a clean 0..n-1 index
df_runs = pd.DataFrame(run_records, columns = run_columns)

# convert the time columns to the format that we want
df_runs["START_TIME"] = convert_unix_column(df_runs["START_TIME"])
df_runs["END_TIME"] = convert_unix_column(df_runs["END_TIME"])

# write to a csv
df_runs.to_csv(f"{folder_path}//activity_summaries.csv", index = False)

## Activity Details 

### Functions 

In [53]:
# function for getting the full activity details from the json list 
def get_metrics(metrics, metric_type):
    """Return the samples of one metric type as a dataframe.

    Parameters
    ----------
    metrics : list of dict
        Metric entries of one activity; each entry is expected to carry a
        "type" and a list of "values".
    metric_type : str
        The metric type to extract (e.g. "pace").

    Returns
    -------
    pandas.DataFrame or None
        Columns START_TIME, END_TIME, VALUE and METRIC_TYPE. None is returned
        when the metric type is not present, has no values, or its values do
        not have exactly three fields.

    Notes
    -----
    The three value fields are renamed by position, so they are assumed to
    arrive in start / end / value order -- TODO confirm against the API payload.
    """

    try:

        # values of the first metric entry whose type matches
        vals = next(met["values"] for met in metrics if met["type"] == metric_type)

        # get the metrics as a dataframe
        df = pd.DataFrame(vals)

        # change the column names (raises ValueError unless there are exactly three)
        df.columns = ["START_TIME", "END_TIME", "VALUE"]

    # only the expected "not found / wrong shape" failures mean "no data";
    # anything else (including KeyboardInterrupt) is allowed to propagate
    except (StopIteration, KeyError, TypeError, ValueError):
        return None

    # keep track of the metric type
    df["METRIC_TYPE"] = metric_type

    return df

### Creating the dataframe 

In [54]:
# metric types to pull
metric_types = [
    "distance", "steps", "speed", "calories", "heart_rate", "pace",
    "latitude", "longitude", "elevation", "ascent", "descent"
]

# one dataframe per (activity, metric type); they are concatenated once at the
# end instead of growing df_full with pd.concat on every iteration
detail_frames = []

# iterate through each activity
for activity_id in df_runs["ACTIVITY_ID"]:

    # read in the json data
    with open(f"{folder_path}//full_{activity_id}.json", "r") as f:
        data = json.load(f)

    # iterate through each attribute
    for metric_type in metric_types:

        # get the metrics as a dataframe
        df_new = get_metrics(data["metrics"], metric_type)

        # make sure that we have some data
        if df_new is not None:

            # add the activity id
            df_new["ACTIVITY_ID"] = activity_id

            # keep it for the final concat
            detail_frames.append(df_new)

# combine everything; with no data at all, fall back to an empty frame that
# still has the expected columns (pd.concat raises on an empty list)
if detail_frames:
    df_full = pd.concat(detail_frames, ignore_index = True)
else:
    df_full = pd.DataFrame(
        columns = ["START_TIME", "END_TIME", "VALUE", "METRIC_TYPE", "ACTIVITY_ID"]
    )

# convert the timezones to central
df_full["START_TIME"] = convert_unix_column(df_full["START_TIME"])
df_full["END_TIME"] = convert_unix_column(df_full["END_TIME"])

# write to a csv
df_full.to_csv(f"{folder_path}//activity_details.csv", index = False)

## Pivot Location Data 

In [55]:
# the location-related metrics that become columns of the pivoted table
metric_types = ["latitude", "longitude", "elevation"]

# overall start time of every run, renamed so it can sit next to the
# per-sample START_TIME after the join
df_starts = df_runs[["ACTIVITY_ID", "START_TIME"]].rename(
    columns = {"START_TIME": "RUN_START"}
)

df_pivot = (
    # keep only the rows of the wanted metrics
    df_full.loc[df_full["METRIC_TYPE"].isin(metric_types)]
    # one row per sample interval and activity, one column per metric type
    .pivot_table(
        index = ["START_TIME", "END_TIME", "ACTIVITY_ID"],
        columns = "METRIC_TYPE",
        values = "VALUE"
    )
    .reset_index()
    # drop samples that are missing any of the metrics
    .dropna()
    .reset_index(drop = True)
    # upper-case every column name and clear the leftover columns-axis label
    .rename(columns = str.upper)
    .rename_axis(columns = None)
    # attach the start time of the run each sample belongs to
    .merge(df_starts, on = "ACTIVITY_ID", how = "left")
    # time into the run, in hours (seconds divided by 60 * 60)
    .assign(
        TOTAL_TIME = lambda d: (
            d["START_TIME"] - d["RUN_START"]
        ).dt.total_seconds() / (60 * 60)
    )
)

### Cumulative distance/time calculations 

In [56]:
# function to calculate distance between previous and current row 
def calculate_distance(row):
    """Return the geodesic distance in miles between a row's previous and current point.

    Parameters
    ----------
    row : mapping (e.g. one DataFrame row)
        Must provide "LATITUDE_PREV", "LONGITUDE_PREV", "LATITUDE" and "LONGITUDE".

    Returns
    -------
    float or None
        The distance in miles, or None when any coordinate is missing or is
        rejected by geopy.

    Raises
    ------
    KeyError
        If one of the four coordinate fields is absent from ``row``.
    """
    previous_point = (row["LATITUDE_PREV"], row["LONGITUDE_PREV"])
    current_point = (row["LATITUDE"], row["LONGITUDE"])

    # a missing coordinate (e.g. no previous point yet) means there is no distance
    if any(pd.isna(coord) for coord in previous_point + current_point):
        return None

    try:
        return geodesic(previous_point, current_point).miles
    # only bad coordinate values (out of range, non-numeric) count as "no distance";
    # any other error is a real problem and is allowed to propagate
    except (ValueError, TypeError):
        return None

### Column calculations 

In [57]:
# order the samples chronologically inside each activity
df_pivot = (
    df_pivot
    .sort_values(["ACTIVITY_ID", "START_TIME"])
    .reset_index(drop = True)
)

# coordinates of the preceding sample within the same activity
# (the first sample of each activity has none, so it gets NaN)
prev_coords = df_pivot.groupby("ACTIVITY_ID")[["LATITUDE", "LONGITUDE"]].shift(1)
df_pivot["LATITUDE_PREV"] = prev_coords["LATITUDE"]
df_pivot["LONGITUDE_PREV"] = prev_coords["LONGITUDE"]

# distance covered since the previous sample; rows without a distance count as 0
step_distance = df_pivot.apply(calculate_distance, axis = 1)
df_pivot["DISTANCE_DIFF"] = step_distance.fillna(0)

# running total of the distance within each activity
df_pivot["TOTAL_DISTANCE"] = (
    df_pivot
    .groupby("ACTIVITY_ID")["DISTANCE_DIFF"]
    .cumsum()
)

# write to a csv
df_pivot.to_csv(f"{folder_path}//pivoted_location_data.csv", index = False)