Every year, in early May, the regular season winds down for college lacrosse and the playoffs begin. Unlike NCAA basketball and March Madness – where 68 teams are invited – only 18 teams are invited to the national tournament. For the 70 Division I teams competing for those spots, there are two ways to be selected. The first way is simple: secure an automatic qualification by winning your conference’s playoff tournament. There are ten conferences, leaving eight spots for other teams. These spots, known as “at-large” selections, are up to the National Selection Committee to hand out. The committee utilizes the following criteria to select and seed teams:
- Strength of schedule index.
- Results of the RPI.
- Record against ranked teams 1-5; 6-10; 11-15; 16-20; 21+
- Average RPI win (average RPI of all wins)
- Average RPI loss (average RPI of all losses)
- Head-to-head competition:
- Results versus common opponents.
- Significant wins and losses (wins against teams ranked higher in the RPI and losses against teams ranked lower in the RPI).
- Locations of contests.
- Input from the regional advisory committee (comprised of lacrosse coaches from all AQ conferences).
These criteria seem to place serious significance on this “RPI” ranking, but what even is it?
RPI, or rating percentage index, is a ranking of sports teams that aims to combine wins, losses, and strength of schedule. For any particular lacrosse team, it is calculated as:

RPI = 0.25 × (winning percentage) + 0.50 × (opponents’ winning percentage) + 0.25 × (opponents’ opponents’ winning percentage)
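In code form, that weighting is just a one-liner. The component values in the example are made up for illustration:

```python
def rpi(wp, opp_wp, opp_opp_wp):
    # the standard NCAA RPI weighting: 25% a team's own winning
    # percentage, 50% its opponents' winning percentage, and 25% its
    # opponents' opponents' winning percentage
    return 0.25 * wp + 0.50 * opp_wp + 0.25 * opp_opp_wp

# a hypothetical 8-2 team with a middling schedule
print(rpi(0.80, 0.55, 0.52))
```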
The selection committee has traditionally used the RPI as a factor in its decisions for a variety of reasons. For one thing, it is relatively easy to calculate: you do not need any advanced data or subjective metrics, just each team’s wins and losses. Moreover, for a sport like college lacrosse, where each team only plays a small subset of all the other teams, RPI does a good job of accounting for teams that had harder and easier schedules. On top of that, RPI typically aligns well with the “eye-test,” or how good you think a team is just from watching it. For those and other reasons, RPI has become an integral part of the selection process for the national college lacrosse tournament.
There are, however, several aspects of RPI to complain about. First off, it does not account for factors like margin of victory and home field advantage, which are known to be important when predicting future outcomes. Additionally, not all conferences are equally competitive, so RPI can artificially inflate the RPIs of teams in “good” conferences and deflate the RPIs of teams in “bad” conferences.
With that in mind, I will be looking into the question of whether RPI is the best statistic to use for predicting outcomes of college lacrosse games, or if there is a better predictor out there that the selection committee should perhaps be using.
I will be pursuing two main avenues to try and find a better predictor. First, I will look at alternative ranking systems such as ELO. ELO is a rating system originally designed for ranking chess players, which you can read more about here. There are a number of nuances in applying the ELO system to lacrosse, but those have mostly been taken care of by the lacrosse analytics site Lacrosse Reference. A detailed explanation of their lacrosse ELO system can be found here, and I will just use their model for my comparison. The other two ranking systems I will look at are the coaches and media polls, from USILA and Inside Lacrosse respectively. Unlike RPI and ELO, these rankings are more subjective. They are not based completely on stats, but factor in the "eye-test" as well: judging teams based on observations you make watching them play, or generally any factor not easily captured by a stat. More specific information about USILA can be found here.
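To give a sense of the mechanics, here is a minimal sketch of the classic Elo update rule. The K-factor of 32 and the starting ratings are illustrative defaults, not the parameters Lacrosse Reference actually uses:

```python
def expected_score(rating_a, rating_b):
    # the Elo model's probability that A beats B
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

def update_elo(rating_a, rating_b, a_won, k=32):
    # nudge both ratings toward the actual result, in proportion
    # to how surprising that result was
    exp_a = expected_score(rating_a, rating_b)
    change = k * ((1 if a_won else 0) - exp_a)
    return rating_a + change, rating_b - change

# two evenly matched teams: the winner gains exactly half of K
new_a, new_b = update_elo(1500, 1500, a_won=True)
print(new_a, new_b)  # 1516.0 1484.0
```

Upsets move more rating points than expected wins, which is what lets the system converge on a team's true strength over a season.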
Second, I will try to improve RPI itself, using gradient descent to find better weights to apply to the three statistics it is based on. The weights of 25%, 50%, and 25% were originally chosen somewhat arbitrarily as “nice” values that seemed right. It stands to reason, then, that by tweaking those values with gradient descent we could create an RPI-like stat that is a better predictor of the outcome of lacrosse games.
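As a sketch of the idea before touching the real data, here is gradient descent over the three weights on purely synthetic data. The winner-minus-loser component differences are randomly generated, and the logistic loss, learning rate, and normalization are my own illustrative choices, not the procedure used later in this analysis:

```python
import numpy as np

rng = np.random.default_rng(0)
# synthetic stand-in data: each row is (WP, OWP, OOWP) of a game's
# winner minus the same three components for the loser
diffs = rng.normal(0.05, 0.1, size=(200, 3))

def loss(weights):
    # smooth logistic surrogate for "the winner's weighted score
    # exceeded the loser's"
    margins = diffs @ weights
    return np.mean(np.log(1 + np.exp(-margins)))

def grad(weights, eps=1e-6):
    # central-difference numerical gradient; fine at this tiny scale
    g = np.zeros(3)
    for i in range(3):
        step = np.zeros(3)
        step[i] = eps
        g[i] = (loss(weights + step) - loss(weights - step)) / (2 * eps)
    return g

w = np.array([0.25, 0.50, 0.25])  # start from the official weights
for _ in range(100):
    w -= 0.5 * grad(w)            # gradient descent step
    w = np.clip(w, 0, None)       # keep the weights non-negative...
    w /= w.sum()                  # ...and summing to 1, like RPI's

print(w.round(3))
```

The same loop applied to real game results is what should reveal whether 25/50/25 is actually the best split.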
To begin the analysis, there are several datasets I need to create. I first need a dataset of all the college lacrosse games that have been played this season. I can only look at the current season because Lacrosse Reference only has the current season’s ELO rankings publicly available. I will also not be able to look at predictiveness for post-season games, as the post-season has not completed as I am writing this. That is not a problem though, since the regular season is complete, and it is a larger dataset to analyze anyway.
To get the game data, I will be using the daily scoreboard on the NCAA Statistics website. The scoreboard has information on all the games played on a given day, including who played and who won. I was not able to find a public API, so to get the data I scraped the scoreboard day by day for every day of the lacrosse season, which only requires changing the date in the URL. As I collect the data I put it into two dataframes. One dataframe serves as a schedule, saving the winner of each game as well as which team was home and away. The other dataframe stores each game twice, once for each team. This will make it easier later to select, for instance, the games a team played or the games played against them.
import requests as reqs
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
# helper function for removing the trailing "(record)" from team name strings
def remove_record(s):
    try:
        return re.match(r'^(.*) \(', s).group(1)
    except AttributeError:
        return s

# some of the sites I am scraping from will not respond unless the
# User-Agent header is set in the HTTP request
headers = {"User-Agent": "Lax-Stats-Bot"}

# Because this project is due 5-16-2022, I will not be able to include
# the last few tournament games in the dataset because they have not
# happened yet
season_dates = pd.date_range('2022-01-29', '2022-5-15')

# empty dataframes to store data in
df_games = pd.DataFrame()
df_schedule = pd.DataFrame()

for date in season_dates:
    # If there were no games on a particular day, trying to access the games table
    # will cause an error
    try:
        # can just change the month and day parameters in the URL to get a different day's games
        url = f'https://stats.ncaa.org/season_divisions/17822/scoreboards?utf8=%E2%9C%93&season_division_id=&game_date={date.month}%2F{date.day}%2F2022&conference_id=0&tournament_id=&commit=Submit'
        # make the request and turn the scoreboard into a dataframe
        req = reqs.get(url, headers=headers)
        games_table = pd.read_html(req.content)[0]
        # for each game in the games table, we need to
        # further parse out the team names and other info.
        # each game is represented by 5 rows in the html table
        for i in range(0, len(games_table), 5):
            # if a game was cancelled or postponed, it will throw an error when we try
            # to read in the score.
            # We are not worried about these games since you cannot check a prediction
            # for them, so we will just throw them out
            try:
                # grab the rows that make up this game
                game = games_table.loc[i:i+5]
                # the team names are followed by the record, but we just
                # want to store the name with no record following it
                team1 = remove_record(game.loc[i, 'Teams.1'])
                team2 = remove_record(game.loc[i+3, 'Teams'])
                # some teams have a number ranking before the name
                # we are not interested in that so it is removed
                team1 = re.sub(r'^#\d ', '', team1)
                team2 = re.sub(r'^#\d ', '', team2)
                # read in the scores so we can tell who won
                # (the parsed columns line up differently for the two team rows)
                team1_score = int(game.loc[i, 'Score.1'])
                team2_score = int(game.loc[i + 3, 'Teams.1'])
                # let win = 1 and loss = 0 to make manipulation easy later
                team1_outcome = 1 if team1_score > team2_score else 0
                team2_outcome = 1 if team2_score > team1_score else 0
                # put the data into dictionaries so it can be added to the
                # dataframes as rows
                game1 = {
                    'date': date,
                    'opponent': team2,
                    'outcome': team1_outcome,
                    'team': team1,
                }
                game2 = {
                    'date': date,
                    'opponent': team1,
                    'outcome': team2_outcome,
                    'team': team2,
                }
                scheduled_game = {
                    'home': team2,
                    'away': team1,
                    'date': date,
                    'winner': team1 if team1_score > team2_score else team2
                }
                df_schedule = df_schedule.append(scheduled_game, ignore_index=True)
                df_games = df_games.append(game1, ignore_index=True)
                df_games = df_games.append(game2, ignore_index=True)
            except ValueError:
                pass
    except ValueError:
        print('No games on ' + date.isoformat()[:10])
No games on 2022-01-30 No games on 2022-01-31 No games on 2022-02-01 No games on 2022-02-02 No games on 2022-02-03 No games on 2022-02-07 No games on 2022-02-08 No games on 2022-02-10 No games on 2022-02-14 No games on 2022-02-17 No games on 2022-02-24 No games on 2022-03-03 No games on 2022-03-07 No games on 2022-03-17 No games on 2022-03-21 No games on 2022-03-23 No games on 2022-03-24 No games on 2022-03-25 No games on 2022-03-28 No games on 2022-03-30 No games on 2022-03-31 No games on 2022-04-04 No games on 2022-04-06 No games on 2022-04-13 No games on 2022-04-18 No games on 2022-04-25 No games on 2022-04-27 No games on 2022-05-02 No games on 2022-05-04 No games on 2022-05-09 No games on 2022-05-10 No games on 2022-05-12 No games on 2022-05-13
df_games.tail()
| | date | opponent | outcome | team |
|---|---|---|---|---|
| 1065 | 2022-05-15 | Ohio St. | 1.0 | Cornell |
| 1066 | 2022-05-15 | Rutgers | 0.0 | Harvard |
| 1067 | 2022-05-15 | Harvard | 1.0 | Rutgers |
| 1068 | 2022-05-15 | Georgetown | 1.0 | Delaware |
| 1069 | 2022-05-15 | Delaware | 0.0 | Georgetown |
df_schedule.tail()
| | away | date | home | winner |
|---|---|---|---|---|
| 530 | Virginia | 2022-05-14 | Brown | Virginia |
| 531 | Saint Joseph's | 2022-05-14 | Yale | Yale |
| 532 | Ohio St. | 2022-05-15 | Cornell | Cornell |
| 533 | Harvard | 2022-05-15 | Rutgers | Rutgers |
| 534 | Delaware | 2022-05-15 | Georgetown | Delaware |
Now that we have all of the games that were played, we have enough information to calculate winning percentages, and thus RPI, for an arbitrary date.
Now we need to get the ELO rankings for teams throughout the season. Lacrosse Reference does have a page with every team's current ELO, but it only has the up-to-date ranking, and in order to use ELO to predict games I will need each team's ELO before every game. To get this I will need to look at each team's individual page on Lacrosse Reference. The individual pages have a table that shows the change in ELO after each game.

I will just need to change this from a game-to-game difference into a cumulative ranking after each game.
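As a design note, pandas can do this running total directly with cumsum; here is a small sketch on made-up ELO changes:

```python
import pandas as pd

starting_elo = 1500
# made-up game-to-game ELO changes for one team
changes = pd.Series([12, -5, 8, -20])

# the ELO after each game is the starting value plus the running
# total of every change up to and including that game
elo_after_each_game = starting_elo + changes.cumsum()
print(elo_after_each_game.tolist())  # [1512, 1507, 1515, 1495]
```

The scraping code below does the same accumulation with an explicit row-by-row loop.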
The individual team pages have a standard URL format, where the team to get the page for is identified by a two-digit number at the end of the URL. By incrementing through all the teams I can scrape every team's page.
# getting laxELO data
df_elo = pd.DataFrame()
for i in range(1, 76):
    try:
        req = reqs.get('https://lacrossereference.com/teams/a00%02d' % (i))
        soup = BeautifulSoup(req.content)
        # first figure out the team name
        # it is in a div with a unique set of CSS classes, so it can be easily pulled out with Beautiful Soup
        team_str = soup.find(class_='col-12 font-24 bold').get_text()
        # that div contains the team's record as well, so the helper from earlier isolates the name
        team = remove_record(team_str)
        # Scraping the ELO data
        # The ELO table is unfortunately not an HTML table, but we can grab the div by its unique ID
        elo_table_string = soup.find(id='elo_change_table').get_text()
        # the table lists a starting ELO, and then the change in ELO after each game
        # here we pull out the starting ELO by targeting its div
        starting_elo = int(soup.find_all(class_="no-padding dtop")[1].get_text())
        # thanks to Beautiful Soup, the rows of the table are separated by 2 newlines and the columns by a single newline
        # splitting on rows and columns, then throwing out rows/columns that do not contain data, gives a 2D array that pandas can turn into a dataframe
        elo_table = pd.DataFrame([x.split('\n')[1:] for x in elo_table_string.split('\n\n')[3:-1]])
        # name the columns, drop unnecessary ones, and add a team name column
        elo_table.columns = ['opponent', 'score', 'date', 'elo']
        elo_table.drop(['opponent', 'score'], axis=1, inplace=True)
        elo_table['team'] = team
        # convert the date and ELO columns to datetimes and numerics
        elo_table['date'] = pd.to_datetime(elo_table['date'] + ' 2022', format='%b %d %Y')
        elo_table['elo'] = pd.to_numeric(elo_table['elo'])
        # add a row for the initial ELO on 1/28, the day before the first game of the season
        initial = {
            'team': team,
            'elo': starting_elo,
            'date': pd.to_datetime('2022-01-28'),
        }
        df_elo = df_elo.append(initial, ignore_index=True)
        # Since the ELO column holds the game-to-game change, go through it and update it to an absolute ELO
        for index, row in elo_table.iterrows():
            prev = starting_elo if index == 0 else elo_table.loc[index - 1, 'elo']
            elo_table.loc[index, 'elo'] = elo_table.loc[index, 'elo'] + prev
        # add the team's table to the overall ELO dataframe
        df_elo = df_elo.append(elo_table)
    except Exception as err:
        print('Problem fetching data from: "https://lacrossereference.com/teams/a00%02d"' % (i))
Problem fetching data from: "https://lacrossereference.com/teams/a0021" Problem fetching data from: "https://lacrossereference.com/teams/a0024"
Manually checking the pages that did not have a team, they seem to be for teams that were Division I at some point, but are no longer playing in Division I.
df_elo.tail()
| | date | elo | team |
|---|---|---|---|
| 10 | 2022-04-09 | 1494.0 | LIU |
| 11 | 2022-04-16 | 1469.0 | LIU |
| 12 | 2022-04-23 | 1460.0 | LIU |
| 13 | 2022-04-30 | 1485.0 | LIU |
| 14 | 2022-05-05 | 1484.0 | LIU |
Now I have the ELO ranking for each team after every date in the season where it changed. There is a slight issue though. As you can see, the most recent ELO ranking of LIU is listed as 1484, while at the time of writing, the ELO listed by Lacrosse Reference on the main ELO page is 1485. Manually looking through all the teams, I noticed that many of them are off from Lacrosse Reference's official rankings by a point or two. I believe this is because, behind the scenes, Lacrosse Reference calculates ELO and ELO changes as decimal numbers, but rounds them to the nearest whole number for display on the website. Over the course of a season the small rounding differences add up and cause some of my calculated ELOs to be slightly off. Because Lacrosse Reference does not have a public API, there is not really anything I can do to get around this issue. I am not worried about this affecting my analysis though. When you look at the ELO chart on Lacrosse Reference, you can see that it ranges from about 900 to nearly 2100, with most teams being separated by more than a point or two. Hence, I am not worried that the small differences between my dataset and the authentic Lacrosse Reference ELO data will cause any significant problems.
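The drift is easy to reproduce: summing changes that were each rounded for display can land a point or more away from rounding the exact total. The change values here are invented purely for illustration:

```python
# invented decimal ELO changes over a handful of games
changes = [3.4, 2.4, -1.4, 2.4, 1.4]

# what I reconstruct: a sum of the individually rounded display values
rounded_then_summed = sum(round(c) for c in changes)
# what the exact arithmetic gives: the true sum, rounded once
summed_then_rounded = round(sum(changes))

print(rounded_then_summed, summed_then_rounded)  # 7 8
```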
For the USILA coaches ranking there is again no public API, so I will need to scrape the rankings from the individual pages they were posted on. The polls are posted every Monday on a page like this. The URLs are the same with just the date changed, so I can relatively easily scrape through all of them.
# USILA
# The URL for USILA polls uses the text version of the month instead of
# the number representation, so this dictionary just helps convert
# datetimes into the correct format
month_mappings = {
    2: 'feb',
    3: 'march',
    4: 'april',
    5: 'may'
}

# As of writing this, there are 14 weeks of polls published, starting every Monday
# from February 7th to May 9th.
dates = [date for date in pd.date_range('2022-02-07', '2022-05-02', freq='7D')]
# Use the list of poll publish dates to generate the URL for the poll published
# on each date
urls = [f'https://usila.org/news/2022/{date.month}/{date.day}/mens-lacrosse-usila-dynamic-2022-mens-coaches-division-i-poll-week-of-{month_mappings[date.month]}-{date.day}.aspx' for date in dates]
# the very first ranking released was a "preseason" ranking, and did not follow
# the typical URL format, so I just add it manually, along with its publish date
# for some reason the week 14 ranking uses a different format as well
urls += ['https://usila.org/news/2022/2/1/mens-lacrosse-usila-dynamic-2022-mens-coaches-division-i-poll-preseason.aspx',
         'https://usila.org/news/2022/5/9/mens-lacrosse-usila-dynamic-2022-mens-coaches-division-i-poll-week-of-may-9-1.aspx']
dates += [pd.to_datetime('2022-02-01'), pd.to_datetime('2022-05-09')]

# create the empty dataframe to add the data to
df_usila = pd.DataFrame()
for url, date in zip(urls, dates):
    # get the page content
    req = reqs.get(url, headers=headers)
    # turn the table into a dataframe
    # the ranking is conveniently the first table on the page
    df = pd.read_html(req.content)[0]
    # rename the columns to what they actually represent; I am only
    # interested in the teams and ranks, so the rest of the columns can be dropped
    df.columns = ['team', 'rank', 'record', 'points', 'last']
    df.drop(['record', 'points', 'last'], axis=1, inplace=True)
    # the team name strings include the records, so use my helper function
    # from earlier to remove them
    df['team'] = df['team'].apply(remove_record)
    # add the date, as I will need that for analysis later
    df['date'] = date
    df_usila = df_usila.append(df)
df_usila.head()
| | team | rank | date |
|---|---|---|---|
| 0 | Virginia | 1 | 2022-02-07 |
| 1 | Duke | 2 | 2022-02-07 |
| 2 | Maryland | 3 | 2022-02-07 |
| 3 | Georgetown | 4 | 2022-02-07 |
| 4 | North Carolina | 4 | 2022-02-07 |
Now we have the complete coaches poll rankings from USILA. (I cannot believe somebody ranked Duke above Maryland going into the season)
I can get the media polls from Inside Lacrosse in a similar way, but as far as I can tell the URLs for the Inside Lacrosse pages are not predictable. Fortunately, on the main Inside Lacrosse polls page, there is a navigation dropdown with links to all of the polls. If I scrape that page first for the list of poll URLs, I can go through that list to get the poll for each week.
# Inside Lacrosse
# helper function that takes the week number a poll was published on
# and returns the date that many weeks after the initial list's date
# The preseason ranking was published more than a week before the others,
# so it has a special case with its publish date hard-coded
def get_date(s):
    preseason_date = '2022-01-24'
    first_week = '2022-02-07'
    if s == 'Preseason':
        return pd.to_datetime(preseason_date)
    # take the week number from the end of the link text
    # (splitting on whitespace also handles two-digit week numbers)
    weeks_past = int(s.split()[-1])
    return pd.to_datetime(first_week) + pd.Timedelta(weeks_past, 'W')

# get the page containing the URLs to all the polls
poll_page = reqs.get('https://www.insidelacrosse.com/league/di/polls/2022', headers=headers)
soup = BeautifulSoup(poll_page.content)
# Use Beautiful Soup to get all the dropdown menus on the page
dropdown_menus = soup.find_all(class_='dropdown-menu')
# the poll URLs are in the second dropdown menu on the page
poll_pages = dropdown_menus[1]
# The URLs stored in the navigation dropdown are relative paths, so I
# need to add the base path to the front to get absolute paths
base_url = 'https://www.insidelacrosse.com'
# create a list of tuples pairing the link to each page with its date
urls_with_date = [((base_url + link['href']), get_date(link.text)) for link in poll_pages.contents]

# empty dataframe for the results
df_ilax = pd.DataFrame()
for url, date in urls_with_date:
    poll_page = reqs.get(url, headers=headers)
    # read all the tables from the page
    rankings = pd.read_html(poll_page.content)
    # the ranking table is the first one on the page, and we can pull it out by name
    rankings = rankings[0]["Men's Division I Media Poll"]
    # the automatically generated dataframe has a lot of junk in it;
    # instead of trying to clean it up, I will just pull the info I
    # need out of it into a new dataframe
    tidier = pd.DataFrame()
    # need to remove the record from the team name string
    tidier['team'] = rankings['Team'].apply(remove_record)
    # add the only two pieces of data I am interested in
    tidier['rank'] = rankings['Rank']
    tidier['date'] = date
    # add it to the results dataframe
    df_ilax = df_ilax.append(tidier)
df_ilax.head()
| | team | rank | date |
|---|---|---|---|
| 0 | Maryland | 1 | 2022-02-28 |
| 1 | Georgetown | 2 | 2022-02-28 |
| 2 | Penn | 3 | 2022-02-28 |
| 3 | Notre Dame | 4 | 2022-02-28 |
| 4 | Virginia | 5 | 2022-02-28 |
That concludes the collection of all the data necessary to analyze the predictive power of the different ranking systems. Because I collected all the data myself and validated it as I went, there is not much cleaning that needs to be done. The only issue is that some sources use slightly different versions of school names to identify them, which will cause problems later when trying to match records up. To fix this, I decided to treat the version of the name used on the NCAA scoreboard as the standard, and manually looked through the other sources for differences. I compiled those differences into a lookup table so I can go through each dataframe and replace the non-standard versions of names with the standardized ones.
team_names = {
    'Army West Point': 'Army',
    'UAlbany': 'Albany',
    'Albany (NY)': 'Albany',
    'Boston U.': 'Boston U',
    'Boston': 'Boston U',
    'Cleveland St.': 'Cleveland State',
    'Detroit Mercy': 'Detroit',
    'Loyola Maryland': 'Loyola',
    'Massachusetts': 'UMass',
    "Mount St. Mary’s": 'Mount St. Mary\'s',
    "Mount St Marys": 'Mount St. Mary\'s',
    'Ohio St.': 'Ohio State',
    'Penn St.': 'Penn State',
    'St. John\'s (NY)': 'St. John\'s',
    "St. John’s": "St. John\'s",
    'UMass Lowell': 'UMass-Lowell',
    'Saint Joseph`s': 'Saint Joseph\'s',
    'Saint Joseph’s': 'Saint Joseph\'s'
}

# standardize the team names so they are consistent across all dataframes
df_games = df_games.replace(team_names)
df_schedule = df_schedule.replace(team_names)
df_elo = df_elo.replace(team_names)
df_usila = df_usila.replace(team_names)
df_ilax = df_ilax.replace(team_names)
To start exploring the data I need to do some quick calculations first. In order to calculate RPI, you need to know each team's winning percentage as of the date of each game. To make this easier later, I will add each team's winning percentage after each game they play to the games dataframe.
# Add the winning percentage of each team as of each game
for index, row in df_games.iterrows():
    date = row['date']
    team = row['team']
    # get all games played by this team by this date
    relevant_games = df_games[(df_games['team'] == team) & (df_games['date'] <= date)]
    # skip teams with no games on record to avoid a meaningless percentage
    if len(relevant_games) > 0:
        wp = relevant_games['outcome'].mean()
        df_games.at[index, 'wp'] = wp
df_games.tail(10)
| | date | opponent | outcome | team | wp |
|---|---|---|---|---|---|
| 1060 | 2022-05-14 | Brown | 1.0 | Virginia | 0.800000 |
| 1061 | 2022-05-14 | Virginia | 0.0 | Brown | 0.625000 |
| 1062 | 2022-05-14 | Yale | 0.0 | Saint Joseph's | 0.777778 |
| 1063 | 2022-05-14 | Saint Joseph's | 1.0 | Yale | 0.750000 |
| 1064 | 2022-05-15 | Cornell | 0.0 | Ohio State | 0.625000 |
| 1065 | 2022-05-15 | Ohio State | 1.0 | Cornell | 0.750000 |
| 1066 | 2022-05-15 | Rutgers | 0.0 | Harvard | 0.615385 |
| 1067 | 2022-05-15 | Harvard | 1.0 | Rutgers | 0.823529 |
| 1068 | 2022-05-15 | Georgetown | 1.0 | Delaware | 0.722222 |
| 1069 | 2022-05-15 | Delaware | 0.0 | Georgetown | 0.882353 |
To be able to calculate the prediction a ranking makes, I need to be able to easily get the ranking of a team on a particular date. Most of the rankings can be found with a lookup from the respective dataframes, but there are some specific cases for each ranking system.
For RPI, I actually need to calculate its three components individually. The helper function returns those three components without applying weights to them. This makes it easier to test RPI systems with different weights later, as there will be no need to recalculate the three components.
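As an aside, these per-game "most recent ranking before a date" lookups could also be vectorized with pandas' merge_asof, which pairs each row with the latest earlier entry for the same key. A standalone sketch with made-up frames (the real dataframes in this notebook have the same team/date/elo shape):

```python
import pandas as pd

# made-up game dates and ELO history for one team
games = pd.DataFrame({
    'date': pd.to_datetime(['2022-03-01', '2022-04-01']),
    'team': ['Maryland', 'Maryland'],
})
elo = pd.DataFrame({
    'date': pd.to_datetime(['2022-02-15', '2022-03-20']),
    'team': ['Maryland', 'Maryland'],
    'elo': [1550, 1580],
})

# for each game, grab the latest ELO entry strictly before the game date;
# both frames must be sorted on the "on" column
merged = pd.merge_asof(
    games.sort_values('date'),
    elo.sort_values('date'),
    on='date', by='team',
    direction='backward', allow_exact_matches=False,
)
print(merged['elo'].tolist())
```

I stuck with the explicit lookup helpers below since they mirror how each ranking's special cases are described, but merge_asof would be considerably faster over the full schedule.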
def get_elo(team, date):
    try:
        # the relevant entries are the ones for this team before the
        # specified date
        rel = df_elo[(df_elo['team'] == team) & (df_elo['date'] < date)]
        # sort the relevant entries by date and grab the most recent one,
        # which is at the bottom because the default sort is ascending
        return rel.sort_values('date').iloc[-1]['elo']
    except IndexError:
        # if there were no relevant entries, trying to grab one causes
        # an error, so just return an ELO of 0
        print(f'{team} could not be found')
        return 0

def get_usila(team, date):
    try:
        rel = df_usila[(df_usila['team'] == team) & (df_usila['date'] < date)]
        return rel.sort_values('date').iloc[-1]['rank']
    except IndexError:
        # default if unranked
        return 100

def get_ilax(team, date):
    try:
        rel = df_ilax[(df_ilax['team'] == team) & (df_ilax['date'] < date)]
        return rel.sort_values('date').iloc[-1]['rank']
    except IndexError:
        # default if unranked
        return 100

def get_wp(team, date, without=None):
    try:
        rel = df_games[(df_games['team'] == team) & (df_games['date'] < date) & (df_games['opponent'] != without)]
        # winning percentage is the number of wins / games played
        # since wins are represented by 1 and losses by 0, the number
        # of wins is just the sum of the outcome column.
        # To get the percentage you then divide by the number of games,
        # which ends up being the same as taking the mean of the outcome column
        return rel['outcome'].mean()
    except IndexError:
        return 0

def get_rpi(team, date):
    # get all games played by the team before the date
    rel_games = df_games[(df_games['team'] == team) & (df_games['date'] < date)]
    # get all the opponents played by this team
    opps = rel_games['opponent']
    # create an empty series with enough spots to store each
    # opponent's opponents' winning percentage
    opps_opps_wp = pd.Series([0] * len(opps), dtype='float64')
    for index, opp in enumerate(opps):
        # get the opponents' opponents by selecting the opponent column
        # from the opponent's relevant games
        opps_opps = df_games[(df_games['team'] == opp) & (df_games['date'] < date)]['opponent']
        # for each of those opponents we need their winning percentage
        opps_opps = opps_opps.apply(lambda opp_opp: get_wp(opp_opp, date, without=opp))
        # save the average winning percentage of all of this opponent's
        # opponents
        opps_opps_wp.loc[index] = opps_opps.mean()
    # the team's own winning percentage
    wp = get_wp(team, date)
    # opponents' winning percentage is the average of all the opponents'
    # winning percentages
    opp_wp = opps.apply(lambda opp: get_wp(opp, date, without=team)).mean()
    # opponents' opponents' winning percentage is the average across
    # all opponents of their opponents' winning percentages
    opp_opp_wp = opps_opps_wp.mean()
    # return a tuple of all three factors
    return wp, opp_wp, opp_opp_wp
With these helper functions in place, I can go through the schedule dataframe and for each game add whether each stat predicted it correctly or not. If the prediction is correct, I store that as a 1, and if the prediction is incorrect I store it as a 0.
# helper function that takes a game from the schedule dataframe
# and uses the other helper functions to say whether or not
# each ranking predicted it correctly,
# storing that in an extra column of that game's row
def add_predictions(game):
    # Get the details of this game
    winner = game['winner']
    team1, team2 = game['home'], game['away']
    date = game['date']
    # get all the rankings for both teams
    elo1, elo2 = get_elo(team1, date), get_elo(team2, date)
    ilax1, ilax2 = get_ilax(team1, date), get_ilax(team2, date)
    usila1, usila2 = get_usila(team1, date), get_usila(team2, date)
    # get the raw RPI factors
    (a1, b1, c1), (a2, b2, c2) = get_rpi(team1, date), get_rpi(team2, date)
    # calculate the actual RPI
    rpi1, rpi2 = a1 * 0.25 + b1 * 0.50 + c1 * 0.25, a2 * 0.25 + b2 * 0.50 + c2 * 0.25
    # store the raw RPI factors for later
    game['home_rpi_factors'] = (a1, b1, c1)
    game['away_rpi_factors'] = (a2, b2, c2)
    # elo
    if elo1 > elo2 and winner == team1:
        game['elo'] = 1
    elif elo2 > elo1 and winner == team2:
        game['elo'] = 1
    else:
        game['elo'] = 0
    # rpi
    if rpi1 > rpi2 and winner == team1:
        game['rpi'] = 1
    elif rpi2 > rpi1 and winner == team2:
        game['rpi'] = 1
    else:
        game['rpi'] = 0
    # ilax (polls rank the best team 1, so lower is better)
    if ilax1 < ilax2 and winner == team1:
        game['ilax'] = 1
    elif ilax2 < ilax1 and winner == team2:
        game['ilax'] = 1
    else:
        game['ilax'] = 0
    # usila
    if usila1 < usila2 and winner == team1:
        game['usila'] = 1
    elif usila2 < usila1 and winner == team2:
        game['usila'] = 1
    else:
        game['usila'] = 0
    # home team
    game['pred_home'] = 1 if winner == team1 else 0
    return game

# go through all the games and add the prediction columns
df_schedule = df_schedule.apply(add_predictions, axis=1)
df_schedule.tail()
| | away | date | home | winner | home_rpi_factors | away_rpi_factors | elo | rpi | ilax | usila | pred_home |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 530 | Virginia | 2022-05-14 | Brown | Virginia | (0.6666666666666666, 0.6016930781636665, 0.549... | (0.7857142857142857, 0.5374072356215214, 0.559... | 1 | 1 | 1 | 1 | 0 |
| 531 | Saint Joseph's | 2022-05-14 | Yale | Yale | (0.7333333333333333, 0.5913786213786215, 0.578... | (0.8235294117647058, 0.5005918101506337, 0.483... | 1 | 1 | 1 | 1 | 1 |
| 532 | Ohio State | 2022-05-15 | Cornell | Cornell | (0.7333333333333333, 0.5697913197913197, 0.575... | (0.6666666666666666, 0.5656510156510156, 0.547... | 1 | 1 | 1 | 1 | 1 |
| 533 | Harvard | 2022-05-15 | Rutgers | Rutgers | (0.8125, 0.5826236263736264, 0.5432945348247837) | (0.6666666666666666, 0.5579822954822955, 0.560... | 1 | 1 | 1 | 1 | 1 |
| 534 | Delaware | 2022-05-15 | Georgetown | Delaware | (0.9375, 0.5559169996669997, 0.563598627743274) | (0.7058823529411765, 0.48985982299131087, 0.50... | 0 | 0 | 0 | 0 | 0 |
We now have all the games for the season and whether or not they were predicted correctly by the ELO, RPI, USILA, and Inside Lacrosse rankings. For fun, I also added a column for predicting that the home team wins every single time.
To explore what this looks like I am going to break the data up by weeks and plot the proportion of games each ranking system predicted correctly over the course of the season. Weeks should be a reasonable size to chunk the data by, since teams typically play one game a week.
df_weekly_results = df_schedule.groupby(pd.Grouper(key='date', freq='7D')).mean()
fig, ax = plt.subplots(figsize=(12,8))
ax.set_title('Proportion of Games Predicted Each Week')
ax.set_ylabel('Percentage of Correct Predictions')
ax.set_xlabel('Date')
ax.plot(df_weekly_results.index, df_weekly_results[['elo', 'ilax', 'rpi', 'usila', 'pred_home']])
ax.legend(['ELO', 'Inside Lacrosse', 'RPI', 'USILA', 'Home Team'])
[Line chart: proportion of games predicted correctly each week by ELO, Inside Lacrosse, RPI, USILA, and the home team baseline]
Looking at this chart, a few things jump out. First of all, RPI starts out as a horrible predictor in the first couple weeks of the season. This actually makes sense, as it relies on opponents' winning percentages and opponents-opponents winning percentages. In the first few weeks of the season, teams have very few opponents, and their opponents could have even fewer. That leads to the ranking being determined by very few data points and likely being swayed heavily by outliers. The NCAA does recognize this, as they did not publish their official RPI rankings until March 27th this year, about halfway through the season.
Moreover, there seems to be unusual behavior at both ends of the graph, but that is just because the first couple and last couple weeks pictured have significantly fewer games in them. This can be seen by looking at the counts of the groups.
df_schedule.groupby(pd.Grouper(key='date', freq='7D')).count()
| date | away | home | winner | home_rpi_factors | away_rpi_factors | elo | rpi | ilax | usila | pred_home |
|---|---|---|---|---|---|---|---|---|---|---|
| 2022-01-29 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |
| 2022-02-05 | 19 | 19 | 19 | 19 | 19 | 19 | 19 | 19 | 19 | 19 |
| 2022-02-12 | 35 | 35 | 35 | 35 | 35 | 35 | 35 | 35 | 35 | 35 |
| 2022-02-19 | 43 | 43 | 43 | 43 | 43 | 43 | 43 | 43 | 43 | 43 |
| 2022-02-26 | 47 | 47 | 47 | 47 | 47 | 47 | 47 | 47 | 47 | 47 |
| 2022-03-05 | 53 | 53 | 53 | 53 | 53 | 53 | 53 | 53 | 53 | 53 |
| 2022-03-12 | 29 | 29 | 29 | 29 | 29 | 29 | 29 | 29 | 29 | 29 |
| 2022-03-19 | 37 | 37 | 37 | 37 | 37 | 37 | 37 | 37 | 37 | 37 |
| 2022-03-26 | 40 | 40 | 40 | 40 | 40 | 40 | 40 | 40 | 40 | 40 |
| 2022-04-02 | 39 | 39 | 39 | 39 | 39 | 39 | 39 | 39 | 39 | 39 |
| 2022-04-09 | 38 | 38 | 38 | 38 | 38 | 38 | 38 | 38 | 38 | 38 |
| 2022-04-16 | 42 | 42 | 42 | 42 | 42 | 42 | 42 | 42 | 42 | 42 |
| 2022-04-23 | 44 | 44 | 44 | 44 | 44 | 44 | 44 | 44 | 44 | 44 |
| 2022-04-30 | 47 | 47 | 47 | 47 | 47 | 47 | 47 | 47 | 47 | 47 |
| 2022-05-07 | 13 | 13 | 13 | 13 | 13 | 13 | 13 | 13 | 13 | 13 |
| 2022-05-14 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 |
Another observation from the chart is that simply predicting that the home team wins has the greatest variation in success throughout the season. While this is not a prediction system I am analyzing, I thought it would be interesting to see how it compares to statistics meant to predict. Interestingly, it does appear to predict more than half the games correctly in the majority of weeks, which could indicate that home-field advantage is quite significant. Analyzing that further would be interesting, but it is outside the scope of this project.
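While a deeper analysis of home-field advantage is out of scope, a quick first check might be a one-sided binomial test on the season-long home win rate. The counts below are hypothetical, purely to show the shape of such a test:

```python
from scipy.stats import binomtest

# Hypothetical counts for illustration: suppose home teams won 280 of 500 games.
# H0: home teams win 50% of games; H1: they win more than 50%.
result = binomtest(k=280, n=500, p=0.5, alternative='greater')
print(result.pvalue)  # small p-value -> evidence of a home-field edge
```

A real version of this test would use the actual home win count from `df_schedule`.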
A final observation from the chart is that the subjective, poll-based rankings perform much worse than the objective rankings. That makes sense, as those rankings only cover the top 20, so they offer no predictive value for games between two teams that are not in the top 20. When comparing those rankings, it might therefore make sense to disregard games between two non-top-20 teams. It does not seem completely fair to throw out those games, because, after all, a good ranking system should be able to predict all the games. But for the sake of a more interesting comparison, I will adjust the USILA and Inside Lacrosse rankings to only make a prediction on a game when at least one of the teams is ranked.
def adjust_predictions(row):
    # Get the details of this game
    winner = row['winner']
    team1, team2 = row['home'], row['away']
    date = row['date']
    # Get the rankings for both teams
    ilax1, ilax2 = get_ilax(team1, date), get_ilax(team2, date)
    usila1, usila2 = get_usila(team1, date), get_usila(team2, date)
    # Inside Lacrosse: correct if the better-ranked team won,
    # NaN if the teams are tied (e.g. both unranked)
    if ilax1 < ilax2 and winner == team1:
        row['ilax_adj'] = 1
    elif ilax2 < ilax1 and winner == team2:
        row['ilax_adj'] = 1
    elif ilax1 == ilax2:
        row['ilax_adj'] = np.nan
    else:
        row['ilax_adj'] = 0
    # USILA: same logic
    if usila1 < usila2 and winner == team1:
        row['usila_adj'] = 1
    elif usila2 < usila1 and winner == team2:
        row['usila_adj'] = 1
    elif usila1 == usila2:
        row['usila_adj'] = np.nan
    else:
        row['usila_adj'] = 0
    return row
df_schedule = df_schedule.apply(adjust_predictions, axis=1)
df_schedule.head()
| | away | date | home | winner | home_rpi_factors | away_rpi_factors | elo | rpi | ilax | usila | pred_home | ilax_adj | usila_adj |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Bellarmine | 2022-01-29 | Mercer | Mercer | (nan, nan, nan) | (nan, nan, nan) | 0 | 0 | 0 | 0 | 1 | NaN | NaN |
| 1 | Robert Morris | 2022-02-04 | Duke | Duke | (nan, nan, nan) | (nan, nan, nan) | 1 | 0 | 1 | 1 | 1 | 1.0 | 1.0 |
| 2 | Air Force | 2022-02-05 | Virginia | Virginia | (nan, nan, nan) | (nan, nan, nan) | 1 | 0 | 1 | 1 | 1 | 1.0 | 1.0 |
| 3 | Bucknell | 2022-02-05 | Mercer | Bucknell | (1.0, nan, nan) | (nan, nan, nan) | 1 | 0 | 0 | 0 | 0 | NaN | NaN |
| 4 | Mount St. Mary's | 2022-02-05 | Navy | Mount St. Mary's | (nan, nan, nan) | (nan, nan, nan) | 0 | 0 | 0 | 0 | 0 | NaN | NaN |
As you can see, in the adjusted columns for USILA and Inside Lacrosse, if neither team was ranked, the prediction is given as NaN. The Pandas function mean() ignores NaNs by default when taking the average, so I can regroup and replot the data to see how these adjusted rankings compare to before.
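As a quick sanity check of that NaN behavior on toy data:

```python
import pandas as pd
import numpy as np

# mean() skips NaNs by default, so unranked-vs-unranked games (NaN)
# drop out of each week's accuracy rather than counting as misses.
s = pd.Series([1, 0, np.nan, 1])
print(s.mean())              # 2/3: the NaN is excluded from the denominator
print(s.mean(skipna=False))  # nan: including it poisons the average
```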
df_weekly_results = df_schedule.groupby(pd.Grouper(key='date', freq='7D')).mean()
fig, ax = plt.subplots(figsize=(12,8))
ax.set_title('Proportion of Games Predicted Each Week')
ax.set_ylabel('Percentage of Correct Predictions')
ax.set_xlabel('Date')
ax.plot(df_weekly_results.index, df_weekly_results[['elo', 'ilax_adj', 'rpi', 'usila_adj', 'pred_home']])
ax.legend(['ELO', 'Inside Lacrosse', 'RPI', 'USILA', 'Home Team'])
The chart shows that for games with at least one top 20 team, the subjective rankings have a roughly similar predictive ability to the objective rankings.
Looking at this chart, it does appear that RPI is the worst predictor of matchups besides always choosing the home team, but the week-to-week variation makes it unclear whether the other predictors are actually significantly better. To check for statistical significance, I will use a z-test for each of the other rankings. The null hypothesis will be that the ranking predicts an equivalent proportion of games correctly, and the alternative hypothesis will be that the ranking system outperforms RPI. I will set the significance level for these tests at 0.05.
When I perform these tests, I am only going to look at games after March 27th. I decided to do this because RPI as a predictor is dependent on having enough other games played to have information about the teams. The NCAA recognizes its weakness at the beginning of the season and does not publish RPI rankings until March 27th. I will follow the NCAA's lead and only look at RPI from March 27th onward.
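For intuition, the comparison I am about to run boils down to a two-proportion z-test. The statsmodels function used below handles the details (its output shows it defaults to the agresti-caffo method), but a minimal hand-rolled pooled version looks like the following, with illustrative counts rather than the real ones:

```python
import math

def two_prop_z(x1, n1, x2, n2):
    """One-sided pooled two-proportion z-test: H1 is p1 > p2."""
    p1, p2 = x1 / n1, x2 / n2
    pooled = (x1 + x2) / (n1 + n2)  # shared success rate under H0
    se = math.sqrt(pooled * (1 - pooled) * (1 / n1 + 1 / n2))
    z = (p1 - p2) / se
    # One-sided p-value from the standard normal survival function
    pvalue = 0.5 * math.erfc(z / math.sqrt(2))
    return z, pvalue

# Illustrative counts: ranking A correct in 190 of 236 games, ranking B in 181.
z, p = two_prop_z(190, 236, 181, 236)
print(round(z, 3), round(p, 3))
```

The pooled z-statistic differs slightly from the agresti-caffo interval statsmodels computes, but the interpretation of the p-value is the same.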
First, I will compare ELO to RPI:
from statsmodels.stats.proportion import test_proportions_2indep
games_data = df_schedule[df_schedule['date'] >= '03-27-2022']
nobs = len(games_data)
test_proportions_2indep(games_data['elo'].sum(), nobs, games_data['rpi'].sum(), nobs, alternative='larger')
<class 'statsmodels.stats.base.HolderTuple'> statistic = 0.9765008652088699 pvalue = 0.1644081596716534 compare = diff method = agresti-caffo diff = 0.03813559322033899 ratio = 1.0508474576271187 odds_ratio = 1.24 variance = 0.0014996359393980034 alternative = larger value = 0 tuple = (0.9765008652088699, 0.1644081596716534)
The p-value is certainly greater than 0.05, so I cannot say that the increased accuracy of ELO over RPI for predicting games this season was statistically significant.
Looking at the USILA coaches poll:
adj = games_data.dropna(subset=['usila_adj'])
nobs = len(adj)
test_proportions_2indep(adj['usila_adj'].sum(), nobs, adj['rpi'].sum(), nobs, alternative='larger')
<class 'statsmodels.stats.base.HolderTuple'> statistic = 0.13679659074956643 pvalue = 0.4455957892491623 compare = diff method = agresti-caffo diff = 0.0066225165562914245 ratio = 1.0085470085470085 odds_ratio = 1.0391090391090392 variance = 0.002282793305853818 alternative = larger value = 0 tuple = (0.13679659074956643, 0.4455957892491623)
When comparing the USILA coaches poll to RPI, I removed all the games where neither team was ranked. This makes the test less biased, as we are comparing RPI's predictive ability only on games where the USILA ranking is able to make a prediction. With that in mind, the p-value is greater than 0.05, so I cannot say there is a statistically significant difference in the performance of these two rankings as predictors of games.
Looking at the Inside Lacrosse Poll:
adj = games_data.dropna(subset=['ilax_adj'])
nobs = len(adj)
test_proportions_2indep(adj['ilax_adj'].sum(), nobs, adj['rpi'].sum(), nobs, alternative='larger')
<class 'statsmodels.stats.base.HolderTuple'> statistic = 1.2230785510670248 pvalue = 0.11065001450319706 compare = diff method = agresti-caffo diff = 0.05590062111801242 ratio = 1.0731707317073171 odds_ratio = 1.406223717409588 variance = 0.0020379855946329813 alternative = larger value = 0 tuple = (1.2230785510670248, 0.11065001450319706)
Similarly, I only looked at the subset of games with at least one team ranked by Inside Lacrosse. And again, the p-value was not below 0.05. While the Inside Lacrosse rankings did have the lowest p-value, we cannot reject the null hypothesis that Inside Lacrosse and RPI have the same predictive power.
Overall, no other predictor of Division I college lacrosse games I looked at is a statistically significantly better predictor of game outcomes than RPI. This surprised me, as in the chart it looks like the other ranking systems consistently outperform RPI.
With that in mind, I will now look to improve the RPI statistic by using gradient descent to find better weights for its three component factors.
Because predicting outcomes of individual games is a discrete matter, I will have to make some modifications to typical gradient descent to use it effectively. For one thing, instead of a loss function, I will just use the accuracy of RPI with the given weights. And instead of minimizing it, I want to maximize it, so this will be more like gradient ascent. Also, since game predictions are discrete, there is no way to take a derivative or calculate a gradient analytically. To avoid this issue, I will use the gradient descent method from the class workbook, which uses an algebraic estimate of the gradient. The original version of this technique is in this notebook. Below I have made the necessary modifications to find better RPI weights.
# This is a helper function that figures out what the prediction
# for a game should be based on the given RPI weights
def predict(game, weights):
    winner = game['winner']
    # Get the three RPI factors for both teams
    a1, b1, c1 = game['home_rpi_factors']
    a2, b2, c2 = game['away_rpi_factors']
    rpi1 = a1 * weights[0] + b1 * weights[1] + c1 * weights[2]
    rpi2 = a2 * weights[0] + b2 * weights[1] + c2 * weights[2]
    # Correct if the team with the higher weighted RPI won
    if rpi1 > rpi2 and winner == game['home']:
        return 1
    elif rpi2 > rpi1 and winner == game['away']:
        return 1
    else:
        return 0

# This is taking the place of a loss function: the accuracy to maximize
def rpi_accuracy(v):
    return games_data.apply(lambda game: predict(game, v), axis=1).mean()

# The following code in this block is adapted from the class notebook
def estimate_partial(f, v, i, h):
    new_v = [val + (h if d == i else 0) for d, val in enumerate(v)]
    return (f(new_v) - f(v)) / h

# Getting the vector of partial derivatives is now just doing that for every index
def estimate_gradient(f, v, h):
    return [estimate_partial(f, v, i, h) for i in range(len(v))]

# A single step of gradient ascent
# We assume that the input vector and the gradient have the same length,
# and we parameterize the function with a step size
def single_step(vec, grad, ss):
    # multiply our gradient by the scalar step size
    step = np.multiply(grad, ss)
    # add the resulting step vector to our input vector
    # (adding, rather than subtracting, because we are maximizing accuracy)
    return vec + step

# starting point
start = [.7, .9, .1]

# each 'epoch' is defined as the following:
#
# * estimate the gradient
# * take a step in the direction of the gradient
v = start
for e in range(100):
    g = estimate_gradient(rpi_accuracy, v, 0.01)
    v = single_step(v, g, 0.01)

print(v)
print(rpi_accuracy(v))
print(rpi_accuracy([0.25, 0.5, 0.25]))
[0.63220339 0.88728814 0.10423729] 0.7669491525423728 0.75
As you can see, gradient ascent found that weights of roughly 0.632 for winning percentage, 0.887 for opponents' winning percentage, and 0.104 for opponents' opponents' winning percentage predict more games correctly than the standard weights. One slight issue is that the new weights no longer total 1. This is easily fixed by scaling them all down by their sum.
total = v[0] + v[1] + v[2]
best_weights = v / total
rpi_accuracy(best_weights)
0.7669491525423728
Now we have a better set of weights for calculating an RPI to predict the outcome of games with. We saw above that it is more accurate than standard RPI, but is that difference significant?
It is time for another two-proportion z-test to see whether the two predictors have significantly different accuracies. The null hypothesis is that both accuracies are the same, and the alternative hypothesis is that the new weights are actually better at predicting games. The significance level for this test is again 0.05.
better_rpi = games_data.apply(lambda game : predict(game, best_weights), axis=1)
nobs = len(games_data)
test_proportions_2indep(better_rpi.sum(), nobs, games_data['rpi'].sum(), nobs, alternative='larger')
<class 'statsmodels.stats.base.HolderTuple'> statistic = 0.42713625621360213 pvalue = 0.3346400433043889 compare = diff method = agresti-caffo diff = 0.016949152542372836 ratio = 1.0225988700564972 odds_ratio = 1.0969696969696967 variance = 0.0015482218591836142 alternative = larger value = 0 tuple = (0.42713625621360213, 0.3346400433043889)
The p-value is above the significance level of 0.05, so we cannot reject the null hypothesis. While the new weights for RPI performed better, I cannot say that the difference is significant.
Another aspect of the new weights to consider is that they are likely overfit to this season specifically. All of the training data came from this season, so it seems probable that if the two sets of weights were tested on a different season, the non-standard ones would no longer come out ahead.
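One way to check for that overfitting, even within a single season, would be to tune the weights on one random subset of games and score them on a held-out subset. Below is a self-contained sketch of the idea; the data, column names, and tuned weights are all stand-ins, since the real schedule dataframe and gradient-ascent run are not reproduced here:

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)

# Hypothetical stand-in for the real schedule: each row is a game with the
# three RPI factors for each team and the actual winner ('home' or 'away').
games = pd.DataFrame(rng.random((200, 6)),
                     columns=['h_wp', 'h_owp', 'h_oowp', 'a_wp', 'a_owp', 'a_oowp'])
games['winner'] = rng.choice(['home', 'away'], size=200)

def accuracy(df, w):
    """Fraction of games where the higher weighted-RPI team actually won."""
    home = df[['h_wp', 'h_owp', 'h_oowp']].to_numpy() @ w
    away = df[['a_wp', 'a_owp', 'a_oowp']].to_numpy() @ w
    pred = np.where(home > away, 'home', 'away')
    return (pred == df['winner']).mean()

# Hold out 30% of games; tune weights only on the remaining 70%.
test = games.sample(frac=0.3, random_state=1)
train = games.drop(test.index)

standard = np.array([0.25, 0.50, 0.25])
tuned = np.array([0.63, 0.89, 0.10])  # stand-in for weights fit on `train`

# The honest comparison is on games the tuning never saw:
print(accuracy(test, standard), accuracy(test, tuned))
```

Testing across whole seasons, rather than a random split within one, would be an even stronger check, since games within a season are not independent.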
In general, all of the analysis conducted in this tutorial could have benefited from looking at multiple seasons. However, because of the difficulties of data collection, that was not in the scope of this tutorial.
While there are many reasons to dislike RPI, statistically speaking, it holds up when looking at its value as a predictor of the outcomes of games. Perhaps it should not be such a prominent stat in the selection process for the tournament, but it also probably should not be ignored.