import time
# One row per team: (name, runs scored per game, runs allowed per game, flag).
# The flag answers "is this team a winner or a loser?":
#   1 means the team is losing (should end up above the dividing line),
#   0 means the team is winning (should end up below the dividing line).
# .500 teams are counted as winners (you're welcome, Chicago White Sox).
_team_rows = [
    ('Arizona Diamondbacks', 4.26, 4.6, 1),
    ('Atlanta Braves', 4.75, 3.98, 0),
    ('Baltimore Orioles', 4.19, 4.25, 0),
    ('Boston Red Sox', 4.52, 4.74, 1),
    ('Chicago Cubs', 4.28, 4.94, 1),
    ('Chicago White Sox', 4.36, 4.58, 0),
    ('Cincinnati Reds', 4.41, 5.33, 1),
    ('Cleveland Guardians', 4.39, 4.26, 0),
    ('Colorado Rockies', 4.54, 5.31, 1),
    ('Detroit Tigers', 3.24, 4.4, 1),
    ('Houston Astros', 4.49, 3.37, 0),
    ('Kansas City Royals', 3.82, 5.06, 1),
    ('Los Angeles Angels', 3.92, 4.28, 1),
    ('Los Angeles Dodgers', 5.19, 3.28, 0),
    ('Miami Marlins', 3.99, 4.27, 1),
    ('Milwaukee Brewers', 4.67, 4.23, 0),
    ('Minnesota Twins', 4.56, 4.29, 0),
    ('New York Mets', 4.66, 3.74, 0),
    ('New York Yankees', 5.34, 3.29, 0),
    ('Oakland Athletics', 3.44, 4.5, 1),
    ('Philadelphia Phillies', 4.63, 4.12, 0),
    ('Pittsburgh Pirates', 3.57, 5.04, 1),
    ('San Diego Padres', 4.37, 4.07, 0),
    ('Seattle Mariners', 4.06, 3.85, 0),
    ('San Francisco Giants', 4.62, 4.36, 1),
    ('St. Louis Cardinals', 4.61, 4, 0),
    ('Tampa Bay Rays', 4.13, 3.93, 0),
    ('Texas Rangers', 4.55, 4.47, 1),
    ('Toronto Blue Jays', 4.86, 4.29, 0),
    ('Washington Nationals', 3.85, 5.45, 1),
]
# team names, in the same order as the three numeric lists below
teams = [row[0] for row in _team_rows]
# runs scored per game
rsg = [row[1] for row in _team_rows]
# runs allowed per game
rag = [row[2] for row in _team_rows]
# losing flag (1 = losing team, 0 = winning team)
w = [row[3] for row in _team_rows]
# Separate all the data into 2 sets, winners and losers, so that each set can
# be handled as its own series later on.
# winner runs scored / winner runs allowed
winner_rsg = []
winner_rag = []
# loser runs scored / loser runs allowed
loser_rsg = []
loser_rag = []
for scored, allowed, losing_flag in zip(rsg, rag, w):
    # a flag above 0 marks a losing team; anything else counts as a winner
    if losing_flag > 0:
        loser_rsg.append(scored)
        loser_rag.append(allowed)
    else:
        winner_rsg.append(scored)
        winner_rag.append(allowed)
import matplotlib.pyplot as plt
speed = 0.5  # how many seconds to pause in between graphs


def draw_scatter():
    """Draw the scatter plot of actual winners and losers as of 7/31/2022.

    Every team name is written next to its point, offset by 0.02 on both axes
    from (runs scored per game, runs allowed per game). Winning and losing
    teams are drawn as two separate scatter series, in that order, which is
    the order the legend labels rely on. Draws with pyplot's module-level
    functions and returns nothing.
    """
    for label, scored, allowed in zip(teams, rsg, rag):
        plt.annotate(label, (scored + 0.02, allowed + 0.02))
    plt.title("Separating Winning Teams By Runs Allowed and Scored")
    plt.xlabel("Runs Scored/Game")
    plt.ylabel("Runs Allowed/Game")
    plt.scatter(winner_rsg, winner_rag)
    plt.scatter(loser_rsg, loser_rag)
    plt.legend(['Winning Teams', 'Losing Teams'])
# A team is classified as losing when
#     coeff[1]*rag + coeff[2]*rsg + coeff[0] > threshold
threshold = 1


def _boundary_rag(coeff, rsg_values, threshold=1):
    """Return the runs-allowed value of the dividing line at each runs-scored value.

    The dividing line is where coeff[1]*rag + coeff[2]*rsg + coeff[0] equals
    the threshold, i.e. rag = (threshold - coeff[0] - coeff[2]*rsg) / coeff[1].

    Returns None when coeff[1] is 0, because the line then cannot be written
    as runs allowed in terms of runs scored (it is vertical or does not exist).
    """
    if coeff[1] == 0:
        return None
    return [(threshold - coeff[0] - coeff[2] * scored) / coeff[1]
            for scored in rsg_values]


plt.ion()
draw_scatter()
# runs-scored values 3.0, 3.1, ..., 5.4 at which the dividing line is drawn
x = [tenths / 10 for tenths in range(30, 55)]
# starting coefficients
# coefficients = [w0, w1, w2]
# you can basically start these coefficients with any numbers because you
# haven't even looked at a team yet
coeff = [0, 1, 0.5]
# now graph the first hypothesized line, based on just that initial guess
y = _boundary_rag(coeff, x, threshold)
if y is not None:
    plt.plot(x, y)  # use a line graph
plt.draw()  # draw the graph
plt.pause(speed)  # wait `speed` seconds
plt.clf()  # clear the graph
n = 0.2  # this number determines how much the line changes when a point is misclassified
# now go through the real data points for all 30 of the teams (a single pass)
for i, (scored, allowed, actual) in enumerate(zip(rsg, rag, w)):
    pointvalue = coeff[1] * allowed + coeff[2] * scored + coeff[0] * 1
    # 1 = classified as losing (above the line), 0 = classified as winning
    predicted = 1 if pointvalue > threshold else 0
    # d is the signed error: 0 for a properly classified point, +1 for a losing
    # team classified as winning, -1 for a winning team classified as losing
    d = actual - predicted
    # a misclassified point moves the coefficients toward its correct side:
    # its inputs are added when the value was too low, subtracted when too high
    if d != 0:
        coeff[0] = coeff[0] + n * d * 1
        coeff[1] = coeff[1] + n * d * allowed
        coeff[2] = coeff[2] + n * d * scored
    # now replot the dividing line
    y = _boundary_rag(coeff, x, threshold)
    draw_scatter()
    if y is not None:
        plt.plot(x, y)
    plt.draw()
    plt.pause(speed)
    # only clear if you aren't on the final data point
    if i != len(rsg) - 1:
        plt.clf()
print(coeff)