Predicting match winner in Cricket using machine learning

2 minute read

Data Preprocessing

import pandas as pd
# Load the match data (first CSV column becomes the index) and discard the
# columns that are not used as model features.
dataset = (
    pd.read_csv('ipl.csv', index_col=0)
    .drop(columns=[
        'gender',
        'match_type',
        'date',
        'umpire_1',
        'umpire_2',
        'player of the match',
        'win_by_runs',
        'win_by_wickets',
    ])
)
# columns with missing values
dataset.columns[dataset.isnull().any()]
Index(['city'], dtype='object')
# Replace missing values in the 'city' column with the column's mode.
# The filled column is assigned back rather than calling
# fillna(inplace=True) on the dataset['city'] selection: an inplace call on a
# selected column is chained assignment, which pandas deprecates and which
# leaves `dataset` unchanged when Copy-on-Write is enabled.
dataset['city'] = dataset['city'].fillna(dataset['city'].mode()[0])
dataset.columns[dataset.isnull().any()]

# Swap every full franchise name for its short code, wherever it occurs in
# the frame. Both spellings of Rising Pune Supergiant(s) map to 'RPS'.
dataset.replace({
    'Mumbai Indians': 'MI',
    'Kolkata Knight Riders': 'KKR',
    'Royal Challengers Bangalore': 'RCB',
    'Deccan Chargers': 'DC',
    'Chennai Super Kings': 'CSK',
    'Rajasthan Royals': 'RR',
    'Delhi Daredevils': 'DD',
    'Gujarat Lions': 'GL',
    'Kings XI Punjab': 'KXIP',
    'Sunrisers Hyderabad': 'SRH',
    'Rising Pune Supergiants': 'RPS',
    'Kochi Tuskers Kerala': 'KTK',
    'Pune Warriors': 'PW',
    'Rising Pune Supergiant': 'RPS',
}, inplace=True)
dataset.head(5)
city team 1 team 2 team_1_batting_average team_1_bowling_average team_2_batting_average team_2_bowling_average toss_decision toss_winner venue winner
0 Bangalore KKR RCB 5.0 5.000000 3.0 12.0 field RCB M Chinnaswamy Stadium KKR
1 Chandigarh CSK KXIP 5.0 54.000000 5.0 47.0 bat CSK Punjab Cricket Association Stadium, Mohali CSK
2 Delhi RR DD 5.0 21.000000 5.0 5.0 bat RR Feroz Shah Kotla DD
3 Mumbai MI RCB 28.0 14.750000 3.0 19.0 bat MI Wankhede Stadium RCB
4 Kolkata DC KKR 5.0 5.666667 50.0 10.0 bat DC Eden Gardens KKR
def createDict(series) :
    """Map each distinct value of ``series`` to an integer code.

    Codes start at 0 and are handed out in order of first appearance, so a
    value seen again later keeps the code it received the first time.

    Args:
        series: Any iterable of hashable values (e.g. a DataFrame column).

    Returns:
        A dict ``{value: code}`` with one entry per distinct value.
    """
    # dict.fromkeys keeps only the first occurrence of each value, in order.
    firstSeen = dict.fromkeys(series)

    return {value: code for code, value in enumerate(firstSeen)}
# Integer encodings for the categorical columns, one dictionary per column.
# Team codes are derived from the 'team 1' column only.
teamDict, cityDict, venueDict, tossDecisionDict = (
    createDict(dataset[column])
    for column in ('team 1', 'city', 'venue', 'toss_decision')
)

# The winner encoding is a copy of the team codes plus two extra outcomes.
# NOTE(review): 14 and 15 are fixed codes; this assumes teamDict never hands
# out those values to a team -- confirm against the data.
winnerDict = {**teamDict, 'tie': 14, 'no result': 15}
venueDict
{'M Chinnaswamy Stadium': 0,
 'Punjab Cricket Association Stadium, Mohali': 1,
 'Feroz Shah Kotla': 2,
 'Wankhede Stadium': 3,
 'Eden Gardens': 4,
 'Sawai Mansingh Stadium': 5,
 'Rajiv Gandhi International Stadium, Uppal': 6,
 'MA Chidambaram Stadium, Chepauk': 7,
 'Dr DY Patil Sports Academy': 8,
 'Newlands': 9,
 "St George's Park": 10,
 'Kingsmead': 11,
 'SuperSport Park': 12,
 'Buffalo Park': 13,
 'New Wanderers Stadium': 14,
 'De Beers Diamond Oval': 15,
 'OUTsurance Oval': 16,
 'Brabourne Stadium': 17,
 'Sardar Patel Stadium, Motera': 18,
 'Barabati Stadium': 19,
 'Vidarbha Cricket Association Stadium, Jamtha': 20,
 'Himachal Pradesh Cricket Association Stadium': 21,
 'Nehru Stadium': 22,
 'Holkar Cricket Stadium': 23,
 'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium': 24,
 'Subrata Roy Sahara Stadium': 25,
 'Shaheed Veer Narayan Singh International Stadium': 26,
 'JSCA International Stadium Complex': 27,
 'Sheikh Zayed Stadium': 28,
 'Sharjah Cricket Stadium': 29,
 'Dubai International Cricket Stadium': 30,
 'Maharashtra Cricket Association Stadium': 31,
 'Punjab Cricket Association IS Bindra Stadium, Mohali': 32,
 'Saurashtra Cricket Association Stadium': 33,
 'Green Park': 34}
# Per-column value -> code mapping handed to DataFrame.replace.
# The three columns that hold a team name share the same team encoding.
encode = {column: teamDict for column in ('team 1', 'team 2', 'toss_winner')}
encode.update({
    'winner': winnerDict,
    'city': cityDict,
    'venue': venueDict,
    'toss_decision': tossDecisionDict,
})

# Encode the categorical columns of the dataset in place.
dataset.replace(encode, inplace=True)
def prediction(Model,X_train,y_train,X_test,y_test) :
    """Fit a new ``Model()`` on the training split and report its test score.

    Args:
        Model: A zero-argument callable (typically an estimator class) whose
            result provides ``fit(X, y)`` and ``score(X, y)``.
        X_train, y_train: Data passed to ``fit``.
        X_test, y_test: Data passed to ``score``.

    Returns:
        The fitted estimator instance. The score is printed, not returned.
    """
    estimator = Model()
    estimator.fit(X_train, y_train)

    testScore = estimator.score(X_test, y_test)
    print(testScore)

    return estimator
def predictWinner():
    """Fit four scikit-learn classifiers and print the test score of each.

    Each classifier class is handed to ``prediction`` with its default
    constructor arguments, in this order: MLPClassifier, LinearSVC,
    LogisticRegression, RandomForestClassifier.

    ``X_train``, ``y_train``, ``X_test`` and ``y_test`` are read from module
    scope; they are not defined in the visible part of this file and must
    exist before this function is called.
    """
    from sklearn.neural_network import MLPClassifier
    from sklearn.svm import LinearSVC
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier

    candidates = (
        MLPClassifier,
        LinearSVC,
        LogisticRegression,
        RandomForestClassifier,
    )

    # The fitted estimators are not kept; only the printed scores matter here.
    for modelClass in candidates:
        prediction(modelClass, X_train, y_train, X_test, y_test)
    
from sklearn.neural_network import MLPClassifier

from sklearn.linear_model import LogisticRegression

def buildModel(dataset,team1,team2) :
    """Fit a logistic-regression model on the matches between two teams.

    Only rows where the two sides are ``team1`` and ``team2`` (in either
    order) are used. No check is made that any rows survive the filter.

    Args:
        dataset: DataFrame with 'team 1', 'team 2' and 'winner' columns plus
            the feature columns.
        team1, team2: Team identifiers, in the same form as the values stored
            in the 'team 1' / 'team 2' columns.

    Returns:
        A ``LogisticRegression`` fitted with 'winner' as the target and every
        other column (after ``pd.get_dummies``) as features.
    """
    # A fixture counts whichever side is listed first.
    sameOrder = (dataset['team 1'] == team1) & (dataset['team 2'] == team2)
    swappedOrder = (dataset['team 1'] == team2) & (dataset['team 2'] == team1)
    headToHead = dataset[sameOrder | swappedOrder]

    target = headToHead['winner']
    inputs = pd.get_dummies(headToHead.drop(columns='winner'))

    model = LogisticRegression()
    model.fit(inputs, target)

    return model
def getPrediction(city,team1,team2,team1_batting_avg,team1_bowling_avg,team2_batting_avg,team2_bowling_avg,toss_decision,toss_winner,venue) :
    """Print the predicted outcome of one match between ``team1`` and ``team2``.

    A model is fitted on the past meetings of the two teams (``buildModel``)
    and asked to classify a single row built from the arguments.

    Args:
        city, venue: Names as they appear in the dataset before encoding.
        team1, team2, toss_winner: Team short codes (e.g. 'KKR').
        team1_batting_avg, team1_bowling_avg, team2_batting_avg,
        team2_bowling_avg: Numeric averages for the two teams.
        toss_decision: Toss decision as it appears in the dataset before
            encoding (e.g. 'bat' or 'field').

    Returns:
        None. The decoded prediction is printed.

    Raises:
        KeyError: If a city, team, venue or toss decision is not present in
            the corresponding encoding dictionary.
    """
    # One row, with the columns in the same order as the training frame.
    # Every categorical value -- including toss_decision -- is label-encoded
    # exactly as the dataset was. Passing the raw toss_decision string and
    # one-hot encoding it afterwards would replace the 'toss_decision' column
    # with a 'toss_decision_<value>' column appended at the end, which is
    # always 1 and shifts the remaining features out of position.
    predictionSet = pd.DataFrame({
        'city':cityDict[city],
        'team 1':teamDict[team1],
        'team 2':teamDict[team2],
        'team_1_batting_average':team1_batting_avg,
        'team_1_bowling_average':team1_bowling_avg,
        'team_2_batting_average':team2_batting_avg,
        'team_2_bowling_average':team2_bowling_avg,
        'toss_decision':[tossDecisionDict[toss_decision]],
        'toss_winner':teamDict[toss_winner],
        'venue':venueDict[venue]
    })

    clf=buildModel(dataset,teamDict[team1],teamDict[team2])

    # predict returns an array with one label per row; there is one row.
    # (Named `predicted` so the module-level `prediction` function is not
    # shadowed.)
    predicted=clf.predict(predictionSet)[0]

    # The model is trained on labels from winnerDict, so decode with it;
    # this also reports 'tie' / 'no result' instead of printing nothing.
    for key,value in winnerDict.items() :

        if(value==predicted) :

            print(key)

getPrediction('Bangalore','KKR','RCB',5.0,5.000000,3.0,12.0,'field','RCB','M Chinnaswamy Stadium')
KKR
getPrediction('Chandigarh','KXIP','CSK',5.0,54.000000,5.0,47.0,'bat','CSK','Punjab Cricket Association Stadium, Mohali')
CSK
getPrediction('Delhi','DD','RR',5.0,21.000000,5.0,5.0,'bat','RR','Feroz Shah Kotla')
DD

Leave a Comment