# train_test_split_method_with_audi.py

# -*- coding: utf-8 -*-
"""Train Test Split Method with Audi.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1Z9H85gBP5_XyFScNsG2v0iSJW0wzL4F4

**Listings in the UK for Audi A1 cars on the popular website https://www.autotrader.co.uk/.**

Attributes:

-Year: The year the car was registered   
-Type: Type of the vehicle   
-Mileage(miles): The mileage of the vehicle  
-Engine: Engine size in litres       
-PS: Pferdestärke (metric horsepower); 1 PS is about 98.6% of one HP    
-Transmission: The vehicle's transmission       
-Fuel: Fuel type        
-Number_of_Owners: number of previous owners of the vehicle         
-Price(£): price in pounds sterling     
-href: href of that listing     
-PPY: Price Per Year = Price of the car / (10 - Age of the car). This is the cost of the car per year if you intend to sell it when it becomes 10 years old.    
-MileageRank: Where the car ranks in the dataset in order of mileage. Lower mileage yields a higher rank.       
-PriceRank: Where the car ranks in the dataset in order of price. Lower price yields a higher rank.       
-PPYRank: Where the car ranks in the dataset in order of PPY. Lower PPY yields a higher rank.       
-Score: The sum of MileageRank, PriceRank and PPYRank.
"""

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Load the raw listings; "audi.csv" is resolved relative to the current
# working directory.
df = pd.read_csv("audi.csv")
print(df.head(3))

# Keep only the columns used for modelling. The explicit copy makes the
# subset an independent frame, so the column assignments below never
# operate on a view of the original data.
df = df[
    [
        "Year",
        "Type",
        "Mileage(miles)",
        "Engine",
        "PS",
        "Transmission",
        "Fuel",
        "Number_of_Owners",
        "Price(£)",
    ]
].copy()
print(df.head(3))

# Short working names, positionally matching the selection above.
df.columns = [
    "yil",
    "kasa",
    "mil",
    "motor",
    "ps",
    "vites",
    "yakit",
    "sahip",
    "fiyat",
]

# Strip the literal "L" suffix from the engine size and convert it to a
# number (presumably values look like "1.4L" -- confirm against the CSV).
df["motor"] = pd.to_numeric(df["motor"].str.replace("L", "", regex=False))

# One-hot encode the categorical columns; drop_first removes one level per
# column so the dummies are not perfectly collinear with the intercept.
df = pd.get_dummies(df, columns=["kasa", "vites", "yakit"], drop_first=True)

# Target (kept as a one-column DataFrame) and feature matrix.
y = df[["fiyat"]]
x = df.drop("fiyat", axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=21
)

lm = LinearRegression()
model = lm.fit(x_train, y_train)

# R^2 on the held-out rows. In a script the value must be printed, otherwise
# it is computed and silently discarded.
r2 = model.score(x_test, y_test)
print("R^2 on test set:", r2)

# Predict the price for a single hand-written listing. The values are
# positional and must follow the order of ``x.columns`` (numeric features
# first, then the dummy columns produced above). Wrapping them in a DataFrame
# with the training column names keeps the feature names the model was fitted
# with, and fails early with a clear error if the number of values does not
# match the number of features.
sample = pd.DataFrame([[2017, 20000, 1.0, 90, 5, 0, 1]], columns=x.columns)
prediction = model.predict(sample)
print("Predicted price:", prediction)