-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathF5 - Pandas Library.py
100 lines (57 loc) · 1.86 KB
/
F5 - Pandas Library.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# import pandas library
import pandas as pd
# Read the CSV file into a DataFrame and assign it to df.
# The path is written as a raw string so the backslashes of the Windows path
# are taken literally: in a normal string literal "\O", "\D" and "\p" are
# invalid escape sequences, which raise a SyntaxWarning on Python 3.12+.
df = pd.read_csv(r"D:\OneDrive\Downloads\pandas data.csv")
# outputs the first 10 rows
#print(df.head(10))
# outputs the last 10 rows
#print(df.tail(10))
# outputs whole data frame
#print(df)
# prints the number of rows and columns in the data frame
#print(df.shape)
# outputs data types
#print(df.info())
# outputs column headings
#print(df.columns)
# outputs statistical distribution by column
#print(df.describe())
# checks for any null values
#print(df.isnull().any())
# outputs how many missing values in each column
#print(df.isnull().sum())
# outputs the unique values found in each column
#print(df["Distance"].unique())
#print(df["Pulse"].unique())
#print(df["Maxpulse"].unique())
#print(df["Calories"].unique())
# filter by calories in the data frame
#print(df[df["Calories"]>400])
# find the mode for a particular column
#y = df["Calories"].mode()
#print(y)
# finds the mean of the distance column and rounds to 1 d.p.
#x = df["Distance"].mean()
#x = round(x,1)
#print(x)
# function to replace 'NaN' in the data frame with previously calculated mean value
#df["Distance"] = df["Distance"].fillna(x)
# check to see distance column now has no null values
#print(df.isnull().sum())
# use numpy library to replace any value in the data frame
#from numpy import nan
# Calculate mean of calories column and replace NaN with that value
#y = df["Calories"].mean()
#y = round(y,1)
#print(y)
#df["Calories"] = df["Calories"].replace(to_replace = nan, value = y)
#print(df.isnull().sum())
# create new measure
#df["PulsePlus"] = df["Maxpulse"] - df["Pulse"]
#print(df)
import matplotlib.pyplot as plt
import numpy as np
# Coordinates of the two endpoints of the line: (0, 0) and (10, 100).
x_axis, y_axis = np.array([0, 10]), np.array([0, 100])

# Draw the line through those points, then display the figure.
plt.plot(x_axis, y_axis)
plt.show()