diff --git a/01. Series, DataFrame and Reindex.html b/01. Series, DataFrame and Reindex.html index bd1a913..0ed6a4d 100644 --- a/01. Series, DataFrame and Reindex.html +++ b/01. Series, DataFrame and Reindex.html @@ -16515,7 +16515,7 @@
df_fruits = pd.DataFrame({
+ "fruit": ["apple", "banana", "orange"],
+ "Aldi": [4, 5, 6],
+ "Costco": [1, 2, 3],
+ "Target": [3, 4, 5],
+ "Walmart": [6, 7, 8]
+})
+
+df_fruits
+
+ | fruit | +Aldi | +Costco | +Target | +Walmart | +
---|---|---|---|---|---|
0 | +apple | +4 | +1 | +3 | +6 | +
1 | +banana | +5 | +2 | +4 | +7 | +
2 | +orange | +6 | +3 | +5 | +8 | +
df_fruits.melt(id_vars=["fruit"],
+ value_vars=["Aldi", "Costco", "Target", "Walmart"],
+ var_name='store')
+
+ | fruit | +store | +value | +
---|---|---|---|
0 | +apple | +Aldi | +4 | +
1 | +banana | +Aldi | +5 | +
2 | +orange | +Aldi | +6 | +
3 | +apple | +Costco | +1 | +
4 | +banana | +Costco | +2 | +
5 | +orange | +Costco | +3 | +
6 | +apple | +Target | +3 | +
7 | +banana | +Target | +4 | +
8 | +orange | +Target | +5 | +
9 | +apple | +Walmart | +6 | +
10 | +banana | +Walmart | +7 | +
11 | +orange | +Walmart | +8 | +
walmart = pd.read_csv('datasets/Walmart_Store_sales.csv')
@@ -16542,7 +16782,7 @@ DataFrame
- Out[39]:
+ Out[41]:
@@ -16720,7 +16960,7 @@ DataFrame
-In [40]:
+In [42]:
pd.read_csv("datasets/Walmart_Store_sales.csv", usecols=["Store", "Date", "Weekly_Sales"])
@@ -16739,7 +16979,7 @@ DataFrame
- Out[40]:
+ Out[42]:
@@ -16857,7 +17097,7 @@ DataFrame
-In [41]:
+In [43]:
pd.read_csv("datasets/Walmart_Store_sales.csv", dtype={"Store": "category"})
@@ -16876,7 +17116,7 @@ DataFrame
- Out[41]:
+ Out[43]:
@@ -17054,7 +17294,7 @@ DataFrame
-In [42]:
+In [44]:
pd.read_csv("datasets/Walmart_Store_sales.csv", parse_dates=["Date"])
@@ -17073,7 +17313,7 @@ DataFrame
- Out[42]:
+ Out[44]:
@@ -17251,7 +17491,7 @@ DataFrame
-In [43]:
+In [45]:
pd.read_csv("datasets/Walmart_Store_sales.csv", index_col=["Date"])
@@ -17270,7 +17510,7 @@ DataFrame
- Out[43]:
+ Out[45]:
@@ -17446,7 +17686,7 @@ DataFrame
-In [44]:
+In [46]:
pd.read_csv("datasets/Walmart_Store_sales.csv", nrows=5)
@@ -17465,7 +17705,7 @@ DataFrame
- Out[44]:
+ Out[46]:
@@ -17576,7 +17816,7 @@ DataFrame
-In [45]:
+In [47]:
pd.read_csv("datasets/Walmart_Store_sales.csv", skiprows=100) # skips the first 100 lines
@@ -17595,7 +17835,7 @@ DataFrame
- Out[45]:
+ Out[47]:
@@ -17765,7 +18005,7 @@ DataFrame
-In [46]:
+In [48]:
pd.read_csv("datasets/Walmart_Store_sales.csv", skiprows=[1, 5]) # skips line 1 and 5
@@ -17784,7 +18024,7 @@ DataFrame
- Out[46]:
+ Out[48]:
@@ -17962,7 +18202,7 @@ DataFrame
-In [47]:
+In [49]:
walmart.info()
@@ -18011,7 +18251,7 @@ DataFrame
-In [48]:
+In [50]:
walmart = walmart.astype(
@@ -18067,7 +18307,7 @@ DataFrame
-In [49]:
+In [51]:
walmart
@@ -18086,7 +18326,7 @@ DataFrame
- Out[49]:
+ Out[51]:
@@ -18264,7 +18504,7 @@ DataFrame
-In [50]:
+In [52]:
walmart.query('Holiday_Flag == 1')
@@ -18283,7 +18523,7 @@ DataFrame
- Out[50]:
+ Out[52]:
@@ -18453,7 +18693,7 @@ DataFrame
-In [51]:
+In [53]:
val = 3049614.93
@@ -18473,7 +18713,7 @@ DataFrame
- Out[51]:
+ Out[53]:
@@ -18664,7 +18904,7 @@ DataFrame
-In [52]:
+In [54]:
walmart.query('1 <= index < 7')
@@ -18683,7 +18923,7 @@ DataFrame
- Out[52]:
+ Out[54]:
@@ -18797,7 +19037,7 @@ DataFrame
-In [53]:
+In [55]:
walmart.query("Store in [1,2]")
@@ -18816,7 +19056,7 @@ DataFrame
- Out[53]:
+ Out[55]:
@@ -18986,7 +19226,7 @@ DataFrame
-In [54]:
+In [56]:
walmart.query("Date == '12-02-2010' and Temperature < 40 and Unemployment < 8")
@@ -19005,7 +19245,7 @@ DataFrame
- Out[54]:
+ Out[56]:
@@ -19150,7 +19390,7 @@ DataFrame
-In [55]:
+In [57]:
walmart.iloc[0,1]
@@ -19169,7 +19409,7 @@ DataFrame
- Out[55]:
+ Out[57]:
@@ -19187,7 +19427,7 @@ DataFrame
-In [56]:
+In [58]:
walmart.loc[[0,1,2]]
@@ -19206,7 +19446,7 @@ DataFrame
- Out[56]:
+ Out[58]:
@@ -19287,7 +19527,7 @@ DataFrame
-In [57]:
+In [59]:
walmart.loc[0]
@@ -19306,7 +19546,7 @@ DataFrame
- Out[57]:
+ Out[59]:
@@ -19340,7 +19580,7 @@ DataFrame
-In [58]:
+In [60]:
walmart
@@ -19359,7 +19599,7 @@ DataFrame
- Out[58]:
+ Out[60]:
@@ -19529,7 +19769,7 @@ DataFrame
-In [59]:
+In [61]:
walmart.loc[walmart['Holiday_Flag'] == 1]
@@ -19548,7 +19788,7 @@ DataFrame
- Out[59]:
+ Out[61]:
@@ -19718,7 +19958,7 @@ DataFrame
-In [60]:
+In [62]:
walmart.loc[(walmart['Temperature'] > 90) & (walmart['Store'] == 1)]
@@ -19737,7 +19977,7 @@ DataFrame
- Out[60]:
+ Out[62]:
@@ -19816,7 +20056,7 @@ DataFrame
-In [61]:
+In [63]:
walmart.loc[walmart['Store'] == 1].pipe(lambda x: [x['Temperature'].max(), x['Temperature'].min()])
@@ -19835,7 +20075,7 @@ DataFrame
- Out[61]:
+ Out[63]:
@@ -19853,7 +20093,7 @@ DataFrame
-In [62]:
+In [64]:
walmart.query("Store==1").pipe(lambda x: [x['Temperature'].max(), x['Temperature'].min()])
@@ -19872,7 +20112,7 @@ DataFrame
- Out[62]:
+ Out[64]:
@@ -19898,7 +20138,7 @@ DataFrame
-In [63]:
+In [65]:
walmart.loc[walmart['Store'] == 1].eval("Temperature.max(), Temperature.min()")
@@ -19917,13 +20157,13 @@ DataFrame
- Out[63]:
+ Out[65]:
-array([91.65, 35.4], dtype=object)
+[91.65, 35.4]
@@ -19935,7 +20175,7 @@ DataFrame
-In [64]:
+In [66]:
walmart.query("Store==1").eval("Temperature.max(), Temperature.min()")
@@ -19954,13 +20194,13 @@ DataFrame
- Out[64]:
+ Out[66]:
-array([91.65, 35.4], dtype=object)
+[91.65, 35.4]
@@ -19980,7 +20220,7 @@ DataFrame
-In [65]:
+In [67]:
df_aux = walmart.copy()
@@ -20000,7 +20240,7 @@ DataFrame
- Out[65]:
+ Out[67]:
@@ -20157,7 +20397,478 @@ DataFrame
-6435 rows × 8 columns
+6435 rows × 8 columns
+
+
+
+
+
+
+
+
+
+
+
+
+
+- É possível filtrar colunas usando o método filter
+
+
+
+
+
+
+In [68]:
+
+
+df_aux.filter(items=['Date','Weekly_Sales'])
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Out[68]:
+
+
+
+
+
+
+
+
+
+
+ Date
+ Weekly_Sales
+
+
+
+
+ 0
+ 2010-05-02
+ 1643690.90
+
+
+ 1
+ 2010-12-02
+ 1641957.44
+
+
+ 2
+ 2010-02-19
+ 1611968.17
+
+
+ 3
+ 2010-02-26
+ 1409727.59
+
+
+ 4
+ 2010-05-03
+ 1554806.68
+
+
+ ...
+ ...
+ ...
+
+
+ 6430
+ 2012-09-28
+ 713173.95
+
+
+ 6431
+ 2012-05-10
+ 733455.07
+
+
+ 6432
+ 2012-12-10
+ 734464.36
+
+
+ 6433
+ 2012-10-19
+ 718125.53
+
+
+ 6434
+ 2012-10-26
+ 760281.43
+
+
+
+6435 rows × 2 columns
+
+
+
+
+
+
+
+
+
+
+
+
+
+- Selecionar colunas e index com expressão regular
+
+
+
+
+
+
+In [69]:
+
+
+df_aux.filter(regex='e$', axis=1)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Out[69]:
+
+
+
+
+
+
+
+
+
+
+ Store
+ Date
+ Temperature
+ Fuel_Price
+
+
+
+
+ 0
+ 1
+ 2010-05-02
+ 42.31
+ 2.572
+
+
+ 1
+ 1
+ 2010-12-02
+ 38.51
+ 2.548
+
+
+ 2
+ 1
+ 2010-02-19
+ 39.93
+ 2.514
+
+
+ 3
+ 1
+ 2010-02-26
+ 46.63
+ 2.561
+
+
+ 4
+ 1
+ 2010-05-03
+ 46.50
+ 2.625
+
+
+ ...
+ ...
+ ...
+ ...
+ ...
+
+
+ 6430
+ 45
+ 2012-09-28
+ 64.88
+ 3.997
+
+
+ 6431
+ 45
+ 2012-05-10
+ 64.89
+ 3.985
+
+
+ 6432
+ 45
+ 2012-12-10
+ 54.47
+ 4.000
+
+
+ 6433
+ 45
+ 2012-10-19
+ 56.47
+ 3.969
+
+
+ 6434
+ 45
+ 2012-10-26
+ 58.85
+ 3.882
+
+
+
+6435 rows × 4 columns
+
+
+
+
+
+
+
+
+
+
+
+
+
+- Filtrar colunas e index por trecho de texto contido no rótulo, utilizando like
+
+
+
+
+
+
+In [70]:
+
+
+df_aux.filter(like='10', axis=0)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Out[70]:
+
+
+
+
+
+
+
+
+
+
+ Store
+ Date
+ Weekly_Sales
+ Holiday_Flag
+ Temperature
+ Fuel_Price
+ CPI
+ Unemployment
+
+
+
+
+ 10
+ 1
+ 2010-04-16
+ 1466058.28
+ False
+ 66.32
+ 2.808
+ 210.488700
+ 7.808
+
+
+ 100
+ 1
+ 2012-06-01
+ 1550369.92
+ False
+ 49.01
+ 3.157
+ 219.714258
+ 7.348
+
+
+ 101
+ 1
+ 2012-01-13
+ 1459601.17
+ False
+ 48.53
+ 3.261
+ 219.892526
+ 7.348
+
+
+ 102
+ 1
+ 2012-01-20
+ 1394393.84
+ False
+ 54.11
+ 3.268
+ 219.985689
+ 7.348
+
+
+ 103
+ 1
+ 2012-01-27
+ 1319325.59
+ False
+ 54.26
+ 3.290
+ 220.078852
+ 7.348
+
+
+ ...
+ ...
+ ...
+ ...
+ ...
+ ...
+ ...
+ ...
+ ...
+
+
+ 6109
+ 43
+ 2012-01-27
+ 587685.38
+ False
+ 52.10
+ 3.290
+ 211.587991
+ 9.653
+
+
+ 6110
+ 43
+ 2012-03-02
+ 629176.71
+ False
+ 51.92
+ 3.360
+ 211.676200
+ 9.653
+
+
+ 6210
+ 44
+ 2011-08-04
+ 292498.61
+ False
+ 42.75
+ 3.547
+ 128.823806
+ 6.906
+
+
+ 6310
+ 45
+ 2010-11-06
+ 794698.77
+ False
+ 69.71
+ 2.809
+ 182.431557
+ 8.899
+
+
+ 6410
+ 45
+ 2012-11-05
+ 770487.37
+ False
+ 61.24
+ 3.889
+ 190.976417
+ 8.567
+
+
+
+234 rows × 8 columns
@@ -20178,7 +20889,7 @@ DataFrame
-In [66]:
+In [71]:
del df_aux['CPI']
@@ -20198,7 +20909,7 @@ DataFrame
- Out[66]:
+ Out[71]:
@@ -20364,7 +21075,7 @@ DataFrame
-In [67]:
+In [72]:
df_aux.drop(0)
@@ -20383,7 +21094,7 @@ DataFrame
- Out[67]:
+ Out[72]:
@@ -20541,7 +21252,7 @@ DataFrame
-In [68]:
+In [73]:
df_aux.drop([6431,6433])
@@ -20560,7 +21271,7 @@ DataFrame
- Out[68]:
+ Out[73]:
@@ -20726,7 +21437,7 @@ DataFrame
-In [69]:
+In [74]:
df_aux
@@ -20745,7 +21456,7 @@ DataFrame
- Out[69]:
+ Out[74]:
@@ -20911,7 +21622,7 @@ DataFrame
-In [70]:
+In [75]:
df_aux.drop([6431,6433], inplace=True)
@@ -20931,7 +21642,7 @@ DataFrame
- Out[70]:
+ Out[75]:
@@ -21097,7 +21808,7 @@ DataFrame
-In [71]:
+In [76]:
df_aux.drop(['Fuel_Price','Unemployment'], inplace=True, axis='columns')
@@ -21117,7 +21828,7 @@ DataFrame
- Out[71]:
+ Out[76]:
@@ -21259,7 +21970,7 @@ DataFrame
-In [72]:
+In [77]:
df_aux.index.name = 'indice'
@@ -21274,7 +21985,7 @@ DataFrame
-In [73]:
+In [78]:
df_aux
@@ -21293,7 +22004,7 @@ DataFrame
- Out[73]:
+ Out[78]:
@@ -21443,7 +22154,7 @@ DataFrame
-In [74]:
+In [79]:
df_aux = df_aux.query("Store==1")
@@ -21463,7 +22174,7 @@ DataFrame
- Out[74]:
+ Out[79]:
@@ -21602,6 +22313,228 @@ DataFrame
+
+
+
+
+- O método assign atribui novas colunas ao DataFrame, podendo até sobrescrever uma coluna caso ela já exista
+
+
+
+
+
+
+In [80]:
+
+
+time_sentences = ["Saturday: Weekend (Not working day)",
+ "Sunday: Weekend (Not working day)",
+ "Monday: Doctor appointment at 2:45pm.",
+ "Tuesday: Dentist appointment at 11:30 am.",
+ "Wednesday: basketball game At 7:00pm",
+ "Thursday: Back home by 11:15 pm.",
+ "Friday: Take the train at 08:10 am."]
+
+df_sentences = pd.DataFrame(time_sentences, columns=['text'])
+df_sentences
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Out[80]:
+
+
+
+
+
+
+
+
+
+
+ text
+
+
+
+
+ 0
+ Saturday: Weekend (Not working day)
+
+
+ 1
+ Sunday: Weekend (Not working day)
+
+
+ 2
+ Monday: Doctor appointment at 2:45pm.
+
+
+ 3
+ Tuesday: Dentist appointment at 11:30 am.
+
+
+ 4
+ Wednesday: basketball game At 7:00pm
+
+
+ 5
+ Thursday: Back home by 11:15 pm.
+
+
+ 6
+ Friday: Take the train at 08:10 am.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+In [81]:
+
+
+df_sentences = df_sentences.assign(text=df_sentences.text.str.lower(),
+ text_len=df_sentences.text.str.len(),
+ word_count=df_sentences.text.str.count(" ") + 1,
+ weekend=df_sentences.text.str.contains("saturday|sunday", case=False)
+ )
+df_sentences
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Out[81]:
+
+
+
+
+
+
+
+
+
+
+ text
+ text_len
+ word_count
+ weekend
+
+
+
+
+ 0
+ saturday: weekend (not working day)
+ 35
+ 5
+ True
+
+
+ 1
+ sunday: weekend (not working day)
+ 33
+ 5
+ True
+
+
+ 2
+ monday: doctor appointment at 2:45pm.
+ 37
+ 5
+ False
+
+
+ 3
+ tuesday: dentist appointment at 11:30 am.
+ 41
+ 6
+ False
+
+
+ 4
+ wednesday: basketball game at 7:00pm
+ 36
+ 5
+ False
+
+
+ 5
+ thursday: back home by 11:15 pm.
+ 32
+ 6
+ False
+
+
+ 6
+ friday: take the train at 08:10 am.
+ 35
+ 7
+ False
+
+
+
+
+
+
+
+
+
+
+
+
@@ -21613,7 +22546,7 @@ DataFrame
-In [75]:
+In [82]:
df_aux = df_aux.rename({"Store": "loja",
@@ -21638,7 +22571,7 @@ DataFrame
- Out[75]:
+ Out[82]:
@@ -21788,7 +22721,7 @@ DataFrame
-In [76]:
+In [83]:
format_dict = {
@@ -21812,1156 +22745,1156 @@ DataFrame
- Out[76]:
+ Out[83]: