mirror of
https://github.com/Asabeneh/30-Days-Of-Python.git
synced 2026-06-06 21:09:15 +08:00
day25 update
This commit is contained in:
parent
c6fd597aec
commit
1cdef9ce00
@ -29,14 +29,14 @@
|
||||
- [DataFrames](#dataframes)
|
||||
- [Creating DataFrames from List of Lists](#creating-dataframes-from-list-of-lists)
|
||||
- [Creating DataFrame Using Dictionary](#creating-dataframe-using-dictionary)
|
||||
- [Creating DataFrams from List of Dictionaries](#creating-dataframs-from-list-of-dictionaries)
|
||||
- [Creating DataFrames from a List of Dictionaries](#creating-dataframes-from-a-list-of-dictionaries)
|
||||
- [Reading CSV File Using Pandas](#reading-csv-file-using-pandas)
|
||||
- [Data Exploration](#data-exploration)
|
||||
- [Modifying DataFrame](#modifying-dataframe)
|
||||
- [Create a DataFrame](#create-a-dataframe)
|
||||
- [Modifying a DataFrame](#modifying-a-dataframe)
|
||||
- [Creating a DataFrame](#creating-a-dataframe)
|
||||
- [Adding a New Column](#adding-a-new-column)
|
||||
- [Modifying Column Values](#modifying-column-values)
|
||||
- [Formating DataFrame Column](#formating-dataframe-column)
|
||||
- [Formating DataFrame Columns](#formating-dataframe-columns)
|
||||
- [Checking Data Types of Column Values](#checking-data-types-of-column-values)
|
||||
- [Boolean Indexing](#boolean-indexing)
|
||||
- [Exercises: Day 25](#exercises-day-25)
|
||||
@ -235,58 +235,8 @@ data = [
|
||||
]
|
||||
df = pd.DataFrame(data, columns=['Names','Country','City'])
|
||||
print(df)
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
```html
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
<th></th>
|
||||
<th>Names</th>
|
||||
<th>Country</th>
|
||||
<th>City</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>0</td>
|
||||
<td>Asabeneh</td>
|
||||
<td>Finland</td>
|
||||
<td>Helsink</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>1</td>
|
||||
<td>David</td>
|
||||
<td>UK</td>
|
||||
<td>London</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>2</td>
|
||||
<td>John</td>
|
||||
<td>Sweden</td>
|
||||
<td>Stockholm</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
```
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -325,26 +275,10 @@ print(df)
|
||||
data = {'Name': ['Asabeneh', 'David', 'John'], 'Country':[
|
||||
'Finland', 'UK', 'Sweden'], 'City': ['Helsiki', 'London', 'Stockholm']}
|
||||
df = pd.DataFrame(data)
|
||||
df
|
||||
print(df)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -375,16 +309,9 @@ df
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
```python
|
||||
|
||||
```
|
||||
|
||||
### Creating DataFrams from List of Dictionaries
|
||||
### Creating DataFrames from a List of Dictionaries
|
||||
|
||||
|
||||
```python
|
||||
@ -393,26 +320,10 @@ data = [
|
||||
{'Name': 'David', 'Country': 'UK', 'City': 'London'},
|
||||
{'Name': 'John', 'Country': 'Sweden', 'City': 'Stockholm'}]
|
||||
df = pd.DataFrame(data)
|
||||
df
|
||||
print(df)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -443,17 +354,23 @@ df
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
## Reading CSV File Using Pandas
|
||||
|
||||
To download the CSV file needed in this example, a single console/command-line command is enough:
|
||||
|
||||
```sh
|
||||
curl -O https://raw.githubusercontent.com/Asabeneh/30-Days-Of-Python/master/data/weight-height.csv
|
||||
```
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
|
||||
df = pd.read_csv('./data/weight-height.csv')
|
||||
df = pd.read_csv('weight-height.csv')
|
||||
print(df)
|
||||
```
|
||||
|
||||
### Data Exploration
|
||||
@ -461,26 +378,10 @@ Let's read only the first 5 rows using head()
|
||||
|
||||
|
||||
```python
|
||||
df.head() # give five rows we can increase the number of rows by passing argument to the head() method
|
||||
print(df.head()) # gives the first five rows; we can increase the number of rows by passing an argument to the head() method
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -523,7 +424,6 @@ df.head() # give five rows we can increase the number of rows by passing argumen
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
@ -531,7 +431,7 @@ As you can see the csv file has three rows: Gender, Height and Weight. But we do
|
||||
|
||||
|
||||
```python
|
||||
df.shape # as you can see 10000 rows and three columns
|
||||
print(df.shape) # as you can see 10000 rows and three columns
|
||||
```
|
||||
|
||||
|
||||
@ -546,7 +446,7 @@ Let's get all the columns using columns.
|
||||
|
||||
|
||||
```python
|
||||
df.columns
|
||||
print(df.columns)
|
||||
```
|
||||
|
||||
|
||||
@ -560,26 +460,10 @@ Let's read only the last 5 rows using tail()
|
||||
|
||||
|
||||
```python
|
||||
df.tail() # tails give the last five rows, we can increase the rows by passing argument to tail method
|
||||
print(df.tail()) # tail gives the last five rows; we can increase the number of rows by passing an argument to the tail() method
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -622,26 +506,24 @@ df.tail() # tails give the last five rows, we can increase the rows by passing a
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
Now, lets get specific column using the column key
|
||||
Now, let's get a specific column using the column key
|
||||
|
||||
|
||||
|
||||
```python
|
||||
heights = df['Height'] # this is now a a series
|
||||
heights = df['Height'] # this is now a series
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
heights
|
||||
print(heights)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
```sh
|
||||
0 73.847017
|
||||
1 68.781904
|
||||
2 74.110105
|
||||
@ -654,7 +536,7 @@ heights
|
||||
9998 69.034243
|
||||
9999 61.944246
|
||||
Name: Height, Length: 10000, dtype: float64
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
@ -664,12 +546,11 @@ weights = df['Weight'] # this is now a series
|
||||
|
||||
|
||||
```python
|
||||
weights
|
||||
print(weights)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
```sh
|
||||
0 241.893563
|
||||
1 162.310473
|
||||
2 212.740856
|
||||
@ -682,12 +563,12 @@ weights
|
||||
9998 163.852461
|
||||
9999 113.649103
|
||||
Name: Weight, Length: 10000, dtype: float64
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
```python
|
||||
len(heights) == len(weights)
|
||||
print(len(heights) == len(weights))
|
||||
```
|
||||
|
||||
|
||||
@ -699,12 +580,11 @@ len(heights) == len(weights)
|
||||
|
||||
|
||||
```python
|
||||
heights.describe() # give statisical information about height data
|
||||
print(heights.describe()) # gives statistical information about the height data
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
```sh
|
||||
count 10000.000000
|
||||
mean 66.367560
|
||||
std 3.847528
|
||||
@ -714,17 +594,17 @@ heights.describe() # give statisical information about height data
|
||||
75% 69.174262
|
||||
max 78.998742
|
||||
Name: Height, dtype: float64
|
||||
|
||||
|
||||
|
||||
|
||||
```python
|
||||
weights.describe()
|
||||
```
|
||||
|
||||
|
||||
|
||||
```python
|
||||
print(weights.describe())
|
||||
```
|
||||
|
||||
|
||||
|
||||
```sh
|
||||
count 10000.000000
|
||||
mean 161.440357
|
||||
std 32.108439
|
||||
@ -734,31 +614,15 @@ weights.describe()
|
||||
75% 187.169525
|
||||
max 269.989699
|
||||
Name: Weight, dtype: float64
|
||||
|
||||
|
||||
|
||||
|
||||
```python
|
||||
df.describe() # describe can also give statistical information from a dataFrame
|
||||
```
|
||||
|
||||
|
||||
|
||||
```python
|
||||
print(df.describe()) # describe can also give statistical information from a dataFrame
|
||||
```
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -810,24 +674,22 @@ df.describe() # describe can also give statistical information from a dataFrame
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
## Modifying a DataFrame
|
||||
|
||||
|
||||
|
||||
## Modifying DataFrame
|
||||
|
||||
|
||||
|
||||
Modifying a DataFrame
|
||||
Modifying a DataFrame:
|
||||
* We can create a new DataFrame
|
||||
* We can create a new column and add to DataFrame,
|
||||
* we can remove an existing column from DataFrame,
|
||||
* we can modify an existing column from DataFrame,
|
||||
* we can change the data type of column values from DataFrame
|
||||
* We can create a new column and add it to the DataFrame,
|
||||
* we can remove an existing column from a DataFrame,
|
||||
* we can modify an existing column in a DataFrame,
|
||||
* we can change the data type of column values in the DataFrame
|
||||
|
||||
### Create a DataFrame
|
||||
### Creating a DataFrame
|
||||
|
||||
All the time, first we import the necessary packages. Now, lets import pandas and numpy, two best friends ever.
|
||||
As always, first we import the necessary packages. Now, let's import pandas and numpy, two best friends ever.
|
||||
|
||||
|
||||
```python
|
||||
@ -838,26 +700,9 @@ data = [
|
||||
{"Name": "David", "Country":"UK","City":"London"},
|
||||
{"Name": "John", "Country":"Sweden","City":"Stockholm"}]
|
||||
df = pd.DataFrame(data)
|
||||
df
|
||||
print(df)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -888,13 +733,11 @@ df
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
Adding a column to a DataFrame is like adding a key to a dictionary.
|
||||
|
||||
Adding column in DataFrame is like adding a key in dictionary.
|
||||
|
||||
First let's use the previous example to create a DataFrame. After we create the DataFrame, we will start modifying the columns and column values.
|
||||
First let's use the previous example to create a DataFrame. After we create the DataFrame, we will start modifying the columns and column values.
|
||||
|
||||
### Adding a New Column
|
||||
Let's add a weight column in the DataFrame
|
||||
@ -907,22 +750,6 @@ df
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -957,36 +784,17 @@ df
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
Let's add a height column in the DataFrame
|
||||
Let's add a height column into the DataFrame as well
|
||||
|
||||
|
||||
```python
|
||||
heights = [173, 175, 169]
|
||||
df['Height'] =heights
|
||||
df
|
||||
df['Height'] = heights
|
||||
print(df)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -1025,13 +833,10 @@ df
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
As you can see in the DataFrame above, we added two new columns, Weight and Height. Let's add one additional column called BMI (Body Mass Index) by calculating each person's BMI using their mass and height. BMI is mass divided by height squared, with height in meters: Weight / (Height * Height).
|
||||
|
||||
|
||||
As you can see from the above DataFrame, now we new added columns, the Weight and Height. Let's add one additional column by called BMI(Body Mass Index) by calculating their BMI using thier mass and height. BMI is mass divided by height square meter(Weight/Height * Height).
|
||||
|
||||
As you can see, the hieght is in centimeter, so we shoud change the height to meter. So, let's modify the height row
|
||||
As you can see, the height is in centimeters, so we should change it to meters. Let's modify the height column.
|
||||
|
||||
### Modifying column values
|
||||
|
||||
@ -1039,26 +844,8 @@ As you can see, the hieght is in centimeter, so we shoud change the height to me
|
||||
```python
|
||||
df['Height'] = df['Height'] * 0.01
|
||||
df
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -1097,13 +884,9 @@ df
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
```python
|
||||
# Using function makes our code clean but you can just calculate the bmi without function
|
||||
# Using functions makes our code clean, but you can calculate the bmi without one
|
||||
def calculate_bmi ():
|
||||
weights = df['Weight']
|
||||
heights = df['Height']
|
||||
@ -1123,23 +906,6 @@ df['BMI'] = bmi
|
||||
df
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -1182,37 +948,20 @@ df
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
### Formating DataFrame column
|
||||
### Formating DataFrame columns
|
||||
|
||||
The BMI of the above DataFrame has is float with many significant digits after decimal. Let's make it to have only one significant digit after point.
|
||||
The BMI column values of the DataFrame are floats with many digits after the decimal point. Let's round them to one digit after the decimal point.
|
||||
|
||||
|
||||
```python
|
||||
df['BMI'] = round(df['BMI'], 1)
|
||||
df
|
||||
print(df)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -1255,38 +1004,19 @@ df
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
The information in the DataFrame seems not yet complete, let's add birth year and current year columns.
|
||||
|
||||
|
||||
```python
|
||||
birth_year = ['1769', '1985', '1990']
|
||||
current_year = pd.Series(2019, index=[0, 1,2])
|
||||
current_year = pd.Series(2020, index=[0, 1,2])
|
||||
df['Birth Year'] = birth_year
|
||||
df['Current Year'] = current_year
|
||||
df
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -1311,7 +1041,7 @@ df
|
||||
<td>1.73</td>
|
||||
<td>24.7</td>
|
||||
<td>1769</td>
|
||||
<td>2019</td>
|
||||
<td>2020</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>1</td>
|
||||
@ -1322,7 +1052,7 @@ df
|
||||
<td>1.75</td>
|
||||
<td>25.5</td>
|
||||
<td>1985</td>
|
||||
<td>2019</td>
|
||||
<td>2020</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>2</td>
|
||||
@ -1333,65 +1063,55 @@ df
|
||||
<td>1.69</td>
|
||||
<td>24.2</td>
|
||||
<td>1990</td>
|
||||
<td>2019</td>
|
||||
<td>2020</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
## Checking data types of Column values
|
||||
|
||||
|
||||
```python
|
||||
df.Weight.dtype
|
||||
print(df.Weight.dtype)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
```sh
|
||||
dtype('int64')
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
```python
|
||||
df['Birth Year'].dtype # it give string object , we should change this to number
|
||||
df['Birth Year'].dtype # it gives string object, we should change this to a number
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
dtype('O')
|
||||
|
||||
|
||||
|
||||
|
||||
```python
|
||||
df['Birth Year'] = df['Birth Year'].astype('int')
|
||||
df['Birth Year'].dtype # let's check the data type now
|
||||
print(df['Birth Year'].dtype) # let's check the data type now
|
||||
```
|
||||
|
||||
|
||||
|
||||
```sh
|
||||
dtype('int32')
|
||||
```
|
||||
|
||||
dtype('int64')
|
||||
|
||||
|
||||
|
||||
Now same for the current year:
|
||||
|
||||
```python
|
||||
df['Current Year'] = df['Current Year'].astype('int')
|
||||
df['Current Year'].dtype
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
dtype('int64')
|
||||
|
||||
```sh
|
||||
dtype('int32')
|
||||
```
|
||||
|
||||
|
||||
Now, the column values of birth year and current year are integers. We can calculate the age.
|
||||
@ -1405,36 +1125,20 @@ ages
|
||||
|
||||
|
||||
|
||||
0 250
|
||||
1 34
|
||||
2 29
|
||||
dtype: int64
|
||||
0 251
|
||||
1 35
|
||||
2 30
|
||||
dtype: int32
|
||||
|
||||
|
||||
|
||||
|
||||
```python
|
||||
df['Ages'] = ages
|
||||
df
|
||||
print(df)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -1489,51 +1193,32 @@ df
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
The person in the first row has lived for 251 years so far. It is unlikely for someone to live so long. Either it is a typo or the data is cooked. So let's fill that value with the average of the column, without including the outlier.
|
||||
|
||||
|
||||
The person in the first row lives 250 years. It is unlikely for someone to live 250 years. Either it is a typo or the data is cooked. So lets fill that data with average of the columns without including outlier.
|
||||
|
||||
mean = (34 + 29)/ 2
|
||||
mean = (35 + 30)/ 2
|
||||
|
||||
|
||||
```python
|
||||
mean = (34 + 29)/ 2
|
||||
mean
|
||||
mean = (35 + 30)/ 2
|
||||
print('Mean: ',mean) #it is good to add some description to the output, so we know what is what
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
31.5
|
||||
|
||||
```sh
|
||||
Mean: 32.5
|
||||
```
|
||||
|
||||
|
||||
### Boolean Indexing
|
||||
|
||||
|
||||
```python
|
||||
df[df['Ages'] > 120]
|
||||
print(df[df['Ages'] > 120])
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -1559,37 +1244,19 @@ df[df['Ages'] > 120]
|
||||
<td>1.73</td>
|
||||
<td>24.7</td>
|
||||
<td>1769</td>
|
||||
<td>2019</td>
|
||||
<td>250</td>
|
||||
<td>2020</td>
|
||||
<td>251</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
```python
|
||||
df[df['Ages'] < 120]
|
||||
print(df[df['Ages'] < 120])
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
<div>
|
||||
<style scoped>
|
||||
.dataframe tbody tr th:only-of-type {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.dataframe tbody tr th {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
.dataframe thead th {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
<table border="1" class="dataframe">
|
||||
<thead>
|
||||
<tr style="text-align: right;">
|
||||
@ -1615,8 +1282,8 @@ df[df['Ages'] < 120]
|
||||
<td>1.75</td>
|
||||
<td>25.5</td>
|
||||
<td>1985</td>
|
||||
<td>2019</td>
|
||||
<td>34</td>
|
||||
<td>2020</td>
|
||||
<td>35</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>2</td>
|
||||
@ -1627,22 +1294,13 @@ df[df['Ages'] < 120]
|
||||
<td>1.69</td>
|
||||
<td>24.2</td>
|
||||
<td>1990</td>
|
||||
<td>2019</td>
|
||||
<td>29</td>
|
||||
<td>2020</td>
|
||||
<td>30</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
```python
|
||||
df['Ages'] = df[df['Ages'] > 120]
|
||||
|
||||
|
||||
```
|
||||
|
||||
## Exercises: Day 25
|
||||
1. Read the hacker_news.csv file from the data directory
|
||||
1. Get the first five rows
|
||||
@ -1651,7 +1309,7 @@ df['Ages'] = df[df['Ages'] > 120]
|
||||
1. Count the number of rows and columns
|
||||
* Filter the titles which contain python
|
||||
* Filter the titles which contain JavaScript
|
||||
* Explore the data and make sense of the data
|
||||
* Explore the data and make sense of it
|
||||
|
||||
🎉 CONGRATULATIONS ! 🎉
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user