# using pandas to load in data from a URL
import pandas as pd
# Google Sheets CSV export URL (output=csv); pd.read_csv fetches it directly
url="https://docs.google.com/spreadsheets/d/e/2PACX-1vQcpVvVioO23cndDwr1UmKhndrSq6ES6ZUKZ4fkBBqIAavd1_coVPO_yeOye-Ub-cAWlkX3psJvOU8o/pub?output=csv"
df = pd.read_csv(url)
# Show the first 5 rows of the data set (head() defaults to n=5)
df.head()
| | date | meantemp | humidity | wind_speed | meanpressure |
---|---|---|---|---|---|
0 | 2013-01-01 | 10.000000 | 84.500000 | 0.000000 | 1015.666667 |
1 | 2013-01-02 | 7.400000 | 92.000000 | 2.980000 | 1017.800000 |
2 | 2013-01-03 | 7.166667 | 87.000000 | 4.633333 | 1018.666667 |
3 | 2013-01-04 | 8.666667 | 71.333333 | 1.233333 | 1017.166667 |
4 | 2013-01-05 | 6.000000 | 86.833333 | 3.700000 | 1016.500000 |
# df.shape is a (rows, columns) tuple giving the dimensions of the dataset
df.shape
(1462, 5)
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
# NOTE(review): meanpressure shows min of about -3.04 and max of about 7679.33 while
# its quartiles sit near 1001-1015 - these look like outliers; verify before modelling
df.describe()
| | meantemp | humidity | wind_speed | meanpressure |
---|---|---|---|---|
count | 1462.000000 | 1462.000000 | 1462.000000 | 1462.000000 |
mean | 25.495521 | 60.771702 | 6.802209 | 1011.104548 |
std | 7.348103 | 16.769652 | 4.561602 | 180.231668 |
min | 6.000000 | 13.428571 | 0.000000 | -3.041667 |
25% | 18.857143 | 50.375000 | 3.475000 | 1001.580357 |
50% | 27.714286 | 62.625000 | 6.221667 | 1008.563492 |
75% | 31.305804 | 72.218750 | 9.238235 | 1014.944901 |
max | 38.714286 | 100.000000 | 42.220000 | 7679.333333 |
# Count the missing (null/NaN) values in each column of the dataset
df.isnull().sum()
date            0
meantemp        0
humidity        0
wind_speed      0
meanpressure    0
dtype: int64