In [ ]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Apply matplotlib's bundled 'ggplot' style sheet to every figure that follows.
plt.style.use('ggplot')
# Draw minus signs with the ASCII hyphen-minus instead of the Unicode minus glyph.
# NOTE(review): presumably a guard for fonts that lack that glyph — confirm it is still needed.
mpl.rcParams['axes.unicode_minus'] = False
In [10]:
# Read the training set, parsing the 'datetime' column into timestamps on load,
# then show (rows, columns) as the cell output.
train = pd.read_csv(
    'train.csv',
    parse_dates=['datetime'],
)
train.shape
Out[10]:
In [11]:
# Print a concise summary of the frame: columns, non-null counts, dtypes and memory usage.
train.info()
In [14]:
# Preview the first 20 rows of the training frame.
train.head(n=20)
Out[14]:
In [13]:
# Descriptive statistics for the `temp` column.
train["temp"].describe()
Out[13]:
In [17]:
# Count missing values in each column.
train.isna().sum()
Out[17]:
In [58]:
# Pairwise correlation matrix (DataFrame.corr defaults) for the selected columns.
corrMatt = train[
    ["temp", "atemp", "casual", "registered", "humidity", "windspeed", "count"]
].corr()
print(corrMatt)

# Boolean mask for sns.heatmap, where True means "hide this cell".
# Only the strict upper triangle is hidden, so each pair is drawn once and the
# diagonal stays visible. The mask is built from the matrix shape rather than
# from the correlation values themselves: a float mask relies on non-zero values
# being coerced to True, so a coefficient of exactly 0 above the diagonal would
# wrongly be left visible.
mask = np.triu(np.ones(corrMatt.shape, dtype=bool), k=1)
In [64]:
# Draw the masked correlation heatmap on a 20x10-inch figure.
fig, ax = plt.subplots(figsize=(20, 10))
sns.heatmap(
    corrMatt,
    mask=mask,
    vmax=.8,      # upper anchor of the colour scale
    square=True,  # equal-aspect cells
    annot=True,   # write each coefficient into its cell
    ax=ax,
)
Out[64]:
'ML | 데이터과학 > 머신러닝' 카테고리의 다른 글
Cost Function for Logistic regression (0) | 2018.01.26 |
---|---|
Linear regression, cost func., Logistic (0) | 2018.01.26 |
댓글