In [1]:
import pandas as pd
In [2]:
df = pd.DataFrame([['Alice', 'Algebra', 5],
                   ['Alice', 'Calculus', 3],
                   ['Bob', 'Algebra', 4],
                   ['Bob', 'Calculus', 2],
                   ['Bob', 'Music', 3]], 
                  columns=['name', 'course', 'grade'])
In [5]:
df
Out[5]:
name course grade
0 Alice Algebra 5
1 Alice Calculus 3
2 Bob Algebra 4
3 Bob Calculus 2
4 Bob Music 3
In [7]:
(df
 .pivot(index='name', columns='course', values='grade')
 .assign(math=lambda x: 0.6 * x.Algebra + 0.4 * x.Calculus))
Out[7]:
course Algebra Calculus Music math
name
Alice 5.0 3.0 NaN 4.2
Bob 4.0 2.0 3.0 3.2
In [8]:
grades_wide = (df
 .pivot(index='name', columns='course', values='grade'))
In [9]:
grades_wide
Out[9]:
course Algebra Calculus Music
name
Alice 5.0 3.0 NaN
Bob 4.0 2.0 3.0
In [14]:
grades_wide.loc['Alice']
Out[14]:
course
Algebra     5.0
Calculus    3.0
Music       NaN
Name: Alice, dtype: float64
In [15]:
grades_wide.reset_index()
Out[15]:
course name Algebra Calculus Music
0 Alice 5.0 3.0 NaN
1 Bob 4.0 2.0 3.0
In [16]:
(grades_wide
 .reset_index()
 .melt(id_vars='name', 
       value_vars=['Algebra', 'Calculus', 'Music'],
       var_name='grade'))
Out[16]:
name grade value
0 Alice Algebra 5.0
1 Bob Algebra 4.0
2 Alice Calculus 3.0
3 Bob Calculus 2.0
4 Alice Music NaN
5 Bob Music 3.0
In [19]:
df = pd.DataFrame([['Alice', 'Algebra', 5],
                   ['Alice', 'Calculus', 3],
                   ['Bob', 'Algebra', 4],
                   ['Bob', 'Calculus', 2],
                   ['Bob', 'Music', 3],
                   ['Alice', 'Algebra', 3]], 
                  columns=['name', 'course', 'grade'])
In [26]:
df.pivot_table(index='name', columns='course', values='grade', 
               aggfunc='last').fillna(0)
Out[26]:
course Algebra Calculus Music
name
Alice 3.0 3.0 0.0
Bob 4.0 2.0 3.0
In [27]:
students = pd.DataFrame([['Alice', 2],
                         ['Bob', 5],
                         ['Claudia', 3]], columns=['name', 'id'])
In [31]:
grades = pd.DataFrame([[5, 8],
                       [2, 10],
                       [4, 9]], columns=['id', 'grade'])
In [32]:
grades
Out[32]:
id grade
0 5 8
1 2 10
2 4 9
In [33]:
students
Out[33]:
name id
0 Alice 2
1 Bob 5
2 Claudia 3
In [35]:
students.merge(grades, how='left', on='id')
Out[35]:
name id grade
0 Alice 2 10.0
1 Bob 5 8.0
2 Claudia 3 NaN
In [36]:
students.merge(grades, how='right', on='id')
Out[36]:
name id grade
0 Alice 2 10
1 Bob 5 8
2 NaN 4 9
In [37]:
students.merge(grades, how='outer', on='id')
Out[37]:
name id grade
0 Alice 2 10.0
1 Bob 5 8.0
2 Claudia 3 NaN
3 NaN 4 9.0
In [41]:
grades = pd.DataFrame([[5, 8],
                       [2, 10],
                       [4, 9],
                       [5, 10]], columns=['idx', 'grade'])
In [43]:
students.merge(grades, left_on='id', right_on='idx').groupby('name').mean()
Out[43]:
id idx grade
name
Alice 2 2 10
Bob 5 5 9
In [44]:
df = pd.DataFrame([[5, 4, 2], [3, 5, 8]])
In [47]:
df > 5
Out[47]:
0 1 2
0 False False False
1 False False True
In [49]:
df[df > 5].fillna(0)
Out[49]:
0 1 2
0 0.0 0.0 0.0
1 0.0 0.0 8.0
In [51]:
df
Out[51]:
0 1 2
0 5 4 2
1 3 5 8
In [52]:
df[df > 5] = 5
In [53]:
df
Out[53]:
0 1 2
0 5 4 2
1 3 5 5
In [56]:
df
Out[56]:
0 1 2
0 5 4 2
1 3 5 5
In [55]:
df.where(pd.DataFrame([[True, False, True],
                       [False, False, True]]), -1)
Out[55]:
0 1 2
0 5 -1 2
1 -1 -1 5
In [57]:
df.where(pd.DataFrame([[True, False, True],
                       [False, False, True]]),
         pd.DataFrame([[1, 2, 3],
                       [4, 5, 6]]))
Out[57]:
0 1 2
0 5 2 2
1 4 5 5
In [66]:
tables = pd.read_html("https://bit.ly/2FfPmiX", header=0)
In [67]:
type(tables)
Out[67]:
list
In [68]:
len(tables)
Out[68]:
1
In [69]:
df = tables[0]
In [70]:
df.dtypes
Out[70]:
STATION_ID      int64
STATION_NM     object
DATE_OBS       object
TMPMAX        float64
Q               int64
TMPMIN        float64
Q.1             int64
TMPMN         float64
Q.2             int64
PRECIP        float64
Q.3             int64
D               int64
dtype: object
In [78]:
%matplotlib inline
In [79]:
df_clean = (df
 .drop(['STATION_ID', 'STATION_NM', 'Q', 'Q.1', 'Q.2', 'Q.3', 'D'], axis=1)
 .dropna().loc[366:])
In [87]:
df_clean['TMPMN'][:3650].plot()
df_clean['TMPMN'][:3650].rolling(window=30).mean().plot()
Out[87]:
<matplotlib.axes._subplots.AxesSubplot at 0x21f2af4ffd0>
In [88]:
df_clean['TMPMN'].rolling(window=3650).mean().plot()
Out[88]:
<matplotlib.axes._subplots.AxesSubplot at 0x21f2af63860>
In [90]:
df_clean['DATE_OBS'].dtype
Out[90]:
dtype('O')
In [94]:
df_clean.index = pd.to_datetime(df_clean['DATE_OBS'])
In [96]:
df_clean['TMPMN'].rolling(window=3650).mean().plot()
Out[96]:
<matplotlib.axes._subplots.AxesSubplot at 0x21f2af16ef0>
In [97]:
df_clean['TMPMN'][:365].plot()
Out[97]:
<matplotlib.axes._subplots.AxesSubplot at 0x21f2af8fc50>
In [98]:
df_clean["1950"]
Out[98]:
DATE_OBS TMPMAX TMPMIN TMPMN PRECIP
DATE_OBS
1950-01-01 1950-01-01 -16.4 -22.7 -19.0 0.1
1950-01-02 1950-01-02 -15.7 -23.1 -19.1 0.0
1950-01-03 1950-01-03 -19.1 -29.0 -24.8 0.2
1950-01-04 1950-01-04 -24.4 -31.8 -29.1 0.1
1950-01-05 1950-01-05 -27.5 -33.0 -30.3 0.0
1950-01-06 1950-01-06 -23.7 -29.6 -26.8 0.0
1950-01-07 1950-01-07 -23.7 -25.7 -24.5 0.0
1950-01-08 1950-01-08 -24.7 -31.4 -28.9 0.0
1950-01-09 1950-01-09 -27.6 -32.7 -29.8 0.5
1950-01-10 1950-01-10 -28.4 -36.8 -32.8 0.2
1950-01-11 1950-01-11 -17.2 -31.6 -23.4 0.3
1950-01-12 1950-01-12 -15.7 -22.5 -17.7 0.0
1950-01-13 1950-01-13 -12.5 -24.4 -18.4 0.0
1950-01-14 1950-01-14 -9.1 -13.6 -11.6 1.8
1950-01-15 1950-01-15 -4.1 -9.5 -7.1 1.7
1950-01-16 1950-01-16 -6.6 -15.6 -13.1 0.2
1950-01-17 1950-01-17 -15.1 -26.4 -22.6 0.4
1950-01-18 1950-01-18 -15.5 -28.9 -20.8 0.0
1950-01-19 1950-01-19 -18.4 -21.4 -19.6 0.0
1950-01-20 1950-01-20 -10.6 -21.9 -15.9 0.6
1950-01-21 1950-01-21 -13.2 -19.9 -16.2 0.8
1950-01-22 1950-01-22 -14.7 -21.4 -18.5 0.1
1950-01-23 1950-01-23 -5.8 -19.8 -11.9 0.0
1950-01-24 1950-01-24 -3.7 -6.3 -4.6 0.1
1950-01-25 1950-01-25 -2.3 -5.7 -3.6 0.1
1950-01-26 1950-01-26 -2.0 -5.3 -3.5 0.0
1950-01-27 1950-01-27 -4.8 -8.4 -6.7 0.1
1950-01-28 1950-01-28 -7.0 -13.1 -10.9 0.0
1950-01-29 1950-01-29 -12.7 -20.3 -17.4 0.2
1950-01-30 1950-01-30 -12.7 -18.5 -15.5 0.2
... ... ... ... ... ...
1950-12-02 1950-12-02 2.8 -3.2 0.3 0.0
1950-12-03 1950-12-03 3.3 1.9 2.7 0.0
1950-12-04 1950-12-04 2.6 1.2 1.8 0.0
1950-12-05 1950-12-05 1.6 -0.2 0.6 0.0
1950-12-06 1950-12-06 0.5 -1.3 -0.6 5.7
1950-12-07 1950-12-07 -0.7 -3.3 -2.4 0.0
1950-12-08 1950-12-08 -2.2 -6.0 -3.8 0.0
1950-12-09 1950-12-09 -2.6 -9.6 -5.3 0.0
1950-12-10 1950-12-10 -4.2 -9.3 -6.6 0.0
1950-12-11 1950-12-11 -3.0 -6.1 -4.6 0.0
1950-12-12 1950-12-12 -3.1 -6.0 -4.6 0.0
1950-12-13 1950-12-13 0.6 -3.5 -0.6 1.0
1950-12-14 1950-12-14 1.1 -0.2 0.5 7.0
1950-12-15 1950-12-15 1.2 -2.8 -0.9 0.2
1950-12-16 1950-12-16 -2.3 -7.0 -5.6 0.0
1950-12-17 1950-12-17 -4.8 -9.0 -7.0 0.0
1950-12-18 1950-12-18 -3.3 -5.8 -4.6 0.0
1950-12-19 1950-12-19 -4.2 -8.6 -6.6 0.0
1950-12-20 1950-12-20 -4.9 -8.5 -7.1 0.0
1950-12-21 1950-12-21 -5.9 -10.1 -8.2 0.0
1950-12-22 1950-12-22 -7.7 -11.2 -9.5 0.0
1950-12-23 1950-12-23 -6.8 -10.1 -8.4 0.7
1950-12-24 1950-12-24 -6.2 -12.6 -8.4 0.2
1950-12-25 1950-12-25 -7.9 -11.6 -9.1 0.7
1950-12-26 1950-12-26 -9.9 -14.4 -11.8 0.5
1950-12-27 1950-12-27 -5.0 -11.1 -8.4 5.2
1950-12-28 1950-12-28 -9.9 -14.6 -11.6 0.0
1950-12-29 1950-12-29 -8.5 -14.7 -10.8 0.0
1950-12-30 1950-12-30 -12.4 -18.6 -15.7 0.0
1950-12-31 1950-12-31 -13.7 -19.4 -15.3 0.0

365 rows × 5 columns

In [99]:
df_clean["1950-03"]
Out[99]:
DATE_OBS TMPMAX TMPMIN TMPMN PRECIP
DATE_OBS
1950-03-01 1950-03-01 2.2 0.4 0.9 1.5
1950-03-02 1950-03-02 1.4 -1.0 -0.3 0.0
1950-03-03 1950-03-03 -0.3 -1.7 -1.0 0.1
1950-03-04 1950-03-04 -0.6 -2.3 -1.8 0.8
1950-03-05 1950-03-05 -0.2 -2.0 -1.2 0.0
1950-03-06 1950-03-06 2.1 -1.5 0.5 3.7
1950-03-07 1950-03-07 2.4 -1.8 -0.3 1.4
1950-03-08 1950-03-08 -0.7 -5.4 -4.2 0.3
1950-03-09 1950-03-09 -4.6 -10.5 -8.6 0.2
1950-03-10 1950-03-10 -5.3 -14.3 -10.2 0.1
1950-03-11 1950-03-11 -3.4 -12.5 -7.9 0.3
1950-03-12 1950-03-12 -0.5 -5.5 -3.4 3.8
1950-03-13 1950-03-13 0.6 -3.6 -1.8 2.5
1950-03-14 1950-03-14 1.9 -6.9 -2.9 0.0
1950-03-15 1950-03-15 0.4 -5.8 -3.2 1.4
1950-03-16 1950-03-16 -2.2 -11.0 -7.3 0.9
1950-03-17 1950-03-17 0.7 -15.0 -7.9 0.0
1950-03-18 1950-03-18 2.2 -8.8 -3.4 0.0
1950-03-19 1950-03-19 5.8 -4.0 -0.2 0.0
1950-03-20 1950-03-20 7.0 -2.3 1.4 0.0
1950-03-21 1950-03-21 6.0 -3.2 0.6 0.0
1950-03-22 1950-03-22 6.2 -2.9 1.1 0.0
1950-03-23 1950-03-23 3.6 0.2 1.2 3.5
1950-03-24 1950-03-24 1.1 0.1 0.7 0.0
1950-03-25 1950-03-25 0.9 -4.7 -1.7 0.0
1950-03-26 1950-03-26 3.7 -7.4 -2.4 0.0
1950-03-27 1950-03-27 0.7 -4.5 -2.4 0.5
1950-03-28 1950-03-28 -0.9 -1.6 -1.2 5.0
1950-03-29 1950-03-29 1.4 -2.2 -1.3 2.9
1950-03-30 1950-03-30 3.4 -4.8 -1.0 0.0
1950-03-31 1950-03-31 5.5 -4.9 0.2 0.0
In [101]:
df_clean["1950-03-05"]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
D:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   3062             try:
-> 3063                 return self._engine.get_loc(key)
   3064             except KeyError:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: '1950-03-05'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-101-837d73c8927a> in <module>()
----> 1 df_clean["1950-03-05"]

D:\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   2683             return self._getitem_multilevel(key)
   2684         else:
-> 2685             return self._getitem_column(key)
   2686 
   2687     def _getitem_column(self, key):

D:\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
   2690         # get column
   2691         if self.columns.is_unique:
-> 2692             return self._get_item_cache(key)
   2693 
   2694         # duplicate columns & possible reduce dimensionality

D:\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
   2484         res = cache.get(item)
   2485         if res is None:
-> 2486             values = self._data.get(item)
   2487             res = self._box_item_values(item, values)
   2488             cache[item] = res

D:\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
   4113 
   4114             if not isna(item):
-> 4115                 loc = self.items.get_loc(item)
   4116             else:
   4117                 indexer = np.arange(len(self.items))[isna(self.items)]

D:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   3063                 return self._engine.get_loc(key)
   3064             except KeyError:
-> 3065                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   3066 
   3067         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: '1950-03-05'
In [109]:
df_clean['mon'] = df_clean.index.month
df_clean['day'] = df_clean.index.day
df_clean['year'] = df_clean.index.year
In [145]:
corr = df_clean.pivot_table(values='TMPMN', index='year', columns='mon').corr()
In [146]:
corr
Out[146]:
mon 1 2 3 4 5 6 7 8 9 10 11 12
mon
1 1.000000 0.384755 0.257302 0.216484 0.047153 0.039691 0.206568 -0.120758 0.134955 0.117936 -0.142560 -0.059667
2 0.384755 1.000000 0.379095 0.216300 0.017000 0.028284 0.252928 -0.097432 0.033740 0.071301 0.059700 -0.012276
3 0.257302 0.379095 1.000000 0.328143 0.071455 0.177538 0.091281 0.022202 -0.081379 -0.037189 -0.031521 -0.075481
4 0.216484 0.216300 0.328143 1.000000 0.041331 0.017182 0.159124 -0.117359 0.028134 0.037973 0.077483 0.000211
5 0.047153 0.017000 0.071455 0.041331 1.000000 -0.121644 0.028159 0.118944 0.038196 0.178622 0.175155 -0.156485
6 0.039691 0.028284 0.177538 0.017182 -0.121644 1.000000 0.306204 0.093280 -0.060803 0.111169 -0.370548 0.150160
7 0.206568 0.252928 0.091281 0.159124 0.028159 0.306204 1.000000 0.411425 0.131770 0.132230 0.000195 0.053797
8 -0.120758 -0.097432 0.022202 -0.117359 0.118944 0.093280 0.411425 1.000000 0.222137 0.179547 0.123741 0.002483
9 0.134955 0.033740 -0.081379 0.028134 0.038196 -0.060803 0.131770 0.222137 1.000000 0.277824 0.077645 0.054884
10 0.117936 0.071301 -0.037189 0.037973 0.178622 0.111169 0.132230 0.179547 0.277824 1.000000 0.146520 -0.031469
11 -0.142560 0.059700 -0.031521 0.077483 0.175155 -0.370548 0.000195 0.123741 0.077645 0.146520 1.000000 -0.112312
12 -0.059667 -0.012276 -0.075481 0.000211 -0.156485 0.150160 0.053797 0.002483 0.054884 -0.031469 -0.112312 1.000000
In [113]:
import matplotlib.pyplot as plt
In [118]:
months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep',
         'oct', 'nov', 'dec']
plt.imshow(corr, cmap='seismic', vmin=-1, vmax=1)
plt.colorbar()
plt.xticks(range(12), months)
plt.yticks(range(12), months);
In [119]:
from statsmodels.formula.api import ols
In [130]:
td = (df_clean.index - df_clean.index[0])
In [133]:
df_clean['days'] = td.days
In [138]:
reg = ols('TMPMN ~ days', data=df_clean).fit()
In [140]:
reg.params['days']
Out[140]:
8.38027808900898e-05
In [141]:
df_clean['TMPMN_corrected'] = (df_clean['TMPMN'] 
                               - reg.params['days'] * df_clean['days'])
In [142]:
df_clean
Out[142]:
DATE_OBS TMPMAX TMPMIN TMPMN PRECIP mon day year days TMPMN_corrected
DATE_OBS
1949-01-01 1949-01-01 -2.1 -6.7 -4.2 0.0 1 1 1949 0 -4.200000
1949-01-02 1949-01-02 -0.5 -6.7 -1.2 4.2 1 2 1949 1 -1.200084
1949-01-03 1949-01-03 1.1 -2.1 -0.7 0.0 1 3 1949 2 -0.700168
1949-01-04 1949-01-04 3.3 0.9 2.3 0.0 1 4 1949 3 2.299749
1949-01-05 1949-01-05 4.0 -0.9 1.1 0.8 1 5 1949 4 1.099665
1949-01-06 1949-01-06 -0.8 -3.2 -2.3 0.0 1 6 1949 5 -2.300419
1949-01-07 1949-01-07 -0.6 -5.1 -3.4 0.0 1 7 1949 6 -3.400503
1949-01-08 1949-01-08 0.5 -3.0 -0.6 1.7 1 8 1949 7 -0.600587
1949-01-09 1949-01-09 0.8 -1.6 -0.6 0.0 1 9 1949 8 -0.600670
1949-01-10 1949-01-10 -1.1 -4.2 -2.5 1.6 1 10 1949 9 -2.500754
1949-01-11 1949-01-11 -3.0 -5.3 -4.3 0.0 1 11 1949 10 -4.300838
1949-01-12 1949-01-12 -3.3 -8.4 -5.8 0.0 1 12 1949 11 -5.800922
1949-01-13 1949-01-13 -4.8 -10.2 -6.8 0.2 1 13 1949 12 -6.801006
1949-01-14 1949-01-14 -1.0 -5.7 -2.5 0.0 1 14 1949 13 -2.501089
1949-01-15 1949-01-15 -1.9 -3.2 -2.9 2.2 1 15 1949 14 -2.901173
1949-01-16 1949-01-16 -0.5 -4.9 -2.5 0.9 1 16 1949 15 -2.501257
1949-01-17 1949-01-17 -0.7 -4.0 -3.2 0.1 1 17 1949 16 -3.201341
1949-01-18 1949-01-18 -3.8 -10.3 -7.5 0.0 1 18 1949 17 -7.501425
1949-01-19 1949-01-19 -6.9 -10.4 -8.8 0.2 1 19 1949 18 -8.801508
1949-01-20 1949-01-20 -2.4 -13.5 -8.6 2.5 1 20 1949 19 -8.601592
1949-01-21 1949-01-21 1.6 -2.6 0.0 3.6 1 21 1949 20 -0.001676
1949-01-22 1949-01-22 0.2 -11.7 -4.2 2.0 1 22 1949 21 -4.201760
1949-01-23 1949-01-23 -11.4 -15.5 -13.4 0.0 1 23 1949 22 -13.401844
1949-01-24 1949-01-24 -10.5 -14.4 -12.4 0.0 1 24 1949 23 -12.401927
1949-01-25 1949-01-25 1.7 -11.5 -1.8 4.4 1 25 1949 24 -1.802011
1949-01-26 1949-01-26 -0.7 -6.8 -3.1 0.0 1 26 1949 25 -3.102095
1949-01-27 1949-01-27 1.1 -9.1 -3.7 0.0 1 27 1949 26 -3.702179
1949-01-28 1949-01-28 1.2 -2.6 -1.2 0.0 1 28 1949 27 -1.202263
1949-01-29 1949-01-29 1.4 -2.6 -0.1 0.0 1 29 1949 28 -0.102346
1949-01-30 1949-01-30 -0.5 -8.1 -6.5 0.0 1 30 1949 29 -6.502430
... ... ... ... ... ... ... ... ... ... ...
2006-10-02 2006-10-02 13.6 7.2 10.0 0.0 10 2 2006 21093 8.232348
2006-10-03 2006-10-03 14.7 10.9 12.5 10.2 10 3 2006 21094 10.732264
2006-10-04 2006-10-04 14.3 10.3 12.6 4.9 10 4 2006 21095 10.832180
2006-10-05 2006-10-05 15.9 11.6 13.7 3.8 10 5 2006 21096 11.932097
2006-10-06 2006-10-06 14.0 9.0 11.1 0.0 10 6 2006 21097 9.332013
2006-10-07 2006-10-07 13.9 7.9 10.3 0.0 10 7 2006 21098 8.531929
2006-10-08 2006-10-08 14.4 7.1 10.8 3.0 10 8 2006 21099 9.031845
2006-10-09 2006-10-09 12.3 9.9 10.9 2.5 10 9 2006 21100 9.131761
2006-10-10 2006-10-10 11.5 8.5 9.8 0.6 10 10 2006 21101 8.031678
2006-10-11 2006-10-11 9.0 6.0 7.0 0.0 10 11 2006 21102 5.231594
2006-10-12 2006-10-12 8.7 0.4 5.3 2.2 10 12 2006 21103 3.531510
2006-10-13 2006-10-13 9.8 5.1 7.6 0.0 10 13 2006 21104 5.831426
2006-10-14 2006-10-14 7.9 4.7 5.8 1.2 10 14 2006 21105 4.031342
2006-10-15 2006-10-15 4.8 -1.4 1.3 2.1 10 15 2006 21106 -0.468741
2006-10-16 2006-10-16 0.8 -1.7 -0.4 0.0 10 16 2006 21107 -2.168825
2006-10-17 2006-10-17 2.9 -4.1 0.0 0.0 10 17 2006 21108 -1.768909
2006-10-18 2006-10-18 5.4 -1.0 2.2 0.1 10 18 2006 21109 0.431007
2006-10-19 2006-10-19 7.1 3.8 5.5 0.6 10 19 2006 21110 3.730923
2006-10-20 2006-10-20 4.4 2.3 3.5 0.7 10 20 2006 21111 1.730839
2006-10-21 2006-10-21 4.1 0.5 1.9 3.7 10 21 2006 21112 0.130756
2006-10-22 2006-10-22 12.0 3.9 8.8 0.7 10 22 2006 21113 7.030672
2006-10-23 2006-10-23 12.6 9.1 10.8 0.0 10 23 2006 21114 9.030588
2006-10-24 2006-10-24 13.2 9.9 11.3 0.0 10 24 2006 21115 9.530504
2006-10-25 2006-10-25 14.0 11.4 12.6 4.7 10 25 2006 21116 10.830420
2006-10-26 2006-10-26 12.2 5.8 8.5 6.9 10 26 2006 21117 6.730337
2006-10-27 2006-10-27 8.7 2.7 5.6 0.3 10 27 2006 21118 3.830253
2006-10-28 2006-10-28 11.0 4.1 8.2 0.3 10 28 2006 21119 6.430169
2006-10-29 2006-10-29 4.6 1.4 2.4 0.5 10 29 2006 21120 0.630085
2006-10-30 2006-10-30 1.8 -1.9 -0.7 3.5 10 30 2006 21121 -2.469999
2006-10-31 2006-10-31 1.3 -7.7 -3.0 0.0 10 31 2006 21122 -4.770082

21121 rows × 10 columns

In [143]:
corr = df_clean.pivot_table(values='TMPMN_corrected', index='year', columns='mon').corr()
months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep',
         'oct', 'nov', 'dec']
plt.imshow(corr, cmap='seismic', vmin=-1, vmax=1)
plt.colorbar()
plt.xticks(range(12), months)
plt.yticks(range(12), months);
In [144]:
corr
Out[144]:
mon 1 2 3 4 5 6 7 8 9 10 11 12
mon
1 1.000000 0.345332 0.181045 0.141913 -0.015442 -0.014635 0.121429 -0.214890 0.060025 0.044105 -0.207636 -0.104374
2 0.345332 1.000000 0.330117 0.162770 -0.022966 -0.002403 0.196804 -0.151839 -0.012321 0.023386 0.015190 -0.044311
3 0.181045 0.330117 1.000000 0.248782 0.002669 0.125861 -0.020188 -0.084129 -0.168972 -0.127324 -0.101320 -0.127112
4 0.141913 0.162770 0.248782 1.000000 0.017757 0.010875 0.103228 -0.139183 0.006385 0.007387 0.058009 -0.011488
5 -0.015442 -0.022966 0.002669 0.017757 1.000000 -0.078167 0.020219 0.160880 0.071877 0.196792 0.189534 -0.136849
6 -0.014635 -0.002403 0.125861 0.010875 -0.078167 1.000000 0.319074 0.165168 0.000776 0.150155 -0.317147 0.176838
7 0.121429 0.196804 -0.020188 0.103228 0.020219 0.319074 1.000000 0.404667 0.132022 0.121302 -0.009343 0.053329
8 -0.214890 -0.151839 -0.084129 -0.139183 0.160880 0.165168 0.404667 1.000000 0.281657 0.222578 0.163609 0.045318
9 0.060025 -0.012321 -0.168972 0.006385 0.071877 0.000776 0.132022 0.281657 1.000000 0.305413 0.113701 0.087329
10 0.044105 0.023386 -0.127324 0.007387 0.196792 0.150155 0.121302 0.222578 0.305413 1.000000 0.168834 -0.005908
11 -0.207636 0.015190 -0.101320 0.058009 0.189534 -0.317147 -0.009343 0.163609 0.113701 0.168834 1.000000 -0.093018
12 -0.104374 -0.044311 -0.127112 -0.011488 -0.136849 0.176838 0.053329 0.045318 0.087329 -0.005908 -0.093018 1.000000