# Python data analysis - 04: handling NaN values

#NaN --means Not a Number
import pandas as pd
import numpy as np
from pandas import Series,DataFrame
# n = np.nan
# print(type(n)) #<class 'float'>
# print(1+n) #nan
#nan in Series
#s1 = Series([1,2,np.nan,3,4],index=['A','B','C','D','E'])
# print(s1)
# A 1.0
# B 2.0
# C NaN
# D 3.0
# E 4.0
# dtype: float64
# print(s1.isnull())
# A False
# B False
# C True
# D False
# E False
# dtype: bool
#print(s1.dropna())#去掉值为nan的行
# A 1.0
# B 2.0
# D 3.0
# E 4.0
# dtype: float64
#NaN in DataFrame
#dframe = DataFrame([[1,2,3],[np.nan,5,6],[7,np.nan,9],[np.nan,np.nan,np.nan]])
#print(dframe)
# 0 1 2
# 0 1.0 2.0 3.0
# 1 NaN 5.0 6.0
# 2 7.0 NaN 9.0
# 3 NaN NaN NaN
#print(dframe.isnull())
# 0 1 2
# 0 False False False
# 1 True False False
# 2 False True False
# 3 True True True
# df1 = dframe.dropna(axis=0)
# print(df1)
# 0 1 2
# 0 1.0 2.0 3.0
# df2 = dframe.dropna(axis=1)
# print(df2)
# Empty DataFrame
# Columns: []
# Index: [0, 1, 2, 3]
# df3= dframe.dropna(axis=0,how='any')#默认为any
# print(df3)
# 0 1 2
# 0 1.0 2.0 3.0
# df4= dframe.dropna(axis=0,how='all')#全部为nan才会被删除
# print(df4)
# 0 1 2
# 0 1.0 2.0 3.0
# 1 NaN 5.0 6.0
# 2 7.0 NaN 9.0
# dframe2 = DataFrame([[1,2,3,np.nan],[2,np.nan,5,6],[np.nan,7,np.nan,9],[1,np.nan,np.nan,np.nan]])
# print(dframe2)
# 0 1 2 3
# 0 1.0 2.0 3.0 NaN
# 1 2.0 NaN 5.0 6.0
# 2 NaN 7.0 NaN 9.0
# 3 1.0 NaN NaN NaN
# df2 = dframe2.dropna(thresh=None)  # with thresh=None the default how='any' applies: every row containing a NaN is dropped
# print(df2)
# Empty DataFrame
# Columns: [0, 1, 2, 3]
# Index: []
# df2 = dframe2.dropna(thresh=2)  # thresh counts non-NaN values: keep rows with at least 2 non-NaN values (row 3 has only 1, so it is dropped)
# print(df2)
# 0 1 2 3
# 0 1.0 2.0 3.0 NaN
# 1 2.0 NaN 5.0 6.0
# 2 NaN 7.0 NaN 9.0
#print(dframe2.fillna(value=1))#用1填充所有的nan值
# 0 1 2 3
# 0 1.0 2.0 3.0 1.0
# 1 2.0 1.0 5.0 6.0
# 2 1.0 7.0 1.0 9.0
# 3 1.0 1.0 1.0 1.0
#print(dframe2.fillna(value={0:0,1:1,2:2,3:3}))#第一列用0,第二列用1,第三列用2,第四列用3填充
# 0 1 2 3
# 0 1.0 2.0 3.0 3.0
# 1 2.0 1.0 5.0 6.0
# 2 0.0 7.0 2.0 9.0
# 3 1.0 1.0 2.0 3.0
#----------------------------------
#多级index
# s1 = Series(np.random.randn(6),index=[['1','1','1','2','2','2'],['a','b','c','a','b','c']])
# print(s1)
# 1  a    0.450512
#    b   -0.432511
#    c   -0.280125
# 2  a    0.333269
#    b    0.123369
#    c   -2.000533
# dtype: float64
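# print(s1['1'])  # select by the outer index label '1' (the values below come from a different random run than the print(s1) above)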
# a 0.870953
# b 0.128729
# c 0.958637
# dtype: float64
# print(type(s1["1"]))#<class 'pandas.core.series.Series'>
# print(s1['1']['a']) #-1.6023742345246375
# print(s1[:,'a'])
# 1 0.891888
# 2 -1.087930
# dtype: float64
# df1 = s1.unstack()
# print(df1)
# a b c
# 1 -0.300567 0.124230 -0.488987
# 2 1.618275 -0.155234 -0.284849
# df2 = DataFrame([s1['1'],s1['2']])
# print(df2)
# a b c
# 0 -0.757713 0.801604 1.975145
# 1 -0.112589 0.083369 -1.174583
# s2 = df1.unstack()
# print(s2)
# a 1 -0.037449
# 2 -1.364984
# b 1 -1.748248
# 2 0.581338
# c 1 1.758856
# 2 -0.244383
# dtype: float64
# s2 = df1.T.unstack()
# print(s2)
# 1 a 0.343976
# b 1.664177
# c -0.039674
# 2 a 0.357644
# b 0.357658
# c -0.308326
# dtype: float64
# df = DataFrame(np.arange(16).reshape(4,4))
# print(df)
# 0 1 2 3
# 0 0 1 2 3
# 1 4 5 6 7
# 2 8 9 10 11
# 3 12 13 14 15
# df = DataFrame(np.arange(16).reshape(4,4),index=[['a','a','b','b'],[1,2,1,2]])
# print(df)
#       0   1   2   3
# a 1   0   1   2   3
#   2   4   5   6   7
# b 1   8   9  10  11
#   2  12  13  14  15
# df = DataFrame(np.arange(16).reshape(4,4),index=[['a','a','b','b'],[1,2,1,2]],columns=[['BJ','BJ','SH','GZ'],[8,9,8,8]])
# print(df)
#      BJ      SH  GZ
#       8   9   8   8
# a 1   0   1   2   3
#   2   4   5   6   7
# b 1   8   9  10  11
#   2  12  13  14  15
# print(df['BJ'][8])
# a  1     0
#    2     4
# b  1     8
#    2    12
# Name: 8, dtype: int32