티스토리 뷰
[1]
import numpy as np
[3]
#정수 배열:
np.array([1,4,2,5,3])
array([1, 4, 2, 5, 3])
[6]
np.array([3.14,4,2,3])
array([3.14, 4. , 2. , 3. ])
[8]
np.array([1,2,3,4], dtype='float32')
array([1., 2., 3., 4.], dtype=float32)
[13]
np.array([num*3 for num in range(3)])
array([0, 3, 6])
[19]
np.array([ range(num, num+3) for num in [2,4,6]])
array([[2, 3, 4],
[4, 5, 6],
[6, 7, 8]])
[21]
np.zeros(10, dtype=int)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
[24]
np.ones(10, dtype=int)
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
[29]
np.ones((3,5), dtype=int)
array([[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1]])
[31]
np.full(10, 3.14)
array([3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14])
[33]
np.full((3,4), 3.14)
array([[3.14, 3.14, 3.14, 3.14],
[3.14, 3.14, 3.14, 3.14],
[3.14, 3.14, 3.14, 3.14]])
[36]
np.arange(0,20,2)
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
[38]
np.linspace(0,1,5)
array([0. , 0.25, 0.5 , 0.75, 1. ])
[40]
np.random.random(10)
array([0.40530949, 0.34347442, 0.1513537 , 0.25527436, 0.60350375,
0.81535512, 0.26061947, 0.61846248, 0.12884634, 0.27509558])
[45]
np.random.randint(0,10,3)
array([6, 2, 9])
[47]
np.random.randint(0,10,size=(3,3))
array([[4, 9, 0],
[3, 8, 0],
[0, 7, 8]])
[50]
np.random.normal(0,10,(3,3)) # 평균 0, 표준편차 10인 정규 분포
array([[ 0.59707263, 0.94885647, 11.42131954],
[14.21789927, 9.80141963, -1.46138918],
[10.41933506, -1.74334103, -1.7538321 ]])
[52]
np.eye(3)
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
[54]
np.empty(3)
array([1., 1., 1.])
[56]
np.random.seed(0)
np.random.randint(10)
5
[81]
x1=np.random.randint(10, size=6)
print(x1)
[3 8 8 8 2 3]
[82]
x2=np.random.randint(10, size=(3,4))
print(x2)
[[2 0 8 8]
[3 8 2 8]
[4 3 0 4]]
[83]
x3=np.random.randint(10, size=(3,4,5))
print(x3)
[[[3 6 9 8 0]
[8 5 9 0 9]
[6 5 3 1 8]
[0 4 9 6 5]]
[[7 8 8 9 2]
[8 6 6 9 1]
[6 8 8 3 2]
[3 6 3 6 5]]
[[7 0 8 4 6]
[5 8 2 3 9]
[7 5 3 4 5]
[3 3 7 9 9]]]
[124]
x1.shape
x2.shape
x3.shape
x1.ndim
x2.ndim
x3.ndim
x1.size
x2.size
x3.size
60
[127]
x3.dtype
x3.itemsize
x3.nbytes
240
[101]
x=np.arange(10)
x
x[0:5]
x[:5]
x[5:]
x[4:7]
x[::2]
x[1::2]
x[::-1]
x[5::-2]
array([5, 3, 1])
[103]
x2
array([[2, 0, 8, 8],
[3, 8, 2, 8],
[4, 3, 0, 4]])
[113]
x2[2,3]
x2[:2,:3]
x2[:3,::2]
x2[::-1]
x2[:,::-1]
x2[::-1,::-1]
x2[:,0]
x2[0,:]
x2[0]
array([2, 0, 8, 8])
[122]
x2_sub=x2[:2,:2]
x2_sub
array([[2, 0],
[3, 8]])
[129]
x2_sub[0,0]=99
x2_sub
array([[99, 0],
[ 3, 8]])
[131]
x2
array([[99, 0, 8, 8],
[ 3, 8, 2, 8],
[ 4, 3, 0, 4]])
[134]
x2_sub_copy=x2[:2,:2].copy()
[136]
x2_sub_copy[0,0]=10
x2_sub_copy
array([[10, 0],
[ 3, 8]])
[138]
x2
array([[99, 0, 8, 8],
[ 3, 8, 2, 8],
[ 4, 3, 0, 4]])
[141]
grid=np.arange(1,10).reshape((3,3))
grid
array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
[153]
x=np.array([1,2,3])
x
x.reshape((1,3))
x
y=x.reshape((1,3))
y
x[np.newaxis, :]
x.reshape((3,1))
x[:, np.newaxis]
array([[1],
[2],
[3]])
[156]
x=np.array([1,2,3])
y=np.array([3,2,1])
np.concatenate([x,y])
z=[99,99,99]
np.concatenate([x,y,z])
array([ 1, 2, 3, 3, 2, 1, 99, 99, 99])
[161]
grid=np.array([[1,2,3],
[4,5,6]])
np.concatenate([grid,grid])
np.concatenate([grid,grid], axis=1)
np.concatenate([grid,grid], axis=0)
array([[1, 2, 3],
[4, 5, 6],
[1, 2, 3],
[4, 5, 6]])
[164]
x=np.array([1,2,3])
#np.concatenate([x,grid])
np.vstack([x,grid])
array([[1, 2, 3],
[1, 2, 3],
[4, 5, 6]])
[170]
grid
x=np.array([1,2])[:,np.newaxis]
x
np.hstack([grid,x])
array([[1, 2, 3, 1],
[4, 5, 6, 2]])
[174]
x=np.arange(4)
x
y=np.array([5,5,5,5])
z=x+y
z
array([5, 6, 7, 8])
[176]
z=x+5
z
array([5, 6, 7, 8])
[180]
print("x+5=",x+5)
print("x+5=",np.add(x,5))
x+5= [5 6 7 8]
x+5= [5 6 7 8]
[190]
big_array=np.random.rand(10000)
%timeit sum(big_array)
%timeit np.sum(big_array)
690 µs ± 18.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
7.03 µs ± 392 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
[192]
min(big_array)
5.728320201958681e-06
[195]
M=np.random.random((3,4))
M
array([[0.43748437, 0.88285858, 0.4333984 , 0.53022543],
[0.30267819, 0.52977185, 0.38987088, 0.59326485],
[0.93048989, 0.77927655, 0.96983439, 0.03330797]])
[200]
np.sum(M)
M.sum()
M.sum(axis=0)
M.sum(axis=1)
array([2.28396677, 1.81558577, 2.7129088 ])
[210]
import pandas as pd
data=pd.read_csv("https://raw.githubusercontent.com/wikibook/python-ds-handbook/master/notebooks/data/president_heights.csv")
data
data['height(cm)']
heights=np.array(data['height(cm)'])
print(heights.mean())
print(heights.min()) #0사분위
print(np.percentile(heights,25)) #1사분위
print(np.median(heights)) #2사분위
print(np.percentile(heights,75)) #3사분위
print(heights.max()) #4사분위
179.73809523809524
163
174.25
182.0
183.0
193
[225]
import matplotlib.pyplot as plt
import matplotlib as mpl
font_name=mpl.font_manager.FontProperties(fname="C:\Windows\Fonts\malgun.ttf").get_name()
mpl.rc('font',family=font_name)
plt.hist(heights) #분포도
plt.title("미국 대통령의 키 분포")
plt.xlabel('height (cm)')
plt.ylabel('명')
Text(0, 0.5, '명')
[231]
a=np.array([0,1,2])
b=np.array([0,1,2])
a+b
a+3
array([3, 4, 5])
[233]
M=np.ones((3,3))
M+a
array([[1., 2., 3.],
[1., 2., 3.],
[1., 2., 3.]])
[237]
a=np.arange(3)
a
b=np.arange(3)[:,np.newaxis]
[245]
rainfall=pd.read_csv('https://raw.githubusercontent.com/wikibook/python-ds-handbook/master/notebooks/data/Seattle2014.csv')
rainfall=rainfall['PRCP'].values
inches=rainfall/254.0
inches.shape
plt.hist(inches,40)
(array([245., 14., 13., 17., 8., 6., 5., 6., 4., 3., 7.,
6., 3., 3., 3., 4., 4., 2., 4., 0., 0., 1.,
1., 1., 0., 0., 0., 2., 1., 1., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 1.]),
array([0. , 0.04596457, 0.09192913, 0.1378937 , 0.18385827,
0.22982283, 0.2757874 , 0.32175197, 0.36771654, 0.4136811 ,
0.45964567, 0.50561024, 0.5515748 , 0.59753937, 0.64350394,
0.6894685 , 0.73543307, 0.78139764, 0.8273622 , 0.87332677,
0.91929134, 0.96525591, 1.01122047, 1.05718504, 1.10314961,
1.14911417, 1.19507874, 1.24104331, 1.28700787, 1.33297244,
1.37893701, 1.42490157, 1.47086614, 1.51683071, 1.56279528,
1.60875984, 1.65472441, 1.70068898, 1.74665354, 1.79261811,
1.83858268]),
<BarContainer object of 40 artists>)
[248]
np.sum((inches>0.5)&(inches<1))
29
[254]
np.sum(~((inches>0.5)&(inches<1)))
336
[257]
np.sum(inches>0)
150
[261]
inches>0
array([False, True, True, False, False, True, True, True, True,
True, True, True, False, False, False, False, False, False,
False, False, False, True, False, False, False, False, False,
True, True, False, True, True, False, False, False, False,
False, False, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, False, False, False, True, True, True, True,
True, True, False, True, True, True, False, False, True,
True, True, True, True, False, True, False, False, False,
False, False, True, True, True, True, True, False, False,
False, False, True, False, True, False, False, True, False,
False, False, False, False, False, True, True, True, False,
True, False, True, True, True, True, False, True, True,
False, False, False, False, False, True, True, True, False,
False, True, True, True, False, False, False, False, False,
False, False, False, False, False, False, False, True, False,
True, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, False, False, False,
True, True, False, True, True, True, False, True, True,
False, False, False, False, False, False, True, True, False,
False, False, False, False, False, False, False, False, False,
False, False, False, False, False, False, False, False, False,
False, False, False, False, True, True, False, False, False,
False, False, False, False, False, False, True, False, False,
False, False, False, False, False, False, True, True, True,
False, True, False, False, False, False, False, False, False,
False, False, False, False, False, False, False, True, True,
False, True, False, False, False, False, False, False, False,
False, False, False, False, False, False, False, True, True,
False, False, False, True, True, True, True, True, False,
False, True, False, False, False, False, False, False, False,
False, False, False, True, True, False, True, True, True,
False, True, True, False, True, True, True, True, True,
True, True, True, True, True, True, True, False, True,
True, True, True, True, False, False, True, False, False,
False, False, False, False, False, False, False, False, True,
True, True, True, True, True, True, True, True, True,
False, False, False, False, True, True, True, False, True,
True, True, True, False, False, False, False, False, True,
True, True, True, False, False, True, True, False, False,
True, True, False, False, False])
[265]
x=np.array([1,2,3,4,5])
x<3
np.sum(x<3)
2
[275]
x>3
np.sum(x>3)
x<=3
x>=3
x!=3
x==3
(2*x)
(x**x)
(2*x)==(x**2)
array([False, True, False, False, False])
[278]
x=np.random.randint(10,size=(3,4))
x
array([[0, 9, 1, 4],
[7, 0, 0, 1],
[6, 6, 7, 8]])
[280]
x<6
array([[ True, False, True, True],
[False, True, True, True],
[False, False, False, False]])
[284]
np.sum(x<6, axis=1)
array([3, 3, 0])
[286]
np.sum(x<6, axis=0)
array([1, 1, 2, 2])
[294]
#8보다 작은 값이 하나라도 있는가?
np.any(x<8)
True
[296]
#모든 값이 10보다 작은가?
np.all(x<10)
True
[298]
#모든 값이 6과 같은가?
np.all(x==6)
False
[300]
#각 행의 모든 값은 8보다 작은가?
np.all(x<8, axis=1)
array([False, True, False])
[302]
np.all(x<8, axis=0)
array([ True, False, True, False])
[305]
A=np.array([1,0,1,0,1,0], dtype=bool)
B=np.array([1,1,1,0,1,0], dtype=bool)
A|B
A&B
array([ True, False, True, False, True, False])
[308]
np.sum((inches>0.5)&(inches<1))
29
[312]
inches[(inches>0.5)&(inches<1)]
array([0.83858268, 0.8503937 , 0.72047244, 0.66929134, 0.57086614,
0.5984252 , 0.51181102, 0.7519685 , 0.6496063 , 0.74015748,
0.87007874, 0.5511811 , 0.72834646, 0.53937008, 0.55905512,
0.62992126, 0.53937008, 0.75984252, 0.8503937 , 0.72047244,
0.7992126 , 0.59055118, 0.66929134, 0.5984252 , 0.72047244,
0.51181102, 0.51181102, 0.77165354, 0.81102362])
[316]
x=np.random.randint(100,size=10)
x
array([54, 77, 55, 39, 93, 0, 86, 6, 41, 46])
[318]
x[3],x[7],x[2]
(39, 6, 55)
[328]
idx=[3,7,2]
x[idx]
array([39, 6, 55])
[331]
ind=np.array([[3,7],
[4,5]])
x[ind]
array([[39, 6],
[93, 0]])
[333]
X=np.arange(12).reshape((3,4))
X
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
[335]
X[0,2], X[1,1], X[2,3]
(2, 5, 11)
[337]
row=np.array([0,1,2])
col=np.array([2,1,3])
X[row,col]
array([ 2, 5, 11])
[339]
row[:,np.newaxis]
array([[0],
[1],
[2]])
[341]
col
array([2, 1, 3])
[343]
X[row[:, np.newaxis], col]
array([[ 2, 1, 3],
[ 6, 5, 7],
[10, 9, 11]])
[345]
row[:, np.newaxis]*col
array([[0, 0, 0],
[2, 1, 3],
[4, 2, 6]])
[347]
print(X)
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[349]
X[2,2], X[2,0], X[2,1]
(10, 8, 9)
[354]
X[1:,:]
array([[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
[356]
X[2,[2,0,1]]
array([10, 8, 9])
[358]
mask = np.array([1,0,1,0], dtype=bool)
mask
array([ True, False, True, False])
[360]
X[2:,mask]
array([[ 8, 10]])
[362]
x=np.array([0.25,0.5,0.75,1.0])
x
array([0.25, 0.5 , 0.75, 1. ])
[365]
import pandas as pd
data=pd.Series([0.25,0.5,0.75,1.0])
data
0 0.25
1 0.50
2 0.75
3 1.00
dtype: float64
[367]
x[0]
0.25
[369]
data[0]
0.25
[371]
data.values
array([0.25, 0.5 , 0.75, 1. ])
[373]
data.index
RangeIndex(start=0, stop=4, step=1)
[375]
data[1]
0.5
[377]
data[1:3]
1 0.50
2 0.75
dtype: float64
[397]
data=pd.Series([0.25, 0.5, 0.75, 1.0], index=['a','b','c','d'])
data
a 0.25
b 0.50
c 0.75
d 1.00
dtype: float64
[382]
data['a']
0.25
[384]
data[0]
0.25
[386]
data=pd.Series([0.25, 0.5, 0.75, 1.0], index=[2,5,3,7])
data
2 0.25
5 0.50
3 0.75
7 1.00
dtype: float64
[388]
data[3]
0.75
[391]
data={2:0.25, 5:0.50,3:0.75,7:1.00}
data=pd.Series(data)
data
2 0.25
5 0.50
3 0.75
7 1.00
dtype: float64
[499]
population_dict = {'California': 38332521,
'Texas': 26448193,
'New York': 19651127,
'Florida': 19552860,
'Illinois': 12882135}
population = pd.Series(population_dict)
population
California 38332521
Texas 26448193
New York 19651127
Florida 19552860
Illinois 12882135
dtype: int64
[395]
population_dict['California']
38332521
[401]
data['a']
data['a':'c']
data[0:3]
a 0.25
b 0.50
c 0.75
dtype: float64
[404]
population['California':'New York']
population[0:3]
California 38332521
Texas 26448193
New York 19651127
dtype: int64
[406]
pd.Series([2,4,6], index=[100,200,300])
100 2
200 4
300 6
dtype: int64
[409]
pd.Series({2:'a',1:'b',3:'c'}, index=[3,2])
3 c
2 a
dtype: object
[411]
population
California 38332521
Texas 26448193
New York 19651127
Florida 19552860
Illinois 12882135
dtype: int64
[413]
area_dict = {'California': 423967, 'Texas': 695662, 'New York': 141297,
'Florida': 170312, 'Illinois': 149995}
area = pd.Series(area_dict)
area
California 423967
Texas 695662
New York 141297
Florida 170312
Illinois 149995
dtype: int64
[500]
states=pd.DataFrame({'population':population
, 'area':area})
states
[417]
states.index
Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')
[421]
states.columns
Index(['population', 'area'], dtype='object')
[426]
states['population']
states['area']
California 423967
Texas 695662
New York 141297
Florida 170312
Illinois 149995
Name: area, dtype: int64
[428]
pd.DataFrame(population, columns=['population'])
[430]
population
California 38332521
Texas 26448193
New York 19651127
Florida 19552860
Illinois 12882135
dtype: int64
[432]
pd.DataFrame([{'a': 1, 'b': 2}, {'b': 3, 'c': 4}])
[434]
pd.DataFrame(np.random.rand(3, 2),
columns=['foo', 'bar'],
index=['a', 'b', 'c'])
[436]
data=pd.Series([0.25, 0.5, 0.75, 1.0], index=['a','b','c','d'])
data
a 0.25
b 0.50
c 0.75
d 1.00
dtype: float64
[477]
data['b']
'b' in data
data.keys()
list(data.items())
data.values
data['c']=2.0
data
data['a':'c']
data[0:3]
# 마스크
(data>0.3) & (data<1.0)
# 마스킹
data[(data>0.3) & (data<1.0)]
data['a'], data['c']
# 팬시 인덱싱
data[['a','b']]
idx=['a','b']
data[idx]
a 0.25
b 0.50
dtype: float64
[478]
dic={'a':0.25, 'b':0.5, 'c':0.75, 'd':1.0}
dic['b']
'c' in dic
dic.keys()
list(data.items())
dic.items()
dict_items([('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)])
[483]
data=pd.Series(['a','b','c'], index=[1,3,5])
data[1]
'a'
[486]
data.iloc[2] #암시적 인덱스
'c'
[488]
data.loc[1] #명시적 인덱스
'a'
[495]
data[1:3] #암시적 인덱스
data.iloc[1:3]
3 b
5 c
dtype: object
[496]
data.loc[1:3] #명시적 인덱스
1 a
3 b
dtype: object
[514]
data=pd.DataFrame({'pop':population
, 'area':area})
data
[508]
data['area']
data.area
California 423967
Texas 695662
New York 141297
Florida 170312
Illinois 149995
Name: area, dtype: int64
[510]
data.area is data['area']
True
[512]
dic
dic['e']=3.0
dic
{'a': 0.25, 'b': 0.5, 'c': 0.75, 'd': 1.0, 'e': 3.0}
[517]
data
data['density']=data['pop']/data['area']
data
[520]
data.values
array([[3.83325210e+07, 4.23967000e+05, 9.04139261e+01],
[2.64481930e+07, 6.95662000e+05, 3.80187404e+01],
[1.96511270e+07, 1.41297000e+05, 1.39076746e+02],
[1.95528600e+07, 1.70312000e+05, 1.14806121e+02],
[1.28821350e+07, 1.49995000e+05, 8.58837628e+01]])
[523]
data.T
[535]
data
data['area'] #열 데이터
data.loc['California'] #행 데이터
data.loc['California','area']
423967
[541]
data
data.iloc[0,1]
423967
[545]
data
data.iloc[:3,:2]
data.loc[:'New York',:'pop']
[561]
data['area'], data['density']
data[['pop','density']] # 팬시 인덱싱
data[data.density>100] #마스킹
data.loc[data.density>100, ['pop','density']]
data.loc[data['density']>100, ['pop','density']]
[565]
area = pd.Series({'Alaska': 1723337, 'Texas': 695662,
'California': 423967}, name='area')
population = pd.Series({'California': 38332521, 'Texas': 26448193,
'New York': 19651127}, name='population')
df=pd.DataFrame({"area":area, "population":population})
df
[567]
population/area
Alaska NaN
California 90.413926
New York NaN
Texas 38.018740
dtype: float64
[571]
area.index
population.index
area.index|population.index
C:\Users\root\AppData\Local\Temp/ipykernel_3160/3387932575.py:3: FutureWarning: Index.__or__ operating as a set operation is deprecated, in the future this will be a logical operation matching Series.__or__. Use index.union(other) instead
area.index|population.index
Index(['Alaska', 'California', 'New York', 'Texas'], dtype='object')
[576]
A = pd.Series([2, 4, 6], index=[0, 1, 2])
B = pd.Series([1, 3, 5], index=[1, 2, 3])
A,B
A+B
A.add(B, fill_value=0)
0 2.0
1 5.0
2 9.0
3 5.0
dtype: float64
[587]
A = pd.DataFrame(np.random.randint(0, 20, (2, 2)),
columns=list('AB'))
B = pd.DataFrame(np.random.randint(0, 10, (3, 3)),
columns=list('BAC'))
A+B
[591]
A.add(B,fill_value=0)
[595]
A.mean() #열 평균
A.stack().mean() # 전체 평균
9.25
[597]
A.add(B, fill_value=A.stack().mean())
[607]
vals1=np.array([1,None,3,4])
vals1
vals1=np.array([1,np.nan,3,4])
vals1
1+np.nan
0*np.nan
vals1.sum()
vals1.min()
vals1.max()
np.nansum(vals1)
np.nanmin(vals1)
np.nanmax(vals1)
4.0
[611]
x=pd.Series([1, np.nan, 2, None])
x
0 1.0
1 NaN
2 2.0
3 NaN
dtype: float64
[613]
x[2]=None
x
0 1.0
1 NaN
2 NaN
3 NaN
dtype: float64
[622]
x.isnull()
x.notnull()
x[x.notnull()]
x.dropna() #NA를 제거한 새 Series를 반환 (x 자체는 변경되지 않음)
x
0 1.0
1 NaN
2 NaN
3 NaN
dtype: float64
[638]
df = pd.DataFrame([[1, np.nan, 2],
[2, 3, 5],
[np.nan, 4, 6]])
df
df.dropna()
[632]
df
[639]
df.dropna(axis=1)
df.dropna(axis='columns')
[650]
df
df[1]
df[3]=np.nan
df
df.dropna(axis=1, how='all')
df
df.dropna(axis=0, thresh=3)
df.dropna(axis='rows', thresh=3)
[658]
ebola=pd.read_csv("https://raw.githubusercontent.com/SoongMoo/soldesk2110/main/data/country_timeseries.csv")
ebola
ebola.shape
ebola.shape[0]
ebola.shape[1]
ebola.count()
Date 122
Day 122
Cases_Guinea 93
Cases_Liberia 83
Cases_SierraLeone 87
Cases_Nigeria 38
Cases_Senegal 25
Cases_UnitedStates 18
Cases_Spain 16
Cases_Mali 12
Deaths_Guinea 92
Deaths_Liberia 81
Deaths_SierraLeone 87
Deaths_Nigeria 38
Deaths_Senegal 22
Deaths_UnitedStates 18
Deaths_Spain 16
Deaths_Mali 12
dtype: int64
[664]
ebola_fillna_0=ebola.fillna(0)
ebola_fillna_0.shape[0]-ebola_fillna_0.count()
ebola_fillna_100=ebola.fillna(100)
ebola_fillna_100.iloc[0:10, 0:5]
[672]
#0~9행, 0~4열의 누락값만 0으로 채우기
ebola.iloc[0:10, 0:5].fillna(0)
ebola.fillna(0).iloc[0:10, 0:5]
[684]
#누락값을 평균 값으로 변경(각 열의 평균값)
print(ebola.mean())
ebola_fillna_mean=ebola.fillna(ebola.mean())
ebola_fillna_mean.iloc[0:10,0:5]
Day 144.778689
Cases_Guinea 911.064516
Cases_Liberia 2335.337349
Cases_SierraLeone 2427.367816
Cases_Nigeria 16.736842
Cases_Senegal 1.080000
Cases_UnitedStates 3.277778
Cases_Spain 1.000000
Cases_Mali 3.500000
Deaths_Guinea 563.239130
Deaths_Liberia 1101.209877
Deaths_SierraLeone 693.701149
Deaths_Nigeria 6.131579
Deaths_Senegal 0.000000
Deaths_UnitedStates 0.833333
Deaths_Spain 0.187500
Deaths_Mali 3.166667
dtype: float64
C:\Users\root\AppData\Local\Temp/ipykernel_3160/2059366500.py:2: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.
print(ebola.mean())
C:\Users\root\AppData\Local\Temp/ipykernel_3160/2059366500.py:3: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.
ebola_fillna_mean=ebola.fillna(ebola.mean())
[686]
ebola
[689]
ebola_fillna_ffill=ebola.fillna(method='ffill')
ebola_fillna_ffill.iloc[0:10,0:5]
[691]
ebola_fillna_ffill=ebola.fillna(method='bfill')
ebola_fillna_ffill.iloc[0:10,0:5]
[693]
ebola_fillna_interpolate=ebola.interpolate()
ebola_fillna_interpolate.iloc[0:10,0:5]
[694]
ebola_fillna_ffill_1=ebola.fillna(method='ffill',axis=1)
ebola_fillna_ffill_1.iloc[0:10,0:5]
'파이썬[python]' 카테고리의 다른 글
파이썬 강좌 5 (0) | 2022.05.13 |
---|---|
파이썬 강좌 4 (0) | 2022.05.13 |
파이썬 오프라인 (0) | 2022.05.11 |
파이썬 강좌 2 (0) | 2022.05.11 |
파이썬 강좌 1-1 (0) | 2022.05.10 |
댓글
공지사항
최근에 올라온 글
최근에 달린 댓글
- Total
- Today
- Yesterday
링크
TAG
- ocajp
- 문자열
- C
- 인포믹스
- 자바 smtp
- php
- KG
- Python
- 프로씨
- MySQL
- 포인터
- 파싱
- 오라클
- XE3
- XE
- 자바
- webix
- xe애드온
- JDBC
- 스크래핑
- esql
- C언어
- 이클립스
- 플러터
- proc
- 라이믹스 모듈
- EC
- 파이썬
- ocjap
- xe addon
일 | 월 | 화 | 수 | 목 | 금 | 토 |
---|---|---|---|---|---|---|
1 | ||||||
2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | 13 | 14 | 15 |
16 | 17 | 18 | 19 | 20 | 21 | 22 |
23 | 24 | 25 | 26 | 27 | 28 |
글 보관함