IT,라이믹스,XE(Xpress Engine),자바,파이썬,마리아DB,php,스크래핑,파싱,크롤링,스프링등 알짜 정보 제공

티스토리 뷰

파이썬[python]
파이썬 강좌 3

xemaker 2022. 5. 12. 08:58
[1]
import numpy as np
[3]
#정수 배열:
np.array([1,4,2,5,3])
array([1, 4, 2, 5, 3])
[6]
np.array([3.14,4,2,3])
array([3.14, 4.  , 2.  , 3.  ])
[8]
np.array([1,2,3,4], dtype='float32')
array([1., 2., 3., 4.], dtype=float32)
[13]
np.array([num*3 for num in range(3)])
array([0, 3, 6])
[19]
np.array([ range(num, num+3) for num in [2,4,6]])
array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])
[21]
np.zeros(10, dtype=int)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
[24]
np.ones(10, dtype=int)
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
[29]
np.ones((3,5), dtype=int)
array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])
[31]
np.full(10, 3.14)
array([3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14])
[33]
np.full((3,4), 3.14)
array([[3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14]])
[36]
np.arange(0,20,2)
array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])
[38]
np.linspace(0,1,5)
array([0.  , 0.25, 0.5 , 0.75, 1.  ])
[40]
np.random.random(10)
array([0.40530949, 0.34347442, 0.1513537 , 0.25527436, 0.60350375,
       0.81535512, 0.26061947, 0.61846248, 0.12884634, 0.27509558])
[45]
np.random.randint(0,10,3)
array([6, 2, 9])
[47]
np.random.randint(0,10,size=(3,3))
array([[4, 9, 0],
       [3, 8, 0],
       [0, 7, 8]])
[50]
np.random.normal(0,10,(3,3)) # 평균 0, 표준편차 10인 정규 분포
array([[ 0.59707263,  0.94885647, 11.42131954],
       [14.21789927,  9.80141963, -1.46138918],
       [10.41933506, -1.74334103, -1.7538321 ]])
[52]
np.eye(3)
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])
[54]
np.empty(3)
array([1., 1., 1.])
[56]
np.random.seed(0)
np.random.randint(10)
5
[81]
x1=np.random.randint(10, size=6)
print(x1)
[3 8 8 8 2 3]

[82]
x2=np.random.randint(10, size=(3,4))
print(x2)
[[2 0 8 8]
 [3 8 2 8]
 [4 3 0 4]]

[83]
x3=np.random.randint(10, size=(3,4,5))
print(x3)
[[[3 6 9 8 0]
  [8 5 9 0 9]
  [6 5 3 1 8]
  [0 4 9 6 5]]

 [[7 8 8 9 2]
  [8 6 6 9 1]
  [6 8 8 3 2]
  [3 6 3 6 5]]

 [[7 0 8 4 6]
  [5 8 2 3 9]
  [7 5 3 4 5]
  [3 3 7 9 9]]]

[124]
x1.shape
x2.shape
x3.shape
x1.ndim
x2.ndim
x3.ndim
x1.size
x2.size
x3.size
60
[127]
x3.dtype
x3.itemsize
x3.nbytes
240
[101]
x=np.arange(10)
x
x[0:5]
x[:5]
x[5:]
x[4:7]
x[::2]
x[1::2]
x[::-1]
x[5::-2]
array([5, 3, 1])
[103]
x2
array([[2, 0, 8, 8],
       [3, 8, 2, 8],
       [4, 3, 0, 4]])
[113]
x2[2,3]
x2[:2,:3]
x2[:3,::2]
x2[::-1]
x2[:,::-1]
x2[::-1,::-1]
x2[:,0]
x2[0,:]
x2[0]
array([2, 0, 8, 8])
[122]
x2_sub=x2[:2,:2]
x2_sub
array([[2, 0],
       [3, 8]])
[129]
x2_sub[0,0]=99
x2_sub
array([[99,  0],
       [ 3,  8]])
[131]
x2
array([[99,  0,  8,  8],
       [ 3,  8,  2,  8],
       [ 4,  3,  0,  4]])
[134]
x2_sub_copy=x2[:2,:2].copy()

[136]
x2_sub_copy[0,0]=10
x2_sub_copy
array([[10,  0],
       [ 3,  8]])
[138]
x2
array([[99,  0,  8,  8],
       [ 3,  8,  2,  8],
       [ 4,  3,  0,  4]])
[141]
grid=np.arange(1,10).reshape((3,3))
grid
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])
[153]
x=np.array([1,2,3])
x
x.reshape((1,3))
x
y=x.reshape((1,3))
y
x[np.newaxis, :]
x.reshape((3,1))
x[:, np.newaxis]
array([[1],
       [2],
       [3]])
[156]
x=np.array([1,2,3])
y=np.array([3,2,1])
np.concatenate([x,y])
z=[99,99,99]
np.concatenate([x,y,z])
array([ 1,  2,  3,  3,  2,  1, 99, 99, 99])
[161]
grid=np.array([[1,2,3],
             [4,5,6]])
np.concatenate([grid,grid])
np.concatenate([grid,grid], axis=1)
np.concatenate([grid,grid], axis=0)
array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])
[164]
x=np.array([1,2,3])
#np.concatenate([x,grid])
np.vstack([x,grid])
array([[1, 2, 3],
       [1, 2, 3],
       [4, 5, 6]])
[170]
grid
x=np.array([1,2])[:,np.newaxis]
x
np.hstack([grid,x])
array([[1, 2, 3, 1],
       [4, 5, 6, 2]])
[174]
x=np.arange(4)
x
y=np.array([5,5,5,5])
z=x+y
z
array([5, 6, 7, 8])
[176]
z=x+5
z
array([5, 6, 7, 8])
[180]
print("x+5=",x+5)
print("x+5=",np.add(x,5))
x+5= [5 6 7 8]
x+5= [5 6 7 8]

[190]
big_array=np.random.rand(10000)
%timeit sum(big_array)
%timeit np.sum(big_array)
690 µs ± 18.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
7.03 µs ± 392 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

[192]
min(big_array)
5.728320201958681e-06
[195]
M=np.random.random((3,4))
M
array([[0.43748437, 0.88285858, 0.4333984 , 0.53022543],
       [0.30267819, 0.52977185, 0.38987088, 0.59326485],
       [0.93048989, 0.77927655, 0.96983439, 0.03330797]])
[200]
np.sum(M)
M.sum()
M.sum(axis=0)
M.sum(axis=1)
array([2.28396677, 1.81558577, 2.7129088 ])
[210]
import pandas as pd
data=pd.read_csv("https://raw.githubusercontent.com/wikibook/python-ds-handbook/master/notebooks/data/president_heights.csv")
data
data['height(cm)']
heights=np.array(data['height(cm)'])
print(heights.mean())

print(heights.min()) #0사분위
print(np.percentile(heights,25)) #1사분위
print(np.median(heights)) #2사분위
print(np.percentile(heights,75)) #3사분위
print(heights.max()) #4사분위
179.73809523809524
163
174.25
182.0
183.0
193

[225]
import matplotlib.pyplot as plt
import matplotlib as mpl

font_name=mpl.font_manager.FontProperties(fname="C:\Windows\Fonts\malgun.ttf").get_name()
mpl.rc('font',family=font_name)

plt.hist(heights) #분포도
plt.title("미국 대통령의 키 분포")
plt.xlabel('height (cm)')
plt.ylabel('명')
Text(0, 0.5, '명')

[231]
a=np.array([0,1,2])
b=np.array([0,1,2])
a+b
a+3
array([3, 4, 5])
[233]
M=np.ones((3,3))
M+a
array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])
[237]
a=np.arange(3)
a
b=np.arange(3)[:,np.newaxis]

[245]
rainfall=pd.read_csv('https://raw.githubusercontent.com/wikibook/python-ds-handbook/master/notebooks/data/Seattle2014.csv')
rainfall=rainfall['PRCP'].values
inches=rainfall/254.0
inches.shape
plt.hist(inches,40)
(array([245.,  14.,  13.,  17.,   8.,   6.,   5.,   6.,   4.,   3.,   7.,
          6.,   3.,   3.,   3.,   4.,   4.,   2.,   4.,   0.,   0.,   1.,
          1.,   1.,   0.,   0.,   0.,   2.,   1.,   1.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   1.]),
 array([0.        , 0.04596457, 0.09192913, 0.1378937 , 0.18385827,
        0.22982283, 0.2757874 , 0.32175197, 0.36771654, 0.4136811 ,
        0.45964567, 0.50561024, 0.5515748 , 0.59753937, 0.64350394,
        0.6894685 , 0.73543307, 0.78139764, 0.8273622 , 0.87332677,
        0.91929134, 0.96525591, 1.01122047, 1.05718504, 1.10314961,
        1.14911417, 1.19507874, 1.24104331, 1.28700787, 1.33297244,
        1.37893701, 1.42490157, 1.47086614, 1.51683071, 1.56279528,
        1.60875984, 1.65472441, 1.70068898, 1.74665354, 1.79261811,
        1.83858268]),
 <BarContainer object of 40 artists>)

[248]
np.sum((inches>0.5)&(inches<1))
29
[254]
np.sum(~((inches>0.5)&(inches<1)))

336
[257]
np.sum(inches>0)
150
[261]
inches>0
array([False,  True,  True, False, False,  True,  True,  True,  True,
        True,  True,  True, False, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
        True,  True, False,  True,  True, False, False, False, False,
       False, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True, False, False, False,  True,  True,  True,  True,
        True,  True, False,  True,  True,  True, False, False,  True,
        True,  True,  True,  True, False,  True, False, False, False,
       False, False,  True,  True,  True,  True,  True, False, False,
       False, False,  True, False,  True, False, False,  True, False,
       False, False, False, False, False,  True,  True,  True, False,
        True, False,  True,  True,  True,  True, False,  True,  True,
       False, False, False, False, False,  True,  True,  True, False,
       False,  True,  True,  True, False, False, False, False, False,
       False, False, False, False, False, False, False,  True, False,
        True, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
        True,  True, False,  True,  True,  True, False,  True,  True,
       False, False, False, False, False, False,  True,  True, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False,  True,  True, False, False, False,
       False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False,  True,  True,  True,
       False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False,  True,  True,
       False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False,  True,  True,
       False, False, False,  True,  True,  True,  True,  True, False,
       False,  True, False, False, False, False, False, False, False,
       False, False, False,  True,  True, False,  True,  True,  True,
       False,  True,  True, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True,  True,  True,  True, False, False,  True, False, False,
       False, False, False, False, False, False, False, False,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
       False, False, False, False,  True,  True,  True, False,  True,
        True,  True,  True, False, False, False, False, False,  True,
        True,  True,  True, False, False,  True,  True, False, False,
        True,  True, False, False, False])
[265]
x=np.array([1,2,3,4,5])
x<3
np.sum(x<3)
2
[275]
x>3
np.sum(x>3)
x<=3
x>=3
x!=3
x==3
(2*x)
(x**x)
(2*x)==(x**2)
array([False,  True, False, False, False])
[278]
x=np.random.randint(10,size=(3,4))
x
array([[0, 9, 1, 4],
       [7, 0, 0, 1],
       [6, 6, 7, 8]])
[280]
x<6
array([[ True, False,  True,  True],
       [False,  True,  True,  True],
       [False, False, False, False]])
[284]
np.sum(x<6, axis=1)
array([3, 3, 0])
[286]
np.sum(x<6, axis=0)
array([1, 1, 2, 2])
[294]
#8보다 작은 값이 하나라도 있는가?
np.any(x<8)
True
[296]
#모든 값이 10보다 작은가?
np.all(x<10)
True
[298]
#모든 값이 6과 같은가?
np.all(x==6)
False
[300]
#각 행의 모든 값은 8보다 작은가?
np.all(x<8, axis=1)
array([False,  True, False])
[302]
np.all(x<8, axis=0)
array([ True, False,  True, False])
[305]
A=np.array([1,0,1,0,1,0], dtype=bool)
B=np.array([1,1,1,0,1,0], dtype=bool)
A|B
A&B
array([ True, False,  True, False,  True, False])
[308]
np.sum((inches>0.5)&(inches<1))
29
[312]
inches[(inches>0.5)&(inches<1)]
array([0.83858268, 0.8503937 , 0.72047244, 0.66929134, 0.57086614,
       0.5984252 , 0.51181102, 0.7519685 , 0.6496063 , 0.74015748,
       0.87007874, 0.5511811 , 0.72834646, 0.53937008, 0.55905512,
       0.62992126, 0.53937008, 0.75984252, 0.8503937 , 0.72047244,
       0.7992126 , 0.59055118, 0.66929134, 0.5984252 , 0.72047244,
       0.51181102, 0.51181102, 0.77165354, 0.81102362])
[316]
x=np.random.randint(100,size=10)
x
array([54, 77, 55, 39, 93,  0, 86,  6, 41, 46])
[318]
x[3],x[7],x[2]
(39, 6, 55)
[328]
idx=[3,7,2]
x[idx]
array([39,  6, 55])
[331]
ind=np.array([[3,7],
              [4,5]])
x[ind]
array([[39,  6],
       [93,  0]])
[333]
X=np.arange(12).reshape((3,4))
X
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
[335]
X[0,2], X[1,1], X[2,3]
(2, 5, 11)
[337]
row=np.array([0,1,2])
col=np.array([2,1,3])
X[row,col]
array([ 2,  5, 11])
[339]
row[:,np.newaxis]
array([[0],
       [1],
       [2]])
[341]
col
array([2, 1, 3])
[343]
X[row[:, np.newaxis], col]
array([[ 2,  1,  3],
       [ 6,  5,  7],
       [10,  9, 11]])
[345]
row[:, np.newaxis]*col
array([[0, 0, 0],
       [2, 1, 3],
       [4, 2, 6]])
[347]
print(X)
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

[349]
X[2,2], X[2,0], X[2,1]
(10, 8, 9)
[354]
X[1:,:]
array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
[356]
X[2,[2,0,1]]
array([10,  8,  9])
[358]
mask = np.array([1,0,1,0], dtype=bool)
mask
array([ True, False,  True, False])
[360]
X[2:,mask]
array([[ 8, 10]])
[362]
x=np.array([0.25,0.5,0.75,1.0])
x
array([0.25, 0.5 , 0.75, 1.  ])
[365]
import pandas as pd
data=pd.Series([0.25,0.5,0.75,1.0])
data
0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64
[367]
x[0]
0.25
[369]
data[0]
0.25
[371]
data.values
array([0.25, 0.5 , 0.75, 1.  ])
[373]
data.index
RangeIndex(start=0, stop=4, step=1)
[375]
data[1]
0.5
[377]
data[1:3]
1    0.50
2    0.75
dtype: float64
[397]
data=pd.Series([0.25, 0.5, 0.75, 1.0], index=['a','b','c','d'])
data
a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64
[382]
data['a']
0.25
[384]
data[0]
0.25
[386]
data=pd.Series([0.25, 0.5, 0.75, 1.0], index=[2,5,3,7])
data
2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64
[388]
data[3]
0.75
[391]
data={2:0.25, 5:0.50,3:0.75,7:1.00}
data=pd.Series(data)
data
2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64
[499]
population_dict = {'California': 38332521,
                   'Texas': 26448193,
                   'New York': 19651127,
                   'Florida': 19552860,
                   'Illinois': 12882135}
population = pd.Series(population_dict)
population
California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64
[395]
population_dict['California']
38332521
[401]
data['a']
data['a':'c']
data[0:3]
a    0.25
b    0.50
c    0.75
dtype: float64
[404]
population['California':'New York']
population[0:3]
California    38332521
Texas         26448193
New York      19651127
dtype: int64
[406]
pd.Series([2,4,6], index=[100,200,300])
100    2
200    4
300    6
dtype: int64
[409]
pd.Series({2:'a',1:'b',3:'c'}, index=[3,2])
3    c
2    a
dtype: object
[411]
population
California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64
[413]
area_dict = {'California': 423967, 'Texas': 695662, 'New York': 141297,
             'Florida': 170312, 'Illinois': 149995}
area = pd.Series(area_dict)
area
California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64
[500]
states=pd.DataFrame({'population':population
                   , 'area':area})
states

[417]
states.index
Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')
[421]
states.columns
Index(['population', 'area'], dtype='object')
[426]
states['population']
states['area']
California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64
[428]
pd.DataFrame(population, columns=['population'])

[430]
population
California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64
[432]
pd.DataFrame([{'a': 1, 'b': 2}, {'b': 3, 'c': 4}])

[434]
pd.DataFrame(np.random.rand(3, 2),
             columns=['foo', 'bar'],
             index=['a', 'b', 'c'])

[436]
data=pd.Series([0.25, 0.5, 0.75, 1.0], index=['a','b','c','d'])
data
a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64
[477]
data['b']
'b' in data
data.keys()
list(data.items())
data.values
data['c']=2.0
data
data['a':'c']
data[0:3]
# 마스크
(data>0.3) & (data<1.0)
# 마스킹
data[(data>0.3) & (data<1.0)]
data['a'], data['c']
# 팬시 인덱싱
data[['a','b']]
idx=['a','b']
data[idx]
a    0.25
b    0.50
dtype: float64
[478]
dic={'a':0.25, 'b':0.5, 'c':0.75, 'd':1.0}
dic['b']
'c' in dic
dic.keys()
list(data.items())
dic.items()
dict_items([('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)])
[483]
data=pd.Series(['a','b','c'], index=[1,3,5])
data[1]
'a'
[486]
data.iloc[2] #암시적 인덱스
'c'
[488]
data.loc[1] #명시적 인덱스
'a'
[495]
data[1:3] #암시적 인덱스
data.iloc[1:3]
3    b
5    c
dtype: object
[496]
data.loc[1:3] #명시적 인덱스
1    a
3    b
dtype: object
[514]
data=pd.DataFrame({'pop':population
                   , 'area':area})
data

[508]
data['area']
data.area
California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64
[510]
data.area is data['area']
True
[512]
dic
dic['e']=3.0
dic
{'a': 0.25, 'b': 0.5, 'c': 0.75, 'd': 1.0, 'e': 3.0}
[517]
data
data['density']=data['pop']/data['area']
data

[520]
data.values
array([[3.83325210e+07, 4.23967000e+05, 9.04139261e+01],
       [2.64481930e+07, 6.95662000e+05, 3.80187404e+01],
       [1.96511270e+07, 1.41297000e+05, 1.39076746e+02],
       [1.95528600e+07, 1.70312000e+05, 1.14806121e+02],
       [1.28821350e+07, 1.49995000e+05, 8.58837628e+01]])
[523]
data.T

[535]
data
data['area'] #열 데이터
data.loc['California'] #행 데이터
data.loc['California','area']
423967
[541]
data
data.iloc[0,1]
423967
[545]
data
data.iloc[:3,:2]
data.loc[:'New York',:'pop']

[561]
data['area'], data['density']
data[['pop','density']] # 팬시 인덱싱
data[data.density>100] #마스킹
data.loc[data.density>100, ['pop','density']]
data.loc[data['density']>100, ['pop','density']]

[565]
area = pd.Series({'Alaska': 1723337, 'Texas': 695662,
                  'California': 423967}, name='area')
population = pd.Series({'California': 38332521, 'Texas': 26448193,
                        'New York': 19651127}, name='population')
df=pd.DataFrame({"area":area, "population":population})
df

[567]
population/area
Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64
[571]
area.index
population.index
area.index|population.index
C:\Users\root\AppData\Local\Temp/ipykernel_3160/3387932575.py:3: FutureWarning: Index.__or__ operating as a set operation is deprecated, in the future this will be a logical operation matching Series.__or__.  Use index.union(other) instead
  area.index|population.index

Index(['Alaska', 'California', 'New York', 'Texas'], dtype='object')
[576]
A = pd.Series([2, 4, 6], index=[0, 1, 2])
B = pd.Series([1, 3, 5], index=[1, 2, 3])
A,B
A+B
A.add(B, fill_value=0)
0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64
[587]
A = pd.DataFrame(np.random.randint(0, 20, (2, 2)),
                 columns=list('AB'))
B = pd.DataFrame(np.random.randint(0, 10, (3, 3)),
                 columns=list('BAC'))
A+B

[591]
A.add(B,fill_value=0)

[595]
A.mean() #열 평균
A.stack().mean() # 전체 평균
9.25
[597]
A.add(B, fill_value=A.stack().mean())

[607]
vals1=np.array([1,None,3,4])
vals1
vals1=np.array([1,np.nan,3,4])
vals1
1+np.nan
0*np.nan
vals1.sum()
vals1.min()
vals1.max()
np.nansum(vals1)
np.nanmin(vals1)
np.nanmax(vals1)
4.0
[611]
x=pd.Series([1, np.nan, 2, None])
x
0    1.0
1    NaN
2    2.0
3    NaN
dtype: float64
[613]
x[2]=None
x
0    1.0
1    NaN
2    NaN
3    NaN
dtype: float64
[622]
x.isnull()
x.notnull()
x[x.notnull()]
x.dropna() #NaN을 제거한 새 Series를 반환 (x 자체는 변경되지 않음)
x
0    1.0
1    NaN
2    NaN
3    NaN
dtype: float64
[638]
df = pd.DataFrame([[1,      np.nan, 2],
                   [2,      3,      5],
                   [np.nan, 4,      6]])
df
df.dropna()

[632]
df


[639]
df.dropna(axis=1)
df.dropna(axis='columns')

[650]
df
df[1]
df[3]=np.nan
df
df.dropna(axis=1, how='all')
df
df.dropna(axis=0, thresh=3)
df.dropna(axis='rows', thresh=3)

[658]
ebola=pd.read_csv("https://raw.githubusercontent.com/SoongMoo/soldesk2110/main/data/country_timeseries.csv")
ebola
ebola.shape
ebola.shape[0]
ebola.shape[1]
ebola.count()
Date                   122
Day                    122
Cases_Guinea            93
Cases_Liberia           83
Cases_SierraLeone       87
Cases_Nigeria           38
Cases_Senegal           25
Cases_UnitedStates      18
Cases_Spain             16
Cases_Mali              12
Deaths_Guinea           92
Deaths_Liberia          81
Deaths_SierraLeone      87
Deaths_Nigeria          38
Deaths_Senegal          22
Deaths_UnitedStates     18
Deaths_Spain            16
Deaths_Mali             12
dtype: int64
[664]
ebola_fillna_0=ebola.fillna(0)
ebola_fillna_0.shape[0]-ebola_fillna_0.count()
ebola_fillna_100=ebola.fillna(100)
ebola_fillna_100.iloc[0:10, 0:5]

[672]
#0~9행, 0~4열에서만 누락값을 0으로 채움
ebola.iloc[0:10, 0:5].fillna(0)
ebola.fillna(0).iloc[0:10, 0:5]

[684]
#누락값을 평균 값으로 변경(각 열의 평균값)
print(ebola.mean())
ebola_fillna_mean=ebola.fillna(ebola.mean())
ebola_fillna_mean.iloc[0:10,0:5]
Day                     144.778689
Cases_Guinea            911.064516
Cases_Liberia          2335.337349
Cases_SierraLeone      2427.367816
Cases_Nigeria            16.736842
Cases_Senegal             1.080000
Cases_UnitedStates        3.277778
Cases_Spain               1.000000
Cases_Mali                3.500000
Deaths_Guinea           563.239130
Deaths_Liberia         1101.209877
Deaths_SierraLeone      693.701149
Deaths_Nigeria            6.131579
Deaths_Senegal            0.000000
Deaths_UnitedStates       0.833333
Deaths_Spain              0.187500
Deaths_Mali               3.166667
dtype: float64

C:\Users\root\AppData\Local\Temp/ipykernel_3160/2059366500.py:2: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.
  print(ebola.mean())
C:\Users\root\AppData\Local\Temp/ipykernel_3160/2059366500.py:3: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.
  ebola_fillna_mean=ebola.fillna(ebola.mean())


[686]
ebola

[689]
ebola_fillna_ffill=ebola.fillna(method='ffill')
ebola_fillna_ffill.iloc[0:10,0:5]

[691]
ebola_fillna_ffill=ebola.fillna(method='bfill')
ebola_fillna_ffill.iloc[0:10,0:5]

[693]
ebola_fillna_interpolate=ebola.interpolate()
ebola_fillna_interpolate.iloc[0:10,0:5]

[694]
ebola_fillna_ffill_1=ebola.fillna(method='ffill',axis=1)
ebola_fillna_ffill_1.iloc[0:10,0:5]
저작자표시
'파이썬[python]' 카테고리의 다른 글

파이썬 강좌 5 (0)	2022.05.13
파이썬 강좌 4 (0)	2022.05.13
파이썬 오프라인 (0)	2022.05.11
파이썬 강좌 2 (0)	2022.05.11
파이썬 강좌 1-1 (0)	2022.05.10
공유하기 링크
페이스북
카카오스토리
트위터
공지사항
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
링크
TAG more
« 2024/05 »
일	월	화	수	목	금	토
			1	2	3	4
5	6	7	8	9	10	11
12	13	14	15	16	17	18
19	20	21	22	23	24	25
26	27	28	29	30	31
글 보관함
IT의 신

티스토리 뷰

파이썬 강좌 3

'파이썬[python]' 카테고리의 다른 글

티스토리툴바