# Maximum number of subplots placed on one row when several variables are plotted.
numGraphs = 3
# Total figure width handed to plt.subplots (shared by every row of subplots).
subWidth = 12
# Figure height contributed by each row of subplots (multiplied by the row count).
subHeight = 6
# Font size intended for subplots; not referenced by the plotting code visible here.
font = 8
def plotCat(setType, dataset, fields):
    """Draw count plots of the most frequent categories of one or more columns.

    Each field gets its own bar chart of value counts (missing values are
    dropped first), limited to the ``maxCat`` most frequent categories and
    titled ``"<setType> set <field>"``. A single field is drawn on the
    current axes; several fields are laid out on a grid with at most
    ``numGraphs`` plots per row, sized from ``subWidth`` and ``subHeight``.

    Args:
        setType: Label used in each title (e.g. ``'train'`` or ``'test'``).
        dataset: Table indexed by column name; ``dataset[field]`` must
            support ``dropna()`` and ``value_counts()``.
        fields: Column names to plot. A single name given as a plain string
            is treated as a one-element list.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    maxCat = 10  # Number of categories to reduce plots to showing

    # A bare string would otherwise be iterated character by character.
    if isinstance(fields, str):
        fields = [fields]

    n = len(fields)  # Number of variables to plot
    if n == 0:
        # Nothing to draw; plt.subplots cannot build a zero-sized grid.
        return None

    sns.set(style='darkgrid')

    if n == 1:
        # Single variable: keep using the current axes at the default size.
        axes = [plt.gca()]
    else:
        rows = math.ceil(n / numGraphs)
        cols = min(n, numGraphs)
        # squeeze=False always yields a 2-D array, so one indexing scheme
        # covers both the single-row and the multi-row layout.
        _, grid = plt.subplots(rows, cols,
                               figsize=(subWidth, subHeight * rows),
                               squeeze=False)
        axes = list(grid.flatten())

    for field, ax in zip(fields, axes):
        data = dataset[field].dropna()
        topCategories = data.value_counts().iloc[:maxCat].index
        # Pass the series as `x` explicitly: the meaning of the first
        # positional argument of countplot differs between seaborn versions.
        sns.countplot(x=data, order=topCategories, ax=ax).set_title(
            "%s set %s" % (setType, field))
        # Rotate after drawing so the labels being rotated are the final ones.
        for tick in ax.get_xticklabels():
            tick.set_rotation(90)

    # Hide grid cells left over when n is not a multiple of numGraphs.
    for spare in axes[n:]:
        spare.set_visible(False)

    plt.show()
    return None
사용법
# Example: count plot of the single column 'ProductCD', titled "train set ProductCD".
# train_combined is not defined in this snippet; presumably the training data loaded elsewhere.
plotCat('train', train_combined, ['ProductCD'])
인자 설명: 첫 번째 인자는 train인지 test인지를 나타내는 문자열(그래프 제목용), 두 번째 인자는 dataset, 세 번째 인자는 그릴 feature 이름의 배열
'개발 > ML+ Data Science' 카테고리의 다른 글
ML에서 OOF (out of the folds)란? (0) | 2019.10.06 |
---|---|
rf,xgb,cat,lgb stacking 예제 (0) | 2019.10.06 |
[Plot] 전체적인 실수값 histogram을 그려주는 kdeplot 템플릿 (0) | 2019.10.05 |
Network work dataset feature Extraction 네트워크 피처추출하는법 (0) | 2019.10.01 |
data 전처리에서 nan과 null은 같다. (0) | 2019.10.01 |