# Module-level layout settings read by plotNum when it lays out several subplots.
numGraphs = 3 # Maximum number of subplots per row when multiple variables are plotted
subWidth = 12 # Total figure width (matplotlib figsize units) for the subplot grid
subHeight = 6 # Figure height allotted to each row of subplots; multiplied by the row count
font = 8 # Font size for subplots (NOTE(review): not referenced by the code visible here)
def plotNum(setType, dataset, fields):
    """Plot a KDE curve for each requested field to show its value distribution.

    Parameters
    ----------
    setType : str
        Label used as the title prefix, e.g. "train" or "test".
    dataset : object indexable by field
        Accessed as ``dataset[field]``; each result must provide ``.dropna()``
        (presumably a pandas DataFrame -- confirm against callers).
    fields : sequence
        Labels of the variables to plot. An empty sequence draws nothing.

    Returns
    -------
    None
        The plots are displayed with ``plt.show()``.

    Notes
    -----
    A single field is drawn on the current axes. Several fields are laid out
    on a new figure with at most ``numGraphs`` plots per row, ``subWidth``
    wide and ``subHeight`` tall per row.
    """
    n = len(fields)  # Number of variables to plot
    if n == 0:
        # Nothing to draw; plt.subplots() would reject a zero-column grid.
        return None

    sns.set(style='darkgrid')
    title = "%s set %s"

    if n == 1:
        # Draw on the current axes so a figure prepared by the caller is kept.
        field = fields[0]
        ax = sns.kdeplot(dataset[field].dropna())
        ax.set_title(title % (setType, field))
        # tick_params also covers ticks created after this call, unlike
        # rotating only the label objects that exist right now.
        ax.tick_params(axis='x', labelrotation=90)
    else:
        cols = min(n, numGraphs)
        rows = math.ceil(n / numGraphs)
        # squeeze=False always yields a 2-D array, so one indexing scheme
        # serves both the single-row and the multi-row layout.
        _, axes = plt.subplots(rows, cols,
                               figsize=(subWidth, subHeight * rows),
                               squeeze=False)
        panels = axes.flatten()  # Row-major: panel i is row i // cols, col i % cols
        for ax, field in zip(panels, fields):
            sns.kdeplot(dataset[field].dropna(), ax=ax).set_title(
                title % (setType, field))
            ax.tick_params(axis='x', labelrotation=90)
            ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
        # Hide leftover cells when n is not a multiple of the row length.
        for ax in panels[n:]:
            ax.set_visible(False)

    plt.show()
    return None
'개발 > ML+ Data Science' 카테고리의 다른 글
rf,xgb,cat,lgb stacking 예제 (0) | 2019.10.06 |
---|---|
[Plot] 전체적인 categorical feature의 histogram그려주는 countplot 템플릿 (0) | 2019.10.05 |
Network work dataset feature Extraction 네트워크 피처추출하는법 (0) | 2019.10.01 |
data 전처리에서 nan과 null은 같다. (0) | 2019.10.01 |
PCA로 feature 줄이기 예제! (0) | 2019.10.01 |