Commit 489f19ff authored by Marcel Henrik Schubert
Browse files

added plot changes to file

parent 005bf1c9
......@@ -558,9 +558,6 @@ def plotter(subsets, subana, phases, labels):
for st in subsets:
for ana in subana:
an = ana.split('/')[0]
......@@ -583,6 +580,8 @@ def plotter(subsets, subana, phases, labels):
index.append(label)
if label == 'age':
tmp_sub = []
ind_sub = []
comp = {'accuracy': accuracy_score(df_dic[st][an]['df'][an+'_'+str(st)+'_life_phase'],
df_dic[st][an]['df'][an+'_'+str(st)+'_life_phase_pred']).round(3),
......@@ -596,7 +595,34 @@ def plotter(subsets, subana, phases, labels):
df_dic[st][an]['df'][an+'_'+str(st)+'_life_phase_pred'],
average='weighted').round(3)
}
for ph in phases:
df = df_dic[st][an]['df']
comp_sub = {'accuracy': accuracy_score(df.loc[df[an+'_'+str(st)+'_life_phase'] == ph][an+'_'+str(st)+'_life_phase'],
df.loc[df[an+'_'+str(st)+'_life_phase'] == ph][an+'_'+str(st)+'_life_phase_pred']).round(3),
'precision': precision_score(df['df'][an+'_'+str(st)+'_life_phase'],
df[an+'_'+str(st)+'_life_phase_pred'], labels = [ph],
average='weighted').round(3),
'recall': recall_score(df[an+'_'+str(st)+'_life_phase'],
df[an+'_'+str(st)+'_life_phase_pred'],labels = [ph],
average='weighted').round(3),
'f1-score': f1_score(df[an+'_'+str(st)+'_life_phase'],
df[an+'_'+str(st)+'_life_phase_pred'],labels = [ph],
average='weighted').round(3)}
tmp_sub.append(comp_sub)
ind_sub.append('age phase {}'.format(ph))
f= plt.figure(figsize=(10,5))
tmp_df =pd.DataFrame(tmp_sub, index = ind_sub)
tmp_df.plot(kind='barh', colormap = cmap, ax=f.gca())
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5)) #legend outside box
plt.ylabel(ylabel='Evaluation Measures for Different Groups',fontsize ='large', fontweight='roman')
plt.tight_layout()
plt.savefig(savedir+'barplots/age_scores_{}_{}.pdf'.format(st, an))
plt.savefig(savedir+'barplots/age_scores_{}_{}.png'.format(st, an))
tmp.append(comp)
index.append('age by life_phase')
......@@ -610,6 +636,10 @@ def plotter(subsets, subana, phases, labels):
plt.tight_layout()
#plt.show()
#f.savefig('../Data/results/heatmaps/test.png')
f.savefig(savedir+ 'heatmaps/cm_{st}_{an}_{label}_{group}.pdf'.format(st = st,
an=an,
label=label,
group='life_phase'))
f.savefig(savedir+ 'heatmaps/cm_{st}_{an}_{label}_{group}.png'.format(st = st,
an=an,
label=label,
......@@ -619,6 +649,10 @@ def plotter(subsets, subana, phases, labels):
if label == 'author':
tmp_sub = []
ind_sub = []
gen_pred_auth = []
life_ph_pred_auth = []
gen_pred_auth_wrong = []
......@@ -630,6 +664,10 @@ def plotter(subsets, subana, phases, labels):
for au in auth:
gen_pred_auth.append(author_dic[st][an][au]['gender'])
life_ph_pred_auth.append(author_dic[st][an][au]['life_phase'])
df_dic[st][an]['df'][an+'_'+str(st)+'gender_pred_auth'] = gen_pred_auth
df_dic[st][an]['df'][an+'_'+str(st)+'life_phase_pred_auth'] = life_ph_pred_auth
for au in auth_sub_wrong:
gen_pred_auth_wrong.append(author_dic[st][an][au]['gender'])
......@@ -648,7 +686,34 @@ def plotter(subsets, subana, phases, labels):
life_ph_pred_auth,
average='weighted').round(3)
}
for ph in phases:
df = df_dic[st][an]['df']
comp_sub = {'accuracy': accuracy_score(df.loc[df[an+'_'+str(st)+'_life_phase'] == ph][an+'_'+str(st)+'_life_phase'],
df.loc[df[an+'_'+str(st)+'_life_phase'] == ph][an+'_'+str(st)+'_life_phase_pred_auth']).round(3),
'precision': precision_score(df['df'][an+'_'+str(st)+'_life_phase'],
df[an+'_'+str(st)+'_life_phase_pred_auth'], labels = [ph],
average='weighted').round(3),
'recall': recall_score(df[an+'_'+str(st)+'_life_phase'],
df[an+'_'+str(st)+'_life_phase_pred_auth'],labels = [ph],
average='weighted').round(3),
'f1-score': f1_score(df[an+'_'+str(st)+'_life_phase'],
df[an+'_'+str(st)+'_life_phase_pred_auth'],labels = [ph],
average='weighted').round(3)}
tmp_sub.append(comp_sub)
ind_sub.append('age phase {}'.format(ph))
f= plt.figure(figsize=(10,5))
tmp_df =pd.DataFrame(tmp_sub, index = ind_sub)
tmp_df.plot(kind='barh', colormap = cmap, ax=f.gca())
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5)) #legend outside box
plt.ylabel(ylabel='Evaluation Measures for Different Groups',fontsize ='large', fontweight='roman')
plt.tight_layout()
plt.savefig(savedir+'barplots/author_scores_{}_{}.pdf'.format(st, an))
plt.savefig(savedir+'barplots/author_scores_{}_{}.png'.format(st, an))
tmp.append(comp)
index.append('author by life_phase')
......@@ -661,6 +726,9 @@ def plotter(subsets, subana, phases, labels):
plot_confusion_matrix(cnf_matrix, classes=phases,title=None, ax=ax)
plt.show()
plt.tight_layout()
f.savefig(savedir+'heatmaps/cm_{st}_{an}_{label}_{group}.pdf'.format(st = st, an=an,
label=label,
group='life_phase'))
f.savefig(savedir+'heatmaps/cm_{st}_{an}_{label}_{group}.png'.format(st = st, an=an,
label=label,
group='life_phase'))
......@@ -688,6 +756,9 @@ def plotter(subsets, subana, phases, labels):
ax = f.subplots()
plot_confusion_matrix(cnf_matrix, classes=phases,title=None, ax=ax)
plt.tight_layout()
f.savefig(savedir+'heatmaps/cm_{st}_{an}_{label}_{group}.pdf'.format(st = st, an=an,
label=label,
group='gender'))
f.savefig(savedir+'heatmaps/cm_{st}_{an}_{label}_{group}.png'.format(st = st, an=an,
label=label,
group='gender'))
......@@ -701,6 +772,9 @@ def plotter(subsets, subana, phases, labels):
ax = f.subplots()
plot_confusion_matrix(cnf_matrix, classes=phases,title=None, ax=ax)
plt.tight_layout()
f.savefig(savedir+'heatmaps/cm_{st}_{an}_{label}_{group}_false.pdf'.format(st = st, an=an,
label=label,
group='life_phase'))
f.savefig(savedir+'heatmaps/cm_{st}_{an}_{label}_{group}_false.png'.format(st = st, an=an,
label=label,
group='life_phase'))
......@@ -713,6 +787,9 @@ def plotter(subsets, subana, phases, labels):
ax = f.subplots()
plot_confusion_matrix(cnf_matrix, classes=phases,title=None, ax=ax)
plt.tight_layout()
f.savefig(savedir+'heatmaps/cm_{st}_{an}_{label}_{group}_false.pdf'.format(st = st, an=an,
label=label,
group='gender'))
f.savefig(savedir+'heatmaps/cm_{st}_{an}_{label}_{group}_false.png'.format(st = st, an=an,
label=label,
group='gender'))
......@@ -728,6 +805,7 @@ def plotter(subsets, subana, phases, labels):
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5)) #legend outside box
plt.ylabel(ylabel='Evaluation Measures for Different Subsets',fontsize ='large', fontweight='roman')
plt.tight_layout()
plt.savefig(savedir+'barplots/overall_scores_{}_{}.pdf'.format(st, an))
plt.savefig(savedir+'barplots/overall_scores_{}_{}.png'.format(st, an))
......@@ -791,8 +869,16 @@ def plotter(subsets, subana, phases, labels):
df = pd.DataFrame(tmp , index = ind)
##select only informative features (i.e. those which are 0 across less than all columns of type)
mask = (df == 0.0).T
ls = []
for col in mask.columns:
if Counter(mask[col])[True] < 1:
ls.append(col)
df.drop(ls, inplace = True)
f,ax = plt.subplots(figsize=(18, len(ind)/6))
sns.heatmap(df, fmt= '.1f',ax=ax, center = 0, yticklabels = True)
f.savefig(savedir+'featureplots/features_heat_{}_{}_{}_phases.pdf'.format(st, an, label))
f.savefig(savedir+'featureplots/features_heat_{}_{}_{}_phases.png'.format(st, an, label))
elif label == 'author':
......@@ -834,7 +920,7 @@ def plotter(subsets, subana, phases, labels):
tmp_l.append(0)
tmp[ph] = tmp_l
#print([vocab[el] for el in ind_pos + ind_neg])
ind = [vocab[el].replace('§', '') for el in ind_pos+ind_neg]
ind = [re.sub(r'\s', 'BLANK', el) for el in ind]
......@@ -848,8 +934,19 @@ def plotter(subsets, subana, phases, labels):
ind[i] = ind[i].encode('unicode-escape')
#print([vocab[el] for el in ind_pos + ind_neg])
df = pd.DataFrame(tmp, index = ind)
##select only informative features (i.e. those which are 0 across less than all columns of type)
mask = (df == 0.0).T
ls = []
for col in mask.columns:
if Counter(mask[col])[True] < 1:
ls.append(col)
df.drop(ls, inplace = True)
f,ax = plt.subplots(figsize=(18, len(ind_pos+ind_neg)/6))
sns.heatmap(df, fmt= '.1f',ax=ax, center = 0, yticklabels = True)
f.savefig(savedir+'featureplots/features_heat_{}_{}_{}_phases.pdf'.format(st, an, label))
f.savefig(savedir+'featureplots/features_heat_{}_{}_{}_phases.png'.format(st, an, label))
dic = {}
......@@ -892,8 +989,18 @@ def plotter(subsets, subana, phases, labels):
except:
ind[i] = ind[i].encode('unicode-escape')
df = pd.DataFrame(tmp, index = ind)
##select only informative features (i.e. those which are 0 across less than all columns of type)
mask = (df == 0.0).T
ls = []
for col in mask.columns:
if Counter(mask[col])[True] < 1:
ls.append(col)
df.drop(ls, inplace = True)
f,ax = plt.subplots(figsize=(18, len(ind_pos+ind_neg)/6))
sns.heatmap(df, fmt= '.1f',ax=ax, center = 0, yticklabels = True)
f.savefig(savedir+'featureplots/features_heat_{}_{}_{}_gender_phases.pdf'.format(st, an, label))
f.savefig(savedir+'featureplots/features_heat_{}_{}_{}_gender_phases.png'.format(st, an, label))
return 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment