Commit 4e45c3d4 authored by Marcel Henrik Schubert
Browse files

fixed error

parent 4bda1a62
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
import ndjson
import jsonlines
import json
import pickle
import os
import sys
import random as rd
import json
import re, regex
from joblib import dump, load
import collections
import math
import statistics
import itertools
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import PassiveAggressiveClassifier, SGDClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
```
%% Cell type:code id: tags:
``` python
# Root folder of the stratified subsamples (PAN19 celebrity-profiling training data).
path = '../Data/pan19-celebrity-profiling-training-dataset-2019-01-31/stratified_subsample/'
# Sub-analysis folders below `path`; the trailing slash is part of the value
# because file paths are assembled by plain string concatenation.
subana = ['org/']
# Subsample sizes to process. Only 2000 is active for this run; the other
# sizes that were selected here before are 200, 500 and 1000.
subsets = [2000]
# Classifier tags used in artifact file names and result keys.
classifiers = ['SVM']
# Folder (below each subsample) holding the train/test splits and the vocabulary.
datafolder = 'split_data/'
# Folder (below each subsample) holding encoders, fitted models and predictions.
ml_results = 'ml/'
# Common file-name prefix of all data and model artifacts.
filebeg = 'stratified_subsample_'
# Prediction targets that are evaluated.
labels = ['age', 'gender', 'author']
# Life-phase category names, ordered from youngest to oldest.
phases = ['child_21', 'young_adult_35', 'adult_50', 'old_adult_65', 'retiree']
```
%% Cell type:code id: tags:
``` python
def identity_tokenizer(text):
    """Return *text* unchanged.

    NOTE(review): not called in the visible part of this notebook; presumably
    it has to exist under this name so that pickled vectorizers/estimators
    referencing it can be loaded -- confirm before removing.
    """
    return text
def _life_phase(birth_year):
    """Return the life-phase label for a birth year (age is taken relative to 2019)."""
    age = 2019 - birth_year
    if age < 22:
        return 'child_21'
    if age < 36:
        return 'young_adult_35'
    if age < 51:
        return 'adult_50'
    if age < 66:
        return 'old_adult_65'
    return 'retiree'


# Nested result containers, all keyed as [subset size][analysis name]:
#   df_dic     -> {'df': DataFrame with true labels and predictions}
#   res_dic    -> vocabulary, label encoders, classifier coefficients, label keys
#   author_dic -> per-author life phase, birth year and gender (training split)
df_dic = {}
res_dic = {}
author_dic = {}
for st in subsets:
    res_dic[st] = {}
    df_dic[st] = {}
    author_dic[st] = {}
    for ana in subana:
        # Analysis name without the trailing slash, used as dictionary key.
        an = ana.split('/')[0]
        res_dic[st][an] = {}
        df_dic[st][an] = {}
        author_dic[st][an] = {}
        # Shared path prefixes of the split-data files and of the model artifacts.
        data_prefix = path + ana + str(st) + '/' + datafolder + filebeg + str(st)
        ml_prefix = path + ana + str(st) + '/' + ml_results + filebeg

        # Build a lookup with every author of the training split.
        with open(data_prefix + '_author_train.json', 'r', encoding='utf-8') as f:
            authors = json.load(f)
        with open(data_prefix + '_gender_train.json', 'r', encoding='utf-8') as f:
            gender = json.load(f)
        with open(data_prefix + '_age_train.json', 'r', encoding='utf-8') as f:
            year = json.load(f)
        for i, birth_year in enumerate(year):
            author_dic[st][an][authors[i]] = {
                'life_phase': _life_phase(birth_year),
                # The 'age' entry holds the birth year as stored in the file.
                'age': birth_year,
                'gender': gender[i],
            }

        df = pd.DataFrame()
        with open(data_prefix + '_bigram_vocab.json', 'r', encoding='utf-8') as f:
            vocab = json.load(f)
        vocab_inverse = {v: k for k, v in vocab.items()}
        res_dic[st][an]['vocab'] = vocab
        res_dic[st][an]['vocab_inverse'] = vocab_inverse
        # Extend the vocabulary with the tweet length as an extra feature to
        # display; it takes the next free index, which must not be in use yet.
        leng = len(vocab)
        if leng in vocab_inverse:
            print('error; key already exists')
            print(vocab_inverse[leng])
            sys.exit(1)
        vocab['§LENGTH§'] = leng
        vocab_inverse[leng] = '§LENGTH§'

        for label in labels:
            res_dic[st][an][label] = {}
            # NOTE: joblib's load() unpickles its input -- only load trusted artifacts.
            enc = load(ml_prefix + label + '_' + str(st) + '_encoder.jlib')
            res_dic[st][an][label]['label_encoder'] = enc
            for clf in classifiers:
                clf_l = load(ml_prefix + clf + '_' + label + '_' + str(st) + '_svm_out_count.jlib')
                with open(data_prefix + '_' + label + '_test.json', 'r', encoding='utf-8') as f:
                    lab = json.load(f)
                # Column holding the true test labels; prediction columns derive from it.
                col = an + '_' + str(st) + '_' + clf + '_' + label
                df[col] = lab
                print(str(st)+'_'+ana+'_'+label+'_'+clf)
                if label == 'age':
                    df[an + '_' + str(st) + '_life_phase'] = [_life_phase(el) for el in lab]
                res_dic[st][an][label][clf] = clf_l.coef_
                # Load the encoded predictions once and reuse them for both columns.
                pred_enc = load(ml_prefix + clf + '_' + label + '_' + str(st) + '_predictions_count.jlib')
                df[col + '_pred_enc'] = list(pred_enc)
                rev_enc = list(enc.inverse_transform(pred_enc))
                df[col + '_pred'] = rev_enc
                if label == 'age':
                    df[an + '_' + str(st) + '_life_phase_pred'] = [_life_phase(el) for el in rev_enc]
                # One (initially empty) result slot per distinct true test label.
                res_dic[st][an][label]['labels'] = {value: {} for value in lab}
        df_dic[st][an]['df'] = df
```
%% Output
C:\Users\schubert\AppData\Roaming\Python\Python36\site-packages\sklearn\base.py:251: UserWarning: Trying to unpickle estimator LabelEncoder from version 0.19.1 when using version 0.20.2. This might lead to breaking code or invalid results. Use at your own risk.
UserWarning)
C:\Users\schubert\AppData\Roaming\Python\Python36\site-packages\sklearn\base.py:251: UserWarning: Trying to unpickle estimator SGDClassifier from version 0.19.1 when using version 0.20.2. This might lead to breaking code or invalid results. Use at your own risk.
UserWarning)
2000_org/_age_SVM
2000_org/_gender_SVM
2000_org/_author_SVM
%% Cell type:code id: tags:
``` python
#df_dic[200]['org']['df']
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
# Compute overall and per-label evaluation results and store them in res_dic.
for st in subsets:
    for ana in subana:
        an = ana.split('/')[0]
        df = df_dic[st][an]['df']
        for label in labels:
            label_res = res_dic[st][an][label]
            enc = label_res['label_encoder']
            coef = label_res['SVM']
            key_len = len(label_res['labels'])
            true_col = an + '_' + str(st) + '_SVM_' + label
            pred_col = true_col + '_pred'
            y_true = df[true_col]
            y_pred = df[pred_col]
            # Builtin round() works whether the metric returns a NumPy scalar
            # or a plain Python float (which has no .round() method).
            label_res['acc'] = round(accuracy_score(y_true, y_pred), 3)
            label_res['prec'] = round(precision_score(y_true, y_pred, average='weighted'), 3)
            label_res['rec'] = round(recall_score(y_true, y_pred, average='weighted'), 3)
            label_res['f1'] = round(f1_score(y_true, y_pred, average='weighted'), 3)
            for key, key_res in label_res['labels'].items():
                key_enc = enc.transform([key])[0]
                # Rows whose true label equals this key.
                subDf = df.loc[y_true == key]
                if label == 'author':
                    # Take the author's attributes from the first matching row.
                    row = subDf.iloc[0]
                    key_res['gender'] = row[an + '_' + str(st) + '_SVM_gender']
                    key_res['age'] = row[an + '_' + str(st) + '_SVM_age']
                    key_res['life_phase'] = row[an + '_' + str(st) + '_life_phase']
                elif label == 'age':
                    # The key is a birth year; the age is taken relative to 2019.
                    age = 2019 - key
                    if age < 22:
                        lifePhase = 'child_21'
                    elif age < 36:
                        lifePhase = 'young_adult_35'
                    elif age < 51:
                        lifePhase = 'adult_50'
                    elif age < 66:
                        lifePhase = 'old_adult_65'
                    else:
                        lifePhase = 'retiree'
                    key_res['life_phase'] = lifePhase
                key_res['acc'] = round(accuracy_score(subDf[true_col], subDf[pred_col]), 3)
                # Precision, recall and F1 are deliberately not computed per
                # label: the subset holds a single true class, so (per the
                # original notes) precision is 1, recall adds nothing beyond
                # it, and the F1 score is ill-defined.
                if key_len > 2:
                    key_res['feature_vec'] = coef[key_enc]
                elif key_enc > 0:
                    # NOTE(review): with at most two labels only the label
                    # encoded above 0 receives a feature vector (row 0 of the
                    # coefficients), presumably because a binary model has a
                    # single coefficient row -- confirm that omitting the
                    # other label is intended.
                    key_res['feature_vec'] = coef[0]
```
%% Output
C:\Users\schubert\AppData\Roaming\Python\Python36\site-packages\sklearn\metrics\classification.py:1143: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples.
'precision', 'predicted', average, warn_for)
C:\Users\schubert\AppData\Roaming\Python\Python36\site-packages\sklearn\metrics\classification.py:1145: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples.
'recall', 'true', average, warn_for)
C:\Users\schubert\AppData\Roaming\Python\Python36\site-packages\sklearn\metrics\classification.py:1143: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
'precision', 'predicted', average, warn_for)
C:\Users\schubert\AppData\Roaming\Python\Python36\site-packages\sklearn\metrics\classification.py:1145: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no true samples.
'recall', 'true', average, warn_for)
%% Cell type:code id: tags:
``` python
# Display the nested results dictionary as the cell output.
res_dic
```
%% Output
{2000: {'org': {'vocab': {'e ': 0,
' t': 1,
's ': 2,
't ': 3,
'in': 4,
' a': 5,
'th': 6,
'n ': 7,
'he': 8,
'§MENTION§ ': 9,
'd ': 10,
'an': 11,
'er': 12,
're': 13,
'on': 14,
'y ': 15,
'r ': 16,
' §MENTION§': 17,
' s': 18,
'o ': 19,
'ou': 20,
' w': 21,
' o': 22,
' i': 23,
'or': 24,
'ng': 25,
'at': 26,
'to': 27,
' §LINK§': 28,
'. ': 29,
'en': 30,
' f': 31,
'§LINK§§END§': 32,
'st': 33,
'ha': 34,
'is': 35,
'es': 36,
'g ': 37,
' b': 38,
' m': 39,
'nd': 40,
'it': 41,
'ar': 42,
'te': 43,
' §TAG§': 44,
' h': 45,
'a ': 46,
' c': 47,
'ea': 48,
've': 49,
'§TAG§ ': 50,
'al': 51,
'ti': 52,
'me': 53,
'l ': 54,
'll': 55,
' p': 56,
'le': 57,
'ed': 58,
', ': 59,
'§RETWEET§ ': 60,
' d': 61,
'§BEG§§RETWEET§': 62,
'ne': 63,
'as': 64,
'hi': 65,
'nt': 66,
' l': 67,
'se': 68,
' y': 69,
'ri': 70,
'f ': 71,
' I': 72,
'! ': 73,
'h ': 74,
'li': 75,
'om': 76,
'yo': 77,
'ro': 78,
'ur': 79,
'de': 80,
' g': 81,
'fo': 82,
'of': 83,
'ra': 84,
'ay': 85,
'§BEG§§MENTION§': 86,
'ee': 87,
'ho': 88,
' n': 89,
'el': 90,
' T': 91,
'co': 92,
' r': 93,
'et': 94,
'be': 95,
'ut': 96,
' e': 97,
'ic': 98,
'..': 99,
'il': 100,
'la': 101,
'ch': 102,
'ta': 103,
'ow': 104,
'us': 105,
'm ': 106,
'oo': 107,
'ma': 108,
'ot': 109,
'k ': 110,
' S': 111,
'ce': 112,
'wi': 113,
'lo': 114,
'no': 115,
'io': 116,
'u ': 117,
'ca': 118,
'so': 119,
'Th': 120,
'ni': 121,
'un': 122,
'da': 123,
'wa': 124,
'pe': 125,
'w ': 126,
'ad': 127,
'si': 128,
'rs': 129,
'I ': 130,
'ac': 131,
'am': 132,
' A': 133,
'we': 134,
'rt': 135,
'ke': 136,
'ge': 137,
'ie': 138,
' C': 139,
'ig': 140,
'di': 141,
'ss': 142,
'ns': 143,
'ly': 144,
'ol': 145,
'ec': 146,
' M': 147,
'sh': 148,
'gh': 149,
'mo': 150,
'mi': 151,
'na': 152,
'!!': 153,
'ai': 154,
'do': 155,
'tr': 156,
'od': 157,
'ir': 158,
'ul': 159,
'id': 160,
' u': 161,
' W': 162,
'ts': 163,
' B': 164,
'po': 165,
'ck': 166,
' ': 167,
'os': 168,
"'s": 169,
'vi': 170,
'pr': 171,
'em': 172,
'ry': 173,
'ht': 174,
' H': 175,
'wh': 176,
'ov': 177,
'…§END§': 178,
'nc': 179,
'av': 180,
'ia': 181,
'nk': 182,
'im': 183,
'go': 184,
'p ': 185,
'wo': 186,
'op': 187,
' P': 188,
'.§END§': 189,
'pl': 190,
'ct': 191,
'tt': 192,
'ld': 193,
'ev': 194,
'bo': 195,
': ': 196,
'fi': 197,
' L': 198,
'ab': 199,
'pa': 200,
'sa': 201,
'my': 202,
' D': 203,
's.': 204,
'e.': 205,
' v': 206,
'iv': 207,
' G': 208,
'gr': 209,
'ci': 210,
'ki': 211,
'rd': 212,
' N': 213,
'su': 214,
'ba': 215,
'fr': 216,
' F': 217,
'ak': 218,
'ag': 219,
'pp': 220,
'fe': 221,
'§LINK§ ': 222,
'ew': 223,
'up': 224,
'ap': 225,
' R': 226,
'tu': 227,
'mp': 228,
'§TAG§§END§': 229,
'bu': 230,
'bl': 231,
'!§END§': 232,
'fa': 233,
'ey': 234,
'ep': 235,
'if': 236,
'ue': 237,
'ks': 238,
'pi': 239,
' k': 240,
't.': 241,
'rn': 242,
' j': 243,
'uc': 244,
'sp': 245,
'ga': 246,
'ex': 247,
'ye': 248,
'ty': 249,
' &': 250,
'& ': 251,
'nn': 252,
' O': 253,
'ru': 254,
'gi': 255,
' E': 256,
"n'": 257,
'ok': 258,
'rr': 259,
' 1': 260,
'um': 261,
'fu': 262,
'au': 263,
'ei': 264,
' 2': 265,
'§BEG§T': 266,
'- ': 267,
'Co': 268,
'ys': 269,
' J': 270,
' -': 271,
'oi': 272,
'cr': 273,
'ff': 274,
'ds': 275,
'? ': 276,
'ug': 277,
'ls': 278,
'i ': 279,
'bi': 280,
's,': 281,
"'t": 282,
'rk': 283,
'br': 284,
' Y': 285,
'y.': 286,
"t'": 287,
'by': 288,
'eo': 289,
'Ma': 290,
'ef': 291,
'We': 292,
'\n\n': 293,
'oc': 294,
'§BEG§I': 295,
'mu': 296,
'Ha': 297,
'n.': 298,
'ny': 299,
'sc': 300,
' "': 301,
'ik': 302,
'rl': 303,
'lu': 304,
's!': 305,
'e,': 306,
'e!': 307,
'rm': 308,
'cl': 309,
'c ': 310,
'eg': 311,
'ud': 312,
'’s': 313,
'Wh': 314,
'ju': 315,
'cu': 316,
' \n': 317,
'mm': 318,
"I'": 319,
'qu': 320,
'd.': 321,
'gu': 322,
'tc': 323,
'Ca': 324,
'vo': 325,
'va': 326,
'He': 327,
'" ': 328,
'A ': 329,
' K': 330,
'Ch': 331,
'Lo': 332,
'Go': 333,
'dr': 334,
'It': 335,
'r.': 336,
'No': 337,
'Yo': 338,
'ah': 339,
' U': 340,
'Re': 341,
'Ho': 342,
'aw': 343,
'So': 344,
'St': 345,
' .': 346,
'tl': 347,
'An': 348,
'mb': 349,
'jo': 350,
'ob': 351,
'lt': 352,
'ua': 353,
'rg': 354,
"e'": 355,
't!': 356,
' (': 357,
'og': 358,
'ft': 359,
'To': 360,
'af': 361,
') ': 362,
'ui': 363,
'wn': 364,
'du': 365,
'dy': 366,
'§BEG§W': 367,
'pu': 368,
'0 ': 369,
'ip': 370,
'ek': 371,
'hr': 372,
'In': 373,
'§BEG§A': 374,
'E ': 375,
'kn': 376,
'y,': 377,
'Se': 378,
'20': 379,
'§BEG§H': 380,
'y!': 381,
'Be': 382,
'§BEG§S': 383,
'eb': 384,
' V': 385,
'ub': 386,
'§MENTION§§END§': 387,
'oa': 388,
'ib': 389,
'tw': 390,
't,': 391,
'Pr': 392,
"'m": 393,
'hu': 394,
'py': 395,
'oy': 396,
'rc': 397,
'T ': 398,
'Tr': 399,
'S ': 400,
'Mo': 401,
'gs': 402,
'Gr': 403,
'nu': 404,
'Ne': 405,
'az': 406,
'00': 407,
'g.': 408,
'De': 409,
'Al': 410,
'ph': 411,
'rv': 412,
'Sa': 413,
'Pa': 414,
' ا': 415,
'yi': 416,
'n,': 417,
'Wa': 418,
' q': 419,
'ps': 420,
'Br': 421,
'Do': 422,
'ms': 423,
'.\n': 424,
'Da': 425,
'n’': 426,
'Ju': 427,
'ws': 428,
'gn': 429,
'nl': 430,
'oe': 431,
' :': 432,
'lk': 433,
'l.': 434,
'sk': 435,
'Sh': 436,
'pt': 437,
'gl': 438,
'… ': 439,
'x ': 440,
'Mi': 441,
'Li': 442,
'ال': 443,
'§BEG§M': 444,
'\n§LINK§': 445,
'Me': 446,
'hy': 447,
'Le': 448,
'xt': 449,
'n!': 450,
'2 ': 451,
'Su': 452,
'sl': 453,
'cc': 454,
' 3': 455,
'La': 456,
'zi': 457,
'?§END§': 458,
'§BEG§C': 459,
'dd': 460,
'nf': 461,
'Wi': 462,
'Wo': 463,
'10': 464,
'o.': 465,
'’t': 466,
'rf': 467,
'iz': 468,
'§BEG§§TAG§': 469,
'Ba': 470,
'01': 471,
't’': 472,
'\n§TAG§': 473,
'Fo': 474,
'd,': 475,
'dn': 476,
'§BEG§G': 477,
'sm': 478,
'sn': 479,
'My': 480,
'lf': 481,
'Bu': 482,
'Fr': 483,
'§BEG§L': 484,
'Am': 485,
'r,': 486,
'Jo': 487,
"'r": 488,
' 4': 489,
'fl': 490,
'nv': 491,
' !': 492,
'm.': 493,
'lp': 494,
' “': 495,
':)': 496,
'On': 497,
'xc': 498,
'b ': 499,
'Ro': 500,
'ka': 501,
'ze': 502,
'xp': 503,
'lm': 504,
'ya': 505,
'd!': 506,
'uy': 507,
'Fa': 508,
')§END§': 509,
'aa': 510,
'h.': 511,
'§MENTION§.': 512,
'Fi': 513,
'bs': 514,
'IN': 515,
'5 ': 516,
'a.': 517,
" '": 518,
'e:': 519,
"' ": 520,
'.§MENTION§': 521,
'Bo': 522,
'§BEG§B': 523,
'Bi': 524,
'Di': 525,
'Ge': 526,
'” ': 527,
'§MENTION§,': 528,
' 5': 529,
'gg': 530,
'Pe': 531,
'Te': 532,
'wr': 533,
't…': 534,
'4 ': 535,
'1 ': 536,
'Ye': 537,
'§MENTION§:': 538,
' x': 539,
'RE': 540,
'TH': 541,
'§BEG§O': 542,
'Po': 543,
'dl': 544,
"u'": 545,
'I’': 546,
'Y ': 547,
'If': 548,
'Ja': 549,
'r!': 550,
'e§END§': 551,
'D ': 552,
'3 ': 553,
'w.': 554,
'Hi': 555,
'o!': 556,
'Pl': 557,
'k.': 558,
'sw': 559,
'§BEG§P': 560,
'O ': 561,
'§BEG§D': 562,
"y'": 563,
'§BEG§F': 564,
'je': 565,
'e…': 566,
'Ar': 567,
'N ': 568,
'§BEG§N': 569,
"'l": 570,
'e?': 571,
'eh': 572,
'C ': 573,
' م': 574,
'sy': 575,
'ER': 576,
'Sp': 577,
'cy': 578,
'lw': 579,
'e’': 580,
'u!': 581,
"'v": 582,
'nj': 583,
'pm': 584,
'g!': 585,
's§END§': 586,
'😂😂': 587,
'§MENTION§!': 588,
'Ta': 589,
's:': 590,
'Na': 591,
'§MENTION§…': 592,
'hd': 593,
'rp': 594,
'ن ': 595,
'."': 596,
'Si': 597,
'ON': 598,
't?': 599,
'l,': 600,
'R ': 601,
'oh': 602,
'g,': 603,
'o,': 604,
'??': 605,
'Ti': 606,
'AN': 607,
'Ra': 608,
'US': 609,
'30': 610,
'P ': 611,
'dg': 612,
'ا ': 613,
's?': 614,
'tm': 615,
' @': 616,
'VE': 617,
'rw': 618,
'Mu': 619,
'ST': 620,
'ja': 621,
'M ': 622,
'En': 623,
'e-': 624,
'yt': 625,
'p.': 626,
'h,': 627,
'As': 628,
'a!': 629,
'/ ': 630,
's…': 631,
' 6': 632,
'l!': 633,
'§BEG§R': 634,
'Fe': 635,
'Vi': 636,
'ES': 637,
'§BEG§J': 638,
'Sc': 639,
' ❤️': 640,
' 7': 641,
'Tu': 642,
'Cl': 643,
'a,': 644,
'L ': 645,
'OU': 646,
'Ga': 647,
'8 ': 648,
' ب': 649,
'6 ': 650,
'z ': 651,
' 😂': 652,
'HE': 653,
'§BEG§Y': 654,
' 9': 655,
' 8': 656,
'ix': 657,
'x§END§': 658,
'xx': 659,
'Au': 660,
'Ke': 661,
'AT': 662,
'7 ': 663,
'yl': 664,
'Ka': 665,
'rb': 666,
'za': 667,
'Ev': 668,
'Ri': 669,
'xi': 670,
'Un': 671,
'NG': 672,
'h!': 673,
'Ki': 674,
' و': 675,
'w!': 676,
'U ': 677,
'!\n': 678,
'Ni': 679,
'EA': 680,
'Is': 681,
'eu': 682,
'’m': 683,
'§BEG§E': 684,
'u.': 685,
'Bl': 686,
'IS': 687,
'HA': 688,
'G ': 689,
'ة ': 690,
'cs': 691,
'@ ': 692,
't§END§': 693,
'w/': 694,
'K ': 695,
'Ou': 696,
'AL': 697,
'TO': 698,
'\n§MENTION§': 699,
'lv': 700,
'15': 701,
'18': 702,
'र ': 703,
'Ex': 704,
'lb': 705,
'hn': 706,
'Ru': 707,
'19': 708,
'ر ': 709,
'ym': 710,
'ko': 711,
'ه ': 712,
'RT': 713,
'Cr': 714,
'--': 715,
'Je': 716,
'\nT': 717,
'§BEG§“': 718,
'11': 719,
'ي ': 720,
' $': 721,
'Ve': 722,
'v ': 723,
'ان': 724,
'12': 725,
'§BEG§"': 726,
'NE': 727,
'ky': 728,
'§BEG§§LINK§': 729,
'uf': 730,
'tp': 731,
'k!': 732,
'ox': 733,
'hh': 734,
'lc': 735,
'nm': 736,
'LO': 737,
'gy': 738,
'LA': 739,
'OR': 740,
'hl': 741,
"§MENTION§'": 742,
'17': 743,
'aj': 744,
' Q': 745,
'kl': 746,
'TE': 747,
'❤️ ': 748,
'“§MENTION§': 749,
'bb': 750,
'LL': 751,
'ax': 752,
'At': 753,
'LI': 754,
'Dr': 755,
'NO': 756,
' X': 757,
'sd': 758,
'§TAG§\n': 759,
'OO': 760,
'xa': 761,
'AR': 762,
'o…': 763,
'n…': 764,
'm,': 765,
'yb': 766,
'16': 767,
't:': 768,
'k,': 769,
'yn': 770,
'eq': 771,
'IT': 772,
'Hu': 773,
'§BEG§.': 774,
'50': 775,
'’r': 776,
'م ': 777,
'Tw': 778,
'Oh': 779,
'LE': 780,
'ME': 781,
'?!': 782,
'DA': 783,
'9 ': 784,
'Ce': 785,
'a…': 786,
'y§END§': 787,
'EN': 788,
'y?': 789,
'Gi': 790,
'dm': 791,
'W ': 792,
'dv': 793,
'Cu': 794,
'Ap': 795,
'yr': 796,
'lr': 797,
'ی ': 798,
'm!': 799,
'Of': 800,
'Gu': 801,
'w,': 802,
'AY': 803,
'n§END§': 804,
'Qu': 805,
'Ph': 806,
'r…': 807,
'tn': 808,
'Aw': 809,
'% ': 810,
'ND': 811,
'tb': 812,
'RI': 813,
'El': 814,
'OW': 815,
'AS': 816,
'YO': 817,
'EE': 818,
'Pu': 819,
'Ea': 820,
'AM': 821,
'Fu': 822,
'Ac': 823,
'ez': 824,
't-': 825,
'OT': 826,
'; ': 827,
'😂 ': 828,
'n-': 829,
'n?': 830,
'ل ': 831,
'NY': 832,
'Or': 833,
'د ': 834,
'ت ': 835,
'HI': 836,
'n:': 837,
'MA': 838,
'SA': 839,
'h…': 840,
'ae': 841,
's"': 842,
'hs': 843,
'GO': 844,
'y:': 845,
'| ': 846,
'14': 847,
'l§END§': 848,
'"I': 849,
'❤️❤️': 850,
'gt': 851,
"r'": 852,
'ET': 853,
'xe': 854,
'Fl': 855,
'IC': 856,
'Pi': 857,
'TI': 858,
'Ab': 859,
'\nI': 860,
':\n': 861,
'p…': 862,
'Va': 863,
'o§END§': 864,
'u’': 865,
'CA': 866,
' |': 867,
'’ ': 868,
'+ ': 869,
'§LINK§\n': 870,
'Af': 871,
' ‘': 872,
' ل': 873,
'mn': 874,
"'d": 875,
'के ': 876,
'OM': 877,
'ED': 878,
'e"': 879,
'u,': 880,
' z': 881,
'-s': 882,
'd§END§': 883,
' §RETWEET§': 884,
'و ': 885,
'"T': 886,
'Ci': 887,
'S.': 888,
'zz': 889,
'.”': 890,
'Ad': 891,
'SO': 892,
'r-': 893,
'SE': 894,
'zo': 895,
'Du': 896,
'/1': 897,
'H ': 898,
'وا': 899,
'RA': 900,
' ک': 901,
'p!': 902,
'i…': 903,
'CH': 904,
' के': 905,
'لا': 906,
'\nW': 907,
'DE': 908,
"d'": 909,
'13': 910,
"s'": 911,
'را': 912,
'ار': 913,
' ع': 914,
'f.': 915,
'oj': 916,
'CO': 917,
'\nA': 918,
'iu': 919,
' ,': 920,
'o-': 921,
'y’': 922,
'Gl': 923,
' Z': 924,
' ت': 925,
'tf': 926,
'xo': 927,
' ;': 928,
'نا': 929,
'’v': 930,
':3': 931,
'\nS': 932,
'SS': 933,
'ii': 934,
'IG': 935,
' +': 936,
'WA': 937,
'😂§END§': 938,
'V ': 939,
'yp': 940,
'1s': 941,
"a'": 942,
'IV': 943,
'wl': 944,
'd…': 945,
'c.': 946,
'ww': 947,
'AC': 948,
'r?': 949,
'F ': 950,
'NT': 951,
'B ': 952,
'25': 953,
'zy': 954,
' 😘': 955,
'yw': 956,
'-t': 957,
'RO': 958,
' 😊': 959,
'TV': 960,
'r:': 961,
'❤️§END§': 962,
'HO': 963,
' د': 964,
'PM': 965,
'd:': 966,
'd?': 967,
'MI': 968,
'-o': 969,
' ?': 970,
'a§END§': 971,
'NI': 972,
'ij': 973,
'BC': 974,
' ف': 975,
'r§END§': 976,
'ما': 977,
'p,': 978,
'OL': 979,
"o'": 980,
'त ': 981,
'nb': 982,
'1.': 983,
'لم': 984,
'-1': 985,
'yy': 986,
'uu': 987,
'yd': 988,
'CE': 989,
'Im': 990,
'UN': 991,
'ام': 992,
'§MENTION§)': 993,
'Lu': 994,
'MO': 995,
'sb': 996,
' 😍': 997,
'UK': 998,
' ن': 999,
...},
'vocab_inverse': {0: 'e ',
1: ' t',
2: 's ',
3: 't ',
4: 'in',
5: ' a',
6: 'th',
7: 'n ',
8: 'he',
9: '§MENTION§ ',
10: 'd ',
11: 'an',
12: 'er',
13: 're',
14: 'on',
15: 'y ',
16: 'r ',
17: ' §MENTION§',
18: ' s',
19: 'o ',
20: 'ou',
21: ' w',
22: ' o',
23: ' i',
24: 'or',
25: 'ng',
26: 'at',
27: 'to',
28: ' §LINK§',
29: '. ',
30: 'en',
31: ' f',
32: '§LINK§§END§',
33: 'st',
34: 'ha',
35: 'is',
36: 'es',
37: 'g ',
38: ' b',
39: ' m',
40: 'nd',
41: 'it',
42: 'ar',
43: 'te',
44: ' §TAG§',
45: ' h',
46: 'a ',
47: ' c',
48: 'ea',
49: 've',
50: '§TAG§ ',
51: 'al',
52: 'ti',
53: 'me',
54: 'l ',
55: 'll',
56: ' p',
57: 'le',
58: 'ed',
59: ', ',
60: '§RETWEET§ ',
61: ' d',
62: '§BEG§§RETWEET§',
63: 'ne',
64: 'as',
65: 'hi',
66: 'nt',
67: ' l',
68: 'se',
69: ' y',
70: 'ri',
71: 'f ',
72: ' I',
73: '! ',
74: 'h ',
75: 'li',
76: 'om',
77: 'yo',
78: 'ro',
79: 'ur',
80: 'de',
81: ' g',
82: 'fo',
83: 'of',
84: 'ra',
85: 'ay',
86: '§BEG§§MENTION§',
87: 'ee',
88: 'ho',
89: ' n',
90: 'el',
91: ' T',
92: 'co',
93: ' r',
94: 'et',
95: 'be',
96: 'ut',
97: ' e',
98: 'ic',
99: '..',
100: 'il',
101: 'la',
102: 'ch',
103: 'ta',
104: 'ow',
105: 'us',
106: 'm ',
107: 'oo',
108: 'ma',
109: 'ot',
110: 'k ',
111: ' S',
112: 'ce',
113: 'wi',
114: 'lo',
115: 'no',
116: 'io',
117: 'u ',
118: 'ca',
119: 'so',
120: 'Th',
121: 'ni',
122: 'un',
123: 'da',
124: 'wa',
125: 'pe',
126: 'w ',
127: 'ad',
128: 'si',
129: 'rs',
130: 'I ',
131: 'ac',
132: 'am',
133: ' A',
134: 'we',
135: 'rt',
136: 'ke',
137: 'ge',
138: 'ie',
139: ' C',
140: 'ig',
141: 'di',
142: 'ss',
143: 'ns',
144: 'ly',
145: 'ol',
146: 'ec',
147: ' M',
148: 'sh',
149: 'gh',
150: 'mo',
151: 'mi',
152: 'na',
153: '!!',
154: 'ai',
155: 'do',
156: 'tr',
157: 'od',
158: 'ir',
159: 'ul',
160: 'id',
161: ' u',
162: ' W',
163: 'ts',
164: ' B',
165: 'po',
166: 'ck',
167: ' ',
168: 'os',
169: "'s",
170: 'vi',
171: 'pr',
172: 'em',
173: 'ry',
174: 'ht',
175: ' H',
176: 'wh',
177: 'ov',
178: '…§END§',
179: 'nc',
180: 'av',
181: 'ia',
182: 'nk',
183: 'im',
184: 'go',
185: 'p ',
186: 'wo',
187: 'op',
188: ' P',
189: '.§END§',
190: 'pl',
191: 'ct',
192: 'tt',
193: 'ld',
194: 'ev',
195: 'bo',
196: ': ',
197: 'fi',
198: ' L',
199: 'ab',
200: 'pa',
201: 'sa',
202: 'my',
203: ' D',
204: 's.',
205: 'e.',
206: ' v',
207: 'iv',
208: ' G',
209: 'gr',
210: 'ci',
211: 'ki',
212: 'rd',
213: ' N',
214: 'su',
215: 'ba',
216: 'fr',
217: ' F',
218: 'ak',
219: 'ag',
220: 'pp',
221: 'fe',
222: '§LINK§ ',
223: 'ew',
224: 'up',
225: 'ap',
226: ' R',
227: 'tu',
228: 'mp',
229: '§TAG§§END§',
230: 'bu',
231: 'bl',
232: '!§END§',
233: 'fa',
234: 'ey',
235: 'ep',
236: 'if',
237: 'ue',
238: 'ks',
239: 'pi',
240: ' k',
241: 't.',
242: 'rn',
243: ' j',
244: 'uc',
245: 'sp',
246: 'ga',
247: 'ex',
248: 'ye',
249: 'ty',
250: ' &',
251: '& ',
252: 'nn',
253: ' O',
254: 'ru',
255: 'gi',
256: ' E',
257: "n'",
258: 'ok',
259: 'rr',
260: ' 1',
261: 'um',
262: 'fu',
263: 'au',
264: 'ei',
265: ' 2',
266: '§BEG§T',
267: '- ',
268: 'Co',
269: 'ys',
270: ' J',
271: ' -',
272: 'oi',
273: 'cr',
274: 'ff',
275: 'ds',
276: '? ',
277: 'ug',
278: 'ls',
279: 'i ',
280: 'bi',
281: 's,',
282: "'t",
283: 'rk',
284: 'br',
285: ' Y',
286: 'y.',
287: "t'",
288: 'by',
289: 'eo',
290: 'Ma',
291: 'ef',
292: 'We',
293: '\n\n',
294: 'oc',
295: '§BEG§I',
296: 'mu',
297: 'Ha',
298: 'n.',
299: 'ny',
300: 'sc',
301: ' "',
302: 'ik',
303: 'rl',
304: 'lu',
305: 's!',
306: 'e,',
307: 'e!',
308: 'rm',
309: 'cl',
310: 'c ',
311: 'eg',
312: 'ud',
313: '’s',
314: 'Wh',
315: 'ju',
316: 'cu',
317: ' \n',
318: 'mm',
319: "I'",
320: 'qu',
321: 'd.',
322: 'gu',
323: 'tc',
324: 'Ca',
325: 'vo',
326: 'va',
327: 'He',
328: '" ',
329: 'A ',
330: ' K',
331: 'Ch',
332: 'Lo',
333: 'Go',
334: 'dr',
335: 'It',
336: 'r.',
337: 'No',
338: 'Yo',
339: 'ah',
340: ' U',
341: 'Re',
342: 'Ho',
343: 'aw',
344: 'So',
345: 'St',
346: ' .',
347: 'tl',
348: 'An',
349: 'mb',
350: 'jo',
351: 'ob',
352: 'lt',
353: 'ua',
354: 'rg',
355: "e'",
356: 't!',
357: ' (',
358: 'og',
359: 'ft',
360: 'To',
361: 'af',
362: ') ',
363: 'ui',
364: 'wn',
365: 'du',
366: 'dy',
367: '§BEG§W',
368: 'pu',
369: '0 ',
370: 'ip',
371: 'ek',
372: 'hr',
373: 'In',
374: '§BEG§A',
375: 'E ',
376: 'kn',
377: 'y,',
378: 'Se',
379: '20',
380: '§BEG§H',
381: 'y!',
382: 'Be',
383: '§BEG§S',
384: 'eb',
385: ' V',
386: 'ub',
387: '§MENTION§§END§',
388: 'oa',
389: 'ib',
390: 'tw',
391: 't,',
392: 'Pr',
393: "'m",
394: 'hu',
395: 'py',
396: 'oy',
397: 'rc',
398: 'T ',
399: 'Tr',
400: 'S ',
401: 'Mo',
402: 'gs',
403: 'Gr',
404: 'nu',
405: 'Ne',
406: 'az',
407: '00',
408: 'g.',
409: 'De',
410: 'Al',
411: 'ph',
412: 'rv',
413: 'Sa',
414: 'Pa',
415: ' ا',
416: 'yi',
417: 'n,',
418: 'Wa',
419: ' q',
420: 'ps',
421: 'Br',
422: 'Do',
423: 'ms',
424: '.\n',
425: 'Da',
426: 'n’',
427: 'Ju',
428: 'ws',
429: 'gn',
430: 'nl',
431: 'oe',
432: ' :',
433: 'lk',
434: 'l.',
435: 'sk',
436: 'Sh',
437: 'pt',
438: 'gl',
439: '… ',
440: 'x ',
441: 'Mi',
442: 'Li',
443: 'ال',
444: '§BEG§M',
445: '\n§LINK§',
446: 'Me',
447: 'hy',
448: 'Le',
449: 'xt',
450: 'n!',
451: '2 ',
452: 'Su',
453: 'sl',
454: 'cc',
455: ' 3',
456: 'La',
457: 'zi',
458: '?§END§',
459: '§BEG§C',
460: 'dd',
461: 'nf',
462: 'Wi',
463: 'Wo',
464: '10',
465: 'o.',
466: '’t',
467: 'rf',
468: 'iz',
469: '§BEG§§TAG§',
470: 'Ba',
471: '01',
472: 't’',
473: '\n§TAG§',
474: 'Fo',
475: 'd,',
476: 'dn',
477: '§BEG§G',
478: 'sm',
479: 'sn',
480: 'My',
481: 'lf',
482: 'Bu',
483: 'Fr',
484: '§BEG§L',
485: 'Am',
486: 'r,',
487: 'Jo',
488: "'r",
489: ' 4',
490: 'fl',
491: 'nv',
492: ' !',
493: 'm.',
494: 'lp',
495: ' “',
496: ':)',
497: 'On',
498: 'xc',
499: 'b ',
500: 'Ro',
501: 'ka',
502: 'ze',
503: 'xp',
504: 'lm',
505: 'ya',
506: 'd!',
507: 'uy',
508: 'Fa',
509: ')§END§',
510: 'aa',
511: 'h.',
512: '§MENTION§.',
513: 'Fi',
514: 'bs',
515: 'IN',
516: '5 ',
517: 'a.',
518: " '",
519: 'e:',
520: "' ",
521: '.§MENTION§',
522: 'Bo',
523: '§BEG§B',
524: 'Bi',
525: 'Di',
526: 'Ge',
527: '” ',
528: '§MENTION§,',
529: ' 5',
530: 'gg',
531: 'Pe',
532: 'Te',
533: 'wr',
534: 't…',
535: '4 ',
536: '1 ',
537: 'Ye',
538: '§MENTION§:',
539: ' x',
540: 'RE',
541: 'TH',
542: '§BEG§O',
543: 'Po',
544: 'dl',
545: "u'",
546: 'I’',
547: 'Y ',
548: 'If',
549: 'Ja',
550: 'r!',
551: 'e§END§',
552: 'D ',
553: '3 ',
554: 'w.',
555: 'Hi',
556: 'o!',
557: 'Pl',
558: 'k.',
559: 'sw',
560: '§BEG§P',
561: 'O ',
562: '§BEG§D',
563: "y'",
564: '§BEG§F',
565: 'je',
566: 'e…',
567: 'Ar',
568: 'N ',
569: '§BEG§N',
570: "'l",
571: 'e?',
572: 'eh',
573: 'C ',
574: ' م',
575: 'sy',
576: 'ER',
577: 'Sp',
578: 'cy',
579: 'lw',
580: 'e’',
581: 'u!',
582: "'v",
583: 'nj',
584: 'pm',
585: 'g!',
586: 's§END§',
587: '😂😂',
588: '§MENTION§!',
589: 'Ta',
590: 's:',
591: 'Na',
592: '§MENTION§…',
593: 'hd',
594: 'rp',
595: 'ن ',
596: '."',
597: 'Si',
598: 'ON',
599: 't?',
600: 'l,',
601: 'R ',
602: 'oh',
603: 'g,',
604: 'o,',
605: '??',
606: 'Ti',
607: 'AN',
608: 'Ra',
609: 'US',
610: '30',
611: 'P ',
612: 'dg',
613: 'ا ',
614: 's?',
615: 'tm',
616: ' @',
617: 'VE',
618: 'rw',
619: 'Mu',
620: 'ST',
621: 'ja',
622: 'M ',
623: 'En',
624: 'e-',
625: 'yt',
626: 'p.',
627: 'h,',
628: 'As',
629: 'a!',
630: '/ ',
631: 's…',
632: ' 6',
633: 'l!',
634: '§BEG§R',
635: 'Fe',
636: 'Vi',
637: 'ES',
638: '§BEG§J',
639: 'Sc',
640: ' ❤️',
641: ' 7',
642: 'Tu',
643: 'Cl',
644: 'a,',
645: 'L ',
646: 'OU',
647: 'Ga',
648: '8 ',
649: ' ب',
650: '6 ',
651: 'z ',
652: ' 😂',
653: 'HE',
654: '§BEG§Y',
655: ' 9',
656: ' 8',
657: 'ix',
658: 'x§END§',
659: 'xx',
660: 'Au',
661: 'Ke',
662: 'AT',
663: '7 ',
664: 'yl',
665: 'Ka',
666: 'rb',
667: 'za',
668: 'Ev',
669: 'Ri',
670: 'xi',
671: 'Un',
672: 'NG',
673: 'h!',
674: 'Ki',
675: ' و',
676: 'w!',
677: 'U ',
678: '!\n',
679: 'Ni',
680: 'EA',
681: 'Is',
682: 'eu',
683: '’m',
684: '§BEG§E',
685: 'u.',
686: 'Bl',
687: 'IS',
688: 'HA',
689: 'G ',
690: 'ة ',
691: 'cs',
692: '@ ',
693: 't§END§',
694: 'w/',
695: 'K ',
696: 'Ou',
697: 'AL',
698: 'TO',
699: '\n§MENTION§',
700: 'lv',
701: '15',
702: '18',
703: 'र ',
704: 'Ex',
705: 'lb',
706: 'hn',
707: 'Ru',
708: '19',
709: 'ر ',
710: 'ym',
711: 'ko',
712: 'ه ',
713: 'RT',
714: 'Cr',
715: '--',
716: 'Je',
717: '\nT',
718: '§BEG§“',
719: '11',
720: 'ي ',
721: ' $',
722: 'Ve',
723: 'v ',
724: 'ان',
725: '12',
726: '§BEG§"',
727: 'NE',
728: 'ky',
729: '§BEG§§LINK§',
730: 'uf',
731: 'tp',
732: 'k!',
733: 'ox',
734: 'hh',
735: 'lc',
736: 'nm',
737: 'LO',
738: 'gy',
739: 'LA',
740: 'OR',
741: 'hl',
742: "§MENTION§'",
743: '17',
744: 'aj',
745: ' Q',
746: 'kl',
747: 'TE',
748: '❤️ ',
749: '“§MENTION§',
750: 'bb',
751: 'LL',
752: 'ax',
753: 'At',
754: 'LI',
755: 'Dr',
756: 'NO',
757: ' X',
758: 'sd',
759: '§TAG§\n',
760: 'OO',
761: 'xa',
762: 'AR',
763: 'o…',
764: 'n…',
765: 'm,',
766: 'yb',
767: '16',
768: 't:',
769: 'k,',
770: 'yn',
771: 'eq',
772: 'IT',
773: 'Hu',
774: '§BEG§.',
775: '50',
776: '’r',
777: 'م ',
778: 'Tw',
779: 'Oh',
780: 'LE',
781: 'ME',
782: '?!',
783: 'DA',
784: '9 ',
785: 'Ce',
786: 'a…',
787: 'y§END§',
788: 'EN',
789: 'y?',
790: 'Gi',
791: 'dm',
792: 'W ',
793: 'dv',
794: 'Cu',
795: 'Ap',
796: 'yr',
797: 'lr',
798: 'ی ',
799: 'm!',
800: 'Of',
801: 'Gu',
802: 'w,',
803: 'AY',
804: 'n§END§',
805: 'Qu',
806: 'Ph',
807: 'r…',
808: 'tn',
809: 'Aw',
810: '% ',
811: 'ND',
812: 'tb',
813: 'RI',
814: 'El',
815: 'OW',
816: 'AS',
817: 'YO',
818: 'EE',
819: 'Pu',
820: 'Ea',
821: 'AM',
822: 'Fu',
823: 'Ac',
824: 'ez',
825: 't-',
826: 'OT',
827: '; ',
828: '😂 ',
829: 'n-',
830: 'n?',
831: 'ل ',
832: 'NY',
833: 'Or',
834: 'د ',
835: 'ت ',
836: 'HI',
837: 'n:',
838: 'MA',
839: 'SA',
840: 'h…',
841: 'ae',
842: 's"',
843: 'hs',
844: 'GO',
845: 'y:',
846: '| ',
847: '14',
848: 'l§END§',
849: '"I',
850: '❤️❤️',
851: 'gt',
852: "r'",
853: 'ET',
854: 'xe',
855: 'Fl',
856: 'IC',
857: 'Pi',
858: 'TI',
859: 'Ab',
860: '\nI',
861: ':\n',
862: 'p…',
863: 'Va',
864: 'o§END§',
865: 'u’',
866: 'CA',
867: ' |',
868: '’ ',
869: '+ ',
870: '§LINK§\n',
871: 'Af',
872: ' ‘',
873: ' ل',
874: 'mn',
875: "'d",
876: 'के ',
877: 'OM',
878: 'ED',
879: 'e"',
880: 'u,',
881: ' z',
882: '-s',
883: 'd§END§',
884: ' §RETWEET§',
885: 'و ',
886: '"T',
887: 'Ci',
888: 'S.',
889: 'zz',
890: '.”',
891: 'Ad',
892: 'SO',
893: 'r-',
894: 'SE',
895: 'zo',
896: 'Du',
897: '/1',
898: 'H ',
899: 'وا',
900: 'RA',
901: ' ک',
902: 'p!',
903: 'i…',
904: 'CH',
905: ' के',
906: 'لا',
907: '\nW',
908: 'DE',
909: "d'",
910: '13',
911: "s'",
912: 'را',
913: 'ار',
914: ' ع',
915: 'f.',
916: 'oj',
917: 'CO',
918: '\nA',
919: 'iu',
920: ' ,',
921: 'o-',
922: 'y’',
923: 'Gl',
924: ' Z',
925: ' ت',
926: 'tf',
927: 'xo',
928: ' ;',
929: 'نا',
930: '’v',
931: ':3',
932: '\nS',
933: 'SS',
934: 'ii',
935: 'IG',
936: ' +',
937: 'WA',
938: '😂§END§',
939: 'V ',
940: 'yp',
941: '1s',
942: "a'",
943: 'IV',
944: 'wl',
945: 'd…',
946: 'c.',
947: 'ww',
948: 'AC',
949: 'r?',
950: 'F ',
951: 'NT',
952: 'B ',
953: '25',
954: 'zy',
955: ' 😘',
956: 'yw',
957: '-t',
958: 'RO',
959: ' 😊',
960: 'TV',
961: 'r:',
962: '❤️§END§',
963: 'HO',
964: ' د',
965: 'PM',
966: 'd:',
967: 'd?',
968: 'MI',
969: '-o',
970: ' ?',
971: 'a§END§',
972: 'NI',
973: 'ij',
974: 'BC',
975: ' ف',
976: 'r§END§',
977: 'ما',
978: 'p,',
979: 'OL',
980: "o'",
981: 'त ',
982: 'nb',
983: '1.',
984: 'لم',
985: '-1',
986: 'yy',
987: 'uu',
988: 'yd',
989: 'CE',
990: 'Im',
991: 'UN',
992: 'ام',
993: '§MENTION§)',
994: 'Lu',
995: 'MO',
996: 'sb',
997: ' 😍',
998: 'UK',
999: ' ن',
...},
'age': {'label_encoder': LabelEncoder(),
'SVM': array([[-0.00677567, -0.00169392, -0.01863308, ..., 0. ,
0. , 0.02168654],
[-0.00254088, -0.00423479, -0.00762263, ..., 0. ,
0. , 0.03802259],
[-0.020327 , 0.01524525, -0.01609221, ..., 0. ,
0. , 0.05151201],
...,
[ 0.00381115, 0.00381115, -0.00508154, ..., 0. ,
0. , -0.01698541],
[-0.00381115, 0.00508154, -0.01397422, ..., 0. ,
0. , -0.00691292],
[-0.03302998, -0.00127038, -0.00635192, ..., 0. ,
0. , -0.06553542]]),
'labels': {1949: {'life_phase': 'retiree',
'acc': 0.105,
'feature_vec': array([-0.00635232, 0.00317616, -0.01016372, ..., 0. ,
0. , 0.05144212])},
1955: {'life_phase': 'old_adult_65',
'acc': 0.004,
'feature_vec': array([ 0.00698755, -0.00762279, -0.00381139, ..., 0. ,
0. , 0.02802543])},
1951: {'life_phase': 'retiree',
'acc': 0.18,
'feature_vec': array([ 0.0019057 , 0.00508186, -0.0019057 , ..., 0. ,
0. , 0.0659627 ])},
1998: {'life_phase': 'child_21',
'acc': 0.197,
'feature_vec': array([-0.01067204, 0.01067204, -0.01067204, ..., 0. ,
0. , -0.02388219])},
1968: {'life_phase': 'old_adult_65',
'acc': 0.176,
'feature_vec': array([-0.00889325, 0.00571709, -0.00889325, ..., 0. ,
0. , 0.11210101])},
1961: {'life_phase': 'old_adult_65',
'acc': 0.057,
'feature_vec': array([ 8.60783881e-16, -4.44662541e-03, -2.54092880e-03, ...,
0.00000000e+00, 0.00000000e+00, 4.87144330e-02])},
1986: {'life_phase': 'young_adult_35',
'acc': 0.075,
'feature_vec': array([-0.00635232, -0.00825802, 0.01079895, ..., 0. ,
0. , 0.16698355])},
1964: {'life_phase': 'old_adult_65',
'acc': 0.024,
'feature_vec': array([-2.56679531e-16, 3.17616101e-03, 6.35232201e-04, ...,
0.00000000e+00, 0.00000000e+00, 1.03128995e-01])},
1965: {'life_phase': 'old_adult_65',
'acc': 0.056,
'feature_vec': array([0.00444663, 0.00381139, 0.00635232, ..., 0. , 0. ,
0.07466856])},
1992: {'life_phase': 'young_adult_35',
'acc': 0.045,
'feature_vec': array([ 0.00571709, -0.00889325, 0.00444663, ..., 0. ,
0. , 0.08777073])},
1971: {'life_phase': 'adult_50',
'acc': 0.057,
'feature_vec': array([ 0.00508186, 0.00063523, -0.00063523, ..., 0. ,
0. , 0.08597588])},
1962: {'life_phase': 'old_adult_65',
'acc': 0.112,
'feature_vec': array([ 0.00254093, -0.00063523, 0.00063523, ..., 0. ,
0. , 0.05681561])},
1994: {'life_phase': 'young_adult_35',
'acc': 0.064,
'feature_vec': array([ 0.00084696, -0.00592871, -0.00508175, ..., 0. ,
0. , 0.03952467])},
1973: {'life_phase': 'adult_50',
'acc': 0.055,
'feature_vec': array([ 0.01206941, -0.01333988, 0.00254093, ..., 0. ,
0. , 0.07283649])},
1959: {'life_phase': 'old_adult_65',
'acc': 0.098,
'feature_vec': array([ 0.00381139, -0.00635232, 0.00698755, ..., 0. ,
0. , 0.10421721])},
2000: {'life_phase': 'child_21',
'acc': 0.113,
'feature_vec': array([-0.004356 , -0.001815 , 0.005445 , ..., -0.000363 , -0.000363 ,
0.1691012])},
1966: {'life_phase': 'old_adult_65',
'acc': 0.22,
'feature_vec': array([ 0.00711469, -0.00508192, 0.00101638, ..., -0.00050819,
-0.00050819, 0.13088236])},
1963: {'life_phase': 'old_adult_65',
'acc': 0.069,
'feature_vec': array([-0.00952848, -0.01143418, -0.00127046, ..., 0. ,
0. , 0.12347707])},
1946: {'life_phase': 'retiree',
'acc': 0.068,
'feature_vec': array([-0.00508186, 0.00444663, -0.01333988, ..., 0. ,
0. , 0.04483507])},
1976: {'life_phase': 'adult_50',
'acc': 0.052,
'feature_vec': array([-0.0019057 , -0.00508186, 0.00571709, ..., 0. ,
0. , 0.05640919])},
1978: {'life_phase': 'adult_50',
'acc': 0.08,
'feature_vec': array([ 0.01270464, -0.00317616, 0.01206941, ..., 0. ,
0. , 0.05276137])},
1989: {'life_phase': 'young_adult_35',
'acc': 0.037,
'feature_vec': array([ 1.27046440e-03, -5.71708981e-03, -1.51392450e-15, ...,
0.00000000e+00, 0.00000000e+00, 6.07830825e-02])},
1944: {'life_phase': 'retiree',
'acc': 0.292,
'feature_vec': array([-0.00677567, -0.00423479, -0.02456179, ..., 0. ,
0. , -0.00494116])},
1952: {'life_phase': 'retiree',
'acc': 0.209,
'feature_vec': array([-0.0066065 , 0.00508192, -0.00101638, ..., 0. ,
0. , 0.07873895])},
1970: {'life_phase': 'adult_50',
'acc': 0.069,
'feature_vec': array([ 0.00571709, 0.0019057 , -0.0019057 , ..., 0. ,
0. , 0.10251276])},
1983: {'life_phase': 'adult_50',
'acc': 0.062,
'feature_vec': array([ 0.00508186, -0.01016372, 0.01016372, ..., -0.00063523,
-0.00063523, 0.04483208])},
1953: {'life_phase': 'retiree',
'acc': 0.144,
'feature_vec': array([-0.00457373, -0.00152458, -0.00152458, ..., 0. ,
0. , 0.08475437])},
1984: {'life_phase': 'young_adult_35',
'acc': 0.007,
'feature_vec': array([ 3.59824797e-16, -1.71512694e-02, 4.44662541e-03, ...,
0.00000000e+00, 0.00000000e+00, 4.24202347e-02])},
1956: {'life_phase': 'old_adult_65',
'acc': 0.061,
'feature_vec': array([-0.00063523, -0.00508186, -0.00381139, ..., 0. ,
0. , 0.08420143])},
1993: {'life_phase': 'young_adult_35',
'acc': 0.065,
'feature_vec': array([-0.00677567, -0.00254088, -0.00254088, ..., 0. ,
0. , -0.00317203])},
1995: {'life_phase': 'young_adult_35',
'acc': 0.075,
'feature_vec': array([0.00254088, 0.00592871, 0.00338783, ..., 0. , 0. ,
0.01353643])},
1960: {'life_phase': 'old_adult_65',
'acc': 0.05,
'feature_vec': array([ 0.00571709, -0.01270464, 0.0019057 , ..., 0. ,
0. , 0.08362305])},
1967: {'life_phase': 'old_adult_65',
'acc': 0.122,
'feature_vec': array([ 0.00304915, -0.00914746, 0.00609831, ..., 0. ,
0. , 0.07726788])},
1943: {'life_phase': 'retiree',
'acc': 0.14,
'feature_vec': array([ 0.01439829, -0.0101635 , 0.01524525, ..., 0. ,
0. , 0.09805152])},
1999: {'life_phase': 'child_21',
'acc': 0.188,
'feature_vec': array([-0.01206941, 0.01079895, -0.01270464, ..., 0. ,
0. , 0.02765458])},
1947: {'life_phase': 'retiree',
'acc': 0.059,
'feature_vec': array([-0.00127046, -0.00508186, -0.00317616, ..., -0.00063523,
-0.00063523, 0.05036254])},
1942: {'life_phase': 'retiree',
'acc': 0.212,
'feature_vec': array([-0.020327 , 0.01524525, -0.01609221, ..., 0. ,
0. , 0.05151201])},
1985: {'life_phase': 'young_adult_35',
'acc': 0.062,
'feature_vec': array([ 0.01016372, -0.00381139, 0.00317616, ..., 0. ,
0. , 0.10488789])},
1940: {'life_phase': 'retiree',
'acc': 0.05,
'feature_vec': array([-0.00677567, -0.00169392, -0.01863308, ..., 0. ,
0. , 0.02168654])},
1987: {'life_phase': 'young_adult_35',
'acc': 0.105,
'feature_vec': array([ 0.00127046, -0.00571709, 0.00635232, ..., 0. ,
0. , 0.08442782])},
2005: {'life_phase': 'child_21',
'acc': 0.066,
'feature_vec': array([ 8.46958352e-04, 7.98090749e-17, 3.38783341e-03, ...,
0.00000000e+00, 0.00000000e+00, -7.74611170e-03])},
1990: {'life_phase': 'young_adult_35',
'acc': 0.045,
'feature_vec': array([ 6.98755421e-03, -8.25801862e-03, -9.09143792e-16, ...,
0.00000000e+00, 0.00000000e+00, 4.62643424e-02])},
1975: {'life_phase': 'adult_50',
'acc': 0.085,
'feature_vec': array([ 0.01333988, -0.02096266, 0.01588081, ..., 0. ,
0. , 0.10714347])},
1974: {'life_phase': 'adult_50',
'acc': 0.099,
'feature_vec': array([ 0.00889325, -0.01079895, 0.01206941, ..., 0. ,
0. , 0.0874392 ])},
2001: {'life_phase': 'child_21',
'acc': 0.052,
'feature_vec': array([-0.00169392, -0.00592871, -0.00592871, ..., 0. ,
0. , 0.0512492 ])},
1982: {'life_phase': 'adult_50',
'acc': 0.083,
'feature_vec': array([ 0.00698755, -0.01651604, -0.00127046, ..., 0. ,
0. , 0.09355242])},
1958: {'life_phase': 'old_adult_65',
'acc': 0.059,
'feature_vec': array([ 0.00508186, 0.00381139, -0.01461034, ..., -0.00063523,
-0.00063523, 0.04135539])},
1991: {'life_phase': 'young_adult_35',
'acc': 0.014,
'feature_vec': array([ 0.00846958, -0.00508175, 0.00931654, ..., 0. ,
0. , 0.05434161])},
1988: {'life_phase': 'young_adult_35',
'acc': 0.138,
'feature_vec': array([ 0.00571709, -0.00444663, 0.00508186, ..., 0. ,
0. , 0.07336316])},
1948: {'life_phase': 'retiree',
'acc': 0.148,
'feature_vec': array([-8.25801862e-03, -9.18500139e-16, -6.35232201e-03, ...,
0.00000000e+00, 0.00000000e+00, 5.53974568e-02])},
1972: {'life_phase': 'adult_50',
'acc': 0.026,
'feature_vec': array([-0.00825802, -0.00254093, -0.00508186, ..., 0. ,
0. , 0.07007081])},
1957: {'life_phase': 'old_adult_65',
'acc': 0.07,
'feature_vec': array([ 0.00169392, 0.00169392, -0.00084696, ..., 0. ,
0. , 0.03138269])},
1950: {'life_phase': 'retiree',
'acc': 0.074,
'feature_vec': array([-0.00050819, -0.01016384, 0.00050819, ..., -0.00050819,
-0.00050819, 0.11148456])},
1969: {'life_phase': 'adult_50',
'acc': 0.078,
'feature_vec': array([0.00063523, 0.00508186, 0.00254093, ..., 0. , 0. ,
0.07676629])},
1981: {'life_phase': 'adult_50',
'acc': 0.049,
'feature_vec': array([-0.00063523, -0.00825802, -0.00444663, ..., 0.00254093,
0.00254093, 0.11222584])},
1980: {'life_phase': 'adult_50',
'acc': 0.1,
'feature_vec': array([-0.00444663, 0.00063523, 0.00571709, ..., 0. ,
0. , 0.07988725])},
1941: {'life_phase': 'retiree',
'acc': 0.17,
'feature_vec': array([-0.00254088, -0.00423479, -0.00762263, ..., 0. ,
0. , 0.03802259])},
1979: {'life_phase': 'adult_50',
'acc': 0.053,
'feature_vec': array([ 0.00508186, -0.01206941, 0.00508186, ..., 0. ,
0. , 0.08692841])},
1977: {'life_phase': 'adult_50',
'acc': 0.098,
'feature_vec': array([-0.0019057 , -0.00571709, 0.00571709, ..., 0. ,
0. , 0.28550873])},
2003: {'life_phase': 'child_21',
'acc': 0.048,
'feature_vec': array([ 0.00254088, -0.00338783, -0.00169392, ..., 0. ,
0. , 0.01480017])},
1954: {'life_phase': 'old_adult_65',
'acc': 0.1,
'feature_vec': array([-0.00952848, 0.00381139, -0.01905697, ..., 0. ,
0. , 0.02893737])},
2002: {'life_phase': 'child_21',
'acc': 0.036,
'feature_vec': array([-0.01778613, 0.00423479, -0.00762263, ..., 0. ,
0. , 0.03892273])},
1945: {'life_phase': 'retiree',
'acc': 0.085,
'feature_vec': array([-0.01185742, 0.00338783, -0.01185742, ..., 0. ,
0. , 0.01294017])},
1996: {'life_phase': 'young_adult_35',
'acc': 0.073,
'feature_vec': array([-0.00338783, -0.00169392, -0.00592871, ..., 0. ,
0. , 0.0522575 ])},
2008: {'life_phase': 'child_21',
'acc': 0.39,
'feature_vec': array([-0.03302998, -0.00127038, -0.00635192, ..., 0. ,
0. , -0.06553542])},
1997: {'life_phase': 'young_adult_35',
'acc': 0.112,
'feature_vec': array([-0.00508175, -0.00338783, 0.00338783, ..., 0. ,
0. , 0.01308483])},
2004: {'life_phase': 'child_21',
'acc': 0.099,
'feature_vec': array([-0.00423479, 0.00592871, -0.00423479, ..., 0. ,
0. , -0.00660619])},
2006: {'life_phase': 'child_21',
'acc': 0.01,
'feature_vec': array([ 0.00381115, 0.00381115, -0.00508154, ..., 0. ,
0. , -0.01698541])},
2007: {'life_phase': 'child_21',
'acc': 0.02,
'feature_vec': array([-0.00381115, 0.00508154, -0.01397422, ..., 0. ,
0. , -0.00691292])}},
'acc': 0.099,
'prec': 0.154,
'rec': 0.099,
'f1': 0.098},
'gender': {'label_encoder': LabelEncoder(),
'SVM': array([[ 0.02185282, 0.05818949, -0.00635256, ..., 0.00254103,
0.00254103, -0.23526192]]),
'labels': {'female': {'acc': 0.621},
'male': {'acc': 0.607,
'feature_vec': array([ 0.02185282, 0.05818949, -0.00635256, ..., 0.00254103,
0.00254103, -0.23526192])}},
'acc': 0.615,
'prec': 0.615,
'rec': 0.615,
'f1': 0.615},
'author': {'label_encoder': LabelEncoder(),
'SVM': array([[-0.00592871, -0.00169392, -0.00338783, ..., 0. ,
0. , -0.01790733],
[-0.01143345, -0.00635192, 0.00508154, ..., 0. ,
0. , -0.03942268],
[-0.01397422, 0.01397422, 0.01524461, ..., 0. ,
0. , -0.03780535],
...,
[-0.00635192, -0.0076223 , 0.00127038, ..., 0. ,
0. , -0.02545112],
[-0.0076223 , 0.00127038, 0.00889269, ..., 0. ,
0. , -0.02380585],
[-0.0076223 , -0.0076223 , -0.01016307, ..., 0. ,
0. , -0.01591359]]),
'labels': {14237: {'gender': 'female',
'age': 1949,
'life_phase': 'retiree',
'acc': 0.409,
'feature_vec': array([ 0.01143345, 0.01524461, -0.01651499, ..., 0. ,
0. , -0.04926053])},
39182: {'gender': 'male',
'age': 1955,
'life_phase': 'old_adult_65',
'acc': 0.27,
'feature_vec': array([ 0.00254088, -0.00084696, -0.00084696, ..., 0. ,
0. , -0.03016883])},
12093: {'gender': 'male',
'age': 1951,
'life_phase': 'retiree',
'acc': 0.04,
'feature_vec': array([-0.00677567, 0.00423479, -0.00846958, ..., 0. ,
0. , -0.01431766])},
40578: {'gender': 'female',
'age': 1998,
'life_phase': 'child_21',
'acc': 0.498,
'feature_vec': array([ 0.00127038, -0.01016307, 0.0076223 , ..., 0. ,
0. , -0.01865152])},
9869: {'gender': 'male',
'age': 1968,
'life_phase': 'old_adult_65',
'acc': 0.027,
'feature_vec': array([-0.0101635 , -0.00084696, 0.00084696, ..., 0. ,
0. , -0.00631484])},
23699: {'gender': 'male',
'age': 1961,
'life_phase': 'old_adult_65',
'acc': 0.017,
'feature_vec': array([-0.00084696, 0.00931654, -0.00592871, ..., 0. ,
0. , -0.02485619])},
18614: {'gender': 'female',
'age': 1986,
'life_phase': 'young_adult_35',
'acc': 0.007,
'feature_vec': array([-0.00254088, 0.00254088, 0.00762263, ..., 0. ,
0. , -0.00865134])},
6953: {'gender': 'female',
'age': 1964,
'life_phase': 'old_adult_65',
'acc': 0.304,
'feature_vec': array([-0.0101635 , 0.00338783, -0.01101046, ..., -0.00084696,
-0.00084696, 0.00897039])},
23655: {'gender': 'female',
'age': 1965,
'life_phase': 'old_adult_65',
'acc': 0.02,
'feature_vec': array([-0.00084696, -0.00169392, -0.00423479, ..., 0. ,
0. , -0.01075976])},
39735: {'gender': 'male',
'age': 1992,
'life_phase': 'young_adult_35',
'acc': 0.073,
'feature_vec': array([-0.00084696, -0.00084696, 0.00846958, ..., 0. ,
0. , -0.00993448])},