Commit 78504446 authored by Marcel Henrik Schubert
Browse files

fixed count grid search

parent eaa2cad5
......@@ -307,10 +307,10 @@ y_list = [['author', y_train_author], ['gender', y_train_gender], ['age', y_trai
if not os.path.exists(direct+subdir+'/'+'split_data'+'/{}_{}_sparse_train_count.jlib'.format(subset, size)):
vectorizer = CountVectorizer(tokenizer = identity_tokenizer, vocabulary = vocab, lowercase=False)
train_set = vectorizer.transform(train_set)
tf = TfidfTransformer(use_idf = False)
train_set = tf.fit_transform(train_set)
#tf = TfidfTransformer(use_idf = False)
#train_set = tf.fit_transform(train_set)
test_set = vectorizer.transform(test_set)
test_set = tf.transform(test_set)
#test_set = tf.transform(test_set)
col_train = np.array(tweet_train).reshape(-1,1)
col_test = np.array(tweet_test).reshape(-1,1)
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment