Commit c44bc7f0 authored by Marcel Henrik Schubert
Browse files

fixed preprocess

parent f3ec5c57
......@@ -56,14 +56,15 @@ def get_linebytes(path, filename, test = False):
nextLineByte = f.tell() # returns the location of the next line
if test:
if i == 3:
i += 1
if i == 1:
break
i+=1
if not line or line == '':
break
with open(os.path.join(path, "linebytes.json"), 'w') as p:
json.dump(linebytes, p)
if not test:
with open(os.path.join(path, "linebytes.json"), 'w') as p:
json.dump(linebytes, p)
return linebytes
......@@ -625,19 +626,33 @@ def listener(doInt, ranges, q, savepath, rerun=False, num_to_process =0, o=None)
gotten = 0
proc = {}
id_byte = {}
while True:
#print('waiting for item from queue...', file=o)
#sys.stdout.flush()
#get from correct queue
m = q[doInt].get() ##Q:
#if we have the kill command from main we end the loop and close the file
print('Gotten el {} from queue'.format(type(m[0])), file=o)
#if we have the kill command from main we end the loop (if queue is empty) otherwise work till empty and close the file
if m[0] == 'kill':
print('is kill', file=o)
if doInt != 0:
q[doInt-1].put(m)
break
empty = q[doInt].empty()
print('Queue is empty: {}'.format(empty), file=o)
if empty:
print('is kill', file=o)
if doInt !=0:
q[doInt-1].put(m)
break
else:
print('is kill alternative', file=o)
q[doInt].put(m)
print('put kill command back to queue', file=o)
continue
#while not q[doInt].empty():
# m = q[doInt].get()
# print('Gotten el {} for id {} from queue'.format(m[2], m[4]), file=o)
# print(q[doInt].empty())
##this is for write results to file
if m[2] != 'PROCESS':
......@@ -666,7 +681,7 @@ def listener(doInt, ranges, q, savepath, rerun=False, num_to_process =0, o=None)
#print('wrote item to file ID: {}, tweetID {}, type: {} ngrams: {}'.format(m[0], m[4], m[2], m[3])
# , file=o)
#put the id into the processed queue
q[0].put((m[0], '', 'PROCESS', str(m[2]) + str(m[3]), m[4]))
q[0].put((m[0], '', 'PROCESS', str(m[2]) +'_' + str(m[3]), m[4]))
del res, m
#here we check whether all has been processed per ID, and if so, we put the ID into the processed file, so that we know all is done here
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment