"""Build the word-cloud data file ``gword.js`` from message subjects.

Reads the ``Subjects`` and ``Messages`` tables of ``index.sqlite``, counts
how often each word occurs across the subjects of all messages, and writes
the most frequent words to ``gword.js`` as a JavaScript array of
``{text, size}`` objects.
"""
import sqlite3
import time
import zlib
import string
from collections import Counter
from contextlib import closing

DB_PATH = 'index.sqlite'
OUTPUT_PATH = 'gword.js'

# Only the TOP_WORDS most frequent words are written out.
TOP_WORDS = 100
# Words shorter than this are ignored.
MIN_WORD_LENGTH = 4

# Spread the font sizes across 20-100 based on the count.
SMALL_SIZE = 20
SIZE_RANGE = 80

# Deletes ASCII punctuation and digits in a single pass.
STRIP_TABLE = str.maketrans('', '', string.punctuation + '1234567890')


def subject_words(subject):
    """Return the lower-cased words of *subject* that are long enough.

    ASCII punctuation and digits are removed before splitting on
    whitespace; words shorter than MIN_WORD_LENGTH are dropped.
    """
    cleaned = subject.translate(STRIP_TABLE).strip().lower()
    return [word for word in cleaned.split() if len(word) >= MIN_WORD_LENGTH]


def count_words(subject_texts):
    """Count word occurrences over an iterable of subject strings.

    ``None`` entries (a message whose subject is missing or NULL) are
    skipped instead of aborting the whole run.
    """
    counts = Counter()
    for text in subject_texts:
        if text is None:
            continue
        counts.update(subject_words(text))
    return counts


def font_size(count, lowest, highest):
    """Map *count* linearly from [lowest, highest] onto the font-size range.

    When every word has the same count the ratio is undefined, so the
    midpoint of the range is used rather than dividing by zero.
    """
    spread = highest - lowest
    ratio = (count - lowest) / spread if spread else 0.5
    return int(ratio * SIZE_RANGE + SMALL_SIZE)


def render_gword(word_counts, lowest, highest):
    """Return the JavaScript source text for ``gword.js``.

    *word_counts* is a sequence of ``(word, count)`` pairs in output order;
    *lowest* and *highest* are the extreme counts among them (unused when
    the sequence is empty).
    """
    entries = [
        "{text: '" + word + "', size: "
        + str(font_size(count, lowest, highest)) + "}"
        for word, count in word_counts
    ]
    return "gword = [" + ",\n".join(entries) + "\n];\n"


def main(db_path=DB_PATH, output_path=OUTPUT_PATH):
    """Read the message index at *db_path* and write *output_path*."""
    # sqlite3 connections do not close themselves when used as a context
    # manager, hence contextlib.closing.
    with closing(sqlite3.connect(db_path)) as conn:
        cur = conn.cursor()

        cur.execute('SELECT id, subject FROM Subjects')
        subjects = {row[0]: row[1] for row in cur}

        cur.execute('SELECT subject_id FROM Messages')
        # One subject per message, so a subject shared by many messages is
        # counted once per message.
        counts = count_words(subjects.get(row[0]) for row in cur)

    # Stable sort: words with equal counts keep first-seen order.
    top = sorted(counts, key=counts.get, reverse=True)[:TOP_WORDS]
    word_counts = [(word, counts[word]) for word in top]

    values = [count for _, count in word_counts]
    highest = max(values, default=None)
    lowest = min(values, default=None)
    print('Zakres częstości:',highest,lowest)

    with open(output_path, 'w') as fhand:
        fhand.write(render_gword(word_counts, lowest, highest))

    print("Wynik zapisano w gword.js")
    print("Otwórz gword.htm w przeglądarce internetowej by zobaczyć wizualizcję")


if __name__ == '__main__':
    main()