ASCII code

ASCII code
http://mathcs.emory.edu/~cheung/Courses/170/Syllabus/05/char1.html

def orc(file_name):
    """Print every line of a text file to standard output.

    Lines are printed exactly as read, so a line that ends in a newline is
    followed by a blank line (``print`` appends a newline of its own).

    Args:
        file_name: Path of the text file to read.

    Returns:
        None.
    """
    # The context manager closes the file even if printing raises.
    with open(file_name) as file_object:
        # Iteration ends at EOF, the same point where readline() returns "".
        for paragraph in file_object:
            print(paragraph)

>>> orc("temp.txt")

The Art of Cheesemaking

The Art of Cheesemaking
University of Wisconsin Extension
2011

NLTK word_tokenize()

cell_structureNLTK word_tokenize()
http://www.nltk.org/book/ch03.html#tokenization_index_term

>>> from nltk import word_tokenize

>>> word_tokenize('cells structure')
['cells', 'structure']

>>> with open('h.txt') as f:
...     word_tokenize(f.read())

Source code for nltk.collocations()
http://www.nltk.org/api/nltk.html
http://www.nltk.org/_modules/nltk/collocations.html

>>> dir(nltk.collocations)

>>> print(nltk.collocations.__doc__)

>>> with open('Histology14_Ch01_ALL.txt') as f:
...     nltk.Text(word_tokenize(f.read())).collocations()

cell nuclei;
electron microscopy;
fluorescent compounds;
glass slides;
gold particles;
labeled secondary; <=======
light microscope;
light microscopy;
MEDICAL APPLICATION;
nucleic acids;
objective lens;
organic solvents;
primary antibody;
resolving power;
secondary antibody;
secretory granules;
situ hybridization; <=======
tissue components;
tissue section;
tissue sections

>>> with open('Histology14_Ch01_i.txt') as f:
...     nltk.Text(word_tokenize(f.read())).collocations()

matrix components;
tissue biology

 

Naturally, the quality of the collocations is also higher than computer-generated lists – as we would expect from a manually produced compilation.
p. 174
http://nlp.stanford.edu/fsnlp/promo/colloc.pdf
phrasal verbs: good example of a collocation with often non-adjacent words

related:
Collocations dictionary
https://franzcalvo.wordpress.com/2015/09/07/collocations

 

 

SQLite

SQLite
http://www.sqlite.org

https://docs.python.org/3.5/library/sqlite3.html

import sqlite3


def count_organizations(conn, lines):
    """Count sender organizations found in mbox-style ``From: `` lines.

    Recreates the ``Counts (org TEXT, count INTEGER)`` table on ``conn``,
    then adds one to an organization's count for every line that starts
    with ``'From: '``. The organization is the text after the first ``@``
    in the second whitespace-separated field. Each counted organization is
    printed as it is processed.

    Args:
        conn: An open ``sqlite3`` connection.
        lines: Iterable of text lines (for example an open file).

    Returns:
        The ten most frequent ``(org, count)`` rows, highest count first.
    """
    cur = conn.cursor()
    cur.execute('DROP TABLE IF EXISTS Counts')
    cur.execute('CREATE TABLE Counts (org TEXT, count INTEGER)')

    for line in lines:
        if not line.startswith('From: '):
            continue
        pieces = line.split()
        # Skip malformed headers rather than crash with an IndexError.
        if len(pieces) < 2 or '@' not in pieces[1]:
            continue
        organization = pieces[1].split('@')[1]
        cur.execute('SELECT count FROM Counts WHERE org = ? ',
                    (organization,))
        row = cur.fetchone()
        if row is None:
            cur.execute('INSERT INTO Counts (org, count) VALUES ( ?, 1 )',
                        (organization,))
        else:
            cur.execute('UPDATE Counts SET count=count+1 WHERE org = ?',
                        (organization,))
        print(organization)

    # One commit after the loop; committing per row only slows the import.
    conn.commit()

    sqlstr = 'SELECT org, count FROM Counts ORDER BY count DESC LIMIT 10'
    rows = cur.execute(sqlstr).fetchall()
    cur.close()
    return rows


def main():
    """Prompt for a mailbox file, tally organizations, print the top ten."""
    conn = sqlite3.connect('emaildb.sqlite')
    try:
        fname = input('Enter file name: ')
        if len(fname) < 1:
            fname = 'mbox.txt'
        # Open the file first so a bad name does not wipe the existing table.
        with open(fname) as fh:
            rows = count_organizations(conn, fh)
        for row in rows:
            print(str(row[0]), row[1])
    finally:
        conn.close()


if __name__ == '__main__':
    main()