Skip to content

Commit

Permalink
Merge pull request #6 from silshack/gh-pages
Browse files Browse the repository at this point in the history
merge request
  • Loading branch information
erholmes committed Sep 30, 2013
2 parents 526ffc8 + 8b0ed06 commit 3726ed6
Show file tree
Hide file tree
Showing 127 changed files with 118,925 additions and 18,802 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1 @@
site*
_site/*
20 changes: 20 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
The MIT License (MIT)

Copyright (c) 2013 Elliott Hauser

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
SILShack, Fall 2013
===================

This is the source code for the collaboratively edited blog/site for the Fall 2013 session of UNC INLS 560, Programming for Information Professionals.

Expand Down
22 changes: 22 additions & 0 deletions _includes/add_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
def add_names(list_of_names, file):
    """
    Append each name in ``list_of_names`` to the end of ``file``, one per line.

    list_of_names: iterable of names; each one is converted with str() and
        written followed by a newline.
    file: path of the file to append to (opened in 'a' mode, so existing
        content is kept).
    """
    # We open the file in 'a' mode, for appending to it. The ``with`` block
    # closes it (so the changes are visible) even if writing a name fails.
    with open(file, 'a') as names_file:
        # For each name in the list, we write that name to the file.
        # This assumes one name per line.
        for name in list_of_names:
            names_file.write(str(name) + "\n")


# Exercise: make new_names customizable:
new_names = ['John', 'Sarah', 'Taj']

# Exercise: make the file name used here customizable:
add_names(new_names, 'names.txt')

1 change: 1 addition & 0 deletions _includes/gerbal/execise1.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions _includes/gerbal/execise2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
There are 4311 unique words in this work
21 changes: 21 additions & 0 deletions _includes/gerbal/execise3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The twenty most common terms in this work are:
('the', 1574)
('and', 1061)
('a', 705)
('to', 679)
('of', 662)
('in', 520)
('it', 516)
('he', 485)
('was', 427)
('his', 420)
('that', 338)
('i', 337)
('scrooge', 314)
('with', 269)
('you', 233)
('as', 228)
('said', 221)
('had', 205)
('him', 198)
('for', 197)
1 change: 1 addition & 0 deletions _includes/gerbal/execise4.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
['', 'neighbours', 'jacob', 'warnt', 'poulterers', 'laundress', 'mr', 'execrable', 'endeavoured', 'dunstan', 'london', 'ebenezer', 'damascus', 'accusatory', 'recognising', 'crusoe', 'hadnt', 'waistcoats', 'conducive', 'everyhow', 'ebook', 'ironmongery', 'whomsoever', 'o', 'mens', 'reverently', 'behindhand', 'recognised', 'abels', 'iv', 'ii', 'im', 'youre', 'jose', 'schoolmaster', 'countrys', 'theyre', 'christmas', 'dilber', 'recollect', 'waistcoat', 'petrification', 'outstretched', 'undisturbed', 'blindmans', 'laocon', 'd', 'elses', 'munifi', 'wouldnt', 'december', 'sunday', 'frousy', 'marleys', 'browed', 'drowsiness', 'undigested', 'pastrycooks', 'spanish', 'unavailing', 'solemnised', 'despairingly', 'caroline', 'unaltered', 'scrutinise', 'lifes', 'flutterings', 'imploringly', 'foldings', 'defenceless', 'fezziwig', 'youd', 'pleasantry', 'endeavouring', 'hed', 'scro', 'broadwise', 'curiously', 'administered', 'menendez', 'recognise', 'ali', 'childs', 'overflowings', 'christian', '1843', 'fragrance', 'miserys', 'der', 'v', 'almshouse', 'robinson', 'shouldnt', 'a', 'isnt', 'norfolk', 'yo', 'fezziwigs', 'excrescence', 'robert', 'friday', 'dont', 'cornhill', 'rustlings', 'fred', 'i', 'wasnt', 'unconstrained', 'charitable', 'undertakers', 'weathercock', 'laundresss', 'belshazzars', 'c', 'coverley', 'hilli', 'whos', 'camden', 'ful', 'cratchits', 'belinda', 'tim', 'endeavour', 'passionless', 'wilkins', 'irresistibly', 'saturdays', 'penetrated', 'instalments', 'mrs', 'neighbouring', 'lamplighter', 'cratchit', 'oclock', 'skreeks', 'havent', 'tims', 'thats', 'wailings', 'lamplight', 'iii', 'didnt', 'orson', 'plentys', 'childrens', 'whitechapel', 'abrahams', 'marley', 'residuary', 'delicately', 'oge', 'uncurtained', 'uncared', 'martha', 'sheba', 'fruiterers', 'monstrous', 'youll', 'severally', 'observable', 'thankee', 'theyve', 'britain', 'ive', 'repleted', 'charles', 'pauls', 'hasnt', 'sprinklings', 'delightful', 'gutenberg', 'submissively', 'theyd', 'couldnt', 
'wheresoever', 'demeanour', 'inasmuch', 'shufflings', 'shant', 'dowerless', 'monday', 'unwatched']
102 changes: 102 additions & 0 deletions _includes/gerbal/extra1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import re,string,operator,os

def process_file(filename, guten):
    """
    Build a word histogram (word -> count) for the text file ``filename``.

    filename: path of the text file to read.
    guten: if truthy, lines are skipped until a start-of-text marker line has
        been seen; if falsy, counting begins with the first line. In both
        cases counting stops at a line starting with the end marker.

    Returns a dict mapping each word, as normalized by process_line(), to the
    number of times it was counted.
    """
    # Markers are matched against the first 20 characters of a line.
    end_marker = "*** END OF THIS PROJ"
    # Per the original author, "*END*THE SMALL PRINT" is only needed for the
    # Shakespeare folios file ("00ws110.tt").
    start_markers = ("*END*THE SMALL PRINT", "*** START OF THIS PR")

    hist = dict()
    # While ``header`` is true the current line is skipped, not counted.
    header = bool(guten)
    # ``with`` guarantees the file is closed, even if process_line() raises.
    with open(filename) as fp:
        for line in fp:
            if line.startswith(end_marker):
                header = True
            if not header:
                process_line(line, hist)
            # Tested after the counting step, so a start-marker line seen
            # while still in the header is itself skipped.
            if line.startswith(start_markers):
                header = False
    return hist

def process_line(line, hist):
    """
    Count the words of one line of text into the histogram ``hist``.

    Hyphens are treated as word separators. Each whitespace-separated token
    is stripped of surrounding punctuation, lower-cased, and cleared of any
    remaining non-word characters and underscores. Tokens that end up empty
    (for example "***" or "&") are skipped instead of being counted as the
    word ''.

    line: the text to tokenize.
    hist: dict mapping word -> count; updated in place. Nothing is returned.
    """
    # Loop-invariant, so build it once per call rather than once per word.
    strip_chars = string.punctuation + string.whitespace

    # Replacing '-' with a space cleans up hyphenated words. Splitting on
    # whitespace (rather than with a regex) leaves punctuation attached to
    # the ends of each token, where strip() removes it cleanly.
    for word in line.replace('-', ' ').split():
        word = word.strip(strip_chars).lower()
        # Gets rid of characters inside the word that strip() cannot reach,
        # including the few special characters not in string.punctuation.
        word = re.sub(r'[\W_]+', '', word)
        if not word:
            # Pure-punctuation token: nothing left to count.
            continue
        hist[word] = hist.get(word, 0) + 1

def top_20(hist):
    """
    Format the twenty most frequent entries of ``hist`` as text.

    hist: dict mapping word -> count.

    Returns a heading line followed by one ``(word, count)`` tuple per line,
    highest count first, each line terminated by a newline. If ``hist`` has
    fewer than twenty entries, all of them are listed.
    """
    # items() rather than iteritems() so this runs on Python 2 and 3 alike.
    hist_sorted = sorted(hist.items(), key=operator.itemgetter(1), reverse=True)
    output = "The twenty most common terms in this work are:\n"
    # Slicing (unlike indexing 0..19) copes with fewer than twenty entries.
    output += "".join(str(pair) + "\n" for pair in hist_sorted[:20])
    return output

def compare_lists(list1, list2):
    """
    Flag which words of ``list1`` are absent from ``list2``.

    list1: iterable of words (iterating a dict yields its keys).
    list2: any container supporting ``in``, e.g. a dict of known words.

    Returns a dict mapping each word of ``list1`` to True if it is NOT in
    ``list2`` and to False if it is.
    """
    # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
    return {word: word not in list2 for word in list1}

def return_true_words(dic):
    """
    Return a list of the keys of ``dic`` whose value is truthy.

    dic: dict mapping word -> flag.

    The keys appear in the dict's own iteration order.
    """
    return [word for word in dic if dic[word]]

def exercise1(book):
    """
    Return the text of the file ``book`` as one long string of cleaned words.

    process_file() got rather complicated and too sophisticated for this
    problem, so the cleaning is simply rewritten here. We could define a new
    function, but the preference is to do that only once the same problem
    needs solving three or more times.

    Hyphens are treated as word separators; each whitespace-separated token
    is stripped of surrounding punctuation, lower-cased, cleared of remaining
    non-word characters and underscores, and appended followed by a single
    space. A token made up solely of punctuation therefore contributes just
    that space.

    book: path of the text file to read.
    """
    # Loop-invariant, so build it once rather than once per word.
    strip_chars = string.punctuation + string.whitespace
    # Collect the pieces and join once at the end; repeated string
    # concatenation gets slow on a whole book.
    pieces = []
    # ``with`` guarantees the file is closed when we are done with it.
    with open(book) as fp:
        for line in fp:
            for word in line.replace('-', ' ').split():
                word = word.strip(strip_chars).lower()
                # The regular expression cleans up weird characters not
                # included in string.punctuation.
                word = re.sub(r'[\W_]+', '', word)
                pieces.append(word + " ")
    return "".join(pieces)

def exercise2(book):
    """
    Report how many distinct words the file ``book`` contains.

    The histogram comes from process_file() called with guten=True; the
    number of its keys is formatted into a one-sentence summary string.
    """
    unique_word_count = len(process_file(book, True))
    return "There are %d unique words in this work" % unique_word_count

def exercise3(book):
    """
    Return the top_20() report for the file ``book``.

    The histogram passed to top_20() comes from process_file() called with
    guten=True.
    """
    return top_20(process_file(book, True))

def exercise4(book):
    """
    Return the words of the file ``book`` that do not occur in "words.txt".

    Both files are turned into histograms by process_file() (``book`` with
    guten=True, "words.txt" with guten=False); compare_lists() then flags the
    words of ``book`` missing from "words.txt", and return_true_words()
    collects the flagged ones into a list.
    """
    book_words = process_file(book, True)
    known_words = process_file("words.txt", False)
    missing_flags = compare_lists(book_words, known_words)
    return return_true_words(missing_flags)  # long

def writeitallout(book='pg46.txt'):
    """
    Run exercise1..exercise4 on ``book`` and write each result to a file.

    The result of exercise N is converted with str() and written to
    "execiseN.txt" (relative to the current working directory), replacing
    any previous content. A progress line naming that file is printed for
    each exercise.

    book: path of the text file handed to every exercise; defaults to
        'pg46.txt', the file this script was originally hard-wired to.
    """
    # Explicit dispatch table instead of eval("exercise%d" % i): the same
    # four functions get called, without evaluating strings as code.
    exercises = (exercise1, exercise2, exercise3, exercise4)
    for i, exercise in enumerate(exercises, 1):
        # NOTE: the "execise" spelling is what this script has always
        # written to disk, so it is kept; the progress message now reports
        # this real file name rather than a differently spelled one.
        filename = "execise%d.txt" % i
        print("Writing exercise %d to file %s" % (i, filename))
        # Compute first, so a failing exercise does not leave behind a
        # freshly truncated, empty output file.
        outtext = str(exercise(book))
        # ``with`` closes the file even if the write fails.
        with open(filename, "w") as output:
            output.write(outtext)

# Script entry point: run every exercise and write the output files. There is
# no __main__ guard, so this also runs when the module is imported.
writeitallout()

Loading

0 comments on commit 3726ed6

Please sign in to comment.