'''Week 4 exercise: list the guests who registered for both events.

Each input is a CSV file whose first line is a header naming the columns.
The columns ``fname``, ``lname`` and ``email`` are required, for example::

    fname,lname,email
    Bill,Gates,bill@xyz.com
    Bob,Dylan,bob.dylan@abc.com

A guest is reported when the same email appears in both files with the
same first and last name.  One line per guest is printed to stdout as
``fname lname <email>``.

Usage::

    python week0004.py event1.input event2.input
'''
import csv
from csv import Dialect  # unused here; kept from the original import list
import sys
from hashlib import md5


def read_csv(filename):
    '''Yield every data row of a CSV file with a header as a dict.

    General function that can be reused.

    The header is parsed by ``csv.DictReader`` itself (rather than by
    splitting the first line on commas) so quoted header fields are
    handled correctly.  Surrounding whitespace is stripped from the
    column names.

    filename -- path of the CSV file to read.

    Yields one mapping of column name -> value per data row.  An empty
    file yields nothing.  The file stays open until the generator is
    exhausted or closed.
    '''
    with open(filename, newline='') as csv_file:
        csv_reader = csv.DictReader(csv_file, delimiter=',')
        # Accessing .fieldnames makes the reader consume the header line.
        header = csv_reader.fieldnames
        if header is None:
            # Empty file: no header, therefore no rows.
            return
        csv_reader.fieldnames = [name.strip() for name in header]
        yield from csv_reader


def generate_md5(*args):
    '''Return the raw MD5 digest (bytes) of the given strings.

    General function that can be reused.

    The strings are UTF-8 encoded and fed to the hash in order, so the
    result equals the digest of their concatenation: argument boundaries
    are not part of the hash.  Calling it with no arguments returns the
    digest of the empty input.
    '''
    m = md5()
    for text in args:
        m.update(text.encode('utf-8'))
    return m.digest()


def week4_csv_to_dict(csv_rows):
    '''Index an iterable of rows by the MD5 digest of their email.

    This function is not generic and can be used for week0004 practice
    only: every row must have an ``email`` key (``KeyError`` otherwise).

    When several rows share an email only the first one is kept; later
    duplicates are skipped.

    Returns a dict mapping digest (bytes) -> row, in first-seen order.
    '''
    result = dict()
    for row in csv_rows:
        # setdefault stores the row only if the key is new, which keeps
        # the first occurrence with a single O(1) lookup.
        result.setdefault(generate_md5(row['email']), row)
    return result


def week4_match_sources(file1=None, file2=None):
    '''Yield the people who subscribed to both input files.

    file1, file2 -- paths of the two CSV files.  When omitted they are
    taken from ``sys.argv[1]`` and ``sys.argv[2]`` respectively
    (``IndexError`` if the command line is too short).

    Two rows match when they have the same email and the same ``fname``
    and ``lname``.  The row from the first file is yielded, and matches
    come out in the order they appear in the first file, so the output is
    stable from run to run (dicts preserve insertion order on
    Python 3.7+).
    '''
    if file1 is None:
        file1 = sys.argv[1]
    if file2 is None:
        file2 = sys.argv[2]
    source1 = week4_csv_to_dict(read_csv(file1))
    source2 = week4_csv_to_dict(read_csv(file2))
    for key, person in source1.items():
        other = source2.get(key)
        if other is None:
            # Email not present in the second file.
            continue
        if (person['fname'] == other['fname']
                and person['lname'] == other['lname']):
            yield person


def week4_output(file1=None, file2=None):
    '''Print the guests found in both input files to stdout.

    file1, file2 -- forwarded to ``week4_match_sources``; when omitted
    the file names are read from the command line.

    Each guest is printed on its own line as ``fname lname <email>``.
    '''
    for person in week4_match_sources(file1, file2):
        print('{} {} <{}>'.format(
            person['fname'], person['lname'], person['email']))


if __name__ == "__main__":
    if len(sys.argv) < 3:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit('usage: {} EVENT1_CSV EVENT2_CSV'.format(sys.argv[0]))
    week4_output()