"""Convert Slack messages exported in their complicated JSON format to simple CSV format.

To run the application use the following command::

    python slack_json_to_csv.py folder_of_channel_to_export path_to_slack_users.json path_to_output_slack_messages.csv

e.g.::

    python slack_json_to_csv.py slack_export/channelA slack_export/users.json output.csv

Every exported message becomes one CSV row: the message time in UTC
(``YYYY-MM-DD HH:MM:SS``), the message text with Slack markup made readable,
and the author's name.

Requires Python 3 (text-mode CSV output, timezone-aware timestamps).
"""

# Reconstructed from the newline-stripped git patch 40b8b8c ("First commit of the
# code which does the basic work", Saurabh Minni, 2016-06-17), which added this
# script together with the README usage text quoted in the docstring above.

import csv
import json
import os
import re
import sys
from datetime import datetime, timezone
from pprint import pprint  # noqa: F401  (unused; retained from the original import list)

# Maps Slack user id -> [display name]. Filled by main() and read by
# handle_mention() / message_to_row(). The one-element list shape is kept from
# the original script.
user = {}

# One alternation so every piece of Slack markup is rewritten in a single pass:
# text produced by one replacement is never re-interpreted by another (e.g.
# "&amp;gt;" becomes "&gt;", not ">", and an escaped "&lt;@U1&gt;" typed by a
# user is not mistaken for a mention).
_SLACK_TOKEN_RE = re.compile(
    r"<@U\w+\|[A-Za-z0-9._-]+>"  # annotated mention, e.g. <@U0BM1CGQY|the100rabh>
    r"|<@U\w+>"                  # bare mention, e.g. <@U0BM1CGQY>
    r"|<!channel>"               # channel-wide notification
    r"|&(?:amp|lt|gt);"          # the three HTML entities Slack documents as escaped
)

# Fixed-text tokens and their plain-text replacements.
_LITERAL_TOKENS = {
    "<!channel>": "@channel",
    "&amp;": "&",
    "&lt;": "<",
    "&gt;": ">",
}

_HAS_LETTER_RE = re.compile(r"[a-zA-Z]")

_USAGE = (
    "usage: python slack_json_to_csv.py folder_of_channel_to_export "
    "path_to_slack_users.json path_to_output_slack_messages.csv\n"
)


def handle_annotated_mention(matchobj):
    """Return ``@name`` for a match of the form ``<@UXXXX|name>``."""
    # Strip the leading "<@" and trailing ">", then keep the part after "|".
    return "@{}".format(matchobj.group(0)[2:-1].split("|")[1])


def handle_mention(matchobj):
    """Return ``@<display name>`` for a match of the form ``<@UXXXX>``.

    The name is looked up in the module-level ``user`` map. An id that is not
    in the map is rendered as ``@UXXXX`` instead of raising ``KeyError``.
    """
    user_id = matchobj.group(0)[2:-1]
    entry = user.get(user_id)
    return "@{}".format(entry[0] if entry else user_id)


def _replace_token(matchobj):
    """Dispatch one ``_SLACK_TOKEN_RE`` match to the right replacement."""
    token = matchobj.group(0)
    if token.startswith("<@"):
        if "|" in token:
            return handle_annotated_mention(matchobj)
        return handle_mention(matchobj)
    return _LITERAL_TOKENS[token]


def transform_text(text):
    """Return *text* with Slack markup replaced by readable plain text.

    Handles ``<@U…|name>`` and ``<@U…>`` mentions, ``<!channel>``, and the
    ``&amp;`` / ``&lt;`` / ``&gt;`` entities.
    """
    return _SLACK_TOKEN_RE.sub(_replace_token, text)


def load_users(path):
    """Read the users JSON file at *path* and return ``{user id: [name]}``.

    ``real_name`` is preferred; the ``name`` field is used when ``real_name``
    is missing, empty, or contains no ASCII letter.
    """
    with open(path, encoding="utf-8") as user_data:
        records = json.load(user_data)
    names = {}
    for record in records:
        display = record.get("real_name")
        if not display or not _HAS_LETTER_RE.search(display):
            display = record["name"]
        names[record["id"]] = [display]
    return names


def _author_name(item):
    """Return the best available author name for a message dict."""
    user_id = item.get("user")
    entry = user.get(user_id)
    if entry:
        return entry[0]
    # NOTE(review): messages without a known "user" presumably come from bots
    # or integrations that carry "username" instead -- confirm against a real
    # export. Falling back keeps the row instead of aborting the whole run.
    return user_id or item.get("username") or ""


def message_to_row(item):
    """Return ``[time, text, author]`` for one exported item, or ``None``.

    ``None`` is returned for items whose ``type`` is not ``"message"`` and for
    "has joined the channel" notices, which are not exported.
    """
    if item.get("type") != "message":
        return None
    text = item.get("text") or ""
    if "> has joined the channel" in text:
        return None
    when = datetime.fromtimestamp(float(item["ts"]), tz=timezone.utc)
    return [when.strftime("%Y-%m-%d %H:%M:%S"), transform_text(text), _author_name(item)]


def iter_rows(jsondir):
    """Yield one CSV row per exportable message found in *jsondir*.

    Only ``*.json`` entries are read, in sorted name order, because
    ``os.listdir`` returns entries in arbitrary order and the directory may
    hold other files.
    """
    for filename in sorted(os.listdir(jsondir)):
        if not filename.endswith(".json"):
            continue
        with open(os.path.join(jsondir, filename), encoding="utf-8") as data_file:
            data = json.load(data_file)
        for item in data:
            row = message_to_row(item)
            if row is not None:
                yield row


def main(argv=None):
    """Run the conversion; *argv* defaults to ``sys.argv[1:]``.

    Returns a process exit code: 0 on success, 2 on wrong argument count.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 3:
        sys.stderr.write(_USAGE)
        return 2
    jsondir, userjson, outcsv_file = args

    # Load users before touching the output path, so a bad users file does not
    # leave a truncated CSV behind.
    user.clear()
    user.update(load_users(userjson))

    # newline="" is what the csv module expects for files it writes to.
    with open(outcsv_file, "w", encoding="utf-8", newline="") as out:
        csvwriter = csv.writer(out, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in iter_rows(jsondir):
            csvwriter.writerow(row)
    return 0


if __name__ == "__main__":
    sys.exit(main())