Skip to content
This repository has been archived by the owner on Mar 30, 2023. It is now read-only.

Commit

Permalink
Updated storing structures
Browse files (browse the repository at this point in the history)
  • Loading branch information
pielco11 committed Aug 12, 2019
1 parent 894655f commit 0709e14
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 11 deletions.
23 changes: 21 additions & 2 deletions twint/storage/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,27 @@ def init(db):
CREATE TABLE IF NOT EXISTS
retweets(
user_id integer not null,
username text not null,
tweet_id integer not null,
retweet_id integer not null,
CONSTRAINT retweets_pk PRIMARY KEY(user_id, tweet_id),
CONSTRAINT user_id_fk FOREIGN KEY(user_id) REFERENCES users(id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor.execute(table_retweets)

table_reply_to = """
CREATE TABLE IF NOT EXISTS
replies(
tweet_id integer not null,
user_id integer not null,
username text not null,
CONSTRAINT replies_pk PRIMARY KEY (user_id, tweet_id),
CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id)
);
"""
cursor.execute(table_reply_to)

table_favorites = """
CREATE TABLE IF NOT EXISTS
Expand Down Expand Up @@ -256,8 +270,13 @@ def tweets(conn, Tweet, config):
cursor.execute(query, (config.User_id, Tweet.id))

if Tweet.retweet:
query = 'INSERT INTO retweets VALUES(?,?)'
cursor.execute(query, (config.User_id, Tweet.id))
query = 'INSERT INTO retweets VALUES(?,?,?,?)'
cursor.execute(query, (int(Tweet.user_rt_id), Tweet.user_rt, Tweet.id, int(Tweet.retweet_id)))

if Tweet.reply_to:
for reply in Tweet.reply_to:
query = 'INSERT INTO replies VALUES(?,?,?)'
cursor.execute(query, (Tweet.id, int(reply['user_id']), reply['username']))

conn.commit()
except sqlite3.IntegrityError:
Expand Down
13 changes: 9 additions & 4 deletions twint/storage/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def createIndex(config, instance, **scope):
"tweet": {"type": "text"},
"hashtags": {"type": "keyword"},
"cashtags": {"type": "keyword"},
"user_id": {"type": "long"},
"user_id_str": {"type": "keyword"},
"username": {"type": "keyword"},
"name": {"type": "text"},
Expand All @@ -86,9 +85,12 @@ def createIndex(config, instance, **scope):
"geo_near": {"type": "geo_point"},
"geo_tweet": {"type": "geo_point"},
"photos": {"type": "text"},
"user_rt_id": {"type": "integer"},
"user_rt_id": {"type": "keyword"},
"mentions": {"type": "keyword"},
"source": {"type": "keyword"}
"source": {"type": "keyword"},
"user_rt": {"type": "keyword"},
"retweet_id": {"type": "keyword"},
"reply_to": {"type": "nested"}
}
},
"settings": {
Expand Down Expand Up @@ -203,7 +205,6 @@ def Tweet(Tweet, config):
"tweet": Tweet.tweet,
"hashtags": Tweet.hashtags,
"cashtags": Tweet.cashtags,
"user_id": Tweet.user_id,
"user_id_str": Tweet.user_id_str,
"username": Tweet.username,
"name": Tweet.name,
Expand All @@ -223,6 +224,10 @@ def Tweet(Tweet, config):
}
if Tweet.retweet:
j_data["_source"].update({"user_rt_id": Tweet.user_rt_id})
j_data["_source"].update({"user_rt": Tweet.user_rt})
j_data["_source"].update({"retweet_id": Tweet.retweet_id})
if Tweet.reply_to:
j_data["_source"].update({"reply_to": Tweet.reply_to})
if Tweet.photos:
_photos = []
for photo in Tweet.photos:
Expand Down
5 changes: 4 additions & 1 deletion twint/storage/panda.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,10 @@ def update(object, config):
"near": Tweet.near,
"geo": Tweet.geo,
"source": Tweet.source,
"user_rt_id": Tweet.user_rt_id
"user_rt_id": Tweet.user_rt_id,
"user_rt": Tweet.user_rt,
"retweet_id": Tweet.retweet_id,
"reply_to": Tweet.reply_to
}
_object_blocks[_type].append(_data)
elif _type == "user":
Expand Down
14 changes: 10 additions & 4 deletions twint/storage/write_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@ def tweetData(t):
"retweet": t.retweet,
"quote_url": t.quote_url,
"video": t.video,
"user_rt_id": t.user_rt_id,
"near": t.near,
"geo": t.geo,
"source": t.source
"source": t.source,
"user_rt_id": t.user_rt_id,
"user_rt": t.user_rt,
"retweet_id": t.retweet_id,
"reply_to": t.reply_to
}
return data

Expand Down Expand Up @@ -55,10 +58,13 @@ def tweetFieldnames():
"retweet",
"quote_url",
"video",
"user_rt_id",
"near",
"geo",
"source"
"source",
"user_rt_id",
"user_rt",
"retweet_id",
"reply_to"
]
return fieldnames

Expand Down

0 comments on commit 0709e14

Please sign in to comment.