-
Notifications
You must be signed in to change notification settings - Fork 1
Zawartość danych 2013.12.18
dmydlarz edited this page Dec 18, 2013
·
4 revisions
- 4 790 909 wszystkich wpisów
- 152 574 (3,186 %) z geolokalizacją
- Na podstawie meczu Manchester City - Arsenal FC (2013-12-14 13:45)
- Nasłuchiwanie: 12:58:05 - 19:35:58
- 525 799 wpisów
- 15 561 (2,96 %) z geolokalizacją
SELECT *
FROM tweets
WHERE match_event = 715
AND latitude IS NOT null
- 9075 (1,73 %) z emotikonami (20 najpopularniejszych - http://datagenetics.com/blog/october52012/index.html)
select id, latitude, created_at, text, user_id
from tweets
where match_event = 715 AND (
text LIKE '%:)%' OR
text LIKE '%:D%' OR
text LIKE '%:(%' OR
text LIKE '%;)%' OR
text LIKE '%:-)%' OR
text LIKE '%:P%' OR
text LIKE '%=)%' OR
text LIKE '%(:%' OR
text LIKE '%;-)%' OR
(text LIKE '%:/%' AND text NOT LIKE '%://%') OR
text LIKE '%XD%' OR
text LIKE '%=D%' OR
text LIKE '%:o%' OR
text LIKE '%=]%' OR
-- text LIKE '%D:%' OR
text LIKE '%;D%' OR
text LIKE '%:]%' OR
text LIKE '%:-(%' OR
text LIKE '%=/%' OR
text LIKE '%=(%'
)
order by created_at
- 152464 (29%) z hashtagami
SELECT count(*)
FROM tweets
WHERE match_event = 715
AND text LIKE '%#%'
- 46749 (8,89 %) wpisów z odwołaniami do innych użytkowników, nie będących odpowiedziami ani retweetami
SELECT *
FROM tweets
WHERE match_event = 715
AND text LIKE '%@%'
AND text NOT LIKE 'RT @%: %'
AND in_reply_to_user_id = -1
- 49995 (9,5 %) będących odpowiedziami
in_reply_to_user_id != -1
- 227261 (43,22 %) retweetów
SELECT *
FROM tweets
WHERE match_event = 715
AND text LIKE 'RT @%: %'
AND in_reply_to_user_id = -1
- 248543 (47,27 %) zwykłych wpisów (nie będących odpowiedziami ani retweetami)
SELECT *
FROM tweets
WHERE match_event = 715
AND text NOT LIKE 'RT @%: %'
AND in_reply_to_user_id = -1
-
226186 autorów (avg = 2,325 wpisów / autora)
-
10 najwięcej piszących autorów
id screen_name liczba wpisów 1670241355 newsnow_arsenal 186 136964211 BarclaysLeague 107 221324862 potongkelape 104 446140418 Prinz_Poldi92 101 2199508952 abdultanko14 95 260736981 bryoshavin 93 446498804 PandaGCMC 93 126244954 Damite2ky 92 309883345 RazArsenal6 91 381670402 Swinestar96 89
SELECT user_id, u.screen_name, count(*) as liczbaTweetow
FROM tweets t JOIN users u ON t.user_id = u.id
WHERE match_event = 715
GROUP BY user_id, screen_name
ORDER BY liczbaTweetow desc
-
10 autorów, którym najczęściej odpowiadano (in_reply_to_user_id)
id screen_name liczba odpowiedzi 34613288 Arsenal 3197 216299334 piersmorgan 2010 14573900 MCFC 1259 22910295 chelseafc 392 365953260 FootballFunnys 251 343627165 premierleague 240 370536393 Footy_Jokes 186 105297123 EASPORTSFIFA 165 435225922 TSBible 154 21181713 liamgallagher 147
SELECT in_reply_to_user_id, u.screen_name, count(*) as liczbaTweetow
FROM tweets t JOIN users u ON t.in_reply_to_user_id = u.id
WHERE match_event = 715
GROUP BY in_reply_to_user_id, screen_name
ORDER BY liczbaTweetow desc
- Liczba wpisów / liczba userów
- https://docs.google.com/spreadsheet/pub?key=0AjZj7xUw8iiTdG1IcmdCZjhReU90OEpqbk1XM3lwQ3c&gid=0
SELECT liczbaTweetow, count(*) as liczbaUserow
FROM (
SELECT user_id, u.screen_name, count(*) as liczbaTweetow
FROM tweets t JOIN users u ON t.user_id = u.id
WHERE match_event = 715
GROUP BY user_id, screen_name
) as view1
group by liczbaTweetow
ORDER BY liczbaTweetow desc
- Liczba użytkowników, która pisała w konkretnej liczbie meczów
SELECT liczbaMeczow, count(*) liczbaUserow
FROM (
SELECT user_id, screen_name, count(*) liczbaMeczow
FROM (
SELECT DISTINCT user_id, screen_name, match_event
FROM tweets t JOIN users u ON t.user_id = u.id
WHERE match_event IS NOT NULL
) as view1
GROUP BY user_id, screen_name
) as view2
GROUP BY liczbaMeczow
ORDER BY liczbaMeczow DESC