-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.php
44 lines (33 loc) · 1.5 KB
/
scraper.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
<?php
require 'scraperwiki.php';
######################################
# Basic PHP scraper
######################################
require 'scraperwiki/simple_html_dom.php';
// Interpret the scraped chart date in UK local time.
date_default_timezone_set('Europe/London');

// Download the singles-chart page.
$html = scraperwiki::scrape("http://www.theofficialcharts.com/singles-chart/");

// The chart date is the text following " - " inside <span class="date">.
// Fail loudly when it is missing or unparseable: otherwise date() would be
// handed a bogus timestamp and every row would be stored under 1970-01-01.
if (!preg_match('|<span class="date"> - (.*?)</span>|', $html, $dateMatch)) {
    throw new RuntimeException('Chart date not found; the page markup may have changed.');
}
$timestamp = strtotime($dateMatch[1]);
if ($timestamp === false) {
    throw new RuntimeException('Unparseable chart date: ' . $dateMatch[1]);
}
$date = date('Y-m-d', $timestamp);

// Per-row table cells, each collected in document order.
preg_match_all('|<td class="currentposition">(.*?)</td>|', $html, $arr);
$current = $arr[1];
preg_match_all('|<td class="lastposition">(.*?)</td>|', $html, $arr);
$last = $arr[1];
preg_match_all('|<td class="weeks">(.*?)</td>|', $html, $arr);
$weeks = $arr[1];

// "." does not match newlines in the patterns here, so strip all line breaks
// before matching the info block.
$html_oneline = str_replace(array("\r", "\n"), '', $html);
preg_match_all('|<div id="wide"><div class="infoHolder"> <img class="coverimage" src="(.*?)" /> <h4>(.*?)</h4>(.*?)<br /> <span class="label">\((.*?)\)</span> </div>|i', $html_oneline, $arr);
$cover = $arr[1];
$song = $arr[2];
$artist = $arr[3];
$label = $arr[4];

// Save one record per matched info block. $cover/$artist/$label come from the
// same match as $song and are always aligned with it; $current/$last/$weeks
// come from separate patterns and are paired purely by position, so guard the
// lookups instead of reading an undefined offset when the counts differ.
foreach ($song as $key => $title) {
    $record = array(
        'date'    => $date,
        'current' => isset($current[$key]) ? $current[$key] : null,
        'last'    => isset($last[$key]) ? $last[$key] : null,
        'weeks'   => isset($weeks[$key]) ? $weeks[$key] : null,
        'song'    => cleanText($title),
        'artist'  => cleanText($artist[$key]),
        'cover'   => $cover[$key],
        'label'   => cleanText($label[$key]),
    );
    // First argument is presumably the list of unique-key columns -- confirm
    // against the ScraperWiki library.
    scraperwiki::save(array('current', 'date'), $record);
}
/**
 * Turn a scraped HTML fragment into plain text: decode HTML entities and
 * strip leading/trailing whitespace.
 *
 * ENT_QUOTES is passed explicitly so that single-quote entities such as
 * &#039; are decoded on every PHP version (the default flags only started
 * covering single quotes in PHP 8.1). The follow-up replacement catches
 * apostrophe entities that are still present after decoding, e.g. &apos;
 * (not part of the default HTML 4.01 entity table) or double-encoded input.
 *
 * @param string $str Raw text captured from the page.
 * @return string Decoded, trimmed text.
 */
function cleanText($str) {
    $str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
    $str = str_replace(array('&#039;', '&apos;'), "'", $str);
    return trim($str);
}
?>