diff --git a/README.md b/README.md
index 0651b28..97788af 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,6 @@
 CourseraDownloader
 ==================
 
-Download videos off Coursera without needing an account
\ No newline at end of file
+Download videos off Coursera without needing an account.
+
+All you need is the /lecture/preview link. The script will crawl all lectures and save files with a standardized name.
\ No newline at end of file
diff --git a/grab.php b/grab.php
new file mode 100644
index 0000000..5039bb0
--- /dev/null
+++ b/grab.php
@@ -0,0 +1,96 @@
+<?php
+// Crawls a Coursera /lecture/preview page and downloads every lecture video it
+// links to into ./files/, named "<section> - E<n> - <title>.mp4".
+//
+// NOTE(review): the head of this script is missing from this copy of the patch:
+// everything before the "->loadHTMLFile($page)" call was lost. The opening tag
+// and the $dom construction below are reconstructed; the assignment of $page
+// (per the README, the course's /lecture/preview link) is not visible here and
+// must be restored from the original commit before the script can run.
+
+/**
+ * Streams a single URL into a local file with cURL.
+ *
+ * The cURL handle and the file handle are always released before returning, so
+ * each video is flushed to disk as soon as its transfer ends.
+ *
+ * @param string $url         Address of the video to fetch.
+ * @param string $destination Path of the file to create or overwrite.
+ * @return bool True when the transfer completed, false otherwise.
+ */
+function downloadVideo($url, $destination)
+{
+    $fp = fopen($destination, 'w');
+    if ($fp === false) {
+        echo 'Cannot open '.$destination.' for writing'.PHP_EOL;
+        return false;
+    }
+
+    $ch = curl_init();
+    curl_setopt_array($ch, array(
+        CURLOPT_FILE => $fp,
+        CURLOPT_TIMEOUT => 28800, // set this to 8 hours so we dont timeout on big files
+        CURLOPT_URL => $url,
+        // NOTE(review): peer verification is switched off, exactly as before; this
+        // exposes downloads to man-in-the-middle tampering. Drop the option once a
+        // CA bundle is configured for cURL.
+        CURLOPT_SSL_VERIFYPEER => false
+    ));
+    $succeeded = curl_exec($ch) !== false;
+    if (!$succeeded) {
+        echo 'Download failed: '.curl_error($ch).PHP_EOL;
+    }
+    curl_close($ch);
+    fclose($fp);
+
+    return $succeeded;
+}
+
+$dom = new DOMDocument();
+$dom->loadHTMLFile($page);
+$xpath = new DOMXPath($dom);
+$itemListNodes = $xpath->query('//div[contains(@class,"course-item-list")]');
+if ($itemListNodes->length === 0) {
+    echo 'No course-item-list element found in '.$page.PHP_EOL;
+    exit(1);
+}
+
+$itemList = $itemListNodes->item(0);
+$headerNodes = $xpath->query('div[contains(@class,"course-item-list-header")]/h3', $itemList);
+$listNodes = $xpath->query('ul[contains(@class,"course-item-list-section-list")]', $itemList);
+// Section headers and lecture lists are paired by position, so the counts must agree.
+if ($headerNodes->length !== $listNodes->length) {
+    echo 'Section headers and lecture lists do not line up; nothing downloaded'.PHP_EOL;
+    exit(1);
+}
+
+$targetDir = __DIR__.'/files';
+if (!is_dir($targetDir)) {
+    mkdir($targetDir, 0777, true);
+}
+
+for ($i = 0; $i < $headerNodes->length; $i++) {
+    // Strip control and non-ASCII bytes, then turn "Lecture1" into "Lecture 1".
+    $header = preg_replace('/Lecture(\d)/', 'Lecture $1', trim(preg_replace('/[\x00-\x1F\x80-\xFF]/', '', $headerNodes->item($i)->nodeValue)));
+    foreach ($xpath->query('li/a', $listNodes->item($i)) as $idx => $item) {
+        // Remove ":" and "?", then cut everything from a "[" to the end of the text.
+        $videoTitle = trim(preg_replace('/([\[].*)$/', '', str_replace(array(':', '?'), '', $item->nodeValue)));
+        $videoLink = $item->getAttribute('data-modal-iframe');
+        if ($videoLink === '') {
+            // Passing an empty path to loadHTMLFile() is an error, so skip such links.
+            continue;
+        }
+
+        $dom2 = new DOMDocument();
+        $dom2->loadHTMLFile($videoLink);
+        $xpath2 = new DOMXPath($dom2);
+        $videoNodes = $xpath2->query('//source[@type="video/mp4"]');
+        if ($videoNodes->length === 0) {
+            continue;
+        }
+
+        $name = $header.' - E'.($idx+1).' - '.$videoTitle;
+        echo 'Downloading '.$name.'...'.PHP_EOL;
+        downloadVideo($videoNodes->item(0)->getAttribute('src'), $targetDir.'/'.$name.'.mp4');
+    }
+}