-
Notifications
You must be signed in to change notification settings - Fork 0
/
cache.py
72 lines (61 loc) · 1.89 KB
/
cache.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import urllib
import urllib2
from StringIO import StringIO
import logging
class cache():
    """Directory-backed cache for downloaded pages and images.

    Every entry is a single file inside the ``storage`` directory, named by
    the ``fname`` the caller supplies.  ``dw_html`` and ``dw_img`` download a
    URL with ``urllib2`` only when no file of that name is stored yet.
    """

    def __init__(self, storage, referer=None, enable=1):
        """Create the storage directory if needed and remember the settings.

        storage -- directory that holds the cached files; created (including
                   missing parents) when it does not exist.
        referer -- optional value for the ``Referer`` header sent by
                   ``dw_img``; ``None`` sends no such header.
        enable  -- accepted for backward compatibility; this class does not
                   read it.
        """
        if not os.path.isdir(storage):
            os.makedirs(storage)
        self.storage = storage
        self.referer = referer

    def _path(self, fname):
        """Return the location of the cache entry called ``fname``."""
        return os.path.join(self.storage, fname)

    def _fetch(self, url, referer=None):
        """Download ``url`` and return its body, or ``None`` on failure.

        When ``referer`` is not ``None`` it is sent as the ``Referer`` header.
        An ``IOError`` while opening or reading the URL is logged as a warning
        instead of being propagated.
        """
        req = urllib2.Request(url)
        if referer is not None:
            req.add_header("Referer", referer)
        try:
            handle = urllib2.urlopen(req)
            try:
                return handle.read()
            finally:
                # Release the connection even when read() fails.
                handle.close()
        except IOError as e:
            # urllib2.URLError derives from IOError, so network failures
            # land here as well.
            logging.warning("IOerror %s", e)
            return None

    def get(self, fname):
        """Return the stored content of the entry ``fname``.

        Raises IOError when the entry cannot be opened.
        """
        # Binary mode: entries may be images, which must come back
        # byte-for-byte.
        with open(self._path(fname), 'rb') as stored:
            return stored.read()

    def put(self, fname, content):
        """Store ``content`` as the entry ``fname``, replacing any old one.

        Returns whatever the underlying ``write`` call returns.
        """
        # Binary mode keeps downloaded data untouched by newline translation.
        with open(self._path(fname), 'wb') as stored:
            return stored.write(content)

    def incache(self, fname):
        """Return 1 when a file named ``fname`` is stored, otherwise 0."""
        if os.path.isfile(self._path(fname)):
            return 1
        return 0

    def dw_html(self, url, fname):
        """Return the page stored as ``fname``, downloading ``url`` if needed.

        A freshly downloaded page is stored under ``fname`` before it is
        returned.  Returns ``None`` when the page is not cached and the
        download fails; nothing is stored in that case.
        """
        if self.incache(fname):
            logging.debug("Not re-downloading file")
            return self.get(fname)
        page_body = self._fetch(url)
        if page_body is not None:
            self.put(fname, page_body)
        return page_body

    def dw_img(self, url, fname):
        """Download ``url`` into the entry ``fname`` unless it already exists.

        The request carries the configured ``Referer`` header, if any.  A
        failed download is logged and leaves the cache unchanged.  Always
        returns ``None``.
        """
        if self.incache(fname):
            logging.debug("Not re-downloading image file")
            return
        logging.info("Downloading link %s to file %s", url, fname)
        data = self._fetch(url, self.referer)
        if data is not None:
            self.put(fname, data)