-
Notifications
You must be signed in to change notification settings - Fork 1
/
labelmeretriever.py
66 lines (55 loc) · 2.31 KB
/
labelmeretriever.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
'''
Created on 2010-12-09
Downloads folders from LabelMe. Run the main() function if you don't have a dataset yet. It fails if a "data" folder already exists in the current working directory.
By default, folders whose name contains "256x256" are filtered out.
For the number parameter, enter the number of folders you want to download, or leave it at its default (-1) to download the whole database.
For the randomized parameter, set it to True if you want the script to choose folders at random, up to the count given in the number parameter.
Science of Imagination Laboratory
Author: Sebastien Ouellet - [email protected]
'''
import random
import os
import re
import urllib
import urllib2
import BeautifulSoup
def parse(website):
    """ Parse a website into a tree via BeautifulSoup.

    website -- URL of the page to fetch.

    Returns the BeautifulSoup tree built from the HTTP response. Any error
    raised while opening the URL propagates to the caller.
    """
    request = urllib2.Request(website)
    response = urllib2.urlopen(request)
    try:
        # The tree is fully built by the constructor, so closing the
        # response in the finally clause does not affect the returned tree.
        return BeautifulSoup.BeautifulSoup(response)
    finally:
        # Release the connection even when parsing fails.
        response.close()
def main(number=-1, filtering=True, randomized=False, data_directory="data"):
    """ Download LabelMe annotation and image folders into a new directory.

    number -- how many folders to download; -1 (the default) downloads
        every folder found in the listing.
    filtering -- when True, folders whose name contains "256x256" are
        left out.
    randomized -- when True and number is not -1, folders are picked at
        random instead of being taken from the top of the listing.
    data_directory -- directory created to hold the downloaded files.

    Raises OSError if data_directory already exists, because os.makedirs()
    refuses to create a directory that is already there.
    """
    os.makedirs(data_directory)

    listing = parse("http://labelme.csail.mit.edu/Annotations")
    folders = []
    for anchor in listing.findAll('a'):
        name = _link_text(anchor)
        if name is None:
            continue
        if filtering and re.search("256x256", name) is not None:
            continue
        folders.append(name)

    # The first five links are dropped; presumably they are the listing's
    # own navigation links rather than folders -- TODO confirm.
    folders = folders[5:]
    folders = _select_folders(folders, number, randomized)

    _download_files("http://labelme.csail.mit.edu/Annotations/",
                    "Annotations for: ", "xml", folders, data_directory)
    _download_files("http://labelme.csail.mit.edu/Images/",
                    "Images for: ", "jpg", folders, data_directory)


def _link_text(anchor):
    """ Return the first child of an anchor tag, or None if it has none. """
    if not anchor.contents:
        return None
    return anchor.contents[0]


def _select_folders(folders, number, randomized):
    """ Return the folders to download according to number and randomized. """
    if number == -1:
        return folders
    if randomized:
        # Clamp the request so that asking for more folders than exist (or
        # for a negative count) cannot make the random draw fail.
        count = max(0, min(number, len(folders)))
        return random.sample(folders, count)
    return folders[0:number]


def _download_files(base_url, label, extension, folders, data_directory):
    """ Fetch every listed file ending in extension from each folder. """
    for folder in folders:
        # The two items were printed separated by a space; joining them
        # first keeps the output identical with a single print argument.
        print(label + " " + folder)
        folder_url = base_url + folder
        for anchor in parse(folder_url).findAll('a'):
            name = _link_text(anchor)
            if name is None or not name.endswith(extension):
                continue
            # folder[:-1] drops the last character of the folder name
            # (presumably a trailing "/") so it can prefix a flat file name.
            target = os.path.join(data_directory, folder[:-1] + "---" + name)
            urllib.urlretrieve(folder_url + name, target)