-
Notifications
You must be signed in to change notification settings - Fork 1
/
get_data.py
49 lines (36 loc) · 1.24 KB
/
get_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import time
import imageio
import numpy as np
import pandas as pd
from tqdm import tqdm
# Downloads 'size' images, starting from 'offset' (row positions in the
# catalog CSV).
offset = 0
size = 10

# Load in the coordinates: keep only the requested slice of rows and pull
# the 'ra' and 'dec' columns out as arrays.
catalog = pd.read_csv('objects_details.csv')
catalog = catalog[offset:offset + size]
ra = catalog['ra'].values
dec = catalog['dec'].values

imageList = []  # successfully downloaded thumbnails, in catalog order
coords = []     # original-file row index of each entry in imageList

# Loop over all objects and download the thumbnail. Each download is tried
# up to 5 times, pausing 1 second after every failure. An object is skipped
# only when all 5 attempts fail.
for idx, (ra_val, dec_val) in enumerate(tqdm(zip(ra, dec), total=len(ra))):
    # '!s' forces str() formatting, matching plain string concatenation.
    url = (
        'http://skyserver.sdss.org/dr16/SkyServerWS/ImgCutout/getjpeg'
        f'?ra={ra_val!s}&dec={dec_val!s}&scale=0.4&height=64&width=64'
    )
    for i in range(5):  # retry loop
        try:
            im = imageio.imread(url)
            break  # On success, stop retrying.
        except Exception:
            # Best-effort: any download/decoding error triggers a retry.
            # 'Exception' (not a bare except) keeps Ctrl-C working.
            print('timeout, retry in 1 second.')
            time.sleep(1)
    else:
        # The retry loop finished without 'break': every attempt failed,
        # so move on to the next object without recording anything.
        continue

    # Reached only via 'break', i.e. 'im' holds a freshly downloaded image
    # (even if earlier attempts for this object failed).
    imageList.append(im)
    coords.append(idx + offset)

# Save images and indices (in case some objects fail, we need the index of
# the object in the original file).
imageList = np.array(imageList)
print(imageList.shape, len(coords))
np.savez('dr16.npz', imageList)
np.savetxt('indices.txt', coords)