-
Notifications
You must be signed in to change notification settings - Fork 6
/
Makefile
108 lines (77 loc) · 3.34 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
###### Input / output locations and service endpoints
# Local checkout of the CAIDA catalog sources. `build` tests for it at parse
# time and `caida` tests for it at run time.
CATALOG_DATA_CAIDA_PATH = catalog-data-caida/sources/
# PubDB exports: prerequisites of `pubdb`, passed to pubdb_placeholder.py (-p / -m).
PUBDB_PAPER = data/pubdb_output__papers.json
PUBDB_MEDIA = data/pubdb_output__presentations.json
# Passed to data-build.py via -r (see DATA_BUILD_OPTS).
REDIRECTS_FILE = data/redirects.csv
# Arguments of catalog-dataset-summary-download.py in the `summary` target:
# source URL, backup file (-b) and output file (-O).
SUMMARY_URL = https://users.caida.org/~dataadm/catalog/catalog-dataset-summary.jsonl
SUMMARY_BACKUP_FILE = data/catalog-dataset-summary-backup.jsonl
SUMMARY_FILE = data/catalog-dataset-summary.jsonl
# GraphQL endpoint and output file handed to catalog-ids-download.py by `build`
# when CATALOG_DATA_CAIDA_PATH does not exist.
URL = https://api.catalog.caida.org/v2/graphql
IDS_FILE = data/_ids.txt

###### Freshness knobs
# NOTE(review): none of the four variables below is referenced by any rule in
# this file; they are kept only so existing overrides keep working.
# START/END hold literal backtick commands: make does not run them, a shell
# would only do so if the variable were expanded inside a recipe.
FRESH_HOURS = 23
FRESH_HOUR =
START = `date -r t +%s`
END = `date +%s`
# A former `((DIFF=${START}+${END}))` line was removed here: it is shell
# arithmetic, not make syntax, and make parsed it as an assignment to a
# variable literally named `((DIFF`.
###### Data schema files
# The *_SRC paths are spreadsheet exports expected in ~/Downloads; `build`
# moves them onto the corresponding data/ path when they are present.
DATA_SCHEMA_DATASETS       = data/data-schema-datasets.tsv
DATA_SCHEMA_DATASETS_SRC   = ~/Downloads/Data\ Schema\ for\ CAIDA\ Datasets\ -\ Sheet1.tsv
DATA_SCHEMA_CATEGORIES     = data/data-schema-categories.tsv
DATA_SCHEMA_CATEGORIES_SRC = ~/Downloads/Categories\ used\ in\ Schema\ for\ CAIDA\'s\ Datasets\ -\ Sheet1.tsv

###### Ontology
# Directory handed to scripts/ontology-build.py by the `schema` target.
ONTOLOGY_DIR = ontology

###### Namespace
NAMESPACE_DIR = namespaces

###### Options for scripts/data-build.py
# `fast`, `readable` and `readdata` re-invoke make with extra flags prepended
# to this value on the command line.
DATA_BUILD_OPTS = -s $(SUMMARY_FILE) -r $(REDIRECTS_FILE) -c $(DATA_SCHEMA_CATEGORIES) -d $(DATA_SCHEMA_DATASETS)
# Default goal (first rule in the file): the full rebuild pipeline.
# The prerequisites are listed in the order they are meant to execute:
# placeholders are removed first, regenerated by pubdb/external/caida, and
# `build` runs after them. That ordering is not expressed as dependencies
# between the steps, so the list must never be run in parallel.
# `.NOTPARALLEL: run` serialises run's prerequisites on GNU Make >= 4.4; older
# versions ignore the prerequisite and serialise the whole invocation.
.PHONY: run
.NOTPARALLEL: run
run: clean_placeholders ensure_dirs pubdb external caida summary build suggestions schema
###### Ensure required directories exist
# Creates sources/presentation (and any missing parents). `mkdir -p` succeeds
# when the directory already exists, so the target is safe to run repeatedly.
# Declared phony so a file named `ensure_dirs` can never mark it up to date.
.PHONY: ensure_dirs
ensure_dirs:
	@mkdir -p sources/presentation
# Run the full `run` pipeline with `-D` prepended to the data-build.py options.
# The sub-make is invoked through $(MAKE) rather than a literal `make`, so the
# same make binary is used and flags such as -n, -k and the -j jobserver are
# passed down correctly.
.PHONY: fast
fast: ensure_dirs
	$(MAKE) DATA_BUILD_OPTS="-D ${DATA_BUILD_OPTS}" run
# `human` and `read` are aliases for `readable`.
# `readable` prepends `-RD` to the data-build.py options and delegates to
# `fast`, which prepends `-D` again; data-build.py therefore ends up being
# called with `-D -RD ...`.
# $(MAKE) replaces the literal `make` so the sub-make inherits the parent's
# binary and flags (-n, -k, -j jobserver).
.PHONY: human read readable
human: readable
read: readable
readable: ensure_dirs
	$(MAKE) DATA_BUILD_OPTS="-RD ${DATA_BUILD_OPTS}" fast
# Like `readable`, but only rebuilds the data: `-RD` is prepended to the
# data-build.py options and the `data` target is made instead of `fast`.
# $(MAKE) replaces the literal `make` so the sub-make inherits the parent's
# binary and flags (-n, -k, -j jobserver).
.PHONY: readdata
readdata: ensure_dirs
	$(MAKE) DATA_BUILD_OPTS="-RD ${DATA_BUILD_OPTS}" data
# Alias that runs only the data build step.
# Declared phony because this Makefile also reads and writes files under a
# real `data/` directory, so the target name collides with an existing path.
.PHONY: data
data: ensure_dirs build
# Build the catalog data with scripts/data-build.py.
#   1. If a schema export exists at DATA_SCHEMA_DATASETS_SRC or
#      DATA_SCHEMA_CATEGORIES_SRC (both under ~/Downloads), it is MOVED onto
#      the matching data/ path, i.e. the file in ~/Downloads disappears.
#   2. The data-build.py command line is echoed.
#   3. data-build.py runs. The ifneq below is evaluated once, when make parses
#      this file (not when the recipe executes): if CATALOG_DATA_CAIDA_PATH
#      exists at parse time the script runs with DATA_BUILD_OPTS only;
#      otherwise catalog-ids-download.py first writes IDS_FILE from URL and the
#      build receives it via -i.
# Declared phony so a file or directory named `build` cannot shadow the target.
.PHONY: build
build: ensure_dirs
	if [ -f ${DATA_SCHEMA_DATASETS_SRC} ]; then \
		mv ${DATA_SCHEMA_DATASETS_SRC} ${DATA_SCHEMA_DATASETS} ; \
	fi
	if [ -f ${DATA_SCHEMA_CATEGORIES_SRC} ]; then \
		mv ${DATA_SCHEMA_CATEGORIES_SRC} ${DATA_SCHEMA_CATEGORIES} ; \
	fi
	echo "scripts/data-build.py ${DATA_BUILD_OPTS}"
ifneq ("$(wildcard $(CATALOG_DATA_CAIDA_PATH))","")
	python3 scripts/data-build.py ${DATA_BUILD_OPTS}
else
	./scripts/catalog-ids-download.py -O ${IDS_FILE} ${URL}
	python3 scripts/data-build.py ${DATA_BUILD_OPTS} -i ${IDS_FILE}
endif
# Run catalog-dataset-summary-download.py with SUMMARY_URL as the source,
# SUMMARY_FILE as the output (-O) and SUMMARY_BACKUP_FILE as the backup (-b).
# The rule has no prerequisites, so without .PHONY a file named `summary`
# would make this target permanently "up to date" and the script would never
# run.
.PHONY: summary
summary:
	python3 scripts/catalog-dataset-summary-download.py -O ${SUMMARY_FILE} -b ${SUMMARY_BACKUP_FILE} ${SUMMARY_URL}
# Run pubdb_placeholder.py on the PubDB paper (-p) and media (-m) exports.
# The prerequisites are the helper scripts and the two export files; they must
# exist, but the recipe itself only invokes pubdb_placeholder.py.
# Declared phony: the recipe does not create a file named `pubdb`, and a stray
# file of that name newer than the prerequisites would otherwise cause the
# step to be skipped.
.PHONY: pubdb
pubdb: scripts/lib/utils.py scripts/pubdb_placeholder.py scripts/pubdb_links.py ${PUBDB_PAPER} ${PUBDB_MEDIA}
	python3 scripts/pubdb_placeholder.py -p ${PUBDB_PAPER} -m ${PUBDB_MEDIA}
# Run externallinks_placeholder.py against data/data-papers.yaml (-d).
# Declared phony: the recipe does not create a file named `external`, and a
# stray file of that name newer than the script would cause the step to be
# skipped.
.PHONY: external
external: scripts/externallinks_placeholder.py
	python3 scripts/externallinks_placeholder.py -d data/data-papers.yaml
# Run caida_placeholder.py on the local CAIDA catalog checkout, if present.
# The directory test happens in the shell when the recipe runs; when
# CATALOG_DATA_CAIDA_PATH is missing the recipe does nothing and succeeds.
# The recipe now ends at `fi`: the original closed with `fi; \`, a dangling
# line continuation that pulled whatever line followed into this shell command.
# Declared phony because the recipe does not create a file named `caida`.
.PHONY: caida
caida: scripts/caida_placeholder.py scripts/caida_dataset_blanks.py
	@if [ -d ${CATALOG_DATA_CAIDA_PATH} ]; then \
		python3 scripts/caida_placeholder.py -p ${CATALOG_DATA_CAIDA_PATH}; \
	fi
# `suggestions` is a phony alias for the real file target suggestions.json.
.PHONY: suggestions
suggestions: suggestions.json

# Regenerate suggestions.json whenever the generator script or its input
# data/suggestions.json is newer than the output. The script is executed
# directly, so it must carry the executable bit; $@ expands to suggestions.json.
suggestions.json: scripts/suggestions.py data/suggestions.json
	scripts/suggestions.py -o $@ data/suggestions.json
# One-off helper kept for reference: it was used to backfill historic papers
# and presentations. No other target in this file depends on it, and because
# the rule has no prerequisites the recipe only runs while
# data/pubdb_links.json does not exist.
data/pubdb_links.json:
	python3 scripts/pubdb_links.py
##############################################################
# Run ontology-build.py on ONTOLOGY_DIR.
# The rule has no prerequisites, so without .PHONY a file or directory named
# `schema` would make make report the target as up to date and skip the script.
.PHONY: schema
schema:
	python3 scripts/ontology-build.py ${ONTOLOGY_DIR}
##############################################################
# Remove generated artefacts: the JSON index files in the top-level directory,
# suggestions.json, SUMMARY_FILE and IDS_FILE. Placeholders are removed first
# through the clean_placeholders prerequisite.
# category_id_depth.json was listed twice in the original command; each file
# now appears once. `rm -f` ignores files that do not exist.
.PHONY: clean
clean: clean_placeholders
	rm -f id_object.json id_id_link.json word_id_score.json \
		category_id_depth.json category_id_score.json suggestions.json \
		${SUMMARY_FILE} ${IDS_FILE}
# Delete a file named `pubdb` if one exists, then run remove_placeholders.py.
# NOTE(review): the `rm -f pubdb` presumably guards against a stray `pubdb`
# file shadowing the `pubdb` target - confirm before removing it.
# Declared phony: the rule has no prerequisites, so a file named
# `clean_placeholders` would otherwise stop the recipe from ever running.
.PHONY: clean_placeholders
clean_placeholders:
	rm -f pubdb
	python3 scripts/remove_placeholders.py