diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle
index ae608b4a..59d857e3 100644
Binary files a/.doctrees/environment.pickle and b/.doctrees/environment.pickle differ
diff --git a/.doctrees/visualization.doctree b/.doctrees/visualization.doctree
index 1032ebf2..da782b59 100644
Binary files a/.doctrees/visualization.doctree and b/.doctrees/visualization.doctree differ
diff --git a/_sources/visualization.rst.txt b/_sources/visualization.rst.txt
index c1326274..8bb988b8 100644
--- a/_sources/visualization.rst.txt
+++ b/_sources/visualization.rst.txt
@@ -21,26 +21,28 @@ The code block demonstrates how to use it:
The function will show the loops of the temporal mapping and for each operand shows at which memory level it resides.
For example:
-********* Temporal Mapping - CostModelEvaluation(layer=LayerNode_0, core=1) *********
- O (O): [[('FX', 11), ('FY', 11)], [('OY', 7), ('OY', 2), ('OX', 14), ('K', 12)], []]
- W (I2): [[], [('FX', 11), ('FY', 11), ('OY', 7), ('OY', 2), ('OX', 14)], [('K', 12)]]
- I (I1): [[('FX', 11), ('FY', 11), ('OY', 7), ('OY', 2), ('OX', 14), ('K', 12)], []]
-
--------------------------------------------------------------------------------------
- Temporal Loops O W I
--------------------------------------------------------------------------------------
- for K in [0:12) sram_2MB dram sram_2MB
--------------------------------------------------------------------------------------
- for OX in [0:14) sram_2MB sram_32KB sram_2MB
--------------------------------------------------------------------------------------
- for OY in [0:2) sram_2MB sram_32KB sram_2MB
--------------------------------------------------------------------------------------
- for OY in [0:7) sram_2MB sram_32KB sram_2MB
--------------------------------------------------------------------------------------
- for FY in [0:11) rf_2B sram_32KB sram_2MB
--------------------------------------------------------------------------------------
- for FX in [0:11) rf_2B sram_32KB sram_2MB
--------------------------------------------------------------------------------------
+::
+
+ ********* Temporal Mapping - CostModelEvaluation(layer=LayerNode_0, core=1) *********
+ O (O): [[('FX', 11), ('FY', 11)], [('OY', 7), ('OY', 2), ('OX', 14), ('K', 12)], []]
+ W (I2): [[], [('FX', 11), ('FY', 11), ('OY', 7), ('OY', 2), ('OX', 14)], [('K', 12)]]
+ I (I1): [[('FX', 11), ('FY', 11), ('OY', 7), ('OY', 2), ('OX', 14), ('K', 12)], []]
+
+ -------------------------------------------------------------------------------------
+ Temporal Loops O W I
+ -------------------------------------------------------------------------------------
+ for K in [0:12) sram_2MB dram sram_2MB
+ -------------------------------------------------------------------------------------
+ for OX in [0:14) sram_2MB sram_32KB sram_2MB
+ -------------------------------------------------------------------------------------
+ for OY in [0:2) sram_2MB sram_32KB sram_2MB
+ -------------------------------------------------------------------------------------
+ for OY in [0:7) sram_2MB sram_32KB sram_2MB
+ -------------------------------------------------------------------------------------
+ for FY in [0:11) rf_2B sram_32KB sram_2MB
+ -------------------------------------------------------------------------------------
+ for FX in [0:11) rf_2B sram_32KB sram_2MB
+ -------------------------------------------------------------------------------------
The top loop is the outer-most for loop, where as the bottom loop is the inner-most. Going from bottom to top, loops are allocated to the innermost memories of the memory hierarchy for each operand.
The names of the memories match the names of the ``MemoryInstance`` object used to create the memory level using the ``add_memory()`` call in the ``MemoryHierarchy``.
diff --git a/searchindex.js b/searchindex.js
index dca83656..3673cd5f 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["api", "code-documentation", "contribute", "future", "getting-started", "hardware", "index", "installation", "mapping", "outputs", "publications", "stages", "user-guide", "visualization", "workload"], "filenames": ["api.rst", "code-documentation.rst", "contribute.rst", "future.rst", "getting-started.rst", "hardware.rst", "index.rst", "installation.rst", "mapping.rst", "outputs.rst", "publications.rst", "stages.rst", "user-guide.rst", "visualization.rst", "workload.rst"], "titles": ["ZigZag API", "Code Documentation", "Contribute", "Future changes", "Getting Started", "Hardware Architecture", "Welcome to ZigZag\u2019s documentation!", "Installing ZigZag", "Mapping", "Outputs", "Publications", "Stages", "User Guide", "Visualization", "Workload"], "terms": {"onc": [0, 7], "i": [0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 14], "avail": [0, 2, 5, 7], "your": [0, 2, 7, 8], "python": [0, 2, 4, 5, 7, 14], "path": [0, 4, 14], "you": [0, 2, 3, 4, 5, 7, 8, 9, 11, 14], "can": [0, 2, 3, 4, 5, 7, 8, 9, 11, 13, 14], "import": [0, 2, 4, 5, 13, 14], "ani": [0, 2, 5, 7, 11], "file": [0, 2, 4, 5, 7, 8, 9, 11, 14], "from": [0, 5, 11, 13, 14], "thi": [0, 1, 2, 4, 5, 6, 8, 9, 11, 13, 14], "function": [0, 2, 7, 11, 13], "take": [0, 3, 5, 7, 14], "an": [0, 2, 4, 5, 7, 9, 11], "workload": [0, 4, 6, 8, 11, 12], "hardwar": [0, 4, 6, 8, 12, 14], "architectur": [0, 4, 6, 10, 11, 12], "map": [0, 3, 4, 5, 6, 9, 12], "return": [0, 9, 11], "perform": [0, 5, 10], "execut": [0, 2, 4, 5, 8, 11, 14], "model": [0, 3, 4, 6], "": [0, 2, 3, 4, 5, 8, 10, 11, 13], "layer": [0, 4, 6, 8, 9, 11, 13], "under": [0, 2, 4, 8], "given": [0, 4, 11], "constraint": [0, 4], "energi": [0, 3, 4, 5, 9, 10, 11], "latenc": [0, 3, 4, 5, 6, 9, 11], "cme": [0, 11, 13], "acceler": [0, 4, 6, 8, 9, 10, 11, 14], "opt": 0, "dump_filename_pattern": [0, 4], "output": [0, 4, 5, 6, 11, 12, 13, 14], "datetim": [0, 11], "json": [0, 9, 11], "pickle_filenam": 0, "list_of_cm": [0, 13], "pickl": [0, 11, 13], "The": [0, 1, 2, 4, 5, 6, 8, 9, 12, 13, 14], "input": [0, 3, 4, 5, 8, 9, 14], "ar": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14], "A": [0, 5, 6, 8, 10, 14], "neural": [0, 5, 10], "network": [0, 4, 5, 10], "defin": [0, 4, 5, 9, 10, 11, 14], "onnx": [0, 3, 4, 8, 11], "format": [0, 1, 2, 9], "own": [0, 11, 14], "high": [0, 5, 10], "level": [0, 5, 11, 13], "hw": [0, 3, 4, 6, 8, 11], "descript": [0, 8, 11], "specifi": [0, 5, 9], "core": [0, 4, 6, 8, 11, 13, 14], "alloc": [0, 5, 8, 10, 11, 13], "spatial": [0, 3, 5, 8, 9, 10, 14], "option": [0, 5], "tempor": [0, 3, 4, 6, 9], "order": [0, 2, 3, 5, 10, 11], "memori": [0, 3, 6, 8, 9, 10, 11, 13, 14], "operand": [0, 5, 8, 11, 13, 14], "link": [0, 1, 5, 8, 14], "optim": [0, 4, 6, 10], "target": 0, "It": [0, 4, 9, 11, 14], "edp": [0, 11], "delai": 0, "product": 0, "name": [0, 4, 5, 8, 13, 14], "result": [0, 6, 9, 13], "which": [0, 4, 5, 8, 9, 11, 13, 14], "includ": [0, 2, 3, 5, 9], "all": [0, 2, 4, 5, 9, 11, 14], "detail": [0, 2, 6, 7, 11], "metadata": 0, "analys": 0, "debug": 0, "number": [0, 5, 9], "indic": [0, 14], "overal": 0, "consum": 0, "run": [0, 2, 6, 7, 8, 11], "user": [0, 3, 4, 5, 6, 7, 11], "wai": [0, 1, 2, 4, 5, 9, 11, 13, 14], "cycl": [0, 5], "count": 0, "collect": 0, "cost": [0, 3, 4, 5, 6, 14], "evalu": [0, 5, 11], "stand": 0, "we": [0, 2, 3, 4, 5, 11], "demonstr": [0, 13], "how": [0, 2, 4, 5, 7, 8, 9, 11, 13], "us": [0, 1, 2, 3, 4, 5, 7, 8, 9, 11, 13, 14], "multipl": [0, 5, 6, 10, 11, 13, 14], "demo": 0, "comment": [1, 2], "within": [1, 5, 8, 11, 14], "sourc": [1, 2], "zigzag": [1, 4, 5, 8, 9, 11, 12, 13, 14], "framework": [1, 2, 4, 6, 7, 8, 10, 11, 12, 14], "support": [1, 3, 4, 5, 6, 11], "auto": [1, 5, 10], "doxygen": 1, "automat": [1, 4, 5, 6, 8, 11, 14], "updat": [1, 2, 3, 11], "soon": 1, "somebodi": 1, "push": 1, "someth": 1, "master": 1, "branch": 1, "github": [1, 7, 10], "repo": [1, 14], "project": [1, 6, 10], "follow": [1, 2, 4, 5, 8, 9, 11, 12, 14], "access": [1, 3, 5], "latest": 1, "version": [1, 6, 11], "when": [2, 8, 11, 14], "pleas": [2, 9, 11], "consid": [2, 11], "googl": 2, "style": 2, "guid": [2, 3, 6], "docstr": 2, "class": [2, 3, 11], "method": [2, 5, 9], "exampl": [2, 4, 8, 9, 11, 13, 14], "found": [2, 11, 14], "throughout": 2, "here": [2, 3, 4, 8, 10, 14], "accordingli": 2, "In": [2, 4, 5, 6, 9, 11, 14], "packag": [2, 6], "call": [2, 5, 13], "bumpver": 2, "twine": 2, "These": [2, 11], "instal": [2, 6], "pip": [2, 7], "first": [2, 5, 6, 11], "pull": 2, "make": [2, 3, 5, 7, 9], "sure": 2, "have": [2, 4, 5, 11, 14], "remot": 2, "cahng": 2, "merg": 2, "conflict": 2, "chang": [2, 5, 6, 11], "commit": 2, "Then": [2, 14], "command": [2, 4, 14], "patch": 2, "m": [2, 5, 10], "upload": 2, "dist": 2, "zigzag_ds": 2, "x": [2, 5], "y": [2, 5], "z": [2, 5], "whl": 2, "dse": [2, 6, 7], "tar": 2, "gz": 2, "provid": [2, 4, 5, 6, 7, 8, 11, 12, 14], "sever": 2, "differ": [2, 4, 5, 6, 9, 11, 14], "There": [2, 5, 9], "mani": [2, 5], "public": [2, 6], "relat": 2, "page": [2, 6], "allow": [2, 5, 8, 11], "everyon": 2, "get": [2, 6, 7], "familiar": 2, "more": [2, 3, 4, 5, 7, 11, 12, 14], "about": [2, 9, 11], "implement": 2, "ad": [2, 5, 8], "mandatori": 2, "what": [2, 5, 8, 9, 11], "doe": [2, 5], "achiev": [2, 5], "newli": 2, "explicit": 2, "resid": [2, 11, 13], "doc": 2, "folder": [2, 11], "restructuredtext": 2, "rst": 2, "decid": 2, "would": [2, 14], "best": [2, 11], "fit": 2, "exist": [2, 11], "one": [2, 4, 5, 11], "If": [2, 5, 7, 9, 11, 14], "creat": [2, 8, 13], "lower": [2, 5, 11], "case": [2, 3, 6, 14], "letter": [2, 14], "hyphen": 2, "between": [2, 4, 6, 14], "word": [2, 5], "after": [2, 5, 7, 11], "need": [2, 4, 5, 8, 14], "add": [2, 3, 5, 7, 14], "toctre": 2, "index": [2, 6], "same": [2, 3, 5], "webpag": 2, "sphinx": 2, "should": [2, 5, 8, 9, 14], "both": [2, 3, 5], "press": 2, "theme": 2, "easi": [2, 9], "through": [2, 4, 5, 6, 7, 8, 10, 11, 14], "requir": [2, 4, 5, 7, 8, 9, 11, 14], "txt": [2, 7], "cd": 2, "r": [2, 5, 7], "simpli": [2, 11], "b": [2, 5, 14], "html": 2, "entri": [2, 8], "point": [2, 6], "guidlin": 2, "paramet": [2, 5, 11], "constructor": 2, "download": 2, "describ": [2, 5, 14], "successfulli": 2, "configur": [2, 10], "done": 2, "either": [2, 5], "gui": 2, "conf": 2, "find": [3, 4, 6, 11], "plan": 3, "oper": [3, 8, 11], "ancestor": 3, "layernod": [3, 14], "dummynod": [3, 14], "fix": 3, "loop": [3, 10, 11, 14], "multi": [3, 6], "dimension": 3, "unrol": [3, 5], "fraction": 3, "account": [3, 11, 14], "bandwidth": [3, 5], "loma": [3, 4, 10, 11], "memoryalloc": 3, "besid": [3, 4, 5, 11], "capac": [3, 5], "lpf": 3, "limit": [3, 10], "visualis": 3, "tutori": 3, "remak": 3, "tabl": [3, 5], "without": 3, "df": [3, 5], "stage": [3, 4, 6, 9, 12], "stack": 3, "combin": [3, 5, 8, 9, 11], "common": [3, 5], "versatil": 4, "tool": 4, "estim": [4, 6], "dl": [4, 6], "design": [4, 5, 6, 11], "multitud": 4, "set": [4, 11], "As": [4, 11], "step": [4, 11], "nn": [4, 5, 14], "onto": [4, 6, 8], "go": [4, 5, 13], "alexnet": 4, "ha": [4, 5, 11, 12, 14], "been": 4, "shape": 4, "infer": [4, 5], "mean": [4, 5, 11], "tensor": [4, 5, 14], "intermedi": [4, 5, 14], "inform": [4, 5, 8, 9, 11, 12, 14], "know": [4, 5, 8, 9, 14], "correctli": [4, 5, 14], "tpu": [4, 5], "like": [4, 11, 14], "tpu_lik": 4, "py": [4, 8, 11, 14], "must": [4, 11], "suggest": 4, "resourc": [4, 6, 8], "alexnet_on_tpu_lik": 4, "gener": [4, 5, 6, 8, 9, 11, 12, 13], "ran": 4, "main": [4, 5, 7, 9, 14], "pars": [4, 11, 14], "contain": [4, 8, 14], "program": 4, "flow": [4, 11], "document": [4, 7, 11, 12], "main_onnx": [4, 14], "note": [4, 9], "construct": [4, 5], "becaus": 4, "object": [4, 5, 9, 11, 13, 14], "respect": [4, 5, 9], "modul": [4, 6], "other": [4, 5, 11, 14], "also": [4, 5, 7, 8, 9, 11, 14], "see": [4, 9, 14], "section": [4, 5, 9, 11], "manual": [4, 5, 6, 8, 11], "definit": [4, 8, 9, 11], "resnet18": [4, 8, 14], "salsa": [4, 11], "search": [4, 6], "engin": [4, 6, 11], "util": [4, 9, 13], "schedul": [4, 5, 6, 11], "than": [4, 11], "main_onnx_salsa": 4, "dure": 4, "save": [4, 9], "depend": [4, 7, 14], "total": [4, 5, 11], "five": [4, 12], "each": [4, 5, 9, 11, 13, 14], "node": [4, 8, 9, 11], "onnxmodelparserstag": [4, 8, 11, 14], "wa": 4, "minimallatencystag": [4, 11], "refer": [4, 14], "introduc": 5, "concept": [5, 11], "well": 5, "known": 5, "start": [5, 6, 7, 11], "smallest": 5, "build": [5, 12, 14], "block": [5, 12, 13], "work": [5, 9], "our": [5, 11], "up": [5, 11], "toward": [5, 10], "summat": 5, "accumul": 5, "across": [5, 10, 11], "data": [5, 9, 11], "activ": 5, "train": 5, "weight": [5, 14], "typic": [5, 8], "multipli": 5, "two": [5, 9], "element": [5, 11], "attribut": [5, 9, 14], "input_precis": 5, "list": [5, 11, 13, 14], "precis": [5, 14], "bit": [5, 14], "output_precis": 5, "e": [5, 8, 10, 11, 13, 14], "g": [5, 8, 11, 13], "sum": [5, 11], "energy_cost": 5, "singl": [5, 11], "area": [5, 10], "overhead": 5, "inferenc": 5, "million": 5, "parallel": [5, 8, 14], "significantli": 5, "speed": 5, "comput": [5, 6, 8, 10, 14], "increas": 5, "effici": 5, "cover": 5, "later": [5, 11], "dimens": [5, 11, 14], "size": [5, 14], "explain": [5, 9, 11], "introduct": 5, "operational_unit": 5, "built": 5, "dictionari": [5, 8, 14], "kei": [5, 8], "being": [5, 11], "identifi": 5, "d1": 5, "d2": 5, "valu": [5, 11, 14], "along": 5, "store": 5, "attach": 5, "hierarch": 5, "fashion": 5, "big": 5, "term": 5, "write": [5, 8], "read": [5, 14], "its": [5, 7, 9, 11], "port": 5, "r_bw": 5, "w_bw": 5, "per": 5, "r_cost": 5, "w_cost": 5, "r_port": 5, "w_port": 5, "rw_port": 5, "address": 5, "receiv": [5, 11], "correspond": [5, 11], "For": [5, 11, 13, 14], "now": 5, "assum": [5, 14], "1": [5, 10, 13], "prefetch": 5, "behavior": 5, "thank": 5, "determinist": 5, "dataflow": [5, 10], "min_r_granular": 5, "min_w_granular": 5, "minim": [5, 11], "granular": 5, "better": 5, "half": 5, "quarter": 5, "pattern": [5, 11], "wordlength": 5, "256": 5, "100": 5, "128": 5, "approximatlli": 5, "onli": [5, 9, 11, 14], "50": 5, "spec": [5, 7], "encod": [5, 8], "interconnect": [5, 11], "add_memori": [5, 13], "where": [5, 11, 13, 14], "connect": [5, 11], "higher": [5, 11], "To": [5, 11], "anoth": [5, 11], "decoupl": 5, "algorithm": [5, 6, 8, 10, 14], "side": [5, 14], "oppos": 5, "w": [5, 8, 10], "think": [5, 11], "virtual": [5, 14], "actual": [5, 14], "memory_operand_link": [5, 8, 14], "similarli": 5, "form": 5, "accompani": 5, "served_dimens": 5, "serv": [5, 11], "hot": 5, "tupl": [5, 11], "2": [5, 13], "3": [5, 7], "4": [5, 10], "four": 5, "none": [5, 14], "0": 5, "12": [5, 13], "them": [5, 11], "lastli": 5, "assign": 5, "movement": 5, "possibl": [5, 14], "type": [5, 12, 14], "fh": 5, "th": 5, "low": 5, "fl": 5, "tl": 5, "written": 5, "current": [5, 9], "out": 5, "At": 5, "time": [5, 8], "syntax": 5, "port_typ": 5, "_port_": 5, "port_numb": 5, "rw": 5, "equal": 5, "altern": [5, 7, 14], "default": [5, 8, 11], "intern": [5, 7, 10, 11], "memoryhierarchi": [5, 13], "extend": 5, "networkx": 5, "digraph": 5, "so": [5, 11, 14], "operational_arrai": 5, "new": [5, 6, 11], "memorylevel": 5, "graph": [5, 11, 14], "memory_inst": 5, "memoryinst": [5, 13], "port_alloc": 5, "direction": 5, "abov": 5, "togeth": [5, 14], "id": [5, 8, 14], "memory_hierarchi": 5, "core_set": 5, "compris": 5, "global_buff": 5, "share": 5, "un": 5, "repositori": [5, 7], "5": 5, "dnn": [5, 10], "meta": 5, "prototyp": 5, "edg": [5, 14], "ascend": 5, "tesla": 5, "npu": 5, "depth": [5, 6], "research": 5, "fair": 5, "relev": [5, 9], "comparison": 5, "normal": 5, "1024": [5, 14], "mac": 5, "maxim": 5, "2mb": 5, "global": 5, "buffer": 5, "gb": 5, "kept": 5, "local": 5, "shown": 5, "idx": 5, "7": 5, "9": 5, "variant": 5, "everi": [5, 8], "chip": 5, "denot": 5, "end": [5, 7, 11], "6": [5, 10, 11], "8": [5, 7, 10], "10": [5, 10], "k": [5, 10, 13, 14], "channel": [5, 14], "c": [5, 14], "ox": [5, 14], "oi": [5, 14], "featur": 5, "fx": [5, 14], "fy": [5, 13, 14], "h": [5, 10], "sumbul": [5, 10], "t": [5, 8, 10, 14], "f": 5, "wu": [5, 10], "li": 5, "sarwar": 5, "koven": 5, "murphi": 5, "trotzki": 5, "cai": 5, "ansari": 5, "d": [5, 10], "morri": 5, "liu": [5, 10], "kim": 5, "beign": [5, 10], "lab": 5, "system": [5, 10, 11], "integr": [5, 10], "vr": 5, "custom": [5, 7, 8, 14], "power": 5, "7nm": 5, "technologi": 5, "codec": 5, "avatar": 5, "2022": [5, 10], "ieee": [5, 10], "circuit": [5, 10], "confer": [5, 10], "cicc": 5, "pp": [5, 10], "01": 5, "08": 5, "n": [5, 10], "p": [5, 10], "jouppi": 5, "young": 5, "patil": 5, "patterson": 5, "agraw": 5, "bajwa": 5, "bate": 5, "bhatia": 5, "boden": 5, "borcher": 5, "boyl": 5, "l": [5, 10], "cantin": 5, "chao": 5, "clark": 5, "j": 5, "coriel": 5, "dalei": 5, "dau": 5, "dean": 5, "gelb": 5, "v": [5, 10], "ghaemmaghami": 5, "gottipati": 5, "gulland": 5, "hagmann": 5, "ho": 5, "hogberg": 5, "hu": 5, "hundt": 5, "hurt": 5, "ibarz": 5, "jaffei": 5, "jaworski": 5, "kaplan": 5, "khaitan": 5, "killebrew": 5, "koch": 5, "kumar": 5, "laci": 5, "laudon": 5, "law": 5, "le": 5, "leari": 5, "luck": 5, "lundin": 5, "mackean": 5, "maggior": 5, "mahoni": 5, "miller": 5, "nagarajan": 5, "narayanaswami": 5, "ni": 5, "nix": 5, "norri": 5, "omernick": 5, "penukonda": 5, "phelp": 5, "ross": 5, "salek": 5, "samadiani": 5, "severn": 5, "sizikov": 5, "snelham": 5, "souter": 5, "steinberg": 5, "swing": 5, "tan": 5, "thorson": 5, "tian": 5, "toma": 5, "tuttl": 5, "vasudevan": 5, "walter": 5, "wang": 5, "wilcox": 5, "yoon": 5, "datacent": 5, "analysi": 5, "process": [5, 11], "sigarch": 5, "archit": 5, "vol": [5, 10], "45": 5, "jun": 5, "2017": 5, "yazdanbakhsh": 5, "seshadri": 5, "akin": 5, "convolut": [5, 8, 14], "arxiv": [5, 10], "print": [5, 10, 13], "2102": 5, "10423": 5, "feb": 5, "2021": [5, 10], "liao": 5, "tu": 5, "xia": 5, "zhou": 5, "yuan": 5, "scalabl": 5, "unifi": 5, "ubiquit": 5, "deep": [5, 6, 10], "industri": 5, "track": 5, "paper": [5, 10, 14], "symposium": [5, 10], "hpca": [5, 10], "789": 5, "801": 5, "talp": 5, "sarma": 5, "venkataramanan": 5, "bannon": 5, "mcgee": 5, "floer": 5, "jalot": 5, "hsiong": 5, "arora": 5, "gorti": 5, "sachdev": 5, "solut": 5, "full": 5, "self": [5, 9], "drive": 5, "micro": 5, "40": 5, "25": 5, "35": 5, "2020": [5, 10], "space": [6, 11], "explor": [6, 11], "learn": 6, "bridg": 6, "gap": 6, "decis": 6, "special": 6, "fast": [6, 10], "accur": 6, "analyt": [6, 10], "crucial": 6, "part": [6, 8], "clone": 6, "analyz": [6, 10], "api": [6, 7, 13], "get_hardware_performance_zigzag": 6, "visual": [6, 12], "futur": 6, "contribut": [6, 14], "guidelin": [6, 14], "upgrad": 6, "develop": 6, "idea": 6, "explan": 6, "studi": 6, "extens": 6, "cross": 6, "fuse": 6, "code": [6, 13], "re": 7, "interest": [7, 11], "modif": [7, 9], "directli": 7, "venv": 7, "conda": 7, "environ": 7, "look": [7, 11], "want": [7, 8, 11, 14], "git": 7, "com": 7, "kuleuven": 7, "mica": 7, "http": 7, "anaconda": 7, "argument": [7, 11], "autom": [8, 10], "some": [8, 11, 14], "aspect": [8, 9, 11], "interfac": 8, "core_alloc": [8, 14], "spatial_map": [8, 9, 14], "strategi": [8, 14], "spatialmappinggeneratorstag": [8, 11, 14], "hierarchi": [8, 9, 11, 13], "o": [8, 14], "extra": [8, 11], "flexibl": 8, "scheme": 8, "don": 8, "put": 8, "safe": 8, "bet": 8, "copi": [8, 11], "exact": 8, "detect": 8, "interpret": 9, "predefin": 9, "costmodelevalu": [9, 11, 13], "knowledg": 9, "irrelev": 9, "handl": 9, "complexhandl": 9, "insid": [9, 11, 14], "represent": [9, 11], "invok": 9, "pass": 9, "__simplejsonrepr__": 9, "convert": [9, 11, 14], "off": [9, 10], "load": [9, 14], "reli": 9, "def": 9, "simpl": [9, 11], "energy_tot": 9, "latency_total2": 9, "standard": 9, "filename_pattern": [9, 11], "lose": 9, "etc": [9, 11], "concern": 9, "__jsonrepr__": 9, "temporal_map": 9, "mem_utili_shar": 9, "word_access": 9, "memory_word_access": 9, "operational_energi": 9, "mac_energi": 9, "memory_energi": 9, "mem_energi": 9, "energy_breakdown_per_level": 9, "energy_breakdown": 9, "energy_breakdown_per_level_per_operand": 9, "energy_breakdown_furth": 9, "latency_without_onloading_without_offload": 9, "latency_total0": 9, "latency_with_onloading_without_offload": 9, "latency_total1": 9, "latency_with_onloading_with_offload": 9, "goal": [9, 11], "straightforward": 9, "care": 9, "certain": 9, "modifi": [9, 11], "parser": 9, "pointer": 10, "mei": 10, "houshmand": 10, "jain": 10, "giraldo": 10, "verhelst": 10, "enlarg": 10, "joint": 10, "transact": 10, "70": 10, "1160": 10, "1174": 10, "aug": 10, "doi": 10, "1109": 10, "tc": 10, "3059962": 10, "uniform": 10, "divers": 10, "test": 10, "europ": 10, "exhibit": 10, "date": 10, "antwerp": 10, "belgium": 10, "220": 10, "225": 10, "23919": 10, "date54114": 10, "9774728": 10, "slide": 10, "video": 10, "symon": 10, "base": [10, 11], "3rd": 10, "artifici": 10, "intellig": 10, "aica": 10, "washington": 10, "dc": 10, "usa": 10, "aicas51828": 10, "9458493": 10, "coseman": 10, "papista": 10, "bhattacharje": 10, "deback": 10, "mallik": 10, "verkest": 10, "opportun": 10, "emerg": 10, "analog": 10, "electron": 10, "devic": 10, "meet": 10, "iedm": 10, "san": 10, "francisco": 10, "ca": 10, "29": 10, "iedm13553": 10, "9372006": 10, "accuraci": 10, "trade": 10, "contemporari": 10, "9458553": 10, "colleman": 10, "verelst": 10, "tuytelaar": 10, "processor": 10, "dynam": 10, "ifip": 10, "29th": 10, "veri": 10, "larg": [10, 14], "scale": 10, "vlsi": 10, "soc": 10, "singapor": 10, "soc53125": 10, "9607013": 10, "zhu": 10, "sun": 10, "mobil": 10, "transform": 10, "4th": 10, "incheon": 10, "korea": 10, "republ": 10, "142": 10, "145": 10, "aicas54282": 10, "9869945": 10, "goetschalckx": 10, "enabl": 10, "2023": 10, "karl": 10, "heterogen": 10, "exploit": 10, "fine": 10, "grain": 10, "48550": 10, "2212": 10, "10612": 10, "fasfou": 10, "genet": 10, "date56975": 10, "10137070": 10, "modularli": 11, "easili": 11, "adapt": 11, "sequenc": 11, "determin": 11, "mainstag": 11, "initi": 11, "acceleratorparserstag": 11, "simplesavestag": 11, "workloadstag": 11, "sm": 11, "lomastag": 11, "tm": 11, "costmodelstag": 11, "accelerator_path": 11, "arg": 11, "onnx_model_path": 11, "mapping_path": 11, "loma_lpf_limit": 11, "loma_show_progress_bar": 11, "true": [11, 14], "show": [11, 13], "progress": 11, "bar": [11, 13], "while": 11, "over": 11, "similar": 11, "those": 11, "pipelin": [11, 14], "remain": 11, "said": 11, "further": 11, "label": 11, "below": 11, "fed": 11, "far": 11, "discuss": 11, "last": 11, "revers": 11, "hold": 11, "finish": 11, "conbim": 11, "yield": 11, "chain": 11, "manipul": 11, "invoc": 11, "lowest": 11, "still": 11, "miss": 11, "__init__": 11, "workloadparserstag": 11, "workload_path": 11, "generalparameteriteratorstag": 11, "whose": 11, "predetermin": 11, "plottemporalmappingsstag": 11, "substag": 11, "keep": 11, "minimalenergystag": 11, "list_of_cal": 11, "minimaledpstag": 11, "sumstag": 11, "listifystag": 11, "instead": [11, 14], "removeextrainfostag": 11, "strip": 11, "info": 11, "subcal": 11, "cachebeforeyieldstag": 11, "cach": 11, "break": 11, "top": [11, 13], "bottom": [11, 13], "skipifdumpexistsstag": 11, "check": 11, "alreadi": 11, "skip": 11, "multiprocessingspawnstag": 11, "multiprocess": 11, "multiprocessinggatherstag": 11, "completesavestag": 11, "picklesavestag": 11, "dumpstag": 11, "salsastag": 11, "simul": 11, "anneal": 11, "temporalorderingconversionstag": 11, "spatialmappingconversionstag": 11, "auser": 11, "arrai": 11, "present": [11, 14], "inner": [11, 13], "most": [11, 13], "config": 11, "searchunusedmemorystag": 11, "instanc": 11, "usag": 11, "next": 11, "place": 11, "befor": 11, "workload_data_always_from_top_mem": 11, "fals": [11, 14], "final": [11, 14], "entir": 11, "highest": 11, "travel": 11, "removeunusedmemorystag": 11, "remov": 11, "unus": 11, "accord": 11, "let": 11, "sai": 11, "metric": 11, "easiest": 11, "intend": 11, "behaviour": 11, "guarante": 11, "correct": 11, "taken": 11, "inherit": 11, "abstract": 11, "callabl": 11, "kwarg": 11, "second": 11, "extra_info": 11, "reduct": 11, "statement": 11, "outsid": 11, "happen": 11, "regard": 12, "major": 12, "compon": 12, "termin": 13, "pickle_load": 13, "print_map": 13, "layernode_0": 13, "i2": 13, "i1": 13, "dram": 13, "outer": 13, "innermost": 13, "match": 13, "plot": 13, "bar_plot_cost_model_evaluations_breakdown": 13, "plot_cm": 13, "jpg": 13, "produc": 13, "chart": 13, "recommend": 14, "context": 14, "ml": 14, "often": 14, "recogn": 14, "complet": 14, "conv": 14, "qlinearconv": 14, "matmul": 14, "gemm": 14, "accelerat": 14, "incur": 14, "zero": 14, "feel": 14, "free": 14, "open": 14, "issu": 14, "yourself": 14, "rather": 14, "avoid": 14, "origin": 14, "discard": 14, "doesn": 14, "do": 14, "onnx_model": 14, "modelproto": 14, "my_model_with_internal_data": 14, "save_model": 14, "save_as_external_data": 14, "all_tensors_to_one_fil": 14, "locat": 14, "external_data_filenam": 14, "size_threshold": 14, "convert_attribut": 14, "raw": 14, "specif": 14, "directori": 14, "shape_infer": 14, "my_model": 14, "inferred_model": 14, "infer_shap": 14, "my_inferred_model": 14, "moreov": 14, "repres": 14, "equat": 14, "small": 14, "wherea": 14, "alwai": 14, "freeli": 14, "dimension_rel": 14, "relationship": 14, "stride": 14, "filter": 14, "dilat": 14, "rate": 14, "loop_dim_s": 14, "left": 14, "hand": 14, "operand_precis": 14, "partial": 14, "o_fin": 14, "operand_sourc": 14, "come": 14, "constant_operand": 14, "constant": 14, "prior": 14, "readm": 14, "notat": 14, "batch": 14, "row": 14, "column": 14, "kernel": 14}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"zigzag": [0, 2, 6, 7, 10], "api": 0, "get_hardware_performance_zigzag": 0, "code": [1, 2], "document": [1, 2, 3, 6], "contribut": 2, "guidelin": 2, "upgrad": 2, "project": 2, "version": 2, "develop": 2, "write": 2, "new": [2, 10], "part": 2, "gener": [2, 10], "build": 2, "local": 2, "which": 2, "support": [2, 10, 14], "doxygen": 2, "futur": 3, "chang": 3, "framework": 3, "get": 4, "start": 4, "first": [4, 10], "run": 4, "analyz": 4, "result": [4, 11], "hardwar": [5, 11], "architectur": 5, "oper": [5, 14], "unit": 5, "arrai": 5, "memori": 5, "instanc": 5, "hierarchi": 5, "core": [5, 10], "hw": 5, "acceler": 5, "model": [5, 10, 11, 14], "exampl": 5, "specif": 5, "set": 5, "refer": 5, "welcom": 6, "": [6, 14], "content": 6, "indic": 6, "tabl": 6, "instal": 7, "packag": 7, "manual": [7, 14], "clone": 7, "prerequisit": 7, "map": [8, 10, 11, 13], "user": [8, 12], "defin": 8, "constraint": 8, "output": 9, "simplesavestag": 9, "completesavestag": 9, "creat": [9, 11], "custom": [9, 11], "savestag": 9, "public": 10, "The": [10, 11], "idea": 10, "detail": 10, "latenc": [10, 13], "explan": 10, "tempor": [10, 11, 13], "search": 10, "engin": 10, "differ": 10, "design": 10, "space": 10, "explor": 10, "case": 10, "studi": 10, "extens": 10, "cross": 10, "layer": [10, 14], "depth": 10, "schedul": 10, "multi": 10, "fuse": 10, "stage": 11, "introduct": 11, "main": 11, "entri": 11, "point": 11, "sequenti": 11, "call": 11, "back": 11, "pass": 11, "implement": 11, "input": 11, "parser": 11, "iter": 11, "plot": 11, "reduc": 11, "optim": 11, "save": [11, 14], "dump": 11, "spatial": 11, "cost": 11, "modif": 11, "your": [11, 14], "guid": 12, "visual": 13, "loop": 13, "o": 13, "w": 13, "i": 13, "ox": 13, "0": 13, "14": 13, "sram_2mb": 13, "sram_32kb": 13, "oi": 13, "7": 13, "fx": 13, "11": 13, "rf_2b": 13, "energi": 13, "breakdown": 13, "workload": 14, "onnx": 14, "extern": 14, "data": 14, "infer": 14, "an": 14, "shape": 14, "definit": 14}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 58}, "alltitles": {"ZigZag API": [[0, "zigzag-api"]], "get_hardware_performance_zigzag()": [[0, "get-hardware-performance-zigzag"]], "Code Documentation": [[1, "code-documentation"]], "Contribute": [[2, "contribute"]], "Contributing guidelines": [[2, "contributing-guidelines"]], "Upgrading the project version (for ZigZag developers)": [[2, "upgrading-the-project-version-for-zigzag-developers"]], "Documentation": [[2, "documentation"], [3, "documentation"]], "Writing new parts for the general documentation": [[2, "writing-new-parts-for-the-general-documentation"]], "Building the general documentation locally": [[2, "building-the-general-documentation-locally"]], "Writing code which supports the code documentation with Doxygen": [[2, "writing-code-which-supports-the-code-documentation-with-doxygen"]], "Building the code documentation locally": [[2, "building-the-code-documentation-locally"]], "Future changes": [[3, "future-changes"]], "Framework": [[3, "framework"]], "Getting Started": [[4, "getting-started"]], "First run": [[4, "first-run"]], "Analyzing results": [[4, "analyzing-results"]], "Hardware Architecture": [[5, "hardware-architecture"]], "Operational Unit": [[5, "operational-unit"]], "Operational Array": [[5, "operational-array"]], "Memory Instance": [[5, "memory-instance"]], "Memory Hierarchy": [[5, "memory-hierarchy"]], "Core": [[5, "core"]], "HW Accelerator Model": [[5, "hw-accelerator-model"]], "Modelled examples": [[5, "modelled-examples"]], "Specific settings": [[5, "specific-settings"]], "References": [[5, "references"]], "Welcome to ZigZag\u2019s documentation!": [[6, "welcome-to-zigzag-s-documentation"]], "Contents:": [[6, null]], "Indices and tables": [[6, "indices-and-tables"]], "Installing ZigZag": [[7, "installing-zigzag"]], "Installing as a package": [[7, "installing-as-a-package"]], "Manual clone": [[7, "manual-clone"]], "Prerequisites": [[7, "prerequisites"]], "Installation": [[7, "installation"]], "Mapping": [[8, "mapping"]], "User-defined mapping constraints": [[8, "user-defined-mapping-constraints"]], "Outputs": [[9, "outputs"]], "SimpleSaveStage": [[9, "simplesavestage"]], "CompleteSaveStage": [[9, "completesavestage"]], "Creating a custom SaveStage": [[9, "creating-a-custom-savestage"]], "Publications": [[10, "publications"]], "The general idea of ZigZag": [[10, "the-general-idea-of-zigzag"]], "Detailed latency model explanation": [[10, "detailed-latency-model-explanation"]], "The new temporal mapping search engine": [[10, "the-new-temporal-mapping-search-engine"]], "Different design space exploration case studies": [[10, "different-design-space-exploration-case-studies"]], "Extension to support cross-layer depth-first scheduling": [[10, "extension-to-support-cross-layer-depth-first-scheduling"]], "Extension to support multi-core layer-fused scheduling": [[10, "extension-to-support-multi-core-layer-fused-scheduling"]], "Stages": [[11, "stages"]], "Introduction": [[11, "introduction"]], "The main entry point": [[11, "the-main-entry-point"]], "The sequential call of stages": [[11, "the-sequential-call-of-stages"]], "The back passing of results": [[11, "the-back-passing-of-results"]], "Implemented stages": [[11, "implemented-stages"]], "Input parser stages": [[11, "input-parser-stages"]], "Iterator stage": [[11, "iterator-stage"]], "Plot stages": [[11, "plot-stages"]], "Reduce stages": [[11, "reduce-stages"]], "Optimization stages": [[11, "optimization-stages"]], "Save and dump stages": [[11, "save-and-dump-stages"]], "Temporal mapping stages": [[11, "temporal-mapping-stages"]], "Spatial mapping stages": [[11, "spatial-mapping-stages"]], "Cost model stages": [[11, "cost-model-stages"]], "Hardware modification stages": [[11, "hardware-modification-stages"]], "Creating your custom stage": [[11, "creating-your-custom-stage"]], "User Guide": [[12, "user-guide"]], "Visualization": [[13, "visualization"]], "Temporal mapping": [[13, "temporal-mapping"]], "Temporal Loops O W I": [[13, "temporal-loops-o-w-i"]], "for OX in [0:14) sram_2MB sram_32KB sram_2MB": [[13, "for-ox-in-0-14-sram-2mb-sram-32kb-sram-2mb"]], "for OY in [0:7) sram_2MB sram_32KB sram_2MB": [[13, "for-oy-in-0-7-sram-2mb-sram-32kb-sram-2mb"]], "for FX in [0:11) rf_2B sram_32KB sram_2MB": [[13, "for-fx-in-0-11-rf-2b-sram-32kb-sram-2mb"]], "Energy and latency breakdown": [[13, "energy-and-latency-breakdown"]], "Workload": [[14, "workload"]], "Onnx models": [[14, "onnx-models"]], "Supported onnx operators": [[14, "supported-onnx-operators"]], "Saving your onnx model with external data": [[14, "saving-your-onnx-model-with-external-data"]], "Inferring an onnx model\u2019s shapes": [[14, "inferring-an-onnx-model-s-shapes"]], "Manual layer definition": [[14, "manual-layer-definition"]]}, "indexentries": {}})
\ No newline at end of file
+Search.setIndex({"docnames": ["api", "code-documentation", "contribute", "future", "getting-started", "hardware", "index", "installation", "mapping", "outputs", "publications", "stages", "user-guide", "visualization", "workload"], "filenames": ["api.rst", "code-documentation.rst", "contribute.rst", "future.rst", "getting-started.rst", "hardware.rst", "index.rst", "installation.rst", "mapping.rst", "outputs.rst", "publications.rst", "stages.rst", "user-guide.rst", "visualization.rst", "workload.rst"], "titles": ["ZigZag API", "Code Documentation", "Contribute", "Future changes", "Getting Started", "Hardware Architecture", "Welcome to ZigZag\u2019s documentation!", "Installing ZigZag", "Mapping", "Outputs", "Publications", "Stages", "User Guide", "Visualization", "Workload"], "terms": {"onc": [0, 7], "i": [0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14], "avail": [0, 2, 5, 7], "your": [0, 2, 7, 8], "python": [0, 2, 4, 5, 7, 14], "path": [0, 4, 14], "you": [0, 2, 3, 4, 5, 7, 8, 9, 11, 14], "can": [0, 2, 3, 4, 5, 7, 8, 9, 11, 13, 14], "import": [0, 2, 4, 5, 13, 14], "ani": [0, 2, 5, 7, 11], "file": [0, 2, 4, 5, 7, 8, 9, 11, 14], "from": [0, 5, 11, 13, 14], "thi": [0, 1, 2, 4, 5, 6, 8, 9, 11, 13, 14], "function": [0, 2, 7, 11, 13], "take": [0, 3, 5, 7, 14], "an": [0, 2, 4, 5, 7, 9, 11], "workload": [0, 4, 6, 8, 11, 12], "hardwar": [0, 4, 6, 8, 12, 14], "architectur": [0, 4, 6, 10, 11, 12], "map": [0, 3, 4, 5, 6, 9, 12], "return": [0, 9, 11], "perform": [0, 5, 10], "execut": [0, 2, 4, 5, 8, 11, 14], "model": [0, 3, 4, 6], "": [0, 2, 3, 4, 5, 8, 10, 11, 13], "layer": [0, 4, 6, 8, 9, 11, 13], "under": [0, 2, 4, 8], "given": [0, 4, 11], "constraint": [0, 4], "energi": [0, 3, 4, 5, 9, 10, 11], "latenc": [0, 3, 4, 5, 6, 9, 11], "cme": [0, 11, 13], "acceler": [0, 4, 6, 8, 9, 10, 11, 14], "opt": 0, "dump_filename_pattern": [0, 4], "output": [0, 4, 5, 6, 11, 12, 13, 14], "datetim": [0, 11], "json": [0, 9, 11], "pickle_filenam": 0, "list_of_cm": [0, 13], "pickl": [0, 11, 13], "The": [0, 1, 2, 4, 5, 6, 8, 9, 12, 13, 14], "input": [0, 3, 4, 5, 8, 9, 14], "ar": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14], "A": [0, 5, 6, 8, 10, 14], "neural": [0, 5, 10], "network": [0, 4, 5, 10], "defin": [0, 4, 5, 9, 10, 11, 14], "onnx": [0, 3, 4, 8, 11], "format": [0, 1, 2, 9], "own": [0, 11, 14], "high": [0, 5, 10], "level": [0, 5, 11, 13], "hw": [0, 3, 4, 6, 8, 11], "descript": [0, 8, 11], "specifi": [0, 5, 9], "core": [0, 4, 6, 8, 11, 13, 14], "alloc": [0, 5, 8, 10, 11, 13], "spatial": [0, 3, 5, 8, 9, 10, 14], "option": [0, 5], "tempor": [0, 3, 4, 6, 9], "order": [0, 2, 3, 5, 10, 11], "memori": [0, 3, 6, 8, 9, 10, 11, 13, 14], "operand": [0, 5, 8, 11, 13, 14], "link": [0, 1, 5, 8, 14], "optim": [0, 4, 6, 10], "target": 0, "It": [0, 4, 9, 11, 14], "edp": [0, 11], "delai": 0, "product": 0, "name": [0, 4, 5, 8, 13, 14], "result": [0, 6, 9, 13], "which": [0, 4, 5, 8, 9, 11, 13, 14], "includ": [0, 2, 3, 5, 9], "all": [0, 2, 4, 5, 9, 11, 14], "detail": [0, 2, 6, 7, 11], "metadata": 0, "analys": 0, "debug": 0, "number": [0, 5, 9], "indic": [0, 14], "overal": 0, "consum": 0, "run": [0, 2, 6, 7, 8, 11], "user": [0, 3, 4, 5, 6, 7, 11], "wai": [0, 1, 2, 4, 5, 9, 11, 13, 14], "cycl": [0, 5], "count": 0, "collect": 0, "cost": [0, 3, 4, 5, 6, 14], "evalu": [0, 5, 11], "stand": 0, "we": [0, 2, 3, 4, 5, 11], "demonstr": [0, 13], "how": [0, 2, 4, 5, 7, 8, 9, 11, 13], "us": [0, 1, 2, 3, 4, 5, 7, 8, 9, 11, 13, 14], "multipl": [0, 5, 6, 10, 11, 13, 14], "demo": 0, "comment": [1, 2], "within": [1, 5, 8, 11, 14], "sourc": [1, 2], "zigzag": [1, 4, 5, 8, 9, 11, 12, 13, 14], "framework": [1, 2, 4, 6, 7, 8, 10, 11, 12, 14], "support": [1, 3, 4, 5, 6, 11], "auto": [1, 5, 10], "doxygen": 1, "automat": [1, 4, 5, 6, 8, 11, 14], "updat": [1, 2, 3, 11], "soon": 1, "somebodi": 1, "push": 1, "someth": 1, "master": 1, "branch": 1, "github": [1, 7, 10], "repo": [1, 14], "project": [1, 6, 10], "follow": [1, 2, 4, 5, 8, 9, 11, 12, 14], "access": [1, 3, 5], "latest": 1, "version": [1, 6, 11], "when": [2, 8, 11, 14], "pleas": [2, 9, 11], "consid": [2, 11], "googl": 2, "style": 2, "guid": [2, 3, 6], "docstr": 2, "class": [2, 3, 11], "method": [2, 5, 9], "exampl": [2, 4, 8, 9, 11, 13, 14], "found": [2, 11, 14], "throughout": 2, "here": [2, 3, 4, 8, 10, 14], "accordingli": 2, "In": [2, 4, 5, 6, 9, 11, 14], "packag": [2, 6], "call": [2, 5, 13], "bumpver": 2, "twine": 2, "These": [2, 11], "instal": [2, 6], "pip": [2, 7], "first": [2, 5, 6, 11], "pull": 2, "make": [2, 3, 5, 7, 9], "sure": 2, "have": [2, 4, 5, 11, 14], "remot": 2, "cahng": 2, "merg": 2, "conflict": 2, "chang": [2, 5, 6, 11], "commit": 2, "Then": [2, 14], "command": [2, 4, 14], "patch": 2, "m": [2, 5, 10], "upload": 2, "dist": 2, "zigzag_ds": 2, "x": [2, 5], "y": [2, 5], "z": [2, 5], "whl": 2, "dse": [2, 6, 7], "tar": 2, "gz": 2, "provid": [2, 4, 5, 6, 7, 8, 11, 12, 14], "sever": 2, "differ": [2, 4, 5, 6, 9, 11, 14], "There": [2, 5, 9], "mani": [2, 5], "public": [2, 6], "relat": 2, "page": [2, 6], "allow": [2, 5, 8, 11], "everyon": 2, "get": [2, 6, 7], "familiar": 2, "more": [2, 3, 4, 5, 7, 11, 12, 14], "about": [2, 9, 11], "implement": 2, "ad": [2, 5, 8], "mandatori": 2, "what": [2, 5, 8, 9, 11], "doe": [2, 5], "achiev": [2, 5], "newli": 2, "explicit": 2, "resid": [2, 11, 13], "doc": 2, "folder": [2, 11], "restructuredtext": 2, "rst": 2, "decid": 2, "would": [2, 14], "best": [2, 11], "fit": 2, "exist": [2, 11], "one": [2, 4, 5, 11], "If": [2, 5, 7, 9, 11, 14], "creat": [2, 8, 13], "lower": [2, 5, 11], "case": [2, 3, 6, 14], "letter": [2, 14], "hyphen": 2, "between": [2, 4, 6, 14], "word": [2, 5], "after": [2, 5, 7, 11], "need": [2, 4, 5, 8, 14], "add": [2, 3, 5, 7, 14], "toctre": 2, "index": [2, 6], "same": [2, 3, 5], "webpag": 2, "sphinx": 2, "should": [2, 5, 8, 9, 14], "both": [2, 3, 5], "press": 2, "theme": 2, "easi": [2, 9], "through": [2, 4, 5, 6, 7, 8, 10, 11, 14], "requir": [2, 4, 5, 7, 8, 9, 11, 14], "txt": [2, 7], "cd": 2, "r": [2, 5, 7], "simpli": [2, 11], "b": [2, 5, 14], "html": 2, "entri": [2, 8], "point": [2, 6], "guidlin": 2, "paramet": [2, 5, 11], "constructor": 2, "download": 2, "describ": [2, 5, 14], "successfulli": 2, "configur": [2, 10], "done": 2, "either": [2, 5], "gui": 2, "conf": 2, "find": [3, 4, 6, 11], "plan": 3, "oper": [3, 8, 11], "ancestor": 3, "layernod": [3, 14], "dummynod": [3, 14], "fix": 3, "loop": [3, 10, 11, 13, 14], "multi": [3, 6], "dimension": 3, "unrol": [3, 5], "fraction": 3, "account": [3, 11, 14], "bandwidth": [3, 5], "loma": [3, 4, 10, 11], "memoryalloc": 3, "besid": [3, 4, 5, 11], "capac": [3, 5], "lpf": 3, "limit": [3, 10], "visualis": 3, "tutori": 3, "remak": 3, "tabl": [3, 5], "without": 3, "df": [3, 5], "stage": [3, 4, 6, 9, 12], "stack": 3, "combin": [3, 5, 8, 9, 11], "common": [3, 5], "versatil": 4, "tool": 4, "estim": [4, 6], "dl": [4, 6], "design": [4, 5, 6, 11], "multitud": 4, "set": [4, 11], "As": [4, 11], "step": [4, 11], "nn": [4, 5, 14], "onto": [4, 6, 8], "go": [4, 5, 13], "alexnet": 4, "ha": [4, 5, 11, 12, 14], "been": 4, "shape": 4, "infer": [4, 5], "mean": [4, 5, 11], "tensor": [4, 5, 14], "intermedi": [4, 5, 14], "inform": [4, 5, 8, 9, 11, 12, 14], "know": [4, 5, 8, 9, 14], "correctli": [4, 5, 14], "tpu": [4, 5], "like": [4, 11, 14], "tpu_lik": 4, "py": [4, 8, 11, 14], "must": [4, 11], "suggest": 4, "resourc": [4, 6, 8], "alexnet_on_tpu_lik": 4, "gener": [4, 5, 6, 8, 9, 11, 12, 13], "ran": 4, "main": [4, 5, 7, 9, 14], "pars": [4, 11, 14], "contain": [4, 8, 14], "program": 4, "flow": [4, 11], "document": [4, 7, 11, 12], "main_onnx": [4, 14], "note": [4, 9], "construct": [4, 5], "becaus": 4, "object": [4, 5, 9, 11, 13, 14], "respect": [4, 5, 9], "modul": [4, 6], "other": [4, 5, 11, 14], "also": [4, 5, 7, 8, 9, 11, 14], "see": [4, 9, 14], "section": [4, 5, 9, 11], "manual": [4, 5, 6, 8, 11], "definit": [4, 8, 9, 11], "resnet18": [4, 8, 14], "salsa": [4, 11], "search": [4, 6], "engin": [4, 6, 11], "util": [4, 9, 13], "schedul": [4, 5, 6, 11], "than": [4, 11], "main_onnx_salsa": 4, "dure": 4, "save": [4, 9], "depend": [4, 7, 14], "total": [4, 5, 11], "five": [4, 12], "each": [4, 5, 9, 11, 13, 14], "node": [4, 8, 9, 11], "onnxmodelparserstag": [4, 8, 11, 14], "wa": 4, "minimallatencystag": [4, 11], "refer": [4, 14], "introduc": 5, "concept": [5, 11], "well": 5, "known": 5, "start": [5, 6, 7, 11], "smallest": 5, "build": [5, 12, 14], "block": [5, 12, 13], "work": [5, 9], "our": [5, 11], "up": [5, 11], "toward": [5, 10], "summat": 5, "accumul": 5, "across": [5, 10, 11], "data": [5, 9, 11], "activ": 5, "train": 5, "weight": [5, 14], "typic": [5, 8], "multipli": 5, "two": [5, 9], "element": [5, 11], "attribut": [5, 9, 14], "input_precis": 5, "list": [5, 11, 13, 14], "precis": [5, 14], "bit": [5, 14], "output_precis": 5, "e": [5, 8, 10, 11, 13, 14], "g": [5, 8, 11, 13], "sum": [5, 11], "energy_cost": 5, "singl": [5, 11], "area": [5, 10], "overhead": 5, "inferenc": 5, "million": 5, "parallel": [5, 8, 14], "significantli": 5, "speed": 5, "comput": [5, 6, 8, 10, 14], "increas": 5, "effici": 5, "cover": 5, "later": [5, 11], "dimens": [5, 11, 14], "size": [5, 14], "explain": [5, 9, 11], "introduct": 5, "operational_unit": 5, "built": 5, "dictionari": [5, 8, 14], "kei": [5, 8], "being": [5, 11], "identifi": 5, "d1": 5, "d2": 5, "valu": [5, 11, 14], "along": 5, "store": 5, "attach": 5, "hierarch": 5, "fashion": 5, "big": 5, "term": 5, "write": [5, 8], "read": [5, 14], "its": [5, 7, 9, 11], "port": 5, "r_bw": 5, "w_bw": 5, "per": 5, "r_cost": 5, "w_cost": 5, "r_port": 5, "w_port": 5, "rw_port": 5, "address": 5, "receiv": [5, 11], "correspond": [5, 11], "For": [5, 11, 13, 14], "now": 5, "assum": [5, 14], "1": [5, 10, 13], "prefetch": 5, "behavior": 5, "thank": 5, "determinist": 5, "dataflow": [5, 10], "min_r_granular": 5, "min_w_granular": 5, "minim": [5, 11], "granular": 5, "better": 5, "half": 5, "quarter": 5, "pattern": [5, 11], "wordlength": 5, "256": 5, "100": 5, "128": 5, "approximatlli": 5, "onli": [5, 9, 11, 14], "50": 5, "spec": [5, 7], "encod": [5, 8], "interconnect": [5, 11], "add_memori": [5, 13], "where": [5, 11, 13, 14], "connect": [5, 11], "higher": [5, 11], "To": [5, 11], "anoth": [5, 11], "decoupl": 5, "algorithm": [5, 6, 8, 10, 14], "side": [5, 14], "oppos": 5, "w": [5, 8, 10, 13], "think": [5, 11], "virtual": [5, 14], "actual": [5, 14], "memory_operand_link": [5, 8, 14], "similarli": 5, "form": 5, "accompani": 5, "served_dimens": 5, "serv": [5, 11], "hot": 5, "tupl": [5, 11], "2": [5, 13], "3": [5, 7], "4": [5, 10], "four": 5, "none": [5, 14], "0": [5, 13], "12": [5, 13], "them": [5, 11], "lastli": 5, "assign": 5, "movement": 5, "possibl": [5, 14], "type": [5, 12, 14], "fh": 5, "th": 5, "low": 5, "fl": 5, "tl": 5, "written": 5, "current": [5, 9], "out": 5, "At": 5, "time": [5, 8], "syntax": 5, "port_typ": 5, "_port_": 5, "port_numb": 5, "rw": 5, "equal": 5, "altern": [5, 7, 14], "default": [5, 8, 11], "intern": [5, 7, 10, 11], "memoryhierarchi": [5, 13], "extend": 5, "networkx": 5, "digraph": 5, "so": [5, 11, 14], "operational_arrai": 5, "new": [5, 6, 11], "memorylevel": 5, "graph": [5, 11, 14], "memory_inst": 5, "memoryinst": [5, 13], "port_alloc": 5, "direction": 5, "abov": 5, "togeth": [5, 14], "id": [5, 8, 14], "memory_hierarchi": 5, "core_set": 5, "compris": 5, "global_buff": 5, "share": 5, "un": 5, "repositori": [5, 7], "5": 5, "dnn": [5, 10], "meta": 5, "prototyp": 5, "edg": [5, 14], "ascend": 5, "tesla": 5, "npu": 5, "depth": [5, 6], "research": 5, "fair": 5, "relev": [5, 9], "comparison": 5, "normal": 5, "1024": [5, 14], "mac": 5, "maxim": 5, "2mb": 5, "global": 5, "buffer": 5, "gb": 5, "kept": 5, "local": 5, "shown": 5, "idx": 5, "7": [5, 13], "9": 5, "variant": 5, "everi": [5, 8], "chip": 5, "denot": 5, "end": [5, 7, 11], "6": [5, 10, 11], "8": [5, 7, 10], "10": [5, 10], "k": [5, 10, 13, 14], "channel": [5, 14], "c": [5, 14], "ox": [5, 13, 14], "oi": [5, 13, 14], "featur": 5, "fx": [5, 13, 14], "fy": [5, 13, 14], "h": [5, 10], "sumbul": [5, 10], "t": [5, 8, 10, 14], "f": 5, "wu": [5, 10], "li": 5, "sarwar": 5, "koven": 5, "murphi": 5, "trotzki": 5, "cai": 5, "ansari": 5, "d": [5, 10], "morri": 5, "liu": [5, 10], "kim": 5, "beign": [5, 10], "lab": 5, "system": [5, 10, 11], "integr": [5, 10], "vr": 5, "custom": [5, 7, 8, 14], "power": 5, "7nm": 5, "technologi": 5, "codec": 5, "avatar": 5, "2022": [5, 10], "ieee": [5, 10], "circuit": [5, 10], "confer": [5, 10], "cicc": 5, "pp": [5, 10], "01": 5, "08": 5, "n": [5, 10], "p": [5, 10], "jouppi": 5, "young": 5, "patil": 5, "patterson": 5, "agraw": 5, "bajwa": 5, "bate": 5, "bhatia": 5, "boden": 5, "borcher": 5, "boyl": 5, "l": [5, 10], "cantin": 5, "chao": 5, "clark": 5, "j": 5, "coriel": 5, "dalei": 5, "dau": 5, "dean": 5, "gelb": 5, "v": [5, 10], "ghaemmaghami": 5, "gottipati": 5, "gulland": 5, "hagmann": 5, "ho": 5, "hogberg": 5, "hu": 5, "hundt": 5, "hurt": 5, "ibarz": 5, "jaffei": 5, "jaworski": 5, "kaplan": 5, "khaitan": 5, "killebrew": 5, "koch": 5, "kumar": 5, "laci": 5, "laudon": 5, "law": 5, "le": 5, "leari": 5, "luck": 5, "lundin": 5, "mackean": 5, "maggior": 5, "mahoni": 5, "miller": 5, "nagarajan": 5, "narayanaswami": 5, "ni": 5, "nix": 5, "norri": 5, "omernick": 5, "penukonda": 5, "phelp": 5, "ross": 5, "salek": 5, "samadiani": 5, "severn": 5, "sizikov": 5, "snelham": 5, "souter": 5, "steinberg": 5, "swing": 5, "tan": 5, "thorson": 5, "tian": 5, "toma": 5, "tuttl": 5, "vasudevan": 5, "walter": 5, "wang": 5, "wilcox": 5, "yoon": 5, "datacent": 5, "analysi": 5, "process": [5, 11], "sigarch": 5, "archit": 5, "vol": [5, 10], "45": 5, "jun": 5, "2017": 5, "yazdanbakhsh": 5, "seshadri": 5, "akin": 5, "convolut": [5, 8, 14], "arxiv": [5, 10], "print": [5, 10, 13], "2102": 5, "10423": 5, "feb": 5, "2021": [5, 10], "liao": 5, "tu": 5, "xia": 5, "zhou": 5, "yuan": 5, "scalabl": 5, "unifi": 5, "ubiquit": 5, "deep": [5, 6, 10], "industri": 5, "track": 5, "paper": [5, 10, 14], "symposium": [5, 10], "hpca": [5, 10], "789": 5, "801": 5, "talp": 5, "sarma": 5, "venkataramanan": 5, "bannon": 5, "mcgee": 5, "floer": 5, "jalot": 5, "hsiong": 5, "arora": 5, "gorti": 5, "sachdev": 5, "solut": 5, "full": 5, "self": [5, 9], "drive": 5, "micro": 5, "40": 5, "25": 5, "35": 5, "2020": [5, 10], "space": [6, 11], "explor": [6, 11], "learn": 6, "bridg": 6, "gap": 6, "decis": 6, "special": 6, "fast": [6, 10], "accur": 6, "analyt": [6, 10], "crucial": 6, "part": [6, 8], "clone": 6, "analyz": [6, 10], "api": [6, 7, 13], "get_hardware_performance_zigzag": 6, "visual": [6, 12], "futur": 6, "contribut": [6, 14], "guidelin": [6, 14], "upgrad": 6, "develop": 6, "idea": 6, "explan": 6, "studi": 6, "extens": 6, "cross": 6, "fuse": 6, "code": [6, 13], "re": 7, "interest": [7, 11], "modif": [7, 9], "directli": 7, "venv": 7, "conda": 7, "environ": 7, "look": [7, 11], "want": [7, 8, 11, 14], "git": 7, "com": 7, "kuleuven": 7, "mica": 7, "http": 7, "anaconda": 7, "argument": [7, 11], "autom": [8, 10], "some": [8, 11, 14], "aspect": [8, 9, 11], "interfac": 8, "core_alloc": [8, 14], "spatial_map": [8, 9, 14], "strategi": [8, 14], "spatialmappinggeneratorstag": [8, 11, 14], "hierarchi": [8, 9, 11, 13], "o": [8, 13, 14], "extra": [8, 11], "flexibl": 8, "scheme": 8, "don": 8, "put": 8, "safe": 8, "bet": 8, "copi": [8, 11], "exact": 8, "detect": 8, "interpret": 9, "predefin": 9, "costmodelevalu": [9, 11, 13], "knowledg": 9, "irrelev": 9, "handl": 9, "complexhandl": 9, "insid": [9, 11, 14], "represent": [9, 11], "invok": 9, "pass": 9, "__simplejsonrepr__": 9, "convert": [9, 11, 14], "off": [9, 10], "load": [9, 14], "reli": 9, "def": 9, "simpl": [9, 11], "energy_tot": 9, "latency_total2": 9, "standard": 9, "filename_pattern": [9, 11], "lose": 9, "etc": [9, 11], "concern": 9, "__jsonrepr__": 9, "temporal_map": 9, "mem_utili_shar": 9, "word_access": 9, "memory_word_access": 9, "operational_energi": 9, "mac_energi": 9, "memory_energi": 9, "mem_energi": 9, "energy_breakdown_per_level": 9, "energy_breakdown": 9, "energy_breakdown_per_level_per_operand": 9, "energy_breakdown_furth": 9, "latency_without_onloading_without_offload": 9, "latency_total0": 9, "latency_with_onloading_without_offload": 9, "latency_total1": 9, "latency_with_onloading_with_offload": 9, "goal": [9, 11], "straightforward": 9, "care": 9, "certain": 9, "modifi": [9, 11], "parser": 9, "pointer": 10, "mei": 10, "houshmand": 10, "jain": 10, "giraldo": 10, "verhelst": 10, "enlarg": 10, "joint": 10, "transact": 10, "70": 10, "1160": 10, "1174": 10, "aug": 10, "doi": 10, "1109": 10, "tc": 10, "3059962": 10, "uniform": 10, "divers": 10, "test": 10, "europ": 10, "exhibit": 10, "date": 10, "antwerp": 10, "belgium": 10, "220": 10, "225": 10, "23919": 10, "date54114": 10, "9774728": 10, "slide": 10, "video": 10, "symon": 10, "base": [10, 11], "3rd": 10, "artifici": 10, "intellig": 10, "aica": 10, "washington": 10, "dc": 10, "usa": 10, "aicas51828": 10, "9458493": 10, "coseman": 10, "papista": 10, "bhattacharje": 10, "deback": 10, "mallik": 10, "verkest": 10, "opportun": 10, "emerg": 10, "analog": 10, "electron": 10, "devic": 10, "meet": 10, "iedm": 10, "san": 10, "francisco": 10, "ca": 10, "29": 10, "iedm13553": 10, "9372006": 10, "accuraci": 10, "trade": 10, "contemporari": 10, "9458553": 10, "colleman": 10, "verelst": 10, "tuytelaar": 10, "processor": 10, "dynam": 10, "ifip": 10, "29th": 10, "veri": 10, "larg": [10, 14], "scale": 10, "vlsi": 10, "soc": 10, "singapor": 10, "soc53125": 10, "9607013": 10, "zhu": 10, "sun": 10, "mobil": 10, "transform": 10, "4th": 10, "incheon": 10, "korea": 10, "republ": 10, "142": 10, "145": 10, "aicas54282": 10, "9869945": 10, "goetschalckx": 10, "enabl": 10, "2023": 10, "karl": 10, "heterogen": 10, "exploit": 10, "fine": 10, "grain": 10, "48550": 10, "2212": 10, "10612": 10, "fasfou": 10, "genet": 10, "date56975": 10, "10137070": 10, "modularli": 11, "easili": 11, "adapt": 11, "sequenc": 11, "determin": 11, "mainstag": 11, "initi": 11, "acceleratorparserstag": 11, "simplesavestag": 11, "workloadstag": 11, "sm": 11, "lomastag": 11, "tm": 11, "costmodelstag": 11, "accelerator_path": 11, "arg": 11, "onnx_model_path": 11, "mapping_path": 11, "loma_lpf_limit": 11, "loma_show_progress_bar": 11, "true": [11, 14], "show": [11, 13], "progress": 11, "bar": [11, 13], "while": 11, "over": 11, "similar": 11, "those": 11, "pipelin": [11, 14], "remain": 11, "said": 11, "further": 11, "label": 11, "below": 11, "fed": 11, "far": 11, "discuss": 11, "last": 11, "revers": 11, "hold": 11, "finish": 11, "conbim": 11, "yield": 11, "chain": 11, "manipul": 11, "invoc": 11, "lowest": 11, "still": 11, "miss": 11, "__init__": 11, "workloadparserstag": 11, "workload_path": 11, "generalparameteriteratorstag": 11, "whose": 11, "predetermin": 11, "plottemporalmappingsstag": 11, "substag": 11, "keep": 11, "minimalenergystag": 11, "list_of_cal": 11, "minimaledpstag": 11, "sumstag": 11, "listifystag": 11, "instead": [11, 14], "removeextrainfostag": 11, "strip": 11, "info": 11, "subcal": 11, "cachebeforeyieldstag": 11, "cach": 11, "break": 11, "top": [11, 13], "bottom": [11, 13], "skipifdumpexistsstag": 11, "check": 11, "alreadi": 11, "skip": 11, "multiprocessingspawnstag": 11, "multiprocess": 11, "multiprocessinggatherstag": 11, "completesavestag": 11, "picklesavestag": 11, "dumpstag": 11, "salsastag": 11, "simul": 11, "anneal": 11, "temporalorderingconversionstag": 11, "spatialmappingconversionstag": 11, "auser": 11, "arrai": 11, "present": [11, 14], "inner": [11, 13], "most": [11, 13], "config": 11, "searchunusedmemorystag": 11, "instanc": 11, "usag": 11, "next": 11, "place": 11, "befor": 11, "workload_data_always_from_top_mem": 11, "fals": [11, 14], "final": [11, 14], "entir": 11, "highest": 11, "travel": 11, "removeunusedmemorystag": 11, "remov": 11, "unus": 11, "accord": 11, "let": 11, "sai": 11, "metric": 11, "easiest": 11, "intend": 11, "behaviour": 11, "guarante": 11, "correct": 11, "taken": 11, "inherit": 11, "abstract": 11, "callabl": 11, "kwarg": 11, "second": 11, "extra_info": 11, "reduct": 11, "statement": 11, "outsid": 11, "happen": 11, "regard": 12, "major": 12, "compon": 12, "termin": 13, "pickle_load": 13, "print_map": 13, "layernode_0": 13, "11": 13, "14": 13, "i2": 13, "i1": 13, "sram_2mb": 13, "dram": 13, "sram_32kb": 13, "rf_2b": 13, "outer": 13, "innermost": 13, "match": 13, "plot": 13, "bar_plot_cost_model_evaluations_breakdown": 13, "plot_cm": 13, "jpg": 13, "produc": 13, "chart": 13, "recommend": 14, "context": 14, "ml": 14, "often": 14, "recogn": 14, "complet": 14, "conv": 14, "qlinearconv": 14, "matmul": 14, "gemm": 14, "accelerat": 14, "incur": 14, "zero": 14, "feel": 14, "free": 14, "open": 14, "issu": 14, "yourself": 14, "rather": 14, "avoid": 14, "origin": 14, "discard": 14, "doesn": 14, "do": 14, "onnx_model": 14, "modelproto": 14, "my_model_with_internal_data": 14, "save_model": 14, "save_as_external_data": 14, "all_tensors_to_one_fil": 14, "locat": 14, "external_data_filenam": 14, "size_threshold": 14, "convert_attribut": 14, "raw": 14, "specif": 14, "directori": 14, "shape_infer": 14, "my_model": 14, "inferred_model": 14, "infer_shap": 14, "my_inferred_model": 14, "moreov": 14, "repres": 14, "equat": 14, "small": 14, "wherea": 14, "alwai": 14, "freeli": 14, "dimension_rel": 14, "relationship": 14, "stride": 14, "filter": 14, "dilat": 14, "rate": 14, "loop_dim_s": 14, "left": 14, "hand": 14, "operand_precis": 14, "partial": 14, "o_fin": 14, "operand_sourc": 14, "come": 14, "constant_operand": 14, "constant": 14, "prior": 14, "readm": 14, "notat": 14, "batch": 14, "row": 14, "column": 14, "kernel": 14}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"zigzag": [0, 2, 6, 7, 10], "api": 0, "get_hardware_performance_zigzag": 0, "code": [1, 2], "document": [1, 2, 3, 6], "contribut": 2, "guidelin": 2, "upgrad": 2, "project": 2, "version": 2, "develop": 2, "write": 2, "new": [2, 10], "part": 2, "gener": [2, 10], "build": 2, "local": 2, "which": 2, "support": [2, 10, 14], "doxygen": 2, "futur": 3, "chang": 3, "framework": 3, "get": 4, "start": 4, "first": [4, 10], "run": 4, "analyz": 4, "result": [4, 11], "hardwar": [5, 11], "architectur": 5, "oper": [5, 14], "unit": 5, "arrai": 5, "memori": 5, "instanc": 5, "hierarchi": 5, "core": [5, 10], "hw": 5, "acceler": 5, "model": [5, 10, 11, 14], "exampl": 5, "specif": 5, "set": 5, "refer": 5, "welcom": 6, "": [6, 14], "content": 6, "indic": 6, "tabl": 6, "instal": 7, "packag": 7, "manual": [7, 14], "clone": 7, "prerequisit": 7, "map": [8, 10, 11, 13], "user": [8, 12], "defin": 8, "constraint": 8, "output": 9, "simplesavestag": 9, "completesavestag": 9, "creat": [9, 11], "custom": [9, 11], "savestag": 9, "public": 10, "The": [10, 11], "idea": 10, "detail": 10, "latenc": [10, 13], "explan": 10, "tempor": [10, 11, 13], "search": 10, "engin": 10, "differ": 10, "design": 10, "space": 10, "explor": 10, "case": 10, "studi": 10, "extens": 10, "cross": 10, "layer": [10, 14], "depth": 10, "schedul": 10, "multi": 10, "fuse": 10, "stage": 11, "introduct": 11, "main": 11, "entri": 11, "point": 11, "sequenti": 11, "call": 11, "back": 11, "pass": 11, "implement": 11, "input": 11, "parser": 11, "iter": 11, "plot": 11, "reduc": 11, "optim": 11, "save": [11, 14], "dump": 11, "spatial": 11, "cost": 11, "modif": 11, "your": [11, 14], "guid": 12, "visual": 13, "energi": 13, "breakdown": 13, "workload": 14, "onnx": 14, "extern": 14, "data": 14, "infer": 14, "an": 14, "shape": 14, "definit": 14}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 58}, "alltitles": {"ZigZag API": [[0, "zigzag-api"]], "get_hardware_performance_zigzag()": [[0, "get-hardware-performance-zigzag"]], "Code Documentation": [[1, "code-documentation"]], "Contribute": [[2, "contribute"]], "Contributing guidelines": [[2, "contributing-guidelines"]], "Upgrading the project version (for ZigZag developers)": [[2, "upgrading-the-project-version-for-zigzag-developers"]], "Documentation": [[2, "documentation"], [3, "documentation"]], "Writing new parts for the general documentation": [[2, "writing-new-parts-for-the-general-documentation"]], "Building the general documentation locally": [[2, "building-the-general-documentation-locally"]], "Writing code which supports the code documentation with Doxygen": [[2, "writing-code-which-supports-the-code-documentation-with-doxygen"]], "Building the code documentation locally": [[2, "building-the-code-documentation-locally"]], "Future changes": [[3, "future-changes"]], "Framework": [[3, "framework"]], "Getting Started": [[4, "getting-started"]], "First run": [[4, "first-run"]], "Analyzing results": [[4, "analyzing-results"]], "Hardware Architecture": [[5, "hardware-architecture"]], "Operational Unit": [[5, "operational-unit"]], "Operational Array": [[5, "operational-array"]], "Memory Instance": [[5, "memory-instance"]], "Memory Hierarchy": [[5, "memory-hierarchy"]], "Core": [[5, "core"]], "HW Accelerator Model": [[5, "hw-accelerator-model"]], "Modelled examples": [[5, "modelled-examples"]], "Specific settings": [[5, "specific-settings"]], "References": [[5, "references"]], "Welcome to ZigZag\u2019s documentation!": [[6, "welcome-to-zigzag-s-documentation"]], "Contents:": [[6, null]], "Indices and tables": [[6, "indices-and-tables"]], "Installing ZigZag": [[7, "installing-zigzag"]], "Installing as a package": [[7, "installing-as-a-package"]], "Manual clone": [[7, "manual-clone"]], "Prerequisites": [[7, "prerequisites"]], "Installation": [[7, "installation"]], "Mapping": [[8, "mapping"]], "User-defined mapping constraints": [[8, "user-defined-mapping-constraints"]], "Outputs": [[9, "outputs"]], "SimpleSaveStage": [[9, "simplesavestage"]], "CompleteSaveStage": [[9, "completesavestage"]], "Creating a custom SaveStage": [[9, "creating-a-custom-savestage"]], "Publications": [[10, "publications"]], "The general idea of ZigZag": [[10, "the-general-idea-of-zigzag"]], "Detailed latency model explanation": [[10, "detailed-latency-model-explanation"]], "The new temporal mapping search engine": [[10, "the-new-temporal-mapping-search-engine"]], "Different design space exploration case studies": [[10, "different-design-space-exploration-case-studies"]], "Extension to support cross-layer depth-first scheduling": [[10, "extension-to-support-cross-layer-depth-first-scheduling"]], "Extension to support multi-core layer-fused scheduling": [[10, "extension-to-support-multi-core-layer-fused-scheduling"]], "Stages": [[11, "stages"]], "Introduction": [[11, "introduction"]], "The main entry point": [[11, "the-main-entry-point"]], "The sequential call of stages": [[11, "the-sequential-call-of-stages"]], "The back passing of results": [[11, "the-back-passing-of-results"]], "Implemented stages": [[11, "implemented-stages"]], "Input parser stages": [[11, "input-parser-stages"]], "Iterator stage": [[11, "iterator-stage"]], "Plot stages": [[11, "plot-stages"]], "Reduce stages": [[11, "reduce-stages"]], "Optimization stages": [[11, "optimization-stages"]], "Save and dump stages": [[11, "save-and-dump-stages"]], "Temporal mapping stages": [[11, "temporal-mapping-stages"]], "Spatial mapping stages": [[11, "spatial-mapping-stages"]], "Cost model stages": [[11, "cost-model-stages"]], "Hardware modification stages": [[11, "hardware-modification-stages"]], "Creating your custom stage": [[11, "creating-your-custom-stage"]], "User Guide": [[12, "user-guide"]], "Visualization": [[13, "visualization"]], "Temporal mapping": [[13, "temporal-mapping"]], "Energy and latency breakdown": [[13, "energy-and-latency-breakdown"]], "Workload": [[14, "workload"]], "Onnx models": [[14, "onnx-models"]], "Supported onnx operators": [[14, "supported-onnx-operators"]], "Saving your onnx model with external data": [[14, "saving-your-onnx-model-with-external-data"]], "Inferring an onnx model\u2019s shapes": [[14, "inferring-an-onnx-model-s-shapes"]], "Manual layer definition": [[14, "manual-layer-definition"]]}, "indexentries": {}})
\ No newline at end of file
diff --git a/visualization.html b/visualization.html
index 9187d2af..6fda3616 100644
--- a/visualization.html
+++ b/visualization.html
@@ -1,9 +1,29 @@
-
Visualization — ZigZag 2.0.0 documentation Skip to content Visualization The generated CostModelEvaluation
object(s) (from e.g. the API call) can be visualized in multiple ways.
Temporal mapping The temporal mapping can be visualized by a function which prints it to the terminal. The code block demonstrates how to use it:
from zigzag.utils import pickle_load
+ Visualization — ZigZag 2.0.0 documentation Skip to content Visualization The generated CostModelEvaluation
object(s) (from e.g. the API call) can be visualized in multiple ways.
Temporal mapping The temporal mapping can be visualized by a function which prints it to the terminal. The code block demonstrates how to use it:
from zigzag.utils import pickle_load
from zigzag.visualization.results.print_mapping import print_mapping
cmes = pickle_load ( "zigzag/visualization/list_of_cmes.pickle" )
cme = cmes [ 0 ]
print_mapping ( cme )
- The function will show the loops of the temporal mapping and for each operand shows at which memory level it resides. For example:
***** Temporal Mapping - CostModelEvaluation(layer=LayerNode_0, core=1) ***** O (O): [[(‘FX’, 11), (‘FY’, 11)], [(‘OY’, 7), (‘OY’, 2), (‘OX’, 14), (‘K’, 12)], []] W (I2): [[], [(‘FX’, 11), (‘FY’, 11), (‘OY’, 7), (‘OY’, 2), (‘OX’, 14)], [(‘K’, 12)]] I (I1): [[(‘FX’, 11), (‘FY’, 11), (‘OY’, 7), (‘OY’, 2), (‘OX’, 14), (‘K’, 12)], []]
Temporal Loops O W I for K in [0:12) sram_2MB dram sram_2MB
for OX in [0:14) sram_2MB sram_32KB sram_2MB for OY in [0:2) sram_2MB sram_32KB sram_2MB
for OY in [0:7) sram_2MB sram_32KB sram_2MB for FY in [0:11) rf_2B sram_32KB sram_2MB
for FX in [0:11) rf_2B sram_32KB sram_2MB The top loop is the outer-most for loop, where as the bottom loop is the inner-most. Going from bottom to top, loops are allocated to the innermost memories of the memory hierarchy for each operand. The names of the memories match the names of the MemoryInstance
object used to create the memory level using the add_memory()
call in the MemoryHierarchy
.
Energy and latency breakdown The energy and latency breakdown of a list of CostModelEvaluation
objects can be plotted using the bar_plot_cost_model_evaluations_breakdown
function:
from zigzag.utils import pickle_load
+ The function will show the loops of the temporal mapping and for each operand shows at which memory level it resides. For example:
********* Temporal Mapping - CostModelEvaluation ( layer = LayerNode_0 , core = 1 ) *********
+O ( O ): [[( 'FX' , 11 ), ( 'FY' , 11 )], [( 'OY' , 7 ), ( 'OY' , 2 ), ( 'OX' , 14 ), ( 'K' , 12 )], []]
+W ( I2 ): [[], [( 'FX' , 11 ), ( 'FY' , 11 ), ( 'OY' , 7 ), ( 'OY' , 2 ), ( 'OX' , 14 )], [( 'K' , 12 )]]
+I ( I1 ): [[( 'FX' , 11 ), ( 'FY' , 11 ), ( 'OY' , 7 ), ( 'OY' , 2 ), ( 'OX' , 14 ), ( 'K' , 12 )], []]
+
+-------------------------------------------------------------------------------------
+Temporal Loops O W I
+-------------------------------------------------------------------------------------
+for K in [ 0 : 12 ) sram_2MB dram sram_2MB
+-------------------------------------------------------------------------------------
+for OX in [ 0 : 14 ) sram_2MB sram_32KB sram_2MB
+-------------------------------------------------------------------------------------
+for OY in [ 0 : 2 ) sram_2MB sram_32KB sram_2MB
+-------------------------------------------------------------------------------------
+ for OY in [ 0 : 7 ) sram_2MB sram_32KB sram_2MB
+-------------------------------------------------------------------------------------
+ for FY in [ 0 : 11 ) rf_2B sram_32KB sram_2MB
+-------------------------------------------------------------------------------------
+ for FX in [ 0 : 11 ) rf_2B sram_32KB sram_2MB
+-------------------------------------------------------------------------------------
+ The top loop is the outer-most for loop, where as the bottom loop is the inner-most. Going from bottom to top, loops are allocated to the innermost memories of the memory hierarchy for each operand. The names of the memories match the names of the MemoryInstance
object used to create the memory level using the add_memory()
call in the MemoryHierarchy
.
Energy and latency breakdown The energy and latency breakdown of a list of CostModelEvaluation
objects can be plotted using the bar_plot_cost_model_evaluations_breakdown
function:
from zigzag.utils import pickle_load
from zigzag.visualization.results.plot_cme import bar_plot_cost_model_evaluations_breakdown
cmes = pickle_load ( "zigzag/visualization/list_of_cmes.pickle" )