diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle index ae608b4a..59d857e3 100644 Binary files a/.doctrees/environment.pickle and b/.doctrees/environment.pickle differ diff --git a/.doctrees/visualization.doctree b/.doctrees/visualization.doctree index 1032ebf2..da782b59 100644 Binary files a/.doctrees/visualization.doctree and b/.doctrees/visualization.doctree differ diff --git a/_sources/visualization.rst.txt b/_sources/visualization.rst.txt index c1326274..8bb988b8 100644 --- a/_sources/visualization.rst.txt +++ b/_sources/visualization.rst.txt @@ -21,26 +21,28 @@ The code block demonstrates how to use it: The function will show the loops of the temporal mapping and for each operand shows at which memory level it resides. For example: -********* Temporal Mapping - CostModelEvaluation(layer=LayerNode_0, core=1) ********* - O (O): [[('FX', 11), ('FY', 11)], [('OY', 7), ('OY', 2), ('OX', 14), ('K', 12)], []] - W (I2): [[], [('FX', 11), ('FY', 11), ('OY', 7), ('OY', 2), ('OX', 14)], [('K', 12)]] - I (I1): [[('FX', 11), ('FY', 11), ('OY', 7), ('OY', 2), ('OX', 14), ('K', 12)], []] - -------------------------------------------------------------------------------------- - Temporal Loops O W I -------------------------------------------------------------------------------------- - for K in [0:12) sram_2MB dram sram_2MB -------------------------------------------------------------------------------------- - for OX in [0:14) sram_2MB sram_32KB sram_2MB -------------------------------------------------------------------------------------- - for OY in [0:2) sram_2MB sram_32KB sram_2MB -------------------------------------------------------------------------------------- - for OY in [0:7) sram_2MB sram_32KB sram_2MB -------------------------------------------------------------------------------------- - for FY in [0:11) rf_2B sram_32KB sram_2MB -------------------------------------------------------------------------------------- - for FX in [0:11) 
rf_2B sram_32KB sram_2MB -------------------------------------------------------------------------------------- +:: + + ********* Temporal Mapping - CostModelEvaluation(layer=LayerNode_0, core=1) ********* + O (O): [[('FX', 11), ('FY', 11)], [('OY', 7), ('OY', 2), ('OX', 14), ('K', 12)], []] + W (I2): [[], [('FX', 11), ('FY', 11), ('OY', 7), ('OY', 2), ('OX', 14)], [('K', 12)]] + I (I1): [[('FX', 11), ('FY', 11), ('OY', 7), ('OY', 2), ('OX', 14), ('K', 12)], []] + + ------------------------------------------------------------------------------------- + Temporal Loops O W I + ------------------------------------------------------------------------------------- + for K in [0:12) sram_2MB dram sram_2MB + ------------------------------------------------------------------------------------- + for OX in [0:14) sram_2MB sram_32KB sram_2MB + ------------------------------------------------------------------------------------- + for OY in [0:2) sram_2MB sram_32KB sram_2MB + ------------------------------------------------------------------------------------- + for OY in [0:7) sram_2MB sram_32KB sram_2MB + ------------------------------------------------------------------------------------- + for FY in [0:11) rf_2B sram_32KB sram_2MB + ------------------------------------------------------------------------------------- + for FX in [0:11) rf_2B sram_32KB sram_2MB + ------------------------------------------------------------------------------------- The top loop is the outer-most for loop, where as the bottom loop is the inner-most. Going from bottom to top, loops are allocated to the innermost memories of the memory hierarchy for each operand. The names of the memories match the names of the ``MemoryInstance`` object used to create the memory level using the ``add_memory()`` call in the ``MemoryHierarchy``. 
diff --git a/searchindex.js b/searchindex.js index dca83656..3673cd5f 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["api", "code-documentation", "contribute", "future", "getting-started", "hardware", "index", "installation", "mapping", "outputs", "publications", "stages", "user-guide", "visualization", "workload"], "filenames": ["api.rst", "code-documentation.rst", "contribute.rst", "future.rst", "getting-started.rst", "hardware.rst", "index.rst", "installation.rst", "mapping.rst", "outputs.rst", "publications.rst", "stages.rst", "user-guide.rst", "visualization.rst", "workload.rst"], "titles": ["ZigZag API", "Code Documentation", "Contribute", "Future changes", "Getting Started", "Hardware Architecture", "Welcome to ZigZag\u2019s documentation!", "Installing ZigZag", "Mapping", "Outputs", "Publications", "Stages", "User Guide", "Visualization", "Workload"], "terms": {"onc": [0, 7], "i": [0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 14], "avail": [0, 2, 5, 7], "your": [0, 2, 7, 8], "python": [0, 2, 4, 5, 7, 14], "path": [0, 4, 14], "you": [0, 2, 3, 4, 5, 7, 8, 9, 11, 14], "can": [0, 2, 3, 4, 5, 7, 8, 9, 11, 13, 14], "import": [0, 2, 4, 5, 13, 14], "ani": [0, 2, 5, 7, 11], "file": [0, 2, 4, 5, 7, 8, 9, 11, 14], "from": [0, 5, 11, 13, 14], "thi": [0, 1, 2, 4, 5, 6, 8, 9, 11, 13, 14], "function": [0, 2, 7, 11, 13], "take": [0, 3, 5, 7, 14], "an": [0, 2, 4, 5, 7, 9, 11], "workload": [0, 4, 6, 8, 11, 12], "hardwar": [0, 4, 6, 8, 12, 14], "architectur": [0, 4, 6, 10, 11, 12], "map": [0, 3, 4, 5, 6, 9, 12], "return": [0, 9, 11], "perform": [0, 5, 10], "execut": [0, 2, 4, 5, 8, 11, 14], "model": [0, 3, 4, 6], "": [0, 2, 3, 4, 5, 8, 10, 11, 13], "layer": [0, 4, 6, 8, 9, 11, 13], "under": [0, 2, 4, 8], "given": [0, 4, 11], "constraint": [0, 4], "energi": [0, 3, 4, 5, 9, 10, 11], "latenc": [0, 3, 4, 5, 6, 9, 11], "cme": [0, 11, 13], "acceler": [0, 4, 6, 8, 9, 10, 11, 14], "opt": 0, "dump_filename_pattern": [0, 4], "output": [0, 4, 5, 6, 11, 
12, 13, 14], "datetim": [0, 11], "json": [0, 9, 11], "pickle_filenam": 0, "list_of_cm": [0, 13], "pickl": [0, 11, 13], "The": [0, 1, 2, 4, 5, 6, 8, 9, 12, 13, 14], "input": [0, 3, 4, 5, 8, 9, 14], "ar": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14], "A": [0, 5, 6, 8, 10, 14], "neural": [0, 5, 10], "network": [0, 4, 5, 10], "defin": [0, 4, 5, 9, 10, 11, 14], "onnx": [0, 3, 4, 8, 11], "format": [0, 1, 2, 9], "own": [0, 11, 14], "high": [0, 5, 10], "level": [0, 5, 11, 13], "hw": [0, 3, 4, 6, 8, 11], "descript": [0, 8, 11], "specifi": [0, 5, 9], "core": [0, 4, 6, 8, 11, 13, 14], "alloc": [0, 5, 8, 10, 11, 13], "spatial": [0, 3, 5, 8, 9, 10, 14], "option": [0, 5], "tempor": [0, 3, 4, 6, 9], "order": [0, 2, 3, 5, 10, 11], "memori": [0, 3, 6, 8, 9, 10, 11, 13, 14], "operand": [0, 5, 8, 11, 13, 14], "link": [0, 1, 5, 8, 14], "optim": [0, 4, 6, 10], "target": 0, "It": [0, 4, 9, 11, 14], "edp": [0, 11], "delai": 0, "product": 0, "name": [0, 4, 5, 8, 13, 14], "result": [0, 6, 9, 13], "which": [0, 4, 5, 8, 9, 11, 13, 14], "includ": [0, 2, 3, 5, 9], "all": [0, 2, 4, 5, 9, 11, 14], "detail": [0, 2, 6, 7, 11], "metadata": 0, "analys": 0, "debug": 0, "number": [0, 5, 9], "indic": [0, 14], "overal": 0, "consum": 0, "run": [0, 2, 6, 7, 8, 11], "user": [0, 3, 4, 5, 6, 7, 11], "wai": [0, 1, 2, 4, 5, 9, 11, 13, 14], "cycl": [0, 5], "count": 0, "collect": 0, "cost": [0, 3, 4, 5, 6, 14], "evalu": [0, 5, 11], "stand": 0, "we": [0, 2, 3, 4, 5, 11], "demonstr": [0, 13], "how": [0, 2, 4, 5, 7, 8, 9, 11, 13], "us": [0, 1, 2, 3, 4, 5, 7, 8, 9, 11, 13, 14], "multipl": [0, 5, 6, 10, 11, 13, 14], "demo": 0, "comment": [1, 2], "within": [1, 5, 8, 11, 14], "sourc": [1, 2], "zigzag": [1, 4, 5, 8, 9, 11, 12, 13, 14], "framework": [1, 2, 4, 6, 7, 8, 10, 11, 12, 14], "support": [1, 3, 4, 5, 6, 11], "auto": [1, 5, 10], "doxygen": 1, "automat": [1, 4, 5, 6, 8, 11, 14], "updat": [1, 2, 3, 11], "soon": 1, "somebodi": 1, "push": 1, "someth": 1, "master": 1, "branch": 1, "github": [1, 7, 10], "repo": [1, 
14], "project": [1, 6, 10], "follow": [1, 2, 4, 5, 8, 9, 11, 12, 14], "access": [1, 3, 5], "latest": 1, "version": [1, 6, 11], "when": [2, 8, 11, 14], "pleas": [2, 9, 11], "consid": [2, 11], "googl": 2, "style": 2, "guid": [2, 3, 6], "docstr": 2, "class": [2, 3, 11], "method": [2, 5, 9], "exampl": [2, 4, 8, 9, 11, 13, 14], "found": [2, 11, 14], "throughout": 2, "here": [2, 3, 4, 8, 10, 14], "accordingli": 2, "In": [2, 4, 5, 6, 9, 11, 14], "packag": [2, 6], "call": [2, 5, 13], "bumpver": 2, "twine": 2, "These": [2, 11], "instal": [2, 6], "pip": [2, 7], "first": [2, 5, 6, 11], "pull": 2, "make": [2, 3, 5, 7, 9], "sure": 2, "have": [2, 4, 5, 11, 14], "remot": 2, "cahng": 2, "merg": 2, "conflict": 2, "chang": [2, 5, 6, 11], "commit": 2, "Then": [2, 14], "command": [2, 4, 14], "patch": 2, "m": [2, 5, 10], "upload": 2, "dist": 2, "zigzag_ds": 2, "x": [2, 5], "y": [2, 5], "z": [2, 5], "whl": 2, "dse": [2, 6, 7], "tar": 2, "gz": 2, "provid": [2, 4, 5, 6, 7, 8, 11, 12, 14], "sever": 2, "differ": [2, 4, 5, 6, 9, 11, 14], "There": [2, 5, 9], "mani": [2, 5], "public": [2, 6], "relat": 2, "page": [2, 6], "allow": [2, 5, 8, 11], "everyon": 2, "get": [2, 6, 7], "familiar": 2, "more": [2, 3, 4, 5, 7, 11, 12, 14], "about": [2, 9, 11], "implement": 2, "ad": [2, 5, 8], "mandatori": 2, "what": [2, 5, 8, 9, 11], "doe": [2, 5], "achiev": [2, 5], "newli": 2, "explicit": 2, "resid": [2, 11, 13], "doc": 2, "folder": [2, 11], "restructuredtext": 2, "rst": 2, "decid": 2, "would": [2, 14], "best": [2, 11], "fit": 2, "exist": [2, 11], "one": [2, 4, 5, 11], "If": [2, 5, 7, 9, 11, 14], "creat": [2, 8, 13], "lower": [2, 5, 11], "case": [2, 3, 6, 14], "letter": [2, 14], "hyphen": 2, "between": [2, 4, 6, 14], "word": [2, 5], "after": [2, 5, 7, 11], "need": [2, 4, 5, 8, 14], "add": [2, 3, 5, 7, 14], "toctre": 2, "index": [2, 6], "same": [2, 3, 5], "webpag": 2, "sphinx": 2, "should": [2, 5, 8, 9, 14], "both": [2, 3, 5], "press": 2, "theme": 2, "easi": [2, 9], "through": [2, 4, 5, 6, 7, 8, 10, 11, 
14], "requir": [2, 4, 5, 7, 8, 9, 11, 14], "txt": [2, 7], "cd": 2, "r": [2, 5, 7], "simpli": [2, 11], "b": [2, 5, 14], "html": 2, "entri": [2, 8], "point": [2, 6], "guidlin": 2, "paramet": [2, 5, 11], "constructor": 2, "download": 2, "describ": [2, 5, 14], "successfulli": 2, "configur": [2, 10], "done": 2, "either": [2, 5], "gui": 2, "conf": 2, "find": [3, 4, 6, 11], "plan": 3, "oper": [3, 8, 11], "ancestor": 3, "layernod": [3, 14], "dummynod": [3, 14], "fix": 3, "loop": [3, 10, 11, 14], "multi": [3, 6], "dimension": 3, "unrol": [3, 5], "fraction": 3, "account": [3, 11, 14], "bandwidth": [3, 5], "loma": [3, 4, 10, 11], "memoryalloc": 3, "besid": [3, 4, 5, 11], "capac": [3, 5], "lpf": 3, "limit": [3, 10], "visualis": 3, "tutori": 3, "remak": 3, "tabl": [3, 5], "without": 3, "df": [3, 5], "stage": [3, 4, 6, 9, 12], "stack": 3, "combin": [3, 5, 8, 9, 11], "common": [3, 5], "versatil": 4, "tool": 4, "estim": [4, 6], "dl": [4, 6], "design": [4, 5, 6, 11], "multitud": 4, "set": [4, 11], "As": [4, 11], "step": [4, 11], "nn": [4, 5, 14], "onto": [4, 6, 8], "go": [4, 5, 13], "alexnet": 4, "ha": [4, 5, 11, 12, 14], "been": 4, "shape": 4, "infer": [4, 5], "mean": [4, 5, 11], "tensor": [4, 5, 14], "intermedi": [4, 5, 14], "inform": [4, 5, 8, 9, 11, 12, 14], "know": [4, 5, 8, 9, 14], "correctli": [4, 5, 14], "tpu": [4, 5], "like": [4, 11, 14], "tpu_lik": 4, "py": [4, 8, 11, 14], "must": [4, 11], "suggest": 4, "resourc": [4, 6, 8], "alexnet_on_tpu_lik": 4, "gener": [4, 5, 6, 8, 9, 11, 12, 13], "ran": 4, "main": [4, 5, 7, 9, 14], "pars": [4, 11, 14], "contain": [4, 8, 14], "program": 4, "flow": [4, 11], "document": [4, 7, 11, 12], "main_onnx": [4, 14], "note": [4, 9], "construct": [4, 5], "becaus": 4, "object": [4, 5, 9, 11, 13, 14], "respect": [4, 5, 9], "modul": [4, 6], "other": [4, 5, 11, 14], "also": [4, 5, 7, 8, 9, 11, 14], "see": [4, 9, 14], "section": [4, 5, 9, 11], "manual": [4, 5, 6, 8, 11], "definit": [4, 8, 9, 11], "resnet18": [4, 8, 14], "salsa": [4, 11], "search": 
[4, 6], "engin": [4, 6, 11], "util": [4, 9, 13], "schedul": [4, 5, 6, 11], "than": [4, 11], "main_onnx_salsa": 4, "dure": 4, "save": [4, 9], "depend": [4, 7, 14], "total": [4, 5, 11], "five": [4, 12], "each": [4, 5, 9, 11, 13, 14], "node": [4, 8, 9, 11], "onnxmodelparserstag": [4, 8, 11, 14], "wa": 4, "minimallatencystag": [4, 11], "refer": [4, 14], "introduc": 5, "concept": [5, 11], "well": 5, "known": 5, "start": [5, 6, 7, 11], "smallest": 5, "build": [5, 12, 14], "block": [5, 12, 13], "work": [5, 9], "our": [5, 11], "up": [5, 11], "toward": [5, 10], "summat": 5, "accumul": 5, "across": [5, 10, 11], "data": [5, 9, 11], "activ": 5, "train": 5, "weight": [5, 14], "typic": [5, 8], "multipli": 5, "two": [5, 9], "element": [5, 11], "attribut": [5, 9, 14], "input_precis": 5, "list": [5, 11, 13, 14], "precis": [5, 14], "bit": [5, 14], "output_precis": 5, "e": [5, 8, 10, 11, 13, 14], "g": [5, 8, 11, 13], "sum": [5, 11], "energy_cost": 5, "singl": [5, 11], "area": [5, 10], "overhead": 5, "inferenc": 5, "million": 5, "parallel": [5, 8, 14], "significantli": 5, "speed": 5, "comput": [5, 6, 8, 10, 14], "increas": 5, "effici": 5, "cover": 5, "later": [5, 11], "dimens": [5, 11, 14], "size": [5, 14], "explain": [5, 9, 11], "introduct": 5, "operational_unit": 5, "built": 5, "dictionari": [5, 8, 14], "kei": [5, 8], "being": [5, 11], "identifi": 5, "d1": 5, "d2": 5, "valu": [5, 11, 14], "along": 5, "store": 5, "attach": 5, "hierarch": 5, "fashion": 5, "big": 5, "term": 5, "write": [5, 8], "read": [5, 14], "its": [5, 7, 9, 11], "port": 5, "r_bw": 5, "w_bw": 5, "per": 5, "r_cost": 5, "w_cost": 5, "r_port": 5, "w_port": 5, "rw_port": 5, "address": 5, "receiv": [5, 11], "correspond": [5, 11], "For": [5, 11, 13, 14], "now": 5, "assum": [5, 14], "1": [5, 10, 13], "prefetch": 5, "behavior": 5, "thank": 5, "determinist": 5, "dataflow": [5, 10], "min_r_granular": 5, "min_w_granular": 5, "minim": [5, 11], "granular": 5, "better": 5, "half": 5, "quarter": 5, "pattern": [5, 11], "wordlength": 
5, "256": 5, "100": 5, "128": 5, "approximatlli": 5, "onli": [5, 9, 11, 14], "50": 5, "spec": [5, 7], "encod": [5, 8], "interconnect": [5, 11], "add_memori": [5, 13], "where": [5, 11, 13, 14], "connect": [5, 11], "higher": [5, 11], "To": [5, 11], "anoth": [5, 11], "decoupl": 5, "algorithm": [5, 6, 8, 10, 14], "side": [5, 14], "oppos": 5, "w": [5, 8, 10], "think": [5, 11], "virtual": [5, 14], "actual": [5, 14], "memory_operand_link": [5, 8, 14], "similarli": 5, "form": 5, "accompani": 5, "served_dimens": 5, "serv": [5, 11], "hot": 5, "tupl": [5, 11], "2": [5, 13], "3": [5, 7], "4": [5, 10], "four": 5, "none": [5, 14], "0": 5, "12": [5, 13], "them": [5, 11], "lastli": 5, "assign": 5, "movement": 5, "possibl": [5, 14], "type": [5, 12, 14], "fh": 5, "th": 5, "low": 5, "fl": 5, "tl": 5, "written": 5, "current": [5, 9], "out": 5, "At": 5, "time": [5, 8], "syntax": 5, "port_typ": 5, "_port_": 5, "port_numb": 5, "rw": 5, "equal": 5, "altern": [5, 7, 14], "default": [5, 8, 11], "intern": [5, 7, 10, 11], "memoryhierarchi": [5, 13], "extend": 5, "networkx": 5, "digraph": 5, "so": [5, 11, 14], "operational_arrai": 5, "new": [5, 6, 11], "memorylevel": 5, "graph": [5, 11, 14], "memory_inst": 5, "memoryinst": [5, 13], "port_alloc": 5, "direction": 5, "abov": 5, "togeth": [5, 14], "id": [5, 8, 14], "memory_hierarchi": 5, "core_set": 5, "compris": 5, "global_buff": 5, "share": 5, "un": 5, "repositori": [5, 7], "5": 5, "dnn": [5, 10], "meta": 5, "prototyp": 5, "edg": [5, 14], "ascend": 5, "tesla": 5, "npu": 5, "depth": [5, 6], "research": 5, "fair": 5, "relev": [5, 9], "comparison": 5, "normal": 5, "1024": [5, 14], "mac": 5, "maxim": 5, "2mb": 5, "global": 5, "buffer": 5, "gb": 5, "kept": 5, "local": 5, "shown": 5, "idx": 5, "7": 5, "9": 5, "variant": 5, "everi": [5, 8], "chip": 5, "denot": 5, "end": [5, 7, 11], "6": [5, 10, 11], "8": [5, 7, 10], "10": [5, 10], "k": [5, 10, 13, 14], "channel": [5, 14], "c": [5, 14], "ox": [5, 14], "oi": [5, 14], "featur": 5, "fx": [5, 14], "fy": [5, 
13, 14], "h": [5, 10], "sumbul": [5, 10], "t": [5, 8, 10, 14], "f": 5, "wu": [5, 10], "li": 5, "sarwar": 5, "koven": 5, "murphi": 5, "trotzki": 5, "cai": 5, "ansari": 5, "d": [5, 10], "morri": 5, "liu": [5, 10], "kim": 5, "beign": [5, 10], "lab": 5, "system": [5, 10, 11], "integr": [5, 10], "vr": 5, "custom": [5, 7, 8, 14], "power": 5, "7nm": 5, "technologi": 5, "codec": 5, "avatar": 5, "2022": [5, 10], "ieee": [5, 10], "circuit": [5, 10], "confer": [5, 10], "cicc": 5, "pp": [5, 10], "01": 5, "08": 5, "n": [5, 10], "p": [5, 10], "jouppi": 5, "young": 5, "patil": 5, "patterson": 5, "agraw": 5, "bajwa": 5, "bate": 5, "bhatia": 5, "boden": 5, "borcher": 5, "boyl": 5, "l": [5, 10], "cantin": 5, "chao": 5, "clark": 5, "j": 5, "coriel": 5, "dalei": 5, "dau": 5, "dean": 5, "gelb": 5, "v": [5, 10], "ghaemmaghami": 5, "gottipati": 5, "gulland": 5, "hagmann": 5, "ho": 5, "hogberg": 5, "hu": 5, "hundt": 5, "hurt": 5, "ibarz": 5, "jaffei": 5, "jaworski": 5, "kaplan": 5, "khaitan": 5, "killebrew": 5, "koch": 5, "kumar": 5, "laci": 5, "laudon": 5, "law": 5, "le": 5, "leari": 5, "luck": 5, "lundin": 5, "mackean": 5, "maggior": 5, "mahoni": 5, "miller": 5, "nagarajan": 5, "narayanaswami": 5, "ni": 5, "nix": 5, "norri": 5, "omernick": 5, "penukonda": 5, "phelp": 5, "ross": 5, "salek": 5, "samadiani": 5, "severn": 5, "sizikov": 5, "snelham": 5, "souter": 5, "steinberg": 5, "swing": 5, "tan": 5, "thorson": 5, "tian": 5, "toma": 5, "tuttl": 5, "vasudevan": 5, "walter": 5, "wang": 5, "wilcox": 5, "yoon": 5, "datacent": 5, "analysi": 5, "process": [5, 11], "sigarch": 5, "archit": 5, "vol": [5, 10], "45": 5, "jun": 5, "2017": 5, "yazdanbakhsh": 5, "seshadri": 5, "akin": 5, "convolut": [5, 8, 14], "arxiv": [5, 10], "print": [5, 10, 13], "2102": 5, "10423": 5, "feb": 5, "2021": [5, 10], "liao": 5, "tu": 5, "xia": 5, "zhou": 5, "yuan": 5, "scalabl": 5, "unifi": 5, "ubiquit": 5, "deep": [5, 6, 10], "industri": 5, "track": 5, "paper": [5, 10, 14], "symposium": [5, 10], "hpca": [5, 10], "789": 
5, "801": 5, "talp": 5, "sarma": 5, "venkataramanan": 5, "bannon": 5, "mcgee": 5, "floer": 5, "jalot": 5, "hsiong": 5, "arora": 5, "gorti": 5, "sachdev": 5, "solut": 5, "full": 5, "self": [5, 9], "drive": 5, "micro": 5, "40": 5, "25": 5, "35": 5, "2020": [5, 10], "space": [6, 11], "explor": [6, 11], "learn": 6, "bridg": 6, "gap": 6, "decis": 6, "special": 6, "fast": [6, 10], "accur": 6, "analyt": [6, 10], "crucial": 6, "part": [6, 8], "clone": 6, "analyz": [6, 10], "api": [6, 7, 13], "get_hardware_performance_zigzag": 6, "visual": [6, 12], "futur": 6, "contribut": [6, 14], "guidelin": [6, 14], "upgrad": 6, "develop": 6, "idea": 6, "explan": 6, "studi": 6, "extens": 6, "cross": 6, "fuse": 6, "code": [6, 13], "re": 7, "interest": [7, 11], "modif": [7, 9], "directli": 7, "venv": 7, "conda": 7, "environ": 7, "look": [7, 11], "want": [7, 8, 11, 14], "git": 7, "com": 7, "kuleuven": 7, "mica": 7, "http": 7, "anaconda": 7, "argument": [7, 11], "autom": [8, 10], "some": [8, 11, 14], "aspect": [8, 9, 11], "interfac": 8, "core_alloc": [8, 14], "spatial_map": [8, 9, 14], "strategi": [8, 14], "spatialmappinggeneratorstag": [8, 11, 14], "hierarchi": [8, 9, 11, 13], "o": [8, 14], "extra": [8, 11], "flexibl": 8, "scheme": 8, "don": 8, "put": 8, "safe": 8, "bet": 8, "copi": [8, 11], "exact": 8, "detect": 8, "interpret": 9, "predefin": 9, "costmodelevalu": [9, 11, 13], "knowledg": 9, "irrelev": 9, "handl": 9, "complexhandl": 9, "insid": [9, 11, 14], "represent": [9, 11], "invok": 9, "pass": 9, "__simplejsonrepr__": 9, "convert": [9, 11, 14], "off": [9, 10], "load": [9, 14], "reli": 9, "def": 9, "simpl": [9, 11], "energy_tot": 9, "latency_total2": 9, "standard": 9, "filename_pattern": [9, 11], "lose": 9, "etc": [9, 11], "concern": 9, "__jsonrepr__": 9, "temporal_map": 9, "mem_utili_shar": 9, "word_access": 9, "memory_word_access": 9, "operational_energi": 9, "mac_energi": 9, "memory_energi": 9, "mem_energi": 9, "energy_breakdown_per_level": 9, "energy_breakdown": 9, 
"energy_breakdown_per_level_per_operand": 9, "energy_breakdown_furth": 9, "latency_without_onloading_without_offload": 9, "latency_total0": 9, "latency_with_onloading_without_offload": 9, "latency_total1": 9, "latency_with_onloading_with_offload": 9, "goal": [9, 11], "straightforward": 9, "care": 9, "certain": 9, "modifi": [9, 11], "parser": 9, "pointer": 10, "mei": 10, "houshmand": 10, "jain": 10, "giraldo": 10, "verhelst": 10, "enlarg": 10, "joint": 10, "transact": 10, "70": 10, "1160": 10, "1174": 10, "aug": 10, "doi": 10, "1109": 10, "tc": 10, "3059962": 10, "uniform": 10, "divers": 10, "test": 10, "europ": 10, "exhibit": 10, "date": 10, "antwerp": 10, "belgium": 10, "220": 10, "225": 10, "23919": 10, "date54114": 10, "9774728": 10, "slide": 10, "video": 10, "symon": 10, "base": [10, 11], "3rd": 10, "artifici": 10, "intellig": 10, "aica": 10, "washington": 10, "dc": 10, "usa": 10, "aicas51828": 10, "9458493": 10, "coseman": 10, "papista": 10, "bhattacharje": 10, "deback": 10, "mallik": 10, "verkest": 10, "opportun": 10, "emerg": 10, "analog": 10, "electron": 10, "devic": 10, "meet": 10, "iedm": 10, "san": 10, "francisco": 10, "ca": 10, "29": 10, "iedm13553": 10, "9372006": 10, "accuraci": 10, "trade": 10, "contemporari": 10, "9458553": 10, "colleman": 10, "verelst": 10, "tuytelaar": 10, "processor": 10, "dynam": 10, "ifip": 10, "29th": 10, "veri": 10, "larg": [10, 14], "scale": 10, "vlsi": 10, "soc": 10, "singapor": 10, "soc53125": 10, "9607013": 10, "zhu": 10, "sun": 10, "mobil": 10, "transform": 10, "4th": 10, "incheon": 10, "korea": 10, "republ": 10, "142": 10, "145": 10, "aicas54282": 10, "9869945": 10, "goetschalckx": 10, "enabl": 10, "2023": 10, "karl": 10, "heterogen": 10, "exploit": 10, "fine": 10, "grain": 10, "48550": 10, "2212": 10, "10612": 10, "fasfou": 10, "genet": 10, "date56975": 10, "10137070": 10, "modularli": 11, "easili": 11, "adapt": 11, "sequenc": 11, "determin": 11, "mainstag": 11, "initi": 11, "acceleratorparserstag": 11, 
"simplesavestag": 11, "workloadstag": 11, "sm": 11, "lomastag": 11, "tm": 11, "costmodelstag": 11, "accelerator_path": 11, "arg": 11, "onnx_model_path": 11, "mapping_path": 11, "loma_lpf_limit": 11, "loma_show_progress_bar": 11, "true": [11, 14], "show": [11, 13], "progress": 11, "bar": [11, 13], "while": 11, "over": 11, "similar": 11, "those": 11, "pipelin": [11, 14], "remain": 11, "said": 11, "further": 11, "label": 11, "below": 11, "fed": 11, "far": 11, "discuss": 11, "last": 11, "revers": 11, "hold": 11, "finish": 11, "conbim": 11, "yield": 11, "chain": 11, "manipul": 11, "invoc": 11, "lowest": 11, "still": 11, "miss": 11, "__init__": 11, "workloadparserstag": 11, "workload_path": 11, "generalparameteriteratorstag": 11, "whose": 11, "predetermin": 11, "plottemporalmappingsstag": 11, "substag": 11, "keep": 11, "minimalenergystag": 11, "list_of_cal": 11, "minimaledpstag": 11, "sumstag": 11, "listifystag": 11, "instead": [11, 14], "removeextrainfostag": 11, "strip": 11, "info": 11, "subcal": 11, "cachebeforeyieldstag": 11, "cach": 11, "break": 11, "top": [11, 13], "bottom": [11, 13], "skipifdumpexistsstag": 11, "check": 11, "alreadi": 11, "skip": 11, "multiprocessingspawnstag": 11, "multiprocess": 11, "multiprocessinggatherstag": 11, "completesavestag": 11, "picklesavestag": 11, "dumpstag": 11, "salsastag": 11, "simul": 11, "anneal": 11, "temporalorderingconversionstag": 11, "spatialmappingconversionstag": 11, "auser": 11, "arrai": 11, "present": [11, 14], "inner": [11, 13], "most": [11, 13], "config": 11, "searchunusedmemorystag": 11, "instanc": 11, "usag": 11, "next": 11, "place": 11, "befor": 11, "workload_data_always_from_top_mem": 11, "fals": [11, 14], "final": [11, 14], "entir": 11, "highest": 11, "travel": 11, "removeunusedmemorystag": 11, "remov": 11, "unus": 11, "accord": 11, "let": 11, "sai": 11, "metric": 11, "easiest": 11, "intend": 11, "behaviour": 11, "guarante": 11, "correct": 11, "taken": 11, "inherit": 11, "abstract": 11, "callabl": 11, "kwarg": 
11, "second": 11, "extra_info": 11, "reduct": 11, "statement": 11, "outsid": 11, "happen": 11, "regard": 12, "major": 12, "compon": 12, "termin": 13, "pickle_load": 13, "print_map": 13, "layernode_0": 13, "i2": 13, "i1": 13, "dram": 13, "outer": 13, "innermost": 13, "match": 13, "plot": 13, "bar_plot_cost_model_evaluations_breakdown": 13, "plot_cm": 13, "jpg": 13, "produc": 13, "chart": 13, "recommend": 14, "context": 14, "ml": 14, "often": 14, "recogn": 14, "complet": 14, "conv": 14, "qlinearconv": 14, "matmul": 14, "gemm": 14, "accelerat": 14, "incur": 14, "zero": 14, "feel": 14, "free": 14, "open": 14, "issu": 14, "yourself": 14, "rather": 14, "avoid": 14, "origin": 14, "discard": 14, "doesn": 14, "do": 14, "onnx_model": 14, "modelproto": 14, "my_model_with_internal_data": 14, "save_model": 14, "save_as_external_data": 14, "all_tensors_to_one_fil": 14, "locat": 14, "external_data_filenam": 14, "size_threshold": 14, "convert_attribut": 14, "raw": 14, "specif": 14, "directori": 14, "shape_infer": 14, "my_model": 14, "inferred_model": 14, "infer_shap": 14, "my_inferred_model": 14, "moreov": 14, "repres": 14, "equat": 14, "small": 14, "wherea": 14, "alwai": 14, "freeli": 14, "dimension_rel": 14, "relationship": 14, "stride": 14, "filter": 14, "dilat": 14, "rate": 14, "loop_dim_s": 14, "left": 14, "hand": 14, "operand_precis": 14, "partial": 14, "o_fin": 14, "operand_sourc": 14, "come": 14, "constant_operand": 14, "constant": 14, "prior": 14, "readm": 14, "notat": 14, "batch": 14, "row": 14, "column": 14, "kernel": 14}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"zigzag": [0, 2, 6, 7, 10], "api": 0, "get_hardware_performance_zigzag": 0, "code": [1, 2], "document": [1, 2, 3, 6], "contribut": 2, "guidelin": 2, "upgrad": 2, "project": 2, "version": 2, "develop": 2, "write": 2, "new": [2, 10], "part": 2, "gener": [2, 10], "build": 2, "local": 2, "which": 2, "support": [2, 10, 14], "doxygen": 2, "futur": 3, "chang": 3, "framework": 3, "get": 4, "start": 
4, "first": [4, 10], "run": 4, "analyz": 4, "result": [4, 11], "hardwar": [5, 11], "architectur": 5, "oper": [5, 14], "unit": 5, "arrai": 5, "memori": 5, "instanc": 5, "hierarchi": 5, "core": [5, 10], "hw": 5, "acceler": 5, "model": [5, 10, 11, 14], "exampl": 5, "specif": 5, "set": 5, "refer": 5, "welcom": 6, "": [6, 14], "content": 6, "indic": 6, "tabl": 6, "instal": 7, "packag": 7, "manual": [7, 14], "clone": 7, "prerequisit": 7, "map": [8, 10, 11, 13], "user": [8, 12], "defin": 8, "constraint": 8, "output": 9, "simplesavestag": 9, "completesavestag": 9, "creat": [9, 11], "custom": [9, 11], "savestag": 9, "public": 10, "The": [10, 11], "idea": 10, "detail": 10, "latenc": [10, 13], "explan": 10, "tempor": [10, 11, 13], "search": 10, "engin": 10, "differ": 10, "design": 10, "space": 10, "explor": 10, "case": 10, "studi": 10, "extens": 10, "cross": 10, "layer": [10, 14], "depth": 10, "schedul": 10, "multi": 10, "fuse": 10, "stage": 11, "introduct": 11, "main": 11, "entri": 11, "point": 11, "sequenti": 11, "call": 11, "back": 11, "pass": 11, "implement": 11, "input": 11, "parser": 11, "iter": 11, "plot": 11, "reduc": 11, "optim": 11, "save": [11, 14], "dump": 11, "spatial": 11, "cost": 11, "modif": 11, "your": [11, 14], "guid": 12, "visual": 13, "loop": 13, "o": 13, "w": 13, "i": 13, "ox": 13, "0": 13, "14": 13, "sram_2mb": 13, "sram_32kb": 13, "oi": 13, "7": 13, "fx": 13, "11": 13, "rf_2b": 13, "energi": 13, "breakdown": 13, "workload": 14, "onnx": 14, "extern": 14, "data": 14, "infer": 14, "an": 14, "shape": 14, "definit": 14}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 58}, "alltitles": {"ZigZag API": [[0, "zigzag-api"]], "get_hardware_performance_zigzag()": [[0, "get-hardware-performance-zigzag"]], "Code 
Documentation": [[1, "code-documentation"]], "Contribute": [[2, "contribute"]], "Contributing guidelines": [[2, "contributing-guidelines"]], "Upgrading the project version (for ZigZag developers)": [[2, "upgrading-the-project-version-for-zigzag-developers"]], "Documentation": [[2, "documentation"], [3, "documentation"]], "Writing new parts for the general documentation": [[2, "writing-new-parts-for-the-general-documentation"]], "Building the general documentation locally": [[2, "building-the-general-documentation-locally"]], "Writing code which supports the code documentation with Doxygen": [[2, "writing-code-which-supports-the-code-documentation-with-doxygen"]], "Building the code documentation locally": [[2, "building-the-code-documentation-locally"]], "Future changes": [[3, "future-changes"]], "Framework": [[3, "framework"]], "Getting Started": [[4, "getting-started"]], "First run": [[4, "first-run"]], "Analyzing results": [[4, "analyzing-results"]], "Hardware Architecture": [[5, "hardware-architecture"]], "Operational Unit": [[5, "operational-unit"]], "Operational Array": [[5, "operational-array"]], "Memory Instance": [[5, "memory-instance"]], "Memory Hierarchy": [[5, "memory-hierarchy"]], "Core": [[5, "core"]], "HW Accelerator Model": [[5, "hw-accelerator-model"]], "Modelled examples": [[5, "modelled-examples"]], "Specific settings": [[5, "specific-settings"]], "References": [[5, "references"]], "Welcome to ZigZag\u2019s documentation!": [[6, "welcome-to-zigzag-s-documentation"]], "Contents:": [[6, null]], "Indices and tables": [[6, "indices-and-tables"]], "Installing ZigZag": [[7, "installing-zigzag"]], "Installing as a package": [[7, "installing-as-a-package"]], "Manual clone": [[7, "manual-clone"]], "Prerequisites": [[7, "prerequisites"]], "Installation": [[7, "installation"]], "Mapping": [[8, "mapping"]], "User-defined mapping constraints": [[8, "user-defined-mapping-constraints"]], "Outputs": [[9, "outputs"]], "SimpleSaveStage": [[9, "simplesavestage"]], 
"CompleteSaveStage": [[9, "completesavestage"]], "Creating a custom SaveStage": [[9, "creating-a-custom-savestage"]], "Publications": [[10, "publications"]], "The general idea of ZigZag": [[10, "the-general-idea-of-zigzag"]], "Detailed latency model explanation": [[10, "detailed-latency-model-explanation"]], "The new temporal mapping search engine": [[10, "the-new-temporal-mapping-search-engine"]], "Different design space exploration case studies": [[10, "different-design-space-exploration-case-studies"]], "Extension to support cross-layer depth-first scheduling": [[10, "extension-to-support-cross-layer-depth-first-scheduling"]], "Extension to support multi-core layer-fused scheduling": [[10, "extension-to-support-multi-core-layer-fused-scheduling"]], "Stages": [[11, "stages"]], "Introduction": [[11, "introduction"]], "The main entry point": [[11, "the-main-entry-point"]], "The sequential call of stages": [[11, "the-sequential-call-of-stages"]], "The back passing of results": [[11, "the-back-passing-of-results"]], "Implemented stages": [[11, "implemented-stages"]], "Input parser stages": [[11, "input-parser-stages"]], "Iterator stage": [[11, "iterator-stage"]], "Plot stages": [[11, "plot-stages"]], "Reduce stages": [[11, "reduce-stages"]], "Optimization stages": [[11, "optimization-stages"]], "Save and dump stages": [[11, "save-and-dump-stages"]], "Temporal mapping stages": [[11, "temporal-mapping-stages"]], "Spatial mapping stages": [[11, "spatial-mapping-stages"]], "Cost model stages": [[11, "cost-model-stages"]], "Hardware modification stages": [[11, "hardware-modification-stages"]], "Creating your custom stage": [[11, "creating-your-custom-stage"]], "User Guide": [[12, "user-guide"]], "Visualization": [[13, "visualization"]], "Temporal mapping": [[13, "temporal-mapping"]], "Temporal Loops O W I": [[13, "temporal-loops-o-w-i"]], "for OX in [0:14) sram_2MB sram_32KB sram_2MB": [[13, "for-ox-in-0-14-sram-2mb-sram-32kb-sram-2mb"]], "for OY in [0:7) sram_2MB 
sram_32KB sram_2MB": [[13, "for-oy-in-0-7-sram-2mb-sram-32kb-sram-2mb"]], "for FX in [0:11) rf_2B sram_32KB sram_2MB": [[13, "for-fx-in-0-11-rf-2b-sram-32kb-sram-2mb"]], "Energy and latency breakdown": [[13, "energy-and-latency-breakdown"]], "Workload": [[14, "workload"]], "Onnx models": [[14, "onnx-models"]], "Supported onnx operators": [[14, "supported-onnx-operators"]], "Saving your onnx model with external data": [[14, "saving-your-onnx-model-with-external-data"]], "Inferring an onnx model\u2019s shapes": [[14, "inferring-an-onnx-model-s-shapes"]], "Manual layer definition": [[14, "manual-layer-definition"]]}, "indexentries": {}}) \ No newline at end of file +Search.setIndex({"docnames": ["api", "code-documentation", "contribute", "future", "getting-started", "hardware", "index", "installation", "mapping", "outputs", "publications", "stages", "user-guide", "visualization", "workload"], "filenames": ["api.rst", "code-documentation.rst", "contribute.rst", "future.rst", "getting-started.rst", "hardware.rst", "index.rst", "installation.rst", "mapping.rst", "outputs.rst", "publications.rst", "stages.rst", "user-guide.rst", "visualization.rst", "workload.rst"], "titles": ["ZigZag API", "Code Documentation", "Contribute", "Future changes", "Getting Started", "Hardware Architecture", "Welcome to ZigZag\u2019s documentation!", "Installing ZigZag", "Mapping", "Outputs", "Publications", "Stages", "User Guide", "Visualization", "Workload"], "terms": {"onc": [0, 7], "i": [0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14], "avail": [0, 2, 5, 7], "your": [0, 2, 7, 8], "python": [0, 2, 4, 5, 7, 14], "path": [0, 4, 14], "you": [0, 2, 3, 4, 5, 7, 8, 9, 11, 14], "can": [0, 2, 3, 4, 5, 7, 8, 9, 11, 13, 14], "import": [0, 2, 4, 5, 13, 14], "ani": [0, 2, 5, 7, 11], "file": [0, 2, 4, 5, 7, 8, 9, 11, 14], "from": [0, 5, 11, 13, 14], "thi": [0, 1, 2, 4, 5, 6, 8, 9, 11, 13, 14], "function": [0, 2, 7, 11, 13], "take": [0, 3, 5, 7, 14], "an": [0, 2, 4, 5, 7, 9, 11], "workload": [0, 4, 6, 8, 11, 
12], "hardwar": [0, 4, 6, 8, 12, 14], "architectur": [0, 4, 6, 10, 11, 12], "map": [0, 3, 4, 5, 6, 9, 12], "return": [0, 9, 11], "perform": [0, 5, 10], "execut": [0, 2, 4, 5, 8, 11, 14], "model": [0, 3, 4, 6], "": [0, 2, 3, 4, 5, 8, 10, 11, 13], "layer": [0, 4, 6, 8, 9, 11, 13], "under": [0, 2, 4, 8], "given": [0, 4, 11], "constraint": [0, 4], "energi": [0, 3, 4, 5, 9, 10, 11], "latenc": [0, 3, 4, 5, 6, 9, 11], "cme": [0, 11, 13], "acceler": [0, 4, 6, 8, 9, 10, 11, 14], "opt": 0, "dump_filename_pattern": [0, 4], "output": [0, 4, 5, 6, 11, 12, 13, 14], "datetim": [0, 11], "json": [0, 9, 11], "pickle_filenam": 0, "list_of_cm": [0, 13], "pickl": [0, 11, 13], "The": [0, 1, 2, 4, 5, 6, 8, 9, 12, 13, 14], "input": [0, 3, 4, 5, 8, 9, 14], "ar": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14], "A": [0, 5, 6, 8, 10, 14], "neural": [0, 5, 10], "network": [0, 4, 5, 10], "defin": [0, 4, 5, 9, 10, 11, 14], "onnx": [0, 3, 4, 8, 11], "format": [0, 1, 2, 9], "own": [0, 11, 14], "high": [0, 5, 10], "level": [0, 5, 11, 13], "hw": [0, 3, 4, 6, 8, 11], "descript": [0, 8, 11], "specifi": [0, 5, 9], "core": [0, 4, 6, 8, 11, 13, 14], "alloc": [0, 5, 8, 10, 11, 13], "spatial": [0, 3, 5, 8, 9, 10, 14], "option": [0, 5], "tempor": [0, 3, 4, 6, 9], "order": [0, 2, 3, 5, 10, 11], "memori": [0, 3, 6, 8, 9, 10, 11, 13, 14], "operand": [0, 5, 8, 11, 13, 14], "link": [0, 1, 5, 8, 14], "optim": [0, 4, 6, 10], "target": 0, "It": [0, 4, 9, 11, 14], "edp": [0, 11], "delai": 0, "product": 0, "name": [0, 4, 5, 8, 13, 14], "result": [0, 6, 9, 13], "which": [0, 4, 5, 8, 9, 11, 13, 14], "includ": [0, 2, 3, 5, 9], "all": [0, 2, 4, 5, 9, 11, 14], "detail": [0, 2, 6, 7, 11], "metadata": 0, "analys": 0, "debug": 0, "number": [0, 5, 9], "indic": [0, 14], "overal": 0, "consum": 0, "run": [0, 2, 6, 7, 8, 11], "user": [0, 3, 4, 5, 6, 7, 11], "wai": [0, 1, 2, 4, 5, 9, 11, 13, 14], "cycl": [0, 5], "count": 0, "collect": 0, "cost": [0, 3, 4, 5, 6, 14], "evalu": [0, 5, 11], "stand": 0, "we": [0, 2, 3, 4, 5, 11], 
"demonstr": [0, 13], "how": [0, 2, 4, 5, 7, 8, 9, 11, 13], "us": [0, 1, 2, 3, 4, 5, 7, 8, 9, 11, 13, 14], "multipl": [0, 5, 6, 10, 11, 13, 14], "demo": 0, "comment": [1, 2], "within": [1, 5, 8, 11, 14], "sourc": [1, 2], "zigzag": [1, 4, 5, 8, 9, 11, 12, 13, 14], "framework": [1, 2, 4, 6, 7, 8, 10, 11, 12, 14], "support": [1, 3, 4, 5, 6, 11], "auto": [1, 5, 10], "doxygen": 1, "automat": [1, 4, 5, 6, 8, 11, 14], "updat": [1, 2, 3, 11], "soon": 1, "somebodi": 1, "push": 1, "someth": 1, "master": 1, "branch": 1, "github": [1, 7, 10], "repo": [1, 14], "project": [1, 6, 10], "follow": [1, 2, 4, 5, 8, 9, 11, 12, 14], "access": [1, 3, 5], "latest": 1, "version": [1, 6, 11], "when": [2, 8, 11, 14], "pleas": [2, 9, 11], "consid": [2, 11], "googl": 2, "style": 2, "guid": [2, 3, 6], "docstr": 2, "class": [2, 3, 11], "method": [2, 5, 9], "exampl": [2, 4, 8, 9, 11, 13, 14], "found": [2, 11, 14], "throughout": 2, "here": [2, 3, 4, 8, 10, 14], "accordingli": 2, "In": [2, 4, 5, 6, 9, 11, 14], "packag": [2, 6], "call": [2, 5, 13], "bumpver": 2, "twine": 2, "These": [2, 11], "instal": [2, 6], "pip": [2, 7], "first": [2, 5, 6, 11], "pull": 2, "make": [2, 3, 5, 7, 9], "sure": 2, "have": [2, 4, 5, 11, 14], "remot": 2, "cahng": 2, "merg": 2, "conflict": 2, "chang": [2, 5, 6, 11], "commit": 2, "Then": [2, 14], "command": [2, 4, 14], "patch": 2, "m": [2, 5, 10], "upload": 2, "dist": 2, "zigzag_ds": 2, "x": [2, 5], "y": [2, 5], "z": [2, 5], "whl": 2, "dse": [2, 6, 7], "tar": 2, "gz": 2, "provid": [2, 4, 5, 6, 7, 8, 11, 12, 14], "sever": 2, "differ": [2, 4, 5, 6, 9, 11, 14], "There": [2, 5, 9], "mani": [2, 5], "public": [2, 6], "relat": 2, "page": [2, 6], "allow": [2, 5, 8, 11], "everyon": 2, "get": [2, 6, 7], "familiar": 2, "more": [2, 3, 4, 5, 7, 11, 12, 14], "about": [2, 9, 11], "implement": 2, "ad": [2, 5, 8], "mandatori": 2, "what": [2, 5, 8, 9, 11], "doe": [2, 5], "achiev": [2, 5], "newli": 2, "explicit": 2, "resid": [2, 11, 13], "doc": 2, "folder": [2, 11], "restructuredtext": 2, 
"rst": 2, "decid": 2, "would": [2, 14], "best": [2, 11], "fit": 2, "exist": [2, 11], "one": [2, 4, 5, 11], "If": [2, 5, 7, 9, 11, 14], "creat": [2, 8, 13], "lower": [2, 5, 11], "case": [2, 3, 6, 14], "letter": [2, 14], "hyphen": 2, "between": [2, 4, 6, 14], "word": [2, 5], "after": [2, 5, 7, 11], "need": [2, 4, 5, 8, 14], "add": [2, 3, 5, 7, 14], "toctre": 2, "index": [2, 6], "same": [2, 3, 5], "webpag": 2, "sphinx": 2, "should": [2, 5, 8, 9, 14], "both": [2, 3, 5], "press": 2, "theme": 2, "easi": [2, 9], "through": [2, 4, 5, 6, 7, 8, 10, 11, 14], "requir": [2, 4, 5, 7, 8, 9, 11, 14], "txt": [2, 7], "cd": 2, "r": [2, 5, 7], "simpli": [2, 11], "b": [2, 5, 14], "html": 2, "entri": [2, 8], "point": [2, 6], "guidlin": 2, "paramet": [2, 5, 11], "constructor": 2, "download": 2, "describ": [2, 5, 14], "successfulli": 2, "configur": [2, 10], "done": 2, "either": [2, 5], "gui": 2, "conf": 2, "find": [3, 4, 6, 11], "plan": 3, "oper": [3, 8, 11], "ancestor": 3, "layernod": [3, 14], "dummynod": [3, 14], "fix": 3, "loop": [3, 10, 11, 13, 14], "multi": [3, 6], "dimension": 3, "unrol": [3, 5], "fraction": 3, "account": [3, 11, 14], "bandwidth": [3, 5], "loma": [3, 4, 10, 11], "memoryalloc": 3, "besid": [3, 4, 5, 11], "capac": [3, 5], "lpf": 3, "limit": [3, 10], "visualis": 3, "tutori": 3, "remak": 3, "tabl": [3, 5], "without": 3, "df": [3, 5], "stage": [3, 4, 6, 9, 12], "stack": 3, "combin": [3, 5, 8, 9, 11], "common": [3, 5], "versatil": 4, "tool": 4, "estim": [4, 6], "dl": [4, 6], "design": [4, 5, 6, 11], "multitud": 4, "set": [4, 11], "As": [4, 11], "step": [4, 11], "nn": [4, 5, 14], "onto": [4, 6, 8], "go": [4, 5, 13], "alexnet": 4, "ha": [4, 5, 11, 12, 14], "been": 4, "shape": 4, "infer": [4, 5], "mean": [4, 5, 11], "tensor": [4, 5, 14], "intermedi": [4, 5, 14], "inform": [4, 5, 8, 9, 11, 12, 14], "know": [4, 5, 8, 9, 14], "correctli": [4, 5, 14], "tpu": [4, 5], "like": [4, 11, 14], "tpu_lik": 4, "py": [4, 8, 11, 14], "must": [4, 11], "suggest": 4, "resourc": [4, 6, 8], 
"alexnet_on_tpu_lik": 4, "gener": [4, 5, 6, 8, 9, 11, 12, 13], "ran": 4, "main": [4, 5, 7, 9, 14], "pars": [4, 11, 14], "contain": [4, 8, 14], "program": 4, "flow": [4, 11], "document": [4, 7, 11, 12], "main_onnx": [4, 14], "note": [4, 9], "construct": [4, 5], "becaus": 4, "object": [4, 5, 9, 11, 13, 14], "respect": [4, 5, 9], "modul": [4, 6], "other": [4, 5, 11, 14], "also": [4, 5, 7, 8, 9, 11, 14], "see": [4, 9, 14], "section": [4, 5, 9, 11], "manual": [4, 5, 6, 8, 11], "definit": [4, 8, 9, 11], "resnet18": [4, 8, 14], "salsa": [4, 11], "search": [4, 6], "engin": [4, 6, 11], "util": [4, 9, 13], "schedul": [4, 5, 6, 11], "than": [4, 11], "main_onnx_salsa": 4, "dure": 4, "save": [4, 9], "depend": [4, 7, 14], "total": [4, 5, 11], "five": [4, 12], "each": [4, 5, 9, 11, 13, 14], "node": [4, 8, 9, 11], "onnxmodelparserstag": [4, 8, 11, 14], "wa": 4, "minimallatencystag": [4, 11], "refer": [4, 14], "introduc": 5, "concept": [5, 11], "well": 5, "known": 5, "start": [5, 6, 7, 11], "smallest": 5, "build": [5, 12, 14], "block": [5, 12, 13], "work": [5, 9], "our": [5, 11], "up": [5, 11], "toward": [5, 10], "summat": 5, "accumul": 5, "across": [5, 10, 11], "data": [5, 9, 11], "activ": 5, "train": 5, "weight": [5, 14], "typic": [5, 8], "multipli": 5, "two": [5, 9], "element": [5, 11], "attribut": [5, 9, 14], "input_precis": 5, "list": [5, 11, 13, 14], "precis": [5, 14], "bit": [5, 14], "output_precis": 5, "e": [5, 8, 10, 11, 13, 14], "g": [5, 8, 11, 13], "sum": [5, 11], "energy_cost": 5, "singl": [5, 11], "area": [5, 10], "overhead": 5, "inferenc": 5, "million": 5, "parallel": [5, 8, 14], "significantli": 5, "speed": 5, "comput": [5, 6, 8, 10, 14], "increas": 5, "effici": 5, "cover": 5, "later": [5, 11], "dimens": [5, 11, 14], "size": [5, 14], "explain": [5, 9, 11], "introduct": 5, "operational_unit": 5, "built": 5, "dictionari": [5, 8, 14], "kei": [5, 8], "being": [5, 11], "identifi": 5, "d1": 5, "d2": 5, "valu": [5, 11, 14], "along": 5, "store": 5, "attach": 5, "hierarch": 
5, "fashion": 5, "big": 5, "term": 5, "write": [5, 8], "read": [5, 14], "its": [5, 7, 9, 11], "port": 5, "r_bw": 5, "w_bw": 5, "per": 5, "r_cost": 5, "w_cost": 5, "r_port": 5, "w_port": 5, "rw_port": 5, "address": 5, "receiv": [5, 11], "correspond": [5, 11], "For": [5, 11, 13, 14], "now": 5, "assum": [5, 14], "1": [5, 10, 13], "prefetch": 5, "behavior": 5, "thank": 5, "determinist": 5, "dataflow": [5, 10], "min_r_granular": 5, "min_w_granular": 5, "minim": [5, 11], "granular": 5, "better": 5, "half": 5, "quarter": 5, "pattern": [5, 11], "wordlength": 5, "256": 5, "100": 5, "128": 5, "approximatlli": 5, "onli": [5, 9, 11, 14], "50": 5, "spec": [5, 7], "encod": [5, 8], "interconnect": [5, 11], "add_memori": [5, 13], "where": [5, 11, 13, 14], "connect": [5, 11], "higher": [5, 11], "To": [5, 11], "anoth": [5, 11], "decoupl": 5, "algorithm": [5, 6, 8, 10, 14], "side": [5, 14], "oppos": 5, "w": [5, 8, 10, 13], "think": [5, 11], "virtual": [5, 14], "actual": [5, 14], "memory_operand_link": [5, 8, 14], "similarli": 5, "form": 5, "accompani": 5, "served_dimens": 5, "serv": [5, 11], "hot": 5, "tupl": [5, 11], "2": [5, 13], "3": [5, 7], "4": [5, 10], "four": 5, "none": [5, 14], "0": [5, 13], "12": [5, 13], "them": [5, 11], "lastli": 5, "assign": 5, "movement": 5, "possibl": [5, 14], "type": [5, 12, 14], "fh": 5, "th": 5, "low": 5, "fl": 5, "tl": 5, "written": 5, "current": [5, 9], "out": 5, "At": 5, "time": [5, 8], "syntax": 5, "port_typ": 5, "_port_": 5, "port_numb": 5, "rw": 5, "equal": 5, "altern": [5, 7, 14], "default": [5, 8, 11], "intern": [5, 7, 10, 11], "memoryhierarchi": [5, 13], "extend": 5, "networkx": 5, "digraph": 5, "so": [5, 11, 14], "operational_arrai": 5, "new": [5, 6, 11], "memorylevel": 5, "graph": [5, 11, 14], "memory_inst": 5, "memoryinst": [5, 13], "port_alloc": 5, "direction": 5, "abov": 5, "togeth": [5, 14], "id": [5, 8, 14], "memory_hierarchi": 5, "core_set": 5, "compris": 5, "global_buff": 5, "share": 5, "un": 5, "repositori": [5, 7], "5": 5, "dnn": 
[5, 10], "meta": 5, "prototyp": 5, "edg": [5, 14], "ascend": 5, "tesla": 5, "npu": 5, "depth": [5, 6], "research": 5, "fair": 5, "relev": [5, 9], "comparison": 5, "normal": 5, "1024": [5, 14], "mac": 5, "maxim": 5, "2mb": 5, "global": 5, "buffer": 5, "gb": 5, "kept": 5, "local": 5, "shown": 5, "idx": 5, "7": [5, 13], "9": 5, "variant": 5, "everi": [5, 8], "chip": 5, "denot": 5, "end": [5, 7, 11], "6": [5, 10, 11], "8": [5, 7, 10], "10": [5, 10], "k": [5, 10, 13, 14], "channel": [5, 14], "c": [5, 14], "ox": [5, 13, 14], "oi": [5, 13, 14], "featur": 5, "fx": [5, 13, 14], "fy": [5, 13, 14], "h": [5, 10], "sumbul": [5, 10], "t": [5, 8, 10, 14], "f": 5, "wu": [5, 10], "li": 5, "sarwar": 5, "koven": 5, "murphi": 5, "trotzki": 5, "cai": 5, "ansari": 5, "d": [5, 10], "morri": 5, "liu": [5, 10], "kim": 5, "beign": [5, 10], "lab": 5, "system": [5, 10, 11], "integr": [5, 10], "vr": 5, "custom": [5, 7, 8, 14], "power": 5, "7nm": 5, "technologi": 5, "codec": 5, "avatar": 5, "2022": [5, 10], "ieee": [5, 10], "circuit": [5, 10], "confer": [5, 10], "cicc": 5, "pp": [5, 10], "01": 5, "08": 5, "n": [5, 10], "p": [5, 10], "jouppi": 5, "young": 5, "patil": 5, "patterson": 5, "agraw": 5, "bajwa": 5, "bate": 5, "bhatia": 5, "boden": 5, "borcher": 5, "boyl": 5, "l": [5, 10], "cantin": 5, "chao": 5, "clark": 5, "j": 5, "coriel": 5, "dalei": 5, "dau": 5, "dean": 5, "gelb": 5, "v": [5, 10], "ghaemmaghami": 5, "gottipati": 5, "gulland": 5, "hagmann": 5, "ho": 5, "hogberg": 5, "hu": 5, "hundt": 5, "hurt": 5, "ibarz": 5, "jaffei": 5, "jaworski": 5, "kaplan": 5, "khaitan": 5, "killebrew": 5, "koch": 5, "kumar": 5, "laci": 5, "laudon": 5, "law": 5, "le": 5, "leari": 5, "luck": 5, "lundin": 5, "mackean": 5, "maggior": 5, "mahoni": 5, "miller": 5, "nagarajan": 5, "narayanaswami": 5, "ni": 5, "nix": 5, "norri": 5, "omernick": 5, "penukonda": 5, "phelp": 5, "ross": 5, "salek": 5, "samadiani": 5, "severn": 5, "sizikov": 5, "snelham": 5, "souter": 5, "steinberg": 5, "swing": 5, "tan": 5, "thorson": 5, 
"tian": 5, "toma": 5, "tuttl": 5, "vasudevan": 5, "walter": 5, "wang": 5, "wilcox": 5, "yoon": 5, "datacent": 5, "analysi": 5, "process": [5, 11], "sigarch": 5, "archit": 5, "vol": [5, 10], "45": 5, "jun": 5, "2017": 5, "yazdanbakhsh": 5, "seshadri": 5, "akin": 5, "convolut": [5, 8, 14], "arxiv": [5, 10], "print": [5, 10, 13], "2102": 5, "10423": 5, "feb": 5, "2021": [5, 10], "liao": 5, "tu": 5, "xia": 5, "zhou": 5, "yuan": 5, "scalabl": 5, "unifi": 5, "ubiquit": 5, "deep": [5, 6, 10], "industri": 5, "track": 5, "paper": [5, 10, 14], "symposium": [5, 10], "hpca": [5, 10], "789": 5, "801": 5, "talp": 5, "sarma": 5, "venkataramanan": 5, "bannon": 5, "mcgee": 5, "floer": 5, "jalot": 5, "hsiong": 5, "arora": 5, "gorti": 5, "sachdev": 5, "solut": 5, "full": 5, "self": [5, 9], "drive": 5, "micro": 5, "40": 5, "25": 5, "35": 5, "2020": [5, 10], "space": [6, 11], "explor": [6, 11], "learn": 6, "bridg": 6, "gap": 6, "decis": 6, "special": 6, "fast": [6, 10], "accur": 6, "analyt": [6, 10], "crucial": 6, "part": [6, 8], "clone": 6, "analyz": [6, 10], "api": [6, 7, 13], "get_hardware_performance_zigzag": 6, "visual": [6, 12], "futur": 6, "contribut": [6, 14], "guidelin": [6, 14], "upgrad": 6, "develop": 6, "idea": 6, "explan": 6, "studi": 6, "extens": 6, "cross": 6, "fuse": 6, "code": [6, 13], "re": 7, "interest": [7, 11], "modif": [7, 9], "directli": 7, "venv": 7, "conda": 7, "environ": 7, "look": [7, 11], "want": [7, 8, 11, 14], "git": 7, "com": 7, "kuleuven": 7, "mica": 7, "http": 7, "anaconda": 7, "argument": [7, 11], "autom": [8, 10], "some": [8, 11, 14], "aspect": [8, 9, 11], "interfac": 8, "core_alloc": [8, 14], "spatial_map": [8, 9, 14], "strategi": [8, 14], "spatialmappinggeneratorstag": [8, 11, 14], "hierarchi": [8, 9, 11, 13], "o": [8, 13, 14], "extra": [8, 11], "flexibl": 8, "scheme": 8, "don": 8, "put": 8, "safe": 8, "bet": 8, "copi": [8, 11], "exact": 8, "detect": 8, "interpret": 9, "predefin": 9, "costmodelevalu": [9, 11, 13], "knowledg": 9, "irrelev": 9, 
"handl": 9, "complexhandl": 9, "insid": [9, 11, 14], "represent": [9, 11], "invok": 9, "pass": 9, "__simplejsonrepr__": 9, "convert": [9, 11, 14], "off": [9, 10], "load": [9, 14], "reli": 9, "def": 9, "simpl": [9, 11], "energy_tot": 9, "latency_total2": 9, "standard": 9, "filename_pattern": [9, 11], "lose": 9, "etc": [9, 11], "concern": 9, "__jsonrepr__": 9, "temporal_map": 9, "mem_utili_shar": 9, "word_access": 9, "memory_word_access": 9, "operational_energi": 9, "mac_energi": 9, "memory_energi": 9, "mem_energi": 9, "energy_breakdown_per_level": 9, "energy_breakdown": 9, "energy_breakdown_per_level_per_operand": 9, "energy_breakdown_furth": 9, "latency_without_onloading_without_offload": 9, "latency_total0": 9, "latency_with_onloading_without_offload": 9, "latency_total1": 9, "latency_with_onloading_with_offload": 9, "goal": [9, 11], "straightforward": 9, "care": 9, "certain": 9, "modifi": [9, 11], "parser": 9, "pointer": 10, "mei": 10, "houshmand": 10, "jain": 10, "giraldo": 10, "verhelst": 10, "enlarg": 10, "joint": 10, "transact": 10, "70": 10, "1160": 10, "1174": 10, "aug": 10, "doi": 10, "1109": 10, "tc": 10, "3059962": 10, "uniform": 10, "divers": 10, "test": 10, "europ": 10, "exhibit": 10, "date": 10, "antwerp": 10, "belgium": 10, "220": 10, "225": 10, "23919": 10, "date54114": 10, "9774728": 10, "slide": 10, "video": 10, "symon": 10, "base": [10, 11], "3rd": 10, "artifici": 10, "intellig": 10, "aica": 10, "washington": 10, "dc": 10, "usa": 10, "aicas51828": 10, "9458493": 10, "coseman": 10, "papista": 10, "bhattacharje": 10, "deback": 10, "mallik": 10, "verkest": 10, "opportun": 10, "emerg": 10, "analog": 10, "electron": 10, "devic": 10, "meet": 10, "iedm": 10, "san": 10, "francisco": 10, "ca": 10, "29": 10, "iedm13553": 10, "9372006": 10, "accuraci": 10, "trade": 10, "contemporari": 10, "9458553": 10, "colleman": 10, "verelst": 10, "tuytelaar": 10, "processor": 10, "dynam": 10, "ifip": 10, "29th": 10, "veri": 10, "larg": [10, 14], "scale": 10, "vlsi": 10, 
"soc": 10, "singapor": 10, "soc53125": 10, "9607013": 10, "zhu": 10, "sun": 10, "mobil": 10, "transform": 10, "4th": 10, "incheon": 10, "korea": 10, "republ": 10, "142": 10, "145": 10, "aicas54282": 10, "9869945": 10, "goetschalckx": 10, "enabl": 10, "2023": 10, "karl": 10, "heterogen": 10, "exploit": 10, "fine": 10, "grain": 10, "48550": 10, "2212": 10, "10612": 10, "fasfou": 10, "genet": 10, "date56975": 10, "10137070": 10, "modularli": 11, "easili": 11, "adapt": 11, "sequenc": 11, "determin": 11, "mainstag": 11, "initi": 11, "acceleratorparserstag": 11, "simplesavestag": 11, "workloadstag": 11, "sm": 11, "lomastag": 11, "tm": 11, "costmodelstag": 11, "accelerator_path": 11, "arg": 11, "onnx_model_path": 11, "mapping_path": 11, "loma_lpf_limit": 11, "loma_show_progress_bar": 11, "true": [11, 14], "show": [11, 13], "progress": 11, "bar": [11, 13], "while": 11, "over": 11, "similar": 11, "those": 11, "pipelin": [11, 14], "remain": 11, "said": 11, "further": 11, "label": 11, "below": 11, "fed": 11, "far": 11, "discuss": 11, "last": 11, "revers": 11, "hold": 11, "finish": 11, "conbim": 11, "yield": 11, "chain": 11, "manipul": 11, "invoc": 11, "lowest": 11, "still": 11, "miss": 11, "__init__": 11, "workloadparserstag": 11, "workload_path": 11, "generalparameteriteratorstag": 11, "whose": 11, "predetermin": 11, "plottemporalmappingsstag": 11, "substag": 11, "keep": 11, "minimalenergystag": 11, "list_of_cal": 11, "minimaledpstag": 11, "sumstag": 11, "listifystag": 11, "instead": [11, 14], "removeextrainfostag": 11, "strip": 11, "info": 11, "subcal": 11, "cachebeforeyieldstag": 11, "cach": 11, "break": 11, "top": [11, 13], "bottom": [11, 13], "skipifdumpexistsstag": 11, "check": 11, "alreadi": 11, "skip": 11, "multiprocessingspawnstag": 11, "multiprocess": 11, "multiprocessinggatherstag": 11, "completesavestag": 11, "picklesavestag": 11, "dumpstag": 11, "salsastag": 11, "simul": 11, "anneal": 11, "temporalorderingconversionstag": 11, "spatialmappingconversionstag": 11, 
"auser": 11, "arrai": 11, "present": [11, 14], "inner": [11, 13], "most": [11, 13], "config": 11, "searchunusedmemorystag": 11, "instanc": 11, "usag": 11, "next": 11, "place": 11, "befor": 11, "workload_data_always_from_top_mem": 11, "fals": [11, 14], "final": [11, 14], "entir": 11, "highest": 11, "travel": 11, "removeunusedmemorystag": 11, "remov": 11, "unus": 11, "accord": 11, "let": 11, "sai": 11, "metric": 11, "easiest": 11, "intend": 11, "behaviour": 11, "guarante": 11, "correct": 11, "taken": 11, "inherit": 11, "abstract": 11, "callabl": 11, "kwarg": 11, "second": 11, "extra_info": 11, "reduct": 11, "statement": 11, "outsid": 11, "happen": 11, "regard": 12, "major": 12, "compon": 12, "termin": 13, "pickle_load": 13, "print_map": 13, "layernode_0": 13, "11": 13, "14": 13, "i2": 13, "i1": 13, "sram_2mb": 13, "dram": 13, "sram_32kb": 13, "rf_2b": 13, "outer": 13, "innermost": 13, "match": 13, "plot": 13, "bar_plot_cost_model_evaluations_breakdown": 13, "plot_cm": 13, "jpg": 13, "produc": 13, "chart": 13, "recommend": 14, "context": 14, "ml": 14, "often": 14, "recogn": 14, "complet": 14, "conv": 14, "qlinearconv": 14, "matmul": 14, "gemm": 14, "accelerat": 14, "incur": 14, "zero": 14, "feel": 14, "free": 14, "open": 14, "issu": 14, "yourself": 14, "rather": 14, "avoid": 14, "origin": 14, "discard": 14, "doesn": 14, "do": 14, "onnx_model": 14, "modelproto": 14, "my_model_with_internal_data": 14, "save_model": 14, "save_as_external_data": 14, "all_tensors_to_one_fil": 14, "locat": 14, "external_data_filenam": 14, "size_threshold": 14, "convert_attribut": 14, "raw": 14, "specif": 14, "directori": 14, "shape_infer": 14, "my_model": 14, "inferred_model": 14, "infer_shap": 14, "my_inferred_model": 14, "moreov": 14, "repres": 14, "equat": 14, "small": 14, "wherea": 14, "alwai": 14, "freeli": 14, "dimension_rel": 14, "relationship": 14, "stride": 14, "filter": 14, "dilat": 14, "rate": 14, "loop_dim_s": 14, "left": 14, "hand": 14, "operand_precis": 14, "partial": 14, 
"o_fin": 14, "operand_sourc": 14, "come": 14, "constant_operand": 14, "constant": 14, "prior": 14, "readm": 14, "notat": 14, "batch": 14, "row": 14, "column": 14, "kernel": 14}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"zigzag": [0, 2, 6, 7, 10], "api": 0, "get_hardware_performance_zigzag": 0, "code": [1, 2], "document": [1, 2, 3, 6], "contribut": 2, "guidelin": 2, "upgrad": 2, "project": 2, "version": 2, "develop": 2, "write": 2, "new": [2, 10], "part": 2, "gener": [2, 10], "build": 2, "local": 2, "which": 2, "support": [2, 10, 14], "doxygen": 2, "futur": 3, "chang": 3, "framework": 3, "get": 4, "start": 4, "first": [4, 10], "run": 4, "analyz": 4, "result": [4, 11], "hardwar": [5, 11], "architectur": 5, "oper": [5, 14], "unit": 5, "arrai": 5, "memori": 5, "instanc": 5, "hierarchi": 5, "core": [5, 10], "hw": 5, "acceler": 5, "model": [5, 10, 11, 14], "exampl": 5, "specif": 5, "set": 5, "refer": 5, "welcom": 6, "": [6, 14], "content": 6, "indic": 6, "tabl": 6, "instal": 7, "packag": 7, "manual": [7, 14], "clone": 7, "prerequisit": 7, "map": [8, 10, 11, 13], "user": [8, 12], "defin": 8, "constraint": 8, "output": 9, "simplesavestag": 9, "completesavestag": 9, "creat": [9, 11], "custom": [9, 11], "savestag": 9, "public": 10, "The": [10, 11], "idea": 10, "detail": 10, "latenc": [10, 13], "explan": 10, "tempor": [10, 11, 13], "search": 10, "engin": 10, "differ": 10, "design": 10, "space": 10, "explor": 10, "case": 10, "studi": 10, "extens": 10, "cross": 10, "layer": [10, 14], "depth": 10, "schedul": 10, "multi": 10, "fuse": 10, "stage": 11, "introduct": 11, "main": 11, "entri": 11, "point": 11, "sequenti": 11, "call": 11, "back": 11, "pass": 11, "implement": 11, "input": 11, "parser": 11, "iter": 11, "plot": 11, "reduc": 11, "optim": 11, "save": [11, 14], "dump": 11, "spatial": 11, "cost": 11, "modif": 11, "your": [11, 14], "guid": 12, "visual": 13, "energi": 13, "breakdown": 13, "workload": 14, "onnx": 14, "extern": 14, "data": 14, "infer": 14, 
"an": 14, "shape": 14, "definit": 14}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 58}, "alltitles": {"ZigZag API": [[0, "zigzag-api"]], "get_hardware_performance_zigzag()": [[0, "get-hardware-performance-zigzag"]], "Code Documentation": [[1, "code-documentation"]], "Contribute": [[2, "contribute"]], "Contributing guidelines": [[2, "contributing-guidelines"]], "Upgrading the project version (for ZigZag developers)": [[2, "upgrading-the-project-version-for-zigzag-developers"]], "Documentation": [[2, "documentation"], [3, "documentation"]], "Writing new parts for the general documentation": [[2, "writing-new-parts-for-the-general-documentation"]], "Building the general documentation locally": [[2, "building-the-general-documentation-locally"]], "Writing code which supports the code documentation with Doxygen": [[2, "writing-code-which-supports-the-code-documentation-with-doxygen"]], "Building the code documentation locally": [[2, "building-the-code-documentation-locally"]], "Future changes": [[3, "future-changes"]], "Framework": [[3, "framework"]], "Getting Started": [[4, "getting-started"]], "First run": [[4, "first-run"]], "Analyzing results": [[4, "analyzing-results"]], "Hardware Architecture": [[5, "hardware-architecture"]], "Operational Unit": [[5, "operational-unit"]], "Operational Array": [[5, "operational-array"]], "Memory Instance": [[5, "memory-instance"]], "Memory Hierarchy": [[5, "memory-hierarchy"]], "Core": [[5, "core"]], "HW Accelerator Model": [[5, "hw-accelerator-model"]], "Modelled examples": [[5, "modelled-examples"]], "Specific settings": [[5, "specific-settings"]], "References": [[5, "references"]], "Welcome to ZigZag\u2019s documentation!": [[6, "welcome-to-zigzag-s-documentation"]], "Contents:": 
[[6, null]], "Indices and tables": [[6, "indices-and-tables"]], "Installing ZigZag": [[7, "installing-zigzag"]], "Installing as a package": [[7, "installing-as-a-package"]], "Manual clone": [[7, "manual-clone"]], "Prerequisites": [[7, "prerequisites"]], "Installation": [[7, "installation"]], "Mapping": [[8, "mapping"]], "User-defined mapping constraints": [[8, "user-defined-mapping-constraints"]], "Outputs": [[9, "outputs"]], "SimpleSaveStage": [[9, "simplesavestage"]], "CompleteSaveStage": [[9, "completesavestage"]], "Creating a custom SaveStage": [[9, "creating-a-custom-savestage"]], "Publications": [[10, "publications"]], "The general idea of ZigZag": [[10, "the-general-idea-of-zigzag"]], "Detailed latency model explanation": [[10, "detailed-latency-model-explanation"]], "The new temporal mapping search engine": [[10, "the-new-temporal-mapping-search-engine"]], "Different design space exploration case studies": [[10, "different-design-space-exploration-case-studies"]], "Extension to support cross-layer depth-first scheduling": [[10, "extension-to-support-cross-layer-depth-first-scheduling"]], "Extension to support multi-core layer-fused scheduling": [[10, "extension-to-support-multi-core-layer-fused-scheduling"]], "Stages": [[11, "stages"]], "Introduction": [[11, "introduction"]], "The main entry point": [[11, "the-main-entry-point"]], "The sequential call of stages": [[11, "the-sequential-call-of-stages"]], "The back passing of results": [[11, "the-back-passing-of-results"]], "Implemented stages": [[11, "implemented-stages"]], "Input parser stages": [[11, "input-parser-stages"]], "Iterator stage": [[11, "iterator-stage"]], "Plot stages": [[11, "plot-stages"]], "Reduce stages": [[11, "reduce-stages"]], "Optimization stages": [[11, "optimization-stages"]], "Save and dump stages": [[11, "save-and-dump-stages"]], "Temporal mapping stages": [[11, "temporal-mapping-stages"]], "Spatial mapping stages": [[11, "spatial-mapping-stages"]], "Cost model stages": [[11, 
"cost-model-stages"]], "Hardware modification stages": [[11, "hardware-modification-stages"]], "Creating your custom stage": [[11, "creating-your-custom-stage"]], "User Guide": [[12, "user-guide"]], "Visualization": [[13, "visualization"]], "Temporal mapping": [[13, "temporal-mapping"]], "Energy and latency breakdown": [[13, "energy-and-latency-breakdown"]], "Workload": [[14, "workload"]], "Onnx models": [[14, "onnx-models"]], "Supported onnx operators": [[14, "supported-onnx-operators"]], "Saving your onnx model with external data": [[14, "saving-your-onnx-model-with-external-data"]], "Inferring an onnx model\u2019s shapes": [[14, "inferring-an-onnx-model-s-shapes"]], "Manual layer definition": [[14, "manual-layer-definition"]]}, "indexentries": {}}) \ No newline at end of file diff --git a/visualization.html b/visualization.html index 9187d2af..6fda3616 100644 --- a/visualization.html +++ b/visualization.html @@ -1,9 +1,29 @@ - Visualization — ZigZag 2.0.0 documentation Skip to content

Visualization

The generated CostModelEvaluation object(s) (from e.g. the API call) can be visualized in multiple ways.

Temporal mapping

The temporal mapping can be visualized by a function which prints it to the terminal. The code block demonstrates how to use it:

from zigzag.utils import pickle_load
+                          Visualization — ZigZag 2.0.0 documentation                     Skip to content  

Visualization

The generated CostModelEvaluation object(s) (from e.g. the API call) can be visualized in multiple ways.

Temporal mapping

The temporal mapping can be visualized by a function which prints it to the terminal. The code block demonstrates how to use it:

from zigzag.utils import pickle_load
 from zigzag.visualization.results.print_mapping import print_mapping
 cmes = pickle_load("zigzag/visualization/list_of_cmes.pickle")
 cme = cmes[0]
 print_mapping(cme)
-

The function will show the loops of the temporal mapping and for each operand shows at which memory level it resides. For example:

***** Temporal Mapping - CostModelEvaluation(layer=LayerNode_0, core=1) *****

O (O): [[(‘FX’, 11), (‘FY’, 11)], [(‘OY’, 7), (‘OY’, 2), (‘OX’, 14), (‘K’, 12)], []] W (I2): [[], [(‘FX’, 11), (‘FY’, 11), (‘OY’, 7), (‘OY’, 2), (‘OX’, 14)], [(‘K’, 12)]] I (I1): [[(‘FX’, 11), (‘FY’, 11), (‘OY’, 7), (‘OY’, 2), (‘OX’, 14), (‘K’, 12)], []]

Temporal Loops O W I

for K in [0:12) sram_2MB dram sram_2MB

for OX in [0:14) sram_2MB sram_32KB sram_2MB

for OY in [0:2) sram_2MB sram_32KB sram_2MB

for OY in [0:7) sram_2MB sram_32KB sram_2MB

for FY in [0:11) rf_2B sram_32KB sram_2MB

for FX in [0:11) rf_2B sram_32KB sram_2MB

The top loop is the outer-most for loop, where as the bottom loop is the inner-most. Going from bottom to top, loops are allocated to the innermost memories of the memory hierarchy for each operand. The names of the memories match the names of the MemoryInstance object used to create the memory level using the add_memory() call in the MemoryHierarchy.

Energy and latency breakdown

The energy and latency breakdown of a list of CostModelEvaluation objects can be plotted using the bar_plot_cost_model_evaluations_breakdown function:

from zigzag.utils import pickle_load
+

The function will show the loops of the temporal mapping and, for each operand, the memory level at which it resides. For example:

********* Temporal Mapping - CostModelEvaluation(layer=LayerNode_0, core=1) *********
+O (O): [[('FX', 11), ('FY', 11)], [('OY', 7), ('OY', 2), ('OX', 14), ('K', 12)], []]
+W (I2): [[], [('FX', 11), ('FY', 11), ('OY', 7), ('OY', 2), ('OX', 14)], [('K', 12)]]
+I (I1): [[('FX', 11), ('FY', 11), ('OY', 7), ('OY', 2), ('OX', 14), ('K', 12)], []]
+
+-------------------------------------------------------------------------------------
+ Temporal Loops                  O                  W                  I
+-------------------------------------------------------------------------------------
+ for K in [0:12)                 sram_2MB           dram               sram_2MB
+-------------------------------------------------------------------------------------
+  for OX in [0:14)               sram_2MB           sram_32KB          sram_2MB
+-------------------------------------------------------------------------------------
+   for OY in [0:2)               sram_2MB           sram_32KB          sram_2MB
+-------------------------------------------------------------------------------------
+    for OY in [0:7)              sram_2MB           sram_32KB          sram_2MB
+-------------------------------------------------------------------------------------
+     for FY in [0:11)            rf_2B              sram_32KB          sram_2MB
+-------------------------------------------------------------------------------------
+      for FX in [0:11)           rf_2B              sram_32KB          sram_2MB
+-------------------------------------------------------------------------------------
+

The top loop is the outermost for loop, whereas the bottom loop is the innermost. Going from bottom to top, loops are allocated to the innermost memories of the memory hierarchy for each operand. The names of the memories match the names of the MemoryInstance object used to create the memory level using the add_memory() call in the MemoryHierarchy.

Energy and latency breakdown

The energy and latency breakdown of a list of CostModelEvaluation objects can be plotted using the bar_plot_cost_model_evaluations_breakdown function:

from zigzag.utils import pickle_load
 from zigzag.visualization.results.plot_cme import bar_plot_cost_model_evaluations_breakdown
 
 cmes = pickle_load("zigzag/visualization/list_of_cmes.pickle")