From 2f88a0701f4147556cacadccf367fdf2cd90eb89 Mon Sep 17 00:00:00 2001 From: Karol Wolski Date: Mon, 7 Oct 2024 15:03:53 +0200 Subject: [PATCH] pandoc template introduced --- README.md | 25 + docs/assets/dyvenia-template.tex | 681 ++++++++++++++++++++ docs/assets/images/dyvenia-logo-gruen-1.png | Bin 0 -> 9885 bytes docs/data_platform/lifecycle.md | 35 +- 4 files changed, 722 insertions(+), 19 deletions(-) create mode 100644 docs/assets/dyvenia-template.tex create mode 100644 docs/assets/images/dyvenia-logo-gruen-1.png diff --git a/README.md b/README.md index fbd8e6b..48e21d0 100644 --- a/README.md +++ b/README.md @@ -25,3 +25,28 @@ rye run mkdocs serve ``` The docs will be available at http://127.0.0.1:8000/ (Ctrl+left click to open). + +## Exporting a markdown page as PDF + +### Install pandoc + +On Windows, it can be installed using the package installer from the pandoc website: https://pandoc.org/installing.html. Alternatively, it can be installed with Chocolatey: +``` +choco install pandoc +choco install rsvg-convert miktex +``` +On macOS: +``` +brew install pandoc +brew install librsvg homebrew/cask/basictex +``` + +### Basic usage + +``` +pandoc docs/{folder}/{file_name}.md -o {file_name}.pdf --toc --template=docs/assets/dyvenia-template.tex --listings +``` + +### Additional commands + +Sometimes pandoc places an image on the next page, with text from the following paragraphs appearing above it. 
In such situation page break command below image will help: `\clearpage` diff --git a/docs/assets/dyvenia-template.tex b/docs/assets/dyvenia-template.tex new file mode 100644 index 0000000..411ecde --- /dev/null +++ b/docs/assets/dyvenia-template.tex @@ -0,0 +1,681 @@ +% Options for packages loaded elsewhere +\PassOptionsToPackage{unicode$for(hyperrefoptions)$,$hyperrefoptions$$endfor$}{hyperref} +\PassOptionsToPackage{hyphens}{url} +$if(colorlinks)$ +\PassOptionsToPackage{dvipsnames,svgnames,x11names}{xcolor} +$endif$ +$if(CJKmainfont)$ +\PassOptionsToPackage{space}{xeCJK} +$endif$ +% +\documentclass[ +$if(fontsize)$ + $fontsize$, +$endif$ +$if(papersize)$ + $papersize$paper, +$endif$ +$if(beamer)$ + ignorenonframetext, +$if(handout)$ + handout, +$endif$ +$if(aspectratio)$ + aspectratio=$aspectratio$, +$endif$ +$if(babel-lang)$ + $babel-lang$, +$endif$ +$endif$ +$for(classoption)$ + $classoption$$sep$, +$endfor$ +]{$documentclass$} +$if(beamer)$ +$if(background-image)$ +\usebackgroundtemplate{% + \includegraphics[width=\paperwidth]{$background-image$}% +} +% In beamer background-image does not work well when other images are used, so this is the workaround +\pgfdeclareimage[width=\paperwidth,height=\paperheight]{background}{$background-image$} +\usebackgroundtemplate{\pgfuseimage{background}} +$endif$ +\usepackage{pgfpages} +\setbeamertemplate{caption}[numbered] +\setbeamertemplate{caption label separator}{: } +\setbeamercolor{caption name}{fg=normal text.fg} +\beamertemplatenavigationsymbols$if(navigation)$$navigation$$else$empty$endif$ +$for(beameroption)$ +\setbeameroption{$beameroption$} +$endfor$ +% Prevent slide breaks in the middle of a paragraph +\widowpenalties 1 10000 +\raggedbottom +$if(section-titles)$ +\setbeamertemplate{part page}{ + \centering + \begin{beamercolorbox}[sep=16pt,center]{part title} + \usebeamerfont{part title}\insertpart\par + \end{beamercolorbox} +} +\setbeamertemplate{section page}{ + \centering + 
\begin{beamercolorbox}[sep=12pt,center]{section title} + \usebeamerfont{section title}\insertsection\par + \end{beamercolorbox} +} +\setbeamertemplate{subsection page}{ + \centering + \begin{beamercolorbox}[sep=8pt,center]{subsection title} + \usebeamerfont{subsection title}\insertsubsection\par + \end{beamercolorbox} +} +\AtBeginPart{ + \frame{\partpage} +} +\AtBeginSection{ + \ifbibliography + \else + \frame{\sectionpage} + \fi +} +\AtBeginSubsection{ + \frame{\subsectionpage} +} +$endif$ +$endif$ +$if(beamerarticle)$ +\usepackage{beamerarticle} % needs to be loaded first +$endif$ +\usepackage{amsmath,amssymb} +$if(linestretch)$ +\usepackage{setspace} +$endif$ +\usepackage{iftex} +\ifPDFTeX + \usepackage[$if(fontenc)$$fontenc$$else$T1$endif$]{fontenc} + \usepackage[utf8]{inputenc} + \usepackage{textcomp} % provide euro and other symbols +\else % if luatex or xetex +$if(mathspec)$ + \ifXeTeX + \usepackage{mathspec} % this also loads fontspec + \else + \usepackage{unicode-math} % this also loads fontspec + \fi +$else$ + \usepackage{unicode-math} % this also loads fontspec +$endif$ + \defaultfontfeatures{Scale=MatchLowercase}$-- must come before Beamer theme + \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1} +\fi +$if(fontfamily)$ +$else$ +$-- Set default font before Beamer theme so the theme can override it +\usepackage{lmodern} +$endif$ +$-- Set Beamer theme before user font settings so they can override theme +$if(beamer)$ +$if(theme)$ +\usetheme[$for(themeoptions)$$themeoptions$$sep$,$endfor$]{$theme$} +$endif$ +$if(colortheme)$ +\usecolortheme{$colortheme$} +$endif$ +$if(fonttheme)$ +\usefonttheme{$fonttheme$} +$endif$ +$if(mainfont)$ +\usefonttheme{serif} % use mainfont rather than sansfont for slide text +$endif$ +$if(innertheme)$ +\useinnertheme{$innertheme$} +$endif$ +$if(outertheme)$ +\useoutertheme{$outertheme$} +$endif$ +$endif$ +$-- User font settings (must come after default font and Beamer theme) +$if(fontfamily)$ 
+\usepackage[$for(fontfamilyoptions)$$fontfamilyoptions$$sep$,$endfor$]{$fontfamily$} +$endif$ +\ifPDFTeX\else + % xetex/luatex font selection +$if(mainfont)$ + $if(mainfontfallback)$ + \ifLuaTeX + \usepackage{luaotfload} + \directlua{luaotfload.add_fallback("mainfontfallback",{ + $for(mainfontfallback)$"$mainfontfallback$"$sep$,$endfor$ + })} + \fi + $endif$ + \setmainfont[$for(mainfontoptions)$$mainfontoptions$$sep$,$endfor$$if(mainfontfallback)$,RawFeature={fallback=mainfontfallback}$endif$]{$mainfont$} +$endif$ +$if(sansfont)$ + $if(sansfontfallback)$ + \ifLuaTeX + \usepackage{luaotfload} + \directlua{luaotfload.add_fallback("sansfontfallback",{ + $for(sansfontfallback)$"$sansfontfallback$"$sep$,$endfor$ + })} + \fi + $endif$ + \setsansfont[$for(sansfontoptions)$$sansfontoptions$$sep$,$endfor$$if(sansfontfallback)$,RawFeature={fallback=sansfontfallback}$endif$]{$sansfont$} +$endif$ +$if(monofont)$ + $if(monofontfallback)$ + \ifLuaTeX + \usepackage{luaotfload} + \directlua{luaotfload.add_fallback("monofontfallback",{ + $for(monofontfallback)$"$monofontfallback$"$sep$,$endfor$ + })} + \fi + $endif$ + \setmonofont[$for(monofontoptions)$$monofontoptions$$sep$,$endfor$$if(monofontfallback)$,RawFeature={fallback=monofontfallback}$endif$]{$monofont$} +$endif$ +$for(fontfamilies)$ + \newfontfamily{$fontfamilies.name$}[$for(fontfamilies.options)$$fontfamilies.options$$sep$,$endfor$]{$fontfamilies.font$} +$endfor$ +$if(mathfont)$ +$if(mathspec)$ + \ifXeTeX + \setmathfont(Digits,Latin,Greek)[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$} + \else + \setmathfont[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$} + \fi +$else$ + \setmathfont[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$} +$endif$ +$endif$ +$if(CJKmainfont)$ + \ifXeTeX + \usepackage{xeCJK} + \setCJKmainfont[$for(CJKoptions)$$CJKoptions$$sep$,$endfor$]{$CJKmainfont$} + $if(CJKsansfont)$ + 
\setCJKsansfont[$for(CJKoptions)$$CJKoptions$$sep$,$endfor$]{$CJKsansfont$} + $endif$ + $if(CJKmonofont)$ + \setCJKmonofont[$for(CJKoptions)$$CJKoptions$$sep$,$endfor$]{$CJKmonofont$} + $endif$ + \fi +$endif$ +$if(luatexjapresetoptions)$ + \ifLuaTeX + \usepackage[$for(luatexjapresetoptions)$$luatexjapresetoptions$$sep$,$endfor$]{luatexja-preset} + \fi +$endif$ +$if(CJKmainfont)$ + \ifLuaTeX + \usepackage[$for(luatexjafontspecoptions)$$luatexjafontspecoptions$$sep$,$endfor$]{luatexja-fontspec} + \setmainjfont[$for(CJKoptions)$$CJKoptions$$sep$,$endfor$]{$CJKmainfont$} + \fi +$endif$ +\fi +$if(zero-width-non-joiner)$ +%% Support for zero-width non-joiner characters. +\makeatletter +\def\zerowidthnonjoiner{% + % Prevent ligatures and adjust kerning, but still support hyphenating. + \texorpdfstring{% + \TextOrMath{\nobreak\discretionary{-}{}{\kern.03em}% + \ifvmode\else\nobreak\hskip\z@skip\fi}{}% + }{}% +} +\makeatother +\ifPDFTeX + \DeclareUnicodeCharacter{200C}{\zerowidthnonjoiner} +\else + \catcode`^^^^200c=\active + \protected\def ^^^^200c{\zerowidthnonjoiner} +\fi +%% End of ZWNJ support +$endif$ +% Use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} +\IfFileExists{microtype.sty}{% use microtype if available + \usepackage[$for(microtypeoptions)$$microtypeoptions$$sep$,$endfor$]{microtype} + \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts +}{} +$if(indent)$ +$else$ +\makeatletter +\@ifundefined{KOMAClassName}{% if non-KOMA class + \IfFileExists{parskip.sty}{% + \usepackage{parskip} + }{% else + \setlength{\parindent}{0pt} + \setlength{\parskip}{6pt plus 2pt minus 1pt}} +}{% if KOMA class + \KOMAoptions{parskip=half}} +\makeatother +$endif$ +$if(verbatim-in-note)$ +\usepackage{fancyvrb} +$endif$ +\usepackage{xcolor} +$if(geometry)$ +$if(beamer)$ +\geometry{$for(geometry)$$geometry$$sep$,$endfor$} +$else$ +\usepackage[$for(geometry)$$geometry$$sep$,$endfor$]{geometry} 
+$endif$ +$endif$ +\makeatletter\@ifpackageloaded{geometry}{}{\usepackage[top=1in, bottom=1in, left=0.75in, right=0.75in]{geometry}}\makeatother % dyvenia default margins; skipped when geometry is already loaded (geometry variable set, or beamer) to avoid an option clash +$if(beamer)$ +\newif\ifbibliography +$endif$ +$if(listings)$ +\usepackage{listings} +\newcommand{\passthrough}[1]{#1} +\lstset{defaultdialect=[5.3]Lua} +\lstset{defaultdialect=[x86masm]Assembler} +$endif$ +$if(lhs)$ +\lstnewenvironment{code}{\lstset{language=Haskell,basicstyle=\small\ttfamily}}{} +$endif$ +$if(highlighting-macros)$ +$highlighting-macros$ +$endif$ +$if(tables)$ +\usepackage{longtable,booktabs,array} +$if(multirow)$ +\usepackage{multirow} +$endif$ +\usepackage{calc} % for calculating minipage widths +$if(beamer)$ +\usepackage{caption} +% Make caption package work with longtable +\makeatletter +\def\fnum@table{\tablename~\thetable} +\makeatother +$else$ +% Correct order of tables after \paragraph or \subparagraph +\usepackage{etoolbox} +\makeatletter +\patchcmd\longtable{\par}{\if@noskipsec\mbox{}\fi\par}{}{} +\makeatother +% Allow footnotes in longtable head/foot +\IfFileExists{footnotehyper.sty}{\usepackage{footnotehyper}}{\usepackage{footnote}} +\makesavenoteenv{longtable} +$endif$ +$endif$ +$if(graphics)$ +\usepackage{graphicx} +\makeatletter +\newsavebox\pandoc@box +\newcommand*\pandocbounded[1]{% scales image to fit in text height/width + \sbox\pandoc@box{#1}% + \Gscale@div\@tempa{\textheight}{\dimexpr\ht\pandoc@box+\dp\pandoc@box\relax}% + \Gscale@div\@tempb{\linewidth}{\wd\pandoc@box}% + \ifdim\@tempb\p@<\@tempa\p@\let\@tempa\@tempb\fi% select the smaller of both + \ifdim\@tempa\p@<\p@\scalebox{\@tempa}{\usebox\pandoc@box}% + \else\usebox{\pandoc@box}% + \fi% +} +% Set default figure placement to htbp +\def\fps@figure{htbp} +\makeatother +$endif$ +$if(svg)$ +\usepackage{svg} +$endif$ +$if(strikeout)$ +$-- also used for underline +\ifLuaTeX + \usepackage{luacolor} + \usepackage[soul]{lua-ul} +\else + \usepackage{soul} +$if(beamer)$ + \makeatletter + \let\HL\hl + \renewcommand\hl{% fix for beamer highlighting + 
\let\set@color\beamerorig@set@color + \let\reset@color\beamerorig@reset@color + \HL} + \makeatother +$endif$ +$if(CJKmainfont)$ + \ifXeTeX + % soul's \st doesn't work for CJK: + \usepackage{xeCJKfntef} + \renewcommand{\st}[1]{\sout{#1}} + \fi +$endif$ +\fi +$endif$ +\setlength{\emergencystretch}{3em} % prevent overfull lines +\providecommand{\tightlist}{% + \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} +$if(numbersections)$ +\setcounter{secnumdepth}{$if(secnumdepth)$$secnumdepth$$else$5$endif$} +$else$ +\setcounter{secnumdepth}{-\maxdimen} % remove section numbering +$endif$ +$if(subfigure)$ +\usepackage{subcaption} +$endif$ +$if(beamer)$ +$else$ +$if(block-headings)$ +% Make \paragraph and \subparagraph free-standing +\makeatletter +\ifx\paragraph\undefined\else + \let\oldparagraph\paragraph + \renewcommand{\paragraph}{ + \@ifstar + \xxxParagraphStar + \xxxParagraphNoStar + } + \newcommand{\xxxParagraphStar}[1]{\oldparagraph*{#1}\mbox{}} + \newcommand{\xxxParagraphNoStar}[1]{\oldparagraph{#1}\mbox{}} +\fi +\ifx\subparagraph\undefined\else + \let\oldsubparagraph\subparagraph + \renewcommand{\subparagraph}{ + \@ifstar + \xxxSubParagraphStar + \xxxSubParagraphNoStar + } + \newcommand{\xxxSubParagraphStar}[1]{\oldsubparagraph*{#1}\mbox{}} + \newcommand{\xxxSubParagraphNoStar}[1]{\oldsubparagraph{#1}\mbox{}} +\fi +\makeatother +$endif$ +$endif$ +$if(pagestyle)$ +\pagestyle{$pagestyle$} +$endif$ +$if(csl-refs)$ +% definitions for citeproc citations +\NewDocumentCommand\citeproctext{}{} +\NewDocumentCommand\citeproc{mm}{% + \begingroup\def\citeproctext{#2}\cite{#1}\endgroup} +\makeatletter + % allow citations to break across lines + \let\@cite@ofmt\@firstofone + % avoid brackets around text for \cite: + \def\@biblabel#1{} + \def\@cite#1#2{{#1\if@tempswa , #2\fi}} +\makeatother +\newlength{\cslhangindent} +\setlength{\cslhangindent}{1.5em} +\newlength{\csllabelwidth} +\setlength{\csllabelwidth}{3em} +\newenvironment{CSLReferences}[2] % #1 hanging-indent, #2 
entry-spacing + {\begin{list}{}{% + \setlength{\itemindent}{0pt} + \setlength{\leftmargin}{0pt} + \setlength{\parsep}{0pt} + % turn on hanging indent if param 1 is 1 + \ifodd #1 + \setlength{\leftmargin}{\cslhangindent} + \setlength{\itemindent}{-1\cslhangindent} + \fi + % set entry spacing + \setlength{\itemsep}{#2\baselineskip}}} + {\end{list}} +\usepackage{calc} +\newcommand{\CSLBlock}[1]{\hfill\break\parbox[t]{\linewidth}{\strut\ignorespaces#1\strut}} +\newcommand{\CSLLeftMargin}[1]{\parbox[t]{\csllabelwidth}{\strut#1\strut}} +\newcommand{\CSLRightInline}[1]{\parbox[t]{\linewidth - \csllabelwidth}{\strut#1\strut}} +\newcommand{\CSLIndent}[1]{\hspace{\cslhangindent}#1} +$endif$ +$if(lang)$ +\ifLuaTeX +\usepackage[bidi=basic]{babel} +\else +\usepackage[bidi=default]{babel} +\fi +$if(babel-lang)$ +\babelprovide[main,import]{$babel-lang$} +$if(mainfont)$ +\ifPDFTeX +\else +\babelfont{rm}[$for(mainfontoptions)$$mainfontoptions$$sep$,$endfor$$if(mainfontfallback)$,RawFeature={fallback=mainfontfallback}$endif$]{$mainfont$} +\fi +$endif$ +$endif$ +$for(babel-otherlangs)$ +\babelprovide[import]{$babel-otherlangs$} +$endfor$ +$for(babelfonts/pairs)$ +\babelfont[$babelfonts.key$]{rm}{$babelfonts.value$} +$endfor$ +% get rid of language-specific shorthands (see #6817): +\let\LanguageShortHands\languageshorthands +\def\languageshorthands#1{} +$if(selnolig-langs)$ +\ifLuaTeX + \usepackage[$for(selnolig-langs)$$it$$sep$,$endfor$]{selnolig} % disable illegal ligatures +\fi +$endif$ +$endif$ +$for(header-includes)$ +$header-includes$ +$endfor$ + +% dyvenia header/footer section start +\usepackage{fancyhdr} % Include the fancyhdr package to customize headers +\usepackage{graphicx} % Include the graphicx package to insert the image + +% Configure header +\pagestyle{fancy} % Activate fancy headers +\fancyhf{} % Clear all header and footer fields + +% Add logo to the header (left-aligned) 
+\fancyhead[L]{\includegraphics[width=3cm]{$if(header-logo)$$header-logo$$else$docs/assets/images/dyvenia-logo-gruen-1.png$endif$}} % Logo path is relative to the directory pandoc is run from (repository root); override with -V header-logo=PATH + +% Optional: Add page number to footer +\fancyfoot[C]{\thepage} +% dyvenia header/footer section end + +% dyvenia code background start +\usepackage{listings} % Include the listings package for code blocks +\usepackage{xcolor} % Required for coloring + +\definecolor{customGray}{RGB}{232, 232, 232} + +% Define a custom style for code blocks +\lstset{ + backgroundcolor=\color{customGray}, + basicstyle=\ttfamily, + frame=single, + rulecolor=\color{black}, + tabsize=4, + breaklines=true, + showstringspaces=false +} +% dyvenia code background end + +$if(dir)$ +\ifPDFTeX + \TeXXeTstate=1 + \newcommand{\RL}[1]{\beginR #1\endR} + \newcommand{\LR}[1]{\beginL #1\endL} + \newenvironment{RTL}{\beginR}{\endR} + \newenvironment{LTR}{\beginL}{\endL} +\fi +$endif$ +$if(natbib)$ +\usepackage[$natbiboptions$]{natbib} +\bibliographystyle{$if(biblio-style)$$biblio-style$$else$plainnat$endif$} +$endif$ +$if(biblatex)$ +\usepackage[$if(biblio-style)$style=$biblio-style$,$endif$$for(biblatexoptions)$$biblatexoptions$$sep$,$endfor$]{biblatex} +$for(bibliography)$ +\addbibresource{$bibliography$} +$endfor$ +$endif$ +$if(nocite-ids)$ +\nocite{$for(nocite-ids)$$it$$sep$, $endfor$} +$endif$ +$if(csquotes)$ +\usepackage{csquotes} +$endif$ +\usepackage{bookmark} +\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available +\urlstyle{$if(urlstyle)$$urlstyle$$else$same$endif$} +$if(links-as-notes)$ +% Make links footnotes instead of hotlinks: +\DeclareRobustCommand{\href}[2]{#2\footnote{\url{#1}}} +$endif$ +$if(verbatim-in-note)$ +\VerbatimFootnotes % allow verbatim text in footnotes +$endif$ +\hypersetup{ +$if(title-meta)$ + pdftitle={$title-meta$}, +$endif$ +$if(author-meta)$ + pdfauthor={$author-meta$}, +$endif$ +$if(lang)$ + pdflang={$lang$}, +$endif$ +$if(subject)$ + pdfsubject={$subject$}, +$endif$ 
+$if(keywords)$ + pdfkeywords={$for(keywords)$$keywords$$sep$, $endfor$}, +$endif$ +$if(colorlinks)$ + colorlinks=true, + linkcolor={$if(linkcolor)$$linkcolor$$else$Maroon$endif$}, + filecolor={$if(filecolor)$$filecolor$$else$Maroon$endif$}, + citecolor={$if(citecolor)$$citecolor$$else$Blue$endif$}, + urlcolor={$if(urlcolor)$$urlcolor$$else$Blue$endif$}, +$else$ +$if(boxlinks)$ +$else$ + hidelinks, +$endif$ +$endif$ + pdfcreator={LaTeX via pandoc}} + +$if(title)$ +\title{$title$$if(thanks)$\thanks{$thanks$}$endif$} +$endif$ +$if(subtitle)$ +$if(beamer)$ +$else$ +\usepackage{etoolbox} +\makeatletter +\providecommand{\subtitle}[1]{% add subtitle to \maketitle + \apptocmd{\@title}{\par {\large #1 \par}}{}{} +} +\makeatother +$endif$ +\subtitle{$subtitle$} +$endif$ +\author{$for(author)$$author$$sep$ \and $endfor$} +\date{$date$} +$if(beamer)$ +$if(institute)$ +\institute{$for(institute)$$institute$$sep$ \and $endfor$} +$endif$ +$if(titlegraphic)$ +\titlegraphic{\includegraphics$if(titlegraphicoptions)$[$for(titlegraphicoptions)$$titlegraphicoptions$$sep$, $endfor$]$endif${$titlegraphic$}} +$endif$ +$if(logo)$ +\logo{\includegraphics{$logo$}} +$endif$ +$endif$ + +\begin{document} +$if(has-frontmatter)$ +\frontmatter +$endif$ +$if(title)$ +$if(beamer)$ +\frame{\titlepage} +$else$ +\maketitle +$endif$ +$if(abstract)$ +\begin{abstract} +$abstract$ +\end{abstract} +$endif$ +$endif$ + +$for(include-before)$ +$include-before$ + +$endfor$ +$if(toc)$ +$if(toc-title)$ +\renewcommand*\contentsname{$toc-title$} +$endif$ +$if(beamer)$ +\begin{frame}[allowframebreaks] +$if(toc-title)$ + \frametitle{$toc-title$} +$endif$ + \setcounter{tocdepth}{$toc-depth$} + \tableofcontents +\end{frame} +$else$ +{ +$if(colorlinks)$ +\hypersetup{linkcolor=$if(toccolor)$$toccolor$$else$$endif$} +$endif$ +\setcounter{tocdepth}{$toc-depth$} +\tableofcontents +} +$endif$ +$endif$ +$if(lof)$ +\listoffigures +$endif$ +$if(lot)$ +\listoftables +$endif$ +$if(linestretch)$ +\setstretch{$linestretch$} 
+$endif$ +$if(has-frontmatter)$ +\mainmatter +$endif$ +$body$ + +$if(has-frontmatter)$ +\backmatter +$endif$ +$if(natbib)$ +$if(bibliography)$ +$if(biblio-title)$ +$if(has-chapters)$ +\renewcommand\bibname{$biblio-title$} +$else$ +\renewcommand\refname{$biblio-title$} +$endif$ +$endif$ +$if(beamer)$ +\begin{frame}[allowframebreaks]{$biblio-title$} + \bibliographytrue +$endif$ + \bibliography{$for(bibliography)$$bibliography$$sep$,$endfor$} +$if(beamer)$ +\end{frame} +$endif$ + +$endif$ +$endif$ +$if(biblatex)$ +$if(beamer)$ +\begin{frame}[allowframebreaks]{$biblio-title$} + \bibliographytrue + \printbibliography[heading=none] +\end{frame} +$else$ +\printbibliography$if(biblio-title)$[title=$biblio-title$]$endif$ +$endif$ + +$endif$ +$for(include-after)$ +$include-after$ + +$endfor$ +\end{document} diff --git a/docs/assets/images/dyvenia-logo-gruen-1.png b/docs/assets/images/dyvenia-logo-gruen-1.png new file mode 100644 index 0000000000000000000000000000000000000000..5a2619e224e98a57bee19b04c12e363526591113 GIT binary patch literal 9885 zcmeHt_cvT$^ftlhL5wzf@15wq_dbRYBM74=L5LbNg6O@MXd@U*)F8-^=)H{SB#2HB zME&}_|G>N6U*Gk;>#n=@JO!%jU$54|@^~_PtEfu}H|z+R1^S zZ41{FMpP+hm49XYriYeuuYGZoQ=F#uk|({l<>Sv(W(m)^q-GRr7M^}rgAHc53t@Cd zS)!Y}ZL-t)1Vn_VjNZ_-yne#mP^g~8E3YRJXxu(zSlt#HrqBQJhm}(OD{-*(np{+# zZtJIreClB3RO#}Z08p9R@jJU`FAHpcmB2${dCuPZvH#wSblIY*U|sh;q1`?8`IGcl zmT7~8fr6us^c+aEnv57(G5v7F29=pmrA+L55Q>At5vixCW*IoYw{#w0w^7jdXDp;aZV$59GH7mpSMBzXx==l4LFro2_Td~ z^B<5eys~qyhL@!{kA$N=LvUck_yU8JO!aQ(UQM^UY{Lr2gbwzE3TsIlj^81W=_sGgFF!kuyEGx6qEXl0gl`931bk zM?&^o%h55qj?4;4iBUUV%vAhk2WF>hKOHHl_~Vsarz&7g_tsmA=E;_q)*6~zD`7u+ zReKHJ#^@@OGxUGmJcx|Z?Ud>m@I={lGIDUd`4a}Wv2DZW;8+XZGTZzbvqnipJc|Zx z_Io{H<9Jj!4;wBB@QH}gh0xZPq^0g|IsfE}iHUUVrk96ZvA_5$D__Y#PL*P}@Pf9$ zvydF;V@+uu!Cy+Mf%2tl#_ckvxRt(>>M&xeltr7yIAiU2P0=aB@qtzMn3(n6elx#U z$36+ZBn89s_ZsbawZ<5~{+63kvhC9L2Hd9^r;EMyl`P2rOpRRQ+;U!Cxu?UJ#xpta z(K}Fn+gYAL`Vj|*8aaP)+M7O(g3T=OEaoP-R~+$+)?bgo^5}`TO=d 
zioDg1N9B)SOw5I(o11rl;_L7y6`J=+Vpw~S;{z~{?jJjACQj@eKapo4ZLN)u^G6zg zF0h$9gIKPP5(r1D+t>S&OX@eJs~iv@WPQMGUUkQ?=$u(PIJux_!i4QTXnF=hqJzpGt#4|L-hT8T55n{w)m z50p-&bZq|6m$zZvb}FGk-9o8_bu>ua47dxvt>}2~g+v;^PJhf<4@8B1P*$@Dzp&z~ zUk;k}H*&oVPs+cOD$&R65bDE%8O+)C`+%D@InuwjoS~LU@4@KHh~h?WHUjasNaNcB z-u|Zf>aERI9?cXuO{n13#!KDXgd359FITkPVtdcz=Q@e0%Ai^f3LTnZGe*GB-wS^A zfd<9r@94#tQ%a#)E99pVjH#{&09}tp8Sf)Mb4fC)|At#nkL9@P&N_8@g-zJ)j8;nW z+G3cgYwnRxr}dPnI^?XA16aa5sr)tCDGpzZBL;lHMexOr~mw&u}0QnqiETRhJgWO=8 z8n1z=bW46_k8vN81U>+isA6{LmYu5pP=0{Go6g4g@*U{}A?B9yKW2j|H6q~tfp{eD z(trKR_|XVwm|BI)d{sbxbP9a?jvbFih+iNr+UT>AfC0``l6O!ZIEmwUI)uPro$@y`ioeIyF89tg z8qWPnclWhQaOctMT5GiCLzma+#I6V5E87h*kL3JvFqa1qV2y#X#XV#d?pPJU_nemx z!Vz6{&`&^0lG$IZkC9i!JbI5qu!}=r!1Vx#!UGDW5SgN1m4yB7@>jWCm(F6Xguds4 zd}l6m`60Bq+3ht2S_75NDNJ<#7w(uE; z@M%-aMAD$M2s|SM;mtBf9faPqo134f5y}c6z9ZGD`HHD>FmyQZIieD>9IwY$UefIu zk!4dGvOn7Pfx$FD1y-m0ox*X@(8Mm#q`LL{Gch=g;*YCT5^ygA77hS5bpv)gn@+bc z9sUtvIc$;LMZRvW_=M9FlO#nV$H5*sLM}K~1vUVWN&U;C=e{u*{ew~yuMHCLC6v5_ zR;%^{Mm`@mK7fLDkL6*6=+Klhxr3Sk)2)=uZ>ug>6Nv`Ae7{>M`8D5p&L+l}J?vuimmvdKiT}V;p z)!GnVqar><#<04U#1`3Z$Ysm!uoyf8<*b@PlBt6Z11I@;7x_)8gbzLi8=+*q=XRtK z?thwYnNZPv;%K^DX2d&}#(Z4X&2coTgNIugH!&-ALV}{%nSjCj(;fpB1bFH67(=o~ zVkrQ5j>djnU=DEjl2d4Wvo)LcQuU4;yI&+u>T8d%G9IG8cmX1pS!0ZANc!2@7{X#) z*B|&M8M?nTsLDdjf-;S@uY{HZJnZDYVr4Do1M@vaD~Ud&bc(k zkA7aIg)@^pv(=p%}=b8N=q%4gW6XByZJAN z&D)*Og-92MeE)H=2OZ=eHmGFe$pgS2`TLJ4=J0RGn-Y6zN1nk_T*!-v>}U1-}G z(WQ9xlbXnwW;yT1D%mP*k{}hCAFbfvhA^%Y3e%}J-(=$;Yxm7 z1P%DpFE0}CAvA-Dc<4N_)MsJ0na$8uV#Yjj-+=)TJUoKjaFPv*&*Td7C~XDPY`Pp6B8j?g_3vMO}|AUU)ZfNJI`JNQJs>gya3J_ zhCWHg1PTtWF6Syxq`R!-T-fw`-=|8yPStpC*j}I7&^aq?vLIbp^X=4oWKX4&qDDp% z%{L3pD5~v7%*2*N3 zSh^N+N>jSg@?W`q{V1{!Ac!}yDLCp*ze zYtYBS0c=_sJ(>fVituJEPD^CMA!mYo5ad3ZmAG3&T1^>zjN5qf5ERx}NqbDPzhS9IyUO`Hj0DUS4r{M+{pvg;OFVW|$D3%~S zKlZd^S^)?{$173asty13X>mhosx`gdZAW({iQU_HGKtOQvJH^)@gs7buM!EFyohB^ zA9aU;$GL;4sz=ZEPn3Fsh!_-gEu79U?xoQZrE{B778w0QtWJ(co_ZIjUHs7(9fS?; zLKH2M`oupH+o-DKsvsojR;i&)>IPGHh?YAQQ2i7X0`O3js4Cu0ngdkF4IhB~lJ5!b 
zA3xq)9_lZ62+CxWN?R%4XO^|K#z;Nu?9^CD!DL8%b8LSb^|GJOxaX#JDo&HvZtnZ; z5gkfSq$i9bfE9HeM*hW9KZye=jOSfgC6`H6v>3U{J0^FavI*~Pk>47 z=>)!>kfKq}Z%rwDc6;WcBHfP-4ZvX0(x$P86^@0wu>(D;8m`^{atqX-NZU>K!rX== zAXd1+E_4PM{g)I4r~o!{qK<4AZ1Or=SO$*?I-6|Rl!NWicaMIB)?A< zi`468b1^qnQ)Q|JdsTC|3qC8TYgm>)J!}5ETmkmzz&@?PK5TB%s>U)rp&|MC)(}pE zD^8ZerK=Q#YYEi@*8D-hh{D??*h|yH{Y6+C6(0 z^U~wr=AB%3Tm)yP3fZ~viz&-V6FUU)^Zga?0pUxAmpU7Mfc_o-E%`~P9P)Z7SaPDV(c6yV6w3y-QbSWm{@LrqvRv0-@kyM7}wiz6%KG3z`dE)kX zas4SEJQe7dL%2&3k@&Go$rLTXzXMlsfsto!DYmu0+?S-ZzpJo0ZKjG?G9K`c8Ku?F zyrL-jo}H&RAhmM0NU`>jl>l$m9fx3yQA9SNr-F=NsTTtxvsfuvy->vnRFr|jtlZID zDnUge`ggDC?CiqXl?v`89>~Y0&Qj%w=ywvVDBJCX5AP$L0r~dYnAy-feiS0kKmB_7 z=slV+tjPc|Aq!z8XlFlfX2a|ha}~Vv(Id|sBS4kEhaDZQl>}@#cFW@*%WEl6<#4B8 zlihaQuQCVatJ+SX%rfq!FS^VnNZaEDb@rwm;NaXP`JYZzKe*px* zd~)6){)@`*Z5x6wPT9DtN0zypqy2AfCdDA!Vl6qX&+kLUBn-Hr%Y0^?d5sXQ_RN0g z?X*AMym_Ei!6wgZE+#!Kd_w&Bb7X2(sPLcA2iCtYucg*1fcZWbyjZGv`4UHTA)@sT zID5B;uzKDC=QeOh8;*qpQQwlar_{Pff7KpxO;<`iOuI6UEa1NFvzvNNLiQ=K_^>^D<_~2^2GFm^xDK#uS~0FhckM18S8E%*wvw?@ zvKJjQ{$kUenOq?W65q-UM4dNGomzhOK-TfG8ey=s4g6~IU*@#R&Cp0fC4z$n9Lx^# z1L^B}5K1E?uRiJtbgg*A?{7QBn#Bp<83vzpD#wFesOes-rSwJe3EgVB-VWd*Dd7w< z&qg7q8U%i%O_5bZxD@`%t{?B%K~%ek)^Z8&WE~+^(=TLv1yy_ZcMol-d}1V&zi`IY{}>yJ4_8!av!XPl2kL zc4A31z-F)O(Vli>m^MZbJ>w|8xoL6C{EPhtpg42&y633$hU-#W_H zZSkiqjZ9{}@&Gy%wYDZQ6&$uj60G z*q?TZvNaiIZvYdiI-Bp%EpHp? 
zeWig?QVdK>0ffVL#sh9lNH0=?5s=U?Xod%x<0}4zX!oB5TkT)ZG;1~T1Ai=d%4|M2 zd(nnD8+$1>odlPW znNpBv{%!`4gFiOM){XQ?B|7FshXkgzvP3r}@#{iHPd|5`(6JCXF_PcHGJtl|)))c7 zL8yykP1+ss5NdC%CaV57oIlE4?)VuC0s4$gxNk9u@6Ft=r&rq9c_x_WOh4}{O#?d= zQ_P1bS6JfAy^YuI2a9R)a;S`!LU}#yp-P*h$W^7J)&+@$yRe~v%bQF%F3wx{P3F;* z`c)>Ht~p2wBBD@fqP?{#A><*VQAk$4>iuKhQE8KyYGnRMN~O`?6h^V3{VJBW3ks~Cr3c{i!@1M_TN!y4qVvlXN`(> zw3?XhKsNRFL|_)WuY<9V`8Iwi{z2{}#YB&!HnN};?|y0Fzd$Z5A#JY0 z3mOGa)k$uDo>c6o2LWChQ{x||4*zOZ7;^l*$L4gPMe@{7 zuI(`3sIm}F@yX$dpzq(4;4&YVeE5ukn)IHfJKF9ctV+vI{)Hn})Wc!s9g~6Du1Hz)p1w z4TdZgMPHa7gU!>4URAHb5@$Q&-#JwC)5qV0X<hIptWdX)ZQ226u>UGe(k$(VgQrxod!nQ`0y|>(ubP=^ZW+hzlax8 zMryA4aGK>dizpCEG^E=OFPSiD7@33R?s}Vc)vt^A%f#KYws0r0m3`GY#go*c9`w;? zVdvUSZYVsYjc+@_3?0PBzaQaK076{@46uIJFB`Vj3anS$B1%~$e?p7XQ{Nx$-hRb& z12*RmUiht$K|ZI!S7eowMBwdG1O?(GwC3r15e^yC&&lMG1dHTQSILP0!~1be$+8_x*5WS3dNT|e||7b zBWiewdL4?_+VN?{HBDamf>iO#wXZfS%Ev;AsB_N_)SX>jx(du6u;az(tB`0bgk(z2 zxSJ3e46Rx>Ll84PVqtL4R=_UX)i)?j1h32qTg>2t*xsJ#HXAahVJlNp4Gb|_0s(*X zQ1FfY(WJ4SEA6diy0_pYv^bCb8bA0WM&K6eD3X%}y~&7(lMdo4;C`SCl9HIcLN~<0 zog(Ijttd=7fct;K4b@zcvWSdglp?7U%`}vGFRg?OyIgl{8Z+!S__*8DUQPq#yHc9- z5Rz+zX`84SZlW!$hs6pA(Y`o41v;UV$?Q-h?&#R?sF^$qDoq|W!ZB7nY6V^~lz0Hc zh$688g{i4TB7BBcr9kV`pnj`^UNaG5X6Wtrst z=`%M}XHEzHxq%O`v=ie-n8%bE=U+96Da@?Eewe{=GvY6P8RiJ9Ye^x~w?r9J6EsRM zP6qMst4%)lf)bkxd+xy)-)O<6xc0u@N^vpKGlhV3#?P`aBCk-X3}ArRSaH~Twn-!_ zw?-hbE*^m2VWfp)0drOycHa8fKw4YRT|R8pR?`ok(V(+p_-$vE3=$#-|RZIM5`EBo5D)LcngC&m+KX*zs*x1 zTnlx%|Ezl2RJzVuRi&Hy-@Mrb88YxS&lDI%bl9r5Yy9bkx!p?KRe}WTb<{t^g}I@n zAndYfLh<6~_)A7=ssZCuk1i%gOu1t{ChWghm?FwCvGXVQuuM%vEsM6Q%!CCX%-Mp> ztiFHw5x}$gUQU;a{pSGb?Ow6o#lSY(__y=TlN!9^uHW@t;>01k;vX&b?}HpW4fB&d z?{1vG_wsIg!xE8D5goj0@pvS;EY|y%o9>8l);;CKqD{X8P22??+!C1}S;UWe&`q`= z)j`kp0+A=al2lJ!y(OeWpxMN?TL4UV-)X{cJsx@SnT`nZsj?Ut##1+RJpB9qVeyOUC4IX7tm2}nmQhC{Nyvo+^EIfre_V^g)1?|FT);obnM#qS zH`U@NDg{wTJ2!QyMr4cUcqgj}Zcj~55XnH=M;<%Q?Ut1a;E&H3D=f{Ge2L`HF=_hV zT>vn6nCUrg3VfT3@rzcJenpf&C6N($vk}=30t^)-S;>rVenMF&m|D_6nJF@+({pxED>U4Z 
zG5nYOLQExP*N^DHczx%ciT%OI!RV?O^MGAPO!2~U#e1xaQGfepgIn2aw##1Dr{Tqg zt;lfy8Abh5|)m%^BMxNh+7u!we@f?6wZ2>lVOLL!K1fTV_3ETKH^1e>%HJea>K_vqD$0 zA&-Eh4IhQh!;+K~&Saem|H*!jU@FOk^t~u^@s|O@33eCa$IiN`+5j@O#YG)Gn)KMD z^X92RQL?`;wt0`Lzc&EN$GYEn`ts{BN>zVbx524qj zQkmeoGkZD)iwxQR`3q;9f*74!|Om0p7||1(n`tNmC=#|`{={M)Gkr2}_5Gj91a zuX9qV)cI&rJFGFM=-#`T8lr^lZTKQxJcB|{plq_~Vc zVtPrBle&%4iF(Qp9VV4-{xMIF(?sPRPIJcOx5-&52O~-3eQlfS4+LZl0>}{M5lvF& zf5e};`;7!MwoJ`^GS%Oom5$IZTp}fHKdjhxc_{8Tz3x=!v8W&L#Bc1QIpLd7ek0F2 zqOgS4812V>r^UbdK%ejPBSP%dIByb@oWtFBD~w`z)1V2_<~ydGQB(aNir2!UPlhg( zkLmGuL-@*HeYtsG?>> zunVBiKQUStCCz{7q>!*jkANt|9@rK|nhy?SnM=Le|L zYh!7;ga7>=F@d#X*PA3{x|4{}08uW|JGyNvVH zIa{zf%l_GubPk1>D(py!{@j~Daw9`z%BNo+77^0CF9GT1nYq?tQq@0$avEUh+N(O( z59uV&zUf9iEA|$A^$^=yn%K6r<)h9|^MfFP%=1X~qg{_gj&&GXvbK*-faX2Yh3xq; z8DF_&p=VnJ?jq>-P=1E_KWoX@%v6mJ>E{3b&IZ{+J_K~~m0y0XeR$cl!E*0gL@yuH zX-B{CpD)4cMVQoMiOeiH!E%y*9T*7q<$m%reP=9cJ~cyrGG*pflw^#m6f)n=C0xrb zI|_Z`v>$&)v$?1^7nl%%?w*>hoK0io~e1&_zW4(z}o*Y>4yuHe{0@*nth!5?^%8R^7Q}5OQrhXyJ*ns We`*^2eD{Cj;OJ=?Yu2hm;r|1uOPL7( literal 0 HcmV?d00001 diff --git a/docs/data_platform/lifecycle.md b/docs/data_platform/lifecycle.md index a46a605..5751795 100644 --- a/docs/data_platform/lifecycle.md +++ b/docs/data_platform/lifecycle.md @@ -13,14 +13,11 @@ The data engineering lifecycle is supported by several critical undercurrents, i --- -### 1. **Data Generation** +## 1. **Data Generation** Data generation is the first step in the lifecycle, where raw data is created or collected from various sources. These sources can include user activity on applications, sensor data from IoT devices, logs from systems, and external APIs. The goal of this phase is to collect raw, unprocessed data that will be used in downstream processes. -```{=latex} -\newpage -``` -#### Key Activities: +### Key Activities: - **Event Logging**: Capturing user or system events. - **IoT and Sensor Data**: Collecting data from physical devices. - **API Requests**: Pulling data from external sources. 
@@ -34,22 +31,22 @@ Data generation is the first step in the lifecycle, where raw data is created or --- -### 2. **Data Storage** +## 2. **Data Storage** Data storage is the foundational component where all ingested and processed data is stored securely and durably. The choice of storage system depends on the volume, velocity, and type of data. Data can be stored in structured databases, semi-structured storage, or unstructured data lakes. -#### Key Activities: +### Key Activities: - **Choosing Storage Systems**: Selecting between data warehouses, lakes, and object storage. - **Partitioning and Indexing**: Organizing data for faster access. - **Security and Compliance**: Ensuring data is encrypted and stored in compliance with regulatory requirements. --- -### 3. **Data Ingestion** +## 3. **Data Ingestion** Data ingestion refers to the process of bringing generated data into your data infrastructure. This can be done in real-time or through batch processes. The goal is to move data from its source into a system where it can be stored and transformed. -#### Key Activities: +### Key Activities: - **Streaming Ingestion**: Handling real-time data (e.g., Kafka, Pulsar). - **Batch Ingestion**: Periodically importing data (e.g., using ETL tools). - **API or File-Based Ingestion**: Pulling data from external services or systems via APIs or file transfers. @@ -58,11 +55,11 @@ Data ingestion refers to the process of bringing generated data into your data i --- -### 4. **Data Transformation** +## 4. **Data Transformation** Data transformation is where the raw data is cleaned, structured, and enriched to make it suitable for analysis or other uses. This phase involves applying business logic, converting data formats, aggregating values, and removing noise. -#### Key Activities: +### Key Activities: - **Data Cleaning**: Handling missing, duplicate, or erroneous data. - **Data Enrichment**: Adding additional information or context to raw data. 
- **Aggregations and Calculations**: Summing, averaging, or transforming data based on business rules. @@ -70,11 +67,11 @@ Data transformation is where the raw data is cleaned, structured, and enriched t --- -### 5. **Data Serving** +## 5. **Data Serving** Once the data is transformed, it needs to be made available for use in real-time systems or analytical tools. Data serving is about making sure that transformed data is accessible and delivered to users, applications, or other systems in a timely and efficient manner. -#### Key Activities: +### Key Activities: - **API Exposure**: Making data available through APIs. - **Data Warehousing**: Storing processed data in data warehouses for efficient querying. - **Data Lakes**: Providing storage for large volumes of raw or semi-structured data. @@ -82,22 +79,22 @@ Once the data is transformed, it needs to be made available for use in real-time --- -### 6. **Analytics** +## 6. **Analytics** After data is stored and made available, it is used for analytics to extract insights. This stage typically involves querying data, building reports, dashboards, and conducting exploratory analysis. The insights derived here help inform business decisions. -#### Key Activities: +### Key Activities: - **BI Tools**: Generating reports and dashboards using tools like PowerBI, Tableau, or Looker. - **Exploratory Data Analysis**: Querying data to find patterns, trends, and insights. - **KPI Tracking**: Monitoring key performance indicators through dashboards. --- -### 7. **Machine Learning** +## 7. **Machine Learning** Machine learning is an optional stage in the data engineering lifecycle. Not all data pipelines will require machine learning, but for those that do, this stage is where cleaned and transformed data is used to train, validate, and deploy predictive models. Machine learning models help automate decision-making processes or provide forecasts based on historical data patterns. 
-#### Key Activities: +### Key Activities: - **Model Training**: Using historical data to train predictive models. - **Feature Engineering**: Creating new variables that better represent underlying patterns in the data. - **Model Deployment**: Putting machine learning models into production to make predictions in real time. @@ -105,11 +102,11 @@ Machine learning is an optional stage in the data engineering lifecycle. Not all --- -### 8. **Reverse ETL** +## 8. **Reverse ETL** Reverse ETL is the process of moving data from your centralized data warehouse or data lake back into operational systems (such as CRMs, marketing platforms, or custom applications). This stage is about operationalizing data by sending it back into the tools that teams use day-to-day. -#### Key Activities: +### Key Activities: - **Syncing Data**: Moving data back into operational systems like Salesforce, HubSpot, or custom tools. - **Operational Analytics**: Making transformed data actionable in real-time for business operations. - **Data Enrichment**: Enhancing third-party tools with enriched or aggregated data from internal systems.