Skip to content

Commit

Permalink
Merge branch 'copy-code-from-dbt-core' of https://github.com/dbt-labs…
Browse files Browse the repository at this point in the history
…/dbt-common into er/ci-workflow
  • Loading branch information
emmyoop committed Jan 5, 2024
2 parents f3cfc0b + 0443b52 commit 73f0a35
Show file tree
Hide file tree
Showing 13 changed files with 1,423 additions and 10 deletions.
27 changes: 18 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
.DEFAULT_GOAL:=help

.PHONY: dev_req
dev_req: ## Installs dbt-* packages in develop mode along with only development dependencies.
@\
pip install -r dev-requirements.txt

.PHONY: dev
dev: dev_req ## Installs dbt-* packages in develop mode along with development dependencies and pre-commit.
@\
pre-commit install
.PHONY: run install-hatch overwrite-pre-commit install test lint json_schema

run:
export FORMAT_JSON_LOGS="1"

install-hatch:
pip3 install hatch

# This edits your local pre-commit hook file to use Hatch when executing.
overwrite-pre-commit:
hatch run dev-env:pre-commit install
hatch run dev-env:sed -i -e "s/exec /exec hatch run dev-env:/g" .git/hooks/pre-commit

test:
export FORMAT_JSON_LOGS="1" && hatch -v run dev-env:pytest -n auto tests

lint:
hatch run dev-env:pre-commit run --show-diff-on-failure --color=always --all-files

.PHONY: proto_types
proto_types: ## generates google protobuf python file from types.proto
Expand All @@ -20,4 +30,3 @@ help: ## Show this help message.
@echo
@echo 'targets:'
@grep -E '^[8+a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

12 changes: 12 additions & 0 deletions dbt/common/ui.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
import sys
import textwrap
from typing import Dict

import colorama

# Colorama is needed for colored logs on Windows because we're using logger.info
# instead of print(). If the Windows env doesn't have a TERM var set or it is set to None
# (i.e. in the case of Git Bash on Windows, which emulates Unix), then it's safe to initialize
# Colorama with wrapping turned on, which allows us to strip ANSI sequences from stdout.
# You can safely initialize Colorama for any OS and the coloring stays the same, except
# when piped to another process on Linux and macOS, where it loses the coloring. To combat
# that, we only initialize Colorama when needed: on Windows using a non-Unix terminal.

# The guard below reads the TERM environment variable through os.getenv, so `os`
# must be in scope; without it the condition raises NameError on win32, the only
# platform where the right-hand side of the `and` is evaluated.
import os

# Wrap the output streams with Colorama only on Windows, and only when TERM is
# unset, empty, or the literal string "None".
if sys.platform == "win32" and (not os.getenv("TERM") or os.getenv("TERM") == "None"):
    colorama.init(wrap=True)

COLORS: Dict[str, str] = {
"red": colorama.Fore.RED,
"green": colorama.Fore.GREEN,
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@ classifiers = [
]
dependencies = [
"agate~=1.7.0",
"colorama>=0.3.9,<0.5", # TODO: major version 0 - should we use it?
"jsonschema~=4.0",
"Jinja2~=3.0",
"mashumaro[msgpack]~=3.9",
"protobuf>=4.0.0",
"python-dateutil~=2.0",
"requests<3.0.0",
"typing-extensions~=4.4",
]

Expand Down Expand Up @@ -106,4 +109,4 @@ disallow_untyped_defs = false
profile = "black"

[tool.black]
line-length = 120
line-length = 120
Empty file added tests/unit/__init__.py
Empty file.
227 changes: 227 additions & 0 deletions tests/unit/test_agate_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import unittest

import agate

from datetime import datetime
from decimal import Decimal
from isodate import tzinfo
import os
from shutil import rmtree
from tempfile import mkdtemp
from dbt.common.clients import agate_helper

# CSV fixture: a header row (columns a-g) followed by two data rows. The cells
# cover an integer, short text, longer text, a decimal/integer number, a compact
# ISO-8601 UTC timestamp, a boolean, and a NULL / free-text value.
SAMPLE_CSV_DATA = """a,b,c,d,e,f,g
1,n,test,3.2,20180806T11:33:29.320Z,True,NULL
2,y,asdf,900,20180806T11:35:29.320Z,False,a string"""

# The same fixture prefixed with a byte-order mark (U+FEFF).
SAMPLE_CSV_BOM_DATA = "\ufeff" + SAMPLE_CSV_DATA


# Rows the tests expect when column types are inferred: numbers, UTC-aware
# datetimes and booleans are parsed, and the "NULL" cell comes back as None.
EXPECTED = [
    [
        1,
        "n",
        "test",
        Decimal("3.2"),
        datetime(2018, 8, 6, 11, 33, 29, 320000, tzinfo=tzinfo.Utc()),
        True,
        None,
    ],
    [
        2,
        "y",
        "asdf",
        900,
        datetime(2018, 8, 6, 11, 35, 29, 320000, tzinfo=tzinfo.Utc()),
        False,
        "a string",
    ],
]


# Rows the tests expect when every column is kept as text: each cell keeps its
# original string form, except that the "NULL" cell is still expected to be None.
EXPECTED_STRINGS = [
    ["1", "n", "test", "3.2", "20180806T11:33:29.320Z", "True", None],
    ["2", "y", "asdf", "900", "20180806T11:35:29.320Z", "False", "a string"],
]


class TestAgateHelper(unittest.TestCase):
    """Tests for building and merging agate tables through ``agate_helper``."""

    def setUp(self):
        # Scratch directory for the CSV files that individual tests write.
        self.tempdir = mkdtemp()

    def tearDown(self):
        rmtree(self.tempdir)

    def _write_csv(self, text):
        """Write ``text`` UTF-8 encoded to ``input.csv`` in the temp dir; return its path."""
        path = os.path.join(self.tempdir, "input.csv")
        with open(path, "wb") as fp:
            fp.write(text.encode("utf-8"))
        return path

    def _assert_rows(self, tbl, expected):
        """Assert that ``tbl`` has exactly the rows in ``expected``, in order."""
        self.assertEqual(len(tbl), len(expected))
        for expected_row, row in zip(expected, tbl):
            self.assertEqual(list(row), expected_row)

    def _assert_merge(self, tables, column_names, column_types, row_count):
        """Merge ``tables`` and check the result's column names, column types and length."""
        result = agate_helper.merge_tables(tables)
        self.assertEqual(result.column_names, column_names)
        for idx, expected_type in enumerate(column_types):
            self.assertIsInstance(result.column_types[idx], expected_type)
        self.assertEqual(len(result), row_count)

    def test_from_csv(self):
        """A plain CSV file is loaded into the typed rows in ``EXPECTED``."""
        path = self._write_csv(SAMPLE_CSV_DATA)
        tbl = agate_helper.from_csv(path, ())
        self._assert_rows(tbl, EXPECTED)

    def test_bom_from_csv(self):
        """A leading byte-order mark must not change the parsed rows."""
        path = self._write_csv(SAMPLE_CSV_BOM_DATA)
        tbl = agate_helper.from_csv(path, ())
        self._assert_rows(tbl, EXPECTED)

    def test_from_csv_all_reserved(self):
        """Passing every column name as the second argument keeps the cells as strings.

        NOTE(review): the second argument of ``from_csv`` is presumably the list of
        text-only columns -- confirm against ``agate_helper.from_csv``.
        """
        path = self._write_csv(SAMPLE_CSV_DATA)
        tbl = agate_helper.from_csv(path, tuple("abcdefg"))
        self._assert_rows(tbl, EXPECTED_STRINGS)

    def test_from_data(self):
        """A list of string-valued dicts is converted to the typed rows in ``EXPECTED``."""
        column_names = ["a", "b", "c", "d", "e", "f", "g"]
        data = [
            {
                "a": "1",
                "b": "n",
                "c": "test",
                "d": "3.2",
                "e": "20180806T11:33:29.320Z",
                "f": "True",
                "g": "NULL",
            },
            {
                "a": "2",
                "b": "y",
                "c": "asdf",
                "d": "900",
                "e": "20180806T11:35:29.320Z",
                "f": "False",
                "g": "a string",
            },
        ]
        tbl = agate_helper.table_from_data(data, column_names)
        self._assert_rows(tbl, EXPECTED)

    def test_datetime_formats(self):
        """Each supported timestamp spelling parses to the same UTC datetime."""
        datetimes = [
            "20180806T11:33:29.000Z",
            "20180806T11:33:29Z",
            "20180806T113329Z",
        ]
        expected = datetime(2018, 8, 6, 11, 33, 29, 0, tzinfo=tzinfo.Utc())
        for dt in datetimes:
            # subTest labels a failure with the offending format string.
            with self.subTest(dt=dt):
                path = self._write_csv("a\n{}".format(dt))
                tbl = agate_helper.from_csv(path, ())
                self.assertEqual(tbl[0][0], expected)

    def test_merge_allnull(self):
        """A column that is None in every merged table is typed as Integer."""
        t1 = agate_helper.table_from_rows([(1, "a", None), (2, "b", None)], ("a", "b", "c"))
        t2 = agate_helper.table_from_rows([(3, "c", None), (4, "d", None)], ("a", "b", "c"))
        self._assert_merge(
            [t1, t2],
            ("a", "b", "c"),
            (agate_helper.Integer, agate.data_types.Text, agate_helper.Integer),
            4,
        )

    def test_merge_mixed(self):
        """Merged column types are the same regardless of which table has the nulls."""
        names = ("a", "b", "c", "d")
        t1 = agate_helper.table_from_rows(
            [(1, "a", None, None), (2, "b", None, None)], names
        )
        t2 = agate_helper.table_from_rows(
            [(3, "c", "dog", 1), (4, "d", "cat", 5)], names
        )
        t3 = agate_helper.table_from_rows(
            [(3, "c", None, 1.5), (4, "d", None, 3.5)], names
        )

        integer = agate_helper.Integer
        text = agate.data_types.Text
        number = agate.data_types.Number

        self._assert_merge([t1, t2], names, (integer, text, text, integer), 4)
        self._assert_merge([t1, t3], names, (integer, text, integer, number), 4)
        self._assert_merge([t2, t3], names, (integer, text, text, number), 4)
        self._assert_merge([t3, t2], names, (integer, text, text, number), 4)
        self._assert_merge([t1, t2, t3], names, (integer, text, text, number), 6)

    def test_nocast_string_types(self):
        """String fields are not coerced into a representative type."""
        # See: https://github.com/dbt-labs/dbt-core/issues/2984

        column_names = ["a", "b", "c", "d", "e"]
        result_set = [
            {"a": "0005", "b": "01T00000aabbccdd", "c": "true", "d": 10, "e": False},
            {"a": "0006", "b": "01T00000aabbccde", "c": "false", "d": 11, "e": True},
        ]

        tbl = agate_helper.table_from_data_flat(data=result_set, column_names=column_names)
        self.assertEqual(len(tbl), len(result_set))

        expected = [
            ["0005", "01T00000aabbccdd", "true", Decimal(10), False],
            ["0006", "01T00000aabbccde", "false", Decimal(11), True],
        ]
        self._assert_rows(tbl, expected)

    def test_nocast_bool_01(self):
        """True/False are not cast to 1/0, and vice versa."""
        # See: https://github.com/dbt-labs/dbt-core/issues/4511

        column_names = ["a", "b"]
        result_set = [
            {"a": True, "b": 1},
            {"a": False, "b": 0},
        ]

        tbl = agate_helper.table_from_data_flat(data=result_set, column_names=column_names)
        self.assertEqual(len(tbl), len(result_set))

        self.assertIsInstance(tbl.column_types[0], agate.data_types.Boolean)
        self.assertIsInstance(tbl.column_types[1], agate_helper.Integer)

        expected = [
            [True, Decimal(1)],
            [False, Decimal(0)],
        ]
        self._assert_rows(tbl, expected)
59 changes: 59 additions & 0 deletions tests/unit/test_connection_retries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import functools
import pytest
from requests.exceptions import RequestException
from dbt.common.exceptions import ConnectionError
from dbt.common.utils.connection import connection_exception_retry


def no_retry_fn():
    """Stand-in callable that succeeds on every call."""
    outcome = "success"
    return outcome


class TestNoRetries:
    """A callable that succeeds right away has its return value handed back."""

    def test_no_retry(self):
        wrapped = functools.partial(no_retry_fn)
        outcome = connection_exception_retry(wrapped, 3)
        assert outcome == "success"


def no_success_fn():
    """Stand-in callable that fails on every call.

    Raises:
        RequestException: always.
    """
    raise RequestException("You'll never pass")


class TestMaxRetries:
    """A callable that always raises RequestException ends in ConnectionError."""

    def test_max_retries(self):
        fn_to_retry = functools.partial(no_success_fn)

        # NOTE(review): 3 is presumably the maximum number of attempts -- confirm
        # against connection_exception_retry's signature.
        with pytest.raises(ConnectionError):
            connection_exception_retry(fn_to_retry, 3)


def single_retry_fn():
    """Fail on the first call, succeed on the second, tracked via the global ``counter``.

    With ``counter`` at 0 the call bumps it and raises RequestException; at 1 it
    bumps it and returns ``"success on 2"``. Any other value leaves ``counter``
    untouched and returns a sentinel string.
    """
    global counter
    if counter not in (0, 1):
        return "How did we get here?"

    is_first_call = counter == 0
    counter += 1
    if is_first_call:
        raise RequestException("You won't pass this one time")
    return "success on 2"


class TestSingleRetry:
    """One failing call followed by a successful one yields the success value."""

    def test_single_retry(self):
        # single_retry_fn keeps its call count in the module-level ``counter``;
        # reset it so the first call raises and the second one succeeds.
        global counter
        counter = 0

        fn_to_retry = functools.partial(single_retry_fn)
        result = connection_exception_retry(fn_to_retry, 3)
        expected = "success on 2"

        # We need to test the return value here, not just that it did not throw an error.
        # If the value is not being passed it causes cryptic errors
        assert result == expected
        # Exactly two calls were made: the failing one and the successful one.
        assert counter == 2
Loading

0 comments on commit 73f0a35

Please sign in to comment.