Skip to content

Commit

Permalink
Merge pull request #74 from con/split-benchmarks
Browse files Browse the repository at this point in the history
Make benchmarks measure cache misses and hits separately
  • Loading branch information
yarikoptic authored Feb 22, 2022
2 parents 3c4b426 + f8c86f5 commit 64b1125
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 71 deletions.
164 changes: 94 additions & 70 deletions benchmarks/cache.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,58 @@
from abc import ABC, abstractmethod
from hashlib import sha256
import os
from pathlib import Path
import random
import shutil
from time import sleep, time
from uuid import uuid4
from morecontext import envset
from fscacher import PersistentCache


class TimeFile:
class BaseCacheBenchmark(ABC):
param_names = ["mode"]
params = [["populate", "hit", "ignore"]]

@abstractmethod
def init_path(self, *args):
# Must return the path created
...

@abstractmethod
def init_func(cache):
# Must return the function
...

def init_cache(self, ignore: bool = False):
with envset("FSCACHER_CACHE", "ignore" if ignore else ""):
self.cache = PersistentCache(path=str(uuid4()))
self.func = self.init_func(self.cache)

def setup(self, mode, *args):
self.path = self.init_path(mode, *args)
if mode == "hit":
self.init_cache()
self.func(self.path)
elif mode == "ignore":
self.init_cache(ignore=True)

def time_cache(self, mode, *_args):
if mode == "populate":
self.init_cache()
self.func(self.path)


class TimeFile(BaseCacheBenchmark):
FILE_SIZE = 1024
param_names = ["control"]
params = ["", "ignore"]

def setup_cache(self):
def init_path(self, *_args):
with open("foo.dat", "wb") as fp:
fp.write(bytes(random.choices(range(256), k=self.FILE_SIZE)))
return "foo.dat"

def setup(self, control):
with envset("FSCACHER_CACHE", control):
self.cache = PersistentCache(path=str(uuid4()))

@self.cache.memoize_path
@staticmethod
def init_func(cache):
@cache.memoize_path
def hashfile(path):
# "emulate" slow invocation so significant raise in benchmark
# consumed time would mean that we invoked it instead
Expand All @@ -31,32 +61,47 @@ def hashfile(path):
with open(path, "rb") as fp:
return sha256(fp.read()).hexdigest()

self._hashfile = hashfile

def time_file(self, _control):
for _ in range(100):
self._hashfile("foo.dat")

def teardown(self, _control):
self.cache.clear()


class TimeDirectoryFlat:

LAYOUT = (100,)

param_names = ["control", "tmpdir"]
params = (["", "ignore"], os.environ.get("FSCACHER_BENCH_TMPDIRS", ".").split(":"))

def setup(self, control, tmpdir):
cache_id = str(uuid4())
with envset("FSCACHER_CACHE", control):
self.cache = PersistentCache(path=os.path.join("cache", cache_id))
self.dir = Path(tmpdir, "work", cache_id)
self.dir.mkdir(parents=True)
create_tree(self.dir, self.LAYOUT)

@self.cache.memoize_path
return hashfile


class BaseDirectoryBenchmark(BaseCacheBenchmark):
param_names = BaseCacheBenchmark.param_names + ["tmpdir"]
params = BaseCacheBenchmark.params + [
os.environ.get("FSCACHER_BENCH_TMPDIRS", ".").split(":")
]

@staticmethod
@abstractmethod
def get_layout():
...

def init_path(self, _mode, tmpdir):
dirpath = Path(tmpdir, str(uuid4()))
dirpath.mkdir(parents=True)
base_time = time()
dirs = [dirpath]
layout = self.get_layout()
for i, width in enumerate(layout):
if i < len(layout) - 1:
dirs2 = []
for d in dirs:
for x in range(width):
d2 = d / f"d{x}"
d2.mkdir()
dirs2.append(d2)
dirs = dirs2
else:
for j, d in enumerate(dirs):
for x in range(width):
f = d / f"f{x}.dat"
f.write_bytes(b"\0" * random.randint(1, 1024))
t = base_time - x - j * width
os.utime(f, (t, t))
return dirpath

@staticmethod
def init_func(cache):
@cache.memoize_path
def dirsize(path):
total_size = 0
with os.scandir(path) as entries:
Expand All @@ -67,37 +112,16 @@ def dirsize(path):
total_size += e.stat().st_size
return total_size

self._dirsize = dirsize

def time_directory(self, _control, _tmpdir):
for _ in range(100):
self._dirsize(str(self.dir))

def teardown(self, *_args, **_kwargs):
self.cache.clear()
shutil.rmtree(self.dir)


class TimeDirectoryDeep(TimeDirectoryFlat):
LAYOUT = (3, 3, 3, 3)


def create_tree(root, layout):
base_time = time()
dirs = [Path(root)]
for i, width in enumerate(layout):
if i < len(layout) - 1:
dirs2 = []
for d in dirs:
for x in range(width):
d2 = d / f"d{x}"
d2.mkdir()
dirs2.append(d2)
dirs = dirs2
else:
for j, d in enumerate(dirs):
for x in range(width):
f = d / f"f{x}.dat"
f.write_bytes(b"\0" * random.randint(1, 1024))
t = base_time - x - j * width
os.utime(f, (t, t))
return dirsize


class TimeFlatDirectory(BaseDirectoryBenchmark):
@staticmethod
def get_layout():
return (100,)


class TimeDeepDirectory(BaseDirectoryBenchmark):
@staticmethod
def get_layout():
return (3, 3, 3, 3)
3 changes: 2 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ deps =
asv
virtualenv
commands =
asv run {posargs}
asv run --strict {posargs} HEAD^1..HEAD

[pytest]
addopts = --cov=fscacher --no-cov-on-fail
Expand Down Expand Up @@ -57,6 +57,7 @@ doctests = True
exclude = .*/,build/,dist/,test/data,venv/,_version.py
import-order-style = jwodder
max-line-length = 88
unused-arguments-ignore-stub-functions = True
select = C,B,B902,B950,E,F,U100,W
extend-ignore = B005,E203

Expand Down

0 comments on commit 64b1125

Please sign in to comment.