From b9948b914bb83adb3f19bdc7b7a4343ebadffc4a Mon Sep 17 00:00:00 2001 From: univerone Date: Fri, 14 May 2021 14:14:36 +0800 Subject: [PATCH 01/13] :art: [db] update profiling module database part --- modelci/persistence/service_.py | 114 +++++++++++++++++++++++++++++++- modelci/types/models/mlmodel.py | 3 +- modelci/types/models/profile.py | 111 +++++++++++++++++++++++++++++++ 3 files changed, 226 insertions(+), 2 deletions(-) create mode 100644 modelci/types/models/profile.py diff --git a/modelci/persistence/service_.py b/modelci/persistence/service_.py index 232063ea..d9c45f38 100644 --- a/modelci/persistence/service_.py +++ b/modelci/persistence/service_.py @@ -7,7 +7,8 @@ Persistence service using PyMongo. """ -from typing import List +from ipaddress import IPv4Address, IPv6Address +from typing import List, Union, Optional import gridfs from bson import ObjectId @@ -17,6 +18,7 @@ from modelci.experimental.mongo_client import MongoClient from modelci.persistence.exceptions import ServiceException from modelci.types.models import MLModel, ModelUpdateSchema +from modelci.types.models.profile import StaticProfileResult, DynamicProfileResult _db = MongoClient()[db_settings.mongo_db] _collection = _db['model_d_o'] @@ -103,3 +105,113 @@ def delete_model(id_: str): if _fs.exists(ObjectId(model['weight'])): _fs.delete(ObjectId(model['weight'])) return _collection.delete_one({'_id': ObjectId(id_)}) + + +def register_static_profiling_result(id_: str, static_profiling_result: StaticProfileResult): + """ Register or update static profiling result to a model. + + Args: + id_: ID of the model + static_profiling_result: static profiling result + + Returns: + + """ + return _collection.update_one({'_id': ObjectId(id_)}, + {"$set": {"profile_result.static_profile_result": static_profiling_result.dict()}}, + upsert=True) + + +def register_dynamic_profiling_result(id_: str, dynamic_result: DynamicProfileResult): + """ Add one dynamic profiling result to a model. + + Args: + id_: ID of the model + dynamic_result: Dynamic profiling result + + Returns: + + """ + return _collection.update_one({'_id': ObjectId(id_)}, + {"$push": { + "profile_result.dynamic_profile_results": jsonable_encoder(dynamic_result)}}) + + +def exists_dynamic_profiling_result_by_pks(id_: str, ip: str, device_id: str) -> bool: + """Check if the dynamic profiling result exists. + + Args: + id_: ID of the model. + ip: IP address of dynamic profiling result to be deleted. + device_id: Device ID of dynamic profiling result to be deleted. + + Returns: True` for existence, `False` otherwise. + + """ + model = _collection.find_one(filter={ + '_id': ObjectId(id_), + 'profile_result.dynamic_profile_results.ip': ip, + 'profile_result.dynamic_profile_results.device_id': device_id + }) + return model is not None + + +def update_dynamic_profiling_result(id_: str, dynamic_result: DynamicProfileResult, + force_insert: Optional[bool] = False): + """ Update one dynamic profiling result to a model. + + Args: + id_: ID of the object + dynamic_result: Dynamic profiling result + force_insert: force to insert the dynamic result if it is not found + + Returns: + + """ + if exists_dynamic_profiling_result_by_pks(id_, ip=dynamic_result.ip, device_id=dynamic_result.device_id): + return _collection.update_one({ + '_id': ObjectId(id_), + 'profile_result.dynamic_profile_results.ip': dynamic_result.ip, + 'profile_result.dynamic_profile_results.device_id': dynamic_result.device_id + }, {"$set": {"profile_result.dynamic_profile_results.$": jsonable_encoder(dynamic_result)}}) + elif force_insert: + return register_dynamic_profiling_result(id_, dynamic_result) + else: + raise ServiceException( + f'Dynamic profiling result to be updated with ip={dynamic_result.ip}, ' + f'device_id={dynamic_result.device_id} does not exist. Either set `force_insert` or change the ip ' + f'and device_id.' + ) + + +def delete_dynamic_profiling_result(id_: str, dynamic_result_ip: Union[str, IPv4Address, IPv6Address], + dynamic_result_device_id: str): + """Delete one dynamic profiling result to a model. + + Args: + id_: ID of the object. + dynamic_result_ip: Host IP address of dynamic profiling result. + dynamic_result_device_id: Device ID of dynamic profiling result. + + Returns: + + """ + if exists_dynamic_profiling_result_by_pks(id_, ip=dynamic_result_ip, device_id=dynamic_result_device_id): + return _collection.update( + {'_id': ObjectId(id_)}, + {'$pull': { + 'profile_result.dynamic_profile_results': { + 'ip': dynamic_result_ip, + 'device_id': dynamic_result_device_id + } + } + }, + multi=True, + upsert=False + ) + else: + raise ServiceException( + f'Dynamic profiling result to be updated with ip={dynamic_result_ip}, ' + f'device_id={dynamic_result_device_id} does not exist. Either set `force_insert` or change the ip ' + f'and device_id.' + ) diff --git a/modelci/types/models/mlmodel.py b/modelci/types/models/mlmodel.py index b4fce00c..6b0fcc3f 100644 --- a/modelci/types/models/mlmodel.py +++ b/modelci/types/models/mlmodel.py @@ -18,6 +18,7 @@ from .common import Metric, IOShape, Framework, Engine, Task, ModelStatus, Status, PydanticObjectId, \ named_enum_json_encoder from .pattern import as_form +from .profile import ProfileResult from ...hub.utils import parse_path_plain, generate_path_plain from modelci.config import db_settings from modelci.experimental.mongo_client import MongoClient @@ -127,7 +128,7 @@ class MLModel(BaseMLModel): id: Optional[PydanticObjectId] = Field(default=None, alias='_id') parent_model_id: Optional[PydanticObjectId] weight: Weight - profile_result: Optional[Any] + profile_result: ProfileResult = ProfileResult() status: Optional[Status] = Status.Unknown model_input: Optional[list] # TODO: merge into field `inputs` model_status: Optional[List[ModelStatus]] = Field(default_factory=list) diff --git a/modelci/types/models/profile.py b/modelci/types/models/profile.py new file mode 100644 index 00000000..1d6edc1c --- /dev/null +++ b/modelci/types/models/profile.py @@ -0,0 +1,111 @@ +from datetime import datetime +from typing import List, Optional + +from pydantic import Field +from pydantic.main import BaseModel +from pydantic.types import PositiveInt, NonNegativeFloat, confloat, NonNegativeInt, conint + + +class InfoTuple(BaseModel): + """A triplet tuple containing overall, average, 50th percentile, 95th percentile, and 99th percentile values of a + data over a period of time. + """ + # the average value + avg: float + # 50th percentile, 95th percentile, and 99th percentile values, of a data. + p50: float + p95: float + p99: float + + def __init__(self, avg: float, p50: float, p95: float, p99: float): + super().__init__(avg=avg, p50=p50, p95=p95, p99=p99) + + +class ProfileMemory(BaseModel): + """Memory class in dynamic profiling result + """ + # Main or GPU memory consumption in Byte for loading and initializing the model + total_memory: NonNegativeInt + # GPU memory consumption in Byte for processing batch data + memory_usage: NonNegativeInt + # GPU utilization rate for processing batch data + utilization = confloat(ge=0, le=1) + + +class ProfileLatency(BaseModel): + """End-to-end latency in dynamic profiling result + """ + # Min, max and avg model loading and initialization latencies + initialization_latency: InfoTuple + # Min, max and avg preprocess latencies + preprocess_latency: InfoTuple + # Min, max and avg inference latencies + inference_latency: InfoTuple + # Min, max and avg postprocess latencies + postprocess_latency: InfoTuple + + +class ProfileThroughput(BaseModel): + """End-to-end throughput in dynamic profiling result + """ + # Batch formation QPS + batch_formation_throughput: NonNegativeFloat= Field(default=0) + # Batch preprocess QPS + preprocess_throughput: NonNegativeFloat= Field(default=0) + # Batch inference QPS + inference_throughput: NonNegativeFloat= Field(default=0) + # Batch postprocess QPS + postprocess_throughput: NonNegativeFloat= Field(default=0) + + +class DynamicProfileResult(BaseModel): + """ + Dynamic profiling result + + The primary key of the document is (ip, device_id) pair. + """ + # IP address of the cluster node + ip: str=Field(default='127.0.0.1') + # Device ID, e.g. cpu, cuda:0, cuda:1 + device_id: str + # Device name, e.g. Tesla K40c + device_name: str + # Batch size + batch: conint(ge=1) + # Memory class in dynamic profiling result + memory: ProfileMemory + # End-to-end latency in dynamic profiling result + latency: ProfileLatency + # End-to-end throughput in dynamic profiling result + throughput: ProfileThroughput + # Creation time of this record + create_time: Optional[datetime] = Field(default_factory=datetime.utcnow) + + +class StaticProfileResult(BaseModel): + """ + Static profiling result + """ + + # Number of parameters of this model + parameters: PositiveInt + # Floating point operations + flops: PositiveInt + # Memory consumption in Byte in order to load this model into GPU or CPU + memory: PositiveInt + # Memory read in Byte + mread: PositiveInt + # Memory write in Byte + mwrite: PositiveInt + # Memory readwrite in Byte + mrw: PositiveInt + + +class ProfileResult(BaseModel): + """ + Profiling result + """ + # Static profile result + static_profile_result: Optional[StaticProfileResult] + # Dynamic profile result + dynamic_profile_results: Optional[List[DynamicProfileResult]] = Field(default_factory=list) From 4bf290c22aa79b82ce45e9f8996b9aa26f1c9ff5 Mon Sep 17 00:00:00 2001 From: univerone Date: Sat, 15 May 2021 20:22:20 +0800 Subject: [PATCH 02/13] :art: [test] update test and fix service_ function --- example/resnet50_explicit_path.yml | 2 +- modelci/persistence/service_.py | 30 +++++-- tests/test_model_api.py | 25 +++--- tests/test_model_service.py | 138 +++++++++++++---------------- tests/test_modelhub_cli.py | 35 ++++---- 5 files changed, 120 insertions(+), 110 deletions(-) diff --git a/example/resnet50_explicit_path.yml b/example/resnet50_explicit_path.yml index cb1208ff..1f0a75b3 100644 --- a/example/resnet50_explicit_path.yml +++ b/example/resnet50_explicit_path.yml @@ -1,5 +1,5 @@ weight: "~/.modelci/ResNet50/pytorch-pytorch/image_classification/1.pth" -architecture: ResNet18 +architecture: ResNet50 framework: PYTORCH engine: PYTORCH version: 1 diff --git a/modelci/persistence/service_.py b/modelci/persistence/service_.py index d9c45f38..b272e428 100644 --- a/modelci/persistence/service_.py +++ b/modelci/persistence/service_.py @@ -7,6 +7,7 @@ Persistence service using PyMongo. """ +from enum import Enum from ipaddress import IPv4Address, IPv6Address from typing import List, Union, Optional @@ -17,8 +18,9 @@ from modelci.config import db_settings from modelci.experimental.mongo_client import MongoClient from modelci.persistence.exceptions import ServiceException -from modelci.types.models import MLModel, ModelUpdateSchema +from modelci.types.models import MLModel, ModelUpdateSchema, Metric from modelci.types.models.profile import StaticProfileResult, DynamicProfileResult +from modelci.utils.misc import remove_dict_null _db = MongoClient()[db_settings.mongo_db] _collection = _db['model_d_o'] @@ -85,19 +87,35 @@ def get_models(**kwargs) -> List[MLModel]: """ valid_keys = {'architecture', 'framework', 'engine', 'task', 'version'} - valid_kwargs = {key: value for key, value in kwargs.items() if value is not None and key in valid_keys} + + valid_kwargs = { + key: (value.value if isinstance(value, Enum) else value) + for key, value in remove_dict_null(kwargs).items() + if key in valid_keys + } models = _collection.find(valid_kwargs) return list(map(MLModel.parse_obj, models)) -def update_model(id: str, schema: ModelUpdateSchema) -> MLModel: - prev_model = get_by_id(id) +def update_model(id_: str, schema: ModelUpdateSchema) -> MLModel: + """ Update existed model info + + Args: + id_: the ID of targeted model + schema: + + Returns: the updated model object + + """ + prev_model = get_by_id(id_) + if schema.metric: + schema.metric = {Metric(k).name: v for k, v in schema.metric.items()} updated_data = { key: value for key, value in jsonable_encoder(schema, exclude_unset=True).items() if getattr(schema, key) != getattr(prev_model, key) } - _collection.update_one({'_id': ObjectId(id)}, {"$set": updated_data}) - return get_by_id(id) + _collection.update_one({'_id': ObjectId(id_)}, {"$set": updated_data}) + return get_by_id(id_) def delete_model(id_: str): diff --git a/tests/test_model_api.py b/tests/test_model_api.py index 8eb5b25a..7ab2540c 100644 --- a/tests/test_model_api.py +++ b/tests/test_model_api.py @@ -9,15 +9,17 @@ from pathlib import Path import requests +import torch +import torchvision from modelci.config import app_settings -from modelci.hub.registrar import download_model_from_url -Path(f"{str(Path.home())}/.modelci/ResNet50/pytorch-pytorch/image_classification").mkdir(parents=True, exist_ok=True) -download_model_from_url( - 'https://download.pytorch.org/models/resnet50-19c8e357.pth', - f'{str(Path.home())}/.modelci/ResNet50/pytorch-pytorch/image_classification/1.pth' -) +file_dir = str(Path.home() / '.modelci/ResNet50/pytorh-pytorch/image_classification') +Path(file_dir).mkdir(parents=True, exist_ok=True) +model_path = f'{file_dir}/1.pth' + +torch_model = torchvision.models.resnet50(pretrained=True) +torch.save(torch_model, model_path) def test_get_all_models(): @@ -32,9 +34,7 @@ def test_publish_model(): 'metric': "{'acc': 0.76}", 'task': '0', 'inputs': "[{'shape': [-1, 3, 224, 224], 'dtype': 11, 'name': 'input', 'format': 0}]", 'outputs': "[{'shape': [-1, 1000], 'dtype': 11, 'name': 'output', 'format': 0}]"} - files = [] - weights_file = f'{str(Path.home())}/.modelci/ResNet50/pytorch-pytorch/image_classification/1.pth' - files.append(("files", (weights_file, open(Path(weights_file), 'rb'), 'application/example'))) + files = [("files", (model_path, open(Path(model_path), 'rb'), 'application/example'))] response = requests.post(f'{app_settings.api_v1_prefix}/model/', params=payload, data=form_data, files=files) assert response.status_code == HTTPStatus.CREATED @@ -63,6 +63,7 @@ def test_update_model(): def test_delete_model(): with requests.get(f'{app_settings.api_v1_prefix}/model/') as r: model_list = r.json() - model_id = model_list[0]["id"] - response = requests.delete(f'{app_settings.api_v1_prefix}/model/{model_id}') - assert response.status_code == HTTPStatus.NO_CONTENT + for model in model_list: + model_id = model["id"] + response = requests.delete(f'{app_settings.api_v1_prefix}/model/{model_id}') + assert response.status_code == HTTPStatus.NO_CONTENT diff --git a/tests/test_model_service.py b/tests/test_model_service.py index 8adb9471..e896391b 100644 --- a/tests/test_model_service.py +++ b/tests/test_model_service.py @@ -1,23 +1,29 @@ +from pathlib import Path + +import torch +import torchvision + +from modelci.hub.registrar import register_model_from_yaml from modelci.persistence import mongo -from modelci.persistence.service import ModelService -from modelci.types.bo import ( - DynamicProfileResultBO, +from modelci.persistence.service_ import delete_model, register_static_profiling_result, \ + register_dynamic_profiling_result, update_dynamic_profiling_result, delete_dynamic_profiling_result +from modelci.persistence.service_ import get_models, get_by_id, update_model +from modelci.types.models import Task, Metric, ModelUpdateSchema +from modelci.types.models.profile import ( + StaticProfileResult, + DynamicProfileResult, ProfileMemory, ProfileLatency, ProfileThroughput, - ModelBO, - Framework, - Engine, - ModelVersion, - IOShape, - Weight, - StaticProfileResultBO, - InfoTuple, - Task, - Metric, - ModelStatus + InfoTuple ) -from modelci.types.trtis_objects import ModelInputFormat + +file_dir = str(Path.home() / '.modelci/ResNet50/pytorh-pytorch/image_classification') +Path(file_dir).mkdir(parents=True, exist_ok=True) +model_path = f'{file_dir}/1.pth' + +torch_model = torchvision.models.resnet50(pretrained=True) +torch.save(torch_model, model_path) def test_init(): @@ -25,25 +31,11 @@ def test_init(): def test_register_model(): - model = ModelBO( - 'ResNet50', - framework=Framework.PYTORCH, - engine=Engine.PYTORCH, - version=ModelVersion(1), - dataset='ImageNet', - metric={Metric.ACC: 0.80}, - task=Task.IMAGE_CLASSIFICATION, - inputs=[IOShape([-1, 3, 224, 224], dtype=float, format=ModelInputFormat.FORMAT_NCHW)], - outputs=[IOShape([-1, 1000], dtype=int)], - model_status=[ModelStatus.PUBLISHED], - weight=Weight(bytes([123])) - ) - - assert ModelService.post_model(model) + register_model_from_yaml("example/resnet50_explicit_path.yml") def test_get_model_by_name(): - models = ModelService.get_models('ResNet50') + models = get_models(architecture='ResNet50') # check length assert len(models) == 1 @@ -53,54 +45,49 @@ def test_get_model_by_name(): def test_get_model_by_task(): - models = ModelService.get_models_by_task(Task.IMAGE_CLASSIFICATION) + models = get_models(task=Task.Image_Classification) # check length assert len(models) == 1 # check name for model in models: - assert model.task == Task.IMAGE_CLASSIFICATION + assert model.task == Task.Image_Classification def test_get_model_by_id(): - model_bo = ModelService.get_models('ResNet50')[0] - model = ModelService.get_model_by_id(model_bo.id) + model = get_models()[0] + model_by_id = get_by_id(str(model.id)) # check model id - assert model.id == model_bo.id + assert model.id == model_by_id.id def test_update_model(): - model = ModelService.get_models('ResNet50')[0] - model.metric[Metric.ACC] = 0.9 - model.weight.weight = bytes([123, 255]) + model = get_models()[0] # check if update success - assert ModelService.update_model(model) - - model_ = ModelService.get_models('ResNet50')[0] + model_ = update_model(str(model.id), ModelUpdateSchema(metric={Metric.acc: 0.9})) # check updated model - assert abs(model_.metric[Metric.ACC] - 0.9) < 1e-6 - assert model_.weight.weight == model.weight.weight + assert abs(model_.metric[Metric.acc] - 0.9) < 1e-6 def test_register_static_profiling_result(): - model = ModelService.get_models('ResNet50')[0] - spr = StaticProfileResultBO(5000, 200000, 200000, 10000, 10000, 10000) - assert ModelService.register_static_profiling_result(model.id, spr) + model = get_models()[0] + spr = StaticProfileResult(parameters=5000, flops=200000, memory=200000, mread=10000, mwrite=10000, mrw=10000) + assert register_static_profiling_result(str(model.id), spr) def test_register_dynamic_profiling_result(): - model = ModelService.get_models('ResNet50')[0] + model = get_models()[0] dummy_info_tuple = InfoTuple(avg=1, p50=1, p95=1, p99=1) - dpr = DynamicProfileResultBO( + dpr = DynamicProfileResult( device_id='gpu:01', device_name='Tesla K40c', batch=1, - memory=ProfileMemory(1000, 1000, 0.5), + memory=ProfileMemory(total_memory=1000, memory_usage=1000, utilization=0.5), latency=ProfileLatency( - init_latency=dummy_info_tuple, + initialization_latency=dummy_info_tuple, preprocess_latency=dummy_info_tuple, inference_latency=dummy_info_tuple, postprocess_latency=dummy_info_tuple, @@ -112,20 +99,20 @@ def test_register_dynamic_profiling_result(): postprocess_throughput=1, ) ) - assert ModelService.append_dynamic_profiling_result(model.id, dpr) + assert register_dynamic_profiling_result(str(model.id), dpr) def test_update_dynamic_profiling_result(): - model = ModelService.get_models('ResNet50')[0] + model = get_models(architecture='ResNet50')[0] dummy_info_tuple = InfoTuple(avg=1, p50=1, p95=1, p99=1) updated_info_tuple = InfoTuple(avg=1, p50=2, p95=1, p99=1) - dpr = DynamicProfileResultBO( + dpr = DynamicProfileResult( device_id='gpu:01', device_name='Tesla K40c', batch=1, - memory=ProfileMemory(1000, 2000, 0.5), + memory=ProfileMemory(total_memory=1000, memory_usage=2000, utilization=0.5), latency=ProfileLatency( - init_latency=dummy_info_tuple, + initialization_latency=dummy_info_tuple, preprocess_latency=dummy_info_tuple, inference_latency=updated_info_tuple, postprocess_latency=dummy_info_tuple, @@ -138,25 +125,25 @@ def test_update_dynamic_profiling_result(): ) ) # check update - assert ModelService.update_dynamic_profiling_result(model.id, dpr) + assert update_dynamic_profiling_result(str(model.id), dpr) # check result - model = ModelService.get_models('ResNet50')[0] - assert model.profile_result.dynamic_results[0].memory.memory_usage == 2000 - assert model.profile_result.dynamic_results[0].latency.inference_latency.p50 == 2 + model = get_models()[0] + assert model.profile_result.dynamic_profile_results[0].memory.memory_usage == 2000 + assert model.profile_result.dynamic_profile_results[0].latency.inference_latency.p50 == 2 def test_delete_dynamic_profiling_result(): - model = ModelService.get_models('ResNet50')[0] + model = get_models()[0] dummy_info_tuple1 = InfoTuple(avg=1, p50=1, p95=1, p99=2) dummy_info_tuple2 = InfoTuple(avg=1, p50=1, p95=1, p99=1) - dpr = DynamicProfileResultBO( + dpr = DynamicProfileResult( device_id='gpu:02', device_name='Tesla K40c', batch=1, - memory=ProfileMemory(1000, 1000, 0.5), + memory=ProfileMemory(total_memory=1000, memory_usage=1000, utilization=0.5), latency=ProfileLatency( - init_latency=dummy_info_tuple1, + initialization_latency=dummy_info_tuple1, preprocess_latency=dummy_info_tuple2, inference_latency=dummy_info_tuple2, postprocess_latency=dummy_info_tuple2, @@ -168,27 +155,28 @@ def test_delete_dynamic_profiling_result(): postprocess_throughput=1, ) ) - ModelService.append_dynamic_profiling_result(model.id, dpr) + register_dynamic_profiling_result(str(model.id), dpr) # reload - model = ModelService.get_models('ResNet50')[0] - dpr_bo = model.profile_result.dynamic_results[0] - dpr_bo2 = model.profile_result.dynamic_results[1] + model = get_models()[0] + dpr_bo = model.profile_result.dynamic_profile_results[0] + dpr_bo2 = model.profile_result.dynamic_profile_results[1] # check delete - assert ModelService.delete_dynamic_profiling_result(model.id, dpr_bo.ip, dpr_bo.device_id) + assert delete_dynamic_profiling_result(str(model.id), dpr_bo.ip, dpr_bo.device_id) # check result - model = ModelService.get_models('ResNet50')[0] - assert len(model.profile_result.dynamic_results) == 1 + model = get_models()[0] + assert len(model.profile_result.dynamic_profile_results) == 1 - dpr_left = model.profile_result.dynamic_results[0] - assert dpr_bo2.latency.init_latency.avg == dpr_left.latency.init_latency.avg + dpr_left = model.profile_result.dynamic_profile_results[0] + assert dpr_bo2.latency.initialization_latency.avg == dpr_left.latency.initialization_latency.avg def test_delete_model(): - model = ModelService.get_models('ResNet50')[0] - assert ModelService.delete_model_by_id(model.id) + model_list = get_models(architecture='ResNet50') + for model in model_list: + assert delete_model(str(model.id)) def test_drop_test_database(): diff --git a/tests/test_modelhub_cli.py b/tests/test_modelhub_cli.py index 303de1d7..0f16f97f 100644 --- a/tests/test_modelhub_cli.py +++ b/tests/test_modelhub_cli.py @@ -3,28 +3,33 @@ import requests import torch +import tempfile from typer.testing import CliRunner import torchvision from modelci.config import app_settings from modelci.cli.modelhub import app runner = CliRunner() -file_dir = f"{str(Path.home())}/.modelci/ResNet50/pytorch-pytorch/image_classification" +file_dir = str(Path.home() / '.modelci/ResNet50/pytorh-pytorch/image_classification') Path(file_dir).mkdir(parents=True, exist_ok=True) -file_path = file_dir + "/1.pth" +weight_path = f'{tempfile.gettempdir()}/1.pth' +model_path = f'{file_dir}/1.pth' def test_get(): result = runner.invoke(app, [ 'get', 'https://download.pytorch.org/models/resnet50-19c8e357.pth', - f'{str(Path.home())}/.modelci/ResNet50/pytorch-pytorch/image_classification/1.pth' + weight_path ]) assert result.exit_code == 0 assert "model downloaded successfully" in result.stdout def test_publish(): + torch_model = torchvision.models.resnet50(pretrained=False) + torch_model.load_state_dict(torch.load(weight_path)) + torch.save(torch_model, model_path) result = runner.invoke(app, [ 'publish', '-f', 'example/resnet50.yml' ]) @@ -53,20 +58,18 @@ def test_update(): assert result.exit_code == 0 -def test_delete(): - with requests.get(f'{app_settings.api_v1_prefix}/model/') as r: - model_list = r.json() - model_id = model_list[0]["id"] - result = runner.invoke(app, ['delete', model_id]) - assert result.exit_code == 0 - assert f"Model {model_id} deleted\n" == result.output - - def test_convert(): - torch_model = torchvision.models.resnet50(pretrained=False) - torch_model.load_state_dict(torch.load(file_path)) - torch.save(torch_model, file_path) result = runner.invoke(app, [ 'convert', '-f', 'example/resnet50.yml' ]) - assert result.exit_code == 0 \ No newline at end of file + assert result.exit_code == 0 + + +def test_delete(): + with requests.get(f'{app_settings.api_v1_prefix}/model/') as r: + model_list = r.json() + for model in model_list: + model_id = model["id"] + result = runner.invoke(app, ['delete', model_id]) + assert result.exit_code == 0 + assert f"Model {model_id} deleted\n" == result.output From 579dc7d493c19c88689d916bbc2096f57ab47e29 Mon Sep 17 00:00:00 2001 From: univerone Date: Sun, 16 May 2021 10:21:18 +0800 Subject: [PATCH 03/13] :art: split manager into cache manager --- modelci/hub/cache_manager.py | 76 ++++++++++++++++++++ modelci/hub/manager.py | 121 +++++--------------------------- modelci/persistence/service_.py | 13 ++++ 3 files changed, 107 insertions(+), 103 deletions(-) create mode 100644 modelci/hub/cache_manager.py diff --git a/modelci/hub/cache_manager.py b/modelci/hub/cache_manager.py new file mode 100644 index 00000000..5f7f8b93 --- /dev/null +++ b/modelci/hub/cache_manager.py @@ -0,0 +1,76 @@ +import os +import subprocess +from typing import List + +from modelci.hub import converter +from modelci.hub.utils import TensorRTPlatform +from modelci.types.models import MLModel, Engine + +__all__ = ['get_remote_model_weight', 'get_remote_model_weights', 'delete_remote_weight'] + + +def get_remote_model_weight(model: MLModel): + """Download a local cache of model from remote ModelDB in a structured path. And generate a configuration file. + TODO(lym): + 1. set force insert config.pbtxt + 2. set other options in generation of config.pbtxt (e.g. max batch size, instance group...) + This function will keep a local cache of the used model in the path: + `~/.modelci//-//` + Arguments: + model (MLModel): MLModelobject. + Return: + Path: Model saved path. + """ + save_path = model.saved_path + + save_path.parent.mkdir(exist_ok=True, parents=True) + + if not save_path.exists(): + # TODO save TFS or TRT model files from gridfs + with open(str(save_path), 'wb') as f: + f.write(bytes(model.weight)) + if model.engine == Engine.TFS: + subprocess.call(['unzip', save_path, '-d', '/']) + os.remove(save_path) + elif model.engine == Engine.TRT: + subprocess.call(['unzip', save_path, '-d', '/']) + os.remove(save_path) + + converter.TRTConverter.generate_trt_config( + save_path.parent, # ~/.modelci//-// + inputs=model.inputs, + outputs=model.outputs, + arch_name=model.name, + platform=TensorRTPlatform.TENSORFLOW_SAVEDMODEL + ) + + return save_path + + +def get_remote_model_weights(models: List[MLModel]): + """Get remote model weights from a list of models. + Only models with highest version of each unique task, architecture, framework, and engine pair are download. + """ + + # group by (task, architecture, framework, engine) pair + pairs = set(map(lambda x: (x.task, x.architecture, x.framework, x.engine), models)) + model_groups = [ + sorted( + [model for model in models if (model.task, model.architecture, model.framework, model.engine) == pair], + key=lambda model: model.version, reverse=True + ) for pair + in pairs + ] + + # get weights of newest version of each pair + for model_group in model_groups: + get_remote_model_weight(model_group[0]) + + +def delete_remote_weight(model: MLModel): + save_path = model.saved_path + + if os.path.isfile(save_path): + os.remove(save_path) + elif os.path.isdir(save_path): + os.removedirs(save_path) diff --git a/modelci/hub/manager.py b/modelci/hub/manager.py index b2748835..ec4e9895 100644 --- a/modelci/hub/manager.py +++ b/modelci/hub/manager.py @@ -12,146 +12,61 @@ # or implied. See the License for the specific language governing # permissions and limitations under the License. -import os -import subprocess from typing import List -from modelci.hub import converter -from modelci.hub.utils import TensorRTPlatform -from modelci.persistence.service import ModelService -from modelci.types.bo import Task, ModelVersion, Framework, ModelBO -__all__ = ['get_remote_model_weight', 'retrieve_model', - 'retrieve_model_by_task', 'retrieve_model_by_parent_id'] +from modelci.hub.cache_manager import get_remote_model_weights +from modelci.persistence.service_ import get_by_parent_id, get_models +from modelci.types.models import MLModel, Task, Framework, Engine -from modelci.types.models.common import Engine - - -def get_remote_model_weight(model: ModelBO): - """Download a local cache of model from remote ModelDB in a structured path. And generate a configuration file. - TODO(lym): - 1. set force insert config.pbtxt - 2. set other options in generation of config.pbtxt (e.g. max batch size, instance group...) - This function will keep a local cache of the used model in the path: - `~/.modelci//-//` - Arguments: - model (ModelBO): Model business object. - Return: - Path: Model saved path. - """ - save_path = model.saved_path - - save_path.parent.mkdir(exist_ok=True, parents=True) - - if not save_path.exists(): - with open(str(save_path), 'wb') as f: - f.write(model.weight.weight) - if model.engine == Engine.TFS: - subprocess.call(['unzip', save_path, '-d', '/']) - os.remove(save_path) - elif model.engine == Engine.TRT: - subprocess.call(['unzip', save_path, '-d', '/']) - os.remove(save_path) - - converter.TRTConverter.generate_trt_config( - save_path.parent, # ~/.modelci//-// - inputs=model.inputs, - outputs=model.outputs, - arch_name=model.name, - platform=TensorRTPlatform.TENSORFLOW_SAVEDMODEL - ) - - return save_path - - -def _get_remote_model_weights(models: List[ModelBO]): - """Get remote model weights from a list of models. - Only models with highest version of each unique task, architecture, framework, and engine pair are download. - """ - - # group by (task, architecture, framework, engine) pair - pairs = set(map(lambda x: (x.task, x.architecture, x.framework, x.engine), models)) - model_groups = [ - [model for model in models if (model.task, model.architecture, model.framework, model.engine) == pair] for pair in pairs - ] - - # get weights of newest version of each pair - for model_group in model_groups: - get_remote_model_weight(model_group[0]) - - -def delete_remote_weight(model: ModelBO): - save_path = model.saved_path - - if model.engine in [Engine.TORCHSCRIPT, Engine.ONNX]: - os.remove(save_path) - else: - os.removedirs(save_path) +__all__ = ['retrieve_model', 'retrieve_model_by_parent_id'] def retrieve_model( - architecture_name: str = 'ResNet50', + architecture: str = 'ResNet50', task: Task = None, framework: Framework = None, engine: Engine = None, - version: ModelVersion = None, + version: int = None, download: bool = True, -) -> List[ModelBO]: +) -> List[MLModel]: """Query a model by name, task, framework, engine or version. Arguments: - architecture_name (str): Model architecture name. + architecture (str): Model architecture name. task (Task): which machine learn task is model used for,Default to None framework (Framework): Framework name, optional query key. Default to None. engine (Engine): Model optimization engine name. - version (ModelVersion): Model version. Default to None. + version (Int): Model version. Default to None. download (bool): Flag for whether the model needs to be cached locally. Returns: - List[ModelBO]: A list of model business object. + List[MLModel]: A list of model business object. """ # retrieve - models = ModelService.get_models(architecture_name, task=task, framework=framework, engine=engine, version=version) + models = get_models(architecture=architecture, task=task, framework=framework, engine=engine, version=version) # check if found if len(models) != 0 and download: - _get_remote_model_weights(models) - - return models - - -def retrieve_model_by_task(task: Task) -> List[ModelBO]: - """Query a model by task. - This function will download a cache model from the model DB. - - Arguments: - task (Task): Task name the model is used for. - - Returns: - List[ModelBO]: A list of model business object. - """ - # retrieve - models = ModelService.get_models_by_task(task) - # check if found - if len(models) == 0: - raise FileNotFoundError('Model not found!') - - _get_remote_model_weights(models) + get_remote_model_weights(models) return models -def retrieve_model_by_parent_id(parent_id: str) -> List[ModelBO]: +def retrieve_model_by_parent_id(parent_id: str, download: bool = True) -> List[MLModel]: """ Query models by specifying the parent model id Args: parent_id (str): : the parent model id of current model if this model is derived from a pre-existing one + download: Flag for whether the model needs to be cached locally. Returns: - List[ModelBO]: A list of model business object. + List[MLModel]: A list of MLModel object. """ - models = ModelService.get_models_by_parent_id(parent_id) + models = get_by_parent_id(parent_id) # check if found if len(models) == 0: raise FileNotFoundError('Model not found!') + elif download: + get_remote_model_weights(models) return models diff --git a/modelci/persistence/service_.py b/modelci/persistence/service_.py index b272e428..9d6c1790 100644 --- a/modelci/persistence/service_.py +++ b/modelci/persistence/service_.py @@ -72,6 +72,19 @@ def get_by_id(id: str) -> MLModel: raise ServiceException(f'Model with id={id} does not exist.') +def get_by_parent_id(id_: str) -> List[MLModel]: + """ Get MLModel objects by its parent model ID. + Args: + id_: The ID of parent model + Returns: List of model objects + """ + models = _collection.find(filter={'parent_model_id': ObjectId(id_)}) + if len(models): + return list(map(MLModel.parse_obj, models)) + else: + raise ServiceException(f'Model with parent model ID={id_} does not exist.') + + def exists_by_id(id: str) -> MLModel: model = _collection.find_one(filter={'_id': ObjectId(id)}) return model is not None From 59aa5689d9e0bc86af924eaa4073f6dbb19a7d70 Mon Sep 17 00:00:00 2001 From: univerone Date: Sun, 16 May 2021 16:54:28 +0800 Subject: [PATCH 04/13] :art: replace ModelService usage --- .../app/experimental/endpoints/cv_tuner.py | 23 +++++++++---------- modelci/app/v1/endpoints/visualizer.py | 13 ++++++----- modelci/hub/registrar.py | 3 --- modelci/types/models/__init__.py | 2 +- modelci/types/models/mlmodel.py | 5 ++-- modelci/types/type_conversion.py | 8 +++---- 6 files changed, 26 insertions(+), 28 deletions(-) diff --git a/modelci/app/experimental/endpoints/cv_tuner.py b/modelci/app/experimental/endpoints/cv_tuner.py index bf6fa7d6..3d6f67f0 100644 --- a/modelci/app/experimental/endpoints/cv_tuner.py +++ b/modelci/app/experimental/endpoints/cv_tuner.py @@ -11,11 +11,10 @@ from modelci.experimental.model.model_structure import Structure, Operation from modelci.hub.registrar import register_model -from modelci.hub.manager import get_remote_model_weight +from modelci.hub.cache_manager import get_remote_model_weight from modelci.hub.utils import generate_path_plain -from modelci.persistence.service import ModelService -from modelci.types.bo import ModelVersion, Engine, IOShape, ModelStatus -from modelci.types.models import MLModel +from modelci.persistence.service_ import get_by_id, get_models +from modelci.types.models import MLModel, Engine, IOShape, ModelStatus from modelci.types.type_conversion import model_data_type_to_torch, type_to_data_type from modelci.utils.exceptions import ModelStructureError @@ -52,7 +51,7 @@ def update_finetune_model_as_new(id: str, updated_layer: Structure, dry_run: boo """ if len(updated_layer.layer.items()) == 0: return True - model = ModelService.get_model_by_id(id) + model = get_by_id(id) if model.engine != Engine.PYTORCH: raise ValueError(f'model {id} is not supported for editing. ' f'Currently only support model with engine=PYTORCH') @@ -124,16 +123,16 @@ def update_finetune_model_as_new(id: str, updated_layer: Structure, dry_run: boo # TODO return validation result for dry_run mode # TODO apply Semantic Versioning https://semver.org/ # TODO reslove duplicate model version problem in a more efficient way - version = ModelVersion(model.version.ver + 1) - previous_models = ModelService.get_models( + version = model.version + 1 + previous_models = get_models( architecture=model.architecture, task=model.task, framework=model.framework, engine=Engine.NONE ) if len(previous_models): - last_version = max(previous_models, key=lambda k: k.version.ver).version.ver - version = ModelVersion(last_version + 1) + last_version = max(previous_models, key=lambda k: k.version.ver).version + version = last_version + 1 saved_path = generate_path_plain( architecture=model.architecture, @@ -143,7 +142,7 @@ def update_finetune_model_as_new(id: str, updated_layer: Structure, dry_run: boo version=version ) saved_path.parent.mkdir(parents=True, exist_ok=True) - torch.save(model,saved_path.with_suffix('.pt') ) + torch.save(model, saved_path.with_suffix('.pt')) mlmodelin = MLModel( dataset='', metric={key: 0 for key in model.metric.keys()}, @@ -163,7 +162,7 @@ def update_finetune_model_as_new(id: str, updated_layer: Structure, dry_run: boo convert=False, profile=False ) - model_bo = ModelService.get_models( + ml_model = get_models( architecture=model.architecture, task=model.task, framework=model.framework, @@ -171,4 +170,4 @@ def update_finetune_model_as_new(id: str, updated_layer: Structure, dry_run: boo version=version )[0] - return {'id': model_bo.id} + return {'id': ml_model.id} diff --git a/modelci/app/v1/endpoints/visualizer.py b/modelci/app/v1/endpoints/visualizer.py index 59a7564a..e901c289 100644 --- a/modelci/app/v1/endpoints/visualizer.py +++ b/modelci/app/v1/endpoints/visualizer.py @@ -8,8 +8,9 @@ """ from fastapi import APIRouter -from modelci.persistence.service import ModelService -from modelci.types.bo.model_objects import Engine + +from modelci.persistence.service_ import get_by_id +from modelci.types.models import Engine from torchviz import make_dot import torch @@ -18,11 +19,11 @@ @router.get('/{id}') def generate_model_graph(*, id: str): # noqa - model_bo = ModelService.get_model_by_id(id) + ml_model = get_by_id(id) dot_graph = '' - if model_bo.engine == Engine.PYTORCH: - pytorch_model = torch.load(model_bo.saved_path) - sample_data = torch.zeros(1, *model_bo.inputs[0].shape[1:], dtype=torch.float, requires_grad=False) + if ml_model.engine == Engine.PYTORCH: + pytorch_model = torch.load(ml_model.saved_path) + sample_data = torch.zeros(1, *ml_model.inputs[0].shape[1:], dtype=torch.float, requires_grad=False) out = pytorch_model(sample_data) dot_graph = make_dot(out, params=dict(list(pytorch_model.named_parameters()) + [('x', sample_data)])) diff --git a/modelci/hub/registrar.py b/modelci/hub/registrar.py index 19abe4d7..ef2d90c1 100644 --- a/modelci/hub/registrar.py +++ b/modelci/hub/registrar.py @@ -28,7 +28,6 @@ from modelci.hub.client.trt_client import CVTRTClient from modelci.hub.converter import converter from modelci.hub.utils import parse_path_plain -from modelci.persistence.service import ModelService from modelci.persistence.service_ import save from modelci.types.models import MLModelFromYaml, MLModel from urllib.request import urlopen, Request @@ -141,8 +140,6 @@ def register_model( } for model in models: - model.model_status = [ModelStatus.PROFILING] - ModelService.update_model(model) kwargs['model_info'] = model engine = model.engine diff --git a/modelci/types/models/__init__.py b/modelci/types/models/__init__.py index 57210c1f..6a0f89f6 100644 --- a/modelci/types/models/__init__.py +++ b/modelci/types/models/__init__.py @@ -5,5 +5,5 @@ Email: yli056@e.ntu.edu.sg Date: 2/17/2021 """ -from .common import Framework, Engine, IOShape, Task, Metric +from .common import Framework, Engine, IOShape, Task, Metric, ModelStatus, DataType from .mlmodel import MLModelFromYaml, MLModel, BaseMLModel, ModelUpdateSchema diff --git a/modelci/types/models/mlmodel.py b/modelci/types/models/mlmodel.py index 6b0fcc3f..abef474b 100644 --- a/modelci/types/models/mlmodel.py +++ b/modelci/types/models/mlmodel.py @@ -49,9 +49,9 @@ def __init__(self, __root__): @property def filename(self): - if self.file: + if getattr(self, 'file', None): return self.file.name - elif self._gridfs_out: + elif hasattr(self, '_gridfs_out') and self._gridfs_out is not None: return self._gridfs_out.filename else: return '' @@ -200,3 +200,4 @@ class ModelUpdateSchema(BaseModel): default_factory=list, example='[{"name": "output", "shape": [-1, 1000], "dtype": "TYPE_FP32"}]' ) + model_status: Optional[List[ModelStatus]] = Field(default_factory=list) \ No newline at end of file diff --git a/modelci/types/type_conversion.py b/modelci/types/type_conversion.py index da508e40..ae62aba9 100644 --- a/modelci/types/type_conversion.py +++ b/modelci/types/type_conversion.py @@ -9,7 +9,7 @@ def type_to_data_type(tensor_type): import tensorflow as tf import torch - from modelci.types.bo import DataType + from modelci.types.models import DataType mapper = defaultdict( lambda: DataType.TYPE_INVALID, { @@ -71,7 +71,7 @@ def type_to_data_type(tensor_type): def model_data_type_to_np(model_dtype): - from modelci.types.bo import DataType + from modelci.types.models import DataType mapper = { DataType.TYPE_INVALID: None, @@ -102,7 +102,7 @@ def model_data_type_to_np(model_dtype): def model_data_type_to_torch(model_dtype): - from modelci.types.models.common import DataType + from modelci.types.models import DataType import torch mapper = { @@ -130,7 +130,7 @@ def model_data_type_to_torch(model_dtype): def model_data_type_to_onnx(model_dtype): - from modelci.types.models.common import DataType + from modelci.types.models import DataType mapper = { DataType.TYPE_INVALID: onnxconverter_common, From 461c6b4fa9d06332b5479863a300b4754974c269 Mon Sep 17 00:00:00 2001 From: univerone Date: Sun, 16 May 2021 19:06:15 +0800 Subject: [PATCH 05/13] :art: [finetuner] remove ModelService in finetuner --- .../app/experimental/endpoints/cv_tuner.py | 10 ++--- .../experimental/endpoints/model_structure.py | 6 --- modelci/app/experimental/endpoints/trainer.py | 8 ++-- modelci/experimental/finetuner/trainer.py | 37 ++++++++++--------- modelci/experimental/model/model_train.py | 4 +- modelci/types/models/__init__.py | 2 +- 6 files changed, 31 insertions(+), 36 deletions(-) diff --git a/modelci/app/experimental/endpoints/cv_tuner.py b/modelci/app/experimental/endpoints/cv_tuner.py index 3d6f67f0..7bbc9d62 100644 --- a/modelci/app/experimental/endpoints/cv_tuner.py +++ b/modelci/app/experimental/endpoints/cv_tuner.py @@ -116,7 +116,7 @@ def update_finetune_model_as_new(id: str, updated_layer: Structure, dry_run: boo if not isinstance(output_tensors, (list, tuple)): output_tensors = (output_tensors,) for output_tensor in output_tensors: - output_shape = IOShape(shape=[bs, *output_tensor.shape[1:]], dtype=type_to_data_type(output_tensor.dtype)) + output_shape = IOShape(name="output", shape=[bs, *output_tensor.shape[1:]], dtype=type_to_data_type(output_tensor.dtype)) output_shapes.append(output_shape) if not dry_run: @@ -142,20 +142,20 @@ def update_finetune_model_as_new(id: str, updated_layer: Structure, dry_run: boo version=version ) saved_path.parent.mkdir(parents=True, exist_ok=True) - torch.save(model, saved_path.with_suffix('.pt')) + torch.save(net, saved_path.with_suffix('.pt')) mlmodelin = MLModel( dataset='', metric={key: 0 for key in model.metric.keys()}, task=model.task, inputs=model.inputs, outputs=output_shapes, - architecture=model.name, + architecture=model.architecture, framework=model.framework, engine=Engine.NONE, model_status=[ModelStatus.DRAFT], parent_model_id=model.id, version=version, - weight=saved_path + weight=saved_path.with_suffix('.pt') ) register_model( mlmodelin, @@ -170,4 +170,4 @@ def update_finetune_model_as_new(id: str, updated_layer: Structure, dry_run: boo version=version )[0] - return {'id': ml_model.id} + return {'id': str(ml_model.id)} diff --git a/modelci/app/experimental/endpoints/model_structure.py b/modelci/app/experimental/endpoints/model_structure.py index 3bd45585..2df2b5ff 100644 --- a/modelci/app/experimental/endpoints/model_structure.py +++ b/modelci/app/experimental/endpoints/model_structure.py @@ -7,13 +7,7 @@ ML model structure related API """ -import torch from fastapi import APIRouter -from modelci.hub.manager import get_remote_model_weight - -from modelci.types.bo import Engine - -from modelci.persistence.service import ModelService from modelci.experimental.model.model_structure import Structure diff --git a/modelci/app/experimental/endpoints/trainer.py b/modelci/app/experimental/endpoints/trainer.py index 07537f29..74d5eb31 100644 --- a/modelci/app/experimental/endpoints/trainer.py +++ b/modelci/app/experimental/endpoints/trainer.py @@ -7,7 +7,7 @@ """ from typing import List -from fastapi import APIRouter +from fastapi import APIRouter, BackgroundTasks from starlette.responses import JSONResponse from modelci.experimental.curd.model_train import save, get_by_id, get_all, delete_by_id, delete_all @@ -18,13 +18,14 @@ @router.post('/', status_code=201) -def create_training_job(training_job: TrainingJobIn): +def create_training_job(training_job: TrainingJobIn, background_tasks: BackgroundTasks): """ Create a training job data object, and save it into the database. Then, submit the created training job (with job ID generated by database) to the training job coordinator. TODO return training job as soon as created Args: + background_tasks: training_job (TrainingJobIn): Training job to be submitted. Returns: @@ -34,8 +35,7 @@ def create_training_job(training_job: TrainingJobIn): if id_ is not None: training_job = get_by_id(id_) trainer = PyTorchTrainer.from_training_job(training_job) - trainer.start() - trainer.join() + background_tasks.add_task(lambda trainer: trainer.start(), trainer) return {'id': str(id_)} diff --git a/modelci/experimental/finetuner/trainer.py b/modelci/experimental/finetuner/trainer.py index 1b7491b8..0b2ac5e4 100644 --- a/modelci/experimental/finetuner/trainer.py +++ b/modelci/experimental/finetuner/trainer.py @@ -19,10 +19,10 @@ from modelci.experimental.finetuner.pytorch_datamodule import PyTorchDataModule from modelci.experimental.finetuner.transfer_learning import freeze, FineTuneModule from modelci.experimental.model.model_train import TrainingJob, TrainingJobUpdate -from modelci.hub.manager import get_remote_model_weight -from modelci.persistence.service import ModelService -from modelci.types.bo import Engine, ModelStatus -from modelci.types.vo import Status +from modelci.hub.cache_manager import get_remote_model_weight +from modelci.persistence.service_ import get_by_id, update_model +from modelci.types.models import ModelUpdateSchema +from modelci.types.models.common import Engine, ModelStatus, Status class BaseTrainer(abc.ABC): @@ -98,12 +98,12 @@ def __init__( def from_training_job(cls, training_job: TrainingJob) -> 'PyTorchTrainer': # TODO: only support fine-tune - model_bo = ModelService.get_model_by_id(training_job.model) - if model_bo.engine != Engine.NONE: - raise ValueError(f'Model engine expected `{Engine.NONE}`, but got {model_bo.engine}.') + ml_model = get_by_id(training_job.model) + if ml_model.engine != Engine.NONE: + raise ValueError(f'Model engine expected `{Engine.NONE}`, but got {ml_model.engine}.') # download local cache - cache_path = get_remote_model_weight(model_bo) + cache_path = get_remote_model_weight(ml_model) net = torch.load(cache_path) freeze(module=net, n=-1, train_bn=True) @@ -142,18 +142,19 @@ def from_training_job(cls, training_job: TrainingJob) -> 'PyTorchTrainer': def start(self): def training_done_callback(future): - model_train_curd.update(TrainingJobUpdate(_id=self.id, status=Status.PASS)) + model_train_curd.update(TrainingJobUpdate(_id=self.id, status=Status.Pass)) # TODO: save to database and update model_status, engine print(self.export_model()) self._task = self._executor.submit(self.trainer_engine.fit, self.model, **self._data_loader_kwargs) self._task.add_done_callback(training_done_callback) - model_train_curd.update(TrainingJobUpdate(_id=self.id, status=Status.RUNNING)) + model_train_curd.update(TrainingJobUpdate(_id=self.id, status=Status.Running)) - model_bo = ModelService.get_model_by_id(self.model_id) - model_bo.model_status.remove(ModelStatus.DRAFT) - model_bo.model_status.append(ModelStatus.TRAINING) - ModelService.update_model(model_bo) + ml_model = get_by_id(self.model_id) + if ModelStatus.DRAFT in ml_model.model_status: + ml_model.model_status.remove(ModelStatus.DRAFT) + ml_model.model_status.append(ModelStatus.TRAINING) + update_model(str(ml_model.id), ModelUpdateSchema(model_status=ml_model.model_status)) def join(self, timeout=None): if self._task: @@ -164,10 +165,10 @@ def terminate(self): # trigger pytorch lighting training graceful shutdown via a ^C self._task.set_exception(KeyboardInterrupt()) model_train_curd.update(TrainingJobUpdate(_id=self.id, status=Status.FAIL)) - model_bo = ModelService.get_model_by_id(self.model_id) - model_bo.model_status.remove(ModelStatus.TRAINING) - model_bo.model_status.append(ModelStatus.DRAFT) - ModelService.update_model(model_bo) + ml_model = get_by_id(self.model_id) + ml_model.model_status.remove(ModelStatus.TRAINING) + ml_model.model_status.append(ModelStatus.DRAFT) + update_model(str(ml_model.id), ModelUpdateSchema(model_status=ml_model.model_status)) def export_model(self): return self.model.net.cpu() diff --git a/modelci/experimental/model/model_train.py b/modelci/experimental/model/model_train.py index fbfc5cea..4e6a9a76 100644 --- a/modelci/experimental/model/model_train.py +++ b/modelci/experimental/model/model_train.py @@ -14,7 +14,7 @@ from pydantic import BaseModel, PositiveInt, PositiveFloat, root_validator, validator, confloat from modelci.experimental.model.common import ObjectIdStr -from modelci.types.vo import Status +from modelci.types.models import Status class DataModuleProperty(BaseModel): @@ -154,7 +154,7 @@ class TrainingJob(BaseModel): lr_scheduler_type: LRSchedulerType lr_scheduler_property: _LRSchedulerProperty loss_function: LossFunctionType - status: Optional[Status] = Status.UNKNOWN + status: Optional[Status] = Status.Unknown valdation_accuracy: Optional[float] = None @root_validator(pre=True) diff --git a/modelci/types/models/__init__.py b/modelci/types/models/__init__.py index 6a0f89f6..168c5216 100644 --- a/modelci/types/models/__init__.py +++ b/modelci/types/models/__init__.py @@ -5,5 +5,5 @@ Email: yli056@e.ntu.edu.sg Date: 2/17/2021 """ -from .common import Framework, Engine, IOShape, Task, Metric, ModelStatus, DataType +from .common import Framework, Engine, IOShape, Task, Metric, ModelStatus, DataType, Status from .mlmodel import MLModelFromYaml, MLModel, BaseMLModel, ModelUpdateSchema From 6841ab436b7d99d83bfbcd95a4bb1b5b13d580fd Mon Sep 17 00:00:00 2001 From: univerone Date: Sun, 16 May 2021 19:07:04 +0800 Subject: [PATCH 06/13] :art: [test] add finetuner API test --- tests/test_model_api.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/test_model_api.py b/tests/test_model_api.py index 7ab2540c..5719cdd8 100644 --- a/tests/test_model_api.py +++ b/tests/test_model_api.py @@ -50,6 +50,44 @@ def test_get_model_by_id(): assert model_id in response.text +def test_generate_model_graph(): + params = {'architecture': 'ResNet50', 'framework': 'PyTorch', 'engine': 'PYTORCH'} + with requests.get(f'{app_settings.api_v1_prefix}/model/', params=params) as r: + model_list = r.json() + model_id = model_list[0]["id"] + response = requests.get(f'{app_settings.api_v1_prefix}/visualizer/{model_id}') + assert response.status_code == HTTPStatus.OK + assert 'digraph' in response.text + + +def test_update_model_strcuture(): + params = {'architecture': 'ResNet50', 'framework': 'PyTorch', 'engine': 'PYTORCH'} + with requests.get(f'{app_settings.api_v1_prefix}/model/', params=params) as r: + model_list = r.json() + model_id = model_list[0]["id"] + payload = {"layer": { + "fc": {"op_": "M", "type_": "torch.nn.Linear", "in_features": 2048, "out_features": 10, "bias": True}}, + "connection": {}} + response = requests.patch(f'{app_settings.server_url}/api/exp/cv-tuner/finetune/{model_id}', json=payload) + assert response.status_code == HTTPStatus.OK + assert 'id' in response.text + + +def test_create_training_job(): + params = {'architecture': 'ResNet50', 'framework': 'PyTorch', 'engine': 'NONE'} + with requests.get(f'{app_settings.api_v1_prefix}/model/', params=params) as r: + model_list = r.json() + model_id = model_list[0]["id"] + payload = {"model": model_id, "data_module": {"dataset_name": "CIFAR10", "batch_size": 4}, + "min_epochs": 10, "max_epochs": 15, "optimizer_type": "Adam", + "optimizer_property": {"betas": [0.9, 0.99], "eps": 1e-8, "weight_decay": 0, "amsgrad": False}, + "lr_scheduler_type": "StepLR", "lr_scheduler_property": {"lr": 0.01, "step_size": 30}, + "loss_function": "torch.nn.CrossEntropyLoss"} + response = requests.post(f'{app_settings.server_url}/api/exp/train/', json=payload) + assert response.status_code == HTTPStatus.CREATED + assert 'id' in response.text + + def test_update_model(): with requests.get(f'{app_settings.api_v1_prefix}/model/') as r: model_list = r.json() From 4b842a525012db1434051e23b19c33b73477caac Mon Sep 17 00:00:00 2001 From: univerone Date: Sun, 16 May 2021 19:07:43 +0800 Subject: [PATCH 07/13] :bug: [api] fix Too much data warning --- modelci/app/v1/endpoints/model.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modelci/app/v1/endpoints/model.py b/modelci/app/v1/endpoints/model.py index 4f26ac75..9572aed7 100644 --- a/modelci/app/v1/endpoints/model.py +++ b/modelci/app/v1/endpoints/model.py @@ -6,7 +6,6 @@ Date: 6/20/2020 """ import asyncio -import http import json import shutil from pathlib import Path @@ -15,7 +14,7 @@ from fastapi import APIRouter, File, UploadFile, Depends from fastapi.exceptions import RequestValidationError, HTTPException from pydantic.error_wrappers import ErrorWrapper -from starlette.responses import JSONResponse +from starlette.responses import JSONResponse, Response from modelci.hub.registrar import register_model from modelci.persistence.service_ import get_by_id, get_models, update_model, delete_model, exists_by_id @@ -52,7 +51,7 @@ def update(id: str, schema: ModelUpdateSchema): return update_model(id, schema) -@router.delete('/{id}', status_code=http.HTTPStatus.NO_CONTENT) +@router.delete('/{id}', status_code=204, response_class=Response) def delete(id: str): if not exists_by_id(id): raise HTTPException( From 744f1d41264f80aec2a32e081eba4bb859ff52aa Mon Sep 17 00:00:00 2001 From: univerone Date: Sun, 16 May 2021 21:09:07 +0800 Subject: [PATCH 08/13] :pencil: [docs] update docs add weight update --- docs/tutorial/housekeeper.md | 43 +++++---- modelci/persistence/README.md | 166 ++++++++++++++++---------------- modelci/types/models/mlmodel.py | 11 ++- 3 files changed, 113 insertions(+), 107 deletions(-) diff --git a/docs/tutorial/housekeeper.md b/docs/tutorial/housekeeper.md index daf2049f..43afb621 100644 --- a/docs/tutorial/housekeeper.md +++ b/docs/tutorial/housekeeper.md @@ -17,15 +17,15 @@ Users can search for a model and obtain its detailed information. ```python -from modelci.hub.manager import retrieve_model, retrieve_model_by_task -from modelci.types.bo import Framework, Engine, Task +from modelci.hub.manager import retrieve_model +from modelci.types.models import Framework, Engine, Task # By model name and optionally filtered by model framework and(or) model engine -model_bo = retrieve_model( - architecture_name='ResNet50', framework=Framework.PYTORCH, engine=Engine.TORCHSCRIPT +ml_model = retrieve_model( + architecture='ResNet50', framework=Framework.PyTorch, engine=Engine.TORCHSCRIPT ) # By task -model_bo2 = retrieve_model_by_task(task=Task.IMAGE_CLASSIFICATION) +ml_model_by_task = retrieve_model(task=Task.Image_Classification) ``` The returned tuple contains the path where the model is cached and model meta information (e.g. model name, model framework). @@ -40,30 +40,31 @@ You can update the model information manually using the update API. Here is an example for updating the information of a ResNet50 model. The return value of the function `update_model(model)` is a boolean that indicates the status. -Since many models share a name, the function `get_models_by_name` will return a model list. You should specify the model version to get a model object. +Since many models share a name, the function `get_models` will return a model list. You should specify the model version to get a model object. ```python -from modelci.persistence.service import ModelService +from modelci.persistence.service import get_models, update_model +from modelci.types.models import ModelUpdateSchema, Metric +from pathlib import Path -# get_models_by_name will return a list of all matched results. +# get_models will return a list of all matched results. -model = ModelService.get_models('ResNet50')[0] -model.acc = 0.9 -model.weight.weight = bytes([123, 255]) +model = get_models(architecture='ResNet50')[0] # check if update success -assert ModelService.update_model(model) +assert update_model(str(model.id), ModelUpdateSchema(metric={Metric.acc: 0.9}, weight=Path('path-to-new-model-file'))) ``` -MLModelCI allows you to get the model object by id, task and name. +MLModelCI allows you to get the model object by id, task, architecture, framework, engine and version. ```python -from modelci.persistence.service import ModelService +from modelci.persistence.service import get_models, get_by_id +from modelci.types.models import Task -model_bo = ModelService.get_models('ResNet50')[0] # get model by name -models = ModelService.get_models_by_task('image classification') # get model by task -model_bo2 = ModelService.get_models('ResNet50')[0] # get model by id +model_by_architecture = get_models(architecture='ResNet50')[0] # get model by name +model_by_task = get_models(task=Task.Image_Classification) # get model by task +model_by_id = get_by_id(str(model_by_architecture.id)) # get model by id ``` Getting by name or task may return more than one model objects. @@ -73,11 +74,11 @@ Getting by name or task may return more than one model objects. You can delete a model record easily using MLModelCI. ```python -from modelci.persistence.service import ModelService +from modelci.persistence.service import delete_model, get_models -model = ModelService.get_models('ResNet50')[0] - -assert ModelService.delete_model_by_id(model.id) # delete the model record +model_list = get_models(architecture='ResNet50') +model = model_list[0] +assert delete_model(str(model.id)) # delete the model record ``` Currently, we only support deleting model by `model.id`. diff --git a/modelci/persistence/README.md b/modelci/persistence/README.md index daf96ea3..8d6e2ae9 100644 --- a/modelci/persistence/README.md +++ b/modelci/persistence/README.md @@ -3,87 +3,80 @@ ### API -#### 1. Connect to mongodb -Configure your MongoDB connection setting at `modelci/env-mongodb.env`. (You may have done this by -[installation](/README.md#installation). Source the env file when you want to connect to the database by: +#### 1. Setup Environment +Generate all the environment variables by one command (You may have done this by +[installation](/README.md#installation). A single `.env` file will be generated: ```shell script -set -o allexport -source modelci/env-mongodb.env -set +o allexport +python scripts/generate_env.py ``` -If you are using an IDE to run the following test, remember to add `modelci/env-mongodb.env` as an EnvFile. #### 2. Register a model ```python -from modelci.persistence.service import ModelService -from modelci.types.bo import Framework, Engine, ModelVersion, IOShape, Weight, ModelBO -from modelci.types.trtis_objects import ModelInputFormat - -# create a model business object -model = ModelBO( - name='ResNet50', - framework=Framework.PYTORCH, - engine=Engine.TRT, - version=ModelVersion(1), - dataset='Image Net', - acc=0.8, - task='image classification', - inputs=[IOShape([-1, 3, 224, 224], dtype=int, format=ModelInputFormat.FORMAT_NCHW)], - outputs=[IOShape([-1, 1000], dtype=int)], - weight=Weight(bytes([123])) +from pathlib import Path +from modelci.types.models.common import Engine, Task, Framework, Metric, ModelStatus, IOShape, DataType +from modelci.types.models import MLModel + +# create a MLModel object +mlmodel = MLModel( + weight=Path('path-to-model-file'), + architecture='ResNet50', + dataset='ImageNet', + framework=Framework.PyTorch, + engine=Engine.PYTORCH, + version=1, + metric={Metric.acc: 0.80}, + task=Task.Image_Classification, + inputs=[IOShape(name="input", shape=[-1, 3, 224, 224], dtype=DataType.TYPE_FP32)], + outputs=[IOShape(name="output", shape=[-1, 1000], dtype=DataType.TYPE_FP32)], + model_status=[ModelStatus.PUBLISHED] ) # register -ModelService.post_model(model) + +from modelci.hub.registrar import register_model +register_model(mlmodel, convert=True, profile=False) ``` See test `test/test_model_service.test_register_model`. #### 3. Get a list of models by architecture name + ```python -from modelci.persistence.service import ModelService +from modelci.persistence.service import get_models -model_service = ModelService() -models = model_service.get_models_by_pk('ResNet50') +models = get_models(architecture='ResNet50') ``` See test `test/test_model_service.test_get_model_by_name`. #### 4. Get a list of models by task + ```python -from modelci.persistence.service import ModelService +from modelci.persistence.service import get_models +from modelci.types.models import Task -model_service = ModelService() -models = model_service.get_models_by_task('image classification') +models = get_models(task=Task.Image_Classification) ``` See test `test/test_model_service.test_get_model_by_task`. #### 5. Get model by model ID + ```python -from modelci.persistence.service import ModelService +from modelci.persistence.service import get_by_id -model_service = ModelService() -model = model_service.get_model_by_id('123456789012') +ml_model = get_by_id('123456789012') ``` The ID must be a valid `ObjectID`. See test `test/test_model_service.get_model_by_id`. #### 6. Update model -```python -from modelci.persistence.service import ModelService -model_service = ModelService() -# Query from Mongo DB, obtain model business object and update -model_bo = ... +```python +from modelci.persistence.service import update_model +from modelci.types.models import ModelUpdateSchema -model_service.update_model(model_bo) +ml_model = ... +update_model(str(ml_model.id), ModelUpdateSchema(...)) ``` This API will check if the model exists in Model DB. It will reject the update by raising a `ValueError`. -If you would like to force update: -```python -from modelci.persistence.service import ModelService -model_service = ModelService() -model_bo = ... -model_service.update_model(model_bo, force_insert=True) -``` However, if there is a change of profiling results, please use profiling result related API for CRUD ([add static profiling](#7-add-static-profiling-result-to-a-registered-model), [add dynamic profiling result to a registered](#8-add-dynamic-profiling-result-to-a-registered-model), @@ -92,69 +85,80 @@ However, if there is a change of profiling results, please use profiling result See test `test/test_model_service.test_update_model`. #### 7. Add static profiling result to a registered model -```python -from modelci.persistence.service import ModelService -from modelci.types.bo import StaticProfileResultBO - -model_service = ModelService() -static_result = StaticProfileResultBO( - parameters=5000, - flops=200000, - memory=200000, - mread=10000, - mwrite=10000, +```python +from modelci.persistence.service import register_static_profiling_result +from modelci.types.models.profile import StaticProfileResult + +static_result = StaticProfileResult( + parameters=5000, + flops=200000, + memory=200000, + mread=10000, + mwrite=10000, mrw=10000 ) -model_service.register_static_profiling_result('123456789012', static_result) +register_static_profiling_result('123456789012', static_result) ``` The ID must be a valid `ObjectID`. See test `test/test_model_service.test_register_static_profiling_result` Update static profiling result may use the same API. #### 8. Add dynamic profiling result to a registered model + ```python -from modelci.persistence.service import ModelService -from modelci.types.bo import DynamicProfileResultBO, ProfileLatency, ProfileMemory, ProfileThroughput - -model_service = ModelService() - -dynamic_result = DynamicProfileResultBO( - device_id='gpu:01', - device_name='Tesla K40c', - batch=1, - memory=ProfileMemory(1000, 2000, 1000), - latency=ProfileLatency((1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1, 1)), - throughput=ProfileThroughput((1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1, 1)) +from modelci.persistence.service import register_dynamic_profiling_result +from modelci.types.models.profile import InfoTuple, DynamicProfileResult, ProfileMemory, ProfileLatency,ProfileThroughput + +info_tuple = InfoTuple(avg=1, p50=1, p95=1, p99=1) +dynamic_result = DynamicProfileResult( + device_id='gpu:01', + device_name='Tesla K40c', + batch=1, + memory=ProfileMemory(total_memory=1000, memory_usage=2000, utilization=0.5), + latency=ProfileLatency( + initialization_latency=info_tuple, + preprocess_latency=info_tuple, + inference_latency=info_tuple, + postprocess_latency=info_tuple + ), + throughput=ProfileThroughput( + batch_formation_throughput=1, + preprocess_throughput=1, + inference_throughput=1, + postprocess_throughput=1, + ) ) -model_service.append_dynamic_profiling_result('123456789012', dynamic_result) +register_dynamic_profiling_result('123456789012', dynamic_result) ``` The ID must be a valid `ObjectID`. This API will raise a `ValueError` if the `id` does not exist. See test `test/test_model_service.test_register_dynamic_profiling_result`. #### 9. Update dynamic profiling result + ```python -from modelci.persistence.service import ModelService +from modelci.persistence.service import update_dynamic_profiling_result # the updated dynamic profiling result dynamic_result = ... -ModelService.update_dynamic_profiling_result('123456789012', dynamic_result) +update_dynamic_profiling_result('123456789012', dynamic_result) ``` The ID must be a valid `ObjectID`. If a non-existent ID or a non-existent profiling result `ip`, `device_id` pair is supplied, this API will reject the update by raising a `ValueError`. You may set `force_insert` to register a profiling result if the `ip` and `device_id` does not exist. See test `test/test_model_service.test_update_dynamic_profiling_result`. #### 10. Delete dynamic profiling result + ```python import ipaddress -from modelci.persistence.service import ModelService +from modelci.persistence.service import delete_dynamic_profiling_result -model_bo = ... +ml_model = ... -ModelService.delete_dynamic_profiling_result( - id_=model_bo.id, - dynamic_result_ip=ipaddress.ip_address('localhost'), +delete_dynamic_profiling_result( + id_=str(ml_model.id), + dynamic_result_ip=ipaddress.ip_address('localhost'), dynamic_result_device_id='gpu:01' ) ``` @@ -163,11 +167,11 @@ The ID must be a valid `ObjectID`. This API will raise a `ValueError` if the `id See test `test/test_model_service.test_delete_dynamic_profiling_result`. #### 11. Delete model + ```python -from modelci.persistence.service import ModelService +from modelci.persistence.service import delete_model -model_service = ModelService() -model_service.delete_model_by_id('123456789012') +delete_model('123456789012') ``` The ID must be a valid `ObjectID`. See test `test/test_model_service.test_delete_model` diff --git a/modelci/types/models/mlmodel.py b/modelci/types/models/mlmodel.py index abef474b..07c9c16f 100644 --- a/modelci/types/models/mlmodel.py +++ b/modelci/types/models/mlmodel.py @@ -7,21 +7,21 @@ """ import getpass import os -import gridfs from datetime import datetime from pathlib import Path -from typing import Union, Optional, Dict, List, Any +from typing import Union, Optional, Dict, List +import gridfs from bson import ObjectId from pydantic import BaseModel, FilePath, DirectoryPath, PositiveInt, Field, root_validator +from modelci.config import db_settings +from modelci.experimental.mongo_client import MongoClient from .common import Metric, IOShape, Framework, Engine, Task, ModelStatus, Status, PydanticObjectId, \ named_enum_json_encoder from .pattern import as_form from .profile import ProfileResult from ...hub.utils import parse_path_plain, generate_path_plain -from modelci.config import db_settings -from modelci.experimental.mongo_client import MongoClient _db = MongoClient()[db_settings.mongo_db] _fs = gridfs.GridFS(_db) @@ -200,4 +200,5 @@ class ModelUpdateSchema(BaseModel): default_factory=list, example='[{"name": "output", "shape": [-1, 1000], "dtype": "TYPE_FP32"}]' ) - model_status: Optional[List[ModelStatus]] = Field(default_factory=list) \ No newline at end of file + model_status: Optional[List[ModelStatus]] = Field(default_factory=list) + weight: Optional[Weight] \ No newline at end of file From 13dd2242d276c8c8dd47471e0bf9e9b51748929a Mon Sep 17 00:00:00 2001 From: univerone Date: Sun, 16 May 2021 21:10:28 +0800 Subject: [PATCH 09/13] :fire: remove ModelService and rename --- .../app/experimental/endpoints/cv_tuner.py | 2 +- modelci/app/v1/endpoints/model.py | 2 +- modelci/app/v1/endpoints/visualizer.py | 2 +- modelci/cli/modelhub.py | 2 +- modelci/experimental/finetuner/trainer.py | 2 +- modelci/hub/manager.py | 2 +- modelci/hub/registrar.py | 2 +- modelci/persistence/service.py | 565 ++++++++---------- modelci/persistence/service_.py | 248 -------- tests/test_model_service.py | 4 +- 10 files changed, 254 insertions(+), 577 deletions(-) delete mode 100644 modelci/persistence/service_.py diff --git a/modelci/app/experimental/endpoints/cv_tuner.py b/modelci/app/experimental/endpoints/cv_tuner.py index 7bbc9d62..7a017090 100644 --- a/modelci/app/experimental/endpoints/cv_tuner.py +++ b/modelci/app/experimental/endpoints/cv_tuner.py @@ -13,7 +13,7 @@ from modelci.hub.registrar import register_model from modelci.hub.cache_manager import get_remote_model_weight from modelci.hub.utils import generate_path_plain -from modelci.persistence.service_ import get_by_id, get_models +from modelci.persistence.service import get_by_id, get_models from modelci.types.models import MLModel, Engine, IOShape, ModelStatus from modelci.types.type_conversion import model_data_type_to_torch, type_to_data_type from modelci.utils.exceptions import ModelStructureError diff --git a/modelci/app/v1/endpoints/model.py b/modelci/app/v1/endpoints/model.py index 9572aed7..44cc8797 100644 --- a/modelci/app/v1/endpoints/model.py +++ b/modelci/app/v1/endpoints/model.py @@ -17,7 +17,7 @@ from starlette.responses import JSONResponse, Response from modelci.hub.registrar import register_model -from modelci.persistence.service_ import get_by_id, get_models, update_model, delete_model, exists_by_id +from modelci.persistence.service import get_by_id, get_models, update_model, delete_model, exists_by_id from modelci.types.models import MLModel, BaseMLModel, ModelUpdateSchema, Framework, Engine, Task router = APIRouter() diff --git a/modelci/app/v1/endpoints/visualizer.py b/modelci/app/v1/endpoints/visualizer.py index e901c289..0d4e8e25 100644 --- a/modelci/app/v1/endpoints/visualizer.py +++ b/modelci/app/v1/endpoints/visualizer.py @@ -9,7 +9,7 @@ from fastapi import APIRouter -from modelci.persistence.service_ import get_by_id +from modelci.persistence.service import get_by_id from modelci.types.models import Engine from torchviz import make_dot import torch diff --git a/modelci/cli/modelhub.py b/modelci/cli/modelhub.py index c8af3262..135c3178 100644 --- a/modelci/cli/modelhub.py +++ b/modelci/cli/modelhub.py @@ -20,7 +20,7 @@ import typer import yaml from pydantic import ValidationError -import modelci.persistence.service_ as ModelDB +import modelci.persistence.service as ModelDB from modelci.hub.converter import generate_model_family from modelci.config import app_settings diff --git a/modelci/experimental/finetuner/trainer.py b/modelci/experimental/finetuner/trainer.py index 0b2ac5e4..57338b9e 100644 --- a/modelci/experimental/finetuner/trainer.py +++ b/modelci/experimental/finetuner/trainer.py @@ -20,7 +20,7 @@ from modelci.experimental.finetuner.transfer_learning import freeze, FineTuneModule from modelci.experimental.model.model_train import TrainingJob, TrainingJobUpdate from modelci.hub.cache_manager import get_remote_model_weight -from modelci.persistence.service_ import get_by_id, update_model +from modelci.persistence.service import get_by_id, update_model from modelci.types.models import ModelUpdateSchema from modelci.types.models.common import Engine, ModelStatus, Status diff --git a/modelci/hub/manager.py b/modelci/hub/manager.py index ec4e9895..3005d730 100644 --- a/modelci/hub/manager.py +++ b/modelci/hub/manager.py @@ -15,7 +15,7 @@ from typing import List from modelci.hub.cache_manager import get_remote_model_weights -from modelci.persistence.service_ import get_by_parent_id, get_models +from modelci.persistence.service import get_by_parent_id, get_models from modelci.types.models import MLModel, Task, Framework, Engine __all__ = ['retrieve_model', 'retrieve_model_by_parent_id'] diff --git a/modelci/hub/registrar.py b/modelci/hub/registrar.py index ef2d90c1..565da252 100644 --- a/modelci/hub/registrar.py +++ b/modelci/hub/registrar.py @@ -28,7 +28,7 @@ from modelci.hub.client.trt_client import CVTRTClient from modelci.hub.converter import converter from modelci.hub.utils import parse_path_plain -from modelci.persistence.service_ import save +from modelci.persistence.service import save from modelci.types.models import MLModelFromYaml, MLModel from urllib.request import urlopen, Request from tqdm.auto import tqdm diff --git a/modelci/persistence/service.py b/modelci/persistence/service.py index 555d6d9d..7fefcf75 100644 --- a/modelci/persistence/service.py +++ b/modelci/persistence/service.py @@ -1,328 +1,253 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Author: Li Yuanming +Email: yli056@e.ntu.edu.sg +Date: 2/17/2021 + +Persistence service using PyMongo. +""" +from enum import Enum from ipaddress import IPv4Address, IPv6Address -from typing import Union +from typing import List, Union, Optional +import gridfs from bson import ObjectId +from fastapi.encoders import jsonable_encoder -from modelci.types.bo import DynamicProfileResultBO, ModelBO, Task, Framework, Engine, StaticProfileResultBO, \ - ModelVersion -from . import mongo -from .exceptions import ServiceException, DoesNotExistException, BadRequestValueException -from .model_dao import ModelDAO +from modelci.config import db_settings +from modelci.experimental.mongo_client import MongoClient +from modelci.persistence.exceptions import ServiceException +from modelci.types.models import MLModel, ModelUpdateSchema, Metric +from modelci.types.models.profile import StaticProfileResult, DynamicProfileResult +from modelci.utils.misc import remove_dict_null +_db = MongoClient()[db_settings.mongo_db] +_collection = _db['model_d_o'] +_fs = gridfs.GridFS(_db) -class ModelService(object): - __model_DAO = ModelDAO - @classmethod - def get_models( - cls, - architecture: str = None, - task: Task = None, - framework: Framework = None, - engine: Engine = None, - version: ModelVersion = None - ): - """Get a list of model BO given primary key(s). - - Args: - architecture (str): model architecture for searching - task (Task): model task for searching - framework (Framework): model framework. Default to None, having no effect on the searching result. - engine (Engine): model engine. Default to None, having no effect on the searching result. - version (ModelVersion): model version. Default to None, having no effect on the searching result. - - Return: - A list of `ModelBO`. - """ - # build kwargs - kwargs = dict() - if architecture is not None: - kwargs['architecture'] = architecture - if task is not None: - kwargs['task'] = task.value - if framework is not None: - kwargs['framework'] = framework.value - if engine is not None: - kwargs['engine'] = engine.value - if version is not None: - kwargs['version'] = version.ver - - model_pos = cls.__model_DAO.get_models(**kwargs) - models = list(map(ModelBO.from_model_do, model_pos)) - return models - - @classmethod - def get_models_by_task(cls, task: Task): - """Get a list of model BO given task. - - Args: - task (Task): model task for searching - - Return: - A list of `ModelBO` - """ - model_pos = cls.__model_DAO.get_models_by_task(task=task.value) - - return list(map(ModelBO.from_model_do, model_pos)) - - @classmethod - def get_model_by_id(cls, id_: str): - """Get model given model ID. - - Args: - id_ (str): model ID, should be a valid BSON Object ID - - Return: - A model BO if found. - - Raises: - DoesNotExistException: If the model with given ID does not found. - """ - if not cls.__model_DAO.exists_by_id(ObjectId(id_)): - raise DoesNotExistException(f'Model id={id_} does not exists.') - - return ModelBO.from_model_do(cls.__model_DAO.get_model_by_id(ObjectId(id_))) - - @classmethod - def get_models_by_parent_id(cls, parent_id: str): - """ - - Args: - parent_id (): - - Returns: - A list of model BO if found. - """ - model_pos = cls.__model_DAO.get_models_by_parent_id(parent_id=parent_id) - - return list(map(ModelBO.from_model_do, model_pos)) - - @classmethod - def post_model(cls, model: ModelBO): - """Register a model into ModelDB and GridFS. `model.id` should be set as `None`, otherwise, the function will - raise a `ValueError`. - - Args: - model (ModelBO): model business object to be registered - - Return: - bool: True if successful, False otherwise - - Raises: - BadRequestValueException: If `model.id` is not None. - ServiceException: If model has exists with the same primary keys (architecture, framework, engine - and version). - """ - # check model id - if model.id is not None: - raise BadRequestValueException( - 'field `id` is expected `None`, but got {}. You should use `update_model` for a existing ' - 'model BO'.format(model.id) - ) - - model_po = model.to_model_do() - if cls.__model_DAO.exists_by_primary_keys( - architecture=model_po.architecture, - task=model_po.task, - framework=model_po.framework, - engine=model_po.engine, - version=model_po.version, - dataset=model_po.dataset - ): - raise ServiceException( - f'Model business object with primary keys architecture={model_po.architecture}, ' - f'task={model_po.task}, framework={model_po.framework}, engine={model_po.engine}, ' - f'version={model_po.version}, dataset={model_po.dataset} has exists.' - ) - - return bool(cls.__model_DAO.save_model(model_po)) - - @classmethod - def update_model(cls, model: ModelBO, force_insert=False): - """Update a model to ModelDB and GridFS. The function will check the existence of the provided model. It will - invoke the update. Note that this function will have not effect for static profiling result and dynamic - profiling result. Please see `register_static_profiling_result` and ` - - Args: - model (ModelBO): model business object to be updated. The model must exist in the ModelDB based on its - `id`. Otherwise, you should set `force_insert` to be `True`. - force_insert (bool: `True`, optional): Force insert flag for ModelDB. The model will force to insert - regardless its existence. - - Return: - True for successfully update, False otherwise. - - Raises: - ValueError: If `model.id` does not exist in ModelDB, and `force_insert` is not set. - """ - # check for the existence of Model - if cls.__model_DAO.exists_by_id(model.id): - model_po_new = model.to_model_do() - model_po = cls.__model_DAO.get_model_by_id(model.id) - - # if weight changes, save all without patch - if model_po_new.weight is not None and model_po_new.weight != model_po.weight: - return bool(cls.__model_DAO.save_model(model_po_new)) - - # build arguments - valid_keys = [ - 'architecture', 'framework', 'engine', 'version', 'dataset', 'metric', - 'weight', 'task', 'inputs', 'outputs', 'status', 'model_status' - ] - kwargs = dict() - - for valid_key in valid_keys: - new_value = getattr(model_po_new, valid_key) - if new_value is not None and new_value != getattr(model_po, valid_key): - kwargs[valid_key] = new_value - - # if kwargs is empty, not update - if len(kwargs) == 0: - return False - - return bool(cls.__model_DAO.update_model(model.id, **kwargs)) - # return bool(cls.__model_DAO.update_model(model_po_new)) - else: - # if `force_insert` is set - if force_insert: - model_po = model.to_model_do() - return bool(cls.__model_DAO.save_model(model_po)) - else: - raise ValueError('Model ID {} does not exist. You may change the ID or set `force_insert=True` ' - 'when call.'.format(model.id)) - - @classmethod - def delete_model_by_id(cls, id_: str) -> int: - """Delete a model from ModelDB given ID. - - Args: - id_ (str): ID of the object - - Return: - int: Number of affected record. - """ - id_ = ObjectId(id_) - model_po = cls.__model_DAO.get_model_by_id(id_) - model_po.weight.delete() - return cls.__model_DAO.delete_model_by_id(id_) - - @classmethod - def register_static_profiling_result(cls, id_, static_result: StaticProfileResultBO): - """Register or update static profiling result to a model. - - Args: - id_ (str): ID of the object - static_result (StaticProfileResultBO): static profiling result - - Return: - int: number of affected rows - - Raises: - DoesNotExistException: `model.id` does not exist in ModelDB - """ - id_ = ObjectId(id_) - if cls.__model_DAO.exists_by_id(id_): - return ModelService.__model_DAO.register_static_profiling_result( - id_, - static_result.to_static_profile_result_po() - ) - else: - raise DoesNotExistException('Model ID {} does not exist.'.format(id_)) - - @classmethod - def append_dynamic_profiling_result(cls, id_: str, dynamic_result: DynamicProfileResultBO): - """Add one dynamic profiling result to a model. - - Args: - id_ (str): ID of the object - dynamic_result (DynamicProfileResultBO): Dynamic profiling result - - Return: - int: number of affected rows - - Raises: - DoesNotExistException: `model.id` does not exist in ModelDB - """ - id_ = ObjectId(id_) - if cls.__model_DAO.exists_by_id(id_): - return cls.__model_DAO.register_dynamic_profiling_result( - id_, - dynamic_result.to_dynamic_profile_result_po() - ) - else: - raise DoesNotExistException('Model ID {} does not exist.'.format(id_)) - - @classmethod - def update_dynamic_profiling_result(cls, id_: str, dynamic_result: DynamicProfileResultBO, force_insert=False): - """Update one dynamic profiling result to a model. - - Args: - id_ (str): ID of the object - dynamic_result (DynamicProfileResultBO): Dynamic profiling result - force_insert: force to insert the dynamic result if it is not found - - Return: - int: number of affected rows - - Raise: - DoesNotExistException: Model ID does not exist in ModelDB; or - dynamic profiling result to be updated does not exist and `force_insert` is not set - """ - id_ = ObjectId(id_) - dpr_po = dynamic_result.to_dynamic_profile_result_po() - pks = {'ip': dpr_po.ip, 'device_id': dpr_po.device_id} - # if model ID exists - if cls.__model_DAO.exists_by_id(id_): - # if the dynamic profiling result to be updated exists - if cls.__model_DAO.exists_dynamic_profiling_result_by_pks(id_, **pks): - return cls.__model_DAO.update_dynamic_profiling_result(id_, dpr_po) - # force insert is set - elif force_insert: - return cls.__model_DAO.register_dynamic_profiling_result(id_, dpr_po) - else: - raise DoesNotExistException( - f'Dynamic profiling result to be updated with ip={dpr_po.ip}, ' - f'device_id={dpr_po.device_id} does not exist. Either set `force_insert` or change the ip ' - f'and device_id.' - ) - # if model ID does not exist - else: - raise DoesNotExistException('Model ID {} does not exist.'.format(id_)) - - @classmethod - def delete_dynamic_profiling_result( - cls, - id_: str, - dynamic_result_ip: Union[str, IPv4Address, IPv6Address], - dynamic_result_device_id: str +def save(model_in: MLModel): + """Register a model into ModelDB and GridFS. `model.id` should be set as `None`, otherwise, the function will + raise a `ValueError`. + + Args: + model_in (MLModelIn): model object to be registered + + Return: + MLModel: Saved ML model object. + + Raises: + BadRequestValueException: If `model.id` is not None. + ServiceException: If model has exists with the same primary keys (name, framework, engine and version). + """ + + if _collection.count_documents( + filter=model_in.dict( + use_enum_values=True, + include={'architecture', 'framework', 'engine', 'version', 'task', 'dataset'} + ), + limit=1 ): - """Delete one dynamic profiling result to a model. - - Args: - id_ (str): ID of the object. - dynamic_result_ip (Union[IPv4Address, IPv6Address]): Host IP address of dynamic profiling result. - dynamic_result_device_id (str): Device ID of dynamic profiling result. - - Raise: - DoesNotExistException: Model ID does not exist in ModelDB; or - dynamic profiling result to be updated does not exist and `force_insert` is not set - """ - id_ = ObjectId(id_) - pks = {'ip': str(dynamic_result_ip), 'device_id': dynamic_result_device_id} - # if model ID exists - if cls.__model_DAO.exists_by_id(id_): - # if the dynamic profiling result to be delete exists - if cls.__model_DAO.exists_dynamic_profiling_result_by_pks(id_, **pks): - return cls.__model_DAO.delete_dynamic_profiling_result(id_, **pks) - else: - raise DoesNotExistException( - f'Dynamic profiling result to be updated with ip={dynamic_result_ip}, ' - f'device_id={dynamic_result_device_id} does not exist. Either set `force_insert` or change the ip ' - f'and device_id.' - ) - # if model ID does not exist - else: - raise DoesNotExistException('Model ID {} does not exist.'.format(id_)) - - -__all__ = ['mongo', 'ModelService'] + raise ServiceException( + f'Model with primary keys architecture={model_in.architecture}, ' + f'framework={model_in.framework}, engine={model_in.engine}, version={model_in.version},' + f'task={model_in.task}, and dataset={model_in.dataset} has exists.' + ) + + # TODO: update weight ID in the MLModelIn + weight_id = _fs.put(bytes(model_in.weight), filename=model_in.weight.filename) + model = MLModel(**model_in.dict(exclude={'weight'}), weight=weight_id) + model.id = _collection.insert_one(model.dict(exclude_none=True, by_alias=True, use_enum_values=True)).inserted_id + return model + + +def get_by_id(id: str) -> MLModel: + """Get a MLModel object by its ID. + """ + model_data = _collection.find_one(filter={'_id': ObjectId(id)}) + if model_data is not None: + return MLModel.parse_obj(model_data) + else: + raise ServiceException(f'Model with id={id} does not exist.') + + +def get_by_parent_id(id_: str) -> List[MLModel]: + """ Get MLModel objects by its parent model ID. + Args: + id_: The ID of parent model + Returns: List of model objects + """ + models = _collection.find(filter={'parent_model_id': ObjectId(id_)}) + if len(models): + return list(map(MLModel.parse_obj, models)) + else: + raise ServiceException(f'Model with parent model ID={id_} does not exist.') + + +def exists_by_id(id: str) -> MLModel: + model = _collection.find_one(filter={'_id': ObjectId(id)}) + return model is not None + + +def get_models(**kwargs) -> List[MLModel]: + """ + + Args: + **kwargs: architecture, framework, engine, task and version + + Returns: list of models + + """ + valid_keys = {'architecture', 'framework', 'engine', 'task', 'version'} + + valid_kwargs = { + key: (value.value if isinstance(value, Enum) else value) + for key, value in remove_dict_null(kwargs).items() + if key in valid_keys + } + models = _collection.find(valid_kwargs) + return list(map(MLModel.parse_obj, models)) + + +def update_model(id_: str, schema: ModelUpdateSchema) -> MLModel: + """ Update existed model info + + Args: + id_: the ID of targeted model + schema: + + Returns: the updated model object + + """ + prev_model = get_by_id(id_) + if schema.metric: + schema.metric = {Metric(k).name: v for k, v in schema.metric.items()} + updated_data = { + key: value for key, value in jsonable_encoder(schema, exclude_unset=True).items() + if getattr(schema, key) != getattr(prev_model, key) and key != 'weight' + } + if schema.weight: + if _fs.exists(prev_model.weight.__root__): + _fs.delete(prev_model.weight.__root__) + weight_id = _fs.put(bytes(schema.weight), filename=schema.weight.filename) + schema.weight = weight_id + _collection.update_one({'_id': ObjectId(id_)}, {"$set": updated_data}) + return get_by_id(id_) + + +def delete_model(id_: str): + model = _collection.find_one(filter={'_id': ObjectId(id_)}) + if _fs.exists(ObjectId(model['weight'])): + _fs.delete(ObjectId(model['weight'])) + return _collection.delete_one({'_id': ObjectId(id_)}) + + +def register_static_profiling_result(id_: str, static_profiling_result: StaticProfileResult): + """ Register or update static profiling result to a model. + + Args: + id_: ID of the model + static_profiling_result: static profiling result + + Returns: + + """ + return _collection.update_one({'_id': ObjectId(id_)}, + {"$set": {"profile_result.static_profile_result": static_profiling_result.dict()}}, + upsert=True) + + +def register_dynamic_profiling_result(id_: str, dynamic_result: DynamicProfileResult): + """ Add one dynamic profiling result to a model. + + Args: + id_: ID of the model + dynamic_result: Dynamic profiling result + + Returns: + + """ + return _collection.update_one({'_id': ObjectId(id_)}, + {"$push": { + "profile_result.dynamic_profile_results": jsonable_encoder(dynamic_result)}}) + + +def exists_dynamic_profiling_result_by_pks(id_: str, ip: str, device_id: str) -> bool: + """Check if the dynamic profiling result exists. + + Args: + id_: ID of the model. + ip: IP address of dynamic profiling result to be deleted. + device_id: Device ID of dynamic profiling result to be deleted. + + Returns: True` for existence, `False` otherwise. + + """ + model = _collection.find_one(filter={ + '_id': ObjectId(id_), + 'profile_result.dynamic_profile_results.ip': ip, + 'profile_result.dynamic_profile_results.device_id': device_id + }) + return model is not None + + +def update_dynamic_profiling_result(id_: str, dynamic_result: DynamicProfileResult, + force_insert: Optional[bool] = False): + """ Update one dynamic profiling result to a model. + + Args: + id_: ID of the object + dynamic_result: Dynamic profiling result + force_insert: force to insert the dynamic result if it is not found + + Returns: + + """ + if exists_dynamic_profiling_result_by_pks(id_, ip=dynamic_result.ip, device_id=dynamic_result.device_id): + return _collection.update_one({ + '_id': ObjectId(id_), + 'profile_result.dynamic_profile_results.ip': dynamic_result.ip, + 'profile_result.dynamic_profile_results.device_id': dynamic_result.device_id + }, {"$set": {"profile_result.dynamic_profile_results.$": jsonable_encoder(dynamic_result)}}) + elif force_insert: + return register_dynamic_profiling_result(id_, dynamic_result) + else: + raise ServiceException( + f'Dynamic profiling result to be updated with ip={dynamic_result.ip}, ' + f'device_id={dynamic_result.device_id} does not exist. Either set `force_insert` or change the ip ' + f'and device_id.' + ) + + +def delete_dynamic_profiling_result(id_: str, dynamic_result_ip: Union[str, IPv4Address, IPv6Address], + dynamic_result_device_id: str): + """Delete one dynamic profiling result to a model. + + Args: + id_: ID of the object. + dynamic_result_ip: Host IP address of dynamic profiling result. + dynamic_result_device_id: Device ID of dynamic profiling result. + + Returns: + + """ + if exists_dynamic_profiling_result_by_pks(id_, ip=dynamic_result_ip, device_id=dynamic_result_device_id): + return _collection.update( + {'_id': ObjectId(id_)}, + {'$pull': { + 'profile_result.dynamic_profile_results': { + 'ip': dynamic_result_ip, + 'device_id': dynamic_result_device_id + } + } + }, + multi=True, + upsert=False + ) + else: + raise ServiceException( + f'Dynamic profiling result to be updated with ip={dynamic_result_ip}, ' + f'device_id={dynamic_result_device_id} does not exist. Either set `force_insert` or change the ip ' + f'and device_id.' + ) diff --git a/modelci/persistence/service_.py b/modelci/persistence/service_.py deleted file mode 100644 index 9d6c1790..00000000 --- a/modelci/persistence/service_.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -Author: Li Yuanming -Email: yli056@e.ntu.edu.sg -Date: 2/17/2021 - -Persistence service using PyMongo. -""" -from enum import Enum -from ipaddress import IPv4Address, IPv6Address -from typing import List, Union, Optional - -import gridfs -from bson import ObjectId -from fastapi.encoders import jsonable_encoder - -from modelci.config import db_settings -from modelci.experimental.mongo_client import MongoClient -from modelci.persistence.exceptions import ServiceException -from modelci.types.models import MLModel, ModelUpdateSchema, Metric -from modelci.types.models.profile import StaticProfileResult, DynamicProfileResult -from modelci.utils.misc import remove_dict_null - -_db = MongoClient()[db_settings.mongo_db] -_collection = _db['model_d_o'] -_fs = gridfs.GridFS(_db) - - -def save(model_in: MLModel): - """Register a model into ModelDB and GridFS. `model.id` should be set as `None`, otherwise, the function will - raise a `ValueError`. - - Args: - model_in (MLModelIn): model object to be registered - - Return: - MLModel: Saved ML model object. - - Raises: - BadRequestValueException: If `model.id` is not None. - ServiceException: If model has exists with the same primary keys (name, framework, engine and version). - """ - - if _collection.count_documents( - filter=model_in.dict( - use_enum_values=True, - include={'architecture', 'framework', 'engine', 'version', 'task', 'dataset'} - ), - limit=1 - ): - raise ServiceException( - f'Model with primary keys architecture={model_in.architecture}, ' - f'framework={model_in.framework}, engine={model_in.engine}, version={model_in.version},' - f'task={model_in.task}, and dataset={model_in.dataset} has exists.' - ) - - # TODO: update weight ID in the MLModelIn - weight_id = _fs.put(bytes(model_in.weight), filename=model_in.weight.filename) - model = MLModel(**model_in.dict(exclude={'weight'}), weight=weight_id) - model.id = _collection.insert_one(model.dict(exclude_none=True, by_alias=True, use_enum_values=True)).inserted_id - return model - - -def get_by_id(id: str) -> MLModel: - """Get a MLModel object by its ID. - """ - model_data = _collection.find_one(filter={'_id': ObjectId(id)}) - if model_data is not None: - return MLModel.parse_obj(model_data) - else: - raise ServiceException(f'Model with id={id} does not exist.') - - -def get_by_parent_id(id_: str) -> List[MLModel]: - """ Get MLModel objects by its parent model ID. - Args: - id_: The ID of parent model - Returns: List of model objects - """ - models = _collection.find(filter={'parent_model_id': ObjectId(id_)}) - if len(models): - return list(map(MLModel.parse_obj, models)) - else: - raise ServiceException(f'Model with parent model ID={id_} does not exist.') - - -def exists_by_id(id: str) -> MLModel: - model = _collection.find_one(filter={'_id': ObjectId(id)}) - return model is not None - - -def get_models(**kwargs) -> List[MLModel]: - """ - - Args: - **kwargs: architecture, framework, engine, task and version - - Returns: list of models - - """ - valid_keys = {'architecture', 'framework', 'engine', 'task', 'version'} - - valid_kwargs = { - key: (value.value if isinstance(value, Enum) else value) - for key, value in remove_dict_null(kwargs).items() - if key in valid_keys - } - models = _collection.find(valid_kwargs) - return list(map(MLModel.parse_obj, models)) - - -def update_model(id_: str, schema: ModelUpdateSchema) -> MLModel: - """ Update existed model info - - Args: - id_: the ID of targeted model - schema: - - Returns: the updated model object - - """ - prev_model = get_by_id(id_) - if schema.metric: - schema.metric = {Metric(k).name: v for k, v in schema.metric.items()} - updated_data = { - key: value for key, value in jsonable_encoder(schema, exclude_unset=True).items() - if getattr(schema, key) != getattr(prev_model, key) - } - _collection.update_one({'_id': ObjectId(id_)}, {"$set": updated_data}) - return get_by_id(id_) - - -def delete_model(id_: str): - model = _collection.find_one(filter={'_id': ObjectId(id_)}) - if _fs.exists(ObjectId(model['weight'])): - _fs.delete(ObjectId(model['weight'])) - return _collection.delete_one({'_id': ObjectId(id_)}) - - -def register_static_profiling_result(id_: str, static_profiling_result: StaticProfileResult): - """ Register or update static profiling result to a model. - - Args: - id_: ID of the model - static_profiling_result: static profiling result - - Returns: - - """ - return _collection.update_one({'_id': ObjectId(id_)}, - {"$set": {"profile_result.static_profile_result": static_profiling_result.dict()}}, - upsert=True) - - -def register_dynamic_profiling_result(id_: str, dynamic_result: DynamicProfileResult): - """ Add one dynamic profiling result to a model. - - Args: - id_: ID of the model - dynamic_result: Dynamic profiling result - - Returns: - - """ - return _collection.update_one({'_id': ObjectId(id_)}, - {"$push": { - "profile_result.dynamic_profile_results": jsonable_encoder(dynamic_result)}}) - - -def exists_dynamic_profiling_result_by_pks(id_: str, ip: str, device_id: str) -> bool: - """Check if the dynamic profiling result exists. - - Args: - id_: ID of the model. - ip: IP address of dynamic profiling result to be deleted. - device_id: Device ID of dynamic profiling result to be deleted. - - Returns: True` for existence, `False` otherwise. - - """ - model = _collection.find_one(filter={ - '_id': ObjectId(id_), - 'profile_result.dynamic_profile_results.ip': ip, - 'profile_result.dynamic_profile_results.device_id': device_id - }) - return model is not None - - -def update_dynamic_profiling_result(id_: str, dynamic_result: DynamicProfileResult, - force_insert: Optional[bool] = False): - """ Update one dynamic profiling result to a model. - - Args: - id_: ID of the object - dynamic_result: Dynamic profiling result - force_insert: force to insert the dynamic result if it is not found - - Returns: - - """ - if exists_dynamic_profiling_result_by_pks(id_, ip=dynamic_result.ip, device_id=dynamic_result.device_id): - return _collection.update_one({ - '_id': ObjectId(id_), - 'profile_result.dynamic_profile_results.ip': dynamic_result.ip, - 'profile_result.dynamic_profile_results.device_id': dynamic_result.device_id - }, {"$set": {"profile_result.dynamic_profile_results.$": jsonable_encoder(dynamic_result)}}) - elif force_insert: - return register_dynamic_profiling_result(id_, dynamic_result) - else: - raise ServiceException( - f'Dynamic profiling result to be updated with ip={dynamic_result.ip}, ' - f'device_id={dynamic_result.device_id} does not exist. Either set `force_insert` or change the ip ' - f'and device_id.' - ) - - -def delete_dynamic_profiling_result(id_: str, dynamic_result_ip: Union[str, IPv4Address, IPv6Address], - dynamic_result_device_id: str): - """Delete one dynamic profiling result to a model. - - Args: - id_: ID of the object. - dynamic_result_ip: Host IP address of dynamic profiling result. - dynamic_result_device_id: Device ID of dynamic profiling result. - - Returns: - - """ - if exists_dynamic_profiling_result_by_pks(id_, ip=dynamic_result_ip, device_id=dynamic_result_device_id): - return _collection.update( - {'_id': ObjectId(id_)}, - {'$pull': { - 'profile_result.dynamic_profile_results': { - 'ip': dynamic_result_ip, - 'device_id': dynamic_result_device_id - } - } - }, - multi=True, - upsert=False - ) - else: - raise ServiceException( - f'Dynamic profiling result to be updated with ip={dynamic_result_ip}, ' - f'device_id={dynamic_result_device_id} does not exist. Either set `force_insert` or change the ip ' - f'and device_id.' - ) diff --git a/tests/test_model_service.py b/tests/test_model_service.py index e896391b..21c416f0 100644 --- a/tests/test_model_service.py +++ b/tests/test_model_service.py @@ -5,9 +5,9 @@ from modelci.hub.registrar import register_model_from_yaml from modelci.persistence import mongo -from modelci.persistence.service_ import delete_model, register_static_profiling_result, \ +from modelci.persistence.service import delete_model, register_static_profiling_result, \ register_dynamic_profiling_result, update_dynamic_profiling_result, delete_dynamic_profiling_result -from modelci.persistence.service_ import get_models, get_by_id, update_model +from modelci.persistence.service import get_models, get_by_id, update_model from modelci.types.models import Task, Metric, ModelUpdateSchema from modelci.types.models.profile import ( StaticProfileResult, From 0d9befe9bc03acc2344eed640b41c1270134713f Mon Sep 17 00:00:00 2001 From: univerone Date: Sun, 16 May 2021 21:11:38 +0800 Subject: [PATCH 10/13] :fire: remove ModelDao --- modelci/experimental/curd/model_train.py | 6 +- modelci/persistence/model_dao.py | 229 ----------------------- 2 files changed, 3 insertions(+), 232 deletions(-) delete mode 100644 modelci/persistence/model_dao.py diff --git a/modelci/experimental/curd/model_train.py b/modelci/experimental/curd/model_train.py index 0ff9d7c0..c43e3879 100644 --- a/modelci/experimental/curd/model_train.py +++ b/modelci/experimental/curd/model_train.py @@ -12,8 +12,8 @@ from modelci.config import db_settings from modelci.experimental.model.model_train import TrainingJob, TrainingJobIn, TrainingJobUpdate from modelci.experimental.mongo_client import MongoClient +from modelci.persistence import service from modelci.persistence.exceptions import ServiceException -from modelci.persistence.model_dao import ModelDAO _db = MongoClient()[db_settings.mongo_db] _collection = _db['training_job'] @@ -42,7 +42,7 @@ def get_all() -> List[TrainingJob]: def save(training_job_in: TrainingJobIn) -> str: model_id = training_job_in.model - if not ModelDAO.exists_by_id(ObjectId(model_id)): + if not service.exists_by_id(str(model_id)): raise ServiceException(f'Model with ID {model_id} not exist.') training_job = TrainingJob(**training_job_in.dict(exclude_none=True)) @@ -55,7 +55,7 @@ def update(training_job: TrainingJobUpdate) -> int: raise ValueError(f'id {training_job.id} not found.') # check model ID - if training_job.model and not ModelDAO.exists_by_id(ObjectId(training_job.model)): + if training_job.model and not service.exists_by_id(str(training_job.model)): raise ServiceException(f'Model with ID {training_job.model} not exist.') # save update diff --git a/modelci/persistence/model_dao.py b/modelci/persistence/model_dao.py deleted file mode 100644 index d934dccd..00000000 --- a/modelci/persistence/model_dao.py +++ /dev/null @@ -1,229 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Model Data Access Object. - -This module contains model data access object (ModelDAO) providing methods to communicate with Mongo DB. `ModelDAO` uses -`mongoengine` as ORM for MongoDB access. -""" -from typing import List - -from bson import ObjectId - -from modelci.types.do import ModelDO, DynamicProfileResultDO, StaticProfileResultDO - - -class ModelDAO(object): - @staticmethod - def exists_by_id(id_: ObjectId): - """Check if the given Object ID exists in MongoDB. - - Args: - id_ (ObjectId): Model ID. - """ - return bool(ModelDO.objects(id=id_)) - - @staticmethod - def exists_by_primary_keys(**kwargs): - """ - - Args: - **kwargs: Keyword arguments of primary keys. Supported values: - architecture (str): Model name. - task (int): Model task enum value. - engine (int): Driving engine enum value. - framework (int): Model framework enum value. - version (int): Model version number. - dataset (str): the dataset used to train the model - - Returns: - bool: Existence of the model. - """ - return bool(ModelDO.objects(**kwargs)) - - @staticmethod - def get_model_by_id(id_: ObjectId) -> ModelDO: - """Get model plain object given model ID. - - Args: - id_ (ObjectId): Model ID. - - Return: - ModelDO: Model plain object. None for model PO not found. - """ - return ModelDO.objects(id=id_).first() - - @staticmethod - def get_models(**kwargs) -> List[ModelDO]: - """Get a list of model plain object given model name, framework and engine. - - Args: - kwargs (dict): A dictionary of arguments: - name (str): model name. - task: Model task. - framework: Model framework. - engine: Model engine. - version: Model version. - Return: - List[ModelDO]: A list of model plain objects. - """ - return ModelDO.objects(**kwargs).order_by('architecture', 'task', 'framework', 'engine', '-version') - - @staticmethod - def get_models_by_task(task: int) -> List[ModelDO]: - """Get a list of model plain objects given task. - - Args: - task (Task): Model predictive or descriptive task name - - Return: - List[ModelDO]: A list of model plain objects. An empty list will be returned if no such model. - """ - return ModelDO.objects(task=task) - - @staticmethod - def get_models_by_parent_id(parent_id: str) -> List[ModelDO]: - """Get a list of model plain objects given parent id. - - Args: - parent_id (str): the parent model id of current model if this model is derived from a pre-existing one - - Return: - List[ModelDO]: A list of model plain objects. An empty list will be returned if no such model. - """ - return ModelDO.objects(parent_model_id=parent_id) - - @staticmethod - def save_model(model: ModelDO, force_insert=False) -> ModelDO: - """Save a model PO. - - Args: - model (ModelDO): Model plain object to be saved. - force_insert (bool): Only try to create a new document. Default to `False`. - - Returns: - ModelPo: Updated model plain object. - """ - return model.save(force_insert=force_insert) - - @staticmethod - def update_model(id_: ObjectId, **kwargs) -> ModelDO: # TODO: try ModelPO.objects(...).update()? - """ - Update or register model PO. - - Args: - id_ (ObjectId): Model ID. - **kwargs: Keyword arguments to be updated. - - Returns: - ModelDO: - """ - return ModelDO.objects(id=id_).update(**kwargs) - - @staticmethod - def delete_model_by_id(id_: ObjectId) -> int: - """Delete model given model ID. - - Args: - id_ (ObjectId): Model ID. - - Return: - int: number of affected rows. - """ - return ModelDO.objects(id=id_).delete() - - @staticmethod - def register_static_profiling_result( - id_: ObjectId, - static_profiling_result: StaticProfileResultDO - ) -> int: - """Register static profiling result. - - Args: - id_ (objectId): ID of the model, where the static profiling result is added. - static_profiling_result (StaticProfileResultPO): Static profiling result. - - Return: - int: number of affected rows. - """ - return ModelDO.objects(id=id_).update_one(set__profile_result__static_profile_result=static_profiling_result) - - @staticmethod - def register_dynamic_profiling_result( - id_: ObjectId, - dynamic_profiling_result: DynamicProfileResultDO - ) -> int: - """Register dynamic profiling result. - - Args: - id_ (ObjectId): ID of the model, where the static profiling result is appended. - dynamic_profiling_result (DynamicProfileResultPO): Dynamic profiling result. - - Return: - int: number of affected rows. - """ - return ModelDO.objects(id=id_).update_one( - push__profile_result__dynamic_profile_results=dynamic_profiling_result) - - @staticmethod - def exists_dynamic_profiling_result_by_pks( - id_: ObjectId, - ip: str, - device_id: str, - ) -> bool: - """Check if the dynamic profiling result exists. - - Args: - id_ (ObjectId): ID of the model. - ip (str): IP address of dynamic profiling result to be deleted. - device_id (str): Device ID of dynamic profiling result to be deleted. - - Return: - bool: `True` for existence, `False` otherwise. - """ - return bool(ModelDO.objects( - id=id_, - profile_result__dynamic_profile_results__ip=ip, - profile_result__dynamic_profile_results__device_id=device_id) - ) - - @staticmethod - def update_dynamic_profiling_result( - id_: ObjectId, - dynamic_profiling_result: DynamicProfileResultDO - ) -> int: - """Update dynamic profiling result. - - Args: - id_ (ObjectId): ID of the model. - dynamic_profiling_result (DynamicProfileResultPO): Dynamic profiling result to be updated. - - Return: - int: number of affected rows. - """ - return ModelDO.objects( - id=id_, - profile_result__dynamic_profile_results__ip=dynamic_profiling_result.ip, - profile_result__dynamic_profile_results__device_id=dynamic_profiling_result.device_id - ).update( - set__profile_result__dynamic_profile_results__S=dynamic_profiling_result - ) - - @staticmethod - def delete_dynamic_profiling_result( - id_: ObjectId, - ip: str, - device_id: str, - ) -> None: - """Delete dynamic profiling result. - - Args: - id_ (ObjectId): ID of the model. - ip (str): IP address of dynamic profiling result to be deleted. - device_id (str): Device ID of dynamic profiling result to be deleted. - """ - return ModelDO.objects( - id=id_, - ).update_one( - pull__profile_result__dynamic_profile_results__ip=ip, - pull__profile_result__dynamic_profile_results__device_id=device_id - ) From 5461b6d4dab88b7aff0f053722b36cbcf303b9a4 Mon Sep 17 00:00:00 2001 From: univerone Date: Sun, 16 May 2021 21:19:27 +0800 Subject: [PATCH 11/13] :art: remove ModelService in deployer --- modelci/hub/deployer/dispatcher.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/modelci/hub/deployer/dispatcher.py b/modelci/hub/deployer/dispatcher.py index c99ff3d0..517eba6b 100644 --- a/modelci/hub/deployer/dispatcher.py +++ b/modelci/hub/deployer/dispatcher.py @@ -5,12 +5,13 @@ import docker from docker.models.containers import Container from docker.types import Mount, Ulimit -from modelci.persistence.service import ModelService from modelci.hub.deployer import config -from modelci.hub.manager import retrieve_model, retrieve_model_by_task +from modelci.hub.manager import retrieve_model from modelci.hub.utils import parse_path +from modelci.persistence.service import update_model from modelci.types.bo import Framework, Engine, ModelStatus +from modelci.types.models import ModelUpdateSchema from modelci.utils.misc import remove_dict_null, get_device @@ -96,26 +97,24 @@ def serve_by_name(args): framework = Framework[args.framework.upper()] engine = Engine[args.engine.upper()] - model_bo = retrieve_model(architecture_name=model, framework=framework, engine=engine) - serve(model_bo[0].saved_path, device=args.device, name=args.name, batch_size=args.bs) + ml_model = retrieve_model(architecture=model, framework=framework, engine=engine) + serve(ml_model[0].saved_path, device=args.device, name=args.name, batch_size=args.bs) # TODO: check if the service is dispatched sucessfully - new_status = [item for item in model_bo[0].model_status if + new_status = [item for item in ml_model[0].model_status if item is not (ModelStatus.CONVERTED or ModelStatus.PUBLISHED)] new_status.append(ModelStatus.IN_SERVICE) - model_bo[0].model_status = new_status - ModelService.update_model(model_bo[0]) + update_model(str(ml_model[0].id), ModelUpdateSchema(model_status=new_status)) def serve_by_task(args): - model_bo = retrieve_model_by_task(task=args.task) - serve(model_bo[0].saved_path, device=args.device, name=args.name, batch_size=args.bs) + ml_model = retrieve_model(task=args.task) + serve(ml_model[0].saved_path, device=args.device, name=args.name, batch_size=args.bs) # TODO: check if the service is dispatched sucessfully - new_status = [item for item in model_bo[0].model_status if + new_status = [item for item in ml_model[0].model_status if item is not (ModelStatus.CONVERTED or ModelStatus.PUBLISHED)] new_status.append(ModelStatus.IN_SERVICE) - model_bo[0].model_status = new_status - ModelService.update_model(model_bo[0]) + update_model(str(ml_model[0].id), ModelUpdateSchema(model_status=new_status)) if __name__ == '__main__': From 0fa022b8d1f3c103f2a5e0e2ce931a51a3fbacaf Mon Sep 17 00:00:00 2001 From: univerone Date: Sun, 16 May 2021 21:41:11 +0800 Subject: [PATCH 12/13] :art: [profiler] remove ModelService in profiler --- modelci/controller/executor.py | 15 +++++------ modelci/hub/profiler.py | 47 +++++++++++++++++----------------- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/modelci/controller/executor.py b/modelci/controller/executor.py index 884c1588..05ecdb05 100644 --- a/modelci/controller/executor.py +++ b/modelci/controller/executor.py @@ -13,8 +13,9 @@ from modelci.hub.profiler import Profiler from modelci.metrics.benchmark.metric import BaseModelInspector -from modelci.persistence.service import ModelService -from modelci.types.bo import ModelBO, Status +from modelci.persistence.service import update_model, register_dynamic_profiling_result +from modelci.types.bo import Status +from modelci.types.models import MLModel, ModelUpdateSchema class Job(object): @@ -22,7 +23,7 @@ def __init__( self, client: BaseModelInspector, device: str, - model_info: ModelBO, + model_info: MLModel, container_name: str = None ): self.client = client @@ -81,16 +82,14 @@ def run(self) -> None: else: container_name = job.container_name # change model status - job.model.status = Status.RUNNING - ModelService.update_model(job.model) + update_model(str(job.model.id), ModelUpdateSchema(model_status=Status.RUNNING)) profiler = Profiler(model_info=job.model, server_name=container_name, inspector=job.client) dpr = profiler.diagnose(device=job.device) - ModelService.append_dynamic_profiling_result(job.model.id, dynamic_result=dpr) + register_dynamic_profiling_result(str(job.model.id), dynamic_result=dpr) # set model status to pass - job.model.status = Status.PASS - ModelService.update_model(job.model) + update_model(str(job.model.id), ModelUpdateSchema(model_status=Status.PASS)) if job.container_name is None: # get holding container diff --git a/modelci/hub/profiler.py b/modelci/hub/profiler.py index 284b4f97..71af94dc 100644 --- a/modelci/hub/profiler.py +++ b/modelci/hub/profiler.py @@ -16,13 +16,12 @@ from modelci.hub.client import CVTorchClient from modelci.metrics.benchmark.metric import BaseModelInspector from modelci.persistence.exceptions import ServiceException -from modelci.types.bo import ( - Framework, - DynamicProfileResultBO, +from modelci.types.models import MLModel, Engine +from modelci.types.models.profile import ( + DynamicProfileResult, ProfileMemory, ProfileLatency, - ProfileThroughput, - ModelBO + ProfileThroughput ) from modelci.utils.misc import get_ip @@ -35,12 +34,12 @@ class Profiler(object): """Profiler class, call this to test model performance. Args: - model_info (ModelBO): Information about the model, can get from `retrieve_model` method. + model_info (MLModel): Information about the model, can get from `retrieve_model` method. server_name (str): Serving platform's docker container's name. inspector (BaseModelInspector): The client instance implemented from :class:`BaseModelInspector`. """ - def __init__(self, model_info: ModelBO, server_name: str, inspector: BaseModelInspector = None): + def __init__(self, model_info: MLModel, server_name: str, inspector: BaseModelInspector = None): """Init a profiler object.""" if inspector is None: self.inspector = self.__auto_select_client() # TODO: To Improve @@ -51,10 +50,10 @@ def __init__(self, model_info: ModelBO, server_name: str, inspector: BaseModelIn raise TypeError("The inspector should be an instance of class BaseModelInspector!") self.server_name = server_name - self.model_bo = model_info + self.ml_model = model_info self.docker_client = docker.from_env() - def diagnose(self, batch_size: int = None, device='cuda', timeout=30) -> DynamicProfileResultBO: + def diagnose(self, batch_size: int = None, device='cuda', timeout=30) -> DynamicProfileResult: """Start diagnosing and profiling model. Args: @@ -82,7 +81,7 @@ def diagnose(self, batch_size: int = None, device='cuda', timeout=30) -> Dynamic result = self.inspector.run_model(server_name=self.server_name, device=device) - dpr_bo = DynamicProfileResultBO( + dpr_bo = DynamicProfileResult( ip=get_ip(), device_id=result['device_id'], device_name=result['device_name'], @@ -112,11 +111,11 @@ def auto_diagnose(self, available_devices, batch_list: list = None): """Select the free machine and deploy automatically to test the model using available platforms.""" from modelci.hub.deployer.dispatcher import serve - saved_path = self.model_bo.saved_path - model_id = self.model_bo.id - model_name = self.model_bo.name - model_framework = self.model_bo.framework - serving_engine = self.model_bo.engine + saved_path = self.ml_model.saved_path + model_id = self.ml_model.id + model_name = self.ml_model.name + model_framework = self.ml_model.framework + serving_engine = self.ml_model.engine # for testing print('\n available GPU devices: ', [device.name for device in available_devices]) @@ -151,24 +150,24 @@ def auto_diagnose(self, available_devices, batch_list: list = None): def __auto_select_client(self): # according to the serving engine, select the right testing client. # TODO: replace the input None data in each client with self-generated data. - serving_engine = self.model_bo.engine - if serving_engine == Framework.NONE: + serving_engine = self.ml_model.engine + if serving_engine == Engine.NONE: raise Exception( 'please choose a serving engine for the model') # TODO How can we deploy to all available platforms if we don't know the engine? - kwargs = {'repeat_data': None, 'model_info': self.model_bo, 'batch_num': DEFAULT_BATCH_NUM} - if serving_engine == Framework.TFS: + kwargs = {'repeat_data': None, 'model_info': self.ml_model, 'batch_num': DEFAULT_BATCH_NUM} + if serving_engine == Engine.TFS: return CVTFSClient(**kwargs) - elif serving_engine == Framework.TORCHSCRIPT: + elif serving_engine == Engine.TORCHSCRIPT: return CVTorchClient(**kwargs) - elif serving_engine == Framework.ONNX: + elif serving_engine == Engine.ONNX: return CVONNXClient(**kwargs) - elif serving_engine == Framework.TRT: + elif serving_engine == Engine.TRT: return CVTRTClient(**kwargs) - elif serving_engine == Framework.TVM: + elif serving_engine == Engine.TVM: raise NotImplementedError - elif serving_engine == Framework.CUSTOMIZED: + elif serving_engine == Engine.CUSTOMIZED: raise Exception('please pass a custom client to the Profiler.__init__.') else: return None From 4cba2ebd0a130470189bc3ac148519d19682ccd7 Mon Sep 17 00:00:00 2001 From: univerone Date: Sat, 22 May 2021 09:32:00 +0800 Subject: [PATCH 13/13] :bug: fix codacy and test issue --- modelci/hub/cache_manager.py | 4 ++-- modelci/ui.py | 4 ++-- tests/test_model_api.py | 15 --------------- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/modelci/hub/cache_manager.py b/modelci/hub/cache_manager.py index 5f7f8b93..cff20d84 100644 --- a/modelci/hub/cache_manager.py +++ b/modelci/hub/cache_manager.py @@ -30,10 +30,10 @@ def get_remote_model_weight(model: MLModel): with open(str(save_path), 'wb') as f: f.write(bytes(model.weight)) if model.engine == Engine.TFS: - subprocess.call(['unzip', save_path, '-d', '/']) + subprocess.call(['unzip', save_path, '-d', '/']) # nosec os.remove(save_path) elif model.engine == Engine.TRT: - subprocess.call(['unzip', save_path, '-d', '/']) + subprocess.call(['unzip', save_path, '-d', '/']) # nosec os.remove(save_path) converter.TRTConverter.generate_trt_config( diff --git a/modelci/ui.py b/modelci/ui.py index 590bbda8..3916afbd 100644 --- a/modelci/ui.py +++ b/modelci/ui.py @@ -159,8 +159,8 @@ def model_detailed_view(model: MLModel): if not model.profile_result: converted_grid.add_row('N.A.') else: - spr = model.profile_result['static_result'] - dprs = model.profile_result['dynamic_results'] + spr = model.profile_result.static_profile_result + dprs = model.profile_result.dynamic_profile_results # Static profiling result converted_grid.add_row(Text('Static Result', style='bold turquoise2', justify='left')) diff --git a/tests/test_model_api.py b/tests/test_model_api.py index 5719cdd8..ab1ce062 100644 --- a/tests/test_model_api.py +++ b/tests/test_model_api.py @@ -73,21 +73,6 @@ def test_update_model_strcuture(): assert 'id' in response.text -def test_create_training_job(): - params = {'architecture': 'ResNet50', 'framework': 'PyTorch', 'engine': 'NONE'} - with requests.get(f'{app_settings.api_v1_prefix}/model/', params=params) as r: - model_list = r.json() - model_id = model_list[0]["id"] - payload = {"model": model_id, "data_module": {"dataset_name": "CIFAR10", "batch_size": 4}, - "min_epochs": 10, "max_epochs": 15, "optimizer_type": "Adam", - "optimizer_property": {"betas": [0.9, 0.99], "eps": 1e-8, "weight_decay": 0, "amsgrad": False}, - "lr_scheduler_type": "StepLR", "lr_scheduler_property": {"lr": 0.01, "step_size": 30}, - "loss_function": "torch.nn.CrossEntropyLoss"} - response = requests.post(f'{app_settings.server_url}/api/exp/train/', json=payload) - assert response.status_code == HTTPStatus.CREATED - assert 'id' in response.text - - def test_update_model(): with requests.get(f'{app_settings.api_v1_prefix}/model/') as r: model_list = r.json()