diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e278a7f7..91cfe8280 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ### Fixes - Support for more than 10k entries when using `fetch_*_table()` methods ([#1576](https://github.com/neptune-ai/neptune-client/pull/1576)) +- Docstrings visibility for Neptune objects ([#1580](https://github.com/neptune-ai/neptune-client/pull/1580)) ### Changes - Improved performance of `fetch_*_table()` methods up to 2x ([#1573])(https://github.com/neptune-ai/neptune-client/pull/1573) diff --git a/src/neptune/metadata_containers/model.py b/src/neptune/metadata_containers/model.py index 44847b533..d62e1cbe3 100644 --- a/src/neptune/metadata_containers/model.py +++ b/src/neptune/metadata_containers/model.py @@ -63,7 +63,98 @@ class Model(MetadataContainer): - """Class for registering a model to neptune.ai and retrieving information from it.""" + """Initializes a Model object from an existing or new model. + + You can use this to create a new model from code or to perform actions on existing models. + + A Model object is suitable for storing model metadata that is common to all versions (you can use ModelVersion + objects to track version-specific metadata). It does not track background metrics or logs automatically, + but you can assign metadata to the Model object just like you can for runs. + To learn more about model registry, see the docs: https://docs.neptune.ai/model_registry/overview/ + + You can also use the Model object as a context manager (see examples). + + Args: + with_id: The Neptune identifier of an existing model to resume, such as "CLS-PRE". + The identifier is stored in the model's "sys/id" field. + If left empty, a new model is created. + name: Custom name for the model. You can add it as a column in the models table ("sys/name"). + You can also edit the name in the app, in the information view. + key: Key for the model. Required when creating a new model. 
+ Used together with the project key to form the model identifier. + Must be uppercase and unique within the project. + project: Name of a project in the form `workspace-name/project-name`. + If None, the value of the NEPTUNE_PROJECT environment variable is used. + api_token: User's API token. + If left empty, the value of the NEPTUNE_API_TOKEN environment variable is used (recommended). + mode: Connection mode in which the tracking will work. + If `None` (default), the value of the NEPTUNE_MODE environment variable is used. + If no value was set for the environment variable, "async" is used by default. + Possible values are `async`, `sync`, `offline`, `read-only`, and `debug`. + flush_period: In the asynchronous (default) connection mode, how often disk flushing is triggered + (in seconds). + proxies: Argument passed to HTTP calls made via the Requests library, as dictionary of strings. + For more information about proxies, see the Requests documentation. + async_lag_callback: Custom callback which is called if the lag between a queued operation and its + synchronization with the server exceeds the duration defined by `async_lag_threshold`. The callback + should take a Model object as the argument and can contain any custom code, such as calling `stop()` on + the object. + Note: Instead of using this argument, you can use Neptune's default callback by setting the + `NEPTUNE_ENABLE_DEFAULT_ASYNC_LAG_CALLBACK` environment variable to `TRUE`. + async_lag_threshold: In seconds, duration between the queueing and synchronization of an operation. + If a lag callback (default callback enabled via environment variable or custom callback passed to the + `async_lag_callback` argument) is enabled, the callback is called when this duration is exceeded. + async_no_progress_callback: Custom callback which is called if there has been no synchronization progress + whatsoever for the duration defined by `async_no_progress_threshold`. 
The callback should take a Model + object as the argument and can contain any custom code, such as calling `stop()` on the object. + Note: Instead of using this argument, you can use Neptune's default callback by setting the + `NEPTUNE_ENABLE_DEFAULT_ASYNC_NO_PROGRESS_CALLBACK` environment variable to `TRUE`. + async_no_progress_threshold: In seconds, for how long there has been no synchronization progress since the + object was initialized. If a no-progress callback (default callback enabled via environment variable or + custom callback passed to the `async_no_progress_callback` argument) is enabled, the callback is called + when this duration is exceeded. + + Returns: + Model object that is used to manage the model and log metadata to it. + + Examples: + + >>> import neptune + + Creating a new model: + + >>> model = neptune.init_model(key="PRE") + >>> model["metadata"] = some_metadata + + >>> # Or initialize with the constructor + ... model = Model(key="PRE") + + >>> # You can provide the project parameter as an environment variable + ... # or as an argument to the init_model() function: + ... model = neptune.init_model(key="PRE", project="workspace-name/project-name") + + >>> # When creating a model, you can give it a name: + ... model = neptune.init_model(key="PRE", name="Pre-trained model") + + Connecting to an existing model: + + >>> # Initialize existing model with identifier "CLS-PRE" + ... model = neptune.init_model(with_id="CLS-PRE") + + >>> # To prevent modifications when connecting to an existing model, you can connect in read-only mode + ... model = neptune.init_model(with_id="CLS-PRE", mode="read-only") + + Using the Model object as context manager: + + >>> with Model(key="PRE") as model: + ... 
model["metadata"] = some_metadata + + For details, see the docs: + Initializing a model: + https://docs.neptune.ai/api/neptune#init_model + Model class reference: + https://docs.neptune.ai/api/model + """ container_type = ContainerType.MODEL @@ -83,98 +174,6 @@ def __init__( async_no_progress_callback: Optional[NeptuneObjectCallback] = None, async_no_progress_threshold: float = ASYNC_NO_PROGRESS_THRESHOLD, ): - """Initializes a Model object from an existing or new model. - - You can use this to create a new model from code or to perform actions on existing models. - - A Model object is suitable for storing model metadata that is common to all versions (you can use ModelVersion - objects to track version-specific metadata). It does not track background metrics or logs automatically, - but you can assign metadata to the Model object just like you can for runs. - To learn more about model registry, see the docs: https://docs.neptune.ai/model_registry/overview/ - - You can also use the Model object as a context manager (see examples). - - Args: - with_id: The Neptune identifier of an existing model to resume, such as "CLS-PRE". - The identifier is stored in the model's "sys/id" field. - If left empty, a new model is created. - name: Custom name for the model. You can add it as a column in the models table ("sys/name"). - You can also edit the name in the app, in the information view. - key: Key for the model. Required when creating a new model. - Used together with the project key to form the model identifier. - Must be uppercase and unique within the project. - project: Name of a project in the form `workspace-name/project-name`. - If None, the value of the NEPTUNE_PROJECT environment variable is used. - api_token: User's API token. - If left empty, the value of the NEPTUNE_API_TOKEN environment variable is used (recommended). - mode: Connection mode in which the tracking will work. - If `None` (default), the value of the NEPTUNE_MODE environment variable is used. 
- If no value was set for the environment variable, "async" is used by default. - Possible values are `async`, `sync`, `offline`, `read-only`, and `debug`. - flush_period: In the asynchronous (default) connection mode, how often disk flushing is triggered - (in seconds). - proxies: Argument passed to HTTP calls made via the Requests library, as dictionary of strings. - For more information about proxies, see the Requests documentation. - async_lag_callback: Custom callback which is called if the lag between a queued operation and its - synchronization with the server exceeds the duration defined by `async_lag_threshold`. The callback - should take a Model object as the argument and can contain any custom code, such as calling `stop()` on - the object. - Note: Instead of using this argument, you can use Neptune's default callback by setting the - `NEPTUNE_ENABLE_DEFAULT_ASYNC_LAG_CALLBACK` environment variable to `TRUE`. - async_lag_threshold: In seconds, duration between the queueing and synchronization of an operation. - If a lag callback (default callback enabled via environment variable or custom callback passed to the - `async_lag_callback` argument) is enabled, the callback is called when this duration is exceeded. - async_no_progress_callback: Custom callback which is called if there has been no synchronization progress - whatsoever for the duration defined by `async_no_progress_threshold`. The callback should take a Model - object as the argument and can contain any custom code, such as calling `stop()` on the object. - Note: Instead of using this argument, you can use Neptune's default callback by setting the - `NEPTUNE_ENABLE_DEFAULT_ASYNC_NO_PROGRESS_CALLBACK` environment variable to `TRUE`. - async_no_progress_threshold: In seconds, for how long there has been no synchronization progress since the - object was initialized. 
If a no-progress callback (default callback enabled via environment variable or - custom callback passed to the `async_no_progress_callback` argument) is enabled, the callback is called - when this duration is exceeded. - - Returns: - Model object that is used to manage the model and log metadata to it. - - Examples: - - >>> import neptune - - Creating a new model: - - >>> model = neptune.init_model(key="PRE") - >>> model["metadata"] = some_metadata - - >>> # Or initialize with the constructor - ... model = Model(key="PRE") - - >>> # You can provide the project parameter as an environment variable - ... # or as an argument to the init_model() function: - ... model = neptune.init_model(key="PRE", project="workspace-name/project-name") - - >>> # When creating a model, you can give it a name: - ... model = neptune.init_model(key="PRE", name="Pre-trained model") - - Connecting to an existing model: - - >>> # Initialize existing model with identifier "CLS-PRE" - ... model = neptune.init_model(with_id="CLS-PRE") - - >>> # To prevent modifications when connecting to an existing model, you can connect in read-only mode - ... model = neptune.init_model(with_id="CLS-PRE", mode="read-only") - - Using the Model object as context manager: - - >>> with Model(key="PRE") as model: - ... 
model["metadata"] = some_metadata - - For details, see the docs: - Initializing a model: - https://docs.neptune.ai/api/neptune#init_model - Model class reference: - https://docs.neptune.ai/api/model - """ verify_type("with_id", with_id, (str, type(None))) verify_type("name", name, (str, type(None))) verify_type("key", key, (str, type(None))) diff --git a/src/neptune/metadata_containers/model_version.py b/src/neptune/metadata_containers/model_version.py index f7e82d515..0915f265b 100644 --- a/src/neptune/metadata_containers/model_version.py +++ b/src/neptune/metadata_containers/model_version.py @@ -58,7 +58,103 @@ class ModelVersion(MetadataContainer): - """Class for managing a version of a neptune.ai model and retrieving information from it.""" + """Initializes a ModelVersion object from an existing or new model version. + + Before creating model versions, you must first register a model by creating a Model object. + + A ModelVersion object is suitable for storing model metadata that is version-specific. It does not track + background metrics or logs automatically, but you can assign metadata to the model version just like you can + for runs. You can use the parent Model object to store metadata that is common to all versions of the model. + To learn more about model registry, see the docs: https://docs.neptune.ai/model_registry/overview/ + + To manage the stage of a model version, use its `change_stage()` method or use the menu in the web app. + + You can also use the ModelVersion object as a context manager (see examples). + + Args: + with_id: The Neptune identifier of an existing model version to resume, such as "CLS-PRE-3". + The identifier is stored in the model version's "sys/id" field. + If left empty, a new model version is created. + name: Custom name for the model version. You can add it as a column in the model versions table + ("sys/name"). You can also edit the name in the app, in the information view. 
+ model: Identifier of the model for which the new version should be created. + Required when creating a new model version. + You can find the model ID in the leftmost column of the models table, or in a model's "sys/id" field. + project: Name of a project in the form `workspace-name/project-name`. + If None, the value of the NEPTUNE_PROJECT environment variable is used. + api_token: User's API token. + If left empty, the value of the NEPTUNE_API_TOKEN environment variable is used (recommended). + mode: Connection mode in which the tracking will work. + If None (default), the value of the NEPTUNE_MODE environment variable is used. + If no value was set for the environment variable, "async" is used by default. + Possible values are `async`, `sync`, `offline`, `read-only`, and `debug`. + flush_period: In the asynchronous (default) connection mode, how often disk flushing is triggered + (in seconds). + proxies: Argument passed to HTTP calls made via the Requests library, as dictionary of strings. + For more information about proxies, see the Requests documentation. + async_lag_callback: Custom callback which is called if the lag between a queued operation and its + synchronization with the server exceeds the duration defined by `async_lag_threshold`. The callback + should take a ModelVersion object as the argument and can contain any custom code, such as calling + `stop()` on the object. + Note: Instead of using this argument, you can use Neptune's default callback by setting the + `NEPTUNE_ENABLE_DEFAULT_ASYNC_LAG_CALLBACK` environment variable to `TRUE`. + async_lag_threshold: In seconds, duration between the queueing and synchronization of an operation. + If a lag callback (default callback enabled via environment variable or custom callback passed to the + `async_lag_callback` argument) is enabled, the callback is called when this duration is exceeded. 
+ async_no_progress_callback: Custom callback which is called if there has been no synchronization progress + whatsoever for the duration defined by `async_no_progress_threshold`. The callback should take a + ModelVersion object as the argument and can contain any custom code, such as calling `stop()` on the + object. + Note: Instead of using this argument, you can use Neptune's default callback by setting the + `NEPTUNE_ENABLE_DEFAULT_ASYNC_NO_PROGRESS_CALLBACK` environment variable to `TRUE`. + async_no_progress_threshold: In seconds, for how long there has been no synchronization progress since the + object was initialized. If a no-progress callback (default callback enabled via environment variable or + custom callback passed to the `async_no_progress_callback` argument) is enabled, the callback is called + when this duration is exceeded. + + Returns: + ModelVersion object that is used to manage the model version and log metadata to it. + + Examples: + + >>> import neptune + + Creating a new model version: + + >>> # Create a new model version for a model with identifier "CLS-PRE" + ... model_version = neptune.init_model_version(model="CLS-PRE") + >>> model_version["your/structure"] = some_metadata + + >>> # You can provide the project parameter as an environment variable + ... # or directly in the init_model_version() function: + ... model_version = neptune.init_model_version( + ... model="CLS-PRE", + ... project="ml-team/classification", + ... ) + + >>> # Or initialize with the constructor: + ... model_version = ModelVersion(model="CLS-PRE") + + Connecting to an existing model version: + + >>> # Initialize an existing model version with identifier "CLS-PRE-12" + ... model_version = neptune.init_model_version(with_id="CLS-PRE-12") + + >>> # To prevent modifications when connecting to an existing model version, + ... # you can connect in read-only mode: + ... 
model_version = neptune.init_model_version(with_id="CLS-PRE-12", mode="read-only") + + Using the ModelVersion object as context manager: + + >>> with ModelVersion(model="CLS-PRE") as model_version: + ... model_version["metadata"] = some_metadata + + For more, see the docs: + Initializing a model version: + https://docs.neptune.ai/api/neptune#init_model_version + ModelVersion class reference: + https://docs.neptune.ai/api/model_version/ + """ container_type = ContainerType.MODEL_VERSION @@ -78,103 +174,6 @@ def __init__( async_no_progress_callback: Optional[NeptuneObjectCallback] = None, async_no_progress_threshold: float = ASYNC_NO_PROGRESS_THRESHOLD, ) -> None: - """Initializes a ModelVersion object from an existing or new model version. - - Before creating model versions, you must first register a model by creating a Model object. - - A ModelVersion object is suitable for storing model metadata that is version-specific. It does not track - background metrics or logs automatically, but you can assign metadata to the model version just like you can - for runs. You can use the parent Model object to store metadata that is common to all versions of the model. - To learn more about model registry, see the docs: https://docs.neptune.ai/model_registry/overview/ - - To manage the stage of a model version, use its `change_stage()` method or use the menu in the web app. - - You can also use the ModelVersion object as a context manager (see examples). - - Args: - with_id: The Neptune identifier of an existing model version to resume, such as "CLS-PRE-3". - The identifier is stored in the model version's "sys/id" field. - If left empty, a new model version is created. - name: Custom name for the model version. You can add it as a column in the model versions table - ("sys/name"). You can also edit the name in the app, in the information view. - model: Identifier of the model for which the new version should be created. - Required when creating a new model version. 
- You can find the model ID in the leftmost column of the models table, or in a model's "sys/id" field. - project: Name of a project in the form `workspace-name/project-name`. - If None, the value of the NEPTUNE_PROJECT environment variable is used. - api_token: User's API token. - If left empty, the value of the NEPTUNE_API_TOKEN environment variable is used (recommended). - mode: Connection mode in which the tracking will work. - If None (default), the value of the NEPTUNE_MODE environment variable is used. - If no value was set for the environment variable, "async" is used by default. - Possible values are `async`, `sync`, `offline`, `read-only`, and `debug`. - flush_period: In the asynchronous (default) connection mode, how often disk flushing is triggered - (in seconds). - proxies: Argument passed to HTTP calls made via the Requests library, as dictionary of strings. - For more information about proxies, see the Requests documentation. - async_lag_callback: Custom callback which is called if the lag between a queued operation and its - synchronization with the server exceeds the duration defined by `async_lag_threshold`. The callback - should take a ModelVersion object as the argument and can contain any custom code, such as calling - `stop()` on the object. - Note: Instead of using this argument, you can use Neptune's default callback by setting the - `NEPTUNE_ENABLE_DEFAULT_ASYNC_LAG_CALLBACK` environment variable to `TRUE`. - async_lag_threshold: In seconds, duration between the queueing and synchronization of an operation. - If a lag callback (default callback enabled via environment variable or custom callback passed to the - `async_lag_callback` argument) is enabled, the callback is called when this duration is exceeded. - async_no_progress_callback: Custom callback which is called if there has been no synchronization progress - whatsoever for the duration defined by `async_no_progress_threshold`. 
The callback should take a - ModelVersion object as the argument and can contain any custom code, such as calling `stop()` on the - object. - Note: Instead of using this argument, you can use Neptune's default callback by setting the - `NEPTUNE_ENABLE_DEFAULT_ASYNC_NO_PROGRESS_CALLBACK` environment variable to `TRUE`. - async_no_progress_threshold: In seconds, for how long there has been no synchronization progress since the - object was initialized. If a no-progress callback (default callback enabled via environment variable or - custom callback passed to the `async_no_progress_callback` argument) is enabled, the callback is called - when this duration is exceeded. - - Returns: - ModelVersion object that is used to manage the model version and log metadata to it. - - Examples: - - >>> import neptune - - Creating a new model version: - - >>> # Create a new model version for a model with identifier "CLS-PRE" - ... model_version = neptune.init_model_version(model="CLS-PRE") - >>> model_version["your/structure"] = some_metadata - - >>> # You can provide the project parameter as an environment variable - ... # or directly in the init_model_version() function: - ... model_version = neptune.init_model_version( - ... model="CLS-PRE", - ... project="ml-team/classification", - ... ) - - >>> # Or initialize with the constructor: - ... model_version = ModelVersion(model="CLS-PRE") - - Connecting to an existing model version: - - >>> # Initialize an existing model version with identifier "CLS-PRE-12" - ... model_version = neptune.init_model_version(with_id="CLS-PRE-12") - - >>> # To prevent modifications when connecting to an existing model version, - ... # you can connect in read-only mode: - ... model_version = neptune.init_model(with_id="CLS-PRE-12", mode="read-only") - - Using the ModelVersion object as context manager: - - >>> with ModelVersion(model="CLS-PRE") as model_version: - ... 
model_version["metadata"] = some_metadata - - For more, see the docs: - Initializing a model version: - https://docs.neptune.ai/api/neptune#init_model_version - ModelVersion class reference: - https://docs.neptune.ai/api/model_version/ - """ verify_type("with_id", with_id, (str, type(None))) verify_type("name", name, (str, type(None))) verify_type("model", model, (str, type(None))) diff --git a/src/neptune/metadata_containers/project.py b/src/neptune/metadata_containers/project.py index a4b6e5b17..319014444 100644 --- a/src/neptune/metadata_containers/project.py +++ b/src/neptune/metadata_containers/project.py @@ -51,7 +51,83 @@ class Project(MetadataContainer): - """Class for tracking and retrieving project-level metadata of a neptune.ai project.""" + """Starts a connection to an existing Neptune project. + + You can use the Project object to retrieve information about runs, models, and model versions + within the project. + + You can also log (and fetch) metadata common to the whole project, such as information about datasets, + links to documents, or key project metrics. + + Note: If you want to instead create a project, use the + [`management.create_project()`](https://docs.neptune.ai/api/management/#create_project) function. + + You can also use the Project object as a context manager (see examples). + + Args: + project: Name of a project in the form `workspace-name/project-name`. + If left empty, the value of the NEPTUNE_PROJECT environment variable is used. + api_token: User's API token. + If left empty, the value of the NEPTUNE_API_TOKEN environment variable is used (recommended). + mode: Connection mode in which the tracking will work. + If left empty, the value of the NEPTUNE_MODE environment variable is used. + If no value was set for the environment variable, "async" is used by default. + Possible values are `async`, `sync`, `offline`, `read-only`, and `debug`. 
+ flush_period: In the asynchronous (default) connection mode, how often disk flushing is triggered. + Defaults to 5 (every 5 seconds). + proxies: Argument passed to HTTP calls made via the Requests library, as dictionary of strings. + For more information about proxies, see the Requests documentation. + async_lag_callback: Custom callback which is called if the lag between a queued operation and its + synchronization with the server exceeds the duration defined by `async_lag_threshold`. The callback + should take a Project object as the argument and can contain any custom code, such as calling `stop()` + on the object. + Note: Instead of using this argument, you can use Neptune's default callback by setting the + `NEPTUNE_ENABLE_DEFAULT_ASYNC_LAG_CALLBACK` environment variable to `TRUE`. + async_lag_threshold: In seconds, duration between the queueing and synchronization of an operation. + If a lag callback (default callback enabled via environment variable or custom callback passed to the + `async_lag_callback` argument) is enabled, the callback is called when this duration is exceeded. + async_no_progress_callback: Custom callback which is called if there has been no synchronization progress + whatsoever for the duration defined by `async_no_progress_threshold`. The callback + should take a Project object as the argument and can contain any custom code, such as calling `stop()` + on the object. + Note: Instead of using this argument, you can use Neptune's default callback by setting the + `NEPTUNE_ENABLE_DEFAULT_ASYNC_NO_PROGRESS_CALLBACK` environment variable to `TRUE`. + async_no_progress_threshold: In seconds, for how long there has been no synchronization progress since the + object was initialized. If a no-progress callback (default callback enabled via environment variable or + custom callback passed to the `async_no_progress_callback` argument) is enabled, the callback is called + when this duration is exceeded. 
+ + Returns: + Project object that can be used to interact with the project as a whole, + like logging or fetching project-level metadata. + + Examples: + + >>> import neptune + + >>> # Connect to the project "classification" in the workspace "ml-team": + ... project = neptune.init_project(project="ml-team/classification") + + >>> # Or initialize with the constructor + ... project = Project(project="ml-team/classification") + + >>> # Connect to a project in read-only mode: + ... project = neptune.init_project( + ... project="ml-team/classification", + ... mode="read-only", + ... ) + + Using the Project object as context manager: + + >>> with Project(project="ml-team/classification") as project: + ... project["metadata"] = some_metadata + + For more, see the docs: + Initializing a project: + https://docs.neptune.ai/api/neptune#init_project + Project class reference: + https://docs.neptune.ai/api/project/ + """ container_type = ContainerType.PROJECT @@ -68,83 +144,6 @@ def __init__( async_no_progress_callback: Optional[NeptuneObjectCallback] = None, async_no_progress_threshold: float = ASYNC_NO_PROGRESS_THRESHOLD, ): - """Starts a connection to an existing Neptune project. - - You can use the Project object to retrieve information about runs, models, and model versions - within the project. - - You can also log (and fetch) metadata common to the whole project, such as information about datasets, - links to documents, or key project metrics. - - Note: If you want to instead create a project, use the - [`management.create_project()`](https://docs.neptune.ai/api/management/#create_project) function. - - You can also use the Project object as a context manager (see examples). - - Args: - project: Name of a project in the form `workspace-name/project-name`. - If left empty, the value of the NEPTUNE_PROJECT environment variable is used. - api_token: User's API token. - If left empty, the value of the NEPTUNE_API_TOKEN environment variable is used (recommended). 
- mode: Connection mode in which the tracking will work. - If left empty, the value of the NEPTUNE_MODE environment variable is used. - If no value was set for the environment variable, "async" is used by default. - Possible values are `async`, `sync`, `offline`, `read-only`, and `debug`. - flush_period: In the asynchronous (default) connection mode, how often disk flushing is triggered. - Defaults to 5 (every 5 seconds). - proxies: Argument passed to HTTP calls made via the Requests library, as dictionary of strings. - For more information about proxies, see the Requests documentation. - async_lag_callback: Custom callback which is called if the lag between a queued operation and its - synchronization with the server exceeds the duration defined by `async_lag_threshold`. The callback - should take a Project object as the argument and can contain any custom code, such as calling `stop()` - on the object. - Note: Instead of using this argument, you can use Neptune's default callback by setting the - `NEPTUNE_ENABLE_DEFAULT_ASYNC_LAG_CALLBACK` environment variable to `TRUE`. - async_lag_threshold: In seconds, duration between the queueing and synchronization of an operation. - If a lag callback (default callback enabled via environment variable or custom callback passed to the - `async_lag_callback` argument) is enabled, the callback is called when this duration is exceeded. - async_no_progress_callback: Custom callback which is called if there has been no synchronization progress - whatsoever for the duration defined by `async_no_progress_threshold`. The callback - should take a Project object as the argument and can contain any custom code, such as calling `stop()` - on the object. - Note: Instead of using this argument, you can use Neptune's default callback by setting the - `NEPTUNE_ENABLE_DEFAULT_ASYNC_NO_PROGRESS_CALLBACK` environment variable to `TRUE`. 
- async_no_progress_threshold: In seconds, for how long there has been no synchronization progress since the - object was initialized. If a no-progress callback (default callback enabled via environment variable or - custom callback passed to the `async_no_progress_callback` argument) is enabled, the callback is called - when this duration is exceeded. - - Returns: - Project object that can be used to interact with the project as a whole, - like logging or fetching project-level metadata. - - Examples: - - >>> import neptune - - >>> # Connect to the project "classification" in the workspace "ml-team": - ... project = neptune.init_project(project="ml-team/classification") - - >>> # Or initialize with the constructor - ... project = Project(project="ml-team/classification") - - >>> # Connect to a project in read-only mode: - ... project = neptune.init_project( - ... project="ml-team/classification", - ... mode="read-only", - ... ) - - Using the Project object as context manager: - - >>> with Project(project="ml-team/classification") as project: - ... project["metadata"] = some_metadata - - For more, see the docs: - Initializing a project: - https://docs.neptune.ai/api/neptune#init_project - Project class reference: - https://docs.neptune.ai/api/project/ - """ verify_type("mode", mode, (str, type(None))) # make mode proper Enum instead of string diff --git a/src/neptune/metadata_containers/run.py b/src/neptune/metadata_containers/run.py index e91e7257a..22c2d8e84 100644 --- a/src/neptune/metadata_containers/run.py +++ b/src/neptune/metadata_containers/run.py @@ -104,7 +104,178 @@ class Run(MetadataContainer): - """Starts a tracked run that logs ML model-building metadata to neptune.ai.""" + """Starts a new tracked run that logs ML model-building metadata to neptune.ai. 
+ + You can log metadata by assigning it to the initialized Run object: + + ``` + run = neptune.init_run() + run["your/structure"] = some_metadata + ``` + + Examples of metadata you can log: metrics, losses, scores, artifact versions, images, predictions, + model weights, parameters, checkpoints, and interactive visualizations. + + By default, the run automatically tracks hardware consumption, stdout/stderr, source code, and Git information. + If you're using Neptune in an interactive session, however, some background monitoring needs to be enabled + explicitly. + + If you provide the ID of an existing run, that run is resumed and no new run is created. You may resume a run + either to log more metadata or to fetch metadata from it. + + The run ends either when its `stop()` method is called or when the script finishes execution. + + You can also use the Run object as a context manager (see examples). + + Args: + project: Name of the project where the run should go, in the form `workspace-name/project-name`. + If left empty, the value of the NEPTUNE_PROJECT environment variable is used. + api_token: User's API token. + If left empty, the value of the NEPTUNE_API_TOKEN environment variable is used (recommended). + with_id: If you want to resume a run, pass the identifier of an existing run. For example, "SAN-1". + If left empty, a new run is created. + custom_run_id: A unique identifier to be used when running Neptune in distributed training jobs. + Make sure to use the same identifier throughout the whole pipeline execution. + mode: Connection mode in which the tracking will work. + If left empty, the value of the NEPTUNE_MODE environment variable is used. + If no value was set for the environment variable, "async" is used by default. + Possible values are `async`, `sync`, `offline`, `read-only`, and `debug`. + name: Custom name for the run. You can add it as a column in the runs table ("sys/name"). 
+ You can also edit the name in the app: Open the run menu and access the run information. + description: Custom description of the run. You can add it as a column in the runs table + ("sys/description"). + You can also edit the description in the app: Open the run menu and access the run information. + tags: Tags of the run as a list of strings. + You can edit the tags through the "sys/tags" field or in the app (run menu -> information). + You can also select multiple runs and manage their tags as a single action. + source_files: List of source files to be uploaded. + Uploaded source files are displayed in the "Source code" dashboard. + To not upload anything, pass an empty list (`[]`). + Unix style pathname pattern expansion is supported. For example, you can pass `*.py` to upload + all Python files from the current directory. + If None is passed, the Python file from which the run was created will be uploaded. + capture_stdout: Whether to log the stdout of the run. + Defaults to `False` in interactive sessions and `True` otherwise. + The data is logged under the monitoring namespace (see the `monitoring_namespace` parameter). + capture_stderr: Whether to log the stderr of the run. + Defaults to `False` in interactive sessions and `True` otherwise. + The data is logged under the monitoring namespace (see the `monitoring_namespace` parameter). + capture_hardware_metrics: Whether to send hardware monitoring logs (CPU, GPU, and memory utilization). + Defaults to `False` in interactive sessions and `True` otherwise. + The data is logged under the monitoring namespace (see the `monitoring_namespace` parameter). + fail_on_exception: Whether to register an uncaught exception handler to this process and, + in case of an exception, set the "sys/failed" field of the run to `True`. + An exception is always logged. + monitoring_namespace: Namespace inside which all hardware monitoring logs are stored. 
+ Defaults to "monitoring/<hash>", where the hash is generated based on environment information, + to ensure that it's unique for each process. + flush_period: In the asynchronous (default) connection mode, how often disk flushing is triggered + (in seconds). + proxies: Argument passed to HTTP calls made via the Requests library, as dictionary of strings. + For more information about proxies, see the Requests documentation. + capture_traceback: Whether to log the traceback of the run in case of an exception. + The tracked metadata is stored in the "<monitoring_namespace>/traceback" namespace (see the + `monitoring_namespace` parameter). + git_ref: GitRef object containing information about the Git repository path. + If None, Neptune looks for a repository in the path of the script that is executed. + To specify a different location, set to GitRef(repository_path="path/to/repo"). + To turn off Git tracking for the run, set to False or GitRef.DISABLED. + dependencies: If you pass `"infer"`, Neptune logs dependencies installed in the current environment. + You can also pass a path to your dependency file directly. + If left empty, no dependencies are tracked. + async_lag_callback: Custom callback which is called if the lag between a queued operation and its + synchronization with the server exceeds the duration defined by `async_lag_threshold`. The callback + should take a Run object as the argument and can contain any custom code, such as calling `stop()` on + the object. + Note: Instead of using this argument, you can use Neptune's default callback by setting the + `NEPTUNE_ENABLE_DEFAULT_ASYNC_LAG_CALLBACK` environment variable to `TRUE`. + async_lag_threshold: In seconds, duration between the queueing and synchronization of an operation. + If a lag callback (default callback enabled via environment variable or custom callback passed to the + `async_lag_callback` argument) is enabled, the callback is called when this duration is exceeded.
+ async_no_progress_callback: Custom callback which is called if there has been no synchronization progress + whatsoever for the duration defined by `async_no_progress_threshold`. The callback + should take a Run object as the argument and can contain any custom code, such as calling `stop()` on + the object. + Note: Instead of using this argument, you can use Neptune's default callback by setting the + `NEPTUNE_ENABLE_DEFAULT_ASYNC_NO_PROGRESS_CALLBACK` environment variable to `TRUE`. + async_no_progress_threshold: In seconds, for how long there has been no synchronization progress since the + object was initialized. If a no-progress callback (default callback enabled via environment variable or + custom callback passed to the `async_no_progress_callback` argument) is enabled, the callback is called + when this duration is exceeded. + + Returns: + Run object that is used to manage the tracked run and log metadata to it. + + Examples: + + Creating a new run: + + >>> import neptune + + >>> # Minimal invoke + ... # (creates a run in the project specified by the NEPTUNE_PROJECT environment variable) + ... run = neptune.init_run() + + >>> # Or initialize with the constructor + ... run = Run(project="ml-team/classification") + + >>> # Create a run with a name and description, with no source files or Git info tracked: + >>> run = neptune.init_run( + ... name="neural-net-mnist", + ... description="neural net trained on MNIST", + ... source_files=[], + ... git_ref=False, + ... ) + + >>> # Log all .py files from all subdirectories, excluding hidden files + ... run = neptune.init_run(source_files="**/*.py") + + >>> # Log all files and directories in the current working directory, excluding hidden files + ... run = neptune.init_run(source_files="*") + + >>> # Larger example + ... run = neptune.init_run( + ... project="ml-team/classification", + ... name="first-pytorch-ever", + ... description="Longer description of the run goes here", + ...
tags=["tags", "go-here", "as-list-of-strings"], + ... source_files=["training_with_pytorch.py", "net.py"], + ... dependencies="infer", + ... capture_stderr=False, + ... git_ref=GitRef(repository_path="/Users/Jackie/repos/cls_project"), + ... ) + + Connecting to an existing run: + + >>> # Resume logging to an existing run with the ID "SAN-3" + ... run = neptune.init_run(with_id="SAN-3") + ... run["parameters/lr"] = 0.1 # modify or add metadata + + >>> # Initialize an existing run in read-only mode (logging new data is not possible, only fetching) + ... run = neptune.init_run(with_id="SAN-4", mode="read-only") + ... learning_rate = run["parameters/lr"].fetch() + + Using the Run object as context manager: + + >>> with Run() as run: + ... run["metric"].append(value) + + For more, see the docs: + Initializing a run: + https://docs.neptune.ai/api/neptune#init_run + Run class reference: + https://docs.neptune.ai/api/run/ + Essential logging methods: + https://docs.neptune.ai/logging/methods/ + Resuming a run: + https://docs.neptune.ai/logging/to_existing_object/ + Setting a custom run ID: + https://docs.neptune.ai/logging/custom_run_id/ + Logging to multiple runs at once: + https://docs.neptune.ai/logging/to_multiple_objects/ + Accessing the run from multiple places: + https://docs.neptune.ai/logging/from_multiple_places/ + """ container_type = ContainerType.RUN @@ -159,178 +330,6 @@ def __init__( async_no_progress_threshold: float = ASYNC_NO_PROGRESS_THRESHOLD, **kwargs, ): - """Starts a new tracked run that logs ML model-building metadata to neptune.ai. - - You can log metadata by assigning it to the initialized Run object: - - ``` - run = neptune.init_run() - run["your/structure"] = some_metadata - ``` - - Examples of metadata you can log: metrics, losses, scores, artifact versions, images, predictions, - model weights, parameters, checkpoints, and interactive visualizations. 
- - By default, the run automatically tracks hardware consumption, stdout/stderr, source code, and Git information. - If you're using Neptune in an interactive session, however, some background monitoring needs to be enabled - explicitly. - - If you provide the ID of an existing run, that run is resumed and no new run is created. You may resume a run - either to log more metadata or to fetch metadata from it. - - The run ends either when its `stop()` method is called or when the script finishes execution. - - You can also use the Run object as a context manager (see examples). - - Args: - project: Name of the project where the run should go, in the form `workspace-name/project_name`. - If left empty, the value of the NEPTUNE_PROJECT environment variable is used. - api_token: User's API token. - If left empty, the value of the NEPTUNE_API_TOKEN environment variable is used (recommended). - with_id: If you want to resume a run, pass the identifier of an existing run. For example, "SAN-1". - If left empty, a new run is created. - custom_run_id: A unique identifier to be used when running Neptune in distributed training jobs. - Make sure to use the same identifier throughout the whole pipeline execution. - mode: Connection mode in which the tracking will work. - If left empty, the value of the NEPTUNE_MODE environment variable is used. - If no value was set for the environment variable, "async" is used by default. - Possible values are `async`, `sync`, `offline`, `read-only`, and `debug`. - name: Custom name for the run. You can add it as a column in the runs table ("sys/name"). - You can also edit the name in the app: Open the run menu and access the run information. - description: Custom description of the run. You can add it as a column in the runs table - ("sys/description"). - You can also edit the description in the app: Open the run menu and access the run information. - tags: Tags of the run as a list of strings. 
- You can edit the tags through the "sys/tags" field or in the app (run menu -> information). - You can also select multiple runs and manage their tags as a single action. - source_files: List of source files to be uploaded. - Uploaded source files are displayed in the "Source code" dashboard. - To not upload anything, pass an empty list (`[]`). - Unix style pathname pattern expansion is supported. For example, you can pass `*.py` to upload - all Python files from the current directory. - If None is passed, the Python file from which the run was created will be uploaded. - capture_stdout: Whether to log the stdout of the run. - Defaults to `False` in interactive sessions and `True` otherwise. - The data is logged under the monitoring namespace (see the `monitoring_namespace` parameter). - capture_stderr: Whether to log the stderr of the run. - Defaults to `False` in interactive sessions and `True` otherwise. - The data is logged under the monitoring namespace (see the `monitoring_namespace` parameter). - capture_hardware_metrics: Whether to send hardware monitoring logs (CPU, GPU, and memory utilization). - Defaults to `False` in interactive sessions and `True` otherwise. - The data is logged under the monitoring namespace (see the `monitoring_namespace` parameter). - fail_on_exception: Whether to register an uncaught exception handler to this process and, - in case of an exception, set the "sys/failed" field of the run to `True`. - An exception is always logged. - monitoring_namespace: Namespace inside which all hardware monitoring logs are stored. - Defaults to "monitoring/", where the hash is generated based on environment information, - to ensure that it's unique for each process. - flush_period: In the asynchronous (default) connection mode, how often disk flushing is triggered - (in seconds). - proxies: Argument passed to HTTP calls made via the Requests library, as dictionary of strings. - For more information about proxies, see the Requests documentation. 
- capture_traceback: Whether to log the traceback of the run in case of an exception. - The tracked metadata is stored in the "/traceback" namespace (see the - `monitoring_namespace` parameter). - git_ref: GitRef object containing information about the Git repository path. - If None, Neptune looks for a repository in the path of the script that is executed. - To specify a different location, set to GitRef(repository_path="path/to/repo"). - To turn off Git tracking for the run, set to False or GitRef.DISABLED. - dependencies: If you pass `"infer"`, Neptune logs dependencies installed in the current environment. - You can also pass a path to your dependency file directly. - If left empty, no dependencies are tracked. - async_lag_callback: Custom callback which is called if the lag between a queued operation and its - synchronization with the server exceeds the duration defined by `async_lag_threshold`. The callback - should take a Run object as the argument and can contain any custom code, such as calling `stop()` on - the object. - Note: Instead of using this argument, you can use Neptune's default callback by setting the - `NEPTUNE_ENABLE_DEFAULT_ASYNC_LAG_CALLBACK` environment variable to `TRUE`. - async_lag_threshold: In seconds, duration between the queueing and synchronization of an operation. - If a lag callback (default callback enabled via environment variable or custom callback passed to the - `async_lag_callback` argument) is enabled, the callback is called when this duration is exceeded. - async_no_progress_callback: Custom callback which is called if there has been no synchronization progress - whatsoever for the duration defined by `async_no_progress_threshold`. The callback - should take a Run object as the argument and can contain any custom code, such as calling `stop()` on - the object. 
- Note: Instead of using this argument, you can use Neptune's default callback by setting the - `NEPTUNE_ENABLE_DEFAULT_ASYNC_NO_PROGRESS_CALLBACK` environment variable to `TRUE`. - async_no_progress_threshold: In seconds, for how long there has been no synchronization progress since the - object was initialized. If a no-progress callback (default callback enabled via environment variable or - custom callback passed to the `async_no_progress_callback` argument) is enabled, the callback is called - when this duration is exceeded. - - Returns: - Run object that is used to manage the tracked run and log metadata to it. - - Examples: - - Creating a new run: - - >>> import neptune - - >>> # Minimal invoke - ... # (creates a run in the project specified by the NEPTUNE_PROJECT environment variable) - ... run = neptune.init_run() - - >>> # Or initialize with the constructor - ... run = Run(project="ml-team/classification") - - >>> # Create a run with a name and description, with no sources files or Git info tracked: - >>> run = neptune.init_run( - ... name="neural-net-mnist", - ... description="neural net trained on MNIST", - ... source_files=[], - ... git_ref=False, - ... ) - - >>> # Log all .py files from all subdirectories, excluding hidden files - ... run = neptune.init_run(source_files="**/*.py") - - >>> # Log all files and directories in the current working directory, excluding hidden files - ... run = neptune.init_run(source_files="*") - - >>> # Larger example - ... run = neptune.init_run( - ... project="ml-team/classification", - ... name="first-pytorch-ever", - ... description="Longer description of the run goes here", - ... tags=["tags", "go-here", "as-list-of-strings"], - ... source_files=["training_with_pytorch.py", "net.py"], - ... dependencies="infer", - ... capture_stderr=False, - ... git_ref=GitRef(repository_path="/Users/Jackie/repos/cls_project"), - ... 
) - - Connecting to an existing run: - - >>> # Resume logging to an existing run with the ID "SAN-3" - ... run = neptune.init_run(with_id="SAN-3") - ... run["parameters/lr"] = 0.1 # modify or add metadata - - >>> # Initialize an existing run in read-only mode (logging new data is not possible, only fetching) - ... run = neptune.init_run(with_id="SAN-4", mode="read-only") - ... learning_rate = run["parameters/lr"].fetch() - - Using the Run object as context manager: - - >>> with Run() as run: - ... run["metric"].append(value) - - For more, see the docs: - Initializing a run: - https://docs.neptune.ai/api/neptune#init_run - Run class reference: - https://docs.neptune.ai/api/run/ - Essential logging methods: - https://docs.neptune.ai/logging/methods/ - Resuming a run: - https://docs.neptune.ai/logging/to_existing_object/ - Setting a custom run ID: - https://docs.neptune.ai/logging/custom_run_id/ - Logging to multiple runs at once: - https://docs.neptune.ai/logging/to_multiple_objects/ - Accessing the run from multiple places: - https://docs.neptune.ai/logging/from_multiple_places/ - """ check_for_extra_kwargs("Run", kwargs) verify_type("with_id", with_id, (str, type(None)))