Skip to content

Commit

Permalink
Improved spec, added validations for duplicate information in the age…
Browse files Browse the repository at this point in the history
…nt spec which is copied from the action package.
  • Loading branch information
fabioz authored Sep 12, 2024
1 parent e9b03ac commit 5669551
Show file tree
Hide file tree
Showing 19 changed files with 574 additions and 108 deletions.
6 changes: 6 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
- The yaml for `OAuth2` is now different (saved in new location with new structure).
- There's now a `mode` which can specify `mode: sema4ai` to use `Sema4.ai` provided configuration (without `clientSecret` required).
- New command: `Sema4.ai: Import Agent Package (Zip)`.
- Additional validations in the `agent-spec.yaml` to verify that the information is in sync with linked action packages:
- Check if the `zip`, `folder` type matches the actual referenced type
- Check if the action package `name` matches the referenced value
- Check if the action package `version` matches the referenced value
- Checks that all the actions found under `actions` are properly referenced in the `agent-spec.yaml`
- Note: handles both .zip and expanded action packages.

## New in 2.4.2 (2024-08-26)

Expand Down
Empty file.
6 changes: 3 additions & 3 deletions sema4ai/src/sema4ai_code/agents/agent_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
"agent-package/agents/action-packages/name": {
"description": "The name of the action package. Example: \"my-action-package\"",
"required": true,
"expected-type": "string"
"expected-type": "action_package_name_link"
},
"agent-package/agents/action-packages/organization": {
"description": "The organization that provided the action. Example: \"MyActions\"",
Expand All @@ -112,14 +112,14 @@
"description": "The type of the action package distribution. Accepted values are \"zip\" and \"folder\"",
"required": true,
"expected-type": {
"type": "enum",
"type": "zip_or_folder_based_on_path",
"values": ["zip", "folder"]
}
},
"agent-package/agents/action-packages/version": {
"description": "Version of the action package. Example: \"1.3.0\"",
"required": true,
"expected-type": "string"
"expected-type": "action_package_version_link"
},
"agent-package/agents/action-packages/whitelist": {
"description": "Whitelist of actions (comma-separated string) accepted in the action package. An empty string value for whitelist implies usage of all actions. Example: action1, action2",
Expand Down
236 changes: 201 additions & 35 deletions sema4ai/src/sema4ai_code/agents/agent_spec_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,15 @@
from pathlib import Path
from typing import Any, Generic, Iterator, Optional, TypeVar

from sema4ai_ls_core.core_log import get_logger

if typing.TYPE_CHECKING:
from tree_sitter import Node, Tree

from sema4ai_code.agents.list_actions_from_agent import ActionPackageInFilesystem

log = get_logger(__name__)


class _ExpectedTypeEnum(Enum):
object = "object"
Expand All @@ -19,6 +25,9 @@ class _ExpectedTypeEnum(Enum):
int = "int"
float = "float"
NOT_SET = "NOT_SET" # Should be set only when deprecated is true.
action_package_version_link = "action_package_version_link"
action_package_name_link = "action_package_name_link"
zip_or_folder_based_on_path = "zip_or_folder_based_on_path"


class _YamlNodeKind(Enum):
Expand All @@ -35,6 +44,10 @@ class _YamlNodeKind(Enum):
string = "string"


class ErrorCode(Enum):
action_package_info_unsynchronized = "action_package_info_unsynchronized"


@dataclass
class _ExpectedType:
expected_type: _ExpectedTypeEnum # May be "object", "string", "enum", "file", "list" (implies "list[object]"), or "bool".
Expand Down Expand Up @@ -145,6 +158,7 @@ def load_spec(json_spec: dict[str, Any]) -> dict[str, Entry]:
class Error:
message: str
node: Optional["Node"] = None
code: Optional[ErrorCode] = None


class TreeNode(Generic[T]):
Expand Down Expand Up @@ -335,6 +349,9 @@ def __init__(self, spec_entries: dict[str, Entry], agent_root_dir: Path) -> None
# doing the actual validation from the spec checking that the nodes exist and have the correct info).
self._yaml_info_loaded = _YamlTreeNode("root")
self._yaml_cursor_node: _YamlTreeNode = self._yaml_info_loaded
self._action_packages_found_in_filesystem: dict[
Path, "ActionPackageInFilesystem"
] = {}

def _validate_key_pair(
self, key_node: "Node", value_node: Optional["Node"]
Expand Down Expand Up @@ -518,6 +535,48 @@ def _verify_yaml_matches_spec(
message=f"Expected {spec_node.data.path} to be a string (found {yaml_node.data.kind.value}).",
node=yaml_node.data.node,
)
elif spec_node.data.expected_type.expected_type in (
_ExpectedTypeEnum.action_package_version_link,
_ExpectedTypeEnum.action_package_name_link,
):
if yaml_node.data.kind != _YamlNodeKind.string:
yield Error(
message=f"Expected {spec_node.data.path} to be a string (found {yaml_node.data.kind.value}).",
node=yaml_node.data.node,
code=ErrorCode.action_package_info_unsynchronized,
)
else:
package_info_in_filesystem: Optional[
"ActionPackageInFilesystem"
] = self._get_action_package_info(spec_node, yaml_node)
if package_info_in_filesystem is not None:
if (
spec_node.data.expected_type.expected_type
== _ExpectedTypeEnum.action_package_version_link
):
version_in_filesystem = (
package_info_in_filesystem.get_version()
)
version_in_agent_package = self._get_value_text(yaml_node)
if version_in_filesystem != version_in_agent_package:
yield Error(
message=f"Expected {spec_node.data.path} to match the version in the action package being referenced ('{version_in_filesystem}'). Found in spec: '{version_in_agent_package}'",
node=yaml_node.data.node,
code=ErrorCode.action_package_info_unsynchronized,
)
elif (
spec_node.data.expected_type.expected_type
== _ExpectedTypeEnum.action_package_name_link
):
name_in_filesystem = package_info_in_filesystem.get_name()
name_in_agent_package = self._get_value_text(yaml_node)
if name_in_filesystem != name_in_agent_package:
yield Error(
message=f"Expected {spec_node.data.path} to match the name in the action package being referenced ('{name_in_filesystem}'). Found in spec: '{name_in_agent_package}'",
node=yaml_node.data.node,
code=ErrorCode.action_package_info_unsynchronized,
)

elif spec_node.data.expected_type.expected_type == _ExpectedTypeEnum.int:
if yaml_node.data.kind != _YamlNodeKind.int:
yield Error(
Expand All @@ -536,7 +595,10 @@ def _verify_yaml_matches_spec(
message=f"Expected {spec_node.data.path} to be a bool (found {yaml_node.data.kind.value}).",
node=yaml_node.data.node,
)
elif spec_node.data.expected_type.expected_type == _ExpectedTypeEnum.enum:
elif spec_node.data.expected_type.expected_type in (
_ExpectedTypeEnum.enum,
_ExpectedTypeEnum.zip_or_folder_based_on_path,
):
if yaml_node.data.kind != _YamlNodeKind.string:
yield Error(
message=f"Expected {spec_node.data.path} to be a string (found {yaml_node.data.kind.value}).",
Expand All @@ -562,53 +624,121 @@ def _verify_yaml_matches_spec(
message=f"Expected {spec_node.data.path} to be one of {enum_values} (found {yaml_node_text!r}).",
node=yaml_node.data.node,
)

elif (
spec_node.data.expected_type.expected_type
== _ExpectedTypeEnum.zip_or_folder_based_on_path
):
# Additional validation based on the path (to see if zip/folder matches what's in the spec).
package_info_in_filesystem = self._get_action_package_info(
spec_node, yaml_node
)
if package_info_in_filesystem is not None:
is_zip_package = package_info_in_filesystem.is_zip()
if is_zip_package:
expected_type_from_filesystem = "zip"
else:
expected_type_from_filesystem = "folder"

if yaml_node_text != expected_type_from_filesystem:
yield Error(
message=f"Expected {spec_node.data.path} to match the type in the action package being referenced ('{expected_type_from_filesystem}'). Found in spec: '{yaml_node_text}'",
node=yaml_node.data.node,
code=ErrorCode.action_package_info_unsynchronized,
)

elif spec_node.data.expected_type.expected_type == _ExpectedTypeEnum.file:
if yaml_node.data.kind != _YamlNodeKind.string:
yield Error(
message=f"Expected {spec_node.data.path} to be a string (found {yaml_node.data.kind.value}).",
node=yaml_node.data.node,
)
else:
value_text = self._get_value_text(yaml_node)
if value_text is None:
yield Error(
message=f"Error while handling: {spec_node.data.path} .",
node=yaml_node.data.node,
)
else:
if "\\" in value_text:
yield Error(
message=f"{spec_node.data.path} may not contain `\\` characters.",
node=yaml_node.data.node,
)
else:
relative_to = spec_node.data.expected_type.relative_to
assert relative_to, f"Expected relative_to to be set in {spec_node.data.path}"

if not relative_to.startswith("."):
relative_to = "./" + relative_to

p = self._agent_root_dir / relative_to / value_text
if not p.exists():
if relative_to.startswith("./"):
relative_to = relative_to[2:]
if relative_to == "./":
relative_to = "dir('agent-spec.yaml')"
else:
relative_to = (
f"dir('agent-spec.yaml')/{relative_to}"
)

yield Error(
message=f"Expected {spec_node.data.path} to map to a file named {value_text!r} relative to {relative_to!r}.",
node=yaml_node.data.node,
)
relative_to: Optional[
str
] = spec_node.data.expected_type.relative_to
assert (
relative_to
), f"Expected relative_to to be set in {spec_node.data.path}"

error_or_path = self._get_node_value_points_to_path(
spec_node.data.path, yaml_node, relative_to
)
if isinstance(error_or_path, Error):
yield error_or_path

else:
raise Exception(
f"Unexpected expected type: {spec_node.data.expected_type.expected_type}"
)

def _get_action_package_info(
self, spec_node, yaml_node
) -> Optional["ActionPackageInFilesystem"]:
p = spec_node.data.path.split("/")[:-1] + ["path"]
path_yaml_node = yaml_node.parent.children.get("path")
error_or_path = self._get_node_value_points_to_path(
p, path_yaml_node, "actions"
)
if not isinstance(error_or_path, Path):
# i.e.: if not path we can just ignore it.
return None

if error_or_path.is_dir():
found_in_filesystem: "ActionPackageInFilesystem" | None = (
self._action_packages_found_in_filesystem.get(
error_or_path / "package.yaml"
)
)

elif error_or_path.is_file():
found_in_filesystem = self._action_packages_found_in_filesystem.get(
error_or_path
)

else:
return None

if found_in_filesystem is not None:
found_in_filesystem.referenced_from_agent_spec = True
return found_in_filesystem

def _get_node_value_points_to_path(
self, spec_path_for_error_msg: str, yaml_node, relative_to: str
) -> Error | Path | None:
value_text = self._get_value_text(yaml_node)
if value_text is None:
return Error(
message=f"Error while handling: {spec_path_for_error_msg} .",
node=yaml_node.data.node,
)
else:
if "\\" in value_text:
return Error(
message=f"{spec_path_for_error_msg} may not contain `\\` characters.",
node=yaml_node.data.node,
)
else:
if not relative_to.startswith("."):
relative_to = "./" + relative_to

p = (self._agent_root_dir / relative_to / value_text).absolute()

path_exists: bool = p.exists()

if not path_exists:
if relative_to.startswith("./"):
relative_to = relative_to[2:]
if relative_to == "./":
relative_to = "dir('agent-spec.yaml')"
else:
relative_to = f"dir('agent-spec.yaml')/{relative_to}"
return Error(
message=f"Expected {spec_path_for_error_msg} to map to a file named {value_text!r} relative to {relative_to!r}.",
node=yaml_node.data.node,
)
return p

def _get_value_text(self, node: _YamlTreeNode) -> Optional[str]:
value_node = node.data.node.child_by_field_name("value")
if value_node is None:
Expand All @@ -631,6 +761,42 @@ def _validate_yaml_from_spec(self) -> Iterator[Error]:

yield from self._verify_yaml_matches_spec(root, self._yaml_info_loaded, None)

def _validate_unreferenced_action_packages(self) -> Iterator[Error]:
# print(self._yaml_info_loaded.pretty())

agent_package = self._yaml_info_loaded.children.get("agent-package")
report_error_at_node = agent_package
if not agent_package:
return

agents = agent_package.children.get("agents")
if agents:
report_error_at_node = agents
for _, agent in agents.children.items():
action_packages_node = agent.children.get("action-packages")
if action_packages_node is not None:
report_error_at_node = action_packages_node
break

for (
action_package_in_filesystem
) in self._action_packages_found_in_filesystem.values():
if not action_package_in_filesystem.referenced_from_agent_spec:
yield Error(
message=f"Action Package path not referenced in the `agent-spec.yaml`: '{action_package_in_filesystem.relative_path}'.",
node=report_error_at_node.data.node
if report_error_at_node is not None
else None,
code=ErrorCode.action_package_info_unsynchronized,
)

def validate(self, node: "Node") -> Iterator[Error]:
from sema4ai_code.agents.list_actions_from_agent import list_actions_from_agent

self._action_packages_found_in_filesystem = list_actions_from_agent(
self._agent_root_dir
)

yield from self._validate_nodes_exist_and_build_yaml_info(node)
yield from self._validate_yaml_from_spec()
yield from self._validate_unreferenced_action_packages()
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,9 @@ def validate_agent(
"source": "sema4ai",
"message": error.message,
}

if error.code:
diagnostic["code"] = error.code.value
yield diagnostic

else:
Expand Down
Loading

0 comments on commit 5669551

Please sign in to comment.