Skip to content

Commit

Permalink
source service
Browse files Browse the repository at this point in the history
  • Loading branch information
ensaremirerol committed Dec 13, 2024
1 parent 2edf118 commit 08161ad
Show file tree
Hide file tree
Showing 14 changed files with 904 additions and 10 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ dependencies = [
"idna==3.10",
"importlib_metadata==8.5.0",
"Jinja2==3.1.4",
"jsonpath-ng>=1.7.0",
"kink==0.8.1",
"markdown-it-py==3.0.0",
"MarkupSafe==3.0.2",
Expand Down
8 changes: 8 additions & 0 deletions server/const/err_enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,11 @@ class ErrCodes(Enum):

# DB Service
DB_ERROR = 80

# Source Service
SOURCE_NOT_FOUND = 100
UNSUPPORTED_FILE_TYPE = 101
JSON_PATH_DATA_NOT_FOUND = 102
JSON_PATH_DATA_NOT_UNIQUE = 103
JSON_PATH_NOT_ARRAY = 104
JSON_PATH_NOT_PROVIDED = 105
244 changes: 244 additions & 0 deletions server/models/mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
from dataclasses import dataclass
from enum import StrEnum


class MappingNodeType(StrEnum):
ENTITY = "entity"
LITERAL = "literal"
URIRef = "uri_ref"


@dataclass(kw_only=True)
class MappingNode:
    """
    A node in a mapping graph.

    Attributes:
        id (str): The ID of the node
        type (MappingNodeType): The type of the node
        label (str): The label of the node
        uri_pattern (str): The URI pattern of the node
        rdf_type (list[str]): The RDF type/s of the node
    """

    id: str
    type: MappingNodeType
    label: str
    uri_pattern: str
    rdf_type: list[str]

    def to_dict(self) -> dict:
        """
        Convert the node to a dictionary.

        Returns:
            dict: Dictionary representation of the node
        """
        return {
            "id": self.id,
            "type": self.type,
            "label": self.label,
            "uri_pattern": self.uri_pattern,
            "rdf_type": self.rdf_type,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "MappingNode":
        """
        Create a MappingNode from a dictionary.

        The input dictionary is never modified.

        Args:
            data (dict): Dictionary with the keys ``id``, ``type``,
                ``label`` and ``uri_pattern``; ``rdf_type`` is optional
                and defaults to an empty list.

        Returns:
            MappingNode: The node built from ``data``

        Raises:
            ValueError: If a required key is missing or ``type`` is not
                a valid MappingNodeType value
        """
        # Checked in a fixed order so the first missing key is reported.
        for key in ("id", "type", "label", "uri_pattern"):
            if key not in data:
                raise ValueError(f"{key} is required")
        return cls(
            id=data["id"],
            # Coerce so the attribute really is the enum the annotation
            # promises; the member still compares equal to its string.
            type=MappingNodeType(data["type"]),
            label=data["label"],
            uri_pattern=data["uri_pattern"],
            # Use a default instead of writing the key back into `data`.
            rdf_type=data.get("rdf_type", []),
        )


@dataclass(kw_only=True)
class MappingLiteral:
    """
    A literal in a mapping graph.

    Attributes:
        id (str): The ID of the literal
        type (MappingNodeType): The type of the literal
        label (str): The label of the literal
        value (str): The value of the literal
        literal_type (str): The type of the literal
    """

    id: str
    type: MappingNodeType
    label: str
    value: str
    literal_type: str

    def to_dict(self) -> dict:
        """
        Convert the literal to a dictionary.

        Returns:
            dict: Dictionary representation of the literal
        """
        return {
            "id": self.id,
            "type": self.type,
            "label": self.label,
            "value": self.value,
            "literal_type": self.literal_type,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "MappingLiteral":
        """
        Create a MappingLiteral from a dictionary.

        Args:
            data (dict): Dictionary with the keys ``id``, ``type``,
                ``label``, ``value`` and ``literal_type``

        Returns:
            MappingLiteral: The literal built from ``data``

        Raises:
            ValueError: If a required key is missing or ``type`` is not
                a valid MappingNodeType value
        """
        # Checked in a fixed order so the first missing key is reported.
        for key in ("id", "type", "label", "value", "literal_type"):
            if key not in data:
                raise ValueError(f"{key} is required")
        return cls(
            id=data["id"],
            # Coerce so the attribute really is the enum the annotation
            # promises; the member still compares equal to its string.
            type=MappingNodeType(data["type"]),
            label=data["label"],
            value=data["value"],
            literal_type=data["literal_type"],
        )


@dataclass(kw_only=True)
class MappingURIRef:
    """
    A URI reference in a mapping graph.

    Attributes:
        id (str): The ID of the URI reference
        type (MappingNodeType): The type of the URI reference
        uri_pattern (str): The URI pattern of the URI reference
    """

    id: str
    type: MappingNodeType
    uri_pattern: str

    def to_dict(self) -> dict:
        """
        Convert the URI reference to a dictionary.

        Returns:
            dict: Dictionary representation of the URI reference
        """
        return {
            "id": self.id,
            "type": self.type,
            "uri_pattern": self.uri_pattern,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "MappingURIRef":
        """
        Create a MappingURIRef from a dictionary.

        Args:
            data (dict): Dictionary with the keys ``id``, ``type`` and
                ``uri_pattern``

        Returns:
            MappingURIRef: The URI reference built from ``data``

        Raises:
            ValueError: If a required key is missing or ``type`` is not
                a valid MappingNodeType value
        """
        # Checked in a fixed order so the first missing key is reported.
        for key in ("id", "type", "uri_pattern"):
            if key not in data:
                raise ValueError(f"{key} is required")
        return cls(
            id=data["id"],
            # Coerce so the attribute really is the enum the annotation
            # promises; the member still compares equal to its string.
            type=MappingNodeType(data["type"]),
            uri_pattern=data["uri_pattern"],
        )


@dataclass(kw_only=True)
class MappingEdge:
"""
An edge in a mapping graph.
Attributes:
id (str): The ID of the edge
source (str): The ID of the source node
target (str): The ID of the target node
predicate_uri (str): The URI of the predicate
"""

id: str
source: str
target: str
predicate_uri: str

def to_dict(self):
return {
"id": self.id,
"source": self.source,
"target": self.target,
"predicate_uri": self.predicate_uri,
}

@classmethod
def from_dict(cls, data):
if "id" not in data:
raise ValueError("id is required")
if "source" not in data:
raise ValueError("source is required")
if "target" not in data:
raise ValueError("target is required")
if "predicate_uri" not in data:
raise ValueError("predicate_uri is required")
return cls(
id=data["id"],
source=data["source"],
target=data["target"],
predicate_uri=data["predicate_uri"],
)


@dataclass(kw_only=True)
class MappingGraph:
    """
    A mapping graph.

    Attributes:
        uuid (str): The UUID of the graph
        source_id (str): The ID of the source the graph refers to
            (presumably a Source uuid; confirm against callers)
        nodes (list[MappingNode | MappingLiteral | MappingURIRef]):
            The nodes in the graph
        edges (list[MappingEdge]): The edges in the graph
    """

    uuid: str
    source_id: str
    nodes: list[
        MappingNode | MappingLiteral | MappingURIRef
    ]
    edges: list[MappingEdge]

    def to_dict(self) -> dict:
        """
        Convert the graph, including its nodes and edges, to a dictionary.

        Returns:
            dict: Dictionary representation of the graph
        """
        return {
            "uuid": self.uuid,
            "source_id": self.source_id,
            "nodes": [node.to_dict() for node in self.nodes],
            "edges": [edge.to_dict() for edge in self.edges],
        }

    @staticmethod
    def _node_from_dict(node: dict):
        """Build the node object matching the ``type`` entry of ``node``."""
        if "type" not in node:
            # Same error style as the other from_dict validators, instead
            # of letting a bare KeyError escape.
            raise ValueError("type is required")
        try:
            node_type = MappingNodeType(node["type"])
        except ValueError:
            # Reject unknown types explicitly rather than silently
            # treating them as URI references.
            raise ValueError(
                f"Unknown node type: {node['type']!r}"
            ) from None
        builders = {
            MappingNodeType.ENTITY: MappingNode,
            MappingNodeType.LITERAL: MappingLiteral,
            MappingNodeType.URIRef: MappingURIRef,
        }
        return builders[node_type].from_dict(node)

    @classmethod
    def from_dict(cls, data: dict) -> "MappingGraph":
        """
        Create a MappingGraph from a dictionary.

        Args:
            data (dict): Dictionary with the keys ``uuid``, ``source_id``,
                ``nodes`` and ``edges``; every node dictionary must carry
                a ``type`` that is a MappingNodeType value

        Returns:
            MappingGraph: The graph built from ``data``

        Raises:
            ValueError: If a required key is missing, a node has no or an
                unknown ``type``, or a node/edge dictionary is invalid
        """
        # Checked in a fixed order so the first missing key is reported.
        for key in ("uuid", "source_id", "nodes", "edges"):
            if key not in data:
                raise ValueError(f"{key} is required")
        return cls(
            uuid=data["uuid"],
            source_id=data["source_id"],
            nodes=[cls._node_from_dict(node) for node in data["nodes"]],
            edges=[MappingEdge.from_dict(edge) for edge in data["edges"]],
        )
80 changes: 80 additions & 0 deletions server/models/source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from dataclasses import dataclass
from enum import StrEnum


class SourceType(StrEnum):
"""
Enumeration of the types of sources.
"""

CSV = "csv"
JSON = "json"


@dataclass
class Source:
    """
    Data class representing a source.

    Attributes:
        - uuid: str
            The UUID of the source.
        - type: SourceType
            The type of the source.
        - references: list[str]
            The list of references that the source has. These references
            are used for assisting the user during the mapping process.
            For example, if the source is a CSV file, the references can
            be the column names.
        - file_uuid: str
            The UUID of the file. Depending on the type, this can point
            to a file or connection args to a database.
        - extra: dict
            Extra information that can be used for the source.
    """

    uuid: str
    type: SourceType
    references: list[str]
    file_uuid: str
    extra: dict

    def to_dict(self) -> dict:
        """
        Convert the object to a dictionary.

        Returns:
            dict: Dictionary representation of the object
        """
        return {
            "uuid": self.uuid,
            "type": self.type,
            "references": self.references,
            "file_uuid": self.file_uuid,
            # A missing/empty `extra` is always serialized as {}.
            "extra": self.extra or {},
        }

    @classmethod
    def from_dict(cls, data: dict) -> "Source":
        """
        Create a Source object from a dictionary.

        Args:
            data (dict): Dictionary containing the keys ``uuid``,
                ``type``, ``references`` and ``file_uuid``; ``extra`` is
                optional and defaults to an empty dict.

        Returns:
            Source: Source object

        Raises:
            ValueError: If a required key is missing or ``type`` is not a
                valid SourceType value
        """
        # Checked in a fixed order so the first missing key is reported.
        for key in ("uuid", "type", "references", "file_uuid"):
            if key not in data:
                raise ValueError(f"Missing '{key}' in data")

        return cls(
            uuid=data["uuid"],
            type=SourceType(data["type"]),
            references=data["references"],
            file_uuid=data["file_uuid"],
            # Treat an explicit None the same as an absent key so `extra`
            # is always a dict, as annotated.
            extra=data.get("extra") or {},
        )
Loading

0 comments on commit 08161ad

Please sign in to comment.