diff --git a/dataset_builders/pie/brat/README.md b/dataset_builders/pie/brat/README.md new file mode 100644 index 00000000..979f321f --- /dev/null +++ b/dataset_builders/pie/brat/README.md @@ -0,0 +1,80 @@ +# PIE Dataset Card for "conll2003" + +This is a [PyTorch-IE](https://github.com/ChristophAlt/pytorch-ie) wrapper for the +[BRAT Huggingface dataset loading script](https://huggingface.co/datasets/DFKI-SLT/brat). + +## Data Schema + +The document type for this dataset is `BratDocument` or `BratDocumentWithMergedSpans`, depending on if the +data was loaded with `merge_fragmented_spans=True` (default: `False`). They define the following data fields: + +- `text` (str) +- `id` (str, optional) +- `metadata` (dictionary, optional) + +and the following annotation layers: + +- `spans` (annotation type: `LabeledMultiSpan` in the case of `BratDocument` and `LabeledSpan` and in the case of `BratDocumentWithMergedSpans`, target: `text`) +- `relations` (annotation type: `BinaryRelation`, target: `spans`) +- `span_attributes` (annotation type: `Attribute`, target: `spans`) +- `relation_attributes` (annotation type: `Attribute`, target: `relations`) + +The `Attribute` annotation type is defined as follows: + +- `annotation` (type: `Annotation`): the annotation to which the attribute is attached +- `label` (type: `str`) +- `value` (type: `str`, optional) +- `score` (type: `float`, optional, not included in comparison) + +See [here](https://github.com/ChristophAlt/pytorch-ie/blob/main/src/pytorch_ie/annotations.py) for the remaining annotation type definitions. + +## Document Converters + +The dataset provides no predefined document converters because the BRAT format is very flexible and can be used +for many different tasks. You can add your own document converter by doing the following: + +```python +import dataclasses +from typing import Optional + +from pytorch_ie.core import AnnotationList, annotation_field +from pytorch_ie.documents import TextBasedDocument +from pytorch_ie.annotations import LabeledSpan + +from pie_datasets import DatasetDict + +# define your document class +@dataclasses.dataclass +class MyDocument(TextBasedDocument): + my_field: Optional[str] = None + my_span_annotations: AnnotationList[LabeledSpan] = annotation_field(target="text") + +# define your document converter +def my_converter(document: BratDocumentWithMergedSpans) -> MyDocument: + # create your document with the data from the original document. + # The fields "text", "id" and "metadata" are derived from the TextBasedDocument. + my_document = MyDocument(id=document.id, text=document.text, metadata=document.metadata, my_field="my_value") + + # create a new span annotation + new_span = LabeledSpan(label="my_label", start=2, end=10) + # add the new span annotation to your document + my_document.my_span_annotations.append(new_span) + + # add annotations from the document to your document + for span in document.spans: + # we need to copy the span because an annotation can only be attached to one document + my_document.my_span_annotations.append(span.copy()) + + return my_document + + +# load the dataset. We use the "merge_fragmented_spans" dataset variant here +# because it provides documents of type BratDocumentWithMergedSpans. +dataset = DatasetDict.load_dataset("pie/brat", name="merge_fragmented_spans", data_dir="path/to/brat/data") + +# attach your document converter to the dataset +dataset.register_document_converter(my_converter) + +# convert the dataset +converted_dataset = dataset.to_document_type(MyDocument) +``` diff --git a/dataset_builders/pie/brat/brat.py b/dataset_builders/pie/brat/brat.py new file mode 100644 index 00000000..752de772 --- /dev/null +++ b/dataset_builders/pie/brat/brat.py @@ -0,0 +1,305 @@ +import dataclasses +import logging +from collections import defaultdict +from typing import Any, Dict, List, Optional, Tuple, Union + +import datasets +from pytorch_ie.annotations import BinaryRelation, LabeledMultiSpan, LabeledSpan +from pytorch_ie.core import Annotation, AnnotationList, annotation_field +from pytorch_ie.documents import TextBasedDocument + +from pie_datasets import GeneratorBasedBuilder + +logger = logging.getLogger(__name__) + + +def dl2ld(dict_of_lists: Dict[str, List[Any]]) -> List[Dict[str, Any]]: + return [dict(zip(dict_of_lists, t)) for t in zip(*dict_of_lists.values())] + + +def ld2dl( + list_fo_dicts: List[Dict[str, Any]], keys: Optional[List[str]] = None +) -> Dict[str, List[Any]]: + keys = keys or list(list_fo_dicts[0]) + return {k: [dic[k] for dic in list_fo_dicts] for k in keys} + + +@dataclasses.dataclass(eq=True, frozen=True) +class Attribute(Annotation): + annotation: Annotation + label: str + value: Optional[str] = None + score: Optional[float] = dataclasses.field(default=None, compare=False) + + +@dataclasses.dataclass +class BratDocument(TextBasedDocument): + spans: AnnotationList[LabeledMultiSpan] = annotation_field(target="text") + relations: AnnotationList[BinaryRelation] = annotation_field(target="spans") + span_attributes: AnnotationList[Attribute] = annotation_field(target="spans") + relation_attributes: AnnotationList[Attribute] = annotation_field(target="relations") + + +@dataclasses.dataclass +class BratDocumentWithMergedSpans(TextBasedDocument): + spans: AnnotationList[LabeledSpan] = annotation_field(target="text") + relations: AnnotationList[BinaryRelation] = annotation_field(target="spans") + span_attributes: AnnotationList[Attribute] = annotation_field(target="spans") + relation_attributes: AnnotationList[Attribute] = annotation_field(target="relations") + + +def example_to_document( + example: Dict[str, Any], merge_fragmented_spans: bool = False +) -> BratDocument: + if merge_fragmented_spans: + doc = BratDocumentWithMergedSpans(text=example["context"], id=example["file_name"]) + else: + doc = BratDocument(text=example["context"], id=example["file_name"]) + + spans: Dict[str, LabeledSpan] = dict() + span_locations: List[Tuple[Tuple[int, int]]] = [] + span_texts: List[str] = [] + for span_dict in dl2ld(example["spans"]): + starts: List[int] = span_dict["locations"]["start"] + ends: List[int] = span_dict["locations"]["end"] + slices = tuple(zip(starts, ends)) + span_locations.append(slices) + span_texts.append(span_dict["text"]) + # sanity check + span_text_parts = [doc.text[start:end] for start, end in slices] + joined_span_texts_stripped = " ".join(span_text_parts).strip() + span_text_stripped = span_dict["text"].strip() + if joined_span_texts_stripped != span_text_stripped: + logger.warning( + f"joined span parts do not match stripped span text field content. " + f'joined_span_texts_stripped: "{joined_span_texts_stripped}" != stripped "text": "{span_text_stripped}"' + ) + if merge_fragmented_spans: + if len(starts) > 1: + # check if the text in between the fragments holds only space + merged_content_texts = [ + doc.text[start:end] for start, end in zip(ends[:-1], starts[1:]) + ] + merged_content_texts_not_empty = [ + text.strip() for text in merged_content_texts if text.strip() != "" + ] + if len(merged_content_texts_not_empty) > 0: + logger.warning( + f"document '{doc.id}' contains a non-contiguous span with text content in between " + f"(will be merged into a single span): " + f"newly covered text parts: {merged_content_texts_not_empty}, " + f"merged span text: '{doc.text[starts[0]:ends[-1]]}', " + f"annotation: {span_dict}" + ) + # just take everything + start = min(starts) + end = max(ends) + span = LabeledSpan(start=start, end=end, label=span_dict["type"]) + else: + span = LabeledMultiSpan(slices=slices, label=span_dict["type"]) + spans[span_dict["id"]] = span + + doc.spans.extend(spans.values()) + doc.metadata["span_ids"] = list(spans.keys()) + doc.metadata["span_locations"] = span_locations + doc.metadata["span_texts"] = span_texts + + relations: Dict[str, BinaryRelation] = dict() + for rel_dict in dl2ld(example["relations"]): + arguments = dict(zip(rel_dict["arguments"]["type"], rel_dict["arguments"]["target"])) + assert set(arguments) == {"Arg1", "Arg2"} + head = spans[arguments["Arg1"]] + tail = spans[arguments["Arg2"]] + rel = BinaryRelation(head=head, tail=tail, label=rel_dict["type"]) + relations[rel_dict["id"]] = rel + + doc.relations.extend(relations.values()) + doc.metadata["relation_ids"] = list(relations.keys()) + + equivalence_relations = dl2ld(example["equivalence_relations"]) + if len(equivalence_relations) > 0: + raise NotImplementedError("converting equivalence_relations is not yet implemented") + + events = dl2ld(example["events"]) + if len(events) > 0: + raise NotImplementedError("converting events is not yet implemented") + + attribute_annotations: Dict[str, Dict[str, Attribute]] = defaultdict(dict) + attribute_ids = [] + for attribute_dict in dl2ld(example["attributions"]): + target_id = attribute_dict["target"] + if target_id in spans: + target_layer_name = "spans" + annotation = spans[target_id] + elif target_id in relations: + target_layer_name = "relations" + annotation = relations[target_id] + else: + raise Exception("only span and relation attributes are supported yet") + attribute = Attribute( + annotation=annotation, + label=attribute_dict["type"], + value=attribute_dict["value"], + ) + attribute_annotations[target_layer_name][attribute_dict["id"]] = attribute + attribute_ids.append((target_layer_name, attribute_dict["id"])) + + doc.span_attributes.extend(attribute_annotations["spans"].values()) + doc.relation_attributes.extend(attribute_annotations["relations"].values()) + doc.metadata["attribute_ids"] = attribute_ids + + normalizations = dl2ld(example["normalizations"]) + if len(normalizations) > 0: + raise NotImplementedError("converting normalizations is not yet implemented") + + notes = dl2ld(example["notes"]) + if len(notes) > 0: + raise NotImplementedError("converting notes is not yet implemented") + + return doc + + +def document_to_example( + document: Union[BratDocument, BratDocumentWithMergedSpans] +) -> Dict[str, Any]: + example = { + "context": document.text, + "file_name": document.id, + } + span_dicts: Dict[Union[LabeledSpan, LabeledMultiSpan], Dict[str, Any]] = dict() + assert len(document.metadata["span_locations"]) == len(document.spans) + assert len(document.metadata["span_texts"]) == len(document.spans) + assert len(document.metadata["span_ids"]) == len(document.spans) + for i, span in enumerate(document.spans): + locations = tuple((start, end) for start, end in document.metadata["span_locations"][i]) + if isinstance(span, LabeledSpan): + assert locations[0][0] == span.start + assert locations[-1][1] == span.end + elif isinstance(span, LabeledMultiSpan): + assert span.slices == locations + else: + raise TypeError(f"span has unknown type [{type(span)}]: {span}") + + starts, ends = zip(*locations) + span_dict = { + "id": document.metadata["span_ids"][i], + "locations": { + "start": list(starts), + "end": list(ends), + }, + "text": document.metadata["span_texts"][i], + "type": span.label, + } + if span in span_dicts: + prev_ann_dict = span_dicts[span] + ann_dict = span_dict + logger.warning( + f"document {document.id}: annotation exists twice: {prev_ann_dict['id']} and {ann_dict['id']} " + f"are identical" + ) + span_dicts[span] = span_dict + example["spans"] = ld2dl(list(span_dicts.values()), keys=["id", "type", "locations", "text"]) + + relation_dicts: Dict[BinaryRelation, Dict[str, Any]] = dict() + assert len(document.metadata["relation_ids"]) == len(document.relations) + for i, rel in enumerate(document.relations): + arg1_id = span_dicts[rel.head]["id"] + arg2_id = span_dicts[rel.tail]["id"] + relation_dict = { + "id": document.metadata["relation_ids"][i], + "type": rel.label, + "arguments": { + "type": ["Arg1", "Arg2"], + "target": [arg1_id, arg2_id], + }, + } + if rel in relation_dicts: + prev_ann_dict = relation_dicts[rel] + ann_dict = relation_dict + logger.warning( + f"document {document.id}: annotation exists twice: {prev_ann_dict['id']} and {ann_dict['id']} " + f"are identical" + ) + relation_dicts[rel] = relation_dict + + example["relations"] = ld2dl(list(relation_dicts.values()), keys=["id", "type", "arguments"]) + + example["equivalence_relations"] = ld2dl([], keys=["type", "targets"]) + example["events"] = ld2dl([], keys=["id", "type", "trigger", "arguments"]) + + annotation_dicts = { + "spans": span_dicts, + "relations": relation_dicts, + } + all_attribute_annotations = { + "spans": document.span_attributes, + "relations": document.relation_attributes, + } + attribute_dicts: Dict[Annotation, Dict[str, Any]] = dict() + attribute_ids_per_target = defaultdict(list) + for target_layer, attribute_id in document.metadata["attribute_ids"]: + attribute_ids_per_target[target_layer].append(attribute_id) + + for target_layer, attribute_ids in attribute_ids_per_target.items(): + attribute_annotations = all_attribute_annotations[target_layer] + assert len(attribute_ids) == len(attribute_annotations) + for i, attribute_annotation in enumerate(attribute_annotations): + target_id = annotation_dicts[target_layer][attribute_annotation.annotation]["id"] + attribute_dict = { + "id": attribute_ids_per_target[target_layer][i], + "type": attribute_annotation.label, + "target": target_id, + "value": attribute_annotation.value, + } + if attribute_annotation in attribute_dicts: + prev_ann_dict = attribute_dicts[attribute_annotation] + ann_dict = attribute_annotation + logger.warning( + f"document {document.id}: annotation exists twice: {prev_ann_dict['id']} and {ann_dict['id']} " + f"are identical" + ) + attribute_dicts[attribute_annotation] = attribute_dict + + example["attributions"] = ld2dl( + list(attribute_dicts.values()), keys=["id", "type", "target", "value"] + ) + example["normalizations"] = ld2dl( + [], keys=["id", "type", "target", "resource_id", "entity_id"] + ) + example["notes"] = ld2dl([], keys=["id", "type", "target", "note"]) + + return example + + +class BratConfig(datasets.BuilderConfig): + """BuilderConfig for BratDatasetLoader.""" + + def __init__(self, merge_fragmented_spans: bool = False, **kwargs): + """BuilderConfig for DocRED. + + Args: + **kwargs: keyword arguments forwarded to super. + """ + super().__init__(**kwargs) + self.merge_fragmented_spans = merge_fragmented_spans + + +class BratDatasetLoader(GeneratorBasedBuilder): + # this requires https://github.com/ChristophAlt/pytorch-ie/pull/288 + DOCUMENT_TYPES = { + "default": BratDocument, + "merge_fragmented_spans": BratDocumentWithMergedSpans, + } + + DEFAULT_CONFIG_NAME = "default" + BUILDER_CONFIGS = [ + BratConfig(name="default"), + BratConfig(name="merge_fragmented_spans", merge_fragmented_spans=True), + ] + + BASE_DATASET_PATH = "DFKI-SLT/brat" + + def _generate_document(self, example, **kwargs): + return example_to_document( + example, merge_fragmented_spans=self.config.merge_fragmented_spans + ) diff --git a/tests/dataset_builders/common.py b/tests/dataset_builders/common.py index 57291fd2..70af75a7 100644 --- a/tests/dataset_builders/common.py +++ b/tests/dataset_builders/common.py @@ -11,6 +11,7 @@ HF_BASE_PATH = DATASET_BUILDER_BASE_PATH / "hf" PIE_BASE_PATH = DATASET_BUILDER_BASE_PATH / "pie" HF_DS_FIXTURE_DATA_PATH = FIXTURES_ROOT / "dataset_builders" / "hf" +PIE_DS_FIXTURE_DATA_PATH = FIXTURES_ROOT / "dataset_builders" / "pie" logger = logging.getLogger(__name__) diff --git a/tests/dataset_builders/pie/test_brat.py b/tests/dataset_builders/pie/test_brat.py new file mode 100644 index 00000000..8b036592 --- /dev/null +++ b/tests/dataset_builders/pie/test_brat.py @@ -0,0 +1,227 @@ +from typing import Any, Union + +import datasets +import pytest +from pytorch_ie.annotations import BinaryRelation, LabeledMultiSpan, LabeledSpan +from pytorch_ie.core import Annotation +from pytorch_ie.documents import TextBasedDocument + +from dataset_builders.pie.brat.brat import ( + BratDatasetLoader, + BratDocument, + BratDocumentWithMergedSpans, + document_to_example, + example_to_document, +) +from tests.dataset_builders.common import PIE_BASE_PATH, PIE_DS_FIXTURE_DATA_PATH + +datasets.disable_caching() + +DATASET_NAME = "brat" +PIE_DATASET_PATH = PIE_BASE_PATH / DATASET_NAME +HF_DATASET_PATH = BratDatasetLoader.BASE_DATASET_PATH +FIXTURE_DATA_PATH = PIE_DS_FIXTURE_DATA_PATH / DATASET_NAME +SPLIT_SIZES = {"train": 2} + + +def resolve_annotation(annotation: Annotation) -> Any: + if annotation.target is None: + return None + if isinstance(annotation, LabeledMultiSpan): + return ( + [annotation.target[start:end] for start, end in annotation.slices], + annotation.label, + ) + elif isinstance(annotation, LabeledSpan): + return (annotation.target[annotation.start : annotation.end], annotation.label) + elif isinstance(annotation, BinaryRelation): + return ( + resolve_annotation(annotation.head), + annotation.label, + resolve_annotation(annotation.tail), + ) + elif isinstance(annotation, Annotation) and str(type(annotation)).endswith("brat.Attribute'>"): + result = (resolve_annotation(annotation.annotation), annotation.label) + if annotation.value is not None: + return result + (annotation.value,) + else: + return result + else: + raise TypeError(f"Unknown annotation type: {type(annotation)}") + + +@pytest.fixture(scope="module") +def hf_dataset(): + return datasets.load_dataset(str(HF_DATASET_PATH), data_dir=str(FIXTURE_DATA_PATH)) + + +def test_hf_dataset(hf_dataset): + assert set(hf_dataset) == set(SPLIT_SIZES) + split_sizes = {split_name: len(ds) for split_name, ds in hf_dataset.items()} + assert split_sizes == SPLIT_SIZES + + +@pytest.fixture(params=range(SPLIT_SIZES["train"])) +def sample_idx(request): + return request.param + + +@pytest.fixture() +def hf_example(hf_dataset, sample_idx): + return hf_dataset["train"][sample_idx] + + +def test_hf_example(hf_example, sample_idx): + if sample_idx == 0: + assert hf_example == { + "context": "Jane lives in Berlin.\n", + "file_name": "1", + "spans": { + "id": ["T1", "T2"], + "type": ["person", "city"], + "locations": [{"start": [0], "end": [4]}, {"start": [14], "end": [20]}], + "text": ["Jane", "Berlin"], + }, + "relations": {"id": [], "type": [], "arguments": []}, + "equivalence_relations": {"type": [], "targets": []}, + "events": {"id": [], "type": [], "trigger": [], "arguments": []}, + "attributions": {"id": [], "type": [], "target": [], "value": []}, + "normalizations": { + "id": [], + "type": [], + "target": [], + "resource_id": [], + "entity_id": [], + }, + "notes": {"id": [], "type": [], "target": [], "note": []}, + } + elif sample_idx == 1: + assert hf_example == { + "context": "Seattle is a rainy city. Jenny Durkan is the city's mayor.\n", + "file_name": "2", + "spans": { + "id": ["T1", "T2"], + "type": ["city", "person"], + "locations": [{"start": [0], "end": [7]}, {"start": [25], "end": [37]}], + "text": ["Seattle", "Jenny Durkan"], + }, + "relations": { + "id": ["R1"], + "type": ["mayor_of"], + "arguments": [{"type": ["Arg1", "Arg2"], "target": ["T2", "T1"]}], + }, + "equivalence_relations": {"type": [], "targets": []}, + "events": {"id": [], "type": [], "trigger": [], "arguments": []}, + "attributions": { + "id": ["A1", "A2"], + "type": ["factuality", "statement"], + "target": ["T1", "R1"], + "value": ["actual", "true"], + }, + "normalizations": { + "id": [], + "type": [], + "target": [], + "resource_id": [], + "entity_id": [], + }, + "notes": {"id": [], "type": [], "target": [], "note": []}, + } + else: + raise ValueError(f"Unknown sample index: {sample_idx}") + + +@pytest.fixture( + params=[config.name for config in BratDatasetLoader.BUILDER_CONFIGS], # scope="module" +) +def pie_dataset_variant(request): + return request.param + + +@pytest.fixture() +def generated_document( + hf_example, hf_dataset, pie_dataset_variant +) -> Union[BratDocument, BratDocumentWithMergedSpans]: + builder = BratDatasetLoader(name=pie_dataset_variant) + kwargs = builder._generate_document_kwargs(hf_dataset["train"]) or {} + document = builder._generate_document(example=hf_example, **kwargs) + assert document is not None + return document + + +def test_generate_document(generated_document, pie_dataset_variant, sample_idx): + assert generated_document is not None + resolved_spans = [resolve_annotation(annotation=span) for span in generated_document.spans] + resolved_relations = [ + resolve_annotation(relation) for relation in generated_document.relations + ] + if sample_idx == 0: + assert len(generated_document.spans) == 2 + assert len(generated_document.relations) == 0 + assert len(generated_document.span_attributes) == 0 + assert len(generated_document.relation_attributes) == 0 + + if pie_dataset_variant == "default": + assert resolved_spans[0] == (["Jane"], "person") + assert resolved_spans[1] == (["Berlin"], "city") + elif pie_dataset_variant == "merge_fragmented_spans": + assert resolved_spans[0] == ("Jane", "person") + assert resolved_spans[1] == ("Berlin", "city") + else: + raise ValueError(f"Unknown dataset variant: {pie_dataset_variant}") + + elif sample_idx == 1: + assert len(generated_document.spans) == 2 + assert len(generated_document.relations) == 1 + assert len(generated_document.span_attributes) == 1 + assert len(generated_document.relation_attributes) == 1 + + resolved_span_attributes = [ + resolve_annotation(attribute) for attribute in generated_document.span_attributes + ] + resolved_relation_attributes = [ + resolve_annotation(attribute) for attribute in generated_document.relation_attributes + ] + + if pie_dataset_variant == "default": + assert resolved_spans[0] == (["Seattle"], "city") + assert resolved_spans[1] == (["Jenny Durkan"], "person") + assert resolved_relations[0] == ( + (["Jenny Durkan"], "person"), + "mayor_of", + (["Seattle"], "city"), + ) + assert resolved_span_attributes[0] == ((["Seattle"], "city"), "factuality", "actual") + assert resolved_relation_attributes[0] == ( + ((["Jenny Durkan"], "person"), "mayor_of", (["Seattle"], "city")), + "statement", + "true", + ) + elif pie_dataset_variant == "merge_fragmented_spans": + assert resolved_spans[0] == ("Seattle", "city") + assert resolved_spans[1] == ("Jenny Durkan", "person") + assert resolved_relations[0] == ( + ("Jenny Durkan", "person"), + "mayor_of", + ("Seattle", "city"), + ) + assert resolved_span_attributes[0] == (("Seattle", "city"), "factuality", "actual") + assert resolved_relation_attributes[0] == ( + (("Jenny Durkan", "person"), "mayor_of", ("Seattle", "city")), + "statement", + "true", + ) + else: + raise ValueError(f"Unknown dataset variant: {pie_dataset_variant}") + else: + raise ValueError(f"Unknown sample index: {sample_idx}") + + +@pytest.mark.parametrize("merge_fragmented_spans", [True, False]) +def test_example_to_document_and_back_all(hf_dataset, merge_fragmented_spans): + for split_name, split in hf_dataset.items(): + for hf_example in split: + doc = example_to_document(hf_example, merge_fragmented_spans=merge_fragmented_spans) + assert isinstance(doc, TextBasedDocument) + hf_example_back = document_to_example(doc) + assert hf_example == hf_example_back diff --git a/tests/fixtures/dataset_builders/pie/brat/train/1.ann b/tests/fixtures/dataset_builders/pie/brat/train/1.ann new file mode 100644 index 00000000..2586acc6 --- /dev/null +++ b/tests/fixtures/dataset_builders/pie/brat/train/1.ann @@ -0,0 +1,2 @@ +T1 person 0 4 Jane +T2 city 14 20 Berlin diff --git a/tests/fixtures/dataset_builders/pie/brat/train/1.txt b/tests/fixtures/dataset_builders/pie/brat/train/1.txt new file mode 100644 index 00000000..a83d9d48 --- /dev/null +++ b/tests/fixtures/dataset_builders/pie/brat/train/1.txt @@ -0,0 +1 @@ +Jane lives in Berlin. diff --git a/tests/fixtures/dataset_builders/pie/brat/train/2.ann b/tests/fixtures/dataset_builders/pie/brat/train/2.ann new file mode 100644 index 00000000..d46cf3d6 --- /dev/null +++ b/tests/fixtures/dataset_builders/pie/brat/train/2.ann @@ -0,0 +1,5 @@ +T1 city 0 7 Seattle +T2 person 25 37 Jenny Durkan +R1 mayor_of Arg1:T2 Arg2:T1 +A1 factuality T1 actual +A2 statement R1 diff --git a/tests/fixtures/dataset_builders/pie/brat/train/2.txt b/tests/fixtures/dataset_builders/pie/brat/train/2.txt new file mode 100644 index 00000000..02859e37 --- /dev/null +++ b/tests/fixtures/dataset_builders/pie/brat/train/2.txt @@ -0,0 +1 @@ +Seattle is a rainy city. Jenny Durkan is the city's mayor.