From 993e55cea70590fc2f1df5e402c3ec61d46a29dd Mon Sep 17 00:00:00 2001 From: Matt Bernstein Date: Fri, 22 Nov 2024 18:05:44 -0500 Subject: [PATCH 1/8] add JSF for generating schema examples --- poetry.lock | 152 ++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 2 files changed, 151 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index f5ba8ee..7920c4f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -297,8 +297,8 @@ jinja2 = ">=2.10.1,<4.0" packaging = "*" pydantic = [ {version = ">=1.10.0,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.12\" and python_version < \"4.0\""}, - {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""}, + {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] pyyaml = ">=6.0.1" @@ -359,6 +359,21 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "faker" +version = "33.0.0" +description = "Faker is a Python package that generates fake data for you." +optional = false +python-versions = ">=3.8" +files = [ + {file = "Faker-33.0.0-py3-none-any.whl", hash = "sha256:68e5580cb6b4226710886e595eabc13127149d6e71e9d1db65506a7fbe2c7fce"}, + {file = "faker-33.0.0.tar.gz", hash = "sha256:9b01019c1ddaf2253ca2308c0472116e993f4ad8fc9905f82fa965e0c6f932e9"}, +] + +[package.dependencies] +python-dateutil = ">=2.4" +typing-extensions = "*" + [[package]] name = "genson" version = "1.3.0" @@ -634,6 +649,28 @@ files = [ {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, ] +[[package]] +name = "jsf" +version = "0.11.2" +description = "Creates fake JSON files from a JSON schema" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jsf-0.11.2-py3-none-any.whl", hash = "sha256:b4472c8c2d776eb3e0bb08368caa6ae0ead7ea78b20653facc07b6d93768612c"}, + {file = "jsf-0.11.2.tar.gz", hash = "sha256:07055b363281d38ce871a9256a00587d8472802c5108721a7fe5884465104b5d"}, +] + +[package.dependencies] +faker = ">=15.3.4" +jsonschema = ">=4.17.3" +pydantic = ">=2.0.0" +rstr = ">=3.2.0" +smart-open = {version = ">=6.3.0", extras = ["http"]} +typing-extensions = ">=4.9.0" + +[package.extras] +cli = ["typer (>=0.7.0)"] + [[package]] name = "jsonschema" version = "4.23.0" @@ -1762,6 +1799,17 @@ files = [ {file = "rpds_py-0.20.1.tar.gz", hash = "sha256:e1791c4aabd117653530dccd24108fa03cc6baf21f58b950d0a73c3b3b29a350"}, ] +[[package]] +name = "rstr" +version = "3.2.2" +description = "Generate random strings in Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "rstr-3.2.2-py3-none-any.whl", hash = "sha256:f39195d38da1748331eeec52f1276e71eb6295e7949beea91a5e9af2340d7b3b"}, + {file = "rstr-3.2.2.tar.gz", hash = "sha256:c4a564d4dfb4472d931d145c43d1cf1ad78c24592142e7755b8866179eeac012"}, +] + [[package]] name = "six" version = "1.16.0" @@ -1773,6 +1821,32 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "smart-open" +version = "7.0.5" +description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" +optional = false +python-versions = "<4.0,>=3.7" +files = [ + {file = "smart_open-7.0.5-py3-none-any.whl", hash = "sha256:8523ed805c12dff3eaa50e9c903a6cb0ae78800626631c5fe7ea073439847b89"}, + {file = "smart_open-7.0.5.tar.gz", hash = "sha256:d3672003b1dbc85e2013e4983b88eb9a5ccfd389b0d4e5015f39a9ee5620ec18"}, +] + +[package.dependencies] +requests = {version = "*", optional = true, markers = "extra == \"http\""} +wrapt = "*" + +[package.extras] +all = ["azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "paramiko", "requests", "zstandard"] +azure = ["azure-common", "azure-core", "azure-storage-blob"] +gcs = ["google-cloud-storage (>=2.6.0)"] +http = ["requests"] +s3 = ["boto3"] +ssh = ["paramiko"] +test = ["awscli", "azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "moto[server]", "numpy", "paramiko", "pyopenssl", "pytest", "pytest-benchmark", "pytest-rerunfailures", "requests", "responses", "zstandard"] +webhdfs = ["requests"] +zst = ["zstandard"] + [[package]] name = "sniffio" version = "1.3.1" @@ -1964,6 +2038,80 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "wrapt" +version = "1.17.0" +description = "Module for decorators, wrappers and monkey patching." +optional = false +python-versions = ">=3.8" +files = [ + {file = "wrapt-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a0c23b8319848426f305f9cb0c98a6e32ee68a36264f45948ccf8e7d2b941f8"}, + {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1ca5f060e205f72bec57faae5bd817a1560fcfc4af03f414b08fa29106b7e2d"}, + {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e185ec6060e301a7e5f8461c86fb3640a7beb1a0f0208ffde7a65ec4074931df"}, + {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb90765dd91aed05b53cd7a87bd7f5c188fcd95960914bae0d32c5e7f899719d"}, + {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:879591c2b5ab0a7184258274c42a126b74a2c3d5a329df16d69f9cee07bba6ea"}, + {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fce6fee67c318fdfb7f285c29a82d84782ae2579c0e1b385b7f36c6e8074fffb"}, + {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0698d3a86f68abc894d537887b9bbf84d29bcfbc759e23f4644be27acf6da301"}, + {file = "wrapt-1.17.0-cp310-cp310-win32.whl", hash = "sha256:69d093792dc34a9c4c8a70e4973a3361c7a7578e9cd86961b2bbf38ca71e4e22"}, + {file = "wrapt-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:f28b29dc158ca5d6ac396c8e0a2ef45c4e97bb7e65522bfc04c989e6fe814575"}, + {file = "wrapt-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:74bf625b1b4caaa7bad51d9003f8b07a468a704e0644a700e936c357c17dd45a"}, + {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f2a28eb35cf99d5f5bd12f5dd44a0f41d206db226535b37b0c60e9da162c3ed"}, + {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:81b1289e99cf4bad07c23393ab447e5e96db0ab50974a280f7954b071d41b489"}, + {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f2939cd4a2a52ca32bc0b359015718472d7f6de870760342e7ba295be9ebaf9"}, + {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6a9653131bda68a1f029c52157fd81e11f07d485df55410401f745007bd6d339"}, + {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4e4b4385363de9052dac1a67bfb535c376f3d19c238b5f36bddc95efae15e12d"}, + {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bdf62d25234290db1837875d4dceb2151e4ea7f9fff2ed41c0fde23ed542eb5b"}, + {file = "wrapt-1.17.0-cp311-cp311-win32.whl", hash = "sha256:5d8fd17635b262448ab8f99230fe4dac991af1dabdbb92f7a70a6afac8a7e346"}, + {file = "wrapt-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:92a3d214d5e53cb1db8b015f30d544bc9d3f7179a05feb8f16df713cecc2620a"}, + {file = "wrapt-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:89fc28495896097622c3fc238915c79365dd0ede02f9a82ce436b13bd0ab7569"}, + {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:875d240fdbdbe9e11f9831901fb8719da0bd4e6131f83aa9f69b96d18fae7504"}, + {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5ed16d95fd142e9c72b6c10b06514ad30e846a0d0917ab406186541fe68b451"}, + {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18b956061b8db634120b58f668592a772e87e2e78bc1f6a906cfcaa0cc7991c1"}, + {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:daba396199399ccabafbfc509037ac635a6bc18510ad1add8fd16d4739cdd106"}, + {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4d63f4d446e10ad19ed01188d6c1e1bb134cde8c18b0aa2acfd973d41fcc5ada"}, + {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8a5e7cc39a45fc430af1aefc4d77ee6bad72c5bcdb1322cfde852c15192b8bd4"}, + {file = "wrapt-1.17.0-cp312-cp312-win32.whl", hash = "sha256:0a0a1a1ec28b641f2a3a2c35cbe86c00051c04fffcfcc577ffcdd707df3f8635"}, + {file = "wrapt-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:3c34f6896a01b84bab196f7119770fd8466c8ae3dfa73c59c0bb281e7b588ce7"}, + {file = "wrapt-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:714c12485aa52efbc0fc0ade1e9ab3a70343db82627f90f2ecbc898fdf0bb181"}, + {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da427d311782324a376cacb47c1a4adc43f99fd9d996ffc1b3e8529c4074d393"}, + {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba1739fb38441a27a676f4de4123d3e858e494fac05868b7a281c0a383c098f4"}, + {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e711fc1acc7468463bc084d1b68561e40d1eaa135d8c509a65dd534403d83d7b"}, + {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:140ea00c87fafc42739bd74a94a5a9003f8e72c27c47cd4f61d8e05e6dec8721"}, + {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:73a96fd11d2b2e77d623a7f26e004cc31f131a365add1ce1ce9a19e55a1eef90"}, + {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0b48554952f0f387984da81ccfa73b62e52817a4386d070c75e4db7d43a28c4a"}, + {file = "wrapt-1.17.0-cp313-cp313-win32.whl", hash = "sha256:498fec8da10e3e62edd1e7368f4b24aa362ac0ad931e678332d1b209aec93045"}, + {file = "wrapt-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd136bb85f4568fffca995bd3c8d52080b1e5b225dbf1c2b17b66b4c5fa02838"}, + {file = "wrapt-1.17.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:17fcf043d0b4724858f25b8826c36e08f9fb2e475410bece0ec44a22d533da9b"}, + {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4a557d97f12813dc5e18dad9fa765ae44ddd56a672bb5de4825527c847d6379"}, + {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0229b247b0fc7dee0d36176cbb79dbaf2a9eb7ecc50ec3121f40ef443155fb1d"}, + {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8425cfce27b8b20c9b89d77fb50e368d8306a90bf2b6eef2cdf5cd5083adf83f"}, + {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9c900108df470060174108012de06d45f514aa4ec21a191e7ab42988ff42a86c"}, + {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:4e547b447073fc0dbfcbff15154c1be8823d10dab4ad401bdb1575e3fdedff1b"}, + {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:914f66f3b6fc7b915d46c1cc424bc2441841083de01b90f9e81109c9759e43ab"}, + {file = "wrapt-1.17.0-cp313-cp313t-win32.whl", hash = "sha256:a4192b45dff127c7d69b3bdfb4d3e47b64179a0b9900b6351859f3001397dabf"}, + {file = "wrapt-1.17.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4f643df3d4419ea3f856c5c3f40fec1d65ea2e89ec812c83f7767c8730f9827a"}, + {file = "wrapt-1.17.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:69c40d4655e078ede067a7095544bcec5a963566e17503e75a3a3e0fe2803b13"}, + {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f495b6754358979379f84534f8dd7a43ff8cff2558dcdea4a148a6e713a758f"}, + {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:baa7ef4e0886a6f482e00d1d5bcd37c201b383f1d314643dfb0367169f94f04c"}, + {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8fc931382e56627ec4acb01e09ce66e5c03c384ca52606111cee50d931a342d"}, + {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8f8909cdb9f1b237786c09a810e24ee5e15ef17019f7cecb207ce205b9b5fcce"}, + {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ad47b095f0bdc5585bced35bd088cbfe4177236c7df9984b3cc46b391cc60627"}, + {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:948a9bd0fb2c5120457b07e59c8d7210cbc8703243225dbd78f4dfc13c8d2d1f"}, + {file = "wrapt-1.17.0-cp38-cp38-win32.whl", hash = "sha256:5ae271862b2142f4bc687bdbfcc942e2473a89999a54231aa1c2c676e28f29ea"}, + {file = "wrapt-1.17.0-cp38-cp38-win_amd64.whl", hash = "sha256:f335579a1b485c834849e9075191c9898e0731af45705c2ebf70e0cd5d58beed"}, + {file = "wrapt-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d751300b94e35b6016d4b1e7d0e7bbc3b5e1751e2405ef908316c2a9024008a1"}, + {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7264cbb4a18dc4acfd73b63e4bcfec9c9802614572025bdd44d0721983fc1d9c"}, + {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:33539c6f5b96cf0b1105a0ff4cf5db9332e773bb521cc804a90e58dc49b10578"}, + {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c30970bdee1cad6a8da2044febd824ef6dc4cc0b19e39af3085c763fdec7de33"}, + {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:bc7f729a72b16ee21795a943f85c6244971724819819a41ddbaeb691b2dd85ad"}, + {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6ff02a91c4fc9b6a94e1c9c20f62ea06a7e375f42fe57587f004d1078ac86ca9"}, + {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2dfb7cff84e72e7bf975b06b4989477873dcf160b2fd89959c629535df53d4e0"}, + {file = "wrapt-1.17.0-cp39-cp39-win32.whl", hash = "sha256:2399408ac33ffd5b200480ee858baa58d77dd30e0dd0cab6a8a9547135f30a88"}, + {file = "wrapt-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:4f763a29ee6a20c529496a20a7bcb16a73de27f5da6a843249c7047daf135977"}, + {file = "wrapt-1.17.0-py3-none-any.whl", hash = "sha256:d2c63b93548eda58abf5188e505ffed0229bf675f7c3090f8e36ad55b8cbc371"}, + {file = "wrapt-1.17.0.tar.gz", hash = "sha256:16187aa2317c731170a88ef35e8937ae0f533c402872c1ee5e6d079fcf320801"}, +] + [[package]] name = "xmljson" version = "0.2.1" @@ -1997,4 +2145,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "2c1704ebc55c06304a71cb09459f7ace94b9cdadcad51b9a0a078f20c502e2e3" +content-hash = "0d843e66716b5a3291cfed5b6a15593f76b555d6ac304491b320d1d17de6dada" diff --git a/pyproject.toml b/pyproject.toml index d543b8a..3d2cd76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ typing_extensions = ">= 4.0.0" ujson = ">=5.8.0" xmljson = "0.2.1" +jsf = "^0.11.2" [tool.poetry.dev-dependencies] mypy = "1.0.1" pytest = "^7.4.0" From b24eecc08e402e55dad800ebf7544a98a7f0ef90 Mon Sep 17 00:00:00 2001 From: Matt Bernstein Date: Fri, 22 Nov 2024 18:06:49 -0500 Subject: [PATCH 2/8] hack to validate single choices --- .../label_interface/control_tags.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/label_studio_sdk/label_interface/control_tags.py b/src/label_studio_sdk/label_interface/control_tags.py index 45e0213..a738cfd 100644 --- a/src/label_studio_sdk/label_interface/control_tags.py +++ b/src/label_studio_sdk/label_interface/control_tags.py @@ -505,6 +505,13 @@ class SpanSelectionOffsets(SpanSelection): class ChoicesValue(BaseModel): choices: List[str] + # I don't know how Choices predictions with choice != 'multiple' was working without this... + @validator("choices", pre=True, always=True) + def coerce_to_list(cls, value: Union[str, List[str]]): + if isinstance(value, str): + return [value] + return value + class ChoicesTag(ControlTag): """ """ @@ -540,6 +547,15 @@ def to_json_schema(self): "description": f"Choices for {self.to_name[0]}" } + def _validate_labels(self, labels): + if super()._validate_labels(labels): + return True + + # HACK to continue to support single-item output in json schema + if not self.is_multiple_choice and isinstance(labels, str): + return super()._validate_labels([labels]) + + class LabelsValue(SpanSelection): labels: List[str] From baedd323c7250bb7010231218490dc188ef85701 Mon Sep 17 00:00:00 2001 From: Matt Bernstein Date: Fri, 22 Nov 2024 18:07:21 -0500 Subject: [PATCH 3/8] fix edge case --- src/label_studio_sdk/label_interface/interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/label_studio_sdk/label_interface/interface.py b/src/label_studio_sdk/label_interface/interface.py index a37d393..4e7118e 100644 --- a/src/label_studio_sdk/label_interface/interface.py +++ b/src/label_studio_sdk/label_interface/interface.py @@ -770,7 +770,7 @@ def validate_region(self, region) -> bool: return False # type of the region should match the tag name - if control.tag.lower() != region["type"]: + if control.tag.lower() != region["type"].lower(): return False # make sure that in config it connects to the same tag as From 8ea2c3d97732b10a581dd4d353190a59bc2156eb Mon Sep 17 00:00:00 2001 From: Matt Bernstein Date: Fri, 22 Nov 2024 18:08:09 -0500 Subject: [PATCH 4/8] generate sample predictions using JSF --- .../label_interface/interface.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/label_studio_sdk/label_interface/interface.py b/src/label_studio_sdk/label_interface/interface.py index 4e7118e..3ac81a5 100644 --- a/src/label_studio_sdk/label_interface/interface.py +++ b/src/label_studio_sdk/label_interface/interface.py @@ -15,6 +15,7 @@ from collections import defaultdict, OrderedDict from lxml import etree import xmljson +from jsf import JSF from label_studio_sdk._legacy.exceptions import ( LSConfigParseException, @@ -839,9 +840,23 @@ def generate_sample_task(self, mode="upload", secure_mode=False): return task - def generate_sample_annotation(self): + def generate_sample_prediction(self): """ """ - raise NotImplemented() + prediction = PredictionValue( + model_version='sample model version', + result=[ + { + 'from_name': control.name, + 'to_name': control.to_name[0], + 'type': control.tag, + # TODO: put special case for choices in generation instead of validation + 'value': {control._label_attr_name: JSF(control.to_json_schema()).generate()} + } for control in self.controls + ] + ) + prediction_dct = prediction.model_dump() + assert self.validate_prediction(prediction_dct), 'could not generate a sample prediction' + return prediction_dct ##### ##### COMPATIBILITY LAYER From b155e8afbf29f0e587703bf03e0a2fe10c11dfb1 Mon Sep 17 00:00:00 2001 From: Matt Bernstein Date: Tue, 26 Nov 2024 00:14:21 -0500 Subject: [PATCH 5/8] use control.label() instead of lower level fns --- .../label_interface/control_tags.py | 16 ----- .../label_interface/interface.py | 63 +++++++++++++++---- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/src/label_studio_sdk/label_interface/control_tags.py b/src/label_studio_sdk/label_interface/control_tags.py index a738cfd..45e0213 100644 --- a/src/label_studio_sdk/label_interface/control_tags.py +++ b/src/label_studio_sdk/label_interface/control_tags.py @@ -505,13 +505,6 @@ class SpanSelectionOffsets(SpanSelection): class ChoicesValue(BaseModel): choices: List[str] - # I don't know how Choices predictions with choice != 'multiple' was working without this... - @validator("choices", pre=True, always=True) - def coerce_to_list(cls, value: Union[str, List[str]]): - if isinstance(value, str): - return [value] - return value - class ChoicesTag(ControlTag): """ """ @@ -547,15 +540,6 @@ def to_json_schema(self): "description": f"Choices for {self.to_name[0]}" } - def _validate_labels(self, labels): - if super()._validate_labels(labels): - return True - - # HACK to continue to support single-item output in json schema - if not self.is_multiple_choice and isinstance(labels, str): - return super()._validate_labels([labels]) - - class LabelsValue(SpanSelection): labels: List[str] diff --git a/src/label_studio_sdk/label_interface/interface.py b/src/label_studio_sdk/label_interface/interface.py index 3ac81a5..a3f94d9 100644 --- a/src/label_studio_sdk/label_interface/interface.py +++ b/src/label_studio_sdk/label_interface/interface.py @@ -840,23 +840,64 @@ def generate_sample_task(self, mode="upload", secure_mode=False): return task - def generate_sample_prediction(self): - """ """ + def generate_sample_prediction(self) -> Optional[dict]: + """Generates a sample prediction that is valid for this label config. + + Example: + {'model_version': 'sample model version', + 'score': 0.0, + 'result': [{'id': 'e7bd76e6-4e88-4eb3-b433-55e03661bf5d', + 'from_name': 'sentiment', + 'to_name': 'text', + 'type': 'choices', + 'value': {'choices': ['Neutral']}}]} + + NOTE: `id` field in result is not required when importing predictions; it will be generated automatically. + """ prediction = PredictionValue( model_version='sample model version', result=[ - { - 'from_name': control.name, - 'to_name': control.to_name[0], - 'type': control.tag, - # TODO: put special case for choices in generation instead of validation - 'value': {control._label_attr_name: JSF(control.to_json_schema()).generate()} - } for control in self.controls + control.label(JSF(control.to_json_schema()).generate()) + for control in self.controls ] ) prediction_dct = prediction.model_dump() - assert self.validate_prediction(prediction_dct), 'could not generate a sample prediction' - return prediction_dct + if self.validate_prediction(prediction_dct): + return prediction_dct + else: + logger.debug(f'Sample prediction {prediction_dct} failed validation for label config {self.config}') + return None + + def generate_sample_annotation(self) -> Optional[dict]: + """Generates a sample annotation that is valid for this label config. + + Example: + {'was_cancelled': False, + 'ground_truth': False, + 'lead_time': 0.0, + 'result_count': 0, + 'completed_by': -1, + 'result': [{'id': 'b05da11d-3ffc-4657-8b8d-f5bc37cd59ac', + 'from_name': 'sentiment', + 'to_name': 'text', + 'type': 'choices', + 'value': {'choices': ['Negative']}}]} + + NOTE: `id` field in result is not required when importing predictions; it will be generated automatically. + """ + annotation = AnnotationValue( + completed_by=-1, # annotator's user id + result=[ + control.label(JSF(control.to_json_schema()).generate()) + for control in self.controls + ] + ) + annotation_dct = annotation.model_dump() + if self.validate_annotation(annotation_dct): + return annotation_dct + else: + logger.debug(f'Sample annotation {annotation_dct} failed validation for label config {self.config}') + return None ##### ##### COMPATIBILITY LAYER From 1bb1cf879b64b241ea97af6553de89b33d26f1e3 Mon Sep 17 00:00:00 2001 From: Matt Bernstein Date: Tue, 26 Nov 2024 00:16:04 -0500 Subject: [PATCH 6/8] fix hallucinated docstring --- src/label_studio_sdk/label_interface/region.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/label_studio_sdk/label_interface/region.py b/src/label_studio_sdk/label_interface/region.py index 431bcfd..260f57f 100644 --- a/src/label_studio_sdk/label_interface/region.py +++ b/src/label_studio_sdk/label_interface/region.py @@ -10,16 +10,7 @@ class Region(BaseModel): """ - Class for Region Tag - - Attributes: - ----------- - id: str - The unique identifier of the region - x: int - The x coordinate of the region - y: int - + A Region is an item in the `result` list of a PredictionValue or AnnotationValue. """ id: str = Field(default_factory=lambda: str(uuid4())) From b075aeee186d6164c14a9f40ad165d93b6439d0f Mon Sep 17 00:00:00 2001 From: Matt Bernstein Date: Tue, 26 Nov 2024 01:47:56 -0500 Subject: [PATCH 7/8] use an even higher level fn --- src/label_studio_sdk/label_interface/interface.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/label_studio_sdk/label_interface/interface.py b/src/label_studio_sdk/label_interface/interface.py index a3f94d9..072f974 100644 --- a/src/label_studio_sdk/label_interface/interface.py +++ b/src/label_studio_sdk/label_interface/interface.py @@ -853,13 +853,14 @@ def generate_sample_prediction(self) -> Optional[dict]: 'value': {'choices': ['Neutral']}}]} NOTE: `id` field in result is not required when importing predictions; it will be generated automatically. + NOTE: for each control tag, depends on tag.to_json_schema() being implemented correctly """ prediction = PredictionValue( model_version='sample model version', - result=[ - control.label(JSF(control.to_json_schema()).generate()) + result=self.create_regions({ + control.name: JSF(control.to_json_schema()).generate() for control in self.controls - ] + }) ) prediction_dct = prediction.model_dump() if self.validate_prediction(prediction_dct): @@ -884,13 +885,14 @@ def generate_sample_annotation(self) -> Optional[dict]: 'value': {'choices': ['Negative']}}]} NOTE: `id` field in result is not required when importing predictions; it will be generated automatically. + NOTE: for each control tag, depends on tag.to_json_schema() being implemented correctly """ annotation = AnnotationValue( completed_by=-1, # annotator's user id - result=[ - control.label(JSF(control.to_json_schema()).generate()) + result=self.create_regions({ + control.name: JSF(control.to_json_schema()).generate() for control in self.controls - ] + }) ) annotation_dct = annotation.model_dump() if self.validate_annotation(annotation_dct): From 78cc60f40cb667b467e76d4cd73e5518f2480e6f Mon Sep 17 00:00:00 2001 From: Matt Bernstein Date: Tue, 26 Nov 2024 09:26:40 -0500 Subject: [PATCH 8/8] separate out region generation --- .../label_interface/interface.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/label_studio_sdk/label_interface/interface.py b/src/label_studio_sdk/label_interface/interface.py index 072f974..de0edc3 100644 --- a/src/label_studio_sdk/label_interface/interface.py +++ b/src/label_studio_sdk/label_interface/interface.py @@ -840,6 +840,13 @@ def generate_sample_task(self, mode="upload", secure_mode=False): return task + def _generate_sample_regions(self): + """ Generate an example of each control tag's JSON schema and validate it as a region""" + return self.create_regions({ + control.name: JSF(control.to_json_schema()).generate() + for control in self.controls + }) + def generate_sample_prediction(self) -> Optional[dict]: """Generates a sample prediction that is valid for this label config. @@ -857,10 +864,7 @@ def generate_sample_prediction(self) -> Optional[dict]: """ prediction = PredictionValue( model_version='sample model version', - result=self.create_regions({ - control.name: JSF(control.to_json_schema()).generate() - for control in self.controls - }) + result=self._generate_sample_regions() ) prediction_dct = prediction.model_dump() if self.validate_prediction(prediction_dct): @@ -889,10 +893,7 @@ def generate_sample_annotation(self) -> Optional[dict]: """ annotation = AnnotationValue( completed_by=-1, # annotator's user id - result=self.create_regions({ - control.name: JSF(control.to_json_schema()).generate() - for control in self.controls - }) + result=self._generate_sample_regions() ) annotation_dct = annotation.model_dump() if self.validate_annotation(annotation_dct):