Skip to content

Commit

Permalink
feat: support JSON schema (#63)
Browse files Browse the repository at this point in the history
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Added functionality to generate JSON schemas from arguments, enabling
integration with JSON editors like Visual Studio Code.
  
- **Documentation**
- Introduced new documentation on generating JSON schemas from
arguments.

- **Tests**
  - Added tests to validate JSON schema generation and type conversion.

- **Chores**
  - Updated dependencies to include `jsonschema` for testing.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
njzjz and pre-commit-ci[bot] authored Jun 3, 2024
1 parent d34601e commit 2a17ab3
Show file tree
Hide file tree
Showing 7 changed files with 249 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ Please refer to test files for detailed usage.
- [PEP 484](https://peps.python.org/pep-0484/) type annotations
- Native integration with [Sphinx](https://github.com/sphinx-doc/sphinx), [DP-GUI](https://github.com/deepmodeling/dpgui), and [Jupyter Notebook](https://jupyter.org/)
- JSON encoder for `Argument` and `Variant` classes
- Generate [JSON schema](https://json-schema.org/) from an `Argument`, which can be further integrated with JSON editors such as [Visual Studio Code](https://code.visualstudio.com/)
6 changes: 5 additions & 1 deletion dargs/dargs.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,11 +460,15 @@ def _check_data(self, value: Any, path=None):
)

def _check_strict(self, value: dict, path=None):
allowed_keys = self.flatten_sub(value, path).keys()
allowed_keys = set(self.flatten_sub(value, path).keys())
# curpath = [*path, self.name]
if not len(allowed_keys):
# no allowed keys defined, allow any keys
return
# A special case to allow $schema in any dict to be compatible with vscode + json schema
# https://code.visualstudio.com/docs/languages/json#_mapping-in-the-json
# considering usually it's not a typo of users when they use $schema
allowed_keys.add("$schema")
for name in value.keys():
if name not in allowed_keys:
dym_message = did_you_mean(name, allowed_keys)
Expand Down
158 changes: 158 additions & 0 deletions dargs/json_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
"""Generate JSON schema from a given dargs.Argument."""

from __future__ import annotations

from typing import Any

from dargs.dargs import Argument, _Flags

try:
from typing import get_origin
except ImportError:
from typing_extensions import get_origin


def generate_json_schema(argument: Argument, id: str = "") -> dict:
    """Build a complete JSON schema document for a dargs.Argument.

    The returned dict starts with the ``$schema``, ``$id`` and ``title``
    header keys and is then extended with the converted body of
    ``argument``.

    Parameters
    ----------
    argument : Argument
        The argument to generate JSON schema.
    id : str, optional
        The URL of the schema, by default "".

    Returns
    -------
    dict
        The JSON schema. Use :func:`json.dump` to save it to a file
        or :func:`json.dumps` to get a string.

    Examples
    --------
    Dump the JSON schema of DeePMD-kit to a file:

    >>> from dargs.json_schema import generate_json_schema
    >>> from deepmd.utils.argcheck import gen_args
    >>> import json
    >>> from dargs import Argument
    >>> a = Argument("DeePMD-kit", dtype=dict, sub_fields=gen_args())
    >>> schema = generate_json_schema(a)
    >>> with open("deepmd.json", "w") as f:
    ...     json.dump(schema, f, indent=2)
    """
    # Header keys first, so they lead the serialized document.
    document = {
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "$id": id,
        "title": argument.name,
    }
    # The body of the schema comes from the recursive conversion.
    document.update(_convert_single_argument(argument))
    return document


def _convert_single_argument(argument: Argument) -> dict:
    """Convert a single argument to JSON schema.

    Sub-fields and the choices of sub-variants are converted recursively.
    For a repeated argument the object description is placed under
    ``items``; otherwise it is placed directly on the returned schema.

    Parameters
    ----------
    argument : Argument
        The argument to convert.

    Returns
    -------
    dict
        The JSON schema of the argument.
    """
    data = {
        "description": argument.doc,
        # Sort the de-duplicated type names: iterating a set of strings
        # yields an order that depends on hash randomization, which would
        # make the generated schema differ from run to run.
        "type": sorted({_convert_types(tt) for tt in argument.dtype}),
    }
    # NOTE(review): _Flags.NONE looks like the "no default given" sentinel;
    # confirm against dargs.dargs.
    if argument.default is not _Flags.NONE:
        data["default"] = argument.default

    # Each sub-field is exposed under its name and under every alias; each
    # variant contributes its flag key as a string enum of all choice tags.
    properties = {
        **{
            nn: _convert_single_argument(aa)
            for aa in argument.sub_fields.values()
            for nn in (aa.name, *aa.alias)
        },
        **{
            vv.flag_name: {
                "type": "string",
                "enum": list(vv.choice_dict.keys()) + list(vv.choice_alias.keys()),
                "default": vv.default_tag,
                "description": vv.doc,
            }
            for vv in argument.sub_variants.values()
        },
    }

    # Aliased mandatory fields are handled separately below, because any one
    # of their names satisfies the requirement.
    required = [
        aa.name
        for aa in argument.sub_fields.values()
        if not aa.optional and not aa.alias
    ] + [vv.flag_name for vv in argument.sub_variants.values() if not vv.optional]

    # One if/then rule per variant choice: when the flag key selects the
    # choice (by tag or alias), the schema of that choice must hold.
    allof = [
        {
            "if": {
                # anyOf rather than oneOf: when the flag key is absent every
                # ``properties`` sub-schema is vacuously valid, so oneOf would
                # reject the condition as soon as the choice has an alias and
                # the default choice would never be enforced.  The tag and its
                # aliases are distinct constants, so at most one alternative
                # matches when the key is present.
                "anyOf": [
                    {
                        "properties": {vv.flag_name: {"const": kkaa}},
                    }
                    for kkaa in (kk, *aa.alias)
                ],
                # The flag key must be present unless the variant is optional
                # and this choice is its default tag.
                "required": [vv.flag_name]
                if not (vv.optional and vv.default_tag == kk)
                else [],
            },
            "then": _convert_single_argument(aa),
        }
        for vv in argument.sub_variants.values()
        for kk, aa in vv.choice_dict.items()
    ]
    # A mandatory field with aliases must be given under exactly one name.
    allof += [
        {"oneOf": [{"required": [nn]} for nn in (aa.name, *aa.alias)]}
        for aa in argument.sub_fields.values()
        if not aa.optional and aa.alias
    ]

    if not argument.repeat:
        data["properties"] = properties
        data["required"] = required
        if allof:
            data["allOf"] = allof
    else:
        # Repeated argument: the object description applies to each item.
        data["items"] = {
            "type": "object",
            "properties": properties,
            "required": required,
        }
        if allof:
            data["items"]["allOf"] = allof
    return data


def _convert_types(T: type | Any | None) -> str:
"""Convert a type to JSON schema type.
Parameters
----------
T : type | Any | None
The type to convert.
Returns
-------
str
The JSON schema type.
"""
# string, number, integer, object, array, boolean, null
if T is None or T is type(None):
return "null"
elif T is str:
return "string"
elif T in (int, float):
return "number"
elif T is bool:
return "boolean"
elif T is list or get_origin(T) is list:
return "array"
elif T is dict or get_origin(T) is dict:
return "object"
raise ValueError(f"Unknown type: {T}")
42 changes: 42 additions & 0 deletions docs/json_schema.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
## Generate JSON schema from an argument

One can use {func}`dargs.json_schema.generate_json_schema` to generate a [JSON schema](https://json-schema.org/).

```py
import json

from dargs import Argument
from dargs.json_schema import generate_json_schema
from deepmd.utils.argcheck import gen_args


a = Argument("DeePMD-kit", dtype=dict, sub_fields=gen_args())
schema = generate_json_schema(a)
with open("deepmd.json", "w") as f:
json.dump(schema, f, indent=2)
```

JSON schema can be used in several JSON editors. For example, in [Visual Studio Code](https://code.visualstudio.com/), you can [configure JSON schema](https://code.visualstudio.com/docs/languages/json#_json-schemas-and-settings) in the project `settings.json`:

```json
{
"json.schemas": [
{
"fileMatch": [
"/**/*.json"
],
"url": "./deepmd.json"
}
]
}
```

VS Code also allows one to [specify the JSON schema in a JSON file](https://code.visualstudio.com/docs/languages/json#_mapping-in-the-json) with the `$schema` key.
To be compatible, dargs will not throw an error for `$schema` in the strict mode even if `$schema` is not defined in the argument.

```json
{
"$schema": "./deepmd.json",
"model": {}
}
```
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ repository = "https://github.com/deepmodeling/dargs"
[project.optional-dependencies]
test = [
"ipython",
"jsonschema",
]
typecheck = [
"basedpyright==1.12.2",
Expand Down
13 changes: 12 additions & 1 deletion tests/dpmdargs.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def descrpt_hybrid_args():
"type",
[
Argument("loc_frame", dict, descrpt_local_frame_args()),
Argument("se_a", dict, descrpt_se_a_args()),
Argument("se_e2_a", dict, descrpt_se_a_args(), alias=["se_a"]),
Argument("se_r", dict, descrpt_se_r_args()),
Argument(
"se_a_3be", dict, descrpt_se_a_3be_args(), alias=["se_at"]
Expand Down Expand Up @@ -764,8 +764,19 @@ def normalize(data):
return data


def gen_args() -> Argument:
    """Assemble the root ``base`` dict argument.

    The sub-fields are, in order, the results of the model, learning-rate,
    loss and training argument builders.
    """
    sections = [
        model_args(),
        learning_rate_args(),
        loss_args(),
        training_args(),
    ]
    return Argument("base", dict, sections)


example_json_str = """
{
"$schema": "this should be ignored by dargs",
"_comment": " model parameters",
"model": {
"type_map": ["O", "H"],
Expand Down
30 changes: 30 additions & 0 deletions tests/test_json_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from __future__ import annotations

import json
import unittest

from jsonschema import validate

from dargs.json_schema import _convert_types, generate_json_schema

from .dpmdargs import example_json_str, gen_args


class TestJsonSchema(unittest.TestCase):
    """Tests for schema generation and type conversion in dargs.json_schema."""

    def test_json_schema(self):
        """The example input validates against the generated schema."""
        schema = generate_json_schema(gen_args())
        validate(json.loads(example_json_str), schema)

    def test_convert_types(self):
        """Each supported Python type maps to the expected JSON schema type."""
        expectations = (
            (int, "number"),
            (str, "string"),
            (float, "number"),
            (bool, "boolean"),
            (None, "null"),
            (type(None), "null"),
            (list, "array"),
            (dict, "object"),
        )
        for python_type, json_type in expectations:
            self.assertEqual(_convert_types(python_type), json_type)
        # Types without a JSON counterpart are rejected.
        self.assertRaises(ValueError, _convert_types, set)

0 comments on commit 2a17ab3

Please sign in to comment.