-
Notifications
You must be signed in to change notification settings - Fork 0
/
MLdata-Basic-Schema.json
106 lines (106 loc) · 3.89 KB
/
MLdata-Basic-Schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "",
"title": "ML Data Basic Schema",
"description": "Information about a set of scientific training data for applying it to machine-learning. This schema servers as a basic for structuring this information and can be extended.",
"required": [],
"definitions": {
"checksum": {
"type": "string"
},
"termDefinition": {
"description": "Provides information about the human-readable term for a feature or a label, which is optimally defined in a vocabulary. This would support an automated re-featuring or -labeling in case the data needs to be harmonized with other training data. Additional properties could be implemneted to specify for example pre-prpcessing steps for the features and labels.",
"type": "object",
"required": ["term", "termValueDataType"],
"properties": {
"term": {
"type": "string"
},
"termDefinition": {
"type": "string",
"format": "uri"
},
"termValueDataType": {
"description": "Defines the data type of the feature or label value, on which the data preprocessin steps depend.",
"type": [
"integer",
"number",
"string"
]
}
},
"additionalProperties": true
},
"annotation": {
"type": "object",
"properties": {
"annotationFileRef": {
"type": "string",
"format": "uri"
},
"annotationChecksum": {
"$ref": "#/definitions/checksum"
}
}
},
"tag": {
"type": "string"
}
},
"type": "object",
"required": ["dataLocation", "dataFormat", "dataChecksum", "dataLicense"],
"properties": {
"dataLocation": {
"type": "string",
"format": "uri"
},
"dataFormat": {
"type": "string"
},
"dataChecksum": {
"$ref": "#/definitions/checksum"
},
"dataLicense": {
"type": "string"
}
},
"featureProperties": {
"description": "The input features of the data for the machine-learning algorithm. Properties will depend on the particular data.",
"type": "array",
"items": {
"type": "object",
"required": ["featureTerm"],
"properties": {
"featureTerm": {
"$ref": "#/definitions/termDefinition"
}
},
"additionalProperties": true
}
},
"labelProperties": {
"description": "The features that represent the output variables (i.e. labels) and will be predicted by the input features.",
"type": "array",
"items": {
"type": "object",
"required": ["labelDescription", "labelTerm"],
"properties": {
"labelDescription": {
"type": "string"
},
"labelTerm": {
"$ref": "#/definitions/termDefinition"
},
"imageLabelType": {
"description": "This is an example for image data, how the label information can be more specified. Additional properties could be implemented for other data types, e.g. text data.",
"type": ["string", "object"],
"enum": [
{"$ref": "#/definitions/annotation"},
{"$ref": "#/definitions/tag"}
]
},
"additionalProperties": true
}
}
}
}