This repository has been archived by the owner on Jan 10, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
arff.go
133 lines (110 loc) · 2.77 KB
/
arff.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
package arff
import (
"fmt"
"strconv"
"time"
)
// DataType identifies the kind of value an Attribute holds.
type DataType uint8
// Supported attribute data types. The values are assigned by iota, so
// DataTypeNumeric is 0 and each following constant is one greater.
const (
// DataTypeNumeric values are parsed by Attribute.parse as 64-bit floats.
DataTypeNumeric DataType = iota
// DataTypeString values are passed through unquote by Attribute.parse.
DataTypeString
// DataTypeDate values are parsed by Attribute.parse using the
// iso8691DateFormat layout.
DataTypeDate
// DataTypeNominal values are handled by Attribute.parse the same way as
// strings; Attribute.NominalValues is meant to be populated for this type.
DataTypeNominal
)
// Relation holds a dataset's name together with its attribute definitions.
type Relation struct {
// Name is the relation name; validate rejects an empty name.
Name string
// Attributes holds the attribute definitions; AddAttribute appends to it.
Attributes []Attribute
}
// AddAttribute appends a new attribute definition to the relation.
//
// name is the attribute name, dataType its data type, and nominalVals the
// list of values to record for nominal data types. nominalVals is stored as
// given, whatever dataType is.
//
// It returns errAttrRedefined, leaving the relation unchanged, when an
// attribute with the same name already exists; otherwise it returns nil.
func (r *Relation) AddAttribute(name string, dataType DataType, nominalVals []string) error {
	// Names must be unique within a relation.
	for i := range r.Attributes {
		if r.Attributes[i].Name == name {
			return errAttrRedefined
		}
	}

	added := Attribute{
		Name:          name,
		DataType:      dataType,
		NominalValues: nominalVals,
	}
	r.Attributes = append(r.Attributes, added)
	return nil
}
// validate checks the relation definition. It returns errMissingRelName when
// the name is empty, otherwise the first error reported by an attribute's own
// validate method (in slice order), or nil when everything passes.
func (r *Relation) validate() error {
	if len(r.Name) == 0 {
		return errMissingRelName
	}

	for i := range r.Attributes {
		err := r.Attributes[i].validate()
		if err != nil {
			return err
		}
	}
	return nil
}
// Attribute describes a single attribute of the dataset.
type Attribute struct {
// Name is the attribute name; validate rejects an empty name.
Name string
// DataType is the attribute's data type; it selects how parse interprets
// raw values.
DataType DataType
// NominalValues are only populated for nominal types.
NominalValues []string
}
// validate checks the attribute definition. It returns errMissingAttrName
// when the name is empty and nil otherwise.
func (a *Attribute) validate() error {
	if a.Name != "" {
		return nil
	}
	return errMissingAttrName
}
// parse converts the raw text s into a value according to the attribute's
// data type.
//
// The result is:
//   - nil (with a nil error) when s is exactly "?";
//   - a float64 for DataTypeNumeric;
//   - a time.Time, read with the iso8691DateFormat layout in the utc
//     location, for DataTypeDate;
//   - the result of unquote(s) for every other data type.
//
// An error is returned only when a numeric or date value cannot be parsed.
func (a *Attribute) parse(s string) (interface{}, error) {
	if s == "?" {
		return nil, nil
	}

	if a.DataType == DataTypeNumeric {
		value, err := strconv.ParseFloat(s, 64)
		if err == nil {
			return value, nil
		}
		return nil, fmt.Errorf("value '%s' is not numeric", s)
	}

	if a.DataType == DataTypeDate {
		value, err := time.ParseInLocation(iso8691DateFormat, s, utc)
		if err == nil {
			return value, nil
		}
		return nil, fmt.Errorf("value '%s' is not an ISO8601 date", s)
	}

	// Everything else needs no conversion beyond unquoting.
	return unquote(s), nil
}
// DataRow represents a parsed data row.
type DataRow struct {
// Values holds the row's parsed values.
// NOTE(review): presumably one entry per relation attribute, as produced by
// Attribute.parse (nil for a missing value) — confirm against the reader.
Values []interface{}
// Weight is the weight associated with the row.
// NOTE(review): the value used when a row declares no weight is not set
// here — confirm against the code that builds rows.
Weight float64
}
// --------------------------------------------------------------------
// iso8691DateFormat is the time layout used to parse date attribute values:
// date and time separated by "T", with no fractional seconds or zone offset.
// NOTE(review): the identifier says "8691" while the error message in
// Attribute.parse says ISO8601 — presumably a typo in the name.
const iso8691DateFormat = "2006-01-02T15:04:05"
// utc is the location in which date attribute values are interpreted.
//
// It is time.UTC itself. time.LoadLocation("UTC") is documented to return
// that same value, so looking it up in an init function (with a panic on
// failure) is unnecessary; a plain initializer yields the identical
// *time.Location without an init-time side effect or panic path.
var utc = time.UTC
// constError is a string-backed error type. Because its underlying type is
// string, error values of this type can be declared as constants.
type constError string

// Error returns the receiver's text, satisfying the built-in error interface.
func (msg constError) Error() string {
	return string(msg)
}
// Error values used by this package. They are constants of type constError,
// so each one satisfies the error interface and can be compared with ==.
const (
errBadSyntax constError = "bad syntax"
errMissingAttrName constError = "missing attribute name"
errMissingAttrType constError = "missing data-type"
errInvalidAttrType constError = "invalid data-type"
errAttrRedefined constError = "redefined attribute"
errAttrMismatch constError = "attribute mismatch"
errMissingRelName constError = "missing relation name"
errInvalidWeight constError = "invalid weight definition"
)