Skip to content

Commit

Permalink
chore: clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
willfarrell committed Aug 19, 2022
1 parent b2e121a commit 57c5b9f
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 28 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ dist
# IDE
.idea
*.iml
.nova

# OS
.DS_Store
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "csv-rex",
"version": "0.3.0",
"version": "0.3.1",
"description": "A tiny and fast CSV parser for JavaScript.",
"type": "module",
"files": [
Expand Down
53 changes: 30 additions & 23 deletions parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@ export const parse = (opts = {}) => {
const options = { ...defaultOptions, ...opts }
options.escapeChar ??= options.quoteChar

let { header } = options
let headerLength = length(header)

let { newlineChar, delimiterChar } = options
let { header, newlineChar, delimiterChar } = options
const {
quoteChar,
escapeChar,
Expand All @@ -38,8 +35,9 @@ export const parse = (opts = {}) => {
errorOnFieldsMismatch
// errorOnFieldMalformed
} = options
const delimiterCharRegExp = /,|\t|\||;|\x1E|\x1F/g // eslint-disable-line no-control-regex
const newlineCharRegExp = /\r\n|\n|\r/g
let headerLength = length(header)
const detectDelimiterCharRegExp = /,|\t|\||;|\x1E|\x1F/g // eslint-disable-line no-control-regex
const detectNewlineCharRegExp = /\r\n|\n|\r/g

const escapedQuoteChar = escapeChar + quoteChar
const escapedQuoteCharRegExp = new RegExp(
Expand Down Expand Up @@ -138,21 +136,6 @@ export const parse = (opts = {}) => {
}
}

/**
 * Guess which candidate character (delimiter, newline, ...) a chunk uses.
 *
 * Every match of `pattern` in `chunk` is tallied. As soon as one candidate
 * has been seen more than 5 times it is returned right away; otherwise the
 * candidate with the highest tally wins (ties go to the one seen first).
 *
 * @param {string} chunk - Text sample to inspect.
 * @param {RegExp} pattern - Alternation of the candidates; expected to carry
 *   the `g` flag so `exec` walks through the whole sample.
 * @returns {string|undefined} The detected candidate, or `undefined` when
 *   none of the candidates occurs in `chunk`.
 */
const detectChar = (chunk, pattern) => {
  // A global regex remembers where the previous scan stopped; always start
  // from the beginning of this chunk.
  pattern.lastIndex = 0
  // Map keeps first-seen order and cannot collide with Object.prototype keys.
  const counts = new Map()
  let match
  while ((match = pattern.exec(chunk))) {
    const char = match[0]
    const count = (counts.get(char) ?? 0) + 1
    counts.set(char, count)
    if (count > 5) {
      // Leaving early: do not leak a mid-string position to the next scan.
      pattern.lastIndex = 0
      return char
    }
  }
  // No candidate crossed the threshold: choose the most frequent one.
  // `best` stays undefined when nothing matched at all.
  let best
  let bestCount = 0
  for (const [char, count] of counts) {
    if (count > bestCount) {
      best = char
      bestCount = count
    }
  }
  return best
}
const chunkParse = (string, controller, flush = false) => {
chunk = string
chunkLength = length(chunk)
Expand All @@ -163,10 +146,16 @@ export const parse = (opts = {}) => {

// auto-detect
if (!newlineChar) {
newlineChar = detectChar(chunk.substring(0, 1024), newlineCharRegExp)
newlineChar = detectChar(
chunk.substring(0, 1024),
detectNewlineCharRegExp
)
newlineCharLength = length(newlineChar)
}
delimiterChar ||= detectChar(chunk.substring(0, 1024), delimiterCharRegExp)
delimiterChar ||= detectChar(
chunk.substring(0, 1024),
detectDelimiterCharRegExp
)

checkForEmptyLine()
let lineStart = 0
Expand Down Expand Up @@ -261,6 +250,24 @@ export const parse = (opts = {}) => {
}
}

/**
 * Detect which of several candidate characters (e.g. delimiters or newline
 * sequences) is used in a sample of text.
 *
 * Matches of `pattern` are counted per candidate. The first candidate to be
 * seen more than 5 times is returned immediately. If the sample ends before
 * that happens, the candidate with the most occurrences is returned, with
 * ties resolved in favour of the candidate encountered first.
 *
 * @param {string} chunk - Text sample to scan.
 * @param {RegExp} pattern - Alternation of candidate characters; expected to
 *   be a global (`g`) regex so that `exec` advances through the sample.
 * @returns {string|undefined} The detected candidate, or `undefined` if no
 *   candidate appears in `chunk`.
 */
export const detectChar = (chunk, pattern) => {
  // This function is exported, so the caller may hand in a regex that has
  // been used before; global regexes resume from `lastIndex`, so rewind it.
  pattern.lastIndex = 0
  // Map preserves first-seen order and has no inherited keys to clash with.
  const tally = new Map()
  let match
  while ((match = pattern.exec(chunk))) {
    const char = match[0]
    const seen = (tally.get(char) ?? 0) + 1
    tally.set(char, seen)
    if (seen > 5) {
      // Early exit leaves the regex mid-string; rewind for the next caller.
      pattern.lastIndex = 0
      return char
    }
  }
  // Threshold never reached: fall back to the most frequent candidate.
  // `winner` remains undefined when the sample contained no candidate.
  let winner
  let winnerCount = 0
  for (const [char, seen] of tally) {
    if (seen > winnerCount) {
      winner = char
      winnerCount = seen
    }
  }
  return winner
}

export const coerceTo = {
string: (field) => field,
boolean: (field) => {
Expand Down
5 changes: 3 additions & 2 deletions parse.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -330,9 +330,10 @@ for (const method of quoteMethods) {
for (const method of quoteMethods) {
test(`${method}: Should parse with { coerceField: (field) => ... }`, async (t) => {
const coerceField = (field, idx) => Object.values(coerceTo)[idx](field)
const options = { quoteChar: '\'', coerceField }
const options = { quoteChar: "'", coerceField }
const enqueue = sinon.spy()
const chunk = 'string,boolean,integer,decimal,json,timestamp,_true,_false,_null\r\nstring,true,-1,-1.1,\'{"a":"b"}\',2022-07-30T04:46:24.466Z,true,false,null\r\n'
const chunk =
'string,boolean,integer,decimal,json,timestamp,_true,_false,_null\r\nstring,true,-1,-1.1,\'{"a":"b"}\',2022-07-30T04:46:24.466Z,true,false,null\r\n'
const parser = parse(options)
parser[method](chunk, { enqueue })
deepEqual(enqueue.firstCall.args, [
Expand Down

0 comments on commit 57c5b9f

Please sign in to comment.