Second batch of performance improvements
dehesa committed Nov 22, 2020
1 parent df2d07d commit 554f5d5
Showing 12 changed files with 127 additions and 102 deletions.
20 changes: 12 additions & 8 deletions sources/declarative/decodable/Decoder.swift
@@ -36,35 +36,39 @@ extension CSVDecoder {
/// - parameter data: The data blob representing a CSV file.
open func decode<T:Decodable>(_ type: T.Type, from data: Data) throws -> T {
let reader = try CSVReader(input: data, configuration: self._configuration.readerConfiguration)
let source = ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)
return try T(from: ShadowDecoder(source: source, codingPath: []))
return try withExtendedLifetime(ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)) {
try T(from: ShadowDecoder(source: .passUnretained($0), codingPath: []))
}
}

/// Returns a value of the type you specify, decoded from a CSV file (given as a `String`).
/// - parameter type: The type of the value to decode from the supplied file.
/// - parameter string: A Swift string representing a CSV file.
open func decode<T:Decodable>(_ type: T.Type, from string: String) throws -> T {
let reader = try CSVReader(input: string, configuration: self._configuration.readerConfiguration)
let source = ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)
return try T(from: ShadowDecoder(source: source, codingPath: []))
return try withExtendedLifetime(ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)) {
try T(from: ShadowDecoder(source: .passUnretained($0), codingPath: []))
}
}

/// Returns a value of the type you specify, decoded from a CSV file (being pointed by the url).
/// - parameter type: The type of the value to decode from the supplied file.
/// - parameter url: The URL pointing to the file to decode.
open func decode<T:Decodable>(_ type: T.Type, from url: URL) throws -> T {
let reader = try CSVReader(input: url, configuration: self._configuration.readerConfiguration)
let source = ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)
return try T(from: ShadowDecoder(source: source, codingPath: []))
return try withExtendedLifetime(ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)) {
try T(from: ShadowDecoder(source: .passUnretained($0), codingPath: []))
}
}

/// Returns a value of the type you specify, decoded from a CSV file (provided by the input stream).
/// - parameter type: The type of the value to decode from the supplied file.
/// - parameter stream: The input stream providing the raw bytes.
open func decode<T:Decodable>(_ type: T.Type, from stream: InputStream) throws -> T {
let reader = try CSVReader(input: stream, configuration: self._configuration.readerConfiguration)
let source = ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)
return try T(from: ShadowDecoder(source: source, codingPath: []))
return try withExtendedLifetime(ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)) {
try T(from: ShadowDecoder(source: .passUnretained($0), codingPath: []))
}
}
}
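
The same pattern repeats across all four entry points: the `ShadowDecoder.Source` reference is kept alive exactly once with `withExtendedLifetime`, and everything underneath carries an `Unmanaged<Source>` via `passUnretained`, so copying the decoder values no longer produces retain/release traffic. A minimal, self-contained sketch of the idea (the `Context` and `Worker` names are made up for illustration and are not part of CodableCSV):

// `Context` stands in for a reference type such as `ShadowDecoder.Source`.
final class Context {
    var counter = 0
}

// `Worker` stands in for the value types (decoders, containers) sharing that reference.
struct Worker {
    let context: Unmanaged<Context>

    func bump() {
        // Scoped access to the instance without an extra retain/release pair.
        self.context._withUnsafeGuaranteedRef { $0.counter += 1 }
    }
}

func run() -> Int {
    // The entry point guarantees the lifetime; everything below passes the reference unretained.
    withExtendedLifetime(Context()) { context -> Int in
        let worker = Worker(context: .passUnretained(context))
        for _ in 0..<1_000 { worker.bump() }
        return context.counter
    }
}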

2 changes: 1 addition & 1 deletion sources/declarative/decodable/DecoderLazy.swift
@@ -48,7 +48,7 @@ extension CSVDecoder.Lazy {
guard !self._source.isRowAtEnd(index: self._currentIndex) else { return nil }

defer { self._currentIndex += 1 }
let decoder = ShadowDecoder(source: self._source, codingPath: [IndexKey(self._currentIndex)])
let decoder = ShadowDecoder(source: .passUnretained(self._source), codingPath: [IndexKey(self._currentIndex)])
return Row(decoder: decoder)
}

@@ -43,31 +43,35 @@ extension ShadowDecoder {
}

var allKeys: [Key] {
switch self._focus {
case .file:
guard let numRows = self._decoder.source.numRows, numRows > 0 else { return [] }
return (0..<numRows).compactMap { Key(intValue: $0) }
case .row:
let numFields = self._decoder.source.numExpectedFields
guard numFields > 0 else { return [] }

let numberKeys = (0..<numFields).compactMap { Key(intValue: $0) }
guard numberKeys.isEmpty else { return numberKeys }

return self._decoder.source.headers.compactMap { Key(stringValue: $0) }
self._decoder.source._withUnsafeGuaranteedRef { [focus = self._focus] in
switch focus {
case .file:
guard let numRows = $0.numRows, numRows > 0 else { return [] }
return (0..<numRows).compactMap { Key(intValue: $0) }
case .row:
let numFields = $0.numExpectedFields
guard numFields > 0 else { return [] }

let numberKeys = (0..<numFields).compactMap { Key(intValue: $0) }
guard numberKeys.isEmpty else { return numberKeys }

return $0.headers.compactMap { Key(stringValue: $0) }
}
}
}

func contains(_ key: Key) -> Bool {
switch self._focus {
case .file:
guard let index = key.intValue else { return false }
return self._decoder.source.contains(rowIndex: index)
case .row:
if let index = key.intValue {
return index >= 0 && index < self._decoder.source.numExpectedFields
} else {
return self._decoder.source.headers.contains(key.stringValue)
self._decoder.source._withUnsafeGuaranteedRef { [focus = self._focus] in
switch focus {
case .file:
guard let index = key.intValue else { return false }
return $0.contains(rowIndex: index)
case .row:
if let index = key.intValue {
return index >= 0 && index < $0.numExpectedFields
} else {
return $0.headers.contains(key.stringValue)
}
}
}
}
@@ -277,11 +281,11 @@ private extension ShadowDecoder.KeyedContainer {

switch self._focus {
case .row(let rowIndex):
index = (rowIndex, try self._decoder.source.fieldIndex(forKey: key, codingPath: self.codingPath))
index = (rowIndex, try self._decoder.source._withUnsafeGuaranteedRef({ try $0.fieldIndex(forKey: key, codingPath: self.codingPath) }))
case .file:
guard let rowIndex = key.intValue else { throw CSVDecoder.Error._invalidRowKey(forKey: key, codingPath: codingPath) }
// Values are only allowed to be decoded directly from a nested container in "file level" if the CSV rows have a single column.
guard self._decoder.source.numExpectedFields == 1 else { throw CSVDecoder.Error._invalidNestedRequired(codingPath: self.codingPath) }
guard self._decoder.source._withUnsafeGuaranteedRef({ $0.numExpectedFields == 1 }) else { throw CSVDecoder.Error._invalidNestedRequired(codingPath: self.codingPath) }
index = (rowIndex, 0)
codingPath.append(IndexKey(index.field))
}
@@ -30,7 +30,7 @@ extension ShadowDecoder {
case 2:
let key = (row: decoder.codingPath[0], field: decoder.codingPath[1])
let r = try key.row.intValue ?> CSVDecoder.Error._invalidRowKey(forKey: key.row, codingPath: decoder.codingPath)
let f = try decoder.source.fieldIndex(forKey: key.field, codingPath: decoder.codingPath)
let f = try decoder.source._withUnsafeGuaranteedRef { try $0.fieldIndex(forKey: key.field, codingPath: decoder.codingPath) }
self._focus = .field(r, f)
case 1:
let key = decoder.codingPath[0]
@@ -56,14 +56,14 @@ extension ShadowDecoder.SingleValueContainer {
}

func decodeNil() -> Bool {
switch self._decoder.source.configuration.nilStrategy {
switch self._decoder.source._withUnsafeGuaranteedRef({ $0.configuration.nilStrategy }) {
case .empty: return (try? self._lowlevelDecode { $0.isEmpty }) ?? false
case .custom(let closure): return closure(self._decoder)
}
}

func decode(_ type: Bool.Type) throws -> Bool {
switch self._decoder.source.configuration.boolStrategy {
switch self._decoder.source._withUnsafeGuaranteedRef({ $0.configuration.boolStrategy }) {
case .deferredToBool:
return try self._lowlevelDecode { Bool($0) }
case .insensitive:
@@ -122,7 +122,7 @@ extension ShadowDecoder.SingleValueContainer {
func decode(_ type: Float.Type) throws -> Float {
try self._lowlevelDecode {
guard let result = Float($0), result.isFinite else {
switch self._decoder.source.configuration.nonConformingFloatStrategy {
switch self._decoder.source._withUnsafeGuaranteedRef({ $0.configuration.nonConformingFloatStrategy }) {
case .throw: return nil
case .convert(let positiveInfinity, let negativeInfinity, let nan):
switch $0 {
@@ -141,7 +141,7 @@ extension ShadowDecoder.SingleValueContainer {
func decode(_ type: Double.Type) throws -> Double {
try self._lowlevelDecode {
guard let result = Double($0), result.isFinite else {
switch self._decoder.source.configuration.nonConformingFloatStrategy {
switch self._decoder.source._withUnsafeGuaranteedRef({ $0.configuration.nonConformingFloatStrategy }) {
case .throw: return nil
case .convert(let positiveInfinity, let negativeInfinity, let nan):
switch $0 {
@@ -173,7 +173,7 @@ extension ShadowDecoder.SingleValueContainer {
/// - parameter type: The type to decode as.
/// - returns: A value of the requested type.
func decode(_ type: Date.Type) throws -> Date {
switch self._decoder.source.configuration.dateStrategy {
switch self._decoder.source._withUnsafeGuaranteedRef({ $0.configuration.dateStrategy }) {
case .deferredToDate:
return try Date(from: self._decoder)
case .secondsSince1970:
@@ -197,7 +197,7 @@ extension ShadowDecoder.SingleValueContainer {
/// - parameter type: The type to decode as.
/// - returns: A value of the requested type.
func decode(_ type: Data.Type) throws -> Data {
switch self._decoder.source.configuration.dataStrategy {
switch self._decoder.source._withUnsafeGuaranteedRef({ $0.configuration.dataStrategy }) {
case .deferredToData:
return try Data(from: self._decoder)
case .base64:
@@ -212,7 +212,7 @@ extension ShadowDecoder.SingleValueContainer {
/// - parameter type: The type to decode as.
/// - returns: A value of the requested type.
func decode(_ type: Decimal.Type) throws -> Decimal {
switch self._decoder.source.configuration.decimalStrategy {
switch self._decoder.source._withUnsafeGuaranteedRef({ $0.configuration.decimalStrategy }) {
case .locale(let locale):
let string = try self.decode(String.self)
return try Decimal(string: string, locale: locale) ?> CSVDecoder.Error._invalidDecimal(string: string, locale: locale, codingPath: self.codingPath)
@@ -245,24 +245,24 @@ private extension ShadowDecoder.SingleValueContainer {
/// Decodes the `String` value under the receiving single value container's `focus` and then tries to transform it in the requested type.
/// - parameter transform: Closure transforming the decoded `String` value into the required type. If it fails, the closure returns `nil`.
func _lowlevelDecode<T>(transform: (String) -> T?) throws -> T {
let source = self._decoder.source

switch self._focus {
case .field(let rowIndex, let fieldIndex):
let string = try source.field(rowIndex, fieldIndex)
return try transform(string) ?> CSVDecoder.Error._invalid(type: T.self, string: string, codingPath: self.codingPath)
case .row(let rowIndex):
// Values are only allowed to be decoded directly from a single value container in "row level" if the CSV has single column rows.
guard source.numExpectedFields == 1 else { throw CSVDecoder.Error._invalidNestedRequired(codingPath: self.codingPath) }
let string = try source.field(rowIndex, 0)
return try transform(string) ?> CSVDecoder.Error._invalid(type: T.self, string: string, codingPath: self.codingPath + [IndexKey(0)])
case .file:
// Values are only allowed to be decoded directly from a single value container in "file level" if the CSV file has a single row with a single column.
if source.isRowAtEnd(index: 1), source.numExpectedFields == 1 {
let string = try self._decoder.source.field(0, 0)
return try transform(string) ?> CSVDecoder.Error._invalid(type: T.self, string: string, codingPath: self.codingPath + [IndexKey(0), IndexKey(0)])
} else {
throw CSVDecoder.Error._invalidNestedRequired(codingPath: self.codingPath)
try self._decoder.source._withUnsafeGuaranteedRef {
switch self._focus {
case .field(let rowIndex, let fieldIndex):
let string = try $0.field(rowIndex, fieldIndex)
return try transform(string) ?> CSVDecoder.Error._invalid(type: T.self, string: string, codingPath: self.codingPath)
case .row(let rowIndex):
// Values are only allowed to be decoded directly from a single value container in "row level" if the CSV has single column rows.
guard $0.numExpectedFields == 1 else { throw CSVDecoder.Error._invalidNestedRequired(codingPath: self.codingPath) }
let string = try $0.field(rowIndex, 0)
return try transform(string) ?> CSVDecoder.Error._invalid(type: T.self, string: string, codingPath: self.codingPath + [IndexKey(0)])
case .file:
// Values are only allowed to be decoded directly from a single value container in "file level" if the CSV file has a single row with a single column.
if $0.isRowAtEnd(index: 1), $0.numExpectedFields == 1 {
let string = try $0.field(0, 0)
return try transform(string) ?> CSVDecoder.Error._invalid(type: T.self, string: string, codingPath: self.codingPath + [IndexKey(0), IndexKey(0)])
} else {
throw CSVDecoder.Error._invalidNestedRequired(codingPath: self.codingPath)
}
}
}
}
@@ -46,16 +46,20 @@ extension ShadowDecoder {
}

var count: Int? {
switch self._focus {
case .file: return self._decoder.source.numRows
case .row: return self._decoder.source.numExpectedFields
self._decoder.source._withUnsafeGuaranteedRef { [focus = self._focus] in
switch focus {
case .file: return $0.numRows
case .row: return $0.numExpectedFields
}
}
}

var isAtEnd: Bool {
switch self._focus {
case .file: return self._decoder.source.isRowAtEnd(index: self.currentIndex)
case .row: return self._decoder.source.isFieldAtEnd(index: self.currentIndex)
self._decoder.source._withUnsafeGuaranteedRef {
switch self._focus {
case .file: return $0.isRowAtEnd(index: self.currentIndex)
case .row: return $0.isFieldAtEnd(index: self.currentIndex)
}
}
}
}
@@ -234,7 +238,7 @@ private extension ShadowDecoder.UnkeyedContainer {
index = (rowIndex, self.currentIndex)
case .file:
// Values are only allowed to be decoded directly from a nested container in "file level" if the CSV rows have a single column.
guard self._decoder.source.numExpectedFields == 1 else { throw CSVDecoder.Error._invalidNestedRequired(codingPath: self.codingPath) }
guard self._decoder.source._withUnsafeGuaranteedRef({ $0.numExpectedFields == 1 }) else { throw CSVDecoder.Error._invalidNestedRequired(codingPath: self.codingPath) }
index = (self.currentIndex, 0)
codingPath.append(IndexKey(index.field))
}
13 changes: 9 additions & 4 deletions sources/declarative/decodable/internal/ShadowDecoder.swift
@@ -3,19 +3,24 @@
/// A shadow decoder represents a moment in time on the decoding process. Therefore it is a immutable structure.
internal struct ShadowDecoder: Decoder {
/// The source of the CSV data.
let source: Source
let source: Unmanaged<Source>
/// The path of coding keys taken to get to this point in decoding.
let codingPath: [CodingKey]
/// Any contextual information set by the user for decoding.
var userInfo: [CodingUserInfoKey:Any] { self.source.userInfo }

/// Designated initializer passing all required components.
/// - parameter source: The data source for the decoder.
/// - parameter codingPath: The path taken to create the decoder instance.
init(source: Source, codingPath: [CodingKey]) {
init(source: Unmanaged<Source>, codingPath: [CodingKey]) {
self.source = source
self.codingPath = codingPath
}

/// Any contextual information set by the user for decoding.
var userInfo: [CodingUserInfoKey:Any] {
self.source._withUnsafeGuaranteedRef {
$0.userInfo
}
}
}

extension ShadowDecoder {
14 changes: 8 additions & 6 deletions sources/declarative/encodable/Encoder.swift
@@ -37,9 +37,10 @@ extension CSVEncoder {
/// - returns: `Data` blob with the CSV representation of `value`.
open func encode<T:Encodable>(_ value: T, into type: Data.Type) throws -> Data {
let writer = try CSVWriter(configuration: self._configuration.writerConfiguration)
let sink = try ShadowEncoder.Sink(writer: writer, configuration: self._configuration, userInfo: self.userInfo)
try value.encode(to: ShadowEncoder(sink: sink, codingPath: []))
try sink.completeEncoding()
try withExtendedLifetime(try ShadowEncoder.Sink(writer: writer, configuration: self._configuration, userInfo: self.userInfo)) {
try value.encode(to: ShadowEncoder(sink: .passUnretained($0), codingPath: []))
try $0.completeEncoding()
}
return try writer.data()
}

@@ -59,9 +60,10 @@ extension CSVEncoder {
/// - parameter append: In case an existing file is under the given URL, this Boolean indicates that the information will be appended to the file (`true`), or the file will be overwritten (`false`).
open func encode<T:Encodable>(_ value: T, into fileURL: URL, append: Bool = false) throws {
let writer = try CSVWriter(fileURL: fileURL, append: append, configuration: self._configuration.writerConfiguration)
let sink = try ShadowEncoder.Sink(writer: writer, configuration: self._configuration, userInfo: self.userInfo)
try value.encode(to: ShadowEncoder(sink: sink, codingPath: []))
try sink.completeEncoding()
try withExtendedLifetime(try ShadowEncoder.Sink(writer: writer, configuration: self._configuration, userInfo: self.userInfo)) {
try value.encode(to: ShadowEncoder(sink: .passUnretained($0), codingPath: []))
try $0.completeEncoding()
}
}
}
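
The encoder's public signatures are untouched (only the sink's lifetime handling changes), so existing call sites keep working as before. A minimal usage sketch under assumed names (the `Person` type, the header values, and the configuration closure are illustrative, following the style of the project README):

import Foundation
import CodableCSV

// Hypothetical row type used only for this example.
struct Person: Encodable {
    let name: String
    let age: Int
}

func exportCSV(to fileURL: URL) throws {
    let people = [Person(name: "Ada", age: 36), Person(name: "Grace", age: 45)]
    let encoder = CSVEncoder { $0.headers = ["name", "age"] }

    // In-memory encoding…
    let data = try encoder.encode(people, into: Data.self)
    print("Encoded \(data.count) bytes")

    // …or writing straight to disk, overwriting any existing file.
    try encoder.encode(people, into: fileURL, append: false)
}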


3 comments on commit 554f5d5

@JackYoustra

@dehesa just curious, does this result in measurable performance improvements? Does the compiler not optimize out the retain/release calls from the withExtendedLifetime and passUnretained calls?

@dehesa (Owner, Author) commented on 554f5d5 on Jun 28, 2021


Hi @JackYoustra,

At the time I was introducing this, I was seeing in Instruments that a big chunk of the time was spent in retain/release (I don't remember off the top of my head, but I believe it was around 35%). So the compiler was not optimizing away the reference-lifecycle calls even for simple test samples (with large CSVs).

My guess was that, because different modules are involved (CodableCSV and the caller module), the compiler was unable to perform this optimization. So I wanted to slowly remove any reference-type usage and measure the effect through strict benchmarking. That is why the next point on the Roadmap is "Benchmarking".

Sadly, I haven't had the time to continue with that effort. As you can see, there is still a lot of reference-type usage and the benchmarks aren't done 🤷‍♂️
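
For a rough wall-clock check outside Instruments, timing something like the following before and after the change should show whether the difference is measurable (the `Person` type, its fields, and the header strategy are placeholders for whatever large CSV is being tested):

import Foundation
import CodableCSV

// Placeholder row type; adjust the fields to match the CSV under test.
struct Person: Decodable {
    let name: String
    let age: Int
}

func measureDecoding(of fileURL: URL) throws -> TimeInterval {
    let data = try Data(contentsOf: fileURL)
    let decoder = CSVDecoder { $0.headerStrategy = .firstLine }

    let start = Date()
    let rows = try decoder.decode([Person].self, from: data)
    let elapsed = Date().timeIntervalSince(start)
    print("Decoded \(rows.count) rows in \(elapsed) seconds")
    return elapsed
}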

@JackYoustra commented on 554f5d5 on Jul 5, 2021 via email
