From 232b8c31bf9032bc80d0d59938a28f12ac69608f Mon Sep 17 00:00:00 2001 From: Henry Date: Fri, 20 Sep 2024 21:26:07 -0700 Subject: [PATCH] Add gzip-wrapping functions to CompressedResource --- cmd/rwp/cmd/serve/api.go | 19 ++++++--- cmd/rwp/cmd/serve/helpers.go | 4 +- pkg/archive/archive.go | 8 +++- pkg/archive/archive_exploded.go | 8 ++++ pkg/archive/archive_zip.go | 75 ++++++++++++++++++++++++++++++++- pkg/archive/gzip.go | 12 ++++++ pkg/fetcher/fetcher_archive.go | 18 ++++++++ pkg/fetcher/resource.go | 18 ++++++++ pkg/fetcher/traits.go | 2 + pkg/parser/epub/deobfuscator.go | 22 +++++++++- 10 files changed, 175 insertions(+), 11 deletions(-) create mode 100644 pkg/archive/gzip.go diff --git a/cmd/rwp/cmd/serve/api.go b/cmd/rwp/cmd/serve/api.go index ce8efc1..5783499 100644 --- a/cmd/rwp/cmd/serve/api.go +++ b/cmd/rwp/cmd/serve/api.go @@ -256,11 +256,20 @@ func (s *Server) getAsset(w http.ResponseWriter, r *http.Request) { } cres, ok := res.(fetcher.CompressedResource) - if ok && cres.CompressedAs(archive.CompressionMethodDeflate) && start == 0 && end == 0 && supportsDeflate(r) { - // Stream the asset in compressed format - w.Header().Set("content-encoding", "deflate") - w.Header().Set("content-length", strconv.FormatInt(cres.CompressedLength(), 10)) - _, err = cres.StreamCompressed(w) + if ok && cres.CompressedAs(archive.CompressionMethodDeflate) && start == 0 && end == 0 { + // Stream the asset in compressed format if supported by the user agent + if supportsEncoding(r, "deflate") { + w.Header().Set("content-encoding", "deflate") + w.Header().Set("content-length", strconv.FormatInt(cres.CompressedLength(), 10)) + _, err = cres.StreamCompressed(w) + } else if supportsEncoding(r, "gzip") && l <= archive.GzipMaxLength { + w.Header().Set("content-encoding", "gzip") + w.Header().Set("content-length", strconv.FormatInt(cres.CompressedLength()+archive.GzipWrapperLength, 10)) + _, err = cres.StreamCompressedGzip(w) + } else { + // Fall back to normal streaming + _, rerr = res.Stream(w, start, end) + } } else { // Stream the asset _, rerr = res.Stream(w, start, end) diff --git a/cmd/rwp/cmd/serve/helpers.go b/cmd/rwp/cmd/serve/helpers.go index 6e20e7e..998a925 100644 --- a/cmd/rwp/cmd/serve/helpers.go +++ b/cmd/rwp/cmd/serve/helpers.go @@ -74,7 +74,7 @@ func conformsToAsMimetype(conformsTo manifest.Profiles) string { return mime } -func supportsDeflate(r *http.Request) bool { +func supportsEncoding(r *http.Request, encoding string) bool { vv := r.Header.Values("Accept-Encoding") for _, v := range vv { for _, sv := range strings.Split(v, ",") { @@ -82,7 +82,7 @@ func supportsDeflate(r *http.Request) bool { if coding == "" { continue } - if coding == "deflate" { + if coding == encoding { return true } } diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index c476d4a..e4f6830 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -58,8 +58,12 @@ type Entry interface { CompressedAs(compressionMethod CompressionMethod) bool // Whether the entry is compressed using the given method. Read(start int64, end int64) ([]byte, error) // Reads the whole content of this entry, or a portion when [start] or [end] are specified. Stream(w io.Writer, start int64, end int64) (int64, error) // Streams the whole content of this entry to a writer, or a portion when [start] or [end] are specified. - StreamCompressed(w io.Writer) (int64, error) // Streams the compressed content of this entry to a writer. - ReadCompressed() ([]byte, error) // Reads the compressed content of this entry. + + StreamCompressed(w io.Writer) (int64, error) // Streams the compressed content of this entry to a writer. + StreamCompressedGzip(w io.Writer) (int64, error) // Streams the compressed content of this entry to a writer in a GZIP container. + ReadCompressed() ([]byte, error) // Reads the compressed content of this entry. + ReadCompressedGzip() ([]byte, error) // Reads the compressed content of this entry inside a GZIP container. + } // Represents an immutable archive. diff --git a/pkg/archive/archive_exploded.go b/pkg/archive/archive_exploded.go index 7e54d3f..1f1a76b 100644 --- a/pkg/archive/archive_exploded.go +++ b/pkg/archive/archive_exploded.go @@ -90,10 +90,18 @@ func (e explodedArchiveEntry) StreamCompressed(w io.Writer) (int64, error) { return -1, errors.New("entry is not compressed") } +func (e explodedArchiveEntry) StreamCompressedGzip(w io.Writer) (int64, error) { + return -1, errors.New("entry is not compressed") +} + func (e explodedArchiveEntry) ReadCompressed() ([]byte, error) { return nil, errors.New("entry is not compressed") } +func (e explodedArchiveEntry) ReadCompressedGzip() ([]byte, error) { + return nil, errors.New("entry is not compressed") +} + // An archive exploded on the file system as a directory. type explodedArchive struct { directory string // Directory, already cleaned! diff --git a/pkg/archive/archive_zip.go b/pkg/archive/archive_zip.go index 472dc0b..6eb05ea 100644 --- a/pkg/archive/archive_zip.go +++ b/pkg/archive/archive_zip.go @@ -4,11 +4,14 @@ import ( "archive/zip" "bytes" "compress/flate" - "errors" + "encoding/binary" "io" "io/fs" + "math" "path" "sync" + + "github.com/pkg/errors" ) type gozipArchiveEntry struct { @@ -164,6 +167,42 @@ func (e gozipArchiveEntry) StreamCompressed(w io.Writer) (int64, error) { return io.Copy(w, f) } +func (e gozipArchiveEntry) StreamCompressedGzip(w io.Writer) (int64, error) { + if e.file.Method != zip.Deflate { + return -1, errors.New("not a compressed resource") + } + if e.file.UncompressedSize64 > math.MaxUint32 { + return -1, errors.New("uncompressed size > 2^32 too large for GZIP") + } + f, err := e.file.OpenRaw() + if err != nil { + return -1, err + } + + // Header + buf := [10]byte{0: gzipID1, 1: gzipID2, 2: gzipDeflate, 9: 255} + // No extra, no name, no comment, no mod time, no compress level hint, unknown OS + + n, err := w.Write(buf[:10]) + if err != nil { + return -1, errors.Wrap(err, "failed to write GZIP header") + } + + nn, err := io.Copy(w, f) + if err != nil { + return int64(n), errors.Wrap(err, "failed copying deflated bytes") + } + + // Trailer + binary.LittleEndian.PutUint32(buf[:4], e.file.CRC32) + binary.LittleEndian.PutUint32(buf[4:8], uint32(e.file.UncompressedSize64)) + nnn, err := w.Write(buf[:8]) + if err != nil { + return int64(n) + nn, errors.Wrap(err, "failed writing GZIP trailer") + } + return int64(n) + nn + int64(nnn), nil +} + func (e gozipArchiveEntry) ReadCompressed() ([]byte, error) { if e.file.Method != zip.Deflate { return nil, errors.New("not a compressed resource") @@ -182,6 +221,40 @@ func (e gozipArchiveEntry) ReadCompressed() ([]byte, error) { return compressedData, nil } +func (e gozipArchiveEntry) ReadCompressedGzip() ([]byte, error) { + if e.file.Method != zip.Deflate { + return nil, errors.New("not a compressed resource") + } + if e.file.UncompressedSize64 > math.MaxUint32 { + return nil, errors.New("uncompressed size > 2^32 too large for GZIP") + } + f, err := e.file.OpenRaw() + if err != nil { + return nil, err + } + + compressedData := make([]byte, e.file.CompressedSize64+GzipWrapperLength) // Size of file + header + trailer + + // Deflated data + _, err = io.ReadAtLeast(f, compressedData[10:], int(e.file.CompressedSize64)) + if err != nil { + return nil, err + } + + // Header + compressedData[0] = gzipID1 + compressedData[1] = gzipID2 + compressedData[2] = gzipDeflate + compressedData[9] = 255 + // No extra, no name, no comment, no mod time, no compress level hint, unknown OS + + // Trailer + binary.LittleEndian.PutUint32(compressedData[10+e.file.CompressedSize64:], e.file.CRC32) + binary.LittleEndian.PutUint32(compressedData[10+e.file.CompressedSize64+4:], uint32(e.file.UncompressedSize64)) + + return compressedData, nil +} + // An archive from a zip file using go's stdlib type gozipArchive struct { zip *zip.Reader diff --git a/pkg/archive/gzip.go b/pkg/archive/gzip.go new file mode 100644 index 0000000..79705bf --- /dev/null +++ b/pkg/archive/gzip.go @@ -0,0 +1,12 @@ +package archive + +import "math" + +const ( + gzipID1 = 0x1f + gzipID2 = 0x8b + gzipDeflate = 8 +) + +const GzipWrapperLength = 18 +const GzipMaxLength = math.MaxUint32 diff --git a/pkg/fetcher/fetcher_archive.go b/pkg/fetcher/fetcher_archive.go index 5eb385f..f9b4096 100644 --- a/pkg/fetcher/fetcher_archive.go +++ b/pkg/fetcher/fetcher_archive.go @@ -171,6 +171,15 @@ func (r *entryResource) StreamCompressed(w io.Writer) (int64, *ResourceError) { return -1, Other(err) } +// StreamCompressedGzip implements CompressedResource +func (r *entryResource) StreamCompressedGzip(w io.Writer) (int64, *ResourceError) { + i, err := r.entry.StreamCompressedGzip(w) + if err == nil { + return i, nil + } + return -1, Other(err) +} + // ReadCompressed implements CompressedResource func (r *entryResource) ReadCompressed() ([]byte, *ResourceError) { i, err := r.entry.ReadCompressed() @@ -180,6 +189,15 @@ func (r *entryResource) ReadCompressed() ([]byte, *ResourceError) { return nil, Other(err) } +// ReadCompressedGzip implements CompressedResource +func (r *entryResource) ReadCompressedGzip() ([]byte, *ResourceError) { + i, err := r.entry.ReadCompressedGzip() + if err == nil { + return i, nil + } + return nil, Other(err) +} + // Length implements Resource func (r *entryResource) Length() (int64, *ResourceError) { return int64(r.entry.Length()), nil diff --git a/pkg/fetcher/resource.go b/pkg/fetcher/resource.go index 3aebc89..01f1894 100644 --- a/pkg/fetcher/resource.go +++ b/pkg/fetcher/resource.go @@ -394,6 +394,15 @@ func (r ProxyResource) StreamCompressed(w io.Writer) (int64, *ResourceError) { return cres.StreamCompressed(w) } +// StreamCompressedGzip implements CompressedResource +func (r ProxyResource) StreamCompressedGzip(w io.Writer) (int64, *ResourceError) { + cres, ok := r.Res.(CompressedResource) + if !ok { + return -1, Other(errors.New("resource is not compressed")) + } + return cres.StreamCompressedGzip(w) +} + // ReadCompressed implements CompressedResource func (r ProxyResource) ReadCompressed() ([]byte, *ResourceError) { cres, ok := r.Res.(CompressedResource) @@ -403,6 +412,15 @@ func (r ProxyResource) ReadCompressed() ([]byte, *ResourceError) { return cres.ReadCompressed() } +// ReadCompressedGzip implements CompressedResource +func (r ProxyResource) ReadCompressedGzip() ([]byte, *ResourceError) { + cres, ok := r.Res.(CompressedResource) + if !ok { + return nil, Other(errors.New("resource is not compressed")) + } + return cres.ReadCompressedGzip() +} + /** * Transforms the bytes of [resource] on-the-fly. * diff --git a/pkg/fetcher/traits.go b/pkg/fetcher/traits.go index 1985446..4796afc 100644 --- a/pkg/fetcher/traits.go +++ b/pkg/fetcher/traits.go @@ -10,5 +10,7 @@ type CompressedResource interface { CompressedAs(compressionMethod archive.CompressionMethod) bool CompressedLength() int64 StreamCompressed(w io.Writer) (int64, *ResourceError) + StreamCompressedGzip(w io.Writer) (int64, *ResourceError) ReadCompressed() ([]byte, *ResourceError) + ReadCompressedGzip() ([]byte, *ResourceError) } diff --git a/pkg/parser/epub/deobfuscator.go b/pkg/parser/epub/deobfuscator.go index b7b0c6d..1128f21 100644 --- a/pkg/parser/epub/deobfuscator.go +++ b/pkg/parser/epub/deobfuscator.go @@ -174,8 +174,18 @@ func (d DeobfuscatingResource) StreamCompressed(w io.Writer) (int64, *fetcher.Re return d.ProxyResource.StreamCompressed(w) } +// StreamCompressedGzip implements CompressedResource +func (d DeobfuscatingResource) StreamCompressedGzip(w io.Writer) (int64, *fetcher.ResourceError) { + _, v := d.obfuscation() + if v > 0 { + return 0, fetcher.Other(errors.New("cannot stream compressed resource when obfuscated")) + } + + return d.ProxyResource.StreamCompressedGzip(w) +} + // ReadCompressed implements CompressedResource -func (d DeobfuscatingResource) ReadCompressed(w io.Writer) ([]byte, *fetcher.ResourceError) { +func (d DeobfuscatingResource) ReadCompressed() ([]byte, *fetcher.ResourceError) { _, v := d.obfuscation() if v > 0 { return nil, fetcher.Other(errors.New("cannot read compressed resource when obfuscated")) @@ -184,6 +194,16 @@ func (d DeobfuscatingResource) ReadCompressed(w io.Writer) ([]byte, *fetcher.Res return d.ProxyResource.ReadCompressed() } +// ReadCompressedGzip implements CompressedResource +func (d DeobfuscatingResource) ReadCompressedGzip() ([]byte, *fetcher.ResourceError) { + _, v := d.obfuscation() + if v > 0 { + return nil, fetcher.Other(errors.New("cannot read compressed resource when obfuscated")) + } + + return d.ProxyResource.ReadCompressedGzip() +} + func (d DeobfuscatingResource) getHashKeyAdobe() []byte { hexbytes, _ := hex.DecodeString( strings.Replace(