Skip to content

Commit

Permalink
First version of JPEG and EXIF parser with starter unit tests and upd…
Browse files Browse the repository at this point in the history
…ated docs.
  • Loading branch information
codedread committed Jan 15, 2024
1 parent 24edc8c commit 35e8ca9
Show file tree
Hide file tree
Showing 7 changed files with 910 additions and 7 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ All notable changes to this project will be documented in this file.

### Added

- Added a GIF parser to bitjs.image.
- Added GIF and JPEG parsers to bitjs.image.
- Added a skip() method to ByteStream.

### Changed
Expand Down
24 changes: 19 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Includes:
* bitjs/codecs: Get the codec info of media containers in a ISO RFC6381
MIME type string
* bitjs/file: Detect the type of file from its binary signature.
* bitjs/image: Parsing GIF. Conversion of WebP to PNG or JPEG.
* bitjs/image: Parsing GIF, JPEG. Conversion of WebP to PNG or JPEG.
* bitjs/io: Low-level classes for interpreting binary data (BitStream
ByteStream). For example, reading or peeking at N bits at a time.

Expand Down Expand Up @@ -108,17 +108,17 @@ const mimeType = findMimeType(someArrayBuffer);
### bitjs.image

This package includes code for dealing with binary images. It includes general event-based parsers
for images (GIF only, at the moment). It also includes a module for converting WebP images into
alternative raster graphics formats (PNG/JPG). This latter module is deprecated, now that WebP
for images (GIF and JPEG only, at the moment). It also includes a module for converting WebP images
into alternative raster graphics formats (PNG/JPG). This latter module is deprecated, now that WebP
images are well-supported in all browsers.

#### GIF Parser
```javascript
import { GifParser } from './bitjs/image/parsers/gif.js'

const parser = new GifParser(someArrayBuffer);
parser.addEventListener('application_extension', evt => {
const appId = evt.applicationExtension.applicationIdentifier
parser.onApplicationExtension(evt => {
const appId = evt.applicationExtension.applicationIdentifier;
const appAuthCode = new TextDecoder().decode(
evt.applicationExtension.applicationAuthenticationCode);
if (appId === 'XMP Data' && appAuthCode === 'XMP') {
Expand All @@ -130,6 +130,20 @@ parser.addEventListener('application_extension', evt => {
parser.start();
```

#### JPEG Parser
```javascript
import { JpegParser } from './bitjs/image/parsers/jpeg.js'
import { ExifTagNumber } from './bitjs/image/parsers/exif.js';

const parser = new JpegParser(someArrayBuffer);
parser.onApp1Exif(evt => {
console.log(evt.exifValueMap.get(ExifTagNumber.IMAGE_DESCRIPTION).stringValue);
});
await parser.start();
```

#### WebP Converter
```javascript
import { convertWebPtoPNG, convertWebPtoJPG } from './bitjs/image/webp-shim/webp-shim.js';
Expand Down
2 changes: 1 addition & 1 deletion image/parsers/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
General-purpose, event-based parsers for digital images.

Currently only supports GIF.
Currently only supports GIF and JPEG.

Some nice implementations for HEIF, JPEG, PNG, TIFF here:
https://github.com/MikeKovarik/exifr/tree/master/src/file-parsers
234 changes: 234 additions & 0 deletions image/parsers/exif.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
import { ByteStream } from '../../io/bytestream.js';

/**
 * Numeric EXIF tag identifiers, grouped by the directory that uses them.
 * getExifValue() maps a tag number read from the stream back to one of these
 * key names to populate ExifValue.tagName.
 * @enum {number}
 */
export const ExifTagNumber = {
  // Tags used by IFD0.
  IMAGE_DESCRIPTION: 0x010e,
  MAKE: 0x010f,
  MODEL: 0x0110,
  ORIENTATION: 0x0112,
  X_RESOLUTION: 0x011a,
  Y_RESOLUTION: 0x011b,
  RESOLUTION_UNIT: 0x0128,
  SOFTWARE: 0x0131,
  DATE_TIME: 0x0132,
  WHITE_POINT: 0x013e,
  PRIMARY_CHROMATICITIES: 0x013f,
  Y_CB_CR_COEFFICIENTS: 0x0211,
  Y_CB_CR_POSITIONING: 0x0213,
  REFERENCE_BLACK_WHITE: 0x0214,
  COPYRIGHT: 0x8298,
  EXIF_OFFSET: 0x8769,

  // Tags used by Exif SubIFD.
  EXPOSURE_TIME: 0x829a,
  F_NUMBER: 0x829d,
  EXPOSURE_PROGRAM: 0x8822,
  ISO_SPEED_RATINGS: 0x8827,
  EXIF_VERSION: 0x9000,
  DATE_TIME_ORIGINAL: 0x9003,
  DATE_TIME_DIGITIZED: 0x9004,
  COMPONENT_CONFIGURATION: 0x9101,
  COMPRESSED_BITS_PER_PIXEL: 0x9102,
  SHUTTER_SPEED_VALUE: 0x9201,
  APERTURE_VALUE: 0x9202,
  BRIGHTNESS_VALUE: 0x9203,
  EXPOSURE_BIAS_VALUE: 0x9204,
  MAX_APERTURE_VALUE: 0x9205,
  SUBJECT_DISTANCE: 0x9206,
  METERING_MODE: 0x9207,
  LIGHT_SOURCE: 0x9208,
  FLASH: 0x9209,
  FOCAL_LENGTH: 0x920a,
  MAKER_NOTE: 0x927c,
  USER_COMMENT: 0x9286,
  FLASH_PIX_VERSION: 0xa000,
  COLOR_SPACE: 0xa001,
  EXIF_IMAGE_WIDTH: 0xa002,
  EXIF_IMAGE_HEIGHT: 0xa003,
  RELATED_SOUND_FILE: 0xa004,
  EXIF_INTEROPERABILITY_OFFSET: 0xa005,
  FOCAL_PLANE_X_RESOLUTION: 0xa20e,
  // Sits between the X resolution (0xa20e) and the resolution unit (0xa210).
  FOCAL_PLANE_Y_RESOLUTION: 0xa20f,
  FOCAL_PLANE_RESOLUTION_UNIT: 0xa210,
  SENSING_METHOD: 0xa217,
  FILE_SOURCE: 0xa300,
  SCENE_TYPE: 0xa301,

  // Tags used by IFD1.
  IMAGE_WIDTH: 0x0100,
  IMAGE_LENGTH: 0x0101,
  BITS_PER_SAMPLE: 0x0102,
  COMPRESSION: 0x0103,
  PHOTOMETRIC_INTERPRETATION: 0x0106,
  STRIP_OFFSETS: 0x0111,
  SAMPLES_PER_PIXEL: 0x0115,
  ROWS_PER_STRIP: 0x0116,
  STRIP_BYTE_COUNTS: 0x0117,
  // X_RESOLUTION, Y_RESOLUTION
  PLANAR_CONFIGURATION: 0x011c,
  // RESOLUTION_UNIT
  JPEG_IF_OFFSET: 0x0201,
  JPEG_IF_BYTE_COUNT: 0x0202,
  // Y_CB_CR_COEFFICIENTS
  Y_CB_CR_SUB_SAMPLING: 0x0212,
  // Y_CB_CR_POSITIONING, REFERENCE_BLACK_WHITE
};

/**
 * Numeric codes identifying how the value of an EXIF entry is encoded.
 * getExifValue() reads this code from the 2-byte field that follows the tag
 * number and switches on it to decide how to decode the entry.
 * @enum {number}
 */
export const ExifDataFormat = {
UNSIGNED_BYTE: 1,
ASCII_STRING: 2,
UNSIGNED_SHORT: 3,
UNSIGNED_LONG: 4,
// Rationals are decoded into a numerator/denominator pair.
UNSIGNED_RATIONAL: 5,
SIGNED_BYTE: 6,
// Not decoded by getExifValue(); only the component count and offset are reported.
UNDEFINED: 7,
SIGNED_SHORT: 8,
SIGNED_LONG: 9,
SIGNED_RATIONAL: 10,
// NOTE(review): getExifValue() has no case for the two float formats below and
// throws "Bad data format" when it encounters them.
SINGLE_FLOAT: 11,
DOUBLE_FLOAT: 12,
};

/**
* @typedef ExifValue
* @property {ExifTagNumber} tagNumber The numerical value of the tag.
* @property {string=} tagName A string representing the tag number.
* @property {ExifDataFormat} dataFormat The data format.
* @property {number=} numericalValue Populated for SIGNED/UNSIGNED BYTE/SHORT/LONG/FLOAT.
* @property {string=} stringValue Populated only for ASCII_STRING.
* @property {number=} numeratorValue Populated only for SIGNED/UNSIGNED RATIONAL.
* @property {number=} denominatorValue Populated only for SIGNED/UNSIGNED RATIONAL.
* @property {number=} numComponents Populated only for UNDEFINED data format.
* @property {number=} offsetValue Populated only for UNDEFINED data format.
*/

/**
 * Emits a console warning describing an EXIF entry whose component count was
 * not the expected one.
 * @param {number} tagNumber The tag number of the entry; printed in hexadecimal.
 * @param {string} type The name of the entry's data format.
 * @param {number} len The component count that was read for the entry.
 * @param {number} dataVal The raw value/offset field of the entry.
 */
function warnBadLength(tagNumber, type, len, dataVal) {
  const message = `Tag 0x${tagNumber.toString(16)} is ${type} with len=${len} and data=${dataVal}`;
  console.warn(message);
}

/**
 * Reads a single EXIF directory entry from the stream and decodes its value.
 *
 * An entry is read as four consecutive fields: tag number (2 bytes), data
 * format (2 bytes), component count (4 bytes) and a 4-byte value field that
 * holds either the value itself or an offset into lookAheadStream.
 *
 * @param {ByteStream} stream Stream positioned at the start of the entry. It is
 *     advanced past the entry.
 * @param {ByteStream} lookAheadStream Stream that value offsets are applied to.
 *     It is only read through tee()'d copies, so it is not advanced here.
 * @param {boolean} DEBUG Whether to warn on the console about entries whose
 *     component count is not 1.
 * @returns {ExifValue}
 * @throws {string} If the data format is unknown or is one this function does
 *     not decode (the float formats).
 */
export function getExifValue(stream, lookAheadStream, DEBUG = false) {
  const tagNumber = stream.readNumber(2);
  let tagName = findNameWithValue(ExifTagNumber, tagNumber);
  if (!tagName) {
    tagName = `UNKNOWN (0x${tagNumber.toString(16)})`;
  }

  let dataFormat = stream.readNumber(2);

  // Handle bad types for special tags.
  if (tagNumber === ExifTagNumber.EXIF_OFFSET) {
    dataFormat = ExifDataFormat.UNSIGNED_LONG;
  }

  const dataFormatName = findNameWithValue(ExifDataFormat, dataFormat);
  // Strings (not Error objects) are thrown to stay compatible with existing callers.
  if (!dataFormatName) throw `Invalid data format: ${dataFormat}`;

  /** @type {ExifValue} */
  const exifValue = {
    tagNumber,
    tagName,
    dataFormat,
  };

  const len = stream.readNumber(4);
  switch (dataFormat) {
    case ExifDataFormat.UNSIGNED_BYTE:
      if (len !== 1 && DEBUG) {
        warnBadLength(tagNumber, dataFormatName, len, stream.peekNumber(4));
      }
      exifValue.numericalValue = stream.readNumber(1);
      // Skip the unused remainder of the 4-byte value field.
      stream.skip(3);
      break;
    case ExifDataFormat.ASCII_STRING:
      if (len <= 4) {
        // The string is stored inline. Always consume the whole 4-byte field,
        // but keep only len - 1 characters so that the trailing NUL and any
        // padding are dropped, matching the offset branch below.
        const inlineChars = stream.readString(4);
        exifValue.stringValue = inlineChars.substring(0, Math.max(len - 1, 0));
      } else {
        const strOffset = stream.readNumber(4);
        // len includes the terminating NUL, which is not part of the string.
        exifValue.stringValue = lookAheadStream.tee().skip(strOffset).readString(len - 1);
      }
      break;
    case ExifDataFormat.UNSIGNED_SHORT:
      if (len !== 1 && DEBUG) {
        warnBadLength(tagNumber, dataFormatName, len, stream.peekNumber(4));
      }
      exifValue.numericalValue = stream.readNumber(2);
      stream.skip(2);
      break;
    case ExifDataFormat.UNSIGNED_LONG:
      if (len !== 1 && DEBUG) {
        warnBadLength(tagNumber, dataFormatName, len, stream.peekNumber(4));
      }
      exifValue.numericalValue = stream.readNumber(4);
      break;
    case ExifDataFormat.UNSIGNED_RATIONAL: {
      if (len !== 1 && DEBUG) {
        warnBadLength(tagNumber, dataFormatName, len, stream.peekNumber(4));
      }

      // The value field is an offset to the numerator/denominator pair.
      const uratStream = lookAheadStream.tee().skip(stream.readNumber(4));
      exifValue.numeratorValue = uratStream.readNumber(4);
      exifValue.denominatorValue = uratStream.readNumber(4);
      break;
    }
    case ExifDataFormat.SIGNED_BYTE:
      if (len !== 1 && DEBUG) {
        warnBadLength(tagNumber, dataFormatName, len, stream.peekSignedNumber(4));
      }
      exifValue.numericalValue = stream.readSignedNumber(1);
      stream.skip(3);
      break;
    case ExifDataFormat.UNDEFINED:
      // The payload is not interpreted; report where it is and how long it is.
      exifValue.numComponents = len;
      exifValue.offsetValue = stream.readNumber(4);
      break;
    case ExifDataFormat.SIGNED_SHORT:
      if (len !== 1 && DEBUG) {
        warnBadLength(tagNumber, dataFormatName, len, stream.peekSignedNumber(4));
      }
      exifValue.numericalValue = stream.readSignedNumber(2);
      stream.skip(2);
      break;
    case ExifDataFormat.SIGNED_LONG:
      // Only warn in DEBUG mode, like every other data format.
      if (len !== 1 && DEBUG) {
        warnBadLength(tagNumber, dataFormatName, len, stream.peekSignedNumber(4));
      }
      exifValue.numericalValue = stream.readSignedNumber(4);
      break;
    case ExifDataFormat.SIGNED_RATIONAL: {
      if (len !== 1 && DEBUG) {
        warnBadLength(tagNumber, dataFormatName, len, stream.peekNumber(4));
      }

      // The value field is an offset to the numerator/denominator pair.
      const ratStream = lookAheadStream.tee().skip(stream.readNumber(4));
      exifValue.numeratorValue = ratStream.readSignedNumber(4);
      exifValue.denominatorValue = ratStream.readSignedNumber(4);
      break;
    }
    default:
      throw `Bad data format: ${dataFormat}`;
  }
  return exifValue;
}

/**
 * Performs a reverse lookup on a numeric enum object.
 * @param {Object} obj A numeric enum.
 * @param {number} valToFind The value to find.
 * @returns {string|null} The first key whose value strictly equals valToFind,
 *     or null if no key has that value.
 */
function findNameWithValue(obj, valToFind) {
  for (const name of Object.keys(obj)) {
    if (obj[name] === valToFind) {
      return name;
    }
  }
  return null;
}
Loading

0 comments on commit 35e8ca9

Please sign in to comment.