From: John Ellis Date: Sun, 12 Jun 2005 23:45:45 +0000 (+0000) Subject: Sun Jun 12 19:25:26 2005 John Ellis X-Git-Tag: v1.0.0~1744 X-Git-Url: http://geeqie.org/cgi-bin/gitweb.cgi?p=geeqie.git;a=commitdiff_plain;h=1aa500fa5246d93127daa3e62ef5ba2d5c699e2b Sun Jun 12 19:25:26 2005 John Ellis * format_canon.[ch]: Reimplement canon raw parser to use convenience functions from exif.c, also separated parsers into one per file type. For the cr2 format also verify compression type in tiff field 0x0103. * format_raw.c: Add FIXME comment noting current shortcomings. --- diff --git a/ChangeLog b/ChangeLog index bb4072f7..8ca45b93 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Sun Jun 12 19:25:26 2005 John Ellis + + * format_canon.[ch]: Reimplement canon raw parser to use convenience + functions from exif.c, also separated parsers into one per file type. + For the cr2 format also verify compression type in tiff field 0x0103. + * format_raw.c: Add FIXME comment noting current shortcomings. + Sat Jun 11 01:06:49 2005 John Ellis * exif.c, format_nikon.c, format_olympus.c: Fix memory leaks, diff --git a/TODO b/TODO index b3e996cf..45953a9c 100644 --- a/TODO +++ b/TODO @@ -29,9 +29,9 @@ Major: d> make a generic tiff header and directory parser from the nikon parser for use by all raw parsers that involve tiff. - > clean up canon parser (there are now many convenience utils to simplify tiff header, etc.): - > canon_read_int can be substituted with, or wrap exif_get_int16/32. - > CR2 tiff code can now use exif_tiff_directory_offset. + d> clean up canon parser (there are now many convenience utils to simplify tiff header, etc.): + d> canon_read_int can be substituted with, or wrap exif_get_int16/32. + d> CR2 tiff code can now use exif_tiff_directory_offset. d> support olympus MakerNote, investigate RAW (raw embedded jpeg appears to be tiny). > support konica / minolta MakerNote, investigate RAW. 
diff --git a/src/format_canon.c b/src/format_canon.c index 31abfc4f..bf48039b 100644 --- a/src/format_canon.c +++ b/src/format_canon.c @@ -40,445 +40,199 @@ *----------------------------------------------------------------------------- */ - -#if 0 - #define CANON_DEBUG -#endif - -#ifdef CANON_DEBUG -int canonEnableDebug = 0; -/* This should be really a stack, but I am too lazy to implement */ -#define DEBUG_ENABLE (canonEnableDebug = 0) -#define DEBUG_DISABLE (canonEnableDebug = 1) -/* It would be nice if these functions indented according to depth in the stack, but I am too lazy to implement */ - -#define DEBUG_ENTRY(a) (canonEnableDebug || fprintf(stderr, "Entering function: %s [%s:%d]\n", a, __FILE__, __LINE__)) -#define DEBUG_EXIT(a) (canonEnableDebug || fprintf(stderr, "Exiting function: %s [%s:%d]\n", a, __FILE__, __LINE__)) -#define DEBUG_1(a) (canonEnableDebug || fprintf(stderr, a " [%s:%d]\n", __FILE__, __LINE__)) -#define DEBUG_2(a,b) (canonEnableDebug || fprintf(stderr, a " [%s:%d]\n",b, __FILE__, __LINE__)) -#define DEBUG_3(a,b,c) (canonEnableDebug || fprintf(stderr, a " [%s:%d]\n",b, c, __FILE__, __LINE__)) - -#else -#define DEBUG_ENABLE -#define DEBUG_DISABLE -#define DEBUG_ENTRY(a) -#define DEBUG_EXIT(a) - -#define DEBUG_1(a) -#define DEBUG_2(a,b) -#define DEBUG_3(a,b,c) -#endif - - -/* canon_read_int4 - - -The problem with gqview is that sometimes the data is to be read from -a file, and sometimes it is in memory. This function tries to isolate -the rest of the code from having to deal with both cases - -This function reads a 4 byte unsigned integer, and fixes its endianism. - -If fd >= 0 then the value is read from the corresponding file descriptor - - in that case, if offset is > 0, then the value is read from that offset - - otherwise it is read from the current file pointer - -if fd < 0 then the value is read from the memory pointed by data + offset - - -offset is a pointer to the actual offset of the file. 
- -sizeInt can be 2 or 4 (it is the number of bytes to read) - -RETURNS true is no error, false if it can't read the value - - -*/ -static int canon_read_int(unsigned int *offset, const void *data, int sizeInt, unsigned int *value ) +static gint canon_cr2_tiff_entry(unsigned char *data, const guint len, guint offset, ExifByteOrder bo, + guint *image_offset, gint *jpeg_encoding) { - DEBUG_DISABLE; - - DEBUG_ENTRY("canon_read_int"); - /* Verify values before we do anything */ - if (sizeInt != 2 && sizeInt != 4) return FALSE; - if (offset == NULL) return FALSE; - if (*offset <= 0) return FALSE; - if (data == NULL) return FALSE; - if (value == NULL) return FALSE; - - if (sizeInt == 4) { - *value = GUINT32_FROM_LE(*(guint32*)(data + *offset)); - *offset +=4; - DEBUG_3("Read 4 bytes %d %x", *value, *value); - } else { - *value = GUINT16_FROM_LE(*(guint16*)(data + *offset)); - *offset +=2; - DEBUG_3("Read 2 bytes %d %x", *value, *value); - } - - DEBUG_EXIT("canon_read_int"); - - DEBUG_ENABLE; - return TRUE; -} - -#define CANON_HEADER_SIZE 26 + guint tag; + guint type; + guint count; + guint jpeg_start; + + /* the two (tiff compliant) tags we want are: + * 0x0103 image compression type (must be type 6 for jpeg) + * 0x0111 jpeg start offset + * only use the first segment that contains an actual jpeg - as there + * is a another that contains the raw data. 
+ */ + tag = exif_byte_get_int16(data + offset + EXIF_TIFD_OFFSET_TAG, bo); + type = exif_byte_get_int16(data + offset + EXIF_TIFD_OFFSET_FORMAT, bo); + count = exif_byte_get_int32(data + offset + EXIF_TIFD_OFFSET_COUNT, bo); + + /* tag 0x0103 contains the compression type for this segment's image data */ + if (tag == 0x0103) + { + if (ExifFormatList[type].size * count == 2 && + exif_byte_get_int16(data + offset + EXIF_TIFD_OFFSET_DATA, bo) == 6) + { + *jpeg_encoding = TRUE; + } + return FALSE; + } -/* + /* find and verify jpeg offset */ + if (tag != 0x0111 || + !jpeg_encoding) return FALSE; - The CR2 format is really a TIFF format. It is nicely documented in the TIFF V 6.0 document available from adobe. + /* make sure data segment contains 4 bytes */ + if (ExifFormatList[type].size * count != 4) return FALSE; - The CR2 file contains two thumbnails, one tiny and one decent sized. The record Id of the latter is 0x0111. + jpeg_start = exif_byte_get_int32(data + offset + EXIF_TIFD_OFFSET_DATA, bo); - The photo info is also available, in EXIF, and it looks like I don't need to do anything! Yeah! + /* verify this is jpeg data */ + if (len < jpeg_start + 4 || + memcmp(data + jpeg_start, "\xff\xd8", 2) != 0) + { + return FALSE; + } -*/ + *image_offset = jpeg_start; + return TRUE; +} -static int canon_cr2_process_directory(void *data, int offsetIFD, guint *jpegLocation, guint *exifLocation) +static gint canon_cr2_tiff_table(unsigned char *data, const guint len, guint offset, ExifByteOrder bo, + guint *image_offset) { - unsigned int offset; - int returnValue = FALSE; - - DEBUG_ENTRY("canon_cr2_process_directory"); - - /* The directory is a link list, after an array of records, the next 4 byptes point to the offset of the next directory. - - All offsets are absolution within the file (in CRWs the offsets are relative ). 
- - */ + gint jpeg_encoding = FALSE; + guint count; + guint i; - while (offsetIFD != 0 && offsetIFD != 0xFFFF) { - int countEntries=0; - int i; - /* Read directory, we start by reading number of entries in the directory */ + if (len < offset + 2) return 0; - offset = offsetIFD; - if (!canon_read_int(&offset, data, 2, &countEntries)) { - goto return_only; - } - DEBUG_2("Number of entries: %d\n", countEntries); - - for (i=0;i len) - goto return_only; - - currentOffset = heapHeaderOffset; - /* Let us read the number of records in the heap */ - if (!canon_read_int(&currentOffset, data, 2, &heapRecordsCount)) - goto return_only; - - DEBUG_2("heap record count %d ", heapRecordsCount); - - if (heapRecordsCount != 3) { - /* In all the cameras I have seen, this is always 3 - if not, something is wrong, so just quit */ - goto return_only; - } - - for (i=0;i<3;i++) { - int recordType; - int recordOffset; - int recordLength; - const void *jpgInDataOffset; - /* Read each record, to find jpg, it should be second */ - - if (!canon_read_int(&currentOffset, data, 2, &recordType)) - goto return_only; - - DEBUG_2("record type 0x%x ", recordType); - - if (recordType != 0x2007) { - /* Go to the next record, don't waste time, - but first, eat 8 bytes from header */ - currentOffset += 8; - continue; /* Nah, wrong record, go to next */ - } - /* Bingo, we are at the JPEG record */ - - /* Read length */ - if (!canon_read_int(&currentOffset, data, 4, &recordLength)) - goto return_only; - - DEBUG_2("record length %d ", recordLength); - - /* Read offset */ - - if (!canon_read_int(&currentOffset, data, 4, &recordOffset)) - goto return_only; - - DEBUG_2("record offset 0x%d ", recordOffset); - - /* Great, we now know where the JPEG is! 
- it is CANON_HEADER_SIZE (size of CRW header) + recordOffset - */ - - *image_offset = CANON_HEADER_SIZE + recordOffset; - DEBUG_2("image offset %d ", *image_offset); - - /* keep checking for potential errors */ - if (*image_offset > len) { - goto return_only; - } - /* Get the JPEG is */ - - jpgInDataOffset = data + *image_offset; - - if (memcmp(jpgInDataOffset, "\xff\xd8\xff\xdb",4) != 0) { - /* It is not at the JPEG! */ - DEBUG_2("THis is not a jpeg after all: there are the first 4 bytes 0x%x ", (int)jpgInDataOffset); - goto return_only; - } - returnValue = TRUE; - goto return_only; - } - /* undo whatever we need in case of an error*/ - DEBUG_1("We scan all records, but nothing was found!!!!!!!!!!!!!!!!!!"); - - - /* At this point we are returning */ -return_only: - if (returnValue) { - DEBUG_1("****We got an embedded JPEG for a canon CRW"); - - } - - DEBUG_EXIT("format_raw_test_canon"); - return returnValue; - -#undef DEBUG_2 -#undef DEBUG -#undef DEBUG_ENTRY -#undef DEBUG_EXIT + /* walk the directory entries looking for type jpeg (tag 0x2007), + * for reference, other tags are 0x2005 for raw and 0x300a for photo info: + */ + for (i = 0; i < count ; i++) + { + guint entry_offset; + guint record_type; + guint record_offset; + guint record_length; + + entry_offset = offset + i * CRW_DIR_ENTRY_SIZE; + + /* entry is 10 bytes (in order): + * 2 for type + * 4 for length of data + * 4 for offset into data segment of this block + */ + record_type = exif_byte_get_int16(data + entry_offset, CRW_BYTE_ORDER); + record_length = exif_byte_get_int32(data + entry_offset + 2, CRW_BYTE_ORDER); + record_offset = exif_byte_get_int32(data + entry_offset + 6, CRW_BYTE_ORDER); + + /* tag we want for jpeg data */ + if (record_type == 0x2007) + { + guint jpeg_offset; + + jpeg_offset = block_offset + record_offset; + if (len < jpeg_offset + record_length || + record_length < 4 || + memcmp(data + jpeg_offset, "\xff\xd8\xff\xdb", 4) != 0) + { + return FALSE; + } + + /* we now know offset 
and verified jpeg */ + *image_offset = jpeg_offset; + return TRUE; + } + } + return FALSE; } + /* *----------------------------------------------------------------------------- * EXIF Makernote for Canon diff --git a/src/format_canon.h b/src/format_canon.h index d9da01ca..64f0e1f1 100644 --- a/src/format_canon.h +++ b/src/format_canon.h @@ -22,16 +22,18 @@ #include "exif.h" -gint format_canon_raw(unsigned char *data, const guint len, - guint *image_offset, guint *exif_offset); +gint format_canon_raw_crw(unsigned char *data, const guint len, + guint *image_offset, guint *exif_offset); +gint format_canon_raw_cr2(unsigned char *data, const guint len, + guint *image_offset, guint *exif_offset); #define FORMAT_RAW_CANON { "crw", \ FORMAT_RAW_MATCH_MAGIC, 6, "HEAPCCDR", 8, \ - "Canon crw", format_canon_raw }, \ + "Canon crw", format_canon_raw_crw }, \ { "cr2", \ FORMAT_RAW_MATCH_TIFF_MAKE, 0, "Canon", 5, \ - "Canon cr2", format_canon_raw } + "Canon cr2", format_canon_raw_cr2 } gint format_canon_makernote(ExifData *exif, unsigned char *tiff, guint offset, diff --git a/src/format_raw.c b/src/format_raw.c index db2977d9..b130e628 100644 --- a/src/format_raw.c +++ b/src/format_raw.c @@ -304,6 +304,10 @@ gint format_raw_img_exif_offsets_fd(int fd, const gchar *path, if (debug) printf("RAW file parser extension match\n"); } + /* FIXME: + * when the target is a tiff file it should be mmaped prior to format_raw_find as + * the make field data may not always be within header_data + header_len + */ entry = format_raw_find(header_data, header_len); if (!entry || !entry->func_parse) return FALSE;