hwp3.x: make character shape data (CSD) optional, removing the special handling of CS type 13 (end-of-paragraph marker)

This commit is contained in:
Kevin Lin 2016-01-11 14:11:08 -05:00
parent 8658dbdcac
commit 85d4c20e75

View file

@@ -666,8 +666,13 @@ static inline int parsehwp3_paragraph(cli_ctx *ctx, fmap_t *map, int p, int leve
offset += (nlines * HWP3_LINEINFO_SIZE);
#endif
/* character shape data, -1 is from the inclusion of the termination character in char count */
/* character shape data - may not be present if no byte flag is detected */
/* NOTE: each character shape data represents at least one character including the terminator */
/* peek at next character to check if character shape data is present */
if (fmap_readn(map, &csb, offset, sizeof(csb)) != sizeof(csb))
return CL_EREAD;
if (csb == 0) {
for (i = 0, c = 0; i < nchars; i++) {
/* examine byte for cs data type */
if (fmap_readn(map, &csb, offset, sizeof(csb)) != sizeof(csb))
@@ -697,13 +702,6 @@ static inline int parsehwp3_paragraph(cli_ctx *ctx, fmap_t *map, int p, int leve
break;
case 1: /* normal character - as representation of another character for previous cs block */
break;
case 13: /* end-of-paragraph marker - treated identically as character */
hwp3_debug("HWP3.x: Detected end-of-paragraph marker @ offset %llu\n", (long long unsigned)offset-1);
term = 1;
/* terminator is 2 bytes but 1 byte is already done */
offset += 1;
break;
default:
cli_errmsg("HWP3.x: Paragraph[%d, %d]: unknown CS type 0x%x @ offset %llu\n", level, p, csb,
(long long unsigned)offset);
@@ -713,6 +711,11 @@ static inline int parsehwp3_paragraph(cli_ctx *ctx, fmap_t *map, int p, int leve
}
hwp3_debug("HWP3.x: Paragraph[%d, %d]: detected %d CS block(s) and %d characters\n", level, p, c, i);
} else {
hwp3_debug("HWP3.x: Paragraph[%d, %d]: no character shape data detected\n", level, p);
}
if (!term)
hwp3_debug("HWP3.x: Paragraph[%d, %d]: content starts @ offset %llu\n", level, p, (long long unsigned)offset);
/* scan for end-of-paragraph [0x0d00 on offset parity to current content] */