1
/* cairo - a vector graphics library with display and print output
2
 *
3
 * Copyright © 2006 Red Hat, Inc.
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it either under the terms of the GNU Lesser General Public
7
 * License version 2.1 as published by the Free Software Foundation
8
 * (the "LGPL") or, at your option, under the terms of the Mozilla
9
 * Public License Version 1.1 (the "MPL"). If you do not alter this
10
 * notice, a recipient may use your version of this file under either
11
 * the MPL or the LGPL.
12
 *
13
 * You should have received a copy of the LGPL along with this library
14
 * in the file COPYING-LGPL-2.1; if not, write to the Free Software
15
 * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA
16
 * You should have received a copy of the MPL along with this library
17
 * in the file COPYING-MPL-1.1
18
 *
19
 * The contents of this file are subject to the Mozilla Public License
20
 * Version 1.1 (the "License"); you may not use this file except in
21
 * compliance with the License. You may obtain a copy of the License at
22
 * http://www.mozilla.org/MPL/
23
 *
24
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY
25
 * OF ANY KIND, either express or implied. See the LGPL or the MPL for
26
 * the specific language governing rights and limitations.
27
 *
28
 * The Original Code is the cairo graphics library.
29
 *
30
 * The Initial Developer of the Original Code is University of Southern
31
 * California.
32
 *
33
 * Contributor(s):
34
 *	Carl D. Worth <cworth@cworth.org>
35
 */
36

            
37
#include "cairoint.h"
38
#include "cairo-error-private.h"
39

            
40
typedef struct _lzw_buf {
41
    cairo_status_t status;
42

            
43
    unsigned char *data;
44
    int data_size;
45
    int num_data;
46
    uint32_t pending;
47
    unsigned int pending_bits;
48
} lzw_buf_t;
49

            
50
/* An lzw_buf_t is a simple, growable chunk of memory for holding
51
 * variable-size objects of up to 16 bits each.
52
 *
53
 * Initialize an lzw_buf_t to the given size in bytes.
54
 *
55
 * To store objects into the lzw_buf_t, call _lzw_buf_store_bits and
56
 * when finished, call _lzw_buf_store_pending, (which flushes out the
57
 * last few bits that hadn't yet made a complete byte yet).
58
 *
59
 * Instead of returning failure from any functions, lzw_buf_t provides
60
 * a status value that the caller can query, (and should query at
61
 * least once when done with the object). The status value will be
62
 * either %CAIRO_STATUS_SUCCESS or %CAIRO_STATUS_NO_MEMORY;
63
 */
64
static void
65
_lzw_buf_init (lzw_buf_t *buf, int size)
66
{
67
    if (size == 0)
68
	size = 16;
69

            
70
    buf->status = CAIRO_STATUS_SUCCESS;
71
    buf->data_size = size;
72
    buf->num_data = 0;
73
    buf->pending = 0;
74
    buf->pending_bits = 0;
75

            
76
    buf->data = _cairo_malloc (size);
77
    if (unlikely (buf->data == NULL)) {
78
	buf->data_size = 0;
79
	buf->status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
80
	return;
81
    }
82
}
83

            
84
/* Increase the buffer size by doubling.
85
 *
86
 * Returns %CAIRO_STATUS_SUCCESS or %CAIRO_STATUS_NO_MEMORY
87
 */
88
static cairo_status_t
89
_lzw_buf_grow (lzw_buf_t *buf)
90
{
91
    int new_size = buf->data_size * 2;
92
    unsigned char *new_data;
93

            
94
    if (buf->status)
95
	return buf->status;
96

            
97
    new_data = NULL;
98
    /* check for integer overflow */
99
    if (new_size / 2 == buf->data_size)
100
	new_data = realloc (buf->data, new_size);
101

            
102
    if (unlikely (new_data == NULL)) {
103
	free (buf->data);
104
	buf->data_size = 0;
105
	buf->status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
106
	return buf->status;
107
    }
108

            
109
    buf->data = new_data;
110
    buf->data_size = new_size;
111

            
112
    return CAIRO_STATUS_SUCCESS;
113
}
114

            
115
/* Store the lowest num_bits bits of values into buf.
116
 *
117
 * Note: The bits of value above size_in_bits must be 0, (so don't lie
118
 * about the size).
119
 *
120
 * See also _lzw_buf_store_pending which must be called after the last
121
 * call to _lzw_buf_store_bits.
122
 *
123
 * Sets buf->status to either %CAIRO_STATUS_SUCCESS or %CAIRO_STATUS_NO_MEMORY.
124
 */
125
static void
126
_lzw_buf_store_bits (lzw_buf_t *buf, uint16_t value, int num_bits)
127
{
128
    cairo_status_t status;
129

            
130
    assert (value <= (1 << num_bits) - 1);
131

            
132
    if (buf->status)
133
	return;
134

            
135
    buf->pending = (buf->pending << num_bits) | value;
136
    buf->pending_bits += num_bits;
137

            
138
    while (buf->pending_bits >= 8) {
139
	if (buf->num_data >= buf->data_size) {
140
	    status = _lzw_buf_grow (buf);
141
	    if (unlikely (status))
142
		return;
143
	}
144
	buf->data[buf->num_data++] = buf->pending >> (buf->pending_bits - 8);
145
	buf->pending_bits -= 8;
146
    }
147
}
148

            
149
/* Store the last remaining pending bits into the buffer.
150
 *
151
 * Note: This function must be called after the last call to
152
 * _lzw_buf_store_bits.
153
 *
154
 * Sets buf->status to either %CAIRO_STATUS_SUCCESS or %CAIRO_STATUS_NO_MEMORY.
155
 */
156
static void
157
_lzw_buf_store_pending  (lzw_buf_t *buf)
158
{
159
    cairo_status_t status;
160

            
161
    if (buf->status)
162
	return;
163

            
164
    if (buf->pending_bits == 0)
165
	return;
166

            
167
    assert (buf->pending_bits < 8);
168

            
169
    if (buf->num_data >= buf->data_size) {
170
	status = _lzw_buf_grow (buf);
171
	if (unlikely (status))
172
	    return;
173
    }
174

            
175
    buf->data[buf->num_data++] = buf->pending << (8 - buf->pending_bits);
176
    buf->pending_bits = 0;
177
}
178

            
179
/* LZW defines a few magic code values */
180
#define LZW_CODE_CLEAR_TABLE	256
181
#define LZW_CODE_EOD		257
182
#define LZW_CODE_FIRST		258
183

            
184
/* We pack three separate values into a symbol as follows:
185
 *
186
 * 12 bits (31 down to 20):	CODE: code value used to represent this symbol
187
 * 12 bits (19 down to  8):	PREV: previous code value in chain
188
 *  8 bits ( 7 down to  0):	NEXT: next byte value in chain
189
 */
190
typedef uint32_t lzw_symbol_t;
191

            
192
#define LZW_SYMBOL_SET(sym, prev, next)			((sym) = ((prev) << 8)|(next))
193
#define LZW_SYMBOL_SET_CODE(sym, code, prev, next)	((sym) = ((code << 20)|(prev) << 8)|(next))
194
#define LZW_SYMBOL_GET_CODE(sym)			(((sym) >> 20))
195
#define LZW_SYMBOL_GET_PREV(sym)			(((sym) >>  8) & 0x7ff)
196
#define LZW_SYMBOL_GET_BYTE(sym)			(((sym) >>  0) & 0x0ff)
197

            
198
/* The PREV+NEXT fields can be seen as the key used to fetch values
199
 * from the hash table, while the code is the value fetched.
200
 */
201
#define LZW_SYMBOL_KEY_MASK	0x000fffff
202

            
203
/* Since code values are only stored starting with 258 we can safely
204
 * use a zero value to represent free slots in the hash table. */
205
#define LZW_SYMBOL_FREE		0x00000000
206

            
207
/* These really aren't very free for modifying. First, the PostScript
208
 * specification sets the 9-12 bit range. Second, the encoding of
209
 * lzw_symbol_t above also relies on 2 of LZW_BITS_MAX plus one byte
210
 * fitting within 32 bits.
211
 *
212
 * But other than that, the LZW compression scheme could function with
213
 * more bits per code.
214
 */
215
#define LZW_BITS_MIN		9
216
#define LZW_BITS_MAX		12
217
#define LZW_BITS_BOUNDARY(bits)	((1<<(bits))-1)
218
#define LZW_MAX_SYMBOLS		(1<<LZW_BITS_MAX)
219

            
220
#define LZW_SYMBOL_TABLE_SIZE	9013
221
#define LZW_SYMBOL_MOD1		LZW_SYMBOL_TABLE_SIZE
222
#define LZW_SYMBOL_MOD2		9011
223

            
224
typedef struct _lzw_symbol_table {
225
    lzw_symbol_t table[LZW_SYMBOL_TABLE_SIZE];
226
} lzw_symbol_table_t;
227

            
228
/* Initialize the hash table to entirely empty */
229
static void
230
_lzw_symbol_table_init (lzw_symbol_table_t *table)
231
{
232
    memset (table->table, 0, LZW_SYMBOL_TABLE_SIZE * sizeof (lzw_symbol_t));
233
}
234

            
235
/* Lookup a symbol in the symbol table. The PREV and NEXT fields of
236
 * symbol form the key for the lookup.
237
 *
238
 * If successful, then this function returns %TRUE and slot_ret will be
239
 * left pointing at the result that will have the CODE field of
240
 * interest.
241
 *
242
 * If the lookup fails, then this function returns %FALSE and slot_ret
243
 * will be pointing at the location in the table to which a new CODE
244
 * value should be stored along with PREV and NEXT.
245
 */
246
static cairo_bool_t
247
_lzw_symbol_table_lookup (lzw_symbol_table_t	 *table,
248
			  lzw_symbol_t		  symbol,
249
			  lzw_symbol_t		**slot_ret)
250
{
251
    /* The algorithm here is identical to that in cairo-hash.c. We
252
     * copy it here to allow for a rather more efficient
253
     * implementation due to several circumstances that do not apply
254
     * to the more general case:
255
     *
256
     * 1) We have a known bound on the total number of symbols, so we
257
     *    have a fixed-size table without any copying when growing
258
     *
259
     * 2) We never delete any entries, so we don't need to
260
     *    support/check for DEAD entries during lookup.
261
     *
262
     * 3) The object fits in 32 bits so we store each object in its
263
     *    entirety within the table rather than storing objects
264
     *    externally and putting pointers in the table, (which here
265
     *    would just double the storage requirements and have negative
266
     *    impacts on memory locality).
267
     */
268
    int i, idx, step, hash = symbol & LZW_SYMBOL_KEY_MASK;
269
    lzw_symbol_t candidate;
270

            
271
    idx = hash % LZW_SYMBOL_MOD1;
272
    step = 0;
273

            
274
    *slot_ret = NULL;
275
    for (i = 0; i < LZW_SYMBOL_TABLE_SIZE; i++)
276
    {
277
	candidate = table->table[idx];
278
	if (candidate == LZW_SYMBOL_FREE)
279
	{
280
	    *slot_ret = &table->table[idx];
281
	    return FALSE;
282
	}
283
	else /* candidate is LIVE */
284
	{
285
	    if ((candidate & LZW_SYMBOL_KEY_MASK) ==
286
		(symbol & LZW_SYMBOL_KEY_MASK))
287
	    {
288
		*slot_ret = &table->table[idx];
289
		return TRUE;
290
	    }
291
	}
292

            
293
	if (step == 0) {
294
	    step = hash % LZW_SYMBOL_MOD2;
295
	    if (step == 0)
296
		step = 1;
297
	}
298

            
299
	idx += step;
300
	if (idx >= LZW_SYMBOL_TABLE_SIZE)
301
	    idx -= LZW_SYMBOL_TABLE_SIZE;
302
    }
303

            
304
    return FALSE;
305
}
306

            
307
/* Compress a bytestream using the LZW algorithm.
308
 *
309
 * This is an original implementation based on reading the
310
 * specification of the LZWDecode filter in the PostScript Language
311
 * Reference. The free parameters in the LZW algorithm are set to the
312
 * values mandated by PostScript, (symbols encoded with widths from 9
313
 * to 12 bits).
314
 *
315
 * This function returns a pointer to a newly allocated buffer holding
316
 * the compressed data, or %NULL if an out-of-memory situation
317
 * occurs.
318
 *
319
 * Notice that any one of the _lzw_buf functions called here could
320
 * trigger an out-of-memory condition. But lzw_buf_t uses cairo's
321
 * shutdown-on-error idiom, so it's safe to continue to call into
322
 * lzw_buf without having to check for errors, (until a final check at
323
 * the end).
324
 */
325
unsigned char *
326
_cairo_lzw_compress (unsigned char *data, unsigned long *size_in_out)
327
{
328
    int bytes_remaining = *size_in_out;
329
    lzw_buf_t buf;
330
    lzw_symbol_table_t table;
331
    lzw_symbol_t symbol, *slot = NULL; /* just to squelch a warning */
332
    int code_next = LZW_CODE_FIRST;
333
    int code_bits = LZW_BITS_MIN;
334
    int prev, next = 0; /* just to squelch a warning */
335

            
336
    if (*size_in_out == 0)
337
	return NULL;
338

            
339
    _lzw_buf_init (&buf, *size_in_out);
340

            
341
    _lzw_symbol_table_init (&table);
342

            
343
    /* The LZW header is a clear table code. */
344
    _lzw_buf_store_bits (&buf, LZW_CODE_CLEAR_TABLE, code_bits);
345

            
346
    while (1) {
347

            
348
	/* Find the longest existing code in the symbol table that
349
	 * matches the current input, if any. */
350
	prev = *data++;
351
	bytes_remaining--;
352
	if (bytes_remaining) {
353
	    do
354
	    {
355
		next = *data++;
356
		bytes_remaining--;
357
		LZW_SYMBOL_SET (symbol, prev, next);
358
		if (_lzw_symbol_table_lookup (&table, symbol, &slot))
359
		    prev = LZW_SYMBOL_GET_CODE (*slot);
360
	    } while (bytes_remaining && *slot != LZW_SYMBOL_FREE);
361
	    if (*slot == LZW_SYMBOL_FREE) {
362
		data--;
363
		bytes_remaining++;
364
	    }
365
	}
366

            
367
	/* Write the code into the output. This is either a byte read
368
	 * directly from the input, or a code from the last successful
369
	 * lookup. */
370
	_lzw_buf_store_bits (&buf, prev, code_bits);
371

            
372
	if (likely (slot != NULL))
373
	    LZW_SYMBOL_SET_CODE (*slot, code_next, prev, next);
374

            
375
	code_next++;
376

            
377
	if (code_next > LZW_BITS_BOUNDARY(code_bits))
378
	{
379
	    code_bits++;
380
	    if (code_bits > LZW_BITS_MAX) {
381
		_lzw_symbol_table_init (&table);
382
		_lzw_buf_store_bits (&buf, LZW_CODE_CLEAR_TABLE, code_bits - 1);
383
		code_bits = LZW_BITS_MIN;
384
		code_next = LZW_CODE_FIRST;
385
	    }
386
	}
387

            
388
	if (bytes_remaining == 0)
389
	    break;
390
    }
391

            
392
    /* The LZW footer is an end-of-data code. */
393
    _lzw_buf_store_bits (&buf, LZW_CODE_EOD, code_bits);
394

            
395
    _lzw_buf_store_pending (&buf);
396

            
397
    /* See if we ever ran out of memory while writing to buf. */
398
    if (buf.status == CAIRO_STATUS_NO_MEMORY) {
399
	*size_in_out = 0;
400
	return NULL;
401
    }
402

            
403
    assert (buf.status == CAIRO_STATUS_SUCCESS);
404

            
405
    *size_in_out = buf.num_data;
406
    return buf.data;
407
}