Initial import of the CDE 2.1.30 sources from the Open Group.

This commit is contained in:
Peter Howkins
2012-03-10 18:21:40 +00:00
commit 83b6996daa
18978 changed files with 3945623 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
XCOMM $XConsortium: Imakefile /main/12 1996/08/21 15:51:36 drk $
XCOMM ** WARNING **
XCOMM
XCOMM The files named here may appear in many different Imakefiles.
XCOMM If you add or remove a file, be sure to update all locations.
XCOMM It's unfortunate, but all this redundancy serves a purpose.
XCOMM
XCOMM Other possible locations are:
XCOMM .../lib/DtMmdb/Imakefile
XCOMM .../lib/DtMmdb/<subdir>/Imakefile
XCOMM .../programs/dtinfo/mmdb/Imakefile
XCOMM .../programs/dtinfo/mmdb/<subdir>/Imakefile
#define DoNormalLib NormalLibDtMmdb
#define DoSharedLib SharedLibDtMmdb
#define DoDebugLib DebugLibDtMmdb
#define DoProfileLib ProfileLibDtMmdb
#define LibName DtMmdb
#define SoRev SODTMMDBREV
#define LibHeaders NO
#define LibCreate NO
#define LargePICTable YES
#define CplusplusSource YES
XCOMM Flags handed to the dependency generator for the C++ sources.
DEPEND_DEFINES = $(CXXDEPENDINCLUDES)
XCOMM In DtMmdb we compile as C_API sources.
DEFINES = -DC_API
INCLUDES = -I.. $(EXCEPTIONS_INCLUDES)
XCOMM NORMAL_SRCS and PARSER_SRCS together form SRCS and OBJS below.
NORMAL_SRCS = abs_agent.C zip.C huffman.C trie.C code.C lzss.C
PARSER_SRCS = sgml.C ps.C
#ifdef RegenParserFiles
XCOMM Hide the lex sources so the normal case is simple.
XCOMM The lex rules below are only active when RegenParserFiles is defined.
YACCSTACK = -DYYSTACKSIZE=20000
YFLAGS = -l -d
LFLAGS = -8 -s
LinkFile(sgml.l,sgml.lex)
LexTarget(sgml,sgml)
LinkFile(ps.l,ps.lex)
LexTarget(ps,ps)
#endif
SRCS = $(NORMAL_SRCS) $(PARSER_SRCS)
XCOMM Object names are derived from the source lists by suffix substitution.
OBJS = $(NORMAL_SRCS:.C=.o) $(PARSER_SRCS:.C=.o)
#include <Library.tmpl>
SubdirLibraryRule($(OBJS))
DependTarget()

View File

@@ -0,0 +1,39 @@
/*
* $XConsortium: abs_agent.cc /main/4 1996/06/11 17:14:47 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "compression/abs_agent.h"
// Hooks shared with the lexers: huff::build_dict() installs both before it
// runs the lexer. Both start out unset.
getchar_func_t    fill_buf_func   = 0;
lex_action_func_t lex_action_func = 0;

// Build an agent on top of a pstring tagged with class code c_id and switch
// the SWAP_ALLOWED mode off for it.
compress_agent::compress_agent(c_code_t c_id)
   : pstring(c_id)
{
   set_mode(SWAP_ALLOWED, false);
}
HANDLER_BODIES(compress_agent)

View File

@@ -0,0 +1,59 @@
/*
* $XConsortium: abs_agent.h /main/4 1996/06/11 17:14:52 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#ifndef _compress_agent_h
#define _compress_agent_h 1
#include "utility/buffer.h"
#include "object/pstring.h"
typedef int (*getchar_func_t)(unsigned char* buf, int max_sz);
extern getchar_func_t fill_buf_func;
typedef void (*lex_action_func_t)(unsigned char*, int, int action_num);
extern lex_action_func_t lex_action_func;
typedef int (*lex_func_t)();
// Abstract interface implemented by every compression agent (see huff and
// lzss). An agent is a pstring, so it can carry its own persistent data.
class compress_agent : public pstring
{
public:
   // c_id: class code forwarded to the pstring base.
   compress_agent(c_code_t c_id) ;
   virtual ~compress_agent() {}

   // Encode the content of `uncompressed` into `compressed`.
   virtual void compress(const buffer& uncompressed, buffer& compressed) = 0;

   // Decode the content of `compressed` into `uncompressed`.
   // (The first parameter used to be named `decompressed`, which contradicted
   // the overrides in huff and lzss; only the name changed.)
   virtual void decompress(buffer& compressed, buffer& uncompressed) = 0;

   // Let the agent collect whatever dictionary it needs. f_lex runs a lexer;
   // f_getchar supplies that lexer with input bytes.
   virtual io_status build_dict(lex_func_t f_lex, getchar_func_t f_getchar) = 0;
};
HANDLER_SIGNATURES(compress_agent)
#endif

View File

@@ -0,0 +1,48 @@
/*
* $XConsortium: code.cc /main/3 1996/06/11 17:14:57 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "compression/code.h"
// Create an encoding unit for word `w` seen `f` times. The unit takes
// ownership of `w` (the destructor deletes it).
// Every member is now initialised: the original left `bits` and
// `leaf_htr_node` indeterminate until huff::build_tree()/calculate_code()
// happened to run. The list follows the declaration order in code.h.
encoding_unit::encoding_unit(ostring* w, unsigned int f) :
   word(w), bits(0), code(0), freq(f), leaf_htr_node(0)
{
}

// Release the owned word. leaf_htr_node is not deleted here.
encoding_unit::~encoding_unit()
{
   delete word;
}
// Debug-print an encoding unit: sends its word and then its frequency to
// `out` through the debug() macro and returns the stream for chaining.
// The code and bit-length fields are not printed.
ostream& operator<<(ostream& out, encoding_unit& eu)
{
debug(out, *eu.word);
debug(out, eu.freq);
return out;
}

View File

@@ -0,0 +1,58 @@
/*
* $XConsortium: code.h /main/3 1996/06/11 17:15:01 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#ifndef _code_h
#define _code_h 1
#include "utility/ostring.h"
////////////////////////////////////////
//
////////////////////////////////////////
class htr_node;
// One symbol ("word") of the compression alphabet together with its
// frequency and the Huffman code assigned to it.
// NOTE(review): the destructor deletes `word` but no copy constructor or
// assignment operator is declared, so copying an instance would presumably
// lead to a double delete -- confirm instances are never copied.
class encoding_unit
{
public:
// The symbol text; owned by this object (deleted in the destructor).
ostring* word;
// Number of significant bits in `code`.
int bits;
// The code value, held in the low `bits` bits.
unsigned int code;
// Occurrence count of the word.
unsigned int freq;
// Leaf of the Huffman tree that stands for this unit.
htr_node* leaf_htr_node;
public:
encoding_unit(ostring* w, unsigned int freq);
~encoding_unit();
friend ostream& operator <<(ostream&, encoding_unit&);
};
typedef encoding_unit* encoding_unitPtr;
#endif

View File

@@ -0,0 +1,506 @@
/*
* $XConsortium: huffman.cc /main/3 1996/06/11 17:15:06 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "compression/huffman.h"
#include "dstr/heap.h"
////////////////////////////////////////
//
////////////////////////////////////////
// Node carrying an encoding unit: takes its weight from e->freq.
// `e` must not be null. The node starts without a parent.
htr_node::htr_node(encoding_unit* e, htr_node* lt, htr_node* rt)
   : parent(0), left(lt), right(rt), freq(e->freq), eu(e)
{
}

// Node without an encoding unit: weight `f` is supplied by the caller.
htr_node::htr_node(unsigned long f, htr_node* lt, htr_node* rt)
   : parent(0), left(lt), right(rt), freq(f), eu(0)
{
}

// Destroying a node destroys the whole subtree below it. The encoding unit
// and the parent are not touched.
htr_node::~htr_node()
{
   delete left;
   delete right;
}
////////////////////////////////////////
//
////////////////////////////////////////
// Heap "equal" callback: two tree nodes compare equal when their
// frequencies match. Both arguments must point at htr_node objects.
Boolean htr_eq(const void* n1, const void* n2)
{
   const htr_node* lhs = (const htr_node*)n1;
   const htr_node* rhs = (const htr_node*)n2;
   return ( lhs -> freq == rhs -> freq ) ? true : false;
}

// Heap "less" callback, deliberately inverted: it answers true when n1 has
// the LARGER frequency, so the heap's "max" element is the node with the
// smallest frequency.
Boolean htr_ls(const void* n1, const void* n2)
{
   const htr_node* lhs = (const htr_node*)n1;
   const htr_node* rhs = (const htr_node*)n2;
   return ( lhs -> freq > rhs -> freq ) ? true : false;
}
////////////////////////////////////////
//
////////////////////////////////////////
// A fresh agent has no alphabet and no code tree yet, only an empty trie
// (created with argument 26) in which the alphabet will be collected.
huff::huff()
   : compress_agent(HUFFMAN_AGENT_CODE),
     htr_root(0),
     e_units(0),
     tri(new trie(26)),
     cts(0)
{
}

// Frees the trie and the code tree. The e_units array itself is not
// released here.
huff::~huff()
{
   delete tri;
   delete htr_root;
}
void huff::build_tree()
{
heap htr_node_set(htr_eq, htr_ls, cts);
htr_node* x ;
for (int i=0; i<cts; i++ ) {
if ( e_units[i] ) {
x = new htr_node(e_units[i]);
e_units[i] -> leaf_htr_node = x;
htr_node_set.insert(x);
}
}
htr_node_set.heapify();
htr_node *n1, *n2, *n3;
while ( htr_node_set.count() > 1 ) {
// max is the smallest element. see htr_ls()
n1 = (htr_node*)htr_node_set.max_elm() ;
htr_node_set.delete_max() ;
// max is the smallest element. see htr_ls()
n2 = (htr_node*)htr_node_set.max_elm() ;
htr_node_set.delete_max() ;
n3 = new htr_node(n1->freq+n2->freq, n1, n2);
n1 -> parent = n2 -> parent = n3;
htr_node_set.insert_heapify(n3);
}
htr_root = (htr_node*)htr_node_set.max_elm();
htr_node_set.delete_max() ;
}
// Derive the code and code length of every encoding unit by walking from its
// leaf up to the root of the Huffman tree (a left branch is a 1 bit).
// On return `code` holds the bits in its low `bits` positions, root-side bit
// first.
//
// Throws stringException when the tree links are inconsistent or when a code
// would need more bits than the `code` field can hold.
//
// Fixes over the original:
//  * the depth limit was BITS_IN(unsigned long), which is 64 where long is
//    64 bits, although `code` is an unsigned int manipulated with 32-bit
//    constants; depths of 33..64 silently produced a damaged code and a
//    negative shift count. The limit is now the width of `code`.
//  * a unit whose leaf is the root has bits == 0; the final alignment then
//    shifted by 32, which is undefined. It is skipped in that case.
void huff::calculate_code()
{
   const int max_code_bits = BITS_IN(unsigned int);

   htr_node* x ;
   htr_node* parent;

   for (int i=0; i<cts; i++ ) {

      if ( e_units[i] == 0 )
         continue;

      e_units[i] -> code = 0;
      e_units[i] -> bits = 0;

      x = e_units[i] -> leaf_htr_node;

      while ( x ) {
         parent = x -> parent;
         if ( parent == 0 )
            break;

         // Bits are pushed in at the top and slide down, so the bit for the
         // branch nearest the root ends up most significant.
         e_units[i] -> code >>= 1;

         if ( parent -> left == x ) {
            e_units[i] -> code |= 0x80000000;
         } else
         if ( parent -> right != x ) {
            debug(cerr, i);
            throw(stringException("huffman tree corrupted"));
         }

         x = parent;
         e_units[i] -> bits++;

         if ( e_units[i] -> bits > max_code_bits ) {
            debug(cerr, e_units[i] -> bits);
            throw(stringException("huffman tree too deep"));
         }
      }

      // Move the code down into the low `bits` bits.
      if ( e_units[i] -> bits > 0 )
         e_units[i] -> code >>= ( 32 - e_units[i] -> bits );
      //debug(cerr, hex(e_units[i] -> code));
   }
}
// Write one "word:bits" line per encoding unit to `out` and return `out`.
// The two totals are only consumed by the commented-out debug statements.
// The bit total is now rounded up to whole bytes; the original added the
// remainder (total % 8) to the quotient instead.
ostream& huff::print_alphabet(ostream& out)
{
   unsigned long total_uncmp = 0;
   unsigned long int total_cmp = 0;

   for (int i=0; i<cts; i++ ) {
      if ( e_units[i] == 0 )
         continue;

      total_uncmp += (e_units[i] -> word -> size()) * (e_units[i] -> freq);
      total_cmp += (e_units[i] -> bits) * (e_units[i] -> freq);
      out << *(e_units[i] -> word) << ":" << e_units[i]->bits << "\n";
   }

   // bits -> bytes, rounding up
   total_cmp = (total_cmp + 7) / 8;
/*
debug(cerr, total_uncmp);
debug(cerr, total_cmp);
debug(cerr, 1 - float(total_cmp) / float(total_uncmp) );
*/
   return out;
}
// self modifying buf ptr after taking an encoding unit.
// Ask the trie for the encoding unit at the front of buf[0..len) and step
// `buf` (a reference) past the matched word.
// NOTE(review): the result of parse() is used unchecked -- presumably it
// never returns null; confirm against trie::parse.
encoding_unit* huff::get_e_unit(unsigned char*& buf, int len)
{
   encoding_unit* const matched = tri -> parse(buf, len);
   buf += matched -> word -> size();
   return matched;
}
// Running totals; only referenced by the commented-out statistics below.
int total_uncomp = 0;
int total_comp = 0;

// Huffman-encode the content of `uncompressed` into `compressed`.
//
// Output layout: a sequence of 32-bit words, each filled from the most
// significant bit down, followed by one trailer byte holding the total
// number of code bits modulo 32.
//
// Fixes over the original (the produced bytes are unchanged):
//  * the "keep the low rem_bits bits" mask was written ~0L >> n. ~0L is a
//    negative signed long, whose right shift keeps it all-ones on the usual
//    implementations, so nothing was masked. It only worked because the
//    stray high bits are shifted out of the 32-bit accumulator before the
//    word is written. The mask is now built from an unsigned value.
//  * two shifts could have a count of 32 (undefined) for a 32-bit code; they
//    are now guarded.
//
// NOTE(review): the output buffer is only grown to the input's buffer size
// and put() is called without the extra argument lzss passes -- confirm that
// put() cannot overrun for inputs that do not shrink.
void huff::compress(const buffer& uncompressed, buffer& compressed)
{
//debug(cerr, *(buffer*)&uncompressed);
   if ( compressed.buf_sz() < uncompressed.buf_sz() )
      compressed.expand_chunk(uncompressed.buf_sz());

   unsigned short total_bits = 0;

   int uncmp_sz = uncompressed.content_sz();
   unsigned char* buf = (unsigned char*)uncompressed.get_base();

   unsigned int code_buf = 0;
   unsigned int rem_long = 0;   // word being assembled
   int rem_bits = 0;            // bits currently held in rem_long

   encoding_unit *e_ptr = 0;

   while ( uncmp_sz > 0 ) {

      // Longest alphabet word at the front of the remaining input.
      e_ptr = tri -> parse(buf, uncmp_sz);
      buf += e_ptr -> word -> size();
      uncmp_sz -= e_ptr -> word -> size();

      if ( rem_bits + e_ptr -> bits > 32 ) {

         // The code straddles a word boundary: top part completes the
         // current word, the rest starts the next one.
         code_buf = e_ptr -> code;
         rem_long <<= 32-rem_bits;          // make room at the low end

         rem_bits += e_ptr -> bits - 32;    // bits left over for next word

         // Part of the code that still fits into the current word.
         code_buf = ( rem_bits < 32 ) ? ( code_buf >> rem_bits ) : 0;
         rem_long |= code_buf;

         compressed.put( rem_long );

         // Keep only the low rem_bits bits of the code for the next word.
         rem_long = e_ptr -> code & (~0U >> (32 - rem_bits));

      } else {

         // The whole code fits. A 32-bit code can only arrive here with an
         // empty accumulator, so dropping its old content is correct.
         rem_long = ( e_ptr -> bits < 32 ) ? ( rem_long << e_ptr -> bits ) : 0;
         rem_long |= e_ptr -> code;
         rem_bits += e_ptr -> bits;
//debug(cerr, hex(rem_long));
      }

      total_bits += e_ptr -> bits;
      total_bits &= 0x1f; // take the mod on 32
   }

   if ( rem_bits > 0 ) {
      // Left-align the final partial word.
      rem_long <<= 32 - rem_bits;
//MESSAGE(cerr, "PUT");
//debug(cerr, hex(rem_long));
      compressed.put( rem_long );
   }

//debug(cerr, total_bits);
   compressed.put(char(total_bits));

// total_uncomp += uncompressed.content_sz();
// total_comp += compressed.content_sz();
/*
debug(cerr, total_uncomp);
debug(cerr, total_comp);
debug(cerr,
1-float(compressed.content_sz()-1)/float(uncompressed.content_sz())
);
*/
}
// Decode a stream produced by huff::compress() into `uncompressed`.
//
// The input is a run of 32-bit words followed by one trailer byte giving the
// number of code bits modulo 32. Bits are consumed from the top of each
// word; a set bit follows the left branch of the Huffman tree.
//
// Fixes over the original:
//  * a trailer of 0 means the last word is completely used (compress() only
//    writes a word that carries bits). The original treated it as "no bits",
//    dropping the last word and the final symbol whenever the bit total was
//    a multiple of 32.
//  * input without any data word is now answered with empty output instead
//    of reading past the data and testing an uninitialised rem_bits.
//  * the final symbol is only emitted when decoding really stopped on a
//    node that carries an encoding unit, instead of dereferencing `eu`
//    blindly.
//
// NOTE(review): decoded bytes are stored through the raw base pointer of
// `uncompressed` with no capacity check visible here -- confirm callers size
// that buffer for the decoded data.
void huff::decompress(buffer& compressed, buffer& uncompressed)
{
   char* buf_base = uncompressed.get_base();
   char* str;
   int str_len;

   char rem_bits = 0;
   int ct = (compressed.content_sz() - 1) >> 2;   // number of data words
   unsigned int c;
   int bits_bound = 32;

   htr_node *node_ptr = htr_root;

   if ( ct <= 0 ) {
      // Nothing but (at most) the trailer byte: empty result.
      uncompressed.set_content_sz(0);
      return;
   }

   do {
      compressed.get(c); ct--;

      if ( ct == 0 ) {
         // Last word: the trailer says how many of its bits are valid.
         compressed.get(rem_bits);
//debug(cerr, int(rem_bits));
         bits_bound = ( rem_bits == 0 ) ? 32 : rem_bits ;
      }

      for ( int i=0;i<bits_bound; i++ ) {

         // A leaf reached by the previous bit is emitted before the next
         // bit is consumed.
         if ( node_ptr -> left == 0 && node_ptr -> right == 0 ) {

            str_len = node_ptr -> eu -> word -> size();
            str = node_ptr -> eu -> word -> get();

            if ( str_len == 1 ) {
               *buf_base = str[0];
               buf_base++;
            } else {
               memcpy(buf_base, str, str_len);
               buf_base += str_len;
            }

            node_ptr = htr_root;
         }

         if ( c & 0x80000000 )
            node_ptr = node_ptr -> left;
         else
            node_ptr = node_ptr -> right;

         c <<= 1;
      }

   } while ( ct>0 );

//debug(cerr, buf_base-uncompressed.get_base());
   uncompressed.set_content_sz(buf_base-uncompressed.get_base());

   // The symbol reached by the very last bit has not been written yet.
   if ( node_ptr != 0 && node_ptr -> eu != 0 )
      uncompressed.put( node_ptr -> eu -> word -> get(),
                        node_ptr -> eu -> word -> size()
                      );
}
//////////////////////////////////////////////////////////
//
//////////////////////////////////////////////////////////
MMDB_BODIES(huff)
// The compacted disk size is exactly that of the pstring base.
int huff::cdr_sizeof()
{
   const int base_size = pstring::cdr_sizeof();
   return base_size;
}
// Write the agent in compacted disk representation.
//
// When an alphabet exists it is first serialised into the pstring base as
//    entries_int (len_byte word_chars freq_int)+
// and then the base writes itself to `buf`. Returns the status of
// pstring::cdrOut().
//
// Fixes over the original:
//  * the second loop used `i` declared in the first for-statement, which is
//    not valid ISO C++; the index is now declared once for both loops.
//  * empty alphabet slots were dereferenced although build_tree() and
//    calculate_code() both allow for them. They are now skipped and the
//    stored entry count is the number of entries actually written (the same
//    value as before when there are no empty slots).
//
// NOTE(review): v_out_buf is static and nothing here resets it between
// calls -- confirm expand_chunk()/put() behave as intended on a second call.
// NOTE(review): the word length is stored in a single byte; words longer
// than 255 bytes would not round-trip.
io_status huff::cdrOut(buffer& buf)
{
//MESSAGE(cerr, "huff::cdrOut");
//debug(cerr, my_oid());
   static buffer v_out_buf(LBUFSIZ);

   if ( cts > 0 ) {
//MESSAGE(cerr, "huff::cdrOut: dict out");
      unsigned int i;
      unsigned int entries = 0;

      // Space needed: the count plus, per entry, length byte + chars + freq.
      int sz = sizeof(int);
      for ( i=0; i<cts; i++ ) {
         if ( e_units[i] == 0 )
            continue;
         sz += ( e_units[i] -> word -> size() +
                 sizeof(unsigned int) +
                 sizeof(char)
               );
         entries++;
      }

      v_out_buf.expand_chunk(sz);
      v_out_buf.put(entries);

      int word_sz;
      for ( i=0; i<cts; i++ ) {
         if ( e_units[i] == 0 )
            continue;
         word_sz = e_units[i] -> word -> size();
         v_out_buf.put(char(word_sz));
         v_out_buf.put(e_units[i] -> word -> get(), word_sz);
         v_out_buf.put(e_units[i] -> freq);
      }

      pstring::update(v_out_buf.get_base(), v_out_buf.content_sz());
   }

   return pstring::cdrOut(buf);
}
// format:
// entries_int
// (len_byte word_chars freq_int)+
//
// Read the agent from its compacted disk representation.
//
// The pstring base is read from `buf`; if it is non-empty it is parsed as
//    entries_int (len_byte word_chars freq_int)+
// each entry is added to the trie, and the Huffman tree and codes are
// rebuilt from the resulting alphabet. Always returns done.
//
// Fix over the original: the length byte was read into a plain char and
// widened with int(), so lengths of 128..255 became negative where char is
// signed. It is now interpreted as an unsigned byte.
//
// NOTE(review): the trie is deleted at the end, so compress(), get_e_unit()
// and build_dict() -- which all go through `tri` -- must presumably not be
// called on an agent loaded this way; confirm. It also assumes the alphabet
// returned by get_alphabet() stays valid after the trie is deleted.
// NOTE(review): the status of pstring::cdrIn() is not checked.
io_status huff::cdrIn(buffer& buf)
{
   static buffer v_in_buf(0);

   pstring::cdrIn(buf);

   if ( pstring::size() > 0 ) {

      v_in_buf.set_chunk(pstring::get(), pstring::size());
      v_in_buf.set_content_sz(pstring::size());

      v_in_buf.get(cts);

      char word_buf[BUFSIZ];
      char word_sz;
      int word_len;
      unsigned int word_freq;

      for ( unsigned int i=0; i<cts; i++ ) {

         v_in_buf.get(word_sz);
         word_len = (unsigned char)word_sz;   // length byte is unsigned

         v_in_buf.get(word_buf, word_len);
         v_in_buf.get(word_freq);

         tri -> add_to_alphabet((unsigned char*)word_buf, word_len, word_freq);
      }

      // get_alphabet() hands back the unit array and updates cts.
      e_units = tri -> get_alphabet(cts);
      build_tree();
      calculate_code();

      delete tri; tri = 0;
//print_alphabet(cerr);
   }

   return done;
}
// Trie that receives the lexer's output; set by huff::build_dict().
trie* alphabet = 0;

// Lexer action callback: forwards buf[0..len) to the current `alphabet`.
//   action_num 1 -> add the bytes as one word
//   action_num 2 -> add the bytes via add_letters()
// Any other action number raises stringException.
void trie_add_wrap(unsigned char* buf, int len, int action_num)
{
   if ( action_num == 1 ) {
      alphabet -> add(buf, len);
      return;
   }

   if ( action_num == 2 ) {
      alphabet -> add_letters(buf, len);
      return;
   }

   debug(cerr, action_num);
   throw(stringException("unknown action number"));
}
// Build the alphabet, tree and codes from lexer input.
//   f_lex     runs the lexer; a non-zero result is treated as failure
//   f_getchar input source installed as fill_buf_func for that lexer
// The lexer reports through trie_add_wrap(), which fills this agent's trie.
// Afterwards the UPDATE mode is switched on. Returns done, or throws
// stringException when the lexer fails.
io_status huff::build_dict(lex_func_t f_lex, getchar_func_t f_getchar)
{
   MESSAGE(cerr, "get to huff build dict");

   // Wire the global hooks before the lexer starts.
   fill_buf_func   = f_getchar;
   alphabet        = tri;
   lex_action_func = trie_add_wrap;

   const int lex_status = (*f_lex)();
   if ( lex_status != 0 )
      throw(stringException("huff::asciiIn(): Parsing input failed"));

   e_units = tri -> get_alphabet(cts);
//debug(cerr, *tri);

   build_tree();
   calculate_code();
//print_alphabet(cerr);

   set_mode(UPDATE, true);
   return done;
}

View File

@@ -0,0 +1,93 @@
/*
* $XConsortium: huffman.h /main/3 1996/06/11 17:15:11 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#ifndef _huff_h
#define _huff_h 1
#include "compression/abs_agent.h"
#include "compression/trie.h"
////////////////////////////////////////
//
////////////////////////////////////////
// Node of the Huffman tree.
class htr_node
{
public:
// Enclosing node; 0 until the node is merged under another one.
htr_node* parent;
// Children; both are deleted by the destructor.
htr_node* left;
htr_node* right;
// Weight of the node.
unsigned long freq;
// Encoding unit this node stands for, or 0 when constructed from a
// frequency only.
encoding_unit* eu;
public:
// Node for encoding unit `eu`; its weight is eu->freq.
htr_node(encoding_unit* eu, htr_node* lt = 0, htr_node* rt = 0);
// Node with an explicit weight and no encoding unit.
htr_node(unsigned long freq, htr_node* lt = 0, htr_node* rt = 0);
~htr_node();
};
////////////////////////////////////////
//
////////////////////////////////////////
// Huffman compression agent working on a word alphabet collected in a trie.
class huff : public compress_agent
{
protected:
// Root of the Huffman tree; 0 until build_tree() has run.
htr_node* htr_root;
// The alphabet: array of `cts` encoding-unit pointers obtained from the trie.
encoding_unit** e_units;
// Trie holding the alphabet.
trie* tri;
// Number of slots in e_units.
unsigned int cts ;
protected:
// Build the Huffman tree from e_units.
void build_tree();
// Assign code and bit length to every encoding unit.
void calculate_code();
// Parse one encoding unit from `data` and advance `data` past it.
encoding_unit* get_e_unit(unsigned char*& data, int len);
public:
huff();
virtual ~huff() ;
virtual void compress(const buffer& uncompressed, buffer& compressed) ;
virtual void decompress(buffer& compressed, buffer& uncompressed) ;
// Print one "word:bits" line per encoding unit.
ostream& print_alphabet(ostream& out);
MMDB_SIGNATURES(huff);
// compacted disk representation In and Out functions
virtual int cdr_sizeof();
virtual io_status cdrOut(buffer&);
virtual io_status cdrIn(buffer&);
// get data to compute the alphabet
virtual io_status build_dict(lex_func_t f_lex, getchar_func_t f_getchar);
};
extern huff g_huff_agent;
#endif

View File

@@ -0,0 +1,333 @@
/* $XConsortium: lzss.cc /main/5 1996/07/18 16:00:08 drk $ */
#include "compression/lzss.h"
/*
Adapted from LDS (lossless datacompression sources) Version 1.1 by
Nico E. de Vries.
qfc. 12-8-93.
*/
/**************************************************************
LZSS.C -- A Data Compression Program
(tab = 4 spaces)
***************************************************************
4/6/1989 Haruhiko Okumura
Use, distribute, and modify this program freely.
Please send me your improved versions.
PC-VAN SCIENCE
NIFTY-Serve PAF01022
CompuServe 74050,1022
**************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define N 4096 /* size of ring buffer */
#define F 18 /* upper limit for match_length */
#define THRESHOLD 2 /* encode string into position and length
if match_length is greater than this */
#define NIL N /* index for root of binary search trees */
unsigned long int
textsize = 0, /* text size counter */
codesize = 0, /* code size counter */
printcount = 0; /* counter for reporting progress every 1K bytes */
unsigned char
text_buf[N + F - 1]; /* ring buffer of size N,
with extra F-1 bytes to facilitate string comparison */
int match_position, match_length, /* of longest match. These are
set by the InsertNode() procedure. */
lson[N + 1], rson[N + 257], dad[N + 1]; /* left & right children &
parents -- These constitute binary search trees. */
//FILE *infile, *outfile; /* input & output files */
/* Reset the binary search trees to "empty".
   dad[0..N-1] is set to NIL, marking every node as not in any tree.
   rson[N+1 .. N+256] holds the roots of the 256 trees (one per leading
   byte value); each root is set to NIL as well.
   lson[] and rson[0..N-1] need no initialisation. */
void InitTree(void)
{
   int node;

   for (node = 0; node < N; ++node)
      dad[node] = NIL;

   for (node = 1; node <= 256; ++node)
      rson[N + node] = NIL;
}
void InsertNode(int r)
/* Inserts string of length F, text_buf[r..r+F-1], into one of the
trees (text_buf[r]'th tree) and returns the longest-match position
and length via the global variables match_position and match_length.
If match_length = F, then removes the old node in favor of the new
one, because the old one will be deleted sooner.
Note r plays double role, as tree node and position in buffer. */
{
int i, p, cmp;
unsigned char *key;
/* Start at the root slot of the tree selected by the first byte. cmp = 1
sends the first step down the right link of that root slot. */
cmp = 1; key = &text_buf[r]; p = N + 1 + key[0];
rson[r] = lson[r] = NIL; match_length = 0;
for ( ; ; ) {
/* Descend one level; when the link is free, hang r there and stop. */
if (cmp >= 0) {
if (rson[p] != NIL) p = rson[p];
else { rson[p] = r; dad[r] = p; return; }
} else {
if (lson[p] != NIL) p = lson[p];
else { lson[p] = r; dad[r] = p; return; }
}
/* Compare the new string with the one at node p; i becomes the length of
the common prefix and cmp the sign of the first difference. */
for (i = 1; i < F; i++)
if ((cmp = key[i] - text_buf[p + i]) != 0) break;
if (i > match_length) {
match_position = p;
if ((match_length = i) >= F) break;
}
}
/* Full-length match: r takes over p's place in the tree. */
dad[r] = dad[p]; lson[r] = lson[p]; rson[r] = rson[p];
dad[lson[p]] = r; dad[rson[p]] = r;
if (rson[dad[p]] == p) rson[dad[p]] = r;
else lson[dad[p]] = r;
dad[p] = NIL; /* remove p */
}
/* Unlinks node p from its binary search tree; does nothing when p is not
in a tree (dad[p] == NIL). q is the node that takes p's place. */
void DeleteNode(int p) /* deletes node p from tree */
{
int q;
if (dad[p] == NIL) return; /* not in tree */
/* At most one child: that child (possibly NIL) replaces p. */
if (rson[p] == NIL) q = lson[p];
else if (lson[p] == NIL) q = rson[p];
else {
/* Two children: the replacement is the rightmost node of the left
subtree. */
q = lson[p];
if (rson[q] != NIL) {
do { q = rson[q]; } while (rson[q] != NIL);
rson[dad[q]] = lson[q]; dad[lson[q]] = dad[q];
lson[q] = lson[p]; dad[lson[p]] = q;
}
rson[q] = rson[p]; dad[rson[p]] = q;
}
/* Hook q under p's former parent and mark p as unused. */
dad[q] = dad[p];
if (rson[dad[p]] == p) rson[dad[p]] = q; else lson[dad[p]] = q;
dad[p] = NIL;
}
// LZSS-encode the content of `uncompressed` and hand the code bytes to
// `compressed` through put(). Adapted from Okumura's LZSS.C: the file reads
// and writes of the original are replaced by the two buffers.
// The function works on the file-level ring buffer (text_buf) and tree
// arrays (lson/rson/dad) and on match_position/match_length, which are
// shared by every call.
// Output is grouped in units of eight: one flag byte followed by up to
// eight items, each either a literal byte or a two-byte position/length
// pair. Nothing is written for empty input.
void lzss::compress(const buffer& uncompressed, buffer& compressed)
{
// Make the output buffer at least as large as the input buffer.
if ( compressed.buf_sz() < uncompressed.buf_sz() )
compressed.expand_chunk(uncompressed.buf_sz());
////////////////////////////////////////////
////////////////////////////////////////////
int i, c, len, r, s, last_match_length, code_buf_ptr;
unsigned char code_buf[17], mask;
InitTree(); /* initialize trees */
code_buf[0] = 0; /* code_buf[1..16] saves eight units of code, and
code_buf[0] works as eight flags, "1" representing that the unit
is an unencoded letter (1 byte), "0" a position-and-length pair
(2 bytes). Thus, eight units require at most 16 bytes of code. */
code_buf_ptr = mask = 1;
s = 0; r = N - F;
for (i = s; i < r; i++) text_buf[i] = ' '; /* Clear the buffer with
any character that will appear often. */
// Input is read from the buffer's memory instead of a file; unc_str_ptr is
// the read position and reaching unc_str_len plays the role of EOF.
char* unc_str = uncompressed.get_base();
int unc_str_len = uncompressed.content_sz();
int unc_str_ptr = 0;
for (len = 0; len < F; len++) {
if ( unc_str_ptr == unc_str_len )
break;
c = unc_str[unc_str_ptr++];
//cerr << char(c);
text_buf[r + len] = c; /* Read F bytes into the last F bytes of
the buffer */
}
if ((textsize = len) == 0) return; /* text of size zero */
for (i = 1; i <= F; i++) InsertNode(r - i); /* Insert the F strings,
each of which begins with one or more 'space' characters. Note
the order in which these strings are inserted. This way,
degenerate trees will be less likely to occur. */
InsertNode(r); /* Finally, insert the whole string just read. The
global variables match_length and match_position are set. */
do {
if (match_length > len) match_length = len; /* match_length
may be spuriously long near the end of text. */
if (match_length <= THRESHOLD) {
match_length = 1; /* Not long enough match. Send one byte. */
code_buf[0] |= mask; /* 'send one byte' flag */
code_buf[code_buf_ptr++] = text_buf[r]; /* Send uncoded. */
} else {
// Pair layout: low 8 bits of the position, then the high 4 bits of the
// position in the upper nibble and (length - THRESHOLD - 1) in the lower.
code_buf[code_buf_ptr++] = (unsigned char) match_position;
code_buf[code_buf_ptr++] = (unsigned char)
(((match_position >> 4) & 0xf0)
| (match_length - (THRESHOLD + 1))); /* Send position and
length pair. Note match_length > THRESHOLD. */
}
// mask is an unsigned char: after eight shifts it wraps to 0, which marks a
// complete group of eight units.
if ((mask <<= 1) == 0) { /* Shift mask left one bit. */
// for (i = 0; i < code_buf_ptr; i++) { /* Send at most 8 units of */
// putc(code_buf[i], outfile); /* code together */
// }
compressed.put((char*)code_buf, code_buf_ptr, true);
codesize += code_buf_ptr;
code_buf[0] = 0; code_buf_ptr = mask = 1;
}
last_match_length = match_length;
for (i = 0; i < last_match_length; i++) {
if ( unc_str_ptr == unc_str_len )
break;
c = unc_str[unc_str_ptr++];
DeleteNode(s); /* Delete old strings and */
text_buf[s] = c; /* read new bytes */
if (s < F - 1) text_buf[s + N] = c; /* If the position is
near the end of buffer, extend the buffer to make
string comparison easier. */
//s = (s + 1) & (N - 1); r = (r + 1) & (N - 1);
s++; s &= (N - 1); r++; r &= (N - 1);
/* Since this is a ring buffer, increment the position
modulo N. */
InsertNode(r); /* Register the string in text_buf[r..r+F-1] */
}
// if ((textsize += i) > printcount) {
// printf("%12ld\r", textsize); printcount += 1024;
// /* Reports progress each time the textsize exceeds
// multiples of 1024. */
// }
while (i++ < last_match_length) {/* After the end of text, */
DeleteNode(s); /* no need to read, but */
//s = (s + 1) & (N - 1); r = (r + 1) & (N - 1);
s++; s &= (N - 1); r++; r &= (N - 1);
if (--len) InsertNode(r); /* buffer may not be empty. */
}
} while (len > 0); /* until length of string to be processed is zero */
if (code_buf_ptr > 1) { /* Send remaining code. */
// for (i = 0; i < code_buf_ptr; i++) {
// //putc(code_buf[i], outfile);
// compressed.put(code_buf[i], true);
// }
compressed.put((char*)code_buf, code_buf_ptr, true);
codesize += code_buf_ptr;
}
//printf("In : %ld bytes\n", textsize); /* Encoding is done. */
//printf("Out: %ld bytes\n", codesize);
//printf("Out/In: %.3f\n", (double)codesize / textsize);
}
// Decode an LZSS stream from `compressed` into `uncompressed`.
// The input is a sequence of groups: one flag byte, then up to eight items.
// A set flag bit means a literal byte, a clear bit a two-byte
// position/length pair referring into the ring buffer text_buf (a file-level
// array shared with compress()).
// compressed.content_sz() == 0 is used as the end-of-input test.
// NOTE(review): this presumes getusc() consumes the byte it returns so that
// content_sz() shrinks -- confirm against the buffer class.
void lzss::decompress(buffer& compressed, buffer& uncompressed)
{
int i, j, k, r, c;
unsigned int flags;
// Same initial ring-buffer state as the encoder.
for (i = 0; i < N - F; i++) text_buf[i] = ' ';
r = N - F; flags = 0;
for (;;) {
// Bit 8 of flags is a marker: once it has been shifted out, all eight flag
// bits are used up and the next flag byte is fetched.
if (((flags >>= 1) & 256) == 0) {
// if ((c = getc(infile)) == EOF) break;
if ( compressed.content_sz() == 0 )
break;
compressed.getusc(c);
flags = c | 0xff00; /* uses higher byte cleverly */
} /* to count eight */
if (flags & 1) {
// Literal: copy the byte to the output and into the ring buffer.
//if ((c = getc(infile)) == EOF) break;
//putc(c, outfile); text_buf[r++] = c; r &= (N - 1);
if ( compressed.content_sz() == 0 )
break;
compressed.getusc(c);
//debug(cerr, char(c));
uncompressed.put(char(c), true); text_buf[r++] = c; r &= (N - 1);
} else {
// Pair: i = low 8 bits of the position, j = high position nibble + length.
//if ((i = getc(infile)) == EOF) break;
//if ((j = getc(infile)) == EOF) break;
if ( compressed.content_sz() == 0 )
break;
else {
compressed.getusc(i);
}
if ( compressed.content_sz() == 0 )
break;
else
compressed.getusc(j);
// Rebuild the 12-bit position; j becomes (match length - 1), so the loop
// below copies j + 1 bytes.
i |= ((j & 0xf0) << 4); j = (j & 0x0f) + THRESHOLD;
for (k = 0; k <= j; k++) {
c = text_buf[(i + k) & (N - 1)];
//putc(c, outfile);
//debug(cerr, char(c));
uncompressed.put(char(c), true);
text_buf[r++] = c; r &= (N - 1);
}
}
}
}
// This agent does nothing at the dictionary-building step: both arguments
// are ignored and the call always reports done.
io_status lzss::build_dict(lex_func_t /* f_lex */, getchar_func_t /* f_getchar */)
{
   return done;
}
MMDB_BODIES(lzss)

View File

@@ -0,0 +1,26 @@
/* $XConsortium: lzss.h /main/3 1996/06/11 17:15:21 cde-hal $ */
#ifndef _lzss_h
#define _lzss_h 1
#include "compression/abs_agent.h"
// LZSS compression agent. It keeps no per-object state of its own.
class lzss : public compress_agent
{
public:
   // Registers the agent under DICT_AGENT_CODE.
   lzss() : compress_agent(DICT_AGENT_CODE) {}
   virtual ~lzss() {}

   // Encode `uncompressed` into `compressed`.
   virtual void compress(const buffer& uncompressed, buffer& compressed) ;
   // Decode `compressed` into `uncompressed`.
   virtual void decompress(buffer& compressed, buffer& uncompressed) ;

   MMDB_SIGNATURES(lzss);

   // Always returns done; the arguments are ignored.
   virtual io_status build_dict(lex_func_t f_lex, getchar_func_t f_getchar);
};
extern lzss g_lzss_agent;
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,78 @@
/*
* $XConsortium: ps.l /main/2 1996/07/18 14:27:45 drk $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
%a 30000
%e 10000
%k 10000
%n 10000
%o 40000
%p 20000
%{
#include "compression/trie.h"
#include "compression/abs_agent.h"
static unsigned char yybuf[LBUFSIZ];
static int yybuf_sz = LBUFSIZ;
static int yybuffed = 0;
#undef yywrap
int yywrap();
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
{ \
result = (*fill_buf_func)((unsigned char*)buf,max_size); \
}
%}
%%
.|\n {
// Every input character, newline included, is staged in yybuf. When the
// buffer is full it is handed to the action callback with action number 2
// and filling restarts from the beginning.
if ( yybuffed >= yybuf_sz ) {
//alphabet -> add_letters( yybuf, yybuf_sz );
(*lex_action_func)(yybuf, yybuf_sz, 2);
yybuffed = 0;
}
yybuf[yybuffed++] = yytext[0];
}
%%
// End-of-input hook: hand the characters still staged in yybuf to the
// action callback (action number 2) and return 1 so that scanning stops.
// yybuffed is a file-level static, so it is reset after the flush; the
// original left it untouched, which made a later run of this lexer start
// with the previous run's bytes still counted as pending.
int yywrap()
{
   //alphabet -> add_letters( yybuf, yybuffed );
   (*lex_action_func)(yybuf, yybuffed, 2);
   yybuffed = 0;
   return 1;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,82 @@
/*
* $XConsortium: sgml.l /main/2 1996/07/18 14:28:02 drk $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
%a 30000
%e 10000
%k 10000
%n 10000
%o 40000
%p 20000
%{
#include "compression/trie.h"
#include "compression/abs_agent.h"
static unsigned char yybuf[LBUFSIZ];
static int yybuf_sz = LBUFSIZ;
static int yybuffed = 0;
#undef yywrap
int yywrap();
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
{ \
result = (*fill_buf_func)((unsigned char*)buf,max_size); \
}
%}
%%
"<"[0-9a-zA-Z_.]+">"|"</"[0-9a-zA-Z_.]+">" {
// An opening or closing tag is reported whole, with action number 1.
(*lex_action_func)((unsigned char*)(yytext), yyleng, 1);
}
.|\n {
// Any other character is staged in yybuf. When the buffer is full it is
// handed to the action callback with action number 2 and filling restarts
// from the beginning.
if ( yybuffed >= yybuf_sz ) {
(*lex_action_func)(yybuf, yybuf_sz, 2);
yybuffed = 0;
}
yybuf[yybuffed++] = yytext[0];
}
%%
// End-of-input hook: hand the characters still staged in yybuf to the
// action callback (action number 2) and return 1 so that scanning stops.
// yybuffed is a file-level static, so it is reset after the flush; the
// original left it untouched, which made a later run of this lexer start
// with the previous run's bytes still counted as pending.
int yywrap()
{
   (*lex_action_func)(yybuf, yybuffed, 2);
   yybuffed = 0;
   return 1;
}

View File

@@ -0,0 +1,445 @@
/*
* $XConsortium: trie.cc /main/3 1996/06/11 17:15:26 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "compression/trie.h"
//trie_node_info::trie_node_info (char c, int f, trie_node* x)
/*
 * Builds an empty cell: no subtree, zero frequency/letter/mark and no
 * encoding unit attached.
 */
trie_node_info::trie_node_info () : child(0)
{
   // Clears freq, letter and mark in one store through the int overlay.
   info.int_view = 0;

   // Clear the image union through its pointer member.  Clearing it through
   // the int member (heap) leaves the remaining bytes of `eu` indeterminate
   // wherever a pointer is wider than an int, which breaks the
   // "image.eu == 0" tests made when words are added.
   image.eu = 0;
}
/*
 * Releases the node hanging below this cell, if there is one.
 */
trie_node_info::~trie_node_info ()
{
   if ( child != 0 )
      delete child;
}
/*
 * Creates a node with one cell per byte value plus one extra cell, at
 * index LANG_ALPHABET_SZ, whose child pointer records the parent cell.
 *
 * parent: the cell this node hangs below (0 for the root).
 */
trie_node::trie_node(trie_node_info* parent)
{
#ifdef C_API
   // In C_API builds the cells are allocated one by one behind a table of
   // pointers.
   const int cell_count = LANG_ALPHABET_SZ + 1;

   children = new trie_node_info_ptr_t[cell_count];

   int slot = 0;
   while ( slot < cell_count ) {
      children[slot] = new trie_node_info;
      slot++;
   }

   children[LANG_ALPHABET_SZ] -> child = (trie_node*)parent;
#else
   children[LANG_ALPHABET_SZ].child = (trie_node*)parent;
#endif
}
/*
 * Destroys the node and, through the cell destructors, every subtree
 * below it.
 *
 * The extra cell at index LANG_ALPHABET_SZ holds the parent cell disguised
 * as a trie_node* (see the constructor).  That link does not own anything,
 * so it is cleared in both build flavours before the cells are destroyed;
 * otherwise ~trie_node_info would delete the parent cell through a pointer
 * of the wrong type.
 */
trie_node::~trie_node()
{
#ifdef C_API
   children[LANG_ALPHABET_SZ] -> child = 0;

   for (int i=0; i<LANG_ALPHABET_SZ+1; i++ )
      delete children[i];

   // The table itself came from new[], so it must go through delete[].
   delete [] children;
#else
   children[LANG_ALPHABET_SZ].child = 0;
#endif
}
/*
 * Writes one "<word>:<freq>" line for every cell of this node with a
 * non-zero frequency, then descends into that cell's subtree.
 *
 * out:       destination stream.
 * prefix:    scratch buffer holding the letters on the path to this node;
 *            position prefix_sz is overwritten with each cell's letter.
 *            No bounds check is made against the buffer's capacity.
 * prefix_sz: number of valid letters currently in prefix.
 */
void trie_node::_print(ostream& out, char* prefix, int prefix_sz)
{
   for ( int slot = 0; slot < LANG_ALPHABET_SZ; slot++ ) {

#ifdef C_API
      trie_node_info* cell = children[slot];
#else
      trie_node_info* cell = &children[slot];
#endif

      if ( cell -> info.info_view.freq == 0 )
         continue;

      // Extend the path by this cell's letter for the recursive call.
      prefix[prefix_sz] = cell -> info.info_view.letter;

      for ( int k = 0; k < prefix_sz; k++ )
         out << prefix[k];

      out << char(cell -> info.info_view.letter)
          << ":" << int(cell -> info.info_view.freq) << "\n";

      if ( cell -> child != 0 )
         cell -> child -> _print(out, prefix, prefix_sz + 1);
   }
}
//////////////////////////////////////
//
//////////////////////////////////////
/*
 * Creates an empty trie with room for estimatedsz alphabet entries, all
 * initialised to 0.  The initialisers are listed in member declaration
 * order, which is the order in which they run anyway.
 */
trie::trie(int estimatedsz) :
   max_trie_level(0), total_nodes(1), root(0), sorted_freqs(0),
   estimated_sz(estimatedsz), alphabet_sz(0)
{
   alphabet = new encoding_unitPtr[estimated_sz];

   for ( int slot = 0; slot < estimated_sz; slot++ )
      alphabet[slot] = 0;
}
/*
 * Releases the node tree, the frequency heap, every encoding unit that was
 * registered in the alphabet, and the alphabet table itself.
 */
trie::~trie()
{
   delete root;
   delete sorted_freqs;

   // alphabet_sz is unsigned; use a matching index type.
   for ( unsigned int i=0; i<alphabet_sz; i++ )
      delete alphabet[i];

   // The table was allocated with new[], so it must be freed with delete[].
   delete [] alphabet;
}
/*
 * Makes sure the alphabet table has room for at least one more entry,
 * doubling its capacity when it is full.  Existing entries keep their
 * positions; the new slots are set to 0.
 */
void trie::extend_alphabet()
{
   if ( alphabet_sz < (unsigned int)estimated_sz )
      return;

   // Doubling a capacity of 0 would never make room, so start from 1.
   const int grown_sz = ( estimated_sz > 0 ) ? 2 * estimated_sz : 1;

   encoding_unitPtr* new_alphabet = new encoding_unitPtr[grown_sz];

   int k;
   for ( k=0; k<estimated_sz; k++ )
      new_alphabet[k] = alphabet[k];
   for ( ; k<grown_sz; k++ )
      new_alphabet[k] = 0;

   // Install the grown table.  The original code assigned new_alphabet to
   // itself here, which leaked the new table and left `alphabet` at its old
   // size while estimated_sz doubled, so later insertions wrote past the
   // end of the old array.
   delete [] alphabet;
   alphabet = new_alphabet;
   estimated_sz = grown_sz;
}
void trie::add(unsigned char* word, int sz, int fq)
{
//cerr << sz << " ";
//for ( int k=0; k<sz; k++ )
// cerr << word[k];
//cerr << "\n";
static int j, level = 0;
static trie_node* x = 0;
static trie_node_info* y = 0;
static char buf[1];
static ostring *z;
if ( root == 0 )
root = new trie_node(0);
x = root;
for ( int i=0; i<sz; i++ ) {
j = word[i];
#ifdef C_API
y = (x -> children[j]);
#else
y = &(x -> children[j]);
#endif
y -> info.info_view.letter = j;
if ( i<sz-1 && y -> child == 0 ) {
y -> child = new trie_node(y);
total_nodes++;
}
if ( i == sz-1 ) {
y -> info.info_view.freq += fq;
if ( y -> image.eu == 0 ) {
y -> info.info_view.mark = 1;
y->image.eu = add_to_alphabet(word, sz, fq);
} else
y -> image.eu -> freq += fq;
}
x = y -> child;
}
}
/*
 * Registers a new encoding unit for the given word at the end of the
 * alphabet table and returns it.  The table is given the chance to grow
 * before the unit is created.
 */
encoding_unit* trie::add_to_alphabet(unsigned char* word, int sz, int fq)
{
   extend_alphabet();

   ostring* text = new ostring((char*)word, sz);
   encoding_unit* unit = new encoding_unit(text, fq);

   alphabet[alphabet_sz] = unit;
   alphabet_sz++;

   return unit;
}
void trie::add_letters(unsigned char* letters, int sz)
{
/*
MESSAGE(cerr, "trie::add_letters()");
cerr << sz << " ";
for ( int k=0; k<sz; k++ )
cerr << letters[k];
cerr << "\n";
*/
static int j;
static trie_node_info* y = 0;
static char buf[1];
static ostring *z;
if ( root == 0 )
root = new trie_node(0);
for ( int i=0; i<sz; i++ ) {
j = letters[i];
#ifdef C_API
y = (root -> children[j]);
#else
y = &(root -> children[j]);
#endif
y -> info.info_view.freq ++;
if ( y -> image.eu == 0 ) {
y -> info.info_view.letter = j;
buf[0] = char(j);
z = new ostring(buf, 1);
y -> info.info_view.mark = 1;
extend_alphabet();
alphabet[alphabet_sz++] = y->image.eu =new encoding_unit(z, 1);
} else
y -> image.eu -> freq++;
}
}
/*
 * Dumps the contents of the trie, one "<word>:<freq>" line per counted
 * cell.  An empty trie prints nothing.  The path buffer handed to _print()
 * holds 100 characters and is not bounds-checked there.
 */
ostream& operator <<(ostream& out, trie& tr)
{
   static char char_buf[100];

   if ( tr.root != 0 )
      tr.root -> _print(out, char_buf, 0);

   return out;
}
void update_index(int ind, void* x)
{
((trie_node_info*)x) -> image.heap = ind;
}
/*
 * Walks the subtree below z and, for every counted cell that has no
 * subtree of its own, marks the cell and stores a new encoding unit for
 * its word at alphabet[j], advancing j.
 *
 * z: node to scan.
 * j: next free position in the alphabet table; updated in place.  No
 *    capacity check is made here.
 */
void trie::_find_leaf(trie_node* z, int& j)
{
   for ( int slot = 0; slot < LANG_ALPHABET_SZ; slot++ ) {

#ifdef C_API
      trie_node_info* cell = z -> children[slot];
#else
      trie_node_info* cell = &(z -> children[slot]);
#endif

      if ( cell -> info.info_view.freq == 0 )
         continue;

      if ( cell -> child != 0 ) {
         _find_leaf(cell -> child, j);
         continue;
      }

      cell -> info.info_view.mark = 1;

      ostring* word = get_word(cell);
      encoding_unit* unit = new encoding_unit(word, cell -> info.info_view.freq);

      cell -> image.eu = unit;
      alphabet[j++] = unit;
   }
}
/*
 * Gives access to the alphabet table.
 *
 * a_sz: receives the number of entries currently registered.
 * Returns the trie's own table, not a copy; the trie still owns it and
 * deletes it in its destructor.
 */
encoding_unit** trie::get_alphabet(unsigned int& a_sz)
{
a_sz = alphabet_sz;
return alphabet;
}
/*
 * Rebuilds the word that ends in `leaf` by walking the parent links up to
 * the root, and returns it as a newly allocated ostring.
 *
 * Throws stringException when leaf is null or when the word does not fit
 * in the 127-letter scratch buffer.
 */
ostring* trie::get_word(trie_node_info* leaf)
{
   static char buf[128];

   buf[127] = 0;
   int i = 127;

   trie_node_info* x = leaf;

   if ( x == 0 )
      throw(stringException("null leaf node pointer"));

   while ( x ) {

      // The letters are written back to front; refuse to run off the
      // beginning of the buffer for paths deeper than 127 levels.
      if ( i == 0 )
         throw(stringException("get_word(): word longer than 127 letters"));

      buf[--i] = x -> info.info_view.letter;

      // x is cell number `letter` of its node, so stepping forward by
      // (LANG_ALPHABET_SZ - letter) cells reaches the node's extra cell,
      // whose child pointer holds the parent cell.
      // NOTE(review): this arithmetic needs the cells of a node to be one
      // contiguous array.  In C_API builds trie_node allocates every cell
      // separately, so this step looks invalid there -- confirm before
      // relying on get_word() in such builds.
      x = &x[LANG_ALPHABET_SZ - x->info.info_view.letter];
      x = (trie_node_info*)(x -> child);
   }

   //debug(cerr, buf+i);
   return new ostring(buf+i, 127-i);
}
/*
 * Follows str through the trie as far as counted cells allow.
 *
 * str, len:  the bytes to follow.
 * node:      receives the node reached after the last matched byte; it is
 *            0 when that byte's cell has no subtree.
 * node_info: receives the last cell examined; left untouched when len is
 *            not positive.
 *
 * Returns true when all len bytes were matched, false otherwise (also when
 * the trie is empty, in which case neither output is written).
 */
Boolean trie::travers_to(char* str, int len,
                         trie_node*& node, trie_node_info*& node_info
                        )
{
   //MESSAGE(cerr, "travers_to BEG");
   if ( root == 0 )
      return false;

   node = root;

   // Declared outside the loop because the count is needed after it.
   int i;

   for ( i=0; i<len; i++ ) {

      // The previous cell had no subtree: the path ends before the string.
      if ( node == 0 )
         break;

      // Index by byte value; a plain char may be negative for bytes >= 0x80.
      const int j = (unsigned char)str[i];

      //debug(cerr, str[i]);
#ifdef C_API
      node_info = (node -> children[j]);
#else
      node_info = &(node -> children[j]);
#endif

      if ( node_info -> info.info_view.freq == 0 ) {
         //MESSAGE(cerr, "BREAK");
         break;
      }

      node = node_info -> child;
   }

   //MESSAGE(cerr, "travers_to RET");
   return ( len == i ) ? true : false;
}
/*
 * Inserts every counted cell of the subtree below rt into sorted_freqs.
 * Subtrees are only entered through cells that are counted themselves.
 *
 * rt:    node to scan.
 * level: depth of rt; only passed along to the recursive calls.
 *
 * sorted_freqs must have been created beforehand; this function does not
 * allocate it.
 */
void trie::collect_freqs(trie_node* rt, int level)
{
   for ( int slot = 0; slot < LANG_ALPHABET_SZ; slot++ ) {

#ifdef C_API
      trie_node_info* cell = rt -> children[slot];
#else
      trie_node_info* cell = &(rt -> children[slot]);
#endif

      if ( cell -> info.info_view.freq == 0 )
         continue;

      sorted_freqs -> insert(cell);

      if ( cell -> child != 0 )
         collect_freqs(cell -> child, level + 1);
   }
}
/*
 * Finds the longest prefix of str that was registered as a word and
 * returns its encoding unit.
 *
 * Returns 0 when the trie is empty.  Throws stringException when no prefix
 * of str is a registered word, or when the matching cell carries no
 * encoding unit.
 */
encoding_unit* trie::parse(unsigned char* str, int len)
{
   if ( root == 0 )
      return 0;

   trie_node* node = root;
   trie_node_info* last_hit = 0;
   int i = 0;

   // Descend for as long as there are both input bytes and nodes,
   // remembering the deepest marked cell seen on the way.
   while ( node != 0 && i < len ) {

      const int letter = str[i];
      i++;

#ifdef C_API
      trie_node_info* cell = node -> children[letter];
#else
      trie_node_info* cell = &(node -> children[letter]);
#endif

      if ( cell -> info.info_view.mark == 1 )
         last_hit = cell;

      node = cell -> child;
   }

   if ( last_hit != 0 ) {
      if ( last_hit -> image.eu == 0 )
         throw(stringException("parse(): encoding unit not found"));

      return last_hit -> image.eu;
   }

   debug(cerr, len);
   debug(cerr, str[i]);
   debug(cerr, int(str[i]));
   throw(stringException("parse(): string not in trie"));
}
///////////////////////////////////////
//
///////////////////////////////////////
/*
 * Ordering predicate on cells: true when the frequency of n1 is strictly
 * lower than that of n2.  Both arguments must point to trie_node_info
 * objects.
 */
Boolean trie_node_ls(voidPtr n1, voidPtr n2)
{
   trie_node_info* lhs = (trie_node_info*)n1;
   trie_node_info* rhs = (trie_node_info*)n2;

   return ( lhs -> info.info_view.freq < rhs -> info.info_view.freq ) ? true : false;
}
/*
 * Equality predicate on cells: true when n1 and n2 carry the same
 * frequency.  Both arguments must point to trie_node_info objects.
 */
Boolean trie_node_eq(voidPtr n1, voidPtr n2)
{
   trie_node_info* lhs = (trie_node_info*)n1;
   trie_node_info* rhs = (trie_node_info*)n2;

   return ( lhs -> info.info_view.freq == rhs -> info.info_view.freq ) ? true : false;
}

View File

@@ -0,0 +1,138 @@
/*
* $XConsortium: trie.h /main/3 1996/06/11 17:15:31 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#ifndef _trie_h
#define _trie_h 1
#include "compression/code.h"
#include "dstr/heap.h"
#define MAX_LEVELS 50
#define LANG_ALPHABET_SZ 256
class trie_node;
/*
 * Per-cell bookkeeping packed into 23 + 8 + 1 = 32 bits.  trie_node_info
 * overlays it with an int (int_view) and clears all three fields through
 * that view.
 */
struct info_t {
unsigned freq: 23;   // occurrence count; unsigned bit-field arithmetic wraps at 2^23
unsigned letter: 8;  // byte value this cell stands for
unsigned mark: 1;    // set to 1 when a registered word ends in this cell
};
/*
 * One cell of a trie node: the data kept for a single letter at a single
 * position.  All members are private; trie_node, trie and the helper
 * functions named below reach them as friends.
 */
class trie_node_info
{
// Node holding the continuations of this letter, or 0.  Deleted by the
// destructor.  In the extra cell of a node it holds the parent cell
// instead, cast to trie_node*.
trie_node* child;
// Frequency/letter/mark, also addressable as one int for clearing.
union {
struct info_t info_view;
int int_view;
} info;
// The three members share storage, so only one is meaningful at a time:
// eu is the encoding unit of the word ending here and heap is the index
// written by update_index().  pos is not used by the trie code itself.
union {
encoding_unit* eu;
int heap;
int pos;
} image;
public:
//trie_node_info (char letter = 0, int freq = 0, trie_node* child = 0);
trie_node_info ();
~trie_node_info ();
friend class trie_node;
friend class trie;
friend Boolean trie_node_ls(voidPtr n1, voidPtr n2);
friend Boolean trie_node_eq(voidPtr n1, voidPtr n2);
friend void update_index(int ind, void* x);
};
// Element type of the cell table used by trie_node in C_API builds.
typedef trie_node_info* trie_node_info_ptr_t;
/*
 * One level of the trie: a cell for each of the LANG_ALPHABET_SZ byte
 * values plus one extra cell that records the parent.
 */
class trie_node
{
protected:
#ifdef C_API
// Table of LANG_ALPHABET_SZ+1 separately allocated cells.
trie_node_info_ptr_t* children;
#else
trie_node_info children[LANG_ALPHABET_SZ+1];
#endif
// In both layouts the last cell, at index LANG_ALPHABET_SZ, encodes the
// parent: its child pointer holds the parent cell cast to trie_node*.
public:
// parent is the cell this node hangs below, or 0 for the root.
trie_node(trie_node_info* parent);
~trie_node();
// Prints every counted word below this node; prefix/prefix_sz carry the
// letters on the path to it.
void _print(ostream& out, char* prefix, int prefix_sz);
friend class trie;
};
/*
 * Byte-wise trie that counts words and keeps an "alphabet": a table with
 * one encoding_unit per distinct word that has been added.
 */
class trie
{
protected:
int max_trie_level;          // set to 0 by the constructor
int total_nodes;             // starts at 1; incremented for every node add() creates
int level_sz[MAX_LEVELS];    // not initialised by the constructor
trie_node* root;             // created by the first add()/add_letters()
heap* sorted_freqs;          // filled by collect_freqs(); starts as 0
int estimated_sz;            // current capacity of the alphabet table
encoding_unit** alphabet;    // owned table of registered encoding units
unsigned int alphabet_sz;    // number of entries in use
protected:
// Inserts every counted cell below rt into sorted_freqs.
void collect_freqs(trie_node* rt, int level);
// Rebuilds the word ending in cell x by following parent links.
ostring* get_word(trie_node_info* x);
// Registers an encoding unit for every counted cell without a subtree.
void _find_leaf(trie_node* z, int& j);
// Grows the alphabet table when it is full.
void extend_alphabet();
public:
trie(int estimated_alphabet_sz = 400) ;
virtual ~trie() ;
// Adds `freq` occurrences of the len-byte word.
virtual void add(unsigned char* word, int len, int freq = 1) ;
// Adds every byte of `letters` as a one-letter word.
virtual void add_letters(unsigned char* letters, int len) ;
// Appends a new encoding unit for the word to the alphabet table.
virtual encoding_unit* add_to_alphabet(unsigned char* word, int sz, int fq);
// Returns the alphabet table and stores its entry count in alphabet_sz.
virtual encoding_unit** get_alphabet(unsigned int& alphabet_sz);
// Follows str through counted cells; true when all sz bytes matched.
virtual Boolean travers_to(char* str, int sz,
trie_node*& node, trie_node_info*& node_info
);
// take the longest substring from str and returns its
// encoding_unit.
virtual encoding_unit* parse(unsigned char* str, int len);
friend ostream& operator <<(ostream& out, trie& tr);
};
#endif

View File

@@ -0,0 +1,128 @@
/*
* $XConsortium: zip.cc /main/3 1996/06/11 17:15:36 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "compression/zip.h"
//zip g_zip_agent;
#define UNCOMPRESSED "/tmp/uncomp"
#define COMPRESSED "/tmp/comp"
void zip::compress(const buffer& uncompressed, buffer& compressed)
{
////////////////////////////////////////
// code for testing. I know it is slow.
////////////////////////////////////////
fstream out(UNCOMPRESSED, ios::out|ios::trunc);
if ( !out )
throw(streamException(out.rdstate()));
if ( out.write(uncompressed.get_base(), uncompressed.content_sz()) == 0 )
throw(streamException(out.rdstate()));
out.close();
system(form("gzip -c %s > %s", UNCOMPRESSED, COMPRESSED));
fstream in(COMPRESSED, ios::in);
if ( !in )
throw(streamException(in.rdstate()));
int x = bytes(in);
compressed.expand_chunk(x);
if ( in.read(compressed.get_base(), x) == 0 || x != in.gcount() )
throw(streamException(in.rdstate()));
compressed.set_content_sz(x);
in.close();
return;
}
/*
 * Expands `compressed` into `uncompressed` by running the external gzip
 * program (-d) over two scratch files.
 *
 * Throws streamException when either scratch file cannot be opened,
 * written or read back completely.
 *
 * NOTE(review): the scratch files have fixed names, so concurrent callers
 * share them, and the result of system() is not checked -- if gzip fails,
 * whatever is found in the output file is returned.
 */
void zip::decompress(buffer& compressed, buffer& uncompressed)
{
   // Stage the compressed bytes where gzip can read them.
   fstream packed(COMPRESSED, ios::out|ios::trunc);

   if ( !packed )
      throw(streamException(packed.rdstate()));

   if ( !packed.write(compressed.get_base(), compressed.content_sz()) )
      throw(streamException(packed.rdstate()));

   packed.close();

   system(form("gzip -cd %s > %s", COMPRESSED, UNCOMPRESSED));

   // Load gzip's output into the caller's buffer.
   fstream plain(UNCOMPRESSED, ios::in);

   if ( !plain )
      throw(streamException(plain.rdstate()));

   int plain_sz = bytes(plain);

   uncompressed.expand_chunk(plain_sz);

   if ( !plain.read(uncompressed.get_base(), plain_sz) || plain.gcount() != plain_sz )
      throw(streamException(plain.rdstate()));

   uncompressed.set_content_sz(plain_sz);

   plain.close();
}
/*
 * This agent builds no dictionary: both callbacks are ignored and `done`
 * is returned.
 */
io_status zip::build_dict(lex_func_t, getchar_func_t)
{
return done;
}
MMDB_BODIES(zip)
/*
 * Size of the compacted disk representation: always 0, matching cdrOut()
 * and cdrIn(), which transfer nothing.
 */
int zip::cdr_sizeof()
{
return 0;
}
/*
 * Writes nothing to the buffer and returns `done`.
 */
io_status zip::cdrOut(buffer&)
{
return done;
}
/*
 * Reads nothing from the buffer and returns `done`.
 */
io_status zip::cdrIn(buffer&)
{
return done;
}

View File

@@ -0,0 +1,56 @@
/*
* $XConsortium: zip.h /main/3 1996/06/11 17:15:40 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#ifndef _zip_h
#define _zip_h 1
#include "compression/abs_agent.h"
/*
 * Compression agent that delegates the actual work to the external gzip
 * program (see zip.cc).  It registers itself with the base class under
 * GZIP_AGENT_CODE.
 */
class zip : public compress_agent
{
public:
zip() : compress_agent(GZIP_AGENT_CODE) {};
virtual ~zip() {};
// Fill `compressed` with the gzip-compressed form of `uncompressed`.
virtual void compress(const buffer& uncompressed, buffer& compressed) ;
// Fill `uncompressed` with the expanded form of `compressed`.
virtual void decompress(buffer& compressed, buffer& uncompressed) ;
MMDB_SIGNATURES(zip);
// compacted disk representation In and Out functions
virtual int cdr_sizeof();
virtual io_status cdrOut(buffer&);
virtual io_status cdrIn(buffer&);
// No dictionary is needed by this agent; the implementation only
// returns `done`.
virtual io_status build_dict(lex_func_t f_lex, getchar_func_t f_getchar);
};
// NOTE(review): the matching definition in zip.cc is commented out; confirm
// that g_zip_agent is defined in some other translation unit.
extern zip g_zip_agent;
#endif