Initial import of the CDE 2.1.30 sources from the Open Group.
This commit is contained in:
56
cde/programs/dtinfo/DtMmdb/compression/Imakefile
Normal file
56
cde/programs/dtinfo/DtMmdb/compression/Imakefile
Normal file
@@ -0,0 +1,56 @@
|
||||
XCOMM $XConsortium: Imakefile /main/12 1996/08/21 15:51:36 drk $
|
||||
|
||||
XCOMM ** WARNING **
|
||||
XCOMM
|
||||
XCOMM The files named here may appear in many different Imakefiles.
|
||||
XCOMM If you add or remove a file, be sure to update all locations.
|
||||
XCOMM It's unfortunate, but all this redundancy serves a purpose.
|
||||
XCOMM
|
||||
XCOMM Other possible locations are:
|
||||
XCOMM .../lib/DtMmdb/Imakefile
|
||||
XCOMM .../lib/DtMmdb/<subdir>/Imakefile
|
||||
XCOMM .../programs/dtinfo/mmdb/Imakefile
|
||||
XCOMM .../programs/dtinfo/mmdb/<subdir>/Imakefile
|
||||
|
||||
#define DoNormalLib NormalLibDtMmdb
|
||||
#define DoSharedLib SharedLibDtMmdb
|
||||
#define DoDebugLib DebugLibDtMmdb
|
||||
#define DoProfileLib ProfileLibDtMmdb
|
||||
#define LibName DtMmdb
|
||||
#define SoRev SODTMMDBREV
|
||||
#define LibHeaders NO
|
||||
#define LibCreate NO
|
||||
#define LargePICTable YES
|
||||
|
||||
#define CplusplusSource YES
|
||||
DEPEND_DEFINES = $(CXXDEPENDINCLUDES)
|
||||
|
||||
XCOMM In DtMmdb we compile as C_API sources.
|
||||
DEFINES = -DC_API
|
||||
INCLUDES = -I.. $(EXCEPTIONS_INCLUDES)
|
||||
|
||||
NORMAL_SRCS = abs_agent.C zip.C huffman.C trie.C code.C lzss.C
|
||||
PARSER_SRCS = sgml.C ps.C
|
||||
|
||||
#ifdef RegenParserFiles
|
||||
XCOMM Hide the lex sources so the normal case is simple.
|
||||
|
||||
YACCSTACK = -DYYSTACKSIZE=20000
|
||||
YFLAGS = -l -d
|
||||
LFLAGS = -8 -s
|
||||
|
||||
LinkFile(sgml.l,sgml.lex)
|
||||
LexTarget(sgml,sgml)
|
||||
|
||||
LinkFile(ps.l,ps.lex)
|
||||
LexTarget(ps,ps)
|
||||
#endif
|
||||
|
||||
SRCS = $(NORMAL_SRCS) $(PARSER_SRCS)
|
||||
OBJS = $(NORMAL_SRCS:.C=.o) $(PARSER_SRCS:.C=.o)
|
||||
|
||||
#include <Library.tmpl>
|
||||
|
||||
SubdirLibraryRule($(OBJS))
|
||||
|
||||
DependTarget()
|
||||
39
cde/programs/dtinfo/DtMmdb/compression/abs_agent.C
Normal file
39
cde/programs/dtinfo/DtMmdb/compression/abs_agent.C
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* $XConsortium: abs_agent.cc /main/4 1996/06/11 17:14:47 cde-hal $
|
||||
*
|
||||
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
|
||||
* All rights reserved. Unpublished -- rights reserved under
|
||||
* the Copyright Laws of the United States. USE OF A COPYRIGHT
|
||||
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
|
||||
* OR DISCLOSURE.
|
||||
*
|
||||
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
|
||||
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
|
||||
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
|
||||
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
|
||||
* INTERNATIONAL, LTD.
|
||||
*
|
||||
* RESTRICTED RIGHTS LEGEND
|
||||
* Use, duplication, or disclosure by the Government is subject
|
||||
* to the restrictions as set forth in subparagraph (c)(l)(ii)
|
||||
* of the Rights in Technical Data and Computer Software clause
|
||||
* at DFARS 252.227-7013.
|
||||
*
|
||||
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
|
||||
* 1315 Dell Avenue
|
||||
* Campbell, CA 95008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "compression/abs_agent.h"
|
||||
|
||||
// Global hook holding the current input-filling callback; unset (0) until
// installed. huff::build_dict() stores its f_getchar argument here.
getchar_func_t fill_buf_func = 0;
|
||||
// Global hook holding the current lexer-action callback; unset (0) until
// installed. huff::build_dict() points it at trie_add_wrap().
lex_action_func_t lex_action_func = 0;
|
||||
|
||||
// Construct the abstract compression agent: the class code c_id is forwarded
// to the pstring base and the SWAP_ALLOWED mode is switched off.
compress_agent::compress_agent(c_code_t c_id)
   : pstring(c_id)
{
   set_mode(SWAP_ALLOWED, false);
}
|
||||
|
||||
HANDLER_BODIES(compress_agent)
|
||||
59
cde/programs/dtinfo/DtMmdb/compression/abs_agent.h
Normal file
59
cde/programs/dtinfo/DtMmdb/compression/abs_agent.h
Normal file
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* $XConsortium: abs_agent.h /main/4 1996/06/11 17:14:52 cde-hal $
|
||||
*
|
||||
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
|
||||
* All rights reserved. Unpublished -- rights reserved under
|
||||
* the Copyright Laws of the United States. USE OF A COPYRIGHT
|
||||
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
|
||||
* OR DISCLOSURE.
|
||||
*
|
||||
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
|
||||
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
|
||||
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
|
||||
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
|
||||
* INTERNATIONAL, LTD.
|
||||
*
|
||||
* RESTRICTED RIGHTS LEGEND
|
||||
* Use, duplication, or disclosure by the Government is subject
|
||||
* to the restrictions as set forth in subparagraph (c)(l)(ii)
|
||||
* of the Rights in Technical Data and Computer Software clause
|
||||
* at DFARS 252.227-7013.
|
||||
*
|
||||
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
|
||||
* 1315 Dell Avenue
|
||||
* Campbell, CA 95008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _compress_agent_h
|
||||
#define _compress_agent_h 1
|
||||
|
||||
#include "utility/buffer.h"
|
||||
#include "object/pstring.h"
|
||||
|
||||
// Callback that is handed a byte buffer and its capacity and returns an int.
// NOTE(review): presumably it fills buf with up to max_sz bytes and returns
// the number of bytes stored -- confirm against the lexer sources.
typedef int (*getchar_func_t)(unsigned char* buf, int max_sz);
|
||||
// Currently installed input callback (defined in abs_agent.C).
extern getchar_func_t fill_buf_func;
|
||||
|
||||
// Callback invoked with a byte string, its length and an action number.
typedef void (*lex_action_func_t)(unsigned char*, int, int action_num);
|
||||
// Currently installed lexer-action callback (defined in abs_agent.C).
extern lex_action_func_t lex_action_func;
|
||||
|
||||
// Parser entry point; huff::build_dict() treats a non-zero return as failure.
typedef int (*lex_func_t)();
|
||||
|
||||
|
||||
class compress_agent : public pstring
|
||||
{
|
||||
|
||||
public:
|
||||
compress_agent(c_code_t c_id) ;
|
||||
virtual ~compress_agent() {};
|
||||
|
||||
virtual void compress(const buffer& uncompressed, buffer& compressed) = 0;
|
||||
virtual void decompress(buffer& decompressed, buffer& uncompressed) = 0;
|
||||
|
||||
virtual io_status build_dict(lex_func_t f_lex, getchar_func_t f_getchar) = 0;
|
||||
};
|
||||
|
||||
HANDLER_SIGNATURES(compress_agent)
|
||||
|
||||
#endif
|
||||
48
cde/programs/dtinfo/DtMmdb/compression/code.C
Normal file
48
cde/programs/dtinfo/DtMmdb/compression/code.C
Normal file
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* $XConsortium: code.cc /main/3 1996/06/11 17:14:57 cde-hal $
|
||||
*
|
||||
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
|
||||
* All rights reserved. Unpublished -- rights reserved under
|
||||
* the Copyright Laws of the United States. USE OF A COPYRIGHT
|
||||
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
|
||||
* OR DISCLOSURE.
|
||||
*
|
||||
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
|
||||
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
|
||||
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
|
||||
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
|
||||
* INTERNATIONAL, LTD.
|
||||
*
|
||||
* RESTRICTED RIGHTS LEGEND
|
||||
* Use, duplication, or disclosure by the Government is subject
|
||||
* to the restrictions as set forth in subparagraph (c)(l)(ii)
|
||||
* of the Rights in Technical Data and Computer Software clause
|
||||
* at DFARS 252.227-7013.
|
||||
*
|
||||
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
|
||||
* 1315 Dell Avenue
|
||||
* Campbell, CA 95008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "compression/code.h"
|
||||
|
||||
// Create an alphabet entry for word w with occurrence count f. The unit
// takes ownership of w (the destructor deletes it).
//
// Every member is initialized, in declaration order: bits and leaf_htr_node
// used to be left indeterminate until huff::calculate_code() and
// huff::build_tree() assigned them, so reading them earlier (for example
// from huff::print_alphabet()) was undefined.
encoding_unit::encoding_unit(ostring* w, unsigned int f) :
   word(w), bits(0), code(0), freq(f), leaf_htr_node(0)
{
}
|
||||
|
||||
// Release the word string owned by this unit. leaf_htr_node is not touched.
encoding_unit::~encoding_unit()
{
   delete this -> word;
}
|
||||
|
||||
// Dump an encoding unit to `out`: its word followed by its frequency, each
// emitted through the project's debug() macro. Returns `out` for chaining.
ostream& operator<<(ostream& out, encoding_unit& eu)
|
||||
{
|
||||
debug(out, *eu.word);
|
||||
debug(out, eu.freq);
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
58
cde/programs/dtinfo/DtMmdb/compression/code.h
Normal file
58
cde/programs/dtinfo/DtMmdb/compression/code.h
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* $XConsortium: code.h /main/3 1996/06/11 17:15:01 cde-hal $
|
||||
*
|
||||
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
|
||||
* All rights reserved. Unpublished -- rights reserved under
|
||||
* the Copyright Laws of the United States. USE OF A COPYRIGHT
|
||||
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
|
||||
* OR DISCLOSURE.
|
||||
*
|
||||
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
|
||||
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
|
||||
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
|
||||
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
|
||||
* INTERNATIONAL, LTD.
|
||||
*
|
||||
* RESTRICTED RIGHTS LEGEND
|
||||
* Use, duplication, or disclosure by the Government is subject
|
||||
* to the restrictions as set forth in subparagraph (c)(l)(ii)
|
||||
* of the Rights in Technical Data and Computer Software clause
|
||||
* at DFARS 252.227-7013.
|
||||
*
|
||||
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
|
||||
* 1315 Dell Avenue
|
||||
* Campbell, CA 95008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _code_h
|
||||
#define _code_h 1
|
||||
|
||||
#include "utility/ostring.h"
|
||||
////////////////////////////////////////
|
||||
//
|
||||
////////////////////////////////////////
|
||||
|
||||
class htr_node;
|
||||
|
||||
class encoding_unit
|
||||
{
|
||||
public:
|
||||
ostring* word;
|
||||
int bits;
|
||||
unsigned int code;
|
||||
unsigned int freq;
|
||||
htr_node* leaf_htr_node;
|
||||
|
||||
public:
|
||||
encoding_unit(ostring* w, unsigned int freq);
|
||||
~encoding_unit();
|
||||
|
||||
friend ostream& operator <<(ostream&, encoding_unit&);
|
||||
|
||||
};
|
||||
|
||||
typedef encoding_unit* encoding_unitPtr;
|
||||
|
||||
#endif
|
||||
506
cde/programs/dtinfo/DtMmdb/compression/huffman.C
Normal file
506
cde/programs/dtinfo/DtMmdb/compression/huffman.C
Normal file
@@ -0,0 +1,506 @@
|
||||
/*
|
||||
* $XConsortium: huffman.cc /main/3 1996/06/11 17:15:06 cde-hal $
|
||||
*
|
||||
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
|
||||
* All rights reserved. Unpublished -- rights reserved under
|
||||
* the Copyright Laws of the United States. USE OF A COPYRIGHT
|
||||
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
|
||||
* OR DISCLOSURE.
|
||||
*
|
||||
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
|
||||
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
|
||||
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
|
||||
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
|
||||
* INTERNATIONAL, LTD.
|
||||
*
|
||||
* RESTRICTED RIGHTS LEGEND
|
||||
* Use, duplication, or disclosure by the Government is subject
|
||||
* to the restrictions as set forth in subparagraph (c)(l)(ii)
|
||||
* of the Rights in Technical Data and Computer Software clause
|
||||
* at DFARS 252.227-7013.
|
||||
*
|
||||
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
|
||||
* 1315 Dell Avenue
|
||||
* Campbell, CA 95008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "compression/huffman.h"
|
||||
#include "dstr/heap.h"
|
||||
|
||||
////////////////////////////////////////
|
||||
//
|
||||
////////////////////////////////////////
|
||||
|
||||
// Build a node bound to encoding unit e; the node's weight is e's frequency.
// e is dereferenced, so it must not be null. The node starts without a parent.
// Initializers are listed in member declaration order, which is the order
// in which they run.
htr_node::htr_node(encoding_unit* e, htr_node* lt, htr_node* rt)
   : parent(0),
     left(lt),
     right(rt),
     freq(e -> freq),
     eu(e)
{
}
|
||||
|
||||
// Build a node that carries only a weight f and no encoding unit (eu is 0).
// huff::build_tree() uses this form for the nodes that join two subtrees.
// Initializers are listed in member declaration order.
htr_node::htr_node(unsigned long f, htr_node* lt, htr_node* rt)
   : parent(0),
     left(lt),
     right(rt),
     freq(f),
     eu(0)
{
}
|
||||
|
||||
// Destroy this node together with both subtrees (recursively, through the
// children's destructors). The encoding unit in eu is not deleted.
htr_node::~htr_node()
{
   delete this -> left;
   delete this -> right;
}
|
||||
|
||||
////////////////////////////////////////
|
||||
//
|
||||
////////////////////////////////////////
|
||||
// Heap "equal" predicate: true when the two htr_node arguments carry the
// same frequency.
Boolean htr_eq(const void* n1, const void* n2)
{
   const htr_node* lhs = (const htr_node*)n1;
   const htr_node* rhs = (const htr_node*)n2;

   return ( lhs -> freq == rhs -> freq ) ? true : false;
}
|
||||
|
||||
// Heap "less" predicate with the sense deliberately inverted: true when n1's
// frequency is GREATER than n2's. huff::build_tree() relies on this so that
// the heap's "max" element is the node with the smallest frequency.
Boolean htr_ls(const void* n1, const void* n2)
{
   const htr_node* lhs = (const htr_node*)n1;
   const htr_node* rhs = (const htr_node*)n2;

   return ( lhs -> freq > rhs -> freq ) ? true : false;
}
|
||||
|
||||
////////////////////////////////////////
|
||||
//
|
||||
////////////////////////////////////////
|
||||
// Create an empty Huffman agent: no alphabet, no code tree, and a fresh
// trie (constructed with 26) ready to collect the dictionary.
// Initializers are listed in member declaration order.
huff::huff()
   : compress_agent(HUFFMAN_AGENT_CODE),
     htr_root(0),
     e_units(0),
     tri(new trie(26)),
     cts(0)
{
}
|
||||
|
||||
// Release the trie (if it still exists) and the whole code tree.
// NOTE(review): e_units is not released here; whether the array obtained
// from trie::get_alphabet() is owned elsewhere should be confirmed.
huff::~huff()
{
   delete this -> tri;
   delete this -> htr_root;
}
|
||||
|
||||
void huff::build_tree()
|
||||
{
|
||||
heap htr_node_set(htr_eq, htr_ls, cts);
|
||||
|
||||
htr_node* x ;
|
||||
for (int i=0; i<cts; i++ ) {
|
||||
if ( e_units[i] ) {
|
||||
x = new htr_node(e_units[i]);
|
||||
e_units[i] -> leaf_htr_node = x;
|
||||
htr_node_set.insert(x);
|
||||
}
|
||||
}
|
||||
|
||||
htr_node_set.heapify();
|
||||
|
||||
htr_node *n1, *n2, *n3;
|
||||
while ( htr_node_set.count() > 1 ) {
|
||||
|
||||
// max is the smallest element. see htr_ls()
|
||||
n1 = (htr_node*)htr_node_set.max_elm() ;
|
||||
htr_node_set.delete_max() ;
|
||||
|
||||
// max is the smallest element. see htr_ls()
|
||||
n2 = (htr_node*)htr_node_set.max_elm() ;
|
||||
htr_node_set.delete_max() ;
|
||||
|
||||
n3 = new htr_node(n1->freq+n2->freq, n1, n2);
|
||||
|
||||
n1 -> parent = n2 -> parent = n3;
|
||||
|
||||
htr_node_set.insert_heapify(n3);
|
||||
}
|
||||
|
||||
htr_root = (htr_node*)htr_node_set.max_elm();
|
||||
htr_node_set.delete_max() ;
|
||||
}
|
||||
|
||||
void huff::calculate_code()
|
||||
{
|
||||
htr_node* x ;
|
||||
htr_node* parent;
|
||||
|
||||
for (int i=0; i<cts; i++ ) {
|
||||
|
||||
if ( e_units[i] == 0 )
|
||||
continue;
|
||||
|
||||
e_units[i] -> code = 0;
|
||||
e_units[i] -> bits = 0;
|
||||
|
||||
x = e_units[i] -> leaf_htr_node;
|
||||
|
||||
while ( x ) {
|
||||
parent = x -> parent;
|
||||
|
||||
if ( parent == 0 )
|
||||
break;
|
||||
|
||||
e_units[i] -> code >>= 1;
|
||||
|
||||
if ( parent -> left == x ) {
|
||||
e_units[i] -> code |= 0x80000000;
|
||||
} else
|
||||
if ( parent -> right != x ) {
|
||||
debug(cerr, i);
|
||||
throw(stringException("huffman tree corrupted"));
|
||||
}
|
||||
|
||||
x = parent;
|
||||
e_units[i] -> bits++;
|
||||
|
||||
if ( e_units[i] -> bits > BITS_IN(unsigned long) ) {
|
||||
debug(cerr, e_units[i] -> bits);
|
||||
throw(stringException("huffman tree too deep"));
|
||||
}
|
||||
}
|
||||
|
||||
e_units[i] -> code >>= ( 32 - e_units[i] -> bits );
|
||||
//debug(cerr, hex(e_units[i] -> code));
|
||||
}
|
||||
}
|
||||
|
||||
// Write one "word:bits" line per non-null alphabet entry to `out` and
// return `out`.
//
// The two totals (uncompressed bytes, compressed bytes) feed only the
// disabled statistics at the end of the function.
ostream& huff::print_alphabet(ostream& out)
{
   unsigned long total_uncmp = 0;
   unsigned long int total_cmp = 0;

   for (int i=0; i<cts; i++ ) {

      if ( e_units[i] == 0 )
         continue;

      total_uncmp += (e_units[i] -> word -> size()) * (e_units[i] -> freq);
      total_cmp += (e_units[i] -> bits) * (e_units[i] -> freq);

      out << *(e_units[i] -> word) << ":" << e_units[i]->bits << "\n";
   }

   // Convert the bit total to whole bytes, rounding up. The former
   // expression (total / 8 + total % 8) added the number of left-over bits
   // instead of a single extra byte.
   total_cmp = ( total_cmp + 7 ) / 8;

/*
   debug(cerr, total_uncmp);
   debug(cerr, total_cmp);

   debug(cerr, 1 - float(total_cmp) / float(total_uncmp) );
*/

   return out;
}
|
||||
|
||||
// self modifying buf ptr after taking an encoding unit.
|
||||
encoding_unit* huff::get_e_unit(unsigned char*& buf, int len)
|
||||
{
|
||||
encoding_unit* x = tri -> parse(buf, len) ;
|
||||
|
||||
//debug(cerr, *(x -> word));
|
||||
|
||||
buf += x -> word -> size();
|
||||
return x;
|
||||
}
|
||||
|
||||
int total_uncomp = 0;
|
||||
int total_comp = 0;
|
||||
|
||||
void huff::compress(const buffer& uncompressed, buffer& compressed)
|
||||
{
|
||||
//debug(cerr, *(buffer*)&uncompressed);
|
||||
if ( compressed.buf_sz() < uncompressed.buf_sz() )
|
||||
compressed.expand_chunk(uncompressed.buf_sz());
|
||||
|
||||
|
||||
unsigned short total_bits = 0;
|
||||
|
||||
int uncmp_sz = uncompressed.content_sz();
|
||||
unsigned char* buf = (unsigned char*)uncompressed.get_base();
|
||||
|
||||
|
||||
unsigned int code_buf = 0;
|
||||
unsigned int rem_long = 0;
|
||||
int rem_bits = 0;
|
||||
|
||||
encoding_unit *e_ptr = 0;
|
||||
|
||||
while ( uncmp_sz > 0 ) {
|
||||
|
||||
//e_ptr = get_e_unit(buf, uncmp_sz);
|
||||
|
||||
e_ptr = tri -> parse(buf, uncmp_sz);
|
||||
|
||||
buf += e_ptr -> word -> size();
|
||||
uncmp_sz -= e_ptr -> word -> size();
|
||||
|
||||
if ( rem_bits + e_ptr -> bits > 32 ) {
|
||||
|
||||
code_buf = e_ptr -> code; // shift bits to the higher end
|
||||
|
||||
rem_long <<= 32-rem_bits;
|
||||
|
||||
rem_bits += e_ptr -> bits - 32; // new rem_bits
|
||||
|
||||
code_buf >>= rem_bits; // get padding part
|
||||
|
||||
rem_long |= code_buf; // padding
|
||||
|
||||
compressed.put( rem_long );
|
||||
|
||||
// save remaining (rem_bits + e_ptr -> bits - 32) bits to rem_bits.
|
||||
|
||||
rem_long = e_ptr -> code & (~0L >> (32 - rem_bits));
|
||||
|
||||
} else {
|
||||
rem_long <<= e_ptr -> bits;
|
||||
rem_long |= e_ptr -> code;
|
||||
rem_bits += e_ptr -> bits;
|
||||
//debug(cerr, hex(rem_long));
|
||||
}
|
||||
|
||||
total_bits += e_ptr -> bits;
|
||||
total_bits &= 0x1f; // take the mod on 32
|
||||
}
|
||||
|
||||
if ( rem_bits > 0 ) {
|
||||
rem_long <<= 32 - rem_bits;
|
||||
//MESSAGE(cerr, "PUT");
|
||||
//debug(cerr, hex(rem_long));
|
||||
compressed.put( rem_long );
|
||||
}
|
||||
|
||||
//debug(cerr, total_bits);
|
||||
compressed.put(char(total_bits));
|
||||
|
||||
// total_uncomp += uncompressed.content_sz();
|
||||
// total_comp += compressed.content_sz();
|
||||
|
||||
/*
|
||||
debug(cerr, total_uncomp);
|
||||
debug(cerr, total_comp);
|
||||
|
||||
debug(cerr,
|
||||
1-float(compressed.content_sz()-1)/float(uncompressed.content_sz())
|
||||
);
|
||||
*/
|
||||
}
|
||||
|
||||
// Decode a stream produced by huff::compress() into `uncompressed`.
//
// Input layout: N 32-bit words of code bits (most significant bit first)
// followed by one trailer byte holding the total bit count modulo 32. The
// decoder walks the code tree from htr_root, going left on a 1 bit and right
// on a 0 bit; a word is emitted when the walk is found on a leaf, and the
// last word is emitted after the final bit.
//
// Changes from the previous version:
//  - A trailer of 0 means the last word is completely filled (compress()
//    never writes an empty last word). It used to be read as "0 valid
//    bits", which dropped the last word's symbols and the final symbol
//    whenever the bit total was a multiple of 32.
//  - Input without any code word (such as the single trailer byte written
//    for empty data) now yields empty output instead of reading past the
//    data and testing an uninitialized rem_bits.
//  - A stream that does not end on a leaf raises stringException instead
//    of dereferencing a null encoding unit.
//
// NOTE(review): decoded bytes are written straight to uncompressed's base
// pointer with no capacity check visible here -- confirm that callers size
// the output buffer.
void huff::decompress(buffer& compressed, buffer& uncompressed)
{
   char* buf_base = uncompressed.get_base();
   char* str;
   int str_len;

   char rem_bits = 0;

   int ct = (compressed.content_sz() - 1) >> 2;    // number of 32-bit words

   if ( ct <= 0 ) {
      // Nothing to decode; consume the trailer byte when there is one.
      if ( compressed.content_sz() > 0 )
         compressed.get(rem_bits);

      uncompressed.set_content_sz(0);
      return;
   }

   unsigned int c;

   int bits_bound = 32;

   htr_node *node_ptr = htr_root;

   do {
      compressed.get(c); ct--;

      if ( ct == 0 ) {
         compressed.get(rem_bits);
//debug(cerr, int(rem_bits));
         bits_bound = ( rem_bits == 0 ) ? 32 : rem_bits ;
      }

      for ( int i=0;i<bits_bound; i++ ) {
         if ( node_ptr -> left == 0 && node_ptr -> right == 0 ) {

            // The previous bit completed a code: emit its word and restart
            // the walk at the root.
            str_len = node_ptr -> eu -> word -> size();
            str = node_ptr -> eu -> word -> get();

            if ( str_len == 1 ) {

               *buf_base = str[0];
               buf_base++;

            } else {

               memcpy(buf_base, str, str_len);
               buf_base += str_len;

            }
            node_ptr = htr_root;
         }

         if ( c & 0x80000000 )
            node_ptr = node_ptr -> left;
         else
            node_ptr = node_ptr -> right;

         c <<= 1;
      }

   } while ( ct>0 );

//debug(cerr, buf_base-uncompressed.get_base());
   uncompressed.set_content_sz(buf_base-uncompressed.get_base());

   // The last code ends exactly at the last valid bit, so its word is still
   // pending; only leaves carry an encoding unit.
   if ( rem_bits >= 0 ) {
      if ( node_ptr == 0 || node_ptr -> eu == 0 )
         throw(stringException("huff::decompress(): truncated or corrupted code stream"));

      uncompressed.put( node_ptr -> eu -> word -> get(),
                        node_ptr -> eu -> word -> size()
                      );
   }
}
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
//
|
||||
//////////////////////////////////////////////////////////
|
||||
|
||||
MMDB_BODIES(huff)
|
||||
|
||||
int huff::cdr_sizeof()
|
||||
{
|
||||
return pstring::cdr_sizeof();
|
||||
}
|
||||
|
||||
// Write the compacted disk representation to buf.
//
// When an alphabet exists (cts > 0) it is first serialized into the pstring
// content as
//     entries_int  (len_byte word_chars freq_int)+
// and then the pstring base writes itself to buf.
//
// Throws stringException when a word is longer than 255 bytes, because its
// length could not be stored in the single length byte.
//
// NOTE(review): v_out_buf is a function-local static that is never
// explicitly reset here; confirm that expand_chunk()/put() give the intended
// contents when cdrOut() runs more than once.
io_status huff::cdrOut(buffer& buf)
{
//MESSAGE(cerr, "huff::cdrOut");
//debug(cerr, my_oid());
   static buffer v_out_buf(LBUFSIZ);

   if ( cts > 0 ) {
//MESSAGE(cerr, "huff::cdrOut: dict out");
      int sz = sizeof(int);

      // Declared once for both loops: the second loop used to reuse the
      // index declared in the first loop's init-statement, which is out of
      // scope there under the standard for-scoping rule.
      int i;

      for ( i=0; i<cts; i++ ) {
         sz += ( e_units[i] -> word -> size() +
                 sizeof(unsigned int) +
                 sizeof(char)
               );
      }

      v_out_buf.expand_chunk(sz);

      v_out_buf.put(cts);

      int word_sz;

      for ( i=0; i<cts; i++ ) {
         word_sz = e_units[i] -> word -> size();

         // The length is stored in one byte; refuse to truncate it silently.
         if ( word_sz > 255 )
            throw(stringException("huff::cdrOut(): dictionary word longer than 255 bytes"));

         v_out_buf.put(char(word_sz));

         v_out_buf.put(e_units[i] -> word -> get(), word_sz);
         v_out_buf.put(e_units[i] -> freq);
      }

      pstring::update(v_out_buf.get_base(), v_out_buf.content_sz());
   }

   return pstring::cdrOut(buf);
}
|
||||
|
||||
// format:
|
||||
// entries_int
|
||||
// (len_byte word_chars freq_int)+
|
||||
//
|
||||
io_status huff::cdrIn(buffer& buf)
|
||||
{
|
||||
static buffer v_in_buf(0);
|
||||
|
||||
pstring::cdrIn(buf);
|
||||
|
||||
if ( pstring::size() > 0 ) {
|
||||
|
||||
v_in_buf.set_chunk(pstring::get(), pstring::size());
|
||||
v_in_buf.set_content_sz(pstring::size());
|
||||
|
||||
v_in_buf.get(cts);
|
||||
|
||||
char word_buf[BUFSIZ];
|
||||
char word_sz;
|
||||
unsigned int word_freq;
|
||||
//ostring *z = 0;
|
||||
|
||||
for ( int i=0; i<cts; i++ ) {
|
||||
|
||||
v_in_buf.get(word_sz);
|
||||
v_in_buf.get(word_buf, int(word_sz));
|
||||
v_in_buf.get(word_freq);
|
||||
|
||||
/*
|
||||
z = new ostring((char*)word_buf, word_sz);
|
||||
extend_alphabet();
|
||||
alphabet[alphabet_sz++] = new encoding_unit(z, word_freq);
|
||||
*/
|
||||
|
||||
tri -> add_to_alphabet((unsigned char*)word_buf, word_sz, word_freq);
|
||||
}
|
||||
|
||||
e_units = tri -> get_alphabet(cts);
|
||||
|
||||
build_tree();
|
||||
calculate_code();
|
||||
delete tri; tri = 0;
|
||||
|
||||
//print_alphabet(cerr);
|
||||
|
||||
}
|
||||
|
||||
return done;
|
||||
}
|
||||
|
||||
trie* alphabet = 0;
|
||||
|
||||
void trie_add_wrap(unsigned char* buf, int len, int action_num)
|
||||
{
|
||||
switch ( action_num ) {
|
||||
case 1:
|
||||
alphabet -> add(buf, len);
|
||||
break;
|
||||
case 2:
|
||||
alphabet -> add_letters(buf, len);
|
||||
break;
|
||||
|
||||
default:
|
||||
debug(cerr, action_num);
|
||||
throw(stringException("unknown action number"));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Build the dictionary by running the parser f_lex over the input supplied
// through f_getchar, then derive the code tree and codes from it.
//
// The global hooks fill_buf_func, alphabet and lex_action_func are pointed
// at f_getchar, this agent's trie and trie_add_wrap() before f_lex runs.
// On success the agent is flagged with set_mode(UPDATE, true) and `done` is
// returned.
//
// Throws stringException when f_lex returns non-zero. The message used to
// name a non-existent huff::asciiIn(); it now names this function.
//
// A fresh trie is created when cdrIn() has already deleted the agent's trie,
// instead of dereferencing a null pointer.
io_status huff::build_dict(lex_func_t f_lex, getchar_func_t f_getchar)
{
   MESSAGE(cerr, "get to huff build dict");

   if ( tri == 0 )
      tri = new trie(26);     // same construction as in huff::huff()

   fill_buf_func = f_getchar;

   alphabet = tri;

   lex_action_func = trie_add_wrap;

   if ( (*f_lex)() != 0 )
      throw(stringException("huff::build_dict(): Parsing input failed"));

   e_units = tri -> get_alphabet(cts);

//debug(cerr, *tri);

   build_tree();
   calculate_code();

//print_alphabet(cerr);

   set_mode(UPDATE, true);

   return done;
}
|
||||
|
||||
93
cde/programs/dtinfo/DtMmdb/compression/huffman.h
Normal file
93
cde/programs/dtinfo/DtMmdb/compression/huffman.h
Normal file
@@ -0,0 +1,93 @@
|
||||
/*
|
||||
* $XConsortium: huffman.h /main/3 1996/06/11 17:15:11 cde-hal $
|
||||
*
|
||||
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
|
||||
* All rights reserved. Unpublished -- rights reserved under
|
||||
* the Copyright Laws of the United States. USE OF A COPYRIGHT
|
||||
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
|
||||
* OR DISCLOSURE.
|
||||
*
|
||||
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
|
||||
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
|
||||
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
|
||||
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
|
||||
* INTERNATIONAL, LTD.
|
||||
*
|
||||
* RESTRICTED RIGHTS LEGEND
|
||||
* Use, duplication, or disclosure by the Government is subject
|
||||
* to the restrictions as set forth in subparagraph (c)(l)(ii)
|
||||
* of the Rights in Technical Data and Computer Software clause
|
||||
* at DFARS 252.227-7013.
|
||||
*
|
||||
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
|
||||
* 1315 Dell Avenue
|
||||
* Campbell, CA 95008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _huff_h
|
||||
#define _huff_h 1
|
||||
|
||||
#include "compression/abs_agent.h"
|
||||
#include "compression/trie.h"
|
||||
|
||||
////////////////////////////////////////
|
||||
//
|
||||
////////////////////////////////////////
|
||||
class htr_node
|
||||
{
|
||||
public:
|
||||
htr_node* parent;
|
||||
htr_node* left;
|
||||
htr_node* right;
|
||||
unsigned long freq;
|
||||
encoding_unit* eu;
|
||||
|
||||
public:
|
||||
htr_node(encoding_unit* eu, htr_node* lt = 0, htr_node* rt = 0);
|
||||
htr_node(unsigned long freq, htr_node* lt = 0, htr_node* rt = 0);
|
||||
~htr_node();
|
||||
};
|
||||
|
||||
|
||||
////////////////////////////////////////
|
||||
//
|
||||
////////////////////////////////////////
|
||||
class huff : public compress_agent
|
||||
{
|
||||
|
||||
protected:
|
||||
htr_node* htr_root;
|
||||
encoding_unit** e_units;
|
||||
trie* tri;
|
||||
unsigned int cts ;
|
||||
|
||||
protected:
|
||||
void build_tree();
|
||||
void calculate_code();
|
||||
encoding_unit* get_e_unit(unsigned char*& data, int len);
|
||||
|
||||
public:
|
||||
huff();
|
||||
virtual ~huff() ;
|
||||
|
||||
virtual void compress(const buffer& uncompressed, buffer& compressed) ;
|
||||
virtual void decompress(buffer& compressed, buffer& uncompressed) ;
|
||||
|
||||
ostream& print_alphabet(ostream& out);
|
||||
|
||||
MMDB_SIGNATURES(huff);
|
||||
|
||||
// compacted disk representation In and Out functions
|
||||
virtual int cdr_sizeof();
|
||||
virtual io_status cdrOut(buffer&);
|
||||
virtual io_status cdrIn(buffer&);
|
||||
|
||||
// get data to compute the alphabet
|
||||
virtual io_status build_dict(lex_func_t f_lex, getchar_func_t f_getchar);
|
||||
};
|
||||
|
||||
extern huff g_huff_agent;
|
||||
|
||||
#endif
|
||||
333
cde/programs/dtinfo/DtMmdb/compression/lzss.C
Normal file
333
cde/programs/dtinfo/DtMmdb/compression/lzss.C
Normal file
@@ -0,0 +1,333 @@
|
||||
/* $XConsortium: lzss.cc /main/5 1996/07/18 16:00:08 drk $ */
|
||||
|
||||
#include "compression/lzss.h"
|
||||
|
||||
/*
|
||||
|
||||
Adapted from LDS (lossless datacompression sources) Version 1.1 by
|
||||
Nico E. de Vries.
|
||||
|
||||
qfc. 12-8-93.
|
||||
|
||||
*/
|
||||
|
||||
/**************************************************************
|
||||
LZSS.C -- A Data Compression Program
|
||||
(tab = 4 spaces)
|
||||
***************************************************************
|
||||
4/6/1989 Haruhiko Okumura
|
||||
Use, distribute, and modify this program freely.
|
||||
Please send me your improved versions.
|
||||
PC-VAN SCIENCE
|
||||
NIFTY-Serve PAF01022
|
||||
CompuServe 74050,1022
|
||||
**************************************************************/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#define N 4096 /* size of ring buffer */
|
||||
#define F 18 /* upper limit for match_length */
|
||||
#define THRESHOLD 2 /* encode string into position and length
|
||||
if match_length is greater than this */
|
||||
#define NIL N /* index for root of binary search trees */
|
||||
|
||||
unsigned long int
|
||||
textsize = 0, /* text size counter */
|
||||
codesize = 0, /* code size counter */
|
||||
printcount = 0; /* counter for reporting progress every 1K bytes */
|
||||
unsigned char
|
||||
text_buf[N + F - 1]; /* ring buffer of size N,
|
||||
with extra F-1 bytes to facilitate string comparison */
|
||||
int match_position, match_length, /* of longest match. These are
|
||||
set by the InsertNode() procedure. */
|
||||
lson[N + 1], rson[N + 257], dad[N + 1]; /* left & right children &
|
||||
parents -- These constitute binary search trees. */
|
||||
//FILE *infile, *outfile; /* input & output files */
|
||||
|
||||
void InitTree(void) /* initialize trees */
|
||||
{
|
||||
int i;
|
||||
|
||||
/* For i = 0 to N - 1, rson[i] and lson[i] will be the right and
|
||||
left children of node i. These nodes need not be initialized.
|
||||
Also, dad[i] is the parent of node i. These are initialized to
|
||||
NIL (= N), which stands for 'not used.'
|
||||
For i = 0 to 255, rson[N + i + 1] is the root of the tree
|
||||
for strings that begin with character i. These are initialized
|
||||
to NIL. Note there are 256 trees. */
|
||||
|
||||
for (i = N + 1; i <= N + 256; i++) rson[i] = NIL;
|
||||
for (i = 0; i < N; i++) dad[i] = NIL;
|
||||
}
|
||||
|
||||
void InsertNode(int r)
|
||||
/* Inserts string of length F, text_buf[r..r+F-1], into one of the
|
||||
trees (text_buf[r]'th tree) and returns the longest-match position
|
||||
and length via the global variables match_position and match_length.
|
||||
If match_length = F, then removes the old node in favor of the new
|
||||
one, because the old one will be deleted sooner.
|
||||
Note r plays double role, as tree node and position in buffer. */
|
||||
{
|
||||
int i, p, cmp;
|
||||
unsigned char *key;
|
||||
|
||||
cmp = 1; key = &text_buf[r]; p = N + 1 + key[0];
|
||||
rson[r] = lson[r] = NIL; match_length = 0;
|
||||
for ( ; ; ) {
|
||||
if (cmp >= 0) {
|
||||
if (rson[p] != NIL) p = rson[p];
|
||||
else { rson[p] = r; dad[r] = p; return; }
|
||||
} else {
|
||||
if (lson[p] != NIL) p = lson[p];
|
||||
else { lson[p] = r; dad[r] = p; return; }
|
||||
}
|
||||
for (i = 1; i < F; i++)
|
||||
if ((cmp = key[i] - text_buf[p + i]) != 0) break;
|
||||
if (i > match_length) {
|
||||
match_position = p;
|
||||
if ((match_length = i) >= F) break;
|
||||
}
|
||||
}
|
||||
dad[r] = dad[p]; lson[r] = lson[p]; rson[r] = rson[p];
|
||||
dad[lson[p]] = r; dad[rson[p]] = r;
|
||||
if (rson[dad[p]] == p) rson[dad[p]] = r;
|
||||
else lson[dad[p]] = r;
|
||||
dad[p] = NIL; /* remove p */
|
||||
}
|
||||
|
||||
void DeleteNode(int p) /* deletes node p from tree */
|
||||
{
|
||||
int q;
|
||||
|
||||
if (dad[p] == NIL) return; /* not in tree */
|
||||
if (rson[p] == NIL) q = lson[p];
|
||||
else if (lson[p] == NIL) q = rson[p];
|
||||
else {
|
||||
q = lson[p];
|
||||
if (rson[q] != NIL) {
|
||||
do { q = rson[q]; } while (rson[q] != NIL);
|
||||
rson[dad[q]] = lson[q]; dad[lson[q]] = dad[q];
|
||||
lson[q] = lson[p]; dad[lson[p]] = q;
|
||||
}
|
||||
rson[q] = rson[p]; dad[rson[p]] = q;
|
||||
}
|
||||
dad[q] = dad[p];
|
||||
if (rson[dad[p]] == p) rson[dad[p]] = q; else lson[dad[p]] = q;
|
||||
dad[p] = NIL;
|
||||
}
|
||||
|
||||
void lzss::compress(const buffer& uncompressed, buffer& compressed)
|
||||
{
|
||||
if ( compressed.buf_sz() < uncompressed.buf_sz() )
|
||||
compressed.expand_chunk(uncompressed.buf_sz());
|
||||
|
||||
////////////////////////////////////////////
|
||||
////////////////////////////////////////////
|
||||
int i, c, len, r, s, last_match_length, code_buf_ptr;
|
||||
unsigned char code_buf[17], mask;
|
||||
|
||||
InitTree(); /* initialize trees */
|
||||
code_buf[0] = 0; /* code_buf[1..16] saves eight units of code, and
|
||||
code_buf[0] works as eight flags, "1" representing that the unit
|
||||
is an unencoded letter (1 byte), "0" a position-and-length pair
|
||||
(2 bytes). Thus, eight units require at most 16 bytes of code. */
|
||||
|
||||
code_buf_ptr = mask = 1;
|
||||
s = 0; r = N - F;
|
||||
for (i = s; i < r; i++) text_buf[i] = ' '; /* Clear the buffer with
|
||||
any character that will appear often. */
|
||||
|
||||
char* unc_str = uncompressed.get_base();
|
||||
int unc_str_len = uncompressed.content_sz();
|
||||
int unc_str_ptr = 0;
|
||||
|
||||
for (len = 0; len < F; len++) {
|
||||
|
||||
if ( unc_str_ptr == unc_str_len )
|
||||
break;
|
||||
|
||||
c = unc_str[unc_str_ptr++];
|
||||
//cerr << char(c);
|
||||
|
||||
text_buf[r + len] = c; /* Read F bytes into the last F bytes of
|
||||
the buffer */
|
||||
}
|
||||
|
||||
if ((textsize = len) == 0) return; /* text of size zero */
|
||||
|
||||
for (i = 1; i <= F; i++) InsertNode(r - i); /* Insert the F strings,
|
||||
each of which begins with one or more 'space' characters. Note
|
||||
the order in which these strings are inserted. This way,
|
||||
degenerate trees will be less likely to occur. */
|
||||
|
||||
InsertNode(r); /* Finally, insert the whole string just read. The
|
||||
global variables match_length and match_position are set. */
|
||||
|
||||
do {
|
||||
if (match_length > len) match_length = len; /* match_length
|
||||
may be spuriously long near the end of text. */
|
||||
|
||||
|
||||
if (match_length <= THRESHOLD) {
|
||||
|
||||
match_length = 1; /* Not long enough match. Send one byte. */
|
||||
code_buf[0] |= mask; /* 'send one byte' flag */
|
||||
code_buf[code_buf_ptr++] = text_buf[r]; /* Send uncoded. */
|
||||
|
||||
} else {
|
||||
|
||||
code_buf[code_buf_ptr++] = (unsigned char) match_position;
|
||||
code_buf[code_buf_ptr++] = (unsigned char)
|
||||
(((match_position >> 4) & 0xf0)
|
||||
| (match_length - (THRESHOLD + 1))); /* Send position and
|
||||
|
||||
|
||||
length pair. Note match_length > THRESHOLD. */
|
||||
}
|
||||
|
||||
if ((mask <<= 1) == 0) { /* Shift mask left one bit. */
|
||||
|
||||
|
||||
// for (i = 0; i < code_buf_ptr; i++) { /* Send at most 8 units of */
|
||||
// putc(code_buf[i], outfile); /* code together */
|
||||
// }
|
||||
|
||||
compressed.put((char*)code_buf, code_buf_ptr, true);
|
||||
|
||||
codesize += code_buf_ptr;
|
||||
code_buf[0] = 0; code_buf_ptr = mask = 1;
|
||||
}
|
||||
|
||||
last_match_length = match_length;
|
||||
|
||||
for (i = 0; i < last_match_length; i++) {
|
||||
|
||||
if ( unc_str_ptr == unc_str_len )
|
||||
break;
|
||||
|
||||
c = unc_str[unc_str_ptr++];
|
||||
|
||||
DeleteNode(s); /* Delete old strings and */
|
||||
text_buf[s] = c; /* read new bytes */
|
||||
|
||||
if (s < F - 1) text_buf[s + N] = c; /* If the position is
|
||||
near the end of buffer, extend the buffer to make
|
||||
string comparison easier. */
|
||||
|
||||
//s = (s + 1) & (N - 1); r = (r + 1) & (N - 1);
|
||||
s++; s &= (N - 1); r++; r &= (N - 1);
|
||||
/* Since this is a ring buffer, increment the position
|
||||
modulo N. */
|
||||
|
||||
InsertNode(r); /* Register the string in text_buf[r..r+F-1] */
|
||||
}
|
||||
|
||||
// if ((textsize += i) > printcount) {
|
||||
// printf("%12ld\r", textsize); printcount += 1024;
|
||||
// /* Reports progress each time the textsize exceeds
|
||||
// multiples of 1024. */
|
||||
// }
|
||||
|
||||
while (i++ < last_match_length) {/* After the end of text, */
|
||||
DeleteNode(s); /* no need to read, but */
|
||||
|
||||
//s = (s + 1) & (N - 1); r = (r + 1) & (N - 1);
|
||||
s++; s &= (N - 1); r++; r &= (N - 1);
|
||||
|
||||
if (--len) InsertNode(r); /* buffer may not be empty. */
|
||||
}
|
||||
|
||||
} while (len > 0); /* until length of string to be processed is zero */
|
||||
|
||||
if (code_buf_ptr > 1) { /* Send remaining code. */
|
||||
|
||||
// for (i = 0; i < code_buf_ptr; i++) {
|
||||
// //putc(code_buf[i], outfile);
|
||||
// compressed.put(code_buf[i], true);
|
||||
// }
|
||||
|
||||
compressed.put((char*)code_buf, code_buf_ptr, true);
|
||||
codesize += code_buf_ptr;
|
||||
}
|
||||
|
||||
|
||||
//printf("In : %ld bytes\n", textsize); /* Encoding is done. */
|
||||
//printf("Out: %ld bytes\n", codesize);
|
||||
//printf("Out/In: %.3f\n", (double)codesize / textsize);
|
||||
}
|
||||
|
||||
/*
 * Decode LZSS data from `compressed` and append the result to
 * `uncompressed`.  Decoding stops as soon as `compressed` reports no
 * remaining content.
 *
 * Each group starts with a flag byte; bit set = literal byte follows,
 * bit clear = two-byte position/length pair follows.
 */
void lzss::decompress(buffer& compressed, buffer& uncompressed)
{
   int pos, run, n, ch;
   int cur = N - F;
   unsigned int flags = 0;

   /* Same initial window content as the encoder. */
   for (n = 0; n < N - F; n++)
      text_buf[n] = ' ';

   for (;;) {

      /* Bit 8 acts as a counter: once the eight 1-bits placed in the
         high byte have shifted out, a new flag byte is needed. */
      flags >>= 1;
      if ((flags & 256) == 0) {
         if ( compressed.content_sz() == 0 )
            break;

         compressed.getusc(ch);
         flags = ch | 0xff00;
      }

      if (flags & 1) {
         /* Literal byte. */
         if ( compressed.content_sz() == 0 )
            break;

         compressed.getusc(ch);

         uncompressed.put(char(ch), true);
         text_buf[cur++] = ch;
         cur &= (N - 1);
         continue;
      }

      /* Position/length pair. */
      if ( compressed.content_sz() == 0 )
         break;
      compressed.getusc(pos);

      if ( compressed.content_sz() == 0 )
         break;
      compressed.getusc(run);

      /* High nibble of the second byte extends the position;
         low nibble is the length minus (THRESHOLD + 1). */
      pos |= ((run & 0xf0) << 4);
      run = (run & 0x0f) + THRESHOLD;

      for (n = 0; n <= run; n++) {
         ch = text_buf[(pos + n) & (N - 1)];

         uncompressed.put(char(ch), true);

         text_buf[cur++] = ch;
         cur &= (N - 1);
      }
   }
}
|
||||
|
||||
|
||||
io_status lzss::build_dict(lex_func_t, getchar_func_t)
|
||||
{
|
||||
return done;
|
||||
}
|
||||
|
||||
MMDB_BODIES(lzss)
|
||||
|
||||
26
cde/programs/dtinfo/DtMmdb/compression/lzss.h
Normal file
26
cde/programs/dtinfo/DtMmdb/compression/lzss.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/* $XConsortium: lzss.h /main/3 1996/06/11 17:15:21 cde-hal $ */
|
||||
|
||||
|
||||
#ifndef _lzss_h
|
||||
#define _lzss_h 1
|
||||
|
||||
#include "compression/abs_agent.h"
|
||||
|
||||
class lzss : public compress_agent
|
||||
{
|
||||
|
||||
public:
|
||||
lzss() : compress_agent(DICT_AGENT_CODE) {};
|
||||
virtual ~lzss() {};
|
||||
|
||||
virtual void compress(const buffer& uncompressed, buffer& compressed) ;
|
||||
virtual void decompress(buffer& compressed, buffer& uncompressed) ;
|
||||
|
||||
MMDB_SIGNATURES(lzss);
|
||||
|
||||
virtual io_status build_dict(lex_func_t f_lex, getchar_func_t f_getchar);
|
||||
};
|
||||
|
||||
extern lzss g_lzss_agent;
|
||||
|
||||
#endif
|
||||
1041
cde/programs/dtinfo/DtMmdb/compression/ps.C
Normal file
1041
cde/programs/dtinfo/DtMmdb/compression/ps.C
Normal file
File diff suppressed because it is too large
Load Diff
78
cde/programs/dtinfo/DtMmdb/compression/ps.lex
Normal file
78
cde/programs/dtinfo/DtMmdb/compression/ps.lex
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* $XConsortium: ps.l /main/2 1996/07/18 14:27:45 drk $
|
||||
*
|
||||
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
|
||||
* All rights reserved. Unpublished -- rights reserved under
|
||||
* the Copyright Laws of the United States. USE OF A COPYRIGHT
|
||||
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
|
||||
* OR DISCLOSURE.
|
||||
*
|
||||
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
|
||||
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
|
||||
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
|
||||
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
|
||||
* INTERNATIONAL, LTD.
|
||||
*
|
||||
* RESTRICTED RIGHTS LEGEND
|
||||
* Use, duplication, or disclosure by the Government is subject
|
||||
* to the restrictions as set forth in subparagraph (c)(l)(ii)
|
||||
* of the Rights in Technical Data and Computer Software clause
|
||||
* at DFARS 252.227-7013.
|
||||
*
|
||||
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
|
||||
* 1315 Dell Avenue
|
||||
* Campbell, CA 95008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
%a 30000
%e 10000
%k 10000
%n 10000
%o 40000
%p 20000

%{

#include "compression/trie.h"
#include "compression/abs_agent.h"

/* Characters are collected here and handed to lex_action_func in
   chunks of at most LBUFSIZ bytes. */
static unsigned char yybuf[LBUFSIZ];
static int yybuf_sz = LBUFSIZ;
static int yybuffed = 0;        /* number of bytes currently in yybuf */

#undef yywrap
int yywrap();

/* Input comes from the caller-supplied fill_buf_func instead of yyin. */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
	{ \
	   result = (*fill_buf_func)((unsigned char*)buf,max_size); \
	}

%}

%%

.|\n	{
	  /* Buffer full: pass it on before storing the new character. */
	  if ( yybuffed >= yybuf_sz ) {
	    (*lex_action_func)(yybuf, yybuf_sz, 2);
	    yybuffed = 0;
	  }

	  yybuf[yybuffed++] = yytext[0];
	}

%%

/*
 * End of input: flush whatever is still buffered and stop scanning.
 * The fill count is reset so that a later scan starts with an empty
 * buffer instead of re-sending the bytes flushed here.
 */
int yywrap()
{
  (*lex_action_func)(yybuf, yybuffed, 2);
  yybuffed = 0;
  return 1;
}
|
||||
|
||||
1051
cde/programs/dtinfo/DtMmdb/compression/sgml.C
Normal file
1051
cde/programs/dtinfo/DtMmdb/compression/sgml.C
Normal file
File diff suppressed because it is too large
Load Diff
82
cde/programs/dtinfo/DtMmdb/compression/sgml.lex
Normal file
82
cde/programs/dtinfo/DtMmdb/compression/sgml.lex
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* $XConsortium: sgml.l /main/2 1996/07/18 14:28:02 drk $
|
||||
*
|
||||
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
|
||||
* All rights reserved. Unpublished -- rights reserved under
|
||||
* the Copyright Laws of the United States. USE OF A COPYRIGHT
|
||||
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
|
||||
* OR DISCLOSURE.
|
||||
*
|
||||
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
|
||||
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
|
||||
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
|
||||
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
|
||||
* INTERNATIONAL, LTD.
|
||||
*
|
||||
* RESTRICTED RIGHTS LEGEND
|
||||
* Use, duplication, or disclosure by the Government is subject
|
||||
* to the restrictions as set forth in subparagraph (c)(l)(ii)
|
||||
* of the Rights in Technical Data and Computer Software clause
|
||||
* at DFARS 252.227-7013.
|
||||
*
|
||||
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
|
||||
* 1315 Dell Avenue
|
||||
* Campbell, CA 95008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
%a 30000
%e 10000
%k 10000
%n 10000
%o 40000
%p 20000

%{

#include "compression/trie.h"
#include "compression/abs_agent.h"

/* Non-tag characters are collected here and handed to lex_action_func
   in chunks of at most LBUFSIZ bytes. */
static unsigned char yybuf[LBUFSIZ];
static int yybuf_sz = LBUFSIZ;
static int yybuffed = 0;        /* number of bytes currently in yybuf */

#undef yywrap
int yywrap();

/* Input comes from the caller-supplied fill_buf_func instead of yyin. */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
	{ \
	   result = (*fill_buf_func)((unsigned char*)buf,max_size); \
	}

%}

%%

"<"[0-9a-zA-Z_.]+">"|"</"[0-9a-zA-Z_.]+">"	{
	  /* A start or end tag is reported immediately as one unit. */
	  (*lex_action_func)((unsigned char*)(yytext), yyleng, 1);
	}

.|\n	{
	  /* Buffer full: pass it on before storing the new character. */
	  if ( yybuffed >= yybuf_sz ) {
	    (*lex_action_func)(yybuf, yybuf_sz, 2);
	    yybuffed = 0;
	  }

	  yybuf[yybuffed++] = yytext[0];
	}

%%

/*
 * End of input: flush whatever is still buffered and stop scanning.
 * The fill count is reset so that a later scan starts with an empty
 * buffer instead of re-sending the bytes flushed here.
 */
int yywrap()
{
  (*lex_action_func)(yybuf, yybuffed, 2);
  yybuffed = 0;
  return 1;
}
|
||||
|
||||
445
cde/programs/dtinfo/DtMmdb/compression/trie.C
Normal file
445
cde/programs/dtinfo/DtMmdb/compression/trie.C
Normal file
@@ -0,0 +1,445 @@
|
||||
/*
|
||||
* $XConsortium: trie.cc /main/3 1996/06/11 17:15:26 cde-hal $
|
||||
*
|
||||
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
|
||||
* All rights reserved. Unpublished -- rights reserved under
|
||||
* the Copyright Laws of the United States. USE OF A COPYRIGHT
|
||||
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
|
||||
* OR DISCLOSURE.
|
||||
*
|
||||
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
|
||||
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
|
||||
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
|
||||
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
|
||||
* INTERNATIONAL, LTD.
|
||||
*
|
||||
* RESTRICTED RIGHTS LEGEND
|
||||
* Use, duplication, or disclosure by the Government is subject
|
||||
* to the restrictions as set forth in subparagraph (c)(l)(ii)
|
||||
* of the Rights in Technical Data and Computer Software clause
|
||||
* at DFARS 252.227-7013.
|
||||
*
|
||||
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
|
||||
* 1315 Dell Avenue
|
||||
* Campbell, CA 95008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "compression/trie.h"
|
||||
|
||||
//trie_node_info::trie_node_info (char c, int f, trie_node* x)
|
||||
|
||||
// Create an empty cell: no child node, all info bits zero, no encoding
// unit attached.
trie_node_info::trie_node_info () : child(0)
{
   info.int_view = 0;

   // `image` is a union of a pointer and two ints.  Clearing only the
   // int member leaves part of the pointer uninitialised where pointers
   // are wider than int, and trie::add()/add_letters() test image.eu
   // against 0.  Clear the pointer first, then the int view.
   image.eu = 0;
   image.heap = 0;
}
|
||||
|
||||
// Destroying a cell destroys the child node it points to, if any.
// The encoding unit in `image` is not touched here.
trie_node_info::~trie_node_info ()
{
   delete child;
}
|
||||
|
||||
|
||||
// Build a node with LANG_ALPHABET_SZ letter cells plus one extra cell.
// The extra cell (index LANG_ALPHABET_SZ) stores `parent` in its child
// field, cast to trie_node*; it is a back-link, not a real child node.
trie_node::trie_node(trie_node_info* parent)
{
#ifdef C_API
   const int slots = LANG_ALPHABET_SZ + 1;

   // In this build the cells are allocated one by one.
   children = new trie_node_info_ptr_t[slots];
   for ( int k = 0; k < slots; k++ )
      children[k] = new trie_node_info;

   children[LANG_ALPHABET_SZ] -> child = (trie_node*)parent;
#else
   children[LANG_ALPHABET_SZ].child = (trie_node*)parent;
#endif
}
|
||||
|
||||
// Release the node and, through the cell destructors, every node below it.
trie_node::~trie_node()
{
#ifdef C_API
   // The extra cell's child field holds the parent back-link (a
   // trie_node_info* stored as trie_node*).  Clear it first, otherwise
   // the cell destructor would delete the parent cell as a trie_node.
   children[LANG_ALPHABET_SZ] -> child = 0;

   for (int i=0; i<LANG_ALPHABET_SZ+1; i++ )
      delete children[i];

   // Allocated with new[], so it must be released with delete[].
   delete [] children;
#else
   // Clear the back-link before the member cells are destroyed.
   children[LANG_ALPHABET_SZ].child = 0;
#endif
}
|
||||
|
||||
// Print every cell of this subtree whose frequency is non-zero, one per
// line, as "<path letters>:<freq>".  `prefix` holds the letters on the
// path to this node (prefix_sz of them) and is extended in place while
// recursing.
// NOTE(review): nothing here checks prefix_sz against the capacity of
// `prefix`; the caller must supply a buffer deep enough for the trie.
void trie_node::_print(ostream& out, char* prefix, int prefix_sz)
{
   for ( int slot = 0; slot < LANG_ALPHABET_SZ; slot++ ) {

#ifdef C_API
      trie_node_info* cell = children[slot];
#else
      trie_node_info* cell = &children[slot];
#endif

      if ( !(cell -> info.info_view.freq > 0) )
         continue;

      // Record this letter so deeper levels print the full path.
      prefix[prefix_sz] = cell -> info.info_view.letter;

      for ( int k = 0; k < prefix_sz; k++ )
         out << prefix[k];

      out << char(cell -> info.info_view.letter);

      out << ":" << int(cell -> info.info_view.freq) << "\n";

      if ( cell -> child )
         cell -> child -> _print(out, prefix, prefix_sz+1);
   }
}
|
||||
|
||||
//////////////////////////////////////
|
||||
//
|
||||
//////////////////////////////////////
|
||||
|
||||
// Create an empty trie.  `estimatedsz` is the initial capacity of the
// alphabet table; every slot starts out as a null pointer.
trie::trie(int estimatedsz) :
   max_trie_level(0), total_nodes(1),
   root(0), sorted_freqs(0),
   estimated_sz(estimatedsz), alphabet_sz(0)
{
   alphabet = new encoding_unitPtr[estimated_sz];

   for ( int k = 0; k < estimated_sz; k++ )
      alphabet[k] = 0;
}
|
||||
|
||||
// Release the node tree, the frequency heap, every encoding unit in
// the alphabet table, and the table itself.
trie::~trie()
{
   delete root;
   delete sorted_freqs;

   // alphabet_sz is unsigned; use a matching index type.
   for ( unsigned int i=0; i<alphabet_sz; i++ )
      delete alphabet[i];

   // The table was allocated with new[], so release it with delete[].
   delete [] alphabet;
}
|
||||
|
||||
void trie::extend_alphabet()
|
||||
{
|
||||
if ( alphabet_sz >= estimated_sz ) {
|
||||
encoding_unitPtr* new_alphabet = new encoding_unitPtr[2* estimated_sz];
|
||||
|
||||
for ( int k=0; k< estimated_sz; k++ ) {
|
||||
new_alphabet[k] = alphabet[k] ;
|
||||
new_alphabet[k + estimated_sz] = 0;
|
||||
}
|
||||
|
||||
new_alphabet = new_alphabet;
|
||||
estimated_sz *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
void trie::add(unsigned char* word, int sz, int fq)
|
||||
{
|
||||
//cerr << sz << " ";
|
||||
//for ( int k=0; k<sz; k++ )
|
||||
// cerr << word[k];
|
||||
//cerr << "\n";
|
||||
|
||||
static int j, level = 0;
|
||||
static trie_node* x = 0;
|
||||
static trie_node_info* y = 0;
|
||||
|
||||
static char buf[1];
|
||||
static ostring *z;
|
||||
|
||||
if ( root == 0 )
|
||||
root = new trie_node(0);
|
||||
|
||||
x = root;
|
||||
|
||||
for ( int i=0; i<sz; i++ ) {
|
||||
|
||||
j = word[i];
|
||||
|
||||
#ifdef C_API
|
||||
y = (x -> children[j]);
|
||||
#else
|
||||
y = &(x -> children[j]);
|
||||
#endif
|
||||
|
||||
y -> info.info_view.letter = j;
|
||||
|
||||
if ( i<sz-1 && y -> child == 0 ) {
|
||||
y -> child = new trie_node(y);
|
||||
total_nodes++;
|
||||
}
|
||||
|
||||
if ( i == sz-1 ) {
|
||||
|
||||
y -> info.info_view.freq += fq;
|
||||
|
||||
if ( y -> image.eu == 0 ) {
|
||||
|
||||
y -> info.info_view.mark = 1;
|
||||
|
||||
y->image.eu = add_to_alphabet(word, sz, fq);
|
||||
|
||||
} else
|
||||
y -> image.eu -> freq += fq;
|
||||
}
|
||||
|
||||
x = y -> child;
|
||||
}
|
||||
}
|
||||
|
||||
// Append a new encoding unit for `word` (sz bytes, frequency fq) to the
// alphabet table and return it.
encoding_unit* trie::add_to_alphabet(unsigned char* word, int sz, int fq)
{
   extend_alphabet();

   ostring* text = new ostring((char*)word, sz);
   encoding_unit* unit = new encoding_unit(text, fq);

   alphabet[alphabet_sz] = unit;
   alphabet_sz++;

   return unit;
}
|
||||
|
||||
void trie::add_letters(unsigned char* letters, int sz)
|
||||
{
|
||||
/*
|
||||
MESSAGE(cerr, "trie::add_letters()");
|
||||
cerr << sz << " ";
|
||||
for ( int k=0; k<sz; k++ )
|
||||
cerr << letters[k];
|
||||
cerr << "\n";
|
||||
*/
|
||||
|
||||
static int j;
|
||||
static trie_node_info* y = 0;
|
||||
|
||||
static char buf[1];
|
||||
static ostring *z;
|
||||
|
||||
if ( root == 0 )
|
||||
root = new trie_node(0);
|
||||
|
||||
for ( int i=0; i<sz; i++ ) {
|
||||
|
||||
j = letters[i];
|
||||
|
||||
#ifdef C_API
|
||||
y = (root -> children[j]);
|
||||
#else
|
||||
y = &(root -> children[j]);
|
||||
#endif
|
||||
y -> info.info_view.freq ++;
|
||||
|
||||
if ( y -> image.eu == 0 ) {
|
||||
|
||||
y -> info.info_view.letter = j;
|
||||
|
||||
buf[0] = char(j);
|
||||
z = new ostring(buf, 1);
|
||||
y -> info.info_view.mark = 1;
|
||||
|
||||
extend_alphabet();
|
||||
|
||||
alphabet[alphabet_sz++] = y->image.eu =new encoding_unit(z, 1);
|
||||
} else
|
||||
y -> image.eu -> freq++;
|
||||
}
|
||||
}
|
||||
|
||||
// Dump the trie contents to `out`; an empty trie prints nothing.
// NOTE(review): the path buffer holds 100 characters and _print() does
// not check depth against it.
ostream& operator <<(ostream& out, trie& tr)
{
   static char char_buf[100];

   if ( tr.root != 0 )
      tr.root -> _print(out, char_buf, 0);

   return out;
}
|
||||
|
||||
|
||||
void update_index(int ind, void* x)
|
||||
{
|
||||
((trie_node_info*)x) -> image.heap = ind;
|
||||
}
|
||||
|
||||
// Walk the subtree under `z`.  Every cell with non-zero frequency and
// no child node is marked, given a new encoding unit built from its
// full word, and stored at alphabet[j]; j is advanced for each one.
// NOTE(review): j is not checked against the alphabet capacity here.
void trie::_find_leaf(trie_node* z, int& j)
{
   for ( int slot = 0; slot < LANG_ALPHABET_SZ; slot++ ) {

#ifdef C_API
      trie_node_info* cell = z -> children[slot];
#else
      trie_node_info* cell = &(z -> children[slot]);
#endif

      if ( !(cell -> info.info_view.freq > 0) )
         continue;

      if ( cell -> child != 0 ) {
         // Interior cell: keep descending.
         _find_leaf(cell -> child, j);
         continue;
      }

      cell -> info.info_view.mark = 1;

      ostring* word = get_word(cell);
      encoding_unit* unit = new encoding_unit(word, cell -> info.info_view.freq);

      cell -> image.eu = unit;
      alphabet[j++] = unit;
   }
}
|
||||
|
||||
// Return the alphabet table and report its entry count through a_sz.
// The returned pointer is the trie's own array, not a copy.
encoding_unit** trie::get_alphabet(unsigned int& a_sz)
{
   a_sz = alphabet_sz;
   return alphabet;
}
|
||||
|
||||
// Rebuild the word that ends at `leaf` by following parent back-links
// up to the root, and return it as a newly allocated ostring.
// Throws stringException for a null leaf or a path longer than the
// 127-byte scratch buffer.
//
// NOTE(review): the step to the back-link cell indexes relative to the
// current cell, which requires all cells of a node to be one contiguous
// array.  With C_API defined, trie_node allocates each cell separately,
// so this indexing looks invalid in that build -- confirm before use.
ostring* trie::get_word(trie_node_info* leaf)
{
   static char buf[128];
   buf[127] = 0;

   int i = 127;                 // letters are written backwards from here

   trie_node_info* x = leaf;

   if ( x == 0 )
      throw(stringException("null leaf node pointer"));

   while ( x ) {

      // Do not write in front of the buffer.
      if ( i == 0 )
         throw(stringException("get_word(): word too long"));

      buf[--i] = x -> info.info_view.letter;

      // Cell [letter] -> cell [LANG_ALPHABET_SZ] of the same node,
      // whose child field holds the parent cell.
      x = &x[LANG_ALPHABET_SZ - x->info.info_view.letter];
      x = (trie_node_info*)(x -> child);
   }

   return new ostring(buf+i, 127-i);
}
|
||||
|
||||
// Follow `str` (len bytes) down from the root.
// On return `node_info` is the last cell examined and `node` the node
// reached through it.  Returns true when all len bytes were consumed,
// false when the trie is empty or the path stops early.
Boolean trie::travers_to(char* str, int len, 
                         trie_node*& node, trie_node_info*& node_info
                        )
{
   if ( root == 0 )
      return false;

   node = root;

   // Declared outside the loop because it is inspected afterwards.
   int i;

   // Stop as well when the previous cell had no child node, so a null
   // node is never dereferenced.
   for ( i=0; i<len && node != 0; i++ ) {

      // Index by byte value; a plain char may be negative.
      const int j = (unsigned char)str[i];

#ifdef C_API
      node_info = (node -> children[j]);
#else
      node_info = &(node -> children[j]);
#endif

      if ( node_info -> info.info_view.freq == 0 )
         break;

      node = node_info -> child;
   }

   return ( len == i ) ? true : false;
}
|
||||
|
||||
// Insert every cell with non-zero frequency in the subtree under `rt`
// into sorted_freqs.  `level` is the depth of `rt`; it is only passed
// along to the recursive calls.
void trie::collect_freqs(trie_node* rt, int level)
{
   for ( int slot = 0; slot < LANG_ALPHABET_SZ; slot++ ) {

#ifdef C_API
      trie_node_info* cell = rt -> children[slot];
#else
      trie_node_info* cell = &(rt -> children[slot]);
#endif

      if ( !(cell -> info.info_view.freq > 0) )
         continue;

      sorted_freqs -> insert(cell);

      if ( cell -> child )
         collect_freqs(cell -> child, level+1);
   }
}
|
||||
|
||||
// Return the encoding unit of the longest prefix of `str` (len bytes)
// that ends on a marked cell.  Returns 0 when the trie is empty.
// Throws stringException when no prefix is marked or the marked cell
// has no encoding unit.
encoding_unit* trie::parse(unsigned char* str, int len)
{
   if ( root == 0 )
      return 0;

   trie_node* node = root;
   trie_node_info* cell = 0;
   trie_node_info* last_hit = 0;   // deepest marked cell seen so far
   int i=0;

   while ( node && i < len ) {

#ifdef C_API
      cell = (node -> children[str[i++]]);
#else
      cell = &(node -> children[str[i++]]);
#endif

      if ( cell -> info.info_view.mark == 1 )
         last_hit = cell;

      node = cell -> child;
   }

   if ( last_hit == 0 ) {
      debug(cerr, len);

      // i may equal len here (whole input consumed, or len == 0);
      // only report str[i] when it is inside the input.
      if ( i < len ) {
         debug(cerr, str[i]);
         debug(cerr, int(str[i]));
      }

      throw(stringException("parse(): string not in trie"));
   }

   if ( last_hit -> image.eu == 0 )
      throw(stringException("parse(): encoding unit not found"));

   return last_hit -> image.eu;
}
|
||||
|
||||
///////////////////////////////////////
|
||||
//
|
||||
///////////////////////////////////////
|
||||
// Ordering predicate on trie cells: true when the frequency of n1 is
// lower than that of n2.  Both arguments point to trie_node_info.
Boolean trie_node_ls(voidPtr n1, voidPtr n2)
{
   const trie_node_info* lhs = (trie_node_info*)n1;
   const trie_node_info* rhs = (trie_node_info*)n2;

   return ( lhs -> info.info_view.freq < rhs -> info.info_view.freq ) ? true : false;
}
|
||||
|
||||
// Equality predicate on trie cells: true when both cells carry the
// same frequency.  Both arguments point to trie_node_info.
Boolean trie_node_eq(voidPtr n1, voidPtr n2)
{
   const trie_node_info* lhs = (trie_node_info*)n1;
   const trie_node_info* rhs = (trie_node_info*)n2;

   return ( lhs -> info.info_view.freq == rhs -> info.info_view.freq ) ? true : false;
}
|
||||
|
||||
|
||||
138
cde/programs/dtinfo/DtMmdb/compression/trie.h
Normal file
138
cde/programs/dtinfo/DtMmdb/compression/trie.h
Normal file
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* $XConsortium: trie.h /main/3 1996/06/11 17:15:31 cde-hal $
|
||||
*
|
||||
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
|
||||
* All rights reserved. Unpublished -- rights reserved under
|
||||
* the Copyright Laws of the United States. USE OF A COPYRIGHT
|
||||
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
|
||||
* OR DISCLOSURE.
|
||||
*
|
||||
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
|
||||
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
|
||||
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
|
||||
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
|
||||
* INTERNATIONAL, LTD.
|
||||
*
|
||||
* RESTRICTED RIGHTS LEGEND
|
||||
* Use, duplication, or disclosure by the Government is subject
|
||||
* to the restrictions as set forth in subparagraph (c)(l)(ii)
|
||||
* of the Rights in Technical Data and Computer Software clause
|
||||
* at DFARS 252.227-7013.
|
||||
*
|
||||
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
|
||||
* 1315 Dell Avenue
|
||||
* Campbell, CA 95008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _trie_h
|
||||
#define _trie_h 1
|
||||
|
||||
#include "compression/code.h"
|
||||
#include "dstr/heap.h"
|
||||
|
||||
#define MAX_LEVELS 50
|
||||
#define LANG_ALPHABET_SZ 256
|
||||
|
||||
class trie_node;
|
||||
|
||||
// Per-cell bookkeeping packed into 32 bits.
struct info_t {
   unsigned freq: 23;     // occurrence count
   unsigned letter: 8;    // byte value this cell stands for
   unsigned mark: 1;      // set when an encoding unit ends at this cell
};
|
||||
|
||||
class trie_node_info
|
||||
{
|
||||
trie_node* child;
|
||||
union {
|
||||
struct info_t info_view;
|
||||
int int_view;
|
||||
} info;
|
||||
union {
|
||||
encoding_unit* eu;
|
||||
int heap;
|
||||
int pos;
|
||||
} image;
|
||||
|
||||
public:
|
||||
//trie_node_info (char letter = 0, int freq = 0, trie_node* child = 0);
|
||||
trie_node_info ();
|
||||
~trie_node_info ();
|
||||
|
||||
friend class trie_node;
|
||||
friend class trie;
|
||||
friend Boolean trie_node_ls(voidPtr n1, voidPtr n2);
|
||||
friend Boolean trie_node_eq(voidPtr n1, voidPtr n2);
|
||||
friend void update_index(int ind, void* x);
|
||||
};
|
||||
|
||||
typedef trie_node_info* trie_node_info_ptr_t;
|
||||
|
||||
class trie_node
|
||||
{
|
||||
|
||||
protected:
|
||||
#ifdef C_API
|
||||
trie_node_info_ptr_t* children;
|
||||
#else
|
||||
trie_node_info children[LANG_ALPHABET_SZ+1];
|
||||
// children[LANG_ALPHABET_SZ+1].child encodes the parent
|
||||
#endif
|
||||
|
||||
|
||||
public:
|
||||
trie_node(trie_node_info* parent);
|
||||
~trie_node();
|
||||
|
||||
void _print(ostream& out, char* prefix, int prefix_sz);
|
||||
|
||||
friend class trie;
|
||||
};
|
||||
|
||||
class trie
|
||||
{
|
||||
|
||||
protected:
|
||||
int max_trie_level;
|
||||
int total_nodes;
|
||||
int level_sz[MAX_LEVELS];
|
||||
|
||||
trie_node* root;
|
||||
heap* sorted_freqs;
|
||||
|
||||
int estimated_sz;
|
||||
encoding_unit** alphabet;
|
||||
unsigned int alphabet_sz;
|
||||
|
||||
protected:
|
||||
void collect_freqs(trie_node* rt, int level);
|
||||
ostring* get_word(trie_node_info* x);
|
||||
|
||||
void _find_leaf(trie_node* z, int& j);
|
||||
void extend_alphabet();
|
||||
|
||||
public:
|
||||
trie(int estimated_alphabet_sz = 400) ;
|
||||
virtual ~trie() ;
|
||||
|
||||
virtual void add(unsigned char* word, int len, int freq = 1) ;
|
||||
virtual void add_letters(unsigned char* letters, int len) ;
|
||||
virtual encoding_unit* add_to_alphabet(unsigned char* word, int sz, int fq);
|
||||
|
||||
virtual encoding_unit** get_alphabet(unsigned int& alphabet_sz);
|
||||
|
||||
virtual Boolean travers_to(char* str, int sz,
|
||||
trie_node*& node, trie_node_info*& node_info
|
||||
);
|
||||
|
||||
// take the longest substring from str and returns its
|
||||
// encoding_unit.
|
||||
virtual encoding_unit* parse(unsigned char* str, int len);
|
||||
|
||||
friend ostream& operator <<(ostream& out, trie& tr);
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
128
cde/programs/dtinfo/DtMmdb/compression/zip.C
Normal file
128
cde/programs/dtinfo/DtMmdb/compression/zip.C
Normal file
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
* $XConsortium: zip.cc /main/3 1996/06/11 17:15:36 cde-hal $
|
||||
*
|
||||
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
|
||||
* All rights reserved. Unpublished -- rights reserved under
|
||||
* the Copyright Laws of the United States. USE OF A COPYRIGHT
|
||||
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
|
||||
* OR DISCLOSURE.
|
||||
*
|
||||
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
|
||||
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
|
||||
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
|
||||
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
|
||||
* INTERNATIONAL, LTD.
|
||||
*
|
||||
* RESTRICTED RIGHTS LEGEND
|
||||
* Use, duplication, or disclosure by the Government is subject
|
||||
* to the restrictions as set forth in subparagraph (c)(l)(ii)
|
||||
* of the Rights in Technical Data and Computer Software clause
|
||||
* at DFARS 252.227-7013.
|
||||
*
|
||||
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
|
||||
* 1315 Dell Avenue
|
||||
* Campbell, CA 95008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "compression/zip.h"
|
||||
|
||||
//zip g_zip_agent;
|
||||
|
||||
#define UNCOMPRESSED "/tmp/uncomp"
|
||||
#define COMPRESSED "/tmp/comp"
|
||||
|
||||
void zip::compress(const buffer& uncompressed, buffer& compressed)
|
||||
{
|
||||
////////////////////////////////////////
|
||||
// code for testing. I know it is slow.
|
||||
////////////////////////////////////////
|
||||
|
||||
fstream out(UNCOMPRESSED, ios::out|ios::trunc);
|
||||
|
||||
if ( !out )
|
||||
throw(streamException(out.rdstate()));
|
||||
|
||||
if ( out.write(uncompressed.get_base(), uncompressed.content_sz()) == 0 )
|
||||
throw(streamException(out.rdstate()));
|
||||
|
||||
out.close();
|
||||
|
||||
system(form("gzip -c %s > %s", UNCOMPRESSED, COMPRESSED));
|
||||
|
||||
fstream in(COMPRESSED, ios::in);
|
||||
|
||||
if ( !in )
|
||||
throw(streamException(in.rdstate()));
|
||||
|
||||
int x = bytes(in);
|
||||
|
||||
compressed.expand_chunk(x);
|
||||
|
||||
if ( in.read(compressed.get_base(), x) == 0 || x != in.gcount() )
|
||||
throw(streamException(in.rdstate()));
|
||||
|
||||
compressed.set_content_sz(x);
|
||||
|
||||
in.close();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Decompress by writing the input to a scratch file, running the
// external `gzip -d` on it, and reading the result back into
// `uncompressed`.
// Throws streamException when a scratch file cannot be opened, written
// or fully read.
//
// NOTE(review): the scratch files have fixed names under /tmp, so
// concurrent callers overwrite each other and the paths are
// predictable.  The exit status of system() is not checked.
void zip::decompress(buffer& compressed, buffer& uncompressed)
{
   fstream out(COMPRESSED, ios::out|ios::trunc);

   if ( !out ) 
      throw(streamException(out.rdstate()));

   // Test the stream with operator! rather than comparing it with 0,
   // which newer iostream implementations do not accept.
   if ( !out.write(compressed.get_base(), compressed.content_sz()) )
      throw(streamException(out.rdstate()));

   out.close();

   system(form("gzip -cd %s > %s", COMPRESSED, UNCOMPRESSED));

   fstream in(UNCOMPRESSED, ios::in);

   if ( !in ) 
      throw(streamException(in.rdstate()));

   int x = bytes(in);

   uncompressed.expand_chunk(x);

   // Fail on a read error or a short read.
   if ( !in.read(uncompressed.get_base(), x) || x != in.gcount() )
      throw(streamException(in.rdstate()));

   uncompressed.set_content_sz(x);

   in.close();
}
|
||||
|
||||
io_status zip::build_dict(lex_func_t, getchar_func_t)
|
||||
{
|
||||
return done;
|
||||
}
|
||||
|
||||
MMDB_BODIES(zip)
|
||||
|
||||
// The agent has no compacted disk representation: size is always 0.
int zip::cdr_sizeof()
{
   return 0;
}
|
||||
|
||||
// Nothing to write; the buffer is left untouched and `done` is returned.
io_status zip::cdrOut(buffer&)
{
   return done;
}
|
||||
|
||||
// Nothing to read; the buffer is left untouched and `done` is returned.
io_status zip::cdrIn(buffer&)
{
   return done;
}
|
||||
|
||||
|
||||
56
cde/programs/dtinfo/DtMmdb/compression/zip.h
Normal file
56
cde/programs/dtinfo/DtMmdb/compression/zip.h
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* $XConsortium: zip.h /main/3 1996/06/11 17:15:40 cde-hal $
|
||||
*
|
||||
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
|
||||
* All rights reserved. Unpublished -- rights reserved under
|
||||
* the Copyright Laws of the United States. USE OF A COPYRIGHT
|
||||
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
|
||||
* OR DISCLOSURE.
|
||||
*
|
||||
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
|
||||
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
|
||||
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
|
||||
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
|
||||
* INTERNATIONAL, LTD.
|
||||
*
|
||||
* RESTRICTED RIGHTS LEGEND
|
||||
* Use, duplication, or disclosure by the Government is subject
|
||||
* to the restrictions as set forth in subparagraph (c)(l)(ii)
|
||||
* of the Rights in Technical Data and Computer Software clause
|
||||
* at DFARS 252.227-7013.
|
||||
*
|
||||
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
|
||||
* 1315 Dell Avenue
|
||||
* Campbell, CA 95008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _zip_h
|
||||
#define _zip_h 1
|
||||
|
||||
#include "compression/abs_agent.h"
|
||||
|
||||
class zip : public compress_agent
|
||||
{
|
||||
|
||||
public:
|
||||
zip() : compress_agent(GZIP_AGENT_CODE) {};
|
||||
virtual ~zip() {};
|
||||
|
||||
virtual void compress(const buffer& uncompressed, buffer& compressed) ;
|
||||
virtual void decompress(buffer& compressed, buffer& uncompressed) ;
|
||||
|
||||
MMDB_SIGNATURES(zip);
|
||||
|
||||
// compacted disk representation In and Out functions
|
||||
virtual int cdr_sizeof();
|
||||
virtual io_status cdrOut(buffer&);
|
||||
virtual io_status cdrIn(buffer&);
|
||||
|
||||
virtual io_status build_dict(lex_func_t f_lex, getchar_func_t f_getchar);
|
||||
};
|
||||
|
||||
extern zip g_zip_agent;
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user