326 lines
8.7 KiB
Plaintext
326 lines
8.7 KiB
Plaintext
unit RLEUnit;
|
|
{-------------------------------------------------------------------------------
|
|
Run Length Encoder Unit
|
|
-----------------------
|
|
reSource v2.6
|
|
Copyright (C) 1998-2001 Victor Kasenda / gruv
|
|
http://go.to/gruv
|
|
email: vickas@singnet.com.sg
|
|
|
|
Desc:
|
|
This is the run length encoder for preprocessing the file before the sorting
|
|
phase.
|
|
|
|
Naming convention notes:
|
|
ix: input index
|
|
oix: output index
|
|
-------------------------------------------------------------------------------}
|
|
|
|
|
|
(**) interface (**)
|
|
uses StructsUnit;
|
|
|
|
const
|
|
{RunThreshold number of bytes signifies the start of a run.
|
|
4 = 4 + 0
|
|
5 = 4 + 1
|
|
6 = 4 + 1 bytes
|
|
4 will expand to 5 bytes, 5 will retain, 6 will compress to 5 bytes}
|
|
RUN_THRESHOLD = 100;
|
|
|
|
type
|
|
TRunLengthEncoder = class
|
|
private
|
|
in_block, out_block: PBlock;
|
|
block_length: longint; // in_block length
|
|
oix: longint; // index into out_block
|
|
run_length: longint; // current run count
|
|
last_symbol: byte; // the symbol that has a run
|
|
|
|
procedure PutByte(const b: byte);
|
|
procedure PutRunCount;
|
|
public
|
|
procedure EncodeBlock(_in_block, _out_block: PBlock; _block_length: longint;
|
|
var out_block_length: longint);
|
|
|
|
end;
|
|
|
|
|
|
|
|
TRunLengthDecoder = class
|
|
in_block, out_block: PBlock;
|
|
block_length: longint; // length of in_block
|
|
ix, oix: longint; // index into input and output block
|
|
|
|
function GetRunCount: longint;
|
|
procedure ExpandRun;
|
|
public
|
|
procedure DecodeBlock(_in_block, _out_block: PBlock; _block_length: longint;
|
|
var out_block_length: longint);
|
|
end;
|
|
|
|
|
|
(**) implementation (**)
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Run Length Encoder
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
{-------------------------------------------------------------------------------
|
|
PutByte
|
|
-------
|
|
output a byte to out_block and increment the output index (oix)
|
|
-------------------------------------------------------------------------------}
|
|
procedure TRunLengthEncoder.PutByte(const b: byte);
|
|
begin
|
|
out_block^[oix] := b;
|
|
inc(oix);
|
|
end;
|
|
|
|
{-------------------------------------------------------------------------------
|
|
PutRunCount
|
|
-----------
|
|
|
|
Desc:
|
|
The count is encoded in as many 6 bit codes as needed, up to a max of 30 bits.
|
|
The 7th bit is set if more codes follow.
|
|
The most significant 6 bits are transmitted first.
|
|
-------------------------------------------------------------------------------}
|
|
procedure TRunLengthEncoder.PutRunCount;
|
|
var
|
|
d: byte;
|
|
bits_shift: shortint;
|
|
must_put: boolean;
|
|
begin
|
|
// Start by getting bits 25-30, then 19-24, 13-19 etc.
|
|
// if a bigger value was set eg. 25-30, then the rest of the values must be
|
|
// put although they may be 0
|
|
dec(run_length, RUN_THRESHOLD);
|
|
bits_shift := 24;
|
|
must_put := false;
|
|
repeat
|
|
d := ((run_length shr bits_shift) and $3F);
|
|
|
|
if (d > 0) or must_put then
|
|
begin
|
|
d := d or $40;
|
|
PutByte(d);
|
|
must_put := true;
|
|
end;
|
|
dec(bits_shift, 6);
|
|
until (bits_shift = 0);
|
|
|
|
// Put last byte (terminator) without the 7th bit set
|
|
d := (run_length and $3F);
|
|
PutByte(d);
|
|
end;
|
|
|
|
{-------------------------------------------------------------------------------
|
|
EncodeBlock
|
|
-----------
|
|
|
|
Algo:
|
|
Maintain 2 index, ix and oix into the input and output block respectively.
|
|
curr_symbol: current symbol
|
|
1) Read curr_symbol from the block
|
|
2) If curr_symbol equals the previous symbol then
|
|
a) increase run count
|
|
ELSE
|
|
a) If it is the end of a run (run count > run threshold) then
|
|
i) output the run length
|
|
ii) reset run length
|
|
3) Only output the curr_symbol if the run length is below run threshold
|
|
4) Repeat (1)
|
|
|
|
Notes:
|
|
If the run goes all the way to the end of the block, we must output the
|
|
run length in the end.
|
|
-------------------------------------------------------------------------------}
|
|
procedure TRunLengthEncoder.EncodeBlock(_in_block, _out_block: PBlock; _block_length: longint;
|
|
var out_block_length: longint);
|
|
|
|
|
|
{
|
|
Initialize resets the variables to process a new block
|
|
}
|
|
procedure Initialize;
|
|
begin
|
|
out_block := _out_block;
|
|
in_block := _in_block;
|
|
block_length := _block_length;
|
|
oix := 0;
|
|
end;
|
|
|
|
|
|
var
|
|
curr_symbol: byte;
|
|
ix: longint;
|
|
begin
|
|
Initialize;
|
|
|
|
{Init out_block with the first byte in in_block}
|
|
run_length := 1;
|
|
last_symbol := in_block^[0];
|
|
PutByte(last_symbol);
|
|
|
|
for ix := 1 to block_length-1 do
|
|
begin
|
|
curr_symbol := in_block^[ix];
|
|
|
|
if (curr_symbol = last_symbol) then
|
|
inc(run_length)
|
|
else
|
|
begin
|
|
{A different symbol indicates an end of run}
|
|
if (run_length >= RUN_THRESHOLD) then
|
|
PutRunCount;
|
|
run_length := 1;
|
|
end;
|
|
|
|
if (run_length <= RUN_THRESHOLD) then
|
|
PutByte(curr_symbol);
|
|
|
|
last_symbol := curr_symbol;
|
|
end;
|
|
|
|
{If there were more than RunThreshold bytes at the end of the block,
|
|
then we must terminate the run at the end}
|
|
if (run_length >= RUN_THRESHOLD) then PutRunCount;
|
|
|
|
|
|
out_block_length := oix;
|
|
end;
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Run Length Decoder
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
{-------------------------------------------------------------------------------
|
|
DecodeBlock
|
|
-----------
|
|
Decode a block.
|
|
|
|
Algo:
|
|
Maintain 2 indexes, ix and oix into the input and output block.
|
|
1) Read in a character
|
|
2) If the character is repeated, then increase run length
|
|
3) If run length hits run threshold, (a run length follows)
|
|
a) decode the run length
|
|
b) expand the run (fill output block with run length number of thbe char curr_symbol)
|
|
b) reset run length to zero
|
|
4) Repeat (1)
|
|
|
|
Notes:
|
|
We start counting from index 1 so that previous char is init to the char at
|
|
index 0.
|
|
-------------------------------------------------------------------------------}
|
|
procedure TRunLengthDecoder.DecodeBlock(_in_block, _out_block: PBlock; _block_length: longint;
|
|
var out_block_length: longint);
|
|
|
|
|
|
procedure Initialize;
|
|
begin
|
|
out_block := _out_block;
|
|
in_block := _in_block;
|
|
block_length := _block_length;
|
|
end;
|
|
|
|
|
|
var
|
|
run_length: byte;
|
|
curr_symbol, last_symbol: byte;
|
|
begin
|
|
Initialize;
|
|
run_length := 1;
|
|
last_symbol := in_block^[0];
|
|
out_block^[0] := last_symbol;
|
|
|
|
oix := 1;
|
|
ix := 1;
|
|
while (ix < block_length) do
|
|
begin
|
|
curr_symbol := in_block^[ix];
|
|
out_block^[oix] := curr_symbol;
|
|
|
|
inc(ix); {The next index could point to a run length or another char}
|
|
inc(oix);
|
|
|
|
if (curr_symbol = last_symbol) then
|
|
begin
|
|
inc(run_length);
|
|
if (run_length = RUN_THRESHOLD) then
|
|
begin
|
|
ExpandRun;
|
|
run_length := 1;
|
|
end;
|
|
end
|
|
else
|
|
run_length := 1;
|
|
|
|
last_symbol := curr_symbol;
|
|
end; {while}
|
|
|
|
out_block_length := oix;
|
|
end;
|
|
|
|
{-------------------------------------------------------------------------------
|
|
GetRunCount
|
|
-----------
|
|
gets the run count by reading as many bits as necessary that represent the
|
|
run length. The run length is represented in 7 bits per byte.
|
|
-------------------------------------------------------------------------------}
|
|
function TRunLengthDecoder.GetRunCount: longint;
|
|
var
|
|
count: longint;
|
|
b: byte;
|
|
begin
|
|
count := 0;
|
|
|
|
repeat
|
|
b := in_block^[ix];
|
|
count := (count shl 6) or (b and $3F); // extract last 6 bits from b
|
|
inc(ix);
|
|
until ((b and $40) = 0); // continue if 7th bit set
|
|
|
|
result := count;
|
|
end;
|
|
|
|
|
|
{-------------------------------------------------------------------------------
|
|
ExpandRun
|
|
---------
|
|
Expand the run with length pointed to by ix.
|
|
ix-1 is the symbol used to expand.
|
|
|
|
GetRunCount will inc ix to get the run count.
|
|
ExpandRun itself will inc oix accordingly.
|
|
|
|
IN and OUT assertion:
|
|
ix and oix point to the next pos to input and output respectively.
|
|
-------------------------------------------------------------------------------}
|
|
procedure TRunLengthDecoder.ExpandRun;
|
|
var
|
|
run_symbol: byte;
|
|
expand_count: longint;
|
|
expand_limit: longint;
|
|
begin
|
|
run_symbol := in_block^[ix-1];
|
|
expand_count := GetRunCount;
|
|
expand_limit := oix + expand_count;
|
|
|
|
while (oix < expand_limit) do
|
|
begin
|
|
out_block^[oix] := run_symbol;
|
|
inc(oix);
|
|
end;
|
|
|
|
end;
|
|
|
|
|
|
end.
|