rsvk/Component/RLEUnit.pas

326 lines
8.7 KiB
Plaintext

unit RLEUnit;
{-------------------------------------------------------------------------------
Run Length Encoder Unit
-----------------------
reSource v2.6
Copyright (C) 1998-2001 Victor Kasenda / gruv
http://go.to/gruv
email: vickas@singnet.com.sg
Desc:
This is the run length encoder for preprocessing the file before the sorting
phase.
Naming convention notes:
ix: input index
oix: output index
-------------------------------------------------------------------------------}
(**) interface (**)
uses StructsUnit;
const
{RunThreshold number of bytes signifies the start of a run.
4 = 4 + 0
5 = 4 + 1
6 = 4 + 1 bytes
4 will expand to 5 bytes, 5 will retain, 6 will compress to 5 bytes}
RUN_THRESHOLD = 100;
type
TRunLengthEncoder = class
private
in_block, out_block: PBlock;
block_length: longint; // in_block length
oix: longint; // index into out_block
run_length: longint; // current run count
last_symbol: byte; // the symbol that has a run
procedure PutByte(const b: byte);
procedure PutRunCount;
public
procedure EncodeBlock(_in_block, _out_block: PBlock; _block_length: longint;
var out_block_length: longint);
end;
TRunLengthDecoder = class
in_block, out_block: PBlock;
block_length: longint; // length of in_block
ix, oix: longint; // index into input and output block
function GetRunCount: longint;
procedure ExpandRun;
public
procedure DecodeBlock(_in_block, _out_block: PBlock; _block_length: longint;
var out_block_length: longint);
end;
(**) implementation (**)
////////////////////////////////////////////////////////////////////////////////
// Run Length Encoder
////////////////////////////////////////////////////////////////////////////////
{-------------------------------------------------------------------------------
PutByte
-------
output a byte to out_block and increment the output index (oix)
-------------------------------------------------------------------------------}
procedure TRunLengthEncoder.PutByte(const b: byte);
begin
out_block^[oix] := b;
inc(oix);
end;
{-------------------------------------------------------------------------------
PutRunCount
-----------
Desc:
The count is encoded in as many 6 bit codes as needed, up to a max of 30 bits.
The 7th bit is set if more codes follow.
The most significant 6 bits are transmitted first.
-------------------------------------------------------------------------------}
procedure TRunLengthEncoder.PutRunCount;
var
d: byte;
bits_shift: shortint;
must_put: boolean;
begin
// Start by getting bits 25-30, then 19-24, 13-19 etc.
// if a bigger value was set eg. 25-30, then the rest of the values must be
// put although they may be 0
dec(run_length, RUN_THRESHOLD);
bits_shift := 24;
must_put := false;
repeat
d := ((run_length shr bits_shift) and $3F);
if (d > 0) or must_put then
begin
d := d or $40;
PutByte(d);
must_put := true;
end;
dec(bits_shift, 6);
until (bits_shift = 0);
// Put last byte (terminator) without the 7th bit set
d := (run_length and $3F);
PutByte(d);
end;
{-------------------------------------------------------------------------------
EncodeBlock
-----------
Algo:
Maintain 2 index, ix and oix into the input and output block respectively.
curr_symbol: current symbol
1) Read curr_symbol from the block
2) If curr_symbol equals the previous symbol then
a) increase run count
ELSE
a) If it is the end of a run (run count > run threshold) then
i) output the run length
ii) reset run length
3) Only output the curr_symbol if the run length is below run threshold
4) Repeat (1)
Notes:
If the run goes all the way to the end of the block, we must output the
run length in the end.
-------------------------------------------------------------------------------}
procedure TRunLengthEncoder.EncodeBlock(_in_block, _out_block: PBlock; _block_length: longint;
var out_block_length: longint);
{
Initialize resets the variables to process a new block
}
procedure Initialize;
begin
out_block := _out_block;
in_block := _in_block;
block_length := _block_length;
oix := 0;
end;
var
curr_symbol: byte;
ix: longint;
begin
Initialize;
{Init out_block with the first byte in in_block}
run_length := 1;
last_symbol := in_block^[0];
PutByte(last_symbol);
for ix := 1 to block_length-1 do
begin
curr_symbol := in_block^[ix];
if (curr_symbol = last_symbol) then
inc(run_length)
else
begin
{A different symbol indicates an end of run}
if (run_length >= RUN_THRESHOLD) then
PutRunCount;
run_length := 1;
end;
if (run_length <= RUN_THRESHOLD) then
PutByte(curr_symbol);
last_symbol := curr_symbol;
end;
{If there were more than RunThreshold bytes at the end of the block,
then we must terminate the run at the end}
if (run_length >= RUN_THRESHOLD) then PutRunCount;
out_block_length := oix;
end;
////////////////////////////////////////////////////////////////////////////////
// Run Length Decoder
////////////////////////////////////////////////////////////////////////////////
{-------------------------------------------------------------------------------
DecodeBlock
-----------
Decode a block.
Algo:
Maintain 2 indexes, ix and oix into the input and output block.
1) Read in a character
2) If the character is repeated, then increase run length
3) If run length hits run threshold, (a run length follows)
a) decode the run length
b) expand the run (fill output block with run length number of thbe char curr_symbol)
b) reset run length to zero
4) Repeat (1)
Notes:
We start counting from index 1 so that previous char is init to the char at
index 0.
-------------------------------------------------------------------------------}
procedure TRunLengthDecoder.DecodeBlock(_in_block, _out_block: PBlock; _block_length: longint;
var out_block_length: longint);
procedure Initialize;
begin
out_block := _out_block;
in_block := _in_block;
block_length := _block_length;
end;
var
run_length: byte;
curr_symbol, last_symbol: byte;
begin
Initialize;
run_length := 1;
last_symbol := in_block^[0];
out_block^[0] := last_symbol;
oix := 1;
ix := 1;
while (ix < block_length) do
begin
curr_symbol := in_block^[ix];
out_block^[oix] := curr_symbol;
inc(ix); {The next index could point to a run length or another char}
inc(oix);
if (curr_symbol = last_symbol) then
begin
inc(run_length);
if (run_length = RUN_THRESHOLD) then
begin
ExpandRun;
run_length := 1;
end;
end
else
run_length := 1;
last_symbol := curr_symbol;
end; {while}
out_block_length := oix;
end;
{-------------------------------------------------------------------------------
GetRunCount
-----------
gets the run count by reading as many bits as necessary that represent the
run length. The run length is represented in 7 bits per byte.
-------------------------------------------------------------------------------}
function TRunLengthDecoder.GetRunCount: longint;
var
count: longint;
b: byte;
begin
count := 0;
repeat
b := in_block^[ix];
count := (count shl 6) or (b and $3F); // extract last 6 bits from b
inc(ix);
until ((b and $40) = 0); // continue if 7th bit set
result := count;
end;
{-------------------------------------------------------------------------------
ExpandRun
---------
Expand the run with length pointed to by ix.
ix-1 is the symbol used to expand.
GetRunCount will inc ix to get the run count.
ExpandRun itself will inc oix accordingly.
IN and OUT assertion:
ix and oix point to the next pos to input and output respectively.
-------------------------------------------------------------------------------}
procedure TRunLengthDecoder.ExpandRun;
var
run_symbol: byte;
expand_count: longint;
expand_limit: longint;
begin
run_symbol := in_block^[ix-1];
expand_count := GetRunCount;
expand_limit := oix + expand_count;
while (oix < expand_limit) do
begin
out_block^[oix] := run_symbol;
inc(oix);
end;
end;
end.