本文简单描述了一种在 Unity 中进行数据压缩的方法
一般的游戏开发中,数据压缩往往跟资源加载等底层机制关系密切,在上层逻辑中的使用则并不常见.
.Net 中, System.IO.Compression命名空间下就原生提供了可以进行数据(解)压缩的各种类型(方法),譬如 DeflateStream, GZipStream 等等.
但是如果我们直接在 Unity 中使用这些类型(方法)来进行数据(解)压缩,那么就会遇到跨平台问题(移动平台不可用),怎么处理呢?
一种方式就是引入第三方库,而不去依赖原生的 System.IO.Compression,之前有人将 System.IO.Compression 的实现重新 port 成了 Unity.IO.Compression,其实也可以算作是引入第三方库的做法,有兴趣的朋友可以看看.
另一个常见的第三方库则是 SharpZipLib,也提供了很多(解)压缩的类型(方法).
引入第三方库的方式虽然通用,但是如果你使用的场景比较简单轻量,那么这种方式就显得有些过重了,很多时候我们往往仅希望使用一种通用的压缩接口(因为使用场景比较简单),而不是费力的摆弄各种类型,就像这样:
代码语言:javascript复制byte[] dataCompressed = CompressUtil.Compress(data);
// that's it.
实际上我们只要自己实现一种较通用的数据压缩方法就可以做到了,并且之前已经有人这么去做了,相关的讨论可以看这里,其中提及的源码可以直接使用(实现了LZF算法),代码不长,我简单调整了一下,如下所示:
代码语言:javascript复制/*
* Improved version to C# LibLZF Port:
* Copyright (c) 2010 Roman Atachiants <kelindar@gmail.com>
*
* Original CLZF Port:
* Copyright (c) 2005 Oren J. Maurice <oymaurice@hazorea.org.il>
*
* Original LibLZF Library Algorithm:
* Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU General Public License version 2 (the "GPL"), in which case the
* provisions of the GPL are applicable instead of the above. If you wish to
* allow the use of your version of this file only under the terms of the
* GPL and not to allow others to use your version of this file under the
* BSD license, indicate your decision by deleting the provisions above and
* replace them with the notice and other provisions required by the GPL. If
* you do not delete the provisions above, a recipient may use your version
* of this file under either the BSD or the GPL.
*/
using System;
/// <summary>
/// Improved C# LZF Compressor, a very small data compression library. The compression algorithm is extremely fast.
public static class CLZF2
{
private static readonly uint HLOG = 14;
private static readonly uint HSIZE = (1 << 14);
private static readonly uint MAX_LIT = (1 << 5);
private static readonly uint MAX_OFF = (1 << 13);
private static readonly uint MAX_REF = ((1 << 8) (1 << 3));
/// <summary>
/// Hashtable
/// </summary>
private static long[] HashTable = null;
// for release temp memory
public static void Reset()
{
HashTable = null;
}
// Compresses inputBytes
public static byte[] Compress(byte[] inputBytes)
{
if (inputBytes == null)
{
return null;
}
if (inputBytes.Length <= 1)
{
byte[] copyBytes = new byte[inputBytes.Length];
Buffer.BlockCopy(inputBytes, 0, copyBytes, 0, inputBytes.Length);
return copyBytes;
}
// Starting guess, increase it later if needed
int outputByteCountGuess = inputBytes.Length * 2;
byte[] tempBuffer = new byte[outputByteCountGuess];
int byteCount = lzf_compress(inputBytes, ref tempBuffer);
// If byteCount is 0, then increase buffer and try again
while (byteCount == 0)
{
outputByteCountGuess *= 2;
tempBuffer = new byte[outputByteCountGuess];
byteCount = lzf_compress(inputBytes, ref tempBuffer);
}
byte[] outputBytes = new byte[byteCount];
Buffer.BlockCopy(tempBuffer, 0, outputBytes, 0, byteCount);
return outputBytes;
}
// Decompress outputBytes
public static byte[] Decompress(byte[] inputBytes)
{
if (inputBytes == null)
{
return null;
}
if (inputBytes.Length <= 1)
{
byte[] copyBytes = new byte[inputBytes.Length];
Buffer.BlockCopy(inputBytes, 0, copyBytes, 0, inputBytes.Length);
return copyBytes;
}
// Starting guess, increase it later if needed
int outputByteCountGuess = inputBytes.Length * 2;
byte[] tempBuffer = new byte[outputByteCountGuess];
int byteCount = lzf_decompress(inputBytes, ref tempBuffer);
// If byteCount is 0, then increase buffer and try again
while (byteCount == 0)
{
outputByteCountGuess *= 2;
tempBuffer = new byte[outputByteCountGuess];
byteCount = lzf_decompress(inputBytes, ref tempBuffer);
}
byte[] outputBytes = new byte[byteCount];
Buffer.BlockCopy(tempBuffer, 0, outputBytes, 0, byteCount);
return outputBytes;
}
// NOTE you should make compressBuffer big enough
public static byte[] Compress(byte[] inputBytes, byte[] compressBuffer)
{
var byteCount = lzf_compress(inputBytes, ref compressBuffer);
if (byteCount > 0)
{
byte[] outputBytes = new byte[byteCount];
Buffer.BlockCopy(compressBuffer, 0, outputBytes, 0, byteCount);
return outputBytes;
}
return null;
}
// NOTE you should make decompressBuffer big enough
public static byte[] Decompress(byte[] inputBytes, byte[] decompressBuffer)
{
var byteCount = lzf_decompress(inputBytes, ref decompressBuffer);
if (byteCount > 0)
{
byte[] outputBytes = new byte[byteCount];
Buffer.BlockCopy(decompressBuffer, 0, outputBytes, 0, byteCount);
return outputBytes;
}
return null;
}
/// <summary>
/// Compresses the data using LibLZF algorithm
/// </summary>
/// <param name="input">Reference to the data to compress</param>
/// <param name="output">Reference to a buffer which will contain the compressed data</param>
/// <returns>The size of the compressed archive in the output buffer</returns>
static int lzf_compress(byte[] input, ref byte[] output)
{
if (input == null || output == null)
{
return 0;
}
int inputLength = input.Length;
int outputLength = output.Length;
// corner case handling
if (inputLength <= 1)
{
if (outputLength >= inputLength)
{
for (int i = 0; i < inputLength; i)
{
output[i] = input[i];
}
return inputLength;
}
return 0;
}
// handle HashTable
if (HashTable == null)
{
HashTable = new long[HSIZE];
}
else
{
Array.Clear(HashTable, 0, (int)HSIZE);
}
long hslot;
uint iidx = 0;
uint oidx = 0;
long reference;
uint hval = (uint)(((input[iidx]) << 8) | input[iidx 1]); // FRST(in_data, iidx);
long off;
int lit = 0;
while (true)
{
if (iidx < inputLength - 2)
{
hval = (hval << 8) | input[iidx 2];
hslot = ((hval ^ (hval << 5)) >> (int)(((3 * 8 - HLOG)) - hval * 5) & (HSIZE - 1));
reference = HashTable[hslot];
HashTable[hslot] = (long)iidx;
if ((off = iidx - reference - 1) < MAX_OFF
&& iidx 4 < inputLength
&& reference > 0
&& input[reference 0] == input[iidx 0]
&& input[reference 1] == input[iidx 1]
&& input[reference 2] == input[iidx 2]
)
{
/* match found at *reference */
uint len = 2;
uint maxlen = (uint)inputLength - iidx - len;
maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
if (oidx lit 1 3 >= outputLength)
{
return 0;
}
do
{
len ;
}
while (len < maxlen && input[reference len] == input[iidx len]);
if (lit != 0)
{
output[oidx ] = (byte)(lit - 1);
lit = -lit;
do
{
output[oidx ] = input[iidx lit];
}
while (( lit) != 0);
}
len -= 2;
iidx ;
if (len < 7)
{
output[oidx ] = (byte)((off >> 8) (len << 5));
}
else
{
output[oidx ] = (byte)((off >> 8) (7 << 5));
output[oidx ] = (byte)(len - 7);
}
output[oidx ] = (byte)off;
iidx = len - 1;
hval = (uint)(((input[iidx]) << 8) | input[iidx 1]);
hval = (hval << 8) | input[iidx 2];
HashTable[((hval ^ (hval << 5)) >> (int)(((3 * 8 - HLOG)) - hval * 5) & (HSIZE - 1))] = iidx;
iidx ;
hval = (hval << 8) | input[iidx 2];
HashTable[((hval ^ (hval << 5)) >> (int)(((3 * 8 - HLOG)) - hval * 5) & (HSIZE - 1))] = iidx;
iidx ;
continue;
}
}
else if (iidx == inputLength)
{
break;
}
/* one more literal byte we must copy */
lit ;
iidx ;
if (lit == MAX_LIT)
{
if (oidx 1 MAX_LIT >= outputLength)
{
return 0;
}
output[oidx ] = (byte)(MAX_LIT - 1);
lit = -lit;
do
{
output[oidx ] = input[iidx lit];
}
while (( lit) != 0);
}
}
if (lit != 0)
{
if (oidx lit 1 >= outputLength)
{
return 0;
}
output[oidx ] = (byte)(lit - 1);
lit = -lit;
do
{
output[oidx ] = input[iidx lit];
}
while (( lit) != 0);
}
return (int)oidx;
}
/// <summary>
/// Decompresses the data using LibLZF algorithm
/// </summary>
/// <param name="input">Reference to the data to decompress</param>
/// <param name="output">Reference to a buffer which will contain the decompressed data</param>
/// <returns>Returns decompressed size</returns>
static int lzf_decompress(byte[] input, ref byte[] output)
{
if (input == null || output == null)
{
return 0;
}
int inputLength = input.Length;
int outputLength = output.Length;
// corner case handling
if (inputLength <= 1)
{
if (outputLength >= inputLength)
{
for (int i = 0; i < inputLength; i)
{
output[i] = input[i];
}
return inputLength;
}
return 0;
}
uint iidx = 0;
uint oidx = 0;
do
{
uint ctrl = input[iidx ];
if (ctrl < (1 << 5)) /* literal run */
{
ctrl ;
if (oidx ctrl > outputLength)
{
//SET_ERRNO (E2BIG);
return 0;
}
do
{
output[oidx ] = input[iidx ];
}
while ((--ctrl) != 0);
}
else /* back reference */
{
uint len = ctrl >> 5;
int reference = (int)(oidx - ((ctrl & 0x1f) << 8) - 1);
if (len == 7)
{
len = input[iidx ];
}
reference -= input[iidx ];
if (oidx len 2 > outputLength)
{
//SET_ERRNO (E2BIG);
return 0;
}
if (reference < 0)
{
//SET_ERRNO (EINVAL);
return 0;
}
output[oidx ] = output[reference ];
output[oidx ] = output[reference ];
do
{
output[oidx ] = output[reference ];
}
while ((--len) != 0);
}
}
while (iidx < inputLength);
return (int)oidx;
}
}
使用时直接调用对应的接口即可:
代码语言:javascript复制byte[] dataCompressed = CLZF2.Compress(data);
// that's it.
当然,你如果想实现其他的(解)压缩算法自然也是可以的,选择标准应该依照你的使用场景而定,下面是 RLE算法 的一种实现:
代码语言:javascript复制using System;
using System.Collections.Generic;
public static class RLE
{
public const byte ESCAPE = 92; // ''
public const byte MAX_DUPLICATE_COUNT = byte.MaxValue;
public static byte[] Compress(byte[] input)
{
if (input != null)
{
m_innerBuffer.Clear();
var inputIndex = 0;
while (inputIndex < input.Length)
{
if (input[inputIndex] == ESCAPE)
{
// special handle escape
m_innerBuffer.Add(ESCAPE);
m_innerBuffer.Add(ESCAPE);
inputIndex;
}
else
{
// try find duplicate
int duplicateCount = 0;
for (int i = inputIndex 1; i < input.Length; i)
{
if (input[i] == input[inputIndex])
{
duplicateCount;
// check max duplicate count
if (duplicateCount == MAX_DUPLICATE_COUNT)
{
break;
}
}
else
{
break;
}
}
// do not compress less then 3 duplicate since meta data will take 3 byte
if (duplicateCount > 2)
{
m_innerBuffer.Add(ESCAPE);
// NOTE we should make value before count, since count could be 'ESCAPE'
m_innerBuffer.Add(input[inputIndex]);
// NOTE we do -3 offset for extend range mapping
m_innerBuffer.Add((byte)(duplicateCount - 3));
inputIndex = duplicateCount 1;
}
else
{
m_innerBuffer.Add(input[inputIndex]);
inputIndex;
}
}
}
return m_innerBuffer.ToArray();
}
return null;
}
public static byte[] Decompress(byte[] input)
{
if (input != null)
{
m_innerBuffer.Clear();
var inputIndex = 0;
while (inputIndex < input.Length)
{
if (input[inputIndex] == ESCAPE)
{
if (inputIndex 1 >= input.Length)
{
throw new Exception("[RLE]Unexpected Escape(0xFF) detected ...");
}
else
{
if (input[inputIndex 1] == ESCAPE)
{
m_innerBuffer.Add(ESCAPE);
inputIndex = 2;
}
else
{
if (inputIndex 2 >= input.Length)
{
throw new Exception("[RLE]Error compress data format ...");
}
else
{
var value = input[inputIndex 1];
// NOTE we do 3 offset since we compress do -3 offset
var duplicateCount = input[inputIndex 2] 3;
for (int i = 0; i <= duplicateCount; i)
{
m_innerBuffer.Add(value);
}
inputIndex = 3;
}
}
}
}
else
{
m_innerBuffer.Add(input[inputIndex]);
inputIndex;
}
}
return m_innerBuffer.ToArray();
}
return null;
}
static List<byte> m_innerBuffer = new List<byte>(1024);
}
另外值得一提的就是对于字符串(string)的压缩,一般来讲,压缩算法都是基于字节数组(byte[])的,所以压缩字符串的第一步就是将字符串转为字节数组,这可以借助 Encoding 类型来完成:
代码语言:javascript复制byte[] strBytes = Encoding.Unicode.GetBytes(str);
接着我们就可以调用压缩方法压缩上述的 strBytes 了:
代码语言:javascript复制byte[] strBytesCompressed = CLZF2.Compress(strBytes);
如果你还需要将压缩过的字节数组(strBytesCompressed)转为字符串,就需要小心了,因为压缩过的字节数组并不满足任何的 Unicode 编码格式,所以我们不能简单的使用 Encoding 来进行转换,一种通用的方法是对压缩过的字节数组进行 Base64 编码:
代码语言:javascript复制string strCompressed = Convert.ToBase64String(strBytesCompressed);
通过以上流程,就完成了 字符串 到 字符串 的压缩(解压则是上述过程的逆过程,在此不再赘述).