在VBAProject中,dir流以及VBA模块代码流都使用了run length encoding的算法进行压缩。
run length encoding请参考官方文档的2.4.1 Compression and Decompression。
代码实现:
代码语言:javascript复制'run length encoding
Private Type RLE
cpBytes() As Byte
cpBytesLen As Long
pcp As Long
uncpBytes() As Byte
puncp As Long
uncpBytesLen As Long
cpChunkStart As Long
cpChunkEnd As Long
uncpChunkStart As Long
End Type
Private r As RLE
Function UnCompress(b() As Byte, ret() As Byte) As String
r.cpBytes = b
' // SignatureByte 压缩标识为0x1才是压缩过的
If r.cpBytes(0) <> 1 Then
r.uncpBytes = r.cpBytes
Exit Function
End If
r.cpBytesLen = UBound(r.cpBytes) 1
r.uncpBytesLen = 2 * r.cpBytesLen
ReDim r.uncpBytes(r.uncpBytesLen - 1) As Byte
r.pcp = r.pcp 1
Do While r.pcp < r.cpBytesLen - 1
r.cpChunkStart = r.pcp
Chunk
Loop
ReDim Preserve r.uncpBytes(r.puncp - 1) As Byte
ret = r.uncpBytes
End Function
Private Function Chunk() As String
' // 每个输出块前面都有一个两个字节的头,表示块中的字节数和块的格式。
' // 每个压缩块被解码成4096字节的未压缩数据,被写入输出。
' // 对于每个块,从块header中提取大小和格式样式。然后根据header题中指定的格式读取和解码该块
Dim header As Integer
header = Bytes2Int(r.cpBytes, r.pcp)
r.pcp = r.pcp 2
' 获得压缩数据块的大小
Dim chunksize As Integer
chunksize = (header And &HFFF) 3
Dim i As Long
Dim iend As Long
' // 获取数据块压缩标识,1是压缩,0是没有压缩
Dim flag As Integer
flag = header And &H8000
If flag = &H8000 Then
'压缩数据块的最后位置
If r.cpBytesLen - 1 > (r.cpChunkStart chunksize) Then
r.cpChunkEnd = r.cpChunkStart chunksize
Else
r.cpChunkEnd = r.cpBytesLen - 1
End If
Do While r.pcp < r.cpChunkEnd
TokenSequence
Loop
Else
' // 未压缩的块,直接读取
chunksize = 4096
iend = r.pcp chunksize
If iend >= r.cpBytesLen Then iend = r.cpBytesLen - 1
chunksize = iend - r.pcp
For i = 0 To chunksize - 1
r.uncpBytes(r.puncp) = r.cpBytes(r.pcp)
r.pcp = r.pcp 1
puncpAdd
Next
End If
r.cpChunkStart = r.pcp
r.uncpChunkStart = r.puncp
End Function
Private Function TokenSequence() As String
' // flagByte的8位对应了8个Tokens
' // 0表示没有压缩,1表示是1个copyToken
Dim flagbyte As Byte
flagbyte = r.cpBytes(r.pcp)
r.pcp = r.pcp 1
Dim i As Long
For i = 0 To 8 - 1
If r.pcp < r.cpChunkEnd Then ' // 有可能没有8个token
' // CALL Decompressing a Token (section 2.4.1.3.5) with index and Byte
Token i, flagbyte
End If
Next
End Function
Private Function Token(index As Long, flagbyte As Byte) As String
Dim flag As Boolean
flag = ((flagbyte (2 ^ index)) And 1) > 0
Dim itoken As Integer
Dim Offset As Integer, Length As Integer
Dim i_start As Long, i_end As Long
Dim i As Long
If flag Then
itoken = Bytes2Int(r.cpBytes, r.pcp)
unpackCopyToken itoken, Offset, Length
' // SET CopySource TO DecompressedCurrent - Offset
' // CALL Byte Copy (section 2.4.1.3.11) with CopySource, DecompressedCurrent, and Length
i_start = r.puncp - Offset
i_end = r.puncp - Offset Length
For i = i_start To i_end - 1
r.uncpBytes(r.puncp) = r.uncpBytes(i)
puncpAdd
Next
'
r.pcp = r.pcp 2
Else
' COPY the byte at CompressedCurrent TO DecompressedCurrent
r.uncpBytes(r.puncp) = r.cpBytes(r.pcp)
r.pcp = r.pcp 1
puncpAdd
End If
End Function
Private Function unpackCopyToken(Token As Integer, ByRef Offset As Integer, ByRef Length As Integer) As String
' // 2.4.1.3.19.2 Unpack CopyToken
' // Offset (2 bytes): An unsigned 16-bit integer that specifies the beginning of a CopySequence (section 2.4.1.3.19).
' // Length (2 bytes): An unsigned 16-bit integer that specifies the length of a CopySequence
'
' //1. CALL CopyToken Help (section 2.4.1.3.19.1) returning LengthMask, OffsetMask, and BitCount.
Dim LengthMask As Integer, OffsetMask As Integer, BitCount As Integer
copyTokenHelp LengthMask, OffsetMask, BitCount, 0
' //2. SET Length TO (Token BITWISE AND LengthMask) PLUS 3.
Length = (Token And LengthMask) 3
' //3. SET temp1 TO Token BITWISE AND OffsetMask.
Dim temp1 As Integer
temp1 = Token And OffsetMask
' //4. SET temp2 TO 16 MINUS BitCount.
Dim temp2 As Integer
temp2 = 16 - BitCount
' //5. SET Offset TO (temp1 RIGHT SHIFT BY temp2) PLUS 1.
Offset = BitMoveRightInt(temp1, VBA.CLng(temp2)) 1
End Function
Private Function copyTokenHelp(LengthMask As Integer, OffsetMask As Integer, BitCount As Integer, MaximumLength As Integer) As String
' // LengthMask (2 bytes): An unsigned 16-bit integer. A bitmask used to access CopyToken.Length.
' // OffsetMask (2 bytes): An unsigned 16-bit integer. A bitmask used to access CopyToken.Offset.
' // BitCount (2 bytes): An unsigned 16-bit integer. The number of bits set to 0b1 in OffsetMask.
' // MaximumLength (2 bytes): An unsigned 16-bit integer. The largest possible integral value that can fit into CopyToken.Length
'
' //§ SET difference TO DecompressedCurrent MINUS DecompressedChunkStart
Dim difference As Long
difference = r.puncp - r.uncpChunkStart
' //§ SET BitCount TO the smallest integer that is GREATER THAN OR EQUAL TO LOGARITHM base 2 of difference
' // 大于或者等于log2(different)的最小整数,要向上取整
BitCount = VBA.CInt(Application.WorksheetFunction.RoundUp(Math.Log(difference) / Math.Log(2), 0))
'
' //§ SET BitCount TO the maximum of BitCount and 4
If BitCount < 4 Then
BitCount = 4
End If
'
' //§ SET LengthMask TO 0xFFFF RIGHT SHIFT BY BitCount
LengthMask = &HFFFF
LengthMask = BitMoveRightInt(LengthMask, VBA.CLng(BitCount))
' //§ SET OffsetMask TO BITWISE NOT LengthMask
OffsetMask = Not LengthMask
' //§ SET MaximumLength TO (0xFFFF RIGHT SHIFT BY BitCount) PLUS 3
MaximumLength = &HFFFF
MaximumLength = BitMoveRightInt(MaximumLength, VBA.CLng(BitCount)) 3
End Function
Private Function puncpAdd() As Long
If r.puncp = r.uncpBytesLen - 1 Then
r.uncpBytesLen = r.uncpBytesLen * 1.2
ReDim Preserve r.uncpBytes(r.uncpBytesLen - 1) As Byte
End If
r.puncp = 1 r.puncp
End Function