VBA编写Ribbon Custom UI编辑器02——编码转换

2020-08-14 11:12:45 浏览数 (1)

在Office文件的ZIP压缩包里,解压之后,customUI.xml的编码是UTF-8,VBA中的编码是UCS2(Unicode的学名是"Universal Multiple-Octet Coded Character Set",简称为UCS,VBA中使用的UCS2就是用两个字节编码)。

想要使用VBA来处理customUI.xml,必须要实现编码转换的功能。

关于编码方法的知识,建议网上找找资料看看,UTF-8与UCS2之间是有规律的,完全可以根据位移来实现编码的转换。

首先声明一些需要用到的常量:

代码语言:javascript复制
Private Const b_1000_0000 As Byte = 128
Private Const b_1100_0000 As Byte = 192
Private Const b_1110_0000 As Byte = 224
Private Const b_1111_0000 As Byte = 240
Private Const b_0001_1100 As Byte = 28
Private Const b_0000_0111 As Byte = 7
Private Const b_0000_0011 As Byte = 3
Private Const b_0011_1111 As Byte = 63
Private Const b_0000_1111 As Byte = 15
Private Const b_0011_1100 As Byte = 60
Private Const b_0000_0010 As Byte = 2

01

UTF-8转UCS2

代码语言:javascript复制
'// UCS-2转UTF-8
'// 1 对于不大于0x007F(即00000000 01111111)的,直接把它转成一个字节,变成ASCII
'// 2 对于不大于0x07FF(即00000111 11111111)的,转换成两个字节
'//   转换的时候把右边的11位分别放到110xxxxx 10yyyyyy里边
'//   即0000 0aaa bbbb bbbb ==> 110a aabb   10bb bbbb
'// 3 剩下的会转换成三个字节,转换的时候也是把16个位分别填写到那三个字节里面
'//   即aaaaaaaa bbbbbbbb ==> 1110 aaaa   10aa aabb   10bb bbbb
Function ToUTF8(SrcUCS2() As Byte, RetUTF8() As Byte) As String
    Dim ilensrc As Long
    ilensrc = UBound(SrcUCS2)   1
    
    If ilensrc < 2 Then
        ToUTF8 = "输入的UCS2字节数组太小了!"
        Exit Function
    End If
    
    Dim i As Long
    Dim iStart As Long
    '如果是从txt文件中读取的,可能会有BOM头
    If SrcUCS2(i) = &HFF And SrcUCS2(i   1) = &HFE Then
        iStart = 2
    End If
    
    If ilensrc Mod 2 Then
        ToUTF8 = "输入的UCS2字节数组不是偶数!"
        Exit Function
    End If
    
    ReDim RetUTF8(ilensrc / 2 * 3 - 1) As Byte
    Dim p As Long
    
    Dim tmp As Long
    Dim l1 As Long, l2 As Long
    For i = iStart To ilensrc - 1 Step 2
        l1 = VBA.CLng(SrcUCS2(i   1))
        l2 = VBA.CLng(SrcUCS2(i))
        
        tmp = l1 * 2 ^ 8 Or l2
        
        If tmp <= &H7F Then
            RetUTF8(p) = VBA.CByte(tmp)
            p = p   1
        ElseIf tmp <= &H7FF Then
            RetUTF8(p) = b_1100_0000 Or (SrcUCS2(i   1) * (2 ^ 2)) Or (SrcUCS2(i)  (2 ^ 6))
            p = p   1
            
            RetUTF8(p) = b_1000_0000 Or (SrcUCS2(i) And b_0011_1111)
            p = p   1
        Else
            RetUTF8(p) = b_1110_0000 Or (SrcUCS2(i   1)  (2 ^ 4))
            p = p   1
                
            RetUTF8(p) = b_1000_0000 Or ((SrcUCS2(i   1) And b_0000_1111) * (2 ^ 2)) Or (SrcUCS2(i)  (2 ^ 6))
            p = p   1
            
            RetUTF8(p) = b_1000_0000 Or (SrcUCS2(i) And b_0011_1111)
            p = p   1
        End If
    Next
    
    ReDim Preserve RetUTF8(p - 1) As Byte
End Function

02

UCS2转UTF-8

代码语言:javascript复制

Function FromUTF8(SrcUTF8() As Byte, RetUCS2() As Byte) As String
    Dim ilensrc As Long
    ilensrc = UBound(SrcUTF8)   1
    
    Dim i As Long
    Dim iStart As Long
    '如果是从txt文件中读取的,可能会有BOM头
    If SrcUTF8(i) = &HEF And SrcUTF8(i   1) = &HBB And SrcUTF8(i   2) = &HBF Then
        iStart = 3
    End If
    
    ReDim RetUCS2(ilensrc * 2 - 1) As Byte
    Dim p As Long
    
    Dim tmp As Long
    Dim b1 As Byte, b2 As Byte, b3 As Byte
    i = iStart
    Do While i < ilensrc
        b1 = SrcUTF8(i)
        i = i   1
        
        'UCS2 只有2个字节,只能转换3字节以下的UTF8
        If b1 >= b_1111_0000 Then
            FromUTF8 = "UCS2 只有2个字节,只能转换3字节以下的UTF8"
            Exit Function
            
        ElseIf b1 >= b_1110_0000 Then
            '// 1110 aaaa 10bb bbbb 10cc cccc ==> aaaa bbbb  bbcc cccc
            '// 需要再读取2个字节
            b2 = SrcUTF8(i)
            i = i   1
        
            b3 = SrcUTF8(i)
            i = i   1
            
            b1 = ((b1 And b_0000_1111) * 2 ^ 4) Or ((b2 And b_0011_1111)  2 ^ 2)
            b2 = ((b2 And b_0000_0011) * 2 ^ 6) Or (b3 And b_0011_1111)
        ElseIf b1 >= b_1100_0000 Then
            '// 110a aaaa 10bb bbbb ==> 0000 0aaa  aabb bbbb
            '// 需要再读取1个字节
            b2 = SrcUTF8(i)
            i = i   1
            
            b2 = ((b1 And b_0000_0011) * 2 ^ 6) Or (b2 And b_0011_1111)
            b1 = (b1 And b_0011_1111)  2 ^ 2
            
        Else
            '// 0aaa aaaa ==> 0000 0000  0aaa aaaa
            b2 = b1
            b1 = 0
        End If
        
        RetUCS2(p) = b2
        RetUCS2(p   1) = b1
        p = p   2
    Loop
    
    ReDim Preserve RetUCS2(p - 1) As Byte
End Function

0 人点赞