proto vs gzip proto in golang

我们知道proto是基于二进制编码的，比json格式的编码要节省大量空间，那么，如果对于proto编码后的结果再进行gzip压缩，是否能产生更多空间的节省呢？gzip压缩是否具有幂等性呢？本文讨论了在golang中对这两个问题的探索和研究

gzip压缩proto编码结果

准备条件

proto定义：

代码语言：protobuf

// Consensus rule.
message ReviewRule {
  string id = 1;
  string name = 2;
  string desc = 3;
  string operator = 4;
  string tx_id = 5;
  // List of participants, stored as strings.
  repeated string participants = 6;
  // Vote type for this rule; see the ReviewType enum.
  ReviewType review_type = 7;
  // Free-form structured vote data.
  google.protobuf.Struct vote = 8;
}

// Consensus vote type.
enum ReviewType {
  // Passes when all members vote in favor.
  All = 0;
  // Passes when any one member votes in favor.
  OneOf = 1;
  // Passes when the designated members vote in favor.
  Designation = 2;
  // Passes when a given proportion of members vote in favor, e.g. 30%, 50%, 70%.
  Scale = 3;
}

在进行这个测试时，主要研究对象是ReviewRule的第6、7、8个字段（participants、review_type、vote）：会根据给定的长度生成相应数量的随机uuid字符串填充这些字段，然后对ReviewRule做proto编码，并对proto编码的结果做gzip压缩。同时比对gzip解压缩后和原proto编码的字节长度是否一致，确保压缩和解压缩对proto编码的结果无影响。

结果比对

单位：字节Byte

随机长度	proto编码后	gzip写入	gzip压缩	gzip读取	gzip解压缩	gzip节省空间比率(%)
0	413	413	325	413	413	21.31
1	499	499	388	499	499	22.24
2	583	583	439	583	583	24.70
20	2099	2099	1240	2099	2099	40.92
200	17219	17219	8670	17219	17219	49.65
2000	168423	168423	82245	168423	168423	51.17
20000	1680423	1680423	816971	1680423	1680423	51.38

备注：gzip写入和gzip读取是为了保证在gzip处理过程中没有发生数据丢失或复写。

从上述表格中可以看到，gzip压缩确实能在proto编码的基础上进一步降低占用的空间，而且数据量越大，节省的比例越高：约1.6MB（1680423字节）的数据可以降到约800KB（816971字节），节省超过了50%。且解压缩后，数据大小仍然保持一致。

那么压缩的结果是否每次都能保持完全一致呢？

gzip压缩的幂等性

同样，在这个测试时，也是生成了长度为20000的随机参数ReviewRule。

结果比对

压缩次数	压缩结果base64编码结果数	压缩结果长度结果数
2	1	1
5	1	1
10	1	1
20	1	1
50	1	1
100	1	1

可以看到，对于相同的输入，压缩结果的大小和base64编码后的结果不随压缩次数的增加而发生变化，因此可以推断：在相同的实现和相同的压缩参数下，gzip压缩是具有确定性的（即本文所说的“幂等性”），压缩的结果每次都能保持完全一致。

附录

go版本：1.17.9 完整测试文件：

代码语言：go

package v1

import (
    "bytes"
    "compress/gzip"
    "context"
    "encoding/base64"
    "fmt"
    "io"
    "testing"

    "github.com/google/uuid"
    "github.com/pkg/errors"
    "github.com/stretchr/testify/assert"
    "golang.org/x/sync/errgroup"
    "google.golang.org/protobuf/proto"
    "google.golang.org/protobuf/types/known/structpb"
)

// newTestReviewRule builds the ReviewRule fixture shared by the tests in this
// file. Id and TxId each receive a freshly generated UUID string; Name, Desc
// and Operator are fixed literals. Participants, ReviewType and Vote are left
// at their zero values.
func newTestReviewRule() *ReviewRule {
    // The continuation lines keep their leading spaces: they are part of the
    // raw string and therefore part of the encoded payload.
    const generatedHeader = `// Code generated by protoc-gen-go. DO NOT EDIT.
    // versions:
    //  protoc-gen-go v1.28.0
    //  protoc        v3.9.0
    // source: pkg/contracts/fabric/review/v1/review.proto`

    return &ReviewRule{
        Id:       uuid.New().String(),
        Name:     "ReviewRule",
        Desc:     generatedHeader,
        Operator: "x509::CN=hlp,OU=client,O=Hyperledger,ST=North Carolina,C=US::CN=ca.org1.example.com,O=org1.example.com,L=Durham,ST=North Carolina,C=AR",
        TxId:     uuid.New().String(),
    }
}

// fillRandomParams overwrites the Participants, ReviewType and Vote fields of
// rr with data sized by length.
//
// Participants comes from genStringSlice(length). ReviewType is length modulo
// one less than the number of entries in ReviewType_name. Vote is a Struct
// with a single "vote" key holding genInterfaceSlice(length).
//
// If the Struct cannot be built, the wrapped error is returned and rr.Vote is
// left untouched; Participants and ReviewType have already been set by then.
//
// NOTE(review): because the modulus is len(ReviewType_name)-1, the largest
// enum value is presumably never produced — confirm whether that is intended.
func fillRandomParams(rr *ReviewRule, length int) error {
    rr.Participants = genStringSlice(length)

    modulus := len(ReviewType_name) - 1
    rr.ReviewType = ReviewType(length % modulus)

    fields := map[string]interface{}{"vote": genInterfaceSlice(length)}
    built, buildErr := structpb.NewStruct(fields)
    if buildErr != nil {
        return errors.Wrap(buildErr, "structpb.NewStruct")
    }

    rr.Vote = built
    return nil
}

// Test_proto_gzip measures how much gzip shrinks the proto encoding of a
// ReviewRule at several payload sizes.
//
// For each size it fills the shared rule, marshals it, gzips the bytes, and
// prints the proto size, bytes written, compressed size and savings ratio.
// It then decompresses the result, prints the bytes read, and asserts that
// the round trip reproduces the original proto bytes exactly.
func Test_proto_gzip(t *testing.T) {
    rr := newTestReviewRule()
    tests := []struct {
        name   string
        length int
    }{
        {"0", 0},
        {"1", 1},
        {"2", 2},
        {"20", 20},
        {"200", 200},
        {"2000", 2000},
        {"20000", 20000},
    }
    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            if !assert.NoError(t, fillRandomParams(rr, tt.length)) {
                return
            }

            protoBytes, err := proto.Marshal(rr)
            if !assert.NoError(t, err) {
                return
            }

            var compressed bytes.Buffer
            zw := gzip.NewWriter(&compressed)

            written, err := zw.Write(protoBytes)
            assert.NoError(t, err)
            // Close must run before the buffer is measured or read back:
            // the writer holds pending data until then.
            if !assert.NoError(t, zw.Close()) {
                return
            }

            fmt.Printf("name[%s]: proto[%d], gzip wi[%d], gzip[%d], ratio[%.2f], ", tt.name, len(protoBytes), written, compressed.Len(), float64(written-compressed.Len())/float64(written)*100)

            zr, err := gzip.NewReader(&compressed)
            if !assert.NoError(t, err) {
                // Stop here: the reader must not be used or closed after a
                // failed NewReader.
                return
            }
            defer zr.Close()

            var decompressed bytes.Buffer
            read, err := io.Copy(&decompressed, zr)
            assert.NoError(t, err)

            fmt.Printf("gzip ri[%d], read[%d]\n", read, decompressed.Len())

            // Compare contents, not just lengths, so corruption that keeps
            // the size unchanged is still caught.
            assert.True(t, bytes.Equal(protoBytes, decompressed.Bytes()), "decompressed bytes differ from the original proto encoding")
        })
    }
}

func genStringSlice(length int) []string {
    s := make([]string, length)
    for i := 0; i < length; i   {
        s = append(s, genRandomString())
    }

    return s
}

func genInterfaceSlice(length int) []interface{} {
    s := make([]interface{}, length)
    for i := 0; i < length; i   {
        s = append(s, genRandomString())
    }

    return s
}

// genRandomString returns the string form of a newly generated UUID.
func genRandomString() string {
    id := uuid.New()
    return id.String()
}

// Test_idempotent_gzip checks that gzip-compressing the same proto bytes many
// times always yields the same output.
//
// For each frequency it compresses the payload that many times in concurrent
// goroutines, collects every result, and asserts that all results share one
// base64 encoding and one length.
func Test_idempotent_gzip(t *testing.T) {
    rr := newTestReviewRule()
    if !assert.NoError(t, fillRandomParams(rr, 20000)) {
        return
    }
    protoBytes, err := proto.Marshal(rr)
    if !assert.NoError(t, err) {
        return
    }
    tests := []struct {
        name      string
        frequency int
    }{
        {"2", 2},
        {"5", 5},
        {"10", 10},
        {"20", 20},
        {"50", 50},
        {"100", 100},
    }
    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            stream := make(chan []byte)
            eg, egCtx := errgroup.WithContext(context.Background())
            for i := 0; i < tt.frequency; i++ {
                eg.Go(func() error {
                    var buf bytes.Buffer
                    zw := gzip.NewWriter(&buf)
                    // Errors stay local to each goroutine; writing to a
                    // variable shared with the enclosing test would race.
                    if _, err := zw.Write(protoBytes); err != nil {
                        return errors.Wrap(err, "zw.Write")
                    }

                    if err := zw.Close(); err != nil {
                        return errors.Wrap(err, "zw.Close")
                    }

                    // Give up on sending once the group context is done, so
                    // this goroutine cannot block on the channel forever.
                    select {
                    case <-egCtx.Done():
                        return errors.New("eg context done")
                    case stream <- buf.Bytes():
                    }

                    return nil
                })
            }

            // Close the channel once every producer has returned so the
            // collecting loop below terminates.
            go func() {
                // The error is deliberately ignored here; it is checked by
                // the eg.Wait call after the collecting loop.
                _ = eg.Wait()
                close(stream)
            }()

            base64Set := make(map[string]struct{})
            lenSet := make(map[int]struct{})
            for gzipBytes := range stream {
                base64Set[base64.StdEncoding.EncodeToString(gzipBytes)] = struct{}{}
                lenSet[len(gzipBytes)] = struct{}{}
            }
            assert.NoError(t, eg.Wait())
            assert.Len(t, base64Set, 1)
            assert.Len(t, lenSet, 1)
        })
    }
}

文件存储腾讯云测试服务

0 人点赞