记某大用户Elasticsearch数据方案处理过程<二>

2022-11-18 17:06:44 浏览数 (1)

一、前言

客户需要将同一个集群中 A 索引的字段内容,同步到 B 索引中另一个数据类型不同、名称也不同的字段上:即把之前对应的数字,存到新 mapping 对应字段下的 id 里,如下图所示:

索引需求

实际背景:考虑到客户这里的目标字段名已更改,这里不能使用convert实现转换。

二、改造过程

方案思路:这里还是需要使用 pipeline + reindex 的方式实现数据的改造。考虑到目标索引的字段名改了,不能直接 convert,那就只能叠加多个 processor:先用 set 新建一个字段(比如这里是 A.b.c.id),再用 convert 把该 id 的数据类型转换为 string,最后 remove 掉老字段。这里要用到 set 的 copy_from 参数,表示从某个字段里拷贝内容出来。

三、实践

#第一步:创建一个pipeline

代码语言:javascript复制
# Ingest pipeline applied while reindexing index-A into index-B.
# Per document it:
#   1. copies the numeric top-level `lastModifiedBy` into `lastModifiedByhez.id`,
#   2. converts that copy to a string,
#   3. removes the old `lastModifiedBy` field.
# Documents that have no `lastModifiedBy` (or have it set to null) skip the copy
# and the convert and are indexed as they are, instead of raising a pipeline
# error that would show up as a reindex failure.
# Only the top-level field is handled; `lastModifiedBy` inside array elements is
# left as it is.
PUT _ingest/pipeline/set_bar_test
{
  "description": "TO remove some fileds for buiness!",
  "processors": [
    {
      "set": {
        "if": "ctx.lastModifiedBy != null",
        "field": "lastModifiedByhez.id",
        "copy_from": "lastModifiedBy"
      }
    },
    {
      "convert": {
        "field": "lastModifiedByhez.id",
        "type": "string",
        "ignore_missing": true
      }
    },
    {
      "remove": {
        "field": "lastModifiedBy",
        "ignore_missing": true
      }
    }
  ]
}

#第二步,创建一个doc,插入一条原始数据

代码语言:javascript复制
# Sample source document for index-A. The top-level `lastModifiedBy` is a number;
# it is the field the set_bar_test pipeline moves to `lastModifiedByhez.id`.
# Timestamps carry an explicit +08:00 UTC offset (ISO 8601).
PUT index-A/_doc/1
{
  "id": "440507808333611039",
  "createdTime": "2021-12-29T21:49:24+08:00",
  "lastModifiedTime": "2022-01-10T21:37:41+08:00",
  "createdBy": 3007443382,
  "lastModifiedBy": 3007441089,
  "attachments": [
    {
      "id": "440514506553212948",
      "createdTime": "2021-12-29T22:13:41+08:00",
      "lastModifiedTime": "2021-12-29T22:13:41+08:00",
      "createdBy": 3007443382,
      "lastModifiedBy": 3007443382,
      "attachmentId": "440514506553212947",
      "fileName": "放大猫.jpg",
      "workorderId": "440507808333611039",
      "fileType": "JPEG",
      "attachmentUrl": null,
      "idempotentToken": null,
      "externalId": null,
      "source": null,
      "showUrl": null,
      "fileSize": null,
      "version": 1
    }
  ],
  "chatInfo": [
    {
      "id": "440507808333611040",
      "createdTime": "2021-12-29T21:49:24+08:00",
      "lastModifiedTime": "2021-12-29T21:49:24+08:00",
      "createdBy": 3007443382,
      "lastModifiedBy": 3007443382,
      "conditions": """{"channel":0,"pageType":0,"recordType":52,"type":1,"subType":1,"socialId":"1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw","id":"1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw","startTime":"1640781203000000","endTime":"1640783010000000","count":200,"kfext":3007443382,"sessionId":"commonsession_ssc_2852199210_1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw_1640781203156","_t":1640785575340,"_bqq_csrf":"ea21ffef2d961b81534dd14e486f07fbe2803c98"}""",
      "workorderId": "440507808333611039",
      "staffName": "maomaozzhou",
      "convType": "52",
      "convStartTime": "1640781203",
      "idempotentToken": null,
      "externalId": null,
      "source": null,
      "version": 1
    },
    {
      "id": "440507808333611041",
      "createdTime": "2021-12-29T21:49:24+08:00",
      "lastModifiedTime": "2021-12-29T21:49:24+08:00",
      "createdBy": 3007443382,
      "lastModifiedBy": 3007443382,
      "conditions": """{"channel":0,"pageType":0,"recordType":52,"type":1,"subType":1,"socialId":"1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw","id":"1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw","startTime":"1640779376000000","endTime":"1640779422000000","count":200,"kfext":3007443382,"sessionId":"commonsession_ssc_2852199210_1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw_1640779376130","_t":1640785575340,"_bqq_csrf":"ea21ffef2d961b81534dd14e486f07fbe2803c98"}""",
      "workorderId": "440507808333611039",
      "staffName": "maomaozzhou",
      "convType": "52",
      "convStartTime": "1640779376",
      "idempotentToken": null,
      "externalId": null,
      "source": null,
      "version": 1
    },
    {
      "id": "440507808333611042",
      "createdTime": "2021-12-29T21:49:24+08:00",
      "lastModifiedTime": "2021-12-29T21:49:24+08:00",
      "createdBy": 3007443382,
      "lastModifiedBy": 3007443382,
      "conditions": """{"channel":0,"pageType":0,"recordType":52,"type":1,"subType":1,"socialId":"1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw","id":"1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw","startTime":"1640768797000000","endTime":"1640770597000000","count":200,"kfext":3007443382,"sessionId":"commonsession_ssc_2852199210_1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw_1640768797266","_t":1640785575340,"_bqq_csrf":"ea21ffef2d961b81534dd14e486f07fbe2803c98"}""",
      "workorderId": "440507808333611039",
      "staffName": "maomaozzhou",
      "convType": "52",
      "convStartTime": "1640768797",
      "idempotentToken": null,
      "externalId": null,
      "source": null,
      "version": 1
    },
    {
      "id": "440507808333611043",
      "createdTime": "2021-12-29T21:49:24+08:00",
      "lastModifiedTime": "2021-12-29T21:49:24+08:00",
      "createdBy": 3007443382,
      "lastModifiedBy": 3007443382,
      "conditions": """{"channel":0,"pageType":0,"recordType":52,"type":1,"subType":1,"socialId":"1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw","id":"1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw","startTime":"1640765140000000","endTime":"1640766946000000","count":200,"kfext":3007443382,"sessionId":"commonsession_ssc_2852199210_1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw_1640765140433","_t":1640785575340,"_bqq_csrf":"ea21ffef2d961b81534dd14e486f07fbe2803c98"}""",
      "workorderId": "440507808333611039",
      "staffName": "maomaozzhou",
      "convType": "52",
      "convStartTime": "1640765140",
      "idempotentToken": null,
      "externalId": null,
      "source": null,
      "version": 1
    },
    {
      "id": "440507808333611044",
      "createdTime": "2021-12-29T21:49:24+08:00",
      "lastModifiedTime": "2021-12-29T21:49:24+08:00",
      "createdBy": 3007443382,
      "lastModifiedBy": 3007443382,
      "conditions": """{"channel":0,"pageType":0,"recordType":52,"type":1,"subType":1,"socialId":"1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw","id":"1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw","startTime":"1640764951000000","endTime":"1640764992000000","count":200,"kfext":3007443382,"sessionId":"commonsession_ssc_2852199210_1346_ww7bb062efaf5254e8_wonGLYDAAACdLbWuNl-S-mbsa0v1_tvw_1640764951672","_t":1640785575340,"_bqq_csrf":"ea21ffef2d961b81534dd14e486f07fbe2803c98"}""",
      "workorderId": "440507808333611039",
      "staffName": "maomaozzhou",
      "convType": "52",
      "convStartTime": "1640764951",
      "idempotentToken": null,
      "externalId": null,
      "source": null,
      "version": 1
    }
  ],
  "title": "maomao测试",
  "description": "<p>你放不下的人和事,岁月都会替你轻描淡写</p>",
  "status": {
    "id": "376287300427976726",
    "displayName": null,
    "type": null
  },
  "category": null,
  "priority": {
    "displayName": null,
    "id": "30"
  },
  "socialAccount": null,
  "socialAccountType": null,
  "phone": "18392385002",
  "email": null,
  "owner": {
    "id": "3007443382",
    "displayName": null,
    "face": null,
    "account": null,
    "gender": null,
    "onlineSetting": null,
    "ownerGroupId": null,
    "ownerGroupName": null
  },
  "createdByName": null,
  "associationUser": "",
  "associationUserName": "maomaozzhou",
  "latestCommentFrom": "customer_service",
  "latestUpdateFrom": null,
  "customerId": "",
  "customerName": null,
  "statusClosedTime": null,
  "createFrom": "COMBINER_WORKBENCH",
  "template": {
    "id": "440431363422842891",
    "displayName": null
  },
  "callLog": null,
  "ownerChangeDate": "2021-12-29T21:49:24+08:00",
  "ownerDistrict": null,
  "firstRespOvertime": "NONE",
  "firstResolveOvertime": "NONE",
  "ownerGroup": null,
  "idempotentToken": "4b89fdac-8345-4c14-aac9-5ab1995d40d4",
  "externalId": null,
  "source": null,
  "version": 12,
  "tenantId": "2852199210"
}

# 第三步,reindex 拷贝

代码语言:javascript复制
# Copy the documents of index-A into index-B, passing each one through the
# set_bar_test ingest pipeline before it is written to the destination.
POST _reindex
{
  "source": { "index": "index-A" },
  "dest": { "index": "index-B", "pipeline": "set_bar_test" }
}

发现源数据内容就存进这个目标index了。 而且目标字段的数据类型也没错。

# 目标query

代码语言:javascript复制

# Verify the migration: find the document through the new field, using the
# value that `lastModifiedBy` held in the source document.
GET index-B/_search
{
  "query": {
    "match": { "lastModifiedByhez.id": "3007441089" }
  }
}

# Same search endpoint with no request body, i.e. no query restriction.
GET index-B/_search

四、总结

还是需要结合pipeline,多组合实现数据处理。对数据类型转换、字段预处理很有帮助。这里再记录一下。

0 人点赞