You Know, for Search
Installation (with JDK and Docker already in place)
# memory
# grep MemTotal /proc/meminfo
# processor
# cat /proc/cpuinfo | grep "model name"
# hard disk
# fdisk -l
# run a single node
# docker download
docker pull docker.elastic.co/elasticsearch/elasticsearch:7.10.1
# docker run
docker run -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:7.10.1
# check
# open http://ip:9200/
# firewall
firewall-cmd --list-all
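Once the container is up, the root endpoint answers with the cluster version and the tagline. A minimal sanity check in Python, assuming the node is reachable on localhost:9200:
# quick check: the root endpoint returns cluster info as JSON
import json, urllib.request
with urllib.request.urlopen("http://localhost:9200/") as resp:
    info = json.load(resp)
print(info["version"]["number"])  # e.g. 7.10.1
print(info["tagline"])            # You Know, for Search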
Dejavu: the missing web UI for Elasticsearch
- Data Importer: bring data from your JSON or CSV files to Elasticsearch, and set data mappings with a guided process.
- Data Browser: browse your imported data, edit it, and add new fields.
- Query Explorer: create rich filtered and query views. Export data in JSON and CSV formats.
- Search Preview: create a search UI and test search relevancy of your dataset with zero lines of code. Export the code to get a functional React web app.
Installation [chrome extension / docker / live-server]
- chrome extension - Download
- docker image - Documentation
- live demo - Demo
docker pull appbaseio/dejavu
docker run -p 1358:1358 -d appbaseio/dejavu
open http://localhost:1358/
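Note that dejavu talks to the cluster straight from the browser, so Elasticsearch must allow cross-origin requests or the UI cannot reach the node. A minimal sketch of the relevant elasticsearch.yml settings (the origin below is an assumption; adjust it to wherever dejavu is served):
# elasticsearch.yml: let the dejavu UI (port 1358 here) talk to the node
http.cors.enabled: true
http.cors.allow-origin: "http://localhost:1358"
http.cors.allow-headers: X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization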
# pip install elasticsearch
from datetime import datetime
from elasticsearch import Elasticsearch
es = Elasticsearch()
doc = {
    'author': 'kimchy',
    'text': 'Elasticsearch: cool. bonsai cool.',
    'timestamp': datetime.now(),
}
res = es.index(index="test-index", id=1, body=doc)
print(res['result'])
res = es.get(index="test-index", id=1)
print(res['_source'])
es.indices.refresh(index="test-index")
res = es.search(index="test-index", body={"query": {"match_all": {}}})
print("Got %d Hits:" % res['hits']['total']['value'])
for hit in res['hits']['hits']:
    print("%(timestamp)s %(author)s: %(text)s" % hit["_source"])
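The same client covers the rest of the document lifecycle. A short follow-on sketch, reusing the es client and test-index from above, for updating and cleaning up:
# partial update of one field, then remove the document and the index
es.update(index="test-index", id=1, body={"doc": {"text": "bonsai still cool."}})
es.delete(index="test-index", id=1)
es.indices.delete(index="test-index")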
Fetching more than 10,000 hits (the default max_result_window):
1. add conditions to narrow the query
2. split it into multiple queries (see the scroll sketch below)
3. raise the limit on the server:
- 3.1 curl -XPUT http://127.0.0.1:9200/_settings -d '{ "index" : { "max_result_window" : 100000000}}'
- 3.2 append index.max_result_window: 100000000 to the end of config/elasticsearch.yml (note: add a space at the very beginning)
print(Elasticsearch(url).search(index=idx, body=bdy, size=sz)['hits']['hits'])
print(Elasticsearch(url).count(index=index_date, q='xxx_id:xxxx')['count'])
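Before raising max_result_window (each large page costs heap), the scroll helper shipped with the client is usually the better route through deep result sets. A sketch assuming elasticsearch-py 7.x and the test-index from earlier:
# stream all hits in fixed-size batches instead of one huge page
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan

es = Elasticsearch("http://localhost:9200")
for hit in scan(es, index="test-index", query={"query": {"match_all": {}}}, size=1000):
    print(hit["_source"])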
An example: collecting statistics for a given SkyWalking service:
#!/usr/bin/env python3
import sys, base64
from elasticsearch import Elasticsearch
from proto.language_agent_v2 import trace_pb2
url_target = "http://xxx:9200"
url_reference = "http://xxx/_cat/indices?v"
idx_service_inventory = "xxx"
idx_interface_inventory = "xxx"
idx_service_prefix = "xxx-"
idx_relation_prefix = "xxx-"
idx_days = [20210101,20210102,20210103,20210104,20210105,20210106,20210107]
bdy_matchall = {"query": {"match_all": {}}}
bdy_scroll = '_scroll_id'
bdy_core = '_source'
sep = '\n------------------------------'  # visual separator between batches
def gen_date(idx_prefix): return [idx_prefix + str(day) for day in idx_days]

class SkywalkingES:
    def __init__(self, url=url_target): self.es = Elasticsearch(url)
    def _search(self, idx, bdy=bdy_matchall, sz=10000):
        return self.es.search(index=idx, body=bdy, size=sz)['hits']['hits']
    def _scroll(self, idx, bdy=bdy_matchall, wait='5m', out='5s'):
        a_search = self.es.search(index=idx, body=bdy, scroll=wait, timeout=out, size=1000)
        scrollid = a_search.get(bdy_scroll)
        # keep fetching until a scroll page comes back empty
        while a_search['hits']['hits']:
            print(a_search['hits'], sep)
            a_search = self.es.scroll(scroll_id=scrollid, scroll='2m')
        print('That was all.')
    def _count(self, query_condition, idx):
        try: return self.es.count(index=idx, q=query_condition)['count']
        except Exception as e: print(e); return -1
    def _service_count(self, idx):
        print('starting %s ...' % idx)
        for service in self._search(idx_service_inventory):
            service_name = service[bdy_core]['name'].replace(',', '#')
            service_sequence = service[bdy_core]['sequence']
            service_count = self._count('service_id:' + str(service_sequence), idx)
            # if service_count == 0: continue
            print("%s,%s,%s" % (service_name, service_sequence, service_count))
    def get_service_list(self): return self._search(idx_service_inventory)
    def get_service_count(self): return [self._service_count(idx) for idx in gen_date(idx_service_prefix)]
    def get_interface_list(self): return self._search(idx_interface_inventory)
    def get_service_by_name(self, nm):
        sequence = self._search(idx=idx_service_inventory, bdy={"query": {"match": {"name": nm}}})[0][bdy_core]['sequence']
        res = []
        for i in gen_date(idx_service_prefix):
            res.extend(self._search(idx=i, bdy={"query": {"match": {"service_id": sequence}}}))
        print(res)
        return res
    # TODO
    # tsp-apm-es-cluster-prd_segment-20210106
    def get_data_binary(self):
        test = gen_date(idx_service_prefix)[0]
        data = self._search(test)
        segment_object = trace_pb2.SegmentObject()
        for service in data:
            core = service[bdy_core]
            trace_id = core['trace_id']
            # print(trace_id)
            segment_object.ParseFromString(base64.b64decode(core['data_binary']))
            print(segment_object, sep)
        print(len(data))

# def relation():
#     # map service sequence ids to names (skip pure ip-address entries)
#     es = SkywalkingES(); service_list = es._search(idx_service_inventory)
#     service_no_ip_address_list = {}
#     for service in service_list:
#         if service[bdy_core]['is_address'] == 1: continue
#         service_no_ip_address_list[service[bdy_core]['sequence']] = service[bdy_core]['name'].replace(',', '#')
#     # dump source->dest service relations per daily index
#     for index_relation in gen_date(idx_relation_prefix):
#         print(sep); relation_list = es._search(index_relation, bdy_matchall, 10000)
#         for relation in relation_list:
#             rel = relation[bdy_core]
#             source_idx = rel['source_service_id']
#             target_idx = rel['dest_service_id']
#             try: print('%s,%s' % (service_no_ip_address_list[source_idx], service_no_ip_address_list[target_idx]))
#             except KeyError: print('%s,%s' % (source_idx, target_idx))

if __name__ == '__main__':
    swes = SkywalkingES()
    swes._scroll(idx_service_prefix + '20210106')
    # swes.get_data_binary()
    # swes.get_service_by_name('xxx')
    # print(swes.get_service_list())
    # print(swes.get_service_count())
    # print(swes.get_interface_list())
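One caveat on the _scroll method above: every scroll keeps a search context open on the server until it times out. When a run stops early it is worth releasing the context explicitly; a hedged sketch reusing the swes client from the script:
# release the scroll context instead of waiting for the timeout
resp = swes.es.search(index=idx_service_prefix + '20210106', body=bdy_matchall, scroll='2m', size=1000)
swes.es.clear_scroll(scroll_id=resp['_scroll_id'])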