Clickhouse 库表介绍
Sentry 和数据分析相关的表,只有四张,如下:
- errors_local : 存储所有的 issue 数据
- transactions_local :存储所有的 trace 、span 数据,以及自定义的 measurements
- sessions_raw_local :存储 session 的原始数据信息,崩溃率的原始数据
- sessions_hourly_local :存储 session 的小时粒度的统计聚合信息
分析案例
需求1:以下 SQL 用于实时的分析【启动开屏广告 P95 耗时】
SELECT
extract(`release`, '[0-9] .[0-9] .[0-9]') AS `version`,
(quantile(0.95)(if(has(measurements.key, 'tap_app_start_cold_time'), arrayElement(measurements.value, indexOf(measurements.key, 'tap_app_start_cold_time')), NULL) as fps)) as avg_fps
FROM
transactions_local
WHERE
`start_ts` BETWEEN timestamp_sub(now(), INTERVAL 28 DAY) and now()
AND fps > 0
AND `project_id` IN (148)
AND transaction_op = 'tap.biz.page.load'
AND version NOT IN ('2.53.1', '2.54.0')
AND `environment` IN ('rnd','release')
GROUP BY version
需求2:按天持久化【自定义埋点数据】。
针对这个进阶需求,可以采用 Clickhouse 的物化视图功能(CREATE VIEW | ClickHouse Docs )对源表进行聚合处理成物化表。然后查询物化图即可。
首先创建一张表,用于存储物化视图的结果
CREATE TABLE overview_daily.`transactions_daily_local`
(
`daytime` DateTime('Asia/Shanghai'),
`project_id` UInt64,
`major_release` String,
`os` String,
`area` String,
`app_start_cold_time_duration_p95` AggregateFunction (quantilesIf (0.5, 0.95), Float64, UInt8),
`app_start_cold_duration_p95` AggregateFunction (quantilesIf (0.5, 0.95), Float64, UInt8),
`first_page_duration_p95` AggregateFunction (quantilesIf (0.5, 0.95), Float64, UInt8),
`gamedetail_page_duration_p95` AggregateFunction (quantilesIf (0.5, 0.95), Float64, UInt8),
`ui_show_duration_p95` AggregateFunction (quantilesIf (0.5, 0.95), Float64, UInt8)
)
ENGINE = AggregatingMergeTree()
PARTITION BY toMonday(daytime)
ORDER BY (project_id,
major_release,
daytime)
SETTINGS index_granularity = 8192;
物化视图创建:
#---- app_start_cold_time_duration_p95 的物化视图
CREATE MATERIALIZED VIEW overview_daily.transactions_daily_mv_app_start_cold_time_duration_p95 to overview_daily.transactions_daily_local AS
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A') AS os,
multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A') AS area,
extract(`release`,'[0-9] .[0-9] .[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(if(has(measurements.key,'tap_app_start_cold_time'),
arrayElement(measurements.value,
indexOf(measurements.key,
'tap_app_start_cold_time')),
0) as temp_duration,(temp_duration !=4294967295))) as app_start_cold_time_duration_p95
FROM
`default`.transactions_local
WHERE
transaction_op = 'tap.biz.page.load'
AND `project_id` IN (148, 175, 92, 177, 140)
AND `environment` IN ('release')
and arrayElement(measurements.value,
indexOf(measurements.key,
'tap_app_start_cold_time')) > 0
GROUP BY
major_release,
daytime,
project_id;
#---- app_start_cold_duration_p95 的物化视图
CREATE MATERIALIZED VIEW overview_daily.transactions_daily_mv_app_start_cold_duration_p95 to overview_daily.transactions_daily_local AS
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A') AS os,
multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A') AS area,
extract(`release`,'[0-9] .[0-9] .[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(if(has(measurements.key,'app_start_cold'),
arrayElement(measurements.value,indexOf(measurements.key,'app_start_cold')),0)
as temp_duration,(temp_duration !=4294967295))) as app_start_cold_duration_p95
FROM
`default`.transactions_local
WHERE
`project_id` IN (148, 175, 92, 177, 140)
AND `environment` IN ('release')
and arrayElement(measurements.value,
indexOf(measurements.key,
'app_start_cold')) > 0
GROUP BY
major_release,
daytime,
project_id
#------ ui_show_duration_p95 的物化视图
CREATE MATERIALIZED VIEW overview_daily.transactions_daily_mv_ui_show_duration_p95 to overview_daily.transactions_daily_local AS
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A') AS os,
multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A') AS area,
extract(`release`,'[0-9] .[0-9] .[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(if(has(measurements.key,'tap_loading_more_ui_show_total'),
arrayElement(measurements.value,indexOf(measurements.key,'tap_loading_more_ui_show_total')),0)
as temp_duration,(temp_duration !=4294967295))) as ui_show_duration_p95
FROM
`default`.transactions_local
WHERE
`project_id` IN (148, 175, 92, 177, 140)
AND `environment` IN ('release','rnd')
AND transaction_op = 'tap.loading.more'
and arrayElement(measurements.value,
indexOf(measurements.key,
'tap_loading_more_ui_show_total')) > 0
GROUP BY
major_release,
daytime,
project_id
#------- gamedetail_page_duration_p95 的物化视图
CREATE MATERIALIZED VIEW overview_daily.transactions_daily_mv_gamedetail_page_duration_p95 to overview_daily.transactions_daily_local AS
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A') AS os,
multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A') AS area,
extract(`release`,'[0-9] .[0-9] .[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(toFloat64(duration),( duration!= 4294967295))) AS gamedetail_page_duration_p95
FROM
`default`.transactions_local
WHERE
`project_id` IN (148, 175, 92, 177, 140)
AND `environment` IN ('release')
AND transaction_op = 'ui.load'
AND `transaction_name` in ('GameDetailNewPager','GameDetailPager','GameDetail.GameDetailV2ViewController','TapTap.TapGameDetailViewController', 'TapTap.TapGameDetailInfoViewController', 'TapGameDetailViewController')
GROUP BY
major_release,
daytime,
project_id
#----------- first_page_duration_p95 的物化视图
CREATE MATERIALIZED VIEW overview_daily.transactions_daily_mv_first_page_duration_p95 to overview_daily.transactions_daily_local AS
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A') AS os,
multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A') AS area,
extract(`release`,'[0-9] .[0-9] .[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(toFloat64(duration),( duration!= 4294967295))) AS first_page_duration_p95
FROM
`default`.transactions_local
WHERE
`project_id` IN (148, 175, 92, 177, 140)
AND `environment` IN ('release')
AND transaction_op = 'ui.load'
AND `transaction_name` in ('AdvPageActivity', 'MainAct', 'HomePageActivity', 'TapHomeFeedListViewController', 'TapTap.TapHomeFeedListViewController', 'TapTap.TapHomeFeedsListViewController', 'HomeModule.ForYouViewController')
GROUP BY
major_release,
daytime,
project_id
物化视图创建后,只会对新增的数据生效,如果要处理历史数据,需要手动插入。如:
#从历史结果表里插入
INSERT INTO
overview_daily.transactions_daily_local (
daytime,
major_release,
os,
area,
app_start_cold_duration_p95,
first_page_duration_p95,
gamedetail_page_duration_p95,
ui_show_duration_p95
)
select
daytime,
major_release,
os,
area,
quantilesIfState (0.5, 0.95) (
app_start_cold_duration_p95,
(app_start_cold_duration_p95 != 4294967295)
) as app_start_cold_duration_p95,
quantilesIfState (0.5, 0.95) (
first_page_duration_p95,(first_page_duration_p95 != 4294967295)
) as first_page_duration_p95,
quantilesIfState (0.5, 0.95) (
gamedetail_page_duration_p95,
(gamedetail_page_duration_p95 != 4294967295)
) as gamedetail_page_duration_p95,
quantilesIfState (0.5, 0.95) (
ui_show_duration_p95,
(ui_show_duration_p95 != 4294967295)
) as ui_show_duration_p95
FROM
apm.sentry_overview_daily
GROUP BY daytime , major_release , os , area
#---- app_start_cold_time_duration_p95
INSERT INTO
overview_daily.transactions_daily_local (
daytime,
project_id,
os,
area,
major_release,
app_start_cold_time_duration_p95
)
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A') AS os,
multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A') AS area,
extract(`release`,
'[0-9] .[0-9] .[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(if(has(measurements.key,
'tap_app_start_cold_time'),
arrayElement(measurements.value,
indexOf(measurements.key,
'tap_app_start_cold_time')),
0) as fps,(fps !=4294967295))) as app_start_cold_time_duration_p95
FROM
`default`.transactions_local
WHERE
arrayElement(measurements.value,
indexOf(measurements.key,
'tap_app_start_cold_time')) > 0
AND transaction_op = 'tap.biz.page.load'
AND major_release NOT IN ('2.53.1', '2.54.0')
AND `project_id` IN (148, 175, 92, 177, 140)
AND `environment` IN ('release')
GROUP BY
major_release,
daytime,
project_id;
#---- ui_show_duration_p95
INSERT INTO
overview_daily.transactions_daily_local (
daytime,
project_id,
os,
area,
major_release,
ui_show_duration_p95
)
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A') AS os,
multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A') AS area,
extract(`release`,'[0-9] .[0-9] .[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(if(has(measurements.key,'tap_loading_more_ui_show_total'),
arrayElement(measurements.value,indexOf(measurements.key,'tap_loading_more_ui_show_total')),0)
as temp_duration,(temp_duration !=4294967295))) as ui_show_duration_p95
FROM
`default`.transactions_local
WHERE
`project_id` IN (148, 175, 92, 177, 140)
AND `environment` IN ('release','rnd')
AND transaction_op = 'tap.loading.more'
and arrayElement(measurements.value,
indexOf(measurements.key,
'tap_loading_more_ui_show_total')) > 0
and daytime ='2023-06-07 00:00:00'
GROUP BY
major_release,
daytime,
project_id
#------- gamedetail_page_duration_p95、
INSERT INTO
overview_daily.transactions_daily_local (
daytime,
project_id,
os,
area,
major_release,
gamedetail_page_duration_p95
)
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A') AS os,
multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A') AS area,
extract(`release`,'[0-9] .[0-9] .[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(toFloat64(duration),( duration!= 4294967295))) AS gamedetail_page_duration_p95
FROM
`default`.transactions_local
WHERE
`project_id` IN (148, 175, 92, 177, 140)
AND `environment` IN ('release')
AND transaction_op = 'ui.load'
AND `transaction_name` in ('GameDetailNewPager','GameDetailPager','GameDetail.GameDetailV2ViewController','TapTap.TapGameDetailViewController', 'TapTap.TapGameDetailInfoViewController', 'TapGameDetailViewController')
and daytime ='2023-06-07 00:00:00'
GROUP BY
major_release,
daytime,
project_id
#----------- first_page_duration_p95
INSERT INTO
overview_daily.transactions_daily_local (
daytime,
project_id,
os,
area,
major_release,
first_page_duration_p95
)
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A') AS os,
multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A') AS area,
extract(`release`,'[0-9] .[0-9] .[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(toFloat64(duration),( duration!= 4294967295))) AS first_page_duration_p95
FROM
`default`.transactions_local
WHERE
`project_id` IN (148, 175, 92, 177, 140)
AND `environment` IN ('release')
AND transaction_op = 'ui.load'
AND `transaction_name` in ('AdvPageActivity', 'MainAct', 'HomePageActivity', 'TapHomeFeedListViewController', 'TapTap.TapHomeFeedListViewController', 'TapTap.TapHomeFeedsListViewController', 'HomeModule.ForYouViewController')
and daytime ='2023-06-07 00:00:00'
GROUP BY
major_release,
daytime,
project_id
后续分析直接查询物化视图即可
select project_id FROM overview_daily.transactions_daily_mv_local
注意:AggregateFunction 类型标注的字段,需要使用 quantilesIfMerge 函数查询,比如
SELECT
daytime AS time,
major_release,
quantilesIfMerge(0.95)(app_start_cold_time_duration_p95)[1] AS `duration_p95`
FROM
overview_daily.transactions_daily_local
WHERE
daytime >= '2023-05-07 00:00:00'
AND os = 'android'
AND area = 'cn'
GROUP BY
`time`,
major_release
HAVING duration_p95>=0
需求3:按天持久化 SESSION 、SESSION_CRASHED 数据
依然采用 Clickhouse 的物化视图功能(CREATE VIEW | ClickHouse Docs )对源表进行聚合处理成物化表。然后查询物化图即可。
首先创建一张表,用于存储物化视图的结果,注意:目标表的表引擎需要使用 SummingMergeTree ,用于实时的聚合
CREATE TABLE overview_daily.`sessions_daily_local`
(
`daytime` DateTime('Asia/Shanghai'),
`major_release` String,
`os` String,
`area` String,
`total_session_crashed` Nullable(Float64),
`total_session` Nullable(Float64)
)
ENGINE = SummingMergeTree()
PARTITION BY toMonday(daytime)
ORDER BY (os,
major_release,
area,
daytime)
SETTINGS index_granularity = 8192;
然后创建物化视图
CREATE MATERIALIZED VIEW overview_daily.sessions_daily_mv_local to overview_daily.sessions_daily_local AS
SELECT
toStartOfDay (`started`, 'Asia/Shanghai') AS `daytime`,
extract(`release`, '[0-9] .[0-9] .[0-9] ') AS `major_release`,
multiIf (
`project_id` in (148, 175),
'android',
`project_id` in (92, 177, 140),
'ios',
'N/A'
) AS os,
multiIf (
`project_id` in (148, 92, 177),
'cn',
`project_id` in (175, 140),
'intl',
'N/A'
) AS area,
plus (
countIfMerge (sessions_crashed),
sumIfMerge (sessions_crashed_preaggr)
) AS total_session_crashed,
plus (
countIfMerge (sessions),
sumIfMerge (sessions_preaggr)
) AS total_session
FROM
default.sessions_hourly_local
WHERE
`project_id` IN (148, 175, 92, 177, 140)
AND `environment` IN ('release')
GROUP BY
`daytime`,
`major_release`,
os,
area
插入历史记录的数据
INSERT INTO overview_daily.sessions_daily_local
SELECT
toStartOfDay (`started`, 'Asia/Shanghai') AS `daytime`,
extract(`release`, '[0-9] .[0-9] .[0-9] ') AS `major_release`,
multiIf (
`project_id` in (148, 175),
'android',
`project_id` in (92, 177, 140),
'ios',
'N/A'
) AS os,
multiIf (
`project_id` in (148, 92, 177),
'cn',
`project_id` in (175, 140),
'intl',
'N/A'
) AS area,
plus (
countIfMerge (sessions_crashed),
sumIfMerge (sessions_crashed_preaggr)
) AS total_session_crashed,
plus (
countIfMerge (sessions),
sumIfMerge (sessions_preaggr)
) AS total_session
FROM
default.sessions_hourly_local
WHERE
`project_id` IN (148, 175, 92, 177, 140)
AND `environment` IN ('release')
and daytime = '2023-06-06 00:00:00'
GROUP BY
`daytime`,
`major_release`,
os,
area
需求4:按 5 分钟粒度持久化 CRASH_COUNT 、UNIQUE_USER_COUNT 数据
依然采用 Clickhouse 的物化视图功能(CREATE VIEW | ClickHouse Docs )对源表进行聚合处理成物化表。然后查询物化图即可。
首先创建一张表,用于存储物化视图的结果,注意:目标表的表引擎需要使用 SummingMergeTree ,用于实时的聚合
CREATE TABLE overview_daily.`errors_5min_local`
(
`mintime` DateTime('Asia/Shanghai'),
`os` String,
`area` String,
`crash_count` Float64,
`unique_user_count` Float64
)
ENGINE = SummingMergeTree()
PARTITION BY toMonday(mintime)
ORDER BY (os,
area,
mintime)
SETTINGS index_granularity = 8192;
创建物化视图
CREATE MATERIALIZED VIEW overview_daily.errors_5min_mv_local to overview_daily.`errors_5min_local` AS
SELECT toStartOfFiveMinute(`timestamp`, 'Asia/Shanghai') as `mintime`,
multiIf(`project_id` in (148, 175), 'android', `project_id` in (92,177, 140), 'ios','N/A') AS os,
multiIf(`project_id` in (148, 92,177), 'cn', `project_id` in (175,140), 'intl','N/A') AS area,
count(*) AS crash_count,
(ifNull(uniq((nullIf(user, '') AS _snuba_user)), 0) AS unique_user_count)
FROM default.errors_local
WHERE
equals(deleted, 0) AND equals((type AS _snuba_type), 'error')
and `project_id` in (148, 175, 92, 177, 140)
AND equals(ifNull((level AS `_snuba_tags[level]`), ''), 'fatal')
AND `environment` in ('release')
GROUP BY `mintime`,os,area
插入历史记录
#从历史结果表里插入
INSERT INTO
overview_daily.`errors_5min_local` (
mintime,
os,
area,
crash_count,
unique_user_count
)
select
time as mintime,
os,
area,
crash_count,
unique_user_count
FROM
apm.sentry_crash_user_5min
#------ errors_local 表导入
INSERT INTO overview_daily.errors_5min_local
SELECT toStartOfFiveMinute(`timestamp`, 'Asia/Shanghai') as `mintime`,
multiIf(`project_id` in (148, 175), 'android', `project_id` in (92,177, 140), 'ios','N/A') AS os,
multiIf(`project_id` in (148, 92,177), 'cn', `project_id` in (175,140), 'intl','N/A') AS area,
count(*) AS crash_count,
(ifNull(uniq((nullIf(user, '') AS _snuba_user)), 0) AS unique_user_count)
FROM default.errors_local
PREWHERE `project_id` in (148, 175, 92, 177, 140)
WHERE
equals(deleted, 0) AND equals((type AS _snuba_type), 'error')
AND equals(ifNull((level AS `_snuba_tags[level]`), ''), 'fatal')
AND `environment` in ('release')
and mintime > '2023-06-09 00:00:00'
GROUP BY `mintime`,os,area