- 查询每个分区有多少重复数据
SELECT
partition,
sumIf(cnt, type = 'with_dup') AS count_with_duplicates,
sumIf(cnt, type = 'dedup') AS count_after_dedup,
count_with_duplicates - count_after_dedup AS duplicate_rows,
round((duplicate_rows * 100.) / count_with_duplicates, 2) AS duplicate_percentage
FROM
(
SELECT
_partition_id AS partition,
'with_dup' AS type,
count() AS cnt
FROM table_name
GROUP BY partition
UNION ALL
SELECT
_partition_id AS partition,
'dedup' AS type,
count() AS cnt
FROM table_name
FINAL
GROUP BY partition
)
GROUP BY partition
ORDER BY duplicate_rows DESC
- 一个表每个分区有多少part
正常一个分区一个part,但单个part如果大于
max_bytes_to_merge_at_max_space_in_pool = 161061273600 -- 150GB 可能就超过限制不会在合并了
SELECT
partition,
count() AS parts_count,
groupArray(level) AS levels,
min(modification_time) AS oldest_part,
max(modification_time) AS newest_part,
dateDiff('hour', oldest_part, newest_part) AS hours_since_oldest
FROM system.parts
WHERE (`table` = 'your_table') AND active
GROUP BY partition
ORDER BY parts_count DESC