1. 查询每个分区有多少重复数据
SELECT
    partition,
    sumIf(cnt, type = 'with_dup') AS count_with_duplicates,
    sumIf(cnt, type = 'dedup') AS count_after_dedup,
    count_with_duplicates - count_after_dedup AS duplicate_rows,
    round((duplicate_rows * 100.) / count_with_duplicates, 2) AS duplicate_percentage
FROM
(
    SELECT
        _partition_id AS partition,
        'with_dup' AS type,
        count() AS cnt
    FROM table_name
    GROUP BY partition
    UNION ALL
    SELECT
        _partition_id AS partition,
        'dedup' AS type,
        count() AS cnt
    FROM table_name
    FINAL
    GROUP BY partition
)
GROUP BY partition
ORDER BY duplicate_rows DESC
  1. 一个表每个分区有多少part 正常一个分区一个part,但单个part如果大于 max_bytes_to_merge_at_max_space_in_pool = 161061273600 -- 150GB 可能就超过限制不会在合并了
SELECT
    partition,
    count() AS parts_count,
    groupArray(level) AS levels,
    min(modification_time) AS oldest_part,
    max(modification_time) AS newest_part,
    dateDiff('hour', oldest_part, newest_part) AS hours_since_oldest
FROM system.parts
WHERE (`table` = 'your_table') AND active
GROUP BY partition
ORDER BY parts_count DESC