> clickhouse-debug-bundle

Collect ClickHouse diagnostic data — system tables, query logs, merge status, and server metrics for support tickets and troubleshooting. Use when investigating persistent issues, preparing debug artifacts, or collecting evidence for ClickHouse support. Trigger: "clickhouse debug", "clickhouse diagnostics", "clickhouse support bundle", "collect clickhouse logs", "clickhouse system tables".

fetch
$curl "https://skillshub.wtf/jeremylongshore/claude-code-plugins-plus-skills/clickhouse-debug-bundle?format=md"
SKILL.md · clickhouse-debug-bundle

ClickHouse Debug Bundle

Overview

Collect comprehensive diagnostic data from ClickHouse system tables for troubleshooting performance issues, merge problems, or support escalation.

Prerequisites

  • Access to ClickHouse with system.* table read permissions
  • curl or clickhouse-client available

Instructions

Step 1: Server Health Overview

-- Server version and uptime
-- First sanity check: confirms which build is running, how long the server
-- has been up (a recent restart often explains cold caches / replay lag),
-- and which database the session defaults to.
SELECT
    version()                       AS version,
    uptime()                        AS uptime_seconds,
    formatReadableTimeDelta(uptime()) AS uptime_human,
    currentDatabase()               AS current_db;

-- Global metrics snapshot
-- system.metrics holds instantaneous gauges (current counts, not cumulative
-- totals — those live in system.events). The IN-list picks the gauges most
-- useful for triage: in-flight queries, merge/mutation activity, connection
-- counts, tracked memory, and background pool saturation.
SELECT metric, value, description
FROM system.metrics
WHERE metric IN (
    'Query', 'Merge', 'PartMutation', 'ReplicatedFetch',
    'TCPConnection', 'HTTPConnection', 'MemoryTracking',
    'BackgroundMergesAndMutationsPoolTask'
);

Step 2: Disk and Table Health

-- Disk usage by table (top 20)
-- Aggregates active data parts per table: on-disk size, row count, part
-- count, and the newest part's modification time (a rough "last write"
-- indicator). Only `active` parts count — inactive parts are awaiting
-- cleanup after a merge and would double-count.
SELECT
    database,
    table,
    formatReadableSize(sum(bytes_on_disk))  AS disk_size,
    sum(rows)                               AS total_rows,
    count()                                 AS active_parts,
    max(modification_time)                  AS last_modified
FROM system.parts
WHERE active
GROUP BY database, table
ORDER BY sum(bytes_on_disk) DESC
LIMIT 20;

-- Tables with too many parts (merge pressure)
-- A high active-part count means merges are falling behind inserts
-- (too-frequent small inserts are the usual cause). The threshold of
-- 100 is a heuristic starting point, not a hard limit.
SELECT database, table, count() AS parts
FROM system.parts WHERE active
GROUP BY database, table
HAVING parts > 100
ORDER BY parts DESC;

-- Disk space per disk
-- free_pct near zero is an emergency: merges need headroom (roughly the
-- size of the parts being merged) and will stall without it.
SELECT
    name,
    path,
    formatReadableSize(total_space)     AS total,
    formatReadableSize(free_space)      AS free,
    round(free_space / total_space * 100, 1) AS free_pct
FROM system.disks;

Step 3: Query Performance Analysis

-- Slowest queries in the last 24 hours
-- type = 'QueryFinish' keeps only completed queries (each query can emit
-- several query_log rows: start, finish, exceptions). The query text is
-- truncated to 200 chars to keep the bundle readable.
-- NOTE(review): requires query logging to be enabled (log_queries = 1),
-- otherwise system.query_log is empty — see Error Handling below.
SELECT
    event_time,
    query_duration_ms,
    read_rows,
    read_bytes,
    result_rows,
    memory_usage,
    substring(query, 1, 200) AS query_preview
FROM system.query_log
WHERE type = 'QueryFinish'
  AND event_time >= now() - INTERVAL 24 HOUR
ORDER BY query_duration_ms DESC
LIMIT 20;

-- Failed queries (last 24h)
-- Two failure event types exist in system.query_log:
--   * ExceptionBeforeStart      — query rejected before execution
--                                 (syntax errors, permission denials, quota)
--   * ExceptionWhileProcessing  — query failed while running
-- Matching only the latter (as the original did) silently hides every
-- rejected query, which is often exactly what a support ticket is about.
SELECT
    event_time,
    type,
    exception_code,
    exception,
    substring(query, 1, 200) AS query_preview
FROM system.query_log
WHERE type IN ('ExceptionBeforeStart', 'ExceptionWhileProcessing')
  AND event_time >= now() - INTERVAL 24 HOUR
ORDER BY event_time DESC
LIMIT 20;

-- Query patterns (group by normalized query)
-- normalized_query_hash groups queries that differ only in literal values,
-- so repeated parameterized workloads collapse into one row. Ordered by
-- total duration (sum, not avg) to surface the patterns consuming the most
-- aggregate server time. any() picks an arbitrary representative query text.
SELECT
    normalized_query_hash,
    count()                          AS executions,
    avg(query_duration_ms)           AS avg_ms,
    max(query_duration_ms)           AS max_ms,
    sum(read_rows)                   AS total_rows_read,
    formatReadableSize(sum(read_bytes)) AS total_read,
    any(substring(query, 1, 150))    AS sample_query
FROM system.query_log
WHERE type = 'QueryFinish'
  AND event_time >= now() - INTERVAL 24 HOUR
GROUP BY normalized_query_hash
ORDER BY sum(query_duration_ms) DESC
LIMIT 20;

Step 4: Merge and Mutation Status

-- Active merges
-- system.merges lists merges currently in progress. A merge with large
-- `elapsed` and low `progress` suggests disk or CPU contention.
SELECT
    database, table, elapsed, progress,
    num_parts, result_part_name,
    formatReadableSize(total_size_bytes_compressed) AS size
FROM system.merges;

-- Pending mutations
-- Mutations (ALTER TABLE ... UPDATE/DELETE) rewrite parts asynchronously;
-- rows with is_done = 0 are still applying. Long-stuck mutations block
-- subsequent ones on the same table.
SELECT database, table, mutation_id, command, create_time, is_done
FROM system.mutations
WHERE NOT is_done
ORDER BY create_time DESC;

-- Replication health (if using ReplicatedMergeTree)
-- Only rows indicating a problem are returned: missing replicas
-- (active_replicas < total_replicas) or a backlog in the replication
-- queue (queue_size > 0). An empty result set here is the healthy case.
SELECT
    database, table,
    is_leader, total_replicas, active_replicas,
    queue_size, inserts_in_queue, merges_in_queue
FROM system.replicas
WHERE active_replicas < total_replicas OR queue_size > 0;

Step 5: Automated Debug Script

#!/bin/bash
# clickhouse-debug-bundle.sh
#
# Collects a ClickHouse diagnostic bundle over the HTTP interface
# (version/uptime, metrics, events, per-table part stats, active merges,
# recent failed queries, replica status) into a timestamped tar.gz.
#
# Configuration (environment variables):
#   CLICKHOUSE_HOST      HTTP endpoint (default: http://localhost:8123)
#   CLICKHOUSE_USER      user          (default: default)
#   CLICKHOUSE_PASSWORD  password      (default: empty)
set -euo pipefail

CH_HOST="${CLICKHOUSE_HOST:-http://localhost:8123}"
CH_USER="${CLICKHOUSE_USER:-default}"
CH_PASS="${CLICKHOUSE_PASSWORD:-}"
BUNDLE="ch-debug-$(date +%Y%m%d-%H%M%S)"
mkdir -p "$BUNDLE"

# Run a query over the HTTP interface. curl diagnostics go to the
# collector log, NOT into the data file (the previous 2>&1 merged curl
# errors into the .tsv outputs, corrupting them).
ch_query() {
  curl -sS "${CH_HOST}" \
    --user "${CH_USER}:${CH_PASS}" \
    --data-binary "$1" 2>>"$BUNDLE/collector.log"
}

# Best-effort collection: log a failure and keep going, so one denied
# system table (or a non-replicated setup) doesn't abort the whole
# bundle under `set -e`.
collect() {
  local out="$1" sql="$2"
  ch_query "$sql" > "$BUNDLE/$out" \
    || echo "WARN: query for $out failed" >> "$BUNDLE/collector.log"
}

echo "Collecting ClickHouse diagnostics..."

collect version.txt "SELECT version(), uptime(), currentDatabase()"
collect metrics.tsv "SELECT * FROM system.metrics FORMAT TabSeparatedWithNames"
collect events.tsv  "SELECT * FROM system.events FORMAT TabSeparatedWithNames"
collect tables.tsv  "SELECT database, table, count() AS parts, sum(rows) AS rows, \
  formatReadableSize(sum(bytes_on_disk)) AS size FROM system.parts \
  WHERE active GROUP BY database, table ORDER BY sum(bytes_on_disk) DESC \
  FORMAT TabSeparatedWithNames"
collect merges.tsv  "SELECT * FROM system.merges FORMAT TabSeparatedWithNames"
# Include ExceptionBeforeStart: queries rejected before execution
# (syntax/permission errors) never produce an ExceptionWhileProcessing row.
collect errors.tsv  "SELECT * FROM system.query_log \
  WHERE type IN ('ExceptionBeforeStart', 'ExceptionWhileProcessing') \
  AND event_time >= now() - INTERVAL 1 HOUR ORDER BY event_time DESC LIMIT 50 \
  FORMAT TabSeparatedWithNames"
collect replicas.tsv "SELECT * FROM system.replicas FORMAT TabSeparatedWithNames"

tar -czf "${BUNDLE}.tar.gz" "$BUNDLE"
rm -rf "$BUNDLE"
echo "Bundle created: ${BUNDLE}.tar.gz"

Step 6: Node.js Debug Collector

import { createClient } from '@clickhouse/client';

/**
 * Collects a ClickHouse diagnostic snapshot: version/uptime, the 20
 * largest tables, the 10 slowest and 10 failed queries from the last
 * hour, and currently running merges.
 *
 * All diagnostic queries are independent, so they run in parallel with
 * Promise.all instead of sequentially (the original awaited each one in
 * a loop). A failing query records `{ error }` under its key rather
 * than rejecting the whole bundle.
 *
 * @param client - a connected `@clickhouse/client` instance
 * @returns map of section name -> parsed JSON rows, or `{ error }` per
 *          section on failure
 */
async function collectDebugBundle(client: ReturnType<typeof createClient>) {
  const queries = {
    version: 'SELECT version() AS ver, uptime() AS up',
    tables: `SELECT database, table, count() AS parts, sum(rows) AS rows
             FROM system.parts WHERE active GROUP BY database, table
             ORDER BY sum(bytes_on_disk) DESC LIMIT 20`,
    slow: `SELECT query_duration_ms, substring(query,1,200) AS q
           FROM system.query_log WHERE type='QueryFinish'
           AND event_time >= now() - INTERVAL 1 HOUR
           ORDER BY query_duration_ms DESC LIMIT 10`,
    // ExceptionBeforeStart covers queries rejected before execution
    // (syntax/permission errors); they never reach ExceptionWhileProcessing.
    errors: `SELECT exception_code, exception, substring(query,1,200) AS q
             FROM system.query_log
             WHERE type IN ('ExceptionBeforeStart','ExceptionWhileProcessing')
             AND event_time >= now() - INTERVAL 1 HOUR LIMIT 10`,
    merges: 'SELECT * FROM system.merges',
  };

  const entries = await Promise.all(
    Object.entries(queries).map(async ([key, sql]) => {
      try {
        const rs = await client.query({ query: sql, format: 'JSONEachRow' });
        return [key, await rs.json()] as const;
      } catch (e) {
        // Keep the section in the bundle with the failure reason attached.
        return [key, { error: (e as Error).message }] as const;
      }
    }),
  );

  const bundle: Record<string, unknown> = Object.fromEntries(entries);
  return bundle;
}

Key System Tables

| Table | Purpose |
| --- | --- |
| system.parts | Data parts per table (size, rows, merge status) |
| system.query_log | Query history with timing and errors |
| system.metrics | Real-time server metrics (gauges) |
| system.events | Cumulative server counters |
| system.merges | Currently running merges |
| system.mutations | ALTER TABLE mutations (UPDATE/DELETE) |
| system.replicas | Replication status per table |
| system.processes | Currently executing queries |
| system.disks | Disk space and health |

Error Handling

| Issue | Cause | Solution |
| --- | --- | --- |
| system.query_log empty | Logging disabled | Set log_queries = 1 |
| Permission denied on system tables | Restricted user | Grant SELECT ON system.* |
| Bundle too large | Too much history | Narrow the time window |

Resources

Next Steps

For connection and concurrency issues, see clickhouse-rate-limits.

┌ stats

installs/wk: 0
░░░░░░░░░░
github stars: 1.7K
██████████
first seen: Mar 23, 2026
└────────────

┌ repo

jeremylongshore/claude-code-plugins-plus-skills
by jeremylongshore
└────────────