In this article, we will transform ClickHouse logs to ndjson using vector.dev
With ClickHouse 22.8 version, ClickHouse supports writing logs in JSON format.
First, install vector.dev
# arm64 wget https://packages.timber.io/vector/0.15.2/vector_0.15.2-1_arm64.deb # amd64 wget https://packages.timber.io/vector/0.15.2/vector_0.15.2-1_amd64.deb dpkg -i vector_0.15.2-1_*.deb systemctl stop vector mkdir /var/log/clickhouse-server-json chown vector.vector /var/log/clickhouse-server-json usermod -a -G clickhouse vector
Add the following configs into vector config file.
vi /etc/vector/vector.toml
data_dir = "/var/lib/vector" [sources.clickhouse-log] type = "file" include = [ "/var/log/clickhouse-server/clickhouse-server.log" ] fingerprinting.strategy = "device_and_inode" message_start_indicator = '^\d+\.\d+\.\d+ \d+:\d+:\d+' multi_line_timeout = 1000 [transforms.clickhouse-log-text] inputs = [ "clickhouse-log" ] type = "remap" source = ''' . |= parse_regex!(.message, r'^(?P<timestamp>\d+\.\d+\.\d+ \d+:\d+:\d+\.\d+) \[\s?(?P<thread_id>\d+)\s?\] \{(?P<query_id>.*)\} <(?P<severity>\w+)> (?s)(?P<message>.*$)') ''' [sinks.emit-clickhouse-log-json] type = "file" inputs = [ "clickhouse-log-text" ] compression = "none" path = "/var/log/clickhouse-server-json/clickhouse-server.%Y-%m-%d.ndjson" encoding.only_fields = ["timestamp", "thread_id", "query_id", "severity", "message" ] encoding.codec = "ndjson"
Start vector
systemctl start vector
Now sink logs into the ClickHouse table
Create a table for logs.
create table default.clickhouse_logs( timestamp DateTime64(3), host LowCardinality(String), thread_id LowCardinality(String), severity LowCardinality(String), query_id String, message String) Engine = MergeTree Partition by toYYYYMM(timestamp) Order by (toStartOfHour(timestamp), host, severity, query_id);
Create and grant a user
create user chistadata identified by 'chistadata47';
grant insert on default.clickhouse_logs to chistadata;
create settings profile or replace profile_vector settings log_queries=0 readonly TO chistadata;
Add the following configs into the vector config file.
[sinks.clickhouse-output-clickhouse] inputs = ["clickhouse-log-text"] type = "clickhouse" host = "http://localhost:8123" database = "default" auth.strategy = "basic" auth.user = "chistadata" auth.password = "chistadata47" healthcheck = true table = "clickhouse_logs" encoding.timestamp_format = "unix" buffer.type = "disk" buffer.max_size = 104900000 buffer.when_full = "block" request.in_flight_limit = 20 encoding.only_fields = ["host", "timestamp", "thread_id", "query_id", "severity", "message"]
Restart vector
systemctl restart vector
Check the ClickHouse log table
select * from default.clickhouse_logs limit 5; Query id: 8feaef69-19b2-4808-9791-c4d2ffd1337a ┌───────────────timestamp─┬─host─────────┬─thread_id─┬─severity─┬─query_id─┬─message───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ 2022-10-24 14:40:51.283 │ clickhouse01 │ 1353 │ Debug │ │ DiskLocal: Reserving 1.00 MiB on disk `default`, having unreserved 9.06 GiB. │ │ 2022-10-24 14:40:51.303 │ clickhouse01 │ 1339 │ Debug │ │ system.asynchronous_metric_log (546b981c-3272-40dc-9fe3-a70e33991d26): Removing part from filesystem 202210_142925_145624_2265 │ │ 2022-10-24 14:40:51.303 │ clickhouse01 │ 1339 │ Debug │ │ system.asynchronous_metric_log (546b981c-3272-40dc-9fe3-a70e33991d26): Removing part from filesystem 202210_145625_145625_0 │ │ 2022-10-24 14:40:51.311 │ clickhouse01 │ 1291 │ Debug │ │ HTTP-Session: 193f570e-487e-4eac-8dc4-2ab9acdc8106 Authenticating user 'vector' from [::1]:48560 │ │ 2022-10-24 14:40:51.312 │ clickhouse01 │ 1291 │ Debug │ │ HTTP-Session: 193f570e-487e-4eac-8dc4-2ab9acdc8106 Authenticated with global context as user 834343cf-ce60-5ef6-7690-e4c709e7f6ab │ └─────────────────────────┴──────────────┴───────────┴──────────┴──────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘