Secure Proton config #248

New issue

Closed

opened 2026-03-28 04:22:42 +00:00 by mfreeman451 · 1 comment

mfreeman451 commented

2026-03-28 04:22:42 +00:00

Owner

Imported from GitHub.

Original GitHub issue: #715
Original author: @mfreeman451
Original URL: https://github.com/carverauto/serviceradar/issues/715
Original created: 2025-05-03T05:12:25Z

# NOTE: User and query level settings are set up in "users.yaml" file.
# If you have accidentally specified user-level settings here, server won't start.
# You can either move the settings to the right place inside the "users.yaml" file
# or add skip_check_for_incorrect_settings: 1 here.
# Server logging: verbosity, output files, and log rotation.
logger:
    # Possible levels [1]:
    # - none (turns off logging)
    # - fatal
    # - critical
    # - error
    # - warning
    # - notice
    # - information
    # - debug
    # - trace
    # [1]: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105-L114
    level: information
    # Main server log file.
    log: /var/log/proton-server/proton-server.log
    # Separate error log file.
    # NOTE(review): presumably receives only error-level (and more severe) messages - confirm.
    errorlog: /var/log/proton-server/proton-server.err.log
    # Rotation policy
    # See https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/FileChannel.h#L54-L85
    # NOTE(review): presumably 'size' is the file size that triggers rotation and
    # 'count' is the number of rotated files kept - confirm against the FileChannel docs.
    size: 1000M
    count: 10
    # console: 1
    # Default behavior is autodetection (log to console if not daemon mode and is tty)

    # Per level overrides (legacy):
    # For example to suppress logging of the ConfigReloader you can use:
    # NOTE: levels.logger is reserved, see below.
    # levels:
    #     ConfigReloader: none

    # Per level overrides:
    # For example to suppress logging of the RBAC for default user you can use:
    # (But please note that the logger name may change from version to version, even after a minor upgrade)
    # levels:
    #     - logger:
    #         name: 'ContextAccess (default)'
    #         level: none
    #     - logger:
    #         name: 'DatabaseOrdinary (test)'
    #         level: none

# It is the name that will be shown in the proton-client.
# By default, anything with "production" will be highlighted in red in query prompt.
# display_name: production

# Port for HTTP API. See also 'https_port' for secure connections.
# This interface is also used by ODBC and JDBC drivers (DataGrip, Dbeaver, ...)
# and by most of web interfaces (embedded UI, Grafana, Redash, ...).
#http_port: 3218

# Port for Snapshot HTTP server, which runs all stream queries in 'table' mode.
snapshot_server_http_port: 8123

# Port for interaction by native protocol with:
# - proton-client and other native proton tools (proton-benchmark, proton-copier);
# - proton-server with other proton-servers for distributed query processing;
# - proton drivers and applications supporting native protocol
# (this protocol is also informally called as "the TCP protocol");
# See also 'tcp_port_secure' for secure connections.
# tcp_port: 8463

# Port for Snapshot TCP server, which runs all stream queries in 'table' mode.
snapshot_server_tcp_port: 7587

# Compatibility with PostgreSQL protocol.
# proton will pretend to be PostgreSQL for applications connecting to this port.
postgresql_port: 5432

# Compatibility with MySQL protocol.
# proton will pretend to be MySQL for applications connecting to this port.
mysql_port: 9004

# HTTP API with TLS (HTTPS).
# You have to configure certificate to enable this interface.
# See the openSSL section below.
https_port: 8443

# Native interface with TLS.
# You have to configure certificate to enable this interface.
# See the openSSL section below.
tcp_port_secure: 9440

# Native interface wrapped with PROXYv1 protocol
# PROXYv1 header sent for every connection.
# proton will extract information about proxy-forwarded client address from the header.
# tcp_with_proxy_port: 9011

# Port for communication between replicas. Used for data exchange.
# It provides low-level data access between servers.
# This port should not be accessible from untrusted networks.
# See also 'interserver_http_credentials'.
# Data transferred over connections to this port should not go through untrusted networks.
# See also 'interserver_https_port'.
interserver_http_port: 9009

# Port for communication between replicas with TLS.
# You have to configure certificate to enable this interface.
# See the openSSL section below.
# See also 'interserver_http_credentials'.
# interserver_https_port: 9010

# Hostname that is used by other replicas to request this server.
# If not specified, then it is determined analogously to the 'hostname -f' command.
# This setting could be used to switch replication to another network interface
# (the server may be connected to multiple networks via multiple addresses)
# interserver_http_host: example.yandex.ru

# You can specify credentials for authentication between replicas.
# This is required when interserver_https_port is accessible from untrusted networks,
# and also recommended to avoid SSRF attacks from possibly compromised services in your network.
# interserver_http_credentials:
#     user: interserver
#     password: ''

# Listen specified address.
# Use :: (wildcard IPv6 address), if you want to accept connections both with IPv4 and IPv6 from everywhere.
# Notes:
# If you open connections from wildcard address, make sure that at least one of the following measures applied:
# - server is protected by firewall and not accessible from untrusted networks;
# - all users are restricted to subset of network addresses (see users.xml);
# - all users have strong passwords, only secure (TLS) interfaces are accessible, or connections are only made via TLS interfaces.
# - users without password have readonly access.
# See also: https://www.shodan.io/search?query=proton
# listen_host: '::'

# Same for hosts without support for IPv6:
# listen_host: 0.0.0.0

# Default values - try listen localhost on IPv4 and IPv6.
# listen_host: '::1'
# listen_host: 127.0.0.1

# Don't exit if IPv6 or IPv4 networks are unavailable while trying to listen.
# listen_try: 0

# Allow multiple servers to listen on the same address:port. This is not recommended.
# listen_reuse_port: 0

# listen_backlog: 64
max_connections: 4096

# For 'Connection: keep-alive' in HTTP 1.1
keep_alive_timeout: 3

# Enable telemetry. This is used to send the version and runtime environment information to Timeplus, Inc.
# NOTE(review): the "@"-prefixed keys look like element attributes and "#text" like the
# element value; presumably the TELEMETRY_ENABLED environment variable, when set,
# overrides the default of true given in "#text" - confirm against the server docs.
telemetry_enabled:
    "@from_env": TELEMETRY_ENABLED
    "@replace": true
    "#text": true

# Interval between telemetry reports, in milliseconds.
telemetry_interval_ms: 300000   # 5 minutes

# gRPC protocol (see src/Server/grpc_protos/proton_grpc.proto for the API)
# grpc_port: 9100
# Options for the gRPC interface.
# NOTE(review): grpc_port above is commented out, so presumably these options
# have no effect until a port is configured - confirm.
grpc:
    # TLS for the gRPC interface is turned off here.
    enable_ssl: false

    # The following two files are used only if enable_ssl=1
    # The values below are placeholders; point them at real files before enabling SSL.
    ssl_cert_file: /path/to/ssl_cert_file
    ssl_key_file: /path/to/ssl_key_file

    # Whether server will request client for a certificate
    ssl_require_client_auth: false

    # The following file is used only if ssl_require_client_auth=1
    # Placeholder path; replace before enabling client authentication.
    ssl_ca_cert_file: /path/to/ssl_ca_cert_file

    # Default compression algorithm (applied if client doesn't specify another algorithm).
    # Supported algorithms: none, deflate, gzip, stream_gzip
    compression: deflate

    # Default compression level (applied if client doesn't specify another level).
    # Supported levels: none, low, medium, high
    compression_level: medium

    # Send/receive message size limits in bytes. -1 means unlimited
    max_send_message_size: -1
    max_receive_message_size: -1

    # Enable if you want very detailed logs
    verbose_logs: false

# Used with https_port and tcp_port_secure. Full ssl options list: https://github.com/proton-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h#L71
openSSL:
    server:
        # Used for https server AND secure tcp port
        # openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/proton-server/server.key -out /etc/proton-server/server.crt
        # NOTE(review): the example command above writes server.crt/server.key, but the
        # files configured below are root.pem/core-key.pem. Confirm that root.pem really is
        # the server certificate matching core-key.pem (its name suggests a CA certificate).
        certificateFile: /etc/proton-server/root.pem
        privateKeyFile: /etc/proton-server/core-key.pem

        # dhparams are optional. You can delete the dhParamsFile: element.
        # To generate dhparams, use the following command:
        # openssl dhparam -out /etc/proton-server/dhparam.pem 4096
        # Only file format with BEGIN DH PARAMETERS is supported.
        dhParamsFile: /etc/proton-server/dhparam.pem
        # NOTE(review): 'none' presumably means client certificates are neither requested
        # nor verified on the TLS ports - confirm this is intended for a secured setup.
        verificationMode: none
        loadDefaultCAFile: true
        cacheSessions: true
        # Only SSLv2 and SSLv3 are disabled here.
        disableProtocols: 'sslv2,sslv3'
        preferServerCiphers: true
    client:
        # Used for connecting to https dictionary source and secured Zookeeper communication
        loadDefaultCAFile: true
        cacheSessions: true
        disableProtocols: 'sslv2,sslv3'
        preferServerCiphers: true

        # Use for self-signed: verificationMode: none
        invalidCertificateHandler:
            # Use for self-signed: name: AcceptCertificateHandler
            name: RejectCertificateHandler

# Default root page on http[s] server. For example load UI from https://tabix.io/ when opening http://localhost:8123
# http_server_default_response: |-
#     <html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>

# Maximum memory usage (resident set size) for server process.
# Zero value or unset means default. Default is "max_server_memory_usage_to_ram_ratio" of available physical RAM.
# If the value is larger than "max_server_memory_usage_to_ram_ratio" of available physical RAM, it will be cut down.

# The constraint is checked on query execution time.
# If a query tries to allocate memory and the current memory usage plus allocation is greater
# than specified threshold, exception will be thrown.

# It is not practical to set this constraint to small values like just a few gigabytes,
# because memory allocator will keep this amount of memory in caches and the server will deny service of queries.
max_server_memory_usage: 0

# Maximum number of threads in the Global thread pool.
# This will default to a maximum of 10000 threads if not specified.
# This setting will be useful in scenarios where there are a large number
# of distributed queries that are running concurrently but are idling most
# of the time, in which case a higher number of threads might be required.
max_thread_pool_size: 10000

# On memory constrained environments you may have to set this to value larger than 1.
max_server_memory_usage_to_ram_ratio: 0.9

# Maximum local disk utilization. Once it is hit, all ingest will be errored out
max_local_disk_usage_ratio: 0.9

# Simple server-wide memory profiler. Collect a stack trace at every peak allocation step (in bytes).
# Data will be stored in system.trace_log table with query_id = empty string.
# Zero means disabled.
total_memory_profiler_step: 4194304

# Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type.
# The probability is for every alloc/free regardless to the size of the allocation.
# Note that sampling happens only when the amount of untracked memory exceeds the untracked memory limit,
# which is 4 MiB by default but can be lowered if 'total_memory_profiler_step' is lowered.
# You may want to set 'total_memory_profiler_step' to 1 for extra fine grained sampling.
total_memory_tracker_sample_probability: 0

# Limit the size of the cache. e.g., uncompressed_cache_size or mark_cache_size
cache_size_to_ram_max_ratio: 0.5

# Set limit on number of open files (default: maximum). This setting makes sense on Mac OS X because getrlimit() fails to retrieve
# correct maximum value.
# max_open_files: 262144

# Size of cache of uncompressed blocks of data, used in tables of MergeTree family.
# In bytes. Cache is single for server. Memory is allocated only on demand.
# Cache is used when 'use_uncompressed_cache' user setting turned on (off by default).
# Uncompressed cache is advantageous only for very short queries and in rare cases.

# Note: uncompressed cache can be pointless for lz4, because memory bandwidth
# is slower than multi-core decompression on some server configurations.
# Enabling it can sometimes paradoxically make queries slower.
uncompressed_cache_size: 8589934592

# Approximate size of mark cache, used in tables of MergeTree family.
# In bytes. Cache is single for server. Memory is allocated only on demand.
# You should not lower this value.
mark_cache_size: 5368709120

# If you enable the `min_bytes_to_use_mmap_io` setting,
# the data in MergeTree tables can be read with mmap to avoid copying from kernel to userspace.
# It makes sense only for large files and helps only if data reside in page cache.
# To avoid frequent open/mmap/munmap/close calls (which are very expensive due to consequent page faults)
# and to reuse mappings from several threads and queries,
# the cache of mapped files is maintained. Its size is the number of mapped regions (usually equal to the number of mapped files).
# The amount of data in mapped files can be monitored
# in system.metrics, system.metric_log by the MMappedFiles, MMappedFileBytes metrics
# and in system.asynchronous_metrics, system.asynchronous_metrics_log by the MMapCacheCells metric,
# and also in system.events, system.processes, system.query_log, system.query_thread_log, system.query_views_log by the
# CreatedReadBufferMMap, CreatedReadBufferMMapFailed, MMappedFileCacheHits, MMappedFileCacheMisses events.
# Note that the amount of data in mapped files does not consume memory directly and is not accounted
# in query or server memory usage - because this memory can be discarded similar to OS page cache.
# The cache is dropped (the files are closed) automatically on removal of old parts in MergeTree,
# also it can be dropped manually by the SYSTEM DROP MMAP CACHE query.
mmap_cache_size: 1000

# Cache size for compiled expressions.
compiled_expression_cache_size: 134217728

# Path to data directory, with trailing slash.
path: /var/lib/proton/

# Path to temporary data for processing hard queries.
tmp_path: /var/lib/proton/tmp/

# Checkpointing of query state.
checkpoint:
    # Checkpoint storage type
    storage_type: local_file_system
    # Path to state checkpoint
    path: /var/lib/proton/checkpoint/
    # State checkpoint interval in seconds
    interval: 900
    # Checkpoint expired if not accessed after `ttl` seconds
    last_access_ttl: 604800 # 7 days
    # Periodical last access check interval
    # NOTE(review): presumably in seconds, like the other intervals here - confirm.
    last_access_check_interval: 7200
    # When unsubscribe a query, wait grace interval to delete its checkpoints
    # NOTE(review): presumably in seconds - confirm.
    delete_grace_interval: 60
    # When server teardown, last checkpoint flush timeout in seconds
    teardown_flush_timeout: 60

# Disable AuthType plaintext_password and no_password for ACL.
# allow_plaintext_password: 0
# allow_no_password: 0

# Policy from the <storage_configuration> for the temporary files.
# If not set <tmp_path> is used, otherwise <tmp_path> is ignored.

# Notes:
# - move_factor              is ignored
# - keep_free_space_bytes    is ignored
# - max_data_part_size_bytes is ignored
# - you must have exactly one volume in that policy
# tmp_policy: tmp

# Directory with user provided files that are accessible by 'file' table function.
user_files_path: /var/lib/proton/user_files/

# LDAP server definitions.
ldap_servers: ''

# List LDAP servers with their connection parameters here to later 1) use them as authenticators for dedicated local users,
# who have 'ldap' authentication mechanism specified instead of 'password', or to 2) use them as remote user directories.
# Parameters:
# host - LDAP server hostname or IP, this parameter is mandatory and cannot be empty.
# port - LDAP server port, default is 636 if enable_tls is set to true, 389 otherwise.
# bind_dn - template used to construct the DN to bind to.
# The resulting DN will be constructed by replacing all '{user_name}' substrings of the template with the actual
# user name during each authentication attempt.
# user_dn_detection - section with LDAP search parameters for detecting the actual user DN of the bound user.
# This is mainly used in search filters for further role mapping when the server is Active Directory. The
# resulting user DN will be used when replacing '{user_dn}' substrings wherever they are allowed. By default,
# user DN is set equal to bind DN, but once search is performed, it will be updated to the actual detected
# user DN value.
# base_dn - template used to construct the base DN for the LDAP search.
# The resulting DN will be constructed by replacing all '{user_name}' and '{bind_dn}' substrings
# of the template with the actual user name and bind DN during the LDAP search.
# scope - scope of the LDAP search.
# Accepted values are: 'base', 'one_level', 'children', 'subtree' (the default).
# search_filter - template used to construct the search filter for the LDAP search.
# The resulting filter will be constructed by replacing all '{user_name}', '{bind_dn}', and '{base_dn}'
# substrings of the template with the actual user name, bind DN, and base DN during the LDAP search.
# Note, that the special characters must be escaped properly in XML.
# verification_cooldown - a period of time, in seconds, after a successful bind attempt, during which a user will be assumed
# to be successfully authenticated for all consecutive requests without contacting the LDAP server.
# Specify 0 (the default) to disable caching and force contacting the LDAP server for each authentication request.
# enable_tls - flag to trigger use of secure connection to the LDAP server.
# Specify 'no' for plain text (ldap://) protocol (not recommended).
# Specify 'yes' for LDAP over SSL/TLS (ldaps://) protocol (recommended, the default).
# Specify 'starttls' for legacy StartTLS protocol (plain text (ldap://) protocol, upgraded to TLS).
# tls_minimum_protocol_version - the minimum protocol version of SSL/TLS.
# Accepted values are: 'ssl2', 'ssl3', 'tls1.0', 'tls1.1', 'tls1.2' (the default).
# tls_require_cert - SSL/TLS peer certificate verification behavior.
# Accepted values are: 'never', 'allow', 'try', 'demand' (the default).
# tls_cert_file - path to certificate file.
# tls_key_file - path to certificate key file.
# tls_ca_cert_file - path to CA certificate file.
# tls_ca_cert_dir - path to the directory containing CA certificates.
# tls_cipher_suite - allowed cipher suite (in OpenSSL notation).
# Example:
# my_ldap_server:
#     host: localhost
#     port: 636
#     bind_dn: 'uid={user_name},ou=users,dc=example,dc=com'
#     verification_cooldown: 300
#     enable_tls: yes
#     tls_minimum_protocol_version: tls1.2
#     tls_require_cert: demand
#     tls_cert_file: /path/to/tls_cert_file
#     tls_key_file: /path/to/tls_key_file
#     tls_ca_cert_file: /path/to/tls_ca_cert_file
#     tls_ca_cert_dir: /path/to/tls_ca_cert_dir
#     tls_cipher_suite: ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384

# Example (typical Active Directory with configured user DN detection for further role mapping):
# my_ad_server:
#     host: localhost
#     port: 389
#     bind_dn: 'EXAMPLE\{user_name}'
#     user_dn_detection:
#         base_dn: CN=Users,DC=example,DC=com
#         search_filter: '(&amp;(objectClass=user)(sAMAccountName={user_name}))'
#     enable_tls: no

# To enable Kerberos authentication support for HTTP requests (GSS-SPNEGO), for those users who are explicitly configured
# to authenticate via Kerberos, define a single 'kerberos' section here.
# Parameters:
# principal - canonical service principal name, that will be acquired and used when accepting security contexts.
# This parameter is optional, if omitted, the default principal will be used.
# This parameter cannot be specified together with 'realm' parameter.
# realm - a realm, that will be used to restrict authentication to only those requests whose initiator's realm matches it.
# This parameter is optional, if omitted, no additional filtering by realm will be applied.
# This parameter cannot be specified together with 'principal' parameter.
# Example:
# kerberos: ''

# Example:
# kerberos:
#     principal: HTTP/proton.example.com@EXAMPLE.COM

# Example:
# kerberos:
#     realm: EXAMPLE.COM

# Sources to read users, roles, access rights, profiles of settings, quotas.
user_directories:
    # NOTE(review): the key is named users_xml although the file it points to is YAML;
    # presumably the key name is fixed by the server - do not rename without confirming.
    users_xml:
        # Path to configuration file with predefined users.
        path: users.yaml
    local_directory:
        # Path to folder where users created by SQL commands are stored.
        path: /var/lib/proton/access/

#   # To add an LDAP server as a remote user directory of users that are not defined locally, define a single 'ldap' section
#   # with the following parameters:
#   # server - one of LDAP server names defined in 'ldap_servers' config section above.
#   # This parameter is mandatory and cannot be empty.
#   # roles - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server.
#   # If no roles are specified here or assigned during role mapping (below), user will not be able to perform any
#   # actions after authentication.
#   # role_mapping - section with LDAP search parameters and mapping rules.
#   # When a user authenticates, while still bound to LDAP, an LDAP search is performed using search_filter and the
#   # name of the logged in user. For each entry found during that search, the value of the specified attribute is
#   # extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the
#   # value becomes the name of a local role defined in proton, which is expected to be created beforehand by
#   # CREATE ROLE command.
#   # There can be multiple 'role_mapping' sections defined inside the same 'ldap' section. All of them will be
#   # applied.
#   # base_dn - template used to construct the base DN for the LDAP search.
#   # The resulting DN will be constructed by replacing all '{user_name}', '{bind_dn}', and '{user_dn}'
#   # substrings of the template with the actual user name, bind DN, and user DN during each LDAP search.
#   # scope - scope of the LDAP search.
#   # Accepted values are: 'base', 'one_level', 'children', 'subtree' (the default).
#   # search_filter - template used to construct the search filter for the LDAP search.
#   # The resulting filter will be constructed by replacing all '{user_name}', '{bind_dn}', '{user_dn}', and
#   # '{base_dn}' substrings of the template with the actual user name, bind DN, user DN, and base DN during
#   # each LDAP search.
#   # Note, that the special characters must be escaped properly in XML.
#   # attribute - attribute name whose values will be returned by the LDAP search. 'cn', by default.
#   # prefix - prefix, that will be expected to be in front of each string in the original list of strings returned by
#   # the LDAP search. Prefix will be removed from the original strings and resulting strings will be treated
#   # as local role names. Empty, by default.
#   # Example:
#   # ldap:
#   #     server: my_ldap_server
#   #     roles:
#   #         my_local_role1: ''
#   #         my_local_role2: ''
#   #     role_mapping:
#   #         base_dn: 'ou=groups,dc=example,dc=com'
#   #         scope: subtree
#   #         search_filter: '(&amp;(objectClass=groupOfNames)(member={bind_dn}))'
#   #         attribute: cn
#   #         prefix: proton_
#   # Example (typical Active Directory with role mapping that relies on the detected user DN):
#   # ldap:
#   #     server: my_ad_server
#   #     role_mapping:
#   #         base_dn: 'CN=Users,DC=example,DC=com'
#   #         attribute: CN
#   #         scope: subtree
#   #         search_filter: '(&amp;(objectClass=group)(member={user_dn}))'
#   #         prefix: proton_

access_control_improvements:
    # Controls whether users without permissive row policies can still read rows using a SELECT query.
    # For example, if there are two users A and B and a row policy is defined only for A, then
    # if this setting is true user B will see all rows, and if this setting is false user B will see no rows.
    # By default this setting is false for compatibility with earlier access configurations.
    users_without_row_policies_can_read_rows: false

# Default profile of settings.
default_profile: default

# Comma-separated list of prefixes for user-defined settings.
# custom_settings_prefixes: ''
# System profile of settings. These settings are used by internal processes (Distributed DDL worker and so on).
# system_profile: default

# Buffer profile of settings.
# These settings are used by Buffer storage to flush data to the underlying table.
# Default: used from system_profile directive.
# buffer_profile: default

# Default database.
default_database: default

# Neutron database
neutron_database: neutron

# Server time zone could be set here.

# Time zone is used when converting between String and DateTime types,
# when printing DateTime in text formats and parsing DateTime from text,
# it is used in date and time related functions, if specific time zone was not passed as an argument.

# Time zone is specified as identifier from IANA time zone database, like UTC or Africa/Abidjan.
# If not specified, system time zone at server startup is used.

# Please note, that server could display time zone alias instead of specified name.
# Example: W-SU is an alias for Europe/Moscow and Zulu is an alias for UTC.
# timezone: Europe/Moscow

# You can specify umask here (see "man umask"). Server will apply it on startup.
# Number is always parsed as octal. Default umask is 027 (other users cannot read logs, data files, etc; group can only read).
# umask: 022

# Perform mlockall after startup to lower first queries latency
# and to prevent proton executable from being paged out under high IO load.
# Enabling this option is recommended but will lead to increased startup time for up to a few seconds.
mlock_executable: true

# Reallocate memory for machine code ("text") using huge pages. Highly experimental.
remap_executable: false

# Uncomment below in order to use JDBC table engine and function.
# To install and run JDBC bridge in background:
# * [Debian/Ubuntu]
# export MVN_URL=https://repo1.maven.org/maven2/ru/yandex//proton-jdbc-bridge
# export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '<release>' | sed -e 's|.*>\(.*\)<.*|\1|')
# wget https://github.com/proton/proton-jdbc-bridge/releases/download/v$PKG_VER/proton-jdbc-bridge_$PKG_VER-1_all.deb
# apt install --no-install-recommends -f ./proton-jdbc-bridge_$PKG_VER-1_all.deb
# proton-jdbc-bridge &
# * [CentOS/RHEL]
# export MVN_URL=https://repo1.maven.org/maven2/ru/yandex/proton/proton-jdbc-bridge
# export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '<release>' | sed -e 's|.*>\(.*\)<.*|\1|')
# wget https://github.com/proton/proton-jdbc-bridge/releases/download/v$PKG_VER/proton-jdbc-bridge-$PKG_VER-1.noarch.rpm
# yum localinstall -y proton-jdbc-bridge-$PKG_VER-1.noarch.rpm
# proton-jdbc-bridge &
# Please refer to https://github.com/proton/proton-jdbc-bridge#usage for more information.

# jdbc_bridge:
#     host: 127.0.0.1
#     port: 9019

# The list of hosts allowed to use in URL-related storage engines and table functions.
# If this section is not present in configuration, all hosts are allowed.
# remote_url_allow_hosts:

# Host should be specified exactly as in URL. The name is checked before DNS resolution.
# Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts.
# If port is explicitly specified in URL, the host:port is checked as a whole.
# If host specified here without port, any port with this host allowed.
# "yandex.ru" -> "yandex.ru:443", "yandex.ru:80" etc. is allowed, but "yandex.ru:80" -> only "yandex.ru:80" is allowed.
# If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]".
# If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.

# Regular expression can be specified. RE2 engine is used for regexps.
# Regexps are not aligned: don't forget to add ^ and $. Also don't forget to escape dot (.) metacharacter
# (forgetting to do so is a common source of error).

# If an element has the 'incl' attribute, then the corresponding substitution from another file will be used as its value.
# By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element.
# Values for substitutions are specified in /yandex/name_of_substitution elements in that file.

# ZooKeeper is used to store metadata about replicas, when using Replicated tables.
# Optional. If you don't use replicated tables, you could omit that.
# See https://proton.tech/docs/en/engines/table-engines/mergetree-family/replication/

# zookeeper:
#     - node:
#         host: example1
#         port: 2181
#     - node:
#         host: example2
#         port: 2181
#     - node:
#         host: example3
#         port: 2181

# Substitutions for parameters of replicated tables.
# Optional. If you don't use replicated tables, you could omit that.
# See https://proton.tech/docs/en/engines/table-engines/mergetree-family/replication/#creating-replicated-tables
# macros:
#     shard: 01
#     replica: example01-01-1

# Reloading interval for embedded dictionaries, in seconds. Default: 3600.
builtin_dictionaries_reload_interval: 3600

# Maximum session timeout, in seconds. Default: 3600.
max_session_timeout: 3600

# Default session timeout, in seconds. Default: 60.
default_session_timeout: 60

# Sending data to Graphite for monitoring. Several sections can be defined.
# interval - send every X second
# root_path - prefix for keys
# hostname_in_path - append hostname to root_path (default = true)
# metrics - send data from table system.metrics
# events - send data from table system.events
# asynchronous_metrics - send data from table system.asynchronous_metrics

# graphite:
#     host: localhost
#     port: 42000
#     timeout: 0.1
#     interval: 60
#     root_path: one_min
#     hostname_in_path: true

#     metrics: true
#     events: true
#     events_cumulative: false
#     asynchronous_metrics: true

# graphite:
#     host: localhost
#     port: 42000
#     timeout: 0.1
#     interval: 1
#     root_path: one_sec

#     metrics: true
#     events: true
#     events_cumulative: false
#     asynchronous_metrics: false

# Serve endpoint for Prometheus monitoring.
# endpoint - metrics path (relative to root, starting with "/")
# port - port to set up the server on. If not defined or 0, then http_port is used
# metrics - send data from table system.metrics
# events - send data from table system.events
# asynchronous_metrics - send data from table system.asynchronous_metrics
# status_info - send data from different components of CH, e.g. Dictionaries status
# external_stream - NOTE(review): not described in this file; presumably exposes
#     external stream metrics - confirm against the server docs.

prometheus:
    endpoint: /metrics
    port: 9363
    metrics: true
    events: true
    asynchronous_metrics: true
    status_info: true
    external_stream: true

# Query log. Used only for queries with setting log_queries = 1.
query_log:
    # Table to insert data into. If the table does not exist, it will be created.
    # When query log structure is changed after system update,
    # then old table will be renamed and new table will be created automatically.
    database: system
    table: query_log

    # PARTITION BY expr: https://proton.yandex/docs/en/table_engines/mergetree-family/custom_partitioning_key/
    # Example:
    # event_date
    # toMonday(event_date)
    # toYYYYMM(event_date)
    # toStartOfHour(event_time)
    partition_by: to_YYYYMM(event_date)

    # Table TTL specification: https://proton.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#mergetree-table-ttl
    # Example:
    # event_date + INTERVAL 1 WEEK
    # event_date + INTERVAL 7 DAY DELETE
    # event_date + INTERVAL 2 WEEK TO DISK 'bbb'

    ttl: 'event_date + INTERVAL 30 DAY DELETE'

    # Instead of partition_by, you can provide full engine expression (starting with ENGINE = ) with parameters,
    # Example: engine: 'ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024'

    # Interval of flushing data.
    flush_interval_milliseconds: 7500

# Trace log. Stores stack traces collected by query profilers.
# See query_profiler_real_time_period_ns and query_profiler_cpu_time_period_ns settings.
trace_log:
    database: system
    table: trace_log
    partition_by: to_YYYYMM(event_date)
    flush_interval_milliseconds: 7500
    ttl: 'event_date + INTERVAL 7 DAY DELETE'

# Query thread log. Has information about all threads participated in query execution.
# Used only for queries with setting log_query_threads = 1.
query_thread_log:
    database: system
    table: query_thread_log
    partition_by: to_YYYYMM(event_date)
    ttl: 'event_date + INTERVAL 30 DAY DELETE'
    flush_interval_milliseconds: 7500

# Query views log. Has information about all dependent views associated with a query.
# Used only for queries with setting log_query_views = 1.
query_views_log:
    database: system
    table: query_views_log
    partition_by: to_YYYYMM(event_date)
    flush_interval_milliseconds: 7500
    ttl: 'event_date + INTERVAL 30 DAY DELETE'

# Part log is enabled below; comment this section out if the part log is not needed.
# Part log contains information about all actions with parts in MergeTree tables (creation, deletion, merges, downloads).
part_log:
    database: system
    table: part_log
    partition_by: to_YYYYMM(event_date)
    flush_interval_milliseconds: 7500
    ttl: 'event_date + INTERVAL 30 DAY DELETE'

# Uncomment to write text log into table.
# Text log contains all information from usual server log but stores it in structured and efficient way.
# The level of the messages that goes to the table can be limited (<level>), if not specified all messages will go to the table.
# text_log:
#     database: system
#     table: text_log
#     flush_interval_milliseconds: 7500
#     level: ''

# Metric log contains rows with current values of ProfileEvents, CurrentMetrics collected with "collect_interval_milliseconds" interval.
metric_log:
    database: system
    table: metric_log
    flush_interval_milliseconds: 7500
    collect_interval_milliseconds: 1000
    ttl: 'event_date + INTERVAL 30 DAY DELETE'

# Asynchronous metric log contains values of metrics from
# system.asynchronous_metrics.
asynchronous_metric_log:
    database: system
    table: asynchronous_metric_log

    # Asynchronous metrics are updated once a minute, so there is
    # no need to flush more often.
    flush_interval_milliseconds: 60000
    ttl: 'event_date + INTERVAL 7 DAY DELETE'

# OpenTelemetry log contains OpenTelemetry trace spans.
opentelemetry_span_log:

    # The default table creation code is insufficient, this <engine> spec
    # is a workaround. There is no 'event_time' for this log, but two times,
    # start and finish. It is sorted by finish time, to avoid inserting
    # data too far away in the past (probably we can sometimes insert a span
    # that is seconds earlier than the last span in the table, due to a race
    # between several spans inserted in parallel). This gives the spans a
    # global order that we can use to e.g. retry insertion into some external
    # system.
    engine: |-
        engine MergeTree
             partition by to_YYYYMM(finish_date)
             order by (finish_date, finish_time_us, trace_id)
             ttl finish_date + INTERVAL 7 DAY DELETE
    database: system
    table: opentelemetry_span_log
    flush_interval_milliseconds: 7500

# Crash log. Stores stack traces for fatal errors.
# This table is normally empty.
crash_log:
    database: system
    table: crash_log
    partition_by: ''
    flush_interval_milliseconds: 1000

# Processor profile log.
# NOTE(review): unlike query_log, trace_log and most other log tables in this file,
# no ttl is set here, so rows are presumably kept indefinitely - confirm this is intended.
processors_profile_log:
    database: system
    table: processors_profile_log
    partition_by: to_YYYYMM(event_date)
    flush_interval_milliseconds: 7500

# Parameters for embedded dictionaries, used in Yandex.Metrica.
# See https://proton.yandex/docs/en/dicts/internal_dicts/

# Path to file with region hierarchy.
# path_to_regions_hierarchy_file: /opt/geo/regions_hierarchy.txt

# Path to directory with files containing names of regions
# path_to_regions_names_files: /opt/geo/


# top_level_domains_path: /var/lib/proton/top_level_domains/
# Custom TLD lists.
# Format: name: /path/to/file

# Changes will not be applied w/o server restart.
# Path to the list is under top_level_domains_path (see above).
top_level_domains_lists: ''

# public_suffix_list: /path/to/public_suffix_list.dat

# Configuration of external dictionaries. See:
# https://proton.tech/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts
dictionaries_config: '*_dictionary.xml'

# Uncomment if you want data to be compressed 30-100% better.
# Don't do that if you just started using proton.

# compression:
#     # Set of variants. Checked in order. Last matching case wins. If nothing matches, lz4 will be used.
#     case:
#         # Conditions. All must be satisfied. Some conditions may be omitted.
#         # min_part_size: 10000000000    # Min part size in bytes.
#         # min_part_size_ratio: 0.01     # Min size of part relative to whole table size.
#         # What compression method to use.
#         method: zstd

# Allow to execute distributed DDL queries (CREATE, DROP, ALTER, RENAME) on cluster.
# Works only if ZooKeeper is enabled. Comment it if such functionality isn't required.
distributed_ddl:
    # Path in ZooKeeper to queue with DDL queries
    path: /proton/task_queue/ddl

    # Settings from this profile will be used to execute DDL queries
    # profile: default

    # Controls how much ON CLUSTER queries can be run simultaneously.
    # pool_size: 1

    # Cleanup settings (active tasks will not be removed)

    # Controls task TTL (default 1 week)
    # task_max_lifetime: 604800

    # Controls how often cleanup should be performed (in seconds)
    # cleanup_delay_period: 60

    # Controls how many tasks could be in the queue
    # max_tasks_in_queue: 1000

# Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h
# merge_tree:
#     max_suspicious_broken_parts: 5

# Protection from accidental DROP.
# If size of a MergeTree table is greater than max_stream_size_to_drop (in bytes) then the table cannot be dropped with any DROP query.
# If you want to delete one table and don't want to change proton-server config, you can create special file <proton-path>/flags/force_drop_table and make DROP once.
# By default max_stream_size_to_drop is 50GB; max_stream_size_to_drop=0 allows to DROP any tables.
# The same for max_partition_size_to_drop.
# Uncomment to disable protection.

# max_stream_size_to_drop: 0
# max_partition_size_to_drop: 0

# Example of parameters for GraphiteMergeTree table engine
graphite_rollup_example:
    pattern:
        regexp: click_cost
        function: any
        retention:
            - age: 0
              precision: 3600
            - age: 86400
              precision: 60
    default:
        function: max
        retention:
            - age: 0
              precision: 60
            - age: 3600
              precision: 300
            - age: 86400
              precision: 3600

# Directory in <proton-path> containing schema files for various input formats.
# The directory will be created if it doesn't exist.
format_schema_path: /var/lib/proton/format_schemas/

# Directory to look for protobuf files for the well-known types:
# https://protobuf.dev/reference/protobuf/google.protobuf/
google_protos_path: /usr/share/proton/protos/

# Default query masking rules, matching lines would be replaced with something else in the logs
# (both text logs and system.query_log).
# name - name for the rule (optional)
# regexp - RE2 compatible regular expression (mandatory)
# replace - substitution string for sensitive data (optional, by default - six asterisks)
query_masking_rules:
    rule:
        name: hide encrypt/decrypt arguments
        regexp: '((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:''(?:\\''|.)+''|.*?)\s*\)'
        # or more secure, but also more invasive:
        # (aes_\w+)\s*\(.*\)
        replace: \1(???)

# Uncomment to use custom http handlers.
# rules are checked from top to bottom, first match runs the handler
# url - to match request URL, you can use 'regex:' prefix to use regex match(optional)
# methods - to match request method, you can use commas to separate multiple method matches(optional)
# headers - to match request headers, match each child element(child element name is header name), you can use 'regex:' prefix to use regex match(optional)
# handler is request handler
# type - supported types: static, dynamic_query_handler, predefined_query_handler
# query - use with predefined_query_handler type, executes query when the handler is called
# query_param_name - use with dynamic_query_handler type, extracts and executes the value corresponding to the <query_param_name> value in HTTP request params
# status - use with static type, response status code
# content_type - use with static type, response content-type
# response_content - use with static type, Response content sent to client, when using the prefix 'file://' or 'config://', find the content from the file or configuration send to client.

# NOTE, if enable these dynamic handlers, please follow /timeplusd/{uri_path} convention, check createHandlersFactoryFromConfig() in HTTPHandlerFactory.cpp
# http_handlers:
#     - rule:
#         url: /
#         methods: POST,GET
#         headers:
#           pragma: no-cache
#         handler:
#           type: dynamic_query_handler
#           query_param_name: query
#     - rule:
#         url: /predefined_query
#         methods: POST,GET
#         handler:
#           type: predefined_query_handler
#           query: 'SELECT * FROM system.settings'
#     - rule:
#         handler:
#           type: static
#           status: 200
#           content_type: 'text/plain; charset=UTF-8'
#           response_content: config://http_server_default_response

send_crash_reports:
    # Changing <enabled> to true allows sending crash reports to
    # the proton core developers team via Sentry https://sentry.io
    # Doing so at least in pre-production environments is highly appreciated
    enabled: false
    # Change <anonymize> to true if you don't feel comfortable attaching the server hostname to the crash report
    anonymize: false
    # Default endpoint should be changed to different Sentry DSN only if you have
    # some in-house engineers or hired consultants who're going to debug proton issues for you
    endpoint:

# Uncomment to disable proton internal DNS caching.
# disable_internal_dns_cache: 1

#rocksdb:
#    options:
#        max_background_jobs: 8
#    column_family_options:
#        num_levels: 2
#    tables:
#        - table:
#            name: TABLE
#            options:
#                max_background_jobs: 8
#            column_family_options:
#                num_levels: 2

# User defined grok patterns file
grok_patterns_file: /etc/proton-server/grok-patterns

# Maximum number of concurrent queries.
max_concurrent_queries: 100
# Maximum number of concurrent insert queries.
max_concurrent_insert_queries: 100
# Maximum number of concurrent select queries.
max_concurrent_select_queries: 100

# On Linux systems this can control the behavior of OOM killer.
# oom_score: 1000

# Server-wide defaults: "global" holds query/ingest settings, "stream" holds
# defaults applied to streams.
settings:
    global:
        query_mode: streaming               # Default query mode: table or streaming
        query_resource_group: "dedicated"   # Default resource group: dedicated or shared
        enable_light_ingest: true           # Light ingest means inserting only a subset of a table's columns
        _tp_enable_log_stream_expr: true    # Log system analytic
        synchronous_ddl: true               # If enabled, DDL for streaming storage is executed synchronously; otherwise asynchronously. Enabled by default.
        asterisk_include_reserved_columns: true # Show reserved columns on SELECT query.
        async_ingest_block_timeout_ms: 12000    # Max duration for a block to commit before it is considered expired during async ingestion
        # NOTE(review): the key below is spelled "aysnc" (not "async"). It is left
        # untouched because the spelling the server expects is not visible here -
        # confirm before renaming.
        aysnc_ingest_max_outstanding_blocks: 10000 # Max outstanding blocks to be committed per stream during async ingestion
        part_commit_pool_size: 8 # Total shared thread pool size for building and committing parts for Stream
        max_idempotent_ids: 1000 # Maximum idempotent IDs to keep in memory and on disk for idempotent data ingestion
        # NOTE(review): enabled here - confirm that access to system.* streams
        # should be open in a hardened deployment.
        _tp_internal_system_open_sesame: true # Controls the access to system.* streams
        javascript_max_memory_bytes: 104857600 # Maximum heap size of JavaScript UDA/UDF in bytes, default is 100*1024*1024 bytes
        recovery_policy: "strict" # Recovery policy for materialized view: strict or best_effort
        recovery_retry_for_sn_failure: 3 # Number of retries on sn failure. Only applies if `recovery_policy` is `best_effort`
        max_block_size: 65409 # 65536 - (PADDING_FOR_SIMD - 1)
        max_insert_block_size: 65409 # 65536 - (PADDING_FOR_SIMD - 1)

    stream:
        default_shards: 1
        default_replicas: 1
        default_sharding_expr: ""   # Empty string means `rand()`, or `weak_hash32(<primary_keys>)` if a primary key is set
        distributed_ingest_mode: "async"    # Data ingestion mode for Stream
        logstore: ""
        logstore_replication_factor: 1
        storage_type: "hybrid"
        logstore_codec: none
        logstore_retention_bytes: -1 # When this threshold is reached, streaming storage deletes old data. -1 means no limit
        logstore_retention_ms: 86400000 # When this threshold is reached, streaming storage deletes old data. -1 means no limit (86400000 ms = 24 hours)

cluster_settings:
    logstore:
        # Multiple clusters of streaming storage are supported
        kafka:
            enabled: false
            default: true # true means, the streaming storage will be used to provision system internal topics
            cluster_name: default-sys-kafka-cluster-name
            cluster_id: default-sys-kafka-cluster-id
            security_protocol: PLAINTEXT # support PLAINTEXT, SASL_PLAINTEXT, SASL_SSL
            # if security_protocol is SASL_PLAINTEXT, username and password should be specified.
            # username:
            # password:
            # if security_protocol is SASL_SSL, ssl_ca_cert_file should be specified.
            # ssl_ca_cert_file:
            # Setup the Kafka brokers here
            brokers: localhost:9092
            # Group ID used internally. Once it is set up, it can't be changed.
            # Otherwise, there may be data duplication, as proton uses it
            # for checkpointing
            group_id: proton
            replication_factor: 1
            # Refer to https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
            debug: # generic, broker, topic etc
            # Note Redpanda doesn't support idempotence for compact topic
            enable_idempotence: false
            topic_metadata_refresh_interval_ms: 300000
            message_max_bytes: 1000000
            statistic_internal_ms: 30000
            queue_buffering_max_messages: 100000
            queue_buffering_max_kbytes: 1048576
            queue_buffering_max_ms: 50
            message_send_max_retries: 2
            retry_backoff_ms: 100
            compression_codec: snappy
            message_timeout_ms: 40000
            message_delivery_async_poll_ms: 100
            message_delivery_sync_poll_ms: 10
            check_crcs: false
            auto_commit_interval_ms: 5000
            fetch_message_max_bytes: 1048576
            fetch_wait_max_ms: 500
            queued_min_messages: 1000000
            queued_max_messages_kbytes: 65536
            session_timeout_ms: 10000
            max_poll_interval_ms: 30000
            dedicated_subscription_pool_size: 2
            shared_subscription_pool_max_size: 10
            shared_subscription_flush_threshold_count: 10000
            shared_subscription_flush_threshold_size: 10485760
            shared_subscription_flush_threshold_ms: 1000
            streaming_processing_pool_size: 100

        nativelog:
            enabled: true
            default: false
            check_crcs: false
            max_schedule_threads: 8
            max_adhoc_schedule_threads: 8
            metastore_data_dir: /var/lib/proton/nativelog/meta/
            log_data_dirs:
                # Proton doesn't support normal yaml list yet
                dir1: /var/lib/proton/nativelog/log/
            fetch_max_wait_ms: 500 # max wait time if no records are available
            fetch_max_bytes: 65536 # max bytes to fetch per request
            log_max_record_size: 10485760
            log_segment_size: 4294967296
            log_retention_size: -1
            log_retention_ms: 604800000
            log_index_internal_bytes: 4096
            log_index_internal_records: 1000
            log_flush_interval_ms: 600000
            log_flush_interval_records: 10000
            log_compression_codec: none
            cache_max_cached_entries: 10000
            cache_max_cached_bytes: 419430400
            cache_max_cached_entries_per_shard: 100
            cache_max_cached_bytes_per_shard: 4194304

    # Node identity shall be unique in the proton cluster.
    # When left empty, the system will use the node's FQDN and TCP port to
    # generate one
    node_identity: localhost:8123

# Metastore server config
metastore_server:
    enable_ipv6: true
    http_port: 9444
    server_id: 1

    namespace_whitelist:
        namespace:
            - "proton"
            - "neutron"
            - "udf"

    # Session/operation timeouts and RAFT tuning for the metastore server.
    coordination_settings:
        # Default client session timeout
        session_timeout_ms: 30000

        # Default client operation timeout
        operation_timeout_ms: 10000

        # Heartbeat interval between quorum nodes, Default: 500
        # heart_beat_interval_ms: 500

        # Lower bound of election timer (avoids too frequent leader elections), Default: 1000
        # election_timeout_lower_bound_ms: 1000

        # Upper bound of election timer (avoids too frequent leader elections), Default: 2000
        # election_timeout_upper_bound_ms: 2000

        # Log level for internal RAFT logs written to the main server log. Default: information
        # Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'
        raft_logs_level: information

        # Call fsync on each change in RAFT changelog, Default: true
        # NOTE(review): set to false here, which differs from the documented
        # default - confirm this is intended.
        force_sync: false

        # How long to wait for RAFT to start, Default: 30000
        startup_timeout: 60000

        # How long to wait for RAFT to shut down, Default: 5000
        shutdown_timeout: 5000

        # When a node becomes stale and should receive snapshots from the leader, Default: 10000
        stale_log_gap: 10000

        # When a node becomes fresh, Default: 200
        fresh_log_gap: 200

        # How many log items to store (not removed during compaction), Default: 100000
        # We want all logs for investigating complex problems
        reserved_log_items: 100000

        # How many log items we have to collect to write a new snapshot, Default: 100000
        snapshot_distance: 100000

        # Max size of batch in requests count before it will be sent to RAFT, Default: 100
        max_requests_batch_size: 100

        # Allow forwarding write requests from followers to the leader, Default: true
        auto_forwarding: true

        # Execute read requests as writes through whole RAFT consensus with similar speed, Default: false
        quorum_reads: false

    raft_configuration:
        server:
            id: 1
            hostname: localhost
            port: 9445
            http_port: 3218

Imported from GitHub. Original GitHub issue: #715 Original author: @mfreeman451 Original URL: https://github.com/carverauto/serviceradar/issues/715 Original created: 2025-05-03T05:12:25Z --- ``` # NOTE: User and query level settings are set up in "users.yaml" file. # If you have accidentally specified user-level settings here, server won't start. # You can either move the settings to the right place inside "users.xml" file # or add skip_check_for_incorrect_settings: 1 here. logger: # Possible levels [1]: # - none (turns off logging) # - fatal # - critical # - error # - warning # - notice # - information # - debug # - trace # [1]: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105-L114 level: information log: /var/log/proton-server/proton-server.log errorlog: /var/log/proton-server/proton-server.err.log # Rotation policy # See https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/FileChannel.h#L54-L85 size: 1000M count: 10 # console: 1 # Default behavior is autodetection (log to console if not daemon mode and is tty) # Per level overrides (legacy): # For example to suppress logging of the ConfigReloader you can use: # NOTE: levels.logger is reserved, see below. # levels: # ConfigReloader: none # Per level overrides: # For example to suppress logging of the RBAC for default user you can use: # (But please note that the logger name maybe changed from version to version, even after minor upgrade) # levels: # - logger: # name: 'ContextAccess (default)' # level: none # - logger: # name: 'DatabaseOrdinary (test)' # level: none # It is the name that will be shown in the proton-client. # By default, anything with "production" will be highlighted in red in query prompt. # display_name: production # Port for HTTP API. See also 'https_port' for secure connections. # This interface is also used by ODBC and JDBC drivers (DataGrip, Dbeaver, ...) 
# and by most of web interfaces (embedded UI, Grafana, Redash, ...). #http_port: 3218 # Port for Snapshot HTTP server, which run all stream query in 'table' mode. snapshot_server_http_port: 8123 # Port for interaction by native protocol with: # - proton-client and other native proton tools (proton-benchmark, proton-copier); # - proton-server with other proton-servers for distributed query processing; # - proton drivers and applications supporting native protocol # (this protocol is also informally called as "the TCP protocol"); # See also 'tcp_port_secure' for secure connections. # tcp_port: 8463 # Port for Snapshot TCP server, which run all stream query in 'table' mode. snapshot_server_tcp_port: 7587 # Compatibility with PostgreSQL protocol. # proton will pretend to be PostgreSQL for applications connecting to this port. postgresql_port: 5432 # Compatibility with MySQL protocol. # proton will pretend to be MySQL for applications connecting to this port. mysql_port: 9004 # HTTP API with TLS (HTTPS). # You have to configure certificate to enable this interface. # See the openSSL section below. https_port: 8443 # Native interface with TLS. # You have to configure certificate to enable this interface. # See the openSSL section below. tcp_port_secure: 9440 # Native interface wrapped with PROXYv1 protocol # PROXYv1 header sent for every connection. # proton will extract information about proxy-forwarded client address from the header. # tcp_with_proxy_port: 9011 # Port for communication between replicas. Used for data exchange. # It provides low-level data access between servers. # This port should not be accessible from untrusted networks. # See also 'interserver_http_credentials'. # Data transferred over connections to this port should not go through untrusted networks. # See also 'interserver_https_port'. interserver_http_port: 9009 # Port for communication between replicas with TLS. # You have to configure certificate to enable this interface. 
# See the openSSL section below. # See also 'interserver_http_credentials'. # interserver_https_port: 9010 # Hostname that is used by other replicas to request this server. # If not specified, than it is determined analogous to 'hostname -f' command. # This setting could be used to switch replication to another network interface # (the server may be connected to multiple networks via multiple addresses) # interserver_http_host: example.yandex.ru # You can specify credentials for authenthication between replicas. # This is required when interserver_https_port is accessible from untrusted networks, # and also recommended to avoid SSRF attacks from possibly compromised services in your network. # interserver_http_credentials: # user: interserver # password: '' # Listen specified address. # Use :: (wildcard IPv6 address), if you want to accept connections both with IPv4 and IPv6 from everywhere. # Notes: # If you open connections from wildcard address, make sure that at least one of the following measures applied: # - server is protected by firewall and not accessible from untrusted networks; # - all users are restricted to subset of network addresses (see users.xml); # - all users have strong passwords, only secure (TLS) interfaces are accessible, or connections are only made via TLS interfaces. # - users without password have readonly access. # See also: https://www.shodan.io/search?query=proton # listen_host: '::' # Same for hosts without support for IPv6: # listen_host: 0.0.0.0 # Default values - try listen localhost on IPv4 and IPv6. # listen_host: '::1' # listen_host: 127.0.0.1 # Don't exit if IPv6 or IPv4 networks are unavailable while trying to listen. # listen_try: 0 # Allow multiple servers to listen on the same address:port. This is not recommended. # listen_reuse_port: 0 # listen_backlog: 64 max_connections: 4096 # For 'Connection: keep-alive' in HTTP 1.1 keep_alive_timeout: 3 # Enable telemetry. 
This is used to collect the version and runtime environment information to Timeplus, Inc. telemetry_enabled: "@from_env": TELEMETRY_ENABLED "@replace": true "#text": true telemetry_interval_ms: 300000 # 5 minutes # gRPC protocol (see src/Server/grpc_protos/proton_grpc.proto for the API) # grpc_port: 9100 grpc: enable_ssl: false # The following two files are used only if enable_ssl=1 ssl_cert_file: /path/to/ssl_cert_file ssl_key_file: /path/to/ssl_key_file # Whether server will request client for a certificate ssl_require_client_auth: false # The following file is used only if ssl_require_client_auth=1 ssl_ca_cert_file: /path/to/ssl_ca_cert_file # Default compression algorithm (applied if client doesn't specify another algorithm). # Supported algorithms: none, deflate, gzip, stream_gzip compression: deflate # Default compression level (applied if client doesn't specify another level). # Supported levels: none, low, medium, high compression_level: medium # Send/receive message size limits in bytes. -1 means unlimited max_send_message_size: -1 max_receive_message_size: -1 # Enable if you want very detailed logs verbose_logs: false # Used with https_port and tcp_port_secure. Full ssl options list: https://github.com/proton-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h#L71 openSSL: server: # Used for https server AND secure tcp port # openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/proton-server/server.key -out /etc/proton-server/server.crt certificateFile: /etc/proton-server/root.pem privateKeyFile: /etc/proton-server/core-key.pem # dhparams are optional. You can delete the dhParamsFile: element. # To generate dhparams, use the following command: # openssl dhparam -out /etc/proton-server/dhparam.pem 4096 # Only file format with BEGIN DH PARAMETERS is supported. 
dhParamsFile: /etc/proton-server/dhparam.pem verificationMode: none loadDefaultCAFile: true cacheSessions: true disableProtocols: 'sslv2,sslv3' preferServerCiphers: true client: # Used for connecting to https dictionary source and secured Zookeeper communication loadDefaultCAFile: true cacheSessions: true disableProtocols: 'sslv2,sslv3' preferServerCiphers: true # Use for self-signed: verificationMode: none invalidCertificateHandler: # Use for self-signed: name: AcceptCertificateHandler name: RejectCertificateHandler # Default root page on http[s] server. For example load UI from https://tabix.io/ when opening http://localhost:8123 # http_server_default_response: |- # <html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html> # Maximum memory usage (resident set size) for server process. # Zero value or unset means default. Default is "max_server_memory_usage_to_ram_ratio" of available physical RAM. # If the value is larger than "max_server_memory_usage_to_ram_ratio" of available physical RAM, it will be cut down. # The constraint is checked on query execution time. # If a query tries to allocate memory and the current memory usage plus allocation is greater # than specified threshold, exception will be thrown. # It is not practical to set this constraint to small values like just a few gigabytes, # because memory allocator will keep this amount of memory in caches and the server will deny service of queries. max_server_memory_usage: 0 # Maximum number of threads in the Global thread pool. # This will default to a maximum of 10000 threads if not specified. # This setting will be useful in scenarios where there are a large number # of distributed queries that are running concurrently but are idling most # of the time, in which case a higher number of threads might be required. 
max_thread_pool_size: 10000 # On memory constrained environments you may have to set this to value larger than 1. max_server_memory_usage_to_ram_ratio: 0.9 # Maximum local disk utilization. Once it is hit, all ingest will be errored out max_local_disk_usage_ratio: 0.9 # Simple server-wide memory profiler. Collect a stack trace at every peak allocation step (in bytes). # Data will be stored in system.trace_log table with query_id = empty string. # Zero means disabled. total_memory_profiler_step: 4194304 # Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. # The probability is for every alloc/free regardless to the size of the allocation. # Note that sampling happens only when the amount of untracked memory exceeds the untracked memory limit, # which is 4 MiB by default but can be lowered if 'total_memory_profiler_step' is lowered. # You may want to set 'total_memory_profiler_step' to 1 for extra fine grained sampling. total_memory_tracker_sample_probability: 0 # Limit the size of the cache. e.g., uncompressed_cache_size or mark_cache_size cache_size_to_ram_max_ratio: 0.5 # Set limit on number of open files (default: maximum). This setting makes sense on Mac OS X because getrlimit() fails to retrieve # correct maximum value. # max_open_files: 262144 # Size of cache of uncompressed blocks of data, used in tables of MergeTree family. # In bytes. Cache is single for server. Memory is allocated only on demand. # Cache is used when 'use_uncompressed_cache' user setting turned on (off by default). # Uncompressed cache is advantageous only for very short queries and in rare cases. # Note: uncompressed cache can be pointless for lz4, because memory bandwidth # is slower than multi-core decompression on some server configurations. # Enabling it can sometimes paradoxically make queries slower. uncompressed_cache_size: 8589934592 # Approximate size of mark cache, used in tables of MergeTree family. # In bytes. 
Cache is single for server. Memory is allocated only on demand. # You should not lower this value. mark_cache_size: 5368709120 # If you enable the `min_bytes_to_use_mmap_io` setting, # the data in MergeTree tables can be read with mmap to avoid copying from kernel to userspace. # It makes sense only for large files and helps only if data reside in page cache. # To avoid frequent open/mmap/munmap/close calls (which are very expensive due to consequent page faults) # and to reuse mappings from several threads and queries, # the cache of mapped files is maintained. Its size is the number of mapped regions (usually equal to the number of mapped files). # The amount of data in mapped files can be monitored # in system.metrics, system.metric_log by the MMappedFiles, MMappedFileBytes metrics # and in system.asynchronous_metrics, system.asynchronous_metrics_log by the MMapCacheCells metric, # and also in system.events, system.processes, system.query_log, system.query_thread_log, system.query_views_log by the # CreatedReadBufferMMap, CreatedReadBufferMMapFailed, MMappedFileCacheHits, MMappedFileCacheMisses events. # Note that the amount of data in mapped files does not consume memory directly and is not accounted # in query or server memory usage - because this memory can be discarded similar to OS page cache. # The cache is dropped (the files are closed) automatically on removal of old parts in MergeTree, # also it can be dropped manually by the SYSTEM DROP MMAP CACHE query. mmap_cache_size: 1000 # Cache size for compiled expressions. compiled_expression_cache_size: 134217728 # Path to data directory, with trailing slash. path: /var/lib/proton/ # Path to temporary data for processing hard queries. 
tmp_path: /var/lib/proton/tmp/ checkpoint: # Checkpoint storage type storage_type : local_file_system # Path to state checkpoint path: /var/lib/proton/checkpoint/ # State checkpoint interval in seconds interval: 900 # Checkpoint expired if not accessed after `ttl` seconds last_access_ttl: 604800 # 7 days # Periodical last access check interval last_access_check_interval: 7200 # When unsubscribe a query, wait grace interval to delete its checkpoints delete_grace_interval: 60 # When server teardown, last checkpoint flush timeout in seconds teardown_flush_timeout: 60 # Disable AuthType plaintext_password and no_password for ACL. # allow_plaintext_password: 0 # allow_no_password: 0 # Policy from the <storage_configuration> for the temporary files. # If not set <tmp_path> is used, otherwise <tmp_path> is ignored. # Notes: # - move_factor is ignored # - keep_free_space_bytes is ignored # - max_data_part_size_bytes is ignored # - you must have exactly one volume in that policy # tmp_policy: tmp # Directory with user provided files that are accessible by 'file' table function. user_files_path: /var/lib/proton/user_files/ # LDAP server definitions. ldap_servers: '' # List LDAP servers with their connection parameters here to later 1) use them as authenticators for dedicated local users, # who have 'ldap' authentication mechanism specified instead of 'password', or to 2) use them as remote user directories. # Parameters: # host - LDAP server hostname or IP, this parameter is mandatory and cannot be empty. # port - LDAP server port, default is 636 if enable_tls is set to true, 389 otherwise. # bind_dn - template used to construct the DN to bind to. # The resulting DN will be constructed by replacing all '{user_name}' substrings of the template with the actual # user name during each authentication attempt. # user_dn_detection - section with LDAP search parameters for detecting the actual user DN of the bound user. 
# This is mainly used in search filters for further role mapping when the server is Active Directory. The # resulting user DN will be used when replacing '{user_dn}' substrings wherever they are allowed. By default, # user DN is set equal to bind DN, but once search is performed, it will be updated with to the actual detected # user DN value. # base_dn - template used to construct the base DN for the LDAP search. # The resulting DN will be constructed by replacing all '{user_name}' and '{bind_dn}' substrings # of the template with the actual user name and bind DN during the LDAP search. # scope - scope of the LDAP search. # Accepted values are: 'base', 'one_level', 'children', 'subtree' (the default). # search_filter - template used to construct the search filter for the LDAP search. # The resulting filter will be constructed by replacing all '{user_name}', '{bind_dn}', and '{base_dn}' # substrings of the template with the actual user name, bind DN, and base DN during the LDAP search. # Note, that the special characters must be escaped properly in XML. # verification_cooldown - a period of time, in seconds, after a successful bind attempt, during which a user will be assumed # to be successfully authenticated for all consecutive requests without contacting the LDAP server. # Specify 0 (the default) to disable caching and force contacting the LDAP server for each authentication request. # enable_tls - flag to trigger use of secure connection to the LDAP server. # Specify 'no' for plain text (ldap://) protocol (not recommended). # Specify 'yes' for LDAP over SSL/TLS (ldaps://) protocol (recommended, the default). # Specify 'starttls' for legacy StartTLS protocol (plain text (ldap://) protocol, upgraded to TLS). # tls_minimum_protocol_version - the minimum protocol version of SSL/TLS. # Accepted values are: 'ssl2', 'ssl3', 'tls1.0', 'tls1.1', 'tls1.2' (the default). # tls_require_cert - SSL/TLS peer certificate verification behavior. 
# Accepted values are: 'never', 'allow', 'try', 'demand' (the default). # tls_cert_file - path to certificate file. # tls_key_file - path to certificate key file. # tls_ca_cert_file - path to CA certificate file. # tls_ca_cert_dir - path to the directory containing CA certificates. # tls_cipher_suite - allowed cipher suite (in OpenSSL notation). # Example: # my_ldap_server: # host: localhost # port: 636 # bind_dn: 'uid={user_name},ou=users,dc=example,dc=com' # verification_cooldown: 300 # enable_tls: yes # tls_minimum_protocol_version: tls1.2 # tls_require_cert: demand # tls_cert_file: /path/to/tls_cert_file # tls_key_file: /path/to/tls_key_file # tls_ca_cert_file: /path/to/tls_ca_cert_file # tls_ca_cert_dir: /path/to/tls_ca_cert_dir # tls_cipher_suite: ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384 # Example (typical Active Directory with configured user DN detection for further role mapping): # my_ad_server: # host: localhost # port: 389 # bind_dn: 'EXAMPLE\{user_name}' # user_dn_detection: # base_dn: CN=Users,DC=example,DC=com # search_filter: '(&(objectClass=user)(sAMAccountName={user_name}))' # enable_tls: no # To enable Kerberos authentication support for HTTP requests (GSS-SPNEGO), for those users who are explicitly configured # to authenticate via Kerberos, define a single 'kerberos' section here. # Parameters: # principal - canonical service principal name, that will be acquired and used when accepting security contexts. # This parameter is optional, if omitted, the default principal will be used. # This parameter cannot be specified together with 'realm' parameter. # realm - a realm, that will be used to restrict authentication to only those requests whose initiator's realm matches it. # This parameter is optional, if omitted, no additional filtering by realm will be applied. # This parameter cannot be specified together with 'principal' parameter. 
# Example: # kerberos: '' # Example: # kerberos: # principal: HTTP/proton.example.com@EXAMPLE.COM # Example: # kerberos: # realm: EXAMPLE.COM # Sources to read users, roles, access rights, profiles of settings, quotas. user_directories: users_xml: # Path to configuration file with predefined users. path: users.yaml local_directory: # Path to folder where users created by SQL commands are stored. path: /var/lib/proton/access/ # # To add an LDAP server as a remote user directory of users that are not defined locally, define a single 'ldap' section # # with the following parameters: # # server - one of LDAP server names defined in 'ldap_servers' config section above. # # This parameter is mandatory and cannot be empty. # # roles - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. # # If no roles are specified here or assigned during role mapping (below), user will not be able to perform any # # actions after authentication. # # role_mapping - section with LDAP search parameters and mapping rules. # # When a user authenticates, while still bound to LDAP, an LDAP search is performed using search_filter and the # # name of the logged in user. For each entry found during that search, the value of the specified attribute is # # extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the # # value becomes the name of a local role defined in proton, which is expected to be created beforehand by # # CREATE ROLE command. # # There can be multiple 'role_mapping' sections defined inside the same 'ldap' section. All of them will be # # applied. # # base_dn - template used to construct the base DN for the LDAP search. # # The resulting DN will be constructed by replacing all '{user_name}', '{bind_dn}', and '{user_dn}' # # substrings of the template with the actual user name, bind DN, and user DN during each LDAP search. # # scope - scope of the LDAP search. 
# # Accepted values are: 'base', 'one_level', 'children', 'subtree' (the default). # # search_filter - template used to construct the search filter for the LDAP search. # # The resulting filter will be constructed by replacing all '{user_name}', '{bind_dn}', '{user_dn}', and # # '{base_dn}' substrings of the template with the actual user name, bind DN, user DN, and base DN during # # each LDAP search. # # Note, that the special characters must be escaped properly in XML. # # attribute - attribute name whose values will be returned by the LDAP search. 'cn', by default. # # prefix - prefix, that will be expected to be in front of each string in the original list of strings returned by # # the LDAP search. Prefix will be removed from the original strings and resulting strings will be treated # # as local role names. Empty, by default. # # Example: # # ldap: # # server: my_ldap_server # # roles: # # my_local_role1: '' # # my_local_role2: '' # # role_mapping: # # base_dn: 'ou=groups,dc=example,dc=com' # # scope: subtree # # search_filter: '(&(objectClass=groupOfNames)(member={bind_dn}))' # # attribute: cn # # prefix: proton_ # # Example (typical Active Directory with role mapping that relies on the detected user DN): # # ldap: # # server: my_ad_server # # role_mapping: # # base_dn: 'CN=Users,DC=example,DC=com' # # attribute: CN # # scope: subtree # # search_filter: '(&(objectClass=group)(member={user_dn}))' # # prefix: proton_ access_control_improvements: users_without_row_policies_can_read_rows: false # Enables logic that users without permissive row policies can still read rows using a SELECT query. # For example, if there two users A, B and a row policy is defined only for A, then # if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows. # By default this setting is false for compatibility with earlier access configurations. # Default profile of settings. 
default_profile: default # Comma-separated list of prefixes for user-defined settings. # custom_settings_prefixes: '' # System profile of settings. This settings are used by internal processes (Distributed DDL worker and so on). # system_profile: default # Buffer profile of settings. # This settings are used by Buffer storage to flush data to the underlying table. # Default: used from system_profile directive. # buffer_profile: default # Default database. default_database: default # Neutron database neutron_database: neutron # Server time zone could be set here. # Time zone is used when converting between String and DateTime types, # when printing DateTime in text formats and parsing DateTime from text, # it is used in date and time related functions, if specific time zone was not passed as an argument. # Time zone is specified as identifier from IANA time zone database, like UTC or Africa/Abidjan. # If not specified, system time zone at server startup is used. # Please note, that server could display time zone alias instead of specified name. # Example: W-SU is an alias for Europe/Moscow and Zulu is an alias for UTC. # timezone: Europe/Moscow # You can specify umask here (see "man umask"). Server will apply it on startup. # Number is always parsed as octal. Default umask is 027 (other users cannot read logs, data files, etc; group can only read). # umask: 022 # Perform mlockall after startup to lower first queries latency # and to prevent proton executable from being paged out under high IO load. # Enabling this option is recommended but will lead to increased startup time for up to a few seconds. mlock_executable: true # Reallocate memory for machine code ("text") using huge pages. Highly experimental. remap_executable: false # Uncomment below in order to use JDBC table engine and function. 
# To install and run JDBC bridge in background: # * [Debian/Ubuntu] # export MVN_URL=https://repo1.maven.org/maven2/ru/yandex//proton-jdbc-bridge # export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '<release>' | sed -e 's|.*>\(.*\)<.*|\1|') # wget https://github.com/proton/proton-jdbc-bridge/releases/download/v$PKG_VER/proton-jdbc-bridge_$PKG_VER-1_all.deb # apt install --no-install-recommends -f ./proton-jdbc-bridge_$PKG_VER-1_all.deb # proton-jdbc-bridge & # * [CentOS/RHEL] # export MVN_URL=https://repo1.maven.org/maven2/ru/yandex/proton/proton-jdbc-bridge # export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '<release>' | sed -e 's|.*>\(.*\)<.*|\1|') # wget https://github.com/proton/proton-jdbc-bridge/releases/download/v$PKG_VER/proton-jdbc-bridge-$PKG_VER-1.noarch.rpm # yum localinstall -y proton-jdbc-bridge-$PKG_VER-1.noarch.rpm # proton-jdbc-bridge & # Please refer to https://github.com/proton/proton-jdbc-bridge#usage for more information. # jdbc_bridge: # host: 127.0.0.1 # port: 9019 # The list of hosts allowed to use in URL-related storage engines and table functions. # If this section is not present in configuration, all hosts are allowed. # remote_url_allow_hosts: # Host should be specified exactly as in URL. The name is checked before DNS resolution. # Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts. # If port is explicitly specified in URL, the host:port is checked as a whole. # If host specified here without port, any port with this host allowed. # "yandex.ru" -> "yandex.ru:443", "yandex.ru:80" etc. is allowed, but "yandex.ru:80" -> only "yandex.ru:80" is allowed. # If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]". # If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked. # Regular expression can be specified. RE2 engine is used for regexps. # Regexps are not aligned: don't forget to add ^ and $.
Also don't forget to escape dot (.) metacharacter # (forgetting to do so is a common source of error). # If an element has an 'incl' attribute, the corresponding substitution from another file will be used as its value. # By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element. # Values for substitutions are specified in /yandex/name_of_substitution elements in that file. # ZooKeeper is used to store metadata about replicas, when using Replicated tables. # Optional. If you don't use replicated tables, you could omit that. # See https://proton.tech/docs/en/engines/table-engines/mergetree-family/replication/ # zookeeper: # - node: # host: example1 # port: 2181 # - node: # host: example2 # port: 2181 # - node: # host: example3 # port: 2181 # Substitutions for parameters of replicated tables. # Optional. If you don't use replicated tables, you could omit that. # See https://proton.tech/docs/en/engines/table-engines/mergetree-family/replication/#creating-replicated-tables # macros: # shard: 01 # replica: example01-01-1 # Reloading interval for embedded dictionaries, in seconds. Default: 3600. builtin_dictionaries_reload_interval: 3600 # Maximum session timeout, in seconds. Default: 3600. max_session_timeout: 3600 # Default session timeout, in seconds. Default: 60. default_session_timeout: 60 # Sending data to Graphite for monitoring. Several sections can be defined.
# interval - send every X seconds # root_path - prefix for keys # hostname_in_path - append hostname to root_path (default = true) # metrics - send data from table system.metrics # events - send data from table system.events # asynchronous_metrics - send data from table system.asynchronous_metrics # graphite: # host: localhost # port: 42000 # timeout: 0.1 # interval: 60 # root_path: one_min # hostname_in_path: true # metrics: true # events: true # events_cumulative: false # asynchronous_metrics: true # graphite: # host: localhost # port: 42000 # timeout: 0.1 # interval: 1 # root_path: one_sec # metrics: true # events: true # events_cumulative: false # asynchronous_metrics: false # Serve endpoint for Prometheus monitoring. # endpoint - metrics path (relative to root, starting with "/") # port - port to setup server. If not defined or 0, then http_port is used # metrics - send data from table system.metrics # events - send data from table system.events # asynchronous_metrics - send data from table system.asynchronous_metrics # status_info - send data from different component from CH, ex: Dictionaries status prometheus: endpoint: /metrics port: 9363 metrics: true events: true asynchronous_metrics: true status_info: true external_stream: true # Query log. Used only for queries with setting log_queries = 1. query_log: # What table to insert data. If the table does not exist, it will be created. # When query log structure is changed after system update, # then old table will be renamed and new table will be created automatically.
database: system table: query_log # PARTITION BY expr: https://proton.yandex/docs/en/table_engines/mergetree-family/custom_partitioning_key/ # Example: # event_date # toMonday(event_date) # toYYYYMM(event_date) # toStartOfHour(event_time) partition_by: to_YYYYMM(event_date) # Table TTL specification: https://proton.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#mergetree-table-ttl # Example: # event_date + INTERVAL 1 WEEK # event_date + INTERVAL 7 DAY DELETE # event_date + INTERVAL 2 WEEK TO DISK 'bbb' ttl: 'event_date + INTERVAL 30 DAY DELETE' # Instead of partition_by, you can provide full engine expression (starting with ENGINE = ) with parameters, # Example: engine: 'ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024' # Interval of flushing data. flush_interval_milliseconds: 7500 # Trace log. Stores stack traces collected by query profilers. # See query_profiler_real_time_period_ns and query_profiler_cpu_time_period_ns settings. trace_log: database: system table: trace_log partition_by: to_YYYYMM(event_date) flush_interval_milliseconds: 7500 ttl: 'event_date + INTERVAL 7 DAY DELETE' # Query thread log. Has information about all threads participated in query execution. # Used only for queries with setting log_query_threads = 1. query_thread_log: database: system table: query_thread_log partition_by: to_YYYYMM(event_date) ttl: 'event_date + INTERVAL 30 DAY DELETE' flush_interval_milliseconds: 7500 # Query views log. Has information about all dependent views associated with a query. # Used only for queries with setting log_query_views = 1. query_views_log: database: system table: query_views_log partition_by: to_YYYYMM(event_date) flush_interval_milliseconds: 7500 ttl: 'event_date + INTERVAL 30 DAY DELETE' # Uncomment if use part log. # Part log contains information about all actions with parts in MergeTree tables (creation, deletion, merges, downloads). 
part_log: database: system table: part_log partition_by: to_YYYYMM(event_date) flush_interval_milliseconds: 7500 ttl: 'event_date + INTERVAL 30 DAY DELETE' # Uncomment to write text log into table. # Text log contains all information from usual server log but stores it in structured and efficient way. # The level of the messages that goes to the table can be limited (<level>), if not specified all messages will go to the table. # text_log: # database: system # table: text_log # flush_interval_milliseconds: 7500 # level: '' # Metric log contains rows with current values of ProfileEvents, CurrentMetrics collected with "collect_interval_milliseconds" interval. metric_log: database: system table: metric_log flush_interval_milliseconds: 7500 collect_interval_milliseconds: 1000 ttl: 'event_date + INTERVAL 30 DAY DELETE' # Asynchronous metric log contains values of metrics from # system.asynchronous_metrics. asynchronous_metric_log: database: system table: asynchronous_metric_log # Asynchronous metrics are updated once a minute, so there is # no need to flush more often. flush_interval_milliseconds: 60000 ttl: 'event_date + INTERVAL 7 DAY DELETE' # OpenTelemetry log contains OpenTelemetry trace spans. opentelemetry_span_log: # The default table creation code is insufficient, this <engine> spec # is a workaround. There is no 'event_time' for this log, but two times, # start and finish. It is sorted by finish time, to avoid inserting # data too far away in the past (probably we can sometimes insert a span # that is seconds earlier than the last span in the table, due to a race # between several spans inserted in parallel). This gives the spans a # global order that we can use to e.g. retry insertion into some external # system. engine: |- engine MergeTree partition by to_YYYYMM(finish_date) order by (finish_date, finish_time_us, trace_id) ttl finish_date + INTERVAL 7 DAY DELETE database: system table: opentelemetry_span_log flush_interval_milliseconds: 7500 # Crash log. 
Stores stack traces for fatal errors. # This table is normally empty. crash_log: database: system table: crash_log partition_by: '' flush_interval_milliseconds: 1000 # Processor profile log. processors_profile_log: database: system table: processors_profile_log partition_by: to_YYYYMM(event_date) flush_interval_milliseconds: 7500 # Parameters for embedded dictionaries, used in Yandex.Metrica. # See https://proton.yandex/docs/en/dicts/internal_dicts/ # Path to file with region hierarchy. # path_to_regions_hierarchy_file: /opt/geo/regions_hierarchy.txt # Path to directory with files containing names of regions # path_to_regions_names_files: /opt/geo/ # top_level_domains_path: /var/lib/proton/top_level_domains/ # Custom TLD lists. # Format: name: /path/to/file # Changes will not be applied w/o server restart. # Path to the list is under top_level_domains_path (see above). top_level_domains_lists: '' # public_suffix_list: /path/to/public_suffix_list.dat # Configuration of external dictionaries. See: # https://proton.tech/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts dictionaries_config: '*_dictionary.xml' # Uncomment if you want data to be compressed 30-100% better. # Don't do that if you just started using proton. # compression: # # Set of variants. Checked in order. Last matching case wins. If nothing matches, lz4 will be used. # case: # Conditions. All must be satisfied. Some conditions may be omitted. # # min_part_size: 10000000000 # Min part size in bytes. # # min_part_size_ratio: 0.01 # Min size of part relative to whole table size. # # What compression method to use. # method: zstd # Allow to execute distributed DDL queries (CREATE, DROP, ALTER, RENAME) on cluster. # Works only if ZooKeeper is enabled. Comment it if such functionality isn't required. 
distributed_ddl: # Path in ZooKeeper to queue with DDL queries path: /proton/task_queue/ddl # Settings from this profile will be used to execute DDL queries # profile: default # Controls how many ON CLUSTER queries can be run simultaneously. # pool_size: 1 # Cleanup settings (active tasks will not be removed) # Controls task TTL (default 1 week) # task_max_lifetime: 604800 # Controls how often cleanup should be performed (in seconds) # cleanup_delay_period: 60 # Controls how many tasks could be in the queue # max_tasks_in_queue: 1000 # Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h # merge_tree: # max_suspicious_broken_parts: 5 # Protection from accidental DROP. # If size of a MergeTree table is greater than max_stream_size_to_drop (in bytes), then the table cannot be dropped with any DROP query. # If you want to delete one table and don't want to change proton-server config, you could create special file <proton-path>/flags/force_drop_table and make DROP once. # By default max_stream_size_to_drop is 50GB; max_stream_size_to_drop=0 allows dropping any table. # The same for max_partition_size_to_drop. # Uncomment to disable protection. # max_stream_size_to_drop: 0 # max_partition_size_to_drop: 0 # Example of parameters for GraphiteMergeTree table engine graphite_rollup_example: pattern: regexp: click_cost function: any retention: - age: 0 precision: 3600 - age: 86400 precision: 60 default: function: max retention: - age: 0 precision: 60 - age: 3600 precision: 300 - age: 86400 precision: 3600 # Directory in <proton-path> containing schema files for various input formats. # The directory will be created if it doesn't exist.
format_schema_path: /var/lib/proton/format_schemas/ # Directory to look for protobuf files for the well-known types: # https://protobuf.dev/reference/protobuf/google.protobuf/ google_protos_path: /usr/share/proton/protos/ # Default query masking rules, matching lines would be replaced with something else in the logs # (both text logs and system.query_log). # name - name for the rule (optional) # regexp - RE2 compatible regular expression (mandatory) # replace - substitution string for sensitive data (optional, by default - six asterisks) query_masking_rules: rule: name: hide encrypt/decrypt arguments regexp: '((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:''(?:\\''|.)+''|.*?)\s*\)' # or more secure, but also more invasive: # (aes_\w+)\s*\(.*\) replace: \1(???) # Uncomment to use custom http handlers. # rules are checked from top to bottom, first match runs the handler # url - to match request URL, you can use 'regex:' prefix to use regex match(optional) # methods - to match request method, you can use commas to separate multiple method matches(optional) # headers - to match request headers, match each child element(child element name is header name), you can use 'regex:' prefix to use regex match(optional) # handler is request handler # type - supported types: static, dynamic_query_handler, predefined_query_handler # query - use with predefined_query_handler type, executes query when the handler is called # query_param_name - use with dynamic_query_handler type, extracts and executes the value corresponding to the <query_param_name> value in HTTP request params # status - use with static type, response status code # content_type - use with static type, response content-type # response_content - use with static type, response content sent to client; when using the prefix 'file://' or 'config://', the content is read from the file or configuration and sent to the client.
# NOTE, if enable these dynamic handlers, please follow /timeplusd/{uri_path} convention, check createHandlersFactoryFromConfig() in HTTPHandlerFactory.cpp # http_handlers: # - rule: # url: / # methods: POST,GET # headers: # pragma: no-cache # handler: # type: dynamic_query_handler # query_param_name: query # - rule: # url: /predefined_query # methods: POST,GET # handler: # type: predefined_query_handler # query: 'SELECT * FROM system.settings' # - rule: # handler: # type: static # status: 200 # content_type: 'text/plain; charset=UTF-8' # response_content: config://http_server_default_response send_crash_reports: # Changing <enabled> to true allows sending crash reports to # the proton core developers team via Sentry https://sentry.io # Doing so at least in pre-production environments is highly appreciated enabled: false # Change <anonymize> to true if you don't feel comfortable attaching the server hostname to the crash report anonymize: false # Default endpoint should be changed to different Sentry DSN only if you have # some in-house engineers or hired consultants who're going to debug proton issues for you endpoint: # Uncomment to disable proton internal DNS caching. # disable_internal_dns_cache: 1 #rocksdb: # options: # max_background_jobs: 8 # column_family_options: # num_levels: 2 # tables: # - table # name: TABLE # options: # max_background_jobs: 8 # column_family_options: # num_levels: 2 # User defined grok patterns file grok_patterns_file: /etc/proton-server/grok-patterns # Maximum number of concurrent queries. max_concurrent_queries: 100 # Maximum number of concurrent insert queries. max_concurrent_insert_queries: 100 # Maximum number of concurrent select queries. max_concurrent_select_queries: 100 # On Linux systems this can control the behavior of OOM killer. # oom_score: 1000 settings: global: query_mode: streaming # Default query mode. table or streaming query_resource_group: "dedicated" # Default resource group. 
dedicated or shared enable_light_ingest: true # Light ingest is inserting partial columns of a table _tp_enable_log_stream_expr: true # Log system analytic synchronous_ddl: true # If setting is enabled, the DDL for streaming storage will be executed synchronously otherwise it will be asynchronous. By default is enabled. asterisk_include_reserved_columns: true # Show reserved columns on SELECT query. async_ingest_block_timeout_ms: 12000 # Max duration for a block to commit before it is considered expired during async ingestion aysnc_ingest_max_outstanding_blocks: 10000 # Max outstanding blocks to be committed per stream during async ingestion part_commit_pool_size: 8 # Total shared thread pool size for building and committing parts for Stream max_idempotent_ids: 1000 # Maximum idempotent IDs to keep in memory and on disk for idempotent data ingestion _tp_internal_system_open_sesame: true # Control the access to system.* streams javascript_max_memory_bytes: 104857600 #Maximum heap size of javascript UDA/UDF in bytes, default is 100*1024*1024 bytes recovery_policy: "strict" # Recovery policy for materialized view. strict or best_effort recovery_retry_for_sn_failure: 3 # retry times for sn failure. this value only apply if the `recovery_policy` is `best_effort` max_block_size: 65409 # 65536 - (PADDING_FOR_SIMD - 1) max_insert_block_size: 65409 # 65536 - (PADDING_FOR_SIMD - 1) stream: default_shards: 1 default_replicas: 1 default_sharding_expr: "" # Empty string means `rand()` or `weak_hash32(<primary_keys>) if primary key set` distributed_ingest_mode: "async" # Data ingestion mode for Stream logstore: "" logstore_replication_factor: 1 storage_type: "hybrid" logstore_codec: none logstore_retention_bytes: -1 # when this threshold reaches, streaming storage delete old data. -1 means no limit logstore_retention_ms: 86400000 # when this threshold reaches, streaming storage delete old data. 
-1 means no limit cluster_settings: logstore: # Multiple clusters of streaming storage are supported kafka: enabled: false default: true # true means, the streaming storage will be used to provision system internal topics cluster_name: default-sys-kafka-cluster-name cluster_id: default-sys-kafka-cluster-id security_protocol: PLAINTEXT # support PLAINTEXT, SASL_PLAINTEXT, SASL_SSL # if security_protocol is SASL_PLAINTEXT, username and password should be specified. # username: # password: # if security_protocol is SASL_SSL, ssl_ca_cert_file should be specified. # ssl_ca_cert_file: # Setup the Kafka brokers here brokers: localhost:9092 # Group ID used internally. Once it is setup, can't be changed. # Otherwise, there may have data duplication as proton uses it # for checkpointing group_id: proton replication_factor: 1 # Refer to https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md debug: # generic, broker, topic etc # Note Redpanda doesn't support idempotence for compact topic enable_idempotence: false topic_metadata_refresh_interval_ms: 300000 message_max_bytes: 1000000 statistic_internal_ms: 30000 queue_buffering_max_messages: 100000 queue_buffering_max_kbytes: 1048576 queue_buffering_max_ms: 50 message_send_max_retries: 2 retry_backoff_ms: 100 compression_codec: snappy message_timeout_ms: 40000 message_delivery_async_poll_ms: 100 message_delivery_sync_poll_ms: 10 check_crcs: false auto_commit_interval_ms: 5000 fetch_message_max_bytes: 1048576 fetch_wait_max_ms: 500 queued_min_messages: 1000000 queued_max_messages_kbytes: 65536 session_timeout_ms: 10000 max_poll_interval_ms: 30000 dedicated_subscription_pool_size: 2 shared_subscription_pool_max_size: 10 shared_subscription_flush_threshold_count: 10000 shared_subscription_flush_threshold_size: 10485760 shared_subscription_flush_threshold_ms: 1000 streaming_processing_pool_size: 100 nativelog: enabled: true default: false check_crcs: false max_schedule_threads: 8 max_adhoc_schedule_threads: 8 
metastore_data_dir: /var/lib/proton/nativelog/meta/ log_data_dirs: # Proton doesn't support normal yaml list yet dir1: /var/lib/proton/nativelog/log/ fetch_max_wait_ms: 500 # max wait time if no records are available fetch_max_bytes: 65536 # max bytes to fetch per request log_max_record_size: 10485760 log_segment_size: 4294967296 log_retention_size: -1 log_retention_ms: 604800000 log_index_internal_bytes: 4096 log_index_internal_records: 1000 log_flush_interval_ms: 600000 log_flush_interval_records: 10000 log_compression_codec: none cache_max_cached_entries: 10000 cache_max_cached_bytes: 419430400 cache_max_cached_entries_per_shard: 100 cache_max_cached_bytes_per_shard: 4194304 # Node identity shall be unique in proton cluster # When leave it empty, the system will the nodes FQDN and tcp port to # generate one node_identity: localhost:8123 # Metastore server config metastore_server: enable_ipv6: true http_port: 9444 server_id: 1 namespace_whitelist: namespace: - "proton" - "neutron" - "udf" coordination_settings: #  session_timeout_ms: 30000 #  operation_timeout_ms: 10000 #  #  #  #  #  #  #  raft_logs_level: information #  force_sync: false #  startup_timeout: 60000 #  shutdown_timeout: 5000 #  stale_log_gap: 10000 #  fresh_log_gap: 200 #  #  reserved_log_items: 100000 #  snapshot_distance: 100000 #  max_requests_batch_size: 100 #  auto_forwarding: true #  quorum_reads: false raft_configuration: server: id: 1 hostname: localhost port: 9445 http_port: 3218 ```

mfreeman451 commented

2026-03-28 04:22:43 +00:00

Author

Owner

Imported GitHub comment.

Original author: @mfreeman451
Original URL: https://github.com/carverauto/serviceradar/issues/715#issuecomment-2849428817
Original created: 2025-05-04T21:15:14Z

closing as completed

Imported GitHub comment. Original author: @mfreeman451 Original URL: https://github.com/carverauto/serviceradar/issues/715#issuecomment-2849428817 Original created: 2025-05-04T21:15:14Z --- closing as completed