#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

## Bookie settings

#############################################################################
## Server parameters
#############################################################################

# Port that the bookie server listens on
bookiePort=3181

# Directories BookKeeper outputs its write ahead log.
# Multiple directories can be defined to store write ahead logs, separated by ','.
# For example:
#   journalDirectories=/tmp/bk-journal1,/tmp/bk-journal2
# If journalDirectories is set, bookies will skip journalDirectory and use
# the directories from this setting.
# journalDirectories=/tmp/bk-journal

# Directory Bookkeeper outputs its write ahead log
# @deprecated since 4.5.0. journalDirectories is preferred over journalDirectory.
journalDirectory=data/bookkeeper/journal

# Configure the bookie to allow/disallow multiple ledger/index/journal directories
# in the same filesystem disk partition
# allowMultipleDirsUnderSameDiskPartition=false

# Minimum safe usable size to be available in the index directory for the bookie to create
# an index file while replaying the journal at the time of bookie start in read-only mode (in bytes)
minUsableSizeForIndexFileCreation=1073741824

# Set the network interface that the bookie should listen on.
# If not set, the bookie will listen on all interfaces.
# listeningInterface=eth0

# Configure a specific hostname or IP address that the bookie should use to advertise itself to
# clients. If not set, the bookie will advertise its own IP address or hostname, depending on the
# listeningInterface and useHostNameAsBookieID settings.
advertisedAddress=

# Whether the bookie is allowed to use a loopback interface as its primary
# interface (i.e. the interface it uses to establish its identity).
# By default, loopback interfaces are not allowed as the primary
# interface.
# Using a loopback interface as the primary interface usually indicates
# a configuration error. For example, it's fairly common in some VPS setups
# to not configure a hostname, or to have the hostname resolve to
# 127.0.0.1. If this is the case, then all bookies in the cluster will
# establish their identities as 127.0.0.1:3181, and only one will be able
# to join the cluster. For VPSs configured like this, you should explicitly
# set the listening interface.
allowLoopback=false

# Interval to watch whether bookie is dead or not, in milliseconds
bookieDeathWatchInterval=1000

# When entryLogPerLedgerEnabled is enabled, a checkpoint doesn't happen
# when a new active entrylog is created / previous one is rolled over.
# Instead SyncThread checkpoints periodically with 'flushInterval' delay
# (in milliseconds) in between executions. Checkpoint flushes both ledger
# entryLogs and ledger index pages to disk.
# Flushing entrylog and index files will introduce much random disk I/O.
# If the journal dir and ledger dirs are each on different devices,
# flushing would not affect performance. But if the journal dir
# and ledger dirs are on the same device, performance degrades significantly
# with too frequent flushing. You can consider increasing the flush interval
# to get better performance, but the bookie server will then need more time
# to restart after a failure.
# This config is used only when entryLogPerLedgerEnabled is enabled.
flushInterval=60000

# Allow the expansion of bookie storage capacity. Newly added ledger
# and index dirs must be empty.
# allowStorageExpansion=false

# Whether the bookie should use its hostname to register with the
# co-ordination service (e.g. the ZooKeeper service).
# When false, the bookie will use its IP address for the registration.
# Defaults to false.
useHostNameAsBookieID=false

# Whether the bookie is allowed to use an ephemeral port (port 0) as its
# server port. By default, an ephemeral port is not allowed.
# Using an ephemeral port as the service port usually indicates a configuration
# error. However, in unit tests, using an ephemeral port will address port
# conflict problems and allow running tests in parallel.
# allowEphemeralPorts=false

# Whether to allow the bookie to listen for BookKeeper clients executed on the local JVM.
# enableLocalTransport=false

# Whether to allow the bookie to disable binding on network interfaces;
# the bookie will then be available only to BookKeeper clients executed on the local JVM.
# disableServerSocketBind=false

# The number of bytes we should use as chunk allocation for
# org.apache.bookkeeper.bookie.SkipListArena
# skipListArenaChunkSize=4194304

# The max size we should allocate from the skiplist arena. Allocations
# larger than this should be allocated directly by the VM to avoid fragmentation.
# skipListArenaMaxAllocSize=131072

# The bookie authentication provider factory class name.
# If this is null, no authentication will take place.
# bookieAuthProviderFactoryClass=null

#############################################################################
## Garbage collection settings
#############################################################################

# Interval between garbage collection runs, in milliseconds.
# Since garbage collection is running in background, too frequent gc
# will hurt performance.
# It is better to give a higher number of gc
# interval if there is enough disk capacity.
gcWaitTime=900000

# Interval between garbage collection runs of overreplicated
# ledgers, in milliseconds [Default: 1 day]. This should not be run very frequently
# since we read the metadata for all the ledgers on the bookie from zk
gcOverreplicatedLedgerWaitTime=86400000

# Number of threads that should handle write requests. If zero, the writes would
# be handled by netty threads directly.
numAddWorkerThreads=0

# Number of threads that should handle read requests. If zero, the reads would
# be handled by netty threads directly.
numReadWorkerThreads=8

# Number of threads that should be used for high priority requests
# (i.e. recovery reads and adds, and fencing).
numHighPriorityWorkerThreads=8

# If read worker threads are enabled, limit the number of pending requests, to
# prevent the executor queue from growing indefinitely
maxPendingReadRequestsPerThread=2500

# If add worker threads are enabled, limit the number of pending requests, to
# prevent the executor queue from growing indefinitely
maxPendingAddRequestsPerThread=10000

# Whether force compaction is allowed when the disk is full or almost full.
# Forcing GC may get some space back, but may also fill up disk space more quickly.
# This is because new log files are created before GC, while old garbage
# log files are deleted after GC.
# isForceGCAllowWhenNoSpace=false

# True if the bookie should double check readMetadata prior to gc
# verifyMetadataOnGC=false

#############################################################################
## TLS settings
#############################################################################

# TLS Provider (JDK or OpenSSL).
# tlsProvider=OpenSSL

# The path to the class that provides security.
# tlsProviderFactoryClass=org.apache.bookkeeper.security.SSLContextFactory

# Type of security used by server.
# tlsClientAuthentication=true

# Bookie Keystore type.
# tlsKeyStoreType=JKS

# Bookie Keystore location (path).
# tlsKeyStore=null

# Bookie Keystore password path, if the keystore is protected by a password.
# tlsKeyStorePasswordPath=null

# Bookie Truststore type.
# tlsTrustStoreType=null

# Bookie Truststore location (path).
# tlsTrustStore=null

# Bookie Truststore password path, if the trust store is protected by a password.
# tlsTrustStorePasswordPath=null

#############################################################################
## Long poll request parameter settings
#############################################################################

# The number of threads that should handle long poll requests.
# numLongPollWorkerThreads=10

# The tick duration in milliseconds for long poll requests.
# requestTimerTickDurationMs=10

# The number of ticks per wheel for the long poll request timer.
# requestTimerNumTicks=1024

#############################################################################
## AutoRecovery settings
#############################################################################

# The interval between auditor bookie checks.
# The auditor bookie check inspects ledger metadata to see which bookies should
# contain entries for each ledger. If a bookie which should contain entries is
# unavailable, then the ledger containing that entry is marked for recovery.
# Setting this to 0 disables the periodic check. Bookie checks will still
# run when a bookie fails.
# The interval is specified in seconds.
auditorPeriodicBookieCheckInterval=86400

# The number of entries that a replication will rereplicate in parallel.
rereplicationEntryBatchSize=100

# Auto-replication
# The grace period, in seconds, that the replication worker waits before fencing and
# replicating a ledger fragment that's still being written to upon bookie failure.
openLedgerRereplicationGracePeriod=30

# Whether the bookie itself can also start the auto-recovery service
autoRecoveryDaemonEnabled=true

# How long to wait, in seconds, before starting auto recovery of a lost bookie
lostBookieRecoveryDelay=0

#############################################################################
## Netty server settings
#############################################################################

# This setting is used to enable/disable Nagle's algorithm, which is a means of
# improving the efficiency of TCP/IP networks by reducing the number of packets
# that need to be sent over the network.
# If you are sending many small messages, such that more than one can fit in
# a single IP packet, setting serverTcpNoDelay to false to enable Nagle's algorithm
# can provide better performance.
# Default value is true.
serverTcpNoDelay=true

# This setting is used to send keep-alive messages on connection-oriented sockets.
# serverSockKeepalive=true

# The socket linger timeout on close.
# When enabled, a close or shutdown will not return until all queued messages for
# the socket have been successfully sent or the linger timeout has been reached.
# Otherwise, the call returns immediately and the closing is done in the background.
# serverTcpLinger=0

# The Recv ByteBuf allocator initial buf size.
# byteBufAllocatorSizeInitial=65536

# The Recv ByteBuf allocator min buf size.
# byteBufAllocatorSizeMin=65536

# The Recv ByteBuf allocator max buf size.
# byteBufAllocatorSizeMax=1048576

#############################################################################
## Journal settings
#############################################################################

# The journal format version to write.
# Available formats are 1-6:
# 1: no header
# 2: a header section was added
# 3: ledger key was introduced
# 4: fencing key was introduced
# 5: expanding header to 512 and padding writes to align sector size configured by `journalAlignmentSize`
# 6: persisting explicitLac is introduced
# By default, it is `6`.
# If you'd like to disable persisting ExplicitLac, you can set this config to < `6` and also
# fileInfoFormatVersionToWrite should be set to 0. If there is a mismatch then the server config is considered invalid.
# You can disable `padding-writes` by setting journal version back to `4`. This feature is available in 4.5.0
# and onward versions.
journalFormatVersionToWrite=5

# Max file size of journal file, in megabytes
# A new journal file will be created when the old one reaches the file size limitation
journalMaxSizeMB=2048

# Max number of old journal files to keep
# Keeping a number of old journal files can help data recovery in special cases
journalMaxBackups=5

# How much space should we pre-allocate at a time in the journal.
journalPreAllocSizeMB=16

# Size of the write buffers used for the journal
journalWriteBufferSizeKB=64

# Should we remove pages from page cache after force write
journalRemoveFromPageCache=true

# Should the data be fsynced on journal before acknowledgment.
# By default, data sync is enabled to guarantee durability of writes.
# Beware: while disabling data sync in the Bookie journal might improve the bookie write performance, it will also
# introduce the possibility of data loss. With no sync, the journal entries are written in the OS page cache but
# not flushed to disk. In case of power failure, the affected bookie might lose the unflushed data. If the ledger
# is replicated to multiple bookies, the chances of data loss are reduced though still present.
journalSyncData=true

# Should we group journal force writes, which optimizes group commit
# for higher throughput
journalAdaptiveGroupWrites=true

# Maximum latency to impose on a journal write to achieve grouping
journalMaxGroupWaitMSec=1

# Maximum writes to buffer to achieve grouping
journalBufferedWritesThreshold=524288

# The number of threads that should handle journal callbacks
numJournalCallbackThreads=8

# All the journal writes and commits should be aligned to given size.
# If not, zeros will be padded to align to given size.
# It only takes effect when journalFormatVersionToWrite is set to 5
journalAlignmentSize=4096

# Maximum entries to buffer to impose on a journal write to achieve grouping.
# journalBufferedEntriesThreshold=0

# If we should flush the journal when journal queue is empty
journalFlushWhenQueueEmpty=false

#############################################################################
## Ledger storage settings
#############################################################################

# Ledger storage implementation class
ledgerStorageClass=org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage

# Directory Bookkeeper outputs ledger snapshots
# Multiple directories can be defined to store snapshots, separated by ','
# For example:
# ledgerDirectories=/tmp/bk1-data,/tmp/bk2-data
#
# Ideally ledger dirs and journal dir are each on a different device,
# which reduces the contention between random i/o and sequential write.
# It is possible to run with a single disk, but performance will be significantly lower.
ledgerDirectories=data/bookkeeper/ledgers

# Directories to store index files. If not specified, ledgerDirectories will be used to store them.
# indexDirectories=data/bookkeeper/ledgers

# Interval at which the auditor will do a check of all ledgers in the cluster.
# By default this runs once a week. The interval is set in seconds.
# To disable the periodic check completely, set this to 0.
# Note that periodic checking will put extra load on the cluster, so it should
# not be run more frequently than once a day.
auditorPeriodicCheckInterval=604800

# Whether sorted-ledger storage enabled (default true)
# sortedLedgerStorageEnabled=true

# The skip list data size limitation (default 64MB) in EntryMemTable
# skipListSizeLimit=67108864

#############################################################################
## Ledger cache settings
#############################################################################

# Max number of ledger index files that can be opened in the bookie server
# If the number of ledger index files reaches this limitation, the bookie
# server starts to swap some ledgers from memory to disk.
# Too frequent swapping will affect performance. You can tune this number
# to gain performance according to your requirements.
openFileLimit=0

# The fileinfo format version to write.
# Available formats are 0-1:
# 0: Initial version
# 1: persisting explicitLac is introduced
# By default, it is `1`.
# If you'd like to disable persisting ExplicitLac, you can set this config to 0 and
# also journalFormatVersionToWrite should be set to < 6. If there is a mismatch then the
# server config is considered invalid.
fileInfoFormatVersionToWrite=0

# Size of an index page in ledger cache, in bytes
# A larger index page can improve performance when writing pages to disk,
# which is efficient when you have a small number of ledgers and these
# ledgers have a similar number of entries.
# If you have a large number of ledgers and each ledger has fewer entries,
# a smaller index page would improve memory usage.
# pageSize=8192

# How many index pages are provided in ledger cache
# If the number of index pages reaches this limitation, the bookie server
# starts to swap some ledgers from memory to disk. You can increase
# this value when you find swapping has become more frequent. But make sure
# pageLimit*pageSize is not more than the JVM max memory limitation,
# otherwise you would get an OutOfMemoryException.
# In general, increasing pageLimit and using a smaller index page would
# give better performance in the case of a larger number of ledgers with fewer entries.
# If pageLimit is -1, bookie server will use 1/3 of JVM memory to compute
# the limitation of number of index pages.
pageLimit=0

#############################################################################
## Ledger manager settings
#############################################################################

# Ledger Manager Class
# What kind of ledger manager is used to manage how ledgers are stored, managed
# and garbage collected. See 'BookKeeper Internals' for detailed info.
# ledgerManagerFactoryClass=org.apache.bookkeeper.meta.HierarchicalLedgerManagerFactory

# @Deprecated - `ledgerManagerType` is deprecated in favor of using `ledgerManagerFactoryClass`.
# ledgerManagerType=hierarchical

# Root Zookeeper path to store ledger metadata
# This parameter is used by zookeeper-based ledger manager as a root znode to
# store all ledgers.
zkLedgersRootPath=/ledgers

#############################################################################
## Entry log settings
#############################################################################

# Max file size of entry logger, in bytes
# A new entry log file will be created when the old one reaches the file size limitation
logSizeLimit=1073741824

# Enable/Disable entry logger preallocation
entryLogFilePreallocationEnabled=true

# Entry log flush interval in bytes.
# Default is 0. 0 or less disables this feature and effectively flush
# happens on log rotation.
# Flushing in smaller chunks but more frequently reduces spikes in disk
# I/O. Flushing too frequently may also affect performance negatively.
# flushEntrylogBytes=0

# The number of bytes we should use as capacity for BufferedReadChannel. Default is 512 bytes.
readBufferSizeBytes=4096

# The number of bytes used as capacity for the write buffer. Default is 64KB.
writeBufferSizeBytes=65536

# Specifies if entryLog per ledger is enabled/disabled. If it is enabled, then there would be an
# active entrylog for each ledger. It would be ideal to enable this feature if the underlying
# storage device has multiple DiskPartitions or SSD and if in a given moment, entries of fewer
# number of active ledgers are written to a bookie.
# entryLogPerLedgerEnabled=false

#############################################################################
## Entry log compaction settings
#############################################################################

# Set the rate at which compaction will re-add entries. The unit is adds per second.
compactionRate=1000

# If bookie is using hostname for registration and in ledger metadata then
# whether to use short hostname or FQDN hostname. Defaults to false.
# useShortHostName=false

# Threshold of minor compaction
# Entry log files whose remaining size percentage falls below
# this threshold will be compacted in a minor compaction.
# If it is set to less than zero, the minor compaction is disabled.
minorCompactionThreshold=0.2

# Interval to run minor compaction, in seconds
# If it is set to less than zero, the minor compaction is disabled.
# Note: should be greater than gcWaitTime.
minorCompactionInterval=3600

# Set the maximum number of entries which can be compacted without flushing.
# When compacting, the entries are written to the entrylog and the new offsets
# are cached in memory. Once the entrylog is flushed the index is updated with
# the new offsets. This parameter controls the number of entries added to the
# entrylog before a flush is forced. A higher value for this parameter means
# more memory will be used for offsets. Each offset consists of 3 longs.
# This parameter should _not_ be modified unless you know what you're doing.
# The default is 100,000.
compactionMaxOutstandingRequests=100000

# Threshold of major compaction
# Entry log files whose remaining size percentage falls below
# this threshold will be compacted in a major compaction.
# Those entry log files whose remaining size percentage is still
# higher than the threshold will never be compacted.
# If it is set to less than zero, the major compaction is disabled.
majorCompactionThreshold=0.5

# Interval to run major compaction, in seconds
# If it is set to less than zero, the major compaction is disabled.
# Note: should be greater than gcWaitTime.
majorCompactionInterval=86400

# Throttle compaction by bytes or by entries.
isThrottleByBytes=false

# Set the rate at which compaction will re-add entries. The unit is adds per second.
compactionRateByEntries=1000

# Set the rate at which compaction will re-add entries. The unit is bytes added per second.
compactionRateByBytes=1000000

#############################################################################
## Statistics
#############################################################################

# Whether statistics are enabled
# enableStatistics=true

# Stats Provider Class (if statistics are enabled)
statsProviderClass=org.apache.bookkeeper.stats.prometheus.PrometheusMetricsProvider

# Default port for Prometheus metrics exporter
prometheusStatsHttpPort=8000

#############################################################################
## Read-only mode support
#############################################################################

# If all ledger directories configured are full, then support only read requests for clients.
# If "readOnlyModeEnabled=true" then on all ledger disks full, bookie will be converted
# to read-only mode and serve only read requests. Otherwise the bookie will be shutdown.
# By default this will be disabled.
readOnlyModeEnabled=true

# Whether the bookie is force started in read only mode or not
# forceReadOnlyBookie=false

# Persist the bookie status locally on the disks, so the bookies can keep their status upon restarts
# @Since 4.6
# persistBookieStatusEnabled=false

#############################################################################
## Disk utilization
#############################################################################

# For each ledger dir, maximum disk space which can be used.
# Default is 0.95f. i.e. 95% of disk can be used at most after which nothing will
# be written to that partition. If all ledger dir partitions are full, then bookie
# will turn to readonly mode if 'readOnlyModeEnabled=true' is set, else it will
# shutdown.
# Valid values should be in between 0 and 1 (exclusive).
diskUsageThreshold=0.95

# Disk usage warning threshold.
# NOTE(review): the description previously attached to this key was a verbatim copy of
# the diskUsageLwmThreshold (low water mark) text that follows. This key presumably sets
# the usage level at which the bookie starts warning before diskUsageThreshold is hit;
# confirm the exact semantics and default against the BookKeeper configuration reference.
# diskUsageWarnThreshold=0.95

# Set the disk free space low water mark threshold. Disk is considered full when
# usage threshold is exceeded. Disk returns back to non-full state when usage is
# below low water mark threshold. This prevents it from going back and forth
# between these states frequently when concurrent writes and compaction are
# happening. This also prevents the bookie from switching frequently between
# read-only and read-write states in the same cases.
# diskUsageLwmThreshold=0.90

# Disk check interval in milliseconds, interval to check the ledger dirs usage.
# Default is 10000
diskCheckInterval=10000

#############################################################################
## ZooKeeper parameters
#############################################################################

# A list of one or more servers on which Zookeeper is running.
# The server list can be comma separated values, for example:
# zkServers=zk1:2181,zk2:2181,zk3:2181
zkServers=localhost:2181

# ZooKeeper client session timeout in milliseconds
# Bookie server will exit if it receives SESSION_EXPIRED because it
# was partitioned off from ZooKeeper for more than the session timeout.
# JVM garbage collection or disk I/O can cause SESSION_EXPIRED.
# Increasing this value could help avoid this issue
zkTimeout=30000

# The Zookeeper client backoff retry start time in millis.
# zkRetryBackoffStartMs=1000

# The Zookeeper client backoff retry max time in millis.
# zkRetryBackoffMaxMs=10000

# Set ACLs on every node written on ZooKeeper, this way only allowed users
# will be able to read and write BookKeeper metadata stored on ZooKeeper.
# In order to make ACLs work you need to set up ZooKeeper JAAS authentication.
# All the bookies and clients need to share the same user, and this is usually
# done using Kerberos authentication. See the ZooKeeper documentation
zkEnableSecurity=false

#############################################################################
## HTTP server parameters
#############################################################################

# The flag enables/disables starting the admin http server. Default value is 'false'.
httpServerEnabled=false

# The http server port to listen on. Default value is 8080.
# Use `8000` as the port to keep it consistent with prometheus stats provider
httpServerPort=8000

# The http server class
httpServerClass=org.apache.bookkeeper.http.vertx.VertxHttpServer

# Configure a list of server components to enable and load on a bookie server.
# This provides a way to plug in and run extra services along with a bookie server.
#
# extraServerComponents=

#############################################################################
## DB Ledger storage configuration
#############################################################################

# These configs are used when the selected 'ledgerStorageClass' is
# org.apache.bookkeeper.bookie.storage.ldb.DbLedgerStorage

# Size of Write Cache. Memory is allocated from JVM direct memory.
# Write cache is used to buffer entries before flushing into the entry log
# For good performance, it should be big enough to hold a substantial amount
# of entries in the flush interval
# By default it will be allocated to 1/4th of the available direct memory
dbStorage_writeCacheMaxSizeMb=

# Size of Read cache. Memory is allocated from JVM direct memory.
# This read cache is pre-filled doing read-ahead whenever a cache miss happens
# By default it will be allocated to 1/4th of the available direct memory
dbStorage_readAheadCacheMaxSizeMb=

# How many entries to pre-fill in cache after a read cache miss
dbStorage_readAheadCacheBatchSize=1000

## RocksDB specific configurations
## DbLedgerStorage uses RocksDB to store the indexes from
## (ledgerId, entryId) -> (entryLog, offset)

# Size of RocksDB block-cache. For best performance, this cache
# should be big enough to hold a significant portion of the index
# database which can reach ~2GB in some cases
# Default is to use 10% of the direct memory size
dbStorage_rocksDB_blockCacheSize=

# Other RocksDB specific tunables
dbStorage_rocksDB_writeBufferSizeMB=64
dbStorage_rocksDB_sstSizeInMB=64
dbStorage_rocksDB_blockSize=65536
dbStorage_rocksDB_bloomFilterBitsPerKey=10
dbStorage_rocksDB_numLevels=-1
dbStorage_rocksDB_numFilesInLevel0=4
dbStorage_rocksDB_maxSizeInLevel1MB=256