# Docspell configuration (HOCON) — restserver and joex settings.
docspell.server {

  # This is shown in the top right corner of the web application
  app-name = "Docspell"

  # This is the id of this node. If you run more than one server, you
  # have to make sure to provide unique ids per node.
  app-id = "rest1"

  # This is the base URL this application is deployed to. This is used
  # to create absolute URLs and to configure the cookie.
  #
  # If default is not changed, the HOST line of the login request is
  # used instead or the value of the `X-Forwarded-For` header. If set
  # to some other value, the request is not inspected.
  base-url = "https://docs.pukeko.xyz"

  # This url is the base url for reaching this server internally.
  # While you might set `base-url` to some external address (like
  # mydocs.myserver.com), the `internal-url` must be set such that
  # other nodes can reach this server.
  internal-url = "http://restserver:7880"

  # Configures logging
  logging {
    # The format for the log messages. Can be one of:
    # Json, Logfmt, Fancy or Plain
    format = "Fancy"

    # The minimum level to log. From lowest to highest:
    # Trace, Debug, Info, Warn, Error
    minimum-level = "Warn"

    # Override the log level of specific loggers
    levels = {
      "docspell" = "Info"
      "org.flywaydb" = "Info"
      "binny" = "Info"
      "org.http4s" = "Info"
    }
  }

  # Where the server binds to.
  bind {
    address = "0.0.0.0"
    port = 7880
  }

  # Options for tuning the http server
  server-options {
    enable-http-2 = false

    # Maximum allowed connections
    max-connections = 1024

    # Timeout for waiting for the first output of the response
    response-timeout = 45s
  }

  # This is a hard limit to restrict the size of a batch that is
  # returned when searching for items. The user can set this limit
  # within the client config, but it is restricted by the server to
  # the number defined here. An admin might choose a lower number
  # depending on the available resources.
  max-item-page-size = 200

  # The number of characters to return for each item notes when
  # searching. Item notes may be very long, when returning them with
  # all the results from a search, they add quite some data to return.
  # In order to keep this low, a limit can be defined here.
  max-note-length = 180

  # This defines whether the classification form in the collective
  # settings is displayed or not. If all joex instances have document
  # classification disabled, it makes sense to hide its settings from
  # users.
  show-classification-settings = true

  # Authentication.
  auth {

    # The secret for this server that is used to sign the authenticator
    # tokens. If multiple servers are running, all must share the same
    # secret. You can use base64 or hex strings (prefix with b64: and
    # hex:, respectively). If empty, a random secret is generated.
    # Example: b64:YRx77QujCGkHSvll0TVEmtTaw3Z5eXr+nWMsEJowgKg=
    server-secret = ""

    # How long an authentication token is valid. The web application
    # will get a new one periodically.
    session-valid = "5 minutes"

    remember-me {
      enabled = true
      # How long the remember me cookie/token is valid.
      valid = "30 days"
    }

    # One of: fail, convert
    #
    # Accounts can be local or defined at a remote provider and
    # integrated via OIDC. If the same account is defined in both
    # sources, docspell by default fails if a user mixes logins (e.g.
    # when registering a user locally and then logging in with the
    # same user via OIDC). When set to `convert` docspell treats it as
    # being the same and simply updates the account to reflect the new
    # account source.
    on-account-source-conflict = "fail"
  }

  # Settings for "download as zip"
  download-all {
    # How many files to allow in a zip.
    max-files = 500

    # The maximum (uncompressed) size of the zip file contents.
    max-size = 1400M
  }

  # Configures OpenID Connect (OIDC) or OAuth2 authentication. Only
  # the "Authorization Code Flow" is supported.
  #
  # Multiple authentication providers can be defined. Each is
  # configured in the array below. The `provider` block gives all
  # details necessary to authenticate against an external OIDC or
  # OAuth provider. This requires at least two URLs for OIDC and three
  # for OAuth2. When using OIDC, the `user-url` is only required if
  # the account data is to be retrieved from the user-info endpoint
  # and not from the JWT token. For the request to the `user-url`, the
  # access token is then used to authenticate at the provider. Thus,
  # it doesn't need to be validated here and therefore no `sign-key`
  # setting is needed. However, if you want to extract the account
  # information from the access token, it must be validated here and
  # therefore the correct signature key and algorithm must be
  # provided. If the `sign-key` is left empty, the `user-url` is used
  # and must be specified. If the `sign-key` is _not_ empty, the
  # response from the authentication provider is validated using this
  # key.
  #
  # If a `logout-url` is provided, it will be used to finally redirect
  # the browser to this url that should logout the user from Docspell
  # at the provider.
  #
  # After successful authentication, docspell needs to create the
  # account. For this a username and collective name is required. The
  # account name is defined by the `user-key` and `collective-key`
  # setting. The `user-key` is used to search the JSON structure, that
  # is obtained from the JWT token or the user-info endpoint, for the
  # login name to use. It traverses the JSON structure recursively,
  # until it finds an object with that key. The first value is used.
  #
  # The `collective-key` can be used in multiple ways and both can
  # work together to retrieve the full account id:
  #
  # - If it starts with `fixed:`, like "fixed:collective", the name
  #   after the `fixed:` prefix is used as collective as is. So all
  #   users are in the same collective.
  #
  # - If it starts with `lookup:`, like "lookup:collective_name", the
  #   value after the prefix is used to search the JSON response for
  #   an object with this key, just like it works with the `user-key`.
  #
  # - If it starts with `account:`, like "account:demo", it works the
  #   same as `lookup:` only that the value is interpreted as the full
  #   account name of form `collective/login`. The `user-key` value is
  #   ignored in this case.
  #
  # If these values cannot be obtained from the response, docspell
  # fails the authentication. It is then assumed that the successfully
  # authenticated user at the OP has not enough permissions to access
  # docspell.
  #
  # Below are examples for OpenID Connect (keycloak) and OAuth2
  # (github).
  openid = [
    { enabled = false,

      # The name to render on the login link/button.
      display = "Keycloak"

      # This illustrates to use a custom keycloak setup as the
      # authentication provider. For details, please refer to the
      # keycloak documentation. The settings here assume a certain
      # configuration at keycloak.
      #
      # Keycloak can be configured to return the collective name for
      # each user in the access token. It may also be configured to
      # return it in the user info response. If it is already in the
      # access token, an additional request can be omitted. Set the
      # `sign-key` to an empty string then. Otherwise provide the
      # algo and key from your realm settings. In this example, the
      # realm is called "home".
      provider = {
        provider-id = "keycloak",
        client-id = "docspell",
        client-secret = "example-secret-439e-bf06-911e4cdd56a6",
        scope = "profile", # scope is required for OIDC
        authorize-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/auth",
        token-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/token",
        # User URL is not used when signature key is set.
        #user-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/userinfo",
        logout-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/logout",
        sign-key = "b64:anVzdC1hLXRlc3Q=",
        sig-algo = "RS512"
      },
      # The collective of the user is given in the access token as
      # property `docspell_collective`.
      collective-key = "lookup:docspell_collective",
      # The username to use for the docspell account
      user-key = "preferred_username"
    },
    { enabled = true,

      # The name to render on the login link/button.
      display = "Authelia"
      provider = {
        provider-id = "authelia",
        client-id = "docspell",
        # NOTE(review): plaintext client secret committed in this
        # config — consider injecting it via an environment variable
        # or secret store instead.
        client-secret = "tEf47Me$YsXG8K4%63$%!kbMqbgVnc*bAq2i4SPERay#T!&ajc35m&D%C#uRMiaSv@cRFxwMcqo%SwEq*49G9HufJ&d#^f*&MK9hzU6s&7C2^XmfGC8Up7YeegnH#VhP",
        scope = "openid profile groups email", # scopes requested from the provider
        authorize-url = "https://auth.pukeko.xyz/api/oidc/authorize",
        token-url = "https://auth.pukeko.xyz/api/oidc/token",
        user-url = "https://auth.pukeko.xyz/api/oidc/userinfo",
        # Empty sign-key: account data is fetched from `user-url`
        # instead of being extracted from the access token.
        sign-key = "",
        sig-algo = "RS256" # unused when sign-key is empty, but must be set to something
      },

      # If the authentication provider doesn't provide the
      # collective name, simply use a fixed one. This means all
      # users from this provider are in the same collective!
      collective-key = "fixed:shmick",

      # The claim in the user-info response that holds the login
      # name; it is used to construct the account in docspell.
      user-key = "preferred_username"
    }
  ]

  # When exactly one OIDC/OAuth provider is configured, then the webapp
  # automatically redirects to its authentication page skipping the
  # docspell login page.
  oidc-auto-redirect = true

  # This endpoint allows to upload files to any collective. The
  # intention is that local software integrates with docspell more
  # easily. Therefore the endpoint is not protected by the usual
  # means.
  #
  # For security reasons, this endpoint is disabled by default. If
  # enabled, you can choose from some ways to protect it. It may be a
  # good idea to further protect this endpoint using a firewall, such
  # that outside traffic is not routed.
  #
  # NOTE: If all protection methods are disabled, the endpoint is not
  # protected at all!
  integration-endpoint {
    enabled = false

    # The priority to use when submitting files through this endpoint.
    priority = "low"

    # The name used for the item "source" property when uploaded
    # through this endpoint.
    source-name = "integration"

    # IPv4 addresses to allow access. An empty list, if enabled,
    # prohibits all requests. IP addresses may be specified as simple
    # globs: a part marked as `*' matches any octet, like in
    # `192.168.*.*`. The `127.0.0.1' (the default) matches the
    # loopback address.
    allowed-ips {
      enabled = false
      ips = [ "127.0.0.1" ]
    }

    # Requests are expected to use http basic auth when uploading
    # files.
    http-basic {
      enabled = false
      realm = "Docspell Integration"
      user = "docspell-int"
      password = "docspell-int"
    }

    # Requests are expected to supply some specific header when
    # uploading files.
    http-header {
      enabled = false
      header-name = "Docspell-Integration"
      header-value = "some-secret"
    }
  }

  # This is a special endpoint that allows some basic administration.
  #
  # It is intended to be used by admins only, that is users who
  # installed the app and have access to the system. Normal users
  # should not have access and therefore a secret must be provided in
  # order to access it.
  #
  # This is used for some endpoints, for example:
  # - re-create complete fulltext index:
  #   curl -XPOST -H'Docspell-Admin-Secret: xyz' http://docspell-restserver:7880/api/v1/admin/fts/reIndexAll
  admin-endpoint {
    # The secret. If empty, the endpoint is disabled.
    secret = ""
  }

  # Configuration of the full-text search engine. (the same must be used for joex)
  full-text-search {
    # The full-text search feature can be disabled. It requires an
    # additional index server which needs additional memory and disk
    # space. It can be enabled later any time.
    #
    # Currently the SOLR search platform and PostgreSQL is supported.
    enabled = false

    # Which backend to use, either solr or postgresql
    backend = "solr"

    # Configuration for the SOLR backend.
    solr = {
      # The URL to solr
      url = "http://localhost:8983/solr/docspell"
      # Used to tell solr when to commit the data
      commit-within = 1000
      # If true, logs request and response bodies
      log-verbose = false
      # The defType parameter to lucene that defines the parser to
      # use. You might want to try "edismax" or look here:
      # https://solr.apache.org/guide/8_4/query-syntax-and-parsing.html#query-syntax-and-parsing
      def-type = "lucene"
      # The default combiner for tokens. One of {AND, OR}.
      q-op = "OR"
    }

    # Configuration for PostgreSQL backend
    postgresql = {
      # Whether to use the default database, only works if it is
      # postgresql
      use-default-connection = false

      # The database connection.
      jdbc {
        url = "jdbc:postgresql://server:5432/db"
        user = "pguser"
        password = ""
      }

      # A mapping from a language to a postgres text search config. By
      # default a language is mapped to a predefined config.
      # PostgreSQL has predefined configs for some languages. This
      # setting allows to create a custom text search config and
      # define it here for some or all languages.
      #
      # Example:
      #   { german = "my-german" }
      #
      # See https://www.postgresql.org/docs/14/textsearch-tables.html ff.
      pg-config = {
      }

      # Define which query parser to use.
      #
      # https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
      pg-query-parser = "websearch_to_tsquery"

      # Allows to define a normalization for the ranking.
      #
      # https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
      pg-rank-normalization = [ 4 ]
    }
  }

  # Configuration for the backend.
  backend {

    # Enable or disable debugging for e-mail related functionality. This
    # applies to both sending and receiving mails. For security reasons
    # logging is not very extensive on authentication failures. Setting
    # this to true, results in a lot of data printed to stdout.
    mail-debug = false

    # The database connection.
    jdbc {
      # The JDBC url to the database. By default a H2 file-based
      # database is configured. You can provide a postgresql or
      # mariadb connection here. When using H2 use the PostgreSQL
      # compatibility mode and AUTO_SERVER feature.
      #url = "jdbc:h2://"${java.io.tmpdir}"/docspell-demo.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
      url = "jdbc:postgresql://db:5432/dbname"

      # The database user.
      user = "dbuser"

      # The database password.
      password = "dbpass"
    }

    # Additional settings related to schema migration.
    database-schema = {
      # Whether to run main database migrations.
      run-main-migrations = true

      # Whether to run the fixup migrations.
      run-fixup-migrations = true

      # Use with care. This repairs all migrations in the database by
      # updating their checksums and removing failed migrations. Good
      # for testing, not recommended for normal operation.
      repair-schema = false
    }

    # Configuration for registering new users.
    signup {

      # The mode defines if new users can signup or not. It can have
      # three values:
      #
      # - open: every new user can sign up
      # - invite: new users can sign up only if they provide a correct
      #   invitation key. Invitation keys can be generated by the
      #   server.
      # - closed: signing up is disabled.
      mode = "open"

      # If mode == 'invite', a password must be provided to generate
      # invitation keys. It must not be empty.
      new-invite-password = ""

      # If mode == 'invite', this is the period an invitation token is
      # considered valid.
      invite-time = "3 days"
    }

    files {
      # Defines the chunk size (in bytes) used to store the files.
      # This will affect the memory footprint when uploading and
      # downloading files. At most this amount is loaded into RAM for
      # down- and uploading.
      #
      # It also defines the chunk size used for the blobs inside the
      # database.
      chunk-size = 2097152

      # The file content types that are considered valid. Docspell
      # will only pass these files to processing. The processing code
      # itself has also checks for which files are supported and which
      # not. This affects the uploading part and can be used to
      # restrict file types that should be handed over to processing.
      # By default all files are allowed.
      valid-mime-types = [ ]

      # The id of an enabled store from the `stores` array that should
      # be used.
      #
      # IMPORTANT NOTE: All nodes must have the exact same file store
      # configuration!
      default-store = "database"

      # A list of possible file stores. Each entry must have a unique
      # id. The `type` is one of: default-database, filesystem, s3.
      #
      # The enabled property serves currently to define target stores
      # for the "copy files" task. All stores with enabled=false are
      # removed from the list. The `default-store` must be enabled.
      stores = {
        database =
          { enabled = true
            type = "default-database"
          }

        filesystem =
          { enabled = false
            type = "file-system"
            directory = "/some/directory"
          }

        minio =
          { enabled = false
            type = "s3"
            endpoint = "http://localhost:9000"
            access-key = "username"
            secret-key = "password"
            bucket = "docspell"
          }
      }
    }

    addons = {
      enabled = false

      # Whether installing addons requiring network should be allowed
      # or not.
      allow-impure = true

      # Define patterns of urls that are allowed to install addons
      # from.
      #
      # A pattern is compared against an URL by comparing three parts
      # of an URL via globs: scheme, host and path.
      #
      # You can use '*' (0 or more) and '?' (one) as wildcards in each
      # part. For example:
      #
      #   https://*.mydomain.com/projects/*
      #   *s://gitea.mydomain/*
      #
      # A hostname is separated by dots and the path by a slash. A '*'
      # in a pattern means to match one or more characters. The path
      # pattern is always matching the given prefix. So /a/b/* matches
      # /a/b/c and /a/b/c/d and all other sub-paths.
      #
      # Multiple patterns can be defined via a comma separated string
      # or as an array. An empty string matches no URL, while the
      # special pattern '*' all by itself means to match every URL.
      allowed-urls = "*"

      # Same as `allowed-urls` but a match here means to deny addons
      # from this url.
      denied-urls = ""
    }
  }
}
|
||
docspell.joex {
|
||
|
||
# This is the id of this node. If you run more than one server, you
|
||
# have to make sure to provide unique ids per node.
|
||
app-id = "joex1"
|
||
|
||
|
||
# This is the base URL this application is deployed to. This is used
|
||
# to register this joex instance such that docspell rest servers can
|
||
# reach them
|
||
base-url = "http://docspell-joex:7878"
|
||
|
||
# Where the REST server binds to.
|
||
#
|
||
# JOEX provides a very simple REST interface to inspect its state.
|
||
bind {
|
||
address = "0.0.0.0"
|
||
port = 7878
|
||
}
|
||
|
||
# Configures logging
|
||
logging {
|
||
# The format for the log messages. Can be one of:
|
||
# Json, Logfmt, Fancy or Plain
|
||
format = "Fancy"
|
||
|
||
# The minimum level to log. From lowest to highest:
|
||
# Trace, Debug, Info, Warn, Error
|
||
minimum-level = "Warn"
|
||
|
||
# Override the log level of specific loggers
|
||
levels = {
|
||
"docspell" = "Info"
|
||
"org.flywaydb" = "Info"
|
||
"binny" = "Info"
|
||
"org.http4s" = "Info"
|
||
}
|
||
}
|
||
|
||
# The database connection.
|
||
#
|
||
# It must be the same connection as the rest server is using.
|
||
jdbc {
|
||
|
||
# The JDBC url to the database. By default a H2 file-based
|
||
# database is configured. You can provide a postgresql or mariadb
|
||
# connection here. When using H2 use the PostgreSQL compatibility
|
||
# mode and AUTO_SERVER feature.
|
||
#url = "jdbc:h2://"${java.io.tmpdir}"/docspell-demo.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
|
||
url = "jdbc:postgresql://db:5432/dbname"
|
||
# The database user.
|
||
user = "dbuser"
|
||
# The database password.
|
||
password = "dbpass"
|
||
}
|
||
|
||
# Additional settings related to schema migration.
|
||
database-schema = {
|
||
# Whether to run main database migrations.
|
||
run-main-migrations = true
|
||
|
||
# Whether to run the fixup migrations.
|
||
run-fixup-migrations = true
|
||
|
||
# Use with care. This repairs all migrations in the database by
|
||
# updating their checksums and removing failed migrations. Good
|
||
# for testing, not recommended for normal operation.
|
||
repair-schema = false
|
||
}
|
||
|
||
# Enable or disable debugging for e-mail related functionality. This
|
||
# applies to both sending and receiving mails. For security reasons
|
||
# logging is not very extensive on authentication failures. Setting
|
||
# this to true, results in a lot of data printed to stdout.
|
||
mail-debug = false
|
||
|
||
send-mail {
|
||
# This is used as the List-Id e-mail header when mails are sent
|
||
# from docspell to its users (example: for notification mails). It
|
||
# is not used when sending to external recipients. If it is empty,
|
||
# no such header is added. Using this header is often useful when
|
||
# filtering mails.
|
||
#
|
||
# It should be a string in angle brackets. See
|
||
# https://tools.ietf.org/html/rfc2919 for a formal specification
|
||
# of this header.
|
||
list-id = ""
|
||
}
|
||
|
||
# Configuration for the job scheduler.
|
||
scheduler {
|
||
|
||
# Each scheduler needs a unique name. This defaults to the node
|
||
# name, which must be unique, too.
|
||
name = ${docspell.joex.app-id}
|
||
|
||
# Number of processing allowed in parallel.
|
||
pool-size = 1
|
||
|
||
# A counting scheme determines the ratio of how high- and low-prio
|
||
# jobs are run. For example: 4,1 means run 4 high prio jobs, then
|
||
# 1 low prio and then start over.
|
||
counting-scheme = "4,1"
|
||
|
||
# How often a failed job should be retried until it enters failed
|
||
# state. If a job fails, it becomes "stuck" and will be retried
|
||
# after a delay.
|
||
retries = 2
|
||
|
||
# The delay until the next try is performed for a failed job. This
|
||
# delay is increased exponentially with the number of retries.
|
||
retry-delay = "1 minute"
|
||
|
||
# The queue size of log statements from a job.
|
||
log-buffer-size = 500
|
||
|
||
# If no job is left in the queue, the scheduler will wait until a
|
||
# notify is requested (using the REST interface). To also retry
|
||
# stuck jobs, it will notify itself periodically.
|
||
wakeup-period = "30 minutes"
|
||
}
|
||
|
||
periodic-scheduler {
|
||
|
||
# Each scheduler needs a unique name. This defaults to the node
|
||
# name, which must be unique, too.
|
||
name = ${docspell.joex.app-id}
|
||
|
||
# A fallback to start looking for due periodic tasks regularly.
|
||
# Usually joex instances should be notified via REST calls if
|
||
# external processes change tasks. But these requests may get
|
||
# lost.
|
||
wakeup-period = "10 minutes"
|
||
}
|
||
|
||
# Configuration for the user-tasks.
|
||
user-tasks {
|
||
# Allows to import e-mails by scanning a mailbox.
|
||
scan-mailbox {
|
||
# A limit of how many folders to scan through. If a user
|
||
# configures more than this, only up to this limit folders are
|
||
# scanned and a warning is logged.
|
||
max-folders = 50
|
||
|
||
# How many mails (headers only) to retrieve in one chunk.
|
||
#
|
||
# If this is greater than `max-mails' it is set automatically to
|
||
# the value of `max-mails'.
|
||
mail-chunk-size = 50
|
||
|
||
# A limit on how many mails to process in one job run. This is
|
||
# meant to avoid too heavy resource allocation to one
|
||
# user/collective.
|
||
#
|
||
# If more than this number of mails is encountered, a warning is
|
||
# logged.
|
||
max-mails = 500
|
||
}
|
||
}
|
||
|
||
|
||
# Docspell uses periodic house keeping tasks, like cleaning expired
|
||
# invites, that can be configured here.
|
||
house-keeping {
|
||
|
||
# When the house keeping tasks execute. Default is to run every
|
||
# week.
|
||
schedule = "Sun *-*-* 00:00:00 UTC"
|
||
|
||
# This task removes invitation keys that have been created but not
|
||
# used. The timespan here must be greater than the `invite-time'
|
||
# setting in the rest server config file.
|
||
cleanup-invites = {
|
||
|
||
# Whether this task is enabled.
|
||
enabled = true
|
||
|
||
# The minimum age of invites to be deleted.
|
||
older-than = "30 days"
|
||
}
|
||
|
||
# This task removes expired remember-me tokens. The timespan
|
||
# should be greater than the `valid` time in the restserver
|
||
# config.
|
||
cleanup-remember-me = {
|
||
# Whether the job is enabled.
|
||
enabled = true
|
||
|
||
# The minimum age of tokens to be deleted.
|
||
older-than = "30 days"
|
||
}
|
||
|
||
# Jobs store their log output in the database. Normally this data
|
||
# is only interesting for some period of time. The processing logs
|
||
# of old files can be removed eventually.
|
||
cleanup-jobs = {
|
||
|
||
# Whether this task is enabled.
|
||
enabled = true
|
||
|
||
# The minimum age of jobs to delete. It is matched against the
|
||
# `finished' timestamp.
|
||
older-than = "30 days"
|
||
|
||
# This defines how many jobs are deleted in one transaction.
|
||
# Since the data to delete may get large, it can be configured
|
||
# whether more or less memory should be used.
|
||
delete-batch = "100"
|
||
}
|
||
|
||
# Zip files created for downloading multiple files are cached and
|
||
# can be cleared periodically.
|
||
cleanup-downloads = {
|
||
|
||
# Whether to enable clearing old download archives.
|
||
enabled = true
|
||
|
||
# The minimum age of a download file to be deleted.
|
||
older-than = "14 days"
|
||
}
|
||
|
||
# Removes node entries that are not reachable anymore.
|
||
check-nodes {
|
||
# Whether this task is enabled
|
||
enabled = true
|
||
# How often the node must be unreachable, before it is removed.
|
||
min-not-found = 2
|
||
}
|
||
|
||
# Checks all files against their checksum
|
||
integrity-check {
|
||
enabled = true
|
||
}
|
||
}
|
||
|
||
# A periodic task to check for new releases of docspell. It can
|
||
# inform about a new release via e-mail. You need to specify an
|
||
# account that has SMTP settings to use for sending.
|
||
update-check {
|
||
# Whether to enable this task
|
||
enabled = false
|
||
|
||
# Sends the mail without checking the latest release. Can be used
|
||
# if you want to see if mail sending works, but don't want to wait
|
||
# until a new release is published.
|
||
test-run = false
|
||
|
||
# When the update check should execute. Default is to run every
|
||
# week. You can specify a time zone identifier, like
|
||
# 'Europe/Berlin' at the end.
|
||
schedule = "Sun *-*-* 00:00:00 UTC"
|
||
|
||
# An account id in form of `collective/user` (or just `user` if
|
||
# collective and user name are the same). This user account must
|
||
# have at least one valid SMTP settings which are used to send the
|
||
# mail.
|
||
sender-account = ""
|
||
|
||
# The SMTP connection id that should be used for sending the mail.
|
||
smtp-id = ""
|
||
|
||
# A list of recipient e-mail addresses.
|
||
# Example: `[ "john.doe@gmail.com" ]`
|
||
recipients = []
|
||
|
||
# The subject of the mail. It supports the same variables as the
|
||
# body.
|
||
subject = "Docspell {{ latestVersion }} is available"
|
||
|
||
# The body of the mail. Subject and body can contain these
|
||
# variables which are replaced:
|
||
#
|
||
# - `latestVersion` the latest available version of Docspell
|
||
# - `currentVersion` the currently running (old) version of Docspell
|
||
# - `releasedAt` a date when the release was published
|
||
#
|
||
# The body is processed as markdown after the variables have been
|
||
# replaced.
|
||
body = """
|
||
Hello,
|
||
|
||
You are currently running Docspell {{ currentVersion }}. Version *{{ latestVersion }}*
|
||
is now available, which was released on {{ releasedAt }}. Check the release page at:
|
||
|
||
<https://github.com/eikek/docspell/releases/latest>
|
||
|
||
Have a nice day!
|
||
|
||
Docpell Update Check
|
||
"""
|
||
}
|
||
|
||
# Configuration of text extraction
|
||
extraction {
|
||
# For PDF files it is first tried to read the text parts of the
|
||
# PDF. But PDFs can be complex documents and they may contain text
|
||
# and images. If the returned text is shorter than the value
|
||
# below, OCR is run afterwards. Then both extracted texts are
|
||
# compared and the longer will be used.
|
||
#
|
||
# If you set this to 0 (or a negative value), then the text parts
|
||
# of a PDF are ignored and OCR is always run and its result used.
|
||
pdf {
|
||
min-text-len = 500
|
||
}
|
||
|
||
preview {
|
||
# When rendering a pdf page, use this dpi. This results in
|
||
# scaling the image. A standard A4 page rendered at 96dpi
|
||
# results in roughly 790x1100px image. Using 32 results in
|
||
# roughly 200x300px image.
|
||
#
|
||
# Note, when this is changed, you might want to re-generate
|
||
# preview images. Check the api for this, there is an endpoint
|
||
# to regenerate all for a collective.
|
||
dpi = 32
|
||
}
|
||
|
||
# Extracting text using OCR works for image and pdf files. It will
|
||
# first run ghostscript to create a gray image from a pdf. Then
|
||
# unpaper is run to optimize the image for the upcoming ocr, which
|
||
# will be done by tesseract. All these programs must be available
|
||
# in your PATH or the absolute path can be specified below.
|
||
ocr {
|
||
|
||
# Images greater than this size are skipped. Note that every
|
||
# image is loaded completely into memory for doing OCR. This is
|
||
# the pixel count, `height * width` of the image.
|
||
max-image-size = 28000000
|
||
|
||
# Defines what pages to process. If a PDF with 600 pages is
|
||
# submitted, it is probably not necessary to scan through all of
|
||
# them. This would take a long time and occupy resources for no
|
||
# value. The first few pages should suffice. The default is first
|
||
# 10 pages.
|
||
#
|
||
# If you want all pages being processed, set this number to -1.
|
||
#
|
||
# Note: if you change the ghostscript command below, be aware that
|
||
# this setting (if not -1) will add another parameter to the
|
||
# beginning of the command.
|
||
page-range {
|
||
begin = 10
|
||
}
|
||
|
||
# The ghostscript command.
|
||
ghostscript {
|
||
command {
|
||
program = "gs"
|
||
args = [ "-dNOPAUSE"
|
||
, "-dBATCH"
|
||
, "-dSAFER"
|
||
, "-sDEVICE=tiffscaled8"
|
||
, "-sOutputFile={{outfile}}"
|
||
, "{{infile}}"
|
||
]
|
||
timeout = "5 minutes"
|
||
}
|
||
working-dir = ${java.io.tmpdir}"/docspell-extraction"
|
||
}
|
||
|
||
# The unpaper command.
|
||
unpaper {
|
||
command {
|
||
program = "unpaper"
|
||
args = [ "{{infile}}", "{{outfile}}" ]
|
||
timeout = "5 minutes"
|
||
}
|
||
}
|
||
|
||
# The tesseract command.
|
||
tesseract {
|
||
command {
|
||
program = "tesseract"
|
||
args = ["{{file}}"
|
||
, "stdout"
|
||
, "-l"
|
||
, "{{lang}}"
|
||
]
|
||
timeout = "5 minutes"
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
# Settings for text analysis
|
||
text-analysis {
|
||
# Maximum length of text to be analysed.
|
||
#
|
||
# All text to analyse must fit into RAM. A large document may take
|
||
# too much heap. Also, most important information is at the
|
||
# beginning of a document, so in most cases the first two pages
|
||
# should suffice. Default is 5000, which are about 2 pages (just a
|
||
# rough guess, of course). For my data, more than 80% of the
|
||
# documents are less than 5000 characters.
|
||
#
|
||
# This value applies to nlp and the classifier. If this value is
|
||
# <= 0, the limit is disabled.
|
||
max-length = 0
|
||
|
||
# A working directory for the analyser to store temporary/working
|
||
# files.
|
||
working-dir = ${java.io.tmpdir}"/docspell-analysis"
|
||
|
||
nlp {
|
||
# The mode for configuring NLP models:
|
||
#
|
||
# 1. full – builds the complete pipeline
|
||
# 2. basic - builds only the ner annotator
|
||
# 3. regexonly - matches each entry in your address book via regexps
|
||
# 4. disabled - doesn't use any stanford-nlp feature
|
||
#
|
||
# The full and basic variants rely on pre-build language models
|
||
# that are available for only a few languages. Memory usage
|
||
# varies among the languages. So joex should run with -Xmx1400M
|
||
# at least when using mode=full.
|
||
#
|
||
# The basic variant does a quite good job for German and
|
||
# English. It might be worse for French, always depending on the
|
||
# type of text that is analysed. Joex should run with about 500M
|
||
# heap, here again language German uses the most.
|
||
#
|
||
# The regexonly variant doesn't depend on a language. It roughly
|
||
# works by converting all entries in your addressbook into
|
||
# regexps and matches each one against the text. This can get
|
||
# memory intensive, too, when the addressbook grows large. This
|
||
# is included in the full and basic by default, but can be used
|
||
# independently by setting mode=regexonly.
|
||
#
|
||
# When mode=disabled, then the whole nlp pipeline is disabled,
|
||
# and you won't get any suggestions. Only what the classifier
|
||
# returns (if enabled).
|
||
mode = full
|
||
|
||
# The StanfordCoreNLP library caches language models which
|
||
# requires quite some amount of memory. Setting this interval to a
|
||
# positive duration, the cache is cleared after this amount of
|
||
# idle time. Set it to 0 to disable it if you have enough memory,
|
||
# processing will be faster.
|
||
#
|
||
# This has only any effect, if mode != disabled.
|
||
clear-interval = "15 minutes"
|
||
|
||
# Restricts proposals for due dates. Only dates earlier than this
|
||
# number of years in the future are considered.
|
||
max-due-date-years = 10
|
||
|
||
regex-ner {
|
||
# Whether to enable custom NER annotation. This uses the
|
||
# address book of a collective as input for NER tagging (to
|
||
# automatically find correspondent and concerned entities). If
|
||
# the address book is large, this can be quite memory
|
||
# intensive and also makes text analysis much slower. But it
|
||
# improves accuracy and can be used independent of the
|
||
# language. If this is set to 0, it is effectively disabled
|
||
# and NER tagging uses only statistical models (that also work
|
||
# quite well, but are restricted to the languages mentioned
|
||
# above).
|
||
#
|
||
# Note, this is only relevant if nlp-config.mode is not
|
||
# "disabled".
|
||
max-entries = 1000
|
||
|
||
# The NER annotation uses a file of patterns that is derived
|
||
# from a collective's address book. This is the time how
|
||
# long this data will be kept until a check for a state change
|
||
# is done.
|
||
file-cache-time = "1 minute"
|
||
}
|
||
}
|
||
|
||
# Settings for doing document classification.
|
||
#
|
||
# This works by learning from existing documents. This requires a
|
||
# statistical model that is computed from all existing documents.
|
||
# This process is run periodically as configured by the
|
||
# collective. It may require more memory, depending on the amount
|
||
# of data.
|
||
#
|
||
# It utilises this NLP library: https://nlp.stanford.edu/.
|
||
classification {
|
||
# Whether to enable classification globally. Each collective can
|
||
# enable/disable auto-tagging. The classifier is also used for
|
||
# finding correspondents and concerned entities, if enabled
|
||
# here.
|
||
enabled = true
|
||
|
||
# If concerned with memory consumption, this restricts the
|
||
# number of items to consider. More are better for training. A
|
||
# negative value or zero means to train on all items.
|
||
#
|
||
# This limit and `text-analysis.max-length` define how much
|
||
# memory is required. On weaker hardware, it is advised to play
|
||
# with these values.
|
||
item-count = 600
|
||
|
||
# These settings are used to configure the classifier. If
|
||
# multiple are given, they are all tried and the "best" is
|
||
# chosen at the end. See
|
||
# https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/classify/ColumnDataClassifier.html
|
||
# for more info about these settings. The settings here yielded
|
||
# good results with *my* dataset.
|
||
#
|
||
# Enclose regexps in triple quotes.
|
||
classifiers = [
|
||
{ "useSplitWords" = "true"
|
||
"splitWordsTokenizerRegexp" = """[\p{L}][\p{L}0-9]*|(?:\$ ?)?[0-9]+(?:\.[0-9]{2})?%?|\s+|."""
|
||
"splitWordsIgnoreRegexp" = """\s+"""
|
||
"useSplitPrefixSuffixNGrams" = "true"
|
||
"maxNGramLeng" = "4"
|
||
"minNGramLeng" = "1"
|
||
"splitWordShape" = "chris4"
|
||
"intern" = "true" # makes it slower but saves memory
|
||
}
|
||
]
|
||
}
|
||
}
|
||
|
||
# Configuration for converting files into PDFs.
|
||
#
|
||
# Most of it is delegated to external tools, which can be configured
|
||
# below. They must be in the PATH environment or specify the full
|
||
# path below via the `program` key.
|
||
convert {
|
||
|
||
# The chunk size used when storing files. This should be the same
|
||
# as used with the rest server.
|
||
chunk-size = 2097152
|
||
#${docspell.joex.files.chunk-size}
|
||
|
||
# A string used to change the filename of the converted pdf file.
|
||
# If empty, the original file name is used for the pdf file ( the
|
||
# extension is always replaced with `pdf`).
|
||
converted-filename-part = "converted"
|
||
|
||
# When reading images, this is the maximum size. Images that are
|
||
# larger are not processed.
|
||
max-image-size = ${docspell.joex.extraction.ocr.max-image-size}
|
||
|
||
# Settings when processing markdown files (and other text files)
|
||
# to HTML.
|
||
#
|
||
# In order to support text formats, text files are first converted
|
||
# to HTML using a markdown processor. The resulting HTML is then
|
||
# converted to a PDF file.
|
||
markdown {
|
||
|
||
# The CSS that is used to style the resulting HTML.
|
||
internal-css = """
|
||
body { padding: 2em 5em; }
|
||
"""
|
||
}
|
||
|
||
# Which HTML->PDF converter command to use. One of: wkhtmlpdf,
|
||
# weasyprint.
|
||
html-converter = "wkhtmlpdf"
|
||
|
||
# To convert HTML files into PDF files, the external tool
|
||
# wkhtmltopdf is used.
|
||
wkhtmlpdf {
|
||
command = {
|
||
program = "wkhtmltopdf"
|
||
args = [
|
||
"-s",
|
||
"A4",
|
||
"--encoding",
|
||
"{{encoding}}",
|
||
"--load-error-handling", "ignore",
|
||
"--load-media-error-handling", "ignore",
|
||
"-",
|
||
"{{outfile}}"
|
||
]
|
||
timeout = "10 minutes"
|
||
}
|
||
working-dir = ${java.io.tmpdir}"/docspell-wkhtmltopdf"
|
||
}
|
||
|
||
# An alternative to wkhtmltopdf is weasyprint.
|
||
weasyprint {
|
||
command = {
|
||
program = "weasyprint"
|
||
args = [
|
||
"--optimize-size", "all",
|
||
"--encoding", "{{encoding}}",
|
||
"-",
|
||
"{{outfile}}"
|
||
]
|
||
timeout = "10 minutes"
|
||
}
|
||
working-dir = ${java.io.tmpdir}"/docspell-weasyprint"
|
||
}
|
||
|
||
# To convert image files to PDF files, tesseract is used. This
|
||
# also extracts the text in one go.
|
||
tesseract = {
|
||
command = {
|
||
program = "tesseract"
|
||
args = [
|
||
"{{infile}}",
|
||
"out",
|
||
"-l",
|
||
"{{lang}}",
|
||
"pdf",
|
||
"txt"
|
||
]
|
||
timeout = "10 minutes"
|
||
}
|
||
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
||
}
|
||
|
||
# To convert "office" files to PDF files, the external tool
|
||
# unoconv is used. Unoconv uses libreoffice/openoffice for
|
||
# converting. So it supports all formats that are possible to read
|
||
# with libreoffice/openoffice.
|
||
#
|
||
# Note: to greatly improve performance, it is recommended to start
|
||
# a libreoffice listener by running `unoconv -l` in a separate
|
||
# process.
|
||
unoconv = {
|
||
command = {
|
||
program = "unoconv"
|
||
args = [
|
||
"-f",
|
||
"pdf",
|
||
"-o",
|
||
"{{outfile}}",
|
||
"{{infile}}"
|
||
]
|
||
timeout = "10 minutes"
|
||
}
|
||
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
||
}
|
||
|
||
# The tool ocrmypdf can be used to convert pdf files to pdf files
|
||
# in order to add extracted text as a separate layer. This makes
|
||
# image-only pdfs searchable and you can select and copy/paste the
|
||
# text. It also converts pdfs into pdf/a type pdfs, which are best
|
||
# suited for archiving. So it makes sense to use this even for
|
||
# text-only pdfs.
|
||
#
|
||
# It is recommended to install ocrmypdf, but it also is optional.
|
||
# If it is enabled but fails, the error is not fatal and the
|
||
# processing will continue using the original pdf for extracting
|
||
# text. You can also disable it to remove the errors from the
|
||
# processing logs.
|
||
#
|
||
# The `--skip-text` option is necessary to not fail on "text" pdfs
|
||
# (where ocr is not necessary). In this case, the pdf will be
|
||
# converted to PDF/A.
|
||
ocrmypdf = {
|
||
enabled = true
|
||
command = {
|
||
program = "ocrmypdf"
|
||
args = [
|
||
"-l", "{{lang}}",
|
||
"--skip-text",
|
||
"--deskew",
|
||
"-j", "1",
|
||
"{{infile}}",
|
||
"{{outfile}}"
|
||
]
|
||
timeout = "10 minutes"
|
||
}
|
||
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
||
}
|
||
|
||
# Allows to try to decrypt a PDF with encryption or protection. If
|
||
# enabled, a PDFs encryption or protection will be removed during
|
||
# conversion.
|
||
#
|
||
# For encrypted PDFs, this is necessary to be processed, because
|
||
# docspell needs to read it. It also requires to specify a
|
||
# password here. All passwords are tried when reading a PDF.
|
||
#
|
||
# This is enabled by default with an empty password list. This
|
||
# removes protection from PDFs, which is better for processing.
|
||
#
|
||
# Passwords can be given here and each collective can maintain
|
||
# their passwords as well. But if the `enabled` setting below is
|
||
# `false`, then no attempt at decrypting is done.
|
||
decrypt-pdf = {
|
||
enabled = true
|
||
passwords = []
|
||
}
|
||
}
|
||
|
||
# The same section is also present in the rest-server config. It is
|
||
# used when submitting files into the job queue for processing.
|
||
#
|
||
# Currently, these settings may affect memory usage of all nodes, so
|
||
# it should be the same on all nodes.
|
||
files {
|
||
# Defines the chunk size (in bytes) used to store the files.
|
||
# This will affect the memory footprint when uploading and
|
||
# downloading files. At most this amount is loaded into RAM for
|
||
# down- and uploading.
|
||
#
|
||
# It also defines the chunk size used for the blobs inside the
|
||
# database.
|
||
chunk-size = 524288
|
||
|
||
# The file content types that are considered valid. Docspell
|
||
# will only pass these files to processing. The processing code
|
||
# itself has also checks for which files are supported and which
|
||
# not. This affects the uploading part and can be used to
|
||
# restrict file types that should be handed over to processing.
|
||
# By default all files are allowed.
|
||
valid-mime-types = [ ]
|
||
|
||
# The id of an enabled store from the `stores` array that should
|
||
# be used.
|
||
#
|
||
# IMPORTANT NOTE: All nodes must have the exact same file store
|
||
# configuration!
|
||
default-store = "database"
|
||
|
||
# A list of possible file stores. Each entry must have a unique
|
||
# id. The `type` is one of: default-database, filesystem, s3.
|
||
#
|
||
# The enabled property serves currently to define target stores
|
||
# for the "copy files" task. All stores with enabled=false are
|
||
# removed from the list. The `default-store` must be enabled.
|
||
stores = {
|
||
database =
|
||
{ enabled = true
|
||
type = "default-database"
|
||
}
|
||
|
||
filesystem =
|
||
{ enabled = false
|
||
type = "file-system"
|
||
directory = "/some/directory"
|
||
}
|
||
|
||
minio =
|
||
{ enabled = false
|
||
type = "s3"
|
||
endpoint = "http://localhost:9000"
|
||
access-key = "username"
|
||
secret-key = "password"
|
||
bucket = "docspell"
|
||
}
|
||
}
|
||
}
|
||
|
||
# Configuration of the full-text search engine. (the same must be used for restserver)
|
||
full-text-search {
|
||
# The full-text search feature can be disabled. It requires an
|
||
# additional index server which needs additional memory and disk
|
||
# space. It can be enabled later any time.
|
||
#
|
||
# Currently the SOLR search platform and PostgreSQL are supported.
|
||
enabled = false
|
||
|
||
# Which backend to use, either solr or postgresql
|
||
backend = "solr"
|
||
|
||
# Configuration for the SOLR backend.
|
||
solr = {
|
||
# The URL to solr
|
||
url = "http://localhost:8983/solr/docspell"
|
||
# Used to tell solr when to commit the data
|
||
commit-within = 1000
|
||
# If true, logs request and response bodies
|
||
log-verbose = false
|
||
# The defType parameter to lucene that defines the parser to
|
||
# use. You might want to try "edismax" or look here:
|
||
# https://solr.apache.org/guide/8_4/query-syntax-and-parsing.html#query-syntax-and-parsing
|
||
def-type = "lucene"
|
||
# The default combiner for tokens. One of {AND, OR}.
|
||
q-op = "OR"
|
||
}
|
||
|
||
# Configuration for PostgreSQL backend
|
||
postgresql = {
|
||
# Whether to use the default database, only works if it is
|
||
# postgresql
|
||
use-default-connection = false
|
||
|
||
# The database connection.
|
||
jdbc {
|
||
url = "jdbc:postgresql://db:5432/dbname"
|
||
user = "dbuser"
|
||
password = "dbpass"
|
||
}
|
||
|
||
# A mapping from a language to a postgres text search config. By
|
||
# default a language is mapped to a predefined config.
|
||
# PostgreSQL has predefined configs for some languages. This
|
||
# setting allows to create a custom text search config and
|
||
# define it here for some or all languages.
|
||
#
|
||
# Example:
|
||
# { german = "my-german" }
|
||
#
|
||
# See https://www.postgresql.org/docs/14/textsearch-tables.html ff.
|
||
pg-config = {
|
||
}
|
||
|
||
# Define which query parser to use.
|
||
#
|
||
# https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
|
||
pg-query-parser = "websearch_to_tsquery"
|
||
|
||
# Allows to define a normalization for the ranking.
|
||
#
|
||
# https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
|
||
pg-rank-normalization = [ 4 ]
|
||
}
|
||
|
||
# Settings for running the index migration tasks
|
||
migration = {
|
||
# Chunk size to use when indexing data from the database. This
|
||
# many attachments are loaded into memory and pushed to the
|
||
# full-text index.
|
||
index-all-chunk = 10
|
||
}
|
||
}
|
||
|
||
addons {
|
||
# A directory to extract addons when running them. Everything in
|
||
# here will be cleared after each run.
|
||
working-dir = ${java.io.tmpdir}"/docspell-addons"
|
||
|
||
# A directory for addons to store data between runs. This is not
|
||
# cleared by Docspell and can get large depending on the addons
|
||
# executed.
|
||
#
|
||
# This directory is used as base. In it subdirectories are created
|
||
# per run configuration id.
|
||
cache-dir = ${java.io.tmpdir}"/docspell-addon-cache"
|
||
|
||
executor-config {
|
||
# Define a (comma or whitespace separated) list of runners that
|
||
# are responsible for executing an addon. This setting is
|
||
# compared to what is supported by addons. Possible values are:
|
||
#
|
||
# - nix-flake: use nix-flake runner if the addon supports it
|
||
# (this requires the nix package manager on the joex machine)
|
||
# - docker: use docker
|
||
# - trivial: use the trivial runner
|
||
#
|
||
# The first successful execution is used. This should list all
|
||
# runners the computer supports.
|
||
runner = "nix-flake, docker, trivial"
|
||
|
||
# systemd-nspawn can be used to run the program in a container.
|
||
# This is used by runners nix-flake and trivial.
|
||
nspawn = {
|
||
# If this is false, systemd-nspawn is not tried. When true, the
|
||
# addon is executed inside a lightweight container via
|
||
# systemd-nspawn.
|
||
enabled = false
|
||
|
||
# Path to sudo command. By default systemd-nspawn is executed
|
||
# via sudo - the user running joex must be allowed to do so NON
|
||
# INTERACTIVELY. If this is empty, then nspawn is tried to
|
||
# execute without sudo.
|
||
sudo-binary = "sudo"
|
||
|
||
# Path to the systemd-nspawn command.
|
||
nspawn-binary = "systemd-nspawn"
|
||
|
||
# Workaround, if multiple same named containers are run too fast
|
||
container-wait = "100 millis"
|
||
}
|
||
|
||
# When multiple addons are executed sequentially, stop after the
|
||
# first failing result. If this is false, then subsequent addons
|
||
# will be run for their side effects only.
|
||
fail-fast = true
|
||
|
||
# The timeout for running an addon.
|
||
run-timeout = "15 minutes"
|
||
|
||
# Configure the nix flake runner.
|
||
nix-runner {
|
||
# Path to the nix command.
|
||
nix-binary = "nix"
|
||
|
||
# The timeout for building the package (running nix build).
|
||
build-timeout = "15 minutes"
|
||
}
|
||
|
||
# Configure the docker runner
|
||
docker-runner {
|
||
# Path to the docker command.
|
||
docker-binary = "docker"
|
||
|
||
# The timeout for building the package (running docker build).
|
||
build-timeout = "15 minutes"
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|