# catalog/incubator/docspell/3.0.2/ix_values.yaml
# (file viewer metadata: 552 lines, 16 KiB, YAML)

# -- Container image reference for the docspell-server container.
# The tag pins both the version and the image digest.
image:
  repository: tccr.io/truecharts/docspell-server
  tag: 0.39.0@sha256:0cd6a12d95b8c944267777895d994191a9a1407bee0ebe5020fadb45e5d8b911
  pullPolicy: IfNotPresent
# -- Container image reference for the docspell-joex container.
# The tag pins both the version and the image digest.
joexImage:
  repository: tccr.io/truecharts/docspell-joex
  tag: 0.39.0@sha256:54948e73bee85841c689d911e75f8aeab90e03820b94e1838d37391077aed835
  pullPolicy: IfNotPresent
# -- Container image reference for the docspell-dsc container.
# The tag pins both the version and the image digest.
dscImage:
  repository: tccr.io/truecharts/docspell-dsc
  tag: v0.9.0@sha256:829b2bc83fdfddc8127baef5b1149d96dcf0d5ca93f210fcf1344899c36b400e
  pullPolicy: IfNotPresent
# -- Arguments passed to the main container: the path of the server config
# file (the same path the `server` persistence entry mounts).
args:
  - /opt/server.conf
# -- Pod-level security context. UID/GID 0 means the pod runs as root.
podSecurityContext:
  runAsUser: 0
  runAsGroup: 0
# -- Container-level security context: root is permitted and the root
# filesystem is writable.
securityContext:
  runAsNonRoot: false
  readOnlyRootFilesystem: false
# -- Settings for the dsc container.
dsc:
  # -- You need to enable integration endpoint with HTTP Header in rest server
  # -- If you enable allowed IPs you also need 127.0.0.1 as an entry there
  enabled: false
  # -- What to do with a file after it was imported: move | delete
  imported_action: move
  not_match_glob: "**/.*"
  match_glob: ""
  language: ""
  tag: ""
# -- Settings for the Docspell REST server.
# NOTE(review): nesting was reconstructed from the upstream docspell config
# layout — confirm against the chart templates that consume these values.
rest_server:
  # -- App name, shows on the top right corner
  app_name: Docspell
  base_url: ""
  logging:
    # -- The format for the log messages. Can be one of:
    # -- Json | Logfmt | Fancy | Plain
    format: Fancy
    # -- The minimum level to log. From lowest to highest:
    # -- Trace | Debug | Info | Warn | Error
    minimum_level: Warn
    # -- Override the log level of specific loggers
    levels:
      docspell: Info
      flywaydb: Info
      binny: Info
      http4s: Info
  server_opts:
    # -- Enable HTTP2
    enable_http2: false
    # -- Maximum allowed connections
    max_connections: 1024
    # -- Timeout for waiting for the first output of the response
    response_timeout: 45s
  # -- This is a hard limit to restrict the size of a batch that is returned when searching for items.
  max_item_page_size: 200
  # -- The number of characters to return for each item notes when searching.
  max_note_length: 180
  # -- This defines whether the classification form in the collective settings is displayed or not.
  show_classification_settings: true
  auth:
    # -- How long an authentication token is valid
    session_valid: 5 minutes
    remember_me:
      enabled: true
      # -- How long the remember me cookie/token is valid.
      valid: 30 days
  download_all:
    # -- How many files to allow in a zip.
    max_files: 500
    # -- The maximum (uncompressed) size of the zip file contents.
    max_size: 1400M
  # -- OpenID Connect / OAuth2 providers. Both entries are disabled examples;
  # replace the placeholder ids and secrets before enabling one.
  openid:
    - enabled: false
      display: Keycloak
      provider:
        provider_id: keycloak
        client_id: docspell
        client_secret: example-secret-439e-bf06-911e4cdd56a6
        scope: profile
        authorize_url: http://localhost:8080/auth/realms/home/protocol/openid-connect/auth
        token_url: http://localhost:8080/auth/realms/home/protocol/openid-connect/token
        user_url: ""
        logout_url: http://localhost:8080/auth/realms/home/protocol/openid-connect/logout
        sign_key: b64:anVzdC1hLXRlc3Q=
        sig_algo: RS512
      collective_key: lookup:docspell_collective
      user_key: preferred_username
    - enabled: false
      display: Github
      provider:
        provider_id: github
        client_id: <your github client id>
        client_secret: <your github client secret>
        scope: ""
        authorize_url: https://github.com/login/oauth/authorize
        token_url: https://github.com/login/oauth/access_token
        user_url: https://api.github.com/user
        logout_url: ""
        sign_key: ""
        sig_algo: RS256
      collective_key: fixed:demo
      user_key: login
  oidc_auto_redirect: true
  integration_endpoint:
    enabled: false
    # -- The priority to use when submitting files through this endpoint.
    # low | high
    priority: low
    # -- The name used for the item "source" property when uploaded through this endpoint.
    source_name: integration
    allowed_ips:
      enabled: false
      ips:
        - "127.0.0.1"
    http_basic_auth:
      enabled: false
      realm: Docspell Integration
      user: docspell-int
      password: docspell-int
    http_header:
      enabled: false
      header_name: Docspell-Integration
      header_value: some-secret
  admin_endpoint:
    # -- Disables endpoint if empty
    secret: ""
  full_text_search:
    solr:
      # -- Used to tell solr when to commit the data
      commit_within: 1000
      # -- If true, logs request and response bodies
      log_verbose: false
      def_type: lucene
      # -- The default combiner for tokens. One of AND | OR
      q_op: OR
  backend:
    # -- Enable or disable debugging for e-mail related functionality
    mail_debug: false
    database_schema:
      # -- Whether to run main database migrations.
      run_main_migrations: true
      # -- Whether to run the fixup migrations.
      run_fixup_migrations: true
      # -- Use with care. This repairs all migrations in the database by updating their checksums and removing failed migrations.
      repair_schema: false
    signup:
      # -- The mode defines if new users can signup or not.
      # open | invite | closed
      mode: open
      # -- This is the period an invitation token is considered valid.
      invite_time: 3 days
    files:
      # -- Defines the chunk size (in bytes) used to store the files.
      chunk_size: 524288
      # -- The file content types that are considered valid.
      valid_mime_types: []
      # -- Which of the stores below is used: database | minio | filesystem
      default_store: database
      stores:
        database:
          enabled: true
        minio:
          enabled: false
          endpoint: http://localhost:9000
          access_key: access_key
          secret_key: secret_key
          bucket: docspell
        # -- Highly NOT recommended
        filesystem:
          enabled: false
          directory: /documents
    addons:
      # TODO: Check how exactly addons work. There are mentions of docker daemon
      enabled: false
      # -- Whether installing addons requiring network should be allowed or not.
      allow_impure: true
      # -- Define patterns of urls that are allowed to install addons from.
      allowed_urls:
        - "*"
      # -- Define patterns of urls that are denied to install addons from.
      denied_urls: []
# -- Settings for the Docspell joex component.
# NOTE(review): nesting was reconstructed from the upstream docspell config
# layout — confirm against the chart templates that consume these values.
joex:
  logging:
    # -- The format for the log messages. Can be one of:
    # -- Json | Logfmt | Fancy | Plain
    format: Fancy
    # -- The minimum level to log. From lowest to highest:
    # -- Trace | Debug | Info | Warn | Error
    minimum_level: Warn
    # -- Override the log level of specific loggers
    levels:
      docspell: Info
      flywaydb: Info
      binny: Info
      http4s: Info
  database_schema:
    # -- Whether to run main database migrations.
    # Disabled here while rest_server.backend.database_schema enables it.
    run_main_migrations: false
    # -- Whether to run the fixup migrations.
    run_fixup_migrations: true
    # -- Use with care. This repairs all migrations in the database by updating their checksums and removing failed migrations.
    repair_schema: false
  # -- Enable or disable debugging for e-mail related functionality. This applies to both sending and receiving mails.
  mail_debug: false
  send_mail:
    # -- This is used as the List-Id e-mail header when mails are sent from docspell to its users
    list_id: ""
  scheduler:
    # -- Number of jobs allowed to be processed in parallel.
    pool_size: 1
    # -- A counting scheme determines the ratio of how high and low priority jobs are run.
    counting_scheme: "4,1"
    # -- How many times a failed job should be retried until it enters failed state.
    retries: 2
    # -- The delay until the next try is performed for a failed job.
    retry_delay: 1 minute
    # -- The queue size of log statements from a job.
    log_buffer_size: 500
    # -- If no job is left in the queue, the scheduler will wait until a notify is requested.
    wakeup_period: 30 minutes
  periodic_scheduler:
    # -- A fallback to start looking for due periodic tasks regularly.
    wakeup_period: 10 minutes
  user_tasks:
    scan_mailbox:
      # -- A limit of how many folders to scan through.
      max_folders: 50
      # -- How many mails (headers only) to retrieve in one chunk.
      mail_chunk_size: 50
      # -- A limit on how many mails to process in one job run.
      max_mails: 500
  house_keeping:
    # -- When the house keeping tasks execute.
    schedule: "Sun *-*-* 00:00:00 UTC"
    # -- This task removes invitation keys that have been created but not used.
    cleanup_invites:
      # -- Whether this task is enabled.
      enabled: true
      # -- The minimum age of invites to be deleted.
      older_than: 30 days
    # -- This task removes expired remember-me tokens.
    cleanup_remember_me:
      # -- Whether this task is enabled.
      enabled: true
      # -- The minimum age of tokens to be deleted.
      older_than: 30 days
    # -- Jobs store their log output in the database.
    cleanup_jobs:
      # -- Whether this task is enabled.
      enabled: true
      # -- The minimum age of jobs to delete.
      older_than: 30 days
      # -- This defines how many jobs are deleted in one transaction.
      delete_batch: 100
    # -- Zip files created for downloading multiple files are cached and can be cleared periodically.
    cleanup_downloads:
      # -- Whether this task is enabled.
      enabled: true
      # -- The minimum age of a download file to be deleted.
      older_than: 14 days
    # -- Removes node entries that are not reachable anymore.
    check_nodes:
      # -- Whether this task is enabled.
      enabled: true
      # -- How often the node must be unreachable, before it is removed.
      min_not_found: 2
    # -- Checks all files against their checksum
    integrity_check:
      enabled: true
  # -- A periodic task to check for new releases of docspell.
  update_check:
    # -- Whether to enable this task
    enabled: false
    # -- Sends the mail without checking the latest release.
    test_run: false
    # -- When the update check should execute.
    schedule: "Sun *-*-* 00:00:00 UTC"
    # -- An account id in form of `collective/user`
    sender_account: ""
    # -- The SMTP connection id that should be used for sending the mail.
    smtp_id: ""
    # -- A list of recipient e-mail addresses.
    recipients: []
    # -- The subject of the mail.
    subject: "Docspell {{ latestVersion }} is available"
    # -- The body of the mail.
    body: |
      Hello,
      You are currently running Docspell {{ currentVersion }}. Version *{{ latestVersion }}*
      is now available, which was released on {{ releasedAt }}. Check the release page at
      <https://github.com/eikek/docspell/releases/latest>
      Have a nice day!
      Docspell Update Check
  # -- Configuration of text extraction
  extraction:
    pdf:
      min_text_length: 500
    preview:
      # -- When rendering a pdf page, use this dpi.
      dpi: 32
    ocr:
      # -- Images greater than this size are skipped.
      max_image_size: 14000000
      page_range:
        # -- Defines what pages to process.
        # NOTE(review): presumably only the first `begin` pages are processed — confirm upstream.
        begin: 10
      # -- The ghostscript command.
      ghostscript:
        command:
          program: gs
          args:
            - -dNOPAUSE
            - -dBATCH
            - -dSAFER
            - -sDEVICE=tiffscaled8
            - "-sOutputFile={{outfile}}"
            - "{{infile}}"
          timeout: 5 minutes
        working_dir: /tmp/docspell-extraction
      # -- The unpaper command.
      unpaper:
        command:
          program: unpaper
          args:
            - "{{infile}}"
            - "{{outfile}}"
          timeout: 5 minutes
      # -- The tesseract command.
      tesseract:
        command:
          program: tesseract
          args:
            - "{{file}}"
            - stdout
            - -l
            - "{{lang}}"
          timeout: 5 minutes
  text_analysis:
    # -- Maximum length of text to be analyzed.
    max_length: 0
    working_dir: /tmp/docspell-analysis
    nlp:
      # -- The mode for configuring NLP models
      # -- full | basic | regexonly | disabled
      mode: full
      clear_interval: 15 minutes
      # -- Restricts proposals for due dates.
      max_due_date_years: 10
      regex_ner:
        # -- Maximum number of entries used for custom NER annotation.
        # NOTE(review): upstream docs suggest 0 disables custom NER — confirm.
        max_entries: 1000
        file_cache_time: 1 minute
    classification:
      # -- Whether to enable classification globally.
      enabled: true
      # -- This limit and `text-analysis.max-length` define how much memory is required.
      item_count: 600
      # -- Enclose regexps in triple quotes.
      classifiers:
        useSplitWords: true
        splitWordsTokenizerRegexp: '"""[\p{L}][\p{L}0-9]*|(?:\$ ?)?[0-9]+(?:\.[0-9]{2})?%?|\s+|."""'
        splitWordsIgnoreRegexp: '"""\s+"""'
        useSplitPrefixSuffixNGrams: true
        maxNGramLeng: 4
        minNGramLeng: 1
        splitWordShape: chris4
        intern: true
  # -- Configuration for converting files into PDFs.
  convert:
    # -- The chunk size used when storing files.
    chunk_size: 524288
    # -- A string used to change the filename of the converted pdf file.
    converted_filename_part: converted
    # -- When reading images, this is the maximum size.
    max_image_size: 14000000
    markdown:
      # -- The CSS that is used to style the resulting HTML.
      internal_css: "body { padding: 2em 5em; }"
    wkhtmlpdf:
      command:
        program: wkhtmltopdf
        args:
          - -s
          - A4
          - --encoding
          # Placeholder name must match the other `{{...}}` placeholders'
          # spelling convention; it was misspelled as `enconding`.
          - "{{encoding}}"
          - --load-error-handling
          - ignore
          - --load-media-error-handling
          - ignore
          - "-"
          - "{{outfile}}"
        timeout: 2 minutes
      working_dir: /tmp/docspell-convert
    tesseract:
      command:
        program: tesseract
        args:
          - "{{infile}}"
          - out
          - -l
          - "{{lang}}"
          - pdf
          - txt
        timeout: 5 minutes
      working_dir: /tmp/docspell-convert
    unoconv:
      command:
        program: unoconv
        args:
          - -f
          - pdf
          - -o
          - "{{outfile}}"
          - "{{infile}}"
        timeout: 5 minutes
      working_dir: /tmp/docspell-convert
    ocrmypdf:
      enabled: true
      command:
        program: ocrmypdf
        args:
          - -l
          - "{{lang}}"
          - --skip-text
          - --deskew
          - -j
          - "1"
          - "{{infile}}"
          - "{{outfile}}"
        timeout: 5 minutes
      working_dir: /tmp/docspell-convert
    decrypt_pdf:
      enabled: true
      passwords: []
  files:
    # -- Defines the chunk size (in bytes) used to store the files.
    chunk_size: 524288
    # -- The file content types that are considered valid.
    valid_mime_types: []
    # -- Which of the stores below is used: database | minio | filesystem
    default_store: database
    stores:
      database:
        enabled: true
      minio:
        enabled: false
        endpoint: http://localhost:9000
        access_key: access_key
        secret_key: secret_key
        bucket: docspell
      # -- Highly NOT recommended
      filesystem:
        enabled: false
        directory: /documents
  full_text_search:
    solr:
      # -- Used to tell solr when to commit the data
      commit_within: 1000
      # -- If true, logs request and response bodies
      log_verbose: false
      def_type: lucene
      # -- The default combiner for tokens. One of AND | OR
      q_op: OR
    migration:
      index_all_chunk: 10
  addons:
    # TODO: Check how exactly addons work (same open question as rest_server.backend.addons)
    working_dir: /tmp/docspell-addons
    cache_dir: /tmp/docspell-addons-cache
    executor_config:
      runner: trivial
      fail_fast: true
      run_timeout: 15 minutes
# -- Health probes. All three issue an HTTP request to the same
# version-info path.
probes:
  liveness:
    type: HTTP
    path: /api/info/version
  readiness:
    type: HTTP
    path: /api/info/version
  startup:
    type: HTTP
    path: /api/info/version
# -- Services: `main` on port 10320 and a separate ClusterIP service for
# joex on port 10321.
service:
  main:
    ports:
      main:
        port: 10320
        protocol: HTTP
  joex:
    enabled: true
    type: ClusterIP
    ports:
      joex:
        enabled: true
        port: 10321
        protocol: HTTP
# -- Volumes. The server and joex config files come from secrets; `joex`
# and `import` set `noMount`, so they are not mounted by this entry itself.
# NOTE(review): presumably the chart mounts them into other containers
# explicitly — confirm in the templates.
persistence:
  server:
    enabled: true
    type: secret
    readOnly: true
    # Quoted so the mode stays a string rather than being read as a number.
    defaultMode: "0600"
    objectName: '{{ include "tc.common.names.fullname" . }}-server-secret'
    mountPath: /opt/server.conf
    subPath: server.conf
  joex:
    enabled: true
    type: secret
    noMount: true
    readOnly: true
    defaultMode: "0600"
    objectName: '{{ include "tc.common.names.fullname" . }}-joex-secret'
    mountPath: /opt/joex.conf
    subPath: joex.conf
  import:
    enabled: true
    noMount: true
    mountPath: /import
# -- PostgreSQL dependency; credentials are read from the `dbcreds` secret.
postgresql:
  enabled: true
  existingSecret: dbcreds
  postgresqlUsername: docspell
  postgresqlDatabase: docspell
# -- Solr dependency; credentials are read from the `solrcreds` secret.
solr:
  enabled: true
  existingSecret: solrcreds
  solrCores: docspell
  solrUsername: docspell
  # Quoted on purpose: a bare `no` would be parsed as a boolean by YAML 1.1 loaders.
  solrEnableAuthentication: "no"
# -- Enables the portal integration for this chart.
portal:
  enabled: true