Djvu-viewer: Difference between revisions
No edit summary |
|||
| (13 intermediate revisions by the same user not shown) | |||
| Line 38: | Line 38: | ||
= djvu catalog and remote viewer = | = djvu catalog and remote viewer = | ||
== Demos == | == Demos == | ||
{{DjVuDemo|RWTH Aachen|https://djvu-viewer.wikidata.dbis.rwth-aachen.de}} | |||
{{DjVuDemo|CompGen e.V|https://genwiki39.genealogy.net/djvu-viewer/}} | |||
{{pip|djvu-viewer}} | {{pip|djvu-viewer}} | ||
= Command Line = | = Command Line = | ||
== djvuconv == | == djvuconv == | ||
| Line 87: | Line 86: | ||
--url URL Process a single DjVu file (only valid in convert | --url URL Process a single DjVu file (only valid in convert | ||
mode) | mode) | ||
</source> | |||
= Server configuration = | |||
== Apache proxy == | |||
<source lang='apache'> | |||
# Apache Configuration for djvu.bitplan.com | |||
# WF 2026-02-02 | |||
# http Port: 9840 | |||
# SSL Port: 443 | |||
# | |||
# timeout: 3.0 | |||
<VirtualHost *:443> | |||
ServerName djvu.bitplan.com | |||
ServerAdmin webmaster@bitplan.com | |||
Include ssl.conf | |||
ErrorLog ${APACHE_LOG_DIR}/djvu_error_ssl.log | |||
CustomLog ${APACHE_LOG_DIR}/djvu_ssl.log combined | |||
RewriteEngine On | |||
RewriteCond %{HTTP:Upgrade} =websocket [NC] | |||
RewriteRule /(.*) ws://localhost:9840/$1 [P,L] | |||
RewriteCond %{HTTP:Upgrade} !=websocket [NC] | |||
RewriteRule /(.*) http://localhost:9840/$1 [P,L] | |||
ProxyPassReverse / http://localhost:9840/ | |||
</VirtualHost> | |||
<VirtualHost *:80> | |||
ServerName djvu.bitplan.com | |||
ServerAdmin webmaster@bitplan.com | |||
ErrorLog ${APACHE_LOG_DIR}/djvu_error.log | |||
CustomLog ${APACHE_LOG_DIR}/djvu.log combined | |||
RewriteEngine On | |||
RewriteCond %{HTTP:Upgrade} =websocket [NC] | |||
RewriteRule /(.*) ws://localhost:9840/$1 [P,L] | |||
RewriteCond %{HTTP:Upgrade} !=websocket [NC] | |||
RewriteRule /(.*) http://localhost:9840/$1 [P,L] | |||
ProxyPassReverse / http://localhost:9840/ | |||
</VirtualHost> | |||
</source> | |||
== Caddy proxy == | |||
<source lang='json'> | |||
# Djvu-Viewer (NiceGUI on 9840), mounted at /djvu-viewer/ | |||
# Strip the prefix for the upstream and tell it where it's mounted. | |||
handle_path /djvu-viewer/* { | |||
reverse_proxy http://localhost:9840 { | |||
header_up Host {host} | |||
header_up X-Forwarded-Host {host} | |||
header_up X-Forwarded-Proto {scheme} | |||
header_up X-Forwarded-Prefix /djvu-viewer | |||
# WebSocket Origin enforcement | |||
header_up Origin {scheme}://{host} | |||
} | |||
} | |||
</source> | |||
= Catalog database structure = | |||
== YAML == | |||
=== AB1938_Kreis-Beckum_Inhaltsverz === | |||
Note: the bundled flag might be wrong in some of our demos - the bundle state can be determined from all djvu_path being identical | |||
<source lang='yaml'> | |||
path: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu | |||
page_count: 3 | |||
bundled: false | |||
pages: | |||
- path: AB1938 Kreis-Beckum_Titel_0001.djvu | |||
page_index: 1 | |||
valid: false | |||
width: 2105 | |||
height: 1511 | |||
dpi: 300 | |||
djvu_path: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu | |||
page_key: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu#0001 | |||
- path: s003_0001.djvu | |||
page_index: 2 | |||
valid: false | |||
width: 2089 | |||
height: 2742 | |||
dpi: 300 | |||
djvu_path: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu | |||
page_key: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu#0002 | |||
- path: s004_0001.djvu | |||
page_index: 3 | |||
valid: false | |||
width: 2057 | |||
height: 1943 | |||
dpi: 300 | |||
djvu_path: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu | |||
page_key: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu#0003 | |||
</source> | |||
=== AB1932-Ramrath === | |||
<source lang='yaml'> | |||
path: /images/f/ff/AB1932-Ramrath.djvu | |||
page_count: 2 | |||
bundled: true | |||
pages: | |||
- path: AB1932-Ramrath_s274_0001.djvu | |||
page_index: 1 | |||
valid: false | |||
width: 761 | |||
height: 2341 | |||
dpi: 300 | |||
djvu_path: /images/f/ff/AB1932-Ramrath.djvu | |||
page_key: /images/f/ff/AB1932-Ramrath.djvu#0001 | |||
- path: AB1932-Ramrath_s275_0001.djvu | |||
page_index: 2 | |||
valid: false | |||
width: 1386 | |||
height: 2306 | |||
dpi: 300 | |||
djvu_path: /images/f/ff/AB1932-Ramrath.djvu | |||
page_key: /images/f/ff/AB1932-Ramrath.djvu#0002 | |||
</source> | |||
=== Plauen-AB-1938 === | |||
<source lang='yaml'> | |||
path: /images/c/ce/Plauen-AB-1938.djvu | |||
page_count: 2 | |||
bundled: true | |||
pages: | |||
- path: BaEVKcJP5e.djvu | |||
page_index: 1 | |||
valid: false | |||
width: 685 | |||
height: 1049 | |||
dpi: 72 | |||
djvu_path: /images/c/ce/Plauen-AB-1938.djvu | |||
page_key: /images/c/ce/Plauen-AB-1938.djvu#0001 | |||
- path: y4RN9G4reI.djvu | |||
page_index: 2 | |||
valid: false | |||
width: 725 | |||
height: 1031 | |||
dpi: 72 | |||
djvu_path: /images/c/ce/Plauen-AB-1938.djvu | |||
page_key: /images/c/ce/Plauen-AB-1938.djvu#0002 | |||
</source> | |||
== SQL == | |||
=== DjVu === | |||
<source lang='sql'> | |||
CREATE TABLE DjVu ( | |||
path TEXT PRIMARY KEY, | |||
page_count INTEGER, | |||
bundled BOOLEAN, | |||
iso_date TEXT, | |||
filesize INTEGER, | |||
tar_iso_date TEXT, | |||
tar_filesize INTEGER | |||
) | |||
</source> | |||
=== Page === | |||
<source lang='sql'> | |||
CREATE TABLE Page ( | |||
path TEXT, | |||
page_index INTEGER, | |||
valid BOOLEAN, | |||
width INTEGER, | |||
height INTEGER, | |||
dpi INTEGER, | |||
djvu_path TEXT, | |||
page_key TEXT PRIMARY KEY | |||
) | |||
</source> | </source> | ||
Latest revision as of 19:41, 2 January 2026
| OsProject | |
|---|---|
| id | djvu-viewer |
| state | active |
| owner | WolfgangFahl |
| title | Djvu Viewer |
| url | https://github.com/WolfgangFahl/djvu-viewer |
| version | 0.3.0 |
| description | DjVu Viewer and tarball converter |
| date | 2026-02-02 |
| since | 2026-02-01 |
| until | |
Introduction
DjVu is a computer file format created by Léon Bottou and others with the goal to efficiently store scanned documents. At the time of its creation the priority was to save disk space and computer resources since these were precious and limited.
DjVu has elaborate mechanisms to provide this efficiency, refined to academic perfection. There is a tradeoff, however: DjVu files can only be processed with DjVu tools, and there is basically only a single implementation available: DjVuLibre. The DjVuLibre git repository has not been touched for 8 years as of 2025 and it only has 23 stars. The original SourceForge repository https://djvu.sourceforge.net/ saw its latest release in 2020.
While DjVu is a great technology and a very useful special purpose solution, it is not optimal for modern AI pipelines and other applications where other aspects of processing play a more important role.
djvuconv therefore repackages DjVu files into tarballs that include a lossless PNG version of each page, a "table of contents" in YAML format and, optionally, prepackaged thumbnails in JPEG format.
djvuconv is written in Python but makes use of the original DjVuLibre library via the python-djvulibre library by Friedrich Fröbel. That library is a fork of the original python-djvulibre library (Copyright © 2010-2021 Jakub Wilk <jwilk@jwilk.net>, GNU General Public License version 2), which was archived by Jakub in 2022.
Motivation
DjVu support in MediaWiki has been degrading over the past few years; see:
- wikimedia phabricator page
- DjVu thumbnails are not being generated at all
- Stackoverflow Question:DjVuImage::getMultiPageInfo: multi-page DJVU file contained no pages
Goals
- create a simple cross platform format for scanned documents
- use PNG, JPEG, PDF and YAML formats which have plenty of tools for viewing, processing and handling
- add native OCR support
- add native AI/LLM support
djvu catalog and remote viewer
Demos
RWTH Aachen
https://djvu-viewer.wikidata.dbis.rwth-aachen.de
RESTful API
CompGen e.V
https://genwiki39.genealogy.net/djvu-viewer/
RESTful API
Installation
pip install djvu-viewer
# alternatively if your pip is not a python3 pip
pip3 install djvu-viewer
# local install from source directory of djvu-viewer
pip install .
upgrade
pip install djvu-viewer -U
# alternatively if your pip is not a python3 pip
pip3 install djvu-viewer -U
Command Line
djvuconv
djvuconv converts collections of DjVu files to tarballs. It optionally collects the metadata, which may be stored in a database for further analysis and processing.
Command Line Usage
djvuconv -h
usage: djvuconv [-h] [--base-path BASE_PATH] [--batch-size BATCH_SIZE]
--command {catalog,convert,thumbnails,dbupdate} [-d]
[--db-path DB_PATH] [-f] [--limit LIMIT]
[--max-errors MAX_ERRORS] [--max-workers MAX_WORKERS]
[--output-path OUTPUT_PATH] [--serial] [--sort {asc,desc}]
[-v] [--url URL]
Process DjVu files
options:
-h, --help show this help message and exit
--base-path BASE_PATH
Base path for DjVu files
--batch-size BATCH_SIZE
Number of pages to process in each batch (default:
100)
--command {catalog,convert,thumbnails,dbupdate}
Command to execute
-d, --debug Enable debugging
--db-path DB_PATH Path to the database
-f, --force Force recreation
--limit LIMIT Maximum number of pages to process
--max-errors MAX_ERRORS
Maximum allowed error percentage before skipping
database update
--max-workers MAX_WORKERS
Maximum number of worker threads (default: CPU count *
4)
--output-path OUTPUT_PATH
Path for PNG files
--serial Use serial processing - parallel is default
--sort {asc,desc} Sort by page count (asc=smallest first)
-v, --verbose Enable debugging
--url URL Process a single DjVu file (only valid in convert
mode)
Server configuration
Apache proxy
# Apache Configuration for djvu.bitplan.com
# WF 2026-02-02
# http Port: 9840
# SSL Port: 443
#
# timeout: 3.0
# HTTPS virtual host for djvu.bitplan.com: forwards every request to the
# local backend listening on port 9840.
<VirtualHost *:443>
ServerName djvu.bitplan.com
ServerAdmin webmaster@bitplan.com
# TLS settings are pulled in from ssl.conf (contents not shown here).
Include ssl.conf
ErrorLog ${APACHE_LOG_DIR}/djvu_error_ssl.log
CustomLog ${APACHE_LOG_DIR}/djvu_ssl.log combined
RewriteEngine On
# Requests whose Upgrade header is "websocket" (case-insensitive) are
# proxied ([P]) to the backend using the ws:// scheme.
RewriteCond %{HTTP:Upgrade} =websocket [NC]
RewriteRule /(.*) ws://localhost:9840/$1 [P,L]
# Every other request is proxied to the same backend over plain http://.
RewriteCond %{HTTP:Upgrade} !=websocket [NC]
RewriteRule /(.*) http://localhost:9840/$1 [P,L]
# Rewrite backend URLs in redirect response headers back to this host.
ProxyPassReverse / http://localhost:9840/
</VirtualHost>
# Plain-HTTP virtual host for djvu.bitplan.com: applies the same proxy rules
# as the *:443 host; no redirect to HTTPS is configured here.
<VirtualHost *:80>
ServerName djvu.bitplan.com
ServerAdmin webmaster@bitplan.com
ErrorLog ${APACHE_LOG_DIR}/djvu_error.log
CustomLog ${APACHE_LOG_DIR}/djvu.log combined
RewriteEngine On
# Requests whose Upgrade header is "websocket" (case-insensitive) are
# proxied ([P]) to the backend using the ws:// scheme.
RewriteCond %{HTTP:Upgrade} =websocket [NC]
RewriteRule /(.*) ws://localhost:9840/$1 [P,L]
# Every other request is proxied to the same backend over plain http://.
RewriteCond %{HTTP:Upgrade} !=websocket [NC]
RewriteRule /(.*) http://localhost:9840/$1 [P,L]
# Rewrite backend URLs in redirect response headers back to this host.
ProxyPassReverse / http://localhost:9840/
</VirtualHost>
Caddy proxy
# Djvu-Viewer (NiceGUI on 9840), mounted at /djvu-viewer/
# Strip the prefix for the upstream and tell it where it's mounted.
# handle_path removes the matched /djvu-viewer prefix before proxying.
handle_path /djvu-viewer/* {
reverse_proxy http://localhost:9840 {
# Pass the original request host and scheme on to the upstream.
header_up Host {host}
header_up X-Forwarded-Host {host}
header_up X-Forwarded-Proto {scheme}
# Mount point that was stripped above, so the upstream can build its URLs.
header_up X-Forwarded-Prefix /djvu-viewer
# WebSocket Origin enforcement
# Sets the Origin header sent upstream to this site's own scheme and host.
header_up Origin {scheme}://{host}
}
}
Catalog database structure
YAML
AB1938_Kreis-Beckum_Inhaltsverz
Note: the bundled flag might be wrong in some of our demos. The actual bundle state can be determined by checking whether all pages of a document have an identical djvu_path.
path: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu
page_count: 3
bundled: false
pages:
- path: AB1938 Kreis-Beckum_Titel_0001.djvu
page_index: 1
valid: false
width: 2105
height: 1511
dpi: 300
djvu_path: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu
page_key: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu#0001
- path: s003_0001.djvu
page_index: 2
valid: false
width: 2089
height: 2742
dpi: 300
djvu_path: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu
page_key: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu#0002
- path: s004_0001.djvu
page_index: 3
valid: false
width: 2057
height: 1943
dpi: 300
djvu_path: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu
page_key: /images/c/c7/AB1938_Kreis-Beckum_Inhaltsverz.djvu#0003
AB1932-Ramrath
path: /images/f/ff/AB1932-Ramrath.djvu
page_count: 2
bundled: true
pages:
- path: AB1932-Ramrath_s274_0001.djvu
page_index: 1
valid: false
width: 761
height: 2341
dpi: 300
djvu_path: /images/f/ff/AB1932-Ramrath.djvu
page_key: /images/f/ff/AB1932-Ramrath.djvu#0001
- path: AB1932-Ramrath_s275_0001.djvu
page_index: 2
valid: false
width: 1386
height: 2306
dpi: 300
djvu_path: /images/f/ff/AB1932-Ramrath.djvu
page_key: /images/f/ff/AB1932-Ramrath.djvu#0002
Plauen-AB-1938
path: /images/c/ce/Plauen-AB-1938.djvu
page_count: 2
bundled: true
pages:
- path: BaEVKcJP5e.djvu
page_index: 1
valid: false
width: 685
height: 1049
dpi: 72
djvu_path: /images/c/ce/Plauen-AB-1938.djvu
page_key: /images/c/ce/Plauen-AB-1938.djvu#0001
- path: y4RN9G4reI.djvu
page_index: 2
valid: false
width: 725
height: 1031
dpi: 72
djvu_path: /images/c/ce/Plauen-AB-1938.djvu
page_key: /images/c/ce/Plauen-AB-1938.djvu#0002
SQL
DjVu
-- Catalog table with one row per DjVu document.
-- Mirrors the top-level keys of the YAML catalog examples (path, page_count,
-- bundled); the remaining columns do not appear in those examples.
CREATE TABLE DjVu (
path TEXT PRIMARY KEY, -- document path, e.g. /images/f/ff/AB1932-Ramrath.djvu
page_count INTEGER, -- number of pages listed for the document
bundled BOOLEAN, -- bundled-document flag; the page notes it may be wrong in some demos
iso_date TEXT, -- presumably the DjVu file's timestamp as ISO text -- TODO confirm
filesize INTEGER, -- presumably the DjVu file size in bytes -- TODO confirm
tar_iso_date TEXT, -- presumably the timestamp of the converted tarball -- TODO confirm
tar_filesize INTEGER -- presumably the size of the converted tarball -- TODO confirm
)
Page
-- Catalog table with one row per page of a DjVu document.
-- Columns mirror the entries of the "pages" list in the YAML catalog examples.
-- No foreign key to DjVu is declared; djvu_path carries the link by value.
CREATE TABLE Page (
path TEXT, -- per-page file name, e.g. AB1932-Ramrath_s274_0001.djvu
page_index INTEGER, -- page number; the examples start at 1
valid BOOLEAN, -- validity flag (false in all examples); exact meaning not shown -- TODO confirm
width INTEGER, -- page width, presumably in pixels -- TODO confirm
height INTEGER, -- page height, presumably in pixels -- TODO confirm
dpi INTEGER, -- resolution; the examples show 300 and 72
djvu_path TEXT, -- path of the owning document; equals DjVu.path in the examples
page_key TEXT PRIMARY KEY -- in the examples: djvu_path + '#' + 4-digit zero-padded page_index
)