Documentation and Repositories
Create a new project
* ATTENTION: Currently, it is not possible to delete or rename a project ⇒ choose a meaningful and sharable name
Create a new document (= ms, i.e. one manuscript)
* line offset: baseline
Add images to a document (wait until the upload is finished!)
Detect layout (Segmentation model for regions and lines)
# Set up an isolated Python environment for kraken under /Documents/kraken/.
# -p keeps this step repeatable: it creates missing parent directories and
# does not fail when the directory already exists.
mkdir -p /Documents/kraken/
cd /Documents/kraken/
python3 -m venv kraken-env
# Activate the venv so the pip/kraken calls below run inside it.
source kraken-env/bin/activate
pip install kraken
# Sanity check: the kraken CLI is installed and reachable.
kraken --version
# Installed separately from kraken; presumably needed for the --augment
# option of the ketos segtrain call later in these notes — confirm.
pip install albumentations
# Build the training/validation page lists inside the ALTO export directory.
cd ../export_doc1631_averroes_logica_alto_20260325130540/
# First 8 PNGs -> training list, last 2 PNGs -> validation list. The shell
# expands the glob itself (one name per line) instead of parsing `ls` output.
# NOTE(review): the 8/2 split assumes the export holds exactly 10 PNGs; with
# fewer the two lists overlap, with more the pages in between are unused.
printf '%s\n' *.png | head -n 8 > train.txt
printf '%s\n' *.png | tail -n 2 > val.txt
# Resolve every listed name to an absolute path. Reading line by line keeps
# names with spaces or glob characters intact, which an unquoted $(cat ...)
# would split into several words.
while IFS= read -r page; do realpath -- "$page"; done < train.txt > train_abs.txt
while IFS= read -r page; do realpath -- "$page"; done < val.txt > val_abs.txt
# Train a segmentation model starting from blla.mlmodel, using the ALTO
# training (-t) and evaluation (-e) manifests, with -N 50 and output name
# averroes_seg.
# The manifests are referenced relative to the export directory entered by the
# preceding `cd`; the former export_doc.../ prefix pointed at a nested
# directory that the earlier steps never create.
# NOTE(review): if this command is meant to be run from the parent directory
# instead, restore the directory prefix on both manifest paths.
ketos segtrain \
  --load /Documents/kraken/blla.mlmodel \
  -t train_abs.txt \
  -e val_abs.txt \
  -f alto \
  --augment \
  --resize union \
  -N 50 \
  -o averroes_seg
# YALTAi workflow: convert the ALTO files to a YOLO dataset, train a YOLO
# detector on it, then segment the complete page set with the result.
pip install YALTAi
cd yaltai_training/
# Convert the Averroes*.xml ALTO files into the dataset directory my-dataset.
# presumably --shuffle 0.1 holds out 10% of the pages for validation and
# --segmonto region reduces zone labels to their SegmOnto base type — confirm
# against the YALTAi documentation.
yaltai convert alto-to-yolo \
  Averroes*.xml my-dataset \
  --shuffle 0.1 \
  --segmonto region
# Train a detection model from yolov8n.pt on device 0 using the dataset
# config written into my-dataset/.
yolo \
  task=detect \
  mode=train \
  model=yolov8n.pt \
  data=my-dataset/config.yml \
  epochs=100 \
  batch=4 \
  imgsz=960 \
  device=0
# Segment the PNGs under averroes_complete/ with the trained YOLO weights and
# blla.mlmodel, using ".xml" as the output suffix. The input pattern is quoted,
# so the shell passes it through unexpanded.
# NOTE(review): runs/detect/train/ is assumed to be the output directory of the
# training run above — check the actual run directory before use.
yaltai kraken \
  --device cuda:0 \
  -I "/Documents/kraken/averroes_complete/*.png" \
  --suffix ".xml" \
  segment \
  --yolo runs/detect/train/weights/best.pt \
  -i /Documents/kraken/blla.mlmodel
# Apparently an attempt to convert the segmentation result of page 221 into an
# ALTO file with the suffix _alto.xml.
# NOTE(review): `alto` does not look like a kraken subcommand — ALTO output is
# normally requested with the global -a/--alto flag placed before a processing
# step. Confirm that this invocation works with the installed kraken version.
kraken alto -i Averroes_Logica_JuntSecunda_01-1_2.pdf_page_221.xml Averroes_Logica_JuntSecunda_01-1_2.pdf_page_221_alto.xml
# convert-json2ALTO.py is a local script not shown in these notes; judging by
# its name it converts JSON output to ALTO XML — confirm its inputs/outputs.
python3 convert-json2ALTO.py
# Remove the absolute image-directory prefix from every XML file in place, so
# only the part after that prefix remains wherever the path occurred.
# NOTE: `sed -i` without a backup suffix is GNU sed syntax.
for f in *.xml; do
  # Without this guard an unmatched glob would hand sed the literal "*.xml".
  [ -e "$f" ] || continue
  sed -i 's|/home/padlina/Documents/kraken/averroes_complete/||g' "$f"
done
# Bundle the XML files into one archive; -j stores them without directory
# components.
zip -j alto_output.zip *.xml