# Source code for song_phenotyping.tools.pipeline_paths

"""Central path constants for the song phenotyping pipeline output tree.

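All pipeline modules import from here so that directory names can be
changed in one place.  Use :func:`stage_path` (or :func:`run_stage_path`
for run-scoped directories) to join these names onto a bird root; the
result is absolute only when the root passed in is absolute.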

Output tree layout
------------------
::

    <save_path>/<bird>/
        <run_name>/              ← e.g. "d5dfde49" (SHA256[:8] of config)
            stages/
                01_specs/        ← Stage A  spectrogram HDF5 files
                02_features/     ← Stage B  flattened feature HDF5 files
                03_embeddings/   ← Stage C  UMAP HDF5 + pkl models
                04_labels/       ← Stage D  cluster label HDF5 files
                syllable_database/  ← syllable_features.{csv,h5} + feature_params.json
                05_phenotype/    ← Stage E  detailed phenotype pkl files
            results/
                master_summary.csv
                phenotype_results.csv
                run_config.json
                catalog/         ← HTML song catalogs
                plots/           ← PDFs / images

Each unique combination of computational parameters produces a different
``<run_name>`` directory, isolating run artifacts completely.  The
``stages/`` subtree contains internal computation artifacts; ``results/``
contains outputs intended for human inspection.
"""

from pathlib import Path

# ---------------------------------------------------------------------------
# Top-level subdirectory names
# ---------------------------------------------------------------------------

STAGES_DIR  = "stages"
RESULTS_DIR = "results"

# ---------------------------------------------------------------------------
# Stage subdirectories (relative to run root)
# ---------------------------------------------------------------------------
# Each entry is derived from STAGES_DIR so that renaming the top-level
# directory only requires editing one constant.  Forward slashes are used
# as separators; pathlib accepts them on every platform.

SPECS_DIR       = f"{STAGES_DIR}/01_specs"
FEATURES_DIR    = f"{STAGES_DIR}/02_features"
EMBEDDINGS_DIR  = f"{STAGES_DIR}/03_embeddings"
LABELS_DIR      = f"{STAGES_DIR}/04_labels"
SYLLABLE_DB_DIR = f"{STAGES_DIR}/syllable_database"
PHENOTYPE_DIR   = f"{STAGES_DIR}/05_phenotype"

# ---------------------------------------------------------------------------
# Results subdirectories (relative to run root)
# ---------------------------------------------------------------------------
# Derived from RESULTS_DIR for the same reason as the stage directories.

CATALOG_DIR = f"{RESULTS_DIR}/catalog"
PLOTS_DIR   = f"{RESULTS_DIR}/plots"

# ---------------------------------------------------------------------------
# Helper
# ---------------------------------------------------------------------------

[docs] def stage_path(bird_root: str | Path, subdir: str) -> Path: """Return ``bird_root / subdir`` as an absolute :class:`~pathlib.Path`. Parameters ---------- bird_root: Root directory for a single bird, e.g. ``E:/pipeline_runs/or18or24``. subdir: One of the ``*_DIR`` constants defined in this module, or any relative path string. Example ------- >>> from song_phenotyping.tools.pipeline_paths import stage_path, SPECS_DIR >>> stage_path("/data/pipeline_runs/or18or24", SPECS_DIR) PosixPath('/data/pipeline_runs/or18or24/stages/01_specs') """ return Path(bird_root) / subdir
# ---------------------------------------------------------------------------
# Run-scoped helpers
# ---------------------------------------------------------------------------
[docs] def run_root(bird_root, run_name: str) -> Path: """Return ``<bird_root>/<run_name>`` as a :class:`~pathlib.Path`. Parameters ---------- bird_root: Root directory for a single bird, e.g. ``/data/pipeline_runs/or18or24``. run_name: Human-readable or hash-derived run identifier, e.g. ``"baseline"`` or ``"a1b2c3d4"``. Example ------- >>> run_root("/data/or18or24", "baseline") PosixPath('/data/or18or24/baseline') """ return Path(bird_root) / run_name
def run_stage_path(bird_root: str | Path, run_name: str, subdir: str) -> Path:
    """Return ``<bird_root>/<run_name>/<subdir>`` as a :class:`~pathlib.Path`.

    Parameters
    ----------
    bird_root:
        Root directory for a single bird.
    run_name:
        Run identifier (see :func:`run_root`).
    subdir:
        One of the stage ``*_DIR`` constants, e.g. :data:`SPECS_DIR`.

    Returns
    -------
    Path
        The run root from :func:`run_root` joined with ``subdir``; the
        path is joined only, not resolved or created.

    Example
    -------
    >>> run_stage_path("/data/or18or24", "baseline", SPECS_DIR)
    PosixPath('/data/or18or24/baseline/stages/01_specs')
    """
    # Delegate to run_root so the run-directory layout is defined in one place.
    return run_root(bird_root, run_name) / subdir