# Source code for revise.config.runner_conf

from __future__ import annotations

import os
from dataclasses import dataclass, field
from typing import Optional

"""Runner-side configuration contracts.

This module is intentionally separate from `revise/revise.yaml`:
- `revise/revise.yaml` is the external source-of-truth config and routing surface.
- Dataclasses in this module are internal runner contracts consumed by legacy-style
  kernels/runners, created in `revise.backend.adapters` from merged YAML config.

Keeping this layer explicit makes route resolution and runner compatibility concerns
independent and easier to evolve.
"""


@dataclass
class BaseConf:
    """Fields shared by every runner configuration dataclass in this module.

    All six fields are required strings with no defaults; subclasses append
    their own required fields and defaulted tunables after them.
    """

    # --- runtime parameters ---
    sample_name: str
    raw_data_path: str
    result_root_path: str

    # --- annotate column keys ---
    cell_type_col: str
    confidence_col: str
    unknown_key: str


@dataclass
class ApplicationSpConf(BaseConf):
    """Runner configuration extending ``BaseConf`` with input file names and tunables.

    Adds annotate, preprocess, plot and reconstruct settings, plus read-only
    properties that derive filesystem paths from the configured fields.
    """

    st_file: str
    sc_ref_file: str
    annotate_mode: str

    # annotate parameters
    annotate_pot_reg: float = 0.1
    annotate_pot_reg_m: float = 0.0
    annotate_pot_reg_type: str = "entropy"

    # preprocess parameters
    prep_st_min_counts: int = 20
    prep_st_min_cells: int = 30
    prep_sc_min_counts: int = 20
    prep_sc_min_cells: int = 50

    # plot parameters
    plot_flag: bool = True
    # default_factory gives each instance its own list (no shared mutable default)
    plot_cluster_resolution: list = field(default_factory=lambda: [0.1, 0.3, 0.5])
    plot_min_genes: int = 20
    plot_min_cells: int = 3
    plot_sample_size: int = 10000

    # reconstruct parameters
    rec_graph_n_neighbors: int = 10
    rec_graph_exp_neighbor_num: int = 10
    rec_graph_spatial_neighbor_num: int = 10
    rec_graph_method: str = "joint"
    rec_graph_alpha: float = 0.4

    # reconstruct ot
    rec_pot_reg: float = 0.05
    rec_pot_reg_m: float = 1.0
    rec_pot_reg_type: str = "kl"
    # Annotated so it is a real dataclass field: without the annotation it is a
    # plain class attribute that __init__ rejects and repr/eq/asdict ignore.
    rec_alpha: float = 0.5

    @property
    def result_dir(self) -> str:
        """Return ``result_root_path`` joined with ``sample_name``."""
        return os.path.join(self.result_root_path, self.sample_name)

    @property
    def st_file_path(self) -> str:
        """Return ``raw_data_path`` joined with ``"{sample_name}_{st_file}"``."""
        return os.path.join(self.raw_data_path, f"{self.sample_name}_{self.st_file}")

    @property
    def sc_ref_file_path(self) -> str:
        """Return ``raw_data_path`` joined with ``sc_ref_file``."""
        return os.path.join(self.raw_data_path, self.sc_ref_file)
@dataclass
class ApplicationScConf(BaseConf):
    """Runner configuration extending ``BaseConf`` with input file names and tunables.

    Adds annotate, preprocess and reconstruct settings, plus read-only
    properties that derive filesystem paths from the configured fields.
    """

    st_file: str
    sc_ref_file: str
    annotate_mode: Optional[str] = None

    # annotate parameters
    annotate_pot_reg: float = 0.06
    annotate_pot_reg_m: float = 0.015
    annotate_pot_reg_type: str = "entropy"

    # preprocess parameters
    prep_st_min_counts: int = 60
    prep_st_min_cells: int = 100
    prep_sc_min_counts: int = 0
    prep_sc_min_cells: int = 100

    # reconstruct parameters
    rec_graph_n_neighbors: int = 10
    rec_graph_exp_neighbor_num: int = 15
    rec_graph_spatial_neighbor_num: int = 6
    rec_graph_method: str = "joint"
    rec_graph_alpha: float = 0.2

    # reconstruct ot
    rec_pot_reg: float = 0.06
    rec_pot_reg_m: float = 0.015
    rec_pot_reg_type: str = "entropy"
    rec_match_spot_sum: bool = False
    # Previously an un-annotated class attribute, i.e. not a dataclass field and
    # not accepted by __init__. Now a real field; declared last so the
    # positional order of the pre-existing fields is unchanged.
    rec_alpha: float = 0.5

    @property
    def result_dir(self) -> str:
        """Return ``result_root_path`` joined with ``sample_name``."""
        return os.path.join(self.result_root_path, self.sample_name)

    @property
    def st_file_path(self) -> str:
        """Return ``raw_data_path`` joined with ``"{sample_name}_{st_file}"``."""
        return os.path.join(self.raw_data_path, f"{self.sample_name}_{self.st_file}")

    @property
    def sc_ref_file_path(self) -> str:
        """Return ``raw_data_path`` joined with ``sc_ref_file``."""
        return os.path.join(self.raw_data_path, self.sc_ref_file)
@dataclass
class ApplicationScSrConf(BaseConf):
    """Runner configuration extending ``BaseConf`` with input file names and tunables.

    Adds annotate, preprocess, svc, graph and pot settings, plus read-only
    properties that derive filesystem paths from the configured fields.
    """

    st_file: str
    sc_ref_file: str
    annotate_mode: Optional[str] = None

    # annotate parameters
    annotate_pot_reg: float = 0.01
    annotate_pot_reg_m: float = 0.0001
    annotate_pot_reg_type: str = "kl"

    # preprocess parameters
    prep_st_min_counts: int = 60
    prep_st_min_cells: int = 100
    prep_sc_min_counts: int = 0
    prep_sc_min_cells: int = 100

    # svc parameters
    # Kept ahead of the graph/pot settings below: those used to be un-annotated
    # class attributes (not dataclass fields), so this preserves the positional
    # order of the fields that already existed.
    svc_completeness: bool = True

    # graph parameters
    # Annotated so they are real dataclass fields that can be set through
    # __init__ and appear in repr/eq/asdict; defaults are unchanged.
    rec_graph_n_neighbors: int = 20
    rec_graph_method: str = "joint"
    rec_graph_alpha: float = 0.2
    rec_graph_exp_neighbor_num: int = 10
    rec_graph_spatial_neighbor_num: int = 20

    # pot parameters
    rec_pot_reg: float = 0.05
    rec_pot_reg_m: float = 1.0
    rec_pot_reg_type: str = "kl"
    rec_alpha: float = 1.0
    rec_match_spot_sum: bool = False

    @property
    def result_dir(self) -> str:
        """Return ``result_root_path`` joined with ``sample_name``."""
        return os.path.join(self.result_root_path, self.sample_name)

    @property
    def st_file_path(self) -> str:
        """Return ``raw_data_path`` joined with ``"{sample_name}_{st_file}"``."""
        return os.path.join(self.raw_data_path, f"{self.sample_name}_{self.st_file}")

    @property
    def sc_ref_file_path(self) -> str:
        """Return ``raw_data_path`` joined with ``sc_ref_file``."""
        return os.path.join(self.raw_data_path, self.sc_ref_file)

    @property
    def pm_on_cell_file(self) -> str:
        """Return ``raw_data_path`` joined with the fixed name ``PM_on_cell.csv``."""
        return os.path.join(self.raw_data_path, "PM_on_cell.csv")
@dataclass
class BenchmarkSegConf(BaseConf):
    """Runner configuration extending ``BaseConf`` for a segmentation-method case.

    Input paths are resolved under ``raw_data_path/sample_name``; results go
    under ``result_root_path/sample_name`` in a per-case leaf directory.
    """

    st_file: str
    gt_svc_file: str
    sc_ref_file: str
    seg_method: str
    annotate_mode: str
    case_subdir: Optional[str] = None

    # annotate parameters
    annotate_pot_reg: float = 0.1
    annotate_pot_reg_m: float = 0.0
    annotate_pot_reg_type: str = "entropy"

    # segmentation effect parameters
    dropout_total_counts: int = 60
    swapping_total_counts: int = 300
    lower_ts: float = 0.2
    upper_ts: float = 0.8

    # reconstruct graph
    rec_graph_n_neighbors: int = 50
    rec_graph_exp_neighbor_num: int = 30
    rec_graph_spatial_neighbor_num: int = 30
    rec_graph_method: str = "joint"
    rec_graph_alpha: float = 0.8

    # reconstruct ot
    rec_pot_reg: float = 1.0
    rec_pot_reg_m: float = 0.0
    rec_pot_reg_type: str = "kl"
    rec_alpha: float = 1.0

    @property
    def result_dir(self) -> str:
        """Return the result directory; the leaf is ``case_subdir`` when truthy, else ``seg_method``."""
        if self.case_subdir:
            leaf_name = self.case_subdir
        else:
            leaf_name = self.seg_method
        return os.path.join(self.result_root_path, self.sample_name, leaf_name)

    @property
    def st_file_path(self) -> str:
        """Return ``raw_data_path/sample_name/seg_method/st_file``."""
        segments = (self.raw_data_path, self.sample_name, self.seg_method, self.st_file)
        return os.path.join(*segments)

    @property
    def gt_svc_file_path(self) -> str:
        """Return ``raw_data_path/sample_name/gt_svc_file``."""
        segments = (self.raw_data_path, self.sample_name, self.gt_svc_file)
        return os.path.join(*segments)

    @property
    def sc_ref_file_path(self) -> str:
        """Return ``raw_data_path/sample_name/sc_ref_file``."""
        segments = (self.raw_data_path, self.sample_name, self.sc_ref_file)
        return os.path.join(*segments)
@dataclass
class BenchmarkSrConf(BaseConf):
    """Runner configuration extending ``BaseConf`` for a spot-size case.

    Input paths are resolved under ``raw_data_path/sample_name``; results go
    under ``result_root_path/sample_name`` in a per-case leaf directory.
    """

    st_file: str
    gt_svc_file: str
    sc_ref_file: str
    spot_size: int
    annotate_mode: str
    case_subdir: Optional[str] = None

    # annotate parameters
    annotate_pot_reg: float = 0.01
    annotate_pot_reg_m: float = 0.0001
    annotate_pot_reg_type: str = "kl"

    # svc parameters
    svc_completeness: bool = True

    # optional graph aggregation (SR robustness benchmark)
    rec_graph_n_neighbors: int = 20
    rec_graph_exp_neighbor_num: int = 10
    rec_graph_spatial_neighbor_num: int = 20
    rec_graph_method: str = "joint"
    rec_graph_alpha: float = 0.2
    rec_pot_reg: float = 0.05
    rec_pot_reg_m: float = 1.0
    rec_pot_reg_type: str = "kl"
    rec_alpha: float = 1.0
    rec_graph_agg_enabled: bool = False

    # spot-level spatial leakage noise (benchmark stress test)
    sr_noise_enabled: bool = False
    sr_noise_lambda: float = 0.0
    sr_noise_k: int = 4
    sr_noise_weight: str = "distance"
    sr_noise_preserve_total_counts: bool = True
    sr_noise_seed: int = 42

    @property
    def result_dir(self) -> str:
        """Return the result directory; the leaf is ``case_subdir`` when truthy, else ``spot_<spot_size>``."""
        if self.case_subdir:
            leaf_name = self.case_subdir
        else:
            leaf_name = f"spot_{self.spot_size}"
        return os.path.join(self.result_root_path, self.sample_name, leaf_name)

    @property
    def st_file_path(self) -> str:
        """Return ``raw_data_path/sample_name/spot_<spot_size>/st_file``."""
        spot_dir = f"spot_{self.spot_size}"
        return os.path.join(self.raw_data_path, self.sample_name, spot_dir, self.st_file)

    @property
    def gt_svc_file_path(self) -> str:
        """Return ``raw_data_path/sample_name/gt_svc_file``."""
        segments = (self.raw_data_path, self.sample_name, self.gt_svc_file)
        return os.path.join(*segments)

    @property
    def sc_ref_file_path(self) -> str:
        """Return ``raw_data_path/sample_name/sc_ref_file``."""
        segments = (self.raw_data_path, self.sample_name, self.sc_ref_file)
        return os.path.join(*segments)

    @property
    def pm_on_cell_file(self) -> str:
        """Return ``PM_on_cell.csv`` inside the dirname of ``raw_data_path/sample_name``."""
        sample_dir = os.path.join(self.raw_data_path, self.sample_name)
        parent_dir = os.path.dirname(sample_dir)
        return os.path.join(parent_dir, "PM_on_cell.csv")
@dataclass
class BenchmarkImputeConf(BaseConf):
    """Runner configuration extending ``BaseConf`` with impute-related tunables.

    Input paths are resolved under ``raw_data_path/sample_name``; results go
    under ``result_root_path/sample_name``, optionally in ``case_subdir``.
    """

    st_file: str
    gt_svc_file: str
    sc_ref_file: str
    annotate_mode: str
    case_subdir: Optional[str] = None

    # preprocess parameters
    prep_min_cells: int = 30
    prep_min_counts: int = 60

    # annotate parameters
    annotate_pot_reg: float = 0.01
    annotate_pot_reg_m: float = 0.0001
    annotate_pot_reg_type: str = "kl"

    # reconstruct graph
    rec_graph_preprocess: bool = True
    rec_graph_n_pcs: int = 50
    rec_graph_n_neighbors: int = 15

    # reconstruct ot
    rec_impute_pot_reg: float = 5.0
    rec_impute_pot_reg_m: float = 0.0
    rec_impute_pot_reg_type: str = "kl"

    # reconstruct impute
    rec_merge_subcluster_method: str = "mean"
    rec_subcluster_resolution: int = 3
    rec_impute_prune_flag: bool = True
    rec_impute_n_neighbors: int = 1
    rec_impute_method: str = "mean"

    @property
    def result_dir(self) -> str:
        """Return ``result_root_path/sample_name``, plus ``case_subdir`` when it is truthy."""
        segments = [self.result_root_path, self.sample_name]
        if self.case_subdir:
            segments.append(self.case_subdir)
        return os.path.join(*segments)

    @property
    def st_file_path(self) -> str:
        """Return ``raw_data_path/sample_name/st_file``."""
        segments = (self.raw_data_path, self.sample_name, self.st_file)
        return os.path.join(*segments)

    @property
    def sc_ref_file_path(self) -> str:
        """Return ``raw_data_path/sample_name/sc_ref_file``."""
        segments = (self.raw_data_path, self.sample_name, self.sc_ref_file)
        return os.path.join(*segments)

    @property
    def gt_svc_file_path(self) -> str:
        """Return ``raw_data_path/sample_name/gt_svc_file``."""
        segments = (self.raw_data_path, self.sample_name, self.gt_svc_file)
        return os.path.join(*segments)