from __future__ import annotations
import os
from dataclasses import dataclass, field
from typing import Optional
"""Runner-side configuration contracts.
This module is intentionally separate from `revise/revise.yaml`:
- `revise/revise.yaml` is the external source-of-truth config and routing surface.
- Dataclasses in this module are internal runner contracts consumed by legacy-style
kernels/runners, created in `revise.backend.adapters` from merged YAML config.
Keeping this layer explicit makes route resolution and runner compatibility concerns
independent and easier to evolve.
"""
@dataclass
class BaseConf:
    """Fields shared by every runner configuration dataclass in this module.

    All six fields are required (none has a default), so in each subclass's
    generated ``__init__`` they precede the subclass's own fields.
    """

    # --- runtime parameters ---
    sample_name: str  # joined into input/result paths by subclass properties
    raw_data_path: str  # root that subclass properties join input files onto
    result_root_path: str  # root that subclass ``result_dir`` properties join onto

    # --- annotate column keys ---
    # NOTE(review): presumably column names / labels in the annotated table;
    # confirm against the consuming runner.
    cell_type_col: str
    confidence_col: str
    unknown_key: str
[docs]
@dataclass
class ApplicationSpConf(BaseConf):
    """Runner configuration extending :class:`BaseConf` with input file names
    and annotate / preprocess / plot / reconstruct settings.

    ``st_file``, ``sc_ref_file`` and ``annotate_mode`` are required; every
    other field has a default. The three properties derive filesystem paths
    from the stored fields and perform no I/O.

    NOTE(review): the ``*_pot_*`` fields are presumably optimal-transport
    regularisation settings; confirm against the consuming runner.
    """

    st_file: str
    sc_ref_file: str
    annotate_mode: str

    # annotate parameters
    annotate_pot_reg: float = 0.1
    annotate_pot_reg_m: float = 0.0
    annotate_pot_reg_type: str = "entropy"

    # preprocess parameters
    prep_st_min_counts: int = 20
    prep_st_min_cells: int = 30
    prep_sc_min_counts: int = 20
    prep_sc_min_cells: int = 50

    # plot parameters
    plot_flag: bool = True
    # default_factory gives every instance its own list instead of a shared one
    plot_cluster_resolution: list = field(default_factory=lambda: [0.1, 0.3, 0.5])
    plot_min_genes: int = 20
    plot_min_cells: int = 3
    plot_sample_size: int = 10000

    # reconstruct parameters
    rec_graph_n_neighbors: int = 10
    rec_graph_exp_neighbor_num: int = 10
    rec_graph_spatial_neighbor_num: int = 10
    rec_graph_method: str = "joint"
    rec_graph_alpha: float = 0.4

    # reconstruct ot
    rec_pot_reg: float = 0.05
    rec_pot_reg_m: float = 1.0
    rec_pot_reg_type: str = "kl"
    # Annotated so @dataclass registers it as a real field: without the
    # annotation it is only a shared class attribute that cannot be passed to
    # __init__ and is absent from fields()/repr/eq/asdict.
    rec_alpha: float = 0.5

    @property
    def result_dir(self) -> str:
        """Return ``<result_root_path>/<sample_name>``."""
        return os.path.join(self.result_root_path, self.sample_name)

    @property
    def st_file_path(self) -> str:
        """Return ``<raw_data_path>/<sample_name>_<st_file>``."""
        return os.path.join(self.raw_data_path, f"{self.sample_name}_{self.st_file}")

    @property
    def sc_ref_file_path(self) -> str:
        """Return ``<raw_data_path>/<sc_ref_file>``."""
        return os.path.join(self.raw_data_path, self.sc_ref_file)
[docs]
@dataclass
class ApplicationScConf(BaseConf):
    """Runner configuration extending :class:`BaseConf` with input file names
    and annotate / preprocess / reconstruct settings.

    ``st_file`` and ``sc_ref_file`` are required; ``annotate_mode`` defaults to
    ``None`` and every other field has a default. The three properties derive
    filesystem paths from the stored fields and perform no I/O.

    NOTE(review): the ``*_pot_*`` fields are presumably optimal-transport
    regularisation settings; confirm against the consuming runner.
    """

    st_file: str
    sc_ref_file: str
    annotate_mode: Optional[str] = None

    # annotate parameters
    annotate_pot_reg: float = 0.06
    annotate_pot_reg_m: float = 0.015
    annotate_pot_reg_type: str = "entropy"

    # preprocess parameters
    prep_st_min_counts: int = 60
    prep_st_min_cells: int = 100
    prep_sc_min_counts: int = 0
    prep_sc_min_cells: int = 100

    # reconstruct parameters
    rec_graph_n_neighbors: int = 10
    rec_graph_exp_neighbor_num: int = 15
    rec_graph_spatial_neighbor_num: int = 6
    rec_graph_method: str = "joint"
    rec_graph_alpha: float = 0.2

    # reconstruct ot
    rec_pot_reg: float = 0.06
    rec_pot_reg_m: float = 0.015
    rec_pot_reg_type: str = "entropy"
    rec_match_spot_sum: bool = False
    # Annotated so @dataclass registers it as a real field: without the
    # annotation it is only a shared class attribute that cannot be passed to
    # __init__ and is absent from fields()/repr/eq/asdict. Declared after
    # rec_match_spot_sum so the pre-existing positional argument order of
    # __init__ is unchanged.
    rec_alpha: float = 0.5

    @property
    def result_dir(self) -> str:
        """Return ``<result_root_path>/<sample_name>``."""
        return os.path.join(self.result_root_path, self.sample_name)

    @property
    def st_file_path(self) -> str:
        """Return ``<raw_data_path>/<sample_name>_<st_file>``."""
        return os.path.join(self.raw_data_path, f"{self.sample_name}_{self.st_file}")

    @property
    def sc_ref_file_path(self) -> str:
        """Return ``<raw_data_path>/<sc_ref_file>``."""
        return os.path.join(self.raw_data_path, self.sc_ref_file)
[docs]
@dataclass
class ApplicationScSrConf(BaseConf):
    """Runner configuration extending :class:`BaseConf` with input file names
    and annotate / preprocess / svc / graph / pot settings.

    ``st_file`` and ``sc_ref_file`` are required; ``annotate_mode`` defaults to
    ``None`` and every other field has a default. The properties derive
    filesystem paths from the stored fields and perform no I/O.

    NOTE(review): the ``*_pot_*`` fields are presumably optimal-transport
    regularisation settings; confirm against the consuming runner.
    """

    st_file: str
    sc_ref_file: str
    annotate_mode: Optional[str] = None

    # annotate parameters
    annotate_pot_reg: float = 0.01
    annotate_pot_reg_m: float = 0.0001
    annotate_pot_reg_type: str = "kl"

    # preprocess parameters
    prep_st_min_counts: int = 60
    prep_st_min_cells: int = 100
    prep_sc_min_counts: int = 0
    prep_sc_min_cells: int = 100

    # svc parameters
    # Kept ahead of the graph/pot fields below so that its positional slot in
    # __init__ is the same as before those settings became real fields.
    svc_completeness: bool = True

    # graph parameters
    # These (and the pot parameters below) carry type annotations so that
    # @dataclass registers them as fields. Unannotated assignments are only
    # shared class attributes: they cannot be passed to __init__ and do not
    # appear in fields()/repr/eq/asdict.
    rec_graph_n_neighbors: int = 20
    rec_graph_method: str = "joint"
    rec_graph_alpha: float = 0.2
    rec_graph_exp_neighbor_num: int = 10
    rec_graph_spatial_neighbor_num: int = 20

    # pot parameters
    rec_pot_reg: float = 0.05
    rec_pot_reg_m: float = 1.0
    rec_pot_reg_type: str = "kl"
    rec_alpha: float = 1.0
    rec_match_spot_sum: bool = False

    @property
    def result_dir(self) -> str:
        """Return ``<result_root_path>/<sample_name>``."""
        return os.path.join(self.result_root_path, self.sample_name)

    @property
    def st_file_path(self) -> str:
        """Return ``<raw_data_path>/<sample_name>_<st_file>``."""
        return os.path.join(self.raw_data_path, f"{self.sample_name}_{self.st_file}")

    @property
    def sc_ref_file_path(self) -> str:
        """Return ``<raw_data_path>/<sc_ref_file>``."""
        return os.path.join(self.raw_data_path, self.sc_ref_file)

    @property
    def pm_on_cell_file(self) -> str:
        """Return ``<raw_data_path>/PM_on_cell.csv``."""
        return os.path.join(self.raw_data_path, "PM_on_cell.csv")
[docs]
@dataclass
class BenchmarkSegConf(BaseConf):
    """Runner configuration extending :class:`BaseConf` for runs keyed by a
    ``seg_method``.

    Required fields: ``st_file``, ``gt_svc_file``, ``sc_ref_file``,
    ``seg_method`` and ``annotate_mode``. ``case_subdir`` is optional and, when
    set, replaces ``seg_method`` as the last component of ``result_dir``.
    All properties are pure path computations; none touches the filesystem.
    """

    st_file: str
    gt_svc_file: str
    sc_ref_file: str
    seg_method: str
    annotate_mode: str
    case_subdir: Optional[str] = None

    # annotate parameters
    annotate_pot_reg: float = 0.1
    annotate_pot_reg_m: float = 0.0
    annotate_pot_reg_type: str = "entropy"

    # segmentation effect parameters
    dropout_total_counts: int = 60
    swapping_total_counts: int = 300
    lower_ts: float = 0.2
    upper_ts: float = 0.8

    # reconstruct graph
    rec_graph_n_neighbors: int = 50
    rec_graph_exp_neighbor_num: int = 30
    rec_graph_spatial_neighbor_num: int = 30
    rec_graph_method: str = "joint"
    rec_graph_alpha: float = 0.8

    # reconstruct ot
    rec_pot_reg: float = 1.0
    rec_pot_reg_m: float = 0.0
    rec_pot_reg_type: str = "kl"
    rec_alpha: float = 1.0

    @property
    def result_dir(self) -> str:
        """Return ``<result_root_path>/<sample_name>/<leaf>``.

        ``<leaf>`` is ``case_subdir`` when that is truthy, else ``seg_method``.
        """
        if self.case_subdir:
            leaf_name = self.case_subdir
        else:
            leaf_name = self.seg_method
        return os.path.join(self.result_root_path, self.sample_name, leaf_name)

    @property
    def st_file_path(self) -> str:
        """Return ``<raw_data_path>/<sample_name>/<seg_method>/<st_file>``."""
        components = (
            self.raw_data_path,
            self.sample_name,
            self.seg_method,
            self.st_file,
        )
        return os.path.join(*components)

    @property
    def gt_svc_file_path(self) -> str:
        """Return ``<raw_data_path>/<sample_name>/<gt_svc_file>``."""
        components = (self.raw_data_path, self.sample_name, self.gt_svc_file)
        return os.path.join(*components)

    @property
    def sc_ref_file_path(self) -> str:
        """Return ``<raw_data_path>/<sample_name>/<sc_ref_file>``."""
        components = (self.raw_data_path, self.sample_name, self.sc_ref_file)
        return os.path.join(*components)
[docs]
@dataclass
class BenchmarkSrConf(BaseConf):
    """Runner configuration extending :class:`BaseConf` for runs keyed by a
    ``spot_size``.

    Required fields: ``st_file``, ``gt_svc_file``, ``sc_ref_file``,
    ``spot_size`` and ``annotate_mode``. ``case_subdir`` is optional and, when
    set, replaces ``spot_<spot_size>`` as the last component of ``result_dir``.
    All properties are pure path computations; none touches the filesystem.
    """

    st_file: str
    gt_svc_file: str
    sc_ref_file: str
    spot_size: int
    annotate_mode: str
    case_subdir: Optional[str] = None

    # annotate parameters
    annotate_pot_reg: float = 0.01
    annotate_pot_reg_m: float = 0.0001
    annotate_pot_reg_type: str = "kl"

    # svc parameters
    svc_completeness: bool = True

    # optional graph aggregation (SR robustness benchmark)
    rec_graph_n_neighbors: int = 20
    rec_graph_exp_neighbor_num: int = 10
    rec_graph_spatial_neighbor_num: int = 20
    rec_graph_method: str = "joint"
    rec_graph_alpha: float = 0.2
    rec_pot_reg: float = 0.05
    rec_pot_reg_m: float = 1.0
    rec_pot_reg_type: str = "kl"
    rec_alpha: float = 1.0
    rec_graph_agg_enabled: bool = False

    # spot-level spatial leakage noise (benchmark stress test)
    sr_noise_enabled: bool = False
    sr_noise_lambda: float = 0.0
    sr_noise_k: int = 4
    sr_noise_weight: str = "distance"
    sr_noise_preserve_total_counts: bool = True
    sr_noise_seed: int = 42

    @property
    def result_dir(self) -> str:
        """Return ``<result_root_path>/<sample_name>/<leaf>``.

        ``<leaf>`` is ``case_subdir`` when that is truthy, else
        ``spot_<spot_size>``.
        """
        if self.case_subdir:
            leaf_name = self.case_subdir
        else:
            leaf_name = f"spot_{self.spot_size}"
        return os.path.join(self.result_root_path, self.sample_name, leaf_name)

    @property
    def st_file_path(self) -> str:
        """Return ``<raw_data_path>/<sample_name>/spot_<spot_size>/<st_file>``."""
        spot_dir = f"spot_{self.spot_size}"
        return os.path.join(
            self.raw_data_path, self.sample_name, spot_dir, self.st_file
        )

    @property
    def gt_svc_file_path(self) -> str:
        """Return ``<raw_data_path>/<sample_name>/<gt_svc_file>``."""
        components = (self.raw_data_path, self.sample_name, self.gt_svc_file)
        return os.path.join(*components)

    @property
    def sc_ref_file_path(self) -> str:
        """Return ``<raw_data_path>/<sample_name>/<sc_ref_file>``."""
        components = (self.raw_data_path, self.sample_name, self.sc_ref_file)
        return os.path.join(*components)

    @property
    def pm_on_cell_file(self) -> str:
        """Return ``PM_on_cell.csv`` in the parent of ``<raw_data_path>/<sample_name>``."""
        sample_dir = os.path.join(self.raw_data_path, self.sample_name)
        parent_dir = os.path.dirname(sample_dir)
        return os.path.join(parent_dir, "PM_on_cell.csv")
[docs]
@dataclass
class BenchmarkImputeConf(BaseConf):
    """Runner configuration extending :class:`BaseConf` with preprocess,
    annotate and reconstruct (graph / ot / impute) settings.

    Required fields: ``st_file``, ``gt_svc_file``, ``sc_ref_file`` and
    ``annotate_mode``. ``case_subdir`` is optional and, when set, is appended
    to ``result_dir``. All properties are pure path computations; none touches
    the filesystem.
    """

    st_file: str
    gt_svc_file: str
    sc_ref_file: str
    annotate_mode: str
    case_subdir: Optional[str] = None

    # preprocess parameters
    prep_min_cells: int = 30
    prep_min_counts: int = 60

    # annotate parameters
    annotate_pot_reg: float = 0.01
    annotate_pot_reg_m: float = 0.0001
    annotate_pot_reg_type: str = "kl"

    # reconstruct graph
    rec_graph_preprocess: bool = True
    rec_graph_n_pcs: int = 50
    rec_graph_n_neighbors: int = 15

    # reconstruct ot
    rec_impute_pot_reg: float = 5.0
    rec_impute_pot_reg_m: float = 0.0
    rec_impute_pot_reg_type: str = "kl"

    # reconstruct impute
    rec_merge_subcluster_method: str = "mean"
    rec_subcluster_resolution: int = 3
    rec_impute_prune_flag: bool = True
    rec_impute_n_neighbors: int = 1
    rec_impute_method: str = "mean"

    @property
    def result_dir(self) -> str:
        """Return ``<result_root_path>/<sample_name>``, plus ``case_subdir``
        as a final component when it is truthy."""
        components = [self.result_root_path, self.sample_name]
        if self.case_subdir:
            components.append(self.case_subdir)
        return os.path.join(*components)

    @property
    def st_file_path(self) -> str:
        """Return ``<raw_data_path>/<sample_name>/<st_file>``."""
        components = (self.raw_data_path, self.sample_name, self.st_file)
        return os.path.join(*components)

    @property
    def sc_ref_file_path(self) -> str:
        """Return ``<raw_data_path>/<sample_name>/<sc_ref_file>``."""
        components = (self.raw_data_path, self.sample_name, self.sc_ref_file)
        return os.path.join(*components)

    @property
    def gt_svc_file_path(self) -> str:
        """Return ``<raw_data_path>/<sample_name>/<gt_svc_file>``."""
        components = (self.raw_data_path, self.sample_name, self.gt_svc_file)
        return os.path.join(*components)