{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# scSVC-reconstructed CAFs uncover their functionally distinct niches within the tumor microenvironment" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Notebook Guide\n", "\n", "**Purpose.** Analyze CAF/Fibroblast sc-SVC states, pathway programs, spatial niches, and pseudotime-like transitions.\n", "\n", "**Inputs.** Executed Fibroblast reconstruction outputs under `../../output/sc_SVC_case/P2CRC_Xenium/Fibroblast/` plus raw Xenium data.\n", "\n", "**Outputs.** CAF subtype maps, marker plots, enrichment/pathway figures, spatial niche plots, and trajectory visualizations displayed inline and saved to disk.\n", "\n", "**Reading order.**\n", "1. Load reconstructed SVCs\n", "2. Annotate CAF subtypes and marker programs\n", "3. Summarize pathway enrichments\n", "4. Compare spatial niches and trajectory structure\n", "\n", "**Reproducibility note.** `revise` imports are standard package imports from the installed `revise-svc` distribution; this notebook does not modify `sys.path` to import the repository source tree.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2026-05-13T10:25:19.300100Z", "iopub.status.busy": "2026-05-13T10:25:19.299868Z", "iopub.status.idle": "2026-05-13T10:25:20.047336Z", "shell.execute_reply": "2026-05-13T10:25:20.046793Z" } }, "outputs": [], "source": [
"import os\n",
"# Default these environment switches (tqdm, numba) before the analysis libraries\n",
"# are imported in the next cell; setdefault keeps any value already exported.\n",
"os.environ.setdefault(\"TQDM_DISABLE\", \"1\")\n",
"os.environ.setdefault(\"TQDM_MININTERVAL\", \"60\")\n",
"os.environ.setdefault(\"NUMBA_DISABLE_JIT\", \"1\")\n",
"\n",
"try:\n",
"    from IPython import get_ipython\n",
"    _ipython = get_ipython()\n",
"    if _ipython is not None:\n",
"        _ipython.run_line_magic(\"matplotlib\", \"inline\")\n",
"except Exception as exc:\n",
"    # Inline plotting is optional: report why it was skipped instead of hiding the error.\n",
"    print(f\"Skipping %matplotlib inline: {type(exc).__name__}: {exc}\")\n",
"\n",
"output_dir = \"../../output/sc_SVC_case/P2CRC_Xenium\"\n",
"select_ct = \"Fibroblast\"\n"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load 
Reconstructed SVCs\n", "\n", "Load the executed reconstruction outputs that drive the downstream figures.\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2026-05-13T10:25:20.049928Z", "iopub.status.busy": "2026-05-13T10:25:20.049648Z", "iopub.status.idle": "2026-05-13T10:25:54.899298Z", "shell.execute_reply": "2026-05-13T10:25:54.898503Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/cpfs01/projects-HDD/cfff-c7cd658afc74_HDD/jiaoyifeng/miniconda3/envs/brainbeacon/lib/python3.9/site-packages/numba/core/decorators.py:246: RuntimeWarning: nopython is set for njit and is ignored\n", " warnings.warn('nopython is set for njit and is ignored', RuntimeWarning)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Conducting differential expression analysis...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_30433/566921725.py:62: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n", "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n", "\n", "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n", "\n", "\n", " adata_sc.obs['Level1'].replace({\"Mono/Macro\": \"Mono_Macro\"}, inplace=True)\n", "/tmp/ipykernel_30433/566921725.py:62: FutureWarning: The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. 
To change the categories, use ser.cat.rename_categories instead.\n", " adata_sc.obs['Level1'].replace({\"Mono/Macro\": \"Mono_Macro\"}, inplace=True)\n" ] } ], "source": [
"import os\n",
"import scanpy as sc\n",
"\n",
"from revise.backend.runners.sc_svc_application import ScSVCAnalysis\n",
"import pandas as pd\n",
"import gseapy as gp\n",
"import revise.analysis.bio as _revise_bio\n",
"\n",
"_EMPTY_ENRICHMENT_COLUMNS = [\n",
"    \"Gene_set\",\n",
"    \"Term\",\n",
"    \"Overlap\",\n",
"    \"P-value\",\n",
"    \"Adjusted P-value\",\n",
"    \"Old P-value\",\n",
"    \"Old Adjusted P-value\",\n",
"    \"Odds Ratio\",\n",
"    \"Combined Score\",\n",
"    \"Genes\",\n",
"]\n",
"\n",
"def _get_enrichment_human_compatible(deg_genes, geneset_file, cutoff=0.05):\n",
"    \"\"\"Call gp.enrichr with organism 'human' and return its results table.\n",
"\n",
"    Returns an empty DataFrame with the _EMPTY_ENRICHMENT_COLUMNS columns when\n",
"    deg_genes is empty or when the enrichr call raises.\n",
"    \"\"\"\n",
"    if not deg_genes:\n",
"        return pd.DataFrame(columns=_EMPTY_ENRICHMENT_COLUMNS)\n",
"    try:\n",
"        enr = gp.enrichr(\n",
"            gene_list=deg_genes,\n",
"            gene_sets=geneset_file,\n",
"            organism=\"human\",\n",
"            cutoff=cutoff,\n",
"        )\n",
"        return enr.results\n",
"    except Exception as exc:\n",
"        # Best-effort: report the failure and continue with an empty table.\n",
"        print(f\"Skipping enrichment analysis: {type(exc).__name__}: {exc}\")\n",
"        return pd.DataFrame(columns=_EMPTY_ENRICHMENT_COLUMNS)\n",
"\n",
"# Replace the module-level get_enrichment in revise.analysis.bio with the wrapper above.\n",
"_revise_bio.get_enrichment = _get_enrichment_human_compatible\n",
"\n",
"\n",
"svc_save_dir = f\"{output_dir}/{select_ct}\"\n",
"sc_svc_expr = sc.read_h5ad(f\"{svc_save_dir}/sc_SVC_expr.h5ad\")\n",
"sc_svc_spatial = sc.read_h5ad(f\"{svc_save_dir}/sc_SVC_spatial.h5ad\")\n",
"\n",
"sc_svc_analysis = ScSVCAnalysis(sc_svc_spatial, sc_svc_expr, \"SVC_cluster\")\n",
"# Rebuild the upstream variables that the legacy analysis notebook expected\n",
"# from the reconstruction notebook kernel state.\n",
"patient_id = \"P2CRC\"\n",
"data_type = \"Xenium\"\n",
"raw_data_path = \"../../raw_data/Real_application\"\n",
"raw_file_name = f\"{raw_data_path}/{patient_id}_{data_type}.h5ad\"\n",
"sc_ref_file = f\"{raw_data_path}/adata_sc_all_reanno.h5ad\"\n",
"\n",
"adata_sp = sc.read(raw_file_name)\n",
"adata_sp = adata_sp[adata_sp.obs['transcript_counts'] >= 60, :].copy()\n",
"sc.pp.filter_genes(adata_sp, min_cells=100)\n",
"\n",
"adata_sc = sc.read(sc_ref_file)\n",
"adata_sc = adata_sc[adata_sc.obs['Patient'] == patient_id, :].copy()\n",
"adata_sc.obs = adata_sc.obs[['Level1', 'Level2']]\n",
"sc.pp.filter_genes(adata_sc, min_cells=100)\n",
"# Assign the renamed labels back explicitly. Calling replace(..., inplace=True) on a\n",
"# column selection is chained assignment, which pandas warns will no longer update\n",
"# the frame in pandas 3.0; rename_categories is the documented way to relabel a category.\n",
"adata_sc.obs['Level1'] = (\n",
"    adata_sc.obs['Level1']\n",
"    .astype(\"category\")\n",
"    .cat.rename_categories({\"Mono/Macro\": \"Mono_Macro\"})\n",
")\n",
"\n",
"overlap_genes = adata_sp.var_names.intersection(adata_sc.var_names)\n",
"adata_sp = adata_sp[:, overlap_genes].copy()\n",
"\n",
"sc_SVC_adata = sc_svc_analysis.sc_SVC_adata_spatial.copy()\n",
"\n",
"\n",
"def _apply_category_palette(adata, key, palette, fallback=\"#d3d3d3\"):\n",
"    \"\"\"Keep categorical colors aligned with the categories present in the plotted subset.\"\"\"\n",
"    if key not in adata.obs:\n",
"        return\n",
"    if not hasattr(adata.obs[key], \"cat\"):\n",
"        adata.obs[key] = adata.obs[key].astype(\"category\")\n",
"    adata.obs[key] = adata.obs[key].cat.remove_unused_categories()\n",
"    categories = adata.obs[key].cat.categories.astype(str).tolist()\n",
"    adata.uns[f\"{key}_colors\"] = [palette.get(category, fallback) for category in categories]\n"
] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2026-05-13T10:25:54.902117Z", "iopub.status.busy": "2026-05-13T10:25:54.901389Z", "iopub.status.idle": "2026-05-13T10:25:54.949368Z", "shell.execute_reply": "2026-05-13T10:25:54.948966Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/cpfs01/projects-HDD/cfff-c7cd658afc74_HDD/jiaoyifeng/miniconda3/envs/brainbeacon/lib/python3.9/site-packages/revise/backend/runners/sc_svc_application.py:67: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. 
Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", " grouped = self.sc_SVC_adata_expr.obs.groupby([self.cluster_col, sub_cell_type_col]).size()\n" ] }, { "data": { "text/html": [ "
| Level2 | \n", "apCAF | \n", "iCAF | \n", "mCAF | \n", "pCAF | \n", "
|---|---|---|---|---|
| SVC_cluster | \n", "\n", " | \n", " | \n", " | \n", " |
| 0 | \n", "105 | \n", "30 | \n", "135 | \n", "4 | \n", "
| 1 | \n", "14 | \n", "25 | \n", "305 | \n", "1 | \n", "
| 2 | \n", "15 | \n", "120 | \n", "5 | \n", "0 | \n", "
| 3 | \n", "44 | \n", "120 | \n", "252 | \n", "1 | \n", "
| 4 | \n", "27 | \n", "10 | \n", "146 | \n", "5 | \n", "
| 5 | \n", "65 | \n", "48 | \n", "243 | \n", "6 | \n", "
| 6 | \n", "37 | \n", "48 | \n", "20 | \n", "0 | \n", "
| 7 | \n", "32 | \n", "2 | \n", "6 | \n", "1 | \n", "
| 8 | \n", "7 | \n", "4 | \n", "94 | \n", "5 | \n", "
| 9 | \n", "30 | \n", "13 | \n", "61 | \n", "1 | \n", "
| \n", " | group | \n", "gene | \n", "logfoldchanges | \n", "pvals | \n", "pvals_adj | \n", "log_q | \n", "
|---|---|---|---|---|---|---|
| 0 | \n", "3 | \n", "C3 | \n", "4.284732 | \n", "2.006549e-242 | \n", "2.626171e-238 | \n", "100.000000 | \n", "
| 1 | \n", "1 | \n", "TIMP3 | \n", "3.870138 | \n", "8.797006e-79 | \n", "1.151352e-74 | \n", "73.938792 | \n", "
| 2 | \n", "5 | \n", "TAGLN | \n", "3.033704 | \n", "1.085613e-76 | \n", "1.420850e-72 | \n", "71.847452 | \n", "
| 3 | \n", "1 | \n", "COMP | \n", "5.330997 | \n", "4.945397e-76 | \n", "3.236268e-72 | \n", "71.489956 | \n", "
| 4 | \n", "2 | \n", "INHBA | \n", "-4.796747 | \n", "7.599509e-71 | \n", "9.946238e-67 | \n", "66.002341 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 130875 | \n", "8 | \n", "C4BPB | \n", "0.000000 | \n", "1.000000e+00 | \n", "1.000000e+00 | \n", "-0.000000 | \n", "
| 130876 | \n", "8 | \n", "IL26 | \n", "0.000000 | \n", "1.000000e+00 | \n", "1.000000e+00 | \n", "-0.000000 | \n", "
| 130877 | \n", "8 | \n", "TM4SF20 | \n", "0.000000 | \n", "1.000000e+00 | \n", "1.000000e+00 | \n", "-0.000000 | \n", "
| 130878 | \n", "8 | \n", "NOS3 | \n", "0.000000 | \n", "1.000000e+00 | \n", "1.000000e+00 | \n", "-0.000000 | \n", "
| 130879 | \n", "0 | \n", "P2RY12 | \n", "0.000000 | \n", "1.000000e+00 | \n", "1.000000e+00 | \n", "-0.000000 | \n", "
130880 rows × 6 columns
\n", "