1. 读取Visium数据
方法1:直接读取10x Visium格式文件(.h5)
sample1 = sc.read_visium('./GSM7996201_SXR_1')
sample2 = sc.read_visium('./GSM7996202_SXR_2')
sample3 = sc.read_visium('./GSM7996203_YZL_1')
sample4 = sc.read_visium('./GSM7996204_YZL_2')
samples = [sample1, sample2, sample3, sample4]
# 添加批次信息
for i, sample in enumerate(samples): sample.obs['batch'] = f'sample_{i+1}'
# 合并数据
combined = samples[0].concatenate(samples[1:])
# 批次效应校正
sc.pp.combat(combined, key='batch')
如果是matrix.mtx、features.tsv、barcodes.tsv和spatial目录
sc.settings.cachedir = "/home/sunlab/yanghc/workspace/project/TNBC_spatial/test/visium/cache"
adata = sc.read_10x_mtx(
sample_path, # 包含matrix.mtx.gz等文件的目录
var_names='gene_symbols',
cache=True
)
# 2. 读取空间位置信息
spatial_path = os.path.join(sample_path, 'spatial')
positions_path = os.path.join(spatial_path, 'tissue_positions.csv')
positions = pd.read_csv(positions_path)
# 3. 调整位置数据格式 (检查列名以确保正确)
# 注意: 根据10x的版本,positions的列名可能有不同
if 'barcode' not in positions.columns and positions.shape[1] == 6:
positions.columns = ['barcode', 'in_tissue', 'array_row', 'array_col',
'pxl_row_in_fullres', 'pxl_col_in_fullres']
# 设置barcode为索引
positions.index = positions['barcode']
# 4. 匹配表达矩阵和空间位置的细胞条形码
common_barcodes = positions.index.intersection(adata.obs_names)
if len(common_barcodes) < len(adata.obs_names):
print(f" 警告: 只有{len(common_barcodes)}/{len(adata.obs_names)}的条形码在空间数据中找到")
adata = adata[common_barcodes]
positions = positions.loc[common_barcodes]
# 5. 将空间坐标添加到adata对象中
adata.obs['in_tissue'] = positions['in_tissue'].values
adata.obsm['spatial'] = positions[['pxl_row_in_fullres', 'pxl_col_in_fullres']].values
# 6. 读取比例因子
scalefactors_path = os.path.join(spatial_path, 'scalefactors_json.json')
with open(scalefactors_path, 'r') as f:
scalefactors = json.load(f)
# 7. 正确设置空间可视化信息
library_id = sample_id # 使用样本ID作为库ID
adata.uns['spatial'] = {}
adata.uns['spatial'][library_id] = {}
# 8. 添加组织图像
hires_path = os.path.join(spatial_path, 'tissue_hires_image.png')
lowres_path = os.path.join(spatial_path, 'tissue_lowres_image.png')
adata.uns['spatial'][library_id]['images'] = {}
try:
adata.uns['spatial'][library_id]['images']['hires'] = io.imread(hires_path)
adata.uns['spatial'][library_id]['images']['lowres'] = io.imread(lowres_path)
except Exception as e:
print(f" 警告: 无法读取图像: {e}")
# 9. 添加比例因子
adata.uns['spatial'][library_id]['scalefactors'] = scalefactors