单细胞Seruat和h5ad数据格式互换(R与python)方法学习和整理

2024-09-27 22:35:53 浏览数 (1)

SeruatV4数据转化为h5ad格式数据
1、导入(R)
代码语言:javascript复制
rm(list = ls())
library(Seurat)
library(qs)
library(reticulate)
library(hdf5r)
library(sceasy)
library(BiocParallel)
register(MulticoreParam(workers = 4, progressbar = TRUE)) 
scRNA <- qread("sc_dataset.qs")
scRNA
# An object of class Seurat 
# 30269 features across 44651 samples within 2 assays 
# Active assay: integrated (2000 features, 2000 variable features)
#  1 other assay present: RNA
#  3 dimensional reductions calculated: pca, umap, tsne
2、配置python环境(终端/linux)
代码语言:javascript复制
# 配置环境
conda create -n sceasy python=3.9
conda activate sceasy
conda install loompy

# 可选安装
conda install anndata
conda install scipy
3、开始转换(R)
代码语言:javascript复制
# 在R语言中加载python环境
use_condaenv('sceasy')
loompy <- reticulate::import('loompy')

# Seurat to AnnData
sceasy::convertFormat(scRNA, from="seurat", to="anndata",
                      outFile='scRNA.h5ad')
# AnnData object with n_obs × n_vars = 44651 × 28269
#     obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'GSE_num', 'Gender', 'Age', 'subsite', 'hpv', 'percent.mt', 'percent.rp', 'percent.hb', 'RNA_snn_res.0.1', 'RNA_snn_res.0.2', 'RNA_snn_res.0.3', 'RNA_snn_res.0.4', 'RNA_snn_res.0.5', 'RNA_snn_res.0.6', 'RNA_snn_res.0.7', 'RNA_snn_res.0.8', 'RNA_snn_res.0.9', 'RNA_snn_res.1', 'RNA_snn_res.1.1', 'RNA_snn_res.1.2', 'RNA_snn_res.1.3', 'RNA_snn_res.1.4', 'RNA_snn_res.1.5', 'RNA_snn_res.1.6', 'RNA_snn_res.1.7', 'RNA_snn_res.1.8', 'RNA_snn_res.1.9', 'RNA_snn_res.2', 'seurat_clusters', 'celltype', 'integrated_snn_res.0.1', 'integrated_snn_res.0.2', 'integrated_snn_res.0.3', 'integrated_snn_res.0.4', 'integrated_snn_res.0.5', 'integrated_snn_res.0.6', 'integrated_snn_res.0.7', 'integrated_snn_res.0.8', 'integrated_snn_res.0.9', 'integrated_snn_res.1', 'integrated_snn_res.1.1', 'integrated_snn_res.1.2', 'integrated_snn_res.1.3', 'integrated_snn_res.1.4', 'integrated_snn_res.1.5', 'integrated_snn_res.1.6', 'integrated_snn_res.1.7', 'integrated_snn_res.1.8', 'integrated_snn_res.1.9', 'integrated_snn_res.2'
#     var: 'name'
#     obsm: 'X_pca', 'X_umap', 'X_tsne'

#Seurat to SingleCellExperiment
sceasy::convertFormat(scRNA, from="seurat", to="sce",
                      outFile='scRNA.rds')
4、IDE中确认一下(python)
代码语言:javascript复制
# 加载库
import scanpy as sc
import os

# 确认路径
os.getcwd()

# 读取数据
adata = sc.read_h5ad('scRNA.h5ad')
adata
# AnnData object with n_obs × n_vars = 44651 × 28269
#     obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'GSE_num', 'Gender', 'Age', 'subsite', 'hpv', 'percent.mt', 'percent.rp', 'percent.hb', 'RNA_snn_res.0.1', 'RNA_snn_res.0.2', 'RNA_snn_res.0.3', 'RNA_snn_res.0.4', 'RNA_snn_res.0.5', 'RNA_snn_res.0.6', 'RNA_snn_res.0.7', 'RNA_snn_res.0.8', 'RNA_snn_res.0.9', 'RNA_snn_res.1', 'RNA_snn_res.1.1', 'RNA_snn_res.1.2', 'RNA_snn_res.1.3', 'RNA_snn_res.1.4', 'RNA_snn_res.1.5', 'RNA_snn_res.1.6', 'RNA_snn_res.1.7', 'RNA_snn_res.1.8', 'RNA_snn_res.1.9', 'RNA_snn_res.2', 'seurat_clusters', 'celltype', 'integrated_snn_res.0.1', 'integrated_snn_res.0.2', 'integrated_snn_res.0.3', 'integrated_snn_res.0.4', 'integrated_snn_res.0.5', 'integrated_snn_res.0.6', 'integrated_snn_res.0.7', 'integrated_snn_res.0.8', 'integrated_snn_res.0.9', 'integrated_snn_res.1', 'integrated_snn_res.1.1', 'integrated_snn_res.1.2', 'integrated_snn_res.1.3', 'integrated_snn_res.1.4', 'integrated_snn_res.1.5', 'integrated_snn_res.1.6', 'integrated_snn_res.1.7', 'integrated_snn_res.1.8', 'integrated_snn_res.1.9', 'integrated_snn_res.2'
#     var: 'name'
#     obsm: 'X_pca', 'X_tsne', 'X_umap'
SeruatV5数据转化为h5ad格式数据
1、导入(R)
代码语言:javascript复制
rm(list = ls())
V5_path = "/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/seurat5/"
.libPaths(V5_path)
.libPaths()
library(Seurat)
library(qs)
library(reticulate)
library(hdf5r)
library(sceasy)
library(BiocParallel)
register(MulticoreParam(workers = 4, progressbar = TRUE)) 
scRNA_V5 <- readRDS("scRNA_tumor.rds")
scRNA_V5
# An object of class Seurat 
# 20124 features across 5042 samples within 1 assay 
# Active assay: RNA (20124 features, 2000 variable features)
#  3 layers present: counts, data, scale.data
#  3 dimensional reductions calculated: pca, harmony, umap
2、配置python环境(终端/linux)
代码语言:javascript复制
# 配置环境
conda create -n sceasy python=3.9
conda activate sceasy
conda install loompy

# 可选安装
conda install anndata
conda install scipy
3、R语言转换(R)
代码语言:javascript复制
# 在R语言中加载python环境
use_condaenv('sceasy')
loompy <- reticulate::import('loompy')

# Seurat to AnnData
scRNA_V5[["RNA"]] <- as(scRNA_V5[["RNA"]], "Assay")
sceasy::convertFormat(scRNA_V5, from="seurat", to="anndata",
                      outFile='scRNA_V5.h5ad')
# AnnData object with n_obs × n_vars = 5042 × 20124
#     obs: 'nCount_RNA', 'nFeature_RNA', 'Sample', 'Cell.Barcode', 'Type', 'RNA_snn_res.0.1', 'RNA_snn_res.0.2', 'RNA_snn_res.0.3', 'RNA_snn_res.0.4', 'RNA_snn_res.0.5', 'RNA_snn_res.0.6', 'RNA_snn_res.0.7', 'RNA_snn_res.0.8', 'RNA_snn_res.0.9', 'RNA_snn_res.1', 'RNA_snn_res.1.1', 'RNA_snn_res.1.2', 'seurat_clusters', 'celltype', 'seurat_annotation'
#     var: 'vf_vst_counts_mean', 'vf_vst_counts_variance', 'vf_vst_counts_variance.expected', 'vf_vst_counts_variance.standardized', 'vf_vst_counts_variable', 'vf_vst_counts_rank', 'var.features', 'var.features.rank'
#     obsm: 'X_pca', 'X_harmony', 'X_umap'
# Warning message:
# In .regularise_df(obj@meta.data, drop_single_values = drop_single_values) :
#   Dropping single category variables:orig.ident

先将 Seurat V5 对象中的 Assay5 类型转换为 Seurat 旧版本中的 Assay 类型,然后再进行转化

4、IDE中确认一下(python)
代码语言:javascript复制
# 加载库
import scanpy as sc
import os

# 确认路径
os.getcwd()

# 读取数据
adata = sc.read_h5ad('scRNA.h5ad')
adata
# AnnData object with n_obs × n_vars = 5042 × 20124
#     obs: 'nCount_RNA', 'nFeature_RNA', 'Sample', 'Cell.Barcode', 'Type', 'RNA_snn_res.0.1', 'RNA_snn_res.0.2', 'RNA_snn_res.0.3', 'RNA_snn_res.0.4', 'RNA_snn_res.0.5', 'RNA_snn_res.0.6', 'RNA_snn_res.0.7', 'RNA_snn_res.0.8', 'RNA_snn_res.0.9', 'RNA_snn_res.1', 'RNA_snn_res.1.1', 'RNA_snn_res.1.2', 'seurat_clusters', 'celltype', 'seurat_annotation'
#     var: 'vf_vst_counts_mean', 'vf_vst_counts_variance', 'vf_vst_counts_variance.expected', 'vf_vst_counts_variance.standardized', 'vf_vst_counts_variable', 'vf_vst_counts_rank', 'var.features', 'var.features.rank'
#     obsm: 'X_harmony', 'X_pca', 'X_umap'
ha5d格式数据转化成seruat对象
1.导入
代码语言:javascript复制
rm(list = ls())
library(sceasy)
library(reticulate)
library(Seurat)
library(BiocParallel)
register(MulticoreParam(workers = 4, progressbar = TRUE)) 
2、R语言转换
代码语言:javascript复制
# h5ad转为Seurat
sceasy::convertFormat(obj = "scRNA.h5ad", 
                      from="anndata",to="seurat",
                      outFile = 'scRNA.rds')
# X -> counts
# An object of class Seurat 
# 28269 features across 44651 samples within 1 assay 
# Active assay: RNA (28269 features, 0 variable features)
#  2 layers present: counts, data
#  3 dimensional reductions calculated: pca, tsne, umap

这种方法得到的数据是SeruatV4版本的,所以如果要用于SeruatV5的话还需要再转化一下。

还有细胞数很多的话sceasy就不好用了,这个时候可以用dior包。

参考资料:
  1. sceasy: https://github.com/cellgeni/sceasy
  2. dior: https://github.com/JiekaiLab/dior
  3. 单细胞天地: https://mp.weixin.qq.com/s/qHBeQnYJdK0ATGlTOROPeA
  4. 生信菜鸟团: https://mp.weixin.qq.com/s/8fwJSc9Dnp8h_Suv76oXVA
  5. KS科研分享与服务:https://mp.weixin.qq.com/s/Wt9TU5Qk3yqPDlRlXr6BfQ

:若对内容有疑惑或者有发现明确错误的朋友,请联系后台(欢迎交流)。更多内容可关注公众号:生信方舟

- END -

0 人点赞