Installation

pip install bioflex

Visit our page bioflex or github for more information.

In [1]:
import bioflex
import numpy as np
In [2]:
conn = bioflex.connect('a6e9ae0b52204cef90ca3d*****')
databases = conn.databases()
databases
Out[2]:
[DataBase(id="5010c7d573ae4ff2b9691422b99aa2cd",name="BioTuring database",species="human",version=1),
 DataBase(id="5010c7d573ae4ff2b9691422b99aa2cd",name="BioTuring database",species="human",version=2),
 DataBase(id="5010c7d573ae4ff2b9691422b99aa2cd",name="BioTuring database",species="human",version=3),
 DataBase(id="5010c7d573ae4ff2b9691422b99aa2cd",name="BioTuring database",species="mouse",version=1),
 DataBase(id="5010c7d573ae4ff2b9691422b99aa2cd",name="BioTuring database",species="primate",version=1),
 DataBase(id="1de28e67227b4ed9bd54aa9b642736e3",name="Lung atlas",species="human",version=1),
 DataBase(id="31052bef5c3f4514b9dbd194a03bcafa",name="Renal atlas",species="human",version=1),
 DataBase(id="58651b0a42434cfba267f78ac42a6fec",name="NK cell atlas (full)",species="human",version=1)]

Using a database from list

In [3]:
using_database = databases[2]

Get cell types gene expression across database

In [4]:
result = using_database.get_celltypes_expression_summary(['CD3D', 'CD3E'])
print(result['CD3D'][:5])
print(result['CD3E'][:5])
[Summary(name="B cell",sum=707108874.0,mean=4192.709686217774,rate=0.03504117106973723,count=168652.0,total=4812967), Summary(name="CD4-positive, alpha-beta T cell",sum=9489987442.0,mean=4657.561967741555,rate=0.5283278751435854,count=2037544.0,total=3856590), Summary(name="CD4-positive, alpha-beta cytotoxic T cell",sum=342799107.0,mean=4684.903951018846,rate=0.5532527824824582,count=73171.0,total=132256), Summary(name="CD8-positive, alpha-beta T cell",sum=8799563254.0,mean=4704.7405575715065,rate=0.5471126656122398,count=1870361.0,total=3418603), Summary(name="CD8-positive, alpha-beta cytotoxic T cell",sum=411976171.0,mean=4748.566944835058,rate=0.5942491575111647,count=86758.0,total=145996)]
[Summary(name="B cell",sum=569738449.0,mean=4098.277566375819,rate=0.028884262036286558,count=139019.0,total=4812967), Summary(name="CD4-positive, alpha-beta T cell",sum=10050349852.0,mean=4702.274442320307,rate=0.5542041025880377,count=2137338.0,total=3856590), Summary(name="CD4-positive, alpha-beta cytotoxic T cell",sum=362243512.0,mean=4758.973068131059,rate=0.5755353254294702,count=76118.0,total=132256), Summary(name="CD8-positive, alpha-beta T cell",sum=9239057247.0,mean=4722.38210576353,rate=0.5722922492023789,count=1956440.0,total=3418603), Summary(name="CD8-positive, alpha-beta cytotoxic T cell",sum=376955768.0,mean=4697.736447247077,rate=0.5496177977478836,count=80242.0,total=145996)]

Create study instance

For study hash ID, search from BioTuring studies

In [5]:
study = using_database.get_study('GSE96583_batch2')
study
Out[5]:
Study(id="1557",hash_id="GSE96583_batch2",title="Multiplexed droplet single-cell RNA-sequencing using natural genetic variation (Batch 2)",reference="https://www.nature.com/articles/nbt.4042")

Take a peek at study metadata

In [6]:
study.metalist[:5]
Out[6]:
[Metadata(id=0,name="Number of mRNA transcripts",type="Numeric"),
 Metadata(id=1,name="Number of genes",type="Numeric"),
 Metadata(id=2,name="Batch id",type="Category"),
 Metadata(id=3,name="Stimulation",type="Category"),
 Metadata(id=4,name="Author's cell type",type="Category")]

Fetch a study metadata

In [7]:
metadata = study.metalist[4]
metadata.fetch()
metadata.values
Out[7]:
array(['CD8 T cells', 'Dendritic cells', 'CD4 T cells', ...,
       'CD8 T cells', 'B cells', 'CD4 T cells'], dtype='<U17')

Query genes

In [8]:
study.query_genes(['CD3D', 'CD3E'], bioflex.UNIT_RAW)
Out[8]:
<29065x2 sparse matrix of type '<class 'numpy.float32'>'
	with 15492 stored elements in Compressed Sparse Column format>

Get study barcodes

In [9]:
np.array(study.barcodes())
Out[9]:
array(['GSM2560249_AAACATACCAAGCT-1', 'GSM2560249_AAACATACCCCTAC-1',
       'GSM2560249_AAACATACCCGTAA-1', ..., 'GSM2560248_TTTGCATGGGAACG-1',
       'GSM2560248_TTTGCATGGTCCTC-1', 'GSM2560248_TTTGCATGTTCATC-1'],
      dtype='<U27')

Get study features

In [10]:
np.array(study.features())
Out[10]:
array(['5S_RRNA', '5_8S_RRNA', '7SK', ..., 'C17orf72', 'RP11-361K17.2',
       'CR759784.2'], dtype='<U26')

Get study full matrix

In [11]:
np.array(study.matrix(bioflex.UNIT_LOGNORM))
Downloading: 100%|██████████| 141M/141M [02:56<00:00, 797kbytes/s]  
Out[11]:
array(<29065x64642 sparse matrix of type '<class 'numpy.float32'>'
	with 17570739 stored elements in Compressed Sparse Column format>,
      dtype=object)