{
"cells": [
{
"cell_type": "markdown",
"id": "acbff5eb",
"metadata": {},
"source": [
"## Installation\n",
"\n",
"```sh\n",
"pip install bioflex\n",
"```\n",
"\n",
"Visit our page bioflex or github for more information."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "1bdd1fae",
"metadata": {},
"outputs": [],
"source": [
"import bioflex\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a6fe7bcc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[DataBase(id=\"5010c7d573ae4ff2b9691422b99aa2cd\",name=\"BioTuring database\",species=\"human\",version=1),\n",
" DataBase(id=\"5010c7d573ae4ff2b9691422b99aa2cd\",name=\"BioTuring database\",species=\"human\",version=2),\n",
" DataBase(id=\"5010c7d573ae4ff2b9691422b99aa2cd\",name=\"BioTuring database\",species=\"human\",version=3),\n",
" DataBase(id=\"5010c7d573ae4ff2b9691422b99aa2cd\",name=\"BioTuring database\",species=\"mouse\",version=1),\n",
" DataBase(id=\"5010c7d573ae4ff2b9691422b99aa2cd\",name=\"BioTuring database\",species=\"primate\",version=1),\n",
" DataBase(id=\"1de28e67227b4ed9bd54aa9b642736e3\",name=\"Lung atlas\",species=\"human\",version=1),\n",
" DataBase(id=\"31052bef5c3f4514b9dbd194a03bcafa\",name=\"Renal atlas\",species=\"human\",version=1),\n",
" DataBase(id=\"58651b0a42434cfba267f78ac42a6fec\",name=\"NK cell atlas (full)\",species=\"human\",version=1)]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"conn = bioflex.connect('a6e9ae0b52204cef90ca3d*****')\n",
"databases = conn.databases()\n",
"databases"
]
},
{
"cell_type": "markdown",
"id": "d26bdddf",
"metadata": {},
"source": [
"### Using a database from list"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "20919f19",
"metadata": {},
"outputs": [],
"source": [
"using_database = databases[2]"
]
},
{
"cell_type": "markdown",
"id": "5dd8221f",
"metadata": {},
"source": [
"### Get cell types gene expression across database"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "31bea945",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Summary(name=\"B cell\",sum=707108874.0,mean=4192.709686217774,rate=0.03504117106973723,count=168652.0,total=4812967), Summary(name=\"CD4-positive, alpha-beta T cell\",sum=9489987442.0,mean=4657.561967741555,rate=0.5283278751435854,count=2037544.0,total=3856590), Summary(name=\"CD4-positive, alpha-beta cytotoxic T cell\",sum=342799107.0,mean=4684.903951018846,rate=0.5532527824824582,count=73171.0,total=132256), Summary(name=\"CD8-positive, alpha-beta T cell\",sum=8799563254.0,mean=4704.7405575715065,rate=0.5471126656122398,count=1870361.0,total=3418603), Summary(name=\"CD8-positive, alpha-beta cytotoxic T cell\",sum=411976171.0,mean=4748.566944835058,rate=0.5942491575111647,count=86758.0,total=145996)]\n",
"[Summary(name=\"B cell\",sum=569738449.0,mean=4098.277566375819,rate=0.028884262036286558,count=139019.0,total=4812967), Summary(name=\"CD4-positive, alpha-beta T cell\",sum=10050349852.0,mean=4702.274442320307,rate=0.5542041025880377,count=2137338.0,total=3856590), Summary(name=\"CD4-positive, alpha-beta cytotoxic T cell\",sum=362243512.0,mean=4758.973068131059,rate=0.5755353254294702,count=76118.0,total=132256), Summary(name=\"CD8-positive, alpha-beta T cell\",sum=9239057247.0,mean=4722.38210576353,rate=0.5722922492023789,count=1956440.0,total=3418603), Summary(name=\"CD8-positive, alpha-beta cytotoxic T cell\",sum=376955768.0,mean=4697.736447247077,rate=0.5496177977478836,count=80242.0,total=145996)]\n"
]
}
],
"source": [
"result = using_database.get_celltypes_expression_summary(['CD3D', 'CD3E'])\n",
"print(result['CD3D'][:5])\n",
"print(result['CD3E'][:5])"
]
},
{
"cell_type": "markdown",
"id": "cb26c9d9",
"metadata": {},
"source": [
"### Create study instance\n",
"For study hash ID, search from BioTuring studies"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f281ef25",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Study(id=\"1557\",hash_id=\"GSE96583_batch2\",title=\"Multiplexed droplet single-cell RNA-sequencing using natural genetic variation (Batch 2)\",reference=\"https://www.nature.com/articles/nbt.4042\")"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"study = using_database.get_study('GSE96583_batch2')\n",
"study"
]
},
{
"cell_type": "markdown",
"id": "0bcdb619",
"metadata": {},
"source": [
"### Take a peek at study metadata"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "2e597853",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Metadata(id=0,name=\"Number of mRNA transcripts\",type=\"Numeric\"),\n",
" Metadata(id=1,name=\"Number of genes\",type=\"Numeric\"),\n",
" Metadata(id=2,name=\"Batch id\",type=\"Category\"),\n",
" Metadata(id=3,name=\"Stimulation\",type=\"Category\"),\n",
" Metadata(id=4,name=\"Author's cell type\",type=\"Category\")]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"study.metalist[:5]"
]
},
{
"cell_type": "markdown",
"id": "81134849",
"metadata": {},
"source": [
"### Fetch a study metadata"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "1a8d4ab9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['CD8 T cells', 'Dendritic cells', 'CD4 T cells', ...,\n",
" 'CD8 T cells', 'B cells', 'CD4 T cells'], dtype=''\n",
"\twith 15492 stored elements in Compressed Sparse Column format>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"study.query_genes(['CD3D', 'CD3E'], bioflex.UNIT_RAW)"
]
},
{
"cell_type": "markdown",
"id": "b5be6f37",
"metadata": {},
"source": [
"### Get study barcodes"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "6ee1b524",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['GSM2560249_AAACATACCAAGCT-1', 'GSM2560249_AAACATACCCCTAC-1',\n",
" 'GSM2560249_AAACATACCCGTAA-1', ..., 'GSM2560248_TTTGCATGGGAACG-1',\n",
" 'GSM2560248_TTTGCATGGTCCTC-1', 'GSM2560248_TTTGCATGTTCATC-1'],\n",
" dtype=''\n",
"\twith 17570739 stored elements in Compressed Sparse Column format>,\n",
" dtype=object)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.array(study.matrix(bioflex.UNIT_LOGNORM))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.10 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}