{ "cells": [ { "cell_type": "markdown", "id": "acbff5eb", "metadata": {}, "source": [ "## Installation\n", "\n", "```sh\n", "pip install bioflex\n", "```\n", "\n", "Visit our bioflex page or GitHub repository for more information.\n", "\n", "Before running this notebook, export your API token as the `BIOFLEX_TOKEN` environment variable." ] }, { "cell_type": "code", "execution_count": 1, "id": "1bdd1fae", "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "import bioflex\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "id": "a6fe7bcc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[DataBase(id=\"5010c7d573ae4ff2b9691422b99aa2cd\",name=\"BioTuring database\",species=\"human\",version=1),\n", " DataBase(id=\"5010c7d573ae4ff2b9691422b99aa2cd\",name=\"BioTuring database\",species=\"human\",version=2),\n", " DataBase(id=\"5010c7d573ae4ff2b9691422b99aa2cd\",name=\"BioTuring database\",species=\"human\",version=3),\n", " DataBase(id=\"5010c7d573ae4ff2b9691422b99aa2cd\",name=\"BioTuring database\",species=\"mouse\",version=1),\n", " DataBase(id=\"5010c7d573ae4ff2b9691422b99aa2cd\",name=\"BioTuring database\",species=\"primate\",version=1),\n", " DataBase(id=\"1de28e67227b4ed9bd54aa9b642736e3\",name=\"Lung atlas\",species=\"human\",version=1),\n", " DataBase(id=\"31052bef5c3f4514b9dbd194a03bcafa\",name=\"Renal atlas\",species=\"human\",version=1),\n", " DataBase(id=\"58651b0a42434cfba267f78ac42a6fec\",name=\"NK cell atlas (full)\",species=\"human\",version=1)]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the API token from the environment so that no part of it is stored in the notebook.\n", "conn = bioflex.connect(os.environ['BIOFLEX_TOKEN'])\n", "databases = conn.databases()\n", "databases" ] }, { "cell_type": "markdown", "id": "d26bdddf", "metadata": {}, "source": [ "### Select a database from the list" ] }, { "cell_type": "code", "execution_count": 3, "id": "20919f19", "metadata": {}, "outputs": [], "source": [ "# Index 2 is the human BioTuring database, version 3, in the listing above.\n", "using_database = databases[2]" ] }, { "cell_type": "markdown", "id": "5dd8221f", "metadata": {}, "source": [ "### Get cell-type gene expression across the database" ] }, { "cell_type": "code",
"execution_count": 4, "id": "31bea945", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[Summary(name=\"B cell\",sum=707108874.0,mean=4192.709686217774,rate=0.03504117106973723,count=168652.0,total=4812967), Summary(name=\"CD4-positive, alpha-beta T cell\",sum=9489987442.0,mean=4657.561967741555,rate=0.5283278751435854,count=2037544.0,total=3856590), Summary(name=\"CD4-positive, alpha-beta cytotoxic T cell\",sum=342799107.0,mean=4684.903951018846,rate=0.5532527824824582,count=73171.0,total=132256), Summary(name=\"CD8-positive, alpha-beta T cell\",sum=8799563254.0,mean=4704.7405575715065,rate=0.5471126656122398,count=1870361.0,total=3418603), Summary(name=\"CD8-positive, alpha-beta cytotoxic T cell\",sum=411976171.0,mean=4748.566944835058,rate=0.5942491575111647,count=86758.0,total=145996)]\n", "[Summary(name=\"B cell\",sum=569738449.0,mean=4098.277566375819,rate=0.028884262036286558,count=139019.0,total=4812967), Summary(name=\"CD4-positive, alpha-beta T cell\",sum=10050349852.0,mean=4702.274442320307,rate=0.5542041025880377,count=2137338.0,total=3856590), Summary(name=\"CD4-positive, alpha-beta cytotoxic T cell\",sum=362243512.0,mean=4758.973068131059,rate=0.5755353254294702,count=76118.0,total=132256), Summary(name=\"CD8-positive, alpha-beta T cell\",sum=9239057247.0,mean=4722.38210576353,rate=0.5722922492023789,count=1956440.0,total=3418603), Summary(name=\"CD8-positive, alpha-beta cytotoxic T cell\",sum=376955768.0,mean=4697.736447247077,rate=0.5496177977478836,count=80242.0,total=145996)]\n" ] } ], "source": [ "marker_genes = ['CD3D', 'CD3E']\n", "result = using_database.get_celltypes_expression_summary(marker_genes)\n", "# Print the first five cell-type summaries for each requested gene.\n", "for gene in marker_genes:\n", "    print(result[gene][:5])" ] }, { "cell_type": "markdown", "id": "cb26c9d9", "metadata": {}, "source": [ "### Create study instance\n", "To find a study's hash ID, search for it among the BioTuring studies." ] }, { "cell_type": "code", "execution_count": 5, "id": "f281ef25", "metadata": {}, "outputs": [ { "data": { "text/plain":
[ "Study(id=\"1557\",hash_id=\"GSE96583_batch2\",title=\"Multiplexed droplet single-cell RNA-sequencing using natural genetic variation (Batch 2)\",reference=\"https://www.nature.com/articles/nbt.4042\")" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "study = using_database.get_study('GSE96583_batch2')\n", "study" ] }, { "cell_type": "markdown", "id": "0bcdb619", "metadata": {}, "source": [ "### Take a peek at study metadata" ] }, { "cell_type": "code", "execution_count": 6, "id": "2e597853", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Metadata(id=0,name=\"Number of mRNA transcripts\",type=\"Numeric\"),\n", " Metadata(id=1,name=\"Number of genes\",type=\"Numeric\"),\n", " Metadata(id=2,name=\"Batch id\",type=\"Category\"),\n", " Metadata(id=3,name=\"Stimulation\",type=\"Category\"),\n", " Metadata(id=4,name=\"Author's cell type\",type=\"Category\")]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "study.metalist[:5]" ] }, { "cell_type": "markdown", "id": "81134849", "metadata": {}, "source": [ "### Fetch study metadata" ] }, { "cell_type": "code", "execution_count": 7, "id": "1a8d4ab9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['CD8 T cells', 'Dendritic cells', 'CD4 T cells', ...,\n", " 'CD8 T cells', 'B cells', 'CD4 T cells'], dtype=''\n", "\twith 15492 stored elements in Compressed Sparse Column format>" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "study.query_genes(['CD3D', 'CD3E'], bioflex.UNIT_RAW)" ] }, { "cell_type": "markdown", "id": "b5be6f37", "metadata": {}, "source": [ "### Get study barcodes" ] }, { "cell_type": "code", "execution_count": 9, "id": "6ee1b524", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['GSM2560249_AAACATACCAAGCT-1', 'GSM2560249_AAACATACCCCTAC-1',\n", " 'GSM2560249_AAACATACCCGTAA-1', ..., 'GSM2560248_TTTGCATGGGAACG-1',\n", " 
'GSM2560248_TTTGCATGGTCCTC-1', 'GSM2560248_TTTGCATGTTCATC-1'],\n", " dtype=''\n", "\twith 17570739 stored elements in Compressed Sparse Column format>,\n", " dtype=object)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# study.matrix() returns a SciPy sparse matrix; wrapping it in np.array() only yields a\n", "# 0-d object array, so use the sparse matrix directly (call .toarray() if a dense copy is needed).\n", "study.matrix(bioflex.UNIT_LOGNORM)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.8.10 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "vscode": { "interpreter": { "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" } } }, "nbformat": 4, "nbformat_minor": 5 }