%%html
<script src="https://bits.csb.pitt.edu/preamble.js"></script>

%%html
<div id="mlaccess" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');


	jQuery('#mlaccess').asker({
	    id: "mlaccess",
	    question: "How much faster is it to access memory than disk?",
		answers: ["100X","1,000X","10,000X","100,000X","1,000,000X"],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>

import threading,time

cnt = [0]

def incrementCnt(cnt):
    """Read cnt[0] a million times without ever writing to cnt."""
    for _ in range(1_000_000):
        _ = cnt[0]  # read-only access; the value is discarded

t1 = threading.Thread(target=incrementCnt,args=(cnt,))
t2 = threading.Thread(target=incrementCnt,args=(cnt,))

t1.start()
t2.start()

%%html
<div id="mlthread1" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#mlthread1';
	jQuery(divid).asker({
	    id: divid,
	    question: "What is cnt?",
		answers: ['0','2','1000000','2000000',"I don't know"],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();

</script>

import threading,time

cnt = [0]

def incrementCnt(cnt):
    """Add one to cnt[0], in place, a million times."""
    for _ in range(1_000_000):
        cnt[0] += 1

t1 = threading.Thread(target=incrementCnt,args=(cnt,))
t2 = threading.Thread(target=incrementCnt,args=(cnt,))
t1.start()
t2.start()

print(cnt) #what do we expect to print out?
time.sleep(1)
print(cnt)
time.sleep(1)
print(cnt)

[6446]
[2000000]
[2000000]

import threading,time

cnt = [0]

def incrementCnt(cnt):
    """Increment cnt[0] ten thousand times using a separate read, add and write-back."""
    for i in range(10000): # ten thousand times
        x = cnt[0]      # read the shared value into a local
        x += 1
        time.sleep(0)   # zero-length sleep between the read and the write-back;
                        # presumably lets another thread run here
        cnt[0] = x      # store the local copy back into the list

t1 = threading.Thread(target=incrementCnt,args=(cnt,))
t2 = threading.Thread(target=incrementCnt,args=(cnt,))
t1.start()
t2.start()

print(cnt) #what do we expect to print out?
time.sleep(1)
print(cnt)
time.sleep(1)

print(cnt)

[2]
[10002]
[10002]

import multiprocess,time

cnt = [0]

p1 = multiprocess.Process(target=incrementCnt,args=(cnt,))
p2 = multiprocess.Process(target=incrementCnt,args=(cnt,))

p1.start()
p2.start()

#what do we expect when we print out cnt[0]?

%%html
<div id="mlproc1" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#mlproc1';
	jQuery(divid).asker({
	    id: divid,
	    question: "What will print out?",
		answers: ['0','2','1000000','2000000',"I don't know"],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>

# Same experiment as with threads, but the two workers are separate processes.
cnt = [0]
p1 = multiprocess.Process(target=incrementCnt,args=(cnt,))
p2 = multiprocess.Process(target=incrementCnt,args=(cnt,))

p1.start()
p2.start()

# Both prints show 0 in the recorded output: the increments performed by the
# worker processes are not visible in this process's cnt.
print(cnt[0])  # right after starting the workers
time.sleep(3)
print(cnt[0])  # again after waiting three seconds

0
0

def dowork(inQ, outQ):
    """Take a single item off inQ and place its square on outQ."""
    item = inQ.get()
    squared = item * item
    outQ.put(squared)

# Queues that carry work to, and results back from, the pool's workers.
inQ = multiprocess.Queue()
outQ = multiprocess.Queue()
# Following the multiprocessing.Pool signature (which multiprocess is assumed
# to mirror), the positional arguments are (processes, initializer, initargs):
# dowork(inQ, outQ) is therefore run as the start-up hook of each of 4 workers.
pool = multiprocess.Pool(4, dowork, (inQ, outQ))

inQ.put(4)  # hand one value to a worker waiting in inQ.get()

outQ.get()  # wait for a result; 16 in the recorded output

16

import multiprocess

def chatty(conn):
    """Receive one message on conn and send it back prefixed with "you sent me ".

    conn is a Connection object representing one end of a pipe.
    """
    received = conn.recv()
    reply = "you sent me " + received
    conn.send(reply)
    
(c1,c2) = multiprocess.Pipe()

p1 = multiprocess.Process(target=chatty,args=(c2,))
p1.start()

c1.send("Hello!")
result = c1.recv()
p1.join()

print(result)

you sent me Hello!

def f(x):
    """Return x multiplied by itself."""
    squared = x * x
    return squared

# Use the pool as a context manager so its worker processes are shut down as
# soon as the map has finished, instead of lingering for the rest of the session.
with multiprocess.Pool(processes=4) as pool:
    print(pool.map(f,range(20)))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256, 289, 324, 361]

import lmdb

# Store two key/value pairs inside a single write transaction.
env = lmdb.open('db')
with env.begin(write=True) as txn:
    txn.put(b'key1',b'123')
    txn.put(b'key2',b'abc')
# Close this handle before reopening: py-lmdb documents having the same
# environment open twice in one process as a serious error.
env.close()

# Reopen read-only and look one of the keys up again.
env = lmdb.open('db',readonly=True)
with env.begin() as txn:
    print(txn.get(b'key1'))
env.close()

b'123'

import numpy as np
a = np.array([1.0,3.14,2])
# Store the array under b'key'; what ends up in the database is its raw bytes
# (see the displayed value of buffer).
env = lmdb.open('db')
with env.begin(write=True) as txn:
    txn.put(b'key', a)
# Close this handle before reopening: py-lmdb documents having the same
# environment open twice in one process as a serious error.
env.close()

env = lmdb.open('db',readonly=True)
with env.begin() as txn:
    buffer = txn.get(b'key')
# buffer is a bytes object (see the displayed value), so it stays usable
# after the environment has been closed.
env.close()
buffer

b'\x00\x00\x00\x00\x00\x00\xf0?\x1f\x85\xebQ\xb8\x1e\t@\x00\x00\x00\x00\x00\x00\x00@'

newa = np.frombuffer(buffer,dtype=np.float64) # does NOT copy!

newa.base is buffer

True

newa[0] = 4

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[52], line 1
----> 1 newa[0] = 4

ValueError: assignment destination is read-only

import h5py  # tries to provide a numpy-style interface to HDF5 data

mouse = h5py.File('mouse_tabula_muris_10x_log1p_cpm.h5ad','r')

mouse.keys()

<KeysViewHDF5 ['X', 'obs', 'raw.X', 'raw.var', 'uns', 'var']>

mouse['obs']  # "opaque" type of size 45

<HDF5 dataset "obs": shape (54967,), type "|V45">

np.array(mouse['obs'])

array([(b'10X_P4_0_AAACCTGAGATTACCC', 10, 2,  7, 0.11555646,  9848., 2857),
       (b'10X_P4_0_AAACCTGAGTGCCAGA', 10, 2, 31, 0.17775837, 17175., 2933),
       (b'10X_P4_0_AAACCTGCAAATCCGT', 10, 2, 31, 0.09034352, 22181., 3217),
       ...,
       (b'10X_P8_15_TTTGTCATCGGCTTGG', 11, 2, 19, 0.09656238,  2589., 1302),
       (b'10X_P8_15_TTTGTCATCTTACCGC', 11, 2, 44, 0.10493047,  2373.,  973),
       (b'10X_P8_15_TTTGTCATCTTGTTTG', 11, 2, 44, 0.07301372,  5903., 1800)],
      dtype=[('index', 'S26'), ('tissue', 'i1'), ('subtissue', 'i1'), ('cell_ontology_class', 'i1'), ('percent_ribo', '<f4'), ('n_counts', '<f4'), ('n_genes', '<i8')])

import pandas as pd
pd.DataFrame(np.array(mouse['obs']))

mouse['var']

<HDF5 dataset "var": shape (18099,), type "|V22">

mouse['var'].dtype

dtype([('index', 'S14'), ('n_cells', '<i8')])

np.array(mouse['var'])

array([(b'0610005C13Rik',  1847), (b'0610007C21Rik', 23434),
       (b'0610007L01Rik', 13040), ..., (b'Zzz3',  8077), (b'a',   165),
       (b'l7Rn6', 14131)], dtype=[('index', 'S14'), ('n_cells', '<i8')])

import pandas as pd
pd.DataFrame(np.array(mouse['var']))

mouse['X']

<HDF5 group "/X" (3 members)>

mouse['X'].keys()

<KeysViewHDF5 ['data', 'indices', 'indptr']>

dict(mouse['X'].attrs)

{'h5sparse_format': 'csr', 'h5sparse_shape': array([54967, 18099])}

mouse['X']['data']

<HDF5 dataset "data": shape (105321967,), type "<f4">

import scipy.sparse
X = mouse['X']
M = scipy.sparse.csr_matrix((X['data'],X['indices'],X['indptr']))

M

<54967x18099 sparse matrix of type '<class 'numpy.float32'>'
	with 105321967 stored elements in Compressed Sparse Row format>

mouse['uns'].keys()

<KeysViewHDF5 ['cell_ontology_class_categories', 'subtissue_categories', 'tissue_categories']>

np.array(mouse['uns']['cell_ontology_class_categories'])

array([b'B cell', b'DN1 thymic pro-T cell', b'Fraction A pre-pro B cell',
       b'Langerhans cell', b'T cell', b'alveolar macrophage',
       b'basal cell', b'basal cell of epidermis', b'basophil',
       b'bladder cell', b'bladder urothelial cell', b'blood cell',
       b'cardiac muscle cell',
       b'ciliated columnar cell of tracheobronchial tree',
       b'classical monocyte', b'dendritic cell', b'duct epithelial cell',
       b'early pro-B cell', b'endocardial cell', b'endothelial cell',
       b'endothelial cell of hepatic sinusoid', b'epithelial cell',
       b'erythroblast', b'erythrocyte', b'fibroblast', b'granulocyte',
       b'granulocytopoietic cell', b'hematopoietic precursor cell',
       b'hepatocyte', b'immature B cell', b'immature T cell',
       b'keratinocyte', b'kidney capillary endothelial cell',
       b'kidney cell', b'kidney collecting duct epithelial cell',
       b'kidney loop of Henle ascending limb epithelial cell',
       b'kidney proximal straight tubule epithelial cell',
       b'late pro-B cell', b'leukocyte',
       b'luminal epithelial cell of mammary gland',
       b'lung endothelial cell', b'macrophage', b'mast cell',
       b'mesangial cell', b'mesenchymal cell', b'mesenchymal stem cell',
       b'monocyte', b'myeloid cell', b'natural killer cell',
       b'neuroendocrine cell', b'non-classical monocyte',
       b'proerythroblast', b'professional antigen presenting cell',
       b'promonocyte', b'skeletal muscle satellite cell', b'stromal cell',
       b'type II pneumocyte'], dtype=object)

import anndata
data = anndata.read_h5ad('mouse_tabula_muris_10x_log1p_cpm.h5ad')
data

AnnData object with n_obs × n_vars = 54967 × 18099
    obs: 'tissue', 'subtissue', 'cell_ontology_class', 'percent_ribo', 'n_counts', 'n_genes'
    var: 'n_cells'

data.obs

import scanpy
data=scanpy.read_h5ad('mouse_tabula_muris_10x_log1p_cpm.h5ad')

data

AnnData object with n_obs × n_vars = 54967 × 18099
    obs: 'tissue', 'subtissue', 'cell_ontology_class', 'percent_ribo', 'n_counts', 'n_genes'
    var: 'n_cells'

data.var_names

Index(['0610005C13Rik', '0610007C21Rik', '0610007L01Rik', '0610007N19Rik',
       '0610007P08Rik', '0610007P14Rik', '0610007P22Rik', '0610008F07Rik',
       '0610009B14Rik', '0610009B22Rik',
       ...
       'Zxda', 'Zxdb', 'Zxdc', 'Zyg11a', 'Zyg11b', 'Zyx', 'Zzef1', 'Zzz3', 'a',
       'l7Rn6'],
      dtype='object', name='index', length=18099)

data.var

data.X

<54967x18099 sparse matrix of type '<class 'numpy.float32'>'
	with 105321967 stored elements in Compressed Sparse Row format>

data.X[:5,:5]

<5x5 sparse matrix of type '<class 'numpy.float32'>'
	with 6 stored elements in Compressed Sparse Row format>

data.X[:5,:5].toarray()

array([[0.       , 5.7223763, 0.       , 0.       , 5.318546 ],
       [0.       , 4.08133  , 4.08133  , 0.       , 0.       ],
       [0.       , 4.914498 , 0.       , 0.       , 0.       ],
       [0.       , 0.       , 0.       , 0.       , 0.       ],
       [0.       , 3.955095 , 0.       , 0.       , 0.       ]],
      dtype=float32)

data.obs

	tissue	subtissue	cell_ontology_class	percent_ribo	n_counts	n_genes
index
10X_P4_0_AAACCTGAGATTACCC	Tongue	nan	basal cell of epidermis	0.115556	9848.0	2857
10X_P4_0_AAACCTGAGTGCCAGA	Tongue	nan	keratinocyte	0.177758	17175.0	2933
10X_P4_0_AAACCTGCAAATCCGT	Tongue	nan	keratinocyte	0.090344	22181.0	3217
10X_P4_0_AAACCTGGTAATCGTC	Tongue	nan	basal cell of epidermis	0.185273	16840.0	3108
10X_P4_0_AAACCTGGTCCAACTA	Tongue	nan	basal cell of epidermis	0.144283	19531.0	3713
...	...	...	...	...	...	...
10X_P8_15_TTTGTCAGTTGTCGCG	Trachea	nan	endothelial cell	0.126633	6507.0	2256
10X_P8_15_TTTGTCATCACGATGT	Trachea	nan	blood cell	0.128589	1672.0	772
10X_P8_15_TTTGTCATCGGCTTGG	Trachea	nan	endothelial cell	0.096562	2589.0	1302
10X_P8_15_TTTGTCATCTTACCGC	Trachea	nan	mesenchymal cell	0.104930	2373.0	973
10X_P8_15_TTTGTCATCTTGTTTG	Trachea	nan	mesenchymal cell	0.073014	5903.0	1800

	tissue	subtissue	cell_ontology_class	percent_ribo	n_counts	n_genes
index
10X_P4_0_AAACCTGAGATTACCC	Tongue	nan	basal cell of epidermis	0.115556	9848.0	2857
10X_P4_0_AAACCTGAGTGCCAGA	Tongue	nan	keratinocyte	0.177758	17175.0	2933
10X_P4_0_AAACCTGCAAATCCGT	Tongue	nan	keratinocyte	0.090344	22181.0	3217
10X_P4_0_AAACCTGGTAATCGTC	Tongue	nan	basal cell of epidermis	0.185273	16840.0	3108
10X_P4_0_AAACCTGGTCCAACTA	Tongue	nan	basal cell of epidermis	0.144283	19531.0	3713
...	...	...	...	...	...	...
10X_P8_15_TTTGTCAGTTGTCGCG	Trachea	nan	endothelial cell	0.126633	6507.0	2256
10X_P8_15_TTTGTCATCACGATGT	Trachea	nan	blood cell	0.128589	1672.0	772
10X_P8_15_TTTGTCATCGGCTTGG	Trachea	nan	endothelial cell	0.096562	2589.0	1302
10X_P8_15_TTTGTCATCTTACCGC	Trachea	nan	mesenchymal cell	0.104930	2373.0	973
10X_P8_15_TTTGTCATCTTGTTTG	Trachea	nan	mesenchymal cell	0.073014	5903.0	1800

Computer Systems Overview¶

1/28/2026¶

Memory Access Times¶

Disk Access Time¶

SSD Access Time¶

Takeaways¶

Parallel Programming¶

Threads vs. Processes¶

Address Spaces¶

Threads vs. Processes¶

The Answer¶

Threads vs. Processes¶

The Answer¶

Parallel Programming Concepts: Communication¶

Queue¶

Pipe/Socket¶

Queue Example¶

Pipe Example¶

Pools¶

Pool Example¶

Threads or Processes?¶

Memory Mapping¶

When mmap can provide performance benefit¶

lmdb¶

Going beyond raw bytes¶

HDF5¶

Specializations of HDF5¶

AnnData¶

anndata package provides a higher level interface¶

scanpy - anndata wrapper specifically for single cell data¶

	index	tissue	subtissue	cell_ontology_class	percent_ribo	n_counts	n_genes
0	b'10X_P4_0_AAACCTGAGATTACCC'	10	2	7	0.115556	9848.0	2857
1	b'10X_P4_0_AAACCTGAGTGCCAGA'	10	2	31	0.177758	17175.0	2933
2	b'10X_P4_0_AAACCTGCAAATCCGT'	10	2	31	0.090344	22181.0	3217
3	b'10X_P4_0_AAACCTGGTAATCGTC'	10	2	7	0.185273	16840.0	3108
4	b'10X_P4_0_AAACCTGGTCCAACTA'	10	2	7	0.144283	19531.0	3713
...	...	...	...	...	...	...	...
54962	b'10X_P8_15_TTTGTCAGTTGTCGCG'	11	2	19	0.126633	6507.0	2256
54963	b'10X_P8_15_TTTGTCATCACGATGT'	11	2	11	0.128589	1672.0	772
54964	b'10X_P8_15_TTTGTCATCGGCTTGG'	11	2	19	0.096562	2589.0	1302
54965	b'10X_P8_15_TTTGTCATCTTACCGC'	11	2	44	0.104930	2373.0	973
54966	b'10X_P8_15_TTTGTCATCTTGTTTG'	11	2	44	0.073014	5903.0	1800

	index	n_cells
0	b'0610005C13Rik'	1847
1	b'0610007C21Rik'	23434
2	b'0610007L01Rik'	13040
3	b'0610007N19Rik'	11829
4	b'0610007P08Rik'	3299
...	...	...
18094	b'Zyx'	18955
18095	b'Zzef1'	6096
18096	b'Zzz3'	8077
18097	b'a'	165
18098	b'l7Rn6'	14131

	n_cells
index
0610005C13Rik	1847
0610007C21Rik	23434
0610007L01Rik	13040
0610007N19Rik	11829
0610007P08Rik	3299
...	...
Zyx	18955
Zzef1	6096
Zzz3	8077
a	165
l7Rn6	14131