import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Read the B1_B9.xlsx workbook from its GitHub URL into a DataFrame.
url = (
    "https://github.com/xfreppihs/lab_data_analysis/blob/master/"
    "mouse_tumor_xenograft/B1_B9.xlsx?raw=true"
)
data = pd.read_excel(io=url)
# Preview the first rows (rendered as the cell output in a notebook).
data.head()
Unnamed: 0 | date | weight (g) | Unnamed: 3 | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | Unnamed: 7 | Unnamed: 8 | Unnamed: 9 | Unnamed: 10 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | B1 | 2017-06-13 00:00:00 | 23.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | B2 | 2017-06-13 00:00:00 | 25.1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | B3 | 2017-06-13 00:00:00 | 25.1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | B4 | 2017-06-13 00:00:00 | 24.3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | B5 | 2017-06-13 00:00:00 | 21.8 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# Select only the IVIS data: mouse id, date and the two IVIS reading columns
# (renamed to 'left' / 'right' further down).
# Take an explicit copy so that later in-place edits (dropna, renaming) act on
# an independent DataFrame instead of a slice of `data`; mutating the bare
# selection is what raises pandas' SettingWithCopyWarning.
idata = data[['Unnamed: 0', 'date', 'Unnamed: 3', 'Unnamed: 4']].copy()
idata.head()
Unnamed: 0 | date | Unnamed: 3 | Unnamed: 4 | |
---|---|---|---|---|
0 | B1 | 2017-06-13 00:00:00 | NaN | NaN |
1 | B2 | 2017-06-13 00:00:00 | NaN | NaN |
2 | B3 | 2017-06-13 00:00:00 | NaN | NaN |
3 | B4 | 2017-06-13 00:00:00 | NaN | NaN |
4 | B5 | 2017-06-13 00:00:00 | NaN | NaN |
# Remove every row that contains a NaN in any of the selected columns.
# Rebind the result instead of using inplace=True: `idata` was selected from
# `data`, and mutating such a selection in place triggers
# SettingWithCopyWarning. dropna() returns a new, independent DataFrame.
idata = idata.dropna()
idata.head()
<ipython-input-4-a6527fe07297>:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy idata.dropna(inplace = True)
Unnamed: 0 | date | Unnamed: 3 | Unnamed: 4 | |
---|---|---|---|---|
10 | B1 | 2017-06-15 00:00:00 | 10270000 | 55130000 |
11 | B2 | 2017-06-15 00:00:00 | 51890000 | 66210000 |
12 | B3 | 2017-06-15 00:00:00 | 55170000 | 30940000 |
13 | B4 | 2017-06-15 00:00:00 | 48380000 | 74310000 |
14 | B5 | 2017-06-15 00:00:00 | 27340000 | 60090000 |
# Rename the four columns to id / date / left / right.
# set_axis() returns a relabelled DataFrame; rebind it rather than passing
# inplace=True, because the `inplace` keyword was removed from set_axis in
# pandas 2.0 and now raises TypeError.
idata = idata.set_axis(['id', 'date', 'left', 'right'], axis='columns')
idata.head()
id | date | left | right | |
---|---|---|---|---|
10 | B1 | 2017-06-15 00:00:00 | 10270000 | 55130000 |
11 | B2 | 2017-06-15 00:00:00 | 51890000 | 66210000 |
12 | B3 | 2017-06-15 00:00:00 | 55170000 | 30940000 |
13 | B4 | 2017-06-15 00:00:00 | 48380000 | 74310000 |
14 | B5 | 2017-06-15 00:00:00 | 27340000 | 60090000 |
# Reshape wide -> long: one row per (id, date, side), with the side name in
# 'variable' and the reading in 'value'.
idata_melt = idata.melt(
    id_vars=['id', 'date'],
    value_vars=['left', 'right'],
)
idata_melt
id | date | variable | value | |
---|---|---|---|---|
0 | B1 | 2017-06-15 | left | 10270000 |
1 | B2 | 2017-06-15 | left | 51890000 |
2 | B3 | 2017-06-15 | left | 55170000 |
3 | B4 | 2017-06-15 | left | 48380000 |
4 | B5 | 2017-06-15 | left | 27340000 |
5 | B6 | 2017-06-15 | left | 58690000 |
6 | B1 | 2017-06-19 | left | 115800000 |
7 | B2 | 2017-06-19 | left | 468700000 |
8 | B3 | 2017-06-19 | left | 556600000 |
9 | B4 | 2017-06-19 | left | 196300000 |
10 | B6 | 2017-06-19 | left | 183600000 |
11 | B1 | 2017-06-22 | left | 403900000 |
12 | B2 | 2017-06-22 | left | 784700000 |
13 | B3 | 2017-06-22 | left | 794500000 |
14 | B4 | 2017-06-22 | left | 892800000 |
15 | B5 | 2017-06-22 | left | 431100000 |
16 | B6 | 2017-06-22 | left | 110000000 |
17 | B1 | 2017-06-26 | left | 1821000000 |
18 | B2 | 2017-06-26 | left | 2929000000 |
19 | B3 | 2017-06-26 | left | 4642000000 |
20 | B5 | 2017-06-26 | left | 898200000 |
21 | B6 | 2017-06-26 | left | 965400000 |
22 | B1 | 2017-06-29 | left | 2589000000 |
23 | B2 | 2017-06-29 | left | 7103000000 |
24 | B3 | 2017-06-29 | left | 10940000000 |
25 | B5 | 2017-06-29 | left | 3606000000 |
26 | B6 | 2017-06-29 | left | 6687000000 |
27 | B1 | 2017-06-15 | right | 55130000 |
28 | B2 | 2017-06-15 | right | 66210000 |
29 | B3 | 2017-06-15 | right | 30940000 |
30 | B4 | 2017-06-15 | right | 74310000 |
31 | B5 | 2017-06-15 | right | 60090000 |
32 | B6 | 2017-06-15 | right | 123100000 |
33 | B1 | 2017-06-19 | right | 436900000 |
34 | B2 | 2017-06-19 | right | 521400000 |
35 | B3 | 2017-06-19 | right | 346400000 |
36 | B4 | 2017-06-19 | right | 1627000000 |
37 | B6 | 2017-06-19 | right | 1277000000 |
38 | B1 | 2017-06-22 | right | 1736000000 |
39 | B2 | 2017-06-22 | right | 1401000000 |
40 | B3 | 2017-06-22 | right | 744900000 |
41 | B4 | 2017-06-22 | right | 3907000000 |
42 | B5 | 2017-06-22 | right | 5005000000 |
43 | B6 | 2017-06-22 | right | 1520000000 |
44 | B1 | 2017-06-26 | right | 8948000000 |
45 | B2 | 2017-06-26 | right | 3840000000 |
46 | B3 | 2017-06-26 | right | 3034000000 |
47 | B5 | 2017-06-26 | right | 6210000000 |
48 | B6 | 2017-06-26 | right | 3554000000 |
49 | B1 | 2017-06-29 | right | 15290000000 |
50 | B2 | 2017-06-29 | right | 8009000000 |
51 | B3 | 2017-06-29 | right | 6552000000 |
52 | B5 | 2017-06-29 | right | 6920000000 |
53 | B6 | 2017-06-29 | right | 17920000000 |
# Cast 'value' to float and 'date' to str before plotting.
idata_melt = idata_melt.astype({'value': float, 'date': str})

# Available styles: white, dark, whitegrid, darkgrid, ticks.
sns.set_style('ticks')

# One panel per tumor side ('variable'), one colored line per mouse ('id').
g1 = sns.FacetGrid(
    data=idata_melt,
    col='variable',
    hue='id',
    height=5,
    aspect=1.2,
)
# The figure size could also be set afterwards with
# g1.fig.set_figwidth(12) / g1.fig.set_figheight(5).

# map() draws onto the grid and returns the grid itself.
g1.map(sns.lineplot, 'date', 'value', lw=3)

# Panel titles ("left tumor" / "right tumor") and axis labels (x label left empty).
g1.set_titles(col_template="{col_name} tumor", size=14)
g1.set_axis_labels('', 'Total flux (p/s)', size=14)

# Replace the date tick labels with study-day labels, one per date.
xlabels = ['Day 3', 'Day 7', 'Day 10', 'Day 14', 'Day 17']
g1.set_xticklabels(xlabels)

# Build a custom legend: take the handles from the first panel and attach the
# legend box to the second panel, placed outside its right edge.
# (g1.add_legend() would also work, but only with the default styling.)
axes = g1.axes.flatten()
handle, label = axes[0].get_legend_handles_labels()
axes[1].legend(
    handles=handle,
    labels=label,
    title='Mouse',
    bbox_to_anchor=(1.05, 0.7),
)
<matplotlib.legend.Legend at 0x25f6672de80>
For the available FacetGrid methods, see https://seaborn.pydata.org/generated/seaborn.FacetGrid.html
For the available Axes methods, see https://matplotlib.org/3.2.1/api/axes_api.html
# Select only the volume data: mouse id, date and the two volume columns
# (renamed to 'left' / 'right' further down).
# Take an explicit copy so that later in-place edits (dropna, renaming) act on
# an independent DataFrame instead of a slice of `data`; mutating the bare
# selection is what raises pandas' SettingWithCopyWarning.
vdata = data[['Unnamed: 0', 'date', 'Unnamed: 7', 'Unnamed: 10']].copy()
vdata.head()
Unnamed: 0 | date | Unnamed: 7 | Unnamed: 10 | |
---|---|---|---|---|
0 | B1 | 2017-06-13 00:00:00 | NaN | NaN |
1 | B2 | 2017-06-13 00:00:00 | NaN | NaN |
2 | B3 | 2017-06-13 00:00:00 | NaN | NaN |
3 | B4 | 2017-06-13 00:00:00 | NaN | NaN |
4 | B5 | 2017-06-13 00:00:00 | NaN | NaN |
# Remove every row that contains a NaN in any of the selected columns.
# Rebind the result instead of using inplace=True: `vdata` was selected from
# `data`, and mutating such a selection in place triggers
# SettingWithCopyWarning. dropna() returns a new, independent DataFrame.
vdata = vdata.dropna()
vdata.head()
<ipython-input-10-e4d5ceb43dd1>:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy vdata.dropna(inplace = True)
Unnamed: 0 | date | Unnamed: 7 | Unnamed: 10 | |
---|---|---|---|---|
30 | B1 | 2017-06-22 00:00:00 | 147.253 | 108.749 |
31 | B2 | 2017-06-22 00:00:00 | 216.212 | 232.359 |
32 | B3 | 2017-06-22 00:00:00 | 126.703 | 297.977 |
33 | B4 | 2017-06-22 00:00:00 | 232.078 | 614.967 |
34 | B5 | 2017-06-22 00:00:00 | 202.615 | 368.527 |
# Rename the four columns to id / date / left / right.
# set_axis() returns a relabelled DataFrame; rebind it rather than passing
# inplace=True, because the `inplace` keyword was removed from set_axis in
# pandas 2.0 and now raises TypeError.
vdata = vdata.set_axis(['id', 'date', 'left', 'right'], axis='columns')
vdata.head()
id | date | left | right | |
---|---|---|---|---|
30 | B1 | 2017-06-22 00:00:00 | 147.253 | 108.749 |
31 | B2 | 2017-06-22 00:00:00 | 216.212 | 232.359 |
32 | B3 | 2017-06-22 00:00:00 | 126.703 | 297.977 |
33 | B4 | 2017-06-22 00:00:00 | 232.078 | 614.967 |
34 | B5 | 2017-06-22 00:00:00 | 202.615 | 368.527 |
# Reshape wide -> long: one row per (id, date, side), with the side name in
# 'variable' and the measurement in 'value'.
vdata_melt = vdata.melt(
    id_vars=['id', 'date'],
    value_vars=['left', 'right'],
)
vdata_melt
id | date | variable | value | |
---|---|---|---|---|
0 | B1 | 2017-06-22 | left | 147.253 |
1 | B2 | 2017-06-22 | left | 216.212 |
2 | B3 | 2017-06-22 | left | 126.703 |
3 | B4 | 2017-06-22 | left | 232.078 |
4 | B5 | 2017-06-22 | left | 202.615 |
5 | B6 | 2017-06-22 | left | 77.7219 |
6 | B1 | 2017-06-26 | left | 172.591 |
7 | B2 | 2017-06-26 | left | 177.083 |
8 | B3 | 2017-06-26 | left | 181.959 |
9 | B5 | 2017-06-26 | left | 227.389 |
10 | B6 | 2017-06-26 | left | 83.149 |
11 | B1 | 2017-06-29 | left | 234.699 |
12 | B2 | 2017-06-29 | left | 249.822 |
13 | B3 | 2017-06-29 | left | 326.095 |
14 | B5 | 2017-06-29 | left | 256.048 |
15 | B6 | 2017-06-29 | left | 165.888 |
16 | B1 | 2017-06-22 | right | 108.749 |
17 | B2 | 2017-06-22 | right | 232.359 |
18 | B3 | 2017-06-22 | right | 297.977 |
19 | B4 | 2017-06-22 | right | 614.967 |
20 | B5 | 2017-06-22 | right | 368.527 |
21 | B6 | 2017-06-22 | right | 280.714 |
22 | B1 | 2017-06-26 | right | 179.18 |
23 | B2 | 2017-06-26 | right | 242.319 |
24 | B3 | 2017-06-26 | right | 287.399 |
25 | B5 | 2017-06-26 | right | 558.001 |
26 | B6 | 2017-06-26 | right | 475.344 |
27 | B1 | 2017-06-29 | right | 839.154 |
28 | B2 | 2017-06-29 | right | 364.514 |
29 | B3 | 2017-06-29 | right | 309.838 |
30 | B5 | 2017-06-29 | right | 803.831 |
31 | B6 | 2017-06-29 | right | 1149.27 |
# Cast 'value' to float and 'date' to str before plotting.
vdata_melt = vdata_melt.astype({'value': float, 'date': str})

# Available styles: white, dark, whitegrid, darkgrid, ticks.
sns.set_style('ticks')

# One panel per tumor side ('variable'), one colored line per mouse ('id').
g3 = sns.FacetGrid(
    data=vdata_melt,
    col='variable',
    hue='id',
    height=5,
    aspect=1.2,
)
# The figure size could also be set afterwards with
# g3.fig.set_figwidth(12) / g3.fig.set_figheight(5).

# map() draws onto the grid and returns the grid itself.
g3.map(sns.lineplot, 'date', 'value', lw=3)

# Panel titles ("left tumor" / "right tumor") and axis labels (x label left empty).
g3.set_titles(col_template="{col_name} tumor", size=14)
g3.set_axis_labels('', 'Tumor volume (mm3)', size=14)

# Replace the date tick labels with study-day labels, one per date.
xlabels2 = ['Day 10', 'Day 14', 'Day 17']
g3.set_xticklabels(xlabels2)

# Build a custom legend: take the handles from the first panel and attach the
# legend box to the second panel, placed outside its right edge.
# (g3.add_legend() would also work, but only with the default styling.)
axes = g3.axes.flatten()
handle, label = axes[0].get_legend_handles_labels()
axes[1].legend(
    handles=handle,
    labels=label,
    title='Mouse',
    bbox_to_anchor=(1.05, 0.7),
)
<matplotlib.legend.Legend at 0x25f66bed8e0>
# Read the B10_B74.xlsx workbook from its GitHub URL into a DataFrame.
url2 = (
    "https://github.com/xfreppihs/lab_data_analysis/blob/master/"
    "mouse_tumor_xenograft/B10_B74.xlsx?raw=true"
)
data2 = pd.read_excel(io=url2)
# Preview the first rows (rendered as the cell output in a notebook).
data2.head()
ID | cell_line | volume | group | |
---|---|---|---|---|
0 | B25 | rSCC61 | 179.812813 | 5 |
1 | B26 | rSCC61 | 169.372175 | 2 |
2 | B27 | rSCC61 | 179.681898 | 4 |
3 | B28 | rSCC61 | 165.902013 | 1 |
4 | B29 | rSCC61 | 202.898808 | 1 |
# Available styles: white, dark, whitegrid, darkgrid, ticks.
sns.set_style('ticks')

# NOTE: sns.distplot is deprecated (seaborn emits a FutureWarning for it); it
# is kept here because this cell demonstrates its hist_kws / kde_kws styling.
kde_style = {"color": "black", "lw": 3}
hist_style = {'lw': 2, 'fill': False, 'edgecolor': 'black'}

# One row per cell line, in a fixed order, sharing the 0-1000 x range.
g2 = sns.FacetGrid(
    data2,
    row='cell_line',
    row_order=['SCC61', 'rSCC61'],
    height=3,
    aspect=2,
    margin_titles=True,
    xlim=(0, 1000),
)
# map() draws onto the grid and returns the grid itself.
g2.map(sns.distplot, 'volume', bins=10, kde_kws=kde_style, hist_kws=hist_style)

# Row titles and axis labels.
g2.set_titles(row_template="{row_name}", size=14)
g2.set_axis_labels('Volume (mm3)', 'Density', size=14)

# Mark the mean volume of each cell line with a dashed red vertical line;
# axes[0] / axes[1] follow the row_order given above.
axes = g2.axes.flatten()
axes[0].axvline(data2.loc[data2['cell_line'] == 'SCC61', 'volume'].mean(),
                ls='--', lw=3, c='r')
axes[1].axvline(data2.loc[data2['cell_line'] == 'rSCC61', 'volume'].mean(),
                ls='--', lw=3, c='r')
C:\Users\chenx\anaconda3\lib\site-packages\seaborn\distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning) C:\Users\chenx\anaconda3\lib\site-packages\seaborn\distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
<matplotlib.lines.Line2D at 0x25f66c34e20>
Using distplot, I was not able to make the bin size the same across the grids. Its advantage is the ability to customize hist_kws and kde_kws.
# Available styles: white, dark, whitegrid, darkgrid, ticks.
sns.set_style('ticks')

# Histogram + KDE of tumor volume, one row per cell line, with a common
# 50-unit bin width. With `hue` set, the bar colors come from `palette`.
# legend=False suppresses the legend up front instead of removing it afterwards
# through the private `g2._legend` attribute.
g2 = sns.displot(
    data=data2, x='volume', row='cell_line', hue='cell_line',
    row_order=['SCC61', 'rSCC61'], palette='Set2',
    binwidth=50, kde=True, alpha=0.5, color='grey', edgecolor='black',
    height=3, aspect=3, legend=False,
)

# Row titles and axis labels. displot's default statistic is the per-bin
# count (no stat='density' is requested), so the y-axis is labelled 'Count'.
g2.set_titles(row_template="{row_name}", size=14)
g2.set_axis_labels('Volume (mm3)', 'Count', size=14)

# Kept so `axes` is still defined for any code that reads it after this cell.
axes = g2.axes.flatten()

# Mark the mean volume of each cell line with a dashed red vertical line.
# The axes are looked up by facet name (axes_dict) rather than by position, so
# each mean lands on its own panel regardless of the row order seaborn uses.
mean_volume = data2.groupby('cell_line')['volume'].mean()
for cell_line, ax in g2.axes_dict.items():
    ax.axvline(mean_volume[cell_line], ls='--', lw=3, c='r')
Using displot, unifying the binwidth between the grids is easy, but I was not able to change the order of the grids. The color theme of the histogram and kde line is hard to change, too.
# Two stacked panels, one per cell line, each drawn with its own distplot call.
# NOTE: sns.distplot is deprecated; it is kept here because this cell
# demonstrates per-panel `bins` control with it.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(6, 9))

# (cell_line value in data2, panel title, number of histogram bins)
panels = [('SCC61', 'SCC-61', 10), ('rSCC61', 'rSCC-61', 5)]
panel_means = {}
for ax, (cell_line, title, n_bins) in zip(axes, panels):
    volumes = data2.loc[data2['cell_line'] == cell_line, 'volume']
    panel_means[cell_line] = volumes.mean()
    # Fresh style dicts per call, as in two separate distplot calls.
    sns.distplot(volumes, ax=ax, bins=n_bins,
                 kde_kws={"color": "black", "lw": 3},
                 hist_kws={'lw': 2, 'fill': False, 'edgecolor': 'black'})
    # Each Axes could also be captured from the return value of sns.distplot().
    ax.set_xlim(0, 1000)
    ax.set_xlabel('Volume (mm3)', size=14)
    ax.set_ylabel('Density', size=14)
    ax.set_title(title, size=14)

plt.tight_layout()

# Dashed red vertical line at the mean volume of each cell line.
axes[0].axvline(panel_means['SCC61'], ls='--', lw=3, c='r')
axes[1].axvline(panel_means['rSCC61'], ls='--', lw=3, c='r')
Here I started plotting subplots individually instead of using a FacetGrid, but controlling distplot bins instead of binwidth is still difficult and not ideal.
# Two stacked panels, one per cell line, drawn with histplot so both panels
# share the same 50-unit bin width.
# NOTE(review): histplot appears to clip its KDE curve to the data range by
# default (cut=0); passing kde_kws={'cut': 3} should extend it - verify
# against the seaborn documentation before relying on it.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(6, 9))

# (cell_line value in data2, panel title)
panels = [('SCC61', 'SCC-61'), ('rSCC61', 'rSCC-61')]
panel_means = {}
for ax, (cell_line, title) in zip(axes, panels):
    subset = data2[data2['cell_line'] == cell_line]
    panel_means[cell_line] = subset['volume'].mean()
    sns.histplot(data=subset, x='volume', ax=ax, binwidth=50, kde=True,
                 fill=False, color='black', lw=2, stat='density')
    # Each Axes could also be captured from the return value of sns.histplot().
    ax.set_xlim(0, 1000)
    ax.set_xlabel('Volume (mm3)', size=14)
    ax.set_ylabel('Density', size=14)
    ax.set_title(title, size=14)

plt.tight_layout()

# Dashed red vertical line at the mean volume of each cell line.
axes[0].axvline(panel_means['SCC61'], ls='--', lw=3, c='r')
axes[1].axvline(panel_means['rSCC61'], ls='--', lw=3, c='r')
<matplotlib.lines.Line2D at 0x25f678e89d0>
Plotting individual subplots as histplot solves the binwidth problem, but the kde line does not extend fully.
# Strip plot of tumor volume per group, with the two cell lines dodged side by
# side and colored from the 'coolwarm' palette.
group_labels = ['5 min', '30 min', '60 min', '90 min', '120 min']
axes = sns.stripplot(
    x='group',
    y='volume',
    hue='cell_line',
    hue_order=['SCC61', 'rSCC61'],
    data=data2,
    dodge=True,
    size=10,
    linewidth=1,
    palette='coolwarm',
)

# Axis labels; plt.xlabel(...) / plt.ylabel(...) are the pyplot equivalents.
axes.set_xlabel('Group', size=14)
axes.set_ylabel('Volume (mm3)', size=14)

# Relabel the group ticks; the pyplot equivalent is
# plt.xticks((0, 1, 2, 3, 4), group_labels).
axes.set_xticklabels(group_labels)

# Framed legend; the pyplot equivalent is plt.legend(frameon=True, fontsize=12).
axes.legend(frameon=True, fontsize=12)

# Remove the top and right spines.
sns.despine()