import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Raw-download link to the B1_B9 workbook in the mouse_tumor_xenograft folder on GitHub
url = "https://github.com/xfreppihs/lab_data_analysis/blob/master/mouse_tumor_xenograft/B1_B9.xlsx?raw=true"
# read_excel accepts a URL as well as a local path
data = pd.read_excel(io=url)
data.head()  # preview the first rows


# select only IVIS data
# .copy() makes idata an independent frame rather than a slice of `data`,
# so later modifications cannot trigger SettingWithCopyWarning
idata = data[['Unnamed: 0', 'date', 'Unnamed: 3', 'Unnamed: 4']].copy()
idata.head()


# remove NaN
# rebind the cleaned frame instead of mutating in place (inplace=True on a
# column slice is what raised SettingWithCopyWarning)
idata = idata.dropna()
idata.head()

<ipython-input-4-a6527fe07297>:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  idata.dropna(inplace = True)


# rename columns
# set_axis(..., inplace=True) was deprecated in pandas 1.5 and removed in 2.0;
# assigning to .columns is still in-place and works on every pandas version
idata.columns = ['id', 'date', 'left', 'right']
idata.head()


# reshape wide -> long: the 'left' and 'right' columns are stacked into
# 'variable' (column name) and 'value' (measurement), keyed by id and date
idata_melt = idata.melt(id_vars=['id', 'date'], value_vars=['left', 'right'])
idata_melt


# convert 'value' to numbers, 'date' to strings
idata_melt = idata_melt.astype({'value': float, 'date': str})


sns.set_style('ticks')  # other available styles: white, dark, whitegrid, darkgrid

xlabels = ['Day 3', 'Day 7', 'Day 10', 'Day 14', 'Day 17']

# Each FacetGrid method used below returns the grid, so the calls can be chained.
# One panel per 'variable' value (left / right), one line colour per 'id'.
g1 = (
    sns.FacetGrid(idata_melt, col='variable', hue='id', height=5, aspect=1.2)
    .map(sns.lineplot, 'date', 'value', lw=3)               # draw line plot
    .set_titles(col_template="{col_name} tumor", size=14)   # title of each panel
    .set_axis_labels('', 'Total flux (p/s)', size=14)       # blank x label, custom y label
    .set_xticklabels(xlabels)                               # replace date strings on the x-axis
)
# figure size can alternatively be set afterwards with
# g1.fig.set_figwidth(12) and g1.fig.set_figheight(5)

axes = g1.axes.ravel()  # access to individual plot
# take the line handles/labels from the first panel and attach a customised
# legend box to the second panel, anchored just outside its right edge
handle, label = axes[0].get_legend_handles_labels()
axes[1].legend(handles=handle, labels=label, title='Mouse', bbox_to_anchor=(1.05, 0.7))
# g1.add_legend() would add a legend directly, but only with the default style

<matplotlib.legend.Legend at 0x25f6672de80>


# select only volume data
# .copy() makes vdata an independent frame rather than a slice of `data`,
# so later modifications cannot trigger SettingWithCopyWarning
vdata = data[['Unnamed: 0', 'date', 'Unnamed: 7', 'Unnamed: 10']].copy()
vdata.head()


# remove NaN
# rebind the cleaned frame instead of mutating in place (inplace=True on a
# column slice is what raised SettingWithCopyWarning)
vdata = vdata.dropna()
vdata.head()

<ipython-input-10-e4d5ceb43dd1>:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vdata.dropna(inplace = True)


# rename columns
# set_axis(..., inplace=True) was deprecated in pandas 1.5 and removed in 2.0;
# assigning to .columns is still in-place and works on every pandas version
vdata.columns = ['id', 'date', 'left', 'right']
vdata.head()


# reshape wide -> long: the 'left' and 'right' columns are stacked into
# 'variable' (column name) and 'value' (measurement), keyed by id and date
vdata_melt = vdata.melt(id_vars=['id', 'date'], value_vars=['left', 'right'])
vdata_melt


# convert 'value' to numbers, 'date' to strings
vdata_melt = vdata_melt.astype({'value': float, 'date': str})


sns.set_style('ticks')  # other available styles: white, dark, whitegrid, darkgrid

xlabels2 = ['Day 10', 'Day 14', 'Day 17']

# Each FacetGrid method used below returns the grid, so the calls can be chained.
# One panel per 'variable' value (left / right), one line colour per 'id'.
g3 = (
    sns.FacetGrid(vdata_melt, col='variable', hue='id', height=5, aspect=1.2)
    .map(sns.lineplot, 'date', 'value', lw=3)               # draw line plot
    .set_titles(col_template="{col_name} tumor", size=14)   # title of each panel
    .set_axis_labels('', 'Tumor volume (mm3)', size=14)     # blank x label, custom y label
    .set_xticklabels(xlabels2)                              # replace date strings on the x-axis
)
# figure size can alternatively be set afterwards with
# g3.fig.set_figwidth(12) and g3.fig.set_figheight(5)

axes = g3.axes.ravel()  # access to individual plot
# take the line handles/labels from the first panel and attach a customised
# legend box to the second panel, anchored just outside its right edge
handle, label = axes[0].get_legend_handles_labels()
axes[1].legend(handles=handle, labels=label, title='Mouse', bbox_to_anchor=(1.05, 0.7))
# g3.add_legend() would add a legend directly, but only with the default style

<matplotlib.legend.Legend at 0x25f66bed8e0>


# Raw-download link to the B10_B74 workbook in the mouse_tumor_xenograft folder on GitHub
url2 = "https://github.com/xfreppihs/lab_data_analysis/blob/master/mouse_tumor_xenograft/B10_B74.xlsx?raw=true"
# read_excel accepts a URL as well as a local path
data2 = pd.read_excel(io=url2)
data2.head()  # preview the first rows


sns.set_style('ticks')  # other available styles: white, dark, whitegrid, darkgrid

# NOTE: sns.distplot is deprecated in seaborn (it emits a FutureWarning
# pointing to displot / histplot as replacements).
kde_style = {"color": "black", "lw": 3}
hist_style = {'lw': 2, 'fill': False, 'edgecolor': 'black'}

# One row per cell line, in a fixed order, sharing a 0-1000 x-range
g2 = sns.FacetGrid(
    data2,
    row='cell_line',
    row_order=['SCC61', 'rSCC61'],
    height=3,
    aspect=2,
    margin_titles=True,
    xlim=(0, 1000),
)
g2 = (
    g2.map(sns.distplot, 'volume', bins=10, kde_kws=kde_style, hist_kws=hist_style)  # draw dist plot
    .set_titles(row_template="{row_name}", size=14)       # title of each row
    .set_axis_labels('Volume (mm3)', 'Density', size=14)  # change axis labels
)

# add a dashed red vertical line at each cell line's mean volume
axes = g2.axes.ravel()
axes[0].axvline(data2.loc[data2['cell_line'] == 'SCC61', 'volume'].mean(), ls='--', lw=3, c='r')
axes[1].axvline(data2.loc[data2['cell_line'] == 'rSCC61', 'volume'].mean(), ls='--', lw=3, c='r')

C:\Users\chenx\anaconda3\lib\site-packages\seaborn\distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
C:\Users\chenx\anaconda3\lib\site-packages\seaborn\distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)

<matplotlib.lines.Line2D at 0x25f66c34e20>


sns.set_style('ticks') # can choose from white, dark, whitegrid, darkgrid, ticks

# stat='density' makes the bars match the 'Density' y-axis label (the default
# stat is 'count'); common_norm=False normalises each cell line on its own
# instead of over the pooled data. legend=False suppresses the hue legend up
# front, so there is no need to remove it through the private g2._legend.
g2 = sns.displot(data = data2, x = 'volume', row = 'cell_line', binwidth = 50, kde = True, stat = 'density',
                 common_norm = False, alpha = 0.5, color = 'grey', edgecolor = 'black', height = 3, aspect = 3,
                 row_order = ['SCC61', 'rSCC61'], hue = 'cell_line', palette = 'Set2', legend = False) # draw displot

g2.set_titles(row_template="{row_name}",size = 14) # add title to each grid

g2.set_axis_labels('Volume (mm3)', 'Density', size = 14) # change axis labels

axes= g2.axes.flatten() # add vertical lines for the means
axes[0].axvline(data2[data2['cell_line']=='SCC61']['volume'].mean(), ls='--', lw = 3, c = 'r')
axes[1].axvline(data2[data2['cell_line']=='rSCC61']['volume'].mean(), ls='--', lw = 3, c = 'r')


# Two stacked panels, one per cell line, drawn with the axes-level API.
# NOTE: sns.distplot is deprecated in seaborn; displot / histplot are the
# suggested replacements.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(6, 9))

scc61_volume = data2.loc[data2['cell_line'] == 'SCC61', 'volume']
rscc61_volume = data2.loc[data2['cell_line'] == 'rSCC61', 'volume']

sns.distplot(
    scc61_volume,
    ax=axes[0],
    bins=10,
    kde_kws={"color": "black", "lw": 3},
    hist_kws={'lw': 2, 'fill': False, 'edgecolor': 'black'},
)
sns.distplot(
    rscc61_volume,
    ax=axes[1],
    bins=5,
    kde_kws={"color": "black", "lw": 3},
    hist_kws={'lw': 2, 'fill': False, 'edgecolor': 'black'},
)
# another way to access an individual axis is to keep the return value of
# sns.distplot (like axes = sns.distplot())

# identical limits and labels on both panels; only the title differs
for panel, panel_title in zip(axes, ('SCC-61', 'rSCC-61')):
    panel.set_xlim(0, 1000)
    panel.set_xlabel('Volume (mm3)', size=14)
    panel.set_ylabel('Density', size=14)
    panel.set_title(panel_title, size=14)
plt.tight_layout()

# dashed red vertical line at each cell line's mean volume
axes[0].axvline(scc61_volume.mean(), ls='--', lw=3, c='r')
axes[1].axvline(rscc61_volume.mean(), ls='--', lw=3, c='r')


# Two stacked panels, one per cell line, drawn with sns.histplot
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(6, 9))

is_scc61 = data2['cell_line'] == 'SCC61'
is_rscc61 = data2['cell_line'] == 'rSCC61'

# histogram settings shared by both panels
hist_opts = dict(x='volume', binwidth=50, kde=True, fill=False, color='black', lw=2, stat='density')
sns.histplot(data=data2[is_scc61], ax=axes[0], **hist_opts)
sns.histplot(data=data2[is_rscc61], ax=axes[1], **hist_opts)
# another way to access an individual axis is to keep the return value of
# sns.histplot (like axes = sns.histplot())

# identical limits and labels on both panels; only the title differs
for panel, panel_title in zip(axes, ('SCC-61', 'rSCC-61')):
    panel.set_xlim(0, 1000)
    panel.set_xlabel('Volume (mm3)', size=14)
    panel.set_ylabel('Density', size=14)
    panel.set_title(panel_title, size=14)
plt.tight_layout()

# dashed red vertical line at each cell line's mean volume
axes[0].axvline(data2.loc[is_scc61, 'volume'].mean(), ls='--', lw=3, c='r')
axes[1].axvline(data2.loc[is_rscc61, 'volume'].mean(), ls='--', lw=3, c='r')

<matplotlib.lines.Line2D at 0x25f678e89d0>


# Strip plot of volume per group, with the two cell lines dodged side by side
axes = sns.stripplot(
    data=data2,
    x='group',
    y='volume',
    hue='cell_line',
    hue_order=['SCC61', 'rSCC61'],
    palette='coolwarm',
    dodge=True,
    size=10,
    linewidth=1,
)

# axis labels (plt.xlabel('Group', size = 14) / plt.ylabel('Volume (mm3)', size = 14) also work)
axes.set_xlabel('Group', size=14)
axes.set_ylabel('Volume (mm3)', size=14)

# replace the tick labels with time points
# (or plt.xticks((0,1,2,3,4),('5 min', '30 min', '60 min', '90 min', '120 min')))
axes.set_xticklabels(['5 min', '30 min', '60 min', '90 min', '120 min'])

# framed legend (or plt.legend(frameon = True, fontsize = 12))
axes.legend(frameon=True, fontsize=12)
sns.despine()

	Unnamed: 0	date	Unnamed: 3	Unnamed: 4
10	B1	2017-06-15 00:00:00	10270000	55130000
11	B2	2017-06-15 00:00:00	51890000	66210000
12	B3	2017-06-15 00:00:00	55170000	30940000
13	B4	2017-06-15 00:00:00	48380000	74310000
14	B5	2017-06-15 00:00:00	27340000	60090000

	id	date	left	right
10	B1	2017-06-15 00:00:00	10270000	55130000
11	B2	2017-06-15 00:00:00	51890000	66210000
12	B3	2017-06-15 00:00:00	55170000	30940000
13	B4	2017-06-15 00:00:00	48380000	74310000
14	B5	2017-06-15 00:00:00	27340000	60090000

	id	date	variable	value
0	B1	2017-06-15	left	10270000
1	B2	2017-06-15	left	51890000
2	B3	2017-06-15	left	55170000
3	B4	2017-06-15	left	48380000
4	B5	2017-06-15	left	27340000
5	B6	2017-06-15	left	58690000
6	B1	2017-06-19	left	115800000
7	B2	2017-06-19	left	468700000
8	B3	2017-06-19	left	556600000
9	B4	2017-06-19	left	196300000
10	B6	2017-06-19	left	183600000
11	B1	2017-06-22	left	403900000
12	B2	2017-06-22	left	784700000
13	B3	2017-06-22	left	794500000
14	B4	2017-06-22	left	892800000
15	B5	2017-06-22	left	431100000
16	B6	2017-06-22	left	110000000
17	B1	2017-06-26	left	1821000000
18	B2	2017-06-26	left	2929000000
19	B3	2017-06-26	left	4642000000
20	B5	2017-06-26	left	898200000
21	B6	2017-06-26	left	965400000
22	B1	2017-06-29	left	2589000000
23	B2	2017-06-29	left	7103000000
24	B3	2017-06-29	left	10940000000
25	B5	2017-06-29	left	3606000000
26	B6	2017-06-29	left	6687000000
27	B1	2017-06-15	right	55130000
28	B2	2017-06-15	right	66210000
29	B3	2017-06-15	right	30940000
30	B4	2017-06-15	right	74310000
31	B5	2017-06-15	right	60090000
32	B6	2017-06-15	right	123100000
33	B1	2017-06-19	right	436900000
34	B2	2017-06-19	right	521400000
35	B3	2017-06-19	right	346400000
36	B4	2017-06-19	right	1627000000
37	B6	2017-06-19	right	1277000000
38	B1	2017-06-22	right	1736000000
39	B2	2017-06-22	right	1401000000
40	B3	2017-06-22	right	744900000
41	B4	2017-06-22	right	3907000000
42	B5	2017-06-22	right	5005000000
43	B6	2017-06-22	right	1520000000
44	B1	2017-06-26	right	8948000000
45	B2	2017-06-26	right	3840000000
46	B3	2017-06-26	right	3034000000
47	B5	2017-06-26	right	6210000000
48	B6	2017-06-26	right	3554000000
49	B1	2017-06-29	right	15290000000
50	B2	2017-06-29	right	8009000000
51	B3	2017-06-29	right	6552000000
52	B5	2017-06-29	right	6920000000
53	B6	2017-06-29	right	17920000000

	Unnamed: 0	date	Unnamed: 7	Unnamed: 10
30	B1	2017-06-22 00:00:00	147.253	108.749
31	B2	2017-06-22 00:00:00	216.212	232.359
32	B3	2017-06-22 00:00:00	126.703	297.977
33	B4	2017-06-22 00:00:00	232.078	614.967
34	B5	2017-06-22 00:00:00	202.615	368.527

	id	date	left	right
30	B1	2017-06-22 00:00:00	147.253	108.749
31	B2	2017-06-22 00:00:00	216.212	232.359
32	B3	2017-06-22 00:00:00	126.703	297.977
33	B4	2017-06-22 00:00:00	232.078	614.967
34	B5	2017-06-22 00:00:00	202.615	368.527

	Unnamed: 0	date	weight (g)	Unnamed: 3	Unnamed: 4	Unnamed: 5	Unnamed: 6	Unnamed: 7	Unnamed: 8	Unnamed: 9	Unnamed: 10
0	B1	2017-06-13 00:00:00	23.6	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	B2	2017-06-13 00:00:00	25.1	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	B3	2017-06-13 00:00:00	25.1	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	B4	2017-06-13 00:00:00	24.3	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	B5	2017-06-13 00:00:00	21.8	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	ID	cell_line	volume	group
0	B25	rSCC61	179.812813	5
1	B26	rSCC61	169.372175	2
2	B27	rSCC61	179.681898	4
3	B28	rSCC61	165.902013	1
4	B29	rSCC61	202.898808	1