Commit 4a7d1a6b authored by Georgi Tushev's avatar Georgi Tushev
Browse files

add spike-ins metainfo

parent 40ef5258
......@@ -16,6 +16,17 @@ samples (x 2 replica):
The library preparation is done using [MACE](https://www.ncbi.nlm.nih.gov/pubmed/25052703) method. Reads are sequenced on Illumina NextSeq 550.
## Overview
## Preprocessing
### Spike-ins
|samples|concentration [ng/ul]|total.rna [ng]|rounded.rna [ng]|ERCCs volume [ul]|ERCCs dilution|
|:-----:|:-----------:|:-------:|:---------:|:----------:|:------------:|
|Input|164.5|3290|3500|7|1:100|
|Unbound|13|260|300|6|1:1000|
|80s|21|420|500|10|1:1000|
|2and3|39|780|800|16|1:1000|
|4and5|54|1080|1100|22|1:1000|
|6and7|57|1140|1200|24|1:1000|
|>7|170|3400|3500|7|1:100|
### Normalisation
Re-sort ID ERCC ID subgroup concentration in Mix 1 (attomoles/ul) concentration in Mix 2 (attomoles/ul) expected fold-change ratio log2(Mix 1/Mix 2)
1 ERCC-00130 A 30000 7500 4 2
2 ERCC-00004 A 7500 1875 4 2
3 ERCC-00136 A 1875 468.75 4 2
4 ERCC-00108 A 937.5 234.375 4 2
5 ERCC-00116 A 468.75 117.1875 4 2
6 ERCC-00092 A 234.375 58.59375 4 2
7 ERCC-00095 A 117.1875 29.296875 4 2
8 ERCC-00131 A 117.1875 29.296875 4 2
9 ERCC-00062 A 58.59375 14.6484375 4 2
10 ERCC-00019 A 29.296875 7.32421875 4 2
11 ERCC-00144 A 29.296875 7.32421875 4 2
12 ERCC-00170 A 14.6484375 3.66210938 4 2
13 ERCC-00154 A 7.32421875 1.83105469 4 2
14 ERCC-00085 A 7.32421875 1.83105469 4 2
15 ERCC-00028 A 3.66210938 0.91552734 4 2
16 ERCC-00033 A 1.83105469 0.45776367 4 2
17 ERCC-00134 A 1.83105469 0.45776367 4 2
18 ERCC-00147 A 0.91552734 0.22888184 4 2
19 ERCC-00097 A 0.45776367 0.11444092 4 2
20 ERCC-00156 A 0.45776367 0.11444092 4 2
21 ERCC-00123 A 0.22888184 0.05722046 4 2
22 ERCC-00017 A 0.11444092 0.02861023 4 2
23 ERCC-00083 A 0.02861023 0.00715256 4 2
24 ERCC-00096 B 15000 15000 1 0
25 ERCC-00171 B 3750 3750 1 0
26 ERCC-00009 B 937.5 937.5 1 0
27 ERCC-00042 B 468.75 468.75 1 0
28 ERCC-00060 B 234.375 234.375 1 0
29 ERCC-00035 B 117.1875 117.1875 1 0
30 ERCC-00025 B 58.59375 58.59375 1 0
31 ERCC-00051 B 58.59375 58.59375 1 0
32 ERCC-00053 B 29.296875 29.296875 1 0
33 ERCC-00148 B 14.6484375 14.6484375 1 0
34 ERCC-00126 B 14.6484375 14.6484375 1 0
35 ERCC-00034 B 7.32421875 7.32421875 1 0
36 ERCC-00150 B 3.66210938 3.66210938 1 0
37 ERCC-00067 B 3.66210938 3.66210938 1 0
38 ERCC-00031 B 1.83105469 1.83105469 1 0
39 ERCC-00109 B 0.91552734 0.91552734 1 0
40 ERCC-00073 B 0.91552734 0.91552734 1 0
41 ERCC-00158 B 0.45776367 0.45776367 1 0
42 ERCC-00104 B 0.22888184 0.22888184 1 0
43 ERCC-00142 B 0.22888184 0.22888184 1 0
44 ERCC-00138 B 0.11444092 0.11444092 1 0
45 ERCC-00117 B 0.05722046 0.05722046 1 0
46 ERCC-00075 B 0.01430512 0.01430512 1 0
47 ERCC-00074 C 15000 22500 0.67 -0.58
48 ERCC-00113 C 3750 5625 0.67 -0.58
49 ERCC-00145 C 937.5 1406.25 0.67 -0.58
50 ERCC-00111 C 468.75 703.125 0.67 -0.58
51 ERCC-00076 C 234.375 351.5625 0.67 -0.58
52 ERCC-00044 C 117.1875 175.78125 0.67 -0.58
53 ERCC-00162 C 58.59375 87.890625 0.67 -0.58
54 ERCC-00071 C 58.59375 87.890625 0.67 -0.58
55 ERCC-00084 C 29.296875 43.9453125 0.67 -0.58
56 ERCC-00099 C 14.6484375 21.9726563 0.67 -0.58
57 ERCC-00054 C 14.6484375 21.9726563 0.67 -0.58
58 ERCC-00157 C 7.32421875 10.9863281 0.67 -0.58
59 ERCC-00143 C 3.66210938 5.49316406 0.67 -0.58
60 ERCC-00039 C 3.66210938 5.49316406 0.67 -0.58
61 ERCC-00058 C 1.83105469 2.74658203 0.67 -0.58
62 ERCC-00120 C 0.91552734 1.37329102 0.67 -0.58
63 ERCC-00040 C 0.91552734 1.37329102 0.67 -0.58
64 ERCC-00164 C 0.45776367 0.68664551 0.67 -0.58
65 ERCC-00024 C 0.22888184 0.34332275 0.67 -0.58
66 ERCC-00016 C 0.22888184 0.34332275 0.67 -0.58
67 ERCC-00012 C 0.11444092 0.17166138 0.67 -0.58
68 ERCC-00098 C 0.05722046 0.08583069 0.67 -0.58
69 ERCC-00057 C 0.01430512 0.02145767 0.67 -0.58
70 ERCC-00002 D 15000 30000 0.5 -1
71 ERCC-00046 D 3750 7500 0.5 -1
72 ERCC-00003 D 937.5 1875 0.5 -1
73 ERCC-00043 D 468.75 937.5 0.5 -1
74 ERCC-00022 D 234.375 468.75 0.5 -1
75 ERCC-00112 D 117.1875 234.375 0.5 -1
76 ERCC-00165 D 58.59375 117.1875 0.5 -1
77 ERCC-00079 D 58.59375 117.1875 0.5 -1
78 ERCC-00078 D 29.296875 58.59375 0.5 -1
79 ERCC-00163 D 14.6484375 29.296875 0.5 -1
80 ERCC-00059 D 14.6484375 29.296875 0.5 -1
81 ERCC-00160 D 7.32421875 14.6484375 0.5 -1
82 ERCC-00014 D 3.66210938 7.32421875 0.5 -1
83 ERCC-00077 D 3.66210938 7.32421875 0.5 -1
84 ERCC-00069 D 1.83105469 3.66210938 0.5 -1
85 ERCC-00137 D 0.91552734 1.83105469 0.5 -1
86 ERCC-00013 D 0.91552734 1.83105469 0.5 -1
87 ERCC-00168 D 0.45776367 0.91552734 0.5 -1
88 ERCC-00041 D 0.22888184 0.45776367 0.5 -1
89 ERCC-00081 D 0.22888184 0.45776367 0.5 -1
90 ERCC-00086 D 0.11444092 0.22888184 0.5 -1
91 ERCC-00061 D 0.05722046 0.11444092 0.5 -1
92 ERCC-00048 D 0.01430512 0.02861023 0.5 -1
......@@ -13,18 +13,95 @@ library(RUVSeq);
rm(list = ls());
# read UTR counts
utrs<-as.matrix(read.table("/Users/tushevg/Desktop/UTRPolyribosomes/countTable_UTRPolysomes_UTRs_21Nov2017.txt",stringsAsFactors=F, header=T, row.names=1, check.names=F));
utrs<-as.matrix(read.table("/Users/tushevg/Desktop/UTRPolyribosomes/countTable_UTRPolysomes_UTRs_21Nov2017.txt",stringsAsFactors=F, header=T, row.names=1, check.names=F, colClasses=c("factor", rep("NULL", 5), rep("integer", 12))));
# read ERCC counts
ercc<-as.matrix(read.table("/Users/tushevg/Desktop/UTRPolyribosomes/countTable_UTRPolysomes_ERCC_21Nov2017.txt",stringsAsFactors=F, header=T, row.names=1, check.names=F));
# reorder data by name
#data<-indata[,c("InputHc2","InputHc3","80SHc2","80SHc3","2and3Hc2","2and3Hc3","4and5Hc2","4and5Hc3","6and7Hc2","6and7Hc3","bigger7Hc2","bigger7Hc3")];
utrs<-utrs[,c("InputHc2","InputHc3","80SHc2","80SHc3","2and3Hc2","2and3Hc3","4and5Hc2","4and5Hc3","6and7Hc2","6and7Hc3","bigger7Hc2","bigger7Hc3")];
ercc<-ercc[,c("InputHc2","InputHc3","80SHc2","80SHc3","2and3Hc2","2and3Hc3","4and5Hc2","4and5Hc3","6and7Hc2","6and7Hc3","bigger7Hc2","bigger7Hc3")];
data<-rbind(utrs, ercc);
# filter for 2 reads per million
#libsize<-colSums(data);
#tmp<-1e6*data / libsize;
#filter<-apply(tmp, 1, function(x) length(x[x>=2])>=2);
#data<-data[filter,];
# filter for 5 reads in at least two samples
filter<-apply(data, 1,function(x) length(x[x>=5])>=2);
data<-data[filter,];
utrs<-rownames(data)[grep("^ERCC", rownames(data), invert=TRUE)];
ercc<-rownames(data)[grep("^ERCC", rownames(data))];
# set factors
levelSample<-as.factor(rep(c("Input","80S","2and3","4and5","6and7","bigger7"), each=2));
# create EDASeq set
set<-newSeqExpressionSet(as.matrix(data), phenoData=data.frame(levelSample, row.names=colnames(data)));
# plot raw data
library(RColorBrewer)
colors<-brewer.pal(3, "Set2");
plotRLE(set, outline=FALSE, ylim=c(-4,4), col=colors[levelSample]);
plotPCA(set, col=colors[levelSample]);
# betweenLaneNormalization
set<-betweenLaneNormalization(set, which="upper");
plotRLE(set, outline=FALSE, ylim=c(-4,4), col=colors[levelSample]);
plotPCA(set, col=colors[levelSample]);
# unwanted variation with spiked-ins
set1<-RUVg(set, ercc, k=1);
pData(set1)
plotRLE(set, outline=FALSE, ylim=c(-4,4), col=colors[levelSample]);
plotPCA(set, col=colors[levelSample]);
# differential expression analysis EdgeR
design<-model.matrix(~levelSample + W_1, data=pData(set1));
y<-DGEList(counts=counts(set1), group=levelSample);
y<-calcNormFactors(y, method="upperquartile");
y<-estimateGLMCommonDisp(y, design);
y<-estimateGLMTagwiseDisp(y, design);
fit<-glmFit(y, design);
lrt<-glmLRT(fit, coef=2);
topTags(lrt);
# differential expression analysis with DESeq2
library(DESeq2);
dds<-DESeqDataSetFromMatrix(countData=counts(set1), colData=pData(set1), design=~W_1+levelSample);
dds<-DESeq(dds);
res<-results(dds);
res
dds<-DESeq(dds, test="LRT", reduced=as.formula("~ W_1"));
res<-results(dds);
res
# RUVs based on sample replica
differences<-makeGroups(levelSample);
set3<-RUVs(set, utrs, k=1, differences);
pData(set3)
# RUVr ersimating the factors of unwanted variation using residuals
design<-model.matrix(~levelSample, data=pData(set));
y <- DGEList(counts=counts(set), group=levelSample)
y <- calcNormFactors(y, method="upperquartile")
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
fit <- glmFit(y, design)
res <- residuals(fit, type="deviance")
set4 <- RUVr(set, utrs, k=1, res)
pData(set4)
# filter data
#filter<-apply(data, 1, function(x) length(x[x>5]) >= 2);
#filtered<-data[filter,];
......@@ -8,8 +8,54 @@ stats = readCountTable('countTable_UTRPolysomes_Stats_21Nov2017.txt');
%% read ercc file
ercc = readCountTable('countTable_UTRPolysomes_ERCC_21Nov2017.txt');
fr=fopen('cms_095046.txt','r');
fgetl(fr);
txt = textscan(fr,'%*s %s %*s %n %n %*s %*s','delimiter','\t');
fclose(fr);
ercc.ref=txt{1};
ercc.mix1=txt{2};
ercc.mix2=txt{3};
ercc.volume = [16,22,24,10,7,7];
ercc.volume = repmat(ercc.volume, 2, 1);
ercc.volume = ercc.volume(:)';
ercc.dilution = [1000,1000,1000,1000,100,100];
ercc.dilution = repmat(ercc.dilution, 2, 1);
ercc.dilution = ercc.dilution(:)';
ercc.weight = ercc.volume ./ ercc.dilution;
ercc.mix = repmat(ercc.mix1,1,size(ercc.counts,2));
ercc.conc = bsxfun(@times, ercc.mix, ercc.weight);
[~,idxQry, idxRef] = intersect(ercc.label, ercc.ref);
figure('color','w');
clrmtx = jet(12);
hold on;
for k = 1 : size(ercc.counts,2)
X = ercc.counts(idxQry,k);
Y = ercc.conc(idxRef,k);
idxFilter = X < 5;
X(idxFilter) = [];
Y(idxFilter) = [];
plot(log2(X),log2(Y),'.','Color',clrmtx(k,:));
p = polyfit(log2(X),log2(Y),1);
Xfit = linspace(2,16,100);
Yfit = polyval(p,Xfit);
plot(Xfit,Yfit,'color',clrmtx(k,:));
end
hold off;
%% read UTRs file
%{
fh = fopen('countTable_UTRPolysomes_UTRs_21Nov2017.txt', 'r');
header = fgetl(fh);
header = regexp(header, '\t', 'split');
......@@ -26,5 +72,5 @@ data.window = txt{4};
data.span = txt{5};
data.reads = txt{6};
data.counts = [txt{7:end}];
%}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment