Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mpibr
schu
UTRPolyribosomes
Commits
c660e865
Commit
c660e865
authored
Nov 24, 2017
by
Georgi Tushev
⚽
Browse files
ercc normed
parent
afcabb43
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
.gitignore
0 → 100644
View file @
c660e865
.DS_Store
.Rhistory
*.m~
sandbox/edgeR_RUVSeqNorm.R
View file @
c660e865
...
...
@@ -8,40 +8,27 @@
# load edgeR
library
(
edgeR
);
library
(
RUVSeq
);
library
(
rstudioapi
)
# set working directory
(
WD
<-
dirname
(
rstudioapi
::
getSourceEditorContext
()
$
path
))
if
(
!
is.null
(
WD
))
setwd
(
WD
)
# clean current variables
rm
(
list
=
ls
());
# read UTR counts
utrs
<-
as.matrix
(
read.table
(
"/Users/tushevg/Desktop/UTRPolyribosomes/countTable_UTRPolysomes_UTRs_21Nov2017.txt"
,
stringsAsFactors
=
F
,
header
=
T
,
row.names
=
1
,
check.names
=
F
,
colClasses
=
c
(
"factor"
,
rep
(
"NULL"
,
5
),
rep
(
"integer"
,
12
))));
filterUTRs
<-
grep
(
"group"
,
rownames
(
utrs
));
utrs
<-
as.matrix
(
read.table
(
"../tables/countTable_UTRPolysomes_UTRSupp_23Nov2017.txt"
,
stringsAsFactors
=
F
,
header
=
T
,
row.names
=
1
,
check.names
=
F
));
# read ERCC counts
ercc
<-
as.matrix
(
read.table
(
"/Users/tushevg/Desktop/UTRPolyribosomes/countTable_UTRPolysomes_ERCC_21Nov2017.txt"
,
stringsAsFactors
=
F
,
header
=
T
,
row.names
=
1
,
check.names
=
F
));
ercc
<-
as.matrix
(
read.table
(
"../tables/countTable_UTRPolysomes_ERCC_21Nov2017.txt"
,
stringsAsFactors
=
F
,
header
=
T
,
row.names
=
1
,
check.names
=
F
));
filterErcc
<-
apply
(
ercc
,
1
,
function
(
x
)
length
(
x
[
x
>=
5
])
>=
2
);
ercc
<-
ercc
[
filterErcc
,];
# reorder data by name
utrs
<-
utrs
[,
c
(
"InputHc2"
,
"InputHc3"
,
"80SHc2"
,
"80SHc3"
,
"2and3Hc2"
,
"2and3Hc3"
,
"4and5Hc2"
,
"4and5Hc3"
,
"6and7Hc2"
,
"6and7Hc3"
,
"bigger7Hc2"
,
"bigger7Hc3"
)];
ercc
<-
ercc
[,
c
(
"InputHc2"
,
"InputHc3"
,
"80SHc2"
,
"80SHc3"
,
"2and3Hc2"
,
"2and3Hc3"
,
"4and5Hc2"
,
"4and5Hc3"
,
"6and7Hc2"
,
"6and7Hc3"
,
"bigger7Hc2"
,
"bigger7Hc3"
)];
# concatenate data
data
<-
rbind
(
utrs
,
ercc
);
# filter for 2 reads per million
#libsize<-colSums(data);
#tmp<-1e6*data / libsize;
#filter<-apply(tmp, 1, function(x) length(x[x>=2])>=2);
#data<-data[filter,];
# filter for 5 reads in at least two samples
filter
<-
apply
(
data
,
1
,
function
(
x
)
length
(
x
[
x
>=
5
])
>=
2
);
data
<-
data
[
filter
,];
utrs
<-
rownames
(
data
)[
grep
(
"group"
,
rownames
(
data
))];
ercc
<-
rownames
(
data
)[
grep
(
"^ERCC"
,
rownames
(
data
))];
# normalize with loess regression
weights
<-
c
(
7
/
100
,
7
/
100
,
10
/
1000
,
10
/
1000
,
16
/
1000
,
16
/
1000
,
22
/
1000
,
22
/
1000
,
24
/
1000
,
24
/
1000
,
7
/
100
,
7
/
100
);
namesUtrs
<-
rownames
(
data
)[
grep
(
"group"
,
rownames
(
data
))];
namesErcc
<-
rownames
(
data
)[
grep
(
"^ERCC"
,
rownames
(
data
))];
# set factors
levelSample
<-
as.factor
(
rep
(
c
(
"Input"
,
"80S"
,
"2and3"
,
"4and5"
,
"6and7"
,
"bigger7"
),
each
=
2
));
...
...
@@ -51,7 +38,7 @@ set<-newSeqExpressionSet(as.matrix(data), phenoData=data.frame(levelSample, row.
# plot raw data
library
(
RColorBrewer
)
colors
<-
brewer.pal
(
3
,
"Set2"
);
colors
<-
brewer.pal
(
6
,
"Set2"
);
plotRLE
(
set
,
outline
=
FALSE
,
ylim
=
c
(
-4
,
4
),
col
=
colors
[
levelSample
]);
plotPCA
(
set
,
col
=
colors
[
levelSample
]);
...
...
@@ -61,12 +48,12 @@ plotRLE(set, outline=FALSE, ylim=c(-4,4), col=colors[levelSample]);
plotPCA
(
set
,
col
=
colors
[
levelSample
]);
# unwanted variation with spiked-ins
set1
<-
RUVg
(
set
,
e
rcc
,
k
=
1
);
set1
<-
RUVg
(
set
,
namesE
rcc
,
k
=
1
);
pData
(
set1
)
plotRLE
(
set
,
outline
=
FALSE
,
ylim
=
c
(
-4
,
4
),
col
=
colors
[
levelSample
]);
plotPCA
(
set
,
col
=
colors
[
levelSample
]);
write.table
(
normCounts
(
set
1
),
"
/Users/tushevg/Desktop/UTRPolyribosom
es/countTable_UTRPolysomes_RUVg_22Nov2017.txt"
,
quote
=
FALSE
,
sep
=
"\t"
,
col.names
=
TRUE
);
write.table
(
normCounts
(
set
),
"
../tabl
es/countTable_UTRPolysomes_RUVg_22Nov2017.txt"
,
quote
=
FALSE
,
sep
=
"\t"
,
col.names
=
TRUE
);
# differential expression analysis EdgeR
design
<-
model.matrix
(
~
levelSample
+
W_1
,
data
=
pData
(
set1
));
...
...
sandbox/normaliseErcc.m
0 → 100644
View file @
c660e865
% normaliseErcc
clc
clear
variables
close
all
%% read count file
data
=
readCountTable
(
'../tables/countTable_UTRPolysomes_UQNormed_24Nov2017.txt'
);
%% extract ercc
idxErcc
=
strncmp
(
'ERCC'
,
data
.
label
,
4
);
ercc
.
ids
=
data
.
label
(
idxErcc
);
ercc
.
counts
=
data
.
counts
(
idxErcc
,:);
%% read ercc mix concentration
fr
=
fopen
(
'../tables/cms_095046.txt'
,
'r'
);
fgetl
(
fr
);
txt
=
textscan
(
fr
,
'%*s %s %*s %n %n %*s %*s'
,
'delimiter'
,
'\t'
);
fclose
(
fr
);
mix
=
txt
{
2
};
[
~
,
idxRef
,
idxQry
]
=
intersect
(
txt
{
1
},
ercc
.
ids
);
ercc
.
mix
(
idxQry
,
1
)
=
mix
(
idxRef
);
%% assign weight
ercc
.
volume
=
[
7
,
10
,
16
,
22
,
24
,
7
];
ercc
.
dilution
=
1.
/[
100
,
1000
,
1000
,
1000
,
1000
,
100
];
ercc
.
weight
=
ercc
.
volume
.*
ercc
.
dilution
;
ercc
.
weight
=
repmat
(
ercc
.
weight
,
2
,
1
);
ercc
.
weight
=
ercc
.
weight
(:);
%% calculate concentration
ercc
.
attomoles
=
ercc
.
mix
*
ercc
.
weight
'
;
%% linear regression
clrmtx
=
jet
(
6
);
figure
(
'color'
,
'w'
);
i
=
1
;
pArray
=
zeros
(
12
,
2
);
hold
on
;
k
=
1
;
for
k
=
1
:
12
X
=
log2
(
ercc
.
counts
(:,
k
)
+
1
);
Y
=
log2
(
ercc
.
attomoles
(:,
k
)
+
1
);
idxFilter
=
X
>=
4
;
plot
(
X
,
Y
,
'.'
,
'color'
,
clrmtx
(
i
,:));
p
=
polyfit
(
X
(
idxFilter
),
Y
(
idxFilter
),
1
);
pArray
(
k
,:)
=
p
;
xfit
=
linspace
(
min
(
X
),
max
(
X
),
100
);
yfit
=
polyval
(
p
,
xfit
);
plot
(
xfit
,
yfit
,
'-'
,
'color'
,
clrmtx
(
i
,:),
'linewidth'
,
1.2
);
if
mod
(
k
,
2
)
==
0
i
=
i
+
1
;
end
end
hold
off
;
nfactor
=
mean
(
pArray
(:,
1
))
.
/
pArray
(:,
1
);
ncounts
=
bsxfun
(
@
times
,
data
.
counts
(
~
idxErcc
,:),
nfactor
'
)
+
1
;
%% write out normed counts
fmtout
=
repmat
({
'%.6f\t'
},
size
(
ncounts
,
2
)
+
1
,
1
);
fmtout
(
1
)
=
{
'%s\t'
};
fmtout
(
end
)
=
{
'%.6f\n'
};
fmtout
=
sprintf
(
'%s'
,
fmtout
{:});
mtxout
=
[
data
.
label
(
~
idxErcc
),
num2cell
(
ncounts
)]
'
;
hdrout
=
sprintf
(
'%s\t'
,
data
.
header
{:});
hdrout
(
end
)
=
[];
fw
=
fopen
(
'../tables/countTable_UTRPolysomes_ERCCNormed_24Nov2017.txt'
,
'w'
);
fprintf
(
fw
,
'%s\n'
,
hdrout
);
fprintf
(
fw
,
fmtout
,
mtxout
{:});
fclose
(
fw
);
%% plot results
%{
rle
=
log2
(
ncounts
.
/
median
(
ncounts
(:)));
figure
(
'color'
,
'w'
);
hold
on
;
plot
([
0.5
,
12.5
],[
0
,
0
],
'k'
);
boxplot
(
rle
,
'Symbol'
,
''
,
'colors'
,
kron
(
clrmtx
,
ones
(
2
,
1
)));
hold
off
set
(
gca
,
'Box'
,
'off'
,
...
'XTickLabel'
,
data
.
header
(
2
:
end
),
...
'XTickLabelRotation'
,
45
,
...
'YLim'
,[
-
5
,
8.5
]);
c
=
pca
(
ncounts
,
'Algorithm'
,
'eig'
,
'Centered'
,
true
);
i
=
1
;
figure
(
'color'
,
'w'
);
hold
on
;
for
k
=
1
:
12
h
(
k
)
=
plot
(
c
(
k
,
1
),
c
(
k
,
2
),
'.'
,
'color'
,
clrmtx
(
i
,:),
'MarkerSize'
,
20
);
if
mod
(
k
,
2
)
==
0
i
=
i
+
1
;
end
end
hl
=
legend
(
h
(
1
:
2
:
end
),{
'Input'
,
'80s'
,
'2and3'
,
'4and5'
,
'6and7'
,
'>7'
});
set
(
hl
,
'Location'
,
'NorthWest'
,
'EdgeColor'
,
'w'
);
hold
off
;
%}
sandbox/polyribosomeAnalysis.m
View file @
c660e865
...
...
@@ -3,8 +3,8 @@ clc
clear
variables
close
all
tmp
=
readCountTable
(
'countTable_UTRPolysomes_RUVg_22Nov2017.txt'
);
%{
tmp
=
readCountTable
(
'
../tables/
countTable_UTRPolysomes_RUVg_22Nov2017.txt'
);
x
=
tmp
.
counts
(:,
1
:
2
);
y
=
tmp
.
counts
(:,
3
:
2
:
end
);
...
...
@@ -38,25 +38,24 @@ for k = 1 : N
end
hold
off
;
%{
%}
%% read stats file
stats
=
readCountTable
(
'countTable_UTRPolysomes_Stats_21Nov2017.txt'
);
%
stats = readCountTable('
../tables/
countTable_UTRPolysomes_Stats_21Nov2017.txt');
%% read ercc file
ercc
=
readCountTable
(
'countTable_UTRPolysomes_ERCC_21Nov2017.txt'
);
fr
=
fopen
(
'cms_095046.txt'
,
'r'
);
ercc
=
readCountTable
(
'
../tables/
countTable_UTRPolysomes_ERCC_21Nov2017.txt'
);
fr
=
fopen
(
'
../tables/
cms_095046.txt'
,
'r'
);
fgetl
(
fr
);
txt
=
textscan
(
fr
,
'%*s %s %*s %n %n %*s %*s'
,
'delimiter'
,
'\t'
);
fclose
(
fr
);
ercc
.
ref
=
txt
{
1
};
ercc
.
mix1
=
txt
{
2
};
ercc
.
mix2
=
txt
{
3
};
ercc
.
volume
=
[
16
,
22
,
24
,
10
,
7
,
7
];
ercc
.
volume
=
[
7
,
10
,
16
,
22
,
24
,
7
];
ercc
.
volume
=
repmat
(
ercc
.
volume
,
2
,
1
);
ercc
.
volume
=
ercc
.
volume
(:)
'
;
ercc
.
dilution
=
[
100
0
,
1000
,
1000
,
1000
,
100
,
100
];
ercc
.
dilution
=
[
100
,
1000
,
1000
,
1000
,
100
0
,
100
];
ercc
.
dilution
=
repmat
(
ercc
.
dilution
,
2
,
1
);
ercc
.
dilution
=
ercc
.
dilution
(:)
'
;
ercc
.
weight
=
ercc
.
volume
.
/
ercc
.
dilution
;
...
...
@@ -65,31 +64,43 @@ ercc.mix = repmat(ercc.mix1,1,size(ercc.counts,2));
ercc
.
conc
=
bsxfun
(
@
times
,
ercc
.
mix
,
ercc
.
weight
);
[
~
,
idxQry
,
idxRef
]
=
intersect
(
ercc
.
label
,
ercc
.
ref
);
X
=
ercc
.
conc
(
idxRef
,:);
Y
=
ercc
.
counts
(
idxQry
,:);
Y
=
ercc
.
conc
(
idxRef
,:);
X
=
ercc
.
counts
(
idxQry
,:);
idxFilter
=
(
sum
(
Y
>=
5
,
2
)
>=
2
);
X
=
X
(
idxFilter
,:);
Y
=
Y
(
idxFilter
,:);
c
=
pca
(
Y
);
Ysmth
=
smooth
(
X
(:),
Y
(:),
'lowess'
);
Ysmth
=
reshape
(
Ysmth
,
size
(
Y
));
YY
=
bsxfun
(
@
rdivide
,
Y
,
sum
(
Y
));
%% example variation, raw counts
tmp
=
Y
+
1
;
tmpr
=
bsxfun
(
@
rdivide
,
tmp
,
median
(
tmp
(:)));
figure
(
'color'
,
'w'
)
clrmtx
=
jet
(
12
);
PP
=
zeros
(
12
,
2
);
figure
(
'color'
,
'w'
);
hold
on
;
plot
([
0
,
13
],[
0
,
0
],
'k'
);
boxplot
(
log2
(
tmpr
),
'color'
,[
.
45
,
.
45
,
.
45
]);
for
k
=
1
:
12
pX
=
log2
(
X
(:,
k
));
pY
=
log2
(
Y
(:,
k
));
p
=
polyfit
(
pX
,
pY
,
1
);
PP
(
k
,:)
=
p
;
plot
(
pX
,
pY
,
'.'
,
'color'
,
clrmtx
(
k
,:));
xfit
=
linspace
(
min
(
pX
),
max
(
pX
),
100
);
yfit
=
polyval
(
p
,
xfit
);
plot
(
xfit
,
yfit
,
'color'
,
clrmtx
(
k
,:));
end
xfit
=
linspace
(
2
,
15
,
100
);
yfit
=
polyval
(
median
(
PP
),
xfit
);
plot
(
xfit
,
yfit
,
'color'
,
'k'
,
'linewidth'
,
1.2
);
hold
off
;
set
(
gca
,
'XTickLabel'
,
ercc
.
header
(
2
:
end
),
...
'XTickLabelRotation'
,
45
);
%}
...
...
tables/countTable_UTRPolysomes_ERCCNormed_24Nov2017.txt
0 → 100644
View file @
c660e865
This diff is collapsed.
Click to expand it.
tables/countTable_UTRPolysomes_RUVg_22Nov2017.txt
deleted
100644 → 0
View file @
afcabb43
This diff is collapsed.
Click to expand it.
tables/countTable_UTRPolysomes_UQNormed_24Nov2017.txt
0 → 100644
View file @
c660e865
This diff is collapsed.
Click to expand it.
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment