Commit c660e865 authored by Georgi Tushev's avatar Georgi Tushev
Browse files

ercc normed

parent afcabb43
.DS_Store
.Rhistory
*.m~
......@@ -8,40 +8,27 @@
# load edgeR
library(edgeR);
library(RUVSeq);
library(rstudioapi)
# set working directory
(WD <- dirname(rstudioapi::getSourceEditorContext()$path))
if (!is.null(WD)) setwd(WD)
# clean current variables
rm(list = ls());
# read UTR counts
utrs<-as.matrix(read.table("/Users/tushevg/Desktop/UTRPolyribosomes/countTable_UTRPolysomes_UTRs_21Nov2017.txt",stringsAsFactors=F, header=T, row.names=1, check.names=F, colClasses=c("factor", rep("NULL", 5), rep("integer", 12))));
filterUTRs<-grep("group", rownames(utrs));
utrs<-as.matrix(read.table("../tables/countTable_UTRPolysomes_UTRSupp_23Nov2017.txt",stringsAsFactors=F, header=T, row.names=1, check.names=F));
# read ERCC counts
ercc<-as.matrix(read.table("/Users/tushevg/Desktop/UTRPolyribosomes/countTable_UTRPolysomes_ERCC_21Nov2017.txt",stringsAsFactors=F, header=T, row.names=1, check.names=F));
ercc<-as.matrix(read.table("../tables/countTable_UTRPolysomes_ERCC_21Nov2017.txt",stringsAsFactors=F, header=T, row.names=1, check.names=F));
filterErcc<-apply(ercc, 1, function(x) length(x[x>=5])>=2);
ercc<-ercc[filterErcc,];
# reorder data by name
utrs<-utrs[,c("InputHc2","InputHc3","80SHc2","80SHc3","2and3Hc2","2and3Hc3","4and5Hc2","4and5Hc3","6and7Hc2","6and7Hc3","bigger7Hc2","bigger7Hc3")];
ercc<-ercc[,c("InputHc2","InputHc3","80SHc2","80SHc3","2and3Hc2","2and3Hc3","4and5Hc2","4and5Hc3","6and7Hc2","6and7Hc3","bigger7Hc2","bigger7Hc3")];
# concatenate data
data<-rbind(utrs, ercc);
# filter for 2 reads per million
#libsize<-colSums(data);
#tmp<-1e6*data / libsize;
#filter<-apply(tmp, 1, function(x) length(x[x>=2])>=2);
#data<-data[filter,];
# filter for 5 reads in at least two samples
filter<-apply(data, 1,function(x) length(x[x>=5])>=2);
data<-data[filter,];
utrs<-rownames(data)[grep("group", rownames(data))];
ercc<-rownames(data)[grep("^ERCC", rownames(data))];
# normalize with loess regression
weights<-c(7/100,7/100,10/1000,10/1000,16/1000,16/1000,22/1000,22/1000,24/1000,24/1000,7/100,7/100);
namesUtrs<-rownames(data)[grep("group", rownames(data))];
namesErcc<-rownames(data)[grep("^ERCC", rownames(data))];
# set factors
levelSample<-as.factor(rep(c("Input","80S","2and3","4and5","6and7","bigger7"), each=2));
......@@ -51,7 +38,7 @@ set<-newSeqExpressionSet(as.matrix(data), phenoData=data.frame(levelSample, row.
# plot raw data
library(RColorBrewer)
colors<-brewer.pal(3, "Set2");
colors<-brewer.pal(6, "Set2");
plotRLE(set, outline=FALSE, ylim=c(-4,4), col=colors[levelSample]);
plotPCA(set, col=colors[levelSample]);
......@@ -61,12 +48,12 @@ plotRLE(set, outline=FALSE, ylim=c(-4,4), col=colors[levelSample]);
plotPCA(set, col=colors[levelSample]);
# unwanted variation with spiked-ins
set1<-RUVg(set, ercc, k=1);
set1<-RUVg(set, namesErcc, k=1);
pData(set1)
plotRLE(set, outline=FALSE, ylim=c(-4,4), col=colors[levelSample]);
plotPCA(set, col=colors[levelSample]);
write.table(normCounts(set1),"/Users/tushevg/Desktop/UTRPolyribosomes/countTable_UTRPolysomes_RUVg_22Nov2017.txt",quote=FALSE,sep="\t",col.names=TRUE);
write.table(normCounts(set),"../tables/countTable_UTRPolysomes_RUVg_22Nov2017.txt",quote=FALSE,sep="\t",col.names=TRUE);
# differential expression analysis EdgeR
design<-model.matrix(~levelSample + W_1, data=pData(set1));
......
% normaliseErcc
clc
clear variables
close all
%% read count file
data = readCountTable('../tables/countTable_UTRPolysomes_UQNormed_24Nov2017.txt');
%% extract ercc
idxErcc = strncmp('ERCC', data.label, 4);
ercc.ids = data.label(idxErcc);
ercc.counts = data.counts(idxErcc,:);
%% read ercc mix concentration
fr=fopen('../tables/cms_095046.txt','r');
fgetl(fr);
txt = textscan(fr,'%*s %s %*s %n %n %*s %*s','delimiter','\t');
fclose(fr);
mix = txt{2};
[~, idxRef, idxQry] = intersect(txt{1}, ercc.ids);
ercc.mix(idxQry,1) = mix(idxRef);
%% assign weight
ercc.volume = [7, 10, 16, 22, 24, 7];
ercc.dilution = 1./[100, 1000, 1000, 1000, 1000, 100];
ercc.weight = ercc.volume .* ercc.dilution;
ercc.weight = repmat(ercc.weight, 2, 1);
ercc.weight = ercc.weight(:);
%% calculate concentration
ercc.attomoles = ercc.mix * ercc.weight';
%% linear regression
clrmtx = jet(6);
figure('color','w');
i = 1;
pArray = zeros(12, 2);
hold on;
k = 1;
for k = 1 : 12
X = log2(ercc.counts(:,k) + 1);
Y = log2(ercc.attomoles(:,k) + 1);
idxFilter = X >= 4;
plot(X,Y,'.','color',clrmtx(i,:));
p = polyfit(X(idxFilter),Y(idxFilter),1);
pArray(k,:) = p;
xfit = linspace(min(X),max(X),100);
yfit = polyval(p, xfit);
plot(xfit, yfit, '-','color',clrmtx(i,:),'linewidth',1.2);
if mod(k,2) == 0
i = i + 1;
end
end
hold off;
nfactor = mean(pArray(:,1))./pArray(:,1);
ncounts = bsxfun(@times, data.counts(~idxErcc,:), nfactor') + 1;
%% write out normed counts
fmtout = repmat({'%.6f\t'},size(ncounts,2)+1,1);
fmtout(1) = {'%s\t'};
fmtout(end) = {'%.6f\n'};
fmtout = sprintf('%s',fmtout{:});
mtxout = [data.label(~idxErcc),num2cell(ncounts)]';
hdrout = sprintf('%s\t',data.header{:});
hdrout(end) = [];
fw = fopen('../tables/countTable_UTRPolysomes_ERCCNormed_24Nov2017.txt','w');
fprintf(fw,'%s\n',hdrout);
fprintf(fw,fmtout,mtxout{:});
fclose(fw);
%% plot results
%{
rle = log2(ncounts ./ median(ncounts(:)));
figure('color','w');
hold on;
plot([0.5,12.5],[0,0],'k');
boxplot(rle,'Symbol','','colors',kron(clrmtx,ones(2,1)));
hold off
set(gca,'Box','off',...
'XTickLabel',data.header(2:end),...
'XTickLabelRotation',45,...
'YLim',[-5,8.5]);
c = pca(ncounts,'Algorithm','eig','Centered',true);
i = 1;
figure('color','w');
hold on;
for k = 1 : 12
h(k) = plot(c(k,1),c(k,2),'.','color',clrmtx(i,:),'MarkerSize',20);
if mod(k,2) == 0
i = i + 1;
end
end
hl = legend(h(1:2:end),{'Input','80s','2and3','4and5','6and7','>7'});
set(hl,'Location','NorthWest','EdgeColor','w');
hold off;
%}
......@@ -3,8 +3,8 @@ clc
clear variables
close all
tmp = readCountTable('countTable_UTRPolysomes_RUVg_22Nov2017.txt');
%{
tmp = readCountTable('../tables/countTable_UTRPolysomes_RUVg_22Nov2017.txt');
x = tmp.counts(:,1:2);
y = tmp.counts(:,3:2:end);
......@@ -38,25 +38,24 @@ for k = 1 : N
end
hold off;
%{
%}
%% read stats file
stats = readCountTable('countTable_UTRPolysomes_Stats_21Nov2017.txt');
%stats = readCountTable('../tables/countTable_UTRPolysomes_Stats_21Nov2017.txt');
%% read ercc file
ercc = readCountTable('countTable_UTRPolysomes_ERCC_21Nov2017.txt');
fr=fopen('cms_095046.txt','r');
ercc = readCountTable('../tables/countTable_UTRPolysomes_ERCC_21Nov2017.txt');
fr=fopen('../tables/cms_095046.txt','r');
fgetl(fr);
txt = textscan(fr,'%*s %s %*s %n %n %*s %*s','delimiter','\t');
fclose(fr);
ercc.ref=txt{1};
ercc.mix1=txt{2};
ercc.mix2=txt{3};
ercc.volume = [16,22,24,10,7,7];
ercc.volume = [7,10,16,22,24,7];
ercc.volume = repmat(ercc.volume, 2, 1);
ercc.volume = ercc.volume(:)';
ercc.dilution = [1000,1000,1000,1000,100,100];
ercc.dilution = [100,1000,1000,1000,1000,100];
ercc.dilution = repmat(ercc.dilution, 2, 1);
ercc.dilution = ercc.dilution(:)';
ercc.weight = ercc.volume ./ ercc.dilution;
......@@ -65,31 +64,43 @@ ercc.mix = repmat(ercc.mix1,1,size(ercc.counts,2));
ercc.conc = bsxfun(@times, ercc.mix, ercc.weight);
[~,idxQry, idxRef] = intersect(ercc.label, ercc.ref);
X = ercc.conc(idxRef,:);
Y = ercc.counts(idxQry,:);
Y = ercc.conc(idxRef,:);
X = ercc.counts(idxQry,:);
idxFilter = (sum(Y >= 5, 2) >= 2);
X = X(idxFilter,:);
Y = Y(idxFilter,:);
c = pca(Y);
Ysmth = smooth(X(:),Y(:),'lowess');
Ysmth = reshape(Ysmth, size(Y));
YY = bsxfun(@rdivide, Y, sum(Y));
%% example variation, raw counts
tmp = Y+1;
tmpr = bsxfun(@rdivide, tmp, median(tmp(:)));
figure('color','w')
clrmtx = jet(12);
PP = zeros(12,2);
figure('color','w');
hold on;
plot([0,13],[0,0],'k');
boxplot(log2(tmpr),'color',[.45,.45,.45]);
for k = 1 : 12
pX = log2(X(:,k));
pY = log2(Y(:,k));
p = polyfit(pX,pY,1);
PP(k,:) = p;
plot(pX,pY,'.','color',clrmtx(k,:));
xfit = linspace(min(pX),max(pX),100);
yfit = polyval(p, xfit);
plot(xfit,yfit, 'color',clrmtx(k,:));
end
xfit = linspace(2,15,100);
yfit = polyval(median(PP),xfit);
plot(xfit, yfit, 'color','k','linewidth',1.2);
hold off;
set(gca,'XTickLabel',ercc.header(2:end),...
'XTickLabelRotation',45);
%}
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment