Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mpibr
schu
UTRPolyribosomes
Commits
947eaf12
Commit
947eaf12
authored
Nov 21, 2017
by
Georgi Tushev
Browse files
experiment summary
parent
768973df
Changes
2
Hide whitespace changes
Inline
Side-by-side
README.md
View file @
947eaf12
# UTRPolyribosomes
3'UTR-End Sequencing on polyribosome fractions
## Layout
samples (x 2 replica):
*
input (bound and unbound fraction together)
*
80S (monosome fraction)
*
2and3
*
4and5
*
6and7
*
\>
7
\+
spike-in controls (ERCC)
The library preparation is done using
[
MACE
](
https://www.ncbi.nlm.nih.gov/pubmed/25052703
)
method. Reads are sequenced on Illumina NextSeq 550.
## Overview
experimentSummary.pl
0 → 100755
View file @
947eaf12
#!/usr/bin/perl
use
Getopt::
Long
;
use
warnings
;
use
strict
;
sub
usage
($);
sub
fileList
($$);
sub
checkPaths
($$$$);
sub
parseFileList
($$$);
sub
parseLogs
($$);
sub
reportSummary
($$);
#sub parseLogFile($);
#sub parseCounts($$);
MAIN:
{
my
$help
;
my
$pathLogs
;
my
$pathCoverage
;
my
$keyDelimiter
=
'
_
';
my
$reportErcc
;
my
%summary
=
();
# input parser
GetOptions
(
"
logs=s
"
=>
\
$pathLogs
,
"
gbed=s
"
=>
\
$pathCoverage
,
"
delim=s
"
=>
\
$keyDelimiter
,
"
ercc
"
=>
\
$reportErcc
,
"
help
"
=>
\
$help
)
or
usage
("
Error :: invalid command line options
");
#defaults
usage
("
version 0.1, Nov 2017
")
if
(
$help
);
usage
("
Error :: path to log files is required!
")
if
(
!
defined
(
$pathLogs
));
usage
("
Error :: path to gbed files is required!
")
if
(
!
defined
(
$pathCoverage
));
# check paths
checkPaths
(
$pathLogs
,
$pathCoverage
,
$keyDelimiter
,
\
%summary
);
# parse logs
parseFileList
(
\
%summary
,
"
log
",
\
&parseLogs
);
parseFileList
(
\
%summary
,
"
gbed
",
\
&parseCoverage
);
# report
reportSummary
(
\
%summary
,
$reportErcc
);
exit
0
;
}
### --- subroutines --- ###
## CHECKPATHS
sub
checkPaths
($$$$)
{
my
$pathLogs
=
$_
[
0
];
my
$pathCoverage
=
$_
[
1
];
my
$keyDelimiter
=
$_
[
2
];
my
$summary
=
$_
[
3
];
my
$listLogs
=
fileList
(
$pathLogs
,
"
txt
");
my
$listCoverage
=
fileList
(
$pathCoverage
,
"
gbed.gz
");
# check logs and gbed
my
$iter
=
0
;
foreach
my
$fileLog
(
@
{
$listLogs
})
{
# current files
my
$fileBam
=
$listCoverage
->
[
$iter
];
# check if logs and gbed match
my
$checkKey
=
$fileBam
;
$checkKey
=~
s/\.gbed.gz//g
;
usage
("
Error :: Log and Bam files mismatch!
")
if
(
index
(
$fileLog
,
$checkKey
)
==
-
1
);
# check summary key
my
$summaryKey
=
(
$checkKey
=~
m/(.*)$keyDelimiter.*/
)
?
$
1
:
"
unknown
";
usage
("
Error :: Summary key cannot be parsed!
")
if
(
$summaryKey
eq
"
unknown
");
# set summary
$summary
->
{
$checkKey
}{"
log
"}
=
$pathLogs
.
'
/
'
.
$fileLog
;
$summary
->
{
$checkKey
}{"
gbed
"}
=
$pathCoverage
.
'
/
'
.
$fileBam
;
# next set files
$iter
++
;
}
return
;
}
## FILELIST
sub
fileList
($$)
{
my
$path
=
$_
[
0
];
my
$extension
=
$_
[
1
];
my
@list
=
();
opendir
(
my
$ph
,
$path
)
or
die
$!
;
while
(
my
$file
=
readdir
(
$ph
))
{
# filter for file
next
unless
(
-
f
"
$path
/
$file
");
# filter for extension
next
unless
(
$file
=~
m/\.$extension$/
);
# accumulate list
push
(
@list
,
$file
);
}
closedir
(
$ph
);
return
\
@list
;
}
## PARSEFILELIST
sub
parseFileList
($$$)
{
my
$summary
=
$_
[
0
];
my
$secondaryKey
=
$_
[
1
];
my
$fcnCallback
=
$_
[
2
];
foreach
my
$primaryKey
(
keys
%
{
$summary
})
{
my
$queryFile
=
$summary
->
{
$primaryKey
}{
$secondaryKey
};
$fcnCallback
->
(
$queryFile
,
$summary
->
{
$primaryKey
});
}
return
;
}
## PARSELOGS
sub
parseLogs
($$)
{
my
$logFile
=
$_
[
0
];
my
$summary
=
$_
[
1
];
my
$countsRaw
=
0
;
my
$countsUniquelyAligned
=
0
;
# parse each file
open
(
my
$fh
,
"
<
",
$logFile
)
or
die
$!
;
while
(
<
$fh
>
)
{
chomp
(
$_
);
# parse total number of reads
if
(
$_
=~
m/Number of input reads \|\s+([0-9]+)/g
)
{
$countsRaw
=
$
1
;
}
# parse uniquely aligned reads
if
(
$_
=~
m/Uniquely mapped reads number \|\s+([0-9]+)/g
)
{
$countsUniquelyAligned
=
$
1
;
}
}
close
(
$fh
);
# check results
usage
("
Error :: could not parse log files!
")
if
(
$countsRaw
==
0
||
$countsUniquelyAligned
==
0
);
# fill summary
$summary
->
{"
reads.raw
"}
=
$countsRaw
;
$summary
->
{"
reads.aligned
"}
=
$countsUniquelyAligned
;
return
;
}
## PARSECOVERAGE
sub
parseCoverage
($$)
{
my
$gbedFile
=
$_
[
0
];
my
$summary
=
$_
[
1
];
# fix gbed file spaces
$gbedFile
=~
s/\ /\\ /g
;
# open stream
my
$erccCount
=
0
;
my
$mitoCount
=
0
;
open
(
my
$fh
,
"
gzcat
$gbedFile
|
")
or
die
$!
;
while
(
<
$fh
>
)
{
chomp
(
$_
);
# count ERCC
if
(
$_
=~
m/^ERCC/
)
{
my
@line
=
split
("
\t
",
$_
,
6
);
$erccCount
+=
$line
[
3
];
$summary
->
{"
list.ercc
"}{
$line
[
0
]}
+=
$line
[
3
];
}
# count Mitochondria
if
(
$_
=~
m/^chrM/
)
{
my
@line
=
split
("
\t
",
$_
,
6
);
$mitoCount
+=
$line
[
3
];
}
}
close
(
$fh
);
$summary
->
{"
reads.ercc
"}
=
$erccCount
;
$summary
->
{"
reads.mito
"}
=
$mitoCount
;
}
## REPORTSUMMARY
sub
reportSummary
($$)
{
my
$summary
=
$_
[
0
];
my
$reportErcc
=
$_
[
1
];
if
(
$reportErcc
)
{
# extract Ercc list
my
%listErcc
=
();
foreach
my
$key
(
sort
keys
%
{
$summary
})
{
foreach
my
$ercc
(
sort
keys
%
{
$summary
->
{
$key
}{"
list.ercc
"}})
{
$listErcc
{
$ercc
}
=
1
;
}
}
# print table
my
@keysList
=
sort
keys
%
{
$summary
};
print
"
ERCC
\t
",
join
("
\t
",
@keysList
),"
\n
";
foreach
my
$ercc
(
sort
keys
%listErcc
)
{
print
$ercc
;
foreach
my
$key
(
@keysList
)
{
my
$erccCount
=
exists
(
$summary
->
{
$key
}{"
list.ercc
"}{
$ercc
})
?
$summary
->
{
$key
}{"
list.ercc
"}{
$ercc
}
:
0
;
print
"
\t
",
$erccCount
;
}
print
"
\n
";
}
}
else
{
# header
print
"
sample
\t
reads.raw
\t
reads.aligned
\t
reads.ercc
\t
reads.mito
\t
ratio.aligned
\t
ratio.ercc
\t
ratio.mito
\n
";
foreach
my
$key
(
sort
keys
%
{
$summary
})
{
# counts
my
$countsRaw
=
exists
(
$summary
->
{
$key
}{"
reads.raw
"})
?
$summary
->
{
$key
}{"
reads.raw
"}
:
0
;
my
$countsAligned
=
exists
(
$summary
->
{
$key
}{"
reads.aligned
"})
?
$summary
->
{
$key
}{"
reads.aligned
"}
:
0
;
my
$countsErcc
=
exists
(
$summary
->
{
$key
}{"
reads.ercc
"})
?
$summary
->
{
$key
}{"
reads.ercc
"}
:
0
;
my
$countsMito
=
exists
(
$summary
->
{
$key
}{"
reads.mito
"})
?
$summary
->
{
$key
}{"
reads.mito
"}
:
0
;
# percentage
my
$percentAligned
=
(
100
*
$countsAligned
)
/
$countsRaw
;
my
$percentErcc
=
(
100
*
$countsErcc
)
/
$countsRaw
;
my
$percentMito
=
(
100
*
$countsMito
)
/
$countsRaw
;
# output
print
$key
,"
\t
",
$countsRaw
,"
\t
",
$countsAligned
,"
\t
",
$countsErcc
,"
\t
",
$countsMito
,"
\t
",
sprintf
("
%.6f
",
$percentAligned
),"
\t
",
sprintf
("
%.6f
",
$percentErcc
),"
\t
",
sprintf
("
%.6f
",
$percentMito
),"
\n
";
}
}
}
## USAGE
sub
usage
($)
{
my
$message
=
$_
[
0
];
if
(
defined
$message
&&
length
$message
)
{
$message
.=
"
\n
"
unless
(
$message
=~
/\n$/
);
}
my
$command
=
$
0
;
$command
=~
s#^.*/##
;
print
STDERR
(
$message
,
"
usage:
$command
-logs /path/to/STAR/logFiles/ -gbed /path/to/STAR/gbedFiles/
\n
"
.
"
description: creates a table for summarising reads
\n
"
.
"
parameters:
\n
"
.
"
--logs
\n
"
.
"
\t
/path/to/STAR/logFiles/, log files are created during STAR alignment
\n
"
.
"
--gbed
\n
"
.
"
\t
/path/to/STAR/gbedFiles/, coverage files
\n
"
.
"
--delim
\n
"
.
"
\t
delimiter to extract sample key
\n
"
.
"
-ercc
\n
"
.
"
\t
exports ercc table instead of experiment report
\n
"
.
"
-help
\n
"
.
"
\t
define usage
\n
"
);
die
("
\n
");
}
### ------------------- ###
=head
sub parseCounts($$)
{
my $file_counts = $_[0];
my $summary = $_[1];
my @keyPrimary = ();
open(my $fh, "<", $file_counts) or die $!;
while(<$fh>)
{
chomp($_);
# skip commands
next if($_ =~ m/^#/);
# split header
if($_ =~ m/^Geneid/)
{
my @header = split("\t", $_);
@header = @header[6..$#header];
# parse header
foreach my $label (@header)
{
my @tmp = split("\/",$label);
@tmp = split("\_", $tmp[-1]);
my $key = $tmp[0];
push(@keyPrimary, $key);
}
}
else
{
my @line = split("\t", $_);
@line = @line[6..$#line];
my $keySecondary = ($_ =~ m/^ERCC\-/) ? "ercc" : "gene";
for(my $i = 0; $i <= $#line; $i++)
{
$summary->{$keyPrimary[$i]}{$keySecondary} += $line[$i];
}
}
}
close($fh);
}
=cut
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment