Skip to content

Commit 448c97d

Browse files
Aaron LunAaron Lun
Aaron Lun
authored and
Aaron Lun
committed
Added exclusion to neighbourhood calculations.
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/diffHic@106315 bc3139a8-67e5-0310-9ffc-ced21a209358
1 parent c70cfd6 commit 448c97d

14 files changed

+275
-121
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: diffHic
2-
Version: 1.1.9
3-
Date: 2015/06/27
2+
Version: 1.1.10
3+
Date: 2015/07/13
44
Title: Differential analyis of Hi-C data
55
Author: Aaron Lun <alun@wehi.edu.au>
66
Maintainer: Aaron Lun <alun@wehi.edu.au>

R/filterPeaks.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
enrichedPairs <- function(data, flank=5, prior.count=2, abundances=NULL)
1+
enrichedPairs <- function(data, flank=5, exclude=0, prior.count=2, abundances=NULL)
22
# This function identifies the highest-abundance neighbour in the interaction space
33
# for each bin pair in `data`. The aim is to compare the abundance of each element
44
# with the abundance of its neighbour.
@@ -8,7 +8,11 @@ enrichedPairs <- function(data, flank=5, prior.count=2, abundances=NULL)
88
# last modified 29 April 2015
99
{
1010
flank <- as.integer(flank)
11+
exclude <- as.integer(exclude)
1112
if (flank <= 0L) { stop("flank width must be a positive integer") }
13+
if (exclude < 0L) { stop("exclude width must be a positive integer") }
14+
if (flank <= exclude) { stop("exclude width must be less than the flank width") }
15+
1216
rdata <- .splitByChr(regions(data))
1317
last.id <- rdata$last
1418
first.id <- rdata$first
@@ -51,7 +55,7 @@ enrichedPairs <- function(data, flank=5, prior.count=2, abundances=NULL)
5155
o <- order(all.a, all.t)
5256
collected <- .Call(cxx_quadrant_bg, all.a[o], all.t[o],
5357
converted$int[o], converted$dec[o], MULT,
54-
flank, a.len, t.len, anchor==target)
58+
flank, exclude, a.len, t.len, anchor==target)
5559
if (is.character(collected)) { stop(collected) }
5660
collected[o] <- collected
5761

R/neighbourCounts.R

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
neighbourCounts <- function(files, param, width=50000, filter=1L, flank=NULL, prior.count=NULL)
1+
neighbourCounts <- function(files, param, width=50000, filter=1L, flank=NULL, exclude=NULL, prior.count=NULL)
22
# This does the same thing as squareCounts, except that it simultaneously computes the
33
# filter statistic for each extracted bin pair. This has lower memory requirements as
44
# it doesn't need to hold the entire `filter=1` output in memory at once.
@@ -42,7 +42,12 @@ neighbourCounts <- function(files, param, width=50000, filter=1L, flank=NULL, pr
4242

4343
# Other stuff related to calculation of the neighbourhood regions.
4444
if (is.null(flank)) { flank <- formals(enrichedPairs)$flank }
45+
if (is.null(exclude)) { exclude <- formals(enrichedPairs)$exclude }
4546
flank <- as.integer(flank)
47+
exclude <- as.integer(exclude)
48+
if (flank <= 0L) { stop("flank width must be a positive integer") }
49+
if (exclude < 0L) { stop("exclude width must be a positive integer") }
50+
if (flank <= exclude) { stop("exclude width must be less than the flank width") }
4651
if (is.null(prior.count)) { prior.count <- formals(enrichedPairs)$prior.count }
4752
prior.count <- as.double(prior.count)
4853

@@ -55,7 +60,7 @@ neighbourCounts <- function(files, param, width=50000, filter=1L, flank=NULL, pr
5560
chr.limits=frag.by.chr, discard=discard, cap=cap)
5661

5762
# Aggregating counts in C++ to obtain count combinations for each bin pair.
58-
out <- .Call(cxx_count_background, pairs, new.pts$id, flank, filter,
63+
out <- .Call(cxx_count_background, pairs, new.pts$id, flank, exclude, filter,
5964
bin.by.chr$first[[target]], bin.by.chr$last[[target]], bin.by.chr$first[[anchor]], bin.by.chr$last[[anchor]],
6065
maxit, tol, offsets, disp, prior.count)
6166
if (is.character(out)) { stop(out) }

inst/NEWS.Rd

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
\title{diffHic News}
33
\encoding{UTF-8}
44

5-
\section{Version 1.1.9}{\itemize{
5+
\section{Version 1.1.10}{\itemize{
66
\item
77
Added library size specification to DIList methods normalize(), asDGEList().
88

@@ -21,6 +21,9 @@ Added iter_map.py to inst/python, for iterative mapping of DNase Hi-C data.
2121
\item
2222
Added the neighbourCounts() function, for simultaneous read counting and enrichment calculation.
2323

24+
\item
25+
Added exclude for enrichedPairs(), to provide an exclusion zone in the local neighbourhood.
26+
2427
\item
2528
Switched default colour in rotPlaid(), plotPlaid() to black.
2629

@@ -45,6 +48,7 @@ Modified consolidatePairs() to accept index vectors for greater modularity.
4548
\item
4649
Added reference argument for large bin pairs, in filterDirect() and filterTrended().
4750

51+
4852
\item
4953
Updated documentation, tests and user's guide.
5054
}}

inst/tests/test-neighbor.R

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,25 @@ suppressPackageStartupMessages(require(edgeR))
66

77
# Defining some odds and ends.
88

9-
lower.left <- function(x) {
9+
lower.left <- function(x, exclude=0) {
1010
out <- matrix(TRUE, nrow=nrow(x), ncol=ncol(x))
11-
out[nrow(x),1] <- FALSE
11+
out[nrow(x)+(-exclude):0,1:(1+exclude)] <- FALSE
1212
out
1313
}
14-
all.but.middle <- function(x) {
14+
15+
all.but.middle <- function(x, exclude=0) {
1516
out <- matrix(TRUE, nrow=nrow(x), ncol=ncol(x))
16-
out[ceiling(length(out)/2)] <- FALSE
17+
midrow <- ceiling(nrow(x)/2) + (-exclude):exclude
18+
midrow <- midrow[midrow > 0 & midrow <= nrow(x)]
19+
midcol <- ceiling(ncol(x)/2) + (-exclude):exclude
20+
midcol <- midcol[midcol > 0 & midcol <= ncol(x)]
21+
out[midrow, midcol] <- FALSE
1722
out
1823
}
1924

20-
comp <- function(npairs, chromos, flanking, prior=2) {
25+
comp <- function(npairs, chromos, flanking, exclude=0, prior=2) {
2126
flanking <- as.integer(flanking)
27+
exclude <- as.integer(exclude)
2228

2329
nlibs <- 4L
2430
lambda <- 5
@@ -37,7 +43,7 @@ comp <- function(npairs, chromos, flanking, prior=2) {
3743
data@regions$nfrags <- rep(1:3, length.out=nbins)
3844

3945
# Computing the reference enrichment value.
40-
bg <- enrichedPairs(data, flank=flanking, prior.count=prior)
46+
bg <- enrichedPairs(data, flank=flanking, prior.count=prior, exclude=exclude)
4147
final.ref <- numeric(length(bg))
4248

4349
# Sorting them by chromosome pairs.
@@ -74,6 +80,7 @@ comp <- function(npairs, chromos, flanking, prior=2) {
7480
for (pair in 1:nrow(current)) {
7581
total.num <- 4L
7682
collected <- numeric(total.num)
83+
collected.n <- numeric(total.num)
7784
ax <- a.dex[pair]
7885
tx <- t.dex[pair]
7986

@@ -102,16 +109,22 @@ comp <- function(npairs, chromos, flanking, prior=2) {
102109
out[x > alen | x < 1 | y > tlen | y < 1] <- -1
103110
return(out)
104111
})
105-
indices <- indices[keep(indices)]
112+
indices <- indices[keep(indices, exclude)]
106113
indices <- indices[indices > 0]
107114
indices <- indices[valid[indices]]
108115

109116
# Computing the average across this quadrant.
110117
relevant.rows <- inter.space[indices]
111118
is.zero <- relevant.rows==0L
112119
collected[quad] <- sum(rel.ab[relevant.rows[!is.zero]])/length(relevant.rows)
120+
collected.n[quad] <- length(relevant.rows)
113121
}
114-
# print(sprintf("%i %i %.3f", ax, tx, collected[6]))
122+
123+
# if (exclude) { # Troubleshooting.
124+
# print(c(aid[pair], tid[pair]))
125+
# print(collected)
126+
# print(collected.n)
127+
# }
115128

116129
output[pair] <- log2((rel.ab[pair]+prior)/(max(collected, na.rm=TRUE)+prior))
117130
}
@@ -147,6 +160,11 @@ comp(200, c(chrA=10, chrC=20), 1)
147160
comp(200, c(chrA=10, chrB=5, chrC=20), 1)
148161
comp(200, c(chrA=20, chrB=5), 1)
149162

163+
comp(200, c(chrA=10, chrB=30, chrC=20), 3, exclude=1)
164+
comp(200, c(chrA=10, chrC=20), 3, exclude=1)
165+
comp(200, c(chrA=10, chrB=5, chrC=20), 3, exclude=1)
166+
comp(200, c(chrA=20, chrB=5), 3, exclude=1)
167+
150168
###################################################################################################
151169
# Same sort of simulation, but direct from read data, for neighbourCounts testing.
152170

@@ -157,16 +175,17 @@ dir.create("temp-neighbor")
157175
dir1<-"temp-neighbor/1.h5"
158176
dir2<-"temp-neighbor/2.h5"
159177

160-
comp2 <- function(npairs1, npairs2, width, cuts, filter=1, flank=5, prior.count=2) {
178+
comp2 <- function(npairs1, npairs2, width, cuts, filter=1, flank=5, exclude=0, prior.count=2) {
161179
simgen(dir1, npairs1, chromos)
162180
simgen(dir2, npairs2, chromos)
163181
param <- pairParam(fragments=cuts)
164182

165-
out <- neighbourCounts(c(dir1, dir2), param, width=width, filter=filter, flank=flank, prior.count=prior.count)
183+
out <- neighbourCounts(c(dir1, dir2), param, width=width, filter=filter, flank=flank, prior.count=prior.count,
184+
exclude=exclude)
166185

167186
ref <- squareCounts(c(dir1, dir2), width=width, param, filter=1)
168187
keep <- rowSums(counts(ref)) >= filter
169-
enrichment <- enrichedPairs(ref, flank=flank, prior.count=prior.count)
188+
enrichment <- enrichedPairs(ref, flank=flank, prior.count=prior.count, exclude=exclude)
170189

171190
if (!identical(ref[keep,], out$interaction)) { stop("extracted counts don't match up") }
172191
if (any(abs(enrichment[keep] - out$enrichment) > 1e-6)) { stop("enrichment values don't match up") }
@@ -194,6 +213,11 @@ comp2(10, 20, 1000, cuts=simcuts(chromos), filter=5)
194213
comp2(10, 20, 1000, cuts=simcuts(chromos), flank=3)
195214
comp2(10, 20, 1000, cuts=simcuts(chromos), prior.count=1)
196215

216+
comp2(10, 20, 1000, cuts=simcuts(chromos), exclude=1)
217+
comp2(50, 20, 1000, cuts=simcuts(chromos), exclude=1)
218+
comp2(100, 50, 1000, cuts=simcuts(chromos), exclude=2)
219+
comp2(50, 200, 1000, cuts=simcuts(chromos), exclude=2)
220+
197221
#####################################################################################################
198222
# Cleaning up
199223

inst/tests/test-neighbor.Rout.save

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,19 +25,25 @@ Type 'q()' to quit R.
2525
>
2626
> # Defining some odds and ends.
2727
>
28-
> lower.left <- function(x) {
28+
> lower.left <- function(x, exclude=0) {
2929
+ out <- matrix(TRUE, nrow=nrow(x), ncol=ncol(x))
30-
+ out[nrow(x),1] <- FALSE
30+
+ out[nrow(x)+(-exclude):0,1:(1+exclude)] <- FALSE
3131
+ out
3232
+ }
33-
> all.but.middle <- function(x) {
33+
>
34+
> all.but.middle <- function(x, exclude=0) {
3435
+ out <- matrix(TRUE, nrow=nrow(x), ncol=ncol(x))
35-
+ out[ceiling(length(out)/2)] <- FALSE
36+
+ midrow <- ceiling(nrow(x)/2) + (-exclude):exclude
37+
+ midrow <- midrow[midrow > 0 & midrow <= nrow(x)]
38+
+ midcol <- ceiling(ncol(x)/2) + (-exclude):exclude
39+
+ midcol <- midcol[midcol > 0 & midcol <= ncol(x)]
40+
+ out[midrow, midcol] <- FALSE
3641
+ out
3742
+ }
3843
>
39-
> comp <- function(npairs, chromos, flanking, prior=2) {
44+
> comp <- function(npairs, chromos, flanking, exclude=0, prior=2) {
4045
+ flanking <- as.integer(flanking)
46+
+ exclude <- as.integer(exclude)
4147
+
4248
+ nlibs <- 4L
4349
+ lambda <- 5
@@ -56,7 +62,7 @@ Type 'q()' to quit R.
5662
+ data@regions$nfrags <- rep(1:3, length.out=nbins)
5763
+
5864
+ # Computing the reference enrichment value.
59-
+ bg <- enrichedPairs(data, flank=flanking, prior.count=prior)
65+
+ bg <- enrichedPairs(data, flank=flanking, prior.count=prior, exclude=exclude)
6066
+ final.ref <- numeric(length(bg))
6167
+
6268
+ # Sorting them by chromosome pairs.
@@ -93,6 +99,7 @@ Type 'q()' to quit R.
9399
+ for (pair in 1:nrow(current)) {
94100
+ total.num <- 4L
95101
+ collected <- numeric(total.num)
102+
+ collected.n <- numeric(total.num)
96103
+ ax <- a.dex[pair]
97104
+ tx <- t.dex[pair]
98105
+
@@ -121,16 +128,22 @@ Type 'q()' to quit R.
121128
+ out[x > alen | x < 1 | y > tlen | y < 1] <- -1
122129
+ return(out)
123130
+ })
124-
+ indices <- indices[keep(indices)]
131+
+ indices <- indices[keep(indices, exclude)]
125132
+ indices <- indices[indices > 0]
126133
+ indices <- indices[valid[indices]]
127134
+
128135
+ # Computing the average across this quadrant.
129136
+ relevant.rows <- inter.space[indices]
130137
+ is.zero <- relevant.rows==0L
131138
+ collected[quad] <- sum(rel.ab[relevant.rows[!is.zero]])/length(relevant.rows)
139+
+ collected.n[quad] <- length(relevant.rows)
132140
+ }
133-
+ # print(sprintf("%i %i %.3f", ax, tx, collected[6]))
141+
+
142+
+ # if (exclude) { # Troubleshooting.
143+
+ # print(c(aid[pair], tid[pair]))
144+
+ # print(collected)
145+
+ # print(collected.n)
146+
+ # }
134147
+
135148
+ output[pair] <- log2((rel.ab[pair]+prior)/(max(collected, na.rm=TRUE)+prior))
136149
+ }
@@ -183,6 +196,15 @@ Type 'q()' to quit R.
183196
> comp(200, c(chrA=20, chrB=5), 1)
184197
[1] -0.11150832 -0.49642583 0.08572987 -0.02106162 -0.36564947 0.99297957
185198
>
199+
> comp(200, c(chrA=10, chrB=30, chrC=20), 3, exclude=1)
200+
[1] -0.03394733 1.53115606 1.09019781 1.51255401 -0.32192809 0.47846288
201+
> comp(200, c(chrA=10, chrC=20), 3, exclude=1)
202+
[1] 0.9177729 1.4823928 0.6076826 0.6100535 0.4639471 0.9855004
203+
> comp(200, c(chrA=10, chrB=5, chrC=20), 3, exclude=1)
204+
[1] 1.1803590 -0.3870231 1.3440294 1.4636562 1.0533811 0.8349408
205+
> comp(200, c(chrA=20, chrB=5), 3, exclude=1)
206+
[1] 0.49749966 0.64114534 -0.04439412 -0.07275634 0.18737014 0.46174548
207+
>
186208
> ###################################################################################################
187209
> # Same sort of simulation, but direct from read data, for neighbourCounts testing.
188210
>
@@ -193,16 +215,17 @@ Type 'q()' to quit R.
193215
> dir1<-"temp-neighbor/1.h5"
194216
> dir2<-"temp-neighbor/2.h5"
195217
>
196-
> comp2 <- function(npairs1, npairs2, width, cuts, filter=1, flank=5, prior.count=2) {
218+
> comp2 <- function(npairs1, npairs2, width, cuts, filter=1, flank=5, exclude=0, prior.count=2) {
197219
+ simgen(dir1, npairs1, chromos)
198220
+ simgen(dir2, npairs2, chromos)
199221
+ param <- pairParam(fragments=cuts)
200222
+
201-
+ out <- neighbourCounts(c(dir1, dir2), param, width=width, filter=filter, flank=flank, prior.count=prior.count)
223+
+ out <- neighbourCounts(c(dir1, dir2), param, width=width, filter=filter, flank=flank, prior.count=prior.count,
224+
+ exclude=exclude)
202225
+
203226
+ ref <- squareCounts(c(dir1, dir2), width=width, param, filter=1)
204227
+ keep <- rowSums(counts(ref)) >= filter
205-
+ enrichment <- enrichedPairs(ref, flank=flank, prior.count=prior.count)
228+
+ enrichment <- enrichedPairs(ref, flank=flank, prior.count=prior.count, exclude=exclude)
206229
+
207230
+ if (!identical(ref[keep,], out$interaction)) { stop("extracted counts don't match up") }
208231
+ if (any(abs(enrichment[keep] - out$enrichment) > 1e-6)) { stop("enrichment values don't match up") }
@@ -247,6 +270,15 @@ numeric(0)
247270
> comp2(10, 20, 1000, cuts=simcuts(chromos), prior.count=1)
248271
[1] 0.5556565 0.5494439 0.5123425 0.5123425 0.5659609 0.5620062
249272
>
273+
> comp2(10, 20, 1000, cuts=simcuts(chromos), exclude=1)
274+
[1] 0.2333634 0.2617857 0.3156551 0.3203674 0.3071017 0.2802203
275+
> comp2(50, 20, 1000, cuts=simcuts(chromos), exclude=1)
276+
[1] 0.2393575 0.2314517 0.2759848 0.2487711 0.2332494 0.2502265
277+
> comp2(100, 50, 1000, cuts=simcuts(chromos), exclude=2)
278+
[1] 0.2782308 0.3050583 0.5465546 0.2617857 0.2036715 0.1560364
279+
> comp2(50, 200, 1000, cuts=simcuts(chromos), exclude=2)
280+
[1] 0.09905107 0.26167813 0.26713529 0.26081098 0.15124940 0.24708803
281+
>
250282
> #####################################################################################################
251283
> # Cleaning up
252284
>
@@ -259,4 +291,4 @@ numeric(0)
259291
>
260292
> proc.time()
261293
user system elapsed
262-
11.464 0.298 11.755
294+
12.418 0.362 12.783

0 commit comments

Comments
 (0)