SmythLab
diff --git a/‎DESCRIPTION
Lines changed: 2 additions & 2 deletions b/‎DESCRIPTION
Lines changed: 2 additions & 2 deletions
diff --git a/‎R/filterPeaks.R
Lines changed: 6 additions & 2 deletions b/‎R/filterPeaks.R
Lines changed: 6 additions & 2 deletions
diff --git a/‎R/neighbourCounts.R
Lines changed: 7 additions & 2 deletions b/‎R/neighbourCounts.R
Lines changed: 7 additions & 2 deletions
diff --git a/‎inst/NEWS.Rd
Lines changed: 5 additions & 1 deletion b/‎inst/NEWS.Rd
Lines changed: 5 additions & 1 deletion
diff --git a/‎inst/tests/test-neighbor.R
Lines changed: 35 additions & 11 deletions b/‎inst/tests/test-neighbor.R
Lines changed: 35 additions & 11 deletions
diff --git a/‎inst/tests/test-neighbor.Rout.save
Lines changed: 44 additions & 12 deletions b/‎inst/tests/test-neighbor.Rout.save
Lines changed: 44 additions & 12 deletions
@@ -1,6 +1,6 @@
 Package: diffHic
-Version: 1.1.9
-Date: 2015/06/27
+Version: 1.1.10
+Date: 2015/07/13
 Title: Differential analysis of Hi-C data
 Author: Aaron Lun <alun@wehi.edu.au>
 Maintainer: Aaron Lun <alun@wehi.edu.au>
 
@@ -1,4 +1,4 @@
-enrichedPairs <- function(data, flank=5, prior.count=2, abundances=NULL)
+enrichedPairs <- function(data, flank=5, exclude=0, prior.count=2, abundances=NULL)
 # This function identifies the highest-abundance neighbour in the interaction space
 # for each bin pair in `data`. The aim is to compare the abundance of each element
 # with the abundance of its neighbour. 
@@ -8,7 +8,11 @@ enrichedPairs <- function(data, flank=5, prior.count=2, abundances=NULL)
 # last modified 29 April 2015
 {
 	flank <- as.integer(flank)
+	exclude <- as.integer(exclude)
 	if (flank <= 0L) { stop("flank width must be a positive integer") }
+	if (exclude < 0L) { stop("exclude width must be a positive integer") }
+	if (flank <= exclude) { stop("exclude width must be less than the flank width") }
+	
 	rdata <- .splitByChr(regions(data))
 	last.id <- rdata$last
 	first.id <- rdata$first
@@ -51,7 +55,7 @@ enrichedPairs <- function(data, flank=5, prior.count=2, abundances=NULL)
 			o <- order(all.a, all.t)
 			collected <- .Call(cxx_quadrant_bg, all.a[o], all.t[o], 
 				converted$int[o], converted$dec[o], MULT, 
-				flank, a.len, t.len, anchor==target)
+				flank, exclude, a.len, t.len, anchor==target)
 			if (is.character(collected)) { stop(collected) }
 			collected[o] <- collected
 
 
@@ -1,4 +1,4 @@
-neighbourCounts <- function(files, param, width=50000, filter=1L, flank=NULL, prior.count=NULL)
+neighbourCounts <- function(files, param, width=50000, filter=1L, flank=NULL, exclude=NULL, prior.count=NULL)
 # This does the same thing as squareCounts, except that it simultaneously computes the 
 # filter statistic for each extracted bin pair. This has lower memory requirements as
 # it doesn't need to hold the entire `filter=1` output in memory at once.
@@ -42,7 +42,12 @@ neighbourCounts <- function(files, param, width=50000, filter=1L, flank=NULL, pr
 
 	# Other stuff related to calculation of the neighbourhood regions.	
 	if (is.null(flank)) { flank <- formals(enrichedPairs)$flank }
+	if (is.null(exclude)) { exclude <- formals(enrichedPairs)$exclude }
 	flank <- as.integer(flank)
+	exclude <- as.integer(exclude)
+	if (flank <= 0L) { stop("flank width must be a positive integer") }
+	if (exclude < 0L) { stop("exclude width must be a positive integer") }
+	if (flank <= exclude) { stop("exclude width must be less than the flank width") }
 	if (is.null(prior.count)) { prior.count <- formals(enrichedPairs)$prior.count } 
 	prior.count <- as.double(prior.count)    
 
@@ -55,7 +60,7 @@ neighbourCounts <- function(files, param, width=50000, filter=1L, flank=NULL, pr
 				chr.limits=frag.by.chr, discard=discard, cap=cap)
 
 			# Aggregating counts in C++ to obtain count combinations for each bin pair.
-			out <- .Call(cxx_count_background, pairs, new.pts$id, flank, filter, 
+			out <- .Call(cxx_count_background, pairs, new.pts$id, flank, exclude, filter, 
 				bin.by.chr$first[[target]], bin.by.chr$last[[target]], bin.by.chr$first[[anchor]], bin.by.chr$last[[anchor]],
 				maxit, tol, offsets, disp, prior.count)
 			if (is.character(out)) { stop(out) }
 
@@ -2,7 +2,7 @@
 \title{diffHic News}
 \encoding{UTF-8}
 
-\section{Version 1.1.9}{\itemize{
+\section{Version 1.1.10}{\itemize{
 \item
 Added library size specification to DIList methods normalize(), asDGEList().
 
@@ -21,6 +21,9 @@ Added iter_map.py to inst/python, for iterative mapping of DNase Hi-C data.
 \item
 Added the neighbourCounts() function, for simultaneous read counting and enrichment calculation.
 
+\item
+Added the exclude argument to enrichedPairs() and neighbourCounts(), to provide an exclusion zone in the local neighbourhood.
+
 \item
 Switched default colour in rotPlaid(), plotPlaid() to black.
 
@@ -45,6 +48,7 @@ Modified consolidatePairs() to accept index vectors for greater modularity.
 \item 
 Added reference argument for large bin pairs, in filterDirect() and filterTrended().
 
+
 \item 
 Updated documentation, tests and user's guide.
 }}
 
@@ -6,19 +6,25 @@ suppressPackageStartupMessages(require(edgeR))
 
 # Defining some odds and ends.
 
-lower.left <- function(x) { 
+lower.left <- function(x, exclude=0) { 
 	out <- matrix(TRUE, nrow=nrow(x), ncol=ncol(x))
-	out[nrow(x),1] <- FALSE
+	out[nrow(x)+(-exclude):0,1:(1+exclude)] <- FALSE
 	out
 }
-all.but.middle <- function(x) {
+
+all.but.middle <- function(x, exclude=0) {
 	out <- matrix(TRUE, nrow=nrow(x), ncol=ncol(x))
-	out[ceiling(length(out)/2)] <- FALSE
+	midrow <- ceiling(nrow(x)/2) + (-exclude):exclude
+	midrow <- midrow[midrow > 0 & midrow <= nrow(x)]
+	midcol <- ceiling(ncol(x)/2) + (-exclude):exclude
+	midcol <- midcol[midcol > 0 & midcol <= ncol(x)]
+	out[midrow, midcol] <- FALSE
 	out
 }
 
-comp <- function(npairs, chromos, flanking, prior=2) {
+comp <- function(npairs, chromos, flanking, exclude=0, prior=2) {
 	flanking <- as.integer(flanking)
+	exclude <- as.integer(exclude)
 
 	nlibs <- 4L
 	lambda <- 5
@@ -37,7 +43,7 @@ comp <- function(npairs, chromos, flanking, prior=2) {
 	data@regions$nfrags <- rep(1:3, length.out=nbins)
 
 	# Computing the reference enrichment value.
-	bg <- enrichedPairs(data, flank=flanking, prior.count=prior)
+	bg <- enrichedPairs(data, flank=flanking, prior.count=prior, exclude=exclude)
 	final.ref <- numeric(length(bg))
 
 	# Sorting them by chromosome pairs.
@@ -74,6 +80,7 @@ comp <- function(npairs, chromos, flanking, prior=2) {
 		for (pair in 1:nrow(current)) {
 			total.num <- 4L
 			collected <- numeric(total.num)
+			collected.n <- numeric(total.num)
 			ax <- a.dex[pair]
 			tx <- t.dex[pair]
 
@@ -102,16 +109,22 @@ comp <- function(npairs, chromos, flanking, prior=2) {
 					out[x > alen | x < 1 | y > tlen | y < 1] <- -1
 					return(out)
 				})
-				indices <- indices[keep(indices)]
+				indices <- indices[keep(indices, exclude)]
 				indices <- indices[indices > 0]
 				indices <- indices[valid[indices]]
 
 				# Computing the average across this quadrant.
 				relevant.rows <- inter.space[indices]
 				is.zero <- relevant.rows==0L			
 				collected[quad] <- sum(rel.ab[relevant.rows[!is.zero]])/length(relevant.rows)
+				collected.n[quad] <- length(relevant.rows)
 			}
-#			print(sprintf("%i %i %.3f", ax, tx, collected[6]))
+
+#			if (exclude) { # Troubleshooting.
+#				print(c(aid[pair], tid[pair]))
+#				print(collected)
+#				print(collected.n)
+#			}
 
 			output[pair] <- log2((rel.ab[pair]+prior)/(max(collected, na.rm=TRUE)+prior))
 		}
@@ -147,6 +160,11 @@ comp(200, c(chrA=10, chrC=20), 1)
 comp(200, c(chrA=10, chrB=5, chrC=20), 1)
 comp(200, c(chrA=20, chrB=5), 1)
 
+comp(200, c(chrA=10, chrB=30, chrC=20), 3, exclude=1)
+comp(200, c(chrA=10, chrC=20), 3, exclude=1)
+comp(200, c(chrA=10, chrB=5, chrC=20), 3, exclude=1)
+comp(200, c(chrA=20, chrB=5), 3, exclude=1)
+
 ###################################################################################################
 # Same sort of simulation, but direct from read data, for neighbourCounts testing.
 
@@ -157,16 +175,17 @@ dir.create("temp-neighbor")
 dir1<-"temp-neighbor/1.h5"
 dir2<-"temp-neighbor/2.h5"
 
-comp2 <- function(npairs1, npairs2, width, cuts, filter=1, flank=5, prior.count=2) {
+comp2 <- function(npairs1, npairs2, width, cuts, filter=1, flank=5, exclude=0, prior.count=2) {
 	simgen(dir1, npairs1, chromos)
 	simgen(dir2, npairs2, chromos)
 	param <- pairParam(fragments=cuts)
 
-	out <- neighbourCounts(c(dir1, dir2), param, width=width, filter=filter, flank=flank, prior.count=prior.count)
+	out <- neighbourCounts(c(dir1, dir2), param, width=width, filter=filter, flank=flank, prior.count=prior.count, 
+		exclude=exclude)
 
 	ref <- squareCounts(c(dir1, dir2), width=width, param, filter=1)
 	keep <- rowSums(counts(ref)) >= filter
-	enrichment <- enrichedPairs(ref, flank=flank, prior.count=prior.count)
+	enrichment <- enrichedPairs(ref, flank=flank, prior.count=prior.count, exclude=exclude)
 
 	if (!identical(ref[keep,], out$interaction)) { stop("extracted counts don't match up") }
 	if (any(abs(enrichment[keep] - out$enrichment) > 1e-6)) { stop("enrichment values don't match up") }
@@ -194,6 +213,11 @@ comp2(10, 20, 1000, cuts=simcuts(chromos), filter=5)
 comp2(10, 20, 1000, cuts=simcuts(chromos), flank=3)
 comp2(10, 20, 1000, cuts=simcuts(chromos), prior.count=1)
 
+comp2(10, 20, 1000, cuts=simcuts(chromos), exclude=1)
+comp2(50, 20, 1000, cuts=simcuts(chromos), exclude=1)
+comp2(100, 50, 1000, cuts=simcuts(chromos), exclude=2)
+comp2(50, 200, 1000, cuts=simcuts(chromos), exclude=2)
+
 #####################################################################################################
 # Cleaning up
 
 
@@ -25,19 +25,25 @@ Type 'q()' to quit R.
 > 
 > # Defining some odds and ends.
 > 
-> lower.left <- function(x) { 
+> lower.left <- function(x, exclude=0) { 
 + 	out <- matrix(TRUE, nrow=nrow(x), ncol=ncol(x))
-+ 	out[nrow(x),1] <- FALSE
++ 	out[nrow(x)+(-exclude):0,1:(1+exclude)] <- FALSE
 + 	out
 + }
-> all.but.middle <- function(x) {
+> 
+> all.but.middle <- function(x, exclude=0) {
 + 	out <- matrix(TRUE, nrow=nrow(x), ncol=ncol(x))
-+ 	out[ceiling(length(out)/2)] <- FALSE
++ 	midrow <- ceiling(nrow(x)/2) + (-exclude):exclude
++ 	midrow <- midrow[midrow > 0 & midrow <= nrow(x)]
++ 	midcol <- ceiling(ncol(x)/2) + (-exclude):exclude
++ 	midcol <- midcol[midcol > 0 & midcol <= ncol(x)]
++ 	out[midrow, midcol] <- FALSE
 + 	out
 + }
 > 
-> comp <- function(npairs, chromos, flanking, prior=2) {
+> comp <- function(npairs, chromos, flanking, exclude=0, prior=2) {
 + 	flanking <- as.integer(flanking)
++ 	exclude <- as.integer(exclude)
 + 
 + 	nlibs <- 4L
 + 	lambda <- 5
@@ -56,7 +62,7 @@ Type 'q()' to quit R.
 + 	data@regions$nfrags <- rep(1:3, length.out=nbins)
 + 	
 + 	# Computing the reference enrichment value.
-+ 	bg <- enrichedPairs(data, flank=flanking, prior.count=prior)
++ 	bg <- enrichedPairs(data, flank=flanking, prior.count=prior, exclude=exclude)
 + 	final.ref <- numeric(length(bg))
 + 
 + 	# Sorting them by chromosome pairs.
@@ -93,6 +99,7 @@ Type 'q()' to quit R.
 + 		for (pair in 1:nrow(current)) {
 + 			total.num <- 4L
 + 			collected <- numeric(total.num)
++ 			collected.n <- numeric(total.num)
 + 			ax <- a.dex[pair]
 + 			tx <- t.dex[pair]
 + 
@@ -121,16 +128,22 @@ Type 'q()' to quit R.
 + 					out[x > alen | x < 1 | y > tlen | y < 1] <- -1
 + 					return(out)
 + 				})
-+ 				indices <- indices[keep(indices)]
++ 				indices <- indices[keep(indices, exclude)]
 + 				indices <- indices[indices > 0]
 + 				indices <- indices[valid[indices]]
 + 
 + 				# Computing the average across this quadrant.
 + 				relevant.rows <- inter.space[indices]
 + 				is.zero <- relevant.rows==0L			
 + 				collected[quad] <- sum(rel.ab[relevant.rows[!is.zero]])/length(relevant.rows)
++ 				collected.n[quad] <- length(relevant.rows)
 + 			}
-+ #			print(sprintf("%i %i %.3f", ax, tx, collected[6]))
++ 
++ #			if (exclude) { # Troubleshooting.
++ #				print(c(aid[pair], tid[pair]))
++ #				print(collected)
++ #				print(collected.n)
++ #			}
 + 		
 + 			output[pair] <- log2((rel.ab[pair]+prior)/(max(collected, na.rm=TRUE)+prior))
 + 		}
@@ -183,6 +196,15 @@ Type 'q()' to quit R.
 > comp(200, c(chrA=20, chrB=5), 1)
 [1] -0.11150832 -0.49642583  0.08572987 -0.02106162 -0.36564947  0.99297957
 > 
+> comp(200, c(chrA=10, chrB=30, chrC=20), 3, exclude=1)
+[1] -0.03394733  1.53115606  1.09019781  1.51255401 -0.32192809  0.47846288
+> comp(200, c(chrA=10, chrC=20), 3, exclude=1)
+[1] 0.9177729 1.4823928 0.6076826 0.6100535 0.4639471 0.9855004
+> comp(200, c(chrA=10, chrB=5, chrC=20), 3, exclude=1)
+[1]  1.1803590 -0.3870231  1.3440294  1.4636562  1.0533811  0.8349408
+> comp(200, c(chrA=20, chrB=5), 3, exclude=1)
+[1]  0.49749966  0.64114534 -0.04439412 -0.07275634  0.18737014  0.46174548
+> 
 > ###################################################################################################
 > # Same sort of simulation, but direct from read data, for neighbourCounts testing.
 > 
@@ -193,16 +215,17 @@ Type 'q()' to quit R.
 > dir1<-"temp-neighbor/1.h5"
 > dir2<-"temp-neighbor/2.h5"
 > 
-> comp2 <- function(npairs1, npairs2, width, cuts, filter=1, flank=5, prior.count=2) {
+> comp2 <- function(npairs1, npairs2, width, cuts, filter=1, flank=5, exclude=0, prior.count=2) {
 + 	simgen(dir1, npairs1, chromos)
 + 	simgen(dir2, npairs2, chromos)
 + 	param <- pairParam(fragments=cuts)
 + 
-+ 	out <- neighbourCounts(c(dir1, dir2), param, width=width, filter=filter, flank=flank, prior.count=prior.count)
++ 	out <- neighbourCounts(c(dir1, dir2), param, width=width, filter=filter, flank=flank, prior.count=prior.count, 
++ 		exclude=exclude)
 + 
 + 	ref <- squareCounts(c(dir1, dir2), width=width, param, filter=1)
 + 	keep <- rowSums(counts(ref)) >= filter
-+ 	enrichment <- enrichedPairs(ref, flank=flank, prior.count=prior.count)
++ 	enrichment <- enrichedPairs(ref, flank=flank, prior.count=prior.count, exclude=exclude)
 + 
 + 	if (!identical(ref[keep,], out$interaction)) { stop("extracted counts don't match up") }
 + 	if (any(abs(enrichment[keep] - out$enrichment) > 1e-6)) { stop("enrichment values don't match up") }
@@ -247,6 +270,15 @@ numeric(0)
 > comp2(10, 20, 1000, cuts=simcuts(chromos), prior.count=1)
 [1] 0.5556565 0.5494439 0.5123425 0.5123425 0.5659609 0.5620062
 > 
+> comp2(10, 20, 1000, cuts=simcuts(chromos), exclude=1)
+[1] 0.2333634 0.2617857 0.3156551 0.3203674 0.3071017 0.2802203
+> comp2(50, 20, 1000, cuts=simcuts(chromos), exclude=1)
+[1] 0.2393575 0.2314517 0.2759848 0.2487711 0.2332494 0.2502265
+> comp2(100, 50, 1000, cuts=simcuts(chromos), exclude=2)
+[1] 0.2782308 0.3050583 0.5465546 0.2617857 0.2036715 0.1560364
+> comp2(50, 200, 1000, cuts=simcuts(chromos), exclude=2)
+[1] 0.09905107 0.26167813 0.26713529 0.26081098 0.15124940 0.24708803
+> 
 > #####################################################################################################
 > # Cleaning up
 > 
@@ -259,4 +291,4 @@ numeric(0)
 > 
 > proc.time()
    user  system elapsed 
- 11.464   0.298  11.755 
+ 12.418   0.362  12.783