The Figure 6 is the showcase for geomeTriD package to
validate the prediction of 3D structures.
library(geomeTriD)
library(geomeTriD.documentation)
library(GenomicRanges)
library(GenomeInfoDb)
library(trackViewer)
library(InteractionSet)
# original interaction map vs. spatialDistanceHeatmap
gr <- GRanges('chrX:1-20000000')
## load FLAMINGO results
FLAMINGO_chrX_5kb <- importFLAMINGO('https://github.com/wangjr03/FLAMINGO/raw/refs/heads/main/predictions/GM12878/chrX_5kb.txt')[[1]]
sdm_FLAMINGO <- spatialDistanceMatrix(subsetByOverlaps(FLAMINGO_chrX_5kb, gr), fill_NA = TRUE)
## load SuperRec results
extdata <- system.file('extdata', 'GSE63525', package = 'geomeTriD.documentation')
superRec_chrX_5kb <- importSuperRec(file.path(extdata, 'combined_30.chrX.SuperRec.txt.gz'),
## this file is not the real input file SuperRec
## the file is reduced size to construct the xyz object for this doc.
file.path(extdata, 'combined_30.chrX.SuperRec.input.subset.gz'),
binsize = 5000, chr='chrX')[[1]]
sdm_superRec<- spatialDistanceMatrix(subsetByOverlaps(superRec_chrX_5kb, gr), fill_NA = TRUE)
## load hic signal matrix
hic_sig <- importGInteractions('https://ftp.ncbi.nlm.nih.gov/geo/series/GSE63nnn/GSE63525/suppl/GSE63525%5FGM12878%5Fdilution%5Fcombined%5F30.hic',
format = 'hic', ranges = GRanges('X:1-20000000'),
resolution = 250000, out='GInteractions')
seqlevelsStyle(regions(hic_sig)) <- 'UCSC'
anchors <- anchorIds(hic_sig)
rg <- range(regions(hic_sig))
w <- unique(width(regions(hic_sig)))
gr <- slidingWindows(rg, width = w, step = w)[[1]]
df <- data.frame(i=anchors[[1]], j=anchors[[2]], score=log10(mcols(hic_sig)$score))
m <- xtabs(score ~ i + j, data = df)
m <- m+t(m)
## function to increase the contrasts
sigmoid_transform <- function(mat, center_value, steepness){
1 / (1 + exp(-steepness * (m - center_value)))
}
mat_sigmoid <- sigmoid_transform(m, center_value = mean(m, na.rm=TRUE), steepness = 5)
par("mfcol"=c(1, 2))
image(mat_sigmoid, useRaster=TRUE, col = rev(hcl.colors(n=12, "OrRd")))
image(sdm_FLAMINGO, useRaster = TRUE, col = hcl.colors(n=12, "OrRd"))
## check Distance-Contact correlation
safe_scale <- function(x) {
rng <- range(x, na.rm = TRUE)
if (diff(rng) == 0) return(x)
(x - rng[1]) / diff(rng)
}
sdm_80 <- function(x){
sdm_80 <- rowsum(x, rep(seq.int(80), each=50)[seq.int(nrow(x))])
sdm_80 <- rowsum(t(sdm_80), rep(seq.int(80), each=50)[seq.int(ncol(sdm_80))])
sdm_80 <- t(sdm_80)
sdm_scaled_safe <- safe_scale(sdm_80)
}
mat_scaled_safe <- safe_scale(m)
sdm_80_FLAMINGO <- sdm_80(sdm_FLAMINGO)
sdm_80_superRec <- sdm_80(sdm_superRec)
(cor_FLAMINGO <- cor.test(as.numeric(mat_scaled_safe),
as.numeric(1/sdm_80_FLAMINGO), method = 'spearman'))## Warning in cor.test.default(as.numeric(mat_scaled_safe),
## as.numeric(1/sdm_80_FLAMINGO), : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: as.numeric(mat_scaled_safe) and as.numeric(1/sdm_80_FLAMINGO)
## S = 9.739e+09, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.7770931
(cor_superRec <- cor.test(as.numeric(mat_scaled_safe),
as.numeric(1/sdm_80_superRec), method = 'spearman'))## Warning in cor.test.default(as.numeric(mat_scaled_safe),
## as.numeric(1/sdm_80_superRec), : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: as.numeric(mat_scaled_safe) and as.numeric(1/sdm_80_superRec)
## S = 4.9934e+10, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1428887
## check Reconstruction Error
re <- function(d_model, hic_sig){
sum(hic_sig*(d_model - 1/hic_sig)^2, na.rm = TRUE)
}
re(sdm_80_FLAMINGO, mat_scaled_safe)## [1] 42149.23
re(sdm_80_superRec, mat_scaled_safe)## [1] 39530.13
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] InteractionSet_1.38.0 SummarizedExperiment_1.40.0
## [3] Biobase_2.70.0 MatrixGenerics_1.22.0
## [5] matrixStats_1.5.0 trackViewer_1.47.0
## [7] GenomeInfoDb_1.46.0 GenomicRanges_1.62.0
## [9] Seqinfo_1.0.0 IRanges_2.44.0
## [11] S4Vectors_0.48.0 BiocGenerics_0.56.0
## [13] generics_0.1.4 geomeTriD.documentation_0.0.6
## [15] geomeTriD_1.5.0
##
## loaded via a namespace (and not attached):
## [1] strawr_0.0.92 RColorBrewer_1.1-3 rstudioapi_0.17.1
## [4] jsonlite_2.0.0 magrittr_2.0.4 GenomicFeatures_1.62.0
## [7] farver_2.1.2 rmarkdown_2.30 fs_1.6.6
## [10] BiocIO_1.20.0 ragg_1.5.0 vctrs_0.6.5
## [13] memoise_2.0.1 Rsamtools_2.26.0 RCurl_1.98-1.17
## [16] base64enc_0.1-3 htmltools_0.5.8.1 S4Arrays_1.10.0
## [19] progress_1.2.3 plotrix_3.8-4 curl_7.0.0
## [22] Rhdf5lib_1.32.0 rhdf5_2.54.0 SparseArray_1.10.1
## [25] Formula_1.2-5 sass_0.4.10 parallelly_1.45.1
## [28] bslib_0.9.0 htmlwidgets_1.6.4 desc_1.4.3
## [31] Gviz_1.54.0 httr2_1.2.1 cachem_1.1.0
## [34] GenomicAlignments_1.46.0 igraph_2.2.1 lifecycle_1.0.4
## [37] pkgconfig_2.0.3 Matrix_1.7-4 R6_2.6.1
## [40] fastmap_1.2.0 future_1.67.0 aricode_1.0.3
## [43] clue_0.3-66 digest_0.6.38 colorspace_2.1-2
## [46] AnnotationDbi_1.72.0 textshaping_1.0.4 Hmisc_5.2-4
## [49] RSQLite_2.4.4 filelock_1.0.3 progressr_0.18.0
## [52] httr_1.4.7 abind_1.4-8 compiler_4.5.2
## [55] bit64_4.6.0-1 backports_1.5.0 htmlTable_2.4.3
## [58] S7_0.2.0 BiocParallel_1.44.0 DBI_1.2.3
## [61] R.utils_2.13.0 biomaRt_2.66.0 MASS_7.3-65
## [64] rappdirs_0.3.3 DelayedArray_0.36.0 rjson_0.2.23
## [67] tools_4.5.2 foreign_0.8-90 future.apply_1.20.0
## [70] nnet_7.3-20 R.oo_1.27.1 glue_1.8.0
## [73] restfulr_0.0.16 dbscan_1.2.3 rhdf5filters_1.22.0
## [76] checkmate_2.3.3 cluster_2.1.8.1 gtable_0.3.6
## [79] BSgenome_1.78.0 R.methodsS3_1.8.2 ensembldb_2.34.0
## [82] data.table_1.17.8 hms_1.1.4 XVector_0.50.0
## [85] RANN_2.6.2 pillar_1.11.1 stringr_1.6.0
## [88] dplyr_1.1.4 BiocFileCache_3.0.0 lattice_0.22-7
## [91] deldir_2.0-4 rtracklayer_1.70.0 bit_4.6.0
## [94] biovizBase_1.58.0 tidyselect_1.2.1 Biostrings_2.78.0
## [97] knitr_1.50 gridExtra_2.3 ProtGenerics_1.42.0
## [100] xfun_0.54 stringi_1.8.7 UCSC.utils_1.6.0
## [103] lazyeval_0.2.2 yaml_2.3.10 evaluate_1.0.5
## [106] codetools_0.2-20 cigarillo_1.0.0 interp_1.1-6
## [109] tibble_3.3.0 cli_3.6.5 rpart_4.1.24
## [112] systemfonts_1.3.1 jquerylib_0.1.4 dichromat_2.0-0.1
## [115] Rcpp_1.1.0 globals_0.18.0 grImport_0.9-7
## [118] dbplyr_2.5.1 png_0.1-8 XML_3.99-0.20
## [121] parallel_4.5.2 pkgdown_2.2.0 rgl_1.3.24
## [124] ggplot2_4.0.0 blob_1.2.4 prettyunits_1.2.0
## [127] jpeg_0.1-11 latticeExtra_0.6-31 AnnotationFilter_1.34.0
## [130] bitops_1.0-9 txdbmaker_1.6.0 listenv_0.10.0
## [133] VariantAnnotation_1.56.0 scales_1.4.0 crayon_1.5.3
## [136] rlang_1.1.6 KEGGREST_1.50.0