Abstract

Visualize conserved gene clusters in multiple genomes

Introduction

Installation

## BiocManager lives on CRAN and is not part of base R; install it on demand
## so the command below also works in a fresh R library.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
## BiocManager hands GitHub "user/repo" targets to the remotes package,
## so make sure that one is available as well.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
BiocManager::install("jianhong/geneClusterPattern")

Quick start

library(geneClusterPattern)
library(org.Dr.eg.db)
library(GenomeInfoDb)
## collect every Ensembl gene id listed in org.Dr.egENSEMBL
ids <- as.list(org.Dr.egENSEMBL)
ensembl_gene_ids <- unlist(ids)
ensembl_gene_ids <- unique(ensembl_gene_ids)
ensembl_gene_ids <- sort(ensembl_gene_ids)

## fetch the gene information for those ids via biomaRt
fish_mart <- guessSpecies("zebrafish", output = "mart", version = 112)
fish <- grangesFromEnsemblIDs(
  mart = fish_mart,
  ensembl_gene_ids = ensembl_gene_ids
)

## restrict the ids to the genes on seqname "24" to save time
ensembl_gene_ids <- names(fish[seqnames(fish) == "24"])

## keep the standard sequences only
fish <- pruningSequences(fish)

## resolve the species to compare against, then retrieve their homologs
species <- guessSpecies(
  c(
    "human", "house mouse", "Japanese medaka", "turquoise killifish",
    "gaculeatus" # three-spined stickleback
  ),
  version = 112
)
species
## [1] "hsapiens"   "mmusculus"  "olatipes"   "nfurzeri"   "gaculeatus"
homologs <- pruningSequences(
  getHomologGeneList(species, fish_mart, ensembl_gene_ids)
)

## gene cluster around the target gene (k = 10)
queryGene <- "pcolce2b"
nearest10neighbors <- getGeneCluster(fish, queryGene, homologs, k = 10)

## put zebrafish together with the homologs and fix the species order
genesList <- c(drerio = fish, homologs)
genesList <- genesList[
  c("hsapiens", "mmusculus", "drerio", "olatipes", "nfurzeri", "gaculeatus")
]

## plot the cluster patterns
pgp <- plotGeneClusterPatterns(genesList, nearest10neighbors)

## gene order score, with zebrafish ("drerio") as the reference
gps <- geneOrderScore(genesList, ref = "drerio", nearest10neighbors)