vignettes/vignette.Rmd
vignette.Rmd
Abstract
Visualize conserved gene clusters in multiple genomes
BiocManager::install('jianhong/geneClusterPattern')
library(geneClusterPattern)
library(org.Dr.eg.db)
library(GenomeInfoDb)
## Gather every Ensembl gene ID held in the org.Dr.egENSEMBL map:
## flatten the list, drop duplicates, then sort.
ids <- as.list(org.Dr.egENSEMBL)
ensembl_gene_ids <- unlist(ids)
ensembl_gene_ids <- unique(ensembl_gene_ids)
ensembl_gene_ids <- sort(ensembl_gene_ids)

## Set up the zebrafish mart and pull gene information via biomaRt.
fish_mart <- guessSpecies("zebrafish", output = "mart", version = 112)
fish <- grangesFromEnsemblIDs(
  mart = fish_mart,
  ensembl_gene_ids = ensembl_gene_ids
)

## To save time downstream, keep only the IDs of genes whose seqname is "24".
## The subset is taken from `fish` as retrieved, i.e. before it is pruned.
ensembl_gene_ids <- names(fish[seqnames(fish) == "24"])

## Keep the standard sequences only.
fish <- pruningSequences(fish)
## Resolve the comparison species (given as common or short names) with
## guessSpecies().
species <- guessSpecies(
  c(
    "human",
    "house mouse",
    "Japanese medaka",
    "turquoise killifish",
    "gaculeatus" # three-spined stickleback
  ),
  version = 112
)
## Show the resolved species identifiers.
species
## [1] "hsapiens" "mmusculus" "olatipes" "nfurzeri" "gaculeatus"

## Retrieve the homologs of the selected zebrafish genes in each species,
## then keep the standard sequences only.
homologs <- pruningSequences(
  getHomologGeneList(species, fish_mart, ensembl_gene_ids)
)
## Get the gene cluster around the target gene (k = 10).
queryGene <- "pcolce2b"
nearest10neighbors <- getGeneCluster(fish, queryGene, homologs, k = 10)

## Combine the zebrafish genes (named "drerio") with the homologs, then
## arrange the species in the order used for plotting.
genesList <- c(drerio = fish, homologs)
genesList <- genesList[
  c("hsapiens", "mmusculus", "drerio", "olatipes", "nfurzeri", "gaculeatus")
]

## Plot the gene cluster patterns, then compute the gene order score with
## "drerio" as the reference.
pgp <- plotGeneClusterPatterns(genesList, nearest10neighbors)
gps <- geneOrderScore(genesList, ref = "drerio", nearest10neighbors)