library(Seurat)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(utils)
This is the output from the randomForest script which was run by Katia on the chimera
mic <- readRDS("MG22HIPP_RF_output1.rds")
k_cell <- read.delim2("cluster_by_cell_MG_22_HIPP.txt")
dim(mic)
## [1] 17936 1387
dim(k_cell)
## [1] 1387 2
table(k_cell$cluster)
##
## 0 1 2
## 653 592 142
This is the distribution of cells in each cluster as determined by the RF.
mic <- RunPCA(mic, features = VariableFeatures(object = mic))
## PC_ 1
## Positive: RHOB, HSPB1, TUBB4B, IER5, EGR1, DDIT4, JUN, GADD45B, DNAJB1, ATF3
## FOSB, IDI1, DNAJB4, PLK2, PMAIP1, AMD1, IER3, HSPD1, CSKMT, MAFB
## HSP90AA1, HSPH1, TRNAU1AP, TOB1, IER5L, DNAJA4, SLC20A1, SERTAD1, IFRD1, HSPA1B
## Negative: RPL37A, RPS27A, RPL21, RPS29, RPS12, RPS27, FTL, APOC1, GLDN, RPL41
## ADAP2, NSRP1, TMIGD3, ADPGK, METAP2, GOLGB1, MNDA, ZNF721, LGALS1, OLFML3
## PIGK, PLTP, SASH3, MRPL19, FOLR2, PPIE, LGALS3, NCF1, MCM3, AC012645.4
## PC_ 2
## Positive: S100A9, S100A8, IFITM2, FCGR3B, CXCL8, S100P, NAMPT, VNN2, MXD1, G0S2
## BCL2A1, S100A6, LITAF, SELL, IFITM1, BASP1, TAGLN2, FPR1, S100A12, CDKN2D
## MNDA, LSP1, AQP9, PROK2, TREM1, HCAR3, FTH1, CMTM2, RIPOR2, CXCR1
## Negative: SLC1A3, HSPB1, SPP1, KCTD12, ADAP2, RGS1, GRID2, LINC02256, ARHGAP25, AC025164.1
## AC245297.3, KCNQ1OT1, GGCX, SGK1, CDKN1A, SOX4, GOLGB1, PLK2, CD69, MAFB
## RPL37A, HIST1H2BG, RPS12, GCNT2, ATF7IP2, EPS8, TRNAU1AP, DNAJB4, GPR183, CACNA1A
## PC_ 3
## Positive: CXCL8, FTH1, FCGR3B, G0S2, HCAR3, NAMPT, NFKBIA, NEAT1, CXCR2, TAGLN2
## FTL, RIPOR2, TREM1, IL1B, IFITM1, SMIM25, PTGS2, IFITM3, AQP9, SRGN
## LITAF, IFITM2, CXCR1, PPIF, CSRNP1, FFAR2, GLUL, SAT1, CXCL1, FOSL2
## Negative: CRISP3, PGLYRP1, LCN2, LTF, CD24, MMP8, CAMP, FCN1, S100A12, CEACAM8
## ARG1, PADI4, CD177, MMP9, ANXA1, RETN, LYZ, CKAP4, AC245128.3, NKG7
## VIM, ORM1, CDA, PYGL, CYP4F3, VNN1, TFF3, PLBD1, RGCC, CLC
## PC_ 4
## Positive: SLC1A3, NEAT1, TRNAU1AP, PLK2, CSKMT, AL136038.3, AP000787.1, S100A9, AC016745.2, NCF1
## S100A8, SOD2, DENND2C, AC024257.3, GCNT2, FCGR3B, PHLDA1, AL031777.3, VNN2, CCDC84
## AC010618.3, DNAJB7, CD34, ADAP2, AC025164.1, HIST1H2AG, GOLGB1, SPP1, KCNB1, CXCL8
## Negative: KLRD1, CRIP1, DOK2, CCL5, NKG7, CST7, IL32, KLRC2, GZMB, CCL4
## RPS12, CEMIP2, DUSP2, RPS29, RPS27A, RPL41, GNLY, IL2RB, LINC00861, ANXA1
## RPL37A, RPL21, GZMA, XCL2, HSH2D, RPS27, CD247, IKZF3, ZNF331, S100A10
## PC_ 5
## Positive: CST7, CCL5, KLRD1, DOK2, CRIP1, NKG7, IL32, KLRC2, GNLY, GZMB
## HSH2D, CCL4, LINC00861, IL2RB, IKZF3, AL139246.5, GZMA, MAPK13, CD247, IFITM1
## AC026979.2, TNFAIP3, XCL2, COA1, PMAIP1, UBXN2A, CEMIP2, CCL3, PTPN7, KLRC1
## Negative: RGS1, GPR183, SELENOP, SRGN, CEBPD, FTH1, SGK1, RPL37A, RPS29, RPL21
## CXCR4, RPS12, CD83, NR4A2, F13A1, RPS27, RPS27A, RPL41, FTL, MARCO
## FOS, C5AR1, INSIG1, IER3, DDIT4-AS1, CCDC152, MRC1, GPR155, GLUL, RNASE1
ElbowPlot(mic)
mic <- RunUMAP(mic, dims = 1:7)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 13:29:22 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:29:22 Read 1387 rows and found 7 numeric columns
## 13:29:22 Using Annoy for neighbor search, n_neighbors = 30
## 13:29:22 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:29:23 Writing NN index file to temp file /var/folders/b8/csskzk797yqdzbrxw6stvzhh0000gn/T//RtmpysMyoh/file1072c2fdb4c8f
## 13:29:23 Searching Annoy index using 1 thread, search_k = 3000
## 13:29:23 Annoy recall = 100%
## 13:29:23 Commencing smooth kNN distance calibration using 1 thread
## 13:29:23 Initializing from normalized Laplacian + noise
## 13:29:23 Commencing optimization for 500 epochs, with 53154 positive edges
## 13:29:25 Optimization finished
#mic@meta.data$cell = rownames(mic@meta.data)
#mic@meta.data <- mic@meta.data %>% left_join(k_cell, by = "cell")
#mic@meta.data$cluster <- as.character(mic@meta.data$cluster)
#rownames(mic@meta.data) <- mic@meta.data$cell
#add cluster ids from RF
#confirm that the cells are in the same order
head(k_cell[,1])
## [1] "AAACCCATCTTCTTCC-1" "AAACGAACACAGCTTA-1" "AAACGCTAGAGAGCGG-1"
## [4] "AAACGCTGTTTACGTG-1" "AAAGAACAGTGCTCGC-1" "AAAGTCCGTTTCCAAG-1"
head(mic@assays$RNA@counts[1,])
## AAACCCATCTTCTTCC-1 AAACGAACACAGCTTA-1 AAACGCTAGAGAGCGG-1 AAACGCTGTTTACGTG-1
## 0 0 0 0
## AAAGAACAGTGCTCGC-1 AAAGTCCGTTTCCAAG-1
## 0 0
tail(k_cell[,1])
## [1] "TTTGATCGTCCAAATC-1" "TTTGGAGGTATCATGC-1" "TTTGGAGGTCTTGCTC-1"
## [4] "TTTGGTTGTAAGATTG-1" "TTTGTTGCACCAAATC-1" "TTTGTTGGTAATTAGG-1"
tail(mic@assays$RNA@counts[1,])
## TTTGATCGTCCAAATC-1 TTTGGAGGTATCATGC-1 TTTGGAGGTCTTGCTC-1 TTTGGTTGTAAGATTG-1
## 0 0 0 0
## TTTGTTGCACCAAATC-1 TTTGTTGGTAATTAGG-1
## 0 0
#add cluster number to metadata
mic$cluster <- k_cell[,2]
mic <- SetIdent(mic, value = mic$cluster)
DimPlot(mic, reduction = "umap", group.by = "cluster")
# Differential Expression
mic.markers <- FindAllMarkers(mic, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 0
top10markers <- mic.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
top10markers_table <-data.frame("Cluster0" = top10markers[1:10,7],
"Cluster1" = top10markers[11:20,7],
"Cluster2" = top10markers[21:30,7])
top10markers_table
## gene gene.1 gene.2
## 1 HSPB1 S100A9 SOCS6
## 2 JUN S100A8 PLXDC2
## 3 DDIT4 FCGR3B C1QB
## 4 PLK2 S100P RPL37A
## 5 ATF3 CXCL8 GPR34
## 6 RGS16 VNN2 RPS27A
## 7 DNAJB4 IFITM2 CX3CR1
## 8 TUBB4B G0S2 RPS21
## 9 IDI1 NAMPT GLDN
## 10 TSPYL2 S100A12 APOC1
DoHeatmap(mic, features = top10markers$gene) + NoLegend()
new.cluster.ids <- c("'exAM'","Monocytes","homeostatic")
names(new.cluster.ids) <- levels(mic)
mic <- RenameIdents(mic, new.cluster.ids)
DimPlot(mic, reduction = "umap", label = TRUE, pt.size = 0.5)
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.