Setup

library(Seurat)
library(readr)
library("Matrix")

Load data and convert to Seurat Object

#setwd("/sc/arion/projects/ad-omics/emily/Olah")
# Read the supplementary CSV (resolved relative to the current working
# directory) into a tibble.
alldat <- readr::read_csv(file = "Olah_SupplementaryData14.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Registered S3 method overwritten by 'cli':
##   method     from    
##   print.boxx spatstat
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   X1 = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# Work on a copy so the table as read from disk stays available in `alldat`.
suppdata <- alldat
# Blank out the name of the first column (the one holding the row labels).
# NOTE(review): an NA column name is unusual and may be rejected or warned
# about by newer tibble versions -- confirm before upgrading.
names(suppdata)[1L] <- NA
# Preview the first rows.
head(suppdata)
## # A tibble: 6 x 16,246
##   NA    Cell_1 Cell_2 Cell_3 Cell_4 Cell_5 Cell_6 Cell_7 Cell_8 Cell_9 Cell_10
##   <chr>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>
## 1 RP11…      0      0      0      0      0      0      0      0      0       0
## 2 FAM1…      0      0      0      0      0      0      0      0      0       0
## 3 OR4F5      0      0      0      0      0      0      0      0      0       0
## 4 RP11…      0      0      0      0      0      0      0      0      0       0
## 5 RP11…      0      0      0      0      0      0      0      0      0       0
## 6 RP11…      0      0      0      0      0      0      0      0      0       0
## # … with 16,235 more variables: Cell_11 <dbl>, Cell_12 <dbl>, Cell_13 <dbl>,
## #   Cell_14 <dbl>, Cell_15 <dbl>, Cell_16 <dbl>, Cell_17 <dbl>, Cell_18 <dbl>,
## #   Cell_19 <dbl>, Cell_20 <dbl>, Cell_21 <dbl>, Cell_22 <dbl>, Cell_23 <dbl>,
## #   Cell_24 <dbl>, Cell_25 <dbl>, Cell_26 <dbl>, Cell_27 <dbl>, Cell_28 <dbl>,
## #   Cell_29 <dbl>, Cell_30 <dbl>, Cell_31 <dbl>, Cell_32 <dbl>, Cell_33 <dbl>,
## #   Cell_34 <dbl>, Cell_35 <dbl>, Cell_36 <dbl>, Cell_37 <dbl>, Cell_38 <dbl>,
## #   Cell_39 <dbl>, Cell_40 <dbl>, Cell_41 <dbl>, Cell_42 <dbl>, Cell_43 <dbl>,
## #   Cell_44 <dbl>, Cell_45 <dbl>, Cell_46 <dbl>, Cell_47 <dbl>, Cell_48 <dbl>,
## #   Cell_49 <dbl>, Cell_50 <dbl>, Cell_51 <dbl>, Cell_52 <dbl>, Cell_53 <dbl>,
## #   Cell_54 <dbl>, Cell_55 <dbl>, Cell_56 <dbl>, Cell_57 <dbl>, Cell_58 <dbl>,
## #   Cell_59 <dbl>, Cell_60 <dbl>, Cell_61 <dbl>, Cell_62 <dbl>, Cell_63 <dbl>,
## #   Cell_64 <dbl>, Cell_65 <dbl>, Cell_66 <dbl>, Cell_67 <dbl>, Cell_68 <dbl>,
## #   Cell_69 <dbl>, Cell_70 <dbl>, Cell_71 <dbl>, Cell_72 <dbl>, Cell_73 <dbl>,
## #   Cell_74 <dbl>, Cell_75 <dbl>, Cell_76 <dbl>, Cell_77 <dbl>, Cell_78 <dbl>,
## #   Cell_79 <dbl>, Cell_80 <dbl>, Cell_81 <dbl>, Cell_82 <dbl>, Cell_83 <dbl>,
## #   Cell_84 <dbl>, Cell_85 <dbl>, Cell_86 <dbl>, Cell_87 <dbl>, Cell_88 <dbl>,
## #   Cell_89 <dbl>, Cell_90 <dbl>, Cell_91 <dbl>, Cell_92 <dbl>, Cell_93 <dbl>,
## #   Cell_94 <dbl>, Cell_95 <dbl>, Cell_96 <dbl>, Cell_97 <dbl>, Cell_98 <dbl>,
## #   Cell_99 <dbl>, Cell_100 <dbl>, Cell_101 <dbl>, Cell_102 <dbl>,
## #   Cell_103 <dbl>, Cell_104 <dbl>, Cell_105 <dbl>, Cell_106 <dbl>,
## #   Cell_107 <dbl>, Cell_108 <dbl>, Cell_109 <dbl>, Cell_110 <dbl>, …
# Tibbles do not support row names, so convert to a plain data.frame first.
suppdata.df <- as.data.frame(suppdata)
# Promote the first column (the feature/gene labels) to row names ...
rownames(suppdata.df) <- suppdata.df[, 1]
# ... and remove it from the data. drop = FALSE keeps the result a data.frame
# even if only a single cell column is left after removing the label column.
suppdata.df <- suppdata.df[, -1, drop = FALSE]
# Preview the top-left corner of the table.
suppdata.df[1:10, 1:10]
##               Cell_1 Cell_2 Cell_3 Cell_4 Cell_5 Cell_6 Cell_7 Cell_8 Cell_9
## RP11-34P13.3       0      0      0      0      0      0      0      0      0
## FAM138A            0      0      0      0      0      0      0      0      0
## OR4F5              0      0      0      0      0      0      0      0      0
## RP11-34P13.7       0      0      0      0      0      0      0      0      0
## RP11-34P13.8       0      0      0      0      0      0      0      0      0
## RP11-34P13.14      0      0      0      0      0      0      0      0      0
## RP11-34P13.9       0      0      0      0      0      0      0      0      0
## FO538757.3         0      0      0      0      0      0      0      0      0
## FO538757.2         0      1      0      0      0      0      0      1      0
## AP006222.2         0      0      1      0      1      1      0      0      0
##               Cell_10
## RP11-34P13.3        0
## FAM138A             0
## OR4F5               0
## RP11-34P13.7        0
## RP11-34P13.8        0
## RP11-34P13.14       0
## RP11-34P13.9        0
## FO538757.3          0
## FO538757.2          0
## AP006222.2          0
# Build the Seurat object from the feature-by-cell table, storing the values
# in an assay named "RNA".
Olah <- CreateSeuratObject(counts = suppdata.df, assay = "RNA")
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
# Tag every cell with the label "Olah_prefilter" in a new metadata column and
# make that column the active identity.
Olah$new.ident <- "Olah_prefilter"
Idents(Olah) <- "new.ident"
# Persist the Seurat object to disk, then print its summary.
saveRDS(object = Olah, file = "Olah_Seurat.RDS")
Olah
## An object of class Seurat 
## 33660 features across 16245 samples within 1 assay 
## Active assay: RNA (33660 features, 0 variable features)
# Preview the per-cell metadata for the first ten cells. head() returns only
# rows that exist (no NA-filled rows when there are fewer than ten cells), and
# `[[]]` reads the metadata through the object's accessor rather than its slot.
head(Olah[[]], n = 10)
##         orig.ident nCount_RNA nFeature_RNA      new.ident
## Cell_1        Cell       1827         1037 Olah_prefilter
## Cell_2        Cell       1166          773 Olah_prefilter
## Cell_3        Cell       2581         1644 Olah_prefilter
## Cell_4        Cell       2528         1179 Olah_prefilter
## Cell_5        Cell       2408         1018 Olah_prefilter
## Cell_6        Cell       2105         1247 Olah_prefilter
## Cell_7        Cell       1458          971 Olah_prefilter
## Cell_8        Cell       4046         2159 Olah_prefilter
## Cell_9        Cell       1108          654 Olah_prefilter
## Cell_10       Cell       2191         1146 Olah_prefilter