############################################################################### #EdgeR Analysis on AD H3K27ac ChIP seq data #Sarah Marzi #edgeR_3.20.9 ############################################################################### # All HTSeq generated read count files must be located in the current working directory # The working directory also contains a subfolder "Pheno" with two files: # cases.txt: A list of all sample IDs of cases # AD_ChIP_Pheno.csv: A table of phenotypic covariates of all samples # Load DESeq and edgeR libraries library(edgeR) library(DESeq2) # Read in all samples (removed 404, 405 and 407 manually from directory), now 47 samples Files <- list.files(".") Table <- data.frame(sampleName = substring(Files, 1, 7), fileName = Files) # Annotate with case-control information cases <- read.table("Pheno/cases.txt", header=F) cases<-cases$V1 Table$condition <- Table$sampleName %in% cases Table$condition <- as.character(Table$condition) Table$condition[Table$condition ==TRUE] <- "AD" Table$condition[Table$condition == FALSE] <- "C" # Read in phenotypic information AD_pheno<-read.csv("Pheno/AD_ChIP_Pheno.csv") AD_pheno$Brain.Bank.ID <- as.character(AD_pheno$Brain.Bank.ID) # Merge with Table Table$sampleName <- as.character(Table$sampleName) Table <- merge(Table, AD_pheno, by.x="sampleName", by.y="Brain.Bank.ID", all.x=T, all.y=F) # Create DESeq dataset ddsAD <- DESeqDataSetFromHTSeqCount(sampleTable = Table, directory = directory, design= ~ condition) # Extract counts from DESeq object ADcounts<-counts(ddsAD) ADcounts<-as.data.frame(ADcounts) #all(names(ADcounts)==Table$sampleName) # Create count list condition<-as.factor(Table$condition) ADcountList<-DGEList(counts=ADcounts, group=condition) # Filter out peaks with low read counts (2 corresponds to 6-7 reads in min samples) keep <- rowSums(cpm(ADcountList)>1) >= 2 ADcountList<-ADcountList[keep, ,keep.lib.sizes=FALSE] gender<-factor(Table$Gender) CETS<-Table$CETS.Proportion age<-Table$Age..at.death. PMI<-Table$Post.mortem.delay..mins. braak<-Table$Braak.Stage #Converting cell proportion to 5 level factor CETS<-as.character(CETS) CETS<-as.numeric(CETS) # Imputing the mean for 1 missing cell proportion estimate medianCETS <- median(CETS, na.rm=T) CETSi<-CETS CETSi[which(is.na(CETSi))]<- medianCETS CETSif<-cut(CETSi, breaks=5, ordered_result = T) #Converting age to 5 level factor agef<-cut(age, breaks=5, ordered_result = T) # EdgeR model controlling for age and CETS as factors design<-model.matrix(~agef+CETSif+condition) ADcountList_n <- calcNormFactors(ADcountList) ADcountList_nd<-estimateDisp(ADcountList_n, design) fit <- glmQLFit(ADcountList_nd, design) qlf <- glmQLFTest(fit)