用R作生存分析

更新时间:2023-11-17 22:43:01 阅读量: 教育文库 文档下载

说明:文章内容仅供预览,部分内容可能不全。下载后的文档,内容与下面显示的完全一致。下载之前请确认下面内容是否您想要的,是否完整无缺。

生存分析

一.Libraries and data sets

library() # 查看你的系统中所有已装的libraries library(survival) # 加载一个library, 也可使用鼠标来做

library(help=survival) # see the list of available functions and data sets. data(aml) # 加载数据 aml aml # see the data

# 《生存分析》需要的两个 libraries library(survival)

library(KMsurv) #包含我们教材中所有的数据

# 若想使数据中的变量能够在R中作为一个变量使用,use attach(): > attach(aids) > infect

> detach(aids)

二.Survival Objects

#由函数 Surv 产生一个 survival object Surv (time, time2, event, type) ? time: survival time

? time2: ending time if it is interval censored ? event: censoring variable.

For interval censored data, 0=right censored, 1=event at time, 2=left censored, 3=interval censored.

? type: indicating whether it is right censored, left censored or interval censored or counting

process. 默认值是右删失或 counting process

#假设一个观测值为 [2, 3],则 R 语句为

Surv(time=2,time2=3, event=3, type = \

#右删失数据 ? attach(aml)

? Surv(time,status) ? detach(aml)

#左截断右删失数据 ? data(psych) ? attach(psych)

? my.surv.object <- Surv(age, age+time, death)

? my.surv.object

[1] (51,52 ] (58,59 ] (55,57 ] (28,50 ] (21,51+] (19,47 ] (25,57 ] [22] (29,63+] (35,65+] (32,67 ] (36,76 ] (32,71+] ? detach(psych)

三. The Kaplan-Meier Estimates

#计算生存曲线:survfit,有三个重要的参数:formula, conf.int, and conf.type.

survfit(formula, conf.int = 0.95, conf.type = \

conf.type=”plain” #linear confidence interval in our book conf.type=”log” # 对H(t)作log transformation conf.type=”log-log” #log-transformed in our book

myfit = survfit(Surv(time,status) ~ 1,data=kidney) #在新版的R里,这个~1是必要的。

attach(kidney)

myfit = survfit(Surv(time,status) ~ 1) #在新版的R里,这个~1是必要的。

a=summary(survfit(Surv(time,status)~1))

b=summary(survfit(Surv(time,status)~1), censored=TRUE)

#提取 survfit 产生的结果

a$surv # outputs the Kaplan-Meier estimate at each t_i a$time # {t_i} a$n.risk # {Y_i} a$n.event # {d_i}

summary(myfit)$std.err # standard error of the K-M estimate at {t_i} summary(myfit)$lower # lower pointwise estimates summary(myfit)$upper # upper pointwise estimates

画出生存曲线

plot(myfit, main=\ xlab=\

par(mfcol=c(2,1))

plot(myfit, conf.int=F, main=\ xlab=\plot(survfit(Surv(time,status) ~ 1, conf.int=0.99),

main=\

xlab=\par(mfrow=c(1,1))

#假设一个数据里有不同的组,下列指令可同时画出各组的生存曲线

attach(kidney)

myfit1 = survfit( Surv(time,status) ~ sex)

plot(myfit1, main=\sex group\

plot(myfit1, conf.int=T, col=c(\ylab=\

#下列指令输出 Residual mean time 和 它的标准差,及median time print(myfit, show.rmean=TRUE)

四.Nelson-Aalen estimator

risk = summary(myfit)$n.risk event = summary(myfit)$n.event time = summary(myfit)$time hazard = event / risk n = length(hazard) H = rep(0,n) H[1] = hazard[1]

for( i in 2:n) H[i]=H[i-1]+hazard[i] plot(time,H,type=”s”)

#数据aml

my <- summary(survfit(Surv(aml$time[1:11], aml$status[1:11]),type=\list(my$time, -log(my$surv))

五. Log-rank test

attach(melanom)

survdiff(Surv(days,status==1)~sex)

survdiff(Surv(days,status==1)~sex+strata(ulc))

#关于儿童急性白血病的配对数据, section 1.2

attach(drug6mp)

placebo= drug6mp*,c(\

drug = drug6mp[,c(\

将变量名统一

colnames(placebo)*3+=”time” colnames(drug)*3+=”time” colnames(drug)*4+=”censor”

对placebo组加上删失变量 censor=rep(1,21)

placebo=cbind(placebo,censor)

all = rbind(placebo,drug)

### Compare treatment and placebo

survdiff(Surv(time, censor)?1) # 不对, no stratification survdiff(Surv(time, censor)?1 + strata(pair)) # stratify on pair

五. Cox 模型

#一般语句

coxph(formula, data=, method=c(\

summary(coxph(Surv(days,status==1)~sex+log(thick)+strata(ulc), data=melanom))

plot(survfit(coxph(Surv(days,status==1)~sex+log(thick)+sex+strata(ulc),data=melanom)))

To obtain the survival function of a particular subject with specific covariate values (x = 1): coxfit1 <- coxph(Surv(time, status)~x, data=aml) survfit(coxfit1, newdata=data.frame(x=1))

coxfit2=coxph(Surv(days,status==1)~sex+log(thick)+strata(ulc), data=melanom) survfit(coxfit2, newdata=data.frame(sex=1,thick=65,ulc=2))

#计算 baseline cumulative hazard function

a = coxph(Surv(days,status==1)~sex+log(thick)+sex+strata(ulc), data=melanom) b = basehaz(a)

六. 参数模型

#指数模型

fit = survreg(Surv(aml$time, aml$status)~aml$x, dist=\fit = survreg( Surv(time, status)~x, data=aml, dist=\predict(fit, type=\

七. 如何产生生存数据

#没有删失的数据

reg <- function(x , rates){

y = rexp (length(rates), rate=rates)

fit1 = survreg(Surv(y, rep(1, length(y)))~x,dist=\ fit2 = coxph(Surv(y, rep(1, length(y)))~x) return(c(fit1$coef[2], fit2$coef)) }

#有删失的数据

? simulate a lifetime vector

? simulate independently a termination time (censoring time) vector ? observe whichever comes

lifetimes <- rexp( 25, rate = 0.2) censtimes <- 5 + 5*runif(25)

ztimes <- pmin(lifetimes, censtimes)

status <- as.numeric(censtime > lifetimes)

本文来源:https://www.bwwdw.com/article/kevv.html

Top