###### MAIN PROGRAM in S-plus ############ Lkexact.pv_function(pv, threshold, k) { # An inputted p-value sequence (pv) is transferred into # a binary sequence by the inputted threshold, then its # longest k-interrupt run Lk, k=0-3 and the corresponding # tail probability are reported # The main program need the following subroutine to work : # Lk.length: length of Lk # Markov.test: goodness of fit test for markov independence # 1st order dependence # Lk0to3: calculated the exact p-value under 1st order dependence, # (need subroutine Lk.0, Lk.1, Lk.2, Lk.3) # Lk.waterman: calculated the approximated p-value under independence. a_as.numeric(pv < threshold) n <- length(a) d <- Lk.length(a, k) MT <- round(Markov.test(a,1),dig=6) d1 <- d + 1 ps <- sum(a)/n cat("length of sequence=", n, " pss=", MT[2, 2], " pff=", MT[1, 1], "\n") cat("Lk=", d, "\n") kpb <- Lk0to3(ps, MT[2, 2], MT[1, 1], n, d, k) cat("for 1st order Markov dependent model, exact p-value P(Lk>=d)=", round(kpb, dig=5), "\n") cat("for Markov independent model, p-value P(Lk>=d)=", round(Lk.waterman(a,k),dig=5), "\n") } ############################ Lk.length_function(a, k) { #this program reports the max length of well-matching (1) run, allowing #k mismatches within a binary sequence a.# # output: Lk if(k > 5) cat("k has to be less than 6", "\n") n <- length(a) j <- 0 x <- NULL y <- NULL for(i in 1:(n - 1)) { if(a[i] != a[i + 1]) { j <- j + 1 x[j] <- i } } L1 <- matrix(0, 1, j) L2 <- matrix(0, 1, j) L3 <- matrix(0, 1, j) L4 <- matrix(0, 1, j) L5 <- matrix(0, 1, j) L <- matrix(1, 1, j) if(j == 0) { if(a[1] == 1) { y[1] <- n L[1] <- n } if(a[1] == 0) { y[1] <- n L[1] <- 0 } } if(j > 0) { y[1] <- x[1] y[j + 1] <- n - x[j] if(j > 1) { for(j in 2:j) { y[j] <- x[j] - x[j - 1] } } t <- 1 if(a[1] == 1) { if((2 * t + 1) > (j + 1)) { L <- y[1] } while((2 * t + 1) <= (j + 1)) { s1 <- y[2 * t - 1] + y[2 * t + 1] s2 <- y[2 * t] L1[t] <- (s1 + s2) * (s2 <= k) if((2 * t + 3) <= (j + 1) && k > 1) { s21 <- (y[2 * t + 3] + s1) s22 <- (y[2 * t + 2] + s2) L2[t] <- (s21 + s22) * (s22 <= k) } if((2 * t + 5) <= (j + 1) && k > 2) { s31 <- (y[2 * t + 5] + s21) s32 <- (y[2 * t + 4] + s22) L3[t] <- (s31 + s32) * (s32 <= k) } if((2 * t + 7) <= (j + 1) && k > 3) { s41 <- (y[2 * t + 7] + s31) s42 <- (y[2 * t + 6] + s32) L4[t] <- (s41 + s42) * (s42 <= k) } if((2 * t + 9) <= (j + 1) && k > 4) { s51 <- (y[2 * t + 9] + s41) s52 <- (y[2 * t + 8] + s42) L5[t] <- (s51 + s52) * (s52 <= k) } L[t] <- max((y[2 * t - 1]), (y[2 * t + 1]), L1[t], L2[t], L3[ t], L4[t], L5[t]) t <- t + 1 } } if(a[1] == 0) { if((2 * t + 2) > (j + 1)) { L <- y[2] } while((2 * t + 2) <= (j + 1)) { s1 <- y[2 * t] + y[2 * t + 2] s2 <- y[2 * t + 1] L1[t] <- (s1 + s2) * (s2 <= k) if((2 * t + 4) <= (j + 1) && k > 1) { s21 <- (y[2 * t + 4] + s1) s22 <- (y[2 * t + 3] + s2) L2[t] <- (s21 + s22) * (s22 <= k) } if((2 * t + 6) <= (j + 1) && k > 2) { s31 <- (y[2 * t + 6] + s21) s32 <- (y[2 * t + 5] + s22) L3[t] <- (s31 + s32) * (s32 <= k) } if((2 * t + 8) <= (j + 1) && k > 3) { s41 <- (y[2 * t + 8] + s31) s42 <- (y[2 * t + 7] + s32) L4[t] <- (s41 + s42) * (s42 <= k) } if((2 * t + 10) <= (j + 1) && k > 4) { s51 <- (y[2 * t + 10] + s41) s52 <- (y[2 * t + 9] + s42) L5[t] <- (s51 + s52) * (s52 <= k) } L[t] <- max((y[2 * t]), (y[2 * t + 2]), L1[t], L2[t], L3[t], L4[ t], L5[t]) t <- t + 1 } } } #result_list(y=y,a=a,L1=L1,L2=L2,L3=L3,L4=L4,L5=L5,L=L) #the max length of k-interrupt run in binary sequence d <- max(L) d } ############################# Markov.test_function(sq,t) { cat("The imported sequence sq is assume to have state 0,1,...,t","\n") cat("Each state must have at least 1 observation","\n") # input: sq =the target discrete sequence# # input t: states of sq = 0,1,...,t# options(warn = -1) sqm1 <- length(sq) - 1 MT_ table(sq[1:sqm1], sq[2:length(sq)]) cat("transition matrix","\n") print (MT) t1_t+1 for (i in 1:t1) MT[i,]_MT[i,]/sum(MT[i,]) # MT as transition prob. matrix cat("Test sequence is Ho: independent vs. H1: 1st order Markov","\n") temp1 <- chisq.test(sq[1:sqm1], sq[2:length(sq)], correct= T) print(temp1) temp2_Markov.1vs2(sq,t) if(temp1$p.value >= 0.05) cat("Conclusion: independent sequence", "\n") else if(temp1$p.value < 0.05 && temp2 > 0.05) cat("Conclusion: 1st order Markov", "\n") else if(temp2 < 0.05) cat("Conclusion: 2nd order Markov", "\n") MT } #################################### #sub program for Markov.test Markov.1vs2_function(sq,t) { #test H0:1st order vs H1:2nd order for sequence sq # export chisq stat and the test p-value n_length(sq) nm2_n-2 SOME_0 for (a in 0:t) for (b in 0:t) for (c in 0:t) { ts1_0 ts2_0 ts3_0 ts4_0 for (i in 1:nm2) { ts1_ts1+as.numeric(sq[i]==a && sq[i+1]==b && sq[i+2]==c) ts2_ts2+as.numeric(sq[i]==a && sq[i+1]==b) ts3_ts3+as.numeric(sq[i+1]==b && sq[i+2]==c) ts4_ts4+as.numeric(sq[i+1]==b) } OME_(ts1-(ts2*ts3/ts4))**2/(ts2*ts3/ts4) SOME_SOME+OME } df_(t+1)*(t)**2 qSOME_1-pchisq(SOME,df) cat("Test Ho: 1st order Markov vs. H1: 2nd order Markov","\n") cat("Test stat. is:",round(SOME,3), "with Chisq dist'n of df=",df, "\n") cat("p-value=",round(qSOME,4),"\n") qSOME } ############################# Lk0to3_function(ps, pss, pff, n, d, k) { if(k == 0) Lk.0(ps, pss, pff, n, d) else if(k == 1) Lk.1(ps, pss, pff, n, d) else if(k == 2) Lk.2(ps, pss, pff, n, d) else if(k == 3) Lk.3(ps, pss, pff, n, d) else if(k > 3) cat("This program allows 3 mismatches at most", "\n") } ############################# Lk.0_function(ps, pss, pff, n, d) { #this program compute the probability P(L0>=d)# #L0 is the perfect matching run within a sequence of length n# # ps is the initial prob. for "s", and pss is the transition prob of 1 given 1# a <- matrix(1, d, 1) b <- diag(a, d) c <- matrix(0, 1, d) p0 <- cbind(1, c) p0 <- cbind(p0, 0) U <- rbind(a, 0) U <- rbind(1, U) c <- cbind(c, 1) M <- cbind((1 - pss) * a, pss * b) M <- rbind(M, c) M <- cbind(0, M) M <- rbind(0, M) M[1, 2] <- 1 - ps M[1, 3] <- ps M[2, 2] <- pff M[2, 3] <- 1 - pff Mn <- M %*% M for(i in 3:n) { Mn <- Mn %*% M } 1 - p0 %*% Mn %*% U } Lk.1_function(ps, pss, pff, n, d) { #this program compute the probability P(N(n,d)=0)# #i.e., the number of non-overlapping type 1 runs having at most 1 type 0 # #under given initial ps and pf with length d is zero.d=#1+#0# NN <- (((1 + d) * d)/2) + 1 pf <- 1 - ps psf <- 1 - pss pfs <- 1 - pff c0 <- matrix(0, 1, NN - 1) p0 <- cbind(1, c0) c1 <- matrix(1, NN - 1, 1) U <- rbind(c1, 0) M <- matrix(0, NN, NN) for(ir in 0:(d - 1)) { for(jr in 0:ir) { for(ic in 0:(d - 1)) { for(jc in 0:ic) { rid <- (((ir + 1) * ir)/2) + jr + 1 cid <- (((ic + 1) * ic)/2) + jc + 1 if(ir > 0 && jr == 0 && ic == ir + 1 && jc == 0) { M[rid, cid] <- pss } if(jr > 1 && ic == ir + 1 && jc == jr + 1) { M[rid, cid] <- pss } if(ir > 0 && ic == ir + 1 && jr == 0 && jc == 1) { M[rid, cid] <- psf } if(jr > 0 && ic == jr && jc == 1) { M[rid, cid] <- psf } if(ir > 0 && jr == 1 && ic == 1 && jc == 1) { M[rid, cid] <- pff } if(ir > 0 && jr == 1 && ic == ir + 1 && jc == 2) { M[rid, cid] <- pfs } if(ir == 0 && jr == 0 && ic == 1 && jc == 0) { M[rid, cid] <- ps } if(ir == 0 && jr == 0 && ic == 1 && jc == 1) { M[rid, cid] <- pf } } } } } for(i in 1:NN) { M[i, NN] <- 1 - sum(M[i, ]) } Mn <- M %*% M for(i in 3:n) { Mn <- Mn %*% M } 1 - p0 %*% Mn %*% U } Lk.2_function(ps, pss, pff, n, d) { #this program compute the probability P(L2>=d)# #L2 is the 2-interrupted run within a sequence of length n# # ps is the initial prob. for "s", and pss is the transition prob of 1 given 1# NN <- (2 * d^3 + 10 * d)/12 + 1 pf <- 1 - ps psf <- 1 - pss pfs <- 1 - pff c0 <- matrix(0, 1, NN - 1) p0 <- cbind(1, c0) c1 <- matrix(1, NN - 1, 1) U <- rbind(c1, 0) M <- matrix(0, NN, NN) for(ir in 0:(d - 1)) { for(jr in 0:ir) { for(kr in 0:ir) { for(ic in 0:(d - 1)) { for(jc in 0:ic) { for(kc in 0:ic) { rid <- ((2 * ir^3 + 10 * ir)/12 + ((jr - 1) * (2 * ir - jr + 2))/2 + kr - jr + 2) * (kr > jr) * (jr > 0) + ((2 * ir^3 + 10 * ir)/12 + (( (jr - 1) * (2 * ir - jr + 2))/ 2 + 2) * (jr > 0)) * (kr == 0) + 1 * (kr == 0) * (jr == 0) cid <- ((2 * ic^3 + 10 * ic)/12 + ((jc - 1) * (2 * ic - jc + 2))/2 + kc - jc + 2) * (kc > jc) * (jc > 0) + ((2 * ic^3 + 10 * ic)/12 + (( (jc - 1) * (2 * ic - jc + 2))/ 2 + 2) * (jc > 0)) * (kc == 0) + 1 * (kc == 0) * (jc == 0) if(ir > 0) { if(jr > 1 && kr > jr && ic == ir + 1 && jc == jr + 1 && kc == kr + 1) { M[rid, cid] <- pss } if(jr > 1 && kr == 0 && ic == ir + 1 && jc == jr + 1 && kc == 0) { M[rid, cid] <- pss } if(jr == 0 && kr == 0 && ic == ir + 1 && kc == 0 && jc == 0) { M[rid, cid] <- pss } if(jr == 0 && kr == 0 && ic == ir + 1 && jc == 1 && kc == 0) { M[rid, cid] <- psf } if(jr > 1 && kr == 0 && ic == ir + 1 && jc == 1 && kc == jr + 1) { M[rid, cid] <- psf } if(jr > 1 && kr > jr && ic == kr && jc == 1 && kc == jr + 1) { M[rid, cid] <- psf } if(jr == 1 && kr == 0 && ic == ir + 1 && jc == 1 && kc == 2) { M[rid, cid] <- pff } if(jr == 1 && kr > jr + 1 && ic == kr && jc == 1 && kc == 2) { M[rid, cid] <- pff } if(jr == 1 && kr == 2 && ic == 2 && jc == 1 && kc == 2) { M[rid, cid] <- pff } if(jr == 1 && kr == 0 && ic == ir + 1 && jc == 2 && kc == 0) { M[rid, cid] <- pfs } if(jr == 1 && kr > jr && ic == ir + 1 && jc == jr + 1 && kc == kr + 1) { M[rid, cid] <- pfs } } if(ir == 0 && jr == 0 && kr == 0 && ic == 1 && jc == 0 && kc == 0) { M[rid, cid] <- ps } if(ir == 0 && jr == 0 && kr == 0 && ic == 1 && jc == 1 && kc == 0) { M[rid, cid] <- pf } } } } } } } for(i in 1:NN) { M[i, NN] <- 1 - sum(M[i, ]) } Mn <- M %*% M for(i in 3:n) { Mn <- Mn %*% M } 1 - p0 %*% Mn %*% U } Lk.3_function(ps, pss, pff, n,d) { #this program compute the probability P(N(n,d)=0)# #i.e., the number of non-overlapping type 1 runs having at most 3 type 0# #under given initial ps and pf with length d is zero.d=#1+#0# NN_(d^4-2*d^3+11*d^2+14*d+24)/24 pf_1-ps psf_1-pss pfs_1-pff c0_matrix(0,1,NN-1) p0_cbind(1,c0) c1_matrix(1,NN-1,1) U_rbind(c1,0) M_matrix(0,NN,NN) for(ir in 0:(d-1)){ for(jr in 0:ir){ for(kr in 0:ir){ for(lr in 0:ir){ for(ic in 0:(d-1)){ for(jc in 0:ic){ for(kc in 0:ic){ for(lc in 0:ic){ if((kr>0 && kr<=jr)||(lr>0 && lr<=kr) || (jr==0 && ( kr!=0||lr!=0))|| (kr==0&&lr!=0)){rid_0} else{ rid_(ir^4-2*ir^3+11*ir^2+14*ir)/24+1+(jr-1+ (-6*ir^2-6*ir-6*ir*jr^2+6*ir^2*jr+12*ir*jr+2*jr^3-6*jr^2+4*jr)/12+1+ (((2*ir-jr-kr+2)*(kr-jr-1)/2)*(kr>jr)+1+(lr-kr)*(lr>kr))*(kr>1))*(jr>0)} if((kc>0 && kc<=jc)||(lc>0 && lc<=kc) || (jc==0 && ( kc!=0||lc!=0))|| (kc==0&&lc!=0)){cid_0} else{ cid_(ic^4-2*ic^3+11*ic^2+14*ic)/24+1+(jc-1+ (-6*ic^2-6*ic-6*ic*jc^2+6*ic^2*jc+12*ic*jc+2*jc^3-6*jc^2+4*jc)/12+1+ (((2*ic-jc-kc+2)*(kc-jc-1)/2)*(kc>jc)+1+(lc-kc)*(lc>kc))*(kc>1))*(jc>0)} if(jr!=1 && ir>0){ if(ic==ir+1 && kr==0 && jr==0 && kc==0 && jc==0){M[rid,cid]_pss} if(ic==ir+1 && jc==(jr+1)*(jr>0)&&kc==(kr+1)*(kr>0)&& lc==(lr+1)*(lr>0)){M[rid,cid]_pss}} if(jr==1 && jc==1){ if(kr==0 && lr==0 && lc==0 && kc==2 && jc==1 && ic==ir+1){M[rid,cid]_pff} if(ic==ir+1 && kc==2 && lc==kr+1 && lr==0 && kr>1){M[rid,cid]_pff} if(lr>kr && kr>jr && ic==lr && kc==jr+1 && lc==kr+1){M[rid,cid]_pff}} if(jr==1){ if(kr==0 && lr==0 && ic==ir+1 && jc==2 && kc==0 && lc==0){M[rid,cid]_pfs} if(jc==2 && kc==kr+1 && ic==ir+1 && lc==0 && lr==0){M[rid,cid]_pfs} if(ic==ir+1 && jc==jr+1 && kc==kr+1 && lc==lr+1 && lr>kr&& kr>jr) {M[rid,cid]_pfs}} if(ir>0 && jr!=1&& jc==1){ if(ic==ir+1){ if( kr==0 && kc==0 && jr==0 && lr==0 && lc==0){M[rid,cid]_psf} if( jr>1 && kc==jr+1 && lr==0 &&lc==0&&kr==0){M[rid,cid]_psf} if( jr>1 && kr>jr && kc==jr+1 && lr==0 && lc==kr+1) {M[rid,cid]_psf}} if(ic==lr && jc==1 && kc==jr+1 && lc==kr+1&& jr>1&& kr>jr && lr>kr ){M[rid,cid]_psf}} if(ir==0 && jr==0 && kr==0 && lr==0 && ic==1 && jc==0 && kc==0 && lc==0){M[rid,cid]_ps} if(ir==0 && jr==0 && kr==0 && lr==0 && ic==1 && jc==1 && kc==0 && lc==0){M[rid,cid]_pf} }}}}}}}} for(i in 1:NN) { M[i,NN]_1-sum(M[i,]) } Mn_M%*%M for (i in 3:n) {Mn_Mn%*%M } 1-p0%*%Mn%*%U } ################################ Lk.waterman_function(a , k) { #this program compute the probability of k-interrupt-run, P(Lk>=d), # in a iid Bernoulli seq with P(1)=ps, # where n is the length of seq., # d is the length of Lk, and k is the num. of mismached. # output 3 values: est. of the prob, and its lower, upper bounds n <- length(a) d <- Lk.length(a, k) ps <- sum(a)/n s <- d - k EEW <- (n * (s/d - ps) * dbinom(s, d, ps)) * (-1) EEW <- exp(EEW) erro <- 7 * d * dbinom(s, d, ps) + (1 - pbinom(s, d, ps)) upb <- EEW + erro lob <- EEW - erro bd <- c(EEW, upb, lob) bd <- 1 - bd if(bd[2] > 1) bd[2] <- 1 if(bd[3] < 0) bd[3] <- 0 bd[1] }