-
Notifications
You must be signed in to change notification settings - Fork 0
/
R code
145 lines (113 loc) · 3.9 KB
/
R code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
GlobalAlign <- function(match, mismatch, gap, a, b){
library(stringr)
#Checking if all parameter in the function are correct
if (is.numeric(match) != TRUE) {
stop(print("Match value should be an integer"))
}
if (is.numeric(mismatch) != TRUE) {
stop(print("Mismatch value should be an integer"))
}
if (is.numeric(gap) != TRUE) {
stop(print("Gap value should be an integer"))
}
if (is.character(a) != TRUE) {
stop(print("The first sequence is not a string"))
}
if (is.character(b) != TRUE) {
stop(print("The second sequence is not a string"))
}
#Transforming match, mismatch and gap values in integers
match <- as.integer(match)
mismatch <- as.integer(mismatch)
gap <- as.integer(gap)
#Now checking that the values are in according with the SW algorithm
if (match <= 0) {
stop(print("Match value should be greater than zero"))
}
if (mismatch > 0) {
stop(print("Mismatch value should be less than zero or equal zero"))
}
if (gap > 0) {
stop(print("Gap value should be less than zero or equal zero"))
}
#Preparing the matrix of the algorithm
a_splitted <- unlist(strsplit(a, "")) #Splitting the first sequence string
b_splitted <- unlist(strsplit(b, "")) #splitting the second sequence string
cols <- nchar(a) + 1 #Setting the length of the rows and the columns
rows <- nchar(b) + 1
m <- matrix(0, rows, cols) #Creating the matrix and fill it with zeros
#Initializing the matrix with the gap penality
lena <- nchar(a)
lenb <- nchar(b)
m[1,] <- cumsum(c(0, rep(gap, lena)))
m[,1] <- cumsum(c(0, rep(gap, lenb)))
#Assigning sequences to the row names and the column names of the matrix m
coln <- c("", a_splitted)
rown<- c("", b_splitted)
colnames(m) <- coln
rownames(m) <- rown
#Creating a second matrix to store the direction
m1 <- m
m1[1,] <- "left"
m1[,1] <- "up"
m1[1,1] <- 0
#Now filling the matrix with the scores given in the function's arguments
#Recalling Needleman-Wunsch algortithm:
#S(i,j) = max { S(i-1,j-1) + s(ai,bj),
#S(i-1,j) - Wx,
#S(i,j-1) - Wy }
for (i in 2:rows){
for (j in 2:cols){
up <- m[i -1, j] + gap #Up score means a gap in the seq on the columns
left <- m[i, j -1] + gap #Left score means a gap in the seq on the rows
#Diagonal is the value with the match or the mismatch
diagonal <- if (rownames(m)[i] == colnames(m)[j]){
m[i -1, j -1] + match
} else {
m[i -1, j -1] + mismatch
}
maxx <- max(up, left, diagonal) #Selecting the maximum value
m[i,j] <- maxx
#Storing the direction in the second matrix
if (maxx == up) {
m1[i,j] <- "up"
} else if (maxx == left){
m1[i,j] <- "left"
} else if (maxx == diagonal){
m1[i,j] <- "diagonal"
}
}
}
#Traceback, starting from the last cell of the matrix
#Creating ala and alb to store the alignment
ala <- character()
alb <- character()
#While we are in the matrix, doing the traceback using the direction matrix
#if the direction matrix says up, inserting a gap in the seq on the column and
#reporting the value on the row; if the direction matrix says left, inserting
#a gap in the seq on the rows and reporting the value on the column; if the
#direction matrix says diagonal, reporting both values on the row and on the
#column
while (i > 1 & j > 1){
if (m1[i,j] == "up"){
ala <- c("-", ala)
alb <- c(rownames(m1)[i], alb)
i <- i -1
} else if (m1[i,j] == "left") {
ala <- c(colnames(m1)[j], ala)
alb <- c("-", alb)
j <- j -1
} else if (m1[i,j] == "diagonal") {
ala <- c(colnames(m1)[j], ala)
alb <- c(rownames(m1)[i], alb)
i <- i -1
j <- j -1
}
}
#Removing NA values from the output
ala <- ala[!is.na(ala)]
alb <- alb[!is.na(alb)]
#Printing the alignment
cat(paste0(ala, collapse=''), "\n")
cat(paste0(alb, collapse=''), "\n")
}