-
Notifications
You must be signed in to change notification settings - Fork 11
/
R_gc_content.R
35 lines (29 loc) · 992 Bytes
/
R_gc_content.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Function to calculate GC content
calculate_gc_content <- function(sequence) {
g_count <- sum(strsplit(sequence, "")[[1]] == "G")
c_count <- sum(strsplit(sequence, "")[[1]] == "C")
gc_content <- (g_count + c_count) / nchar(sequence) * 100
return(gc_content)
}
# Function to read FASTA file and concatenate all sequences
read_fasta <- function(filename) {
fasta_lines <- readLines(filename)
sequences <- fasta_lines[!grepl("^>", fasta_lines)] # Filter out header lines
full_sequence <- paste(sequences, collapse = "")
return(full_sequence)
}
# Main function
main <- function() {
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 1) {
stop("Usage: Rscript gc_content.R <fasta_file>")
}
fasta_file <- args[1]
full_sequence <- read_fasta(fasta_file)
gc_content <- calculate_gc_content(full_sequence)
cat(sprintf("Overall GC Content: %.2f%%\n", gc_content)
}
# Call main function if script is run
if (interactive() == FALSE) {
main()
}