-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
coded the first draft and started the debug to fix one bug and add then the usage.
- Loading branch information
Gaurav Sablok
committed
Sep 18, 2024
0 parents
commit 9c7df22
Showing
4 changed files
with
113 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
go-whole-genome-estimates | ||
|
||
- a whole genome linear block estimation. | ||
- block is defined as a single line for the aligned genome. | ||
- no matter how many genome you aligned you can pass the alignment file. | ||
- the first genome will be taken as a reference. | ||
- check the AlignmentGO package for defining your own block size. | ||
|
||
Gaurav Sablok |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
module github.com/whole-genome-alignment-estimate | ||
|
||
go 1.23.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
package main | ||
|
||
import ( | ||
"bufio" | ||
"flag" | ||
"fmt" | ||
"log" | ||
"os" | ||
"strconv" | ||
"strings" | ||
) | ||
|
||
/* | ||
* | ||
Author Gaurav Sablok | ||
Universitat Potsdam | ||
Date 2024-9-19 | ||
It takes a whole genome aligned files and then estimates the linear block by using the first genome as a reference genome. | ||
You can pass as many genome as a reference genome and it will take the first genome as a reference. This only compares the | ||
single line block. In the alignmentGO package, you can find the specific for defining your own block. | ||
*/ | ||
|
||
func main () { | ||
|
||
alignment := flag.String("alignment", "path to the alignment file", "file") | ||
|
||
flag.Parse() | ||
|
||
type alignmentIDStore struct { | ||
id string | ||
} | ||
|
||
type alignmentSeqStore struct { | ||
seq string | ||
} | ||
|
||
fOpen, err := os.Open(*alignment) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
|
||
alignmentID := []alignmentIDStore{} | ||
alignmentSeq := []alignmentSeqStore{} | ||
sequenceSpec := []string{} | ||
|
||
fRead := bufio.NewScanner(fOpen) | ||
for fRead.Scan() { | ||
line := fRead.Text() | ||
if strings.HasPrefix(string(line), ">") { | ||
alignmentID = append(alignmentID, alignmentIDStore{ | ||
id: strings.Replace(string(line), ">", "", -1), | ||
}) | ||
} | ||
if !strings.HasPrefix(string(line), ">") { | ||
alignmentSeq = append(alignmentSeq, alignmentSeqStore{ | ||
seq: string(line), | ||
}) | ||
} | ||
if !strings.HasPrefix(string(line), ">") { | ||
sequenceSpec = append(sequenceSpec, string(line)) | ||
} | ||
} | ||
|
||
counterA := 0 | ||
counterT := 0 | ||
counterG := 0 | ||
counterC := 0 | ||
|
||
for i := 0; i < len(sequenceSpec)-1; i++ { | ||
for j := 0; j < len(sequenceSpec[0]); j++ { | ||
if string(sequenceSpec[i][j]) == "A" && string(sequenceSpec[i+1][j]) == "T", | ||
|| string(sequenceSpec[i+1][j] == "C" || string(sequenceSpec[i+1][j]) == "G" { | ||
counterA++ | ||
} | ||
if string(sequenceSpec[i][j]) == "T" && string(sequenceSpec[i+1][j]) == "C", | ||
|| string(sequenceSpec[i+1][j]) == "G" || string(sequenceSpec[i+1][j]) == "A" { | ||
counterT++ | ||
} | ||
if string(sequenceSpec[i][j]) == "C" && string(sequenceSpec[i+1][j]) == "G", | ||
|| string(sequenceSpec[i+1][j]) == "A" || string(sequenceSpec[i+1][j]) == "T" { | ||
counterC++ | ||
} | ||
if string(sequenceSpec[i][j]) == "G" && string(sequenceSpec[i+1][j]) == "A", | ||
|| string(SequenceSpec[i+1][j]) == "T" || string(sequenceSpec[i+1][j]) == "C" { | ||
counterG++ | ||
} else { | ||
continue | ||
} | ||
} | ||
} | ||
fmt.Printf("The collinearity block of line width 1 for A as a base pattern is %d\n", counterA) | ||
fmt.Printf("The collinearity block of line width 1 for T as a base pattern is %d\n", counterT) | ||
fmt.Printf("The collinearity block of line width 1 for G as a base pattern is %d\n", counterG) | ||
fmt.Printf("The collinearity block of line width 1 for C as a base pattern is %d\n", counterC) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
>ENA|OX291461|OX291461.1 | ||
TCAGTATC----TC-------- | ||
>ENA|OX291509|OX291509.1 | ||
TATC----TC--------ATAG |