From 9c7df2262087782615bdcfbec0e15aa1a403d9e7 Mon Sep 17 00:00:00 2001 From: Gaurav Sablok Date: Wed, 18 Sep 2024 22:29:38 +0200 Subject: [PATCH] codecreatede coded the first draft and started the debug to fix one bug and add then the usage. --- README.md | 9 +++ go.mod | 3 + main.go | 97 ++++++++++++++++++++++++++++++++ samplefile/samplealignment.fasta | 4 ++ 4 files changed, 113 insertions(+) create mode 100644 README.md create mode 100644 go.mod create mode 100644 main.go create mode 100644 samplefile/samplealignment.fasta diff --git a/README.md b/README.md new file mode 100644 index 0000000..82dcead --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +go-whole-genome-estimates + +- a whole genome linear block estimation. +- block is defined as a single line for the aligned genome. +- no matter how many genomes you aligned, you can pass the alignment file. +- the first genome will be taken as a reference. +- check the AlignmentGO package for defining your own block size. + +Gaurav Sablok diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..07e7592 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/whole-genome-alignment-estimate + +go 1.23.1 diff --git a/main.go b/main.go new file mode 100644 index 0000000..1ccb019 --- /dev/null +++ b/main.go @@ -0,0 +1,97 @@ +package main + +import ( + "bufio" + "flag" + "fmt" + "log" + "os" + "strconv" + "strings" +) + +/* + * + Author Gaurav Sablok + Universitat Potsdam + Date 2024-9-19 + +It takes a whole-genome alignment file and then estimates the linear blocks by using the first genome as the reference genome. +You can pass as many genomes as you like and it will take the first genome as the reference. This only compares the +single-line block. In the AlignmentGO package, you can find the specifics for defining your own block. 
/*
Sample input shipped with the patch as samplefile/samplealignment.fasta:

	>ENA|OX291461|OX291461.1
	TCAGTATC----TC--------
	>ENA|OX291509|OX291509.1
	TATC----TC--------ATAG
*/

// maxLineBytes is the longest alignment line readSequences accepts.
// bufio.Scanner gives up at 64 KiB per line by default, which a genome
// alignment written on a single line can exceed.
const maxLineBytes = 1 << 30

// main reads the FASTA alignment named by the -alignment flag and prints, for
// each of the bases A, T, G and C, how many aligned columns hold that base in
// one sequence line and a different base in the following sequence line.
//
// The program exits through log.Fatal when the flag is missing or the file
// cannot be read.
func main() {
	// flag.String takes (name, default value, usage); the default is empty so
	// that a missing flag is reported instead of being treated as a file name.
	alignment := flag.String("alignment", "", "path to the alignment file")
	flag.Parse()

	if *alignment == "" {
		flag.Usage()
		log.Fatal("missing required -alignment flag")
	}

	sequences, err := readSequences(*alignment)
	if err != nil {
		log.Fatal(err)
	}

	counterA, counterT, counterG, counterC := countSubstitutions(sequences)

	fmt.Printf("The collinearity block of line width 1 for A as a base pattern is %d\n", counterA)
	fmt.Printf("The collinearity block of line width 1 for T as a base pattern is %d\n", counterT)
	fmt.Printf("The collinearity block of line width 1 for G as a base pattern is %d\n", counterG)
	fmt.Printf("The collinearity block of line width 1 for C as a base pattern is %d\n", counterC)
}

// readSequences returns every sequence line of the FASTA file at path, in file
// order. Header lines (those starting with ">") and blank lines are skipped;
// surrounding whitespace, including a trailing carriage return, is trimmed.
// Each physical line is kept as its own entry; wrapped records are not joined.
func readSequences(path string) ([]string, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var sequences []string
	sc := bufio.NewScanner(f)
	sc.Buffer(make([]byte, 0, 64*1024), maxLineBytes)
	for sc.Scan() {
		line := strings.TrimSpace(sc.Text())
		if line == "" || strings.HasPrefix(line, ">") {
			continue
		}
		sequences = append(sequences, line)
	}
	// Scan returns false on errors as well as on EOF, so a failure here would
	// otherwise look like a short file.
	if err := sc.Err(); err != nil {
		return nil, fmt.Errorf("reading alignment %q: %w", path, err)
	}
	return sequences, nil
}

// countSubstitutions compares each sequence line with the line that follows
// it, column by column, and counts the columns where both lines hold one of
// the upper-case bases A, T, G, C and the two bases differ. The count is
// attributed to the base found in the earlier line. Gap characters and any
// other symbols are never counted, and only the columns present in both lines
// of a pair are compared.
//
// NOTE(review): the README describes the first genome as the reference; this
// keeps the draft's pairing of each line with the next one, which is the same
// comparison for a two-genome alignment. Confirm the intent for larger inputs.
func countSubstitutions(sequences []string) (a, t, g, c int) {
	for i := 0; i+1 < len(sequences); i++ {
		cur, next := sequences[i], sequences[i+1]

		// Lines may differ in length; stop at the shorter one instead of
		// indexing past its end.
		width := len(cur)
		if len(next) < width {
			width = len(next)
		}

		for j := 0; j < width; j++ {
			ref, other := cur[j], next[j]
			if ref == other || !isBase(other) {
				continue
			}
			switch ref {
			case 'A':
				a++
			case 'T':
				t++
			case 'G':
				g++
			case 'C':
				c++
			}
		}
	}
	return a, t, g, c
}

// isBase reports whether b is one of the upper-case nucleotides A, T, G or C.
func isBase(b byte) bool {
	switch b {
	case 'A', 'T', 'G', 'C':
		return true
	}
	return false
}