Skip to content

Commit

Permalink
[go] fix owner resolution logic
Browse files Browse the repository at this point in the history
**Summary**

When a blog post's title contains multiple member names, the owner has to be resolved to the member whose name appears last in the title.

Also, the theme name should be the source of truth for the owner name.

**Test**

- go test

**Issue**

- fix #71
  • Loading branch information
yssk22 committed Jun 24, 2024
1 parent 5aa1761 commit f7d957f
Show file tree
Hide file tree
Showing 24 changed files with 513 additions and 1 deletion.
16 changes: 15 additions & 1 deletion go/service/helloproject/ameblo/ameblo.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"context"
"fmt"
"net/url"
"strings"

"github.com/spf13/cobra"
"github.com/yssk22/hpapp/go/foundation/assert"
Expand Down Expand Up @@ -328,18 +329,31 @@ func (s *amebloService) getTaggedMembersFromText(ctx context.Context, post *post
members := s.tagger.GetTaggedMembers(ctx, bytes.NewBufferString(post.EntryTitle))
if len(members) > 0 {
taggedMembers = append(taggedMembers, members...)
owner = members[len(members)-1]
// Note that the members slice doesn't guarantee the order, so we need to find the owner who appears last
// by checking the position of each name in the title.
ownerPresentAt := -1
for _, m := range members {
at := strings.Index(post.EntryTitle, m.Name)
if at >= ownerPresentAt {
ownerPresentAt = at
owner = m
}
}
}
members = s.tagger.GetTaggedMembers(ctx, bytes.NewBufferString(post.ThemeName))
if len(members) > 0 {
taggedMembers = append(taggedMembers, members...)
// We cannot override here since there are cases where the name in the title is correct while the name in the theme is not.
// example: https://ameblo.jp/juicejuice-official/entry-11599064407.html
if owner == nil {
owner = members[0]
}
}
members = s.tagger.GetTaggedMembers(ctx, bytes.NewBufferString(post.EntryText))
if len(members) > 0 {
taggedMembers = append(taggedMembers, members...)
// We are not sure whether this creates an issue, but there is at least one post without a name in the Title and Theme.
// example: https://ameblo.jp/morningm-13ki/entry-12340773412.html
if owner == nil {
owner = members[0]
}
Expand Down
12 changes: 12 additions & 0 deletions go/service/helloproject/ameblo/ameblo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,18 @@ func TestAmeblo(t *testing.T) {
post, err := s.crawl(ctx, "https://ameblo.jp/morningmusume-10ki/entry-12540780771.html", true)
a.Nil(err)
a.Equals(post.QueryOwnerMember().FirstIDX(ctx), member.ID)

// Issue #71
post, err = s.crawl(ctx, "https://ameblo.jp/mm-12ki/entry-12857225326.html", true)
a.Nil(err)
member = post.QueryOwnerMember().OnlyX(ctx)
a.Equals("野中美希", member.Name)

post, err = s.crawl(ctx, "https://ameblo.jp/mm-12ki/entry-12856792074.html", true)
a.Nil(err)
member = post.QueryOwnerMember().OnlyX(ctx)
a.Equals("牧野真莉愛", member.Name)

})

test.New("Conflict between Theme and EntryTitle", test.WithHPMaster(), test.WithFixedTimestamp()).Run(t, func(ctx context.Context, tt *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"data":{"12856792074":{"commentCnt":58,"reblogCnt":6,"iineCnt":695}},"meta":{}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"status_code": 200,
"status": "200 OK",
"header": {
"Accept-Ch": [
"Sec-CH-UA-Full"
],
"Accept-Ranges": [
"bytes"
],
"Age": [
"0"
],
"Cache-Control": [
"max-age=20, stale-while-revalidate=60"
],
"Content-Security-Policy": [
"upgrade-insecure-requests"
],
"Content-Type": [
"application/json; charset=utf-8"
],
"Date": [
"Mon, 24 Jun 2024 03:34:01 GMT"
],
"Etag": [
"W/\"50-S9x5KMnST61kHXiBf8N8NqoEbAI\""
],
"Origin-Trial": [
"AgkzE0SdPqccfaG9xo/nbroJVd9BqLU0uZCCQbV46yVUMHh4r8RDCAWLKP8BgMPiXpQLpE/b2F9w+bJKwTj9Zg8AAAB1eyJvcmlnaW4iOiJodHRwczovL2FtZWJsby5qcDo0NDMiLCJmZWF0dXJlIjoiU2VuZEZ1bGxVc2VyQWdlbnRBZnRlclJlZHVjdGlvbiIsImV4cGlyeSI6MTY4NDg4NjM5OSwiaXNTdWJkb21haW4iOnRydWV9"
],
"Permissions-Policy": [
"ch-ua-full=*"
],
"Server-Timing": [
"total; dur=8.956336; desc=\"Total Response Time\"",
"MISS-CLUSTER, fastly;desc=\"Edge time\";dur=17"
],
"Timing-Allow-Origin": [
"*"
],
"Vary": [
"Origin,Accept-Encoding,X-Loading"
],
"X-Cdn-Ttl": [
"86400.000"
],
"X-Content-Type-Options": [
"nosniff"
],
"X-Envoy-Decorator-Operation": [
"public-web.blog.svc.cluster.local:8080/*"
],
"X-Envoy-Upstream-Service-Time": [
"9"
],
"X-Is-Default-Ttl": [
"false"
],
"X-Loading": [
"?0"
],
"X-Timer": [
"S1719200041.151928,VS0,VE17"
],
"X-User-Agent": [
"desktop"
]
},
"content_path": "testdata/httpsnapshot/ameblo.jp/_api/blogEntryReactions;amebaId=mm-12ki;blogId=10049636311;entryIds=12856792074?returnMeta=true"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"data":{"12857225326":{"commentCnt":109,"reblogCnt":5,"iineCnt":410}},"meta":{}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"status_code": 200,
"status": "200 OK",
"header": {
"Accept-Ch": [
"Sec-CH-UA-Full"
],
"Accept-Ranges": [
"bytes"
],
"Age": [
"0"
],
"Cache-Control": [
"max-age=20, stale-while-revalidate=60"
],
"Content-Security-Policy": [
"upgrade-insecure-requests"
],
"Content-Type": [
"application/json; charset=utf-8"
],
"Date": [
"Mon, 24 Jun 2024 03:15:50 GMT"
],
"Etag": [
"W/\"51-D6ygumd95SOIGKjL26x1aIrsHIk\""
],
"Origin-Trial": [
"AgkzE0SdPqccfaG9xo/nbroJVd9BqLU0uZCCQbV46yVUMHh4r8RDCAWLKP8BgMPiXpQLpE/b2F9w+bJKwTj9Zg8AAAB1eyJvcmlnaW4iOiJodHRwczovL2FtZWJsby5qcDo0NDMiLCJmZWF0dXJlIjoiU2VuZEZ1bGxVc2VyQWdlbnRBZnRlclJlZHVjdGlvbiIsImV4cGlyeSI6MTY4NDg4NjM5OSwiaXNTdWJkb21haW4iOnRydWV9"
],
"Permissions-Policy": [
"ch-ua-full=*"
],
"Server-Timing": [
"total; dur=7.492946; desc=\"Total Response Time\"",
"MISS-CLUSTER, fastly;desc=\"Edge time\";dur=18"
],
"Timing-Allow-Origin": [
"*"
],
"Vary": [
"Origin,Accept-Encoding,X-Loading"
],
"X-Cdn-Ttl": [
"86400.000"
],
"X-Content-Type-Options": [
"nosniff"
],
"X-Envoy-Decorator-Operation": [
"public-web.blog.svc.cluster.local:8080/*"
],
"X-Envoy-Upstream-Service-Time": [
"11"
],
"X-Is-Default-Ttl": [
"false"
],
"X-Loading": [
"?0"
],
"X-Timer": [
"S1719198951.536664,VS0,VE18"
],
"X-User-Agent": [
"desktop"
]
},
"content_path": "testdata/httpsnapshot/ameblo.jp/_api/blogEntryReactions;amebaId=mm-12ki;blogId=10049636311;entryIds=12857225326?returnMeta=true"
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"status_code": 200,
"status": "200 OK",
"header": {
"Accept-Ch": [
"Sec-CH-UA-Full"
],
"Accept-Ranges": [
"bytes"
],
"Age": [
"0"
],
"Cache-Control": [
"max-age=5"
],
"Content-Security-Policy": [
"upgrade-insecure-requests"
],
"Content-Type": [
"text/html; charset=utf-8"
],
"Date": [
"Mon, 24 Jun 2024 03:34:01 GMT"
],
"Etag": [
"W/\"1b684-Up3LbGlNahEpqGRxLFlArz8sOSU\""
],
"Origin-Trial": [
"AgkzE0SdPqccfaG9xo/nbroJVd9BqLU0uZCCQbV46yVUMHh4r8RDCAWLKP8BgMPiXpQLpE/b2F9w+bJKwTj9Zg8AAAB1eyJvcmlnaW4iOiJodHRwczovL2FtZWJsby5qcDo0NDMiLCJmZWF0dXJlIjoiU2VuZEZ1bGxVc2VyQWdlbnRBZnRlclJlZHVjdGlvbiIsImV4cGlyeSI6MTY4NDg4NjM5OSwiaXNTdWJkb21haW4iOnRydWV9"
],
"Permissions-Policy": [
"ch-ua-full=*"
],
"Server-Timing": [
"data; dur=64, rendering; dur=10, total; dur=76.763746; desc=\"Total Response Time\"",
"MISS-CLUSTER, fastly;desc=\"Edge time\";dur=129"
],
"Timing-Allow-Origin": [
"*"
],
"Vary": [
"Origin,Accept-Encoding,X-User-Agent,X-Loading"
],
"X-Cdn-Ttl": [
"86400.000"
],
"X-Content-Type-Options": [
"nosniff"
],
"X-Envoy-Decorator-Operation": [
"public-web.blog.svc.cluster.local:8080/*"
],
"X-Envoy-Upstream-Service-Time": [
"78"
],
"X-Is-Default-Ttl": [
"false"
],
"X-Loading": [
"?0"
],
"X-Timer": [
"S1719200041.007545,VS0,VE129"
],
"X-User-Agent": [
"desktop"
]
},
"content_path": "testdata/httpsnapshot/ameblo.jp/mm-12ki/entry-12856792074.html"
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"status_code": 200,
"status": "200 OK",
"header": {
"Accept-Ch": [
"Sec-CH-UA-Full"
],
"Accept-Ranges": [
"bytes"
],
"Age": [
"0"
],
"Cache-Control": [
"max-age=5"
],
"Content-Security-Policy": [
"upgrade-insecure-requests"
],
"Content-Type": [
"text/html; charset=utf-8"
],
"Date": [
"Mon, 24 Jun 2024 03:15:50 GMT"
],
"Etag": [
"W/\"3320a-mJpdqAiUEzRM2htfU8V9fF7HGYs\""
],
"Origin-Trial": [
"AgkzE0SdPqccfaG9xo/nbroJVd9BqLU0uZCCQbV46yVUMHh4r8RDCAWLKP8BgMPiXpQLpE/b2F9w+bJKwTj9Zg8AAAB1eyJvcmlnaW4iOiJodHRwczovL2FtZWJsby5qcDo0NDMiLCJmZWF0dXJlIjoiU2VuZEZ1bGxVc2VyQWdlbnRBZnRlclJlZHVjdGlvbiIsImV4cGlyeSI6MTY4NDg4NjM5OSwiaXNTdWJkb21haW4iOnRydWV9"
],
"Permissions-Policy": [
"ch-ua-full=*"
],
"Server-Timing": [
"data; dur=192, rendering; dur=14, total; dur=216.21336399999998; desc=\"Total Response Time\"",
"MISS-CLUSTER, fastly;desc=\"Edge time\";dur=244"
],
"Timing-Allow-Origin": [
"*"
],
"Vary": [
"Origin,Accept-Encoding,X-User-Agent,X-Loading"
],
"X-Cdn-Ttl": [
"86400.000"
],
"X-Content-Type-Options": [
"nosniff"
],
"X-Envoy-Decorator-Operation": [
"public-web.blog.svc.cluster.local:8080/*"
],
"X-Envoy-Upstream-Service-Time": [
"229"
],
"X-Is-Default-Ttl": [
"false"
],
"X-Loading": [
"?0"
],
"X-Timer": [
"S1719198950.271865,VS0,VE244"
],
"X-User-Agent": [
"desktop"
]
},
"content_path": "testdata/httpsnapshot/ameblo.jp/mm-12ki/entry-12857225326.html"
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit f7d957f

Please sign in to comment.