-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawl_disquiet.py
57 lines (45 loc) · 1.55 KB
/
crawl_disquiet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import html
import json
from pathlib import Path
from typing import Union

import requests
def get_query(path: Union[str, Path]) -> str:
    """Return the entire contents of the text file at *path*.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's locale encoding.

    Args:
        path: Location of the file to read (used here for a GraphQL query).

    Returns:
        The file's text, unchanged apart from Python's universal-newline
        translation.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(path, encoding="utf-8") as f:
        return f.read()
def get_disuqiet_posts(
    offset=0, limit=10, sortOption="popular", type="product", timeout=10
):
    """Fetch posts from the Disquiet GraphQL endpoint.

    Sends the ``GetPosts`` operation, using the query text read from
    ``./query.gql`` (resolved against the current working directory).

    The misspelled function name and the builtin-shadowing ``type`` parameter
    are kept as-is so existing callers continue to work.

    Args:
        offset: Forwarded as the ``offset`` GraphQL variable.
        limit: Forwarded as the ``limit`` GraphQL variable.
        sortOption: Forwarded as the ``sortOption`` GraphQL variable.
        type: Forwarded as the ``type`` GraphQL variable.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The value found at ``data.posts`` in the JSON response
        (presumably a list of post dicts -- confirm against query.gql).

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
        requests.exceptions.HTTPError: If the response has a 4xx/5xx status.
        KeyError: If the response JSON has no ``data`` or ``posts`` key.
    """
    url = "https://api.disquiet.io/graphql"
    headers = {"content-type": "application/json"}
    payload = {
        "operationName": "GetPosts",
        "query": get_query("./query.gql"),
        "variables": {
            "offset": offset,
            "limit": limit,
            "sortOption": sortOption,
            "type": type,
        },
    }
    response = requests.post(
        url, headers=headers, data=json.dumps(payload), timeout=timeout
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    data = response.json()
    return data["data"]["posts"]
def parse_posts_to_md(posts):
    """Render product posts as a string of concatenated HTML snippets.

    Only posts whose ``__typename`` is ``"Product"`` are rendered; every
    other post is skipped. Each rendered post becomes one ``<div>`` holding a
    link to its product page, its name, its tagline and its comma-joined
    topic display names.

    All values taken from the posts are HTML-escaped before interpolation, so
    characters such as ``<``, ``&`` or quotes in API data cannot break or
    inject markup. Missing, ``None`` or empty names/taglines fall back to the
    Korean placeholder texts; a ``None`` topic list or URL slug is treated as
    empty.

    Args:
        posts: Iterable of post dicts.

    Returns:
        The rendered snippets joined without a separator; ``""`` when no
        post is rendered.
    """
    fragments = []
    for post in posts:
        if post.get("__typename") != "Product":
            continue
        # `or` also covers keys that are present but null in the JSON.
        name = html.escape(str(post.get("name") or "제목이 없습니다."))
        tagline = html.escape(str(post.get("tagline") or "설명이 없습니다."))
        topics = html.escape(
            ",".join(topic["display_name"] for topic in post.get("topics") or [])
        )
        # Escaping also covers single quotes, which delimit the href below.
        url = html.escape(
            "https://disquiet.io/product/" + (post.get("url_slug") or "")
        )
        fragments.append(
            f"<div><a href='{url}'><h3>{name}</h3><div>{tagline}</div><div>{topics}</div></a></div>"
        )
    return "".join(fragments)
if __name__ == "__main__":
    # Script entry point: fetch posts with the default arguments and print
    # the rendered markup to stdout.
    print(parse_posts_to_md(get_disuqiet_posts()))