Skip to content

Commit

Permalink
Added the similar feature and relevant additions in element.py and re…
Browse files Browse the repository at this point in the history
…quest.py
  • Loading branch information
bolithium committed Jan 7, 2025
1 parent ce1df6d commit 878590d
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 1 deletion.
1 change: 1 addition & 0 deletions google_play_scraper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
from .features.permissions import permissions # noqa: F401
from .features.reviews import reviews, reviews_all # noqa: F401
from .features.search import search # noqa: F401
from .features.similar import similar  # noqa: F401
11 changes: 11 additions & 0 deletions google_play_scraper/constants/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,14 @@ class ElementSpecs:
"developer": ElementSpec(None, [0, 14]),
"installs": ElementSpec(None, [0, 15]),
}

# Field specs for a single entry of the similar-apps result list.
# features/similar.py iterates over this mapping and calls
# spec.extract_content(entry) for every key, so the keys below are exactly the
# keys of each dict returned by similar().
# NOTE(review): the integer lists look like index paths into the nested entry
# array and the optional third argument like a post-processing callable --
# confirm against the ElementSpec definition.
Similar = {
"appId": ElementSpec(None, [0, 0]),
"title": ElementSpec(None,[3]),
# The extracted description is handed to unescape_text.
"description": ElementSpec(None,[13, 1],unescape_text),
"developer": ElementSpec(None, [14]),
"currency": ElementSpec(None, [8, 1, 0, 1]),
# The raw price is divided by 1,000,000 (presumably it arrives in
# micro-units -- TODO confirm); `or 0` turns a 0.0 result into the int 0.
"price": ElementSpec(None, [8, 1, 0, 0], lambda price: (price / 1000000) or 0),
"score": ElementSpec(None, [4, 1]),
"scoreText": ElementSpec(None, [4, 0])
}
30 changes: 30 additions & 0 deletions google_play_scraper/constants/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,35 @@ def fallback_build(self, app_id: str, lang: str) -> str:
def build_body(self, *args):
return None

class _Similar(Format):
    """Request format for the two-step "similar apps" lookup.

    Step one is a POST to the batchexecute endpoint (``build`` and
    ``build_body``); step two is a GET on the cluster page addressed by a
    ``gsr`` token (``build_second``).
    """

    # Everything after the base URL is fixed text: the rpc id list, hl=en-US
    # and the f.sid / bl / _reqid values are hard-coded here.
    URL_FORMAT = (
        "{}/_/PlayStoreUi/data/batchexecute"
        "?rpcids=CLXjtf%2CA6yuRe%2CWs7gDc%2CZittHe%2Cag2B9c%2Ce7uDs%2CoCPfdb"
        "&source-path=%2Fstore%2Fapps%2Fdetails"
        "&f.sid=-6820885803383552233"
        "&bl=boq_playuiserver_20241007.10_p0"
        "&hl=en-US"
        "&authuser"
        "&soc-app=121"
        "&soc-platform=1"
        "&soc-device=1"
        "&_reqid=575098"
        "&rt=c"
    ).format(PLAY_STORE_BASE_URL)

    # The doubled braces survive the first format() call as a literal "{gsr}"
    # placeholder, which build_second() fills in later.
    URL_SECOND_FORMAT = "{}/store/apps/collection/cluster?gsr={{gsr}}".format(
        PLAY_STORE_BASE_URL
    )

    # URL-encoded form body; every "{app_id}" occurrence is replaced by
    # build_body().
    PAYLOAD_FORMAT = "f.req=%5B%5B%5B%22CLXjtf%22%2C%22%5B%5B%5C%22{app_id}%5C%22%2C7%5D%5D%22%2Cnull%2C%221%22%5D%2C%5B%22A6yuRe%22%2C%22%5B%5B%5C%22{app_id}%5C%22%2C7%5D%5D%22%2Cnull%2C%223%22%5D%2C%5B%22Ws7gDc%22%2C%22%5Bnull%2Cnull%2C%5B%5B1%2C9%2C10%2C11%2C13%2C14%2C19%2C20%2C38%2C43%2C47%2C49%2C52%2C58%2C59%2C63%2C69%2C70%2C73%2C74%2C75%2C78%2C79%2C80%2C91%2C92%2C95%2C96%2C97%2C100%2C101%2C103%2C106%2C112%2C119%2C129%2C137%2C139%2C141%2C145%2C146%2C151%2C155%2C169%5D%5D%2C%5B%5B%5B1%2Cnull%2C1%5D%2Cnull%2C%5B%5B%5B%5D%5D%5D%2Cnull%2Cnull%2Cnull%2Cnull%2C%5Bnull%2C2%5D%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2C%5B1%5D%5D%2C%5Bnull%2C%5B%5B%5B%5D%5D%5D%2Cnull%2Cnull%2C%5B1%5D%5D%2C%5Bnull%2C%5B%5B%5B%5D%5D%5D%2Cnull%2C%5B1%5D%5D%2C%5Bnull%2C%5B%5B%5B%5D%5D%5D%5D%2Cnull%2Cnull%2Cnull%2Cnull%2C%5B%5B%5B%5B%5D%5D%5D%5D%2C%5B%5B%5B%5B%5D%5D%5D%5D%5D%2Cnull%2C%5B%5B%5C%22{app_id}%5C%22%2C7%5D%5D%5D%22%2Cnull%2C%225%22%5D%2C%5B%22ZittHe%22%2C%22%5B%5Bnull%2C%5B%5B3%2C%5B10%5D%5D%2Cnull%2Cnull%2C%5B184%5D%5D%2C%5B%5C%22{app_id}%5C%22%2C7%5D%5D%5D%22%2Cnull%2C%227%22%5D%2C%5B%22ag2B9c%22%2C%22%5B%5Bnull%2C%5B%5C%22{app_id}%5C%22%2C7%5D%2Cnull%2C%5B%5B3%2C%5B6%5D%5D%2Cnull%2Cnull%2C%5B1%2C8%5D%5D%5D%2C%5B1%5D%5D%22%2Cnull%2C%229%22%5D%2C%5B%22e7uDs%22%2C%22%5B%5B%5C%22{app_id}%5C%22%2C7%5D%5D%22%2Cnull%2C%2211%22%5D%2C%5B%22Ws7gDc%22%2C%22%5Bnull%2Cnull%2C%5B%5B52%5D%5D%2C%5B%5Bnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2C%5B2%5D%5D%5D%2Cnull%2C%5B%5B%5C%22{app_id}%5C%22%2C7%5D%5D%5D%22%2Cnull%2C%2213%22%5D%2C%5B%22oCPfdb%22%2C%22%5Bnull%2C%5B2%2Cnull%2C%5B20%5D%2Cnull%2C%5Bnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2C2%5D%5D%2C%5B%5C%22{app_id}%5C%22%2C7%5D%5D%22%2Cnull%2C%2215%22%5D%5D%5D&"

    def build(self) -> str:
        """Return the URL of the first (POST) request."""
        return self.URL_FORMAT.format()

    def build_body(self, app_id: str) -> bytes:
        """Return the POST body for ``app_id`` as encoded bytes."""
        payload = self.PAYLOAD_FORMAT.format(app_id=app_id)
        return payload.encode()

    def build_second(self, gsr: str) -> str:
        """Return the cluster-page URL for the given ``gsr`` token."""
        return self.URL_SECOND_FORMAT.format(gsr=gsr)


class _Reviews(Format):
URL_FORMAT = (
"{}/_/PlayStoreUi/data/batchexecute?hl={{lang}}&gl={{country}}".format(
Expand Down Expand Up @@ -113,4 +142,5 @@ def build_body(self, *args):
# Each request format is instantiated once here; features/similar.py reaches
# the similar-apps format as Formats.Similar.
Detail = _Detail()
Reviews = _Reviews()
Permissions = _Permissions()
Similar = _Similar()
Searchresults = _Searchresults()
2 changes: 1 addition & 1 deletion google_play_scraper/features/reviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,4 +172,4 @@ def reviews_all(app_id: str, sleep_milliseconds: int = 0, **kwargs) -> list:
if sleep_milliseconds:
sleep(sleep_milliseconds / 1000)

return result
return result
65 changes: 65 additions & 0 deletions google_play_scraper/features/similar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import json
from typing import Any, Dict, List
from urllib.parse import quote
import re
import codecs

from google_play_scraper.constants.element import ElementSpecs
from google_play_scraper.constants.regex import Regex
from google_play_scraper.constants.request import Formats
from google_play_scraper.exceptions import NotFoundError
from google_play_scraper.utils.request import post, get

def similar(
    app_id: str,
    lang: str = "en",
    country: str = "us",
    n_hits: int = 30,
) -> List[Dict[str, Any]]:
    """Return apps that Google Play lists as similar to ``app_id``.

    Two requests are made. The first is a POST whose response is scanned for
    a ``cluster?gsr=...`` link; the second is a GET on that cluster page,
    whose embedded script data holds the similar apps.

    Args:
        app_id: Package name of the reference application.
        lang: Language code, sent as ``hl`` on the cluster-page request.
        country: Country code, sent as ``gl`` on the cluster-page request.
        n_hits: Maximum number of apps to return; values <= 0 yield ``[]``.

    Returns:
        A list of dicts, one per app, keyed by ``ElementSpecs.Similar``.

    Raises:
        NotFoundError: If the first response contains no cluster link, or the
            cluster page does not have the expected data layout.
    """
    # First request: locate the gsr query parameter for the supplied app.
    # The URL built by Formats.Similar.build() is fixed (it takes no
    # arguments), so lang/country can only be applied to the second request.
    dom = post(
        Formats.Similar.build(),
        Formats.Similar.build_body(app_id),
        {"content-type": "application/x-www-form-urlencoded"},
    )

    cluster_links = re.findall(r"cluster\?gsr[^\"']+", dom)
    if not cluster_links:
        raise NotFoundError("Similar apps not found for '{}'.".format(app_id))

    # The link sits inside an escaped string: drop the trailing backslash,
    # collapse doubled backslashes, then decode the \uXXXX escapes.
    raw_link = cluster_links[0].rstrip("\\")
    cluster = codecs.decode(raw_link.replace("\\\\", "\\"), "unicode_escape")

    gsr_values = re.findall(r"gsr=([^&]+)", cluster)
    if not gsr_values:
        raise NotFoundError("Similar apps not found for '{}'.".format(app_id))

    # Second request: fetch the cluster page for that gsr token.
    # NOTE(review): hl/gl are the language/country parameters the Play Store
    # web pages accept elsewhere -- confirm they are honoured on this page.
    url = "{}&hl={}&gl={}".format(
        Formats.Similar.build_second(gsr_values[0]),
        quote(lang),
        quote(country),
    )
    dom = get(url)

    # Collect every keyed data blob embedded in the page's script blocks.
    dataset = {}
    for script in Regex.SCRIPT.findall(dom):
        key_match = Regex.KEY.findall(script)
        value_match = Regex.VALUE.findall(script)
        if key_match and value_match:
            dataset[key_match[0]] = json.loads(value_match[0])

    try:
        apps = dataset["ds:3"][0][1][0][21][0]
    except (KeyError, IndexError, TypeError) as err:
        raise NotFoundError(
            "Similar apps not found for '{}'.".format(app_id)
        ) from err

    if not apps:
        return []

    # max() keeps a negative n_hits from turning into a from-the-end slice.
    return [
        {
            key: spec.extract_content(entry)
            for key, spec in ElementSpecs.Similar.items()
        }
        for entry in apps[: max(n_hits, 0)]
    ]

0 comments on commit 878590d

Please sign in to comment.