Skip to content

Commit

Permalink
Fix search top result
Browse files Browse the repository at this point in the history
  • Loading branch information
JoMingyu committed Jan 29, 2024
1 parent 53ef664 commit 8eab212
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 27 deletions.
34 changes: 30 additions & 4 deletions google_play_scraper/constants/element.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from datetime import datetime
from typing import Any, Callable, List, Optional

from google_play_scraper.constants.regex import Regex
from google_play_scraper.utils import nested_lookup
from google_play_scraper.utils.data_processors import unescape_text

Expand Down Expand Up @@ -189,11 +188,38 @@ class ElementSpecs:
"appVersion": ElementSpec(None, [10]),
}

Permission_Type = ElementSpec(None, [0])
Permission_List = ElementSpec(
PermissionType = ElementSpec(None, [0])

PermissionList = ElementSpec(
None, [2], lambda container: sorted([item[1] for item in container])
)
Searchresult = {

SearchResultOnTop = {
"appId": ElementSpec(None, [11, 0, 0]),
"icon": ElementSpec(None, [2, 95, 0, 3, 2]),
"screenshots": ElementSpec(
None,
[2, 78, 0],
lambda container: [item[3][2] for item in container],
[],
),
"title": ElementSpec(None, [2, 0, 0]),
"score": ElementSpec(None, [2, 51, 0, 1]),
"genre": ElementSpec(None, [2, 79, 0, 0, 0]),
"price": ElementSpec(
None, [2, 57, 0, 0, 0, 0, 1, 0, 0], lambda price: (price / 1000000) or 0
),
"free": ElementSpec(None, [2, 57, 0, 0, 0, 0, 1, 0, 0], lambda s: s == 0),
"currency": ElementSpec(None, [2, 57, 0, 0, 0, 0, 1, 0, 1]),
"video": ElementSpec(None, [2, 100, 0, 0, 3, 2]),
"videoImage": ElementSpec(None, [2, 100, 1, 0, 3, 2]),
"description": ElementSpec(None, [2, 72, 0, 1], unescape_text),
"descriptionHTML": ElementSpec(None, [2, 72, 0, 1]),
"developer": ElementSpec(None, [2, 68, 0]),
"installs": ElementSpec(None, [2, 13, 0]),
}

SearchResult = {
"appId": ElementSpec(None, [0, 0, 0]),
"icon": ElementSpec(None, [0, 1, 3, 2]),
"screenshots": ElementSpec(
Expand Down
12 changes: 2 additions & 10 deletions google_play_scraper/features/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,9 @@ def parse_dom(dom: str, app_id: str, url: str) -> Dict[str, Any]:
result = {}

for k, spec in ElementSpecs.Detail.items():
if isinstance(spec, list):
for sub_spec in spec:
content = sub_spec.extract_content(dataset)
content = spec.extract_content(dataset)

if content is not None:
result[k] = content
break
else:
content = spec.extract_content(dataset)

result[k] = content
result[k] = content

result["appId"] = app_id
result["url"] = url
Expand Down
4 changes: 2 additions & 2 deletions google_play_scraper/features/permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def permissions(app_id: str, lang: str = "en", country: str = "us") -> Dict[str,
for permission in permission_items:
if permission:
result[
ElementSpecs.Permission_Type.extract_content(permission)
] = ElementSpecs.Permission_List.extract_content(permission)
ElementSpecs.PermissionType.extract_content(permission)
] = ElementSpecs.PermissionList.extract_content(permission)

return result
42 changes: 33 additions & 9 deletions google_play_scraper/features/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
def search(
query: str, n_hits: int = 30, lang: str = "en", country: str = "us"
) -> List[Dict[str, Any]]:
if n_hits <= 0:
return []

query = quote(query)
url = Formats.Searchresults.build(query=query, lang=lang, country=country)
try:
Expand All @@ -20,15 +23,24 @@ def search(
url = Formats.Searchresults.fallback_build(query=query, lang=lang)
dom = get(url)

matches = Regex.SCRIPT.findall(dom) #take out script blocks from dom
matches = Regex.SCRIPT.findall(dom) # take out script blocks from dom

dataset = {}

dataset = {key: json.loads(value) for match, key, value in zip(matches, Regex.KEY.findall(match), Regex.VALUE.findall(match)) if key and value}
"""
This is to create a dictionary "dataset" that would combine key-value pairs for each match obtained from matches under the condition that the key and value are non-empty.
The matches variable is a list of match objects returned by the Regex.SCRIPT.findall() function.
"""
for match in matches:
key_match = Regex.KEY.findall(match)
value_match = Regex.VALUE.findall(match)

if key_match and value_match:
key = key_match[0]
value = json.loads(value_match[0])

dataset[key] = value

try:
top_result = dataset["ds:4"][0][1][0][23][16]
except IndexError:
top_result = None

success = False
# different idx for different countries and languages
Expand All @@ -42,12 +54,24 @@ def search(
return []

n_apps = min(len(dataset), n_hits)
search_results = []
for app_idx in range(n_apps):

search_results = (
[
{
k: spec.extract_content(top_result)
for k, spec in ElementSpecs.SearchResultOnTop.items()
}
]
if top_result
else []
)

for app_idx in range(n_apps - len(search_results)):
app = {}
for k, spec in ElementSpecs.Searchresult.items():
for k, spec in ElementSpecs.SearchResult.items():
content = spec.extract_content(dataset[app_idx])
app[k] = content

search_results.append(app)

return search_results
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "google-play-scraper"
version = "1.2.4"
version = "1.2.5"
description = "Google-Play-Scraper provides APIs to easily crawl the Google Play Store for Python without any external dependencies!"
authors = ["JoMingyu <[email protected]>"]
license = "MIT"
Expand Down
1 change: 1 addition & 0 deletions tests/e2e_tests/test_permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def test_reply_data_only_other_type(self):
"control vibration",
"full network access",
"run at startup",
"prevent device from sleeping",
"view network connections",
],
},
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e_tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_e2e_scenario_1(self):
self.assertEqual("Niantic, Inc.", result["developer"])
self.assertEqual("Adventure", result["genre"])
self.assertEqual(
"https://play-lh.googleusercontent.com/wAWerkEu_g2_BMCl85WKqN2mxn0xW1O22nV6yJOayrMKu9pqtrLMn7S2Zd1xaykKm0g",
"https://play-lh.googleusercontent.com/3UpKaqsS-3LDEQJqoNLXkj61eiA-_-h77heP22dYOy-WR4PSha3O_tPK57w4wZ4jIXII",
result["icon"],
)
self.assertTrue(result["screenshots"])
Expand Down

0 comments on commit 8eab212

Please sign in to comment.