-
Notifications
You must be signed in to change notification settings - Fork 0
/
remove_noise.py
75 lines (58 loc) · 2.55 KB
/
remove_noise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def simplify_comma_separated_string(text):
    """Return a comma-separated list of developers/publishers unchanged (placeholder).

    SteamSpy provides the list of developers and publishers as one string in
    which commas act as separators. simplify_string() is one attempt to deal
    with names that themselves contain a comma, but it is not satisfactory and
    leads to wrong hyperlinks when displaying the ranking:
      - correct link: https://store.steampowered.com/search/?developer=CAPCOM%20CO.%2C%20LTD.
      - wrong link:   https://store.steampowered.com/search/?developer=CAPCOM%20Co.
      - trade-off:    https://store.steampowered.com/search/?term=CAPCOM%20Co.

    TODO deal with developers and publishers whose names contain a comma,
         e.g. 'CAPCOM CO., LTD.' should not become 'CAPCOM CO.'
    """
    # No processing is implemented yet: the input is handed back as-is.
    return text
def simplify_string(text):
    """Normalize a developer/publisher name so that spelling variants coincide.

    Three passes are applied, in this order:
      1. commas which belong to a company suffix (', Inc', 'Co.,', ...) rather
         than separating two names are dropped;
      2. noise such as platform tags, role annotations, '&', '/' and
         parentheses is removed;
      3. runs of consecutive spaces are collapsed to a single space.

    Every rule is a case-sensitive plain substring replacement, and rules are
    applied strictly in the order listed because later rules rely on the
    output of earlier ones. The result is not stripped, so a leading or
    trailing space may remain.

    :param text: raw developer/publisher string
    :return: the simplified string
    """
    # Strings with commas which are not used as separators
    comma_rules = (
        (', INC', ' INC'),
        (', Inc', ' Inc'),
        (', LLC', ' LLC'),
        (', Ltd', ' Ltd'),
        (', S.L.', ' S.L.'),
        (', a.s.', ' a.s.'),
        (', inc', ' inc'),
        (', s.r.o.', ' s.r.o.'),
        ('CO.,', 'CO.'),
        ('Co.,', 'Co.'),
        ('Oh, ', 'Oh '),
        ('co.,', 'co.'),
    )

    # Strings with unnecessary information, which would lead to the same dev appearing under different names
    noise_rules = (
        (' - ', ' '),
        (' and ', ' '),
        ('&', ''),
        ('/', ' '),
        ('amp;', ''),  # leftover of the HTML entity '&amp;' once '&' is gone
        ('(Mac', ''),
        ('Linux)', ''),
        ('Linux, ', ' '),
        ('Mac, ', ' '),
        ('PC Port', ''),
        ('Windows Update', ''),
        ('(Developments)', ''),
        # NOTE: '(Linux)' and '(Mac)' cannot match anymore at this point, since
        # 'Linux)' and '(Mac' were already removed above; kept to preserve the rule list.
        ('(Linux)', ''),
        ('(Mac)', ''),
        ('(Some Models)', ''),
        ('(art)', ''),
        ('(co-designer)', ''),
        ('(creator)', ''),
        ('(dev)', ''),
        ('(original release)', ''),
        ('(', ''),
        (')', ''),
    )

    for old, new in comma_rules + noise_rules:
        text = text.replace(old, new)

    # Collapse runs of spaces left behind by the removals above.
    # The needle must be TWO spaces: looping on a single space would replace a
    # space with itself forever and never terminate.
    double_space = ' ' * 2
    while double_space in text:
        text = text.replace(double_space, ' ')

    return text
def main():
    """Demo: simplify one sample publisher name and print the result.

    :return: True once the simplified name has been printed
    """
    # noinspection SpellCheckingInspection
    sample_name = 'Konami Digital Entertainment Co., Ltd'

    print(simplify_string(sample_name))

    return True
# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()