diff --git a/.github/workflows/create-summary.yml b/.github/workflows/create-summary.yml
new file mode 100644
index 0000000..51af786
--- /dev/null
+++ b/.github/workflows/create-summary.yml
@@ -0,0 +1,145 @@
+name: Create Summary
+
+# Creates the summary of one ISO week of survey results and commits it.
+# Runs every Monday morning for the week that just ended; when dispatched
+# manually, the year and ISO week to summarize are given explicitly.
+on:
+  schedule:
+  - cron: '0 7 * * 1'
+  workflow_dispatch:
+    inputs:
+      year:
+        description: 'Year to create the summary of'
+        required: true
+      week:
+        description: 'ISO week to create the summary of'
+        required: true
+
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+
+    name: Create Summary
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v3
+
+    - name: Install rclone
+      shell: bash
+      run: |
+        curl -sL https://rclone.org/install.sh | sudo bash
+
+        rclone config create --no-obscure openttd s3 \
+          provider Cloudflare \
+          access_key_id ${{ secrets.R2_SURVEY_ACCESS_KEY_ID }} \
+          secret_access_key ${{ secrets.R2_SURVEY_SECRET_ACCESS_KEY }} \
+          endpoint ${{ secrets.R2_SURVEY_ENDPOINT }} \
+          acl private \
+          no_check_bucket true
+
+    - name: Calculate dates
+      shell: bash
+      id: dates
+      run: |
+        year="${{ inputs.year }}"
+        week="${{ inputs.week }}"
+
+        # If no year/week is given, detect the week before today.
+        # This assumes this script is started on a Monday.
+        if [ -z "${year}" ] || [ -z "${week}" ]; then
+          year=$(date -d "yesterday" +%G)
+          week=$(date -d "yesterday" +%V)
+        fi
+
+        # Normalize the week to two digits, so both "8" and "08" are accepted and
+        # compare equal to the output of "date +%V" below. The "10#" prefix forces
+        # base 10; without it "08" and "09" are rejected as invalid octal numbers.
+        week=$(printf '%02d' "$((10#${week}))")
+
+        # Calculate the start and end of the week.
+        day_in_week=$(date -d "${year}-01-01" +%u)
+        if [ "${day_in_week}" -lt 5 ]; then
+          first_monday=$(date -d "${year}-01-01 -${day_in_week} days +1 day" +%Y-%m-%d)
+        else
+          first_monday=$(date -d "${year}-01-01 -${day_in_week} days +8 day" +%Y-%m-%d)
+        fi
+        start_date=$(date -d "${first_monday} +${week} weeks -1 week" +%Y-%m-%d)
+        end_date=$(date -d "${start_date} +6 days" +%Y-%m-%d)
+
+        # Ensure the start date is actually in the week we want.
+        start_check=$(date -d "${start_date}" +%G-%V)
+        if [ "${start_check}" != "${year}-${week}" ]; then
+          echo "Start date ${start_date} is not in week ${week} of ${year}, but in ${start_check}"
+          exit 1
+        fi
+
+        # Ensure the end date is actually in the week we want.
+        end_check=$(date -d "${end_date}" +%G-%V)
+        if [ "${end_check}" != "${year}-${week}" ]; then
+          echo "End date ${end_date} is not in week ${week} of ${year}, but in ${end_check}"
+          exit 1
+        fi
+
+        echo "Week: ${week}"
+        echo "Year: ${year}"
+        echo "Start date: ${start_date}"
+        echo "End date: ${end_date}"
+
+        echo "week=${week}" >> "$GITHUB_OUTPUT"
+        echo "year=${year}" >> "$GITHUB_OUTPUT"
+        echo "start_date=${start_date}" >> "$GITHUB_OUTPUT"
+        echo "end_date=${end_date}" >> "$GITHUB_OUTPUT"
+
+    - name: Download packed results
+      shell: bash
+      run: |
+        echo "Downloading packs for week ${{ steps.dates.outputs.week }} in ${{ steps.dates.outputs.year }}: [${{ steps.dates.outputs.start_date }} .. ${{ steps.dates.outputs.end_date }}]"
+
+        # Every step runs in its own shell, so the start date has to be taken from
+        # the outputs of the "dates" step; it is not inherited as a shell variable.
+        start_date="${{ steps.dates.outputs.start_date }}"
+
+        mkdir -p packed
+        for i in $(seq 0 6); do
+          date=$(date -d "${start_date} +${i} days" +%Y-%m-%d)
+          date_year=$(date -d "${start_date} +${i} days" +%Y)
+          date_month=$(date -d "${start_date} +${i} days" +%m)
+
+          echo "Downloading ${date}"
+          rclone copy -v openttd:survey-packed-prod/${date_year}/${date_month}/openttd-survey-pack.${date}.tar.xz packed
+        done
+
+    - name: Run analysis
+      shell: bash
+      run: |
+        mkdir -p _data/summaries/${{ steps.dates.outputs.year }}
+        python -m analysis packed/* > _data/summaries/${{ steps.dates.outputs.year }}/wk${{ steps.dates.outputs.week }}.json
+
+    - name: Create summary entry
+      shell: bash
+      run: |
+        mkdir -p _summaries/${{ steps.dates.outputs.year }}
+        summary="_summaries/${{ steps.dates.outputs.year }}/wk${{ steps.dates.outputs.week }}.md"
+
+        echo "---" > ${summary}
+        echo "name: ${{ steps.dates.outputs.year }} - Week ${{ steps.dates.outputs.week }}" >> ${summary}
+        echo "title: Survey Result Summary - ${{ steps.dates.outputs.year }} - Week ${{ steps.dates.outputs.week }}" >> ${summary}
+        echo "active_nav: summaries" >> ${summary}
+        echo "year: \"${{ steps.dates.outputs.year }}\"" >> ${summary}
+        echo "week: wk${{ steps.dates.outputs.week }}" >> ${summary}
+        echo "start_date: \"${{ steps.dates.outputs.start_date }}\"" >> ${summary}
+        echo "end_date: \"${{ steps.dates.outputs.end_date }}\"" >> ${summary}
+        echo "---" >> ${summary}
+
+    - name: Commit and push
+      shell: bash
+      run: |
+        git config --global user.name "OpenTTD Survey"
+        git config --global user.email "survey@openttd.org"
+
+        git add _data/summaries/${{ steps.dates.outputs.year }}/wk${{ steps.dates.outputs.week }}.json
+        git add _summaries/${{ steps.dates.outputs.year }}/wk${{ steps.dates.outputs.week }}.md
+
+        git commit -m "Add: summary for week ${{ steps.dates.outputs.week }} of ${{ steps.dates.outputs.year }}"
+        git push
diff --git a/README.md b/README.md
index 19c1741..8777a95 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,12 @@ This
is a [Jekyll](https://jekyllrb.com/) website, and is served by nginx as a s ## Development +### Survey results + +To summarize survey results, the Python application `analysis` processes a bundle of JSONs and outputs another JSON with the summary. + +To run it, simply execute `python3 -m analysis <files>`, where each file is a survey pack archive or a `verified.json` survey result. + ### Running a local server If you do not want to run a server, but just build the current site, replace `serve` with `build` in the examples below. diff --git a/_config.yml b/_config.yml index a89693c..649b3f7 100644 --- a/_config.yml +++ b/_config.yml @@ -23,10 +23,10 @@ defaults: layout: "default" - scope: path: "" - type: "analysis" + type: "summaries" values: - layout: "analysis" + layout: "summaries" collections: - analysis: + summaries: output: true diff --git a/_layouts/default.html b/_layouts/default.html index b7d56f3..4faa12e 100644 --- a/_layouts/default.html +++ b/_layouts/default.html @@ -25,8 +25,8 @@
  • Participate
  • -
  • - Results +
  • + Summaries
  • diff --git a/_layouts/summaries.html b/_layouts/summaries.html new file mode 100644 index 0000000..27f11fb --- /dev/null +++ b/_layouts/summaries.html @@ -0,0 +1,83 @@ +--- +layout: default +--- + +
    +
    +

    Survey Result Summary - {{ page.name }}

    +
    +
    +
    +

    + Below is the summary of the survey results from {{ page.start_date }} to {{ page.end_date }}. +

    + +
      + {% for version in site.data.summaries[page.year][page.week] %} +
    • + {{ version[0] }} +
    • + {% endfor %} +
    + +

    + The following statistics are based on "seconds played". + That is to say, if a setting was "true" for 100 seconds of play-time and "false" for 200 seconds of play-time, it would be 33.3% true and 66.7% false. + This is to avoid biasing the results towards shorter games.
    + Worded differently, the following statistics are not counting how often something was used, but how long it was used for. +

    + +

    + Be mindful that these numbers are only based on those people willing to send us survey results. + It might not be representative of the entire OpenTTD player base. +

    +
    +
    + + {% for version in site.data.summaries[page.year][page.week] %} +
    +

    {{ version[0] }}

    +
    +
    +
    + {% if version[1] == nil %} +

    + Due to the low number of games played, no summary is available.

    + {% continue %} + {% endif %} + + {% assign seconds = version[1].summary.seconds | times: 1.0 %} + +

    + {% comment %}Divide the float "seconds" by a float, so the total keeps its decimals instead of being floored to whole hours.{% endcomment %}We received surveys for a total of {{ seconds | divided_by: 3600.0 | round: 2 }} hours of games played, over a total of {{ version[1].summary.ids }} games. + This is an average of {{ seconds | divided_by: version[1].summary.ids | divided_by: 3600 | round: 2 }} hours per game.

    + + + {% for summary in version[1] %} + {% if summary[0] == "summary" %}{% continue %}{% endif %} + + + + {% for line in summary[1] %} + {% assign percentage = line[1] | divided_by: seconds | percentage %} + + + + {% if percentage == "0.0" %} + + {% else %} + + {% endif %} + + {% endfor %} + {% endfor %} +
    + {{ summary[0] }} +
    {{ line[0] }}
    <0.1%{{ percentage }}%
    +
    +
    + {% endfor %} +
    diff --git a/_plugins/openttd-filters.rb b/_plugins/openttd-filters.rb
new file mode 100644
index 0000000..1733cc1
--- /dev/null
+++ b/_plugins/openttd-filters.rb
@@ -0,0 +1,12 @@
+module Jekyll
+  module OpenTTDFilters
+
+    # Liquid filter: formats a fraction as a percentage with one decimal and
+    # without the "%" sign, e.g. 0.3333 becomes "33.3".
+    def percentage(value)
+      return sprintf("%.1f", value * 100)
+    end
+  end
+end
+
+Liquid::Template.register_filter(Jekyll::OpenTTDFilters)
diff --git a/analysis/__main__.py b/analysis/__main__.py
index ee642f2..2adc055 100644
--- a/analysis/__main__.py
+++ b/analysis/__main__.py
@@ -1,14 +1,279 @@
-"""
-TODO
-"""
+import json
+import sys
+import tarfile
 
-import asyncio
+from collections import defaultdict
 
+from .windows_name import WINDOWS_BUILD_NUMBER_TO_NAME
 
-async def main():
-    pass
+# Ensure the summary is always based on a good amount of surveys.
+# Otherwise it is very easy for one user to be visible in the results.
+THRESHOLD_DIFFERENT_SAVEGAMES = 150
+THRESHOLD_DIFFERENT_SURVEYS = 300
+
+# Paths that summarize_setting() skips; the comment on each entry tells why.
+BLACKLIST_PATHS = [
+    "date",  # Not interesting.
+    "game.companies",  # Processed differently.
+    "game.game_script",  # Processed differently.
+    "game.grfs",  # Processed differently.
+    "game.settings.game_creation.generation_seed",  # Too many results.
+    "game.settings.game_creation.generation_unique_id",  # Too many results.
+    "game.settings.large_font",  # Might expose user information, and is already covered by info.font.large.
+    "game.settings.last_newgrf_count",  # Not interesting.
+    "game.settings.medium_font",  # Might expose user information, and is already covered by info.font.medium.
+    "game.settings.mono_font",  # Might expose user information, and is already covered by info.font.mono.
+    "game.settings.music.custom_1",  # Not interesting.
+    "game.settings.music.custom_2",  # Not interesting.
+    "game.settings.music.effect_vol",  # Not interesting.
+    "game.settings.music.music_vol",  # Not interesting.
+    "game.settings.musicset",  # Already in "info.configuration.music_set".
+    "game.settings.player_face",  # Not interesting.
+    "game.settings.small_font",  # Might expose user information, and is already covered by info.font.small.
+    "game.settings.soundsset",  # Already in "info.configuration.sound_set".
+    "game.timers",  # Not interesting.
+    "id",  # Not interesting.
+    "info.compiler",  # Not interesting.
+    "info.configuration.graphics_set",  # Processed differently.
+    "info.configuration.music_set",  # Processed differently.
+    "info.configuration.sound_set",  # Processed differently.
+    "info.libraries",  # Not interesting.
+    "info.openttd.build_date",  # Not interesting.
+    "info.openttd.version",  # Not interesting.
+    "info.os.machine",  # OS specific setting, not interesting.
+    "info.os.max_ver",  # OS specific setting, Not interesting.
+    "info.os.min_ver",  # OS specific setting, Not interesting.
+    "info.os.release",  # Combined with "info.os.os".
+    "info.os.version",  # OS specific setting, Not interesting.
+    "key",  # Not interesting.
+    "schema",  # Not interesting.
+]
+
+
+def _video_driver_name(data):
+    """Reduce a video_info value to the name of the graphics driver in use.
+
+    Values without parentheses, or starting with "sdl ", count as not hardware accelerated.
+    Otherwise the text after the opening parenthesis is kept, up to the first "," or "(".
+    """
+    if "(" not in data or data.startswith("sdl "):
+        return "(no hardware acceleration)"
+
+    driver = data.split("(")[0].strip()
+
+    # SDL reports slightly different from the rest: the name follows the second "(".
+    # Without a second "(" the generic handling is used rather than failing on the index.
+    parts = data.split("(", 2)
+    if driver == "sdl-opengl" and len(parts) == 3:
+        data = parts[2]
+    else:
+        data = data.split("(", 1)[1]
+
+    # Only keep the graphics driver name; remove all versions etc.
+    # "(TM)", "(R)" and "(C)" are masked first, so the split on "(" does not cut them off.
+    data = data.replace("(TM)", "@TM@").replace("(R)", "@R@").replace("(C)", "@C@")
+    data = data.split(",")[0].split("(")[0].strip()
+    return data.replace("@TM@", "(TM)").replace("@R@", "(R)").replace("@C@", "(C)")
+
+
+def _os_version_name(data):
+    """Translate a combined "<os> <release>" value into a coarse OS version name.
+
+    Values that do not look like "<os> <major>.<minor>.<build>" are returned unchanged,
+    so that a single unexpected survey result cannot abort the whole summary.
+    """
+    if not isinstance(data, str):
+        return data
+    if data.startswith("Linux"):
+        return "Linux"
+    if not data.startswith(("Windows", "MacOS")):
+        return data
+
+    try:
+        major, minor, build = data.split(" ", 1)[1].split(".")
+    except (IndexError, ValueError):
+        return data
+
+    if data.startswith("MacOS"):
+        # Major versions up to 10 are reported as "major.minor", later ones by major only.
+        if major.isdigit() and int(major) <= 10:
+            return f"MacOS {major}.{minor}"
+        return f"MacOS {major}"
+
+    os_version = WINDOWS_BUILD_NUMBER_TO_NAME.get(f"{major}.{minor}", data)
+    # Major version 10 with a build number of 22000 or higher has its own entry in the table.
+    if major == "10" and build.isdigit() and int(build) >= 22000:
+        os_version = WINDOWS_BUILD_NUMBER_TO_NAME.get(f"{major}.{minor}.22000", os_version)
+    return os_version
+
+
+def summarize_setting(summary, version, seconds, path, data):
+    """Add a single survey value to the summary, weighted by the seconds played.
+
+    Dictionaries are walked recursively, with "path" being the dotted path of the value.
+    The value is counted in summary[version][path][data]; a few paths additionally get
+    derived entries (OS vendor/version, resolution width/height, display options).
+    Paths in BLACKLIST_PATHS are skipped, and lists raise an Exception.
+    """
+    if path in BLACKLIST_PATHS:
+        return
+
+    if isinstance(data, dict):
+        for key, value in data.items():
+            # Combine info.os.os with info.os.release, as their whole is the OS version.
+            if path == "info.os" and key == "os":
+                summarize_setting(summary, version, seconds, f"{path}.vendor", value)
+                # Surveys without a release are summarized by their OS name only.
+                if "release" in data:
+                    value = f"{value} {data['release']}".replace(" ()", "").split("-")[0]
+
+            summarize_setting(summary, version, seconds, f"{path}.{key}", value)
+
+        return
+
+    if isinstance(data, list):
+        raise Exception("Lists are not implemented yet")
+
+    if path in ("game.settings.display_opt", "game.settings.extra_display_opt"):
+        if not data:
+            return
+
+        # Every option listed is switched on; main() derives the "false" share afterwards.
+        for option in data.split("|"):
+            summarize_setting(summary, version, seconds, f"{path}.{option}", "true")
+        return
+
+    if path == "info.configuration.video_info":
+        data = _video_driver_name(data)
+
+    if path == "game.settings.resolution":
+        width, _, height = data.partition(",")
+        if width and height and width.isdigit() and height.isdigit():
+            summarize_setting(summary, version, seconds, f"{path}.width", int(width))
+            summarize_setting(summary, version, seconds, f"{path}.height", int(height))
+        else:
+            # We failed to split in width/height, so record unknowns.
+            summarize_setting(summary, version, seconds, f"{path}.width", "(unknown)")
+            summarize_setting(summary, version, seconds, f"{path}.height", "(unknown)")
+
+    if path == "info.os.os":
+        summarize_setting(summary, version, seconds, f"{path}.version", _os_version_name(data))
+
+    if isinstance(data, str):
+        if data.startswith('"') and data.endswith('"'):
+            data = data[1:-1]
+        if not data:
+            data = "(empty)"
+
+    summary[version][path][data] += seconds
+
+
+def summarize_result(summary, fp):
+    """Add one survey result, read as JSON from the open file "fp", to the summary.
+
+    Results that lack the required fields, that are too short or mostly paused, or that
+    come from a dated build of a branch other than master are skipped.
+    """
+    data = json.load(fp)
+
+    try:
+        seconds = data["game"]["timers"]["seconds"]
+        ticks = data["game"]["timers"]["ticks"]
+        version = data["info"]["openttd"]["version"]["revision"]
+        survey_id = data["id"]
+    except (KeyError, TypeError):
+        # Invalid (or very old) survey result.
+        return
+
+    # Surveys results that were either mostly paused or really short are skipped
+    # to avoid people gaming the system.
+    if seconds < 60 or ticks < 100:
+        return
+
+    if "-" in version and version[0:8].isdigit():
+        branch = version.split("-")[1]
+        # Only track the nightlies.
+        if branch == "master":
+            version = "vanilla-master"
+        else:
+            return
+
+    for key, value in data.items():
+        summarize_setting(summary, version, seconds, key, value)
+
+    summary[version]["summary"]["count"] += 1
+    summary[version]["summary"]["seconds"] += seconds
+
+    if "ids" not in summary[version]["summary"]:
+        summary[version]["summary"]["ids"] = set()
+    summary[version]["summary"]["ids"].add(survey_id)
+
+
+def summarize_archive(summary, filename):
+    """Add a single file to the summary: a "verified.json" result or a tar archive of them.
+
+    Other JSON files, and archive members not ending in "verified.json", are ignored.
+    """
+    if filename.endswith(".json"):
+        if not filename.endswith("verified.json"):
+            return
+
+        # Read as bytes, like the members of an archive; json detects the encoding itself.
+        with open(filename, "rb") as fp:
+            summarize_result(summary, fp)
+        return
+
+    with tarfile.open(filename) as archive:
+        for member in archive:
+            if not member.isfile():
+                continue
+
+            # If the filename doesn't end with "verified.json", the survey result
+            # wasn't created by an official client. For now, we skip those results.
+            if not member.name.endswith("verified.json"):
+                continue
+
+            with archive.extractfile(member) as fp:
+                summarize_result(summary, fp)
+
+
+def main():
+    """Summarize the files given on the command line and print the summary as JSON.
+
+    A version that does not reach both thresholds is printed as null.
+    """
+    summary = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
+
+    for filename in sys.argv[1:]:
+        summarize_archive(summary, filename)
+
+    for version, version_summary in summary.items():
+        # Sort the data based on the path.
+        summary[version] = dict(sorted(summary[version].items(), key=lambda item: item[0]))
+
+        for path, data in version_summary.items():
+            if path == "summary":
+                # The set of IDs has done its job; only the number of different IDs is reported.
+                data["ids"] = len(data["ids"])
+
+                if data["ids"] < THRESHOLD_DIFFERENT_SAVEGAMES or data["count"] < THRESHOLD_DIFFERENT_SURVEYS:
+                    summary[version] = None
+                    break
+
+            # Calculate the "false" condition of each display option, assuming that if you didn't have it on, it was off.
+            if path.startswith("game.settings.display_opt.") or path.startswith("game.settings.extra_display_opt."):
+                data["false"] = version_summary["summary"]["seconds"] - data["true"]
+
+            # Check if it adds up to the total; if not, it is (most likely) an OS specific setting.
+            total = sum(data.values())
+            if path != "summary" and total != version_summary["summary"]["seconds"]:
+                data["(not reported)"] = version_summary["summary"]["seconds"] - total
+
+            # Sort the data based on the value.
+            summary[version][path] = dict(sorted(data.items(), key=lambda item: item[1], reverse=True))
+
+    print(json.dumps(summary, indent=4))
 
 
 if __name__ == "__main__":
-    loop = asyncio.get_event_loop()
-    loop.run_until_complete(main())
+    main()
diff --git a/analysis/windows_name.py b/analysis/windows_name.py
new file mode 100644
index 0000000..35c1452
--- /dev/null
+++ b/analysis/windows_name.py
@@ -0,0 +1,17 @@
+# Names of Windows versions, keyed by their "major.minor" version number.
+# "10.0.22000" is the entry used for 10.0 builds from 22000 onwards.
+WINDOWS_BUILD_NUMBER_TO_NAME = {
+    "4.00": "Windows 95",
+    "4.0": "Windows NT",
+    "4.10": "Windows 98",
+    "4.90": "Windows Me",
+    "5.0": "Windows 2000",
+    "5.1": "Windows XP",
+    "5.2": "Windows XP",
+    "6.0": "Windows Vista",
+    "6.1": "Windows 7",
+    "6.2": "Windows 8",
+    "6.3": "Windows 8.1",
+    "10.0": "Windows 10",
+    "10.0.22000": "Windows 11",
+}
diff --git a/pages/results.html b/pages/results.html
deleted file mode 100644
index aee4447..0000000
--- a/pages/results.html
+++ /dev/null
@@ -1,19 +0,0 @@
----
-layout: default
-title: Results
-active_nav: results
-permalink: /results.html
----
-
-
    -
    -

    Results

    -
    -
    -
    -

    - Coming soon (once enough surveys have been received to make the results meaningful) -

    -
    -
    -
    diff --git a/pages/summaries.html b/pages/summaries.html new file mode 100644 index 0000000..ac80a31 --- /dev/null +++ b/pages/summaries.html @@ -0,0 +1,19 @@ +--- +layout: default +title: Summaries +active_nav: summaries +permalink: /summaries.html +--- + +
    +
    +

    Survey Result Summaries

    +
    +
    +
    + {% for year in site.summaries %} + {{ year.end_date }}: Survey Result Summary - {{ year.name }}
    + {% endfor %} +
    +
    +
    diff --git a/requirements.base b/requirements.base
deleted file mode 100644
index e69de29..0000000
diff --git a/static/css/page.css b/static/css/page.css
index 3430074..26a2025 100644
--- a/static/css/page.css
+++ b/static/css/page.css
@@ -100,3 +100,29 @@ article > .content {
     margin-bottom: 20px;
     text-align: center;
 }
+
+.summary-table tr:nth-child(odd) { /* Rows alternate between two shades of grey. */
+    background-color: #f8f8f8;
+}
+.summary-table tr:nth-child(even) {
+    background-color: #f4f4f4;
+}
+.summary-table tr:hover { /* The row under the mouse pointer gets a darker shade. */
+    background-color: #e0e0e0;
+}
+
+.summary-table tr.setting { /* Same specificity as the three rules above, so being later it wins. */
+    background-color: #ffffff;
+}
+
+.summary-table tr.setting th {
+    padding-top: 8px;
+    width: 600px;
+}
+
+.summary-table pre { /* Shown inline, on a grey background, without padding or margin. */
+    background-color: #e0e0e0;
+    display: inline-block;
+    padding: 0;
+    margin: 0;
+}