#!/usr/bin/env python3
"""
Given a log from running tests using pytest -n 2 (or more), report which tests
never actually completed.
"""

import re
import sys
from pathlib import Path

logfile = Path(sys.argv[-1])
print(f"Working on {logfile}")

lines = logfile.read_text().splitlines()
lines = [l.strip() for l in lines]
test_line = re.compile('datalad/.*tests/')

test_regex = r'\s+(\.\./)?(?P<test>[a-z].*tests/test_\S+)'
tests_started = set()
for l in lines:
    # Lines might differ between CI backends - some times prepended with extra time stamp
    if res := re.match('(\[[0-9]{2}:[0-9]{2}:[0-9]{2}\])?' + test_regex, l):
        tests_started.add(res.groupdict()['test'])
tests_completed = set()
for l in lines:
    if res := re.match('(\[[0-9]{2}:[0-9]{2}:[0-9]{2}\])?\s+\[gw[0-9]+\].*' + test_regex, l):
        tests_completed.add(res.groupdict()['test'])
tests_didnot_complete = tests_started - tests_completed

# print(tests_completed)
print(f"{len(lines)} lines: {len(tests_started)} started, {len(tests_completed)} completed")
if tests_didnot_complete:
    print("Never completed:")
    for t in sorted(tests_didnot_complete):
        print(t)
