#!/usr/bin/env python
# coding: utf-8

# # debugging O
#
# ref: https://github.com/conda-forge/openmpi-feedstock/pull/142
#
# Given two conda-forge build logs, compare them after normalization of
# random elements like timestamps, paths.
#
# Also extract conda installs, as the most likely source

# First, download log files (this won't work after log retention expires)

# In[1]:


from pathlib import Path

import requests

# Azure DevOps build-log URLs for the "bad" (broken) and "good" (working) builds.
urls = {
    "bad": "https://dev.azure.com/conda-forge/84710dde-1620-425b-80d0-4cf5baca359d/_apis/build/builds/866388/logs/79",
    "good": "https://dev.azure.com/conda-forge/84710dde-1620-425b-80d0-4cf5baca359d/_apis/build/builds/866253/logs/39",
}
logs_dir = Path("logs")
logs_dir.mkdir(exist_ok=True)
log_paths = []
for name, url in urls.items():
    p = logs_dir / f"{name}.txt"
    log_paths.append(p)
    # Download once; re-running the notebook reuses the cached file.
    if not p.exists():
        print(f"Downloading {url} to {p}")
        r = requests.get(url)
        r.raise_for_status()
        with p.open("w") as f:
            f.write(r.text)


# Normalize path prefixes, strip timestamps, discard docker pull progress

# In[2]:


# normalize path prefixes, strip timestamps
import re

# conda-build working-directory prefix, randomized per build.
prefix_pattern = re.compile(r"/home/conda/feedstock_root/build_artifacts/([^/]+)/")
# conda-build's path-length padding ("_placehold_placehold_...").
placehold_pattern = re.compile(r"_placehold[placehold_]+")
# docker pull layer-progress lines ("<12-hex-digit layer id>: ...").
docker_pattern = re.compile(r"^[a-f0-9]{12}: ")


def process_line(line):
    """Normalize one raw log line for diffing.

    Strips the leading Azure timestamp (everything up to the first space),
    drops docker pull progress lines entirely, and replaces the randomized
    build-directory prefix and placeholder padding with stable text.

    Returns "" for lines that should be discarded.
    """
    if not line:
        return ""
    # Drop the leading timestamp; partition (unlike split) tolerates lines
    # that contain no space at all instead of raising ValueError.
    _ts, _, line = line.partition(" ")
    if docker_pattern.match(line):
        return ""
    line, _ = prefix_pattern.subn("$BUILD_DIR/", line)
    line, _ = placehold_pattern.subn("", line)
    return line


for log_path in log_paths:
    with log_path.open() as f_in, log_path.with_suffix(".strip.txt").open("w") as f_out:
        for line in f_in:
            line = process_line(line)
            f_out.write(line)


# Extract each conda-install plan into a separate file

# In[3]:


import shutil
from enum import Enum


class State(Enum):
    """Parser state for env_chunks: inside or outside an install chunk."""

    skipping = 1
    consuming = 2


# Maps a substring that starts a chunk to the compiled regex that ends it.
chunk_bounds = {
    "Package Plan": re.compile("Preparing transaction"),
    "conda list": re.compile(r"^\+"),
}


def env_chunks(path):
    """Yield strings, each representing a single conda install"""
    chunk_lines = []
    state = State.skipping
    current_chunk_end = None
    with open(path) as f:
        for line in f:
            if not line:
                continue
            line = process_line(line)
            # A start marker (re)opens a chunk and records its end pattern.
            for chunk_start, chunk_end in chunk_bounds.items():
                if chunk_start in line:
                    chunk_lines = []
                    state = State.consuming
                    current_chunk_end = chunk_end
                    break
            # End marker closes the current chunk (the marker line itself
            # is excluded from the yielded text).
            if chunk_lines and current_chunk_end.search(line):
                yield "".join(chunk_lines)
                chunk_lines = []
                state = State.skipping
            if state == State.consuming:
                chunk_lines.append(line)


for path in log_paths:
    env_dir = path.with_suffix(".envs")
    # ignore_errors: on the first run the directory does not exist yet,
    # and plain rmtree would raise FileNotFoundError.
    shutil.rmtree(env_dir, ignore_errors=True)
    env_dir.mkdir(exist_ok=True)
    for i, chunk in enumerate(env_chunks(path)):
        env_path = env_dir / f"env.{i:02}.txt"
        with env_path.open("w") as f:
            f.write(chunk)

get_ipython().system('cat logs/good.envs/env.01.txt')


# First, show the diffs of environment installations:

# In[4]:


get_ipython().system('diff --color=always -U 1 -r logs/bad.envs logs/good.envs')


# Next, compare the full output to see if there's any useful info in there.
#
# There are lots of small diffs, e.g. line ordering due to parallel builds.

# In[5]:


get_ipython().system('difft --context=0 --color=always --display=inline --background=light logs/bad.strip.txt logs/good.strip.txt | head -n 950')