src/ci/stage-build.py

   1 #!/usr/bin/env python3
   2 # ignore-tidy-linelength
   3
   4 # Compatible with Python 3.6+
   5
   6 import contextlib
   7 import getpass
   8 import glob
   9 import json
  10 import logging
  11 import os
  12 import pprint
  13 import shutil
  14 import subprocess
  15 import sys
  16 import time
  17 import traceback
  18 import urllib.request
  19 from io import StringIO
  20 from pathlib import Path
  21 from typing import Callable, ContextManager, Dict, Iterable, Iterator, List, Optional, \
  22     Tuple, Union
  23
# Host identifier taken from the mandatory `PGO_HOST` environment variable; loading this script
# raises `KeyError` when the variable is not set. It names the build artifact directory and is
# passed to `x.py` as both `--target` and `--host`.
PGO_HOST = os.environ["PGO_HOST"]

# Logger used by the helpers of this script.
LOGGER = logging.getLogger("stage-build")
  27
  28 LLVM_PGO_CRATES = [
  29     "syn-1.0.89",
  30     "cargo-0.60.0",
  31     "serde-1.0.136",
  32     "ripgrep-13.0.0",
  33     "regex-1.5.5",
  34     "clap-3.1.6",
  35     "hyper-0.14.18"
  36 ]
  37
  38 RUSTC_PGO_CRATES = [
  39     "externs",
  40     "ctfe-stress-5",
  41     "cargo-0.60.0",
  42     "token-stream-stress",
  43     "match-stress",
  44     "tuple-stress",
  45     "diesel-1.4.8",
  46     "bitmaps-3.1.0"
  47 ]
  48
  49 LLVM_BOLT_CRATES = LLVM_PGO_CRATES
  50
  51 class Pipeline:
  52     # Paths
  53     def checkout_path(self) -> Path:
  54         """
  55         The root checkout, where the source is located.
  56         """
  57         raise NotImplementedError
  58
  59     def downloaded_llvm_dir(self) -> Path:
  60         """
  61         Directory where the host LLVM is located.
  62         """
  63         raise NotImplementedError
  64
  65     def build_root(self) -> Path:
  66         """
  67         The main directory where the build occurs.
  68         """
  69         raise NotImplementedError
  70
  71     def build_artifacts(self) -> Path:
  72         return self.build_root() / "build" / PGO_HOST
  73
  74     def rustc_stage_0(self) -> Path:
  75         return self.build_artifacts() / "stage0" / "bin" / "rustc"
  76
  77     def cargo_stage_0(self) -> Path:
  78         return self.build_artifacts() / "stage0" / "bin" / "cargo"
  79
  80     def rustc_stage_2(self) -> Path:
  81         return self.build_artifacts() / "stage2" / "bin" / "rustc"
  82
  83     def opt_artifacts(self) -> Path:
  84         raise NotImplementedError
  85
  86     def llvm_profile_dir_root(self) -> Path:
  87         return self.opt_artifacts() / "llvm-pgo"
  88
  89     def llvm_profile_merged_file(self) -> Path:
  90         return self.opt_artifacts() / "llvm-pgo.profdata"
  91
  92     def rustc_perf_dir(self) -> Path:
  93         return self.opt_artifacts() / "rustc-perf"
  94
  95     def build_rustc_perf(self):
  96         raise NotImplementedError()
  97
  98     def rustc_profile_dir_root(self) -> Path:
  99         return self.opt_artifacts() / "rustc-pgo"
 100
 101     def rustc_profile_merged_file(self) -> Path:
 102         return self.opt_artifacts() / "rustc-pgo.profdata"
 103
 104     def rustc_profile_template_path(self) -> Path:
 105         """
 106         The profile data is written into a single filepath that is being repeatedly merged when each
 107         rustc invocation ends. Empirically, this can result in some profiling data being lost. That's
 108         why we override the profile path to include the PID. This will produce many more profiling
 109         files, but the resulting profile will produce a slightly faster rustc binary.
 110         """
 111         return self.rustc_profile_dir_root() / "default_%m_%p.profraw"
 112
 113     def supports_bolt(self) -> bool:
 114         raise NotImplementedError
 115
 116     def llvm_bolt_profile_merged_file(self) -> Path:
 117         return self.opt_artifacts() / "bolt.profdata"
 118
 119     def metrics_path(self) -> Path:
 120         return self.build_root() / "build" / "metrics.json"
 121
 122
 123 class LinuxPipeline(Pipeline):
 124     def checkout_path(self) -> Path:
 125         return Path("/checkout")
 126
 127     def downloaded_llvm_dir(self) -> Path:
 128         return Path("/rustroot")
 129
 130     def build_root(self) -> Path:
 131         return self.checkout_path() / "obj"
 132
 133     def opt_artifacts(self) -> Path:
 134         return Path("/tmp/tmp-multistage/opt-artifacts")
 135
 136     def build_rustc_perf(self):
 137         # /tmp/rustc-perf comes from the Dockerfile
 138         shutil.copytree("/tmp/rustc-perf", self.rustc_perf_dir())
 139         cmd(["chown", "-R", f"{getpass.getuser()}:", self.rustc_perf_dir()])
 140
 141         with change_cwd(self.rustc_perf_dir()):
 142             cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=dict(
 143                 RUSTC=str(self.rustc_stage_0()),
 144                 RUSTC_BOOTSTRAP="1"
 145             ))
 146
 147     def supports_bolt(self) -> bool:
 148         return True
 149
 150
 151 class WindowsPipeline(Pipeline):
 152     def __init__(self):
 153         self.checkout_dir = Path(os.getcwd())
 154
 155     def checkout_path(self) -> Path:
 156         return self.checkout_dir
 157
 158     def downloaded_llvm_dir(self) -> Path:
 159         return self.checkout_path() / "citools" / "clang-rust"
 160
 161     def build_root(self) -> Path:
 162         return self.checkout_path()
 163
 164     def opt_artifacts(self) -> Path:
 165         return self.checkout_path() / "opt-artifacts"
 166
 167     def rustc_stage_0(self) -> Path:
 168         return super().rustc_stage_0().with_suffix(".exe")
 169
 170     def cargo_stage_0(self) -> Path:
 171         return super().cargo_stage_0().with_suffix(".exe")
 172
 173     def rustc_stage_2(self) -> Path:
 174         return super().rustc_stage_2().with_suffix(".exe")
 175
 176     def build_rustc_perf(self):
 177         # rustc-perf version from 2023-03-15
 178         perf_commit = "9dfaa35193154b690922347ee1141a06ec87a199"
 179         rustc_perf_zip_path = self.opt_artifacts() / "perf.zip"
 180
 181         def download_rustc_perf():
 182             download_file(
 183                 f"https://github.com/rust-lang/rustc-perf/archive/{perf_commit}.zip",
 184                 rustc_perf_zip_path
 185             )
 186             with change_cwd(self.opt_artifacts()):
 187                 unpack_archive(rustc_perf_zip_path)
 188                 move_path(Path(f"rustc-perf-{perf_commit}"), self.rustc_perf_dir())
 189                 delete_file(rustc_perf_zip_path)
 190
 191         retry_action(download_rustc_perf, "Download rustc-perf")
 192
 193         with change_cwd(self.rustc_perf_dir()):
 194             cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=dict(
 195                 RUSTC=str(self.rustc_stage_0()),
 196                 RUSTC_BOOTSTRAP="1"
 197             ))
 198
 199     def rustc_profile_template_path(self) -> Path:
 200         """
 201         On Windows, we don't have enough space to use separate files for each rustc invocation.
 202         Therefore, we use a single file for the generated profiles.
 203         """
 204         return self.rustc_profile_dir_root() / "default_%m.profraw"
 205
 206     def supports_bolt(self) -> bool:
 207         return False
 208
 209
 210 def get_timestamp() -> float:
 211     return time.time()
 212
 213
# Alias used in annotations for durations; the values are differences of `get_timestamp()`
# results or durations loaded from the bootstrap metrics, i.e. seconds.
Duration = float
 215
 216
 217 def iterate_timers(timer: "Timer", name: str, level: int = 0) -> Iterator[
 218     Tuple[int, str, Duration]]:
 219     """
 220     Hierarchically iterate the children of a timer, in a depth-first order.
 221     """
 222     yield (level, name, timer.total_duration())
 223     for (child_name, child_timer) in timer.children:
 224         yield from iterate_timers(child_timer, child_name, level=level + 1)
 225
 226
 227 class Timer:
 228     def __init__(self, parent_names: Tuple[str, ...] = ()):
 229         self.children: List[Tuple[str, Timer]] = []
 230         self.section_active = False
 231         self.parent_names = parent_names
 232         self.duration_excluding_children: Duration = 0
 233
 234     @contextlib.contextmanager
 235     def section(self, name: str) -> ContextManager["Timer"]:
 236         assert not self.section_active
 237         self.section_active = True
 238
 239         start = get_timestamp()
 240         exc = None
 241
 242         child_timer = Timer(parent_names=self.parent_names + (name,))
 243         full_name = " > ".join(child_timer.parent_names)
 244         try:
 245             LOGGER.info(f"Section `{full_name}` starts")
 246             yield child_timer
 247         except BaseException as exception:
 248             exc = exception
 249             raise
 250         finally:
 251             end = get_timestamp()
 252             duration = end - start
 253
 254             child_timer.duration_excluding_children = duration - child_timer.total_duration()
 255             self.add_child(name, child_timer)
 256             if exc is None:
 257                 LOGGER.info(f"Section `{full_name}` ended: OK ({duration:.2f}s)")
 258             else:
 259                 LOGGER.info(f"Section `{full_name}` ended: FAIL ({duration:.2f}s)")
 260             self.section_active = False
 261
 262     def total_duration(self) -> Duration:
 263         return self.duration_excluding_children + sum(
 264             c.total_duration() for (_, c) in self.children)
 265
 266     def has_children(self) -> bool:
 267         return len(self.children) > 0
 268
 269     def print_stats(self):
 270         rows = []
 271         for (child_name, child_timer) in self.children:
 272             for (level, name, duration) in iterate_timers(child_timer, child_name, level=0):
 273                 label = f"{'  ' * level}{name}:"
 274                 rows.append((label, duration))
 275
 276         # Empty row
 277         rows.append(("", ""))
 278
 279         total_duration_label = "Total duration:"
 280         total_duration = self.total_duration()
 281         rows.append((total_duration_label, humantime(total_duration)))
 282
 283         space_after_label = 2
 284         max_label_length = max(16, max(len(label) for (label, _) in rows)) + space_after_label
 285
 286         table_width = max_label_length + 23
 287         divider = "-" * table_width
 288
 289         with StringIO() as output:
 290             print(divider, file=output)
 291             for (label, duration) in rows:
 292                 if isinstance(duration, Duration):
 293                     pct = (duration / total_duration) * 100
 294                     value = f"{duration:>12.2f}s ({pct:>5.2f}%)"
 295                 else:
 296                     value = f"{duration:>{len(total_duration_label) + 7}}"
 297                 print(f"{label:<{max_label_length}} {value}", file=output)
 298             print(divider, file=output, end="")
 299             LOGGER.info(f"Timer results\n{output.getvalue()}")
 300
 301     def add_child(self, name: str, timer: "Timer"):
 302         self.children.append((name, timer))
 303
 304     def add_duration(self, name: str, duration: Duration):
 305         timer = Timer(parent_names=self.parent_names + (name,))
 306         timer.duration_excluding_children = duration
 307         self.add_child(name, timer)
 308
 309
 310 class BuildStep:
 311     def __init__(self, type: str, children: List["BuildStep"], duration: float):
 312         self.type = type
 313         self.children = children
 314         self.duration = duration
 315
 316     def find_all_by_type(self, type: str) -> Iterator["BuildStep"]:
 317         if type == self.type:
 318             yield self
 319         for child in self.children:
 320             yield from child.find_all_by_type(type)
 321
 322     def __repr__(self):
 323         return f"BuildStep(type={self.type}, duration={self.duration}, children={len(self.children)})"
 324
 325
 326 def load_last_metrics(path: Path) -> BuildStep:
 327     """
 328     Loads the metrics of the most recent bootstrap execution from a metrics.json file.
 329     """
 330     with open(path, "r") as f:
 331         metrics = json.load(f)
 332     invocation = metrics["invocations"][-1]
 333
 334     def parse(entry) -> Optional[BuildStep]:
 335         if "kind" not in entry or entry["kind"] != "rustbuild_step":
 336             return None
 337         type = entry.get("type", "")
 338         duration = entry.get("duration_excluding_children_sec", 0)
 339         children = []
 340
 341         for child in entry.get("children", ()):
 342             step = parse(child)
 343             if step is not None:
 344                 children.append(step)
 345                 duration += step.duration
 346         return BuildStep(type=type, children=children, duration=duration)
 347
 348     children = [parse(child) for child in invocation.get("children", ())]
 349     return BuildStep(
 350         type="root",
 351         children=children,
 352         duration=invocation.get("duration_including_children_sec", 0)
 353     )
 354
 355
 356 @contextlib.contextmanager
 357 def change_cwd(dir: Path):
 358     """
 359     Temporarily change working directory to `dir`.
 360     """
 361     cwd = os.getcwd()
 362     LOGGER.debug(f"Changing working dir from `{cwd}` to `{dir}`")
 363     os.chdir(dir)
 364     try:
 365         yield
 366     finally:
 367         LOGGER.debug(f"Reverting working dir to `{cwd}`")
 368         os.chdir(cwd)
 369
 370
 371 def humantime(time_s: float) -> str:
 372     hours = time_s // 3600
 373     time_s = time_s % 3600
 374     minutes = time_s // 60
 375     seconds = time_s % 60
 376
 377     result = ""
 378     if hours > 0:
 379         result += f"{int(hours)}h "
 380     if minutes > 0:
 381         result += f"{int(minutes)}m "
 382     result += f"{round(seconds)}s"
 383     return result
 384
 385
 386 def move_path(src: Path, dst: Path):
 387     LOGGER.info(f"Moving `{src}` to `{dst}`")
 388     shutil.move(src, dst)
 389
 390
 391 def delete_file(path: Path):
 392     LOGGER.info(f"Deleting file `{path}`")
 393     os.unlink(path)
 394
 395
 396 def delete_directory(path: Path):
 397     LOGGER.info(f"Deleting directory `{path}`")
 398     shutil.rmtree(path)
 399
 400
 401 def unpack_archive(archive: Path):
 402     LOGGER.info(f"Unpacking archive `{archive}`")
 403     shutil.unpack_archive(archive)
 404
 405
 406 def download_file(src: str, target: Path):
 407     LOGGER.info(f"Downloading `{src}` into `{target}`")
 408     urllib.request.urlretrieve(src, str(target))
 409
 410
 411 def retry_action(action, name: str, max_fails: int = 5):
 412     LOGGER.info(f"Attempting to perform action `{name}` with retry")
 413     for iteration in range(max_fails):
 414         LOGGER.info(f"Attempt {iteration + 1}/{max_fails}")
 415         try:
 416             action()
 417             return
 418         except:
 419             LOGGER.error(f"Action `{name}` has failed\n{traceback.format_exc()}")
 420
 421     raise Exception(f"Action `{name}` has failed after {max_fails} attempts")
 422
 423
 424 def cmd(
 425         args: List[Union[str, Path]],
 426         env: Optional[Dict[str, str]] = None,
 427         output_path: Optional[Path] = None
 428 ):
 429     args = [str(arg) for arg in args]
 430
 431     environment = os.environ.copy()
 432
 433     cmd_str = ""
 434     if env is not None:
 435         environment.update(env)
 436         cmd_str += " ".join(f"{k}={v}" for (k, v) in (env or {}).items())
 437         cmd_str += " "
 438     cmd_str += " ".join(args)
 439     if output_path is not None:
 440         cmd_str += f" > {output_path}"
 441     LOGGER.info(f"Executing `{cmd_str}`")
 442
 443     if output_path is not None:
 444         with open(output_path, "w") as f:
 445             return subprocess.run(
 446                 args,
 447                 env=environment,
 448                 check=True,
 449                 stdout=f
 450             )
 451     return subprocess.run(args, env=environment, check=True)
 452
 453 class BenchmarkRunner:
 454     def run_rustc(self, pipeline: Pipeline):
 455         raise NotImplementedError
 456
 457     def run_llvm(self, pipeline: Pipeline):
 458         raise NotImplementedError
 459
 460     def run_bolt(self, pipeline: Pipeline):
 461         raise NotImplementedError
 462
 463 class DefaultBenchmarkRunner(BenchmarkRunner):
 464     def run_rustc(self, pipeline: Pipeline):
 465         # Here we're profiling the `rustc` frontend, so we also include `Check`.
 466         # The benchmark set includes various stress tests that put the frontend under pressure.
 467         run_compiler_benchmarks(
 468             pipeline,
 469             profiles=["Check", "Debug", "Opt"],
 470             scenarios=["All"],
 471             crates=RUSTC_PGO_CRATES,
 472             env=dict(
 473                 LLVM_PROFILE_FILE=str(pipeline.rustc_profile_template_path())
 474             )
 475         )
 476     def run_llvm(self, pipeline: Pipeline):
 477         run_compiler_benchmarks(
 478             pipeline,
 479             profiles=["Debug", "Opt"],
 480             scenarios=["Full"],
 481             crates=LLVM_PGO_CRATES
 482         )
 483
 484     def run_bolt(self, pipeline: Pipeline):
 485         run_compiler_benchmarks(
 486             pipeline,
 487             profiles=["Check", "Debug", "Opt"],
 488             scenarios=["Full"],
 489             crates=LLVM_BOLT_CRATES
 490         )
 491
 492 def run_compiler_benchmarks(
 493         pipeline: Pipeline,
 494         profiles: List[str],
 495         scenarios: List[str],
 496         crates: List[str],
 497         env: Optional[Dict[str, str]] = None
 498 ):
 499     env = env if env is not None else {}
 500
 501     # Compile libcore, both in opt-level=0 and opt-level=3
 502     with change_cwd(pipeline.build_root()):
 503         cmd([
 504             pipeline.rustc_stage_2(),
 505             "--edition", "2021",
 506             "--crate-type", "lib",
 507             str(pipeline.checkout_path() / "library/core/src/lib.rs"),
 508             "--out-dir", pipeline.opt_artifacts()
 509         ], env=dict(RUSTC_BOOTSTRAP="1", **env))
 510
 511         cmd([
 512             pipeline.rustc_stage_2(),
 513             "--edition", "2021",
 514             "--crate-type", "lib",
 515             "-Copt-level=3",
 516             str(pipeline.checkout_path() / "library/core/src/lib.rs"),
 517             "--out-dir", pipeline.opt_artifacts()
 518         ], env=dict(RUSTC_BOOTSTRAP="1", **env))
 519
 520     # Run rustc-perf benchmarks
 521     # Benchmark using profile_local with eprintln, which essentially just means
 522     # don't actually benchmark -- just make sure we run rustc a bunch of times.
 523     with change_cwd(pipeline.rustc_perf_dir()):
 524         cmd([
 525             pipeline.cargo_stage_0(),
 526             "run",
 527             "-p", "collector", "--bin", "collector", "--",
 528             "profile_local", "eprintln",
 529             pipeline.rustc_stage_2(),
 530             "--id", "Test",
 531             "--cargo", pipeline.cargo_stage_0(),
 532             "--profiles", ",".join(profiles),
 533             "--scenarios", ",".join(scenarios),
 534             "--include", ",".join(crates)
 535         ], env=dict(
 536             RUST_LOG="collector=debug",
 537             RUSTC=str(pipeline.rustc_stage_0()),
 538             RUSTC_BOOTSTRAP="1",
 539             **env
 540         ))
 541
 542
 543 # https://stackoverflow.com/a/31631711/1107768
 544 def format_bytes(size: int) -> str:
 545     """Return the given bytes as a human friendly KiB, MiB or GiB string."""
 546     KB = 1024
 547     MB = KB ** 2  # 1,048,576
 548     GB = KB ** 3  # 1,073,741,824
 549     TB = KB ** 4  # 1,099,511,627,776
 550
 551     if size < KB:
 552         return f"{size} B"
 553     elif KB <= size < MB:
 554         return f"{size / KB:.2f} KiB"
 555     elif MB <= size < GB:
 556         return f"{size / MB:.2f} MiB"
 557     elif GB <= size < TB:
 558         return f"{size / GB:.2f} GiB"
 559     else:
 560         return str(size)
 561
 562
 563 # https://stackoverflow.com/a/63307131/1107768
 564 def count_files(path: Path) -> int:
 565     return sum(1 for p in path.rglob("*") if p.is_file())
 566
 567
 568 def count_files_with_prefix(path: Path) -> int:
 569     return sum(1 for p in glob.glob(f"{path}*") if Path(p).is_file())
 570
 571
 572 # https://stackoverflow.com/a/55659577/1107768
 573 def get_path_size(path: Path) -> int:
 574     if path.is_dir():
 575         return sum(p.stat().st_size for p in path.rglob("*"))
 576     return path.stat().st_size
 577
 578
 579 def get_path_prefix_size(path: Path) -> int:
 580     """
 581     Get size of all files beginning with the prefix `path`.
 582     Alternative to shell `du -sh <path>*`.
 583     """
 584     return sum(Path(p).stat().st_size for p in glob.glob(f"{path}*"))
 585
 586
 587 def get_files(directory: Path, filter: Optional[Callable[[Path], bool]] = None) -> Iterable[Path]:
 588     for file in os.listdir(directory):
 589         path = directory / file
 590         if filter is None or filter(path):
 591             yield path
 592
 593
 594 def build_rustc(
 595         pipeline: Pipeline,
 596         args: List[str],
 597         env: Optional[Dict[str, str]] = None
 598 ):
 599     arguments = [
 600                     sys.executable,
 601                     pipeline.checkout_path() / "x.py",
 602                     "build",
 603                     "--target", PGO_HOST,
 604                     "--host", PGO_HOST,
 605                     "--stage", "2",
 606                     "library/std"
 607                 ] + args
 608     cmd(arguments, env=env)
 609
 610
 611 def create_pipeline() -> Pipeline:
 612     if sys.platform == "linux":
 613         return LinuxPipeline()
 614     elif sys.platform in ("cygwin", "win32"):
 615         return WindowsPipeline()
 616     else:
 617         raise Exception(f"Optimized build is not supported for platform {sys.platform}")
 618
 619
 620 def gather_llvm_profiles(pipeline: Pipeline, runner: BenchmarkRunner):
 621     LOGGER.info("Running benchmarks with PGO instrumented LLVM")
 622
 623     runner.run_llvm(pipeline)
 624
 625     profile_path = pipeline.llvm_profile_merged_file()
 626     LOGGER.info(f"Merging LLVM PGO profiles to {profile_path}")
 627     cmd([
 628         pipeline.downloaded_llvm_dir() / "bin" / "llvm-profdata",
 629         "merge",
 630         "-o", profile_path,
 631         pipeline.llvm_profile_dir_root()
 632     ])
 633
 634     LOGGER.info("LLVM PGO statistics")
 635     LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}")
 636     LOGGER.info(
 637         f"{pipeline.llvm_profile_dir_root()}: {format_bytes(get_path_size(pipeline.llvm_profile_dir_root()))}")
 638     LOGGER.info(f"Profile file count: {count_files(pipeline.llvm_profile_dir_root())}")
 639
 640     # We don't need the individual .profraw files now that they have been merged
 641     # into a final .profdata
 642     delete_directory(pipeline.llvm_profile_dir_root())
 643
 644
 645 def gather_rustc_profiles(pipeline: Pipeline, runner: BenchmarkRunner):
 646     LOGGER.info("Running benchmarks with PGO instrumented rustc")
 647
 648
 649     runner.run_rustc(pipeline)
 650
 651
 652     profile_path = pipeline.rustc_profile_merged_file()
 653     LOGGER.info(f"Merging Rustc PGO profiles to {profile_path}")
 654     cmd([
 655         pipeline.build_artifacts() / "llvm" / "bin" / "llvm-profdata",
 656         "merge",
 657         "-o", profile_path,
 658         pipeline.rustc_profile_dir_root()
 659     ])
 660
 661     LOGGER.info("Rustc PGO statistics")
 662     LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}")
 663     LOGGER.info(
 664         f"{pipeline.rustc_profile_dir_root()}: {format_bytes(get_path_size(pipeline.rustc_profile_dir_root()))}")
 665     LOGGER.info(f"Profile file count: {count_files(pipeline.rustc_profile_dir_root())}")
 666
 667     # We don't need the individual .profraw files now that they have been merged
 668     # into a final .profdata
 669     delete_directory(pipeline.rustc_profile_dir_root())
 670
 671
 672 def gather_llvm_bolt_profiles(pipeline: Pipeline, runner: BenchmarkRunner):
 673     LOGGER.info("Running benchmarks with BOLT instrumented LLVM")
 674
 675     runner.run_bolt(pipeline)
 676
 677     merged_profile_path = pipeline.llvm_bolt_profile_merged_file()
 678     profile_files_path = Path("/tmp/prof.fdata")
 679     LOGGER.info(f"Merging LLVM BOLT profiles to {merged_profile_path}")
 680
 681     profile_files = sorted(glob.glob(f"{profile_files_path}*"))
 682     cmd([
 683         "merge-fdata",
 684         *profile_files,
 685     ], output_path=merged_profile_path)
 686
 687     LOGGER.info("LLVM BOLT statistics")
 688     LOGGER.info(f"{merged_profile_path}: {format_bytes(get_path_size(merged_profile_path))}")
 689     LOGGER.info(
 690         f"{profile_files_path}: {format_bytes(get_path_prefix_size(profile_files_path))}")
 691     LOGGER.info(f"Profile file count: {count_files_with_prefix(profile_files_path)}")
 692
 693
 694 def clear_llvm_files(pipeline: Pipeline):
 695     """
 696     Rustbuild currently doesn't support rebuilding LLVM when PGO options
 697     change (or any other llvm-related options); so just clear out the relevant
 698     directories ourselves.
 699     """
 700     LOGGER.info("Clearing LLVM build files")
 701     delete_directory(pipeline.build_artifacts() / "llvm")
 702     delete_directory(pipeline.build_artifacts() / "lld")
 703
 704
 705 def print_binary_sizes(pipeline: Pipeline):
 706     bin_dir = pipeline.build_artifacts() / "stage2" / "bin"
 707     binaries = get_files(bin_dir)
 708
 709     lib_dir = pipeline.build_artifacts() / "stage2" / "lib"
 710     libraries = get_files(lib_dir, lambda p: p.suffix == ".so")
 711
 712     paths = sorted(binaries) + sorted(libraries)
 713     with StringIO() as output:
 714         for path in paths:
 715             path_str = f"{path.name}:"
 716             print(f"{path_str:<50}{format_bytes(path.stat().st_size):>14}", file=output)
 717         LOGGER.info(f"Rustc binary size\n{output.getvalue()}")
 718
 719
 720 def print_free_disk_space(pipeline: Pipeline):
 721     usage = shutil.disk_usage(pipeline.opt_artifacts())
 722     total = usage.total
 723     used = usage.used
 724     free = usage.free
 725
 726     logging.info(
 727         f"Free disk space: {format_bytes(free)} out of total {format_bytes(total)} ({(used / total) * 100:.2f}% used)")
 728
 729
 730 def log_metrics(step: BuildStep):
 731     substeps: List[Tuple[int, BuildStep]] = []
 732
 733     def visit(step: BuildStep, level: int):
 734         substeps.append((level, step))
 735         for child in step.children:
 736             visit(child, level=level + 1)
 737
 738     visit(step, 0)
 739
 740     output = StringIO()
 741     for (level, step) in substeps:
 742         label = f"{'.' * level}{step.type}"
 743         print(f"{label:<65}{step.duration:>8.2f}s", file=output)
 744     logging.info(f"Build step durations\n{output.getvalue()}")
 745
 746
 747 def record_metrics(pipeline: Pipeline, timer: Timer):
 748     metrics = load_last_metrics(pipeline.metrics_path())
 749     if metrics is None:
 750         return
 751     llvm_steps = tuple(metrics.find_all_by_type("bootstrap::llvm::Llvm"))
 752     assert len(llvm_steps) > 0
 753     llvm_duration = sum(step.duration for step in llvm_steps)
 754
 755     rustc_steps = tuple(metrics.find_all_by_type("bootstrap::compile::Rustc"))
 756     assert len(rustc_steps) > 0
 757     rustc_duration = sum(step.duration for step in rustc_steps)
 758
 759     # The LLVM step is part of the Rustc step
 760     rustc_duration -= llvm_duration
 761
 762     timer.add_duration("LLVM", llvm_duration)
 763     timer.add_duration("Rustc", rustc_duration)
 764
 765     log_metrics(metrics)
 766
 767
 768 def execute_build_pipeline(timer: Timer, pipeline: Pipeline, runner: BenchmarkRunner, final_build_args: List[str]):
 769     # Clear and prepare tmp directory
 770     shutil.rmtree(pipeline.opt_artifacts(), ignore_errors=True)
 771     os.makedirs(pipeline.opt_artifacts(), exist_ok=True)
 772
 773     pipeline.build_rustc_perf()
 774
 775     # Stage 1: Build rustc + PGO instrumented LLVM
 776     with timer.section("Stage 1 (LLVM PGO)") as stage1:
 777         with stage1.section("Build rustc and LLVM") as rustc_build:
 778             build_rustc(pipeline, args=[
 779                 "--llvm-profile-generate"
 780             ], env=dict(
 781                 LLVM_PROFILE_DIR=str(pipeline.llvm_profile_dir_root() / "prof-%p")
 782             ))
 783             record_metrics(pipeline, rustc_build)
 784
 785         with stage1.section("Gather profiles"):
 786             gather_llvm_profiles(pipeline, runner)
 787         print_free_disk_space(pipeline)
 788
 789     clear_llvm_files(pipeline)
 790     final_build_args += [
 791         "--llvm-profile-use",
 792         pipeline.llvm_profile_merged_file()
 793     ]
 794
 795     # Stage 2: Build PGO instrumented rustc + LLVM
 796     with timer.section("Stage 2 (rustc PGO)") as stage2:
 797         with stage2.section("Build rustc and LLVM") as rustc_build:
 798             build_rustc(pipeline, args=[
 799                 "--rust-profile-generate",
 800                 pipeline.rustc_profile_dir_root()
 801             ])
 802             record_metrics(pipeline, rustc_build)
 803
 804         with stage2.section("Gather profiles"):
 805             gather_rustc_profiles(pipeline, runner)
 806         print_free_disk_space(pipeline)
 807
 808     clear_llvm_files(pipeline)
 809     final_build_args += [
 810         "--rust-profile-use",
 811         pipeline.rustc_profile_merged_file()
 812     ]
 813
 814     # Stage 3: Build rustc + BOLT instrumented LLVM
 815     if pipeline.supports_bolt():
 816         with timer.section("Stage 3 (LLVM BOLT)") as stage3:
 817             with stage3.section("Build rustc and LLVM") as rustc_build:
 818                 build_rustc(pipeline, args=[
 819                     "--llvm-profile-use",
 820                     pipeline.llvm_profile_merged_file(),
 821                     "--llvm-bolt-profile-generate",
 822                     "--rust-profile-use",
 823                     pipeline.rustc_profile_merged_file()
 824                 ])
 825                 record_metrics(pipeline, rustc_build)
 826
 827             with stage3.section("Gather profiles"):
 828                 gather_llvm_bolt_profiles(pipeline, runner)
 829
 830         # LLVM is not being cleared here, we want to reuse the previous build
 831         print_free_disk_space(pipeline)
 832         final_build_args += [
 833             "--llvm-bolt-profile-use",
 834             pipeline.llvm_bolt_profile_merged_file()
 835         ]
 836
 837     # Stage 4: Build PGO optimized rustc + PGO/BOLT optimized LLVM
 838     with timer.section("Stage 4 (final build)") as stage4:
 839         cmd(final_build_args)
 840         record_metrics(pipeline, stage4)
 841
 842
 843 def run(runner: BenchmarkRunner):
 844     logging.basicConfig(
 845         level=logging.DEBUG,
 846         format="%(name)s %(levelname)-4s: %(message)s",
 847     )
 848
 849     LOGGER.info(f"Running multi-stage build using Python {sys.version}")
 850     LOGGER.info(f"Environment values\n{pprint.pformat(dict(os.environ), indent=2)}")
 851
 852     build_args = sys.argv[1:]
 853
 854     timer = Timer()
 855     pipeline = create_pipeline()
 856
 857     try:
 858         execute_build_pipeline(timer, pipeline, runner, build_args)
 859     except BaseException as e:
 860         LOGGER.error("The multi-stage build has failed")
 861         raise e
 862     finally:
 863         timer.print_stats()
 864         print_free_disk_space(pipeline)
 865
 866     print_binary_sizes(pipeline)
 867
 868 if __name__ == "__main__":
 869     runner = DefaultBenchmarkRunner()
 870     run(runner)