mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-27 13:30:31 +02:00
Add documentation on tracepoints
Also clean up how-to-build documentation
This commit is contained in:
parent
81da950ebe
commit
367e04f164
4 changed files with 346 additions and 166 deletions
160
ctf_to_json.py
Executable file
160
ctf_to_json.py
Executable file
|
@ -0,0 +1,160 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Any copyright is dedicated to the Public Domain.
|
||||||
|
# https://creativecommons.org/publicdomain/zero/1.0/
|
||||||
|
#
|
||||||
|
# Originally written by Andy Wingo <wingo@igalia.com>.
|
||||||
|
|
||||||
|
import bt2 # From the babeltrace2 package.
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
# Usage: ./ctf_to_json.py ~/lttng-traces/name-of-your-trace > foo.json
|
||||||
|
#
|
||||||
|
# Convert a Common Trace Format (CTF) trace, for example as produced by
|
||||||
|
# LTTng, to the JSON-based Trace Event Format (TEF), for example as
|
||||||
|
# consumed by `chrome://tracing`, `https://ui.perfetto.dev/`, or
|
||||||
|
# `https://profiler.firefox.com`.
|
||||||
|
|
||||||
|
# The Trace Event Format is documented here:
|
||||||
|
#
|
||||||
|
# https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview?tab=t.0
|
||||||
|
|
||||||
|
# By default, events are emitted as EventPhase.INSTANT. We also support
|
||||||
|
# rewriting the event stream so as to generate EventPhase.BEGIN /
|
||||||
|
# EventPhase.END events for specific named events.
|
||||||
|
|
||||||
|
# Synthetic duration spans.  Each key names a span; its value is the
# two-element list [opening tracepoint, closing tracepoint] whose
# occurrences are rewritten into BEGIN / END events for that span.
synthetic_events = {
    'gc': [
        'whippet:mutator_cause_gc',
        'whippet:restarting_mutators',
    ],
    'stop-the-world': [
        'whippet:requesting_stop',
        'whippet:mutators_stopped',
    ],
    'trace': [
        'whippet:prepare_gc',
        'whippet:restarting_mutators',
    ],
    'mutator-stopped': [
        'whippet:mutator_stopping',
        'whippet:mutator_restarted',
    ],
    'trace-roots': [
        'whippet:trace_roots_begin',
        'whippet:trace_roots_end',
    ],
    'trace-check-termination': [
        'whippet:trace_check_termination_begin',
        'whippet:trace_check_termination_end',
    ],
    'trace-objects': [
        'whippet:trace_objects_begin',
        'whippet:trace_objects_end',
    ],
    'trace-worker': [
        'whippet:trace_worker_begin',
        'whippet:trace_worker_end',
    ],
}
|
||||||
|
|
||||||
|
class EventPhase(Enum):
    """Phase codes for the 'ph' key of a Trace Event Format record.

    Each member's value is the single-character code written to the
    JSON output.  Values must be unique: Enum turns a repeated value
    into an alias of the first member that used it, so a duplicated
    code silently removes a phase.
    """
    # Duration events.
    BEGIN = 'B'
    END = 'E'
    COMPLETE = 'X'
    INSTANT = 'i'
    COUNTER = 'C'
    # Nestable (async) events.
    NESTABLE_START = 'b'
    NESTABLE_INSTANT = 'n'
    NESTABLE_END = 'e'
    # Flow events.
    FLOW_START = 's'
    FLOW_STEP = 't'
    FLOW_END = 'f'
    SAMPLE = 'P'
    # Object lifetime events.
    OBJECT_CREATED = 'N'
    OBJECT_SNAPSHOT = 'O'
    OBJECT_DESTROYED = 'D'
    METADATA = 'M'
    # Memory dumps: upper-case 'V' is the global dump, lower-case 'v'
    # the per-process dump.
    MEMORY_DUMP_GLOBAL = 'V'
    MEMORY_DUMP_PROCESS = 'v'
    MARK = 'R'
    CLOCK_SYNC = 'c'
    CONTEXT_BEGIN = '('
    CONTEXT_END = ')'
|
||||||
|
|
||||||
|
# Clock value of the first message passed to event_us(); every later
# timestamp is reported relative to it.
base_time = None


def event_us(msg):
    """Return msg's timestamp in microseconds since the first message seen.

    msg must carry a default clock snapshot whose clock class is named
    'monotonic' and runs at 1e9 Hz; both conditions are asserted.  The
    snapshot value is then treated as nanoseconds.  The first call
    records its value in the module-level base_time and returns zero.
    """
    global base_time
    snapshot = msg.default_clock_snapshot
    clock = snapshot.clock_class
    assert clock.name == 'monotonic'
    assert clock.frequency == 1e9
    ns = snapshot.value
    if base_time is None:
        base_time = ns
    elapsed_ns = ns - base_time
    return elapsed_ns * 1e-3
|
||||||
|
|
||||||
|
def lower(x):
    """Convert a value read from a trace into plain Python data.

    str, int and float values are returned unchanged.  Dicts and bt2
    structure fields become dicts with both keys and values converted
    recursively.  bt2 boolean, integer, real and string values/fields
    become bool, int, float and str respectively; enumeration fields
    become their repr().

    Raises ValueError for any other kind of value.
    """
    if isinstance(x, (str, int, float)):
        return x
    # Plain dicts are tested first and on their own so that they never
    # touch the bt2 types.
    if isinstance(x, dict) or isinstance(x, bt2._StructureFieldConst):
        return {lower(key): lower(val) for key, val in x.items()}
    if isinstance(x, (bt2._BoolValueConst, bt2._BoolFieldConst)):
        return bool(x)
    # NOTE(review): the enumeration test stays ahead of the integer test,
    # presumably because bt2 enumeration fields are also integer fields;
    # confirm before reordering.
    if isinstance(x, bt2._EnumerationFieldConst):
        return repr(x)
    if isinstance(x, (bt2._IntegerValueConst, bt2._IntegerFieldConst)):
        return int(x)
    if isinstance(x, (bt2._RealValueConst, bt2._RealFieldConst)):
        return float(x)
    if isinstance(x, (bt2._StringValueConst, bt2._StringFieldConst)):
        return str(x)
    raise ValueError("Unexpected value from trace", x)
|
||||||
|
|
||||||
|
# Specific Whippet events.
|
||||||
|
# Reverse indexes over synthetic_events: tracepoint name -> list of the
# synthetic span names which that tracepoint opens (synthetic_begin) or
# closes (synthetic_end).  One tracepoint may open or close several spans.
synthetic_begin = {}
synthetic_end = {}
for synthetic, [begin, end] in synthetic_events.items():
    synthetic_begin.setdefault(begin, []).append(synthetic)
    synthetic_end.setdefault(end, []).append(synthetic)
|
||||||
|
|
||||||
|
def put(str):
    """Write the given text to standard output, with no newline added."""
    stream = sys.stdout
    stream.write(str)
|
||||||
|
|
||||||
|
# False until the first event has been printed; afterwards every event
# is preceded by a separator.
need_comma = False


def print_event(ev):
    """Write ev to standard output as JSON, comma-separated from earlier events.

    ev is the event record to serialize.  The first call writes only
    the record; each later call writes a comma, newline and space first.
    """
    global need_comma
    if not need_comma:
        need_comma = True
    else:
        sys.stdout.write(',\n ')
    # Serializing to a string and then writing it appears to be faster
    # than having json.dump write to the file object directly.
    serialized = json.dumps(ev, ensure_ascii=False, check_circular=False)
    put(serialized)
|
||||||
|
|
||||||
|
def emit_event(msg, name, phase):
    """Build one Trace Event Format record for msg and print it.

    msg: trace message providing the timestamp, the 'vpid' / 'vtid'
        entries of the event's common context, and the event payload.
    name: value stored under the record's 'name' key.
    phase: EventPhase member; its value is stored under 'ph'.
    """
    phase_code = phase.value
    timestamp = event_us(msg)
    context = msg.event.common_context_field
    record = {
        'name': name,
        'cat': 'whippet',
        'ph': phase_code,
        'ts': timestamp,
        'pid': lower(context['vpid']),
        'tid': lower(context['vtid']),
        'args': lower(msg.event.payload_field),
    }
    print_event(record)
|
||||||
|
def emit_begin_event(msg, name):
    """Emit msg under the given name as an EventPhase.BEGIN event."""
    emit_event(msg, name, phase=EventPhase.BEGIN)
|
||||||
|
def emit_end_event(msg, name):
    """Emit msg under the given name as an EventPhase.END event."""
    emit_event(msg, name, phase=EventPhase.END)
|
||||||
|
|
||||||
|
def emit_events(msg):
    """Emit every output event derived from one trace message.

    The message is always emitted as an INSTANT event under its own
    name.  Then a BEGIN event is emitted for each synthetic span that
    the message's event name opens, followed by an END event for each
    synthetic span that it closes.
    """
    event_name = msg.event.name
    emit_event(msg, event_name, EventPhase.INSTANT)
    opened = synthetic_begin.get(event_name, [])
    for span in opened:
        emit_begin_event(msg, span)
    closed = synthetic_end.get(event_name, [])
    for span in closed:
        emit_end_event(msg, span)
|
||||||
|
|
||||||
|
def ctf_to_json(path):
    """Convert the trace at path to JSON on standard output.

    Messages are read through bt2.TraceCollectionMessageIterator(path).
    The output is a JSON object with a "traceEvents" array holding the
    emitted events, followed by a "displayTimeUnit" of "ns".  Messages
    that have no `event` attribute are skipped.
    """
    # Create the iterator before writing anything, so that a failure to
    # open the trace happens before any output is produced.
    messages = bt2.TraceCollectionMessageIterator(path)
    put('{\n')
    put(' "traceEvents": [\n ')
    for msg in messages:
        if not hasattr(msg, 'event'):
            continue
        emit_events(msg)
    # NOTE(review): indentation was lost in the source this was recovered
    # from; this newline is assumed to be written once, after the loop.
    put('\n')
    put('\n ],\n')
    put(' "displayTimeUnit": "ns"\n')
    put('}\n')
|
||||||
|
|
||||||
|
# Script entry: exactly one argument, the trace directory, is expected.
# Anything else prints a usage line to stderr and exits with status 1.
if len(sys.argv) == 2:
    ctf_to_json(sys.argv[1])
else:
    usage = 'usage: ' + sys.argv[0] + ' ~/lttng-traces/name-of-your-trace\n'
    sys.stderr.write(usage)
    sys.exit(1)
|
226
doc/manual.md
226
doc/manual.md
|
@ -176,13 +176,14 @@ implementations of that API: `semi`, a simple semi-space collector;
|
||||||
collector; and `mmc`, a mostly-marking collector inspired by Immix.
|
collector; and `mmc`, a mostly-marking collector inspired by Immix.
|
||||||
|
|
||||||
The program that embeds Whippet selects the collector implementation at
|
The program that embeds Whippet selects the collector implementation at
|
||||||
build-time. In the case of the `mmc` collector, the program
|
build-time. For `pcc`, the program can also choose whether to be
|
||||||
also configures a specific collector mode, again at build-time:
|
generational or not. For the `mmc` collector, the program configures a
|
||||||
generational or not, parallel or not, stack-conservative or not, and
|
specific collector mode, again at build-time: generational or not,
|
||||||
heap-conservative or not. It may be nice in the future to be able to
|
parallel or not, stack-conservative or not, and heap-conservative or
|
||||||
configure these at run-time, but for the time being they are
|
not. It may be nice in the future to be able to configure these at
|
||||||
compile-time options so that adding new features doesn't change the
|
run-time, but for the time being they are compile-time options so that
|
||||||
footprint of a more minimal collector.
|
adding new features doesn't change the footprint of a more minimal
|
||||||
|
collector.
|
||||||
|
|
||||||
Different collectors have different allocation strategies: for example,
|
Different collectors have different allocation strategies: for example,
|
||||||
the BDW collector allocates from thread-local freelists, whereas the
|
the BDW collector allocates from thread-local freelists, whereas the
|
||||||
|
@ -199,97 +200,58 @@ compiling user code.
|
||||||
|
|
||||||
### Compiling the collector
|
### Compiling the collector
|
||||||
|
|
||||||
Building the collector is not as easy as it should be. As an embed-only
|
As an embed-only library, Whippet needs to be integrated into the build
|
||||||
library, we don't get to choose the One True Build System and then just
|
system of its host (embedder). Currently the only supported build
|
||||||
build the software in that way; instead Whippet needs to be buildable
|
system uses GNU make. We would be happy to add other systems over time.
|
||||||
with any build system. At some point we will have snippets that
|
|
||||||
embedders can include in their various build systems, but for now we
|
|
||||||
document the low-level structure, so that people can craft the
|
|
||||||
appropriate incantations for their program's build system.
|
|
||||||
|
|
||||||
Whippet consists of some collector-implementation-agnostic independent
|
At a high level, first the embedder chooses a collector and defines how
|
||||||
modules, and then the collector implementation itself. Though Whippet
|
to specialize the collector against the embedder. Whippet's `embed.mk`
|
||||||
tries to put performance-sensitive interfaces in header files, users
|
Makefile snippet then defines how to build the set of object files that
|
||||||
should also compile with link-time optimization (LTO) to remove any
|
define the collector, and how to specialize the embedder against the
|
||||||
overhead imposed by the division of code into separate compilation
|
chosen collector.
|
||||||
units.
|
|
||||||
|
|
||||||
Usually you want to build with maximum optimization and no debugging
|
As an example, say you have a file `program.c`, and you want to compile
|
||||||
assertions. Sometimes you want minimal optimization and all assertions.
|
it against a Whippet checkout in `whippet/`. Your headers are in
|
||||||
Here's what we do, as a `Makefile` snippet:
|
`include/`, and you have written an implementation of the embedder
|
||||||
|
interface in `host-gc.h`. In that case you would have a Makefile like
|
||||||
|
this:
|
||||||
|
|
||||||
```
|
```
|
||||||
DEFAULT_BUILD=opt
|
HOST_DIR:=$(dir $(lastword $(MAKEFILE_LIST)))
|
||||||
BUILD_CFLAGS_opt=-O2 -g -DNDEBUG
|
WHIPPET_DIR=$(HOST_DIR)whippet/
|
||||||
BUILD_CFLAGS_optdebug=-Og -g -DGC_DEBUG=1
|
|
||||||
BUILD_CFLAGS_debug=-O0 -g -DGC_DEBUG=1
|
all: program
|
||||||
BUILD_CFLAGS=$(BUILD_CFLAGS_$(or $(BUILD),$(DEFAULT_BUILD)))
|
|
||||||
|
# The collector to choose: e.g. semi, bdw, pcc, generational-pcc, mmc,
|
||||||
|
# parallel-mmc, etc.
|
||||||
|
GC_COLLECTOR=pcc
|
||||||
|
|
||||||
|
include $(WHIPPET_DIR)embed.mk
|
||||||
|
|
||||||
|
# Host cflags go here...
|
||||||
|
HOST_CFLAGS=
|
||||||
|
|
||||||
|
# Whippet's embed.mk uses this variable when it compiles code that
|
||||||
|
# should be specialized against the embedder.
|
||||||
|
EMBEDDER_TO_GC_CFLAGS=$(HOST_CFLAGS) -include $(HOST_DIR)host-gc.h
|
||||||
|
|
||||||
|
program.o: program.c
|
||||||
|
$(GC_COMPILE) $(HOST_CFLAGS) $(GC_TO_EMBEDDER_CFLAGS) -c $<
|
||||||
|
program: program.o $(GC_OBJS)
|
||||||
|
$(GC_LINK) $^ $(GC_LIBS)
|
||||||
```
|
```
|
||||||
|
|
||||||
So if you do just plain `make`, it will do an `opt` build. You can
|
The optimization settings passed to the C compiler are taken from
|
||||||
specify the build mode by setting `BUILD` on the command line, as in
|
`GC_BUILD_CFLAGS`. Embedders can override this variable directly, or
|
||||||
`make BUILD=debug`.
|
via the shorthand `GC_BUILD` variable. A `GC_BUILD` of `opt` indicates
|
||||||
|
maximum optimization and no debugging assertions; `optdebug` adds
|
||||||
|
debugging assertions; and `debug` removes optimizations.
|
||||||
|
|
||||||
Then for the actual compilation flags, we do:
|
Though Whippet tries to put performance-sensitive interfaces in header
|
||||||
|
files, users should also compile with link-time optimization (LTO) to
|
||||||
```
|
remove any overhead imposed by the division of code into separate
|
||||||
CC=gcc
|
compilation units. `embed.mk` includes the necessary LTO flags in
|
||||||
CFLAGS=-Wall -flto -fno-strict-aliasing -fvisibility=hidden -Wno-unused $(BUILD_CFLAGS)
|
`GC_CFLAGS` and `GC_LDFLAGS`.
|
||||||
INCLUDES=-I.
|
|
||||||
LDFLAGS=-lpthread -flto
|
|
||||||
COMPILE=$(CC) $(CFLAGS) $(INCLUDES)
|
|
||||||
```
|
|
||||||
|
|
||||||
The actual include directory (the dot in `-I.`) should be adjusted as
|
|
||||||
appropriate.
|
|
||||||
|
|
||||||
#### Collector-implementation-agnostic independent modules
|
|
||||||
|
|
||||||
There are currently four generic modules that don't depend on the choice
|
|
||||||
of collector. The first is `gc-stack.o`, which has supporting code to
|
|
||||||
associate mutators (threads) with slices of the native stack, in order
|
|
||||||
to support conservative root-finding.
|
|
||||||
|
|
||||||
```
|
|
||||||
$(COMPILE) -o gc-stack.o -c gc-stack.c
|
|
||||||
```
|
|
||||||
|
|
||||||
The next is a generic options interface, to allow the user to
|
|
||||||
parameterize the collector at run-time, for example to implement a
|
|
||||||
specific heap sizing strategy.
|
|
||||||
|
|
||||||
```
|
|
||||||
$(COMPILE) -o gc-options.o -c gc-options.c
|
|
||||||
```
|
|
||||||
|
|
||||||
Next, where Whippet needs to get data from the operating system, for
|
|
||||||
example the number of processors available, it does so behind an
|
|
||||||
abstract interface that is selected at compile-time. The only
|
|
||||||
implementation currently is for GNU/Linux, but it's a pretty thin layer,
|
|
||||||
so adding more systems should not be difficult.
|
|
||||||
|
|
||||||
```
|
|
||||||
PLATFORM=gnu-linux
|
|
||||||
$(COMPILE) -o gc-platform.o -c gc-platform-$(PLATFORM).c
|
|
||||||
```
|
|
||||||
|
|
||||||
Finally, something a little more complicated: ephemerons. Ephemerons
|
|
||||||
are objects that make a weak association between a key and a value. As
|
|
||||||
first-class objects, they need to be classifiable by the user system,
|
|
||||||
and notably via the `gc_trace_object` procedure, and therefore need to
|
|
||||||
have a header whose shape is understandable by the embedding program.
|
|
||||||
We do this by including the `gc-embedder-api.h` implementation, via
|
|
||||||
`-include`, in this case providing `foo-embedder.h`:
|
|
||||||
|
|
||||||
```
|
|
||||||
$(COMPILE) -include foo-embedder.h -o gc-ephemeron.o -c gc-ephemeron.c
|
|
||||||
```
|
|
||||||
|
|
||||||
As for ephemerons, finalizers also have their own compilation unit.
|
|
||||||
|
|
||||||
```
|
|
||||||
$(COMPILE) -include foo-embedder.h -o gc-finalizer.o -c gc-finalizer.c
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Compile-time options
|
#### Compile-time options
|
||||||
|
|
||||||
|
@ -316,82 +278,14 @@ Some collectors require specific compile-time options. For example, the
|
||||||
semi-space collector has to be able to move all objects; this is not
|
semi-space collector has to be able to move all objects; this is not
|
||||||
compatible with conservative roots or heap edges.
|
compatible with conservative roots or heap edges.
|
||||||
|
|
||||||
#### Building `semi`
|
#### Tracing support
|
||||||
|
|
||||||
Finally, let's build a collector. The simplest collector is the
|
Whippet includes support for low-overhead run-time tracing via
|
||||||
semi-space collector. The entirety of the implementation can be had by
|
[LTTng](https://lttng.org/). If the support library `lttng-ust` is
|
||||||
compiling `semi.c`, providing the program's embedder API implementation
|
present when Whippet is compiled (as checked via `pkg-config`),
|
||||||
via `-include`:
|
tracepoint support will be present. See
|
||||||
|
[tracepoints.md](./tracepoints.md) for more information on how to get
|
||||||
```
|
performance traces out of Whippet.
|
||||||
$(COMPILE) -DGC_PRECISE_ROOTS=1 -include foo-embedder.h -o gc.o -c semi.c
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Building `bdw`
|
|
||||||
|
|
||||||
The next simplest collector uses
|
|
||||||
[BDW-GC](https://github.com/ivmai/bdwgc). This collector must scan the
|
|
||||||
roots and heap conservatively. The collector is parallel if BDW-GC
|
|
||||||
itself was compiled with parallelism enabled.
|
|
||||||
|
|
||||||
```
|
|
||||||
$(COMPILE) -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 \
|
|
||||||
`pkg-config --cflags bdw-gc` \
|
|
||||||
-include foo-embedder.h -o gc.o -c bdw.c
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Building `pcc`
|
|
||||||
|
|
||||||
The parallel copying collector is like `semi` but better in every way:
|
|
||||||
it supports multiple mutator threads, and evacuates in parallel if
|
|
||||||
multiple threads are available.
|
|
||||||
|
|
||||||
```
|
|
||||||
$(COMPILE) -DGC_PARALLEL=1 -DGC_PRECISE_ROOTS=1 \
|
|
||||||
-include foo-embedder.h -o gc.o -c pcc.c
|
|
||||||
```
|
|
||||||
|
|
||||||
You can also build `pcc` in a generational configuration by passing
|
|
||||||
`-DGC_GENERATIONAL=1`. The nursery is 2 MB per active mutator, capped
|
|
||||||
to the number of processors, so if the last cycle had a maximum of 4
|
|
||||||
mutator threads active at the same time and your machine has 24 cores,
|
|
||||||
your nursery would be 8 MB.
|
|
||||||
|
|
||||||
#### Building `mmc`
|
|
||||||
|
|
||||||
Finally, there is the mostly-marking collector. It can collect roots
|
|
||||||
precisely or conservatively, trace precisely or conservatively, be
|
|
||||||
parallel or not, and be generational or not.
|
|
||||||
|
|
||||||
```
|
|
||||||
$(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 -DGC_PRECISE_ROOTS=1 \
|
|
||||||
-include foo-embedder.h -o gc.o -c mvv.c
|
|
||||||
```
|
|
||||||
|
|
||||||
### Compiling your program
|
|
||||||
|
|
||||||
Any compilation unit that uses the GC API should have the same set of
|
|
||||||
compile-time options defined as when compiling the collector.
|
|
||||||
Additionally those compilation units should include the "attributes"
|
|
||||||
header for the collector in question, namely `semi-attrs.h`,
|
|
||||||
`bdw-attrs.h`, `pcc-attrs.h`, or `mmc-attrs.h`. For example, for
|
|
||||||
parallel generational mmc, you might have:
|
|
||||||
|
|
||||||
```
|
|
||||||
$(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 -DGC_PRECISE_ROOTS=1 \
|
|
||||||
-include mmc-attrs.h -o my-program.o -c my-program.c
|
|
||||||
```
|
|
||||||
|
|
||||||
### Linking the collector into your program
|
|
||||||
|
|
||||||
Finally to link, pass all objects to the linker. You will want to
|
|
||||||
ensure that the linker enables `-flto`, for link-time optimization. We
|
|
||||||
do it like this:
|
|
||||||
|
|
||||||
```
|
|
||||||
$(CC) $(LDFLAGS) -o my-program \
|
|
||||||
my-program.o gc-stack.o gc-platform.o gc-options.o gc-ephemeron.o
|
|
||||||
```
|
|
||||||
|
|
||||||
## Using the collector
|
## Using the collector
|
||||||
|
|
||||||
|
|
BIN
doc/perfetto-minor-gc.png
Normal file
BIN
doc/perfetto-minor-gc.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 169 KiB |
126
doc/tracepoints.md
Normal file
126
doc/tracepoints.md
Normal file
|
@ -0,0 +1,126 @@
|
||||||
|
# Whippet performance tracing
|
||||||
|
|
||||||
|
Whippet includes support for run-time tracing via
|
||||||
|
[LTTng](https://LTTng.org) user-space tracepoints. This allows you to
|
||||||
|
get a detailed look at how Whippet is performing on your system.
|
||||||
|
Tracing support is currently limited to Linux systems.
|
||||||
|
|
||||||
|
## Getting started
|
||||||
|
|
||||||
|
First, you need to build Whippet with LTTng support. Usually this is as
|
||||||
|
easy as building it in an environment where the `lttng-ust` library is
|
||||||
|
present, as determined by `pkg-config --libs lttng-ust`. You can know
|
||||||
|
if your Whippet has tracing support by seeing if the resulting binaries
|
||||||
|
are dynamically linked to `liblttng-ust`.
|
||||||
|
|
||||||
|
If we take as an example the `mt-gcbench` test in the Whippet source
|
||||||
|
tree, we would have:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ ldd bin/mt-gcbench.pcc | grep lttng
|
||||||
|
...
|
||||||
|
liblttng-ust.so.1 => ...
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Capturing traces
|
||||||
|
|
||||||
|
Actually capturing traces is a little annoying; it's not as easy as
|
||||||
|
`perf record`. The [LTTng
|
||||||
|
documentation](https://lttng.org/docs/v2.13/#doc-controlling-tracing) is
|
||||||
|
quite thorough, but here is a summary.
|
||||||
|
|
||||||
|
First, create your tracing session:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ lttng create
|
||||||
|
Session auto-20250214-091153 created.
|
||||||
|
Traces will be output to $HOME/lttng-traces/auto-20250214-091153
|
||||||
|
```
|
||||||
|
|
||||||
|
You run all these commands as your own user; they don't require root
|
||||||
|
permissions or system-wide modifications, as all of the Whippet
|
||||||
|
tracepoints are user-space tracepoints (UST).
|
||||||
|
|
||||||
|
Just having an LTTng session created won't do anything though; you need
|
||||||
|
to configure the session. Monotonic nanosecond-resolution timestamps
|
||||||
|
are already implicitly part of each event. We also want to have process
|
||||||
|
and thread IDs for all events:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ lttng add-context --userspace --type=vpid --type=vtid
|
||||||
|
ust context vpid added to all channels
|
||||||
|
ust context vtid added to all channels
|
||||||
|
```
|
||||||
|
|
||||||
|
Now enable Whippet events:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ lttng enable-event --userspace 'whippet:*'
|
||||||
|
ust event whippet:* created in channel channel0
|
||||||
|
```
|
||||||
|
|
||||||
|
And now, start recording:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ lttng start
|
||||||
|
Tracing started for session auto-20250214-091153
|
||||||
|
```
|
||||||
|
|
||||||
|
With this, traces will be captured for our program of interest:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ bin/mt-gcbench.pcc 2.5 8
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
Now stop the trace:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ lttng stop
|
||||||
|
Waiting for data availability
|
||||||
|
Tracing stopped for session auto-20250214-091153
|
||||||
|
```
|
||||||
|
|
||||||
|
Whew. If we did it right, our data is now in
|
||||||
|
$HOME/lttng-traces/auto-20250214-091153.
|
||||||
|
|
||||||
|
### Visualizing traces
|
||||||
|
|
||||||
|
LTTng produces traces in the [Common Trace Format
|
||||||
|
(CTF)](https://diamon.org/ctf/). My favorite trace viewing tool is the
|
||||||
|
family of web-based trace viewers derived from `chrome://tracing`. The
|
||||||
|
best of these appear to be [the Firefox
|
||||||
|
profiler](https://profiler.firefox.com) and
|
||||||
|
[Perfetto](https://ui.perfetto.dev). Unfortunately neither of these can
|
||||||
|
work with CTF directly, so we instead need to run a trace converter.
|
||||||
|
|
||||||
|
Oddly, there is no trace converter that can read CTF and write something
|
||||||
|
that Perfetto (e.g.) can read. However there is a JSON-based tracing
|
||||||
|
format that Perfetto can read, and [Python bindings for Babeltrace, a
|
||||||
|
library that works with CTF](https://babeltrace.org/), so that's what we
|
||||||
|
do:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ python3 ctf_to_json.py ~/lttng-traces/auto-20250214-091153 > trace.json
|
||||||
|
```
|
||||||
|
|
||||||
|
While Firefox Profiler can load this file, it works better on Perfetto,
|
||||||
|
as the Whippet events are visually rendered on their respective threads.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### Expanding the set of events
|
||||||
|
|
||||||
|
As of February 2025,
|
||||||
|
the current set of tracepoints includes the [heap
|
||||||
|
events](https://github.com/wingo/whippet/blob/main/doc/manual.md#statistics)
|
||||||
|
and some detailed internals of the parallel tracer. We expect this set
|
||||||
|
of tracepoints to expand over time.
|
||||||
|
|
||||||
|
### Overhead of tracepoints
|
||||||
|
|
||||||
|
When tracepoints are compiled in but no events are enabled, tracepoints
|
||||||
|
appear to have no impact on run-time. When event collection is on, for
|
||||||
|
x86-64 hardware, [emitting a tracepoint event takes about
|
||||||
|
100ns](https://discuss.systems/@DesnoyersMa/113986344940256872).
|
Loading…
Add table
Add a link
Reference in a new issue