mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-27 05:30:23 +02:00
Add documentation on tracepoints
Also clean up how-to-build documentation
This commit is contained in:
parent
81da950ebe
commit
367e04f164
4 changed files with 346 additions and 166 deletions
160
ctf_to_json.py
Executable file
160
ctf_to_json.py
Executable file
|
@ -0,0 +1,160 @@
|
|||
#!/usr/bin/env python3
|
||||
# Any copyright is dedicated to the Public Domain.
|
||||
# https://creativecommons.org/publicdomain/zero/1.0/
|
||||
#
|
||||
# Originally written by Andy Wingo <wingo@igalia.com>.
|
||||
|
||||
import bt2 # From the babeltrace2 package.
|
||||
import sys
|
||||
import json
|
||||
from enum import Enum
|
||||
|
||||
# Usage: ./ctf_to_json.py ~/lttng-traces/name-of-your-trace > foo.json
|
||||
#
|
||||
# Convert a Common Trace Format (CTF) trace, for example as produced by
|
||||
# LTTng, to the JSON-based Trace Event Format (TEF), for example as
|
||||
# consumed by `chrome://tracing`, `https://ui.perfetto.dev/`, or
|
||||
# `https://profiler.firefox.com`.
|
||||
|
||||
# The Trace Event Format is documented here:
|
||||
#
|
||||
# https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview?tab=t.0
|
||||
|
||||
# By default, events are emitted as EventPhase.INSTANT. We also support
|
||||
# rewriting the event stream so as to generate EventPhase.BEGIN /
|
||||
# EventPhase.END events for specific named events.
|
||||
|
||||
# Synthetic duration spans, keyed by span name.  Each value is a
# two-element list: the tracepoint that opens the span, followed by the
# tracepoint that closes it.
synthetic_events = {
    'gc': [
        'whippet:mutator_cause_gc',
        'whippet:restarting_mutators',
    ],
    'stop-the-world': [
        'whippet:requesting_stop',
        'whippet:mutators_stopped',
    ],
    'trace': [
        'whippet:prepare_gc',
        'whippet:restarting_mutators',
    ],
    'mutator-stopped': [
        'whippet:mutator_stopping',
        'whippet:mutator_restarted',
    ],
    'trace-roots': [
        'whippet:trace_roots_begin',
        'whippet:trace_roots_end',
    ],
    'trace-check-termination': [
        'whippet:trace_check_termination_begin',
        'whippet:trace_check_termination_end',
    ],
    'trace-objects': [
        'whippet:trace_objects_begin',
        'whippet:trace_objects_end',
    ],
    'trace-worker': [
        'whippet:trace_worker_begin',
        'whippet:trace_worker_end',
    ],
}
|
||||
|
||||
class EventPhase(Enum):
    """Phase codes used as the `ph` field of a Trace Event Format record.

    Each member's value is the one-character code written to the JSON
    output.  Values must be distinct: `Enum` turns a repeated value into
    an alias of the earlier member instead of a separate phase.
    """
    # Duration events.
    BEGIN = 'B'
    END = 'E'
    COMPLETE = 'X'
    INSTANT = 'i'
    COUNTER = 'C'
    # Nestable (async) events.
    NESTABLE_START = 'b'
    NESTABLE_INSTANT = 'n'
    NESTABLE_END = 'e'
    # Flow events.
    FLOW_START = 's'
    FLOW_STEP = 't'
    FLOW_END = 'f'
    SAMPLE = 'P'
    # Object lifetime events.
    OBJECT_CREATED = 'N'
    OBJECT_SNAPSHOT = 'O'
    OBJECT_DESTROYED = 'D'
    METADATA = 'M'
    # Memory dumps: upper-case 'V' is the global dump, lower-case 'v'
    # the per-process dump.
    MEMORY_DUMP_GLOBAL = 'V'
    MEMORY_DUMP_PROCESS = 'v'
    MARK = 'R'
    CLOCK_SYNC = 'c'
    CONTEXT_BEGIN = '('
    CONTEXT_END = ')'
|
||||
|
||||
# Clock value, in nanoseconds, of the first event seen.  Set lazily by
# event_us(); every emitted timestamp is relative to it.
base_time = None


def event_us(msg):
    """Return the timestamp of `msg` in microseconds since the first event.

    The first message passed to this function fixes the module-level
    `base_time`; its own timestamp is therefore 0.

    Args:
        msg: a message carrying a `default_clock_snapshot`.

    Returns:
        The elapsed time as a float number of microseconds.

    Raises:
        ValueError: if the message's default clock is not the
            nanosecond-resolution 'monotonic' clock this conversion
            assumes.
    """
    global base_time
    snapshot = msg.default_clock_snapshot
    clock_class = snapshot.clock_class
    # Explicit checks rather than `assert`: assertions are removed under
    # `python -O`, which would let a mismatched clock silently produce
    # wrong timestamps.
    if clock_class.name != 'monotonic':
        raise ValueError("Unexpected clock class name", clock_class.name)
    if clock_class.frequency != 1e9:
        raise ValueError("Unexpected clock frequency", clock_class.frequency)
    ns = snapshot.value
    if base_time is None:
        base_time = ns
    # Nanoseconds to microseconds.
    return (ns - base_time) * 1e-3
|
||||
|
||||
def lower(x):
    """Convert a trace value into plain data that `json.dumps` accepts.

    Python strings, ints and floats are returned unchanged.  Dicts and
    bt2 structure fields become dicts whose keys and values are lowered
    recursively.  bt2 boolean, integer, real and string values/fields are
    converted with `bool`, `int`, `float` and `str`; enumeration fields
    are rendered with `repr`.

    Raises:
        ValueError: if `x` is of none of the handled types.
    """
    if isinstance(x, (str, int, float)):
        return x
    if isinstance(x, dict) or isinstance(x, bt2._StructureFieldConst):
        return {lower(key): lower(value) for key, value in x.items()}
    if isinstance(x, (bt2._BoolValueConst, bt2._BoolFieldConst)):
        return bool(x)
    # Enumerations are tested ahead of integers, as in the original
    # ordering -- presumably because enumeration fields would also match
    # the integer check; verify against the bt2 class hierarchy.
    if isinstance(x, bt2._EnumerationFieldConst):
        return repr(x)
    if isinstance(x, (bt2._IntegerValueConst, bt2._IntegerFieldConst)):
        return int(x)
    if isinstance(x, (bt2._RealValueConst, bt2._RealFieldConst)):
        return float(x)
    if isinstance(x, (bt2._StringValueConst, bt2._StringFieldConst)):
        return str(x)
    raise ValueError("Unexpected value from trace", x)
|
||||
|
||||
# Specific Whippet events.
|
||||
# Reverse indexes over synthetic_events: tracepoint name -> list of the
# synthetic span names that the tracepoint opens (synthetic_begin) or
# closes (synthetic_end).  One tracepoint may open or close several spans.
synthetic_begin = {}
synthetic_end = {}
for synthetic, [begin, end] in synthetic_events.items():
    synthetic_begin.setdefault(begin, []).append(synthetic)
    synthetic_end.setdefault(end, []).append(synthetic)
|
||||
|
||||
def put(str):
    """Write the text `str` to standard output, adding nothing to it."""
    # The parameter name shadows the builtin `str`; it is kept as-is
    # because it is part of this function's signature.
    stream = sys.stdout
    stream.write(str)
|
||||
|
||||
# False until the first event has been printed; afterwards every event is
# preceded by a separating comma.
need_comma = False


def print_event(ev):
    """Write `ev` to standard output as one JSON value.

    Every event after the first is preceded by a comma and a line break,
    so that consecutive calls produce the elements of a JSON array.
    """
    global need_comma
    if not need_comma:
        need_comma = True
    else:
        sys.stdout.write(',\n ')
    # Serializing to a string first and then writing it appears to be
    # faster than having json.dump write to the file object directly.
    serialized = json.dumps(ev, ensure_ascii=False, check_circular=False)
    put(serialized)
|
||||
|
||||
def emit_event(msg, name, phase):
    """Print one Trace Event Format record for the trace message `msg`.

    Args:
        msg: a trace message with an `event` attribute.  Its common
            context must contain the `vpid` and `vtid` fields.
        name: value for the record's `name` field.
        phase: an `EventPhase` member; its value becomes the `ph` field.
    """
    record = {'name': name, 'cat': 'whippet', 'ph': phase.value}
    record['ts'] = event_us(msg)
    context = msg.event.common_context_field
    record['pid'] = lower(context['vpid'])
    record['tid'] = lower(context['vtid'])
    # The event payload is passed through as the record's arguments.
    record['args'] = lower(msg.event.payload_field)
    print_event(record)
|
||||
def emit_begin_event(msg, name):
    """Print a record named `name` for `msg` with phase EventPhase.BEGIN."""
    emit_event(msg, name, phase=EventPhase.BEGIN)
|
||||
def emit_end_event(msg, name):
    """Print a record named `name` for `msg` with phase EventPhase.END."""
    emit_event(msg, name, phase=EventPhase.END)
|
||||
|
||||
def emit_events(msg):
    """Print every record derived from the trace message `msg`.

    The event itself is always printed as an instant event.  It is
    followed by a begin record for each synthetic span the event opens,
    then an end record for each synthetic span it closes.
    """
    event_name = msg.event.name
    emit_event(msg, event_name, EventPhase.INSTANT)
    for opened_span in synthetic_begin.get(event_name, ()):
        emit_begin_event(msg, opened_span)
    for closed_span in synthetic_end.get(event_name, ()):
        emit_end_event(msg, closed_span)
|
||||
|
||||
def ctf_to_json(path):
    """Convert the trace at `path` to JSON on standard output.

    The output is a single JSON object with a `traceEvents` array holding
    the records produced by `emit_events`, followed by a
    `displayTimeUnit` member.  Messages without an `event` attribute are
    skipped.
    """
    # Open the trace before writing anything, so that a failure to open
    # it leaves standard output empty.
    messages = bt2.TraceCollectionMessageIterator(path)
    put('{\n')
    put(' "traceEvents": [\n ')
    for msg in messages:
        if not hasattr(msg, 'event'):
            continue
        emit_events(msg)
    put('\n')
    put('\n ],\n')
    put(' "displayTimeUnit": "ns"\n')
    put('}\n')
|
||||
|
||||
# Command-line entry point: exactly one argument, the trace to convert,
# is expected.  Anything else prints a usage line and exits with status 1.
if len(sys.argv) == 2:
    ctf_to_json(sys.argv[1])
else:
    usage = 'usage: ' + sys.argv[0] + ' ~/lttng-traces/name-of-your-trace\n'
    sys.stderr.write(usage)
    sys.exit(1)
|
226
doc/manual.md
226
doc/manual.md
|
@ -176,13 +176,14 @@ implementations of that API: `semi`, a simple semi-space collector;
|
|||
collector; and `mmc`, a mostly-marking collector inspired by Immix.
|
||||
|
||||
The program that embeds Whippet selects the collector implementation at
|
||||
build-time. In the case of the `mmc` collector, the program
|
||||
also configures a specific collector mode, again at build-time:
|
||||
generational or not, parallel or not, stack-conservative or not, and
|
||||
heap-conservative or not. It may be nice in the future to be able to
|
||||
configure these at run-time, but for the time being they are
|
||||
compile-time options so that adding new features doesn't change the
|
||||
footprint of a more minimal collector.
|
||||
build-time. For `pcc`, the program can also choose whether to be
|
||||
generational or not. For the `mmc` collector, the program configures a
|
||||
specific collector mode, again at build-time: generational or not,
|
||||
parallel or not, stack-conservative or not, and heap-conservative or
|
||||
not. It may be nice in the future to be able to configure these at
|
||||
run-time, but for the time being they are compile-time options so that
|
||||
adding new features doesn't change the footprint of a more minimal
|
||||
collector.
|
||||
|
||||
Different collectors have different allocation strategies: for example,
|
||||
the BDW collector allocates from thread-local freelists, whereas the
|
||||
|
@ -199,97 +200,58 @@ compiling user code.
|
|||
|
||||
### Compiling the collector
|
||||
|
||||
Building the collector is not as easy as it should be. As an embed-only
|
||||
library, we don't get to choose the One True Build System and then just
|
||||
build the software in that way; instead Whippet needs to be buildable
|
||||
with any build system. At some point we will have snippets that
|
||||
embedders can include in their various build systems, but for now we
|
||||
document the low-level structure, so that people can craft the
|
||||
appropriate incantations for their program's build system.
|
||||
As an embed-only library, Whippet needs to be integrated into the build
|
||||
system of its host (embedder). Currently the only supported build
|
||||
system uses GNU make. We would be happy to add other systems over time.
|
||||
|
||||
Whippet consists of some collector-implementation-agnostic independent
|
||||
modules, and then the collector implementation itself. Though Whippet
|
||||
tries to put performance-sensitive interfaces in header files, users
|
||||
should also compile with link-time optimization (LTO) to remove any
|
||||
overhead imposed by the division of code into separate compilation
|
||||
units.
|
||||
At a high level, first the embedder chooses a collector and defines how
|
||||
to specialize the collector against the embedder. Whippet's `embed.mk`
|
||||
Makefile snippet then defines how to build the set of object files that
|
||||
define the collector, and how to specialize the embedder against the
|
||||
chosen collector.
|
||||
|
||||
Usually you want to build with maximum optimization and no debugging
|
||||
assertions. Sometimes you want minimal optimization and all assertions.
|
||||
Here's what we do, as a `Makefile` snippet:
|
||||
As an example, say you have a file `program.c`, and you want to compile
|
||||
it against a Whippet checkout in `whippet/`. Your headers are in
|
||||
`include/`, and you have written an implementation of the embedder
|
||||
interface in `host-gc.h`. In that case you would have a Makefile like
|
||||
this:
|
||||
|
||||
```
|
||||
DEFAULT_BUILD=opt
|
||||
BUILD_CFLAGS_opt=-O2 -g -DNDEBUG
|
||||
BUILD_CFLAGS_optdebug=-Og -g -DGC_DEBUG=1
|
||||
BUILD_CFLAGS_debug=-O0 -g -DGC_DEBUG=1
|
||||
BUILD_CFLAGS=$(BUILD_CFLAGS_$(or $(BUILD),$(DEFAULT_BUILD)))
|
||||
HOST_DIR:=$(dir $(lastword $(MAKEFILE_LIST)))
|
||||
WHIPPET_DIR=$(HOST_DIR)whippet/
|
||||
|
||||
all: out
|
||||
|
||||
# The collector to choose: e.g. semi, bdw, pcc, generational-pcc, mmc,
|
||||
# parallel-mmc, etc.
|
||||
GC_COLLECTOR=pcc
|
||||
|
||||
include $(WHIPPET_DIR)embed.mk
|
||||
|
||||
# Host cflags go here...
|
||||
HOST_CFLAGS=
|
||||
|
||||
# Whippet's embed.mk uses this variable when it compiles code that
|
||||
# should be specialized against the embedder.
|
||||
EMBEDDER_TO_GC_CFLAGS=$(HOST_CFLAGS) -include $(HOST_DIR)host-gc.h
|
||||
|
||||
program.o: program.c
|
||||
$(GC_COMPILE) $(HOST_CFLAGS) $(GC_TO_EMBEDDER_CFLAGS) -c $<
|
||||
program: program.o $(GC_OBJS)
|
||||
$(GC_LINK) $^ $(GC_LIBS)
|
||||
```
|
||||
|
||||
So if you do just plain `make`, it will do an `opt` build. You can
|
||||
specify the build mode by setting `BUILD` on the command line, as in
|
||||
`make BUILD=debug`.
|
||||
The optimization settings passed to the C compiler are taken from
|
||||
`GC_BUILD_CFLAGS`. Embedders can override this variable directly, or
|
||||
via the shorthand `GC_BUILD` variable. A `GC_BUILD` of `opt` indicates
|
||||
maximum optimization and no debugging assertions; `optdebug` adds
|
||||
debugging assertions; and `debug` removes optimizations.
|
||||
|
||||
Then for the actual compilation flags, we do:
|
||||
|
||||
```
|
||||
CC=gcc
|
||||
CFLAGS=-Wall -flto -fno-strict-aliasing -fvisibility=hidden -Wno-unused $(BUILD_CFLAGS)
|
||||
INCLUDES=-I.
|
||||
LDFLAGS=-lpthread -flto
|
||||
COMPILE=$(CC) $(CFLAGS) $(INCLUDES)
|
||||
```
|
||||
|
||||
The actual include directory (the dot in `-I.`) should be adjusted as
|
||||
appropriate.
|
||||
|
||||
#### Collector-implementation-agnostic independent modules
|
||||
|
||||
There are currently four generic modules that don't depend on the choice
|
||||
of collector. The first is `gc-stack.o`, which has supporting code to
|
||||
associate mutators (threads) with slices of the native stack, in order
|
||||
to support conservative root-finding.
|
||||
|
||||
```
|
||||
$(COMPILE) -o gc-stack.o -c gc-stack.c
|
||||
```
|
||||
|
||||
The next is a generic options interface, to allow the user to
|
||||
parameterize the collector at run-time, for example to implement a
|
||||
specific heap sizing strategy.
|
||||
|
||||
```
|
||||
$(COMPILE) -o gc-options.o -c gc-options.c
|
||||
```
|
||||
|
||||
Next, where Whippet needs to get data from the operating system, for
|
||||
example the number of processors available, it does so behind an
|
||||
abstract interface that is selected at compile-time. The only
|
||||
implementation currently is for GNU/Linux, but it's a pretty thin layer,
|
||||
so adding more systems should not be difficult.
|
||||
|
||||
```
|
||||
PLATFORM=gnu-linux
|
||||
$(COMPILE) -o gc-platform.o -c gc-platform-$(PLATFORM).c
|
||||
```
|
||||
|
||||
Finally, something a little more complicated: ephemerons. Ephemerons
|
||||
are objects that make a weak association between a key and a value. As
|
||||
first-class objects, they need to be classifiable by the user system,
|
||||
and notably via the `gc_trace_object` procedure, and therefore need to
|
||||
have a header whose shape is understandable by the embedding program.
|
||||
We do this by including the `gc-embedder-api.h` implementation, via
|
||||
`-include`, in this case providing `foo-embedder.h`:
|
||||
|
||||
```
|
||||
$(COMPILE) -include foo-embedder.h -o gc-ephemeron.o -c gc-ephemeron.c
|
||||
```
|
||||
|
||||
As for ephemerons, finalizers also have their own compilation unit.
|
||||
|
||||
```
|
||||
$(COMPILE) -include foo-embedder.h -o gc-finalizer.o -c gc-finalizer.c
|
||||
```
|
||||
Though Whippet tries to put performance-sensitive interfaces in header
|
||||
files, users should also compile with link-time optimization (LTO) to
|
||||
remove any overhead imposed by the division of code into separate
|
||||
compilation units. `embed.mk` includes the necessary LTO flags in
|
||||
`GC_CFLAGS` and `GC_LDFLAGS`.
|
||||
|
||||
#### Compile-time options
|
||||
|
||||
|
@ -316,82 +278,14 @@ Some collectors require specific compile-time options. For example, the
|
|||
semi-space collector has to be able to move all objects; this is not
|
||||
compatible with conservative roots or heap edges.
|
||||
|
||||
#### Building `semi`
|
||||
#### Tracing support
|
||||
|
||||
Finally, let's build a collector. The simplest collector is the
|
||||
semi-space collector. The entirety of the implementation can be had by
|
||||
compiling `semi.c`, providing the program's embedder API implementation
|
||||
via `-include`:
|
||||
|
||||
```
|
||||
$(COMPILE) -DGC_PRECISE_ROOTS=1 -include foo-embedder.h -o gc.o -c semi.c
|
||||
```
|
||||
|
||||
#### Building `bdw`
|
||||
|
||||
The next simplest collector uses
|
||||
[BDW-GC](https://github.com/ivmai/bdwgc). This collector must scan the
|
||||
roots and heap conservatively. The collector is parallel if BDW-GC
|
||||
itself was compiled with parallelism enabled.
|
||||
|
||||
```
|
||||
$(COMPILE) -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 \
|
||||
`pkg-config --cflags bdw-gc` \
|
||||
-include foo-embedder.h -o gc.o -c bdw.c
|
||||
```
|
||||
|
||||
#### Building `pcc`
|
||||
|
||||
The parallel copying collector is like `semi` but better in every way:
|
||||
it supports multiple mutator threads, and evacuates in parallel if
|
||||
multiple threads are available.
|
||||
|
||||
```
|
||||
$(COMPILE) -DGC_PARALLEL=1 -DGC_PRECISE_ROOTS=1 \
|
||||
-include foo-embedder.h -o gc.o -c pcc.c
|
||||
```
|
||||
|
||||
You can also build `pcc` in a generational configuration by passing
|
||||
`-DGC_GENERATIONAL=1`. The nursery is 2 MB per active mutator, capped
|
||||
to the number of processors, so if the last cycle had a maximum of 4
|
||||
mutator threads active at the same time and your machine has 24 cores,
|
||||
your nursery would be 8 MB.
|
||||
|
||||
#### Building `mmc`
|
||||
|
||||
Finally, there is the mostly-marking collector. It can collect roots
|
||||
precisely or conservatively, trace precisely or conservatively, be
|
||||
parallel or not, and be generational or not.
|
||||
|
||||
```
|
||||
$(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 -DGC_PRECISE_ROOTS=1 \
|
||||
-include foo-embedder.h -o gc.o -c mvv.c
|
||||
```
|
||||
|
||||
### Compiling your program
|
||||
|
||||
Any compilation unit that uses the GC API should have the same set of
|
||||
compile-time options defined as when compiling the collector.
|
||||
Additionally those compilation units should include the "attributes"
|
||||
header for the collector in question, namely `semi-attrs.h`,
|
||||
`bdw-attrs.h`, `pcc-attrs.h`, or `mmc-attrs.h`. For example, for
|
||||
parallel generational mmc, you might have:
|
||||
|
||||
```
|
||||
$(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 -DGC_PRECISE_ROOTS=1 \
|
||||
-include mmc-attrs.h -o my-program.o -c my-program.c
|
||||
```
|
||||
|
||||
### Linking the collector into your program
|
||||
|
||||
Finally to link, pass all objects to the linker. You will want to
|
||||
ensure that the linker enables `-flto`, for link-time optimization. We
|
||||
do it like this:
|
||||
|
||||
```
|
||||
$(CC) $(LDFLAGS) -o my-program \
|
||||
my-program.o gc-stack.o gc-platform.o gc-options.o gc-ephemeron.o
|
||||
```
|
||||
Whippet includes support for low-overhead run-time tracing via
|
||||
[LTTng](https://lttng.org/). If the support library `lttng-ust` is
|
||||
present when Whippet is compiled (as checked via `pkg-config`),
|
||||
tracepoint support will be present. See
|
||||
[tracepoints.md](./tracepoints.md) for more information on how to get
|
||||
performance traces out of Whippet.
|
||||
|
||||
## Using the collector
|
||||
|
||||
|
|
BIN
doc/perfetto-minor-gc.png
Normal file
BIN
doc/perfetto-minor-gc.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 169 KiB |
126
doc/tracepoints.md
Normal file
126
doc/tracepoints.md
Normal file
|
@ -0,0 +1,126 @@
|
|||
# Whippet performance tracing
|
||||
|
||||
Whippet includes support for run-time tracing via
|
||||
[LTTng](https://LTTng.org) user-space tracepoints. This allows you to
|
||||
get a detailed look at how Whippet is performing on your system.
|
||||
Tracing support is currently limited to Linux systems.
|
||||
|
||||
## Getting started
|
||||
|
||||
First, you need to build Whippet with LTTng support. Usually this is as
|
||||
easy as building it in an environment where the `lttng-ust` library is
|
||||
present, as determined by `pkg-config --libs lttng-ust`. You can tell
|
||||
if your Whippet has tracing support by seeing if the resulting binaries
|
||||
are dynamically linked to `liblttng-ust`.
|
||||
|
||||
If we take as an example the `mt-gcbench` test in the Whippet source
|
||||
tree, we would have:
|
||||
|
||||
```
|
||||
$ ldd bin/mt-gcbench.pcc | grep lttng
|
||||
...
|
||||
liblttng-ust.so.1 => ...
|
||||
...
|
||||
```
|
||||
|
||||
### Capturing traces
|
||||
|
||||
Actually capturing traces is a little annoying; it's not as easy as
|
||||
`perf record`. The [LTTng
|
||||
documentation](https://lttng.org/docs/v2.13/#doc-controlling-tracing) is
|
||||
quite thorough, but here is a summary.
|
||||
|
||||
First, create your tracing session:
|
||||
|
||||
```
|
||||
$ lttng create
|
||||
Session auto-20250214-091153 created.
|
||||
Traces will be output to $HOME/lttng-traces/auto-20250214-091153
|
||||
```
|
||||
|
||||
You run all these commands as your own user; they don't require root
|
||||
permissions or system-wide modifications, as all of the Whippet
|
||||
tracepoints are user-space tracepoints (UST).
|
||||
|
||||
Just having an LTTng session created won't do anything though; you need
|
||||
to configure the session. Monotonic nanosecond-resolution timestamps
|
||||
are already implicitly part of each event. We also want to have process
|
||||
and thread IDs for all events:
|
||||
|
||||
```
|
||||
$ lttng add-context --userspace --type=vpid --type=vtid
|
||||
ust context vpid added to all channels
|
||||
ust context vtid added to all channels
|
||||
```
|
||||
|
||||
Now enable Whippet events:
|
||||
|
||||
```
|
||||
$ lttng enable-event --userspace 'whippet:*'
|
||||
ust event whippet:* created in channel channel0
|
||||
```
|
||||
|
||||
And now, start recording:
|
||||
|
||||
```
|
||||
$ lttng start
|
||||
Tracing started for session auto-20250214-091153
|
||||
```
|
||||
|
||||
With this, traces will be captured for our program of interest:
|
||||
|
||||
```
|
||||
$ bin/mt-gcbench.pcc 2.5 8
|
||||
...
|
||||
```
|
||||
|
||||
Now stop the trace:
|
||||
|
||||
```
|
||||
$ lttng stop
|
||||
Waiting for data availability
|
||||
Tracing stopped for session auto-20250214-091153
|
||||
```
|
||||
|
||||
Whew. If we did it right, our data is now in
|
||||
`$HOME/lttng-traces/auto-20250214-091153`.
|
||||
|
||||
### Visualizing traces
|
||||
|
||||
LTTng produces traces in the [Common Trace Format
|
||||
(CTF)](https://diamon.org/ctf/). My favorite trace viewing tool is the
|
||||
family of web-based trace viewers derived from `chrome://tracing`. The
|
||||
best of these appear to be [the Firefox
|
||||
profiler](https://profiler.firefox.com) and
|
||||
[Perfetto](https://ui.perfetto.dev). Unfortunately neither of these can
|
||||
work with CTF directly, so we instead need to run a trace converter.
|
||||
|
||||
Oddly, there is no trace converter that can read CTF and write something
|
||||
that Perfetto (e.g.) can read. However there is a JSON-based tracing
|
||||
format that Perfetto can read, and [Python bindings for Babeltrace, a
|
||||
library that works with CTF](https://babeltrace.org/), so that's what we
|
||||
do:
|
||||
|
||||
```
|
||||
$ python3 ctf_to_json.py ~/lttng-traces/auto-20250214-091153 > trace.json
|
||||
```
|
||||
|
||||
While Firefox Profiler can load this file, it works better on Perfetto,
|
||||
as the Whippet events are visually rendered on their respective threads.
|
||||
|
||||

|
||||
|
||||
### Expanding the set of events
|
||||
|
||||
As of February 2025,
|
||||
the current set of tracepoints includes the [heap
|
||||
events](https://github.com/wingo/whippet/blob/main/doc/manual.md#statistics)
|
||||
and some detailed internals of the parallel tracer. We expect this set
|
||||
of tracepoints to expand over time.
|
||||
|
||||
### Overhead of tracepoints
|
||||
|
||||
When tracepoints are compiled in but no events are enabled, tracepoints
|
||||
appear to have no impact on run-time. When event collection is on, for
|
||||
x86-64 hardware, [emitting a tracepoint event takes about
|
||||
100ns](https://discuss.systems/@DesnoyersMa/113986344940256872).
|
Loading…
Add table
Add a link
Reference in a new issue