[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <Z9jXh98bpnraXL0x@google.com>
Date: Mon, 17 Mar 2025 19:16:39 -0700
From: Namhyung Kim <namhyung@...nel.org>
To: Ian Rogers <irogers@...gle.com>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>,
Adrian Hunter <adrian.hunter@...el.com>,
Kan Liang <kan.liang@...ux.intel.com>,
Athira Rajeev <atrajeev@...ux.vnet.ibm.com>,
Stephen Brennan <stephen.s.brennan@...cle.com>,
James Clark <james.clark@...aro.org>,
Yunseong Kim <yskelg@...il.com>, linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v1 2/3] perf dso: Use lock annotations to fix asan
deadlock
On Thu, Mar 13, 2025 at 08:08:35PM -0700, Ian Rogers wrote:
> dso__list_del with address sanitizer and/or reference count checking
> will call dso__put that can call dso__data_close reentrantly trying to
> lock the dso__data_open_lock and deadlocking. Switch from pthread
> mutexes to perf's mutex so that lock checking is performed in debug
> builds. Add lock annotations that diagnosed the problem. Release the
> dso__data_open_lock around the dso__put to avoid the deadlock.
>
> Change the declaration of dso__data_get_fd to return a boolean,
> indicating the fd is valid and the lock is held, to make it compatible
> with the thread safety annotations as a try lock.
>
> Signed-off-by: Ian Rogers <irogers@...gle.com>
> ---
> tools/perf/tests/dso-data.c | 4 +-
> tools/perf/util/dso.c | 77 +++++++++++++++---------
> tools/perf/util/dso.h | 15 +++--
> tools/perf/util/unwind-libunwind-local.c | 16 ++---
> 4 files changed, 68 insertions(+), 44 deletions(-)
>
> diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
> index 5286ae8bd2d7..06be7c5d8495 100644
> --- a/tools/perf/tests/dso-data.c
> +++ b/tools/perf/tests/dso-data.c
> @@ -106,9 +106,9 @@ struct test_data_offset offsets[] = {
> /* move it from util/dso.c for compatibility */
> static int dso__data_fd(struct dso *dso, struct machine *machine)
> {
> - int fd = dso__data_get_fd(dso, machine);
> + int fd = -1;
>
> - if (fd >= 0)
> + if (dso__data_get_fd(dso, machine, &fd))
> dso__data_put_fd(dso);
>
> return fd;
> diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
> index 7576e8e24838..fe42837eefec 100644
> --- a/tools/perf/util/dso.c
> +++ b/tools/perf/util/dso.c
> @@ -493,11 +493,25 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
> /*
> * Global list of open DSOs and the counter.
> */
> +struct mutex _dso__data_open_lock;
> static LIST_HEAD(dso__data_open);
> -static long dso__data_open_cnt;
> -static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
> +static long dso__data_open_cnt GUARDED_BY(_dso__data_open_lock);
>
> -static void dso__list_add(struct dso *dso)
> +static void dso__data_open_lock_init(void)
> +{
> + mutex_init(&_dso__data_open_lock);
> +}
> +
> +static struct mutex *dso__data_open_lock(void) LOCK_RETURNED(_dso__data_open_lock)
> +{
> + static pthread_once_t data_open_lock_once = PTHREAD_ONCE_INIT;
> +
> + pthread_once(&data_open_lock_once, dso__data_open_lock_init);
> +
> + return &_dso__data_open_lock;
> +}
> +
> +static void dso__list_add(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
> {
> list_add_tail(&dso__data(dso)->open_entry, &dso__data_open);
> #ifdef REFCNT_CHECKING
> @@ -508,11 +522,13 @@ static void dso__list_add(struct dso *dso)
> dso__data_open_cnt++;
> }
>
> -static void dso__list_del(struct dso *dso)
> +static void dso__list_del(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
> {
> list_del_init(&dso__data(dso)->open_entry);
> #ifdef REFCNT_CHECKING
> + mutex_unlock(dso__data_open_lock());
> dso__put(dso__data(dso)->dso);
> + mutex_lock(dso__data_open_lock());
> #endif
> WARN_ONCE(dso__data_open_cnt <= 0,
> "DSO data fd counter out of bounds.");
> @@ -521,7 +537,7 @@ static void dso__list_del(struct dso *dso)
>
> static void close_first_dso(void);
>
> -static int do_open(char *name)
> +static int do_open(char *name) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
> {
> int fd;
> char sbuf[STRERR_BUFSIZE];
> @@ -548,6 +564,7 @@ char *dso__filename_with_chroot(const struct dso *dso, const char *filename)
> }
>
> static int __open_dso(struct dso *dso, struct machine *machine)
> + EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
> {
> int fd = -EINVAL;
> char *root_dir = (char *)"";
> @@ -613,6 +630,7 @@ static void check_data_close(void);
> * list/count of open DSO objects.
> */
> static int open_dso(struct dso *dso, struct machine *machine)
> + EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
> {
> int fd;
> struct nscookie nsc;
> @@ -638,7 +656,7 @@ static int open_dso(struct dso *dso, struct machine *machine)
> return fd;
> }
>
> -static void close_data_fd(struct dso *dso)
> +static void close_data_fd(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
> {
> if (dso__data(dso)->fd >= 0) {
> close(dso__data(dso)->fd);
> @@ -655,12 +673,12 @@ static void close_data_fd(struct dso *dso)
> * Close @dso's data file descriptor and updates
> * list/count of open DSO objects.
> */
> -static void close_dso(struct dso *dso)
> +static void close_dso(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
> {
> close_data_fd(dso);
> }
>
> -static void close_first_dso(void)
> +static void close_first_dso(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
> {
> struct dso_data *dso_data;
> struct dso *dso;
> @@ -705,7 +723,7 @@ void reset_fd_limit(void)
> fd_limit = 0;
> }
>
> -static bool may_cache_fd(void)
> +static bool may_cache_fd(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
> {
> if (!fd_limit)
> fd_limit = get_fd_limit();
> @@ -721,7 +739,7 @@ static bool may_cache_fd(void)
> * for opened dso file descriptors. The limit is half
> * of the RLIMIT_NOFILE files opened.
> */
> -static void check_data_close(void)
> +static void check_data_close(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
> {
> bool cache_fd = may_cache_fd();
>
> @@ -737,12 +755,13 @@ static void check_data_close(void)
> */
> void dso__data_close(struct dso *dso)
> {
> - pthread_mutex_lock(&dso__data_open_lock);
> + mutex_lock(dso__data_open_lock());
> close_dso(dso);
> - pthread_mutex_unlock(&dso__data_open_lock);
> + mutex_unlock(dso__data_open_lock());
> }
>
> static void try_to_open_dso(struct dso *dso, struct machine *machine)
> + EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock)
> {
> enum dso_binary_type binary_type_data[] = {
> DSO_BINARY_TYPE__BUILD_ID_CACHE,
> @@ -784,25 +803,27 @@ static void try_to_open_dso(struct dso *dso, struct machine *machine)
> * returns file descriptor. It should be paired with
> * dso__data_put_fd() if it returns non-negative value.
> */
> -int dso__data_get_fd(struct dso *dso, struct machine *machine)
> +bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd)
> {
> + *fd = -1;
> if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR)
> - return -1;
> + return false;
>
> - if (pthread_mutex_lock(&dso__data_open_lock) < 0)
> - return -1;
> + mutex_lock(dso__data_open_lock());
>
> try_to_open_dso(dso, machine);
>
> - if (dso__data(dso)->fd < 0)
> - pthread_mutex_unlock(&dso__data_open_lock);
> + *fd = dso__data(dso)->fd;
> + if (*fd >= 0)
> + return true;
>
> - return dso__data(dso)->fd;
> + mutex_unlock(dso__data_open_lock());
> + return false;
> }
>
> void dso__data_put_fd(struct dso *dso __maybe_unused)
> {
> - pthread_mutex_unlock(&dso__data_open_lock);
> + mutex_unlock(dso__data_open_lock());
> }
>
> bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
> @@ -954,7 +975,7 @@ static ssize_t file_read(struct dso *dso, struct machine *machine,
> {
> ssize_t ret;
>
> - pthread_mutex_lock(&dso__data_open_lock);
> + mutex_lock(dso__data_open_lock());
>
> /*
> * dso__data(dso)->fd might be closed if other thread opened another
> @@ -970,7 +991,7 @@ static ssize_t file_read(struct dso *dso, struct machine *machine,
>
> ret = pread(dso__data(dso)->fd, data, DSO__DATA_CACHE_SIZE, offset);
> out:
> - pthread_mutex_unlock(&dso__data_open_lock);
> + mutex_unlock(dso__data_open_lock());
> return ret;
> }
>
> @@ -1078,7 +1099,7 @@ static int file_size(struct dso *dso, struct machine *machine)
> struct stat st;
> char sbuf[STRERR_BUFSIZE];
>
> - pthread_mutex_lock(&dso__data_open_lock);
> + mutex_lock(dso__data_open_lock());
>
> /*
> * dso__data(dso)->fd might be closed if other thread opened another
> @@ -1102,7 +1123,7 @@ static int file_size(struct dso *dso, struct machine *machine)
> dso__data(dso)->file_size = st.st_size;
>
> out:
> - pthread_mutex_unlock(&dso__data_open_lock);
> + mutex_unlock(dso__data_open_lock());
> return ret;
> }
>
> @@ -1611,12 +1632,12 @@ size_t dso__fprintf(struct dso *dso, FILE *fp)
>
> enum dso_type dso__type(struct dso *dso, struct machine *machine)
> {
> - int fd;
> + int fd = -1;
> enum dso_type type = DSO__TYPE_UNKNOWN;
>
> - fd = dso__data_get_fd(dso, machine);
> - if (fd >= 0) {
> - type = dso__type_fd(fd);
> + if (dso__data_get_fd(dso, machine, &fd)) {
> + if (fd >= 0)
This line seem unnecessary. Other than that, this all looks good to me.
Thanks,
Namhyung
> + type = dso__type_fd(fd);
> dso__data_put_fd(dso);
> }
>
> diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
> index 84d5aac666aa..846b74510038 100644
> --- a/tools/perf/util/dso.h
> +++ b/tools/perf/util/dso.h
> @@ -232,6 +232,8 @@ DECLARE_RC_STRUCT(dso) {
> char name[];
> };
>
> +extern struct mutex _dso__data_open_lock;
> +
> /* dso__for_each_symbol - iterate over the symbols of given type
> *
> * @dso: the 'struct dso *' in which symbols are iterated
> @@ -653,7 +655,7 @@ void __dso__inject_id(struct dso *dso, const struct dso_id *id);
> int dso__name_len(const struct dso *dso);
>
> struct dso *dso__get(struct dso *dso);
> -void dso__put(struct dso *dso);
> +void dso__put(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock);
>
> static inline void __dso__zput(struct dso **dso)
> {
> @@ -733,8 +735,8 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
> * The current usage of the dso__data_* interface is as follows:
> *
> * Get DSO's fd:
> - * int fd = dso__data_get_fd(dso, machine);
> - * if (fd >= 0) {
> + * int fd;
> + * if (dso__data_get_fd(dso, machine, &fd)) {
> * USE 'fd' SOMEHOW
> * dso__data_put_fd(dso);
> * }
> @@ -756,9 +758,10 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m,
> *
> * TODO
> */
> -int dso__data_get_fd(struct dso *dso, struct machine *machine);
> -void dso__data_put_fd(struct dso *dso);
> -void dso__data_close(struct dso *dso);
> +bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd)
> + EXCLUSIVE_TRYLOCK_FUNCTION(true, _dso__data_open_lock);
> +void dso__data_put_fd(struct dso *dso) UNLOCK_FUNCTION(_dso__data_open_lock);
> +void dso__data_close(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock);
>
> int dso__data_file_size(struct dso *dso, struct machine *machine);
> off_t dso__data_size(struct dso *dso, struct machine *machine);
> diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
> index 5f4387e2423a..9fb2c1343c7f 100644
> --- a/tools/perf/util/unwind-libunwind-local.c
> +++ b/tools/perf/util/unwind-libunwind-local.c
> @@ -330,8 +330,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
> int ret, fd;
>
> if (dso__data(dso)->eh_frame_hdr_offset == 0) {
> - fd = dso__data_get_fd(dso, ui->machine);
> - if (fd < 0)
> + if (!dso__data_get_fd(dso, ui->machine, &fd))
> return -EINVAL;
>
> /* Check the .eh_frame section for unwinding info */
> @@ -372,8 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
> * has to be pointed by symsrc_filename
> */
> if (ofs == 0) {
> - fd = dso__data_get_fd(dso, machine);
> - if (fd >= 0) {
> + if (dso__data_get_fd(dso, machine, &fd) {
> ofs = elf_section_offset(fd, ".debug_frame");
> dso__data_put_fd(dso);
> }
> @@ -485,14 +483,16 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
> /* Check the .debug_frame section for unwinding info */
> if (ret < 0 &&
> !read_unwind_spec_debug_frame(dso, ui->machine, &segbase)) {
> - int fd = dso__data_get_fd(dso, ui->machine);
> - int is_exec = elf_is_exec(fd, dso__name(dso));
> + int fd;
> u64 start = map__start(map);
> - unw_word_t base = is_exec ? 0 : start;
> + unw_word_t base = start;
> const char *symfile;
>
> - if (fd >= 0)
> + if (dso__data_get_fd(dso, ui->machine, &fd)) {
> + if (elf_is_exec(fd, dso__name(dso)))
> + base = 0;
> dso__data_put_fd(dso);
> + }
>
> symfile = dso__symsrc_filename(dso) ?: dso__name(dso);
>
> --
> 2.49.0.rc1.451.g8f38331e32-goog
>
Powered by blists - more mailing lists