tegrakernel/kernel/nvidia/drivers/misc/eventlib/eventlib_flt.h

/*
 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef EVENTLIB_FLT_H
#define EVENTLIB_FLT_H

#include "eventlib.h"

/* Readers' connects/updates/disconnects and writer's checks for updates, are
 * all fully asynchronous.
 *
 * Slot allocation by readers is based on bits of 'busy' word in r2w memory.
 * Readers acquire bits in this word using atomic test-and-set.
 * Writer is not involved in this.
 *
 * Slot content updates are protected by the per-slot 'seqlock' word:
 * - reader:
 *   - increment seqlock,
 *   - write new slot content,
 *   - increment seqlock;
 * - writer:
 *   - read seqlock
 *   - if seqlock is odd, retry later
 *   - read content into temporary buffer
 *   - read seqlock
 *   - if seqlock changed, retry later
 *   - accept temporary buffer as new content
 *
 * Notification of writer on updated slots, without need for unbounded loop
 * on writer side, is based on two words, 'notify' and 'ack', in r2w memory.
 * - reader:
 *   - update slot,
 *   - fetch 'ack' word,
 *   - atomically replace bit corresponding to this reader in 'notify' word
 *     with inverted corresponding bit of just fetched 'ack' word
 * - writer:
 *   - fetch 'notify' word,
 *   - calculate 'dirty' as XOR of fetched 'notify' and current 'ack',
 *   - bits set in 'dirty' correspond to updated slots
 * Correctness of this (i.e. if reader changes slot, writer always notices that)
 * follows from: (a) every reader only changes bit corresponding to this reader,
 * nobody else touches that bit, and (b) moment when writer writes to 'ack'
 * acts as a barrier:
 * - if a reader has already updated a slot, this update will be fetched by
 *   writer immediately after this write to 'ack',
 * - any newer update (including one currently happening) will be followed by
 *   setting bit in 'notify' to value different from bit in just written 'ack',
 *   and thus will be noticed at the next check.
 *
 * On disconnect, reader sets its mask to all zeroes. Writer fetches this
 * update following normal procedure, and thus cleans after disconnected
 * reader. As an optimization, writer notices all-zero mask copy, and marks
 * it as invalid, so it won't be included into future combined mask
 * calculations.
 */

/* Upper bound, in bytes, of a filter mask; it sizes the fixed local mask
 * buffers kept in struct eventlib_flt_ctx.
 */
#define MAX_MASK_SIZE 20
/* Number of reader slots present in the r2w shared memory block. */
#define NUM_SLOTS 4

/* w2r shared memory subblock.
 *
 * Packed, so the in-memory layout carries no compiler-inserted padding.
 */
struct eventlib_flt_w2r {
	/* NOTE(review): presumably a compatibility tag that the peer checks
	 * before using the rest of the block - confirm in the implementation.
	 */
	uint32_t compat;
	/* number of filter bits, one entry per filter domain */
	uint16_t num_bits[EVENTLIB_FILTER_DOMAIN_MAX];
} __attribute__((__packed__));

/* slot representation in r2w shared memory block
 *
 * The mask is declared as a zero-length array (GNU extension), so
 * sizeof(struct eventlib_flt_slot) covers the seqlock word only; the mask
 * bytes follow it directly in shared memory.
 */
struct eventlib_flt_slot {
	/* sequence counter guarding 'mask': the reader increments it before
	 * and after rewriting the mask, so an odd value means an update is
	 * in progress
	 */
	uint32_t seqlock;
	uint8_t mask[0];
	/* bit array for filters, padded at end to 32bit alignment */
} __attribute__((__packed__));

/* r2w shared memory block
 *
 * Holds the three control words of the reader/writer protocol, followed by
 * the reader slots themselves.
 */
struct eventlib_flt_r2w {
	/* per-reader bits; a reader sets its bit to the inverse of the
	 * matching 'ack' bit after it has updated its slot
	 */
	uint32_t notify;
	/* written by the writer; 'notify' XOR 'ack' yields the dirty slots */
	uint32_t ack;
	/* slot allocation bits, acquired by readers with atomic test-and-set */
	uint32_t busy;
	struct eventlib_flt_slot slots[0];
	/* slots here:
	 * - count of all slots is defined by NUM_SLOTS
	 * - size of each slot is total of 'num_bits' value from w2r subblock
	 *   rounded up to next multiple of four (see EVENTLIB_FLT_MASK_SIZE)
	 * Because the slot size is only known at run time, slots[] must not
	 * be indexed directly with the C subscript operator; the element
	 * stride is not sizeof(struct eventlib_flt_slot).
	 */
} __attribute__((__packed__));

/* Helper structure describing where one filter domain lives inside the
 * all-domains mask: how many bit flags it has and at which byte it starts.
 */
struct eventlib_flt_domain_geo {
	/* total bit flags in domain */
	uint16_t bits;
	/* byte offset of domain in all-domains mask */
	uint16_t offset;
};

/* Filtering context object.
 *
 * The leading members are common to both sides; the trailing anonymous
 * union holds either the writer's or the reader's private state.
 */
struct eventlib_flt_ctx {
	/* Was filtering inited? */
	bool inited;

	/* Pointers to shared memory blocks ('shmptr' is a qualifier defined
	 * outside this header)
	 */
	shmptr struct eventlib_flt_w2r *w2r;
	shmptr struct eventlib_flt_r2w *r2w;

	/* Parameters calculated at init time */
	/* per-domain placement within the all-domains mask */
	struct eventlib_flt_domain_geo geo[EVENTLIB_FILTER_DOMAIN_MAX];
	/* NOTE(review): presumably the byte stride between consecutive slots
	 * in r2w->slots - confirm against the init code
	 */
	uint32_t slot_size;

	/* The rest of structure is very different between writer and reader,
	 * thus using union here
	 */
	union {
		/* writer's version */
		struct {
			/* local copy of 'ack' word */
			uint32_t ack;
			/* current combined mask */
			uint8_t combined_mask[MAX_MASK_SIZE];
			/* local copies of masks in slots (row-major)
			 * NOTE(review): 32 rows rather than NUM_SLOTS -
			 * presumably one row per bit of the 32-bit
			 * 'notify'/'ack'/'busy' words; confirm.
			 */
			uint8_t mask_copy[32][MAX_MASK_SIZE];
			/* validity mask of local copies; one bit per row of
			 * mask_copy
			 */
			uint32_t mask_copy_valid;
			/* extra space for temporary mask copy */
			uint8_t spare_mask[MAX_MASK_SIZE];
		} w;

		/* reader's version */
		struct {
			/* local copy of current mask */
			uint8_t mask[MAX_MASK_SIZE];
			/* index of allocated slot */
			uint8_t slot_index;
		} r;
	};
};

/* Below synchronization wrappers are based on GCC atomic builtins.
 * These functions are only used in the reader path.
 */

/* Atomically set bit 'n' in the word at 'p' and report its previous state.
 *
 * n: bit index; must be below 32, a wider shift is undefined in C.
 * p: word to modify.
 *
 * Returns true if the bit was already set before this call, false if this
 * call is the one that set it.
 */
static inline bool sync_test_and_set_bit(unsigned int n, uint32_t *p)
{
	const uint32_t bit = 1u << n;
	/* fetch-and-or hands back the word as it was before the OR */
	const uint32_t before = __sync_fetch_and_or(p, bit);

	return (before & bit) != 0;
}

/* Atomically set bit 'n' in the word at 'p'; other bits are left untouched.
 *
 * n: bit index; must be below 32, a wider shift is undefined in C.
 * p: word to modify.
 */
static inline void sync_set_bit(unsigned int n, uint32_t *p)
{
	const uint32_t bit = 1u << n;

	/* previous value is of no interest here */
	(void)__sync_fetch_and_or(p, bit);
}

/* Atomically clear bit 'n' in the word at 'p'; other bits are left untouched.
 *
 * n: bit index; must be below 32, a wider shift is undefined in C.
 * p: word to modify.
 */
static inline void sync_clear_bit(unsigned int n, uint32_t *p)
{
	/* every bit set except the one being cleared */
	const uint32_t keep = ~(1u << n);

	(void)__sync_fetch_and_and(p, keep);
}

/* Below functions are implemented by the filter subsystem interface.
 * Linkage is optional to support writer environments w/out filtering:
 * both symbols are declared weak, so this header can be used even when no
 * definition of them is linked in.
 * NOTE(review): callers presumably have to compare the function address
 * against NULL before calling - confirm at the call sites.
 */

/* NOTE(review): presumably sets up filtering for 'ctx'; the meaning of the
 * int result is not visible in this header - confirm in the implementation.
 */
#pragma weak flt_init
extern int flt_init(struct eventlib_ctx *ctx);

/* NOTE(review): presumably the teardown counterpart of flt_init() for
 * 'ctx' - confirm in the implementation.
 */
#pragma weak flt_fini
extern void flt_fini(struct eventlib_ctx *ctx);

#endif
initial commit tegra kernel 32.6.1 2022-02-16 09:13:02 -06:00			`/*`
			`* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.`
			`*`
			`* Permission is hereby granted, free of charge, to any person obtaining a`
			`* copy of this software and associated documentation files (the "Software"),`
			`* to deal in the Software without restriction, including without limitation`
			`* the rights to use, copy, modify, merge, publish, distribute, sublicense,`
			`* and/or sell copies of the Software, and to permit persons to whom the`
			`* Software is furnished to do so, subject to the following conditions:`
			`*`
			`* The above copyright notice and this permission notice shall be included in`
			`* all copies or substantial portions of the Software.`
			`*`
			`* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL`
			`* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING`
			`* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER`
			`* DEALINGS IN THE SOFTWARE.`
			`*/`

			`#ifndef EVENTLIB_FLT_H`
			`#define EVENTLIB_FLT_H`

			`#include "eventlib.h"`

			`/* Readers' connects/updates/disconnects and writer's checks for updates, are`
			`* all fully asynchronous.`
			`*`
			`* Slot allocation by readers is based on bits of 'busy' word in r2w memory.`
			`* Readers acquire bits in this word using atomic test-and-set.`
			`* Writer is not involved in this.`
			`* - reader:`
			`* - increment seqlock,`
			`* - write new slot content,`
			`* - increment seqlock;`
			`* - writer:`
			`* - read seqlock`
			`* - if seqlock is odd, retry later`
			`* - read content into temporary buffer`
			`* - read seqlock`
			`* - if seqlock changed, retry later`
			`* - accept temporary byffer as new content`
			`*`
			`* Notification of writer on updated slots, without need for unbounded loop`
			`* on writer side, is based on two words, 'notify' and 'ack', in r2w memory.`
			`* - reader:`
			`* - update slot,`
			`* - fetch 'ack' word,`
			`* - atomically replace bit corresponding to this reader in 'notify' word`
			`* with inverted corresponding bit of just fetched 'ack' word`
			`* - writer:`
			`* - fetch 'notify' word,`
			`* - calculate 'dirty' as XOR of fetched 'notify' and current 'ack',`
			`* - bits set in 'dirty' correspond to updated slots`
			`* Correctness of this (i.e. if reader changes slot, writer always notices that)`
			`* follows from: (a) every reader only changes bit corresponding to this reader,`
			`* nobody else touches that bit, and (b) moment when writer writes to 'ack'`
			`* acts as a barrier:`
			`* - if a reader has already updated a slot, this update will be fetched by`
			`* writer immediately after this write to 'ack',`
			`* - any newer update (including one currently happenning) will be followed by`
			`* setting bit in 'notify' to value different from bit in just written 'ack',`
			`* and thus will be noticed at the next check.`
			`*`
			`* On disconnect, reader sets it's mask to all zeroes. Writer fetches this`
			`* update following normal procedure, and thus cleans after disconnected`
			`* reader. As an optimization, writer notices all-zero mask copy, and marks`
			`* it as invalid, so it won't be included into future combined mask`
			`* calculations.`
			`*/`

			`#define MAX_MASK_SIZE 20`
			`#define NUM_SLOTS 4`

			`/* w2r shared memory subblock */`
			`struct eventlib_flt_w2r {`
			`uint32_t compat;`
			`uint16_t num_bits[EVENTLIB_FILTER_DOMAIN_MAX];`
			`} __attribute__((__packed__));`

			`/* slot representation in r2w shared memory block */`
			`struct eventlib_flt_slot {`
			`uint32_t seqlock;`
			`uint8_t mask[0];`
			`/* bit array for filters, padded at end to 32bit alignmemt */`
			`} __attribute__((__packed__));`

			`/* r2w shared memory block */`
			`struct eventlib_flt_r2w {`
			`uint32_t notify;`
			`uint32_t ack;`
			`uint32_t busy;`
			`struct eventlib_flt_slot slots[0];`
			`/* slots here:`
			`* - count of all slots is defined by NUM_SLOTS`
			`* - size of each slot is total of 'num_bits' value from w2r subblock`
			`* rounded up to next multiple of four (see EVENTLIB_FLT_MASK_SIZE)`
			`*/`
			`} __attribute__((__packed__));`

			`/* helper structure used to represent domain's geometry`
			`* within all-domains mask`
			`*/`
			`struct eventlib_flt_domain_geo {`
			`/* total bit flags in domain */`
			`uint16_t bits;`
			`/* byte offset of domain in all-domains mask */`
			`uint16_t offset;`
			`};`

			`/* filtering context object */`
			`struct eventlib_flt_ctx {`
			`/* Was filtering inited? */`
			`bool inited;`

			`/* Poiters to shared memory blocks */`
			`shmptr struct eventlib_flt_w2r *w2r;`
			`shmptr struct eventlib_flt_r2w *r2w;`

			`/* Parameters calculated at init time */`
			`struct eventlib_flt_domain_geo geo[EVENTLIB_FILTER_DOMAIN_MAX];`
			`uint32_t slot_size;`

			`/* The rest of structure is very different between writer and reader,`
			`* thus using union here`
			`*/`
			`union {`
			`/* writer's version */`
			`struct {`
			`/* local copy of 'ack' word */`
			`uint32_t ack;`
			`/* current combined mask */`
			`uint8_t combined_mask[MAX_MASK_SIZE];`
			`/* local copies of masks in slots (row-major) */`
			`uint8_t mask_copy[32][MAX_MASK_SIZE];`
			`/* validity mask of local copies */`
			`uint32_t mask_copy_valid;`
			`/* extra space for temporary mask copy */`
			`uint8_t spare_mask[MAX_MASK_SIZE];`
			`} w;`

			`/* reader's version */`
			`struct {`
			`/* local copy of current mask */`
			`uint8_t mask[MAX_MASK_SIZE];`
			`/* index of allocated slot */`
			`uint8_t slot_index;`
			`} r;`
			`};`
			`};`

			`/* Below synchronization wrappers are based on GCC atomic builtins.`
			`* These functions are only used in the reader path.`
			`*/`

			`static inline bool sync_test_and_set_bit(unsigned int n, uint32_t *p)`
			`{`
			`return !!(__sync_fetch_and_or(p, (1u << n)) & (1u << n));`
			`}`

			`static inline void sync_set_bit(unsigned int n, uint32_t *p)`
			`{`
			`__sync_fetch_and_or(p, (1u << n));`
			`}`

			`static inline void sync_clear_bit(unsigned int n, uint32_t *p)`
			`{`
			`__sync_fetch_and_and(p, ~(1u << n));`
			`}`

			`/* Below functions are implemented by the filter subsystem interface.`
			`* Linkage is optional to support writer environments w/out filtering.`
			`*/`

			`#pragma weak flt_init`
			`extern int flt_init(struct eventlib_ctx *ctx);`

			`#pragma weak flt_fini`
			`extern void flt_fini(struct eventlib_ctx *ctx);`

			`#endif`