/*
* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef EVENTLIB_FLT_H
#define EVENTLIB_FLT_H
#include "eventlib.h"
/* Readers' connects/updates/disconnects and writer's checks for updates, are
* all fully asynchronous.
*
* Slot allocation by readers is based on bits of 'busy' word in r2w memory.
* Readers acquire bits in this word using atomic test-and-set.
* Writer is not involved in this.
* - reader:
* - increment seqlock,
* - write new slot content,
* - increment seqlock;
* - writer:
* - read seqlock
* - if seqlock is odd, retry later
* - read content into temporary buffer
* - read seqlock
* - if seqlock changed, retry later
* - accept temporary buffer as new content
*
* Notification of writer on updated slots, without need for unbounded loop
* on writer side, is based on two words, 'notify' and 'ack', in r2w memory.
* - reader:
* - update slot,
* - fetch 'ack' word,
* - atomically replace bit corresponding to this reader in 'notify' word
* with inverted corresponding bit of just fetched 'ack' word
* - writer:
* - fetch 'notify' word,
* - calculate 'dirty' as XOR of fetched 'notify' and current 'ack',
* - bits set in 'dirty' correspond to updated slots
* Correctness of this (i.e. if reader changes slot, writer always notices that)
* follows from: (a) every reader only changes bit corresponding to this reader,
* nobody else touches that bit, and (b) moment when writer writes to 'ack'
* acts as a barrier:
* - if a reader has already updated a slot, this update will be fetched by
* writer immediately after this write to 'ack',
* - any newer update (including one currently happening) will be followed by
* setting bit in 'notify' to value different from bit in just written 'ack',
* and thus will be noticed at the next check.
*
* On disconnect, reader sets its mask to all zeroes. Writer fetches this
* update following normal procedure, and thus cleans after disconnected
* reader. As an optimization, writer notices all-zero mask copy, and marks
* it as invalid, so it won't be included into future combined mask
* calculations.
*/
#define MAX_MASK_SIZE 20
#define NUM_SLOTS 4
/* w2r shared memory subblock (writer-to-reader direction) */
struct eventlib_flt_w2r {
/* layout/compatibility word
 * NOTE(review): presumably checked by readers at attach time -- confirm
 * against eventlib.c
 */
uint32_t compat;
/* number of filter bit flags exported per domain; the total across
 * domains (rounded up, see r2w block comment) determines slot mask size
 */
uint16_t num_bits[EVENTLIB_FILTER_DOMAIN_MAX];
} __attribute__((__packed__));
/* slot representation in r2w shared memory block */
struct eventlib_flt_slot {
/* seqlock protecting 'mask': the owning reader increments it before and
 * after writing new content, so the writer can detect (odd or changed
 * value) that a copy it took may be torn -- see protocol comment above
 */
uint32_t seqlock;
/* bit array for filters, padded at end to 32bit alignment;
 * GNU zero-length array: actual byte size is determined at runtime from
 * the 'num_bits' totals in the w2r subblock (cannot be a C99 flexible
 * array member because this struct is used as an array element type in
 * struct eventlib_flt_r2w)
 */
uint8_t mask[0];
} __attribute__((__packed__));
/* r2w shared memory block (reader-to-writer direction) */
struct eventlib_flt_r2w {
/* per-reader notification bits; a reader toggles its bit relative to
 * 'ack' after updating its slot (see protocol comment above)
 */
uint32_t notify;
/* writer's acknowledgement word; XOR with 'notify' yields the set of
 * slots updated since the writer's last check
 */
uint32_t ack;
/* slot allocation bitmap; readers claim a slot by atomic test-and-set
 * of the corresponding bit, writer never touches this word
 */
uint32_t busy;
struct eventlib_flt_slot slots[0];
/* slots here:
 * - count of all slots is defined by NUM_SLOTS
 * - size of each slot is total of 'num_bits' value from w2r subblock
 * rounded up to next multiple of four (see EVENTLIB_FLT_MASK_SIZE)
 */
} __attribute__((__packed__));
/* helper structure used to represent domain's geometry
 * within all-domains mask;
 * NOTE(review): presumably computed at init time from the w2r 'num_bits'
 * array -- confirm against flt_init()
 */
struct eventlib_flt_domain_geo {
/* total bit flags in domain */
uint16_t bits;
/* byte offset of domain in all-domains mask */
uint16_t offset;
};
/* filtering context object (local, per-attachment state; one per writer
 * or reader, not in shared memory)
 */
struct eventlib_flt_ctx {
/* Was filtering inited? */
bool inited;
/* Pointers to shared memory blocks */
shmptr struct eventlib_flt_w2r *w2r;
shmptr struct eventlib_flt_r2w *r2w;
/* Parameters calculated at init time */
struct eventlib_flt_domain_geo geo[EVENTLIB_FILTER_DOMAIN_MAX];
/* per-slot byte size in the r2w block
 * NOTE(review): presumably rounded mask size per the r2w comment;
 * whether it also counts the seqlock is not visible here -- confirm
 */
uint32_t slot_size;
/* The rest of structure is very different between writer and reader,
 * thus using union here
 */
union {
/* writer's version */
struct {
/* local copy of 'ack' word */
uint32_t ack;
/* current combined mask (OR of all valid reader masks) */
uint8_t combined_mask[MAX_MASK_SIZE];
/* local copies of masks in slots (row-major); 32 rows = bit
 * width of the 'busy'/'notify'/'ack' words, though only
 * NUM_SLOTS of them can be in use
 */
uint8_t mask_copy[32][MAX_MASK_SIZE];
/* validity mask of local copies (all-zero copies are marked
 * invalid so they drop out of combined-mask calculation,
 * see disconnect note above)
 */
uint32_t mask_copy_valid;
/* extra space for temporary mask copy */
uint8_t spare_mask[MAX_MASK_SIZE];
} w;
/* reader's version */
struct {
/* local copy of current mask */
uint8_t mask[MAX_MASK_SIZE];
/* index of allocated slot (bit position claimed in 'busy') */
uint8_t slot_index;
} r;
};
};
/* Below synchronization wrappers are based on GCC atomic builtins.
* These functions are only used in the reader path.
*/
/* Atomically set bit 'n' in *p and return the bit's previous value.
 * Caller must pass n < 32 (bit index within a 32-bit word).
 */
static inline bool sync_test_and_set_bit(unsigned int n, uint32_t *p)
{
	const uint32_t bit = 1u << n;
	uint32_t old = __sync_fetch_and_or(p, bit);

	return (old & bit) != 0;
}
/* Atomically set bit 'n' in *p (n < 32). */
static inline void sync_set_bit(unsigned int n, uint32_t *p)
{
	const uint32_t bit = 1u << n;

	(void)__sync_fetch_and_or(p, bit);
}
/* Atomically clear bit 'n' in *p (n < 32). */
static inline void sync_clear_bit(unsigned int n, uint32_t *p)
{
	const uint32_t bit = 1u << n;

	(void)__sync_fetch_and_and(p, ~bit);
}
/* Below functions are implemented by the filter subsystem interface.
* Linkage is optional to support writer environments w/out filtering.
*/
#pragma weak flt_init
extern int flt_init(struct eventlib_ctx *ctx);
#pragma weak flt_fini
extern void flt_fini(struct eventlib_ctx *ctx);
#endif