Skip to content

Instantly share code, notes, and snippets.

@wshen0123
Last active May 10, 2016 21:58
Show Gist options
  • Save wshen0123/0e755fc6e27a6abbeed1b1c19ede38fa to your computer and use it in GitHub Desktop.
Save wshen0123/0e755fc6e27a6abbeed1b1c19ede38fa to your computer and use it in GitHub Desktop.
rte_ring_lite
#ifndef _RING_H_
#define _RING_H_
#include <rte_ring.h>
/*
 * Blocking single-producer bulk enqueue of exactly n objects onto ring r.
 *
 * Simplified from DPDK's __rte_ring_sp_do_enqueue: because there is only
 * one producer, prod.head is updated with a plain store (the CAS is
 * commented out) and no head-reservation race is possible.
 *
 * Busy-waits until the ring has room for all n entries; never fails and
 * never enqueues a partial bulk.
 */
static inline void __attribute__((always_inline))
ring_sp_enqueue(struct rte_ring *r, void **obj_table, uint32_t n)
{
uint32_t prod_head, cons_tail;
uint32_t prod_next, free_entries;
/* 'i' looks unused but is consumed by the ENQUEUE_PTRS() macro below,
 * which expects r, prod_head, obj_table, n, i and mask in scope. */
unsigned i;
uint32_t mask = r->prod.mask;
while (1) {
prod_head = r->prod.head;
cons_tail = r->cons.tail;
/* mask == size-1; uint32 wraparound makes this subtraction correct
 * even when the head/tail counters overflow. */
free_entries = mask + cons_tail - prod_head;
/* Ring full for a bulk of n: spin and re-read head/tail. */
if (unlikely(n > free_entries))
continue;
prod_next = prod_head + n;
//rte_atomic32_cmpset(&r->prod.head, prod_head, prod_next);
/* Single producer: plain store reserves the slots. */
r->prod.head = prod_next;
/* write entries in ring */
ENQUEUE_PTRS();
/* Ensure the slot writes are ordered before the tail publish, so the
 * consumer never reads slots before they are filled. */
rte_compiler_barrier();
/* Publish: consumer may now see the new entries. */
r->prod.tail = prod_next;
break;
} /* end of enqueue */
}
/*
 * Blocking single-consumer bulk dequeue of exactly n objects from ring r
 * into obj_table.
 *
 * Mirror image of ring_sp_enqueue: a single consumer updates cons.head
 * with a plain store (CAS commented out). Busy-waits until at least n
 * entries are available; never returns a partial bulk.
 */
static inline void __attribute__((always_inline))
ring_sc_dequeue(struct rte_ring *r, void **obj_table, uint32_t n)
{
uint32_t cons_head, prod_tail;
uint32_t cons_next, entries;
/* 'i' is consumed by the DEQUEUE_PTRS() macro below. */
unsigned i;
/* prod.mask == cons.mask in rte_ring (both size-1), so reading the
 * producer-side mask here is equivalent. */
uint32_t mask = r->prod.mask;
while (1) {
cons_head = r->cons.head;
prod_tail = r->prod.tail;
/* Unsigned wraparound keeps this count correct across overflow. */
entries = prod_tail - cons_head;
/* Not enough entries for a bulk of n: spin and re-read. */
if (n > entries)
continue;
cons_next = cons_head + n;
//rte_atomic32_cmpset(&r->cons.head, cons_head, cons_next);
/* Single consumer: plain store reserves the slots. */
r->cons.head = cons_next;
DEQUEUE_PTRS();
/* Order the slot reads before freeing them back to the producer. */
rte_compiler_barrier();
/* Publish: producer may now reuse the freed slots. */
r->cons.tail = cons_next;
break;
} /* end of dequeue */
}
/*
 * Blocking multi-producer bulk enqueue of exactly n objects onto ring r.
 *
 * Head reservation is done with a CAS loop so concurrent producers get
 * disjoint slot ranges. *enqueue_tries is incremented once per CAS
 * attempt made when space was available (instrumentation for measuring
 * CAS contention — NOTE(review): spins while the ring is full do not
 * count as tries; confirm that is the intended metric).
 *
 * __attribute is GCC's accepted alias for __attribute__.
 */
static inline void __attribute((always_inline))
ring_mp_enqueue(struct rte_ring *r, void **obj_table, uint32_t n, uint64_t *enqueue_tries)
{
/* volatile: force fresh loads of head/tail on every retry. */
volatile uint32_t prod_head, cons_tail;
uint32_t prod_next, free_entries;
int success;
/* 'i' is consumed by the ENQUEUE_PTRS() macro below. */
unsigned i, rep = 0;
uint32_t mask = r->prod.mask;
do {
retry:
prod_head = r->prod.head;
cons_tail = r->cons.tail;
free_entries = (mask + cons_tail - prod_head);
/* Ring full: spin via goto (re-reads head/tail) without counting a try. */
if (n > free_entries)
goto retry;
*enqueue_tries += 1;
prod_next = prod_head + n;
/* Atomically claim [prod_head, prod_next); fails if another producer
 * moved the head first, in which case recompute and retry. */
success = rte_atomic32_cmpset(&r->prod.head, prod_head, prod_next);
} while (unlikely(success == 0));
ENQUEUE_PTRS();
rte_compiler_barrier();
/* Wait until all earlier producers have published their tails, so tail
 * updates happen in reservation order and no gap is ever visible. */
while (unlikely(r->prod.tail != prod_head)) {
rte_pause();
if (RTE_RING_PAUSE_REP_COUNT &&
++rep == RTE_RING_PAUSE_REP_COUNT) {
rep = 0;
/* Back off to the scheduler after repeated pauses (helps when a
 * preempted producer holds up the tail chain). */
sched_yield();
}
}
r->prod.tail = prod_next;
}
/*
 * Blocking multi-consumer bulk dequeue of exactly n objects from ring r
 * into obj_table.
 *
 * Mirror of ring_mp_enqueue: CAS on cons.head reserves disjoint slot
 * ranges for concurrent consumers. *dequeue_tries counts CAS attempts
 * made while enough entries were available (contention instrumentation).
 */
static inline void __attribute((always_inline))
ring_mc_dequeue(struct rte_ring *r, void **obj_table, uint32_t n, uint64_t *dequeue_tries)
{
/* volatile: force fresh loads of head/tail on every retry. */
volatile uint32_t cons_head, prod_tail;
uint32_t cons_next, entries;
int success;
/* 'i' is consumed by the DEQUEUE_PTRS() macro below. */
unsigned i, rep = 0;
/* prod.mask == cons.mask in rte_ring; equivalent either way. */
uint32_t mask = r->prod.mask;
do {
retry:
cons_head = r->cons.head;
prod_tail = r->prod.tail;
entries = (prod_tail - cons_head);
/* Ring empty (fewer than n entries): spin without counting a try. */
if (n > entries)
goto retry;
*dequeue_tries += 1;
cons_next = cons_head + n;
/* Atomically claim [cons_head, cons_next); retry on contention. */
success = rte_atomic32_cmpset(&r->cons.head, cons_head, cons_next);
} while (unlikely(success == 0));
DEQUEUE_PTRS();
rte_compiler_barrier();
/* Wait for earlier consumers to publish their tails so the producer
 * never sees freed slots out of order. */
while (unlikely(r->cons.tail != cons_head)) {
rte_pause();
if (RTE_RING_PAUSE_REP_COUNT &&
++rep == RTE_RING_PAUSE_REP_COUNT) {
rep = 0;
sched_yield();
}
}
r->cons.tail = cons_next;
}
#endif
/*
 * Producer lcore entry point: runs lp->num_iteration rounds, each doing a
 * simulated per-bulk workload and then a blocking single-producer bulk
 * enqueue of lp->batch_size dummy pointers onto lp->ring.
 *
 * Returns 0 (lcore_function_t convention).
 *
 * Fixes vs. original: 'lcore' was read uninitialized in the final printf
 * (undefined behavior); it is now initialized from rte_lcore_id().
 */
int app_sp_thread(struct lcore_params *lp)
{
unsigned lcore = rte_lcore_id();
uint64_t j;
/* Payload contents are irrelevant to the benchmark; only pointers move. */
void *dummy_obj[max_bulk_size];
uint64_t num_iteration = lp->num_iteration;
printf("[Producer %d] Start\n", lcore);
for (j = 0; j < num_iteration; j++) {
/* do some work with object */
do_producer_workload_cycles(work_cycles * bulk_size);
/* Spin until the bulk enqueue succeeds (non-zero means ring full). */
while (rte_ring_sp_enqueue_bulk(lp->ring, dummy_obj, lp->batch_size) != 0);
}
printf("[Producer %d] Finish\n", lcore);
return 0;
}
/*
 * Consumer lcore entry point: runs lp->num_iteration rounds, each doing a
 * blocking single-consumer bulk dequeue of lp->batch_size pointers from
 * lp->ring followed by a simulated per-bulk workload, and reports elapsed
 * TSC time and throughput.
 *
 * Returns 0 (lcore_function_t convention).
 *
 * Fixes vs. original: the "Count" line was labeled "(objs)" but printed
 * the iteration count; it now prints iterations * bulk_size, consistent
 * with the throughput line. NOTE(review): the %' grouping flag needs a
 * prior setlocale() call to have any effect — confirm main() does that.
 */
int app_sc_thread(struct lcore_params *lp)
{
void *dummy_obj[max_bulk_size];
uint64_t j, start_cycle, end_cycle, hz;
double time;
uint64_t num_iteration = lp->num_iteration;
printf("[Consumer %d] Start\n", rte_lcore_id());
hz = rte_get_tsc_hz();
start_cycle = rte_get_tsc_cycles();
for (j = 0; j < num_iteration; j++) {
/* Spin until the bulk dequeue succeeds (non-zero means not enough entries). */
while (rte_ring_sc_dequeue_bulk(lp->ring, dummy_obj, lp->batch_size) != 0);
do_consumer_workload_cycles(work_cycles * bulk_size);
}
end_cycle = rte_get_tsc_cycles();
time = (end_cycle - start_cycle) / (double)hz;
printf("[Consumer %d] Finish\n", rte_lcore_id());
printf(" - Count : %'" PRId64 " (objs)\n", num_iteration * bulk_size);
printf(" - Time : %.3f (s)\n", time);
printf(" - Throughput : %'d (obj/s)\n", (int)(num_iteration * bulk_size / time));
return 0;
}
/*
 * Launch the single-ring / single-producer / single-consumer benchmark:
 * one producer lcore and one consumer lcore, both driven by the same
 * lcore_params.
 *
 * Fixes vs. original: lp was an automatic (stack) struct passed to
 * remote-launched lcores that keep dereferencing it after this function
 * returns — a dangling pointer. It is now static. Also removed the
 * unused local 'i'.
 *
 * NOTE(review): the caller is presumably expected to rte_eal_mp_wait_lcore()
 * before relaunching — confirm, since lp is shared across runs.
 */
void driver_sr_sp_sc()
{
int lcore;
static struct lcore_params lp;
lp.ring = rings[0];
lp.batch_size = bulk_size;
lp.num_iteration = NUM_ITERATION;
/* First worker lcore after the master becomes the producer. */
lcore = rte_get_master_lcore();
lcore = rte_get_next_lcore(lcore, 1, 0);
if (lcore < RTE_MAX_LCORE)
rte_eal_remote_launch((lcore_function_t *)app_sp_thread, &lp, lcore);
/* Small stagger so the producer starts slightly before the consumer. */
rte_delay_us(2);
lcore = rte_get_next_lcore(lcore, 1, 0);
if (lcore < RTE_MAX_LCORE)
rte_eal_remote_launch((lcore_function_t *)app_sc_thread, &lp, lcore);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment