TransWikia.com

Tail implementation using a dynamic queue library

Code Review Asked on December 15, 2021

As per K & R exercise 5-13

Write the program tail, which prints the last n lines of its input. By default, n is 10, say, but it can be changed by an optional argument, so that

tail -n

prints the last n lines. The program should behave rationally no matter how unreasonable the input or the value of n. Write the program so it makes the best use of available storage; lines should be stored as in the sorting program of Section 5.6, not in a two-dimensional array of fixed size.

I’ve written my own version of tail in C. It uses a newer version of a library for a dynamic queue that I also wrote and submitted here before (I’ve made a lot of changes to it since then). I’d like to get some feedback on how my code performs, both memory wise and speed wise, and how I might improve this in the future. I’ve compared its performance to the GNU implementation of tail and found that for small files my program uses less memory but for larger files it uses a fair bit more (although I did find that GNU tail leaks memory – 96 bytes according to Valgrind) and I was hoping I could get some insight as to how it does this better.

tail.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dqueue.h>

#define MAX_LEN 256
#define MAX_LINES 32
#define DEFAULT_TAIL 10

int getlinep(char *s, int lim);

int main(int argc, char *argv[])
{
    char *line;
    char *temp;
    int tail_len = DEFAULT_TAIL;
    queue_t * queue = NULL;
    int elements;
    int len;

    if(!(queue = queue_init(sizeof(char *)))) {
        fprintf(stderr, "Error %d: Could not initialise queue!n", MEM_ERROR);
        return MEM_ERROR;
    }

    if(argc >= 2) {
        if(atoi(*(++argv))) {
            tail_len = -atoi(*argv);
            if(tail_len <= 0 || tail_len > MAX_LEN)
                tail_len = DEFAULT_TAIL;
        }
    }

    for(elements = 0; elements < tail_len; elements++) {
        if(!(line = malloc(MAX_LEN))) {
            fprintf(stderr, "Error: Memory allocation failure!n");
            return MEM_ERROR;
        }

        if(!getlinep(line, MAX_LEN)) {
            free(line);

            if(elements == 0) {
                queue_destroy(&queue);
                return EXIT_SUCCESS;
            }

            break;
        }

        queue_push(&line, queue);
    }

    if(elements == tail_len) {
        if(!(temp = malloc(MAX_LEN))) {
            fprintf(stderr, "Error: Memory allocation failure!n");
            return MEM_ERROR;
        }
        for(;;) {
            if(!(len = getlinep(temp, MAX_LEN))) {
                free(temp);
                break;
            }

            queue_pop(&line, queue);
            memcpy(line, temp, len + 1);
            queue_push(&line, queue);
        }
    }

    while(elements-- > 0) {
        queue_pop(&line, queue);
        printf("%s", line);
        free(line);
    }

    queue_destroy(&queue);

    return EXIT_SUCCESS;
}

int getlinep(char *s, int lim)
{
    int i;
    
    for(i = 0; i < lim - 1 && (*s = getchar()) != EOF && *s != 'n'; i++, s++)
        ;

    if(*s == 'n') {
        s++;
        i++;
    }
    
    *s = '';
    
    return i;
}

dqueue.h

#ifndef DQUEUE_H
#define DQUEUE_H

#include <stdlib.h>                                                     /* Needed for size_t */

#define QUEUE_OK 0
#define MEM_ERROR -1                                                    /* Memory allocation error */
#define SIZE_ERROR -2                                                   /* Queue dimension error */
#define INDEX_ERROR -3                                                  /* No data at given index */

#define BLOCK_SIZE 1024

typedef struct queue_element_t
{
    void * data;                                                        /* Contains the data stored at this node */
    void * next;                                                        /* Contains the pointer to the next element, or NULL if it's the tail node */
} queue_element_t;

typedef struct
{
    queue_element_t *   head;                                           /* Pointer to the head of the queue */
    queue_element_t *   tail;                                           /* Pointer to the tail of the queue */
    size_t              size;                                           /* The number of elements in the queue */
    size_t              element_width;                                  /* The size of each element in the queue */
    size_t              tail_pos;                                       /* The byte offset of the data being pushed into the queue (i.e. in the tail block) */
    size_t              head_pos;                                       /* The byte offset of the data being popped out of the queue (i.e. in the head block) */
    int                 status;
} queue_t;

queue_t *   queue_init(size_t element_size);                            /* Initialise the queue data structure */
void        queue_pop(void * const element, queue_t * queue);           /* Pop an element from the front of the queue, deals with cleanup when the head node is empty */
int         queue_push(const void * const element, queue_t * queue);    /* Push an element to the back of the queue, creates a new block when tail node is full */
int         queue_debug(const queue_t * const queue);                   /* Print information about the queue, returns the queue status if a valid queue pointer is given  */
void        queue_destroy(queue_t * queue);                         /* Destroy the queue data structure and any associated nodes */

#endif

dqueue.c

/* 
 * Filename:    dqueue.c
 * Author:      Jess Turner
 * Date:        17/02/18
 * Licence:     GNU GPL V3
 *
 * Library for a lightweight, generic, and dynamically allocated queue
 *
 * Return/exit codes:
 *  QUEUE_OK        - No error
 *  SIZE_ERROR      - Queue size error (invalid block size or number of elements)
 *  MEM_ERROR       - Memory allocation error
 *  INDEX_ERROR     - Couldn't pop data from the queue
 *
 * All functions returning pointers will return NULL on memory allocation faliure, else they will specify an error in queue->status for the user to handle
 *
 * Todo:
 *  - Add secure versions of queue_destroy() and queue_pop() to overwrite memory blocks that are no longer in use
 * 
 */

#include <dqueue.h>
#include <stdio.h>
#include <string.h>

queue_t * queue_init(size_t element_width)
{
    queue_t * queue;

    if(!(queue = malloc(sizeof(queue_t))))
        return NULL;

    if(BLOCK_SIZE % element_width != 0 || (queue->element_width = element_width) <= 0) {
        queue->status = SIZE_ERROR;
        return queue;
    }

    queue->tail_pos = 0;
    queue->head_pos = 0;
    queue->tail     = NULL;
    queue->head     = NULL;
    queue->size     = 0;
    queue->status   = QUEUE_OK;

    return queue;
}

void queue_destroy(queue_t * queue)
{
    queue_element_t * temp;

    if(queue == NULL)
        return;

    while(queue->head) {
        free(queue->head->data);
        temp = queue->head->next;
        free(queue->head);
        queue->head = temp;
    }
    
    queue->size             = 0;
    queue->status           = 0;
    queue->element_width    = 0;
    queue->tail_pos         = 0;
    queue->head_pos         = 0;
    queue->tail             = NULL;

    free(queue);
}

int queue_push(const void * const element, queue_t * queue)
{
    queue_element_t * new_element;

    if(queue->tail_pos == 0) {
        if(!(new_element = malloc(sizeof(queue_element_t)))) {
            queue->status = MEM_ERROR;
            return queue->status;
        }

        if(!(new_element->data = malloc(BLOCK_SIZE))) {
            free(new_element);
            queue->status = MEM_ERROR;
            return queue->status;
        }

        if(queue->head == NULL)
            queue->head = new_element;
        else
            queue->tail->next = new_element;

        queue->tail = new_element;
        queue->tail->next = NULL;
        queue->size++;
    }

    memcpy(queue->tail->data + queue->tail_pos, element, queue->element_width); 

    queue->tail_pos += queue->element_width;

    if(queue->tail_pos >= BLOCK_SIZE)
        queue->tail_pos = 0;

    return queue->status;
}

void queue_pop(void * const element, queue_t * queue)
{
    queue_element_t * temp;

    if(queue->head == NULL || ((queue->head == queue->tail) && (queue->head_pos == queue->tail_pos))) {
        if(queue->tail_pos == 0) { /* Catch an error related to reseting the tail position and incrementing a block after a block has been filled */
            queue->tail_pos = BLOCK_SIZE;
        } else {
            queue->status = INDEX_ERROR;
            return;
        }
    }

    memcpy(element, queue->head->data + queue->head_pos, queue->element_width);

    queue->head_pos += queue->element_width;

    if(queue->head_pos >= BLOCK_SIZE) {
        free(queue->head->data);
        temp = queue->head;
        queue->head = queue->head->next;
        free(temp);

        queue->head_pos = 0;
        queue->size--;
    }
}

int queue_debug(const queue_t * const queue)
{
    if(queue == NULL) {
        printf("Error: Invalid queue pointer!n");
        return MEM_ERROR;
    }

    if(queue->status == QUEUE_OK) {
        printf("Queue is %d blocks long with an element width of %d bytes with each block containing %d elementsnQueue head is at %p and the current element is %pn", (int)queue->size, (int)queue->element_width, BLOCK_SIZE / (int)queue->element_width, (void *)queue->head, (void *)queue->tail);
    } else if(queue->status == MEM_ERROR) {
        printf("Memory error in queue!n");
    } else if(queue->status == SIZE_ERROR) {
        printf("Size error in queue!n");
    } else if(queue->status == INDEX_ERROR) {
        printf("Index error in queue!n");
    }

    return queue->status;
}

2 Answers

The most misleading thing in your code was this line in queue_element_t typedef:

void * data;      

You would think data is a pointer to data, but rather it is a pointer to an array of pointer to data. So it should really be:

void * * buffer;      

Or even better let's make "pointer to data" a new type.

typedef void* Data_Ptr;

typedef struct 
{
    Data_Ptr * buffer; 
    void * next;
} queue_element_t;

There are several things we need to change:

  • In queue_push we can use pointer assignment instead of memcpy. Replace:

    memcpy (queue->tail->data + queue->tail_pos, element, queue->element_width);
    queue->tail_pos += queue->element_width;    
    

    with

    queue->tail->buffer [queue->tail_pos] = element; queue->tail_pos ++;

You can even combine those two lines if you want.

  • You should change the signiture of queue_pop to Data_Ptr queue_pop (queue_t * queue) and return element or NULL; Change where you call the function to line = queue_pop (queue);

  • Same with queue_pop

    element = queue->head->buffer [queue->head_pos];
    queue->head_pos ++;
    
  • queue_push should become int queue_push (Data_Ptr element, queue_t * queue) element don't need to be const because we are passing in by value. Change where you call the function to queue_push (line, queue);

Once we make this change some optimization opportunities become clear.

  • We don't need to know element_width anymore because it is sizeof(Data_Ptr). Delete anything to do with element_width.

  • Not sure about the exact requirement of the exercise, but since we know the maximum size of the queue, we don't even need to use a linked list. we can just dynamically allocate an array of Data_Ptr. Refer to this for implementation of array-based queue.

  • You don't need to have a separate malloc for temp line for when you have n_element in the array already, just allocate n_element+1 lines in the previous loop, then just pop 1 line and use that as temp.

Other error:

  • You did not check if queue * passed in queue_push and queue_pop is not NULL, result in UB.

  • void queue_pop(void * const element, queue_t * queue);

    should really be

    void queue_pop(void ** element, queue_t * queue);

    in your implementation.

  • queue_destroy(&queue);

    signature of queue_destory is

    void queue_destroy(queue_t * queue);

    but you passed queue** it should really be

    queue_destroy(&queue);

    there are 2 occurance of this.

Answered by wooooooooosh on December 15, 2021

It appears that you are allocating BLOCK_SIZE bytes for each item in the queue, whereas they can be at most MAX_LEN bytes long, am I reading this correctly? And MAX_LEN is 256, where BLOCK_SIZE is 1024, so you are only using at most 25% of the allocated memory.

Another thing is that malloc() has a minimum block size, which is 32 bytes for dlmalloc. Assuming you are on a 64 bit platform, your queue_element_t is 2*sizeof(void *) = 16 bytes. Therefore you are also loosing 16 bytes per element. You can avoid this by allocating the queue_element_t along with its buffer in one go:

#define BLOCK_SIZE (256 - sizeof(void *))
typedef struct {
  void *next;
  char data[BLOCK_SIZE];
} queue_element_t;

Then queue_element_t will be nicely allocated to a 256 byte bloc and no memory will be wasted in overhead.

Answered by Antoine Albertelli on December 15, 2021

Add your own answers!

Ask a Question

Get help from others!

© 2024 TransWikia.com. All rights reserved. Sites we Love: PCI Database, UKBizDB, Menu Kuliner, Sharing RPP