[Bug 37294] Multithreaded code does not utilize processors on a Centrino Duo

Alex gsasha at cs.technion.ac.il
Thu Mar 30 08:41:37 UTC 2006


Public bug report changed:
https://launchpad.net/malone/bugs/37294

Comment:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>
#include <sys/time.h>
#include <sys/resource.h>

struct Params {
  int left;
  int right;
};

struct ThreadParams {
  int* numbers;
  int size;
};

void q_sort(int numbers[], int size)
{
  struct Params stack[64];
  int stack_pos = 0;
  stack[0].left = 0;
  stack[0].right = size-1;
  
  while (stack_pos >= 0)
  {
    int left = stack[stack_pos].left;
    int right = stack[stack_pos].right;
    int pivot = numbers[left];
    
    while (left < right)
    {
      while ((numbers[right] >= pivot) && (left < right))
        right--;
      if (left != right)
      {
        numbers[left] = numbers[right];
        left++;
      }
      while ((numbers[left] <= pivot) && (left < right))
        left++;
      if (left != right)
      {
        numbers[right] = numbers[left];
        right--;
      }
    }
    numbers[left] = pivot;
    
    pivot = left;
    left = stack[stack_pos].left;
    right = stack[stack_pos].right;
    
    if (pivot-left > right-pivot)
    {
      // size of the left subarray is larger. Push it first.
      stack_pos--;
      if (left < pivot) {
        stack_pos++;
        stack[stack_pos].left = left;
        stack[stack_pos].right = pivot-1;
      }
      if (right > pivot) {
        stack_pos++;
        stack[stack_pos].left = pivot+1;
        stack[stack_pos].right = right;
      }
    }
    else
    {
      // size of the right subarray is larger. Push it first
      stack_pos--;
      if (right > pivot) {
        stack_pos++;
        stack[stack_pos].left = pivot+1;
        stack[stack_pos].right = right;
      }
      if (left < pivot) {
        stack_pos++;
        stack[stack_pos].left = left;
        stack[stack_pos].right = pivot-1;
      }
    }
  }
}

void* q_sort_worker(void* p)
{
  struct ThreadParams* params = (struct ThreadParams*)p;
  q_sort(params->numbers, params->size);
  return 0;
}

void qsort_pthread(int numbers[], int size)
{
  struct ThreadParams params1;
  struct ThreadParams params2;
  pthread_t thr1, thr2;
  int res1 = -1, res2 = -1;
  
  int left = 0;
  int right = size-1;
  int pivot = numbers[left];
  
  while (left < right)
  {
    while ((numbers[right] >= pivot) && (left < right))
      right--;
    if (left != right)
    {
      numbers[left] = numbers[right];
      left++;
    }
    while ((numbers[left] <= pivot) && (left < right))
      left++;
    if (left != right)
    {
      numbers[right] = numbers[left];
      right--;
    }
  }
  numbers[left] = pivot;
  
  pivot = left;
  left = 0;
  right = size-1;
  if (left < pivot) {
    params1.numbers = numbers+left;
    params1.size = pivot-left-1;
    res1 = pthread_create(&thr1, NULL, q_sort_worker, &params1);
  }
  if (right > pivot) {
    params2.numbers = numbers+pivot+1;
    params2.size = right-(pivot+1);
    res2 = pthread_create(&thr2, NULL, q_sort_worker, &params2);
  }
  if (res1 == 0) {
    pthread_join(thr1, NULL);
  }
  if (res2 == 0) {
    pthread_join(thr2, NULL);
  }
}

int main()
{
  int i;
  int iterations = 100000;
  int cur_size = 10;
  int max_size = 1000000;
  int *data = (int*)malloc(max_size * sizeof(int));
  int *work_data = (int*)malloc(max_size*sizeof(int));
  for (i=0; i<max_size; i++)
  {
    work_data[i] = data[i] = rand();
  }
  // --- Prepare it so that the median is the first element, so that the
  //      parallelization will have good load balancing
  q_sort(work_data, max_size);
  data[0] = work_data[max_size/2];
  
  printf("# size, serial, parallel, speedup \n");
  while (cur_size < max_size)
  {
    struct timeval start, end;
    struct timezone zone;
    double seq_seconds, par_seconds;
    
    zone.tz_minuteswest = 0;
    zone.tz_dsttime = 0;
    
    // ----------- Run the parallel version
    gettimeofday(&start, &zone);
  // Peform the timed operation
    for (i=0; i<iterations; ++i) {
      int j;
      for (j=0; j<cur_size; j++)
        work_data[j] = data[j];
      
      qsort_pthread(work_data,cur_size);
    }
    gettimeofday(&end, &zone);
    par_seconds = end.tv_sec-start.tv_sec
      +1e-6*(end.tv_usec-start.tv_usec);
    
    // ------------ Run the serial version
    gettimeofday(&start, &zone);
  // Peform the timed operation
    for (i=0; i<iterations; ++i) {
      int j;
      for (j=0; j<cur_size; j++)
        work_data[j] = data[j];
      
      q_sort(work_data,cur_size);
    }
    gettimeofday(&end, &zone);
    seq_seconds = end.tv_sec-start.tv_sec
      +1e-6*(end.tv_usec-start.tv_usec);
    
    printf("%6d %6d %2.4f %2.4f  %f %f  %f\n", cur_size, iterations,
           seq_seconds, par_seconds,
           seq_seconds/iterations, par_seconds/iterations, seq_seconds/(par_seconds+1e-20));
    if (par_seconds > 10.0 && iterations > 10) {
      iterations /= 2;
    }
  /*for (i=0; i<size; i++)
    printf("%d ", data[i]);
  printf("\n");
  for (i=0; i<size; i++)
    printf("%d ", work_data[i]);
  printf("\n");*/
    cur_size = (int)(cur_size * 1.3);
  }
}




More information about the kernel-bugs mailing list