This tutorial shows how the bandwidth of the nonzero pattern of a sparse matrix can be reduced by renumbering the unknowns (i.e. rows and columns). Such a reordering can significantly improve cache reuse for algorithms such as sparse matrix-vector products and may also reduce the iteration required in iterative solvers.
There are two parts: The first part defines a couple of helper function. Feel free to directly go to the second part, which shows the actual tutorial code. 
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <map>
#include <vector>
#include <deque>
#include <cmath>
Part 1: Helper functions
We start by defining a couple of helper functions for sparse matrices represented by C++ STL containers, i.e. std::vector<std::map<T,U> >
Reorder Matrix
Reorders a matrix according to a previously generated node number permutation vector r 
inline std::vector< std::map<int, double> > reorder_matrix(std::vector< std::map<int, double> > const & matrix, std::vector<int> const & r)
{
    std::vector< std::map<int, double> > matrix2(r.size());
    for (std::size_t i = 0; i < r.size(); i++)
      for (std::map<int, double>::const_iterator it = matrix[i].begin();  it != matrix[i].end(); it++)
        matrix2[static_cast<std::size_t>(r[i])][r[static_cast<std::size_t>(it->first)]] = it->second;
    return matrix2;
}
Bandwidth Calculation
Calculates the bandwidth of a matrix 
inline int calc_bw(std::vector< std::map<int, double> > const & matrix)
{
    int bw = 0;
    for (std::size_t i = 0; i < matrix.size(); i++)
    {
      int min_index = static_cast<int>(matrix.size());
      int max_index = 0;
      for (std::map<int, double>::const_iterator it = matrix[i].begin();  it != matrix[i].end(); it++)
      {
        if (it->first > max_index)
          max_index = it->first;
        if (it->first < min_index)
          min_index = it->first;
      }
      if (max_index > min_index) 
        bw = 
std::max(bw, max_index - min_index);
    }
    return bw;
}
Bandwidth Calculation 
Calculate the bandwidth of a reordered matrix 
template<typename IndexT>
int calc_reordered_bw(std::vector< std::map<int, double> > 
const & matrix,  std::vector<IndexT> 
const & r)
 
{
    int bw = 0;
    for (std::size_t i = 0; i < r.size(); i++)
    {
      int min_index = static_cast<int>(matrix.size());
      int max_index = 0;
      for (std::map<int, double>::const_iterator it = matrix[i].begin();  it != matrix[i].end(); it++)
      {
        std::size_t col_idx = static_cast<std::size_t>(it->first);
        if (r[col_idx] > max_index)
          max_index = r[col_idx];
        if (r[col_idx] < min_index)
          min_index = r[col_idx];
      }
      if (max_index > min_index)
        bw = 
std::max(bw, max_index - min_index);
    }
    return bw;
}
Generate Random Permutation
Generates a random permutation by Knuth shuffle algorithm reference: http://en.wikipedia.org/wiki/Knuth_shuffle (URL taken on July 2nd, 2011) 
inline std::vector<int> generate_random_reordering(std::size_t n)
{
    std::vector<int> r(n);
    int tmp;
    for (std::size_t i = 0; i < n; i++)
        r[i] = static_cast<int>(i);
    for (std::size_t i = 0; i < n - 1; i++)
    {
        std::size_t j = i + static_cast<std::size_t>((static_cast<double>(rand()) / static_cast<double>(RAND_MAX)) * static_cast<double>(n - 1 - i));
        if (j != i)
        {
            tmp = r[i];
            r[i] = r[j];
            r[j] = tmp;
        }
    }
    return r;
}
Matrix Generation for 3D Meshes
This function generates an incidence matrix from of an underlying three-dimensional mesh l: x dimension m: y dimension n: z dimension tri: true for tetrahedral mesh, false for cubic mesh return value: matrix of size l * m * n 
inline std::vector< std::map<int, double> > gen_3d_mesh_matrix(std::size_t l, std::size_t m, std::size_t n, bool tri)
{
    std::vector< std::map<int, double> > matrix;
    std::size_t s = l * m * n;
    std::size_t ind;
    std::size_t ind1;
    std::size_t ind2;
    matrix.resize(s);
    for (std::size_t i = 0; i < l; i++)
    {
        for (std::size_t j = 0; j < m; j++)
        {
            for (std::size_t k = 0; k < n; k++)
            {
                ind = i + l * j + l * m * k;
                matrix[ind][static_cast<int>(ind)] = 1.0;
                if (i > 0)
                {
                    ind2 = ind - 1;
                    matrix[ind][static_cast<int>(ind2)] = 1.0;
                    matrix[ind2][static_cast<int>(ind)] = 1.0;
                }
                if (j > 0)
                {
                    ind2 = ind - l;
                    matrix[ind][static_cast<int>(ind2)] = 1.0;
                    matrix[ind2][static_cast<int>(ind)] = 1.0;
                }
                if (k > 0)
                {
                    ind2 = ind - l * m;
                    matrix[ind][static_cast<int>(ind2)] = 1.0;
                    matrix[ind2][static_cast<int>(ind)] = 1.0;
                }
                if (tri)
                {
                    if (i < l - 1 && j < m - 1)
                    {
                        if ((i + j + k) % 2 == 0)
                        {
                            ind1 = ind;
                            ind2 = ind + 1 + l;
                        }
                        else
                        {
                            ind1 = ind + 1;
                            ind2 = ind + l;
                        }
                        matrix[ind1][static_cast<int>(ind2)] = 1.0;
                        matrix[ind2][static_cast<int>(ind1)] = 1.0;
                    }
                    if (i < l - 1 && k < n - 1)
                    {
                        if ((i + j + k) % 2 == 0)
                        {
                            ind1 = ind;
                            ind2 = ind + 1 + l * m;
                        }
                        else
                        {
                            ind1 = ind + 1;
                            ind2 = ind + l * m;
                        }
                        matrix[ind1][static_cast<int>(ind2)] = 1.0;
                        matrix[ind2][static_cast<int>(ind1)] = 1.0;
                    }
                    if (j < m - 1 && k < n - 1)
                    {
                        if ((i + j + k) % 2 == 0)
                        {
                            ind1 = ind;
                            ind2 = ind + l + l * m;
                        }
                        else
                        {
                            ind1 = ind + l;
                            ind2 = ind + l * m;
                        }
                        matrix[ind1][static_cast<int>(ind2)] = 1.0;
                        matrix[ind2][static_cast<int>(ind1)] = 1.0;
                    }
                }
            }
        }
    }
    return matrix;
}
Part 2: Tutorial code
After all the helper functions have been defined, we are now ready to see how reordering routines are called. 
{
  srand(42);
  std::cout << "-- Generating matrix --" << std::endl;
  std::size_t dof_per_dim = 64;   
  std::size_t n = dof_per_dim * dof_per_dim * dof_per_dim; 
  std::vector< std::map<int, double> > matrix = gen_3d_mesh_matrix(dof_per_dim, dof_per_dim, dof_per_dim, false);  
  Shuffle the generated matrix 
std::vector<int> r = generate_random_reordering(n);
std::vector< std::map<int, double> > matrix2 = reorder_matrix(matrix, r);
 Print some statistics about the generated matrix: 
std::cout << " * Unknowns: " << n << std::endl;
std::cout << " * Initial bandwidth: " << calc_bw(matrix) << std::endl;
std::cout << " * Randomly reordered bandwidth: " << calc_bw(matrix2) << std::endl;
 Reorder using Cuthill-McKee algorithm and print new bandwidth: 
std::cout << "-- Cuthill-McKee algorithm --" << std::endl;
std::cout << 
" * Reordered bandwidth: " << 
calc_reordered_bw(matrix2, r) << std::endl;
 Reorder using advanced Cuthill-McKee algorithm and print new bandwidth: 
std::cout << "-- Advanced Cuthill-McKee algorithm --" << std::endl;
double a = 0.0;
std::size_t gmax = 1;
std::cout << 
" * Reordered bandwidth: " << 
calc_reordered_bw(matrix2, r) << std::endl;
 Reorder using Gibbs-Poole-Stockmeyer algorithm and print new bandwidth: 
std::cout << "-- Gibbs-Poole-Stockmeyer algorithm --" << std::endl;
std::cout << 
" * Reordered bandwidth: " << 
calc_reordered_bw(matrix2, r) << std::endl;
 That's it. 
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
  return EXIT_SUCCESS;
}
Full Example Code
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <map>
#include <vector>
#include <deque>
#include <cmath>
inline std::vector< std::map<int, double> > reorder_matrix(std::vector< std::map<int, double> > const & matrix, std::vector<int> const & r)
{
    std::vector< std::map<int, double> > matrix2(r.size());
    for (std::size_t i = 0; i < r.size(); i++)
      for (std::map<int, double>::const_iterator it = matrix[i].begin();  it != matrix[i].end(); it++)
        matrix2[static_cast<std::size_t>(r[i])][r[static_cast<std::size_t>(it->first)]] = it->second;
    return matrix2;
}
inline int calc_bw(std::vector< std::map<int, double> > const & matrix)
{
    int bw = 0;
    for (std::size_t i = 0; i < matrix.size(); i++)
    {
      int min_index = static_cast<int>(matrix.size());
      int max_index = 0;
      for (std::map<int, double>::const_iterator it = matrix[i].begin();  it != matrix[i].end(); it++)
      {
        if (it->first > max_index)
          max_index = it->first;
        if (it->first < min_index)
          min_index = it->first;
      }
      if (max_index > min_index) 
        bw = 
std::max(bw, max_index - min_index);
    }
    return bw;
}
template<typename IndexT>
int calc_reordered_bw(std::vector< std::map<int, double> > 
const & matrix,  std::vector<IndexT> 
const & r)
 
{
    int bw = 0;
    for (std::size_t i = 0; i < r.size(); i++)
    {
      int min_index = static_cast<int>(matrix.size());
      int max_index = 0;
      for (std::map<int, double>::const_iterator it = matrix[i].begin();  it != matrix[i].end(); it++)
      {
        std::size_t col_idx = static_cast<std::size_t>(it->first);
        if (r[col_idx] > max_index)
          max_index = r[col_idx];
        if (r[col_idx] < min_index)
          min_index = r[col_idx];
      }
      if (max_index > min_index)
        bw = 
std::max(bw, max_index - min_index);
    }
    return bw;
}
inline std::vector<int> generate_random_reordering(std::size_t n)
{
    std::vector<int> r(n);
    int tmp;
    for (std::size_t i = 0; i < n; i++)
        r[i] = static_cast<int>(i);
    for (std::size_t i = 0; i < n - 1; i++)
    {
        std::size_t j = i + static_cast<std::size_t>((static_cast<double>(rand()) / static_cast<double>(RAND_MAX)) * static_cast<double>(n - 1 - i));
        if (j != i)
        {
            tmp = r[i];
            r[i] = r[j];
            r[j] = tmp;
        }
    }
    return r;
}
inline std::vector< std::map<int, double> > gen_3d_mesh_matrix(std::size_t l, std::size_t m, std::size_t n, bool tri)
{
    std::vector< std::map<int, double> > matrix;
    std::size_t s = l * m * n;
    std::size_t ind;
    std::size_t ind1;
    std::size_t ind2;
    matrix.resize(s);
    for (std::size_t i = 0; i < l; i++)
    {
        for (std::size_t j = 0; j < m; j++)
        {
            for (std::size_t k = 0; k < n; k++)
            {
                ind = i + l * j + l * m * k;
                matrix[ind][static_cast<int>(ind)] = 1.0;
                if (i > 0)
                {
                    ind2 = ind - 1;
                    matrix[ind][static_cast<int>(ind2)] = 1.0;
                    matrix[ind2][static_cast<int>(ind)] = 1.0;
                }
                if (j > 0)
                {
                    ind2 = ind - l;
                    matrix[ind][static_cast<int>(ind2)] = 1.0;
                    matrix[ind2][static_cast<int>(ind)] = 1.0;
                }
                if (k > 0)
                {
                    ind2 = ind - l * m;
                    matrix[ind][static_cast<int>(ind2)] = 1.0;
                    matrix[ind2][static_cast<int>(ind)] = 1.0;
                }
                if (tri)
                {
                    if (i < l - 1 && j < m - 1)
                    {
                        if ((i + j + k) % 2 == 0)
                        {
                            ind1 = ind;
                            ind2 = ind + 1 + l;
                        }
                        else
                        {
                            ind1 = ind + 1;
                            ind2 = ind + l;
                        }
                        matrix[ind1][static_cast<int>(ind2)] = 1.0;
                        matrix[ind2][static_cast<int>(ind1)] = 1.0;
                    }
                    if (i < l - 1 && k < n - 1)
                    {
                        if ((i + j + k) % 2 == 0)
                        {
                            ind1 = ind;
                            ind2 = ind + 1 + l * m;
                        }
                        else
                        {
                            ind1 = ind + 1;
                            ind2 = ind + l * m;
                        }
                        matrix[ind1][static_cast<int>(ind2)] = 1.0;
                        matrix[ind2][static_cast<int>(ind1)] = 1.0;
                    }
                    if (j < m - 1 && k < n - 1)
                    {
                        if ((i + j + k) % 2 == 0)
                        {
                            ind1 = ind;
                            ind2 = ind + l + l * m;
                        }
                        else
                        {
                            ind1 = ind + l;
                            ind2 = ind + l * m;
                        }
                        matrix[ind1][static_cast<int>(ind2)] = 1.0;
                        matrix[ind2][static_cast<int>(ind1)] = 1.0;
                    }
                }
            }
        }
    }
    return matrix;
}
{
  srand(42);
  std::cout << "-- Generating matrix --" << std::endl;
  std::size_t dof_per_dim = 64;   
  std::size_t n = dof_per_dim * dof_per_dim * dof_per_dim; 
  std::vector< std::map<int, double> > matrix = gen_3d_mesh_matrix(dof_per_dim, dof_per_dim, dof_per_dim, false);  
  std::vector<int> r = generate_random_reordering(n);
  std::vector< std::map<int, double> > matrix2 = reorder_matrix(matrix, r);
  std::cout << " * Unknowns: " << n << std::endl;
  std::cout << " * Initial bandwidth: " << calc_bw(matrix) << std::endl;
  std::cout << " * Randomly reordered bandwidth: " << calc_bw(matrix2) << std::endl;
  std::cout << "-- Cuthill-McKee algorithm --" << std::endl;
  std::cout << 
" * Reordered bandwidth: " << 
calc_reordered_bw(matrix2, r) << std::endl;
  std::cout << "-- Advanced Cuthill-McKee algorithm --" << std::endl;
  double a = 0.0;
  std::size_t gmax = 1;
  std::cout << 
" * Reordered bandwidth: " << 
calc_reordered_bw(matrix2, r) << std::endl;
  std::cout << "-- Gibbs-Poole-Stockmeyer algorithm --" << std::endl;
  std::cout << 
" * Reordered bandwidth: " << 
calc_reordered_bw(matrix2, r) << std::endl;
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
  return EXIT_SUCCESS;
}