# Source code for pyjac.core.CUDAParams

"""
Module containing parameters that control CUDA code generation


Parameters
----------

Jacob_Unroll : int
  The number of reactions to attempt to place in each Jacobian reaction update subfile
Jacob_Spec_Unroll : int
  The number of species to attempt to place in each Jacobian species update subfile
Rates_Unroll : int
  The number of reactions to limit each reaction rate subfile to
Max_Lines : int
  The number of lines to attempt to limit each Jacobian reaction update subfile to
Max_Spec_Lines : int
  The number of lines to attempt to limit each Jacobian species update subfile to

"""

# Standard libraries
import os
from math import floor

# Number of reactions to attempt to place in each Jacobian reaction update
# subfile
Jacob_Unroll = 40
# Number of species to attempt to place in each Jacobian species update
# subfile
Jacob_Spec_Unroll = 40
# Number of reactions to limit each reaction rate subfile to
Rates_Unroll = 250
# Number of lines to attempt to limit each Jacobian reaction update subfile to
Max_Lines = 10000
# Number of lines to attempt to limit each Jacobian species update subfile to
Max_Spec_Lines = 5000


def get_L1_size(L1_Preferred):
    """
    Returns the size (in number of doubles) of the L1 cache for sm_20

    Parameters
    ----------
    L1_Preferred : bool
        If true, prefer a larger L1 cache over more shared memory (recommended)

    Returns
    -------
    int
        6144 if `L1_Preferred` is true, otherwise 2048
    """
    cache_size = 49152 if L1_Preferred else 16384
    # Floor division: a count of doubles must stay an integer, and plain `/`
    # is true division (float result) on Python 3.
    return cache_size // 8  # doubles


def get_shared_size(L1_Preferred):
    """
    Returns the size (in number of doubles) of shared memory for sm_20

    Parameters
    ----------
    L1_Preferred : bool
        If true, prefer a larger L1 cache over more shared memory (recommended)

    Returns
    -------
    int
        2048 if `L1_Preferred` is true, otherwise 6144
    """
    # Shared memory gets the larger figure only when L1 is NOT preferred.
    shared_size = 16384 if L1_Preferred else 49152
    # Floor division: a count of doubles must stay an integer, and plain `/`
    # is true division (float result) on Python 3.
    return shared_size // 8  # doubles


def get_register_count(num_blocks, num_threads):
    """
    Returns the number of registers available per thread for sm_20

    A pool of 32768 registers is split evenly across `num_blocks` blocks and
    then across `num_threads` threads; the result is clamped to [1, 63].

    Parameters
    ----------
    num_blocks : int
        The number of blocks to target per kernel launch
    num_threads : int
        The number of threads in each block

    Returns
    -------
    int
        The register count, never less than 1 nor more than 63
    """
    # Floor division keeps the count integral; true division on Python 3
    # would produce values such as 32.0 or 20.48.
    per_thread = (32768 // num_blocks) // num_threads
    return max(min(per_thread, 63), 1)


def write_launch_bounds(builddir, blocks_per_sm=8, num_threads=64,
                        L1_PREFERRED=True, no_shared=False
                        ):
    """Creates the launch_bounds.cuh file that may be included by CUDA solvers

    A second file named ``regcount``, holding the value returned by
    `get_register_count`, is written to the same directory.

    Parameters
    ----------

    builddir : str
        The directory to place the generated files in
    blocks_per_sm : int, optional
        The number of blocks to target per kernel launch
    num_threads : int, optional
        The number of threads per block in the per kernel launch
    L1_PREFERRED : bool, optional
        If true, prefer a larger L1 cache over more shared memory (recommended)
    no_shared : bool, optional
        If true, turn off shared memory (SHARED_SIZE is written as zero)

    Returns
    -------
    None

    """
    if no_shared:
        shared_per_block = 0
    else:
        # Split the available shared memory (in doubles) evenly among blocks.
        shared_per_block = int(
            floor(get_shared_size(L1_PREFERRED) / blocks_per_sm))

    header = ''.join([
        '#ifndef LAUNCH_BOUNDS_CUH\n',
        '#define LAUNCH_BOUNDS_CUH\n',
        '#define TARGET_BLOCK_SIZE ({})\n'.format(num_threads),
        '#define TARGET_BLOCKS ({})\n'.format(blocks_per_sm),
        '' if no_shared else '//shared memory active\n',
        '#define SHARED_SIZE ({}'.format(shared_per_block),
        ' * sizeof(double))\n',
        ('//Large L1 cache active\n#define PREFERL1\n'
         if L1_PREFERRED else '//Large shared memory active\n'),
        '#endif\n',
    ])
    with open(os.path.join(builddir, 'launch_bounds.cuh'), 'w') as header_file:
        header_file.write(header)

    # Coerce to int: on Python 3 the helper's division may yield a float
    # (e.g. 32.0), which must not end up in the register count file.
    reg_count = int(get_register_count(blocks_per_sm, num_threads))
    with open(os.path.join(builddir, 'regcount'), 'w') as reg_file:
        reg_file.write('{}'.format(reg_count))
Source code for pyjac.core.CUDAParams

pyJac

Navigation