舌足らずですみません。コードはこんな感じです。
threadFunctionは単なる加算処理、joinFunctionは集計処理です。
CPUのコア数は物理2、論理4です。
/**
 * Runs threadFunction once for every element of threadArgs, each call on its
 * own POSIX thread, waits for all of them to finish, and then passes
 * threadArgs to joinFunction.
 *
 * Workers are requested with the scheduling policy stored in schedPolicy at
 * that policy's maximum priority. Scheduling is treated as best effort: when
 * the attributes cannot be set up, or the system refuses to create a thread
 * with them (for example because the process lacks the privileges a real-time
 * policy needs), the worker is created with default attributes instead. If
 * even that fails, threadFunction is executed for that argument on the calling
 * thread, so every argument has been processed before joinFunction runs.
 *
 * @param threadArgs     One argument object per worker; its size determines
 *                       how many workers are started.
 * @param threadFunction Entry point of each worker. It receives the matching
 *                       pointer returned by CastArgsToVoidPtrs; its return
 *                       value is discarded.
 * @param joinFunction   Called exactly once with threadArgs after all workers
 *                       have finished.
 */
template< class ArgType >
void Reduce( std::vector< ArgType >& threadArgs, void* (*threadFunction)(void*), void (*joinFunction)(std::vector< ArgType >&) )
{
    const size_t threadCount = threadArgs.size();
    threads.resize( threadCount );
    std::vector< void* > voidPtrArgs = CastArgsToVoidPtrs( threadArgs );

    // The scheduling attributes are the same for every worker, so they are
    // built once up front instead of once per loop iteration.
    pthread_attr_t threadAttribute;
    const bool attributeInitialized = ( pthread_attr_init( & threadAttribute ) == 0 );
    bool attributeUsable = attributeInitialized;
    if ( attributeUsable )
    {
        // Ask for the maximum priority of the policy that is actually applied;
        // a priority taken from a different policy may be out of range.
        sched_param schedParam = sched_param();
        schedParam.sched_priority = sched_get_priority_max( schedPolicy );

        // The priority has to be stored in the attribute object before the
        // thread is created: no valid pthread_t exists until pthread_create
        // succeeds. The policy is set first so the priority is checked
        // against the right range.
        attributeUsable =
            pthread_attr_setinheritsched( & threadAttribute, PTHREAD_EXPLICIT_SCHED ) == 0
            && pthread_attr_setschedpolicy( & threadAttribute, schedPolicy ) == 0
            && pthread_attr_setschedparam( & threadAttribute, & schedParam ) == 0;
    }

    // Remembers which entries of threads hold a joinable handle. A char vector
    // is used instead of the std::vector< bool > specialisation.
    std::vector< char > started( threadCount, 0 );

    for ( size_t threadIndex = 0; threadIndex < threadCount; ++threadIndex )
    {
        pthread_t& thread = threads[ threadIndex ];

        bool created = attributeUsable
            && pthread_create( & thread, & threadAttribute, threadFunction, voidPtrArgs[ threadIndex ] ) == 0;
        if ( ! created )
        {
            // Best-effort fallback: a worker with default scheduling is better
            // than no worker at all.
            created = pthread_create( & thread, NULL, threadFunction, voidPtrArgs[ threadIndex ] ) == 0;
        }
        started[ threadIndex ] = created ? 1 : 0;
    }

    if ( attributeInitialized )
    {
        // Threads that already exist are not affected by destroying the
        // attribute object they were created with.
        pthread_attr_destroy( & threadAttribute );
    }

    // Process on the calling thread whatever could not be handed to a worker,
    // after the workers that did start are already running.
    for ( size_t threadIndex = 0; threadIndex < threadCount; ++threadIndex )
    {
        if ( ! started[ threadIndex ] )
        {
            threadFunction( voidPtrArgs[ threadIndex ] );
        }
    }

    // Join only handles that pthread_create actually filled in.
    for ( size_t threadIndex = 0; threadIndex < threadCount; ++threadIndex )
    {
        if ( started[ threadIndex ] )
        {
            pthread_join( threads[ threadIndex ], NULL );
        }
    }

    joinFunction( threadArgs );
}