Point Cloud Library (PCL) 1.15.0
Loading...
Searching...
No Matches
sparse_matrix.hpp
1/*
2Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without modification,
6are permitted provided that the following conditions are met:
7
8Redistributions of source code must retain the above copyright notice, this list of
9conditions and the following disclaimer. Redistributions in binary form must reproduce
10the above copyright notice, this list of conditions and the following disclaimer
11in the documentation and/or other materials provided with the distribution.
12
13Neither the name of the Johns Hopkins University nor the names of its contributors
14may be used to endorse or promote products derived from this software without specific
15prior written permission.
16
17THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
18EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES
19OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
20SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
25ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26DAMAGE.
27*/
28
29#ifdef _WIN32
30# ifndef WIN32_LEAN_AND_MEAN
31# define WIN32_LEAN_AND_MEAN
32# endif // WIN32_LEAN_AND_MEAN
33# ifndef NOMINMAX
34# define NOMINMAX
35# endif // NOMINMAX
36# include <windows.h>
37#endif //_WIN32
38
39///////////////////
40// SparseMatrix //
41///////////////////
42///////////////////////////////////////////
43// Static Allocator Methods and Memebers //
44///////////////////////////////////////////
45
46namespace pcl
47{
48 namespace poisson
49 {
50
51
52 template<class T> int SparseMatrix<T>::UseAlloc=0;
53 template<class T> Allocator<MatrixEntry<T> > SparseMatrix<T>::internalAllocator;
54 template<class T> int SparseMatrix<T>::UseAllocator(void){return UseAlloc;}
55 template<class T>
56 void SparseMatrix<T>::SetAllocator( int blockSize )
57 {
58 if(blockSize>0){
59 UseAlloc=1;
60 internalAllocator.set(blockSize);
61 }
62 else{UseAlloc=0;}
63 }
64 ///////////////////////////////////////
65 // SparseMatrix Methods and Memebers //
66 ///////////////////////////////////////
67
68 template< class T >
70 {
71 _contiguous = false;
72 _maxEntriesPerRow = 0;
73 rows = 0;
74 rowSizes = NULL;
75 m_ppElements = NULL;
76 }
77
78 template< class T > SparseMatrix< T >::SparseMatrix( int rows ) : SparseMatrix< T >() { Resize( rows ); }
79 template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ) : SparseMatrix< T >() { Resize( rows , maxEntriesPerRow ); }
80
81 template< class T >
83 {
84 if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
85 else Resize( M.rows );
86 for( int i=0 ; i<rows ; i++ )
87 {
88 SetRowSize( i , M.rowSizes[i] );
89 memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
90 }
91 }
92 template<class T>
93 int SparseMatrix<T>::Entries( void ) const
94 {
95 int e = 0;
96 for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
97 return e;
98 }
99 template<class T>
101 {
102 if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
103 else Resize( M.rows );
104 for( int i=0 ; i<rows ; i++ )
105 {
106 SetRowSize( i , M.rowSizes[i] );
107 memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
108 }
109 return *this;
110 }
111
112 template<class T>
113 SparseMatrix<T>::~SparseMatrix( void ){ Resize( 0 ); }
114
115 template< class T >
116 bool SparseMatrix< T >::write( const char* fileName ) const
117 {
118 FILE* fp = fopen( fileName , "wb" );
119 if( !fp ) return false;
120 bool ret = write( fp );
121 fclose( fp );
122 return ret;
123 }
124 template< class T >
125 bool SparseMatrix< T >::read( const char* fileName )
126 {
127 FILE* fp = fopen( fileName , "rb" );
128 if( !fp ) return false;
129 bool ret = read( fp );
130 fclose( fp );
131 return ret;
132 }
133 template< class T >
134 bool SparseMatrix< T >::write( FILE* fp ) const
135 {
136 if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
137 if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
138 for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
139 return true;
140 }
141 template< class T >
142 bool SparseMatrix< T >::read( FILE* fp )
143 {
144 int r;
145 if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
146 Resize( r );
147 if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
148 for( int i=0 ; i<rows ; i++ )
149 {
150 r = rowSizes[i];
151 rowSizes[i] = 0;
152 SetRowSize( i , r );
153 if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
154 }
155 return true;
156 }
157
158
159 template< class T >
161 {
162 if( rows>0 )
163 {
164
165 if( !UseAlloc )
166 if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
167 else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
168 free( m_ppElements );
169 free( rowSizes );
170 }
171 rows = r;
172 if( r )
173 {
174 rowSizes = ( int* )malloc( sizeof( int ) * r );
175 memset( rowSizes , 0 , sizeof( int ) * r );
176 m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
177 }
178 _contiguous = false;
179 _maxEntriesPerRow = 0;
180 }
181 template< class T >
182 void SparseMatrix< T >::Resize( int r , int e )
183 {
184 if( rows>0 )
185 {
186 if( !UseAlloc )
187 if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
188 else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
189 free( m_ppElements );
190 free( rowSizes );
191 }
192 rows = r;
193 if( r )
194 {
195 rowSizes = ( int* )malloc( sizeof( int ) * r );
196 memset( rowSizes , 0 , sizeof( int ) * r );
197 m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
198 m_ppElements[0] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * r * e );
199 for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
200 }
201 _contiguous = true;
202 _maxEntriesPerRow = e;
203 }
204
205 template<class T>
206 void SparseMatrix< T >::SetRowSize( int row , int count )
207 {
208 if( _contiguous )
209 {
210 if (count > _maxEntriesPerRow)
211 {
212 POISSON_THROW_EXCEPTION (pcl::poisson::PoissonBadArgumentException, "Attempted to set row size on contiguous matrix larger than max row size: (requested)"<< count << " > (maximum)" << _maxEntriesPerRow );
213 }
214 rowSizes[row] = count;
215 }
216 else if( row>=0 && row<rows )
217 {
218 if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements(count);
219 else
220 {
221 if( rowSizes[row] ) free( m_ppElements[row] );
222 if( count>0 ) m_ppElements[row] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * count );
223 }
224 }
225 }
226
227
228 template<class T>
230 {
231 // copied from operator *=
232 for (int i=0; i<rows; i++)
233 {
234 for(int ii=0;ii<rowSizes[i];ii++){m_ppElements[i][ii].Value=T(0);}
235 }
236 }
237
238 template<class T>
240 {
241 SetZero();
242 for(int ij=0; ij < std::min<int>( rows, _maxEntriesPerRow ); ij++)
243 (*this)(ij,ij) = T(1);
244 }
245
246 template<class T>
248 {
249 SparseMatrix<T> M(*this);
250 M *= V;
251 return M;
252 }
253
254 template<class T>
256 {
257 for (int i=0; i<rows; i++)
258 {
259 for(int ii=0;ii<rowSizes[i];ii++){m_ppElements[i][ii].Value*=V;}
260 }
261 return *this;
262 }
263
264 template<class T>
266 {
267 SparseMatrix<T> R( rows, M._maxEntriesPerRow );
268 for(int i=0; i<R.rows; i++){
269 for(int ii=0;ii<rowSizes[i];ii++){
270 int N=m_ppElements[i][ii].N;
271 T Value=m_ppElements[i][ii].Value;
272 for(int jj=0;jj<M.rowSizes[N];jj++){
273 R(i,M.m_ppElements[N][jj].N) += Value * M.m_ppElements[N][jj].Value;
274 }
275 }
276 }
277 return R;
278 }
279
280 template<class T>
281 template<class T2>
283 {
284 Vector<T2> R( rows );
285
286 for (int i=0; i<rows; i++)
287 {
288 T2 temp=T2();
289 for(int ii=0;ii<rowSizes[i];ii++){
290 temp+=m_ppElements[i][ii].Value * V.m_pV[m_ppElements[i][ii].N];
291 }
292 R(i)=temp;
293 }
294 return R;
295 }
296
297 template<class T>
298 template<class T2>
299 void SparseMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , int threads ) const
300 {
301#pragma omp parallel for num_threads( threads ) schedule( static )
302 for( int i=0 ; i<rows ; i++ )
303 {
304 T2 temp = T2();
305 temp *= 0;
306 for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[m_ppElements[i][j].N];
307 Out.m_pV[i] = temp;
308 }
309 }
310
311 template<class T>
313 {
314 return Multiply(M);
315 }
316 template<class T>
317 template<class T2>
319 {
320 return Multiply(V);
321 }
322
323 template<class T>
325 {
326 SparseMatrix<T> M( _maxEntriesPerRow, rows );
327
328 for (int i=0; i<rows; i++)
329 {
330 for(int ii=0;ii<rowSizes[i];ii++){
331 M(m_ppElements[i][ii].N,i) = m_ppElements[i][ii].Value;
332 }
333 }
334 return M;
335 }
336
337 template<class T>
338 template<class T2>
339 int SparseMatrix<T>::SolveSymmetric( const SparseMatrix<T>& M , const Vector<T2>& b , int iters , Vector<T2>& solution , const T2 eps , int reset , int threads )
340 {
341 if( reset )
342 {
343 solution.Resize( b.Dimensions() );
344 solution.SetZero();
345 }
346 Vector< T2 > r;
347 r.Resize( solution.Dimensions() );
348 M.Multiply( solution , r );
349 r = b - r;
350 Vector< T2 > d = r;
351 double delta_new , delta_0;
352 for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
353 delta_0 = delta_new;
354 if( delta_new<eps ) return 0;
355 int ii;
356 Vector< T2 > q;
357 q.Resize( d.Dimensions() );
358 for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
359 {
360 M.Multiply( d , q , threads );
361 double dDotQ = 0 , alpha = 0;
362 for( int i=0 ; i<d.Dimensions() ; i++ ) dDotQ += d.m_pV[i] * q.m_pV[i];
363 alpha = delta_new / dDotQ;
364#pragma omp parallel for num_threads( threads ) schedule( static )
365 for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha );
366 if( !(ii%50) )
367 {
368 r.Resize( solution.Dimensions() );
369 M.Multiply( solution , r , threads );
370 r = b - r;
371 }
372 else
373#pragma omp parallel for num_threads( threads ) schedule( static )
374 for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2(alpha);
375
376 double delta_old = delta_new , beta;
377 delta_new = 0;
378 for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i]*r.m_pV[i];
379 beta = delta_new / delta_old;
380#pragma omp parallel for num_threads( threads ) schedule( static )
381 for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta );
382 }
383 return ii;
384 }
385
386 // Solve for x s.t. M(x)=b by solving for x s.t. M^tM(x)=M^t(b)
387 template<class T>
388 int SparseMatrix<T>::Solve(const SparseMatrix<T>& M,const Vector<T>& b,int iters,Vector<T>& solution,const T eps){
389 SparseMatrix mTranspose=M.Transpose();
390 Vector<T> bb=mTranspose*b;
391 Vector<T> d,r,Md;
392 T alpha,beta,rDotR;
393 int i;
394
395 solution.Resize(bb.Dimensions());
396 solution.SetZero();
397
398 d=r=bb;
399 rDotR=r.Dot(r);
400 for(i=0;i<iters && rDotR>eps;i++){
401 T temp;
402 Md=mTranspose*(M*d);
403 alpha=rDotR/d.Dot(Md);
404 solution+=d*alpha;
405 r-=Md*alpha;
406 temp=r.Dot(r);
407 beta=temp/rDotR;
408 rDotR=temp;
409 d=r+d*beta;
410 }
411 return i;
412 }
413
414
415
416
417 ///////////////////////////
418 // SparseSymmetricMatrix //
419 ///////////////////////////
420 template<class T>
421 template<class T2>
423 template<class T>
424 template<class T2>
426 {
428
429 for(int i=0; i<SparseMatrix<T>::rows; i++){
430 for(int ii=0;ii<SparseMatrix<T>::rowSizes[i];ii++){
431 int j=SparseMatrix<T>::m_ppElements[i][ii].N;
432 R(i)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[j];
433 R(j)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[i];
434 }
435 }
436 return R;
437 }
438
439 template<class T>
440 template<class T2>
441 void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , bool addDCTerm ) const
442 {
443 Out.SetZero();
444 const T2* in = &In[0];
445 T2* out = &Out[0];
446 T2 dcTerm = T2( 0 );
447 if( addDCTerm )
448 {
449 for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i];
450 dcTerm /= SparseMatrix<T>::rows;
451 }
452 for( int i=0 ; i<this->SparseMatrix<T>::rows ; i++ )
453 {
455 const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
456 const T2& in_i_ = in[i];
457 T2 out_i = T2(0);
458 for( ; temp!=end ; temp++ )
459 {
460 int j=temp->N;
461 T2 v=temp->Value;
462 out_i += v * in[j];
463 out[j] += v * in_i_;
464 }
465 out[i] += out_i;
466 }
467 if( addDCTerm ) for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm;
468 }
469 template<class T>
470 template<class T2>
471 void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , MapReduceVector< T2 >& OutScratch , bool addDCTerm ) const
472 {
473 int dim = int( In.Dimensions() );
474 const T2* in = &In[0];
475 int threads = OutScratch.threads();
476 if( addDCTerm )
477 {
478 T2 dcTerm = 0;
479#pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
480 for( int t=0 ; t<threads ; t++ )
481 {
482 T2* out = OutScratch[t];
483 memset( out , 0 , sizeof( T2 ) * dim );
484 for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
485 {
486 const T2& in_i_ = in[i];
487 T2& out_i_ = out[i];
488 for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
489 {
490 int j = temp->N;
491 T2 v = temp->Value;
492 out_i_ += v * in[j];
493 out[j] += v * in_i_;
494 }
495 dcTerm += in_i_;
496 }
497 }
498 dcTerm /= dim;
499 dim = int( Out.Dimensions() );
500 T2* out = &Out[0];
501#pragma omp parallel for num_threads( threads ) schedule( static )
502 for( int i=0 ; i<dim ; i++ )
503 {
504 T2 _out = dcTerm;
505 for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
506 out[i] = _out;
507 }
508 }
509 else
510 {
511#pragma omp parallel for num_threads( threads )
512 for( int t=0 ; t<threads ; t++ )
513 {
514 T2* out = OutScratch[t];
515 memset( out , 0 , sizeof( T2 ) * dim );
516 for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
517 {
518 const T2& in_i_ = in[i];
519 T2& out_i_ = out[i];
520 for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
521 {
522 int j = temp->N;
523 T2 v = temp->Value;
524 out_i_ += v * in[j];
525 out[j] += v * in_i_;
526 }
527 }
528 }
529 dim = int( Out.Dimensions() );
530 T2* out = &Out[0];
531#pragma omp parallel for num_threads( threads ) schedule( static )
532 for( int i=0 ; i<dim ; i++ )
533 {
534 T2 _out = T2(0);
535 for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
536 out[i] = _out;
537 }
538 }
539 }
540 template<class T>
541 template<class T2>
542 void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , std::vector< T2* >& OutScratch , const std::vector< int >& bounds ) const
543 {
544 int dim = In.Dimensions();
545 const T2* in = &In[0];
546 int threads = OutScratch.size();
547#pragma omp parallel for num_threads( threads )
548 for( int t=0 ; t<threads ; t++ )
549 for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
550#pragma omp parallel for num_threads( threads )
551 for( int t=0 ; t<threads ; t++ )
552 {
553 T2* out = OutScratch[t];
554 for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
555 {
557 const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
558 const T2& in_i_ = in[i];
559 T2& out_i_ = out[i];
560 for( ; temp!=end ; temp++ )
561 {
562 int j = temp->N;
563 T2 v = temp->Value;
564 out_i_ += v * in[j];
565 out[j] += v * in_i_;
566 }
567 }
568 }
569 T2* out = &Out[0];
570#pragma omp parallel for num_threads( threads ) schedule( static )
571 for( int i=0 ; i<Out.Dimensions() ; i++ )
572 {
573 T2& _out = out[i];
574 _out = T2(0);
575 for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
576 }
577 }
578#if defined _WIN32 && !defined __MINGW32__
579#ifndef _AtomicIncrement_
580#define _AtomicIncrement_
581 inline void AtomicIncrement( volatile float* ptr , float addend )
582 {
583 float newValue = *ptr;
584 LONG& _newValue = *( (LONG*)&newValue );
585 LONG _oldValue;
586 for( ;; )
587 {
588 _oldValue = _newValue;
589 newValue += addend;
590 _newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue );
591 if( _newValue==_oldValue ) break;
592 }
593 }
594 inline void AtomicIncrement( volatile double* ptr , double addend )
595 //inline void AtomicIncrement( double* ptr , double addend )
596 {
597 double newValue = *ptr;
598 LONGLONG& _newValue = *( (LONGLONG*)&newValue );
599 LONGLONG _oldValue;
600 do
601 {
602 _oldValue = _newValue;
603 newValue += addend;
604 _newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue );
605 }
606 while( _newValue!=_oldValue );
607 }
608#endif // _AtomicIncrement_
609 template< class T >
610 void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< float >& In , Vector< float >& Out , int threads , const int* partition=NULL )
611 {
612 Out.SetZero();
613 const float* in = &In[0];
614 float* out = &Out[0];
615 if( partition )
616#pragma omp parallel for num_threads( threads )
617 for( int t=0 ; t<threads ; t++ )
618 for( int i=partition[t] ; i<partition[t+1] ; i++ )
619 {
620 const MatrixEntry< T >* temp = A[i];
621 const MatrixEntry< T >* end = temp + A.rowSizes[i];
622 const float& in_i = in[i];
623 float out_i = 0.;
624 for( ; temp!=end ; temp++ )
625 {
626 int j = temp->N;
627 float v = temp->Value;
628 out_i += v * in[j];
629 AtomicIncrement( out+j , v * in_i );
630 }
631 AtomicIncrement( out+i , out_i );
632 }
633 else
634#pragma omp parallel for num_threads( threads )
635 for( int i=0 ; i<A.rows ; i++ )
636 {
637 const MatrixEntry< T >* temp = A[i];
638 const MatrixEntry< T >* end = temp + A.rowSizes[i];
639 const float& in_i = in[i];
640 float out_i = 0.f;
641 for( ; temp!=end ; temp++ )
642 {
643 int j = temp->N;
644 float v = temp->Value;
645 out_i += v * in[j];
646 AtomicIncrement( out+j , v * in_i );
647 }
648 AtomicIncrement( out+i , out_i );
649 }
650 }
651 template< class T >
652 void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< double >& In , Vector< double >& Out , int threads , const int* partition=NULL )
653 {
654 Out.SetZero();
655 const double* in = &In[0];
656 double* out = &Out[0];
657
658 if( partition )
659#pragma omp parallel for num_threads( threads )
660 for( int t=0 ; t<threads ; t++ )
661 for( int i=partition[t] ; i<partition[t+1] ; i++ )
662 {
663 const MatrixEntry< T >* temp = A[i];
664 const MatrixEntry< T >* end = temp + A.rowSizes[i];
665 const double& in_i = in[i];
666 double out_i = 0.;
667 for( ; temp!=end ; temp++ )
668 {
669 int j = temp->N;
670 T v = temp->Value;
671 out_i += v * in[j];
672 AtomicIncrement( out+j , v * in_i );
673 }
674 AtomicIncrement( out+i , out_i );
675 }
676 else
677#pragma omp parallel for num_threads( threads )
678 for( int i=0 ; i<A.rows ; i++ )
679 {
680 const MatrixEntry< T >* temp = A[i];
681 const MatrixEntry< T >* end = temp + A.rowSizes[i];
682 const double& in_i = in[i];
683 double out_i = 0.;
684 for( ; temp!=end ; temp++ )
685 {
686 int j = temp->N;
687 T v = temp->Value;
688 out_i += v * in[j];
689 AtomicIncrement( out+j , v * in_i );
690 }
691 AtomicIncrement( out+i , out_i );
692 }
693 }
694
695 template< class T >
696 template< class T2 >
697 int SparseSymmetricMatrix< T >::SolveAtomic( const SparseSymmetricMatrix< T >& A , const Vector< T2 >& b , int iters , Vector< T2 >& x , T2 eps , int reset , int threads , bool solveNormal )
698 {
699 eps *= eps;
700 int dim = b.Dimensions();
701 if( reset )
702 {
703 x.Resize( dim );
704 x.SetZero();
705 }
706 Vector< T2 > r( dim ) , d( dim ) , q( dim );
707 Vector< T2 > temp;
708 if( solveNormal ) temp.Resize( dim );
709 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
710 const T2* _b = &b[0];
711
712 std::vector< int > partition( threads+1 );
713 {
714 int eCount = 0;
715 for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
716 partition[0] = 0;
717#pragma omp parallel for num_threads( threads )
718 for( int t=0 ; t<threads ; t++ )
719 {
720 int _eCount = 0;
721 for( int i=0 ; i<A.rows ; i++ )
722 {
723 _eCount += A.rowSizes[i];
724 if( _eCount*threads>=eCount*(t+1) )
725 {
726 partition[t+1] = i;
727 break;
728 }
729 }
730 }
731 partition[threads] = A.rows;
732 }
733 if( solveNormal )
734 {
735 MultiplyAtomic( A , x , temp , threads , &partition[0] );
736 MultiplyAtomic( A , temp , r , threads , &partition[0] );
737 MultiplyAtomic( A , b , temp , threads , &partition[0] );
738#pragma omp parallel for num_threads( threads ) schedule( static )
739 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
740 }
741 else
742 {
743 MultiplyAtomic( A , x , r , threads , &partition[0] );
744#pragma omp parallel for num_threads( threads ) schedule( static )
745 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
746 }
747 double delta_new = 0 , delta_0;
748 for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
749 delta_0 = delta_new;
750 if( delta_new<eps )
751 {
752 fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
753 return 0;
754 }
755 int ii;
756 for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
757 {
758 if( solveNormal ) MultiplyAtomic( A , d , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , q , threads , &partition[0] );
759 else MultiplyAtomic( A , d , q , threads , &partition[0] );
760 double dDotQ = 0;
761 for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
762 T2 alpha = T2( delta_new / dDotQ );
763#pragma omp parallel for num_threads( threads ) schedule( static )
764 for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
765 if( (ii%50)==(50-1) )
766 {
767 r.Resize( dim );
768 if( solveNormal ) MultiplyAtomic( A , x , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , r , threads , &partition[0] );
769 else MultiplyAtomic( A , x , r , threads , &partition[0] );
770#pragma omp parallel for num_threads( threads ) schedule( static )
771 for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
772 }
773 else
774#pragma omp parallel for num_threads( threads ) schedule( static )
775 for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;
776
777 double delta_old = delta_new;
778 delta_new = 0;
779 for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
780 T2 beta = T2( delta_new / delta_old );
781#pragma omp parallel for num_threads( threads ) schedule( static )
782 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
783 }
784 return ii;
785 }
786#endif // _WIN32 && !__MINGW32__
787 template< class T >
788 template< class T2 >
789 int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector< T2 >& scratch , T2 eps , int reset , bool addDCTerm , bool solveNormal )
790 {
791 int threads = scratch.threads();
792 eps *= eps;
793 int dim = int( b.Dimensions() );
794 Vector< T2 > r( dim ) , d( dim ) , q( dim ) , temp;
795 if( reset ) x.Resize( dim );
796 if( solveNormal ) temp.Resize( dim );
797 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
798 const T2* _b = &b[0];
799
800 double delta_new = 0 , delta_0;
801 if( solveNormal )
802 {
803 A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
804#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
805 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
806 }
807 else
808 {
809 A.Multiply( x , r , scratch , addDCTerm );
810#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
811 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
812 }
813 delta_0 = delta_new;
814 if( delta_new<eps )
815 {
816 fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
817 return 0;
818 }
819 int ii;
820 for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
821 {
822 if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
823 else A.Multiply( d , q , scratch , addDCTerm );
824 double dDotQ = 0;
825#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
826 for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
827 T2 alpha = T2( delta_new / dDotQ );
828 double delta_old = delta_new;
829 delta_new = 0;
830 if( (ii%50)==(50-1) )
831 {
832#pragma omp parallel for num_threads( threads )
833 for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
834 r.Resize( dim );
835 if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
836 else A.Multiply( x , r , scratch , addDCTerm );
837#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
838 for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
839 }
840 else
841#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
842 for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
843
844 T2 beta = T2( delta_new / delta_old );
845#pragma omp parallel for num_threads( threads )
846 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
847 }
848 return ii;
849 }
850 template< class T >
851 template< class T2 >
852 int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , T2 eps , int reset , int threads , bool addDCTerm , bool solveNormal )
853 {
854 eps *= eps;
855 int dim = int( b.Dimensions() );
856 MapReduceVector< T2 > outScratch;
857 if( threads<1 ) threads = 1;
858 if( threads>1 ) outScratch.resize( threads , dim );
859 if( reset ) x.Resize( dim );
860 Vector< T2 > r( dim ) , d( dim ) , q( dim );
861 Vector< T2 > temp;
862 if( solveNormal ) temp.Resize( dim );
863 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
864 const T2* _b = &b[0];
865
866 double delta_new = 0 , delta_0;
867
868 if( solveNormal )
869 {
870 if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
871 else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm ) , A.Multiply( b , temp , addDCTerm );
872#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
873 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
874 }
875 else
876 {
877 if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
878 else A.Multiply( x , r , addDCTerm );
879#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
880 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
881 }
882
883 delta_0 = delta_new;
884 if( delta_new<eps )
885 {
886 fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
887 return 0;
888 }
889 int ii;
890 for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
891 {
892 if( solveNormal )
893 {
894 if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
895 else A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm );
896 }
897 else
898 {
899 if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
900 else A.Multiply( d , q , addDCTerm );
901 }
902 double dDotQ = 0;
903#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
904 for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
905 T2 alpha = T2( delta_new / dDotQ );
906 double delta_old = delta_new;
907 delta_new = 0;
908
909 if( (ii%50)==(50-1) )
910 {
911#pragma omp parallel for num_threads( threads )
912 for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
913 r.SetZero();
914 if( solveNormal )
915 {
916 if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
917 else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm );
918 }
919 else
920 {
921 if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
922 else A.Multiply( x , r , addDCTerm );
923 }
924#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
925 for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
926 }
927 else
928 {
929#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
930 for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
931 }
932
933 T2 beta = T2( delta_new / delta_old );
934#pragma omp parallel for num_threads( threads )
935 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
936 }
937 return ii;
938 }
939
940 template<class T>
941 template<class T2>
942 int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b , int iters , Vector<T2>& solution , int reset )
943 {
944 Vector<T2> d,r,Md;
945
946 if(reset)
947 {
948 solution.Resize(b.Dimensions());
949 solution.SetZero();
950 }
951 Md.Resize(M.rows);
952 for( int i=0 ; i<iters ; i++ )
953 {
954 M.Multiply( solution , Md );
955 r = b-Md;
956 // solution_new[j] * diagonal[j] + ( Md[j] - solution_old[j] * diagonal[j] ) = b[j]
957 // solution_new[j] = ( b[j] - ( Md[j] - solution_old[j] * diagonal[j] ) ) / diagonal[j]
958 // solution_new[j] = ( b[j] - Md[j] ) / diagonal[j] + solution_old[j]
959 for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += (b[j]-Md[j])/diagonal[j];
960 }
961 return iters;
962 }
963 template< class T >
964 template< class T2 >
966 {
967 diagonal.Resize( SparseMatrix<T>::rows );
968 for( int i=0 ; i<SparseMatrix<T>::rows ; i++ )
969 {
970 diagonal[i] = 0.;
971 for( int j=0 ; j<SparseMatrix<T>::rowSizes[i] ; j++ ) if( SparseMatrix<T>::m_ppElements[i][j].N==i ) diagonal[i] += SparseMatrix<T>::m_ppElements[i][j].Value * 2;
972 }
973 }
974
975 }
976}
An exception that is thrown when the arguments number or type is wrong/unhandled.
MatrixEntry< T > ** m_ppElements
SparseMatrix< T > Multiply(const SparseMatrix< T > &M) const
SparseMatrix< T > Transpose() const
static void SetAllocator(int blockSize)
SparseMatrix< T > & operator*=(const T &V)
SparseMatrix< T > & operator=(const SparseMatrix< T > &M)
bool write(FILE *fp) const
static int SolveSymmetric(const SparseMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, const T2 eps=1e-8, int reset=1, int threads=1)
void SetRowSize(int row, int count)
static int Solve(const SparseMatrix< T > &M, const Vector< T > &b, int iters, Vector< T > &solution, const T eps=1e-8)
SparseMatrix< T > operator*(const T &V) const
static int UseAllocator(void)
static Allocator< MatrixEntry< T > > internalAllocator
static int Solve(const SparseSymmetricMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, T2 eps=1e-8, int reset=1, int threads=0, bool addDCTerm=false, bool solveNormal=false)
void getDiagonal(Vector< T2 > &diagonal) const
Vector< T2 > operator*(const Vector< T2 > &V) const
Vector< T2 > Multiply(const Vector< T2 > &V) const
static int SolveAtomic(const SparseSymmetricMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, T2 eps=1e-8, int reset=1, int threads=0, bool solveNormal=false)
std::size_t Dimensions() const
Definition vector.hpp:91
void Resize(std::size_t N)
Definition vector.hpp:63
T Dot(const Vector &V) const
Definition vector.hpp:239
PCL_EXPORTS void Multiply(const double in1[2], const double in2[2], double out[2])
void MultiplyAtomic(const SparseSymmetricMatrix< T > &A, const Vector< float > &In, Vector< float > &Out, int threads, const int *partition=NULL)
void AtomicIncrement(volatile float *ptr, float addend)
void read(std::istream &stream, Type &value)
Function for reading data from a stream.
Definition region_xy.h:46
void write(std::ostream &stream, Type value)
Function for writing data to a stream.
Definition region_xy.h:63
void resize(int threads, int dim)
T Value
int N