Point Cloud Library (PCL) 1.14.0
sparse_matrix.hpp
/*
Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer. Redistributions in binary form must reproduce
the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.

Neither the name of the Johns Hopkins University nor the names of its contributors
may be used to endorse or promote products derived from this software without specific
prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
*/

#ifdef _WIN32
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# endif // WIN32_LEAN_AND_MEAN
# ifndef NOMINMAX
# define NOMINMAX
# endif // NOMINMAX
# include <windows.h>
#endif //_WIN32

///////////////////
// SparseMatrix  //
///////////////////
///////////////////////////////////////////
// Static Allocator Methods and Members  //
///////////////////////////////////////////

namespace pcl
{
  namespace poisson
  {


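    // Allocation policy: when SetAllocator() is called with a positive block
    // size, UseAlloc is set and row storage is drawn from the shared pool
    // allocator below (pooled rows are never freed individually); otherwise
    // each row is malloc'ed and freed on its own.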
    template<class T> int SparseMatrix<T>::UseAlloc=0;
    template<class T> Allocator<MatrixEntry<T> > SparseMatrix<T>::internalAllocator;
    template<class T> int SparseMatrix<T>::UseAllocator(void){return UseAlloc;}
    template<class T>
    void SparseMatrix<T>::SetAllocator( int blockSize )
    {
      if(blockSize>0){
        UseAlloc=1;
        internalAllocator.set(blockSize);
      }
      else{UseAlloc=0;}
    }
    ///////////////////////////////////////
    // SparseMatrix Methods and Members  //
    ///////////////////////////////////////

    template< class T >
    SparseMatrix< T >::SparseMatrix( void )
    {
      _contiguous = false;
      _maxEntriesPerRow = 0;
      rows = 0;
      rowSizes = NULL;
      m_ppElements = NULL;
    }

    template< class T > SparseMatrix< T >::SparseMatrix( int rows ) : SparseMatrix< T >() { Resize( rows ); }
    template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ) : SparseMatrix< T >() { Resize( rows , maxEntriesPerRow ); }

    template< class T >
    SparseMatrix< T >::SparseMatrix( const SparseMatrix& M ) : SparseMatrix< T >()
    {
      if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
      else Resize( M.rows );
      for( int i=0 ; i<rows ; i++ )
      {
        SetRowSize( i , M.rowSizes[i] );
        memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
      }
    }
    template<class T>
    int SparseMatrix<T>::Entries( void ) const
    {
      int e = 0;
      for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
      return e;
    }
    template<class T>
    SparseMatrix<T>& SparseMatrix<T>::operator = ( const SparseMatrix<T>& M )
    {
      if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
      else Resize( M.rows );
      for( int i=0 ; i<rows ; i++ )
      {
        SetRowSize( i , M.rowSizes[i] );
        memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
      }
      return *this;
    }

    template<class T>
    SparseMatrix<T>::~SparseMatrix( void ){ Resize( 0 ); }

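    // Serialization: the binary layout written below is the row count (one
    // int), the rowSizes array (rows ints), then each row's MatrixEntry<T>
    // records back to back. The format is raw and therefore
    // architecture-dependent (endianness, integer width, struct padding).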
    template< class T >
    bool SparseMatrix< T >::write( const char* fileName ) const
    {
      FILE* fp = fopen( fileName , "wb" );
      if( !fp ) return false;
      bool ret = write( fp );
      fclose( fp );
      return ret;
    }
    template< class T >
    bool SparseMatrix< T >::read( const char* fileName )
    {
      FILE* fp = fopen( fileName , "rb" );
      if( !fp ) return false;
      bool ret = read( fp );
      fclose( fp );
      return ret;
    }
    template< class T >
    bool SparseMatrix< T >::write( FILE* fp ) const
    {
      if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
      if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
      for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
      return true;
    }
    template< class T >
    bool SparseMatrix< T >::read( FILE* fp )
    {
      int r;
      if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
      Resize( r );
      if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
      for( int i=0 ; i<rows ; i++ )
      {
        r = rowSizes[i];
        rowSizes[i] = 0;
        SetRowSize( i , r );
        if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
      }
      return true;
    }


    template< class T >
    void SparseMatrix< T >::Resize( int r )
    {
      if( rows>0 )
      {
        if( !UseAlloc )
        {
          if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
          else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
        }
        free( m_ppElements );
        free( rowSizes );
      }
      rows = r;
      if( r )
      {
        rowSizes = ( int* )malloc( sizeof( int ) * r );
        memset( rowSizes , 0 , sizeof( int ) * r );
        m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
      }
      _contiguous = false;
      _maxEntriesPerRow = 0;
    }
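    // Contiguous resize: a single slab of r*e entries is allocated and row i
    // points at offset i*e into it, so only m_ppElements[0] is ever freed.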
    template< class T >
    void SparseMatrix< T >::Resize( int r , int e )
    {
      if( rows>0 )
      {
        if( !UseAlloc )
        {
          if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
          else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
        }
        free( m_ppElements );
        free( rowSizes );
      }
      rows = r;
      if( r )
      {
        rowSizes = ( int* )malloc( sizeof( int ) * r );
        memset( rowSizes , 0 , sizeof( int ) * r );
        m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
        m_ppElements[0] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * r * e );
        for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
      }
      _contiguous = true;
      _maxEntriesPerRow = e;
    }

    template<class T>
    void SparseMatrix< T >::SetRowSize( int row , int count )
    {
      if( _contiguous )
      {
        if (count > _maxEntriesPerRow)
        {
          POISSON_THROW_EXCEPTION (pcl::poisson::PoissonBadArgumentException, "Attempted to set row size on contiguous matrix larger than max row size: (requested)"<< count << " > (maximum)" << _maxEntriesPerRow );
        }
        rowSizes[row] = count;
      }
      else if( row>=0 && row<rows )
      {
        if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements(count);
        else
        {
          if( rowSizes[row] ) free( m_ppElements[row] );
          if( count>0 ) m_ppElements[row] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * count );
        }
      }
    }


    template<class T>
    void SparseMatrix<T>::SetZero()
    {
      Resize(this->m_N, this->m_M);
    }

    template<class T>
    void SparseMatrix<T>::SetIdentity()
    {
      SetZero();
      for(int ij=0; ij < Min( this->Rows(), this->Columns() ); ij++)
        (*this)(ij,ij) = T(1);
    }

    template<class T>
    SparseMatrix<T> SparseMatrix<T>::operator * (const T& V) const
    {
      SparseMatrix<T> M(*this);
      M *= V;
      return M;
    }

    template<class T>
    SparseMatrix<T>& SparseMatrix<T>::operator *= (const T& V)
    {
      for (int i=0; i<rows; i++)
      {
        for(int ii=0;ii<rowSizes[i];ii++){m_ppElements[i][ii].Value*=V;}
      }
      return *this;
    }

    template<class T>
    SparseMatrix<T> SparseMatrix<T>::Multiply( const SparseMatrix<T>& M ) const
    {
      SparseMatrix<T> R( rows, M._maxEntriesPerRow );
      for(int i=0; i<R.rows; i++){
        for(int ii=0;ii<rowSizes[i];ii++){
          int N=m_ppElements[i][ii].N;
          T Value=m_ppElements[i][ii].Value;
          for(int jj=0;jj<M.rowSizes[N];jj++){
            R(i,M.m_ppElements[N][jj].N) += Value * M.m_ppElements[N][jj].Value;
          }
        }
      }
      return R;
    }

    template<class T>
    template<class T2>
    Vector<T2> SparseMatrix<T>::Multiply( const Vector<T2>& V ) const
    {
      Vector<T2> R( rows );

      for (int i=0; i<rows; i++)
      {
        T2 temp=T2();
        for(int ii=0;ii<rowSizes[i];ii++){
          temp+=m_ppElements[i][ii].Value * V.m_pV[m_ppElements[i][ii].N];
        }
        R(i)=temp;
      }
      return R;
    }

    template<class T>
    template<class T2>
    void SparseMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , int threads ) const
    {
#pragma omp parallel for num_threads( threads ) schedule( static )
      for( int i=0 ; i<rows ; i++ )
      {
        T2 temp = T2();
        temp *= 0;
        for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[m_ppElements[i][j].N];
        Out.m_pV[i] = temp;
      }
    }

    template<class T>
    SparseMatrix<T> SparseMatrix<T>::operator * (const SparseMatrix<T>& M) const
    {
      return Multiply(M);
    }
    template<class T>
    template<class T2>
    Vector<T2> SparseMatrix<T>::operator * (const Vector<T2>& V) const
    {
      return Multiply(V);
    }

    template<class T>
    SparseMatrix<T> SparseMatrix<T>::Transpose() const
    {
      SparseMatrix<T> M( _maxEntriesPerRow, rows );

      for (int i=0; i<rows; i++)
      {
        for(int ii=0;ii<rowSizes[i];ii++){
          M(m_ppElements[i][ii].N,i) = m_ppElements[i][ii].Value;
        }
      }
      return M;
    }

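    // Conjugate-gradient solve of M x = b for symmetric M. The residual
    // r = b - M x is updated incrementally (r -= alpha q) and recomputed
    // exactly every 50 iterations to limit floating-point drift; iteration
    // stops when the squared residual falls below eps times its initial value.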
    template<class T>
    template<class T2>
    int SparseMatrix<T>::SolveSymmetric( const SparseMatrix<T>& M , const Vector<T2>& b , int iters , Vector<T2>& solution , const T2 eps , int reset , int threads )
    {
      if( reset )
      {
        solution.Resize( b.Dimensions() );
        solution.SetZero();
      }
      Vector< T2 > r;
      r.Resize( solution.Dimensions() );
      M.Multiply( solution , r );
      r = b - r;
      Vector< T2 > d = r;
      double delta_new = 0 , delta_0;
      for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
      delta_0 = delta_new;
      if( delta_new<eps ) return 0;
      int ii;
      Vector< T2 > q;
      q.Resize( d.Dimensions() );
      for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
      {
        M.Multiply( d , q , threads );
        double dDotQ = 0 , alpha = 0;
        for( int i=0 ; i<d.Dimensions() ; i++ ) dDotQ += d.m_pV[i] * q.m_pV[i];
        alpha = delta_new / dDotQ;
#pragma omp parallel for num_threads( threads ) schedule( static )
        for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha );
        if( !(ii%50) )
        {
          r.Resize( solution.Dimensions() );
          M.Multiply( solution , r , threads );
          r = b - r;
        }
        else
#pragma omp parallel for num_threads( threads ) schedule( static )
          for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2(alpha);

        double delta_old = delta_new , beta;
        delta_new = 0;
        for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i]*r.m_pV[i];
        beta = delta_new / delta_old;
#pragma omp parallel for num_threads( threads ) schedule( static )
        for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta );
      }
      return ii;
    }

    // Solve for x s.t. M(x)=b by solving for x s.t. M^tM(x)=M^t(b)
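    // Since M^t M is symmetric positive semi-definite, conjugate gradient
    // applies to the normal equations even when M itself is not symmetric;
    // the price is that the condition number of the system is squared.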
    template<class T>
    int SparseMatrix<T>::Solve(const SparseMatrix<T>& M,const Vector<T>& b,int iters,Vector<T>& solution,const T eps){
      SparseMatrix mTranspose=M.Transpose();
      Vector<T> bb=mTranspose*b;
      Vector<T> d,r,Md;
      T alpha,beta,rDotR;
      int i;

      solution.Resize(M.Columns());
      solution.SetZero();

      d=r=bb;
      rDotR=r.Dot(r);
      for(i=0;i<iters && rDotR>eps;i++){
        T temp;
        Md=mTranspose*(M*d);
        alpha=rDotR/d.Dot(Md);
        solution+=d*alpha;
        r-=Md*alpha;
        temp=r.Dot(r);
        beta=temp/rDotR;
        rDotR=temp;
        d=r+d*beta;
      }
      return i;
    }



    ///////////////////////////
    // SparseSymmetricMatrix //
    ///////////////////////////
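    // Only one triangle of the matrix is stored: during multiplication each
    // stored entry (i,j) contributes M_ij * in[j] to out[i] and M_ij * in[i]
    // to out[j]. A stored diagonal entry is therefore applied twice, which is
    // why getDiagonal() at the end of this file doubles the stored values.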
    template<class T>
    template<class T2>
    Vector<T2> SparseSymmetricMatrix<T>::operator * (const Vector<T2>& V) const { return Multiply(V); }
    template<class T>
    template<class T2>
    Vector<T2> SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& V ) const
    {
      Vector<T2> R( SparseMatrix<T>::rows );

      for(int i=0; i<SparseMatrix<T>::rows; i++){
        for(int ii=0;ii<SparseMatrix<T>::rowSizes[i];ii++){
          int j=SparseMatrix<T>::m_ppElements[i][ii].N;
          R(i)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[j];
          R(j)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[i];
        }
      }
      return R;
    }

    template<class T>
    template<class T2>
    void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , bool addDCTerm ) const
    {
      Out.SetZero();
      const T2* in = &In[0];
      T2* out = &Out[0];
      T2 dcTerm = T2( 0 );
      if( addDCTerm )
      {
        for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i];
        dcTerm /= SparseMatrix<T>::rows;
      }
      for( int i=0 ; i<this->SparseMatrix<T>::rows ; i++ )
      {
        const MatrixEntry<T>* temp = SparseMatrix<T>::m_ppElements[i];
        const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
        const T2& in_i_ = in[i];
        T2 out_i = T2(0);
        for( ; temp!=end ; temp++ )
        {
          int j=temp->N;
          T2 v=temp->Value;
          out_i += v * in[j];
          out[j] += v * in_i_;
        }
        out[i] += out_i;
      }
      if( addDCTerm ) for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm;
    }
    template<class T>
    template<class T2>
    void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , MapReduceVector< T2 >& OutScratch , bool addDCTerm ) const
    {
      int dim = int( In.Dimensions() );
      const T2* in = &In[0];
      int threads = OutScratch.threads();
      if( addDCTerm )
      {
        T2 dcTerm = 0;
#pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
        for( int t=0 ; t<threads ; t++ )
        {
          T2* out = OutScratch[t];
          memset( out , 0 , sizeof( T2 ) * dim );
          for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
          {
            const T2& in_i_ = in[i];
            T2& out_i_ = out[i];
            for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
            {
              int j = temp->N;
              T2 v = temp->Value;
              out_i_ += v * in[j];
              out[j] += v * in_i_;
            }
            dcTerm += in_i_;
          }
        }
        dcTerm /= dim;
        dim = int( Out.Dimensions() );
        T2* out = &Out[0];
#pragma omp parallel for num_threads( threads ) schedule( static )
        for( int i=0 ; i<dim ; i++ )
        {
          T2 _out = dcTerm;
          for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
          out[i] = _out;
        }
      }
      else
      {
#pragma omp parallel for num_threads( threads )
        for( int t=0 ; t<threads ; t++ )
        {
          T2* out = OutScratch[t];
          memset( out , 0 , sizeof( T2 ) * dim );
          for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
          {
            const T2& in_i_ = in[i];
            T2& out_i_ = out[i];
            for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
            {
              int j = temp->N;
              T2 v = temp->Value;
              out_i_ += v * in[j];
              out[j] += v * in_i_;
            }
          }
        }
        dim = int( Out.Dimensions() );
        T2* out = &Out[0];
#pragma omp parallel for num_threads( threads ) schedule( static )
        for( int i=0 ; i<dim ; i++ )
        {
          T2 _out = T2(0);
          for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
          out[i] = _out;
        }
      }
    }
    template<class T>
    template<class T2>
    void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , std::vector< T2* >& OutScratch , const std::vector< int >& bounds ) const
    {
      int dim = In.Dimensions();
      const T2* in = &In[0];
      int threads = OutScratch.size();
#pragma omp parallel for num_threads( threads )
      for( int t=0 ; t<threads ; t++ )
        for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
#pragma omp parallel for num_threads( threads )
      for( int t=0 ; t<threads ; t++ )
      {
        T2* out = OutScratch[t];
        for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
        {
          const MatrixEntry<T>* temp = SparseMatrix<T>::m_ppElements[i];
          const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
          const T2& in_i_ = in[i];
          T2& out_i_ = out[i];
          for( ; temp!=end ; temp++ )
          {
            int j = temp->N;
            T2 v = temp->Value;
            out_i_ += v * in[j];
            out[j] += v * in_i_;
          }
        }
      }
      T2* out = &Out[0];
#pragma omp parallel for num_threads( threads ) schedule( static )
      for( int i=0 ; i<int( Out.Dimensions() ) ; i++ )
      {
        T2& _out = out[i];
        _out = T2(0);
        for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
      }
    }
#if defined _WIN32 && !defined __MINGW32__
#ifndef _AtomicIncrement_
#define _AtomicIncrement_
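    // Lock-free accumulate for Windows: the float's bits are reinterpreted
    // as a LONG and InterlockedCompareExchange is retried until no other
    // thread modified *ptr between the read and the swap (the double
    // overload below does the same with LONGLONG).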
    inline void AtomicIncrement( volatile float* ptr , float addend )
    {
      float newValue = *ptr;
      LONG& _newValue = *( (LONG*)&newValue );
      LONG _oldValue;
      for( ;; )
      {
        _oldValue = _newValue;
        newValue += addend;
        _newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue );
        if( _newValue==_oldValue ) break;
      }
    }
    inline void AtomicIncrement( volatile double* ptr , double addend )
    {
      double newValue = *ptr;
      LONGLONG& _newValue = *( (LONGLONG*)&newValue );
      LONGLONG _oldValue;
      do
      {
        _oldValue = _newValue;
        newValue += addend;
        _newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue );
      }
      while( _newValue!=_oldValue );
    }
#endif // _AtomicIncrement_
    template< class T >
    void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< float >& In , Vector< float >& Out , int threads , const int* partition=NULL )
    {
      Out.SetZero();
      const float* in = &In[0];
      float* out = &Out[0];
      if( partition )
#pragma omp parallel for num_threads( threads )
        for( int t=0 ; t<threads ; t++ )
          for( int i=partition[t] ; i<partition[t+1] ; i++ )
          {
            const MatrixEntry< T >* temp = A[i];
            const MatrixEntry< T >* end = temp + A.rowSizes[i];
            const float& in_i = in[i];
            float out_i = 0.f;
            for( ; temp!=end ; temp++ )
            {
              int j = temp->N;
              float v = temp->Value;
              out_i += v * in[j];
              AtomicIncrement( out+j , v * in_i );
            }
            AtomicIncrement( out+i , out_i );
          }
      else
#pragma omp parallel for num_threads( threads )
        for( int i=0 ; i<A.rows ; i++ )
        {
          const MatrixEntry< T >* temp = A[i];
          const MatrixEntry< T >* end = temp + A.rowSizes[i];
          const float& in_i = in[i];
          float out_i = 0.f;
          for( ; temp!=end ; temp++ )
          {
            int j = temp->N;
            float v = temp->Value;
            out_i += v * in[j];
            AtomicIncrement( out+j , v * in_i );
          }
          AtomicIncrement( out+i , out_i );
        }
    }
    template< class T >
    void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< double >& In , Vector< double >& Out , int threads , const int* partition=NULL )
    {
      Out.SetZero();
      const double* in = &In[0];
      double* out = &Out[0];

      if( partition )
#pragma omp parallel for num_threads( threads )
        for( int t=0 ; t<threads ; t++ )
          for( int i=partition[t] ; i<partition[t+1] ; i++ )
          {
            const MatrixEntry< T >* temp = A[i];
            const MatrixEntry< T >* end = temp + A.rowSizes[i];
            const double& in_i = in[i];
            double out_i = 0.;
            for( ; temp!=end ; temp++ )
            {
              int j = temp->N;
              T v = temp->Value;
              out_i += v * in[j];
              AtomicIncrement( out+j , v * in_i );
            }
            AtomicIncrement( out+i , out_i );
          }
      else
#pragma omp parallel for num_threads( threads )
        for( int i=0 ; i<A.rows ; i++ )
        {
          const MatrixEntry< T >* temp = A[i];
          const MatrixEntry< T >* end = temp + A.rowSizes[i];
          const double& in_i = in[i];
          double out_i = 0.;
          for( ; temp!=end ; temp++ )
          {
            int j = temp->N;
            T v = temp->Value;
            out_i += v * in[j];
            AtomicIncrement( out+j , v * in_i );
          }
          AtomicIncrement( out+i , out_i );
        }
    }

    template< class T >
    template< class T2 >
    int SparseSymmetricMatrix< T >::SolveAtomic( const SparseSymmetricMatrix< T >& A , const Vector< T2 >& b , int iters , Vector< T2 >& x , T2 eps , int reset , int threads , bool solveNormal )
    {
      eps *= eps;
      int dim = b.Dimensions();
      if( reset )
      {
        x.Resize( dim );
        x.SetZero();
      }
      Vector< T2 > r( dim ) , d( dim ) , q( dim );
      Vector< T2 > temp;
      if( solveNormal ) temp.Resize( dim );
      T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
      const T2* _b = &b[0];

      std::vector< int > partition( threads+1 );
      {
        int eCount = 0;
        for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
        partition[0] = 0;
#pragma omp parallel for num_threads( threads )
        for( int t=0 ; t<threads ; t++ )
        {
          int _eCount = 0;
          for( int i=0 ; i<A.rows ; i++ )
          {
            _eCount += A.rowSizes[i];
            if( _eCount*threads>=eCount*(t+1) )
            {
              partition[t+1] = i;
              break;
            }
          }
        }
        partition[threads] = A.rows;
      }
      if( solveNormal )
      {
        MultiplyAtomic( A , x , temp , threads , &partition[0] );
        MultiplyAtomic( A , temp , r , threads , &partition[0] );
        MultiplyAtomic( A , b , temp , threads , &partition[0] );
#pragma omp parallel for num_threads( threads ) schedule( static )
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
      }
      else
      {
        MultiplyAtomic( A , x , r , threads , &partition[0] );
#pragma omp parallel for num_threads( threads ) schedule( static )
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
      }
      double delta_new = 0 , delta_0;
      for( int i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
      delta_0 = delta_new;
      if( delta_new<eps )
      {
        fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
        return 0;
      }
      int ii;
      for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
      {
        if( solveNormal ) MultiplyAtomic( A , d , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , q , threads , &partition[0] );
        else MultiplyAtomic( A , d , q , threads , &partition[0] );
        double dDotQ = 0;
        for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
        T2 alpha = T2( delta_new / dDotQ );
#pragma omp parallel for num_threads( threads ) schedule( static )
        for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
        if( (ii%50)==(50-1) )
        {
          r.Resize( dim );
          if( solveNormal ) MultiplyAtomic( A , x , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , r , threads , &partition[0] );
          else MultiplyAtomic( A , x , r , threads , &partition[0] );
#pragma omp parallel for num_threads( threads ) schedule( static )
          for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
        }
        else
#pragma omp parallel for num_threads( threads ) schedule( static )
          for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;

        double delta_old = delta_new;
        delta_new = 0;
        for( int i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
        T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads ) schedule( static )
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
      }
      return ii;
    }
#endif // _WIN32 && !__MINGW32__
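    // The MapReduceVector overload of Solve below gives each thread a private
    // output buffer during the matrix multiply (see the Multiply overloads
    // above); the per-thread buffers are then summed, avoiding atomics on
    // rows shared between threads.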
    template< class T >
    template< class T2 >
    int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector< T2 >& scratch , T2 eps , int reset , bool addDCTerm , bool solveNormal )
    {
      int threads = scratch.threads();
      eps *= eps;
      int dim = int( b.Dimensions() );
      Vector< T2 > r( dim ) , d( dim ) , q( dim ) , temp;
      if( reset ) x.Resize( dim );
      if( solveNormal ) temp.Resize( dim );
      T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
      const T2* _b = &b[0];

      double delta_new = 0 , delta_0;
      if( solveNormal )
      {
        A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
      }
      else
      {
        A.Multiply( x , r , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
      }
      delta_0 = delta_new;
      if( delta_new<eps )
      {
        fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
        return 0;
      }
      int ii;
      for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
      {
        if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
        else A.Multiply( d , q , scratch , addDCTerm );
        double dDotQ = 0;
#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
        for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
        T2 alpha = T2( delta_new / dDotQ );
        double delta_old = delta_new;
        delta_new = 0;
        if( (ii%50)==(50-1) )
        {
#pragma omp parallel for num_threads( threads )
          for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
          r.Resize( dim );
          if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
          else A.Multiply( x , r , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
          for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
        }
        else
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
          for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;

        T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads )
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
      }
      return ii;
    }
    template< class T >
    template< class T2 >
    int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , T2 eps , int reset , int threads , bool addDCTerm , bool solveNormal )
    {
      eps *= eps;
      int dim = int( b.Dimensions() );
      MapReduceVector< T2 > outScratch;
      if( threads<1 ) threads = 1;
      if( threads>1 ) outScratch.resize( threads , dim );
      if( reset ) x.Resize( dim );
      Vector< T2 > r( dim ) , d( dim ) , q( dim );
      Vector< T2 > temp;
      if( solveNormal ) temp.Resize( dim );
      T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
      const T2* _b = &b[0];

      double delta_new = 0 , delta_0;

      if( solveNormal )
      {
        if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
        else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm ) , A.Multiply( b , temp , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
      }
      else
      {
        if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
        else A.Multiply( x , r , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
      }

      delta_0 = delta_new;
      if( delta_new<eps )
      {
        fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
        return 0;
      }
      int ii;
      for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
      {
        if( solveNormal )
        {
          if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
          else A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm );
        }
        else
        {
          if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
          else A.Multiply( d , q , addDCTerm );
        }
        double dDotQ = 0;
#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
        for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
        T2 alpha = T2( delta_new / dDotQ );
        double delta_old = delta_new;
        delta_new = 0;

        if( (ii%50)==(50-1) )
        {
#pragma omp parallel for num_threads( threads )
          for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
          r.SetZero();
          if( solveNormal )
          {
            if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
            else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm );
          }
          else
          {
            if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
            else A.Multiply( x , r , addDCTerm );
          }
#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
          for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
        }
        else
        {
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
          for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
        }

        T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads )
        for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
      }
      return ii;
    }

    template<class T>
    template<class T2>
    int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b , int iters , Vector<T2>& solution , int reset )
    {
      Vector<T2> d,r,Md;

      if(reset)
      {
        solution.Resize(b.Dimensions());
        solution.SetZero();
      }
      Md.Resize(M.rows);
      for( int i=0 ; i<iters ; i++ )
      {
        M.Multiply( solution , Md );
        r = b-Md;
        // solution_new[j] * diagonal[j] + ( Md[j] - solution_old[j] * diagonal[j] ) = b[j]
        // solution_new[j] = ( b[j] - ( Md[j] - solution_old[j] * diagonal[j] ) ) / diagonal[j]
        // solution_new[j] = ( b[j] - Md[j] ) / diagonal[j] + solution_old[j]
        for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += (b[j]-Md[j])/diagonal[j];
      }
      return iters;
    }
    template< class T >
    template< class T2 >
    void SparseSymmetricMatrix< T >::getDiagonal( Vector< T2 >& diagonal ) const
    {
      diagonal.Resize( SparseMatrix<T>::rows );
      for( int i=0 ; i<SparseMatrix<T>::rows ; i++ )
      {
        diagonal[i] = 0.;
        for( int j=0 ; j<SparseMatrix<T>::rowSizes[i] ; j++ ) if( SparseMatrix<T>::m_ppElements[i][j].N==i ) diagonal[i] += SparseMatrix<T>::m_ppElements[i][j].Value * 2;
      }
    }

  }
}
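
For orientation, here is a minimal usage sketch (not part of the header above). It assembles the symmetric tridiagonal system with 2 on the diagonal and -1 off it, then solves it with SolveSymmetric(). The include path and the exact set of public accessors used here (operator[], the m_pV member, the Vector default constructor) are assumptions based on the poisson4 headers; treat this as an illustration, not canonical API documentation.

#include <pcl/surface/3rdparty/poisson4/sparse_matrix.h> // assumed include path

int main()
{
  using pcl::poisson::SparseMatrix;
  using pcl::poisson::Vector;

  const int n = 4;
  // Contiguous storage with up to n entries per row; SetRowSize() then only
  // records how many of the pre-allocated entries each row actually uses.
  SparseMatrix<float> M( n , n );
  for( int i=0 ; i<n ; i++ )
  {
    M.SetRowSize( i , ( i==0 || i==n-1 ) ? 2 : 3 );
    int k = 0;
    if( i>0 )   { M[i][k].N = i-1 ; M[i][k].Value = -1.f ; k++ ; }
    M[i][k].N = i ; M[i][k].Value = 2.f ; k++ ;
    if( i<n-1 ) { M[i][k].N = i+1 ; M[i][k].Value = -1.f ; k++ ; }
  }

  Vector<float> b , x;
  b.Resize( n );
  for( int i=0 ; i<n ; i++ ) b.m_pV[i] = 1.f;

  // Conjugate gradient on the symmetric system; the return value is the
  // number of iterations performed (the default reset=1 zero-initializes x).
  int iters = SparseMatrix<float>::SolveSymmetric( M , b , 100 , x );
  (void) iters;
  return 0;
}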