/***************************************************************************** Copyright (c) 2005, Uwe Schmitt (http://www.procoders.net) All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of procoders.net nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ #ifndef _SVMPP_SPARSEVECTOR_H__ #define _SVMPP_SPARSEVECTOR_H__ #include #include #include #include #include #include #include class SVIterWrap; namespace pcsvm { class KernelBase; using namespace pcsvm; //////////////////////////////////////////////////////////////////////////////////////////////////////////////// // // sparse vector class // //////////////////////////////////////////////////////////////////////////////////////////////////////////////// // class SparseVector { public: //////////////////////////////////////////////////////////////////////////////////////////////////// // constructors //////////////////////////////////////////////////////////////////////////////////////////////////// SparseVector(): data_() {}; SparseVector(const SparseVector &); SparseVector(const SparseVector *); SparseVector(const std::vector &v); SparseVector(const float *v, int n); SparseVector& operator=(const SparseVector& other); std::string to_str() const; // needed by python-interface: SparseVector copy() const; //////////////////////////////////////////////////////////////////////////////////////////////////// // modification methods //////////////////////////////////////////////////////////////////////////////////////////////////// // put is fast if you keep idx increasing -> constant time ( due to push_back()) void put(int idx, float value); float get(int idx) const; // pruning void prune(float epsilon); // adds alpha*ypsilon to *this and returns reference ! SparseVector& plusAlphaYpsilon(float alpha, const SparseVector &ypsilon); //////////////////////////////////////////////////////////////////////////////////////////////////// // processing / accessing internal structure data_ //////////////////////////////////////////////////////////////////////////////////////////////////// inline int lastIndex() const { return data_.size() ? data_.rbegin()->index: -1; } inline int entries() const { return data_.size(); } inline void clear() { data_.clear(); } inline bool empty() const { return data_.empty(); } //////////////////////////////////////////////////////////////////////////////////////////////////// // operator[idx] does not deliver reference to object of type T if the accessed // element is zero and thus does not appear in the internal list representing the vector. // Instead Proy(idx, *this) is returned. Proxy() has the appropriate methods // for getting and setting. //////////////////////////////////////////////////////////////////////////////////////////////////// class Proxy { public: Proxy(int idx, SparseVector &sv): _idx(idx), _sv(sv) {} operator float() { return _sv.get(_idx) ;} float operator=(float val) { _sv.put(_idx, val); return val;} private: int _idx; SparseVector &_sv; }; inline Proxy operator[](int idx) { return Proxy(idx, *this); } float operator[](int idx) const { return get(idx); } //////////////////////////////////////////////////////////////////////////////////////////////////// // arithmetic operations //////////////////////////////////////////////////////////////////////////////////////////////////// const SparseVector& operator*=(float); // inner product inline double norm() const { return std::sqrt(double(dot(*this, *this))); } float operator*(const SparseVector &other) const { return dot(*this, other); } // unusual division operator: v1/v2 minimizes || v1 - alpha *v2 || resp. alpha double operator/(const SparseVector &other) const { return dot(*this, other)/dot(other, other); } const SparseVector operator+(const SparseVector &other) const { return axpy(1, *this, other) ;} const SparseVector& operator+=(const SparseVector &other) { return this->plusAlphaYpsilon(+1, other) ;} const SparseVector operator-(const SparseVector &other) const { return axpy(-1, other, *this) ;} const SparseVector& operator-=(const SparseVector &other) { return this->plusAlphaYpsilon(-1, other) ;} const SparseVector operator-() const { return -1* *this; } //////////////////////////////////////////////////////////////////////////////////////////////////// // conversion methods //////////////////////////////////////////////////////////////////////////////////////////////////// std::vector toVector(int startIndex=0, int endIndex=-1) const ; typedef float (*manip)(float); // basis class for map()-style access from python: struct FunctionType { FunctionType(): _f(0) { }; virtual ~FunctionType() {}; FunctionType(manip f): _f(f) {}; virtual float operator()(float x) const { return _f(x); } private: manip _f; }; SparseVector map(const FunctionType&) const; inline SparseVector map(manip f) const { return map(FunctionType(f)) ;} ///////////////////////////////////////////////////////////////////////////////////////////////// // friends ///////////////////////////////////////////////////////////////////////////////////////////////// friend SparseVector axpy(float a, const SparseVector &x, const SparseVector &y); friend float dot(const SparseVector &x, const SparseVector &y); friend double pDist(const SparseVector &x, const SparseVector &y, double p); friend double dist(const SparseVector &x, const SparseVector &y); // skalar multipliacaton for most scalar types: friend SparseVector operator*(float skal, const SparseVector &sv); friend SparseVector operator*(const SparseVector &sv,float skal); // other template instances with different base types are friends // and thus are allowed to access the internal list friend class KernelFunction; friend class KernelBase; friend class AnyIter; friend class BothIter; friend class FullIter; // for python module friend class ::SVIterWrap; friend std::ostream& operator<<(std::ostream &o, const SparseVector &); struct item { item() : index(0), value(0) {}; item(int i, float v) : index(i), value(v) {}; int index; float value ; }; private: // sparse vector : list sorted by index in decreasing order // derived class such that begin() and end() can be restricted // to certain parts of the sparse vector ! std::list data_; // len of list public: // typedef typename std::list::const_iterator iter; typedef std::list::const_iterator const_iterator; typedef std::list::iterator iterator; inline const_iterator begin() const { return data_.begin(); } inline iterator begin() { return data_.begin(); } inline const_iterator end() const { return data_.end(); } inline iterator end() { return data_.end(); } inline iterator erase(iterator which) { return data_.erase(which); } inline iterator erase(iterator from, iterator to) { return data_.erase(from, to); } }; /////////////////////////////////////////////////////////////////////////////////////////////////////// // hilfsiteratoren /////////////////////////////////////////////////////////////////////////////////////////////////////// struct IterValue { IterValue(): val1(0), val2(0), idx(0) {}; float val1; float val2; int idx; }; // ANYITER: iterates over two sparse vectors, and delivers triple (idx, val1, val2) for which // at least val1 or val2 is not zero // application: eg sum of two sparse vectors class AnyIter { public: AnyIter(const SparseVector &v1, const SparseVector &v2): _v1iter(v1.data_.begin()), _v2iter(v2.data_.begin()), _v1end(v1.data_.end()), _v2end(v2.data_.end()) { updateActual(); }; AnyIter(const SparseVector::const_iterator &v1begin, const SparseVector::const_iterator &v1end, const SparseVector::const_iterator &v2begin, const SparseVector::const_iterator &v2end): _v1iter(v1begin), _v2iter(v2begin), _v1end(v1end), _v2end(v2end) { updateActual(); }; bool isValid(); AnyIter& operator++(); const IterValue* operator->(); IterValue& operator*(); protected: void updateActual(); SparseVector::const_iterator _v1iter; SparseVector::const_iterator _v2iter; SparseVector::const_iterator _v1end; SparseVector::const_iterator _v2end; IterValue actualValue_; }; // BOTHITER: iterates over two sparse vectors and yields tuples (idx, val1, val2) for which // both values val1 and val2 are not zero // appliation, eg: dot product class BothIter// : public AnyIter { public: BothIter(const SparseVector &v1, const SparseVector &v2): _v1iter(v1.data_.begin()), _v2iter(v2.data_.begin()), _v1end(v1.data_.end()), _v2end(v2.data_.end()) { updateNext(); }; BothIter(const SparseVector::const_iterator &v1begin, const SparseVector::const_iterator &v1end, const SparseVector::const_iterator &v2begin, const SparseVector::const_iterator &v2end) { _v1iter= v1begin; _v2iter = v2begin; _v1end = v1end; _v2end = v2end; updateNext(); }; bool isValid(); BothIter& operator++(); const IterValue* operator->(); IterValue& operator*(); private: void updateNext(); SparseVector::const_iterator _v1iter; SparseVector::const_iterator _v2iter; SparseVector::const_iterator _v1end; SparseVector::const_iterator _v2end; protected: IterValue actualValue_; }; // FULLITER: iterates over pair of sparse vectors v1, v2 and delivers tuples (idx, val1, val2) for // 0 <= idx <= max( v1.lastIndex(), v2.lastIndex()) class FullIter { public: FullIter(const SparseVector &v1, const SparseVector &v2): _v1iter(v1.data_.begin()), _v2iter(v2.data_.begin()), _v1end(v1.data_.end()), _v2end(v2.data_.end()) { initFirstPair(); }; FullIter(const SparseVector::const_iterator &v1begin, const SparseVector::const_iterator &v1end, const SparseVector::const_iterator &v2begin, const SparseVector::const_iterator &v2end) : _v1iter(v1begin), _v2iter(v2begin), _v1end(v1end), _v2end(v2end) { initFirstPair(); }; bool isValid(); FullIter& operator++(); const IterValue* operator->(); IterValue& operator*(); private: void initFirstPair(); SparseVector::const_iterator _v1iter; SparseVector::const_iterator _v2iter; SparseVector::const_iterator _v1end; SparseVector::const_iterator _v2end; int _actualIndex; bool _isValid; protected: IterValue actualValue_; }; double norm(const SparseVector &sv); float dot(const SparseVector &x, const SparseVector &y); float _dot(const SparseVector::const_iterator &v1begin, const SparseVector::const_iterator &v1end, const SparseVector::const_iterator &v2begin, const SparseVector::const_iterator &v2end); ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // // enough declarations, here come definitions ! // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// // iterators for iterating over two sparse vectors, /////////////////////////////////////////////////////////////////////////////////////////////////////// // ANYITER: iterates over two sparse vectors, and delivers triple (idx, val1, val2) for which // at least val1 or val2 is not zero // application: eg sum of two sparse vectors // BOTHITER: iterates over two sparse vectors and yields tuples (idx, val1, val2) for which // both values val1 and val2 are not zero // appliation, eg: dot product /////////////////////////////////////////////////////////////////////////////////////////////////////// // helping functions, take iterators on sparse vectors instead of sparse vectors themselves /////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Conversion functions /////////////////////////////////////////////////////////////////////////////////////////////////////////////// } #endif