From 9ffdeb3f3cbd369efc0edfb3e9cd9acded44d6bb Mon Sep 17 00:00:00 2001 From: Syd Xu Date: Thu, 11 Nov 2021 15:35:49 +0800 Subject: [PATCH 1/9] feat(counter): add p2pnet crowd counter --- README.md | 2 + go/README.md | 2 + go/counter/cgo.go | 12 +++ go/counter/cgo_vulkan.go | 11 ++ go/counter/counter.go | 40 ++++++++ go/counter/doc.go | 2 + go/counter/p2pnet.go | 45 +++++++++ go/error.go | 6 ++ go/examples/counter/main.go | 92 +++++++++++++++++ src/CMakeLists.txt | 6 ++ src/counter/counter.cpp | 21 ++++ src/counter/counter.h | 17 ++++ src/counter/counter.hpp | 13 +++ src/counter/p2pnet/p2pnet.cpp | 185 ++++++++++++++++++++++++++++++++++ src/counter/p2pnet/p2pnet.hpp | 28 +++++ 15 files changed, 482 insertions(+) create mode 100644 go/counter/cgo.go create mode 100644 go/counter/cgo_vulkan.go create mode 100644 go/counter/counter.go create mode 100644 go/counter/doc.go create mode 100644 go/counter/p2pnet.go create mode 100644 go/examples/counter/main.go create mode 100644 src/counter/counter.cpp create mode 100644 src/counter/counter.h create mode 100644 src/counter/counter.hpp create mode 100644 src/counter/p2pnet/p2pnet.cpp create mode 100644 src/counter/p2pnet/p2pnet.hpp diff --git a/README.md b/README.md index ede2dde..21563c0 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,8 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM - animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing) - tracker - lighttrack [Google Drive](https://drive.google.com/drive/folders/16cxns_xzSOABHn6UcY1OXyf4MFcSSbEf?usp=sharing) +- counter + - p2pnet [Google Drive](https://drive.google.com/drive/folders/1kmtBsPIS79C3hMAwm_Tv9tAPvJLV9k35?usp=sharing) - golang binding (github.com/bububa/openvision/go) ## Reference diff --git a/go/README.md b/go/README.md index b0f6c94..b593adc 100644 --- a/go/README.md +++ b/go/README.md @@ -66,3 +66,5 @@ make -j 4 - animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing) - tracker - lighttrack [Google Drive](https://drive.google.com/drive/folders/16cxns_xzSOABHn6UcY1OXyf4MFcSSbEf?usp=sharing) +- counter + - p2pnet [Google Drive](https://drive.google.com/drive/folders/1kmtBsPIS79C3hMAwm_Tv9tAPvJLV9k35?usp=sharing) diff --git a/go/counter/cgo.go b/go/counter/cgo.go new file mode 100644 index 0000000..28af83a --- /dev/null +++ b/go/counter/cgo.go @@ -0,0 +1,12 @@ +//go:build !vulkan +// +build !vulkan + +package counter + +/* +#cgo CXXFLAGS: --std=c++11 -fopenmp +#cgo CPPFLAGS: -I ${SRCDIR}/../../include -I /usr/local/include +#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision +#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../lib +*/ +import "C" diff --git a/go/counter/cgo_vulkan.go b/go/counter/cgo_vulkan.go new file mode 100644 index 0000000..c91e493 --- /dev/null +++ b/go/counter/cgo_vulkan.go @@ -0,0 +1,11 @@ +// +build vulkan + +package counter + +/* +#cgo CXXFLAGS: --std=c++11 -fopenmp +#cgo CPPFLAGS: -I ${SRCDIR}/../../include -I /usr/local/include +#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent +#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../lib +*/ +import "C" diff --git a/go/counter/counter.go b/go/counter/counter.go new file mode 100644 index 0000000..7fc718b --- /dev/null +++ b/go/counter/counter.go @@ -0,0 +1,40 @@ +package counter + +/* +#include +#include +#include "openvision/common/common.h" +#include 
"openvision/counter/counter.h" +*/ +import "C" +import ( + "unsafe" + + openvision "github.com/bububa/openvision/go" + "github.com/bububa/openvision/go/common" +) + +// Counter represents Object Counter interface +type Counter interface { + common.Estimator + CrowdCount(img *common.Image) ([]common.Keypoint, error) +} + +// CrowdCount returns object counter +func CrowdCount(d Counter, img *common.Image) ([]common.Keypoint, error) { + imgWidth := img.WidthF64() + imgHeight := img.HeightF64() + data := img.Bytes() + ptsC := common.NewCKeypointVector() + defer common.FreeCKeypointVector(ptsC) + errCode := C.crowd_count( + (C.ICounter)(d.Pointer()), + (*C.uchar)(unsafe.Pointer(&data[0])), + C.int(imgWidth), + C.int(imgHeight), + (*C.KeypointVector)(unsafe.Pointer(ptsC))) + if errCode != 0 { + return nil, openvision.CounterError(int(errCode)) + } + return common.GoKeypointVector(ptsC, imgWidth, imgHeight), nil +} diff --git a/go/counter/doc.go b/go/counter/doc.go new file mode 100644 index 0000000..a399388 --- /dev/null +++ b/go/counter/doc.go @@ -0,0 +1,2 @@ +// Package counter include object counter +package counter diff --git a/go/counter/p2pnet.go b/go/counter/p2pnet.go new file mode 100644 index 0000000..82bc9fa --- /dev/null +++ b/go/counter/p2pnet.go @@ -0,0 +1,45 @@ +package counter + +/* +#include +#include +#include "openvision/counter/counter.h" +*/ +import "C" +import ( + "unsafe" + + "github.com/bububa/openvision/go/common" +) + +// P2PNet represents p2pnet counter +type P2PNet struct { + d C.ICounter +} + +// NewP2PNet returns a new P2PNet +func NewP2PNet() *P2PNet { + return &P2PNet{ + d: C.new_p2pnet_crowd_counter(), + } +} + +// Destroy free tracker +func (d *P2PNet) Destroy() { + common.DestroyEstimator(d) +} + +// Pointer implement Estimator interface +func (d *P2PNet) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) +} + +// LoadModel load model for detecter +func (d *P2PNet) LoadModel(modelPath string) error { + return common.EstimatorLoadModel(d, modelPath) +} + +// CrowdCount implement Object Counter interface +func (d *P2PNet) CrowdCount(img *common.Image) ([]common.Keypoint, error) { + return CrowdCount(d, img) +} diff --git a/go/error.go b/go/error.go index e04809f..b897658 100644 --- a/go/error.go +++ b/go/error.go @@ -74,6 +74,12 @@ var ( Message: "object tracker error", } } + CounterError = func(code int) Error { + return Error{ + Code: code, + Message: "object counter error", + } + } RealsrError = func(code int) Error { return Error{ Code: code, diff --git a/go/examples/counter/main.go b/go/examples/counter/main.go new file mode 100644 index 0000000..55c1092 --- /dev/null +++ b/go/examples/counter/main.go @@ -0,0 +1,92 @@ +package main + +import ( + "bytes" + "image" + "image/jpeg" + "log" + "os" + "os/user" + "path/filepath" + "strings" + + "github.com/bububa/openvision/go/common" + "github.com/bububa/openvision/go/counter" +) + +func main() { + wd, _ := os.Getwd() + dataPath := cleanPath(wd, "~/go/src/github.com/bububa/openvision/data") + imgPath := filepath.Join(dataPath, "./images") + modelPath := filepath.Join(dataPath, "./models") + common.CreateGPUInstance() + defer common.DestroyGPUInstance() + cpuCores := common.GetBigCPUCount() + common.SetOMPThreads(cpuCores) + log.Printf("CPU big cores:%d\n", cpuCores) + d := p2pnet(modelPath) + defer d.Destroy() + common.SetEstimatorThreads(d, cpuCores) + crowdCount(d, imgPath, "congested2.jpg") +} + +func p2pnet(modelPath string) counter.Counter { + modelPath = filepath.Join(modelPath, "p2pnet") + d := 
counter.NewP2PNet() + if err := d.LoadModel(modelPath); err != nil { + log.Fatalln(err) + } + return d +} + +func crowdCount(d counter.Counter, imgPath string, filename string) { + inPath := filepath.Join(imgPath, filename) + imgLoaded, err := loadImage(inPath) + if err != nil { + log.Fatalln("load image failed,", err) + } + img := common.NewImage(imgLoaded) + pts, err := d.CrowdCount(img) + if err != nil { + log.Fatalln(err) + } + log.Printf("count: %d\n", len(pts)) +} + +func loadImage(filePath string) (image.Image, error) { + fn, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer fn.Close() + img, _, err := image.Decode(fn) + if err != nil { + return nil, err + } + return img, nil +} + +func saveImage(img image.Image, filePath string) error { + buf := new(bytes.Buffer) + if err := jpeg.Encode(buf, img, nil); err != nil { + return err + } + fn, err := os.Create(filePath) + if err != nil { + return err + } + defer fn.Close() + fn.Write(buf.Bytes()) + return nil +} + +func cleanPath(wd string, path string) string { + usr, _ := user.Current() + dir := usr.HomeDir + if path == "~" { + return dir + } else if strings.HasPrefix(path, "~/") { + return filepath.Join(dir, path[2:]) + } + return filepath.Join(wd, path) +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dab634b..5084975 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -73,6 +73,8 @@ target_include_directories(openvision $ $ + + $ ) #install(TARGETS openvision EXPORT openvision ARCHIVE DESTINATION ${LIBRARY_OUTPUT_PATH}) @@ -115,3 +117,7 @@ file(COPY DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/tracker ) +file(COPY + ${CMAKE_CURRENT_SOURCE_DIR}/counter/counter.h + DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/counter +) diff --git a/src/counter/counter.cpp b/src/counter/counter.cpp new file mode 100644 index 0000000..b54527d --- /dev/null +++ b/src/counter/counter.cpp @@ -0,0 +1,21 @@ +#include "counter.h" +#include "p2pnet/p2pnet.hpp" + +ICounter new_p2pnet_crowd_counter() { return new ovcounter::P2PNet(); } + +int crowd_count(ICounter d, const unsigned char *rgbdata, int img_width, + int img_height, KeypointVector *keypoints) { + std::vector pts; + int ret = static_cast(d)->CrowdCount(rgbdata, img_width, + img_height, &pts); + if (ret != 0) { + return ret; + } + keypoints->length = pts.size(); + keypoints->points = + (ov::Keypoint *)malloc(keypoints->length * sizeof(ov::Keypoint)); + for (size_t i = 0; i < keypoints->length; ++i) { + keypoints->points[i] = pts.at(i); + } + return 0; +} diff --git a/src/counter/counter.h b/src/counter/counter.h new file mode 100644 index 0000000..dbde0bd --- /dev/null +++ b/src/counter/counter.h @@ -0,0 +1,17 @@ +#ifndef _COUNTER_C_H_ +#define _COUNTER_C_H_ + +#include "../common/common.h" + +#ifdef __cplusplus +#include "counter.hpp" +extern "C" { +#endif +typedef void *ICounter; +ICounter new_p2pnet_crowd_counter(); +int crowd_count(ICounter d, const unsigned char *rgbdata, int img_width, + int img_height, KeypointVector *pts); +#ifdef __cplusplus +} +#endif +#endif // !_COUNTER_C_H_ diff --git a/src/counter/counter.hpp b/src/counter/counter.hpp new file mode 100644 index 0000000..5b4abbb --- /dev/null +++ b/src/counter/counter.hpp @@ -0,0 +1,13 @@ +#ifndef _COUNTER_H_ +#define _COUNTER_H_ + +#include "../common/common.hpp" + +namespace ovcounter { +class Counter : public ov::Estimator { +public: + virtual int CrowdCount(const unsigned char *rgbdata, int img_width, + int img_height, std::vector *pts) = 0; +}; +} // namespace ovcounter +#endif // 
!_COUNTER_H_ diff --git a/src/counter/p2pnet/p2pnet.cpp b/src/counter/p2pnet/p2pnet.cpp new file mode 100644 index 0000000..fba1587 --- /dev/null +++ b/src/counter/p2pnet/p2pnet.cpp @@ -0,0 +1,185 @@ +#include "p2pnet.hpp" + +#include + +#ifdef OV_VULKAN +#include "gpu.h" +#endif // OV_VULKAN + +namespace ovcounter { + +void P2PNet::shift(int w, int h, int stride, std::vector anchor_points, + std::vector &shifted_anchor_points) { + std::vector x_, y_; + for (int i = 0; i < w; i++) { + float x = (i + 0.5) * stride; + x_.push_back(x); + } + for (int i = 0; i < h; i++) { + float y = (i + 0.5) * stride; + y_.push_back(y); + } + + std::vector shift_x(w * h, 0), shift_y(w * h, 0); + for (int i = 0; i < h; i++) { + for (int j = 0; j < w; j++) { + shift_x[i * w + j] = x_[j]; + } + } + for (int i = 0; i < h; i++) { + for (int j = 0; j < w; j++) { + shift_y[i * w + j] = y_[i]; + } + } + + std::vector shifts(w * h * 2, 0); + for (int i = 0; i < w * h; i++) { + shifts[i * 2] = shift_x[i]; + shifts[i * 2 + 1] = shift_y[i]; + } + + shifted_anchor_points.resize(2 * w * h * anchor_points.size() / 2, 0); + for (int i = 0; i < w * h; i++) { + for (int j = 0; j < anchor_points.size() / 2; j++) { + float x = anchor_points[j * 2] + shifts[i * 2]; + float y = anchor_points[j * 2 + 1] + shifts[i * 2 + 1]; + shifted_anchor_points[i * anchor_points.size() / 2 * 2 + j * 2] = x; + shifted_anchor_points[i * anchor_points.size() / 2 * 2 + j * 2 + 1] = y; + } + } +} + +void P2PNet::generate_anchor_points(int stride, int row, int line, + std::vector &anchor_points) { + float row_step = (float)stride / row; + float line_step = (float)stride / line; + + std::vector x_, y_; + for (int i = 1; i < line + 1; i++) { + float x = (i - 0.5) * line_step - (float)stride / 2; + x_.push_back(x); + } + for (int i = 1; i < row + 1; i++) { + float y = (i - 0.5) * row_step - (float)stride / 2; + y_.push_back(y); + } + std::vector shift_x(row * line, 0), shift_y(row * line, 0); + for (int i = 0; i < row; i++) { + for (int j = 0; j < line; j++) { + shift_x[i * line + j] = x_[j]; + } + } + for (int i = 0; i < row; i++) { + for (int j = 0; j < line; j++) { + shift_y[i * line + j] = y_[i]; + } + } + anchor_points.resize(row * line * 2, 0); + for (int i = 0; i < row * line; i++) { + float x = shift_x[i]; + float y = shift_y[i]; + anchor_points[i * 2] = x; + anchor_points[i * 2 + 1] = y; + } +} + +void P2PNet::generate_anchor_points(int img_w, int img_h, + std::vector pyramid_levels, int row, + int line, + std::vector &all_anchor_points) { + std::vector> image_shapes; + std::vector strides; + for (int i = 0; i < pyramid_levels.size(); i++) { + int new_h = floor((img_h + pow(2, pyramid_levels[i]) - 1) / + pow(2, pyramid_levels[i])); + int new_w = floor((img_w + pow(2, pyramid_levels[i]) - 1) / + pow(2, pyramid_levels[i])); + image_shapes.push_back(std::make_pair(new_w, new_h)); + strides.push_back(pow(2, pyramid_levels[i])); + } + + all_anchor_points.clear(); + for (int i = 0; i < pyramid_levels.size(); i++) { + std::vector anchor_points; + generate_anchor_points(pow(2, pyramid_levels[i]), row, line, anchor_points); + std::vector shifted_anchor_points; + shift(image_shapes[i].first, image_shapes[i].second, strides[i], + anchor_points, shifted_anchor_points); + all_anchor_points.insert(all_anchor_points.end(), + shifted_anchor_points.begin(), + shifted_anchor_points.end()); + } +} + +int P2PNet::CrowdCount(const unsigned char *rgbdata, int img_width, + int img_height, std::vector *keypoints) { + + if (!initialized_) { + return 10000; + } + if 
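+  // Null-input guard: 10001 flags a missing RGB buffer, paired with the
+  // 10000 "model not initialized" code above; the hair and eye estimators
+  // added later in this series use the same two codes.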
(rgbdata == 0) { + return 10001; + } + + // pad to multiple of 32 + int w = img_width; + int h = img_height; + float scale = 1.f; + if (w > h) { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } else { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat input = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, + img_width, img_height, w, h); + + // pad to target_size rectangle + int wpad = (w + 31) / 32 * 32 - w; + int hpad = (h + 31) / 32 * 32 - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(input, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, + wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); + + std::vector pyramid_levels(1, 3); + std::vector all_anchor_points; + generate_anchor_points(in_pad.w, in_pad.h, pyramid_levels, 2, 2, + all_anchor_points); + + ncnn::Mat anchor_points = + ncnn::Mat(2, all_anchor_points.size() / 2, all_anchor_points.data()); + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); + + in_pad.substract_mean_normalize(mean_vals1, norm_vals1); + + ex.input("input", in_pad); + ex.input("anchor", anchor_points); + + ncnn::Mat score, points; + ex.extract("pred_scores", score); + ex.extract("pred_points", points); + + keypoints->clear(); + for (int i = 0; i < points.h; i++) { + float *score_data = score.row(i); + float *points_data = points.row(i); + if (score_data[1] > 0.5) { + ov::Keypoint kpt; + int x = (points_data[0] - ((float)wpad / 2)) / scale; + int y = (points_data[1] - ((float)hpad / 2)) / scale; + kpt.p = ov::Point2f(x, y); + kpt.score = score_data[1]; + keypoints->push_back(kpt); + } + } + return 0; +} + +} // namespace ovcounter diff --git a/src/counter/p2pnet/p2pnet.hpp b/src/counter/p2pnet/p2pnet.hpp new file mode 100644 index 0000000..e6847c3 --- /dev/null +++ b/src/counter/p2pnet/p2pnet.hpp @@ -0,0 +1,28 @@ +#ifndef _COUNTER_P2PNET_H_ +#define _COUNTER_P2PNET_H_ + +#include "../counter.hpp" + +namespace ovcounter { + +class P2PNet : public Counter { +public: + int CrowdCount(const unsigned char *rgbdata, int img_width, int img_height, + std::vector *keypoints); + +private: + const int target_size = 640; + const float mean_vals1[3] = {123.675f, 116.28f, 103.53f}; + const float norm_vals1[3] = {0.01712475f, 0.0175f, 0.01742919f}; + + void generate_anchor_points(int stride, int row, int line, + std::vector &anchor_points); + void generate_anchor_points(int img_w, int img_h, + std::vector pyramid_levels, int row, + int line, std::vector &all_anchor_points); + void shift(int w, int h, int stride, std::vector anchor_points, + std::vector &shifted_anchor_points); +}; + +} // namespace ovcounter +#endif // !_COUNTER_P2PNET_H_ From 500eefe53966839bba85a7eb5e8c4b2ef68f1b3e Mon Sep 17 00:00:00 2001 From: Syd Xu Date: Thu, 11 Nov 2021 17:30:56 +0800 Subject: [PATCH 2/9] feat(face): add hair segmentor --- README.md | 1 + go/README.md | 1 + go/common/image.go | 23 ++++++-- go/error.go | 6 ++ go/examples/aligner/main.go | 3 +- go/examples/hair/main.go | 94 ++++++++++++++++++++++++++++++ go/examples/poseseg/main.go | 12 ++-- go/examples/styletransfer/main.go | 5 +- go/face/aligner/aligner.go | 8 +-- go/face/hair/cgo.go | 11 ++++ go/face/hair/cgo_vulkan.go | 11 ++++ go/face/hair/doc.go | 2 + go/face/hair/hair.go | 61 +++++++++++++++++++ go/face/hopenet/hopenet.go | 1 - go/pose/segmentor/deeplabv3plus.go | 9 ++- go/pose/segmentor/erdnet.go | 9 ++- go/pose/segmentor/rvm.go | 9 ++- go/pose/segmentor/segmentor.go | 19 +++--- 
go/styletransfer/animegan2.go | 5 +- go/styletransfer/transfer.go | 10 ++-- src/CMakeLists.txt | 3 +- src/face/hair.h | 17 ++++++ src/face/hair/hair.cpp | 64 ++++++++++++++++++++ src/face/hair/hair.hpp | 18 ++++++ src/face/hopenet.h | 7 ++- src/face/hopenet/hopenet.cpp | 2 +- src/face/hopenet/hopenet.hpp | 16 +++-- 27 files changed, 362 insertions(+), 65 deletions(-) create mode 100644 go/examples/hair/main.go create mode 100644 go/face/hair/cgo.go create mode 100644 go/face/hair/cgo_vulkan.go create mode 100644 go/face/hair/doc.go create mode 100644 go/face/hair/hair.go create mode 100644 src/face/hair.h create mode 100644 src/face/hair/hair.cpp create mode 100644 src/face/hair/hair.hpp diff --git a/README.md b/README.md index 21563c0..59c75e9 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,7 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM - scrfd [Google Drive](https://drive.google.com/drive/folders/1XPjfsuXGj9rXqAmo1K70BsqWmHvoYQv_?usp=sharing) - tracker (for face IOU calculation bettween frames) - hopenet (for head pose detection) [Google Drive](https://drive.google.com/drive/folders/1zLam-8s9ZMPDUxUEtNU2F9yFTDRM5fk-?usp=sharing) + - hair (for hair segmentation) [Google Drive](https://drive.google.com/drive/folders/14DOBaFrxTL1k4T1ved5qfRUUziurItT8?usp=sharing) - pose - detector (for pose detection/estimation) - ultralight [Google Drive](https://drive.google.com/drive/folders/15b-I5HDyGe2WLb-TO85SJYmnYONvGOKh?usp=sharing) diff --git a/go/README.md b/go/README.md index b593adc..ba72e51 100644 --- a/go/README.md +++ b/go/README.md @@ -45,6 +45,7 @@ make -j 4 - scrfd [Google Drive](https://drive.google.com/drive/folders/1XPjfsuXGj9rXqAmo1K70BsqWmHvoYQv_?usp=sharing) - tracker (for face IOU calculation bettween frames) - hopenet (for head pose detection) [Google Drive](https://drive.google.com/drive/folders/1zLam-8s9ZMPDUxUEtNU2F9yFTDRM5fk-?usp=sharing) + - hair (for hair segmentation) [Google Drive](https://drive.google.com/drive/folders/14DOBaFrxTL1k4T1ved5qfRUUziurItT8?usp=sharing) - pose - detector (for pose detection/estimation) - ultralight [Google Drive](https://drive.google.com/drive/folders/15b-I5HDyGe2WLb-TO85SJYmnYONvGOKh?usp=sharing) diff --git a/go/common/image.go b/go/common/image.go index 43e57b3..a37a1da 100644 --- a/go/common/image.go +++ b/go/common/image.go @@ -26,6 +26,9 @@ type Image struct { // NewImage returns a new Image func NewImage(img image.Image) *Image { buf := new(bytes.Buffer) + if img == nil { + return &Image{buffer: buf} + } Image2RGB(buf, img) return &Image{ Image: img, @@ -33,6 +36,14 @@ func NewImage(img image.Image) *Image { } } +// Write write bytes to buffer +func (i *Image) Write(b []byte) { + if i.buffer == nil { + return + } + i.buffer.Write(b) +} + // Bytes returns image bytes in rgb func (i Image) Bytes() []byte { if i.buffer == nil { @@ -74,20 +85,23 @@ func NewCImage() *C.Image { return ret } +// FreeCImage free C.Image func FreeCImage(c *C.Image) { C.FreeImage(c) C.free(unsafe.Pointer(c)) } -func GoImage(c *C.Image) (image.Image, error) { +// GoImage returns Image from C.Image +func GoImage(c *C.Image, out *Image) { w := int(c.width) h := int(c.height) channels := int(c.channels) data := C.GoBytes(unsafe.Pointer(c.data), C.int(w*h*channels)*C.sizeof_uchar) - return NewImageFromBytes(data, w, h, channels) + NewImageFromBytes(data, w, h, channels, out) } -func NewImageFromBytes(data []byte, w int, h int, channels int) (image.Image, error) { +// NewImageFromBytes returns Image by []byte +func 
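+// The refactored signature below fills the caller-supplied out instead of
+// returning (image.Image, error): pixel bytes are appended to out's internal
+// buffer via out.Write and the decoded RGBA image is stored in out.Image.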
NewImageFromBytes(data []byte, w int, h int, channels int, out *Image) { img := image.NewRGBA(image.Rect(0, 0, w, h)) for y := 0; y < h; y++ { for x := 0; x < w; x++ { @@ -97,9 +111,10 @@ func NewImageFromBytes(data []byte, w int, h int, channels int) (image.Image, er alpha = data[pos+3] } img.SetRGBA(x, y, color.RGBA{uint8(data[pos]), uint8(data[pos+1]), uint8(data[pos+2]), uint8(alpha)}) + out.Write([]byte{byte(data[pos]), byte(data[pos+1]), byte(data[pos+2]), byte(alpha)}) } } - return img, nil + out.Image = img } // Image2RGB write image rgbdata to buffer diff --git a/go/error.go b/go/error.go index b897658..4ce1551 100644 --- a/go/error.go +++ b/go/error.go @@ -56,6 +56,12 @@ var ( Message: "detect head pose failed", } } + HairMattingError = func(code int) Error { + return Error{ + Code: code, + Message: "hair matting failed", + } + } DetectHandError = func(code int) Error { return Error{ Code: code, diff --git a/go/examples/aligner/main.go b/go/examples/aligner/main.go index 9a3b7e2..f40d477 100644 --- a/go/examples/aligner/main.go +++ b/go/examples/aligner/main.go @@ -50,8 +50,9 @@ func align(d detecter.Detecter, a *aligner.Aligner, imgPath string, filename str if err != nil { log.Fatalln(err) } + aligned := common.NewImage(nil) for idx, face := range faces { - aligned, err := a.Align(img, face) + err := a.Align(img, face, aligned) if err != nil { log.Fatalln(err) } diff --git a/go/examples/hair/main.go b/go/examples/hair/main.go new file mode 100644 index 0000000..805eac8 --- /dev/null +++ b/go/examples/hair/main.go @@ -0,0 +1,94 @@ +package main + +import ( + "bytes" + "fmt" + "image" + "image/jpeg" + "log" + "os" + "os/user" + "path/filepath" + "strings" + + "github.com/bububa/openvision/go/common" + "github.com/bububa/openvision/go/face/hair" +) + +func main() { + wd, _ := os.Getwd() + dataPath := cleanPath(wd, "~/go/src/github.com/bububa/openvision/data") + imgPath := filepath.Join(dataPath, "./images") + modelPath := filepath.Join(dataPath, "./models") + common.CreateGPUInstance() + defer common.DestroyGPUInstance() + d := estimator(modelPath) + defer d.Destroy() + matting(d, imgPath, "hair1.jpg") +} + +func estimator(modelPath string) *hair.Hair { + modelPath = filepath.Join(modelPath, "hair") + d := hair.NewHair() + if err := d.LoadModel(modelPath); err != nil { + log.Fatalln(err) + } + return d +} + +func matting(d *hair.Hair, imgPath string, filename string) { + inPath := filepath.Join(imgPath, filename) + imgLoaded, err := loadImage(inPath) + if err != nil { + log.Fatalln("load image failed,", err) + } + img := common.NewImage(imgLoaded) + out := common.NewImage(nil) + if err := d.Matting(img, out); err != nil { + log.Fatalln(err) + } + outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("hair-matting-%s", filename)) + + if err := saveImage(out, outPath); err != nil { + log.Fatalln(err) + } + +} + +func loadImage(filePath string) (image.Image, error) { + fn, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer fn.Close() + img, _, err := image.Decode(fn) + if err != nil { + return nil, err + } + return img, nil +} + +func saveImage(img image.Image, filePath string) error { + buf := new(bytes.Buffer) + if err := jpeg.Encode(buf, img, nil); err != nil { + return err + } + fn, err := os.Create(filePath) + if err != nil { + return err + } + defer fn.Close() + fn.Write(buf.Bytes()) + return nil +} + +func cleanPath(wd string, path string) string { + usr, _ := user.Current() + dir := usr.HomeDir + if path == "~" { + return dir + } else if 
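+	// expand a leading "~/" to the current user's home directory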
strings.HasPrefix(path, "~/") { + return filepath.Join(dir, path[2:]) + } + return filepath.Join(wd, path) +} diff --git a/go/examples/poseseg/main.go b/go/examples/poseseg/main.go index 24a28c3..77a7649 100644 --- a/go/examples/poseseg/main.go +++ b/go/examples/poseseg/main.go @@ -77,8 +77,8 @@ func videomatting(seg segmentor.Segmentor, imgPath string, filename string, idx log.Fatalln("load image failed,", err) } img := common.NewImage(imgLoaded) - out, err := seg.Matting(img) - if err != nil { + out := common.NewImage(nil) + if err := seg.Matting(img, out); err != nil { log.Fatalln(err) } outPath := filepath.Join(imgPath, "./results/videomatting", fmt.Sprintf("%d.jpeg", idx)) @@ -95,8 +95,8 @@ func matting(seg segmentor.Segmentor, imgPath string, filename string, idx int) log.Fatalln("load image failed,", err) } img := common.NewImage(imgLoaded) - out, err := seg.Matting(img) - if err != nil { + out := common.NewImage(nil) + if err := seg.Matting(img, out); err != nil { log.Fatalln(err) } outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("poseseg-matting-%d-%s", idx, filename)) @@ -119,8 +119,8 @@ func merge(seg segmentor.Segmentor, imgPath string, filename string, bgFilename log.Fatalln("load bg image failed,", err) } bg := common.NewImage(bgLoaded) - out, err := seg.Merge(img, bg) - if err != nil { + out := common.NewImage(nil) + if err := seg.Merge(img, bg, out); err != nil { log.Fatalln(err) } outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("poseseg-merge-%d-%s", idx, filename)) diff --git a/go/examples/styletransfer/main.go b/go/examples/styletransfer/main.go index f8fd553..9f9983e 100644 --- a/go/examples/styletransfer/main.go +++ b/go/examples/styletransfer/main.go @@ -49,15 +49,14 @@ func transform(transfer styletransfer.StyleTransfer, imgPath string, filename st log.Fatalln("load image failed,", err) } img := common.NewImage(imgLoaded) - out, err := transfer.Transform(img) - if err != nil { + out := common.NewImage(nil) + if err := transfer.Transform(img, out); err != nil { log.Fatalln(err) } outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("%s-%s", modelName, filename)) if err := saveImage(out, outPath); err != nil { log.Fatalln(err) } - } func loadImage(filePath string) (image.Image, error) { diff --git a/go/face/aligner/aligner.go b/go/face/aligner/aligner.go index bfaf2f5..cfe8948 100644 --- a/go/face/aligner/aligner.go +++ b/go/face/aligner/aligner.go @@ -8,7 +8,6 @@ package aligner */ import "C" import ( - "image" "unsafe" openvision "github.com/bububa/openvision/go" @@ -39,7 +38,7 @@ func (a *Aligner) SetThreads(n int) { } // Align face -func (a *Aligner) Align(img *common.Image, faceInfo face.FaceInfo) (image.Image, error) { +func (a *Aligner) Align(img *common.Image, faceInfo face.FaceInfo, out *common.Image) error { imgWidth := img.WidthF64() imgHeight := img.HeightF64() data := img.Bytes() @@ -61,7 +60,8 @@ func (a *Aligner) Align(img *common.Image, faceInfo face.FaceInfo) (image.Image, (*C.Image)(unsafe.Pointer(outImgC)), ) if errCode != 0 { - return nil, openvision.AlignFaceError(int(errCode)) + return openvision.AlignFaceError(int(errCode)) } - return common.GoImage(outImgC) + common.GoImage(outImgC, out) + return nil } diff --git a/go/face/hair/cgo.go b/go/face/hair/cgo.go new file mode 100644 index 0000000..727fd8a --- /dev/null +++ b/go/face/hair/cgo.go @@ -0,0 +1,11 @@ +// +build !vulkan + +package hair + +/* +#cgo CXXFLAGS: --std=c++11 -fopenmp +#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include +#cgo LDFLAGS: 
-lstdc++ -lncnn -lomp -lopenvision +#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib +*/ +import "C" diff --git a/go/face/hair/cgo_vulkan.go b/go/face/hair/cgo_vulkan.go new file mode 100644 index 0000000..9d7735a --- /dev/null +++ b/go/face/hair/cgo_vulkan.go @@ -0,0 +1,11 @@ +// +build vulkan + +package hair + +/* +#cgo CXXFLAGS: --std=c++11 -fopenmp +#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include +#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent +#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib +*/ +import "C" diff --git a/go/face/hair/doc.go b/go/face/hair/doc.go new file mode 100644 index 0000000..a4254a3 --- /dev/null +++ b/go/face/hair/doc.go @@ -0,0 +1,2 @@ +// Package hair include hair segmentation +package hair diff --git a/go/face/hair/hair.go b/go/face/hair/hair.go new file mode 100644 index 0000000..edd9bef --- /dev/null +++ b/go/face/hair/hair.go @@ -0,0 +1,61 @@ +package hair + +/* +#include +#include +#include "openvision/face/hair.h" +*/ +import "C" +import ( + "unsafe" + + openvision "github.com/bububa/openvision/go" + "github.com/bububa/openvision/go/common" +) + +// Hair represents Hair segmentor +type Hair struct { + d C.IHair +} + +// NewHair returns a new Hair +func NewHair() *Hair { + return &Hair{ + d: C.new_hair(), + } +} + +// Pointer implement Estimator interface +func (h *Hair) Pointer() unsafe.Pointer { + return unsafe.Pointer(h.d) +} + +// LoadModel load detecter model +func (h *Hair) LoadModel(modelPath string) error { + return common.EstimatorLoadModel(h, modelPath) +} + +// Destroy destroy C.IHair +func (h *Hair) Destroy() { + common.DestroyEstimator(h) +} + +// Matting returns hair matting image +func (h *Hair) Matting(img *common.Image, out *common.Image) error { + imgWidth := img.WidthF64() + imgHeight := img.HeightF64() + data := img.Bytes() + outImgC := common.NewCImage() + defer common.FreeCImage(outImgC) + errCode := C.hair_matting( + (C.IHair)(h.Pointer()), + (*C.uchar)(unsafe.Pointer(&data[0])), + C.int(imgWidth), C.int(imgHeight), + (*C.Image)(unsafe.Pointer(outImgC)), + ) + if errCode != 0 { + return openvision.HairMattingError(int(errCode)) + } + common.GoImage(outImgC, out) + return nil +} diff --git a/go/face/hopenet/hopenet.go b/go/face/hopenet/hopenet.go index 16dd55b..6161589 100644 --- a/go/face/hopenet/hopenet.go +++ b/go/face/hopenet/hopenet.go @@ -34,7 +34,6 @@ func (h *Hopenet) Pointer() unsafe.Pointer { // LoadModel load detecter model func (h *Hopenet) LoadModel(modelPath string) error { return common.EstimatorLoadModel(h, modelPath) - return nil } // Destroy destroy C.IHopeNet diff --git a/go/pose/segmentor/deeplabv3plus.go b/go/pose/segmentor/deeplabv3plus.go index 5f819c4..1e90e0e 100644 --- a/go/pose/segmentor/deeplabv3plus.go +++ b/go/pose/segmentor/deeplabv3plus.go @@ -7,7 +7,6 @@ package segmentor */ import "C" import ( - "image" "unsafe" "github.com/bububa/openvision/go/common" @@ -41,11 +40,11 @@ func (d *Deeplabv3plus) LoadModel(modelPath string) error { } // Matting implement Segmentor interface -func (d *Deeplabv3plus) Matting(img *common.Image) (image.Image, error) { - return Matting(d, img) +func (d *Deeplabv3plus) Matting(img *common.Image, out *common.Image) error { + return Matting(d, img, out) } // Merge implement Segmentor interface -func (d *Deeplabv3plus) Merge(img *common.Image, bg *common.Image) (image.Image, error) { - return Merge(d, img, bg) +func (d *Deeplabv3plus) Merge(img 
*common.Image, bg *common.Image, out *common.Image) error { + return Merge(d, img, bg, out) } diff --git a/go/pose/segmentor/erdnet.go b/go/pose/segmentor/erdnet.go index c269bbf..b6472d5 100644 --- a/go/pose/segmentor/erdnet.go +++ b/go/pose/segmentor/erdnet.go @@ -7,7 +7,6 @@ package segmentor */ import "C" import ( - "image" "unsafe" "github.com/bububa/openvision/go/common" @@ -41,11 +40,11 @@ func (d *ERDNet) LoadModel(modelPath string) error { } // Matting implement Segmentor interface -func (d *ERDNet) Matting(img *common.Image) (image.Image, error) { - return Matting(d, img) +func (d *ERDNet) Matting(img *common.Image, out *common.Image) error { + return Matting(d, img, out) } // Merge implement Segmentor interface -func (d *ERDNet) Merge(img *common.Image, bg *common.Image) (image.Image, error) { - return Merge(d, img, bg) +func (d *ERDNet) Merge(img *common.Image, bg *common.Image, out *common.Image) error { + return Merge(d, img, bg, out) } diff --git a/go/pose/segmentor/rvm.go b/go/pose/segmentor/rvm.go index a50bc5b..32243c0 100644 --- a/go/pose/segmentor/rvm.go +++ b/go/pose/segmentor/rvm.go @@ -7,7 +7,6 @@ package segmentor */ import "C" import ( - "image" "unsafe" "github.com/bububa/openvision/go/common" @@ -44,11 +43,11 @@ func (d *RVM) LoadModel(modelPath string) error { } // Matting implement Segmentor interface -func (d *RVM) Matting(img *common.Image) (image.Image, error) { - return Matting(d, img) +func (d *RVM) Matting(img *common.Image, out *common.Image) error { + return Matting(d, img, out) } // Merge implement Segmentor interface -func (d *RVM) Merge(img *common.Image, bg *common.Image) (image.Image, error) { - return Merge(d, img, bg) +func (d *RVM) Merge(img *common.Image, bg *common.Image, out *common.Image) error { + return Merge(d, img, bg, out) } diff --git a/go/pose/segmentor/segmentor.go b/go/pose/segmentor/segmentor.go index 07ff69e..2ee75b3 100644 --- a/go/pose/segmentor/segmentor.go +++ b/go/pose/segmentor/segmentor.go @@ -8,7 +8,6 @@ package segmentor */ import "C" import ( - "image" "unsafe" openvision "github.com/bububa/openvision/go" @@ -18,12 +17,12 @@ import ( // Segmentor represents segmentor interface type Segmentor interface { common.Estimator - Matting(img *common.Image) (image.Image, error) - Merge(img *common.Image, bg *common.Image) (image.Image, error) + Matting(img *common.Image, out *common.Image) error + Merge(img *common.Image, bg *common.Image, out *common.Image) error } // Matting returns pose segment matting image -func Matting(d Segmentor, img *common.Image) (image.Image, error) { +func Matting(d Segmentor, img *common.Image, out *common.Image) error { imgWidth := img.WidthF64() imgHeight := img.HeightF64() data := img.Bytes() @@ -36,13 +35,14 @@ func Matting(d Segmentor, img *common.Image) (image.Image, error) { C.int(imgHeight), (*C.Image)(unsafe.Pointer(outImgC))) if errCode != 0 { - return nil, openvision.DetectPoseError(int(errCode)) + return openvision.DetectPoseError(int(errCode)) } - return common.GoImage(outImgC) + common.GoImage(outImgC, out) + return nil } // Merge merge pose with background -func Merge(d Segmentor, img *common.Image, bg *common.Image) (image.Image, error) { +func Merge(d Segmentor, img *common.Image, bg *common.Image, out *common.Image) error { imgWidth := img.WidthF64() imgHeight := img.HeightF64() data := img.Bytes() @@ -59,7 +59,8 @@ func Merge(d Segmentor, img *common.Image, bg *common.Image) (image.Image, error C.int(bgWidth), C.int(bgHeight), (*C.Image)(unsafe.Pointer(outImgC))) if errCode != 0 { 
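+	// Non-zero codes from the C layer are wrapped as DetectPoseError; the
+	// segmentor and styletransfer packages share this error constructor.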
- return nil, openvision.DetectPoseError(int(errCode)) + return openvision.DetectPoseError(int(errCode)) } - return common.GoImage(outImgC) + common.GoImage(outImgC, out) + return nil } diff --git a/go/styletransfer/animegan2.go b/go/styletransfer/animegan2.go index 3f649f4..1154927 100644 --- a/go/styletransfer/animegan2.go +++ b/go/styletransfer/animegan2.go @@ -7,7 +7,6 @@ package styletransfer */ import "C" import ( - "image" "unsafe" "github.com/bububa/openvision/go/common" @@ -41,6 +40,6 @@ func (d *AnimeGan2) LoadModel(modelPath string) error { } // Transform implement StyleTransfer interface -func (d *AnimeGan2) Transform(img *common.Image) (image.Image, error) { - return Transform(d, img) +func (d *AnimeGan2) Transform(img *common.Image, out *common.Image) error { + return Transform(d, img, out) } diff --git a/go/styletransfer/transfer.go b/go/styletransfer/transfer.go index c1d09f9..d4c7b42 100644 --- a/go/styletransfer/transfer.go +++ b/go/styletransfer/transfer.go @@ -8,7 +8,6 @@ package styletransfer */ import "C" import ( - "image" "unsafe" openvision "github.com/bububa/openvision/go" @@ -18,11 +17,11 @@ import ( // StyleTransfer represents Style Transfer interface type StyleTransfer interface { common.Estimator - Transform(img *common.Image) (image.Image, error) + Transform(img *common.Image, out *common.Image) error } // Transform returns style transform image -func Transform(d StyleTransfer, img *common.Image) (image.Image, error) { +func Transform(d StyleTransfer, img *common.Image, out *common.Image) error { imgWidth := img.WidthF64() imgHeight := img.HeightF64() data := img.Bytes() @@ -35,7 +34,8 @@ func Transform(d StyleTransfer, img *common.Image) (image.Image, error) { C.int(imgHeight), (*C.Image)(unsafe.Pointer(outImgC))) if errCode != 0 { - return nil, openvision.DetectPoseError(int(errCode)) + return openvision.DetectPoseError(int(errCode)) } - return common.GoImage(outImgC) + common.GoImage(outImgC, out) + return nil } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5084975..6b87012 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -57,7 +57,7 @@ target_include_directories(openvision $ $ - + $ $ $ @@ -91,6 +91,7 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/face/tracker.h ${CMAKE_CURRENT_SOURCE_DIR}/face/hopenet.h ${CMAKE_CURRENT_SOURCE_DIR}/face/aligner.h + ${CMAKE_CURRENT_SOURCE_DIR}/face/hair.h DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/face ) diff --git a/src/face/hair.h b/src/face/hair.h new file mode 100644 index 0000000..891f598 --- /dev/null +++ b/src/face/hair.h @@ -0,0 +1,17 @@ +#ifndef _FACE_HAIR_C_H_ +#define _FACE_HAIR_C_H_ + +#include "common.h" + +#ifdef __cplusplus +#include "hair/hair.hpp" +extern "C" { +#endif +typedef void *IHair; +IHair new_hair(); +int hair_matting(IHair d, const unsigned char *rgbdata, int img_width, + int img_height, Image *out); +#ifdef __cplusplus +} +#endif +#endif // !_FACE_HAIR_C_H_ diff --git a/src/face/hair/hair.cpp b/src/face/hair/hair.cpp new file mode 100644 index 0000000..1356f74 --- /dev/null +++ b/src/face/hair/hair.cpp @@ -0,0 +1,64 @@ +#include "../hair.h" + +#ifdef OV_VULKAN +#include "gpu.h" +#endif // OV_VULKAN + +IHair new_hair() { return new ovface::Hair(); } + +int hair_matting(IHair d, const unsigned char *rgbdata, int img_width, + int img_height, Image *out) { + int ret = static_cast(d)->Matting(rgbdata, img_width, + img_height, out); + if (ret != 0) { + return ret; + } + return 0; +} + +namespace ovface { + +int Hair::Matting(const unsigned char *rgbdata, int img_width, int 
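+// Body below: resize to the 288x288 net input, mean/std normalize, run the
+// model, read the alpha matte from output blob "1006", scale it to 0..255,
+// then bicubic-resize back to the source resolution into out.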
img_height, + Image *out) { + if (!initialized_) { + return 10000; + } + if (rgbdata == 0) { + return 10001; + } + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); + + ncnn::Mat ncnn_in = + ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, + img_height, target_size, target_size); + ncnn::Mat matting = ncnn::Mat(target_size, target_size, 3); + + ncnn_in.substract_mean_normalize(mean_vals, norm_vals); + ex.input("input", ncnn_in); + ncnn::Mat ncnn_out; + ex.extract("1006", ncnn_out); + + for (int c = 0; c < 3; ++c) { + float *pImage = matting.channel(c); + for (int i = 0; i < target_size * target_size; i++) { + const float alpha = ncnn_out[i]; + float value = 255 * alpha; + value = std::max(std::min(value, 255.f), 0.f); + pImage[i] = value; + } + } + ncnn::Mat outimg; + ncnn::resize_bicubic(matting, outimg, img_width, img_height); + + out->width = outimg.w; + out->height = outimg.h; + out->channels = outimg.c; + out->data = (unsigned char *)malloc(outimg.total()); + outimg.to_pixels(out->data, ncnn::Mat::PIXEL_RGB); + return 0; +} + +} // namespace ovface diff --git a/src/face/hair/hair.hpp b/src/face/hair/hair.hpp new file mode 100644 index 0000000..a1dd486 --- /dev/null +++ b/src/face/hair/hair.hpp @@ -0,0 +1,18 @@ +#ifndef _FACE_HAIR_H_ +#define _FACE_HAIR_H_ + +#include "../common/common.h" + +namespace ovface { +class Hair : public ov::Estimator { +public: + int Matting(const unsigned char *rgbdata, int img_width, int img_height, + Image *out); + +private: + const int target_size = 288; + const float mean_vals[3] = {123.675f, 116.28f, 103.53f}; + const float norm_vals[3] = {0.01712475f, 0.0175f, 0.01742919f}; +}; +} // namespace ovface +#endif // !_FACE_HAIR_H_ diff --git a/src/face/hopenet.h b/src/face/hopenet.h index 71dd3cb..bc29506 100644 --- a/src/face/hopenet.h +++ b/src/face/hopenet.h @@ -7,9 +7,10 @@ #include "hopenet/hopenet.hpp" extern "C" { #endif - typedef void* IHopenet; - IHopenet new_hopenet(); - int hopenet_detect(IHopenet d, const unsigned char* rgbdata, int img_width, int img_height, const Rect* roi, HeadPose* euler_angles); +typedef void *IHopenet; +IHopenet new_hopenet(); +int hopenet_detect(IHopenet d, const unsigned char *rgbdata, int img_width, + int img_height, const Rect *roi, HeadPose *euler_angles); #ifdef __cplusplus } #endif diff --git a/src/face/hopenet/hopenet.cpp b/src/face/hopenet/hopenet.cpp index 3dd3987..ca10b81 100644 --- a/src/face/hopenet/hopenet.cpp +++ b/src/face/hopenet/hopenet.cpp @@ -32,7 +32,7 @@ int Hopenet::LoadModel(const char *root_path) { } int Hopenet::Detect(const unsigned char *rgbdata, int img_width, int img_height, - Rect roi, HeadPose *head_angles) { + ov::Rect roi, HeadPose *head_angles) { float diff = fabs(roi.height - roi.width); if (roi.height > roi.width) { roi.x -= diff / 2; diff --git a/src/face/hopenet/hopenet.hpp b/src/face/hopenet/hopenet.hpp index 2ed3744..5721f92 100644 --- a/src/face/hopenet/hopenet.hpp +++ b/src/face/hopenet/hopenet.hpp @@ -7,16 +7,14 @@ namespace ovface { class Hopenet : public ov::Estimator { public: - int LoadModel(const char* root_path); - int Detect(const unsigned char* rgbdata, - int img_width, int img_height, - Rect roi, HeadPose* euler_angles); + int LoadModel(const char *root_path); + int Detect(const unsigned char *rgbdata, int img_width, int img_height, + ov::Rect roi, HeadPose *euler_angles); private: - float idx_tensor[66]; - void softmax(float* z, size_t el); - double getAngle(float* prediction, 
size_t len); - + float idx_tensor[66]; + void softmax(float *z, size_t el); + double getAngle(float *prediction, size_t len); }; -} +} // namespace ovface #endif // !_HEAD_HOPENET_H_ From 2da845af29de010196ed86b48d6fd50199c4ee34 Mon Sep 17 00:00:00 2001 From: Syd Xu Date: Thu, 11 Nov 2021 19:16:27 +0800 Subject: [PATCH 3/9] feat(face): add eye status detecter --- README.md | 2 ++ go/README.md | 2 ++ go/face/eye/cgo.go | 11 +++++++++ go/face/eye/cgo_vulkan.go | 11 +++++++++ go/face/eye/detecter.go | 43 ++++++++++++++++++++++++++++++++++ go/face/eye/doc.go | 2 ++ go/face/eye/lenet.go | 45 ++++++++++++++++++++++++++++++++++++ go/face/tracker/cgo.go | 2 +- src/CMakeLists.txt | 2 ++ src/face/eye.h | 17 ++++++++++++++ src/face/eye/eye.cpp | 20 ++++++++++++++++ src/face/eye/eye.hpp | 13 +++++++++++ src/face/eye/lenet/lenet.cpp | 39 +++++++++++++++++++++++++++++++ src/face/eye/lenet/lenet.hpp | 17 ++++++++++++++ 14 files changed, 225 insertions(+), 1 deletion(-) create mode 100644 go/face/eye/cgo.go create mode 100644 go/face/eye/cgo_vulkan.go create mode 100644 go/face/eye/detecter.go create mode 100644 go/face/eye/doc.go create mode 100644 go/face/eye/lenet.go create mode 100644 src/face/eye.h create mode 100644 src/face/eye/eye.cpp create mode 100644 src/face/eye/eye.hpp create mode 100644 src/face/eye/lenet/lenet.cpp create mode 100644 src/face/eye/lenet/lenet.hpp diff --git a/README.md b/README.md index 59c75e9..7bdd285 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,8 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM - tracker (for face IOU calculation bettween frames) - hopenet (for head pose detection) [Google Drive](https://drive.google.com/drive/folders/1zLam-8s9ZMPDUxUEtNU2F9yFTDRM5fk-?usp=sharing) - hair (for hair segmentation) [Google Drive](https://drive.google.com/drive/folders/14DOBaFrxTL1k4T1ved5qfRUUziurItT8?usp=sharing) + - eye + - lenet (eye status detector) [Google Drive](https://drive.google.com/drive/folders/1jaonx6PeXFLA8gBKo4eQGuxsncVnqS7o?usp=sharing) - pose - detector (for pose detection/estimation) - ultralight [Google Drive](https://drive.google.com/drive/folders/15b-I5HDyGe2WLb-TO85SJYmnYONvGOKh?usp=sharing) diff --git a/go/README.md b/go/README.md index ba72e51..b60f548 100644 --- a/go/README.md +++ b/go/README.md @@ -46,6 +46,8 @@ make -j 4 - tracker (for face IOU calculation bettween frames) - hopenet (for head pose detection) [Google Drive](https://drive.google.com/drive/folders/1zLam-8s9ZMPDUxUEtNU2F9yFTDRM5fk-?usp=sharing) - hair (for hair segmentation) [Google Drive](https://drive.google.com/drive/folders/14DOBaFrxTL1k4T1ved5qfRUUziurItT8?usp=sharing) + - eye + - lenet (eye status detector) [Google Drive](https://drive.google.com/drive/folders/1jaonx6PeXFLA8gBKo4eQGuxsncVnqS7o?usp=sharing) - pose - detector (for pose detection/estimation) - ultralight [Google Drive](https://drive.google.com/drive/folders/15b-I5HDyGe2WLb-TO85SJYmnYONvGOKh?usp=sharing) diff --git a/go/face/eye/cgo.go b/go/face/eye/cgo.go new file mode 100644 index 0000000..05f982e --- /dev/null +++ b/go/face/eye/cgo.go @@ -0,0 +1,11 @@ +// +build !vulkan + +package recognizer + +/* +#cgo CXXFLAGS: --std=c++11 -fopenmp +#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include +#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision +#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib +*/ +import "C" diff --git a/go/face/eye/cgo_vulkan.go b/go/face/eye/cgo_vulkan.go new file mode 100644 index 0000000..a72bb5c --- /dev/null +++ 
b/go/face/eye/cgo_vulkan.go @@ -0,0 +1,11 @@ +// +build vulkan + +package eye + +/* +#cgo CXXFLAGS: --std=c++11 -fopenmp +#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include +#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent +#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib +*/ +import "C" diff --git a/go/face/eye/detecter.go b/go/face/eye/detecter.go new file mode 100644 index 0000000..cd973a2 --- /dev/null +++ b/go/face/eye/detecter.go @@ -0,0 +1,43 @@ +package eye + +/* +#include +#include +#include "openvision/common/common.h" +#include "openvision/face/eye.h" +*/ +import "C" +import ( + "unsafe" + + openvision "github.com/bububa/openvision/go" + "github.com/bububa/openvision/go/common" +) + +// Detecter represents Eye Detector interface +type Detecter interface { + common.Estimator + Status(img *common.Image, face common.Rectangle) ([]float64, error) +} + +// Status extract scores using recognizer +func Status(r Detecter, img *common.Image, faceRect common.Rectangle) ([]float64, error) { + imgWidth := img.WidthF64() + imgHeight := img.HeightF64() + data := img.Bytes() + CFeatures := common.NewCFloatVector() + defer common.FreeCFloatVector(CFeatures) + CRect := faceRect.CRect(imgWidth, imgHeight) + errCode := C.eye_status( + (C.IEye)(r.Pointer()), + (*C.uchar)(unsafe.Pointer(&data[0])), + C.int(imgWidth), C.int(imgHeight), + (*C.Rect)(unsafe.Pointer(CRect)), + (*C.FloatVector)(unsafe.Pointer(CFeatures)), + ) + C.free(unsafe.Pointer(CRect)) + if errCode != 0 { + return nil, openvision.RecognizeFaceError(int(errCode)) + } + return common.GoFloatVector(CFeatures), nil +} diff --git a/go/face/eye/doc.go b/go/face/eye/doc.go new file mode 100644 index 0000000..15a80c4 --- /dev/null +++ b/go/face/eye/doc.go @@ -0,0 +1,2 @@ +// Package eye include eye status detector +package eye diff --git a/go/face/eye/lenet.go b/go/face/eye/lenet.go new file mode 100644 index 0000000..a1b4f10 --- /dev/null +++ b/go/face/eye/lenet.go @@ -0,0 +1,45 @@ +package eye + +/* +#include +#include +#include "openvision/face/eye.h" +*/ +import "C" +import ( + "unsafe" + + "github.com/bububa/openvision/go/common" +) + +// Lenet represents Lenet detecter +type Lenet struct { + d C.IEye +} + +// NetLenet returns a new Lenet detecter +func NewLenet() *Lenet { + return &Lenet{ + d: C.new_lenet_eye(), + } +} + +// Pointer implement Estimator interface +func (d *Lenet) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) +} + +// LoadModel implement Recognizer interface +func (d *Lenet) LoadModel(modelPath string) error { + return common.EstimatorLoadModel(d, modelPath) +} + +// Destroy implement Recognizer interface +func (d *Lenet) Destroy() { + common.DestroyEstimator(d) +} + +// Status implement Eye Detecter interface +func (d *Lenet) Status(img *common.Image, faceRect common.Rectangle) ([]float64, error) { + return Status(d, img, faceRect) +} diff --git a/go/face/tracker/cgo.go b/go/face/tracker/cgo.go index 0f33239..c64d9fc 100644 --- a/go/face/tracker/cgo.go +++ b/go/face/tracker/cgo.go @@ -1,6 +1,6 @@ // +build !vulkan -package tracker +package eye /* #cgo CXXFLAGS: --std=c++11 -fopenmp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6b87012..99be3f4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -58,6 +58,7 @@ target_include_directories(openvision $ $ + $ $ $ @@ -92,6 +93,7 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/face/hopenet.h ${CMAKE_CURRENT_SOURCE_DIR}/face/aligner.h 
${CMAKE_CURRENT_SOURCE_DIR}/face/hair.h + ${CMAKE_CURRENT_SOURCE_DIR}/face/eye.h DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/face ) diff --git a/src/face/eye.h b/src/face/eye.h new file mode 100644 index 0000000..89c5072 --- /dev/null +++ b/src/face/eye.h @@ -0,0 +1,17 @@ +#ifndef _FACE_EYE_C_H_ +#define _FACE_EYE_C_H_ + +#include "common.h" + +#ifdef __cplusplus +#include "eye/eye.hpp" +extern "C" { +#endif +typedef void *IEye; +IEye new_lenet_eye(); +int eye_status(IEye d, const unsigned char *rgbdata, int img_width, + int img_height, const Rect *rect, FloatVector *cls_scores); +#ifdef __cplusplus +} +#endif +#endif // !_FACE_HAIR_C_H_ diff --git a/src/face/eye/eye.cpp b/src/face/eye/eye.cpp new file mode 100644 index 0000000..e76b141 --- /dev/null +++ b/src/face/eye/eye.cpp @@ -0,0 +1,20 @@ +#include "../eye.h" +#include "lenet/lenet.hpp" + +IEye new_lenet_eye() { return new ovface::LenetEye(); } + +int eye_status(IEye d, const unsigned char *rgbdata, int img_width, + int img_height, const Rect *rect, FloatVector *cls_scores) { + std::vector scores; + int ret = static_cast(d)->Status(rgbdata, img_width, + img_height, *rect, scores); + if (ret != 0) { + return ret; + } + cls_scores->length = scores.size(); + cls_scores->values = (float *)malloc(cls_scores->length * sizeof(float)); + for (int i = 0; i < cls_scores->length; ++i) { + cls_scores->values[i] = scores[i]; + } + return 0; +} diff --git a/src/face/eye/eye.hpp b/src/face/eye/eye.hpp new file mode 100644 index 0000000..2da098f --- /dev/null +++ b/src/face/eye/eye.hpp @@ -0,0 +1,13 @@ +#ifndef _FACE_EYE_H_ +#define _FACE_EYE_H_ + +#include "../../common/common.hpp" +namespace ovface { +class Eye : public ov::Estimator { +public: + virtual int Status(const unsigned char *rgbdata, int img_width, + int img_height, const ov::Rect rect, + std::vector &cls_scores) = 0; +}; +} // namespace ovface +#endif // !_FACE_EYE_H_ diff --git a/src/face/eye/lenet/lenet.cpp b/src/face/eye/lenet/lenet.cpp new file mode 100644 index 0000000..1332535 --- /dev/null +++ b/src/face/eye/lenet/lenet.cpp @@ -0,0 +1,39 @@ +#include "lenet.hpp" + +#ifdef OV_VULKAN +#include "gpu.h" +#endif // OV_VULKAN + +namespace ovface { +int LenetEye::Status(const unsigned char *rgbdata, int img_width, + int img_height, const ov::Rect rect, + std::vector &cls_scores) { + if (!initialized_) { + return 10000; + } + if (rgbdata == 0) { + return 10001; + } + cls_scores.clear(); + ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize( + rgbdata, ncnn::Mat::PIXEL_RGB2GRAY, img_width, img_height, rect.x, rect.y, + rect.width, rect.height, target_width, target_height); // PIXEL_GRAY + in.substract_mean_normalize(mean_vals, 0); + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); + ex.input("data", in); + + ncnn::Mat out; + ex.extract("prob", out); + + cls_scores.resize(out.c); + for (int c = 0; c < out.c; c++) { + const float *prob = out.channel(c); + cls_scores[c] = prob[0]; + } + + return 0; +} + +} // namespace ovface diff --git a/src/face/eye/lenet/lenet.hpp b/src/face/eye/lenet/lenet.hpp new file mode 100644 index 0000000..8bdafaf --- /dev/null +++ b/src/face/eye/lenet/lenet.hpp @@ -0,0 +1,17 @@ +#ifndef _FACE_EYE_LENET_H_ +#define _FACE_EYE_LENET_H_ +#include "../eye.hpp" + +namespace ovface { +class LenetEye : public Eye { +public: + int Status(const unsigned char *rgbdata, int img_width, int img_height, + const ov::Rect rect, std::vector &cls_scores); + +private: + const int target_width = 36; + const int 
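+  // Status() feeds a 36x28 (w x h) grayscale eye crop, subtracting mean 60
+  // with no scaling, and reads per-class scores from the "prob" blob.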
target_height = 28; + const float mean_vals[1] = {60.f}; +}; +} // namespace ovface +#endif // !_FACE_EYE_LENET_H_ From 55680ea57a9ffd91905518caf9bda3df44bf1bf5 Mon Sep 17 00:00:00 2001 From: Syd Xu Date: Fri, 12 Nov 2021 13:49:47 +0800 Subject: [PATCH 4/9] fix(face): eye status goapi --- go/examples/eye/main.go | 115 ++++++++++++++++++++++++++++++++++++++++ go/face/eye/cgo.go | 2 +- go/face/eye/detecter.go | 17 +++--- go/face/eye/lenet.go | 6 +-- 4 files changed, 128 insertions(+), 12 deletions(-) create mode 100644 go/examples/eye/main.go diff --git a/go/examples/eye/main.go b/go/examples/eye/main.go new file mode 100644 index 0000000..99f7f62 --- /dev/null +++ b/go/examples/eye/main.go @@ -0,0 +1,115 @@ +package main + +import ( + "bytes" + "fmt" + "image" + "image/jpeg" + "log" + "os" + "os/user" + "path/filepath" + "strings" + + "github.com/bububa/openvision/go/common" + "github.com/bububa/openvision/go/face/detecter" + "github.com/bububa/openvision/go/face/eye" +) + +func main() { + wd, _ := os.Getwd() + dataPath := cleanPath(wd, "~/go/src/github.com/bububa/openvision/data") + imgPath := filepath.Join(dataPath, "./images") + modelPath := filepath.Join(dataPath, "./models") + common.CreateGPUInstance() + defer common.DestroyGPUInstance() + cpuCores := common.GetBigCPUCount() + common.SetOMPThreads(cpuCores) + log.Printf("CPU big cores:%d\n", cpuCores) + d := retinaface(modelPath) + defer d.Destroy() + common.SetEstimatorThreads(d, cpuCores) + e := lenet(modelPath) + defer e.Destroy() + common.SetEstimatorThreads(e, cpuCores) + for _, fn := range []string{"eye-open.jpg", "eye-close.jpg", "eye-half.jpg"} { + detect(d, e, imgPath, fn) + } +} + +func retinaface(modelPath string) detecter.Detecter { + modelPath = filepath.Join(modelPath, "fd") + d := detecter.NewRetinaFace() + if err := d.LoadModel(modelPath); err != nil { + log.Fatalln(err) + } + return d +} + +func lenet(modelPath string) eye.Detecter { + modelPath = filepath.Join(modelPath, "eye/lenet") + d := eye.NewLenet() + if err := d.LoadModel(modelPath); err != nil { + log.Fatalln(err) + } + return d +} + +func detect(d detecter.Detecter, e eye.Detecter, imgPath string, filename string) { + inPath := filepath.Join(imgPath, filename) + imgLoaded, err := loadImage(inPath) + if err != nil { + log.Fatalln("load image failed,", err) + } + img := common.NewImage(imgLoaded) + faces, err := d.Detect(img) + if err != nil { + log.Fatalln(err) + } + for _, face := range faces { + rect := face.Rect + closed, err := e.IsClosed(img, rect) + if err != nil { + log.Fatalln(err) + } + fmt.Printf("fn: %s, closed: %+v\n", filename, closed) + } +} + +func loadImage(filePath string) (image.Image, error) { + fn, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer fn.Close() + img, _, err := image.Decode(fn) + if err != nil { + return nil, err + } + return img, nil +} + +func saveImage(img image.Image, filePath string) error { + buf := new(bytes.Buffer) + if err := jpeg.Encode(buf, img, nil); err != nil { + return err + } + fn, err := os.Create(filePath) + if err != nil { + return err + } + defer fn.Close() + fn.Write(buf.Bytes()) + return nil +} + +func cleanPath(wd string, path string) string { + usr, _ := user.Current() + dir := usr.HomeDir + if path == "~" { + return dir + } else if strings.HasPrefix(path, "~/") { + return filepath.Join(dir, path[2:]) + } + return filepath.Join(wd, path) +} diff --git a/go/face/eye/cgo.go b/go/face/eye/cgo.go index 05f982e..c64d9fc 100644 --- a/go/face/eye/cgo.go +++ b/go/face/eye/cgo.go @@ -1,6 
+1,6 @@ // +build !vulkan -package recognizer +package eye /* #cgo CXXFLAGS: --std=c++11 -fopenmp diff --git a/go/face/eye/detecter.go b/go/face/eye/detecter.go index cd973a2..2d218ce 100644 --- a/go/face/eye/detecter.go +++ b/go/face/eye/detecter.go @@ -17,27 +17,28 @@ import ( // Detecter represents Eye Detector interface type Detecter interface { common.Estimator - Status(img *common.Image, face common.Rectangle) ([]float64, error) + IsClosed(img *common.Image, face common.Rectangle) (bool, error) } -// Status extract scores using recognizer -func Status(r Detecter, img *common.Image, faceRect common.Rectangle) ([]float64, error) { +// IsClosed check whether eyes are closed +func IsClosed(r Detecter, img *common.Image, faceRect common.Rectangle) (bool, error) { imgWidth := img.WidthF64() imgHeight := img.HeightF64() data := img.Bytes() - CFeatures := common.NewCFloatVector() - defer common.FreeCFloatVector(CFeatures) + scoresC := common.NewCFloatVector() + defer common.FreeCFloatVector(scoresC) CRect := faceRect.CRect(imgWidth, imgHeight) errCode := C.eye_status( (C.IEye)(r.Pointer()), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(imgWidth), C.int(imgHeight), (*C.Rect)(unsafe.Pointer(CRect)), - (*C.FloatVector)(unsafe.Pointer(CFeatures)), + (*C.FloatVector)(unsafe.Pointer(scoresC)), ) C.free(unsafe.Pointer(CRect)) if errCode != 0 { - return nil, openvision.RecognizeFaceError(int(errCode)) + return false, openvision.RecognizeFaceError(int(errCode)) } - return common.GoFloatVector(CFeatures), nil + scores := common.GoFloatVector(scoresC) + return len(scores) > 0 && scores[0] == 1, nil } diff --git a/go/face/eye/lenet.go b/go/face/eye/lenet.go index a1b4f10..c784fba 100644 --- a/go/face/eye/lenet.go +++ b/go/face/eye/lenet.go @@ -39,7 +39,7 @@ func (d *Lenet) Destroy() { common.DestroyEstimator(d) } -// Status implement Eye Detecter interface -func (d *Lenet) Status(img *common.Image, faceRect common.Rectangle) ([]float64, error) { - return Status(d, img, faceRect) +// IsClosed implement Eye Detecter interface +func (d *Lenet) IsClosed(img *common.Image, faceRect common.Rectangle) (bool, error) { + return IsClosed(d, img, faceRect) } From 63d088f64a38d779e0c0ff4ed386d47be7366550 Mon Sep 17 00:00:00 2001 From: Syd Xu Date: Fri, 26 Nov 2021 14:28:38 +0800 Subject: [PATCH 5/9] feat(classifier): add svm classifier --- data/font/.gitignore | 3 + go/classifier/doc.go | 2 + go/classifier/svm/binary_classifier.go | 40 + go/classifier/svm/binary_trainer.go | 50 + go/classifier/svm/cgo.go | 12 + go/classifier/svm/cgo_vulkan.go | 12 + go/classifier/svm/classifier.go | 58 + go/classifier/svm/doc.go | 2 + go/classifier/svm/multiclass_classifier.go | 40 + go/classifier/svm/multiclass_trainer.go | 50 + go/classifier/svm/trainer.go | 63 + go/common/color.go | 3 + go/common/font.go | 45 + go/common/geometry.go | 2 + go/common/image.go | 48 + go/error.go | 12 + go/examples/detecter/main.go | 56 +- go/face/drawer/const.go | 2 + go/face/drawer/drawer.go | 8 + go/face/drawer/option.go | 16 + go/face/face_info.go | 2 + src/CMakeLists.txt | 15 + src/classifier/svm/svm_binary_classifier.cpp | 55 + src/classifier/svm/svm_binary_classifier.hpp | 19 + src/classifier/svm/svm_binary_trainer.cpp | 126 + src/classifier/svm/svm_binary_trainer.hpp | 28 + src/classifier/svm/svm_classifier.cpp | 33 + src/classifier/svm/svm_classifier.hpp | 16 + src/classifier/svm/svm_common.hpp | 11 + src/classifier/svm/svm_light/kernel.h | 40 + .../svm/svm_light/pr_loqo/pr_loqo.c | 619 +++ .../svm/svm_light/pr_loqo/pr_loqo.h | 93 + 
src/classifier/svm/svm_light/svm_classify.c | 198 + src/classifier/svm/svm_light/svm_common.c | 1854 ++++++++ src/classifier/svm/svm_light/svm_common.h | 385 ++ src/classifier/svm/svm_light/svm_hideo.c | 1054 +++++ src/classifier/svm/svm_light/svm_learn.c | 4216 +++++++++++++++++ src/classifier/svm/svm_light/svm_learn.h | 166 + src/classifier/svm/svm_light/svm_learn_main.c | 303 ++ src/classifier/svm/svm_light/svm_loqo.c | 211 + .../svm/svm_multiclass_classifier.cpp | 70 + .../svm/svm_multiclass_classifier.hpp | 21 + src/classifier/svm/svm_multiclass_trainer.cpp | 151 + src/classifier/svm/svm_multiclass_trainer.hpp | 31 + .../svm/svm_struct/svm_struct_classify.c | 186 + .../svm/svm_struct/svm_struct_common.c | 66 + .../svm/svm_struct/svm_struct_common.h | 61 + .../svm/svm_struct/svm_struct_learn.c | 1289 +++++ .../svm/svm_struct/svm_struct_learn.h | 101 + .../svm/svm_struct/svm_struct_main.c | 417 ++ src/classifier/svm/svm_struct_api.c | 615 +++ src/classifier/svm/svm_struct_api.h | 76 + src/classifier/svm/svm_struct_api_types.h | 114 + src/classifier/svm/svm_struct_learn_custom.c | 42 + src/classifier/svm/svm_trainer.cpp | 34 + src/classifier/svm/svm_trainer.hpp | 16 + src/classifier/svm_classifier.h | 19 + src/classifier/svm_trainer.h | 20 + src/common/common.cpp | 567 ++- src/common/common.hpp | 340 +- 60 files changed, 13719 insertions(+), 485 deletions(-) create mode 100644 data/font/.gitignore create mode 100644 go/classifier/doc.go create mode 100644 go/classifier/svm/binary_classifier.go create mode 100644 go/classifier/svm/binary_trainer.go create mode 100644 go/classifier/svm/cgo.go create mode 100644 go/classifier/svm/cgo_vulkan.go create mode 100644 go/classifier/svm/classifier.go create mode 100644 go/classifier/svm/doc.go create mode 100644 go/classifier/svm/multiclass_classifier.go create mode 100644 go/classifier/svm/multiclass_trainer.go create mode 100644 go/classifier/svm/trainer.go create mode 100644 go/common/font.go create mode 100644 src/classifier/svm/svm_binary_classifier.cpp create mode 100644 src/classifier/svm/svm_binary_classifier.hpp create mode 100644 src/classifier/svm/svm_binary_trainer.cpp create mode 100644 src/classifier/svm/svm_binary_trainer.hpp create mode 100644 src/classifier/svm/svm_classifier.cpp create mode 100644 src/classifier/svm/svm_classifier.hpp create mode 100644 src/classifier/svm/svm_common.hpp create mode 100644 src/classifier/svm/svm_light/kernel.h create mode 100644 src/classifier/svm/svm_light/pr_loqo/pr_loqo.c create mode 100644 src/classifier/svm/svm_light/pr_loqo/pr_loqo.h create mode 100644 src/classifier/svm/svm_light/svm_classify.c create mode 100644 src/classifier/svm/svm_light/svm_common.c create mode 100644 src/classifier/svm/svm_light/svm_common.h create mode 100644 src/classifier/svm/svm_light/svm_hideo.c create mode 100644 src/classifier/svm/svm_light/svm_learn.c create mode 100644 src/classifier/svm/svm_light/svm_learn.h create mode 100644 src/classifier/svm/svm_light/svm_learn_main.c create mode 100644 src/classifier/svm/svm_light/svm_loqo.c create mode 100644 src/classifier/svm/svm_multiclass_classifier.cpp create mode 100644 src/classifier/svm/svm_multiclass_classifier.hpp create mode 100644 src/classifier/svm/svm_multiclass_trainer.cpp create mode 100644 src/classifier/svm/svm_multiclass_trainer.hpp create mode 100755 src/classifier/svm/svm_struct/svm_struct_classify.c create mode 100644 src/classifier/svm/svm_struct/svm_struct_common.c create mode 100755 src/classifier/svm/svm_struct/svm_struct_common.h create mode 
100755 src/classifier/svm/svm_struct/svm_struct_learn.c create mode 100755 src/classifier/svm/svm_struct/svm_struct_learn.h create mode 100755 src/classifier/svm/svm_struct/svm_struct_main.c create mode 100755 src/classifier/svm/svm_struct_api.c create mode 100755 src/classifier/svm/svm_struct_api.h create mode 100755 src/classifier/svm/svm_struct_api_types.h create mode 100755 src/classifier/svm/svm_struct_learn_custom.c create mode 100644 src/classifier/svm/svm_trainer.cpp create mode 100644 src/classifier/svm/svm_trainer.hpp create mode 100644 src/classifier/svm_classifier.h create mode 100644 src/classifier/svm_trainer.h
diff --git a/data/font/.gitignore b/data/font/.gitignore new file mode 100644 index 0000000..94548af --- /dev/null +++ b/data/font/.gitignore @@ -0,0 +1,3 @@ +* +*/ +!.gitignore
diff --git a/go/classifier/doc.go b/go/classifier/doc.go new file mode 100644 index 0000000..1b38181 --- /dev/null +++ b/go/classifier/doc.go @@ -0,0 +1,2 @@ +// Package classifier implements different classifiers +package classifier
diff --git a/go/classifier/svm/binary_classifier.go b/go/classifier/svm/binary_classifier.go new file mode 100644 index 0000000..6b2c92d --- /dev/null +++ b/go/classifier/svm/binary_classifier.go @@ -0,0 +1,40 @@ +package svm + +/* +#include <stdio.h> +#include <stdlib.h> +#include "openvision/classifier/svm_classifier.h" +*/ +import "C" + +// BinaryClassifier represents svm classifier +type BinaryClassifier struct { + d C.ISVMClassifier +} + +// NewBinaryClassifier returns a new BinaryClassifier +func NewBinaryClassifier() *BinaryClassifier { + return &BinaryClassifier{ + d: C.new_svm_binary_classifier(), + } +} + +// Destroy destroy C.ISVMClassifier +func (t *BinaryClassifier) Destroy() { + DestroyClassifier(t.d) +} + +// LoadModel load model +func (t *BinaryClassifier) LoadModel(modelPath string) { + LoadClassifierModel(t.d, modelPath) +} + +// Predict returns predicted score +func (t *BinaryClassifier) Predict(vec []float32) float64 { + return Predict(t.d, vec) +} + +// Classify returns classified scores +func (t *BinaryClassifier) Classify(vec []float32) ([]float64, error) { + return Classify(t.d, vec) +}
diff --git a/go/classifier/svm/binary_trainer.go b/go/classifier/svm/binary_trainer.go new file mode 100644 index 0000000..9fc5b1e --- /dev/null +++ b/go/classifier/svm/binary_trainer.go @@ -0,0 +1,50 @@ +package svm + +/* +#include <stdio.h> +#include <stdlib.h> +#include "openvision/classifier/svm_trainer.h" +*/ +import "C" + +// BinaryTrainer represents svm trainer +type BinaryTrainer struct { + d C.ISVMTrainer +} + +// NewBinaryTrainer returns a new BinaryTrainer +func NewBinaryTrainer() *BinaryTrainer { + return &BinaryTrainer{ + d: C.new_svm_binary_trainer(), + } +} + +// Destroy destroy C.ISVMTrainer +func (t *BinaryTrainer) Destroy() { + DestroyTrainer(t.d) +} + +// Reset reset C.ISVMTrainer +func (t *BinaryTrainer) Reset() { + ResetTrainer(t.d) +} + +// SetLabels set total labels +func (t *BinaryTrainer) SetLabels(labels int) { + SetLabels(t.d, labels) +} + +// SetFeatures set total features +func (t *BinaryTrainer) SetFeatures(feats int) { + SetFeatures(t.d, feats) +} + +// AddData add data with label +func (t *BinaryTrainer) AddData(labelID int, feats []float32) { + AddData(t.d, labelID, feats) +} + +// Train train model +func (t *BinaryTrainer) Train(modelPath string) error { + return Train(t.d, modelPath) +}
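
A minimal usage sketch (not part of the patch) of the binary trainer API added above; the feature values and output path are made up for illustration:

    package main

    import (
        "log"

        "github.com/bububa/openvision/go/classifier/svm"
    )

    func main() {
        t := svm.NewBinaryTrainer()
        defer t.Destroy()
        t.SetFeatures(2) // every sample must carry exactly two feature values
        // the binary trainer only accepts +1/-1 labels (see svm_binary_trainer.cpp below)
        t.AddData(1, []float32{1.0, 0.9})
        t.AddData(-1, []float32{-0.8, -1.1})
        if err := t.Train("/tmp/svm.model"); err != nil { // hypothetical model path
            log.Fatalln(err)
        }
    }
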
diff --git a/go/classifier/svm/cgo.go b/go/classifier/svm/cgo.go new file mode 100644 index 0000000..13e5cf5 --- /dev/null +++ b/go/classifier/svm/cgo.go @@ -0,0 +1,12 @@ +//go:build !vulkan +// +build !vulkan + +package svm + +/* +#cgo CXXFLAGS: --std=c++11 -fopenmp +#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include +#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision +#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib +*/ +import "C"
diff --git a/go/classifier/svm/cgo_vulkan.go b/go/classifier/svm/cgo_vulkan.go new file mode 100644 index 0000000..02df4b6 --- /dev/null +++ b/go/classifier/svm/cgo_vulkan.go @@ -0,0 +1,12 @@ +//go:build vulkan +// +build vulkan + +package svm + +/* +#cgo CXXFLAGS: --std=c++11 -fopenmp +#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include +#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent +#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib +*/ +import "C"
diff --git a/go/classifier/svm/classifier.go b/go/classifier/svm/classifier.go new file mode 100644 index 0000000..b373643 --- /dev/null +++ b/go/classifier/svm/classifier.go @@ -0,0 +1,59 @@ +package svm + +/* +#include <stdio.h> +#include <stdlib.h> +#include "openvision/classifier/svm_classifier.h" +*/ +import "C" +import ( + "unsafe" + + openvision "github.com/bububa/openvision/go" + "github.com/bububa/openvision/go/common" +) + +// Classifier represents svm classifier +type Classifier interface { + LoadModel(string) + Destroy() + Predict(vec []float32) float64 + Classify(vec []float32) (scores []float64, err error) +} + +// DestroyClassifier destroy C.ISVMClassifier +func DestroyClassifier(d C.ISVMClassifier) { + C.destroy_svm_classifier(d) +} + +// LoadClassifierModel load model +func LoadClassifierModel(d C.ISVMClassifier, modelPath string) { + cPath := C.CString(modelPath) + defer C.free(unsafe.Pointer(cPath)) + C.svm_classifier_load_model(d, cPath) +} + +// Predict returns predicted score +func Predict(d C.ISVMClassifier, vec []float32) float64 { + cvals := make([]C.float, 0, len(vec)) + for _, v := range vec { + cvals = append(cvals, C.float(v)) + } + score := C.svm_predict(d, &cvals[0]) + return float64(score) +} + +// Classify returns class scores +func Classify(d C.ISVMClassifier, vec []float32) ([]float64, error) { + cvals := make([]C.float, 0, len(vec)) + for _, v := range vec { + cvals = append(cvals, C.float(v)) + } + cScores := common.NewCFloatVector() + defer common.FreeCFloatVector(cScores) + errCode := C.svm_classify(d, &cvals[0], (*C.FloatVector)(unsafe.Pointer(cScores))) + if errCode != 0 { + return nil, openvision.ClassifyError(int(errCode)) + } + return common.GoFloatVector(cScores), nil +}
diff --git a/go/classifier/svm/doc.go b/go/classifier/svm/doc.go new file mode 100644 index 0000000..08f3086 --- /dev/null +++ b/go/classifier/svm/doc.go @@ -0,0 +1,2 @@ +// Package svm implements svm classifier +package svm
diff --git a/go/classifier/svm/multiclass_classifier.go b/go/classifier/svm/multiclass_classifier.go new file mode 100644 index 0000000..7ed12b8 --- /dev/null +++ b/go/classifier/svm/multiclass_classifier.go @@ -0,0 +1,40 @@ +package svm + +/* +#include <stdio.h> +#include <stdlib.h> +#include "openvision/classifier/svm_classifier.h" +*/ +import "C" + +// MultiClassClassifier represents svm classifier +type MultiClassClassifier struct { + d C.ISVMClassifier +} + +// NewMultiClassClassifier returns a new MultiClassClassifier +func NewMultiClassClassifier() *MultiClassClassifier { + return &MultiClassClassifier{ + d: C.new_svm_multiclass_classifier(), + } +} + +// Destroy destroy C.ISVMClassifier +func (t *MultiClassClassifier) Destroy() { + DestroyClassifier(t.d) +} + +// LoadModel load model
+func (t *MultiClassClassifier) LoadModel(modelPath string) { + LoadClassifierModel(t.d, modelPath) +} + +// Predict returns predicted score +func (t *MultiClassClassifier) Predict(vec []float32) float64 { + return Predict(t.d, vec) +} + +// Classify returns classified scores +func (t *MultiClassClassifier) Classify(vec []float32) ([]float64, error) { + return Classify(t.d, vec) +}
diff --git a/go/classifier/svm/multiclass_trainer.go b/go/classifier/svm/multiclass_trainer.go new file mode 100644 index 0000000..d47f801 --- /dev/null +++ b/go/classifier/svm/multiclass_trainer.go @@ -0,0 +1,50 @@ +package svm + +/* +#include <stdio.h> +#include <stdlib.h> +#include "openvision/classifier/svm_trainer.h" +*/ +import "C" + +// MultiClassTrainer represents svm trainer +type MultiClassTrainer struct { + d C.ISVMTrainer +} + +// NewMultiClassTrainer returns a new MultiClassTrainer +func NewMultiClassTrainer() *MultiClassTrainer { + return &MultiClassTrainer{ + d: C.new_svm_multiclass_trainer(), + } +} + +// Destroy destroy C.ISVMTrainer +func (t *MultiClassTrainer) Destroy() { + DestroyTrainer(t.d) +} + +// Reset reset C.ISVMTrainer +func (t *MultiClassTrainer) Reset() { + ResetTrainer(t.d) +} + +// SetLabels set total labels +func (t *MultiClassTrainer) SetLabels(labels int) { + SetLabels(t.d, labels) +} + +// SetFeatures set total features +func (t *MultiClassTrainer) SetFeatures(feats int) { + SetFeatures(t.d, feats) +} + +// AddData add data with label +func (t *MultiClassTrainer) AddData(labelID int, feats []float32) { + AddData(t.d, labelID, feats) +} + +// Train train model +func (t *MultiClassTrainer) Train(modelPath string) error { + return Train(t.d, modelPath) +}
diff --git a/go/classifier/svm/trainer.go b/go/classifier/svm/trainer.go new file mode 100644 index 0000000..a9442f6 --- /dev/null +++ b/go/classifier/svm/trainer.go @@ -0,0 +1,63 @@ +package svm + +/* +#include <stdio.h> +#include <stdlib.h> +#include "openvision/classifier/svm_trainer.h" +*/ +import "C" +import ( + "unsafe" + + openvision "github.com/bububa/openvision/go" +) + +// Trainer represents svm trainer +type Trainer interface { + Reset() + Destroy() + SetLabels(int) + SetFeatures(int) + AddData(int, []float32) + Train(modelPath string) error +} + +// DestroyTrainer destroy C.ISVMTrainer +func DestroyTrainer(d C.ISVMTrainer) { + C.destroy_svm_trainer(d) +} + +// ResetTrainer reset C.ISVMTrainer +func ResetTrainer(d C.ISVMTrainer) { + C.svm_trainer_reset(d) +} + +// SetLabels set total labels +func SetLabels(d C.ISVMTrainer, labels int) { + C.svm_trainer_set_labels(d, C.int(labels)) +} + +// SetFeatures set total features +func SetFeatures(d C.ISVMTrainer, feats int) { + C.svm_trainer_set_features(d, C.int(feats)) +} + +// AddData add data with label +func AddData(d C.ISVMTrainer, labelID int, feats []float32) { + vec := make([]C.float, 0, len(feats)) + for _, v := range feats { + vec = append(vec, C.float(v)) + } + C.svm_trainer_add_data(d, C.int(labelID), &vec[0]) +} + +// Train train model +func Train(d C.ISVMTrainer, modelPath string) error { + cPath := C.CString(modelPath) + defer C.free(unsafe.Pointer(cPath)) + errCode := C.svm_train(d, cPath) + if errCode != 0 { + return openvision.TrainingError(int(errCode)) + } + return nil +}
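
Because both trainers implement the Trainer interface above (and both classifiers implement Classifier), calling code can stay generic. A sketch under that assumption; the sample type and package name are invented for illustration:

    package classify

    import "github.com/bububa/openvision/go/classifier/svm"

    // sample is a hypothetical carrier for one training example.
    type sample struct {
        label int
        feats []float32
    }

    // trainAny drives any svm.Trainer through the same lifecycle.
    func trainAny(t svm.Trainer, modelPath string, labels, feats int, data []sample) error {
        t.Reset()
        t.SetLabels(labels) // a no-op for the binary trainer
        t.SetFeatures(feats)
        for _, s := range data {
            t.AddData(s.label, s.feats)
        }
        return t.Train(modelPath)
    }

The same function then works with svm.NewBinaryTrainer() and svm.NewMultiClassTrainer().
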
diff --git a/go/common/color.go index 734329f..020250e 100644 --- a/go/common/color.go +++ b/go/common/color.go @@ -38,6 +38,9 @@ func parseHexColor(x string) (r, g, b, a uint32) { } const ( + White = "#FFFFFF" + Black = "#000000" + Gray = "#333333" Green = "#64DD17" Pink = "#E91E63" Red = "#FF1744"
diff --git a/go/common/font.go b/go/common/font.go new file mode 100644 index 0000000..94a145a --- /dev/null +++ b/go/common/font.go @@ -0,0 +1,45 @@ +package common + +import ( + "errors" + + "github.com/golang/freetype/truetype" + "github.com/llgcode/draw2d" +) + +// Font font info +type Font struct { + // Cache FontCache + Cache draw2d.FontCache + // Size font size + Size float64 `json:"size,omitempty"` + // Data font setting + Data *draw2d.FontData `json:"data,omitempty"` + // Font + Font *truetype.Font `json:"-"` +} + +// Load font from font cache +func (f *Font) Load(cache draw2d.FontCache) error { + if f.Font != nil { + return nil + } + if f.Data == nil { + return nil + } + if cache == nil { + return errors.New("missing font cache") + } + ft, err := cache.Load(*f.Data) + if err != nil { + return err + } + f.Cache = cache + f.Font = ft + return nil +} + +// NewFontCache load font cache +func NewFontCache(fontFolder string) *draw2d.SyncFolderFontCache { + return draw2d.NewSyncFolderFontCache(fontFolder) +}
diff --git a/go/common/geometry.go index e5e618a..c803e4a 100644 --- a/go/common/geometry.go +++ b/go/common/geometry.go @@ -60,6 +60,8 @@ func GoRect(c *C.Rect, w float64, h float64) Rectangle { var ZR = Rectangle{} +var FullRect = Rect(0, 0, 1, 1) + // Point represents a Point type Point struct { X float64
diff --git a/go/common/image.go index a37a1da..ed6e42a 100644 --- a/go/common/image.go +++ b/go/common/image.go @@ -36,6 +36,14 @@ func NewImage(img image.Image) *Image { } } +// Reset clear image and buffer +func (i *Image) Reset() { + i.Image = nil + if i.buffer != nil { + i.buffer.Reset() + } +} + // Write write bytes to buffer func (i *Image) Write(b []byte) { if i.buffer == nil { @@ -185,3 +192,44 @@ func DrawCircle(gc *draw2dimg.GraphicContext, pt Point, r float64, borderColor s gc.Stroke() } } + +// DrawLabel draw label text to image +func DrawLabel(gc *draw2dimg.GraphicContext, font *Font, label string, pt Point, txtColor string, bgColor string, scale float64) { + if font == nil || font.Cache == nil || font.Data == nil { + return + } + gc.FontCache = font.Cache + gc.SetFontData(*font.Data) + gc.SetFontSize(font.Size * scale) + var ( + x = float64(pt.X) + y = float64(pt.Y) + padding = 2.0 * scale + ) + left, top, right, bottom := gc.GetStringBounds(label) + height := bottom - top + width := right - left + if bgColor != "" { + gc.SetFillColor(ColorFromHex(bgColor)) + draw2dkit.Rectangle(gc, x, y, x+width+padding*2, y+height+padding*2) + gc.Fill() + } + gc.SetFillColor(ColorFromHex(txtColor)) + gc.FillStringAt(label, x-left+padding, y-top+padding) +} + +// DrawLabelInWidth draw label text to image within a width limit +func DrawLabelInWidth(gc *draw2dimg.GraphicContext, font *Font, label string, pt Point, txtColor string, bgColor string, boundWidth float64) { + if font == nil || font.Cache == nil || font.Data == nil { + return + } + gc.FontCache = font.Cache + gc.SetFontData(*font.Data) + gc.SetFontSize(font.Size) + left, _, right, _ := gc.GetStringBounds(label) + padding := 2.0 + width := right - left + fontWidth := width + padding*2 + scale := boundWidth / fontWidth + DrawLabel(gc, font, label, pt, txtColor, bgColor, scale) +}
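
The width-fitting helper above only computes a scale factor and delegates to DrawLabel: the label is measured at scale 1, padded by 2px on each side, and scaled so it spans boundWidth. The same arithmetic in isolation, as a sketch:

    // fitScale returns the font scale at which a label measuring textWidth
    // pixels (at scale 1), plus DrawLabel's 2px padding per side, fills boundWidth.
    func fitScale(textWidth, boundWidth float64) float64 {
        const padding = 2.0
        return boundWidth / (textWidth + padding*2)
    }
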
diff --git a/go/error.go index 4ce1551..1126403 100644 --- a/go/error.go +++ b/go/error.go @@ -92,4 +92,16 @@ var ( Message: "super-resolution process error", } } + TrainingError = func(code int) Error { + return Error{ + Code: code, + Message: "training process failed", + } + } + ClassifyError = func(code int) Error { + return Error{ + Code: code, + Message: "classify process failed", + } + } )
diff --git a/go/examples/detecter/main.go index 694ab33..ddb0d0d 100644 --- a/go/examples/detecter/main.go +++ b/go/examples/detecter/main.go @@ -9,10 +9,13 @@ import ( "os" "os/user" "path/filepath" + "strconv" "strings" + "github.com/llgcode/draw2d" + "github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/face/detecter" facedrawer "github.com/bububa/openvision/go/face/drawer" ) @@ -21,16 +24,35 @@ func main() { wd, _ := os.Getwd() dataPath := cleanPath(wd, "~/go/src/github.com/bububa/openvision/data") imgPath := filepath.Join(dataPath, "./images") modelPath := filepath.Join(dataPath, "./models") + fontPath := filepath.Join(dataPath, "./font") common.CreateGPUInstance() defer common.DestroyGPUInstance() cpuCores := common.GetBigCPUCount() common.SetOMPThreads(cpuCores) log.Printf("CPU big cores:%d\n", cpuCores) - test_detect(imgPath, modelPath, cpuCores) - test_mask(imgPath, modelPath, cpuCores) + test_detect(imgPath, modelPath, fontPath, cpuCores) + test_mask(imgPath, modelPath, fontPath, cpuCores) } -func test_detect(imgPath string, modelPath string, threads int) { +func load_font(fontPath string) *common.Font { + fontCache := common.NewFontCache(fontPath) + fnt := &common.Font{ + Size: 9, + Data: &draw2d.FontData{ + Name: "NotoSansCJKsc", + //Name: "Roboto", + Family: draw2d.FontFamilySans, + Style: draw2d.FontStyleNormal, + }, + } + if err := fnt.Load(fontCache); err != nil { + log.Fatalln(err) + } + return fnt +} +func test_detect(imgPath string, modelPath string, fontPath string, threads int) { + fnt := load_font(fontPath) + drawer := facedrawer.New(facedrawer.WithFont(fnt)) for idx, d := range []detecter.Detecter{ retinaface(modelPath), centerface(modelPath), @@ -39,16 +61,22 @@ scrfd(modelPath), } { common.SetEstimatorThreads(d, threads) - detect(d, imgPath, idx, "4.jpg", false) + detect(d, drawer, imgPath, idx, "4.jpg") d.Destroy() } } -func test_mask(imgPath string, modelPath string, threads int) { +func test_mask(imgPath string, modelPath string, fontPath string, threads int) { + fnt := load_font(fontPath) + drawer := facedrawer.New( + facedrawer.WithBorderColor(common.Red), + facedrawer.WithMaskColor(common.Green), + facedrawer.WithFont(fnt), + ) d := anticonv(modelPath) common.SetEstimatorThreads(d, threads) defer d.Destroy() - detect(d, imgPath, 0, "mask3.jpg", true) + detect(d, drawer, imgPath, 0, "mask3.jpg") } func retinaface(modelPath string) detecter.Detecter { @@ -105,7 +133,7 @@ func anticonv(modelPath string) detecter.Detecter { return d } -func detect(d detecter.Detecter, imgPath string, idx int, filename string, mask bool) { +func detect(d detecter.Detecter, drawer *facedrawer.Drawer, imgPath string, idx int, filename string) { inPath := filepath.Join(imgPath, filename) img, err := loadImage(inPath) if err != nil { @@ -116,18 +144,11 @@ log.Fatalln(err) } - outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("%d-%s", idx, filename)) - - var drawer *facedrawer.Drawer - if mask { - drawer = facedrawer.New( - facedrawer.WithBorderColor(common.Red), - facedrawer.WithMaskColor(common.Green), - ) - } else { - drawer = facedrawer.New() + for idx, face := range faces { + faces[idx].Label = strconv.FormatFloat(float64(face.Score), 'f', 4, 64) }
"./results", fmt.Sprintf("%d-%s", idx, filename)) out := drawer.Draw(img, faces) if err := saveImage(out, outPath); err != nil { diff --git a/go/face/drawer/const.go b/go/face/drawer/const.go index d96d45c..63c8cd3 100644 --- a/go/face/drawer/const.go +++ b/go/face/drawer/const.go @@ -17,4 +17,6 @@ const ( DefaultKeypointStrokeWidth = 2 // DefaultInvalidBorderColor default drawer invalid border color DefaultInvalidBorderColor = common.Red + // DefaultLabelColor default label color + DefaultLabelColor = common.White ) diff --git a/go/face/drawer/drawer.go b/go/face/drawer/drawer.go index 2db51c8..339fb02 100644 --- a/go/face/drawer/drawer.go +++ b/go/face/drawer/drawer.go @@ -26,6 +26,10 @@ type Drawer struct { MaskColor string // InvalidBorderColor InvalidBorderColor string + // LabelColor string + LabelColor string + // Font + Font *common.Font } // New returns a new Drawer @@ -38,6 +42,7 @@ func New(options ...Option) *Drawer { KeypointRadius: DefaultKeypointRadius, InvalidBorderColor: DefaultInvalidBorderColor, MaskColor: DefaultBorderColor, + LabelColor: DefaultLabelColor, } for _, opt := range options { opt.apply(d) @@ -69,6 +74,9 @@ func (d *Drawer) Draw(img image.Image, faces []face.FaceInfo) image.Image { for _, pt := range face.Keypoints { common.DrawCircle(gc, common.Pt(pt.X*imgW, pt.Y*imgH), d.KeypointRadius, d.KeypointColor, "", d.KeypointStrokeWidth) } + if face.Label != "" { + common.DrawLabelInWidth(gc, d.Font, face.Label, common.Pt(rect.X, rect.MaxY()), d.LabelColor, borderColor, rect.Width) + } } return out } diff --git a/go/face/drawer/option.go b/go/face/drawer/option.go index a7fc784..9fabc81 100644 --- a/go/face/drawer/option.go +++ b/go/face/drawer/option.go @@ -1,5 +1,7 @@ package drawer +import "github.com/bububa/openvision/go/common" + // Option represents Drawer option interface type Option interface { apply(*Drawer) @@ -59,3 +61,17 @@ func WithMaskColor(color string) Option { d.MaskColor = color }) } + +// WithLabelColor set Drawer LabelColor +func WithLabelColor(color string) Option { + return optionFunc(func(d *Drawer) { + d.LabelColor = color + }) +} + +// WithFont set Drawer Font +func WithFont(font *common.Font) Option { + return optionFunc(func(d *Drawer) { + d.Font = font + }) +} diff --git a/go/face/face_info.go b/go/face/face_info.go index ed93f0d..66dd404 100644 --- a/go/face/face_info.go +++ b/go/face/face_info.go @@ -22,6 +22,8 @@ type FaceInfo struct { Keypoints [5]common.Point // Mask has mask or not Mask bool + // Label + Label string } // GoFaceInfo convert c FaceInfo to go type diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 99be3f4..f6312ed 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,7 @@ file(GLOB_RECURSE SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.cxx + ${CMAKE_CURRENT_SOURCE_DIR}/*.c ) message(${SRC_FILES}) @@ -10,6 +11,11 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2 -fPIC -std=c++11 -fopenmp") add_library(openvision STATIC ${SRC_FILES}) target_link_libraries(openvision PUBLIC ncnn) +# set(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/../libtorch/share/cmake/Torch") +# find_package(Torch REQUIRED) +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") +# target_link_libraries(openvision PUBLIC ${TORCH_LIBRARIES}) + if(OV_OPENMP) find_package(OpenMP) if(NOT TARGET OpenMP::OpenMP_CXX AND (OpenMP_CXX_FOUND OR OPENMP_FOUND)) @@ -76,6 +82,9 @@ target_include_directories(openvision $ $ + + $ + $ ) #install(TARGETS openvision EXPORT openvision 
diff --git a/src/CMakeLists.txt index 99be3f4..f6312ed 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,7 @@ file(GLOB_RECURSE SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.cxx + ${CMAKE_CURRENT_SOURCE_DIR}/*.c ) message(${SRC_FILES}) @@ -10,6 +11,11 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2 -fPIC -std=c++11 -fopenmp") add_library(openvision STATIC ${SRC_FILES}) target_link_libraries(openvision PUBLIC ncnn) +# set(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/../libtorch/share/cmake/Torch") +# find_package(Torch REQUIRED) +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") +# target_link_libraries(openvision PUBLIC ${TORCH_LIBRARIES}) + if(OV_OPENMP) find_package(OpenMP) if(NOT TARGET OpenMP::OpenMP_CXX AND (OpenMP_CXX_FOUND OR OPENMP_FOUND)) @@ -76,6 +82,9 @@ target_include_directories(openvision $ $ + + $ + $ ) #install(TARGETS openvision EXPORT openvision ARCHIVE DESTINATION ${LIBRARY_OUTPUT_PATH}) @@ -124,3 +133,9 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/counter/counter.h DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/counter ) + +file(COPY + ${CMAKE_CURRENT_SOURCE_DIR}/classifier/svm_trainer.h + ${CMAKE_CURRENT_SOURCE_DIR}/classifier/svm_classifier.h + DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/classifier +)
diff --git a/src/classifier/svm/svm_binary_classifier.cpp b/src/classifier/svm/svm_binary_classifier.cpp new file mode 100644 index 0000000..6bbd531 --- /dev/null +++ b/src/classifier/svm/svm_binary_classifier.cpp @@ -0,0 +1,55 @@ +#include "svm_binary_classifier.hpp" +#include "svm_light/svm_common.h" + +namespace ovclassifier { +SVMBinaryClassifier::SVMBinaryClassifier() {} + +SVMBinaryClassifier::~SVMBinaryClassifier() { + if (model_ != NULL) { + free_model(model_, 1); + model_ = NULL; + } +} + +int SVMBinaryClassifier::LoadModel(const char *modelfile) { + if (model_ != NULL) { + free_model(model_, 1); + } + /* read_model allocates the MODEL itself */ + model_ = read_model((char *)modelfile); + if (model_->kernel_parm.kernel_type == 0) { /* linear kernel */ /* compute weight vector */ + add_weight_vector_to_linear_model(model_); + } + return 0; +} + +double SVMBinaryClassifier::Predict(const float *vec) { + WORD *words = (WORD *)malloc(sizeof(WORD) * (model_->totwords + 10)); + for (int i = 0; i < (model_->totwords + 10); ++i) { + if (i >= model_->totwords) { + words[i].wnum = 0; + words[i].weight = 0; + } else { + words[i].wnum = i + 1; + words[i].weight = vec[i]; + } + } + DOC *doc = + create_example(-1, 0, 0, 0.0, create_svector(words, (char *)"", 1.0)); + free(words); + double dist; + if (model_->kernel_parm.kernel_type == 0) { + dist = classify_example_linear(model_, doc); + } else { + dist = classify_example(model_, doc); + } + free_example(doc, 1); + return dist; +} + +int SVMBinaryClassifier::Classify(const float *vec, + std::vector<double> &scores) { + return -1; +} +} // namespace ovclassifier
diff --git a/src/classifier/svm/svm_binary_classifier.hpp b/src/classifier/svm/svm_binary_classifier.hpp new file mode 100644 index 0000000..169973e --- /dev/null +++ b/src/classifier/svm/svm_binary_classifier.hpp @@ -0,0 +1,19 @@ +#ifndef _CLASSIFIER_SVM_BINARY_CLASSIFIER_H_ +#define _CLASSIFIER_SVM_BINARY_CLASSIFIER_H_ + +#include "svm_classifier.hpp" + +namespace ovclassifier { +class SVMBinaryClassifier : public SVMClassifier { +public: + SVMBinaryClassifier(); + ~SVMBinaryClassifier(); + int LoadModel(const char *modelfile); + double Predict(const float *vec); + int Classify(const float *vec, std::vector<double> &scores); + +private: + MODEL *model_ = NULL; +}; +} // namespace ovclassifier +#endif // !_CLASSIFIER_SVM_BINARY_CLASSIFIER_H_
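
Predict above hands back the raw SVM decision value (the signed distance dist), so callers recover the binary label by thresholding at zero, just as svm_classify.c does further down. A sketch, assuming feats holds exactly the configured number of features and the model path is hypothetical:

    clf := svm.NewBinaryClassifier()
    defer clf.Destroy()
    clf.LoadModel("/tmp/svm.model")
    label := -1
    if clf.Predict(feats) > 0 {
        label = 1
    }
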
diff --git a/src/classifier/svm/svm_binary_trainer.cpp b/src/classifier/svm/svm_binary_trainer.cpp new file mode 100644 index 0000000..525c089 --- /dev/null +++ b/src/classifier/svm/svm_binary_trainer.cpp @@ -0,0 +1,128 @@ +#include "svm_binary_trainer.hpp" +#include "svm_light/svm_common.h" +#include "svm_light/svm_learn.h" + +namespace ovclassifier { + +SVMBinaryTrainer::SVMBinaryTrainer() { + learn_parm = (LEARN_PARM *)malloc(sizeof(LEARN_PARM)); + strcpy(learn_parm->predfile, "trans_predictions"); + strcpy(learn_parm->alphafile, ""); + learn_parm->biased_hyperplane = 1; + learn_parm->sharedslack = 0; + learn_parm->remove_inconsistent = 0; + learn_parm->skip_final_opt_check = 0; + learn_parm->svm_maxqpsize = 10; + learn_parm->svm_newvarsinqp = 0; + learn_parm->svm_iter_to_shrink = -9999; + learn_parm->maxiter = 100000; + learn_parm->kernel_cache_size = 40; + learn_parm->svm_c = 0.0; + learn_parm->eps = 0.1; + learn_parm->transduction_posratio = -1.0; + learn_parm->svm_costratio = 1.0; + learn_parm->svm_costratio_unlab = 1.0; + learn_parm->svm_unlabbound = 1E-5; + learn_parm->epsilon_crit = 0.001; + learn_parm->epsilon_a = 1E-15; + learn_parm->compute_loo = 0; + learn_parm->rho = 1.0; + learn_parm->xa_depth = 0; + kernel_parm = (KERNEL_PARM *)malloc(sizeof(KERNEL_PARM)); + kernel_parm->kernel_type = 0; + kernel_parm->poly_degree = 3; + kernel_parm->rbf_gamma = 1.0; + kernel_parm->coef_lin = 1; + kernel_parm->coef_const = 1; + strcpy(kernel_parm->custom, "empty"); +} + +SVMBinaryTrainer::~SVMBinaryTrainer() { + if (learn_parm != NULL) { + free(learn_parm); + learn_parm = NULL; + } + if (kernel_parm != NULL) { + free(kernel_parm); + kernel_parm = NULL; + } + if (kernel_cache != NULL) { + kernel_cache_cleanup(kernel_cache); + kernel_cache = NULL; + } +} + +void SVMBinaryTrainer::Reset() { + feats_ = 0; + items_.clear(); + if (kernel_cache != NULL) { + kernel_cache_cleanup(kernel_cache); + kernel_cache = NULL; + } +} + +void SVMBinaryTrainer::SetLabels(int labels) {} + +void SVMBinaryTrainer::SetFeatures(int feats) { feats_ = feats; } + +void SVMBinaryTrainer::AddData(int label, const float *vec) { + if (label != 1 && label != -1) { + return; + } + LabelItem itm; + itm.label = label; + for (int i = 0; i < feats_; ++i) { + itm.vec.push_back(vec[i]); + } + items_.push_back(itm); +} + +int SVMBinaryTrainer::Train(const char *modelfile) { + int totdoc = items_.size(); + if (totdoc == 0 || feats_ == 0) { + return -1; + } + kernel_cache = kernel_cache_init(totdoc, learn_parm->kernel_cache_size); + double *labels = (double *)malloc(sizeof(double) * totdoc); + double *alphas = (double *)malloc(sizeof(double) * totdoc); + DOC **docs = (DOC **)malloc(sizeof(DOC *) * totdoc); + WORD *words = (WORD *)malloc(sizeof(WORD) * (feats_ + 10)); + for (int dnum = 0; dnum < totdoc; ++dnum) { + const int docFeats = items_[dnum].vec.size(); + for (int i = 0; i < (feats_ + 10); ++i) { + if (i >= feats_) { + words[i].wnum = 0; + } else { + words[i].wnum = i + 1; + } + if (i >= docFeats) { + words[i].weight = 0; + } else { + words[i].weight = items_[dnum].vec[i]; + } + } + labels[dnum] = items_[dnum].label; + docs[dnum] = + create_example(dnum, 0, 0, 0, create_svector(words, (char *)"", 1.0)); + } + free(words); + + MODEL *model_ = (MODEL *)malloc(sizeof(MODEL)); + svm_learn_classification(docs, labels, (long int)totdoc, (long int)feats_, + learn_parm, kernel_parm, kernel_cache, model_, + alphas); + write_model((char *)modelfile, model_); + free(labels); + labels = NULL; + free(alphas); + alphas = NULL; + for (int i = 0; i < totdoc; i++) { + free_example(docs[i], 1); + } + free(docs); + free_model(model_, 0); + model_ = NULL; + return 0; +} + +} // namespace ovclassifier
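
Note the contract implied by AddData above: it copies exactly the configured number of features out of vec and silently drops samples whose label is not +1/-1, so a defensive wrapper on the Go side (hypothetical, not part of the patch) can surface mismatches early:

    package classify

    import (
        "fmt"

        "github.com/bububa/openvision/go/classifier/svm"
    )

    // addSample rejects feature vectors of the wrong length before they
    // reach the C++ trainer, which would otherwise read past the slice.
    func addSample(t svm.Trainer, featureCount, label int, feats []float32) error {
        if len(feats) != featureCount {
            return fmt.Errorf("expected %d features, got %d", featureCount, len(feats))
        }
        t.AddData(label, feats)
        return nil
    }
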
diff --git a/src/classifier/svm/svm_binary_trainer.hpp b/src/classifier/svm/svm_binary_trainer.hpp new file mode 100644 index 0000000..b950520 --- /dev/null +++ b/src/classifier/svm/svm_binary_trainer.hpp @@ -0,0 +1,28 @@ +#ifndef _SVM_BINARY_TRAINER_H_ +#define _SVM_BINARY_TRAINER_H_ + +#include "svm_common.hpp" +#include "svm_light/svm_common.h" +#include "svm_trainer.hpp" +#include <vector> + +namespace ovclassifier { +class SVMBinaryTrainer : public SVMTrainer { +public: + SVMBinaryTrainer(); + ~SVMBinaryTrainer(); + void Reset(); + void SetLabels(int labels); + void SetFeatures(int feats); + void AddData(int label, const float *vec); + int Train(const char *modelfile); + +private: + KERNEL_PARM *kernel_parm = NULL; + LEARN_PARM *learn_parm = NULL; + KERNEL_CACHE *kernel_cache = NULL; + int feats_; + std::vector<LabelItem> items_; +}; +} // namespace ovclassifier +#endif // _SVM_BINARY_TRAINER_H_
diff --git a/src/classifier/svm/svm_classifier.cpp b/src/classifier/svm/svm_classifier.cpp new file mode 100644 index 0000000..7b54b29 --- /dev/null +++ b/src/classifier/svm/svm_classifier.cpp @@ -0,0 +1,33 @@ +#include "../svm_classifier.h" +#include "svm_binary_classifier.hpp" +#include "svm_multiclass_classifier.hpp" + +ISVMClassifier new_svm_binary_classifier() { + return new ovclassifier::SVMBinaryClassifier(); +} +ISVMClassifier new_svm_multiclass_classifier() { + return new ovclassifier::SVMMultiClassClassifier(); +} +void destroy_svm_classifier(ISVMClassifier e) { + delete static_cast<ovclassifier::SVMClassifier *>(e); +} +int svm_classifier_load_model(ISVMClassifier e, const char *modelfile) { + return static_cast<ovclassifier::SVMClassifier *>(e)->LoadModel(modelfile); +} +double svm_predict(ISVMClassifier e, const float *vec) { + return static_cast<ovclassifier::SVMClassifier *>(e)->Predict(vec); +} +int svm_classify(ISVMClassifier e, const float *vec, FloatVector *scores) { + std::vector<double> scores_; + int ret = + static_cast<ovclassifier::SVMClassifier *>(e)->Classify(vec, scores_); + if (ret != 0) { + return ret; + } + scores->length = scores_.size(); + scores->values = (float *)malloc(sizeof(float) * scores->length); + for (int i = 0; i < scores->length; ++i) { + scores->values[i] = scores_[i]; + } + return 0; +}
diff --git a/src/classifier/svm/svm_classifier.hpp b/src/classifier/svm/svm_classifier.hpp new file mode 100644 index 0000000..30d8392 --- /dev/null +++ b/src/classifier/svm/svm_classifier.hpp @@ -0,0 +1,16 @@ +#ifndef _CLASSIFIER_SVM_CLASSIFIER_H_ +#define _CLASSIFIER_SVM_CLASSIFIER_H_ + +#include "svm_light/svm_common.h" +#include <vector> + +namespace ovclassifier { +class SVMClassifier { +public: + virtual ~SVMClassifier(){}; + virtual int LoadModel(const char *modelfile) = 0; + virtual double Predict(const float *vec) = 0; + virtual int Classify(const float *vec, std::vector<double> &scores) = 0; +}; +} // namespace ovclassifier +#endif // !_CLASSIFIER_SVM_CLASSIFIER_H_
diff --git a/src/classifier/svm/svm_common.hpp b/src/classifier/svm/svm_common.hpp new file mode 100644 index 0000000..917ef91 --- /dev/null +++ b/src/classifier/svm/svm_common.hpp @@ -0,0 +1,11 @@ +#ifndef _SVM_COMMON_HPP_ +#define _SVM_COMMON_HPP_ + +#include <vector> +namespace ovclassifier { +struct LabelItem { + int label; + std::vector<float> vec; +}; +} // namespace ovclassifier +#endif // !_SVM_COMMON_HPP_
diff --git a/src/classifier/svm/svm_light/kernel.h b/src/classifier/svm/svm_light/kernel.h new file mode 100644 index 0000000..33ddf78 --- /dev/null +++ b/src/classifier/svm/svm_light/kernel.h @@ -0,0 +1,40 @@ +/************************************************************************/ +/* */ +/* kernel.h */ +/* */ +/* User defined kernel function. Feel free to plug in your own. */ +/* */ +/* Copyright: Thorsten Joachims */ +/* Date: 16.12.97 */ +/* */ +/************************************************************************/ + +/* KERNEL_PARM is defined in svm_common.h The field 'custom' is reserved for */ +/* parameters of the user defined kernel. You can also access and use */ +/* the parameters of the other kernels. Just replace the line + return((double)(1.0)); + with your own kernel. */ + /* Example: The following computes the polynomial kernel. sprod_ss + computes the inner product between two sparse vectors. + + return((CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a,b) +kernel_parm->coef_const,(double)kernel_parm->poly_degree)); + */ + +/* If you are implementing a kernel that is not based on a + feature/value representation, you might want to make use of the + field "userdefined" in SVECTOR. By default, this field will contain + whatever string you put behind a # sign in the example file. So, if + a line in your training file looks like + + -1 1:3 5:6 #abcdefg + + then the SVECTOR field "words" will contain the vector 1:3 5:6, and + "userdefined" will contain the string "abcdefg". */ + +double custom_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b) + /* plug in your favorite kernel */ +{ + return((double)(1.0)); +}
diff --git a/src/classifier/svm/svm_light/pr_loqo/pr_loqo.c b/src/classifier/svm/svm_light/pr_loqo/pr_loqo.c new file mode 100644 index 0000000..6fb4e96 --- /dev/null +++ b/src/classifier/svm/svm_light/pr_loqo/pr_loqo.c @@ -0,0 +1,619 @@ +/* + * File: pr_loqo.c + * Purpose: solves quadratic programming problem for pattern recognition + * for support vectors + * + * Author: Alex J. Smola + * Created: 10/14/97 + * Updated: 11/08/97 + * Updated: 13/08/98 (removed exit(1) as it crashes svm light when the margin + * is set in a not sufficiently conservative manner) + * + * + * Copyright (c) 1997 GMD Berlin - All rights reserved + * THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE of GMD Berlin + * The copyright notice above does not evidence any + * actual or intended publication of this work. + * + * Unauthorized commercial use of this software is not allowed + */ + +#include "pr_loqo.h" +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +#define max(A, B) ((A) > (B) ? (A) : (B)) +#define min(A, B) ((A) < (B) ? (A) : (B)) +#define sqr(A) ((A) * (A))
+#define ABS(A) ((A) > 0 ? (A) : (-(A))) + +#define PREDICTOR 1 +#define CORRECTOR 2 + +/***************************************************************** + replace this by any other function that will exit gracefully + in a larger system + ***************************************************************/ + +void nrerror(char error_text[]) { + printf("ERROR: terminating optimizer - %s\n", error_text); + /* exit(1); */ +} + +/***************************************************************** + taken from numerical recipes and modified to accept pointers + moreover numerical recipes code seems to be buggy (at least the + ones on the web) + + cholesky solver and backsubstitution + leaves upper right triangle intact (rows first order) + ***************************************************************/ + +void choldc(double a[], int n, double p[]) { + void nrerror(char error_text[]); + int i, j, k; + double sum; + + for (i = 0; i < n; i++) { + for (j = i; j < n; j++) { + sum = a[n * i + j]; + for (k = i - 1; k >= 0; k--) + sum -= a[n * i + k] * a[n * j + k]; + if (i == j) { + if (sum <= 0.0) { + nrerror((char *)"choldc failed, matrix not positive definite"); + sum = 0.0; + } + p[i] = sqrt(sum); + } else + a[n * j + i] = sum / p[i]; + } + } +} + +void cholsb(double a[], int n, double p[], double b[], double x[]) { + int i, k; + double sum; + + for (i = 0; i < n; i++) { + sum = b[i]; + for (k = i - 1; k >= 0; k--) + sum -= a[n * i + k] * x[k]; + x[i] = sum / p[i]; + } + + for (i = n - 1; i >= 0; i--) { + sum = x[i]; + for (k = i + 1; k < n; k++) + sum -= a[n * k + i] * x[k]; + x[i] = sum / p[i]; + } +} + +/***************************************************************** + sometimes we only need the forward or backward pass of the + backsubstitution, hence we provide these two routines separately + ***************************************************************/ + +void chol_forward(double a[], int n, double p[], double b[], double x[]) { + int i, k; + double sum; + + for (i = 0; i < n; i++) { + sum = b[i]; + for (k = i - 1; k >= 0; k--) + sum -= a[n * i + k] * x[k]; + x[i] = sum / p[i]; + } +} + +void chol_backward(double a[], int n, double p[], double b[], double x[]) { + int i, k; + double sum; + + for (i = n - 1; i >= 0; i--) { + sum = b[i]; + for (k = i + 1; k < n; k++) + sum -= a[n * k + i] * x[k]; + x[i] = sum / p[i]; + } +} + +/***************************************************************** + solves the system | -H_x A' | |x_x| = |c_x| + | A H_y| |x_y| |c_y| + + with H_x (and H_y) positive (semidefinite) matrices + and n, m the respective sizes of H_x and H_y + + for variables see pg. 48 of notebook or do the calculations on a + sheet of paper again + + predictor solves the whole thing, corrector assumes that H_x didn't + change and relies on the results of the predictor. therefore do + _not_ modify workspace + + if you want to speed tune anything in the code here's the right + place to do so: about 95% of the time is being spent in + here. something like an iterative refinement would be nice, + especially when switching from double to single precision. if you + have a fast parallel cholesky use it instead of the numrec + implementations.
+ + side effects: changes H_y (but this is just the unit matrix or zero anyway + in our case) + ***************************************************************/ + +void solve_reduced(int n, int m, double h_x[], double h_y[], double a[], + double x_x[], double x_y[], double c_x[], double c_y[], + double workspace[], int step) { + int i, j, k; + + double *p_x; + double *p_y; + double *t_a; + double *t_c; + double *t_y; + + p_x = workspace; /* together n + m + n*m + n + m = n*(m+2)+2*m */ + p_y = p_x + n; + t_a = p_y + m; + t_c = t_a + n * m; + t_y = t_c + n; + + if (step == PREDICTOR) { + choldc(h_x, n, p_x); /* do cholesky decomposition */ + + for (i = 0; i < m; i++) /* forward pass for A' */ + chol_forward(h_x, n, p_x, a + i * n, t_a + i * n); + + for (i = 0; i < m; i++) /* compute (h_y + a h_x^-1A') */ + for (j = i; j < m; j++) + for (k = 0; k < n; k++) + h_y[m * i + j] += t_a[n * j + k] * t_a[n * i + k]; + + choldc(h_y, m, p_y); /* and cholesky decomposition */ + } + + chol_forward(h_x, n, p_x, c_x, t_c); + /* forward pass for c */ + + for (i = 0; i < m; i++) { /* and solve for x_y */ + t_y[i] = c_y[i]; + for (j = 0; j < n; j++) + t_y[i] += t_a[i * n + j] * t_c[j]; + } + + cholsb(h_y, m, p_y, t_y, x_y); + + for (i = 0; i < n; i++) { /* finally solve for x_x */ + t_c[i] = -t_c[i]; + for (j = 0; j < m; j++) + t_c[i] += t_a[j * n + i] * x_y[j]; + } + + chol_backward(h_x, n, p_x, t_c, x_x); +} + +/***************************************************************** + matrix vector multiplication (symmetric matrix but only one triangle + given). computes m*x = y + no need to tune it as it's only of O(n^2) but cholesky is of + O(n^3). so don't waste your time _here_ although it isn't very + elegant. + ***************************************************************/ + +void matrix_vector(int n, double m[], double x[], double y[]) { + int i, j; + + for (i = 0; i < n; i++) { + y[i] = m[(n + 1) * i] * x[i]; + + for (j = 0; j < i; j++) + y[i] += m[i + n * j] * x[j]; + + for (j = i + 1; j < n; j++) + y[i] += m[n * i + j] * x[j]; + } +} + +/***************************************************************** + call only this routine; this is the only one you're interested in + for doing quadratic optimization + + the restart feature exists but it may not be of much use due to the + fact that an initial setting, although close but not very close to the + actual solution will result in very good starting diagnostics + (primal and dual feasibility and small infeasibility gap) but incur + later stalling of the optimizer afterwards as we have to enforce + positivity of the slacks. + ***************************************************************/ + +int pr_loqo(int n, int m, double c[], double h_x[], double a[], double b[], + double l[], double u[], double primal[], double dual[], int verb, + double sigfig_max, int counter_max, double margin, double bound, + int restart) { + /* the knobs to be tuned ... */ + /* double margin = -0.95; we will go up to 95% of the + distance between old variables and zero */ + /* double bound = 10; preset value for the start.
small + values give good initial + feasibility but may result in slow + convergence afterwards: we're too + close to zero */ + /* to be allocated */ + double *workspace; + double *diag_h_x; + double *h_y; + double *c_x; + double *c_y; + double *h_dot_x; + double *rho; + double *nu; + double *tau; + double *sigma; + double *gamma_z; + double *gamma_s; + + double *hat_nu; + double *hat_tau; + + double *delta_x; + double *delta_y; + double *delta_s; + double *delta_z; + double *delta_g; + double *delta_t; + + double *d; + + /* from the header - pointers into primal and dual */ + double *x; + double *y; + double *g; + double *z; + double *s; + double *t; + + /* auxiliary variables */ + double b_plus_1; + double c_plus_1; + + double x_h_x; + double primal_inf; + double dual_inf; + + double sigfig; + double primal_obj, dual_obj; + double mu; + double alfa, step; + int counter = 0; + + int status = STILL_RUNNING; + + int i, j, k; + + /* memory allocation */ + workspace = malloc((n * (m + 2) + 2 * m) * sizeof(double)); + diag_h_x = malloc(n * sizeof(double)); + h_y = malloc(m * m * sizeof(double)); + c_x = malloc(n * sizeof(double)); + c_y = malloc(m * sizeof(double)); + h_dot_x = malloc(n * sizeof(double)); + + rho = malloc(m * sizeof(double)); + nu = malloc(n * sizeof(double)); + tau = malloc(n * sizeof(double)); + sigma = malloc(n * sizeof(double)); + + gamma_z = malloc(n * sizeof(double)); + gamma_s = malloc(n * sizeof(double)); + + hat_nu = malloc(n * sizeof(double)); + hat_tau = malloc(n * sizeof(double)); + + delta_x = malloc(n * sizeof(double)); + delta_y = malloc(m * sizeof(double)); + delta_s = malloc(n * sizeof(double)); + delta_z = malloc(n * sizeof(double)); + delta_g = malloc(n * sizeof(double)); + delta_t = malloc(n * sizeof(double)); + + d = malloc(n * sizeof(double)); + + /* pointers into the external variables */ + x = primal; /* n */ + g = x + n; /* n */ + t = g + n; /* n */ + + y = dual; /* m */ + z = y + m; /* n */ + s = z + n; /* n */ + + /* initial settings */ + b_plus_1 = 1; + c_plus_1 = 0; + for (i = 0; i < n; i++) + c_plus_1 += c[i]; + + /* get diagonal terms */ + for (i = 0; i < n; i++) + diag_h_x[i] = h_x[(n + 1) * i]; + + /* starting point */ + if (restart == 1) { + /* x, y already preset */ + for (i = 0; i < n; i++) { /* compute g, t for primal feasibility */ + g[i] = max(ABS(x[i] - l[i]), bound); + t[i] = max(ABS(u[i] - x[i]), bound); + } + + matrix_vector(n, h_x, x, h_dot_x); /* h_dot_x = h_x * x */ + + for (i = 0; i < n; i++) { /* sigma is a dummy variable to calculate z, s */ + sigma[i] = c[i] + h_dot_x[i]; + for (j = 0; j < m; j++) + sigma[i] -= a[n * j + i] * y[j]; + + if (sigma[i] > 0) { + s[i] = bound; + z[i] = sigma[i] + bound; + } else { + s[i] = bound - sigma[i]; + z[i] = bound; + } + } + } else { /* use default start settings */ + for (i = 0; i < m; i++) + for (j = i; j < m; j++) + h_y[i * m + j] = (i == j) ? 
1 : 0; + + for (i = 0; i < n; i++) { + c_x[i] = c[i]; + h_x[(n + 1) * i] += 1; + } + + for (i = 0; i < m; i++) + c_y[i] = b[i]; + + /* and solve the system [-H_x A'; A H_y] [x, y] = [c_x; c_y] */ + solve_reduced(n, m, h_x, h_y, a, x, y, c_x, c_y, workspace, PREDICTOR); + + /* initialize the other variables */ + for (i = 0; i < n; i++) { + g[i] = max(ABS(x[i] - l[i]), bound); + z[i] = max(ABS(x[i]), bound); + t[i] = max(ABS(u[i] - x[i]), bound); + s[i] = max(ABS(x[i]), bound); + } + } + + for (i = 0, mu = 0; i < n; i++) + mu += z[i] * g[i] + s[i] * t[i]; + mu = mu / (2 * n); + + /* the main loop */ + if (verb >= STATUS) { + printf("counter | pri_inf | dual_inf | pri_obj | dual_obj | "); + printf("sigfig | alpha | nu \n"); + printf("-------------------------------------------------------"); + printf("---------------------------\n"); + } + + while (status == STILL_RUNNING) { + /* predictor */ + + /* put back original diagonal values */ + for (i = 0; i < n; i++) + h_x[(n + 1) * i] = diag_h_x[i]; + + matrix_vector(n, h_x, x, h_dot_x); /* compute h_dot_x = h_x * x */ + + for (i = 0; i < m; i++) { + rho[i] = b[i]; + for (j = 0; j < n; j++) + rho[i] -= a[n * i + j] * x[j]; + } + + for (i = 0; i < n; i++) { + nu[i] = l[i] - x[i] + g[i]; + tau[i] = u[i] - x[i] - t[i]; + + sigma[i] = c[i] - z[i] + s[i] + h_dot_x[i]; + for (j = 0; j < m; j++) + sigma[i] -= a[n * j + i] * y[j]; + + gamma_z[i] = -z[i]; + gamma_s[i] = -s[i]; + } + + /* instrumentation */ + x_h_x = 0; + primal_inf = 0; + dual_inf = 0; + + for (i = 0; i < n; i++) { + x_h_x += h_dot_x[i] * x[i]; + primal_inf += sqr(tau[i]); + primal_inf += sqr(nu[i]); + dual_inf += sqr(sigma[i]); + } + for (i = 0; i < m; i++) + primal_inf += sqr(rho[i]); + primal_inf = sqrt(primal_inf) / b_plus_1; + dual_inf = sqrt(dual_inf) / c_plus_1; + + primal_obj = 0.5 * x_h_x; + dual_obj = -0.5 * x_h_x; + for (i = 0; i < n; i++) { + primal_obj += c[i] * x[i]; + dual_obj += l[i] * z[i] - u[i] * s[i]; + } + for (i = 0; i < m; i++) + dual_obj += b[i] * y[i]; + + sigfig = log10(ABS(primal_obj) + 1) - log10(ABS(primal_obj - dual_obj)); + sigfig = max(sigfig, 0); + + /* the diagnostics - after we computed our results we will + analyze them */ + + if (counter > counter_max) + status = ITERATION_LIMIT; + if (sigfig > sigfig_max) + status = OPTIMAL_SOLUTION; + if (primal_inf > 10e100) + status = PRIMAL_INFEASIBLE; + if (dual_inf > 10e100) + status = DUAL_INFEASIBLE; + if ((primal_inf > 10e100) & (dual_inf > 10e100)) + status = PRIMAL_AND_DUAL_INFEASIBLE; + if (ABS(primal_obj) > 10e100) + status = PRIMAL_UNBOUNDED; + if (ABS(dual_obj) > 10e100) + status = DUAL_UNBOUNDED; + + /* write some nice routine to enforce the time limit if you + _really_ want, however it's quite useless as you can compute + the time from the maximum number of iterations as every + iteration costs one cholesky decomposition plus a couple of + backsubstitutions */ + + /* generate report */ + if ((verb >= FLOOD) | ((verb == STATUS) & (status != 0))) + printf("%7i | %.2e | %.2e | % .2e | % .2e | %6.3f | %.4f | %.2e\n", + counter, primal_inf, dual_inf, primal_obj, dual_obj, sigfig, alfa, + mu); + + counter++; + + if (status == 0) { /* we may keep on going, otherwise + it'll cost one loop extra plus a + messed up main diagonal of h_x */ + /* intermediate variables (the ones with hat) */ + for (i = 0; i < n; i++) { + hat_nu[i] = nu[i] + g[i] * gamma_z[i] / z[i]; + hat_tau[i] = tau[i] - t[i] * gamma_s[i] / s[i]; + /* diagonal terms */ + d[i] = z[i] / g[i] + s[i] / t[i]; + } + + /* initialization before the 
cholesky solver */ + for (i = 0; i < n; i++) { + h_x[(n + 1) * i] = diag_h_x[i] + d[i]; + c_x[i] = sigma[i] - z[i] * hat_nu[i] / g[i] - s[i] * hat_tau[i] / t[i]; + } + for (i = 0; i < m; i++) { + c_y[i] = rho[i]; + for (j = i; j < m; j++) + h_y[m * i + j] = 0; + } + + /* and do it */ + solve_reduced(n, m, h_x, h_y, a, delta_x, delta_y, c_x, c_y, workspace, + PREDICTOR); + + for (i = 0; i < n; i++) { + /* backsubstitution */ + delta_s[i] = s[i] * (delta_x[i] - hat_tau[i]) / t[i]; + delta_z[i] = z[i] * (hat_nu[i] - delta_x[i]) / g[i]; + + delta_g[i] = g[i] * (gamma_z[i] - delta_z[i]) / z[i]; + delta_t[i] = t[i] * (gamma_s[i] - delta_s[i]) / s[i]; + + /* central path (corrector) */ + gamma_z[i] = mu / g[i] - z[i] - delta_z[i] * delta_g[i] / g[i]; + gamma_s[i] = mu / t[i] - s[i] - delta_s[i] * delta_t[i] / t[i]; + + /* (some more intermediate variables) the hat variables */ + hat_nu[i] = nu[i] + g[i] * gamma_z[i] / z[i]; + hat_tau[i] = tau[i] - t[i] * gamma_s[i] / s[i]; + + /* initialization before the cholesky */ + c_x[i] = sigma[i] - z[i] * hat_nu[i] / g[i] - s[i] * hat_tau[i] / t[i]; + } + + for (i = 0; i < m; i++) { /* compute c_y and rho */ + c_y[i] = rho[i]; + for (j = i; j < m; j++) + h_y[m * i + j] = 0; + } + + /* and do it */ + solve_reduced(n, m, h_x, h_y, a, delta_x, delta_y, c_x, c_y, workspace, + CORRECTOR); + + for (i = 0; i < n; i++) { + /* backsubstitution */ + delta_s[i] = s[i] * (delta_x[i] - hat_tau[i]) / t[i]; + delta_z[i] = z[i] * (hat_nu[i] - delta_x[i]) / g[i]; + + delta_g[i] = g[i] * (gamma_z[i] - delta_z[i]) / z[i]; + delta_t[i] = t[i] * (gamma_s[i] - delta_s[i]) / s[i]; + } + + alfa = -1; + for (i = 0; i < n; i++) { + alfa = min(alfa, delta_g[i] / g[i]); + alfa = min(alfa, delta_t[i] / t[i]); + alfa = min(alfa, delta_s[i] / s[i]); + alfa = min(alfa, delta_z[i] / z[i]); + } + alfa = (margin - 1) / alfa; + + /* compute mu */ + for (i = 0, mu = 0; i < n; i++) + mu += z[i] * g[i] + s[i] * t[i]; + mu = mu / (2 * n); + mu = mu * sqr((alfa - 1) / (alfa + 10)); + + for (i = 0; i < n; i++) { + x[i] += alfa * delta_x[i]; + g[i] += alfa * delta_g[i]; + t[i] += alfa * delta_t[i]; + z[i] += alfa * delta_z[i]; + s[i] += alfa * delta_s[i]; + } + + for (i = 0; i < m; i++) + y[i] += alfa * delta_y[i]; + } + } + if ((status == 1) && (verb >= STATUS)) { + printf("-------------------------------------------------------------------" + "---------------\n"); + printf("optimization converged\n"); + } + + /* free memory */ + free(workspace); + free(diag_h_x); + free(h_y); + free(c_x); + free(c_y); + free(h_dot_x); + + free(rho); + free(nu); + free(tau); + free(sigma); + free(gamma_z); + free(gamma_s); + + free(hat_nu); + free(hat_tau); + + free(delta_x); + free(delta_y); + free(delta_s); + free(delta_z); + free(delta_g); + free(delta_t); + + free(d); + + /* and return to sender */ + return status; +}
diff --git a/src/classifier/svm/svm_light/pr_loqo/pr_loqo.h b/src/classifier/svm/svm_light/pr_loqo/pr_loqo.h new file mode 100644 index 0000000..aab4d6e --- /dev/null +++ b/src/classifier/svm/svm_light/pr_loqo/pr_loqo.h @@ -0,0 +1,93 @@ +/* + * File: pr_loqo.h + * Purpose: solves quadratic programming problem for pattern recognition + * for support vectors + * + * Author: Alex J. Smola + * Created: 10/14/97 + * Updated: 11/08/97 + * + * + * Copyright (c) 1997 GMD Berlin - All rights reserved + * THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE of GMD Berlin + * The copyright notice above does not evidence any + * actual or intended publication of this work.
+ * + * Unauthorized commercial use of this software is not allowed + */ + +/* verbosity levels */ +#ifndef _PR_LOQO_H_ +#define _PR_LOQO_H_ + +#define QUIET 0 +#define STATUS 1 +#define FLOOD 2 + +/* status outputs */ + +#define STILL_RUNNING 0 +#define OPTIMAL_SOLUTION 1 +#define SUBOPTIMAL_SOLUTION 2 +#define ITERATION_LIMIT 3 +#define PRIMAL_INFEASIBLE 4 +#define DUAL_INFEASIBLE 5 +#define PRIMAL_AND_DUAL_INFEASIBLE 6 +#define INCONSISTENT 7 +#define PRIMAL_UNBOUNDED 8 +#define DUAL_UNBOUNDED 9 +#define TIME_LIMIT 10 + +/* + * solve the quadratic programming problem + * + * minimize c' * x + 1/2 x' * H * x + * subject to A*x = b + * l <= x <= u + * + * for a documentation see R. Vanderbei, LOQO: an Interior Point Code + * for Quadratic Programming + */ + +/* + * n : number of primal variables + * m : number of constraints (typically 1) + * h_x : dot product matrix (n.n) + * a : constraint matrix (n.m) + * b : constant term (m) + * l : lower bound (n) + * u : upper bound (n) + * + * primal : workspace for primal variables, has to be of size 3 n + * + * x = primal; n + * g = x + n; n + * t = g + n; n + * + * dual : workspace for dual variables, has to be of size m + 2 n + * + * y = dual; m + * z = y + m; n + * s = z + n; n + * + * verb : verbosity level + * sigfig_max : number of significant digits + * counter_max: stopping criterion + * restart : 1 if restart desired + * + */ + +int pr_loqo(int n, int m, double c[], double h_x[], double a[], double b[], + double l[], double u[], double primal[], double dual[], int verb, + double sigfig_max, int counter_max, double margin, double bound, + int restart); + +/* + * compile with + cc -O4 -c pr_loqo.c + cc -xO4 -fast -xarch=v8plus -xchip=ultra -xparallel -c pr_loqo.c + mex pr_loqo_c.c pr_loqo.o + cmex4 pr_loqo_c.c pr_loqo.o -DMATLAB4 -o pr_loqo_c4 + * + */ +#endif // !_PR_LOQO_H_
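
Restated from the header comment in LaTeX, pr_loqo solves the box- and equality-constrained quadratic program

    \min_{x \in \mathbb{R}^n} \; c^\top x + \tfrac{1}{2}\, x^\top H x
    \quad \text{subject to} \quad A x = b, \quad l \le x \le u

with H positive semidefinite, A the m-row constraint matrix, and the primal/dual workspace layout documented above.
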
diff --git a/src/classifier/svm/svm_light/svm_classify.c b/src/classifier/svm/svm_light/svm_classify.c new file mode 100644 index 0000000..4dfd4b2 --- /dev/null +++ b/src/classifier/svm/svm_light/svm_classify.c @@ -0,0 +1,198 @@ +/***********************************************************************/ +/* */ +/* svm_classify.c */ +/* */ +/* Classification module of Support Vector Machine. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 02.07.02 */ +/* */ +/* Copyright (c) 2002 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. */ +/* */ +/************************************************************************/ + +# include "svm_common.h" + +char docfile[200]; +char modelfile[200]; +char predictionsfile[200]; + +void read_input_parameters(int, char **, char *, char *, char *, long *, + long *); +void print_help(void); + + +int main (int argc, char* argv[]) +{ + DOC *doc; /* test example */ + WORD *words; + long max_docs,max_words_doc,lld; + long totdoc=0,queryid,slackid; + long correct=0,incorrect=0,no_accuracy=0; + long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format; + long j; + double t1,runtime=0; + double dist,doc_label,costfactor; + char *line,*comment; + FILE *predfl,*docfl; + MODEL *model; + + read_input_parameters(argc,argv,docfile,modelfile,predictionsfile, + &verbosity,&pred_format); + + nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */ + max_words_doc+=2; + lld+=2; + + line = (char *)my_malloc(sizeof(char)*lld); + words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10)); + + model=read_model(modelfile); + + if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ + /* compute weight vector */ + add_weight_vector_to_linear_model(model); + } + + if(verbosity>=2) { + printf("Classifying test examples.."); fflush(stdout); + } + + if ((docfl = fopen (docfile, "r")) == NULL) + { perror (docfile); exit (1); } + if ((predfl = fopen (predictionsfile, "w")) == NULL) + { perror (predictionsfile); exit (1); } + + while((!feof(docfl)) && fgets(line,(int)lld,docfl)) { + if(line[0] == '#') continue; /* line contains comments */ + parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum, + max_words_doc,&comment); + totdoc++; + if(model->kernel_parm.kernel_type == LINEAR) {/* For linear kernel, */ + for(j=0;(words[j]).wnum != 0;j++) { /* check if feature numbers */ + if((words[j]).wnum>model->totwords) /* are not larger than in */ + (words[j]).wnum=0; /* model. Remove feature if */ + } /* necessary. */ + } + doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); + t1=get_runtime(); + + if(model->kernel_parm.kernel_type == LINEAR) { /* linear kernel */ + dist=classify_example_linear(model,doc); + } + else { /* non-linear kernel */ + dist=classify_example(model,doc); + } + + runtime+=(get_runtime()-t1); + free_example(doc,1); + + if(dist>0) { + if(pred_format==0) { /* old weird output format */ + fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist); + } + if(doc_label>0) correct++; else incorrect++; + if(doc_label>0) res_a++; else res_b++; + } + else { + if(pred_format==0) { /* old weird output format */ + fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist); + } + if(doc_label<0) correct++; else incorrect++; + if(doc_label>0) res_c++; else res_d++; + } + if(pred_format==1) { /* output the value of decision function */ + fprintf(predfl,"%.8g\n",dist); + } + if((int)(0.01+(doc_label*doc_label)) != 1) + { no_accuracy=1; } /* test data is not binary labeled */ + if(verbosity>=2) { + if(totdoc % 100 == 0) { + printf("%ld..",totdoc); fflush(stdout); + } + } + } + fclose(predfl); + fclose(docfl); + free(line); + free(words); + free_model(model,1); + + if(verbosity>=2) { + printf("done\n"); + +/* Note by Gary Boone Date: 29 April 2000 */ +/* o Timing is inaccurate. The timer has 0.01 second resolution. */ +/* Because classification of a single vector takes less than */ +/* 0.01 secs, the timer was underflowing.
*/
+      printf("Runtime (without IO) in cpu-seconds: %.2f\n",
+             (float)(runtime/100.0));
+
+  }
+  if((!no_accuracy) && (verbosity>=1)) {
+    printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
+    printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
+  }
+
+  return(0);
+}
+
+void read_input_parameters(int argc, char **argv, char *docfile,
+                           char *modelfile, char *predictionsfile,
+                           long int *verbosity, long int *pred_format)
+{
+  long i;
+
+  /* set default */
+  strcpy (modelfile, "svm_model");
+  strcpy (predictionsfile, "svm_predictions");
+  (*verbosity)=2;
+  (*pred_format)=1;
+
+  for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
+    switch ((argv[i])[1])
+      {
+      case 'h': print_help(); exit(0);
+      case 'v': i++; (*verbosity)=atol(argv[i]); break;
+      case 'f': i++; (*pred_format)=atol(argv[i]); break;
+      default: printf("\nUnrecognized option %s!\n\n",argv[i]);
+               print_help();
+               exit(0);
+      }
+  }
+  if((i+1)>=argc) {
+    printf("\nNot enough input parameters!\n\n");
+    print_help();
+    exit(0);
+  }
+  strcpy (docfile, argv[i]);
+  strcpy (modelfile, argv[i+1]);
+  if((i+2)<argc) {
+    strcpy (predictionsfile, argv[i+2]);
+  }
+}
+
+void print_help(void)
+{
+  printf("\nSVM-light %s: Support Vector Machine, classification module     %s\n",VERSION,VERSION_DATE);
+  copyright_notice();
+  printf("   usage: svm_classify [options] example_file model_file output_file\n\n");
+  printf("options: -h          -> this help\n");
+  printf("          -v [0..3]   -> verbosity level (default 2)\n");
+  printf("          -f [0,1]    -> 0: old output format of V1.0\n");
+  printf("                      -> 1: output the value of decision function (default)\n\n");
+}
+
+
+
+
diff --git a/src/classifier/svm/svm_light/svm_common.c b/src/classifier/svm/svm_light/svm_common.c
new file mode 100644
index 0000000..30ed6fd
--- /dev/null
+++ b/src/classifier/svm/svm_light/svm_common.c
@@ -0,0 +1,1854 @@
+/************************************************************************/
+/*                                                                      */
+/*   svm_common.c                                                       */
+/*                                                                      */
+/*   Definitions and functions used in both svm_learn and svm_classify. */
+/*                                                                      */
+/*   Author: Thorsten Joachims                                          */
+/*   Date: 02.07.04                                                     */
+/*                                                                      */
+/*   Copyright (c) 2004  Thorsten Joachims - All rights reserved        */
+/*                                                                      */
+/*   This software is available for non-commercial use only. It must    */
+/*   not be modified and distributed without prior permission of the    */
+/*   author. The author is not responsible for implications from the    */
+/*   use of this software.                                              */
+/*                                                                      */
+/************************************************************************/
+
+#include "svm_common.h"
+#include "ctype.h"
+#include "kernel.h" /* this contains a user supplied kernel */
+
+#define MAX(x, y) ((x) < (y) ? (y) : (x))
+#define MIN(x, y) ((x) > (y) ? (y) : (x))
+#define SIGN(x) ((x) > (0) ? (1) : (((x) < (0) ? (-1) : (0))))
+
+long verbosity; /* verbosity level (0-4) */
+long kernel_cache_statistic;
+
+double classify_example(MODEL *model, DOC *ex)
+/* classifies one example */
+{
+  register long i;
+  register double dist;
+
+  if ((model->kernel_parm.kernel_type == LINEAR) && (model->lin_weights))
+    return (classify_example_linear(model, ex));
+
+  dist = 0;
+  for (i = 1; i < model->sv_num; i++) {
+    dist += kernel(&model->kernel_parm, model->supvec[i], ex) * model->alpha[i];
+  }
+  return (dist - model->b);
+}
+
+double classify_example_linear(MODEL *model, DOC *ex)
+/* classifies example for linear kernel */
+
+/* important: the model must have the linear weight vector computed */
+/* use: add_weight_vector_to_linear_model(&model); */
+
+/* important: the feature numbers in the example to classify must */
+/*            not be larger than the weight vector!
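+
+   hypothetical usage sketch ('doc' and the model file name are
+   placeholders):
+
+     MODEL *m = read_model("svm_model");
+     add_weight_vector_to_linear_model(m);
+     double dist = classify_example_linear(m, doc);
+     free_model(m, 1);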
*/ +{ + double sum = 0; + SVECTOR *f; + + for (f = ex->fvec; f; f = f->next) + sum += f->factor * sprod_ns(model->lin_weights, f); + return (sum - model->b); +} + +double kernel(KERNEL_PARM *kernel_parm, DOC *a, DOC *b) +/* calculate the kernel function */ +{ + double sum = 0; + SVECTOR *fa, *fb; + + if (kernel_parm->kernel_type == GRAM) { /* use value from explicitly */ + if ((a->kernelid >= 0) && (b->kernelid >= 0)) /* stored gram matrix */ + return (kernel_parm->gram_matrix->element[MAX(a->kernelid, b->kernelid)] + [MIN(a->kernelid, b->kernelid)]); + else + return (0); /* in case it is called for unknown vector */ + } + + /* in case the constraints are sums of feature vector as represented + as a list of SVECTOR's with their coefficient factor in the sum, + take the kernel between all pairs */ + for (fa = a->fvec; fa; fa = fa->next) { + for (fb = b->fvec; fb; fb = fb->next) { + if (fa->kernel_id == fb->kernel_id) + sum += fa->factor * fb->factor * single_kernel(kernel_parm, fa, fb); + } + } + return (sum); +} + +double single_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b) +/* calculate the kernel function between two vectors */ +{ + kernel_cache_statistic++; + switch (kernel_parm->kernel_type) { + case LINEAR: /* linear */ + return (sprod_ss(a, b)); + case POLY: /* polynomial */ + return ( + pow(kernel_parm->coef_lin * sprod_ss(a, b) + kernel_parm->coef_const, + (double)kernel_parm->poly_degree)); + case RBF: /* radial basis function */ + if (a->twonorm_sq < 0) + a->twonorm_sq = sprod_ss(a, a); + if (b->twonorm_sq < 0) + b->twonorm_sq = sprod_ss(b, b); + return (exp(-kernel_parm->rbf_gamma * + (a->twonorm_sq - 2 * sprod_ss(a, b) + b->twonorm_sq))); + case SIGMOID: /* sigmoid neural net */ + return ( + tanh(kernel_parm->coef_lin * sprod_ss(a, b) + kernel_parm->coef_const)); + case CUSTOM: /* custom-kernel supplied in file kernel.h*/ + return (custom_kernel(kernel_parm, a, b)); + default: + printf("Error: Unknown kernel function\n"); + exit(1); + } +} + +SVECTOR *create_svector(WORD *words, char *userdefined, double factor) { + SVECTOR *vec; + long fnum, i; + + fnum = 0; + while (words[fnum].wnum) { + fnum++; + } + fnum++; + vec = (SVECTOR *)my_malloc(sizeof(SVECTOR)); + vec->words = (WORD *)my_malloc(sizeof(WORD) * (fnum)); + for (i = 0; i < fnum; i++) { + vec->words[i] = words[i]; + } + vec->twonorm_sq = -1; + + if (userdefined) { + vec->userdefined = + (char *)my_malloc(sizeof(char) * (strlen(userdefined) + 1)); + strcpy(vec->userdefined, userdefined); + } else + vec->userdefined = NULL; + + vec->kernel_id = 0; + vec->next = NULL; + vec->factor = factor; + return (vec); +} + +SVECTOR *create_svector_shallow(WORD *words, char *userdefined, double factor) +/* unlike 'create_svector' this does not copy words and userdefined */ +{ + SVECTOR *vec; + + vec = (SVECTOR *)my_malloc(sizeof(SVECTOR)); + vec->words = words; + vec->twonorm_sq = -1; + vec->userdefined = userdefined; + vec->kernel_id = 0; + vec->next = NULL; + vec->factor = factor; + return (vec); +} + +SVECTOR *create_svector_n(double *nonsparsevec, long maxfeatnum, + char *userdefined, double factor) { + return (create_svector_n_r(nonsparsevec, maxfeatnum, userdefined, factor, 0)); +} + +SVECTOR *create_svector_n_r(double *nonsparsevec, long maxfeatnum, + char *userdefined, double factor, + double min_non_zero) { + SVECTOR *vec; + long fnum, i; + + fnum = 0; + for (i = 1; i <= maxfeatnum; i++) + if ((nonsparsevec[i] < -min_non_zero) || (nonsparsevec[i] > min_non_zero)) + fnum++; + vec = (SVECTOR 
*)my_malloc(sizeof(SVECTOR)); + vec->words = (WORD *)my_malloc(sizeof(WORD) * (fnum + 1)); + fnum = 0; + for (i = 1; i <= maxfeatnum; i++) { + if ((nonsparsevec[i] < -min_non_zero) || (nonsparsevec[i] > min_non_zero)) { + vec->words[fnum].wnum = i; + vec->words[fnum].weight = nonsparsevec[i]; + fnum++; + } + } + vec->words[fnum].wnum = 0; + vec->twonorm_sq = -1; + + if (userdefined) { + vec->userdefined = + (char *)my_malloc(sizeof(char) * (strlen(userdefined) + 1)); + strcpy(vec->userdefined, userdefined); + } else + vec->userdefined = NULL; + + vec->kernel_id = 0; + vec->next = NULL; + vec->factor = factor; + return (vec); +} + +SVECTOR *copy_svector(SVECTOR *vec) { + SVECTOR *newvec = NULL; + if (vec) { + newvec = create_svector(vec->words, vec->userdefined, vec->factor); + newvec->kernel_id = vec->kernel_id; + newvec->next = copy_svector(vec->next); + } + return (newvec); +} + +SVECTOR *copy_svector_shallow(SVECTOR *vec) +/* unlike 'copy_svector' this does not copy words and userdefined */ +{ + SVECTOR *newvec = NULL; + if (vec) { + newvec = create_svector_shallow(vec->words, vec->userdefined, vec->factor); + newvec->kernel_id = vec->kernel_id; + newvec->next = copy_svector_shallow(vec->next); + } + return (newvec); +} + +void free_svector(SVECTOR *vec) { + SVECTOR *next; + while (vec) { + if (vec->words) + free(vec->words); + if (vec->userdefined) + free(vec->userdefined); + next = vec->next; + free(vec); + vec = next; + } +} + +void free_svector_shallow(SVECTOR *vec) +/* unlike 'free_svector' this does not free words and userdefined */ +{ + SVECTOR *next; + while (vec) { + next = vec->next; + free(vec); + vec = next; + } +} + +double sprod_ss(SVECTOR *a, SVECTOR *b) +/* compute the inner product of two sparse vectors */ +{ + register double sum = 0; + register WORD *ai, *bj; + ai = a->words; + bj = b->words; + while (ai->wnum && bj->wnum) { + if (ai->wnum > bj->wnum) { + bj++; + } else if (ai->wnum < bj->wnum) { + ai++; + } else { + sum += (ai->weight) * (bj->weight); + ai++; + bj++; + } + } + return ((double)sum); +} + +SVECTOR *multadd_ss(SVECTOR *a, SVECTOR *b, double fa, double fb) { + return (multadd_ss_r(a, b, fa, fb, 0)); +} + +SVECTOR *multadd_ss_r(SVECTOR *a, SVECTOR *b, double fa, double fb, + double min_non_zero) +/* compute fa*a+fb*b of two sparse vectors */ +/* Note: SVECTOR lists are not followed, but only the first + SVECTOR is used */ +{ + SVECTOR *vec; + register WORD *sum, *sumi; + register WORD *ai, *bj; + long veclength; + double weight; + + ai = a->words; + bj = b->words; + veclength = 0; + while (ai->wnum && bj->wnum) { + if (ai->wnum > bj->wnum) { + veclength++; + bj++; + } else if (ai->wnum < bj->wnum) { + veclength++; + ai++; + } else { + veclength++; + ai++; + bj++; + } + } + while (bj->wnum) { + veclength++; + bj++; + } + while (ai->wnum) { + veclength++; + ai++; + } + veclength++; + + sum = (WORD *)my_malloc(sizeof(WORD) * veclength); + sumi = sum; + ai = a->words; + bj = b->words; + while (ai->wnum && bj->wnum) { + if (ai->wnum > bj->wnum) { + (*sumi) = (*bj); + sumi->weight *= fb; + sumi++; + bj++; + } else if (ai->wnum < bj->wnum) { + (*sumi) = (*ai); + sumi->weight *= fa; + sumi++; + ai++; + } else { + weight = fa * (double)ai->weight + fb * (double)bj->weight; + if ((weight < -min_non_zero) || (weight > min_non_zero)) { + sumi->wnum = ai->wnum; + sumi->weight = weight; + sumi++; + } + ai++; + bj++; + } + } + while (bj->wnum) { + (*sumi) = (*bj); + sumi->weight *= fb; + sumi++; + bj++; + } + while (ai->wnum) { + (*sumi) = (*ai); + sumi->weight *= fa; + 
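+    /* b is exhausted here; the tail of a is copied scaled by fa */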
sumi++; + ai++; + } + sumi->wnum = 0; + + if (1) { /* potentially this wastes some memory, but saves malloc'ing */ + vec = create_svector_shallow(sum, NULL, 1.0); + } else { /* this is more memory efficient */ + vec = create_svector(sum, NULL, 1.0); + free(sum); + } + return (vec); +} + +SVECTOR *sub_ss(SVECTOR *a, SVECTOR *b) +/* compute the difference a-b of two sparse vectors */ +/* Note: SVECTOR lists are not followed, but only the first + SVECTOR is used */ +{ + return (multadd_ss_r(a, b, 1.0, -1.0, 0)); +} + +SVECTOR *sub_ss_r(SVECTOR *a, SVECTOR *b, double min_non_zero) +/* compute the difference a-b of two sparse vectors and rounds to zero */ +/* Note: SVECTOR lists are not followed, but only the first + SVECTOR is used */ +{ + return (multadd_ss_r(a, b, 1.0, -1.0, min_non_zero)); +} + +SVECTOR *add_ss(SVECTOR *a, SVECTOR *b) +/* compute the sum a+b of two sparse vectors */ +/* Note: SVECTOR lists are not followed, but only the first + SVECTOR is used */ +{ + return (multadd_ss_r(a, b, 1.0, 1.0, 0)); +} + +SVECTOR *add_ss_r(SVECTOR *a, SVECTOR *b, double min_non_zero) +/* compute the sum a+b of two sparse vectors and rounds to zero */ +/* Note: SVECTOR lists are not followed, but only the first + SVECTOR is used */ +{ + return (multadd_ss_r(a, b, 1.0, 1.0, min_non_zero)); +} + +SVECTOR *add_list_ss(SVECTOR *a) { return (add_list_ss_r(a, 0)); } + +SVECTOR *add_dual_list_ss_r(SVECTOR *a, SVECTOR *b, double min_non_zero) +/* computes the linear combination of the two SVECTOR lists weighted + by the factor of each SVECTOR */ +{ + SVECTOR *f, *sum; + + for (f = a; f->next; f = f->next) + ; /* find end of first vector list */ + f->next = b; /* temporarily append the two vector lists */ + sum = add_list_ss_r(a, min_non_zero); + f->next = NULL; /* undo append */ + return (sum); +} + +SVECTOR *add_list_ss_r(SVECTOR *a, double min_non_zero) +/* computes the linear combination of the SVECTOR list weighted + by the factor of each SVECTOR */ +{ + SVECTOR *oldsum, *sum, *f; + WORD empty[2]; + + if (!a) { + empty[0].wnum = 0; + sum = create_svector(empty, NULL, 1.0); + } else if (a && (!a->next)) { + sum = smult_s(a, a->factor); + } else { + sum = multadd_ss_r(a, a->next, a->factor, a->next->factor, min_non_zero); + for (f = a->next->next; f; f = f->next) { + oldsum = sum; + sum = multadd_ss_r(oldsum, f, 1.0, f->factor, min_non_zero); + free_svector(oldsum); + } + } + return (sum); +} + +int compareup_word(const void *a, const void *b) { + double va, vb; + va = ((WORD *)a)->wnum; + vb = ((WORD *)b)->wnum; + return ((va > vb) - (va < vb)); +} + +SVECTOR *add_list_sort_ss(SVECTOR *a) +/* computes the linear combination of the SVECTOR list weighted + by the factor of each SVECTOR. This should be a lot faster + than add_list_ss for long lists. */ +{ + return (add_list_sort_ss_r(a, 0)); +} + +SVECTOR *add_dual_list_sort_ss_r(SVECTOR *a, SVECTOR *b, double min_non_zero) +/* computes the linear combination of the two SVECTOR lists weighted + by the factor of each SVECTOR */ +{ + SVECTOR *f, *sum; + + for (f = a; f->next; f = f->next) + ; /* find end of first vector list */ + f->next = b; /* temporarily append the two vector lists */ + sum = add_list_sort_ss_r(a, min_non_zero); + f->next = NULL; /* undo append */ + return (sum); +} + +SVECTOR *add_list_sort_ss_r(SVECTOR *a, double min_non_zero) +/* Like add_list_sort_ss(SVECTOR *a), but rounds values smaller + than min_non_zero to zero. 
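+   Strategy, as implemented below: write the entries of all vectors
+   in the list into one array (scaling each by its vector's factor),
+   qsort that array by feature number, then merge runs of equal
+   feature numbers in a single pass, dropping merged weights inside
+   (-min_non_zero, min_non_zero).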
*/ +{ + SVECTOR *sum, *f; + WORD empty[2], *ai, *concat, *concati, *concat_read, *concat_write; + long length, i; + double weight; + + if (a) { + /* count number or entries over all vectors */ + length = 0; + for (f = a; f; f = f->next) { + + ai = f->words; + while (ai->wnum) { + length++; + ai++; + } + } + + /* write all entries into one long array and sort by feature number */ + concat = (WORD *)my_malloc(sizeof(WORD) * (length + 1)); + concati = concat; + for (f = a; f; f = f->next) { + ai = f->words; + while (ai->wnum) { + (*concati) = (*ai); + concati->weight *= f->factor; + concati++; + ai++; + } + } + qsort(concat, length, sizeof(WORD), compareup_word); + + concat_read = concat + 1; + concat_write = concat; + for (i = 0; (i < length - 1) && (concat_write->wnum != concat_read->wnum); + i++) { + concat_write++; + concat_read++; + } + weight = concat_write->weight; + for (i = i; (i < length - 1); i++) { + if (concat_write->wnum == concat_read->wnum) { + weight += (double)concat_read->weight; + concat_read++; + } else { + if ((weight > min_non_zero) || (weight < -min_non_zero)) { + concat_write->weight = weight; + concat_write++; + } + (*concat_write) = (*concat_read); + weight = concat_write->weight; + concat_read++; + } + } + if ((length > 0) && ((weight > min_non_zero) || (weight < -min_non_zero))) { + concat_write->weight = weight; + concat_write++; + } + concat_write->wnum = 0; + + if (1) { /* this wastes some memory, but saves malloc'ing */ + sum = create_svector_shallow(concat, NULL, 1.0); + } else { /* this is more memory efficient */ + sum = create_svector(concat, NULL, 1.0); + free(concat); + } + } else { + empty[0].wnum = 0; + sum = create_svector(empty, NULL, 1.0); + } + return (sum); +} + +SVECTOR *add_list_ns(SVECTOR *a) { return (add_list_ns_r(a, 0)); } + +SVECTOR *add_dual_list_ns_r(SVECTOR *a, SVECTOR *b, double min_non_zero) +/* computes the linear combination of the two SVECTOR lists weighted + by the factor of each SVECTOR */ +{ + SVECTOR *f, *sum; + + for (f = a; f->next; f = f->next) + ; /* find end of first vector list */ + f->next = b; /* temporarily append the two vector lists */ + sum = add_list_ns_r(a, min_non_zero); + f->next = NULL; /* undo append */ + return (sum); +} + +SVECTOR *add_list_ns_r(SVECTOR *a, double min_non_zero) +/* computes the linear combination of the SVECTOR list weighted + by the factor of each SVECTOR. assumes that the number of + features is small compared to the number of elements in the + list */ +{ + SVECTOR *vec, *f; + register WORD *ai; + long totwords; + double *sum; + + /* find max feature number */ + totwords = 0; + for (f = a; f; f = f->next) { + ai = f->words; + while (ai->wnum) { + if (totwords < ai->wnum) + totwords = ai->wnum; + ai++; + } + } + sum = create_nvector(totwords); + + clear_nvector(sum, totwords); + for (f = a; f; f = f->next) + add_vector_ns(sum, f, f->factor); + + vec = create_svector_n_r(sum, totwords, NULL, 1.0, min_non_zero); + free(sum); + + return (vec); +} + +void add_list_n_ns(double *vec_n, SVECTOR *vec_s, double faktor) { + SVECTOR *f; + for (f = vec_s; f; f = f->next) + add_vector_ns(vec_n, f, f->factor * faktor); +} + +void append_svector_list(SVECTOR *a, SVECTOR *b) +/* appends SVECTOR b to the end of SVECTOR a. 
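+   Note that a must be non-NULL and that b is linked in rather than
+   copied, so a later free_svector(a) frees b as well.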
*/ +{ + SVECTOR *f; + + for (f = a; f->next; f = f->next) + ; /* find end of first vector list */ + f->next = b; /* append the two vector lists */ +} + +void mult_svector_list(SVECTOR *a, double factor) +/* multiplies the factor of each element in vector list with factor */ +{ + SVECTOR *f; + + for (f = a; f; f = f->next) + f->factor *= factor; +} + +void setfactor_svector_list(SVECTOR *a, double factor) +/* sets the factor of each element in vector list to factor */ +{ + SVECTOR *f; + + for (f = a; f; f = f->next) + f->factor = factor; +} + +SVECTOR *smult_s(SVECTOR *a, double factor) +/* scale sparse vector a by factor */ +{ + SVECTOR *vec; + register WORD *sum, *sumi; + register WORD *ai; + long veclength; + char *userdefined = NULL; + + ai = a->words; + veclength = 0; + while (ai->wnum) { + veclength++; + ai++; + } + veclength++; + + sum = (WORD *)my_malloc(sizeof(WORD) * veclength); + sumi = sum; + ai = a->words; + while (ai->wnum) { + (*sumi) = (*ai); + sumi->weight *= factor; + if (sumi->weight != 0) + sumi++; + ai++; + } + sumi->wnum = 0; + + if (a->userdefined) { + userdefined = + (char *)my_malloc(sizeof(char) * (strlen(a->userdefined) + 1)); + strcpy(userdefined, a->userdefined); + } + + vec = create_svector_shallow(sum, userdefined, 1.0); + return (vec); +} + +SVECTOR *shift_s(SVECTOR *a, long shift) +/* shifts the feature numbers by shift positions */ +{ + SVECTOR *vec; + register WORD *sum, *sumi; + register WORD *ai; + long veclength; + char *userdefined = NULL; + + ai = a->words; + veclength = 0; + while (ai->wnum) { + veclength++; + ai++; + } + veclength++; + + sum = (WORD *)my_malloc(sizeof(WORD) * veclength); + sumi = sum; + ai = a->words; + while (ai->wnum) { + (*sumi) = (*ai); + sumi->wnum += shift; + ai++; + sumi++; + } + sumi->wnum = 0; + + if (a->userdefined) { + userdefined = + (char *)my_malloc(sizeof(char) * (strlen(a->userdefined) + 1)); + strcpy(userdefined, a->userdefined); + } + + vec = create_svector_shallow(sum, userdefined, a->factor); + return (vec); +} + +int featvec_eq(SVECTOR *a, SVECTOR *b) +/* tests two sparse vectors for equality */ +{ + register WORD *ai, *bj; + ai = a->words; + bj = b->words; + while (ai->wnum && bj->wnum) { + if (ai->wnum > bj->wnum) { + if ((bj->weight) != 0) + return (0); + bj++; + } else if (ai->wnum < bj->wnum) { + if ((ai->weight) != 0) + return (0); + ai++; + } else { + if ((ai->weight) != (bj->weight)) + return (0); + ai++; + bj++; + } + } + return (1); +} + +double model_length_s(MODEL *model) +/* compute length of weight vector */ +{ + register long i, j; + register double sum = 0, alphai; + register DOC *supveci; + KERNEL_PARM *kernel_parm = &(model->kernel_parm); + + for (i = 1; i < model->sv_num; i++) { + alphai = model->alpha[i]; + supveci = model->supvec[i]; + for (j = 1; j < model->sv_num; j++) { + sum += alphai * model->alpha[j] * + kernel(kernel_parm, supveci, model->supvec[j]); + } + } + return (sqrt(sum)); +} + +double model_length_n(MODEL *model) +/* compute length of weight vector */ +{ + long i, totwords = model->totwords + 1; + double sum, *weight_n; + SVECTOR *weight; + + if (model->kernel_parm.kernel_type != LINEAR) { + printf("ERROR: model_length_n applies only to linear kernel!\n"); + exit(1); + } + weight_n = create_nvector(totwords); + clear_nvector(weight_n, totwords); + for (i = 1; i < model->sv_num; i++) + add_list_n_ns(weight_n, model->supvec[i]->fvec, model->alpha[i]); + weight = create_svector_n(weight_n, totwords, NULL, 1.0); + sum = sprod_ss(weight, weight); + free(weight_n); + 
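+  /* weight_n (dense) and weight (sparse) were only scratch copies
+     used to accumulate w and compute w*w */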
free_svector(weight); + return (sqrt(sum)); +} + +void mult_vector_ns(double *vec_n, SVECTOR *vec_s, double faktor) { + register WORD *ai; + ai = vec_s->words; + while (ai->wnum) { + vec_n[ai->wnum] *= (faktor * (double)ai->weight); + ai++; + } +} + +void add_vector_ns(double *vec_n, SVECTOR *vec_s, double faktor) { + /* Note: SVECTOR lists are not followed, but only the first + SVECTOR is used */ + register WORD *ai; + ai = vec_s->words; + while (ai->wnum) { + vec_n[ai->wnum] += (faktor * (double)ai->weight); + ai++; + } +} + +double sprod_ns(double *vec_n, SVECTOR *vec_s) { + register double sum = 0; + register WORD *ai; + ai = vec_s->words; + while (ai->wnum) { + sum += (vec_n[ai->wnum] * (double)ai->weight); + ai++; + } + return (sum); +} + +void add_weight_vector_to_linear_model(MODEL *model) +/* compute weight vector in linear case and add to model */ +{ + long i; + SVECTOR *f; + + model->lin_weights = create_nvector(model->totwords); + clear_nvector(model->lin_weights, model->totwords); + for (i = 1; i < model->sv_num; i++) { + for (f = (model->supvec[i])->fvec; f; f = f->next) + add_vector_ns(model->lin_weights, f, f->factor * model->alpha[i]); + } +} + +DOC *create_example(long docnum, long queryid, long slackid, double costfactor, + SVECTOR *fvec) { + DOC *example; + example = (DOC *)my_malloc(sizeof(DOC)); + example->docnum = docnum; + example->kernelid = docnum; + example->queryid = queryid; + example->slackid = slackid; + example->costfactor = costfactor; + example->fvec = fvec; + return (example); +} + +void free_example(DOC *example, long deep) { + if (example) { + if (deep) { + if (example->fvec) + free_svector(example->fvec); + } + free(example); + } +} + +int compare_randpair(const void *a, const void *b) { + long va, vb; + va = ((RANDPAIR *)a)->sort; + vb = ((RANDPAIR *)b)->sort; + return ((va > vb) - (va < vb)); +} + +long *random_order(long n) +/* creates an array of the integers [0..n-1] in random order */ +{ + long *randarray = (long *)my_malloc(sizeof(long) * n); + RANDPAIR *randpair = (RANDPAIR *)my_malloc(sizeof(RANDPAIR) * n); + long i; + + for (i = 0; i < n; i++) { + randpair[i].val = i; + randpair[i].sort = rand(); + } + qsort(randpair, n, sizeof(RANDPAIR), compare_randpair); + for (i = 0; i < n; i++) { + randarray[i] = randpair[i].val; + } + free(randpair); + return (randarray); +} + +void print_percent_progress(long *progress, long maximum, long percentperdot, + char *symbol) +/* every time this function gets called, progress is + incremented. It prints symbol every percentperdot calls, + assuming that maximum is the max number of calls */ +{ + if ((percentperdot * (*progress - 1) / maximum) != + (percentperdot * (*progress) / maximum)) { + fflush(stdout); + } + (*progress)++; +} + +/************ Some useful dense vector and matrix routines ****************/ + +MATRIX *create_matrix(int n, int m) +/* create matrix with n rows and m colums */ +{ + int i; + MATRIX *matrix; + + matrix = (MATRIX *)my_malloc(sizeof(MATRIX)); + matrix->n = n; + matrix->m = m; + matrix->element = (double **)my_malloc(sizeof(double *) * n); + for (i = 0; i < n; i++) { + matrix->element[i] = (double *)my_malloc(sizeof(double) * m); + } + return (matrix); +} + +MATRIX *realloc_matrix(MATRIX *matrix, int n, int m) +/* extends/shrinks matrix to n rows and m colums. Not that added elements are + not initialized. 
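+   Callers are responsible for initializing any rows and columns
+   that this call adds.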
*/ +{ + int i; + + if (!matrix) + return (create_matrix(n, m)); + + for (i = n; i < matrix->n; i++) + free(matrix->element[i]); + matrix->element = (double **)realloc(matrix->element, sizeof(double *) * n); + for (i = matrix->n; i < n; i++) + matrix->element[i] = (double *)my_malloc(sizeof(double) * m); + for (i = 0; i < MIN(n, matrix->n); i++) { + matrix->element[i] = + (double *)realloc(matrix->element[i], sizeof(double) * m); + } + matrix->n = n; + matrix->m = m; + return (matrix); +} + +double *create_nvector(int n) +/* creates a dense column vector with n+1 rows. unfortunately, there + is part of the code that starts counting at 0, while the sparse + vectors start counting at 1. So, it always allocates one extra + row. */ +{ + double *vector; + + vector = (double *)my_malloc(sizeof(double) * (n + 1)); + + return (vector); +} + +void clear_nvector(double *vec, long int n) { + register long i; + for (i = 0; i <= n; i++) + vec[i] = 0; +} + +MATRIX *copy_matrix(MATRIX *matrix) +/* create deep copy of matrix */ +{ + int i, j; + MATRIX *copy; + + copy = create_matrix(matrix->n, matrix->m); + for (i = 0; i < matrix->n; i++) { + for (j = 0; j < matrix->m; j++) { + copy->element[i][j] = matrix->element[i][j]; + } + } + return (copy); +} + +void free_matrix(MATRIX *matrix) +/* deallocates memory */ +{ + int i; + + for (i = 0; i < matrix->n; i++) { + free(matrix->element[i]); + } + free(matrix->element); + free(matrix); +} + +void free_nvector(double *vector) +/* deallocates memory */ +{ + free(vector); +} + +MATRIX *transpose_matrix(MATRIX *matrix) +/* returns copy with transpose of matrix */ +{ + int i, j; + MATRIX *copy; + + copy = create_matrix(matrix->m, matrix->n); + for (i = 0; i < matrix->n; i++) { + for (j = 0; j < matrix->m; j++) { + copy->element[j][i] = matrix->element[i][j]; + } + } + return (copy); +} + +MATRIX *cholesky_matrix(MATRIX *A) +/* Given a positive-definite symmetric matrix A[0..n-1][0..n-1], this routine + constructs its Cholesky decomposition, A = L · LT . On input, only the upper + riangle of A need be given; A is not modified. The Cholesky factor L is + eturned in the lower triangle. */ +{ + int i, j, k, n; + double sum; + MATRIX *L; + + if (A->m != A->n) { + printf("ERROR: Matrix not quadratic. Cannot compute Cholesky!\n"); + exit(1); + } + n = A->n; + L = copy_matrix(A); + + for (i = 0; i < n; i++) { + for (j = i; j < n; j++) { + for (sum = L->element[i][j], k = i - 1; k >= 0; k--) + sum -= L->element[i][k] * L->element[j][k]; + if (i == j) { + if (sum <= 0.0) + printf("Cholesky: Matrix not positive definite"); + L->element[i][i] = sqrt(sum); + } else + L->element[j][i] = sum / L->element[i][i]; + } + } + /* set upper triange to zero */ + for (i = 0; i < n; i++) + for (j = i + 1; j < n; j++) + L->element[i][j] = 0; + + return (L); +} + +double *find_indep_subset_of_matrix(MATRIX *A, double epsilon) +/* Given a positive-semidefinite symmetric matrix A[0..n-1][0..n-1], this + routine finds a subset of rows and colums that is linear independent. To do + this, it constructs the Cholesky decomposition, A = L · LT. On input, only + he upper triangle of A need be given; A is not modified. The routine returns + vector in which non-zero elements indicate the linear independent subset. + psilon is the amount by which the diagonal entry of L has to be greater than + ero. */ +{ + int i, j, k, n; + double sum, *indep; + MATRIX *L; + + if (A->m != A->n) { + printf("ERROR: Matrix not quadratic. 
Cannot compute Cholesky!\n"); + exit(1); + } + n = A->n; + L = copy_matrix(A); + + for (i = 0; i < n; i++) { + for (j = i; j < n; j++) { + for (sum = L->element[i][j], k = i - 1; k >= 0; k--) + sum -= L->element[i][k] * L->element[j][k]; + if (i == j) { + if (sum <= epsilon) + sum = 0; + L->element[i][i] = sqrt(sum); + } else if (L->element[i][i] == 0) + L->element[j][i] = 0; + else + L->element[j][i] = sum / L->element[i][i]; + } + } + /* Gather non-zero diagonal elements */ + indep = create_nvector(n); + for (i = 0; i < n; i++) + indep[i] = L->element[i][i]; + + free_matrix(L); + return (indep); +} + +MATRIX *invert_ltriangle_matrix(MATRIX *L) +/* Given a lower triangular matrix L, computes inverse L^-1 */ +{ + int i, j, k, n; + double sum; + MATRIX *I; + + if (L->m != L->n) { + printf("ERROR: Matrix not quadratic. Cannot invert triangular matrix!\n"); + exit(1); + } + n = L->n; + I = copy_matrix(L); + + for (i = 0; i < n; i++) { + I->element[i][i] = 1.0 / L->element[i][i]; + for (j = i + 1; j < n; j++) { + sum = 0.0; + for (k = i; k < j; k++) + sum -= I->element[j][k] * I->element[k][i]; + I->element[j][i] = sum / L->element[j][j]; + } + } + + return (I); +} + +double *prod_nvector_matrix(double *v, MATRIX *A) +/* For column vector v and matrix A (assumed to match in size), computes + w^T=v^T*A */ +{ + int i, j; + double sum; + double *w; + + w = create_nvector(A->m); + + for (i = 0; i < A->m; i++) { + sum = 0.0; + for (j = 0; j < A->n; j++) { + sum += v[j] * A->element[j][i]; + } + w[i] = sum; + } + + return (w); +} + +double *prod_matrix_nvector(MATRIX *A, double *v) +/* For column vector v and matrix A (assumed to match in size), computes w=A*v + */ +{ + int i, j; + double sum; + double *w; + + w = create_nvector(A->n); + + for (i = 0; i < A->n; i++) { + sum = 0.0; + for (j = 0; j < A->m; j++) { + sum += v[j] * A->element[i][j]; + } + w[i] = sum; + } + + return (w); +} + +double *prod_nvector_ltmatrix(double *v, MATRIX *A) +/* For column vector v and a lower triangular matrix A (assumed to + match in size), computes w^T=v^T*A */ +{ + int i, j; + double sum; + double *w; + + w = create_nvector(A->m); + + for (i = 0; i < A->m; i++) { + sum = 0.0; + for (j = i; j < A->n; j++) { + sum += v[j] * A->element[j][i]; + } + w[i] = sum; + } + + return (w); +} + +double *prod_ltmatrix_nvector(MATRIX *A, double *v) +/* For column vector v and lower triangular matrix A (assumed to match + in size), computes w=A*v */ +{ + int i, j; + double sum; + double *w; + + w = create_nvector(A->n); + + for (i = 0; i < A->n; i++) { + sum = 0.0; + for (j = 0; j <= i; j++) { + sum += v[j] * A->element[i][j]; + } + w[i] = sum; + } + + return (w); +} + +MATRIX *prod_matrix_matrix(MATRIX *A, MATRIX *B) +/* For matrices A and B (assumed to match in size), computes C=A*B */ +{ + int i, j, k; + double sum; + MATRIX *C; + + if (A->m != B->n) { + printf("ERROR: Matrix size does not match. 
Cannot compute product!\n"); + exit(1); + } + C = create_matrix(A->n, B->m); + + for (i = 0; i < A->n; i++) { + for (j = 0; j < B->m; j++) { + sum = 0.0; + for (k = 0; k < A->m; k++) { + sum += A->element[i][k] * B->element[k][j]; + } + C->element[i][j] = sum; + } + } + + return (C); +} + +void print_matrix(MATRIX *matrix) +/* prints matrix to STDOUT */ +{ + int i, j; + + printf("\n"); + printf("\n"); + for (i = 0; i < matrix->n; i++) { + for (j = 0; j < matrix->m; j++) { + printf("%4.3f\t", matrix->element[i][j]); + } + printf("\n"); + } +} + +/***************************** IO routines ***************************/ + +void write_model(char *modelfile, MODEL *model) { + FILE *modelfl; + long j, i, sv_num; + SVECTOR *v; + MODEL *compact_model = NULL; + + if (verbosity >= 1) { + printf("Writing model file..."); + fflush(stdout); + } + + /* Replace SV with single weight vector */ + if (0 && model->kernel_parm.kernel_type == LINEAR) { + if (verbosity >= 1) { + printf("(compacting..."); + fflush(stdout); + } + compact_model = compact_linear_model(model); + model = compact_model; + if (verbosity >= 1) { + printf("done)"); + fflush(stdout); + } + } + + if ((modelfl = fopen(modelfile, "w")) == NULL) { + perror(modelfile); + exit(1); + } + fprintf(modelfl, "SVM-light Version %s\n", VERSION); + fprintf(modelfl, "%ld # kernel type\n", model->kernel_parm.kernel_type); + fprintf(modelfl, "%ld # kernel parameter -d \n", + model->kernel_parm.poly_degree); + fprintf(modelfl, "%.8g # kernel parameter -g \n", + model->kernel_parm.rbf_gamma); + fprintf(modelfl, "%.8g # kernel parameter -s \n", + model->kernel_parm.coef_lin); + fprintf(modelfl, "%.8g # kernel parameter -r \n", + model->kernel_parm.coef_const); + fprintf(modelfl, "%s# kernel parameter -u \n", model->kernel_parm.custom); + fprintf(modelfl, "%ld # highest feature index \n", model->totwords); + fprintf(modelfl, "%ld # number of training documents \n", model->totdoc); + + sv_num = 1; + for (i = 1; i < model->sv_num; i++) { + for (v = model->supvec[i]->fvec; v; v = v->next) + sv_num++; + } + fprintf(modelfl, "%ld # number of support vectors plus 1 \n", sv_num); + fprintf(modelfl, + "%.8g # threshold b, each following line is a SV (starting with " + "alpha*y)\n", + model->b); + + for (i = 1; i < model->sv_num; i++) { + for (v = model->supvec[i]->fvec; v; v = v->next) { + fprintf(modelfl, "%.32g ", model->alpha[i] * v->factor); + for (j = 0; (v->words[j]).wnum; j++) { + fprintf(modelfl, "%ld:%.8g ", (long)(v->words[j]).wnum, + (double)(v->words[j]).weight); + } + if (v->userdefined) + fprintf(modelfl, "#%s\n", v->userdefined); + else + fprintf(modelfl, "#\n"); + /* NOTE: this could be made more efficient by summing the + alpha's of identical vectors before writing them to the + file. 
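+         Each line written above has the form
+           alpha_i*y_i featnum:featval ... #userdefined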
*/ + } + } + fclose(modelfl); + if (compact_model) + free_model(compact_model, 1); + if (verbosity >= 1) { + printf("done\n"); + } +} + +MODEL *read_model(char *modelfile) { + FILE *modelfl; + long i, queryid, slackid; + double costfactor; + long max_sv, max_words, ll, wpos; + char *line, *comment; + WORD *words; + char version_buffer[100]; + MODEL *model; + + if (verbosity >= 1) { + printf("Reading model..."); + fflush(stdout); + } + + nol_ll(modelfile, &max_sv, &max_words, &ll); /* scan size of model file */ + max_words += 2; + ll += 2; + + words = (WORD *)my_malloc(sizeof(WORD) * (max_words + 10)); + line = (char *)my_malloc(sizeof(char) * ll); + model = (MODEL *)my_malloc(sizeof(MODEL)); + + if ((modelfl = fopen(modelfile, "r")) == NULL) { + perror(modelfile); + exit(1); + } + + fscanf(modelfl, "SVM-light Version %s\n", version_buffer); + if (strcmp(version_buffer, VERSION)) { + perror("Version of model-file does not match version of svm_classify!"); + exit(1); + } + fscanf(modelfl, "%ld%*[^\n]\n", &model->kernel_parm.kernel_type); + fscanf(modelfl, "%ld%*[^\n]\n", &model->kernel_parm.poly_degree); + fscanf(modelfl, "%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma); + fscanf(modelfl, "%lf%*[^\n]\n", &model->kernel_parm.coef_lin); + fscanf(modelfl, "%lf%*[^\n]\n", &model->kernel_parm.coef_const); + fscanf(modelfl, "%[^#]%*[^\n]\n", model->kernel_parm.custom); + + fscanf(modelfl, "%ld%*[^\n]\n", &model->totwords); + fscanf(modelfl, "%ld%*[^\n]\n", &model->totdoc); + fscanf(modelfl, "%ld%*[^\n]\n", &model->sv_num); + fscanf(modelfl, "%lf%*[^\n]\n", &model->b); + + model->supvec = (DOC **)my_malloc(sizeof(DOC *) * model->sv_num); + model->alpha = (double *)my_malloc(sizeof(double) * model->sv_num); + model->index = NULL; + model->lin_weights = NULL; + + for (i = 1; i < model->sv_num; i++) { + fgets(line, (int)ll, modelfl); + if (!parse_document(line, words, &(model->alpha[i]), &queryid, &slackid, + &costfactor, &wpos, max_words, &comment)) { + printf("\nParsing error while reading model file in SV %ld!\n%s", i, + line); + exit(1); + } + model->supvec[i] = + create_example(-1, 0, 0, 0.0, create_svector(words, comment, 1.0)); + } + fclose(modelfl); + free(line); + free(words); + if (verbosity >= 1) { + fprintf(stdout, "OK. (%d support vectors read)\n", + (int)(model->sv_num - 1)); + } + return (model); +} + +MODEL *copy_model(MODEL *model) { + MODEL *newmodel; + long i; + + newmodel = (MODEL *)my_malloc(sizeof(MODEL)); + (*newmodel) = (*model); + newmodel->supvec = (DOC **)my_malloc(sizeof(DOC *) * model->sv_num); + newmodel->alpha = (double *)my_malloc(sizeof(double) * model->sv_num); + newmodel->index = NULL; /* index is not copied */ + newmodel->supvec[0] = NULL; + newmodel->alpha[0] = 0; + for (i = 1; i < model->sv_num; i++) { + newmodel->alpha[i] = model->alpha[i]; + newmodel->supvec[i] = create_example( + model->supvec[i]->docnum, model->supvec[i]->queryid, 0, + model->supvec[i]->costfactor, copy_svector(model->supvec[i]->fvec)); + } + if (model->lin_weights) { + newmodel->lin_weights = + (double *)my_malloc(sizeof(double) * (model->totwords + 1)); + for (i = 0; i < model->totwords + 1; i++) + newmodel->lin_weights[i] = model->lin_weights[i]; + } + return (newmodel); +} + +MODEL *compact_linear_model(MODEL *model) +/* Makes a copy of model where the support vectors are replaced + with a single linear weight vector. */ +/* NOTE: It adds the linear weight vector also to newmodel->lin_weights */ +/* WARNING: This is correct only for linear models! 
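+            For non-linear kernels the support vectors cannot be
+            folded into a single weight vector, so the compacted
+            model would compute wrong scores.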
*/ +{ + MODEL *newmodel; + + newmodel = (MODEL *)my_malloc(sizeof(MODEL)); + (*newmodel) = (*model); + add_weight_vector_to_linear_model(newmodel); + newmodel->supvec = (DOC **)my_malloc(sizeof(DOC *) * 2); + newmodel->alpha = (double *)my_malloc(sizeof(double) * 2); + newmodel->index = NULL; /* index is not copied */ + newmodel->supvec[0] = NULL; + newmodel->alpha[0] = 0.0; + newmodel->supvec[1] = create_example( + -1, 0, 0, 0, + create_svector_n(newmodel->lin_weights, newmodel->totwords, NULL, 1.0)); + newmodel->alpha[1] = 1.0; + newmodel->sv_num = 2; + + return (newmodel); +} + +void free_model(MODEL *model, int deep) { + long i; + + if (model->supvec) { + if (deep) { + for (i = 1; i < model->sv_num; i++) { + free_example(model->supvec[i], 1); + } + } + free(model->supvec); + } + if (model->alpha) + free(model->alpha); + if (model->index) + free(model->index); + if (model->lin_weights) + free(model->lin_weights); + free(model); +} + +void read_documents(char *docfile, DOC ***docs, double **label, + long int *totwords, long int *totdoc) { + char *line, *comment; + WORD *words; + long dnum = 0, wpos, dpos = 0, dneg = 0, dunlab = 0, queryid, slackid, + max_docs; + long max_words_doc, ll; + double doc_label, costfactor; + FILE *docfl; + + if (verbosity >= 1) { + printf("Scanning examples..."); + fflush(stdout); + } + nol_ll(docfile, &max_docs, &max_words_doc, &ll); /* scan size of input file */ + max_words_doc += 2; + ll += 2; + max_docs += 2; + if (verbosity >= 1) { + printf("done\n"); + fflush(stdout); + } + + (*docs) = (DOC **)my_malloc(sizeof(DOC *) * max_docs); /* feature vectors */ + (*label) = (double *)my_malloc(sizeof(double) * max_docs); /* target values */ + line = (char *)my_malloc(sizeof(char) * ll); + + if ((docfl = fopen(docfile, "r")) == NULL) { + perror(docfile); + exit(1); + } + + words = (WORD *)my_malloc(sizeof(WORD) * (max_words_doc + 10)); + if (verbosity >= 1) { + printf("Reading examples into memory..."); + fflush(stdout); + } + dnum = 0; + (*totwords) = 0; + while ((!feof(docfl)) && fgets(line, (int)ll, docfl)) { + if (line[0] == '#') + continue; /* line contains comments */ + if (!parse_document(line, words, &doc_label, &queryid, &slackid, + &costfactor, &wpos, max_words_doc, &comment)) { + printf("\nParsing error in line %ld!\n%s", dnum, line); + exit(1); + } + (*label)[dnum] = doc_label; + /* printf("docnum=%ld: Class=%f ",dnum,doc_label); */ + if (doc_label > 0) + dpos++; + if (doc_label < 0) + dneg++; + if (doc_label == 0) + dunlab++; + if ((wpos > 1) && ((words[wpos - 2]).wnum > (*totwords))) + (*totwords) = (words[wpos - 2]).wnum; + if ((*totwords) > MAXFEATNUM) { + printf("\nMaximum feature number exceeds limit defined in MAXFEATNUM!\n"); + printf("LINE: %s\n", line); + exit(1); + } + (*docs)[dnum] = create_example(dnum, queryid, slackid, costfactor, + create_svector(words, comment, 1.0)); + /* printf("\nNorm=%f\n",((*docs)[dnum]->fvec)->twonorm_sq); */ + dnum++; + if (verbosity >= 1) { + if ((dnum % 100) == 0) { + printf("%ld..", dnum); + fflush(stdout); + } + } + } + + fclose(docfl); + free(line); + free(words); + if (verbosity >= 1) { + fprintf(stdout, "OK. 
(%ld examples read)\n", dnum); + } + (*totdoc) = dnum; +} + +int parse_document(char *line, WORD *words, double *label, long *queryid, + long *slackid, double *costfactor, long int *numwords, + long int max_words_doc, char **comment) { + register long wpos, pos; + long wnum; + double weight; + char featurepair[1000], junk[1000]; + + (*queryid) = 0; + (*slackid) = 0; + (*costfactor) = 1; + + pos = 0; + (*comment) = NULL; + while (line[pos]) { /* cut off comments */ + if ((line[pos] == '#') && (!(*comment))) { + line[pos] = 0; + (*comment) = &(line[pos + 1]); + } + if (line[pos] == '\n') { /* strip the CR */ + line[pos] = 0; + } + pos++; + } + if (!(*comment)) + (*comment) = &(line[pos]); + /* printf("Comment: '%s'\n",(*comment)); */ + + wpos = 0; + /* check, that line starts with target value or zero, but not with + feature pair */ + if (sscanf(line, "%s", featurepair) == EOF) + return (0); + pos = 0; + while ((featurepair[pos] != ':') && featurepair[pos]) + pos++; + if (featurepair[pos] == ':') { + perror("Line must start with label or 0!!!\n"); + printf("LINE: %s\n", line); + exit(1); + } + /* read the target value */ + if (sscanf(line, "%lf", label) == EOF) + return (0); + pos = 0; + while (space_or_null((int)line[pos])) + pos++; + while ((!space_or_null((int)line[pos])) && line[pos]) + pos++; + while ((pos += read_word(line + pos, featurepair)) && (featurepair[0]) && + (wpos < max_words_doc)) { + /* printf("%s\n",featurepair); */ + if (sscanf(featurepair, "qid:%ld%s", &wnum, junk) == 1) { + /* it is the query id */ + (*queryid) = (long)wnum; + } else if (sscanf(featurepair, "sid:%ld%s", &wnum, junk) == 1) { + /* it is the slack id */ + if (wnum > 0) + (*slackid) = (long)wnum; + else { + perror("Slack-id must be greater or equal to 1!!!\n"); + printf("LINE: %s\n", line); + exit(1); + } + } else if (sscanf(featurepair, "cost:%lf%s", &weight, junk) == 1) { + /* it is the example-dependent cost factor */ + (*costfactor) = (double)weight; + } else if (sscanf(featurepair, "%ld:%lf%s", &wnum, &weight, junk) == 2) { + /* it is a regular feature */ + if (wnum <= 0) { + perror("Feature numbers must be larger or equal to 1!!!\n"); + printf("LINE: %s\n", line); + exit(1); + } + if ((wpos > 0) && ((words[wpos - 1]).wnum >= wnum)) { + perror("Features must be in increasing order!!!\n"); + printf("LINE: %s\n", line); + exit(1); + } + (words[wpos]).wnum = wnum; + (words[wpos]).weight = (FVAL)weight; + wpos++; + } else { + perror("Cannot parse feature/value pair!!!\n"); + printf("'%s' in LINE: %s\n", featurepair, line); + exit(1); + } + } + (words[wpos]).wnum = 0; + (*numwords) = wpos + 1; + return (1); +} + +double *read_alphas(char *alphafile, long totdoc) +/* reads the alpha vector from a file as written by the + write_alphas function */ +{ + FILE *fl; + double *alpha; + long dnum; + + if ((fl = fopen(alphafile, "r")) == NULL) { + perror(alphafile); + exit(1); + } + + alpha = (double *)my_malloc(sizeof(double) * totdoc); + if (verbosity >= 1) { + printf("Reading alphas..."); + fflush(stdout); + } + dnum = 0; + while ((!feof(fl)) && fscanf(fl, "%lf\n", &alpha[dnum]) && (dnum < totdoc)) { + dnum++; + } + if (dnum != totdoc) { + perror("\nNot enough values in alpha file!"); + exit(1); + } + fclose(fl); + + if (verbosity >= 1) { + printf("done\n"); + fflush(stdout); + } + + return (alpha); +} + +void set_learning_defaults(LEARN_PARM *learn_parm, KERNEL_PARM *kernel_parm) { + learn_parm->type = CLASSIFICATION; + strcpy(learn_parm->predfile, "trans_predictions"); + strcpy(learn_parm->alphafile, ""); + 
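+  /* everything below is a conservative default; the command-line
+     parsing in svm_learn may override any of these values */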
learn_parm->biased_hyperplane = 1; + learn_parm->sharedslack = 0; + learn_parm->remove_inconsistent = 0; + learn_parm->skip_final_opt_check = 0; + learn_parm->svm_maxqpsize = 10; + learn_parm->svm_newvarsinqp = 0; + learn_parm->svm_iter_to_shrink = -9999; + learn_parm->maxiter = 100000; + learn_parm->kernel_cache_size = 40; + learn_parm->svm_c = 0.0; + learn_parm->eps = 0.1; + learn_parm->transduction_posratio = -1.0; + learn_parm->svm_costratio = 1.0; + learn_parm->svm_costratio_unlab = 1.0; + learn_parm->svm_unlabbound = 1E-5; + learn_parm->epsilon_crit = 0.001; + learn_parm->epsilon_a = 1E-15; + learn_parm->compute_loo = 0; + learn_parm->rho = 1.0; + learn_parm->xa_depth = 0; + kernel_parm->kernel_type = LINEAR; + kernel_parm->poly_degree = 3; + kernel_parm->rbf_gamma = 1.0; + kernel_parm->coef_lin = 1; + kernel_parm->coef_const = 1; + strcpy(kernel_parm->custom, "empty"); +} + +int check_learning_parms(LEARN_PARM *learn_parm, KERNEL_PARM *kernel_parm) { + if ((learn_parm->skip_final_opt_check) && + (kernel_parm->kernel_type == LINEAR)) { + printf("\nIt does not make sense to skip the final optimality check for " + "linear kernels.\n\n"); + learn_parm->skip_final_opt_check = 0; + } + if ((learn_parm->skip_final_opt_check) && (learn_parm->remove_inconsistent)) { + printf("\nIt is necessary to do the final optimality check when removing " + "inconsistent \nexamples.\n"); + return (0); + } + if ((learn_parm->svm_maxqpsize < 2)) { + printf("\nMaximum size of QP-subproblems not in valid range: %ld [2..]\n", + learn_parm->svm_maxqpsize); + return (0); + } + if ((learn_parm->svm_maxqpsize < learn_parm->svm_newvarsinqp)) { + printf("\nMaximum size of QP-subproblems [%ld] must be larger than the " + "number of\n", + learn_parm->svm_maxqpsize); + printf("new variables [%ld] entering the working set in each iteration.\n", + learn_parm->svm_newvarsinqp); + return (0); + } + if (learn_parm->svm_iter_to_shrink < 1) { + printf("\nMaximum number of iterations for shrinking not in valid range: " + "%ld [1,..]\n", + learn_parm->svm_iter_to_shrink); + return (0); + } + if (learn_parm->svm_c < 0) { + printf("\nThe C parameter must be greater than zero!\n\n"); + return (0); + } + if (learn_parm->transduction_posratio > 1) { + printf( + "\nThe fraction of unlabeled examples to classify as positives must\n"); + printf("be less than 1.0 !!!\n\n"); + return (0); + } + if (learn_parm->svm_costratio <= 0) { + printf("\nThe COSTRATIO parameter must be greater than zero!\n\n"); + return (0); + } + if (learn_parm->epsilon_crit <= 0) { + printf("\nThe epsilon parameter must be greater than zero!\n\n"); + return (0); + } + if (learn_parm->rho < 0) { + printf("\nThe parameter rho for xi/alpha-estimates and leave-one-out " + "pruning must\n"); + printf("be greater than zero (typically 1.0 or 2.0, see T. Joachims, " + "Estimating the\n"); + printf( + "Generalization Performance of an SVM Efficiently, ICML, 2000.)!\n\n"); + return (0); + } + if ((learn_parm->xa_depth < 0) || (learn_parm->xa_depth > 100)) { + printf("\nThe parameter depth for ext. xi/alpha-estimates must be in " + "[0..100] (zero\n"); + printf("for switching to the conventional xa/estimates described in T. " + "Joachims,\n"); + printf("Estimating the Generalization Performance of an SVM Efficiently, " + "ICML, 2000.)\n"); + } + return (1); +} + +void nol_ll(char *file, long int *nol, long int *wol, long int *ll) +/* Grep through file and count number of lines, maximum number of + spaces per line, and longest line. 
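+   The three counts are used to size the line buffer and the WORD
+   array before parsing, e.g. in read_documents() and read_model().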
*/
+{
+  FILE *fl;
+  int ic;
+  char c;
+  long current_length, current_wol;
+
+  if ((fl = fopen(file, "r")) == NULL) {
+    perror(file);
+    exit(1);
+  }
+  current_length = 0;
+  current_wol = 0;
+  (*ll) = 0;
+  (*nol) = 1;
+  (*wol) = 0;
+  while ((ic = getc(fl)) != EOF) {
+    c = (char)ic;
+    current_length++;
+    if (space_or_null((int)c)) {
+      current_wol++;
+    }
+    if (c == '\n') {
+      (*nol)++;
+      if (current_length > (*ll)) {
+        (*ll) = current_length;
+      }
+      if (current_wol > (*wol)) {
+        (*wol) = current_wol;
+      }
+      current_length = 0;
+      current_wol = 0;
+    }
+  }
+  fclose(fl);
+}
+
+long minl(long int a, long int b) {
+  if (a < b)
+    return (a);
+  else
+    return (b);
+}
+
+long maxl(long int a, long int b) {
+  if (a > b)
+    return (a);
+  else
+    return (b);
+}
+
+double get_runtime(void) {
+  /* returns the current processor time in hundredth of a second */
+  clock_t start;
+  start = clock();
+  return ((double)start / ((double)(CLOCKS_PER_SEC) / 100.0));
+}
+
+#ifdef _MSC_VER
+
+int isnan(double a) { return (_isnan(a)); }
+
+#endif
+
+int space_or_null(int c) {
+  if (c == 0)
+    return 1;
+  return isspace((unsigned char)c);
+}
+
+int read_word(char *in, char *out) {
+  int found = 0;
+  while (isspace((int)(*in)) && (*in)) { /* skip over whitespace */
+    in++;
+    found++;
+  }
+  while (!space_or_null((int)(*in))) { /* read non-whitespace string */
+    (*out) = (*in);
+    in++;
+    found++;
+    out++;
+  }
+  (*out) = 0;
+  return (found);
+}
+
+void *my_malloc(size_t size) {
+  void *ptr;
+  if (size <= 0)
+    size = 1; /* for AIX compatibility */
+  ptr = (void *)malloc(size);
+  if (!ptr) {
+    perror("Out of memory!\n");
+    exit(1);
+  }
+  return (ptr);
+}
+
+void copyright_notice(void) {
+  printf("\nCopyright: Thorsten Joachims, thorsten@joachims.org\n\n");
+  printf(
+      "This software is available for non-commercial use only. It must not\n");
+  printf(
+      "be modified and distributed without prior permission of the author.\n");
+  printf(
+      "The author is not responsible for implications from the use of this\n");
+  printf("software.\n\n");
+}
diff --git a/src/classifier/svm/svm_light/svm_common.h b/src/classifier/svm/svm_light/svm_common.h
new file mode 100644
index 0000000..13c7022
--- /dev/null
+++ b/src/classifier/svm/svm_light/svm_common.h
@@ -0,0 +1,385 @@
+/************************************************************************/
+/*                                                                      */
+/*   svm_common.h                                                       */
+/*                                                                      */
+/*   Definitions and functions used in both svm_learn and svm_classify. */
+/*                                                                      */
+/*   Author: Thorsten Joachims                                          */
+/*   Date: 31.10.05                                                     */
+/*                                                                      */
+/*   Copyright (c) 2005  Thorsten Joachims - All rights reserved        */
+/*                                                                      */
+/*   This software is available for non-commercial use only. It must    */
+/*   not be modified and distributed without prior permission of the    */
+/*   author. The author is not responsible for implications from the    */
+/*   use of this software.                                              */
+/*                                                                      */
+/************************************************************************/
+
+#ifndef SVM_COMMON
+#define SVM_COMMON
+
+#include <stdio.h>
+#include <ctype.h>
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <float.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VERSION "V6.20"
+#define VERSION_DATE "14.08.08"
+
+#define CFLOAT float /* the type of float to use for caching */
+                     /* kernel evaluations.
Using float saves */ + /* us some memory, but you can use double, too */ +#define FNUM int32_t /* the type used for storing feature ids */ +#define FNUM_MAX 2147483647 /* maximum value that FNUM type can take */ +#define FVAL float /* the type used for storing feature values */ +#define MAXFEATNUM \ + 99999999 /* maximum feature number (must be in \ + valid range of FNUM type and long int!) */ + +#define LINEAR 0 /* linear kernel type */ +#define POLY 1 /* polynomial kernel type */ +#define RBF 2 /* rbf kernel type */ +#define SIGMOID 3 /* sigmoid kernel type */ +#define CUSTOM 4 /* userdefined kernel function from kernel.h */ +#define GRAM 5 /* use explicit gram matrix from kernel_parm */ + +#define CLASSIFICATION 1 /* train classification model */ +#define REGRESSION 2 /* train regression model */ +#define RANKING 3 /* train ranking model */ +#define OPTIMIZATION 4 /* train on general set of constraints */ + +#define MAXSHRINK 50000 /* maximum number of shrinking rounds */ + +typedef struct word { + FNUM wnum; /* word number */ + FVAL weight; /* word weight */ +} WORD; + +typedef struct svector { + WORD *words; /* The features/values in the vector by + increasing feature-number. Feature + numbers that are skipped are + interpreted as having value zero. */ + double twonorm_sq; /* The squared euclidian length of the + vector. Used to speed up the RBF kernel. */ + char *userdefined; /* You can put additional information + here. This can be useful, if you are + implementing your own kernel that + does not work with feature/values + representations (for example a + string kernel). By default, + svm-light will put here the string + after the # sign from each line of + the input file. */ + long kernel_id; /* Feature vectors with different + kernel_id's are orthogonal (ie. the + feature number do not match). This + is used for computing component + kernels for linear constraints which + are a sum of several different + weight vectors. (currently not + implemented). */ + struct svector *next; /* Let's you set up a list of SVECTOR's + for linear constraints which are a + sum of multiple feature + vectors. List is terminated by + NULL. */ + double factor; /* Factor by which this feature vector + is multiplied in the sum. */ +} SVECTOR; + +typedef struct doc { + long docnum; /* Document ID. This has to be the position of + the document in the training set array. */ + long queryid; /* for learning rankings, constraints are + generated for documents with the same + queryID. */ + double costfactor; /* Scales the cost of misclassifying this + document by this factor. The effect of this + value is, that the upper bound on the alpha + for this example is scaled by this factor. + The factors are set by the feature + 'cost:' in the training data. */ + long slackid; /* Index of the slack variable + corresponding to this + constraint. All constraints with the + same slackid share the same slack + variable. This can only be used for + svm_learn_optimization. */ + long kernelid; /* Position in gram matrix where kernel + value can be found when using an + explicit gram matrix + (i.e. kernel_type=GRAM). */ + SVECTOR *fvec; /* Feature vector of the example. The + feature vector can actually be a + list of feature vectors. For + example, the list will have two + elements, if this DOC is a + preference constraint. The one + vector that is supposed to be ranked + higher, will have a factor of +1, + the lower ranked one should have a + factor of -1. 
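+                      The effective vector of such a preference
+                      pair (x_i ranked above x_j) is x_i - x_j.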
*/ +} DOC; + +typedef struct learn_parm { + long type; /* selects between regression and + classification */ + double svm_c; /* upper bound C on alphas */ + double eps; /* regression epsilon (eps=1.0 for + classification */ + double svm_costratio; /* factor to multiply C for positive examples */ + double transduction_posratio; /* fraction of unlabeled examples to be */ + /* classified as positives */ + long biased_hyperplane; /* if nonzero, use hyperplane w*x+b=0 + otherwise w*x=0 */ + long sharedslack; /* if nonzero, it will use the shared + slack variable mode in + svm_learn_optimization. It requires + that the slackid is set for every + training example */ + long svm_maxqpsize; /* size q of working set */ + long svm_newvarsinqp; /* new variables to enter the working set + in each iteration */ + long kernel_cache_size; /* size of kernel cache in megabytes */ + double epsilon_crit; /* tolerable error for distances used + in stopping criterion */ + double epsilon_shrink; /* how much a multiplier should be above + zero for shrinking */ + long svm_iter_to_shrink; /* iterations h after which an example can + be removed by shrinking */ + long maxiter; /* number of iterations after which the + optimizer terminates, if there was + no progress in maxdiff */ + long remove_inconsistent; /* exclude examples with alpha at C and + retrain */ + long skip_final_opt_check; /* do not check KT-Conditions at the end of + optimization for examples removed by + shrinking. WARNING: This might lead to + sub-optimal solutions! */ + long compute_loo; /* if nonzero, computes leave-one-out + estimates */ + double rho; /* parameter in xi/alpha-estimates and for + pruning leave-one-out range [1..2] */ + long xa_depth; /* parameter in xi/alpha-estimates upper + bounding the number of SV the current + alpha_t is distributed over */ + char predfile[200]; /* file for predicitions on unlabeled examples + in transduction */ + char alphafile[200]; /* file to store optimal alphas in. use + empty string if alphas should not be + output */ + + /* you probably do not want to touch the following */ + double epsilon_const; /* tolerable error on eq-constraint */ + double epsilon_a; /* tolerable error on alphas at bounds */ + double opt_precision; /* precision of solver, set to e.g. 1e-21 + if you get convergence problems */ + + /* the following are only for internal use */ + long svm_c_steps; /* do so many steps for finding optimal C */ + double svm_c_factor; /* increase C by this factor every step */ + double svm_costratio_unlab; + double svm_unlabbound; + double *svm_cost; /* individual upper bounds for each var */ + long totwords; /* number of features */ +} LEARN_PARM; + +typedef struct matrix { + int n; /* number of rows */ + int m; /* number of colums */ + double **element; +} MATRIX; + +typedef struct kernel_parm { + long kernel_type; /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, + 4=custom, 5=matrix */ + long poly_degree; + double rbf_gamma; + double coef_lin; + double coef_const; + char custom[50]; /* for user supplied kernel */ + MATRIX *gram_matrix; /* here one can directly supply the kernel + matrix. The matrix is accessed if + kernel_type=5 is selected. 
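+                          Lookups happen in kernel() via the
+                          kernelid of each DOC; only the lower
+                          triangle (row >= column) is accessed.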
*/ +} KERNEL_PARM; + +typedef struct model { + long sv_num; + long at_upper_bound; + double b; + DOC **supvec; + double *alpha; + long *index; /* index from docnum to position in model */ + long totwords; /* number of features */ + long totdoc; /* number of training documents */ + KERNEL_PARM kernel_parm; /* kernel */ + + /* the following values are not written to file */ + double loo_error, loo_recall, loo_precision; /* leave-one-out estimates */ + double xa_error, xa_recall, xa_precision; /* xi/alpha estimates */ + double *lin_weights; /* weights for linear case using + folding */ + double maxdiff; /* precision, up to which this + model is accurate */ +} MODEL; + +/* The following specifies a quadratic problem of the following form + + minimize g0 * x + 1/2 x' * G * x + subject to ce*x - ce0 = 0 + l <= x <= u +*/ +typedef struct quadratic_program { + long opt_n; /* number of variables */ + long opt_m; /* number of linear equality constraints */ + double *opt_ce, *opt_ce0; /* linear equality constraints + opt_ce[i]*x - opt_ceo[i]=0 */ + double *opt_g; /* hessian of objective */ + double *opt_g0; /* linear part of objective */ + double *opt_xinit; /* initial value for variables */ + double *opt_low, *opt_up; /* box constraints */ +} QP; + +typedef struct kernel_cache { + long *index; /* cache some kernel evalutations */ + CFLOAT *buffer; /* to improve speed */ + long *invindex; + long *active2totdoc; + long *totdoc2active; + long *lru; + long *occu; + long elems; + long max_elems; + long time; + long activenum; + long buffsize; +} KERNEL_CACHE; + +typedef struct timing_profile { + double time_kernel; + double time_opti; + double time_shrink; + double time_update; + double time_model; + double time_check; + double time_select; +} TIMING; + +typedef struct shrink_state { + long *active; + long *inactive_since; + long deactnum; + double **a_history; /* for shrinking with non-linear kernel */ + long maxhistory; + double *last_a; /* for shrinking with linear kernel */ + double *last_lin; /* for shrinking with linear kernel */ +} SHRINK_STATE; + +typedef struct randpair { + long val, sort; +} RANDPAIR; + +double classify_example(MODEL *, DOC *); +double classify_example_linear(MODEL *, DOC *); +double kernel(KERNEL_PARM *, DOC *, DOC *); +double single_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *); +double custom_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *); +SVECTOR *create_svector(WORD *, char *, double); +SVECTOR *create_svector_shallow(WORD *, char *, double); +SVECTOR *create_svector_n(double *, long, char *, double); +SVECTOR *create_svector_n_r(double *, long, char *, double, double); +SVECTOR *copy_svector(SVECTOR *); +SVECTOR *copy_svector_shallow(SVECTOR *); +void free_svector(SVECTOR *); +void free_svector_shallow(SVECTOR *); +double sprod_ss(SVECTOR *, SVECTOR *); +SVECTOR *sub_ss(SVECTOR *, SVECTOR *); +SVECTOR *sub_ss_r(SVECTOR *, SVECTOR *, double min_non_zero); +SVECTOR *add_ss(SVECTOR *, SVECTOR *); +SVECTOR *add_ss_r(SVECTOR *, SVECTOR *, double min_non_zero); +SVECTOR *multadd_ss(SVECTOR *a, SVECTOR *b, double fa, double fb); +SVECTOR *multadd_ss_r(SVECTOR *a, SVECTOR *b, double fa, double fb, + double min_non_zero); +SVECTOR *add_list_ns(SVECTOR *a); +SVECTOR *add_dual_list_ns_r(SVECTOR *, SVECTOR *, double min_non_zero); +SVECTOR *add_list_ns_r(SVECTOR *a, double min_non_zero); +SVECTOR *add_list_ss(SVECTOR *); +SVECTOR *add_dual_list_ss_r(SVECTOR *, SVECTOR *, double min_non_zero); +SVECTOR *add_list_ss_r(SVECTOR *, double min_non_zero); +SVECTOR *add_list_sort_ss(SVECTOR *); 
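+
+/* A small, hypothetical usage sketch for the sparse-vector routines
+   above; w, a and b are placeholder names. Feature numbers must be
+   ascending and the WORD array is terminated by wnum == 0:
+
+     WORD w[3] = {{1, 2.0}, {3, -1.0}, {0, 0.0}};
+     SVECTOR *a = create_svector(w, NULL, 1.0);   deep-copies w
+     SVECTOR *b = smult_s(a, 0.5);                b = 0.5 * a
+     double dot = sprod_ss(a, b);                 dot == 2.5
+     free_svector(a);
+     free_svector(b);
+*/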
+SVECTOR *add_dual_list_sort_ss_r(SVECTOR *, SVECTOR *, double min_non_zero); +SVECTOR *add_list_sort_ss_r(SVECTOR *, double min_non_zero); +void add_list_n_ns(double *vec_n, SVECTOR *vec_s, double faktor); +void append_svector_list(SVECTOR *a, SVECTOR *b); +void mult_svector_list(SVECTOR *a, double factor); +void setfactor_svector_list(SVECTOR *a, double factor); +SVECTOR *smult_s(SVECTOR *, double); +SVECTOR *shift_s(SVECTOR *a, long shift); +int featvec_eq(SVECTOR *, SVECTOR *); +double model_length_s(MODEL *); +double model_length_n(MODEL *); +void mult_vector_ns(double *, SVECTOR *, double); +void add_vector_ns(double *, SVECTOR *, double); +double sprod_ns(double *, SVECTOR *); +void add_weight_vector_to_linear_model(MODEL *); +DOC *create_example(long, long, long, double, SVECTOR *); +void free_example(DOC *, long); +long *random_order(long n); +void print_percent_progress(long *progress, long maximum, long percentperdot, + char *symbol); +MATRIX *create_matrix(int n, int m); +MATRIX *realloc_matrix(MATRIX *matrix, int n, int m); +double *create_nvector(int n); +void clear_nvector(double *vec, long int n); +MATRIX *copy_matrix(MATRIX *matrix); +void free_matrix(MATRIX *matrix); +void free_nvector(double *vector); +MATRIX *transpose_matrix(MATRIX *matrix); +MATRIX *cholesky_matrix(MATRIX *A); +double *find_indep_subset_of_matrix(MATRIX *A, double epsilon); +MATRIX *invert_ltriangle_matrix(MATRIX *L); +double *prod_nvector_matrix(double *v, MATRIX *A); +double *prod_matrix_nvector(MATRIX *A, double *v); +double *prod_nvector_ltmatrix(double *v, MATRIX *A); +double *prod_ltmatrix_nvector(MATRIX *A, double *v); +MATRIX *prod_matrix_matrix(MATRIX *A, MATRIX *B); +void print_matrix(MATRIX *matrix); +MODEL *read_model(char *); +MODEL *copy_model(MODEL *); +MODEL *compact_linear_model(MODEL *model); +void free_model(MODEL *, int); +void read_documents(char *, DOC ***, double **, long *, long *); +int parse_document(char *, WORD *, double *, long *, long *, double *, long *, + long, char **); +int read_word(char *in, char *out); +double *read_alphas(char *, long); +void set_learning_defaults(LEARN_PARM *, KERNEL_PARM *); +int check_learning_parms(LEARN_PARM *, KERNEL_PARM *); +void nol_ll(char *, long *, long *, long *); +long minl(long, long); +long maxl(long, long); +double get_runtime(void); +int space_or_null(int); +void *my_malloc(size_t); +void copyright_notice(void); +#ifdef _MSC_VER +int isnan(double); +#endif + +extern long verbosity; /* verbosity level (0-4) */ +extern long kernel_cache_statistic; + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/classifier/svm/svm_light/svm_hideo.c b/src/classifier/svm/svm_light/svm_hideo.c new file mode 100644 index 0000000..20a1a12 --- /dev/null +++ b/src/classifier/svm/svm_light/svm_hideo.c @@ -0,0 +1,1054 @@ +/***********************************************************************/ +/* */ +/* svm_hideo.c */ +/* */ +/* The Hildreth and D'Espo solver specialized for SVMs. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 02.07.02 */ +/* */ +/* Copyright (c) 2002 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. 
*/
+/*                                                                     */
+/***********************************************************************/
+
+#include "svm_common.h"
+#include <math.h>
+
+/*
+  solve the quadratic programming problem
+
+  minimize   g0 * x + 1/2 x' * G * x
+  subject to ce*x - ce0 = 0
+             l <= x <= u
+
+  The linear constraint vector ce can only have -1/+1 as entries
+*/
+
+/* Common Block Declarations */
+
+#define PRIMAL_OPTIMAL 1
+#define DUAL_OPTIMAL 2
+#define MAXITER_EXCEEDED 3
+#define NAN_SOLUTION 4
+#define ONLY_ONE_VARIABLE 5
+
+#define LARGEROUND 0
+#define SMALLROUND 1
+
+/* /////////////////////////////////////////////////////////////// */
+
+#define DEF_PRECISION 1E-5
+#define DEF_MAX_ITERATIONS 200
+#define DEF_LINDEP_SENSITIVITY 1E-8
+#define EPSILON_HIDEO 1E-20
+#define EPSILON_EQ 1E-5
+
+double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
+double *primal = 0, *dual = 0;
+long precision_violations = 0;
+double opt_precision = DEF_PRECISION;
+long maxiter = DEF_MAX_ITERATIONS;
+double lindep_sensitivity = DEF_LINDEP_SENSITIVITY;
+double *buffer;
+long *nonoptimal;
+
+long smallroundcount = 0;
+long roundnumber = 0;
+
+/* /////////////////////////////////////////////////////////////// */
+
+void *my_malloc();
+
+int optimize_hildreth_despo(long, long, double, double, double, long, long,
+                            long, double, double *, double *, double *,
+                            double *, double *, double *, double *, double *,
+                            double *, long *, double *, double *);
+int solve_dual(long, long, double, double, long, double *, double *, double *,
+               double *, double *, double *, double *, double *, double *,
+               double *, double *, double *, double *, long);
+
+void linvert_matrix(double *, long, double *, double, long *);
+void lprint_matrix(double *, long);
+void ladd_matrix(double *, long, double);
+void lcopy_matrix(double *, long, double *);
+void lswitch_rows_matrix(double *, long, long, long);
+void lswitchrk_matrix(double *, long, long, long);
+
+double calculate_qp_objective(long, double *, double *, double *);
+
+double *optimize_qp(qp, epsilon_crit, nx, threshold, learn_parm)
+QP *qp;
+double *epsilon_crit;
+long nx; /* Maximum number of variables in QP */
+double *threshold;
+LEARN_PARM *learn_parm;
+/* start the optimizer and return the optimal values */
+/* The HIDEO optimizer does not necessarily fully solve the problem. */
+/* Since it requires a strictly positive definite hessian, the solution */
+/* is restricted to a linearly independent subset in case the matrix is */
+/* only semi-definite. */
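+/* Example (illustrative): for a two-variable SVM working set with */
+/* labels y = (+1,-1), the caller passes opt_n=2, opt_m=1, */
+/* opt_ce={+1,-1}, opt_ce0 holding the equality-constraint offset from */
+/* the fixed variables, opt_g the 2x2 kernel submatrix, opt_g0 the */
+/* linear term, opt_low={0,0}, opt_up={C_1,C_2}, and opt_xinit the */
+/* current alphas. */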
+{
+  long i, j;
+  int result;
+  double eq, progress;
+
+  roundnumber++;
+
+  if (!primal) { /* allocate memory at first call */
+    primal = (double *)my_malloc(sizeof(double) * nx);
+    dual = (double *)my_malloc(sizeof(double) * ((nx + 1) * 2));
+    nonoptimal = (long *)my_malloc(sizeof(long) * (nx));
+    buffer = (double *)my_malloc(sizeof(double) *
+                                 ((nx + 1) * 2 * (nx + 1) * 2 + nx * nx +
+                                  2 * (nx + 1) * 2 + 2 * nx + 1 + 2 * nx + nx +
+                                  nx + nx * nx));
+    (*threshold) = 0;
+    for (i = 0; i < nx; i++) {
+      primal[i] = 0;
+    }
+  }
+
+  if (verbosity >= 4) { /* really verbose */
+    printf("\n\n");
+    eq = qp->opt_ce0[0];
+    for (i = 0; i < qp->opt_n; i++) {
+      eq += qp->opt_xinit[i] * qp->opt_ce[i];
+      printf("%f: ", qp->opt_g0[i]);
+      for (j = 0; j < qp->opt_n; j++) {
+        printf("%f ", qp->opt_g[i * qp->opt_n + j]);
+      }
+      printf(": a=%.10f < %f", qp->opt_xinit[i], qp->opt_up[i]);
+      printf(": y=%f\n", qp->opt_ce[i]);
+    }
+    if (qp->opt_m) {
+      printf("EQ: %f*x0", qp->opt_ce[0]);
+      for (i = 1; i < qp->opt_n; i++) {
+        printf(" + %f*x%ld", qp->opt_ce[i], i);
+      }
+      printf(" = %f\n\n", -qp->opt_ce0[0]);
+    }
+  }
+
+  result = optimize_hildreth_despo(
+      qp->opt_n, qp->opt_m, opt_precision, (*epsilon_crit),
+      learn_parm->epsilon_a, maxiter,
+      /* (long)PRIMAL_OPTIMAL, */
+      (long)0, (long)0, lindep_sensitivity, qp->opt_g, qp->opt_g0, qp->opt_ce,
+      qp->opt_ce0, qp->opt_low, qp->opt_up, primal, qp->opt_xinit, dual,
+      nonoptimal, buffer, &progress);
+  if (verbosity >= 3) {
+    printf("return(%d)...", result);
+  }
+
+  if (learn_parm->totwords < learn_parm->svm_maxqpsize) {
+    /* larger working sets will be linearly dependent anyway */
+    learn_parm->svm_maxqpsize = maxl(learn_parm->totwords, (long)2);
+  }
+
+  if (result == NAN_SOLUTION) {
+    lindep_sensitivity *= 2; /* throw out linearly dependent examples more */
+                             /* generously */
+    if (learn_parm->svm_maxqpsize > 2) {
+      learn_parm->svm_maxqpsize--; /* decrease size of qp-subproblems */
+    }
+    precision_violations++;
+  }
+
+  /* take one round of only two variables to get unstuck */
+  if ((result != PRIMAL_OPTIMAL) || (!(roundnumber % 31)) || (progress <= 0)) {
+
+    smallroundcount++;
+
+    result = optimize_hildreth_despo(
+        qp->opt_n, qp->opt_m, opt_precision, (*epsilon_crit),
+        learn_parm->epsilon_a, (long)maxiter, (long)PRIMAL_OPTIMAL,
+        (long)SMALLROUND, lindep_sensitivity, qp->opt_g, qp->opt_g0, qp->opt_ce,
+        qp->opt_ce0, qp->opt_low, qp->opt_up, primal, qp->opt_xinit, dual,
+        nonoptimal, buffer, &progress);
+    if (verbosity >= 3) {
+      printf("return_srd(%d)...", result);
+    }
+
+    if (result != PRIMAL_OPTIMAL) {
+      if (result != ONLY_ONE_VARIABLE)
+        precision_violations++;
+      if (result == MAXITER_EXCEEDED)
+        maxiter += 100;
+      if (result == NAN_SOLUTION) {
+        lindep_sensitivity *= 2; /* throw out linearly dependent examples */
+                                 /* more generously */
+        /* results not valid, so return initial values */
+        for (i = 0; i < qp->opt_n; i++) {
+          primal[i] = qp->opt_xinit[i];
+        }
+      }
+    }
+  }
+
+  if (precision_violations > 50) {
+    precision_violations = 0;
+    (*epsilon_crit) *= 10.0;
+    if (verbosity >= 1) {
+      printf("\nWARNING: Relaxing epsilon on KT-Conditions (%f).\n",
+             (*epsilon_crit));
+    }
+  }
+
+  if ((qp->opt_m > 0) && (result != NAN_SOLUTION) &&
+      (!isnan(dual[1] - dual[0])))
+    (*threshold) = dual[1] - dual[0];
+  else
+    (*threshold) = 0;
+
+  if (verbosity >= 4) { /* really verbose */
+    printf("\n\n");
+    eq = qp->opt_ce0[0];
+    for (i = 0; i < qp->opt_n; i++) {
+      eq += primal[i] * qp->opt_ce[i];
+      printf("%f: ", qp->opt_g0[i]);
+      for (j = 0; j < qp->opt_n; j++) {
+        printf("%f ", qp->opt_g[i 
* qp->opt_n + j]); + } + printf(": a=%.30f", primal[i]); + printf(": nonopti=%ld", nonoptimal[i]); + printf(": y=%f\n", qp->opt_ce[i]); + } + printf("eq-constraint=%.30f\n", eq); + printf("b=%f\n", (*threshold)); + printf(" smallroundcount=%ld ", smallroundcount); + } + + return (primal); +} + +int optimize_hildreth_despo(n, m, precision, epsilon_crit, epsilon_a, maxiter, + goal, smallround, lindep_sensitivity, g, g0, ce, + ce0, low, up, primal, init, dual, lin_dependent, + buffer, progress) +long n; /* number of variables */ +long m; /* number of linear equality constraints [0,1] */ +double precision; /* solve at least to this dual precision */ +double epsilon_crit; /* stop, if KT-Conditions approx fulfilled */ +double epsilon_a; /* precision of alphas at bounds */ +long maxiter; /* stop after this many iterations */ +long goal; /* keep going until goal fulfilled */ +long smallround; /* use only two variables of steepest descent */ +double lindep_sensitivity; /* epsilon for detecting linear dependent ex */ +double *g; /* hessian of objective */ +double *g0; /* linear part of objective */ +double *ce, *ce0; /* linear equality constraints */ +double *low, *up; /* box constraints */ +double *primal; /* primal variables */ +double *init; /* initial values of primal */ +double *dual; /* dual variables */ +long *lin_dependent; +double *buffer; +double *progress; /* delta in the objective function between + before and after */ +{ + long i, j, k, from, to, n_indep, changed; + double sum, bmin = 0, bmax = 0; + double *d, *d0, *ig, *dual_old, *temp, *start; + double *g0_new, *g_new, *ce_new, *ce0_new, *low_new, *up_new; + double add, t; + int result; + double obj_before, obj_after; + long b1, b2; + double g0_b1, g0_b2, ce0_b; + + g0_new = &(buffer[0]); /* claim regions of buffer */ + d = &(buffer[n]); + d0 = &(buffer[n + (n + m) * 2 * (n + m) * 2]); + ce_new = &(buffer[n + (n + m) * 2 * (n + m) * 2 + (n + m) * 2]); + ce0_new = &(buffer[n + (n + m) * 2 * (n + m) * 2 + (n + m) * 2 + n]); + ig = &(buffer[n + (n + m) * 2 * (n + m) * 2 + (n + m) * 2 + n + m]); + dual_old = + &(buffer[n + (n + m) * 2 * (n + m) * 2 + (n + m) * 2 + n + m + n * n]); + low_new = &(buffer[n + (n + m) * 2 * (n + m) * 2 + (n + m) * 2 + n + m + + n * n + (n + m) * 2]); + up_new = &(buffer[n + (n + m) * 2 * (n + m) * 2 + (n + m) * 2 + n + m + + n * n + (n + m) * 2 + n]); + start = &(buffer[n + (n + m) * 2 * (n + m) * 2 + (n + m) * 2 + n + m + n * n + + (n + m) * 2 + n + n]); + g_new = &(buffer[n + (n + m) * 2 * (n + m) * 2 + (n + m) * 2 + n + m + n * n + + (n + m) * 2 + n + n + n]); + temp = &(buffer[n + (n + m) * 2 * (n + m) * 2 + (n + m) * 2 + n + m + n * n + + (n + m) * 2 + n + n + n + n * n]); + + b1 = -1; + b2 = -1; + for (i = 0; i < n; i++) { /* get variables with steepest feasible descent */ + sum = g0[i]; + for (j = 0; j < n; j++) + sum += init[j] * g[i * n + j]; + sum = sum * ce[i]; + if (((b1 == -1) || (sum < bmin)) && + (!((init[i] <= (low[i] + epsilon_a)) && (ce[i] < 0.0))) && + (!((init[i] >= (up[i] - epsilon_a)) && (ce[i] > 0.0)))) { + bmin = sum; + b1 = i; + } + if (((b2 == -1) || (sum >= bmax)) && + (!((init[i] <= (low[i] + epsilon_a)) && (ce[i] > 0.0))) && + (!((init[i] >= (up[i] - epsilon_a)) && (ce[i] < 0.0)))) { + bmax = sum; + b2 = i; + } + } + /* in case of unbiased hyperplane, the previous projection on */ + /* equality constraint can lead to b1 or b2 being -1. 
*/ + if ((b1 == -1) || (b2 == -1)) { + b1 = maxl(b1, b2); + b2 = maxl(b1, b2); + } + + for (i = 0; i < n; i++) { + start[i] = init[i]; + } + + /* in case both example vectors are linearly dependent */ + /* WARNING: Assumes that ce[] in {-1,1} */ + add = 0; + changed = 0; + if ((b1 != b2) && (m == 1)) { + for (i = 0; i < n; i++) { /* fix other vectors */ + if (i == b1) + g0_b1 = g0[i]; + if (i == b2) + g0_b2 = g0[i]; + } + ce0_b = ce0[0]; + for (i = 0; i < n; i++) { + if ((i != b1) && (i != b2)) { + for (j = 0; j < n; j++) { + if (j == b1) + g0_b1 += start[i] * g[i * n + j]; + if (j == b2) + g0_b2 += start[i] * g[i * n + j]; + } + ce0_b -= (start[i] * ce[i]); + } + } + if ((g[b1 * n + b2] == g[b1 * n + b1]) && + (g[b1 * n + b2] == g[b2 * n + b2])) { + /* printf("euqal\n"); */ + if (ce[b1] == ce[b2]) { + if (g0_b1 <= g0_b2) { /* set b1 to upper bound */ + /* printf("case +=<\n"); */ + changed = 1; + t = up[b1] - init[b1]; + if ((init[b2] - low[b2]) < t) { + t = init[b2] - low[b2]; + } + start[b1] = init[b1] + t; + start[b2] = init[b2] - t; + } else if (g0_b1 > g0_b2) { /* set b2 to upper bound */ + /* printf("case +=>\n"); */ + changed = 1; + t = up[b2] - init[b2]; + if ((init[b1] - low[b1]) < t) { + t = init[b1] - low[b1]; + } + start[b1] = init[b1] - t; + start[b2] = init[b2] + t; + } + } else if (((g[b1 * n + b1] > 0) || + (g[b2 * n + b2] > 0))) { /* (ce[b1] != ce[b2]) */ + /* printf("case +!\n"); */ + t = ((ce[b2] / ce[b1]) * g0[b1] - g0[b2] + + ce0[0] * + (g[b1 * n + b1] * ce[b2] / ce[b1] - g[b1 * n + b2] / ce[b1])) / + ((ce[b2] * ce[b2] / (ce[b1] * ce[b1])) * g[b1 * n + b1] + + g[b2 * n + b2] - 2 * (g[b1 * n + b2] * ce[b2] / ce[b1])) - + init[b2]; + changed = 1; + if ((up[b2] - init[b2]) < t) { + t = up[b2] - init[b2]; + } + if ((init[b2] - low[b2]) < -t) { + t = -(init[b2] - low[b2]); + } + if ((up[b1] - init[b1]) < t) { + t = (up[b1] - init[b1]); + } + if ((init[b1] - low[b1]) < -t) { + t = -(init[b1] - low[b1]); + } + start[b1] = init[b1] + t; + start[b2] = init[b2] + t; + } + } + if ((-g[b1 * n + b2] == g[b1 * n + b1]) && + (-g[b1 * n + b2] == g[b2 * n + b2])) { + /* printf("diffeuqal\n"); */ + if (ce[b1] != ce[b2]) { + if ((g0_b1 + g0_b2) < 0) { /* set b1 and b2 to upper bound */ + /* printf("case -!<\n"); */ + changed = 1; + t = up[b1] - init[b1]; + if ((up[b2] - init[b2]) < t) { + t = up[b2] - init[b2]; + } + start[b1] = init[b1] + t; + start[b2] = init[b2] + t; + } else if ((g0_b1 + g0_b2) >= 0) { /* set b1 and b2 to lower bound */ + /* printf("case -!>\n"); */ + changed = 1; + t = init[b1] - low[b1]; + if ((init[b2] - low[b2]) < t) { + t = init[b2] - low[b2]; + } + start[b1] = init[b1] - t; + start[b2] = init[b2] - t; + } + } else if (((g[b1 * n + b1] > 0) || + (g[b2 * n + b2] > 0))) { /* (ce[b1]==ce[b2]) */ + /* printf("case -=\n"); */ + t = ((ce[b2] / ce[b1]) * g0[b1] - g0[b2] + + ce0[0] * + (g[b1 * n + b1] * ce[b2] / ce[b1] - g[b1 * n + b2] / ce[b1])) / + ((ce[b2] * ce[b2] / (ce[b1] * ce[b1])) * g[b1 * n + b1] + + g[b2 * n + b2] - 2 * (g[b1 * n + b2] * ce[b2] / ce[b1])) - + init[b2]; + changed = 1; + if ((up[b2] - init[b2]) < t) { + t = up[b2] - init[b2]; + } + if ((init[b2] - low[b2]) < -t) { + t = -(init[b2] - low[b2]); + } + if ((up[b1] - init[b1]) < -t) { + t = -(up[b1] - init[b1]); + } + if ((init[b1] - low[b1]) < t) { + t = init[b1] - low[b1]; + } + start[b1] = init[b1] - t; + start[b2] = init[b2] + t; + } + } + } + /* if we have a biased hyperplane, then adding a constant to the */ + /* hessian does not change the solution. 
So that is done for examples */ + /* with zero diagonal entry, since HIDEO cannot handle them. */ + if ((m > 0) && ((fabs(g[b1 * n + b1]) < lindep_sensitivity) || + (fabs(g[b2 * n + b2]) < lindep_sensitivity))) { + /* printf("Case 0\n"); */ + add += 0.093274; + } + /* in case both examples are linear dependent */ + else if ((m > 0) && (g[b1 * n + b2] != 0 && g[b2 * n + b2] != 0) && + (fabs(g[b1 * n + b1] / g[b1 * n + b2] - + g[b1 * n + b2] / g[b2 * n + b2]) < lindep_sensitivity)) { + /* printf("Case lindep\n"); */ + add += 0.078274; + } + + /* special case for zero diagonal entry on unbiased hyperplane */ + if ((m == 0) && (b1 >= 0)) { + if (fabs(g[b1 * n + b1]) < lindep_sensitivity) { + /* printf("Case 0b1\n"); */ + for (i = 0; i < n; i++) { /* fix other vectors */ + if (i == b1) + g0_b1 = g0[i]; + } + for (i = 0; i < n; i++) { + if (i != b1) { + for (j = 0; j < n; j++) { + if (j == b1) + g0_b1 += start[i] * g[i * n + j]; + } + } + } + if (g0_b1 < 0) + start[b1] = up[b1]; + if (g0_b1 >= 0) + start[b1] = low[b1]; + } + } + if ((m == 0) && (b2 >= 0)) { + if (fabs(g[b2 * n + b2]) < lindep_sensitivity) { + /* printf("Case 0b2\n"); */ + for (i = 0; i < n; i++) { /* fix other vectors */ + if (i == b2) + g0_b2 = g0[i]; + } + for (i = 0; i < n; i++) { + if (i != b2) { + for (j = 0; j < n; j++) { + if (j == b2) + g0_b2 += start[i] * g[i * n + j]; + } + } + } + if (g0_b2 < 0) + start[b2] = up[b2]; + if (g0_b2 >= 0) + start[b2] = low[b2]; + } + } + + /* printf("b1=%ld,b2=%ld\n",b1,b2); */ + + lcopy_matrix(g, n, d); + if ((m == 1) && (add > 0.0)) { + for (j = 0; j < n; j++) { + for (k = 0; k < n; k++) { + d[j * n + k] += add * ce[j] * ce[k]; + } + } + } else { + add = 0.0; + } + + if (n > 2) { /* switch, so that variables are better mixed */ + lswitchrk_matrix(d, n, b1, (long)0); + if (b2 == 0) + lswitchrk_matrix(d, n, b1, (long)1); + else + lswitchrk_matrix(d, n, b2, (long)1); + } + if (smallround == SMALLROUND) { + for (i = 2; i < n; i++) { + lin_dependent[i] = 1; + } + if (m > 0) { /* for biased hyperplane, pick two variables */ + lin_dependent[0] = 0; + lin_dependent[1] = 0; + } else { /* for unbiased hyperplane, pick only one variable */ + lin_dependent[0] = smallroundcount % 2; + lin_dependent[1] = (smallroundcount + 1) % 2; + } + } else { + for (i = 0; i < n; i++) { + lin_dependent[i] = 0; + } + } + linvert_matrix(d, n, ig, lindep_sensitivity, lin_dependent); + if (n > 2) { /* now switch back */ + if (b2 == 0) { + lswitchrk_matrix(ig, n, b1, (long)1); + i = lin_dependent[1]; + lin_dependent[1] = lin_dependent[b1]; + lin_dependent[b1] = i; + } else { + lswitchrk_matrix(ig, n, b2, (long)1); + i = lin_dependent[1]; + lin_dependent[1] = lin_dependent[b2]; + lin_dependent[b2] = i; + } + lswitchrk_matrix(ig, n, b1, (long)0); + i = lin_dependent[0]; + lin_dependent[0] = lin_dependent[b1]; + lin_dependent[b1] = i; + } + /* lprint_matrix(d,n); */ + /* lprint_matrix(ig,n); */ + + lcopy_matrix(g, n, g_new); /* restore g_new matrix */ + if (add > 0) + for (j = 0; j < n; j++) { + for (k = 0; k < n; k++) { + g_new[j * n + k] += add * ce[j] * ce[k]; + } + } + + for (i = 0; i < n; i++) { /* fix linear dependent vectors */ + g0_new[i] = g0[i] + add * ce0[0] * ce[i]; + } + if (m > 0) + ce0_new[0] = -ce0[0]; + for (i = 0; i < n; i++) { /* fix linear dependent vectors */ + if (lin_dependent[i]) { + for (j = 0; j < n; j++) { + if (!lin_dependent[j]) { + g0_new[j] += start[i] * g_new[i * n + j]; + } + } + if (m > 0) + ce0_new[0] -= (start[i] * ce[i]); + } + } + from = 0; /* remove linear dependent vectors */ + to = 
0; + n_indep = 0; + for (i = 0; i < n; i++) { + if (!lin_dependent[i]) { + g0_new[n_indep] = g0_new[i]; + ce_new[n_indep] = ce[i]; + low_new[n_indep] = low[i]; + up_new[n_indep] = up[i]; + primal[n_indep] = start[i]; + n_indep++; + } + for (j = 0; j < n; j++) { + if ((!lin_dependent[i]) && (!lin_dependent[j])) { + ig[to] = ig[from]; + g_new[to] = g_new[from]; + to++; + } + from++; + } + } + + if (verbosity >= 3) { + printf("real_qp_size(%ld)...", n_indep); + } + + /* cannot optimize with only one variable */ + if ((n_indep <= 1) && (m > 0) && (!changed)) { + for (i = n - 1; i >= 0; i--) { + primal[i] = init[i]; + } + return ((int)ONLY_ONE_VARIABLE); + } + + if ((!changed) || (n_indep > 1)) { + result = solve_dual(n_indep, m, precision, epsilon_crit, maxiter, g_new, + g0_new, ce_new, ce0_new, low_new, up_new, primal, d, d0, + ig, dual, dual_old, temp, goal); + } else { + result = PRIMAL_OPTIMAL; + } + + j = n_indep; + for (i = n - 1; i >= 0; i--) { + if (!lin_dependent[i]) { + j--; + primal[i] = primal[j]; + } else { + primal[i] = start[i]; /* leave as is */ + } + temp[i] = primal[i]; + } + + obj_before = calculate_qp_objective(n, g, g0, init); + obj_after = calculate_qp_objective(n, g, g0, primal); + (*progress) = obj_before - obj_after; + if (verbosity >= 3) { + printf("before(%.30f)...after(%.30f)...result_sd(%d)...", obj_before, + obj_after, result); + } + + return ((int)result); +} + +int solve_dual(n, m, precision, epsilon_crit, maxiter, g, g0, ce, ce0, low, up, + primal, d, d0, ig, dual, dual_old, temp, goal) +/* Solves the dual using the method of Hildreth and D'Espo. */ +/* Can only handle problems with zero or exactly one */ +/* equality constraints. */ + +long n; /* number of variables */ +long m; /* number of linear equality constraints */ +double precision; /* solve at least to this dual precision */ +double epsilon_crit; /* stop, if KT-Conditions approx fulfilled */ +long maxiter; /* stop after that many iterations */ +double *g; +double *g0; /* linear part of objective */ +double *ce, *ce0; /* linear equality constraints */ +double *low, *up; /* box constraints */ +double *primal; /* variables (with initial values) */ +double *d, *d0, *ig, *dual, *dual_old, *temp; /* buffer */ +long goal; +{ + long i, j, k, iter; + double sum, w, maxviol, viol, temp1, temp2, isnantest; + double model_b, dist; + long retrain, maxfaktor, primal_optimal = 0, at_bound, scalemaxiter; + double epsilon_a = 1E-15, epsilon_hideo; + double eq; + + if ((m < 0) || (m > 1)) + perror("SOLVE DUAL: inappropriate number of eq-constrains!"); + + /* + printf("\n"); + for(i=0;i 0) { + sum = 0; /* dual hessian for eq constraints */ + for (j = 0; j < n; j++) { + sum += (ce[j] * ig[i * n + j]); + } + d[i * 2 * (n + m) + 2 * n] = sum; + d[i * 2 * (n + m) + 2 * n + 1] = -sum; + d[(n + i) * 2 * (n + m) + 2 * n] = -sum; + d[(n + i) * 2 * (n + m) + 2 * n + 1] = sum; + d[(n + n) * 2 * (n + m) + i] = sum; + d[(n + n + 1) * 2 * (n + m) + i] = -sum; + d[(n + n) * 2 * (n + m) + (n + i)] = -sum; + d[(n + n + 1) * 2 * (n + m) + (n + i)] = sum; + + sum = 0; + for (j = 0; j < n; j++) { + for (k = 0; k < n; k++) { + sum += (ce[k] * ce[j] * ig[j * n + k]); + } + } + d[(n + n) * 2 * (n + m) + 2 * n] = sum; + d[(n + n) * 2 * (n + m) + 2 * n + 1] = -sum; + d[(n + n + 1) * 2 * (n + m) + 2 * n] = -sum; + d[(n + n + 1) * 2 * (n + m) + 2 * n + 1] = sum; + } + } + + for (i = 0; i < n; i++) { /* dual linear component for the box constraints */ + w = 0; + for (j = 0; j < n; j++) { + w += (ig[i * n + j] * g0[j]); + } + d0[i] = up[i] + w; + 
d0[i + n] = -low[i] - w; + } + + if (m > 0) { + sum = 0; /* dual linear component for eq constraints */ + for (j = 0; j < n; j++) { + for (k = 0; k < n; k++) { + sum += (ce[k] * ig[k * n + j] * g0[j]); + } + } + d0[2 * n] = ce0[0] + sum; + d0[2 * n + 1] = -ce0[0] - sum; + } + + maxviol = 999999; + iter = 0; + retrain = 1; + maxfaktor = 1; + scalemaxiter = maxiter / 5; + while ((retrain) && (maxviol > 0) && (iter < (scalemaxiter * maxfaktor))) { + iter++; + + while ((maxviol > precision) && (iter < (scalemaxiter * maxfaktor))) { + iter++; + maxviol = 0; + for (i = 0; i < 2 * (n + m); i++) { + sum = d0[i]; + for (j = 0; j < 2 * (n + m); j++) { + sum += d[i * 2 * (n + m) + j] * dual_old[j]; + } + sum -= d[i * 2 * (n + m) + i] * dual_old[i]; + dual[i] = -sum / d[i * 2 * (n + m) + i]; + if (dual[i] < 0) + dual[i] = 0; + + viol = fabs(dual[i] - dual_old[i]); + if (viol > maxviol) + maxviol = viol; + dual_old[i] = dual[i]; + } + /* + printf("%d) maxviol=%20f precision=%f\n",iter,maxviol,precision); + */ + } + + if (m > 0) { + for (i = 0; i < n; i++) { + temp[i] = dual[i] - dual[i + n] + + ce[i] * (dual[n + n] - dual[n + n + 1]) + g0[i]; + } + } else { + for (i = 0; i < n; i++) { + temp[i] = dual[i] - dual[i + n] + g0[i]; + } + } + for (i = 0; i < n; i++) { + primal[i] = 0; /* calc value of primal variables */ + for (j = 0; j < n; j++) { + primal[i] += ig[i * n + j] * temp[j]; + } + primal[i] *= -1.0; + if (primal[i] <= (low[i])) { /* clip conservatively */ + primal[i] = low[i]; + } else if (primal[i] >= (up[i])) { + primal[i] = up[i]; + } + } + + if (m > 0) + model_b = dual[n + n + 1] - dual[n + n]; + else + model_b = 0; + + epsilon_hideo = EPSILON_HIDEO; + for (i = 0; i < n; i++) { /* check precision of alphas */ + dist = -model_b * ce[i]; + dist += (g0[i] + 1.0); + for (j = 0; j < i; j++) { + dist += (primal[j] * g[j * n + i]); + } + for (j = i; j < n; j++) { + dist += (primal[j] * g[i * n + j]); + } + if ((primal[i] < (up[i] - epsilon_hideo)) && + (dist < (1.0 - epsilon_crit))) { + epsilon_hideo = (up[i] - primal[i]) * 2.0; + } else if ((primal[i] > (low[i] + epsilon_hideo)) && + (dist > (1.0 + epsilon_crit))) { + epsilon_hideo = (primal[i] - low[i]) * 2.0; + } + } + /* printf("\nEPSILON_HIDEO=%.30f\n",epsilon_hideo); */ + + for (i = 0; i < n; i++) { /* clip alphas to bounds */ + if (primal[i] <= (low[i] + epsilon_hideo)) { + primal[i] = low[i]; + } else if (primal[i] >= (up[i] - epsilon_hideo)) { + primal[i] = up[i]; + } + } + + retrain = 0; + primal_optimal = 1; + at_bound = 0; + for (i = 0; (i < n); i++) { /* check primal KT-Conditions */ + dist = -model_b * ce[i]; + dist += (g0[i] + 1.0); + for (j = 0; j < i; j++) { + dist += (primal[j] * g[j * n + i]); + } + for (j = i; j < n; j++) { + dist += (primal[j] * g[i * n + j]); + } + if ((primal[i] < (up[i] - epsilon_a)) && (dist < (1.0 - epsilon_crit))) { + retrain = 1; + primal_optimal = 0; + } else if ((primal[i] > (low[i] + epsilon_a)) && + (dist > (1.0 + epsilon_crit))) { + retrain = 1; + primal_optimal = 0; + } + if ((primal[i] <= (low[i] + epsilon_a)) || + (primal[i] >= (up[i] - epsilon_a))) { + at_bound++; + } + /* printf("HIDEOtemp: a[%ld]=%.30f, dist=%.6f, b=%f, + * at_bound=%ld\n",i,primal[i],dist,model_b,at_bound); */ + } + if (m > 0) { + eq = -ce0[0]; /* check precision of eq-constraint */ + for (i = 0; i < n; i++) { + eq += (ce[i] * primal[i]); + } + if ((EPSILON_EQ < fabs(eq)) + /* + && !((goal==PRIMAL_OPTIMAL) + && (at_bound==n)) */ + ) { + retrain = 1; + primal_optimal = 0; + } + /* printf("\n eq=%.30f ce0=%f 
at-bound=%ld\n",eq,ce0[0],at_bound); */ + } + + if (retrain) { + precision /= 10; + if (((goal == PRIMAL_OPTIMAL) && (maxfaktor < 50000)) || + (maxfaktor < 5)) { + maxfaktor++; + } + } + } + + if (!primal_optimal) { + for (i = 0; i < n; i++) { + primal[i] = 0; /* calc value of primal variables */ + for (j = 0; j < n; j++) { + primal[i] += ig[i * n + j] * temp[j]; + } + primal[i] *= -1.0; + if (primal[i] <= (low[i] + epsilon_a)) { /* clip conservatively */ + primal[i] = low[i]; + } else if (primal[i] >= (up[i] - epsilon_a)) { + primal[i] = up[i]; + } + } + } + + isnantest = 0; + for (i = 0; i < n; i++) { /* check for isnan */ + isnantest += primal[i]; + } + + if (m > 0) { + temp1 = dual[n + n + 1]; /* copy the dual variables for the eq */ + temp2 = dual[n + n]; /* constraints to a handier location */ + for (i = n + n + 1; i >= 2; i--) { + dual[i] = dual[i - 2]; + } + dual[0] = temp2; + dual[1] = temp1; + isnantest += temp1 + temp2; + } + + if (isnan(isnantest)) { + return ((int)NAN_SOLUTION); + } else if (primal_optimal) { + return ((int)PRIMAL_OPTIMAL); + } else if (maxviol == 0.0) { + return ((int)DUAL_OPTIMAL); + } else { + return ((int)MAXITER_EXCEEDED); + } +} + +void linvert_matrix(matrix, depth, inverse, lindep_sensitivity, + lin_dependent) double *matrix; +long depth; +double *inverse, lindep_sensitivity; +long *lin_dependent; /* indicates the active parts of matrix on + input and output*/ +{ + long i, j, k; + double factor; + + for (i = 0; i < depth; i++) { + /* lin_dependent[i]=0; */ + for (j = 0; j < depth; j++) { + inverse[i * depth + j] = 0.0; + } + inverse[i * depth + i] = 1.0; + } + for (i = 0; i < depth; i++) { + if (lin_dependent[i] || + (fabs(matrix[i * depth + i]) < lindep_sensitivity)) { + lin_dependent[i] = 1; + } else { + for (j = i + 1; j < depth; j++) { + factor = matrix[j * depth + i] / matrix[i * depth + i]; + for (k = i; k < depth; k++) { + matrix[j * depth + k] -= (factor * matrix[i * depth + k]); + } + for (k = 0; k < depth; k++) { + inverse[j * depth + k] -= (factor * inverse[i * depth + k]); + } + } + } + } + for (i = depth - 1; i >= 0; i--) { + if (!lin_dependent[i]) { + factor = 1 / matrix[i * depth + i]; + for (k = 0; k < depth; k++) { + inverse[i * depth + k] *= factor; + } + matrix[i * depth + i] = 1; + for (j = i - 1; j >= 0; j--) { + factor = matrix[j * depth + i]; + matrix[j * depth + i] = 0; + for (k = 0; k < depth; k++) { + inverse[j * depth + k] -= (factor * inverse[i * depth + k]); + } + } + } + } +} + +void lprint_matrix(matrix, depth) double *matrix; +long depth; +{ + long i, j; + for (i = 0; i < depth; i++) { + for (j = 0; j < depth; j++) { + printf("%5.2f ", (double)(matrix[i * depth + j])); + } + printf("\n"); + } + printf("\n"); +} + +void ladd_matrix(matrix, depth, scalar) double *matrix; +long depth; +double scalar; +{ + long i, j; + for (i = 0; i < depth; i++) { + for (j = 0; j < depth; j++) { + matrix[i * depth + j] += scalar; + } + } +} + +void lcopy_matrix(matrix, depth, matrix2) double *matrix; +long depth; +double *matrix2; +{ + long i; + + for (i = 0; i < (depth) * (depth); i++) { + matrix2[i] = matrix[i]; + } +} + +void lswitch_rows_matrix(matrix, depth, r1, r2) double *matrix; +long depth, r1, r2; +{ + long i; + double temp; + + for (i = 0; i < depth; i++) { + temp = matrix[r1 * depth + i]; + matrix[r1 * depth + i] = matrix[r2 * depth + i]; + matrix[r2 * depth + i] = temp; + } +} + +void lswitchrk_matrix(matrix, depth, rk1, rk2) double *matrix; +long depth, rk1, rk2; +{ + long i; + double temp; + + for (i = 0; i < depth; i++) { + 
temp = matrix[rk1 * depth + i];
+    matrix[rk1 * depth + i] = matrix[rk2 * depth + i];
+    matrix[rk2 * depth + i] = temp;
+  }
+  for (i = 0; i < depth; i++) {
+    temp = matrix[i * depth + rk1];
+    matrix[i * depth + rk1] = matrix[i * depth + rk2];
+    matrix[i * depth + rk2] = temp;
+  }
+}
+
+double calculate_qp_objective(opt_n, opt_g, opt_g0, alpha)
+long opt_n;
+double *opt_g, *opt_g0, *alpha;
+{
+  double obj;
+  long i, j;
+  obj = 0; /* calculate objective  */
+  for (i = 0; i < opt_n; i++) {
+    obj += (opt_g0[i] * alpha[i]);
+    obj += (0.5 * alpha[i] * alpha[i] * opt_g[i * opt_n + i]);
+    for (j = 0; j < i; j++) {
+      obj += (alpha[j] * alpha[i] * opt_g[j * opt_n + i]);
+    }
+  }
+  return (obj);
+}
diff --git a/src/classifier/svm/svm_light/svm_learn.c b/src/classifier/svm/svm_light/svm_learn.c
new file mode 100644
index 0000000..5bf8756
--- /dev/null
+++ b/src/classifier/svm/svm_light/svm_learn.c
@@ -0,0 +1,4216 @@
+/***********************************************************************/
+/*                                                                     */
+/*   svm_learn.c                                                       */
+/*                                                                     */
+/*   Learning module of Support Vector Machine.                        */
+/*                                                                     */
+/*   Author: Thorsten Joachims                                         */
+/*   Date: 31.10.05                                                    */
+/*                                                                     */
+/*   Copyright (c) 2005  Thorsten Joachims - All rights reserved       */
+/*                                                                     */
+/*   This software is available for non-commercial use only. It must   */
+/*   not be modified and distributed without prior permission of the   */
+/*   author. The author is not responsible for implications from the   */
+/*   use of this software.                                             */
+/*                                                                     */
+/***********************************************************************/
+
+
+# include "svm_common.h"
+# include "svm_learn.h"
+
+#define MAX(x,y)    ((x) < (y) ? (y) : (x))
+#define MIN(x,y)    ((x) > (y) ? (y) : (x))
+#define SIGN(x)     ((x) > (0) ? (1) : (((x) < (0) ? (-1) : (0))))
+
+/* interface to QP-solver */
+double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
+
+/*---------------------------------------------------------------------------*/
+
+/* Learns an SVM classification model based on the training data in
+   docs/label. The resulting model is returned in the structure
+   model. */
+
+void svm_learn_classification(DOC **docs, double *class, long int
+                              totdoc, long int totwords,
+                              LEARN_PARM *learn_parm,
+                              KERNEL_PARM *kernel_parm,
+                              KERNEL_CACHE *kernel_cache,
+                              MODEL *model,
+                              double *alpha)
+     /* docs:        Training vectors (x-part) */
+     /* class:       Training labels (y-part, zero if test example for
+                     transduction) */
+     /* totdoc:      Number of examples in docs/label */
+     /* totwords:    Number of features (i.e. highest feature index) */
+     /* learn_parm:  Learning parameters */
+     /* kernel_parm: Kernel parameters */
+     /* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
+                     NULL if linear.*/
+     /* model:       Returns learning result (assumed empty before called) */
+     /* alpha:       Start values for the alpha variables or NULL
+                     pointer. The new alpha values are returned after
+                     optimization if not NULL. Array must be of size totdoc. */
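+/* Typical call (illustrative, with hypothetical variable names): train
+   a binary classifier with default parameters and no warm start,
+
+     set_learning_defaults(&learn_parm,&kernel_parm);
+     svm_learn_classification(docs,labels,totdoc,totwords,&learn_parm,
+                              &kernel_parm,kcache,&model,NULL);
+
+   where kcache comes from kernel_cache_init() for non-linear kernels
+   and is NULL for the linear kernel. */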
+{
+  long *inconsistent,i,*label;
+  long inconsistentnum;
+  long misclassified,upsupvecnum;
+  double loss,model_length,example_length;
+  double dualitygap,xisum,alphasum,xi;
+  double maxdiff,*lin,*a,*c;
+  double runtime_start,runtime_end;
+  long iterations;
+  long *unlabeled,transduction;
+  long heldout;
+  long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0;
+  long loocomputed=0;
+  double runtime_start_loo=0,runtime_start_xa=0;
+  double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg;
+  long *index,*index2dnum;
+  double *weights;
+  CFLOAT *aicache;  /* buffer to keep one row of hessian */
+
+  double *xi_fullset; /* buffer for storing xi on full sample in loo */
+  double *a_fullset;  /* buffer for storing alpha on full sample in loo */
+  TIMING timing_profile;
+  SHRINK_STATE shrink_state;
+
+  runtime_start=get_runtime();
+  timing_profile.time_kernel=0;
+  timing_profile.time_opti=0;
+  timing_profile.time_shrink=0;
+  timing_profile.time_update=0;
+  timing_profile.time_model=0;
+  timing_profile.time_check=0;
+  timing_profile.time_select=0;
+  kernel_cache_statistic=0;
+
+  learn_parm->totwords=totwords;
+
+  /* make sure -n value is reasonable */
+  if((learn_parm->svm_newvarsinqp < 2)
+     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
+    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
+  }
+
+  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
+
+  label = (long *)my_malloc(sizeof(long)*totdoc);
+  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
+  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
+  c = (double *)my_malloc(sizeof(double)*totdoc);
+  a = (double *)my_malloc(sizeof(double)*totdoc);
+  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
+  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
+  lin = (double *)my_malloc(sizeof(double)*totdoc);
+  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
+  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
+  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
+  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
+
+  model->at_upper_bound=0;
+  model->b=0;
+  model->supvec[0]=0;  /* element 0 reserved and empty for now */
+  model->alpha[0]=0;
+  model->lin_weights=NULL;
+  model->totwords=totwords;
+  model->totdoc=totdoc;
+  model->kernel_parm=(*kernel_parm);
+  model->sv_num=1;
+  model->loo_error=-1;
+  model->loo_recall=-1;
+  model->loo_precision=-1;
+  model->xa_error=-1;
+  model->xa_recall=-1;
+  model->xa_precision=-1;
+  inconsistentnum=0;
+  transduction=0;
+
+  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
+  r_delta_sq=r_delta*r_delta;
+
+  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
+  if(learn_parm->svm_c == 0.0) {  /* default value for C */
+    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
+    if(verbosity>=1)
+      printf("Setting default regularization parameter C=%.4f\n",
+             learn_parm->svm_c);
+  }
+
+  learn_parm->eps=-1.0;  /* equivalent regression epsilon for
+                            classification */
+
+  for(i=0;i<totdoc;i++) {    /* various inits */
+    docs[i]->docnum=i;
+    inconsistent[i]=0;
+    a[i]=0;
+    lin[i]=0;
+    c[i]=0.0;
+    unlabeled[i]=0;
+    if(class[i] == 0) {
+      unlabeled[i]=1;
+      label[i]=0;
+      transduction=1;
+    }
+    if(class[i] > 0) {
+      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
+        docs[i]->costfactor;
+      label[i]=1;
+      trainpos++;
+    }
+    else if(class[i] < 0) {
+      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
+      label[i]=-1;
+      trainneg++;
+    }
+    else {
+      learn_parm->svm_cost[i]=0;
+    }
+  }
+  if(verbosity>=2) {
+    printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",
+           trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);
+  }
+
+  /* caching makes no sense for linear kernel */
+  if(kernel_parm->kernel_type == LINEAR) {
+    /* kernel_cache = NULL; */
+  }
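+
+  /* Note on the warm start below: externally supplied alphas are clipped
+     to the feasible box [0,C_i], kernel rows for free support vectors
+     are cached before bounded ones, and lin[] plus the model are
+     recomputed so that optimization resumes from a consistent state. */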
+
+  /* compute starting state for initial alpha values */
+  if(alpha) {
+    if(verbosity>=1) {
+      printf("Computing starting state..."); fflush(stdout);
+    }
+    index = (long *)my_malloc(sizeof(long)*totdoc);
+    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+    weights=(double *)my_malloc(sizeof(double)*(totwords+1));
+    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
+    for(i=0;i<totdoc;i++) {    /* set initial alphas */
+      index[i]=1;
+      alpha[i]=fabs(alpha[i]);
+      if(alpha[i]<0) alpha[i]=0;
+      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
+    }
+    if(kernel_cache && (kernel_parm->kernel_type != LINEAR)) {
+      for(i=0;i<totdoc;i++)     /* fill kernel cache with unbounded SV */
+        if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
+           && (kernel_cache_space_available(kernel_cache)))
+          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
+      for(i=0;i<totdoc;i++)     /* fill rest of kernel cache with bounded SV */
+        if((alpha[i]==learn_parm->svm_cost[i])
+           && (kernel_cache_space_available(kernel_cache)))
+          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
+    }
+    clear_nvector(weights,totwords); /* set weights to zero */
+    (void)compute_index(index,totdoc,index2dnum);
+    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
+                            totwords,kernel_parm,kernel_cache,lin,aicache,
+                            weights);
+    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
+                              learn_parm,index2dnum,index2dnum,model);
+    for(i=0;i<totdoc;i++) {    /* copy initial alphas */
+      a[i]=alpha[i];
+    }
+    free(index);
+    free(index2dnum);
+    free(weights);
+    free(aicache);
+    if(verbosity>=1) {
+      printf("done.\n"); fflush(stdout);
+    }
+  }
+
+  if(transduction) {
+    learn_parm->svm_iter_to_shrink=99999999;
+    if(verbosity >= 1)
+      printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");
+  }
+
+  if(transduction && learn_parm->compute_loo) {
+    learn_parm->compute_loo=0;
+    if(verbosity >= 1)
+      printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");
+  }
+
+  if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {
+    learn_parm->compute_loo=0;
+    printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n");
+  }
+
+  if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
+    learn_parm->compute_loo=0;
+    printf("\nCannot compute leave-one-out with only one example in one class.\n\n");
+  }
+
+
+  if(verbosity==1) {
+    printf("Optimizing"); fflush(stdout);
+  }
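+
+  /* optimize_to_convergence() runs the working-set loop (variable
+     selection, QP subproblems via optimize_qp(), shrinking) until the
+     KT-conditions hold up to epsilon_crit; it returns the iteration
+     count. */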
+
+  /* train the svm */
+  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
+                                     kernel_parm,kernel_cache,&shrink_state,model,
+                                     inconsistent,unlabeled,a,lin,
+                                     c,&timing_profile,
+                                     &maxdiff,(long)-1,
+                                     (long)1);
+
+  if(verbosity>=1) {
+    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
+
+    misclassified=0;
+    for(i=0;(i<totdoc);i++) { /* get final statistic */
+      if((lin[i]-model->b)*(double)label[i] <= 0.0)
+        misclassified++;
+    }
+
+    printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
+           misclassified,maxdiff);
+
+    runtime_end=get_runtime();
+    if(verbosity>=2) {
+      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
+             (runtime_end-runtime_start)/100.0,
+             (100.0*timing_profile.time_kernel)/(runtime_end-runtime_start),
+             (100.0*timing_profile.time_opti)/(runtime_end-runtime_start),
+             (100.0*timing_profile.time_shrink)/(runtime_end-runtime_start),
+             (100.0*timing_profile.time_update)/(runtime_end-runtime_start),
+             (100.0*timing_profile.time_model)/(runtime_end-runtime_start),
+             (100.0*timing_profile.time_check)/(runtime_end-runtime_start),
+             (100.0*timing_profile.time_select)/(runtime_end-runtime_start));
+    }
+    else {
+      printf("Runtime in cpu-seconds: %.2f\n",
+             (runtime_end-runtime_start)/100.0);
+    }
+
+    if(learn_parm->remove_inconsistent) {
+      inconsistentnum=0;
+      for(i=0;i<totdoc;i++)
+        if(inconsistent[i])
+          inconsistentnum++;
+      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
+             model->sv_num-1,inconsistentnum);
+    }
+    else {
+      upsupvecnum=0;
+      for(i=1;i<model->sv_num;i++) {
+        if(fabs(model->alpha[i]) >=
+           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
+            learn_parm->epsilon_a))
+          upsupvecnum++;
+      }
+      printf("Number of SV: %ld (including %ld at upper bound)\n",
+             model->sv_num-1,upsupvecnum);
+    }
+
+    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
+      loss=0;
+      xisum=0;
+      alphasum=0;
+      model_length=0;
+      for(i=0;i<totdoc;i++) {
+        xi=MAX(0,1.0-(lin[i]-model->b)*(double)label[i]);
+        if(xi > learn_parm->epsilon_crit)
+          loss+=xi;
+        xisum+=xi*learn_parm->svm_cost[i];
+        alphasum+=a[i];
+        model_length+=a[i]*label[i]*lin[i];
+      }
+      model_length=sqrt(model_length);
+      dualitygap=(0.5*model_length*model_length+xisum)
+                 -(alphasum-0.5*model_length*model_length);
+      fprintf(stdout,"Upper bound on duality gap: gap=%.5f\n",dualitygap);
+      fprintf(stdout,"Dual objective value: dval=%.5f\n",
+              alphasum-0.5*model_length*model_length);
+      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
+      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
+      example_length=estimate_sphere(model);
+      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
+              length_of_longest_document_vector(docs,totdoc,kernel_parm));
+      fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",
+              estimate_margin_vcdim(model,model_length,example_length));
+      if((!learn_parm->remove_inconsistent) && (!transduction)) {
+        runtime_start_xa=get_runtime();
+        if(verbosity>=1) {
+          printf("Computing XiAlpha-estimates..."); fflush(stdout);
+        }
+        compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
+                             kernel_parm,learn_parm,&(model->xa_error),
+                             &(model->xa_recall),&(model->xa_precision));
+        if(verbosity>=1) {
+          printf("done\n");
+        }
+        printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
+               (get_runtime()-runtime_start_xa)/100.0);
+
+        fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
+                model->xa_error,learn_parm->rho,learn_parm->xa_depth);
+        fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
+                model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
+        fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
+                model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
+      }
+      else if(!learn_parm->remove_inconsistent) {
+        estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
+      }
+    }
+    if(verbosity>=1) {
+      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
+    }
+  }
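+
+  /* The bounds used below: a held-out example with
+     rho*a_i*R^2 + xi_i < 1 provably cannot produce a leave-one-out
+     error, and one with xi_i > 1 always does, so only the remaining
+     examples require actual retraining. */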
%ld\n",kernel_cache_statistic); + } + } + + + /* leave-one-out testing starts now */ + if(learn_parm->compute_loo) { + /* save results of training on full dataset for leave-one-out */ + runtime_start_loo=get_runtime(); + for(i=0;ib)*(double)label[i]); + if(xi_fullset[i]<0) xi_fullset[i]=0; + a_fullset[i]=a[i]; + } + if(verbosity>=1) { + printf("Computing leave-one-out"); + } + + /* repeat this loop for every held-out example */ + for(heldout=0;(heldoutrho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout] + < 1.0) { + /* guaranteed to not produce a leave-one-out error */ + if(verbosity==1) { + printf("+"); fflush(stdout); + } + } + else if(xi_fullset[heldout] > 1.0) { + /* guaranteed to produce a leave-one-out error */ + loo_count++; + if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++; + if(verbosity==1) { + printf("-"); fflush(stdout); + } + } + else { + loocomputed++; + heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */ + learn_parm->svm_cost[heldout]=0; + /* make sure heldout example is not currently */ + /* shrunk away. Assumes that lin is up to date! */ + shrink_state.active[heldout]=1; + if(verbosity>=2) + printf("\nLeave-One-Out test on example %ld\n",heldout); + if(verbosity>=1) { + printf("(?[%ld]",heldout); fflush(stdout); + } + + optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, + kernel_parm, + kernel_cache,&shrink_state,model,inconsistent,unlabeled, + a,lin,c,&timing_profile, + &maxdiff,heldout,(long)2); + + /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */ + + if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) { + loo_count++; /* there was a loo-error */ + if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++; + if(verbosity>=1) { + printf("-)"); fflush(stdout); + } + } + else { + if(verbosity>=1) { + printf("+)"); fflush(stdout); + } + } + /* now we need to restore the original data set*/ + learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */ + } + } /* end of leave-one-out loop */ + + + if(verbosity>=1) { + printf("\nRetrain on full problem"); fflush(stdout); + } + optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, + kernel_parm, + kernel_cache,&shrink_state,model,inconsistent,unlabeled, + a,lin,c,&timing_profile, + &maxdiff,(long)-1,(long)1); + if(verbosity >= 1) + printf("done.\n"); + + + /* after all leave-one-out computed */ + model->loo_error=100.0*loo_count/(double)totdoc; + model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0; + model->loo_precision=(trainpos-loo_count_pos)/ + (double)(trainpos-loo_count_pos+loo_count_neg)*100.0; + if(verbosity >= 1) { + fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n", + model->loo_error); + fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n", + model->loo_recall); + fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n", + model->loo_precision); + fprintf(stdout,"Actual leave-one-outs computed: %ld (rho=%.2f)\n", + loocomputed,learn_parm->rho); + printf("Runtime for leave-one-out in cpu-seconds: %.2f\n", + (get_runtime()-runtime_start_loo)/100.0); + } + } + + if(learn_parm->alphafile[0]) + write_alphas(learn_parm->alphafile,a,label,totdoc); + + shrink_state_cleanup(&shrink_state); + free(label); + free(inconsistent); + free(unlabeled); + free(c); + free(a); + free(a_fullset); + free(xi_fullset); + free(lin); + free(learn_parm->svm_cost); +} + + +/* Learns an SVM regression model based on the training data in + docs/label. 
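+
+/* Sketch of the reduction used below (illustrative): each regression
+   example (x,y) is duplicated into two classification examples, (x,+1)
+   and (x,-1), both carrying y as their per-example target c[i], so the
+   epsilon-insensitive regression problem can be handled by the same
+   optimizer. */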
+
+/* Learns an SVM regression model based on the training data in
+   docs/label. The resulting model is returned in the structure
+   model. */
+
+void svm_learn_regression(DOC **docs, double *value, long int totdoc,
+                          long int totwords, LEARN_PARM *learn_parm,
+                          KERNEL_PARM *kernel_parm,
+                          KERNEL_CACHE **kernel_cache, MODEL *model)
+     /* docs:        Training vectors (x-part) */
+     /* value:       Training target values (y-part) */
+     /* totdoc:      Number of examples in docs/label */
+     /* totwords:    Number of features (i.e. highest feature index) */
+     /* learn_parm:  Learning parameters */
+     /* kernel_parm: Kernel parameters */
+     /* kernel_cache:Initialized Cache, if using a kernel. NULL if
+                     linear. Note that it will be free'd and reassigned */
+     /* model:       Returns learning result (assumed empty before called) */
+{
+  long *inconsistent,i,j;
+  long inconsistentnum;
+  long upsupvecnum;
+  double loss,model_length,example_length;
+  double maxdiff,*lin,*a,*c;
+  double runtime_start,runtime_end;
+  long iterations,kernel_cache_size;
+  long *unlabeled;
+  double r_delta_sq=0,r_delta,r_delta_avg;
+  double *xi_fullset; /* buffer for storing xi on full sample in loo */
+  double *a_fullset;  /* buffer for storing alpha on full sample in loo */
+  TIMING timing_profile;
+  SHRINK_STATE shrink_state;
+  DOC **docs_org;
+  long *label;
+
+  /* set up regression problem in standard form */
+  docs_org=docs;
+  docs = (DOC **)my_malloc(sizeof(DOC)*2*totdoc);
+  label = (long *)my_malloc(sizeof(long)*2*totdoc);
+  c = (double *)my_malloc(sizeof(double)*2*totdoc);
+  for(i=0;i<totdoc;i++) {
+    j=2*totdoc-1-i;
+    docs[i]=create_example(i,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
+    label[i]=+1;
+    c[i]=value[i];
+    docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
+    label[j]=-1;
+    c[j]=value[i];
+  }
+  totdoc*=2;
+
+  /* need to get a bigger kernel cache */
+  if(*kernel_cache) {
+    kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
+    kernel_cache_cleanup(*kernel_cache);
+    (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size);
+  }
+
+  runtime_start=get_runtime();
+  timing_profile.time_kernel=0;
+  timing_profile.time_opti=0;
+  timing_profile.time_shrink=0;
+  timing_profile.time_update=0;
+  timing_profile.time_model=0;
+  timing_profile.time_check=0;
+  timing_profile.time_select=0;
+  kernel_cache_statistic=0;
+
+  learn_parm->totwords=totwords;
+
+  /* make sure -n value is reasonable */
+  if((learn_parm->svm_newvarsinqp < 2)
+     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
+    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
+  }
+
+  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
+
+  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
+  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
+  a = (double *)my_malloc(sizeof(double)*totdoc);
+  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
+  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
+  lin = (double *)my_malloc(sizeof(double)*totdoc);
+  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
+  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
+  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
+  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
+
+  model->at_upper_bound=0;
+  model->b=0;
+  model->supvec[0]=0;  /* element 0 reserved and empty for now */
+  model->alpha[0]=0;
+  model->lin_weights=NULL;
+  model->totwords=totwords;
+  model->totdoc=totdoc;
+  model->kernel_parm=(*kernel_parm);
+  model->sv_num=1;
+  model->loo_error=-1;
+  model->loo_recall=-1;
+  model->loo_precision=-1;
+  model->xa_error=-1;
+  model->xa_recall=-1;
+  model->xa_precision=-1;
+  inconsistentnum=0;
+
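+  /* estimate_r_delta() bounds the radius of a sphere around the data;
+     the average computed below is what makes the default C equal to
+     1/R^2. */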
+  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
+  r_delta_sq=r_delta*r_delta;
+
+  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
+  if(learn_parm->svm_c == 0.0) {  /* default value for C */
+    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
+    if(verbosity>=1)
+      printf("Setting default regularization parameter C=%.4f\n",
+             learn_parm->svm_c);
+  }
+
+  for(i=0;i<totdoc;i++) {    /* various inits */
+    inconsistent[i]=0;
+    a[i]=0;
+    lin[i]=0;
+    unlabeled[i]=0;
+    if(label[i] > 0) {
+      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
+        docs[i]->costfactor;
+    }
+    else if(label[i] < 0) {
+      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
+    }
+  }
+
+  /* caching makes no sense for linear kernel */
+  if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) {
+    printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n");
+  }
+
+  if(verbosity==1) {
+    printf("Optimizing"); fflush(stdout);
+  }
+
+  /* train the svm */
+  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
+                                     kernel_parm,*kernel_cache,&shrink_state,
+                                     model,inconsistent,unlabeled,a,lin,c,
+                                     &timing_profile,&maxdiff,(long)-1,
+                                     (long)1);
+
+  if(verbosity>=1) {
+    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
+
+    printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
+
+    runtime_end=get_runtime();
+    if(verbosity>=2) {
+      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
+             (runtime_end-runtime_start)/100.0,
+             (100.0*timing_profile.time_kernel)/(runtime_end-runtime_start),
+             (100.0*timing_profile.time_opti)/(runtime_end-runtime_start),
+             (100.0*timing_profile.time_shrink)/(runtime_end-runtime_start),
+             (100.0*timing_profile.time_update)/(runtime_end-runtime_start),
+             (100.0*timing_profile.time_model)/(runtime_end-runtime_start),
+             (100.0*timing_profile.time_check)/(runtime_end-runtime_start),
+             (100.0*timing_profile.time_select)/(runtime_end-runtime_start));
+    }
+    else {
+      printf("Runtime in cpu-seconds: %.2f\n",
+             (runtime_end-runtime_start)/100.0);
+    }
+
+    if(learn_parm->remove_inconsistent) {
+      inconsistentnum=0;
+      for(i=0;i<totdoc;i++)
+        if(inconsistent[i])
+          inconsistentnum++;
+      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
+             model->sv_num-1,inconsistentnum);
+    }
+    else {
+      upsupvecnum=0;
+      for(i=1;i<model->sv_num;i++) {
+        if(fabs(model->alpha[i]) >=
+           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
+            learn_parm->epsilon_a))
+          upsupvecnum++;
+      }
+      printf("Number of SV: %ld (including %ld at upper bound)\n",
+             model->sv_num-1,upsupvecnum);
+    }
+
+    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
+      loss=0;
+      model_length=0;
+      for(i=0;i<totdoc;i++) {
+        if((lin[i]-model->b)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit)
+          loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i];
+        model_length+=a[i]*label[i]*lin[i];
+      }
+      model_length=sqrt(model_length);
+      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
+      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
+      example_length=estimate_sphere(model);
+      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
+              length_of_longest_document_vector(docs,totdoc,kernel_parm));
+    }
+    if(verbosity>=1) {
+      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
+    }
+  }
+
+  if(learn_parm->alphafile[0])
+    write_alphas(learn_parm->alphafile,a,label,totdoc);
+
+  /* this makes sure the model we return does not contain pointers to the
+     temporary documents */
+  for(i=1;i<model->sv_num;i++) {
+    j=model->supvec[i]->docnum;
+    if(j >= (totdoc/2)) {
+      j=totdoc-j-1;
+    }
+    model->supvec[i]=docs_org[j];
+  }
+
+  shrink_state_cleanup(&shrink_state);
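+  /* the 2*totdoc examples below were created locally for the regression
+     setup, so they are released before returning */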
+  for(i=0;i<totdoc;i++)
+    free_example(docs[i],0);
+  free(docs);
+  free(label);
+  free(inconsistent);
+  free(unlabeled);
+  free(c);
+  free(a);
+  free(a_fullset);
+  free(xi_fullset);
+  free(lin);
+  free(learn_parm->svm_cost);
+}
+
+void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc,
+                       long int totwords, LEARN_PARM *learn_parm,
+                       KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
+                       MODEL *model)
+     /* docs:        Training vectors (x-part) */
+     /* rankvalue:   Training target values that determine the ranking */
+     /* totdoc:      Number of examples in docs/label */
+     /* totwords:    Number of features (i.e. highest feature index) */
+     /* learn_parm:  Learning parameters */
+     /* kernel_parm: Kernel parameters */
+     /* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if
+                     using a kernel. NULL if linear. NOTE: Cache is
+                     getting reinitialized in this function */
+     /* model:       Returns learning result (assumed empty before called) */
+{
+  DOC **docdiff;
+  long i,j,k,totpair,kernel_cache_size;
+  double *target,*alpha,cost;
+  long *greater,*lesser;
+  MODEL *pairmodel;
+  SVECTOR *flow,*fhigh;
+
+  totpair=0;
+  for(i=0;i<totdoc;i++) {
+    for(j=i+1;j<totdoc;j++) {
+      if((docs[i]->queryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) {
+        totpair++;
+      }
+    }
+  }
+
+  printf("Constructing %ld rank constraints...",totpair); fflush(stdout);
+  docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair);
+  target=(double *)my_malloc(sizeof(double)*totpair);
+  greater=(long *)my_malloc(sizeof(long)*totpair);
+  lesser=(long *)my_malloc(sizeof(long)*totpair);
+
+  k=0;
+  for(i=0;i<totdoc;i++) {
+    for(j=i+1;j<totdoc;j++) {
+      if(docs[i]->queryid == docs[j]->queryid) {
+        /* "Hijacked" costfactor to input rhs of constraints */
+        /* cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0; */
+        cost=1;
+        if(rankvalue[i] > rankvalue[j]) {
+          if(kernel_parm->kernel_type == LINEAR)
+            docdiff[k]=create_example(k,0,0,cost,
+                                      sub_ss(docs[i]->fvec,docs[j]->fvec));
+          else {
+            flow=copy_svector(docs[j]->fvec);
+            flow->factor=-1.0;
+            flow->next=NULL;
+            fhigh=copy_svector(docs[i]->fvec);
+            fhigh->factor=1.0;
+            fhigh->next=flow;
+            docdiff[k]=create_example(k,0,0,cost,fhigh);
+          }
+          target[k]=1+docs[i]->costfactor-docs[j]->costfactor;
+          greater[k]=i;
+          lesser[k]=j;
+          k++;
+        }
+        else if(rankvalue[i] < rankvalue[j]) {
+          if(kernel_parm->kernel_type == LINEAR)
+            docdiff[k]=create_example(k,0,0,cost,
+                                      sub_ss(docs[j]->fvec,docs[i]->fvec));
+          else {
+            flow=copy_svector(docs[j]->fvec);
+            flow->factor=1.0;
+            flow->next=NULL;
+            fhigh=copy_svector(docs[i]->fvec);
+            fhigh->factor=-1.0;
+            fhigh->next=flow;
+            docdiff[k]=create_example(k,0,0,cost,fhigh);
+          }
+          target[k]=1+docs[j]->costfactor-docs[i]->costfactor;
+          greater[k]=j;
+          lesser[k]=i;
+          k++;
+        }
+      }
+    }
+  }
+  printf("done.\n"); fflush(stdout);
+
+  /* need to get a bigger kernel cache */
+  if(*kernel_cache) {
+    kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
+    kernel_cache_cleanup(*kernel_cache);
+    (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size);
+  }
+
+  /* must use unbiased hyperplane on difference vectors */
+  learn_parm->biased_hyperplane=0;
+  pairmodel=(MODEL *)my_malloc(sizeof(MODEL));
+  svm_learn_optimization(docdiff,target,totpair,totwords,learn_parm,
+                         kernel_parm,(*kernel_cache),pairmodel,NULL);
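+
+  /* The pair model expands over difference vectors; folding each pair
+     alpha back onto its two original documents (+alpha for the
+     higher-ranked, -alpha for the lower-ranked) gives an equivalent
+     expansion over the original training documents. */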
+
+  /* Transfer the result into a more compact model. If you would like
+     to output the original model on pairs of documents, see below. */
+  alpha=(double *)my_malloc(sizeof(double)*totdoc);
+  for(i=0;i<totdoc;i++) alpha[i]=0;
+  for(i=1;i<pairmodel->sv_num;i++) {
+    alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i];
+    alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i];
+  }
+  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
+  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
+  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
+  model->supvec[0]=0;  /* element 0 reserved and empty for now */
+  model->alpha[0]=0;
+  model->sv_num=1;
+  for(i=0;i<totdoc;i++) {
+    if(alpha[i]) {
+      model->supvec[model->sv_num]=docs[i];
+      model->alpha[model->sv_num]=alpha[i];
+      model->index[i]=model->sv_num;
+      model->sv_num++;
+    }
+    else {
+      model->index[i]=-1;
+    }
+  }
+  model->at_upper_bound=0;
+  model->b=0;
+  model->lin_weights=NULL;
+  model->totwords=totwords;
+  model->totdoc=totdoc;
+  model->kernel_parm=(*kernel_parm);
+  model->loo_error=-1;
+  model->loo_recall=-1;
+  model->loo_precision=-1;
+  model->xa_error=-1;
+  model->xa_recall=-1;
+  model->xa_precision=-1;
+
+  free(alpha);
+  free(greater);
+  free(lesser);
+  free(target);
+
+  /* If you would like to output the original model on pairs of
+     documents, replace the following lines with '(*model)=(*pairmodel);' */
+  for(i=0;i<totpair;i++)
+    free_example(docdiff[i],1);
+  free(docdiff);
+  free_model(pairmodel,0);
+}
+
+
+/* The following solves a freely defined and given set of
+   inequalities. The optimization problem is of the following form:
+
+   min 0.5 w*w + C sum_i C_i \xi_i
+
+   s.t. x_i * w > rhs_i - \xi_i
+
+   This corresponds to the -z o option. */
+
+void svm_learn_optimization(DOC **docs, double *rhs, long int
+                            totdoc, long int totwords,
+                            LEARN_PARM *learn_parm,
+                            KERNEL_PARM *kernel_parm,
+                            KERNEL_CACHE *kernel_cache, MODEL *model,
+                            double *alpha)
+     /* docs:        Left-hand side of inequalities (x-part) */
+     /* rhs:         Right-hand side of inequalities */
+     /* totdoc:      Number of examples in docs/label */
+     /* totwords:    Number of features (i.e. highest feature index) */
+     /* learn_parm:  Learning parameters */
+     /* kernel_parm: Kernel parameters */
+     /* kernel_cache:Initialized Cache of size 1*totdoc, if using a kernel.
+                     NULL if linear.*/
+     /* model:       Returns solution as SV expansion (assumed empty before called) */
+     /* alpha:       Start values for the alpha variables or NULL
+                     pointer. The new alpha values are returned after
+                     optimization if not NULL. Array must be of size totdoc. 
*/ +{ + long i,*label; + long misclassified,upsupvecnum; + double loss,model_length,alphasum,example_length; + double maxdiff,*lin,*a,*c; + double runtime_start,runtime_end; + long iterations,maxslackid,svsetnum; + long *unlabeled,*inconsistent; + double r_delta_avg; + long *index,*index2dnum; + double *weights,*slack,*alphaslack; + CFLOAT *aicache; /* buffer to keep one row of hessian */ + + TIMING timing_profile; + SHRINK_STATE shrink_state; + + runtime_start=get_runtime(); + timing_profile.time_kernel=0; + timing_profile.time_opti=0; + timing_profile.time_shrink=0; + timing_profile.time_update=0; + timing_profile.time_model=0; + timing_profile.time_check=0; + timing_profile.time_select=0; + kernel_cache_statistic=0; + + learn_parm->totwords=totwords; + + /* make sure -n value is reasonable */ + if((learn_parm->svm_newvarsinqp < 2) + || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + } + + init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + + label = (long *)my_malloc(sizeof(long)*totdoc); + unlabeled = (long *)my_malloc(sizeof(long)*totdoc); + inconsistent = (long *)my_malloc(sizeof(long)*totdoc); + c = (double *)my_malloc(sizeof(double)*totdoc); + a = (double *)my_malloc(sizeof(double)*totdoc); + lin = (double *)my_malloc(sizeof(double)*totdoc); + learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); + model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); + model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); + model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + + model->at_upper_bound=0; + model->b=0; + model->supvec[0]=0; /* element 0 reserved and empty for now */ + model->alpha[0]=0; + model->lin_weights=NULL; + model->totwords=totwords; + model->totdoc=totdoc; + model->kernel_parm=(*kernel_parm); + model->sv_num=1; + model->loo_error=-1; + model->loo_recall=-1; + model->loo_precision=-1; + model->xa_error=-1; + model->xa_recall=-1; + model->xa_precision=-1; + + r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); + if(learn_parm->svm_c == 0.0) { /* default value for C */ + learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); + if(verbosity>=1) + printf("Setting default regularization parameter C=%.4f\n", + learn_parm->svm_c); + } + + learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */ + + learn_parm->eps=0.0; /* No margin, unless explicitly handcoded + in the right-hand side in the training + set. 
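+                 With eps=0 the inequality for example i is simply
+                 w*x_i >= rhs_i - xi_i, so any desired margin has to
+                 be folded into rhs_i by the caller; for instance,
+                 rhs_i=1 for all i recovers the usual functional
+                 margin of one.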
*/ + + for(i=0;idocnum=i; + a[i]=0; + lin[i]=0; + c[i]=rhs[i]; /* set right-hand side */ + unlabeled[i]=0; + inconsistent[i]=0; + learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* + docs[i]->costfactor; + label[i]=1; + } + if(learn_parm->sharedslack) /* if shared slacks are used, they must */ + for(i=0;islackid) { + perror("Error: Missing shared slacks definitions in some of the examples."); + exit(0); + } + + /* print kernel matrix */ + /* + int j; + for(i=0;i=1) { + printf("Computing starting state..."); fflush(stdout); + } + index = (long *)my_malloc(sizeof(long)*totdoc); + index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + if(kernel_parm->kernel_type == LINEAR) { + weights=(double *)my_malloc(sizeof(double)*(totwords+1)); + clear_nvector(weights,totwords); /* set weights to zero */ + aicache=NULL; + } + else { + weights=NULL; + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); + } + for(i=0;ilearn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i]; + } + if(kernel_cache && (kernel_parm->kernel_type != LINEAR)) { + for(i=0;i0) && (alpha[i]svm_cost[i]) + && (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache,docs,i,kernel_parm); + for(i=0;isvm_cost[i]) + && (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache,docs,i,kernel_parm); + } + (void)compute_index(index,totdoc,index2dnum); + update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc, + totwords,kernel_parm,kernel_cache,lin,aicache, + weights); + (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c, + learn_parm,index2dnum,index2dnum,model); + for(i=0;i=1) { + printf("done.\n"); fflush(stdout); + } + } + + /* removing inconsistent does not work for general optimization problem */ + if(learn_parm->remove_inconsistent) { + learn_parm->remove_inconsistent = 0; + printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout); + } + + /* caching makes no sense for linear kernel */ + if(kernel_parm->kernel_type == LINEAR) { + /* kernel_cache = NULL; */ + } + + if(verbosity==1) { + printf("Optimizing"); fflush(stdout); + } + + /* train the svm */ + if(learn_parm->sharedslack) + iterations=optimize_to_convergence_sharedslack(docs,label,totdoc, + totwords,learn_parm,kernel_parm, + kernel_cache,&shrink_state,model, + a,lin,c,&timing_profile, + &maxdiff); + else + iterations=optimize_to_convergence(docs,label,totdoc, + totwords,learn_parm,kernel_parm, + kernel_cache,&shrink_state,model, + inconsistent,unlabeled, + a,lin,c,&timing_profile, + &maxdiff,(long)-1,(long)1); + + if(verbosity>=1) { + if(verbosity==1) printf("done. 
(%ld iterations)\n",iterations); + + misclassified=0; + for(i=0;(ib)*(double)label[i] <= 0.0) + misclassified++; + } + + printf("Optimization finished (maxdiff=%.5f).\n",maxdiff); + + runtime_end=get_runtime(); + if(verbosity>=2) { + printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n", + (runtime_end-runtime_start)/100.0, + (100.0*timing_profile.time_kernel)/(runtime_end-runtime_start), + (100.0*timing_profile.time_opti)/(runtime_end-runtime_start), + (100.0*timing_profile.time_shrink)/(runtime_end-runtime_start), + (100.0*timing_profile.time_update)/(runtime_end-runtime_start), + (100.0*timing_profile.time_model)/(runtime_end-runtime_start), + (100.0*timing_profile.time_check)/(runtime_end-runtime_start), + (100.0*timing_profile.time_select)/(runtime_end-runtime_start)); + } + else { + printf("Runtime in cpu-seconds: %.2f\n", + (runtime_end-runtime_start)/100.0); + } + } + if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { + loss=0; + model_length=0; + alphasum=0; + for(i=0;ib)*(double)label[i] < c[i]-learn_parm->epsilon_crit) + loss+=c[i]-(lin[i]-model->b)*(double)label[i]; + model_length+=a[i]*label[i]*lin[i]; + alphasum+=rhs[i]*a[i]; + } + model_length=sqrt(model_length); + fprintf(stdout,"Dual objective value: dval=%.5f\n", + alphasum-0.5*model_length*model_length); + fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length); + } + + if(learn_parm->sharedslack) { + index = (long *)my_malloc(sizeof(long)*totdoc); + index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + maxslackid=0; + for(i=0;islackid) + maxslackid=docs[i]->slackid; + } + (void)compute_index(index,totdoc,index2dnum); + slack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + for(i=0;i<=maxslackid;i++) { /* init shared slacks */ + slack[i]=0; + alphaslack[i]=0; + } + for(i=0;islackid]+=a[i]; + } + compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm, + slack,alphaslack); + loss=0; + model->at_upper_bound=0; + svsetnum=0; + for(i=0;i<=maxslackid;i++) { /* create full index */ + loss+=slack[i]; + if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a)) + model->at_upper_bound++; + if(alphaslack[i] > learn_parm->epsilon_a) + svsetnum++; + } + free(index); + free(index2dnum); + free(slack); + free(alphaslack); + } + + if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { + if(learn_parm->sharedslack) { + printf("Number of SV: %ld\n", + model->sv_num-1); + printf("Number of non-zero slack variables: %ld (%ld slacks have non-zero alpha)\n", + model->at_upper_bound,svsetnum); + fprintf(stdout,"L1 loss: loss=%.5f\n",loss); + } + else { + upsupvecnum=0; + for(i=1;isv_num;i++) { + if(fabs(model->alpha[i]) >= + (learn_parm->svm_cost[(model->supvec[i])->docnum]- + learn_parm->epsilon_a)) + upsupvecnum++; + } + printf("Number of SV: %ld (including %ld at upper bound)\n", + model->sv_num-1,upsupvecnum); + fprintf(stdout,"L1 loss: loss=%.5f\n",loss); + } + example_length=estimate_sphere(model); + fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", + length_of_longest_document_vector(docs,totdoc,kernel_parm)); + } + if(verbosity>=1) { + printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic); + } + + if(alpha) { + for(i=0;ialphafile[0]) + write_alphas(learn_parm->alphafile,a,label,totdoc); + + shrink_state_cleanup(&shrink_state); + free(label); + free(unlabeled); + free(inconsistent); + 
free(c); + free(a); + free(lin); + free(learn_parm->svm_cost); +} + + +long optimize_to_convergence(DOC **docs, long int *label, long int totdoc, + long int totwords, LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + SHRINK_STATE *shrink_state, MODEL *model, + long int *inconsistent, long int *unlabeled, + double *a, double *lin, double *c, + TIMING *timing_profile, double *maxdiff, + long int heldout, long int retrain) + /* docs: Training vectors (x-part) */ + /* label: Training labels/value (y-part, zero if test example for + transduction) */ + /* totdoc: Number of examples in docs/label */ + /* totwords: Number of features (i.e. highest feature index) */ + /* laern_parm: Learning paramenters */ + /* kernel_parm: Kernel paramenters */ + /* kernel_cache: Initialized/partly filled Cache, if using a kernel. + NULL if linear. */ + /* shrink_state: State of active variables */ + /* model: Returns learning result */ + /* inconsistent: examples thrown out as inconstistent */ + /* unlabeled: test examples for transduction */ + /* a: alphas */ + /* lin: linear component of gradient */ + /* c: right hand side of inequalities (margin) */ + /* maxdiff: returns maximum violation of KT-conditions */ + /* heldout: marks held-out example for leave-one-out (or -1) */ + /* retrain: selects training mode (1=regular / 2=holdout) */ +{ + long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink; + long inconsistentnum,choosenum,already_chosen=0,iteration; + long misclassified,supvecnum=0,*active2dnum,inactivenum; + long *working2dnum,*selexam; + long activenum; + double criterion,eq; + double *a_old; + double t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */ + long transductcycle; + long transduction; + double epsilon_crit_org; + double bestmaxdiff; + long bestmaxdiffiter,terminate; + + double *selcrit; /* buffer for sorting */ + CFLOAT *aicache; /* buffer to keep one row of hessian */ + double *weights; /* buffer for weight vector in linear case */ + QP qp; /* buffer for one quadratic program */ + + epsilon_crit_org=learn_parm->epsilon_crit; /* save org */ + if(kernel_parm->kernel_type == LINEAR) { + learn_parm->epsilon_crit=2.0; + /* kernel_cache=NULL; */ /* caching makes no sense for linear kernel */ + } + learn_parm->epsilon_shrink=2; + (*maxdiff)=1; + + learn_parm->totwords=totwords; + + chosen = (long *)my_malloc(sizeof(long)*totdoc); + last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc); + key = (long *)my_malloc(sizeof(long)*(totdoc+11)); + selcrit = (double *)my_malloc(sizeof(double)*totdoc); + selexam = (long *)my_malloc(sizeof(long)*totdoc); + a_old = (double *)my_malloc(sizeof(double)*totdoc); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); + working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_ce0 = (double *)my_malloc(sizeof(double)); + qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize + *learn_parm->svm_maxqpsize); + qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + if(kernel_parm->kernel_type == LINEAR) { + weights=create_nvector(totwords); + clear_nvector(weights,totwords); /* set weights to zero */ + } + else + 
weights=NULL; + + choosenum=0; + inconsistentnum=0; + transductcycle=0; + transduction=0; + if(!retrain) retrain=1; + iteration=1; + bestmaxdiffiter=1; + bestmaxdiff=999999999; + terminate=0; + + if(kernel_cache) { + kernel_cache->time=iteration; /* for lru cache */ + kernel_cache_reset_lru(kernel_cache); + } + + for(i=0;iactive,totdoc,active2dnum); + inactivenum=totdoc-activenum; + clear_index(working2dnum); + + /* repeat this loop until we have convergence */ + for(;retrain && (!terminate);iteration++) { + + if(kernel_cache) + kernel_cache->time=iteration; /* for lru cache */ + if(verbosity>=2) { + printf( + "Iteration %ld: ",iteration); fflush(stdout); + } + else if(verbosity==1) { + printf("."); fflush(stdout); + } + + if(verbosity>=2) t0=get_runtime(); + if(verbosity>=3) { + printf("\nSelecting working set... "); fflush(stdout); + } + + if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) + learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + + i=0; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ + if((chosen[j]>=(learn_parm->svm_maxqpsize/ + minl(learn_parm->svm_maxqpsize, + learn_parm->svm_newvarsinqp))) + || (inconsistent[j]) + || (j == heldout)) { + chosen[j]=0; + choosenum--; + } + else { + chosen[j]++; + working2dnum[i++]=j; + } + } + working2dnum[i]=-1; + + if(retrain == 2) { + choosenum=0; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */ + chosen[j]=0; + } + clear_index(working2dnum); + for(i=0;ibiased_hyperplane) { + eq=0; + for(i=0;i learn_parm->epsilon_a);i++) { + if((eq*label[i] > 0) && (a[i] > 0)) { + chosen[i]=88888; + choosenum++; + if((eq*label[i]) > a[i]) { + eq-=(a[i]*label[i]); + a[i]=0; + } + else { + a[i]-=(eq*label[i]); + eq=0; + } + } + } + } + compute_index(chosen,totdoc,working2dnum); + } + else { /* select working set according to steepest gradient */ + if(iteration % 101) { + already_chosen=0; + if((minl(learn_parm->svm_newvarsinqp, + learn_parm->svm_maxqpsize-choosenum)>=4) + && (kernel_parm->kernel_type != LINEAR)) { + /* select part of the working set from cache */ + already_chosen=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + (long)(minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp) + /2), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,1, + key,chosen); + choosenum+=already_chosen; + } + choosenum+=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp-already_chosen), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,0,key, + chosen); + } + else { /* once in a while, select a somewhat random working set + to get unlocked of infinite loops due to numerical + inaccuracies in the core qp-solver */ + choosenum+=select_next_qp_subproblem_rand( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,key, + chosen,iteration); + } + } + + if(verbosity>=2) { + printf(" %ld vectors chosen\n",choosenum); fflush(stdout); + } + + if(verbosity>=2) t1=get_runtime(); + + if(kernel_cache) + cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, + choosenum,kernel_parm); + + if(verbosity>=2) t2=get_runtime(); + if(retrain != 2) { + optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum, + model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, + 
aicache,kernel_parm,&qp,&epsilon_crit_org); + } + + if(verbosity>=2) t3=get_runtime(); + update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, + totwords,kernel_parm,kernel_cache,lin,aicache, + weights); + + if(verbosity>=2) t4=get_runtime(); + supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, + learn_parm,working2dnum,active2dnum,model); + + if(verbosity>=2) t5=get_runtime(); + + /* The following computation of the objective function works only */ + /* relative to the active variables */ + if(verbosity>=3) { + criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, + active2dnum); + printf("Objective function (over active variables): %.16f\n",criterion); + fflush(stdout); + } + + for(jj=0;(i=working2dnum[jj])>=0;jj++) { + a_old[i]=a[i]; + } + + if(retrain == 2) { /* reset inconsistent unlabeled examples */ + for(i=0;(i=2) { + t6=get_runtime(); + timing_profile->time_select+=t1-t0; + timing_profile->time_kernel+=t2-t1; + timing_profile->time_opti+=t3-t2; + timing_profile->time_update+=t4-t3; + timing_profile->time_model+=t5-t4; + timing_profile->time_check+=t6-t5; + } + + /* checking whether optimizer got stuck */ + if((*maxdiff) < bestmaxdiff) { + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + } + if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { + /* long time no progress? */ + terminate=1; + retrain=0; + if(verbosity>=1) + printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); + } + + noshrink=0; + if((!retrain) && (inactivenum>0) + && ((!learn_parm->skip_final_opt_check) + || (kernel_parm->kernel_type == LINEAR))) { + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + if(verbosity==1) { + printf("\n"); + } + printf(" Checking optimality of inactive variables..."); + fflush(stdout); + } + t1=get_runtime(); + reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, + totwords,iteration,learn_parm,inconsistent, + docs,kernel_parm,kernel_cache,model,aicache, + weights,maxdiff); + /* Update to new active variables. */ + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + inactivenum=totdoc-activenum; + /* reset watchdog */ + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + /* termination criterion */ + noshrink=1; + retrain=0; + if((*maxdiff) > learn_parm->epsilon_crit) + retrain=1; + timing_profile->time_shrink+=get_runtime()-t1; + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + printf("done.\n"); fflush(stdout); + printf(" Number of inactive variables = %ld\n",inactivenum); + } + } + + if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) + learn_parm->epsilon_crit=(*maxdiff); + if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { + learn_parm->epsilon_crit/=2.0; + retrain=1; + noshrink=1; + } + if(learn_parm->epsilon_critepsilon_crit=epsilon_crit_org; + + if(verbosity>=2) { + printf(" => (%ld SV (incl. 
%ld SV at u-bound), max violation=%.5f)\n", + supvecnum,model->at_upper_bound,(*maxdiff)); + fflush(stdout); + } + if(verbosity>=3) { + printf("\n"); + } + + if((!retrain) && (transduction)) { + for(i=0;(iactive[i]=1; + } + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + inactivenum=0; + if(verbosity==1) printf("done\n"); + retrain=incorporate_unlabeled_examples(model,label,inconsistent, + unlabeled,a,lin,totdoc, + selcrit,selexam,key, + transductcycle,kernel_parm, + learn_parm); + epsilon_crit_org=learn_parm->epsilon_crit; + if(kernel_parm->kernel_type == LINEAR) + learn_parm->epsilon_crit=1; + transductcycle++; + /* reset watchdog */ + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + } + else if(((iteration % 10) == 0) && (!noshrink)) { + activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm, + active2dnum,last_suboptimal_at,iteration,totdoc, + maxl((long)(activenum/10), + maxl((long)(totdoc/500),100)), + a,inconsistent); + inactivenum=totdoc-activenum; + if((kernel_cache) + && (supvecnum>kernel_cache->max_elems) + && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { + kernel_cache_shrink(kernel_cache,totdoc, + minl((kernel_cache->activenum-activenum), + (kernel_cache->activenum-supvecnum)), + shrink_state->active); + } + } + + if((!retrain) && learn_parm->remove_inconsistent) { + if(verbosity>=1) { + printf(" Moving training errors to inconsistent examples..."); + fflush(stdout); + } + if(learn_parm->remove_inconsistent == 1) { + retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm, + &inconsistentnum,inconsistent); + } + else if(learn_parm->remove_inconsistent == 2) { + retrain=identify_misclassified(lin,label,unlabeled,totdoc, + model,&inconsistentnum,inconsistent); + } + else if(learn_parm->remove_inconsistent == 3) { + retrain=identify_one_misclassified(lin,label,unlabeled,totdoc, + model,&inconsistentnum,inconsistent); + } + if(retrain) { + if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */ + learn_parm->epsilon_crit=2.0; + } + } + if(verbosity>=1) { + printf("done.\n"); + if(retrain) { + printf(" Now %ld inconsistent examples.\n",inconsistentnum); + } + } + } + } /* end of loop */ + + free(chosen); + free(last_suboptimal_at); + free(key); + free(selcrit); + free(selexam); + free(a_old); + free(aicache); + free(working2dnum); + free(active2dnum); + free(qp.opt_ce); + free(qp.opt_ce0); + free(qp.opt_g); + free(qp.opt_g0); + free(qp.opt_xinit); + free(qp.opt_low); + free(qp.opt_up); + if(weights) free(weights); + + learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ + model->maxdiff=(*maxdiff); + + return(iteration); +} + +long optimize_to_convergence_sharedslack(DOC **docs, long int *label, + long int totdoc, + long int totwords, LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + SHRINK_STATE *shrink_state, MODEL *model, + double *a, double *lin, double *c, + TIMING *timing_profile, double *maxdiff) + /* docs: Training vectors (x-part) */ + /* label: Training labels/value (y-part, zero if test example for + transduction) */ + /* totdoc: Number of examples in docs/label */ + /* totwords: Number of features (i.e. highest feature index) */ + /* learn_parm: Learning paramenters */ + /* kernel_parm: Kernel paramenters */ + /* kernel_cache: Initialized/partly filled Cache, if using a kernel. + NULL if linear. 
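+                 In the shared-slack variant all examples carrying the
+                 same slackid share one slack variable, so only the
+                 most violated constraint of each group generates
+                 loss; the slack[] and alphaslack[] arrays below hold
+                 that per-group state.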
*/ + /* shrink_state: State of active variables */ + /* model: Returns learning result */ + /* a: alphas */ + /* lin: linear component of gradient */ + /* c: right hand side of inequalities (margin) */ + /* maxdiff: returns maximum violation of KT-conditions */ +{ + long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled; + long *inconsistent,choosenum,already_chosen=0,iteration; + long misclassified,supvecnum=0,*active2dnum,inactivenum; + long *working2dnum,*selexam,*ignore; + long activenum,retrain,maxslackid,slackset,jointstep; + double criterion,eq_target; + double *a_old,*alphaslack; + double t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */ + double epsilon_crit_org,maxsharedviol; + double bestmaxdiff; + long bestmaxdiffiter,terminate; + + double *selcrit; /* buffer for sorting */ + CFLOAT *aicache; /* buffer to keep one row of hessian */ + double *weights; /* buffer for weight vector in linear case */ + QP qp; /* buffer for one quadratic program */ + double *slack; /* vector of slack variables for optimization with + shared slacks */ + + epsilon_crit_org=learn_parm->epsilon_crit; /* save org */ + if(kernel_parm->kernel_type == LINEAR) { + learn_parm->epsilon_crit=2.0; + /* kernel_cache=NULL; */ /* caching makes no sense for linear kernel */ + } + learn_parm->epsilon_shrink=2; + (*maxdiff)=1; + + learn_parm->totwords=totwords; + + chosen = (long *)my_malloc(sizeof(long)*totdoc); + unlabeled = (long *)my_malloc(sizeof(long)*totdoc); + inconsistent = (long *)my_malloc(sizeof(long)*totdoc); + ignore = (long *)my_malloc(sizeof(long)*totdoc); + last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc); + key = (long *)my_malloc(sizeof(long)*(totdoc+11)); + selcrit = (double *)my_malloc(sizeof(double)*totdoc); + selexam = (long *)my_malloc(sizeof(long)*totdoc); + a_old = (double *)my_malloc(sizeof(double)*totdoc); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); + working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_ce0 = (double *)my_malloc(sizeof(double)); + qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize + *learn_parm->svm_maxqpsize); + qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + if(kernel_parm->kernel_type == LINEAR) { + weights=create_nvector(totwords); + clear_nvector(weights,totwords); /* set weights to zero */ + } + else + weights=NULL; + maxslackid=0; + for(i=0;islackid) + maxslackid=docs[i]->slackid; + } + slack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + for(i=0;i<=maxslackid;i++) { /* init shared slacks */ + slack[i]=0; + alphaslack[i]=0; + } + + choosenum=0; + retrain=1; + iteration=1; + bestmaxdiffiter=1; + bestmaxdiff=999999999; + terminate=0; + + if(kernel_cache) { + kernel_cache->time=iteration; /* for lru cache */ + kernel_cache_reset_lru(kernel_cache); + } + + for(i=0;islackid]+=a[i]; + a_old[i]=a[i]; + last_suboptimal_at[i]=1; + } + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + inactivenum=totdoc-activenum; + clear_index(working2dnum); + + /* call to init slack and alphaslack */ + 
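+  /* For each slack set s, slack[s] is the largest violation within
+     the group, max(0, max over i with slackid=s of (target_i -
+     y_i*lin_i)), and alphaslack[s] is the sum of the group's alphas,
+     so that the joint box constraint alphaslack[s] <= svm_c can be
+     enforced after every QP step. */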
compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, + slack,alphaslack); + + /* repeat this loop until we have convergence */ + for(;retrain && (!terminate);iteration++) { + + if(kernel_cache) + kernel_cache->time=iteration; /* for lru cache */ + if(verbosity>=2) { + printf( + "Iteration %ld: ",iteration); fflush(stdout); + } + else if(verbosity==1) { + printf("."); fflush(stdout); + } + + if(verbosity>=2) t0=get_runtime(); + if(verbosity>=3) { + printf("\nSelecting working set... "); fflush(stdout); + } + + if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) + learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + + /* select working set according to steepest gradient */ + jointstep=0; + eq_target=0; + if(iteration % 101) { + slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c, + learn_parm,active2dnum,&maxsharedviol); + if((!(iteration % 100)) + || (!slackset) || (maxsharedviolepsilon_crit)){ + /* do a step with examples from different slack sets */ + if(verbosity >= 2) { + printf("(i-step)"); fflush(stdout); + } + i=0; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */ + if((chosen[j]>=(learn_parm->svm_maxqpsize/ + minl(learn_parm->svm_maxqpsize, + learn_parm->svm_newvarsinqp)))) { + chosen[j]=0; + choosenum--; + } + else { + chosen[j]++; + working2dnum[i++]=j; + } + } + working2dnum[i]=-1; + + already_chosen=0; + if((minl(learn_parm->svm_newvarsinqp, + learn_parm->svm_maxqpsize-choosenum)>=4) + && (kernel_parm->kernel_type != LINEAR)) { + /* select part of the working set from cache */ + already_chosen=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + (long)(minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp) + /2), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache, + (long)1,key,chosen); + choosenum+=already_chosen; + } + choosenum+=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp-already_chosen), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache, + (long)0,key,chosen); + } + else { /* do a step with all examples from same slack set */ + if(verbosity >= 2) { + printf("(j-step on %ld)",slackset); fflush(stdout); + } + jointstep=1; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ + chosen[j]=0; + } + working2dnum[0]=-1; + eq_target=alphaslack[slackset]; + for(j=0;j=0;jj++) { */ + if(docs[j]->slackid != slackset) + ignore[j]=1; + else { + ignore[j]=0; + learn_parm->svm_cost[j]=learn_parm->svm_c; + /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */ + } + } + learn_parm->biased_hyperplane=1; + choosenum=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + learn_parm->svm_maxqpsize, + learn_parm,ignore,active2dnum, + working2dnum,selcrit,selexam,kernel_cache, + (long)0,key,chosen); + learn_parm->biased_hyperplane=0; + } + } + else { /* once in a while, select a somewhat random working set + to get unlocked of infinite loops due to numerical + inaccuracies in the core qp-solver */ + choosenum+=select_next_qp_subproblem_rand( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,key, + chosen,iteration); + } + + if(verbosity>=2) { + printf(" %ld vectors chosen\n",choosenum); fflush(stdout); + } + + if(verbosity>=2) t1=get_runtime(); + + if(kernel_cache) + 
cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, + choosenum,kernel_parm); + + if(verbosity>=2) t2=get_runtime(); + if(jointstep) learn_parm->biased_hyperplane=1; + optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum, + model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, + aicache,kernel_parm,&qp,&epsilon_crit_org); + learn_parm->biased_hyperplane=0; + + for(jj=0;(i=working2dnum[jj])>=0;jj++) /* recompute sums of alphas */ + alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]); + for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill + constraints */ + if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) { + if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) { + alphaslack[docs[i]->slackid]-=a[i]; + a[i]=0; + } + else { + a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c); + alphaslack[docs[i]->slackid]=learn_parm->svm_c; + } + } + } + for(jj=0;(i=active2dnum[jj])>=0;jj++) + learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c + -alphaslack[docs[i]->slackid]); + model->at_upper_bound=0; + for(jj=0;jj<=maxslackid;jj++) { + if(alphaslack[jj]>(learn_parm->svm_c-learn_parm->epsilon_a)) + model->at_upper_bound++; + } + + if(verbosity>=2) t3=get_runtime(); + update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, + totwords,kernel_parm,kernel_cache,lin,aicache, + weights); + compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, + slack,alphaslack); + + if(verbosity>=2) t4=get_runtime(); + supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, + learn_parm,working2dnum,active2dnum,model); + + if(verbosity>=2) t5=get_runtime(); + + /* The following computation of the objective function works only */ + /* relative to the active variables */ + if(verbosity>=3) { + criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, + active2dnum); + printf("Objective function (over active variables): %.16f\n",criterion); + fflush(stdout); + } + + for(jj=0;(i=working2dnum[jj])>=0;jj++) { + a_old[i]=a[i]; + } + + retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, + slack,alphaslack,totdoc,learn_parm, + maxdiff,epsilon_crit_org,&misclassified, + active2dnum,last_suboptimal_at, + iteration,kernel_parm); + + if(verbosity>=2) { + t6=get_runtime(); + timing_profile->time_select+=t1-t0; + timing_profile->time_kernel+=t2-t1; + timing_profile->time_opti+=t3-t2; + timing_profile->time_update+=t4-t3; + timing_profile->time_model+=t5-t4; + timing_profile->time_check+=t6-t5; + } + + /* checking whether optimizer got stuck */ + if((*maxdiff) < bestmaxdiff) { + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + } + if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { + /* long time no progress? */ + terminate=1; + retrain=0; + if(verbosity>=1) + printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); + } + + noshrink=0; + + if((!retrain) && (inactivenum>0) + && ((!learn_parm->skip_final_opt_check) + || (kernel_parm->kernel_type == LINEAR))) { + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + if(verbosity==1) { + printf("\n"); + } + printf(" Checking optimality of inactive variables..."); + fflush(stdout); + } + t1=get_runtime(); + reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, + totwords,iteration,learn_parm,inconsistent, + docs,kernel_parm,kernel_cache,model,aicache, + weights,maxdiff); + /* Update to new active variables. 
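+         Reactivation recomputes lin[] for every variable that had
+         been shrunk away, so the optimality check that follows sees
+         exact gradient information instead of the stale values
+         cached while the variable was inactive.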
*/ + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + inactivenum=totdoc-activenum; + /* check optimality, since check in reactivate does not work for + sharedslacks */ + compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, + slack,alphaslack); + retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, + slack,alphaslack,totdoc,learn_parm, + maxdiff,epsilon_crit_org,&misclassified, + active2dnum,last_suboptimal_at, + iteration,kernel_parm); + + /* reset watchdog */ + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + /* termination criterion */ + noshrink=1; + retrain=0; + if((*maxdiff) > learn_parm->epsilon_crit) + retrain=1; + timing_profile->time_shrink+=get_runtime()-t1; + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + printf("done.\n"); fflush(stdout); + printf(" Number of inactive variables = %ld\n",inactivenum); + } + } + + if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) + learn_parm->epsilon_crit=(*maxdiff); + if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { + learn_parm->epsilon_crit/=2.0; + retrain=1; + noshrink=1; + } + if(learn_parm->epsilon_critepsilon_crit=epsilon_crit_org; + + if(verbosity>=2) { + printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", + supvecnum,model->at_upper_bound,(*maxdiff)); + fflush(stdout); + } + if(verbosity>=3) { + printf("\n"); + } + + if(((iteration % 10) == 0) && (!noshrink)) { + activenum=shrink_problem(docs,learn_parm,shrink_state, + kernel_parm,active2dnum, + last_suboptimal_at,iteration,totdoc, + maxl((long)(activenum/10), + maxl((long)(totdoc/500),100)), + a,inconsistent); + inactivenum=totdoc-activenum; + if((kernel_cache) + && (supvecnum>kernel_cache->max_elems) + && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { + kernel_cache_shrink(kernel_cache,totdoc, + minl((kernel_cache->activenum-activenum), + (kernel_cache->activenum-supvecnum)), + shrink_state->active); + } + } + + } /* end of loop */ + + + free(alphaslack); + free(slack); + free(chosen); + free(unlabeled); + free(inconsistent); + free(ignore); + free(last_suboptimal_at); + free(key); + free(selcrit); + free(selexam); + free(a_old); + free(aicache); + free(working2dnum); + free(active2dnum); + free(qp.opt_ce); + free(qp.opt_ce0); + free(qp.opt_g); + free(qp.opt_g0); + free(qp.opt_xinit); + free(qp.opt_low); + free(qp.opt_up); + if(weights) free(weights); + + learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ + model->maxdiff=(*maxdiff); + + return(iteration); +} + + +double compute_objective_function(double *a, double *lin, double *c, + double eps, long int *label, + long int *active2dnum) + /* Return value of objective function. */ + /* Works only relative to the active variables! 
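+      The quantity computed below is the dual objective
+      W(a) = sum_i (eps - y_i*c_i)*a_i + 0.5*sum_i y_i*a_i*lin_i
+      with lin_i = sum_j y_j*a_j*K(x_i,x_j); restricting both sums to
+      active variables keeps it cheap, at the price of being only a
+      progress measure rather than the exact global objective.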
*/ +{ + long i,ii; + double criterion; + /* calculate value of objective function */ + criterion=0; + for(ii=0;active2dnum[ii]>=0;ii++) { + i=active2dnum[ii]; + criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i]; + } + return(criterion); +} + +void clear_index(long int *index) + /* initializes and empties index */ +{ + index[0]=-1; +} + +void add_to_index(long int *index, long int elem) + /* initializes and empties index */ +{ + register long i; + for(i=0;index[i] != -1;i++); + index[i]=elem; + index[i+1]=-1; +} + +long compute_index(long int *binfeature, long int range, long int *index) + /* create an inverted index of binfeature */ +{ + register long i,ii; + + ii=0; + for(i=0;i=3) { + printf("Running optimizer..."); fflush(stdout); + } + /* call the qp-subsolver */ + a_v=optimize_qp(qp,epsilon_crit_target, + learn_parm->svm_maxqpsize, + &(model->b), /* in case the optimizer gives us */ + /* the threshold for free. otherwise */ + /* b is calculated in calculate_model. */ + learn_parm); + if(verbosity>=3) { + printf("done\n"); + } + + for(i=0;iepsilon_a)) { + a[working2dnum[i]]=0; + } + else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) { + a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]]; + } + */ + } +} + +void compute_matrices_for_optimization(DOC **docs, long int *label, + long int *unlabeled, long *exclude_from_eq_const, double eq_target, + long int *chosen, long int *active2dnum, + long int *key, MODEL *model, double *a, double *lin, double *c, + long int varnum, long int totdoc, LEARN_PARM *learn_parm, + CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp) +{ + register long ki,kj,i,j; + register double kernel_temp; + + if(verbosity>=3) { + fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const); + fflush(stdout); + } + + qp->opt_n=varnum; + qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */ + for(j=1;jsv_num;j++) { /* start at 1 */ + if((!chosen[(model->supvec[j])->docnum]) + && (!exclude_from_eq_const[(model->supvec[j])->docnum])) { + qp->opt_ce0[0]+=model->alpha[j]; + } + } + if(learn_parm->biased_hyperplane) + qp->opt_m=1; + else + qp->opt_m=0; /* eq-constraint will be ignored */ + + /* init linear part of objective function */ + for(i=0;iopt_g0[i]=lin[key[i]]; + } + + for(i=0;iopt_ce[i]=label[ki]; + qp->opt_low[i]=0; + qp->opt_up[i]=learn_parm->svm_cost[ki]; + + kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]); + /* compute linear part of objective function */ + qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]); + /* compute quadratic part of objective function */ + qp->opt_g[varnum*i+i]=kernel_temp; + for(j=i+1;jopt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]); + qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]); + /* compute quadratic part of objective function */ + qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp; + qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp; + } + + if(verbosity>=3) { + if(i % 20 == 0) { + fprintf(stdout,"%ld..",i); fflush(stdout); + } + } + } + + for(i=0;iopt_xinit[i]=a[key[i]]; + /* set linear part of objective function */ + qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]]; + } + + if(verbosity>=3) { + fprintf(stdout,"done\n"); + } +} + +long calculate_svm_model(DOC **docs, long 
int *label, long int *unlabeled, + double *lin, double *a, double *a_old, double *c, + LEARN_PARM *learn_parm, long int *working2dnum, + long int *active2dnum, MODEL *model) + /* Compute decision function based on current values */ + /* of alpha. */ +{ + long i,ii,pos,b_calculated=0,first_low,first_high; + double ex_c,b_temp,b_low,b_high; + + if(verbosity>=3) { + printf("Calculating model..."); fflush(stdout); + } + + if(!learn_parm->biased_hyperplane) { + model->b=0; + b_calculated=1; + } + + for(ii=0;(i=working2dnum[ii])>=0;ii++) { + if((a_old[i]>0) && (a[i]==0)) { /* remove from model */ + pos=model->index[i]; + model->index[i]=-1; + (model->sv_num)--; + model->supvec[pos]=model->supvec[model->sv_num]; + model->alpha[pos]=model->alpha[model->sv_num]; + model->index[(model->supvec[pos])->docnum]=pos; + } + else if((a_old[i]==0) && (a[i]>0)) { /* add to model */ + model->supvec[model->sv_num]=docs[i]; + model->alpha[model->sv_num]=a[i]*(double)label[i]; + model->index[i]=model->sv_num; + (model->sv_num)++; + } + else if(a_old[i]==a[i]) { /* nothing to do */ + } + else { /* just update alpha */ + model->alpha[model->index[i]]=a[i]*(double)label[i]; + } + + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if(!learn_parm->sharedslack) { + if((a_old[i]>=ex_c) && (a[i]at_upper_bound)--; + } + else if((a_old[i]=ex_c)) { + (model->at_upper_bound)++; + } + } + + if((!b_calculated) + && (a[i]>learn_parm->epsilon_a) && (a[i]b=((double)label[i]*learn_parm->eps-c[i]+lin[i]); + /* model->b=(-(double)label[i]+lin[i]); */ + b_calculated=1; + } + } + + /* No alpha in the working set not at bounds, so b was not + calculated in the usual way. The following handles this special + case. */ + if(learn_parm->biased_hyperplane + && (!b_calculated) + && (model->sv_num-1 == model->at_upper_bound)) { + first_low=1; + first_high=1; + b_low=0; + b_high=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if(a[i]0) { + b_temp=-(learn_parm->eps-c[i]+lin[i]); + if((b_temp>b_low) || (first_low)) { + b_low=b_temp; + first_low=0; + } + } + else { + b_temp=-(-learn_parm->eps-c[i]+lin[i]); + if((b_tempeps-c[i]+lin[i]); + if((b_temp>b_low) || (first_low)) { + b_low=b_temp; + first_low=0; + } + } + else { + b_temp=-(learn_parm->eps-c[i]+lin[i]); + if((b_tempb=-b_low; + } + else if(first_low) { + model->b=-b_high; + } + else { + model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */ + /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */ + } + } + + if(verbosity>=3) { + printf("done\n"); fflush(stdout); + } + + return(model->sv_num-1); /* have to substract one, since element 0 is empty*/ +} + +long check_optimality(MODEL *model, long int *label, long int *unlabeled, + double *a, double *lin, double *c, long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, + double epsilon_crit_org, long int *misclassified, + long int *inconsistent, long int *active2dnum, + long int *last_suboptimal_at, + long int iteration, KERNEL_PARM *kernel_parm) + /* Check KT-conditions */ +{ + long i,ii,retrain; + double dist,ex_c,target; + + if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */ + learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org; + } + else { /* be conservative */ + learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; + } + retrain=0; + (*maxdiff)=0; + (*misclassified)=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + if((!inconsistent[i]) && label[i]) { + dist=(lin[i]-model->b)*(double)label[i];/* 'distance' 
from + hyperplane*/ + target=-(learn_parm->eps-(double)label[i]*c[i]); + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if(dist <= 0) { + (*misclassified)++; /* does not work due to deactivation of var */ + } + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>(*maxdiff)) /* largest violation */ + (*maxdiff)=dist-target; + } + else if((a[i](*maxdiff)) /* largest violation */ + (*maxdiff)=target-dist; + } + /* Count how long a variable was at lower/upper bound (and optimal).*/ + /* Variables, which were at the bound and optimal for a long */ + /* time are unlikely to become support vectors. In case our */ + /* cache is filled up, those variables are excluded to save */ + /* kernel evaluations. (See chapter 'Shrinking').*/ + if((a[i]>(learn_parm->epsilon_a)) + && (a[i]epsilon_a)) + && (dist < (target+learn_parm->epsilon_shrink))) { + last_suboptimal_at[i]=iteration; /* not likely optimal */ + } + else if((a[i]>=ex_c) + && (dist > (target-learn_parm->epsilon_shrink))) { + last_suboptimal_at[i]=iteration; /* not likely optimal */ + } + } + } + /* termination criterion */ + if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { + retrain=1; + } + return(retrain); +} + +long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label, + double *a, double *lin, double *c, double *slack, + double *alphaslack, + long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, + double epsilon_crit_org, long int *misclassified, + long int *active2dnum, + long int *last_suboptimal_at, + long int iteration, KERNEL_PARM *kernel_parm) + /* Check KT-conditions */ +{ + long i,ii,retrain; + double dist,dist_noslack,ex_c=0,target; + + if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */ + learn_parm->epsilon_shrink=-learn_parm->epsilon_crit/2.0; + } + else { /* be conservative */ + learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; + } + + retrain=0; + (*maxdiff)=0; + (*misclassified)=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + /* 'distance' from hyperplane*/ + dist_noslack=(lin[i]-model->b)*(double)label[i]; + dist=dist_noslack+slack[docs[i]->slackid]; + target=-(learn_parm->eps-(double)label[i]*c[i]); + ex_c=learn_parm->svm_c-learn_parm->epsilon_a; + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>(*maxdiff)) { /* largest violation */ + (*maxdiff)=dist-target; + if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]); + if(verbosity>=5) printf(" (single %f)\n",(*maxdiff)); + } + } + if((alphaslack[docs[i]->slackid]slackid]>0)) { + if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */ + (*maxdiff)=slack[docs[i]->slackid]; + if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]); + if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff)); + } + } + /* Count how long a variable was at lower/upper bound (and optimal).*/ + /* Variables, which were at the bound and optimal for a long */ + /* time are unlikely to become support vectors. In case our */ + /* cache is filled up, those variables are excluded to save */ + /* kernel evaluations. 
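+       A variable is treated as safely stuck at a bound once it has
+       looked optimal there throughout the shrinking horizon (the -h
+       option, learn_parm->svm_iter_to_shrink); last_suboptimal_at[]
+       records the iteration at which each variable last violated
+       this.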
(See chapter 'Shrinking').*/ + if((a[i]<=learn_parm->epsilon_a) && (dist < (target+learn_parm->epsilon_shrink))) { + last_suboptimal_at[i]=iteration; /* not likely optimal */ + } + else if((alphaslack[docs[i]->slackid]learn_parm->epsilon_a) && (fabs(dist_noslack - target) > -learn_parm->epsilon_shrink)) { + last_suboptimal_at[i]=iteration; /* not at lower bound */ + } + else if((alphaslack[docs[i]->slackid]>=ex_c) && (a[i]>learn_parm->epsilon_a) && (fabs(target-dist) > -learn_parm->epsilon_shrink)) { + last_suboptimal_at[i]=iteration; /* not likely optimal */ + } + } + /* termination criterion */ + if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { + retrain=1; + } + return(retrain); +} + +void compute_shared_slacks(DOC **docs, long int *label, + double *a, double *lin, + double *c, long int *active2dnum, + LEARN_PARM *learn_parm, + double *slack, double *alphaslack) + /* compute the value of shared slacks and the joint alphas */ +{ + long jj,i; + double dist,target; + + for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */ + slack[docs[i]->slackid]=0.0; + /* alphaslack[docs[i]->slackid]=0.0; */ + } + for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */ + dist=(lin[i])*(double)label[i]; + target=-(learn_parm->eps-(double)label[i]*c[i]); + if((target-dist) > slack[docs[i]->slackid]) + slack[docs[i]->slackid]=target-dist; + /* alphaslack[docs[i]->slackid]+=a[i]; */ + } +} + + +long identify_inconsistent(double *a, long int *label, + long int *unlabeled, long int totdoc, + LEARN_PARM *learn_parm, + long int *inconsistentnum, long int *inconsistent) +{ + long i,retrain; + + /* Throw out examples with multipliers at upper bound. This */ + /* corresponds to the -i 1 option. */ + /* ATTENTION: this is just a heuristic for finding a close */ + /* to minimum number of examples to exclude to */ + /* make the problem separable with desired margin */ + retrain=0; + for(i=0;i=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { + (*inconsistentnum)++; + inconsistent[i]=1; /* never choose again */ + retrain=2; /* start over */ + if(verbosity>=3) { + printf("inconsistent(%ld)..",i); fflush(stdout); + } + } + } + return(retrain); +} + +long identify_misclassified(double *lin, long int *label, + long int *unlabeled, long int totdoc, + MODEL *model, long int *inconsistentnum, + long int *inconsistent) +{ + long i,retrain; + double dist; + + /* Throw out misclassified examples. This */ + /* corresponds to the -i 2 option. */ + /* ATTENTION: this is just a heuristic for finding a close */ + /* to minimum number of examples to exclude to */ + /* make the problem separable with desired margin */ + retrain=0; + for(i=0;ib)*(double)label[i]; /* 'distance' from hyperplane*/ + if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) { + (*inconsistentnum)++; + inconsistent[i]=1; /* never choose again */ + retrain=2; /* start over */ + if(verbosity>=3) { + printf("inconsistent(%ld)..",i); fflush(stdout); + } + } + } + return(retrain); +} + +long identify_one_misclassified(double *lin, long int *label, + long int *unlabeled, + long int totdoc, MODEL *model, + long int *inconsistentnum, + long int *inconsistent) +{ + long i,retrain,maxex=-1; + double dist,maxdist=0; + + /* Throw out the 'most misclassified' example. This */ + /* corresponds to the -i 3 option. 
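+     Unlike -i 1 (drop every example whose alpha sits at the upper
+     bound) and -i 2 (drop every misclassified example), this mode
+     removes a single example per round, the one with the most
+     negative margin, and then retrains.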
*/ + /* ATTENTION: this is just a heuristic for finding a close */ + /* to minimum number of examples to exclude to */ + /* make the problem separable with desired margin */ + retrain=0; + for(i=0;ib)*(double)label[i];/* 'distance' from hyperplane*/ + if(dist=0) { + (*inconsistentnum)++; + inconsistent[maxex]=1; /* never choose again */ + retrain=2; /* start over */ + if(verbosity>=3) { + printf("inconsistent(%ld)..",i); fflush(stdout); + } + } + return(retrain); +} + +void update_linear_component(DOC **docs, long int *label, + long int *active2dnum, double *a, + double *a_old, long int *working2dnum, + long int totdoc, long int totwords, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + double *lin, CFLOAT *aicache, double *weights) + /* keep track of the linear component */ + /* lin of the gradient etc. by updating */ + /* based on the change of the variables */ + /* in the current working set */ + /* WARNING: Assumes that array of weights is initialized to all zero + values for linear kernel! */ +{ + register long i,ii,j,jj; + register double tec; + SVECTOR *f; + + if(kernel_parm->kernel_type==0) { /* special linear case */ + /* clear_vector_n(weights,totwords); */ + for(ii=0;(i=working2dnum[ii])>=0;ii++) { + if(a[i] != a_old[i]) { + for(f=docs[i]->fvec;f;f=f->next) + add_vector_ns(weights,f, + f->factor*((a[i]-a_old[i])*(double)label[i])); + } + } + for(jj=0;(j=active2dnum[jj])>=0;jj++) { + for(f=docs[j]->fvec;f;f=f->next) + lin[j]+=f->factor*sprod_ns(weights,f); + } + for(ii=0;(i=working2dnum[ii])>=0;ii++) { + if(a[i] != a_old[i]) { + for(f=docs[i]->fvec;f;f=f->next) + mult_vector_ns(weights,f,0.0); /* Set weights back to zero. */ + } /* This is faster than init */ + } /* weights to zero in each iter. */ + } + else { /* general case */ + for(jj=0;(i=working2dnum[jj])>=0;jj++) { + if(a[i] != a_old[i]) { + get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache, + kernel_parm); + for(ii=0;(j=active2dnum[ii])>=0;ii++) { + tec=aicache[j]; + lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]); + } + } + } + } +} + + +long incorporate_unlabeled_examples(MODEL *model, long int *label, + long int *inconsistent, + long int *unlabeled, + double *a, double *lin, + long int totdoc, double *selcrit, + long int *select, long int *key, + long int transductcycle, + KERNEL_PARM *kernel_parm, + LEARN_PARM *learn_parm) +{ + long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0; + long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab; + double dist,model_length,posratio,negratio; + long check_every=2; + double loss; + static double switchsens=0.0,switchsensorg=0.0; + double umin,umax,sumalpha; + long imin=0,imax=0; + static long switchnum=0; + + switchsens/=1.2; + + /* assumes that lin[] is up to date -> no inactive vars */ + + orgpos=0; + orgneg=0; + newpos=0; + newneg=0; + nolabel=0; + allunlab=0; + for(i=0;i 0) { + orgpos++; + } + else { + orgneg++; + } + } + else { + allunlab++; + if(unlabeled[i]) { + if(label[i] > 0) { + newpos++; + } + else if(label[i] < 0) { + newneg++; + } + } + } + if(label[i]==0) { + nolabel++; + } + } + + if(learn_parm->transduction_posratio >= 0) { + posratio=learn_parm->transduction_posratio; + } + else { + posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */ + } /* in training data */ + negratio=1.0-posratio; + + learn_parm->svm_costratio=1.0; /* global */ + if(posratio>0) { + learn_parm->svm_costratio_unlab=negratio/posratio; + } + else { + learn_parm->svm_costratio_unlab=1.0; + } + + pos=0; + neg=0; + upos=0; + 
uneg=0; + for(i=0;ib); /* 'distance' from hyperplane*/ + if(dist>0) { + pos++; + } + else { + neg++; + } + if(unlabeled[i]) { + if(dist>0) { + upos++; + } + else { + uneg++; + } + } + if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { + /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */ + } + } + if(verbosity>=2) { + printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg); + printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg); + printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio); + fflush(stdout); + } + + if(transductcycle == 0) { + j1=0; + j2=0; + j4=0; + for(i=0;ib); /* 'distance' from hyperplane*/ + if((label[i]==0) && (unlabeled[i])) { + selcrit[j4]=dist; + key[j4]=i; + j4++; + } + } + unsupaddnum1=0; + unsupaddnum2=0; + select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5)); + for(k=0;(k<(long)(allunlab*posratio+0.5));k++) { + i=key[select[k]]; + label[i]=1; + unsupaddnum1++; + j1++; + } + for(i=0;isvm_cost[i]=learn_parm->svm_c* + learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; + } + else if(label[i] == -1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_unlabbound; + } + } + } + if(verbosity>=1) { + /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", + learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, + learn_parm->svm_unlabbound); */ + printf("Classifying unlabeled data as %ld POS / %ld NEG.\n", + unsupaddnum1,unsupaddnum2); + fflush(stdout); + } + if(verbosity >= 1) + printf("Retraining."); + if(verbosity >= 2) printf("\n"); + return((long)3); + } + if((transductcycle % check_every) == 0) { + if(verbosity >= 1) + printf("Retraining."); + if(verbosity >= 2) printf("\n"); + j1=0; + j2=0; + unsupaddnum1=0; + unsupaddnum2=0; + for(i=0;isvm_cost[i]=learn_parm->svm_c* + learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; + } + else if(label[i] == -1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_unlabbound; + } + } + } + + if(verbosity>=2) { + /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", + learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, + learn_parm->svm_unlabbound); */ + printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n", + upos,unsupaddnum1,unsupaddnum2); + fflush(stdout); + } + + if(learn_parm->svm_unlabbound == 1) { + learn_parm->epsilon_crit=0.001; /* do the last run right */ + } + else { + learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */ + } + + return((long)3); + } + else if(((transductcycle % check_every) < check_every)) { + model_length=0; + sumalpha=0; + loss=0; + for(i=0;ib); /* 'distance' from hyperplane*/ + if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) { + loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i]; + } + } + model_length=sqrt(model_length); + if(verbosity>=2) { + printf("Model-length = %f (%f), loss = %f, objective = %f\n", + model_length,sumalpha,loss,loss+0.5*model_length*model_length); + fflush(stdout); + } + j1=0; + j2=0; + j3=0; + j4=0; + unsupaddnum1=0; + unsupaddnum2=0; + umin=99999; + umax=-99999; + j4=1; + while(j4) { + umin=99999; + umax=-99999; + for(i=0;(ib); + if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i]) + && (distumax)) { + umax=dist; + imax=i; + } + } + if((umin < (umax+switchsens-1E-4))) { + j1++; + j2++; + unsupaddnum1++; + unlabeled[imin]=3; + inconsistent[imin]=1; + unsupaddnum2++; + unlabeled[imax]=2; + inconsistent[imax]=1; + } + else + j4=0; + j4=0; + } + for(j=0;(j0) { + 
unlabeled[j]=2; + } + else if(label[j]<0) { + unlabeled[j]=3; + } + /* inconsistent[j]=1; */ + j3++; + } + } + switchnum+=unsupaddnum1+unsupaddnum2; + + /* stop and print out current margin + printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree); + if(switchnum == 2*kernel_parm->poly_degree) { + learn_parm->svm_unlabbound=1; + } + */ + + if((!unsupaddnum1) && (!unsupaddnum2)) { + if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) { + for(j=0;(jpredfile,model,lin,a,unlabeled,label, + totdoc,learn_parm); + if(verbosity>=1) + printf("Number of switches: %ld\n",switchnum); + return((long)0); + } + switchsens=switchsensorg; + learn_parm->svm_unlabbound*=1.5; + if(learn_parm->svm_unlabbound>1) { + learn_parm->svm_unlabbound=1; + } + model->at_upper_bound=0; /* since upper bound increased */ + if(verbosity>=1) + printf("Increasing influence of unlabeled examples to %f%% .", + learn_parm->svm_unlabbound*100.0); + } + else if(verbosity>=1) { + printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.", + upos,unsupaddnum1,unsupaddnum2); + fflush(stdout); + } + + if(verbosity >= 2) printf("\n"); + + learn_parm->epsilon_crit=0.5; /* don't need to be so picky */ + + for(i=0;isvm_cost[i]=learn_parm->svm_c* + learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; + } + else if(label[i] == -1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_unlabbound; + } + } + } + + return((long)2); + } + + return((long)0); +} + +/*************************** Working set selection ***************************/ + +long select_next_qp_subproblem_grad(long int *label, + long int *unlabeled, + double *a, double *lin, + double *c, long int totdoc, + long int qp_size, + LEARN_PARM *learn_parm, + long int *inconsistent, + long int *active2dnum, + long int *working2dnum, + double *selcrit, + long int *select, + KERNEL_CACHE *kernel_cache, + long int cache_only, + long int *key, long int *chosen) + /* Use the feasible direction approach to select the next + qp-subproblem (see chapter 'Selecting a good working set'). If + 'cache_only' is true, then the variables are selected only among + those for which the kernel evaluations are cached. 
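+   The selection criterion computed into selcrit[] below is the
+   gradient of the dual objective along the feasible direction s,
+   i.e. y_j*(eps - y_j*c_j + y_j*lin_j) for s=-y_j and its negation
+   for s=+y_j; the top qp_size/2 scores from each of the two sweeps
+   form the new working set.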
*/ +{ + long choosenum,i,j,k,activedoc,inum,valid; + double s; + + for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ + choosenum=0; + activedoc=0; + for(i=0;(j=active2dnum[i])>=0;i++) { + s=-label[j]; + if(kernel_cache && cache_only) + valid=(kernel_cache->index[j]>=0); + else + valid=1; + if(valid + && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) + && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) + && (s>0))) + && (!chosen[j]) + && (label[j]) + && (!inconsistent[j])) + { + selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); + /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */ + key[activedoc]=j; + activedoc++; + } + } + select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); + for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (kbiased_hyperplane || (selcrit[select[k]] > 0)) { */ + i=key[select[k]]; + chosen[i]=1; + working2dnum[inum+choosenum]=i; + choosenum+=1; + if(kernel_cache) + kernel_cache_touch(kernel_cache,i); /* make sure it does not get + kicked out of cache */ + /* } */ + } + + activedoc=0; + for(i=0;(j=active2dnum[i])>=0;i++) { + s=label[j]; + if(kernel_cache && cache_only) + valid=(kernel_cache->index[j]>=0); + else + valid=1; + if(valid + && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) + && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) + && (s>0))) + && (!chosen[j]) + && (label[j]) + && (!inconsistent[j])) + { + selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); + /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */ + key[activedoc]=j; + activedoc++; + } + } + select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); + for(k=0;(choosenumbiased_hyperplane || (selcrit[select[k]] > 0)) { */ + i=key[select[k]]; + chosen[i]=1; + working2dnum[inum+choosenum]=i; + choosenum+=1; + if(kernel_cache) + kernel_cache_touch(kernel_cache,i); /* make sure it does not get + kicked out of cache */ + /* } */ + } + working2dnum[inum+choosenum]=-1; /* complete index */ + return(choosenum); +} + +long select_next_qp_subproblem_rand(long int *label, + long int *unlabeled, + double *a, double *lin, + double *c, long int totdoc, + long int qp_size, + LEARN_PARM *learn_parm, + long int *inconsistent, + long int *active2dnum, + long int *working2dnum, + double *selcrit, + long int *select, + KERNEL_CACHE *kernel_cache, + long int *key, + long int *chosen, + long int iteration) +/* Use the feasible direction approach to select the next + qp-subproblem (see section 'Selecting a good working set'). Chooses + a feasible direction at (pseudo) random to help jump over numerical + problem. 
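+
+     Note that the selection criterion below is simply
+     (j+iteration) % totdoc, so the ranking produced by select_top_n()
+     rotates with the iteration counter; no gradient information
+     enters, which is what lets the optimizer escape a working set on
+     which it is stuck numerically.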
*/ +{ + long choosenum,i,j,k,activedoc,inum; + double s; + + for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ + choosenum=0; + activedoc=0; + for(i=0;(j=active2dnum[i])>=0;i++) { + s=-label[j]; + if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) + && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) + && (s>0))) + && (!inconsistent[j]) + && (label[j]) + && (!chosen[j])) { + selcrit[activedoc]=(j+iteration) % totdoc; + key[activedoc]=j; + activedoc++; + } + } + select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); + for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k=0;i++) { + s=label[j]; + if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) + && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) + && (s>0))) + && (!inconsistent[j]) + && (label[j]) + && (!chosen[j])) { + selcrit[activedoc]=(j+iteration) % totdoc; + key[activedoc]=j; + activedoc++; + } + } + select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); + for(k=0;(choosenum=0;ii++) { + ex_c=learn_parm->svm_c-learn_parm->epsilon_a; + if(alphaslack[docs[i]->slackid] >= ex_c) { + dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */ + target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */ + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>maxdiff) { /* largest violation */ + maxdiff=dist-target; + maxdiffid=docs[i]->slackid; + } + } + } + } + (*maxviol)=maxdiff; + return(maxdiffid); +} + + +void select_top_n(double *selcrit, long int range, long int *select, + long int n) +{ + register long i,j; + + for(i=0;(i=0;j--) { + if((j>0) && (selcrit[select[j-1]]0) { + for(i=n;iselcrit[select[n-1]]) { + for(j=n-1;j>=0;j--) { + if((j>0) && (selcrit[select[j-1]]deactnum=0; + shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc); + shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc); + shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory); + shrink_state->maxhistory=maxhistory; + shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc); + shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc); + + for(i=0;iactive[i]=1; + shrink_state->inactive_since[i]=0; + shrink_state->last_a[i]=0; + shrink_state->last_lin[i]=0; + } +} + +void shrink_state_cleanup(SHRINK_STATE *shrink_state) +{ + free(shrink_state->active); + free(shrink_state->inactive_since); + if(shrink_state->deactnum > 0) + free(shrink_state->a_history[shrink_state->deactnum-1]); + free(shrink_state->a_history); + free(shrink_state->last_a); + free(shrink_state->last_lin); +} + +long shrink_problem(DOC **docs, + LEARN_PARM *learn_parm, + SHRINK_STATE *shrink_state, + KERNEL_PARM *kernel_parm, + long int *active2dnum, + long int *last_suboptimal_at, + long int iteration, + long int totdoc, + long int minshrink, + double *a, + long int *inconsistent) + /* Shrink some variables away. Do the shrinking only if at least + minshrink variables can be removed. 
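+
+     A variable qualifies for shrinking when it has been optimal at a
+     bound for more than learn_parm->svm_iter_to_shrink iterations
+     (tracked via last_suboptimal_at), or when it is marked
+     inconsistent. For non-linear kernels the current alphas are first
+     checkpointed in shrink_state->a_history so that lin[] can be
+     recomputed when the variables are reactivated.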
*/ +{ + long i,ii,change,activenum,lastiter; + double *a_old; + + activenum=0; + change=0; + for(ii=0;active2dnum[ii]>=0;ii++) { + i=active2dnum[ii]; + activenum++; + if(0 && learn_parm->sharedslack) + lastiter=last_suboptimal_at[docs[i]->slackid]; + else + lastiter=last_suboptimal_at[i]; + if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) + || (inconsistent[i])) { + change++; + } + } + if((change>=minshrink) /* shrink only if sufficiently many candidates */ + && (shrink_state->deactnummaxhistory)) { /* and enough memory */ + /* Shrink problem by removing those variables which are */ + /* optimal at a bound for a minimum number of iterations */ + if(verbosity>=2) { + printf(" Shrinking..."); fflush(stdout); + } + if(kernel_parm->kernel_type != LINEAR) { /* non-linear case save alphas */ + a_old=(double *)my_malloc(sizeof(double)*totdoc); + shrink_state->a_history[shrink_state->deactnum]=a_old; + for(i=0;i=0;ii++) { + i=active2dnum[ii]; + if(0 && learn_parm->sharedslack) + lastiter=last_suboptimal_at[docs[i]->slackid]; + else + lastiter=last_suboptimal_at[i]; + if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) + || (inconsistent[i])) { + shrink_state->active[i]=0; + shrink_state->inactive_since[i]=shrink_state->deactnum; + } + } + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + shrink_state->deactnum++; + if(kernel_parm->kernel_type == LINEAR) { + shrink_state->deactnum=0; + } + if(verbosity>=2) { + printf("done.\n"); fflush(stdout); + printf(" Number of inactive variables = %ld\n",totdoc-activenum); + } + } + return(activenum); +} + + +void reactivate_inactive_examples(long int *label, + long int *unlabeled, + double *a, + SHRINK_STATE *shrink_state, + double *lin, + double *c, + long int totdoc, + long int totwords, + long int iteration, + LEARN_PARM *learn_parm, + long int *inconsistent, + DOC **docs, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + MODEL *model, + CFLOAT *aicache, + double *weights, + double *maxdiff) + /* Make all variables active again which had been removed by + shrinking. */ + /* Computes lin for those variables from scratch. */ + /* WARNING: Assumes that array of weights is initialized to all zero + values for linear kernel! 
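+
+     Linear case: the weight vector is rebuilt from the increments
+     (a[i]-last_a[i])*y_i*x_i, lin[i] is then refreshed with a single
+     dot product per document, and the weights are multiplied back to
+     zero afterwards (hence the warning above). Non-linear case: the
+     alpha snapshots saved in a_history are replayed against the
+     kernel cache instead.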
*/ +{ + register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum; + long *changed,*inactive; + register double kernel_val,*a_old,dist; + double ex_c,target; + SVECTOR *f; + + if(kernel_parm->kernel_type == LINEAR) { /* special linear case */ + /* clear_vector_n(weights,totwords); set weights to zero */ + a_old=shrink_state->last_a; + for(i=0;ifvec;f;f=f->next) + add_vector_ns(weights,f, + f->factor*((a[i]-a_old[i])*(double)label[i])); + a_old[i]=a[i]; + } + } + for(i=0;iactive[i]) { + for(f=docs[i]->fvec;f;f=f->next) + lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f); + } + shrink_state->last_lin[i]=lin[i]; + } + for(i=0;ifvec;f;f=f->next) + mult_vector_ns(weights,f,0.0); /* set weights back to zero */ + } + } + else { + changed=(long *)my_malloc(sizeof(long)*totdoc); + changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); + inactive=(long *)my_malloc(sizeof(long)*totdoc); + inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); + for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) { + if(verbosity>=2) { + printf("%ld..",t); fflush(stdout); + } + a_old=shrink_state->a_history[t]; + for(i=0;iactive[i]) + && (shrink_state->inactive_since[i] == t)); + changed[i]= (a[i] != a_old[i]); + } + compute_index(inactive,totdoc,inactive2dnum); + compute_index(changed,totdoc,changed2dnum); + + for(ii=0;(i=changed2dnum[ii])>=0;ii++) { + get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache, + kernel_parm); + for(jj=0;(j=inactive2dnum[jj])>=0;jj++) { + kernel_val=aicache[j]; + lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]); + } + } + } + free(changed); + free(changed2dnum); + free(inactive); + free(inactive2dnum); + } + (*maxdiff)=0; + for(i=0;iinactive_since[i]=shrink_state->deactnum-1; + if(!inconsistent[i]) { + dist=(lin[i]-model->b)*(double)label[i]; + target=-(learn_parm->eps-(double)label[i]*c[i]); + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>(*maxdiff)) /* largest violation */ + (*maxdiff)=dist-target; + } + else if((a[i](*maxdiff)) /* largest violation */ + (*maxdiff)=target-dist; + } + if((a[i]>(0+learn_parm->epsilon_a)) + && (a[i]active[i]=1; /* not at bound */ + } + else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) { + shrink_state->active[i]=1; + } + else if((a[i]>=ex_c) + && (dist > (target-learn_parm->epsilon_shrink))) { + shrink_state->active[i]=1; + } + else if(learn_parm->sharedslack) { /* make all active when sharedslack */ + shrink_state->active[i]=1; + } + } + } + if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */ + for(i=0;ia_history[shrink_state->deactnum-1])[i]=a[i]; + } + for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) { + free(shrink_state->a_history[t]); + shrink_state->a_history[t]=0; + } + } +} + +/****************************** Cache handling *******************************/ + +void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, + long int docnum, long int totdoc, + long int *active2dnum, CFLOAT *buffer, + KERNEL_PARM *kernel_parm) + /* Get's a row of the matrix of kernel values This matrix has the + same form as the Hessian, just that the elements are not + multiplied by */ + /* y_i * y_j * a_i * a_j */ + /* Takes the values from the cache if available. */ +{ + register long i,j,start; + DOC *ex; + + ex=docs[docnum]; + + if(kernel_cache && (kernel_cache->index[docnum] != -1)) {/* row is cached? 
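+                                     index[docnum] holds the row's
+                                     slot in buffer, or -1 if the row
+                                     is not in the cache.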
*/ + kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time;/* lru */ + start=kernel_cache->activenum*kernel_cache->index[docnum]; + for(i=0;(j=active2dnum[i])>=0;i++) { + if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */ + buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]]; + } + else { + buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); + } + } + } + else { + for(i=0;(j=active2dnum[i])>=0;i++) { + buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); + } + } +} + + +void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, + long int m, KERNEL_PARM *kernel_parm) + /* Fills cache for the row m */ +{ + register DOC *ex; + register long j,k,l; + register CFLOAT *cache; + + if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet*/ + cache = kernel_cache_clean_and_malloc(kernel_cache,m); + if(cache) { + l=kernel_cache->totdoc2active[m]; + ex=docs[m]; + for(j=0;jactivenum;j++) { /* fill cache */ + k=kernel_cache->active2totdoc[j]; + if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) { + cache[j]=kernel_cache->buffer[kernel_cache->activenum + *kernel_cache->index[k]+l]; + } + else { + cache[j]=kernel(kernel_parm,ex,docs[k]); + } + } + } + else { + perror("Error: Kernel cache full! => increase cache size"); + } + } +} + + +void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs, + long int *key, long int varnum, + KERNEL_PARM *kernel_parm) + /* Fills cache for the rows in key */ +{ + register long i; + + for(i=0;i=2) { + printf(" Reorganizing cache..."); fflush(stdout); + } + + keep=(long *)my_malloc(sizeof(long)*totdoc); + for(j=0;jactivenum) && (scountactive2totdoc[jj]; + if(!after[j]) { + scount++; + keep[j]=0; + } + } + + for(i=0;imax_elems;i++) { + for(jj=0;jjactivenum;jj++) { + j=kernel_cache->active2totdoc[jj]; + if(!keep[j]) { + from++; + } + else { + kernel_cache->buffer[to]=kernel_cache->buffer[from]; + to++; + from++; + } + } + } + + kernel_cache->activenum=0; + for(j=0;jtotdoc2active[j] != -1)) { + kernel_cache->active2totdoc[kernel_cache->activenum]=j; + kernel_cache->totdoc2active[j]=kernel_cache->activenum; + kernel_cache->activenum++; + } + else { + kernel_cache->totdoc2active[j]=-1; + } + } + + kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum); + if(kernel_cache->max_elems>totdoc) { + kernel_cache->max_elems=totdoc; + } + + free(keep); + + if(verbosity>=2) { + printf("done.\n"); fflush(stdout); + printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); + } +} + +KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize) +{ + long i; + KERNEL_CACHE *kernel_cache; + + kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE)); + kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024); + + kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024); + + kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc); + if(kernel_cache->max_elems>totdoc) { + kernel_cache->max_elems=totdoc; + } + + if(verbosity>=2) { + printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); + printf(" Kernel evals so far: %ld\n",kernel_cache_statistic); + } + + 
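+  /* Note: buffsize was passed in MB; the buffer holds
+     buffsize*1024*1024 bytes, i.e. buffsize/sizeof(CFLOAT)*1024*1024
+     CFLOAT cells, and max_elems is the number of rows of totdoc cells
+     that fit into it. */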
+  kernel_cache->elems=0;   /* initialize cache */
+  for(i=0;i<totdoc;i++) {
+    kernel_cache->index[i]=-1;
+    kernel_cache->lru[i]=0;
+  }
+  for(i=0;i<totdoc;i++) {
+    kernel_cache->occu[i]=0;
+    kernel_cache->invindex[i]=-1;
+  }
+
+  kernel_cache->activenum=totdoc;
+  for(i=0;i<totdoc;i++) {
+    kernel_cache->active2totdoc[i]=i;
+    kernel_cache->totdoc2active[i]=i;
+  }
+
+  kernel_cache->time=0;
+
+  return(kernel_cache);
+}
+
+void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache)
+{
+  long maxlru=0,k;
+
+  for(k=0;k<kernel_cache->max_elems;k++) {
+    if(maxlru < kernel_cache->lru[k])
+      maxlru=kernel_cache->lru[k];
+  }
+  for(k=0;k<kernel_cache->max_elems;k++) {
+    kernel_cache->lru[k]-=maxlru;
+  }
+}
+
+void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache)
+{
+  free(kernel_cache->index);
+  free(kernel_cache->occu);
+  free(kernel_cache->lru);
+  free(kernel_cache->invindex);
+  free(kernel_cache->active2totdoc);
+  free(kernel_cache->totdoc2active);
+  free(kernel_cache->buffer);
+  free(kernel_cache);
+}
+
+long kernel_cache_malloc(KERNEL_CACHE *kernel_cache)
+{
+  long i;
+
+  if(kernel_cache_space_available(kernel_cache)) {
+    for(i=0;i<kernel_cache->max_elems;i++) {
+      if(!kernel_cache->occu[i]) {
+        kernel_cache->occu[i]=1;
+        kernel_cache->elems++;
+        return(i);
+      }
+    }
+  }
+  return(-1);
+}
+
+void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i)
+{
+  kernel_cache->occu[i]=0;
+  kernel_cache->elems--;
+}
+
+long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache)
+     /* remove least recently used cache element */
+{
+  register long k,least_elem=-1,least_time;
+
+  least_time=kernel_cache->time+1;
+  for(k=0;k<kernel_cache->max_elems;k++) {
+    if(kernel_cache->invindex[k] != -1) {
+      if(kernel_cache->lru[k]<least_time) {
+        least_time=kernel_cache->lru[k];
+        least_elem=k;
+      }
+    }
+  }
+  if(least_elem != -1) {
+    kernel_cache_free(kernel_cache,least_elem);
+    kernel_cache->index[kernel_cache->invindex[least_elem]]=-1;
+    kernel_cache->invindex[least_elem]=-1;
+    return(1);
+  }
+  return(0);
+}
+
+
+CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache,
+                                      long int docnum)
+     /* Get a free cache entry. In case cache is full, the lru element
+        is removed. */
+{
+  long result;
+  if((result = kernel_cache_malloc(kernel_cache)) == -1) {
+    if(kernel_cache_free_lru(kernel_cache)) {
+      result = kernel_cache_malloc(kernel_cache);
+    }
+  }
+  kernel_cache->index[docnum]=result;
+  if(result == -1) {
+    return(0);
+  }
+  kernel_cache->invindex[result]=docnum;
+  kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
+  return((CFLOAT *)((long)kernel_cache->buffer
+                    +(kernel_cache->activenum*sizeof(CFLOAT)*
+                      kernel_cache->index[docnum])));
+}
+
+long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum)
+     /* Update lru time to avoid removal from cache. */
+{
+  if(kernel_cache && kernel_cache->index[docnum] != -1) {
+    kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
+    return(1);
+  }
+  return(0);
+}
+
+long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum)
+     /* Is that row cached? */
+{
+  return(kernel_cache->index[docnum] != -1);
+}
+
+long kernel_cache_space_available(KERNEL_CACHE *kernel_cache)
+     /* Is there room for one more row? */
+{
+  return(kernel_cache->elems < kernel_cache->max_elems);
+}
+
+/************************** Compute estimates ******************************/
+
+void compute_xa_estimates(MODEL *model, long int *label,
+                          long int *unlabeled, long int totdoc,
+                          DOC **docs, double *lin, double *a,
+                          KERNEL_PARM *kernel_parm,
+                          LEARN_PARM *learn_parm, double *error,
+                          double *recall, double *precision)
+     /* Computes xa-estimate of error rate, recall, and precision. See
+        T. Joachims, Estimating the Generalization Performance of an SVM
+        Efficiently, ICML, 2000. */
+{
+  long i,looerror,looposerror,loonegerror;
+  long totex,totposex;
+  double xi,r_delta,r_delta_sq,sim=0;
+  long *sv2dnum=NULL,*sv=NULL,svnum;
+
+  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
+  r_delta_sq=r_delta*r_delta;
+
+  looerror=0;
+  looposerror=0;
+  loonegerror=0;
+  totex=0;
+  totposex=0;
+  svnum=0;
+
+  if(learn_parm->xa_depth > 0) {
+    sv = (long *)my_malloc(sizeof(long)*(totdoc+11));
+    for(i=0;i<totdoc;i++)
+      sv[i]=0;
+    for(i=1;i<model->sv_num;i++)
+      if(a[model->supvec[i]->docnum]
+         < (learn_parm->svm_cost[model->supvec[i]->docnum]
+            -learn_parm->epsilon_a)) {
+        sv[model->supvec[i]->docnum]=1;
+        svnum++;
+      }
+    sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+    clear_index(sv2dnum);
+    compute_index(sv,totdoc,sv2dnum);
+  }
+
+  for(i=0;i<totdoc;i++) {
+    if(unlabeled[i]) {
+      /* ignore it */
+    }
+    else {
+      xi=1.0-((lin[i]-model->b)*(double)label[i]);
+      if(xi<0) xi=0;
+      if(label[i]>0) {
+        totposex++;
+      }
+      if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) {
+        if(learn_parm->xa_depth > 0) {  /* makes assumptions */
+          sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label,
+                                          kernel_parm,learn_parm,
+                                          (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i])));
+        }
+        if((learn_parm->xa_depth == 0) ||
+           ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) {
+          looerror++;
+          if(label[i]>0) {
+            looposerror++;
+          }
+          else {
+            loonegerror++;
+          }
+        }
+      }
+      totex++;
+    }
+  }
+
+  (*error)=((double)looerror/(double)totex)*100.0;
+  (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0;
+  (*precision)=(((double)totposex-(double)looposerror)
+                /((double)totposex-(double)looposerror+(double)loonegerror))*100.0;
+
+  free(sv);
+  free(sv2dnum);
+}
+
+
+double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum,
+                                   DOC **docs, double *a,
+                                   long int docnum,
+                                   long int *label,
+                                   KERNEL_PARM *kernel_parm,
+                                   LEARN_PARM *learn_parm, double thresh)
+     /* Experimental code improving plain XiAlpha estimates by
+        computing a better bound using a greedy optimization strategy. */
+{
+  long best_depth=0;
+  long i,j,k,d,skip,allskip;
+  double best,best_val[101],val,init_val_sq,init_val_lin;
+  long best_ex[101];
+  CFLOAT *cache,*trow;
+
+  cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum);
+  trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum);
+
+  for(k=0;k<svnum;k++) {
+    trow[k]=kernel(kernel_parm,docs[docnum],docs[sv2dnum[k]]);
+  }
+
+  init_val_sq=0;
+  init_val_lin=0;
+  best=0;
+
+  for(d=0;d<learn_parm->xa_depth;d++) {
+    allskip=1;
+    if(d>=1) {
+      init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)];
+      for(k=0;k<d-1;k++) {
+        init_val_sq+=2.0*cache[best_ex[k]+svnum*(d-1)];
+      }
+      init_val_lin+=trow[best_ex[d-1]];
+    }
+    for(i=0;i<svnum;i++) {
+      skip=0;
+      if(sv2dnum[i] == docnum) skip=1;
+      for(j=0;j<d;j++) {
+        if(i == best_ex[j]) skip=1;
+      }
+
+      if(!skip) {
+        val=init_val_sq;
+        val+=kernel(kernel_parm,docs[sv2dnum[i]],docs[sv2dnum[i]]);
+        for(j=0;j<d;j++) {
+          val+=2.0*cache[i+j*svnum];
+        }
+        val*=(1.0/(2.0*(d+1.0)*(d+1.0)));
+        val-=((init_val_lin+trow[i])/(d+1.0));
+
+        if(allskip || (val < best_val[d])) {
+          best_val[d]=val;
+          best_ex[d]=i;
+        }
+        allskip=0;
+        if(val < thresh) {
+          i=svnum;
+        }
+      }
+    }
+    if(!allskip) {
+      for(k=0;k<svnum;k++) {
+        cache[d*svnum+k]=kernel(kernel_parm,docs[sv2dnum[best_ex[d]]],
+                                docs[sv2dnum[k]]);
+      }
+    }
+    if((!allskip) && ((best_val[d] < best) || (d == 0))) {
+      best=best_val[d];
+      best_depth=d;
+    }
+    if(allskip || (best < thresh)) {
+      d=learn_parm->xa_depth;
+    }
+  }
+
+  free(cache);
+  free(trow);
+
+  /* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */
+  return(best);
+}
+
+
+void estimate_transduction_quality(MODEL *model, long int *label,
+                                   long int *unlabeled,
+                                   long int totdoc, DOC **docs, double *lin)
+     /* Loo-bound based on observation that loo-errors must have an
+        equal distribution in both training and test examples, given
+        that the test examples are classified correctly. Compare
+        chapter "Constraints on the Transductive Hyperplane" in my
+        Dissertation. */
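+  /* Sketch of the bound computed below: every support vector j
+     contributes fabs(alpha_j)*r_delta_sq + xi_j; whenever that sum is
+     at least 1 it is counted as a potential leave-one-out error,
+     tallied separately for labeled and unlabeled examples, and the
+     printed ratios compare the two distributions. */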
+{
+  long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0;
+  double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0;
+  double r_delta,r_delta_sq,xi,xisum=0,asum=0;
+
+  r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm));
+  r_delta_sq=r_delta*r_delta;
+
+  for(j=0;j<totdoc;j++) {
+    if(unlabeled[j]) {
+      totulab++;
+    }
+    else {
+      totlab++;
+      if(label[j] > 0)
+        totlabpos++;
+      else
+        totlabneg++;
+    }
+  }
+  for(j=1;j<model->sv_num;j++) {
+    i=model->supvec[j]->docnum;
+    xi=1.0-((lin[i]-model->b)*(double)label[i]);
+    if(xi<0) xi=0;
+
+    xisum+=xi;
+    asum+=fabs(model->alpha[j]);
+    if(unlabeled[i]) {
+      ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
+    }
+    else {
+      labsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
+    }
+    if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) {
+      l++;
+      if(unlabeled[model->supvec[j]->docnum]) {
+        ulab++;
+        if(model->alpha[j] > 0)
+          ulabpos++;
+        else
+          ulabneg++;
+      }
+      else {
+        lab++;
+        if(model->alpha[j] > 0)
+          labpos++;
+        else
+          labneg++;
+      }
+    }
+  }
+  printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0);
+  printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0);
+  printf("xacrit>=1: labeled=%.5f unlabeled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0);
+  printf("xacritsum: labeled=%.5f unlabeled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0);
+  printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum);
+}
+
+double estimate_margin_vcdim(MODEL *model, double w, double R)
+     /* optional: length of model vector in feature space */
+     /* optional: radius of ball containing the data */
+{
+  double h;
+
+  /* follows chapter 5.6.4 in [Vapnik/95] */
+
+  if(w<0) {
+    w=model_length_s(model);
+  }
+  if(R<0) {
+    R=estimate_sphere(model);
+  }
+  h = w*w * R*R +1;
+  return(h);
+}
+
+double estimate_sphere(MODEL *model)
+     /* Approximates the radius of the ball containing */
+     /* the support vectors by bounding it with the    */
+{    /* length of the longest support vector. This is  */
+  register long j;       /* pretty good for text categorization, since all */
+  double xlen,maxxlen=0; /* documents have feature vectors of length 1. It */
+  DOC *nulldoc;          /* assumes that the center of the ball is at the  */
+  WORD nullword;         /* origin of the space.                           */
+  KERNEL_PARM *kernel_parm=&(model->kernel_parm);
+
+  nullword.wnum=0;
+  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+  for(j=1;j<model->sv_num;j++) {
+    xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j])
+              -2*kernel(kernel_parm,model->supvec[j],nulldoc)
+              +kernel(kernel_parm,nulldoc,nulldoc));
+    if(xlen>maxxlen) {
+      maxxlen=xlen;
+    }
+  }
+
+  free_example(nulldoc,1);
+  return(maxxlen);
+}
+
+double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm)
+{
+  long i;
+  double maxxlen,xlen;
+  DOC *nulldoc;  /* assumes that the center of the ball is at the */
+  WORD nullword; /* origin of the space.                          */
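+  /* nulldoc stands in for the origin, so vector lengths follow from
+     kernel values alone:
+         ||x - 0|| = sqrt( K(x,x) - 2*K(x,0) + K(0,0) ) ,
+     which is exactly the expression inside the loop below. */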
+
+  nullword.wnum=0;
+  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+  maxxlen=0;
+  for(i=0;i<totdoc;i++) {
+    xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])
+              -2*kernel(kernel_parm,docs[i],nulldoc)
+              +kernel(kernel_parm,nulldoc,nulldoc));
+    if(xlen>maxxlen) {
+      maxxlen=xlen;
+    }
+  }
+
+  free_example(nulldoc,1);
+  return(maxxlen);
+}
+
+double estimate_r_delta_average(DOC **docs, long int totdoc,
+                                KERNEL_PARM *kernel_parm)
+{
+  long i;
+  double avgxlen;
+  DOC *nulldoc;  /* assumes that the center of the ball is at the */
+  WORD nullword; /* origin of the space.                          */
+
+  nullword.wnum=0;
+  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+  avgxlen=0;
+  for(i=0;i<totdoc;i++) {
+    avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i])
+                  -2*kernel(kernel_parm,docs[i],nulldoc)
+                  +kernel(kernel_parm,nulldoc,nulldoc));
+  }
+
+  free_example(nulldoc,1);
+  return(avgxlen/totdoc);
+}
+
+double length_of_longest_document_vector(DOC **docs, long int totdoc,
+                                         KERNEL_PARM *kernel_parm)
+{
+  long i;
+  double maxxlen,xlen;
+
+  maxxlen=0;
+  for(i=0;i<totdoc;i++) {
+    xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]));
+    if(xlen>maxxlen) {
+      maxxlen=xlen;
+    }
+  }
+
+  return(maxxlen);
+}
+
+/****************************** IO-handling **********************************/
+
+void write_prediction(char *predfile, MODEL *model, double *lin,
+                      double *a, long int *unlabeled,
+                      long int *label, long int totdoc,
+                      LEARN_PARM *learn_parm)
+{
+  FILE *predfl;
+  long i;
+  double dist,a_max;
+
+  if(verbosity>=1) {
+    printf("Writing prediction file..."); fflush(stdout);
+  }
+  if ((predfl = fopen (predfile, "w")) == NULL)
+  { perror (predfile); exit (1); }
+  a_max=learn_parm->epsilon_a;
+  for(i=0;i<totdoc;i++) {
+    if((unlabeled[i]) && (a[i]>a_max)) {
+      a_max=a[i];
+    }
+  }
+  for(i=0;i<totdoc;i++) {
+    if(unlabeled[i]) {
+      if((a[i]>(learn_parm->epsilon_a))) {
+        dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0));
+      }
+      else {
+        dist=(lin[i]-model->b);
+      }
+      if(dist>0) {
+        fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
+      }
+      else {
+        fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
+      }
+    }
+  }
+  fclose(predfl);
+  if(verbosity>=1) {
+    printf("done\n");
+  }
+}
+
+void write_alphas(char *alphafile, double *a,
+                  long int *label, long int totdoc)
+{
+  FILE *alphafl;
+  long i;
+
+  if(verbosity>=1) {
+    printf("Writing alpha file..."); fflush(stdout);
+  }
+  if ((alphafl = fopen (alphafile, "w")) == NULL)
+  { perror (alphafile); exit (1); }
+  for(i=0;i<totdoc;i++) {
+    fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]);
+  }
+  fclose(alphafl);
+  if(verbosity>=1) {
+    printf("done\n");
+  }
+}
+
diff --git a/src/classifier/svm/svm_light/svm_learn.h b/src/classifier/svm/svm_light/svm_learn.h
new file mode 100644
index 0000000..9dc57b4
--- /dev/null
+++ b/src/classifier/svm/svm_light/svm_learn.h
@@ -0,0 +1,166 @@
+/***********************************************************************/
+/*                                                                     */
+/*   svm_learn.h                                                       */
+/*                                                                     */
+/*   Declarations for learning module of Support Vector Machine.      */
+/*                                                                     */
+/*   Author: Thorsten Joachims                                         */
+/*   Date: 02.07.02                                                    */
+/*                                                                     */
+/*   Copyright (c) 2002  Thorsten Joachims - All rights reserved       */
+/*                                                                     */
+/*   This software is available for non-commercial use only. It must  */
+/*   not be modified and distributed without prior permission of the  */
+/*   author. The author is not responsible for implications from the  */
+/*   use of this software.
*/ +/* */ +/***********************************************************************/ + +#ifndef SVM_LEARN +#define SVM_LEARN + +#ifdef __cplusplus +extern "C" { +#endif +void svm_learn_classification(DOC **, double *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE *, MODEL *, double *); +void svm_learn_regression(DOC **, double *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE **, MODEL *); +void svm_learn_ranking(DOC **, double *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE **, MODEL *); +void svm_learn_optimization(DOC **, double *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE *, MODEL *, double *); +long optimize_to_convergence(DOC **, long *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE *, SHRINK_STATE *, + MODEL *, long *, long *, double *, double *, + double *, TIMING *, double *, long, long); +long optimize_to_convergence_sharedslack(DOC **, long *, long, long, + LEARN_PARM *, KERNEL_PARM *, + KERNEL_CACHE *, SHRINK_STATE *, + MODEL *, double *, double *, double *, + TIMING *, double *); +double compute_objective_function(double *, double *, double *, double, long *, + long *); +void clear_index(long *); +void add_to_index(long *, long); +long compute_index(long *, long, long *); +void optimize_svm(DOC **, long *, long *, long *, double, long *, long *, + MODEL *, long, long *, long, double *, double *, double *, + LEARN_PARM *, CFLOAT *, KERNEL_PARM *, QP *, double *); +void compute_matrices_for_optimization(DOC **, long *, long *, long *, double, + long *, long *, long *, MODEL *, + double *, double *, double *, long, long, + LEARN_PARM *, CFLOAT *, KERNEL_PARM *, + QP *); +long calculate_svm_model(DOC **, long *, long *, double *, double *, double *, + double *, LEARN_PARM *, long *, long *, MODEL *); +long check_optimality(MODEL *, long *, long *, double *, double *, double *, + long, LEARN_PARM *, double *, double, long *, long *, + long *, long *, long, KERNEL_PARM *); +long check_optimality_sharedslack( + DOC **docs, MODEL *model, long int *label, double *a, double *lin, + double *c, double *slack, double *alphaslack, long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, double epsilon_crit_org, + long int *misclassified, long int *active2dnum, + long int *last_suboptimal_at, long int iteration, KERNEL_PARM *kernel_parm); +void compute_shared_slacks(DOC **docs, long int *label, double *a, double *lin, + double *c, long int *active2dnum, + LEARN_PARM *learn_parm, double *slack, + double *alphaslack); +long identify_inconsistent(double *, long *, long *, long, LEARN_PARM *, long *, + long *); +long identify_misclassified(double *, long *, long *, long, MODEL *, long *, + long *); +long identify_one_misclassified(double *, long *, long *, long, MODEL *, long *, + long *); +long incorporate_unlabeled_examples(MODEL *, long *, long *, long *, double *, + double *, long, double *, long *, long *, + long, KERNEL_PARM *, LEARN_PARM *); +void update_linear_component(DOC **, long *, long *, double *, double *, long *, + long, long, KERNEL_PARM *, KERNEL_CACHE *, + double *, CFLOAT *, double *); +long select_next_qp_subproblem_grad(long *, long *, double *, double *, + double *, long, long, LEARN_PARM *, long *, + long *, long *, double *, long *, + KERNEL_CACHE *, long, long *, long *); +long select_next_qp_subproblem_rand(long *, long *, double *, double *, + double *, long, long, LEARN_PARM *, long *, + long *, long *, double *, long *, + KERNEL_CACHE *, long *, long *, long); +long select_next_qp_slackset(DOC **docs, long int 
*label, double *a, + double *lin, double *slack, double *alphaslack, + double *c, LEARN_PARM *learn_parm, + long int *active2dnum, double *maxviol); +void select_top_n(double *, long, long *, long); +void init_shrink_state(SHRINK_STATE *, long, long); +void shrink_state_cleanup(SHRINK_STATE *); +long shrink_problem(DOC **, LEARN_PARM *, SHRINK_STATE *, KERNEL_PARM *, long *, + long *, long, long, long, double *, long *); +void reactivate_inactive_examples(long *, long *, double *, SHRINK_STATE *, + double *, double *, long, long, long, + LEARN_PARM *, long *, DOC **, KERNEL_PARM *, + KERNEL_CACHE *, MODEL *, CFLOAT *, double *, + double *); + +/* cache kernel evalutations to improve speed */ +KERNEL_CACHE *kernel_cache_init(long, long); +void kernel_cache_cleanup(KERNEL_CACHE *); +void get_kernel_row(KERNEL_CACHE *, DOC **, long, long, long *, CFLOAT *, + KERNEL_PARM *); +void cache_kernel_row(KERNEL_CACHE *, DOC **, long, KERNEL_PARM *); +void cache_multiple_kernel_rows(KERNEL_CACHE *, DOC **, long *, long, + KERNEL_PARM *); +void kernel_cache_shrink(KERNEL_CACHE *, long, long, long *); +void kernel_cache_reset_lru(KERNEL_CACHE *); +long kernel_cache_malloc(KERNEL_CACHE *); +void kernel_cache_free(KERNEL_CACHE *, long); +long kernel_cache_free_lru(KERNEL_CACHE *); +CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *, long); +long kernel_cache_touch(KERNEL_CACHE *, long); +long kernel_cache_check(KERNEL_CACHE *, long); +long kernel_cache_space_available(KERNEL_CACHE *); + +void compute_xa_estimates(MODEL *, long *, long *, long, DOC **, double *, + double *, KERNEL_PARM *, LEARN_PARM *, double *, + double *, double *); +double xa_estimate_error(MODEL *, long *, long *, long, DOC **, double *, + double *, KERNEL_PARM *, LEARN_PARM *); +double xa_estimate_recall(MODEL *, long *, long *, long, DOC **, double *, + double *, KERNEL_PARM *, LEARN_PARM *); +double xa_estimate_precision(MODEL *, long *, long *, long, DOC **, double *, + double *, KERNEL_PARM *, LEARN_PARM *); +void avg_similarity_of_sv_of_one_class(MODEL *, DOC **, double *, long *, + KERNEL_PARM *, double *, double *); +double most_similar_sv_of_same_class(MODEL *, DOC **, double *, long, long *, + KERNEL_PARM *, LEARN_PARM *); +double distribute_alpha_t_greedily(long *, long, DOC **, double *, long, long *, + KERNEL_PARM *, LEARN_PARM *, double); +double distribute_alpha_t_greedily_noindex(MODEL *, DOC **, double *, long, + long *, KERNEL_PARM *, LEARN_PARM *, + double); +void estimate_transduction_quality(MODEL *, long *, long *, long, DOC **, + double *); +double estimate_margin_vcdim(MODEL *, double, double); +double estimate_sphere(MODEL *); +double estimate_r_delta_average(DOC **, long, KERNEL_PARM *); +double estimate_r_delta(DOC **, long, KERNEL_PARM *); +double length_of_longest_document_vector(DOC **, long, KERNEL_PARM *); + +void write_model(char *, MODEL *); +void write_prediction(char *, MODEL *, double *, double *, long *, long *, long, + LEARN_PARM *); +void write_alphas(char *, double *, long *, long); + +typedef struct cache_parm_s { + KERNEL_CACHE *kernel_cache; + CFLOAT *cache; + DOC **docs; + long m; + KERNEL_PARM *kernel_parm; + long offset, stepsize; +} cache_parm_t; + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/classifier/svm/svm_light/svm_learn_main.c b/src/classifier/svm/svm_light/svm_learn_main.c new file mode 100644 index 0000000..ee3974b --- /dev/null +++ b/src/classifier/svm/svm_light/svm_learn_main.c @@ -0,0 +1,303 @@ 
+/***********************************************************************/ +/* */ +/* svm_learn_main.c */ +/* */ +/* Command line interface to the learning module of the */ +/* Support Vector Machine. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 02.07.02 */ +/* */ +/* Copyright (c) 2000 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. */ +/* */ +/***********************************************************************/ + + +/* if svm-learn is used out of C++, define it as extern "C" */ +#ifdef __cplusplus +extern "C" { +#endif + +# include "svm_common.h" +# include "svm_learn.h" + +#ifdef __cplusplus +} +#endif + +char docfile[200]; /* file with training examples */ +char modelfile[200]; /* file for resulting classifier */ +char restartfile[200]; /* file with initial alphas */ + +void read_input_parameters(int, char **, char *, char *, char *, long *, + LEARN_PARM *, KERNEL_PARM *); +void wait_any_key(); +void print_help(); + + + +int main (int argc, char* argv[]) +{ + DOC **docs; /* training examples */ + long totwords,totdoc,i; + double *target; + double *alpha_in=NULL; + KERNEL_CACHE *kernel_cache; + LEARN_PARM learn_parm; + KERNEL_PARM kernel_parm; + MODEL *model=(MODEL *)my_malloc(sizeof(MODEL)); + + read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity, + &learn_parm,&kernel_parm); + read_documents(docfile,&docs,&target,&totwords,&totdoc); + if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc); + + if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */ + kernel_cache=NULL; + } + else { + /* Always get a new kernel cache. It is not possible to use the + same cache for two different training runs */ + kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size); + } + + if(learn_parm.type == CLASSIFICATION) { + svm_learn_classification(docs,target,totdoc,totwords,&learn_parm, + &kernel_parm,kernel_cache,model,alpha_in); + } + else if(learn_parm.type == REGRESSION) { + svm_learn_regression(docs,target,totdoc,totwords,&learn_parm, + &kernel_parm,&kernel_cache,model); + } + else if(learn_parm.type == RANKING) { + svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm, + &kernel_parm,&kernel_cache,model); + } + else if(learn_parm.type == OPTIMIZATION) { + svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm, + &kernel_parm,kernel_cache,model,alpha_in); + } + + if(kernel_cache) { + /* Free the memory used for the cache. */ + kernel_cache_cleanup(kernel_cache); + } + + /* Warning: The model contains references to the original data 'docs'. + If you want to free the original data, and only keep the model, you + have to make a deep copy of 'model'. 
*/
+  /* deep_copy_of_model=copy_model(model); */
+  write_model(modelfile,model);
+
+  free(alpha_in);
+  free_model(model,0);
+  for(i=0;i<totdoc;i++)
+    free_example(docs[i],1);
+  free(docs);
+  free(target);
+
+  return(0);
+}
+
+void read_input_parameters(int argc,char *argv[],char *docfile,
+                           char *modelfile,char *restartfile,
+                           long *verbosity,
+                           LEARN_PARM *learn_parm,KERNEL_PARM *kernel_parm)
+{
+  long i;
+  char type[100];
+
+  /* set default */
+  strcpy (modelfile, "svm_model");
+  strcpy (restartfile, "");
+  strcpy (type,"c");
+  (*verbosity)=1;
+
+  for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
+    switch ((argv[i])[1])
+      {
+      case '?': print_help(); exit(0);
+      case 'z': i++; strcpy(type,argv[i]); break;
+      case 'v': i++; (*verbosity)=atol(argv[i]); break;
+      case 'b': i++; learn_parm->biased_hyperplane=atol(argv[i]); break;
+      case 'i': i++; learn_parm->remove_inconsistent=atol(argv[i]); break;
+      case 'f': i++; learn_parm->skip_final_opt_check=!atol(argv[i]); break;
+      case 'q': i++; learn_parm->svm_maxqpsize=atol(argv[i]); break;
+      case 'n': i++; learn_parm->svm_newvarsinqp=atol(argv[i]); break;
+      case '#': i++; learn_parm->maxiter=atol(argv[i]); break;
+      case 'h': i++; learn_parm->svm_iter_to_shrink=atol(argv[i]); break;
+      case 'm': i++; learn_parm->kernel_cache_size=atol(argv[i]); break;
+      case 'c': i++; learn_parm->svm_c=atof(argv[i]); break;
+      case 'w': i++; learn_parm->eps=atof(argv[i]); break;
+      case 'p': i++; learn_parm->transduction_posratio=atof(argv[i]); break;
+      case 'j': i++; learn_parm->svm_costratio=atof(argv[i]); break;
+      case 'e': i++; learn_parm->epsilon_crit=atof(argv[i]); break;
+      case 'o': i++; learn_parm->rho=atof(argv[i]); break;
+      case 'k': i++; learn_parm->xa_depth=atol(argv[i]); break;
+      case 'x': i++; learn_parm->compute_loo=atol(argv[i]); break;
+      case 't': i++; kernel_parm->kernel_type=atol(argv[i]); break;
+      case 'd': i++; kernel_parm->poly_degree=atol(argv[i]); break;
+      case 'g': i++; kernel_parm->rbf_gamma=atof(argv[i]); break;
+      case 's': i++; kernel_parm->coef_lin=atof(argv[i]); break;
+      case 'r': i++; kernel_parm->coef_const=atof(argv[i]); break;
+      case 'u': i++; strcpy(kernel_parm->custom,argv[i]); break;
+      case 'l': i++; strcpy(learn_parm->predfile,argv[i]); break;
+      case 'a': i++; strcpy(learn_parm->alphafile,argv[i]); break;
+      case 'y': i++; strcpy(restartfile,argv[i]); break;
+      default: printf("\nUnrecognized option %s!\n\n",argv[i]);
+               print_help();
+               exit(0);
+      }
+  }
+  if(i>=argc) {
+    printf("\nNot enough input parameters!\n\n");
+    wait_any_key();
+    print_help();
+    exit(0);
+  }
+  strcpy (docfile, argv[i]);
+  if((i+1)<argc) {
+    strcpy (modelfile, argv[i+1]);
+  }
+  if(learn_parm->svm_iter_to_shrink == -9999) {
+    if(kernel_parm->kernel_type == LINEAR)
+      learn_parm->svm_iter_to_shrink=2;
+    else
+      learn_parm->svm_iter_to_shrink=100;
+  }
+  if(strcmp(type,"c")==0) {
+    learn_parm->type=CLASSIFICATION;
+  }
+  else if(strcmp(type,"r")==0) {
+    learn_parm->type=REGRESSION;
+  }
+  else if(strcmp(type,"p")==0) {
+    learn_parm->type=RANKING;
+  }
+  else if(strcmp(type,"o")==0) {
+    learn_parm->type=OPTIMIZATION;
+  }
+  else if(strcmp(type,"s")==0) {
+    learn_parm->type=OPTIMIZATION;
+    learn_parm->sharedslack=1;
+  }
+  else {
+    printf("\nUnknown type '%s': Valid types are 'c' (classification), 'r' regression, and 'p' preference ranking.\n",type);
+    wait_any_key();
+    print_help();
+    exit(0);
+  }
+  if (!check_learning_parms(learn_parm, kernel_parm)) {
+    wait_any_key();
+    print_help();
+    exit(0);
+  }
+}
+
+void wait_any_key()
+{
+  printf("\n(more)\n");
+  (void)getc(stdin);
+}
+
+void print_help()
+{
+  printf("\nSVM-light %s: Support Vector Machine, learning module %s\n",VERSION,VERSION_DATE);
+  copyright_notice();
+  printf("   usage: svm_learn [options] example_file model_file\n\n");
+  printf("Arguments:\n");
+  printf("         example_file-> file with training data\n");
+  printf("         model_file  -> file to store learned decision rule in\n");
+
+  printf("General options:\n");
+  printf("          -?          
-> this help\n"); + printf(" -v [0..3] -> verbosity level (default 1)\n"); + printf("Learning options:\n"); + printf(" -z {c,r,p} -> select between classification (c), regression (r),\n"); + printf(" and preference ranking (p) (default classification)\n"); + printf(" -c float -> C: trade-off between training error\n"); + printf(" and margin (default [avg. x*x]^-1)\n"); + printf(" -w [0..] -> epsilon width of tube for regression\n"); + printf(" (default 0.1)\n"); + printf(" -j float -> Cost: cost-factor, by which training errors on\n"); + printf(" positive examples outweight errors on negative\n"); + printf(" examples (default 1) (see [4])\n"); + printf(" -b [0,1] -> use biased hyperplane (i.e. x*w+b>0) instead\n"); + printf(" of unbiased hyperplane (i.e. x*w>0) (default 1)\n"); + printf(" -i [0,1] -> remove inconsistent training examples\n"); + printf(" and retrain (default 0)\n"); + printf("Performance estimation options:\n"); + printf(" -x [0,1] -> compute leave-one-out estimates (default 0)\n"); + printf(" (see [5])\n"); + printf(" -o ]0..2] -> value of rho for XiAlpha-estimator and for pruning\n"); + printf(" leave-one-out computation (default 1.0) (see [2])\n"); + printf(" -k [0..100] -> search depth for extended XiAlpha-estimator \n"); + printf(" (default 0)\n"); + printf("Transduction options (see [3]):\n"); + printf(" -p [0..1] -> fraction of unlabeled examples to be classified\n"); + printf(" into the positive class (default is the ratio of\n"); + printf(" positive and negative examples in the training data)\n"); + printf("Kernel options:\n"); + printf(" -t int -> type of kernel function:\n"); + printf(" 0: linear (default)\n"); + printf(" 1: polynomial (s a*b+c)^d\n"); + printf(" 2: radial basis function exp(-gamma ||a-b||^2)\n"); + printf(" 3: sigmoid tanh(s a*b + c)\n"); + printf(" 4: user defined kernel from kernel.h\n"); + printf(" -d int -> parameter d in polynomial kernel\n"); + printf(" -g float -> parameter gamma in rbf kernel\n"); + printf(" -s float -> parameter s in sigmoid/poly kernel\n"); + printf(" -r float -> parameter c in sigmoid/poly kernel\n"); + printf(" -u string -> parameter of user defined kernel\n"); + printf("Optimization options (see [1]):\n"); + printf(" -q [2..] -> maximum size of QP-subproblems (default 10)\n"); + printf(" -n [2..q] -> number of new variables entering the working set\n"); + printf(" in each iteration (default n = q). Set n < q to \n"); + printf(" prevent zig-zagging.\n"); + printf(" -m [5..] -> size of cache for kernel evaluations in MB (default 40)\n"); + printf(" The larger the faster...\n"); + printf(" -e float -> eps: Allow that error for termination criterion\n"); + printf(" [y [w*x+b] - 1] >= eps (default 0.001)\n"); + printf(" -y [0,1] -> restart the optimization from alpha values in file\n"); + printf(" specified by -a option. (default 0)\n"); + printf(" -h [5..] -> number of iterations a variable needs to be\n"); + printf(" optimal before considered for shrinking (default 100)\n"); + printf(" -f [0,1] -> do final optimality check for variables removed\n"); + printf(" by shrinking. Although this test is usually \n"); + printf(" positive, there is no guarantee that the optimum\n"); + printf(" was found if the test is omitted. (default 1)\n"); + printf(" -y string -> if option is given, reads alphas from file with given\n"); + printf(" and uses them as starting point. (default 'disabled')\n"); + printf(" -# int -> terminate optimization, if no progress after this\n"); + printf(" number of iterations. 
(default 100000)\n"); + printf("Output options:\n"); + printf(" -l string -> file to write predicted labels of unlabeled\n"); + printf(" examples into after transductive learning\n"); + printf(" -a string -> write all alphas to this file after learning\n"); + printf(" (in the same order as in the training set)\n"); + wait_any_key(); + printf("\nMore details in:\n"); + printf("[1] T. Joachims, Making Large-Scale SVM Learning Practical. Advances in\n"); + printf(" Kernel Methods - Support Vector Learning, B. Schölkopf and C. Burges and\n"); + printf(" A. Smola (ed.), MIT Press, 1999.\n"); + printf("[2] T. Joachims, Estimating the Generalization performance of an SVM\n"); + printf(" Efficiently. International Conference on Machine Learning (ICML), 2000.\n"); + printf("[3] T. Joachims, Transductive Inference for Text Classification using Support\n"); + printf(" Vector Machines. International Conference on Machine Learning (ICML),\n"); + printf(" 1999.\n"); + printf("[4] K. Morik, P. Brockhausen, and T. Joachims, Combining statistical learning\n"); + printf(" with a knowledge-based approach - A case study in intensive care \n"); + printf(" monitoring. International Conference on Machine Learning (ICML), 1999.\n"); + printf("[5] T. Joachims, Learning to Classify Text Using Support Vector\n"); + printf(" Machines: Methods, Theory, and Algorithms. Dissertation, Kluwer,\n"); + printf(" 2002.\n\n"); +} + + diff --git a/src/classifier/svm/svm_light/svm_loqo.c b/src/classifier/svm/svm_light/svm_loqo.c new file mode 100644 index 0000000..ff31a65 --- /dev/null +++ b/src/classifier/svm/svm_light/svm_loqo.c @@ -0,0 +1,211 @@ +/***********************************************************************/ +/* */ +/* svm_loqo.c */ +/* */ +/* Interface to the PR_LOQO optimization package for SVM. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 19.07.99 */ +/* */ +/* Copyright (c) 1999 Universitaet Dortmund - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. 
*/ +/* */ +/***********************************************************************/ + +# include +# include "pr_loqo/pr_loqo.h" +# include "svm_common.h" + +/* Common Block Declarations */ + +long verbosity; + +/* /////////////////////////////////////////////////////////////// */ + +# define DEF_PRECISION_LINEAR 1E-8 +# define DEF_PRECISION_NONLINEAR 1E-14 + +double *optimize_qp(); +double *primal=0,*dual=0; +double init_margin=0.15; +long init_iter=500,precision_violations=0; +double model_b; +double opt_precision=DEF_PRECISION_LINEAR; + +/* /////////////////////////////////////////////////////////////// */ + +void *my_malloc(); + +double *optimize_qp(qp,epsilon_crit,nx,threshold,learn_parm) +QP *qp; +double *epsilon_crit; +long nx; /* Maximum number of variables in QP */ +double *threshold; +LEARN_PARM *learn_parm; +/* start the optimizer and return the optimal values */ +{ + register long i,j,result; + double margin,obj_before,obj_after; + double sigdig,dist,epsilon_loqo; + int iter; + + if(!primal) { /* allocate memory at first call */ + primal=(double *)my_malloc(sizeof(double)*nx*3); + dual=(double *)my_malloc(sizeof(double)*(nx*2+1)); + } + + if(verbosity>=4) { /* really verbose */ + printf("\n\n"); + for(i=0;iopt_n;i++) { + printf("%f: ",qp->opt_g0[i]); + for(j=0;jopt_n;j++) { + printf("%f ",qp->opt_g[i*qp->opt_n+j]); + } + printf(": a%ld=%.10f < %f",i,qp->opt_xinit[i],qp->opt_up[i]); + printf(": y=%f\n",qp->opt_ce[i]); + } + for(j=0;jopt_m;j++) { + printf("EQ-%ld: %f*a0",j,qp->opt_ce[j]); + for(i=1;iopt_n;i++) { + printf(" + %f*a%ld",qp->opt_ce[i],i); + } + printf(" = %f\n\n",-qp->opt_ce0[0]); + } +} + + obj_before=0; /* calculate objective before optimization */ + for(i=0;iopt_n;i++) { + obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]); + obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]); + for(j=0;jopt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]); + } + } + + result=STILL_RUNNING; + qp->opt_ce0[0]*=(-1.0); + /* Run pr_loqo. If a run fails, try again with parameters which lead */ + /* to a slower, but more robust setting. */ + for(margin=init_margin,iter=init_iter; + (margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) { + sigdig=-log10(opt_precision); + + result=pr_loqo((int)qp->opt_n,(int)qp->opt_m, + (double *)qp->opt_g0,(double *)qp->opt_g, + (double *)qp->opt_ce,(double *)qp->opt_ce0, + (double *)qp->opt_low,(double *)qp->opt_up, + (double *)primal,(double *)dual, + (int)(verbosity-2), + (double)sigdig,(int)iter, + (double)margin,(double)(qp->opt_up[0])/4.0,(int)0); + + if(isnan(dual[0])) { /* check for choldc problem */ + if(verbosity>=2) { + printf("NOTICE: Restarting PR_LOQO with more conservative parameters.\n"); + } + if(init_margin<0.80) { /* become more conservative in general */ + init_margin=(4.0*margin+1.0)/5.0; + } + margin=(margin+1.0)/2.0; + (opt_precision)*=10.0; /* reduce precision */ + if(verbosity>=2) { + printf("NOTICE: Reducing precision of PR_LOQO.\n"); + } + } + else if(result!=OPTIMAL_SOLUTION) { + iter+=2000; + init_iter+=10; + (opt_precision)*=10.0; /* reduce precision */ + if(verbosity>=2) { + printf("NOTICE: Reducing precision of PR_LOQO due to (%ld).\n",result); + } + } + } + + if(qp->opt_m) /* Thanks to Alex Smola for this hint */ + model_b=dual[0]; + else + model_b=0; + + /* Check the precision of the alphas. If results of current optimization */ + /* violate KT-Conditions, relax the epsilon on the bounds on alphas. 
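+   The loop below recomputes each variable's constraint value 'dist'
+   from the gradient; when an alpha close to a bound still violates
+   the corresponding KT condition, epsilon_loqo is widened, and the
+   subsequent pass then clips every alpha that lies within
+   epsilon_loqo of a bound exactly onto that bound.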
*/ + epsilon_loqo=1E-10; + for(i=0;iopt_n;i++) { + dist=-model_b*qp->opt_ce[i]; + dist+=(qp->opt_g0[i]+1.0); + for(j=0;jopt_g[j*qp->opt_n+i]); + } + for(j=i;jopt_n;j++) { + dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]); + } + /* printf("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]); */ + if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) { + epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0; + } + else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) { + epsilon_loqo=primal[i]*2.0; + } + } + + for(i=0;iopt_n;i++) { /* clip alphas to bounds */ + if(primal[i]<=(0+epsilon_loqo)) { + primal[i]=0; + } + else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) { + primal[i]=qp->opt_up[i]; + } + } + + obj_after=0; /* calculate objective after optimization */ + for(i=0;iopt_n;i++) { + obj_after+=(qp->opt_g0[i]*primal[i]); + obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]); + for(j=0;jopt_g[j*qp->opt_n+i]); + } + } + + /* if optimizer returned NAN values, reset and retry with smaller */ + /* working set. */ + if(isnan(obj_after) || isnan(model_b)) { + for(i=0;iopt_n;i++) { + primal[i]=qp->opt_xinit[i]; + } + model_b=0; + if(learn_parm->svm_maxqpsize>2) { + learn_parm->svm_maxqpsize--; /* decrease size of qp-subproblems */ + } + } + + if(obj_after >= obj_before) { /* check whether there was progress */ + (opt_precision)/=100.0; + precision_violations++; + if(verbosity>=2) { + printf("NOTICE: Increasing Precision of PR_LOQO.\n"); + } + } + + if(precision_violations > 500) { + (*epsilon_crit)*=10.0; + precision_violations=0; + if(verbosity>=1) { + printf("\nWARNING: Relaxing epsilon on KT-Conditions.\n"); + } + } + + (*threshold)=model_b; + + if(result!=OPTIMAL_SOLUTION) { + printf("\nERROR: PR_LOQO did not converge. 
\n"); + return(qp->opt_xinit); + } + else { + return(primal); + } +} + diff --git a/src/classifier/svm/svm_multiclass_classifier.cpp b/src/classifier/svm/svm_multiclass_classifier.cpp new file mode 100644 index 0000000..90006c6 --- /dev/null +++ b/src/classifier/svm/svm_multiclass_classifier.cpp @@ -0,0 +1,70 @@ +#include "svm_multiclass_classifier.hpp" +#include "svm_struct/svm_struct_common.h" +#include "svm_struct_api.h" + +namespace ovclassifier { +SVMMultiClassClassifier::SVMMultiClassClassifier() {} +SVMMultiClassClassifier::~SVMMultiClassClassifier() { + if (model_ != NULL) { + free_struct_model(*model_); + free(model_); + model_ = NULL; + } + if (sparm_ != NULL) { + free(sparm_); + sparm_ = NULL; + } +} +int SVMMultiClassClassifier::LoadModel(const char *modelfile) { + if (model_ != NULL) { + free_struct_model(*model_); + } + if (sparm_ != NULL) { + free(sparm_); + } + model_ = (STRUCTMODEL *)my_malloc(sizeof(STRUCTMODEL)); + sparm_ = (STRUCT_LEARN_PARM *)my_malloc(sizeof(STRUCT_LEARN_PARM)); + (*model_) = read_struct_model((char *)modelfile, sparm_); + if (model_->svm_model->kernel_parm.kernel_type == + LINEAR) { /* linear kernel */ + /* compute weight vector */ + add_weight_vector_to_linear_model(model_->svm_model); + model_->w = model_->svm_model->lin_weights; + } + return 0; +} + +double SVMMultiClassClassifier::Predict(const float *vec) { return 0; } + +int SVMMultiClassClassifier::Classify(const float *vec, + std::vector &scores) { + if (model_ == NULL || sparm_ == NULL) { + return -1; + } + struct_verbosity = 5; + int feats = sparm_->num_features; + WORD *words = (WORD *)malloc(sizeof(WORD) * (feats + 10)); + for (int i = 0; i < (feats + 10); ++i) { + if (i >= feats) { + words[i].wnum = 0; + words[i].weight = 0; + } else { + words[i].wnum = i + 1; + words[i].weight = vec[i]; + } + } + DOC *doc = + create_example(-1, 0, 0, 0.0, create_svector(words, (char *)"", 1.0)); + free(words); + PATTERN pattern; + pattern.doc = doc; + LABEL y = classify_struct_example(pattern, model_, sparm_); + free_pattern(pattern); + scores.clear(); + for (int i = 1; i <= y.num_classes_; ++i) { + scores.push_back(y.scores[i]); + } + free_label(y); + return 0; +} +} // namespace ovclassifier diff --git a/src/classifier/svm/svm_multiclass_classifier.hpp b/src/classifier/svm/svm_multiclass_classifier.hpp new file mode 100644 index 0000000..55ed2fa --- /dev/null +++ b/src/classifier/svm/svm_multiclass_classifier.hpp @@ -0,0 +1,21 @@ +#ifndef _CLASSIFIER_SVM_MULTICLASS_CLASSIFIER_H_ +#define _CLASSIFIER_SVM_MULTICLASS_CLASSIFIER_H_ + +#include "svm_classifier.hpp" +#include "svm_struct_api_types.h" + +namespace ovclassifier { +class SVMMultiClassClassifier : public SVMClassifier { +public: + SVMMultiClassClassifier(); + ~SVMMultiClassClassifier(); + int LoadModel(const char *modelfile); + double Predict(const float *vec); + int Classify(const float *vec, std::vector &scores); + +private: + STRUCTMODEL *model_ = NULL; + STRUCT_LEARN_PARM *sparm_ = NULL; +}; +} // namespace ovclassifier +#endif // !_CLASSIFIER_SVM_MULTICLASS_CLASSIFIER_H_ diff --git a/src/classifier/svm/svm_multiclass_trainer.cpp b/src/classifier/svm/svm_multiclass_trainer.cpp new file mode 100644 index 0000000..d878f66 --- /dev/null +++ b/src/classifier/svm/svm_multiclass_trainer.cpp @@ -0,0 +1,151 @@ +#include "svm_multiclass_trainer.hpp" +#include "svm_light/svm_learn.h" +#include "svm_struct/svm_struct_learn.h" +#include "svm_struct_api.h" + +namespace ovclassifier { + +SVMMultiClassTrainer::SVMMultiClassTrainer() { + alg_type = 
DEFAULT_ALG_TYPE;
+  struct_parm = (STRUCT_LEARN_PARM *)malloc(sizeof(STRUCT_LEARN_PARM));
+  struct_parm->C = 10000;
+  struct_parm->slack_norm = 1;
+  struct_parm->epsilon = DEFAULT_EPS;
+  struct_parm->custom_argc = 0;
+  struct_parm->loss_function = DEFAULT_LOSS_FCT;
+  struct_parm->loss_type = DEFAULT_RESCALING;
+  struct_parm->newconstretrain = 100;
+  struct_parm->ccache_size = 5;
+  struct_parm->batch_size = 100;
+
+  learn_parm = (LEARN_PARM *)malloc(sizeof(LEARN_PARM));
+  strcpy(learn_parm->predfile, "trans_predictions");
+  strcpy(learn_parm->alphafile, "");
+  learn_parm->biased_hyperplane = 1;
+  learn_parm->remove_inconsistent = 0;
+  learn_parm->skip_final_opt_check = 0;
+  learn_parm->svm_maxqpsize = 10;
+  learn_parm->svm_newvarsinqp = 0;
+  // learn_parm->svm_iter_to_shrink = -9999;
+  learn_parm->svm_iter_to_shrink = 100;
+  learn_parm->maxiter = 100000;
+  learn_parm->kernel_cache_size = 40;
+  learn_parm->svm_c = 99999999;  /* overridden by struct_parm->C */
+  learn_parm->eps = 0.001;       /* overridden by struct_parm->epsilon */
+  learn_parm->transduction_posratio = -1.0;
+  learn_parm->svm_costratio = 1.0;
+  learn_parm->svm_costratio_unlab = 1.0;
+  learn_parm->svm_unlabbound = 1E-5;
+  learn_parm->epsilon_crit = 0.001;
+  learn_parm->epsilon_a = 1E-10; /* changed from 1e-15 */
+  learn_parm->compute_loo = 0;
+  learn_parm->rho = 1.0;
+  learn_parm->xa_depth = 0;
+  kernel_parm = (KERNEL_PARM *)malloc(sizeof(KERNEL_PARM));
+  kernel_parm->kernel_type = 0;
+  kernel_parm->poly_degree = 3;
+  kernel_parm->rbf_gamma = 1.0;
+  kernel_parm->coef_lin = 1;
+  kernel_parm->coef_const = 1;
+  strcpy(kernel_parm->custom, "empty");
+
+  parse_struct_parameters(struct_parm);
+}
+
+SVMMultiClassTrainer::~SVMMultiClassTrainer() {
+  if (learn_parm != NULL) {
+    free(learn_parm);
+    learn_parm = NULL;
+  }
+  if (kernel_parm != NULL) {
+    free(kernel_parm);
+    kernel_parm = NULL;
+  }
+  /* was a duplicate check of learn_parm; struct_parm is the remaining
+     allocation that has to be released here */
+  if (struct_parm != NULL) {
+    free(struct_parm);
+    struct_parm = NULL;
+  }
+}
+
+void SVMMultiClassTrainer::Reset() {
+  labels_ = 0;
+  feats_ = 0;
+  items_.clear();
+}
+void SVMMultiClassTrainer::SetLabels(int labels) { labels_ = labels; }
+void SVMMultiClassTrainer::SetFeatures(int feats) { feats_ = feats; }
+void SVMMultiClassTrainer::AddData(int label, const float *vec) {
+  LabelItem itm;
+  itm.label = label;
+  for (int i = 0; i < feats_; ++i) {
+    itm.vec.push_back(vec[i]);
+  }
+  items_.push_back(itm);
+}
+
+int SVMMultiClassTrainer::Train(const char *modelfile) {
+  struct_verbosity = 2;
+  int totdoc = items_.size();
+  if (totdoc == 0 || feats_ == 0 || labels_ == 0) {
+    return -1;
+  }
+  EXAMPLE *examples = (EXAMPLE *)my_malloc(sizeof(EXAMPLE) * totdoc);
+  /* (feats_ + 10) matches the fill loop below; the classifier in
+     svm_multiclass_classifier.cpp allocates the same way */
+  WORD *words = (WORD *)my_malloc(sizeof(WORD) * (feats_ + 10));
+  for (int dnum = 0; dnum < totdoc; ++dnum) {
+    const int docFeats = items_[dnum].vec.size();
+    for (int i = 0; i < (feats_ + 10); ++i) {
+      if (i >= feats_) {
+        words[i].wnum = 0;
+      } else {
+        (words[i]).wnum = i + 1;
+      }
+      if (i >= docFeats) {
+        (words[i]).weight = 0;
+      } else {
+        (words[i]).weight = (FVAL)items_[dnum].vec[i];
+      }
+    }
+    DOC *doc =
+        create_example(dnum, 0, 0, 0, create_svector(words, (char *)"", 1.0));
+    examples[dnum].x.doc = doc;
+    examples[dnum].y.class_ = (double)items_[dnum].label + 0.1;
+    examples[dnum].y.scores = NULL;
+    examples[dnum].y.num_classes_ = (double)labels_ + 0.1;
+  }
+  free(words);
+
+  SAMPLE sample;
+  sample.n = totdoc;
+  sample.examples = examples;
+  STRUCTMODEL structmodel;
+  /* Do the learning and return structmodel. 
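+     alg_type selects the solver: 0 = n-slack (NSLACK_ALG),
+     1 = n-slack with shrinking (NSLACK_SHRINK_ALG), 2 = one-slack
+     primal (ONESLACK_PRIMAL_ALG), 3 = one-slack dual
+     (ONESLACK_DUAL_ALG), 4 = one-slack dual with constraint cache
+     (ONESLACK_DUAL_CACHE_ALG), 9 = svm_learn_struct_joint_custom();
+     any other value is rejected.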
diff --git a/src/classifier/svm/svm_multiclass_trainer.hpp b/src/classifier/svm/svm_multiclass_trainer.hpp
new file mode 100644
index 0000000..4d12e6e
--- /dev/null
+++ b/src/classifier/svm/svm_multiclass_trainer.hpp
@@ -0,0 +1,31 @@
+#ifndef _SVM_MULTICLASS_TRAINER_H_
+#define _SVM_MULTICLASS_TRAINER_H_
+
+#include "svm_common.hpp"
+#include "svm_light/svm_common.h"
+#include "svm_struct_api_types.h"
+#include "svm_trainer.hpp"
+#include <vector>
+
+namespace ovclassifier {
+class SVMMultiClassTrainer : public SVMTrainer {
+public:
+  SVMMultiClassTrainer();
+  ~SVMMultiClassTrainer();
+  void Reset();
+  void SetLabels(int labels);
+  void SetFeatures(int feats);
+  void AddData(int label, const float *vec);
+  int Train(const char *modelfile);
+
+private:
+  KERNEL_PARM *kernel_parm = NULL;
+  LEARN_PARM *learn_parm = NULL;
+  STRUCT_LEARN_PARM *struct_parm = NULL;
+  int alg_type;
+  int feats_;
+  int labels_;
+  std::vector<LabelItem> items_;
+};
+} // namespace ovclassifier
+#endif // _SVM_MULTICLASS_TRAINER_H_
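Aside: both Classify() and Train() above pack a dense float vector into SVM-light's sparse WORD format before calling into svm_struct. A sketch of that shared convention, not part of the patch (the helper name is illustrative; WORD and FVAL come from svm_light/svm_common.h): feature ids (wnum) are 1-based, and an entry with wnum == 0 terminates the array, which is why the padding entries are zeroed.

// Encoding sketch; 'words' must have room for at least feats + 1 entries.
#include "svm_light/svm_common.h" /* WORD, FVAL */

static void pack_dense_features(const float *vec, int feats, WORD *words) {
  for (int i = 0; i < feats; ++i) {
    words[i].wnum = i + 1;           // SVM-light feature ids start at 1
    words[i].weight = (FVAL)vec[i];  // dense value copied as-is
  }
  words[feats].wnum = 0;             // sentinel: terminates the sparse list
  words[feats].weight = 0;
}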
diff --git a/src/classifier/svm/svm_struct/svm_struct_classify.c b/src/classifier/svm/svm_struct/svm_struct_classify.c
new file mode 100755
index 0000000..ff6b855
--- /dev/null
+++ b/src/classifier/svm/svm_struct/svm_struct_classify.c
@@ -0,0 +1,186 @@
+/***********************************************************************/
+/*                                                                     */
+/*   svm_struct_classify.c                                             */
+/*                                                                     */
+/*   Classification module of SVM-struct.                              */
+/*                                                                     */
+/*   Author: Thorsten Joachims                                         */
+/*   Date: 03.07.04                                                    */
+/*                                                                     */
+/*   Copyright (c) 2004  Thorsten Joachims - All rights reserved       */
+/*                                                                     */
+/*   This software is available for non-commercial use only. It must   */
+/*   not be modified and distributed without prior permission of the   */
+/*   author. The author is not responsible for implications from the   */
+/*   use of this software.                                             */
+/*                                                                     */
+/************************************************************************/
+
+#include <stdio.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "../svm_light/svm_common.h"
+#ifdef __cplusplus
+}
+#endif
+#include "../svm_struct_api.h"
+#include "svm_struct_common.h"
+
+char testfile[200];
+char modelfile[200];
+char predictionsfile[200];
+
+void read_input_parameters(int, char **, char *, char *, char *,
+                           STRUCT_LEARN_PARM *, long*, long *);
+void print_help(void);
+
+
+int main (int argc, char* argv[])
+{
+  long correct=0,incorrect=0,no_accuracy=0;
+  long i;
+  double t1,runtime=0;
+  double avgloss=0,l;
+  FILE *predfl;
+  STRUCTMODEL model;
+  STRUCT_LEARN_PARM sparm;
+  STRUCT_TEST_STATS teststats;
+  SAMPLE testsample;
+  LABEL y;
+
+  svm_struct_classify_api_init(argc,argv);
+
+  read_input_parameters(argc,argv,testfile,modelfile,predictionsfile,&sparm,
+                        &verbosity,&struct_verbosity);
+
+  if(struct_verbosity>=1) {
+    printf("Reading model..."); fflush(stdout);
+  }
+  model=read_struct_model(modelfile,&sparm);
+  if(struct_verbosity>=1) {
+    fprintf(stdout, "done.\n");
+  }
+
+  if(model.svm_model->kernel_parm.kernel_type == LINEAR) { /* linear kernel */
+    /* compute weight vector */
+    add_weight_vector_to_linear_model(model.svm_model);
+    model.w=model.svm_model->lin_weights;
+  }
+
+  if(struct_verbosity>=1) {
+    printf("Reading test examples..."); fflush(stdout);
+  }
+  testsample=read_struct_examples(testfile,&sparm);
+  if(struct_verbosity>=1) {
+    printf("done.\n"); fflush(stdout);
+  }
+
+  if(struct_verbosity>=1) {
+    printf("Classifying test examples..."); fflush(stdout);
+  }
+
+  if ((predfl = fopen (predictionsfile, "w")) == NULL)
+  { perror (predictionsfile); exit (1); }
+
+  for(i=0;i<testsample.n;i++) {
+    t1=get_runtime();
+    y=classify_struct_example(testsample.examples[i].x,&model,&sparm);
+    runtime+=(get_runtime()-t1);
+
+    write_label(predfl,y);
+    l=loss(testsample.examples[i].y,y,&sparm);
+    avgloss+=l;
+    if(l == 0)
+      correct++;
+    else
+      incorrect++;
+    eval_prediction(i,testsample.examples[i],y,&model,&sparm,&teststats);
+
+    if(empty_label(testsample.examples[i].y))
+      { no_accuracy=1; } /* test data is not labeled */
+    if(struct_verbosity>=2) {
+      if((i+1) % 100 == 0) {
+        printf("%ld..",i+1); fflush(stdout);
+      }
+    }
+    free_label(y);
+  }
+  avgloss/=testsample.n;
+  fclose(predfl);
+
+  if(struct_verbosity>=1) {
+    printf("done\n");
+    printf("Runtime (without IO) in cpu-seconds: %.2f\n",
+           (float)(runtime/100.0));
+  }
+  if((!no_accuracy) && (struct_verbosity>=1)) {
+    printf("Average loss on test set: %.4f\n",(float)avgloss);
+    printf("Zero/one-error on test set: %.2f%% (%ld correct, %ld incorrect, %d total)\n",(float)100.0*incorrect/testsample.n,correct,incorrect,testsample.n);
+  }
+  print_struct_testing_stats(testsample,&model,&sparm,&teststats);
+  free_struct_sample(testsample);
+  free_struct_model(model);
+
+  svm_struct_classify_api_exit();
+
+  return(0);
+}
+
+void read_input_parameters(int argc,char *argv[],char *testfile,
+                           char *modelfile,char *predictionsfile,
+                           STRUCT_LEARN_PARM *struct_parm,
+                           long *verbosity,long *struct_verbosity)
+{
+  long i;
+
+  /* set default */
+  strcpy (modelfile, "svm_model");
+  strcpy (predictionsfile, "svm_predictions");
+  (*verbosity)=0;/*verbosity for svm_light*/
+  (*struct_verbosity)=1; /*verbosity for struct learning portion*/
+  struct_parm->custom_argc=0;
+
+  for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
+    switch ((argv[i])[1])
+      {
+      case '-': strcpy(struct_parm->custom_argv[struct_parm->custom_argc++],argv[i]);i++; strcpy(struct_parm->custom_argv[struct_parm->custom_argc++],argv[i]);break;
+      case 'v': i++; (*struct_verbosity)=atol(argv[i]); break;
+      case 'y': i++; (*verbosity)=atol(argv[i]); break;
+      default: printf("\nUnrecognized option %s!\n\n",argv[i]);
+               print_help();
+               exit(0);
+      }
+  }
+  if((i+1)>=argc) {
+    printf("\nNot enough input parameters!\n\n");
+    print_help();
+    exit(0);
+  }
+  strcpy (testfile, argv[i]);
+  strcpy (modelfile, argv[i+1]);
+  if((i+2)<argc) {
+    strcpy (predictionsfile, argv[i+2]);
+  }
+}
+
+void print_help(void)
+{
+  printf("\nSVM-struct classification module: %s, %s, %s\n",INST_NAME,INST_VERSION,INST_VERSION_DATE);
+  printf("   includes SVM-struct %s for learning complex outputs, %s\n",STRUCT_VERSION,STRUCT_VERSION_DATE);
+  printf("   includes SVM-light %s quadratic optimizer, %s\n",VERSION,VERSION_DATE);
+  copyright_notice();
+  printf("   usage: svm_struct_classify [options] example_file model_file output_file\n\n");
+  printf("options: -h         -> this help\n");
+  printf("         -v [0..3]  -> verbosity level (default 2)\n\n");
+
+  print_struct_help_classify();
+}
+
+
+
diff --git 
a/src/classifier/svm/svm_struct/svm_struct_common.c b/src/classifier/svm/svm_struct/svm_struct_common.c new file mode 100644 index 0000000..ebad338 --- /dev/null +++ b/src/classifier/svm/svm_struct/svm_struct_common.c @@ -0,0 +1,66 @@ +/***********************************************************************/ +/* */ +/* svm_struct_common.h */ +/* */ +/* Functions and types used by multiple components of SVM-struct. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 03.07.04 */ +/* */ +/* Copyright (c) 2004 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. */ +/* */ +/***********************************************************************/ + +#include +#include +#include + +#include "svm_struct_common.h" + +long struct_verbosity; /* verbosity level (0-4) */ + +void printIntArray(int* x, int n) +{ + int i; + for(i=0;i= rhs[i] */ + int m; /* m is the total number of constrains */ + DOC **lhs; + double *rhs; +} CONSTSET; + + +/**** print methods ****/ +void printIntArray(int*,int); +void printDoubleArray(double*,int); +void printWordArray(WORD*); +void printModel(MODEL *); +void printW(double *, long, long, double); + +extern long struct_verbosity; /* verbosity level (0-4) */ + +#endif diff --git a/src/classifier/svm/svm_struct/svm_struct_learn.c b/src/classifier/svm/svm_struct/svm_struct_learn.c new file mode 100755 index 0000000..005facf --- /dev/null +++ b/src/classifier/svm/svm_struct/svm_struct_learn.c @@ -0,0 +1,1289 @@ +/***********************************************************************/ +/* */ +/* svm_struct_learn.c */ +/* */ +/* Basic algorithm for learning structured outputs (e.g. parses, */ +/* sequences, multi-label classification) with a Support Vector */ +/* Machine. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 26.06.06 */ +/* */ +/* Copyright (c) 2006 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. */ +/* */ +/***********************************************************************/ + +#include "svm_struct_learn.h" +#include "svm_struct_common.h" +#include "../svm_struct_api.h" +#include + +#define MAX(x,y) ((x) < (y) ? (y) : (x)) +#define MIN(x,y) ((x) > (y) ? 
(y) : (x)) + + +void svm_learn_struct(SAMPLE sample, STRUCT_LEARN_PARM *sparm, + LEARN_PARM *lparm, KERNEL_PARM *kparm, + STRUCTMODEL *sm, int alg_type) +{ + int i,j; + int numIt=0; + long argmax_count=0; + long newconstraints=0, totconstraints=0, activenum=0; + int opti_round, *opti, fullround, use_shrinking; + long old_totconstraints=0; + double epsilon,svmCnorm; + long tolerance,new_precision=1,dont_stop=0; + double lossval,factor,dist; + double margin=0; + double slack, *slacks, slacksum, ceps; + double dualitygap,modellength,alphasum; + long sizePsi; + double *alpha=NULL; + long *alphahist=NULL,optcount=0,lastoptcount=0; + CONSTSET cset; + SVECTOR *diff=NULL; + SVECTOR *fy, *fybar, *f, **fycache=NULL; + SVECTOR *slackvec; + WORD slackv[2]; + MODEL *svmModel=NULL; + KERNEL_CACHE *kcache=NULL; + LABEL ybar; + DOC *doc; + + long n=sample.n; + EXAMPLE *ex=sample.examples; + double rt_total=0, rt_opt=0, rt_init=0, rt_psi=0, rt_viol=0; + double rt1,rt2; + + rt1=get_runtime(); + + init_struct_model(sample,sm,sparm,lparm,kparm); + sizePsi=sm->sizePsi+1; /* sm must contain size of psi on return */ + + /* initialize shrinking-style example selection heuristic */ + if(alg_type == NSLACK_SHRINK_ALG) + use_shrinking=1; + else + use_shrinking=0; + opti=(int*)my_malloc(n*sizeof(int)); + for(i=0;iC/n; + + if(sparm->slack_norm == 1) { + lparm->svm_c=svmCnorm; /* set upper bound C */ + lparm->sharedslack=1; + } + else if(sparm->slack_norm == 2) { + lparm->svm_c=999999999999999.0; /* upper bound C must never be reached */ + lparm->sharedslack=0; + if(kparm->kernel_type != LINEAR) { + printf("ERROR: Kernels are not implemented for L2 slack norm!"); + fflush(stdout); + exit(0); + } + } + else { + printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout); + exit(0); + } + + + epsilon=100.0; /* start with low precision and + increase later */ + tolerance=MIN(n/3,MAX(n/100,5));/* increase precision, whenever less + than that number of constraints + is not fulfilled */ + lparm->biased_hyperplane=0; /* set threshold to zero */ + + cset=init_struct_constraints(sample, sm, sparm); + if(cset.m > 0) { + alpha=(double *)realloc(alpha,sizeof(double)*cset.m); + alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m); + for(i=0; iepsilon_crit=epsilon; + if(kparm->kernel_type != LINEAR) + kcache=kernel_cache_init(MAX(cset.m,1),lparm->kernel_cache_size); + svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n, + lparm,kparm,kcache,svmModel,alpha); + if(kcache) + kernel_cache_cleanup(kcache); + add_weight_vector_to_linear_model(svmModel); + sm->svm_model=svmModel; + sm->w=svmModel->lin_weights; /* short cut to weight vector */ + + /* create a cache of the feature vectors for the correct labels */ + if(USE_FYCACHE) { + fycache=(SVECTOR **)my_malloc(n*sizeof(SVECTOR *)); + for(i=0;ikernel_type == LINEAR) { + diff=add_list_ss(fy); /* store difference vector directly */ + free_svector(fy); + fy=diff; + } + fycache[i]=fy; + } + } + + rt_init+=MAX(get_runtime()-rt1,0); + rt_total+=MAX(get_runtime()-rt1,0); + + /*****************/ + /*** main loop ***/ + /*****************/ + do { /* iteratively increase precision */ + + epsilon=MAX(epsilon*0.49999999999,sparm->epsilon); + new_precision=1; + if(epsilon == sparm->epsilon) /* for final precision, find all SV */ + tolerance=0; + lparm->epsilon_crit=epsilon/2; /* svm precision must be higher than eps */ + if(struct_verbosity>=1) + printf("Setting current working precision to %g.\n",epsilon); + + do { /* iteration until (approx) all SV are found for current + precision and 
tolerance */ + + opti_round++; + activenum=n; + dont_stop=0; + old_totconstraints=totconstraints; + + do { /* with shrinking turned on, go through examples that keep + producing new constraints */ + + if(struct_verbosity>=1) { + printf("Iter %i (%ld active): ",++numIt,activenum); + fflush(stdout); + } + + ceps=0; + fullround=(activenum == n); + + for(i=0; iloss_type == SLACK_RESCALING) + ybar=find_most_violated_constraint_slackrescaling(ex[i].x, + ex[i].y,sm, + sparm); + else + ybar=find_most_violated_constraint_marginrescaling(ex[i].x, + ex[i].y,sm, + sparm); + rt_viol+=MAX(get_runtime()-rt2,0); + + if(empty_label(ybar)) { + if(opti[i] != opti_round) { + activenum--; + opti[i]=opti_round; + } + if(struct_verbosity>=2) + printf("no-incorrect-found(%i) ",i); + continue; + } + + /**** get psi(y)-psi(ybar) ****/ + rt2=get_runtime(); + if(fycache) + fy=copy_svector(fycache[i]); + else + fy=psi(ex[i].x,ex[i].y,sm,sparm); + fybar=psi(ex[i].x,ybar,sm,sparm); + rt_psi+=MAX(get_runtime()-rt2,0); + + /**** scale feature vector and margin by loss ****/ + lossval=loss(ex[i].y,ybar,sparm); + if(sparm->slack_norm == 2) + lossval=sqrt(lossval); + if(sparm->loss_type == SLACK_RESCALING) + factor=lossval; + else /* do not rescale vector for */ + factor=1.0; /* margin rescaling loss type */ + for(f=fy;f;f=f->next) + f->factor*=factor; + for(f=fybar;f;f=f->next) + f->factor*=-factor; + margin=lossval; + + /**** create constraint for current ybar ****/ + append_svector_list(fy,fybar);/* append the two vector lists */ + doc=create_example(cset.m,0,i+1,1,fy); + + /**** compute slack for this example ****/ + slack=0; + for(j=0;jslackid == i+1) { + if(sparm->slack_norm == 2) /* works only for linear kernel */ + slack=MAX(slack,cset.rhs[j] + -(classify_example(svmModel,cset.lhs[j]) + -sm->w[sizePsi+i]/(sqrt(2*svmCnorm)))); + else + slack=MAX(slack, + cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); + } + + /**** if `error' add constraint and recompute ****/ + dist=classify_example(svmModel,doc); + ceps=MAX(ceps,margin-dist-slack); + if(slack > (margin-dist+0.0001)) { + printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n"); + printf(" set! 
There is probably a bug in 'find_most_violated_constraint_*'.\n"); + printf("Ex %d: slack=%f, newslack=%f\n",i,slack,margin-dist); + /* exit(1); */ + } + if((dist+slack)<(margin-epsilon)) { + if(struct_verbosity>=2) + {printf("(%i,eps=%.2f) ",i,margin-dist-slack); fflush(stdout);} + if(struct_verbosity==1) + {printf("."); fflush(stdout);} + + /**** resize constraint matrix and add new constraint ****/ + cset.m++; + cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*cset.m); + if(kparm->kernel_type == LINEAR) { + diff=add_list_ss(fy); /* store difference vector directly */ + if(sparm->slack_norm == 1) + cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, + copy_svector(diff)); + else if(sparm->slack_norm == 2) { + /**** add squared slack variable to feature vector ****/ + slackv[0].wnum=sizePsi+i; + slackv[0].weight=1/(sqrt(2*svmCnorm)); + slackv[1].wnum=0; /*terminator*/ + slackvec=create_svector(slackv,NULL,1.0); + cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, + add_ss(diff,slackvec)); + free_svector(slackvec); + } + free_svector(diff); + } + else { /* kernel is used */ + if(sparm->slack_norm == 1) + cset.lhs[cset.m-1]=create_example(cset.m-1,0,i+1,1, + copy_svector(fy)); + else if(sparm->slack_norm == 2) + exit(1); + } + cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*cset.m); + cset.rhs[cset.m-1]=margin; + alpha=(double *)realloc(alpha,sizeof(double)*cset.m); + alpha[cset.m-1]=0; + alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m); + alphahist[cset.m-1]=optcount; + newconstraints++; + totconstraints++; + } + else { + printf("+"); fflush(stdout); + if(opti[i] != opti_round) { + activenum--; + opti[i]=opti_round; + } + } + + free_example(doc,0); + free_svector(fy); /* this also free's fybar */ + free_label(ybar); + } + + /**** get new QP solution ****/ + if((newconstraints >= sparm->newconstretrain) + || ((newconstraints > 0) && (i == n-1)) + || (new_precision && (i == n-1))) { + if(struct_verbosity>=1) { + printf("*");fflush(stdout); + } + rt2=get_runtime(); + free_model(svmModel,0); + svmModel=(MODEL *)my_malloc(sizeof(MODEL)); + /* Always get a new kernel cache. It is not possible to use the + same cache for two different training runs */ + if(kparm->kernel_type != LINEAR) + kcache=kernel_cache_init(MAX(cset.m,1),lparm->kernel_cache_size); + /* Run the QP solver on cset. */ + svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi+n, + lparm,kparm,kcache,svmModel,alpha); + if(kcache) + kernel_cache_cleanup(kcache); + /* Always add weight vector, in case part of the kernel is + linear. If not, ignore the weight vector since its + content is bogus. */ + add_weight_vector_to_linear_model(svmModel); + sm->svm_model=svmModel; + sm->w=svmModel->lin_weights; /* short cut to weight vector */ + optcount++; + /* keep track of when each constraint was last + active. constraints marked with -1 are not updated */ + for(j=0;j-1) && (alpha[j] != 0)) + alphahist[j]=optcount; + rt_opt+=MAX(get_runtime()-rt2,0); + + if(new_precision && (epsilon <= sparm->epsilon)) + dont_stop=1; /* make sure we take one final pass */ + new_precision=0; + newconstraints=0; + } + + rt_total+=MAX(get_runtime()-rt1,0); + + } /* end of example loop */ + + rt1=get_runtime(); + + if(struct_verbosity>=1) + printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m, + svmModel->sv_num-1,ceps,svmModel->maxdiff); + + /* Check if some of the linear constraints have not been + active in a while. Those constraints are then removed to + avoid bloating the working set beyond necessity. 
*/ + if(struct_verbosity>=2) + printf("Reducing working set...");fflush(stdout); + remove_inactive_constraints(&cset,alpha,optcount,alphahist, + MAX(50,optcount-lastoptcount)); + lastoptcount=optcount; + if(struct_verbosity>=2) + printf("done. (NumConst=%d)\n",cset.m); + + rt_total+=MAX(get_runtime()-rt1,0); + + } while(use_shrinking && (activenum > 0)); /* when using shrinking, + repeat until all examples + produced no constraint at + least once */ + + } while(((totconstraints - old_totconstraints) > tolerance) || dont_stop); + + } while((epsilon > sparm->epsilon) + || finalize_iteration(ceps,0,sample,sm,cset,alpha,sparm)); + + if(struct_verbosity>=1) { + /**** compute sum of slacks ****/ + /**** WARNING: If positivity constraints are used, then the + maximum slack id is larger than what is allocated + below ****/ + slacks=(double *)my_malloc(sizeof(double)*(n+1)); + for(i=0; i<=n; i++) { + slacks[i]=0; + } + if(sparm->slack_norm == 1) { + for(j=0;jslackid]=MAX(slacks[cset.lhs[j]->slackid], + cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); + } + else if(sparm->slack_norm == 2) { + for(j=0;jslackid]=MAX(slacks[cset.lhs[j]->slackid], + cset.rhs[j] + -(classify_example(svmModel,cset.lhs[j]) + -sm->w[sizePsi+cset.lhs[j]->slackid-1]/(sqrt(2*svmCnorm)))); + } + slacksum=0; + for(i=1; i<=n; i++) + slacksum+=slacks[i]; + free(slacks); + alphasum=0; + for(i=0; imaxdiff,epsilon)); + printf("Upper bound on duality gap: %.5f\n", dualitygap); + printf("Dual objective value: dval=%.5f\n", + alphasum-0.5*modellength*modellength); + printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints); + printf("Number of iterations: %d\n",numIt); + printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count); + if(sparm->slack_norm == 1) { + printf("Number of SV: %ld \n",svmModel->sv_num-1); + printf("Number of non-zero slack variables: %ld (out of %ld)\n", + svmModel->at_upper_bound,n); + printf("Norm of weight vector: |w|=%.5f\n",modellength); + } + else if(sparm->slack_norm == 2){ + printf("Number of SV: %ld (including %ld at upper bound)\n", + svmModel->sv_num-1,svmModel->at_upper_bound); + printf("Norm of weight vector (including L2-loss): |w|=%.5f\n", + modellength); + } + printf("Norm. 
sum of slack variables (on working set): sum(xi_i)/n=%.5f\n",slacksum/n); + printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n", + length_of_longest_document_vector(cset.lhs,cset.m,kparm)); + printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init)\n", + rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_viol)/rt_total, + (100.0*rt_psi)/rt_total, (100.0*rt_init)/rt_total); + } + if(struct_verbosity>=4) + printW(sm->w,sizePsi,n,lparm->svm_c); + + if(svmModel) { + sm->svm_model=copy_model(svmModel); + sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */ + } + + print_struct_learning_stats(sample,sm,cset,alpha,sparm); + + if(fycache) { + for(i=0;ibatch_size<100) + batch_size=sparm->batch_size*n/100.0; + + init_struct_model(sample,sm,sparm,lparm,kparm); + sizePsi=sm->sizePsi+1; /* sm must contain size of psi on return */ + + if(sparm->slack_norm == 1) { + lparm->svm_c=sparm->C; /* set upper bound C */ + lparm->sharedslack=1; + } + else if(sparm->slack_norm == 2) { + printf("ERROR: The joint algorithm does not apply to L2 slack norm!"); + fflush(stdout); + exit(0); + } + else { + printf("ERROR: Slack norm must be L1 or L2!"); fflush(stdout); + exit(0); + } + + + lparm->biased_hyperplane=0; /* set threshold to zero */ + epsilon=100.0; /* start with low precision and + increase later */ + epsilon_cached=epsilon; /* epsilon to use for iterations + using constraints constructed + from the constraint cache */ + + cset=init_struct_constraints(sample, sm, sparm); + if(cset.m > 0) { + alpha=(double *)realloc(alpha,sizeof(double)*cset.m); + alphahist=(long *)realloc(alphahist,sizeof(long)*cset.m); + for(i=0; igram_matrix=NULL; + if((alg_type == ONESLACK_DUAL_ALG) || (alg_type == ONESLACK_DUAL_CACHE_ALG)) + kparm->gram_matrix=init_kernel_matrix(&cset,kparm); + + /* set initial model and slack variables */ + svmModel=(MODEL *)my_malloc(sizeof(MODEL)); + lparm->epsilon_crit=epsilon; + svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi, + lparm,kparm,NULL,svmModel,alpha); + add_weight_vector_to_linear_model(svmModel); + sm->svm_model=svmModel; + sm->w=svmModel->lin_weights; /* short cut to weight vector */ + + /* create a cache of the feature vectors for the correct labels */ + fycache=(SVECTOR **)my_malloc(n*sizeof(SVECTOR *)); + for(i=0;ikernel_type == LINEAR) { /* store difference vector directly */ + diff=add_list_sort_ss_r(fy,COMPACT_ROUNDING_THRESH); + free_svector(fy); + fy=diff; + } + } + else + fy=NULL; + fycache[i]=fy; + } + + /* initialize the constraint cache */ + if(alg_type == ONESLACK_DUAL_CACHE_ALG) { + ccache=create_constraint_cache(sample,sparm,sm); + /* NOTE: */ + for(i=0;ikernel_type == LINEAR) + lhs_n=create_nvector(sm->sizePsi); + + /* randomize order or training examples */ + if(batch_size=1) { + printf("Iter %i: ",++numIt); + fflush(stdout); + } + + rt1=get_runtime(); + + /**** compute current slack ****/ + alphasum=0; + for(j=0;(j alphasum/cset.m) + slack=MAX(0,cset.rhs[j]-classify_example(svmModel,cset.lhs[j])); + slack=MAX(0,slack); + + rt_total+=MAX(get_runtime()-rt1,0); + + /**** find a violated joint constraint ****/ + lhs=NULL; + rhs=0; + if(alg_type == ONESLACK_DUAL_CACHE_ALG) { + rt1=get_runtime(); + /* Compute violation of constraints in cache for current w */ + if(struct_verbosity>=2) rt2=get_runtime(); + update_constraint_cache_for_model(ccache, svmModel); + if(struct_verbosity>=2) rt_cacheupdate+=MAX(get_runtime()-rt2,0); + /* Is there is a sufficiently violated constraint in 
cache? */ + viol=compute_violation_of_constraint_in_cache(ccache,epsilon_est/2); + if(viol-slack > MAX(epsilon_est/10,sparm->epsilon)) { + /* There is a sufficiently violated constraint in cache, so + use this constraint in this iteration. */ + if(struct_verbosity>=2) rt2=get_runtime(); + viol=find_most_violated_joint_constraint_in_cache(ccache, + epsilon_est/2,lhs_n,&lhs,&rhs); + if(struct_verbosity>=2) rt_cacheconst+=MAX(get_runtime()-rt2,0); + cached_constraint=1; + } + else { + /* There is no sufficiently violated constraint in cache, so + update cache by computing most violated constraint + explicitly for batch_size examples. */ + viol_est=0; + progress=0; + viol=compute_violation_of_constraint_in_cache(ccache,0); + for(j=0;(jepsilon));j++) { + if(struct_verbosity>=1) + print_percent_progress(&progress,n,10,"."); + uptr=uptr % n; + if(randmapping) + i=randmapping[uptr]; + else + i=uptr; + /* find most violating fydelta=fy-fybar and rhs for example i */ + find_most_violated_constraint(&fydelta,&rhs_i,&ex[i], + fycache[i],n,sm,sparm, + &rt_viol,&rt_psi,&argmax_count); + /* add current fy-fybar and loss to cache */ + if(struct_verbosity>=2) rt2=get_runtime(); + viol+=add_constraint_to_constraint_cache(ccache,sm->svm_model, + i,fydelta,rhs_i,0.0001*sparm->epsilon/n, + sparm->ccache_size,&rt_cachesum); + if(struct_verbosity>=2) rt_cacheadd+=MAX(get_runtime()-rt2,0); + viol_est+=ccache->constlist[i]->viol; + uptr++; + } + cached_constraint=(j=2) rt2=get_runtime(); + if(cached_constraint) + viol=find_most_violated_joint_constraint_in_cache(ccache, + epsilon_est/2,lhs_n,&lhs,&rhs); + else + viol=find_most_violated_joint_constraint_in_cache(ccache,0,lhs_n, + &lhs,&rhs); + if(struct_verbosity>=2) rt_cacheconst+=MAX(get_runtime()-rt2,0); + viol_est*=((double)n/j); + epsilon_est=(1-(double)j/n)*epsilon_est+(double)j/n*(viol_est-slack); + if((struct_verbosity >= 1) && (j!=n)) + printf("(upd=%5.1f%%,eps^=%.4f,eps*=%.4f)", + 100.0*j/n,viol_est-slack,epsilon_est); + } + lhsXw=rhs-viol; + + rt_total+=MAX(get_runtime()-rt1,0); + } + else { + /* do not use constraint from cache */ + rt1=get_runtime(); + cached_constraint=0; + if(kparm->kernel_type == LINEAR) + clear_nvector(lhs_n,sm->sizePsi); + progress=0; + rt_total+=MAX(get_runtime()-rt1,0); + + for(i=0; i=1) + print_percent_progress(&progress,n,10,"."); + + /* compute most violating fydelta=fy-fybar and rhs for example i */ + find_most_violated_constraint(&fydelta,&rhs_i,&ex[i],fycache[i],n, + sm,sparm,&rt_viol,&rt_psi,&argmax_count); + /* add current fy-fybar to lhs of constraint */ + if(kparm->kernel_type == LINEAR) { + add_list_n_ns(lhs_n,fydelta,1.0); /* add fy-fybar to sum */ + free_svector(fydelta); + } + else { + append_svector_list(fydelta,lhs); /* add fy-fybar to vector list */ + lhs=fydelta; + } + rhs+=rhs_i; /* add loss to rhs */ + + rt_total+=MAX(get_runtime()-rt1,0); + + } /* end of example loop */ + + rt1=get_runtime(); + + /* create sparse vector from dense sum */ + if(kparm->kernel_type == LINEAR) + lhs=create_svector_n_r(lhs_n,sm->sizePsi,NULL,1.0, + COMPACT_ROUNDING_THRESH); + doc=create_example(cset.m,0,1,1,lhs); + lhsXw=classify_example(svmModel,doc); + free_example(doc,0); + viol=rhs-lhsXw; + + rt_total+=MAX(get_runtime()-rt1,0); + + } /* end of finding most violated joint constraint */ + + rt1=get_runtime(); + + /**** if `error', then add constraint and recompute QP ****/ + if(slack > (rhs-lhsXw+0.000001)) { + printf("\nWARNING: Slack of most violated constraint is smaller than slack of working\n"); + printf(" set! 
There is probably a bug in 'find_most_violated_constraint_*'.\n"); + printf("slack=%f, newslack=%f\n",slack,rhs-lhsXw); + /* exit(1); */ + } + ceps=MAX(0,rhs-lhsXw-slack); + if((ceps > sparm->epsilon) || cached_constraint) { + /**** resize constraint matrix and add new constraint ****/ + cset.lhs=(DOC **)realloc(cset.lhs,sizeof(DOC *)*(cset.m+1)); + cset.lhs[cset.m]=create_example(cset.m,0,1,1,lhs); + cset.rhs=(double *)realloc(cset.rhs,sizeof(double)*(cset.m+1)); + cset.rhs[cset.m]=rhs; + alpha=(double *)realloc(alpha,sizeof(double)*(cset.m+1)); + alpha[cset.m]=0; + alphahist=(long *)realloc(alphahist,sizeof(long)*(cset.m+1)); + alphahist[cset.m]=optcount; + cset.m++; + totconstraints++; + if((alg_type == ONESLACK_DUAL_ALG) + || (alg_type == ONESLACK_DUAL_CACHE_ALG)) { + if(struct_verbosity>=2) rt2=get_runtime(); + kparm->gram_matrix=update_kernel_matrix(kparm->gram_matrix,cset.m-1, + &cset,kparm); + if(struct_verbosity>=2) rt_kernel+=MAX(get_runtime()-rt2,0); + } + + /**** get new QP solution ****/ + if(struct_verbosity>=1) { + printf("*");fflush(stdout); + } + if(struct_verbosity>=2) rt2=get_runtime(); + /* set svm precision so that higher than eps of most violated constr */ + if(cached_constraint) { + epsilon_cached=MIN(epsilon_cached,ceps); + lparm->epsilon_crit=epsilon_cached/2; + } + else { + epsilon=MIN(epsilon,ceps); /* best eps so far */ + lparm->epsilon_crit=epsilon/2; + epsilon_cached=epsilon; + } + free_model(svmModel,0); + svmModel=(MODEL *)my_malloc(sizeof(MODEL)); + /* Run the QP solver on cset. */ + kernel_type_org=kparm->kernel_type; + if((alg_type == ONESLACK_DUAL_ALG) + || (alg_type == ONESLACK_DUAL_CACHE_ALG)) + kparm->kernel_type=GRAM; /* use kernel stored in kparm */ + svm_learn_optimization(cset.lhs,cset.rhs,cset.m,sizePsi, + lparm,kparm,NULL,svmModel,alpha); + kparm->kernel_type=kernel_type_org; + svmModel->kernel_parm.kernel_type=kernel_type_org; + /* Always add weight vector, in case part of the kernel is + linear. If not, ignore the weight vector since its + content is bogus. */ + add_weight_vector_to_linear_model(svmModel); + sm->svm_model=svmModel; + sm->w=svmModel->lin_weights; /* short cut to weight vector */ + optcount++; + /* keep track of when each constraint was last + active. constraints marked with -1 are not updated */ + for(j=0;j-1) && (alpha[j] != 0)) + alphahist[j]=optcount; + if(struct_verbosity>=2) rt_opt+=MAX(get_runtime()-rt2,0); + + /* Check if some of the linear constraints have not been + active in a while. Those constraints are then removed to + avoid bloating the working set beyond necessity. */ + if(struct_verbosity>=3) + printf("Reducing working set...");fflush(stdout); + remove_inactive_constraints(&cset,alpha,optcount,alphahist,50); + if(struct_verbosity>=3) + printf("done. 
"); + } + else { + free_svector(lhs); + } + + if(struct_verbosity>=1) + printf("(NumConst=%d, SV=%ld, CEps=%.4f, QPEps=%.4f)\n",cset.m, + svmModel->sv_num-1,ceps,svmModel->maxdiff); + + rt_total+=MAX(get_runtime()-rt1,0); + + } while(cached_constraint || (ceps > sparm->epsilon) || + finalize_iteration(ceps,cached_constraint,sample,sm,cset,alpha,sparm) + ); + + + if(struct_verbosity>=1) { + printf("Final epsilon on KKT-Conditions: %.5f\n", + MAX(svmModel->maxdiff,ceps)); + + slack=0; + for(j=0;jkernel_type == LINEAR) + modellength=model_length_n(svmModel); + else + modellength=model_length_s(svmModel); + dualitygap=(0.5*modellength*modellength+sparm->C*viol) + -(alphasum-0.5*modellength*modellength); + + printf("Upper bound on duality gap: %.5f\n", dualitygap); + printf("Dual objective value: dval=%.5f\n", + alphasum-0.5*modellength*modellength); + printf("Primal objective value: pval=%.5f\n", + 0.5*modellength*modellength+sparm->C*viol); + printf("Total number of constraints in final working set: %i (of %i)\n",(int)cset.m,(int)totconstraints); + printf("Number of iterations: %d\n",numIt); + printf("Number of calls to 'find_most_violated_constraint': %ld\n",argmax_count); + printf("Number of SV: %ld \n",svmModel->sv_num-1); + printf("Norm of weight vector: |w|=%.5f\n",modellength); + printf("Value of slack variable (on working set): xi=%.5f\n",slack); + printf("Value of slack variable (global): xi=%.5f\n",viol); + printf("Norm of longest difference vector: ||Psi(x,y)-Psi(x,ybar)||=%.5f\n", + length_of_longest_document_vector(cset.lhs,cset.m,kparm)); + if(struct_verbosity>=2) + printf("Runtime in cpu-seconds: %.2f (%.2f%% for QP, %.2f%% for kernel, %.2f%% for Argmax, %.2f%% for Psi, %.2f%% for init, %.2f%% for cache update, %.2f%% for cache const, %.2f%% for cache add (incl. 
%.2f%% for sum))\n", + rt_total/100.0, (100.0*rt_opt)/rt_total, (100.0*rt_kernel)/rt_total, + (100.0*rt_viol)/rt_total, (100.0*rt_psi)/rt_total, + (100.0*rt_init)/rt_total,(100.0*rt_cacheupdate)/rt_total, + (100.0*rt_cacheconst)/rt_total,(100.0*rt_cacheadd)/rt_total, + (100.0*rt_cachesum)/rt_total); + else if(struct_verbosity==1) + printf("Runtime in cpu-seconds: %.2f\n",rt_total/100.0); + } + if(ccache) { + long cnum=0; + CCACHEELEM *celem; + for(i=0;iconstlist[i];celem;celem=celem->next) + cnum++; + printf("Final number of constraints in cache: %ld\n",cnum); + } + if(struct_verbosity>=4) + printW(sm->w,sizePsi,n,lparm->svm_c); + + if(svmModel) { + sm->svm_model=copy_model(svmModel); + sm->w=sm->svm_model->lin_weights; /* short cut to weight vector */ + free_model(svmModel,0); + } + + print_struct_learning_stats(sample,sm,cset,alpha,sparm); + + if(lhs_n) + free_nvector(lhs_n); + if(ccache) + free_constraint_cache(ccache); + for(i=0;igram_matrix) + free_matrix(kparm->gram_matrix); +} + + +void find_most_violated_constraint(SVECTOR **fydelta, double *rhs, + EXAMPLE *ex, SVECTOR *fycached, long n, + STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, + double *rt_viol, double *rt_psi, + long *argmax_count) + /* returns fydelta=fy-fybar and rhs scalar value that correspond + to the most violated constraint for example ex */ +{ + double rt2=0; + LABEL ybar; + SVECTOR *fybar, *fy; + double factor,lossval; + + if(struct_verbosity>=2) rt2=get_runtime(); + (*argmax_count)++; + if(sparm->loss_type == SLACK_RESCALING) + ybar=find_most_violated_constraint_slackrescaling(ex->x,ex->y,sm,sparm); + else + ybar=find_most_violated_constraint_marginrescaling(ex->x,ex->y,sm,sparm); + if(struct_verbosity>=2) (*rt_viol)+=MAX(get_runtime()-rt2,0); + + if(empty_label(ybar)) { + printf("ERROR: empty label was returned for example\n"); + /* exit(1); */ + /* continue; */ + } + + /**** get psi(x,y) and psi(x,ybar) ****/ + if(struct_verbosity>=2) rt2=get_runtime(); + if(fycached) + fy=copy_svector(fycached); + else + fy=psi(ex->x,ex->y,sm,sparm); + fybar=psi(ex->x,ybar,sm,sparm); + if(struct_verbosity>=2) (*rt_psi)+=MAX(get_runtime()-rt2,0); + lossval=loss(ex->y,ybar,sparm); + free_label(ybar); + + /**** scale feature vector and margin by loss ****/ + if(sparm->loss_type == SLACK_RESCALING) + factor=lossval/n; + else /* do not rescale vector for */ + factor=1.0/n; /* margin rescaling loss type */ + mult_svector_list(fy,factor); + mult_svector_list(fybar,-factor); + append_svector_list(fybar,fy); /* compute fy-fybar */ + + (*fydelta)=fybar; + (*rhs)=lossval/n; +} + + +void remove_inactive_constraints(CONSTSET *cset, double *alpha, + long currentiter, long *alphahist, + long mininactive) + /* removes the constraints from cset (and alpha) for which + alphahist indicates that they have not been active for at + least mininactive iterations */ + +{ + long i,m; + + m=0; + for(i=0;im;i++) { + if((alphahist[i]<0) || ((currentiter-alphahist[i]) < mininactive)) { + /* keep constraints that are marked as -1 or which have recently + been active */ + cset->lhs[m]=cset->lhs[i]; + cset->lhs[m]->docnum=m; + cset->rhs[m]=cset->rhs[i]; + alpha[m]=alpha[i]; + alphahist[m]=alphahist[i]; + m++; + } + else { + free_example(cset->lhs[i],1); + } + } + if(cset->m != m) { + cset->m=m; + cset->lhs=(DOC **)realloc(cset->lhs,sizeof(DOC *)*cset->m); + cset->rhs=(double *)realloc(cset->rhs,sizeof(double)*cset->m); + /* alpha=realloc(alpha,sizeof(double)*cset->m); */ + /* alphahist=realloc(alphahist,sizeof(long)*cset->m); */ + } +} + + +MATRIX 
*init_kernel_matrix(CONSTSET *cset, KERNEL_PARM *kparm) + /* assigns a kernelid to each constraint in cset and creates the + corresponding kernel matrix. */ +{ + int i,j; + double kval; + MATRIX *matrix; + + /* assign kernel id to each new constraint */ + for(i=0;im;i++) + cset->lhs[i]->kernelid=i; + + /* allocate kernel matrix as necessary */ + matrix=create_matrix(i+50,i+50); + + for(j=0;jm;j++) { + for(i=j;im;i++) { + kval=kernel(kparm,cset->lhs[j],cset->lhs[i]); + matrix->element[j][i]=kval; + matrix->element[i][j]=kval; + } + } + return(matrix); +} + +MATRIX *update_kernel_matrix(MATRIX *matrix, int newpos, CONSTSET *cset, + KERNEL_PARM *kparm) + /* assigns new kernelid to constraint in position newpos and + fills the corresponding part of the kernel matrix */ +{ + int i,maxkernelid=0,newid; + double kval; + double *used; + + /* find free kernelid to assign to new constraint */ + for(i=0;im;i++) + if(i != newpos) + maxkernelid=MAX(maxkernelid,cset->lhs[i]->kernelid); + used=create_nvector(maxkernelid+2); + clear_nvector(used,maxkernelid+2); + for(i=0;im;i++) + if(i != newpos) + used[cset->lhs[i]->kernelid]=1; + for(newid=0;used[newid];newid++); + free_nvector(used); + cset->lhs[newpos]->kernelid=newid; + + /* extend kernel matrix if necessary */ + maxkernelid=MAX(maxkernelid,newid); + if((!matrix) || (maxkernelid>=matrix->m)) + matrix=realloc_matrix(matrix,maxkernelid+50,maxkernelid+50); + + for(i=0;im;i++) { + kval=kernel(kparm,cset->lhs[newpos],cset->lhs[i]); + matrix->element[newid][cset->lhs[i]->kernelid]=kval; + matrix->element[cset->lhs[i]->kernelid][newid]=kval; + } + return(matrix); +} + +CCACHE *create_constraint_cache(SAMPLE sample, STRUCT_LEARN_PARM *sparm, + STRUCTMODEL *sm) + /* create new constraint cache for training set */ +{ + long n=sample.n; + EXAMPLE *ex=sample.examples; + CCACHE *ccache; + int i; + + ccache=(CCACHE *)my_malloc(sizeof(CCACHE)); + ccache->n=n; + ccache->sm=sm; + ccache->constlist=(CCACHEELEM **)my_malloc(sizeof(CCACHEELEM *)*n); + ccache->avg_viol_gain=(double *)my_malloc(sizeof(double)*n); + ccache->changed=(int *)my_malloc(sizeof(int)*n); + for(i=0;iconstlist[i]=(CCACHEELEM *)my_malloc(sizeof(CCACHEELEM)); + ccache->constlist[i]->fydelta=create_svector_n(NULL,0,NULL,1); + ccache->constlist[i]->rhs=loss(ex[i].y,ex[i].y,sparm)/n; + ccache->constlist[i]->viol=0; + ccache->constlist[i]->next=NULL; + ccache->avg_viol_gain[i]=0; + ccache->changed[i]=0; + } + return(ccache); +} + +void free_constraint_cache(CCACHE *ccache) + /* frees all memory allocated for constraint cache */ +{ + CCACHEELEM *celem,*next; + int i; + for(i=0; in; i++) { + celem=ccache->constlist[i]; + while(celem) { + free_svector(celem->fydelta); + next=celem->next; + free(celem); + celem=next; + } + } + free(ccache->constlist); + free(ccache->avg_viol_gain); + free(ccache->changed); + free(ccache); +} + +double add_constraint_to_constraint_cache(CCACHE *ccache, MODEL *svmModel, int exnum, SVECTOR *fydelta, double rhs, double gainthresh, int maxconst, double *rt_cachesum) + /* add new constraint fydelta*w>rhs for example exnum to cache, + if it is more violated (by gainthresh) than the currently most + violated constraint in cache. if this grows the number of + cached constraints for this example beyond maxconst, then the + least recently used constraint is deleted. the function + assumes that update_constraint_cache_for_model has been + run. 
*/ +{ + double viol,viol_gain,viol_gain_trunc; + double dist_ydelta; + DOC *doc_fydelta; + SVECTOR *fydelta_new; + CCACHEELEM *celem; + int cnum; + double rt2=0; + + /* compute violation of new constraint */ + doc_fydelta=create_example(1,0,1,1,fydelta); + dist_ydelta=classify_example(svmModel,doc_fydelta); + free_example(doc_fydelta,0); + viol=rhs-dist_ydelta; + viol_gain=viol-ccache->constlist[exnum]->viol; + viol_gain_trunc=viol-MAX(ccache->constlist[exnum]->viol,0); + ccache->avg_viol_gain[exnum]=viol_gain; + + /* check if violation of new constraint is larger than that of the + best cache element */ + if(viol_gain > gainthresh) { + fydelta_new=fydelta; + if(struct_verbosity>=2) rt2=get_runtime(); + if(svmModel->kernel_parm.kernel_type == LINEAR) { + if(COMPACT_CACHED_VECTORS == 1) { /* eval sum for linear */ + fydelta_new=add_list_sort_ss_r(fydelta,COMPACT_ROUNDING_THRESH); + free_svector(fydelta); + } + else if(COMPACT_CACHED_VECTORS == 2) { + fydelta_new=add_list_ss_r(fydelta,COMPACT_ROUNDING_THRESH); + free_svector(fydelta); + } + else if(COMPACT_CACHED_VECTORS == 3) { + fydelta_new=add_list_ns_r(fydelta,COMPACT_ROUNDING_THRESH); + free_svector(fydelta); + } + } + if(struct_verbosity>=2) (*rt_cachesum)+=MAX(get_runtime()-rt2,0); + celem=ccache->constlist[exnum]; + ccache->constlist[exnum]=(CCACHEELEM *)my_malloc(sizeof(CCACHEELEM)); + ccache->constlist[exnum]->next=celem; + ccache->constlist[exnum]->fydelta=fydelta_new; + ccache->constlist[exnum]->rhs=rhs; + ccache->constlist[exnum]->viol=viol; + ccache->changed[exnum]+=2; + + /* remove last constraint in list, if list is longer than maxconst */ + cnum=2; + for(celem=ccache->constlist[exnum];celem && celem->next && celem->next->next;celem=celem->next) + cnum++; + if(cnum>maxconst) { + free_svector(celem->next->fydelta); + free(celem->next); + celem->next=NULL; + } + } + else { + free_svector(fydelta); + } + return(viol_gain_trunc); +} + + +void update_constraint_cache_for_model(CCACHE *ccache, MODEL *svmModel) + /* update the violation scores according to svmModel and find the + most violated constraints for each example */ +{ + int i; + long progress=0; + double maxviol=0; + double dist_ydelta; + DOC *doc_fydelta; + CCACHEELEM *celem,*prev,*maxviol_celem,*maxviol_prev; + + doc_fydelta=create_example(1,0,1,1,NULL); + for(i=0; in; i++) { /*** example loop ***/ + + if(struct_verbosity>=3) + print_percent_progress(&progress,ccache->n,10,"+"); + + maxviol=0; + prev=NULL; + maxviol_celem=NULL; + maxviol_prev=NULL; + for(celem=ccache->constlist[i];celem;celem=celem->next) { + doc_fydelta->fvec=celem->fydelta; + dist_ydelta=classify_example(svmModel,doc_fydelta); + celem->viol=celem->rhs-dist_ydelta; + if((celem->viol > maxviol) || (!maxviol_celem)) { + maxviol=celem->viol; + maxviol_celem=celem; + maxviol_prev=prev; + } + prev=celem; + } + ccache->changed[i]=0; + if(maxviol_prev) { /* move max violated constraint to the top of list */ + maxviol_prev->next=maxviol_celem->next; + maxviol_celem->next=ccache->constlist[i]; + ccache->constlist[i]=maxviol_celem; + ccache->changed[i]=1; + } + } + free_example(doc_fydelta,0); +} + +double compute_violation_of_constraint_in_cache(CCACHE *ccache, double thresh) + /* computes the violation of the most violated joint constraint + in cache. assumes that update_constraint_cache_for_model has + been run. */ + /* NOTE: This function assumes that loss(y,y')>=0, and it is most + efficient when loss(y,y)=0. 
*/ +{ + double sumviol=0; + int i,n=ccache->n; + + /**** add all maximal violations ****/ + for(i=0; iconstlist[i]->viol*n > thresh) + sumviol+=ccache->constlist[i]->viol; + } + + return(sumviol); +} + +double find_most_violated_joint_constraint_in_cache(CCACHE *ccache, double thresh, double *lhs_n, SVECTOR **lhs, double *rhs) + /* constructs most violated joint constraint from cache. assumes + that update_constraint_cache_for_model has been run. */ + /* NOTE: For kernels, this function returns only a shallow copy + of the Psi vectors in lhs. So, do not use a deep free, + otherwise the case becomes invalid. */ + /* NOTE: This function assumes that loss(y,y')>=0, and it is most + efficient when loss(y,y)=0. */ +{ + double sumviol=0; + int i,n=ccache->n; + SVECTOR *fydelta; + + (*lhs)=NULL; + (*rhs)=0; + if(lhs_n) { /* linear case? */ + clear_nvector(lhs_n,ccache->sm->sizePsi); + } + + /**** add all maximally violated fydelta to joint constraint ****/ + for(i=0; iconstlist[i]->viol*n > thresh)) { + /* get most violating fydelta=fy-fybar for example i from cache */ + fydelta=ccache->constlist[i]->fydelta; + (*rhs)+=ccache->constlist[i]->rhs; + sumviol+=ccache->constlist[i]->viol; + if(lhs_n) { /* linear case? */ + add_list_n_ns(lhs_n,fydelta,1.0); /* add fy-fybar to sum */ + } + else { /* add fy-fybar to vector list */ + fydelta=copy_svector(fydelta); + append_svector_list(fydelta,(*lhs)); + (*lhs)=fydelta; + } + } + } + /* create sparse vector from dense sum */ + if(lhs_n) /* linear case? */ + (*lhs)=create_svector_n_r(lhs_n,ccache->sm->sizePsi,NULL,1.0, + COMPACT_ROUNDING_THRESH); + + return(sumviol); +} + diff --git a/src/classifier/svm/svm_struct/svm_struct_learn.h b/src/classifier/svm/svm_struct/svm_struct_learn.h new file mode 100755 index 0000000..20a37c3 --- /dev/null +++ b/src/classifier/svm/svm_struct/svm_struct_learn.h @@ -0,0 +1,101 @@ +/***********************************************************************/ +/* */ +/* svm_struct_learn.h */ +/* */ +/* Basic algorithm for learning structured outputs (e.g. parses, */ +/* sequences, multi-label classification) with a Support Vector */ +/* Machine. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 03.07.04 */ +/* */ +/* Copyright (c) 2004 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. */ +/* */ +/***********************************************************************/ + +#ifndef SVM_STRUCT_LEARN +#define SVM_STRUCT_LEARN + +#ifdef __cplusplus +extern "C" { +#endif +#include "../svm_light/svm_common.h" +#include "../svm_light/svm_learn.h" +#include "../svm_struct_api_types.h" +#include "svm_struct_common.h" + +#define SLACK_RESCALING 1 +#define MARGIN_RESCALING 2 + +#define NSLACK_ALG 0 +#define NSLACK_SHRINK_ALG 1 +#define ONESLACK_PRIMAL_ALG 2 +#define ONESLACK_DUAL_ALG 3 +#define ONESLACK_DUAL_CACHE_ALG 4 + +typedef struct ccacheelem { + SVECTOR *fydelta; /* left hand side of constraint */ + double rhs; /* right hand side of constraint */ + double viol; /* violation score under current model */ + struct ccacheelem *next; /* next in linked list */ +} CCACHEELEM; + +typedef struct ccache { + int n; /* number of examples */ + CCACHEELEM **constlist; /* array of pointers to constraint lists + - one list per example. 
The first + element of the list always points to + the most violated constraint under the + current model for each example. */ + STRUCTMODEL *sm; /* pointer to model */ + double *avg_viol_gain; /* array of average values by which + violation of globally most violated + constraint exceeds that of most violated + constraint in cache */ + int *changed; /* array of boolean indicating whether the + most violated ybar change compared to + last iter? */ +} CCACHE; + +void find_most_violated_constraint(SVECTOR **fydelta, double *lossval, + EXAMPLE *ex, SVECTOR *fycached, long n, + STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm, + double *rt_viol, double *rt_psi, + long *argmax_count); +CCACHE *create_constraint_cache(SAMPLE sample, STRUCT_LEARN_PARM *sparm, + STRUCTMODEL *sm); +void free_constraint_cache(CCACHE *ccache); +double add_constraint_to_constraint_cache(CCACHE *ccache, MODEL *svmModel, + int exnum, SVECTOR *fydelta, + double rhs, double gainthresh, + int maxconst, double *rt_cachesum); +void update_constraint_cache_for_model(CCACHE *ccache, MODEL *svmModel); +double compute_violation_of_constraint_in_cache(CCACHE *ccache, double thresh); +double find_most_violated_joint_constraint_in_cache(CCACHE *ccache, + double thresh, + double *lhs_n, + SVECTOR **lhs, double *rhs); +void svm_learn_struct(SAMPLE sample, STRUCT_LEARN_PARM *sparm, + LEARN_PARM *lparm, KERNEL_PARM *kparm, STRUCTMODEL *sm, + int alg_type); +void svm_learn_struct_joint(SAMPLE sample, STRUCT_LEARN_PARM *sparm, + LEARN_PARM *lparm, KERNEL_PARM *kparm, + STRUCTMODEL *sm, int alg_type); +void svm_learn_struct_joint_custom(SAMPLE sample, STRUCT_LEARN_PARM *sparm, + LEARN_PARM *lparm, KERNEL_PARM *kparm, + STRUCTMODEL *sm); +void remove_inactive_constraints(CONSTSET *cset, double *alpha, long i, + long *alphahist, long mininactive); +MATRIX *init_kernel_matrix(CONSTSET *cset, KERNEL_PARM *kparm); +MATRIX *update_kernel_matrix(MATRIX *matrix, int newpos, CONSTSET *cset, + KERNEL_PARM *kparm); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/classifier/svm/svm_struct/svm_struct_main.c b/src/classifier/svm/svm_struct/svm_struct_main.c new file mode 100755 index 0000000..094b29f --- /dev/null +++ b/src/classifier/svm/svm_struct/svm_struct_main.c @@ -0,0 +1,417 @@ +/***********************************************************************/ +/* */ +/* svm_struct_main.c */ +/* */ +/* Command line interface to the alignment learning module of the */ +/* Support Vector Machine. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 03.07.04 */ +/* */ +/* Copyright (c) 2004 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. 
*/ +/* */ +/***********************************************************************/ + + +/* the following enables you to use svm-learn out of C++ */ +#ifdef __cplusplus +extern "C" { +#endif +#include "../svm_light/svm_common.h" +#include "../svm_light/svm_learn.h" +#ifdef __cplusplus +} +#endif +# include "svm_struct_learn.h" +# include "svm_struct_common.h" +# include "../svm_struct_api.h" + +#include +#include +#include +/* } */ + +char trainfile[200]; /* file with training examples */ +char modelfile[200]; /* file for resulting classifier */ + +void read_input_parameters(int, char **, char *, char *,long *, long *, + STRUCT_LEARN_PARM *, LEARN_PARM *, KERNEL_PARM *, + int *); +void wait_any_key(); +void print_help(); + + +int main (int argc, char* argv[]) +{ + SAMPLE sample; /* training sample */ + LEARN_PARM learn_parm; + KERNEL_PARM kernel_parm; + STRUCT_LEARN_PARM struct_parm; + STRUCTMODEL structmodel; + int alg_type; + + svm_struct_learn_api_init(argc,argv); + + read_input_parameters(argc,argv,trainfile,modelfile,&verbosity, + &struct_verbosity,&struct_parm,&learn_parm, + &kernel_parm,&alg_type); + + if(struct_verbosity>=1) { + printf("Reading training examples..."); fflush(stdout); + } + /* read the training examples */ + sample=read_struct_examples(trainfile,&struct_parm); + if(struct_verbosity>=1) { + printf("done\n"); fflush(stdout); + } + + /* Do the learning and return structmodel. */ + if(alg_type == 0) + svm_learn_struct(sample,&struct_parm,&learn_parm,&kernel_parm,&structmodel,NSLACK_ALG); + else if(alg_type == 1) + svm_learn_struct(sample,&struct_parm,&learn_parm,&kernel_parm,&structmodel,NSLACK_SHRINK_ALG); + else if(alg_type == 2) + svm_learn_struct_joint(sample,&struct_parm,&learn_parm,&kernel_parm,&structmodel,ONESLACK_PRIMAL_ALG); + else if(alg_type == 3) + svm_learn_struct_joint(sample,&struct_parm,&learn_parm,&kernel_parm,&structmodel,ONESLACK_DUAL_ALG); + else if(alg_type == 4) + svm_learn_struct_joint(sample,&struct_parm,&learn_parm,&kernel_parm,&structmodel,ONESLACK_DUAL_CACHE_ALG); + else if(alg_type == 9) + svm_learn_struct_joint_custom(sample,&struct_parm,&learn_parm,&kernel_parm,&structmodel); + else + exit(1); + + /* Warning: The model contains references to the original data 'docs'. + If you want to free the original data, and only keep the model, you + have to make a deep copy of 'model'. 
*/ + if(struct_verbosity>=1) { + printf("Writing learned model...");fflush(stdout); + } + write_struct_model(modelfile,&structmodel,&struct_parm); + if(struct_verbosity>=1) { + printf("done\n");fflush(stdout); + } + + free_struct_sample(sample); + free_struct_model(structmodel); + + svm_struct_learn_api_exit(); + + return 0; +} + +/*---------------------------------------------------------------------------*/ + +void read_input_parameters(int argc,char *argv[],char *trainfile, + char *modelfile, + long *verbosity,long *struct_verbosity, + STRUCT_LEARN_PARM *struct_parm, + LEARN_PARM *learn_parm, KERNEL_PARM *kernel_parm, + int *alg_type) +{ + long i; + char type[100]; + + /* set default */ + (*alg_type)=DEFAULT_ALG_TYPE; + struct_parm->C=-0.01; + struct_parm->slack_norm=1; + struct_parm->epsilon=DEFAULT_EPS; + struct_parm->custom_argc=0; + struct_parm->loss_function=DEFAULT_LOSS_FCT; + struct_parm->loss_type=DEFAULT_RESCALING; + struct_parm->newconstretrain=100; + struct_parm->ccache_size=5; + struct_parm->batch_size=100; + + strcpy (modelfile, "svm_struct_model"); + strcpy (learn_parm->predfile, "trans_predictions"); + strcpy (learn_parm->alphafile, ""); + (*verbosity)=0;/*verbosity for svm_light*/ + (*struct_verbosity)=1; /*verbosity for struct learning portion*/ + learn_parm->biased_hyperplane=1; + learn_parm->remove_inconsistent=0; + learn_parm->skip_final_opt_check=0; + learn_parm->svm_maxqpsize=10; + learn_parm->svm_newvarsinqp=0; + learn_parm->svm_iter_to_shrink=-9999; + learn_parm->maxiter=100000; + learn_parm->kernel_cache_size=40; + learn_parm->svm_c=99999999; /* overridden by struct_parm->C */ + learn_parm->eps=0.001; /* overridden by struct_parm->epsilon */ + learn_parm->transduction_posratio=-1.0; + learn_parm->svm_costratio=1.0; + learn_parm->svm_costratio_unlab=1.0; + learn_parm->svm_unlabbound=1E-5; + learn_parm->epsilon_crit=0.001; + learn_parm->epsilon_a=1E-10; /* changed from 1e-15 */ + learn_parm->compute_loo=0; + learn_parm->rho=1.0; + learn_parm->xa_depth=0; + kernel_parm->kernel_type=0; + kernel_parm->poly_degree=3; + kernel_parm->rbf_gamma=1.0; + kernel_parm->coef_lin=1; + kernel_parm->coef_const=1; + strcpy(kernel_parm->custom,"empty"); + strcpy(type,"c"); + + for(i=1;(ialphafile,argv[i]); break; + case 'c': i++; struct_parm->C=atof(argv[i]); break; + case 'p': i++; struct_parm->slack_norm=atol(argv[i]); break; + case 'e': i++; struct_parm->epsilon=atof(argv[i]); break; + case 'k': i++; struct_parm->newconstretrain=atol(argv[i]); break; + case 'h': i++; learn_parm->svm_iter_to_shrink=atol(argv[i]); break; + case '#': i++; learn_parm->maxiter=atol(argv[i]); break; + case 'm': i++; learn_parm->kernel_cache_size=atol(argv[i]); break; + case 'w': i++; (*alg_type)=atol(argv[i]); break; + case 'o': i++; struct_parm->loss_type=atol(argv[i]); break; + case 'n': i++; learn_parm->svm_newvarsinqp=atol(argv[i]); break; + case 'q': i++; learn_parm->svm_maxqpsize=atol(argv[i]); break; + case 'l': i++; struct_parm->loss_function=atol(argv[i]); break; + case 'f': i++; struct_parm->ccache_size=atol(argv[i]); break; + case 'b': i++; struct_parm->batch_size=atof(argv[i]); break; + case 't': i++; kernel_parm->kernel_type=atol(argv[i]); break; + case 'd': i++; kernel_parm->poly_degree=atol(argv[i]); break; + case 'g': i++; kernel_parm->rbf_gamma=atof(argv[i]); break; + case 's': i++; kernel_parm->coef_lin=atof(argv[i]); break; + case 'r': i++; kernel_parm->coef_const=atof(argv[i]); break; + case 'u': i++; strcpy(kernel_parm->custom,argv[i]); break; + case '-': 
strcpy(struct_parm->custom_argv[struct_parm->custom_argc++],argv[i]);i++; strcpy(struct_parm->custom_argv[struct_parm->custom_argc++],argv[i]);break; + case 'v': i++; (*struct_verbosity)=atol(argv[i]); break; + case 'y': i++; (*verbosity)=atol(argv[i]); break; + default: printf("\nUnrecognized option %s!\n\n",argv[i]); + print_help(); + exit(0); + } + } + if(i>=argc) { + printf("\nNot enough input parameters!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + strcpy (trainfile, argv[i]); + if((i+1)svm_iter_to_shrink == -9999) { + learn_parm->svm_iter_to_shrink=100; + } + + if((learn_parm->skip_final_opt_check) + && (kernel_parm->kernel_type == LINEAR)) { + printf("\nIt does not make sense to skip the final optimality check for linear kernels.\n\n"); + learn_parm->skip_final_opt_check=0; + } + if((learn_parm->skip_final_opt_check) + && (learn_parm->remove_inconsistent)) { + printf("\nIt is necessary to do the final optimality check when removing inconsistent \nexamples.\n"); + wait_any_key(); + print_help(); + exit(0); + } + if((learn_parm->svm_maxqpsize<2)) { + printf("\nMaximum size of QP-subproblems not in valid range: %ld [2..]\n",learn_parm->svm_maxqpsize); + wait_any_key(); + print_help(); + exit(0); + } + if((learn_parm->svm_maxqpsizesvm_newvarsinqp)) { + printf("\nMaximum size of QP-subproblems [%ld] must be larger than the number of\n",learn_parm->svm_maxqpsize); + printf("new variables [%ld] entering the working set in each iteration.\n",learn_parm->svm_newvarsinqp); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->svm_iter_to_shrink<1) { + printf("\nMaximum number of iterations for shrinking not in valid range: %ld [1,..]\n",learn_parm->svm_iter_to_shrink); + wait_any_key(); + print_help(); + exit(0); + } + if(struct_parm->C<0) { + printf("\nYou have to specify a value for the parameter '-c' (C>0)!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(((*alg_type) < 0) || (((*alg_type) > 5) && ((*alg_type) != 9))) { + printf("\nAlgorithm type must be either '0', '1', '2', '3', '4', or '9'!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->transduction_posratio>1) { + printf("\nThe fraction of unlabeled examples to classify as positives must\n"); + printf("be less than 1.0 !!!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->svm_costratio<=0) { + printf("\nThe COSTRATIO parameter must be greater than zero!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(struct_parm->epsilon<=0) { + printf("\nThe epsilon parameter must be greater than zero!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if((struct_parm->ccache_size<=0) && ((*alg_type) == 4)) { + printf("\nThe cache size must be at least 1!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(((struct_parm->batch_size<=0) || (struct_parm->batch_size>100)) + && ((*alg_type) == 4)) { + printf("\nThe batch size must be in the interval ]0,100]!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if((struct_parm->slack_norm<1) || (struct_parm->slack_norm>2)) { + printf("\nThe norm of the slacks must be either 1 (L1-norm) or 2 (L2-norm)!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if((struct_parm->loss_type != SLACK_RESCALING) + && (struct_parm->loss_type != MARGIN_RESCALING)) { + printf("\nThe loss type must be either 1 (slack rescaling) or 2 (margin rescaling)!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->rho<0) { + printf("\nThe parameter rho for xi/alpha-estimates and 
leave-one-out pruning must\n");
+    printf("be greater than zero (typically 1.0 or 2.0, see T. Joachims, Estimating the\n");
+    printf("Generalization Performance of an SVM Efficiently, ICML, 2000.)!\n\n");
+    wait_any_key();
+    print_help();
+    exit(0);
+  }
+  if((learn_parm->xa_depth<0) || (learn_parm->xa_depth>100)) {
+    printf("\nThe parameter depth for ext. xi/alpha-estimates must be in [0..100] (zero\n");
+    printf("for switching to the conventional xa/estimates described in T. Joachims,\n");
+    printf("Estimating the Generalization Performance of an SVM Efficiently, ICML, 2000.)\n");
+    wait_any_key();
+    print_help();
+    exit(0);
+  }
+
+  parse_struct_parameters(struct_parm);
+}
+
+void wait_any_key()
+{
+  printf("\n(more)\n");
+  (void)getc(stdin);
+}
+
+void print_help()
+{
+  printf("\nSVM-struct learning module: %s, %s, %s\n",INST_NAME,INST_VERSION,INST_VERSION_DATE);
+  printf("   includes SVM-struct %s for learning complex outputs, %s\n",STRUCT_VERSION,STRUCT_VERSION_DATE);
+  printf("   includes SVM-light %s quadratic optimizer, %s\n",VERSION,VERSION_DATE);
+  copyright_notice();
+  printf("   usage: svm_struct_learn [options] example_file model_file\n\n");
+  printf("Arguments:\n");
+  printf("         example_file-> file with training data\n");
+  printf("         model_file  -> file to store learned decision rule in\n");
+
+  printf("General Options:\n");
+  printf("         -?          -> this help\n");
+  printf("         -v [0..3]   -> verbosity level (default 1)\n");
+  printf("         -y [0..3]   -> verbosity level for svm_light (default 0)\n");
+  printf("Learning Options:\n");
+  printf("         -c float    -> C: trade-off between training error\n");
+  printf("                        and margin (default 0.01)\n");
+  printf("         -p [1,2]    -> L-norm to use for slack variables. Use 1 for L1-norm,\n");
+  printf("                        use 2 for squared slacks. (default 1)\n");
+  printf("         -o [1,2]    -> Rescaling method to use for loss.\n");
+  printf("                        1: slack rescaling\n");
+  printf("                        2: margin rescaling\n");
+  printf("                        (default %d)\n",DEFAULT_RESCALING);
+  printf("         -l [0..]    -> Loss function to use.\n");
+  printf("                        0: zero/one loss\n");
+  printf("                        ?: see below in application specific options\n");
+  printf("                        (default %d)\n",DEFAULT_LOSS_FCT);
+  printf("Optimization Options (see [2][5]):\n");
+  printf("         -w [0,..,9] -> choice of structural learning algorithm (default %d):\n",(int)DEFAULT_ALG_TYPE);
+  printf("                        0: n-slack algorithm described in [2]\n");
+  printf("                        1: n-slack algorithm with shrinking heuristic\n");
+  printf("                        2: 1-slack algorithm (primal) described in [5]\n");
+  printf("                        3: 1-slack algorithm (dual) described in [5]\n");
+  printf("                        4: 1-slack algorithm (dual) with constraint cache [5]\n");
+  printf("                        9: custom algorithm in svm_struct_learn_custom.c\n");
+  printf("         -e float    -> epsilon: allow that tolerance for termination\n");
+  printf("                        criterion (default %f)\n",DEFAULT_EPS);
+  printf("         -k [1..]    -> number of new constraints to accumulate before\n");
+  printf("                        recomputing the QP solution (default 100) (-w 0 and 1 only)\n");
+  printf("         -f [5..]    -> number of constraints to cache for each example\n");
+  printf("                        (default 5) (used with -w 4)\n");
+  printf("         -b [1..100] -> percentage of training set for which to refresh cache\n");
+  printf("                        when no epsilon violated constraint can be constructed\n");
+  printf("                        from current cache (default 100%%) (used with -w 4)\n");
+  printf("SVM-light Options for Solving QP Subproblems (see [3]):\n");
+  printf("         -n [2..q]   -> number of new variables entering the working set\n");
+  printf("                        in each svm-light iteration (default n = q).
\n"); + printf(" Set n < q to prevent zig-zagging.\n"); + printf(" -m [5..] -> size of svm-light cache for kernel evaluations in MB\n"); + printf(" (default 40) (used only for -w 1 with kernels)\n"); + printf(" -h [5..] -> number of svm-light iterations a variable needs to be\n"); + printf(" optimal before considered for shrinking (default 100)\n"); + printf(" -# int -> terminate svm-light QP subproblem optimization, if no\n"); + printf(" progress after this number of iterations.\n"); + printf(" (default 100000)\n"); + printf("Kernel Options:\n"); + printf(" -t int -> type of kernel function:\n"); + printf(" 0: linear (default)\n"); + printf(" 1: polynomial (s a*b+c)^d\n"); + printf(" 2: radial basis function exp(-gamma ||a-b||^2)\n"); + printf(" 3: sigmoid tanh(s a*b + c)\n"); + printf(" 4: user defined kernel from kernel.h\n"); + printf(" -d int -> parameter d in polynomial kernel\n"); + printf(" -g float -> parameter gamma in rbf kernel\n"); + printf(" -s float -> parameter s in sigmoid/poly kernel\n"); + printf(" -r float -> parameter c in sigmoid/poly kernel\n"); + printf(" -u string -> parameter of user defined kernel\n"); + printf("Output Options:\n"); + printf(" -a string -> write all alphas to this file after learning\n"); + printf(" (in the same order as in the training set)\n"); + printf("Application-Specific Options:\n"); + print_struct_help(); + wait_any_key(); + + printf("\nMore details in:\n"); + printf("[1] T. Joachims, Learning to Align Sequences: A Maximum Margin Aproach.\n"); + printf(" Technical Report, September, 2003.\n"); + printf("[2] I. Tsochantaridis, T. Joachims, T. Hofmann, and Y. Altun, Large Margin\n"); + printf(" Methods for Structured and Interdependent Output Variables, Journal\n"); + printf(" of Machine Learning Research (JMLR), Vol. 6(Sep):1453-1484, 2005.\n"); + printf("[3] T. Joachims, Making Large-Scale SVM Learning Practical. Advances in\n"); + printf(" Kernel Methods - Support Vector Learning, B. Schölkopf and C. Burges and\n"); + printf(" A. Smola (ed.), MIT Press, 1999.\n"); + printf("[4] T. Joachims, Learning to Classify Text Using Support Vector\n"); + printf(" Machines: Methods, Theory, and Algorithms. Dissertation, Kluwer,\n"); + printf(" 2002.\n"); + printf("[5] T. Joachims, T. Finley, Chun-Nam Yu, Cutting-Plane Training of Structural\n"); + printf(" SVMs, Machine Learning Journal, to appear.\n"); +} + + + diff --git a/src/classifier/svm/svm_struct_api.c b/src/classifier/svm/svm_struct_api.c new file mode 100755 index 0000000..1ab1efc --- /dev/null +++ b/src/classifier/svm/svm_struct_api.c @@ -0,0 +1,615 @@ +/***********************************************************************/ +/* */ +/* svm_struct_api.c */ +/* */ +/* Definition of API for attaching implementing SVM learning of */ +/* structures (e.g. parsing, multi-label classification, HMM) */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 03.07.04 */ +/* */ +/* Copyright (c) 2004 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. 
diff --git a/src/classifier/svm/svm_struct_api.c b/src/classifier/svm/svm_struct_api.c
new file mode 100755
index 0000000..1ab1efc
--- /dev/null
+++ b/src/classifier/svm/svm_struct_api.c
@@ -0,0 +1,615 @@
+/***********************************************************************/
+/*                                                                     */
+/*   svm_struct_api.c                                                  */
+/*                                                                     */
+/*   Definition of API for attaching implementing SVM learning of      */
+/*   structures (e.g. parsing, multi-label classification, HMM)        */
+/*                                                                     */
+/*   Author: Thorsten Joachims                                         */
+/*   Date: 03.07.04                                                    */
+/*                                                                     */
+/*   Copyright (c) 2004  Thorsten Joachims - All rights reserved       */
+/*                                                                     */
+/*   This software is available for non-commercial use only. It must   */
+/*   not be modified and distributed without prior permission of the   */
+/*   author. The author is not responsible for implications from the   */
+/*   use of this software.                                             */
+/*                                                                     */
+/***********************************************************************/
+
+#include "svm_struct_api.h"
+#include "svm_struct/svm_struct_common.h"
+#include <stdio.h>
+#include <string.h>
+
+void svm_struct_learn_api_init(int argc, char *argv[]) {
+  /* Called in learning part before anything else is done to allow
+     any initializations that might be necessary. */
+}
+
+void svm_struct_learn_api_exit() {
+  /* Called in learning part at the very end to allow any clean-up
+     that might be necessary. */
+}
+
+void svm_struct_classify_api_init(int argc, char *argv[]) {
+  /* Called in prediction part before anything else is done to allow
+     any initializations that might be necessary. */
+}
+
+void svm_struct_classify_api_exit() {
+  /* Called in prediction part at the very end to allow any clean-up
+     that might be necessary. */
+}
+
+SAMPLE read_struct_examples(char *file, STRUCT_LEARN_PARM *sparm) {
+  /* Reads training examples and returns them in sample. The number of
+     examples must be written into sample.n */
+  SAMPLE sample; /* sample */
+  EXAMPLE *examples;
+  long n;     /* number of examples */
+  DOC **docs; /* examples in original SVM-light format */
+  double *target;
+  long totwords, i, num_classes = 0;
+
+  /* Using the read_documents function from SVM-light */
+  read_documents(file, &docs, &target, &totwords, &n);
+  examples = (EXAMPLE *)my_malloc(sizeof(EXAMPLE) * n);
+  for (i = 0; i < n; i++) /* find highest class label */
+    if (num_classes < (target[i] + 0.1))
+      num_classes = target[i] + 0.1;
+  for (i = 0; i < n; i++) /* make sure all class labels are positive */
+    if (target[i] < 1) {
+      printf("\nERROR: The class label '%lf' of example number %ld is not "
+             "greater than '1'!\n",
+             target[i], i + 1);
+      exit(1);
+    }
+  for (i = 0; i < n; i++) { /* copy docs over into new datastructure */
+    examples[i].x.doc = docs[i];
+    examples[i].y.class_ = target[i] + 0.1;
+    examples[i].y.scores = NULL;
+    examples[i].y.num_classes_ = num_classes;
+  }
+  free(target);
+  free(docs);
+  sample.n = n;
+  sample.examples = examples;
+
+  if (struct_verbosity >= 0)
+    printf(" (%d examples) ", sample.n);
+  return (sample);
+}
+
+void init_struct_model(SAMPLE sample, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm,
+                       LEARN_PARM *lparm, KERNEL_PARM *kparm) {
+  /* Initialize structmodel sm. The weight vector w does not need to be
+     initialized, but you need to provide the maximum size of the
+     feature space in sizePsi. This is the maximum number of different
+     weights that can be learned. Later, the weight vector w will
+     contain the learned weights for the model. */
+  long i, totwords = 0;
+  WORD *w;
+
+  sparm->num_classes_ = 1;
+  for (i = 0; i < sample.n; i++) /* find highest class label */
+    if (sparm->num_classes_ < (sample.examples[i].y.class_ + 0.1))
+      sparm->num_classes_ = sample.examples[i].y.class_ + 0.1;
+  for (i = 0; i < sample.n; i++) /* find highest feature number */
+    for (w = sample.examples[i].x.doc->fvec->words; w->wnum; w++)
+      if (totwords < w->wnum)
+        totwords = w->wnum;
+  sparm->num_features = totwords;
+  if (struct_verbosity >= 0)
+    printf("Training set properties: %d features, %d classes\n",
+           sparm->num_features, sparm->num_classes_);
+  sm->sizePsi = sparm->num_features * sparm->num_classes_;
+  if (struct_verbosity >= 2)
+    printf("Size of Phi: %ld\n", sm->sizePsi);
+}
+
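+/* Illustrative note (numbers made up): with num_classes_=3 and
+   num_features=1000, init_struct_model() above sets sizePsi=3000; the
+   weights of class c occupy slots (c-1)*1000+1 .. c*1000 of sm->w in
+   SVM-light's 1-based indexing, which is the layout psi() below produces
+   via shift_s(). */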
+CONSTSET init_struct_constraints(SAMPLE sample, STRUCTMODEL *sm,
+                                 STRUCT_LEARN_PARM *sparm) {
+  /* Initializes the optimization problem. Typically, you do not need
+     to change this function, since you want to start with an empty
+     set of constraints. However, if for example you have constraints
+     that certain weights need to be positive, you might put that in
+     here. The constraints are represented as lhs[i]*w >= rhs[i]. lhs
+     is an array of feature vectors, rhs is an array of doubles. m is
+     the number of constraints. The function returns the initial
+     set of constraints. */
+  CONSTSET c;
+  long sizePsi = sm->sizePsi;
+  long i;
+  WORD words[2];
+
+  if (1) { /* normal case: start with empty set of constraints */
+    c.lhs = NULL;
+    c.rhs = NULL;
+    c.m = 0;
+  } else { /* add constraints so that all learned weights are
+              positive. WARNING: Currently, they are positive only up to
+              precision epsilon set by -e. */
+    c.lhs = my_malloc(sizeof(DOC *) * sizePsi);
+    c.rhs = my_malloc(sizeof(double) * sizePsi);
+    for (i = 0; i < sizePsi; i++) {
+      words[0].wnum = i + 1;
+      words[0].weight = 1.0;
+      words[1].wnum = 0;
+      /* the following slackid is a hack. we will run into problems,
+         if we have more than 1000000 slack sets (ie examples) */
+      c.lhs[i] = create_example(i, 0, 1000000 + i, 1,
+                                create_svector(words, NULL, 1.0));
+      c.rhs[i] = 0.0;
+    }
+  }
+  return (c);
+}
+
+LABEL classify_struct_example(PATTERN x, STRUCTMODEL *sm,
+                              STRUCT_LEARN_PARM *sparm) {
+  /* Finds the label yhat for pattern x that scores the highest
+     according to the linear evaluation function in sm, especially the
+     weights sm.w. The returned label is taken as the prediction of sm
+     for the pattern x. The weights correspond to the features defined
+     by psi() and range from index 1 to index sm->sizePsi. If the
+     function cannot find a label, it shall return an empty label as
+     recognized by the function empty_label(y). */
+  LABEL y;
+  DOC doc;
+  long class_, bestclass = -1, first = 1, j;
+  double score, bestscore = -1;
+  WORD *words;
+
+  doc = *(x.doc);
+  y.scores = (double *)my_malloc(sizeof(double) * (sparm->num_classes_ + 1));
+  y.num_classes_ = sparm->num_classes_;
+  words = doc.fvec->words;
+  for (j = 0; (words[j]).wnum != 0; j++) {     /* Check if feature numbers */
+    if ((words[j]).wnum > sparm->num_features) /* are not larger than in   */
+      (words[j]).wnum = 0;                     /* model. Remove feature if */
+  }                                            /* necessary.               */
+  for (class_ = 1; class_ <= sparm->num_classes_; class_++) {
+    y.class_ = class_;
+    doc.fvec = psi(x, y, sm, sparm);
+    score = classify_example(sm->svm_model, &doc);
+    free_svector(doc.fvec);
+    y.scores[class_] = score;
+    if ((bestscore < score) || (first)) {
+      bestscore = score;
+      bestclass = class_;
+      first = 0;
+    }
+  }
+  y.class_ = bestclass;
+  return (y);
+}
+
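+/* Editor's sketch (not part of the SVM-struct API; the dense layout and
+   names are illustrative): the loop above is equivalent to an argmax over
+   per-class blocks of one stacked weight vector. */
+static int argmax_class_dense(const double *w, const double *x,
+                              long num_features, long num_classes) {
+  long c, f, best = 1;
+  double s, best_score = 0;
+  for (c = 1; c <= num_classes; c++) {
+    /* block of class c, matching sizePsi = num_features*num_classes */
+    const double *wc = w + (c - 1) * num_features;
+    s = 0;
+    for (f = 0; f < num_features; f++)
+      s += wc[f] * x[f];
+    if (c == 1 || s > best_score) {
+      best_score = s;
+      best = c;
+    }
+  }
+  return (int)best;
+}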
+LABEL find_most_violated_constraint_slackrescaling(PATTERN x, LABEL y,
+                                                   STRUCTMODEL *sm,
+                                                   STRUCT_LEARN_PARM *sparm) {
+  /* Finds the label ybar for pattern x that is responsible for
+     the most violated constraint for the slack rescaling
+     formulation. It has to take into account the scoring function in
+     sm, especially the weights sm.w, as well as the loss
+     function. The weights in sm.w correspond to the features defined
+     by psi() and range from index 1 to index sm->sizePsi. Most simple
+     is the case of the zero/one loss function. For the zero/one loss,
+     this function should return the highest scoring label ybar, if
+     ybar is unequal y; if it is equal to the correct label y, then
+     the function shall return the second highest scoring label. If
+     the function cannot find a label, it shall return an empty label
+     as recognized by the function empty_label(y). */
+  LABEL ybar;
+  DOC doc;
+  long class_, bestclass = -1, first = 1;
+  double score, score_y, score_ybar, bestscore = -1;
+
+  /* NOTE: This function could be made much more efficient by not
+     always computing a new PSI vector. */
+  doc = *(x.doc);
+  doc.fvec = psi(x, y, sm, sparm);
+  score_y = classify_example(sm->svm_model, &doc);
+  free_svector(doc.fvec);
+
+  ybar.scores = NULL;
+  ybar.num_classes_ = sparm->num_classes_;
+  for (class_ = 1; class_ <= sparm->num_classes_; class_++) {
+    ybar.class_ = class_;
+    doc.fvec = psi(x, ybar, sm, sparm);
+    score_ybar = classify_example(sm->svm_model, &doc);
+    free_svector(doc.fvec);
+    score = loss(y, ybar, sparm) * (1.0 - score_y + score_ybar);
+    if ((bestscore < score) || (first)) {
+      bestscore = score;
+      bestclass = class_;
+      first = 0;
+    }
+  }
+  if (bestclass == -1)
+    printf("ERROR: Only one class\n");
+  ybar.class_ = bestclass;
+  if (struct_verbosity >= 3)
+    printf("[%ld:%.2f] ", bestclass, bestscore);
+  return (ybar);
+}
+
+LABEL find_most_violated_constraint_marginrescaling(PATTERN x, LABEL y,
+                                                    STRUCTMODEL *sm,
+                                                    STRUCT_LEARN_PARM *sparm) {
+  /* Finds the label ybar for pattern x that is responsible for
+     the most violated constraint for the margin rescaling
+     formulation. It has to take into account the scoring function in
+     sm, especially the weights sm.w, as well as the loss
+     function. The weights in sm.w correspond to the features defined
+     by psi() and range from index 1 to index sm->sizePsi. Most simple
+     is the case of the zero/one loss function. For the zero/one loss,
+     this function should return the highest scoring label ybar, if
+     ybar is unequal y; if it is equal to the correct label y, then
+     the function shall return the second highest scoring label. If
+     the function cannot find a label, it shall return an empty label
+     as recognized by the function empty_label(y). */
+  LABEL ybar;
+  DOC doc;
+  long class_, bestclass = -1, first = 1;
+  double score, bestscore = -1;
+
+  /* NOTE: This function could be made much more efficient by not
+     always computing a new PSI vector. */
+  doc = *(x.doc);
+  ybar.scores = NULL;
+  ybar.num_classes_ = sparm->num_classes_;
+  for (class_ = 1; class_ <= sparm->num_classes_; class_++) {
+    ybar.class_ = class_;
+    doc.fvec = psi(x, ybar, sm, sparm);
+    score = classify_example(sm->svm_model, &doc);
+    free_svector(doc.fvec);
+    score += loss(y, ybar, sparm);
+    if ((bestscore < score) || (first)) {
+      bestscore = score;
+      bestclass = class_;
+      first = 0;
+    }
+  }
+  if (bestclass == -1)
+    printf("ERROR: Only one class\n");
+  ybar.class_ = bestclass;
+  if (struct_verbosity >= 3)
+    printf("[%ld:%.2f] ", bestclass, bestscore);
+  return (ybar);
+}
+
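+/* Editor's note: the two functions above do a brute-force search over all
+   classes for
+     argmax_ybar  w*psi(x,ybar) + loss(y,ybar)                     (margin rescaling)
+     argmax_ybar  loss(y,ybar) * (1 - w*psi(x,y) + w*psi(x,ybar))  (slack rescaling)
+   which is feasible here because the label space of multiclass
+   classification is just the set of classes. */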
+int empty_label(LABEL y) {
+  /* Returns true, if y is an empty label. An empty label might be
+     returned by find_most_violated_constraint_???(x, y, sm) if there
+     is no incorrect label that can be found for x, or if it is unable
+     to label x at all */
+  return (y.class_ < 0.9);
+}
+
+SVECTOR *psi(PATTERN x, LABEL y, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
+  /* Returns a feature vector describing the match between pattern x and
+     label y. The feature vector is returned as an SVECTOR
+     (i.e. pairs <featurenumber:featurevalue>), where the last pair has
+     featurenumber 0 as a terminator. Featurenumbers start with 1 and end with
+     sizePsi. This feature vector determines the linear evaluation
+     function that is used to score labels. There will be one weight in
+     sm.w for each feature. Note that psi has to match
+     find_most_violated_constraint_???(x, y, sm) and vice versa. In
+     particular, find_most_violated_constraint_???(x, y, sm) finds the
+     ybar!=y that maximizes psi(x,ybar,sm)*sm.w (where * is the inner
+     vector product) and the appropriate function of the loss. */
+  SVECTOR *fvec;
+
+  /* shift the feature numbers to the position of weight vector of class y */
+  fvec = shift_s(x.doc->fvec, (y.class_ - 1) * sparm->num_features);
+
+  /* The following makes sure that the weight vectors for each class
+     are treated separately when kernels are used. */
+  fvec->kernel_id = y.class_;
+
+  return (fvec);
+}
+
+double loss(LABEL y, LABEL ybar, STRUCT_LEARN_PARM *sparm) {
+  /* loss for correct label y and predicted label ybar. The loss for
+     y==ybar has to be zero. sparm->loss_function is set with the -l option. */
+  if (sparm->loss_function == 0) { /* type 0 loss: 0/1 loss */
+    if (y.class_ == ybar.class_)   /* return 0, if y==ybar. return 100 else */
+      return (0);
+    else
+      return (100);
+  }
+  if (sparm->loss_function == 1) { /* type 1 loss: squared difference */
+    return ((y.class_ - ybar.class_) * (y.class_ - ybar.class_));
+  } else {
+    /* Put your code for different loss functions here. But then
+       find_most_violated_constraint_???(x, y, sm) has to return the
+       highest scoring label with the largest loss. */
+    printf("Unknown loss function\n");
+    exit(1);
+  }
+}
+
+int finalize_iteration(double ceps, int cached_constraint, SAMPLE sample,
+                       STRUCTMODEL *sm, CONSTSET cset, double *alpha,
+                       STRUCT_LEARN_PARM *sparm) {
+  /* This function is called just before the end of each cutting plane
+   * iteration. ceps is the amount by which the most violated constraint found
+   * in the current iteration was violated. cached_constraint is true if the
+   * added constraint was constructed from the cache. If the return value is
+   * FALSE, then the algorithm is allowed to terminate. If it is TRUE, the
+   * algorithm will keep iterating even if the desired precision sparm->epsilon
+   * is already reached. */
+  return (0);
+}
+
+void print_struct_learning_stats(SAMPLE sample, STRUCTMODEL *sm, CONSTSET cset,
+                                 double *alpha, STRUCT_LEARN_PARM *sparm) {
+  /* This function is called after training and allows final touches to
+     the model sm. But primarily it allows computing and printing any
+     kind of statistic (e.g. training error) you might want. */
+
+  /* Replace SV with single weight vector */
+  MODEL *model = sm->svm_model;
+  if (model->kernel_parm.kernel_type == LINEAR) {
+    if (struct_verbosity >= 1) {
+      printf("Compacting linear model...");
+      fflush(stdout);
+    }
+    sm->svm_model = compact_linear_model(model);
+    sm->w = sm->svm_model->lin_weights; /* short cut to weight vector */
+    free_model(model, 1);
+    if (struct_verbosity >= 1) {
+      printf("done\n");
+      fflush(stdout);
+    }
+  }
+}
+
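+/* Illustrative model-file header as written by write_struct_model() below
+   (all values made up):
+     SVM-multiclass Version V2.20
+     3 # number of classes
+     1000 # number of base features
+     0 # loss function
+     0 # kernel type
+   ...followed by one line per support vector, starting with alpha*y. */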
+void write_struct_model(char *file, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm) {
+  /* Writes structural model sm to file file. */
+  FILE *modelfl;
+  long j, i, sv_num;
+  MODEL *model = sm->svm_model;
+  SVECTOR *v;
+
+  if ((modelfl = fopen(file, "w")) == NULL) {
+    perror(file);
+    exit(1);
+  }
+  fprintf(modelfl, "SVM-multiclass Version %s\n", INST_VERSION);
+  fprintf(modelfl, "%d # number of classes\n", sparm->num_classes_);
+  fprintf(modelfl, "%d # number of base features\n", sparm->num_features);
+  fprintf(modelfl, "%d # loss function\n", sparm->loss_function);
+  fprintf(modelfl, "%ld # kernel type\n", model->kernel_parm.kernel_type);
+  fprintf(modelfl, "%ld # kernel parameter -d \n",
+          model->kernel_parm.poly_degree);
+  fprintf(modelfl, "%.8g # kernel parameter -g \n",
+          model->kernel_parm.rbf_gamma);
+  fprintf(modelfl, "%.8g # kernel parameter -s \n",
+          model->kernel_parm.coef_lin);
+  fprintf(modelfl, "%.8g # kernel parameter -r \n",
+          model->kernel_parm.coef_const);
+  fprintf(modelfl, "%s# kernel parameter -u \n", model->kernel_parm.custom);
+  fprintf(modelfl, "%ld # highest feature index \n", model->totwords);
+  fprintf(modelfl, "%ld # number of training documents \n", model->totdoc);
+
+  sv_num = 1;
+  for (i = 1; i < model->sv_num; i++) {
+    for (v = model->supvec[i]->fvec; v; v = v->next)
+      sv_num++;
+  }
+  fprintf(modelfl, "%ld # number of support vectors plus 1 \n", sv_num);
+  fprintf(modelfl,
+          "%.8g # threshold b, each following line is a SV (starting with "
+          "alpha*y)\n",
+          model->b);
+
+  for (i = 1; i < model->sv_num; i++) {
+    for (v = model->supvec[i]->fvec; v; v = v->next) {
+      fprintf(modelfl, "%.32g ", model->alpha[i] * v->factor);
+      fprintf(modelfl, "qid:%ld ", v->kernel_id);
+      for (j = 0; (v->words[j]).wnum; j++) {
+        fprintf(modelfl, "%ld:%.8g ", (long)(v->words[j]).wnum,
+                (double)(v->words[j]).weight);
+      }
+      if (v->userdefined)
+        fprintf(modelfl, "#%s\n", v->userdefined);
+      else
+        fprintf(modelfl, "#\n");
+      /* NOTE: this could be made more efficient by summing the
+         alpha's of identical vectors before writing them to the
+         file. */
+    }
+  }
+  fclose(modelfl);
+}
+
+void print_struct_testing_stats(SAMPLE sample, STRUCTMODEL *sm,
+                                STRUCT_LEARN_PARM *sparm,
+                                STRUCT_TEST_STATS *teststats) {
+  /* This function is called after making all test predictions in
+     svm_struct_classify and allows computing and printing any kind of
+     evaluation (e.g. precision/recall) you might want. You can use
+     the function eval_prediction to accumulate the necessary
+     statistics for each prediction. */
+}
+
+void eval_prediction(long exnum, EXAMPLE ex, LABEL ypred, STRUCTMODEL *sm,
+                     STRUCT_LEARN_PARM *sparm, STRUCT_TEST_STATS *teststats) {
+  /* This function allows you to accumulate statistics for how well the
+     prediction matches the labeled example. It is called from
+     svm_struct_classify. See also the function
+     print_struct_testing_stats. */
+  if (exnum == 0) { /* this is the first time the function is
+                       called. So initialize the teststats */
+  }
+}
+
+STRUCTMODEL read_struct_model(char *file, STRUCT_LEARN_PARM *sparm) {
+  /* Reads structural model sm from file file. This function is used
+     only in the prediction module, not in the learning module.
*/
+  FILE *modelfl;
+  STRUCTMODEL sm;
+  long i, queryid, slackid;
+  double costfactor;
+  long max_sv, max_words, ll, wpos;
+  char *line, *comment;
+  WORD *words;
+  char version_buffer[100];
+  MODEL *model;
+
+  nol_ll(file, &max_sv, &max_words, &ll); /* scan size of model file */
+  max_words += 2;
+  ll += 2;
+
+  words = (WORD *)my_malloc(sizeof(WORD) * (max_words + 10));
+  line = (char *)my_malloc(sizeof(char) * ll);
+  model = (MODEL *)my_malloc(sizeof(MODEL));
+
+  if ((modelfl = fopen(file, "r")) == NULL) {
+    perror(file);
+    exit(1);
+  }
+
+  fscanf(modelfl, "SVM-multiclass Version %s\n", version_buffer);
+  if (strcmp(version_buffer, INST_VERSION)) {
+    perror(
+        "Version of model-file does not match version of svm_struct_classify!");
+    exit(1);
+  }
+  fscanf(modelfl, "%d%*[^\n]\n", &sparm->num_classes_);
+  fscanf(modelfl, "%d%*[^\n]\n", &sparm->num_features);
+  fscanf(modelfl, "%d%*[^\n]\n", &sparm->loss_function);
+  fscanf(modelfl, "%ld%*[^\n]\n", &model->kernel_parm.kernel_type);
+  fscanf(modelfl, "%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
+  fscanf(modelfl, "%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
+  fscanf(modelfl, "%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
+  fscanf(modelfl, "%lf%*[^\n]\n", &model->kernel_parm.coef_const);
+  fscanf(modelfl, "%[^#]%*[^\n]\n", model->kernel_parm.custom);
+
+  fscanf(modelfl, "%ld%*[^\n]\n", &model->totwords);
+  fscanf(modelfl, "%ld%*[^\n]\n", &model->totdoc);
+  fscanf(modelfl, "%ld%*[^\n]\n", &model->sv_num);
+  fscanf(modelfl, "%lf%*[^\n]\n", &model->b);
+
+  model->supvec = (DOC **)my_malloc(sizeof(DOC *) * model->sv_num);
+  model->alpha = (double *)my_malloc(sizeof(double) * model->sv_num);
+  model->index = NULL;
+  model->lin_weights = NULL;
+
+  for (i = 1; i < model->sv_num; i++) {
+    fgets(line, (int)ll, modelfl);
+    if (!parse_document(line, words, &(model->alpha[i]), &queryid, &slackid,
+                        &costfactor, &wpos, max_words, &comment)) {
+      printf("\nParsing error while reading model file in SV %ld!\n%s", i,
+             line);
+      exit(1);
+    }
+    model->supvec[i] =
+        create_example(-1, 0, 0, 0.0, create_svector(words, comment, 1.0));
+    model->supvec[i]->fvec->kernel_id = queryid;
+  }
+  fclose(modelfl);
+  free(line);
+  free(words);
+  if (verbosity >= 1) {
+    fprintf(stdout, " (%d support vectors read) ", (int)(model->sv_num - 1));
+  }
+  sm.svm_model = model;
+  sm.sizePsi = model->totwords;
+  sm.w = NULL;
+  return (sm);
+}
+
+void write_label(FILE *fp, LABEL y) {
+  /* Writes label y to file handle fp. */
+  int i;
+  fprintf(fp, "%d", y.class_);
+  if (y.scores)
+    for (i = 1; i <= y.num_classes_; i++)
+      fprintf(fp, " %f", y.scores[i]);
+  fprintf(fp, "\n");
+}
+
+void free_pattern(PATTERN x) {
+  /* Frees the memory of x. */
+  free_example(x.doc, 1);
+}
+
+void free_label(LABEL y) {
+  /* Frees the memory of y. */
+  if (y.scores)
+    free(y.scores);
+}
+
+void free_struct_model(STRUCTMODEL sm) {
+  /* Frees the memory of model. */
+  /* if(sm.w) free(sm.w); */ /* this is free'd in free_model */
+  if (sm.svm_model)
+    free_model(sm.svm_model, 1);
+  /* add free calls for user defined data here */
+}
+
+void free_struct_sample(SAMPLE s) {
+  /* Frees the memory of sample s. */
+  int i;
+  for (i = 0; i < s.n; i++) {
+    free_pattern(s.examples[i].x);
+    free_label(s.examples[i].y);
+  }
+  free(s.examples);
+}
+
+void print_struct_help() {
+  /* Prints a help text that is appended to the common help text of
+     svm_struct_learn. */
+
+  printf(" none\n\n");
+  printf("Based on multi-class SVM formulation described in:\n");
+  printf("    K. Crammer and Y. Singer.
On the Algorithmic "
+         "Implementation of\n");
+  printf("    Multi-class SVMs, JMLR, 2001.\n");
+}
+
+void parse_struct_parameters(STRUCT_LEARN_PARM *sparm) {
+  /* Parses the command line parameters that start with -- */
+  int i;
+
+  for (i = 0; (i < sparm->custom_argc) && ((sparm->custom_argv[i])[0] == '-');
+       i++) {
+    switch ((sparm->custom_argv[i])[2]) {
+    case 'a':
+      i++; /* strcpy(learn_parm->alphafile,argv[i]); */
+      break;
+    case 'e':
+      i++; /* sparm->epsilon=atof(sparm->custom_argv[i]); */
+      break;
+    case 'k':
+      i++; /* sparm->newconstretrain=atol(sparm->custom_argv[i]); */
+      break;
+    }
+  }
+}
+
+void print_struct_help_classify() {
+  /* Prints a help text that is appended to the common help text of
+     svm_struct_classify. */
+}
+
+void parse_struct_parameters_classify(STRUCT_LEARN_PARM *sparm) {
+  /* Parses the command line parameters that start with -- for the
+     classification module */
+  int i;
+
+  for (i = 0; (i < sparm->custom_argc) && ((sparm->custom_argv[i])[0] == '-');
+       i++) {
+    switch ((sparm->custom_argv[i])[2]) {
+      /* case 'x': i++; strcpy(xvalue,sparm->custom_argv[i]); break; */
+    default:
+      printf("\nUnrecognized option %s!\n\n", sparm->custom_argv[i]);
+      exit(0);
+    }
+  }
+}
diff --git a/src/classifier/svm/svm_struct_api.h b/src/classifier/svm/svm_struct_api.h
new file mode 100755
index 0000000..4583255
--- /dev/null
+++ b/src/classifier/svm/svm_struct_api.h
@@ -0,0 +1,76 @@
+/***********************************************************************/
+/*                                                                     */
+/*   svm_struct_api.h                                                  */
+/*                                                                     */
+/*   Definition of API for attaching implementing SVM learning of      */
+/*   structures (e.g. parsing, multi-label classification, HMM)        */
+/*                                                                     */
+/*   Author: Thorsten Joachims                                         */
+/*   Date: 03.07.04                                                    */
+/*                                                                     */
+/*   Copyright (c) 2004  Thorsten Joachims - All rights reserved       */
+/*                                                                     */
+/*   This software is available for non-commercial use only. It must   */
+/*   not be modified and distributed without prior permission of the   */
+/*   author. The author is not responsible for implications from the   */
+/*   use of this software.
*/
+/*                                                                     */
+/***********************************************************************/
+
+#include "svm_struct/svm_struct_common.h"
+#include "svm_struct_api_types.h"
+
+#ifndef svm_struct_api
+#define svm_struct_api
+#ifdef __cplusplus
+extern "C" {
+#endif
+void svm_struct_learn_api_init(int argc, char *argv[]);
+void svm_struct_learn_api_exit();
+void svm_struct_classify_api_init(int argc, char *argv[]);
+void svm_struct_classify_api_exit();
+SAMPLE read_struct_examples(char *file, STRUCT_LEARN_PARM *sparm);
+void init_struct_model(SAMPLE sample, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm,
+                       LEARN_PARM *lparm, KERNEL_PARM *kparm);
+CONSTSET init_struct_constraints(SAMPLE sample, STRUCTMODEL *sm,
+                                 STRUCT_LEARN_PARM *sparm);
+LABEL find_most_violated_constraint_slackrescaling(PATTERN x, LABEL y,
+                                                   STRUCTMODEL *sm,
+                                                   STRUCT_LEARN_PARM *sparm);
+LABEL find_most_violated_constraint_marginrescaling(PATTERN x, LABEL y,
+                                                    STRUCTMODEL *sm,
+                                                    STRUCT_LEARN_PARM *sparm);
+LABEL classify_struct_example(PATTERN x, STRUCTMODEL *sm,
+                              STRUCT_LEARN_PARM *sparm);
+int empty_label(LABEL y);
+SVECTOR *psi(PATTERN x, LABEL y, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm);
+double loss(LABEL y, LABEL ybar, STRUCT_LEARN_PARM *sparm);
+int finalize_iteration(double ceps, int cached_constraint, SAMPLE sample,
+                       STRUCTMODEL *sm, CONSTSET cset, double *alpha,
+                       STRUCT_LEARN_PARM *sparm);
+void print_struct_learning_stats(SAMPLE sample, STRUCTMODEL *sm, CONSTSET cset,
+                                 double *alpha, STRUCT_LEARN_PARM *sparm);
+void print_struct_testing_stats(SAMPLE sample, STRUCTMODEL *sm,
+                                STRUCT_LEARN_PARM *sparm,
+                                STRUCT_TEST_STATS *teststats);
+void eval_prediction(long exnum, EXAMPLE ex, LABEL prediction, STRUCTMODEL *sm,
+                     STRUCT_LEARN_PARM *sparm, STRUCT_TEST_STATS *teststats);
+void write_struct_model(char *file, STRUCTMODEL *sm, STRUCT_LEARN_PARM *sparm);
+STRUCTMODEL read_struct_model(char *file, STRUCT_LEARN_PARM *sparm);
+void write_label(FILE *fp, LABEL y);
+void free_pattern(PATTERN x);
+void free_label(LABEL y);
+void free_struct_model(STRUCTMODEL sm);
+void free_struct_sample(SAMPLE s);
+void print_struct_help();
+void parse_struct_parameters(STRUCT_LEARN_PARM *sparm);
+void print_struct_help_classify();
+void parse_struct_parameters_classify(STRUCT_LEARN_PARM *sparm);
+void svm_learn_struct_joint_custom(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
+                                   LEARN_PARM *lparm, KERNEL_PARM *kparm,
+                                   STRUCTMODEL *sm);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/classifier/svm/svm_struct_api_types.h b/src/classifier/svm/svm_struct_api_types.h
new file mode 100755
index 0000000..934400f
--- /dev/null
+++ b/src/classifier/svm/svm_struct_api_types.h
@@ -0,0 +1,114 @@
+/***********************************************************************/
+/*                                                                     */
+/*   svm_struct_api.h                                                  */
+/*                                                                     */
+/*   Definition of API for attaching implementing SVM learning of      */
+/*   structures (e.g. parsing, multi-label classification, HMM)        */
+/*                                                                     */
+/*   Author: Thorsten Joachims                                         */
+/*   Date: 13.10.03                                                    */
+/*                                                                     */
+/*   Copyright (c) 2003  Thorsten Joachims - All rights reserved       */
+/*                                                                     */
+/*   This software is available for non-commercial use only. It must   */
+/*   not be modified and distributed without prior permission of the   */
+/*   author. The author is not responsible for implications from the   */
+/*   use of this software.
*/
+/*                                                                     */
+/***********************************************************************/
+
+#ifndef svm_struct_api_types
+#define svm_struct_api_types
+
+#include "svm_light/svm_common.h"
+#include "svm_light/svm_learn.h"
+
+#define INST_NAME "Multi-Class SVM"
+#define INST_VERSION "V2.20"
+#define INST_VERSION_DATE "14.08.08"
+
+/* default precision for solving the optimization problem */
+#define DEFAULT_EPS 0.1
+/* default loss rescaling method: 1=slack_rescaling, 2=margin_rescaling */
+#define DEFAULT_RESCALING 2
+/* default loss function: */
+#define DEFAULT_LOSS_FCT 0
+/* default optimization algorithm to use: */
+#define DEFAULT_ALG_TYPE 4
+/* store Psi(x,y) once instead of recomputing it every time: */
+#define USE_FYCACHE 0
+/* decide whether to evaluate sum before storing vectors in constraint
+   cache:
+   0 = NO,
+   1 = YES (best, if sparse vectors and long vector lists),
+   2 = YES (best, if short vector lists),
+   3 = YES (best, if dense vectors and long vector lists) */
+#define COMPACT_CACHED_VECTORS 2
+/* minimum absolute value below which values in sparse vectors are
+   rounded to zero. Values are stored in the FVAL type defined in svm_common.h
+   RECOMMENDATION: assuming you use FVAL=float, use
+     10E-15 if COMPACT_CACHED_VECTORS is 1
+     10E-10 if COMPACT_CACHED_VECTORS is 2 or 3
+*/
+#define COMPACT_ROUNDING_THRESH 10E-15
+
+typedef struct pattern {
+  /* this defines the x-part of a training example, e.g. the structure
+     for storing a natural language sentence in NLP parsing */
+  DOC *doc;
+} PATTERN;
+
+typedef struct label {
+  /* this defines the y-part (the label) of a training example,
+     e.g. the parse tree of the corresponding sentence. */
+  int class_;       /* class label */
+  int num_classes_; /* total number of classes */
+  double *scores;   /* value of linear function of each class */
+} LABEL;
+
+typedef struct structmodel {
+  double *w;        /* pointer to the learned weights */
+  MODEL *svm_model; /* the learned SVM model */
+  long sizePsi;     /* maximum number of weights in w */
+  double walpha;
+  /* other information that is needed for the structural model can be
+     added here, e.g. the grammar rules for NLP parsing */
+} STRUCTMODEL;
+
+typedef struct struct_learn_parm {
+  double epsilon;            /* precision for which to solve
+                                quadratic program */
+  double newconstretrain;    /* number of new constraints to
+                                accumulate before recomputing the QP
+                                solution */
+  int ccache_size;           /* maximum number of constraints to
+                                cache for each example (used in w=4
+                                algorithm) */
+  double batch_size;         /* size of the mini batches in percent
+                                of training set size (used in w=4
+                                algorithm) */
+  double C;                  /* trade-off between margin and loss */
+  char custom_argv[20][300]; /* string set with the -u command line option */
+  int custom_argc;           /* number of -u command line options */
+  int slack_norm;            /* norm to use in objective function
+                                for slack variables; 1 -> L1-norm,
+                                2 -> L2-norm */
+  int loss_type;             /* selected loss function from -r
+                                command line option. Select between
+                                slack rescaling (1) and margin
+                                rescaling (2) */
+  int loss_function;         /* select between different loss
+                                functions via -l command line
+                                option */
+  /* further parameters that are passed to init_struct_model() */
+  int num_classes_;
+  int num_features;
+} STRUCT_LEARN_PARM;
+
+typedef struct struct_test_stats {
+  /* you can add variables for keeping statistics when evaluating the
+     test predictions in svm_struct_classify. This can be used in the
+     function eval_prediction and print_struct_testing_stats. */
+} STRUCT_TEST_STATS;
+
+#endif
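A standalone sketch (editor's illustration; toy_label is a made-up stand-in
for LABEL) of the 0/1 loss selected by DEFAULT_LOSS_FCT=0 above, mirroring
loss() in svm_struct_api.c, which returns 100 rather than 1 for a miss:

    #include <stdio.h>

    struct toy_label { int class_; };

    static double zero_one_loss(struct toy_label y, struct toy_label ybar) {
      return (y.class_ == ybar.class_) ? 0.0 : 100.0;
    }

    int main(void) {
      struct toy_label y = {2}, ybar = {3};
      printf("loss = %.0f\n", zero_one_loss(y, ybar)); /* prints "loss = 100" */
      return 0;
    }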
diff --git a/src/classifier/svm/svm_struct_learn_custom.c b/src/classifier/svm/svm_struct_learn_custom.c
new file mode 100755
index 0000000..dfe931f
--- /dev/null
+++ b/src/classifier/svm/svm_struct_learn_custom.c
@@ -0,0 +1,42 @@
+/***********************************************************************/
+/*                                                                     */
+/*   svm_struct_learn_custom.c (instantiated for SVM-perform)          */
+/*                                                                     */
+/*   Allows implementing a custom/alternate algorithm for solving      */
+/*   the structural SVM optimization problem. The algorithm can use    */
+/*   full access to the SVM-struct API and to SVM-light.               */
+/*                                                                     */
+/*   Author: Thorsten Joachims                                         */
+/*   Date: 09.01.08                                                    */
+/*                                                                     */
+/*   Copyright (c) 2008  Thorsten Joachims - All rights reserved       */
+/*                                                                     */
+/*   This software is available for non-commercial use only. It must   */
+/*   not be modified and distributed without prior permission of the   */
+/*   author. The author is not responsible for implications from the   */
+/*   use of this software.                                             */
+/*                                                                     */
+/***********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "svm_struct_api.h"
+#include "svm_light/svm_common.h"
+#include "svm_struct/svm_struct_common.h"
+#include "svm_struct/svm_struct_learn.h"
+
+
+void svm_learn_struct_joint_custom(SAMPLE sample, STRUCT_LEARN_PARM *sparm,
+                                   LEARN_PARM *lparm, KERNEL_PARM *kparm,
+                                   STRUCTMODEL *sm)
+     /* Input: sample (training examples)
+               sparm (structural learning parameters)
+               lparm (svm learning parameters)
+               kparm (kernel parameters)
+        Output: sm (learned model) */
+{
+  /* Put your algorithm here. See svm_struct_learn.c for an example of
+     how to access this API. */
+}
+
diff --git a/src/classifier/svm/svm_trainer.cpp b/src/classifier/svm/svm_trainer.cpp
new file mode 100644
index 0000000..cd735b1
--- /dev/null
+++ b/src/classifier/svm/svm_trainer.cpp
@@ -0,0 +1,34 @@
+#include "../svm_trainer.h"
+#include "svm_binary_trainer.hpp"
+#include "svm_multiclass_trainer.hpp"
+
+ISVMTrainer new_svm_binary_trainer() {
+  return new ovclassifier::SVMBinaryTrainer();
+}
+ISVMTrainer new_svm_multiclass_trainer() {
+  return new ovclassifier::SVMMultiClassTrainer();
+}
+
+void destroy_svm_trainer(ISVMTrainer trainer) {
+  delete static_cast<ovclassifier::SVMTrainer *>(trainer);
+}
+
+void svm_trainer_reset(ISVMTrainer trainer) {
+  static_cast<ovclassifier::SVMTrainer *>(trainer)->Reset();
+}
+
+void svm_trainer_set_labels(ISVMTrainer trainer, int labels) {
+  static_cast<ovclassifier::SVMTrainer *>(trainer)->SetLabels(labels);
+}
+
+void svm_trainer_set_features(ISVMTrainer trainer, int feats) {
+  static_cast<ovclassifier::SVMTrainer *>(trainer)->SetFeatures(feats);
+}
+
+void svm_trainer_add_data(ISVMTrainer trainer, int label, const float *vec) {
+  static_cast<ovclassifier::SVMTrainer *>(trainer)->AddData(label, vec);
+}
+
+int svm_train(ISVMTrainer trainer, const char *modelfile) {
+  return static_cast<ovclassifier::SVMTrainer *>(trainer)->Train(modelfile);
+}
diff --git a/src/classifier/svm/svm_trainer.hpp b/src/classifier/svm/svm_trainer.hpp
new file mode 100644
index 0000000..362218b
--- /dev/null
+++ b/src/classifier/svm/svm_trainer.hpp
@@ -0,0 +1,16 @@
+#ifndef _SVM_TRAINER_H_
+#define _SVM_TRAINER_H_
+
+namespace ovclassifier {
+class SVMTrainer {
+public:
+  virtual ~SVMTrainer(){};
+  virtual void Reset() = 0;
+  virtual void SetLabels(int labels) = 0;
+  virtual void SetFeatures(int feats) = 0;
+  virtual void AddData(int label, const float *vec) = 0;
+  virtual int Train(const char *modelfile) = 0;
+};
+
+} // namespace ovclassifier
+#endif // _SVM_TRAINER_H_
diff --git a/src/classifier/svm_classifier.h b/src/classifier/svm_classifier.h
new file mode 100644
index
0000000..0badba4 --- /dev/null +++ b/src/classifier/svm_classifier.h @@ -0,0 +1,19 @@ +#ifndef _CLASSIFIER_SVM_CLASSIFIER_C_H_ +#define _CLASSIFIER_SVM_CLASSIFIER_C_H_ + +#include "../common/common.h" +#ifdef __cplusplus +#include "svm/svm_classifier.hpp" +extern "C" { +#endif +typedef void *ISVMClassifier; +ISVMClassifier new_svm_binary_classifier(); +ISVMClassifier new_svm_multiclass_classifier(); +void destroy_svm_classifier(ISVMClassifier e); +int svm_classifier_load_model(ISVMClassifier e, const char *modelfile); +double svm_predict(ISVMClassifier e, const float *vec); +int svm_classify(ISVMClassifier e, const float *vec, FloatVector *scores); +#ifdef __cplusplus +} +#endif +#endif // !_CLASSIFER_SVM_CLASSIFIER_C_H_ diff --git a/src/classifier/svm_trainer.h b/src/classifier/svm_trainer.h new file mode 100644 index 0000000..f211b8e --- /dev/null +++ b/src/classifier/svm_trainer.h @@ -0,0 +1,20 @@ +#ifndef _CLASSIFIER_SVM_TRAINER_C_H_ +#define _CLASSIFIER_SVM_TRAINER_C_H_ + +#ifdef __cplusplus +#include "svm/svm_trainer.hpp" +extern "C" { +#endif +typedef void *ISVMTrainer; +ISVMTrainer new_svm_binary_trainer(); +ISVMTrainer new_svm_multiclass_trainer(); +void destroy_svm_trainer(ISVMTrainer trainer); +void svm_trainer_reset(ISVMTrainer trainer); +void svm_trainer_set_labels(ISVMTrainer trainer, int labels); +void svm_trainer_set_features(ISVMTrainer trainer, int feats); +void svm_trainer_add_data(ISVMTrainer trainer, int label, const float *vec); +int svm_train(ISVMTrainer trainer, const char *modelfile); +#ifdef __cplusplus +} +#endif +#endif // !_CLASSIFER_SVM_TRAINER_C_H_ diff --git a/src/common/common.cpp b/src/common/common.cpp index 8795ae6..706334a 100644 --- a/src/common/common.cpp +++ b/src/common/common.cpp @@ -1,9 +1,9 @@ #include "common.h" +#include "cpu.h" #include -#include #include #include -#include "cpu.h" +#include #ifdef OV_VULKAN #include "gpu.h" @@ -11,423 +11,400 @@ int get_gpu_count() { #ifdef OV_VULKAN - return ncnn::get_gpu_count(); + return ncnn::get_gpu_count(); #endif // OV_VULKAN - return 0; + return 0; } int create_gpu_instance() { #ifdef OV_VULKAN - return ncnn::create_gpu_instance(); + return ncnn::create_gpu_instance(); #endif // OV_VULKAN - return 0; + return 0; } void destroy_gpu_instance() { #ifdef OV_VULKAN - ncnn::destroy_gpu_instance(); + ncnn::destroy_gpu_instance(); #endif // OV_VULKAN } -int get_big_cpu_count() { - return ncnn::get_big_cpu_count(); -} +int get_big_cpu_count() { return ncnn::get_big_cpu_count(); } void set_omp_num_threads(int n) { #ifdef OV_OPENMP - ncnn::set_omp_num_threads(n); + ncnn::set_omp_num_threads(n); #endif } int load_model(IEstimator d, const char *root_path) { - return static_cast(d)->LoadModel(root_path); + return static_cast(d)->LoadModel(root_path); } -void destroy_estimator(IEstimator d) { - delete static_cast(d); -} +void destroy_estimator(IEstimator d) { delete static_cast(d); } void set_num_threads(IEstimator d, int n) { - static_cast(d)->set_num_threads(n); + static_cast(d)->set_num_threads(n); } void set_light_mode(IEstimator d, bool mode) { - static_cast(d)->set_light_mode(mode); + static_cast(d)->set_light_mode(mode); } -void FreePoint2fVector(Point2fVector* p) { - if (p->points != NULL) { - free(p->points); - p->points = NULL; - } +void FreePoint2fVector(Point2fVector *p) { + if (p->points != NULL) { + free(p->points); + p->points = NULL; + } } -void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f* val) { - if (p->points == NULL || i >= p->length) { - return; - } - p->points[i] = *val; +void 
Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val) { + if (p->points == NULL || i >= p->length) { + return; + } + p->points[i] = *val; } void FreeFloatVector(FloatVector *p) { - if (p->values != NULL) { - free(p->values); - p->values = NULL; - } + if (p->values != NULL) { + free(p->values); + p->values = NULL; + } } void FreeBytes(Bytes *p) { - if (p->values != NULL) { - free(p->values); - p->values = NULL; - } + if (p->values != NULL) { + free(p->values); + p->values = NULL; + } } void FreeKeypointVector(KeypointVector *p) { - if (p->points != NULL) { - free(p->points); - p->points = NULL; - } + if (p->points != NULL) { + free(p->points); + p->points = NULL; + } } -void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val) { - if (p->points == NULL || i >= p->length) { - return; - } - p->points[i] = *val; +void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint *val) { + if (p->points == NULL || i >= p->length) { + return; + } + p->points[i] = *val; } void FreeObjectInfo(ObjectInfo *p) { - if (p->pts != NULL) { - FreeKeypointVector(p->pts); - free(p->pts); - p->pts = NULL; - } + if (p->pts != NULL) { + FreeKeypointVector(p->pts); + free(p->pts); + p->pts = NULL; + } } void FreeObjectInfoVector(ObjectInfoVector *p) { - if (p->items!=NULL) { - for (int i=0; i < p->length; i ++) { - FreeObjectInfo(&p->items[i]); - } - free(p->items); - p->items= NULL; + if (p->items != NULL) { + for (int i = 0; i < p->length; i++) { + FreeObjectInfo(&p->items[i]); } + free(p->items); + p->items = NULL; + } } -void FreeImage(Image* p) { - if (p->data != NULL) { - free(p->data); - p->data = NULL; - } +void FreeImage(Image *p) { + if (p->data != NULL) { + free(p->data); + p->data = NULL; + } } namespace ov { Estimator::Estimator() : EstimatorBase() { - blob_allocator_.set_size_compare_ratio(0.f); - workspace_allocator_.set_size_compare_ratio(0.f); - net_ = new ncnn::Net(); - initialized_ = false; - if (num_threads > 0) { - net_->opt.num_threads = num_threads; - } - net_->opt.blob_allocator = &blob_allocator_; - net_->opt.workspace_allocator = &workspace_allocator_; + blob_allocator_.set_size_compare_ratio(0.f); + workspace_allocator_.set_size_compare_ratio(0.f); + net_ = new ncnn::Net(); + initialized_ = false; + if (num_threads > 0) { + net_->opt.num_threads = num_threads; + } + net_->opt.blob_allocator = &blob_allocator_; + net_->opt.workspace_allocator = &workspace_allocator_; + net_->opt.lightmode = light_mode_; #ifdef OV_VULKAN - net_->opt.use_vulkan_compute = true; + net_->opt.use_vulkan_compute = true; #endif // OV_VULKAN } Estimator::~Estimator() { - if (net_) { - net_->clear(); - } - workspace_allocator_.clear(); - blob_allocator_.clear(); + if (net_) { + net_->clear(); + } + workspace_allocator_.clear(); + blob_allocator_.clear(); } -int Estimator::LoadModel(const char * root_path) { - std::string param_file = std::string(root_path) + "/param"; - std::string bin_file = std::string(root_path) + "/bin"; - if (net_->load_param(param_file.c_str()) == -1 || - net_->load_model(bin_file.c_str()) == -1) { - return 10000; - } +int Estimator::LoadModel(const char *root_path) { + std::string param_file = std::string(root_path) + "/param"; + std::string bin_file = std::string(root_path) + "/bin"; + if (net_->load_param(param_file.c_str()) == -1 || + net_->load_model(bin_file.c_str()) == -1) { + return 10000; + } - initialized_ = true; + initialized_ = true; - return 0; + return 0; } -EstimatorBase::EstimatorBase() { - num_threads = ncnn::get_big_cpu_count(); -} 
+EstimatorBase::EstimatorBase() { num_threads = ncnn::get_big_cpu_count(); } EstimatorBase::~EstimatorBase() {} -void EstimatorBase::set_num_threads(int n) { - num_threads = n; -} +void EstimatorBase::set_num_threads(int n) { num_threads = n; } void Estimator::set_num_threads(int n) { - EstimatorBase::set_num_threads(n); - if (net_) { - net_->opt.num_threads = n; - } + EstimatorBase::set_num_threads(n); + if (net_) { + net_->opt.num_threads = n; + } } void Estimator::set_light_mode(bool mode) { - if (net_) { - net_->opt.lightmode = mode; - light_mode_ = mode; - } + if (net_) { + net_->opt.lightmode = mode; + light_mode_ = mode; + } } -int RatioAnchors(const Rect & anchor, - const std::vector& ratios, - std::vector* anchors, int threads_num) { - anchors->clear(); - Point center = Point(anchor.x + (anchor.width - 1) * 0.5f, - anchor.y + (anchor.height - 1) * 0.5f); - float anchor_size = anchor.width * anchor.height; -#ifdef OV_OPENMP +int RatioAnchors(const Rect &anchor, const std::vector &ratios, + std::vector *anchors, int threads_num) { + anchors->clear(); + Point center = Point(anchor.x + (anchor.width - 1) * 0.5f, + anchor.y + (anchor.height - 1) * 0.5f); + float anchor_size = anchor.width * anchor.height; +#ifdef OV_OPENMP #pragma omp parallel for num_threads(threads_num) #endif - for (int i = 0; i < static_cast(ratios.size()); ++i) { - float ratio = ratios.at(i); - float anchor_size_ratio = anchor_size / ratio; - float curr_anchor_width = sqrt(anchor_size_ratio); - float curr_anchor_height = curr_anchor_width * ratio; - float curr_x = center.x - (curr_anchor_width - 1)* 0.5f; - float curr_y = center.y - (curr_anchor_height - 1)* 0.5f; + for (int i = 0; i < static_cast(ratios.size()); ++i) { + float ratio = ratios.at(i); + float anchor_size_ratio = anchor_size / ratio; + float curr_anchor_width = sqrt(anchor_size_ratio); + float curr_anchor_height = curr_anchor_width * ratio; + float curr_x = center.x - (curr_anchor_width - 1) * 0.5f; + float curr_y = center.y - (curr_anchor_height - 1) * 0.5f; - Rect curr_anchor = Rect(curr_x, curr_y, - curr_anchor_width - 1, curr_anchor_height - 1); - anchors->push_back(curr_anchor); - } - return 0; + Rect curr_anchor = + Rect(curr_x, curr_y, curr_anchor_width - 1, curr_anchor_height - 1); + anchors->push_back(curr_anchor); + } + return 0; } -int ScaleAnchors(const std::vector& ratio_anchors, - const std::vector& scales, std::vector* anchors, int threads_num) { - anchors->clear(); +int ScaleAnchors(const std::vector &ratio_anchors, + const std::vector &scales, std::vector *anchors, + int threads_num) { + anchors->clear(); #if defined(_OPENMP) #pragma omp parallel for num_threads(threads_num) #endif - for (int i = 0; i < static_cast(ratio_anchors.size()); ++i) { - Rect anchor = ratio_anchors.at(i); - Point2f center = Point2f(anchor.x + anchor.width * 0.5f, - anchor.y + anchor.height * 0.5f); - for (int j = 0; j < static_cast(scales.size()); ++j) { - float scale = scales.at(j); - float curr_width = scale * (anchor.width + 1); - float curr_height = scale * (anchor.height + 1); - float curr_x = center.x - curr_width * 0.5f; - float curr_y = center.y - curr_height * 0.5f; - Rect curr_anchor = Rect(curr_x, curr_y, - curr_width, curr_height); - anchors->push_back(curr_anchor); - } - } + for (int i = 0; i < static_cast(ratio_anchors.size()); ++i) { + Rect anchor = ratio_anchors.at(i); + Point2f center = Point2f(anchor.x + anchor.width * 0.5f, + anchor.y + anchor.height * 0.5f); + for (int j = 0; j < static_cast(scales.size()); ++j) { + float scale = 
scales.at(j); + float curr_width = scale * (anchor.width + 1); + float curr_height = scale * (anchor.height + 1); + float curr_x = center.x - curr_width * 0.5f; + float curr_y = center.y - curr_height * 0.5f; + Rect curr_anchor = Rect(curr_x, curr_y, curr_width, curr_height); + anchors->push_back(curr_anchor); + } + } - return 0; + return 0; } -int GenerateAnchors(const int & base_size, - const std::vector& ratios, - const std::vector scales, - std::vector* anchors, - int threads_num) { - anchors->clear(); - Rect anchor = Rect(0, 0, base_size, base_size); - std::vector ratio_anchors; - RatioAnchors(anchor, ratios, &ratio_anchors, threads_num); - ScaleAnchors(ratio_anchors, scales, anchors, threads_num); - - return 0; +int GenerateAnchors(const int &base_size, const std::vector &ratios, + const std::vector scales, std::vector *anchors, + int threads_num) { + anchors->clear(); + Rect anchor = Rect(0, 0, base_size, base_size); + std::vector ratio_anchors; + RatioAnchors(anchor, ratios, &ratio_anchors, threads_num); + ScaleAnchors(ratio_anchors, scales, anchors, threads_num); + + return 0; } -float InterRectArea(const Rect & a, const Rect & b) { - Point left_top = Point(std::max(a.x, b.x), std::max(a.y, b.y)); - Point right_bottom = Point(std::min(a.br().x, b.br().x), std::min(a.br().y, b.br().y)); - Point diff = right_bottom - left_top; - return (std::max(diff.x + 1, 0) * std::max(diff.y + 1, 0)); +float InterRectArea(const Rect &a, const Rect &b) { + Point left_top = Point(std::max(a.x, b.x), std::max(a.y, b.y)); + Point right_bottom = + Point(std::min(a.br().x, b.br().x), std::min(a.br().y, b.br().y)); + Point diff = right_bottom - left_top; + return (std::max(diff.x + 1, 0) * std::max(diff.y + 1, 0)); } -int ComputeIOU(const Rect & rect1, - const Rect & rect2, float * iou, - const std::string& type) { +int ComputeIOU(const Rect &rect1, const Rect &rect2, float *iou, + const std::string &type) { - float inter_area = InterRectArea(rect1, rect2); - if (type == "UNION") { - *iou = inter_area / (rect1.area() + rect2.area() - inter_area); - } - else { - *iou = inter_area / std::min(rect1.area(), rect2.area()); - } + float inter_area = InterRectArea(rect1, rect2); + if (type == "UNION") { + *iou = inter_area / (rect1.area() + rect2.area() - inter_area); + } else { + *iou = inter_area / std::min(rect1.area(), rect2.area()); + } - return 0; + return 0; } - -void EnlargeRect(const float& scale, Rect* rect) { - float offset_x = (scale - 1.f) / 2.f * rect->width; - float offset_y = (scale - 1.f) / 2.f * rect->height; - rect->x -= offset_x; - rect->y -= offset_y; - rect->width = scale * rect->width; - rect->height = scale * rect->height; +void EnlargeRect(const float &scale, Rect *rect) { + float offset_x = (scale - 1.f) / 2.f * rect->width; + float offset_y = (scale - 1.f) / 2.f * rect->height; + rect->x -= offset_x; + rect->y -= offset_y; + rect->width = scale * rect->width; + rect->height = scale * rect->height; } -void RectifyRect(Rect* rect) { - int max_side = std::max(rect->width, rect->height); - int offset_x = (max_side - rect->width) / 2; - int offset_y = (max_side - rect->height) / 2; +void RectifyRect(Rect *rect) { + int max_side = std::max(rect->width, rect->height); + int offset_x = (max_side - rect->width) / 2; + int offset_y = (max_side - rect->height) / 2; - rect->x -= offset_x; - rect->y -= offset_y; - rect->width = max_side; - rect->height = max_side; + rect->x -= offset_x; + rect->y -= offset_y; + rect->width = max_side; + rect->height = max_side; } -void 
qsort_descent_inplace(std::vector& objects, int left, int right) -{ - int i = left; - int j = right; - float p = objects[(left + right) / 2].score; +void qsort_descent_inplace(std::vector &objects, int left, + int right) { + int i = left; + int j = right; + float p = objects[(left + right) / 2].score; - while (i <= j) + while (i <= j) { + while (objects[i].score > p) + i++; + + while (objects[j].score < p) + j--; + + if (i <= j) { + // swap + std::swap(objects[i], objects[j]); + + i++; + j--; + } + } + +#pragma omp parallel sections + { +#pragma omp section { - while (objects[i].score > p) - i++; + if (left < j) + qsort_descent_inplace(objects, left, j); + } +#pragma omp section + { + if (i < right) + qsort_descent_inplace(objects, i, right); + } + } +} - while (objects[j].score < p) - j--; +void qsort_descent_inplace(std::vector &objects) { + if (objects.empty()) + return; - if (i <= j) - { - // swap - std::swap(objects[i], objects[j]); + qsort_descent_inplace(objects, 0, objects.size() - 1); +} - i++; - j--; - } +void nms_sorted_bboxes(const std::vector &objects, + std::vector &picked, float nms_threshold) { + picked.clear(); + + const int n = objects.size(); + + std::vector areas(n); + for (int i = 0; i < n; i++) { + areas[i] = objects[i].rect.area(); + } + + for (int i = 0; i < n; i++) { + const ObjectInfo &a = objects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) { + const ObjectInfo &b = objects[picked[j]]; + + // intersection over union + float inter_area = InterRectArea(a.rect, b.rect); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; } - #pragma omp parallel sections - { - #pragma omp section - { - if (left < j) qsort_descent_inplace(objects, left, j); - } - #pragma omp section - { - if (i < right) qsort_descent_inplace(objects, i, right); - } - } -} - -void qsort_descent_inplace(std::vector& objects) -{ - if (objects.empty()) - return; - - qsort_descent_inplace(objects, 0, objects.size() - 1); -} - -void nms_sorted_bboxes(const std::vector& objects, std::vector& picked, float nms_threshold) -{ - picked.clear(); - - const int n = objects.size(); - - std::vector areas(n); - for (int i = 0; i < n; i++) - { - areas[i] = objects[i].rect.area(); - } - - for (int i = 0; i < n; i++) - { - const ObjectInfo& a = objects[i]; - - int keep = 1; - for (int j = 0; j < (int)picked.size(); j++) - { - const ObjectInfo& b = objects[picked[j]]; - - // intersection over union - float inter_area = InterRectArea(a.rect, b.rect); - float union_area = areas[i] + areas[picked[j]] - inter_area; - // float IoU = inter_area / union_area - if (inter_area / union_area > nms_threshold) - keep = 0; - } - - if (keep) - picked.push_back(i); - } + if (keep) + picked.push_back(i); + } } // -// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py gen_single_level_base_anchors() -ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales) -{ - int num_ratio = ratios.w; - int num_scale = scales.w; +// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py +// gen_single_level_base_anchors() +ncnn::Mat generate_anchors(int base_size, const ncnn::Mat &ratios, + const ncnn::Mat &scales) { + int num_ratio = ratios.w; + int num_scale = scales.w; - ncnn::Mat anchors; - anchors.create(4, num_ratio * num_scale); + ncnn::Mat anchors; + anchors.create(4, num_ratio * num_scale); - const float cx = 0; - const float cy = 0; + const float cx = 
0; + const float cy = 0; - for (int i = 0; i < num_ratio; i++) - { - float ar = ratios[i]; + for (int i = 0; i < num_ratio; i++) { + float ar = ratios[i]; - int r_w = round(base_size / sqrt(ar)); - int r_h = round(r_w * ar); //round(base_size * sqrt(ar)); + int r_w = round(base_size / sqrt(ar)); + int r_h = round(r_w * ar); // round(base_size * sqrt(ar)); - for (int j = 0; j < num_scale; j++) - { - float scale = scales[j]; + for (int j = 0; j < num_scale; j++) { + float scale = scales[j]; - float rs_w = r_w * scale; - float rs_h = r_h * scale; + float rs_w = r_w * scale; + float rs_h = r_h * scale; - float* anchor = anchors.row(i * num_scale + j); + float *anchor = anchors.row(i * num_scale + j); - anchor[0] = cx - rs_w * 0.5f; - anchor[1] = cy - rs_h * 0.5f; - anchor[2] = cx + rs_w * 0.5f; - anchor[3] = cy + rs_h * 0.5f; - } + anchor[0] = cx - rs_w * 0.5f; + anchor[1] = cy - rs_h * 0.5f; + anchor[2] = cx + rs_w * 0.5f; + anchor[3] = cy + rs_h * 0.5f; } + } - return anchors; + return anchors; } -int generate_grids_and_stride(const int target_size, std::vector& strides, std::vector& grid_strides) -{ - for (auto stride : strides) - { - int num_grid = target_size / stride; - for (int g1 = 0; g1 < num_grid; g1++) - { - for (int g0 = 0; g0 < num_grid; g0++) - { - grid_strides.push_back((GridAndStride){g0, g1, stride}); - } - } +int generate_grids_and_stride(const int target_size, std::vector &strides, + std::vector &grid_strides) { + for (auto stride : strides) { + int num_grid = target_size / stride; + for (int g1 = 0; g1 < num_grid; g1++) { + for (int g0 = 0; g0 < num_grid; g0++) { + grid_strides.push_back((GridAndStride){g0, g1, stride}); + } } + } - return 0; + return 0; } -float sigmoid(float x) -{ - return static_cast(1.f / (1.f + exp(-x))); -} +float sigmoid(float x) { return static_cast(1.f / (1.f + exp(-x))); } -} +} // namespace ov diff --git a/src/common/common.hpp b/src/common/common.hpp index 2b64380..fa87d8a 100644 --- a/src/common/common.hpp +++ b/src/common/common.hpp @@ -1,12 +1,12 @@ #ifndef _COMMON_H_ #define _COMMON_H_ -#include -#include -#include #include "config.h" #include "net.h" -#ifdef OV_OPENMP +#include +#include +#include +#ifdef OV_OPENMP #include #endif @@ -14,241 +14,235 @@ namespace ov { class EstimatorBase { public: - EstimatorBase(); - virtual ~EstimatorBase(); - virtual void set_num_threads(int n); + EstimatorBase(); + virtual ~EstimatorBase(); + virtual void set_num_threads(int n); + protected: - int num_threads = 2; + int num_threads = 2; }; class Estimator : public EstimatorBase { public: - Estimator(); - virtual ~Estimator(); - virtual int LoadModel(const char* root_path); - virtual void set_num_threads(int n); - virtual void set_light_mode(bool mode); + Estimator(); + virtual ~Estimator(); + virtual int LoadModel(const char *root_path); + virtual void set_num_threads(int n); + virtual void set_light_mode(bool mode); + protected: - ncnn::Net* net_; - ncnn::PoolAllocator workspace_allocator_; - ncnn::UnlockedPoolAllocator blob_allocator_; - bool initialized_ = false; - bool light_mode_ = true; + ncnn::Net *net_ = NULL; + ncnn::PoolAllocator workspace_allocator_; + ncnn::UnlockedPoolAllocator blob_allocator_; + bool initialized_ = false; + bool light_mode_ = true; }; // Wrapper for an individual cv::cvSize struct Size { - int width; - int height; - Size(int _width = 0, int _height = 0): width(_width), height(_height) {} + int width; + int height; + Size(int _width = 0, int _height = 0) : width(_width), height(_height) {} }; // // Wrapper for an individual 
// // Wrapper for an individual cv::cvSize2f struct Size2f { - float width; - float height; - Size2f(float _width = 0, float _height = 0): width(_width), height(_height) {} + float width; + float height; + Size2f(float _width = 0, float _height = 0) + : width(_width), height(_height) {} }; // Wrapper for an individual cv::cvPoint struct Point { - int x; - int y; - Point(int _x = 0, int _y = 0): x(_x), y(_y) {} - Point operator-(const Point &p2) { - return Point(x - p2.x, y - p2.y); - }; + int x; + int y; + Point(int _x = 0, int _y = 0) : x(_x), y(_y) {} + Point operator-(const Point &p2) { return Point(x - p2.x, y - p2.y); }; }; // Wrapper for an individual cv::Point2f struct Point2f { - float x; - float y; - Point2f(float _x = 0, float _y = 0): x(_x), y(_y) {} - Point2f operator*(float f) const { - return Point2f(x * f, y * f); - }; - Point2f operator/(float f) const { - return Point2f(x / f, y / f); - }; - Point2f operator+(const Point2f &p2) const { - return Point2f(x + p2.x, y + p2.y); - }; - Point2f operator-(const Point2f &p2) const { - return Point2f(x - p2.x, y - p2.y); - }; + float x; + float y; + Point2f(float _x = 0, float _y = 0) : x(_x), y(_y) {} + Point2f operator*(float f) const { return Point2f(x * f, y * f); }; + Point2f operator/(float f) const { return Point2f(x / f, y / f); }; + Point2f operator+(const Point2f &p2) const { + return Point2f(x + p2.x, y + p2.y); + }; + Point2f operator-(const Point2f &p2) const { + return Point2f(x - p2.x, y - p2.y); + }; }; // Wrapper for an individual cv::Rect struct Rect { - int x; - int y; - int width; - int height; - Rect(int _x = 0, int _y = 0, int _width = 0, int _height = 0): x(_x), y(_y), width(_width), height(_height) {} - Point br() const { - return Point(x + width, y + height); - }; - int area() const { - return width * height; - }; - Rect operator&(const Rect &r2) const { - int inter_x = x; - int inter_y = y; - int inter_width = width; - int inter_height = height; - if (x < r2.x) { - inter_x = r2.x; - } - if (y < r2.y) { - inter_y = r2.y; - } - if (x + width > r2.x + r2.width) { - inter_width = r2.x + r2.width - inter_x; - } - if (y + height > r2.y + r2.height) { - inter_height = r2.y + r2.height - inter_y; - } - return Rect(inter_x, inter_y, inter_width, inter_height); - }; + int x; + int y; + int width; + int height; + Rect(int _x = 0, int _y = 0, int _width = 0, int _height = 0) + : x(_x), y(_y), width(_width), height(_height) {} + Point br() const { return Point(x + width, y + height); }; + int area() const { return width * height; }; + Rect operator&(const Rect &r2) const { + int inter_x = x; + int inter_y = y; + int inter_width = width; + int inter_height = height; + if (x < r2.x) { + inter_x = r2.x; + } + if (y < r2.y) { + inter_y = r2.y; + } + if (x + width > r2.x + r2.width) { + inter_width = r2.x + r2.width - inter_x; + } + if (y + height > r2.y + r2.height) { + inter_height = r2.y + r2.height - inter_y; + } + return Rect(inter_x, inter_y, inter_width, inter_height); + }; }; struct Keypoint { - Point2f p; - float score; - int id; - Keypoint(){}; - Keypoint(const Point2f p_): p(p_){}; + Point2f p; + float score; + int id; + Keypoint(){}; + Keypoint(const Point2f p_) : p(p_){}; }; struct ObjectInfo { - Rect rect; - float score; - int label; - std::vector<Keypoint> pts; + Rect rect; + float score; + int label; + std::vector<Keypoint> pts; }; struct Image { - std::vector<float> data; - int width; - int height; - int channels; - float at(const Point& p) const { - return data.at((p.x + p.y * width) * channels); - }; - float atChannel(const Point& p, int channel) const { - return 
data.at((p.x + p.y * width) * channels + channel); - }; - Image(){}; - Image(const ncnn::Mat& mat): width(mat.w), height(mat.h), channels(mat.c) { - int data_size = mat.total(); - float* ptr = (float*)malloc(data_size * sizeof(float)); - memcpy(ptr, mat.data, data_size * sizeof(float)); - data.clear(); - data.resize(data_size); - data.assign(ptr, ptr + data_size); - free(ptr); - ptr=NULL; - }; + std::vector<float> data; + int width; + int height; + int channels; + float at(const Point &p) const { + return data.at((p.x + p.y * width) * channels); + }; + float atChannel(const Point &p, int channel) const { + return data.at((p.x + p.y * width) * channels + channel); + }; + Image(){}; + Image(const ncnn::Mat &mat) : width(mat.w), height(mat.h), channels(mat.c) { + int data_size = mat.total(); + float *ptr = (float *)malloc(data_size * sizeof(float)); + memcpy(ptr, mat.data, data_size * sizeof(float)); + data.clear(); + data.resize(data_size); + data.assign(ptr, ptr + data_size); + free(ptr); + ptr = NULL; + }; }; -struct GridAndStride -{ - int grid0; - int grid1; - int stride; +struct GridAndStride { + int grid0; + int grid1; + int stride; }; -template <typename T, std::size_t N> +template <typename T, std::size_t N> constexpr std::size_t arraySize(const T (&)[N]) noexcept { - return N; + return N; } static inline int cvRound(double x) { - int y; - if(x >= (int)x+0.5) - y = (int)x++; - else - y = (int)x; - return y; + int y; + if (x >= (int)x + 0.5) + y = (int)x + 1; + else + y = (int)x; + return y; }; -int RatioAnchors(const Rect & anchor, - const std::vector<float>& ratios, std::vector<Rect>* anchors, int threads_num); +int RatioAnchors(const Rect &anchor, const std::vector<float> &ratios, + std::vector<Rect> *anchors, int threads_num); -int ScaleAnchors(const std::vector<Rect>& ratio_anchors, - const std::vector<float>& scales, std::vector<Rect>* anchors, int threads_num); +int ScaleAnchors(const std::vector<Rect> &ratio_anchors, + const std::vector<float> &scales, std::vector<Rect> *anchors, + int threads_num); -int GenerateAnchors(const int & base_size, - const std::vector<float>& ratios, const std::vector<float> scales, - std::vector<Rect>* anchors, - int threads_num); +int GenerateAnchors(const int &base_size, const std::vector<float> &ratios, + const std::vector<float> scales, std::vector<Rect> *anchors, + int threads_num); -float InterRectArea(const Rect & a, - const Rect & b); +float InterRectArea(const Rect &a, const Rect &b); -int ComputeIOU(const Rect & rect1, - const Rect & rect2, float * iou, - const std::string& type = "UNION"); +int ComputeIOU(const Rect &rect1, const Rect &rect2, float *iou, + const std::string &type = "UNION"); template <typename T> -int const NMS(const std::vector<T>& inputs, std::vector<T>* result, - const float& threshold, const std::string& type = "UNION") { - result->clear(); - if (inputs.size() == 0) - return -1; - - std::vector<T> inputs_tmp; - inputs_tmp.assign(inputs.begin(), inputs.end()); - std::sort(inputs_tmp.begin(), inputs_tmp.end(), - [](const T& a, const T& b) { - return a.score > b.score; - }); +int const NMS(const std::vector<T> &inputs, std::vector<T> *result, + const float &threshold, const std::string &type = "UNION") { + result->clear(); + if (inputs.size() == 0) + return -1; - std::vector<int> indexes(inputs_tmp.size()); + std::vector<T> inputs_tmp; + inputs_tmp.assign(inputs.begin(), inputs.end()); + std::sort(inputs_tmp.begin(), inputs_tmp.end(), + [](const T &a, const T &b) { return a.score > b.score; }); - for (int i = 0; i < indexes.size(); i++) { - indexes[i] = i; + std::vector<int> indexes(inputs_tmp.size()); + + for (int i = 0; i < indexes.size(); i++) { + indexes[i] = i; + } + + while (indexes.size() > 0) { + int good_idx = 
indexes[0]; + result->push_back(inputs_tmp[good_idx]); + std::vector<int> tmp_indexes = indexes; + indexes.clear(); + for (int i = 1; i < tmp_indexes.size(); i++) { + int tmp_i = tmp_indexes[i]; + float iou = 0.0f; + ComputeIOU(inputs_tmp[good_idx].rect, inputs_tmp[tmp_i].rect, &iou, type); + if (iou <= threshold) { + indexes.push_back(tmp_i); + } } + } + return 0; } -void qsort_descent_inplace(std::vector<ov::ObjectInfo>& objects, int left, int right); +void qsort_descent_inplace(std::vector<ov::ObjectInfo> &objects, int left, + int right); -void qsort_descent_inplace(std::vector<ov::ObjectInfo>& objects); +void qsort_descent_inplace(std::vector<ov::ObjectInfo> &objects); -void nms_sorted_bboxes(const std::vector<ov::ObjectInfo>& objects, std::vector<int>& picked, float nms_threshold); +void nms_sorted_bboxes(const std::vector<ov::ObjectInfo> &objects, + std::vector<int> &picked, float nms_threshold); -// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py gen_single_level_base_anchors() -ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales); +// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py +// gen_single_level_base_anchors() +ncnn::Mat generate_anchors(int base_size, const ncnn::Mat &ratios, + const ncnn::Mat &scales); -int generate_grids_and_stride(const int target_size, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides); +int generate_grids_and_stride(const int target_size, std::vector<int> &strides, + std::vector<GridAndStride> &grid_strides); float sigmoid(float x); -void EnlargeRect(const float& scale, Rect* rect); -void RectifyRect(Rect* rect); +void EnlargeRect(const float &scale, Rect *rect); +void RectifyRect(Rect *rect); -template <class ForwardIterator> +template <class ForwardIterator> inline static size_t argmax(ForwardIterator first, ForwardIterator last) { - return std::distance(first, std::max_element(first, last)); + return std::distance(first, std::max_element(first, last)); }; -} +} // namespace ov #endif // !_COMMON_H_ From 429e30db914394c7c8253ad74fc92d983aa590a1 Mon Sep 17 00:00:00 2001 From: Syd Xu Date: Fri, 26 Nov 2021 14:31:35 +0800 Subject: [PATCH 6/9] fix(image): wrong label size --- go/common/image.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/common/image.go b/go/common/image.go index ed6e42a..d2cbc96 100644 --- a/go/common/image.go +++ b/go/common/image.go @@ -231,5 +231,5 @@ func DrawLabelInWidth(gc *draw2dimg.GraphicContext, font *Font, label string, pt width := right - left fontWidth := width + padding*2 scale := boundWidth / fontWidth - DrawLabelInWidth(gc, font, label, pt, txtColor, bgColor, scale) + DrawLabel(gc, font, label, pt, txtColor, bgColor, scale) }
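The NMS template added to src/common/common.hpp above implements the classic greedy scheme: sort candidates by score, keep the best, discard every remaining candidate whose IoU with it exceeds the threshold, then repeat on the survivors. The Go program below sketches the same idea; Box and iou are illustrative stand-ins for ov::ObjectInfo and ComputeIOU, not part of the patches:

package main

import (
	"fmt"
	"math"
	"sort"
)

// Box is a minimal stand-in for ov::ObjectInfo.
type Box struct {
	X, Y, W, H, Score float64
}

// iou returns intersection-over-union ("UNION" mode in ComputeIOU).
func iou(a, b Box) float64 {
	x0 := math.Max(a.X, b.X)
	y0 := math.Max(a.Y, b.Y)
	x1 := math.Min(a.X+a.W, b.X+b.W)
	y1 := math.Min(a.Y+a.H, b.Y+b.H)
	inter := math.Max(0, x1-x0) * math.Max(0, y1-y0)
	union := a.W*a.H + b.W*b.H - inter
	if union <= 0 {
		return 0
	}
	return inter / union
}

// nms keeps the best-scoring box, drops overlapping neighbours, repeats.
func nms(boxes []Box, threshold float64) []Box {
	sorted := append([]Box(nil), boxes...)
	sort.Slice(sorted, func(i, j int) bool { return sorted[i].Score > sorted[j].Score })
	var kept []Box
	for len(sorted) > 0 {
		best := sorted[0]
		kept = append(kept, best)
		remain := sorted[:0]
		for _, b := range sorted[1:] {
			if iou(best, b) <= threshold {
				remain = append(remain, b)
			}
		}
		sorted = remain
	}
	return kept
}

func main() {
	boxes := []Box{{0, 0, 10, 10, 0.9}, {1, 1, 10, 10, 0.8}, {20, 20, 5, 5, 0.7}}
	fmt.Println(len(nms(boxes, 0.5))) // 2: the second box overlaps the first
}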
From 7eab96fa85a1f045250e84ef7ecb1b64ec2159ef Mon Sep 17 00:00:00 2001 From: Syd Xu Date: Mon, 13 Dec 2021 16:39:50 +0800 Subject: [PATCH 7/9] feat(hand): add mediapipe hand 3d pose detecter --- .gitignore | 2 + README.md | 2 + go/common/geometry.go | 52 ++ go/common/objectinfo.go | 2 + go/common/palmobject.go | 56 ++ go/examples/hand/main.go | 65 ++- go/face/tracker/cgo.go | 2 +- go/hand/drawer/const.go | 63 +-- go/hand/drawer/drawer.go | 133 ++++- go/hand/drawer/option.go | 18 + go/hand/pose3d/cgo.go | 11 + go/hand/pose3d/cgo_vulkan.go | 11 + go/hand/pose3d/doc.go | 2 + go/hand/pose3d/mediapipe.go | 62 +++ src/CMakeLists.txt | 2 + src/common/common.cpp | 7 + src/common/common.h | 97 ++-- src/common/common.hpp | 7 + src/hand/detecter/nanodet/nanodet.cpp | 418 +++++++-------- src/hand/pose3d.h | 37 ++ src/hand/pose3d/estimator.cpp | 106 ++++ src/hand/pose3d/mediapipe/mediapipe.cpp | 534 +++++++++++++++++++ src/hand/pose3d/mediapipe/mediapipe.hpp | 87 +++ src/pose/estimator/pptinypose/pptinypose.bak | 161 ++++++ src/pose/estimator/pptinypose/pptinypose.hpp | 25 + 25 files changed, 1628 insertions(+), 334 deletions(-) create mode 100644 go/common/palmobject.go create mode 100644 go/hand/pose3d/cgo.go create mode 100644 go/hand/pose3d/cgo_vulkan.go create mode 100644 go/hand/pose3d/doc.go create mode 100644 go/hand/pose3d/mediapipe.go create mode 100644 src/hand/pose3d.h create mode 100644 src/hand/pose3d/estimator.cpp create mode 100644 src/hand/pose3d/mediapipe/mediapipe.cpp create mode 100644 src/hand/pose3d/mediapipe/mediapipe.hpp create mode 100644 src/pose/estimator/pptinypose/pptinypose.bak create mode 100644 src/pose/estimator/pptinypose/pptinypose.hpp diff --git a/.gitignore b/.gitignore index 049c503..5ad6ac4 100644 --- a/.gitignore +++ b/.gitignore @@ -70,3 +70,5 @@ _testmain.go test .vim dist/ + +libtorch/ diff --git a/README.md b/README.md index 7bdd285..8f7c9bc 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,8 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM - nanodet [Google Drive](https://drive.google.com/drive/folders/1ywH7r_clqqA_BAOFSzA92Q0lxJtWlN3z?usp=sharing) - pose (for hand pose estimation) - handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing) + - pose3d (for 3d hand pose detection) + - mediapipe [Google Drive](https://drive.google.com/drive/folders/1LsqIGB55dusZJqmP1uhnQUnNE2tLzifp?usp=sharing) - styletransfer - animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing) - tracker diff --git a/go/common/geometry.go b/go/common/geometry.go index c803e4a..bcdf88c 100644 --- a/go/common/geometry.go +++ b/go/common/geometry.go @@ -90,6 +90,9 @@ func NewCPoint2fVector() *C.Point2fVector { // GoPoint2fVector convert C.Point2fVector to []Point func GoPoint2fVector(cVector *C.Point2fVector, w float64, h float64) []Point { + if cVector == nil { + return nil + } l := int(cVector.length) ret := make([]Point, 0, l) ptr := unsafe.Pointer(cVector.points) @@ -105,3 +108,52 @@ func FreeCPoint2fVector(c *C.Point2fVector) { C.FreePoint2fVector(c) C.free(unsafe.Pointer(c)) } + +// Point3d represents a 3d point +type Point3d struct { + X float64 + Y float64 + Z float64 +} + +// Pt3d returns a new Point3d +func Pt3d(x, y, z float64) Point3d { + return Point3d{x, y, z} +} + +// ZP3d is the zero Point3d +var ZP3d = Point3d{} + +// GoPoint3d convert C.Point3d to Point3d +func GoPoint3d(c *C.Point3d) Point3d { + return Pt3d( + float64(c.x), + float64(c.y), + float64(c.z), + ) +} + +// NewCPoint3dVector returns C.Point3dVector pointer +func NewCPoint3dVector() *C.Point3dVector { + return (*C.Point3dVector)(C.malloc(C.sizeof_Point3dVector)) +} + +// GoPoint3dVector convert C.Point3dVector to []Point3d +func GoPoint3dVector(cVector *C.Point3dVector) []Point3d { + if cVector == nil { + return nil + } + l := int(cVector.length) + ret := make([]Point3d, 0, l) + ptr := unsafe.Pointer(cVector.points) + for i := 0; i < l; i++ { + cPoint3d := (*C.Point3d)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_Point3d*C.int(i)))) + ret = append(ret, GoPoint3d(cPoint3d)) + 
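// Note: each element is read at offset i*sizeof(C.Point3d) from points and copied into Go memory, so the caller may free the C vector as soon as conversion returns (see FreeCPoint3dVector below). + 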
} + return ret +} + +// FreeCPoint3dVector release C.Point3dVector memory +func FreeCPoint3dVector(c *C.Point3dVector) { + C.FreePoint3dVector(c) + C.free(unsafe.Pointer(c)) +} diff --git a/go/common/objectinfo.go b/go/common/objectinfo.go index 67ae10a..9d32019 100644 --- a/go/common/objectinfo.go +++ b/go/common/objectinfo.go @@ -20,6 +20,8 @@ type ObjectInfo struct { Rect Rectangle // Points keypoints Keypoints []Keypoint + // Name + Name string } // GoObjectInfo convert C.ObjectInfo to go type diff --git a/go/common/palmobject.go b/go/common/palmobject.go new file mode 100644 index 0000000..c6c1e07 --- /dev/null +++ b/go/common/palmobject.go @@ -0,0 +1,56 @@ +package common + +/* +#include +#include +#include "openvision/common/common.h" +#include "openvision/hand/pose3d.h" +*/ +import "C" +import ( + "unsafe" +) + +// PalmObject +type PalmObject struct { + Score float64 + Rotation float64 + Rect []Point + Landmarks []Point + Skeleton []Point + Skeleton3d []Point3d +} + +// NewCPalmObjectVector returns *C.PalmObjectVector +func NewCPalmObjectVector() *C.PalmObjectVector { + return (*C.PalmObjectVector)(C.malloc(C.sizeof_PalmObjectVector)) +} + +// FreeCPalmObjectVector release *C.PalmObjectVector memory +func FreeCPalmObjectVector(p *C.PalmObjectVector) { + C.FreePalmObjectVector(p) + C.free(unsafe.Pointer(p)) +} + +// GoPalmObject convert C.PalmObject to Go type +func GoPalmObject(cObj *C.PalmObject, w float64, h float64) PalmObject { + return PalmObject{ + Score: float64(cObj.score), + Rotation: float64(cObj.rotation), + Rect: GoPoint2fVector(cObj.rect, w, h), + Landmarks: GoPoint2fVector(cObj.landmarks, w, h), + Skeleton: GoPoint2fVector(cObj.skeleton, w, h), + Skeleton3d: GoPoint3dVector(cObj.skeleton3d), + } +} + +func GoPalmObjectVector(c *C.PalmObjectVector, w float64, h float64) []PalmObject { + l := int(c.length) + ret := make([]PalmObject, 0, l) + ptr := unsafe.Pointer(c.items) + for i := 0; i < l; i++ { + cObj := (*C.PalmObject)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_PalmObject*C.int(i)))) + ret = append(ret, GoPalmObject(cObj, w, h)) + } + return ret +} diff --git a/go/examples/hand/main.go b/go/examples/hand/main.go index 0461264..836b9df 100644 --- a/go/examples/hand/main.go +++ b/go/examples/hand/main.go @@ -15,6 +15,7 @@ import ( "github.com/bububa/openvision/go/hand/detecter" handdrawer "github.com/bububa/openvision/go/hand/drawer" "github.com/bububa/openvision/go/hand/pose" + "github.com/bububa/openvision/go/hand/pose3d" ) func main() { @@ -27,17 +28,19 @@ func main() { cpuCores := common.GetBigCPUCount() common.SetOMPThreads(cpuCores) log.Printf("CPU big cores:%d\n", cpuCores) - estimator := handpose(modelPath) - defer estimator.Destroy() - common.SetEstimatorThreads(estimator, cpuCores) - for idx, d := range []detecter.Detecter{ - yolox(modelPath), - nanodet(modelPath), - } { - defer d.Destroy() - common.SetEstimatorThreads(d, cpuCores) - detect(d, estimator, imgPath, "hand1.jpg", idx) - } + // estimator := handpose(modelPath) + // defer estimator.Destroy() + // common.SetEstimatorThreads(estimator, cpuCores) + // for idx, d := range []detecter.Detecter{ + // yolox(modelPath), + // nanodet(modelPath), + // } { + // defer d.Destroy() + // common.SetEstimatorThreads(d, cpuCores) + // detect(d, estimator, imgPath, "hand2.jpg", idx) + // } + d3d := mediapipe(modelPath) + detect3d(d3d, imgPath, "hand1.jpg") } func yolox(modelPath string) detecter.Detecter { @@ -67,6 +70,16 @@ func handpose(modelPath string) pose.Estimator { return d } +func mediapipe(modelPath 
string) *pose3d.Mediapipe { + palmPath := filepath.Join(modelPath, "mediapipe/palm/full") + handPath := filepath.Join(modelPath, "mediapipe/hand/full") + d := pose3d.NewMediapipe() + if err := d.LoadModel(palmPath, handPath); err != nil { + log.Fatalln(err) + } + return d +} + func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename string, idx int) { inPath := filepath.Join(imgPath, filename) imgSrc, err := loadImage(inPath) @@ -104,6 +117,36 @@ func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename stri if err := saveImage(out, outPath); err != nil { log.Fatalln(err) } +} + +func detect3d(d *pose3d.Mediapipe, imgPath string, filename string) { + inPath := filepath.Join(imgPath, filename) + imgSrc, err := loadImage(inPath) + if err != nil { + log.Fatalln("load image failed,", err) + } + img := common.NewImage(imgSrc) + rois, err := d.Detect(img) + if err != nil { + log.Fatalln(err) + } + log.Printf("%+v\n", rois) + drawer := handdrawer.New() + outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-hand-%s", filename)) + out := drawer.DrawPalm(img, rois) + + if err := saveImage(out, outPath); err != nil { + log.Fatalln(err) + } + + for idx, roi := range rois { + outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-palm3d-%d-%s", idx, filename)) + out := drawer.DrawPalm3D(roi, 400, "#442519") + + if err := saveImage(out, outPath); err != nil { + log.Fatalln(err) + } + } } diff --git a/go/face/tracker/cgo.go b/go/face/tracker/cgo.go index c64d9fc..0f33239 100644 --- a/go/face/tracker/cgo.go +++ b/go/face/tracker/cgo.go @@ -1,6 +1,6 @@ // +build !vulkan -package eye +package tracker /* #cgo CXXFLAGS: --std=c++11 -fopenmp diff --git a/go/hand/drawer/const.go b/go/hand/drawer/const.go index c1f9887..387d608 100644 --- a/go/hand/drawer/const.go +++ b/go/hand/drawer/const.go @@ -7,71 +7,16 @@ import ( const ( // DefaultBorderColor default drawer border color DefaultBorderColor = common.Green + // DefaultKeypointColor default drawer keypoint color + DefaultKeypointColor = common.Pink // DefaultBorderStrokeWidth default drawer border stroke width DefaultBorderStrokeWidth = 3 // DefaultKeypointRadius default drawer keypoint radius DefaultKeypointRadius = 3 // DefaultKeypointStrokeWidth default drawer keypoint stroke width DefaultKeypointStrokeWidth = 1 -) - -// CocoPart coco part define -type CocoPart = int - -const ( - // CocoPartNose nose - CocoPartNose CocoPart = iota - // CocoPartLEye left eye - CocoPartLEye - // CocoPartREye right eye - CocoPartREye - // CocoPartLEar left ear - CocoPartLEar - // CocoPartREar right ear - CocoPartREar - // CocoPartLShoulder left sholder - CocoPartLShoulder - // CocoPartRShoulder right sholder - CocoPartRShoulder - // CocoPartLElbow left elbow - CocoPartLElbow - // CocoPartRElbow right elbow - CocoPartRElbow - // CocoPartLWrist left wrist - CocoPartLWrist - // CocoPartRWrist right wrist - CocoPartRWrist - // CocoPartLHip left hip - CocoPartLHip - // CocoPartRHip right hip - CocoPartRHip - // CocoPartLKnee left knee - CocoPartLKnee - // CocoPartRKnee right knee - CocoPartRKnee - // CocoPartRAnkle right ankle - CocoPartRAnkle - // CocoPartLAnkle left ankle - CocoPartLAnkle - // CocoPartNeck neck - CocoPartNeck - // CocoPartBackground background - CocoPartBackground -) - -var ( - // CocoPair represents joints pair - CocoPair = [16][2]CocoPart{ - {0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16}, - } - // 
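The COCO body-part tables removed here give way to the hand wiring that DrawPalm and DrawPalm3D use below: the idx == 5/9/13/17 checks link each finger base back to the wrist, and the idx == 4/8/12/16 checks skip the segment from a fingertip to the next finger's base. That matches MediaPipe's 21-landmark hand layout (0 wrist; 1-4 thumb; 5-8 index; 9-12 middle; 13-16 ring; 17-20 pinky). A hypothetical Go reference table, illustration only and not part of the patch:

// handPairs spells out the connections implied by the index checks in
// DrawPalm: four segments per finger, each finger base tied to the wrist.
var handPairs = [20][2]int{
	{0, 1}, {1, 2}, {2, 3}, {3, 4}, // thumb
	{0, 5}, {5, 6}, {6, 7}, {7, 8}, // index finger
	{0, 9}, {9, 10}, {10, 11}, {11, 12}, // middle finger
	{0, 13}, {13, 14}, {14, 15}, {15, 16}, // ring finger
	{0, 17}, {17, 18}, {18, 19}, {19, 20}, // pinky
}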
CocoColors represents color for coco parts - CocoColors = [17]string{ - "#ff0000", "#ff5500", "#ffaa00", "#ffff00", - "#aaff00", "#55ff00", "#00ff00", "#00ff55", "#00ffaa", - "#00ffff", "#00aaff", "#0055ff", - "#0000ff", "#aa00ff", "#ff00ff", - "#ff00aa", "#ff0055", - } + // DefaultLabelColor default label color + DefaultLabelColor = common.White ) var ( diff --git a/go/hand/drawer/drawer.go b/go/hand/drawer/drawer.go index 948eef5..2045ac3 100644 --- a/go/hand/drawer/drawer.go +++ b/go/hand/drawer/drawer.go @@ -2,8 +2,10 @@ package drawer import ( "image" + "image/color" "github.com/llgcode/draw2d/draw2dimg" + "github.com/llgcode/draw2d/draw2dkit" "github.com/bububa/openvision/go/common" ) @@ -18,6 +20,12 @@ type Drawer struct { KeypointStrokeWidth float64 // KeypointRadius represents keypoints circle radius KeypointRadius float64 + // KeypointColor represents keypoint color + KeypointColor string + // LabelColor string + LabelColor string + // Font + Font *common.Font } // New returns a new Drawer @@ -27,6 +35,8 @@ func New(options ...Option) *Drawer { BorderStrokeWidth: DefaultBorderStrokeWidth, KeypointStrokeWidth: DefaultKeypointStrokeWidth, KeypointRadius: DefaultKeypointRadius, + KeypointColor: DefaultKeypointColor, + LabelColor: DefaultLabelColor, } for _, opt := range options { opt.apply(d) @@ -42,15 +52,15 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool gc := draw2dimg.NewGraphicContext(out) gc.DrawImage(img) for _, roi := range rois { + rect := common.Rect( + roi.Rect.X*imgW, + roi.Rect.Y*imgH, + roi.Rect.Width*imgW, + roi.Rect.Height*imgH, + ) + borderColor := d.BorderColor if drawBorder { // draw rect - rect := common.Rect( - roi.Rect.X*imgW, - roi.Rect.Y*imgH, - roi.Rect.Width*imgW, - roi.Rect.Height*imgH, - ) - borderColor := d.BorderColor common.DrawRectangle(gc, rect, borderColor, "", d.BorderStrokeWidth) } l := len(roi.Keypoints) @@ -95,6 +105,115 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool poseColor := PoseColors[colorIdx] common.DrawCircle(gc, common.Pt(pt.Point.X*imgW, pt.Point.Y*imgH), d.KeypointRadius, poseColor, "", d.KeypointStrokeWidth) } + // draw name + if roi.Name != "" { + common.DrawLabelInWidth(gc, d.Font, roi.Name, common.Pt(rect.X, rect.MaxY()), d.LabelColor, borderColor, rect.Width) + } + } + return out +} + +// DrawPalm draw PalmObject +func (d *Drawer) DrawPalm(img image.Image, rois []common.PalmObject) image.Image { + imgW := float64(img.Bounds().Dx()) + imgH := float64(img.Bounds().Dy()) + out := image.NewRGBA(img.Bounds()) + gc := draw2dimg.NewGraphicContext(out) + gc.DrawImage(img) + for _, roi := range rois { + gc.SetLineWidth(d.BorderStrokeWidth) + gc.SetStrokeColor(common.ColorFromHex(d.BorderColor)) + gc.BeginPath() + for idx, pt := range roi.Rect { + gc.MoveTo(pt.X*imgW, pt.Y*imgH) + if idx == len(roi.Rect)-1 { + gc.LineTo(roi.Rect[0].X*imgW, roi.Rect[0].Y*imgH) + } else { + gc.LineTo(roi.Rect[idx+1].X*imgW, roi.Rect[idx+1].Y*imgH) + } + } + gc.Close() + gc.Stroke() + + l := len(roi.Skeleton) + if l == 0 { + continue + } + // draw skeleton + for idx := range roi.Skeleton[:l-1] { + var ( + p0 common.Point + p1 common.Point + poseColor = PoseColors[idx/4] + ) + gc.SetStrokeColor(common.ColorFromHex(poseColor)) + if idx == 5 || idx == 9 || idx == 13 || idx == 17 { + p0 = roi.Skeleton[0] + p1 = roi.Skeleton[idx] + gc.BeginPath() + gc.MoveTo(p0.X*imgW, p0.Y*imgH) + gc.LineTo(p1.X*imgW, p1.Y*imgH) + gc.Close() + gc.Stroke() + } else if idx == 4 || idx == 8 || idx == 12 || idx 
== 16 { + continue + } + p0 = roi.Skeleton[idx] + p1 = roi.Skeleton[idx+1] + gc.BeginPath() + gc.MoveTo(p0.X*imgW, p0.Y*imgH) + gc.LineTo(p1.X*imgW, p1.Y*imgH) + gc.Close() + gc.Stroke() + } + for _, pt := range roi.Landmarks { + common.DrawCircle(gc, common.Pt(pt.X*imgW, pt.Y*imgH), d.KeypointRadius, d.KeypointColor, "", d.KeypointStrokeWidth) + } + } + return out +} + +// DrawPalm3D draw 3d PalmObject +func (d *Drawer) DrawPalm3D(roi common.PalmObject, size float64, bg string) image.Image { + out := image.NewRGBA(image.Rect(0, 0, int(size), int(size))) + gc := draw2dimg.NewGraphicContext(out) + l := len(roi.Skeleton3d) + if l == 0 { + return out + } + if bg != "" { + bgColor := common.ColorFromHex(bg) + gc.SetFillColor(bgColor) + draw2dkit.Rectangle(gc, 0, 0, size, size) + gc.Fill() + gc.SetFillColor(color.Transparent) + } + // draw skeleton3d + for idx := range roi.Skeleton3d[:l-1] { + var ( + p0 common.Point3d + p1 common.Point3d + poseColor = PoseColors[idx/4] + ) + gc.SetStrokeColor(common.ColorFromHex(poseColor)) + if idx == 5 || idx == 9 || idx == 13 || idx == 17 { + p0 = roi.Skeleton3d[0] + p1 = roi.Skeleton3d[idx] + gc.BeginPath() + gc.MoveTo(p0.X*size, p0.Y*size) + gc.LineTo(p1.X*size, p1.Y*size) + gc.Close() + gc.Stroke() + } else if idx == 4 || idx == 8 || idx == 12 || idx == 16 { + continue + } + p0 = roi.Skeleton3d[idx] + p1 = roi.Skeleton3d[idx+1] + gc.BeginPath() + gc.MoveTo(p0.X*size, p0.Y*size) + gc.LineTo(p1.X*size, p1.Y*size) + gc.Close() + gc.Stroke() } return out } diff --git a/go/hand/drawer/option.go b/go/hand/drawer/option.go index 2ca7e11..9bcddd0 100644 --- a/go/hand/drawer/option.go +++ b/go/hand/drawer/option.go @@ -1,5 +1,9 @@ package drawer +import ( + "github.com/bububa/openvision/go/common" +) + // Option represents Drawer option interface type Option interface { apply(*Drawer) @@ -38,3 +42,17 @@ func WithKeypointStrokeWidth(w float64) Option { d.KeypointStrokeWidth = w }) } + +// WithKeypointColor set Drawer KeypointColor +func WithKeypointColor(color string) Option { + return optionFunc(func(d *Drawer) { + d.KeypointColor = color + }) +} + +// WithFont set Drawer Font +func WithFont(font *common.Font) Option { + return optionFunc(func(d *Drawer) { + d.Font = font + }) +} diff --git a/go/hand/pose3d/cgo.go b/go/hand/pose3d/cgo.go new file mode 100644 index 0000000..714b66e --- /dev/null +++ b/go/hand/pose3d/cgo.go @@ -0,0 +1,11 @@ +// +build !vulkan + +package pose3d + +/* +#cgo CXXFLAGS: --std=c++11 -fopenmp +#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include +#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision +#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib +*/ +import "C" diff --git a/go/hand/pose3d/cgo_vulkan.go b/go/hand/pose3d/cgo_vulkan.go new file mode 100644 index 0000000..f12a81b --- /dev/null +++ b/go/hand/pose3d/cgo_vulkan.go @@ -0,0 +1,11 @@ +// +build vulkan + +package pose3d + +/* +#cgo CXXFLAGS: --std=c++11 -fopenmp +#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include +#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent +#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib +*/ +import "C" diff --git a/go/hand/pose3d/doc.go b/go/hand/pose3d/doc.go new file mode 100644 index 0000000..bfb3a2c --- /dev/null +++ b/go/hand/pose3d/doc.go @@ -0,0 +1,2 @@ +// Package pose hand 3d pose estimator +package pose3d diff --git a/go/hand/pose3d/mediapipe.go b/go/hand/pose3d/mediapipe.go new file mode 100644 index 
0000000..6c1f5fa --- /dev/null +++ b/go/hand/pose3d/mediapipe.go @@ -0,0 +1,62 @@ +package pose3d + +/* +#include +#include +#include "openvision/common/common.h" +#include "openvision/hand/pose3d.h" +*/ +import "C" +import ( + "unsafe" + + openvision "github.com/bububa/openvision/go" + "github.com/bububa/openvision/go/common" +) + +// Mediapipe represents mediapipe estimator interface +type Mediapipe struct { + d C.IHandPose3DEstimator +} + +func NewMediapipe() *Mediapipe { + return &Mediapipe{ + d: C.new_mediapipe_hand(), + } +} + +func (m *Mediapipe) Destroy() { + C.destroy_mediapipe_hand(m.d) +} + +func (m *Mediapipe) LoadModel(palmPath string, handPath string) error { + cPalm := C.CString(palmPath) + defer C.free(unsafe.Pointer(cPalm)) + cHand := C.CString(handPath) + defer C.free(unsafe.Pointer(cHand)) + retCode := C.mediapipe_hand_load_model(m.d, cPalm, cHand) + if retCode != 0 { + return openvision.LoadModelError(int(retCode)) + } + return nil + +} + +// Detect detect hand 3d pose +func (m *Mediapipe) Detect(img *common.Image) ([]common.PalmObject, error) { + imgWidth := img.WidthF64() + imgHeight := img.HeightF64() + data := img.Bytes() + cObjs := common.NewCPalmObjectVector() + defer common.FreeCPalmObjectVector(cObjs) + errCode := C.mediapipe_hand_detect( + m.d, + (*C.uchar)(unsafe.Pointer(&data[0])), + C.int(imgWidth), C.int(imgHeight), + (*C.PalmObjectVector)(unsafe.Pointer(cObjs)), + ) + if errCode != 0 { + return nil, openvision.DetectHandError(int(errCode)) + } + return common.GoPalmObjectVector(cObjs, imgWidth, imgHeight), nil +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f6312ed..1ac27c4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -71,6 +71,7 @@ target_include_directories(openvision $ $ $ + $ $ $ @@ -109,6 +110,7 @@ file(COPY file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter.h ${CMAKE_CURRENT_SOURCE_DIR}/hand/pose.h + ${CMAKE_CURRENT_SOURCE_DIR}/hand/pose3d.h DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/hand ) diff --git a/src/common/common.cpp b/src/common/common.cpp index 706334a..918d69c 100644 --- a/src/common/common.cpp +++ b/src/common/common.cpp @@ -58,6 +58,13 @@ void FreePoint2fVector(Point2fVector *p) { } } +void FreePoint3dVector(Point3dVector *p) { + if (p->points != NULL) { + free(p->points); + p->points = NULL; + } +} + void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val) { if (p->points == NULL || i >= p->length) { return; diff --git a/src/common/common.h b/src/common/common.h index 1786397..675af10 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -11,123 +11,134 @@ typedef ov::Size Size; typedef ov::Size2f Size2f; typedef ov::Point Point; typedef ov::Point2f Point2f; +typedef ov::Point3d Point3d; typedef ov::Rect Rect; typedef ov::Keypoint Keypoint; #else // Wrapper for an individual cv::cvSize typedef struct Size { - int width; - int height; + int width; + int height; } Size; // // Wrapper for an individual cv::cvSize2f typedef struct Size2f { - int width; - int height; + int width; + int height; } Size2f; // Wrapper for an individual cv::cvPoint typedef struct Point { - int x; - int y; + int x; + int y; } Point; // Wrapper for an individual cv::Point2f typedef struct Point2f { - float x; - float y; + float x; + float y; } Point2f; +typedef struct Point3d { + float x; + float y; + float z; +} Point3d; // Wrapper for an individual cv::Rect typedef struct Rect { - int x; - int y; - int width; - int height; + int x; + int y; + int width; + int height; } Rect; - typedef struct Keypoint { - 
Point2f p; - float score; - int id; + Point2f p; + float score; + int id; } Keypoint; - #endif -typedef void* IEstimator; +typedef void *IEstimator; int get_gpu_count(); int create_gpu_instance(); void destroy_gpu_instance(); -int get_big_cpu_count(); +int get_big_cpu_count(); void set_omp_num_threads(int n); -int load_model(IEstimator e, const char* root_path); +int load_model(IEstimator e, const char *root_path); void destroy_estimator(IEstimator e); void set_num_threads(IEstimator e, int n); void set_light_mode(IEstimator e, bool mode); typedef struct Point2fVector { - Point2f* points; - int length; + Point2f *points; + int length; } Point2fVector; void FreePoint2fVector(Point2fVector *p); -void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f* val); +void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val); + +typedef struct Point3dVector { + Point3d *points; + int length; +} Point3dVector; + +void FreePoint3dVector(Point3dVector *p); typedef struct RectVector { - Rect* rects; - int length; + Rect *rects; + int length; } RectVector; void FreeRectVector(RectVector *p); typedef struct FloatVector { - float* values; - int length; + float *values; + int length; } FloatVector; void FreeFloatVector(FloatVector *p); typedef struct Bytes { - unsigned char* values; - int length; + unsigned char *values; + int length; } Bytes; void FreeBytes(Bytes *p); typedef struct KeypointVector { - Keypoint* points; - int length; + Keypoint *points; + int length; } KeypointVector; void FreeKeypointVector(KeypointVector *p); -void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val); +void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint *val); typedef struct ImageC { - unsigned char* data; - int width; - int height; - int channels; + unsigned char *data; + int width; + int height; + int channels; } Image; -void FreeImage(Image* p); +void FreeImage(Image *p); typedef struct ObjectInfoC { - Rect rect; - float score; - int label; - KeypointVector* pts; + Rect rect; + float score; + int label; + KeypointVector *pts; } ObjectInfo; void FreeObjectInfo(ObjectInfo *p); typedef struct ObjectInfoVector { - ObjectInfo* items; - int length; + ObjectInfo *items; + int length; } ObjectInfoVector; void FreeObjectInfoVector(ObjectInfoVector *p); diff --git a/src/common/common.hpp b/src/common/common.hpp index fa87d8a..cb473e3 100644 --- a/src/common/common.hpp +++ b/src/common/common.hpp @@ -76,6 +76,13 @@ struct Point2f { }; }; +struct Point3d { + float x; + float y; + float z; + Point3d(float _x = 0, float _y = 0, float _z = 0) : x(_x), y(_y), z(_z) {} +}; + // Wrapper for an individual cv::Rect struct Rect { int x; diff --git a/src/hand/detecter/nanodet/nanodet.cpp b/src/hand/detecter/nanodet/nanodet.cpp index 2c48cd7..57e6635 100644 --- a/src/hand/detecter/nanodet/nanodet.cpp +++ b/src/hand/detecter/nanodet/nanodet.cpp @@ -1,6 +1,6 @@ #include "nanodet.hpp" -#include #include +#include #ifdef OV_VULKAN #include "gpu.h" @@ -8,227 +8,219 @@ namespace ovhand { -static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector& objects) -{ +static void generate_nanodet_proposals(const ncnn::Mat &cls_pred, + const ncnn::Mat &dis_pred, int stride, + const ncnn::Mat &in_pad, + float prob_threshold, + std::vector &objects) { - const int num_grid = cls_pred.h; + const int num_grid = cls_pred.h; - int num_grid_x; - int num_grid_y; - if (in_pad.w > in_pad.h) - { - num_grid_x = 
in_pad.w / stride; - num_grid_y = num_grid / num_grid_x; - } - else - { - num_grid_y = in_pad.h / stride; - num_grid_x = num_grid / num_grid_y; - } + int num_grid_x; + int num_grid_y; + if (in_pad.w > in_pad.h) { + num_grid_x = in_pad.w / stride; + num_grid_y = num_grid / num_grid_x; + } else { + num_grid_y = in_pad.h / stride; + num_grid_x = num_grid / num_grid_y; + } - const int num_class = cls_pred.w; - const int reg_max_1 = dis_pred.w / 4; - //__android_log_print(ANDROID_LOG_WARN, "ncnn","cls_pred h %d, w %d",cls_pred.h,cls_pred.w); - //__android_log_print(ANDROID_LOG_WARN, "ncnn","%d,%d,%d,%d",num_grid_x,num_grid_y,num_class,reg_max_1); - for (int i = 0; i < num_grid_y; i++) - { - for (int j = 0; j < num_grid_x; j++) - { - const int idx = i * num_grid_x + j; + const int num_class = cls_pred.w; + const int reg_max_1 = dis_pred.w / 4; - const float* scores = cls_pred.row(idx); + for (int i = 0; i < num_grid_y; i++) { + for (int j = 0; j < num_grid_x; j++) { + const int idx = i * num_grid_x + j; - // find label with max score - int label = -1; - float score = -FLT_MAX; - for (int k = 0; k < num_class; k++) - { - if (scores[k] > score) - { - label = k; - score = scores[k]; - } - } + const float *scores = cls_pred.row(idx); - if (score >= prob_threshold) - { - ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx)); - { - ncnn::Layer* softmax = ncnn::create_layer("Softmax"); - - ncnn::ParamDict pd; - pd.set(0, 1); // axis - pd.set(1, 1); - softmax->load_param(pd); - - ncnn::Option opt; - // opt.num_threads = 1; - opt.use_packing_layout = false; - - softmax->create_pipeline(opt); - - softmax->forward_inplace(bbox_pred, opt); - - softmax->destroy_pipeline(opt); - - delete softmax; - } - - float pred_ltrb[4]; - for (int k = 0; k < 4; k++) - { - float dis = 0.f; - const float* dis_after_sm = bbox_pred.row(k); - for (int l = 0; l < reg_max_1; l++) - { - dis += l * dis_after_sm[l]; - } - - pred_ltrb[k] = dis * stride; - } - - float pb_cx = (j + 0.5f) * stride; - float pb_cy = (i + 0.5f) * stride; - - float x0 = pb_cx - pred_ltrb[0]; - float y0 = pb_cy - pred_ltrb[1]; - float x1 = pb_cx + pred_ltrb[2]; - float y1 = pb_cy + pred_ltrb[3]; - - ov::ObjectInfo obj; - obj.rect.x = x0; - obj.rect.y = y0; - obj.rect.width = x1 - x0; - obj.rect.height = y1 - y0; - obj.label = label; - obj.score= score; - - objects.push_back(obj); - } + // find label with max score + int label = -1; + float score = -FLT_MAX; + for (int k = 0; k < num_class; k++) { + if (scores[k] > score) { + label = k; + score = scores[k]; } - } -} + } -int Nanodet::Detect(const unsigned char* rgbdata, - int img_width, int img_height, - std::vector& rois) { - if (!initialized_) { - return 10000; - } - if (rgbdata == 0){ - return 10001; - } - - int w = img_width; - int h = img_height; - float scale = 1.f; - if (w > h) { - scale = (float)target_size / w; - w = target_size; - h = h * scale; - } else { - scale = (float)target_size / h; - h = target_size; - w = w * scale; - } - - ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, w, h); - - // pad to target_size rectangle - float wpad = 320-w;//(w + 31) / 32 * 32 - w; - float hpad = 320-h;//(h + 31) / 32 * 32 - h; - ncnn::Mat in_pad; - ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); - - in_pad.substract_mean_normalize(mean_vals, norm_vals); - - ncnn::Extractor ex = net_->create_extractor(); - ex.set_light_mode(light_mode_); - ex.set_num_threads(num_threads); - 
ex.input("input.1", in_pad); - - std::vector proposals; - // stride 8 - { - ncnn::Mat cls_pred; - ncnn::Mat dis_pred; - ex.extract("cls_pred_stride_8", cls_pred); - ex.extract("dis_pred_stride_8", dis_pred); - - std::vector objects8; - generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8); - - proposals.insert(proposals.end(), objects8.begin(), objects8.end()); - } - - // stride 16 - { - ncnn::Mat cls_pred; - ncnn::Mat dis_pred; - ex.extract("cls_pred_stride_16", cls_pred); - ex.extract("dis_pred_stride_16", dis_pred); - - std::vector objects16; - generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16); - - proposals.insert(proposals.end(), objects16.begin(), objects16.end()); - } - - // stride 32 - { - ncnn::Mat cls_pred; - ncnn::Mat dis_pred; - ex.extract("cls_pred_stride_32", cls_pred); - ex.extract("dis_pred_stride_32", dis_pred); - - std::vector objects32; - generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32); - - proposals.insert(proposals.end(), objects32.begin(), objects32.end()); - } - - // sort all proposals by score from highest to lowest - qsort_descent_inplace(proposals); - - // apply nms with nms_threshold - std::vector picked; - nms_sorted_bboxes(proposals, picked, nms_threshold); - - int count = picked.size(); - rois.resize(count); - - for (int i = 0; i < count; i++) - { - ov::ObjectInfo roi = proposals[picked[i]]; - - // adjust offset to original unpadded - float x0 = (roi.rect.x - (wpad / 2)) / scale; - float y0 = (roi.rect.y - (hpad / 2)) / scale; - float x1 = (roi.rect.x + roi.rect.width - (wpad / 2)) / scale; - float y1 = (roi.rect.y + roi.rect.height - (hpad / 2)) / scale; - - // clip - x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f); - y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f); - x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f); - y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f); - - roi.rect.x = x0; - roi.rect.y = y0; - roi.rect.width = x1 - x0; - roi.rect.height = y1 - y0; - - rois[i] = roi; - } - // sort objects by area - struct - { - bool operator()(const ov::ObjectInfo& a, const ov::ObjectInfo& b) const + if (score >= prob_threshold) { + ncnn::Mat bbox_pred(reg_max_1, 4, (void *)dis_pred.row(idx)); { - return a.rect.area() > b.rect.area(); + ncnn::Layer *softmax = ncnn::create_layer("Softmax"); + + ncnn::ParamDict pd; + pd.set(0, 1); // axis + pd.set(1, 1); + softmax->load_param(pd); + + ncnn::Option opt; + opt.num_threads = 1; + opt.use_packing_layout = false; + + softmax->create_pipeline(opt); + + softmax->forward_inplace(bbox_pred, opt); + + softmax->destroy_pipeline(opt); + + delete softmax; } - } objects_area_greater; - std::sort(rois.begin(), rois.end(), objects_area_greater); - return 0; + + float pred_ltrb[4]; + for (int k = 0; k < 4; k++) { + float dis = 0.f; + const float *dis_after_sm = bbox_pred.row(k); + for (int l = 0; l < reg_max_1; l++) { + dis += l * dis_after_sm[l]; + } + pred_ltrb[k] = dis * stride; + } + + float pb_cx = (j + 0.5f) * stride; + float pb_cy = (i + 0.5f) * stride; + + float x0 = pb_cx - pred_ltrb[0]; + float y0 = pb_cy - pred_ltrb[1]; + float x1 = pb_cx + pred_ltrb[2]; + float y1 = pb_cy + pred_ltrb[3]; + + ov::ObjectInfo obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - y0; + obj.label = label; + obj.score = score; + + objects.push_back(obj); + } + } + } } + +int Nanodet::Detect(const unsigned char *rgbdata, int img_width, int img_height, + 
+int Nanodet::Detect(const unsigned char *rgbdata, int img_width, int img_height, + std::vector<ov::ObjectInfo> &rois) { + if (!initialized_) { + return 10000; + } + if (rgbdata == 0) { + return 10001; + } + + int w = img_width; + int h = img_height; + float scale = 1.f; + if (w > h) { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } else { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, + img_width, img_height, w, h); + + // pad to target_size rectangle + float wpad = 320 - w; //(w + 31) / 32 * 32 - w; + float hpad = 320 - h; //(h + 31) / 32 * 32 - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, + wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); + + in_pad.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); + ex.input("input.1", in_pad); + + std::vector<ov::ObjectInfo> proposals; + // stride 8 + { + ncnn::Mat cls_pred; + ncnn::Mat dis_pred; + ex.extract("cls_pred_stride_8", cls_pred); + ex.extract("dis_pred_stride_8", dis_pred); + + std::vector<ov::ObjectInfo> objects8; + generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, + objects8); + + proposals.insert(proposals.end(), objects8.begin(), objects8.end()); + } + + // stride 16 + { + ncnn::Mat cls_pred; + ncnn::Mat dis_pred; + ex.extract("cls_pred_stride_16", cls_pred); + ex.extract("dis_pred_stride_16", dis_pred); + + std::vector<ov::ObjectInfo> objects16; + generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, + objects16); + + proposals.insert(proposals.end(), objects16.begin(), objects16.end()); + } + + // stride 32 + { + ncnn::Mat cls_pred; + ncnn::Mat dis_pred; + ex.extract("cls_pred_stride_32", cls_pred); + ex.extract("dis_pred_stride_32", dis_pred); + + std::vector<ov::ObjectInfo> objects32; + generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, + objects32); + + proposals.insert(proposals.end(), objects32.begin(), objects32.end()); + } + + // sort all proposals by score from highest to lowest + qsort_descent_inplace(proposals); + + // apply nms with nms_threshold + std::vector<int> picked; + nms_sorted_bboxes(proposals, picked, nms_threshold); + + int count = picked.size(); + rois.resize(count); + + for (int i = 0; i < count; i++) { + ov::ObjectInfo roi = proposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (roi.rect.x - (wpad / 2)) / scale; + float y0 = (roi.rect.y - (hpad / 2)) / scale; + float x1 = (roi.rect.x + roi.rect.width - (wpad / 2)) / scale; + float y1 = (roi.rect.y + roi.rect.height - (hpad / 2)) / scale; + + // clip + x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f); + x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f); + + roi.rect.x = x0; + roi.rect.y = y0; + roi.rect.width = x1 - x0; + roi.rect.height = y1 - y0; + + rois[i] = roi; + } + // sort objects by area + struct { + bool operator()(const ov::ObjectInfo &a, const ov::ObjectInfo &b) const { + return a.rect.area() > b.rect.area(); + } + } objects_area_greater; + std::sort(rois.begin(), rois.end(), objects_area_greater); + return 0; } +} // namespace ovhand diff --git a/src/hand/pose3d.h b/src/hand/pose3d.h new file mode 100644 index 0000000..a102c3f --- /dev/null +++ b/src/hand/pose3d.h @@ -0,0 +1,37 @@ +#ifndef _HAND_POSE3D_C_H_ +#define _HAND_POSE3D_C_H_ + +#include "../common/common.h" + +#ifdef __cplusplus +extern 
"C" { +#endif +typedef struct PalmObject { + float score; + float rotation; + Point2fVector *rect; + Point2fVector *landmarks; + Point2fVector *skeleton; + Point3dVector *skeleton3d; +} PalmObject; + +typedef struct PalmObjectVector { + PalmObject *items; + int length; +} PalmObjectVector; + +void FreePalmObject(PalmObject *obj); +void FreePalmObjectVector(PalmObjectVector *vec); + +typedef void *IHandPose3DEstimator; +IHandPose3DEstimator new_mediapipe_hand(); +void destroy_mediapipe_hand(IHandPose3DEstimator d); +int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path, + const char *hand_path); +int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata, + int img_width, int img_height, PalmObjectVector *vec); +#ifdef __cplusplus +} +#endif + +#endif // !_HAND_POSE3D_C_H_ diff --git a/src/hand/pose3d/estimator.cpp b/src/hand/pose3d/estimator.cpp new file mode 100644 index 0000000..ceb7589 --- /dev/null +++ b/src/hand/pose3d/estimator.cpp @@ -0,0 +1,106 @@ +#include "../pose3d.h" +#include "mediapipe/mediapipe.hpp" +#include + +void FreePalmObject(PalmObject *obj) { + if (obj->rect != NULL) { + FreePoint2fVector(obj->rect); + obj->rect = NULL; + } + if (obj->skeleton != NULL) { + FreePoint2fVector(obj->skeleton); + obj->skeleton = NULL; + } + if (obj->skeleton3d != NULL) { + FreePoint3dVector(obj->skeleton3d); + obj->skeleton3d = NULL; + } + if (obj->landmarks != NULL) { + FreePoint2fVector(obj->landmarks); + obj->landmarks = NULL; + } +} + +void FreePalmObjectVector(PalmObjectVector *vec) { + if (vec->items != NULL) { + for (int i = 0; i < vec->length; i++) { + FreePalmObject(&vec->items[i]); + } + free(vec->items); + vec->items = NULL; + } +} + +IHandPose3DEstimator new_mediapipe_hand() { + return new ovhand3d::MediapipeHand(); +} + +void destroy_mediapipe_hand(IHandPose3DEstimator d) { + delete static_cast(d); +} + +int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path, + const char *hand_path) { + return static_cast(d)->LoadModel(palm_path, + hand_path); +} + +int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata, + int img_width, int img_height, + PalmObjectVector *objects) { + std::vector objs; + + int ret = static_cast(d)->Detect( + rgbdata, img_width, img_height, objs); + if (ret != 0) { + return ret; + } + const size_t total_objs = objs.size(); + objects->length = total_objs; + if (total_objs == 0) { + objects->items = NULL; + return 0; + } + objects->items = (PalmObject *)malloc(total_objs * sizeof(PalmObject)); + for (size_t i = 0; i < total_objs; ++i) { + objects->items[i].score = objs[i].score; + objects->items[i].rotation = objs[i].rotation; + objects->items[i].rect = (Point2fVector *)malloc(sizeof(Point2fVector)); + objects->items[i].rect->length = 4; + objects->items[i].rect->points = (Point2f *)malloc(4 * sizeof(Point2f)); + for (size_t j = 0; j < 4; ++j) { + objects->items[i].rect->points[j] = objs[i].hand_pos[j]; + } + objects->items[i].landmarks = + (Point2fVector *)malloc(sizeof(Point2fVector)); + objects->items[i].landmarks->length = 7; + objects->items[i].landmarks->points = + (Point2f *)malloc(4 * sizeof(Point2f)); + for (size_t j = 0; j < 7; ++j) { + objects->items[i].landmarks->points[j] = objs[i].landmarks[j]; + } + const size_t total_skeleton = objs[i].skeleton.size(); + if (total_skeleton == 0) { + objects->items[i].skeleton = NULL; + objects->items[i].skeleton3d = NULL; + continue; + } + objects->items[i].skeleton = (Point2fVector *)malloc(sizeof(Point2fVector)); + 
objects->items[i].skeleton->length = total_skeleton; + objects->items[i].skeleton->points = + (Point2f *)malloc(total_skeleton * sizeof(Point2f)); + objects->items[i].skeleton3d = + (Point3dVector *)malloc(sizeof(Point3dVector)); + objects->items[i].skeleton3d->length = total_skeleton; + objects->items[i].skeleton3d->points = + (Point3d *)malloc(total_skeleton * sizeof(Point3d)); + for (size_t j = 0; j < total_skeleton; ++j) { + objects->items[i].skeleton->points[j].x = objs[i].skeleton[j].x; + objects->items[i].skeleton->points[j].y = objs[i].skeleton[j].y; + objects->items[i].skeleton3d->points[j].x = objs[i].skeleton3d[j].x; + objects->items[i].skeleton3d->points[j].y = objs[i].skeleton3d[j].y; + objects->items[i].skeleton3d->points[j].z = objs[i].skeleton3d[j].z; + } + } + return 0; +} diff --git a/src/hand/pose3d/mediapipe/mediapipe.cpp b/src/hand/pose3d/mediapipe/mediapipe.cpp new file mode 100644 index 0000000..e2c5a04 --- /dev/null +++ b/src/hand/pose3d/mediapipe/mediapipe.cpp @@ -0,0 +1,534 @@ +#include "mediapipe.hpp" +#include "mat.h" +#include + +namespace ovhand3d { + +static float calculate_scale(float min_scale, float max_scale, int stride_index, + int num_strides) { + if (num_strides == 1) + return (min_scale + max_scale) * 0.5f; + else + return min_scale + + (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f); +} + +static void generate_anchors(std::vector &anchors, + const AnchorsParams &anchor_params) { + int layer_id = 0; + for (int layer_id = 0; layer_id < anchor_params.strides.size();) { + std::vector anchor_height; + std::vector anchor_width; + std::vector aspect_ratios; + std::vector scales; + + int last_same_stride_layer = layer_id; + while (last_same_stride_layer < (int)anchor_params.strides.size() && + anchor_params.strides[last_same_stride_layer] == + anchor_params.strides[layer_id]) { + const float scale = + calculate_scale(anchor_params.min_scale, anchor_params.max_scale, + last_same_stride_layer, anchor_params.strides.size()); + { + for (int aspect_ratio_id = 0; + aspect_ratio_id < (int)anchor_params.aspect_ratios.size(); + aspect_ratio_id++) { + aspect_ratios.push_back(anchor_params.aspect_ratios[aspect_ratio_id]); + scales.push_back(scale); + } + + const float scale_next = + last_same_stride_layer == (int)anchor_params.strides.size() - 1 + ? 
1.0f + : calculate_scale( + anchor_params.min_scale, anchor_params.max_scale, + last_same_stride_layer + 1, anchor_params.strides.size()); + scales.push_back(sqrt(scale * scale_next)); + aspect_ratios.push_back(1.0); + } + last_same_stride_layer++; + } + + for (int i = 0; i < (int)aspect_ratios.size(); ++i) { + const float ratio_sqrts = sqrt(aspect_ratios[i]); + anchor_height.push_back(scales[i] / ratio_sqrts); + anchor_width.push_back(scales[i] * ratio_sqrts); + } + + int feature_map_height = 0; + int feature_map_width = 0; + const int stride = anchor_params.strides[layer_id]; + feature_map_height = ceil(1.0f * anchor_params.input_size_height / stride); + feature_map_width = ceil(1.0f * anchor_params.input_size_width / stride); + + for (int y = 0; y < feature_map_height; ++y) { + for (int x = 0; x < feature_map_width; ++x) { + for (int anchor_id = 0; anchor_id < (int)anchor_height.size(); + ++anchor_id) { + const float x_center = + (x + anchor_params.anchor_offset_x) * 1.0f / feature_map_width; + const float y_center = + (y + anchor_params.anchor_offset_y) * 1.0f / feature_map_height; + + Anchor new_anchor; + new_anchor.x_center = x_center; + new_anchor.y_center = y_center; + + new_anchor.w = 1.0f; + new_anchor.h = 1.0f; + + anchors.push_back(new_anchor); + } + } + } + layer_id = last_same_stride_layer; + } +} + +static void create_ssd_anchors(int input_w, int input_h, + std::vector &anchors) { + AnchorsParams anchor_options; + anchor_options.num_layers = 4; + anchor_options.min_scale = 0.1484375; + anchor_options.max_scale = 0.75; + anchor_options.input_size_height = 192; + anchor_options.input_size_width = 192; + anchor_options.anchor_offset_x = 0.5f; + anchor_options.anchor_offset_y = 0.5f; + anchor_options.strides.push_back(8); + anchor_options.strides.push_back(16); + anchor_options.strides.push_back(16); + anchor_options.strides.push_back(16); + anchor_options.aspect_ratios.push_back(1.0); + generate_anchors(anchors, anchor_options); +} + +static int decode_bounds(std::list ®ion_list, + float score_thresh, int input_img_w, int input_img_h, + float *scores_ptr, float *bboxes_ptr, + std::vector &anchors) { + DetectRegion region; + int i = 0; + for (auto &anchor : anchors) { + float score = ov::sigmoid(scores_ptr[i]); + + if (score > score_thresh) { + float *p = bboxes_ptr + (i * 18); + + float cx = p[0] / input_img_w + anchor.x_center; + float cy = p[1] / input_img_h + anchor.y_center; + float w = p[2] / input_img_w; + float h = p[3] / input_img_h; + + ov::Point2f topleft, btmright; + topleft.x = cx - w * 0.5f; + topleft.y = cy - h * 0.5f; + btmright.x = cx + w * 0.5f; + btmright.y = cy + h * 0.5f; + + region.score = score; + region.topleft = topleft; + region.btmright = btmright; + + for (int j = 0; j < 7; j++) { + float lx = p[4 + (2 * j) + 0]; + float ly = p[4 + (2 * j) + 1]; + lx += anchor.x_center * input_img_w; + ly += anchor.y_center * input_img_h; + lx /= (float)input_img_w; + ly /= (float)input_img_h; + + region.landmarks[j].x = lx; + region.landmarks[j].y = ly; + } + + region_list.push_back(region); + } + i++; + } + return 0; +} + +static float calc_intersection_over_union(DetectRegion ®ion0, + DetectRegion ®ion1) { + float sx0 = region0.topleft.x; + float sy0 = region0.topleft.y; + float ex0 = region0.btmright.x; + float ey0 = region0.btmright.y; + float sx1 = region1.topleft.x; + float sy1 = region1.topleft.y; + float ex1 = region1.btmright.x; + float ey1 = region1.btmright.y; + + float xmin0 = std::min(sx0, ex0); + float ymin0 = std::min(sy0, ey0); + float xmax0 = 
std::max(sx0, ex0); + float ymax0 = std::max(sy0, ey0); + float xmin1 = std::min(sx1, ex1); + float ymin1 = std::min(sy1, ey1); + float xmax1 = std::max(sx1, ex1); + float ymax1 = std::max(sy1, ey1); + + float area0 = (ymax0 - ymin0) * (xmax0 - xmin0); + float area1 = (ymax1 - ymin1) * (xmax1 - xmin1); + if (area0 <= 0 || area1 <= 0) + return 0.0f; + + float intersect_xmin = std::max(xmin0, xmin1); + float intersect_ymin = std::max(ymin0, ymin1); + float intersect_xmax = std::min(xmax0, xmax1); + float intersect_ymax = std::min(ymax0, ymax1); + + float intersect_area = std::max(intersect_ymax - intersect_ymin, 0.0f) * + std::max(intersect_xmax - intersect_xmin, 0.0f); + + return intersect_area / (area0 + area1 - intersect_area); +} + +static int non_max_suppression(std::list ®ion_list, + std::list ®ion_nms_list, + float iou_thresh) { + region_list.sort([](DetectRegion &v1, DetectRegion &v2) { + return v1.score > v2.score ? true : false; + }); + + for (auto itr = region_list.begin(); itr != region_list.end(); itr++) { + DetectRegion region_candidate = *itr; + + int ignore_candidate = false; + for (auto itr_nms = region_nms_list.rbegin(); + itr_nms != region_nms_list.rend(); itr_nms++) { + DetectRegion region_nms = *itr_nms; + + float iou = calc_intersection_over_union(region_candidate, region_nms); + if (iou >= iou_thresh) { + ignore_candidate = true; + break; + } + } + + if (!ignore_candidate) { + region_nms_list.push_back(region_candidate); + if (region_nms_list.size() >= 5) + break; + } + } + return 0; +} + +static float normalize_radians(float angle) { + return angle - 2 * M_PI * floor((angle - (-M_PI)) / (2 * M_PI)); +} + +static void compute_rotation(DetectRegion ®ion) { + float x0 = region.landmarks[0].x; + float y0 = region.landmarks[0].y; + float x1 = region.landmarks[2].x; + float y1 = region.landmarks[2].y; + + float target_angle = M_PI * 0.5f; + float rotation = target_angle - atan2(-(y1 - y0), x1 - x0); + + region.rotation = normalize_radians(rotation); +} + +void rot_vec(ov::Point2f &vec, float rotation) { + float sx = vec.x; + float sy = vec.y; + vec.x = sx * cos(rotation) - sy * sin(rotation); + vec.y = sx * sin(rotation) + sy * cos(rotation); +} + +void compute_detect_to_roi(DetectRegion ®ion, const int &target_size, + PalmObject &palm) { + float width = region.btmright.x - region.topleft.x; + float height = region.btmright.y - region.topleft.y; + float palm_cx = region.topleft.x + width * 0.5f; + float palm_cy = region.topleft.y + height * 0.5f; + + float hand_cx; + float hand_cy; + float rotation = region.rotation; + float shift_x = 0.0f; + float shift_y = -0.5f; + + if (rotation == 0.0f) { + hand_cx = palm_cx + (width * shift_x); + hand_cy = palm_cy + (height * shift_y); + } else { + float dx = + (width * shift_x) * cos(rotation) - (height * shift_y) * sin(rotation); + float dy = + (width * shift_x) * sin(rotation) + (height * shift_y) * cos(rotation); + hand_cx = palm_cx + dx; + hand_cy = palm_cy + dy; + } + + float long_side = std::max(width, height); + width = long_side; + height = long_side; + float hand_w = width * 2.6f; + float hand_h = height * 2.6f; + + palm.hand_cx = hand_cx; + palm.hand_cy = hand_cy; + palm.hand_w = hand_w; + palm.hand_h = hand_h; + + float dx = hand_w * 0.5f; + float dy = hand_h * 0.5f; + + palm.hand_pos[0].x = -dx; + palm.hand_pos[0].y = -dy; + palm.hand_pos[1].x = +dx; + palm.hand_pos[1].y = -dy; + palm.hand_pos[2].x = +dx; + palm.hand_pos[2].y = +dy; + palm.hand_pos[3].x = -dx; + palm.hand_pos[3].y = +dy; + + for (int i = 0; i < 4; i++) { + 
+    rot_vec(palm.hand_pos[i], rotation);
+    palm.hand_pos[i].x += hand_cx;
+    palm.hand_pos[i].y += hand_cy;
+  }
+
+  for (int i = 0; i < 7; i++) {
+    palm.landmarks[i] = region.landmarks[i];
+  }
+
+  palm.score = region.score;
+}
+
+static void pack_detect_result(std::vector<DetectRegion> &detect_results,
+                               std::list<DetectRegion> &region_list,
+                               const int &target_size,
+                               std::vector<PalmObject> &palmlist) {
+  for (auto &region : region_list) {
+    compute_rotation(region);
+    PalmObject palm;
+    compute_detect_to_roi(region, target_size, palm);
+    palmlist.push_back(palm);
+    detect_results.push_back(region);
+  }
+}
+
+MediapipeHand::MediapipeHand() : ov::EstimatorBase() {
+  palm_blob_allocator_.set_size_compare_ratio(0.f);
+  palm_workspace_allocator_.set_size_compare_ratio(0.f);
+  hand_blob_allocator_.set_size_compare_ratio(0.f);
+  hand_workspace_allocator_.set_size_compare_ratio(0.f);
+  palm_net_ = new ncnn::Net();
+  hand_net_ = new ncnn::Net();
+  initialized_ = false;
+  if (num_threads > 0) {
+    palm_net_->opt.num_threads = num_threads;
+    hand_net_->opt.num_threads = num_threads;
+  }
+  palm_net_->opt.blob_allocator = &palm_blob_allocator_;
+  palm_net_->opt.workspace_allocator = &palm_workspace_allocator_;
+  palm_net_->opt.lightmode = light_mode_;
+  hand_net_->opt.blob_allocator = &hand_blob_allocator_;
+  hand_net_->opt.workspace_allocator = &hand_workspace_allocator_;
+  hand_net_->opt.lightmode = light_mode_;
+#ifdef OV_VULKAN
+  palm_net_->opt.use_vulkan_compute = true;
+  hand_net_->opt.use_vulkan_compute = true;
+#endif // OV_VULKAN
+}
+
+MediapipeHand::~MediapipeHand() {
+  if (palm_net_) {
+    palm_net_->clear();
+    // the nets are allocated with new in the constructor, so release them
+    delete palm_net_;
+    palm_net_ = NULL;
+  }
+  if (hand_net_) {
+    hand_net_->clear();
+    delete hand_net_;
+    hand_net_ = NULL;
+  }
+  palm_workspace_allocator_.clear();
+  palm_blob_allocator_.clear();
+  hand_workspace_allocator_.clear();
+  hand_blob_allocator_.clear();
+}
+
+void MediapipeHand::set_num_threads(int n) {
+  EstimatorBase::set_num_threads(n);
+  if (palm_net_) {
+    palm_net_->opt.num_threads = n;
+  }
+  if (hand_net_) {
+    hand_net_->opt.num_threads = n;
+  }
+}
+
+void MediapipeHand::set_light_mode(bool mode) {
+  if (palm_net_) {
+    palm_net_->opt.lightmode = mode;
+  }
+  if (hand_net_) {
+    hand_net_->opt.lightmode = mode;
+  }
+  light_mode_ = mode;
+}
+
+int MediapipeHand::LoadModel(const char *palm_path, const char *hand_path) {
+  std::string palm_param_file = std::string(palm_path) + "/param";
+  std::string palm_bin_file = std::string(palm_path) + "/bin";
+  std::string hand_param_file = std::string(hand_path) + "/param";
+  std::string hand_bin_file = std::string(hand_path) + "/bin";
+  if (palm_net_->load_param(palm_param_file.c_str()) == -1 ||
+      palm_net_->load_model(palm_bin_file.c_str()) == -1) {
+    return 10000;
+  }
+  if (hand_net_->load_param(hand_param_file.c_str()) == -1 ||
+      hand_net_->load_model(hand_bin_file.c_str()) == -1) {
+    return 10000;
+  }
+
+  initialized_ = true;
+  anchors.clear();
+  create_ssd_anchors(target_size, target_size, anchors);
+
+  return 0;
+}
+
+int MediapipeHand::Detect(const unsigned char *rgbdata, int img_width,
+                          int img_height, std::vector<PalmObject> &objects) {
+  if (!initialized_) {
+    return 10000;
+  }
+  if (rgbdata == 0) {
+    return 10001;
+  }
+  int w = img_width;
+  int h = img_height;
+  float scale = 1.f;
+  if (w > h) {
+    scale = (float)target_size / w;
+    w = target_size;
+    h = h * scale;
+  } else {
+    scale = (float)target_size / h;
+    h = target_size;
+    w = w * scale;
+  }
+
+  ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
+                                               img_width, img_height, w, h);
+
+  int wpad = target_size - w;
+  int hpad = target_size - h;
+  ncnn::Mat in_pad;
+  ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2,
+                         wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
+  const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
+  in_pad.substract_mean_normalize(0, norm_vals);
+
+  ncnn::Extractor ex = palm_net_->create_extractor();
+  ncnn::Mat cls, reg;
+  ex.input("input", in_pad);
+  ex.extract("cls", cls);
+  ex.extract("reg", reg);
+
+  float *scores = (float *)cls.data;
+  float *bboxes = (float *)reg.data;
+
+  std::list<DetectRegion> region_list, region_nms_list;
+  std::vector<DetectRegion> detect_results;
+
+  decode_bounds(region_list, prob_threshold, target_size, target_size, scores,
+                bboxes, anchors);
+  non_max_suppression(region_list, region_nms_list, nms_threshold);
+  objects.clear();
+  pack_detect_result(detect_results, region_nms_list, target_size, objects);
+
+  for (int i = 0; i < objects.size(); i++) {
+    objects[i].hand_pos[0].x =
+        (objects[i].hand_pos[0].x * target_size - ((float)wpad / 2)) / scale;
+    objects[i].hand_pos[0].y =
+        (objects[i].hand_pos[0].y * target_size - ((float)hpad / 2)) / scale;
+    objects[i].hand_pos[1].x =
+        (objects[i].hand_pos[1].x * target_size - ((float)wpad / 2)) / scale;
+    objects[i].hand_pos[1].y =
+        (objects[i].hand_pos[1].y * target_size - ((float)hpad / 2)) / scale;
+    objects[i].hand_pos[2].x =
+        (objects[i].hand_pos[2].x * target_size - ((float)wpad / 2)) / scale;
+    objects[i].hand_pos[2].y =
+        (objects[i].hand_pos[2].y * target_size - ((float)hpad / 2)) / scale;
+    objects[i].hand_pos[3].x =
+        (objects[i].hand_pos[3].x * target_size - ((float)wpad / 2)) / scale;
+    objects[i].hand_pos[3].y =
+        (objects[i].hand_pos[3].y * target_size - ((float)hpad / 2)) / scale;
+
+    for (int j = 0; j < 7; j++) {
+      objects[i].landmarks[j].x =
+          (objects[i].landmarks[j].x * target_size - ((float)wpad / 2)) / scale;
+      objects[i].landmarks[j].y =
+          (objects[i].landmarks[j].y * target_size - ((float)hpad / 2)) / scale;
+    }
+
+    const float srcPts[8] = {
+        objects[i].hand_pos[0].x, objects[i].hand_pos[0].y,
+        objects[i].hand_pos[1].x, objects[i].hand_pos[1].y,
+        objects[i].hand_pos[2].x, objects[i].hand_pos[2].y,
+        objects[i].hand_pos[3].x, objects[i].hand_pos[3].y,
+    };
+
+    const float dstPts[8] = {
+        0, 0, 224, 0, 224, 224, 0, 224,
+    };
+
+    float tm[6];
+    unsigned char *trans_mat =
+        (unsigned char *)malloc(224 * 224 * 3 * sizeof(unsigned char));
+    ncnn::get_affine_transform(dstPts, srcPts, 4, tm);
+
+    ncnn::warpaffine_bilinear_c3(rgbdata, img_width, img_height, trans_mat, 224,
+                                 224, tm);
+
+    ncnn::Mat trans_image =
+        ncnn::Mat::from_pixels(trans_mat, ncnn::Mat::PIXEL_RGB, 224, 224);
+
+    float score = GetLandmarks(trans_image, tm, objects[i].skeleton,
+                               objects[i].skeleton3d);
+
+    free(trans_mat);
+  }
+  return 0;
+}
+
+float MediapipeHand::GetLandmarks(ncnn::Mat in, float tm[6],
+                                  std::vector<ov::Point2f> &skeleton,
+                                  std::vector<ov::Point3d> &skeleton3d) {
+
+  const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
+  in.substract_mean_normalize(NULL, norm_vals);
+  ncnn::Mat points, score;
+  {
+    ncnn::Extractor ex = hand_net_->create_extractor();
+    ex.input("input", in);
+    ex.extract("points", points);
+    ex.extract("score", score);
+  }
+
+  float *points_data = (float *)points.data;
+  float *score_data = (float *)score.data;
+  for (int i = 0; i < 21; i++) {
+    ov::Point3d pt3d;
+    pt3d.x = points_data[i * 3];
+    pt3d.y = points_data[i * 3 + 1];
+    pt3d.z = points_data[i * 3 + 2];
+
+    ov::Point2f pt;
+    pt.x = pt3d.x * tm[0] + pt3d.y * tm[1] + tm[2];
+    pt.y = pt3d.x * tm[3] + pt3d.y * tm[4] + tm[5];
+
+    skeleton.push_back(pt);
+
+    pt3d.x /= 224.f;
+    pt3d.y /= 224.f;
+    skeleton3d.push_back(pt3d);
+  }
+  return score_data[0];
+}
+
+} // namespace ovhand3d
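A minimal usage sketch for the class implemented above. This is editorial illustration, not part of the patch; the model directory names are assumptions (LoadModel() only requires each directory to contain ncnn "param" and "bin" files):

#include "mediapipe.hpp"
#include <vector>

// Returns the number of detected hands, or -1 on error.
int run_hand_pose3d(const unsigned char *rgb, int w, int h) {
  ovhand3d::MediapipeHand hand;
  if (hand.LoadModel("models/palm", "models/hand") != 0)
    return -1;
  std::vector<ovhand3d::PalmObject> objects;
  if (hand.Detect(rgb, w, h, objects) != 0)
    return -1;
  // objects[i].skeleton holds 21 landmarks in image coordinates;
  // objects[i].skeleton3d holds the matching normalized 3D points.
  return (int)objects.size();
}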
diff --git a/src/hand/pose3d/mediapipe/mediapipe.hpp b/src/hand/pose3d/mediapipe/mediapipe.hpp
new file mode 100644
index 0000000..31efda4
--- /dev/null
+++ b/src/hand/pose3d/mediapipe/mediapipe.hpp
@@ -0,0 +1,87 @@
+#ifndef _HAND_POSE3D_MEDIAPIPE_H_
+#define _HAND_POSE3D_MEDIAPIPE_H_
+
+#include "../../../common/common.hpp"
+#include <vector>
+
+namespace ovhand3d {
+
+struct PalmObject {
+  float score;
+  ov::Point2f landmarks[7];
+  float rotation;
+
+  float hand_cx;
+  float hand_cy;
+  float hand_w;
+  float hand_h;
+  ov::Point2f hand_pos[4];
+
+  std::vector<ov::Point2f> skeleton;
+  std::vector<ov::Point3d> skeleton3d;
+};
+
+struct DetectRegion {
+  float score;
+  ov::Point2f topleft;
+  ov::Point2f btmright;
+  ov::Point2f landmarks[7];
+
+  float rotation;
+  ov::Point2f roi_center;
+  ov::Point2f roi_size;
+  ov::Point2f roi_coord[4];
+};
+
+struct Anchor {
+  float x_center, y_center, w, h;
+};
+
+struct AnchorsParams {
+  int input_size_width;
+  int input_size_height;
+
+  float min_scale;
+  float max_scale;
+
+  float anchor_offset_x;
+  float anchor_offset_y;
+
+  int num_layers;
+  std::vector<int> feature_map_width;
+  std::vector<int> feature_map_height;
+  std::vector<int> strides;
+  std::vector<float> aspect_ratios;
+};
+
+class MediapipeHand : public ov::EstimatorBase {
+public:
+  MediapipeHand();
+  ~MediapipeHand();
+  int LoadModel(const char *palm_model, const char *hand_model);
+  int Detect(const unsigned char *rgbdata, int img_width, int img_height,
+             std::vector<PalmObject> &objects);
+  float GetLandmarks(ncnn::Mat in, float tm[6],
+                     std::vector<ov::Point2f> &skeleton,
+                     std::vector<ov::Point3d> &skeleton3d);
+  void set_light_mode(bool mode);
+  void set_num_threads(int n);
+
+private:
+  ncnn::Net *palm_net_ = NULL;
+  ncnn::Net *hand_net_ = NULL;
+  ncnn::PoolAllocator palm_workspace_allocator_;
+  ncnn::UnlockedPoolAllocator palm_blob_allocator_;
+  ncnn::PoolAllocator hand_workspace_allocator_;
+  ncnn::UnlockedPoolAllocator hand_blob_allocator_;
+  bool initialized_ = false;
+  bool light_mode_ = true;
+  std::vector<Anchor> anchors;
+  float prob_threshold = 0.55f;
+  float nms_threshold = 0.3f;
+  const int target_size = 192;
+  const float mean_vals[3] = {0.f, 0.f, 0.f};
+  const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
+};
+} // namespace ovhand3d
+#endif // !_HAND_POSE3D_MEDIAPIPE_H_
diff --git a/src/pose/estimator/pptinypose/pptinypose.bak b/src/pose/estimator/pptinypose/pptinypose.bak
new file mode 100644
index 0000000..c41ae1e
--- /dev/null
+++ b/src/pose/estimator/pptinypose/pptinypose.bak
@@ -0,0 +1,161 @@
+#include "pptinypose.hpp"
+#include <opencv2/opencv.hpp>
+
+#ifdef OV_VULKAN
+#include "gpu.h"
+#endif // OV_VULKAN
+
+namespace ovpose {
+static int argmax(const ncnn::Mat &bottom_blob, ncnn::Mat &top_blob,
+                  std::vector<float> &prob) {
+  int size = bottom_blob.total();
+  const float *ptr = bottom_blob;
+  std::vector<std::pair<float, int>> vec;
+  vec.resize(size);
+  for (int i = 0; i < size; i++) {
+    vec[i] = std::make_pair(ptr[i], i);
+  }
+  top_blob.create(bottom_blob.c, 1, 1, 4u);
+  float *outptr = top_blob;
+
+  for (size_t i = 0; i < bottom_blob.c; i++) {
+    int size0 = bottom_blob.channel(i).total();
+    std::partial_sort(vec.begin() + size0 * i, vec.begin() + size0 * (i + 1),
+                      vec.begin() + size0 * (i + 1),
+                      std::greater<std::pair<float, int>>());
+    outptr[i] = vec[size0 * i].second - size0 * i;
+    prob.push_back(vec[size0 * i].first);
+  }
+
+  return 0;
+}
+
+static void dark_parse(const ncnn::Mat &heatmap, std::vector<int> &dim,
+                       std::vector<float> &coords, int px, int py, int ch) {
+  /*DARK postprocessing, Zhang et al. Distribution-Aware Coordinate
+  Representation for Human Pose Estimation (CVPR 2020).
+  1) offset = - hessian.inv() * derivative
+  2) dx = (heatmap[x+1] - heatmap[x-1])/2.
+  3) dxx = (dx[x+1] - dx[x-1])/2.
+  4) derivative = Mat([dx, dy])
+  5) hessian = Mat([[dxx, dxy], [dxy, dyy]])
+  */
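+  /* Worked example of the update below (editorial illustration only, not
+     from the original paper): with dx = 0.2, dy = -0.1 and dxx = dyy = -1,
+     dxy = 0, the hessian is [[-1, 0], [0, -1]], so
+     offset = -hessian.inv() * derivative = [0.2, -0.1]^T, and the integer
+     argmax is shifted by that sub-pixel amount before rescaling. */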
+  float *heatmap_data = (float *)heatmap.channel(ch).data;
+  std::vector<float> heatmap_ch;
+  heatmap_ch.insert(heatmap_ch.begin(), heatmap_data,
+                    heatmap_data + heatmap.channel(ch).total());
+  cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0, dim[2]);
+  heatmap_mat.convertTo(heatmap_mat, CV_32FC1);
+  cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0);
+  heatmap_mat = heatmap_mat.reshape(1, 1);
+  heatmap_ch = std::vector<float>(heatmap_mat.reshape(1, 1));
+
+  float epsilon = 1e-10;
+  // sample heatmap to get values in around target location
+  float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon));
+  float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon));
+  float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon));
+
+  float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon));
+  float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon));
+  float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon));
+  float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon));
+  float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon));
+  float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon));
+  float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon));
+  float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon));
+  float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon));
+  float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon));
+
+  // compute dx/dy and dxx/dyy with sampled values
+  float dx = 0.5 * (xr - xl);
+  float dy = 0.5 * (yu - yd);
+  float dxx = 0.25 * (xr2 - 2 * xy + xl2);
+  float dxy = 0.25 * (xryu - xryd - xlyu + xlyd);
+  float dyy = 0.25 * (yu2 - 2 * xy + yd2);
+
+  // finally get offset by derivative and hessian, combined from dx/dy and
+  // dxx/dyy
+  if (dxx * dyy - dxy * dxy != 0) {
+    float M[2][2] = {dxx, dxy, dxy, dyy};
+    float D[2] = {dx, dy};
+    cv::Mat hessian(2, 2, CV_32F, M);
+    cv::Mat derivative(2, 1, CV_32F, D);
+    cv::Mat offset = -hessian.inv() * derivative;
+    coords[ch * 2] += offset.at<float>(0, 0);
+    coords[ch * 2 + 1] += offset.at<float>(1, 0);
+  }
+}
+
+static std::vector<float> get_final_preds(const ncnn::Mat &heatmap,
+                                          const ncnn::Mat &argmax_out) {
+  std::vector<float> coords((size_t)heatmap.c * 2);
+  for (int i = 0; i < heatmap.c; i++) {
+    int idx = argmax_out[i];
+    coords[i * 2] = idx % heatmap.w;
+    coords[i * 2 + 1] = (float)(idx / heatmap.w);
+
+    int px = int(coords[i * 2] + 0.5);
+    int py = int(coords[i * 2 + 1] + 0.5);
+
+    std::vector<int> dim({1, heatmap.c, heatmap.h, heatmap.w});
+    dark_parse(heatmap, dim, coords, px, py, i);
+  }
+
+  return coords;
+}
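+/* PP-TinyPose is published with two input resolutions, 96x128 and 192x256
+   (width x height); the constructor below picks between them based on the
+   target_size argument. */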
+PPTinyPoseEstimator::PPTinyPoseEstimator(int target_size) : Estimator() {
+  if (target_size == 128) {
+    target_width_ = 96;
+    target_height_ = 128;
+  } else {
+    target_width_ = 192;
+    target_height_ = 256;
+  }
+}
+
+int PPTinyPoseEstimator::ExtractKeypoints(
+    const unsigned char *rgbdata, int img_width, int img_height,
+    const ov::Rect &rect, std::vector<ov::KeyPoint> *keypoints) {
+  if (!initialized_) {
+    return 10000;
+  }
+  if (rgbdata == 0) {
+    return 10001;
+  }
+  keypoints->clear();
+
+  ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(
+      rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, rect.x, rect.y,
+      rect.width, rect.height, target_width_, target_height_);
+  in.substract_mean_normalize(meanVals, normVals);
+
+  ncnn::Extractor ex = net_->create_extractor();
+  ex.set_light_mode(light_mode_);
+  ex.set_num_threads(num_threads);
+  ex.input("image", in);
+  ncnn::Mat out;
+  ex.extract("save_infer_model/scale_0.tmp_1", out);
+
+  ncnn::Mat argmax_out;
+  std::vector<float> probs;
+  argmax(out, argmax_out, probs);
+  std::vector<float> coords = get_final_preds(out, argmax_out);
+
+  for (int i = 0; i < coords.size() / 2; i++) {
+    ov::KeyPoint keypoint;
+    keypoint.p =
+        ov::Point(coords[i * 2] * rect.width / (float)out.w + rect.x,
+                  coords[i * 2 + 1] * rect.height / (float)out.h + rect.y);
+    keypoint.score = probs[i];
+    keypoints->push_back(keypoint);
+  }
+
+  return 0;
+}
+
+} // namespace ovpose
diff --git a/src/pose/estimator/pptinypose/pptinypose.hpp b/src/pose/estimator/pptinypose/pptinypose.hpp
new file mode 100644
index 0000000..4bae1ba
--- /dev/null
+++ b/src/pose/estimator/pptinypose/pptinypose.hpp
@@ -0,0 +1,25 @@
+#ifndef _POSE_PPTINYPOSE_ESTIMATOR_H_
+#define _POSE_PPTINYPOSE_ESTIMATOR_H_
+
+#include "../estimator.hpp"
+#include "net.h"
+#include <vector>
+
+namespace ovpose {
+class PPTinyPoseEstimator : public Estimator {
+public:
+  PPTinyPoseEstimator(int target_size);
+  int ExtractKeypoints(const unsigned char *rgbdata, int img_width,
+                       int img_height, const ov::Rect &rect,
+                       std::vector<ov::KeyPoint> *keypoints);
+
+private:
+  int target_width_ = 96;
+  int target_height_ = 128;
+  const float meanVals[3] = {123.675f, 116.28f, 103.53f};
+  const float normVals[3] = {0.01712475f, 0.0175f, 0.01742919f};
+};
+
+} // namespace ovpose
+
+#endif // !_POSE_PPTINYPOSE_ESTIMATOR_H_

From 9c1a2dd8f4235fe74ca814ed0c7bf201f2cc82c6 Mon Sep 17 00:00:00 2001
From: Syd Xu
Date: Mon, 7 Feb 2022 14:19:13 +0800
Subject: [PATCH 8/9] feat: hand pose3d mediapipe

---
 go/common/palmobject.go                 |  5 +++--
 go/examples/hand/main.go                | 28 +++++++++++++++++++++----
 go/hand/drawer/drawer.go                | 22 +++++++++++++++----
 src/hand/pose3d/mediapipe/mediapipe.cpp |  2 ++
 4 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/go/common/palmobject.go b/go/common/palmobject.go
index c6c1e07..9be9a96 100644
--- a/go/common/palmobject.go
+++ b/go/common/palmobject.go
@@ -13,9 +13,10 @@ import (
 
 // PalmObject
 type PalmObject struct {
+	Name       string
 	Score      float64
 	Rotation   float64
-	Rect       []Point
+	RectPoints []Point
 	Landmarks  []Point
 	Skeleton   []Point
 	Skeleton3d []Point3d
@@ -37,7 +38,7 @@ func GoPalmObject(cObj *C.PalmObject, w float64, h float64) PalmObject {
 	return PalmObject{
 		Score:      float64(cObj.score),
 		Rotation:   float64(cObj.rotation),
-		Rect:       GoPoint2fVector(cObj.rect, w, h),
+		RectPoints: GoPoint2fVector(cObj.rect, w, h),
 		Landmarks:  GoPoint2fVector(cObj.landmarks, w, h),
 		Skeleton:   GoPoint2fVector(cObj.skeleton, w, h),
 		Skeleton3d: GoPoint3dVector(cObj.skeleton3d),
diff --git a/go/examples/hand/main.go b/go/examples/hand/main.go
index 836b9df..1924511 100644
--- a/go/examples/hand/main.go
+++ b/go/examples/hand/main.go
@@ -16,6 +16,7 @@ import (
 	handdrawer "github.com/bububa/openvision/go/hand/drawer"
 	"github.com/bububa/openvision/go/hand/pose"
 	"github.com/bububa/openvision/go/hand/pose3d"
+	"github.com/llgcode/draw2d"
 )
 
 func main() {
@@ -23,6 +24,7 @@ func main() {
 	dataPath := cleanPath(wd, "~/go/src/github.com/bububa/openvision/data")
 	imgPath := filepath.Join(dataPath, "./images")
 	modelPath := filepath.Join(dataPath, "./models")
+	fontPath := filepath.Join(dataPath, "./font")
 	common.CreateGPUInstance()
 	defer common.DestroyGPUInstance()
 	cpuCores := common.GetBigCPUCount()
@@ -40,7 +42,7 @@ func main() {
 	// 	detect(d, estimator, imgPath, "hand2.jpg", idx)
 	// }
 	d3d := mediapipe(modelPath)
-	detect3d(d3d, imgPath, "hand1.jpg")
+	detect3d(d3d, imgPath, fontPath, "hand1.jpg")
 }
 
 func yolox(modelPath string) detecter.Detecter {
@@ -119,7 +121,7 @@ func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename stri
 	}
 }
 
-func detect3d(d *pose3d.Mediapipe, imgPath string, filename string) {
+func detect3d(d *pose3d.Mediapipe, imgPath string, fontPath string, filename string) {
 	inPath := filepath.Join(imgPath, filename)
 	imgSrc, err := loadImage(inPath)
 	if err != nil {
@@ -130,8 +132,9 @@ func detect3d(d *pose3d.Mediapipe, imgPath string, filename string) {
 	if err != nil {
 		log.Fatalln(err)
 	}
-	log.Printf("%+v\n", rois)
-	drawer := handdrawer.New()
+	// log.Printf("%+v\n", rois)
+	fnt := loadFont(fontPath)
+	drawer := handdrawer.New(handdrawer.WithFont(fnt))
 	outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-hand-%s", filename))
 	out := drawer.DrawPalm(img, rois)
@@ -187,3 +190,20 @@ func cleanPath(wd string, path string) string {
 	}
 	return filepath.Join(wd, path)
 }
+
+func loadFont(fontPath string) *common.Font {
+	fontCache := common.NewFontCache(fontPath)
+	fnt := &common.Font{
+		Size: 9,
+		Data: &draw2d.FontData{
+			Name: "NotoSansCJKsc",
+			//Name: "Roboto",
+			Family: draw2d.FontFamilySans,
+			Style:  draw2d.FontStyleNormal,
+		},
+	}
+	if err := fnt.Load(fontCache); err != nil {
+		log.Fatalln(err)
+	}
+	return fnt
+}
diff --git a/go/hand/drawer/drawer.go b/go/hand/drawer/drawer.go
index 2045ac3..02bf8b7 100644
--- a/go/hand/drawer/drawer.go
+++ b/go/hand/drawer/drawer.go
@@ -3,7 +3,9 @@ package drawer
 import (
 	"image"
 	"image/color"
+	"math"
 
+	"github.com/llgcode/draw2d"
 	"github.com/llgcode/draw2d/draw2dimg"
 	"github.com/llgcode/draw2d/draw2dkit"
 
@@ -124,12 +126,12 @@ func (d *Drawer) DrawPalm(img image.Image, rois []common.PalmObject) image.Image
 		gc.SetLineWidth(d.BorderStrokeWidth)
 		gc.SetStrokeColor(common.ColorFromHex(d.BorderColor))
 		gc.BeginPath()
-		for idx, pt := range roi.Rect {
+		for idx, pt := range roi.RectPoints {
 			gc.MoveTo(pt.X*imgW, pt.Y*imgH)
-			if idx == len(roi.Rect)-1 {
-				gc.LineTo(roi.Rect[0].X*imgW, roi.Rect[0].Y*imgH)
+			if idx == 3 {
+				gc.LineTo(roi.RectPoints[0].X*imgW, roi.RectPoints[0].Y*imgH)
 			} else {
-				gc.LineTo(roi.Rect[idx+1].X*imgW, roi.Rect[idx+1].Y*imgH)
+				gc.LineTo(roi.RectPoints[idx+1].X*imgW, roi.RectPoints[idx+1].Y*imgH)
 			}
 		}
 		gc.Close()
@@ -169,6 +171,18 @@ func (d *Drawer) DrawPalm(img image.Image, rois []common.PalmObject) image.Image
 		for _, pt := range roi.Landmarks {
 			common.DrawCircle(gc, common.Pt(pt.X*imgW, pt.Y*imgH), d.KeypointRadius, d.KeypointColor, "", d.KeypointStrokeWidth)
 		}
+		// draw name
+		if roi.Name != "" {
+			deltaX := (roi.RectPoints[2].X - roi.RectPoints[3].X) * imgW
+			deltaY := (roi.RectPoints[2].Y - roi.RectPoints[3].Y) * imgH
+			width := math.Hypot(deltaX, deltaY)
+			matrix := draw2d.NewRotationMatrix(roi.Rotation)
+			ptX, ptY := matrix.InverseTransformPoint(roi.RectPoints[3].X*imgW, roi.RectPoints[3].Y*imgH)
+			gc.Save()
+			gc.Rotate(roi.Rotation)
+			common.DrawLabelInWidth(gc, d.Font, roi.Name, common.Pt(ptX, ptY), d.LabelColor, d.BorderColor, width)
+			gc.Restore()
+		}
 	}
 	return out
 }
diff --git a/src/hand/pose3d/mediapipe/mediapipe.cpp b/src/hand/pose3d/mediapipe/mediapipe.cpp
index e2c5a04..603f51a 100644
--- a/src/hand/pose3d/mediapipe/mediapipe.cpp
+++ b/src/hand/pose3d/mediapipe/mediapipe.cpp
@@ -297,6 +297,8 @@ void compute_detect_to_roi(DetectRegion &region, const int &target_size,
     palm.landmarks[i] = region.landmarks[i];
   }
 
+  palm.rotation = rotation;
+
   palm.score = region.score;
 }
 

From a77aaa279801ab79baf543f64b826145f54e4c6a Mon Sep 17 00:00:00 2001
From: Syd Xu
Date: Mon, 7 Feb 2022 17:29:30 +0800
Subject: [PATCH 9/9] fix(hand): pose3d/mediapipe palmobject.landmarks wrong
 malloc

---
 go/examples/hand/main.go                 |    1 +
 src/classifier/svm/svm_light/svm_learn.c | 6373 +++++++++++-----------
 src/classifier/svm/svm_light/svm_learn.h |    3 +
 src/hand/pose3d/estimator.cpp            |   17 +-
 src/hand/pose3d/mediapipe/mediapipe.cpp  |    4 +-
 5 files changed, 3210 insertions(+), 3188 deletions(-)

diff --git a/go/examples/hand/main.go b/go/examples/hand/main.go
index 1924511..d6daac6 100644
--- a/go/examples/hand/main.go
+++ b/go/examples/hand/main.go
@@ -43,6 +43,7 @@ func main() {
 	// }
 	d3d := mediapipe(modelPath)
 	detect3d(d3d, imgPath, fontPath, "hand1.jpg")
+	detect3d(d3d, imgPath, fontPath, "hand2.jpg")
 }
 
 func yolox(modelPath string) detecter.Detecter {
diff --git a/src/classifier/svm/svm_light/svm_learn.c b/src/classifier/svm/svm_light/svm_learn.c
index 5bf8756..0dff71a 100644
--- a/src/classifier/svm/svm_light/svm_learn.c
+++ b/src/classifier/svm/svm_light/svm_learn.c
@@ -14,15 +14,14 @@
 /* author. The author is not responsible for implications from the  */
 /* use of this software.                                             */
 /*                                                                   */
-/***********************************************************************/
+/***********************************************************************/
+
+#include "svm_learn.h"
+#include "svm_common.h"
 
-
-# include "svm_common.h"
-# include "svm_learn.h"
-
-#define MAX(x,y) ((x) < (y) ? (y) : (x))
-#define MIN(x,y) ((x) > (y) ? (y) : (x))
-#define SIGN(x) ((x) > (0) ? (1) : (((x) < (0) ? (-1) : (0))))
+#define MAX(x, y) ((x) < (y) ? (y) : (x))
+#define MIN(x, y) ((x) > (y) ? (y) : (x))
+#define SIGN(x) ((x) > (0) ? (1) : (((x) < (0) ? (-1) : (0))))
 
 /* interface to QP-solver */
 double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
@@ -33,439 +32,463 @@ double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
    docs/label. The resulting model is returned in the structure
    model. */
 
-void svm_learn_classification(DOC **docs, double *class, long int
-                              totdoc, long int totwords,
-                              LEARN_PARM *learn_parm,
-                              KERNEL_PARM *kernel_parm,
-                              KERNEL_CACHE *kernel_cache,
-                              MODEL *model,
-                              double *alpha)
-     /* docs:        Training vectors (x-part) */
-     /* class:       Training labels (y-part, zero if test example for
-                     transduction) */
-     /* totdoc:      Number of examples in docs/label */
-     /* totwords:    Number of features (i.e. highest feature index) */
-     /* learn_parm:  Learning paramenters */
-     /* kernel_parm: Kernel paramenters */
-     /* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
-                     NULL if linear.*/
-     /* model:       Returns learning result (assumed empty before called) */
-     /* alpha:       Start values for the alpha variables or NULL
-                     pointer. The new alpha values are returned after
-                     optimization if not NULL. Array must be of size totdoc.
*/ +void svm_learn_classification(DOC **docs, double *class, long int totdoc, + long int totwords, LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, MODEL *model, + double *alpha) +/* docs: Training vectors (x-part) */ +/* class: Training labels (y-part, zero if test example for + transduction) */ +/* totdoc: Number of examples in docs/label */ +/* totwords: Number of features (i.e. highest feature index) */ +/* learn_parm: Learning paramenters */ +/* kernel_parm: Kernel paramenters */ +/* kernel_cache:Initialized Cache of size totdoc, if using a kernel. + NULL if linear.*/ +/* model: Returns learning result (assumed empty before called) */ +/* alpha: Start values for the alpha variables or NULL + pointer. The new alpha values are returned after + optimization if not NULL. Array must be of size totdoc. */ { - long *inconsistent,i,*label; + long *inconsistent, i, *label; long inconsistentnum; - long misclassified,upsupvecnum; - double loss,model_length,example_length; - double dualitygap,xisum,alphasum,xi; - double maxdiff,*lin,*a,*c; - double runtime_start,runtime_end; + long misclassified, upsupvecnum; + double loss, model_length, example_length; + double dualitygap, xisum, alphasum, xi; + double maxdiff, *lin, *a, *c; + double runtime_start, runtime_end; long iterations; - long *unlabeled,transduction; + long *unlabeled, transduction; long heldout; - long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0; - long loocomputed=0; - double runtime_start_loo=0,runtime_start_xa=0; - double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg; - long *index,*index2dnum; + long loo_count = 0, loo_count_pos = 0, loo_count_neg = 0, trainpos = 0, + trainneg = 0; + long loocomputed = 0; + double runtime_start_loo = 0, runtime_start_xa = 0; + double heldout_c = 0, r_delta_sq = 0, r_delta, r_delta_avg; + long *index, *index2dnum; double *weights; - CFLOAT *aicache; /* buffer to keep one row of hessian */ + CFLOAT *aicache; /* buffer to keep one row of hessian */ double *xi_fullset; /* buffer for storing xi on full sample in loo */ double *a_fullset; /* buffer for storing alpha on full sample in loo */ TIMING timing_profile; SHRINK_STATE shrink_state; - runtime_start=get_runtime(); - timing_profile.time_kernel=0; - timing_profile.time_opti=0; - timing_profile.time_shrink=0; - timing_profile.time_update=0; - timing_profile.time_model=0; - timing_profile.time_check=0; - timing_profile.time_select=0; - kernel_cache_statistic=0; + runtime_start = get_runtime(); + timing_profile.time_kernel = 0; + timing_profile.time_opti = 0; + timing_profile.time_shrink = 0; + timing_profile.time_update = 0; + timing_profile.time_model = 0; + timing_profile.time_check = 0; + timing_profile.time_select = 0; + kernel_cache_statistic = 0; - learn_parm->totwords=totwords; + learn_parm->totwords = totwords; /* make sure -n value is reasonable */ - if((learn_parm->svm_newvarsinqp < 2) - || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { - learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + if ((learn_parm->svm_newvarsinqp < 2) || + (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + learn_parm->svm_newvarsinqp = learn_parm->svm_maxqpsize; } - init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + init_shrink_state(&shrink_state, totdoc, (long)MAXSHRINK); - label = (long *)my_malloc(sizeof(long)*totdoc); - inconsistent = (long *)my_malloc(sizeof(long)*totdoc); - unlabeled = (long *)my_malloc(sizeof(long)*totdoc); - c = (double 
*)my_malloc(sizeof(double)*totdoc);
-  a = (double *)my_malloc(sizeof(double)*totdoc);
-  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
-  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
-  lin = (double *)my_malloc(sizeof(double)*totdoc);
-  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
-  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
-  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
-  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
+  label = (long *)my_malloc(sizeof(long) * totdoc);
+  inconsistent = (long *)my_malloc(sizeof(long) * totdoc);
+  unlabeled = (long *)my_malloc(sizeof(long) * totdoc);
+  c = (double *)my_malloc(sizeof(double) * totdoc);
+  a = (double *)my_malloc(sizeof(double) * totdoc);
+  a_fullset = (double *)my_malloc(sizeof(double) * totdoc);
+  xi_fullset = (double *)my_malloc(sizeof(double) * totdoc);
+  lin = (double *)my_malloc(sizeof(double) * totdoc);
+  learn_parm->svm_cost = (double *)my_malloc(sizeof(double) * totdoc);
+  model->supvec = (DOC **)my_malloc(sizeof(DOC *) * (totdoc + 2));
+  model->alpha = (double *)my_malloc(sizeof(double) * (totdoc + 2));
+  model->index = (long *)my_malloc(sizeof(long) * (totdoc + 2));
 
-  model->at_upper_bound=0;
-  model->b=0;
-  model->supvec[0]=0;  /* element 0 reserved and empty for now */
-  model->alpha[0]=0;
-  model->lin_weights=NULL;
-  model->totwords=totwords;
-  model->totdoc=totdoc;
-  model->kernel_parm=(*kernel_parm);
-  model->sv_num=1;
-  model->loo_error=-1;
-  model->loo_recall=-1;
-  model->loo_precision=-1;
-  model->xa_error=-1;
-  model->xa_recall=-1;
-  model->xa_precision=-1;
-  inconsistentnum=0;
-  transduction=0;
+  model->at_upper_bound = 0;
+  model->b = 0;
+  model->supvec[0] = 0; /* element 0 reserved and empty for now */
+  model->alpha[0] = 0;
+  model->lin_weights = NULL;
+  model->totwords = totwords;
+  model->totdoc = totdoc;
+  model->kernel_parm = (*kernel_parm);
+  model->sv_num = 1;
+  model->loo_error = -1;
+  model->loo_recall = -1;
+  model->loo_precision = -1;
+  model->xa_error = -1;
+  model->xa_recall = -1;
+  model->xa_precision = -1;
+  inconsistentnum = 0;
+  transduction = 0;
 
-  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
-  r_delta_sq=r_delta*r_delta;
+  r_delta = estimate_r_delta(docs, totdoc, kernel_parm);
+  r_delta_sq = r_delta * r_delta;
 
-  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
-  if(learn_parm->svm_c == 0.0) {  /* default value for C */
-    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
-    if(verbosity>=1)
-      printf("Setting default regularization parameter C=%.4f\n",
-             learn_parm->svm_c);
+  r_delta_avg = estimate_r_delta_average(docs, totdoc, kernel_parm);
+  if (learn_parm->svm_c == 0.0) { /* default value for C */
+    learn_parm->svm_c = 1.0 / (r_delta_avg * r_delta_avg);
+    if (verbosity >= 1)
+      printf("Setting default regularization parameter C=%.4f\n",
+             learn_parm->svm_c);
   }
-  learn_parm->eps=-1.0;   /* equivalent regression epsilon for
-                             classification */
+  learn_parm->eps = -1.0; /* equivalent regression epsilon for
+                             classification */
 
-  for(i=0;i<totdoc;i++) {    /* various inits */
-    docs[i]->docnum=i;
-    inconsistent[i]=0;
-    a[i]=0;
-    lin[i]=0;
-    c[i]=0.0;
-    unlabeled[i]=0;
-    if(class[i] == 0) {
-      unlabeled[i]=1;
-      label[i]=0;
-      transduction=1;
+  for (i = 0; i < totdoc; i++) { /* various inits */
+    docs[i]->docnum = i;
+    inconsistent[i] = 0;
+    a[i] = 0;
+    lin[i] = 0;
+    c[i] = 0.0;
+    unlabeled[i] = 0;
+    if (class[i] == 0) {
+      unlabeled[i] = 1;
+      label[i] = 0;
+      transduction = 1;
     }
-    if(class[i] > 0) {
-      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
-        docs[i]->costfactor;
-      label[i]=1;
+    if (class[i] > 0) {
+      learn_parm->svm_cost[i] =
+          learn_parm->svm_c * learn_parm->svm_costratio * docs[i]->costfactor;
+      label[i] = 1;
       trainpos++;
-    }
-    else if(class[i] < 0) {
-      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
-      label[i]=-1;
+    } else if (class[i] < 0) {
+      learn_parm->svm_cost[i] = learn_parm->svm_c * docs[i]->costfactor;
+      label[i] = -1;
       trainneg++;
-    }
-    else {
-      learn_parm->svm_cost[i]=0;
+    } else {
+      learn_parm->svm_cost[i] = 0;
     }
   }
-  if(verbosity>=2) {
-    printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);
+  if (verbosity >= 2) {
+    printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",
+           trainpos, trainneg, totdoc - trainpos - trainneg);
+    fflush(stdout);
   }
 
   /* caching makes no sense for linear kernel */
-  if(kernel_parm->kernel_type == LINEAR) {
+  if (kernel_parm->kernel_type == LINEAR) {
     /* kernel_cache = NULL; */
-  }
-
+  }
+
   /* compute starting state for initial alpha values */
-  if(alpha) {
-    if(verbosity>=1) {
-      printf("Computing starting state..."); fflush(stdout);
+  if (alpha) {
+    if (verbosity >= 1) {
+      printf("Computing starting state...");
+      fflush(stdout);
     }
-    index = (long *)my_malloc(sizeof(long)*totdoc);
-    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
-    weights=(double *)my_malloc(sizeof(double)*(totwords+1));
-    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
-    for(i=0;i<totdoc;i++) {  /* create full index and clip alphas */
-      index[i]=1;
-      alpha[i]=fabs(alpha[i]);
-      if(alpha[i]<0) alpha[i]=0;
-      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
+    index = (long *)my_malloc(sizeof(long) * totdoc);
+    index2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11));
+    weights = (double *)my_malloc(sizeof(double) * (totwords + 1));
+    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT) * totdoc);
+    for (i = 0; i < totdoc; i++) { /* create full index and clip alphas */
+      index[i] = 1;
+      alpha[i] = fabs(alpha[i]);
+      if (alpha[i] < 0)
+        alpha[i] = 0;
+      if (alpha[i] > learn_parm->svm_cost[i])
+        alpha[i] = learn_parm->svm_cost[i];
     }
-    if(kernel_cache && (kernel_parm->kernel_type != LINEAR)) {
-      for(i=0;i<totdoc;i++)     /* fill kernel cache with unbounded SV */
-        if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
-           && (kernel_cache_space_available(kernel_cache)))
-          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
-      for(i=0;i<totdoc;i++)     /* fill rest of kernel cache with bounded SV */
-        if((alpha[i]==learn_parm->svm_cost[i])
-           && (kernel_cache_space_available(kernel_cache)))
-          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
+    if (kernel_cache && (kernel_parm->kernel_type != LINEAR)) {
+      for (i = 0; i < totdoc; i++) /* fill kernel cache with unbounded SV */
+        if ((alpha[i] > 0) && (alpha[i] < learn_parm->svm_cost[i]) &&
+            (kernel_cache_space_available(kernel_cache)))
+          cache_kernel_row(kernel_cache, docs, i, kernel_parm);
+      for (i = 0; i < totdoc;
+           i++) /* fill rest of kernel cache with bounded SV */
+        if ((alpha[i] == learn_parm->svm_cost[i]) &&
+            (kernel_cache_space_available(kernel_cache)))
+          cache_kernel_row(kernel_cache, docs, i, kernel_parm);
     }
-    clear_nvector(weights,totwords); /* set weights to zero */
-    (void)compute_index(index,totdoc,index2dnum);
-    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
-                            totwords,kernel_parm,kernel_cache,lin,aicache,
-                            weights);
-    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
-                              learn_parm,index2dnum,index2dnum,model);
-    for(i=0;i<totdoc;i++)     /* copy initial alphas */
-      a[i]=alpha[i];
-    free(index);
-    free(index2dnum);
-    free(weights);
-    free(aicache);
-    if(verbosity>=1) {
-      printf("done.\n"); fflush(stdout);
-    }
-  }
-
-  if(transduction) {
-    learn_parm->svm_iter_to_shrink=99999999;
-    if(verbosity >= 1)
-      printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");
-  }
+    clear_nvector(weights, totwords); /* set weights to zero */
+    (void)compute_index(index, totdoc, index2dnum);
+    update_linear_component(docs, label, index2dnum, alpha, a, index2dnum, totdoc,
+                            totwords, kernel_parm, kernel_cache, lin, aicache,
+                            weights);
+    (void)calculate_svm_model(docs, label, unlabeled, lin, alpha, a, c,
+                              learn_parm, index2dnum, index2dnum, model);
+    for (i = 0; i < totdoc; i++) /* copy initial alphas */
+      a[i] = alpha[i];
+    free(index);
+    free(index2dnum);
+    free(weights);
+    free(aicache);
+    if (verbosity >= 1) {
+      printf("done.\n");
+      fflush(stdout);
+    }
+  }
+
+  if (transduction) {
+    learn_parm->svm_iter_to_shrink = 99999999;
+    if (verbosity >= 1)
+      printf("\nDeactivating Shrinking due to an incompatibility with the "
+             "transductive \nlearner in the current version.\n\n");
+  }
 
-  if(transduction && learn_parm->compute_loo) {
-    learn_parm->compute_loo=0;
-    if(verbosity >= 1)
-      printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");
-  }
-
-  if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {
-    learn_parm->compute_loo=0;
-    printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n");
-  }
-
-  if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
-    learn_parm->compute_loo=0;
-    printf("\nCannot compute leave-one-out with only one example in one class.\n\n");
-  }
-
-
-  if(verbosity==1) {
-    printf("Optimizing"); fflush(stdout);
+  if (transduction && learn_parm->compute_loo) {
+    learn_parm->compute_loo = 0;
+    if (verbosity >= 1)
+      printf("\nCannot compute leave-one-out estimates for transductive "
+             "learner.\n\n");
+  }
+
+  if (learn_parm->remove_inconsistent && learn_parm->compute_loo) {
+    learn_parm->compute_loo = 0;
+    printf("\nCannot compute leave-one-out estimates when removing "
+           "inconsistent examples.\n\n");
+  }
+
+  if (learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
+    learn_parm->compute_loo = 0;
+    printf("\nCannot compute leave-one-out with only one example in one "
+           "class.\n\n");
+  }
+
+  if (verbosity == 1) {
+    printf("Optimizing");
+    fflush(stdout);
   }
 
   /* train the svm */
-  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
-                                     kernel_parm,kernel_cache,&shrink_state,model,
-                                     inconsistent,unlabeled,a,lin,
-                                     c,&timing_profile,
-                                     &maxdiff,(long)-1,
-                                     (long)1);
+  iterations = optimize_to_convergence(
+      docs, label, totdoc, totwords, learn_parm, kernel_parm, kernel_cache,
+      &shrink_state, model, inconsistent, unlabeled, a, lin, c, &timing_profile,
+      &maxdiff, (long)-1, (long)1);
 
-  if(verbosity>=1) {
-    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
+  if (verbosity >= 1) {
+    if (verbosity == 1)
+      printf("done. (%ld iterations)\n", iterations);
 
-    misclassified=0;
-    for(i=0;(i<totdoc);i++) { /* get final statistic */
-      if((lin[i]-model->b)*(double)label[i] <= 0.0)
-        misclassified++;
-    }
-    printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
-           misclassified,maxdiff);
-
-    runtime_end=get_runtime();
-    if(verbosity>=2) {
-      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
-        (runtime_end-runtime_start)/100.0,
-        (100.0*timing_profile.time_kernel)/(runtime_end-runtime_start),
-        (100.0*timing_profile.time_opti)/(runtime_end-runtime_start),
-        (100.0*timing_profile.time_shrink)/(runtime_end-runtime_start),
-        (100.0*timing_profile.time_update)/(runtime_end-runtime_start),
-        (100.0*timing_profile.time_model)/(runtime_end-runtime_start),
-        (100.0*timing_profile.time_check)/(runtime_end-runtime_start),
-        (100.0*timing_profile.time_select)/(runtime_end-runtime_start));
-    }
-    else {
-      printf("Runtime in cpu-seconds: %.2f\n",
-             (runtime_end-runtime_start)/100.0);
+    misclassified = 0;
+    for (i = 0; (i < totdoc); i++) { /* get final statistic */
+      if ((lin[i] - model->b) * (double)label[i] <= 0.0)
+        misclassified++;
    }
+    printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
+           misclassified, maxdiff);
+
+    runtime_end = get_runtime();
+    if (verbosity >= 2) {
+      printf(
+          "Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for "
+          "optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for "
+          "model/%.2f%% for check/%.2f%% for select)\n",
+          (runtime_end - runtime_start) / 100.0,
+          (100.0 * timing_profile.time_kernel) / (runtime_end - runtime_start),
+          (100.0 * timing_profile.time_opti) / (runtime_end - runtime_start),
+          (100.0 * timing_profile.time_shrink) / (runtime_end - runtime_start),
+          (100.0 * timing_profile.time_update) / (runtime_end - runtime_start),
+          (100.0 * timing_profile.time_model) / (runtime_end - runtime_start),
+          (100.0 * timing_profile.time_check) / (runtime_end - runtime_start),
+          (100.0 * timing_profile.time_select) / (runtime_end - runtime_start));
+    } else {
+      printf("Runtime in cpu-seconds: %.2f\n",
+             (runtime_end - runtime_start) / 100.0);
    }
 
-    if(learn_parm->remove_inconsistent) {
-      inconsistentnum=0;
-      for(i=0;i<totdoc;i++)
-        if(inconsistent[i])
-          inconsistentnum++;
-      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
-             model->sv_num-1,inconsistentnum);
-    }
-    else {
-      upsupvecnum=0;
-      for(i=1;i<model->sv_num;i++) {
-        if(fabs(model->alpha[i]) >=
-           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
-            learn_parm->epsilon_a))
-          upsupvecnum++;
+    if (learn_parm->remove_inconsistent) {
+      inconsistentnum = 0;
+      for (i = 0; i < totdoc; i++)
+        if (inconsistent[i])
+          inconsistentnum++;
+      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
+             model->sv_num - 1, inconsistentnum);
+    } else {
+      upsupvecnum = 0;
+      for (i = 1; i < model->sv_num; i++) {
+        if (fabs(model->alpha[i]) >=
+            (learn_parm->svm_cost[(model->supvec[i])->docnum] -
+             learn_parm->epsilon_a))
+          upsupvecnum++;
      }
-      printf("Number of SV: %ld (including %ld at upper bound)\n",
-             model->sv_num-1,upsupvecnum);
-    }
-
-    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
-      loss=0;
-      xisum=0;
-      alphasum=0;
-      model_length=0;
-      for(i=0;i<totdoc;i++) {
-        xi=MAX(0.0,1.0-(lin[i]-model->b)*(double)label[i]);
-        if(xi > learn_parm->epsilon_crit)
-          loss+=xi;
-        xisum+=xi*learn_parm->svm_cost[i];
-        alphasum+=a[i];
-        model_length+=a[i]*label[i]*lin[i];
+      printf("Number of SV: %ld (including %ld at upper bound)\n",
+             model->sv_num - 1, upsupvecnum);
+    }
+
+    if ((verbosity >= 1) && (!learn_parm->skip_final_opt_check)) {
+      loss = 0;
+      xisum = 0;
+      alphasum = 0;
+      model_length = 0;
+      for (i = 0; i < totdoc; i++) {
+        xi = MAX(0.0, 1.0 - (lin[i] - model->b) * (double)label[i]);
+        if (xi > learn_parm->epsilon_crit)
+          loss += xi;
+        xisum += xi * learn_parm->svm_cost[i];
+        alphasum += a[i];
+        model_length += a[i] * label[i] * lin[i];
      }
-      model_length=sqrt(model_length);
-      dualitygap=(0.5*model_length*model_length+xisum)
-                 -(alphasum-0.5*model_length*model_length);
-      fprintf(stdout,"Upper bound on duality gap: gap=%.5f\n",dualitygap);
-      fprintf(stdout,"Dual objective value: dval=%.5f\n",
-              alphasum-0.5*model_length*model_length);
-      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
-      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
-      example_length=estimate_sphere(model);
-      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
-              length_of_longest_document_vector(docs,totdoc,kernel_parm));
-      fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",
-              estimate_margin_vcdim(model,model_length,example_length));
-      if((!learn_parm->remove_inconsistent) && (!transduction)) {
-        runtime_start_xa=get_runtime();
-        if(verbosity>=1) {
-          printf("Computing XiAlpha-estimates..."); fflush(stdout);
-        }
-        compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
-                             kernel_parm,learn_parm,&(model->xa_error),
-                             &(model->xa_recall),&(model->xa_precision));
-        if(verbosity>=1) {
-          printf("done\n");
-        }
-        printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
-               (get_runtime()-runtime_start_xa)/100.0);
-
-        fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
-                model->xa_error,learn_parm->rho,learn_parm->xa_depth);
-        fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
-                model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
-        fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
-                model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
-      }
-      else if(!learn_parm->remove_inconsistent) {
-        estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
+      model_length = sqrt(model_length);
+      dualitygap = (0.5 * model_length * model_length + xisum) -
+                   (alphasum - 0.5 * model_length * model_length);
+      fprintf(stdout, "Upper bound on duality gap: gap=%.5f\n", dualitygap);
+      fprintf(stdout, "Dual objective value: dval=%.5f\n",
+              alphasum - 0.5 * model_length * model_length);
+      fprintf(stdout, "L1 loss: loss=%.5f\n", loss);
+      fprintf(stdout, "Norm of weight vector: |w|=%.5f\n", model_length);
+      example_length = estimate_sphere(model);
+      fprintf(stdout, "Norm of longest example vector: |x|=%.5f\n",
+              length_of_longest_document_vector(docs, totdoc, kernel_parm));
+      fprintf(stdout, "Estimated VCdim of classifier: VCdim<=%.5f\n",
+              estimate_margin_vcdim(model, model_length, example_length));
+      if ((!learn_parm->remove_inconsistent) && (!transduction)) {
+        runtime_start_xa = get_runtime();
+        if (verbosity >= 1) {
+          printf("Computing XiAlpha-estimates...");
+          fflush(stdout);
+        }
+        compute_xa_estimates(model, label, unlabeled, totdoc, docs, lin, a,
+                             kernel_parm, learn_parm, &(model->xa_error),
+                             &(model->xa_recall), &(model->xa_precision));
+        if (verbosity >= 1) {
+          printf("done\n");
+        }
+        printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
+               (get_runtime() - runtime_start_xa) / 100.0);
+
+        fprintf(stdout,
+                "XiAlpha-estimate of the error: error<=%.2f%% "
+                "(rho=%.2f,depth=%ld)\n",
+                model->xa_error, learn_parm->rho, learn_parm->xa_depth);
+        fprintf(stdout,
+                "XiAlpha-estimate of the recall: recall=>%.2f%% "
+                "(rho=%.2f,depth=%ld)\n",
+                model->xa_recall, learn_parm->rho, learn_parm->xa_depth);
+        fprintf(stdout,
+                "XiAlpha-estimate of the precision: precision=>%.2f%% "
+                "(rho=%.2f,depth=%ld)\n",
+                model->xa_precision, learn_parm->rho, learn_parm->xa_depth);
+      } else if (!learn_parm->remove_inconsistent) {
+        estimate_transduction_quality(model, label, unlabeled, totdoc, docs,
+                                      lin);
      }
    }
-    if(verbosity>=1) {
-      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
+    if (verbosity >= 1) {
+      printf("Number of kernel evaluations: %ld\n", kernel_cache_statistic);
    }
-  }
-
-
+  }
+
  /* leave-one-out testing starts now */
-  if(learn_parm->compute_loo) {
+  if (learn_parm->compute_loo) {
    /* save results of training on full dataset for leave-one-out */
-    runtime_start_loo=get_runtime();
-    for(i=0;i<totdoc;i++) {
-      xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);
-      if(xi_fullset[i]<0) xi_fullset[i]=0;
-      a_fullset[i]=a[i];
+    runtime_start_loo = get_runtime();
+    for (i = 0; i < totdoc; i++) {
+      xi_fullset[i] = 1.0 - ((lin[i] - model->b) * (double)label[i]);
+      if (xi_fullset[i] < 0)
+        xi_fullset[i] = 0;
+      a_fullset[i] = a[i];
    }
-    if(verbosity>=1) {
+    if (verbosity >= 1) {
      printf("Computing leave-one-out");
-    }
-
+    }
+
    /* repeat this loop for every held-out example */
-    for(heldout=0;(heldout<totdoc);heldout++) {
-      if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]
-         < 1.0) {
-        /* guaranteed to not produce a leave-one-out error */
-        if(verbosity==1) {
-          printf("+"); fflush(stdout);
-        }
+    for (heldout = 0; (heldout < totdoc); heldout++) {
+      if (learn_parm->rho * a_fullset[heldout] * r_delta_sq +
+              xi_fullset[heldout] <
+          1.0) {
+        /* guaranteed to not produce a leave-one-out error */
+        if (verbosity == 1) {
+          printf("+");
+          fflush(stdout);
+        }
+      } else if (xi_fullset[heldout] > 1.0) {
+        /* guaranteed to produce a leave-one-out error */
+        loo_count++;
+        if (label[heldout] > 0)
+          loo_count_pos++;
+        else
+          loo_count_neg++;
+        if (verbosity == 1) {
+          printf("-");
+          fflush(stdout);
+        }
+      } else {
+        loocomputed++;
+        heldout_c = learn_parm->svm_cost[heldout]; /* set upper bound to zero */
+        learn_parm->svm_cost[heldout] = 0;
+        /* make sure heldout example is not currently */
+        /* shrunk away. Assumes that lin is up to date! */
+        shrink_state.active[heldout] = 1;
+        if (verbosity >= 2)
+          printf("\nLeave-One-Out test on example %ld\n", heldout);
+        if (verbosity >= 1) {
+          printf("(?[%ld]", heldout);
+          fflush(stdout);
+        }
+
+        optimize_to_convergence(docs, label, totdoc, totwords, learn_parm,
+                                kernel_parm, kernel_cache, &shrink_state, model,
+                                inconsistent, unlabeled, a, lin, c,
+                                &timing_profile, &maxdiff, heldout, (long)2);
+
+        /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */
+
+        if (((lin[heldout] - model->b) * (double)label[heldout]) <= 0.0) {
+          loo_count++; /* there was a loo-error */
+          if (label[heldout] > 0)
+            loo_count_pos++;
+          else
+            loo_count_neg++;
+          if (verbosity >= 1) {
+            printf("-)");
+            fflush(stdout);
+          }
+        } else {
+          if (verbosity >= 1) {
+            printf("+)");
+            fflush(stdout);
+          }
+        }
+        /* now we need to restore the original data set*/
+        learn_parm->svm_cost[heldout] = heldout_c; /* restore upper bound */
      }
-      else if(xi_fullset[heldout] > 1.0) {
-        /* guaranteed to produce a leave-one-out error */
-        loo_count++;
-        if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
-        if(verbosity==1) {
-          printf("-"); fflush(stdout);
-        }
-      }
-      else {
-        loocomputed++;
-        heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */
-        learn_parm->svm_cost[heldout]=0;
-        /* make sure heldout example is not currently */
-        /* shrunk away. Assumes that lin is up to date!
*/ - shrink_state.active[heldout]=1; - if(verbosity>=2) - printf("\nLeave-One-Out test on example %ld\n",heldout); - if(verbosity>=1) { - printf("(?[%ld]",heldout); fflush(stdout); - } - - optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, - kernel_parm, - kernel_cache,&shrink_state,model,inconsistent,unlabeled, - a,lin,c,&timing_profile, - &maxdiff,heldout,(long)2); - - /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */ - - if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) { - loo_count++; /* there was a loo-error */ - if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++; - if(verbosity>=1) { - printf("-)"); fflush(stdout); - } - } - else { - if(verbosity>=1) { - printf("+)"); fflush(stdout); - } - } - /* now we need to restore the original data set*/ - learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */ - } - } /* end of leave-one-out loop */ - - - if(verbosity>=1) { - printf("\nRetrain on full problem"); fflush(stdout); + } /* end of leave-one-out loop */ + + if (verbosity >= 1) { + printf("\nRetrain on full problem"); + fflush(stdout); } - optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, - kernel_parm, - kernel_cache,&shrink_state,model,inconsistent,unlabeled, - a,lin,c,&timing_profile, - &maxdiff,(long)-1,(long)1); - if(verbosity >= 1) - printf("done.\n"); - - + optimize_to_convergence(docs, label, totdoc, totwords, learn_parm, + kernel_parm, kernel_cache, &shrink_state, model, + inconsistent, unlabeled, a, lin, c, &timing_profile, + &maxdiff, (long)-1, (long)1); + if (verbosity >= 1) + printf("done.\n"); + /* after all leave-one-out computed */ - model->loo_error=100.0*loo_count/(double)totdoc; - model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0; - model->loo_precision=(trainpos-loo_count_pos)/ - (double)(trainpos-loo_count_pos+loo_count_neg)*100.0; - if(verbosity >= 1) { - fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n", - model->loo_error); - fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n", - model->loo_recall); - fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n", - model->loo_precision); - fprintf(stdout,"Actual leave-one-outs computed: %ld (rho=%.2f)\n", - loocomputed,learn_parm->rho); - printf("Runtime for leave-one-out in cpu-seconds: %.2f\n", - (get_runtime()-runtime_start_loo)/100.0); + model->loo_error = 100.0 * loo_count / (double)totdoc; + model->loo_recall = + (1.0 - (double)loo_count_pos / (double)trainpos) * 100.0; + model->loo_precision = (trainpos - loo_count_pos) / + (double)(trainpos - loo_count_pos + loo_count_neg) * + 100.0; + if (verbosity >= 1) { + fprintf(stdout, "Leave-one-out estimate of the error: error=%.2f%%\n", + model->loo_error); + fprintf(stdout, "Leave-one-out estimate of the recall: recall=%.2f%%\n", + model->loo_recall); + fprintf(stdout, + "Leave-one-out estimate of the precision: precision=%.2f%%\n", + model->loo_precision); + fprintf(stdout, "Actual leave-one-outs computed: %ld (rho=%.2f)\n", + loocomputed, learn_parm->rho); + printf("Runtime for leave-one-out in cpu-seconds: %.2f\n", + (get_runtime() - runtime_start_loo) / 100.0); } - } - - if(learn_parm->alphafile[0]) - write_alphas(learn_parm->alphafile,a,label,totdoc); - + } + + if (learn_parm->alphafile[0]) + write_alphas(learn_parm->alphafile, a, label, totdoc); + shrink_state_cleanup(&shrink_state); free(label); free(inconsistent); @@ -476,36 +499,35 @@ void svm_learn_classification(DOC **docs, double *class, long int 
 free(xi_fullset);
 free(lin);
 free(learn_parm->svm_cost);
-}
-
-
+}
+
 /* Learns an SVM regression model based on the training data in
   docs/label. The resulting model is returned in the structure
   model. */
 
-void svm_learn_regression(DOC **docs, double *value, long int totdoc,
-                          long int totwords, LEARN_PARM *learn_parm,
-                          KERNEL_PARM *kernel_parm,
-                          KERNEL_CACHE **kernel_cache, MODEL *model)
-     /* docs:        Training vectors (x-part) */
-     /* class:       Training value (y-part) */
-     /* totdoc:      Number of examples in docs/label */
-     /* totwords:    Number of features (i.e. highest feature index) */
-     /* learn_parm:  Learning paramenters */
-     /* kernel_parm: Kernel paramenters */
-     /* kernel_cache:Initialized Cache, if using a kernel. NULL if
-                     linear. Note that it will be free'd and reassigned */
-     /* model:       Returns learning result (assumed empty before called) */
+void svm_learn_regression(DOC **docs, double *value, long int totdoc,
+                          long int totwords, LEARN_PARM *learn_parm,
+                          KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
+                          MODEL *model)
+/* docs:        Training vectors (x-part) */
+/* class:       Training value (y-part) */
+/* totdoc:      Number of examples in docs/label */
+/* totwords:    Number of features (i.e. highest feature index) */
+/* learn_parm:  Learning paramenters */
+/* kernel_parm: Kernel paramenters */
+/* kernel_cache:Initialized Cache, if using a kernel. NULL if
+   linear. Note that it will be free'd and reassigned */
+/* model:       Returns learning result (assumed empty before called) */
 {
-  long *inconsistent,i,j;
+  long *inconsistent, i, j;
  long inconsistentnum;
  long upsupvecnum;
-  double loss,model_length,example_length;
-  double maxdiff,*lin,*a,*c;
-  double runtime_start,runtime_end;
-  long iterations,kernel_cache_size;
+  double loss, model_length, example_length;
+  double maxdiff, *lin, *a, *c;
+  double runtime_start, runtime_end;
+  long iterations, kernel_cache_size;
  long *unlabeled;
-  double r_delta_sq=0,r_delta,r_delta_avg;
+  double r_delta_sq = 0, r_delta, r_delta_avg;
  double *xi_fullset; /* buffer for storing xi on full sample in loo */
  double *a_fullset;  /* buffer for storing alpha on full sample in loo */
  TIMING timing_profile;
@@ -514,195 +536,203 @@ void svm_learn_regression(DOC **docs, double *value, long int totdoc,
  SHRINK_STATE shrink_state;
  DOC **docs_org;
  long *label;
 
  /* set up regression problem in standard form */
-  docs_org=docs;
-  docs = (DOC **)my_malloc(sizeof(DOC)*2*totdoc);
-  label = (long *)my_malloc(sizeof(long)*2*totdoc);
-  c = (double *)my_malloc(sizeof(double)*2*totdoc);
-  for(i=0;i<totdoc;i++) {
-    j=2*totdoc-1-i;
-    docs[i]=create_example(i,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
-    label[i]=+1;
-    c[i]=value[i];
-    docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
-    label[j]=-1;
-    c[j]=value[i];
+  docs_org = docs;
+  docs = (DOC **)my_malloc(sizeof(DOC) * 2 * totdoc);
+  label = (long *)my_malloc(sizeof(long) * 2 * totdoc);
+  c = (double *)my_malloc(sizeof(double) * 2 * totdoc);
+  for (i = 0; i < totdoc; i++) {
+    j = 2 * totdoc - 1 - i;
+    docs[i] =
+        create_example(i, 0, 0, docs_org[i]->costfactor, docs_org[i]->fvec);
+    label[i] = +1;
+    c[i] = value[i];
+    docs[j] =
+        create_example(j, 0, 0, docs_org[i]->costfactor, docs_org[i]->fvec);
+    label[j] = -1;
+    c[j] = value[i];
  }
-  totdoc*=2;
+  totdoc *= 2;
 
  /* need to get a bigger kernel cache */
-  if(*kernel_cache) {
-    kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
+  if (*kernel_cache) {
+    kernel_cache_size =
+        (*kernel_cache)->buffsize * sizeof(CFLOAT) / (1024 * 1024);
    kernel_cache_cleanup(*kernel_cache);
-    (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size);
+    (*kernel_cache) =
kernel_cache_init(totdoc, kernel_cache_size); } - runtime_start=get_runtime(); - timing_profile.time_kernel=0; - timing_profile.time_opti=0; - timing_profile.time_shrink=0; - timing_profile.time_update=0; - timing_profile.time_model=0; - timing_profile.time_check=0; - timing_profile.time_select=0; - kernel_cache_statistic=0; + runtime_start = get_runtime(); + timing_profile.time_kernel = 0; + timing_profile.time_opti = 0; + timing_profile.time_shrink = 0; + timing_profile.time_update = 0; + timing_profile.time_model = 0; + timing_profile.time_check = 0; + timing_profile.time_select = 0; + kernel_cache_statistic = 0; - learn_parm->totwords=totwords; + learn_parm->totwords = totwords; /* make sure -n value is reasonable */ - if((learn_parm->svm_newvarsinqp < 2) - || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { - learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + if ((learn_parm->svm_newvarsinqp < 2) || + (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + learn_parm->svm_newvarsinqp = learn_parm->svm_maxqpsize; } - init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + init_shrink_state(&shrink_state, totdoc, (long)MAXSHRINK); - inconsistent = (long *)my_malloc(sizeof(long)*totdoc); - unlabeled = (long *)my_malloc(sizeof(long)*totdoc); - a = (double *)my_malloc(sizeof(double)*totdoc); - a_fullset = (double *)my_malloc(sizeof(double)*totdoc); - xi_fullset = (double *)my_malloc(sizeof(double)*totdoc); - lin = (double *)my_malloc(sizeof(double)*totdoc); - learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); - model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); - model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); - model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + inconsistent = (long *)my_malloc(sizeof(long) * totdoc); + unlabeled = (long *)my_malloc(sizeof(long) * totdoc); + a = (double *)my_malloc(sizeof(double) * totdoc); + a_fullset = (double *)my_malloc(sizeof(double) * totdoc); + xi_fullset = (double *)my_malloc(sizeof(double) * totdoc); + lin = (double *)my_malloc(sizeof(double) * totdoc); + learn_parm->svm_cost = (double *)my_malloc(sizeof(double) * totdoc); + model->supvec = (DOC **)my_malloc(sizeof(DOC *) * (totdoc + 2)); + model->alpha = (double *)my_malloc(sizeof(double) * (totdoc + 2)); + model->index = (long *)my_malloc(sizeof(long) * (totdoc + 2)); - model->at_upper_bound=0; - model->b=0; - model->supvec[0]=0; /* element 0 reserved and empty for now */ - model->alpha[0]=0; - model->lin_weights=NULL; - model->totwords=totwords; - model->totdoc=totdoc; - model->kernel_parm=(*kernel_parm); - model->sv_num=1; - model->loo_error=-1; - model->loo_recall=-1; - model->loo_precision=-1; - model->xa_error=-1; - model->xa_recall=-1; - model->xa_precision=-1; - inconsistentnum=0; + model->at_upper_bound = 0; + model->b = 0; + model->supvec[0] = 0; /* element 0 reserved and empty for now */ + model->alpha[0] = 0; + model->lin_weights = NULL; + model->totwords = totwords; + model->totdoc = totdoc; + model->kernel_parm = (*kernel_parm); + model->sv_num = 1; + model->loo_error = -1; + model->loo_recall = -1; + model->loo_precision = -1; + model->xa_error = -1; + model->xa_recall = -1; + model->xa_precision = -1; + inconsistentnum = 0; - r_delta=estimate_r_delta(docs,totdoc,kernel_parm); - r_delta_sq=r_delta*r_delta; + r_delta = estimate_r_delta(docs, totdoc, kernel_parm); + r_delta_sq = r_delta * r_delta; - r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); - if(learn_parm->svm_c == 0.0) { /* default 
value for C */
-    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
-    if(verbosity>=1)
-      printf("Setting default regularization parameter C=%.4f\n",
-             learn_parm->svm_c);
+  r_delta_avg = estimate_r_delta_average(docs, totdoc, kernel_parm);
+  if (learn_parm->svm_c == 0.0) { /* default value for C */
+    learn_parm->svm_c = 1.0 / (r_delta_avg * r_delta_avg);
+    if (verbosity >= 1)
+      printf("Setting default regularization parameter C=%.4f\n",
+             learn_parm->svm_c);
  }

-  for(i=0;i<totdoc;i++) {  /* various inits */
-    inconsistent[i]=0;
-    a[i]=0;
-    lin[i]=0;
-    unlabeled[i]=0;
-    if(label[i] > 0) {
-      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
-        docs[i]->costfactor;
-    }
-    else if(label[i] < 0) {
-      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
+  for (i = 0; i < totdoc; i++) { /* various inits */
+    inconsistent[i] = 0;
+    a[i] = 0;
+    lin[i] = 0;
+    unlabeled[i] = 0;
+    if (label[i] > 0) {
+      learn_parm->svm_cost[i] =
+          learn_parm->svm_c * learn_parm->svm_costratio * docs[i]->costfactor;
+    } else if (label[i] < 0) {
+      learn_parm->svm_cost[i] = learn_parm->svm_c * docs[i]->costfactor;
    }
  }

   /* caching makes no sense for linear kernel */
-  if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) {
-    printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n");
-  }
-
-  if(verbosity==1) {
-    printf("Optimizing"); fflush(stdout);
+  if ((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) {
+    printf("WARNING: Using a kernel cache for linear case will slow "
+           "optimization down!\n");
+  }
+
+  if (verbosity == 1) {
+    printf("Optimizing");
+    fflush(stdout);
  }

   /* train the svm */
-  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
-                                     kernel_parm,*kernel_cache,&shrink_state,
-                                     model,inconsistent,unlabeled,a,lin,c,
-                                     &timing_profile,&maxdiff,(long)-1,
-                                     (long)1);
-
-  if(verbosity>=1) {
-    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
+  iterations = optimize_to_convergence(
+      docs, label, totdoc, totwords, learn_parm, kernel_parm, *kernel_cache,
+      &shrink_state, model, inconsistent, unlabeled, a, lin, c, &timing_profile,
+      &maxdiff, (long)-1, (long)1);
+
+  if (verbosity >= 1) {
+    if (verbosity == 1)
+      printf("done. (%ld iterations)\n", iterations);

-    printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
-
-    runtime_end=get_runtime();
-    if(verbosity>=2) {
-      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
-        (runtime_end-runtime_start)/100.0,
-        (100.0*timing_profile.time_kernel)/(runtime_end-runtime_start),
-        (100.0*timing_profile.time_opti)/(runtime_end-runtime_start),
-        (100.0*timing_profile.time_shrink)/(runtime_end-runtime_start),
-        (100.0*timing_profile.time_update)/(runtime_end-runtime_start),
-        (100.0*timing_profile.time_model)/(runtime_end-runtime_start),
-        (100.0*timing_profile.time_check)/(runtime_end-runtime_start),
-        (100.0*timing_profile.time_select)/(runtime_end-runtime_start));
-    }
-    else {
-      printf("Runtime in cpu-seconds: %.2f\n",
-             (runtime_end-runtime_start)/100.0);
+    printf("Optimization finished (maxdiff=%.5f).\n", maxdiff);
+
+    runtime_end = get_runtime();
+    if (verbosity >= 2) {
+      printf(
+          "Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for "
+          "optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for "
+          "model/%.2f%% for check/%.2f%% for select)\n",
+          (runtime_end - runtime_start) / 100.0,
+          (100.0 * timing_profile.time_kernel) / (runtime_end - runtime_start),
+          (100.0 * timing_profile.time_opti) / (runtime_end - runtime_start),
+          (100.0 * timing_profile.time_shrink) / (runtime_end - runtime_start),
+          (100.0 * timing_profile.time_update) / (runtime_end - runtime_start),
+          (100.0 * timing_profile.time_model) / (runtime_end - runtime_start),
+          (100.0 * timing_profile.time_check) / (runtime_end - runtime_start),
+          (100.0 * timing_profile.time_select) / (runtime_end - runtime_start));
+    } else {
+      printf("Runtime in cpu-seconds: %.2f\n",
+             (runtime_end - runtime_start) / 100.0);
    }

-    if(learn_parm->remove_inconsistent) {
-      inconsistentnum=0;
-      for(i=0;i<totdoc;i++)
-        if(inconsistent[i])
-          inconsistentnum++;
-      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
-             model->sv_num-1,inconsistentnum);
-    }
-    else {
-      upsupvecnum=0;
-      for(i=1;i<model->sv_num;i++) {
-        if(fabs(model->alpha[i]) >=
-           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
-            learn_parm->epsilon_a))
-          upsupvecnum++;
+    if (learn_parm->remove_inconsistent) {
+      inconsistentnum = 0;
+      for (i = 0; i < totdoc; i++)
+        if (inconsistent[i])
+          inconsistentnum++;
+      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
+             model->sv_num - 1, inconsistentnum);
+    } else {
+      upsupvecnum = 0;
+      for (i = 1; i < model->sv_num; i++) {
+        if (fabs(model->alpha[i]) >=
+            (learn_parm->svm_cost[(model->supvec[i])->docnum] -
+             learn_parm->epsilon_a))
+          upsupvecnum++;
      }
-      printf("Number of SV: %ld (including %ld at upper bound)\n",
-             model->sv_num-1,upsupvecnum);
-    }
-
-    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
-      loss=0;
-      model_length=0;
-      for(i=0;i<totdoc;i++) {
-        if((lin[i]-model->b)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit)
-          loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i];
-        model_length+=a[i]*label[i]*lin[i];
+      printf("Number of SV: %ld (including %ld at upper bound)\n",
+             model->sv_num - 1, upsupvecnum);
+    }
+
+    if ((verbosity >= 1) && (!learn_parm->skip_final_opt_check)) {
+      loss = 0;
+      model_length = 0;
+      for (i = 0; i < totdoc; i++) {
+        if ((lin[i] - model->b) * (double)label[i] <
+            (-learn_parm->eps + (double)label[i] * c[i]) -
+                learn_parm->epsilon_crit)
+          loss += -learn_parm->eps + (double)label[i] * c[i] -
+                  (lin[i] - model->b) * (double)label[i];
+        model_length += a[i] * label[i] * lin[i];
      }
-      model_length=sqrt(model_length);
-      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
-      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
-      example_length=estimate_sphere(model);
-      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
-              length_of_longest_document_vector(docs,totdoc,kernel_parm));
+      model_length = sqrt(model_length);
+      fprintf(stdout, "L1 loss: loss=%.5f\n", loss);
+      fprintf(stdout, "Norm of weight vector: |w|=%.5f\n", model_length);
+      example_length = estimate_sphere(model);
+      fprintf(stdout, "Norm of longest example vector: |x|=%.5f\n",
+              length_of_longest_document_vector(docs, totdoc, kernel_parm));
    }
-    if(verbosity>=1) {
-      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
+    if (verbosity >= 1) {
+      printf("Number of kernel evaluations: %ld\n", kernel_cache_statistic);
    }
-  }
-
-  if(learn_parm->alphafile[0])
-    write_alphas(learn_parm->alphafile,a,label,totdoc);
+  }
+
+  if (learn_parm->alphafile[0])
+    write_alphas(learn_parm->alphafile, a, label, totdoc);

-  /* this makes sure the model we return does not contain pointers to the
+  /* this makes sure the model we return does not contain pointers to the
      temporary documents */
-  for(i=1;i<model->sv_num;i++) {
-    j=model->supvec[i]->docnum;
-    if(j >= (totdoc/2)) {
-      j=totdoc-j-1;
+  for (i = 1; i < model->sv_num; i++) {
+    j = model->supvec[i]->docnum;
+    if (j >= (totdoc / 2)) {
+      j = totdoc - j - 1;
    }
-    model->supvec[i]=docs_org[j];
-  }
-
+    model->supvec[i] = docs_org[j];
+  }
+
   shrink_state_cleanup(&shrink_state);
-  for(i=0;i<totdoc;i++)
+  for (i = 0; i < totdoc; i++)
     free(docs[i]);
   free(docs);
   free(label);
   free(c);
   free(inconsistent);
   free(unlabeled);
   free(a);
   free(a_fullset);
   free(xi_fullset);
   free(lin);
   free(learn_parm->svm_cost);
 }

-void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc,
-                       long int totwords, LEARN_PARM *learn_parm,
-                       KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
-                       MODEL *model)
-     /* docs:        Training vectors (x-part) */
-     /* rankvalue:   Training target values that determine the ranking */
-     /* totdoc:      Number of examples in docs/label */
-     /* totwords:    Number of features (i.e. highest feature index) */
-     /* learn_parm:  Learning paramenters */
-     /* kernel_parm: Kernel paramenters */
-     /* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if
-                     using a kernel. NULL if linear. NOTE: Cache is
-                     getting reinitialized in this function */
-     /* model:       Returns learning result (assumed empty before called) */
+void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc,
+                       long int totwords, LEARN_PARM *learn_parm,
+                       KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
+                       MODEL *model)
+/* docs:        Training vectors (x-part) */
+/* rankvalue:   Training target values that determine the ranking */
+/* totdoc:      Number of examples in docs/label */
+/* totwords:    Number of features (i.e. highest feature index) */
+/* learn_parm:  Learning parameters */
+/* kernel_parm: Kernel parameters */
+/* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if
+   using a kernel. NULL if linear. NOTE: Cache is
+   getting reinitialized in this function */
+/* model:       Returns learning result (assumed empty before called) */
 {
   DOC **docdiff;
-  long i,j,k,totpair,kernel_cache_size;
-  double *target,*alpha,cost;
-  long *greater,*lesser;
+  long i, j, k, totpair, kernel_cache_size;
+  double *target, *alpha, cost;
+  long *greater, *lesser;
   MODEL *pairmodel;
-  SVECTOR *flow,*fhigh;
+  SVECTOR *flow, *fhigh;

-  totpair=0;
-  for(i=0;i<totdoc;i++) {
-    for(j=i+1;j<totdoc;j++) {
-      if((docs[i]->queryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) {
-        totpair++;
+  totpair = 0;
+  for (i = 0; i < totdoc; i++) {
+    for (j = i + 1; j < totdoc; j++) {
+      if ((docs[i]->queryid == docs[j]->queryid) &&
+          (rankvalue[i] != rankvalue[j])) {
+        totpair++;
      }
    }
  }

-  printf("Constructing %ld rank constraints...",totpair); fflush(stdout);
-  docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair);
-  target=(double *)my_malloc(sizeof(double)*totpair);
-  greater=(long *)my_malloc(sizeof(long)*totpair);
-  lesser=(long *)my_malloc(sizeof(long)*totpair);
-
-  k=0;
-  for(i=0;i<totdoc;i++) {
-    for(j=i+1;j<totdoc;j++) {
-      if(docs[i]->queryid == docs[j]->queryid) {
-        /* "Hijacked" costfactor to input rhs of constraints */
-        /* cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0; */
-        cost=1;
-        if(rankvalue[i] > rankvalue[j]) {
-          if(kernel_parm->kernel_type == LINEAR)
-            docdiff[k]=create_example(k,0,0,cost,
-                                      sub_ss(docs[i]->fvec,docs[j]->fvec));
-          else {
-            flow=copy_svector(docs[j]->fvec);
-            flow->factor=-1.0;
-            flow->next=NULL;
-            fhigh=copy_svector(docs[i]->fvec);
-            fhigh->factor=1.0;
-            fhigh->next=flow;
-            docdiff[k]=create_example(k,0,0,cost,fhigh);
-          }
-          target[k]=1+docs[i]->costfactor-docs[j]->costfactor;
-          greater[k]=i;
-          lesser[k]=j;
-          k++;
-        }
-        else if(rankvalue[i] < rankvalue[j]) {
-          if(kernel_parm->kernel_type == LINEAR)
-            docdiff[k]=create_example(k,0,0,cost,
-                                      sub_ss(docs[j]->fvec,docs[i]->fvec));
-          else {
-            flow=copy_svector(docs[j]->fvec);
-            flow->factor=1.0;
-            flow->next=NULL;
-            fhigh=copy_svector(docs[i]->fvec);
-            fhigh->factor=-1.0;
-            fhigh->next=flow;
-            docdiff[k]=create_example(k,0,0,cost,fhigh);
-          }
-          target[k]=1+docs[j]->costfactor-docs[i]->costfactor;
-          greater[k]=j;
-          lesser[k]=i;
-          k++;
-        }
+  printf("Constructing %ld rank constraints...", totpair);
+  fflush(stdout);
+  docdiff = (DOC **)my_malloc(sizeof(DOC) * totpair);
+  target = (double *)my_malloc(sizeof(double) * totpair);
+  greater = (long *)my_malloc(sizeof(long) * totpair);
+  lesser = (long *)my_malloc(sizeof(long) * totpair);
+
+  k = 0;
+  for (i = 0; i < totdoc; i++) {
+    for (j = i + 1; j < totdoc; j++) {
+      if (docs[i]->queryid == docs[j]->queryid) {
+        /* "Hijacked" costfactor to input rhs of constraints */
+        /* cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0; */
+        cost = 1;
+        if (rankvalue[i] > rankvalue[j]) {
+          if (kernel_parm->kernel_type == LINEAR)
+            docdiff[k] = create_example(k, 0, 0, cost,
+                                        sub_ss(docs[i]->fvec, docs[j]->fvec));
+          else {
+            flow = copy_svector(docs[j]->fvec);
+            flow->factor = -1.0;
+            flow->next = NULL;
+            fhigh = copy_svector(docs[i]->fvec);
+            fhigh->factor = 1.0;
+            fhigh->next = flow;
+            docdiff[k] = create_example(k, 0, 0, cost, fhigh);
+          }
+          target[k] = 1 + docs[i]->costfactor - docs[j]->costfactor;
+          greater[k] = i;
+          lesser[k] = j;
+          k++;
+        } else if (rankvalue[i] < rankvalue[j]) {
+          if (kernel_parm->kernel_type == LINEAR)
+            docdiff[k] = create_example(k, 0, 0, cost,
+                                        sub_ss(docs[j]->fvec, docs[i]->fvec));
+          else {
+            flow = copy_svector(docs[j]->fvec);
+            flow->factor = 1.0;
+            flow->next = NULL;
+            fhigh = copy_svector(docs[i]->fvec);
+            fhigh->factor = -1.0;
+            fhigh->next = flow;
+            docdiff[k] = create_example(k, 0, 0, cost, fhigh);
+          }
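/* Editor's aside (not part of the patch): the hunk above reduces ranking
   to classification. For every pair of documents from the same query with
   different rank values, one pairwise constraint is built on the
   difference vector x_i - x_j (or a two-term SVECTOR chain with factors
   +1/-1 for nonlinear kernels). A minimal standalone sketch of that pair
   enumeration, with toy arrays standing in for SVM-light's DOC/SVECTOR
   machinery: */

#include <stdio.h>

int main(void) {
  long queryid[] = {1, 1, 1, 2, 2}; /* which query each document belongs to */
  double rankvalue[] = {3.0, 1.0, 2.0, 1.0, 1.0};
  long totdoc = 5, i, j, totpair = 0;

  for (i = 0; i < totdoc; i++)
    for (j = i + 1; j < totdoc; j++)
      if ((queryid[i] == queryid[j]) && (rankvalue[i] != rankvalue[j])) {
        /* +1: doc i should outrank doc j; -1: the other way around */
        printf("pair %ld: (%ld,%ld) label %+d\n", totpair, i, j,
               (rankvalue[i] > rankvalue[j]) ? 1 : -1);
        totpair++;
      }
  printf("%ld rank constraints\n", totpair); /* 3 here: the tied pair in
                                                query 2 is skipped */
  return 0;
}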
+ target[k] = 1 + docs[j]->costfactor - docs[i]->costfactor; + greater[k] = j; + lesser[k] = i; + k++; + } } } } - printf("done.\n"); fflush(stdout); + printf("done.\n"); + fflush(stdout); /* need to get a bigger kernel cache */ - if(*kernel_cache) { - kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024); + if (*kernel_cache) { + kernel_cache_size = + (*kernel_cache)->buffsize * sizeof(CFLOAT) / (1024 * 1024); kernel_cache_cleanup(*kernel_cache); - (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size); + (*kernel_cache) = kernel_cache_init(totpair, kernel_cache_size); } /* must use unbiased hyperplane on difference vectors */ - learn_parm->biased_hyperplane=0; - pairmodel=(MODEL *)my_malloc(sizeof(MODEL)); - svm_learn_optimization(docdiff,target,totpair,totwords,learn_parm, - kernel_parm,(*kernel_cache),pairmodel,NULL); + learn_parm->biased_hyperplane = 0; + pairmodel = (MODEL *)my_malloc(sizeof(MODEL)); + svm_learn_optimization(docdiff, target, totpair, totwords, learn_parm, + kernel_parm, (*kernel_cache), pairmodel, NULL); /* Transfer the result into a more compact model. If you would like to output the original model on pairs of documents, see below. */ - alpha=(double *)my_malloc(sizeof(double)*totdoc); - for(i=0;isv_num;i++) { - alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i]; - alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i]; + for (i = 1; i < pairmodel->sv_num; i++) { + alpha[lesser[(pairmodel->supvec[i])->docnum]] -= pairmodel->alpha[i]; + alpha[greater[(pairmodel->supvec[i])->docnum]] += pairmodel->alpha[i]; } - model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); - model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); - model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); - model->supvec[0]=0; /* element 0 reserved and empty for now */ - model->alpha[0]=0; - model->sv_num=1; - for(i=0;isupvec[model->sv_num]=docs[i]; - model->alpha[model->sv_num]=alpha[i]; - model->index[i]=model->sv_num; + model->supvec = (DOC **)my_malloc(sizeof(DOC *) * (totdoc + 2)); + model->alpha = (double *)my_malloc(sizeof(double) * (totdoc + 2)); + model->index = (long *)my_malloc(sizeof(long) * (totdoc + 2)); + model->supvec[0] = 0; /* element 0 reserved and empty for now */ + model->alpha[0] = 0; + model->sv_num = 1; + for (i = 0; i < totdoc; i++) { + if (alpha[i]) { + model->supvec[model->sv_num] = docs[i]; + model->alpha[model->sv_num] = alpha[i]; + model->index[i] = model->sv_num; model->sv_num++; - } - else { - model->index[i]=-1; + } else { + model->index[i] = -1; } } - model->at_upper_bound=0; - model->b=0; - model->lin_weights=NULL; - model->totwords=totwords; - model->totdoc=totdoc; - model->kernel_parm=(*kernel_parm); - model->loo_error=-1; - model->loo_recall=-1; - model->loo_precision=-1; - model->xa_error=-1; - model->xa_recall=-1; - model->xa_precision=-1; + model->at_upper_bound = 0; + model->b = 0; + model->lin_weights = NULL; + model->totwords = totwords; + model->totdoc = totdoc; + model->kernel_parm = (*kernel_parm); + model->loo_error = -1; + model->loo_recall = -1; + model->loo_precision = -1; + model->xa_error = -1; + model->xa_recall = -1; + model->xa_precision = -1; free(alpha); free(greater); @@ -860,13 +892,12 @@ void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc, /* If you would like to output the original model on pairs of document, replace the following lines with '(*model)=(*pairmodel);' */ - for(i=0;itotwords=totwords; + learn_parm->totwords = totwords; /* make sure -n 
value is reasonable */ - if((learn_parm->svm_newvarsinqp < 2) - || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { - learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + if ((learn_parm->svm_newvarsinqp < 2) || + (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + learn_parm->svm_newvarsinqp = learn_parm->svm_maxqpsize; } - init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + init_shrink_state(&shrink_state, totdoc, (long)MAXSHRINK); - label = (long *)my_malloc(sizeof(long)*totdoc); - unlabeled = (long *)my_malloc(sizeof(long)*totdoc); - inconsistent = (long *)my_malloc(sizeof(long)*totdoc); - c = (double *)my_malloc(sizeof(double)*totdoc); - a = (double *)my_malloc(sizeof(double)*totdoc); - lin = (double *)my_malloc(sizeof(double)*totdoc); - learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); - model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); - model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); - model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + label = (long *)my_malloc(sizeof(long) * totdoc); + unlabeled = (long *)my_malloc(sizeof(long) * totdoc); + inconsistent = (long *)my_malloc(sizeof(long) * totdoc); + c = (double *)my_malloc(sizeof(double) * totdoc); + a = (double *)my_malloc(sizeof(double) * totdoc); + lin = (double *)my_malloc(sizeof(double) * totdoc); + learn_parm->svm_cost = (double *)my_malloc(sizeof(double) * totdoc); + model->supvec = (DOC **)my_malloc(sizeof(DOC *) * (totdoc + 2)); + model->alpha = (double *)my_malloc(sizeof(double) * (totdoc + 2)); + model->index = (long *)my_malloc(sizeof(long) * (totdoc + 2)); - model->at_upper_bound=0; - model->b=0; - model->supvec[0]=0; /* element 0 reserved and empty for now */ - model->alpha[0]=0; - model->lin_weights=NULL; - model->totwords=totwords; - model->totdoc=totdoc; - model->kernel_parm=(*kernel_parm); - model->sv_num=1; - model->loo_error=-1; - model->loo_recall=-1; - model->loo_precision=-1; - model->xa_error=-1; - model->xa_recall=-1; - model->xa_precision=-1; + model->at_upper_bound = 0; + model->b = 0; + model->supvec[0] = 0; /* element 0 reserved and empty for now */ + model->alpha[0] = 0; + model->lin_weights = NULL; + model->totwords = totwords; + model->totdoc = totdoc; + model->kernel_parm = (*kernel_parm); + model->sv_num = 1; + model->loo_error = -1; + model->loo_recall = -1; + model->loo_precision = -1; + model->xa_error = -1; + model->xa_recall = -1; + model->xa_precision = -1; - r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); - if(learn_parm->svm_c == 0.0) { /* default value for C */ - learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); - if(verbosity>=1) - printf("Setting default regularization parameter C=%.4f\n", - learn_parm->svm_c); + r_delta_avg = estimate_r_delta_average(docs, totdoc, kernel_parm); + if (learn_parm->svm_c == 0.0) { /* default value for C */ + learn_parm->svm_c = 1.0 / (r_delta_avg * r_delta_avg); + if (verbosity >= 1) + printf("Setting default regularization parameter C=%.4f\n", + learn_parm->svm_c); } - learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */ + learn_parm->biased_hyperplane = 0; /* learn an unbiased hyperplane */ - learn_parm->eps=0.0; /* No margin, unless explicitly handcoded - in the right-hand side in the training - set. */ + learn_parm->eps = 0.0; /* No margin, unless explicitly handcoded + in the right-hand side in the training + set. 
*/
-  for(i=0;i<totdoc;i++) {  /* various inits */
-    docs[i]->docnum=i;
-    a[i]=0;
-    lin[i]=0;
-    c[i]=rhs[i];       /* set right-hand side */
-    unlabeled[i]=0;
-    inconsistent[i]=0;
-    learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
-      docs[i]->costfactor;
-    label[i]=1;
+  for (i = 0; i < totdoc; i++) { /* various inits */
+    docs[i]->docnum = i;
+    a[i] = 0;
+    lin[i] = 0;
+    c[i] = rhs[i]; /* set right-hand side */
+    unlabeled[i] = 0;
+    inconsistent[i] = 0;
+    learn_parm->svm_cost[i] =
+        learn_parm->svm_c * learn_parm->svm_costratio * docs[i]->costfactor;
+    label[i] = 1;
  }
-  if(learn_parm->sharedslack)  /* if shared slacks are used, they must */
-    for(i=0;i<totdoc;i++)      /* be used on every constraint */
-      if(!docs[i]->slackid) {
-        perror("Error: Missing shared slacks definitions in some of the examples.");
-        exit(0);
+  if (learn_parm->sharedslack) /* if shared slacks are used, they must */
+    for (i = 0; i < totdoc; i++) /* be used on every constraint */
+      if (!docs[i]->slackid) {
+        perror("Error: Missing shared slacks definitions in some of the "
+               "examples.");
+        exit(0);
      }

   /* print kernel matrix */
@@ -1001,203 +1033,211 @@ void svm_learn_optimization(DOC **docs, double *rhs, long int
   */
   /* compute starting state for initial alpha values */
-  if(alpha) {
-    if(verbosity>=1) {
-      printf("Computing starting state..."); fflush(stdout);
+  if (alpha) {
+    if (verbosity >= 1) {
+      printf("Computing starting state...");
+      fflush(stdout);
    }
-    index = (long *)my_malloc(sizeof(long)*totdoc);
-    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
-    if(kernel_parm->kernel_type == LINEAR) {
-      weights=(double *)my_malloc(sizeof(double)*(totwords+1));
-      clear_nvector(weights,totwords); /* set weights to zero */
-      aicache=NULL;
+    index = (long *)my_malloc(sizeof(long) * totdoc);
+    index2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11));
+    if (kernel_parm->kernel_type == LINEAR) {
+      weights = (double *)my_malloc(sizeof(double) * (totwords + 1));
+      clear_nvector(weights, totwords); /* set weights to zero */
+      aicache = NULL;
+    } else {
+      weights = NULL;
+      aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT) * totdoc);
    }
-    else {
-      weights=NULL;
-      aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
+    for (i = 0; i < totdoc; i++) { /* create full index and clip alphas */
+      index[i] = 1;
+      alpha[i] = fabs(alpha[i]);
+      if (alpha[i] < 0)
+        alpha[i] = 0;
+      if (alpha[i] > learn_parm->svm_cost[i])
+        alpha[i] = learn_parm->svm_cost[i];
    }
-    for(i=0;i<totdoc;i++) {  /* create full index and clip alphas */
-      index[i]=1;
-      alpha[i]=fabs(alpha[i]);
-      if(alpha[i]<0) alpha[i]=0;
-      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
+    if (kernel_cache && (kernel_parm->kernel_type != LINEAR)) {
+      for (i = 0; i < totdoc; i++) /* fill kernel cache with unbounded SV */
+        if ((alpha[i] > 0) && (alpha[i] < learn_parm->svm_cost[i]) &&
+            (kernel_cache_space_available(kernel_cache)))
+          cache_kernel_row(kernel_cache, docs, i, kernel_parm);
+      for (i = 0; i < totdoc;
+           i++) /* fill rest of kernel cache with bounded SV */
+        if ((alpha[i] == learn_parm->svm_cost[i]) &&
+            (kernel_cache_space_available(kernel_cache)))
+          cache_kernel_row(kernel_cache, docs, i, kernel_parm);
    }
-    if(kernel_cache && (kernel_parm->kernel_type != LINEAR)) {
-      for(i=0;i<totdoc;i++)    /* fill kernel cache with unbounded SV */
-        if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
-           && (kernel_cache_space_available(kernel_cache)))
-          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
-      for(i=0;i<totdoc;i++)    /* fill rest of kernel cache with bounded SV */
-        if((alpha[i]==learn_parm->svm_cost[i])
-           && (kernel_cache_space_available(kernel_cache)))
-          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
-    }
-    (void)compute_index(index,totdoc,index2dnum);
-    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
-                            totwords,kernel_parm,kernel_cache,lin,aicache,
-                            weights);
-    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
learn_parm,index2dnum,index2dnum,model); - for(i=0;i=1) { - printf("done.\n"); fflush(stdout); - } - } - + if (weights) + free(weights); + if (aicache) + free(aicache); + if (verbosity >= 1) { + printf("done.\n"); + fflush(stdout); + } + } + /* removing inconsistent does not work for general optimization problem */ - if(learn_parm->remove_inconsistent) { + if (learn_parm->remove_inconsistent) { learn_parm->remove_inconsistent = 0; - printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout); + printf("'remove inconsistent' not available in this mode. Switching option " + "off!"); + fflush(stdout); } /* caching makes no sense for linear kernel */ - if(kernel_parm->kernel_type == LINEAR) { + if (kernel_parm->kernel_type == LINEAR) { /* kernel_cache = NULL; */ - } - - if(verbosity==1) { - printf("Optimizing"); fflush(stdout); + } + + if (verbosity == 1) { + printf("Optimizing"); + fflush(stdout); } /* train the svm */ - if(learn_parm->sharedslack) - iterations=optimize_to_convergence_sharedslack(docs,label,totdoc, - totwords,learn_parm,kernel_parm, - kernel_cache,&shrink_state,model, - a,lin,c,&timing_profile, - &maxdiff); + if (learn_parm->sharedslack) + iterations = optimize_to_convergence_sharedslack( + docs, label, totdoc, totwords, learn_parm, kernel_parm, kernel_cache, + &shrink_state, model, a, lin, c, &timing_profile, &maxdiff); else - iterations=optimize_to_convergence(docs,label,totdoc, - totwords,learn_parm,kernel_parm, - kernel_cache,&shrink_state,model, - inconsistent,unlabeled, - a,lin,c,&timing_profile, - &maxdiff,(long)-1,(long)1); - - if(verbosity>=1) { - if(verbosity==1) printf("done. (%ld iterations)\n",iterations); + iterations = optimize_to_convergence( + docs, label, totdoc, totwords, learn_parm, kernel_parm, kernel_cache, + &shrink_state, model, inconsistent, unlabeled, a, lin, c, + &timing_profile, &maxdiff, (long)-1, (long)1); + + if (verbosity >= 1) { + if (verbosity == 1) + printf("done. 
(%ld iterations)\n", iterations); - misclassified=0; - for(i=0;(ib)*(double)label[i] <= 0.0) - misclassified++; + misclassified = 0; + for (i = 0; (i < totdoc); i++) { /* get final statistic */ + if ((lin[i] - model->b) * (double)label[i] <= 0.0) + misclassified++; } - printf("Optimization finished (maxdiff=%.5f).\n",maxdiff); - - runtime_end=get_runtime(); - if(verbosity>=2) { - printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n", - (runtime_end-runtime_start)/100.0, - (100.0*timing_profile.time_kernel)/(runtime_end-runtime_start), - (100.0*timing_profile.time_opti)/(runtime_end-runtime_start), - (100.0*timing_profile.time_shrink)/(runtime_end-runtime_start), - (100.0*timing_profile.time_update)/(runtime_end-runtime_start), - (100.0*timing_profile.time_model)/(runtime_end-runtime_start), - (100.0*timing_profile.time_check)/(runtime_end-runtime_start), - (100.0*timing_profile.time_select)/(runtime_end-runtime_start)); - } - else { - printf("Runtime in cpu-seconds: %.2f\n", - (runtime_end-runtime_start)/100.0); + printf("Optimization finished (maxdiff=%.5f).\n", maxdiff); + + runtime_end = get_runtime(); + if (verbosity >= 2) { + printf( + "Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for " + "optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for " + "model/%.2f%% for check/%.2f%% for select)\n", + (runtime_end - runtime_start) / 100.0, + (100.0 * timing_profile.time_kernel) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_opti) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_shrink) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_update) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_model) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_check) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_select) / (runtime_end - runtime_start)); + } else { + printf("Runtime in cpu-seconds: %.2f\n", + (runtime_end - runtime_start) / 100.0); } } - if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { - loss=0; - model_length=0; - alphasum=0; - for(i=0;ib)*(double)label[i] < c[i]-learn_parm->epsilon_crit) - loss+=c[i]-(lin[i]-model->b)*(double)label[i]; - model_length+=a[i]*label[i]*lin[i]; - alphasum+=rhs[i]*a[i]; + if ((verbosity >= 1) && (!learn_parm->skip_final_opt_check)) { + loss = 0; + model_length = 0; + alphasum = 0; + for (i = 0; i < totdoc; i++) { + if ((lin[i] - model->b) * (double)label[i] < + c[i] - learn_parm->epsilon_crit) + loss += c[i] - (lin[i] - model->b) * (double)label[i]; + model_length += a[i] * label[i] * lin[i]; + alphasum += rhs[i] * a[i]; } - model_length=sqrt(model_length); - fprintf(stdout,"Dual objective value: dval=%.5f\n", - alphasum-0.5*model_length*model_length); - fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length); - } - - if(learn_parm->sharedslack) { - index = (long *)my_malloc(sizeof(long)*totdoc); - index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); - maxslackid=0; - for(i=0;islackid) - maxslackid=docs[i]->slackid; + model_length = sqrt(model_length); + fprintf(stdout, "Dual objective value: dval=%.5f\n", + alphasum - 0.5 * model_length * model_length); + fprintf(stdout, "Norm of weight vector: |w|=%.5f\n", model_length); + } + + if (learn_parm->sharedslack) { + index = (long *)my_malloc(sizeof(long) * totdoc); + index2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + maxslackid = 0; + for (i = 0; i < totdoc; 
i++) { /* create full index */ + index[i] = 1; + if (maxslackid < docs[i]->slackid) + maxslackid = docs[i]->slackid; } - (void)compute_index(index,totdoc,index2dnum); - slack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); - alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); - for(i=0;i<=maxslackid;i++) { /* init shared slacks */ - slack[i]=0; - alphaslack[i]=0; + (void)compute_index(index, totdoc, index2dnum); + slack = (double *)my_malloc(sizeof(double) * (maxslackid + 1)); + alphaslack = (double *)my_malloc(sizeof(double) * (maxslackid + 1)); + for (i = 0; i <= maxslackid; i++) { /* init shared slacks */ + slack[i] = 0; + alphaslack[i] = 0; } - for(i=0;islackid]+=a[i]; + for (i = 0; i < totdoc; i++) { /* compute alpha aggregated by slack */ + alphaslack[docs[i]->slackid] += a[i]; } - compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm, - slack,alphaslack); - loss=0; - model->at_upper_bound=0; - svsetnum=0; - for(i=0;i<=maxslackid;i++) { /* create full index */ - loss+=slack[i]; - if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a)) - model->at_upper_bound++; - if(alphaslack[i] > learn_parm->epsilon_a) - svsetnum++; + compute_shared_slacks(docs, label, a, lin, c, index2dnum, learn_parm, slack, + alphaslack); + loss = 0; + model->at_upper_bound = 0; + svsetnum = 0; + for (i = 0; i <= maxslackid; i++) { /* create full index */ + loss += slack[i]; + if (alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a)) + model->at_upper_bound++; + if (alphaslack[i] > learn_parm->epsilon_a) + svsetnum++; } free(index); free(index2dnum); free(slack); free(alphaslack); - } - - if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { - if(learn_parm->sharedslack) { - printf("Number of SV: %ld\n", - model->sv_num-1); - printf("Number of non-zero slack variables: %ld (%ld slacks have non-zero alpha)\n", - model->at_upper_bound,svsetnum); - fprintf(stdout,"L1 loss: loss=%.5f\n",loss); - } - else { - upsupvecnum=0; - for(i=1;isv_num;i++) { - if(fabs(model->alpha[i]) >= - (learn_parm->svm_cost[(model->supvec[i])->docnum]- - learn_parm->epsilon_a)) - upsupvecnum++; + } + + if ((verbosity >= 1) && (!learn_parm->skip_final_opt_check)) { + if (learn_parm->sharedslack) { + printf("Number of SV: %ld\n", model->sv_num - 1); + printf("Number of non-zero slack variables: %ld (%ld slacks have " + "non-zero alpha)\n", + model->at_upper_bound, svsetnum); + fprintf(stdout, "L1 loss: loss=%.5f\n", loss); + } else { + upsupvecnum = 0; + for (i = 1; i < model->sv_num; i++) { + if (fabs(model->alpha[i]) >= + (learn_parm->svm_cost[(model->supvec[i])->docnum] - + learn_parm->epsilon_a)) + upsupvecnum++; } - printf("Number of SV: %ld (including %ld at upper bound)\n", - model->sv_num-1,upsupvecnum); - fprintf(stdout,"L1 loss: loss=%.5f\n",loss); + printf("Number of SV: %ld (including %ld at upper bound)\n", + model->sv_num - 1, upsupvecnum); + fprintf(stdout, "L1 loss: loss=%.5f\n", loss); } - example_length=estimate_sphere(model); - fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", - length_of_longest_document_vector(docs,totdoc,kernel_parm)); + example_length = estimate_sphere(model); + fprintf(stdout, "Norm of longest example vector: |x|=%.5f\n", + length_of_longest_document_vector(docs, totdoc, kernel_parm)); } - if(verbosity>=1) { - printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic); - } - - if(alpha) { - for(i=0;i= 1) { + printf("Number of kernel evaluations: %ld\n", kernel_cache_statistic); + } + + if (alpha) { + for (i = 0; i < totdoc; i++) { /* 
copy final alphas */
+      alpha[i] = a[i];
+    }
-  }
-
-  if(learn_parm->alphafile[0])
-    write_alphas(learn_parm->alphafile,a,label,totdoc);
-
+  }
+
+  if (learn_parm->alphafile[0])
+    write_alphas(learn_parm->alphafile, a, label, totdoc);
+
   shrink_state_cleanup(&shrink_state);
   free(label);
   free(unlabeled);
@@ -1206,428 +1246,431 @@ void svm_learn_optimization(DOC **docs, double *rhs, long int
   free(a);
   free(lin);
   free(learn_parm->svm_cost);
-}
-
-
-long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
-                             long int totwords, LEARN_PARM *learn_parm,
-                             KERNEL_PARM *kernel_parm,
-                             KERNEL_CACHE *kernel_cache,
-                             SHRINK_STATE *shrink_state, MODEL *model,
-                             long int *inconsistent, long int *unlabeled,
-                             double *a, double *lin, double *c,
-                             TIMING *timing_profile, double *maxdiff,
-                             long int heldout, long int retrain)
-     /* docs: Training vectors (x-part) */
-     /* label: Training labels/value (y-part, zero if test example for
-               transduction) */
-     /* totdoc: Number of examples in docs/label */
-     /* totwords: Number of features (i.e. highest feature index) */
-     /* laern_parm: Learning paramenters */
-     /* kernel_parm: Kernel paramenters */
-     /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
-                      NULL if linear. */
-     /* shrink_state: State of active variables */
-     /* model: Returns learning result */
-     /* inconsistent: examples thrown out as inconstistent */
-     /* unlabeled: test examples for transduction */
-     /* a: alphas */
-     /* lin: linear component of gradient */
-     /* c: right hand side of inequalities (margin) */
-     /* maxdiff: returns maximum violation of KT-conditions */
-     /* heldout: marks held-out example for leave-one-out (or -1) */
-     /* retrain: selects training mode (1=regular / 2=holdout) */
+}
+
+long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
+                             long int totwords, LEARN_PARM *learn_parm,
+                             KERNEL_PARM *kernel_parm,
+                             KERNEL_CACHE *kernel_cache,
+                             SHRINK_STATE *shrink_state, MODEL *model,
+                             long int *inconsistent, long int *unlabeled,
+                             double *a, double *lin, double *c,
+                             TIMING *timing_profile, double *maxdiff,
+                             long int heldout, long int retrain)
+/* docs: Training vectors (x-part) */
+/* label: Training labels/value (y-part, zero if test example for
+   transduction) */
+/* totdoc: Number of examples in docs/label */
+/* totwords: Number of features (i.e. highest feature index) */
+/* learn_parm: Learning parameters */
+/* kernel_parm: Kernel parameters */
+/* kernel_cache: Initialized/partly filled Cache, if using a kernel.
+   NULL if linear.
*/ +/* shrink_state: State of active variables */ +/* model: Returns learning result */ +/* inconsistent: examples thrown out as inconstistent */ +/* unlabeled: test examples for transduction */ +/* a: alphas */ +/* lin: linear component of gradient */ +/* c: right hand side of inequalities (margin) */ +/* maxdiff: returns maximum violation of KT-conditions */ +/* heldout: marks held-out example for leave-one-out (or -1) */ +/* retrain: selects training mode (1=regular / 2=holdout) */ { - long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink; - long inconsistentnum,choosenum,already_chosen=0,iteration; - long misclassified,supvecnum=0,*active2dnum,inactivenum; - long *working2dnum,*selexam; + long *chosen, *key, i, j, jj, *last_suboptimal_at, noshrink; + long inconsistentnum, choosenum, already_chosen = 0, iteration; + long misclassified, supvecnum = 0, *active2dnum, inactivenum; + long *working2dnum, *selexam; long activenum; - double criterion,eq; + double criterion, eq; double *a_old; - double t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */ + double t0 = 0, t1 = 0, t2 = 0, t3 = 0, t4 = 0, t5 = 0, t6 = 0; /* timing */ long transductcycle; long transduction; - double epsilon_crit_org; + double epsilon_crit_org; double bestmaxdiff; - long bestmaxdiffiter,terminate; + long bestmaxdiffiter, terminate; - double *selcrit; /* buffer for sorting */ - CFLOAT *aicache; /* buffer to keep one row of hessian */ - double *weights; /* buffer for weight vector in linear case */ - QP qp; /* buffer for one quadratic program */ + double *selcrit; /* buffer for sorting */ + CFLOAT *aicache; /* buffer to keep one row of hessian */ + double *weights; /* buffer for weight vector in linear case */ + QP qp; /* buffer for one quadratic program */ - epsilon_crit_org=learn_parm->epsilon_crit; /* save org */ - if(kernel_parm->kernel_type == LINEAR) { - learn_parm->epsilon_crit=2.0; - /* kernel_cache=NULL; */ /* caching makes no sense for linear kernel */ - } - learn_parm->epsilon_shrink=2; - (*maxdiff)=1; + epsilon_crit_org = learn_parm->epsilon_crit; /* save org */ + if (kernel_parm->kernel_type == LINEAR) { + learn_parm->epsilon_crit = 2.0; + /* kernel_cache=NULL; */ /* caching makes no sense for linear kernel */ + } + learn_parm->epsilon_shrink = 2; + (*maxdiff) = 1; - learn_parm->totwords=totwords; + learn_parm->totwords = totwords; - chosen = (long *)my_malloc(sizeof(long)*totdoc); - last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc); - key = (long *)my_malloc(sizeof(long)*(totdoc+11)); - selcrit = (double *)my_malloc(sizeof(double)*totdoc); - selexam = (long *)my_malloc(sizeof(long)*totdoc); - a_old = (double *)my_malloc(sizeof(double)*totdoc); - aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); - working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); - active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); - qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + chosen = (long *)my_malloc(sizeof(long) * totdoc); + last_suboptimal_at = (long *)my_malloc(sizeof(long) * totdoc); + key = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + selcrit = (double *)my_malloc(sizeof(double) * totdoc); + selexam = (long *)my_malloc(sizeof(long) * totdoc); + a_old = (double *)my_malloc(sizeof(double) * totdoc); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT) * totdoc); + working2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + active2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + qp.opt_ce = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); 
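/* Editor's aside (not part of the patch): the qp.* buffers allocated here
   and just below hold one working-set subproblem of at most svm_maxqpsize
   variables. As the allocation sizes suggest, opt_g is the dense q x q
   Hessian block and the remaining arrays are length-q vectors (equality
   constraint, linear part, warm start, box bounds); the field comments in
   the sketch are my reading of those sizes, not documented API. A toy
   allocation with the same shape, using plain malloc instead of
   SVM-light's my_malloc: */

#include <stdlib.h>

typedef struct { /* hypothetical mirror of the fields used here */
  double *opt_ce, *opt_ce0, *opt_g, *opt_g0, *opt_xinit, *opt_low, *opt_up;
} TOY_QP;

static TOY_QP toy_qp_alloc(long q) {
  TOY_QP qp;
  qp.opt_ce = malloc(sizeof(double) * q);    /* equality-constraint coeffs */
  qp.opt_ce0 = malloc(sizeof(double));       /* equality right-hand side */
  qp.opt_g = malloc(sizeof(double) * q * q); /* Hessian of the subproblem */
  qp.opt_g0 = malloc(sizeof(double) * q);    /* linear part of objective */
  qp.opt_xinit = malloc(sizeof(double) * q); /* warm-start alphas */
  qp.opt_low = malloc(sizeof(double) * q);   /* lower box bounds */
  qp.opt_up = malloc(sizeof(double) * q);    /* upper box bounds */
  return qp;
}

int main(void) {
  TOY_QP qp = toy_qp_alloc(10); /* e.g. a working set of q = 10 */
  free(qp.opt_ce);
  free(qp.opt_ce0);
  free(qp.opt_g);
  free(qp.opt_g0);
  free(qp.opt_xinit);
  free(qp.opt_low);
  free(qp.opt_up);
  return 0;
}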
qp.opt_ce0 = (double *)my_malloc(sizeof(double)); - qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize - *learn_parm->svm_maxqpsize); - qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); - qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); - qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); - qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); - if(kernel_parm->kernel_type == LINEAR) { - weights=create_nvector(totwords); - clear_nvector(weights,totwords); /* set weights to zero */ - } - else - weights=NULL; + qp.opt_g = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize * + learn_parm->svm_maxqpsize); + qp.opt_g0 = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); + qp.opt_xinit = + (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); + qp.opt_low = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); + qp.opt_up = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); + if (kernel_parm->kernel_type == LINEAR) { + weights = create_nvector(totwords); + clear_nvector(weights, totwords); /* set weights to zero */ + } else + weights = NULL; - choosenum=0; - inconsistentnum=0; - transductcycle=0; - transduction=0; - if(!retrain) retrain=1; - iteration=1; - bestmaxdiffiter=1; - bestmaxdiff=999999999; - terminate=0; + choosenum = 0; + inconsistentnum = 0; + transductcycle = 0; + transduction = 0; + if (!retrain) + retrain = 1; + iteration = 1; + bestmaxdiffiter = 1; + bestmaxdiff = 999999999; + terminate = 0; - if(kernel_cache) { - kernel_cache->time=iteration; /* for lru cache */ + if (kernel_cache) { + kernel_cache->time = iteration; /* for lru cache */ kernel_cache_reset_lru(kernel_cache); } - for(i=0;iactive,totdoc,active2dnum); - inactivenum=totdoc-activenum; - clear_index(working2dnum); + activenum = compute_index(shrink_state->active, totdoc, active2dnum); + inactivenum = totdoc - activenum; + clear_index(working2dnum); + + /* repeat this loop until we have convergence */ + for (; retrain && (!terminate); iteration++) { - /* repeat this loop until we have convergence */ - for(;retrain && (!terminate);iteration++) { - - if(kernel_cache) - kernel_cache->time=iteration; /* for lru cache */ - if(verbosity>=2) { - printf( - "Iteration %ld: ",iteration); fflush(stdout); - } - else if(verbosity==1) { - printf("."); fflush(stdout); + if (kernel_cache) + kernel_cache->time = iteration; /* for lru cache */ + if (verbosity >= 2) { + printf("Iteration %ld: ", iteration); + fflush(stdout); + } else if (verbosity == 1) { + printf("."); + fflush(stdout); } - if(verbosity>=2) t0=get_runtime(); - if(verbosity>=3) { - printf("\nSelecting working set... "); fflush(stdout); + if (verbosity >= 2) + t0 = get_runtime(); + if (verbosity >= 3) { + printf("\nSelecting working set... 
"); + fflush(stdout); } - if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) - learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + if (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize) + learn_parm->svm_newvarsinqp = learn_parm->svm_maxqpsize; - i=0; - for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ - if((chosen[j]>=(learn_parm->svm_maxqpsize/ - minl(learn_parm->svm_maxqpsize, - learn_parm->svm_newvarsinqp))) - || (inconsistent[j]) - || (j == heldout)) { - chosen[j]=0; - choosenum--; - } - else { - chosen[j]++; - working2dnum[i++]=j; + i = 0; + for (jj = 0; (j = working2dnum[jj]) >= 0; jj++) { /* clear working set */ + if ((chosen[j] >= + (learn_parm->svm_maxqpsize / + minl(learn_parm->svm_maxqpsize, learn_parm->svm_newvarsinqp))) || + (inconsistent[j]) || (j == heldout)) { + chosen[j] = 0; + choosenum--; + } else { + chosen[j]++; + working2dnum[i++] = j; } } - working2dnum[i]=-1; + working2dnum[i] = -1; - if(retrain == 2) { - choosenum=0; - for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */ - chosen[j]=0; + if (retrain == 2) { + choosenum = 0; + for (jj = 0; (j = working2dnum[jj]) >= 0; + jj++) { /* fully clear working set */ + chosen[j] = 0; } clear_index(working2dnum); - for(i=0;ibiased_hyperplane) { - eq=0; - for(i=0;i learn_parm->epsilon_a);i++) { - if((eq*label[i] > 0) && (a[i] > 0)) { - chosen[i]=88888; - choosenum++; - if((eq*label[i]) > a[i]) { - eq-=(a[i]*label[i]); - a[i]=0; - } - else { - a[i]-=(eq*label[i]); - eq=0; - } - } - } + if (learn_parm->biased_hyperplane) { + eq = 0; + for (i = 0; i < totdoc; + i++) { /* make sure we fulfill equality constraint */ + eq += a[i] * label[i]; + } + for (i = 0; (i < totdoc) && (fabs(eq) > learn_parm->epsilon_a); i++) { + if ((eq * label[i] > 0) && (a[i] > 0)) { + chosen[i] = 88888; + choosenum++; + if ((eq * label[i]) > a[i]) { + eq -= (a[i] * label[i]); + a[i] = 0; + } else { + a[i] -= (eq * label[i]); + eq = 0; + } + } + } } - compute_index(chosen,totdoc,working2dnum); - } - else { /* select working set according to steepest gradient */ - if(iteration % 101) { - already_chosen=0; - if((minl(learn_parm->svm_newvarsinqp, - learn_parm->svm_maxqpsize-choosenum)>=4) - && (kernel_parm->kernel_type != LINEAR)) { - /* select part of the working set from cache */ - already_chosen=select_next_qp_subproblem_grad( - label,unlabeled,a,lin,c,totdoc, - (long)(minl(learn_parm->svm_maxqpsize-choosenum, - learn_parm->svm_newvarsinqp) - /2), - learn_parm,inconsistent,active2dnum, - working2dnum,selcrit,selexam,kernel_cache,1, - key,chosen); - choosenum+=already_chosen; - } - choosenum+=select_next_qp_subproblem_grad( - label,unlabeled,a,lin,c,totdoc, - minl(learn_parm->svm_maxqpsize-choosenum, - learn_parm->svm_newvarsinqp-already_chosen), - learn_parm,inconsistent,active2dnum, - working2dnum,selcrit,selexam,kernel_cache,0,key, - chosen); - } - else { /* once in a while, select a somewhat random working set - to get unlocked of infinite loops due to numerical - inaccuracies in the core qp-solver */ - choosenum+=select_next_qp_subproblem_rand( - label,unlabeled,a,lin,c,totdoc, - minl(learn_parm->svm_maxqpsize-choosenum, - learn_parm->svm_newvarsinqp), - learn_parm,inconsistent,active2dnum, - working2dnum,selcrit,selexam,kernel_cache,key, - chosen,iteration); + compute_index(chosen, totdoc, working2dnum); + } else { /* select working set according to steepest gradient */ + if (iteration % 101) { + already_chosen = 0; + if ((minl(learn_parm->svm_newvarsinqp, + learn_parm->svm_maxqpsize - choosenum) >= 4) 
&& + (kernel_parm->kernel_type != LINEAR)) { + /* select part of the working set from cache */ + already_chosen = select_next_qp_subproblem_grad( + label, unlabeled, a, lin, c, totdoc, + (long)(minl(learn_parm->svm_maxqpsize - choosenum, + learn_parm->svm_newvarsinqp) / + 2), + learn_parm, inconsistent, active2dnum, working2dnum, selcrit, + selexam, kernel_cache, 1, key, chosen); + choosenum += already_chosen; + } + choosenum += select_next_qp_subproblem_grad( + label, unlabeled, a, lin, c, totdoc, + minl(learn_parm->svm_maxqpsize - choosenum, + learn_parm->svm_newvarsinqp - already_chosen), + learn_parm, inconsistent, active2dnum, working2dnum, selcrit, + selexam, kernel_cache, 0, key, chosen); + } else { /* once in a while, select a somewhat random working set + to get unlocked of infinite loops due to numerical + inaccuracies in the core qp-solver */ + choosenum += select_next_qp_subproblem_rand( + label, unlabeled, a, lin, c, totdoc, + minl(learn_parm->svm_maxqpsize - choosenum, + learn_parm->svm_newvarsinqp), + learn_parm, inconsistent, active2dnum, working2dnum, selcrit, + selexam, kernel_cache, key, chosen, iteration); } } - if(verbosity>=2) { - printf(" %ld vectors chosen\n",choosenum); fflush(stdout); + if (verbosity >= 2) { + printf(" %ld vectors chosen\n", choosenum); + fflush(stdout); } - if(verbosity>=2) t1=get_runtime(); + if (verbosity >= 2) + t1 = get_runtime(); - if(kernel_cache) - cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, - choosenum,kernel_parm); - - if(verbosity>=2) t2=get_runtime(); - if(retrain != 2) { - optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum, - model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, - aicache,kernel_parm,&qp,&epsilon_crit_org); + if (kernel_cache) + cache_multiple_kernel_rows(kernel_cache, docs, working2dnum, choosenum, + kernel_parm); + + if (verbosity >= 2) + t2 = get_runtime(); + if (retrain != 2) { + optimize_svm(docs, label, unlabeled, inconsistent, 0.0, chosen, + active2dnum, model, totdoc, working2dnum, choosenum, a, lin, + c, learn_parm, aicache, kernel_parm, &qp, &epsilon_crit_org); } - if(verbosity>=2) t3=get_runtime(); - update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, - totwords,kernel_parm,kernel_cache,lin,aicache, - weights); + if (verbosity >= 2) + t3 = get_runtime(); + update_linear_component(docs, label, active2dnum, a, a_old, working2dnum, + totdoc, totwords, kernel_parm, kernel_cache, lin, + aicache, weights); - if(verbosity>=2) t4=get_runtime(); - supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, - learn_parm,working2dnum,active2dnum,model); + if (verbosity >= 2) + t4 = get_runtime(); + supvecnum = + calculate_svm_model(docs, label, unlabeled, lin, a, a_old, c, + learn_parm, working2dnum, active2dnum, model); - if(verbosity>=2) t5=get_runtime(); + if (verbosity >= 2) + t5 = get_runtime(); /* The following computation of the objective function works only */ /* relative to the active variables */ - if(verbosity>=3) { - criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, - active2dnum); - printf("Objective function (over active variables): %.16f\n",criterion); - fflush(stdout); + if (verbosity >= 3) { + criterion = compute_objective_function(a, lin, c, learn_parm->eps, label, + active2dnum); + printf("Objective function (over active variables): %.16f\n", criterion); + fflush(stdout); } - for(jj=0;(i=working2dnum[jj])>=0;jj++) { - a_old[i]=a[i]; + for (jj = 0; (i = working2dnum[jj]) >= 0; jj++) { + a_old[i] = a[i]; } - if(retrain 
== 2) { /* reset inconsistent unlabeled examples */ - for(i=0;(i=2) { - t6=get_runtime(); - timing_profile->time_select+=t1-t0; - timing_profile->time_kernel+=t2-t1; - timing_profile->time_opti+=t3-t2; - timing_profile->time_update+=t4-t3; - timing_profile->time_model+=t5-t4; - timing_profile->time_check+=t6-t5; + if (verbosity >= 2) { + t6 = get_runtime(); + timing_profile->time_select += t1 - t0; + timing_profile->time_kernel += t2 - t1; + timing_profile->time_opti += t3 - t2; + timing_profile->time_update += t4 - t3; + timing_profile->time_model += t5 - t4; + timing_profile->time_check += t6 - t5; } /* checking whether optimizer got stuck */ - if((*maxdiff) < bestmaxdiff) { - bestmaxdiff=(*maxdiff); - bestmaxdiffiter=iteration; + if ((*maxdiff) < bestmaxdiff) { + bestmaxdiff = (*maxdiff); + bestmaxdiffiter = iteration; } - if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { + if (iteration > (bestmaxdiffiter + learn_parm->maxiter)) { /* long time no progress? */ - terminate=1; - retrain=0; - if(verbosity>=1) - printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); + terminate = 1; + retrain = 0; + if (verbosity >= 1) + printf("\nWARNING: Relaxing KT-Conditions due to slow progress! " + "Terminating!\n"); } - noshrink=0; - if((!retrain) && (inactivenum>0) - && ((!learn_parm->skip_final_opt_check) - || (kernel_parm->kernel_type == LINEAR))) { - if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) - || (verbosity>=2)) { - if(verbosity==1) { - printf("\n"); - } - printf(" Checking optimality of inactive variables..."); - fflush(stdout); + noshrink = 0; + if ((!retrain) && (inactivenum > 0) && + ((!learn_parm->skip_final_opt_check) || + (kernel_parm->kernel_type == LINEAR))) { + if (((verbosity >= 1) && (kernel_parm->kernel_type != LINEAR)) || + (verbosity >= 2)) { + if (verbosity == 1) { + printf("\n"); + } + printf(" Checking optimality of inactive variables..."); + fflush(stdout); } - t1=get_runtime(); - reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, - totwords,iteration,learn_parm,inconsistent, - docs,kernel_parm,kernel_cache,model,aicache, - weights,maxdiff); + t1 = get_runtime(); + reactivate_inactive_examples( + label, unlabeled, a, shrink_state, lin, c, totdoc, totwords, + iteration, learn_parm, inconsistent, docs, kernel_parm, kernel_cache, + model, aicache, weights, maxdiff); /* Update to new active variables. 
*/ - activenum=compute_index(shrink_state->active,totdoc,active2dnum); - inactivenum=totdoc-activenum; + activenum = compute_index(shrink_state->active, totdoc, active2dnum); + inactivenum = totdoc - activenum; /* reset watchdog */ - bestmaxdiff=(*maxdiff); - bestmaxdiffiter=iteration; + bestmaxdiff = (*maxdiff); + bestmaxdiffiter = iteration; /* termination criterion */ - noshrink=1; - retrain=0; - if((*maxdiff) > learn_parm->epsilon_crit) - retrain=1; - timing_profile->time_shrink+=get_runtime()-t1; - if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) - || (verbosity>=2)) { - printf("done.\n"); fflush(stdout); - printf(" Number of inactive variables = %ld\n",inactivenum); - } + noshrink = 1; + retrain = 0; + if ((*maxdiff) > learn_parm->epsilon_crit) + retrain = 1; + timing_profile->time_shrink += get_runtime() - t1; + if (((verbosity >= 1) && (kernel_parm->kernel_type != LINEAR)) || + (verbosity >= 2)) { + printf("done.\n"); + fflush(stdout); + printf(" Number of inactive variables = %ld\n", inactivenum); + } } - if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) - learn_parm->epsilon_crit=(*maxdiff); - if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { - learn_parm->epsilon_crit/=2.0; - retrain=1; - noshrink=1; + if ((!retrain) && (learn_parm->epsilon_crit > (*maxdiff))) + learn_parm->epsilon_crit = (*maxdiff); + if ((!retrain) && (learn_parm->epsilon_crit > epsilon_crit_org)) { + learn_parm->epsilon_crit /= 2.0; + retrain = 1; + noshrink = 1; } - if(learn_parm->epsilon_critepsilon_crit=epsilon_crit_org; - - if(verbosity>=2) { - printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", - supvecnum,model->at_upper_bound,(*maxdiff)); + if (learn_parm->epsilon_crit < epsilon_crit_org) + learn_parm->epsilon_crit = epsilon_crit_org; + + if (verbosity >= 2) { + printf(" => (%ld SV (incl. 
%ld SV at u-bound), max violation=%.5f)\n", + supvecnum, model->at_upper_bound, (*maxdiff)); fflush(stdout); } - if(verbosity>=3) { + if (verbosity >= 3) { printf("\n"); } - if((!retrain) && (transduction)) { - for(i=0;(iactive[i]=1; + if ((!retrain) && (transduction)) { + for (i = 0; (i < totdoc); i++) { + shrink_state->active[i] = 1; } - activenum=compute_index(shrink_state->active,totdoc,active2dnum); - inactivenum=0; - if(verbosity==1) printf("done\n"); - retrain=incorporate_unlabeled_examples(model,label,inconsistent, - unlabeled,a,lin,totdoc, - selcrit,selexam,key, - transductcycle,kernel_parm, - learn_parm); - epsilon_crit_org=learn_parm->epsilon_crit; - if(kernel_parm->kernel_type == LINEAR) - learn_parm->epsilon_crit=1; + activenum = compute_index(shrink_state->active, totdoc, active2dnum); + inactivenum = 0; + if (verbosity == 1) + printf("done\n"); + retrain = incorporate_unlabeled_examples( + model, label, inconsistent, unlabeled, a, lin, totdoc, selcrit, + selexam, key, transductcycle, kernel_parm, learn_parm); + epsilon_crit_org = learn_parm->epsilon_crit; + if (kernel_parm->kernel_type == LINEAR) + learn_parm->epsilon_crit = 1; transductcycle++; /* reset watchdog */ - bestmaxdiff=(*maxdiff); - bestmaxdiffiter=iteration; - } - else if(((iteration % 10) == 0) && (!noshrink)) { - activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm, - active2dnum,last_suboptimal_at,iteration,totdoc, - maxl((long)(activenum/10), - maxl((long)(totdoc/500),100)), - a,inconsistent); - inactivenum=totdoc-activenum; - if((kernel_cache) - && (supvecnum>kernel_cache->max_elems) - && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { - kernel_cache_shrink(kernel_cache,totdoc, - minl((kernel_cache->activenum-activenum), - (kernel_cache->activenum-supvecnum)), - shrink_state->active); + bestmaxdiff = (*maxdiff); + bestmaxdiffiter = iteration; + } else if (((iteration % 10) == 0) && (!noshrink)) { + activenum = shrink_problem( + docs, learn_parm, shrink_state, kernel_parm, active2dnum, + last_suboptimal_at, iteration, totdoc, + maxl((long)(activenum / 10), maxl((long)(totdoc / 500), 100)), a, + inconsistent); + inactivenum = totdoc - activenum; + if ((kernel_cache) && (supvecnum > kernel_cache->max_elems) && + ((kernel_cache->activenum - activenum) > + maxl((long)(activenum / 10), 500))) { + kernel_cache_shrink(kernel_cache, totdoc, + minl((kernel_cache->activenum - activenum), + (kernel_cache->activenum - supvecnum)), + shrink_state->active); } } - if((!retrain) && learn_parm->remove_inconsistent) { - if(verbosity>=1) { - printf(" Moving training errors to inconsistent examples..."); - fflush(stdout); + if ((!retrain) && learn_parm->remove_inconsistent) { + if (verbosity >= 1) { + printf(" Moving training errors to inconsistent examples..."); + fflush(stdout); } - if(learn_parm->remove_inconsistent == 1) { - retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm, - &inconsistentnum,inconsistent); + if (learn_parm->remove_inconsistent == 1) { + retrain = identify_inconsistent(a, label, unlabeled, totdoc, learn_parm, + &inconsistentnum, inconsistent); + } else if (learn_parm->remove_inconsistent == 2) { + retrain = identify_misclassified(lin, label, unlabeled, totdoc, model, + &inconsistentnum, inconsistent); + } else if (learn_parm->remove_inconsistent == 3) { + retrain = + identify_one_misclassified(lin, label, unlabeled, totdoc, model, + &inconsistentnum, inconsistent); } - else if(learn_parm->remove_inconsistent == 2) { - 
retrain=identify_misclassified(lin,label,unlabeled,totdoc, - model,&inconsistentnum,inconsistent); + if (retrain) { + if (kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */ + learn_parm->epsilon_crit = 2.0; + } } - else if(learn_parm->remove_inconsistent == 3) { - retrain=identify_one_misclassified(lin,label,unlabeled,totdoc, - model,&inconsistentnum,inconsistent); - } - if(retrain) { - if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */ - learn_parm->epsilon_crit=2.0; - } - } - if(verbosity>=1) { - printf("done.\n"); - if(retrain) { - printf(" Now %ld inconsistent examples.\n",inconsistentnum); - } + if (verbosity >= 1) { + printf("done.\n"); + if (retrain) { + printf(" Now %ld inconsistent examples.\n", inconsistentnum); + } } } } /* end of loop */ @@ -1648,430 +1691,433 @@ long optimize_to_convergence(DOC **docs, long int *label, long int totdoc, free(qp.opt_xinit); free(qp.opt_low); free(qp.opt_up); - if(weights) free(weights); + if (weights) + free(weights); - learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ - model->maxdiff=(*maxdiff); + learn_parm->epsilon_crit = epsilon_crit_org; /* restore org */ + model->maxdiff = (*maxdiff); - return(iteration); + return (iteration); } -long optimize_to_convergence_sharedslack(DOC **docs, long int *label, - long int totdoc, - long int totwords, LEARN_PARM *learn_parm, - KERNEL_PARM *kernel_parm, - KERNEL_CACHE *kernel_cache, - SHRINK_STATE *shrink_state, MODEL *model, - double *a, double *lin, double *c, - TIMING *timing_profile, double *maxdiff) - /* docs: Training vectors (x-part) */ - /* label: Training labels/value (y-part, zero if test example for - transduction) */ - /* totdoc: Number of examples in docs/label */ - /* totwords: Number of features (i.e. highest feature index) */ - /* learn_parm: Learning paramenters */ - /* kernel_parm: Kernel paramenters */ - /* kernel_cache: Initialized/partly filled Cache, if using a kernel. - NULL if linear. */ - /* shrink_state: State of active variables */ - /* model: Returns learning result */ - /* a: alphas */ - /* lin: linear component of gradient */ - /* c: right hand side of inequalities (margin) */ - /* maxdiff: returns maximum violation of KT-conditions */ +long optimize_to_convergence_sharedslack( + DOC **docs, long int *label, long int totdoc, long int totwords, + LEARN_PARM *learn_parm, KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, SHRINK_STATE *shrink_state, MODEL *model, + double *a, double *lin, double *c, TIMING *timing_profile, double *maxdiff) +/* docs: Training vectors (x-part) */ +/* label: Training labels/value (y-part, zero if test example for + transduction) */ +/* totdoc: Number of examples in docs/label */ +/* totwords: Number of features (i.e. highest feature index) */ +/* learn_parm: Learning paramenters */ +/* kernel_parm: Kernel paramenters */ +/* kernel_cache: Initialized/partly filled Cache, if using a kernel. + NULL if linear. 
*/ +/* shrink_state: State of active variables */ +/* model: Returns learning result */ +/* a: alphas */ +/* lin: linear component of gradient */ +/* c: right hand side of inequalities (margin) */ +/* maxdiff: returns maximum violation of KT-conditions */ { - long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled; - long *inconsistent,choosenum,already_chosen=0,iteration; - long misclassified,supvecnum=0,*active2dnum,inactivenum; - long *working2dnum,*selexam,*ignore; - long activenum,retrain,maxslackid,slackset,jointstep; - double criterion,eq_target; - double *a_old,*alphaslack; - double t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */ - double epsilon_crit_org,maxsharedviol; + long *chosen, *key, i, j, jj, *last_suboptimal_at, noshrink, *unlabeled; + long *inconsistent, choosenum, already_chosen = 0, iteration; + long misclassified, supvecnum = 0, *active2dnum, inactivenum; + long *working2dnum, *selexam, *ignore; + long activenum, retrain, maxslackid, slackset, jointstep; + double criterion, eq_target; + double *a_old, *alphaslack; + double t0 = 0, t1 = 0, t2 = 0, t3 = 0, t4 = 0, t5 = 0, t6 = 0; /* timing */ + double epsilon_crit_org, maxsharedviol; double bestmaxdiff; - long bestmaxdiffiter,terminate; + long bestmaxdiffiter, terminate; - double *selcrit; /* buffer for sorting */ - CFLOAT *aicache; /* buffer to keep one row of hessian */ - double *weights; /* buffer for weight vector in linear case */ - QP qp; /* buffer for one quadratic program */ - double *slack; /* vector of slack variables for optimization with - shared slacks */ + double *selcrit; /* buffer for sorting */ + CFLOAT *aicache; /* buffer to keep one row of hessian */ + double *weights; /* buffer for weight vector in linear case */ + QP qp; /* buffer for one quadratic program */ + double *slack; /* vector of slack variables for optimization with + shared slacks */ - epsilon_crit_org=learn_parm->epsilon_crit; /* save org */ - if(kernel_parm->kernel_type == LINEAR) { - learn_parm->epsilon_crit=2.0; - /* kernel_cache=NULL; */ /* caching makes no sense for linear kernel */ - } - learn_parm->epsilon_shrink=2; - (*maxdiff)=1; + epsilon_crit_org = learn_parm->epsilon_crit; /* save org */ + if (kernel_parm->kernel_type == LINEAR) { + learn_parm->epsilon_crit = 2.0; + /* kernel_cache=NULL; */ /* caching makes no sense for linear kernel */ + } + learn_parm->epsilon_shrink = 2; + (*maxdiff) = 1; - learn_parm->totwords=totwords; + learn_parm->totwords = totwords; - chosen = (long *)my_malloc(sizeof(long)*totdoc); - unlabeled = (long *)my_malloc(sizeof(long)*totdoc); - inconsistent = (long *)my_malloc(sizeof(long)*totdoc); - ignore = (long *)my_malloc(sizeof(long)*totdoc); - last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc); - key = (long *)my_malloc(sizeof(long)*(totdoc+11)); - selcrit = (double *)my_malloc(sizeof(double)*totdoc); - selexam = (long *)my_malloc(sizeof(long)*totdoc); - a_old = (double *)my_malloc(sizeof(double)*totdoc); - aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); - working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); - active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); - qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + chosen = (long *)my_malloc(sizeof(long) * totdoc); + unlabeled = (long *)my_malloc(sizeof(long) * totdoc); + inconsistent = (long *)my_malloc(sizeof(long) * totdoc); + ignore = (long *)my_malloc(sizeof(long) * totdoc); + last_suboptimal_at = (long *)my_malloc(sizeof(long) * totdoc); + key = (long *)my_malloc(sizeof(long) * 
(totdoc + 11)); + selcrit = (double *)my_malloc(sizeof(double) * totdoc); + selexam = (long *)my_malloc(sizeof(long) * totdoc); + a_old = (double *)my_malloc(sizeof(double) * totdoc); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT) * totdoc); + working2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + active2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + qp.opt_ce = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); qp.opt_ce0 = (double *)my_malloc(sizeof(double)); - qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize - *learn_parm->svm_maxqpsize); - qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); - qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); - qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); - qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); - if(kernel_parm->kernel_type == LINEAR) { - weights=create_nvector(totwords); - clear_nvector(weights,totwords); /* set weights to zero */ + qp.opt_g = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize * + learn_parm->svm_maxqpsize); + qp.opt_g0 = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); + qp.opt_xinit = + (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); + qp.opt_low = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); + qp.opt_up = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); + if (kernel_parm->kernel_type == LINEAR) { + weights = create_nvector(totwords); + clear_nvector(weights, totwords); /* set weights to zero */ + } else + weights = NULL; + maxslackid = 0; + for (i = 0; i < totdoc; i++) { /* determine size of slack array */ + if (maxslackid < docs[i]->slackid) + maxslackid = docs[i]->slackid; } - else - weights=NULL; - maxslackid=0; - for(i=0;i<totdoc;i++) { /* determine size of slack array */ - if(maxslackid<docs[i]->slackid) - maxslackid=docs[i]->slackid; - } - slack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); - alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); - for(i=0;i<=maxslackid;i++) { /* init shared slacks */ - slack[i]=0; - alphaslack[i]=0; + slack = (double *)my_malloc(sizeof(double) * (maxslackid + 1)); + alphaslack = (double *)my_malloc(sizeof(double) * (maxslackid + 1)); + for (i = 0; i <= maxslackid; i++) { /* init shared slacks */ + slack[i] = 0; + alphaslack[i] = 0; } - choosenum=0; - retrain=1; - iteration=1; - bestmaxdiffiter=1; - bestmaxdiff=999999999; - terminate=0; + choosenum = 0; + retrain = 1; + iteration = 1; + bestmaxdiffiter = 1; + bestmaxdiff = 999999999; + terminate = 0; - if(kernel_cache) { - kernel_cache->time=iteration; /* for lru cache */ + if (kernel_cache) { + kernel_cache->time = iteration; /* for lru cache */ kernel_cache_reset_lru(kernel_cache); } - for(i=0;i<totdoc;i++) { /* various inits */ - chosen[i]=0; - unlabeled[i]=0; - inconsistent[i]=0; - ignore[i]=0; - alphaslack[docs[i]->slackid]+=a[i]; - a_old[i]=a[i]; - last_suboptimal_at[i]=1; + for (i = 0; i < totdoc; i++) { /* various inits */ + chosen[i] = 0; + unlabeled[i] = 0; + inconsistent[i] = 0; + ignore[i] = 0; + alphaslack[docs[i]->slackid] += a[i]; + a_old[i] = a[i]; + last_suboptimal_at[i] = 1; } - activenum=compute_index(shrink_state->active,totdoc,active2dnum); - inactivenum=totdoc-activenum; + activenum = compute_index(shrink_state->active, totdoc, active2dnum); + inactivenum = totdoc - activenum; clear_index(working2dnum); /* call to init slack and alphaslack */ - compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, - slack,alphaslack); + compute_shared_slacks(docs, label, a, lin, c, active2dnum, learn_parm, slack, + alphaslack); + + /* repeat this loop until we 
have convergence */ + for (; retrain && (!terminate); iteration++) { - /* repeat this loop until we have convergence */ - for(;retrain && (!terminate);iteration++) { - - if(kernel_cache) - kernel_cache->time=iteration; /* for lru cache */ - if(verbosity>=2) { - printf( - "Iteration %ld: ",iteration); fflush(stdout); - } - else if(verbosity==1) { - printf("."); fflush(stdout); + if (kernel_cache) + kernel_cache->time = iteration; /* for lru cache */ + if (verbosity >= 2) { + printf("Iteration %ld: ", iteration); + fflush(stdout); + } else if (verbosity == 1) { + printf("."); + fflush(stdout); } - if(verbosity>=2) t0=get_runtime(); - if(verbosity>=3) { - printf("\nSelecting working set... "); fflush(stdout); + if (verbosity >= 2) + t0 = get_runtime(); + if (verbosity >= 3) { + printf("\nSelecting working set... "); + fflush(stdout); } - if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) - learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + if (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize) + learn_parm->svm_newvarsinqp = learn_parm->svm_maxqpsize; /* select working set according to steepest gradient */ - jointstep=0; - eq_target=0; - if(iteration % 101) { - slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c, - learn_parm,active2dnum,&maxsharedviol); - if((!(iteration % 100)) - || (!slackset) || (maxsharedviolepsilon_crit)){ - /* do a step with examples from different slack sets */ - if(verbosity >= 2) { - printf("(i-step)"); fflush(stdout); - } - i=0; - for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */ - if((chosen[j]>=(learn_parm->svm_maxqpsize/ - minl(learn_parm->svm_maxqpsize, - learn_parm->svm_newvarsinqp)))) { - chosen[j]=0; - choosenum--; - } - else { - chosen[j]++; - working2dnum[i++]=j; - } - } - working2dnum[i]=-1; - - already_chosen=0; - if((minl(learn_parm->svm_newvarsinqp, - learn_parm->svm_maxqpsize-choosenum)>=4) - && (kernel_parm->kernel_type != LINEAR)) { - /* select part of the working set from cache */ - already_chosen=select_next_qp_subproblem_grad( - label,unlabeled,a,lin,c,totdoc, - (long)(minl(learn_parm->svm_maxqpsize-choosenum, - learn_parm->svm_newvarsinqp) - /2), - learn_parm,inconsistent,active2dnum, - working2dnum,selcrit,selexam,kernel_cache, - (long)1,key,chosen); - choosenum+=already_chosen; - } - choosenum+=select_next_qp_subproblem_grad( - label,unlabeled,a,lin,c,totdoc, - minl(learn_parm->svm_maxqpsize-choosenum, - learn_parm->svm_newvarsinqp-already_chosen), - learn_parm,inconsistent,active2dnum, - working2dnum,selcrit,selexam,kernel_cache, - (long)0,key,chosen); + jointstep = 0; + eq_target = 0; + if (iteration % 101) { + slackset = + select_next_qp_slackset(docs, label, a, lin, slack, alphaslack, c, + learn_parm, active2dnum, &maxsharedviol); + if ((!(iteration % 100)) || (!slackset) || + (maxsharedviol < learn_parm->epsilon_crit)) { + /* do a step with examples from different slack sets */ + if (verbosity >= 2) { + printf("(i-step)"); + fflush(stdout); + } + i = 0; + for (jj = 0; (j = working2dnum[jj]) >= 0; + jj++) { /* clear old part of working set */ + if ((chosen[j] >= (learn_parm->svm_maxqpsize / + minl(learn_parm->svm_maxqpsize, + learn_parm->svm_newvarsinqp)))) { + chosen[j] = 0; + choosenum--; + } else { + chosen[j]++; + working2dnum[i++] = j; + } + } + working2dnum[i] = -1; + + already_chosen = 0; + if ((minl(learn_parm->svm_newvarsinqp, + learn_parm->svm_maxqpsize - choosenum) >= 4) && + (kernel_parm->kernel_type != LINEAR)) { + /* select part of the working set from cache */ + 
already_chosen = select_next_qp_subproblem_grad( + label, unlabeled, a, lin, c, totdoc, + (long)(minl(learn_parm->svm_maxqpsize - choosenum, + learn_parm->svm_newvarsinqp) / + 2), + learn_parm, inconsistent, active2dnum, working2dnum, selcrit, + selexam, kernel_cache, (long)1, key, chosen); + choosenum += already_chosen; + } + choosenum += select_next_qp_subproblem_grad( + label, unlabeled, a, lin, c, totdoc, + minl(learn_parm->svm_maxqpsize - choosenum, + learn_parm->svm_newvarsinqp - already_chosen), + learn_parm, inconsistent, active2dnum, working2dnum, selcrit, + selexam, kernel_cache, (long)0, key, chosen); + } else { /* do a step with all examples from same slack set */ + if (verbosity >= 2) { + printf("(j-step on %ld)", slackset); + fflush(stdout); + } + jointstep = 1; + for (jj = 0; (j = working2dnum[jj]) >= 0; + jj++) { /* clear working set */ + chosen[j] = 0; + } + working2dnum[0] = -1; + eq_target = alphaslack[slackset]; + for (j = 0; j < totdoc; j++) { /* mask all but slackset */ + /* for(jj=0;(j=active2dnum[jj])>=0;jj++) { */ + if (docs[j]->slackid != slackset) + ignore[j] = 1; + else { + ignore[j] = 0; + learn_parm->svm_cost[j] = learn_parm->svm_c; + /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */ + } + } + learn_parm->biased_hyperplane = 1; + choosenum = select_next_qp_subproblem_grad( + label, unlabeled, a, lin, c, totdoc, learn_parm->svm_maxqpsize, + learn_parm, ignore, active2dnum, working2dnum, selcrit, selexam, + kernel_cache, (long)0, key, chosen); + learn_parm->biased_hyperplane = 0; } - else { /* do a step with all examples from same slack set */ - if(verbosity >= 2) { - printf("(j-step on %ld)",slackset); fflush(stdout); - } - jointstep=1; - for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ - chosen[j]=0; - } - working2dnum[0]=-1; - eq_target=alphaslack[slackset]; - for(j=0;j=0;jj++) { */ - if(docs[j]->slackid != slackset) - ignore[j]=1; - else { - ignore[j]=0; - learn_parm->svm_cost[j]=learn_parm->svm_c; - /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */ - } - } - learn_parm->biased_hyperplane=1; - choosenum=select_next_qp_subproblem_grad( - label,unlabeled,a,lin,c,totdoc, - learn_parm->svm_maxqpsize, - learn_parm,ignore,active2dnum, - working2dnum,selcrit,selexam,kernel_cache, - (long)0,key,chosen); - learn_parm->biased_hyperplane=0; + } else { /* once in a while, select a somewhat random working set + to get unlocked of infinite loops due to numerical + inaccuracies in the core qp-solver */ + choosenum += select_next_qp_subproblem_rand( + label, unlabeled, a, lin, c, totdoc, + minl(learn_parm->svm_maxqpsize - choosenum, + learn_parm->svm_newvarsinqp), + learn_parm, inconsistent, active2dnum, working2dnum, selcrit, selexam, + kernel_cache, key, chosen, iteration); + } + + if (verbosity >= 2) { + printf(" %ld vectors chosen\n", choosenum); + fflush(stdout); + } + + if (verbosity >= 2) + t1 = get_runtime(); + + if (kernel_cache) + cache_multiple_kernel_rows(kernel_cache, docs, working2dnum, choosenum, + kernel_parm); + + if (verbosity >= 2) + t2 = get_runtime(); + if (jointstep) + learn_parm->biased_hyperplane = 1; + optimize_svm(docs, label, unlabeled, ignore, eq_target, chosen, active2dnum, + model, totdoc, working2dnum, choosenum, a, lin, c, learn_parm, + aicache, kernel_parm, &qp, &epsilon_crit_org); + learn_parm->biased_hyperplane = 0; + + for (jj = 0; (i = working2dnum[jj]) >= 0; + jj++) /* recompute sums of alphas */ + alphaslack[docs[i]->slackid] += (a[i] - a_old[i]); + for (jj = 0; (i = working2dnum[jj]) >= 0; 
jj++) { /* reduce alpha to fulfill + constraints */ + if (alphaslack[docs[i]->slackid] > learn_parm->svm_c) { + if (a[i] < (alphaslack[docs[i]->slackid] - learn_parm->svm_c)) { + alphaslack[docs[i]->slackid] -= a[i]; + a[i] = 0; + } else { + a[i] -= (alphaslack[docs[i]->slackid] - learn_parm->svm_c); + alphaslack[docs[i]->slackid] = learn_parm->svm_c; + } } } - else { /* once in a while, select a somewhat random working set - to get unlocked of infinite loops due to numerical - inaccuracies in the core qp-solver */ - choosenum+=select_next_qp_subproblem_rand( - label,unlabeled,a,lin,c,totdoc, - minl(learn_parm->svm_maxqpsize-choosenum, - learn_parm->svm_newvarsinqp), - learn_parm,inconsistent,active2dnum, - working2dnum,selcrit,selexam,kernel_cache,key, - chosen,iteration); + for (jj = 0; (i = active2dnum[jj]) >= 0; jj++) + learn_parm->svm_cost[i] = + a[i] + (learn_parm->svm_c - alphaslack[docs[i]->slackid]); + model->at_upper_bound = 0; + for (jj = 0; jj <= maxslackid; jj++) { + if (alphaslack[jj] > (learn_parm->svm_c - learn_parm->epsilon_a)) + model->at_upper_bound++; } - if(verbosity>=2) { - printf(" %ld vectors chosen\n",choosenum); fflush(stdout); - } + if (verbosity >= 2) + t3 = get_runtime(); + update_linear_component(docs, label, active2dnum, a, a_old, working2dnum, + totdoc, totwords, kernel_parm, kernel_cache, lin, + aicache, weights); + compute_shared_slacks(docs, label, a, lin, c, active2dnum, learn_parm, + slack, alphaslack); - if(verbosity>=2) t1=get_runtime(); + if (verbosity >= 2) + t4 = get_runtime(); + supvecnum = + calculate_svm_model(docs, label, unlabeled, lin, a, a_old, c, + learn_parm, working2dnum, active2dnum, model); - if(kernel_cache) - cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, - choosenum,kernel_parm); - - if(verbosity>=2) t2=get_runtime(); - if(jointstep) learn_parm->biased_hyperplane=1; - optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum, - model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, - aicache,kernel_parm,&qp,&epsilon_crit_org); - learn_parm->biased_hyperplane=0; - - for(jj=0;(i=working2dnum[jj])>=0;jj++) /* recompute sums of alphas */ - alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]); - for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill - constraints */ - if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) { - if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) { - alphaslack[docs[i]->slackid]-=a[i]; - a[i]=0; - } - else { - a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c); - alphaslack[docs[i]->slackid]=learn_parm->svm_c; - } - } - } - for(jj=0;(i=active2dnum[jj])>=0;jj++) - learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c - -alphaslack[docs[i]->slackid]); - model->at_upper_bound=0; - for(jj=0;jj<=maxslackid;jj++) { - if(alphaslack[jj]>(learn_parm->svm_c-learn_parm->epsilon_a)) - model->at_upper_bound++; - } - - if(verbosity>=2) t3=get_runtime(); - update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, - totwords,kernel_parm,kernel_cache,lin,aicache, - weights); - compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, - slack,alphaslack); - - if(verbosity>=2) t4=get_runtime(); - supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, - learn_parm,working2dnum,active2dnum,model); - - if(verbosity>=2) t5=get_runtime(); + if (verbosity >= 2) + t5 = get_runtime(); /* The following computation of the objective function works only */ /* relative to the active variables */ - if(verbosity>=3) { - 
criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, - active2dnum); - printf("Objective function (over active variables): %.16f\n",criterion); - fflush(stdout); + if (verbosity >= 3) { + criterion = compute_objective_function(a, lin, c, learn_parm->eps, label, + active2dnum); + printf("Objective function (over active variables): %.16f\n", criterion); + fflush(stdout); } - for(jj=0;(i=working2dnum[jj])>=0;jj++) { - a_old[i]=a[i]; + for (jj = 0; (i = working2dnum[jj]) >= 0; jj++) { + a_old[i] = a[i]; } - retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, - slack,alphaslack,totdoc,learn_parm, - maxdiff,epsilon_crit_org,&misclassified, - active2dnum,last_suboptimal_at, - iteration,kernel_parm); + retrain = check_optimality_sharedslack( + docs, model, label, a, lin, c, slack, alphaslack, totdoc, learn_parm, + maxdiff, epsilon_crit_org, &misclassified, active2dnum, + last_suboptimal_at, iteration, kernel_parm); - if(verbosity>=2) { - t6=get_runtime(); - timing_profile->time_select+=t1-t0; - timing_profile->time_kernel+=t2-t1; - timing_profile->time_opti+=t3-t2; - timing_profile->time_update+=t4-t3; - timing_profile->time_model+=t5-t4; - timing_profile->time_check+=t6-t5; + if (verbosity >= 2) { + t6 = get_runtime(); + timing_profile->time_select += t1 - t0; + timing_profile->time_kernel += t2 - t1; + timing_profile->time_opti += t3 - t2; + timing_profile->time_update += t4 - t3; + timing_profile->time_model += t5 - t4; + timing_profile->time_check += t6 - t5; } /* checking whether optimizer got stuck */ - if((*maxdiff) < bestmaxdiff) { - bestmaxdiff=(*maxdiff); - bestmaxdiffiter=iteration; + if ((*maxdiff) < bestmaxdiff) { + bestmaxdiff = (*maxdiff); + bestmaxdiffiter = iteration; } - if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { + if (iteration > (bestmaxdiffiter + learn_parm->maxiter)) { /* long time no progress? */ - terminate=1; - retrain=0; - if(verbosity>=1) - printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); + terminate = 1; + retrain = 0; + if (verbosity >= 1) + printf("\nWARNING: Relaxing KT-Conditions due to slow progress! " + "Terminating!\n"); } - noshrink=0; - - if((!retrain) && (inactivenum>0) - && ((!learn_parm->skip_final_opt_check) - || (kernel_parm->kernel_type == LINEAR))) { - if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) - || (verbosity>=2)) { - if(verbosity==1) { - printf("\n"); - } - printf(" Checking optimality of inactive variables..."); - fflush(stdout); + noshrink = 0; + + if ((!retrain) && (inactivenum > 0) && + ((!learn_parm->skip_final_opt_check) || + (kernel_parm->kernel_type == LINEAR))) { + if (((verbosity >= 1) && (kernel_parm->kernel_type != LINEAR)) || + (verbosity >= 2)) { + if (verbosity == 1) { + printf("\n"); + } + printf(" Checking optimality of inactive variables..."); + fflush(stdout); } - t1=get_runtime(); - reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, - totwords,iteration,learn_parm,inconsistent, - docs,kernel_parm,kernel_cache,model,aicache, - weights,maxdiff); + t1 = get_runtime(); + reactivate_inactive_examples( + label, unlabeled, a, shrink_state, lin, c, totdoc, totwords, + iteration, learn_parm, inconsistent, docs, kernel_parm, kernel_cache, + model, aicache, weights, maxdiff); /* Update to new active variables. 
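      (Sketch of the surrounding stopping scheme: epsilon_crit starts loose,
      2.0 in the linear-kernel case, is halved whenever the active set looks
      optimal while the tolerance is still above the user's
      epsilon_crit_org, and the loop only terminates once maxdiff stays
      within the original tolerance.)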
*/ - activenum=compute_index(shrink_state->active,totdoc,active2dnum); - inactivenum=totdoc-activenum; - /* check optimality, since check in reactivate does not work for - sharedslacks */ - compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, - slack,alphaslack); - retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, - slack,alphaslack,totdoc,learn_parm, - maxdiff,epsilon_crit_org,&misclassified, - active2dnum,last_suboptimal_at, - iteration,kernel_parm); + activenum = compute_index(shrink_state->active, totdoc, active2dnum); + inactivenum = totdoc - activenum; + /* check optimality, since check in reactivate does not work for + sharedslacks */ + compute_shared_slacks(docs, label, a, lin, c, active2dnum, learn_parm, + slack, alphaslack); + retrain = check_optimality_sharedslack( + docs, model, label, a, lin, c, slack, alphaslack, totdoc, learn_parm, + maxdiff, epsilon_crit_org, &misclassified, active2dnum, + last_suboptimal_at, iteration, kernel_parm); /* reset watchdog */ - bestmaxdiff=(*maxdiff); - bestmaxdiffiter=iteration; + bestmaxdiff = (*maxdiff); + bestmaxdiffiter = iteration; /* termination criterion */ - noshrink=1; - retrain=0; - if((*maxdiff) > learn_parm->epsilon_crit) - retrain=1; - timing_profile->time_shrink+=get_runtime()-t1; - if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) - || (verbosity>=2)) { - printf("done.\n"); fflush(stdout); - printf(" Number of inactive variables = %ld\n",inactivenum); - } + noshrink = 1; + retrain = 0; + if ((*maxdiff) > learn_parm->epsilon_crit) + retrain = 1; + timing_profile->time_shrink += get_runtime() - t1; + if (((verbosity >= 1) && (kernel_parm->kernel_type != LINEAR)) || + (verbosity >= 2)) { + printf("done.\n"); + fflush(stdout); + printf(" Number of inactive variables = %ld\n", inactivenum); + } } - if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) - learn_parm->epsilon_crit=(*maxdiff); - if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { - learn_parm->epsilon_crit/=2.0; - retrain=1; - noshrink=1; + if ((!retrain) && (learn_parm->epsilon_crit > (*maxdiff))) + learn_parm->epsilon_crit = (*maxdiff); + if ((!retrain) && (learn_parm->epsilon_crit > epsilon_crit_org)) { + learn_parm->epsilon_crit /= 2.0; + retrain = 1; + noshrink = 1; } - if(learn_parm->epsilon_critepsilon_crit=epsilon_crit_org; - - if(verbosity>=2) { - printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", - supvecnum,model->at_upper_bound,(*maxdiff)); + if (learn_parm->epsilon_crit < epsilon_crit_org) + learn_parm->epsilon_crit = epsilon_crit_org; + + if (verbosity >= 2) { + printf(" => (%ld SV (incl. 
%ld SV at u-bound), max violation=%.5f)\n", + supvecnum, model->at_upper_bound, (*maxdiff)); fflush(stdout); } - if(verbosity>=3) { + if (verbosity >= 3) { printf("\n"); } - if(((iteration % 10) == 0) && (!noshrink)) { - activenum=shrink_problem(docs,learn_parm,shrink_state, - kernel_parm,active2dnum, - last_suboptimal_at,iteration,totdoc, - maxl((long)(activenum/10), - maxl((long)(totdoc/500),100)), - a,inconsistent); - inactivenum=totdoc-activenum; - if((kernel_cache) - && (supvecnum>kernel_cache->max_elems) - && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { - kernel_cache_shrink(kernel_cache,totdoc, - minl((kernel_cache->activenum-activenum), - (kernel_cache->activenum-supvecnum)), - shrink_state->active); + if (((iteration % 10) == 0) && (!noshrink)) { + activenum = shrink_problem( + docs, learn_parm, shrink_state, kernel_parm, active2dnum, + last_suboptimal_at, iteration, totdoc, + maxl((long)(activenum / 10), maxl((long)(totdoc / 500), 100)), a, + inconsistent); + inactivenum = totdoc - activenum; + if ((kernel_cache) && (supvecnum > kernel_cache->max_elems) && + ((kernel_cache->activenum - activenum) > + maxl((long)(activenum / 10), 500))) { + kernel_cache_shrink(kernel_cache, totdoc, + minl((kernel_cache->activenum - activenum), + (kernel_cache->activenum - supvecnum)), + shrink_state->active); } } - } /* end of loop */ - - + } /* end of loop */ + free(alphaslack); free(slack); free(chosen); @@ -2093,496 +2139,501 @@ long optimize_to_convergence_sharedslack(DOC **docs, long int *label, free(qp.opt_xinit); free(qp.opt_low); free(qp.opt_up); - if(weights) free(weights); + if (weights) + free(weights); - learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ - model->maxdiff=(*maxdiff); + learn_parm->epsilon_crit = epsilon_crit_org; /* restore org */ + model->maxdiff = (*maxdiff); - return(iteration); -} - - -double compute_objective_function(double *a, double *lin, double *c, - double eps, long int *label, - long int *active2dnum) - /* Return value of objective function. */ - /* Works only relative to the active variables! */ + return (iteration); +} + +double compute_objective_function(double *a, double *lin, double *c, double eps, + long int *label, long int *active2dnum) +/* Return value of objective function. */ +/* Works only relative to the active variables! 
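   As a sketch, in the notation of this file, the loop below evaluates
     W(a) = sum_i [ (eps - y_i * c[i]) * a[i] + 0.5 * a[i] * y_i * lin[i] ],
   where lin[i] = sum_j a[j] * y_j * K(x_i, x_j) and the sum runs over the
   active variables only.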
*/ { - long i,ii; + long i, ii; double criterion; /* calculate value of objective function */ - criterion=0; - for(ii=0;active2dnum[ii]>=0;ii++) { - i=active2dnum[ii]; - criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i]; - } - return(criterion); + criterion = 0; + for (ii = 0; active2dnum[ii] >= 0; ii++) { + i = active2dnum[ii]; + criterion = criterion + (eps - (double)label[i] * c[i]) * a[i] + + 0.5 * a[i] * label[i] * lin[i]; + } + return (criterion); } -void clear_index(long int *index) - /* initializes and empties index */ +void clear_index(long int *index) +/* initializes and empties index */ { - index[0]=-1; -} - -void add_to_index(long int *index, long int elem) - /* initializes and empties index */ + index[0] = -1; +} + +void add_to_index(long int *index, long int elem) +/* adds element elem to the end of the index */ { register long i; - for(i=0;index[i] != -1;i++); - index[i]=elem; - index[i+1]=-1; + for (i = 0; index[i] != -1; i++) + ; + index[i] = elem; + index[i + 1] = -1; } -long compute_index(long int *binfeature, long int range, long int *index) - /* create an inverted index of binfeature */ -{ - register long i,ii; +long compute_index(long int *binfeature, long int range, long int *index) +/* create an inverted index of binfeature */ +{ + register long i, ii; - ii=0; - for(i=0;i<range;i++) { - if(binfeature[i]) { - index[ii]=i; - ii++; - } - } - for(i=0;i<4;i++) { - index[ii+i]=-1; - } - return(ii); + ii = 0; + for (i = 0; i < range; i++) { + if (binfeature[i]) { + index[ii] = i; + ii++; + } + } + for (i = 0; i < 4; i++) { + index[ii + i] = -1; + } + return (ii); } +void optimize_svm(DOC **docs, long int *label, long int *unlabeled, + long int *exclude_from_eq_const, double eq_target, + long int *chosen, long int *active2dnum, MODEL *model, + long int totdoc, long int *working2dnum, long int varnum, + double *a, double *lin, double *c, LEARN_PARM *learn_parm, + CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp, + double *epsilon_crit_target) +/* Do optimization on the working set. */ +{ + long i; + double *a_v; + + compute_matrices_for_optimization(docs, label, unlabeled, + exclude_from_eq_const, eq_target, chosen, + active2dnum, working2dnum, model, a, lin, + c, varnum, totdoc, learn_parm, aicache, + kernel_parm, qp); + + if (verbosity >= 3) { + printf("Running optimizer..."); + fflush(stdout); + } + /* call the qp-subsolver */ + a_v = optimize_qp(qp, epsilon_crit_target, learn_parm->svm_maxqpsize, + &(model->b), /* in case the optimizer gives us */ + /* the threshold for free. otherwise */ + /* b is calculated in calculate_model. */ + learn_parm); + if (verbosity >= 3) { + printf("done\n"); + } + + for (i = 0; i < varnum; i++) { + a[working2dnum[i]] = a_v[i]; + /* + if(a_v[i]<=(0+learn_parm->epsilon_a)) { + a[working2dnum[i]]=0; + } + else + if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) { + a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]]; + } + */ } } - -void optimize_svm(DOC **docs, long int *label, long int *unlabeled, - long int *exclude_from_eq_const, double eq_target, - long int *chosen, long int *active2dnum, MODEL *model, - long int totdoc, long int *working2dnum, long int varnum, - double *a, double *lin, double *c, LEARN_PARM *learn_parm, - CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp, - double *epsilon_crit_target) - /* Do optimization on the working set. */ -{ - long i; - double *a_v; - - compute_matrices_for_optimization(docs,label,unlabeled, - exclude_from_eq_const,eq_target,chosen, - active2dnum,working2dnum,model,a,lin,c, - varnum,totdoc,learn_parm,aicache, - kernel_parm,qp); - - if(verbosity>=3) { - printf("Running optimizer..."); fflush(stdout); - } - /* call the qp-subsolver */ - a_v=optimize_qp(qp,epsilon_crit_target, - learn_parm->svm_maxqpsize, - &(model->b), /* in case the optimizer gives us */ - /* the threshold for free. otherwise */ - /* b is calculated in calculate_model. 
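   (A sketch of that computation: for any free support vector, i.e. one
   with epsilon_a < a[i] < svm_cost[i], the KT conditions pin down
     b = y_i * eps - c[i] + lin[i];
   if every support vector sits at the upper bound, b is instead taken as
   the midpoint of the feasible interval [b_low, b_high] derived in
   calculate_svm_model.)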
*/ - learn_parm); - if(verbosity>=3) { - printf("done\n"); - } - - for(i=0;i<varnum;i++) { - a[working2dnum[i]]=a_v[i]; - /* - if(a_v[i]<=(0+learn_parm->epsilon_a)) { - a[working2dnum[i]]=0; - } - else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) { - a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]]; - } - */ - } -} - -void compute_matrices_for_optimization(DOC **docs, long int *label, - long int *unlabeled, long *exclude_from_eq_const, double eq_target, - long int *chosen, long int *active2dnum, - long int *key, MODEL *model, double *a, double *lin, double *c, - long int varnum, long int totdoc, LEARN_PARM *learn_parm, - CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp) -{ - register long ki,kj,i,j; +void compute_matrices_for_optimization( + DOC **docs, long int *label, long int *unlabeled, + long *exclude_from_eq_const, double eq_target, long int *chosen, + long int *active2dnum, long int *key, MODEL *model, double *a, double *lin, + double *c, long int varnum, long int totdoc, LEARN_PARM *learn_parm, + CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp) { + register long ki, kj, i, j; register double kernel_temp; - if(verbosity>=3) { - fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const); + if (verbosity >= 3) { + fprintf(stdout, + "Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, " + "coef_lin %f, coef_const %f])...", + kernel_parm->kernel_type, kernel_parm->poly_degree, + kernel_parm->rbf_gamma, kernel_parm->coef_lin, + kernel_parm->coef_const); fflush(stdout); } - qp->opt_n=varnum; - qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */ - for(j=1;j<model->sv_num;j++) { /* start at 1 */ - if((!chosen[(model->supvec[j])->docnum]) - && (!exclude_from_eq_const[(model->supvec[j])->docnum])) { - qp->opt_ce0[0]+=model->alpha[j]; + qp->opt_n = varnum; + qp->opt_ce0[0] = + -eq_target; /* compute the constant for equality constraint */ + for (j = 1; j < model->sv_num; j++) { /* start at 1 */ + if ((!chosen[(model->supvec[j])->docnum]) && + (!exclude_from_eq_const[(model->supvec[j])->docnum])) { + qp->opt_ce0[0] += model->alpha[j]; } - } - if(learn_parm->biased_hyperplane) - qp->opt_m=1; - else - qp->opt_m=0; /* eq-constraint will be ignored */ + } + if (learn_parm->biased_hyperplane) + qp->opt_m = 1; + else + qp->opt_m = 0; /* eq-constraint will be ignored */ /* init linear part of objective function */ - for(i=0;i<varnum;i++) { - qp->opt_g0[i]=lin[key[i]]; + for (i = 0; i < varnum; i++) { + qp->opt_g0[i] = lin[key[i]]; } - for(i=0;i<varnum;i++) { - ki=key[i]; - /* Compute the matrix for equality constraints */ - qp->opt_ce[i]=label[ki]; - qp->opt_low[i]=0; - qp->opt_up[i]=learn_parm->svm_cost[ki]; + for (i = 0; i < varnum; i++) { + ki = key[i]; + /* Compute the matrix for equality constraints */ + qp->opt_ce[i] = label[ki]; + qp->opt_low[i] = 0; + qp->opt_up[i] = learn_parm->svm_cost[ki]; - kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]); + kernel_temp = (double)kernel(kernel_parm, docs[ki], docs[ki]); /* compute linear part of objective function */ - qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]); + qp->opt_g0[i] -= (kernel_temp * a[ki] * (double)label[ki]); /* compute quadratic part of objective function */ - qp->opt_g[varnum*i+i]=kernel_temp; - for(j=i+1;j<varnum;j++) { - kj=key[j]; - kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[kj]); + qp->opt_g[varnum * i + i] = kernel_temp; + for (j = i + 1; j < varnum; j++) { + kj = key[j]; + kernel_temp = (double)kernel(kernel_parm, docs[ki], docs[kj]); /* compute linear part of objective function */ - qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]); - qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]); + qp->opt_g0[i] -= (kernel_temp * a[kj] * 
(double)label[kj]); + qp->opt_g0[j] -= (kernel_temp * a[ki] * (double)label[ki]); /* compute quadratic part of objective function */ - qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp; - qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp; + qp->opt_g[varnum * i + j] = + (double)label[ki] * (double)label[kj] * kernel_temp; + qp->opt_g[varnum * j + i] = + (double)label[ki] * (double)label[kj] * kernel_temp; } - if(verbosity>=3) { - if(i % 20 == 0) { - fprintf(stdout,"%ld..",i); fflush(stdout); + if (verbosity >= 3) { + if (i % 20 == 0) { + fprintf(stdout, "%ld..", i); + fflush(stdout); } } } - for(i=0;i<varnum;i++) { /* assure starting at feasible point */ - qp->opt_xinit[i]=a[key[i]]; + for (i = 0; i < varnum; i++) { /* assure starting at feasible point */ + qp->opt_xinit[i] = a[key[i]]; /* set linear part of objective function */ - qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]]; + qp->opt_g0[i] = (learn_parm->eps - (double)label[key[i]] * c[key[i]]) + + qp->opt_g0[i] * (double)label[key[i]]; } - if(verbosity>=3) { - fprintf(stdout,"done\n"); + if (verbosity >= 3) { + fprintf(stdout, "done\n"); } } -long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled, - double *lin, double *a, double *a_old, double *c, - LEARN_PARM *learn_parm, long int *working2dnum, - long int *active2dnum, MODEL *model) - /* Compute decision function based on current values */ - /* of alpha. */ +long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled, + double *lin, double *a, double *a_old, double *c, + LEARN_PARM *learn_parm, long int *working2dnum, + long int *active2dnum, MODEL *model) +/* Compute decision function based on current values */ +/* of alpha. */ { - long i,ii,pos,b_calculated=0,first_low,first_high; - double ex_c,b_temp,b_low,b_high; + long i, ii, pos, b_calculated = 0, first_low, first_high; + double ex_c, b_temp, b_low, b_high; - if(verbosity>=3) { - printf("Calculating model..."); fflush(stdout); + if (verbosity >= 3) { + printf("Calculating model..."); + fflush(stdout); } - if(!learn_parm->biased_hyperplane) { - model->b=0; - b_calculated=1; + if (!learn_parm->biased_hyperplane) { + model->b = 0; + b_calculated = 1; } - for(ii=0;(i=working2dnum[ii])>=0;ii++) { - if((a_old[i]>0) && (a[i]==0)) { /* remove from model */ - pos=model->index[i]; - model->index[i]=-1; + for (ii = 0; (i = working2dnum[ii]) >= 0; ii++) { + if ((a_old[i] > 0) && (a[i] == 0)) { /* remove from model */ + pos = model->index[i]; + model->index[i] = -1; (model->sv_num)--; - model->supvec[pos]=model->supvec[model->sv_num]; - model->alpha[pos]=model->alpha[model->sv_num]; - model->index[(model->supvec[pos])->docnum]=pos; - } - else if((a_old[i]==0) && (a[i]>0)) { /* add to model */ - model->supvec[model->sv_num]=docs[i]; - model->alpha[model->sv_num]=a[i]*(double)label[i]; - model->index[i]=model->sv_num; + model->supvec[pos] = model->supvec[model->sv_num]; + model->alpha[pos] = model->alpha[model->sv_num]; + model->index[(model->supvec[pos])->docnum] = pos; + } else if ((a_old[i] == 0) && (a[i] > 0)) { /* add to model */ + model->supvec[model->sv_num] = docs[i]; + model->alpha[model->sv_num] = a[i] * (double)label[i]; + model->index[i] = model->sv_num; (model->sv_num)++; - } - else if(a_old[i]==a[i]) { /* nothing to do */ - } - else { /* just update alpha */ - model->alpha[model->index[i]]=a[i]*(double)label[i]; - } - - ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; - if(!learn_parm->sharedslack) { - if((a_old[i]>=ex_c) && (a[i]<ex_c)) { - (model->at_upper_bound)--; - } - else if((a_old[i]<ex_c) && (a[i]>=ex_c)) { - (model->at_upper_bound)++; + } else if (a_old[i] 
== a[i]) { /* nothing to do */ + } else { /* just update alpha */ + model->alpha[model->index[i]] = a[i] * (double)label[i]; + } + + ex_c = learn_parm->svm_cost[i] - learn_parm->epsilon_a; + if (!learn_parm->sharedslack) { + if ((a_old[i] >= ex_c) && (a[i] < ex_c)) { + (model->at_upper_bound)--; + } else if ((a_old[i] < ex_c) && (a[i] >= ex_c)) { + (model->at_upper_bound)++; } } - if((!b_calculated) - && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */ - model->b=((double)label[i]*learn_parm->eps-c[i]+lin[i]); - /* model->b=(-(double)label[i]+lin[i]); */ - b_calculated=1; + if ((!b_calculated) && (a[i] > learn_parm->epsilon_a) && + (a[i] < ex_c)) { /* calculate b */ + model->b = ((double)label[i] * learn_parm->eps - c[i] + lin[i]); + /* model->b=(-(double)label[i]+lin[i]); */ + b_calculated = 1; } - } - + } + /* No alpha in the working set not at bounds, so b was not calculated in the usual way. The following handles this special case. */ - if(learn_parm->biased_hyperplane - && (!b_calculated) - && (model->sv_num-1 == model->at_upper_bound)) { - first_low=1; - first_high=1; - b_low=0; - b_high=0; - for(ii=0;(i=active2dnum[ii])>=0;ii++) { - ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; - if(a[i]<ex_c) { - if(label[i]>0) { - b_temp=-(learn_parm->eps-c[i]+lin[i]); - if((b_temp>b_low) || (first_low)) { - b_low=b_temp; - first_low=0; - } - } - else { - b_temp=-(-learn_parm->eps-c[i]+lin[i]); - if((b_temp<b_high) || (first_high)) { - b_high=b_temp; - first_high=0; - } - } - } - else { - if(label[i]<0) { - b_temp=-(-learn_parm->eps-c[i]+lin[i]); - if((b_temp>b_low) || (first_low)) { - b_low=b_temp; - first_low=0; - } - } - else { - b_temp=-(learn_parm->eps-c[i]+lin[i]); - if((b_temp<b_high) || (first_high)) { - b_high=b_temp; - first_high=0; - } - } - } - } + if (learn_parm->biased_hyperplane && (!b_calculated) && + (model->sv_num - 1 == model->at_upper_bound)) { + first_low = 1; + first_high = 1; + b_low = 0; + b_high = 0; + for (ii = 0; (i = active2dnum[ii]) >= 0; ii++) { + ex_c = learn_parm->svm_cost[i] - learn_parm->epsilon_a; + if (a[i] < ex_c) { + if (label[i] > 0) { + b_temp = -(learn_parm->eps - c[i] + lin[i]); + if ((b_temp > b_low) || (first_low)) { + b_low = b_temp; + first_low = 0; + } + } else { + b_temp = -(-learn_parm->eps - c[i] + lin[i]); + if ((b_temp < b_high) || (first_high)) { + b_high = b_temp; + first_high = 0; + } + } + } else { + if (label[i] < 0) { + b_temp = -(-learn_parm->eps - c[i] + lin[i]); + if ((b_temp > b_low) || (first_low)) { + b_low = b_temp; + first_low = 0; + } + } else { + b_temp = -(learn_parm->eps - c[i] + lin[i]); + if ((b_temp < b_high) || (first_high)) { + b_high = b_temp; + first_high = 0; + } + } } } - if(first_high) { - model->b=-b_low; - } - else if(first_low) { - model->b=-b_high; - } - else { - model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */ /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */ } } + if (first_high) { + model->b = -b_low; + } else if (first_low) { + model->b = -b_high; + } else { + model->b = -(b_high + b_low) / 2.0; /* select b as the middle of range */ /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */ } } - if(verbosity>=3) { - printf("done\n"); fflush(stdout); + if (verbosity >= 3) { + printf("done\n"); + fflush(stdout); } - return(model->sv_num-1); /* have to substract one, since element 0 is empty*/ + return (model->sv_num - + 1); /* have to subtract one, since element 0 is empty */ } -long check_optimality(MODEL *model, 
long int *label, long int *unlabeled, + double *a, double *lin, double *c, long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, + double epsilon_crit_org, long int *misclassified, + long int *inconsistent, long int *active2dnum, + long int *last_suboptimal_at, long int iteration, + KERNEL_PARM *kernel_parm) +/* Check KT-conditions */ { - long i,ii,retrain; - double dist,ex_c,target; + long i, ii, retrain; + double dist, ex_c, target; - if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */ - learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org; + if (kernel_parm->kernel_type == LINEAR) { /* be optimistic */ + learn_parm->epsilon_shrink = -learn_parm->epsilon_crit + epsilon_crit_org; + } else { /* be conservative */ + learn_parm->epsilon_shrink = + learn_parm->epsilon_shrink * 0.7 + (*maxdiff) * 0.3; } - else { /* be conservative */ - learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; - } - retrain=0; - (*maxdiff)=0; - (*misclassified)=0; - for(ii=0;(i=active2dnum[ii])>=0;ii++) { - if((!inconsistent[i]) && label[i]) { - dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from - hyperplane*/ - target=-(learn_parm->eps-(double)label[i]*c[i]); - ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; - if(dist <= 0) { - (*misclassified)++; /* does not work due to deactivation of var */ + retrain = 0; + (*maxdiff) = 0; + (*misclassified) = 0; + for (ii = 0; (i = active2dnum[ii]) >= 0; ii++) { + if ((!inconsistent[i]) && label[i]) { + dist = (lin[i] - model->b) * (double)label[i]; /* 'distance' from + hyperplane*/ + target = -(learn_parm->eps - (double)label[i] * c[i]); + ex_c = learn_parm->svm_cost[i] - learn_parm->epsilon_a; + if (dist <= 0) { + (*misclassified)++; /* does not work due to deactivation of var */ } - if((a[i]>learn_parm->epsilon_a) && (dist > target)) { - if((dist-target)>(*maxdiff)) /* largest violation */ - (*maxdiff)=dist-target; - } - else if((a[i]<ex_c) && (dist < target)) { - if((target-dist)>(*maxdiff)) /* largest violation */ - (*maxdiff)=target-dist; + if ((a[i] > learn_parm->epsilon_a) && (dist > target)) { + if ((dist - target) > (*maxdiff)) /* largest violation */ + (*maxdiff) = dist - target; + } else if ((a[i] < ex_c) && (dist < target)) { + if ((target - dist) > (*maxdiff)) /* largest violation */ + (*maxdiff) = target - dist; } /* Count how long a variable was at lower/upper bound (and optimal).*/ /* Variables, which were at the bound and optimal for a long */ /* time are unlikely to become support vectors. In case our */ /* cache is filled up, those variables are excluded to save */ - /* kernel evaluations. 
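   (Sketch of the check above: with dist = y_i * (lin[i] - b) and
   target = -(eps - y_i * c[i]), optimality requires dist >= target when
   a[i] = 0, dist = target when 0 < a[i] < svm_cost[i], and dist <= target
   when a[i] = svm_cost[i]; maxdiff records the largest violation, and
   variables that stay at a bound and optimal are remembered in
   last_suboptimal_at as shrinking candidates.)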
(See chapter 'Shrinking').*/ - if((a[i]>(learn_parm->epsilon_a)) - && (a[i]<ex_c)) { - last_suboptimal_at[i]=iteration; /* not at bound */ - } + if ((a[i] > (learn_parm->epsilon_a)) && (a[i] < ex_c)) { + last_suboptimal_at[i] = iteration; /* not at bound */ + } else if ((a[i] <= (learn_parm->epsilon_a)) && + (dist < (target + learn_parm->epsilon_shrink))) { + last_suboptimal_at[i] = iteration; /* not likely optimal */ + } else if ((a[i] >= ex_c) && + (dist > (target - learn_parm->epsilon_shrink))) { + last_suboptimal_at[i] = iteration; /* not likely optimal */ } - else if((a[i]<=(learn_parm->epsilon_a)) - && (dist < (target+learn_parm->epsilon_shrink))) { - last_suboptimal_at[i]=iteration; /* not likely optimal */ - } - else if((a[i]>=ex_c) - && (dist > (target-learn_parm->epsilon_shrink))) { - last_suboptimal_at[i]=iteration; /* not likely optimal */ - } - } } /* termination criterion */ - if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { - retrain=1; + if ((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { + retrain = 1; } - return(retrain); + return (retrain); } -long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label, - double *a, double *lin, double *c, double *slack, - double *alphaslack, - long int totdoc, - LEARN_PARM *learn_parm, double *maxdiff, - double epsilon_crit_org, long int *misclassified, - long int *active2dnum, - long int *last_suboptimal_at, - long int iteration, KERNEL_PARM *kernel_parm) - /* Check KT-conditions */ +long check_optimality_sharedslack( + DOC **docs, MODEL *model, long int *label, double *a, double *lin, + double *c, double *slack, double *alphaslack, long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, double epsilon_crit_org, + long int *misclassified, long int *active2dnum, + long int *last_suboptimal_at, long int iteration, KERNEL_PARM *kernel_parm) +/* Check KT-conditions */ { - long i,ii,retrain; - double dist,dist_noslack,ex_c=0,target; + long i, ii, retrain; + double dist, dist_noslack, ex_c = 0, target; - if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */ - learn_parm->epsilon_shrink=-learn_parm->epsilon_crit/2.0; - } - else { /* be conservative */ - learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; + if (kernel_parm->kernel_type == LINEAR) { /* be optimistic */ + learn_parm->epsilon_shrink = -learn_parm->epsilon_crit / 2.0; + } else { /* be conservative */ + learn_parm->epsilon_shrink = + learn_parm->epsilon_shrink * 0.7 + (*maxdiff) * 0.3; } - retrain=0; - (*maxdiff)=0; - (*misclassified)=0; - for(ii=0;(i=active2dnum[ii])>=0;ii++) { + retrain = 0; + (*maxdiff) = 0; + (*misclassified) = 0; + for (ii = 0; (i = active2dnum[ii]) >= 0; ii++) { /* 'distance' from hyperplane*/ - dist_noslack=(lin[i]-model->b)*(double)label[i]; - dist=dist_noslack+slack[docs[i]->slackid]; - target=-(learn_parm->eps-(double)label[i]*c[i]); - ex_c=learn_parm->svm_c-learn_parm->epsilon_a; - if((a[i]>learn_parm->epsilon_a) && (dist > target)) { - if((dist-target)>(*maxdiff)) { /* 
largest violation */ - (*maxdiff)=dist-target; + (*maxdiff) = dist - target; + if (verbosity >= 5) + printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, " + "alphaslack=%f\n", + docs[i]->slackid, dist, target, slack[docs[i]->slackid], a[i], + alphaslack[docs[i]->slackid]); + if (verbosity >= 5) + printf(" (single %f)\n", (*maxdiff)); } } - if((alphaslack[docs[i]->slackid]<ex_c) && (slack[docs[i]->slackid]>0)) { - if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */ - (*maxdiff)=slack[docs[i]->slackid]; - if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]); - if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff)); + if ((alphaslack[docs[i]->slackid] < ex_c) && + (slack[docs[i]->slackid] > 0)) { + if ((slack[docs[i]->slackid]) > (*maxdiff)) { /* largest violation */ + (*maxdiff) = slack[docs[i]->slackid]; + if (verbosity >= 5) + printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, " + "alphaslack=%f\n", + docs[i]->slackid, dist, target, slack[docs[i]->slackid], a[i], + alphaslack[docs[i]->slackid]); + if (verbosity >= 5) + printf(" (joint %f)\n", (*maxdiff)); } } /* Count how long a variable was at lower/upper bound (and optimal).*/ /* Variables, which were at the bound and optimal for a long */ /* time are unlikely to become support vectors. In case our */ /* cache is filled up, those variables are excluded to save */ - /* kernel evaluations. (See chapter 'Shrinking').*/ - if((a[i]<=learn_parm->epsilon_a) && (dist < (target+learn_parm->epsilon_shrink))) { - last_suboptimal_at[i]=iteration; /* not likely optimal */ + /* kernel evaluations. (See chapter 'Shrinking').*/ + if ((a[i] <= learn_parm->epsilon_a) && + (dist < (target + learn_parm->epsilon_shrink))) { + last_suboptimal_at[i] = iteration; /* not likely optimal */ + } else if ((alphaslack[docs[i]->slackid] < ex_c) && + (a[i] > learn_parm->epsilon_a) && + (fabs(dist_noslack - target) > -learn_parm->epsilon_shrink)) { + last_suboptimal_at[i] = iteration; /* not at lower bound */ + } else if ((alphaslack[docs[i]->slackid] >= ex_c) && + (a[i] > learn_parm->epsilon_a) && + (fabs(target - dist) > -learn_parm->epsilon_shrink)) { + last_suboptimal_at[i] = iteration; /* not likely optimal */ } - else if((alphaslack[docs[i]->slackid]<ex_c) && (a[i]>learn_parm->epsilon_a) && (fabs(dist_noslack - target) > -learn_parm->epsilon_shrink)) { - last_suboptimal_at[i]=iteration; /* not at lower bound */ - } - else if((alphaslack[docs[i]->slackid]>=ex_c) && (a[i]>learn_parm->epsilon_a) && (fabs(target-dist) > -learn_parm->epsilon_shrink)) { - last_suboptimal_at[i]=iteration; /* not likely optimal */ - } - } /* termination criterion */ - if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { - retrain=1; + if ((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { + retrain = 1; } - return(retrain); + return (retrain); } -void compute_shared_slacks(DOC **docs, long int *label, - double *a, double *lin, - double *c, long int *active2dnum, - LEARN_PARM *learn_parm, - double *slack, double *alphaslack) - /* compute the value of shared slacks and the joint alphas */ +void compute_shared_slacks(DOC **docs, long int *label, double *a, double *lin, + double *c, long int *active2dnum, + LEARN_PARM *learn_parm, double *slack, + double *alphaslack) +/* compute the value of shared slacks and the joint alphas */ { - long jj,i; - double dist,target; + long jj, i; + double dist, target; - for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */ - 
slack[docs[i]->slackid]=0.0; + for (jj = 0; (i = active2dnum[jj]) >= 0; jj++) { /* clear slack variables */ + slack[docs[i]->slackid] = 0.0; /* alphaslack[docs[i]->slackid]=0.0; */ } - for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */ - dist=(lin[i])*(double)label[i]; - target=-(learn_parm->eps-(double)label[i]*c[i]); - if((target-dist) > slack[docs[i]->slackid]) - slack[docs[i]->slackid]=target-dist; + for (jj = 0; (i = active2dnum[jj]) >= 0; + jj++) { /* recompute slack variables */ + dist = (lin[i]) * (double)label[i]; + target = -(learn_parm->eps - (double)label[i] * c[i]); + if ((target - dist) > slack[docs[i]->slackid]) + slack[docs[i]->slackid] = target - dist; /* alphaslack[docs[i]->slackid]+=a[i]; */ } -} - - long identify_inconsistent(double *a, long int *label, - long int *unlabeled, long int totdoc, - LEARN_PARM *learn_parm, - long int *inconsistentnum, long int *inconsistent) -{ - long i,retrain; +} + +long identify_inconsistent(double *a, long int *label, long int *unlabeled, + long int totdoc, LEARN_PARM *learn_parm, + long int *inconsistentnum, long int *inconsistent) { + long i, retrain; /* Throw out examples with multipliers at upper bound. This */ /* corresponds to the -i 1 option. */ /* ATTENTION: this is just a heuristic for finding a close */ /* to minimum number of examples to exclude to */ /* make the problem separable with desired margin */ - retrain=0; - for(i=0;i<totdoc;i++) { - if((!inconsistent[i]) && (!unlabeled[i]) - && (a[i]>=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { - (*inconsistentnum)++; - inconsistent[i]=1; /* never choose again */ - retrain=2; /* start over */ - if(verbosity>=3) { - printf("inconsistent(%ld)..",i); fflush(stdout); - } + retrain = 0; + for (i = 0; i < totdoc; i++) { + if ((!inconsistent[i]) && (!unlabeled[i]) && + (a[i] >= (learn_parm->svm_cost[i] - learn_parm->epsilon_a))) { + (*inconsistentnum)++; + inconsistent[i] = 1; /* never choose again */ + retrain = 2; /* start over */ + if (verbosity >= 3) { + printf("inconsistent(%ld)..", i); + fflush(stdout); + } } } - return(retrain); + return (retrain); } -long identify_misclassified(double *lin, long int *label, - long int *unlabeled, long int totdoc, - MODEL *model, long int *inconsistentnum, - long int *inconsistent) -{ - long i,retrain; +long identify_misclassified(double *lin, long int *label, long int *unlabeled, + long int totdoc, MODEL *model, + long int *inconsistentnum, long int *inconsistent) { + long i, retrain; double dist; /* Throw out misclassified examples. 
This */ @@ -2590,387 +2641,378 @@ long identify_misclassified(double *lin, long int *label, /* ATTENTION: this is just a heuristic for finding a close */ /* to minimum number of examples to exclude to */ /* make the problem separable with desired margin */ - retrain=0; - for(i=0;ib)*(double)label[i]; /* 'distance' from hyperplane*/ - if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) { - (*inconsistentnum)++; - inconsistent[i]=1; /* never choose again */ - retrain=2; /* start over */ - if(verbosity>=3) { - printf("inconsistent(%ld)..",i); fflush(stdout); - } + retrain = 0; + for (i = 0; i < totdoc; i++) { + dist = + (lin[i] - model->b) * (double)label[i]; /* 'distance' from hyperplane*/ + if ((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) { + (*inconsistentnum)++; + inconsistent[i] = 1; /* never choose again */ + retrain = 2; /* start over */ + if (verbosity >= 3) { + printf("inconsistent(%ld)..", i); + fflush(stdout); + } } } - return(retrain); + return (retrain); } -long identify_one_misclassified(double *lin, long int *label, - long int *unlabeled, - long int totdoc, MODEL *model, - long int *inconsistentnum, - long int *inconsistent) -{ - long i,retrain,maxex=-1; - double dist,maxdist=0; +long identify_one_misclassified(double *lin, long int *label, + long int *unlabeled, long int totdoc, + MODEL *model, long int *inconsistentnum, + long int *inconsistent) { + long i, retrain, maxex = -1; + double dist, maxdist = 0; /* Throw out the 'most misclassified' example. This */ /* corresponds to the -i 3 option. */ /* ATTENTION: this is just a heuristic for finding a close */ /* to minimum number of examples to exclude to */ /* make the problem separable with desired margin */ - retrain=0; - for(i=0;ib)*(double)label[i];/* 'distance' from hyperplane*/ - if(distb) * + (double)label[i]; /* 'distance' from hyperplane*/ + if (dist < maxdist) { + maxdist = dist; + maxex = i; } } } - if(maxex>=0) { + if (maxex >= 0) { (*inconsistentnum)++; - inconsistent[maxex]=1; /* never choose again */ - retrain=2; /* start over */ - if(verbosity>=3) { - printf("inconsistent(%ld)..",i); fflush(stdout); + inconsistent[maxex] = 1; /* never choose again */ + retrain = 2; /* start over */ + if (verbosity >= 3) { + printf("inconsistent(%ld)..", i); + fflush(stdout); } } - return(retrain); + return (retrain); } -void update_linear_component(DOC **docs, long int *label, - long int *active2dnum, double *a, - double *a_old, long int *working2dnum, - long int totdoc, long int totwords, - KERNEL_PARM *kernel_parm, - KERNEL_CACHE *kernel_cache, - double *lin, CFLOAT *aicache, double *weights) - /* keep track of the linear component */ - /* lin of the gradient etc. by updating */ - /* based on the change of the variables */ - /* in the current working set */ - /* WARNING: Assumes that array of weights is initialized to all zero - values for linear kernel! */ +void update_linear_component(DOC **docs, long int *label, long int *active2dnum, + double *a, double *a_old, long int *working2dnum, + long int totdoc, long int totwords, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, double *lin, + CFLOAT *aicache, double *weights) +/* keep track of the linear component */ +/* lin of the gradient etc. by updating */ +/* based on the change of the variables */ +/* in the current working set */ +/* WARNING: Assumes that array of weights is initialized to all zero + values for linear kernel! 
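   (Sketch of the fast path below: the working-set change is first folded
   into delta_w = sum over W of (a[i] - a_old[i]) * y_i * x_i, then
   lin[j] += delta_w . x_j is added for every active j, and the weight
   buffer is zeroed again afterwards; one dot product per active example
   instead of one kernel row per changed variable.)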
*/ { - register long i,ii,j,jj; + register long i, ii, j, jj; register double tec; SVECTOR *f; - if(kernel_parm->kernel_type==0) { /* special linear case */ + if (kernel_parm->kernel_type == 0) { /* special linear case */ /* clear_vector_n(weights,totwords); */ - for(ii=0;(i=working2dnum[ii])>=0;ii++) { - if(a[i] != a_old[i]) { - for(f=docs[i]->fvec;f;f=f->next) - add_vector_ns(weights,f, - f->factor*((a[i]-a_old[i])*(double)label[i])); + for (ii = 0; (i = working2dnum[ii]) >= 0; ii++) { + if (a[i] != a_old[i]) { + for (f = docs[i]->fvec; f; f = f->next) + add_vector_ns(weights, f, + f->factor * ((a[i] - a_old[i]) * (double)label[i])); } } - for(jj=0;(j=active2dnum[jj])>=0;jj++) { - for(f=docs[j]->fvec;f;f=f->next) - lin[j]+=f->factor*sprod_ns(weights,f); + for (jj = 0; (j = active2dnum[jj]) >= 0; jj++) { + for (f = docs[j]->fvec; f; f = f->next) + lin[j] += f->factor * sprod_ns(weights, f); } - for(ii=0;(i=working2dnum[ii])>=0;ii++) { - if(a[i] != a_old[i]) { - for(f=docs[i]->fvec;f;f=f->next) - mult_vector_ns(weights,f,0.0); /* Set weights back to zero. */ - } /* This is faster than init */ - } /* weights to zero in each iter. */ - } - else { /* general case */ - for(jj=0;(i=working2dnum[jj])>=0;jj++) { - if(a[i] != a_old[i]) { - get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache, - kernel_parm); - for(ii=0;(j=active2dnum[ii])>=0;ii++) { - tec=aicache[j]; - lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]); - } + for (ii = 0; (i = working2dnum[ii]) >= 0; ii++) { + if (a[i] != a_old[i]) { + for (f = docs[i]->fvec; f; f = f->next) + mult_vector_ns(weights, f, 0.0); /* Set weights back to zero. */ + } /* This is faster than init */ + } /* weights to zero in each iter. */ + } else { /* general case */ + for (jj = 0; (i = working2dnum[jj]) >= 0; jj++) { + if (a[i] != a_old[i]) { + get_kernel_row(kernel_cache, docs, i, totdoc, active2dnum, aicache, + kernel_parm); + for (ii = 0; (j = active2dnum[ii]) >= 0; ii++) { + tec = aicache[j]; + lin[j] += (((a[i] * tec) - (a_old[i] * tec)) * (double)label[i]); + } } } } -} - - -long incorporate_unlabeled_examples(MODEL *model, long int *label, - long int *inconsistent, - long int *unlabeled, - double *a, double *lin, - long int totdoc, double *selcrit, - long int *select, long int *key, - long int transductcycle, - KERNEL_PARM *kernel_parm, - LEARN_PARM *learn_parm) -{ - long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0; - long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab; - double dist,model_length,posratio,negratio; - long check_every=2; +} + +long incorporate_unlabeled_examples(MODEL *model, long int *label, + long int *inconsistent, long int *unlabeled, + double *a, double *lin, long int totdoc, + double *selcrit, long int *select, + long int *key, long int transductcycle, + KERNEL_PARM *kernel_parm, + LEARN_PARM *learn_parm) { + long i, j, k, j1, j2, j3, j4, unsupaddnum1 = 0, unsupaddnum2 = 0; + long pos, neg, upos, uneg, orgpos, orgneg, nolabel, newpos, newneg, allunlab; + double dist, model_length, posratio, negratio; + long check_every = 2; double loss; - static double switchsens=0.0,switchsensorg=0.0; - double umin,umax,sumalpha; - long imin=0,imax=0; - static long switchnum=0; + static double switchsens = 0.0, switchsensorg = 0.0; + double umin, umax, sumalpha; + long imin = 0, imax = 0; + static long switchnum = 0; - switchsens/=1.2; + switchsens /= 1.2; /* assumes that lin[] is up to date -> no inactive vars */ - orgpos=0; - orgneg=0; - newpos=0; - newneg=0; - nolabel=0; - allunlab=0; - for(i=0;i 0) { - 
orgpos++; + orgpos = 0; + orgneg = 0; + newpos = 0; + newneg = 0; + nolabel = 0; + allunlab = 0; + for (i = 0; i < totdoc; i++) { + if (!unlabeled[i]) { + if (label[i] > 0) { + orgpos++; + } else { + orgneg++; } - else { - orgneg++; - } - } - else { + } else { allunlab++; - if(unlabeled[i]) { - if(label[i] > 0) { - newpos++; - } - else if(label[i] < 0) { - newneg++; - } + if (unlabeled[i]) { + if (label[i] > 0) { + newpos++; + } else if (label[i] < 0) { + newneg++; + } } } - if(label[i]==0) { + if (label[i] == 0) { nolabel++; } } - if(learn_parm->transduction_posratio >= 0) { - posratio=learn_parm->transduction_posratio; - } - else { - posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */ - } /* in training data */ - negratio=1.0-posratio; + if (learn_parm->transduction_posratio >= 0) { + posratio = learn_parm->transduction_posratio; + } else { + posratio = + (double)orgpos / (double)(orgpos + orgneg); /* use ratio of pos/neg */ + } /* in training data */ + negratio = 1.0 - posratio; - learn_parm->svm_costratio=1.0; /* global */ - if(posratio>0) { - learn_parm->svm_costratio_unlab=negratio/posratio; - } - else { - learn_parm->svm_costratio_unlab=1.0; - } - - pos=0; - neg=0; - upos=0; - uneg=0; - for(i=0;ib); /* 'distance' from hyperplane*/ - if(dist>0) { + learn_parm->svm_costratio = 1.0; /* global */ + if (posratio > 0) { + learn_parm->svm_costratio_unlab = negratio / posratio; + } else { + learn_parm->svm_costratio_unlab = 1.0; + } + + pos = 0; + neg = 0; + upos = 0; + uneg = 0; + for (i = 0; i < totdoc; i++) { + dist = (lin[i] - model->b); /* 'distance' from hyperplane*/ + if (dist > 0) { pos++; - } - else { + } else { neg++; } - if(unlabeled[i]) { - if(dist>0) { - upos++; - } - else { - uneg++; + if (unlabeled[i]) { + if (dist > 0) { + upos++; + } else { + uneg++; } } - if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { - /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */ + if ((!unlabeled[i]) && + (a[i] > (learn_parm->svm_cost[i] - learn_parm->epsilon_a))) { + /* printf("Ubounded %ld (class %ld, unlabeled + * %ld)\n",i,label[i],unlabeled[i]); */ } } - if(verbosity>=2) { - printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg); - printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg); - printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio); + if (verbosity >= 2) { + printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n", pos, orgpos, orgneg); + printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n", pos, newpos, newneg); + printf("pos ratio = %f (%f).\n", (double)(upos) / (double)(allunlab), + posratio); fflush(stdout); } - if(transductcycle == 0) { - j1=0; - j2=0; - j4=0; - for(i=0;ib); /* 'distance' from hyperplane*/ - if((label[i]==0) && (unlabeled[i])) { - selcrit[j4]=dist; - key[j4]=i; - j4++; + if (transductcycle == 0) { + j1 = 0; + j2 = 0; + j4 = 0; + for (i = 0; i < totdoc; i++) { + dist = (lin[i] - model->b); /* 'distance' from hyperplane*/ + if ((label[i] == 0) && (unlabeled[i])) { + selcrit[j4] = dist; + key[j4] = i; + j4++; } } - unsupaddnum1=0; - unsupaddnum2=0; - select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5)); - for(k=0;(k<(long)(allunlab*posratio+0.5));k++) { - i=key[select[k]]; - label[i]=1; - unsupaddnum1++; + unsupaddnum1 = 0; + unsupaddnum2 = 0; + select_top_n(selcrit, j4, select, (long)(allunlab * posratio + 0.5)); + for (k = 0; (k < (long)(allunlab * posratio + 0.5)); k++) { + i = key[select[k]]; + label[i] = 1; + unsupaddnum1++; j1++; } - 
for(i=0;isvm_cost[i]=learn_parm->svm_c* - learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; - } - else if(label[i] == -1) { - learn_parm->svm_cost[i]=learn_parm->svm_c* - learn_parm->svm_unlabbound; - } + for (i = 0; i < totdoc; i++) { /* set upper bounds on vars */ + if (unlabeled[i]) { + if (label[i] == 1) { + learn_parm->svm_cost[i] = learn_parm->svm_c * + learn_parm->svm_costratio_unlab * + learn_parm->svm_unlabbound; + } else if (label[i] == -1) { + learn_parm->svm_cost[i] = + learn_parm->svm_c * learn_parm->svm_unlabbound; + } } } - if(verbosity>=1) { - /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", - learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, - learn_parm->svm_unlabbound); */ - printf("Classifying unlabeled data as %ld POS / %ld NEG.\n", - unsupaddnum1,unsupaddnum2); + if (verbosity >= 1) { + /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", + learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, + learn_parm->svm_unlabbound); */ + printf("Classifying unlabeled data as %ld POS / %ld NEG.\n", unsupaddnum1, + unsupaddnum2); fflush(stdout); } - if(verbosity >= 1) + if (verbosity >= 1) printf("Retraining."); - if(verbosity >= 2) printf("\n"); - return((long)3); + if (verbosity >= 2) + printf("\n"); + return ((long)3); } - if((transductcycle % check_every) == 0) { - if(verbosity >= 1) + if ((transductcycle % check_every) == 0) { + if (verbosity >= 1) printf("Retraining."); - if(verbosity >= 2) printf("\n"); - j1=0; - j2=0; - unsupaddnum1=0; - unsupaddnum2=0; - for(i=0;i= 2) + printf("\n"); + j1 = 0; + j2 = 0; + unsupaddnum1 = 0; + unsupaddnum2 = 0; + for (i = 0; i < totdoc; i++) { + if ((unlabeled[i] == 2)) { + unlabeled[i] = 1; + label[i] = 1; + j1++; + unsupaddnum1++; + } else if ((unlabeled[i] == 3)) { + unlabeled[i] = 1; + label[i] = -1; + j2++; + unsupaddnum2++; } } - for(i=0;isvm_cost[i]=learn_parm->svm_c* - learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; - } - else if(label[i] == -1) { - learn_parm->svm_cost[i]=learn_parm->svm_c* - learn_parm->svm_unlabbound; - } + for (i = 0; i < totdoc; i++) { /* set upper bounds on vars */ + if (unlabeled[i]) { + if (label[i] == 1) { + learn_parm->svm_cost[i] = learn_parm->svm_c * + learn_parm->svm_costratio_unlab * + learn_parm->svm_unlabbound; + } else if (label[i] == -1) { + learn_parm->svm_cost[i] = + learn_parm->svm_c * learn_parm->svm_unlabbound; + } } } - if(verbosity>=2) { - /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", - learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, - learn_parm->svm_unlabbound); */ - printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n", - upos,unsupaddnum1,unsupaddnum2); + if (verbosity >= 2) { + /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", + learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, + learn_parm->svm_unlabbound); */ + printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n", + upos, unsupaddnum1, unsupaddnum2); fflush(stdout); } - if(learn_parm->svm_unlabbound == 1) { - learn_parm->epsilon_crit=0.001; /* do the last run right */ - } - else { - learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */ + if (learn_parm->svm_unlabbound == 1) { + learn_parm->epsilon_crit = 0.001; /* do the last run right */ + } else { + learn_parm->epsilon_crit = 0.01; /* otherwise, no need to be so picky */ } - return((long)3); - } - else if(((transductcycle % check_every) < check_every)) { - model_length=0; - sumalpha=0; - loss=0; - for(i=0;ib); /* 
'distance' from hyperplane*/ - if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) { - loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i]; + return ((long)3); + } else if (((transductcycle % check_every) < check_every)) { + model_length = 0; + sumalpha = 0; + loss = 0; + for (i = 0; i < totdoc; i++) { + model_length += a[i] * label[i] * lin[i]; + sumalpha += a[i]; + dist = (lin[i] - model->b); /* 'distance' from hyperplane*/ + if ((label[i] * dist) < (1.0 - learn_parm->epsilon_crit)) { + loss += (1.0 - (label[i] * dist)) * learn_parm->svm_cost[i]; } } - model_length=sqrt(model_length); - if(verbosity>=2) { - printf("Model-length = %f (%f), loss = %f, objective = %f\n", - model_length,sumalpha,loss,loss+0.5*model_length*model_length); + model_length = sqrt(model_length); + if (verbosity >= 2) { + printf("Model-length = %f (%f), loss = %f, objective = %f\n", + model_length, sumalpha, loss, + loss + 0.5 * model_length * model_length); fflush(stdout); } - j1=0; - j2=0; - j3=0; - j4=0; - unsupaddnum1=0; - unsupaddnum2=0; - umin=99999; - umax=-99999; - j4=1; - while(j4) { - umin=99999; - umax=-99999; - for(i=0;(ib); - if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i]) - && (distumax)) { - umax=dist; - imax=i; - } + j1 = 0; + j2 = 0; + j3 = 0; + j4 = 0; + unsupaddnum1 = 0; + unsupaddnum2 = 0; + umin = 99999; + umax = -99999; + j4 = 1; + while (j4) { + umin = 99999; + umax = -99999; + for (i = 0; (i < totdoc); i++) { + dist = (lin[i] - model->b); + if ((label[i] > 0) && (unlabeled[i]) && (!inconsistent[i]) && + (dist < umin)) { + umin = dist; + imin = i; + } + if ((label[i] < 0) && (unlabeled[i]) && (!inconsistent[i]) && + (dist > umax)) { + umax = dist; + imax = i; + } } - if((umin < (umax+switchsens-1E-4))) { - j1++; - j2++; - unsupaddnum1++; - unlabeled[imin]=3; - inconsistent[imin]=1; - unsupaddnum2++; - unlabeled[imax]=2; - inconsistent[imax]=1; - } - else - j4=0; - j4=0; + if ((umin < (umax + switchsens - 1E-4))) { + j1++; + j2++; + unsupaddnum1++; + unlabeled[imin] = 3; + inconsistent[imin] = 1; + unsupaddnum2++; + unlabeled[imax] = 2; + inconsistent[imax] = 1; + } else + j4 = 0; + j4 = 0; } - for(j=0;(j0) { - unlabeled[j]=2; - } - else if(label[j]<0) { - unlabeled[j]=3; - } - /* inconsistent[j]=1; */ - j3++; + for (j = 0; (j < totdoc); j++) { + if (unlabeled[j] && (!inconsistent[j])) { + if (label[j] > 0) { + unlabeled[j] = 2; + } else if (label[j] < 0) { + unlabeled[j] = 3; + } + /* inconsistent[j]=1; */ + j3++; } } - switchnum+=unsupaddnum1+unsupaddnum2; + switchnum += unsupaddnum1 + unsupaddnum2; /* stop and print out current margin printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree); @@ -2979,478 +3021,446 @@ long incorporate_unlabeled_examples(MODEL *model, long int *label, } */ - if((!unsupaddnum1) && (!unsupaddnum2)) { - if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) { - for(j=0;(jpredfile,model,lin,a,unlabeled,label, - totdoc,learn_parm); - if(verbosity>=1) - printf("Number of switches: %ld\n",switchnum); - return((long)0); + if ((!unsupaddnum1) && (!unsupaddnum2)) { + if ((learn_parm->svm_unlabbound >= 1) && + ((newpos + newneg) == allunlab)) { + for (j = 0; (j < totdoc); j++) { + inconsistent[j] = 0; + if (unlabeled[j]) + unlabeled[j] = 1; + } + write_prediction(learn_parm->predfile, model, lin, a, unlabeled, label, + totdoc, learn_parm); + if (verbosity >= 1) + printf("Number of switches: %ld\n", switchnum); + return ((long)0); } - switchsens=switchsensorg; - learn_parm->svm_unlabbound*=1.5; - if(learn_parm->svm_unlabbound>1) { - 
learn_parm->svm_unlabbound=1; + switchsens = switchsensorg; + learn_parm->svm_unlabbound *= 1.5; + if (learn_parm->svm_unlabbound > 1) { + learn_parm->svm_unlabbound = 1; } - model->at_upper_bound=0; /* since upper bound increased */ - if(verbosity>=1) - printf("Increasing influence of unlabeled examples to %f%% .", - learn_parm->svm_unlabbound*100.0); - } - else if(verbosity>=1) { - printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.", - upos,unsupaddnum1,unsupaddnum2); + model->at_upper_bound = 0; /* since upper bound increased */ + if (verbosity >= 1) + printf("Increasing influence of unlabeled examples to %f%% .", + learn_parm->svm_unlabbound * 100.0); + } else if (verbosity >= 1) { + printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled " + "examples.", + upos, unsupaddnum1, unsupaddnum2); fflush(stdout); } - if(verbosity >= 2) printf("\n"); - - learn_parm->epsilon_crit=0.5; /* don't need to be so picky */ + if (verbosity >= 2) + printf("\n"); + + learn_parm->epsilon_crit = 0.5; /* don't need to be so picky */ - for(i=0;isvm_cost[i]=learn_parm->svm_c* - learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; - } - else if(label[i] == -1) { - learn_parm->svm_cost[i]=learn_parm->svm_c* - learn_parm->svm_unlabbound; - } + for (i = 0; i < totdoc; i++) { /* set upper bounds on vars */ + if (unlabeled[i]) { + if (label[i] == 1) { + learn_parm->svm_cost[i] = learn_parm->svm_c * + learn_parm->svm_costratio_unlab * + learn_parm->svm_unlabbound; + } else if (label[i] == -1) { + learn_parm->svm_cost[i] = + learn_parm->svm_c * learn_parm->svm_unlabbound; + } } } - return((long)2); + return ((long)2); } - return((long)0); + return ((long)0); } /*************************** Working set selection ***************************/ -long select_next_qp_subproblem_grad(long int *label, - long int *unlabeled, - double *a, double *lin, - double *c, long int totdoc, - long int qp_size, - LEARN_PARM *learn_parm, - long int *inconsistent, - long int *active2dnum, - long int *working2dnum, - double *selcrit, - long int *select, - KERNEL_CACHE *kernel_cache, - long int cache_only, - long int *key, long int *chosen) - /* Use the feasible direction approach to select the next - qp-subproblem (see chapter 'Selecting a good working set'). If - 'cache_only' is true, then the variables are selected only among - those for which the kernel evaluations are cached. */ +long select_next_qp_subproblem_grad( + long int *label, long int *unlabeled, double *a, double *lin, double *c, + long int totdoc, long int qp_size, LEARN_PARM *learn_parm, + long int *inconsistent, long int *active2dnum, long int *working2dnum, + double *selcrit, long int *select, KERNEL_CACHE *kernel_cache, + long int cache_only, long int *key, long int *chosen) +/* Use the feasible direction approach to select the next + qp-subproblem (see chapter 'Selecting a good working set'). If + 'cache_only' is true, then the variables are selected only among + those for which the kernel evaluations are cached. 
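
   The two passes below rank every free variable by the criterion
   label[j] * (eps - label[j]*c[j] + label[j]*lin[j]), first for the descent
   direction s = -y and then, with the sign flipped, for s = +y, taking the
   best qp_size/2 candidates from each pass. A simplified sketch of one
   scoring pass (rank_candidates is a hypothetical name; the feasibility
   filters on the bounds, chosen[], and inconsistent[] are omitted here):

     // Score the candidates of one pass; select_top_n-style partial
     // sorting of selcrit/key then yields that direction's working set.
     static long rank_candidates(long *label, double *lin, double *c,
                                 double eps, long *cand, long ncand,
                                 double *selcrit, long *key) {
       long n = 0, t, j;
       for (t = 0; t < ncand; t++) {
         j = cand[t];
         selcrit[n] = (double)label[j] * (eps - (double)label[j] * c[j] +
                                          (double)label[j] * lin[j]);
         key[n] = j;
         n++;
       }
       return n;  // number of scored candidates
     }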
*/ { - long choosenum,i,j,k,activedoc,inum,valid; + long choosenum, i, j, k, activedoc, inum, valid; double s; - for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ - choosenum=0; - activedoc=0; - for(i=0;(j=active2dnum[i])>=0;i++) { - s=-label[j]; - if(kernel_cache && cache_only) - valid=(kernel_cache->index[j]>=0); + for (inum = 0; working2dnum[inum] >= 0; inum++) + ; /* find end of index */ + choosenum = 0; + activedoc = 0; + for (i = 0; (j = active2dnum[i]) >= 0; i++) { + s = -label[j]; + if (kernel_cache && cache_only) + valid = (kernel_cache->index[j] >= 0); else - valid=1; - if(valid - && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) - && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) - && (s>0))) - && (!chosen[j]) - && (label[j]) - && (!inconsistent[j])) - { - selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); - /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */ - key[activedoc]=j; + valid = 1; + if (valid && (!((a[j] <= (0 + learn_parm->epsilon_a)) && (s < 0))) && + (!((a[j] >= (learn_parm->svm_cost[j] - learn_parm->epsilon_a)) && + (s > 0))) && + (!chosen[j]) && (label[j]) && (!inconsistent[j])) { + selcrit[activedoc] = + (double)label[j] * (learn_parm->eps - (double)label[j] * c[j] + + (double)label[j] * lin[j]); + /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); + */ + key[activedoc] = j; activedoc++; } } - select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); - for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (kbiased_hyperplane || (selcrit[select[k]] > 0)) { */ - i=key[select[k]]; - chosen[i]=1; - working2dnum[inum+choosenum]=i; - choosenum+=1; - if(kernel_cache) - kernel_cache_touch(kernel_cache,i); /* make sure it does not get - kicked out of cache */ - /* } */ + select_top_n(selcrit, activedoc, select, (long)(qp_size / 2)); + for (k = 0; + (choosenum < (qp_size / 2)) && (k < (qp_size / 2)) && (k < activedoc); + k++) { + /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */ + i = key[select[k]]; + chosen[i] = 1; + working2dnum[inum + choosenum] = i; + choosenum += 1; + if (kernel_cache) + kernel_cache_touch(kernel_cache, i); /* make sure it does not get + kicked out of cache */ + /* } */ } - activedoc=0; - for(i=0;(j=active2dnum[i])>=0;i++) { - s=label[j]; - if(kernel_cache && cache_only) - valid=(kernel_cache->index[j]>=0); + activedoc = 0; + for (i = 0; (j = active2dnum[i]) >= 0; i++) { + s = label[j]; + if (kernel_cache && cache_only) + valid = (kernel_cache->index[j] >= 0); else - valid=1; - if(valid - && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) - && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) - && (s>0))) - && (!chosen[j]) - && (label[j]) - && (!inconsistent[j])) - { - selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); - /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */ - key[activedoc]=j; + valid = 1; + if (valid && (!((a[j] <= (0 + learn_parm->epsilon_a)) && (s < 0))) && + (!((a[j] >= (learn_parm->svm_cost[j] - learn_parm->epsilon_a)) && + (s > 0))) && + (!chosen[j]) && (label[j]) && (!inconsistent[j])) { + selcrit[activedoc] = + -(double)label[j] * (learn_parm->eps - (double)label[j] * c[j] + + (double)label[j] * lin[j]); + /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); + */ + key[activedoc] = j; activedoc++; } } - select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); - 
for(k=0;(choosenumbiased_hyperplane || (selcrit[select[k]] > 0)) { */ - i=key[select[k]]; - chosen[i]=1; - working2dnum[inum+choosenum]=i; - choosenum+=1; - if(kernel_cache) - kernel_cache_touch(kernel_cache,i); /* make sure it does not get - kicked out of cache */ - /* } */ - } - working2dnum[inum+choosenum]=-1; /* complete index */ - return(choosenum); + select_top_n(selcrit, activedoc, select, (long)(qp_size / 2)); + for (k = 0; (choosenum < qp_size) && (k < (qp_size / 2)) && (k < activedoc); + k++) { + /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */ + i = key[select[k]]; + chosen[i] = 1; + working2dnum[inum + choosenum] = i; + choosenum += 1; + if (kernel_cache) + kernel_cache_touch(kernel_cache, i); /* make sure it does not get + kicked out of cache */ + /* } */ + } + working2dnum[inum + choosenum] = -1; /* complete index */ + return (choosenum); } -long select_next_qp_subproblem_rand(long int *label, - long int *unlabeled, - double *a, double *lin, - double *c, long int totdoc, - long int qp_size, - LEARN_PARM *learn_parm, - long int *inconsistent, - long int *active2dnum, - long int *working2dnum, - double *selcrit, - long int *select, - KERNEL_CACHE *kernel_cache, - long int *key, - long int *chosen, - long int iteration) +long select_next_qp_subproblem_rand( + long int *label, long int *unlabeled, double *a, double *lin, double *c, + long int totdoc, long int qp_size, LEARN_PARM *learn_parm, + long int *inconsistent, long int *active2dnum, long int *working2dnum, + double *selcrit, long int *select, KERNEL_CACHE *kernel_cache, + long int *key, long int *chosen, long int iteration) /* Use the feasible direction approach to select the next qp-subproblem (see section 'Selecting a good working set'). Chooses a feasible direction at (pseudo) random to help jump over numerical problem. 
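
   Unlike the gradient-based variant above, the "criterion" here is only a
   rotation of the example index by the iteration count, so a different
   feasible subset wins each round; in essence:

     // Pseudo-random ranking used to step over numerical stalls.
     static long rotate_rank(long j, long iteration, long totdoc) {
       return (j + iteration) % totdoc;
     }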
*/ { - long choosenum,i,j,k,activedoc,inum; + long choosenum, i, j, k, activedoc, inum; double s; - for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ - choosenum=0; - activedoc=0; - for(i=0;(j=active2dnum[i])>=0;i++) { - s=-label[j]; - if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) - && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) - && (s>0))) - && (!inconsistent[j]) - && (label[j]) - && (!chosen[j])) { - selcrit[activedoc]=(j+iteration) % totdoc; - key[activedoc]=j; + for (inum = 0; working2dnum[inum] >= 0; inum++) + ; /* find end of index */ + choosenum = 0; + activedoc = 0; + for (i = 0; (j = active2dnum[i]) >= 0; i++) { + s = -label[j]; + if ((!((a[j] <= (0 + learn_parm->epsilon_a)) && (s < 0))) && + (!((a[j] >= (learn_parm->svm_cost[j] - learn_parm->epsilon_a)) && + (s > 0))) && + (!inconsistent[j]) && (label[j]) && (!chosen[j])) { + selcrit[activedoc] = (j + iteration) % totdoc; + key[activedoc] = j; activedoc++; } } - select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); - for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k=0;i++) { - s=label[j]; - if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) - && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) - && (s>0))) - && (!inconsistent[j]) - && (label[j]) - && (!chosen[j])) { - selcrit[activedoc]=(j+iteration) % totdoc; - key[activedoc]=j; + activedoc = 0; + for (i = 0; (j = active2dnum[i]) >= 0; i++) { + s = label[j]; + if ((!((a[j] <= (0 + learn_parm->epsilon_a)) && (s < 0))) && + (!((a[j] >= (learn_parm->svm_cost[j] - learn_parm->epsilon_a)) && + (s > 0))) && + (!inconsistent[j]) && (label[j]) && (!chosen[j])) { + selcrit[activedoc] = (j + iteration) % totdoc; + key[activedoc] = j; activedoc++; } } - select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); - for(k=0;(choosenum=0;ii++) { - ex_c=learn_parm->svm_c-learn_parm->epsilon_a; - if(alphaslack[docs[i]->slackid] >= ex_c) { - dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */ - target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */ - if((a[i]>learn_parm->epsilon_a) && (dist > target)) { - if((dist-target)>maxdiff) { /* largest violation */ - maxdiff=dist-target; - maxdiffid=docs[i]->slackid; - } + maxdiff = 0; + maxdiffid = 0; + for (ii = 0; (i = active2dnum[ii]) >= 0; ii++) { + ex_c = learn_parm->svm_c - learn_parm->epsilon_a; + if (alphaslack[docs[i]->slackid] >= ex_c) { + dist = + (lin[i]) * (double)label[i] + slack[docs[i]->slackid]; /* distance */ + target = + -(learn_parm->eps - (double)label[i] * c[i]); /* rhs of constraint */ + if ((a[i] > learn_parm->epsilon_a) && (dist > target)) { + if ((dist - target) > maxdiff) { /* largest violation */ + maxdiff = dist - target; + maxdiffid = docs[i]->slackid; + } } } } - (*maxviol)=maxdiff; - return(maxdiffid); -} + (*maxviol) = maxdiff; + return (maxdiffid); +} + +void select_top_n(double *selcrit, long int range, long int *select, + long int n) { + register long i, j; - -void select_top_n(double *selcrit, long int range, long int *select, - long int n) -{ - register long i,j; - - for(i=0;(i=0;j--) { - if((j>0) && (selcrit[select[j-1]]= 0; j--) { + if ((j > 0) && (selcrit[select[j - 1]] < selcrit[i])) { + select[j] = select[j - 1]; + } else { + select[j] = i; + j = -1; } } } - if(n>0) { - for(i=n;iselcrit[select[n-1]]) { - for(j=n-1;j>=0;j--) { - if((j>0) && (selcrit[select[j-1]] 0) { + for (i = n; i < range; i++) { + if (selcrit[i] > selcrit[select[n - 1]]) { + for (j = n - 1; j >= 0; j--) { + if ((j > 0) && (selcrit[select[j - 1]] < 
selcrit[i])) { + select[j] = select[j - 1]; + } else { + select[j] = i; + j = -1; + } + } } } } -} - - +} + /******************************** Shrinking *********************************/ -void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc, - long int maxhistory) -{ +void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc, + long int maxhistory) { long i; - shrink_state->deactnum=0; - shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc); - shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc); - shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory); - shrink_state->maxhistory=maxhistory; - shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc); - shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc); + shrink_state->deactnum = 0; + shrink_state->active = (long *)my_malloc(sizeof(long) * totdoc); + shrink_state->inactive_since = (long *)my_malloc(sizeof(long) * totdoc); + shrink_state->a_history = (double **)my_malloc(sizeof(double *) * maxhistory); + shrink_state->maxhistory = maxhistory; + shrink_state->last_lin = (double *)my_malloc(sizeof(double) * totdoc); + shrink_state->last_a = (double *)my_malloc(sizeof(double) * totdoc); - for(i=0;iactive[i]=1; - shrink_state->inactive_since[i]=0; - shrink_state->last_a[i]=0; - shrink_state->last_lin[i]=0; + for (i = 0; i < totdoc; i++) { + shrink_state->active[i] = 1; + shrink_state->inactive_since[i] = 0; + shrink_state->last_a[i] = 0; + shrink_state->last_lin[i] = 0; } } -void shrink_state_cleanup(SHRINK_STATE *shrink_state) -{ +void shrink_state_cleanup(SHRINK_STATE *shrink_state) { free(shrink_state->active); free(shrink_state->inactive_since); - if(shrink_state->deactnum > 0) - free(shrink_state->a_history[shrink_state->deactnum-1]); + if (shrink_state->deactnum > 0) + free(shrink_state->a_history[shrink_state->deactnum - 1]); free(shrink_state->a_history); free(shrink_state->last_a); free(shrink_state->last_lin); } -long shrink_problem(DOC **docs, - LEARN_PARM *learn_parm, - SHRINK_STATE *shrink_state, - KERNEL_PARM *kernel_parm, - long int *active2dnum, - long int *last_suboptimal_at, - long int iteration, - long int totdoc, - long int minshrink, - double *a, - long int *inconsistent) - /* Shrink some variables away. Do the shrinking only if at least - minshrink variables can be removed. */ +long shrink_problem(DOC **docs, LEARN_PARM *learn_parm, + SHRINK_STATE *shrink_state, KERNEL_PARM *kernel_parm, + long int *active2dnum, long int *last_suboptimal_at, + long int iteration, long int totdoc, long int minshrink, + double *a, long int *inconsistent) +/* Shrink some variables away. Do the shrinking only if at least + minshrink variables can be removed. 
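
   The candidate test applied twice in the body below can be read on its
   own: a variable qualifies for shrinking once it has looked optimal at a
   bound for more than svm_iter_to_shrink consecutive iterations, or has
   been marked inconsistent (shrink_candidate is an illustrative helper,
   not a function of this file):

     static int shrink_candidate(long iteration, long last_suboptimal_at,
                                 long svm_iter_to_shrink, long inconsistent) {
       return ((iteration - last_suboptimal_at) > svm_iter_to_shrink) ||
              (inconsistent != 0);
     }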
*/ { - long i,ii,change,activenum,lastiter; - double *a_old; - - activenum=0; - change=0; - for(ii=0;active2dnum[ii]>=0;ii++) { - i=active2dnum[ii]; + long i, ii, change, activenum, lastiter; + double *a_old; + + activenum = 0; + change = 0; + for (ii = 0; active2dnum[ii] >= 0; ii++) { + i = active2dnum[ii]; activenum++; - if(0 && learn_parm->sharedslack) - lastiter=last_suboptimal_at[docs[i]->slackid]; + if (0 && learn_parm->sharedslack) + lastiter = last_suboptimal_at[docs[i]->slackid]; else - lastiter=last_suboptimal_at[i]; - if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) - || (inconsistent[i])) { + lastiter = last_suboptimal_at[i]; + if (((iteration - lastiter) > learn_parm->svm_iter_to_shrink) || + (inconsistent[i])) { change++; } } - if((change>=minshrink) /* shrink only if sufficiently many candidates */ - && (shrink_state->deactnummaxhistory)) { /* and enough memory */ + if ((change >= minshrink) /* shrink only if sufficiently many candidates */ + && (shrink_state->deactnum < + shrink_state->maxhistory)) { /* and enough memory */ /* Shrink problem by removing those variables which are */ /* optimal at a bound for a minimum number of iterations */ - if(verbosity>=2) { - printf(" Shrinking..."); fflush(stdout); + if (verbosity >= 2) { + printf(" Shrinking..."); + fflush(stdout); } - if(kernel_parm->kernel_type != LINEAR) { /* non-linear case save alphas */ - a_old=(double *)my_malloc(sizeof(double)*totdoc); - shrink_state->a_history[shrink_state->deactnum]=a_old; - for(i=0;ikernel_type != LINEAR) { /* non-linear case save alphas */ + a_old = (double *)my_malloc(sizeof(double) * totdoc); + shrink_state->a_history[shrink_state->deactnum] = a_old; + for (i = 0; i < totdoc; i++) { + a_old[i] = a[i]; } } - for(ii=0;active2dnum[ii]>=0;ii++) { - i=active2dnum[ii]; - if(0 && learn_parm->sharedslack) - lastiter=last_suboptimal_at[docs[i]->slackid]; - else - lastiter=last_suboptimal_at[i]; - if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) - || (inconsistent[i])) { - shrink_state->active[i]=0; - shrink_state->inactive_since[i]=shrink_state->deactnum; + for (ii = 0; active2dnum[ii] >= 0; ii++) { + i = active2dnum[ii]; + if (0 && learn_parm->sharedslack) + lastiter = last_suboptimal_at[docs[i]->slackid]; + else + lastiter = last_suboptimal_at[i]; + if (((iteration - lastiter) > learn_parm->svm_iter_to_shrink) || + (inconsistent[i])) { + shrink_state->active[i] = 0; + shrink_state->inactive_since[i] = shrink_state->deactnum; } } - activenum=compute_index(shrink_state->active,totdoc,active2dnum); + activenum = compute_index(shrink_state->active, totdoc, active2dnum); shrink_state->deactnum++; - if(kernel_parm->kernel_type == LINEAR) { - shrink_state->deactnum=0; + if (kernel_parm->kernel_type == LINEAR) { + shrink_state->deactnum = 0; } - if(verbosity>=2) { - printf("done.\n"); fflush(stdout); - printf(" Number of inactive variables = %ld\n",totdoc-activenum); + if (verbosity >= 2) { + printf("done.\n"); + fflush(stdout); + printf(" Number of inactive variables = %ld\n", totdoc - activenum); } } - return(activenum); -} - - -void reactivate_inactive_examples(long int *label, - long int *unlabeled, - double *a, - SHRINK_STATE *shrink_state, - double *lin, - double *c, - long int totdoc, - long int totwords, - long int iteration, - LEARN_PARM *learn_parm, - long int *inconsistent, - DOC **docs, - KERNEL_PARM *kernel_parm, - KERNEL_CACHE *kernel_cache, - MODEL *model, - CFLOAT *aicache, - double *weights, - double *maxdiff) - /* Make all variables active again which had been 
removed by - shrinking. */ - /* Computes lin for those variables from scratch. */ - /* WARNING: Assumes that array of weights is initialized to all zero - values for linear kernel! */ + return (activenum); +} + +void reactivate_inactive_examples( + long int *label, long int *unlabeled, double *a, SHRINK_STATE *shrink_state, + double *lin, double *c, long int totdoc, long int totwords, + long int iteration, LEARN_PARM *learn_parm, long int *inconsistent, + DOC **docs, KERNEL_PARM *kernel_parm, KERNEL_CACHE *kernel_cache, + MODEL *model, CFLOAT *aicache, double *weights, double *maxdiff) +/* Make all variables active again which had been removed by + shrinking. */ +/* Computes lin for those variables from scratch. */ +/* WARNING: Assumes that array of weights is initialized to all zero + values for linear kernel! */ { - register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum; - long *changed,*inactive; - register double kernel_val,*a_old,dist; - double ex_c,target; + register long i, j, ii, jj, t, *changed2dnum, *inactive2dnum; + long *changed, *inactive; + register double kernel_val, *a_old, dist; + double ex_c, target; SVECTOR *f; - if(kernel_parm->kernel_type == LINEAR) { /* special linear case */ + if (kernel_parm->kernel_type == LINEAR) { /* special linear case */ /* clear_vector_n(weights,totwords); set weights to zero */ - a_old=shrink_state->last_a; - for(i=0;ifvec;f;f=f->next) - add_vector_ns(weights,f, - f->factor*((a[i]-a_old[i])*(double)label[i])); - a_old[i]=a[i]; + a_old = shrink_state->last_a; + for (i = 0; i < totdoc; i++) { + if (a[i] != a_old[i]) { + for (f = docs[i]->fvec; f; f = f->next) + add_vector_ns(weights, f, + f->factor * ((a[i] - a_old[i]) * (double)label[i])); + a_old[i] = a[i]; } } - for(i=0;iactive[i]) { - for(f=docs[i]->fvec;f;f=f->next) - lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f); + for (i = 0; i < totdoc; i++) { + if (!shrink_state->active[i]) { + for (f = docs[i]->fvec; f; f = f->next) + lin[i] = shrink_state->last_lin[i] + f->factor * sprod_ns(weights, f); } - shrink_state->last_lin[i]=lin[i]; + shrink_state->last_lin[i] = lin[i]; } - for(i=0;ifvec;f;f=f->next) - mult_vector_ns(weights,f,0.0); /* set weights back to zero */ + for (i = 0; i < totdoc; i++) { + for (f = docs[i]->fvec; f; f = f->next) + mult_vector_ns(weights, f, 0.0); /* set weights back to zero */ } - } - else { - changed=(long *)my_malloc(sizeof(long)*totdoc); - changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); - inactive=(long *)my_malloc(sizeof(long)*totdoc); - inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); - for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) { - if(verbosity>=2) { - printf("%ld..",t); fflush(stdout); + } else { + changed = (long *)my_malloc(sizeof(long) * totdoc); + changed2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + inactive = (long *)my_malloc(sizeof(long) * totdoc); + inactive2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + for (t = shrink_state->deactnum - 1; (t >= 0) && shrink_state->a_history[t]; + t--) { + if (verbosity >= 2) { + printf("%ld..", t); + fflush(stdout); } - a_old=shrink_state->a_history[t]; - for(i=0;iactive[i]) - && (shrink_state->inactive_since[i] == t)); - changed[i]= (a[i] != a_old[i]); + a_old = shrink_state->a_history[t]; + for (i = 0; i < totdoc; i++) { + inactive[i] = ((!shrink_state->active[i]) && + (shrink_state->inactive_since[i] == t)); + changed[i] = (a[i] != a_old[i]); } - compute_index(inactive,totdoc,inactive2dnum); - 
compute_index(changed,totdoc,changed2dnum); - - for(ii=0;(i=changed2dnum[ii])>=0;ii++) { - get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache, - kernel_parm); - for(jj=0;(j=inactive2dnum[jj])>=0;jj++) { - kernel_val=aicache[j]; - lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]); - } + compute_index(inactive, totdoc, inactive2dnum); + compute_index(changed, totdoc, changed2dnum); + + for (ii = 0; (i = changed2dnum[ii]) >= 0; ii++) { + get_kernel_row(kernel_cache, docs, i, totdoc, inactive2dnum, aicache, + kernel_parm); + for (jj = 0; (j = inactive2dnum[jj]) >= 0; jj++) { + kernel_val = aicache[j]; + lin[j] += (((a[i] * kernel_val) - (a_old[i] * kernel_val)) * + (double)label[i]); + } } } free(changed); @@ -3458,255 +3468,249 @@ void reactivate_inactive_examples(long int *label, free(inactive); free(inactive2dnum); } - (*maxdiff)=0; - for(i=0;iinactive_since[i]=shrink_state->deactnum-1; - if(!inconsistent[i]) { - dist=(lin[i]-model->b)*(double)label[i]; - target=-(learn_parm->eps-(double)label[i]*c[i]); - ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; - if((a[i]>learn_parm->epsilon_a) && (dist > target)) { - if((dist-target)>(*maxdiff)) /* largest violation */ - (*maxdiff)=dist-target; + (*maxdiff) = 0; + for (i = 0; i < totdoc; i++) { + shrink_state->inactive_since[i] = shrink_state->deactnum - 1; + if (!inconsistent[i]) { + dist = (lin[i] - model->b) * (double)label[i]; + target = -(learn_parm->eps - (double)label[i] * c[i]); + ex_c = learn_parm->svm_cost[i] - learn_parm->epsilon_a; + if ((a[i] > learn_parm->epsilon_a) && (dist > target)) { + if ((dist - target) > (*maxdiff)) /* largest violation */ + (*maxdiff) = dist - target; + } else if ((a[i] < ex_c) && (dist < target)) { + if ((target - dist) > (*maxdiff)) /* largest violation */ + (*maxdiff) = target - dist; } - else if((a[i](*maxdiff)) /* largest violation */ - (*maxdiff)=target-dist; - } - if((a[i]>(0+learn_parm->epsilon_a)) - && (a[i]active[i]=1; /* not at bound */ - } - else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) { - shrink_state->active[i]=1; - } - else if((a[i]>=ex_c) - && (dist > (target-learn_parm->epsilon_shrink))) { - shrink_state->active[i]=1; - } - else if(learn_parm->sharedslack) { /* make all active when sharedslack */ - shrink_state->active[i]=1; + if ((a[i] > (0 + learn_parm->epsilon_a)) && (a[i] < ex_c)) { + shrink_state->active[i] = 1; /* not at bound */ + } else if ((a[i] <= (0 + learn_parm->epsilon_a)) && + (dist < (target + learn_parm->epsilon_shrink))) { + shrink_state->active[i] = 1; + } else if ((a[i] >= ex_c) && + (dist > (target - learn_parm->epsilon_shrink))) { + shrink_state->active[i] = 1; + } else if (learn_parm + ->sharedslack) { /* make all active when sharedslack */ + shrink_state->active[i] = 1; } } } - if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */ - for(i=0;ia_history[shrink_state->deactnum-1])[i]=a[i]; + if (kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */ + for (i = 0; i < totdoc; i++) { + (shrink_state->a_history[shrink_state->deactnum - 1])[i] = a[i]; } - for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) { + for (t = shrink_state->deactnum - 2; (t >= 0) && shrink_state->a_history[t]; + t--) { free(shrink_state->a_history[t]); - shrink_state->a_history[t]=0; + shrink_state->a_history[t] = 0; } } } /****************************** Cache handling *******************************/ -void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC 
**docs, - long int docnum, long int totdoc, - long int *active2dnum, CFLOAT *buffer, - KERNEL_PARM *kernel_parm) - /* Get's a row of the matrix of kernel values This matrix has the - same form as the Hessian, just that the elements are not - multiplied by */ - /* y_i * y_j * a_i * a_j */ - /* Takes the values from the cache if available. */ +void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, long int docnum, + long int totdoc, long int *active2dnum, CFLOAT *buffer, + KERNEL_PARM *kernel_parm) +/* Get's a row of the matrix of kernel values This matrix has the + same form as the Hessian, just that the elements are not + multiplied by */ +/* y_i * y_j * a_i * a_j */ +/* Takes the values from the cache if available. */ { - register long i,j,start; + register long i, j, start; DOC *ex; - ex=docs[docnum]; + ex = docs[docnum]; - if(kernel_cache && (kernel_cache->index[docnum] != -1)) {/* row is cached? */ - kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time;/* lru */ - start=kernel_cache->activenum*kernel_cache->index[docnum]; - for(i=0;(j=active2dnum[i])>=0;i++) { - if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */ - buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]]; - } - else { - buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); + if (kernel_cache && + (kernel_cache->index[docnum] != -1)) { /* row is cached? */ + kernel_cache->lru[kernel_cache->index[docnum]] = + kernel_cache->time; /* lru */ + start = kernel_cache->activenum * kernel_cache->index[docnum]; + for (i = 0; (j = active2dnum[i]) >= 0; i++) { + if (kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */ + buffer[j] = + kernel_cache->buffer[start + kernel_cache->totdoc2active[j]]; + } else { + buffer[j] = (CFLOAT)kernel(kernel_parm, ex, docs[j]); } } - } - else { - for(i=0;(j=active2dnum[i])>=0;i++) { - buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); + } else { + for (i = 0; (j = active2dnum[i]) >= 0; i++) { + buffer[j] = (CFLOAT)kernel(kernel_parm, ex, docs[j]); } } -} - - -void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, - long int m, KERNEL_PARM *kernel_parm) - /* Fills cache for the row m */ +} + +void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, long int m, + KERNEL_PARM *kernel_parm) +/* Fills cache for the row m */ { register DOC *ex; - register long j,k,l; + register long j, k, l; register CFLOAT *cache; - if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet*/ - cache = kernel_cache_clean_and_malloc(kernel_cache,m); - if(cache) { - l=kernel_cache->totdoc2active[m]; - ex=docs[m]; - for(j=0;jactivenum;j++) { /* fill cache */ - k=kernel_cache->active2totdoc[j]; - if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) { - cache[j]=kernel_cache->buffer[kernel_cache->activenum - *kernel_cache->index[k]+l]; - } - else { - cache[j]=kernel(kernel_parm,ex,docs[k]); - } + if (!kernel_cache_check(kernel_cache, m)) { /* not cached yet*/ + cache = kernel_cache_clean_and_malloc(kernel_cache, m); + if (cache) { + l = kernel_cache->totdoc2active[m]; + ex = docs[m]; + for (j = 0; j < kernel_cache->activenum; j++) { /* fill cache */ + k = kernel_cache->active2totdoc[j]; + if ((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) { + cache[j] = + kernel_cache + ->buffer[kernel_cache->activenum * kernel_cache->index[k] + + l]; + } else { + cache[j] = kernel(kernel_parm, ex, docs[k]); + } } - } - else { + } else { perror("Error: Kernel cache full! 
=> increase cache size"); } } -} - - -void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs, - long int *key, long int varnum, - KERNEL_PARM *kernel_parm) - /* Fills cache for the rows in key */ +} + +void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs, + long int *key, long int varnum, + KERNEL_PARM *kernel_parm) +/* Fills cache for the rows in key */ { register long i; - for(i=0;i=2) { - printf(" Reorganizing cache..."); fflush(stdout); + if (verbosity >= 2) { + printf(" Reorganizing cache..."); + fflush(stdout); } - keep=(long *)my_malloc(sizeof(long)*totdoc); - for(j=0;jactivenum) && (scountactive2totdoc[jj]; - if(!after[j]) { + scount = 0; + for (jj = 0; (jj < kernel_cache->activenum) && (scount < numshrink); jj++) { + j = kernel_cache->active2totdoc[jj]; + if (!after[j]) { scount++; - keep[j]=0; + keep[j] = 0; } } - for(i=0;imax_elems;i++) { - for(jj=0;jjactivenum;jj++) { - j=kernel_cache->active2totdoc[jj]; - if(!keep[j]) { - from++; - } - else { - kernel_cache->buffer[to]=kernel_cache->buffer[from]; - to++; - from++; + for (i = 0; i < kernel_cache->max_elems; i++) { + for (jj = 0; jj < kernel_cache->activenum; jj++) { + j = kernel_cache->active2totdoc[jj]; + if (!keep[j]) { + from++; + } else { + kernel_cache->buffer[to] = kernel_cache->buffer[from]; + to++; + from++; } } } - kernel_cache->activenum=0; - for(j=0;jtotdoc2active[j] != -1)) { - kernel_cache->active2totdoc[kernel_cache->activenum]=j; - kernel_cache->totdoc2active[j]=kernel_cache->activenum; + kernel_cache->activenum = 0; + for (j = 0; j < totdoc; j++) { + if ((keep[j]) && (kernel_cache->totdoc2active[j] != -1)) { + kernel_cache->active2totdoc[kernel_cache->activenum] = j; + kernel_cache->totdoc2active[j] = kernel_cache->activenum; kernel_cache->activenum++; - } - else { - kernel_cache->totdoc2active[j]=-1; + } else { + kernel_cache->totdoc2active[j] = -1; } } - kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum); - if(kernel_cache->max_elems>totdoc) { - kernel_cache->max_elems=totdoc; + kernel_cache->max_elems = + (long)(kernel_cache->buffsize / kernel_cache->activenum); + if (kernel_cache->max_elems > totdoc) { + kernel_cache->max_elems = totdoc; } free(keep); - if(verbosity>=2) { - printf("done.\n"); fflush(stdout); - printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); + if (verbosity >= 2) { + printf("done.\n"); + fflush(stdout); + printf(" Cache-size in rows = %ld\n", kernel_cache->max_elems); } } -KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize) -{ +KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize) { long i; KERNEL_CACHE *kernel_cache; - kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE)); - kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc); - kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc); - kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc); - kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc); - kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc); - kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc); - kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024); + kernel_cache = (KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE)); + kernel_cache->index = (long *)my_malloc(sizeof(long) * totdoc); + kernel_cache->occu = (long *)my_malloc(sizeof(long) * totdoc); + kernel_cache->lru = (long *)my_malloc(sizeof(long) * totdoc); + kernel_cache->invindex = (long *)my_malloc(sizeof(long) * totdoc); + 
kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long) * totdoc); + kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long) * totdoc); + kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024 * 1024); - kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024); + kernel_cache->buffsize = (long)(buffsize / sizeof(CFLOAT) * 1024 * 1024); - kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc); - if(kernel_cache->max_elems>totdoc) { - kernel_cache->max_elems=totdoc; + kernel_cache->max_elems = (long)(kernel_cache->buffsize / totdoc); + if (kernel_cache->max_elems > totdoc) { + kernel_cache->max_elems = totdoc; } - if(verbosity>=2) { - printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); - printf(" Kernel evals so far: %ld\n",kernel_cache_statistic); + if (verbosity >= 2) { + printf(" Cache-size in rows = %ld\n", kernel_cache->max_elems); + printf(" Kernel evals so far: %ld\n", kernel_cache_statistic); } - kernel_cache->elems=0; /* initialize cache */ - for(i=0;iindex[i]=-1; - kernel_cache->lru[i]=0; + kernel_cache->elems = 0; /* initialize cache */ + for (i = 0; i < totdoc; i++) { + kernel_cache->index[i] = -1; + kernel_cache->lru[i] = 0; } - for(i=0;ioccu[i]=0; - kernel_cache->invindex[i]=-1; + for (i = 0; i < totdoc; i++) { + kernel_cache->occu[i] = 0; + kernel_cache->invindex[i] = -1; } - kernel_cache->activenum=totdoc;; - for(i=0;iactive2totdoc[i]=i; - kernel_cache->totdoc2active[i]=i; + kernel_cache->activenum = totdoc; + ; + for (i = 0; i < totdoc; i++) { + kernel_cache->active2totdoc[i] = i; + kernel_cache->totdoc2active[i] = i; } - kernel_cache->time=0; - - return(kernel_cache); -} - -void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache) -{ - long maxlru=0,k; - - for(k=0;kmax_elems;k++) { - if(maxlru < kernel_cache->lru[k]) - maxlru=kernel_cache->lru[k]; + kernel_cache->time = 0; + + return (kernel_cache); +} + +void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache) { + long maxlru = 0, k; + + for (k = 0; k < kernel_cache->max_elems; k++) { + if (maxlru < kernel_cache->lru[k]) + maxlru = kernel_cache->lru[k]; } - for(k=0;kmax_elems;k++) { - kernel_cache->lru[k]-=maxlru; + for (k = 0; k < kernel_cache->max_elems; k++) { + kernel_cache->lru[k] -= maxlru; } } -void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache) -{ +void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache) { free(kernel_cache->index); free(kernel_cache->occu); free(kernel_cache->lru); @@ -3717,500 +3721,503 @@ void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache) free(kernel_cache); } -long kernel_cache_malloc(KERNEL_CACHE *kernel_cache) -{ +long kernel_cache_malloc(KERNEL_CACHE *kernel_cache) { long i; - if(kernel_cache_space_available(kernel_cache)) { - for(i=0;imax_elems;i++) { - if(!kernel_cache->occu[i]) { - kernel_cache->occu[i]=1; - kernel_cache->elems++; - return(i); + if (kernel_cache_space_available(kernel_cache)) { + for (i = 0; i < kernel_cache->max_elems; i++) { + if (!kernel_cache->occu[i]) { + kernel_cache->occu[i] = 1; + kernel_cache->elems++; + return (i); } } } - return(-1); + return (-1); } -void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i) -{ - kernel_cache->occu[i]=0; +void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i) { + kernel_cache->occu[i] = 0; kernel_cache->elems--; } -long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache) - /* remove least recently used cache element */ -{ - register long k,least_elem=-1,least_time; +long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache) +/* remove least recently used cache element */ +{ 
+ register long k, least_elem = -1, least_time; - least_time=kernel_cache->time+1; - for(k=0;kmax_elems;k++) { - if(kernel_cache->invindex[k] != -1) { - if(kernel_cache->lru[k]lru[k]; - least_elem=k; + least_time = kernel_cache->time + 1; + for (k = 0; k < kernel_cache->max_elems; k++) { + if (kernel_cache->invindex[k] != -1) { + if (kernel_cache->lru[k] < least_time) { + least_time = kernel_cache->lru[k]; + least_elem = k; } } } - if(least_elem != -1) { - kernel_cache_free(kernel_cache,least_elem); - kernel_cache->index[kernel_cache->invindex[least_elem]]=-1; - kernel_cache->invindex[least_elem]=-1; - return(1); + if (least_elem != -1) { + kernel_cache_free(kernel_cache, least_elem); + kernel_cache->index[kernel_cache->invindex[least_elem]] = -1; + kernel_cache->invindex[least_elem] = -1; + return (1); } - return(0); -} - - -CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache, - long int docnum) - /* Get a free cache entry. In case cache is full, the lru element - is removed. */ + return (0); +} + +CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache, + long int docnum) +/* Get a free cache entry. In case cache is full, the lru element + is removed. */ { long result; - if((result = kernel_cache_malloc(kernel_cache)) == -1) { - if(kernel_cache_free_lru(kernel_cache)) { + if ((result = kernel_cache_malloc(kernel_cache)) == -1) { + if (kernel_cache_free_lru(kernel_cache)) { result = kernel_cache_malloc(kernel_cache); } } - kernel_cache->index[docnum]=result; - if(result == -1) { - return(0); + kernel_cache->index[docnum] = result; + if (result == -1) { + return (0); } - kernel_cache->invindex[result]=docnum; - kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */ - return((CFLOAT *)((long)kernel_cache->buffer - +(kernel_cache->activenum*sizeof(CFLOAT)* - kernel_cache->index[docnum]))); + kernel_cache->invindex[result] = docnum; + kernel_cache->lru[kernel_cache->index[docnum]] = kernel_cache->time; /* lru */ + return ((CFLOAT *)((long)kernel_cache->buffer + + (kernel_cache->activenum * sizeof(CFLOAT) * + kernel_cache->index[docnum]))); } -long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum) - /* Update lru time to avoid removal from cache. */ +long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum) +/* Update lru time to avoid removal from cache. */ { - if(kernel_cache && kernel_cache->index[docnum] != -1) { - kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */ - return(1); + if (kernel_cache && kernel_cache->index[docnum] != -1) { + kernel_cache->lru[kernel_cache->index[docnum]] = + kernel_cache->time; /* lru */ + return (1); } - return(0); -} - -long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum) - /* Is that row cached? */ + return (0); +} + +long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum) +/* Is that row cached? */ { - return(kernel_cache->index[docnum] != -1); -} - -long kernel_cache_space_available(KERNEL_CACHE *kernel_cache) - /* Is there room for one more row? */ + return (kernel_cache->index[docnum] != -1); +} + +long kernel_cache_space_available(KERNEL_CACHE *kernel_cache) +/* Is there room for one more row? 
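
   This check gates kernel_cache_malloc; when it reports no room,
   kernel_cache_clean_and_malloc falls back to the LRU scan of
   kernel_cache_free_lru above. That eviction scan in isolation
   (oldest_cached_row is an illustrative name):

     static long oldest_cached_row(long *lru, long *invindex,
                                   long max_elems, long now) {
       long k, best = -1, best_time = now + 1;
       for (k = 0; k < max_elems; k++)
         if (invindex[k] != -1 && lru[k] < best_time) {
           best_time = lru[k];  // older stamp found
           best = k;
         }
       return best;  // -1 when no row is cached
     }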
*/
{
-  return(kernel_cache->elems < kernel_cache->max_elems);
-}
-
+  return (kernel_cache->elems < kernel_cache->max_elems);
+}
+
/************************** Compute estimates ******************************/

-void compute_xa_estimates(MODEL *model, long int *label,
-                          long int *unlabeled, long int totdoc,
-                          DOC **docs, double *lin, double *a,
-                          KERNEL_PARM *kernel_parm,
-                          LEARN_PARM *learn_parm, double *error,
-                          double *recall, double *precision)
-     /* Computes xa-estimate of error rate, recall, and precision. See
-        T. Joachims, Estimating the Generalization Performance of an SVM
-        Efficiently, IMCL, 2000. */
+void compute_xa_estimates(MODEL *model, long int *label, long int *unlabeled,
+                          long int totdoc, DOC **docs, double *lin, double *a,
+                          KERNEL_PARM *kernel_parm, LEARN_PARM *learn_parm,
+                          double *error, double *recall, double *precision)
+/* Computes xa-estimate of error rate, recall, and precision. See
+   T. Joachims, Estimating the Generalization Performance of an SVM
+   Efficiently, ICML, 2000. */
{
-  long i,looerror,looposerror,loonegerror;
-  long totex,totposex;
-  double xi,r_delta,r_delta_sq,sim=0;
-  long *sv2dnum=NULL,*sv=NULL,svnum;
+  long i, looerror, looposerror, loonegerror;
+  long totex, totposex;
+  double xi, r_delta, r_delta_sq, sim = 0;
+  long *sv2dnum = NULL, *sv = NULL, svnum;

-  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
-  r_delta_sq=r_delta*r_delta;
+  r_delta = estimate_r_delta(docs, totdoc, kernel_parm);
+  r_delta_sq = r_delta * r_delta;

-  looerror=0;
-  looposerror=0;
-  loonegerror=0;
-  totex=0;
-  totposex=0;
-  svnum=0;
+  looerror = 0;
+  looposerror = 0;
+  loonegerror = 0;
+  totex = 0;
+  totposex = 0;
+  svnum = 0;

-  if(learn_parm->xa_depth > 0) {
-    sv = (long *)my_malloc(sizeof(long)*(totdoc+11));
-    for(i=0;i<totdoc;i++)
-      sv[i]=0;
-    for(i=1;i<model->sv_num;i++)
-      if(a[model->supvec[i]->docnum]
-         < (learn_parm->svm_cost[model->supvec[i]->docnum]
-            -learn_parm->epsilon_a)) {
-        sv[model->supvec[i]->docnum]=1;
-        svnum++;
+  if (learn_parm->xa_depth > 0) {
+    sv = (long *)my_malloc(sizeof(long) * (totdoc + 11));
+    for (i = 0; i < totdoc; i++)
+      sv[i] = 0;
+    for (i = 1; i < model->sv_num; i++)
+      if (a[model->supvec[i]->docnum] <
+          (learn_parm->svm_cost[model->supvec[i]->docnum] -
+           learn_parm->epsilon_a)) {
+        sv[model->supvec[i]->docnum] = 1;
+        svnum++;
      }
-    sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+    sv2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11));
    clear_index(sv2dnum);
-    compute_index(sv,totdoc,sv2dnum);
+    compute_index(sv, totdoc, sv2dnum);
  }

-  for(i=0;ib)*(double)label[i]);
-      if(xi<0) xi=0;
-      if(label[i]>0) {
-        totposex++;
+    } else {
+      xi = 1.0 - ((lin[i] - model->b) * (double)label[i]);
+      if (xi < 0)
+        xi = 0;
+      if (label[i] > 0) {
+        totposex++;
      }
-      if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) {
-        if(learn_parm->xa_depth > 0) { /* makes assumptions */
-          sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label,
-                                          kernel_parm,learn_parm,
-                                          (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i])));
-        }
-        if((learn_parm->xa_depth == 0) ||
-           ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) {
-          looerror++;
-          if(label[i]>0) {
-            looposerror++;
-          }
-          else {
-            loonegerror++;
-          }
-        }
+      if ((learn_parm->rho * a[i] * r_delta_sq + xi) >= 1.0) {
+        if (learn_parm->xa_depth > 0) { /* makes assumptions */
+          sim = distribute_alpha_t_greedily(
+              sv2dnum, svnum, docs, a, i, label, kernel_parm, learn_parm,
+              (double)((1.0 - xi - a[i] * r_delta_sq) / (2.0 * a[i])));
+        }
+        if ((learn_parm->xa_depth == 0) ||
+            ((a[i] * kernel(kernel_parm, docs[i], docs[i]) + a[i] * 2.0 * sim +
+              xi) >= 1.0)) {
+ looerror++; + if (label[i] > 0) { + looposerror++; + } else { + loonegerror++; + } + } } totex++; } } - (*error)=((double)looerror/(double)totex)*100.0; - (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0; - (*precision)=(((double)totposex-(double)looposerror) - /((double)totposex-(double)looposerror+(double)loonegerror))*100.0; + (*error) = ((double)looerror / (double)totex) * 100.0; + (*recall) = (1.0 - (double)looposerror / (double)totposex) * 100.0; + (*precision) = + (((double)totposex - (double)looposerror) / + ((double)totposex - (double)looposerror + (double)loonegerror)) * + 100.0; free(sv); free(sv2dnum); -} - - -double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum, - DOC **docs, double *a, - long int docnum, - long int *label, - KERNEL_PARM *kernel_parm, - LEARN_PARM *learn_parm, double thresh) - /* Experimental Code improving plain XiAlpha Estimates by - computing a better bound using a greedy optimzation strategy. */ +} + +double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum, + DOC **docs, double *a, long int docnum, + long int *label, KERNEL_PARM *kernel_parm, + LEARN_PARM *learn_parm, double thresh) +/* Experimental Code improving plain XiAlpha Estimates by + computing a better bound using a greedy optimzation strategy. */ { - long best_depth=0; - long i,j,k,d,skip,allskip; - double best,best_val[101],val,init_val_sq,init_val_lin; + long best_depth = 0; + long i, j, k, d, skip, allskip; + double best, best_val[101], val, init_val_sq, init_val_lin; long best_ex[101]; - CFLOAT *cache,*trow; + CFLOAT *cache, *trow; - cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum); - trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum); + cache = (CFLOAT *)my_malloc(sizeof(CFLOAT) * learn_parm->xa_depth * svnum); + trow = (CFLOAT *)my_malloc(sizeof(CFLOAT) * svnum); - for(k=0;kxa_depth;d++) { - allskip=1; - if(d>=1) { - init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)]; - for(k=0;kxa_depth; d++) { + allskip = 1; + if (d >= 1) { + init_val_sq += cache[best_ex[d - 1] + svnum * (d - 1)]; + for (k = 0; k < d - 1; k++) { + init_val_sq += 2.0 * cache[best_ex[k] + svnum * (d - 1)]; } - init_val_lin+=trow[best_ex[d-1]]; + init_val_lin += trow[best_ex[d - 1]]; } - for(i=0;ixa_depth; + if (allskip || (best < thresh)) { + d = learn_parm->xa_depth; } - } - + } + free(cache); free(trow); /* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */ - return(best); -} - - -void estimate_transduction_quality(MODEL *model, long int *label, - long int *unlabeled, - long int totdoc, DOC **docs, double *lin) - /* Loo-bound based on observation that loo-errors must have an - equal distribution in both training and test examples, given - that the test examples are classified correctly. Compare - chapter "Constraints on the Transductive Hyperplane" in my - Dissertation. */ + return (best); +} + +void estimate_transduction_quality(MODEL *model, long int *label, + long int *unlabeled, long int totdoc, + DOC **docs, double *lin) +/* Loo-bound based on observation that loo-errors must have an + equal distribution in both training and test examples, given + that the test examples are classified correctly. Compare + chapter "Constraints on the Transductive Hyperplane" in my + Dissertation. 
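
   The counting loop below applies the xi/alpha criterion: a support vector
   can contribute a leave-one-out error only if
   fabs(alpha) * r_delta_sq + xi >= 1. As a single predicate
   (xa_loo_candidate is an illustrative name; fabs needs math.h):

     static int xa_loo_candidate(double alpha, double xi, double r_delta_sq) {
       return (fabs(alpha) * r_delta_sq + xi) >= 1.0;
     }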
*/ { - long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0; - double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0; - double r_delta,r_delta_sq,xi,xisum=0,asum=0; + long i, j, l = 0, ulab = 0, lab = 0, labpos = 0, labneg = 0, ulabpos = 0, + ulabneg = 0, totulab = 0; + double totlab = 0, totlabpos = 0, totlabneg = 0, labsum = 0, ulabsum = 0; + double r_delta, r_delta_sq, xi, xisum = 0, asum = 0; - r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm)); - r_delta_sq=r_delta*r_delta; + r_delta = estimate_r_delta(docs, totdoc, &(model->kernel_parm)); + r_delta_sq = r_delta * r_delta; - for(j=0;j 0) - totlabpos++; - else - totlabneg++; + if (label[j] > 0) + totlabpos++; + else + totlabneg++; } } - for(j=1;jsv_num;j++) { - i=model->supvec[j]->docnum; - xi=1.0-((lin[i]-model->b)*(double)label[i]); - if(xi<0) xi=0; + for (j = 1; j < model->sv_num; j++) { + i = model->supvec[j]->docnum; + xi = 1.0 - ((lin[i] - model->b) * (double)label[i]); + if (xi < 0) + xi = 0; - xisum+=xi; - asum+=fabs(model->alpha[j]); - if(unlabeled[i]) { - ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi); + xisum += xi; + asum += fabs(model->alpha[j]); + if (unlabeled[i]) { + ulabsum += (fabs(model->alpha[j]) * r_delta_sq + xi); + } else { + labsum += (fabs(model->alpha[j]) * r_delta_sq + xi); } - else { - labsum+=(fabs(model->alpha[j])*r_delta_sq+xi); - } - if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) { + if ((fabs(model->alpha[j]) * r_delta_sq + xi) >= 1) { l++; - if(unlabeled[model->supvec[j]->docnum]) { - ulab++; - if(model->alpha[j] > 0) - ulabpos++; - else - ulabneg++; - } - else { - lab++; - if(model->alpha[j] > 0) - labpos++; - else - labneg++; + if (unlabeled[model->supvec[j]->docnum]) { + ulab++; + if (model->alpha[j] > 0) + ulabpos++; + else + ulabneg++; + } else { + lab++; + if (model->alpha[j] > 0) + labpos++; + else + labneg++; } } } - printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0); - printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0); - printf("xacrit>=1: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0); - printf("xacritsum: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0); - printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum); + printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n", + (double)labpos / (double)totlab * 100.0, + (double)labneg / (double)totlab * 100.0, + (double)totlabpos / (double)(totlab)*100.0); + printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n", + (double)ulabpos / (double)totulab * 100.0, + (double)ulabneg / (double)totulab * 100.0); + printf("xacrit>=1: labeled=%.5f unlabled=%.5f all=%.5f\n", + (double)lab / (double)totlab * 100.0, + (double)ulab / (double)totulab * 100.0, + (double)l / (double)(totdoc)*100.0); + printf("xacritsum: labeled=%.5f unlabled=%.5f all=%.5f\n", + (double)labsum / (double)totlab * 100.0, + (double)ulabsum / (double)totulab * 100.0, + (double)(labsum + ulabsum) / (double)(totdoc)*100.0); + printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n", r_delta_sq, xisum, asum); } -double estimate_margin_vcdim(MODEL *model, double w, double R) - /* optional: length of model vector in feature space */ 
-     /* optional: radius of ball containing the data */
+double estimate_margin_vcdim(MODEL *model, double w, double R)
+/* optional: length of model vector in feature space */
+/* optional: radius of ball containing the data */
 {
   double h;
 
   /* follows chapter 5.6.4 in [Vapnik/95] */
 
-  if(w<0) {
-    w=model_length_s(model);
+  if (w < 0) {
+    w = model_length_s(model);
   }
-  if(R<0) {
-    R=estimate_sphere(model);
+  if (R < 0) {
+    R = estimate_sphere(model);
   }
-  h = w*w * R*R +1;
-  return(h);
+  h = w * w * R * R + 1;
+  return (h);
 }
 
-double estimate_sphere(MODEL *model)
-     /* Approximates the radius of the ball containing */
-     /* the support vectors by bounding it with the */
-{    /* length of the longest support vector. This is */
-  register long j;       /* pretty good for text categorization, since all */
-  double xlen,maxxlen=0; /* documents have feature vectors of length 1. It */
-  DOC *nulldoc;          /* assumes that the center of the ball is at the */
-  WORD nullword;         /* origin of the space. */
-  KERNEL_PARM *kernel_parm=&(model->kernel_parm);
+double estimate_sphere(MODEL *model)
+/* Approximates the radius of the ball containing */
+/* the support vectors by bounding it with the */
+{                           /* length of the longest support vector. This is */
+  register long j;          /* pretty good for text categorization, since all */
+  double xlen, maxxlen = 0; /* documents have feature vectors of length 1. It */
+  DOC *nulldoc;             /* assumes that the center of the ball is at the */
+  WORD nullword;            /* origin of the space. */
+  KERNEL_PARM *kernel_parm = &(model->kernel_parm);
 
-  nullword.wnum=0;
-  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
-
-  for(j=1;j<model->sv_num;j++) {
-    xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j])
-             -2*kernel(kernel_parm,model->supvec[j],nulldoc)
-             +kernel(kernel_parm,nulldoc,nulldoc));
-    if(xlen>maxxlen) {
-      maxxlen=xlen;
+  nullword.wnum = 0;
+  nulldoc = create_example(-2, 0, 0, 0.0, create_svector(&nullword, "", 1.0));
+
+  for (j = 1; j < model->sv_num; j++) {
+    xlen = sqrt(kernel(kernel_parm, model->supvec[j], model->supvec[j]) -
+                2 * kernel(kernel_parm, model->supvec[j], nulldoc) +
+                kernel(kernel_parm, nulldoc, nulldoc));
+    if (xlen > maxxlen) {
+      maxxlen = xlen;
     }
   }
-  free_example(nulldoc,1);
-  return(maxxlen);
+  free_example(nulldoc, 1);
+  return (maxxlen);
 }
 
-double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm)
-{
+double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm) {
   long i;
-  double maxxlen,xlen;
-  DOC *nulldoc;  /* assumes that the center of the ball is at the */
-  WORD nullword; /* origin of the space. */
+  double maxxlen, xlen;
+  DOC *nulldoc;  /* assumes that the center of the ball is at the */
+  WORD nullword; /* origin of the space. */
 
-  nullword.wnum=0;
-  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
-
-  maxxlen=0;
-  for(i=0;i<totdoc;i++) {
-    xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])
-             -2*kernel(kernel_parm,docs[i],nulldoc)
-             +kernel(kernel_parm,nulldoc,nulldoc));
-    if(xlen>maxxlen) {
-      maxxlen=xlen;
+  nullword.wnum = 0;
+  nulldoc = create_example(-2, 0, 0, 0.0, create_svector(&nullword, "", 1.0));
+
+  maxxlen = 0;
+  for (i = 0; i < totdoc; i++) {
+    xlen = sqrt(kernel(kernel_parm, docs[i], docs[i]) -
+                2 * kernel(kernel_parm, docs[i], nulldoc) +
+                kernel(kernel_parm, nulldoc, nulldoc));
+    if (xlen > maxxlen) {
+      maxxlen = xlen;
     }
   }
-  free_example(nulldoc,1);
-  return(maxxlen);
+  free_example(nulldoc, 1);
+  return (maxxlen);
 }
 
-double estimate_r_delta_average(DOC **docs, long int totdoc,
-                                KERNEL_PARM *kernel_parm)
-{
+double estimate_r_delta_average(DOC **docs, long int totdoc,
+                                KERNEL_PARM *kernel_parm) {
   long i;
   double avgxlen;
-  DOC *nulldoc;  /* assumes that the center of the ball is at the */
-  WORD nullword; /* origin of the space. */
+  DOC *nulldoc;  /* assumes that the center of the ball is at the */
+  WORD nullword; /* origin of the space. */
 
-  nullword.wnum=0;
-  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
-
-  avgxlen=0;
-  for(i=0;i<totdoc;i++) {
-    avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i])
-             -2*kernel(kernel_parm,docs[i],nulldoc)
-             +kernel(kernel_parm,nulldoc,nulldoc));
+  nullword.wnum = 0;
+  nulldoc = create_example(-2, 0, 0, 0.0, create_svector(&nullword, "", 1.0));
+
+  avgxlen = 0;
+  for (i = 0; i < totdoc; i++) {
+    avgxlen += sqrt(kernel(kernel_parm, docs[i], docs[i]) -
+                    2 * kernel(kernel_parm, docs[i], nulldoc) +
+                    kernel(kernel_parm, nulldoc, nulldoc));
   }
-  free_example(nulldoc,1);
-  return(avgxlen/totdoc);
+  free_example(nulldoc, 1);
+  return (avgxlen / totdoc);
 }
 
-double length_of_longest_document_vector(DOC **docs, long int totdoc,
-                                         KERNEL_PARM *kernel_parm)
-{
+double length_of_longest_document_vector(DOC **docs, long int totdoc,
+                                         KERNEL_PARM *kernel_parm) {
   long i;
-  double maxxlen,xlen;
+  double maxxlen, xlen;
 
-  maxxlen=0;
-  for(i=0;i<totdoc;i++) {
-    xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]));
-    if(xlen>maxxlen) {
-      maxxlen=xlen;
+  maxxlen = 0;
+  for (i = 0; i < totdoc; i++) {
+    xlen = sqrt(kernel(kernel_parm, docs[i], docs[i]));
+    if (xlen > maxxlen) {
+      maxxlen = xlen;
     }
   }
-  return(maxxlen);
+  return (maxxlen);
 }
 
 /****************************** IO-handling **********************************/
 
-void write_prediction(char *predfile, MODEL *model, double *lin,
-                      double *a, long int *unlabeled,
-                      long int *label, long int totdoc,
-                      LEARN_PARM *learn_parm)
-{
+void write_prediction(char *predfile, MODEL *model, double *lin, double *a,
+                      long int *unlabeled, long int *label, long int totdoc,
+                      LEARN_PARM *learn_parm) {
   FILE *predfl;
   long i;
-  double dist,a_max;
+  double dist, a_max;
 
-  if(verbosity>=1) {
-    printf("Writing prediction file..."); fflush(stdout);
+  if (verbosity >= 1) {
+    printf("Writing prediction file...");
+    fflush(stdout);
   }
-  if ((predfl = fopen (predfile, "w")) == NULL)
-  { perror (predfile); exit (1); }
-  a_max=learn_parm->epsilon_a;
-  for(i=0;i<totdoc;i++) {
-    if((unlabeled[i]) && (a[i]>a_max)) {
-      a_max=a[i];
+  if ((predfl = fopen(predfile, "w")) == NULL) {
+    perror(predfile);
+    exit(1);
+  }
+  a_max = learn_parm->epsilon_a;
+  for (i = 0; i < totdoc; i++) {
+    if ((unlabeled[i]) && (a[i] > a_max)) {
+      a_max = a[i];
     }
   }
-  for(i=0;i<totdoc;i++) {
-    if(unlabeled[i]) {
-      if((a[i]>(learn_parm->epsilon_a))) {
-        dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0));
+  for (i = 0; i < totdoc; i++) {
+    if (unlabeled[i]) {
+      if ((a[i] > (learn_parm->epsilon_a))) {
+        dist = (double)label[i] *
+               (1.0 - learn_parm->epsilon_crit - a[i] / (a_max * 2.0));
+      } else {
+        dist = (lin[i] - model->b);
       }
-      else {
-        dist=(lin[i]-model->b);
-      }
-      if(dist>0) {
-        fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
-      }
-      else {
-        fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
+      if (dist > 0) {
+        fprintf(predfl, "%.8g:+1 %.8g:-1\n", dist, -dist);
+      } else {
+        fprintf(predfl, "%.8g:-1 %.8g:+1\n", -dist, dist);
       }
     }
   }
   fclose(predfl);
-  if(verbosity>=1) {
+  if (verbosity >= 1) {
     printf("done\n");
   }
 }
 
-void write_alphas(char *alphafile, double *a,
-                  long int *label, long int totdoc)
-{
+void write_alphas(char *alphafile, double *a, long int *label,
+                  long int totdoc) {
   FILE *alphafl;
   long i;
 
-  if(verbosity>=1) {
-    printf("Writing alpha file..."); fflush(stdout);
+  if (verbosity >= 1) {
+    printf("Writing alpha file...");
+    fflush(stdout);
   }
-  if ((alphafl = fopen (alphafile, "w")) == NULL)
-  { perror (alphafile); exit (1); }
-  for(i=0;i<totdoc;i++) {
-    fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]);
-  }
-  fclose(alphafl);
-
-  if(verbosity>=1) {
+  if ((alphafl = fopen(alphafile, "w")) == NULL) {
+    perror(alphafile);
+    exit(1);
+  }
+  for (i = 0; i < totdoc; i++) {
+    fprintf(alphafl, "%.18g\n", a[i] * (double)label[i]);
+  }
+  fclose(alphafl);
+
+  if (verbosity >= 1) {
printf("done\n"); } -} - +} diff --git a/src/classifier/svm/svm_light/svm_learn.h b/src/classifier/svm/svm_light/svm_learn.h index 9dc57b4..6e01304 100644 --- a/src/classifier/svm/svm_light/svm_learn.h +++ b/src/classifier/svm/svm_light/svm_learn.h @@ -19,9 +19,12 @@ #ifndef SVM_LEARN #define SVM_LEARN +#include "svm_common.h" + #ifdef __cplusplus extern "C" { #endif + void svm_learn_classification(DOC **, double *, long, long, LEARN_PARM *, KERNEL_PARM *, KERNEL_CACHE *, MODEL *, double *); void svm_learn_regression(DOC **, double *, long, long, LEARN_PARM *, diff --git a/src/hand/pose3d/estimator.cpp b/src/hand/pose3d/estimator.cpp index ceb7589..c33cb1f 100644 --- a/src/hand/pose3d/estimator.cpp +++ b/src/hand/pose3d/estimator.cpp @@ -53,6 +53,8 @@ int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata, int ret = static_cast(d)->Detect( rgbdata, img_width, img_height, objs); if (ret != 0) { + objects->length = 0; + objects->items = NULL; return ret; } const size_t total_objs = objs.size(); @@ -61,6 +63,7 @@ int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata, objects->items = NULL; return 0; } + objects->items = (PalmObject *)malloc(total_objs * sizeof(PalmObject)); for (size_t i = 0; i < total_objs; ++i) { objects->items[i].score = objs[i].score; @@ -75,7 +78,7 @@ int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata, (Point2fVector *)malloc(sizeof(Point2fVector)); objects->items[i].landmarks->length = 7; objects->items[i].landmarks->points = - (Point2f *)malloc(4 * sizeof(Point2f)); + (Point2f *)malloc(7 * sizeof(Point2f)); for (size_t j = 0; j < 7; ++j) { objects->items[i].landmarks->points[j] = objs[i].landmarks[j]; } @@ -85,18 +88,26 @@ int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata, objects->items[i].skeleton3d = NULL; continue; } + const size_t total_skeleton3d = objs[i].skeleton3d.size(); + if (total_skeleton3d == 0) { + objects->items[i].skeleton = NULL; + objects->items[i].skeleton3d = NULL; + continue; + } objects->items[i].skeleton = (Point2fVector *)malloc(sizeof(Point2fVector)); objects->items[i].skeleton->length = total_skeleton; objects->items[i].skeleton->points = (Point2f *)malloc(total_skeleton * sizeof(Point2f)); objects->items[i].skeleton3d = (Point3dVector *)malloc(sizeof(Point3dVector)); - objects->items[i].skeleton3d->length = total_skeleton; + objects->items[i].skeleton3d->length = total_skeleton3d; objects->items[i].skeleton3d->points = - (Point3d *)malloc(total_skeleton * sizeof(Point3d)); + (Point3d *)malloc(total_skeleton3d * sizeof(Point3d)); for (size_t j = 0; j < total_skeleton; ++j) { objects->items[i].skeleton->points[j].x = objs[i].skeleton[j].x; objects->items[i].skeleton->points[j].y = objs[i].skeleton[j].y; + } + for (size_t j = 0; j < total_skeleton3d; ++j) { objects->items[i].skeleton3d->points[j].x = objs[i].skeleton3d[j].x; objects->items[i].skeleton3d->points[j].y = objs[i].skeleton3d[j].y; objects->items[i].skeleton3d->points[j].z = objs[i].skeleton3d[j].z; diff --git a/src/hand/pose3d/mediapipe/mediapipe.cpp b/src/hand/pose3d/mediapipe/mediapipe.cpp index 603f51a..6f5da7a 100644 --- a/src/hand/pose3d/mediapipe/mediapipe.cpp +++ b/src/hand/pose3d/mediapipe/mediapipe.cpp @@ -440,6 +440,7 @@ int MediapipeHand::Detect(const unsigned char *rgbdata, int img_width, decode_bounds(region_list, prob_threshold, target_size, target_size, scores, bboxes, anchors); non_max_suppression(region_list, region_nms_list, nms_threshold); + objects.clear(); 
   pack_detect_result(detect_results, region_nms_list, target_size, objects);
 
@@ -490,10 +491,9 @@ int MediapipeHand::Detect(const unsigned char *rgbdata, int img_width,
 
     ncnn::Mat trans_image =
         ncnn::Mat::from_pixels(trans_mat, ncnn::Mat::PIXEL_RGB, 224, 224);
+    free(trans_mat);
     float score = GetLandmarks(trans_image, tm, objects[i].skeleton,
                                objects[i].skeleton3d);
-
-    free(trans_mat);
   }
   return 0;
 }
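
Note on the final mediapipe.cpp hunk: moving free(trans_mat) above the
GetLandmarks() call is safe because ncnn::Mat::from_pixels copies the source
pixel buffer into the Mat's own allocation rather than aliasing it, so the
landmark pass only ever reads the copy. Below is a minimal standalone sketch of
that ownership rule, not part of the patch; it assumes an installed ncnn, and
the buffer size, fill value, and include path are illustrative.

#include <cstdlib>
#include <cstring>

#include "mat.h" // ncnn core header; adjust the include path to your install

int main() {
  const int w = 224, h = 224;
  // Stand-in for the warped RGB crop the patch calls trans_mat.
  unsigned char *rgb = (unsigned char *)malloc(w * h * 3);
  memset(rgb, 127, w * h * 3); // dummy pixel data

  // from_pixels allocates the Mat's own storage and converts/copies into it,
  // so `rgb` is no longer referenced once the call returns.
  ncnn::Mat m = ncnn::Mat::from_pixels(rgb, ncnn::Mat::PIXEL_RGB, w, h);
  free(rgb); // safe: the Mat holds a deep copy, as the reordered hunk relies on

  return m.empty() ? 1 : 0;
}

Freeing the temporary buffer as soon as the Mat is constructed, as the hunk
does, also keeps the allocation's lifetime obvious and avoids leaking it on any
early exit added later between the conversion and the landmark call.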