sentence.cpp
2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include <stdint.h>
#include "openbr_internal.h"
using namespace cv;
namespace br
{
/*!
 * \ingroup transforms
 * \brief Ordered words
 * \author Josh Klontz \cite jklontz
 *
 * Packs the matrices of a template into a single 1xN CV_8UC1 matrix (the "sentence").
 * Every non-empty matrix at index \c i contributes one record, in ascending index order:
 *  - 4 bytes: the matrix index \c i (the "word")
 *  - 4 bytes: number of rows
 *  - 4 bytes: number of columns
 *  - 4*rows*columns bytes: the raw matrix data
 * All values are written with the in-memory representation of the host. Empty matrices are skipped.
 */
class SentenceTransform : public UntrainableMetaTransform
{
    Q_OBJECT

    /*!
     * \brief Serializes \p src into the sentence layout described above.
     * \param src Template whose matrices are serialized.
     * \param dst Receives the file of \p src and the serialized sentence as its matrix.
     */
    void project(const Template &src, Template &dst) const
    {
        QByteArray sentence;
        QDataStream stream(&sentence, QIODevice::WriteOnly);

        for (int i=0; i<src.size(); i++) {
            if (!src[i].data) continue;

            // The payload below is copied as one flat run of bytes starting at m.data.
            // That is only valid when rows are stored back to back, so a matrix with
            // row padding (e.g. a view into a larger matrix) is compacted first.
            const Mat m = src[i].isContinuous() ? src[i] : src[i].clone();

            const int32_t word = i;
            const int32_t rows = m.rows;
            const int32_t columns = m.cols;
            stream.writeRawData(reinterpret_cast<const char*>(&word), sizeof(word));
            stream.writeRawData(reinterpret_cast<const char*>(&rows), sizeof(rows));
            stream.writeRawData(reinterpret_cast<const char*>(&columns), sizeof(columns));

            // NOTE(review): the payload size assumes 4-byte single-channel elements
            // (presumably CV_32FC1) -- confirm against the upstream transforms.
            stream.writeRawData(reinterpret_cast<const char*>(m.data), 4*rows*columns);
        }

        dst.file = src.file;
        // clone() detaches the matrix from the QByteArray, which dies at the end of this scope.
        dst.m() = Mat(1, sentence.size(), CV_8UC1, sentence.data()).clone();
    }
};
BR_REGISTER(Transform, SentenceTransform)
/*!
* \ingroup distances
* \brief Distance between sentences
* \author Josh Klontz \cite jklontz
*/
class SentenceSimilarityDistance : public UntrainableDistance
{
Q_OBJECT
float compare(const Mat &a, const Mat &b) const
{
uchar *aBuffer = a.data;
uchar *bBuffer = b.data;
const uchar *aEnd = aBuffer + a.cols;
const uchar *bEnd = bBuffer + b.cols;
int32_t aWord, bWord, aRows, bRows, aColumns, bColumns;
float *aData, *bData;
aWord = aRows = aColumns = -2;
bWord = bRows = bColumns = -1;
aData = bData = NULL;
float distance = 0;
int comparisons = 0;
while (true) {
if (aWord < bWord) {
if (aBuffer == aEnd) return distance == 0 ? -std::numeric_limits<float>::max() : comparisons / distance;
aWord = *reinterpret_cast<int32_t*>(aBuffer);
aRows = *reinterpret_cast<int32_t*>(aBuffer+4);
aColumns = *reinterpret_cast<int32_t*>(aBuffer+8);
aData = reinterpret_cast<float*>(aBuffer+12);
aBuffer += 12 + 4*aRows*aColumns;
} else if (bWord < aWord) {
if (bBuffer == bEnd) return comparisons == 0 ? -std::numeric_limits<float>::max() : comparisons / distance;
bWord = *reinterpret_cast<int32_t*>(bBuffer);
bRows = *reinterpret_cast<int32_t*>(bBuffer+4);
bColumns = *reinterpret_cast<int32_t*>(bBuffer+8);
bData = reinterpret_cast<float*>(bBuffer+12);
bBuffer += 12 + 4*bRows*bColumns;
} else {
for (int i=0; i<aRows; i++)
for (int j=0; j<bRows; j++)
for (int k=0; k<aColumns; k++)
distance += pow(aData[i*aColumns+k] - bData[j*bColumns+k], 2.f);
comparisons += aRows * bRows * aColumns;
aWord = -2;
bWord = -1;
}
}
}
};
BR_REGISTER(Distance, SentenceSimilarityDistance)
} // namespace br
#include "sentence.moc"