Krita Source Code Documentation
Loading...
Searching...
No Matches
kfts_fuzzy_match.h
Go to the documentation of this file.
1/*
2 SPDX-FileCopyrightText: 2017 Forrest Smith
3 SPDX-FileCopyrightText: 2020 Waqar Ahmed
4
5 SPDX-License-Identifier: LGPL-2.0-or-later
6*/
7
8#ifndef KFTS_FUZZY_MATCH_H
9#define KFTS_FUZZY_MATCH_H
10
11#include <QString>
12
20namespace kfts
21{
25Q_DECL_UNUSED static bool fuzzy_match_simple(const QString pattern, const QString str);
26
32Q_DECL_UNUSED static bool fuzzy_match(const QString pattern, const QString str, int &outScore);
33Q_DECL_UNUSED static bool fuzzy_match(const QString pattern, const QString str, int &outScore, uint8_t *matches, int maxMatches);
34
39Q_DECL_UNUSED static bool fuzzy_match_sequential(const QString pattern, const QString str, int &outScore);
49Q_DECL_UNUSED static QString to_fuzzy_matched_display_string(const QString pattern, QString &str, const QString &htmlTag, const QString &htmlTagClose);
50}
51
52namespace kfts
53{
54// Forward declarations for "private" implementation
55namespace fuzzy_internal
56{
57static bool fuzzy_match_recursive(QString::const_iterator pattern,
58 QString::const_iterator str,
59 int &outScore,
60 const QString::const_iterator strBegin,
61 const QString::const_iterator strEnd,
62 const QString::const_iterator patternEnd,
63 uint8_t const *srcMatches,
64 uint8_t *newMatches,
65 int maxMatches,
66 int nextMatch,
67 int &recursionCount,
68 int seqBonus = 15);
69}
70
71// Public interface
72static bool fuzzy_match_simple(const QString pattern, const QString str)
73{
74 auto patternIt = pattern.cbegin();
75 for (auto strIt = str.cbegin(); strIt != str.cend() && patternIt != pattern.cend(); ++strIt) {
76 if (strIt->toLower() == patternIt->toLower()) {
77 ++patternIt;
78 }
79 }
80 return patternIt == pattern.cend();
81}
82
83static bool fuzzy_match(const QString pattern, const QString str, int &outScore)
84{
85 uint8_t matches[256];
86 return fuzzy_match(pattern, str, outScore, matches, sizeof(matches));
87}
88
89static bool fuzzy_match(const QString pattern, const QString str, int &outScore, uint8_t *matches, int maxMatches)
90{
91 int recursionCount = 0;
92
93 auto strIt = str.cbegin();
94 auto patternIt = pattern.cbegin();
95 const auto patternEnd = pattern.cend();
96 const auto strEnd = str.cend();
97
98 return fuzzy_internal::fuzzy_match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, maxMatches, 0, recursionCount);
99}
100
101static bool fuzzy_match_sequential(const QString pattern, const QString str, int &outScore)
102{
103 int recursionCount = 0;
104 uint8_t matches[256];
105 auto maxMatches = sizeof(matches);
106 auto strIt = str.cbegin();
107 auto patternIt = pattern.cbegin();
108 const auto patternEnd = pattern.cend();
109 const auto strEnd = str.cend();
110
111 return fuzzy_internal::fuzzy_match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, maxMatches, 0, recursionCount, 40);
112}
113
114// Private implementation
115static bool fuzzy_internal::fuzzy_match_recursive(QString::const_iterator pattern,
116 QString::const_iterator str,
117 int &outScore,
118 const QString::const_iterator strBegin,
119 const QString::const_iterator strEnd,
120 const QString::const_iterator patternEnd,
121 const uint8_t *srcMatches,
122 uint8_t *matches,
123 int maxMatches,
124 int nextMatch,
125 int &recursionCount,
126 int seqBonus)
127{
128 // Count recursions
129 static constexpr int recursionLimit = 10;
130 ++recursionCount;
131 if (recursionCount >= recursionLimit) {
132 return false;
133 }
134
135 // Detect end of strings
136 if (pattern == patternEnd || str == strEnd) {
137 return false;
138 }
139
140 // Recursion params
141 bool recursiveMatch = false;
142 uint8_t bestRecursiveMatches[256];
143 int bestRecursiveScore = 0;
144
145 // Loop through pattern and str looking for a match
146 bool first_match = true;
147 while (pattern != patternEnd && str != strEnd) {
148 // Found match
149 if (pattern->toLower() == str->toLower()) {
150 // Supplied matches buffer was too short
151 if (nextMatch >= maxMatches) {
152 return false;
153 }
154
155 // "Copy-on-Write" srcMatches into matches
156 if (first_match && srcMatches) {
157 memcpy(matches, srcMatches, nextMatch);
158 first_match = false;
159 }
160
161 // Recursive call that "skips" this match
162 uint8_t recursiveMatches[256];
163 int recursiveScore;
164 auto strNextChar = std::next(str);
165 if (fuzzy_match_recursive(pattern,
166 strNextChar,
167 recursiveScore,
168 strBegin,
169 strEnd,
170 patternEnd,
171 matches,
172 recursiveMatches,
173 sizeof(recursiveMatches),
174 nextMatch,
175 recursionCount)) {
176 // Pick best recursive score
177 if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
178 memcpy(bestRecursiveMatches, recursiveMatches, 256);
179 bestRecursiveScore = recursiveScore;
180 }
181 recursiveMatch = true;
182 }
183
184 // Advance
185 matches[nextMatch++] = (uint8_t)(std::distance(strBegin, str));
186 ++pattern;
187 }
188 ++str;
189 }
190
191 // Determine if full pattern was matched
192 bool matched = pattern == patternEnd ? true : false;
193
194 // Calculate score
195 if (matched) {
196 int sequential_bonus = seqBonus; // bonus for adjacent matches
197 static constexpr int separator_bonus = 30; // bonus if match occurs after a separator
198 static constexpr int camel_bonus = 30; // bonus if match is uppercase and prev is lower
199 static constexpr int first_letter_bonus = 15; // bonus if the first letter is matched
200
201 static constexpr int leading_letter_penalty = -5; // penalty applied for every letter in str before the first match
202 static constexpr int max_leading_letter_penalty = -15; // maximum penalty for leading letters
203 static constexpr int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter
204
205 // Iterate str to end
206 while (str != strEnd) {
207 ++str;
208 }
209
210 // Initialize score
211 outScore = 100;
212
213 // Apply leading letter penalty
214 int penalty = leading_letter_penalty * matches[0];
215 if (penalty < max_leading_letter_penalty) {
216 penalty = max_leading_letter_penalty;
217 }
218 outScore += penalty;
219
220 // Apply unmatched penalty
221 const int unmatched = (int)(std::distance(strBegin, str)) - nextMatch;
222 outScore += unmatched_letter_penalty * unmatched;
223
224 // Apply ordering bonuses
225 for (int i = 0; i < nextMatch; ++i) {
226 uint8_t currIdx = matches[i];
227
228 if (i > 0) {
229 uint8_t prevIdx = matches[i - 1];
230
231 // Sequential
232 if (currIdx == (prevIdx + 1)) {
233 outScore += sequential_bonus;
234 }
235 }
236
237 // Check for bonuses based on neighbor character value
238 if (currIdx > 0) {
239 // Camel case
240 QChar neighbor = *(strBegin + currIdx - 1);
241 QChar curr = *(strBegin + currIdx);
242 if (neighbor.isLower() && curr.isUpper()) {
243 outScore += camel_bonus;
244 }
245
246 // Separator
247 bool neighborSeparator = neighbor == QLatin1Char('_') || neighbor == QLatin1Char(' ');
248 if (neighborSeparator) {
249 outScore += separator_bonus;
250 }
251 } else {
252 // First letter
253 outScore += first_letter_bonus;
254 }
255 }
256 }
257
258 // Return best result
259 if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
260 // Recursive score is better than "this"
261 memcpy(matches, bestRecursiveMatches, maxMatches);
262 outScore = bestRecursiveScore;
263 return true;
264 } else if (matched) {
265 // "this" score is better than recursive
266 return true;
267 } else {
268 // no match
269 return false;
270 }
271}
272
273static QString to_fuzzy_matched_display_string(const QString pattern, QString &str, const QString &htmlTag, const QString &htmlTagClose)
274{
279 int j = 0;
280 for (int i = 0; i < str.size() && j < pattern.size(); ++i) {
281 if (str.at(i).toLower() == pattern.at(j).toLower()) {
282 str.replace(i, 1, htmlTag + str.at(i) + htmlTagClose);
283 i += htmlTag.size() + htmlTagClose.size();
284 ++j;
285 }
286 }
287 return str;
288}
289} // namespace kfts
290
291#endif // KFTS_FUZZY_MATCH_H
static bool fuzzy_match_recursive(QString::const_iterator pattern, QString::const_iterator str, int &outScore, const QString::const_iterator strBegin, const QString::const_iterator strEnd, const QString::const_iterator patternEnd, uint8_t const *srcMatches, uint8_t *newMatches, int maxMatches, int nextMatch, int &recursionCount, int seqBonus=15)
static Q_DECL_UNUSED bool fuzzy_match_simple(const QString pattern, const QString str)
simple fuzzy matching of chars in pattern with chars in str sequentially
static Q_DECL_UNUSED QString to_fuzzy_matched_display_string(const QString pattern, QString &str, const QString &htmlTag, const QString &htmlTagClose)
get string for display in treeview / listview. This should be used from style delegate....
static Q_DECL_UNUSED bool fuzzy_match_sequential(const QString pattern, const QString str, int &outScore)
This is a special case function which doesn't score separator matches higher than sequential matches....
static Q_DECL_UNUSED bool fuzzy_match(const QString pattern, const QString str, int &outScore)
This should be the main function you should use. outscore is the score of this match and should be us...