diff --git a/CMakeLists.txt b/CMakeLists.txt index edd402d..11474fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.12) -project(libime VERSION 1.1.11) +project(libime VERSION 1.1.12) set(LibIME_VERSION ${PROJECT_VERSION}) set(REQUIRED_FCITX_VERSION 5.1.13) diff --git a/src/libime/core/lattice.h b/src/libime/core/lattice.h index dec4953..8dc5971 100644 --- a/src/libime/core/lattice.h +++ b/src/libime/core/lattice.h @@ -106,6 +106,18 @@ class LIBIMECORE_EXPORT LatticeNode : public WordNode { LatticeNode *prev() const { return prev_; } void setPrev(LatticeNode *prev) { prev_ = prev; } + template + requires(std::is_base_of_v) + T &as() { + return static_cast(*this); + } + + template + requires(std::is_base_of_v) + const T &as() const { + return static_cast(*this); + } + /// Return the full word till the begining of the sentence. std::string fullWord() const { size_t length = 0; diff --git a/src/libime/pinyin/pinyincontext.cpp b/src/libime/pinyin/pinyincontext.cpp index b4e38cf..1c69c6d 100644 --- a/src/libime/pinyin/pinyincontext.cpp +++ b/src/libime/pinyin/pinyincontext.cpp @@ -180,16 +180,15 @@ class PinyinContextPrivate : public fcitx::QPtrHolder { void select(const SentenceResult &sentence) { FCITX_Q(); auto offset = q->selectedLength(); - selectHelper([offset, &sentence, - this](std::vector &selection) { - for (const auto &p : sentence.sentence()) { - selection.emplace_back( - offset + p->to()->index(), - WordNode{p->word(), ime_->model()->index(p->word())}, - static_cast(p)->encodedPinyin(), - false); - } - }); + selectHelper( + [offset, &sentence, this](std::vector &selection) { + for (const auto &p : sentence.sentence()) { + selection.emplace_back( + offset + p->to()->index(), + WordNode{p->word(), ime_->model()->index(p->word())}, + p->as().encodedPinyin(), false); + } + }); } void selectCustom(size_t inputLength, std::string_view segment, @@ -584,6 +583,8 @@ void PinyinContext::update() { 
d->ime_->frameSize(), &d->matchState_); d->clearCandidates(); + + // Add n-best result. for (size_t i = 0, e = d->lattice_.sentenceSize(); i < e; i++) { d->candidates_.push_back(d->lattice_.sentence(i)); d->candidatesSet_.insert(d->candidates_.back().toString()); @@ -597,8 +598,10 @@ void PinyinContext::update() { float max = -std::numeric_limits::max(); auto distancePenalty = d->ime_->model()->unknownPenalty() / PINYIN_DISTANCE_PENALTY_FACTOR; - // Pull the phrase from lattice, this part is the word that's in the - // dict. + + // Enumerate over all the lattice nodes; if from == bos, this is + // a dictionary word match. + // Add all words that do not contain pinyin correction. for (const auto &graphNode : graph.nodes(i)) { auto distance = graph.distanceToEnd(graphNode); auto adjust = static_cast(distance) * distancePenalty; @@ -610,6 +613,7 @@ void PinyinContext::update() { min = std::min(latticeNode.score(), min); max = std::max(latticeNode.score(), max); } + // Deduplicate. if (d->candidatesSet_.contains(latticeNode.word())) { continue; } @@ -667,7 +671,34 @@ void PinyinContext::update() { } } std::sort(d->candidates_.begin() + beginSize, d->candidates_.end(), - std::greater()); + std::greater<>()); + if (const auto limit = d->ime_->wordCandidateLimit()) { + size_t count = 0; + auto &candidatesSet = d->candidatesSet_; + d->candidates_.erase( + std::remove_if( + d->candidates_.begin() + beginSize, d->candidates_.end(), + [&count, limit, + &candidatesSet](const SentenceResult &candidate) { + const bool isSinglePinyinWord = + candidate.sentence().size() == 1 && + candidate.sentence() + .front() + ->as() + .encodedPinyin() + .size() == 2; + if (!isSinglePinyinWord) { + if (count >= limit) { + candidatesSet.erase(candidate.toString()); + return true; + } + count++; + } + return false; + }), + d->candidates_.end()); + } + d->candidatesToCursorNeedUpdate_ = true; } @@ -767,8 +798,7 @@ PinyinContext::preeditWithCursor(PinyinPreeditMode mode) const { std::string 
actualPinyin; if (!syls.empty() && !syls.front().second.empty()) { std::string_view candidatePinyin = - static_cast(node) - ->encodedPinyin(); + node->as().encodedPinyin(); auto nthPinyin = std::distance(node->path().begin(), iter); PinyinInitial bestInitial = syls[0].first; PinyinFinal bestFinal = syls[0].second[0].first; @@ -918,7 +948,7 @@ PinyinContext::candidateFullPinyin(const SentenceResult &candidate) const { pinyin.push_back('\''); } pinyin += PinyinEncoder::decodeFullPinyin( - static_cast(node)->encodedPinyin()); + node->as().encodedPinyin()); } } return pinyin; diff --git a/src/libime/pinyin/pinyinime.cpp b/src/libime/pinyin/pinyinime.cpp index e99afa6..d169c79 100644 --- a/src/libime/pinyin/pinyinime.cpp +++ b/src/libime/pinyin/pinyinime.cpp @@ -39,6 +39,7 @@ class PinyinIMEPrivate : fcitx::QPtrHolder { size_t beamSize_ = Decoder::beamSizeDefault; size_t frameSize_ = Decoder::frameSizeDefault; size_t partialLongWordLimit_ = 0; + size_t wordCandidateLimit_ = 15; float maxDistance_ = std::numeric_limits::max(); float minPath_ = -std::numeric_limits::max(); PinyinPreeditMode preeditMode_ = PinyinPreeditMode::RawText; @@ -139,6 +140,19 @@ void PinyinIME::setPartialLongWordLimit(size_t n) { } } +size_t PinyinIME::wordCandidateLimit() const { + FCITX_D(); + return d->wordCandidateLimit_; +} + +void PinyinIME::setWordCandidateLimit(size_t n) { + FCITX_D(); + if (d->wordCandidateLimit_ != n) { + d->wordCandidateLimit_ = n; + emit(); + } +} + void PinyinIME::setPreeditMode(PinyinPreeditMode mode) { FCITX_D(); if (d->preeditMode_ != mode) { diff --git a/src/libime/pinyin/pinyinime.h b/src/libime/pinyin/pinyinime.h index 51eef7d..ba50f5d 100644 --- a/src/libime/pinyin/pinyinime.h +++ b/src/libime/pinyin/pinyinime.h @@ -41,6 +41,22 @@ class LIBIMEPINYIN_EXPORT PinyinIME : public fcitx::ConnectableObject { void setFrameSize(size_t n); size_t partialLongWordLimit() const; void setPartialLongWordLimit(size_t n); + /** + * \brief The maximum number of candidates that 
is a word. + * + * Limit the non-single-character candidates to avoid the need to + * scroll/next page too many characters. + * + * When it is 0, there is no limit. + * + * Since 1.1.12 + */ + size_t wordCandidateLimit() const; + /** + * \brief Set the maximum number of candidates that is a word. + * Since 1.1.12 + */ + void setWordCandidateLimit(size_t n); void setScoreFilter(float maxDistance = std::numeric_limits::max(), float minPath = -std::numeric_limits::max()); void setShuangpinProfile(std::shared_ptr profile); diff --git a/test/testpinyincontext.cpp b/test/testpinyincontext.cpp index d8d0bc4..e051475 100644 --- a/test/testpinyincontext.cpp +++ b/test/testpinyincontext.cpp @@ -131,8 +131,7 @@ int main() { c.type("n"); for (const auto &candidate : c.candidates()) { for (const auto *node : candidate.sentence()) { - const auto &pinyin = - static_cast(node)->encodedPinyin(); + const auto &pinyin = node->as().encodedPinyin(); std::cout << node->word(); if (!pinyin.empty()) { std::cout << " " << PinyinEncoder::decodeFullPinyin(pinyin); diff --git a/test/testpinyinime.cpp b/test/testpinyinime.cpp index 7058400..47183ef 100644 --- a/test/testpinyinime.cpp +++ b/test/testpinyinime.cpp @@ -107,8 +107,7 @@ int main(int argc, char *argv[]) { std::cout << (count % 10) << ": "; for (const auto *node : candidate.sentence()) { const auto &pinyin = - static_cast(node) - ->encodedPinyin(); + node->as().encodedPinyin(); std::cout << node->word(); if (!pinyin.empty()) { std::cout << " " << PinyinEncoder::decodeFullPinyin(pinyin);