Đưa ra index của hai điểm gần nhau nhất trên mặt phẳng

Nobita1 · September 19, 2019, 4:12am

Bài toán này khá phổ biến và cách giải mình tìm thầy thường là divide and conquer, nhưng đó là đưa ra giá trị nhỏ nhất. Còn việc đưa ra index thì mình nghĩ mãi chưa xử lý được, các bạn có thể gợi ý cho mình được không.

Các phần tử ban đầu được cho dưới dạng mảng.

Về divide and conquer thì mình dựa chủ yếu theo đây:
here

tntxtnt · September 19, 2019, 2:09am

thì ngoài giá trị khoảng cách nhỏ nhất ra trả về thêm index 2 điểm có khoảng cách nhỏ nhất nữa :V

rogp10 · September 19, 2019, 4:08am

Dùng con trỏ thôi chịu thêm một bước deref nữa.

Nobita1 · July 20, 2021, 3:05pm

Nếu mình sửa như thế này trong tutorial ở trên thì tại sao lại sai nhỉ?

class Point  
{  
    public: 
    int x, y; 
   int index;
};

lúc scanf thì mình gắn index = i cho Point[i]
tiếp theo

float bruteForce(Point P[], int n)  
{  
    float min = FLT_MAX;  
    for (int i = 0; i < n; ++i)  
        for (int j = i+1; j < n; ++j)  
            if (dist(P[i], P[j]) < min) { 
                min = dist(P[i], P[j]);  
                index1 = P[i].index;
                index2 = P[j].index;
            }
    return min;  
}

tiếp theo nữa:

float stripClosest(Point strip[], int size, float d)  
{  
    float min = d; // Initialize the minimum distance as d  
  
    qsort(strip, size, sizeof(Point), compareY);  
  
    // Pick all points one by one and try the next points till the difference  
    // between y coordinates is smaller than d.  
    // This is a proven fact that this loop runs at most 6 times  
    for (int i = 0; i < size; ++i)  
        for (int j = i+1; j < size && (strip[j].y - strip[i].y) < min; ++j)  
            if (dist(strip[i],strip[j]) < min)  {
                min = dist(strip[i], strip[j]);  
               index1 = strip[i].index;
               index2 = strip[j].index;
            }
  
    return min;  
}

Mà kết quả lại không đúng.
.

tntxtnt · September 19, 2019, 3:28pm

ehehe có lẽ ko cần trả về index đâu Trả về 2 điểm gần nhất luôn, rồi muốn tìm index thì tìm trong mảng bao đần là được, O(nlogn) tìm ra giá trị 2 điểm gần nhất, O(n) tìm index của 2 điểm đó vẫn là O(nlogn)

tntxtnt · September 20, 2019, 9:37pm

viết thử C++17 :V

main.cpp
g++ -std=c++17 -Wall -Wextra -O2 main.cpp -s

#include "tgeo.h"
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

int main()
{
    using tgeo::Point2f;
    std::vector<Point2f> points;
    std::copy(std::istream_iterator<Point2f>(std::cin),
              std::istream_iterator<Point2f>(), std::back_inserter(points));
    auto [a, b, sqdist] = tgeo::closestPairAlt(
        begin(points), end(points), [](auto const& a, auto const& b) {
            using namespace tgeo;
            auto dx = get<0>(a) - get<0>(b);
            auto dy = get<1>(a) - get<1>(b);
            return dx * dx + dy * dy;
        });
    auto pointIndex = [](auto const& P, auto const& p) {
        return std::distance(begin(P), std::find(begin(P), end(P), p));
    };
    std::cout << "Index " << pointIndex(points, a) << ": " << a << "\n"
              << "Index " << pointIndex(points, b) << ": " << b << "\n"
              << "Distance: " << std::sqrt(sqdist) << "\n";
}

tgeo.h

#pragma once

#include <algorithm>
#include <iterator>
#include <limits>
#include <tuple>
#include <vector>

namespace tgeo
{
template <class T>
struct Point2 {
    using value_type = T;
    T data[2];
};
using Point2i = Point2<int>;
using Point2f = Point2<float>;
using Point2d = Point2<double>;

template <size_t dim, class T>
const T& get(const Point2<T>& p)
{
    return p.data[dim];
}
template <size_t dim, class T>
T& get(Point2<T>& p)
{
    return p.data[dim];
}

template <class T>
std::ostream& operator<<(std::ostream& out, const Point2<T>& p)
{
    return out << '(' << get<0>(p) << ", " << get<1>(p) << ')';
}
template <class T>
std::istream& operator>>(std::istream& in, Point2<T>& p)
{
    in >> get<0>(p) >> get<1>(p);
    return in;
}

template <class P, size_t dim>
bool compare(const P& a, const P& b)
{
    return get<dim>(a) < get<dim>(b);
}
template <class T>
bool operator==(const Point2<T>& a, const Point2<T>& b)
{
    return get<0>(a) == get<0>(b) && get<1>(a) == get<1>(b);
}

template <class P, class D>
struct PairOfPointsDistance {
    P a;
    P b;
    D distance;
};

namespace detail
{
    static const int CLOSEST_PAIR_BRUTE_THRESHOLD = 32; //~33% faster than 3 (?)

    template <class PointsFIter, class DistanceFunction>
    auto closestPairBrute(PointsFIter first, PointsFIter last,
                          DistanceFunction&& distFunc)
    {
        using point_t = typename PointsFIter::value_type;
        using distance_t = decltype(
            distFunc(std::declval<point_t>(), std::declval<point_t>()));
        PairOfPointsDistance<point_t, distance_t> result{
            {}, {}, std::numeric_limits<distance_t>::max()};
        for (; first != last; ++first)
            for (auto pb = first; ++pb != last;)
                if (auto dist = distFunc(*first, *pb); dist < result.distance)
                    result = {*first, *pb, dist};
        return result;
    }

    template <class PointsRange, class DistanceFunction, class Distance>
    auto closestStripPair(const PointsRange& Yp, Distance init,
                          DistanceFunction&& distFunc)
    {
        using point_t = typename PointsRange::value_type;
        using distance_t = decltype(
            distFunc(std::declval<point_t>(), std::declval<point_t>()));
        static const size_t POINTS_LIMIT = 7;
        PairOfPointsDistance<point_t, distance_t> result{{}, {}, init};
        for (size_t i = 0; i < Yp.size(); ++i)
            for (size_t j = i + 1; j < Yp.size() && (j - i <= POINTS_LIMIT);
                 ++j)
                if (auto dist = distFunc(Yp[i], Yp[j]); dist < result.distance)
                    result = {Yp[i], Yp[j], dist};
        return result;
    }

    template <class PointsRange, class DistanceFunction>
    auto closestPairNlogN(typename PointsRange::iterator first,
                          typename PointsRange::iterator last,
                          const PointsRange& Y, PointsRange& Yp,
                          DistanceFunction&& distFunc)
    {
        auto n = std::distance(first, last);
        if (n <= CLOSEST_PAIR_BRUTE_THRESHOLD)
            return closestPairBrute(first, last, distFunc);

        auto mid = first;
        std::advance(mid, n / 2);
        PointsRange Y1, Y2;
        Y1.reserve(Y.size());
        Y2.reserve(Y.size());
        for (auto& p : Y)
            if (get<0>(p) < get<0>(*mid))
                Y1.push_back(p);
            else
                Y2.push_back(p);
        auto ppd1 = closestPairNlogN(first, mid, Y1, Yp, distFunc);
        auto ppd2 = closestPairNlogN(mid, last, Y2, Yp, distFunc);
        auto ppd = ppd1.distance < ppd2.distance ? ppd1 : ppd2;

        auto delta1 = get<0>(*mid) - ppd.distance;
        auto delta2 = get<0>(*mid) + ppd.distance;
        Yp.clear();
        std::copy_if(begin(Y), end(Y), std::back_inserter(Yp),
                     [=](auto const& p) {
                         return delta1 <= get<0>(p) && get<0>(p) <= delta2;
                     });
        auto ppd3 = closestStripPair(Yp, ppd.distance, distFunc);
        return ppd3.distance < ppd.distance ? ppd3 : ppd;
    }

    template <class PointsRange, class DistanceFunction>
    auto closestPairNlogNlogN(typename PointsRange::iterator first,
                              typename PointsRange::iterator last,
                              PointsRange& Yp, DistanceFunction&& distFunc)
    {
        using point_t = typename PointsRange::value_type;
        auto n = std::distance(first, last);
        if (n <= CLOSEST_PAIR_BRUTE_THRESHOLD)
            return closestPairBrute(first, last, distFunc);

        auto mid = first;
        std::advance(mid, n / 2);
        auto ppd1 = closestPairNlogNlogN(first, mid, Yp, distFunc);
        auto ppd2 = closestPairNlogNlogN(mid, last, Yp, distFunc);
        auto ppd = ppd1.distance < ppd2.distance ? ppd1 : ppd2;

        auto delta1 = get<0>(*mid) - ppd.distance;
        auto delta2 = get<0>(*mid) + ppd.distance;
        Yp.clear();
        std::copy_if(first, last, std::back_inserter(Yp), [=](auto const& p) {
            return delta1 <= get<0>(p) && get<0>(p) <= delta2;
        });
        std::sort(begin(Yp), end(Yp), compare<point_t, 1>); // O(n (logn)^2)
        auto ppd3 = closestStripPair(Yp, ppd.distance, distFunc);
        return ppd3.distance < ppd.distance ? ppd3 : ppd;
    }
} // namespace detail

template <class PointsFIter, class DistanceFunction>
auto closestPair(PointsFIter first, PointsFIter last,
                 DistanceFunction&& distFunc)
{
    auto n = std::distance(first, last);
    if (n <= detail::CLOSEST_PAIR_BRUTE_THRESHOLD)
        return detail::closestPairBrute(first, last, distFunc);
    using point_t = typename PointsFIter::value_type;
    std::vector<point_t> X(first, last);
    std::sort(begin(X), end(X), compare<point_t, 0>);
    std::vector<point_t> Y(first, last);
    std::sort(begin(Y), end(Y), compare<point_t, 1>);
    std::vector<point_t> Yp;
    Yp.reserve(X.size());
    return detail::closestPairNlogN(begin(X), end(X), Y, Yp, distFunc);
}

template <class PointsFIter, class DistanceFunction>
auto closestPairAlt(PointsFIter first, PointsFIter last,
                    DistanceFunction&& distFunc)
{
    auto n = std::distance(first, last);
    if (n <= detail::CLOSEST_PAIR_BRUTE_THRESHOLD)
        return detail::closestPairBrute(first, last, distFunc);
    using point_t = typename PointsFIter::value_type;
    std::vector<point_t> X(first, last);
    std::sort(begin(X), end(X), compare<point_t, 0>);
    std::vector<point_t> Yp;
    Yp.reserve(X.size());
    return detail::closestPairNlogNlogN(begin(X), end(X), Yp, distFunc);
}
} // namespace tgeo

sample input

-8.99 0.93
-8.08 -9.36
8.89 -9.33
-3.72 -2.16
-2.47 8.35
-7.59 3.81
8.45 -9.27
-5.12 -6.00
-1.91 5.52
6.24 1.75

sample output

Index 2: (8.89, -9.33)
Index 6: (8.45, -9.27)
Distance: 0.444073

random 1 triệu số :V test.cpp :V
g++ -std=c++17 -O2 -Wall -Wextra test.cpp -s

#include <chrono>
#include <cmath>
#include <iostream>
#include <random>
#include <vector>
namespace cron = std::chrono;

#include "tgeo.h"

template <class Function, class... Args>
auto timeit(Function&& f, Args... args)
{
    const auto start = cron::steady_clock::now();
    auto result = f(std::forward<Args>(args)...);
    const cron::duration<double> elapsed = cron::steady_clock::now() - start;
    return std::make_pair(result, elapsed);
}

struct PointWithIndex {
    tgeo::Point2f point;
    size_t index;
};
template <size_t dim>
float get(const PointWithIndex& p)
{
    return p.point.data[dim];
}
template <size_t dim>
float& get(PointWithIndex& p)
{
    return p.point.data[dim];
}

int main()
{
    using namespace tgeo;
    std::vector<Point2f> points;
    /*std::copy(std::istream_iterator<Point2f>(std::cin),
              std::istream_iterator<Point2f>(), std::back_inserter(points));*/
    unsigned seed = 1;
    std::mt19937 prbg(seed);
    std::generate_n(std::back_inserter(points), 1000000, [&]() {
        return Point2f{
            std::uniform_real_distribution<float>{-1000, 1000}(prbg),
            std::uniform_real_distribution<float>{-1000, 1000}(prbg)};
    });
    std::cout << "Max: " << points.size() << " points\n";

    auto distFunc = [](auto const& a, auto const& b) {
        auto dx = get<0>(a) - get<0>(b);
        auto dy = get<1>(a) - get<1>(b);
        return dx * dx + dy * dy;
    };
    auto nn = [&](auto first, auto last) {
        return detail::closestPairBrute(first, last, distFunc);
    };
    auto nlogn = [&](auto first, auto last) {
        return closestPair(first, last, distFunc);
    };
    auto nlognlogn = [&](auto first, auto last) {
        return closestPairAlt(first, last, distFunc);
    };
    std::vector<size_t> pointCounts{100,    200,    500,    1000,  2000,
                                    5000,   10000,  20000,  50000, 100000,
                                    200000, 500000, 1000000};
    for (auto& ptCnt : pointCounts)
    {
        if (ptCnt > points.size())
            break;
        std::cout << "\n" << ptCnt << " points\n";
        if (ptCnt < 30000)
        {
            auto [ppd, elapsed] =
                timeit(nn, begin(points), begin(points) + ptCnt);
            auto [a, b, sqdist] = ppd;
            std::cout << elapsed.count() * 1000 << "ms\t" << a << " " << b
                      << " " << std::sqrt(sqdist) << "\n";
        }
        {
            auto [ppd, elapsed] =
                timeit(nlogn, begin(points), begin(points) + ptCnt);
            auto [a, b, sqdist] = ppd;
            std::cout << elapsed.count() * 1000 << "ms\t" << a << " " << b
                      << " " << std::sqrt(sqdist) << "\n";
        }
        {
            auto [ppd, elapsed] =
                timeit(nlognlogn, begin(points), begin(points) + ptCnt);
            auto [a, b, sqdist] = ppd;
            std::cout << elapsed.count() * 1000 << "ms\t" << a << " " << b
                      << " " << std::sqrt(sqdist) << "\n";
        }
    }

    std::vector<PointWithIndex> pwi;
    std::transform(begin(points), end(points), std::back_inserter(pwi),
                   [id = size_t(0)](auto const& p) mutable {
                       return PointWithIndex{p, id++};
                   });
    {
        std::cout << "\n" << pwi.size() << " points\n";
        auto [ppd, elapsed] = timeit(nlognlogn, begin(pwi), end(pwi));
        auto [a, b, sqdist] = ppd;
        std::cout << elapsed.count() * 1000 << "ms\t" << a.point << "["
                  << a.index << "] " << b.point << "[" << b.index << "] "
                  << std::sqrt(sqdist) << "\n";
    }
    {
        std::cout << "\n" << points.size() << " points\n";
        auto nlognlognWithIndex = [&](auto first, auto last) {
            auto [a, b, dist] = closestPairAlt(first, last, distFunc);
            auto index = [=](auto const& p) {
                return std::distance(first, std::find(first, last, p));
            };
            return std::make_tuple(a, index(a), b, index(b), dist);
        };
        auto [ppd, elapsed] =
            timeit(nlognlognWithIndex, begin(points), end(points));
        auto [a, aIndex, b, bIndex, sqdist] = ppd;
        std::cout << elapsed.count() * 1000 << "ms\t" << a << "[" << aIndex
                  << "] " << b << "[" << bIndex << "] " << std::sqrt(sqdist)
                  << "\n";
    }
}

sample output:

Max: 1000000 points

100 points
0.011624ms	(340.935, 827.924) (357.671, 833.723) 17.7121
0.027781ms	(340.935, 827.924) (357.671, 833.723) 17.7121
0.022494ms	(340.935, 827.924) (357.671, 833.723) 17.7121

200 points
0.037427ms	(117.38, -138.603) (112.48, -143.235) 6.74247
0.053187ms	(112.48, -143.235) (117.38, -138.603) 6.74247
0.033221ms	(112.48, -143.235) (117.38, -138.603) 6.74247

500 points
0.21959ms	(490.669, 832.887) (491.274, 834.997) 2.19432
0.12743ms	(490.669, 832.887) (491.274, 834.997) 2.19432
0.06595ms	(490.669, 832.887) (491.274, 834.997) 2.19432

1000 points
0.90589ms	(561.059, 521.686) (559.489, 521.271) 1.62469
0.283861ms	(559.489, 521.271) (561.059, 521.686) 1.62469
0.154941ms	(559.489, 521.271) (561.059, 521.686) 1.62469

2000 points
3.50176ms	(652.814, 250.223) (652.455, 249.656) 0.670909
0.639074ms	(652.455, 249.656) (652.814, 250.223) 0.670909
0.344522ms	(652.455, 249.656) (652.814, 250.223) 0.670909

5000 points
22.9154ms	(383.968, -44.3809) (383.834, -44.8297) 0.468228
1.88762ms	(383.834, -44.8297) (383.968, -44.3809) 0.468228
1.04801ms	(383.834, -44.8297) (383.968, -44.3809) 0.468228

10000 points
94.6832ms	(286.925, 576.621) (286.815, 576.775) 0.189527
4.10415ms	(286.815, 576.775) (286.925, 576.621) 0.189527
2.21179ms	(286.815, 576.775) (286.925, 576.621) 0.189527

20000 points
411.55ms	(742.968, 31.0702) (742.874, 31.0225) 0.105201
8.60499ms	(742.874, 31.0225) (742.968, 31.0702) 0.105201
4.60633ms	(742.874, 31.0225) (742.968, 31.0702) 0.105201

50000 points
23.6529ms	(-871.14, 36.7084) (-871.111, 36.6969) 0.030387
12.5755ms	(-871.14, 36.7084) (-871.111, 36.6969) 0.030387

100000 points
50.649ms	(-511.122, -242.313) (-511.117, -242.333) 0.0208356
34.3632ms	(-511.122, -242.313) (-511.117, -242.333) 0.0208356

200000 points
128.915ms	(-904.256, -8.25165) (-904.254, -8.24994) 0.0023741
67.1734ms	(-904.256, -8.25165) (-904.254, -8.24994) 0.0023741

500000 points
373.315ms	(325.247, 4.88391) (325.247, 4.88446) 0.000821144
165.509ms	(325.247, 4.88391) (325.247, 4.88446) 0.000821144

1000000 points
878.32ms	(325.247, 4.88391) (325.247, 4.88446) 0.000821144
376.771ms	(325.247, 4.88391) (325.247, 4.88446) 0.000821144

1000000 points
426.347ms	(325.247, 4.88391)[334121] (325.247, 4.88446)[464350] 0.000821144

1000000 points
381.539ms	(325.247, 4.88391)[334121] (325.247, 4.88446)[464350] 0.000821144

thuật toán O(nlog²n) lại lẹ gần gấp đôi thuật toán O(nlogn) :V Chắc là do tạo nhiều mảng động nhỏ quá hay sao đó :V Còn thằng O(nlog²n) do điểm tạo ra là ngẫu nhiên nên chắc mảng strip nhỏ, có sort cũng coi như là hằng số :V

Nobita1 · September 20, 2019, 9:39pm

Cám ơn ban, nhìn code C++ không quen chút nào, bạn code rất đẹp. Trả lời tận tình nữa =))

tntxtnt · September 20, 2019, 10:21pm

đẹp là nhờ clang-format đó :V

tại thấy cái code kia nó xài qsort trong C++ nên đành code kiểu mới vậy, ko xài lại code đó được :V :V