2地点間のGeodesic距離の総当たり

C++ + GeographicLib で、1000程度の地点どうしの測距の性能と OpenMP 検証。

GeographicLibは、かなり厳密な計算をしている割には高速で、とはいえ複雑なぶん重いといえば重く、この種の総当たり測距では、リアルタイム性を求められるような場合だとやはり心配、なので、OpenMP で並列化したらどうだろうか、と。

ランダムな位置では風情がないので、データは OpenFlightsairports.datを使う。こんなデータ:

1 1,"Goroka","Goroka","Papua New Guinea","GKA","AYGA",-6.081689,145.391881,5282,10,"U"
2 2,"Madang","Madang","Papua New Guinea","MAG","AYMD",-5.207083,145.7887,20,10,"U"
3 3,"Mount Hagen","Mount Hagen","Papua New Guinea","HGU","AYMH",-5.826789,144.295861,5388,10,"U"
4 4,"Nadzab","Nadzab","Papua New Guinea","LAE","AYNZ",-6.569828,146.726242,239,10,"U"
5 5,"Port Moresby Jacksons Intl","Port Moresby","Papua New Guinea","POM","AYPY",-9.443383,147.22005,146,10,"U"
6 6,"Wewak Intl","Wewak","Papua New Guinea","WWK","AYWK",-3.583828,143.669186,19,10,"U"

(入手当時(上で引用したもの)と今現在でフィールドが増えているようで、使ったデータには「Tz database time zone」は含まれていない。)

読み込んだデータ管理の構造:

 1 struct airport_data {
 2   std::map<std::string, std::string> feature;
 3   double lat;
 4   double lon;
 5   int alt;
 6   int timezone;
 7 };
 8 namespace std {
 9   std::ostream& operator<<(std::ostream& os, const airport_data& d) {
10     airport_data& ad = const_cast<airport_data&>(d);
11     os << ad.feature["Name"] << " [" << ad.feature["ICAO"] << " (" << ad.feature["IATA/FAA"] << ")]";
12     return os;
13   }
14 };

データ読み込み部分は、凝ったインフラなしで愚直に(というよりはインチキに):

 1   // --------------------------------------------------------------
 2   // read all airports
 3   std::vector<airport_data> all_airports;
 4   char buf[256];
 5   while (fgets(buf, 256, ::stdin)) {
 6     // 1,"Goroka","Goroka","Papua New Guinea","GKA","AYGA",-6.081689,145.391881,5282,10,"U"
 7     // Airport ID, Name, City, Country, IATA/FAA, ICAO, Lat, Lon, Alt, Timezone, DST
 8     std::string tmp(&buf[0], &buf[std::strlen(buf) - 5]); // ignore DST
 9     std::string(&tmp[tmp.find_first_of(',') + 1]).swap(tmp); // ignore Airport ID
10 
11     std::string keys[] = {"Name", "City", "Country", "IATA/FAA", "ICAO"};
12     airport_data record;
13 
14     int idx = 0;
15     for (int i = 0; i < 5; ++i) {
16       idx = tmp.find_first_of(',');
17       record.feature[keys[i]] = std::string(&tmp[0], &tmp[idx]);
18       if (record.feature[keys[i]] == "\\N") {
19         record.feature[keys[i]] = "";
20       } else {
21         std::string& s = record.feature[keys[i]];
22         std::string(&s[1], &s[s.size() - 1]).swap(s);
23       }
24       std::string(&tmp[tmp.find_first_of(',') + 1]).swap(tmp);
25     }
26     char* ep;
27     record.lat = std::strtod(&tmp[0], &ep);
28     record.lon = std::strtod(ep + 1, &ep);
29     record.alt = int(std::strtod(ep + 1, &ep));
30     record.timezone = int(std::strtod(ep + 1, &ep));
31     if (!record.feature["ICAO"].empty() &&
32         !record.feature["IATA/FAA"].empty() && 
33         (record.feature["Country"] == "Japan"
34          ||
35          record.feature["Country"] == "China"
36          ||
37          record.feature["Country"] == "United States"
38          //||
39          //record.feature["Country"] == "United Kingdom"
40          //||
41          //record.feature["Country"] == "Austria"
42          )) {
43       all_airports.push_back(record);
44     }
45   }

だいたい1000くらいになるように Country で絞り込んでいる。また、最初に触れた「今現在とデータフォーマットが違う」ことには全く柔軟に対応出来ないので、真似したい人は自力で頑張ってくださいまし。

あとは総当たり表を作って…。残り全部引用する:

  1 /**
  2  * COMPILE:
  3  *   with OpenMP:    g++ expr_main.cpp -lGeographic -fopenmp -oexpr_main_OMP
  4  *   without OpenMP: g++ expr_main.cpp -lGeographic -oexpr_main
  5  *
  6  * EXEC:
  7  *   with OpenMP:    LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH} ./expr_main_OMP < airports.dat
  8  *   without OpenMP: LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH} ./expr_main < airports.dat
  9  */
 10 #include <GeographicLib/Geodesic.hpp>
 11 #include <cstdio>
 12 #include <cstdlib>
 13 #include <cstring>
 14 #include <iostream>
 15 #include <string>
 16 #include <utility>
 17 #include <map>
 18 #include <vector>
 19 #include <sstream>
 20 #include <fstream>
 21 
 22 #include <time.h>
 23 
 24 double get_clock()
 25 {
 26     struct ::timespec ts;
 27     ::clock_gettime(CLOCK_MONOTONIC, &ts);
 28     return (ts.tv_sec + ts.tv_nsec * 1e-9) * 1000;
 29 }
 30 
 31 // ...(省略)...
 32 //struct airport_data {
 33 
 34 int main(void)
 35 {
 36   // --------------------------------------------------------------
 37   // read all airports
 38   // ...(省略)...
 39 
 40   // --------------------------------------------------------------
 41   // initialize results
 42   double _ct = get_clock();
 43   std::cout << "BEGIN initialize (" << all_airports.size() << " airports)" << std::endl;
 44   std::vector< std::pair< std::pair<const airport_data*, const airport_data*>, double> >
 45     result;
 46   for (int i = 0; i < all_airports.size(); ++i) {
 47     for (int j = i + 1; j < all_airports.size(); ++j) {
 48       result.push_back(std::make_pair(std::make_pair(&all_airports[i],
 49                              &all_airports[j]),
 50                       double(0)));
 51     }
 52   }
 53   std::cout
 54       << "END   initialize ("
 55       << result.size()
 56       << " pairs) - "
 57       << get_clock() - _ct << " [ms]"
 58       << std::endl;
 59   // --------------------------------------------------------------
 60   // calculate
 61   std::cout << "BEGIN calculate" << std::endl;
 62   _ct = get_clock();
 63   GeographicLib::Geodesic
 64     // equatorial radius (meters), flattening of ellipsoid
 65     geod(6378137, 3.35281066475e-3);
 66   {
 67     {
 68 #ifdef _OPENMP
 69 #pragma omp parallel for
 70 #endif // _OPENMP
 71       for (int i = 0; i < result.size(); ++i) {
 72         double s12;
 73         double azi1;
 74         double azi2;
 75         geod.Inverse((result[i].first.first)->lat,
 76              (result[i].first.first)->lon,
 77              (result[i].first.second)->lat,
 78              (result[i].first.second)->lon,
 79              s12, azi1, azi2);
 80         result[i].second = s12 / 1852.0;
 81       }
 82     }
 83   }
 84   std::cout
 85       << "END   calculate - "
 86       << get_clock() - _ct << " [ms]"
 87       << std::endl;
 88   // --------------------------------------------------------------
 89   // print
 90 #ifdef _OPENMP
 91   std::ofstream ofs("result_OMP.tsv");
 92 #else // _OPENMP
 93   std::ofstream ofs("result.tsv");
 94 #endif // _OPENMP
 95   for (int i = 0; i < result.size(); ++i) {
 96     ofs << *result[i].first.first << "\t"
 97         << *result[i].first.second << "\t"
 98         << result[i].second << " [NM]" << std::endl;
 99     ofs << *result[i].first.second << "\t"
100         << *result[i].first.first << "\t"
101         << result[i].second << " [NM]" << std::endl;
102   }
103   return 0;
104 }

結果:

 1 me@host: ~$ g++ expr_main.cpp -lGeographic -fopenmp -oexpr_main_OMP
 2 me@host: ~$ g++ expr_main.cpp -lGeographic -oexpr_main
 3 me@host: ~$ LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH} ./expr_main < airports.dat
 4 BEGIN initialize (1411 airports)
 5 END   initialize (994755 pairs) - 71.625 [ms]
 6 BEGIN calculate
 7 END   calculate - 2112.39 [ms]
 8 
 9 me@host: ~$ LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH} ./expr_main_OMP < airports.dat
10 BEGIN initialize (1411 airports)
11 END   initialize (994755 pairs) - 72.0747 [ms]
12 BEGIN calculate
13 END   calculate - 1159.11 [ms]

2コアCPUでの検証なので、OpenMP 版はだいたい2倍の速度が出ている。

OpenMP のサンプルとしては面白みがないただの parallel for である。GeographicLib のサンプルとしても、これもただ Inverse しているだけで、これもまた面白みには欠ける。ただ、「現実的な例」が欲しい人には、もしかしたら嬉しいかもしれないと思うので、公開してみた。ただし、「プロトタイプ」が目的でやってるので、かなり雑にやっている。そこは踏まえてください。