]>
Commit | Line | Data |
---|---|---|
3a9019d9 FG |
1 | // This example shows how to sort structs using complex multiple part keys using\r |
2 | // string_sort.\r | |
3 | //\r | |
4 | // Copyright Steven Ross 2009-2014.\r | |
5 | //\r | |
6 | // Distributed under the Boost Software License, Version 1.0.\r | |
7 | // (See accompanying file LICENSE_1_0.txt or copy at\r | |
8 | // http://www.boost.org/LICENSE_1_0.txt)\r | |
9 | \r | |
10 | // See http://www.boost.org/libs/sort for library home page.\r | |
11 | \r | |
12 | #include <boost/sort/spreadsort/string_sort.hpp>\r | |
13 | #include <boost/sort/spreadsort/float_sort.hpp>\r | |
14 | #include <time.h>\r | |
15 | #include <stdio.h>\r | |
16 | #include <stdlib.h>\r | |
17 | #include <algorithm>\r | |
18 | #include <vector>\r | |
19 | #include <iostream>\r | |
20 | #include <fstream>\r | |
21 | #include <string>\r | |
22 | using std::string;\r | |
23 | using namespace boost::sort::spreadsort;\r | |
24 | \r | |
25 | //[generalized_functors\r | |
26 | struct DATA_TYPE {\r | |
27 | time_t birth;\r | |
28 | float net_worth;\r | |
29 | string first_name;\r | |
30 | string last_name;\r | |
31 | };\r | |
32 | \r | |
33 | static const int birth_size = sizeof(time_t);\r | |
34 | static const int first_name_offset = birth_size + sizeof(float);\r | |
35 | static const boost::uint64_t base_mask = 0xff;\r | |
36 | \r | |
37 | struct lessthan {\r | |
38 | inline bool operator()(const DATA_TYPE &x, const DATA_TYPE &y) const {\r | |
39 | if (x.birth != y.birth) {\r | |
40 | return x.birth < y.birth;\r | |
41 | }\r | |
42 | if (x.net_worth != y.net_worth) {\r | |
43 | return x.net_worth < y.net_worth;\r | |
44 | }\r | |
45 | if (x.first_name != y.first_name) {\r | |
46 | return x.first_name < y.first_name;\r | |
47 | }\r | |
48 | return x.last_name < y.last_name;\r | |
49 | }\r | |
50 | };\r | |
51 | \r | |
52 | struct bracket {\r | |
53 | inline unsigned char operator()(const DATA_TYPE &x, size_t offset) const {\r | |
54 | // Sort date as a signed int, returning the appropriate byte.\r | |
55 | if (offset < birth_size) {\r | |
56 | const int bit_shift = 8 * (birth_size - offset - 1);\r | |
57 | unsigned char result = (x.birth & (base_mask << bit_shift)) >> bit_shift;\r | |
58 | // Handling the sign bit. Unnecessary if the data is always positive.\r | |
59 | if (offset == 0) {\r | |
60 | return result ^ 128;\r | |
61 | }\r | |
62 | \r | |
63 | return result;\r | |
64 | }\r | |
65 | \r | |
66 | // Sort a signed float. This requires reversing the order of negatives\r | |
67 | // because of the way floats are represented in bits.\r | |
68 | if (offset < first_name_offset) {\r | |
69 | const int bit_shift = 8 * (first_name_offset - offset - 1);\r | |
70 | unsigned key = float_mem_cast<float, unsigned>(x.net_worth);\r | |
71 | unsigned char result = (key & (base_mask << bit_shift)) >> bit_shift;\r | |
72 | // Handling the sign.\r | |
73 | if (x.net_worth < 0) {\r | |
74 | return 255 - result;\r | |
75 | }\r | |
76 | // Increasing positives so they are higher than negatives.\r | |
77 | if (offset == birth_size) {\r | |
78 | return 128 + result;\r | |
79 | }\r | |
80 | \r | |
81 | return result;\r | |
82 | }\r | |
83 | \r | |
84 | // Sort a string that is before the end. This approach supports embedded\r | |
85 | // nulls. If embedded nulls are not required, then just delete the "* 2"\r | |
86 | // and the inside of the following if just becomes:\r | |
87 | // return x.first_name[offset - first_name_offset];\r | |
88 | const unsigned first_name_end_offset =\r | |
89 | first_name_offset + x.first_name.size() * 2;\r | |
90 | if (offset < first_name_end_offset) {\r | |
91 | int char_offset = offset - first_name_offset;\r | |
92 | // This signals that the string continues.\r | |
93 | if (!(char_offset & 1)) {\r | |
94 | return 1;\r | |
95 | }\r | |
96 | return x.first_name[char_offset >> 1];\r | |
97 | }\r | |
98 | \r | |
99 | // This signals that the string has ended, so that shorter strings come\r | |
100 | // before longer ones.\r | |
101 | if (offset == first_name_end_offset) {\r | |
102 | return 0;\r | |
103 | }\r | |
104 | \r | |
105 | // The final string needs no special consideration.\r | |
106 | return x.last_name[offset - first_name_end_offset - 1];\r | |
107 | }\r | |
108 | };\r | |
109 | \r | |
110 | struct getsize {\r | |
111 | inline size_t operator()(const DATA_TYPE &x) const {\r | |
112 | return first_name_offset + x.first_name.size() * 2 + 1 +\r | |
113 | x.last_name.size();\r | |
114 | }\r | |
115 | };\r | |
116 | //] [/generalized_functors]\r | |
117 | \r | |
118 | //Pass in an argument to test std::sort\r | |
119 | int main(int argc, const char ** argv) {\r | |
120 | std::ifstream indata;\r | |
121 | std::ofstream outfile;\r | |
122 | bool stdSort = false;\r | |
123 | unsigned loopCount = 1;\r | |
124 | for (int u = 1; u < argc; ++u) {\r | |
125 | if (std::string(argv[u]) == "-std")\r | |
126 | stdSort = true;\r | |
127 | else\r | |
128 | loopCount = atoi(argv[u]);\r | |
129 | }\r | |
130 | double total = 0.0;\r | |
131 | //Run multiple loops, if requested\r | |
132 | std::vector<DATA_TYPE> array;\r | |
133 | for (unsigned u = 0; u < loopCount; ++u) {\r | |
134 | indata.open("input.txt", std::ios_base::in | std::ios_base::binary);\r | |
135 | if (indata.bad()) {\r | |
136 | printf("input.txt could not be opened\n");\r | |
137 | return 1;\r | |
138 | }\r | |
139 | \r | |
140 | // Read in the data.\r | |
141 | DATA_TYPE inval;\r | |
142 | while (!indata.eof() ) {\r | |
143 | indata >> inval.first_name;\r | |
144 | indata >> inval.last_name;\r | |
145 | indata.read(reinterpret_cast<char *>(&(inval.birth)), birth_size);\r | |
146 | indata.read(reinterpret_cast<char *>(&(inval.net_worth)), sizeof(float));\r | |
147 | // Handling nan.\r | |
148 | if (inval.net_worth != inval.net_worth) {\r | |
149 | inval.net_worth = 0;\r | |
150 | }\r | |
151 | if (indata.eof())\r | |
152 | break;\r | |
153 | array.push_back(inval);\r | |
154 | }\r | |
155 | indata.close();\r | |
156 | \r | |
157 | // Sort the data.\r | |
158 | clock_t start, end;\r | |
159 | double elapsed;\r | |
160 | start = clock();\r | |
161 | if (stdSort) {\r | |
162 | std::sort(array.begin(), array.end(), lessthan());\r | |
163 | } else {\r | |
164 | //[generalized_functors_call\r | |
165 | string_sort(array.begin(), array.end(), bracket(), getsize(), lessthan());\r | |
166 | //] [/generalized_functors_call]\r | |
167 | }\r | |
168 | end = clock();\r | |
169 | elapsed = static_cast<double>(end - start);\r | |
170 | if (stdSort) {\r | |
171 | outfile.open("standard_sort_out.txt", std::ios_base::out |\r | |
172 | std::ios_base::binary | std::ios_base::trunc);\r | |
173 | } else {\r | |
174 | outfile.open("boost_sort_out.txt", std::ios_base::out |\r | |
175 | std::ios_base::binary | std::ios_base::trunc);\r | |
176 | }\r | |
177 | if (outfile.good()) {\r | |
178 | for (unsigned u = 0; u < array.size(); ++u)\r | |
179 | outfile << array[u].birth << " " << array[u].net_worth << " "\r | |
180 | << array[u].first_name << " " << array[u].last_name << "\n";\r | |
181 | outfile.close();\r | |
182 | }\r | |
183 | total += elapsed;\r | |
184 | array.clear();\r | |
185 | }\r | |
186 | if (stdSort) {\r | |
187 | printf("std::sort elapsed time %f\n", total / CLOCKS_PER_SEC);\r | |
188 | } else {\r | |
189 | printf("spreadsort elapsed time %f\n", total / CLOCKS_PER_SEC);\r | |
190 | }\r | |
191 | return 0;\r | |
192 | }\r |