/* COPYRIGHT (2011-2012) by: Kevin Marco Erler (author), http://www.kevinerler.de AIU-FSU Jena (co-owner), http://www.astro.uni-jena.de SBSZ Jena-Göschwitz (co-owner), http://www.sbsz-jena.de BSZ-Hermsdorf (co-owner), http://www.bszh.de Advanced Licensing (dual license: COPYRIGHT and following licenses): License (international): CC-BY v3.0-unported or later - link: http://creativecommons.org/licenses/by/3.0/deed.en License (Germany): CC-BY v3.0-DE or later - link: http://creativecommons.org/licenses/by/3.0/de/ ------------------ Compilation requirements: Packages (x86-64): GCC >v4.2, compat. libstdc++ and GOMP v3.0 Normal-Compile with g++-Compiler (Red Hat GCC 4.4.5-6 x86-64 tested) + OpenMP v3.0 ([lib]GOMP v3.0 x86-64 tested) g++ -std=c++0x -m64 -fopenmp -Wall -Wextra -pedantic -pedantic-errors -lgomp -lm -s <source.cpp> -o <dest> Release-Compile with g++-Compiler (Red Hat GCC 4.4.5-6 x86-64 tested) + OpenMP v3.0 ([lib]GOMP v3.0 x86-64 tested) g++ -std=c++0x -m64 -fopenmp -Wall -Wextra -pedantic -pedantic-errors -lgomp -lm -O3 -s <source.cpp> -o <dest> Debug-Compile with g++-Compiler (Red Hat GCC 4.4.5-6 x86-64 tested) + OpenMP v3.0 ([lib]GOMP v3.0 x86-64 tested) g++ -std=c++0x -m64 -fopenmp -Wall -Wextra -pedantic -pedantic-errors -lgomp -lm -g -ggdb3 <source.cpp> -o <dest> */ // Includes of C/C++-Librarys for INTs, REAL/FLOATs, STRINGS, Math-Calc and I/O #include <climits> #include <cstdint> #include <cinttypes> #include <cfloat> #include <cwchar> #include <string> //std:string #include <string.h> #include <cstring> #include <cstdlib> #include <cstdio> #include <iostream> #include <sstream> #include <iomanip> #include <cmath> #include <ctime> // Conditional compilation (conditional include) of the OpenMP-Mainlib for OpenMP-Support #ifdef _OPENMP #include <omp.h> #endif using namespace std; #define free(x) free(x); *x=NULL #define PRId128 "s" #define PRIi128 "s" #define PRIu128 "s" const uint64_t UINT64_MIN = 0; const __int128_t INT128_MIN = (__int128_t)((-170141183460469231731.687303715884105728) * pow(10,18)); const __int128_t INT128_MAX = (__int128_t)(( 170141183460469231731.687303715884105727) * pow(10,18)); const __uint128_t UINT128_MAX = (__uint128_t)((340282366920938463463.374607431768211455) * pow(10,18)); const __uint128_t UINT128_MIN = 0/* * pow(10,18)*/; std::ostream &operator<<(std::ostream &out, __uint128_t x) { if(x >= 10) { out << x / 10; } return out << static_cast<unsigned>(x % 10); } std::ostream &operator<<(std::ostream &out, __int128_t x) { if(x < 0) { out << '-'; x = -x; } return out << static_cast<__uint128_t>(x); } string INT128ToSTR(__int128_t x) { std::stringstream sstr; sstr<<x; return sstr.str(); } #define INT128ToCSTR(x) (INT128ToSTR(x)).c_str() string UINT128ToSTR(__uint128_t x) { std::stringstream sstr; sstr<<x; return sstr.str(); } #define UINT128ToCSTR(x) (UINT128ToSTR(x)).c_str() const uint64_t datarsize = 100000ULL; int main(int argc, char *argv[]) { // Runtime manipulation of OpenMP-state variables //omp_set_num_threads(4); omp_set_dynamic(1); omp_set_nested(1); /* data declarations and implementations DB-arrays requires a FILE-Input-database (file based) of datarsize (N) elements (mtype uint64_t) */ FILE *FileVar; double starttime = 0.00, sdelay1 = 0.00, sdelay2 = 0ULL, pdelay = 0.00; uint64_t db1[datarsize] = {0ULL}, //db1 for BubbleSort: Single-Execution db3[datarsize] = {0ULL}, //db3 for Odd-Even-Transposition-Sort (OetSort: parallelversion of BubbleSort): Single-Execution db2[datarsize] = {0ULL}, //db2 for Odd-Even-Transposition-Sort (OetSort: parallelversion of BubbleSort): Parallel-Execution tmp=0ULL; bool CorrectSorted = true; std::cout << "BubbleSort (64-Bit)\n" << "===================================================================\n" << "DB einlesen:"; //--------------------------Begin: Initialization of data------------------------------------------ if((FileVar = fopen("db1.bin","rb"))!=NULL) { fread(&db1,sizeof(db1),1,FileVar); fseek(FileVar,0,SEEK_SET); fread(&db2,sizeof(db2),1,FileVar); fseek(FileVar,0,SEEK_SET); fread(&db3,sizeof(db3),1,FileVar); fclose(FileVar); } else { std::cout << "Fehler beim Öffnen/Einlesen der DB-Datei!\n"; return 0; } std::cout << " done\n"; /* std::cout << "Vorsortierte DB erzeugen (für Testzwecke):"; tmp = 0ULL; for(uint64_t q=0ULL;q<(datarsize-1ULL);++q) { for(uint64_t r=0ULL;r<(datarsize-q-1ULL);++r) { if(db1[r] > db1[r+1ULL]) { tmp = db1[r]; db1[r] = db1[r+1ULL]; db1[r+1ULL] = tmp; } } } for(uint64_t v=0ULL;v<datarsize;++v) { db3[v] = db2[v] = db1[v]; } std::cout << " done\n"; */ //--------------------------End: Initialization of data-------------------------------------------- //--------------------------Begin: CPU-serial execution of algorithm (BubbleSort)------------------ std::cout << "SERIELLE AUSFÜHRUNG (BubbleSort):"; tmp = 0ULL; starttime = omp_get_wtime(); //CPU-serial algorithm (BubbleSort): for(uint64_t i=0ULL;i<(datarsize-1ULL);++i) { for(uint64_t j=0ULL;j<(datarsize-i-1ULL);++j) { if(db1[j] > db1[j+1ULL]) { tmp = db1[j]; db1[j] = db1[j+1ULL]; db1[j+1ULL] = tmp; } } } sdelay1 = omp_get_wtime()-starttime; std::cout << " done\n"; //serial (BubbleSort) //--------------------------End: CPU-serial execution of algorithm (BubbleSort)-------------------- //--------------------------Begin: CPU-serial execution of algorithm (OetSort)--------------------- std::cout << "SERIELLE AUSFÜHRUNG (OetSort):"; tmp = 0ULL; starttime = omp_get_wtime(); //CPU-serial algorithm (OetSort): for(uint64_t n3=1ULL;n3<datarsize;++n3) { if((n3%2ULL)==0ULL) //conditional execution of EVEN- or ODD-for-loop { for(uint64_t e3=0ULL;e3<(datarsize-1ULL);e3+=2ULL) //even-step { if(db3[e3] > db3[(e3+1ULL)]) { tmp = db3[e3]; db3[e3] = db3[(e3+1ULL)]; db3[(e3+1ULL)] = tmp; } } } else { for(uint64_t o3=1ULL;o3<(datarsize-1ULL);o3+=2ULL) //odd-step { if(db3[o3] > db3[(o3+1ULL)]) { tmp = db3[o3]; db3[o3] = db3[(o3+1ULL)]; db3[(o3+1ULL)] = tmp; } } } } sdelay2 = omp_get_wtime()-starttime; std::cout << " done\n"; //serial (OetSort) //--------------------------End: CPU-serial execution of algorithm (OetSort)----------------------- //--------------------------Begin: CPU-parallel OpenMP-execution of algorithm (OetSort)------------ // //For a better reliability and stability: min. 2 Threads, max. (MaxThreads-1) std::cout << "PARALLELE AUSFÜHRUNG (OetSort) mit " << ((omp_get_max_threads()>2)?(omp_get_max_threads()-1):(omp_get_max_threads()==2)?2:1) << " Threads:"; /* OpenMP-CPU-parallel algorithm a normal single-for-loop (C/C++) contains two OpenMP-loops (conditional execution) */ tmp = 0ULL; starttime = omp_get_wtime(); for(uint64_t n2=1ULL;n2<datarsize;++n2) { #pragma omp flush (db2) if((n2%2ULL)==0ULL) //conditional execution of EVEN- or ODD-OpenMP-for-loop { //even-step //create parallel region and OpenMP-for-loop: #pragma omp parallel for num_threads((omp_get_max_threads()>2)?(omp_get_max_threads()-1):(omp_get_max_threads()==2)?2:1) \ schedule(static) \ default(none) private(tmp) shared(db2) for(uint64_t e2=0ULL;e2<(datarsize-1ULL);e2+=2ULL) { if(db2[e2] > db2[(e2+1ULL)]) { tmp = db2[e2]; db2[e2] = db2[(e2+1ULL)]; db2[(e2+1ULL)] = tmp; } } } else { //odd-step //create parallel region and OpenMP-for-loop: #pragma omp parallel for num_threads((omp_get_max_threads()>2)?(omp_get_max_threads()-1):(omp_get_max_threads()==2)?2:1) \ schedule(static) \ default(none) private(tmp) shared(db2) for(uint64_t o2=1ULL;o2<(datarsize-1ULL);o2+=2ULL) { if(db2[o2] > db2[(o2+1ULL)]) { tmp = db2[o2]; db2[o2] = db2[(o2+1ULL)]; db2[(o2+1ULL)] = tmp; } } } } pdelay = omp_get_wtime()-starttime; if((omp_get_max_threads()-1) >= 10) { std::cout << " done\n"; //parallel } else { std::cout << " done\n"; //parallel } //--------------------------End: CPU-parallel OpenMP-execution of algorithm (OetSort)-------------- //--------------------------Analysis of results---------------------------------------------------- std::cout << "Überprüfe Ergebnisse:"; for(uint64_t t=0ULL;t<datarsize;++t) { if(((db1[t]==db2[t]) && (db1[t]==db3[t])) && (db1[((t<(datarsize-1ULL))?(t+1ULL):(datarsize-1ULL))]==db2[((t<(datarsize-1ULL))?(t+1ULL):(datarsize-1ULL))]) && (db1[((t<(datarsize-1ULL))?(t+1ULL):(datarsize-1ULL))]==db3[((t<(datarsize-1ULL))?(t+1ULL):(datarsize-1ULL))])) { if((db1[t]>db1[((t<(datarsize-1ULL))?(t+1ULL):(datarsize-1ULL))]) && (db2[t]>db2[((t<(datarsize-1ULL))?(t+1ULL):(datarsize-1ULL))]) && (db3[t]>db3[((t<(datarsize-1ULL))?(t+1ULL):(datarsize-1ULL))])) { CorrectSorted = false; break; } } else { CorrectSorted = false; break; } } std::cout << " done\n"; std::cout << "\nAuswertung:\n" << "*******************************************************************\n" << "DB-Größe: " << sizeof(db1) << " Bytes\n" << "Anzahl Elemente: " << datarsize << " zu je " << sizeof(uint64_t) << " Bytes\n" << "Seriell & parallel richtig sortiert?: " << ((CorrectSorted==true)?"yes\n":" no\n") << "Dauer - SERIELL (BubbleSort): " << sdelay1 << " sec\n" << "Dauer - SERIELL (OetSort): " << sdelay2 << " sec\n" << "Dauer - PARALLEL (OetSort): " << pdelay << " sec\n" << "__________________\n" << "==>Via BubbleSort seriell|parallel sortierte Daten:\n"; for(uint64_t m1=0ULL;m1<50ULL;++m1) { std::cout << "S1-Element " << (m1+1ULL) << ": " << db1[m1] << " | S2-Element " << (m1+1ULL) << ": " << db3[m1] << " | P-Element " << (m1+1ULL) << ": " << db2[m1] << '\n'; } std::cout << "...\n"; for(uint64_t m2=(datarsize-50ULL);m2<datarsize;++m2) { std::cout << "S1-Element " << (m2+1ULL) << ": " << db1[m2] << " | S2-Element " << (m2+1ULL) << ": " << db3[m2] << " | P-Element " << (m2+1ULL) << ": " << db2[m2] << '\n'; } std::cout << "__________________" << "\n64-Bit-Werte:\n" << "INT64_MIN: " << INT64_MIN << '\n' << "INT64_MAX: " << INT64_MAX << '\n' << "UINT64_MIN: " << UINT64_MIN << '\n' << "UINT64_MAX: " << UINT64_MAX << '\n'; /* // Detailed output std::cout << "Sortierte Daten:\n" << "==========================\n"; for(uint64_t k=0ULL;k<datarsize;++k) { std::cout << "S1-Element " << (k+1ULL) << ": " << db1[k] << " | S2-Element " << (k+1ULL) << ": " << db3[k] << " | P-Element " << (k+1ULL) << ": " << db2[k] << '\n'; } */ getchar(); return 0; }