diff -Nru papi-5.7.0+dfsg/ChangeLogP600.txt papi-6.0.0~dfsg/ChangeLogP600.txt --- papi-5.7.0+dfsg/ChangeLogP600.txt 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/ChangeLogP600.txt 2020-03-04 15:56:56.000000000 +0000 @@ -0,0 +1,1617 @@ +2020-02-27 Steven Kaufmann + + * src/components/infiniband/tests/Makefile: Making MPI tester + optional + +2020-02-22 Frank Winkler + + * src/papi_fwrappers.c: Added fortran wrappers for PAPI_rate_stop and + PAPI_hl_stop. Also fixed doxygen documentation for PAPI_flops_rate. + +2020-02-21 Anthony Castaldo + + * src/components/rocm/tests/square.cpp, + src/components/rocm/tests/square.cu, + src/components/rocm/tests/square.hipref.cpp, + src/components/rocm_smi/linux-rocm-smi.c: Deleted test files from + the repository, and commented-out debug lines from rocm_smi. + * src/components/rocm/linux-rocm.c, + src/components/rocm/tests/Makefile, + src/components/rocm/tests/rocm_all.cpp: Added patches provided by + Evgeny Shcherbakov (AMD), and corrected bugs in rocm_all.cpp. + Tested and now functions as expected. + +2020-02-20 Anthony + + * src/components/sde/tests/Makefile, src/configure, src/configure.in: + Added -lrt to LIBS (if needed) so that it propagates into the pkg- + config file papi.pc. Also, removed the explicit flag from the SDE + tests Makefile. + +2020-02-19 Anthony + + * src/components/sde/sde_internal.h, src/configure, src/configure.in: + Enabled overflow by default in SDE and added -lrt detection in the + configure script. + +2020-02-19 Anthony Castaldo + + * src/components/rocm/tests/rocm_all.cpp: Reconciling this version of + rocm_all.cpp with another pull request. + +2020-02-18 Anthony Castaldo + + * src/components/cuda/linux-cuda.c: ---Correct cuda push/pop context + consistency--- In _cuda_cleanup_eventset we attempt to push a + current cuda context, set a new cuda context to do some cleanup, + then restore the original context with a pop. (cuCtxPushCurrent, + cuCtxPopCurrent). This was failing. 
We corrected it by doing a + Save+Restore instead of a Push+Pop using cuCtxGetCurrent, + cuCtxSetCurrent, different routines that do not require the cuda + Context Stack, and have fewer restrictions on their use. + +2020-02-16 Daniel Barry + + * src/counter_analysis_toolkit/main.c: Added check for whether or not + the user provided a benchmark category. When using the Counter + Analysis Toolkit, if the user did not supply a benchmark category, + then it will run the 'branch' benchmark by default and inform the + user of such. The 'branch' benchmark executes the most quickly of + all the categories, making it a suitable default. These changes + were tested on the Intel Haswell architecture. + +2020-02-13 Frank Winkler + + * src/run_tests.sh: Little change in test script based on commit + 14cebbc. We have changed the high-level environment variable + PAPI_NO_WARNING to PAPI_HL_VERBOSE. Also, verbose output is off by + default, that's why this variable is not needed in the test script + anymore. + +2020-02-13 Anthony Castaldo + + * src/components/cuda/linux-cuda.c: Modifications for more thorough + error-checking in routines before using pointers (ensuring they are + non-NULL). Suggested by Steve Kaufmann. + +2020-02-11 Anthony Castaldo + + * src/components/rocm_smi/linux-rocm-smi.c: Removed a debug message. + +2020-02-10 Anthony Castaldo + + * src/components/rocm_smi/linux-rocm-smi.c: Corrects a problem + producing a segfault. The function MakeRoomAllEvents() can + realloc() a table, but this can make the use of a pointer into the + former area produce a segfault. + +2020-01-31 Anthony Castaldo + + * src/components/rocm_smi/linux-rocm-smi.c, + src/components/rocm_smi/tests/ROCM_SMI_Makefile, + src/components/rocm_smi/tests/rocmcap_plot.cpp: A new utility added + to tests, and debug lines (commented out) in component code until + SMI library problem with power events is sorted out. 
+ * src/components/io/linux-io.c: We have to fopen/fclose the system + file for every read; otherwise Linux caches the file and reports + the same values every time. + +2020-01-30 Anthony + + * src/high-level/papi_hl.c: Turned verbosity of HL API off by + default. + +2020-01-30 Anthony Castaldo + + * src/components/io/linux-io.c: Rewrite to use ctx and ctl structures + for thread safety. + +2020-01-29 Anthony Castaldo + + * src/components/rocm/Rules.rocm, + src/components/rocm/tests/rocm_all.cpp: Corrected a typo in + Rules.rocm, and cleaned up a test program rocm_all.cpp. + * src/components/io/linux-io.c: Provided some insurance that io + component initialization occurs only once. + +2020-01-29 Daniel Barry + + * src/counter_analysis_toolkit/branch.c, + src/counter_analysis_toolkit/dcache.c, + src/counter_analysis_toolkit/flops.c, + src/counter_analysis_toolkit/gen_seq_dlopen.sh: Removed unnecessary + error reporting. Some error messages from the CAT benchmarks were + removed so as not to cause extraneous output. These changes were + tested on the Intel Broadwell architecture. + +2020-01-28 Anthony + + * src/counter_analysis_toolkit/main.c: Avoid computing the latencies + twice. + * src/components/sde/sde.c: Updated the info that is reported by the + component about itself. + +2020-01-28 Daniel Barry + + * src/counter_analysis_toolkit/flops.c: Fixed bug in FLOPS benchmark. + The FLOPS benchmarks ensure that the compiler does not discard the + results of the numerical kernels. A double-precision benchmark was + ensuring that the single-precision result was not discarded, + instead of the double-precision result. This has now been + corrected. This was tested on the Intel Broadwell architecture. 
+ +2020-01-28 Anthony + + * src/counter_analysis_toolkit/dcache.c, + src/counter_analysis_toolkit/dcache.h, + src/counter_analysis_toolkit/driver.h, + src/counter_analysis_toolkit/gen_seq_dlopen.sh, + src/counter_analysis_toolkit/icache.c, + src/counter_analysis_toolkit/icache.h, + src/counter_analysis_toolkit/main.c: Added code to show progress if + the user asks for it (-verbose flag), and removed confusing error + messages and dead code. + +2020-01-28 Daniel Barry + + * src/counter_analysis_toolkit/main.c: Per the sscanf man page, it is + unnecessary to call free() in this block since memory for the + string would not be allocated. This was tested on the AMD EPYC + architecture. + +2020-01-27 Daniel Barry + + * src/counter_analysis_toolkit/driver.h, + src/counter_analysis_toolkit/main.c: Added checks for negative + amounts of qualifiers provided by the user. Previously, there was a + bug caused by a user providing a negative number of qualifiers. + Now, if a user does provide a negative number of qualifiers, this + number is set to zero. This fix was tested on the AMD EPYC + architecture. + +2020-01-27 Anthony + + * src/components/perf_event/pe_libpfm4_events.c: Fixed problems with + debug macro. + +2020-01-24 Damien Genet + + * src/components/infiniband/tests/Makefile: Adds missing rule for + compilation of MPI test + +2020-01-24 Anthony Castaldo + + * src/components/perf_event/pe_libpfm4_events.c: New libpfm4 contains + "aliased" pmus for backward compatibility, amd64_fam17h == + amd64_fam17h_zen1; this causes us to put BOTH pmus into the PMUs + supported string and double the events in native_avail. This update + recognizes when aliases exist (the names must be hard-coded) and + uses only one of the most recent name. 
+ +2020-01-23 Heike Jagode + + * src/components/infiniband_umad/README.md, + .../infiniband_umad/Rules.infiniband_umad, .../infiniband_umad + /linux-infiniband_umad.c, .../infiniband_umad/linux- + infiniband_umad.h, src/components/infiniband_umad/tests/Makefile, + .../tests/infiniband_umad_list_events.c, + .../tests/infiniband_umad_values_by_code.c: Retirement of + infiniband_umad component. With the latest advancements of the + infiniband component, infiniband_umad has become redundant. + +2020-01-22 Damien Genet + + * src/components/Makefile_comp_tests.target.in: Propagating MPICC to + components tests + * src/components/infiniband/linux-infiniband.c, + .../infiniband/tests/MPI_test_infiniband_events.c: snprintf return + value, a classic now. And the 3-space indentation. + +2019-09-04 Rizwan-ICL + + * src/components/infiniband/linux-infiniband.c, + .../infiniband/tests/MPI_test_infiniband_events.c, + src/components/infiniband/tests/Makefile: Added descriptions for + events of infiniband component using documentation provided by + Mellanox; Added test code to test the various events in infiniband + component and modified Makefile to compile the test code; + +2020-01-22 Damien Genet + + * src/components/powercap_ppc/README, + src/components/powercap_ppc/Rules.powercap_ppc, + src/components/powercap_ppc/linux-powercap-ppc.c, + src/components/powercap_ppc/linux-powercap-ppc.h, + src/components/powercap_ppc/tests/Makefile, + src/components/powercap_ppc/tests/powercap_basic.c, + src/components/powercap_ppc/tests/powercap_limit.c: Merged in + feature/powercap_ppc (pull request #34) Feature/powercap ppc * + Powercapping for IBM PowerPC architecture, Power9 processors * + Adding 2 tests for powercap component on PPC architecture Power9 + Approved-by: adanalis Approved-by: Anthony Castaldo + +2020-01-22 Frank Winkler + + * src/high-level/scripts/papi_hl_output_writer.py: Fixed bug for + python3. 
- dict.iteritems() was removed in python3 --> Instead: + use dict.items() The output script works for both python2 and + python3. + * src/papi.c: Bug fix that was caused by commit db01193. + * src/examples/PAPI_flops.c: Improved some comments. + +2020-01-21 Damien Genet + + * src/components/sensors_ppc/linux-sensors-ppc.c: Adds missing checks + for snprintf. A return value larger than the buffer is not really + an error, just a poor design, but whatever. + +2020-01-20 Frank Winkler + + * src/examples/PAPI_mix_hl_ll.c, src/examples/PAPI_mix_hl_rate.c, + src/examples/PAPI_mix_ll_rate.c, src/papi.c, src/papi.h: Renamed + papi_rate_stop to papi_stop_events. + * src/high-level/papi_hl.c: Fixed bug. Check for empty string in + PAPI_EVENTS. + * src/high-level/papi_hl.c, src/papi.c, src/papi_internal.c, + src/papi_internal.h: Fixed typo. + * src/high-level/papi_hl.c: Improved cleanup function. + +2020-01-18 Frank Winkler + + * src/examples/Makefile, src/examples/PAPI_mix_hl_ll.c, + src/examples/PAPI_mix_hl_rate.c, src/examples/PAPI_mix_ll_rate.c, + src/papi.c: Added examples that show how to mix hl, ll, and rate + functions. + +2020-01-17 Frank Winkler + + * src/high-level/papi_hl.c, src/papi.c, src/papi.h, + src/papi_internal.c, src/papi_internal.h: Added feature that allows + mixing of rate functions and hl functions. + +2020-01-16 Anthony Castaldo + + * src/papi_events.csv: Added two machine types to papi_events.csv to + be in line with libpfm4 update to support amd64_fam17h_zen1 and + zen2. + +2020-01-16 Anthony + + * src/components/sde/tests/Makefile: Fixed dependency in Makefile. + +2020-01-16 Frank Winkler + + * src/papi.c, src/papi.h: Added PAPI_rate_stop() that stops any rate + function. 
+ +2020-01-16 Damien Genet + + * src/components/sensors_ppc/README, + src/components/sensors_ppc/Rules.sensors_ppc, + src/components/sensors_ppc/linux-sensors-ppc.c, + src/components/sensors_ppc/linux-sensors-ppc.h, + src/components/sensors_ppc/tests/Makefile, + .../sensors_ppc/tests/sensors_ppc_basic.c: Add new component for + sensors reading on PowerPC 9 Enable with ./configure --with- + components="sensors_ppc" + +2020-01-16 Frank Winkler + + * src/run_tests.sh: Fixed little bug in test script. The output + directory of the high-level API has been renamed from papi to + papi_hl_output. + +2020-01-16 Anthony Castaldo + + * src/components/rocm_smi/Rules.rocm_smi, src/components/rocm_smi + /linux-rocm-smi.c: Changed Rules file to look in multiple places + for rocm_smi.h, it moved between rocm releases. Rewrote a routine + to be more efficient and eliminate a string-size warning. Made some + diagnostic outputs that were left active in previous commit + dependent on #ifdef macros. + +2020-01-15 Frank Winkler + + * src/high-level/papi_hl.c: Fixed memory leak in high-level API. + Based on commit ef20e24 that fixed a bug by deleting a "free" call, + the "free" call is now done in the last function of the high-level + API which is called during the "atexit()" call. 
+ +2020-01-14 Anthony + + * .../sde/tests/Advanced_C+FORTRAN/Gamum.c, + .../sde/tests/Advanced_C+FORTRAN/Xandria.F90, + .../sde/tests/Advanced_C+FORTRAN/sde_test_f08.F90, + src/components/sde/tests/Gamum.c, + src/components/sde/tests/Makefile, + src/components/sde/tests/Minimal/Minimal_Test.c, + src/components/sde/tests/Minimal_Test.c, + src/components/sde/tests/Recorder.c, + .../sde/tests/Recorder/Lib_With_Recorder.c, + .../sde/tests/Recorder/Recorder_Driver.c, + src/components/sde/tests/Simple/Simple_Driver.c, + src/components/sde/tests/Simple/Simple_Lib.c, + src/components/sde/tests/Simple2/Simple2_Driver.c, + src/components/sde/tests/Simple2/Simple2_Lib.c, + src/components/sde/tests/Xandria.F90, + src/components/sde/tests/sde_test_f08.F90: Added new tests/examples + under the SDE component and organized them based on complexity. + * src/components/sde/sde.c: Improved and corrected the checks that + relate to counter groups and recorders. + +2020-01-13 Anthony + + * src/utils/Makefile, src/utils/papi_sde_interface.c: Added the weak + symbols for SDE to papi_native_avail, so the utility works when + PAPI is not configured with the SDE component. + * src/utils/papi_avail.c, src/utils/papi_native_avail.c: Improved the + code that checks the command-line arguments. + +2020-01-06 Anthony + + * src/components/sde/sde.c, src/components/sde/sde_internal.h, + src/utils/papi_native_avail.c: Moved the responsibility of listing + SDEs of a library/executable to papi_native_avail instead of the + SDE component. + * src/papi_internal.c: Updated the variables that are used in the + debug messages in accordance to a previous commit that made these + variables thread safe. + +2020-01-03 Frank Winkler + + * src/high-level/scripts/papi_hl_output_writer.py: Changed name of + some derived metrics. + * src/high-level/papi_hl.c, src/high- + level/scripts/papi_hl_output_writer.py: Added new derived metrics. 
+ +2020-01-03 Frank Winkler + + * src/high-level/papi_hl.c: Little format changes. + * src/high-level/papi_hl.c: Fixed bug in high-level API caused by + commit ff8ff65. The creation of the measurement directory failed + since Coverity freed memory of a string that was used later to + create the measurement directory. + +2020-01-02 Frank Winkler + + * src/high-level/papi_hl.c, src/validation_tests/Makefile.recipies, + src/validation_tests/flops_validation_hl.c, + src/validation_tests/fp_validation_hl.c: Revised default events for + flops and flips. + +2019-12-20 Frank Winkler + + * src/papi.c: papi.c edited online with Bitbucket + * src/examples/high_level.c: high_level.c edited online with + Bitbucket + * src/examples/PAPI_ipc.c: PAPI_ipc.c edited online with Bitbucket + * src/examples/PAPI_flops.c: PAPI_flops.c edited online with + Bitbucket + * src/examples/PAPI_flips.c: PAPI_flips.c edited online with + Bitbucket + * src/examples/PAPI_epc.c: PAPI_epc.c edited online with Bitbucket + +2019-12-19 Anthony + + * src/components/sde/sde.c, src/components/sde/sde_internal.h: Fixed + issues in the SDE component unveiled by Coverity. + +2019-12-19 Daniel Barry + + * src/counter_analysis_toolkit/main.c: Fixed typo in comment for + argument parsing. + +2019-12-19 Frank Winkler + + * src/libpapi.exp: Fixed typo. + * src/ctests/bgp/Makefile, src/ctests/bgp/papi_1.c, src/libpapi.exp: + Further clean-up. + +2019-12-19 Daniel Barry + + * src/counter_analysis_toolkit/Makefile, + src/counter_analysis_toolkit/caches.h, + src/counter_analysis_toolkit/dcache.c, + src/counter_analysis_toolkit/dcache.h, + src/counter_analysis_toolkit/driver.h, + src/counter_analysis_toolkit/main.c, + src/counter_analysis_toolkit/timing_kernels.c, + src/counter_analysis_toolkit/timing_kernels.h: Removed unnecessary + variables and checks. Refactored code blocks. Added comments in the + main driver file. 
+ +2019-12-19 Frank Winkler + + * src/ctests/bgp/papi_1.c, src/libpapi.exp: Clean-up of old high- + level functions. + +2019-12-18 Frank Winkler + + * man/man1/papi_component_avail.1: Fixed typo in + papi_component_avail.1. See pull request #2. + +2019-12-16 Anthony + + * src/counter_analysis_toolkit/Makefile: Renamed cit_collect to + cat_collect. + * src/counter_analysis_toolkit/eventstock.c: Clarified comment. + * src/counter_analysis_toolkit/branch.c, + src/counter_analysis_toolkit/driver.h, + src/counter_analysis_toolkit/eventstock.c, + src/counter_analysis_toolkit/eventstock.h, + src/counter_analysis_toolkit/flops.c, + src/counter_analysis_toolkit/gen_seq_dlopen.sh, + src/counter_analysis_toolkit/main.c: Removed unnecessary work when + setting up the list of events, and minor cosmetic changes. + +2019-12-16 Daniel Barry + + * src/counter_analysis_toolkit/flops.c: Cleaned up comments. + +2019-12-16 Anthony Castaldo + + * src/components/rapl/tests/rapl_overflow.c: Corrected a working but + convoluted line of code. + +2019-12-13 Frank Winkler + + * src/examples/PAPI_flips.c, src/examples/PAPI_flops.c, src/papi.c: + Minor documentation corrections. + * src/papi.h: Fixed some thread definitions. + * src/high-level/papi_hl.c, src/papi.h: Revised documentation of + high-level API. + * src/high-level/papi_hl.c, src/high- + level/scripts/papi_hl_output_writer.py: Renamed the output + directory of the high-level API from 'papi' to 'papi_hl_output'. + * src/papi.c: Revised documentation. + * src/examples/PAPI_epc.c, src/examples/PAPI_flips.c, + src/examples/PAPI_flops.c, src/examples/PAPI_ipc.c, src/papi.c: + Adjusted doxygen documentation. + +2019-12-12 Frank Winkler + + * src/examples/Makefile, src/examples/PAPI_flips.c, + src/examples/PAPI_flops.c, src/examples/PAPI_ipc.c, + src/examples/high_level.c, src/papi.c, src/papi.h, + src/papi_fwrappers.c: Reimplemented rate functions and adjusted + examples. 
+ +2019-12-11 Daniel Barry + + * src/counter_analysis_toolkit/branch.c, + src/counter_analysis_toolkit/dcache.c, + src/counter_analysis_toolkit/flops.c, + src/counter_analysis_toolkit/gen_seq_dlopen.sh: Added + PAPI_cleanup_eventset() call to each of the benchmarks. This + removes events from the event set. By including these calls, the + benchmarks do not encounter the PAPI_ECOUNT error code, which + occurs if there are too many events added to the same event set. + These changes were tested on the Intel Skylake architecture. + +2019-12-10 Anthony Castaldo + + * src/components/rocm_smi/README, + src/components/rocm_smi/Rules.rocm_smi, + src/components/rocm_smi/tests/rocm_smi_all.txt: Minor changes to + text and a setting that was for development only. + +2019-12-10 Frank Winkler + + * src/papi.c: Made rate functions thread safe. + +2019-12-09 Anthony + + * src/utils/Makefile: Changed the order of the linker flags so that + -ldl is at the end since libpapi.a needs libdl.so but not the other + way around. + +2019-12-06 Heike Jagode + + * README.md: README.md edited online with Bitbucket + +2019-12-06 Steve Kaufmann + + * src/components/rocm/linux-rocm.c, src/papi_events.csv: The changes + here are based on a patch provided by Steve Kaufmann; to correct a + misnamed event in papi_events.csv, and prevent a segfault in rocm + when a context pointer is null. Additional changes by Tony Castaldo + check to see if the necessary rocprofiler environment variables + have been set; and disable the component if they are not, with an + informative reason to be reported by papi_component_avail. (The + component will not work without them). + +2019-12-05 Frank Winkler + + * src/papi.c: Replaced HighLevelInfo with RateInfo. + +2019-12-03 Anthony Castaldo + + * src/extras.c: extra '#' in "%#p" print formats, using just '%p'. 
+ +2019-12-03 William Cohen + + * src/testlib/papi_test.h, src/testlib/test_utils.c: Use the noreturn + attribute only when the compiler supports GNU C extensions. + * src/testlib/papi_test.h, src/testlib/test_utils.c: Properly mark + some test_utils.c functions with noreturn attributes Clang makes + use of the information whether a function returns in flow analysis + to determine whether there are uses of null values and other + possible problematic issues. Marking the test_pass, test_hl_pass, + test_fail, and test_skip functions properly with noreturn attribute + allows Clang to more accurately analyze the code and eliminates 87 + false positive warnings in the PAPI testsuite code. + +2019-12-02 Anthony Castaldo + + * src/components/coretemp/linux-coretemp.c, src/components/infiniband + /linux-infiniband.c, src/components/lmsensors/linux-lmsensors.c, + src/components/lustre/linux-lustre.c, src/components/pcp/linux- + pcp.c, src/components/pcp/tests/testPCP.c, + src/components/perf_event/perf_event.c, + .../perf_event_uncore/perf_event_uncore.c, src/components/rapl + /linux-rapl.c, src/ctests/failed_events.c, src/ctests/kufrin.c, + src/ctests/pthrtough.c, src/ctests/pthrtough2.c, src/extras.c, src + /high-level/papi_hl.c, src/linux-common.c, src/linux-memory.c, + src/testlib/clockcore.c, src/utils/cost_utils.c, + src/utils/papi_command_line.c, src/utils/papi_multiplex_cost.c: The + code in this commit all failed a Coverity scan (a code consistency + tool) that correctly identified memory leaks, potential buffer + overflows, and failures to close a file or directory that had been + opened. + +2019-12-02 Frank Winkler + + * src/papi.c, src/papi.h, src/papi_fwrappers.c: Reimplemented rate + calls such as PAPI_flips, PAPI_flops, etc. 
- These calls are now + part of the low-level API - PAPI_stop_rates() stop the counters + +2019-11-20 William Cohen + + * src/components/sde/Rules.sde: Limit Fortran 90 compilers options to + SDE component Fortran 90 code The Rules.sde added Fortran 90 + options to FFLAGS that would end up being applied to other Fortran + code being built in papi. Unfortunately, the other code is F77 code + and the options would cause the build to fail. + +2019-11-21 Heike Jagode + + * README.md: README.md edited online with Bitbucket + +2019-11-14 Daniel Barry + + * src/counter_analysis_toolkit/main.c: Swapped lines 268 and 269 of + main.c so that the appropriate memory allocation is freed, and the + pointer is then set to NULL. + +2019-11-13 Anthony Castaldo + + * src/components/nvml/tests/Makefile, + src/components/nvml/tests/nvmlcap_plot.cu, + src/components/nvml/utils/Makefile, + src/components/nvml/utils/README, + src/components/nvml/utils/nvmlcap_plot.cu: For consistency with + powercap and rapl components, moved nvmlcap_plot.cu to a new + nvml/utils/ directory. New Makefile in nvml/utils/ and adjusted + Makefile in nvml/tests/. Created a new README for nvmlcap_plot. No + code changes; but tested configure and make of PAPI and + nvmlcap_plot. + +2019-11-08 Anthony Castaldo + + * src/components/rapl/linux-rapl.c: Fixed an inaccurate comment. + * src/components/rapl/README, + src/components/rapl/tests/rapl_overflow.c: Added a paragraph of + usage info to README; also reformatted existing comments to comply + with 80 char line limit; without changing their content. + rapl_overflow.c was confusing, it was not using the + PACKAGE_ENERGY_CNT event to test for overflow, and the scaled value + seemed to wrap in 85ms. This seemed to conflict with the results of + rapl_wraparound; which computes a wraparound time in 85 minutes. + rapl_overflow.c is now in line with an 80-90 minute wraparound + value. 
+ +2019-11-07 Anthony Castaldo + + * src/components/rapl/linux-rapl.c: Changes to properly mask energy + values to uint32, and accumulate them to return a 64-bit + accumulator. Verified wraparound time at approx 85 minutes (for a + 32 bit read). That is the maximum allowed time between reads; the + 64-bit value returned should never wrap. (Some tabs converted to + spaces in changed code.) + +2019-11-01 Frank Winkler + + * src/high-level/papi_hl.c: Removed Doxygen documentation for + internal functions and moved code block for multiplex + initialization. PAPI_multiplex_init is only called after a + successful PAPI_thread_init. + +2019-10-31 Anthony Castaldo + + * src/components/perf_event/pe_libpfm4_events.c: Fixed a typo in the + error message. + * src/ctests/Makefile.recipies, src/ctests/filter_helgrind.c, + src/papi.c, src/papi_internal.c, src/threads.c, src/threads.h: The + changes to papi.c, papi_internal.c, threads.h and threads.c correct + a race condition that was the result of all threads using the same + two static variables (papi_event_code and papi_event_code_changed) + to temporarily record a state of operation. The solution was to + make these variables unique per thread, using the ThreadInfo_t + structure already provided in PAPI for such purposes. The file + krentel_pthread_race.c is a stress test to produce race conditions. + filter_helgrind.c reduces the volume of --tool-helgrind output to a + more manageable summary. Both are added to Makefile.recipies. + +2019-10-31 William Cohen + + * src/ctests/krentel_pthreads_race.c: This code is a modification of + krentel_pthreads.c, to better test some race conditions. It is not + included in the standard tests; it is a diagnostic that should be + run with "valgrind --tool=helgrind". 
+ +2019-10-31 Anthony Castaldo + + * src/components/perf_event/pe_libpfm4_events.c: Changed SUBDBG error + reporting in new code to a single message instead of two, before + the unlock code (so no race condition on variables in report). + Cosmetics. + + +2019-10-28 Daniel Barry + + * src/counter_analysis_toolkit/main.c: Added checks for improperly + formatted lines in the user-provided event list. If a line is + missing a qualifier count, then it is discarded. If a provided + event name is either not available in the architecture or contains + qualifiers, then the qualifier count is set to zero to prevent + appending extraneous qualifiers, and the user is notified. Also + cleaned up string manipulation. These changes were tested on the + Intel Haswell architecture. + +2019-10-25 Anthony Castaldo + + * src/components/perf_event/pe_libpfm4_events.c: In two places, we + exited the routine allocate_native_event() because we could not + find a mask or attribute in an event name (because the event was + supported but the given mask was not), and failed without unlocking + the NAMELIB_LOCK, or cleaning up allocated memory. + + free (msk_ptr); + free(pmu_name); + + _papi_hwi_unlock( NAMELIB_LOCK ); + +2019-10-24 Anthony Castaldo + + * src/components/rocm_smi/linux-rocm-smi.c, + src/components/rocm_smi/tests/ROCM_SMI_Makefile, + src/components/rocm_smi/tests/rocm_smi_all.cpp, + src/components/rocm_smi/tests/rocm_smi_all.txt: New events added, + some bugs corrected. ROCM_SMI_Makefile is modified to use env + variable $PAPI_ROCM_ROOT to make it easier to compile with a local + version of the rocm_smi library. rocm_smi_all.txt is the output of + a run of rocm_smi_all.cpp, which has been modified to handle + strings, and skip testing of events that bomb (unhandled exceptions + in library code). NOTE this code may still contain debug printing + to stderr, to be removed in the final version after all issues are + corrected. 
-Tony + +2019-10-24 Frank Winkler + + * src/papi.h: Removed TLS definitions. + * src/high-level/papi_hl.c, src/papi.h: Replaced PAPI_TLS_KEYWORD + with THREAD_LOCAL_STORAGE_KEYWORD due to ABI conflicts. + +2019-10-18 Anthony Castaldo + + * src/components/rocm_smi/Rules.rocm_smi, src/components/rocm_smi + /linux-rocm-smi.c, src/components/rocm_smi/rocm_smi.h: This is a + first installment of the rewrite of the rocm_smi component. It + currently requires a private install of the updated library (with + iterators), and a special Rules.file, PAPI_ROCM_ROOT, and + PAPI_ROCM_SMI_MAIN. It works as far as executing + utils/papi_native_avail; but none of the events have been tested + yet by reading with PAPI code. -TC + +2019-10-15 Damien Genet + + * src/components/nvml/linux-nvml.c: Merged in dgenet/papi/fix/nvml- + rules (pull request #14) Fixes error messages while detecting + Rules.nvml Patch from Vince Weaver Approved-by: Heike Jagode + Approved-by: Damien Genet + Approved-by: Anthony Castaldo + +2019-10-09 Heike Jagode + + * README.md: Cleaning up README file. + * README.md: README.md edited online with Bitbucket + * README: README edited online with Bitbucket + * README.md: README.md edited online with Bitbucket + +2019-10-08 Steve Kaufmann + + * src/components/cuda/linux-cuda.c: Corrected several cosmetic issues + and typos, standardized naming, used PATH_MAX instead of literal, + and PAPI_MAX_STR_LEN instead of PAPI_MIN_STR_LEN. + +2019-10-08 Frank Winkler + + * src/components/lmsensors/linux-lmsensors.c: Removed blank line. + * src/components/lmsensors/linux-lmsensors.c: Replaced spaces with + underscores in event name. + +2019-10-06 Frank Winkler + + * src/papi_fwrappers.c: Corrected Doxygen documentation. 
+ * src/ctests/Makefile.recipies, src/ctests/mpi_hl.c, + src/ctests/mpi_omp_hl.c, src/ctests/omp_hl.c, + src/ctests/pthread_hl.c, src/ctests/serial_hl.c, + src/ctests/serial_hl_advanced.c, src/ctests/serial_hl_ll_comb.c, + src/ctests/serial_hl_ll_comb2.c, src/ftests/Makefile.recipies, + src/ftests/serial_hl.F, src/ftests/serial_hl_advanced.F, src/high- + level/papi_hl.c, src/papi.h, src/papi_fwrappers.c, + src/testlib/ftests_util.F, src/testlib/papi_test.h, + src/testlib/test_utils.c, + src/validation_tests/flops_validation_hl.c: Removed advanced + functions from the new high-level API. The new high-level API + consists of three functions: - PAPI_hl_region_begin - PAPI_hl_read + - PAPI_hl_region_end Validation test in C: - + src/validation_tests/flops_validation_hl.c Test examples in C: - + src/ctests/serial_hl.c - src/ctests/omp_hl.c - + src/ctests/pthread_hl.c - src/ctests/mpi_hl.c - + src/ctests/mpi_omp_hl.c - src/ctests/serial_hl_ll_comb.c Test + example in Fortran: - src/ftests/serial_hl.F + +2019-10-03 Damien Genet + + * src/counter_analysis_toolkit/branch.c, + src/counter_analysis_toolkit/branch.h, + src/counter_analysis_toolkit/dcache.c, + src/counter_analysis_toolkit/dcache.h, + src/counter_analysis_toolkit/eventstock.c, + src/counter_analysis_toolkit/eventstock.h, + src/counter_analysis_toolkit/flops.c, + src/counter_analysis_toolkit/gen_seq_dlopen.sh, + src/counter_analysis_toolkit/icache.c, + src/counter_analysis_toolkit/icache.h, + src/counter_analysis_toolkit/main.c, + src/counter_analysis_toolkit/prepareArray.c, + src/counter_analysis_toolkit/prepareArray.h, + src/counter_analysis_toolkit/timing_kernels.c, + src/counter_analysis_toolkit/timing_kernels.h: Adding Checks + +2019-10-04 Anthony Danalis + + * src/components/lmsensors/linux-lmsensors.c: Fixed inconsistency in + component name. 
+ +2019-09-30 Anthony Danalis + + * src/components/sde/README, src/components/sde/Rules.sde, + src/components/sde/interface/papi_sde_interface.c, + src/components/sde/interface/papi_sde_interface.h, + src/components/sde/sde.c, src/components/sde/sde_F.F90, + src/components/sde/sde_internal.h, + src/components/sde/tests/Gamum.c, + src/components/sde/tests/Makefile, + src/components/sde/tests/Minimal_Test.c, + src/components/sde/tests/Recorder.c, + src/components/sde/tests/Xandria.F90, + src/components/sde/tests/sde_test_f08.F90: Software Defined Events + (SDE) component. + * src/counter_analysis_toolkit/Makefile, + src/counter_analysis_toolkit/README, + src/counter_analysis_toolkit/branch.c, + src/counter_analysis_toolkit/branch.h, + src/counter_analysis_toolkit/caches.h, + src/counter_analysis_toolkit/compar.c, + src/counter_analysis_toolkit/dcache.c, + src/counter_analysis_toolkit/dcache.h, + src/counter_analysis_toolkit/driver.h, + src/counter_analysis_toolkit/event_list.txt, + src/counter_analysis_toolkit/eventstock.c, + src/counter_analysis_toolkit/eventstock.h, + src/counter_analysis_toolkit/flops.c, + src/counter_analysis_toolkit/flops.h, + src/counter_analysis_toolkit/flops_aux.c, + src/counter_analysis_toolkit/flops_aux.h, + src/counter_analysis_toolkit/gen_seq_dlopen.sh, + src/counter_analysis_toolkit/icache.c, + src/counter_analysis_toolkit/icache.h, + src/counter_analysis_toolkit/main.c, + src/counter_analysis_toolkit/prepareArray.c, + src/counter_analysis_toolkit/prepareArray.h, + src/counter_analysis_toolkit/replicate.sh, + src/counter_analysis_toolkit/timing_kernels.c, + src/counter_analysis_toolkit/timing_kernels.h: Counter Analysis + Toolkit. + +2019-09-30 Anthony Castaldo + + * src/components/cuda/Rules.cuda, src/components/nvml/Rules.nvml, + src/components/pcp/Rules.pcp: Corrected typos, replacing "optimal" + with "optional." 
+ +2019-09-18 Anthony Castaldo + + * src/components/cuda/linux-cuda.c: We no longer check the error on + setting CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS, it only works on + Tesla devices (and is preferred there) but fails on other models, + they don't support the feature. We do not fail if they reject it. + +2019-09-17 Kevin Huck + + * src/components/io/CHANGES, src/components/io/README, + src/components/io/Rules.io, src/components/io/linux-io.c, + src/components/io/linux-io.h, src/components/io/tests/Makefile, + src/components/io/tests/io_basic.c, + src/components/io/tests/io_multiple_components.c: Adding I/O + component to read from /proc/self/io. + +2019-09-13 Steve Kaufmann + + * src/components/rocm/linux-rocm.c, src/components/rocm_smi/linux- + rocm-smi.c: Changes to make these components (ROCM, ROCM_SMI) have + naming consistency with others; fixed numerous minor formatting + issues and comments. Compiled and checked on ICL Caffeine. + +2019-09-13 Anthony Castaldo + + * src/components/rocm/linux-rocm.c, src/components/rocm_smi/linux- + rocm-smi.c: Revert changes, used wrong author (Should be Steve + Kaufmann). This reverts commit + 9a60e91d539b8eb079dd81adc1d91c17620cfaed. + +2019-09-12 Anthony Castaldo + + * src/components/rocm/linux-rocm.c, src/components/rocm_smi/linux- + rocm-smi.c: Changes suggested by Steve Kaufmann (Cray) to make + these components have naming consistency with others; fixed + numerous minor formatting issues. Reviewed, accepted, compiled, + checked. + +2019-09-09 Frank Winkler + + * src/components/infiniband_umad/README.md, + src/components/lmsensors/README.md: Little format changes for + markdown documentation files. 
+ * src/components/libmsr/Makefile.libmsr.in, + src/components/libmsr/README, src/components/libmsr/README.md, + src/components/libmsr/Rules.libmsr, + src/components/libmsr/configure, + src/components/libmsr/configure.in, src/components/libmsr/linux- + libmsr.c, src/components/libmsr/utils/libmsr_write_test.c: Updated + code and documentation for component libmsr to get compliance with + the new component setup standard. + * src/components/infiniband_umad/README, + src/components/infiniband_umad/README.md, + .../infiniband_umad/Rules.infiniband_umad, .../infiniband_umad + /linux-infiniband_umad.c: Updated code and documentation for + component infiniband_umad to get compliance with the new component + setup standard. + +2019-09-08 Frank Winkler + + * src/components/lmsensors/README, + src/components/lmsensors/README.md, + src/components/lmsensors/Rules.lmsensors, src/components/lmsensors + /linux-lmsensors.c: Updated code and documentation for component + lmsensors to get compliance with the new component setup standard. + +2019-09-05 Anthony Castaldo + + * src/components/pcp/Rules.pcp: Corrected an issue with Rules, + changing the name of macro that conflicted with other potential + macros. + +2019-09-04 Anthony Castaldo + + * src/components/cuda/Rules.cuda, src/components/nvml/Rules.nvml: + Corrected an incompatibility in multiple Rules files when multiple + components are included. Rules files cannot all use the same + "MACRODEF" variable for different purposes; each needs a unique ID, + like CUDA_MACS, NVML_MACS, etc. + * src/components/rocm/README, src/components/rocm/Rules.rocm, + src/components/rocm/linux-rocm.c, src/components/rocm_smi/README, + src/components/rocm_smi/Rules.rocm_smi, src/components/rocm_smi + /linux-rocm-smi.c: Changes to make rocm_smi component compliant + with new component setup standard; changes to rocm component to + correct bugs in compatibility and comments. 
+ * src/components/rocm/README, src/components/rocm/Rules.rocm, + src/components/rocm/linux-rocm.c: Modified documentation, Rules and + code for ROCM component to comply with new setup standards. It now + requires PAPI_ROCM_ROOT as an environment variable. + * src/components/pcp/README, src/components/pcp/Rules.pcp, + src/components/pcp/linux-pcp.c: Code and documentation to get + component PCP into compliance with the new component setup + standard; PAPI_PCP_ROOT is only environmental variable required. + +2019-09-03 Anthony Castaldo + + * src/components/cuda/Rules.cuda, src/components/nvml/README, + src/components/nvml/Rules.nvml, src/components/nvml/linux-nvml.c: + NVML component README, Rules and code updated to reflect new setup + policy, relies on PAPI_CUDA_ROOT only. Adds a new override, + PAPI_NVML_MAIN. Instructions improved in Rules.cuda, Rules.nvml. + +2019-08-29 Anthony Castaldo + + * src/components/cuda/linux-cuda.c: Corrected comments. + * src/components/cuda/README, src/components/cuda/Rules.cuda, + src/components/cuda/linux-cuda.c: The changes make the cuda + component reliant on a single environment variable, PAPI_CUDA_ROOT, + allowing overrides specified in Rules.cuda if the necessary + libraries are not in their expected locations. Detailed + instructions are in README, and for overrides in Rules.cuda. + +2019-08-28 Anthony Castaldo + + * src/components/rocm/linux-rocm.c: Bug fixes, for missing eventName + in debug mode; also for failure to clear internal 'usage' flags + when destroying an event set. + +2019-08-14 Carl Love + + * src/papi_events.csv: Per Carl Love, "The POWER9 event + PM_BR_TAKEN_CMPL includes conditional and unconditional branches. + The equation for event PAPI_BR_NTK should not include the event + PM_BR_UNCOND as PM_BR_TAKEN_CMPL already counts unconditional + branches. The POWER9 event PM_LD_REF_L1 includes hits and misses to + the L1. Thus we should not be adding PM_LS_MISS_L1_ALT when + calculating PAPI_LD_INS on POWER9." 
The definitions for these + preset events were changed accordingly, and their patterns of + behavior were measured during the execution of performance + benchmarks on the IBM POWER9 processors on Summit. The patterns of + behavior for the corresponding events on the Intel Skylake and + Broadwell processors were measured during the execution of the same + performance benchmarks. The respective events from each + architecture behave similarly. In addition, the new definitions + pass the PAPI validation tests. + +2019-08-12 Anthony Castaldo + + * src/components/pcp/Rules.pcp, src/components/rocm/Rules.rocm: + Adding $(LDL) to LDFLAGS in Rules.x files when it was missing, on + PCP and ROCM components. + +2019-08-09 Anthony Castaldo + + * src/components/pcp/README, src/components/pcp/Rules.pcp, + src/components/pcp/linux-pcp.c: The PCP component changed to use + the new standard for PAPI environment variables; there are now no + necessary environment variables, and no need to change + LD_LIBRARY_PATH. The rules file was streamlined. The code was + tested on Peak and Summit. We do allow overrides for non-standard + installations of PCP, the variables PAPI_PCP_ROOT, PAPI_PCP_LIBS, + PAPI_PCP_INC and PAPI_PCP_LIBNAME can be set by users to specify + non-standard locations or library names. The README file in + components/pcp/ contains detailed instructions on their use. + +2019-08-08 Anthony Castaldo + + * src/components/rocm/README, src/components/rocm/Rules.rocm, + src/components/rocm/linux-rocm.c, + src/components/rocm/tests/run_papi.sh, + src/components/rocm_smi/README, + src/components/rocm_smi/Rules.rocm_smi, src/components/rocm_smi + /linux-rocm-smi.c: Components ROCM and ROCM_SMI have been changed + to adhere to our recent standardization of using environment + variables. README files are updated with detailed information, and + we have both simplified and extended the capabilities with new env + vars. 
+ It is simplified because if a standard install of the rocm + or rocm_smi software puts it in the default directories, PAPI will + find the libraries and include files without any configure step. + But it is more powerful because we allow overrides to the defaults, + including overrides to the necessary library names. We also no + longer require the LD_LIBRARY_PATH environment variable be + modified, or exist at all. If it is there and we don't find a + library in a path given by the user, we will still search it, and + the default search directories. The Rules.rocm and Rules.rocm_smi + are changed to use defaults, and their linker commands changed to + allow the specification of a non-standard library name; e.g. a + versioned library that does not end in ".so". These changes were + tested and verified on the ICL machine Caffeine. + +2019-08-04 Frank Winkler + + * src/components/infiniband_umad/Rules.infiniband_umad, + src/components/lmsensors/Rules.lmsensors: Added "$(LDL)" to LDFLAGS + of components lmsensors and infiniband_umad. "libdl" was missing + in a previous commit (0f0b74f). + * src/high-level/papi_hl.c: Fixed bug in high-level API. Function + PAPI_hl_print_output () caused a segmentation fault when no events + were recorded. + +2019-08-01 Frank Winkler + + * .../infiniband_umad/Makefile.infiniband_umad.in, + src/components/infiniband_umad/README, + .../infiniband_umad/Rules.infiniband_umad, + src/components/infiniband_umad/configure, + src/components/infiniband_umad/configure.in, .../infiniband_umad + /linux-infiniband_umad.c, + src/components/infiniband_umad/tests/Makefile, + src/components/lmsensors/Makefile.lmsensors.in, + src/components/lmsensors/README, + src/components/lmsensors/Rules.lmsensors, + src/components/lmsensors/configure, + src/components/lmsensors/configure.in, src/components/lmsensors + /linux-lmsensors.c: Changed configuration mechanism for components + lmsensors and infiniband_umad. We do not use configure scripts + anymore. 
Each component is configured via environment variables. + For compilation: PAPI_[component]_ROOT PAPI_[component]_INCLUDE + PAPI_[component]_LIB For runtime: PAPI_[component]_LIBNAME + Detailed information can be found in the README file of each + component. + +2019-07-25 Anthony Castaldo + + * src/components/cuda/README, src/components/cuda/Rules.cuda, + src/components/cuda/linux-cuda.c, + src/components/cuda/sampling/Makefile, + src/components/cuda/tests/Makefile, src/components/nvml/README, + src/components/nvml/Rules.nvml, src/components/nvml/linux-nvml.c, + src/components/nvml/tests/Makefile: A continuation of a previous + commit prematurely pushed. Same commentary: The CUDA and NVML + components have a revamped Environment Variable processing; we have + simplified this for users, made it more flexible, and standardized + on environment variables beginning with "PAPI_". The NVML + component used to require a separate configure step, this has been + eliminated. Simplification: The only required environment variable + is now PAPI_CUDA_ROOT, set to the path corresponding to CUDA. Users + no longer need to update LD_LIBRARY_PATH. There are several other + environment variables that can be set to override the defaults we + would automatically use if only PAPI_CUDA_ROOT is given. The + general protocol we now use for naming environment variables is + PAPI_[component]_[setting]. Examples are: PAPI_CUDA_STUBS + (default = ${PAPI_CUDA_ROOT}/lib64/stubs PAPI_CUPTI_LIBS (default + = ${PAPI_CUDA_ROOT}/extras/CUPTI/lib64) PAPI_NVML_LIBNAME (default + = "libnvidia-ml.so") Some possible overrides are processed at + compile time, the Rules.[component] files now set defaults (For + cuda and nvml based on PAPI_CUDA_ROOT) for the path to include + files, or to library files. Other possible overrides are handled + at runtime; using environment variables the user can specify + specific paths to attempt first for each library. 
If the necessary + libraries are not found on those paths, the system will still + attempt to use the LD_LIBRARY_PATH and the default directories + (/lib64, /usr/lib64). The "disabled_reason" field for components + has been updated to provide more information when libraries are not + found. The README files have been rewritten to reflect this + protocol, to detail the new possible overrides, and to show the + order in which they are searched when more than one environment + variable applies. + +2019-07-24 Anthony Castaldo + + * src/components/nvml/Makefile.nvml.in, + src/components/nvml/configure, src/components/nvml/configure.in: + The CUDA and NVML components have a revamped Environment Variable + processing; we have simplified this for users, made it more + flexible, and standardized on environment variables beginning with + "PAPI_". The NVML component used to require a separate configure + step, this has been eliminated. Simplification: The only required + environment variable is now PAPI_CUDA_ROOT, set to the path + corresponding to CUDA. Users no longer need to update + LD_LIBRARY_PATH. There are several other environment variables + that can be set to override the defaults we would automatically use + if only PAPI_CUDA_ROOT is given. The general protocol we now use + for naming environment variables is PAPI_[component]_[setting]. + Examples are: PAPI_CUDA_STUBS (default = + ${PAPI_CUDA_ROOT}/lib64/stubs PAPI_CUPTI_LIBS (default = + ${PAPI_CUDA_ROOT}/extras/CUPTI/lib64) PAPI_NVML_LIBNAME (default = + "libnvidia-ml.so") Some possible overrides are processed at + compile time, the Rules.[component] files now set defaults (For + cuda and nvml based on PAPI_CUDA_ROOT) for the path to include + files, or to library files. Other possible overrides are handled + at runtime; using environment variables the user can specify + specific paths to attempt first for each library. 
If the necessary + libraries are not found on those paths, the system will still + attempt to use the LD_LIBRARY_PATH and the default directories + (/lib64, /usr/lib64). The "disabled_reason" field for components + has been updated to provide more information when libraries are not + found. The README files have been rewritten to reflect this + protocol, to detail the new possible overrides, and to show the + order in which they are searched when more than one environment + variable applies. + +2019-07-19 Frank Winkler + + * src/high-level/papi_hl.c: Removed function "error_at_line" + (declared in error.h), since it is not portable. Fixed warning + "implicit declaration of function error_at_line". + +2019-07-17 Anthony Castaldo + + * src/components/nvml/README: Changes explaining the issues with + libnvidia-ml.so in detail; and the new facility for changing the + default name using the environment variable PAPI_NVML_LIBNAME. + * src/components/nvml/Rules.nvml, src/components/nvml/linux-nvml.c, + src/utils/papi_component_avail.c: linux-nvml.c is changed to allow + the nvml library name to be set by an environment variable, + PAPI_NVML_LIBNAME. If this is not present, the default 'libnvidia- + ml.so' is used. Also, misspellings in error messages were + corrected. Rules.nvml: A previous method used a -D #define during + the compile of linux-nvml.c to change the default name. This method + was eliminated. utils/papi_component_avail.c a typographic error + in an error message was corrected. + +2019-07-15 Anthony Castaldo + + * src/utils/papi_component_avail.c: To avoid confusion, we no longer + print an empty "PMUs supported:" line for components for which + Performance Monitoring Units do not apply (or are not exposed + through its device interfaces). We also corrected minor bugs in + computing the display length to limit output lines to 130 + characters (when listing PMUs); this was most evident on the + perf_event_uncore component. 
+ +2019-07-12 Anthony Castaldo + + * src/components/rocm_smi/README, src/components/rocm_smi/linux-rocm- + smi.c, .../rocm_smi/tests/rocm_command_line.cpp, + src/components/rocm_smi/tests/rocm_smi_all.cpp: linux-rocm-smi.c + (for the rocm_smi component) was fixed to not expose globals other + than the _rocm_smi_vector. The src/components/rocm_smi/README file + was updated to provide more information on the LD_LIBRARY_PATH + required, and the utilities rocm_command_line.cpp and + rocm_smi_all.cpp in the tests/ directory were updated to report + more information. + +2019-06-27 Frank Winkler + + * src/high-level/papi_hl.c: Added multiplexing support for high-level + API. Multiplexing of cpu core components can be enabled via the + environment variable PAPI_MULTIPLEX. + +2019-06-26 Daniel Barry + + * src/ctests/zero_omp.c, src/papi_vector.c: Changed the dummy + function call in papi_vector.c and created a function to wrap the + call to omp_get_thread_num() in ctests/zero_omp.c. These allow the + function castings in the respective files to operate properly + without warnings from GCC 8.3.0. These changes were tested on the + Intel Haswell architecture. + +2019-06-25 Anthony Castaldo + + * src/components/nvml/PeakConfigure.sh, src/components/nvml/README, + src/components/nvml/Rules.nvml, src/components/nvml/linux-nvml.c: + linux-nvml.c is modified to accept an alternate name for the nvml + library, which will default to the standard 'libnvidia-ml.so'. This + is necessary on a system (Summit in particular) that doesn't have + the standard link file to the current versioned lib. It will also + provide flexibility for testing previous versions or new versions + of the library. The library file name can be specified in + Rules.nvml, as a compiler-line Define of NVML_LIBNAME. Rules.nvml + has comments added, one of which is an example of how to specify + NVML_LIBNAME. Otherwise it is unchanged; and the library name used + will default to 'libnvidia-ml.so'. 
README has been updated to + describe this new capability, and for ICL staff contains examples + of what works on Summit. PeakConfigure.sh had a typo that was + corrected. + +2019-06-18 Vince Weaver + + * src/components/rapl/tests/rapl_basic.c: rapl: quiet a strncpy() + warning in the rapl_basic test + * src/linux-common.c, src/linux-memory.c, src/papi_internal.c, + src/papi_libpfm4_events.c: papi: fix some strncpy() related + warnings reported by gcc 8.3 + +2019-06-10 Daniel Barry + + * src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c: + Changed the sprintf() call to snprintf() and added an if-statement + to check whether the number of characters intended to be written to + the destination buffer exceed the size of the buffer. This prevents + GCC 8.3.0 from warning that the destination buffer may not be large + enough to store the contents of the source buffers. These changes + were tested on the Intel Haswell architecture. + +2019-06-05 Daniel Barry + + * src/components/perf_event_uncore/tests/perf_event_uncore.c: Added a + second buffer in the perf_event_uncore test. This prevents GCC 8 + from complaining about the source and destination buffers + overlapping. Per the sprintf man-page (release 3.53 of the Linux + man-pages project), "the standards explicitly note that the results + are undefined if source and destination buffers overlap when + calling sprintf()." Since the second buffer is only present in a + test program, this change will not create memory overhead to user + programs which use PAPI. These changes were tested on the Intel + Haswell architecture. + +2019-06-05 Anthony Castaldo + + * src/components/rocm_smi/linux-rocm-smi.c: Added a direct file + system search for AMD GPU peripherals; vendor ID 0x1002. We search + up to 64 /sys/class/drm/card?/device/vendor files; (card0, card1, + ... card 63). Also corrected a typo in an event name. 
Tested and + worked on ICL Caffeine system; correctly excluded card0 (display + card) and found two AMD GPUs on card1, card2. + +2019-05-25 Yunqiang Su + + * src/linux-lock.h: [mips] replace beqzl with beqzc for r6 + +2019-05-20 Daniel Barry + + * src/papi_events.csv: I have added PAPI POWER9 event definitions for + PAPI_L2_DCR, PAPI_L2_DCW, PAPI_BR_CN, PAPI_BR_NTK, PAPI_BR_UCN, and + PAPI_BR_TKN. These events have been tested. Their patterns of + behavior were measured during the execution of performance + benchmarks on Summit's POWER9 processors. The patterns of behavior + for the corresponding events on Intel Haswell processors were + measured during the execution of the same performance benchmarks. + The respective events from each architecture behave similarly. + +2019-05-17 Anthony Castaldo + + * src/components/rocm_smi/linux-rocm-smi.c: Added missing + "rsmi_init(0)" call to component_init() function. + * src/components/rocm/README, src/components/rocm/Rules.rocm, + src/components/rocm/linux-rocm.c: Modifications to support indexed + variables; requires different names be used for PAPI users and the + request to the RocProfiler (it interprets the index within the + name). Updated notes in README, and additional potential -I include + paths in Rules.rocm. + +2019-05-14 Anthony Castaldo + + * src/components/cuda/linux-cuda.c: Improved error reporting when + libraries are not found, or the cuda initialization function fails. + No changes to function. + +2019-05-07 Heike Jagode + + * src/components/appio/tests/iozone/Gnuplot.txt, + src/components/appio/tests/iozone/gnu3d.dem, + src/components/powercap/tests/powercap_limit.c, + src/components/vmware/VMwareComponentDocument.txt: More clean up of + carriage return character (^M) throughout the code base. Thanks to + Steve Kaufmann! 
+ +2019-05-07 Anthony Castaldo + + * src/components/rocm_smi/linux-rocm-smi.c, + src/components/rocm_smi/rocm_smi.h, + src/components/rocm_smi/tests/Makefile, + src/components/rocm_smi/tests/ROCM_SMI_Makefile: I fixed linux- + rocm-smi.c to include an event per device called + rocm_smi:::device=?:busy_percent; I overlooked this event in the + first draft of the component. I added a note to rocm_smi.h; we + cannot use the distributed version of this file; we have a compile + error on one of the include files that is not + necessary; so we comment it out. I created a Makefile for the + rocm_smi/tests/ directory, it is just a placeholder until we + develop some standardized tests of the rocm_smi component; but + necessary to prevent an error during system 'make'. I added + rocm_command_line.out to the ROCM_SMI_Makefile. This is to make + non-standardized tests; and can be used as make -f + ROCM_SMI_Makefile + +2019-05-07 Heike Jagode + + * src/Makefile.in, src/Makefile.inc, src/Rules.perfmon2, + src/configure.in: Clean up of carriage return character (^M) from + previous patch (commit 5434010). Thanks to Steve Kaufmann from + Cray! + +2019-05-06 Andreas Beckmann + + * src/Makefile.in, src/Makefile.inc, src/Rules.perfmon2, + src/configure, src/configure.in: [PATCH] set SONAME to + libpapi.so.$(PAPIVER).$(PAPIREV) The version check in + PAPI_library_init() requires matching PAPI_VER_CURRENT, therefore + libpapi.so.5 from papi-5.6.x and papi-5.7.x are not + interchangeable, but require applications to be recompiled. Change + the SONAME to contain the two version components that define + PAPI_VER_CURRENT, thereafter upgrading the shared library to a new + version does no longer break existing applications (which will pick + up the new SONAME upon recompilation). Introduce a new variable + PAPISOVER and use it in all places where the SONAME is being used. 
+ drop unused symlinks with three version components: + $(PAPIVER).$(PAPIREV).$(PAPIAGE) + +2019-05-03 Daniel Barry + + * src/ctests/profile_twoevents.c: Prevented another warning about + buffer size potentially being too small. + +2019-04-25 Frank Winkler + + * src/high-level/papi_hl.c: Fixed "format-overflow" warning detected + by gcc/8.1.0. + +2019-04-24 Anthony Castaldo + + * src/components/rocm/Rules.rocm, src/components/rocm/linux-rocm.c, + src/components/rocm_smi/README, + src/components/rocm_smi/Rules.rocm_smi, src/components/rocm_smi + /linux-rocm-smi.c, src/components/rocm_smi/rocm_smi.h, + src/components/rocm_smi/tests/ROCM_SMI_Makefile, + .../rocm_smi/tests/rocm_command_line.cpp, + src/components/rocm_smi/tests/rocm_smi_all.cpp, + .../rocm_smi/tests/rocm_smi_writeTests.cpp: Major addition: a + component to access the rocm_smi library; this is the System + Management Interface for AMD GPU devices. It allows monitoring of + hardware elements; like power consumption, memory usage, PCIe + throughput, fan speed, etc. It allows control for some hardware + functions as well, via PAPI_write(), although these are untested + (write requires root privileges to test). Included here are the + component code, a tester for all readable events, and an incomplete + tester for writing control values. The tests are cpp; this is + required for the AMD 'HIPP' compiler to process an AMD Kernel that + can exercise the GPU itself. The rules and exports are a bit + complicated; for development the rocm_smi_lib was installed and + built in my user directory; in production it would be in a system + directory. + +2019-04-24 Frank Winkler + + * src/high-level/papi_hl.c: Fixed warnings detected by gcc/8.3.0 when + using "-Wrestrict" or "-Wall". + * src/high-level/papi_hl.c: Replaced "get_current_dir_name()" with + "getcwd(NULL,0)". "get_current_dir_name()" is only GNU specific. + * src/run_tests.sh: Replaced bash statements with shell statements. + Some systems do not have a bash. 
+ +2019-04-22 Frank Winkler + + * src/high-level/papi_hl.c: Corrected data type declaration according + to the return value of C library function fgetc. + +2019-04-18 Daniel Barry + + * src/ctests/derived.c, src/ctests/multiattach.c, + src/ctests/multiattach2.c, src/ctests/reset.c, + src/ctests/reset_multiplex.c, src/ctests/zero_attach.c, + src/ctests/zero_flip.c: Prevented warnings about buffer sizes of + length PAPI_MAX_STR_LEN potentially being too small. + +2019-04-18 Frank Winkler + + * doc/Doxyfile-man3, doc/Makefile, src/Makefile.inc, + src/components/appio/tests/appio_test_blocking.c, + .../appio/tests/appio_test_fread_fwrite.c, + src/components/appio/tests/appio_test_pthreads.c, + src/components/appio/tests/appio_test_read_write.c, + src/components/appio/tests/appio_test_recv.c, + src/components/appio/tests/appio_test_seek.c, + src/components/appio/tests/appio_test_select.c, + src/components/appio/tests/appio_test_socket.c, + src/components/appio/tests/init_fini.c, + src/ctests/Makefile.recipies, src/ctests/api.c, src/ctests/flops.c, + src/ctests/high-level.c, src/ctests/high-level2.c, + src/ctests/hl_rates.c, src/ctests/ipc.c, src/ctests/matrix-hl.c, + src/ctests/mpi_hl.c, src/ctests/mpi_omp_hl.c, src/ctests/omp_hl.c, + src/ctests/pthread_hl.c, src/ctests/serial_hl.c, + src/ctests/serial_hl_advanced.c, src/ctests/serial_hl_ll_comb.c, + src/ctests/serial_hl_ll_comb2.c, src/ftests/Makefile.recipies, + src/ftests/flops.F, src/ftests/fmatrixpapi.F, + src/ftests/fmatrixpapi2.F, src/ftests/highlevel.F, + src/ftests/serial_hl.F, src/ftests/serial_hl_advanced.F, src/high- + level/papi_hl.c, src/high-level/scripts/papi_hl_output_writer.py, + src/papi.c, src/papi.h, src/papi_debug.h, src/papi_fwrappers.c, + src/papi_hl.c, src/papi_hl.h, src/run_tests.sh, + src/run_tests_exclude.txt, src/validation_tests/Makefile.recipies, + src/validation_tests/flops_validation_hl.c: Replaced old high-level + API with a new high-level API. 
The new high-level API provides the + ability to record performance events within instrumented code + sections, called regions, of serial, multi-processing (MPI, SHMEM) + and thread (OpenMP, Pthreads) parallel applications. Events to be + recorded are determined via an environment variable that lists both + preset and native events separated by commas. This enables the + programmer to perform different measurements without recompiling. + In addition, the programmer does not need to take care of printing + performance events since a JSON output is generated at the end of + each measurement. Main changes: - Removed old high-level API + including all test files. - Added new high-level API including a + python script that merges results from several MPI ranks. - Added + Doxygen documentation for new high-level API. - Added high-level + tests for c and fortran. - Added high-level flops validation test. + - Replaced old high-level tests with new high-level tests in appio + component. + +2019-04-02 Anthony Castaldo + + * src/components/rocm/linux-rocm.c, + src/components/rocm/tests/rocm_all.cpp: NOTE: This component is + still not functional! Added missing code to prevent hsa_shut_down() + call from segfaulting. Changed skip table for testing code + rocm_all.cpp. + +2019-04-01 Al Grant + + * src/linux-memory.c: The logic in linux-memory.c + generic_get_memory_info() isn't correct. It looks at the + cpu0/cache node and iterates through the caches. The intention is + to collect information about caches at each level. There may be + multiple caches at a given level (typically at L1 there will be I + and D). PAPI's data structure allows for this. There is a 'level + count' that is incremented so that multiple caches can be collected + per level. The bug is in the lines if (level != last_level) { + level_count = 0; last_level=level; } else { level_count++; } This + assumes that for a given level, you see all the caches at that + level, then you go to the next level. 
But in fact sysfs may return + the caches in random order. An actual example: index2: level 2, + unified cache index0: level 1, data cache index3: level 3, unified + cache index1: level 1, instruction cache Because index1 is at a + different level from index3, level_count will be reset to 0. So in + PAPI's structures, the L1I information will overwrite the L1D + information. The knowledge about L1D will be lost. + +2019-03-28 Anthony Castaldo + + * src/components/rocm/README, src/components/rocm/linux-rocm.c, + src/components/rocm/tests/Makefile, + src/components/rocm/tests/ROCM_Makefile, + src/components/rocm/tests/rocm_all.cpp, + src/components/rocm/tests/rocm_command_line.c, + src/components/rocm/tests/run_papi.sh, + src/components/rocm/tests/square.cpp, + src/components/rocm/tests/square.cu, + src/components/rocm/tests/square.hipref.cpp: linux-rocm.c updated + with PAPI standard component function names, beginning '_rocm', and + events named '..:device=n:...' instead of 'device:n'. New files and + utilities are added in the test/ directory. The ROCM_Makefile is + used to compile cpp code using the AMD HIPCC compiler; e.g. 'make + -f ROCM_Makefile rocm_all.out', in order to compile code that uses + the AMD GPUs. + +2019-03-18 Anthony Castaldo + + * src/components/rocm/linux-rocm.c: This is the ROCM component + (linux_rocm.c) with the minimal changes needed to compile with the + PAPI standard GCC flags and settings. This version is functional; + it shows up on papi_components_avail, and papi_native_avail shows + rocm::: events. However, the compile still produces warnings for + unused variables (they are used in debug mode but the code using + them is suppressed in production mode). These are corrected in the + next commit; and a '/tests' directory will be added. 
+ +2019-03-18 Evgeny Shcherbakov + + * src/components/rocm/README, src/components/rocm/Rules.rocm, + src/components/rocm/linux-rocm.c: These are the original files + produced by Evgeny Shcherbakov for the ROCM PAPI component; this + component allows PAPI to access to AMD GPU events. Note that linux- + rocm.c will not compile using the PAPI default settings for GCC; it + has 3 lines of code that require a C99 flag (e.g. -std=gnu99). We + do not wish to mix standards, so the next commit will revise these + lines to standard C that will compile clean with our standard + settings. + +2019-03-07 Heike Jagode + + * doc/Doxyfile-common, papi.spec, src/Makefile.in, src/configure, + src/configure.in, src/papi.h: Updated version to 5.7.1 after the + release. + +2019-03-04 Heike Jagode + + * RELEASENOTES.txt, release_procedure.txt: Minor updates to release + procedure text. + * RELEASENOTES.txt: Updated release notes for 5.7.0 release. + +2019-02-22 Anthony Castaldo + + * doc/Doxyfile-common, man/man1/PAPI_derived_event_files.1, + man/man1/papi_avail.1, man/man1/papi_clockres.1, + man/man1/papi_command_line.1, man/man1/papi_component_avail.1, + man/man1/papi_cost.1, man/man1/papi_decode.1, + man/man1/papi_error_codes.1, man/man1/papi_event_chooser.1, + man/man1/papi_hybrid_native_avail.1, man/man1/papi_mem_info.1, + man/man1/papi_multiplex_cost.1, man/man1/papi_native_avail.1, + man/man1/papi_version.1, man/man1/papi_xml_event_info.1, + man/man3/PAPIF_accum.3, man/man3/PAPIF_accum_counters.3, + man/man3/PAPIF_add_event.3, man/man3/PAPIF_add_events.3, + man/man3/PAPIF_add_named_event.3, + man/man3/PAPIF_assign_eventset_component.3, + man/man3/PAPIF_cleanup_eventset.3, + man/man3/PAPIF_create_eventset.3, + man/man3/PAPIF_destroy_eventset.3, man/man3/PAPIF_enum_event.3, + man/man3/PAPIF_epc.3, man/man3/PAPIF_event_code_to_name.3, + man/man3/PAPIF_event_name_to_code.3, man/man3/PAPIF_flips.3, + man/man3/PAPIF_flops.3, man/man3/PAPIF_get_clockrate.3, + 
man/man3/PAPIF_get_dmem_info.3, man/man3/PAPIF_get_domain.3, + man/man3/PAPIF_get_event_info.3, man/man3/PAPIF_get_exe_info.3, + man/man3/PAPIF_get_granularity.3, + man/man3/PAPIF_get_hardware_info.3, man/man3/PAPIF_get_multiplex.3, + man/man3/PAPIF_get_preload.3, man/man3/PAPIF_get_real_cyc.3, + man/man3/PAPIF_get_real_nsec.3, man/man3/PAPIF_get_real_usec.3, + man/man3/PAPIF_get_virt_cyc.3, man/man3/PAPIF_get_virt_usec.3, + man/man3/PAPIF_ipc.3, man/man3/PAPIF_is_initialized.3, + man/man3/PAPIF_library_init.3, man/man3/PAPIF_lock.3, + man/man3/PAPIF_multiplex_init.3, man/man3/PAPIF_num_cmp_hwctrs.3, + man/man3/PAPIF_num_counters.3, man/man3/PAPIF_num_events.3, + man/man3/PAPIF_num_hwctrs.3, man/man3/PAPIF_perror.3, + man/man3/PAPIF_query_event.3, man/man3/PAPIF_query_named_event.3, + man/man3/PAPIF_read.3, man/man3/PAPIF_read_ts.3, + man/man3/PAPIF_register_thread.3, man/man3/PAPIF_remove_event.3, + man/man3/PAPIF_remove_events.3, + man/man3/PAPIF_remove_named_event.3, man/man3/PAPIF_reset.3, + man/man3/PAPIF_set_cmp_domain.3, + man/man3/PAPIF_set_cmp_granularity.3, man/man3/PAPIF_set_debug.3, + man/man3/PAPIF_set_domain.3, man/man3/PAPIF_set_event_domain.3, + man/man3/PAPIF_set_granularity.3, man/man3/PAPIF_set_inherit.3, + man/man3/PAPIF_set_multiplex.3, man/man3/PAPIF_shutdown.3, + man/man3/PAPIF_start.3, man/man3/PAPIF_start_counters.3, + man/man3/PAPIF_state.3, man/man3/PAPIF_stop.3, + man/man3/PAPIF_stop_counters.3, man/man3/PAPIF_thread_id.3, + man/man3/PAPIF_thread_init.3, man/man3/PAPIF_unlock.3, + man/man3/PAPIF_unregister_thread.3, man/man3/PAPIF_write.3, + man/man3/PAPI_accum.3, man/man3/PAPI_accum_counters.3, + man/man3/PAPI_add_event.3, man/man3/PAPI_add_events.3, + man/man3/PAPI_add_named_event.3, + man/man3/PAPI_addr_range_option_t.3, man/man3/PAPI_address_map_t.3, + man/man3/PAPI_all_thr_spec_t.3, + man/man3/PAPI_assign_eventset_component.3, man/man3/PAPI_attach.3, + man/man3/PAPI_attach_option_t.3, man/man3/PAPI_cleanup_eventset.3, + 
man/man3/PAPI_component_info_t.3, man/man3/PAPI_cpu_option_t.3, + man/man3/PAPI_create_eventset.3, man/man3/PAPI_debug_option_t.3, + man/man3/PAPI_destroy_eventset.3, man/man3/PAPI_detach.3, + man/man3/PAPI_disable_component.3, + man/man3/PAPI_disable_component_by_name.3, + man/man3/PAPI_dmem_info_t.3, man/man3/PAPI_domain_option_t.3, + man/man3/PAPI_enum_cmp_event.3, man/man3/PAPI_enum_event.3, + man/man3/PAPI_epc.3, man/man3/PAPI_event_code_to_name.3, + man/man3/PAPI_event_info_t.3, man/man3/PAPI_event_name_to_code.3, + man/man3/PAPI_exe_info_t.3, man/man3/PAPI_flips.3, + man/man3/PAPI_flops.3, man/man3/PAPI_get_cmp_opt.3, + man/man3/PAPI_get_component_index.3, + man/man3/PAPI_get_component_info.3, man/man3/PAPI_get_dmem_info.3, + man/man3/PAPI_get_event_component.3, + man/man3/PAPI_get_event_info.3, + man/man3/PAPI_get_eventset_component.3, + man/man3/PAPI_get_executable_info.3, + man/man3/PAPI_get_hardware_info.3, man/man3/PAPI_get_multiplex.3, + man/man3/PAPI_get_opt.3, man/man3/PAPI_get_overflow_event_index.3, + man/man3/PAPI_get_real_cyc.3, man/man3/PAPI_get_real_nsec.3, + man/man3/PAPI_get_real_usec.3, man/man3/PAPI_get_shared_lib_info.3, + man/man3/PAPI_get_thr_specific.3, man/man3/PAPI_get_virt_cyc.3, + man/man3/PAPI_get_virt_nsec.3, man/man3/PAPI_get_virt_usec.3, + man/man3/PAPI_granularity_option_t.3, man/man3/PAPI_hw_info_t.3, + man/man3/PAPI_inherit_option_t.3, man/man3/PAPI_ipc.3, + man/man3/PAPI_is_initialized.3, man/man3/PAPI_itimer_option_t.3, + man/man3/PAPI_library_init.3, man/man3/PAPI_list_events.3, + man/man3/PAPI_list_threads.3, man/man3/PAPI_lock.3, + man/man3/PAPI_mh_cache_info_t.3, man/man3/PAPI_mh_info_t.3, + man/man3/PAPI_mh_level_t.3, man/man3/PAPI_mh_tlb_info_t.3, + man/man3/PAPI_mpx_info_t.3, man/man3/PAPI_multiplex_init.3, + man/man3/PAPI_multiplex_option_t.3, man/man3/PAPI_num_cmp_hwctrs.3, + man/man3/PAPI_num_components.3, man/man3/PAPI_num_counters.3, + man/man3/PAPI_num_events.3, man/man3/PAPI_num_hwctrs.3, + 
man/man3/PAPI_option_t.3, man/man3/PAPI_overflow.3, + man/man3/PAPI_perror.3, man/man3/PAPI_preload_info_t.3, + man/man3/PAPI_profil.3, man/man3/PAPI_query_event.3, + man/man3/PAPI_query_named_event.3, man/man3/PAPI_read.3, + man/man3/PAPI_read_counters.3, man/man3/PAPI_read_ts.3, + man/man3/PAPI_register_thread.3, man/man3/PAPI_remove_event.3, + man/man3/PAPI_remove_events.3, man/man3/PAPI_remove_named_event.3, + man/man3/PAPI_reset.3, man/man3/PAPI_set_cmp_domain.3, + man/man3/PAPI_set_cmp_granularity.3, man/man3/PAPI_set_debug.3, + man/man3/PAPI_set_domain.3, man/man3/PAPI_set_granularity.3, + man/man3/PAPI_set_multiplex.3, man/man3/PAPI_set_opt.3, + man/man3/PAPI_set_thr_specific.3, man/man3/PAPI_shlib_info_t.3, + man/man3/PAPI_shutdown.3, man/man3/PAPI_sprofil.3, + man/man3/PAPI_sprofil_t.3, man/man3/PAPI_start.3, + man/man3/PAPI_start_counters.3, man/man3/PAPI_state.3, + man/man3/PAPI_stop.3, man/man3/PAPI_stop_counters.3, + man/man3/PAPI_strerror.3, man/man3/PAPI_thread_id.3, + man/man3/PAPI_thread_init.3, man/man3/PAPI_unlock.3, + man/man3/PAPI_unregister_thread.3, man/man3/PAPI_write.3, + papi.spec, release_procedure.txt, src/Makefile.in, + src/configure.in, src/papi.h: Fixing updates to manual; incorrectly + done for release 5.7.0.0. + +2019-02-21 Anthony Castaldo + + * release_procedure.txt: Updated release procedure with additional + instructions on final steps. + +2019-02-18 Anthony Castaldo + + * doc/Doxyfile-common, papi.spec, src/Makefile.in, src/configure.in, + src/papi.h: Changed version to 5.7.1 after release. + * release_procedure.txt: Corrected directory entry typo. 
+ * ChangeLogP570.txt, RELEASENOTES.txt: New ChangeLogP570.txt for new + release, updated RELEASENOTES.txt + diff -Nru papi-5.7.0+dfsg/debian/changelog papi-6.0.0~dfsg/debian/changelog --- papi-5.7.0+dfsg/debian/changelog 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/changelog 2020-04-04 20:16:18.000000000 +0000 @@ -1,3 +1,25 @@ +papi (6.0.0~dfsg-2) unstable; urgency=medium + + * Rewrite shebang to python3. + * Move PAPI_derived_event_files manpage to section 5. + * Upload to unstable. + + -- Andreas Beckmann Sat, 04 Apr 2020 22:16:18 +0200 + +papi (6.0.0~dfsg-1) experimental; urgency=medium + + * New upstream release. + * Bump libpfm4-dev B-D to >= 4.10.1+git41. + * Bump SONAME to libpapi.so.6.0. + * Update symbols for 6.0.0. + * Refresh patches. + * fix-typos.patch: Fix some more typos found by Lintian. + * Build with dh-python for the new papi_hl_output_writer.py script. + * Bump Standards-Version to 4.5.0. No changes needed. + * Upload to experimental. + + -- Andreas Beckmann Tue, 10 Mar 2020 18:59:29 +0100 + papi (5.7.0+dfsg-2) unstable; urgency=medium * Upload to unstable. 
diff -Nru papi-5.7.0+dfsg/debian/control papi-6.0.0~dfsg/debian/control --- papi-5.7.0+dfsg/debian/control 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/control 2020-04-04 20:16:18.000000000 +0000 @@ -7,25 +7,22 @@ Andreas Beckmann Build-Depends: debhelper-compat (= 12), - libpfm4-dev (>= 4.10.1+git7), + dh-sequence-python3, + libpfm4-dev (>= 4.10.1+git41), gfortran, Rules-Requires-Root: no -Standards-Version: 4.3.0 +Standards-Version: 4.5.0 Homepage: https://icl.utk.edu/papi/software/index.html Vcs-Browser: https://salsa.debian.org/hpc-team/papi Vcs-Git: https://salsa.debian.org/hpc-team/papi.git -Package: libpapi5.7 +Package: libpapi6.0 Section: libs Architecture: any Multi-Arch: same Pre-Depends: ${misc:Pre-Depends} Depends: ${shlibs:Depends}, ${misc:Depends} -Breaks: - libpapi5 (>= 5.7), -Replaces: - libpapi5 (>= 5.7), Description: PAPI runtime (shared libraries) Performance Application Programming Interface (PAPI) provides the tool designer and application engineer with a consistent interface and methodology @@ -66,7 +63,9 @@ Architecture: any Multi-Arch: foreign Pre-Depends: ${misc:Pre-Depends} -Depends: ${shlibs:Depends}, ${misc:Depends} +Depends: + ${python3:Depends}, + ${shlibs:Depends}, ${misc:Depends} Description: PAPI utilities Performance Application Programming Interface (PAPI) provides the tool designer and application engineer with a consistent interface and methodology diff -Nru papi-5.7.0+dfsg/debian/copyright papi-6.0.0~dfsg/debian/copyright --- papi-5.7.0+dfsg/debian/copyright 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/copyright 2020-04-04 20:16:18.000000000 +0000 @@ -41,7 +41,7 @@ Files: debian/* Copyright: 2013 Vincent Danjean - © 2013-2019 Andreas Beckmann + © 2013-2020 Andreas Beckmann License: BSD-3-Clause License: BSD-3-Clause diff -Nru papi-5.7.0+dfsg/debian/libpapiSOVERSION.docs papi-6.0.0~dfsg/debian/libpapiSOVERSION.docs --- papi-5.7.0+dfsg/debian/libpapiSOVERSION.docs 2019-06-16 00:01:29.000000000 
+0000 +++ papi-6.0.0~dfsg/debian/libpapiSOVERSION.docs 2020-04-04 20:16:18.000000000 +0000 @@ -1,2 +1,2 @@ -README +README.md ChangeLogP*.txt diff -Nru papi-5.7.0+dfsg/debian/libpapiSOVERSION.symbols papi-6.0.0~dfsg/debian/libpapiSOVERSION.symbols --- papi-5.7.0+dfsg/debian/libpapiSOVERSION.symbols 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/libpapiSOVERSION.symbols 2020-04-04 20:16:18.000000000 +0000 @@ -1,7 +1,7 @@ libpapi.so.@SOVERSION@ #PACKAGE# #MINVER# * Build-Depends-Package: libpapi-dev PAPIF_ACCUM@Base 0 - PAPIF_ACCUM_COUNTERS@Base 0 +#MISSING: 6# PAPIF_ACCUM_COUNTERS@Base 0 PAPIF_ADD_EVENT@Base 0 PAPIF_ADD_EVENTS@Base 0 PAPIF_ADD_NAMED_EVENT@Base 0 @@ -13,8 +13,10 @@ PAPIF_EPC@Base 0 PAPIF_EVENT_CODE_TO_NAME@Base 0 PAPIF_EVENT_NAME_TO_CODE@Base 0 - PAPIF_FLIPS@Base 0 - PAPIF_FLOPS@Base 0 +#MISSING: 6# PAPIF_FLIPS@Base 0 + PAPIF_FLIPS_RATE@Base 0 +#MISSING: 6# PAPIF_FLOPS@Base 0 + PAPIF_FLOPS_RATE@Base 0 PAPIF_GET_CLOCKRATE@Base 0 PAPIF_GET_DMEM_INFO@Base 0 PAPIF_GET_DOMAIN@Base 0 @@ -29,19 +31,24 @@ PAPIF_GET_REAL_USEC@Base 0 PAPIF_GET_VIRT_CYC@Base 0 PAPIF_GET_VIRT_USEC@Base 0 + PAPIF_HL_READ@Base 0 + PAPIF_HL_REGION_BEGIN@Base 0 + PAPIF_HL_REGION_END@Base 0 + PAPIF_HL_STOP@Base 0 PAPIF_IPC@Base 0 PAPIF_IS_INITIALIZED@Base 0 PAPIF_LIBRARY_INIT@Base 0 PAPIF_LIST_EVENTS@Base 0 PAPIF_LOCK@Base 0 PAPIF_MULTIPLEX_INIT@Base 0 - PAPIF_NUM_COUNTERS@Base 0 +#MISSING: 6# PAPIF_NUM_COUNTERS@Base 0 PAPIF_NUM_EVENTS@Base 0 PAPIF_PERROR@Base 0 PAPIF_QUERY_EVENT@Base 0 PAPIF_QUERY_NAMED_EVENT@Base 0 + PAPIF_RATE_STOP@Base 0 PAPIF_READ@Base 0 - PAPIF_READ_COUNTERS@Base 0 +#MISSING: 6# PAPIF_READ_COUNTERS@Base 0 PAPIF_READ_TS@Base 0 PAPIF_REGISTER_THREAD@Base 0 PAPIF_REMOVE_EVENT@Base 0 @@ -58,10 +65,10 @@ PAPIF_SET_MULTIPLEX@Base 0 PAPIF_SHUTDOWN@Base 0 PAPIF_START@Base 0 - PAPIF_START_COUNTERS@Base 0 +#MISSING: 6# PAPIF_START_COUNTERS@Base 0 PAPIF_STATE@Base 0 PAPIF_STOP@Base 0 - PAPIF_STOP_COUNTERS@Base 0 +#MISSING: 6# PAPIF_STOP_COUNTERS@Base 0 
PAPIF_THREAD_ID@Base 0 PAPIF_THREAD_INIT@Base 0 PAPIF_UNREGISTER_THREAD@Base 0 @@ -71,7 +78,7 @@ PAPIF_num_hwctrs@Base 0 PAPIF_unlock@Base 0 PAPI_accum@Base 0 - PAPI_accum_counters@Base 0 +#MISSING: 6# PAPI_accum_counters@Base 0 PAPI_add_event@Base 0 PAPI_add_events@Base 0 PAPI_add_named_event@Base 0 @@ -88,8 +95,10 @@ PAPI_epc@Base 0 PAPI_event_code_to_name@Base 0 PAPI_event_name_to_code@Base 0 - PAPI_flips@Base 0 - PAPI_flops@Base 0 +#MISSING: 6# PAPI_flips@Base 0 + PAPI_flips_rate@Base 0 +#MISSING: 6# PAPI_flops@Base 0 + PAPI_flops_rate@Base 0 PAPI_get_cmp_opt@Base 0 PAPI_get_component_index@Base 0 PAPI_get_component_info@Base 0 @@ -110,6 +119,10 @@ PAPI_get_virt_cyc@Base 0 PAPI_get_virt_nsec@Base 0 PAPI_get_virt_usec@Base 0 + PAPI_hl_read@Base 0 + PAPI_hl_region_begin@Base 0 + PAPI_hl_region_end@Base 0 + PAPI_hl_stop@Base 0 PAPI_ipc@Base 0 PAPI_is_initialized@Base 0 PAPI_library_init@Base 0 @@ -119,7 +132,7 @@ PAPI_multiplex_init@Base 0 PAPI_num_cmp_hwctrs@Base 0 PAPI_num_components@Base 0 - PAPI_num_counters@Base 0 +#MISSING: 6# PAPI_num_counters@Base 0 PAPI_num_events@Base 0 PAPI_num_hwctrs@Base 0 PAPI_overflow@Base 0 @@ -127,8 +140,9 @@ PAPI_profil@Base 0 PAPI_query_event@Base 0 PAPI_query_named_event@Base 0 + PAPI_rate_stop@Base 0 PAPI_read@Base 0 - PAPI_read_counters@Base 0 +#MISSING: 6# PAPI_read_counters@Base 0 PAPI_read_ts@Base 0 PAPI_register_thread@Base 0 PAPI_remove_event@Base 0 @@ -146,10 +160,10 @@ PAPI_shutdown@Base 0 PAPI_sprofil@Base 0 PAPI_start@Base 0 - PAPI_start_counters@Base 0 +#MISSING: 6# PAPI_start_counters@Base 0 PAPI_state@Base 0 PAPI_stop@Base 0 - PAPI_stop_counters@Base 0 +#MISSING: 6# PAPI_stop_counters@Base 0 PAPI_strerror@Base 0 PAPI_thread_id@Base 0 PAPI_thread_init@Base 0 @@ -159,9 +173,9 @@ papif_accum@Base 0 papif_accum_@Base 0 papif_accum__@Base 0 - papif_accum_counters@Base 0 - papif_accum_counters_@Base 0 - papif_accum_counters__@Base 0 +#MISSING: 6# papif_accum_counters@Base 0 +#MISSING: 6# papif_accum_counters_@Base 0 
+#MISSING: 6# papif_accum_counters__@Base 0 papif_add_event@Base 0 papif_add_event_@Base 0 papif_add_event__@Base 0 @@ -195,12 +209,18 @@ papif_event_name_to_code@Base 0 papif_event_name_to_code_@Base 0 papif_event_name_to_code__@Base 0 - papif_flips@Base 0 - papif_flips_@Base 0 - papif_flips__@Base 0 - papif_flops@Base 0 - papif_flops_@Base 0 - papif_flops__@Base 0 +#MISSING: 6# papif_flips@Base 0 +#MISSING: 6# papif_flips_@Base 0 +#MISSING: 6# papif_flips__@Base 0 + papif_flips_rate@Base 0 + papif_flips_rate_@Base 0 + papif_flips_rate__@Base 0 +#MISSING: 6# papif_flops@Base 0 +#MISSING: 6# papif_flops_@Base 0 +#MISSING: 6# papif_flops__@Base 0 + papif_flops_rate@Base 0 + papif_flops_rate_@Base 0 + papif_flops_rate__@Base 0 papif_get_clockrate@Base 0 papif_get_clockrate_@Base 0 papif_get_clockrate__@Base 0 @@ -243,6 +263,18 @@ papif_get_virt_usec@Base 0 papif_get_virt_usec_@Base 0 papif_get_virt_usec__@Base 0 + papif_hl_read@Base 0 + papif_hl_read_@Base 0 + papif_hl_read__@Base 0 + papif_hl_region_begin@Base 0 + papif_hl_region_begin_@Base 0 + papif_hl_region_begin__@Base 0 + papif_hl_region_end@Base 0 + papif_hl_region_end_@Base 0 + papif_hl_region_end__@Base 0 + papif_hl_stop@Base 0 + papif_hl_stop_@Base 0 + papif_hl_stop__@Base 0 papif_ipc@Base 0 papif_ipc_@Base 0 papif_ipc__@Base 0 @@ -264,9 +296,9 @@ papif_num_cmp_hwctrs@Base 0 papif_num_cmp_hwctrs_@Base 0 papif_num_cmp_hwctrs__@Base 0 - papif_num_counters@Base 0 - papif_num_counters_@Base 0 - papif_num_counters__@Base 0 +#MISSING: 6# papif_num_counters@Base 0 +#MISSING: 6# papif_num_counters_@Base 0 +#MISSING: 6# papif_num_counters__@Base 0 papif_num_events@Base 0 papif_num_events_@Base 0 papif_num_events__@Base 0 @@ -282,12 +314,15 @@ papif_query_named_event@Base 0 papif_query_named_event_@Base 0 papif_query_named_event__@Base 0 + papif_rate_stop@Base 0 + papif_rate_stop_@Base 0 + papif_rate_stop__@Base 0 papif_read@Base 0 papif_read_@Base 0 papif_read__@Base 0 - papif_read_counters@Base 0 - 
papif_read_counters_@Base 0 - papif_read_counters__@Base 0 +#MISSING: 6# papif_read_counters@Base 0 +#MISSING: 6# papif_read_counters_@Base 0 +#MISSING: 6# papif_read_counters__@Base 0 papif_read_ts@Base 0 papif_read_ts_@Base 0 papif_read_ts__@Base 0 @@ -336,18 +371,18 @@ papif_start@Base 0 papif_start_@Base 0 papif_start__@Base 0 - papif_start_counters@Base 0 - papif_start_counters_@Base 0 - papif_start_counters__@Base 0 +#MISSING: 6# papif_start_counters@Base 0 +#MISSING: 6# papif_start_counters_@Base 0 +#MISSING: 6# papif_start_counters__@Base 0 papif_state@Base 0 papif_state_@Base 0 papif_state__@Base 0 papif_stop@Base 0 papif_stop_@Base 0 papif_stop__@Base 0 - papif_stop_counters@Base 0 - papif_stop_counters_@Base 0 - papif_stop_counters__@Base 0 +#MISSING: 6# papif_stop_counters@Base 0 +#MISSING: 6# papif_stop_counters_@Base 0 +#MISSING: 6# papif_stop_counters__@Base 0 papif_thread_id@Base 0 papif_thread_id_@Base 0 papif_thread_id__@Base 0 diff -Nru papi-5.7.0+dfsg/debian/papi-tools.install papi-6.0.0~dfsg/debian/papi-tools.install --- papi-5.7.0+dfsg/debian/papi-tools.install 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/papi-tools.install 2020-04-04 20:16:18.000000000 +0000 @@ -1,2 +1 @@ usr/bin/* -usr/share/man/man1 diff -Nru papi-5.7.0+dfsg/debian/papi-tools.manpages papi-6.0.0~dfsg/debian/papi-tools.manpages --- papi-5.7.0+dfsg/debian/papi-tools.manpages 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/debian/papi-tools.manpages 2020-04-04 20:16:18.000000000 +0000 @@ -0,0 +1 @@ +usr/share/man/man1/*.1 diff -Nru papi-5.7.0+dfsg/debian/patches/0001-set-SONAME-to-libpapi.so.-PAPIVER-.-PAPIREV.patch papi-6.0.0~dfsg/debian/patches/0001-set-SONAME-to-libpapi.so.-PAPIVER-.-PAPIREV.patch --- papi-5.7.0+dfsg/debian/patches/0001-set-SONAME-to-libpapi.so.-PAPIVER-.-PAPIREV.patch 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/patches/0001-set-SONAME-to-libpapi.so.-PAPIVER-.-PAPIREV.patch 1970-01-01 00:00:00.000000000 +0000 @@ 
-1,133 +0,0 @@ -From 5434010ff4057d6e31493afcb317ba69d01d0e24 Mon Sep 17 00:00:00 2001 -From: Andreas Beckmann -Date: Mon, 6 May 2019 18:28:14 -0400 -Subject: [PATCH 1/2] [PATCH] set SONAME to libpapi.so.$(PAPIVER).$(PAPIREV) - -The version check in PAPI_library_init() requires matching -PAPI_VER_CURRENT, therefore libpapi.so.5 from papi-5.6.x and papi-5.7.x -are not interchangeable, but require applications to be recompiled. - -Change the SONAME to contain the two version components that define -PAPI_VER_CURRENT, thereafter upgrading the shared library to a new -version does no longer break existing applications (which will pick up -the new SONAME upon recompilation). - -Introduce a new variable PAPISOVER and use it in all places where -the SONAME is being used. - -drop unused symlinks with three version components: -$(PAPIVER).$(PAPIREV).$(PAPIAGE) - -Signed-off-by: Heike Jagode ---- - src/Makefile.in | 1 + - src/Makefile.inc | 16 +++++++--------- - src/Rules.perfmon2 | 4 ++-- - src/configure | 5 ++++- - src/configure.in | 4 +++- - 5 files changed, 17 insertions(+), 13 deletions(-) - -diff --git a/src/Makefile.in b/src/Makefile.in -index 99f0c9e5e..fcc41b996 100644 ---- a/src/Makefile.in -+++ b/src/Makefile.in -@@ -75,6 +75,7 @@ PMAPI = @PMAPI@ - PMINIT = @PMINIT@ - SETPATH = @SETPATH@ - SHLIB = @SHLIB@ -+PAPISOVER = @PAPISOVER@ - VLIB = @VLIB@ - SHLIBDEPS = @SHLIBDEPS@ - SHOW_CONF = @SHOW_CONF@ -diff --git a/src/Makefile.inc b/src/Makefile.inc -index 9497326c7..69b428f7e 100644 ---- a/src/Makefile.inc -+++ b/src/Makefile.inc -@@ -58,13 +58,13 @@ $(LIBRARY): $(OBJECTS) - rm -f $(LIBRARY) - $(AR) $(ARG64) rv $(LIBRARY) $(OBJECTS) - --shared: libpapi.so libpapi.so.$(PAPIVER) -+shared: libpapi.so libpapi.so.$(PAPISOVER) - --libpapi.so libpapi.so.$(PAPIVER): $(SHLIB) -+libpapi.so libpapi.so.$(PAPISOVER): $(SHLIB) - ln -sf $(SHLIB) $@ - - $(SHLIB): $(HEADERS) $(SOURCES) $(SHLIBOBJS) -- rm -f $(SHLIB) libpapi.so libpapi.so.$(PAPIVER); -+ rm -f $(SHLIB) libpapi.so 
libpapi.so.$(PAPISOVER) - $(CC_SHR) $(LIBCFLAGS) $(OPTFLAGS) $(SOURCES) $(SHLIBOBJS) -o $@ $(SHLIBDEPS) $(LDFLAGS) - @set -ex; if test "$(POST_BUILD)" != "" ; then \ - -$(POST_BUILD) ; \ -@@ -258,7 +258,7 @@ ifneq (${COMPONENTS},) - endif - - clean: comp_tests_clean native_clean -- rm -rf $(LIBRARY) $(SHLIB) libpapi.so libpapi.so.$(PAPIVER) $(OBJECTS) core rii_files genpapifdef *~ so_locations papi_fwrappers_.c papi_fwrappers__.c upper_PAPI_FWRAPPERS.c -+ rm -rf $(LIBRARY) $(SHLIB) libpapi.so libpapi.so.$(PAPISOVER) $(OBJECTS) core rii_files genpapifdef *~ so_locations papi_fwrappers_.c papi_fwrappers__.c upper_PAPI_FWRAPPERS.c - $(MAKE) -C ../doc clean - $(MAKE) -C ctests clean - $(MAKE) -C ftests clean -@@ -317,8 +317,7 @@ install-lib: native_install - cp -p $(SHLIB) $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC); \ - chmod go+r $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) ; \ - cd $(DESTDIR)$(LIBDIR); \ -- ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE); \ -- ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPIVER); \ -+ ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPISOVER); \ - ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so; \ - fi - -@@ -352,9 +351,8 @@ install-pkgconf: - -mkdir -p $(DESTDIR)$(LIBPC) - -chmod go+rx $(DESTDIR)$(LIBPC) - cp papi.pc $(DESTDIR)$(LIBPC)/papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc -- cd $(DESTDIR)$(LIBPC); ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).pc; -- cd $(DESTDIR)$(LIBPC); ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc papi-$(PAPIVER).pc; -- cd $(DESTDIR)$(LIBPC); ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc papi.pc; -+ ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc $(DESTDIR)$(LIBPC)/papi-$(PAPISOVER).pc -+ ln -sf 
papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc $(DESTDIR)$(LIBPC)/papi.pc - - # - # Dummy targets for configurations that do not also include a Rules file with targets -diff --git a/src/Rules.perfmon2 b/src/Rules.perfmon2 -index 4f86b6c87..f17aa877d 100644 ---- a/src/Rules.perfmon2 -+++ b/src/Rules.perfmon2 -@@ -60,8 +60,8 @@ endif - -install -d $(DESTDIR)$(LIBDIR) - ifneq (,$(findstring shared,$(LIBS))) - cp -p $(SHLIB) $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) -- cd $(DESTDIR)$(LIBDIR); ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPIVER) -- cd $(DESTDIR)$(LIBDIR); ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so -+ ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPISOVER) -+ ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) $(DESTDIR)$(LIBDIR)/libpapi.so - endif - -install -d $(DESTDIR)$(DATADIR) - cp -f ./papi_events.csv $(DESTDIR)$(DATADIR) -diff --git a/src/configure.in b/src/configure.in -index db16d3dca..e1d255948 100644 ---- a/src/configure.in -+++ b/src/configure.in -@@ -1262,7 +1262,8 @@ CTEST_TARGETS="all" - FTEST_TARGETS="all" - LIBRARY=libpapi.a - SHLIB='libpapi.so.AC_PACKAGE_VERSION' --VLIB='libpapi.so.$(PAPIVER)' -+PAPISOVER='$(PAPIVER).$(PAPIREV)' -+VLIB='libpapi.so.$(PAPISOVER)' - OMPCFLGS=-fopenmp - CC_R='$(CC) -pthread' - CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-soname -Wl,$(VLIB) -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' -@@ -1769,6 +1770,7 @@ AC_SUBST(CPU) - AC_SUBST(FILENAME) - AC_SUBST(LIBRARY) - AC_SUBST(SHLIB) -+AC_SUBST(PAPISOVER) - AC_SUBST(VLIB) - AC_SUBST(PAPICFLAGS) - AC_SUBST(OPTFLAGS) --- -2.11.0 - diff -Nru papi-5.7.0+dfsg/debian/patches/0002-Clean-up-of-carriage-return-character-M-from.patch papi-6.0.0~dfsg/debian/patches/0002-Clean-up-of-carriage-return-character-M-from.patch --- papi-5.7.0+dfsg/debian/patches/0002-Clean-up-of-carriage-return-character-M-from.patch 2019-06-16 
00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/patches/0002-Clean-up-of-carriage-return-character-M-from.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,119 +0,0 @@ -From 6676556a79ed2e8f5840eecc23f8a26cf040f4c2 Mon Sep 17 00:00:00 2001 -From: Heike Jagode -Date: Tue, 7 May 2019 10:29:54 -0400 -Subject: [PATCH 2/2] Clean up of carriage return character (^M) from previous - patch (commit 5434010). - -Thanks to Steve Kaufmann from Cray! ---- - src/Makefile.in | 2 +- - src/Makefile.inc | 14 +++++++------- - src/Rules.perfmon2 | 4 ++-- - src/configure.in | 6 +++--- - 4 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/src/Makefile.in b/src/Makefile.in -index fcc41b996..4334c3e2b 100644 ---- a/src/Makefile.in -+++ b/src/Makefile.in -@@ -75,7 +75,7 @@ PMAPI = @PMAPI@ - PMINIT = @PMINIT@ - SETPATH = @SETPATH@ - SHLIB = @SHLIB@ --PAPISOVER = @PAPISOVER@ -+PAPISOVER = @PAPISOVER@ - VLIB = @VLIB@ - SHLIBDEPS = @SHLIBDEPS@ - SHOW_CONF = @SHOW_CONF@ -diff --git a/src/Makefile.inc b/src/Makefile.inc -index 69b428f7e..c7d6a3953 100644 ---- a/src/Makefile.inc -+++ b/src/Makefile.inc -@@ -58,13 +58,13 @@ $(LIBRARY): $(OBJECTS) - rm -f $(LIBRARY) - $(AR) $(ARG64) rv $(LIBRARY) $(OBJECTS) - --shared: libpapi.so libpapi.so.$(PAPISOVER) -+shared: libpapi.so libpapi.so.$(PAPISOVER) - --libpapi.so libpapi.so.$(PAPISOVER): $(SHLIB) -+libpapi.so libpapi.so.$(PAPISOVER): $(SHLIB) - ln -sf $(SHLIB) $@ - - $(SHLIB): $(HEADERS) $(SOURCES) $(SHLIBOBJS) -- rm -f $(SHLIB) libpapi.so libpapi.so.$(PAPISOVER) -+ rm -f $(SHLIB) libpapi.so libpapi.so.$(PAPISOVER) - $(CC_SHR) $(LIBCFLAGS) $(OPTFLAGS) $(SOURCES) $(SHLIBOBJS) -o $@ $(SHLIBDEPS) $(LDFLAGS) - @set -ex; if test "$(POST_BUILD)" != "" ; then \ - -$(POST_BUILD) ; \ -@@ -258,7 +258,7 @@ ifneq (${COMPONENTS},) - endif - - clean: comp_tests_clean native_clean -- rm -rf $(LIBRARY) $(SHLIB) libpapi.so libpapi.so.$(PAPISOVER) $(OBJECTS) core rii_files genpapifdef *~ so_locations papi_fwrappers_.c papi_fwrappers__.c 
upper_PAPI_FWRAPPERS.c -+ rm -rf $(LIBRARY) $(SHLIB) libpapi.so libpapi.so.$(PAPISOVER) $(OBJECTS) core rii_files genpapifdef *~ so_locations papi_fwrappers_.c papi_fwrappers__.c upper_PAPI_FWRAPPERS.c - $(MAKE) -C ../doc clean - $(MAKE) -C ctests clean - $(MAKE) -C ftests clean -@@ -317,7 +317,7 @@ install-lib: native_install - cp -p $(SHLIB) $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC); \ - chmod go+r $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) ; \ - cd $(DESTDIR)$(LIBDIR); \ -- ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPISOVER); \ -+ ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPISOVER); \ - ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so; \ - fi - -@@ -351,8 +351,8 @@ install-pkgconf: - -mkdir -p $(DESTDIR)$(LIBPC) - -chmod go+rx $(DESTDIR)$(LIBPC) - cp papi.pc $(DESTDIR)$(LIBPC)/papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc -- ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc $(DESTDIR)$(LIBPC)/papi-$(PAPISOVER).pc -- ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc $(DESTDIR)$(LIBPC)/papi.pc -+ ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc $(DESTDIR)$(LIBPC)/papi-$(PAPISOVER).pc -+ ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc $(DESTDIR)$(LIBPC)/papi.pc - - # - # Dummy targets for configurations that do not also include a Rules file with targets -diff --git a/src/Rules.perfmon2 b/src/Rules.perfmon2 -index f17aa877d..06b7b4554 100644 ---- a/src/Rules.perfmon2 -+++ b/src/Rules.perfmon2 -@@ -60,8 +60,8 @@ endif - -install -d $(DESTDIR)$(LIBDIR) - ifneq (,$(findstring shared,$(LIBS))) - cp -p $(SHLIB) $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) -- ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPISOVER) -- ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) 
$(DESTDIR)$(LIBDIR)/libpapi.so -+ ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPISOVER) -+ ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) $(DESTDIR)$(LIBDIR)/libpapi.so - endif - -install -d $(DESTDIR)$(DATADIR) - cp -f ./papi_events.csv $(DESTDIR)$(DATADIR) -diff --git a/src/configure.in b/src/configure.in -index e1d255948..99b6353bc 100644 ---- a/src/configure.in -+++ b/src/configure.in -@@ -1262,8 +1262,8 @@ CTEST_TARGETS="all" - FTEST_TARGETS="all" - LIBRARY=libpapi.a - SHLIB='libpapi.so.AC_PACKAGE_VERSION' --PAPISOVER='$(PAPIVER).$(PAPIREV)' --VLIB='libpapi.so.$(PAPISOVER)' -+PAPISOVER='$(PAPIVER).$(PAPIREV)' -+VLIB='libpapi.so.$(PAPISOVER)' - OMPCFLGS=-fopenmp - CC_R='$(CC) -pthread' - CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-soname -Wl,$(VLIB) -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' -@@ -1770,7 +1770,7 @@ AC_SUBST(CPU) - AC_SUBST(FILENAME) - AC_SUBST(LIBRARY) - AC_SUBST(SHLIB) --AC_SUBST(PAPISOVER) -+AC_SUBST(PAPISOVER) - AC_SUBST(VLIB) - AC_SUBST(PAPICFLAGS) - AC_SUBST(OPTFLAGS) --- -2.11.0 - diff -Nru papi-5.7.0+dfsg/debian/patches/do-not-ignore-failures.patch papi-6.0.0~dfsg/debian/patches/do-not-ignore-failures.patch --- papi-5.7.0+dfsg/debian/patches/do-not-ignore-failures.patch 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/patches/do-not-ignore-failures.patch 2020-04-04 20:16:18.000000000 +0000 @@ -3,7 +3,18 @@ --- a/src/Makefile.inc +++ b/src/Makefile.inc -@@ -295,13 +295,13 @@ install: install-lib install-man install +@@ -66,9 +66,7 @@ libpapi.so libpapi.so.$(PAPISOVER): $(SH + $(SHLIB): $(HEADERS) $(SOURCES) $(SHLIBOBJS) + rm -f $(SHLIB) libpapi.so libpapi.so.$(PAPISOVER) + $(CC_SHR) $(LIBCFLAGS) $(OPTFLAGS) $(SOURCES) $(SHLIBOBJS) -o $@ $(SHLIBDEPS) $(LDFLAGS) +- @set -ex; if test "$(POST_BUILD)" != "" ; then \ +- -$(POST_BUILD) ; \ +- fi ++ $(if $(POST_BUILD),@set -ex; $(POST_BUILD)) + + papi_fwrappers_.c: papi_fwrappers.c $(HEADERS) + $(CPP) $(CPPFLAGS) 
-DFORTRANUNDERSCORE papi_fwrappers.c > papi_fwrappers_.c +@@ -302,13 +300,13 @@ install-hl-scripts: install-lib: native_install @echo "Headers (INCDIR) being installed in: \"$(DESTDIR)$(INCDIR)\""; @@ -21,7 +32,7 @@ @set -ex; if test -r $(LIBRARY) ; then \ cp $(LIBRARY) $(DESTDIR)$(LIBDIR); \ chmod go+r $(DESTDIR)$(LIBDIR)/$(LIBRARY); \ -@@ -325,11 +325,11 @@ install-tests: install-comp_tests +@@ -332,11 +330,11 @@ install-tests: install-comp_tests $(SETPATH) $(MAKE) -C ctests install $(SETPATH) $(MAKE) -C ftests install $(SETPATH) $(MAKE) -C validation_tests install @@ -38,7 +49,7 @@ # Component tests installing install-comp_tests: -@@ -341,8 +341,8 @@ endif +@@ -348,8 +346,8 @@ endif install-pkgconf: @echo "pkcongfig being installed in: \"$(DESTDIR)$(LIBPC)\""; @@ -76,7 +87,7 @@ --- a/src/components/Makefile_comp_tests.target.in +++ b/src/components/Makefile_comp_tests.target.in -@@ -25,13 +25,13 @@ tests: $(NAME)_tests +@@ -26,13 +26,13 @@ tests: $(NAME)_tests install: @echo "$(NAME) tests (DATADIR) being installed in: \"$(DATADIR)\""; @@ -100,9 +111,13 @@ --- a/src/ctests/Makefile.recipies +++ b/src/ctests/Makefile.recipies -@@ -174,13 +174,13 @@ zero_shmem: zero_shmem.c $(TESTLIB) $(DO +@@ -183,16 +183,16 @@ zero_shmem: zero_shmem.c $(TESTLIB) $(DO $(CC_R) $(INCLUDE) $(SMPCFLGS) $(CFLAGS) $(TOPTFLAGS) zero_shmem.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_shmem $(SMPLIBS) + omp_hl: omp_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) +- -$(CC_R) $(INCLUDE) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) omp_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o omp_hl $(OMPLIBS) ++ $(CC_R) $(INCLUDE) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) omp_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o omp_hl $(OMPLIBS) + zero_omp: zero_omp.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) - -$(CC_R) $(INCLUDE) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) zero_omp.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_omp $(OMPLIBS) + $(CC_R) $(INCLUDE) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) zero_omp.c $(TESTLIB) 
$(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_omp $(OMPLIBS) @@ -117,7 +132,7 @@ clockres_pthreads: clockres_pthreads.c $(TESTLIB) $(CLOCKCORE) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) clockres_pthreads.c $(TESTLIB) $(CLOCKCORE) $(PAPILIB) $(LDFLAGS) -o clockres_pthreads -lpthread -lm -@@ -342,10 +342,10 @@ shlib: shlib.c $(TESTLIB) $(PAPILIB) +@@ -348,10 +348,10 @@ shlib: shlib.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) shlib.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o shlib $(LDL) exeinfo: exeinfo.c $(TESTLIB) $(PAPILIB) @@ -130,7 +145,7 @@ hwinfo: hwinfo.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) hwinfo.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o hwinfo -@@ -354,82 +354,82 @@ code2name: code2name.c $(TESTLIB) $(PAPI +@@ -360,88 +360,88 @@ code2name: code2name.c $(TESTLIB) $(PAPI $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) code2name.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o code2name attach_target: attach_target.c $(DOLOOPS) @@ -239,6 +254,13 @@ prof_utils.o: prof_utils.c $(testlibdir)/papi_test.h prof_utils.h $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -c prof_utils.c + + filter_helgrind: filter_helgrind.c $(TESTLIB) $(PAPILIB) +- -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) filter_helgrind.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o filter_helgrind ++ $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) filter_helgrind.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o filter_helgrind + + .PHONY : all default ctests ctest clean + --- a/src/ftests/Makefile +++ b/src/ftests/Makefile @@ -13,14 +13,14 @@ include Makefile.recipies @@ -282,7 +304,7 @@ + cp Makefile.target $(DATADIR)/testlib/Makefile --- a/src/utils/Makefile +++ b/src/utils/Makefile -@@ -73,6 +73,6 @@ distclean clobber: clean +@@ -76,6 +76,6 @@ distclean clobber: clean install: $(UTIL_TARGETS) @echo "Utilities (BINDIR) being installed in: \"$(BINDIR)\""; @@ -337,3 +359,69 @@ + find . 
-name "*.[ch]" -type f -exec cp {} $(DATADIR)/ctests \; + cp Makefile.target $(DATADIR)/ctests/Makefile + cat Makefile.recipies >> $(DATADIR)/ctests/Makefile +--- a/doc/Makefile ++++ b/doc/Makefile +@@ -20,8 +20,8 @@ clean: + distclean clobber: clean + + install: man +- -rm -f man/man3/HighLevelInfo.3 +- -rm -f man/man3/papi_data_structures.3 +- -rm -r ../man/man1/*.1 ../man/man3/*.3 +- -cp -R man/man1/*.1 ../man/man1 +- -cp -R man/man3/*.3 ../man/man3 ++ rm -f man/man3/HighLevelInfo.3 ++ rm -f man/man3/papi_data_structures.3 ++ rm -r ../man/man1/*.1 ../man/man3/*.3 ++ cp -R man/man1/*.1 ../man/man1 ++ cp -R man/man3/*.3 ../man/man3 +--- a/man/Makefile ++++ b/man/Makefile +@@ -3,11 +3,11 @@ clean: + + install: + @echo "Man pages (MANDIR) being installed in: \"$(MANDIR)\""; +- -mkdir -p $(MANDIR)/man3 +- -chmod go+rx $(MANDIR)/man3 +- -cp man3/PAPI*.3 $(MANDIR)/man3 +- -chmod go+r $(MANDIR)/man3/PAPI*.3 +- -mkdir -p $(MANDIR)/man1 +- -chmod go+rx $(MANDIR)/man1 +- -cp man1/*.1 $(MANDIR)/man1 +- -chmod go+r $(MANDIR)/man1/*.1 ++ mkdir -p $(MANDIR)/man3 ++ chmod go+rx $(MANDIR)/man3 ++ cp man3/PAPI*.3 $(MANDIR)/man3 ++ chmod go+r $(MANDIR)/man3/PAPI*.3 ++ mkdir -p $(MANDIR)/man1 ++ chmod go+rx $(MANDIR)/man1 ++ cp man1/*.1 $(MANDIR)/man1 ++ chmod go+r $(MANDIR)/man1/*.1 +--- a/src/Rules.pfm4_pe ++++ b/src/Rules.pfm4_pe +@@ -62,20 +62,20 @@ papi_libpfm4_events.o: papi_libpfm4_even + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi_libpfm4_events.c -o $@ + + native_clean: +- -rm -f $(MISCOBJS) +- -rm -f $(PFM_OBJS) ++ rm -f $(MISCOBJS) ++ rm -f $(PFM_OBJS) + ifneq (,${PFM_ROOT}) + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" clean + endif + + native_install: + ifneq (,${PFM_ROOT}) +- -$(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" DESTDIR=$(DESTDIR) PREFIX=$(PREFIX) install_prefix=$(PREFIX) LIBDIR=$(LIBDIR) INCDIR=$(INCDIR) MANDIR=$(MANDIR) install ++ $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" DESTDIR=$(DESTDIR) PREFIX=$(PREFIX) install_prefix=$(PREFIX) LIBDIR=$(LIBDIR) INCDIR=$(INCDIR) 
MANDIR=$(MANDIR) install + endif +- -install -d $(DESTDIR)$(LIBDIR) ++ install -d $(DESTDIR)$(LIBDIR) + # Makefile.inc already has installation of shared libraries so + # there is no need to do it here +- -install -d $(DESTDIR)$(DATADIR) ++ install -d $(DESTDIR)$(DATADIR) + cp -f ./papi_events.csv $(DESTDIR)$(DATADIR) + + native_clobber: diff -Nru papi-5.7.0+dfsg/debian/patches/fix-typos.patch papi-6.0.0~dfsg/debian/patches/fix-typos.patch --- papi-5.7.0+dfsg/debian/patches/fix-typos.patch 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/patches/fix-typos.patch 2020-04-04 20:16:18.000000000 +0000 @@ -12,3 +12,45 @@ "Requests for exclusive access to shared cache line", 0, 0, PAPI_PRESET_BIT_CACH, NULL, {0},{NULL}, NULL}, +--- a/man/man3/PAPI_enum_cmp_event.3 ++++ b/man/man3/PAPI_enum_cmp_event.3 +@@ -16,7 +16,7 @@ Enumerate PAPI preset or native events f + .nf + @par C Interface: + \#include @n +-int PAPI_enum_cmp_event( int *EventCode, int modifer, int cidx ); ++int PAPI_enum_cmp_event( int *EventCode, int modifier, int cidx ); + + Given an event code, PAPI_enum_event replaces the event + code with the next available event. +--- a/man/man3/PAPI_enum_event.3 ++++ b/man/man3/PAPI_enum_event.3 +@@ -16,7 +16,7 @@ Enumerate PAPI preset or native events\& + .nf + @par C Interface: + \#include @n +-int PAPI_enum_event( int * EventCode, int modifer ); ++int PAPI_enum_event( int * EventCode, int modifier ); + + Given a preset or native event code, PAPI_enum_event replaces the event + code with the next available event in either the preset or native table. 
+--- a/src/papi.c ++++ b/src/papi.c +@@ -1589,7 +1589,7 @@ PAPI_event_name_to_code( const char *in, + * + * @par C Interface: + * \#include @n +- * int PAPI_enum_event( int * EventCode, int modifer ); ++ * int PAPI_enum_event( int * EventCode, int modifier ); + * + * Given a preset or native event code, PAPI_enum_event replaces the event + * code with the next available event in either the preset or native table. +@@ -1792,7 +1792,7 @@ PAPI_enum_event( int *EventCode, int mod + * + * @par C Interface: + * \#include @n +- * int PAPI_enum_cmp_event( int *EventCode, int modifer, int cidx ); ++ * int PAPI_enum_cmp_event( int *EventCode, int modifier, int cidx ); + * + * Given an event code, PAPI_enum_event replaces the event + * code with the next available event. diff -Nru papi-5.7.0+dfsg/debian/patches/for-debian-do-not-embed-libpfm4.patch papi-6.0.0~dfsg/debian/patches/for-debian-do-not-embed-libpfm4.patch --- papi-5.7.0+dfsg/debian/patches/for-debian-do-not-embed-libpfm4.patch 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/patches/for-debian-do-not-embed-libpfm4.patch 2020-04-04 20:16:18.000000000 +0000 @@ -3,7 +3,7 @@ --- a/src/Rules.pfm4_pe +++ b/src/Rules.pfm4_pe -@@ -25,7 +25,7 @@ +@@ -26,7 +26,7 @@ MISCHDRS += linux-lock.h mb.h papi_libpf MISCSRCS += papi_libpfm4_events.c SHLIBDEPS = -Bdynamic -L$(PFM_LIB_PATH) -lpfm PFM_OBJS=$(shell $(AR) t $(PFM_LIB_PATH)/libpfm.a 2>/dev/null) @@ -12,7 +12,7 @@ ifeq (yes,$(MIC)) FORCE_PFM_ARCH="CONFIG_PFMLIB_ARCH_X86=y" -@@ -62,6 +62,7 @@ +@@ -63,6 +63,7 @@ papi_libpfm4_events.o: papi_libpfm4_even native_clean: -rm -f $(MISCOBJS) diff -Nru papi-5.7.0+dfsg/debian/patches/for-debian-fix-hyphenation.patch papi-6.0.0~dfsg/debian/patches/for-debian-fix-hyphenation.patch --- papi-5.7.0+dfsg/debian/patches/for-debian-fix-hyphenation.patch 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/patches/for-debian-fix-hyphenation.patch 2020-04-04 20:16:18.000000000 +0000 @@ -95,28 +95,6 @@ .PP .SH "Bugs" .PP 
---- a/man/man3/PAPI_accum_counters.3 -+++ b/man/man3/PAPI_accum_counters.3 -@@ -57,7 +57,7 @@ do_100events(); - if ( PAPI_accum_counters( values, num_hwcntrs ) != PAPI_OK ) - handle_error(1); - // values[0] now equals 200 --values[0] = -100; -+values[0] = \-100; - do_100events(); - if ( PAPI_accum_counters(values, num_hwcntrs ) != PAPI_OK ) - handle_error(); ---- a/man/man3/PAPI_read_counters.3 -+++ b/man/man3/PAPI_read_counters.3 -@@ -57,7 +57,7 @@ do_100events(); - if ( PAPI_accum_counters( values, num_hwcntrs ) != PAPI_OK ) - handle_error(1); - // values[0] now equals 200 --values[0] = -100; -+values[0] = \-100; - do_100events(); - if ( PAPI_accum_counters(values, num_hwcntrs ) != PAPI_OK ) - handle_error(); --- a/man/man1/papi_hybrid_native_avail.1 +++ b/man/man1/papi_hybrid_native_avail.1 @@ -17,36 +17,36 @@ papi_hybrid_native_avail is a PAPI utili diff -Nru papi-5.7.0+dfsg/debian/patches/for-debian-no-rpath.patch papi-6.0.0~dfsg/debian/patches/for-debian-no-rpath.patch --- papi-5.7.0+dfsg/debian/patches/for-debian-no-rpath.patch 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/patches/for-debian-no-rpath.patch 2020-04-04 20:16:18.000000000 +0000 @@ -1,7 +1,7 @@ On Debian, we do not want an rpath for standard lib dir --- a/src/configure.in +++ b/src/configure.in -@@ -1266,7 +1266,7 @@ PAPISOVER='$(PAPIVER).$(PAPIREV)' +@@ -1298,7 +1298,7 @@ PAPISOVER='$(PAPIVER).$(PAPIREV)' VLIB='libpapi.so.$(PAPISOVER)' OMPCFLGS=-fopenmp CC_R='$(CC) -pthread' diff -Nru papi-5.7.0+dfsg/debian/patches/man-section.patch papi-6.0.0~dfsg/debian/patches/man-section.patch --- papi-5.7.0+dfsg/debian/patches/man-section.patch 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/debian/patches/man-section.patch 2020-04-04 20:16:18.000000000 +0000 @@ -0,0 +1,22 @@ +Author: Andreas Beckmann +Description: fix man page section + +--- a/man/man1/PAPI_derived_event_files.1 ++++ b/man/man1/PAPI_derived_event_files.1 +@@ -1,4 +1,4 @@ +-.TH "PAPI_derived_event_files" 1 "Thu 
Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- ++.TH "PAPI_derived_event_files" 5 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- + .ad l + .nh + .SH NAME +--- a/src/papi_preset.c ++++ b/src/papi_preset.c +@@ -908,7 +908,7 @@ infix_to_postfix( char *infix ) { + * support the same event definition syntax. + * + * Event definition file syntax: +- * see PAPI_derived_event_files(1) man page. ++ * see PAPI_derived_event_files(5) man page. + * + * Blank lines are ignored + * Lines that begin with '#' are comments. diff -Nru papi-5.7.0+dfsg/debian/patches/series papi-6.0.0~dfsg/debian/patches/series --- papi-5.7.0+dfsg/debian/patches/series 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/patches/series 2020-04-04 20:16:18.000000000 +0000 @@ -1,8 +1,7 @@ -0001-set-SONAME-to-libpapi.so.-PAPIVER-.-PAPIREV.patch -0002-Clean-up-of-carriage-return-character-M-from.patch fix-typos.patch for-debian-no-rpath.patch for-debian-do-not-embed-libpfm4.patch for-debian-fix-hyphenation.patch do-not-ignore-failures.patch #for-debian-link-examples-dynamically.patch +man-section.patch diff -Nru papi-5.7.0+dfsg/debian/rules papi-6.0.0~dfsg/debian/rules --- papi-5.7.0+dfsg/debian/rules 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/rules 2020-04-04 20:16:18.000000000 +0000 @@ -3,11 +3,10 @@ # output every command that modifies files on the build system. 
#export DH_VERBOSE = 1 -SOVERSION = 5.7 +SOVERSION = 6.0 # see FEATURE AREAS in dpkg-buildflags(1) export DEB_BUILD_MAINT_OPTIONS = hardening=+all -export DEB_LDFLAGS_MAINT_APPEND = -Wl,--as-needed DPKG_EXPORT_BUILDFLAGS = yes include /usr/share/dpkg/buildflags.mk @@ -40,13 +39,18 @@ -env LD_LIBRARY_PATH=$(CURDIR)/src:$(LD_LIBRARY_PATH) dh_auto_test override_dh_auto_install: + mkdir -p debian/tmp/usr/bin dh_auto_install --sourcedirectory=src -- install-all + mv debian/tmp/usr/bin/papi_hl_output_writer.py debian/tmp/usr/bin/papi_hl_output_writer # delete example binaries find debian/tmp/usr/share/papi -perm -100 -type f -delete find debian/tmp/usr/share/papi -name \*.a -type f -delete find debian/tmp/usr/share/papi -name Makefile -type f -print0 | \ xargs -0r sed -i -e 's@ [^ ]*-f\(file\|debug\)-prefix-map=[^ ]*@@g' +override_dh_python3: + dh_python3 --shebang=/usr/bin/python3 + override_dh_makeshlibs: dh_makeshlibs -a -- -c4 diff -Nru papi-5.7.0+dfsg/debian/watch papi-6.0.0~dfsg/debian/watch --- papi-5.7.0+dfsg/debian/watch 2019-06-16 00:01:29.000000000 +0000 +++ papi-6.0.0~dfsg/debian/watch 2020-04-04 20:16:18.000000000 +0000 @@ -1,4 +1,4 @@ version=4 -opts="dversionmangle=s/\+dfsg\d*$//,repacksuffix=+dfsg" \ +opts="dversionmangle=s/\~dfsg\d*$//,repacksuffix=~dfsg" \ https://icl.utk.edu/papi/software/index.html .*/papi-(.*)\.tar\.gz diff -Nru papi-5.7.0+dfsg/doc/Doxyfile-common papi-6.0.0~dfsg/doc/Doxyfile-common --- papi-5.7.0+dfsg/doc/Doxyfile-common 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/doc/Doxyfile-common 2020-03-04 15:56:56.000000000 +0000 @@ -31,7 +31,7 @@ # This could be handy for archiving the generated documentation or # if some version control system is used. 
-PROJECT_NUMBER = 5.7.0.0 +PROJECT_NUMBER = 6.0.0.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer diff -Nru papi-5.7.0+dfsg/doc/Doxyfile-man3 papi-6.0.0~dfsg/doc/Doxyfile-man3 --- papi-5.7.0+dfsg/doc/Doxyfile-man3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/doc/Doxyfile-man3 2020-03-04 15:56:56.000000000 +0000 @@ -19,7 +19,7 @@ # directories like "/usr/src/myproject". Separate the files or directories # with spaces. -INPUT = ../src/papi.h ../src/papi.c ../src/papi_hl.c \ +INPUT = ../src/papi.h ../src/papi.c ../src/high-level/papi_hl.c \ ../src/papi_fwrappers.c FILE_PATTERNS = *.c *.h diff -Nru papi-5.7.0+dfsg/doc/Makefile papi-6.0.0~dfsg/doc/Makefile --- papi-5.7.0+dfsg/doc/Makefile 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/doc/Makefile 2020-03-04 15:56:56.000000000 +0000 @@ -8,7 +8,7 @@ man: man/man1 man/man3 -man/man3: ../src/papi.h ../src/papi.c ../src/papi_hl.c ../src/papi_fwrappers.c +man/man3: ../src/papi.h ../src/papi.c ../src/high-level/papi_hl.c ../src/papi_fwrappers.c doxygen Doxyfile-man3 man/man1: ../src/utils/papi_avail.c ../src/utils/papi_clockres.c ../src/utils/papi_command_line.c ../src/utils/papi_component_avail.c ../src/utils/papi_cost.c ../src/utils/papi_decode.c ../src/utils/papi_error_codes.c ../src/utils/papi_event_chooser.c ../src/utils/papi_xml_event_info.c ../src/utils/papi_mem_info.c ../src/utils/papi_multiplex_cost.c ../src/utils/papi_native_avail.c ../src/utils/papi_version.c diff -Nru papi-5.7.0+dfsg/.gitattributes papi-6.0.0~dfsg/.gitattributes --- papi-5.7.0+dfsg/.gitattributes 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/.gitattributes 2020-03-04 15:56:56.000000000 +0000 @@ -0,0 +1,9 @@ +PAPI_FAQ.html -diff +release_procedure.txt -diff +gitlog2changelog.py -diff +doc/DataRange.html -diff +doc/PAPI-C.html -diff +doc/README -diff +src/buildbot_configure_with_components.sh -diff 
+delete_before_release.sh -diff +.gitattributes -diff diff -Nru papi-5.7.0+dfsg/INSTALL.txt papi-6.0.0~dfsg/INSTALL.txt --- papi-5.7.0+dfsg/INSTALL.txt 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/INSTALL.txt 2020-03-04 15:56:56.000000000 +0000 @@ -548,11 +548,17 @@ they will function within the PAPI framework without requiring any changes to PAPI source code. -Before running any component that requires configuration, the configure -script for that component must be executed in order to generate the -Makefile which contains the configuration settings. Normally, the script -will only need to be executed once. Depending on the component, configure -may require that one or more configuration settings be specified by the user. +A separate directory for each components is in the papi/src/components/ +directory; e.g. the NVIDIA cuda component is in papi/src/components/cuda. +Within each component directory is a README file which should be consulted. + +Typically the component needs an environment variables to be exported; e.g. +the cuda component requires the PAPI_CUDA_ROOT environment variable be set +to the directory where cuda libraries can be found. + +Some components require multiple environment variables. Additional +instructions and how to address special circumstances can be found in the +README files. The components to be added to PAPI are specified during the configuration of PAPI by adding the --with-components= command line option to @@ -560,6 +566,3 @@ option would be: % ./configure --with-components="acpi lustre net" -Attempting to add a component to PAPI which requires configuration and has -not been configured will result in a compilation error because the PAPI -build environment will be unable to find the Makefile for that component. 
diff -Nru papi-5.7.0+dfsg/man/man1/papi_avail.1 papi-6.0.0~dfsg/man/man1/papi_avail.1 --- papi-5.7.0+dfsg/man/man1/papi_avail.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_avail.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_avail" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_avail" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_clockres.1 papi-6.0.0~dfsg/man/man1/papi_clockres.1 --- papi-5.7.0+dfsg/man/man1/papi_clockres.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_clockres.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_clockres" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_clockres" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_command_line.1 papi-6.0.0~dfsg/man/man1/papi_command_line.1 --- papi-5.7.0+dfsg/man/man1/papi_command_line.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_command_line.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_command_line" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_command_line" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_component_avail.1 papi-6.0.0~dfsg/man/man1/papi_component_avail.1 --- papi-5.7.0+dfsg/man/man1/papi_component_avail.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_component_avail.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_component_avail" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_component_avail" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_cost.1 papi-6.0.0~dfsg/man/man1/papi_cost.1 --- papi-5.7.0+dfsg/man/man1/papi_cost.1 
2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_cost.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_cost" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_cost" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_decode.1 papi-6.0.0~dfsg/man/man1/papi_decode.1 --- papi-5.7.0+dfsg/man/man1/papi_decode.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_decode.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_decode" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_decode" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/PAPI_derived_event_files.1 papi-6.0.0~dfsg/man/man1/PAPI_derived_event_files.1 --- papi-5.7.0+dfsg/man/man1/PAPI_derived_event_files.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/PAPI_derived_event_files.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_derived_event_files" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_derived_event_files" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_error_codes.1 papi-6.0.0~dfsg/man/man1/papi_error_codes.1 --- papi-5.7.0+dfsg/man/man1/papi_error_codes.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_error_codes.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_error_codes" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_error_codes" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_event_chooser.1 papi-6.0.0~dfsg/man/man1/papi_event_chooser.1 --- papi-5.7.0+dfsg/man/man1/papi_event_chooser.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_event_chooser.1 2020-03-04 
15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_event_chooser" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_event_chooser" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_hybrid_native_avail.1 papi-6.0.0~dfsg/man/man1/papi_hybrid_native_avail.1 --- papi-5.7.0+dfsg/man/man1/papi_hybrid_native_avail.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_hybrid_native_avail.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_hybrid_native_avail" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_hybrid_native_avail" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_mem_info.1 papi-6.0.0~dfsg/man/man1/papi_mem_info.1 --- papi-5.7.0+dfsg/man/man1/papi_mem_info.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_mem_info.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_mem_info" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_mem_info" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_multiplex_cost.1 papi-6.0.0~dfsg/man/man1/papi_multiplex_cost.1 --- papi-5.7.0+dfsg/man/man1/papi_multiplex_cost.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_multiplex_cost.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_multiplex_cost" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_multiplex_cost" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_native_avail.1 papi-6.0.0~dfsg/man/man1/papi_native_avail.1 --- papi-5.7.0+dfsg/man/man1/papi_native_avail.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_native_avail.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH 
"papi_native_avail" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_native_avail" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_version.1 papi-6.0.0~dfsg/man/man1/papi_version.1 --- papi-5.7.0+dfsg/man/man1/papi_version.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_version.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_version" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_version" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man1/papi_xml_event_info.1 papi-6.0.0~dfsg/man/man1/papi_xml_event_info.1 --- papi-5.7.0+dfsg/man/man1/papi_xml_event_info.1 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man1/papi_xml_event_info.1 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "papi_xml_event_info" 1 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "papi_xml_event_info" 1 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/binary_tree_t.3 papi-6.0.0~dfsg/man/man3/binary_tree_t.3 --- papi-5.7.0+dfsg/man/man3/binary_tree_t.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/binary_tree_t.3 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,28 @@ +.TH "binary_tree_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +binary_tree_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "void * \fBroot\fP" +.br +.ti -1c +.RI "\fBthreads_t\fP * \fBfind_p\fP" +.br +.in -1c +.SH "Field Documentation" +.PP +.SS "\fBthreads_t\fP* binary_tree_t::find_p" +Pointer that is used for finding a thread node +.SS "void* binary_tree_t::root" +Root of binary tree + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/components_t.3 papi-6.0.0~dfsg/man/man3/components_t.3 --- papi-5.7.0+dfsg/man/man3/components_t.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/components_t.3 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,37 @@ +.TH "components_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +components_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBcomponent_id\fP" +.br +.ti -1c +.RI "int \fBnum_of_events\fP" +.br +.ti -1c +.RI "int \fBmax_num_of_events\fP" +.br +.ti -1c +.RI "char ** \fBevent_names\fP" +.br +.ti -1c +.RI "int * \fBevent_codes\fP" +.br +.ti -1c +.RI "short * \fBevent_types\fP" +.br +.ti -1c +.RI "int \fBEventSet\fP" +.br +.in -1c + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/local_components_t.3 papi-6.0.0~dfsg/man/man3/local_components_t.3 --- papi-5.7.0+dfsg/man/man3/local_components_t.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/local_components_t.3 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,26 @@ +.TH "local_components_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +local_components_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBEventSet\fP" +.br +.ti -1c +.RI "long_long * \fBvalues\fP" +.br +.in -1c +.SH "Field Documentation" +.PP +.SS "long_long* local_components_t::values" +Return values for the eventsets + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_accum.3 papi-6.0.0~dfsg/man/man3/PAPI_accum.3 --- papi-5.7.0+dfsg/man/man3/PAPI_accum.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_accum.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_accum" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_accum" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_accum_counters.3 papi-6.0.0~dfsg/man/man3/PAPI_accum_counters.3 --- papi-5.7.0+dfsg/man/man3/PAPI_accum_counters.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_accum_counters.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,78 +0,0 @@ -.TH "PAPI_accum_counters" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPI_accum_counters \- -.PP -Accumulate and reset counters\&. - -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -.nf -@par C Interface: -\#include @n -int PAPI_accum_counters( long long *values, int array_len ); - -.fi -.PP -.PP -\fBParameters:\fP -.RS 4 -\fI*values\fP an array to hold the counter values of the counting events -.br -\fIarry_len\fP the number of items in the *events array -.RE -.PP -\fBPrecondition:\fP -.RS 4 -These calls assume an initialized PAPI library and a properly added event set\&. -.RE -.PP -\fBPostcondition:\fP -.RS 4 -The counters are reset and left running after the call\&. -.RE -.PP -\fBReturn values:\fP -.RS 4 -\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. -.br -\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. -.RE -.PP -\fBPAPI_accum_counters()\fP adds the event counters into the array *values\&. 
-.PP -.PP -.nf -do_100events(); -if ( PAPI_read_counters( values, num_hwcntrs ) != PAPI_OK ) - handlw_error(1); -// values[0] now equals 100 -do_100events(); -if ( PAPI_accum_counters( values, num_hwcntrs ) != PAPI_OK ) - handle_error(1); -// values[0] now equals 200 -values[0] = -100; -do_100events(); -if ( PAPI_accum_counters(values, num_hwcntrs ) != PAPI_OK ) - handle_error(); -// values[0] now equals 0 - * -.fi -.PP -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_set_opt()\fP \fBPAPI_start_counters()\fP -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_add_event.3 papi-6.0.0~dfsg/man/man3/PAPI_add_event.3 --- papi-5.7.0+dfsg/man/man3/PAPI_add_event.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_add_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_add_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_add_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_add_events.3 papi-6.0.0~dfsg/man/man3/PAPI_add_events.3 --- papi-5.7.0+dfsg/man/man3/PAPI_add_events.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_add_events.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_add_events" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_add_events" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_add_named_event.3 papi-6.0.0~dfsg/man/man3/PAPI_add_named_event.3 --- papi-5.7.0+dfsg/man/man3/PAPI_add_named_event.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_add_named_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_add_named_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_add_named_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" 
-*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_address_map_t.3 papi-6.0.0~dfsg/man/man3/PAPI_address_map_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_address_map_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_address_map_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_address_map_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_address_map_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_addr_range_option_t.3 papi-6.0.0~dfsg/man/man3/PAPI_addr_range_option_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_addr_range_option_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_addr_range_option_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_addr_range_option_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_addr_range_option_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_all_thr_spec_t.3 papi-6.0.0~dfsg/man/man3/PAPI_all_thr_spec_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_all_thr_spec_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_all_thr_spec_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_all_thr_spec_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_all_thr_spec_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_assign_eventset_component.3 papi-6.0.0~dfsg/man/man3/PAPI_assign_eventset_component.3 --- papi-5.7.0+dfsg/man/man3/PAPI_assign_eventset_component.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_assign_eventset_component.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_assign_eventset_component" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH 
"PAPI_assign_eventset_component" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_attach.3 papi-6.0.0~dfsg/man/man3/PAPI_attach.3 --- papi-5.7.0+dfsg/man/man3/PAPI_attach.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_attach.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_attach" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_attach" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_attach_option_t.3 papi-6.0.0~dfsg/man/man3/PAPI_attach_option_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_attach_option_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_attach_option_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_attach_option_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_attach_option_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_cleanup_eventset.3 papi-6.0.0~dfsg/man/man3/PAPI_cleanup_eventset.3 --- papi-5.7.0+dfsg/man/man3/PAPI_cleanup_eventset.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_cleanup_eventset.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_cleanup_eventset" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_cleanup_eventset" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_component_info_t.3 papi-6.0.0~dfsg/man/man3/PAPI_component_info_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_component_info_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_component_info_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_component_info_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_component_info_t" 3 "Thu Feb 27 2020" 
"Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_cpu_option_t.3 papi-6.0.0~dfsg/man/man3/PAPI_cpu_option_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_cpu_option_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_cpu_option_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_cpu_option_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_cpu_option_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_create_eventset.3 papi-6.0.0~dfsg/man/man3/PAPI_create_eventset.3 --- papi-5.7.0+dfsg/man/man3/PAPI_create_eventset.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_create_eventset.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_create_eventset" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_create_eventset" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_debug_option_t.3 papi-6.0.0~dfsg/man/man3/PAPI_debug_option_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_debug_option_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_debug_option_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_debug_option_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_debug_option_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_destroy_eventset.3 papi-6.0.0~dfsg/man/man3/PAPI_destroy_eventset.3 --- papi-5.7.0+dfsg/man/man3/PAPI_destroy_eventset.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_destroy_eventset.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_destroy_eventset" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_destroy_eventset" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" 
-*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_detach.3 papi-6.0.0~dfsg/man/man3/PAPI_detach.3 --- papi-5.7.0+dfsg/man/man3/PAPI_detach.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_detach.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_detach" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_detach" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_disable_component.3 papi-6.0.0~dfsg/man/man3/PAPI_disable_component.3 --- papi-5.7.0+dfsg/man/man3/PAPI_disable_component.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_disable_component.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_disable_component" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_disable_component" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_disable_component_by_name.3 papi-6.0.0~dfsg/man/man3/PAPI_disable_component_by_name.3 --- papi-5.7.0+dfsg/man/man3/PAPI_disable_component_by_name.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_disable_component_by_name.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_disable_component_by_name" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_disable_component_by_name" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_dmem_info_t.3 papi-6.0.0~dfsg/man/man3/PAPI_dmem_info_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_dmem_info_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_dmem_info_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_dmem_info_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_dmem_info_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad 
l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_domain_option_t.3 papi-6.0.0~dfsg/man/man3/PAPI_domain_option_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_domain_option_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_domain_option_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_domain_option_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_domain_option_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_enum_cmp_event.3 papi-6.0.0~dfsg/man/man3/PAPI_enum_cmp_event.3 --- papi-5.7.0+dfsg/man/man3/PAPI_enum_cmp_event.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_enum_cmp_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_enum_cmp_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_enum_cmp_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_enum_event.3 papi-6.0.0~dfsg/man/man3/PAPI_enum_event.3 --- papi-5.7.0+dfsg/man/man3/PAPI_enum_event.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_enum_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_enum_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_enum_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_epc.3 papi-6.0.0~dfsg/man/man3/PAPI_epc.3 --- papi-5.7.0+dfsg/man/man3/PAPI_epc.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_epc.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_epc" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_epc" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME @@ -24,17 +24,17 @@ .RS 4 \fIevent\fP event code to be measured (0 defaults to PAPI_TOT_INS) .br 
-\fI*rtime\fP total realtime since the first call +\fI*rtime\fP realtime since the latest call .br -\fI*ptime\fP total process time since the first call +\fI*ptime\fP process time since the latest call .br -\fI*ref\fP incremental reference clock cycles since the last call +\fI*ref\fP incremental reference clock cycles since the latest call .br -\fI*core\fP incremental core clock cycles since the last call +\fI*core\fP incremental core clock cycles since the latest call .br -\fI*evt\fP total events since the first call +\fI*evt\fP events since the latest call .br -\fI*epc\fP incremental events per cycle since the last call +\fI*epc\fP incremental events per cycle since the latest call .RE .PP \fBReturn values:\fP @@ -46,23 +46,21 @@ \fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. .RE .PP -The first call to \fBPAPI_epc()\fP will initialize the PAPI High Level interface, set up the counters to monitor the user specified event, PAPI_TOT_CYC, and PAPI_REF_CYC (if it exists) and start the counters\&. +The first call to \fBPAPI_epc()\fP will initialize the PAPI interface, set up the counters to monitor the user specified event, PAPI_TOT_CYC, and PAPI_REF_CYC (if it exists) and start the counters\&. .PP -Subsequent calls will read the counters and return total real time, total process time, total event counts since the start of the measurement and the core and reference cycle count and EPC rate since the latest call to \fBPAPI_epc()\fP\&. +Subsequent calls will read the counters and return real time, process time, event counts, the core and reference cycle count and EPC rate since the latest call to \fBPAPI_epc()\fP\&. .PP -A call to \fBPAPI_stop_counters()\fP will stop the counters from running and then calls such as \fBPAPI_start_counters()\fP or other rate calls can safely be used\&. -.PP -\fBPAPI_epc\fP can provide a more detailed look at algorithm efficiency in light of clock variability in modern cpus\&. 
MFLOPS is no longer an adequate description of peak performance if clock rates can arbitrarily speed up or slow down\&. By allowing a user specified event and reporting reference cycles, core cycles and real time, \fBPAPI_epc\fP provides the information to compute an accurate effective clock rate, and an accurate measure of computational throughput\&. +\fBPAPI_epc()\fP can provide a more detailed look at algorithm efficiency in light of clock variability in modern cpus\&. MFLOPS is no longer an adequate description of peak performance if clock rates can arbitrarily speed up or slow down\&. By allowing a user specified event and reporting reference cycles, core cycles and real time, \fBPAPI_epc\fP provides the information to compute an accurate effective clock rate, and an accurate measure of computational throughput\&. Note that \fBPAPI_epc()\fP is thread-safe and can therefore be called by multiple threads\&. .PP \fBSee Also:\fP .RS 4 -\fBPAPI_flips()\fP +\fBPAPI_flips_rate()\fP .PP -\fBPAPI_flops()\fP +\fBPAPI_flops_rate()\fP .PP \fBPAPI_ipc()\fP .PP -\fBPAPI_stop_counters()\fP +\fBPAPI_rate_stop()\fP .RE .PP diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_event_code_to_name.3 papi-6.0.0~dfsg/man/man3/PAPI_event_code_to_name.3 --- papi-5.7.0+dfsg/man/man3/PAPI_event_code_to_name.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_event_code_to_name.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_event_code_to_name" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_event_code_to_name" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_event_info_t.3 papi-6.0.0~dfsg/man/man3/PAPI_event_info_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_event_info_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_event_info_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_event_info_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" 
\" -*- nroff -*- +.TH "PAPI_event_info_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_event_name_to_code.3 papi-6.0.0~dfsg/man/man3/PAPI_event_name_to_code.3 --- papi-5.7.0+dfsg/man/man3/PAPI_event_name_to_code.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_event_name_to_code.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_event_name_to_code" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_event_name_to_code" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_exe_info_t.3 papi-6.0.0~dfsg/man/man3/PAPI_exe_info_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_exe_info_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_exe_info_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_exe_info_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_exe_info_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_accum.3 papi-6.0.0~dfsg/man/man3/PAPIF_accum.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_accum.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_accum.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_accum" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_accum" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_accum_counters.3 papi-6.0.0~dfsg/man/man3/PAPIF_accum_counters.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_accum_counters.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_accum_counters.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -.TH "PAPIF_accum_counters" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPIF_accum_counters \- -.PP 
-Accumulate and reset counters\&. - -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -\fBFortran Interface:\fP -.RS 4 -#include 'fpapi\&.h' -.br - \fBPAPIF_accum_counters\fP( C_LONG_LONG(*) values, C_INT array_len, C_INT check ) -.RE -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_accum_counters\fP -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_add_event.3 papi-6.0.0~dfsg/man/man3/PAPIF_add_event.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_add_event.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_add_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_add_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_add_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_add_events.3 papi-6.0.0~dfsg/man/man3/PAPIF_add_events.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_add_events.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_add_events.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_add_events" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_add_events" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_add_named_event.3 papi-6.0.0~dfsg/man/man3/PAPIF_add_named_event.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_add_named_event.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_add_named_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_add_named_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_add_named_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_assign_eventset_component.3 papi-6.0.0~dfsg/man/man3/PAPIF_assign_eventset_component.3 --- 
papi-5.7.0+dfsg/man/man3/PAPIF_assign_eventset_component.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_assign_eventset_component.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_assign_eventset_component" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_assign_eventset_component" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_cleanup_eventset.3 papi-6.0.0~dfsg/man/man3/PAPIF_cleanup_eventset.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_cleanup_eventset.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_cleanup_eventset.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_cleanup_eventset" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_cleanup_eventset" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_create_eventset.3 papi-6.0.0~dfsg/man/man3/PAPIF_create_eventset.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_create_eventset.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_create_eventset.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_create_eventset" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_create_eventset" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_destroy_eventset.3 papi-6.0.0~dfsg/man/man3/PAPIF_destroy_eventset.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_destroy_eventset.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_destroy_eventset.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_destroy_eventset" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_destroy_eventset" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru 
papi-5.7.0+dfsg/man/man3/PAPIF_enum_event.3 papi-6.0.0~dfsg/man/man3/PAPIF_enum_event.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_enum_event.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_enum_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_enum_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_enum_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_epc.3 papi-6.0.0~dfsg/man/man3/PAPIF_epc.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_epc.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_epc.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_epc" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_epc" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_event_code_to_name.3 papi-6.0.0~dfsg/man/man3/PAPIF_event_code_to_name.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_event_code_to_name.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_event_code_to_name.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_event_code_to_name" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_event_code_to_name" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_event_name_to_code.3 papi-6.0.0~dfsg/man/man3/PAPIF_event_name_to_code.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_event_name_to_code.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_event_name_to_code.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_event_name_to_code" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_event_name_to_code" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_flips.3 
papi-6.0.0~dfsg/man/man3/PAPIF_flips.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_flips.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_flips.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -.TH "PAPIF_flips" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPIF_flips \- -.PP -Simplified call to get Mflips/s (floating point instruction rate), real and processor time\&. - -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -\fBFortran Interface:\fP -.RS 4 -#include 'fpapi\&.h' -.br - \fBPAPIF_flips( C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG flpins, C_FLOAT mflips, C_INT check )\fP -.RE -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_flips\fP -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_flips_rate.3 papi-6.0.0~dfsg/man/man3/PAPIF_flips_rate.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_flips_rate.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_flips_rate.3 2020-03-04 15:56:56.000000000 +0000 @@ -0,0 +1,32 @@ +.TH "PAPIF_flips_rate" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_flips_rate \- +.PP +Simplified call to get Mflips/s (floating point instruction rate), real and processor time\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_flips_rate\fP ( C_STRING EventName, C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG flpins, C_FLOAT mflips, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_flips_rate\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_flops.3 papi-6.0.0~dfsg/man/man3/PAPIF_flops.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_flops.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_flops.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -.TH "PAPIF_flops" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPIF_flops \- -.PP -Simplified call to get Mflops/s (floating point instruction rate), real and processor time\&. - -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -\fBFortran Interface:\fP -.RS 4 -#include 'fpapi\&.h' -.br - \fBPAPIF_flops( C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG flpops, C_FLOAT mflops, C_INT check )\fP -.RE -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_flops\fP -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_flops_rate.3 papi-6.0.0~dfsg/man/man3/PAPIF_flops_rate.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_flops_rate.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_flops_rate.3 2020-03-04 15:56:56.000000000 +0000 @@ -0,0 +1,32 @@ +.TH "PAPIF_flops_rate" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_flops_rate \- +.PP +Simplified call to get Mflops/s (floating point instruction rate), real and processor time\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_flops_rate( C_STRING EventName, C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG flpops, C_FLOAT mflops, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_flops_rate\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_get_clockrate.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_clockrate.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_clockrate.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_clockrate.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_clockrate" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_clockrate" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_get_dmem_info.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_dmem_info.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_dmem_info.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_dmem_info.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_dmem_info" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_dmem_info" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_get_domain.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_domain.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_domain.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_domain.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_domain" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_domain" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_get_event_info.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_event_info.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_event_info.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_event_info.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_event_info" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_event_info" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru 
papi-5.7.0+dfsg/man/man3/PAPIF_get_exe_info.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_exe_info.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_exe_info.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_exe_info.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_exe_info" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_exe_info" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_get_granularity.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_granularity.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_granularity.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_granularity.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_granularity" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_granularity" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_get_hardware_info.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_hardware_info.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_hardware_info.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_hardware_info.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_hardware_info" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_hardware_info" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_get_multiplex.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_multiplex.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_multiplex.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_multiplex.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_multiplex" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_multiplex" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru 
papi-5.7.0+dfsg/man/man3/PAPIF_get_preload.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_preload.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_preload.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_preload.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_preload" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_preload" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_get_real_cyc.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_real_cyc.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_real_cyc.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_real_cyc.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_real_cyc" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_real_cyc" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_get_real_nsec.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_real_nsec.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_real_nsec.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_real_nsec.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_real_nsec" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_real_nsec" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_get_real_usec.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_real_usec.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_real_usec.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_real_usec.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_real_usec" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_real_usec" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_get_virt_cyc.3 
papi-6.0.0~dfsg/man/man3/PAPIF_get_virt_cyc.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_virt_cyc.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_virt_cyc.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_virt_cyc" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_virt_cyc" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_get_virt_usec.3 papi-6.0.0~dfsg/man/man3/PAPIF_get_virt_usec.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_get_virt_usec.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_get_virt_usec.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_get_virt_usec" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_get_virt_usec" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIf_hl_read.3 papi-6.0.0~dfsg/man/man3/PAPIf_hl_read.3 --- papi-5.7.0+dfsg/man/man3/PAPIf_hl_read.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIf_hl_read.3 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,84 @@ +.TH "PAPIf_hl_read" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIf_hl_read \- +.PP +Reads and stores hardware events inside of an instrumented code region\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include +.br + int \fBPAPIf_hl_read( C_STRING region, C_INT check )\fP +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIregion\fP -- a unique region name corresponding to \fBPAPIf_hl_region_begin\fP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_ENOTRUN\fP -- EventSet is currently not running or could not determined\&. +.br +\fIPAPI_ESYS\fP -- A system or C library call failed inside PAPI, see the errno variable\&. 
+.br +\fIPAPI_EMISC\fP -- PAPI has been deactivated due to previous errors\&. +.br +\fIPAPI_ENOMEM\fP -- Insufficient memory\&. +.RE +.PP +\fBPAPIf_hl_read\fP reads hardware events and stores them internally inside of an instrumented code region\&. Assumes that \fBPAPIf_hl_region_begin\fP was called before\&. +.PP +\fBExample:\fP +.RS 4 + +.RE +.PP +.PP +.nf +* integer retval +* +* call PAPIf_hl_region_begin("computation", retval) +* if ( retval \&.NE\&. PAPI_OK ) then +* write (*,*) "PAPIf_hl_region_begin failed!" +* end if +* +* !do some computation here +* +* call PAPIf_hl_read("computation", retval) +* if ( retval \&.NE\&. PAPI_OK ) then +* write (*,*) "PAPIf_hl_read failed!" +* end if +* +* !do some computation here +* +* call PAPIf_hl_region_end("computation", retval) +* if ( retval \&.NE\&. PAPI_OK ) then +* write (*,*) "PAPIf_hl_region_end failed!" +* end if +* +* +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_hl_read\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPIf_hl_region_begin.3 papi-6.0.0~dfsg/man/man3/PAPIf_hl_region_begin.3 --- papi-5.7.0+dfsg/man/man3/PAPIf_hl_region_begin.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIf_hl_region_begin.3 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,79 @@ +.TH "PAPIf_hl_region_begin" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIf_hl_region_begin \- +.PP +Reads and stores hardware events at the beginning of an instrumented code region\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIf_hl_region_begin( C_STRING region, C_INT check )\fP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_ENOTRUN\fP -- EventSet is currently not running or could not be determined\&.
+.br +\fIPAPI_ESYS\fP -- A system or C library call failed inside PAPI, see the errno variable\&. +.br +\fIPAPI_EMISC\fP -- PAPI has been deactivated due to previous errors\&. +.br +\fIPAPI_ENOMEM\fP -- Insufficient memory\&. +.RE +.PP +\fBPAPIf_hl_region_begin\fP reads hardware events and stores them internally at the beginning of an instrumented code region\&. If not specified via environment variable PAPI_EVENTS, default events are used\&. The first call sets all counters implicitly to zero and starts counting\&. Note that if PAPI_EVENTS is not set or cannot be interpreted, default hardware events are recorded\&. +.PP +\fBExample:\fP +.RS 4 + +.RE +.PP +.PP +.nf +* export PAPI_EVENTS="PAPI_TOT_INS,PAPI_TOT_CYC" +* +.fi +.PP +.PP +.PP +.nf +* integer retval +* +* call PAPIf_hl_region_begin("computation", retval) +* if ( retval \&.NE\&. PAPI_OK ) then +* write (*,*) "PAPIf_hl_region_begin failed!" +* end if +* +* !do some computation here +* +* call PAPIf_hl_region_end("computation", retval) +* if ( retval \&.NE\&. PAPI_OK ) then +* write (*,*) "PAPIf_hl_region_end failed!" +* end if +* +* +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_hl_region_begin\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPIf_hl_region_end.3 papi-6.0.0~dfsg/man/man3/PAPIf_hl_region_end.3 --- papi-5.7.0+dfsg/man/man3/PAPIf_hl_region_end.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIf_hl_region_end.3 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,77 @@ +.TH "PAPIf_hl_region_end" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIf_hl_region_end \- +.PP +Reads and stores hardware events at the end of an instrumented code region\&.
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIf_hl_region_end( C_STRING region, C_INT check )\fP +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIregion\fP -- a unique region name corresponding to \fBPAPIf_hl_region_begin\fP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_ENOTRUN\fP -- EventSet is currently not running or could not be determined\&. +.br +\fIPAPI_ESYS\fP -- A system or C library call failed inside PAPI, see the errno variable\&. +.br +\fIPAPI_EMISC\fP -- PAPI has been deactivated due to previous errors\&. +.br +\fIPAPI_ENOMEM\fP -- Insufficient memory\&. +.RE +.PP +\fBPAPIf_hl_region_end\fP reads hardware events and stores the difference to the values from \fBPAPIf_hl_region_begin\fP at the end of an instrumented code region\&. Assumes that \fBPAPIf_hl_region_begin\fP was called before\&. Note that an output is automatically generated when your application terminates\&. +.PP +\fBExample:\fP +.RS 4 + +.RE +.PP +.PP +.nf +* integer retval +* +* call PAPIf_hl_region_begin("computation", retval) +* if ( retval \&.NE\&. PAPI_OK ) then +* write (*,*) "PAPIf_hl_region_begin failed!" +* end if +* +* !do some computation here +* +* call PAPIf_hl_region_end("computation", retval) +* if ( retval \&.NE\&. PAPI_OK ) then +* write (*,*) "PAPIf_hl_region_end failed!" +* end if +* +* +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_hl_region_end\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&.
diff -Nru papi-5.7.0+dfsg/man/man3/PAPIf_hl_stop.3 papi-6.0.0~dfsg/man/man3/PAPIf_hl_stop.3 --- papi-5.7.0+dfsg/man/man3/PAPIf_hl_stop.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIf_hl_stop.3 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,73 @@ +.TH "PAPIf_hl_stop" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIf_hl_stop \- +.PP +Stop a running high-level event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIf_hl_stop( C_INT check )\fP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_ENOEVNT\fP -- The EventSet is not started yet\&. +.br +\fIPAPI_ENOMEM\fP -- Insufficient memory to complete the operation\&. +.RE +.PP +\fBPAPIf_hl_stop\fP stops a running high-level event set\&. +.PP +This call is optional and only necessary if the programmer wants to use the low-level API in addition to the high-level API\&. It should be noted that \fBPAPIf_hl_stop\fP and low-level calls are not allowed inside of a marked region\&. Furthermore, \fBPAPIf_hl_stop\fP is thread-local and therefore has to be called in the same thread as the corresponding marked region\&. +.PP +\fBExample:\fP +.RS 4 + +.RE +.PP +.PP +.nf +* integer retval +* +* call PAPIf_hl_region_begin("computation", retval) +* if ( retval \&.NE\&. PAPI_OK ) then +* write (*,*) "PAPIf_hl_region_begin failed!" +* end if +* +* !do some computation here +* +* call PAPIf_hl_region_end("computation", retval) +* if ( retval \&.NE\&. PAPI_OK ) then +* write (*,*) "PAPIf_hl_region_end failed!" +* end if +* +* call PAPIf_hl_stop(retval) +* if ( retval \&.NE\&. PAPI_OK ) then +* write (*,*) "PAPIf_hl_stop failed!" +* end if +* +* +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_hl_stop\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_ipc.3 papi-6.0.0~dfsg/man/man3/PAPIF_ipc.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_ipc.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_ipc.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_ipc" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_ipc" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_is_initialized.3 papi-6.0.0~dfsg/man/man3/PAPIF_is_initialized.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_is_initialized.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_is_initialized.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_is_initialized" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_is_initialized" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_library_init.3 papi-6.0.0~dfsg/man/man3/PAPIF_library_init.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_library_init.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_library_init.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_library_init" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_library_init" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_flips.3 papi-6.0.0~dfsg/man/man3/PAPI_flips.3 --- papi-5.7.0+dfsg/man/man3/PAPI_flips.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_flips.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,64 +0,0 @@ -.TH "PAPI_flips" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPI_flips \- -.PP -Simplified call to get Mflips/s (floating point instruction rate), real and processor time\&. 
- -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -\fBC Interface: \fP -.RS 4 -#include <\fBpapi\&.h\fP> -.br -int \fBPAPI_flips( float *rtime, float *ptime, long long *flpins, float *mflips )\fP; -.RE -.PP -\fBParameters:\fP -.RS 4 -\fI*rtime\fP total realtime since the first call -.br -\fI*ptime\fP total process time since the first call -.br -\fI*flpins\fP total floating point instructions since the first call -.br -\fI*mflips\fP incremental (Mega) floating point instructions per seconds since the last call -.RE -.PP -\fBReturn values:\fP -.RS 4 -\fIPAPI_EINVAL\fP The counters were already started by something other than \fBPAPI_flips()\fP\&. -.br -\fIPAPI_ENOEVNT\fP The floating point instructions event does not exist\&. -.br -\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. -.RE -.PP -The first call to \fBPAPI_flips()\fP will initialize the PAPI High Level interface, set up the counters to monitor the PAPI_FP_INS event and start the counters\&. -.PP -Subsequent calls will read the counters and return total real time, total process time, total floating point instructions since the start of the measurement and the Mflip/s rate since latest call to \fBPAPI_flips()\fP\&. A call to \fBPAPI_stop_counters()\fP will stop the counters from running and then calls such as \fBPAPI_start_counters()\fP or other rate calls can safely be used\&. -.PP -\fBPAPI_flips\fP returns information related to floating point instructions using the PAPI_FP_INS event\&. This is intended to measure instruction rate through the floating point pipe with no massaging\&. -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_flops()\fP -.PP -\fBPAPI_ipc()\fP -.PP -\fBPAPI_epc()\fP -.PP -\fBPAPI_stop_counters()\fP -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_flips_rate.3 papi-6.0.0~dfsg/man/man3/PAPI_flips_rate.3 --- papi-5.7.0+dfsg/man/man3/PAPI_flips_rate.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_flips_rate.3 2020-03-04 15:56:56.000000000 +0000 @@ -0,0 +1,64 @@ +.TH "PAPI_flips_rate" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_flips_rate \- +.PP +Simplified call to get Mflips/s (floating point instruction rate), real and processor time\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface: \fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br +int \fBPAPI_flips_rate( int event, float *rtime, float *ptime, long long *flpins, float *mflips )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIevent\fP one of the three presets PAPI_FP_INS, PAPI_VEC_SP or PAPI_VEC_DP +.br +\fI*rtime\fP realtime since the latest call +.br +\fI*ptime\fP process time since the latest call +.br +\fI*flpins\fP floating point instructions since the latest call +.br +\fI*mflips\fP incremental (Mega) floating point instructions per second since the latest call +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP The counters were already started by something other than \fBPAPI_flips_rate()\fP\&. +.br +\fIPAPI_ENOEVNT\fP The floating point instructions event does not exist\&. +.br +\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. +.RE +.PP +The first call to \fBPAPI_flips_rate()\fP will initialize the PAPI interface, set up the counters to monitor the floating point instructions event and start the counters\&. +.PP +Subsequent calls will read the counters and return real time, process time, floating point instructions and the Mflip/s rate since the latest call to \fBPAPI_flips_rate()\fP\&. +.PP +\fBPAPI_flips_rate()\fP returns information related to floating point instructions using the floating point instructions event\&.
This is intended to measure instruction rate through the floating point pipe with no massaging\&. Note that \fBPAPI_flips_rate()\fP is thread-safe and can therefore be called by multiple threads\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_flops_rate()\fP +.PP +\fBPAPI_ipc()\fP +.PP +\fBPAPI_epc()\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_lock.3 papi-6.0.0~dfsg/man/man3/PAPIF_lock.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_lock.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_lock.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_lock" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_lock" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_flops.3 papi-6.0.0~dfsg/man/man3/PAPI_flops.3 --- papi-5.7.0+dfsg/man/man3/PAPI_flops.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_flops.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,64 +0,0 @@ -.TH "PAPI_flops" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPI_flops \- -.PP -Simplified call to get Mflops/s (floating point operation rate), real and processor time\&. - -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -\fBC Interface: \fP -.RS 4 -#include <\fBpapi\&.h\fP> -.br -int \fBPAPI_flops( float *rtime, float *ptime, long long *flpops, float *mflops )\fP; -.RE -.PP -\fBParameters:\fP -.RS 4 -\fI*rtime\fP total realtime since the first call -.br -\fI*ptime\fP total process time since the first call -.br -\fI*flpops\fP total floating point operations since the first call -.br -\fI*mflops\fP incremental (Mega) floating point operations per seconds since the last call -.RE -.PP -\fBReturn values:\fP -.RS 4 -\fIPAPI_EINVAL\fP The counters were already started by something other than \fBPAPI_flops()\fP\&. 
-.br -\fIPAPI_ENOEVNT\fP The floating point operations event does not exist\&. -.br -\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. -.RE -.PP -The first call to \fBPAPI_flops()\fP will initialize the PAPI High Level interface, set up the counters to monitor the PAPI_FP_OPS event and start the counters\&. -.PP -Subsequent calls will read the counters and return total real time, total process time, total floating point operations since the start of the measurement and the Mflop/s rate since latest call to \fBPAPI_flops()\fP\&. A call to \fBPAPI_stop_counters()\fP will stop the counters from running and then calls such as \fBPAPI_start_counters()\fP or other rate calls can safely be used\&. -.PP -\fBPAPI_flops\fP returns information related to theoretical floating point operations rather than simple instructions\&. It uses the PAPI_FP_OPS event which attempts to 'correctly' account for, e\&.g\&., FMA undercounts and FP Store overcounts, etc\&. -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_flips()\fP -.PP -\fBPAPI_ipc()\fP -.PP -\fBPAPI_epc()\fP -.PP -\fBPAPI_stop_counters()\fP -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_flops_rate.3 papi-6.0.0~dfsg/man/man3/PAPI_flops_rate.3 --- papi-5.7.0+dfsg/man/man3/PAPI_flops_rate.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_flops_rate.3 2020-03-04 15:56:56.000000000 +0000 @@ -0,0 +1,66 @@ +.TH "PAPI_flops_rate" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_flops_rate \- +.PP +Simplified call to get Mflops/s (floating point operation rate), real and processor time\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface: \fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br +int \fBPAPI_flops_rate\fP ( int event, float *rtime, float *ptime, long long *flpops, float *mflops ); +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIevent\fP one of the three presets PAPI_FP_OPS, PAPI_SP_OPS or PAPI_DP_OPS +.br +\fI*rtime\fP realtime since the latest call +.br +\fI*ptime\fP process time since the latest call +.br +\fI*flpops\fP floating point operations since the latest call +.br +\fI*mflops\fP incremental (Mega) floating point operations per second since the latest call +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP The counters were already started by something other than \fBPAPI_flops_rate()\fP\&. +.br +\fIPAPI_ENOEVNT\fP The floating point operations event does not exist\&. +.br +\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. +.RE +.PP +The first call to \fBPAPI_flops_rate()\fP will initialize the PAPI interface, set up the counters to monitor the floating point operations event and start the counters\&. +.PP +Subsequent calls will read the counters and return real time, process time, floating point operations and the Mflop/s rate since the latest call to \fBPAPI_flops_rate()\fP\&. +.PP +\fBPAPI_flops_rate()\fP returns information related to theoretical floating point operations rather than simple instructions\&. It uses the floating point operations event which attempts to 'correctly' account for, e\&.g\&., FMA undercounts and FP Store overcounts\&. Note that \fBPAPI_flops_rate()\fP is thread-safe and can therefore be called by multiple threads\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_flips_rate()\fP +.PP +\fBPAPI_ipc()\fP +.PP +\fBPAPI_epc()\fP +.PP +\fBPAPI_rate_stop()\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&.
diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_multiplex_init.3 papi-6.0.0~dfsg/man/man3/PAPIF_multiplex_init.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_multiplex_init.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_multiplex_init.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_multiplex_init" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_multiplex_init" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_num_cmp_hwctrs.3 papi-6.0.0~dfsg/man/man3/PAPIF_num_cmp_hwctrs.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_num_cmp_hwctrs.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_num_cmp_hwctrs.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_num_cmp_hwctrs" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_num_cmp_hwctrs" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_num_counters.3 papi-6.0.0~dfsg/man/man3/PAPIF_num_counters.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_num_counters.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_num_counters.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -.TH "PAPIF_num_counters" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPIF_num_counters \- -.PP -Get the number of hardware counters available on the system\&. - -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -\fBFortran Interface:\fP -.RS 4 -#include 'fpapi\&.h' -.br - \fBPAPIF_num_counters( C_INT numevents )\fP -.RE -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_num_counters\fP -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_num_events.3 papi-6.0.0~dfsg/man/man3/PAPIF_num_events.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_num_events.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_num_events.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_num_events" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_num_events" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_num_hwctrs.3 papi-6.0.0~dfsg/man/man3/PAPIF_num_hwctrs.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_num_hwctrs.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_num_hwctrs.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_num_hwctrs" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_num_hwctrs" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_perror.3 papi-6.0.0~dfsg/man/man3/PAPIF_perror.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_perror.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_perror.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_perror" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_perror" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_query_event.3 papi-6.0.0~dfsg/man/man3/PAPIF_query_event.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_query_event.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_query_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_query_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_query_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_query_named_event.3 papi-6.0.0~dfsg/man/man3/PAPIF_query_named_event.3 --- 
papi-5.7.0+dfsg/man/man3/PAPIF_query_named_event.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_query_named_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_query_named_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_query_named_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_rate_stop.3 papi-6.0.0~dfsg/man/man3/PAPIF_rate_stop.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_rate_stop.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_rate_stop.3 2020-03-04 15:56:56.000000000 +0000 @@ -0,0 +1,32 @@ +.TH "PAPIF_rate_stop" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_rate_stop \- +.PP +Stop a running event set of a rate function\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_rate_stop( C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_rate_stop\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_read.3 papi-6.0.0~dfsg/man/man3/PAPIF_read.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_read.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_read.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_read" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_read" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_read_ts.3 papi-6.0.0~dfsg/man/man3/PAPIF_read_ts.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_read_ts.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_read_ts.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_read_ts" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_read_ts" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_register_thread.3 papi-6.0.0~dfsg/man/man3/PAPIF_register_thread.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_register_thread.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_register_thread.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_register_thread" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_register_thread" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_remove_event.3 papi-6.0.0~dfsg/man/man3/PAPIF_remove_event.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_remove_event.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_remove_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_remove_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_remove_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_remove_events.3 papi-6.0.0~dfsg/man/man3/PAPIF_remove_events.3 --- 
papi-5.7.0+dfsg/man/man3/PAPIF_remove_events.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_remove_events.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_remove_events" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_remove_events" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_remove_named_event.3 papi-6.0.0~dfsg/man/man3/PAPIF_remove_named_event.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_remove_named_event.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_remove_named_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_remove_named_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_remove_named_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_reset.3 papi-6.0.0~dfsg/man/man3/PAPIF_reset.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_reset.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_reset.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_reset" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_reset" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_set_cmp_domain.3 papi-6.0.0~dfsg/man/man3/PAPIF_set_cmp_domain.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_set_cmp_domain.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_set_cmp_domain.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_set_cmp_domain" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_set_cmp_domain" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_set_cmp_granularity.3 papi-6.0.0~dfsg/man/man3/PAPIF_set_cmp_granularity.3 --- 
papi-5.7.0+dfsg/man/man3/PAPIF_set_cmp_granularity.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_set_cmp_granularity.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_set_cmp_granularity" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_set_cmp_granularity" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_set_debug.3 papi-6.0.0~dfsg/man/man3/PAPIF_set_debug.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_set_debug.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_set_debug.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_set_debug" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_set_debug" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_set_domain.3 papi-6.0.0~dfsg/man/man3/PAPIF_set_domain.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_set_domain.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_set_domain.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_set_domain" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_set_domain" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_set_event_domain.3 papi-6.0.0~dfsg/man/man3/PAPIF_set_event_domain.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_set_event_domain.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_set_event_domain.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_set_event_domain" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_set_event_domain" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_set_granularity.3 papi-6.0.0~dfsg/man/man3/PAPIF_set_granularity.3 --- 
papi-5.7.0+dfsg/man/man3/PAPIF_set_granularity.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_set_granularity.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_set_granularity" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_set_granularity" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_set_inherit.3 papi-6.0.0~dfsg/man/man3/PAPIF_set_inherit.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_set_inherit.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_set_inherit.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_set_inherit" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_set_inherit" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_set_multiplex.3 papi-6.0.0~dfsg/man/man3/PAPIF_set_multiplex.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_set_multiplex.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_set_multiplex.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_set_multiplex" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_set_multiplex" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_shutdown.3 papi-6.0.0~dfsg/man/man3/PAPIF_shutdown.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_shutdown.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_shutdown.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_shutdown" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_shutdown" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_start.3 papi-6.0.0~dfsg/man/man3/PAPIF_start.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_start.3 2019-03-04 19:56:22.000000000 +0000 +++ 
papi-6.0.0~dfsg/man/man3/PAPIF_start.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_start" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_start" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_start_counters.3 papi-6.0.0~dfsg/man/man3/PAPIF_start_counters.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_start_counters.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_start_counters.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -.TH "PAPIF_start_counters" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPIF_start_counters \- -.PP -Start counting hardware events\&. - -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -\fBFortran Interface:\fP -.RS 4 -#include 'fpapi\&.h' -.br - \fBPAPIF_start_counters\fP( C_INT(*) events, C_INT array_len, C_INT check ) -.RE -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_start_counters\fP -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_state.3 papi-6.0.0~dfsg/man/man3/PAPIF_state.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_state.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_state.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_state" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_state" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_stop.3 papi-6.0.0~dfsg/man/man3/PAPIF_stop.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_stop.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_stop.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_stop" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_stop" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_stop_counters.3 papi-6.0.0~dfsg/man/man3/PAPIF_stop_counters.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_stop_counters.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_stop_counters.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -.TH "PAPIF_stop_counters" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPIF_stop_counters \- -.PP -Stop counting hardware events and reset values to zero\&. - -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -\fBFortran Interface:\fP -.RS 4 -#include 'fpapi\&.h' -.br - \fBPAPIF_stop_counters\fP( C_LONG_LONG(*) values, C_INT array_len, C_INT check ) -.RE -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_stop_counters\fP -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_thread_id.3 papi-6.0.0~dfsg/man/man3/PAPIF_thread_id.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_thread_id.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_thread_id.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_thread_id" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_thread_id" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_thread_init.3 papi-6.0.0~dfsg/man/man3/PAPIF_thread_init.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_thread_init.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_thread_init.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_thread_init" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_thread_init" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_unlock.3 papi-6.0.0~dfsg/man/man3/PAPIF_unlock.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_unlock.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_unlock.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_unlock" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_unlock" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_unregister_thread.3 papi-6.0.0~dfsg/man/man3/PAPIF_unregister_thread.3 --- papi-5.7.0+dfsg/man/man3/PAPIF_unregister_thread.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_unregister_thread.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_unregister_thread" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_unregister_thread" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPIF_write.3 papi-6.0.0~dfsg/man/man3/PAPIF_write.3 
--- papi-5.7.0+dfsg/man/man3/PAPIF_write.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPIF_write.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPIF_write" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPIF_write" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_cmp_opt.3 papi-6.0.0~dfsg/man/man3/PAPI_get_cmp_opt.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_cmp_opt.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_cmp_opt.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_cmp_opt" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_cmp_opt" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_component_index.3 papi-6.0.0~dfsg/man/man3/PAPI_get_component_index.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_component_index.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_component_index.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_component_index" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_component_index" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_component_info.3 papi-6.0.0~dfsg/man/man3/PAPI_get_component_info.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_component_info.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_component_info.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_component_info" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_component_info" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_dmem_info.3 papi-6.0.0~dfsg/man/man3/PAPI_get_dmem_info.3 --- 
papi-5.7.0+dfsg/man/man3/PAPI_get_dmem_info.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_dmem_info.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_dmem_info" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_dmem_info" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_event_component.3 papi-6.0.0~dfsg/man/man3/PAPI_get_event_component.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_event_component.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_event_component.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_event_component" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_event_component" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_event_info.3 papi-6.0.0~dfsg/man/man3/PAPI_get_event_info.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_event_info.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_event_info.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_event_info" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_event_info" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_eventset_component.3 papi-6.0.0~dfsg/man/man3/PAPI_get_eventset_component.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_eventset_component.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_eventset_component.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_eventset_component" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_eventset_component" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_executable_info.3 
papi-6.0.0~dfsg/man/man3/PAPI_get_executable_info.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_executable_info.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_executable_info.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_executable_info" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_executable_info" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_hardware_info.3 papi-6.0.0~dfsg/man/man3/PAPI_get_hardware_info.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_hardware_info.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_hardware_info.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_hardware_info" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_hardware_info" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_multiplex.3 papi-6.0.0~dfsg/man/man3/PAPI_get_multiplex.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_multiplex.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_multiplex.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_multiplex" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_multiplex" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_opt.3 papi-6.0.0~dfsg/man/man3/PAPI_get_opt.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_opt.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_opt.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_opt" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_opt" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_overflow_event_index.3 
papi-6.0.0~dfsg/man/man3/PAPI_get_overflow_event_index.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_overflow_event_index.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_overflow_event_index.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_overflow_event_index" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_overflow_event_index" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_real_cyc.3 papi-6.0.0~dfsg/man/man3/PAPI_get_real_cyc.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_real_cyc.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_real_cyc.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_real_cyc" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_real_cyc" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_real_nsec.3 papi-6.0.0~dfsg/man/man3/PAPI_get_real_nsec.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_real_nsec.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_real_nsec.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_real_nsec" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_real_nsec" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_real_usec.3 papi-6.0.0~dfsg/man/man3/PAPI_get_real_usec.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_real_usec.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_real_usec.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_real_usec" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_real_usec" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_shared_lib_info.3 
papi-6.0.0~dfsg/man/man3/PAPI_get_shared_lib_info.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_shared_lib_info.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_shared_lib_info.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_shared_lib_info" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_shared_lib_info" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_thr_specific.3 papi-6.0.0~dfsg/man/man3/PAPI_get_thr_specific.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_thr_specific.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_thr_specific.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_thr_specific" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_thr_specific" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME @@ -44,7 +44,7 @@ .PP .nf int ret; - HighLevelInfo *state = NULL; + RateInfo *state = NULL; ret = PAPI_thread_init(pthread_self); if (ret != PAPI_OK) handle_error(ret); @@ -52,9 +52,9 @@ ret = PAPI_get_thr_specific(PAPI_USR1_TLS, (void *) &state); if (ret != PAPI_OK || state == NULL) { - state = (HighLevelInfo *) malloc(sizeof(HighLevelInfo)); + state = (RateInfo *) malloc(sizeof(RateInfo)); if (state == NULL) return (PAPI_ESYS); - memset(state, 0, sizeof(HighLevelInfo)); + memset(state, 0, sizeof(RateInfo)); state->EventSet = PAPI_NULL; ret = PAPI_create_eventset(&state->EventSet); if (ret != PAPI_OK) return (PAPI_ESYS); diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_virt_cyc.3 papi-6.0.0~dfsg/man/man3/PAPI_get_virt_cyc.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_virt_cyc.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_virt_cyc.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_virt_cyc" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_virt_cyc" 3 "Thu 
Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_virt_nsec.3 papi-6.0.0~dfsg/man/man3/PAPI_get_virt_nsec.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_virt_nsec.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_virt_nsec.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_virt_nsec" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_virt_nsec" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_get_virt_usec.3 papi-6.0.0~dfsg/man/man3/PAPI_get_virt_usec.3 --- papi-5.7.0+dfsg/man/man3/PAPI_get_virt_usec.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_get_virt_usec.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_get_virt_usec" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_get_virt_usec" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_granularity_option_t.3 papi-6.0.0~dfsg/man/man3/PAPI_granularity_option_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_granularity_option_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_granularity_option_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_granularity_option_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_granularity_option_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_hl_read.3 papi-6.0.0~dfsg/man/man3/PAPI_hl_read.3 --- papi-5.7.0+dfsg/man/man3/PAPI_hl_read.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_hl_read.3 2020-03-04 15:56:56.000000000 +0000 @@ -0,0 +1,87 @@ +.TH "PAPI_hl_read" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_hl_read \- +.PP +Read performance events inside of 
a region and store the difference to the corresponding beginning of the region\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_hl_read( const char* region )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIregion\fP -- a unique region name corresponding to \fBPAPI_hl_region_begin\fP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_ENOTRUN\fP -- EventSet is currently not running or could not be determined\&. +.br +\fIPAPI_ESYS\fP -- A system or C library call failed inside PAPI, see the errno variable\&. +.br +\fIPAPI_EMISC\fP -- PAPI has been deactivated due to previous errors\&. +.br +\fIPAPI_ENOMEM\fP -- Insufficient memory\&. +.RE +.PP +\fBPAPI_hl_read\fP reads performance events inside of a region and stores the difference to the corresponding beginning of the region\&. +.PP +Assumes that \fBPAPI_hl_region_begin\fP was called before\&. +.PP +\fBExample:\fP +.RS 4 + +.RE +.PP +.PP +.nf +* int retval; +* +* retval = PAPI_hl_region_begin("computation"); +* if ( retval != PAPI_OK ) +* handle_error(1); +* +* //Do some computation here +* +* retval = PAPI_hl_read("computation"); +* if ( retval != PAPI_OK ) +* handle_error(1); +* +* //Do some computation here +* +* retval = PAPI_hl_region_end("computation"); +* if ( retval != PAPI_OK ) +* handle_error(1); +* +* +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_hl_region_begin\fP +.PP +\fBPAPI_hl_region_end\fP +.PP +\fBPAPI_hl_stop\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_hl_region_begin.3 papi-6.0.0~dfsg/man/man3/PAPI_hl_region_begin.3 --- papi-5.7.0+dfsg/man/man3/PAPI_hl_region_begin.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_hl_region_begin.3 2020-03-04 15:56:56.000000000 +0000 @@ -0,0 +1,87 @@ +.TH "PAPI_hl_region_begin" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_hl_region_begin \- +.PP +Read performance events at the beginning of a region\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_hl_region_begin( const char* region )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIregion\fP -- a unique region name +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_ENOTRUN\fP -- EventSet is currently not running or could not be determined\&. +.br +\fIPAPI_ESYS\fP -- A system or C library call failed inside PAPI, see the errno variable\&. +.br +\fIPAPI_EMISC\fP -- PAPI has been deactivated due to previous errors\&. +.br +\fIPAPI_ENOMEM\fP -- Insufficient memory\&. +.RE +.PP +\fBPAPI_hl_region_begin\fP reads performance events and stores them internally at the beginning of an instrumented code region\&. If not specified via the environment variable PAPI_EVENTS, default events are used\&. The first call sets all counters implicitly to zero and starts counting\&. Note that if PAPI_EVENTS is not set or cannot be interpreted, default performance events are recorded\&. 
+.PP +\fBExample:\fP +.RS 4 + +.RE +.PP +.PP +.nf +* export PAPI_EVENTS="PAPI_TOT_INS,PAPI_TOT_CYC" +* +* +.fi +.PP +.PP +.PP +.nf +* int retval; +* +* retval = PAPI_hl_region_begin("computation"); +* if ( retval != PAPI_OK ) +* handle_error(1); +* +* //Do some computation here +* +* retval = PAPI_hl_region_end("computation"); +* if ( retval != PAPI_OK ) +* handle_error(1); +* +* +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_hl_read\fP +.PP +\fBPAPI_hl_region_end\fP +.PP +\fBPAPI_hl_stop\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_hl_region_end.3 papi-6.0.0~dfsg/man/man3/PAPI_hl_region_end.3 --- papi-5.7.0+dfsg/man/man3/PAPI_hl_region_end.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_hl_region_end.3 2020-03-04 15:56:56.000000000 +0000 @@ -0,0 +1,111 @@ +.TH "PAPI_hl_region_end" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_hl_region_end \- +.PP +Read performance events at the end of a region and store the difference to the corresponding beginning of the region\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_hl_region_end( const char* region )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIregion\fP -- a unique region name corresponding to \fBPAPI_hl_region_begin\fP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_ENOTRUN\fP -- EventSet is currently not running or could not be determined\&. +.br +\fIPAPI_ESYS\fP -- A system or C library call failed inside PAPI, see the errno variable\&. +.br +\fIPAPI_EMISC\fP -- PAPI has been deactivated due to previous errors\&. +.br +\fIPAPI_ENOMEM\fP -- Insufficient memory\&. +.RE +.PP +\fBPAPI_hl_region_end\fP reads performance events at the end of a region and stores the difference to the corresponding beginning of the region\&. 
+.PP +Assumes that \fBPAPI_hl_region_begin\fP was called before\&. +.PP +Note that \fBPAPI_hl_region_end\fP does not stop counting the performance events\&. Counting continues until the application terminates\&. Therefore, the programmer can also create nested regions if required\&. To stop a running high-level event set, the programmer must call \fBPAPI_hl_stop()\fP\&. It should also be noted that a marked region is thread-local and therefore has to be in the same thread\&. +.PP +An output of the measured events is created automatically after the application exits\&. In the case of a serial or thread-parallel application, there is only one output file\&. For MPI applications, the output is saved in multiple files, one per MPI rank\&. The output is generated in the current directory by default\&. However, it is recommended to specify an output directory for larger measurements, especially for MPI applications via the environment variable PAPI_OUTPUT_DIRECTORY\&. In the case where measurements are performed while there are old measurements in the same directory, PAPI will not overwrite or delete the old measurement directories\&. Instead, timestamps are added to the old directories\&. +.PP +For more convenience, the output can also be printed to stdout by setting PAPI_REPORT=1\&. This is not recommended for MPI applications as each MPI rank tries to print the output concurrently\&. +.PP +The generated measurement output can also be converted into a more readable format\&. The python script papi_hl_output_writer\&.py enhances the output by creating some derived metrics, like IPC, MFlops/s, and MFlips/s as well as real and processor time in case the corresponding PAPI events have been recorded\&. The python script can also summarize performance events over all threads and MPI ranks when using the option 'accumulate' as seen below\&. 
+.PP +\fBExample:\fP +.RS 4 + +.RE +.PP +.PP +.nf +* int retval; +* +* retval = PAPI_hl_region_begin("computation"); +* if ( retval != PAPI_OK ) +* handle_error(1); +* +* //Do some computation here +* +* retval = PAPI_hl_region_end("computation"); +* if ( retval != PAPI_OK ) +* handle_error(1); +* +* +.fi +.PP +.PP +.PP +.nf +* python papi_hl_output_writer\&.py --type=accumulate +* +* { +* "computation": { +* "Region count": 1, +* "Real time in s": 0\&.97 , +* "CPU time in s": 0\&.98 , +* "IPC": 1\&.41 , +* "MFLIPS /s": 386\&.28 , +* "MFLOPS /s": 386\&.28 , +* "Number of ranks ": 1, +* "Number of threads ": 1, +* "Number of processes ": 1 +* } +* } +* +* +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_hl_region_begin\fP +.PP +\fBPAPI_hl_read\fP +.PP +\fBPAPI_hl_stop\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_hl_stop.3 papi-6.0.0~dfsg/man/man3/PAPI_hl_stop.3 --- papi-5.7.0+dfsg/man/man3/PAPI_hl_stop.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_hl_stop.3 2020-03-04 15:56:56.000000000 +0000 @@ -0,0 +1,47 @@ +.TH "PAPI_hl_stop" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_hl_stop \- +.PP +Stop a running high-level event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface: \fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_hl_stop()\fP; +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_ENOEVNT\fP -- The EventSet is not started yet\&. +.br +\fIPAPI_ENOMEM\fP -- Insufficient memory to complete the operation\&. +.RE +.PP +\fBPAPI_hl_stop\fP stops a running high-level event set\&. +.PP +This call is optional and only necessary if the programmer wants to use the low-level API in addition to the high-level API\&. It should be noted that \fBPAPI_hl_stop\fP and low-level calls are not allowed inside of a marked region\&. 
Furthermore, \fBPAPI_hl_stop\fP is thread-local and therefore has to be called in the same thread as the corresponding marked region\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_hl_region_begin\fP +.PP +\fBPAPI_hl_read\fP +.PP +\fBPAPI_hl_region_end\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_hw_info_t.3 papi-6.0.0~dfsg/man/man3/PAPI_hw_info_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_hw_info_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_hw_info_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_hw_info_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_hw_info_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_inherit_option_t.3 papi-6.0.0~dfsg/man/man3/PAPI_inherit_option_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_inherit_option_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_inherit_option_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_inherit_option_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_inherit_option_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_ipc.3 papi-6.0.0~dfsg/man/man3/PAPI_ipc.3 --- papi-5.7.0+dfsg/man/man3/PAPI_ipc.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_ipc.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_ipc" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_ipc" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME @@ -22,41 +22,39 @@ .PP \fBParameters:\fP .RS 4 -\fI*rtime\fP total realtime since the first call +\fI*rtime\fP realtime since the latest call .br -\fI*ptime\fP total process time since the first call +\fI*ptime\fP process time since the latest call .br 
-\fI*ins\fP total instructions since the first call +\fI*ins\fP instructions since the latest call .br -\fI*ipc\fP incremental instructions per cycle since the last call +\fI*ipc\fP incremental instructions per cycle since the latest call .RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_EINVAL\fP The counters were already started by something other than \fBPAPI_ipc()\fP\&. .br -\fIPAPI_ENOEVNT\fP The floating point operations event does not exist\&. +\fIPAPI_ENOEVNT\fP The events PAPI_TOT_INS and PAPI_TOT_CYC are not supported\&. .br \fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. .RE .PP -The first call to \fBPAPI_ipc()\fP will initialize the PAPI High Level interface, set up the counters to monitor PAPI_TOT_INS and PAPI_TOT_CYC events and start the counters\&. +The first call to \fBPAPI_ipc()\fP will initialize the PAPI interface, set up the counters to monitor PAPI_TOT_INS and PAPI_TOT_CYC events and start the counters\&. .PP -Subsequent calls will read the counters and return total real time, total process time, total instructions since the start of the measurement and the IPC rate since the latest call to \fBPAPI_ipc()\fP\&. +Subsequent calls will read the counters and return real time, process time, instructions and the IPC rate since the latest call to \fBPAPI_ipc()\fP\&. .PP -A call to \fBPAPI_stop_counters()\fP will stop the counters from running and then calls such as \fBPAPI_start_counters()\fP or other rate calls can safely be used\&. -.PP -\fBPAPI_ipc\fP should return a ratio greater than 1\&.0, indicating instruction level parallelism within the chip\&. The larger this ratio the more effeciently the program is running\&. +\fBPAPI_ipc()\fP should return a ratio greater than 1\&.0, indicating instruction level parallelism within the chip\&. The larger this ratio the more efficiently the program is running\&. Note that \fBPAPI_ipc()\fP is thread-safe and can therefore be called by multiple threads\&. 
.PP \fBSee Also:\fP .RS 4 -\fBPAPI_flips()\fP +\fBPAPI_flips_rate()\fP .PP -\fBPAPI_flops()\fP +\fBPAPI_flops_rate()\fP .PP \fBPAPI_epc()\fP .PP -\fBPAPI_stop_counters()\fP +\fBPAPI_rate_stop()\fP .RE .PP diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_is_initialized.3 papi-6.0.0~dfsg/man/man3/PAPI_is_initialized.3 --- papi-5.7.0+dfsg/man/man3/PAPI_is_initialized.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_is_initialized.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_is_initialized" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_is_initialized" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_itimer_option_t.3 papi-6.0.0~dfsg/man/man3/PAPI_itimer_option_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_itimer_option_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_itimer_option_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_itimer_option_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_itimer_option_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_library_init.3 papi-6.0.0~dfsg/man/man3/PAPI_library_init.3 --- papi-5.7.0+dfsg/man/man3/PAPI_library_init.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_library_init.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_library_init" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_library_init" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_list_events.3 papi-6.0.0~dfsg/man/man3/PAPI_list_events.3 --- papi-5.7.0+dfsg/man/man3/PAPI_list_events.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_list_events.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_list_events" 3 "Fri Feb 22 2019" 
"Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_list_events" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_list_threads.3 papi-6.0.0~dfsg/man/man3/PAPI_list_threads.3 --- papi-5.7.0+dfsg/man/man3/PAPI_list_threads.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_list_threads.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_list_threads" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_list_threads" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_lock.3 papi-6.0.0~dfsg/man/man3/PAPI_lock.3 --- papi-5.7.0+dfsg/man/man3/PAPI_lock.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_lock.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_lock" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_lock" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_mh_cache_info_t.3 papi-6.0.0~dfsg/man/man3/PAPI_mh_cache_info_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_mh_cache_info_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_mh_cache_info_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_mh_cache_info_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_mh_cache_info_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_mh_info_t.3 papi-6.0.0~dfsg/man/man3/PAPI_mh_info_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_mh_info_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_mh_info_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_mh_info_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_mh_info_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad 
l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_mh_level_t.3 papi-6.0.0~dfsg/man/man3/PAPI_mh_level_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_mh_level_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_mh_level_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_mh_level_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_mh_level_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_mh_tlb_info_t.3 papi-6.0.0~dfsg/man/man3/PAPI_mh_tlb_info_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_mh_tlb_info_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_mh_tlb_info_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_mh_tlb_info_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_mh_tlb_info_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_mpx_info_t.3 papi-6.0.0~dfsg/man/man3/PAPI_mpx_info_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_mpx_info_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_mpx_info_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_mpx_info_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_mpx_info_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_multiplex_init.3 papi-6.0.0~dfsg/man/man3/PAPI_multiplex_init.3 --- papi-5.7.0+dfsg/man/man3/PAPI_multiplex_init.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_multiplex_init.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_multiplex_init" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_multiplex_init" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_multiplex_option_t.3 
papi-6.0.0~dfsg/man/man3/PAPI_multiplex_option_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_multiplex_option_t.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_multiplex_option_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_multiplex_option_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_multiplex_option_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_num_cmp_hwctrs.3 papi-6.0.0~dfsg/man/man3/PAPI_num_cmp_hwctrs.3 --- papi-5.7.0+dfsg/man/man3/PAPI_num_cmp_hwctrs.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_num_cmp_hwctrs.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_num_cmp_hwctrs" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_num_cmp_hwctrs" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_num_components.3 papi-6.0.0~dfsg/man/man3/PAPI_num_components.3 --- papi-5.7.0+dfsg/man/man3/PAPI_num_components.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_num_components.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_num_components" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_num_components" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_num_counters.3 papi-6.0.0~dfsg/man/man3/PAPI_num_counters.3 --- papi-5.7.0+dfsg/man/man3/PAPI_num_counters.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_num_counters.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,67 +0,0 @@ -.TH "PAPI_num_counters" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPI_num_counters \- -.PP -Get the number of hardware counters available on the system\&. 
- -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -.nf -@par C Interface: -\#include @n -int PAPI_num_counters( void ); - -.fi -.PP -.PP -\fBPostcondition:\fP -.RS 4 -Initializes the library to PAPI_HIGH_LEVEL_INITED if necessary\&. -.RE -.PP -\fBReturn values:\fP -.RS 4 -\fIPAPI_EINVAL\fP \fBpapi\&.h\fP is different from the version used to compile the PAPI library\&. -.br -\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. -.br -\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. -.RE -.PP -\fBExamples:\fP -.RS 4 - -.PP -.nf -* int num_hwcntrs; -* // The installation does not support PAPI -* if ((num_hwcntrs = PAPI_num_counters()) < 0 ) -* handle_error(1); -* // The installation supports PAPI, but has no counters -* if ((num_hwcntrs = PAPI_num_counters()) == 0 ) -* fprintf(stderr,"Info:: This machine does not provide hardware counters\&.\n"); -* - -.fi -.PP -.RE -.PP -\fBPAPI_num_counters()\fP returns the optimal length of the values array for the high level functions\&. This value corresponds to the number of hardware counters supported by the current CPU component\&. -.PP -\fBNote:\fP -.RS 4 -This function only works for the CPU component\&. To determine the number of counters on another component, use the low level \fBPAPI_num_cmp_hwctrs()\fP\&. -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_num_events.3 papi-6.0.0~dfsg/man/man3/PAPI_num_events.3 --- papi-5.7.0+dfsg/man/man3/PAPI_num_events.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_num_events.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_num_events" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_num_events" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_num_hwctrs.3 papi-6.0.0~dfsg/man/man3/PAPI_num_hwctrs.3 --- papi-5.7.0+dfsg/man/man3/PAPI_num_hwctrs.3 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_num_hwctrs.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_num_hwctrs" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_num_hwctrs" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_option_t.3 papi-6.0.0~dfsg/man/man3/PAPI_option_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_option_t.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_option_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_option_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_option_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_overflow.3 papi-6.0.0~dfsg/man/man3/PAPI_overflow.3 --- papi-5.7.0+dfsg/man/man3/PAPI_overflow.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_overflow.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_overflow" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_overflow" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_perror.3 papi-6.0.0~dfsg/man/man3/PAPI_perror.3 --- papi-5.7.0+dfsg/man/man3/PAPI_perror.3 2019-03-04 
19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_perror.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_perror" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_perror" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_preload_info_t.3 papi-6.0.0~dfsg/man/man3/PAPI_preload_info_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_preload_info_t.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_preload_info_t.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_preload_info_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_preload_info_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_profil.3 papi-6.0.0~dfsg/man/man3/PAPI_profil.3 --- papi-5.7.0+dfsg/man/man3/PAPI_profil.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_profil.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_profil" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_profil" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_query_event.3 papi-6.0.0~dfsg/man/man3/PAPI_query_event.3 --- papi-5.7.0+dfsg/man/man3/PAPI_query_event.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_query_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_query_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_query_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_query_named_event.3 papi-6.0.0~dfsg/man/man3/PAPI_query_named_event.3 --- papi-5.7.0+dfsg/man/man3/PAPI_query_named_event.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_query_named_event.3 2020-03-04 15:56:56.000000000 +0000 
@@ -1,4 +1,4 @@ -.TH "PAPI_query_named_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_query_named_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_rate_stop.3 papi-6.0.0~dfsg/man/man3/PAPI_rate_stop.3 --- papi-5.7.0+dfsg/man/man3/PAPI_rate_stop.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_rate_stop.3 2020-03-04 15:56:56.000000000 +0000 @@ -0,0 +1,47 @@ +.TH "PAPI_rate_stop" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_rate_stop \- +.PP +Stop a running event set of a rate function\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface: \fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br +int \fBPAPI_rate_stop()\fP; +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_ENOEVNT\fP -- The EventSet is not started yet\&. +.br +\fIPAPI_ENOMEM\fP -- Insufficient memory to complete the operation\&. +.RE +.PP +\fBPAPI_rate_stop\fP stops a running event set of a rate function\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_flips_rate()\fP +.PP +\fBPAPI_flops_rate()\fP +.PP +\fBPAPI_ipc()\fP +.PP +\fBPAPI_epc()\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_read.3 papi-6.0.0~dfsg/man/man3/PAPI_read.3 --- papi-5.7.0+dfsg/man/man3/PAPI_read.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_read.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_read" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_read" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_read_counters.3 papi-6.0.0~dfsg/man/man3/PAPI_read_counters.3 --- papi-5.7.0+dfsg/man/man3/PAPI_read_counters.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_read_counters.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,90 +0,0 @@ -.TH "PAPI_read_counters" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPI_read_counters \- -.PP -Read and reset counters\&. - -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -.nf -@par C Interface: -\#include @n -int PAPI_read_counters( long long *values, int array_len ); - -.fi -.PP -.PP -\fBParameters:\fP -.RS 4 -\fI*values\fP an array to hold the counter values of the counting events -.br -\fIarry_len\fP the number of items in the *events array -.RE -.PP -\fBPrecondition:\fP -.RS 4 -These calls assume an initialized PAPI library and a properly added event set\&. -.RE -.PP -\fBPostcondition:\fP -.RS 4 -The counters are reset and left running after the call\&. -.RE -.PP -\fBReturn values:\fP -.RS 4 -\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. -.br -\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. -.RE -.PP -\fBPAPI_read_counters()\fP copies the event counters into the array *values\&. 
-.PP -.PP -.nf -do_100events(); -if ( PAPI_read_counters( values, num_hwcntrs ) != PAPI_OK ) - handlw_error(1); -// values[0] now equals 100 -do_100events(); -if ( PAPI_accum_counters( values, num_hwcntrs ) != PAPI_OK ) - handle_error(1); -// values[0] now equals 200 -values[0] = -100; -do_100events(); -if ( PAPI_accum_counters(values, num_hwcntrs ) != PAPI_OK ) - handle_error(); -// values[0] now equals 0 - * -.fi -.PP -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_set_opt()\fP \fBPAPI_start_counters()\fP -.RE -.PP -\fBFortran Interface:\fP -.RS 4 -#include 'fpapi\&.h' -.br - PAPIF_read_counters( C_LONG_LONG(*) values, C_INT array_len, C_INT check ) -.RE -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_read_counters\fP -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_read_ts.3 papi-6.0.0~dfsg/man/man3/PAPI_read_ts.3 --- papi-5.7.0+dfsg/man/man3/PAPI_read_ts.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_read_ts.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_read_ts" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_read_ts" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_register_thread.3 papi-6.0.0~dfsg/man/man3/PAPI_register_thread.3 --- papi-5.7.0+dfsg/man/man3/PAPI_register_thread.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_register_thread.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_register_thread" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_register_thread" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_remove_event.3 papi-6.0.0~dfsg/man/man3/PAPI_remove_event.3 --- papi-5.7.0+dfsg/man/man3/PAPI_remove_event.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_remove_event.3 
2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_remove_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_remove_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_remove_events.3 papi-6.0.0~dfsg/man/man3/PAPI_remove_events.3 --- papi-5.7.0+dfsg/man/man3/PAPI_remove_events.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_remove_events.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_remove_events" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_remove_events" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_remove_named_event.3 papi-6.0.0~dfsg/man/man3/PAPI_remove_named_event.3 --- papi-5.7.0+dfsg/man/man3/PAPI_remove_named_event.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_remove_named_event.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_remove_named_event" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_remove_named_event" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_reset.3 papi-6.0.0~dfsg/man/man3/PAPI_reset.3 --- papi-5.7.0+dfsg/man/man3/PAPI_reset.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_reset.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_reset" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_reset" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_set_cmp_domain.3 papi-6.0.0~dfsg/man/man3/PAPI_set_cmp_domain.3 --- papi-5.7.0+dfsg/man/man3/PAPI_set_cmp_domain.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_set_cmp_domain.3 2020-03-04 15:56:56.000000000 +0000 @@ -1,4 +1,4 @@ -.TH 
"PAPI_set_cmp_domain" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_set_cmp_domain" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_set_cmp_granularity.3 papi-6.0.0~dfsg/man/man3/PAPI_set_cmp_granularity.3 --- papi-5.7.0+dfsg/man/man3/PAPI_set_cmp_granularity.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_set_cmp_granularity.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_set_cmp_granularity" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_set_cmp_granularity" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_set_debug.3 papi-6.0.0~dfsg/man/man3/PAPI_set_debug.3 --- papi-5.7.0+dfsg/man/man3/PAPI_set_debug.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_set_debug.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_set_debug" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_set_debug" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_set_domain.3 papi-6.0.0~dfsg/man/man3/PAPI_set_domain.3 --- papi-5.7.0+dfsg/man/man3/PAPI_set_domain.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_set_domain.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_set_domain" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_set_domain" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_set_granularity.3 papi-6.0.0~dfsg/man/man3/PAPI_set_granularity.3 --- papi-5.7.0+dfsg/man/man3/PAPI_set_granularity.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_set_granularity.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_set_granularity" 3 "Fri Feb 22 2019" "Version 
5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_set_granularity" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_set_multiplex.3 papi-6.0.0~dfsg/man/man3/PAPI_set_multiplex.3 --- papi-5.7.0+dfsg/man/man3/PAPI_set_multiplex.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_set_multiplex.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_set_multiplex" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_set_multiplex" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_set_opt.3 papi-6.0.0~dfsg/man/man3/PAPI_set_opt.3 --- papi-5.7.0+dfsg/man/man3/PAPI_set_opt.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_set_opt.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_set_opt" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_set_opt" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_set_thr_specific.3 papi-6.0.0~dfsg/man/man3/PAPI_set_thr_specific.3 --- papi-5.7.0+dfsg/man/man3/PAPI_set_thr_specific.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_set_thr_specific.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_set_thr_specific" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_set_thr_specific" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME @@ -44,7 +44,7 @@ .PP .nf int ret; -HighLevelInfo *state = NULL; +RateInfo *state = NULL; ret = PAPI_thread_init(pthread_self); if (ret != PAPI_OK) handle_error(ret); @@ -52,9 +52,9 @@ ret = PAPI_get_thr_specific(PAPI_USR1_TLS, (void *) &state); if (ret != PAPI_OK || state == NULL) { - state = (HighLevelInfo *) malloc(sizeof(HighLevelInfo)); + state = (RateInfo *) malloc(sizeof(RateInfo)); if (state == NULL) return 
(PAPI_ESYS); - memset(state, 0, sizeof(HighLevelInfo)); + memset(state, 0, sizeof(RateInfo)); state->EventSet = PAPI_NULL; ret = PAPI_create_eventset(&state->EventSet); if (ret != PAPI_OK) return (PAPI_ESYS); diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_shlib_info_t.3 papi-6.0.0~dfsg/man/man3/PAPI_shlib_info_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_shlib_info_t.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_shlib_info_t.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_shlib_info_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_shlib_info_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_shutdown.3 papi-6.0.0~dfsg/man/man3/PAPI_shutdown.3 --- papi-5.7.0+dfsg/man/man3/PAPI_shutdown.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_shutdown.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_shutdown" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_shutdown" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_sprofil.3 papi-6.0.0~dfsg/man/man3/PAPI_sprofil.3 --- papi-5.7.0+dfsg/man/man3/PAPI_sprofil.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_sprofil.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_sprofil" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_sprofil" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_sprofil_t.3 papi-6.0.0~dfsg/man/man3/PAPI_sprofil_t.3 --- papi-5.7.0+dfsg/man/man3/PAPI_sprofil_t.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_sprofil_t.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_sprofil_t" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_sprofil_t" 3 "Thu Feb 27 2020" 
"Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_start.3 papi-6.0.0~dfsg/man/man3/PAPI_start.3 --- papi-5.7.0+dfsg/man/man3/PAPI_start.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_start.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_start" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_start" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_start_counters.3 papi-6.0.0~dfsg/man/man3/PAPI_start_counters.3 --- papi-5.7.0+dfsg/man/man3/PAPI_start_counters.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_start_counters.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,65 +0,0 @@ -.TH "PAPI_start_counters" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPI_start_counters \- -.PP -Start counting hardware events\&. - -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -.nf -@par C Interface: -\#include @n -int PAPI_start_counters( int *events, int array_len ); - -.fi -.PP -.PP -\fBParameters:\fP -.RS 4 -\fI*events\fP an array of codes for events such as PAPI_INT_INS or a native event code -.br -\fIarray_len\fP the number of items in the *events array -.RE -.PP -\fBReturn values:\fP -.RS 4 -\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. -.br -\fIPAPI_EISRUN\fP Counters have already been started, you must call \fBPAPI_stop_counters()\fP before you call this function again\&. -.br -\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. -.br -\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. -.br -\fIPAPI_ECNFLCT\fP The underlying counter hardware cannot count this event and other events in the EventSet simultaneously\&. -.br -\fIPAPI_ENOEVNT\fP The PAPI preset is not available on the underlying hardware\&. 
-.RE -.PP -\fBPAPI_start_counters()\fP starts counting the events named in the *events array\&. This function cannot be called if the counters have already been started\&. The user must call \fBPAPI_stop_counters()\fP to stop the events explicitly if he/she wants to call this function again\&. It is the user's responsibility to choose events that can be counted simultaneously by reading the vendor's documentation\&. The length of the *events array should be no longer than the value returned by \fBPAPI_num_counters()\fP\&. -.PP -.PP -.nf -if( PAPI_start_counters( Events, num_hwcntrs ) != PAPI_OK ) - handle_error(1); - * -.fi -.PP -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_stop_counters()\fP \fBPAPI_add_event()\fP \fBPAPI_create_eventset()\fP -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_state.3 papi-6.0.0~dfsg/man/man3/PAPI_state.3 --- papi-5.7.0+dfsg/man/man3/PAPI_state.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_state.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_state" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_state" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_stop.3 papi-6.0.0~dfsg/man/man3/PAPI_stop.3 --- papi-5.7.0+dfsg/man/man3/PAPI_stop.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_stop.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_stop" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_stop" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_stop_counters.3 papi-6.0.0~dfsg/man/man3/PAPI_stop_counters.3 --- papi-5.7.0+dfsg/man/man3/PAPI_stop_counters.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_stop_counters.3 1970-01-01 00:00:00.000000000 +0000 @@ -1,69 
+0,0 @@ -.TH "PAPI_stop_counters" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- -.ad l -.nh -.SH NAME -PAPI_stop_counters \- -.PP -Stop counting hardware events and reset values to zero\&. - -.SH SYNOPSIS -.br -.PP -.SH "Detailed Description" -.PP - -.PP -.nf -@par C Interface: -\#include @n -int PAPI_stop_counters( long long *values, int array_len ); - -.fi -.PP -.PP -\fBParameters:\fP -.RS 4 -\fI*values\fP an array where to put the counter values -.br -\fIarray_len\fP the number of items in the *values array -.RE -.PP -\fBPostcondition:\fP -.RS 4 -After this function is called, the values are reset to zero\&. -.RE -.PP -\fBReturn values:\fP -.RS 4 -\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. -.br -\fIPAPI_ENOTRUN\fP The EventSet is not started yet\&. -.br -\fIPAPI_ENOEVST\fP The EventSet has not been added yet\&. -.RE -.PP -The \fBPAPI_stop_counters()\fP function stops the counters and copies the counts into the *values array\&. The counters must have been started by a previous call to \fBPAPI_start_counters()\fP\&. -.PP -.PP -.nf -int Events[2] = { PAPI_TOT_CYC, PAPI_TOT_INS }; -long long values[2]; -if ( PAPI_start_counters( Events, 2 ) != PAPI_OK ) - handle_error(1); -your_slow_code(); -if ( PAPI_stop_counters( values, 2 ) != PAPI_OK ) - handle_error(1); - * -.fi -.PP -.PP -\fBSee Also:\fP -.RS 4 -\fBPAPI_read_counters()\fP \fBPAPI_start_counters()\fP \fBPAPI_set_opt()\fP -.RE -.PP - - -.SH "Author" -.PP -Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_strerror.3 papi-6.0.0~dfsg/man/man3/PAPI_strerror.3 --- papi-5.7.0+dfsg/man/man3/PAPI_strerror.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_strerror.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_strerror" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_strerror" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_thread_id.3 papi-6.0.0~dfsg/man/man3/PAPI_thread_id.3 --- papi-5.7.0+dfsg/man/man3/PAPI_thread_id.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_thread_id.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_thread_id" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_thread_id" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_thread_init.3 papi-6.0.0~dfsg/man/man3/PAPI_thread_init.3 --- papi-5.7.0+dfsg/man/man3/PAPI_thread_init.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_thread_init.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_thread_init" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_thread_init" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_unlock.3 papi-6.0.0~dfsg/man/man3/PAPI_unlock.3 --- papi-5.7.0+dfsg/man/man3/PAPI_unlock.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_unlock.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_unlock" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_unlock" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_unregister_thread.3 papi-6.0.0~dfsg/man/man3/PAPI_unregister_thread.3 --- papi-5.7.0+dfsg/man/man3/PAPI_unregister_thread.3 
2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_unregister_thread.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_unregister_thread" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_unregister_thread" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/PAPI_write.3 papi-6.0.0~dfsg/man/man3/PAPI_write.3 --- papi-5.7.0+dfsg/man/man3/PAPI_write.3 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/PAPI_write.3 2020-03-04 15:56:57.000000000 +0000 @@ -1,4 +1,4 @@ -.TH "PAPI_write" 3 "Fri Feb 22 2019" "Version 5.7.0.0" "PAPI" \" -*- nroff -*- +.TH "PAPI_write" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME diff -Nru papi-5.7.0+dfsg/man/man3/RateInfo.3 papi-6.0.0~dfsg/man/man3/RateInfo.3 --- papi-5.7.0+dfsg/man/man3/RateInfo.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/RateInfo.3 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,43 @@ +.TH "RateInfo" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +RateInfo \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBEventSet\fP" +.br +.ti -1c +.RI "int \fBevent_0\fP" +.br +.ti -1c +.RI "short int \fBrunning\fP" +.br +.ti -1c +.RI "long long \fBlast_real_time\fP" +.br +.ti -1c +.RI "long long \fBlast_proc_time\fP" +.br +.in -1c +.SH "Field Documentation" +.PP +.SS "int RateInfo::event_0" +first event of the eventset +.SS "int RateInfo::EventSet" +EventSet of the thread +.SS "long long RateInfo::last_proc_time" +Previous value of processor time +.SS "long long RateInfo::last_real_time" +Previous value of real time +.SS "short int RateInfo::running" +STOP, FLIP, FLOP, IPC or EPC + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/reads_t.3 papi-6.0.0~dfsg/man/man3/reads_t.3 --- papi-5.7.0+dfsg/man/man3/reads_t.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/reads_t.3 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,29 @@ +.TH "reads_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +reads_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "struct reads * \fBnext\fP" +.br +.ti -1c +.RI "struct reads * \fBprev\fP" +.br +.ti -1c +.RI "long_long \fBvalue\fP" +.br +.in -1c +.SH "Field Documentation" +.PP +.SS "long_long reads_t::value" +Event value + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/regions_t.3 papi-6.0.0~dfsg/man/man3/regions_t.3 --- papi-5.7.0+dfsg/man/man3/regions_t.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/regions_t.3 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,34 @@ +.TH "regions_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +regions_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "char * \fBregion\fP" +.br +.ti -1c +.RI "struct regions * \fBnext\fP" +.br +.ti -1c +.RI "struct regions * \fBprev\fP" +.br +.ti -1c +.RI "\fBvalue_t\fP \fBvalues\fP []" +.br +.in -1c +.SH "Field Documentation" +.PP +.SS "char* regions_t::region" +Region name +.SS "\fBvalue_t\fP regions_t::values[]" +Array of event values based on current eventset + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/man/man3/threads_t.3 papi-6.0.0~dfsg/man/man3/threads_t.3 --- papi-5.7.0+dfsg/man/man3/threads_t.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/threads_t.3 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,28 @@ +.TH "threads_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +threads_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "unsigned long \fBkey\fP" +.br +.ti -1c +.RI "\fBregions_t\fP * \fBvalue\fP" +.br +.in -1c +.SH "Field Documentation" +.PP +.SS "unsigned long threads_t::key" +Thread ID +.SS "\fBregions_t\fP* threads_t::value" +List of regions + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff -Nru papi-5.7.0+dfsg/man/man3/value_t.3 papi-6.0.0~dfsg/man/man3/value_t.3 --- papi-5.7.0+dfsg/man/man3/value_t.3 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/man/man3/value_t.3 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,33 @@ +.TH "value_t" 3 "Thu Feb 27 2020" "Version 6.0.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +value_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "long_long \fBoffset\fP" +.br +.ti -1c +.RI "long_long \fBtotal\fP" +.br +.ti -1c +.RI "\fBreads_t\fP * \fBread_values\fP" +.br +.in -1c +.SH "Field Documentation" +.PP +.SS "long_long value_t::offset" +Event value for region_begin +.SS "\fBreads_t\fP* value_t::read_values" +List of read event values inside a region +.SS "long_long value_t::total" +Event value for region_end - region_begin + previous value + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff -Nru papi-5.7.0+dfsg/papi.spec papi-6.0.0~dfsg/papi.spec --- papi-5.7.0+dfsg/papi.spec 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/papi.spec 2020-03-04 15:56:57.000000000 +0000 @@ -1,6 +1,6 @@ Summary: Performance Application Programming Interface Name: papi -Version: 5.7.0.0 +Version: 6.0.0.0 Release: 1%{?dist} License: BSD Group: Development/System diff -Nru papi-5.7.0+dfsg/README papi-6.0.0~dfsg/README --- papi-5.7.0+dfsg/README 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/README 1970-01-01 00:00:00.000000000 +0000 @@ -1,83 +0,0 @@ -PAPI: Performance Application Programming Interface -=================================================== - -** Innovative Computing Lab ** - -** University of Tennessee, Knoxville, TN ** - -*** -[TOC] -*** - -About ------ - -PAPI provides the tool designer and application engineer with a -consistent interface and methodology for use of the performance -counter hardware found in most major microprocessors. PAPI enables -software engineers to see, in near real time, the relation between -software performance and processor events. - -In addition, PAPI provides access to a collection of components that -expose performance measurement opportunites across the hardware and -software stack. - - -Getting Started ---------------- - -If this is the first file you've opened in the PAPI tree, we'll try to give you -a few tips on where to go from here. - -* Read the license found in LICENSE.txt. It's pretty short, and not very - restrictive, but it'll give you an idea of what you can and can't do with the - PAPI sources. -* Visit the website at: - There you can find late-breaking news that may be more current than in these - files. You can also find documentation in a greater variety of formats than - in the papi/doc/ directory. -* Sign up for the PAPI mailing list(s). Instructions are on our home page. -* Read the RELEASENOTES.txt file to get an idea of what's new in the current release. 
- - -Installing PAPI ---------------- - -To install PAPI on your system: - -* Find the section in INSTALL.txt that pertains to your hardware and operating - system. -* Follow the directions to install required components and build the PAPI - libraries. -* Run the test suite when you are finished to verify that everything went ok. - NOTE: Although we make every attempt to get all tests to PASS or SKIP on all - platforms, there are occasional instances of FAILures due to excessively - tight compliance thresholds or platform idiosyncrasies. Don't panic if one - or two tests FAIL. Contact us with complete output and we'll see what we can do. - - -Using PAPI ----------- - -To use PAPI in your own programs: - -* Read the PAPI Overview found at: - http://icl.utk.edu/projects/papi/wiki/Main_Page. -* Try out the utility programs in /utils to see what's in your system. -* Try a test program. Source for a number of tests in both C and FORTRAN is - available in the src/tests/ and src/ftests/ directories. Find a program - that's similar to what you want to do. Make sure you can build it and run it. -* Write a test program of your own, exercising the PAPI events and features of - interest to you. -* Go for broke. Fold PAPI calls into your sources and see what you can learn. - - -Bugs and Questions ------------------- - -* Visit our FAQ at: - or read a snapshot of the FAQ in papi/PAPI_FAQ.html -* Subscribe to the PAPI mailing list at: - -* Read historical postings to the list. -* Post questions to the list. 
\ No newline at end of file diff -Nru papi-5.7.0+dfsg/README.md papi-6.0.0~dfsg/README.md --- papi-5.7.0+dfsg/README.md 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/README.md 2020-03-04 15:56:56.000000000 +0000 @@ -1,97 +1,127 @@ -PAPI: Performance Application Programming Interface -=================================================== +**[PAPI: The Performance Application Programming Interface](https://icl.utk.edu/exa-papi/)** -** Innovative Computing Lab ** +**[Innovative Computing Laboratory (ICL)](http://www.icl.utk.edu/)** + +**University of Tennessee, Knoxville (UTK)** -** University of Tennessee, Knoxville, TN ** *** [TOC] *** -About ------ +# About + +The Performance Application Programming Interface (PAPI) provides tool +designers and application engineers with a consistent interface and methodology +for the use of low-level performance counter hardware found across the entire +compute system (i.e. CPUs, GPUs, on/off-chip memory, interconnects, I/O system, +energy/power, etc.). PAPI enables users to see, in near real time, the +relations between software performance and hardware events across the entire +computer system. + +[The ECP Exa-PAPI project](https://icl.utk.edu/exa-papi/) builds on the latest +PAPI project and extends it with: + +* Performance counter monitoring capabilities for new and advanced ECP + hardware, and software technologies. +* Fine-grained power management support. +* Functionality for performance counter analysis at "task granularity" for + task-based runtime systems. +* "Software-defined Events" that originate from the ECP software stack and are + currently treated as black boxes (i.e., communication libraries, math + libraries, task-based runtime systems, etc.) + +The objective is to enable monitoring of both types of performance +events---hardware- and software-related events---in a uniform way, through one +consistent PAPI interface. 
Third-party tools and application developers will +have to handle only a single hook to PAPI in order to access all hardware +performance counters in a system, including the new software-defined events. -PAPI provides the tool designer and application engineer with a -consistent interface and methodology for use of the performance -counter hardware found in most major microprocessors. PAPI enables -software engineers to see, in near real time, the relation between -software performance and processor events. +*** -In addition, PAPI provides access to a collection of components that -expose performance measurement opportunites across the hardware and -software stack. +# Documentation -Getting Started ---------------- +* [PAPI Wiki](https://bitbucket.org/icl/papi/wiki/) is the main documentation for HOWTOs, Supported Architectures, PAPI Releases. +* [PAPI Papers and Presentations](https://www.icl.utk.edu/view/biblio/project/papi?items_per_page=All) -If this is the first file you've opened in the PAPI tree, we'll try to give you -a few tips on where to go from here. -* Read the license found in LICENSE.txt. It's pretty short, and not very - restrictive, but it'll give you an idea of what you can and can't do with the - PAPI sources. -* Visit the website at: - There you can find late-breaking news that may be more current than in these - files. You can also find documentation in a greater variety of formats than - in the papi/doc/ directory. -* Sign up for the PAPI mailing list(s). Instructions are on our home page. -* Read the RELEASENOTES.txt file to get an idea of what's new in the current release. +*** -Downloading PAPI ----------------- +# Getting Help -* Clone the PAPI repository the first time with the following command: - `> git clone https://bitbucket.org/icl/papi.git` +* Visit our FAQ at: + or read a snapshot of the FAQ in papi/PAPI_FAQ.html +* For assistance with PAPI, email ptools-perfapi@icl.utk.edu. 
+* You can also join the PAPI User Google group by going to + + to read historical postings to the list. -* This creates a complete copy of the papi git repository on your computer - in a folder called 'papi'. +*** -* To make sure your copy is up to date with the repository: - `> cd papi` - `> git pull https://bitbucket.org/icl/papi.git` +# Contributing -Installing PAPI ---------------- +The PAPI project welcomes contributions from new developers. Contributions can +be offered through the standard Bitbucket pull request model. We strongly +encourage you to coordinate large contributions with the PAPI development team +early in the process. -To install PAPI on your system: +**For timely pull request reviews and feedback, it is important to submit +one (1) pull request per feature / bug fix.** -* Find the section in INSTALL.txt that pertains to your hardware and operating - system. -* Follow the directions to install required components and build the PAPI - libraries. -* Run the test suite when you are finished to verify that everything went ok. - NOTE: Although we make every attempt to get all tests to PASS or SKIP on all - platforms, there are occasional instances of FAILures due to excessively - tight compliance thresholds or platform idiosyncrasies. Don't panic if one - or two tests FAIL. Contact us with complete output and we'll see what we can do. +In order to create a pull request on a public read-only repo, +you will need to do the following: +1. Fork the PAPI repo (click "+" on the left and "Fork this repository"). -Using PAPI ----------- +2. Clone it. -To use PAPI in your own programs: +3. Make your changes and push them. -* Read the PAPI Overview found at: - http://icl.utk.edu/projects/papi/wiki/Main_Page. -* Try out the utility programs in /utils to see what's in your system. -* Try a test program. Source for a number of tests in both C and FORTRAN is - available in the src/tests/ and src/ftests/ directories. 
Find a program - that's similar to what you want to do. Make sure you can build it and run it. -* Write a test program of your own, exercising the PAPI events and features of - interest to you. -* Go for broke. Fold PAPI calls into your sources and see what you can learn. +4. Click "create pull request" from your repo (not the PAPI repo). +*** -Bugs and Questions ------------------- -* Visit our FAQ at: - or read a snapshot of the FAQ in papi/PAPI_FAQ.html -* Subscribe to the PAPI mailing list at: - -* Read historical postings to the list. -* Post questions to the list. \ No newline at end of file +# Resources + +* Visit the [Exa-PAPI website](https://icl.utk.edu/exa-papi/) to find out more + about ongoing PAPI and + [PAPI++](https://www.exascaleproject.org/papi-as-de-facto-standard-interface-for-performance-event-monitoring-at-the-exascale/) + developments and research. +* Visit the [PAPI website (retired)](https://icl.utk.edu/papi/) for basic + information about PAPI. +* Visit the [ECP website](https://www.exascaleproject.org/) to find out more + about the DOE Exascale Computing Initiative. + +*** + + +# License + + Copyright (c) 2019, University of Tennessee + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the University of Tennessee nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF TENNESSEE BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff -Nru papi-5.7.0+dfsg/RELEASENOTES.txt papi-6.0.0~dfsg/RELEASENOTES.txt --- papi-5.7.0+dfsg/RELEASENOTES.txt 2019-03-04 19:56:22.000000000 +0000 +++ papi-6.0.0~dfsg/RELEASENOTES.txt 2020-03-04 15:56:56.000000000 +0000 @@ -6,6 +6,72 @@ =============================================================================== +PAPI 6.0.0 RELEASE NOTES 29 Jan 2020 +=============================================================================== + +PAPI 6.0 is now available. This release includes a new API for SDEs (Software +Defined Events), a major revision of the 'high-level API', and several new +components, including ROCM and ROCM_SMI (for AMD GPUs), powercap_ppc and +sensors_ppc (for IBM Power9 and later), SDE, and the IO component (exposes I/O +statistics exported by the Linux kernel). Furthermore, PAPI 6.0 ships CAT, a +new Counter Analysis Toolkit that assists with native performance counter +disambiguation through micro-benchmarks. + +PAPI 6.0 is now available. This release includes several new components; +including ROCM and ROCM_SMI (for AMD GPUs), SDE (Software Defined Events), +and the IO component (exposes I/O statistics exported by the Linux kernel). 
+ +For specific and detailed information on changes made for this release, see +ChangeLogP600.txt for filenames or keywords of interest and change summaries, +or go directly to the PAPI git repository. + +Major Changes + +* Added the rocm component to support performance counters on AMD GPUs. +* Added the rocm_smi component; SMI is System Management Interface to monitor + power usage on AMD GPUs, which is also writeable by the user, e.g. to reduce + power consumption on non-critical operations. +* Added 'io' component to expose I/O statistics exported by the Linux kernel + (/proc/self/io). +* Added 'SDE' component, Software Defined Events, which allows HPC software + layers to expose internal performance-critical behavior via Software Defined + Events (SDEs) through the PAPI interface. +* Added 'SDE API' to register performance-critical events that originate from + HPC software layers, and which are recognized as 'PAPI counters' and, thus, + can be monitored with the standard PAPI interface. +* Added powercap_ppc component to support monitoring and capping of power usage + on IBM PowerPC architectures (Power9 and later) using the powercap interface + exposed through the Linux kernel. +* Added 'sensors_ppc' component to support monitoring of system metrics on IBM + PowerPC architectures (Power9 and later) using the opal/exports sysfs + interface. +* Retired infiniband_umad component, it is superseded by infiniband. +* Revived PAPI's 'high-level API' to make it more intuitive and effective for + novice users and quick event reporting. +* Added 'counter_analysis_toolkit' sub-directory (CAT): A tool to assist with + native performance counter disambiguation through micro-benchmarks, which are + used to probe different important aspects of modern CPUs, to aid the + classification of native performance events. 
+ +Other Changes + +* Standardized our environment variables and implemented a simplified, + unified approach for specifying libraries necessary for components, with + overrides possible for special circumstances. Eliminated component level + 'configure' requirements. +* Corrected TLS issues (Thread Local Storage) and race conditions. +* Several bug fixes, documentation fixes and enhancements, improvements to + README files for user instruction and code comments. + +Acknowledgements: This release is the result of efforts from many people. The +PAPI team would like to express special thanks to Vince Weaver, Stephane +Eranian (for libpfm4), William Cohen, Steve Kaufmann, Phil Mucci, Kevin Huck, +Yunqiang Su, Carl Love, Andreas Beckmann, Al Grant and Evgeny Shcherbakov. + +The PAPI release can be downloaded from http://icl.cs.utk.edu/papi/software. + + +=============================================================================== PAPI 5.7.0 RELEASE NOTES 4 Mar 2019 =============================================================================== @@ -40,7 +106,7 @@ power-management (reporting and setting) for NVIDIA GPUs. * Re-implementation of the “cuda” component to better handle GPU events, metrics (values computed from multiple events), and NVLink events, each of - which have differently handling requirements and may require separate read + which have different handling requirements and may require separate read groupings. * Enhanced NVLink support, and added additional tests and example code for NVLink (high-speed GPU interconnect).
diff -Nru papi-5.7.0+dfsg/src/components/appio/tests/appio_test_blocking.c papi-6.0.0~dfsg/src/components/appio/tests/appio_test_blocking.c --- papi-5.7.0+dfsg/src/components/appio/tests/appio_test_blocking.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/appio/tests/appio_test_blocking.c 2020-03-04 15:56:57.000000000 +0000 @@ -22,7 +22,7 @@ #define NUM_EVENTS 12 int main(int argc, char** argv) { - int Events[NUM_EVENTS]; + int EventSet = PAPI_NULL; const char* names[NUM_EVENTS] = {"OPEN_CALLS", "OPEN_FDS", "READ_CALLS", "READ_BYTES", "READ_USEC", "READ_ERR", "READ_INTERRUPTED", "READ_WOULD_BLOCK", "WRITE_CALLS","WRITE_BYTES","WRITE_USEC", "WRITE_WOULD_BLOCK"}; long long values[NUM_EVENTS]; @@ -35,20 +35,32 @@ exit(1); } + /* Create the Event Set */ + if (PAPI_create_eventset(&EventSet) != PAPI_OK) { + fprintf(stderr, "Error creating event set\n"); + exit(2); + } + if (!TESTS_QUIET) fprintf(stderr, "This program will read from stdin and echo it to stdout\n"); int retval; int e; + int event_code; for (e=0; e #include @@ -22,7 +22,7 @@ #define NUM_EVENTS 12 int main(int argc, char** argv) { - int Events[NUM_EVENTS]; + int EventSet = PAPI_NULL; const char* names[NUM_EVENTS] = {"OPEN_CALLS", "OPEN_FDS", "READ_CALLS", "READ_BYTES", "READ_USEC", "READ_ERR", "READ_INTERRUPTED", "READ_WOULD_BLOCK", "WRITE_CALLS","WRITE_BYTES","WRITE_USEC","WRITE_WOULD_BLOCK"}; long long values[NUM_EVENTS]; @@ -37,21 +37,33 @@ exit(1); } + /* Create the Event Set */ + if (PAPI_create_eventset(&EventSet) != PAPI_OK) { + fprintf(stderr, "Error creating event set\n"); + exit(2); + } + int fdin; if (!TESTS_QUIET) printf("This program will read %s and write it to /dev/null\n", infile); int retval; int e; + int event_code; for (e=0; ed_name, i); if (retlen <= 0 || PAPI_MAX_STR_LEN <= retlen) { SUBDBG("Unable to generate name %s:in%i_input\n", hwmonx->d_name, i); + closedir(d); return ( PAPI_EINVAL ); } @@ -230,6 +231,8 @@ retlen = snprintf(name, PAPI_MAX_STR_LEN, 
"%s:temp%i_input", hwmonx->d_name, i); if (retlen <= 0 || PAPI_MAX_STR_LEN <= retlen) { SUBDBG("Unable to generate name %s:temp%i_input\n", hwmonx->d_name, i); + closedir(d); + closedir(dir); return ( PAPI_EINVAL ); } @@ -269,6 +272,8 @@ retlen = snprintf(filename, PAPI_MAX_STR_LEN, "%s/fan%d_input", path,i); if (retlen <= 0 || PAPI_MAX_STR_LEN <= retlen) { SUBDBG("Unable to generate filename %s/fan%d_input\n", path,i); + closedir(d); + closedir(dir); return ( PAPI_EINVAL ); } @@ -279,6 +284,8 @@ retlen = snprintf(name, PAPI_MAX_STR_LEN, "%s:fan%i_input", hwmonx->d_name, i); if (retlen <= 0 || PAPI_MAX_STR_LEN <= retlen) { SUBDBG("Unable to generate name %s:fan%i_input\n", hwmonx->d_name, i); + closedir(d); + closedir(dir); return ( PAPI_EINVAL ); } diff -Nru papi-5.7.0+dfsg/src/components/cuda/linux-cuda.c papi-6.0.0~dfsg/src/components/cuda/linux-cuda.c --- papi-5.7.0+dfsg/src/components/cuda/linux-cuda.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/cuda/linux-cuda.c 2020-03-04 15:56:57.000000000 +0000 @@ -18,13 +18,16 @@ //----------------------------------------------------------------------------- // A basic assumption here (and in other components) is that we put as much of // the computational load of this component into the initialization stage and -// the "adding" stage for events (update_control), becuase users are likely not +// the "adding" stage for events (update_control), because users are likely not // measuring performance at those times, but may well be reading these events // when performance matters. So we want the read operation lightweight, but we // can remember tables and such at startup and when servicing a PAPI_add(). //----------------------------------------------------------------------------- #include +#include + +// NOTE: We can't use extended directories; these include files have includes. 
#include #include @@ -39,36 +42,36 @@ // #define PAPICUDA_KERNEL_REPLAY_MODE // w to punctuate an embedded quoted question within a declarative sentence? [duplicate] -// Contains device list, pointer to device desciption, and the list of all available events. -typedef struct papicuda_context { +// Contains device list, pointer to device description, and the list of all available events. +typedef struct cuda_context { int deviceCount; - struct papicuda_device_desc *deviceArray; + struct cuda_device_desc *deviceArray; uint32_t availEventSize; CUpti_ActivityKind *availEventKind; int *availEventDeviceNum; uint32_t *availEventIDArray; uint32_t *availEventIsBeingMeasuredInEventset; - struct papicuda_name_desc *availEventDesc; -} papicuda_context_t; + struct cuda_name_desc *availEventDesc; +} cuda_context_t; /* Store the name and description for an event */ -typedef struct papicuda_name_desc { +typedef struct cuda_name_desc { char name[PAPI_MAX_STR_LEN]; char description[PAPI_2MAX_STR_LEN]; uint16_t numMetricEvents; // 0=event, if a metric, size of metricEvents array below. CUpti_EventID *metricEvents; // NULL for cuda events, an array of member events if a metric. CUpti_MetricValueKind MV_Kind; // eg. % or counter or rate, etc. Needed to compute metric from individual events. -} papicuda_name_desc_t; +} cuda_name_desc_t; /* For a device, store device description */ -typedef struct papicuda_device_desc { +typedef struct cuda_device_desc { CUdevice cuDev; int deviceNum; char deviceName[PAPI_MIN_STR_LEN]; uint32_t maxDomains; /* number of domains per device */ CUpti_EventDomainID *domainIDArray; /* Array[maxDomains] of domain IDs */ uint32_t *domainIDNumEvents; /* Array[maxDomains] of num of events in that domain */ -} papicuda_device_desc_t; +} cuda_device_desc_t; // For each active cuda context (one measuring something) we also track the // cuda device number it is on. 
We track in separate arrays for each reading @@ -76,9 +79,9 @@ // these are then arithmetically combined to produce the metric value. The // allEvents array stores all the actual events; i.e. metrics are deconstructed // to their individual events and stored there, as well as regular events, so -// we can perform an analysis of how to read with cuptiEventGroupSetsCreate(). +// we can perform an analysis of how to read with cuptiEventGroupSetsCreate(). -typedef struct papicuda_active_cucontext_s { +typedef struct cuda_active_cucontext_s { CUcontext cuCtx; int deviceNum; @@ -90,35 +93,39 @@ uint64_t allEventValues [PAPICUDA_MAX_COUNTERS]; // aggregated event values. CUpti_EventGroupSets *eventGroupSets; // Built during add, to save time not doing it at read. -} papicuda_active_cucontext_t; +} cuda_active_cucontext_t; -// Control structure tracks array of active contexts and active events -// in the order the user requested them; along with associated values +// Control structure tracks array of active contexts and active events +// in the order the user requested them; along with associated values // values and types (to save lookup time). -typedef struct papicuda_control { +typedef struct cuda_control { uint32_t countOfActiveCUContexts; - papicuda_active_cucontext_t *arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]; + cuda_active_cucontext_t *arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]; uint32_t activeEventCount; int activeEventIndex [PAPICUDA_MAX_COUNTERS]; // index into gctxt->availEventXXXXX arrays. long long activeEventValues [PAPICUDA_MAX_COUNTERS]; // values we will return. CUpti_MetricValueKind activeEventKind [PAPICUDA_MAX_COUNTERS]; // For metrics: double, uint64, % or throughput. Needed to compute metric from individual events. uint64_t cuptiStartTimestampNs; // needed to compute duration for some metrics. uint64_t cuptiReadTimestampNs; // .. 
-} papicuda_control_t; +} cuda_control_t; // file handles used to access cuda libraries with dlopen static void *dl1 = NULL; static void *dl2 = NULL; static void *dl3 = NULL; +static char cuda_main[]=PAPI_CUDA_MAIN; +static char cuda_runtime[]=PAPI_CUDA_RUNTIME; +static char cuda_cupti[]=PAPI_CUDA_CUPTI; + /* The PAPI side (external) variable as a global */ papi_vector_t _cuda_vector; /* Global variable for hardware description, event and metric lists */ -static papicuda_context_t *global_papicuda_context = NULL; +static cuda_context_t *global_cuda_context = NULL; /* This global variable points to the head of the control state list */ -static papicuda_control_t *global_papicuda_control = NULL; +static cuda_control_t *global_cuda_control = NULL; /* Macros for error checking... each arg is only referenced/evaluated once */ #define CHECK_PRINT_EVAL( checkcond, str, evalthis ) \ @@ -144,7 +151,7 @@ CUresult _status = (call); \ if (_status != CUDA_SUCCESS) { \ SUBDBG("error: function %s failed with error %d.\n", #call, _status); \ - /* fprintf(stderr,"Line %i CU_CALL error function %s failed with error %d.\n", __LINE__, #call, _status); */ \ + /* fprintf(stderr,"Line %i CU_CALL error function %s failed with error %08X.\n", __LINE__, #call, _status); */ \ handleerror; \ } \ } while (0) @@ -157,18 +164,18 @@ const char *errstr; \ (*cuptiGetResultStringPtr)(_status, &errstr); \ SUBDBG("error: function %s failed with error %s.\n", #call, errstr); \ - /* fprintf(stderr, "Line %i CUPTI_CALL macro '%s' failed with error '%s'.\n", __LINE__, #call, errstr); */ \ + /* fprintf(stderr, "Line %i CUPTI_CALL macro '%s' failed with error #%08X='%s'.\n", __LINE__, #call, _status, errstr); */ \ handleerror; \ } \ } while (0) -#define BUF_SIZE (32 * 1024) +#define BUF_SIZE (32 * PATH_MAX) #define ALIGN_SIZE (8) #define ALIGN_BUFFER(buffer, align) \ (((uintptr_t) (buffer) & ((align)-1)) ? 
((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) /* Function prototypes */ -static int papicuda_cleanup_eventset(hwd_control_state_t * ctrl); +static int _cuda_cleanup_eventset(hwd_control_state_t * ctrl); /* ****** CHANGE PROTOTYPES TO DECLARE CUDA LIBRARY SYMBOLS AS WEAK ********** * This is done so that a version of PAPI built with the cuda component can * @@ -197,6 +204,7 @@ DECLARECUFUNC(cuDeviceGetCount, (int *)); DECLARECUFUNC(cuDeviceGetName, (char *, int, CUdevice)); DECLARECUFUNC(cuInit, (unsigned int)); +DECLARECUFUNC(cuGetErrorString, (CUresult error, const char** pStr)); DECLARECUFUNC(cuCtxPopCurrent, (CUcontext * pctx)); DECLARECUFUNC(cuCtxPushCurrent, (CUcontext pctx)); DECLARECUFUNC(cuCtxSynchronize, ()); @@ -251,44 +259,140 @@ ******** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT ********* *****************************************************************************/ -/* +/* * Link the necessary CUDA libraries to use the cuda component. If any of them can not be found, then * the CUDA component will just be disabled. This is done at runtime so that a version of PAPI built * with the CUDA component can be installed and used on systems which have the CUDA libraries installed * and on systems where these libraries are not installed. 
*/ -static int papicuda_linkCudaLibraries() +static int _cuda_linkCudaLibraries(void) { -#define DLSYM_AND_CHECK( dllib, name ) dlsym( dllib, name ); if ( dlerror()!=NULL ) { strncpy( _cuda_vector.cmp_info.disabled_reason, "A CUDA required function was not found in dynamic libs", PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); } + char path_lib[PATH_MAX]; +#define DLSYM_AND_CHECK( dllib, name ) dlsym( dllib, name ); \ + if ( dlerror()!=NULL ) { \ + snprintf(_cuda_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, \ + "A CUDA required function '%s' was not found in lib '%s'.", \ + name, #dllib); \ + return ( PAPI_ENOSUPP ); \ + } /* Attempt to guess if we were statically linked to libc, if so bail */ if(_dl_non_dynamic_init != NULL) { strncpy(_cuda_vector.cmp_info.disabled_reason, "The CUDA component does not support statically linking to libc.", PAPI_MAX_STR_LEN); return PAPI_ENOSUPP; } - /* Need to link in the cuda libraries, if not found disable the component */ - dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL); - CHECK_PRINT_EVAL(!dl1, "CUDA library libcuda.so not found.", return (PAPI_ENOSUPP)); + // Need to link in the cuda libraries, if any not found disable the component + // getenv returns NULL if environment variable is not found. + char *cuda_root = getenv("PAPI_CUDA_ROOT"); + + dl1 = NULL; // Ensure reset to NULL. + + // Step 1: Process override if given. + if (strlen(cuda_main) > 0) { // If override given, it has to work. + dl1 = dlopen(cuda_main, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + if (dl1 == NULL) { + snprintf(_cuda_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_CUDA_MAIN override '%s' given in Rules.cuda not found.", cuda_main); + return(PAPI_ENOSUPP); // Override given but not found. + } + } + + // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths. + if (dl1 == NULL) { // No override, + dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL); // Try system paths. 
+ } + + // Step 3: Try the explicit install default. + if (dl1 == NULL && cuda_root != NULL) { // if root given, try it. + snprintf(path_lib, sizeof(path_lib), "%s/lib64/libcuda.so", cuda_root); // PAPI Root check. + dl1 = dlopen(path_lib, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + } + + // Check for failure. + if (dl1 == NULL) { + snprintf(_cuda_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "libcuda.so not found."); + return(PAPI_ENOSUPP); + } + + // We have a dl1. (libcuda.so). + cuCtxGetCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxGetCurrent"); cuCtxSetCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxSetCurrent"); cuDeviceGetPtr = DLSYM_AND_CHECK(dl1, "cuDeviceGet"); cuDeviceGetCountPtr = DLSYM_AND_CHECK(dl1, "cuDeviceGetCount"); cuDeviceGetNamePtr = DLSYM_AND_CHECK(dl1, "cuDeviceGetName"); cuInitPtr = DLSYM_AND_CHECK(dl1, "cuInit"); + cuGetErrorStringPtr = DLSYM_AND_CHECK(dl1, "cuGetErrorString"); cuCtxPopCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxPopCurrent"); cuCtxPushCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxPushCurrent"); cuCtxDestroyPtr = DLSYM_AND_CHECK(dl1, "cuCtxDestroy"); cuCtxCreatePtr = DLSYM_AND_CHECK(dl1, "cuCtxCreate"); cuCtxSynchronizePtr = DLSYM_AND_CHECK(dl1, "cuCtxSynchronize"); - dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE); - CHECK_PRINT_EVAL(!dl2, "CUDA runtime library libcudart.so not found.", return (PAPI_ENOSUPP)); + /* Need to link in the cuda runtime library, if not found disable the component */ + dl2 = NULL; // Ensure reset to NULL. + + // Step 1: Process override if given. + if (strlen(cuda_runtime) > 0) { // If override given, it has to work. + dl2 = dlopen(cuda_runtime, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + if (dl2 == NULL) { + snprintf(_cuda_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_CUDA_RUNTIME override '%s' given in Rules.cuda not found.", cuda_runtime); + return(PAPI_ENOSUPP); // Override given but not found. 
+ } + } + + // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths. + if (dl2 == NULL) { // No override, + dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL); // Try system paths. + } + + // Step 3: Try the explicit install default. + if (dl2 == NULL && cuda_root != NULL) { // if root given, try it. + snprintf(path_lib, sizeof(path_lib), "%s/lib64/libcudart.so", cuda_root); // PAPI Root check. + dl2 = dlopen(path_lib, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + } + + // Check for failure. + if (dl2 == NULL) { + snprintf(_cuda_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "libcudart.so not found."); + return(PAPI_ENOSUPP); + } + + // We have a dl2. (libcudart.so). + cudaGetDevicePtr = DLSYM_AND_CHECK(dl2, "cudaGetDevice"); cudaSetDevicePtr = DLSYM_AND_CHECK(dl2, "cudaSetDevice"); cudaFreePtr = DLSYM_AND_CHECK(dl2, "cudaFree"); - dl3 = dlopen("libcupti.so", RTLD_NOW | RTLD_GLOBAL); - CHECK_PRINT_EVAL(!dl3, "CUDA Profiling Tools Interface (CUPTI) library libcupti.so not found.", return (PAPI_ENOSUPP)); + dl3 = NULL; // Ensure reset to NULL. + + // Step 1: Process override if given. + if (strlen(cuda_cupti) > 0) { // If override given, it MUST work. + dl3 = dlopen(cuda_cupti, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + if (dl3 == NULL) { + snprintf(_cuda_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_CUDA_CUPTI override '%s' given in Rules.cuda not found.", cuda_cupti); + return(PAPI_ENOSUPP); // Override given but not found. + } + } + + // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths. + if (dl3 == NULL) { // If no override, + dl3 = dlopen("libcupti.so", RTLD_NOW | RTLD_GLOBAL); // Try system paths. + } + + // Step 3: Try the explicit install default. + if (dl3 == NULL && cuda_root != NULL) { // If ROOT given, it doesn't HAVE to work. + snprintf(path_lib, sizeof(path_lib), "%s/extras/CUPTI/lib64/libcupti.so", cuda_root); // PAPI Root check. 
+ dl3 = dlopen(path_lib, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + } + + // Check for failure. + if (dl3 == NULL) { + snprintf(_cuda_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "libcupti.so not found."); + return(PAPI_ENOSUPP); // Not found on default paths. + } + + // We have a dl3. (libcupti.so) + /* The macro DLSYM_AND_CHECK results in the expansion example below */ /* cuptiDeviceEnumEventDomainsPtr = dlsym( dl3, "cuptiDeviceEnumEventDomains" ); */ /* if ( dlerror()!=NULL ) { strncpy( _cuda_vector.cmp_info.disabled_reason, "A CUDA required function was not found in dynamic libs", PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); } */ @@ -330,13 +434,13 @@ } -static int papicuda_add_native_events(papicuda_context_t * gctxt) +static int _cuda_add_native_events(cuda_context_t * gctxt) { SUBDBG("Entering\n"); CUresult cuErr; int deviceNum; uint32_t domainNum, eventNum; - papicuda_device_desc_t *mydevice; + cuda_device_desc_t *mydevice; char tmpStr[PAPI_MIN_STR_LEN]; tmpStr[PAPI_MIN_STR_LEN - 1] = '\0'; size_t tmpSizeBytes; @@ -348,13 +452,26 @@ if(cuErr == CUDA_ERROR_NOT_INITIALIZED) { /* If CUDA not initialized, initialize CUDA and retry the device list */ /* This is required for some of the PAPI tools, that do not call the init functions */ - if(((*cuInitPtr) (0)) != CUDA_SUCCESS) { - strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA cannot be found and initialized (cuInit failed).", PAPI_MAX_STR_LEN); + cuErr = (cuInitPtr) (0); // Try the init. + if(cuErr != CUDA_SUCCESS) { // If that failed, we are bailing. + const char *errString=NULL; + (*cuGetErrorStringPtr) (cuErr, &errString); // Read the string. + if (errString != NULL) { + snprintf(_cuda_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN-2, + "CUDA initialization (cuInit) failed: %s", errString); + _cuda_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN-1]=0; // force null termination. 
+ } else { + snprintf(_cuda_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN-2, + "CUDA initialization (cuInit) failed: Unrecognized Error Code=%d.", cuErr); + _cuda_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN-1]=0; // force null termination. + } // end dealing with error on cuInit(0). return PAPI_ENOSUPP; - } - CU_CALL((*cuDeviceGetCountPtr) (&gctxt->deviceCount), return (PAPI_EMISC)); - } + } // end if cuInit(0) failed. + + CU_CALL((*cuDeviceGetCountPtr) (&gctxt->deviceCount), return (PAPI_EMISC)); // repeat call for device count. + } // end if CUDA was not initialized; try to init. + // cuInit(0) was successful. if(gctxt->deviceCount == 0) { strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA initialized but no CUDA devices found.", PAPI_MAX_STR_LEN); return PAPI_ENOSUPP; @@ -362,7 +479,7 @@ SUBDBG("Found %d devices\n", gctxt->deviceCount); /* allocate memory for device information */ - gctxt->deviceArray = (papicuda_device_desc_t *) papi_calloc(gctxt->deviceCount, sizeof(papicuda_device_desc_t)); + gctxt->deviceArray = (cuda_device_desc_t *) papi_calloc(gctxt->deviceCount, sizeof(cuda_device_desc_t)); CHECK_PRINT_EVAL(!gctxt->deviceArray, "ERROR CUDA: Could not allocate memory for CUDA device structure", return (PAPI_ENOMEM)); /* For each device, get domains and domain-events counts */ @@ -380,19 +497,19 @@ mydevice->deviceName[PAPI_MIN_STR_LEN - 1] = '\0'; // z-terminate it. CUPTI_CALL((*cuptiDeviceGetNumEventDomainsPtr) // get number of domains, - (mydevice->cuDev, &mydevice->maxDomains), + (mydevice->cuDev, &mydevice->maxDomains), return (PAPI_EMISC)); // .. on failure. 
/* Allocate space to hold domain IDs */ mydevice->domainIDArray = (CUpti_EventDomainID *) papi_calloc( - mydevice->maxDomains, sizeof(CUpti_EventDomainID)); + mydevice->maxDomains, sizeof(CUpti_EventDomainID)); CHECK_PRINT_EVAL(!mydevice->domainIDArray, "ERROR CUDA: Could not allocate memory for CUDA device domains", return (PAPI_ENOMEM)); /* Put domain ids into allocated space */ size_t domainarraysize = mydevice->maxDomains * sizeof(CUpti_EventDomainID); CUPTI_CALL((*cuptiDeviceEnumEventDomainsPtr) // enumerate domain ids into space. - (mydevice->cuDev, &domainarraysize, mydevice->domainIDArray), + (mydevice->cuDev, &domainarraysize, mydevice->domainIDArray), return (PAPI_EMISC)); // .. on failure. /* Allocate space to hold domain event counts */ @@ -415,7 +532,7 @@ for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) { // for each device, uint32_t maxMetrics = 0; CUptiResult cuptiRet; - mydevice = &gctxt->deviceArray[deviceNum]; // Get papicuda_device_desc pointer. + mydevice = &gctxt->deviceArray[deviceNum]; // Get cuda_device_desc pointer. cuptiRet = (*cuptiDeviceGetNumMetricsPtr) (mydevice->cuDev, &maxMetrics); // Read the # metrics on this device. if (cuptiRet != CUPTI_SUCCESS || maxMetrics < 1) continue; // If no metrics, skip to next device. maxEventSize += maxMetrics; // make room for metrics we discover later. 
@@ -430,13 +547,13 @@ CHECK_PRINT_EVAL(!gctxt->availEventIDArray, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM)); gctxt->availEventIsBeingMeasuredInEventset = (uint32_t *) papi_calloc(maxEventSize, sizeof(uint32_t)); CHECK_PRINT_EVAL(!gctxt->availEventIsBeingMeasuredInEventset, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM)); - gctxt->availEventDesc = (papicuda_name_desc_t *) papi_calloc(maxEventSize, sizeof(papicuda_name_desc_t)); + gctxt->availEventDesc = (cuda_name_desc_t *) papi_calloc(maxEventSize, sizeof(cuda_name_desc_t)); CHECK_PRINT_EVAL(!gctxt->availEventDesc, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM)); // Record all events on each device, and their descriptions. uint32_t idxEventArray = 0; for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) { // loop through each device. - mydevice = &gctxt->deviceArray[deviceNum]; // get a pointer to the papicuda_device_desc struct. + mydevice = &gctxt->deviceArray[deviceNum]; // get a pointer to the cuda_device_desc struct. // For each domain, get and store event IDs, names, descriptions. for(domainNum = 0; domainNum < mydevice->maxDomains; domainNum++) { // loop through the domains in this device. @@ -448,12 +565,12 @@ // SUBDBG( "For device %d domain %d domainID %d numEvents %d\n", mydevice->cuDev, domainNum, domainID, domainNumEvents ); CUpti_EventID *domainEventIDArray = // Make space for the events in this domain. - (CUpti_EventID *) papi_calloc(domainNumEvents, sizeof(CUpti_EventID)); // .. + (CUpti_EventID *) papi_calloc(domainNumEvents, sizeof(CUpti_EventID)); // .. CHECK_PRINT_EVAL(!domainEventIDArray, "ERROR CUDA: Could not allocate memory for events", return (PAPI_ENOMEM)); size_t domainEventArraySize = domainNumEvents * sizeof(CUpti_EventID); // compute size of array we allocated. CUPTI_CALL((*cuptiEventDomainEnumEventsPtr) // Enumerate the events in the domain, - (domainID, &domainEventArraySize, domainEventIDArray), // .. 
+ (domainID, &domainEventArraySize, domainEventIDArray), // .. return (PAPI_EMISC)); // .. on failure, exit. for(eventNum = 0; eventNum < domainNumEvents; eventNum++) { // Loop through the events in this domain. @@ -462,14 +579,14 @@ gctxt->availEventIDArray[idxEventArray] = myeventCuptiEventId; // .. record the id, gctxt->availEventDeviceNum[idxEventArray] = deviceNum; // .. record the device number, - tmpSizeBytes = PAPI_MIN_STR_LEN - 1 * sizeof(char); // .. compute size of name, + tmpSizeBytes = PAPI_MAX_STR_LEN - 1 * sizeof(char); // .. compute size of name, CUPTI_CALL((*cuptiEventGetAttributePtr) (myeventCuptiEventId, // .. Get the event name seen by cupti, CUPTI_EVENT_ATTR_NAME, &tmpSizeBytes, tmpStr), // .. into tmpStr. return (PAPI_EMISC)); // .. on failure, exit routine. - snprintf(gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, // record expaneded name for papi user. + snprintf(gctxt->availEventDesc[idxEventArray].name, PAPI_MAX_STR_LEN, // record expanded name for papi user. "event:%s:device=%d", tmpStr, deviceNum); - gctxt->availEventDesc[idxEventArray].name[PAPI_MIN_STR_LEN - 1] = '\0'; // ensure null termination. + gctxt->availEventDesc[idxEventArray].name[PAPI_MAX_STR_LEN - 1] = '\0'; // ensure null termination. char *nameTmpPtr = gctxt->availEventDesc[idxEventArray].name; // For looping, get pointer to name. for(ii = 0; ii < (int) strlen(nameTmpPtr); ii++) { // Replace spaces with underscores. if(nameTmpPtr[ii] == ' ') nameTmpPtr[ii] = '_'; // .. @@ -477,8 +594,8 @@ /* Save description in the native event array */ tmpSizeBytes = PAPI_2MAX_STR_LEN - 1 * sizeof(char); // Most space to use for description. - CUPTI_CALL((*cuptiEventGetAttributePtr) (myeventCuptiEventId, // Get it, - CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &tmpSizeBytes, // .. Set limit (and recieve bytes written), + CUPTI_CALL((*cuptiEventGetAttributePtr) (myeventCuptiEventId, // Get it, + CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &tmpSizeBytes, // .. 
Set limit (and receive bytes written), gctxt->availEventDesc[idxEventArray].description), // .. in the description. return (PAPI_EMISC)); // .. on failure. gctxt->availEventDesc[idxEventArray].description[PAPI_2MAX_STR_LEN - 1] = '\0'; // Ensure null terminator. @@ -499,14 +616,14 @@ uint32_t maxMetrics = 0, i, j; CUpti_MetricID *metricIdList = NULL; CUptiResult cuptiRet; - mydevice = &gctxt->deviceArray[deviceNum]; // Get papicuda_device_desc pointer. + mydevice = &gctxt->deviceArray[deviceNum]; // Get cuda_device_desc pointer. cuptiRet = (*cuptiDeviceGetNumMetricsPtr) (mydevice->cuDev, &maxMetrics); // Read the # metrics on this device. if (cuptiRet != CUPTI_SUCCESS || maxMetrics < 1) continue; // If no metrics, skip to next device. SUBDBG("Device %d: Checking each of the (maxMetrics) %d metrics\n", deviceNum, maxMetrics); // Make a temporary list of the metric Ids to add to the available named collectables. - size_t size = maxMetrics * sizeof(CUpti_EventID); + size_t size = maxMetrics * sizeof(CUpti_EventID); metricIdList = (CUpti_MetricID *) papi_calloc(maxMetrics, sizeof(CUpti_EventID)); CHECK_PRINT_EVAL(metricIdList == NULL, "Out of memory", return (PAPI_ENOMEM)); @@ -521,9 +638,9 @@ for (i=0, j=0; iavailEventIDArray[idxEventArray] = metricIdList[i]; // add to the list of collectables. + gctxt->availEventIDArray[idxEventArray] = metricIdList[i]; // add to the list of collectables. gctxt->availEventKind[idxEventArray] = CUPTI_ACTIVITY_KIND_METRIC; // Indicate it is a metric. gctxt->availEventDeviceNum[idxEventArray] = deviceNum; // remember the device number. - size = PAPI_MIN_STR_LEN; + size = PAPI_MAX_STR_LEN; CUPTI_CALL((*cuptiMetricGetAttributePtr) (metricIdList[i], // Get the name, fail if we cannot. - CUPTI_METRIC_ATTR_NAME, &size, (uint8_t *) tmpStr), + CUPTI_METRIC_ATTR_NAME, &size, (uint8_t *) tmpStr), return (PAPI_EMISC)); - if (size >= PAPI_MIN_STR_LEN) { // Truncate if we don't have room for the name. 
- gctxt->availEventDesc[idxEventArray].name[PAPI_MIN_STR_LEN - 1] = '\0'; + if (size >= PAPI_MAX_STR_LEN) { // Truncate if we don't have room for the name. + gctxt->availEventDesc[idxEventArray].name[PAPI_MAX_STR_LEN - 1] = '\0'; } size_t MV_KindSize = sizeof(CUpti_MetricValueKind); - CUPTI_CALL((*cuptiMetricGetAttributePtr) // Collect the metric kind. + CUPTI_CALL((*cuptiMetricGetAttributePtr) // Collect the metric kind. (metricIdList[i], CUPTI_METRIC_ATTR_VALUE_KIND, &MV_KindSize, // .. for this metric, &gctxt->availEventDesc[idxEventArray].MV_Kind), // .. store in the event description, return (PAPI_EMISC)); // .. on failure, but should always work. - snprintf(gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, // .. develop name for papi user in tmpStr. + snprintf(gctxt->availEventDesc[idxEventArray].name, PAPI_MAX_STR_LEN, // .. develop name for papi user in tmpStr. "metric:%s:device=%d", tmpStr, deviceNum); size = PAPI_2MAX_STR_LEN-1; // Most bytes to return. @@ -590,7 +707,7 @@ (uint8_t *) gctxt->availEventDesc[idxEventArray].description), // .. and store in event description. return (PAPI_EMISC)); // .. on failure, but should always work. - // Note that 'size' also returned total bytes written. + // Note that 'size' also returned total bytes written. gctxt->availEventDesc[idxEventArray].description[size] = '\0'; // Always z-terminate. // Now we get all the sub-events of this metric. @@ -624,7 +741,7 @@ /* return 0 if everything went OK */ return 0; -} // end papicuda_add_native_events +} // end _cuda_add_native_events /* @@ -634,7 +751,7 @@ integer percentage. If the CUPTI value is a double, the value is cast to long long... this can be a severe truncation. 
*/ -static int papicuda_convert_metric_value_to_long_long(CUpti_MetricValue metricValue, CUpti_MetricValueKind valueKind, long long int *papiValue) +static int _cuda_convert_metric_value_to_long_long(CUpti_MetricValue metricValue, CUpti_MetricValueKind valueKind, long long int *papiValue) { union { long long ll; @@ -687,10 +804,10 @@ ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* **************************************************************************** */ -/* +/* * This is called whenever a thread is initialized. */ -static int papicuda_init_thread(hwd_context_t * ctx) +static int _cuda_init_thread(hwd_context_t * ctx) { (void) ctx; SUBDBG("Entering\n"); @@ -712,30 +829,30 @@ CUDA_init_component() (called only by main thread) rather than CUDA_init() or CUDA_init_control_state() (both called by each thread). */ -static int papicuda_init_component(int cidx) +static int _cuda_init_component(int cidx) { SUBDBG("Entering with component idx: %d\n", cidx); int rv; /* link in all the cuda libraries and resolve the symbols we need to use */ - if(papicuda_linkCudaLibraries() != PAPI_OK) { + if(_cuda_linkCudaLibraries() != PAPI_OK) { SUBDBG("Dynamic link of CUDA libraries failed, component will be disabled.\n"); SUBDBG("See disable reason in papi_component_avail output for more details.\n"); return (PAPI_ENOSUPP); } /* Create the structure */ - if(!global_papicuda_context) - global_papicuda_context = (papicuda_context_t *) papi_calloc(1, sizeof(papicuda_context_t)); + if(!global_cuda_context) + global_cuda_context = (cuda_context_t *) papi_calloc(1, sizeof(cuda_context_t)); /* Get list of all native CUDA events supported */ - rv = papicuda_add_native_events(global_papicuda_context); + rv = _cuda_add_native_events(global_cuda_context); if(rv != 0) return (rv); /* Export some information */ _cuda_vector.cmp_info.CmpIdx = cidx; - _cuda_vector.cmp_info.num_native_events = global_papicuda_context->availEventSize; + 
_cuda_vector.cmp_info.num_native_events = global_cuda_context->availEventSize; _cuda_vector.cmp_info.num_cntrs = _cuda_vector.cmp_info.num_native_events; _cuda_vector.cmp_info.num_mpx_cntrs = _cuda_vector.cmp_info.num_native_events; @@ -747,42 +864,42 @@ * In general a control state holds the hardware info for an * EventSet. */ -static int papicuda_init_control_state(hwd_control_state_t * ctrl) +static int _cuda_init_control_state(hwd_control_state_t * ctrl) { SUBDBG("Entering\n"); (void) ctrl; - papicuda_context_t *gctxt = global_papicuda_context; + cuda_context_t *gctxt = global_cuda_context; CHECK_PRINT_EVAL(!gctxt, "Error: The PAPI CUDA component needs to be initialized first", return (PAPI_ENOINIT)); /* If no events were found during the initial component initialization, return error */ - if(global_papicuda_context->availEventSize <= 0) { + if(global_cuda_context->availEventSize <= 0) { strncpy(_cuda_vector.cmp_info.disabled_reason, "ERROR CUDA: No events exist", PAPI_MAX_STR_LEN); return (PAPI_EMISC); } /* If it does not exist, create the global structure to hold CUDA contexts and active events */ - if(!global_papicuda_control) { - global_papicuda_control = (papicuda_control_t *) papi_calloc(1, sizeof(papicuda_control_t)); - global_papicuda_control->countOfActiveCUContexts = 0; - global_papicuda_control->activeEventCount = 0; + if(!global_cuda_control) { + global_cuda_control = (cuda_control_t *) papi_calloc(1, sizeof(cuda_control_t)); + global_cuda_control->countOfActiveCUContexts = 0; + global_cuda_control->activeEventCount = 0; } return PAPI_OK; -} // end papicuda_init_control_state +} // end cuda_init_control_state /* Triggered by eventset operations like add or remove. For CUDA, needs to be - * called multiple times from each seperate CUDA context with the events to be + * called multiple times from each separate CUDA context with the events to be * measured from that context. For each context, create eventgroups for the * events. 
*/ /* Note: NativeInfo_t is defined in papi_internal.h */ -static int papicuda_update_control_state(hwd_control_state_t * ctrl, +static int _cuda_update_control_state(hwd_control_state_t * ctrl, NativeInfo_t * nativeInfo, int nativeCount, hwd_context_t * ctx) { SUBDBG("Entering with nativeCount %d\n", nativeCount); (void) ctx; - papicuda_control_t *gctrl = global_papicuda_control; // We don't use the passed-in parameter, we use a global. - papicuda_context_t *gctxt = global_papicuda_context; // We don't use the passed-in parameter, we use a global. + cuda_control_t *gctrl = global_cuda_control; // We don't use the passed-in parameter, we use a global. + cuda_context_t *gctxt = global_cuda_context; // We don't use the passed-in parameter, we use a global. int currDeviceNum; CUcontext currCuCtx; int eventContextIdx; @@ -794,7 +911,7 @@ return (PAPI_OK); /* Get deviceNum, initialize context if needed via free, get context */ - CUDA_CALL((*cudaGetDevicePtr) (&currDeviceNum), return (PAPI_EMISC)); + CUDA_CALL((*cudaGetDevicePtr) (&currDeviceNum), return (PAPI_EMISC)); SUBDBG("currDeviceNum %d \n", currDeviceNum); CUDA_CALL((*cudaFreePtr) (NULL), return (PAPI_EMISC)); @@ -802,7 +919,7 @@ SUBDBG("currDeviceNum %d cuCtx %p \n", currDeviceNum, currCuCtx); /* Handle user request of events to be monitored */ - for (ii = 0; ii < nativeCount; ii++) { // For each event provided by caller, + for (ii = 0; ii < nativeCount; ii++) { // For each event provided by caller, index = nativeInfo[ii].ni_event; // Get the index of the event (in the global context). char *eventName = gctxt->availEventDesc[index].name; // Shortcut to name. int numMetricEvents= gctxt->availEventDesc[index].numMetricEvents; // Get if this is an event (=0) or metric (>0). 
@@ -822,7 +939,7 @@ CHECK_PRINT_EVAL(cc >= PAPICUDA_MAX_COUNTERS, "Exceeded hardcoded maximum number of contexts (PAPICUDA_MAX_COUNTERS)", return (PAPI_EMISC)); if(gctrl->arrayOfActiveCUContexts[cc]->deviceNum == eventDeviceNum) { // If this cuda context is for the device for this event, - eventCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; // Remember that context. + eventCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; // Remember that context. SUBDBG("Event %s device %d already has a cuCtx %p registered\n", eventName, eventDeviceNum, eventCuCtx); if(eventCuCtx != currCuCtx) // If that is not our CURRENT context, push and make it so. @@ -830,13 +947,13 @@ return (PAPI_EMISC)); // .. .. on failure. break; // .. exit the loop. } // end if found. - } // end loop through active contexts. + } // end loop through active contexts. if(cc == (int) gctrl->countOfActiveCUContexts) { // If we never found the context, create one. SUBDBG("Event %s device %d does not have a cuCtx registered yet...\n", eventName, eventDeviceNum); if(currDeviceNum != eventDeviceNum) { // .. If we need to switch to another device, CUDA_CALL((*cudaSetDevicePtr) (eventDeviceNum), // .. .. set the device pointer to the event's device. - return (PAPI_EMISC)); // .. .. .. (on faiure). + return (PAPI_EMISC)); // .. .. .. (on failure). CUDA_CALL((*cudaFreePtr) (NULL), return (PAPI_EMISC)); // .. .. This is a no-op, but used to force init of a context. CU_CALL((*cuCtxGetCurrentPtr) (&eventCuCtx), // .. .. So we can get a pointer to it. return (PAPI_EMISC)); // .. .. .. On failure. @@ -844,7 +961,7 @@ eventCuCtx = currCuCtx; // .. .. just get the current context. } - gctrl->arrayOfActiveCUContexts[cc] = papi_calloc(1, sizeof(papicuda_active_cucontext_t)); // allocate a structure. + gctrl->arrayOfActiveCUContexts[cc] = papi_calloc(1, sizeof(cuda_active_cucontext_t)); // allocate a structure. 
CHECK_PRINT_EVAL(gctrl->arrayOfActiveCUContexts[cc] == NULL, "Memory allocation for new active context failed", return (PAPI_ENOMEM)); gctrl->arrayOfActiveCUContexts[cc]->deviceNum = eventDeviceNum; // Fill in everything. gctrl->arrayOfActiveCUContexts[cc]->cuCtx = eventCuCtx; @@ -858,7 +975,7 @@ // We found the context, or created it, and the index is in cc. //--------------------------------------------------------------------- eventContextIdx = cc; - papicuda_active_cucontext_t *eventctrl = gctrl->arrayOfActiveCUContexts[eventContextIdx]; // get the context for this event. + cuda_active_cucontext_t *eventctrl = gctrl->arrayOfActiveCUContexts[eventContextIdx]; // get the context for this event. // We need to get all the events (or sub-events of a metric) and add // them to our list of all events. Note we only check if we exceed the @@ -893,15 +1010,15 @@ if (eventctrl->allEventsCount >= PAPICUDA_MAX_COUNTERS) { // Fail if we exceed size of array. SUBDBG("Num events (generated by metric) exceeded PAPICUDA_MAX_COUNTERS\n"); return(PAPI_EINVAL); - } + } } // end for each event in metric. } // end if we must process all sub-events of a metric. // Record index of this active event back into the nativeInfo - // structure. + // structure. nativeInfo[ii].ni_position = gctrl->activeEventCount; - + // Record index of this active event within this context. We need this // so after we read this context, we can move values (or compute // metrics and move values) into their proper position within the @@ -937,7 +1054,7 @@ SUBDBG("Create eventGroupSets for context (destroy pre-existing) (nativeCount %d, allEventsCount %d) \n", gctrl->activeEventCount, eventctrl->allEventsCount); if(eventctrl->allEventsCount > 0) { // If we have events... 
// SUBDBG("Destroy previous eventGroupPasses for the context \n"); - if(eventctrl->eventGroupSets != NULL) { // if we have a previous analysis; + if(eventctrl->eventGroupSets != NULL) { // if we have a previous analysis; CUPTI_CALL((*cuptiEventGroupSetsDestroyPtr) // .. Destroy it. (eventctrl->eventGroupSets), return (PAPI_EMISC)); // .. If we can't, return error. eventctrl->eventGroupSets = NULL; // .. Reset pointer. @@ -947,17 +1064,19 @@ // SUBDBG("About to create eventGroupPasses for the context (sizeBytes %zu) \n", sizeBytes); #ifdef PAPICUDA_KERNEL_REPLAY_MODE - CUPTI_CALL((*cuptiEnableKernelReplayModePtr) (eventCuCtx), + CUPTI_CALL((*cuptiEnableKernelReplayModePtr) (eventCuCtx), return (PAPI_ECMP)); - CUPTI_CALL((*cuptiEventGroupSetsCreatePtr) - (eventCuCtx, sizeBytes, eventctrl->allEvents, - &eventctrl->eventGroupSets), + CUPTI_CALL((*cuptiEventGroupSetsCreatePtr) + (eventCuCtx, sizeBytes, eventctrl->allEvents, + &eventctrl->eventGroupSets), return (PAPI_ECMP)); #else // Normal operation. + // Note: We no longer fail if this collection mode does not work. It will only work + // on TESLA devices, and is desirable there (not restricted to the kernel). But it + // is not available on other models (including GTX) and we shouldn't fail without it. CUPTI_CALL((*cuptiSetEventCollectionModePtr) - (eventCuCtx,CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS), - return(PAPI_ECMP)); + (eventCuCtx,CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS), ); // CUPTI provides two routines to create EventGroupSets, one is used // here cuptiEventGroupSetsCreate(), the other is for metrics, it will @@ -965,14 +1084,14 @@ // cuptiMetricCreateEventGroupSets(). We have checked and these two routines // produce groups of the same size with the same event IDs, and work equally. 
- CUPTI_CALL((*cuptiEventGroupSetsCreatePtr) - (eventCuCtx, sizeBytes, eventctrl->allEvents, - &eventctrl->eventGroupSets), + CUPTI_CALL((*cuptiEventGroupSetsCreatePtr) + (eventCuCtx, sizeBytes, eventctrl->allEvents, + &eventctrl->eventGroupSets), return (PAPI_EMISC)); - if (eventctrl->eventGroupSets->numSets > 1) { // If more than one pass is required, + if (eventctrl->eventGroupSets->numSets > 1) { // If more than one pass is required, SUBDBG("Error occurred: The combined CUPTI events cannot be collected simultaneously ... try different events\n"); - papicuda_cleanup_eventset(ctrl); // Will do cuptiEventGroupSetsDestroy() to clean up memory. + _cuda_cleanup_eventset(ctrl); // Will do cuptiEventGroupSetsDestroy() to clean up memory. return(PAPI_ECOMBO); } else { SUBDBG("Created eventGroupSets. nativeCount %d, allEventsCount %d. Sets (passes-required) = %d) \n", gctrl->activeEventCount, eventctrl->allEventsCount, eventctrl->eventGroupSets->numSets); @@ -981,8 +1100,8 @@ #endif // #if/#else/#endif on PAPICUDA_KERNEL_REPLAY_MODE } // end if we had any events. - - if(eventCuCtx != currCuCtx) // restore original context for caller, if we changed it. + + if(eventCuCtx != currCuCtx) // restore original context for caller, if we changed it. CU_CALL((*cuCtxPopCurrentPtr) (&eventCuCtx), return (PAPI_EMISC)); } @@ -993,13 +1112,13 @@ /* Triggered by PAPI_start(). * For CUDA component, switch to each context and start all eventgroups. 
*/ -static int papicuda_start(hwd_context_t * ctx, hwd_control_state_t * ctrl) +static int _cuda_start(hwd_context_t * ctx, hwd_control_state_t * ctrl) { SUBDBG("Entering\n"); (void) ctx; (void) ctrl; - papicuda_control_t *gctrl = global_papicuda_control; - // papicuda_context_t *gctxt = global_papicuda_context; + cuda_control_t *gctrl = global_cuda_control; + // cuda_context_t *gctxt = global_cuda_context; uint32_t ii, gg, cc; int saveDeviceNum = -1; @@ -1011,7 +1130,7 @@ CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC)); CUPTI_CALL((*cuptiGetTimestampPtr) (&gctrl->cuptiStartTimestampNs), return (PAPI_EMISC)); - for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { // For each context, + for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { // For each context, int eventDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum; // .. get device number. CUcontext eventCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; // .. get this context, SUBDBG("Set to device %d cuCtx %p \n", eventDeviceNum, eventCuCtx); @@ -1025,9 +1144,9 @@ for(gg = 0; gg < groupset->numEventGroups; gg++) { // .. For each group within this groupset, uint32_t one = 1; CUPTI_CALL((*cuptiEventGroupSetAttributePtr) ( // .. .. Say we want to profile all domains. - groupset->eventGroups[gg], - CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES, - sizeof(uint32_t), &one), + groupset->eventGroups[gg], + CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES, + sizeof(uint32_t), &one), return (PAPI_EMISC)); // .. .. on failure of call. } // end for each group. @@ -1051,22 +1170,22 @@ // where we have to give PAPI the address of an array of the values we read (or // composed). 
-static int papicuda_read(hwd_context_t * ctx, hwd_control_state_t * ctrl, long long **values, int flags) +static int _cuda_read(hwd_context_t * ctx, hwd_control_state_t * ctrl, long long **values, int flags) { SUBDBG("Entering\n"); (void) ctx; (void) ctrl; (void) flags; - papicuda_control_t *gctrl = global_papicuda_control; - papicuda_context_t *gctxt = global_papicuda_context; + cuda_control_t *gctrl = global_cuda_control; + cuda_context_t *gctxt = global_cuda_context; uint32_t gg, i, j, cc; int saveDeviceNum; // Get read time stamp CUPTI_CALL((*cuptiGetTimestampPtr) // Read current timestamp. - (&gctrl->cuptiReadTimestampNs), + (&gctrl->cuptiReadTimestampNs), return (PAPI_EMISC)); - uint64_t durationNs = gctrl->cuptiReadTimestampNs - + uint64_t durationNs = gctrl->cuptiReadTimestampNs - gctrl->cuptiStartTimestampNs; // compute duration from start. gctrl->cuptiStartTimestampNs = gctrl->cuptiReadTimestampNs; // Change start to value just read. @@ -1074,7 +1193,7 @@ CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC)); // Save Caller's current device number on entry. for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { // For each active context, - papicuda_active_cucontext_t *activeCuCtxt = + cuda_active_cucontext_t *activeCuCtxt = gctrl->arrayOfActiveCUContexts[cc]; // A shortcut. int currDeviceNum = activeCuCtxt->deviceNum; // Get the device number. CUcontext currCuCtx = activeCuCtxt->cuCtx; // Get the actual CUcontext. @@ -1083,7 +1202,7 @@ if(currDeviceNum != saveDeviceNum) { // If my current is not the same as callers, CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC)); // .. Push the current, and replace with mine. // Note, cuCtxPushCurrent() implicitly includes a cudaSetDevice(). - } else { // If my current IS the same as callers, + } else { // If my current IS the same as callers, CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC)); // .. No push. Just set the current. 
} @@ -1109,37 +1228,37 @@ CUpti_EventGroup group = groupset->eventGroups[gg]; // Shortcut to the group. CUPTI_CALL((*cuptiEventGroupGetAttributePtr) // Get 'groupDomainID' for this group. - (group, CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID, - &groupDomainIDSize, &groupDomainID), + (group, CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID, + &groupDomainIDSize, &groupDomainID), return (PAPI_EMISC)); // 'numTotalInstances' and 'numInstances are needed for scaling // the values retrieved. (Nvidia instructions and samples). CUPTI_CALL((*cuptiDeviceGetEventDomainAttributePtr) // Get 'numTotalInstances' for this domain. - (cudevice, - groupDomainID, - CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT, - &sizeofuint32num, - &numTotalInstances), + (cudevice, + groupDomainID, + CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT, + &sizeofuint32num, + &numTotalInstances), return (PAPI_EMISC)); CUPTI_CALL((*cuptiEventGroupGetAttributePtr) // Get 'numInstances' for this domain. - (group, + (group, CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT, - &sizeofuint32num, - &numInstances), + &sizeofuint32num, + &numInstances), return (PAPI_EMISC)); CUPTI_CALL((*cuptiEventGroupGetAttributePtr) // Get 'numEvents' in this group. - (group, + (group, CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS, - &sizeofuint32num, - &numEvents), + &sizeofuint32num, + &numEvents), return (PAPI_EMISC)); // Now we will read all events in this group; aggregate the values // and then distribute them. We do not calculate metrics here; - // wait until all groups are read and all values are available. + // wait until all groups are read and all values are available. size_t resultArrayBytes = sizeof(uint64_t) * numEvents * numTotalInstances; size_t eventIdArrayBytes = sizeof(CUpti_EventID) * numEvents; @@ -1158,8 +1277,8 @@ CUPTI_CALL( (*cuptiEventGroupReadAllEventsPtr) // Read all events. (group, CUPTI_EVENT_READ_FLAG_NONE, // This flag is the only allowed flag. 
- &resultArrayBytes, resultArray, - &eventIdArrayBytes, eventIdArray, + &resultArrayBytes, resultArray, + &eventIdArrayBytes, eventIdArray, &numCountersRead), return (PAPI_EMISC)); @@ -1205,19 +1324,19 @@ // event values. We do that by looping through the events assigned to // this context, and we must back track to the activeEventIdx[] and // activeEventValues[] array in gctrl. We have kept our indexes into - // that array, in ctxActive[]. + // that array, in ctxActive[]. uint32_t ctxActiveCount = activeCuCtxt->ctxActiveCount; // Number of (papi user) events in this context. uint32_t *ctxActive = activeCuCtxt->ctxActiveEvents; // index of each event in gctrl->activeEventXXXX. for (j=0; jactiveEventIndex[activeIdx]; // Get the availEventIdx. CUpti_EventID thisEventId = gctxt->availEventIDArray[availIdx]; // Get the event ID (or metric ID). - struct papicuda_name_desc *myDesc=&(gctxt->availEventDesc[availIdx]); // get pointer to the description. - + struct cuda_name_desc *myDesc=&(gctxt->availEventDesc[availIdx]); // get pointer to the description. + if (myDesc->numMetricEvents == 0) { // If this is a simple cuda event (not a metric), int k; for (k=0; kallEvents, // the event list. @@ -1240,30 +1359,30 @@ durationNs, &myValue), // duration (for rates), and where to return the value. return(PAPI_EMISC)); // In case of error. - papicuda_convert_metric_value_to_long_long( // convert the value computed to long long and store it. - myValue, myDesc->MV_Kind, - &gctrl->activeEventValues[activeIdx]); + _cuda_convert_metric_value_to_long_long( // convert the value computed to long long and store it. + myValue, myDesc->MV_Kind, + &gctrl->activeEventValues[activeIdx]); } } // end loop on active events in this context. if(currDeviceNum != saveDeviceNum) { // If we had to change the context from user's, - CUDA_CALL((*cudaSetDevicePtr) (saveDeviceNum), // set the device pointer to the user's original. - return (PAPI_EMISC)); // .. .. (on faiure). 
+ CUDA_CALL((*cudaSetDevicePtr) (saveDeviceNum), // set the device pointer to the user's original. + return (PAPI_EMISC)); // .. .. (on failure). CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC)); // .. pop the pushed context back to user's. } } // end of loop for each active context. *values = gctrl->activeEventValues; // Return ptr to the list of computed values to user. return (PAPI_OK); -} // end of papicuda_read(). +} // end of cuda_read(). /* Triggered by PAPI_stop() */ -static int papicuda_stop(hwd_context_t * ctx, hwd_control_state_t * ctrl) +static int _cuda_stop(hwd_context_t * ctx, hwd_control_state_t * ctrl) { SUBDBG("Entering\n"); (void) ctx; (void) ctrl; - papicuda_control_t *gctrl = global_papicuda_control; + cuda_control_t *gctrl = global_cuda_control; uint32_t cc, ss; int saveDeviceNum; @@ -1289,55 +1408,58 @@ } return (PAPI_OK); -} // end of papicuda_stop. +} // end of cuda_stop. -/* +/* * Disable and destroy the CUDA eventGroup */ -static int papicuda_cleanup_eventset(hwd_control_state_t * ctrl) +static int _cuda_cleanup_eventset(hwd_control_state_t * ctrl) { SUBDBG("Entering\n"); (void) ctrl; // Don't need this parameter. 
- papicuda_control_t *gctrl = global_papicuda_control; - papicuda_context_t *gctxt = global_papicuda_context; - // papicuda_active_cucontext_t *currctrl; + cuda_control_t *gctrl = global_cuda_control; + cuda_context_t *gctxt = global_cuda_context; + // cuda_active_cucontext_t *currctrl; uint32_t cc; int saveDeviceNum; unsigned int ui; + CUcontext saveCtx; - SUBDBG("Save current context, then switch to each active device/context and enable eventgroups\n"); + SUBDBG("Save current device/context, then switch to each active device/context and enable eventgroups\n"); CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC)); + CU_CALL((*cuCtxGetCurrentPtr) (&saveCtx), return (PAPI_EMISC)); + for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { - CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; int currDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum; + CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; + CUDA_CALL((*cudaSetDevicePtr) (currDeviceNum), return(PAPI_EMISC)); + CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC)); CUpti_EventGroupSets *currEventGroupSets = gctrl->arrayOfActiveCUContexts[cc]->eventGroupSets; - if(currDeviceNum != saveDeviceNum) - CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC)); - else - CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC)); + //CUPTI_CALL((*cuptiEventGroupSetsDestroyPtr) (currEventGroupPasses), return (PAPI_EMISC)); (*cuptiEventGroupSetsDestroyPtr) (currEventGroupSets); gctrl->arrayOfActiveCUContexts[cc]->eventGroupSets = NULL; papi_free( gctrl->arrayOfActiveCUContexts[cc] ); - /* Pop the pushed context */ - if(currDeviceNum != saveDeviceNum) - CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC)); } + /* Restore saved context, device pointer */ + CU_CALL((*cuCtxSetCurrentPtr) (saveCtx), return (PAPI_EMISC)); + CUDA_CALL((*cudaSetDevicePtr) (saveDeviceNum), return(PAPI_EMISC)); + /* Record that there are no active contexts 
or events */ for (ui=0; uiactiveEventCount; ui++) { // For each active event, int idx = gctrl->activeEventIndex[ui]; // .. Get its index... gctxt->availEventIsBeingMeasuredInEventset[idx] = 0; // .. No longer being measured. } - + gctrl->countOfActiveCUContexts = 0; gctrl->activeEventCount = 0; return (PAPI_OK); -} // end papicuda_cleanup_eventset +} // end cuda_cleanup_eventset /* Called at thread shutdown. Does nothing in the CUDA component. */ -int papicuda_shutdown_thread(hwd_context_t * ctx) +static int _cuda_shutdown_thread(hwd_context_t * ctx) { SUBDBG("Entering\n"); (void) ctx; @@ -1346,28 +1468,28 @@ } /* Triggered by PAPI_shutdown() and frees memory allocated in the CUDA component. */ -static int papicuda_shutdown_component(void) +static int _cuda_shutdown_component(void) { SUBDBG("Entering\n"); - papicuda_control_t *gctrl = global_papicuda_control; - papicuda_context_t *gctxt = global_papicuda_context; + cuda_control_t *gctrl = global_cuda_control; + cuda_context_t *gctxt = global_cuda_context; int deviceNum; uint32_t i, cc; /* Free context */ if(gctxt) { for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) { - papicuda_device_desc_t *mydevice = &gctxt->deviceArray[deviceNum]; + cuda_device_desc_t *mydevice = &gctxt->deviceArray[deviceNum]; papi_free(mydevice->domainIDArray); papi_free(mydevice->domainIDNumEvents); } for (i=0; iavailEventSize; i++) { // For every event in this context, - struct papicuda_name_desc *desc = &(gctxt->availEventDesc[i]); // get a name description. + struct cuda_name_desc *desc = &(gctxt->availEventDesc[i]); // get a name description. if (desc->numMetricEvents > 0) { // If we have any sub-events, papi_free(desc->metricEvents); // .. Free the list of sub-events. } } // end for every available event. 
- + papi_free(gctxt->availEventIDArray); papi_free(gctxt->availEventDeviceNum); papi_free(gctxt->availEventKind); @@ -1375,7 +1497,7 @@ papi_free(gctxt->availEventDesc); papi_free(gctxt->deviceArray); papi_free(gctxt); - global_papicuda_context = gctxt = NULL; + global_cuda_context = gctxt = NULL; } /* Free control */ if(gctrl) { @@ -1388,25 +1510,25 @@ papi_free(gctrl->arrayOfActiveCUContexts[cc]); } papi_free(gctrl); - global_papicuda_control = gctrl = NULL; + global_cuda_control = gctrl = NULL; } // close the dynamic libraries needed by this component (opened in the init substrate call) dlclose(dl1); dlclose(dl2); dlclose(dl3); return (PAPI_OK); -} // end papicuda_shutdown_component(). +} // end cuda_shutdown_component(). /* Triggered by PAPI_reset() but only if the EventSet is currently * running. If the eventset is not currently running, then the saved * value in the EventSet is set to zero without calling this * routine. */ -static int papicuda_reset(hwd_context_t * ctx, hwd_control_state_t * ctrl) +static int _cuda_reset(hwd_context_t * ctx, hwd_control_state_t * ctrl) { (void) ctx; (void) ctrl; - papicuda_control_t *gctrl = global_papicuda_control; + cuda_control_t *gctrl = global_cuda_control; uint32_t gg, ii, cc, ss; int saveDeviceNum; @@ -1425,7 +1547,7 @@ CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC)); CUpti_EventGroupSets *currEventGroupSets = gctrl->arrayOfActiveCUContexts[cc]->eventGroupSets; for (ss=0; ss<currEventGroupSets->numSets; ss++) { - CUpti_EventGroupSet groupset = currEventGroupSets->sets[ss]; + CUpti_EventGroupSet groupset = currEventGroupSets->sets[ss]; for(gg = 0; gg < groupset.numEventGroups; gg++) { CUpti_EventGroup group = groupset.eventGroups[gg]; CUPTI_CALL((*cuptiEventGroupResetAllEventsPtr) (group), return (PAPI_EMISC)); @@ -1436,7 +1558,7 @@ CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC)); } return (PAPI_OK); -} // end papicuda_reset(). +} // end cuda_reset().
/* This function sets various options in the component - Does nothing in the CUDA component. @@ -1444,7 +1566,7 @@ @param[in] code valid are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT @param[in] option -- options to be set */ -static int papicuda_ctrl(hwd_context_t * ctx, int code, _papi_int_option_t * option) +static int _cuda_ctrl(hwd_context_t * ctx, int code, _papi_int_option_t * option) { SUBDBG("Entering\n"); (void) ctx; @@ -1453,7 +1575,7 @@ return (PAPI_OK); } -/* +/* * This function has to set the bits needed to count different domains * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER * By default return PAPI_EINVAL if none of those are specified @@ -1463,7 +1585,7 @@ * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) * PAPI_DOM_ALL is all of the domains */ -static int papicuda_set_domain(hwd_control_state_t * ctrl, int domain) +static int _cuda_set_domain(hwd_control_state_t * ctrl, int domain) { SUBDBG("Entering\n"); (void) ctrl; @@ -1479,7 +1601,7 @@ * @param EventCode is the event of interest * @param modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS */ -static int papicuda_ntv_enum_events(unsigned int *EventCode, int modifier) +static int _cuda_ntv_enum_events(unsigned int *EventCode, int modifier) { // SUBDBG( "Entering (get next event after %u)\n", *EventCode ); switch (modifier) { @@ -1488,11 +1610,14 @@ return (PAPI_OK); break; case PAPI_ENUM_EVENTS: - if(*EventCode < global_papicuda_context->availEventSize - 1) { + if (global_cuda_context == NULL) { + return (PAPI_ENOEVNT); + } else if (*EventCode < global_cuda_context->availEventSize - 1) { *EventCode = *EventCode + 1; return (PAPI_OK); - } else + } else { return (PAPI_ENOEVNT); + } break; default: return (PAPI_EINVAL); @@ -1506,12 +1631,12 @@ * @param name is a pointer for the name to be copied to * @param len is the size of the name string */ -static int papicuda_ntv_code_to_name(unsigned int EventCode, char *name, 
int len) +static int _cuda_ntv_code_to_name(unsigned int EventCode, char *name, int len) { // SUBDBG( "Entering EventCode %d\n", EventCode ); unsigned int index = EventCode; - papicuda_context_t *gctxt = global_papicuda_context; - if(index < gctxt->availEventSize) { + cuda_context_t *gctxt = global_cuda_context; + if(gctxt != NULL && index < gctxt->availEventSize) { strncpy(name, gctxt->availEventDesc[index].name, len); } else { return (PAPI_EINVAL); @@ -1526,12 +1651,12 @@ * @param descr is a pointer for the description to be copied to * @param len is the size of the descr string */ -static int papicuda_ntv_code_to_descr(unsigned int EventCode, char *name, int len) +static int _cuda_ntv_code_to_descr(unsigned int EventCode, char *name, int len) { // SUBDBG( "Entering\n" ); unsigned int index = EventCode; - papicuda_context_t *gctxt = global_papicuda_context; - if(index < gctxt->availEventSize) { + cuda_context_t *gctxt = global_cuda_context; + if(gctxt != NULL && index < gctxt->availEventSize) { strncpy(name, gctxt->availEventDesc[index].description, len); } else { return (PAPI_EINVAL); @@ -1564,53 +1689,53 @@ , /* sizes of framework-opaque component-private structures... 
these are all unused in this component */ .size = { - .context = 1, /* sizeof( papicuda_context_t ), */ - .control_state = 1, /* sizeof( papicuda_control_t ), */ - .reg_value = 1, /* sizeof( papicuda_register_t ), */ - .reg_alloc = 1, /* sizeof( papicuda_reg_alloc_t ), */ + .context = 1, /* sizeof( cuda_context_t ), */ + .control_state = 1, /* sizeof( cuda_control_t ), */ + .reg_value = 1, /* sizeof( cuda_register_t ), */ + .reg_alloc = 1, /* sizeof( cuda_reg_alloc_t ), */ } , /* function pointers in this component */ - .start = papicuda_start, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) */ - .stop = papicuda_stop, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) */ - .read = papicuda_read, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl, long_long ** events, int flags ) */ - .reset = papicuda_reset, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) */ - .cleanup_eventset = papicuda_cleanup_eventset, /* ( hwd_control_state_t * ctrl ) */ - - .init_component = papicuda_init_component, /* ( int cidx ) */ - .init_thread = papicuda_init_thread, /* ( hwd_context_t * ctx ) */ - .init_control_state = papicuda_init_control_state, /* ( hwd_control_state_t * ctrl ) */ - .update_control_state = papicuda_update_control_state, /* ( hwd_control_state_t * ptr, NativeInfo_t * native, int count, hwd_context_t * ctx ) */ - - .ctl = papicuda_ctrl, /* ( hwd_context_t * ctx, int code, _papi_int_option_t * option ) */ - .set_domain = papicuda_set_domain, /* ( hwd_control_state_t * cntrl, int domain ) */ - .ntv_enum_events = papicuda_ntv_enum_events, /* ( unsigned int *EventCode, int modifier ) */ - .ntv_code_to_name = papicuda_ntv_code_to_name, /* ( unsigned int EventCode, char *name, int len ) */ - .ntv_code_to_descr = papicuda_ntv_code_to_descr, /* ( unsigned int EventCode, char *name, int len ) */ - .shutdown_thread = papicuda_shutdown_thread, /* ( hwd_context_t * ctx ) */ - .shutdown_component = papicuda_shutdown_component, /* ( void ) */ + .start = 
_cuda_start, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) */ + .stop = _cuda_stop, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) */ + .read = _cuda_read, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl, long_long ** events, int flags ) */ + .reset = _cuda_reset, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) */ + .cleanup_eventset = _cuda_cleanup_eventset, /* ( hwd_control_state_t * ctrl ) */ + + .init_component = _cuda_init_component, /* ( int cidx ) */ + .init_thread = _cuda_init_thread, /* ( hwd_context_t * ctx ) */ + .init_control_state = _cuda_init_control_state, /* ( hwd_control_state_t * ctrl ) */ + .update_control_state = _cuda_update_control_state, /* ( hwd_control_state_t * ptr, NativeInfo_t * native, int count, hwd_context_t * ctx ) */ + + .ctl = _cuda_ctrl, /* ( hwd_context_t * ctx, int code, _papi_int_option_t * option ) */ + .set_domain = _cuda_set_domain, /* ( hwd_control_state_t * cntrl, int domain ) */ + .ntv_enum_events = _cuda_ntv_enum_events, /* ( unsigned int *EventCode, int modifier ) */ + .ntv_code_to_name = _cuda_ntv_code_to_name, /* ( unsigned int EventCode, char *name, int len ) */ + .ntv_code_to_descr = _cuda_ntv_code_to_descr, /* ( unsigned int EventCode, char *name, int len ) */ + .shutdown_thread = _cuda_shutdown_thread, /* ( hwd_context_t * ctx ) */ + .shutdown_component = _cuda_shutdown_component, /* ( void ) */ }; //------------------------------------------------------------------------------------------------- -// This routine is an adaptation from 'readMetricValue' in nvlink_bandwidth_cupti_only.cu; where -// it is shown to work. Note that a metric can consist of more than one event, so the number of +// This routine is an adaptation from 'readMetricValue' in nvlink_bandwidth_cupti_only.cu; where +// it is shown to work. Note that a metric can consist of more than one event, so the number of // events and the number of metrics does not have to match. 
-// 'eventGroup' should contain the events needed to read the +// 'eventGroup' should contain the events needed to read the // 'numEvents' is the number of events needed to read to compute the metrics. -// 'metricId' is the array of METRICS, and +// 'metricId' is the array of METRICS, and // 'numMetrics" is the number of them, and also applies to the arrays 'values' and 'myKinds'. -// 'dev is the CUDevice needed to compute the metric. We don't need to switch the context, that is +// 'dev is the CUDevice needed to compute the metric. We don't need to switch the context, that is // already done by the caller so we are pointing at the correct context. //------------------------------------------------------------------------------------------------- -void readMetricValue(CUpti_EventGroup eventGroup, +void readMetricValue(CUpti_EventGroup eventGroup, uint32_t numEvents, // array COLS in results, - uint64_t numTotalInstances, // array ROWS in results, + uint64_t numTotalInstances, // array ROWS in results, CUdevice dev, // current Device structure. uint32_t numMetrics, CUpti_MetricID *metricId, - CUpti_MetricValueKind *myKinds, + CUpti_MetricValueKind *myKinds, long long int *values, - uint64_t timeDuration) + uint64_t timeDuration) { size_t bufferSizeBytes, numCountersRead; uint64_t *eventValueArray = NULL; @@ -1631,9 +1756,9 @@ aggrEventValueArraySize = sizeof(uint64_t) * numEvents; - CUPTI_CALL( (*cuptiEventGroupReadAllEvents) + CUPTI_CALL( (*cuptiEventGroupReadAllEventsPtr) (eventGroup, CUPTI_EVENT_READ_FLAG_NONE, &bufferSizeBytes, - eventValueArray, &arraySizeBytes, eventIdArray, &numCountersRead), + eventValueArray, &arraySizeBytes, eventIdArray, &numCountersRead), return); // Arrangement of 2-d Array returned in eventValueArray: @@ -1652,13 +1777,13 @@ // After aggregation, we use the data to compose the metrics. 
for (i = 0; i < numMetrics; i++) { CUpti_MetricValue metricValue; - CUPTI_CALL( (*cuptiMetricGetValue) - (dev, metricId[i], arraySizeBytes, eventIdArray, - aggrEventValueArraySize, aggrEventValueArray, - timeDuration, &metricValue), + CUPTI_CALL( (*cuptiMetricGetValuePtr) + (dev, metricId[i], arraySizeBytes, eventIdArray, + aggrEventValueArraySize, aggrEventValueArray, + timeDuration, &metricValue), return); - papicuda_convert_metric_value_to_long_long(metricValue, myKinds[i], &values[i]); + _cuda_convert_metric_value_to_long_long(metricValue, myKinds[i], &values[i]); } free(eventValueArray); diff -Nru papi-5.7.0+dfsg/src/components/cuda/README papi-6.0.0~dfsg/src/components/cuda/README --- papi-5.7.0+dfsg/src/components/cuda/README 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/cuda/README 2020-03-04 15:56:57.000000000 +0000 @@ -29,12 +29,12 @@ General information ------------------- -The PAPI CUDA component is a hardware performance counter -measurement technology for the NVIDIA CUDA platform which provides -access to the hardware counters inside the GPU. PAPI CUDA is based on -CUPTI support in the NVIDIA driver library. In any environment where -the CUPTI-enabled driver is installed, the PAPI CUDA component should -be able to provide detailed performance counter information regarding +The PAPI CUDA component is a hardware performance counter measurement +technology for the NVIDIA CUDA platform which provides access to the +hardware counters inside the GPU. PAPI CUDA is based on CUPTI support +in the NVIDIA driver library. In any environment where the +CUPTI-enabled driver is installed, the PAPI CUDA component should be +able to provide detailed performance counter information regarding events on the GPU kernels. NOTE: When adding CUDA related events or metrics to the CUDA @@ -47,50 +47,60 @@ PAPI_shutdown() in the application. This is important since it also frees the performance monitoring hardware on the GPU. 
+This PAPI CUDA component has been developed and tested using CUDA +version 10.1 and the associated CUPTI library. CUPTI is released with +the CUDA Tools SDK. How to install PAPI with the CUDA component? -------------------------------------------- -This PAPI CUDA component has been developed and tested using CUDA -version 8.0 and the associated CUPTI library. CUPTI is released with -the CUDA Tools SDK. +There is ONE required environment variable: PAPI_CUDA_ROOT. This is +required for both compiling, and at runtime. -This component uses the CUDA location from the environment (or looks -for /opt/cuda by default). Please set CUDA_DIR and CUPTI_DIR during -builds so that the component can find the required header files. - -Configure PAPI with CUDA enabled. - % cd src - % ./configure --prefix=some_location --with-components="cuda" - -Build with CUDA_DIR and CUPTI_DIR specified - % export CUDA_DIR=/opt/cuda - % export CUPTI_DIR=/opt/cuda/extras/CUPTI - % make - -Testing the component requires that libraries for PAPI, CUDA, CUPTI -can be found or are statically linked in to the executable. You may -need to add the library directories; examples are shown here. +An example that works on ICL's Saturn system (at this writing): +export PAPI_CUDA_ROOT=/usr/local/cuda-10.1 - % export LD_LIBRARY_PATH=${CUDA_DIR}/lib64:${CUPTI_DIR}/lib64:${LD_LIBRARY_PATH} +Within PAPI_CUDA_ROOT, we expect the following standard directories: +PAPI_CUDA_ROOT/include +PAPI_CUDA_ROOT/lib64 +PAPI_CUDA_ROOT/extras/CUPTI/include +PAPI_CUDA_ROOT/extras/CUPTI/lib64 + +For a standard installed system, this is the only environment variable +required for both compile and runtime. + +System configurations can vary. Some systems use Spack, a package +manager, to automatically keep paths straight. Others (like our own +ICL Saturn System) require "module load" commands to provide some +services, e.g. 
'module load cuda-10.1', and these may also set +environment variables and change the LD_LIBRARY_PATH search order. + +Users may require the help of sysadmin personnel to navigate these +facilities and gain access to the correct libraries. + +Configure PAPI with CUDA enabled. We presume you have navigated to the +directory papi/src. In that directory: + % ./configure --with-components="cuda" -Note libraries may be found in different places on different systems; the point -is that we need an environment variable $CUDA_DIR (with the underscore), and -the CUPTI directory in the $LD_LIBRARY_PATH. +Build with PAPI_CUDA_ROOT specified (ICL's Saturn example again): + % export PAPI_CUDA_ROOT=/usr/local/cuda-10.1 + % make -Before starting working with the cuda component, verify it's active by -running: - % ./papi_component_avail" -and check if it listed under the "Active Components" list. +TESTING the component is installed: Still from papi/src: + % utils/papi_component_avail -Test by running from the src directory - % ./components/cuda/tests/simpleMultiGPU +For the CUDA component to be operational, it must find the dynamic +libraries libcuda.so, libcudart.so, and libcupti.so. -For general information on how to create and run components, the user -is referred to the INSTALL.txt section "CREATING AND RUNNING -COMPONENTS". +If any of these are not found (or are not functional) then the +component will be listed as "disabled" with a reason explaining the +problem. If libraries were not found, then they are not in the +expected places. The component can be configured to look for each of +these libraries in a specific place, and using an alternate name if +desired. Detailed instructions are contained in the Rules.cuda file. +They are technical, users may wish to enlist the help of a sysadmin. -To find a list of CUDA supported events. 
+To find a list of CUDA supported events: % utils/papi_native_avail | grep -i CUDA */ diff -Nru papi-5.7.0+dfsg/src/components/cuda/Rules.cuda papi-6.0.0~dfsg/src/components/cuda/Rules.cuda --- papi-5.7.0+dfsg/src/components/cuda/Rules.cuda 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/cuda/Rules.cuda 2020-03-04 15:56:57.000000000 +0000 @@ -1,12 +1,101 @@ -# $Id$ +# Set default if the root environment variable is not already set. +# Note PAPI_CUDA_ROOT is an environment variable that must be set. +# It is the ONLY environment variable that must be set, all other +# settings are optional. +PAPI_CUDA_ROOT ?= /opt/cuda -CUDA_DIR ?= /opt/cuda -CUPTI_DIR ?= $(CUDA_DIR)/extras/CUPTI +# For non-typical system configurations, the following 'runtime overrides' can +# be set, as just a library name, or a full path and name. There cannot be any +# spaces between the double quotes (which must be escaped as \"). An example: + +# PAPI_CUDA_CUPTI = \"$(PAPI_CUDA_ROOT)/extras/CUPTI/lib64/libcupti.so\" + +# By default, all overrides are empty strings. + +# If an override is not an empty string, it must work, or the component will be +# disabled. + +# Both at compile time and run time, the software depends on PAPI_CUDA_ROOT. +# There are three libraries used by the CUDA component, they are +# libcuda.so +# libcudart.so +# libcupti.so + +# The standard installed locations for these libraries, with overrides: +# $(PAPI_CUDA_ROOT)/lib64/libcuda.so #O.R. PAPI_CUDA_MAIN +# $(PAPI_CUDA_ROOT)/lib64/libcudart.so #O.R. PAPI_CUDA_RUNTIME +# $(PAPI_CUDA_ROOT)/extras/CUPTI/lib64/libcupti.so #O.R. PAPI_CUDA_CUPTI +# +# There are many ways to cause these paths to be known. +# Spack is a package manager used on supercomputers, Linux and MacOS. If Spack +# is aware of CUDA, it encodes the paths to the necessary libraries. + +# The environment variable LD_LIBRARY_PATH encodes a list of paths to search for +# libraries; separated by a colon (:). 
These paths could be added to +# LD_LIBRARY_PATH. +# +# Warning: LD_LIBRARY_PATH often contains a list of directories that are +# searched for libraries, some of these may be needed by other packages you are +# using. Always extend the existing value rather than overwriting it; for example: +# >export LD_LIBRARY_PATH=someNewLibraryDirectory:$LD_LIBRARY_PATH +# which will search your new directory first, then the existing directories. +# Alternatively, you can add the new directory at the end: +# >export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:someNewLibraryDirectory +# which will search the existing directories first, then your new directory. + +# You can check on the value of LD_LIBRARY_PATH with +# >echo $LD_LIBRARY_PATH + +# There may be other package managers or utilities, for example on a system with +# modules; the command 'module load cuda' may modify LD_LIBRARY_PATH. + +# A Linux system will also search for libraries by default in the directories +# listed by /etc/ld.so.conf, and /usr/lib64, /lib64, /usr/lib, /lib. + +# OVERRIDES: These are by default empty strings (""), if set they must work. +PAPI_CUDA_MAIN = \"\" +PAPI_CUDA_RUNTIME = \"\" +PAPI_CUDA_CUPTI = \"\" + +# An example of an override: +# PAPI_CUDA_CUPTI = \"$(PAPI_CUDA_ROOT)/extras/CUPTI/lib64/libcupti.so\" + +# Note: PAPI_CUDA_MAIN and PAPI_CUDA_RUNTIME, if set, will also apply to the +# NVML component, which uses the same libraries as CUDA. + +# Note: If you change these overrides, PAPI should be rebuilt from scratch. +# From papi/src/ +# make clobber +# ./configure --with-components="cuda" +# make + +# OPERATION, per library: +# 1) If an override string is not empty, we will use it explicitly and fail if +# it does not work. This means disabling the component; a reason for disabling +# is shown using the papi utility, papi/src/utils/papi_component_avail + +# 2) We will attempt to open the library using the normal system library search +# paths; if Spack is present and configured correctly it should deliver the +# proper library.
A failure here will be silent; we will proceed to (3). + +# 3) If that fails, we will try to find the library in the standard installed +# locations listed above. If this fails, we disable the component, the reason +# for disabling is shown using the papi utility, +# papi/src/utils/papi_component_avail. + +# CUDA_MACS holds the macro defines for the three overrides. In the code we convert +# these to string variables with the following lines: +# static char cuda_main[]=PAPI_CUDA_MAIN; +# static char cuda_runtime[]=PAPI_CUDA_RUNTIME; +# static char cuda_cupti[]=PAPI_CUDA_CUPTI; + +CUDA_MACS = -DPAPI_CUDA_MAIN=$(PAPI_CUDA_MAIN) -DPAPI_CUDA_RUNTIME=$(PAPI_CUDA_RUNTIME) -DPAPI_CUDA_CUPTI=$(PAPI_CUDA_CUPTI) COMPSRCS += components/cuda/linux-cuda.c COMPOBJS += linux-cuda.o -CFLAGS += -I$(CUDA_DIR)/include -I$(CUPTI_DIR)/include -g -LDFLAGS += -L$(CUPTI_DIR)/lib64 -lcupti $(LDL) -Wl,-rpath=$(CUPTI_DIR)/lib64 -g +# CFLAGS specifies compile flags; need include files here, and macro defines. +CFLAGS += -I$(PAPI_CUDA_ROOT)/include -I$(PAPI_CUDA_ROOT)/extras/CUPTI/include -g $(CUDA_MACS) +LDFLAGS += $(LDL) -g linux-cuda.o: components/cuda/linux-cuda.c $(HEADERS) cuda_sampling $(CC) -E $(LIBCFLAGS) $(OPTFLAGS) -c components/cuda/linux-cuda.c -o linux-cuda.pre diff -Nru papi-5.7.0+dfsg/src/components/cuda/sampling/Makefile papi-6.0.0~dfsg/src/components/cuda/sampling/Makefile --- papi-5.7.0+dfsg/src/components/cuda/sampling/Makefile 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/cuda/sampling/Makefile 2020-03-04 15:56:57.000000000 +0000 @@ -1,10 +1,13 @@ -# include ../Makefile.cuda - -CUDA_DIR ?= /opt/cuda -CUPTI_DIR ?= $(CUDA_DIR)/extras/CUPTI -CFG = -I$(CUDA_DIR)/include -I$(CUPTI_DIR)/include -LDG = -L$(CUDA_DIR)/lib64 -L$(CUDA_DIR)/lib64/stubs -L$(CUPTI_DIR)/lib64 -NVCC = $(CUDA_DIR)/bin/nvcc +# Set defaults if these environment variables are not set yet.
+PAPI_CUDA_ROOT ?= /opt/cuda +PAPI_CUDA_INC ?= $(PAPI_CUDA_ROOT)/include +PAPI_CUPTI_INC ?= $(PAPI_CUDA_ROOT)/extras/CUPTI/include +PAPI_CUDA_LIBS ?= $(PAPI_CUDA_ROOT)/lib64 +PAPI_CUDA_STUBS ?= $(PAPI_CUDA_ROOT)/lib64/stubs +PAPI_CUPTI_LIBS ?= $(PAPI_CUDA_ROOT)/extras/CUPTI/lib64 +CFG = -I$(PAPI_CUDA_INC) -I$(PAPI_CUPTI_INC) +LDG = -L$(PAPI_CUDA_LIBS) -L$(PAPI_CUDA_STUBS) -L$(PAPI_CUPTI_LIBS) +NVCC = $(PAPI_CUDA_ROOT)/bin/nvcc all: @make lib diff -Nru papi-5.7.0+dfsg/src/components/cuda/tests/Makefile papi-6.0.0~dfsg/src/components/cuda/tests/Makefile --- papi-5.7.0+dfsg/src/components/cuda/tests/Makefile 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/cuda/tests/Makefile 2020-03-04 15:56:57.000000000 +0000 @@ -1,18 +1,18 @@ NAME=cuda include ../../Makefile_comp_tests.target -CUDA_DIR ?= /opt/cuda -CUPTI_DIR ?= $(CUDA_DIR)/extras/CUPTI -CUDRV_DIR ?= $(CUDA_DIR) +PAPI_CUDA_ROOT ?= /opt/cuda +PAPI_CUPTI_ROOT ?= $(PAPI_CUDA_ROOT)/extras/CUPTI +CUDRV_DIR ?= $(PAPI_CUDA_ROOT) TESTS = HelloWorld simpleMultiGPU simpleMultiGPU_no_counters cuda_tests: $(TESTS) -CUDA_DIR ?= $(CUDA_PATH) -NVCC = $(CUDA_DIR)/bin/nvcc +PAPI_CUDA_ROOT ?= $(CUDA_PATH) +NVCC = $(PAPI_CUDA_ROOT)/bin/nvcc NVCFLAGS = -g -ccbin='$(CC)' -INCLUDE += -I$(CUDA_DIR)/include -I$(CUPTI_DIR)/include -CUDALIBS = -L$(CUDRV_DIR)/lib64 -L$(CUDA_DIR)/lib64 -L$(CUDA_DIR)/lib64/stubs -L$(CUPTI_DIR)/lib64 -lcudart -lcupti -lcuda +INCLUDE += -I$(PAPI_CUDA_ROOT)/include -I$(PAPI_CUPTI_ROOT)/include +CUDALIBS = -L$(CUDRV_DIR)/lib64 -L$(PAPI_CUDA_ROOT)/lib64 -L$(PAPI_CUDA_ROOT)/lib64/stubs -L$(PAPI_CUPTI_ROOT)/lib64 -lcudart -lcupti -lcuda PAPILIB += -L../../../libpfm4/lib -lpfm default: $(TESTS) diff -Nru papi-5.7.0+dfsg/src/components/infiniband/linux-infiniband.c papi-6.0.0~dfsg/src/components/infiniband/linux-infiniband.c --- papi-5.7.0+dfsg/src/components/infiniband/linux-infiniband.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/infiniband/linux-infiniband.c 
2020-03-04 15:56:57.000000000 +0000 @@ -46,8 +46,8 @@ /** Structure that stores private information of each event */ typedef struct infiniband_register { - /* This is used by the framework.It likes it to be !=0 to do somehting */ - unsigned int selector; + /* This is used by the framework.It likes it to be !=0 to do somehting */ + unsigned int selector; } infiniband_register_t; /* @@ -58,18 +58,18 @@ typedef struct _ib_device_type { - char* dev_name; - int dev_port; - struct _ib_device_type *next; + char* dev_name; + int dev_port; + struct _ib_device_type *next; } ib_device_t; typedef struct _ib_counter_type { - char* ev_name; - char* ev_file_name; - ib_device_t* ev_device; - int extended; // if this is an extended (64-bit) counter - struct _ib_counter_type *next; + char* ev_name; + char* ev_file_name; + ib_device_t* ev_device; + int extended; // if this is an extended (64-bit) counter + struct _ib_counter_type *next; } ib_counter_t; static const char *ib_dir_path = "/sys/class/infiniband"; @@ -77,29 +77,29 @@ /** This structure is used to build the table of events */ typedef struct _infiniband_native_event_entry { - infiniband_register_t resources; - char *name; - char *description; - char* file_name; - ib_device_t* device; - int extended; /* if this is an extended (64-bit) counter */ + infiniband_register_t resources; + char *name; + char *description; + char* file_name; + ib_device_t* device; + int extended; /* if this is an extended (64-bit) counter */ } infiniband_native_event_entry_t; typedef struct _infiniband_control_state { - long long counts[INFINIBAND_MAX_COUNTERS]; - int being_measured[INFINIBAND_MAX_COUNTERS]; - /* all IB counters need difference, but use a flag for generality */ - int need_difference[INFINIBAND_MAX_COUNTERS]; - long long lastupdate; + long long counts[INFINIBAND_MAX_COUNTERS]; + int being_measured[INFINIBAND_MAX_COUNTERS]; + /* all IB counters need difference, but use a flag for generality */ + int 
need_difference[INFINIBAND_MAX_COUNTERS]; + long long lastupdate; } infiniband_control_state_t; typedef struct _infiniband_context { - infiniband_control_state_t state; - long long start_value[INFINIBAND_MAX_COUNTERS]; + infiniband_control_state_t state; + long long start_value[INFINIBAND_MAX_COUNTERS]; } infiniband_context_t; @@ -121,342 +121,489 @@ static ib_device_t *root_device = 0; static ib_counter_t *root_counter = 0; -static char* + static char* make_ib_event_description(const char* input_str, int extended) { - int i, len; - char *desc = 0; - if (! input_str) - return (0); - - desc = (char*) papi_calloc(PAPI_MAX_STR_LEN, 1); - if (desc == 0) { - PAPIERROR("cannot allocate memory for event description"); - return (0); - } - len = strlen(input_str); - - snprintf(desc, PAPI_MAX_STR_LEN, "%s (%s).", - input_str, (extended ? "free-running 64bit counter" : - "overflowing, auto-resetting counter")); - desc[0] = toupper(desc[0]); - for (i=0 ; idev_name = strdup(name); - new_dev->dev_port = port; - if (new_dev->dev_name==0) - { - PAPIERROR("cannot allocate memory for device internal fields"); - papi_free(new_dev); - return (0); - } - - // prepend the new device to the device list - new_dev->next = root_device; - root_device = new_dev; - - return (new_dev); + ib_device_t *new_dev = (ib_device_t*) papi_calloc(sizeof(ib_device_t), 1); + if (new_dev == 0) { + PAPIERROR("cannot allocate memory for new IB device structure"); + return (0); + } + + new_dev->dev_name = strdup(name); + new_dev->dev_port = port; + if (new_dev->dev_name==0) + { + PAPIERROR("cannot allocate memory for device internal fields"); + papi_free(new_dev); + return (0); + } + + // prepend the new device to the device list + new_dev->next = root_device; + root_device = new_dev; + + return (new_dev); } -static ib_counter_t* + static ib_counter_t* add_ib_counter(const char* name, const char* file_name, int extended, ib_device_t *device) { - ib_counter_t *new_cnt = (ib_counter_t*) 
papi_calloc(sizeof(ib_counter_t), 1); - if (new_cnt == 0) { - PAPIERROR("cannot allocate memory for new IB counter structure"); - return (0); - } - - new_cnt->ev_name = strdup(name); - new_cnt->ev_file_name = strdup(file_name); - new_cnt->extended = extended; - new_cnt->ev_device = device; - if (new_cnt->ev_name==0 || new_cnt->ev_file_name==0) - { - PAPIERROR("cannot allocate memory for counter internal fields"); - papi_free(new_cnt); - return (0); - } - - // prepend the new counter to the counter list - new_cnt->next = root_counter; - root_counter = new_cnt; - - return (new_cnt); + ib_counter_t *new_cnt = (ib_counter_t*) papi_calloc(sizeof(ib_counter_t), 1); + if (new_cnt == 0) { + PAPIERROR("cannot allocate memory for new IB counter structure"); + return (0); + } + + new_cnt->ev_name = strdup(name); + new_cnt->ev_file_name = strdup(file_name); + new_cnt->extended = extended; + new_cnt->ev_device = device; + if (new_cnt->ev_name==0 || new_cnt->ev_file_name==0) + { + PAPIERROR("cannot allocate memory for counter internal fields"); + papi_free(new_cnt); + return (0); + } + + // prepend the new counter to the counter list + new_cnt->next = root_counter; + root_counter = new_cnt; + + return (new_cnt); } -static int + static int find_ib_device_events(ib_device_t *dev, int extended) { - int nevents = 0; - DIR *cnt_dir = NULL; - char counters_path[128]; - - if ( extended ) { - /* mofed driver version <4.0 */ - snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/counters%s", - ib_dir_path, dev->dev_name, dev->dev_port, (extended?"_ext":"")); - - cnt_dir = opendir(counters_path); - if (cnt_dir == NULL) { - /* directory counters_ext in sysfs fs has changed to hw_counters */ - /* in 4.0 version of mofed driver */ - SUBDBG("cannot open counters directory `%s'\n", counters_path); - - snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/%scounters", - ib_dir_path, dev->dev_name, dev->dev_port, "hw_"); - - cnt_dir = opendir(counters_path); - } - } - else { - 
snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/counters", - ib_dir_path, dev->dev_name, dev->dev_port); - cnt_dir = opendir(counters_path); - } - - if (cnt_dir == NULL) { - SUBDBG("cannot open counters directory `%s'\n", counters_path); - goto out; - } - - struct dirent *ev_ent; - /* iterate over all the events */ - while ((ev_ent = readdir(cnt_dir)) != NULL) { - char *ev_name = ev_ent->d_name; - long long value = -1; - char event_path[160]; - char counter_name[80]; - - if (ev_name[0] == '.') - continue; - - /* Check that we can read an integer from the counter file */ - snprintf(event_path, sizeof(event_path), "%s/%s", counters_path, ev_name); - if (pscanf(event_path, "%lld", &value) != 1) { - SUBDBG("cannot read value for event '%s'\n", ev_name); - continue; - } - - /* Create new counter */ - snprintf(counter_name, sizeof(counter_name), "%s_%d%s:%s", - dev->dev_name, dev->dev_port, (extended?"_ext":""), ev_name); - if (add_ib_counter(counter_name, ev_name, extended, dev)) - { - SUBDBG("Added new counter `%s'\n", counter_name); - nevents += 1; - } - } - - out: - if (cnt_dir != NULL) - closedir(cnt_dir); + int nevents = 0; + DIR *cnt_dir = NULL; + char counters_path[128]; + + if ( extended ) { + /* mofed driver version <4.0 */ + snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/counters_ext", + ib_dir_path, dev->dev_name, dev->dev_port); + + cnt_dir = opendir(counters_path); + if (cnt_dir == NULL) { + /* directory counters_ext in sysfs fs has changed to hw_counters */ + /* in 4.0 version of mofed driver */ + SUBDBG("cannot open counters directory `%s'\n", counters_path); + + snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/%scounters", + ib_dir_path, dev->dev_name, dev->dev_port, "hw_"); + + cnt_dir = opendir(counters_path); + } + } + else { + snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/counters", + ib_dir_path, dev->dev_name, dev->dev_port); + cnt_dir = opendir(counters_path); + } + + if (cnt_dir == 
NULL) { + SUBDBG("cannot open counters directory `%s'\n", counters_path); + goto out; + } + + struct dirent *ev_ent; + /* iterate over all the events */ + while ((ev_ent = readdir(cnt_dir)) != NULL) { + char *ev_name = ev_ent->d_name; + long long value = -1; + char event_path[FILENAME_MAX]; + char counter_name[512]; + + if (ev_name[0] == '.') + continue; + + /* Check that we can read an integer from the counter file */ + snprintf(event_path, sizeof(event_path), "%s/%s", counters_path, ev_name); + if (pscanf(event_path, "%lld", &value) != 1) { + SUBDBG("cannot read value for event '%s'\n", ev_name); + continue; + } + + /* Create new counter */ + snprintf(counter_name, sizeof(counter_name), "%s_%d%s:%s", + dev->dev_name, dev->dev_port, (extended?"_ext":""), ev_name); + if (add_ib_counter(counter_name, ev_name, extended, dev)) + { + SUBDBG("Added new counter `%s'\n", counter_name); + nevents += 1; + } + } + +out: + if (cnt_dir != NULL) + closedir(cnt_dir); - return (nevents); + return (nevents); } -static int + static int find_ib_devices() { - DIR *ib_dir = NULL; - int result = PAPI_OK; - num_events = 0; - - ib_dir = opendir(ib_dir_path); - if (ib_dir == NULL) { - SUBDBG("cannot open `%s'\n", ib_dir_path); - strncpy(_infiniband_vector.cmp_info.disabled_reason, - "Infiniband sysfs interface not found", PAPI_MAX_STR_LEN); - result = PAPI_ENOSUPP; - goto out; - } - - struct dirent *hca_ent; - while ((hca_ent = readdir(ib_dir)) != NULL) { - char *hca = hca_ent->d_name; - char ports_path[80]; - DIR *ports_dir = NULL; - - if (hca[0] == '.') - goto next_hca; - - snprintf(ports_path, sizeof(ports_path), "%s/%s/ports", ib_dir_path, hca); - ports_dir = opendir(ports_path); - if (ports_dir == NULL) { - SUBDBG("cannot open `%s'\n", ports_path); - goto next_hca; - } - - struct dirent *port_ent; - while ((port_ent = readdir(ports_dir)) != NULL) { - int port = atoi(port_ent->d_name); - if (port <= 0) - continue; - - /* Check that port is active. 
.../HCA/ports/PORT/state should read "4: ACTIVE." */ - int state = -1; - char state_path[80]; - snprintf(state_path, sizeof(state_path), "%s/%s/ports/%d/state", ib_dir_path, hca, port); - if (pscanf(state_path, "%d", &state) != 1) { - SUBDBG("cannot read state of IB HCA `%s' port %d\n", hca, port); - continue; + DIR *ib_dir = NULL; + int result = PAPI_OK; + num_events = 0; + + ib_dir = opendir(ib_dir_path); + if (ib_dir == NULL) { + SUBDBG("cannot open `%s'\n", ib_dir_path); + strncpy(_infiniband_vector.cmp_info.disabled_reason, + "Infiniband sysfs interface not found", PAPI_MAX_STR_LEN); + result = PAPI_ENOSUPP; + goto out; + } + + struct dirent *hca_ent; + while ((hca_ent = readdir(ib_dir)) != NULL) { + char *hca = hca_ent->d_name; + char ports_path[FILENAME_MAX]; + DIR *ports_dir = NULL; + + if (hca[0] == '.') + goto next_hca; + + snprintf(ports_path, sizeof(ports_path), "%s/%s/ports", ib_dir_path, hca); + ports_dir = opendir(ports_path); + if (ports_dir == NULL) { + SUBDBG("cannot open `%s'\n", ports_path); + goto next_hca; } - if (state != 4) { - SUBDBG("skipping inactive IB HCA `%s', port %d, state %d\n", hca, port, state); - continue; + struct dirent *port_ent; + while ((port_ent = readdir(ports_dir)) != NULL) { + int port = atoi(port_ent->d_name); + if (port <= 0) + continue; + + /* Check that port is active. .../HCA/ports/PORT/state should read "4: ACTIVE." */ + int state = -1; + char state_path[FILENAME_MAX]; + snprintf(state_path, sizeof(state_path), "%s/%s/ports/%d/state", ib_dir_path, hca, port); + if (pscanf(state_path, "%d", &state) != 1) { + SUBDBG("cannot read state of IB HCA `%s' port %d\n", hca, port); + continue; + } + + if (state != 4) { + SUBDBG("skipping inactive IB HCA `%s', port %d, state %d\n", hca, port, state); + continue; + } + + /* Create dev name (HCA/PORT) and get stats for dev. 
*/ + SUBDBG("Found IB device `%s', port %d\n", hca, port); + ib_device_t *dev = add_ib_device(hca, port); + if (!dev) + continue; + // do we want to check for short counters only if no extended counters found? + num_events += find_ib_device_events(dev, 1); // check if we have extended (64bit) counters + num_events += find_ib_device_events(dev, 0); // check also for short counters } - /* Create dev name (HCA/PORT) and get stats for dev. */ - SUBDBG("Found IB device `%s', port %d\n", hca, port); - ib_device_t *dev = add_ib_device(hca, port); - if (!dev) - continue; - // do we want to check for short counters only if no extended counters found? - num_events += find_ib_device_events(dev, 1); // check if we have extended (64bit) counters - num_events += find_ib_device_events(dev, 0); // check also for short counters - } - - next_hca: - if (ports_dir != NULL) - closedir(ports_dir); - } - - if (root_device == 0) // no active devices found - { - strncpy(_infiniband_vector.cmp_info.disabled_reason, - "No active Infiniband ports found", PAPI_MAX_STR_LEN); - result = PAPI_ENOIMPL; - } else if (num_events == 0) - { - strncpy(_infiniband_vector.cmp_info.disabled_reason, - "No supported Infiniband events found", PAPI_MAX_STR_LEN); - result = PAPI_ENOIMPL; - } else - { - // Events are stored in a linked list, in reverse order than how I found them - // Revert them again, so that they are in finding order, not that it matters. 
- int i = num_events - 1; - // now allocate memory to store the counters into the native table - infiniband_native_events = (infiniband_native_event_entry_t*) - papi_calloc(num_events, sizeof(infiniband_native_event_entry_t)); - ib_counter_t *iter = root_counter; - while (iter != 0) - { - infiniband_native_events[i].name = iter->ev_name; - infiniband_native_events[i].file_name = iter->ev_file_name; - infiniband_native_events[i].device = iter->ev_device; - infiniband_native_events[i].extended = iter->extended; - infiniband_native_events[i].resources.selector = i + 1; - infiniband_native_events[i].description = - make_ib_event_description(iter->ev_file_name, iter->extended); - - ib_counter_t *tmp = iter; - iter = iter->next; - papi_free(tmp); - -- i; - } - root_counter = 0; - } - - out: - if (ib_dir != NULL) - closedir(ib_dir); - - return (result); +next_hca: + if (ports_dir != NULL) + closedir(ports_dir); + } + + if (root_device == 0) // no active devices found + { + strncpy(_infiniband_vector.cmp_info.disabled_reason, + "No active Infiniband ports found", PAPI_MAX_STR_LEN); + result = PAPI_ENOIMPL; + } else if (num_events == 0) + { + strncpy(_infiniband_vector.cmp_info.disabled_reason, + "No supported Infiniband events found", PAPI_MAX_STR_LEN); + result = PAPI_ENOIMPL; + } else + { + // Events are stored in a linked list, in reverse order than how I found them + // Revert them again, so that they are in finding order, not that it matters. 
+ int i = num_events - 1; + // now allocate memory to store the counters into the native table + infiniband_native_events = (infiniband_native_event_entry_t*) + papi_calloc(num_events, sizeof(infiniband_native_event_entry_t)); + ib_counter_t *iter = root_counter; + while (iter != 0) + { + infiniband_native_events[i].name = iter->ev_name; + infiniband_native_events[i].file_name = iter->ev_file_name; + infiniband_native_events[i].device = iter->ev_device; + infiniband_native_events[i].extended = iter->extended; + infiniband_native_events[i].resources.selector = i + 1; + infiniband_native_events[i].description = + make_ib_event_description(iter->ev_file_name, iter->extended); + + ib_counter_t *tmp = iter; + iter = iter->next; + papi_free(tmp); + -- i; + } + root_counter = 0; + } + +out: + if (ib_dir != NULL) + closedir(ib_dir); + + return (result); } -static long long + static long long read_ib_counter_value(int index) { - char ev_file[128]; - char counters_path[128]; - DIR *cnt_dir = NULL; - long long value = 0ll; - infiniband_native_event_entry_t *iter = &infiniband_native_events[index]; - - if ( iter->extended ) { - /* mofed driver version <4.0 */ - snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/counters%s", - ib_dir_path, iter->device->dev_name, iter->device->dev_port, "_ext"); - - cnt_dir = opendir(counters_path); - if (cnt_dir == NULL) { - /* directory counters_ext in sysfs fs has changed to hw_counters */ - /* in 4.0 version of mofed driver */ - snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/%scounters", - ib_dir_path, iter->device->dev_name, iter->device->dev_port, "hw_"); - - cnt_dir = opendir(counters_path); - } - } - else { - snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/counters", - ib_dir_path, iter->device->dev_name, iter->device->dev_port ); - cnt_dir = opendir(counters_path); - } - - - if (cnt_dir != NULL) - closedir(cnt_dir); - - - snprintf(ev_file, sizeof(ev_file), "%s/%s", - counters_path, 
iter->file_name); - - if (pscanf(ev_file, "%lld", &value) != 1) { - PAPIERROR("cannot read value for counter '%s'\n", iter->name); - } else - { - SUBDBG("Counter '%s': %lld\n", iter->name, value); - } - return (value); + char ev_file[FILENAME_MAX]; + char counters_path[FILENAME_MAX]; + DIR *cnt_dir = NULL; + long long value = 0ll; + infiniband_native_event_entry_t *iter = &infiniband_native_events[index]; + + if ( iter->extended ) { + /* mofed driver version <4.0 */ + snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/counters%s", + ib_dir_path, iter->device->dev_name, iter->device->dev_port, "_ext"); + + cnt_dir = opendir(counters_path); + if (cnt_dir == NULL) { + /* directory counters_ext in sysfs fs has changed to hw_counters */ + /* in 4.0 version of mofed driver */ + snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/%scounters", + ib_dir_path, iter->device->dev_name, iter->device->dev_port, "hw_"); + + cnt_dir = opendir(counters_path); + } + } + else { + snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/counters", + ib_dir_path, iter->device->dev_name, iter->device->dev_port ); + cnt_dir = opendir(counters_path); + } + + + if (cnt_dir != NULL) + closedir(cnt_dir); + + + snprintf(ev_file, sizeof(ev_file), "%s/%s", + counters_path, iter->file_name); + + if (pscanf(ev_file, "%lld", &value) != 1) { + PAPIERROR("cannot read value for counter '%s'\n", iter->name); + } else + { + SUBDBG("Counter '%s': %lld\n", iter->name, value); + } + return (value); } -static void + static void deallocate_infiniband_resources() { - int i; - - if (infiniband_native_events) - { - for (i=0 ; idev_name) - free(iter->dev_name); - - ib_device_t *tmp = iter; - iter = iter->next; - papi_free(tmp); - } - root_device = 0; + int i; + + if (infiniband_native_events) + { + for (i=0 ; idev_name) + free(iter->dev_name); + + ib_device_t *tmp = iter; + iter = iter->next; + papi_free(tmp); + } + root_device = 0; } 
/***************************************************************************** @@ -466,11 +613,11 @@ /* * This is called whenever a thread is initialized */ -static int + static int _infiniband_init_thread( hwd_context_t *ctx ) { - (void) ctx; - return PAPI_OK; + (void) ctx; + return PAPI_OK; } @@ -478,28 +625,28 @@ * and get hardware information, this routine is called when the * PAPI process is initialized (IE PAPI_library_init) */ -static int + static int _infiniband_init_component( int cidx ) { - /* discover Infiniband devices and available events */ - int result = find_ib_devices(); - - if (result != PAPI_OK) // we couldn't initialize the component - { - // deallocate any eventually allocated memory - deallocate_infiniband_resources(); - } - - _infiniband_vector.cmp_info.num_native_events = num_events; + /* discover Infiniband devices and available events */ + int result = find_ib_devices(); - _infiniband_vector.cmp_info.num_cntrs = num_events; - _infiniband_vector.cmp_info.num_mpx_cntrs = num_events; + if (result != PAPI_OK) // we couldn't initialize the component + { + // deallocate any eventually allocated memory + deallocate_infiniband_resources(); + } + _infiniband_vector.cmp_info.num_native_events = num_events; - /* Export the component id */ - _infiniband_vector.cmp_info.CmpIdx = cidx; + _infiniband_vector.cmp_info.num_cntrs = num_events; + _infiniband_vector.cmp_info.num_mpx_cntrs = num_events; - return (result); + + /* Export the component id */ + _infiniband_vector.cmp_info.CmpIdx = cidx; + + return (result); } @@ -507,113 +654,113 @@ * Control of counters (Reading/Writing/Starting/Stopping/Setup) * functions */ -static int + static int _infiniband_init_control_state( hwd_control_state_t *ctl ) { - infiniband_control_state_t* control = (infiniband_control_state_t*) ctl; - int i; + infiniband_control_state_t* control = (infiniband_control_state_t*) ctl; + int i; - for (i=0 ; ibeing_measured[i] = 0; - } + for (i=0 ; ibeing_measured[i] = 0; + } - return 
PAPI_OK; + return PAPI_OK; } /* * */ -static int + static int _infiniband_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) { - infiniband_context_t* context = (infiniband_context_t*) ctx; - infiniband_control_state_t* control = (infiniband_control_state_t*) ctl; - long long now = PAPI_get_real_usec(); - int i; - - for (i=0 ; ibeing_measured[i] && control->need_difference[i]) { - context->start_value[i] = read_ib_counter_value(i); - } - } - control->lastupdate = now; + infiniband_context_t* context = (infiniband_context_t*) ctx; + infiniband_control_state_t* control = (infiniband_control_state_t*) ctl; + long long now = PAPI_get_real_usec(); + int i; + + for (i=0 ; ibeing_measured[i] && control->need_difference[i]) { + context->start_value[i] = read_ib_counter_value(i); + } + } + control->lastupdate = now; - return PAPI_OK; + return PAPI_OK; } /* * */ -static int + static int _infiniband_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) { - infiniband_context_t* context = (infiniband_context_t*) ctx; - infiniband_control_state_t* control = (infiniband_control_state_t*) ctl; - long long now = PAPI_get_real_usec(); - int i; - long long temp; - - for (i=0 ; ibeing_measured[i]) - { - temp = read_ib_counter_value(i); - if (context->start_value[i] && control->need_difference[i]) { - /* Must subtract values, but check for wraparound. - * We cannot even detect all wraparound cases. Using the short, - * auto-resetting IB counters is error prone. - */ - if (temp < context->start_value[i]) { - SUBDBG("Wraparound!\nstart:\t%#016x\ttemp:\t%#016x", - (unsigned)context->start_value[i], (unsigned)temp); - /* The counters auto-reset. I cannot even adjust them to - * account for a simple wraparound. - * Just use the current reading of the counter, which is useless. 
- */ - } else - temp -= context->start_value[i]; - } - control->counts[i] = temp; - } - } - control->lastupdate = now; + infiniband_context_t* context = (infiniband_context_t*) ctx; + infiniband_control_state_t* control = (infiniband_control_state_t*) ctl; + long long now = PAPI_get_real_usec(); + int i; + long long temp; + + for (i=0 ; ibeing_measured[i]) + { + temp = read_ib_counter_value(i); + if (context->start_value[i] && control->need_difference[i]) { + /* Must subtract values, but check for wraparound. + * We cannot even detect all wraparound cases. Using the short, + * auto-resetting IB counters is error prone. + */ + if (temp < context->start_value[i]) { + SUBDBG("Wraparound!\nstart:\t%#016x\ttemp:\t%#016x", + (unsigned)context->start_value[i], (unsigned)temp); + /* The counters auto-reset. I cannot even adjust them to + * account for a simple wraparound. + * Just use the current reading of the counter, which is useless. + */ + } else + temp -= context->start_value[i]; + } + control->counts[i] = temp; + } + } + control->lastupdate = now; - return PAPI_OK; + return PAPI_OK; } /* * */ -static int + static int _infiniband_read( hwd_context_t *ctx, hwd_control_state_t *ctl, - long_long ** events, int flags ) + long_long ** events, int flags ) { - ( void ) flags; - - _infiniband_stop(ctx, ctl); /* we cannot actually stop the counters */ - /* Pass back a pointer to our results */ - *events = ((infiniband_control_state_t*) ctl)->counts; - - return PAPI_OK; + ( void ) flags; + + _infiniband_stop(ctx, ctl); /* we cannot actually stop the counters */ + /* Pass back a pointer to our results */ + *events = ((infiniband_control_state_t*) ctl)->counts; + + return PAPI_OK; } -static int + static int _infiniband_shutdown_component( void ) { - /* Cleanup resources used by this component before leaving */ - deallocate_infiniband_resources(); - - return PAPI_OK; + /* Cleanup resources used by this component before leaving */ + deallocate_infiniband_resources(); + + return 
PAPI_OK; } -static int + static int _infiniband_shutdown_thread( hwd_context_t *ctx ) { - ( void ) ctx; + ( void ) ctx; - return PAPI_OK; + return PAPI_OK; } @@ -622,39 +769,39 @@ * The valid codes being passed in are PAPI_SET_DEFDOM, * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT */ -static int + static int _infiniband_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) { - ( void ) ctx; - ( void ) code; - ( void ) option; - return PAPI_OK; + ( void ) ctx; + ( void ) code; + ( void ) option; + return PAPI_OK; } -static int + static int _infiniband_update_control_state( hwd_control_state_t *ctl, - NativeInfo_t * native, - int count, - hwd_context_t *ctx ) -{ - int i, index; - ( void ) ctx; - - infiniband_control_state_t* control = (infiniband_control_state_t*) ctl; - - for (i=0 ; ibeing_measured[i] = 0; - } - - for (i=0 ; ibeing_measured[index] = 1; - control->need_difference[index] = 1; - } - return PAPI_OK; + NativeInfo_t * native, + int count, + hwd_context_t *ctx ) +{ + int i, index; + ( void ) ctx; + + infiniband_control_state_t* control = (infiniband_control_state_t*) ctl; + + for (i=0 ; ibeing_measured[i] = 0; + } + + for (i=0 ; ibeing_measured[index] = 1; + control->need_difference[index] = 1; + } + return PAPI_OK; } @@ -668,129 +815,129 @@ * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) * PAPI_DOM_ALL is all of the domains */ -static int + static int _infiniband_set_domain( hwd_control_state_t *ctl, int domain ) { - int found = 0; - (void) ctl; - - if (PAPI_DOM_USER & domain) - found = 1; - - if (PAPI_DOM_KERNEL & domain) - found = 1; + int found = 0; + (void) ctl; + + if (PAPI_DOM_USER & domain) + found = 1; - if (PAPI_DOM_OTHER & domain) - found = 1; + if (PAPI_DOM_KERNEL & domain) + found = 1; - if (!found) - return (PAPI_EINVAL); + if (PAPI_DOM_OTHER & domain) + found = 1; - return (PAPI_OK); + if (!found) + return (PAPI_EINVAL); + + return (PAPI_OK); } /* * Cannot reset the counters using the 
sysfs interface. */ -static int + static int _infiniband_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) { - (void) ctx; - (void) ctl; - return PAPI_OK; + (void) ctx; + (void) ctl; + return PAPI_OK; } /* * Native Event functions */ -static int + static int _infiniband_ntv_enum_events( unsigned int *EventCode, int modifier ) { - switch (modifier) { - case PAPI_ENUM_FIRST: - if (num_events == 0) - return (PAPI_ENOEVNT); - - *EventCode = 0; - return PAPI_OK; - - case PAPI_ENUM_EVENTS: - { - int index = *EventCode & PAPI_NATIVE_AND_MASK; + switch (modifier) { + case PAPI_ENUM_FIRST: + if (num_events == 0) + return (PAPI_ENOEVNT); - if (index < num_events - 1) { - *EventCode = *EventCode + 1; + *EventCode = 0; return PAPI_OK; - } else - return PAPI_ENOEVNT; - break; - } - default: - return PAPI_EINVAL; - } - return PAPI_EINVAL; + case PAPI_ENUM_EVENTS: + { + int index = *EventCode & PAPI_NATIVE_AND_MASK; + + if (index < num_events - 1) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else + return PAPI_ENOEVNT; + + break; + } + default: + return PAPI_EINVAL; + } + return PAPI_EINVAL; } /* * */ -static int + static int _infiniband_ntv_code_to_name( unsigned int EventCode, char *name, int len ) { - int index = EventCode; + int index = EventCode; - if (index>=0 && index=0 && index=0 && index=0 && index= num_events )) return PAPI_ENOEVNT; + int index = EventCode; - if (infiniband_native_events[index].name) - { - unsigned int len = strlen(infiniband_native_events[index].name); - if (len > sizeof(info->symbol)-1) len = sizeof(info->symbol)-1; - strncpy(info->symbol, infiniband_native_events[index].name, len); - info->symbol[len] = '\0'; - } - if (infiniband_native_events[index].description) - { - unsigned int len = strlen(infiniband_native_events[index].description); - if (len > sizeof(info->long_descr)-1) len = sizeof(info->long_descr)-1; - strncpy(info->long_descr, infiniband_native_events[index].description, len); - info->long_descr[len] = '\0'; - } + if ( ( 
index < 0) || (index >= num_events )) return PAPI_ENOEVNT; - strncpy(info->units, "\0", 1); - /* infiniband_native_events[index].units, sizeof(info->units)); */ - -/* info->data_type = infiniband_native_events[index].return_type; - */ - return PAPI_OK; + if (infiniband_native_events[index].name) + { + unsigned int len = strlen(infiniband_native_events[index].name); + if (len > sizeof(info->symbol)-1) len = sizeof(info->symbol)-1; + strncpy(info->symbol, infiniband_native_events[index].name, len); + info->symbol[len] = '\0'; + } + if (infiniband_native_events[index].description) + { + unsigned int len = strlen(infiniband_native_events[index].description); + if (len > sizeof(info->long_descr)-1) len = sizeof(info->long_descr)-1; + strncpy(info->long_descr, infiniband_native_events[index].description, len); + info->long_descr[len] = '\0'; + } + + strncpy(info->units, "\0", 1); + /* infiniband_native_events[index].units, sizeof(info->units)); */ + + /* info->data_type = infiniband_native_events[index].return_type; + */ + return PAPI_OK; } @@ -798,50 +945,50 @@ * */ papi_vector_t _infiniband_vector = { - .cmp_info = { + .cmp_info = { /* component information (unspecified values are initialized to 0) */ - .name = "infiniband", - .short_name = "infiniband", - .version = "5.3.0", - .description = "Linux Infiniband statistics using the sysfs interface", - .num_mpx_cntrs = INFINIBAND_MAX_COUNTERS, - .num_cntrs = INFINIBAND_MAX_COUNTERS, - .default_domain = PAPI_DOM_USER | PAPI_DOM_KERNEL, - .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, - .default_granularity = PAPI_GRN_SYS, - .available_granularities = PAPI_GRN_SYS, - .hardware_intr_sig = PAPI_INT_SIGNAL, - - /* component specific cmp_info initializations */ - .fast_real_timer = 0, - .fast_virtual_timer = 0, - .attach = 0, - .attach_must_ptrace = 0, - }, - - /* sizes of framework-opaque component-private structures */ - .size = { - .context = sizeof (infiniband_context_t), - .control_state = sizeof 
(infiniband_control_state_t), - .reg_value = sizeof (infiniband_register_t), - /* .reg_alloc = sizeof (infiniband_reg_alloc_t), */ - }, - /* function pointers in this component */ - .init_thread = _infiniband_init_thread, - .init_component = _infiniband_init_component, - .init_control_state = _infiniband_init_control_state, - .start = _infiniband_start, - .stop = _infiniband_stop, - .read = _infiniband_read, - .shutdown_thread = _infiniband_shutdown_thread, - .shutdown_component = _infiniband_shutdown_component, - .ctl = _infiniband_ctl, - .update_control_state = _infiniband_update_control_state, - .set_domain = _infiniband_set_domain, - .reset = _infiniband_reset, - - .ntv_enum_events = _infiniband_ntv_enum_events, - .ntv_code_to_name = _infiniband_ntv_code_to_name, - .ntv_code_to_descr = _infiniband_ntv_code_to_descr, - .ntv_code_to_info = _infiniband_ntv_code_to_info, + .name = "infiniband", + .short_name = "infiniband", + .version = "5.3.0", + .description = "Linux Infiniband statistics using the sysfs interface", + .num_mpx_cntrs = INFINIBAND_MAX_COUNTERS, + .num_cntrs = INFINIBAND_MAX_COUNTERS, + .default_domain = PAPI_DOM_USER | PAPI_DOM_KERNEL, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof (infiniband_context_t), + .control_state = sizeof (infiniband_control_state_t), + .reg_value = sizeof (infiniband_register_t), + /* .reg_alloc = sizeof (infiniband_reg_alloc_t), */ + }, + /* function pointers in this component */ + .init_thread = _infiniband_init_thread, + .init_component = _infiniband_init_component, + .init_control_state = _infiniband_init_control_state, + .start = 
_infiniband_start, + .stop = _infiniband_stop, + .read = _infiniband_read, + .shutdown_thread = _infiniband_shutdown_thread, + .shutdown_component = _infiniband_shutdown_component, + .ctl = _infiniband_ctl, + .update_control_state = _infiniband_update_control_state, + .set_domain = _infiniband_set_domain, + .reset = _infiniband_reset, + + .ntv_enum_events = _infiniband_ntv_enum_events, + .ntv_code_to_name = _infiniband_ntv_code_to_name, + .ntv_code_to_descr = _infiniband_ntv_code_to_descr, + .ntv_code_to_info = _infiniband_ntv_code_to_info, }; diff -Nru papi-5.7.0+dfsg/src/components/infiniband/tests/Makefile papi-6.0.0~dfsg/src/components/infiniband/tests/Makefile --- papi-5.7.0+dfsg/src/components/infiniband/tests/Makefile 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/infiniband/tests/Makefile 2020-03-04 15:56:57.000000000 +0000 @@ -3,8 +3,14 @@ TESTS = infiniband_list_events infiniband_values_by_code +ifneq ($(MPICC),) +TESTS += MPI_test_infiniband_events +endif + infiniband_tests: $(TESTS) +MPI_test_infiniband_events.o:MPI_test_infiniband_events.c + $(MPICC) $(INCLUDE) -c -o $@ $< %.o:%.c $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< @@ -15,6 +21,9 @@ infiniband_values_by_code: infiniband_values_by_code.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o $@ $^ $(LDFLAGS) +MPI_test_infiniband_events: MPI_test_infiniband_events.o $(UTILOBJS) $(PAPILIB) + $(MPICC) $(INCLUDE) -o $@ $^ $(LDFLAGS) + clean: rm -f $(TESTS) *.o diff -Nru papi-5.7.0+dfsg/src/components/infiniband/tests/MPI_test_infiniband_events.c papi-6.0.0~dfsg/src/components/infiniband/tests/MPI_test_infiniband_events.c --- papi-5.7.0+dfsg/src/components/infiniband/tests/MPI_test_infiniband_events.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/infiniband/tests/MPI_test_infiniband_events.c 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,732 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** 
+ * @file MPI_test_infiniband_events.c + * + * @author Rizwan Ashraf + * rizwan@icl.utk.edu + * + * MPI-based test case for the infiniband component. + * + * @brief + * The test code uses the message passing interface (MPI) to test all interconnect + * related events available in the infiniband component. It is designed to generate + * network traffic using MPI routines with the goal to trigger some network counters. + * The code automatically checks if the infiniband component is enabled and + * correspondingly adds all available PAPI events in the event set, one at a time. + * In each invocation, different data sizes are communicated over the network. + * The event values are recorded in each case, and listed at the completion of the + * test. Mostly, the event values need to be checked manually for correctness. + * The code automatically tests expected behavior of the code for transmit (TX)/ + * receive (RX) event types. + * + * In this test, the master process distributes workload to all other processes + * (NumProcs-1) and then receives the results of the corresponding sub-computations. + * As far as message transfers is concerned, the expected behavior of this code + * is as follows: + * 1. Master TX event ~= Sum of all RX events across all workers (NumProcs-1), + * 2. Master RX event ~= Sum of all TX events across all workers (NumProcs-1). + * Usage: mpirun -n ./MPI_test_infiniband_events + */ + +#include +#include +#include +#include +#include + +/* headers required by PAPI */ +#include "papi.h" +#include "papi_test.h" + +/* constants */ +// NSIZE_MIN/MAX: min/max no. of double floating point +// values allocated at each node +#define NSIZE_MIN 10000 +#define NSIZE_MAX 100000 +// No. of different data sizes to be +// tested b/w NSIZE_MIN and NSIZE_MAX +#define NSTEPS 9 +// The max no. 
of infiniband events expected +#define MAX_IB_EVENTS 150 +// Threshold value to use when comparing TX/RX event values, +// i.e., error will be recorded when any difference greater +// than the threshold occurs +#define EVENT_VAL_DIFF_THRESHOLD 100 +// PASS_THRESHOLD: percentage of values out of all possibilities +// which need to be correct for the test to PASS +// WARN_THRESHOLD: If PASS_THRESHOLD is not met, then this threshold +// is used to check if the test can be declared +// PASS WITH WARNING. Otherwise, the test is declared +// as FAILED. +// NSIZE_* : No. of Data Sizes out of all possible NSTEPS data sizes +// where TX/RX event value comparison will be performed to check +// expected behavior. +#define NSIZE_PASS_THRESHOLD 90 +#define NSIZE_WARN_THRESHOLD 50 +// EVENT_* : No. of events out of all possible events as reported by +// component_info which need to be added successfully to the +// event set. +#define EVENT_PASS_THRESHOLD 90 +#define EVENT_WARN_THRESHOLD 50 + +int main (int argc, char **argv) { + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /************************* SETUP PAPI ENV ************************************* + *******************************************************************************/ + int retVal, r, code; + int ComponentID, NumComponents, IB_ID = -1; + int EventSet = PAPI_NULL; + int eventCount = 0; // total events as reported by component info + int eventNum = 0; // number of events successfully tested + + /* error reporting */ + int addEventFailCount = 0, codeConvertFailCount = 0, eventInfoFailCount = 0; + int PAPIstartFailCount = 0, PAPIstopFailCount = 0; + int failedEventCodes[MAX_IB_EVENTS]; + int failedEventIndex = 0; + + /* Note: these are fixed length arrays */ + char eventNames[MAX_IB_EVENTS][PAPI_MAX_STR_LEN]; + char description[MAX_IB_EVENTS][PAPI_MAX_STR_LEN]; + long long values[NSTEPS][MAX_IB_EVENTS]; + + /* these record certain event values for event value testing */ + long long 
rxCount[NSTEPS], txCount[NSTEPS]; + + const PAPI_component_info_t *cmpInfo = NULL; + PAPI_event_info_t eventInfo; + + /* for timing the test */ + long long startTime, endTime; + double elapsedTime; + + /* PAPI Initialization */ + retVal = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retVal != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed. The test has been terminated.\n",retVal); + } + + /* Get total number of components detected by PAPI */ + NumComponents = PAPI_num_components(); + + /* Check if infiniband component exists */ + for ( ComponentID = 0; ComponentID < NumComponents; ComponentID++ ) { + + if ( (cmpInfo = PAPI_get_component_info(ComponentID)) == NULL ) { + fprintf(stderr, "WARNING: PAPI_get_component_info failed on one of the components.\n" + "\t The test will continue for now, but it will be skipped later on\n" + "\t if this error was for a component under test.\n"); + continue; + } + + if (strcmp(cmpInfo->name, "infiniband") != 0) { + continue; + } + + // if we are here, Infiniband component is found + if (!TESTS_QUIET) { + printf("INFO: Component %d (%d) - %d events - %s\n", + ComponentID, cmpInfo->CmpIdx, + cmpInfo->num_native_events, cmpInfo->name); + } + + if (cmpInfo->disabled) { + test_skip(__FILE__,__LINE__,"Infiniband Component is disabled. The test has been terminated.\n", 0); + break; + } + + eventCount = cmpInfo->num_native_events; + IB_ID = ComponentID; + break; + } + + /* if we did not find any valid events, just skip the test. 
*/ + if (eventCount==0) { + fprintf(stderr, "FATAL: No events found for the Infiniband component, even though it is enabled.\n" + " The test will be skipped.\n"); + test_skip(__FILE__,__LINE__,"No events found for the Infiniband component.\n", 0); + } + + /************************* SETUP MPI ENV ************************************** + *******************************************************************************/ + int NumProcs, Rank; + + /* Initialize MPI environment */ + MPI_Init (&argc, &argv); + MPI_Comm_size (MPI_COMM_WORLD, &NumProcs); + MPI_Comm_rank (MPI_COMM_WORLD, &Rank); + + if ((!TESTS_QUIET) && (Rank == 0)) { + printf("INFO: This test should trigger some network events.\n"); + } + + /* data sizes assigned here */ + int Nmax_per_Proc = NSIZE_MAX; + int Nmin_per_Proc = NSIZE_MIN; + // fix data size if not appropriately set + while (Nmax_per_Proc <= Nmin_per_Proc) + Nmax_per_Proc = Nmin_per_Proc*10; + int Nmax = Nmax_per_Proc * NumProcs; + int NstepSize = (Nmax_per_Proc - Nmin_per_Proc)/NSTEPS; + + int i, j, k; // loop variables + int memoryAllocateFailure = 0, ALLmemoryAllocateFailure = 0; // error flags + + /* data arrays */ + double *X, *Y, *Out; + double *Xp, *Yp, *Outp; + + /* Master will initialize data arrays */ + if (Rank == 0) { + X = (double *) malloc (sizeof(double) * Nmax); + Y = (double *) malloc (sizeof(double) * Nmax); + Out = (double *) malloc (sizeof(double) * Nmax); + + // check if memory was successfully allocated. + // Do NOT quit from here. Need to quit safely. 
+ if ( (X == NULL) || (Y == NULL) || (Out == NULL) ) { + fprintf(stderr, "FATAL: Failed to allocate memory on Master Node.\n"); + memoryAllocateFailure = 1; + } + + if (memoryAllocateFailure == 0) { + + if (!TESTS_QUIET) + printf("INFO: Master is initializing data.\n"); + + for ( i = 0; i < Nmax; i++ ) { + X[i] = i*0.25; + Y[i] = i*0.75; + } + + if (!TESTS_QUIET) + printf("INFO: Master has successfully initialized arrays.\n"); + + } + } + + // communicate to workers if master was able to successfully allocate memory + MPI_Bcast (&memoryAllocateFailure, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (memoryAllocateFailure == 1) + test_fail(__FILE__,__LINE__,"Could not allocate memory during the test. This is fatal and the test has been terminated.\n", 0); + + memoryAllocateFailure = 0; // re-use flag + + /* allocate memory for all nodes */ + Xp = (double *) malloc (sizeof(double) * Nmax_per_Proc); + Yp = (double *) malloc (sizeof(double) * Nmax_per_Proc); + Outp = (double *) malloc (sizeof(double) * Nmax_per_Proc); + + // handle error cases for memory allocation failure for all nodes. + if ( (Xp == NULL) || (Yp == NULL) || (Outp == NULL) ) { + fprintf(stderr, "FATAL: Failed to allocate %zu bytes on Rank %d.\n", sizeof(double)*Nmax_per_Proc, Rank); + memoryAllocateFailure = 1; + } + MPI_Allreduce (&memoryAllocateFailure, &ALLmemoryAllocateFailure, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + if (ALLmemoryAllocateFailure > 0) + test_fail(__FILE__,__LINE__,"Could not allocate memory during the test. 
This is fatal and the test has been terminated.\n", 0); + + /* calculate data size for each compute step */ + int Nstep_per_Proc; + int DataSizes[NSTEPS]; + for (i = 0; i < NSTEPS; i++) { + Nstep_per_Proc = Nmin_per_Proc + (i * NstepSize); + //last iteration or when max size is exceeded + if ((i == (NSTEPS - 1)) || (Nstep_per_Proc > Nmax_per_Proc)) + Nstep_per_Proc = Nmax_per_Proc; + DataSizes[i] = Nstep_per_Proc; + } + + /************************* MAIN TEST CODE ************************************* + *******************************************************************************/ + startTime = PAPI_get_real_nsec(); + + /* create an eventSet */ + retVal = PAPI_create_eventset ( &EventSet ); + if (retVal != PAPI_OK) { + // handle error cases for PAPI_create_eventset() + // Two outcomes are possible here: + // 1. PAPI_EINVAL: invalid argument. This should not occur. + // 2. PAPI_ENOMEM: insufficient memory. If this is the case, then we need to quit the test. + fprintf(stderr, "FATAL: Could not create an eventSet on MPI Rank %d due to: %s.\n" + " Test will not proceed.\n", Rank, PAPI_strerror(retVal)); + test_fail(__FILE__, __LINE__, "PAPI_create_eventset failed. This is fatal and the test has been terminated.\n", retVal); + } // end -- handle error cases for PAPI_create_eventset() + + /* find the code for first event in component */ + code = PAPI_NATIVE_MASK; + r = PAPI_enum_cmp_event ( &code, PAPI_ENUM_FIRST, IB_ID ); + + /* add each event individually in the eventSet and measure event values. */ + /* for each event, repeat work with different data sizes. 
*/ + while ( r == PAPI_OK ) { + + // attempt to add event to event set + retVal = PAPI_add_event (EventSet, code); + if (retVal != PAPI_OK ) { + // handle error cases for PAPI_add_event() + if (retVal == PAPI_ENOMEM) { + fprintf(stderr, "FATAL: Could not add an event to eventSet on MPI Rank %d due to insufficient memory.\n" + " Test will not proceed.\n", Rank); + test_fail(__FILE__, __LINE__, "PAPI_add_event failed due to fatal error and the test has been terminated.\n", retVal); + } + + if (retVal == PAPI_ENOEVST) { + fprintf(stderr, "WARNING: Could not add an event to eventSet on MPI Rank %d since eventSet does not exist.\n" + "\t Test will proceed attempting to create a new eventSet\n", Rank); + EventSet = PAPI_NULL; + retVal = PAPI_create_eventset ( &EventSet ); + if (retVal != PAPI_OK) + test_fail(__FILE__, __LINE__, "PAPI_create_eventset failed while handling failure of PAPI_add_event." + " This is fatal and the test has been terminated.\n", retVal); + continue; + } + + if (retVal == PAPI_EISRUN) { + long long tempValue; + fprintf(stderr, "WARNING: Could not add an event to eventSet on MPI Rank %d since eventSet is already counting.\n" + "\t Test will proceed attempting to stop counting and re-attempting to add current event.\n", Rank); + retVal = PAPI_stop (EventSet, &tempValue); + if (retVal != PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_stop failed while handling failure of PAPI_add_event." + " This is fatal and the test has been terminated.\n", retVal); + retVal = PAPI_cleanup_eventset( EventSet ); + if (retVal != PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_cleanup_eventset failed while handling failure of PAPI_add_event." 
+ " This is fatal and the test has been terminated.\n", retVal); + continue; + } + + // for all other errors, skip an event + addEventFailCount++; // error reporting + failedEventCodes[failedEventIndex] = code; + failedEventIndex++; + fprintf(stderr, "WARNING: Could not add an event to eventSet on MPI Rank %d due to: %s.\n" + "\t Test will proceed attempting to add other events.\n", Rank, PAPI_strerror(retVal)); + + r = PAPI_enum_cmp_event (&code, PAPI_ENUM_EVENTS, IB_ID); + + if (addEventFailCount >= eventCount) // if no event was added successfully + break; + + continue; + } // end -- handle error cases for PAPI_add_event() + + /* get event name of added event */ + retVal = PAPI_event_code_to_name (code, eventNames[eventNum]); + if (retVal != PAPI_OK ) { + // handle error cases for PAPI_event_code_to_name(). + codeConvertFailCount++; // error reporting + fprintf(stderr, "WARNING: PAPI_event_code_to_name failed due to: %s.\n" + "\t Test will proceed but an event name will not be available.\n", PAPI_strerror(retVal)); + strncpy(eventNames[eventNum], "ERROR:NOT_AVAILABLE", sizeof(eventNames[0])-1); + eventNames[eventNum][sizeof(eventNames[0])-1] = '\0'; + } // end -- handle error cases for PAPI_event_code_to_name() + + /* get long description of added event */ + retVal = PAPI_get_event_info (code, &eventInfo); + if (retVal != PAPI_OK ) { + // handle error cases for PAPI_get_event_info() + eventInfoFailCount++; // error reporting + fprintf(stderr, "WARNING: PAPI_get_event_info failed due to: %s.\n" + "\t Test will proceed but an event description will not be available.\n", PAPI_strerror(retVal)); + strncpy(description[eventNum], "ERROR:NOT_AVAILABLE", sizeof(description[0])-1); + description[eventNum][sizeof(description[0])-1] = '\0'; + } else { + strncpy(description[eventNum], eventInfo.long_descr, sizeof(description[0])-1); + description[eventNum][sizeof(description[0])-1] = '\0'; + } + + /****************** PERFORM WORK (W/ DIFFERENT DATA SIZES) 
********************* + *******************************************************************************/ + for (i = 0; i < NSTEPS; i++) { + + /* start recording event value */ + retVal = PAPI_start (EventSet); + if (retVal != PAPI_OK ) { + // handle error cases for PAPI_start() + // we need to skip the current event being counted for all errors, + // in all cases, errors will be handled later on. + + PAPIstartFailCount++; // error reporting + failedEventCodes[failedEventIndex] = code; + failedEventIndex++; + fprintf(stderr, "WARNING: PAPI_start failed on Event Number %d (%s) due to: %s.\n" + "\t Test will proceed with other events if available.\n", + eventNum, eventNames[eventNum], PAPI_strerror(retVal)); + + for (k = i; k < NSTEPS; k++) // fill invalid event values. + values[k][eventNum] = (unsigned long long) - 1; + + break; // try next event + } // end -- handle error cases for PAPI_start() + + if ((!TESTS_QUIET) && (Rank == 0)) + printf("INFO: Doing MPI communication for %s: min. %ld bytes transferred by each process.\n", + eventNames[eventNum], DataSizes[i]*sizeof(double)); + + MPI_Scatter (X, DataSizes[i], MPI_DOUBLE, Xp, DataSizes[i], MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Scatter (Y, DataSizes[i], MPI_DOUBLE, Yp, DataSizes[i], MPI_DOUBLE, 0, MPI_COMM_WORLD); + + /* perform calculation. */ + /* Note: there is redundant computation here. */ + for (j = 0; j < DataSizes[i]; j++) + Outp [j] = Xp [j] + Yp [j]; + + MPI_Gather (Outp, DataSizes[i], MPI_DOUBLE, Out, DataSizes[i], MPI_DOUBLE, 0, MPI_COMM_WORLD); + + /* stop recording and collect event value */ + retVal = PAPI_stop (EventSet, &values[i][eventNum]); + if (retVal != PAPI_OK ) { + // handle error cases for PAPI_stop() + // we need to skip the current event for all errors + // except one case, as below. 
+ PAPIstopFailCount++; // error reporting + if (retVal == PAPI_ENOTRUN) { + fprintf(stderr, "WARNING: PAPI_stop failed on Event Number %d (%s) since eventSet is not running.\n" + "\t Test will attempt to restart counting on this eventSet.\n", + eventNum, eventNames[eventNum]); + if (PAPIstopFailCount < NSTEPS) { + i = i - 1; // re-attempt this data size + continue; + } + } + // for all other errors, try next event. + failedEventCodes[failedEventIndex] = code; + failedEventIndex++; + fprintf(stderr, "WARNING: PAPI_stop failed on Event Number %d (%s) due to: %s.\n" + "\t Test will proceed with other events if available.\n", + eventNum, eventNames[eventNum], PAPI_strerror(retVal)); + + for (k = i; k < NSTEPS; k++) // fill invalid event values + values[k][eventNum] = (unsigned long long) - 1; + + break; + } // end -- handle error cases for PAPI_stop() + + /* record number of bytes received */ + if (strstr(eventNames[eventNum], ":port_rcv_data")) { + rxCount[i] = values[i][eventNum] * 4; // counter value needs to be multiplied by 4 to get total number of bytes + } + /* record number of bytes transmitted */ + if (strstr(eventNames[eventNum], ":port_xmit_data")) { + txCount[i] = values[i][eventNum] * 4; + } + + } // end -- work loop + + /* Done, clean up eventSet for next iteration */ + retVal = PAPI_cleanup_eventset( EventSet ); + if (retVal != PAPI_OK) { + // handle failure cases for PAPI_cleanup_eventset() + if (retVal == PAPI_ENOEVST) { + fprintf(stderr, "WARNING: Could not clean up eventSet on MPI Rank %d since eventSet does not exist.\n" + "\t Test will proceed attempting to create a new eventSet\n", Rank); + EventSet = PAPI_NULL; + retVal = PAPI_create_eventset ( &EventSet ); + if (retVal != PAPI_OK) + test_fail(__FILE__, __LINE__, "PAPI_create_eventset failed while handling failure of PAPI_cleanup_eventset.\n" + "This is fatal and the test has been terminated.\n", retVal); + } else if (retVal == PAPI_EISRUN) { + long long tempValue; + fprintf(stderr, "WARNING: 
Could not clean up eventSet on MPI Rank %d since eventSet is already counting.\n" + "\t Test will proceed attempting to stop counting and re-attempting to clean up.\n", Rank); + retVal = PAPI_stop (EventSet, &tempValue); + if (retVal != PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_stop failed while handling failure of PAPI_cleanup_eventset." + "This is fatal and the test has been terminated.\n", retVal); + retVal = PAPI_cleanup_eventset( EventSet ); + if (retVal != PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_cleanup_eventset failed once again while handling failure of PAPI_cleanup_eventset." + "This is fatal and the test has been terminated.\n", retVal); + } else { + test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset failed:", retVal); + } + } // end -- handle failure cases for PAPI_cleanup_eventset() + + /* get next event */ + eventNum++; + r = PAPI_enum_cmp_event (&code, PAPI_ENUM_EVENTS, IB_ID); + + } // end -- event loop + + // free memory at all nodes + free (Xp); free (Yp); free (Outp); + + /* Done, destroy eventSet */ + retVal = PAPI_destroy_eventset( &EventSet ); + if (retVal != PAPI_OK) { + // handle error cases for PAPI_destroy_eventset() + if (retVal == PAPI_ENOEVST || retVal == PAPI_EINVAL) { + fprintf(stderr, "WARNING: Could not destroy eventSet on MPI Rank %d since eventSet does not exist or has invalid value.\n" + "\t Test will proceed with other operations.\n", Rank); + } else if (retVal == PAPI_EISRUN) { + long long tempValue; + fprintf(stderr, "WARNING: Could not destroy eventSet on MPI Rank %d since eventSet is already counting.\n" + "\t Test will proceed attempting to stop counting and re-attempting to clean up.\n", Rank); + retVal = PAPI_stop (EventSet, &tempValue); + if (retVal != PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_stop failed while handling failure of PAPI_destroy_eventset." 
+ "This is fatal and the test has been terminated.\n", retVal); + retVal = PAPI_cleanup_eventset( EventSet ); + if (retVal != PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_cleanup_eventset failed while handling failure of PAPI_destroy_eventset." + "This is fatal and the test has been terminated.\n", retVal); + retVal = PAPI_destroy_eventset(&EventSet); + if (retVal != PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_destroy_eventset failed once again while handling failure of PAPI_destroy_eventset." + " This is fatal and the test has been terminated.\n", retVal); + } else { + fprintf(stderr, "WARNING: Could not destroy eventSet on MPI Rank %d since there is an internal bug in PAPI.\n" + "\t Please report this to the developers. Test will proceed and operation may be unexpected.\n", Rank); + } + } // end -- handle failure cases for PAPI_destroy_eventset() + + /*************************** SUMMARIZE RESULTS ******************************** + ******************************************************************************/ + endTime = PAPI_get_real_nsec(); + elapsedTime = ((double) (endTime-startTime))/1.0e9; + + /* print results: event values and descriptions */ + if (!TESTS_QUIET) { + int eventX; + // print event values at each process/rank + printf("POST WORK EVENT VALUES (Rank, Event Name, List of Event Values w/ Different Data Sizes)>>>\n"); + for (eventX = 0; eventX < eventNum; eventX++) { + printf("\tRank %d> %s --> \t\t", Rank, eventNames[eventX]); + for (i = 0; i < NSTEPS; i++) { + if (i < NSTEPS-1) + printf("%lld, ", values[i][eventX]); + else + printf("%lld.", values[i][eventX]); + } + printf("\n"); + } + + // print description of each event + if (Rank == 0) { + printf("\n\nTHE DESCRIPTION OF EVENTS IS AS FOLLOWS>>>\n"); + for (eventX = 0; eventX < eventNum; eventX++) { + printf("\t%s \t\t--> %s \n", eventNames[eventX], description[eventX]); + } + } + } + + /* test summary: 1) sanity check on floating point computation */ + int computeTestPass = 0, 
computeTestPassCount = 0; + if (Rank == 0) { + // check results of computation + for (i = 0; i < Nmax; i++) { + if ( fabs(Out[i] - (X[i] + Y[i])) < 0.00001 ) + computeTestPassCount++; + } + // summarize results of computation + if (computeTestPassCount == Nmax) + computeTestPass = 1; + + // free memory + free (X); free (Y); free (Out); + } + // communicate test results to everyone + MPI_Bcast (&computeTestPass, 1, MPI_INT, 0, MPI_COMM_WORLD); + + /* test summary: 2) check TX and RX event values, if available */ + long long rxCountSumWorkers[NSTEPS], txCountSumWorkers[NSTEPS]; + long long *allProcessRxEvents, *allProcessTxEvents; + int txFailedIndex = 0, rxFailedIndex = 0; + int txFailedDataSizes[NSTEPS], rxFailedDataSizes[NSTEPS]; + int eventValueTestPass = 0; // for test summary + if ((txCount[0] > 0) && (rxCount[0] > 0)) { + if (Rank == 0) { + allProcessRxEvents = (long long*) malloc(sizeof(long long) * NumProcs * NSTEPS); + allProcessTxEvents = (long long*) malloc(sizeof(long long) * NumProcs * NSTEPS); + } + // get all rxCount/txCount at master. Used to check if rx/tx counts match up. 
+ MPI_Gather (&rxCount, NSTEPS, MPI_LONG_LONG, allProcessRxEvents, NSTEPS, MPI_LONG_LONG, 0, MPI_COMM_WORLD); + MPI_Gather (&txCount, NSTEPS, MPI_LONG_LONG, allProcessTxEvents, NSTEPS, MPI_LONG_LONG, 0, MPI_COMM_WORLD); + + // perform event count check at master + if (Rank == 0) { + memset (rxCountSumWorkers, 0, sizeof(long long) * NSTEPS); + memset (txCountSumWorkers, 0, sizeof(long long) * NSTEPS); + for (i = 0; i < NSTEPS; i++) { + for (j = 1; j < NumProcs; j++) { + rxCountSumWorkers[i] += allProcessRxEvents[j*NSTEPS+i]; + txCountSumWorkers[i] += allProcessTxEvents[j*NSTEPS+i]; + } + } + + if (!TESTS_QUIET) printf("\n\n"); + for (i = 0; i < NSTEPS; i++) { + // check: Master TX event ~= Sum of all RX events across all workers (NumProcs-1) + // difference threshold may need to be adjusted based on observed values + if ((llabs(rxCountSumWorkers[i] - txCount[i]) > EVENT_VAL_DIFF_THRESHOLD)) { + txFailedDataSizes[txFailedIndex] = DataSizes[i]; + txFailedIndex++; + if (!TESTS_QUIET) + printf("WARNING: The transmit event count at Master Node (%lld) is not equal" + " to receive event counts at Worker Nodes (%lld) when using %ld bytes!\n" + "\t A difference of %lld was recorded.\n", txCount[i], rxCountSumWorkers[i], + DataSizes[i]*sizeof(double), llabs(rxCountSumWorkers[i] - txCount[i])); + } else { + if (!TESTS_QUIET) + printf("PASSED: The transmit event count at Master Node (%lld) is almost equal" + " to receive event counts at Worker Nodes (%lld) when using %ld bytes.\n", + txCount[i], rxCountSumWorkers[i], DataSizes[i]*sizeof(double)); + } + + // check: Master RX event ~= Sum of all TX events across all workers (NumProcs-1) + if ((llabs(txCountSumWorkers[i] - rxCount[i]) > EVENT_VAL_DIFF_THRESHOLD)) { + rxFailedDataSizes[rxFailedIndex] = DataSizes[i]; + rxFailedIndex++; + if (!TESTS_QUIET) + printf("WARNING: The receive event count at Master Node (%lld) is not equal" + " to transmit event counts at Worker Nodes (%lld) when using %ld bytes!\n" + " A difference of %lld 
was recorded.\n", rxCount[i], txCountSumWorkers[i], + DataSizes[i]*sizeof(double), llabs(txCountSumWorkers[i] - rxCount[i])); + } else { + if (!TESTS_QUIET) + printf("PASSED: The receive event count at Master Node (%lld) is almost equal" + " to transmit event counts at Worker Nodes (%lld) when using %ld bytes.\n", + rxCount[i], txCountSumWorkers[i], DataSizes[i]*sizeof(double)); + } + } + + // test evaluation criteria + if ( (((float) txFailedIndex / NSTEPS) <= (1.0 - (float) NSIZE_PASS_THRESHOLD/100)) && + (((float) rxFailedIndex / NSTEPS) <= (1.0 - (float) NSIZE_PASS_THRESHOLD/100)) ) + eventValueTestPass = 1; // pass + else if ( (((float) txFailedIndex / NSTEPS) <= (1.0 - (float) NSIZE_WARN_THRESHOLD/100)) && + (((float) rxFailedIndex / NSTEPS) <= (1.0 - (float) NSIZE_WARN_THRESHOLD/100)) ) + eventValueTestPass = -1; // warning + else + eventValueTestPass = 0; // fail + + } // end -- check RX/TX counts for all data sizes at Master node. + + // communicate test results to everyone, since only master knows the result + MPI_Bcast (&eventValueTestPass, 1, MPI_INT, 0, MPI_COMM_WORLD); + + } else { + eventValueTestPass = -2; // not available + } // end -- event value test + + /* test summary: 3) number of events added and counted successfully */ + // Note: under some rare circumstances, the number of failed events at each node may be different. + int eventNumTestPass = 0; + // test evaluation criteria + if (((float) failedEventIndex / eventCount) <= (1.0 - (float) EVENT_PASS_THRESHOLD/100) ) + eventNumTestPass = 1; + else if (((float) failedEventIndex / eventCount) <= (1.0 - (float) EVENT_WARN_THRESHOLD/100) ) + eventNumTestPass = -1; + else + eventNumTestPass = 0; + + + /* print test summary */ + if ((!TESTS_QUIET) && (Rank == 0)) { + + printf("\n\n************************ TEST SUMMARY (EVENTS) ******************************\n" + "No. of Events NOT tested successfully: %d (%.1f%%)\n" + "Note: the above failed event count is for Master node.\n" + "Total No. 
of Events reported by component info: %d\n", + failedEventIndex, ((float) failedEventIndex/eventCount)*100.0, eventCount); + + if (failedEventIndex > 0) { + printf("\tNames of Events NOT tested: "); + char failedEventName[PAPI_MAX_STR_LEN]; + for (i = 0; i < failedEventIndex; i++) { + retVal = PAPI_event_code_to_name (failedEventCodes[i], failedEventName); + if (retVal != PAPI_OK) { + strncpy(failedEventName, "ERROR:NOT_AVAILABLE", sizeof(failedEventName)-1); + failedEventName[sizeof(failedEventName)-1] = '\0'; + } + printf("%s ", failedEventName); + if ((i > 0) && (i % 2 == 1)) printf("\n \t\t\t\t"); + } + printf("\n"); + + printf("\tThe error counts for different PAPI routines are as follows:\n" + "\t\t\tNo. of PAPI add event errors (major) --> %d\n" + "\t\t\tNo. of PAPI code convert errors (minor) --> %d\n" + "\t\t\tNo. of PAPI event info errors (minor) --> %d\n" + "\t\t\tNo. of PAPI start errors (major) --> %d\n" + "\t\t\tNo. of PAPI stop errors (major) --> %d\n", + addEventFailCount, codeConvertFailCount, eventInfoFailCount, PAPIstartFailCount, PAPIstopFailCount); + } + printf("The PAPI event test has "); + if (eventNumTestPass == 1) printf("PASSED\n"); + else if (eventNumTestPass == -1) printf("PASSED WITH WARNING\n"); + else printf("FAILED\n"); + + // event values + printf("************************ TEST SUMMARY (EVENT VALUES) ************************\n"); + if ((txCount[0] > 0) && (rxCount[0] > 0)) { + printf("No. of times transmit event at Master node did NOT match up receive events at worker nodes: %d (%.1f%%)\n" + "No. of times receive event at Master node did NOT match up transmit events at worker nodes: %d (%.1f%%)\n" + "Total No. 
of data sizes tested: %d\n" + "\tList of Data Sizes tested in bytes:\n\t\t\t", + txFailedIndex, ((float) txFailedIndex/NSTEPS)*100.0, rxFailedIndex, ((float) rxFailedIndex/NSTEPS)*100.0, NSTEPS); + for (i = 0; i < NSTEPS; i++) + printf("%ld ",DataSizes[i]*sizeof(double)); + printf("\n"); + if (txFailedIndex > 0 || rxFailedIndex > 0) { + printf("\tList of Data Sizes where transmit count at Master was not equal to sum of all worker receive counts:\n" + "\t\t\t"); + for (i = 0; i < txFailedIndex; i++) + printf("%ld ", txFailedDataSizes[i]*sizeof(double)); + printf("\n\tList of Data Sizes where receive count at Master was not equal to sum of all worker transmit counts:\n" + "\t\t\t"); + for (i = 0; i < rxFailedIndex; i++) + printf("%ld ", rxFailedDataSizes[i]*sizeof(double)); + printf("\n"); + } + printf("The PAPI event value test has "); + if (eventValueTestPass == 1) printf("PASSED\n"); + else if (eventValueTestPass == -1) printf("PASSED WITH WARNING\n"); + else printf("FAILED\n"); + } else { + printf("Transmit or receive events were NOT found!\n"); + } + + // compute values + printf("************************ TEST SUMMARY (COMPUTE VALUES) **********************\n"); + if (computeTestPassCount != Nmax) { + printf("No. of times sanity check FAILED on the floating point computation: %d (%.1f%%)\n" + "Total No. of floating point computations performed: %d \n", + Nmax-computeTestPassCount, ((float) (Nmax-computeTestPassCount)/Nmax)*100.0, Nmax); + } else { + printf("Sanity check PASSED on all floating point computations.\n" + "Note: this may pass even if one event was tested successfully!\n"); + } + printf("The overall test took %.3f secs.\n\n", elapsedTime); + } // end -- print summary of test results. + + /* finialize MPI */ + MPI_Finalize(); + + /* determine success of overall test based on all tests */ + if (computeTestPass == 1 && eventValueTestPass == 1 && eventNumTestPass == 1) { + // all has to be good for the test to pass. 
+ // note: test will generate a warning if tx/rx events are not available. + test_pass( __FILE__ ); + } + else if ( (eventValueTestPass < 0 && (eventNumTestPass < 0 || eventNumTestPass == 1) ) || + (eventValueTestPass == 1 && eventNumTestPass < 0) || + (eventValueTestPass == 1 && eventNumTestPass == 1 && computeTestPass == 0) ) { + test_warn(__FILE__,__LINE__,"A warning was generated during any PAPI related tests or sanity check on computation failed", 0); + test_pass(__FILE__); + } + else { + // fail, in case any of eventValueTest and eventNumTest have failed, + // irrespective of the result of computeTest. + test_fail(__FILE__, __LINE__,"Any of PAPI event related tests have failed", 0); + } + +} // end main diff -Nru papi-5.7.0+dfsg/src/components/infiniband_umad/configure.in papi-6.0.0~dfsg/src/components/infiniband_umad/configure.in --- papi-5.7.0+dfsg/src/components/infiniband_umad/configure.in 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/infiniband_umad/configure.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,57 +0,0 @@ -# Process this file with autoconf to produce a configure script. -# File: components/infiniband/configure.in -# CVS: $Id$ - -AC_INIT - - -# looking for infiniband ibumad packages header file and library -AC_ARG_WITH(infiniband_ibumad_dir, - [ --with-infiniband_ibumad_dir= Specify path to InfiniBand ibumad root directory ], - [case "$with_infiniband_ibumad_dir" in - yes|''|no) AC_MSG_ERROR([--with-infiniband_ibumad_dir requires a path]) ;; - *) infiniband_ibumad_dir=$with_infiniband_ibumad_dir ;; - esac], - [infiniband_ibumad_dir="/usr/include/infiniband"]) - -CFLAGS="$CFLAGS -I$infiniband_ibumad_dir/include" -AC_CHECK_HEADERS([infiniband/umad.h], - [AC_DEFINE([HAVE_INFINIBAND_H], [1], [infiniband header] )], - [AC_MSG_ERROR([umad.h not found. 
See --with-infiniband_ibumad_dir ])], - [#include ]) - -LDFLAGS="$LDFLAGS -L$infiniband_ibumad_dir/lib64 -libumad" -AC_CHECK_LIB([ibumad], - [umad_init], - [], - [AC_MSG_ERROR([libibumad.a is needed for the PAPI infiniband component])]) - - -# looking for infiniband ibmad packages header file and library -AC_ARG_WITH(infiniband_ibmad_dir, - [ --with-infiniband_ibmad_dir= Specify path to InfiniBand ibmad root directory ], - [case "$with_infiniband_ibmad_dir" in - yes|''|no) AC_MSG_ERROR([--with-infiniband_ibmad_dir requires a path]) ;; - *) infiniband_ibmad_dir=$with_infiniband_ibmad_dir ;; - esac], - [infiniband_ibmad_dir="/usr/include/infiniband"]) - -CFLAGS="$CFLAGS -I$infiniband_ibmad_dir/include" -AC_CHECK_HEADERS([infiniband/mad.h], - [AC_DEFINE([HAVE_INFINIBAND_H], [1], [infiniband header] )], - [AC_MSG_ERROR([mad.h not found. See --with-infiniband_ibmad_dir ])], - [#include ]) - -LDFLAGS="$LDFLAGS -L$infiniband_ibmad_dir/lib64 -libmad -L$infiniband_ibumad_dir/lib64 -libumad" -AC_CHECK_LIB([ibmad], - [madrpc_init], - [], - [AC_MSG_ERROR([libibmad.a is needed for the PAPI infiniband component])]) - - -AC_SUBST(infiniband_ibmad_dir) -AC_SUBST(infiniband_ibumad_dir) -## AC_SUBST(infiniband_libdir) -AC_CONFIG_FILES([Makefile.infiniband_umad]) - -AC_OUTPUT diff -Nru papi-5.7.0+dfsg/src/components/infiniband_umad/linux-infiniband_umad.c papi-6.0.0~dfsg/src/components/infiniband_umad/linux-infiniband_umad.c --- papi-5.7.0+dfsg/src/components/infiniband_umad/linux-infiniband_umad.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/infiniband_umad/linux-infiniband_umad.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,976 +0,0 @@ -/****************************/ -/* THIS IS OPEN SOURCE CODE */ -/****************************/ - -/** - * @file linux-infiniband_umad.c - * @author Heike Jagode (in collaboration with Michael Kluge, TU Dresden) - * jagode@eecs.utk.edu - * @author Tony Castaldo; minor changes; the infiniband_umad.h differs - * on the 
declaration of umad_get_ca(), and demands a const char* - * instead of a char*. Corrected the prototypes below. - * - * - * @ingroup papi_components - * - * InfiniBand component - * - * Tested version of OFED: 1.4 - * - * @brief - * This file has the source code for a component that enables PAPI-C to - * access hardware monitoring counters for InfiniBand devices through the - * OFED library. Since a new interface was introduced with OFED version 1.4 - * (released Dec 2008), the current InfiniBand component does not support - * OFED versions < 1.4. - */ -#include - -#include "papi.h" -#include "papi_internal.h" -#include "papi_vector.h" -#include "papi_memory.h" - -#include "linux-infiniband_umad.h" - -void (*_dl_non_dynamic_init)(void) __attribute__((weak)); - -/******** CHANGE PROTOTYPES TO DECLARE Infiniband LIBRARY SYMBOLS AS WEAK ********** - * This is done so that a version of PAPI built with the infiniband component can * - * be installed on a system which does not have the infiniband libraries installed. * - * * - * If this is done without these prototypes, then all papi services on the system * - * without the infiniband libraries installed will fail. The PAPI libraries * - * contain references to the infiniband libraries which are not installed. The * - * load of PAPI commands fails because the infiniband library references can not * - * be resolved. * - * * - * This also defines pointers to the infiniband library functions that we call. * - * These function pointers will be resolved with dlopen/dlsym calls at component * - * initialization time. The component then calls the infiniband library functions * - * through these function pointers. 
* - *************************************************************************************/ -int __attribute__((weak)) umad_init ( void ); -int __attribute__((weak)) umad_get_cas_names ( char [][UMAD_CA_NAME_LEN], int ); -int __attribute__((weak)) umad_get_ca ( const char *, umad_ca_t * ); -void __attribute__((weak)) mad_decode_field ( unsigned char *, enum MAD_FIELDS, void *); -struct ibmad_port * __attribute__((weak)) mad_rpc_open_port ( char *, int, int *, int ); -int __attribute__((weak)) ib_resolve_self_via ( ib_portid_t *, int *, ibmad_gid_t *, const struct ibmad_port * ); -uint8_t * __attribute__((weak)) performance_reset_via ( void *, ib_portid_t *, int, unsigned, unsigned, unsigned, const struct ibmad_port * ); -uint8_t * __attribute__((weak)) pma_query_via ( void *, ib_portid_t *, int, unsigned, unsigned, const struct ibmad_port * ); - -int (*umad_initPtr) ( void ); -int (*umad_get_cas_namesPtr) ( char [][UMAD_CA_NAME_LEN], int ); -int (*umad_get_caPtr) ( const char *, umad_ca_t * ); -void (*mad_decode_fieldPtr) ( unsigned char *, enum MAD_FIELDS, void * ); -struct ibmad_port * (*mad_rpc_open_portPtr) ( char *, int, int *, int ); -int (*ib_resolve_self_viaPtr) (ib_portid_t *, int *, ibmad_gid_t *, const struct ibmad_port * ); -uint8_t * (*performance_reset_viaPtr) (void *, ib_portid_t *, int, unsigned, unsigned, unsigned, const struct ibmad_port * ); -uint8_t * (*pma_query_viaPtr) (void *, ib_portid_t *, int, unsigned, unsigned, const struct ibmad_port * ); - -// file handles used to access Infiniband libraries with dlopen -static void* dl1 = NULL; -static void* dl2 = NULL; - -static int linkInfinibandLibraries (); - -papi_vector_t _infiniband_umad_vector; - - - -struct ibmad_port *srcport; -static ib_portid_t portid; -static int ib_timeout = 0; -static int ibportnum = 0; - -static counter_info *subscriptions[INFINIBAND_MAX_COUNTERS]; -static int is_initialized = 0; -static int num_counters = 0; -static int is_finalized = 0; - -/* counters are kept in a 
list */ -static counter_info *root_counter = NULL; -/* IB ports found are kept in a list */ -static ib_port *root_ib_port = NULL; -static ib_port *active_ib_port = NULL; - -#define infiniband_native_table subscriptions -/* macro to initialize entire structs to 0 */ -#define InitStruct(var, type) type var; memset(&var, 0, sizeof(type)) - -long long _papi_hwd_infiniband_register_start[INFINIBAND_MAX_COUNTERS]; -long long _papi_hwd_infiniband_register[INFINIBAND_MAX_COUNTERS]; - - -/******************************************************************************* - ******** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT ********* - ******************************************************************************/ - -/** - * use libumad to discover IB ports - */ -static void -init_ib_counter( ) -{ - char names[20][UMAD_CA_NAME_LEN]; - int n, i; - char *ca_name; - umad_ca_t ca; - int r; - int portnum; - -// if ( umad_init( ) < 0 ) { -// fprintf( stderr, "can't init UMAD library\n" ); -// exit( 1 ); -// } - - if ( ( n = (*umad_get_cas_namesPtr)( ( void * ) names, UMAD_CA_NAME_LEN ) ) < 0 ) { - fprintf( stderr, "can't list IB device names\n" ); - exit( 1 ); - } - - for ( i = 0; i < n; i++ ) { - ca_name = names[i]; - - if ( ( r = (*umad_get_caPtr)( ca_name, &ca ) ) < 0 ) { - fprintf( stderr, "can't read ca from IB device\n" ); - exit( 1 ); - } - - if ( !ca.node_type ) - continue; - - /* port numbers are '1' based in OFED */ - for ( portnum = 1; portnum <= ca.numports; portnum++ ) - addIBPort( ca.ca_name, ca.ports[portnum] ); - } -} - - -/** - * add a counter to the list of available counters - * @param name the short name of the counter - * @param desc a longer description - * @param unit the unit for this counter - */ -static counter_info * -addCounter( const char *name, const char *desc, const char *unit ) -{ - counter_info *cntr, *last; - - cntr = ( counter_info * ) malloc( sizeof ( counter_info ) ); - if ( cntr == NULL ) { - fprintf( stderr, "can not allocate 
memory for new counter\n" ); - exit( 1 ); - } - cntr->name = strdup( name ); - cntr->description = strdup( desc ); - cntr->unit = strdup( unit ); - cntr->value = 0; - cntr->next = NULL; - - if ( root_counter == NULL ) { - root_counter = cntr; - } else { - last = root_counter; - while ( last->next != NULL ) - last = last->next; - last->next = cntr; - } - - return cntr; -} - - -/** - * add one IB port to the list of available ports and add the - * counters related to this port to the global counter list - */ -static void -addIBPort( const char *ca_name, umad_port_t * port ) -{ - ib_port *nwif, *last; - char counter_name[512]; - - nwif = ( ib_port * ) malloc( sizeof ( ib_port ) ); - - if ( nwif == NULL ) { - fprintf( stderr, "can not allocate memory for IB port description\n" ); - exit( 1 ); - } - - sprintf( counter_name, "%s_%d", ca_name, port->portnum ); - nwif->name = strdup( counter_name ); - - sprintf( counter_name, "%s_%d_recv", ca_name, port->portnum ); - nwif->recv_cntr = - addCounter( counter_name, "bytes received on this IB port", "bytes" ); - - sprintf( counter_name, "%s_%d_send", ca_name, port->portnum ); - nwif->send_cntr = - addCounter( counter_name, "bytes written to this IB port", "bytes" ); - - nwif->port_rate = port->rate; - nwif->is_initialized = 0; - nwif->port_number = port->portnum; - nwif->next = NULL; - - num_counters += 2; - - if ( root_ib_port == NULL ) { - root_ib_port = nwif; - } else { - last = root_ib_port; - while ( last->next != NULL ) - last = last->next; - last->next = nwif; - } -} - - -/** - * initialize one IB port so that we are able to read values from it - */ -static int -init_ib_port( ib_port * portdata ) -{ - int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS, - IB_PERFORMANCE_CLASS - }; - char *ca = 0; - static uint8_t pc[1024]; - int mask = 0xFFFF; - - srcport = (*mad_rpc_open_portPtr)( ca, portdata->port_number, mgmt_classes, 4 ); - if ( !srcport ) { - fprintf( stderr, "Failed to open '%s' port '%d'\n", 
ca, - portdata->port_number ); - exit( 1 ); - } - - if ( (*ib_resolve_self_viaPtr)( &portid, &ibportnum, 0, srcport ) < 0 ) { - fprintf( stderr, "can't resolve self port\n" ); - exit( 1 ); - } - - /* PerfMgt ClassPortInfo is a required attribute */ - /* might be redundant, could be left out for fast implementation */ - if ( !(*pma_query_viaPtr) ( pc, &portid, ibportnum, ib_timeout, CLASS_PORT_INFO, srcport ) ) { - fprintf( stderr, "classportinfo query\n" ); - exit( 1 ); - } - - if ( !(*performance_reset_viaPtr) ( pc, &portid, ibportnum, mask, ib_timeout, IB_GSI_PORT_COUNTERS, srcport ) ) { - fprintf( stderr, "perf reset\n" ); - exit( 1 ); - } - - /* read the initial values */ - (*mad_decode_fieldPtr)( pc, IB_PC_XMT_BYTES_F, &portdata->last_send_val ); - portdata->sum_send_val = 0; - (*mad_decode_fieldPtr)( pc, IB_PC_RCV_BYTES_F, &portdata->last_recv_val ); - portdata->sum_recv_val = 0; - - portdata->is_initialized = 1; - - return 0; -} - - -/** - * read and reset IB counters (reset on demand) - */ -static int -read_ib_counter( ) -{ - uint32_t send_val; - uint32_t recv_val; - uint8_t pc[1024]; - /* 32 bit counter FFFFFFFF */ - uint32_t max_val = 4294967295; - /* if it is bigger than this -> reset */ - uint32_t reset_limit = max_val * 0.7; - int mask = 0xFFFF; - - if ( active_ib_port == NULL ) - return 0; - - /* reading cost ~70 mirco secs */ - if ( !(*pma_query_viaPtr) ( pc, &portid, ibportnum, ib_timeout, IB_GSI_PORT_COUNTERS, srcport ) ) { - fprintf( stderr, "perfquery\n" ); - exit( 1 ); - } - - (*mad_decode_fieldPtr)( pc, IB_PC_XMT_BYTES_F, &send_val ); - (*mad_decode_fieldPtr)( pc, IB_PC_RCV_BYTES_F, &recv_val ); - - /* multiply the numbers read by 4 as the IB port counters are not - counting bytes. they always count 32dwords. 
see man page of - perfquery for details - internally a uint64_t ia used to sum up the values */ - active_ib_port->sum_send_val += - ( send_val - active_ib_port->last_send_val ) * 4; - active_ib_port->sum_recv_val += - ( recv_val - active_ib_port->last_recv_val ) * 4; - - active_ib_port->send_cntr->value = active_ib_port->sum_send_val; - active_ib_port->recv_cntr->value = active_ib_port->sum_recv_val; - - if ( send_val > reset_limit || recv_val > reset_limit ) { - /* reset cost ~70 mirco secs */ - if ( !(*performance_reset_viaPtr) ( pc, &portid, ibportnum, mask, ib_timeout, IB_GSI_PORT_COUNTERS, srcport ) ) { - fprintf( stderr, "perf reset\n" ); - exit( 1 ); - } - - (*mad_decode_fieldPtr)( pc, IB_PC_XMT_BYTES_F, &active_ib_port->last_send_val ); - (*mad_decode_fieldPtr)( pc, IB_PC_RCV_BYTES_F, &active_ib_port->last_recv_val ); - } else { - active_ib_port->last_send_val = send_val; - active_ib_port->last_recv_val = recv_val; - } - - return 0; -} - - -void -host_read_values( long long *data ) -{ - int loop; - - read_ib_counter( ); - - for ( loop = 0; loop < INFINIBAND_MAX_COUNTERS; loop++ ) { - if ( subscriptions[loop] == NULL ) - break; - - data[loop] = subscriptions[loop]->value; - } -} - - -/** - * find the pointer for a counter_info structure based on the counter name - */ -static counter_info * -counterFromName( const char *cntr ) -{ - int loop = 0; - char tmp[512]; - counter_info *local_cntr = root_counter; - - while ( local_cntr != NULL ) { - if ( strcmp( cntr, local_cntr->name ) == 0 ) - return local_cntr; - - local_cntr = local_cntr->next; - loop++; - } - - gethostname( tmp, 512 ); - fprintf( stderr, "can not find host counter: %s on %s\n", cntr, tmp ); - fprintf( stderr, "we only have: " ); - local_cntr = root_counter; - - while ( local_cntr != NULL ) { - fprintf( stderr, "'%s' ", local_cntr->name ); - local_cntr = local_cntr->next; - loop++; - } - - fprintf( stderr, "\n" ); - exit( 1 ); - /* never reached */ - return 0; -} - - -/** - * allow external code 
to subscribe to a counter based on the counter name - */ -static uint64_t -host_subscribe( const char *cntr ) -{ - int loop; - int len; - char tmp_name[512]; - ib_port *aktp; - - counter_info *counter = counterFromName( cntr ); - - for ( loop = 0; loop < INFINIBAND_MAX_COUNTERS; loop++ ) { - if ( subscriptions[loop] == NULL ) { - subscriptions[loop] = counter; - counter->idx = loop; - - /* we have an IB counter if the name ends with _send or _recv and - the prefix before that is in the ib_port list */ - if ( ( len = strlen( cntr ) ) > 5 ) { - if ( strcmp( &cntr[len - 5], "_recv" ) == 0 || - strcmp( &cntr[len - 5], "_send" ) == 0 ) { - /* look through all IB_counters */ - strncpy( tmp_name, cntr, len - 5 ); - tmp_name[len - 5] = 0; - aktp = root_ib_port; - // printf("looking for IB port '%s'\n", tmp_name); - while ( aktp != NULL ) { - if ( strcmp( aktp->name, tmp_name ) == 0 ) { - if ( !aktp->is_initialized ) { - init_ib_port( aktp ); - active_ib_port = aktp; - } - return loop + 1; - } - /* name does not match, if this counter is - initialized, we can't have two active IB ports */ - if ( aktp->is_initialized ) { -#if 0 /* not necessary with OFED version >= 1.4 */ - fprintf( stderr, - "unable to activate IB port monitoring for more than one port\n" ); - exit( 1 ); -#endif - } - aktp = aktp->next; - } - } - } - return loop + 1; - } - } - fprintf( stderr, "please subscribe only once to each counter\n" ); - exit( 1 ); - /* never reached */ - return 0; -} - - -/** - * return a newly allocated list of strings containing all counter names - */ -static string_list * -host_listCounter( int num_counters1 ) -{ - string_list *list; - counter_info *cntr = root_counter; - - list = malloc( sizeof ( string_list ) ); - if ( list == NULL ) { - fprintf( stderr, "unable to allocate memory for new string_list" ); - exit( 1 ); - } - list->count = 0; - list->data = ( char ** ) malloc( num_counters1 * sizeof ( char * ) ); - - if ( list->data == NULL ) { - fprintf( stderr, - "unable to 
allocate memory for %d pointers in a new string_list\n", - num_counters1 ); - exit( 1 ); - } - - while ( cntr != NULL ) { - list->data[list->count++] = strdup( cntr->name ); - cntr = cntr->next; - } - - return list; -} - - -/** - * finalizes the library - */ -static void -host_finalize( ) -{ - counter_info *cntr, *next; - - if ( is_finalized ) - return; - - cntr = root_counter; - - while ( cntr != NULL ) { - next = cntr->next; - free( cntr->name ); - free( cntr->description ); - free( cntr->unit ); - free( cntr ); - cntr = next; - } - - root_counter = NULL; - - ib_port *nwif, *last; - last = root_ib_port; - while ( last != NULL ) { // While we have ports; - nwif = last; // Copy the pointer. - last = last->next; // update the loop pointer now. - if (nwif->name) free(nwif->name); // Free any name malloc. - - free(nwif); // free the chain link itself. - } - - root_ib_port = NULL; // All done with this. - - is_finalized = 1; -} // end host_finalize() - - -/** - * delete a list of strings - */ -static void -host_deleteStringList( string_list * to_delete ) -{ - int loop; - - if ( to_delete->data != NULL ) { - for ( loop = 0; loop < to_delete->count; loop++ ) - free( to_delete->data[loop] ); - - free( to_delete->data ); - } - - free( to_delete ); -} - - -/***************************************************************************** - ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* - *****************************************************************************/ - -/* - * This is called whenever a thread is initialized - */ -int -INFINIBAND_init_thread( hwd_context_t * ctx ) -{ - string_list *counter_list = NULL; - int i; - int loop; - - /* initialize portid struct of type ib_portid_t to 0 */ - InitStruct( portid, ib_portid_t ); - - if ( is_initialized ) - return PAPI_OK; - - is_initialized = 1; - - init_ib_counter( ); - - for ( loop = 0; loop < INFINIBAND_MAX_COUNTERS; loop++ ) - subscriptions[loop] = NULL; - - counter_list = host_listCounter( 
num_counters ); - - for ( i = 0; i < counter_list->count; i++ ) - host_subscribe( counter_list->data[i] ); - - ( ( INFINIBAND_context_t * ) ctx )->state.ncounter = counter_list->count; - - host_deleteStringList( counter_list ); - - return PAPI_OK; -} - - -/* Initialize hardware counters, setup the function vector table - * and get hardware information, this routine is called when the - * PAPI process is initialized (IE PAPI_library_init) - */ -int -INFINIBAND_init_component( int cidx ) -{ - SUBDBG ("Entry: cidx: %d\n", cidx); - int i; - - /* link in all the infiniband libraries and resolve the symbols we need to use */ - if (linkInfinibandLibraries() != PAPI_OK) { - SUBDBG ("Dynamic link of Infiniband libraries failed, component will be disabled.\n"); - SUBDBG ("See disable reason in papi_component_avail output for more details.\n"); - return (PAPI_ENOSUPP); - } - - /* make sure that the infiniband library finds the kernel module loaded. */ - if ( (*umad_initPtr)( ) < 0 ) { - strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Call to initialize umad library failed.",PAPI_MAX_STR_LEN); - return ( PAPI_ENOSUPP ); - } - - for ( i = 0; i < INFINIBAND_MAX_COUNTERS; i++ ) { - _papi_hwd_infiniband_register_start[i] = -1; - _papi_hwd_infiniband_register[i] = -1; - } - - /* Export the component id */ - _infiniband_umad_vector.cmp_info.CmpIdx = cidx; - - return ( PAPI_OK ); -} - - -/* - * Link the necessary Infiniband libraries to use the Infiniband component. If any of them can not be found, then - * the Infiniband component will just be disabled. This is done at runtime so that a version of PAPI built - * with the Infiniband component can be installed and used on systems which have the Infiniband libraries installed - * and on systems where these libraries are not installed. 
- */ -static int -linkInfinibandLibraries () -{ - /* Attempt to guess if we were statically linked to libc, if so bail */ - if ( _dl_non_dynamic_init != NULL ) { - strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "The Infiniband component does not support statically linking of libc.", PAPI_MAX_STR_LEN); - return PAPI_ENOSUPP; - } - - /* Need to link in the Infiniband libraries, if not found disable the component */ - dl1 = dlopen("libibumad.so", RTLD_NOW | RTLD_GLOBAL); - if (!dl1) - { - strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband library libibumad.so not found.",PAPI_MAX_STR_LEN); - return ( PAPI_ENOSUPP ); - } - umad_initPtr = dlsym(dl1, "umad_init"); - if (dlerror() != NULL) - { - strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function umad_init not found.",PAPI_MAX_STR_LEN); - return ( PAPI_ENOSUPP ); - } - umad_get_cas_namesPtr = dlsym(dl1, "umad_get_cas_names"); - if (dlerror() != NULL) - { - strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function umad_get_cas_names not found.",PAPI_MAX_STR_LEN); - return ( PAPI_ENOSUPP ); - } - umad_get_caPtr = dlsym(dl1, "umad_get_ca"); - if (dlerror() != NULL) - { - strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function umad_get_ca not found.",PAPI_MAX_STR_LEN); - return ( PAPI_ENOSUPP ); - } - - /* Need to link in the Infiniband libraries, if not found disable the component */ - dl2 = dlopen("libibmad.so", RTLD_NOW | RTLD_GLOBAL); - if (!dl2) - { - strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband library libibmad.so not found.",PAPI_MAX_STR_LEN); - return ( PAPI_ENOSUPP ); - } - mad_decode_fieldPtr = dlsym(dl2, "mad_decode_field"); - if (dlerror() != NULL) - { - strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function mad_decode_field not found.",PAPI_MAX_STR_LEN); - return ( PAPI_ENOSUPP ); - } - mad_rpc_open_portPtr = dlsym(dl2, "mad_rpc_open_port"); - if (dlerror() != NULL) 
- { - strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function mad_rpc_open_port not found.",PAPI_MAX_STR_LEN); - return ( PAPI_ENOSUPP ); - } - ib_resolve_self_viaPtr = dlsym(dl2, "ib_resolve_self_via"); - if (dlerror() != NULL) - { - strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function ib_resolve_self_via not found.",PAPI_MAX_STR_LEN); - return ( PAPI_ENOSUPP ); - } - performance_reset_viaPtr = dlsym(dl2, "performance_reset_via"); - if (dlerror() != NULL) - { - strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function performance_reset_via not found.",PAPI_MAX_STR_LEN); - return ( PAPI_ENOSUPP ); - } - pma_query_viaPtr = dlsym(dl2, "pma_query_via"); - if (dlerror() != NULL) - { - strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function pma_query_via not found.",PAPI_MAX_STR_LEN); - return ( PAPI_ENOSUPP ); - } - - return ( PAPI_OK ); -} - - -/* - * Control of counters (Reading/Writing/Starting/Stopping/Setup) - * functions - */ -int -INFINIBAND_init_control_state( hwd_control_state_t * ctrl ) -{ - ( void ) ctrl; - return PAPI_OK; -} - - -/* - * - */ -int -INFINIBAND_start( hwd_context_t * ctx, hwd_control_state_t * ctrl ) -{ - ( void ) ctx; - ( void ) ctrl; - - host_read_values( _papi_hwd_infiniband_register_start ); - - memcpy( _papi_hwd_infiniband_register, _papi_hwd_infiniband_register_start, - INFINIBAND_MAX_COUNTERS * sizeof ( long long ) ); - - return ( PAPI_OK ); -} - - -/* - * - */ -int -INFINIBAND_stop( hwd_context_t * ctx, hwd_control_state_t * ctrl ) -{ - int i; - ( void ) ctx; - - host_read_values( _papi_hwd_infiniband_register ); - - for ( i = 0; i < ( ( INFINIBAND_context_t * ) ctx )->state.ncounter; i++ ) { - ( ( INFINIBAND_control_state_t * ) ctrl )->counts[i] = - _papi_hwd_infiniband_register[i] - - _papi_hwd_infiniband_register_start[i]; - } - - return ( PAPI_OK ); -} - - -/* - * - */ -int -INFINIBAND_read( hwd_context_t * ctx, hwd_control_state_t * ctrl, 
- long_long ** events, int flags ) -{ - int i; - ( void ) flags; - - host_read_values( _papi_hwd_infiniband_register ); - - for ( i = 0; i < ( ( INFINIBAND_context_t * ) ctx )->state.ncounter; i++ ) { - ( ( INFINIBAND_control_state_t * ) ctrl )->counts[i] = - _papi_hwd_infiniband_register[i] - - _papi_hwd_infiniband_register_start[i]; - } - - *events = ( ( INFINIBAND_control_state_t * ) ctrl )->counts; - return ( PAPI_OK ); -} - - -/* - * - */ -int -INFINIBAND_shutdown_thread( hwd_context_t * ctx ) -{ - ( void ) ctx; - host_finalize( ); - return ( PAPI_OK ); -} - - -/* - * - */ -int -INFINIBAND_shutdown_component( void ) -{ - // close the dynamic libraries needed by this component (opened in the init substrate call) - dlclose(dl1); - dlclose(dl2); - - return ( PAPI_OK ); -} // end Shutdown component. - - -/* This function sets various options in the component - * The valid codes being passed in are PAPI_SET_DEFDOM, - * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT - */ -int -INFINIBAND_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) -{ - ( void ) ctx; - ( void ) code; - ( void ) option; - return ( PAPI_OK ); -} - - -//int INFINIBAND_ntv_code_to_bits ( unsigned int EventCode, hwd_register_t * bits ); - - -/* - * - */ -int -INFINIBAND_update_control_state( hwd_control_state_t * ptr, - NativeInfo_t * native, int count, - hwd_context_t * ctx ) -{ - ( void ) ptr; - ( void ) ctx; - int i, index; - - for ( i = 0; i < count; i++ ) { - index = native[i].ni_event; - native[i].ni_position = index; - } - - return ( PAPI_OK ); -} - - -/* - * Infiniband counts are system wide, so this is the only domain we will respond to - */ -int -INFINIBAND_set_domain( hwd_control_state_t * cntrl, int domain ) -{ - (void) cntrl; - if ( PAPI_DOM_ALL != domain ) - return ( PAPI_EINVAL ); - - return ( PAPI_OK ); -} - - -/* - * - */ -int -INFINIBAND_reset( hwd_context_t * ctx, hwd_control_state_t * ctrl ) -{ - INFINIBAND_start( ctx, ctrl ); - return ( 
PAPI_OK ); -} - - -/* - * Native Event functions - */ -int -INFINIBAND_ntv_enum_events( unsigned int *EventCode, int modifier ) -{ - if ( modifier == PAPI_ENUM_FIRST ) { - *EventCode = 0; - return PAPI_OK; - } - - if ( modifier == PAPI_ENUM_EVENTS ) { - int index = *EventCode; - - if ( infiniband_native_table[index + 1] ) { - *EventCode = *EventCode + 1; - return ( PAPI_OK ); - } else - return ( PAPI_ENOEVNT ); - } else - return ( PAPI_EINVAL ); -} - - -/* - * - */ -int -INFINIBAND_ntv_code_to_name( unsigned int EventCode, char *name, int len ) -{ - strncpy( name, infiniband_native_table[EventCode]->name, len ); - - return PAPI_OK; -} - - -/* - * - */ -int -INFINIBAND_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) -{ - strncpy( name, infiniband_native_table[EventCode]->description, len ); - - return PAPI_OK; -} - - -/* - * - */ -int -INFINIBAND_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits ) -{ - memcpy( ( INFINIBAND_register_t * ) bits, - infiniband_native_table[EventCode], - sizeof ( INFINIBAND_register_t ) ); - - return PAPI_OK; -} - - -/* - * - */ -papi_vector_t _infiniband_umad_vector = { - .cmp_info = { - /* default component information (unspecified values are initialized to 0) */ - .name ="infiniband_umad", - .short_name="infiniband_umad", - .version = "4.2.1", - .description = "Infiniband statistics (for OFED versions < 1.4)", - .num_mpx_cntrs = INFINIBAND_MAX_COUNTERS, - .num_cntrs = INFINIBAND_MAX_COUNTERS, - .default_domain = PAPI_DOM_ALL, - .available_domains = PAPI_DOM_ALL, - .default_granularity = PAPI_GRN_SYS, - .available_granularities = PAPI_GRN_SYS, - .hardware_intr_sig = PAPI_INT_SIGNAL, - - /* component specific cmp_info initializations */ - .fast_real_timer = 0, - .fast_virtual_timer = 0, - .attach = 0, - .attach_must_ptrace = 0, - } - , - - /* sizes of framework-opaque component-private structures */ - .size = { - .context = sizeof ( INFINIBAND_context_t ), - .control_state = sizeof ( 
INFINIBAND_control_state_t ), - .reg_value = sizeof ( INFINIBAND_register_t ), - .reg_alloc = sizeof ( INFINIBAND_reg_alloc_t ), - } - , - /* function pointers in this component */ - .init_thread = INFINIBAND_init_thread, - .init_component = INFINIBAND_init_component, - .init_control_state = INFINIBAND_init_control_state, - .start = INFINIBAND_start, - .stop = INFINIBAND_stop, - .read = INFINIBAND_read, - .shutdown_component = INFINIBAND_shutdown_component, - .shutdown_thread = INFINIBAND_shutdown_thread, - .ctl = INFINIBAND_ctl, - - .update_control_state = INFINIBAND_update_control_state, - .set_domain = INFINIBAND_set_domain, - .reset = INFINIBAND_reset, - - .ntv_enum_events = INFINIBAND_ntv_enum_events, - .ntv_code_to_name = INFINIBAND_ntv_code_to_name, - .ntv_code_to_descr = INFINIBAND_ntv_code_to_descr, - .ntv_code_to_bits = INFINIBAND_ntv_code_to_bits, -}; diff -Nru papi-5.7.0+dfsg/src/components/infiniband_umad/linux-infiniband_umad.h papi-6.0.0~dfsg/src/components/infiniband_umad/linux-infiniband_umad.h --- papi-5.7.0+dfsg/src/components/infiniband_umad/linux-infiniband_umad.h 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/infiniband_umad/linux-infiniband_umad.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,95 +0,0 @@ -/****************************/ -/* THIS IS OPEN SOURCE CODE */ -/****************************/ - -/** - * @file linux-infiniband_umad.h - * @author Heike Jagode (in collaboration with Michael Kluge, TU Dresden) - * jagode@eecs.utk.edu - * - * @ingroup papi_components - * - * InfiniBand component - * - * Tested version of OFED: 1.4 - * - * @brief - * This file has the source code for a component that enables PAPI-C to - * access hardware monitoring counters for InfiniBand devices through the - * OFED library. Since a new interface was introduced with OFED version 1.4 - * (released Dec 2008), the current InfiniBand component does not support - * OFED versions < 1.4. 
- */ - -#ifndef _PAPI_INFINIBAND_H -#define _PAPI_INFINIBAND_H - -#define __BUILD_VERSION_TAG__ 1.2 - -#include -#include - -/* describes a single counter with its properties */ -typedef struct counter_info_struct -{ - int idx; - char *name; - char *description; - char *unit; - uint64_t value; - struct counter_info_struct *next; -} counter_info; - -typedef struct -{ - int count; - char **data; -} string_list; - -/* infos collected of a single IB port */ -typedef struct ib_port_struct -{ - char *name; - counter_info *send_cntr; - counter_info *recv_cntr; - int port_rate; - int port_number; - int is_initialized; - uint64_t sum_send_val; - uint64_t sum_recv_val; - uint32_t last_send_val; - uint32_t last_recv_val; - struct ib_port_struct *next; -} ib_port; - - -static void init_ib_counter( ); -static int read_ib_counter( ); -static int init_ib_port( ib_port * portdata ); -static void addIBPort( const char *ca_name, umad_port_t * port ); - - -/************************* DEFINES SECTION ******************************* - ***************************************************************************/ -/* this number assumes that there will never be more events than indicated */ -#define INFINIBAND_MAX_COUNTERS 100 -#define INFINIBAND_MAX_COUNTER_TERMS INFINIBAND_MAX_COUNTERS - -typedef counter_info INFINIBAND_register_t; -typedef counter_info INFINIBAND_native_event_entry_t; -typedef counter_info INFINIBAND_reg_alloc_t; - - -typedef struct INFINIBAND_control_state -{ - long long counts[INFINIBAND_MAX_COUNTERS]; - int ncounter; -} INFINIBAND_control_state_t; - - -typedef struct INFINIBAND_context -{ - INFINIBAND_control_state_t state; -} INFINIBAND_context_t; - -#endif /* _PAPI_INFINIBAND_H */ diff -Nru papi-5.7.0+dfsg/src/components/infiniband_umad/Makefile.infiniband_umad.in papi-6.0.0~dfsg/src/components/infiniband_umad/Makefile.infiniband_umad.in --- papi-5.7.0+dfsg/src/components/infiniband_umad/Makefile.infiniband_umad.in 2019-03-04 19:56:23.000000000 +0000 +++ 
papi-6.0.0~dfsg/src/components/infiniband_umad/Makefile.infiniband_umad.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -INFINIBAND_IBMAD_DIR = @infiniband_ibmad_dir@ -INFINIBAND_IBUMAD_DIR = @infiniband_ibumad_dir@ \ No newline at end of file diff -Nru papi-5.7.0+dfsg/src/components/infiniband_umad/README papi-6.0.0~dfsg/src/components/infiniband_umad/README --- papi-5.7.0+dfsg/src/components/infiniband_umad/README 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/infiniband_umad/README 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ -/** -* @file: README -* CVS: $Id$ -* @author: Dan Terpstra -* terpstra@icl.utk.edu -* @defgroup papi_components Components -* @brief Component Specific Readme file: Infiniband -*/ - -/** @page component_readme Component Readme - -@section Component Specific Information - -infiniband_umad/ -These files have the source code for a component that enables PAPI-C to access hardware monitoring counters for InfiniBand devices through the OFED library. Since a new interface was introduced with OFED version 1.4 (released Dec 2008), the current InfiniBand component does not support OFED versions < 1.4. - --------------------------------------------------- -CONFIGURING THE PAPI INFINIBAND_UMAD COMPONENT - -Before installing the INFINIBAND_UMAD component, the configure script of this component -must be executed in order to generate the Makefile which contains the -configuration settings. - % cd /src/components/infiniband_umad - % ./configure -If the infiniband library and header files are not found, specify them in the configuration step. - % ./configure --with-infiniband_ibumad_dir= --with-infiniband_ibmad_dir= -Then, at the higher src directory, configure with this component - % cd /src - % ./configure --with-components="infiniband_umad" - or if you want to specify the compilers and enable debug. 
- % ./configure CC=gcc F77=gfortran --with-debug --with-components="infiniband_umad" -Finally, follow the standard PAPI build (make) instructions - % make - -*/ diff -Nru papi-5.7.0+dfsg/src/components/infiniband_umad/Rules.infiniband_umad papi-6.0.0~dfsg/src/components/infiniband_umad/Rules.infiniband_umad --- papi-5.7.0+dfsg/src/components/infiniband_umad/Rules.infiniband_umad 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/infiniband_umad/Rules.infiniband_umad 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -# $Id$ - -include components/infiniband_umad/Makefile.infiniband_umad - -COMPSRCS += components/infiniband_umad/linux-infiniband_umad.c -COMPOBJS += linux-infiniband_umad.o -CFLAGS += -I$(INFINIBAND_IBMAD_DIR)/include -I$(INFINIBAND_IBUMAD_DIR)/include -LDFLAGS += $(LDL) - -linux-infiniband_umad.o: components/infiniband_umad/linux-infiniband_umad.c components/infiniband_umad/linux-infiniband_umad.h $(HEADERS) - $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/infiniband_umad/linux-infiniband_umad.c -o linux-infiniband_umad.o diff -Nru papi-5.7.0+dfsg/src/components/infiniband_umad/tests/infiniband_umad_list_events.c papi-6.0.0~dfsg/src/components/infiniband_umad/tests/infiniband_umad_list_events.c --- papi-5.7.0+dfsg/src/components/infiniband_umad/tests/infiniband_umad_list_events.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/infiniband_umad/tests/infiniband_umad_list_events.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,95 +0,0 @@ -/****************************/ -/* THIS IS OPEN SOURCE CODE */ -/****************************/ - -/** - * @author Jose Pedro Oliveira - * - * test case for the linux-infiniband component - * Adapted from its counterpart in the net component. - * - * @author Tony Castaldo - * Corrected missing include file for the strstr() function to be used. 
- * - * @brief - * List all net events codes and names - */ - -#include -#include - -#include "papi.h" -#include "papi_test.h" -#include // Needed for 'strstr' used below. - -int main (int argc, char **argv) -{ - int retval,cid,numcmp; - int total_events=0; - int code; - char event_name[PAPI_MAX_STR_LEN]; - int r; - const PAPI_component_info_t *cmpinfo = NULL; - - /* Set TESTS_QUIET variable */ - tests_quiet( argc, argv ); - - /* PAPI Initialization */ - retval = PAPI_library_init( PAPI_VER_CURRENT ); - if ( retval != PAPI_VER_CURRENT ) { - test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); - } - - if (!TESTS_QUIET) { - printf("Listing all net events\n"); - } - - numcmp = PAPI_num_components(); - - for(cid=0; cidname, "infiniband") == NULL) { - continue; - } - - if (!TESTS_QUIET) { - printf("Component %d (%d) - %d events - %s\n", - cid, cmpinfo->CmpIdx, - cmpinfo->num_native_events, cmpinfo->name); - } - - code = PAPI_NATIVE_MASK; - - r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); - while ( r == PAPI_OK ) { - - retval = PAPI_event_code_to_name( code, event_name ); - if ( retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); - } - - if (!TESTS_QUIET) { - printf("%#x %s\n", code, event_name); - } - - total_events++; - - r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); - } - - } - - if (total_events==0) { - test_skip(__FILE__,__LINE__,"No net events found", 0); - } - - test_pass( __FILE__ ); - - return 0; -} - -// vim:set ai ts=4 sw=4 sts=4 et: diff -Nru papi-5.7.0+dfsg/src/components/infiniband_umad/tests/infiniband_umad_values_by_code.c papi-6.0.0~dfsg/src/components/infiniband_umad/tests/infiniband_umad_values_by_code.c --- papi-5.7.0+dfsg/src/components/infiniband_umad/tests/infiniband_umad_values_by_code.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/infiniband_umad/tests/infiniband_umad_values_by_code.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,140 +0,0 @@ 
-/****************************/ -/* THIS IS OPEN SOURCE CODE */ -/****************************/ - -/** - * @author Jose Pedro Oliveira - * - * test case for the linux-infiniband component - * Adapted from its counterpart in the net component. - * - * @brief - * Prints the value of every net event (by code) - */ - -#include -#include -#include - -#include "papi.h" -#include "papi_test.h" - -#define PINGADDR "127.0.0.1" - -int main (int argc, char **argv) -{ - int retval,cid,numcmp; - int EventSet = PAPI_NULL; - long long value; - int code; - char event_name[PAPI_MAX_STR_LEN]; - int total_events=0; - int r; - const PAPI_component_info_t *cmpinfo = NULL; - - /* Set TESTS_QUIET variable */ - tests_quiet( argc, argv ); - - /* PAPI Initialization */ - retval = PAPI_library_init( PAPI_VER_CURRENT ); - if ( retval != PAPI_VER_CURRENT ) { - test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); - } - - if (!TESTS_QUIET) { - printf("Trying all net events\n"); - } - - numcmp = PAPI_num_components(); - - for(cid=0; cidnum_native_events, cmpinfo->name); - } - - if ( strstr(cmpinfo->name, "infiniband") == NULL) { - continue; - } - - code = PAPI_NATIVE_MASK; - - r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); - while ( r == PAPI_OK ) { - - retval = PAPI_event_code_to_name( code, event_name ); - if ( retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); - } - - if (!TESTS_QUIET) { - printf("%#x %-24s = ", code, event_name); - } - - EventSet = PAPI_NULL; - - retval = PAPI_create_eventset( &EventSet ); - if (retval != PAPI_OK) { - test_fail(__FILE__, __LINE__, "PAPI_create_eventset()", retval); - } - - retval = PAPI_add_event( EventSet, code ); - if (retval != PAPI_OK) { - test_fail(__FILE__, __LINE__, "PAPI_add_event()", retval); - } - - retval = PAPI_start( EventSet ); - if (retval != PAPI_OK) { - test_fail(__FILE__, __LINE__, "PAPI_start()", retval); - } - - if (strcmp(event_name, "_recv") == 0) { - /* XXX figure out a 
general method to generate some traffic - * for infiniband - * the operation should take more than one second in order - * to guarantee that the network counters are updated */ - retval = system("ping -c 4 " PINGADDR " > /dev/null"); - if (retval < 0) { - test_fail(__FILE__, __LINE__, "Unable to start ping", retval); - } - } - - retval = PAPI_stop( EventSet, &value ); - if (retval != PAPI_OK) { - test_fail(__FILE__, __LINE__, "PAPI_stop()", retval); - } - - if (!TESTS_QUIET) printf("%lld\n", value); - - retval = PAPI_cleanup_eventset( EventSet ); - if (retval != PAPI_OK) { - test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset()", retval); - } - - retval = PAPI_destroy_eventset( &EventSet ); - if (retval != PAPI_OK) { - test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset()", retval); - } - - total_events++; - - r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); - } - - } - - if (total_events==0) { - test_skip(__FILE__,__LINE__,"No net events found", 0); - } - - test_pass( __FILE__ ); - - return 0; -} - -// vim:set ai ts=4 sw=4 sts=4 et: diff -Nru papi-5.7.0+dfsg/src/components/infiniband_umad/tests/Makefile papi-6.0.0~dfsg/src/components/infiniband_umad/tests/Makefile --- papi-5.7.0+dfsg/src/components/infiniband_umad/tests/Makefile 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/infiniband_umad/tests/Makefile 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -NAME=infiniband_umad -include ../../Makefile_comp_tests.target -include ../Makefile.infiniband_umad - -INFINIBANDLIBS = -L$(INFINIBAND_IBMAD_DIR)/lib64 -L$(INFINIBAND_IBUMAD_DIR)/lib64 -libumad -libmad - -TESTS = infiniband_umad_list_events infiniband_umad_values_by_code - -infiniband_umad_tests: $(TESTS) - - -%.o:%.c - $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< - -infiniband_umad_list_events: infiniband_umad_list_events.o $(UTILOBJS) $(PAPILIB) - $(CC) $(CFLAGS) $(INCLUDE) -o $@ $^ $(LDFLAGS) $(INFINIBANDLIBS) - -infiniband_umad_values_by_code: 
infiniband_umad_values_by_code.o $(UTILOBJS) $(PAPILIB) - $(CC) $(CFLAGS) $(INCLUDE) -o $@ $^ $(LDFLAGS) $(INFINIBANDLIBS) - -clean: - rm -f $(TESTS) *.o - diff -Nru papi-5.7.0+dfsg/src/components/io/CHANGES papi-6.0.0~dfsg/src/components/io/CHANGES --- papi-5.7.0+dfsg/src/components/io/CHANGES 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/io/CHANGES 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,6 @@ +Net component changelog: + +2019-09-16 Kevin Huck + + * Created + diff -Nru papi-5.7.0+dfsg/src/components/io/linux-io.c papi-6.0.0~dfsg/src/components/io/linux-io.c --- papi-5.7.0+dfsg/src/components/io/linux-io.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/io/linux-io.c 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,614 @@ +/** + * @file linux-io.c + * @author Kevin A. Huck + * khuck@uoregon.edu + * + * @ingroup papi_components + * + * @brief io component + * This component provides access to the I/O statistics in the + * system file /proc/self/io. It typically contains 7 counters, + * but for robusness we read the file and create whatever events + * it contains. + */ + +#include +#include +#include +#include + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" /* defines papi_malloc(), etc. */ + +/* Declare our vector in advance */ +/* This allows us to modify the component info */ +papi_vector_t _io_vector; + +// Maximum expected characters per line in file. +#define FILE_LINE_SIZE 256 +// Maximum expected events in file. ARBITRARY VALUE, +// set as needed, just avoiding malloc() and free(). +#define IO_COUNTERS 64 +// File name to access. +#define IO_FILENAME "/proc/self/io" + +/** This structure is used to build the table of events */ +typedef struct IO_native_event_entry +{ + char name[PAPI_MAX_STR_LEN]; // Name of the counter. + char desc[PAPI_MAX_STR_LEN]; // Description of the counter. + int fileIdx; // Line in file. 
+} IO_native_event_entry_t; + +//----------------------------------------------------------------------------- +// Holds control flags. There's one of these per event-set. Use this to hold +// data specific to the EventSet. +//----------------------------------------------------------------------------- +typedef struct _io_control_state +{ + int EventSetCount; + long long EventSetVal[IO_COUNTERS]; + long long EventSetReport[IO_COUNTERS]; + int EventSetIdx[IO_COUNTERS]; +} _io_control_state_t; + +//----------------------------------------------------------------------------- +// Holds per-thread information. +//----------------------------------------------------------------------------- +typedef struct _io_context +{ + int EventCount; + FILE *pFile; + char line[FILE_LINE_SIZE]; +} _io_context_t; + +// ----------------------- GLOBALS ---------------------------- +// We have to have a global table of events, to support event enumeration. +// We can have different file pointers for each thread, but all files must +// match the file found during _init_component(). +static int gEventCount; +static IO_native_event_entry_t *io_native_table; + +// Code to just count events in file, fills in a context. +// This may be a dummy from init_component. +static int io_count_events(_io_context_t *myCtx) +{ + myCtx->EventCount = 0; + myCtx->pFile = fopen (IO_FILENAME,"r"); + if (myCtx->pFile == NULL) { + snprintf(_io_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN-1, + "Failed to open target file '%s'.", IO_FILENAME); + return PAPI_ENOSUPP; + } + + // Just count the lines, basic vetting for ability to parse. + while (1) { + char *res; + // fgets guarantees z-terminator, reads at most FILE_LINE_SIZE-1 bytes. + res = fgets(myCtx->line, FILE_LINE_SIZE, myCtx->pFile); + if (res == NULL) break; + // If the read filled the whole buffer, line is too long. 
+ if (strlen(myCtx->line) == (FILE_LINE_SIZE-1)) { + fclose(myCtx->pFile); + snprintf(_io_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN-1, + "File '%s' line %i too long.", IO_FILENAME, myCtx->EventCount+1); + return PAPI_ENOSUPP; + } + + char dummy[FILE_LINE_SIZE] = {0}; + long long tmplong = 0LL; + int nf = sscanf( myCtx->line, "%s %lld\n", dummy, &tmplong); + if (nf != 2 || strlen(dummy)<2 || dummy[strlen(dummy)-1] != ':') { + fclose(myCtx->pFile); + snprintf(_io_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN-1, + "File '%s' line %i bad format.", IO_FILENAME, myCtx->EventCount+1); + return PAPI_ENOSUPP; + } + + myCtx->EventCount++; + } // END READING. + + // NOTE: We intentionally leave file open; up to caller to close + // or rewind and continue. + return PAPI_OK; +} // END ROUTINE. + + +// Code to read values; returns PAPI_OK or an error. +// We presume the number of counters and order of them +// will not change from our initialization read. +static int +io_hardware_read(_io_context_t *ctx, _io_control_state_t *ctl) +{ + ctx->pFile = fopen(IO_FILENAME, "r"); + if (ctx->pFile == NULL) return(PAPI_ENOCNTR); /* No counters */ + + /* Read each line */ + int idx; + for (idx=0; idxline, FILE_LINE_SIZE-1, ctx->pFile)) { + char dummy[FILE_LINE_SIZE] = {0}; + long long tmplong = 0LL; + int nf = sscanf(ctx->line, "%s %lld\n", dummy, &tmplong); + if (nf != 2 || strlen(dummy)<2 || dummy[strlen(dummy)-1] != ':') { + return PAPI_ENOCNTR; + } + + ctl->EventSetVal[idx] = tmplong; + } else { /* Did not read ALL counters. */ + return(PAPI_EMISC); + } + } + + fclose(ctx->pFile); + return(PAPI_OK); +} // END FUNCTION. 
+ +/********************************************************************/ +/* Below are the functions required by the PAPI component interface */ +/********************************************************************/ + +/** Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +static int +_io_init_component( int cidx ) +{ + _io_context_t myCtx; + int ret, fileIdx; + SUBDBG( "_io_init_component..." ); + + ret = io_count_events(&myCtx); + if (ret != PAPI_OK) return(ret); + rewind(myCtx.pFile); + + if (myCtx.EventCount > IO_COUNTERS) { + snprintf(_io_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN-1, + "File '%s' has %i events, exceeds counter limit of %i.", IO_FILENAME, myCtx.EventCount, IO_COUNTERS); + fclose(myCtx.pFile); + return PAPI_ENOSUPP; + } + + // Must be same for all threads, now. + gEventCount = myCtx.EventCount; + /* Allocate memory for the native event table */ + io_native_table = + ( IO_native_event_entry_t * ) + papi_calloc(gEventCount, sizeof(IO_native_event_entry_t) ); + if ( io_native_table == NULL ) { + fclose(myCtx.pFile); + return PAPI_ENOMEM; + } + + for (fileIdx = 0; fileIdx < gEventCount; fileIdx++) { + (void) fgets(myCtx.line, FILE_LINE_SIZE, myCtx.pFile); + char name[FILE_LINE_SIZE] = {0}; + long long tmplong = 0LL; + // No check for error here, we would have caught it in io_count_events(). + (void) sscanf(myCtx.line, "%s %lld\n", name, &tmplong); + name[strlen(name)-1]=0; // null terminate over ':' we found. + strncpy(io_native_table[fileIdx].name, name, PAPI_MAX_STR_LEN-1); + io_native_table[fileIdx].fileIdx=fileIdx; + io_native_table[fileIdx].desc[0]=0; // flag for successful copy. 
+ if (strcmp("rchar", name) == 0) { + strcpy(io_native_table[fileIdx].desc, "Characters read."); + } + if (strcmp("wchar", name) == 0) { + strcpy(io_native_table[fileIdx].desc, "Characters written."); + } + if (strcmp("syscr", name) == 0) { + strcpy(io_native_table[fileIdx].desc, "Characters read by system calls."); + } + if (strcmp("syscw", name) == 0) { + strcpy(io_native_table[fileIdx].desc, "Characters written by system calls."); + } + if (strcmp("read_bytes", name) == 0) { + strcpy(io_native_table[fileIdx].desc, "Binary bytes read."); + } + if (strcmp("write_bytes", name) == 0) { + strcpy(io_native_table[fileIdx].desc, "Binary bytes written."); + } + if (strcmp("cancelled_write_bytes", name) == 0) { + strcpy(io_native_table[fileIdx].desc, "Binary write bytes cancelled."); + } + + // If none of the above found, generic description. + if (io_native_table[fileIdx].desc[0] == 0) { + strcpy(io_native_table[fileIdx].desc, "No description available."); + } + } // END READING. + + fclose(myCtx.pFile); + // Export the total number of events available, at least on the init thread. + _io_vector.cmp_info.num_native_events = gEventCount; + _io_vector.cmp_info.num_cntrs = IO_COUNTERS; + _io_vector.cmp_info.num_mpx_cntrs = IO_COUNTERS; + + /* Export the component id */ + _io_vector.cmp_info.CmpIdx = cidx; + return PAPI_OK; +} // END ROUTINE. + +// This is called whenever a thread is initialized. +// WARNING: This can be called BEFORE init_component. +// When it is, shutdown_thread is never called, but +// this is the default context used in calls. +static int +_io_init_thread( hwd_context_t *ctx ) +{ + _io_context_t* myCtx = (_io_context_t*) ctx; + int ret; + ret = io_count_events(myCtx); + if (ret != PAPI_OK) return(ret); + + // File mismatch on event count kills it. + if (gEventCount > 0 && myCtx->EventCount != gEventCount) { + fclose(myCtx->pFile); + myCtx->pFile = NULL; + return PAPI_ENOSUPP; + } + + fclose(myCtx->pFile); + return PAPI_OK; +} // END of init thread. 
+ +// Our control state holds arrays for reading/arranging Event values. +// We just ensure it is all zeros. +static int +_io_init_control_state( hwd_control_state_t * ctl ) +{ + _io_control_state_t* control = ( _io_control_state_t* ) ctl; + memset(control, 0, sizeof(_io_control_state_t)); + return PAPI_OK; +} // END. + + +// Triggered by eventset operations like add or remove. +// We store the order of the events, and the number. +static int +_io_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t *native, + int count, + hwd_context_t *ctx ) +{ + (void) ctx; + _io_control_state_t *myCtl = (_io_control_state_t*) ctl; + + int i, index; + + myCtl->EventSetCount = count; + + /* if no events, return */ + if (count==0) return PAPI_OK; + + for( i = 0; i < count; i++ ) { + index = native[i].ni_event; + myCtl->EventSetIdx[i] = index; + + /* We have no constraints on event position, so any event */ + /* can be in any slot. */ + native[i].ni_position = i; + } + + return PAPI_OK; +} // END ROUTINE. + +/** Triggered by PAPI_start() */ +static int +_io_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctl; + (void) ctx; + SUBDBG( "io_start %p %p...", ctx, ctl ); + return PAPI_OK; +} + + +/** Triggered by PAPI_stop() */ +static int +_io_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + (void) ctl; + SUBDBG( "io_stop %p %p...", ctx, ctl ); + // Don't do anything, can't stop the counters. + + return PAPI_OK; +} + + +// Triggered by PAPI_read(). We read all the events, then +// pick out the ones the user actually requested, in their +// given order. +static int +_io_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags ) +{ + // Prevent 'unused' warnings from compiler. + (void) flags; + _io_context_t *myCtx = (_io_context_t*) ctx; + _io_control_state_t *myCtl = (_io_control_state_t*) ctl; + int i; + SUBDBG( "io_read... 
%p %d", ctx, flags ); + + /* Read all counters into EventSetVal */ + io_hardware_read(myCtx, myCtl); + for (i=0; iEventSetCount; i++) { + myCtl->EventSetReport[i]=myCtl->EventSetVal[myCtl->EventSetIdx[i]]; + } + + /* return pointer to the values we read */ + *events = myCtl->EventSetReport; + + return PAPI_OK; +} + +/** Triggered by PAPI_write(), but only if the counters are running */ +/* otherwise, the updated state is written to ESI->hw_start */ +static int +_io_write( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long *events ) +{ + (void) ctx; // unused + (void) ctl; // unused + (void) events; // unused + + return PAPI_OK; +} + + +/** Triggered by PAPI_reset() but only if the EventSet is currently running */ +/* If the eventset is not currently running, then the saved value in the */ +/* EventSet is set to zero without calling this routine. */ +/* We don't do anything for an io reset. */ +static int +_io_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; // unused + (void) ctl; + SUBDBG( "io_reset..."); + return PAPI_OK; +} + +// Triggered by PAPI_shutdown(). +static int +_io_shutdown_component(void) +{ + SUBDBG( "io_shutdown_component..." ); + return PAPI_OK; +} + +// Shutdown thread; close files. +static int +_io_shutdown_thread( hwd_context_t *ctx ) +{ + (void) ctx; + SUBDBG( "io_shutdown_thread... %p", ctx ); + return PAPI_OK; +} + +/** This function sets various options in the component + @param[in] ctx -- hardware context + @param[in] code valid are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, + PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT + @param[in] option -- options to be set + */ +static int +_io_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) +{ + (void) ctx; + (void) code; + (void) option; + SUBDBG( "io_ctl..." 
); + return PAPI_OK; +} + +/** This function has to set the bits needed to count different domains + In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + By default return PAPI_EINVAL if none of those are specified + and PAPI_OK with success + PAPI_DOM_USER is only user context is counted + PAPI_DOM_KERNEL is only the Kernel/OS context is counted + PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + PAPI_DOM_ALL is all of the domains + */ +static int +_io_set_domain( hwd_control_state_t * cntrl, int domain ) +{ + (void) cntrl; + + int found = 0; + SUBDBG( "io_set_domain..." ); + + if ( PAPI_DOM_USER & domain ) { + SUBDBG( " PAPI_DOM_USER " ); + found = 1; + } + if ( PAPI_DOM_KERNEL & domain ) { + SUBDBG( " PAPI_DOM_KERNEL " ); + found = 1; + } + if ( PAPI_DOM_OTHER & domain ) { + SUBDBG( " PAPI_DOM_OTHER " ); + found = 1; + } + if ( PAPI_DOM_ALL & domain ) { + SUBDBG( " PAPI_DOM_ALL " ); + found = 1; + } + if ( !found ) + return ( PAPI_EINVAL ); + + return PAPI_OK; +} + + +/**************************************************************/ +/* Naming functions, used to translate event numbers to names */ +/**************************************************************/ + + +/** Enumerate Native Events + * @param EventCode is the event of interest + * @param modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS + * If your component has attribute masks then these need to + * be handled here as well. 
+ */ +static int +_io_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + int index; + + switch ( modifier ) { + + /* return EventCode of first event */ + case PAPI_ENUM_FIRST: + *EventCode = 0; + return PAPI_OK; + + /* return EventCode of next available event */ + case PAPI_ENUM_EVENTS: + index = *EventCode; + + /* Make sure we have at least 1 more event after us */ + if ( index < (gEventCount-1) ) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + break; + + default: + return PAPI_EINVAL; + } + + return PAPI_EINVAL; +} // END ROUTINE + +/** Takes a native event code and passes back the name + * @param EventCode is the native event code + * @param name is a pointer for the name to be copied to + * @param len is the size of the name string + */ +static int +_io_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + int index; + index = EventCode; + + /* Make sure we are in range */ + if (index >= 0 && index < gEventCount) { + strncpy(name, io_native_table[index].name, len ); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} // END ROUTINE. 
+ +/** Takes a native event code and passes back the event description + * @param EventCode is the native event code + * @param descr is a pointer for the description to be copied to + * @param len is the size of the descr string + */ +static int +_io_ntv_code_to_descr( unsigned int EventCode, char *descr, int len ) +{ + int index; + index = EventCode; + + /* make sure event is in range */ + if (index >= 0 && index < gEventCount) { + strncpy( descr, io_native_table[index].desc, len ); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + +/** Vector that points to entry points for our component */ +papi_vector_t _io_vector = { + .cmp_info = { + /* default component information */ + /* (unspecified values are initialized to 0) */ + /* we explicitly set them to zero in this example */ + /* to show what settings are available */ + + .name = "io", + .short_name = "io", + .description = "A component to read /proc/self/io", + .version = "1.0", + .support_version = "n/a", + .kernel_version = "n/a", + .num_cntrs = 512, + .num_mpx_cntrs = 512, + .default_domain = PAPI_DOM_USER, + .available_domains = PAPI_DOM_USER, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + /* once per thread */ + .context = sizeof(_io_context_t), + /* once per eventset */ + .control_state = sizeof(_io_control_state_t), + .reg_value = 1, /* unused */ + .reg_alloc = 1, /* unused */ + }, + + /* function pointers */ + /* by default they are set to NULL */ + + /* Used for general PAPI interactions */ + .start = _io_start, + .stop = _io_stop, + .read = _io_read, + .reset = _io_reset, + .write = _io_write, + .init_component = _io_init_component, + .init_thread = _io_init_thread, + .init_control_state = _io_init_control_state, + .update_control_state = _io_update_control_state, + .ctl = _io_ctl, + 
.shutdown_thread = _io_shutdown_thread, + .shutdown_component = _io_shutdown_component, + .set_domain = _io_set_domain, + /* .cleanup_eventset = NULL, */ + /* called in add_native_events() */ + /* .allocate_registers = NULL, */ + + /* Used for overflow/profiling */ + /* .dispatch_timer = NULL, */ + /* .get_overflow_address = NULL, */ + /* .stop_profiling = NULL, */ + /* .set_overflow = NULL, */ + /* .set_profile = NULL, */ + + /* ??? */ + /* .user = NULL, */ + + /* Name Mapping Functions */ + .ntv_enum_events = _io_ntv_enum_events, + .ntv_code_to_name = _io_ntv_code_to_name, + .ntv_code_to_descr = _io_ntv_code_to_descr, + /* if .ntv_name_to_code not available, PAPI emulates */ + /* it by enumerating all events and looking manually */ + .ntv_name_to_code = NULL, + + + /* These are only used by _papi_hwi_get_native_event_info() */ + /* Which currently only uses the info for printing native */ + /* event info, not for any sort of internal use. */ + /* .ntv_code_to_bits = NULL, */ + +}; + diff -Nru papi-5.7.0+dfsg/src/components/io/README papi-6.0.0~dfsg/src/components/io/README --- papi-5.7.0+dfsg/src/components/io/README 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/io/README 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,45 @@ + +COMPONENT + + net + +SUMMARY + + Linux I/O component + +DESCRIPTION + + This network component enables PAPI-C to access the io + statistics exported by the Linux kernel through the /proc + pseudo-file system (file /proc/self/io). + + This component will dynamically create a native events table. + + Event names + ------------------------- + ".rchar", + ".wchar", + ".syscr", + ".syscw", + ".read_bytes", + ".write_bytes", + ".cancelled_write_bytes" + + By default the Linux kernel only updates the io statistics + once every second (see the references listed in the "SEE ALSO" + section for some problems you may come across and for how to + change the default polling period). 
+ + Note: The Linux network statistics are updated by code that + resides in the file ? + +AUTHOR + + Initial written by Kevin A. Huck + +SEE ALSO + + * man proc + +# 2019-09-16 khuck +# vim:set ai ts=4 sw=4 sts=4 et: diff -Nru papi-5.7.0+dfsg/src/components/io/Rules.io papi-6.0.0~dfsg/src/components/io/Rules.io --- papi-5.7.0+dfsg/src/components/io/Rules.io 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/io/Rules.io 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,8 @@ +# $Id$ + +COMPSRCS += components/io/linux-io.c +COMPOBJS += linux-io.o + +linux-io.o: components/io/linux-io.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/io/linux-io.c -o $@ + diff -Nru papi-5.7.0+dfsg/src/components/io/tests/io_basic.c papi-6.0.0~dfsg/src/components/io/tests/io_basic.c --- papi-5.7.0+dfsg/src/components/io/tests/io_basic.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/io/tests/io_basic.c 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,504 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file io_basic.c + * @author Kevin A. Huck + * khuck@uoregon.edu + * test case for I/O component + * + * + * @brief + * This file is a very simple example test and Makefile that acat + * as a guideline on how to add tests to components. + * The papi configure and papi Makefile will take care of the compilation + * of the component tests (if all tests are added to a directory named + * 'tests' in the specific component dir). + * See components/README for more details. 
+ */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 3 + +int main (int argc, char **argv) +{ + + int retval,i; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + const PAPI_component_info_t *cmpinfo = NULL; + int numcmp,cid,example_cid=-1; + int code,maximum_code=0; + char event_name[PAPI_MAX_STR_LEN]; + PAPI_event_info_t event_info; + int quiet=0; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!quiet) { + printf( "Testing I/O component with PAPI %d.%d.%d\n", + PAPI_VERSION_MAJOR( PAPI_VERSION ), + PAPI_VERSION_MINOR( PAPI_VERSION ), + PAPI_VERSION_REVISION( PAPI_VERSION ) ); + } + + /* Find our component */ + + numcmp = PAPI_num_components(); + for( cid=0; cidnum_native_events, + cmpinfo->name); + } + if (strstr(cmpinfo->name,"io")) { + /* FOUND! 
*/ + example_cid=cid; + } + } + + + if (example_cid<0) { + test_skip(__FILE__, __LINE__, + "Example component not found\n", 0); + } + + if (!quiet) { + printf("\nFound io Component at id %d\n",example_cid); + printf("\nListing all events in this component:\n"); + } + + /**************************************************/ + /* Listing all available events in this component */ + /* Along with descriptions */ + /**************************************************/ + code = PAPI_NATIVE_MASK; + + retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, example_cid ); + + while ( retval == PAPI_OK ) { + if (PAPI_event_code_to_name( code, event_name )!=PAPI_OK) { + printf("Error translating %#x\n",code); + test_fail( __FILE__, __LINE__, + "PAPI_event_code_to_name", retval ); + } + + if (PAPI_get_event_info( code, &event_info)!=PAPI_OK) { + printf("Error getting info for event %#x\n",code); + test_fail( __FILE__, __LINE__, + "PAPI_get_event_info()", retval ); + } + + if (!quiet) { + printf("\tEvent %#x: %s -- %s\n", + code,event_name,event_info.long_descr); + } + + maximum_code=code; + + retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, example_cid ); + + } + if (!quiet) printf("\n"); + + /**********************************/ + /* Try accessing an invalid event */ + /**********************************/ + + retval=PAPI_event_code_to_name( maximum_code+10, event_name ); + if (retval!=PAPI_ENOEVNT) { + test_fail( __FILE__, __LINE__, + "Failed to return PAPI_ENOEVNT on invalid event", retval ); + } + + /***********************************/ + /* Test the rchar event */ + /***********************************/ + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_create_eventset() failed\n", retval ); + } + + retval = PAPI_event_name_to_code("io:::rchar", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "io:::rchar not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( 
retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + + if (!quiet) printf("Testing io:::rchar: %lld\n",values[0]); + + if (values[0]==0) { + test_fail( __FILE__, __LINE__, "Result should be != 0!\n", 0); + } + + retval = PAPI_cleanup_eventset(EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); + } + + retval = PAPI_destroy_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); + } + + EventSet=PAPI_NULL; + + + /***********************************/ + /* Test the wchar event */ + /***********************************/ + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_create_eventset() failed\n", retval ); + } + + retval = PAPI_event_name_to_code("io:::wchar", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "io:::wchar not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + + if (!quiet) printf("Testing io:::wchar: %lld\n",values[0]); + + if (values[0]==0) { + test_fail( __FILE__, __LINE__, "Result should be != 0!\n", 0); + } + + + /***********************************/ + /* Test multiple reads */ + /***********************************/ + + retval 
= PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + for(i=0;i<10;i++) { + + retval=PAPI_read( EventSet, values); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read failed\n", retval); + } + if (!quiet) printf("%lld ",values[0]); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + if (!quiet) printf("%lld\n",values[0]); + + // if (values[0]!=i) { + // test_fail( __FILE__, __LINE__, "Result wrong!\n", 0); + //} + + /***********************************/ + /* Test PAPI_reset() */ + /***********************************/ + + retval = PAPI_reset( EventSet); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_reset() failed\n",retval ); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + retval = PAPI_reset( EventSet); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_reset() failed\n",retval ); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + + + if (!quiet) printf("Testing after PAPI_reset(): %lld\n", + values[0]); + + if (values[0]==0) { + test_fail( __FILE__, __LINE__, "Result zero!\n", 0); + } + + retval = PAPI_cleanup_eventset(EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); + } + + retval = PAPI_destroy_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); + } + + EventSet=PAPI_NULL; + + + /***********************************/ + /* Test multiple events */ + /***********************************/ + + if (!quiet) printf("Testing Multiple Events: "); + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + 
test_fail( __FILE__, __LINE__, + "PAPI_create_eventset() failed\n", retval ); + } + + retval = PAPI_event_name_to_code("io:::rchar", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "io:::rchar not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + retval = PAPI_event_name_to_code("io:::wchar", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "io:::wchar not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + retval = PAPI_event_name_to_code("io:::read_bytes", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "io:::read_bytes not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + + if (!quiet) { + for(i=0;i<3;i++) { + printf("%lld ",values[i]); + } + printf("\n"); + } + + if (values[0]==0) { + test_fail( __FILE__, __LINE__, "Result should be != 0!\n", 0); + } + + retval = PAPI_cleanup_eventset(EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); + } + + retval = PAPI_destroy_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); + } + + EventSet=PAPI_NULL; + + /***********************************/ + /* Test writing to an event */ + /***********************************/ + + if (!quiet) printf("Testing Write\n"); + + retval = 
PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_create_eventset() failed\n", retval ); + } + + retval = PAPI_event_name_to_code("io:::rchar", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "io:::rchar not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + retval = PAPI_event_name_to_code("io:::read_bytes", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "io:::read_bytes not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + retval = PAPI_event_name_to_code("io:::wchar", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "io:::wchar not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + retval = PAPI_read ( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_read failed\n",retval ); + } + + if (!quiet) { + printf("Before values: "); + for(i=0;i<3;i++) { + printf("%lld ",values[i]); + } + printf("\n"); + } + + values[0]=100; + values[1]=200; + values[2]=300; + + retval = PAPI_write ( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_write failed\n",retval ); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + + if (!quiet) { + printf("After values: "); + for(i=0;i<3;i++) { + printf("%lld ",values[i]); + } + printf("\n"); + } + + + if (values[0]==0) { + 
test_fail( __FILE__, __LINE__, "Result should be != 0!\n", 0); + } + + retval = PAPI_cleanup_eventset(EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); + } + + retval = PAPI_destroy_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); + } + + EventSet=PAPI_NULL; + + + /************/ + /* All Done */ + /************/ + + if (!quiet) printf("\n"); + + test_pass( __FILE__ ); + + return 0; +} + diff -Nru papi-5.7.0+dfsg/src/components/io/tests/Makefile papi-6.0.0~dfsg/src/components/io/tests/Makefile --- papi-5.7.0+dfsg/src/components/io/tests/Makefile 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/io/tests/Makefile 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,15 @@ +NAME=io +include ../../Makefile_comp_tests.target + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +TESTS = io_basic + +io_tests: $(TESTS) + +io_basic: io_basic.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o io_basic io_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o diff -Nru papi-5.7.0+dfsg/src/components/libmsr/configure.in papi-6.0.0~dfsg/src/components/libmsr/configure.in --- papi-5.7.0+dfsg/src/components/libmsr/configure.in 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/libmsr/configure.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,33 +0,0 @@ -# Process this file with autoconf to produce a configure script. 
-# File: components/libmsr/configure.in - -AC_INIT - -AC_ARG_WITH(libmsr_incdir, - [ --with-libmsr-incdir= Specify directory of libmsr header files (libmsr.h) in a specific location ], - [libmsr_incdir=$withval]) -AC_ARG_WITH(libmsr_libdir, - [ --with-libmsr-libdir= Specify directory of libmsr library (libmsr.a or libmsr.so) in a specific location ], - [libmsr_libdir=$withval]) - -if test "x$libmsr_libdir" != "x"; then - LIBS="-L$libmsr_libdir" - libmsr_dotest=1 -fi -if test "x$libmsr_incdir" != "x"; then - CPPFLAGS="-I$libmsr_incdir" - libmsr_dotest=1 -fi - -CFLAGS="$CFLAGS -I$libmsr_incdir" -AC_CHECK_HEADER( [msr/msr_rapl.h], [], [AC_MSG_ERROR([libmsr component: msr/msr_rapl.h not found: use configure flags to set the path ])], ) -LDFLAGS="$LDFLAGS -L$libmsr_libdir -Wl,-rpath,$libmsr_libdir" -AC_CHECK_LIB( [msr], [init_msr], [], [AC_MSG_ERROR([libmsr component: libmsr.so not found: use configure flags to set the path])] ) -LIBMSR_INCDIR=$libmsr_incdir -LIBMSR_LIBDIR=$libmsr_libdir - -AC_CONFIG_FILES([Makefile.libmsr]) -AC_SUBST(LIBMSR_INCDIR) -AC_SUBST(LIBMSR_LIBDIR) -AC_SUBST(CUDA_DIR) -AC_OUTPUT diff -Nru papi-5.7.0+dfsg/src/components/libmsr/linux-libmsr.c papi-6.0.0~dfsg/src/components/libmsr/linux-libmsr.c --- papi-5.7.0+dfsg/src/components/libmsr/linux-libmsr.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/libmsr/linux-libmsr.c 2020-03-04 15:56:57.000000000 +0000 @@ -38,9 +38,9 @@ #include "papi_vector.h" #include "papi_memory.h" -#include -#include -#include +#include +#include +#include typedef enum { PKG_ENERGY=0, @@ -106,8 +106,19 @@ /* Using weak symbols allows PAPI to be built with the component, but * installed in a system without the required library */ #include -static void* dllib1 = NULL; -void (*_dl_non_dynamic_init)(void) __attribute__((weak)); +static void* dl1 = NULL; + +// string macro defined within Rules.libmsr +static char libmsr_main[]=PAPI_LIBMSR_MAIN; + 
+//----------------------------------------------------------------------------- +// Using weak symbols (global declared without a value, so it defers to any +// other global declared in another file WITH a value) allows PAPI to be built +// with the component, but PAPI can still be installed in a system without the +// required library. +//----------------------------------------------------------------------------- + +void (*_dl_non_dynamic_init)(void) __attribute__((weak)); // declare a weak dynamic-library init routine pointer. /* Functions pointers */ static int (*init_msr_ptr)(); @@ -136,28 +147,62 @@ static int _local_linkDynamicLibraries() { if ( _dl_non_dynamic_init != NULL ) { - strncpy( _libmsr_vector.cmp_info.disabled_reason, "The libmsr component REQUIRES dynamic linking capabilities.", PAPI_MAX_STR_LEN); + // If weak var present, statically linked insted of dynamic. + strncpy( _libmsr_vector.cmp_info.disabled_reason, "The libmsr component REQUIRES dynamic linking capabilities.", PAPI_MAX_STR_LEN-1); + // EXIT not supported. return PAPI_ENOSUPP; } - dllib1 = dlopen("libmsr.so", RTLD_NOW | RTLD_GLOBAL); - CHECK_DL_STATUS( !dllib1 , "Component library libmsr.so not found." ); - init_msr_ptr = dlsym( dllib1, "init_msr" ); + + char path_name[1024]; + char *libmsr_root = getenv("PAPI_LIBMSR_ROOT"); + + dl1 = NULL; + // Step 1: Process override if given. + if (strlen(libmsr_main) > 0) { // If override given, it has to work. + dl1 = dlopen(libmsr_main, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + if (dl1 == NULL) { + snprintf(_libmsr_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_LIBMSR_MAIN override '%s' given in Rules.libmsr not found.", libmsr_main); + return(PAPI_ENOSUPP); // Override given but not found. + } + } + + // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths. + if (dl1 == NULL) { // No override, + dl1 = dlopen("libmsr.so", RTLD_NOW | RTLD_GLOBAL); // Try system paths. 
+ } + + // Step 3: Try the explicit install default. + if (dl1 == NULL && libmsr_root != NULL) { // if root given, try it. + snprintf(path_name, 1024, "%s/lib/libmsr.so", libmsr_root); // PAPI Root check. + dl1 = dlopen(path_name, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + } + + // Check for failure. + if (dl1 == NULL) { + snprintf(_libmsr_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "libmsr.so not found."); + return(PAPI_ENOSUPP); + } + + // We have dl1. + + CHECK_DL_STATUS( !dl1 , "Component library libmsr.so not found." ); + init_msr_ptr = dlsym( dl1, "init_msr" ); CHECK_DL_STATUS( dlerror()!=NULL , "libmsr function init_msr not found." ); - finalize_msr_ptr = dlsym( dllib1, "finalize_msr" ); + finalize_msr_ptr = dlsym( dl1, "finalize_msr" ); CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function finalize_msr not found." ); - rapl_init_ptr = dlsym( dllib1, "rapl_init" ); + rapl_init_ptr = dlsym( dl1, "rapl_init" ); CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function rapl_init not found." ); - poll_rapl_data_ptr = dlsym( dllib1, "poll_rapl_data" ); + poll_rapl_data_ptr = dlsym( dl1, "poll_rapl_data" ); CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function poll_rapl_data not found." ); - set_pkg_rapl_limit_ptr = dlsym( dllib1, "set_pkg_rapl_limit" ); + set_pkg_rapl_limit_ptr = dlsym( dl1, "set_pkg_rapl_limit" ); CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function set_pkg_rapl_limit not found." ); - get_pkg_rapl_limit_ptr = dlsym( dllib1, "get_pkg_rapl_limit" ); + get_pkg_rapl_limit_ptr = dlsym( dl1, "get_pkg_rapl_limit" ); CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function get_pkg_rapl_limit not found." ); - core_config_ptr = dlsym( dllib1, "core_config" ); + core_config_ptr = dlsym( dl1, "core_config" ); CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function core_config not found." 
); - rapl_storage_ptr = dlsym( dllib1, "rapl_storage" ); + rapl_storage_ptr = dlsym( dl1, "rapl_storage" ); CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function rapl_storage not found." ); - get_rapl_power_info_ptr = dlsym( dllib1, "get_rapl_power_info" ); + get_rapl_power_info_ptr = dlsym( dl1, "get_rapl_power_info" ); CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function get_rapl_power_info not found." ); return( PAPI_OK); } @@ -669,7 +714,7 @@ free( libmsr_native_events ); libmsr_native_events = NULL; } - dlclose( dllib1 ); + dlclose( dl1 ); return PAPI_OK; } diff -Nru papi-5.7.0+dfsg/src/components/libmsr/Makefile.libmsr.in papi-6.0.0~dfsg/src/components/libmsr/Makefile.libmsr.in --- papi-5.7.0+dfsg/src/components/libmsr/Makefile.libmsr.in 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/libmsr/Makefile.libmsr.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -LIBMSR_INCDIR = @LIBMSR_INCDIR@ -LIBMSR_INCFLAG = -I@LIBMSR_INCDIR@ -LIBMSR_LIBDIR = @LIBMSR_LIBDIR@ -LIBMSR_LIBFLAG = -L@LIBMSR_LIBDIR@ diff -Nru papi-5.7.0+dfsg/src/components/libmsr/README papi-6.0.0~dfsg/src/components/libmsr/README --- papi-5.7.0+dfsg/src/components/libmsr/README 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/libmsr/README 1970-01-01 00:00:00.000000000 +0000 @@ -1,133 +0,0 @@ -/** -* @file: README -* CVS: $Id$ -* @author: Asim YarKhan -* yarkhan@icl.utk.edu -* @defgroup papi_components Components -* @brief Component Specific Readme file: libmsr -*/ - -/** @page component_readme Component Readme - -@section Component Specific Information - -libmsr/ - -NOTE: This libmsr component is an initial version, and has been tested -with libmsr (v0.1.17 11/2015) and the msr_safe kernel module (19/2015 -version). 
- https://github.com/scalability-llnl/libmsr - https://github.com/scalability-llnl/msr-safe - - -The PAPI libmsr component supports measuring and capping power usage -on recent Intel architectures using the RAPL interface exposed through -MSRs (model-specific registers). - -Lawrence Livermore National Laboratory has released a library (libmsr) -designed to provide a simple, safe, consistent interface to several of -the model-specific registers (MSRs) in Intel processors. The problem -is that permitting open access to the MSRs on a machine can be a -safety hazard, so access to MSRs is usually limited. In order to -encourage system administrators to give wider access to the MSRs on a -machine, LLNL has released a Linux kernel module (msr_safe) which -provides safer, white-listed access to the MSRs. - -PAPI has created a libmsr component that can provide read and write -access to the information and controls exposed via the libmsr library. - -This PAPI component introduces a new ability for PAPI; it is the first -case where PAPI is writing information to a counter as well as reading -the data from the counter. - - --------------------------------------------------- -ENABLE ACCESS TO THE MSRS (MODEL SPECIFIC REGISTERS) - -https://github.com/scalability-llnl/msr-safe - -To use this component, the system will need to provide access to Model -Specific Registers (MSRs) from user space. The actions described -below will generally require superuser ability. Note, these actions -may have security and performance consequences, so please make sure -you know what you are doing. - - OPTION 1: EITHER: Enable MSR access using msr-safe - Install the msr-safe module from LLNL, - lsmod | grep msr (should show msr_safe) - Use chmod to set site-appropriate access permissions (e.g. 766) for - /dev/cpu/*/msr_safe /dev/cpu/msr_batch /dev/cpu/msr_whitelist - Load a whitelist appropriate for your machine - e.g. 
for SandyBridge: - cat msr-safe/whitelists/wl_062D > /dev/cpu/msr_whitelist - - OPTION 2: OR: Enable MSR access via the filesystem and elevated permissions. - Or, enable access to the standard MSRs filesystem - - For Linux kernel version < 3.7, using only file system checks - chmod 666 /dev/cpu/*/msr - - For Linux kernel version >= 3.7, using capabilities - chmod 666 /dev/cpu/*/msr - The final executable needs CAP_SYS_RWIO to open MSR device files [1] - setcap cap_sys_rawio=ep - The final executable cannot be on a shared network partition. - - The dynamic linker on most operating systems will remove variables - that control dynamic linking from the environment of executables - with extended rights, such as setuid executables or executables - with raised capabilities. One such variable is - LD_LIBRARY_PATH. Therefore, executables that have the RAWIO - capability can only load shared libraries from default system - directories. - - One can work around this restriction by either installing the - shared libraries in system directories, linking statically against - those libraries, or using the -rpath linker option to specify the - full path to the shared libraries during the linking step. - - --------------------------------------------------- -COMPILE THE LIBMSR LIBRARY TO ACCESS THE MSRS - -https://github.com/scalability-llnl/libmsr - -Get the library and follow the instructions to build using CMake. -This library contains a subdirectory, test, which will exercise the -functionality. - --------------------------------------------------- -CONFIGURING THE PAPI LIBMSR COMPONENT - -Set libmsr library and header files by configuring within the component. - % cd /src/components/libmsr - % ./configure --with-libmsr-incdir= --with-libmsr-libdir= -Then, at the higher src dirctory, configure with this component - % cd /src - % ./configure --with-components="libmsr" - or if you want to specify the compilers and enable debug. 
- % ./configure CC=gcc F77=gfortran --with-debug --with-components="libmsr" -Finally, follow the standard PAPI build (make) instructions - % make -To use the module, make sure that the libraries are accessible. - % export LD_LIBRARY_PATH=${PAPIDIR}/src:${PAPIDIR}/src/libpfm4/lib:${LIBMSRDIR}/lib:${LD_LIBRARY_PATH} -To check the installation, the following should show some available counters - % ./utils/papi_native_avail | grep libmsr -To check the installation, the following should some counter values - % ./utils/papi_native_avail -e "libmsr:::PKG_WATTS:PACKAGE0" - % ./utils/papi_command_line "libmsr:::PKG_WATTS:PACKAGE0" - % ./utils/papi_command_line "libmsr:::PKG_DELTA_ENERGY:PACKAGE0" - - --------------------------------------------------- -USE THE PAPI LIBMSR COMPONENT - -See the components/libmsr/utils/README file for instructions. This -test demonstrates how to write power constraints, and gives an -estimate of the overheads for reading and writing information to the -RAPL MSRs. - - -[1] http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c903f0456bc69176912dee6dd25c6a66ee1aed00 - -*/ diff -Nru papi-5.7.0+dfsg/src/components/libmsr/README.md papi-6.0.0~dfsg/src/components/libmsr/README.md --- papi-5.7.0+dfsg/src/components/libmsr/README.md 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/libmsr/README.md 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,168 @@ +# LIBMSR Component +This libmsr component is an initial version, and has been tested +with libmsr (v0.1.17 11/2015) and the msr_safe kernel module (19/2015 +version). + +* https://github.com/scalability-llnl/libmsr +* https://github.com/scalability-llnl/msr-safe + + +The PAPI libmsr component supports measuring and capping power usage +on recent Intel architectures using the RAPL interface exposed through +MSRs (model-specific registers). 
+ +Lawrence Livermore National Laboratory has released a library (libmsr) +designed to provide a simple, safe, consistent interface to several of +the model-specific registers (MSRs) in Intel processors. The problem +is that permitting open access to the MSRs on a machine can be a +safety hazard, so access to MSRs is usually limited. In order to +encourage system administrators to give wider access to the MSRs on a +machine, LLNL has released a Linux kernel module (msr_safe) which +provides safer, white-listed access to the MSRs. + +PAPI has created a libmsr component that can provide read and write +access to the information and controls exposed via the libmsr library. + +This PAPI component introduces a new ability for PAPI; it is the first +case where PAPI is writing information to a counter as well as reading +the data from the counter. + +## Enable Access to the MSRs (Model Specific Registers) + +https://github.com/scalability-llnl/msr-safe + +To use this component, the system will need to provide access to Model +Specific Registers (MSRs) from user space. The actions described +below will generally require superuser ability. Note, these actions +may have security and performance consequences, so please make sure +you know what you are doing. + +### OPTION 1: Enable MSR access using msr-safe +Install the msr-safe module from LLNL. + + lsmod | grep msr (should show msr_safe) + +Use chmod to set site-appropriate access permissions (e.g. 766) for + +/dev/cpu/*/msr_safe /dev/cpu/msr_batch /dev/cpu/msr_whitelist + +Load a whitelist appropriate for your machine, e.g. 
for SandyBridge: + + cat msr-safe/whitelists/wl_062D > /dev/cpu/msr_whitelist + +### OPTION 2: Enable MSR access via the filesystem and elevated permissions +Or, enable access to the standard MSRs filesystem + +For Linux kernel version < 3.7, using only file system checks + + chmod 666 /dev/cpu/*/msr + +For Linux kernel version >= 3.7, using capabilities + + chmod 666 /dev/cpu/*/msr + +The final executable needs `CAP_SYS_RAWIO` to open MSR device files [1] + + setcap cap_sys_rawio=ep /path/to/executable + +The final executable cannot be on a shared network partition. + +The dynamic linker on most operating systems will remove variables +that control dynamic linking from the environment of executables +with extended rights, such as setuid executables or executables +with raised capabilities. One such variable is +`LD_LIBRARY_PATH`. Therefore, executables that have the RAWIO +capability can only load shared libraries from default system +directories. + +One can work around this restriction by either installing the +shared libraries in system directories, linking statically against +those libraries, or using the -rpath linker option to specify the +full path to the shared libraries during the linking step. + + +## Compile the LIBMSR Library to Access the MSRs + +https://github.com/scalability-llnl/libmsr + +Get the library and follow the instructions to build using CMake. +This library contains a subdirectory, test, which will exercise the +functionality. + + +## Installing PAPI with LIBMSR Component + +There is ONE required environment variable: `PAPI_LIBMSR_ROOT`. This is +required for both compiling, and at runtime.
+ +An example that works on ICL's Saturn system (at this writing): + + export PAPI_LIBMSR_ROOT=/sw/libmsr/0.1.17 + +Within `PAPI_LIBMSR_ROOT`, we expect the following standard directories: + +* `PAPI_LIBMSR_ROOT/include` or `PAPI_LIBMSR_ROOT/include/msr` +* `PAPI_LIBMSR_ROOT/lib` + + +For a standard installed system, this is the only environment variable +required for both compile and runtime. + +System configurations can vary. Some systems use Spack, a package +manager, to automatically keep paths straight. Others require +"module load" commands to provide some services, e.g. +"module load libmsr", and these may also set environment +variables and change the `LD_LIBRARY_PATH` search order. + +Users may require the help of sysadmin personnel to navigate these +facilities and gain access to the correct libraries. + +### Configure PAPI with LIBMSR Enabled + +We presume you have navigated to the +directory papi/src, AND that you have exported `PAPI_LIBMSR_ROOT`. + +In the papi/src directory: + + ./configure --with-components="libmsr" + make + +### Testing PAPI with LIBMSR Enabled + +From papi/src: + + utils/papi_component_avail + +For the LIBMSR component to be operational, it must find the dynamic +library `libmsr.so`. + +If it is not found (or is not functional) then the component will be +listed as "disabled" with a reason explaining the problem. If the library +was not found, then it is not in the expected place. The component +can be configured to look for the library in a specific place, and +using an alternate name if desired. Detailed instructions are +contained in the `Rules.libmsr` file. They are technical, users may wish +to enlist the help of a sysadmin. + +### List LIBMSR Supported Events +From papi/src: + + utils/papi_native_avail | grep -i libmsr + +## Use the PAPI LIBMSR Component + +See the components/libmsr/utils/README file for instructions.
This +test demonstrates how to write power constraints, and gives an +estimate of the overheads for reading and writing information to the +RAPL MSRs. + + +## Author +* Frank Winkler (frank.winkler@icl.utk.edu) +* Anthony Castaldo (tonycastaldo@icl.utk.edu) +* Asim YarKhan (yarkhan@icl.utk.edu) + + +[1] http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c903f0456bc69176912dee6dd25c6a66ee1aed00 + + diff -Nru papi-5.7.0+dfsg/src/components/libmsr/Rules.libmsr papi-6.0.0~dfsg/src/components/libmsr/Rules.libmsr --- papi-5.7.0+dfsg/src/components/libmsr/Rules.libmsr 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/libmsr/Rules.libmsr 2020-03-04 15:56:57.000000000 +0000 @@ -1,9 +1,92 @@ -include components/libmsr/Makefile.libmsr +# Set default if the root environment variable is not already set. +# Note PAPI_LIBMSR_ROOT is an environment variable that must be set. +# It is the ONLY environment variable that must be set, all other +# settings are optional. + +PAPI_LIBMSR_ROOT ?= /sw/libmsr/0.1.17 + +# For non-typical system configurations, the following 'runtime overrides' can +# be set, as just a library name, or a full path and name. There cannot be any +# spaces between the double quotes (which must be escaped as \"). An example: + +# PAPI_LIBMSR_MAIN = \"$(PAPI_LIBMSR_ROOT)/lib/libmsr.so\" + +# By default, all overrides are empty strings. + +# If an override is not an empty string, it must work, or the component will be +# disabled. + +# Both at compile time and run time, the software depends on PAPI_LIBMSR_ROOT. +# There is one library used by the LIBMSR component: +# libmsr.so + +# The standard installed locations for this library, with override: +# $(PAPI_LIBMSR_ROOT)/lib/libmsr.so #O.R. PAPI_LIBMSR_MAIN +# +# There are many ways to cause this path to be known. +# Spack is a package manager used on supercomputers, Linux and MacOS. If Spack +# is aware of LIBMSR, it encodes the path to the library. 
+ +# The environment variable LD_LIBRARY_PATH encodes a list of paths to search for +# libraries, separated by colons (:). This path could be added to +# LD_LIBRARY_PATH. +# +# Warning: LD_LIBRARY_PATH often contains a list of directories that are +# searched for libraries, some of these may be needed by other packages you are +# using. Always extend LD_LIBRARY_PATH instead of overwriting it; for example: +# >export LD_LIBRARY_PATH=someNewLibraryDirectory:$LD_LIBRARY_PATH +# which puts the new directory ahead of the existing LD_LIBRARY_PATH entries, so +# it is searched first. Alternatively, you can put it after them: +# >export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:someNewLibraryDirectory +# which will search the existing directories first, then your new directory. + +# You can check on the value of LD_LIBRARY_PATH with +# >echo $LD_LIBRARY_PATH + +# There may be other package managers or utilities, for example on a system with +# modules; the command 'module load libmsr' may modify LD_LIBRARY_PATH. + +# A Linux system will also search for libraries by default in the directories +# listed by /etc/ld.so.conf, and /usr/lib64, /lib64, /usr/lib, /lib. + +# OVERRIDES: These are by default empty strings (""), if set they must work. +PAPI_LIBMSR_MAIN = \"\" + +# An example of an override: +# PAPI_LIBMSR_MAIN = \"$(PAPI_LIBMSR_ROOT)/lib/libmsr.so.3\" + +# Note: If you change this override, PAPI should be rebuilt from scratch. +# From papi/src/ +# make clobber +# ./configure --with-components="libmsr" +# make + +# OPERATION, per library: +# 1) If an override string is not empty, we will use it explicitly and fail if +# it does not work. This means disabling the component; a reason for disabling +# is shown using the papi utility, papi/src/utils/papi_component_avail + +# 2) We will attempt to open the library using the normal system library search +# paths; if Spack is present and configured correctly it should deliver the +# proper library. A failure here will be silent; we will proceed to (3).
+ +# 3) If that fails, we will try to find the library in the standard installed +# locations listed above. If this fails, we disable the component; the reason +# for disabling is shown using the papi utility, +# papi/src/utils/papi_component_avail. + +# LIBMSR_MACS holds the macro define for the override. In the code we convert +# it to a string variable with the following line: +# static char libmsr_main[]=PAPI_LIBMSR_MAIN; + +LIBMSR_MACS = -DPAPI_LIBMSR_MAIN=$(PAPI_LIBMSR_MAIN) COMPSRCS += components/libmsr/linux-libmsr.c COMPOBJS += linux-libmsr.o -CFLAGS += $(LIBMSR_INCFLAG) -LDFLAGS += $(LDL) +# CFLAGS specifies compile flags; need include files here, and macro defines. +CFLAGS += -I$(PAPI_LIBMSR_ROOT)/include -I$(PAPI_LIBMSR_ROOT)/include/msr -g $(LIBMSR_MACS) +LDFLAGS += $(LDL) -g linux-libmsr.o: components/libmsr/linux-libmsr.c $(HEADERS) - $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/libmsr/linux-libmsr.c -o linux-libmsr.o + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/libmsr/linux-libmsr.c -o linux-libmsr.o + diff -Nru papi-5.7.0+dfsg/src/components/libmsr/utils/libmsr_write_test.c papi-6.0.0~dfsg/src/components/libmsr/utils/libmsr_write_test.c --- papi-5.7.0+dfsg/src/components/libmsr/utils/libmsr_write_test.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/libmsr/utils/libmsr_write_test.c 2020-03-04 15:56:57.000000000 +0000 @@ -9,8 +9,8 @@ #include #include "papi.h" -#include "msr/msr_core.h" -#include "msr/msr_rapl.h" +#include "msr_core.h" +#include "msr_rapl.h" #define MAX_EVENTS 128 diff -Nru papi-5.7.0+dfsg/src/components/lmsensors/configure.in papi-6.0.0~dfsg/src/components/lmsensors/configure.in --- papi-5.7.0+dfsg/src/components/lmsensors/configure.in 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/lmsensors/configure.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,18 +0,0 @@ -# Process this file with autoconf to produce a configure script.
-# File: components/lmsensors/configure.in -# CVS: $Id$ - -AC_INIT -AC_ARG_WITH(sensors_incdir, - [--with-sensors_incdir= Specify path to sensors includes], - [SENSORS_INCDIR=$withval - CFLAGS="$CFLAGS -I$withval" - AC_CHECK_HEADER([sensors.h], - [], - [AC_MSG_ERROR([sensors.h not found])], - [#include ])], - [AC_MSG_ERROR([Component requires path to sensors includes])]) - -AC_SUBST(SENSORS_INCDIR) -AC_CONFIG_FILES([Makefile.lmsensors]) -AC_OUTPUT diff -Nru papi-5.7.0+dfsg/src/components/lmsensors/linux-lmsensors.c papi-6.0.0~dfsg/src/components/lmsensors/linux-lmsensors.c --- papi-5.7.0+dfsg/src/components/lmsensors/linux-lmsensors.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/lmsensors/linux-lmsensors.c 2020-03-04 15:56:57.000000000 +0000 @@ -116,6 +116,18 @@ // file handles used to access lmsensors libraries with dlopen static void* dl1 = NULL; +// string macro defined within Rules.lmsensors +static char lmsensors_main[]=PAPI_LMSENSORS_MAIN; + +//----------------------------------------------------------------------------- +// Using weak symbols (global declared without a value, so it defers to any +// other global declared in another file WITH a value) allows PAPI to be built +// with the component, but PAPI can still be installed in a system without the +// required library. +//----------------------------------------------------------------------------- + +void (*_dl_non_dynamic_init)(void) __attribute__((weak)); // declare a weak dynamic-library init routine pointer. + static int link_lmsensors_libraries (); papi_vector_t _lmsensors_vector; @@ -159,14 +171,11 @@ createNativeEvents( void ) { unsigned id = 0; - unsigned int count; - (void) count; // Ignore not used warnings. 
- int chip_nr = 0; const sensors_chip_name *chip_name; /* component name and description */ - strcpy( _lmsensors_vector.cmp_info.short_name, "lm_sensors" ); + strcpy( _lmsensors_vector.cmp_info.short_name, "lmsensors" ); strcpy( _lmsensors_vector.cmp_info.description, "lm-sensors provides tools for monitoring the hardware health" ); @@ -202,7 +211,9 @@ /* Loop through all the subfeatures found */ while ((sub=sensors_get_all_subfeaturesPtr(chip_name,feature,&b))) { - count = 0; + /* replace spaces with underscores */ + char *ptr; + while( (ptr = strchr(featurelabel, ' ')) != NULL ){ *ptr = '_'; } /* Save native event data */ sprintf( lm_sensors_native_table[id].name, "%s.%s.%s", @@ -219,8 +230,6 @@ lm_sensors_native_table[id].resources.name = chip_name; lm_sensors_native_table[id].resources.subfeat_nr = sub->number; - count = sub->number; - /* increment the table index counter */ id++; } @@ -351,14 +360,45 @@ static int link_lmsensors_libraries () { - /* Need to link in the lmsensors libraries, if not found disable the component */ - dl1 = dlopen("libsensors.so", RTLD_NOW | RTLD_GLOBAL); - if (!dl1) - { - strncpy(_lmsensors_vector.cmp_info.disabled_reason, - "lmsensors library libsensors.so not found.",PAPI_MAX_STR_LEN); - return ( PAPI_ENOSUPP ); - } + if ( _dl_non_dynamic_init != NULL ) { + // If weak var present, statically linked insted of dynamic. + strncpy( _lmsensors_vector.cmp_info.disabled_reason, "The lmsensors component REQUIRES dynamic linking capabilities.", PAPI_MAX_STR_LEN-1); + // EXIT not supported. + return PAPI_ENOSUPP; + } + + char path_name[1024]; + char *lmsensors_root = getenv("PAPI_LMSENSORS_ROOT"); + + dl1 = NULL; + // Step 1: Process override if given. + if (strlen(lmsensors_main) > 0) { // If override given, it has to work. + dl1 = dlopen(lmsensors_main, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. 
+ if (dl1 == NULL) { + snprintf(_lmsensors_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_LMSENSORS_MAIN override '%s' given in Rules.lmsensors not found.", lmsensors_main); + return(PAPI_ENOSUPP); // Override given but not found. + } + } + + // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths. + if (dl1 == NULL) { // No override, + dl1 = dlopen("libsensors.so", RTLD_NOW | RTLD_GLOBAL); // Try system paths. + } + + // Step 3: Try the explicit install default. + if (dl1 == NULL && lmsensors_root != NULL) { // if root given, try it. + snprintf(path_name, 1024, "%s/lib64/libsensors.so", lmsensors_root); // PAPI Root check. + dl1 = dlopen(path_name, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + } + + // Check for failure. + if (dl1 == NULL) { + snprintf(_lmsensors_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "libsensors.so not found."); + return(PAPI_ENOSUPP); + } + + // We have dl1. + sensors_initPtr = dlsym(dl1, "sensors_init"); if (dlerror() != NULL) { diff -Nru papi-5.7.0+dfsg/src/components/lmsensors/Makefile.lmsensors.in papi-6.0.0~dfsg/src/components/lmsensors/Makefile.lmsensors.in --- papi-5.7.0+dfsg/src/components/lmsensors/Makefile.lmsensors.in 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/lmsensors/Makefile.lmsensors.in 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -SENSORS_INCDIR = @SENSORS_INCDIR@ diff -Nru papi-5.7.0+dfsg/src/components/lmsensors/README papi-6.0.0~dfsg/src/components/lmsensors/README --- papi-5.7.0+dfsg/src/components/lmsensors/README 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/lmsensors/README 1970-01-01 00:00:00.000000000 +0000 @@ -1,34 +0,0 @@ -/** -* @file: README -* CVS: $Id$ -* @author: Dan Terpstra -* terpstra@icl.utk.edu -* @defgroup papi_components Components -* @brief Component Specific Readme file: lmsensors -*/ - -/** @page component_readme Component Readme - -@section Component Specific Information - -lmsensors/ -The 
PAPI lmsensors component requires lmsensors version >= 3.0.0. - --------------------------------------------------- -CONFIGURING THE PAPI LMSENSORS COMPONENT - -Set lmsensors header files by configuring within the component. - % cd /src/components/lmsensors - % ./configure --with-sensors-incdir= -For example, one configuration may look like this - % ./configure --with-sensors-incdir=/usr/include/sensors - -Then, at the higher src directory, configure with this component - % cd /src - % ./configure --with-components="lmsensors" - or if you want to specify the compilers and enable debug. - % ./configure CC=gcc F77=gfortran --with-debug --with-components="lmsensors" -Finally, follow the standard PAPI build (make) instructions - % make - -*/ diff -Nru papi-5.7.0+dfsg/src/components/lmsensors/README.md papi-6.0.0~dfsg/src/components/lmsensors/README.md --- papi-5.7.0+dfsg/src/components/lmsensors/README.md 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/lmsensors/README.md 2020-03-04 15:56:57.000000000 +0000 @@ -0,0 +1,68 @@ +# LMSENSORS Component +The PAPI lmsensors component requires lmsensors version >= 3.0.0. + + +## Installing PAPI with LMSENSORS Component +There is ONE required environment variable: `PAPI_LMSENSORS_ROOT`. This is +required for both compiling, and at runtime. + +An example that works on ICL's Saturn system (at this writing): + + export PAPI_LMSENSORS_ROOT=/usr + +Within `PAPI_LMSENSORS_ROOT`, we expect the following standard directories: + +* `PAPI_LMSENSORS_ROOT/include` or `PAPI_LMSENSORS_ROOT/include/sensors` +* `PAPI_LMSENSORS_ROOT/lib64` + + +For a standard installed system, this is the only environment variable +required for both compile and runtime. + +System configurations can vary. Some systems use Spack, a package +manager, to automatically keep paths straight. Others require +"module load" commands to provide some services, e.g. 
+"module load lmsensors", and these may also set environment +variables and change the `LD_LIBRARY_PATH` search order. + +Users may require the help of sysadmin personnel to navigate these +facilities and gain access to the correct libraries. + +### Configure PAPI with LMSENSORS Enabled + +We presume you have navigated to the +directory papi/src, AND that you have exported `PAPI_LMSENSORS_ROOT`. + +In the papi/src directory: + + ./configure --with-components="lmsensors" + make + + +### Testing PAPI with LMSENSORS Enabled + +From papi/src: + + utils/papi_component_avail + + +For the LMSENSORS component to be operational, it must find the dynamic +library `libsensors.so`. + +If it is not found (or is not functional) then the component will be +listed as "disabled" with a reason explaining the problem. If library +was not found, then it is not in the expected place. The component +can be configured to look for the library in a specific place, and +using an alternate name if desired. Detailed instructions are +contained in the `Rules.lmsensors` file. They are technical, users may wish +to enlist the help of a sysadmin. + +### List LMSENSORS Supported Events +From papi/src: + + utils/papi_native_avail | grep -i sensors + +## Author +* Frank Winkler (frank.winkler@icl.utk.edu) +* Anthony Castaldo (tonycastaldo@icl.utk.edu) +* Dan Terpstra (terpstra@icl.utk.edu) diff -Nru papi-5.7.0+dfsg/src/components/lmsensors/Rules.lmsensors papi-6.0.0~dfsg/src/components/lmsensors/Rules.lmsensors --- papi-5.7.0+dfsg/src/components/lmsensors/Rules.lmsensors 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/lmsensors/Rules.lmsensors 2020-03-04 15:56:57.000000000 +0000 @@ -1,11 +1,91 @@ -# $Id$ +# Set default if the root environment variable is not already set. +# Note PAPI_LMSENSORS_ROOT is an environment variable that must be set. +# It is the ONLY environment variable that must be set, all other +# settings are optional. 
-include components/lmsensors/Makefile.lmsensors +PAPI_LMSENSORS_ROOT ?= /usr -COMPSRCS += components/lmsensors/linux-lmsensors.c +# For non-typical system configurations, the following 'runtime overrides' can +# be set, as just a library name, or a full path and name. There cannot be any +# spaces between the double quotes (which must be escaped as \"). An example: + +# PAPI_LMSENSORS_MAIN = \"$(PAPI_LMSENSORS_ROOT)/lib64/libsensors.so\" + +# By default, all overrides are empty strings. + +# If an override is not an empty string, it must work, or the component will be +# disabled. + +# Both at compile time and run time, the software depends on PAPI_LMSENSORS_ROOT. +# There is one library used by the LMSENSORS component: +# libsensors.so + +# The standard installed locations for this library, with override: +# $(PAPI_LMSENSORS_ROOT)/lib64/libsensors.so #O.R. PAPI_LMSENSORS_MAIN +# +# There are many ways to cause this path to be known. +# Spack is a package manager used on supercomputers, Linux and MacOS. If Spack +# is aware of LMSENSORS, it encodes the path to the library. + +# The environment variable LD_LIBRARY_PATH encodes a list of paths to search for +# libraries; separated by a colon (:). This path could be added to +# LD_LIBRARY_PATH. +# +# Warning: LD_LIBRARY_PATH often contains a list of directories that are +# searched for libraries, some of these may be needed by other packages you are +# using. Always extend the existing LD_LIBRARY_PATH, never overwrite it; for example: +# >export LD_LIBRARY_PATH=someNewLibraryDirectory:$LD_LIBRARY_PATH +# which would prepend the new directory, so it is searched before the existing +# LD_LIBRARY_PATH entries. Alternatively, you can append it: +# >export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:someNewLibraryDirectory +# which will search the existing directories first, then your new directory. 
+ +# You can check on the value of LD_LIBRARY_PATH with +# >echo $LD_LIBRARY_PATH + +# There may be other package managers or utilities, for example on a system with +# modules; the command 'module load lmsensors' may modify LD_LIBRARY_PATH. + +# A Linux system will also search for libraries by default in the directories +# listed by /etc/ld.so.conf, and /usr/lib64, /lib64, /usr/lib, /lib. + +# OVERRIDES: These are by default empty strings (""), if set they must work. +PAPI_LMSENSORS_MAIN = \"\" + +# An example of an override: +# PAPI_LMSENSORS_MAIN = \"$(PAPI_LMSENSORS_ROOT)/lib64/libsensors.so.3\" + +# Note: If you change this override, PAPI should be rebuilt from scratch. +# From papi/src/ +# make clobber +# ./configure --with-components="lmsensors" +# make + +# OPERATION, per library: +# 1) If an override string is not empty, we will use it explicitly and fail if +# it does not work. This means disabling the component; a reason for disabling +# is shown using the papi utility, papi/src/utils/papi_component_avail + +# 2) We will attempt to open the library using the normal system library search +# paths; if Spack is present and configured correctly it should deliver the +# proper library. A failure here will be silent; we will proceed to (3). + +# 3) If that fails, we will try to find the library in the standard installed +# locations listed above. If this fails, we disable the component, the reason +# for disabling is shown using the papi utility, +# papi/src/utils/papi_component_avail. + +# LMSENSORS_MACS holds the macro defines for the overrides (currently one). 
In the code we convert +# these to string variables with the following lines: +# static char lmsensors_main[]=PAPI_LMSENSORS_MAIN; + +LMSENSORS_MACS = -DPAPI_LMSENSORS_MAIN=$(PAPI_LMSENSORS_MAIN) + +COMPSRCS += components/lmsensors/linux-lmsensors.c COMPOBJS += linux-lmsensors.o -CFLAGS += -I$(SENSORS_INCDIR) -LDFLAGS += -L$(SENSORS_LIBDIR) -lsensors +# CFLAGS specifies compile flags; need include files here, and macro defines. +CFLAGS += -I$(PAPI_LMSENSORS_ROOT)/include -I$(PAPI_LMSENSORS_ROOT)/include/sensors -g $(LMSENSORS_MACS) +LDFLAGS += $(LDL) -g -linux-lmsensors.o: components/lmsensors/linux-lmsensors.c $(HEADERS) - $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/lmsensors/linux-lmsensors.c -o linux-lmsensors.o +linux-lmsensors.o: components/lmsensors/linux-lmsensors.c + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/lmsensors/linux-lmsensors.c -o linux-lmsensors.o diff -Nru papi-5.7.0+dfsg/src/components/lustre/linux-lustre.c papi-6.0.0~dfsg/src/components/lustre/linux-lustre.c --- papi-5.7.0+dfsg/src/components/lustre/linux-lustre.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/lustre/linux-lustre.c 2020-03-04 15:56:57.000000000 +0000 @@ -300,8 +300,6 @@ /* Lustre paths are of type server-UUID */ - idx = 0; - ptr = strstr(path,"llite/") + 6; if (ptr == NULL) { SUBDBG("Path: %s, missing llite directory, performance event not created.\n", path); diff -Nru papi-5.7.0+dfsg/src/components/Makefile_comp_tests.target.in papi-6.0.0~dfsg/src/components/Makefile_comp_tests.target.in --- papi-5.7.0+dfsg/src/components/Makefile_comp_tests.target.in 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/Makefile_comp_tests.target.in 2020-03-04 15:56:57.000000000 +0000 @@ -13,6 +13,7 @@ TESTLIB = $(testlibdir)/libtestlib.a LDFLAGS = @LDFLAGS@ @LDL@ CC = @CC@ +MPICC = @MPICC@ F77 = @F77@ CC_R = @CC_R@ CFLAGS = @CFLAGS@ diff -Nru papi-5.7.0+dfsg/src/components/nvml/configure.in papi-6.0.0~dfsg/src/components/nvml/configure.in --- 
papi-5.7.0+dfsg/src/components/nvml/configure.in 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/nvml/configure.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,50 +0,0 @@ -# Process this file with autoconf to produce a configure script. -# File: components/nvml/configure.in - -AC_INIT - -AC_ARG_WITH(nvml_dir, - [ --with-nvml-dir= Specify prefix to nvml libraries and headers ], - [nvml_dir=$withval]) -AC_ARG_WITH(nvml_incdir, - [ --with-nvml-incdir= Specify directory of nvml header files (nvml.h) in a specific location ], - [nvml_incdir=$withval]) -AC_ARG_WITH(nvml_libdir, - [ --with-nvml-libdir= Specify directory of nvml library (libnvidia-ml.so) in a specific location ], - [nvml_libdir=$withval]) - -nvml_dotest=0 -if test "x$nvml_dir" != "x"; then - nvml_incdir="$nvml_dir/include" - nvml_libdir="$nvml_dir/lib64" - nvml_dotest=1 -else - if test "x$nvml_libdir" != "x"; then - LIBS="-L$nvml_libdir" - nvml_dotest=1 - fi - if test "x$nvml_incdir" != "x"; then - CPPFLAGS="-I$nvml_incdir" - nvml_dotest=1 - fi -fi - -CFLAGS="$CFLAGS -I$nvml_incdir" -AC_CHECK_HEADER( [nvml.h], [], [AC_MSG_ERROR([NVML component: nvml.h not found: use configure flags to set the path ])], ) -LDFLAGS="$LDFLAGS -L$nvml_libdir -Wl,-rpath,$nvml_libdir" -AC_CHECK_LIB( [nvidia-ml], [nvmlInit], [], [AC_MSG_ERROR([NVML component: libnvidia-ml.so not found: use configure flags to set the path])] ) -NVML_INCDIR=$nvml_incdir -NVML_LIBDIR=$nvml_libdir - -AC_ARG_WITH(cuda-dir, - [--with-cuda-dir= Specify path to cuda root directory], - [ CUDA_DIR=$withval - LDFLAGS="$LDFLAGS -L$withval/lib64 -Wl,-rpath,$withval/lib64" - AC_CHECK_LIB( [cudart], [cudaMalloc], [], [AC_MSG_ERROR([CUDA cudart library not found!])])], - [ AC_MSG_ERROR([Component requires path to cuda library.])]) - -AC_CONFIG_FILES([Makefile.nvml]) -AC_SUBST(NVML_INCDIR) -AC_SUBST(NVML_LIBDIR) -AC_SUBST(CUDA_DIR) -AC_OUTPUT diff -Nru papi-5.7.0+dfsg/src/components/nvml/linux-nvml.c 
papi-6.0.0~dfsg/src/components/nvml/linux-nvml.c --- papi-5.7.0+dfsg/src/components/nvml/linux-nvml.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/nvml/linux-nvml.c 2020-03-04 15:56:57.000000000 +0000 @@ -129,6 +129,10 @@ static void* dl2 = NULL; static void* dl3 = NULL; +static char cuda_main[]=PAPI_CUDA_MAIN; +static char cuda_runtime[]=PAPI_CUDA_RUNTIME; +static char nvml_main[]=PAPI_NVML_MAIN; + static int linkCudaLibraries(); /* Declare our vector in advance */ @@ -1067,7 +1071,7 @@ ret = (*nvmlInitPtr)(); if (NVML_SUCCESS != ret) { - strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize."); + strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA management library failed to initialize."); _papi_nvml_shutdown_component(); // clean up any open dynLibs, mallocs, etc. return PAPI_ENOSUPP; } @@ -1082,7 +1086,7 @@ /* Figure out the number of CUDA devices in the system */ ret = (*nvmlDeviceGetCountPtr)(&nvml_count); if (NVML_SUCCESS != ret) { - strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library."); + strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA management library."); _papi_nvml_shutdown_component(); // clean up any open dynLibs, mallocs, etc. return PAPI_ENOSUPP; } @@ -1096,7 +1100,7 @@ /* We can probably recover from this, when we're clever */ if ((cuda_count > 0) && (nvml_count != (unsigned int)cuda_count)) { - strcpy(_nvml_vector.cmp_info.disabled_reason, "CUDA and the NVIDIA managament library have different device counts."); + strcpy(_nvml_vector.cmp_info.disabled_reason, "CUDA and the NVIDIA management library have different device counts."); _papi_nvml_shutdown_component(); // clean up any open dynLibs, mallocs, etc. 
return PAPI_ENOSUPP; } @@ -1150,30 +1154,84 @@ static int linkCudaLibraries() { + char path_lib[1024]; /* Attempt to guess if we were statically linked to libc, if so bail */ if (_dl_non_dynamic_init != NULL) { strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML component does not support statically linking of libc.", PAPI_MAX_STR_LEN); return PAPI_ENOSUPP; } - /* Need to link in the cuda libraries, if not found disable the component */ - dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL); - if (!dl1) { - strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.", PAPI_MAX_STR_LEN); - return (PAPI_ENOSUPP); + // Need to link in the cuda libraries, if any not found disable the component. + // getenv returns NULL if environment variable is not found. + char *cuda_root = getenv("PAPI_CUDA_ROOT"); + dl1 = NULL; // Ensure reset to NULL. + + // Step 1: Process override if given. + if (strlen(cuda_main) > 0) { // If override given, it has to work. + dl1 = dlopen(cuda_main, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + if (dl1 == NULL) { + snprintf(_nvml_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_CUDA_MAIN override '%s' given in Rules.nvml not found.", cuda_main); + return(PAPI_ENOSUPP); // Override given but not found. + } } + + // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths. + if (dl1 == NULL) { // No override, + dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL); // Try system paths. + } + + // Step 3: Try the explicit install default. + if (dl1 == NULL && cuda_root != NULL) { // if root given, try it. + snprintf(path_lib, 1024, "%s/lib64/libcuda.so", cuda_root); // PAPI Root check. + dl1 = dlopen(path_lib, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + } + + // Check for failure. + if (dl1 == NULL) { + snprintf(_nvml_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "libcuda.so not found."); + return(PAPI_ENOSUPP); + } + + // We have a dl1. (libcuda.so). 
+ cuInitPtr = dlsym(dl1, "cuInit"); if (dlerror() != NULL) { strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.", PAPI_MAX_STR_LEN); return (PAPI_ENOSUPP); } - dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE); - if (!dl2) { - strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.", PAPI_MAX_STR_LEN); - return (PAPI_ENOSUPP); + /* Need to link in the cuda runtime library, if not found disable the component */ + dl2 = NULL; // Ensure reset to NULL. + + // Step 1: Process override if given. + if (strlen(cuda_runtime) > 0) { // If override given, it has to work. + dl2 = dlopen(cuda_runtime, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + if (dl2 == NULL) { + snprintf(_nvml_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_CUDA_RUNTIME override '%s' given in Rules.nvml not found.", cuda_runtime); + return(PAPI_ENOSUPP); // Override given but not found. + } + } + + // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths. + if (dl2 == NULL) { // No override, + dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL); // Try system paths. } - cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice"); + + // Step 3: Try the explicit install default. + if (dl2 == NULL && cuda_root != NULL) { // if root given, try it. + snprintf(path_lib, 1024, "%s/lib64/libcudart.so", cuda_root); // PAPI Root check. + dl2 = dlopen(path_lib, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + } + + // Check for failure. + if (dl2 == NULL) { + snprintf(_nvml_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "libcudart.so not found."); + return(PAPI_ENOSUPP); + } + + // We have a dl2. (libcudart.so). 
+ + cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice"); if (dlerror() != NULL) { strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.", PAPI_MAX_STR_LEN); return (PAPI_ENOSUPP); @@ -1189,11 +1247,37 @@ return (PAPI_ENOSUPP); } - dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL); - if (!dl3) { - strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML runtime library libnvidia-ml.so not found.", PAPI_MAX_STR_LEN); - return (PAPI_ENOSUPP); + // We need the NVML main library, normally libnvidia-ml.so. + dl3 = NULL; // Ensure reset to NULL. + + // Step 1: Process override if given. + if (strlen(nvml_main) > 0) { // If override given, it MUST work. + dl3 = dlopen(nvml_main, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + if (dl3 == NULL) { + snprintf(_nvml_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_NVML_MAIN override '%s' given in Rules.nvml not found.", nvml_main); + return(PAPI_ENOSUPP); // Override given but not found. + } } + + // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths. + if (dl3 == NULL) { // If no override, + dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL); // Try system paths. + } + + // Step 3: Try the explicit install default. + if (dl3 == NULL && cuda_root != NULL) { // If ROOT given, it doesn't HAVE to work. + snprintf(path_lib, 1024, "%s/lib64/libnvidia-ml.so", cuda_root); // PAPI Root check. + dl3 = dlopen(path_lib, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + } + + // Check for failure. + if (dl3 == NULL) { + snprintf(_nvml_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "libnvidia-ml.so not found."); + return(PAPI_ENOSUPP); // Not found on default paths. + } + + // We have a dl3. (libnvidia-ml.so). 
+ nvmlDeviceGetClockInfoPtr = dlsym(dl3, "nvmlDeviceGetClockInfo"); if (dlerror() != NULL) { strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetClockInfo not found.", PAPI_MAX_STR_LEN); diff -Nru papi-5.7.0+dfsg/src/components/nvml/Makefile.nvml.in papi-6.0.0~dfsg/src/components/nvml/Makefile.nvml.in --- papi-5.7.0+dfsg/src/components/nvml/Makefile.nvml.in 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/nvml/Makefile.nvml.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -NVML_INCDIR = @NVML_INCDIR@ -NVML_LIBDIR = @NVML_LIBDIR@ -CUDA_DIR = @CUDA_DIR@ diff -Nru papi-5.7.0+dfsg/src/components/nvml/PeakConfigure.sh papi-6.0.0~dfsg/src/components/nvml/PeakConfigure.sh --- papi-5.7.0+dfsg/src/components/nvml/PeakConfigure.sh 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/nvml/PeakConfigure.sh 2020-03-04 15:56:57.000000000 +0000 @@ -1,3 +1,3 @@ # Necessary to configure NVML, on Peak. ./configure --with-nvml-libdir=/usr/lib64/nvidia --with-nvml-incdir=/usr/local --with-cuda-dir=/usr/lib64/nvidia -export CUDA_DIR=/usr/local/cida-9.2 +export CUDA_DIR=/usr/local/cuda-9.2 diff -Nru papi-5.7.0+dfsg/src/components/nvml/README papi-6.0.0~dfsg/src/components/nvml/README --- papi-5.7.0+dfsg/src/components/nvml/README 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/nvml/README 2020-03-04 15:56:57.000000000 +0000 @@ -1,57 +1,66 @@ -The PAPI NVML component provides an interface to the nVidia Management Library (nvml, libnvidia-ml). +General information +------------------- -In versions 8 and later part of the CUDA Toolkit, The NVIDIA -Management Library is no longer a separate download and is installed with -CUDA. On Linux/x86 platforms, it is often found in -/usr/lib64/nvidia/libnvidia-ml.so +The PAPI NVML component provides an interface to the NVIDIA Management +LIBRARY (nvml). The default name of the library is libnvidia-ml.so. 
+ +This PAPI NVML component has been developed and tested using CUDA +version 10.1 and the associated CUPTI library. CUPTI is released with +the CUDA Tools SDK. + +How to install PAPI with the NVML component? +-------------------------------------------- + +The NVML library is provided by NVIDIA as a standard part of the +CUDA Toolkit download. Two of the same libraries are required for the +CUDA and NVML components. To avoid confusion or the use of multiple +versions of the same library, the NVML component uses the same +environment variable (PAPI_CUDA_ROOT) and overrides as CUDA. Those two +libraries are libcuda.so, and libcudart.so. + +There is ONE required environment variable: PAPI_CUDA_ROOT. This is +required for both compiling, and at runtime. + +An example that works on ICL's Saturn system (at this writing): +export PAPI_CUDA_ROOT=/usr/local/cuda-10.1 + +Within PAPI_CUDA_ROOT, we expect the following standard directories: +PAPI_CUDA_ROOT/include +PAPI_CUDA_ROOT/lib64 + +For a standard installed system, this is the only environment variable +required for both compile and runtime. + +System configurations can vary. Some systems use Spack, a package +manager, to automatically keep paths straight. Others (like our own +ICL Saturn System) require "module load" commands to provide some +services, e.g. 'module load cuda-10.1', and these may also set +environment variables and change the LD_LIBRARY_PATH search order. + +Users may require the help of sysadmin personnel to navigate these +facilities and gain access to the correct libraries. + +Configure PAPI with NVML enabled. We presume you have navigated to the +directory papi/src. 
In that directory: + % ./configure --with-components="nvml" + +Build with PAPI_CUDA_ROOT specified (ICL's Saturn example again): + % export PAPI_CUDA_ROOT=/usr/local/cuda-10.1 + % make + +TESTING the component is installed: Still from papi/src: + % utils/papi_component_avail + +For the NVML component to be operational, it must find the dynamic +libraries libcuda.so, libcudart.so, and libnvidia-ml.so. + +If any of these are not found (or are not functional) then the +component will be listed as "disabled" with a reason explaining the +problem. If libraries were not found, then they are not in the +expected places. The component can be configured to look for each of +these libraries in a specific place, and using an alternate name if +desired. Detailed instructions are contained in the Rules.nvml file. +They are technical, users may wish to enlist the help of a sysadmin. -Other download packages may be available at -https://developer.nvidia.com/gpu-deployment-kit - -Before running the NVML component, the configure script for the NVML component -must be executed in order to generate the Makefile which contains the -configuration settings. This script needs to be executed only once. 
- - % cd < papi_dir >/src/components/nvml - % ./configure --with-nvml-libdir= --with-nvml-incdir= --with-cuda-dir= - -For example, one configuration may look like this - %./configure --with-nvml-libdir=/usr/lib64/nvidia --with-nvml-incdir=/usr/local/cuda/include --with-cuda-dir=/usr/local/cuda - -A recent (02/01/2019) NVML configure on the ICL saturn system: -./configure --with-nvml-libdir=/usr/local/cuda-9.2/targets/x86_64-linux/lib/stubs --with-nvml-incdir=/usr/local/cuda-9.2/include --with-cuda-dir=/usr/local/cuda-9.2 - -Equivalently, -./configure --with-nvml-libdir=$CUDA_DIR/lib64/stubs --with-nvml-incdir=$CUDA_DIR/include --with-cuda-dir=$CUDA_DIR - -The NVML component is added to PAPI during the configuration of PAPI -by adding the '--with-components=nvml' command line option to -configure. - - % ./configure --with-components="nvml" - -At build-time the nVidia compiler, nvcc, needs to be in your path, as does the cuda run-time library (libcudart.so). - -Please refer to http://developer.download.nvidia.com/assets/cuda/files/CUDADownloads/NVML/nvml.pdf -for details about NVML library. - - - -Note: Power Limiting using NVML (aka power capping) requires root. - -PAPI has added support for power limiting using NVML (on supported -devices from the Kepler family or later). The executable needs to -have root permissions to change the power limits on the device. - -The power_management_limit can be written to set a limit (in -milliWatts) to the power consumption by DEVICE. The value that can -be written needs to be between the -power_management_limit_constraint_min and -power_management_limit_constraint_max. - -nvml:::DEVICE:power_management_limit -nvml:::DEVICE:power_management_limit_constraint_min -nvml:::DEVICE:power_management_limit_constraint_max - -A test for writing of the power_management_limit can be found in the -nvml/tests/ directory. 
+To find a list of NVML supported events: + % utils/papi_native_avail | grep -i NVML diff -Nru papi-5.7.0+dfsg/src/components/nvml/Rules.nvml papi-6.0.0~dfsg/src/components/nvml/Rules.nvml --- papi-5.7.0+dfsg/src/components/nvml/Rules.nvml 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/nvml/Rules.nvml 2020-03-04 15:56:57.000000000 +0000 @@ -1,9 +1,101 @@ -include components/nvml/Makefile.nvml +# Set default if the root environment variable is not already set. +# Note PAPI_CUDA_ROOT is an environment variable that must be set. +# It is the ONLY environment variable that must be set, all other +# settings are optional. +PAPI_CUDA_ROOT ?= /opt/cuda + +# For non-typical system configurations, the following 'runtime overrides' can +# be set, as just a library name, or a full path and name. There cannot be any +# spaces between the double quotes (which must be escaped as \"). An example: + +# PAPI_CUDA_CUPTI = \"$(PAPI_CUDA_ROOT)/extras/CUPTI/lib64/libcupti.so\" + +# By default, all overrides are empty strings. + +# If an override is not an empty string, it must work, or the component will be +# disabled. + +# Both at compile time and run time, the software depends on PAPI_CUDA_ROOT. +# There are three libraries used by the NVML component, they are +# libcuda.so +# libcudart.so +# libnvidia-ml.so + +# The standard installed locations for these libraries, with overrides: +# $(PAPI_CUDA_ROOT)/lib64/libcuda.so #O.R. PAPI_CUDA_MAIN +# $(PAPI_CUDA_ROOT)/lib64/libcudart.so #O.R. PAPI_CUDA_RUNTIME +# $(PAPI_CUDA_ROOT)/lib64/libnvidia-ml.so #O.R. PAPI_NVML_MAIN +# +# There are many ways to cause these paths to be known. +# Spack is a package manager used on supercomputers, Linux and MacOS. If Spack +# is aware of CUDA or NVML, it encodes the paths to the necessary libraries. + +# The environment variable LD_LIBRARY_PATH encodes a list of paths to search for +# libraries; separated by a colon (:). These paths could be added to +# LD_LIBRARY_PATH. 
+# +# Warning: LD_LIBRARY_PATH often contains a list of directories that are +# searched for libraries, some of these may be needed by other packages you are +# using. Always extend the existing LD_LIBRARY_PATH, never overwrite it; for example: +# >export LD_LIBRARY_PATH=someNewLibraryDirectory:$LD_LIBRARY_PATH +# which would prepend the new directory, so it is searched before the existing +# LD_LIBRARY_PATH entries. Alternatively, you can append it: +# >export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:someNewLibraryDirectory +# which will search the existing directories first, then your new directory. + +# You can check on the value of LD_LIBRARY_PATH with +# >echo $LD_LIBRARY_PATH + +# There may be other package managers or utilities, for example on a system with +# modules; the command 'module load cuda' may modify LD_LIBRARY_PATH. + +# A Linux system will also search for libraries by default in the directories +# listed by /etc/ld.so.conf, and /usr/lib64, /lib64, /usr/lib, /lib. + +# OVERRIDES: These are by default empty strings (""), if set they must work. +PAPI_CUDA_MAIN = \"\" +PAPI_CUDA_RUNTIME = \"\" +PAPI_NVML_MAIN = \"\" + +# An example of an override: +# PAPI_NVML_MAIN = \"$(PAPI_CUDA_ROOT)/lib64/libnvidia-ml.so\" + +# Note: PAPI_CUDA_MAIN and PAPI_CUDA_RUNTIME, if set, will also apply to the +# CUDA component, which uses the same libraries. + +# Note: If you change these overrides, PAPI should be rebuilt from scratch. +# From papi/src/ +# make clobber +# ./configure --with-components="nvml" +# make + +# OPERATION, per library: +# 1) If an override string is not empty, we will use it explicitly and fail if +# it does not work. This means disabling the component; a reason for disabling +# is shown using the papi utility, papi/src/utils/papi_component_avail + +# 2) We will attempt to open the library using the normal system library search +# paths; if Spack is present and configured correctly it should deliver the +# proper library. A failure here will be silent; we will proceed to (3). 
+ +# 3) If that fails, we will try to find the library in the standard installed +# locations listed above. If this fails, we disable the component, the reason +# for disabling is shown using the papi utility, +# papi/src/utils/papi_component_avail. + +# DEFFLAGS is the macro defines for the three overrides. In the code we convert +# these to string variables with the following lines: +# static char cuda_main[]=PAPI_CUDA_MAIN; +# static char cuda_runtime[]=PAPI_CUDA_RUNTIME; +# static char nvml_main[]=PAPI_NVML_MAIN; + +NVML_MACS = -DPAPI_CUDA_MAIN=$(PAPI_CUDA_MAIN) -DPAPI_CUDA_RUNTIME=$(PAPI_CUDA_RUNTIME) -DPAPI_NVML_MAIN=$(PAPI_NVML_MAIN) COMPSRCS += components/nvml/linux-nvml.c COMPOBJS += linux-nvml.o -CFLAGS += -I$(NVML_INCDIR) -I$(CUDA_DIR)/include -LDFLAGS += $(LDL) +# CFLAGS specifies compile flags; need include files here, and macro defines. +CFLAGS += -I$(PAPI_CUDA_ROOT)/include -g $(NVML_MACS) +LDFLAGS += $(LDL) -g linux-nvml.o: components/nvml/linux-nvml.c $(HEADERS) $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/nvml/linux-nvml.c -o linux-nvml.o diff -Nru papi-5.7.0+dfsg/src/components/nvml/tests/Makefile papi-6.0.0~dfsg/src/components/nvml/tests/Makefile --- papi-5.7.0+dfsg/src/components/nvml/tests/Makefile 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/nvml/tests/Makefile 2020-03-04 15:56:57.000000000 +0000 @@ -1,11 +1,14 @@ NAME=nvml include ../../Makefile_comp_tests -include ../Makefile.nvml # -fverbose-asm -Wa,-adhln=benchSANVML.s -NVCC = $(CUDA_DIR)/bin/nvcc -NVCFLAGS = -L$(NVML_INCDIR) -ccbin=$(CC) -CUDALIBS = -L$(NVML_LIBDIR) -L$(CUDA_DIR)/lib64 -lcuda -lcudart -lnvidia-ml -lstdc++ +PAPI_NVML_ROOT ?= $(PAPI_CUDA_ROOT) +PAPI_NVML_INC ?= $(PAPI_CUDA_ROOT)/include +PAPI_NVML_LIB ?= $(PAPI_CUDA_ROOT)/lib64/stubs + +NVCC = $(PAPI_CUDA_ROOT)/bin/nvcc +NVCFLAGS = -L$(PAPI_NVML_INC) -ccbin=$(CC) +CUDALIBS = -L$(PAPI_NVML_LIB) -L$(PAPI_CUDA_ROOT)/lib64 -lcuda -lcudart -lnvidia-ml -lstdc++ PAPILIB := ../../../libpapi.a -ldl 
%.o:%.cu @@ -29,11 +32,6 @@ benchSANVML: benchSANVML.o $(UTILOBJS) $(NVCC) $(NVCFLAGS) $(INCLUDE) -o benchSANVML benchSANVML.o $(UTILOBJS) $(PAPILIB) $(CUDALIBS) - -nvmlcap_plot: nvmlcap_plot.o $(UTILOBJS) - $(NVCC) $(NVCFLAGS) $(INCLUDE) -o nvmlcap_plot nvmlcap_plot.o $(UTILOBJS) $(PAPILIB) $(CUDALIBS) - - clean: rm -f $(TESTS) *.o rm -f benchSANVML diff -Nru papi-5.7.0+dfsg/src/components/nvml/tests/nvmlcap_plot.cu papi-6.0.0~dfsg/src/components/nvml/tests/nvmlcap_plot.cu --- papi-5.7.0+dfsg/src/components/nvml/tests/nvmlcap_plot.cu 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/nvml/tests/nvmlcap_plot.cu 1970-01-01 00:00:00.000000000 +0000 @@ -1,691 +0,0 @@ -/****************************/ -/* THIS IS OPEN SOURCE CODE */ -/****************************/ - -/** - * @file nvmlcap_plot.cu - * CVS: $Id$ - * @author Tony Castaldo (tonycastaldon@icl.utk.edu) - * Mods: - * - * @brief - - * This file reads power limits using NVML and writes them - * every 50ms to nvmlcap_out.csv. - * - * It takes at least one argument; the number of seconds to - * run. - * - * If there is ONE additional argument, it is a power cap - * and all GPUs will be set to it. This is good if the GPUs - * are all the same model. - * - * If there are MULTIPLE additional arguments, there must be - * one per GPU, and they are individual power limits for the - * GPUs. This is useful if they are not all the same model. - * - * The output is written as tab-seperated-values (TSV) in - * PowerReadGPU.tsv. - */ - -#include -#include -#include -#include -#include -#include - -#include "papi.h" -#include "papi_test.h" - -#define dprintf if (1) printf /* debug printf; change to (1) to enable. */ - -int CTL_Z = 0; // No SIGTSTP signalled yet. -void cbSignal_SIGTSTP(int signalNumber) { - CTL_Z = 1; // Indicate it was received. -} // end signal handler. 
- -void helpText(void) { - fprintf(stderr, "This program requires at least one argument.\n"); - fprintf(stderr, "First arg is number of seconds to run. If 0, will run \n"); - fprintf(stderr, "until killed. A graceful exit can be made by signalling \n"); - fprintf(stderr, "SIGTSTP (Terminal Stop, like Ctrl-z). We will trap it \n"); - fprintf(stderr, "and close files, free memory, etc. On SLURM, get job id \n"); - fprintf(stderr, "using 'squeue', then 'scancel -s SIGTSTP JOBID' \n"); - fprintf(stderr, "2nd (optional) argument is a global power limit to set \n"); - fprintf(stderr, "on all GPUs. If more than two arguments are given, then \n"); - fprintf(stderr, "there must be a power argument for EACH GPU we find, \n"); - fprintf(stderr, "each is the individual power limit for that GPU (in the \n"); - fprintf(stderr, "order we report them). \n"); - fprintf(stderr, " \n"); - fprintf(stderr, "We report to stderr the hardware found and current power\n"); - fprintf(stderr, "limit settings. If you change the power limit here, it \n"); - fprintf(stderr, "does limit other programs; the original power limits are\n"); - fprintf(stderr, "automatically restored upon any exit of this program. \n"); - fprintf(stderr, " \n"); - fprintf(stderr, "Typically, you will start this program on a node, then \n"); - fprintf(stderr, "while it is running execute ANOTHER program on the node \n"); - fprintf(stderr, "that exercises the GPU. \n"); - fprintf(stderr, " \n"); - fprintf(stderr, "After changing power settings (if specified), this code \n"); - fprintf(stderr, "READS the spot power usage every 50ms, for all GPUs on \n"); - fprintf(stderr, "the node, and reports those (tab-separated) to the file \n"); - fprintf(stderr, "PowerReadGPUs.tsv. 
\n"); - fprintf(stderr, " \n"); - fprintf(stderr, "It will also output PowerReadGPU.gnuplot, a gnuplot \n"); - fprintf(stderr, "script to plot the power usage for each GPU on the node.\n"); - fprintf(stderr, "This is just an ascii file and can be edited if needed. \n"); -}; - -// Host function -int main( int argc, char** argv ) -{ - -#define NUM_EVENTS 32 /* Max number of GPUs on a node this code can handle. */ - int retval, i, j, device_count; - int EventSet = PAPI_NULL; - long long values[NUM_EVENTS]; // For reading either limit or current power. - char *LimitEventName[NUM_EVENTS]; - char *PowerEventName[NUM_EVENTS]; - char *minEventName[NUM_EVENTS]; - char *maxEventName[NUM_EVENTS]; - int powerEvents[NUM_EVENTS]; // PAPI codes for current power events. - int limitEvents[NUM_EVENTS]; // PAPI codes for power limit setting. - int minEvents[NUM_EVENTS]; - int maxEvents[NUM_EVENTS]; - long long minSetting[NUM_EVENTS]; - long long maxSetting[NUM_EVENTS]; - long long UserLimitGiven[NUM_EVENTS]; // These are the values per GPU set by user. - long long OrigLimitFound[NUM_EVENTS]; // original limit read from device. - int PowerEventCount = 0, LimitEventCount = 0, minEventCount = 0, maxEventCount = 0; - const PAPI_component_info_t *cmpinfo; - char event_name[PAPI_MAX_STR_LEN]; - signal(SIGTSTP, cbSignal_SIGTSTP); // register the signal handler for CTL_Z. - - if (argc < 2) { - helpText(); - exit(-1); - } - - /* PAPI Initialization */ - retval = PAPI_library_init( PAPI_VER_CURRENT ); - if( retval != PAPI_VER_CURRENT ) { - fprintf( stderr, "PAPI_library_init failed.\n" ); - helpText(); - exit(-1); - } - - printf( "PAPI_VERSION : %4d %6d %7d\n", - PAPI_VERSION_MAJOR( PAPI_VERSION ), - PAPI_VERSION_MINOR( PAPI_VERSION ), - PAPI_VERSION_REVISION( PAPI_VERSION ) ); - - int numcmp = PAPI_num_components(); - - // Search for the NVML component. 
- int cid = 0; - for (cid=0; cidname, "nvml" ) ) break; // If we found it, - } - } - - if ( cid==numcmp ) { // If true we looped through all without finding nvml. - fprintf(stderr, "NVML PAPI Component was not found.\n"); - exit(-1); - } - - printf( "NVML found as Component %d of %d: %s: %d events\n", (1+cmpinfo->CmpIdx), numcmp, cmpinfo->name, cmpinfo->num_native_events ); - if (cmpinfo->disabled) { // If disabled, - fprintf(stderr, "NVML PAPI Component is disabled.\n"); - exit(-1); - } - - cudaGetDeviceCount( &device_count ); - printf("Cuda Device Count: %d.\n", device_count); - if (device_count < 1) { - fprintf(stderr, "There are no GPUs to manage.\n"); - exit(-1); - } - - FILE *myOut = fopen("PowerReadGPU.tsv", "w"); // Open the file. - if (myOut == NULL) { // If that failed, - fprintf(stderr, "Failed to open output file PowerReadGPU.csv.\n"); - exit(-1); - } - - FILE *myGnuplot = fopen("PowerReadGPU.gnuplot", "w"); - if (myGnuplot == NULL) { - fprintf(stderr, "Failed to open gnuplot output file PowerReadGPU.gnuplot.\n"); - exit(-1); - } - - // Scan events to find nvml power events. - int code = PAPI_NATIVE_MASK; - int ii=0; - int event_modifier = PAPI_ENUM_FIRST; - for ( ii=0; iinum_native_events; ii++ ) { - retval = PAPI_enum_cmp_event( &code, event_modifier, cid ); - event_modifier = PAPI_ENUM_EVENTS; - if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); - retval = PAPI_event_code_to_name( code, event_name ); - char *ss; - - ss = strstr(event_name, "device_"); // Look for the device id. - if (ss == NULL) continue; // Not a valid name. - int did = atoi(ss+7); // convert it. - if (did >= device_count) continue; // Invalid device count. - - // Have some event, anyway. - ss = strstr(event_name, "power"); // First, see if we have power. - if (ss != NULL && ss[5] == 0) { // If found and the last thing on the line, - PowerEventName[did] = strdup(event_name); // .. remember the name, in device order. 
- dprintf("Found powerEvent '%s' for device %i.\n", event_name, did); - PowerEventCount++; // .. bump total power events. - continue; // .. done with this event. - } - - ss = strstr(event_name, "power_management_limit"); // get position of this string. - if (ss != NULL && ss[22] == 0) { // If found and last thing on the line, - LimitEventName[did] = strdup(event_name); // Valid! Remember the name. - dprintf("Found limitEvent '%s' for device %i.\n", event_name, did); // Report what we found. - LimitEventCount++; // Add to the number of events found. - continue; // Done with it. - } - - ss = strstr(event_name, "power_management_limit_constraint_min"); // get position of this string. - if (ss != NULL && ss[37] == 0) { // If found and last thing on the line, - minEventName[did] = strdup(event_name); // Valid! Remember the name. - dprintf("Found minEvent '%s' for device %i.\n", event_name, did); // Report what we found. - minEventCount++; // Add to the number of events found. - continue; // Done with it. - } - - ss = strstr(event_name, "power_management_limit_constraint_max"); // get position of this string. - if (ss != NULL && ss[37] == 0) { // If found and last thing on the line, - maxEventName[did] = strdup(event_name); // Valid! Remember the name. - dprintf("Found maxEvent '%s' for device %i.\n", event_name, did); // Report what we found. - maxEventCount++; // Add to the number of events found. - continue; // Done with it. - } - - } // end of for each event. - - - if (PowerEventCount != device_count || - LimitEventCount != device_count || - minEventCount != device_count || - maxEventCount != device_count) { // If we did not get all the events, - fprintf(stderr, "Too few NVML events found; %i devices, %i PowerEvents, %i LimitEvents, %i maxEvents, %i minEvents. 
Aborting\n", - device_count, PowerEventCount, LimitEventCount, minEventCount, maxEventCount); - for (j=0; j 2) { - if (argc != device_count+2) { - fprintf(stderr, "You have specified %i power limits, it doesn't match with %i devices.\n", argc-2, device_count); - for (j=0; j 2) { // If we have settings to check, - for (i=0; i maxSetting[i]) { - fprintf(stderr, "User Power Limit of %llu is out of range for device %i.\n", UserLimitGiven[i], i); - retval++; // increase violations. - } - } - - if (retval > 0) { // Any out of range, we get out. - for (j=0; j 2) { // If power limits were given, - retval = PAPI_write(EventSet, UserLimitGiven); // .. Try to write user values. - if( retval != PAPI_OK ) { - fprintf(stderr, "PAPI_write(User Limits) failed, returned %i [%s].\n", retval, PAPI_strerror(retval)); - for (j=0; j 0) { - fprintf(stderr, "Aborting for %i write failure(s).\n", retval); - for (j=0; j 0 && elapsedSec >= runSeconds) break; // Exit if time is up. - } - - if (CTL_Z) fprintf(stderr, "Received CTL_Z signal (SIGTSTP).\n"); - else fprintf(stderr, "Time %i seconds expired.\n", runSeconds); - fprintf(stderr, "Total reads: %i.\n", runCount); - - //-------------------------------------------------------------------------- - // Generate a gnuplot file instructions. - //-------------------------------------------------------------------------- - fprintf(myGnuplot, "set xlabel 'Time (sec)'\n"); // label for x axis. - fprintf(myGnuplot, "set nokey\n"); // no key needed. - fprintf(myGnuplot, "set terminal png\n"); // generate png output when plotting. - fprintf(myGnuplot, "set title 'Spot MW Usage During Run'\n"); // Title of graph. - fprintf(myGnuplot, "set yrange [0:300000]\n"); // Force the y range. - - for (i=0; i + * + * @brief + + * This file reads power limits using NVML and writes them + * every 50ms to nvmlcap_out.csv. + * + * It takes at least one argument; the number of seconds to + * run. 
+ * + * If there is ONE additional argument, it is a power cap + * and all GPUs will be set to it. This is good if the GPUs + * are all the same model. + * + * If there are MULTIPLE additional arguments, there must be + * one per GPU, and they are individual power limits for the + * GPUs. This is useful if they are not all the same model. + * + * The output is written as tab-seperated-values (TSV) in + * PowerReadGPU.tsv. + */ + +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define dprintf if (0) printf /* debug printf; change to (1) to enable. */ + +int CTL_Z = 0; // No SIGTSTP signalled yet. +void cbSignal_SIGTSTP(int signalNumber) { + CTL_Z = 1; // Indicate it was received. +} // end signal handler. + +void helpText(void) { + fprintf(stderr, "This program requires at least one argument.\n"); + fprintf(stderr, "First arg is number of seconds to run. If 0, will run \n"); + fprintf(stderr, "until killed. A graceful exit can be made by signalling \n"); + fprintf(stderr, "SIGTSTP (Terminal Stop, like Ctrl-z). We will trap it \n"); + fprintf(stderr, "and close files, free memory, etc. On SLURM, get job id \n"); + fprintf(stderr, "using 'squeue', then 'scancel -s SIGTSTP JOBID' \n"); + fprintf(stderr, "2nd (optional) argument is a global power limit to set \n"); + fprintf(stderr, "on all GPUs. If more than two arguments are given, then \n"); + fprintf(stderr, "there must be a power argument for EACH GPU we find, \n"); + fprintf(stderr, "each is the individual power limit for that GPU (in the \n"); + fprintf(stderr, "order we report them). \n"); + fprintf(stderr, " \n"); + fprintf(stderr, "We report to stderr the hardware found and current power\n"); + fprintf(stderr, "limit settings. If you change the power limit here, it \n"); + fprintf(stderr, "does limit other programs; the original power limits are\n"); + fprintf(stderr, "automatically restored upon any exit of this program. 
\n"); + fprintf(stderr, " \n"); + fprintf(stderr, "Typically, you will start this program on a node, then \n"); + fprintf(stderr, "while it is running execute ANOTHER program on the node \n"); + fprintf(stderr, "that exercises the GPU. \n"); + fprintf(stderr, " \n"); + fprintf(stderr, "After changing power settings (if specified), this code \n"); + fprintf(stderr, "READS the spot power usage every 50ms, for all GPUs on \n"); + fprintf(stderr, "the node, and reports those (tab-separated) to the file \n"); + fprintf(stderr, "PowerReadGPUs.tsv. \n"); + fprintf(stderr, " \n"); + fprintf(stderr, "It will also output PowerReadGPU.gnuplot, a gnuplot \n"); + fprintf(stderr, "script to plot the power usage for each GPU on the node.\n"); + fprintf(stderr, "This is just an ascii file and can be edited if needed. \n"); +}; + +// Host function +int main( int argc, char** argv ) +{ + +#define NUM_EVENTS 32 /* Max number of GPUs on a node this code can handle. */ + int retval, i, j, device_count; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; // For reading either limit or current power. + char *LimitEventName[NUM_EVENTS]; + char *PowerEventName[NUM_EVENTS]; + char *minEventName[NUM_EVENTS]; + char *maxEventName[NUM_EVENTS]; + int powerEvents[NUM_EVENTS]; // PAPI codes for current power events. + int limitEvents[NUM_EVENTS]; // PAPI codes for power limit setting. + int minEvents[NUM_EVENTS]; + int maxEvents[NUM_EVENTS]; + long long minSetting[NUM_EVENTS]; + long long maxSetting[NUM_EVENTS]; + long long UserLimitGiven[NUM_EVENTS]; // These are the values per GPU set by user. + long long OrigLimitFound[NUM_EVENTS]; // original limit read from device. + int PowerEventCount = 0, LimitEventCount = 0, minEventCount = 0, maxEventCount = 0; + const PAPI_component_info_t *cmpinfo; + char event_name[PAPI_MAX_STR_LEN]; + signal(SIGTSTP, cbSignal_SIGTSTP); // register the signal handler for CTL_Z. 
+ + if (argc < 2) { + helpText(); + exit(-1); + } + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if( retval != PAPI_VER_CURRENT ) { + fprintf( stderr, "PAPI_library_init failed.\n" ); + helpText(); + exit(-1); + } + + printf( "PAPI_VERSION : %4d %6d %7d\n", + PAPI_VERSION_MAJOR( PAPI_VERSION ), + PAPI_VERSION_MINOR( PAPI_VERSION ), + PAPI_VERSION_REVISION( PAPI_VERSION ) ); + + int numcmp = PAPI_num_components(); + + // Search for the NVML component. + int cid = 0; + for (cid=0; cidname, "nvml" ) ) break; // If we found it, + } + } + + if ( cid==numcmp ) { // If true we looped through all without finding nvml. + fprintf(stderr, "NVML PAPI Component was not found.\n"); + exit(-1); + } + + printf( "NVML found as Component %d of %d: %s: %d events\n", (1+cmpinfo->CmpIdx), numcmp, cmpinfo->name, cmpinfo->num_native_events ); + if (cmpinfo->disabled) { // If disabled, + fprintf(stderr, "NVML PAPI Component is disabled.\n"); + exit(-1); + } + + cudaGetDeviceCount( &device_count ); + printf("Cuda Device Count: %d.\n", device_count); + if (device_count < 1) { + fprintf(stderr, "There are no GPUs to manage.\n"); + exit(-1); + } + + FILE *myOut = fopen("PowerReadGPU.tsv", "w"); // Open the file. + if (myOut == NULL) { // If that failed, + fprintf(stderr, "Failed to open output file PowerReadGPU.csv.\n"); + exit(-1); + } + + FILE *myGnuplot = fopen("PowerReadGPU.gnuplot", "w"); + if (myGnuplot == NULL) { + fprintf(stderr, "Failed to open gnuplot output file PowerReadGPU.gnuplot.\n"); + exit(-1); + } + + // Scan events to find nvml power events. 
+ int code = PAPI_NATIVE_MASK; + int ii=0; + int event_modifier = PAPI_ENUM_FIRST; + for ( ii=0; iinum_native_events; ii++ ) { + retval = PAPI_enum_cmp_event( &code, event_modifier, cid ); + event_modifier = PAPI_ENUM_EVENTS; + if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + retval = PAPI_event_code_to_name( code, event_name ); + char *ss; + + ss = strstr(event_name, "device_"); // Look for the device id. + if (ss == NULL) continue; // Not a valid name. + int did = atoi(ss+7); // convert it. + if (did >= device_count) continue; // Invalid device count. + + // Have some event, anyway. + ss = strstr(event_name, "power"); // First, see if we have power. + if (ss != NULL && ss[5] == 0) { // If found and the last thing on the line, + PowerEventName[did] = strdup(event_name); // .. remember the name, in device order. + dprintf("Found powerEvent '%s' for device %i.\n", event_name, did); + PowerEventCount++; // .. bump total power events. + continue; // .. done with this event. + } + + ss = strstr(event_name, "power_management_limit"); // get position of this string. + if (ss != NULL && ss[22] == 0) { // If found and last thing on the line, + LimitEventName[did] = strdup(event_name); // Valid! Remember the name. + dprintf("Found limitEvent '%s' for device %i.\n", event_name, did); // Report what we found. + LimitEventCount++; // Add to the number of events found. + continue; // Done with it. + } + + ss = strstr(event_name, "power_management_limit_constraint_min"); // get position of this string. + if (ss != NULL && ss[37] == 0) { // If found and last thing on the line, + minEventName[did] = strdup(event_name); // Valid! Remember the name. + dprintf("Found minEvent '%s' for device %i.\n", event_name, did); // Report what we found. + minEventCount++; // Add to the number of events found. + continue; // Done with it. + } + + ss = strstr(event_name, "power_management_limit_constraint_max"); // get position of this string. 
+ if (ss != NULL && ss[37] == 0) { // If found and last thing on the line, + maxEventName[did] = strdup(event_name); // Valid! Remember the name. + dprintf("Found maxEvent '%s' for device %i.\n", event_name, did); // Report what we found. + maxEventCount++; // Add to the number of events found. + continue; // Done with it. + } + + } // end of for each event. + + + if (PowerEventCount != device_count || + LimitEventCount != device_count || + minEventCount != device_count || + maxEventCount != device_count) { // If we did not get all the events, + fprintf(stderr, "Too few NVML events found; %i devices, %i PowerEvents, %i LimitEvents, %i maxEvents, %i minEvents. Aborting\n", + device_count, PowerEventCount, LimitEventCount, minEventCount, maxEventCount); + for (j=0; j 2) { + if (argc != device_count+2) { + fprintf(stderr, "You have specified %i power limits, it doesn't match with %i devices.\n", argc-2, device_count); + for (j=0; j 2) { // If we have settings to check, + for (i=0; i maxSetting[i]) { + fprintf(stderr, "User Power Limit of %llu is out of range for device %i.\n", UserLimitGiven[i], i); + retval++; // increase violations. + } + } + + if (retval > 0) { // Any out of range, we get out. + for (j=0; j 2) { // If power limits were given, + retval = PAPI_write(EventSet, UserLimitGiven); // .. Try to write user values. + if( retval != PAPI_OK ) { + fprintf(stderr, "PAPI_write(User Limits) failed, returned %i [%s].\n", retval, PAPI_strerror(retval)); + for (j=0; j 0) { + fprintf(stderr, "Aborting for %i write failure(s).\n", retval); + for (j=0; j 0 && elapsedSec >= runSeconds) break; // Exit if time is up. + } + + if (CTL_Z) fprintf(stderr, "Received CTL_Z signal (SIGTSTP).\n"); + else fprintf(stderr, "Time %i seconds expired.\n", runSeconds); + fprintf(stderr, "Total reads: %i.\n", runCount); + + //-------------------------------------------------------------------------- + // Generate a gnuplot file instructions. 
+ //-------------------------------------------------------------------------- + fprintf(myGnuplot, "set xlabel 'Time (sec)'\n"); // label for x axis. + fprintf(myGnuplot, "set nokey\n"); // no key needed. + fprintf(myGnuplot, "set terminal png\n"); // generate png output when plotting. + fprintf(myGnuplot, "set title 'Spot MW Usage During Run'\n"); // Title of graph. + fprintf(myGnuplot, "set yrange [0:300000]\n"); // Force the y range. + + for (i=0; i scancel -s SIGTSTP JOBID + +And if you did not record JOBID, it can be found using + +> squeue + +The 2nd (optional) argument is a global power limit to set on all GPUs. If 3 +or more arguments are given, then there must be a power limit argument for EACH +GPU we find. Each will be the individual power limit for that GPU (in the order +we report them). + +We report to stderr the hardware found and current power limit settings. If you +change the power limit here, it WILL limit the performance of other programs +using the GPU. On the development systems where this program was tested, the +original power limits were automatically restored upon any exit of this +program. + +This program does NOT exercise the GPU. Typically, you will start this program +on a node, then while it is running execute ANOTHER program on the node that +does exercise the GPU. For testing, we used the MAGMA math library and dense +matrix multiplies. + +This code reads the spot power usage every 50ms, for all GPUs on the node, and +reports those (tab-separated) to the file PowerReadGPU.tsv. This file can be +edited with a text editor, for example to delete leading records in the file +that were recorded before the program of interest began. + +It will also output PowerReadGPU.gnuplot, a gnuplot script to plot the power +usage for each GPU on the node. This is just an ascii file and can also be +edited if needed. + +Be sure to configure PAPI with --with-components="nvml". + +To compile, you must have a valid PAPI_CUDA_ROOT environment variable. 
+Typically, we do +> module load cuda +> export PAPI_CUDA_ROOT=$CUDA_ROOT + diff -Nru papi-5.7.0+dfsg/src/components/pcp/linux-pcp.c papi-6.0.0~dfsg/src/components/pcp/linux-pcp.c --- papi-5.7.0+dfsg/src/components/pcp/linux-pcp.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/pcp/linux-pcp.c 2020-03-04 15:56:57.000000000 +0000 @@ -49,10 +49,9 @@ #undef PACKAGE_STRING #undef PACKAGE_VERSION -#include // See https://pcp.io/man/man3/pmapi.3.html for routines. -#include // also a PCP file. - -#define MYPCPLIB "libpcp.so" // Name of my PCP library. +// PCP include directory (defaults to /usr/include/pcp; see README for PAPI_PCP_INC.) +#include // See https://pcp.io/man/man3/pmapi.3.html for routines. +#include // also a PCP file. #define PM_OPTFLAG_EXIT (1<<5) #define PM_CONTEXT_UNDEF -1 @@ -180,13 +179,12 @@ // -------------------------- GLOBAL SECTION --------------------------------- -int _papi_hwi_debug = DEBUG_SUBSTRATE; // Bit flags to enable xxxDBG; SUBDBG for Substrate. Overrides weak global in papi.c. + int _papi_hwi_debug = DEBUG_SUBSTRATE; // Bit flags to enable xxxDBG; SUBDBG for Substrate. Overrides weak global in papi.c. static int sEventInfoSize=0; // total size of pcp_event_info. static int sEventInfoBlock = ((8*1024) / sizeof(_pcp_event_info_t)); // add about 8K at a time. static _pcp_event_info_t * pcp_event_info = NULL; // our array of created pcp events. static int sEventCount = 0; // count of events seen by pmTraversePMNS(). -int ctxHandle = -1; // context handle. (-1 is invalid). -char *pmProgname = "pcp"; +static int ctxHandle = -1; // context handle. (-1 is invalid). static char *cachedGetInDom(pmInDom indom, int inst); // cache all reads of pcp_pmGetInDom, to save time. #define HASH_SIZE 512 /* very roughly in the range of total events. full Saturn test, had ~ 11,000 events.*/ static _pcp_hash_t sNameHash[HASH_SIZE]; // hash table into pcp_event_info by event name. 
@@ -212,7 +210,7 @@ ctr_pcp_ntv_code_to_descr, // counter 15 ctr_pcp_ntv_code_to_info}; // counter 16 -int cnt[ctr_pcp_ntv_code_to_info+1] = {0}; // counters for the following macro. +static int cnt[ctr_pcp_ntv_code_to_info+1] = {0}; // counters for the following macro. #define mRtnCnt(funcname) \ if (COUNT_ROUTINES) { /* Note if (0) optimized out completely even if -O0. */ \ @@ -241,7 +239,11 @@ #define _time_gettimeofday if (0) gettimeofday /* change to 1 to enable gettimeofday for performance timings. */ -static void* dllib1 = NULL; // Our dynamic library. +// file handle used to access pcp library with dlopen +static void *dl1 = NULL; + +// string macro defined within Rules.pcp +static char pcp_main[]=PAPI_PCP_MAIN; //----------------------------------------------------------------------------- // Using weak symbols (global declared without a value, so it defers to any @@ -258,7 +260,7 @@ static int (*pmLookupName_ptr) (int numpid, char **namelist,pmID *pmidlist); static char* (*pmErrStr_ptr) (int code); static int (*pmTraversePMNS_ptr) (const char *name, void(*func)(const char *)); -void (*pmFreeResult_ptr) (pmResult *result); +static void (*pmFreeResult_ptr) (pmResult *result); static int (*pmNewContext_ptr) (int type, const char *name); static int (*pmDestroyContext_ptr) (int handle); static int (*pmFetch_ptr) (int numpid, pmID *pmidlist, pmResult **result); @@ -277,7 +279,7 @@ static int pcp_pmTraversePMNS (const char *name, void(*func)(const char *)) { return ((*pmTraversePMNS_ptr) (name, func)); } -void pcp_pmFreeResult (pmResult *result) +static void pcp_pmFreeResult (pmResult *result) { return ((*pmFreeResult_ptr) (result)); } static int pcp_pmNewContext (int type, const char *name) @@ -310,7 +312,7 @@ // have dups; max dups was 4. 
//----------------------------------------------------------------------------- -unsigned int stringHash(char *str, unsigned int tableSize) +static unsigned int stringHash(char *str, unsigned int tableSize) { unsigned long hash = 5381; // seed value. int c; @@ -326,7 +328,7 @@ // addNameHash: Given a string, hash it, and add to sNameHash[]. //----------------------------------------------------------------------------- -unsigned int addNameHash(char *key, int idx) +static unsigned int addNameHash(char *key, int idx) { unsigned int slot = stringHash(key, HASH_SIZE); // compute hash code. if (sNameHash[slot].idx < 0) { // If not occupied, @@ -348,7 +350,7 @@ // freeNameHash: delete any allocated for collisions. //----------------------------------------------------------------------------- -void freeNameHash(void) +static void freeNameHash(void) { int i; for (i=0; i 0) { // If override given, it has to work. + dl1 = dlopen(pcp_main, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + if (dl1 == NULL) { + snprintf(_pcp_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_PCP_MAIN override '%s' given in Rules.pcp not found.", pcp_main); + return(PAPI_ENOSUPP); // Override given but not found. + } + } + + // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths. + if (dl1 == NULL) { // No override, + dl1 = dlopen("libpcp.so", RTLD_NOW | RTLD_GLOBAL); // Try system paths. + } + + // Step 3: Try the explicit install default. + if (dl1 == NULL && pcp_root != NULL) { // if root given, try it. + snprintf(path_name, 1024, "%s/lib64/libpcp.so", pcp_root); // PAPI Root check. + dl1 = dlopen(path_name, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + } + + // Check for failure. + if (dl1 == NULL) { + snprintf(_pcp_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "libpcp.so not found."); + return(PAPI_ENOSUPP); + } + + // We have dl1. 
//----------------------------------------------------------------------------- // Collect pointers for routines in shared library. All below will abort this @@ -450,7 +472,7 @@ //----------------------------------------------------------------------------- // qsort comparison routine, for pcp_event_info. //----------------------------------------------------------------------------- -int qsPMID(const void *arg1, const void* arg2) +static int qsPMID(const void *arg1, const void* arg2) { _pcp_event_info_t *p1 = (_pcp_event_info_t*) arg1; _pcp_event_info_t *p2 = (_pcp_event_info_t*) arg2; @@ -471,7 +493,7 @@ // WARNING: May realloc() pcp_event_info[], invalidating pointers into it. //----------------------------------------------------------------------------- -void cbPopulateNameOnly(const char *name) +static void cbPopulateNameOnly(const char *name) { if (sEventCount >= sEventInfoSize) { // If we must realloc, sEventInfoSize += sEventInfoBlock; // .. Add another page. @@ -492,7 +514,7 @@ // (which CAN invalidate any pointers into it). //----------------------------------------------------------------------------- -void makeQualifiedEvent(int baseEvent, int idx, char *qualifier) +static void makeQualifiedEvent(int baseEvent, int idx, char *qualifier) { int prevSize; if (sEventCount >= sEventInfoSize) { // If we must realloc, @@ -536,7 +558,7 @@ // index. Presumes pmid is already present. //----------------------------------------------------------------------------- -void getPMDesc(int pcpIdx) { // Reads the variable descriptor. +static void getPMDesc(int pcpIdx) { // Reads the variable descriptor. int ret; if (pcp_event_info[pcpIdx].pmid == PM_ID_NULL) return; // Already have it. ret = pcp_pmLookupDesc(pcp_event_info[pcpIdx].pmid, // Get the event descriptor. @@ -649,7 +671,7 @@ // Helper routine, returns a ull value from a value set pointer. Automatically // does conversions from 32 bit to 64 bit (int32, uint32, fp32). 
//----------------------------------------------------------------------------- -unsigned long long getULLValue(pmValueSet *vset, int value_index) +static unsigned long long getULLValue(pmValueSet *vset, int value_index) { unsigned long long value; // our return value. convert_64_t convert; // union for conversion. @@ -723,7 +745,7 @@ // PM_SEM_DISCRETE // instantaneous value, discrete domain //---------------------------------------------------------------------------- -void subZero(_pcp_control_state_t *myCtl, int event) +static void subZero(_pcp_control_state_t *myCtl, int event) { int k = myCtl->pcpIndex[event]; // get pcp_event_info[] index. if (pcp_event_info[k].desc.sem != PM_SEM_COUNTER) return; // Don't subtract from instantaneous values. @@ -775,7 +797,7 @@ // NOTE: There is also a pmLookupInDomText() that returns a description of a // domain; if you want that, you need a pmInDom and a very similar routine. //----------------------------------------------------------------------------- -int getHelpText(unsigned int pcpIdx, char **helpText) +static int getHelpText(unsigned int pcpIdx, char **helpText) { char *p; int ret; @@ -830,8 +852,6 @@ ret = _local_linkDynamicLibraries(); if ( ret != PAPI_OK ) { // Failure to get lib. - snprintf(reason, rLen, "Failed attempt to link to PCP " - "library '%s'.\n", MYPCPLIB); return PAPI_ESYS; } @@ -895,6 +915,7 @@ if (allPMID == NULL) { // If we failed, snprintf(reason, rLen, "memory alloc denied for allPMID; " "size=%i.\n", sEventCount); + free(allNames); return(PAPI_ENOMEM); // memory failure. } // end if calloc failed. @@ -983,7 +1004,7 @@ pmValue *pmval = &vset->vlist[0]; // .. Get the first value. pmValueBlock *pB = pmval->value.pval; // .. get it. if (pcp_event_info[i].valType != pB->vtype) { - snprintf(reason, rLen, "Disagreement between var descriptor and fetch on event %s. %i vs %i. Possible version incompatibiity.\n", + snprintf(reason, rLen, "Unexpected value type fetched for %s. %i vs %i. 
Possible version incompatibiity.\n", pcp_event_info[i].name, pcp_event_info[i].valType, pB->vtype); return PAPI_ENOSUPP; // .. in } @@ -1253,7 +1274,7 @@ MyCtl->pcpIndex = realloc(MyCtl->pcpIndex, // .. .. reallocate to make more room. newalloc*sizeof(int)); // .. .. .. MyCtl->pcpValue = realloc(MyCtl->pcpValue, // .. .. reallocate to make more room. - newalloc*sizeof(long long)); // .. .. .. + newalloc*sizeof(unsigned long long));// .. .. .. MyCtl->maxAllocated = newalloc; // .. .. remember what we've got. } } else { // If NULL then I have no previous set, @@ -1261,7 +1282,7 @@ MyCtl->pcpIndex = // .. make room for 'count' indices, calloc(MyCtl->maxAllocated, sizeof(int)); // .. MyCtl->pcpValue = // .. make room for 'count' values. - calloc(MyCtl->maxAllocated, sizeof(long long)); // .. + calloc(MyCtl->maxAllocated, sizeof(unsigned long long)); // .. } if (MyCtl->pcpIndex == NULL) { // If malloc failed, @@ -1305,7 +1326,7 @@ // RETURNS PAPI error code, or PAPI_OK. //--------------------------------------------------------------------- -int PCP_ReadList(hwd_control_state_t *ctl, // the event set. +static int PCP_ReadList(hwd_control_state_t *ctl, // the event set. pmResult **results) // results from pmFetch, caller must pmFreeResult(results). { int i, j, ret; diff -Nru papi-5.7.0+dfsg/src/components/pcp/README papi-6.0.0~dfsg/src/components/pcp/README --- papi-5.7.0+dfsg/src/components/pcp/README 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/pcp/README 2020-03-04 15:56:57.000000000 +0000 @@ -27,9 +27,69 @@ ... +++ Repository: bitbucket/icl/papi/src/components/pcp +This PAPI PCP component has been developed and tested using PCP +version 3.12.2. + +How to install PAPI with the PCP component? +------------------------------------------- +There is ONE required environment variable: PAPI_PCP_ROOT. This is +required for both compiling, and at runtime. 
+ +An example that works on ICL's Caffeine system (at this writing): +export PAPI_PCP_ROOT=/usr + +Within PAPI_PCP_ROOT, we expect the following standard directories: +PAPI_PCP_ROOT/include #OR# PAPI_PCP_ROOT/include/pcp +PAPI_PCP_ROOT/lib64 + +For a standard installed system, this is the only environment variable +required for both compile and runtime. + +System configurations can vary. Some systems use Spack, a package +manager, to automatically keep paths straight. Others (like our own +ICL Saturn System) require "module load" commands to provide some +services, e.g. 'module load pcp', and these may also set environment +variables and change the LD_LIBRARY_PATH search order. + +Users may require the help of sysadmin personnel to navigate these +facilities and gain access to the correct libraries. + +Configure PAPI with PCP enabled. We presume you have navigated to the +directory papi/src, AND that you have exported PAPI_PCP_ROOT. +In the papi/src directory: + % ./configure --with-components="pcp" + % make + +TESTING the component is installed: Still from papi/src: +> utils/papi_component_avail + +For the PCP component to be operational, it must find the dynamic +library libpcp.so. + +If it is not found (or is not functional) then the component will be +listed as "disabled" with a reason explaining the problem. If library +was not found, then it is not in the expected place. The component +can be configured to look for the library in a specific place, and +using an alternate name if desired. Detailed instructions are +contained in the Rules.pcp file. They are technical, users may wish +to enlist the help of a sysadmin. + +To find a list of PCP supported events: + % utils/papi_native_avail | grep -i PCP + +Special Notes: PCP interfaces with a daemon (a background program +running on the machines). 
If you use a batch system (like SLURM) so +that your programs run on a different machine (node) than your login +node, then it is possible one machine can have the daemon installed +and the other machine does not. This is the case for ICL developers on +the Peak and Summit machines; the login nodes are not executing the +PCP daemon, and the work nodes are. -Code Specific Information -------------------------- +Thus to test the PCP component on Peak or Summit, you must run your program on a work node, e.g. +>jsrun --np 1 someprogram + +All below this line is code specific information for developers +--------------------------------------------------------------- This code was tested on both Saturn and ORNL peak (Power9; P9). ------------------------------------------------------------------ @@ -128,10 +188,6 @@ 'perfevents', but 'perfevents' is an umbrella that contains other events that are not 'nest' events. -Near line 55: #define MYPCPLIB "libpcp.so" -The name of the pcp library to be used. Note that it MUST be a -shared object library; a regular archive will not do. - Near line 194: #define COUNT_ROUTINES 1 Will enable a macro 'mRtnCnt' which will print on stderr the entry of each PAPI interfacing routine the first time it is executed; @@ -166,40 +222,3 @@ type and a sample value for for each event. ------------------------------------------------------------------ - -How to install PAPI with the PCP component? -------------------------------------------- - -This PAPI PCP component has been developed and tested using PCP -version 3.12.2. - -This component uses the PCP location from the environment (or looks -for /usr by default). Please set PCP_DIR during builds so that -the component can find the required header files and library. - -Configure PAPI with PCP enabled. 
- % cd src - % ./configure --prefix= --with-components="pcp" - -Build with PCP_DIR specified - % export PCP_DIR=/usr - % make && make install - -Testing the component requires that libraries for PAPI and PCP -can be found or are statically linked in to the executable. You may -need to add the library directories as shown here. - - % export LD_LIBRARY_PATH=${PCP_DIR}/lib64:${LD_LIBRARY_PATH} - -For general information on how to create and run components, the user -is referred to the INSTALL.txt section "CREATING AND RUNNING -COMPONENTS". - - -To check if PCP was successfully installed with PAPI. - % /bin/papi_component_avail - -To find a list of PCP supported events. - % /bin/papi_native_avail | grep pcp - -*/ diff -Nru papi-5.7.0+dfsg/src/components/pcp/Rules.pcp papi-6.0.0~dfsg/src/components/pcp/Rules.pcp --- papi-5.7.0+dfsg/src/components/pcp/Rules.pcp 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/pcp/Rules.pcp 2020-03-04 15:56:57.000000000 +0000 @@ -1,10 +1,90 @@ -PCP_DIR ?= /usr +# Set default if the root environment variable is not already set. +# Note PAPI_PCP_ROOT is an environment variable that must be set. +# It is the ONLY environment variable that must be set, all other +# settings are optional. +PAPI_PCP_ROOT ?= /usr + +# For non-typical system configurations, the following 'runtime overrides' can +# be set, as just a library name, or a full path and name. There cannot be any +# spaces between the double quotes (which must be escaped as \"). An example: + +# PAPI_PCP_MAIN = \"$(PAPI_PCP_ROOT)/lib64/libpcp.so\" + +# By default, all overrides are empty strings. + +# If an override is not an empty string, it must work, or the component will be +# disabled. + +# Both at compile time and run time, the software depends on PAPI_PCP_ROOT. +# There is one library used by the PCP component: +# libpcp.so + +# The standard installed locations for this library, with override: +# $(PAPI_PCP_ROOT)/lib64/libpcp.so #O.R. 
PAPI_PCP_MAIN +# +# There are many ways to cause this path to be known. +# Spack is a package manager used on supercomputers, Linux and MacOS. If Spack +# is aware of PCP, it encodes the path to the library. + +# The environment variable LD_LIBRARY_PATH encodes a list of paths to search for +# libraries; separated by a colon (:). This path could be added to +# LD_LIBRARY_PATH. +# +# Warning: LD_LIBRARY_PATH often contains a list of directories that are +# searched for libraries, some of these may be needed by other packages you are +# using. Always extend the existing LD_LIBRARY_PATH rather than overwriting it; for example: +# >export LD_LIBRARY_PATH=someNewLibraryDirectory:$LD_LIBRARY_PATH +# which will search your new directory first, then the existing +# directories. Alternatively, you can put the new directory last: +# >export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:someNewLibraryDirectory +# which will search the existing directories first, then your new directory. + +# You can check on the value of LD_LIBRARY_PATH with +# >echo $LD_LIBRARY_PATH + +# There may be other package managers or utilities, for example on a system with +# modules; the command 'module load pcp' may modify LD_LIBRARY_PATH. + +# A Linux system will also search for libraries by default in the directories +# listed by /etc/ld.so.conf, and /usr/lib64, /lib64, /usr/lib, /lib. + +# OVERRIDES: These are by default empty strings (""), if set they must work. +PAPI_PCP_MAIN = \"\" + +# An example of an override: +# PAPI_PCP_MAIN = \"$(PAPI_PCP_ROOT)/lib64/libpcp.so.3\" + +# Note: If you change this override, PAPI should be rebuilt from scratch. +# From papi/src/ +# make clobber +# ./configure --with-components="pcp" +# make + +# OPERATION, per library: +# 1) If an override string is not empty, we will use it explicitly and fail if +# it does not work. 
This means disabling the component; a reason for disabling +# is shown using the papi utility, papi/src/utils/papi_component_avail + +# 2) We will attempt to open the library using the normal system library search +# paths; if Spack is present and configured correctly it should deliver the +# proper library. A failure here will be silent; we will proceed to (3). + +# 3) If that fails, we will try to find the library in the standard installed +# locations listed above. If this fails, we disable the component, the reason +# for disabling is shown using the papi utility, +# papi/src/utils/papi_component_avail. + +# PCP_MACS holds the macro define for the override. In the code we convert +# it to a string variable with the following line: +# static char pcp_main[]=PAPI_PCP_MAIN; + +PCP_MACS = -DPAPI_PCP_MAIN=$(PAPI_PCP_MAIN) COMPSRCS += components/pcp/linux-pcp.c COMPOBJS += linux-pcp.o -LDFLAGS += -L$(PCP_DIR)/lib64 -lpcp -CC_SHR += -I$(PCP_DIR)/include -PCP_INC = -I$(PCP_DIR)/include +# CFLAGS specifies compile flags; need include files here, and macro defines. +CFLAGS += -I$(PAPI_PCP_ROOT)/include -I$(PAPI_PCP_ROOT)/include/pcp -g $(PCP_MACS) +LDFLAGS += $(LDL) -g linux-pcp.o: components/pcp/linux-pcp.c - $(CC) $(LIBCFLAGS) $(OPTFLAGS) $(PCP_INC) -c components/pcp/linux-pcp.c -o linux-pcp.o + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/pcp/linux-pcp.c -o linux-pcp.o diff -Nru papi-5.7.0+dfsg/src/components/pcp/tests/testPCP.c papi-6.0.0~dfsg/src/components/pcp/tests/testPCP.c --- papi-5.7.0+dfsg/src/components/pcp/tests/testPCP.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/pcp/tests/testPCP.c 2020-03-04 15:56:57.000000000 +0000 @@ -218,10 +218,15 @@ long long *values = NULL; // pointer for us to malloc next. eventSetCount = PAPI_num_events(EventSet); // get the number of events in set. 
+ if (eventSetCount < 1) { + test_fail( __FILE__, __LINE__, "PAPI_num_events(EventSet) failed.\n", ret); + } + values = calloc(eventSetCount, sizeof(long long)); // make zeroed space for it. ret = PAPI_read(EventSet, values); // read without a stop. if (ret != PAPI_OK) { // If that failed, report it. + free(values); test_fail( __FILE__, __LINE__, "PAPI_read(EventSet) failed.\n", ret); } @@ -263,11 +268,13 @@ ret = PAPI_reset(EventSet); // Reset the event. if (ret != PAPI_OK) { // If that failed, report and exit. + free(values); test_fail( __FILE__, __LINE__, "PAPI_reset_event() failed\n", ret); } ret = PAPI_stop(EventSet, values); // stop counting, get final values. if (ret != PAPI_OK) { // If that failed, report it. + free(values); test_fail( __FILE__, __LINE__, "PAPI_stop_event(EventSet, values) failed.\n", ret); } diff -Nru papi-5.7.0+dfsg/src/components/perf_event/pe_libpfm4_events.c papi-6.0.0~dfsg/src/components/perf_event/pe_libpfm4_events.c --- papi-5.7.0+dfsg/src/components/perf_event/pe_libpfm4_events.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/perf_event/pe_libpfm4_events.c 2020-03-04 15:56:57.000000000 +0000 @@ -31,6 +31,9 @@ // used to step through the attributes when enumerating events static int attr_idx; +/* alias flags to handle amd_fam17h, amd_fam17h_zen1 both present PMUs*/ +static int amd64_fam17h_zen1_present = 0; + /** @class find_existing_event * @brief looks up an event, returns it if it exists * @@ -353,9 +356,10 @@ ret = pfm_get_event_attr_info(libpfm4_index, i, PFM_OS_PERF_EVENT_EXT, &ainfo); if (ret != PFM_SUCCESS) { - free (msk_ptr); - SUBDBG("EXIT: Attribute info not found, libpfm4_index: %#x, ret: %d\n", libpfm4_index, _papi_libpfm4_error(ret)); - // FIXME: do we need to unlock here? 
--vmw + free (msk_ptr); + free(pmu_name); + SUBDBG("EXIT: error libpfm4 find event: Attribute info not found, libpfm4_index: %#x, ret: %d\n", libpfm4_index, _papi_libpfm4_error(ret)); + _papi_hwi_unlock( NAMELIB_LOCK ); return NULL; } @@ -391,8 +395,10 @@ /* See if we had a mask that wasn't found */ if (!mask_found) { - SUBDBG("Mask not found! %s\n",ptr); - /* FIXME: do we need to unlock here? */ + free(msk_ptr); + free(pmu_name); + SUBDBG("EXIT: error libpfm4 find event: Mask not found: %s.\n", ptr); + _papi_hwi_unlock( NAMELIB_LOCK ); return NULL; } @@ -479,7 +485,13 @@ * * @returns returns a libpfm event number * @retval PAPI_ENOEVENT Could not find an event - * + * Operational note: _pe_libpfm4_init() must be called first to set + * flags for synonymous PMUs. At this writing only + * amd64_fam17h_zen1_present is defined. + * Operational note: We indirectly return the pmu_idx within the + * event data; the calling code uses that to set + * pmu_idx for subsequent calls. All we do is find + * the next valid pmu, if any. */ static int @@ -508,6 +520,12 @@ break; } + if ((ret==PFM_SUCCESS) && amd64_fam17h_zen1_present && strcmp(pinfo.name, "amd64_fam17h") == 0) { + /* Skip as if invalid; we want the PMU amd64_fam17h_zen1 instead. */ + pmu_idx++; + continue; + } + if ((ret==PFM_SUCCESS) && pmu_is_present_and_right_type(&pinfo,pmu_type)) { pidx=pinfo.first_event; @@ -1156,6 +1174,35 @@ event_table->default_pmu.size = sizeof(pfm_pmu_info_t); retval=pfm_get_pmu_info(0, &(event_table->default_pmu)); + SUBDBG("Prescan for aliases.\n"); + /* We have to see if we have aliases in there as separate PMUs, */ + /* we don't want both PMUs with all the events duplicated. */ + /* For aliases, either is valid alone, but if both are present */ + /* specify a preference in the code. */ + /* Alias: amd64_fam17h_zen1 over amd64_fam17h. */ + /* Alias flags are static ints global to this file. 
*/ + i=0; + while(1) { + memset(&pinfo,0,sizeof(pfm_pmu_info_t)); + pinfo.size = sizeof(pfm_pmu_info_t); + retval=pfm_get_pmu_info(i, &pinfo); + + /* We're done if we hit an invalid PMU entry */ + /* We can't check against PFM_PMU_MAX as that might not */ + /* match if libpfm4 is dynamically linked */ + + if (retval==PFM_ERR_INVAL) { + break; + } + + if ( (retval==PFM_SUCCESS) && (pinfo.name != NULL) && + (pmu_is_present_and_right_type(&pinfo,pmu_type)) && + (strcmp(pinfo.name,"amd64_fam17h_zen1") == 0) ) { + amd64_fam17h_zen1_present = 1; + } + i++; + } + SUBDBG("Detected pmus:\n"); i=0; while(1) { @@ -1174,6 +1221,12 @@ if ((retval==PFM_SUCCESS) && (pinfo.name != NULL) && (pmu_is_present_and_right_type(&pinfo,pmu_type))) { + /* skip if it is amd64_fam17h and zen1 is also present. */ + if (strcmp(pinfo.name,"amd64_fam17h") == 0 && amd64_fam17h_zen1_present) { + i++; + continue; + } + SUBDBG("\t%d %s %s %d\n",i, pinfo.name,pinfo.desc,pinfo.type); @@ -1190,11 +1243,9 @@ /* Hack to have "default core" PMU */ if ( (pinfo.type==PFM_PMU_TYPE_CORE) && strcmp(pinfo.name,"ix86arch")) { - - SUBDBG("\t %s is default\n",pinfo.name); - memcpy(&(event_table->default_pmu), - &pinfo,sizeof(pfm_pmu_info_t)); - found_default++; + memcpy(&(event_table->default_pmu), + &pinfo,sizeof(pfm_pmu_info_t)); + found_default++; } } diff -Nru papi-5.7.0+dfsg/src/components/perf_event/perf_event.c papi-6.0.0~dfsg/src/components/perf_event/perf_event.c --- papi-5.7.0+dfsg/src/components/perf_event/perf_event.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/perf_event/perf_event.c 2020-03-04 15:56:57.000000000 +0000 @@ -1165,7 +1165,7 @@ papi_pe_buffer, sizeof ( papi_pe_buffer ) ); if ( ret == -1 ) { - PAPIERROR("read returned an error: ", + PAPIERROR("read returned an error: %s", strerror( errno )); return PAPI_ESYS; } @@ -1233,7 +1233,7 @@ papi_pe_buffer, sizeof ( papi_pe_buffer ) ); if ( ret == -1 ) { - PAPIERROR("read returned an error: ", + PAPIERROR("read returned an 
error: %s", strerror( errno )); return PAPI_ESYS; } @@ -1310,7 +1310,7 @@ sizeof ( papi_pe_buffer ) ); if ( ret == -1 ) { - PAPIERROR("read returned an error: ", + PAPIERROR("read returned an error: %s", strerror( errno )); return PAPI_ESYS; } @@ -2069,7 +2069,7 @@ /* Restart the counters */ if (ioctl( fd, PERF_EVENT_IOC_REFRESH, PAPI_REFRESH_VALUE ) == -1) { - PAPIERROR( "overflow refresh failed", 0 ); + PAPIERROR("overflow refresh failed"); } } diff -Nru papi-5.7.0+dfsg/src/components/perf_event_uncore/perf_event_uncore.c papi-6.0.0~dfsg/src/components/perf_event_uncore/perf_event_uncore.c --- papi-5.7.0+dfsg/src/components/perf_event_uncore/perf_event_uncore.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/perf_event_uncore/perf_event_uncore.c 2020-03-04 15:56:57.000000000 +0000 @@ -912,7 +912,7 @@ ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer, sizeof ( papi_pe_buffer ) ); if ( ret == -1 ) { - PAPIERROR("read returned an error: ", strerror( errno )); + PAPIERROR("read returned an error: %s", strerror( errno )); SUBDBG("EXIT: PAPI_ESYS\n"); return PAPI_ESYS; } @@ -970,7 +970,7 @@ ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer, sizeof ( papi_pe_buffer ) ); if ( ret == -1 ) { - PAPIERROR("read returned an error: ", strerror( errno )); + PAPIERROR("read returned an error: %s", strerror( errno )); SUBDBG("EXIT: PAPI_ESYS\n"); return PAPI_ESYS; } @@ -1007,7 +1007,7 @@ sizeof ( papi_pe_buffer ) ); if ( ret == -1 ) { - PAPIERROR("read returned an error: ", strerror( errno )); + PAPIERROR("read returned an error: %s", strerror( errno )); SUBDBG("EXIT: PAPI_ESYS\n"); return PAPI_ESYS; } diff -Nru papi-5.7.0+dfsg/src/components/perf_event_uncore/tests/perf_event_uncore.c papi-6.0.0~dfsg/src/components/perf_event_uncore/tests/perf_event_uncore.c --- papi-5.7.0+dfsg/src/components/perf_event_uncore/tests/perf_event_uncore.c 2019-03-04 19:56:23.000000000 +0000 +++ 
papi-6.0.0~dfsg/src/components/perf_event_uncore/tests/perf_event_uncore.c 2020-03-04 15:56:57.000000000 +0000 @@ -19,7 +19,8 @@ int retval,quiet; int EventSet = PAPI_NULL; long long values[1]; - char *uncore_event=NULL; + char *uncore_event_tmp=NULL; + char uncore_event[BUFSIZ]; char event_name[BUFSIZ]; int uncore_cidx=-1; const PAPI_component_info_t *info; @@ -56,8 +57,8 @@ } /* Get a relevant event name */ - uncore_event=get_uncore_event(event_name, BUFSIZ); - if (uncore_event==NULL) { + uncore_event_tmp=get_uncore_event(event_name, BUFSIZ); + if (uncore_event_tmp==NULL) { if (!quiet) { printf("uncore event name not available\n"); } @@ -66,7 +67,7 @@ PAPI_ENOSUPP ); } - sprintf(uncore_event,"%s:cpu=0",uncore_event); + sprintf(uncore_event,"%s:cpu=0",uncore_event_tmp); /* Create an eventset */ retval = PAPI_create_eventset(&EventSet); diff -Nru papi-5.7.0+dfsg/src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c papi-6.0.0~dfsg/src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c --- papi-5.7.0+dfsg/src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c 2020-03-04 15:56:58.000000000 +0000 @@ -126,7 +126,10 @@ /* Default domain should work */ /* Add our uncore event */ - sprintf(uncore_event,"%s%d::%s",uncore_base,j,event_name); + retval = snprintf(uncore_event, BUFSIZ, "%s%d::%s",uncore_base,j,event_name); + if( retval >= BUFSIZ ){ + fprintf(stderr,"Event full name \"%s%d::%s\" has been truncated to \"%s\"\n",uncore_base,j,event_name, uncore_event); + } retval = PAPI_add_named_event(EventSet[j][i], uncore_event); if (retval != PAPI_OK) { max_cbox=j; diff -Nru papi-5.7.0+dfsg/src/components/powercap/tests/powercap_limit.c papi-6.0.0~dfsg/src/components/powercap/tests/powercap_limit.c --- papi-5.7.0+dfsg/src/components/powercap/tests/powercap_limit.c 2019-03-04 19:56:23.000000000 +0000 +++ 
papi-6.0.0~dfsg/src/components/powercap/tests/powercap_limit.c 2020-03-04 15:56:58.000000000 +0000 @@ -1,172 +1,172 @@ -/** - * @author Philip Vaccaro - * Test case for powercap component - * @brief - * Tests basic functionality of powercap component - */ - -#include -#include -#include -#include - -#include "papi.h" -#include "papi_test.h" - -#define MAX_powercap_EVENTS 64 - -int main ( int argc, char **argv ) -{ - (void) argv; - (void) argc; - int retval,cid,powercap_cid=-1,numcmp; - int EventSet = PAPI_NULL; - long long values[MAX_powercap_EVENTS]; - int limit_map[MAX_powercap_EVENTS]; - int num_events=0, num_limits=0; - int code; - char event_names[MAX_powercap_EVENTS][PAPI_MAX_STR_LEN]; - int r,i; - - const PAPI_component_info_t *cmpinfo = NULL; - - /* PAPI Initialization */ - retval = PAPI_library_init( PAPI_VER_CURRENT ); - if ( retval != PAPI_VER_CURRENT ) - test_fail( __FILE__, __LINE__,"PAPI_library_init()\n",retval ); - - if ( !TESTS_QUIET ) printf( "Trying all powercap events\n" ); - - numcmp = PAPI_num_components(); - - for( cid=0; cidname,"powercap" ) ) { - powercap_cid=cid; - if ( !TESTS_QUIET ) printf( "Found powercap component at cid %d\n",powercap_cid ); - if ( cmpinfo->disabled ) { - if ( !TESTS_QUIET ) { - printf( "powercap component disabled: %s\n", - cmpinfo->disabled_reason ); - } - test_skip( __FILE__,__LINE__,"powercap component disabled",0 ); - } - break; - } - } - - /* Component not found */ - if ( cid==numcmp ) - test_skip( __FILE__,__LINE__,"No powercap component found\n",0 ); - - /* Skip if component has no counters */ - if ( cmpinfo->num_cntrs==0 ) - test_skip( __FILE__,__LINE__,"No counters in the powercap component\n",0 ); - - /* Create EventSet */ - retval = PAPI_create_eventset( &EventSet ); - if ( retval != PAPI_OK ) - test_fail( __FILE__, __LINE__, "PAPI_create_eventset()",retval ); - - /* Add all package limit events */ - code = PAPI_NATIVE_MASK; - r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, powercap_cid ); - - - /* find 
all package power events */ - while ( r == PAPI_OK ) { - retval = PAPI_event_code_to_name( code, event_names[num_events] ); - if ( retval != PAPI_OK ) - test_fail( __FILE__, __LINE__,"PAPI_event_code_to_name()", retval ); - - retval = PAPI_add_event(EventSet, code); - if (retval != PAPI_OK) - break; /* We've hit an event limit */ - - if (!(strstr(event_names[num_events],"SUBZONE")) && (strstr(event_names[num_events],"POWER_LIMIT"))) { - - limit_map[num_limits] = num_events; - num_limits++; - } - num_events++; - r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, powercap_cid ); - } - - - /* start collecting power data */ - retval = PAPI_start( EventSet ); - if ( retval != PAPI_OK ) - test_fail( __FILE__, __LINE__, "PAPI_start()",retval ); - - /* initial read of package limits */ - retval = PAPI_read( EventSet, values ); - if ( retval != PAPI_OK ) - test_fail( __FILE__, __LINE__, "PAPI_read()",retval ); - - - printf("\nCURRENT LIMITS\n"); - for( i=0; i +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define MAX_powercap_EVENTS 64 + +int main ( int argc, char **argv ) +{ + (void) argv; + (void) argc; + int retval,cid,powercap_cid=-1,numcmp; + int EventSet = PAPI_NULL; + long long values[MAX_powercap_EVENTS]; + int limit_map[MAX_powercap_EVENTS]; + int num_events=0, num_limits=0; + int code; + char event_names[MAX_powercap_EVENTS][PAPI_MAX_STR_LEN]; + int r,i; + + const PAPI_component_info_t *cmpinfo = NULL; + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__,"PAPI_library_init()\n",retval ); + + if ( !TESTS_QUIET ) printf( "Trying all powercap events\n" ); + + numcmp = PAPI_num_components(); + + for( cid=0; cidname,"powercap" ) ) { + powercap_cid=cid; + if ( !TESTS_QUIET ) printf( "Found powercap component at cid %d\n",powercap_cid ); + if ( cmpinfo->disabled ) { + if ( !TESTS_QUIET ) { + printf( "powercap component disabled: %s\n", + 
cmpinfo->disabled_reason ); + } + test_skip( __FILE__,__LINE__,"powercap component disabled",0 ); + } + break; + } + } + + /* Component not found */ + if ( cid==numcmp ) + test_skip( __FILE__,__LINE__,"No powercap component found\n",0 ); + + /* Skip if component has no counters */ + if ( cmpinfo->num_cntrs==0 ) + test_skip( __FILE__,__LINE__,"No counters in the powercap component\n",0 ); + + /* Create EventSet */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset()",retval ); + + /* Add all package limit events */ + code = PAPI_NATIVE_MASK; + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, powercap_cid ); + + + /* find all package power events */ + while ( r == PAPI_OK ) { + retval = PAPI_event_code_to_name( code, event_names[num_events] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__,"PAPI_event_code_to_name()", retval ); + + retval = PAPI_add_event(EventSet, code); + if (retval != PAPI_OK) + break; /* We've hit an event limit */ + + if (!(strstr(event_names[num_events],"SUBZONE")) && (strstr(event_names[num_events],"POWER_LIMIT"))) { + + limit_map[num_limits] = num_events; + num_limits++; + } + num_events++; + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, powercap_cid ); + } + + + /* start collecting power data */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start()",retval ); + + /* initial read of package limits */ + retval = PAPI_read( EventSet, values ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_read()",retval ); + + + printf("\nCURRENT LIMITS\n"); + for( i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "linux-powercap-ppc.h" + + +static char read_buff[PAPI_MAX_STR_LEN]; +static char write_buff[PAPI_MAX_STR_LEN]; + +static int num_events=0; + +static int pkg_events[PKG_NUM_EVENTS] + = {PKG_MIN_POWER, PKG_MAX_POWER, 
PKG_CUR_POWER}; +static const char *pkg_event_names[PKG_NUM_EVENTS] + = {"MIN_POWER", "MAX_POWER", "CURRENT_POWER"}; +static const char *pkg_sys_names[PKG_NUM_EVENTS] + = {"powercap-min", "powercap-max", "powercap-current"}; +static const char *pkg_event_descs[PKG_NUM_EVENTS] + = {"Minimum value allowed for power capping.", + "Maximum value allowed for power capping.", + "Current power drawned by package."}; +static mode_t pkg_sys_flags[PKG_NUM_EVENTS] + = {O_RDONLY, O_RDONLY, O_RDWR}; + +static _powercap_ppc_native_event_entry_t powercap_ppc_ntv_events[(PKG_NUM_EVENTS)]; + +static int event_fds[POWERCAP_MAX_COUNTERS]; + +papi_vector_t _powercap_ppc_vector; + +/***************************************************************************/ +/****** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT *******/ +/***************************************************************************/ + +/* Null terminated version of strncpy */ +static char * +_local_strlcpy( char *dst, const char *src, size_t size ) +{ + char *retval = strncpy( dst, src, size ); + if ( size > 0 ) dst[size-1] = '\0'; + + return( retval ); +} + +static long long +read_powercap_value( int index ) +{ + int sz = pread(event_fds[index], read_buff, PAPI_MAX_STR_LEN, 0); + read_buff[sz] = '\0'; + + return atoll(read_buff); +} + +static int +write_powercap_value( int index, long long value ) +{ + size_t ret = snprintf(write_buff, sizeof(write_buff), "%lld", value); + if (ret <= 0 || sizeof(write_buff) <= ret) + return PAPI_ENOSUPP; + + papi_powercap_ppc_lock(); + int sz = pwrite(event_fds[index], write_buff, PAPI_MAX_STR_LEN, 0); + if ( sz == -1 ) { + perror("Error in pwrite(): "); + } + papi_powercap_ppc_unlock(); + + return 1; +} + +/************************* PAPI Functions **********************************/ + +/* + * This is called whenever a thread is initialized + */ +static int +_powercap_ppc_init_thread( hwd_context_t *ctx ) +{ + (void) ctx; + + return PAPI_OK; +} + +/* + * Called when PAPI 
process is initialized (i.e. PAPI_library_init) + */ +static int +_powercap_ppc_init_component( int cidx ) +{ + + int e = -1; + char events_dir[128]; + char event_path[128]; + + DIR *events; + + const PAPI_hw_info_t *hw_info; + hw_info=&( _papi_hwi_system_info.hw_info ); + + /* check if IBM processor */ + if ( hw_info->vendor!=PAPI_VENDOR_IBM ) { + strncpy(_powercap_ppc_vector.cmp_info.disabled_reason, "Not an IBM Power9 processor", PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + + num_events = 0; + + /* Check the existence, and correct access modes to pkg directory path */ + size_t ret = snprintf(events_dir, sizeof(events_dir), "/sys/firmware/opal/powercap/system-powercap/"); + if (ret <= 0 || sizeof(events_dir) <= ret) + return PAPI_ENOSUPP; + + if ( NULL == (events = opendir(events_dir)) ) { + strncpy(_powercap_ppc_vector.cmp_info.disabled_reason, + "Directory /sys/firmware/opal/powercap/system-powercap missing.", + PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + + /* opendir needs clean up. 
*/ + closedir(events); + + /* loop through events and create powercap event entries */ + for ( e = 0; e < PKG_NUM_EVENTS; ++e ) { + /* compose string to individual event */ + size_t ret = snprintf(event_path, sizeof(event_path), "%s%s", events_dir, pkg_sys_names[e]); + if (ret <= 0 || sizeof(event_path) <= ret) + continue; + /* if it's not a valid pkg event path we skip it */ + if (access(event_path, F_OK) == -1) continue; + + ret = snprintf(powercap_ppc_ntv_events[num_events].name, + sizeof(powercap_ppc_ntv_events[num_events].name), + "%s", pkg_event_names[e]); + if (ret <= 0 || sizeof(powercap_ppc_ntv_events[num_events].name) <= ret) continue; + ret = snprintf(powercap_ppc_ntv_events[num_events].description, + sizeof(powercap_ppc_ntv_events[num_events].description), + "%s", pkg_event_descs[e]); + if (ret <= 0 || sizeof(powercap_ppc_ntv_events[num_events].description) <= ret) continue; + ret = snprintf(powercap_ppc_ntv_events[num_events].units, + sizeof(powercap_ppc_ntv_events[num_events].name), "W"); + if (ret <= 0 || sizeof(powercap_ppc_ntv_events[num_events].name) <= ret) continue; + + powercap_ppc_ntv_events[num_events].return_type = PAPI_DATATYPE_INT64; + powercap_ppc_ntv_events[num_events].type = pkg_events[e]; + + powercap_ppc_ntv_events[num_events].resources.selector = num_events + 1; + + event_fds[num_events] = open(event_path, O_SYNC|pkg_sys_flags[e]); + + num_events++; + } + + /* Export the total number of events available */ + _powercap_ppc_vector.cmp_info.num_native_events = num_events; + _powercap_ppc_vector.cmp_info.num_cntrs = num_events; + _powercap_ppc_vector.cmp_info.num_mpx_cntrs = num_events; + + /* Export the component id */ + _powercap_ppc_vector.cmp_info.CmpIdx = cidx; + + return PAPI_OK; +} + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +static int +_powercap_ppc_init_control_state( hwd_control_state_t *ctl ) +{ + _powercap_ppc_control_state_t* control = ( _powercap_ppc_control_state_t* ) ctl; 
+ memset( control, 0, sizeof ( _powercap_ppc_control_state_t ) ); + + return PAPI_OK; +} + +static int +_powercap_ppc_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t *native, + int count, + hwd_context_t *ctx ) +{ + (void) ctx; + int i, index; + + _powercap_ppc_control_state_t* control = ( _powercap_ppc_control_state_t* ) ctl; + control->active_counters = count; + + for ( i = 0; i < count; ++i ) { + index = native[i].ni_event; + control->which_counter[i]=index; + native[i].ni_position = i; + } + + return PAPI_OK; +} + +/* + * There are no counters to start, all three values are instantaneous + * */ +static int +_powercap_ppc_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + (void) ctl; + + return PAPI_OK; +} + +static int +_powercap_ppc_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + (void) ctl; + + return PAPI_OK; +} + +/* + * Shutdown a thread + * */ +static int +_powercap_ppc_shutdown_thread( hwd_context_t *ctx ) +{ + (void) ctx; + SUBDBG( "Enter _powercap_ppc_shutdown_thread\n" ); + return PAPI_OK; +} + + +static int +_powercap_ppc_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags ) +{ + SUBDBG("Enter _powercap_ppc_read\n"); + + (void) flags; + (void) ctx; + _powercap_ppc_control_state_t* control = ( _powercap_ppc_control_state_t* ) ctl; + + long long curr_val = 0; + + int c, i; + for( c = 0; c < control->active_counters; c++ ) { + i = control->which_counter[c]; + curr_val = read_powercap_value(i); + SUBDBG("%d, current value %lld\n", i, curr_val); + control->count[c]=curr_val; + } + + *events = ( ( _powercap_ppc_control_state_t* ) ctl )->count; + + return PAPI_OK; +} + +/* + * One counter only is writable, the current power one + * */ +static int +_powercap_ppc_write( hwd_context_t * ctx, hwd_control_state_t * ctl, long long *values ) +{ + (void) ctx; + _powercap_ppc_control_state_t *control = ( _powercap_ppc_control_state_t * ) ctl; + + int i; + for (i = 0; i < 
control->active_counters; i++) { + if (PKG_CUR_POWER == powercap_ppc_ntv_events[control->which_counter[i]].type) + write_powercap_value(control->which_counter[i], values[i]); + } + + return PAPI_OK; +} + +/* + * Close opened file descriptors. + */ +static int +_powercap_ppc_shutdown_component( void ) +{ + int i; + for( i = 0; i < num_events; i++ ) { + close(event_fds[i]); + } + + return PAPI_OK; +} + +static int +_powercap_ppc_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) +{ + SUBDBG( "Enter: ctx: %p\n", ctx ); + (void) ctx; + (void) code; + (void) option; + + return PAPI_OK; +} + + +static int +_powercap_ppc_set_domain( hwd_control_state_t *ctl, int domain ) +{ + (void) ctl; + if ( PAPI_DOM_ALL != domain ) + return PAPI_EINVAL; + + return PAPI_OK; +} + + +static int +_powercap_ppc_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + (void) ctl; + + return PAPI_OK; +} + +/* + * Native Event functions + */ +static int +_powercap_ppc_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + int index; + switch ( modifier ) { + case PAPI_ENUM_FIRST: + *EventCode = 0; + return PAPI_OK; + case PAPI_ENUM_EVENTS: + index = *EventCode; + if ( index < num_events - 1 ) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + + default: + return PAPI_EINVAL; + } +} + +/* + * + */ +static int +_powercap_ppc_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + int index = EventCode & PAPI_NATIVE_AND_MASK; + + if ( index >= 0 && index < num_events ) { + _local_strlcpy( name, powercap_ppc_ntv_events[index].name, len ); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + +static int +_powercap_ppc_ntv_code_to_info( unsigned int EventCode, PAPI_event_info_t *info ) +{ + int index = EventCode; + + if ( index < 0 || index >= num_events ) + return PAPI_ENOEVNT; + + _local_strlcpy( info->symbol, powercap_ppc_ntv_events[index].name, sizeof( info->symbol )); + _local_strlcpy( info->units, 
powercap_ppc_ntv_events[index].units, sizeof( info->units ) ); + _local_strlcpy( info->long_descr, powercap_ppc_ntv_events[index].description, sizeof( info->long_descr ) ); + + info->data_type = powercap_ppc_ntv_events[index].return_type; + return PAPI_OK; +} + +static int +_powercap_ppc_ntv_name_to_code( const char *name, unsigned int *EventCode) +{ /* Map one of the three fixed event names to its event code; an unrecognized name returns PAPI_ENOEVNT and leaves *EventCode untouched. */ + if (!strcmp(name, "MIN_POWER")) *EventCode = 0; + else if (!strcmp(name, "MAX_POWER")) *EventCode = 1; + else if (!strcmp(name, "CURRENT_POWER")) *EventCode = 2; else return PAPI_ENOEVNT; + return PAPI_OK; +} + +papi_vector_t _powercap_ppc_vector = { + .cmp_info = { + .name = "powercap_ppc", + .short_name = "powercap_ppc", + .description = "Linux powercap energy measurements for IBM PowerPC (9) architectures", + .version = "5.7.0", + .default_domain = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + .available_domains = PAPI_DOM_ALL, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( _powercap_ppc_context_t ), + .control_state = sizeof ( _powercap_ppc_control_state_t ), + .reg_value = sizeof ( _powercap_ppc_register_t ), + .reg_alloc = sizeof ( _powercap_ppc_reg_alloc_t ), + }, + /* function pointers in this component */ + .init_thread = _powercap_ppc_init_thread, + .init_component = _powercap_ppc_init_component, + .init_control_state = _powercap_ppc_init_control_state, + .update_control_state = _powercap_ppc_update_control_state, + .start = _powercap_ppc_start, + .stop = _powercap_ppc_stop, + .read = _powercap_ppc_read, + .write = _powercap_ppc_write, + .shutdown_thread = _powercap_ppc_shutdown_thread, + .shutdown_component = _powercap_ppc_shutdown_component, + .ctl = _powercap_ppc_ctl, + + .set_domain = _powercap_ppc_set_domain, + .reset = _powercap_ppc_reset, + + .ntv_enum_events = _powercap_ppc_ntv_enum_events, + .ntv_name_to_code = _powercap_ppc_ntv_name_to_code, + .ntv_code_to_name = 
_powercap_ppc_ntv_code_to_name, + .ntv_code_to_info = _powercap_ppc_ntv_code_to_info, +}; diff -Nru papi-5.7.0+dfsg/src/components/powercap_ppc/linux-powercap-ppc.h papi-6.0.0~dfsg/src/components/powercap_ppc/linux-powercap-ppc.h --- papi-5.7.0+dfsg/src/components/powercap_ppc/linux-powercap-ppc.h 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/powercap_ppc/linux-powercap-ppc.h 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,72 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-io.h + * CVS: $Id$ + * + * @author PAPI team UTK/ICL + * dgenet@icl.utk.edu + * + * @ingroup papi_components + * + * @brief Power capping component for PowerPC + * This file contains the source code for a component that enables + * PAPI to get and set power capping on PowerPC (Power9) architecture. + */ + +#ifndef _POWERCAP_PPC_H +#define _POWERCAP_PPC_H + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +#define papi_powercap_ppc_lock() _papi_hwi_lock(COMPONENT_LOCK); +#define papi_powercap_ppc_unlock() _papi_hwi_unlock(COMPONENT_LOCK); + +typedef struct _powercap_ppc_register { + unsigned int selector; +} _powercap_ppc_register_t; + +typedef struct _powercap_ppc_native_event_entry { + char name[PAPI_MAX_STR_LEN]; + char units[PAPI_MIN_STR_LEN]; + char description[PAPI_MAX_STR_LEN]; + int socket_id; + int component_id; + int event_id; + int type; + int return_type; + _powercap_ppc_register_t resources; +} _powercap_ppc_native_event_entry_t; + +typedef struct _powercap_ppc_reg_alloc { + _powercap_ppc_register_t ra_bits; +} _powercap_ppc_reg_alloc_t; + +// package events +// powercap-current powercap-max powercap-min +#define PKG_MIN_POWER 0 +#define PKG_MAX_POWER 1 +#define PKG_CUR_POWER 2 + +#define PKG_NUM_EVENTS 3 +#define POWERCAP_MAX_COUNTERS (PKG_NUM_EVENTS) + +typedef struct _powercap_ppc_control_state { + long 
long count[POWERCAP_MAX_COUNTERS]; + long long which_counter[POWERCAP_MAX_COUNTERS]; + long long lastupdate; + int active_counters; +} _powercap_ppc_control_state_t; + +typedef struct _powercap_ppc_context { + long long start_value[POWERCAP_MAX_COUNTERS]; + _powercap_ppc_control_state_t state; +} _powercap_ppc_context_t; + +#endif /* _POWERCAP_PPC_H */ diff -Nru papi-5.7.0+dfsg/src/components/powercap_ppc/README papi-6.0.0~dfsg/src/components/powercap_ppc/README --- papi-5.7.0+dfsg/src/components/powercap_ppc/README 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/powercap_ppc/README 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,61 @@ +/** +* @file: README +* CVS: $Id$ +* @author: Damien Genet (ICL/UTK) +* @defgroup papi_components Components +* @brief Component Specific Readme file: powercap_ppc +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information + +powercap_ppc/ + +The PAPI powercap component supports measuring and capping power usage +on recent IBM PowerPC architectures (Power9 and later) using the powercap +interface exposed through the Linux kernel. + +-------------------------------------------------- +MEASURING AND CAPPING POWER + +The powercap sysfs interface exposes power measurements as R/W register-like +power settings. The counters and R/W settings apply to the Power9. + +These counters and settings are exposed through this PAPI component and can be +accessed just like any normal PAPI counter. Running the "powercap_basic" test +in the test directory will list all the events on a system. There is also a +"powercap_limit" test in the test directory that shows how a power limit is +applied. + +Note: Power Limiting using powercap_ppc _does_ _not_ require root privileges. +Write permission to the file /sys/firmware/opal/powercap/powercap-current is +"enough". 
+ +-------------------------------------------------- +CONFIGURING THE PAPI POWERCAP COMPONENT + +At the higher src directory, configure with this component + % cd ${PAPIDIR}/src + % ./configure --with-components="powercap_ppc" +Follow the standard PAPI build (make) instructions + % make +To use the module, make sure that the libraries are accessible. + % export LD_LIBRARY_PATH=${PAPIDIR}/src:${PAPIDIR}/src/libpfm4/lib:${LD_LIBRARY_PATH} +To check the installation, the following should show some available counters + % ./utils/papi_native_avail | grep powercap_ppc + +-------------------------------------------------- +SYSTEM SETUP + +The actions described below will generally require superuser ability. +Note, these actions may have security and performance consequences, so +please make sure you know what you are doing. + + Use chmod to set site-appropriate access permissions (e.g. 444) + for /sys/firmware/opal/powercap/powercap-(min|max) + + Use chmod to set site-appropriate access permissions (e.g. 
664) + for /sys/firmware/opal/powercap/powercap-current + +-------------------------------------------------- diff -Nru papi-5.7.0+dfsg/src/components/powercap_ppc/Rules.powercap_ppc papi-6.0.0~dfsg/src/components/powercap_ppc/Rules.powercap_ppc --- papi-5.7.0+dfsg/src/components/powercap_ppc/Rules.powercap_ppc 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/powercap_ppc/Rules.powercap_ppc 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,6 @@ + +COMPSRCS += components/powercap_ppc/linux-powercap-ppc.c +COMPOBJS += linux-powercap-ppc.o + +linux-powercap-ppc.o: components/powercap_ppc/linux-powercap-ppc.c + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/powercap_ppc/linux-powercap-ppc.c -o linux-powercap-ppc.o diff -Nru papi-5.7.0+dfsg/src/components/powercap_ppc/tests/Makefile papi-6.0.0~dfsg/src/components/powercap_ppc/tests/Makefile --- papi-5.7.0+dfsg/src/components/powercap_ppc/tests/Makefile 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/powercap_ppc/tests/Makefile 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,22 @@ +NAME=powercap_ppc +include ../../Makefile_comp_tests.target + +TESTS = powercap_basic powercap_limit + +powercap_ppc_tests: $(TESTS) + +powercap_basic.o: powercap_basic.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c powercap_basic.c -o powercap_basic.o + +powercap_basic: powercap_basic.o $(UTILOBJS) $(PAPILIB) + $(CC) $(INCLUDE) -o powercap_basic powercap_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + + +powercap_limit.o: powercap_limit.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c powercap_limit.c -o powercap_limit.o + +powercap_limit: powercap_limit.o $(UTILOBJS) $(PAPILIB) + $(CC) $(INCLUDE) -o powercap_limit powercap_limit.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o *~ diff -Nru papi-5.7.0+dfsg/src/components/powercap_ppc/tests/powercap_basic.c papi-6.0.0~dfsg/src/components/powercap_ppc/tests/powercap_basic.c --- papi-5.7.0+dfsg/src/components/powercap_ppc/tests/powercap_basic.c 
1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/powercap_ppc/tests/powercap_basic.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,142 @@ +/** + * @author PAPI team UTK/ICL + * Test case for powercap component + * @brief + * Tests basic functionality of powercap component + */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define MAX_powercap_EVENTS 64 + +int +main( int argc, char **argv ) +{ + (void) argv; + (void) argc; + int retval,cid,powercap_cid=-1,numcmp; + int EventSet = PAPI_NULL; + long long *values; + int num_events=0; + int code; + char event_names[MAX_powercap_EVENTS][PAPI_MAX_STR_LEN]; + char event_descrs[MAX_powercap_EVENTS][PAPI_MAX_STR_LEN]; + char units[MAX_powercap_EVENTS][PAPI_MIN_STR_LEN]; + int data_type[MAX_powercap_EVENTS]; + int r,i, quiet = 1, passed = 0; + + const PAPI_component_info_t *cmpinfo = NULL; + PAPI_event_info_t evinfo; + + if (2 == argc) quiet = atoi(argv[1]); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + fprintf( stderr, "PAPI_library_init failed\n" ); + + if (!quiet) fprintf( stdout, "Trying all powercap_ppc events\n" ); + + numcmp = PAPI_num_components(); + + for( cid=0; cidname,"powercap_ppc" ) ) { + powercap_cid=cid; + if ( !quiet ) fprintf( stdout, "Found powercap_ppc component at cid %d\n",powercap_cid ); + if ( cmpinfo->disabled ) { + if ( !quiet ) { + fprintf(stderr, "powercap_ppc component disabled: %s\n", + cmpinfo->disabled_reason); + } + fprintf(stderr, "powercap_ppc component disabled\n"); + } + break; + } + } + + /* Component not found */ + if ( cid==numcmp ) + fprintf(stderr, "No powercap_ppc component found\n" ); + + /* Skip if component has no counters */ + if ( cmpinfo->num_cntrs==0 ) + fprintf(stderr, "No counters in the powercap_ppc component\n" ); + + /* Create EventSet */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) + fprintf(stderr, 
"PAPI_create_eventset()\n"); + + /* Add all events */ + code = PAPI_NATIVE_MASK; + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, powercap_cid ); + while ( r == PAPI_OK ) { + retval = PAPI_event_code_to_name( code, event_names[num_events] ); + if ( retval != PAPI_OK ) + fprintf(stdout, "Error from PAPI_event_code_to_name\n"); + + retval = PAPI_get_event_info( code,&evinfo ); + if ( retval != PAPI_OK ) + fprintf(stderr, "Error getting event info\n"); + + strncpy( event_descrs[num_events],evinfo.long_descr,sizeof( event_descrs[0] )-1 ); + strncpy( units[num_events],evinfo.units,sizeof( units[0] )-1 ); + // buffer must be null terminated to safely use strstr operation on it below + units[num_events][sizeof( units[0] )-1] = '\0'; + data_type[num_events] = evinfo.data_type; + + retval = PAPI_add_event( EventSet, code ); + + if ( retval != PAPI_OK ) + break; /* We've hit an event limit */ + num_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, powercap_cid ); + } + + passed = 1; + PAPI_start( EventSet ); + + values = calloc( num_events,sizeof( long long ) ); + if (!values) { fprintf(stderr, "No enough memory for allocation of values array.\n"); return -1; } + + retval |= PAPI_read( EventSet, values ); + for (i = 0; i < num_events; ++i) { + if (!quiet && strstr( event_names[i], "POWER") && data_type[i] == PAPI_DATATYPE_INT64) + fprintf( stdout, "%-45s%-20s > %lldW\n", + event_names[i], event_descrs[i], values[i]); + if (1 > values[0] || values[0] > values[1] || values[1] > 10000) + passed = 0; + if (values[0] > values[2] || values[2] > values[1]) + passed = 0; + } + + PAPI_stop( EventSet, values ); + + if (passed && PAPI_OK == retval) + fprintf(stdout, "TEST PASSED\n"); + else + fprintf(stdout, "TESTS FAILED\n"); + + /* Done, clean up */ + retval |= PAPI_cleanup_eventset( EventSet ); + if ( retval != PAPI_OK ) + fprintf(stderr, "PAPI_cleanup_eventset()\n"); + + retval |= PAPI_destroy_eventset( &EventSet ); + if ( retval != PAPI_OK ) + fprintf(stderr, 
"PAPI_destroy_eventset()\n"); + + return 0; +} + diff -Nru papi-5.7.0+dfsg/src/components/powercap_ppc/tests/powercap_limit.c papi-6.0.0~dfsg/src/components/powercap_ppc/tests/powercap_limit.c --- papi-5.7.0+dfsg/src/components/powercap_ppc/tests/powercap_limit.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/powercap_ppc/tests/powercap_limit.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,192 @@ +/** + * @author Philip Vaccaro + * Test case for powercap component + * @brief + * Tests basic functionality of powercap component + */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define MAX_powercap_EVENTS 64 + +int main ( int argc, char **argv ) +{ + (void) argv; + (void) argc; + int retval,cid,powercap_cid=-1,numcmp; + int EventSet = PAPI_NULL; + long long *values; + int num_events=0; + int code; + char event_names[MAX_powercap_EVENTS][PAPI_MAX_STR_LEN]; + int r,i; + int quiet = 1, passed = 0; + + const PAPI_component_info_t *cmpinfo = NULL; + PAPI_event_info_t evinfo; + + if (argc >= 2) quiet = atoi(argv[1]); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + fprintf( stderr, "PAPI_library_init failed\n" ); + + numcmp = PAPI_num_components(); + + for( cid=0; cidname,"powercap_ppc" ) ) { + powercap_cid=cid; + if ( !quiet ) fprintf(stdout, "Found powercap_ppc component at cid %d\n",powercap_cid ); + if ( cmpinfo->disabled ) { + if ( !quiet ) { + printf( "powercap_ppc component disabled: %s\n", + cmpinfo->disabled_reason ); + } + fprintf(stderr, "powercap_ppc component disabled\n"); + } + break; + } + } + + /* Component not found */ + if ( cid==numcmp ) + fprintf(stderr, "No powercap_ppc component found\n" ); + + /* Skip if component has no counters */ + if ( cmpinfo->num_cntrs==0 ) + fprintf(stderr, "No counters in the powercap_ppc component\n" ); + + /* Create EventSet */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval 
!= PAPI_OK ) + fprintf(stderr, "PAPI_create_eventset()\n"); + + /* Add all events */ + code = PAPI_NATIVE_MASK; + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, powercap_cid ); + while ( r == PAPI_OK ) { + retval = PAPI_event_code_to_name( code, event_names[num_events] ); + if ( retval != PAPI_OK ) + fprintf(stdout, "Error from PAPI_event_code_to_name\n"); + + retval = PAPI_get_event_info( code,&evinfo ); + if ( retval != PAPI_OK ) + fprintf(stderr, "Error getting event info\n"); + + retval = PAPI_add_event( EventSet, code ); + + if (retval != PAPI_OK) + break; /* We've hit an event limit */ + num_events++; + + r = PAPI_enum_cmp_event(&code, PAPI_ENUM_EVENTS, powercap_cid); + } + + PAPI_start(EventSet); + + values=calloc(num_events,sizeof(long long)); + if ( values==NULL ) { fprintf(stdout, "No memory for values"); return -1; } + + if ( !quiet ) fprintf(stdout, "\nBefore actual test...\n" ); + + long long Pmin = 424242, Pmax = 42, Pcurrent = 42, Ptarget, Pold; + + retval = PAPI_read(EventSet, values); + int iPcurrent = -1; + for ( i = 0; i < num_events; ++i ) { + if (!quiet && strstr( event_names[i], "POWER")) + fprintf( stdout, "%-45s > %lldW\n", + event_names[i], values[i]); + + if ( strstr( event_names[i], "MIN_POWER")) + Pmin = values[i]; + + if ( strstr( event_names[i], "MAX_POWER")) + Pmax = values[i]; + + if ( strstr( event_names[i], "CURRENT_POWER")) { + iPcurrent = i; + Pcurrent = values[i]; + } + } + + + if (Pmin <= Pmax) { + Pold = Pcurrent; + /* Let's try to cap at 40% */ + Ptarget = Pmin + 0.4 * (Pmax - Pmin); + /* Ok, current cap was 40%, so let's make it 60% */ + if (Pold == Ptarget) Ptarget = Pmin + 0.6 * (Pmax - Pmin); + + if ( !quiet ) + fprintf(stdout, "Current capping is Pcurrent = %lld W.\nCapping with Ptarget = %lld W\n", Pold, Ptarget); + + values[iPcurrent] = Ptarget; + + long long before = PAPI_get_real_nsec(); + long long after = before; + + PAPI_write(EventSet, values); + + if (!quiet) fprintf(stdout, "Changing the power capping might 
take some time.\nThe test will time out after 10 seconds.\n"); + do { + /* Give everyone some time to realize it */ + usleep(100000); + after = PAPI_get_real_nsec(); + PAPI_read(EventSet, values); + if (!quiet) fprintf(stdout, "."); + } while (values[iPcurrent] != Ptarget && (after-before) < 10e9); /* 10 s expressed in ns, matching the announced timeout */ + + if (values[iPcurrent] != Ptarget) { + /* test failure */ + if (!quiet) fprintf(stdout, "\nPcurrent read = %lld W, target was %lld W\n", values[iPcurrent], Ptarget); + } else { + /* we have a success here */ + if (!quiet) fprintf(stdout, "\nPcurrent read = %lld W, target was %lld W\n", values[iPcurrent], Ptarget); + /* let's clean behind us, revert to previous capping */ + values[iPcurrent] = Pold; + PAPI_write( EventSet, values); + if (!quiet) fprintf(stdout, "Reverting back to previous capping P = %lld W\n", Pold); + + do { + usleep(100000); + PAPI_read (EventSet, values); + } while (values[iPcurrent] != Pold && (PAPI_get_real_nsec()-after) < 10e9); /* bounded wait (10 s in ns): never hang if the revert is not applied */ + passed = 1; + } + } + else { + passed = 0; + fprintf(stderr, "Power capping values read seems wrong: Pmin = %lld W; Pmax = %lld W; Pcurrent = %lld W\n", + Pmin, Pmax, Pcurrent); + } + + PAPI_stop(EventSet, values); + + if (passed) + fprintf(stdout, "TEST SUCCESS\n"); + else + fprintf(stdout, "TEST FAILED\n"); + + /* Done, clean up */ + retval = PAPI_cleanup_eventset(EventSet); + if ( retval != PAPI_OK ) + fprintf(stdout, "PAPI_cleanup_eventset()\n"); + + retval = PAPI_destroy_eventset(&EventSet); + if ( retval != PAPI_OK ) + fprintf(stdout, "PAPI_destroy_eventset()\n"); + + return 0; +} + diff -Nru papi-5.7.0+dfsg/src/components/rapl/linux-rapl.c papi-6.0.0~dfsg/src/components/rapl/linux-rapl.c --- papi-5.7.0+dfsg/src/components/rapl/linux-rapl.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rapl/linux-rapl.c 2020-03-04 15:56:58.000000000 +0000 @@ -54,13 +54,13 @@ /* Package */ #define MSR_PKG_RAPL_POWER_LIMIT 0x610 -#define MSR_INTEL_PKG_ENERGY_STATUS 0x611 +#define MSR_INTEL_PKG_ENERGY_STATUS 0x611 #define MSR_PKG_PERF_STATUS 
0x613 #define MSR_PKG_POWER_INFO 0x614 /* PP0 */ #define MSR_PP0_POWER_LIMIT 0x638 -#define MSR_INTEL_PP0_ENERGY_STATUS 0x639 +#define MSR_INTEL_PP0_ENERGY_STATUS 0x639 #define MSR_PP0_POLICY 0x63A #define MSR_PP0_PERF_STATUS 0x63B @@ -76,7 +76,7 @@ #define MSR_DRAM_POWER_INFO 0x61C /* PSYS RAPL Domain */ -#define MSR_PLATFORM_ENERGY_STATUS 0x64d +#define MSR_PLATFORM_ENERGY_STATUS 0x64d /* RAPL bitsmasks */ #define POWER_UNIT_OFFSET 0 @@ -129,10 +129,16 @@ long long lastupdate; } _rapl_control_state_t; +// The _ENERGY_ counters should return a monotonically increasing +// value from the _start point, but the hardware only returns a +// uint32_t that may wrap. We keep a start_value which is reset at +// _start and every read, handle overflows of the uint32_t, and +// accumulate a uint64_t which we return. typedef struct _rapl_context { long long start_value[RAPL_MAX_COUNTERS]; + long long accumulated_value[RAPL_MAX_COUNTERS]; _rapl_control_state_t state; } _rapl_context_t; @@ -495,6 +501,12 @@ /* Detect how many packages */ + // Some code below may be flagged by Coverity due to uninitialized array + // entries of cpu_to_use[]. This is not a bug; the 'filename' listed below + // will have 'cpu0', 'cpu1', sequentially on up to the maximum. Coverity + // cannot know that, so its code analysis allows the possibility that the + // cpu_to_use[] array is only partially filled in. [Tony C. 11-27-19]. 
+ j=0; while(1) { int num_read; @@ -898,9 +910,11 @@ long long now = PAPI_get_real_usec(); int i; + for( i = 0; i < RAPL_MAX_COUNTERS; i++ ) { if ((control->being_measured[i]) && (control->need_difference[i])) { - context->start_value[i]=read_rapl_value(i); + context->start_value[i]=(read_rapl_value(i) & 0xFFFFFFFF); + context->accumulated_value[i]=0; } } @@ -912,31 +926,35 @@ static int _rapl_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) { - - /* read values */ - _rapl_context_t* context = (_rapl_context_t*) ctx; - _rapl_control_state_t* control = (_rapl_control_state_t*) ctl; - long long now = PAPI_get_real_usec(); - int i; - long long temp; - - for ( i = 0; i < RAPL_MAX_COUNTERS; i++ ) { - if (control->being_measured[i]) { - temp = read_rapl_value(i); - if (context->start_value[i]) - if (control->need_difference[i]) { - /* test for wrap around */ - if (temp < context->start_value[i] ) { - SUBDBG("Wraparound!\nstart:\t%#016x\ttemp:\t%#016x", - (unsigned)context->start_value[i], (unsigned)temp); - temp += (0x100000000 - context->start_value[i]); - SUBDBG("\tresult:\t%#016x\n", (unsigned)temp); - } else { - temp -= context->start_value[i]; - } - } - control->count[i] = convert_rapl_energy( i, temp ); - } + /* read values */ + _rapl_context_t* context = (_rapl_context_t*) ctx; + _rapl_control_state_t* control = (_rapl_control_state_t*) ctl; + long long now = PAPI_get_real_usec(); + int i; + long long temp, newstart; + + for ( i = 0; i < RAPL_MAX_COUNTERS; i++ ) { + if (control->being_measured[i]) { + temp = read_rapl_value(i); + if (control->need_difference[i]) { + temp &= 0xFFFFFFFF; + newstart = temp; + /* test for wrap around */ + if (temp < context->start_value[i] ) { + SUBDBG("Wraparound!\nstart:\t%#016x\ttemp:\t%#016x", + (unsigned)context->start_value[i], (unsigned)temp); + temp += (0x100000000 - context->start_value[i]); + SUBDBG("\tresult:\t%#016x\n", (unsigned)temp); + } else { + temp -= context->start_value[i]; + } + // reset the start value, add 
to accum, set temp for convert call. + context->start_value[i]=newstart; + context->accumulated_value[i] += temp; + temp = context->accumulated_value[i]; + } + control->count[i] = convert_rapl_energy( i, temp ); + } } control->lastupdate = now; return PAPI_OK; diff -Nru papi-5.7.0+dfsg/src/components/rapl/README papi-6.0.0~dfsg/src/components/rapl/README --- papi-5.7.0+dfsg/src/components/rapl/README 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rapl/README 2020-03-04 15:56:58.000000000 +0000 @@ -12,19 +12,50 @@ @section Component Specific Information RAPL/ -RAPL uses the MSR kernel module to read model specific registers (MSRs) from user space. To enable the msr module interface the admin needs to 'chmod 666 /dev/cpu/*/msr'. -For kernels older than 3.7, this is all that is required to use the PAPI RAPL component. -Historically, the Linux MSR driver only relied upon file system checks. This means that anything as root with any capability set could read and write to MSRs. - -Changes in the mainline Linux kernel since around 3.7 now require an executable to have capability CAP_SYS_RAWIO to open the MSR device file [1]. This change impacts user programs that use PAPI APIs that rely on the MSR device driver. Besides loading the MSR kernel module and setting the appropriate file permissions on the msr device file, one must grant the CAP_SYS_RAWIO capability to any user executable that needs access to the MSR driver, using the command below: +RAPL _ENERGY_ values 2019-11-08: The MSRs for energy return a uint64; but only +the bottom 32 bits are meaningful; the upper 32 bits are "reserved" by Intel, +and not guaranteed to be zeros. Before using these values arithmetically, the +upper bits need to be masked to zeros. This is now done. These same MSR can +wraparound; but the energy is a monotonically increasing amount and this is +what we should report. 
To prevent PAPI from reporting a wrap-around, at each +read we compute the difference between what we read and what we previously +read (including at PAPI_start), handling any overflow, and add this to a 64 +bit accumulator which is what we report. We always zero the accumulator at any +PAPI_start. + +RAPL uses the MSR kernel module to read model specific registers (MSRs) from +user space. To enable the msr module interface the admin needs to 'chmod 666 +/dev/cpu/*/msr'. For kernels older than 3.7, this is all that is required to +use the PAPI RAPL component. + +Historically, the Linux MSR driver only relied upon file system checks. This +means that anything as root with any capability set could read and write to +MSRs. + +Changes in the mainline Linux kernel since around 3.7 now require an +executable to have capability CAP_SYS_RAWIO to open the MSR device file [1]. +This change impacts user programs that use PAPI APIs that rely on the MSR +device driver. Besides loading the MSR kernel module and setting the +appropriate file permissions on the msr device file, one must grant the +CAP_SYS_RAWIO capability to any user executable that needs access to the MSR +driver, using the command below: setcap cap_sys_rawio=ep -Note that one needs superuser privileges to grant the RAWIO capability to an executable, and that the executable cannot be located on a shared network file system partition. - -The dynamic linker on most operating systems will remove variables that control dynamic linking from the environment of executables with extended rights, such as setuid executables or executables with raised capabilities. One such variable is LD_LIBRARY_PATH. Therefore, executables that have the RAWIO capability can only load shared libraries from default system directories. 
-One can work around this restriction by either installing the shared libraries in system directories, linking statically against those libraries, or using the -rpath linker option to specify the full path to the shared libraries during the linking step. +Note that one needs superuser privileges to grant the RAWIO capability to an +executable, and that the executable cannot be located on a shared network file +system partition. + +The dynamic linker on most operating systems will remove variables that +control dynamic linking from the environment of executables with extended +rights, such as setuid executables or executables with raised capabilities. +One such variable is LD_LIBRARY_PATH. Therefore, executables that have the +RAWIO capability can only load shared libraries from default system +directories. One can work around this restriction by either installing the +shared libraries in system directories, linking statically against those +libraries, or using the -rpath linker option to specify the full path to the +shared libraries during the linking step. 
[1] http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c903f0456bc69176912dee6dd25c6a66ee1aed00 diff -Nru papi-5.7.0+dfsg/src/components/rapl/tests/rapl_basic.c papi-6.0.0~dfsg/src/components/rapl/tests/rapl_basic.c --- papi-5.7.0+dfsg/src/components/rapl/tests/rapl_basic.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rapl/tests/rapl_basic.c 2020-03-04 15:56:58.000000000 +0000 @@ -183,7 +183,7 @@ "Error getting event info\n",retval); } - strncpy(units[num_events],evinfo.units,sizeof(units[0])-1); + strncpy(units[num_events],evinfo.units,sizeof(units[0])); // buffer must be null terminated to safely use strstr operation on it below units[num_events][sizeof(units[0])-1] = '\0'; diff -Nru papi-5.7.0+dfsg/src/components/rapl/tests/rapl_overflow.c papi-6.0.0~dfsg/src/components/rapl/tests/rapl_overflow.c --- papi-5.7.0+dfsg/src/components/rapl/tests/rapl_overflow.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rapl/tests/rapl_overflow.c 2020-03-04 15:56:58.000000000 +0000 @@ -12,6 +12,7 @@ static long long rapl_values[2]; static long long old_rapl_values[2] = {0,0}; static int rapl_backward=0; +static long long before_time, after_time; int EventSet2=PAPI_NULL; @@ -28,11 +29,15 @@ fprintf( stderr, "handler(%d ) Overflow at %p! 
bit=%#llx \n", EventSet, address, overflow_vector ); #endif - + PAPI_read(EventSet,values); - if (!quiet) printf("%lld %lld\t",values[0],values[1]); PAPI_read(EventSet2,rapl_values); - if (!quiet) printf("RAPL: %lld %lld\n",rapl_values[0],rapl_values[1]); + after_time = PAPI_get_real_nsec(); + double elapsed_time=((double)(after_time-before_time))/1.0e9; + + if (!quiet) printf("%15lld %15lld %18lld %15lld %.3fms\n", + values[0],values[1], + rapl_values[0], rapl_values[1], elapsed_time*1000.); if ((rapl_values[0] +#include +#include +#include + +#include "papi.h" +#include "papi_memory.h" +#include "papi_internal.h" +#include "papi_vector.h" + +/* this number assumes that there will never be more events than indicated */ +#define PAPIROCM_MAX_COUNTERS 512 + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +#if 0 +#define ROCMDBG(format, args...) fprintf(stderr, format, ## args) +#else +//#define ROCMDBG(format, args...) do {} while(0) +#define ROCMDBG SUBDBG +#endif + +/* Macros for error checking... each arg is only referenced/evaluated once */ +#define CHECK_PRINT_EVAL(checkcond, str, evalthis) \ + do { \ + int _cond = (checkcond); \ + if (_cond) { \ + fprintf(stderr, "%s:%i error: condition %s failed: %s.\n", __FILE__, __LINE__, #checkcond, str); \ + evalthis; \ + } \ + } while (0) + +#define ROCM_CALL_CK(call, args, handleerror) \ + do { \ + hsa_status_t _status = (*call##Ptr)args; \ + if (_status != HSA_STATUS_SUCCESS && _status != HSA_STATUS_INFO_BREAK) { \ + fprintf(stderr, "%s:%i error: function %s failed with error %d.\n", \ + __FILE__, __LINE__, #call, _status); \ + handleerror; \ + } \ + } while (0) + +// Roc Profiler call. 
+#define ROCP_CALL_CK(call, args, handleerror) \ + do { \ + hsa_status_t _status = (*call##Ptr)args; \ + if (_status != HSA_STATUS_SUCCESS && _status != HSA_STATUS_INFO_BREAK) { \ + const char *profErr; \ + (*rocprofiler_error_stringPtr)(&profErr); \ + fprintf(stderr, "%s:%i error: function %s failed with error %d [%s].\n", \ + __FILE__, __LINE__, #call, _status, profErr); \ + handleerror; \ + } \ + } while (0) + +#define DLSYM_AND_CHECK(dllib, name) \ + do { \ + name##Ptr = dlsym(dllib, #name); \ + if (dlerror()!=NULL) { \ + snprintf(_rocm_vector.cmp_info.disabled_reason, \ + PAPI_MAX_STR_LEN, \ + "The ROCM required function '%s' was not found in dynamic libs", \ + #name); \ + fprintf(stderr, "%s:%i ROCM component disabled: %s\n", \ + __FILE__, __LINE__, _rocm_vector.cmp_info.disabled_reason); \ + return ( PAPI_ENOSUPP ); \ + } \ + } while (0) + +typedef rocprofiler_t* Context; +typedef rocprofiler_feature_t EventID; + +// Contains device list, pointer to device description, and the list of available events. +// Note that "indexed variables" in ROCM are read with eventname[%d], where %d is +// 0 to #instances. This is what we store in the EventID.name element. But the PAPI name +// doesn't use brackets; so in the ev_name_desc.name we store the user-visible name, +// something like "eventname:device=%d:instance=%d". +typedef struct _rocm_context { + uint32_t availAgentSize; + hsa_agent_t* availAgentArray; + uint32_t availEventSize; + int *availEventDeviceNum; + EventID *availEventIDArray; // Note: The EventID struct has its own .name element for ROCM internal operation. + uint32_t *availEventIsBeingMeasuredInEventset; + struct ev_name_desc *availEventDesc; // Note: This is where the PAPI name is stored; for user consumption. 
+} _rocm_context_t; + +/* Store the name and description for an event */ +typedef struct ev_name_desc { + char name[PAPI_MAX_STR_LEN]; + char description[PAPI_2MAX_STR_LEN]; +} ev_name_desc_t; + +/* Control structure tracks array of active contexts, records active events and their values */ +typedef struct _rocm_control { + uint32_t countOfActiveContexts; + struct _rocm_active_context_s *arrayOfActiveContexts[PAPIROCM_MAX_COUNTERS]; + uint32_t activeEventCount; + int activeEventIndex[PAPIROCM_MAX_COUNTERS]; + long long activeEventValues[PAPIROCM_MAX_COUNTERS]; + uint64_t startTimestampNs; + uint64_t readTimestampNs; +} _rocm_control_t; + +/* For each active context, which ROCM events are being measured, context eventgroups containing events */ +typedef struct _rocm_active_context_s { + Context ctx; + int deviceNum; + uint32_t conEventsCount; + EventID conEvents[PAPIROCM_MAX_COUNTERS]; + int conEventIndex[PAPIROCM_MAX_COUNTERS]; +} _rocm_active_context_t; + +/* Function prototypes */ +static int _rocm_cleanup_eventset(hwd_control_state_t * ctrl); + +// GLOBALS +static void *dl1 = NULL; +static void *dl2 = NULL; +static char rocm_hsa[]=PAPI_ROCM_HSA; +static char rocm_prof[]=PAPI_ROCM_PROF; + +/* ****** CHANGE PROTOTYPES TO DECLARE ROCM LIBRARY SYMBOLS AS WEAK ********** + * This is done so that a version of PAPI built with the rocm component can * + * be installed on a system which does not have the rocm libraries installed. * + * * + * If this is done without these prototypes, then all papi services on the * + * system without the rocm libraries installed will fail. The PAPI libraries * + * contain references to the rocm libraries which are not installed. The * + * load of PAPI commands fails because the rocm library references can not be * + * resolved. * + * * + * This also defines pointers to the rocm library functions that we call. * + * These function pointers will be resolved with dlopen/dlsym calls at * + * component initialization time. 
The component then calls the rocm library * + * functions through these function pointers. * + *******************************************************************************/ +void (*_dl_non_dynamic_init) (void) __attribute__ ((weak)); + +#define DECLAREROCMFUNC(funcname, funcsig) \ + hsa_status_t __attribute__((weak)) funcname funcsig; \ + hsa_status_t(*funcname##Ptr) funcsig; + +// ROCR API declaration +DECLAREROCMFUNC(hsa_init, ()); +DECLAREROCMFUNC(hsa_shut_down, ()); +DECLAREROCMFUNC(hsa_iterate_agents, (hsa_status_t (*)(hsa_agent_t, void*), + void*)); +DECLAREROCMFUNC(hsa_system_get_info, (hsa_system_info_t, void*)); +DECLAREROCMFUNC(hsa_agent_get_info, (hsa_agent_t agent, hsa_agent_info_t attribute, void* value)); +DECLAREROCMFUNC(hsa_queue_destroy, (hsa_queue_t* queue)); + +// ROC-profiler API declaration +DECLAREROCMFUNC(rocprofiler_get_info, (const hsa_agent_t*, rocprofiler_info_kind_t, void *)); +DECLAREROCMFUNC(rocprofiler_iterate_info, (const hsa_agent_t*, + rocprofiler_info_kind_t, + hsa_status_t (*)(const rocprofiler_info_data_t, void *), void *)); +DECLAREROCMFUNC(rocprofiler_open, (hsa_agent_t agent, // GPU handle + rocprofiler_feature_t* features, // [in] profiling features array + uint32_t feature_count, // profiling info count + rocprofiler_t** context, // [out] context object + uint32_t mode, // profiling mode mask + rocprofiler_properties_t* properties)); // profiling properties +DECLAREROCMFUNC(rocprofiler_close, (rocprofiler_t*)); +DECLAREROCMFUNC(rocprofiler_group_count, (const rocprofiler_t*, uint32_t*)); +DECLAREROCMFUNC(rocprofiler_start, (rocprofiler_t*, uint32_t)); +DECLAREROCMFUNC(rocprofiler_read, (rocprofiler_t*, uint32_t)); +DECLAREROCMFUNC(rocprofiler_stop, (rocprofiler_t*, uint32_t)); +DECLAREROCMFUNC(rocprofiler_get_data, (rocprofiler_t*, uint32_t)); +DECLAREROCMFUNC(rocprofiler_get_metrics, (const rocprofiler_t*)); +DECLAREROCMFUNC(rocprofiler_reset, (rocprofiler_t*, uint32_t)); +DECLAREROCMFUNC(rocprofiler_error_string, 
(const char**)); + +/* The PAPI side (external) variable as a global */ +papi_vector_t _rocm_vector; + +/* Global variable for hardware description, event and metric lists */ +static _rocm_context_t *global__rocm_context = NULL; +static uint32_t maxEventSize=0; // We accumulate all agent counts into this. +static rocprofiler_properties_t global__ctx_properties = { + NULL, // queue + 128, // queue depth + NULL, // handler on completion + NULL // handler_arg +}; + +/* This global variable points to the head of the control state list */ +static _rocm_control_t *global__rocm_control = NULL; + + +/***************************************************************************** + ******** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT ******** + *****************************************************************************/ + +/* + * Link the necessary ROCM libraries to use the rocm component. If any of them can not be found, then + * the ROCM component will just be disabled. This is done at runtime so that a version of PAPI built + * with the ROCM component can be installed and used on systems which have the ROCM libraries installed + * and on systems where these libraries are not installed. + */ +static int _rocm_linkRocmLibraries(void) +{ + ROCMDBG("Entering _rocm_linkRocmLibraries\n"); + + char path_name[1024]; + /* Attempt to guess if we were statically linked to libc, if so bail */ + if(_dl_non_dynamic_init != NULL) { + strncpy(_rocm_vector.cmp_info.disabled_reason, "The ROCM component does not support statically linking to libc.", PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + + // collect any defined environment variables, or "NULL" if not present. + char *rocm_root = getenv("PAPI_ROCM_ROOT"); + dl1 = NULL; // Ensure reset to NULL. + + // Step 1: Process override if given. + if (strlen(rocm_hsa) > 0) { // If override given, it has to work. + dl1 = dlopen(rocm_hsa, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. 
+ if (dl1 == NULL) { + snprintf(_rocm_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_ROCM_HSA override '%s' given in Rules.rocm not found.", rocm_hsa); + return(PAPI_ENOSUPP); // Override given but not found. + } + } + + // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths. + if (dl1 == NULL) { // No override, + dl1 = dlopen("libhsa-runtime64.so", RTLD_NOW | RTLD_GLOBAL); // Try system paths. + } + + // Step 3: Try the explicit install default. + if (dl1 == NULL && rocm_root != NULL) { // if root given, try it. + snprintf(path_name, 1024, "%s/lib/libhsa-runtime64.so", rocm_root); // PAPI Root check. + dl1 = dlopen(path_name, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + } + + // Check for failure. + if (dl1 == NULL) { + snprintf(_rocm_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "libhsa-runtime64.so not found."); + return(PAPI_ENOSUPP); + } + + // We have a dl1. (libhsa-runtime64.so). + + DLSYM_AND_CHECK(dl1, hsa_init); + DLSYM_AND_CHECK(dl1, hsa_iterate_agents); + DLSYM_AND_CHECK(dl1, hsa_system_get_info); + DLSYM_AND_CHECK(dl1, hsa_agent_get_info); + DLSYM_AND_CHECK(dl1, hsa_shut_down); + DLSYM_AND_CHECK(dl1, hsa_queue_destroy); + + //------------------------------------------------------------------------- + + dl2 = NULL; // Ensure reset to NULL. + + // Step 1: Process override if given. + if (strlen(rocm_prof) > 0) { // If override given, it has to work. + dl2 = dlopen(rocm_prof, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + if (dl2 == NULL) { // Check the profiler handle just opened; dl1 is already known to be non-NULL here. + snprintf(_rocm_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_ROCM_PROF override '%s' given in Rules.rocm not found.", rocm_prof); + return(PAPI_ENOSUPP); // Override given but not found. + } + } + + // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths. + if (dl2 == NULL) { // No override, + dl2 = dlopen("librocprofiler64.so", RTLD_NOW | RTLD_GLOBAL); // Try system paths. + } + + // Step 3: Try the explicit install default. 
+ if (dl2 == NULL && rocm_root != NULL) { // if root given, try it. + snprintf(path_name, 1024, "%s/lib/librocprofiler64.so", rocm_root); // PAPI Root check. + dl2 = dlopen(path_name, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + } + + // Check for failure. + if (dl2 == NULL) { + snprintf(_rocm_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "librocprofiler64.so not found."); + return(PAPI_ENOSUPP); + } + + // We have a dl2. (librocprofiler64.so). + + DLSYM_AND_CHECK(dl2, rocprofiler_get_info); + DLSYM_AND_CHECK(dl2, rocprofiler_iterate_info); + DLSYM_AND_CHECK(dl2, rocprofiler_open); + DLSYM_AND_CHECK(dl2, rocprofiler_close); + DLSYM_AND_CHECK(dl2, rocprofiler_group_count); + DLSYM_AND_CHECK(dl2, rocprofiler_start); + DLSYM_AND_CHECK(dl2, rocprofiler_read); + DLSYM_AND_CHECK(dl2, rocprofiler_stop); + DLSYM_AND_CHECK(dl2, rocprofiler_get_data); + DLSYM_AND_CHECK(dl2, rocprofiler_get_metrics); + DLSYM_AND_CHECK(dl2, rocprofiler_reset); + DLSYM_AND_CHECK(dl2, rocprofiler_error_string); + + // Disable if ROCPROFILER env vars not present. + if (getenv("ROCP_METRICS") == NULL) { + snprintf(_rocm_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "Env. Var. ROCP_METRICS not set; rocprofiler is not configured."); + return(PAPI_ENOSUPP); // Wouldn't have any events. + } + + if (getenv("ROCPROFILER_LOG") == NULL) { + snprintf(_rocm_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "Env. Var. ROCPROFILER_LOG not set; rocprofiler is not configured."); + return(PAPI_ENOSUPP); // Wouldn't have any events. + } + + if (getenv("HSA_VEN_AMD_AQLPROFILE_LOG") == NULL) { + snprintf(_rocm_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "Env. Var. HSA_VEN_AMD_AQLPROFILE_LOG not set; rocprofiler is not configured."); + return(PAPI_ENOSUPP); // Wouldn't have any events. + } + + if (getenv("AQLPROFILE_READ_API") == NULL) { + snprintf(_rocm_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "Env. 
Var. AQLPROFILE_READ_API not set; rocprofiler is not configured."); + return(PAPI_ENOSUPP); // Wouldn't have any events. + } + + return (PAPI_OK); +} + + +// ---------------------------------------------------------------------------- +// Callback passed to hsa_iterate_agents: appends each GPU agent to the +// availAgentArray of the _rocm_context_t given in arg and bumps availAgentSize. +// Non-GPU agents are ignored. Returns an hsa_status_t (not a PAPI code), since +// that is the declared return type: HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR if +// the device type cannot be queried or the array cannot be grown. +static hsa_status_t _rocm_get_gpu_handle(hsa_agent_t agent, void* arg) +{ + _rocm_context_t * gctxt = (_rocm_context_t*) arg; + + hsa_device_type_t type; + ROCM_CALL_CK(hsa_agent_get_info,(agent, HSA_AGENT_INFO_DEVICE, &type), return (HSA_STATUS_ERROR)); + + // Device is a GPU agent + if (type == HSA_DEVICE_TYPE_GPU) { + // Grow through a temporary so a failed realloc neither loses the old array nor gets dereferenced. + hsa_agent_t *grown = (hsa_agent_t*) papi_realloc(gctxt->availAgentArray, ((gctxt->availAgentSize + 1)*sizeof(hsa_agent_t))); + if (grown == NULL) return HSA_STATUS_ERROR; + gctxt->availAgentArray = grown; + gctxt->availAgentArray[gctxt->availAgentSize] = agent; + gctxt->availAgentSize += 1; // Count the agent only once it is stored. + } + + return HSA_STATUS_SUCCESS; +} + +typedef struct { + int device_num; + int count; + _rocm_context_t * ctx; +} events_callback_arg_t; + +// ---------------------------------------------------------------------------- +// Callback function to get the number of events we will see; +// Each element of instanced metrics must be created as a separate event +static hsa_status_t _rocm_count_native_events_callback(const rocprofiler_info_data_t info, void * arg) +{ + const uint32_t instances = info.metric.instances; + uint32_t* count = (uint32_t*) arg; + (*count) += instances; + return HSA_STATUS_SUCCESS; +} // END CALLBACK. + + +// ---------------------------------------------------------------------------- +// Callback function that adds individual events. 
+static hsa_status_t _rocm_add_native_events_callback(const rocprofiler_info_data_t info, void * arg) +{ + uint32_t ui; + events_callback_arg_t * callback_arg = (events_callback_arg_t*) arg; + _rocm_context_t * ctx = callback_arg->ctx; + const uint32_t eventDeviceNum = callback_arg->device_num; + const uint32_t count = callback_arg->count; + uint32_t index = ctx->availEventSize; + const uint32_t instances = info.metric.instances; // short cut to instances. + + +// information about AMD Event. +// fprintf(stderr, "%s:%i name=%s block_name=%s, instances=%i block_counters=%i.\n", +// __FILE__, __LINE__, info.metric.name, info.metric.block_name, info.metric.instances, +// info.metric.block_counters); + if (index + instances > count) return HSA_STATUS_ERROR; // Should have enough space. + + for (ui=0; ui 1) { + snprintf(ctx->availEventDesc[index].name, + PAPI_MAX_STR_LEN, "%s:device=%d:instance=%d", // What PAPI user sees. + info.metric.name, eventDeviceNum, ui); + snprintf(ROCMname, PAPI_MAX_STR_LEN, "%s[%d]", + info.metric.name, ui); // use indexed version. + } else { + snprintf(ctx->availEventDesc[index].name, + PAPI_MAX_STR_LEN, "%s:device=%d", // What PAPI user sees. + info.metric.name, eventDeviceNum); + snprintf(ROCMname, PAPI_MAX_STR_LEN, "%s", + info.metric.name); // use non-indexed version. + } + + ROCMname[PAPI_MAX_STR_LEN - 1] = '\0'; // ensure z-terminated. + strncpy(ctx->availEventDesc[index].description, info.metric.description, PAPI_2MAX_STR_LEN); + ctx->availEventDesc[index].description[PAPI_2MAX_STR_LEN - 1] = '\0'; // ensure z-terminated. + + EventID eventId; // Removed declaration init. + eventId.kind = ROCPROFILER_FEATURE_KIND_METRIC; + eventId.name = strdup(ROCMname); // what ROCM needs to see. + eventId.parameters = NULL; // Not currently used, but init for safety. + eventId.parameter_count=0; // Not currently used, but init for safety. 
+ + ctx->availEventDeviceNum[index] = eventDeviceNum; + ctx->availEventIDArray[index] = eventId; + index++; // increment index. + ctx->availEventSize = index; // Always set availEventSize. + } // end for each instance. + + return HSA_STATUS_SUCCESS; +} // end CALLBACK, _rocm_add_native_events_callback + +// ---------------------------------------------------------------------------- +// function called during initialization. +static int _rocm_add_native_events(_rocm_context_t * ctx) +{ + ROCMDBG("Entering _rocm_add_native_events\n"); + + uint32_t i; + + // Count all events in all agents; Each element of 'indexed' metrics is considered a separate event. + // NOTE: The environment variable ROCP_METRICS should point at a path and file like metrics.xml. + // If that file doesn't exist, this iterate info fails with a general error (0x1000). + // NOTE: We are *accumulating* into maxEventSize. + for (i = 0; i < ctx->availAgentSize; i++) { + ROCP_CALL_CK(rocprofiler_iterate_info, (&(ctx->availAgentArray[i]), ROCPROFILER_INFO_KIND_METRIC, + _rocm_count_native_events_callback, (void*)(&maxEventSize)), return (PAPI_EMISC)); + } + + /* Allocate space for all events and descriptors, includes space for instances. 
*/ + ctx->availEventDeviceNum = (int *) papi_calloc(maxEventSize, sizeof(int)); + CHECK_PRINT_EVAL((ctx->availEventDeviceNum == NULL), "ERROR ROCM: Could not allocate memory", return (PAPI_ENOMEM)); + ctx->availEventIDArray = (EventID *) papi_calloc(maxEventSize, sizeof(EventID)); + CHECK_PRINT_EVAL((ctx->availEventIDArray == NULL), "ERROR ROCM: Could not allocate memory", return (PAPI_ENOMEM)); + ctx->availEventIsBeingMeasuredInEventset = (uint32_t *) papi_calloc(maxEventSize, sizeof(uint32_t)); + CHECK_PRINT_EVAL((ctx->availEventIsBeingMeasuredInEventset == NULL), "ERROR ROCM: Could not allocate memory", return (PAPI_ENOMEM)); + ctx->availEventDesc = (ev_name_desc_t *) papi_calloc(maxEventSize, sizeof(ev_name_desc_t)); + CHECK_PRINT_EVAL((ctx->availEventDesc == NULL), "ERROR ROCM: Could not allocate memory", return (PAPI_ENOMEM)); + + for (i = 0; i < ctx->availAgentSize; ++i) { + events_callback_arg_t arg; + arg.device_num = i; + arg.count = maxEventSize; + arg.ctx = ctx; + ROCP_CALL_CK(rocprofiler_iterate_info, (&(ctx->availAgentArray[i]), ROCPROFILER_INFO_KIND_METRIC, + _rocm_add_native_events_callback, (void*)(&arg)), return (PAPI_EMISC)); + } + + /* return 0 if everything went OK */ + return 0; +} + + +/***************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + *****************************************************************************/ + +/* + * This is called whenever a thread is initialized. 
+ */ +static int _rocm_init_thread(hwd_context_t * ctx) +{ + ROCMDBG("Entering _rocm_init_thread\n"); + + (void) ctx; + return PAPI_OK; +} + + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +static int _rocm_init_component(int cidx) +{ + ROCMDBG("Entering _rocm_init_component\n"); + + /* link in all the rocm libraries and resolve the symbols we need to use */ + if(_rocm_linkRocmLibraries() != PAPI_OK) { + SUBDBG("Dynamic link of ROCM libraries failed, component will be disabled.\n"); + SUBDBG("See disable reason in papi_component_avail output for more details.\n"); + return (PAPI_ENOSUPP); + } + + ROCM_CALL_CK(hsa_init, (), return (PAPI_EMISC)); + + /* Create the structure */ + if(global__rocm_context == NULL) + global__rocm_context = (_rocm_context_t *) papi_calloc(1, sizeof(_rocm_context_t)); + + /* Get GPU agent */ + ROCM_CALL_CK(hsa_iterate_agents, (_rocm_get_gpu_handle, global__rocm_context), return (PAPI_EMISC)); + + int rv; + + /* Get list of all native ROCM events supported */ + rv = _rocm_add_native_events(global__rocm_context); + if(rv != 0) + return (rv); + + /* Export some information */ + _rocm_vector.cmp_info.CmpIdx = cidx; + _rocm_vector.cmp_info.num_native_events = global__rocm_context->availEventSize; + _rocm_vector.cmp_info.num_cntrs = _rocm_vector.cmp_info.num_native_events; + _rocm_vector.cmp_info.num_mpx_cntrs = _rocm_vector.cmp_info.num_native_events; + + ROCMDBG("Exiting _rocm_init_component cidx %d num_native_events %d num_cntrs %d num_mpx_cntrs %d\n", + cidx, + _rocm_vector.cmp_info.num_native_events, + _rocm_vector.cmp_info.num_cntrs, + _rocm_vector.cmp_info.num_mpx_cntrs); + + if (_rocm_vector.cmp_info.num_native_events == 0) { + char *metrics = getenv("ROCP_METRICS"); + if (metrics == NULL) { + strncpy(_rocm_vector.cmp_info.disabled_reason, "Environment Variable ROCP_METRICS is not defined, 
should point to a valid metrics.xml.", PAPI_MAX_STR_LEN); + return (PAPI_EMISC); + } + + snprintf(_rocm_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "No events. Ensure ROCP_METRICS=%s is correct.", metrics); + return (PAPI_EMISC); + } + + return (PAPI_OK); +} + + +/* Setup a counter control state. + * In general a control state holds the hardware info for an + * EventSet. + */ +static int _rocm_init_control_state(hwd_control_state_t * ctrl) +{ + ROCMDBG("Entering _rocm_init_control_state\n"); + + (void) ctrl; + _rocm_context_t *gctxt = global__rocm_context; + + CHECK_PRINT_EVAL((gctxt == NULL), "Error: The PAPI ROCM component needs to be initialized first", return (PAPI_ENOINIT)); + /* If no events were found during the initial component initialization, return error */ + if(global__rocm_context->availEventSize <= 0) { + strncpy(_rocm_vector.cmp_info.disabled_reason, "ERROR ROCM: No events exist", PAPI_MAX_STR_LEN); + return (PAPI_EMISC); + } + /* If it does not exist, create the global structure to hold ROCM contexts and active events */ + if(global__rocm_control == NULL) { + global__rocm_control = (_rocm_control_t *) papi_calloc(1, sizeof(_rocm_control_t)); + global__rocm_control->countOfActiveContexts = 0; + global__rocm_control->activeEventCount = 0; + } + return PAPI_OK; +} + + +/* Triggered by eventset operations like add or remove. For ROCM, + * needs to be called multiple times from each seperate ROCM context + * with the events to be measured from that context. For each + * context, create eventgroups for the events. 
+ */ +/* Note: NativeInfo_t is defined in papi_internal.h */ +static int _rocm_update_control_state(hwd_control_state_t * ctrl, NativeInfo_t * nativeInfo, int nativeCount, hwd_context_t * ctx) +{ + ROCMDBG("Entering _rocm_update_control_state with nativeCount %d\n", nativeCount); + + (void) ctx; + _rocm_control_t *gctrl = global__rocm_control; + _rocm_context_t *gctxt = global__rocm_context; + int eventContextIdx = 0; + int index, ii; + uint32_t cc; + uint32_t numPasses = 1; + + /* Return if no events */ + if(nativeCount == 0) + return (PAPI_OK); + + /* Handle user request of events to be monitored */ + for(ii = 0; ii < nativeCount; ii++) { + /* Get the PAPI event index from the user */ + index = nativeInfo[ii].ni_event; + char *eventName = gctxt->availEventDesc[index].name; + (void) eventName; + int eventDeviceNum = gctxt->availEventDeviceNum[index]; + + /* if this event is already added continue to next ii, if not, mark it as being added */ + if(gctxt->availEventIsBeingMeasuredInEventset[index] == 1) { + ROCMDBG("Skipping event %s (%i of %i) which is already added\n", eventName, ii, nativeCount); + continue; + } else { + gctxt->availEventIsBeingMeasuredInEventset[index] = 1; + } + + /* Find context/control in papirocm, creating it if does not exist */ + for(cc = 0; cc < gctrl->countOfActiveContexts; cc++) { + CHECK_PRINT_EVAL(cc >= PAPIROCM_MAX_COUNTERS, "Exceeded hardcoded maximum number of contexts (PAPIROCM_MAX_COUNTERS)", return (PAPI_EMISC)); + if(gctrl->arrayOfActiveContexts[cc]->deviceNum == eventDeviceNum) { + break; + } + } + // Create context if it does not exist + if(cc == gctrl->countOfActiveContexts) { + ROCMDBG("Event %s device %d does not have a ctx registered yet...\n", eventName, eventDeviceNum); + gctrl->arrayOfActiveContexts[cc] = papi_calloc(1, sizeof(_rocm_active_context_t)); + CHECK_PRINT_EVAL(gctrl->arrayOfActiveContexts[cc] == NULL, "Memory allocation for new active context failed", return (PAPI_ENOMEM)); + 
gctrl->arrayOfActiveContexts[cc]->deviceNum = eventDeviceNum; + gctrl->arrayOfActiveContexts[cc]->ctx = NULL; + gctrl->arrayOfActiveContexts[cc]->conEventsCount = 0; + gctrl->countOfActiveContexts++; + ROCMDBG("Added a new context deviceNum %d ... now countOfActiveContexts is %d\n", eventDeviceNum, gctrl->countOfActiveContexts); + } + eventContextIdx = cc; + + _rocm_active_context_t *eventctrl = gctrl->arrayOfActiveContexts[eventContextIdx]; + ROCMDBG("Need to add event %d %s to the context\n", index, eventName); + // Now we have eventctrl, we can check on max event count. + if (eventctrl->conEventsCount >= PAPIROCM_MAX_COUNTERS) { + ROCMDBG("Num events exceeded PAPIROCM_MAX_COUNTERS\n"); + return(PAPI_EINVAL); + } + + /* lookup eventid for this event index */ + EventID eventId = gctxt->availEventIDArray[index]; + eventctrl->conEvents[eventctrl->conEventsCount] = eventId; + eventctrl->conEventIndex[eventctrl->conEventsCount] = index; + eventctrl->conEventsCount++; +// fprintf(stderr, "%s:%d Added eventId.name='%s' as conEventsCount=%i with index=%i.\n", __FILE__, __LINE__, eventId.name, eventctrl->conEventsCount-1, index); // test indexed events. 
+ + /* Record index of this active event back into the nativeInfo structure */ + nativeInfo[ii].ni_position = gctrl->activeEventCount; + /* record added event at the higher level */ + CHECK_PRINT_EVAL(gctrl->activeEventCount == PAPIROCM_MAX_COUNTERS - 1, "Exceeded maximum num of events (PAPI_MAX_COUNTERS)", return (PAPI_EMISC)); + gctrl->activeEventIndex[gctrl->activeEventCount] = index; + gctrl->activeEventValues[gctrl->activeEventCount] = 0; + gctrl->activeEventCount++; + + /* Create/recreate eventgrouppass structures for the added event and context */ + ROCMDBG("Create eventGroupPasses for context (destroy pre-existing) (nativeCount %d, conEventsCount %d) \n", gctrl->activeEventCount, eventctrl->conEventsCount); + if(eventctrl->conEventsCount > 0) { + if (eventctrl->ctx != NULL) { + ROCP_CALL_CK(rocprofiler_close, (eventctrl->ctx), return (PAPI_EMISC)); + } + int openFailed=0; +// fprintf(stderr,"%s:%i calling rocprofiler_open, ii=%i device=%i numEvents=%i name='%s'.\n", __FILE__, __LINE__, ii, eventDeviceNum, eventctrl->conEventsCount, eventId.name); + const uint32_t mode = (global__ctx_properties.queue != NULL) ? ROCPROFILER_MODE_STANDALONE : ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_CREATEQUEUE; + ROCP_CALL_CK(rocprofiler_open, (gctxt->availAgentArray[eventDeviceNum], eventctrl->conEvents, eventctrl->conEventsCount, &(eventctrl->ctx), + mode, &global__ctx_properties), openFailed=1); + if (openFailed) { // If the open failed, + ROCMDBG("Error occurred: The ROCM event was not accepted by the ROCPROFILER.\n"); +// fprintf(stderr, "Error occurred: The ROCM event '%s' was not accepted by the ROCPROFILER.\n", eventId.name); + _rocm_cleanup_eventset(ctrl); // Try to cleanup, +// fprintf(stderr, "%s:%i Returning PAPI_ECOMBO.\n", __FILE__, __LINE__); + return(PAPI_ECOMBO); // Say its a bad combo. 
+ } + + ROCP_CALL_CK(rocprofiler_group_count, (eventctrl->ctx, &numPasses), return (PAPI_EMISC)); + + if (numPasses > 1) { + ROCMDBG("Error occurred: The combined ROCM events require more than 1 pass... try different events\n"); + _rocm_cleanup_eventset(ctrl); + return(PAPI_ECOMBO); + } else { + ROCMDBG("Created eventGroupPasses for context total-events %d in-this-context %d passes-required %d) \n", gctrl->activeEventCount, eventctrl->conEventsCount, numPasses); + } + } + } + return (PAPI_OK); +} + + +/* Triggered by PAPI_start(). + * For ROCM component, switch to each context and start all eventgroups. + */ +static int _rocm_start(hwd_context_t * ctx, hwd_control_state_t * ctrl) +{ + ROCMDBG("Entering _rocm_start\n"); + + (void) ctx; + (void) ctrl; + _rocm_control_t *gctrl = global__rocm_control; + uint32_t ii, cc; + + ROCMDBG("Reset all active event values\n"); + for(ii = 0; ii < gctrl->activeEventCount; ii++) + gctrl->activeEventValues[ii] = 0; + + ROCM_CALL_CK(hsa_system_get_info, (HSA_SYSTEM_INFO_TIMESTAMP, &gctrl->startTimestampNs), return (PAPI_EMISC)); + for(cc = 0; cc < gctrl->countOfActiveContexts; cc++) { + int eventDeviceNum = gctrl->arrayOfActiveContexts[cc]->deviceNum; + (void) eventDeviceNum; // suppress "not used" error when not debug. + Context eventCtx = gctrl->arrayOfActiveContexts[cc]->ctx; + ROCMDBG("Start device %d ctx %p ts %lu\n", eventDeviceNum, eventCtx, gctrl->startTimestampNs); + if (eventCtx == NULL) abort(); + ROCP_CALL_CK(rocprofiler_start, (eventCtx, 0), return (PAPI_EMISC)); + } + + return (PAPI_OK); +} + + +/* Triggered by PAPI_read(). For ROCM component, switch to each + * context, read all the eventgroups, and put the values in the + * correct places. 
*/ +static int _rocm_read(hwd_context_t * ctx, hwd_control_state_t * ctrl, long long **values, int flags) +{ + ROCMDBG("Entering _rocm_read\n"); + + (void) ctx; + (void) ctrl; + (void) flags; + _rocm_control_t *gctrl = global__rocm_control; + _rocm_context_t *gctxt = global__rocm_context; + uint32_t cc, jj, ee; + + // Get read time stamp + ROCM_CALL_CK(hsa_system_get_info, (HSA_SYSTEM_INFO_TIMESTAMP, &gctrl->readTimestampNs), return (PAPI_EMISC)); + uint64_t durationNs = gctrl->readTimestampNs - gctrl->startTimestampNs; + (void) durationNs; // Suppress 'not used' warning when not debug. + gctrl->startTimestampNs = gctrl->readTimestampNs; + + + for(cc = 0; cc < gctrl->countOfActiveContexts; cc++) { + int eventDeviceNum = gctrl->arrayOfActiveContexts[cc]->deviceNum; + Context eventCtx = gctrl->arrayOfActiveContexts[cc]->ctx; + ROCMDBG("Read device %d ctx %p(%u) ts %lu\n", eventDeviceNum, eventCtx, cc, gctrl->readTimestampNs); + ROCP_CALL_CK(rocprofiler_read, (eventCtx, 0), return (PAPI_EMISC)); + ROCMDBG("waiting for data\n"); + ROCP_CALL_CK(rocprofiler_get_data, (eventCtx, 0), return (PAPI_EMISC)); + ROCP_CALL_CK(rocprofiler_get_metrics, (eventCtx), return (PAPI_EMISC)); + ROCMDBG("done\n"); + + for(jj = 0; jj < gctrl->activeEventCount; jj++) { + int index = gctrl->activeEventIndex[jj]; + EventID eventId = gctxt->availEventIDArray[index]; + ROCMDBG("jj=%i of %i, index=%i, device#=%i.\n", jj, gctrl->activeEventCount, index, gctxt->availEventDeviceNum[index]); + (void) eventId; // Suppress 'not used' warning when not debug. 
+ + /* If the device/context does not match the current context, move to next */ + if(gctxt->availEventDeviceNum[index] != eventDeviceNum) + continue; + + for(ee = 0; ee < gctrl->arrayOfActiveContexts[cc]->conEventsCount; ee++) { + ROCMDBG("Searching for activeEvent %s in Activecontext %u eventIndex %d duration %lu\n", eventId.name, ee, index, durationNs); + if (gctrl->arrayOfActiveContexts[cc]->conEventIndex[ee] == index) { + gctrl->activeEventValues[jj] = gctrl->arrayOfActiveContexts[cc]->conEvents[ee].data.result_int64; + ROCMDBG("Matched event %d:%d eventName %s value %lld\n", jj, index, eventId.name, gctrl->activeEventValues[jj]); + break; + } + } + } + } + + *values = gctrl->activeEventValues; + return (PAPI_OK); +} + + +/* Triggered by PAPI_stop() */ +static int _rocm_stop(hwd_context_t * ctx, hwd_control_state_t * ctrl) +{ + ROCMDBG("Entering _rocm_stop\n"); + + (void) ctx; + (void) ctrl; + _rocm_control_t *gctrl = global__rocm_control; + uint32_t cc; + + for(cc = 0; cc < gctrl->countOfActiveContexts; cc++) { + int eventDeviceNum = gctrl->arrayOfActiveContexts[cc]->deviceNum; + (void) eventDeviceNum; // Suppress 'not used' warning when not debug. + Context eventCtx = gctrl->arrayOfActiveContexts[cc]->ctx; + ROCMDBG("Stop device %d ctx %p \n", eventDeviceNum, eventCtx); + ROCP_CALL_CK(rocprofiler_stop, (eventCtx, 0), return (PAPI_EMISC)); + } + + return (PAPI_OK); +} // END ROUTINE. + +/* + * Disable and destroy the ROCM eventGroup + */ +static int _rocm_cleanup_eventset(hwd_control_state_t * ctrl) +{ + ROCMDBG("Entering _rocm_cleanup_eventset\n"); +// fprintf(stderr, "%s:%i _rocm_cleanup_eventset called.\n", __FILE__, __LINE__); + + (void) ctrl; + _rocm_control_t *gctrl = global__rocm_control; + uint32_t i, cc; + + for(cc = 0; cc < gctrl->countOfActiveContexts; cc++) { + int eventDeviceNum = gctrl->arrayOfActiveContexts[cc]->deviceNum; + (void) eventDeviceNum; // Suppress 'not used' warning when not debug. 
+ Context eventCtx = gctrl->arrayOfActiveContexts[cc]->ctx; + ROCMDBG("Destroy device %d ctx %p \n", eventDeviceNum, eventCtx); +// fprintf(stderr, "%s:%i About to call rocprofiler_close.\n", __FILE__, __LINE__); + ROCP_CALL_CK(rocprofiler_close, (eventCtx), return (PAPI_EMISC)); +// fprintf(stderr, "%s:%i Returned from call to rocprofiler_close, papi_free ptr=%p.\n", __FILE__, __LINE__, gctrl->arrayOfActiveContexts[cc] ); + papi_free( gctrl->arrayOfActiveContexts[cc] ); +// fprintf(stderr, "%s:%i Returned from call to papi_free.\n", __FILE__, __LINE__); + } + if (global__ctx_properties.queue != NULL) { + ROCM_CALL_CK(hsa_queue_destroy, (global__ctx_properties.queue), return (PAPI_EMISC)); + global__ctx_properties.queue = NULL; + } + /* Record that there are no active contexts or events */ +// fprintf(stderr, "%s:%i Checkpoint, maxEventSize=%i.\n", __FILE__, __LINE__, maxEventSize); + gctrl->countOfActiveContexts = 0; + gctrl->activeEventCount = 0; + + /* Clear all indicators of event being measured. */ + _rocm_context_t *gctxt = global__rocm_context; + for (i=0; i<maxEventSize; i++) { + gctxt->availEventIsBeingMeasuredInEventset[i] = 0; + } + +// fprintf(stderr, "%s:%i Returning from _rocm_cleanup_eventset.\n", __FILE__, __LINE__); + return (PAPI_OK); +} + + +/* Called at thread shutdown. Does nothing in the ROCM component. */ +static int _rocm_shutdown_thread(hwd_context_t * ctx) +{ + ROCMDBG("Entering _rocm_shutdown_thread\n"); + + (void) ctx; + return (PAPI_OK); +} + + +/* Triggered by PAPI_shutdown() and frees memory allocated in the ROCM component.
*/ +static int _rocm_shutdown_component(void) +{ + ROCMDBG("Entering _rocm_shutdown_component\n"); + + _rocm_control_t *gctrl = global__rocm_control; + _rocm_context_t *gctxt = global__rocm_context; + uint32_t cc; + + /* Free context */ + if(gctxt != NULL) { + papi_free(gctxt->availEventIDArray); + papi_free(gctxt->availEventDeviceNum); + papi_free(gctxt->availEventIsBeingMeasuredInEventset); + papi_free(gctxt->availEventDesc); + papi_free(gctxt); + global__rocm_context = gctxt = NULL; + } + + /* Free control */ + if(gctrl != NULL) { + for(cc = 0; cc < gctrl->countOfActiveContexts; cc++) { + if(gctrl->arrayOfActiveContexts[cc] != NULL) { + papi_free(gctrl->arrayOfActiveContexts[cc]); + } + } + + papi_free(gctrl); + global__rocm_control = gctrl = NULL; + } + + // Shutdown ROC runtime + // DEBUG: This causes a segfault. + ROCM_CALL_CK(hsa_shut_down, (), return (PAPI_EMISC)); + + // close the dynamic libraries needed by this component (opened in the init substrate call) + dlclose(dl1); + dlclose(dl2); + return (PAPI_OK); +} + + +/* Triggered by PAPI_reset() but only if the EventSet is currently + * running. If the eventset is not currently running, then the saved + * value in the EventSet is set to zero without calling this + * routine. */ +static int _rocm_reset(hwd_context_t * ctx, hwd_control_state_t * ctrl) +{ + ROCMDBG("Entering _rocm_reset\n"); + + (void) ctx; + (void) ctrl; + _rocm_control_t *gctrl = global__rocm_control; + uint32_t ii, cc; + + ROCMDBG("Reset all active event values\n"); + for(ii = 0; ii < gctrl->activeEventCount; ii++) + gctrl->activeEventValues[ii] = 0; + + for(cc = 0; cc < gctrl->countOfActiveContexts; cc++) { + int eventDeviceNum = gctrl->arrayOfActiveContexts[cc]->deviceNum; + (void) eventDeviceNum; // Suppress 'not used' error when not debug. 
+ Context eventCtx = gctrl->arrayOfActiveContexts[cc]->ctx; + ROCMDBG("Reset device %d ctx %p \n", eventDeviceNum, eventCtx); + ROCP_CALL_CK(rocprofiler_reset, (eventCtx, 0), return (PAPI_EMISC)); + } + + return (PAPI_OK); +} + + +/* This function sets various options in the component - Does nothing in the ROCM component. + @param[in] ctx -- hardware context + @param[in] code valid are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT + @param[in] option -- options to be set +*/ +static int _rocm_ctrl(hwd_context_t * ctx, int code, _papi_int_option_t * option) +{ + ROCMDBG("Entering _rocm_ctrl\n"); + + (void) ctx; + (void) code; + (void) option; + return (PAPI_OK); +} + + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + * PAPI_DOM_ALL is all of the domains + */ +static int _rocm_set_domain(hwd_control_state_t * ctrl, int domain) +{ + ROCMDBG("Entering _rocm_set_domain\n"); + + (void) ctrl; + if((PAPI_DOM_USER & domain) || (PAPI_DOM_KERNEL & domain) || (PAPI_DOM_OTHER & domain) || (PAPI_DOM_ALL & domain)) + return (PAPI_OK); + else + return (PAPI_EINVAL); + return (PAPI_OK); +} + + +/* Enumerate Native Events. 
+ * @param EventCode is the event of interest + * @param modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS + */ +static int _rocm_ntv_enum_events(unsigned int *EventCode, int modifier) +{ + //ROCMDBG("Entering (get next event after %u)\n", *EventCode ); + + switch (modifier) { + case PAPI_ENUM_FIRST: + *EventCode = 0; + return (PAPI_OK); + break; + case PAPI_ENUM_EVENTS: + if(global__rocm_context == NULL) { + return (PAPI_ENOEVNT); + } else if(*EventCode < global__rocm_context->availEventSize - 1) { + *EventCode = *EventCode + 1; + return (PAPI_OK); + } else + return (PAPI_ENOEVNT); + break; + default: + return (PAPI_EINVAL); + } + return (PAPI_OK); +} + + +//---------------------------------------------------------------------------- +// Takes a native event code and passes back the name, but the PAPI version +// of the name in availEventDesc[], not the ROCM internal name (in +// availEventIDArray[].name). +// @param EventCode is the native event code +// @param name is a pointer for the name to be copied to +// @param len is the size of the name string +//---------------------------------------------------------------------------- +static int _rocm_ntv_code_to_name(unsigned int EventCode, char *name, int len) +{ + //ROCMDBG("Entering EventCode %d\n", EventCode ); + + unsigned int index = EventCode; + _rocm_context_t *gctxt = global__rocm_context; + if(gctxt != NULL && index < gctxt->availEventSize) { + strncpy(name, gctxt->availEventDesc[index].name, len); + } else { + return (PAPI_EINVAL); + } + //ROCMDBG( "Exit: EventCode %d: Name %s\n", EventCode, name ); + return (PAPI_OK); +} + + +/* Takes a native event code and passes back the event description + * @param EventCode is the native event code + * @param descr is a pointer for the description to be copied to + * @param len is the size of the descr string + */ +static int _rocm_ntv_code_to_descr(unsigned int EventCode, char *name, int len) +{ + //ROCMDBG("Entering _rocm_ntv_code_to_descr\n"); + + unsigned 
int index = EventCode; + _rocm_context_t *gctxt = global__rocm_context; + if(gctxt != NULL && index < gctxt->availEventSize) { + strncpy(name, gctxt->availEventDesc[index].description, len); + } else { + return (PAPI_EINVAL); + } + return (PAPI_OK); +} + + +/* Vector that points to entry points for the component */ +papi_vector_t _rocm_vector = { + .cmp_info = { + /* default component information (unspecified values are initialized to 0) */ + .name = "rocm", + .short_name = "rocm", + .version = "1.0", + .description = "GPU events and metrics via AMD ROCm-PL API", + .num_mpx_cntrs = PAPIROCM_MAX_COUNTERS, + .num_cntrs = PAPIROCM_MAX_COUNTERS, + .default_domain = PAPI_DOM_USER, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + .hardware_intr_sig = PAPI_INT_SIGNAL, + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + } + , + /* sizes of framework-opaque component-private structures... 
these are all unused in this component */ + .size = { + .context = 1, /* sizeof( _rocm_context_t ), */ + .control_state = 1, /* sizeof( _rocm_control_t ), */ + .reg_value = 1, /* sizeof( _rocm_register_t ), */ + .reg_alloc = 1, /* sizeof( _rocm_reg_alloc_t ), */ + } + , + /* function pointers in this component */ + .start = _rocm_start, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) */ + .stop = _rocm_stop, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) */ + .read = _rocm_read, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl, long_long ** events, int flags ) */ + .reset = _rocm_reset, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) */ + .cleanup_eventset = _rocm_cleanup_eventset, /* ( hwd_control_state_t * ctrl ) */ + + .init_component = _rocm_init_component, /* ( int cidx ) */ + .init_thread = _rocm_init_thread, /* ( hwd_context_t * ctx ) */ + .init_control_state = _rocm_init_control_state, /* ( hwd_control_state_t * ctrl ) */ + .update_control_state = _rocm_update_control_state, /* ( hwd_control_state_t * ptr, NativeInfo_t * native, int count, hwd_context_t * ctx ) */ + + .ctl = _rocm_ctrl, /* ( hwd_context_t * ctx, int code, _papi_int_option_t * option ) */ + .set_domain = _rocm_set_domain, /* ( hwd_control_state_t * cntrl, int domain ) */ + .ntv_enum_events = _rocm_ntv_enum_events, /* ( unsigned int *EventCode, int modifier ) */ + .ntv_code_to_name = _rocm_ntv_code_to_name, /* ( unsigned int EventCode, char *name, int len ) */ + .ntv_code_to_descr = _rocm_ntv_code_to_descr, /* ( unsigned int EventCode, char *name, int len ) */ + .shutdown_thread = _rocm_shutdown_thread, /* ( hwd_context_t * ctx ) */ + .shutdown_component = _rocm_shutdown_component, /* ( void ) */ +}; + diff -Nru papi-5.7.0+dfsg/src/components/rocm/README papi-6.0.0~dfsg/src/components/rocm/README --- papi-5.7.0+dfsg/src/components/rocm/README 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm/README 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 
+1,109 @@ +/** +* @file: README +* CVS: $Id$ +* @author: Asim YarKhan yarkhan@icl.utk.edu +* @author: Heike McCraw mccraw@icl.utk.edu +* @defgroup papi_components Components +* @brief Component Specific Readme file: ROCM +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information + +Known problems and limitations in early release of metric support +----------------------------------------------------------------- + +* If creation/destruction of EventSets is repeated dozens of times, + the AMD portion of the software refuses further creation. Perhaps + a limit is reached, or we are not performing some necessary + housekeeping. + +* Only sets of metrics and events that can be gathered in a single + pass are supported. + +* All metrics are returned as long long integers. + + +General information +------------------- + +The PAPI ROCM component is a hardware performance counter measurement +technology for the AMD ROCM platform which provides access to the hardware +counters inside the GPU. The component is an adapter to the ROCm profiling +library (RPL, 'ROC-profiler') which is included in the standard ROCM release. + +How to install PAPI with the ROCM component? +-------------------------------------------- + +PAPI requires one environment variable: PAPI_ROCM_ROOT, but there are +four more needed at runtime for operation by AMD software. These added +environment variables are typically set as follows, AFTER +PAPI_ROCM_ROOT has been exported. An example is provided below, +setting PAPI_ROCM_ROOT to its default value: + +export PAPI_ROCM_ROOT=/opt/rocm +export ROCP_METRICS=$PAPI_ROCM_ROOT/rocprofiler/lib/metrics.xml +export ROCPROFILER_LOG=1 +export HSA_VEN_AMD_AQLPROFILE_LOG=1 +export AQLPROFILE_READ_API=1 + +The first of these, ROCP_METRICS, must point at a file containing the +descriptions of metrics. The standard location is shown above; the +final three are fixed settings.
+ +For a standard installed system, these are the only environment +variables that need to be set, for both compile and runtime. + +The above example works on ICL's Caffeine system. + +Within PAPI_ROCM_ROOT, we expect the following standard directories: +PAPI_ROCM_ROOT/include +PAPI_ROCM_ROOT/include/hsa +PAPI_ROCM_ROOT/lib +PAPI_ROCM_ROOT/rocprofiler/lib +PAPI_ROCM_ROOT/rocprofiler/include + +After the exports shown above, PAPI must be configured and built. +When papi is installed, there will be a papi/src directory. Navigate +to that, and execute the following: + +> ./configure --with-components="rocm" +> make + +If you are rebuilding PAPI, then before the configure step, execute +> make clobber + +TESTING the component is installed: Still from papi/src: +> utils/papi_component_avail + +If the component is functional, it will show that. Otherwise it will +report it is disabled, and provide a reason why. + +You can see what events are provided by a working component as +follows: +> utils/papi_native_avail | grep -i "rocm:::" + + + +------------------------UNUSUAL INSTALLATIONS------------------------ + +System configurations can vary. Some systems use Spack, a package +manager, to automatically keep paths straight. Others (like our own +ICL Saturn System) require "module load" commands to provide some +services, e.g. 'module load rocm', and these may also set environment +variables and change the LD_LIBRARY_PATH search order. + +Users may require the help of sysadmin personnel to navigate these +facilities and gain access to the correct libraries. + +For the ROCM component to be operational, it must find the dynamic +libraries libhsa-runtime64.so and librocprofiler64.so. + +If these are not found (or are not functional) then the +component will be listed as "disabled" with a reason explaining the +problem. If libraries were not found, then they are not in the +expected places. 
The component can be configured to look for each of +these libraries in a specific place, and using an alternate name if +desired. Detailed instructions are contained in the Rules.rocm file. +They are technical, users may wish to enlist the help of a sysadmin. diff -Nru papi-5.7.0+dfsg/src/components/rocm/Rules.rocm papi-6.0.0~dfsg/src/components/rocm/Rules.rocm --- papi-5.7.0+dfsg/src/components/rocm/Rules.rocm 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm/Rules.rocm 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,110 @@ +# Set default if the root environment variable is not already set. +# Note PAPI_ROCM_ROOT is an environment variable that must be set. +# There are four other environment variables that must be exported +# for runtime operation; see the README file. +PAPI_ROCM_ROOT ?= /opt/rocm + +# For non-typical system configurations, the following 'runtime overrides' can +# be set, as just a library name, or a full path and name. There cannot be any +# spaces between the double quotes (which must be escaped as \"). An example: + +# PAPI_ROCM_HSA = \"$(PAPI_ROCM_ROOT)/lib/libhsa-runtime64.so.1\" + +# By default, all overrides are empty strings. + +# If an override is not an empty string, it must work, or the component will be +# disabled. + +# Both at compile time and run time, the software depends on PAPI_ROCM_ROOT. +# There are two libraries used by the ROCM component, they are: +# libhsa-runtime64.so +# librocprofiler64.so + +# The standard installed locations for these libraries, with overrides: +# $(PAPI_ROCM_ROOT)/lib/libhsa-runtime64.so #O.R. PAPI_ROCM_HSA +# $(PAPI_ROCM_ROOT)/lib/librocprofiler64.so #O.R. PAPI_ROCM_PROF +# +# There are many ways to cause these paths to be known. Spack is a +# package manager used on supercomputers, Linux and MacOS. If Spack +# is aware of ROCM, it encodes the paths to the necessary libraries. 
+ +# The environment variable LD_LIBRARY_PATH encodes a list of paths to +# search for libraries, separated by colons (:). These paths could be +# added to LD_LIBRARY_PATH. +# +# Warning: LD_LIBRARY_PATH often contains a list of directories that +# are searched for libraries, some of these may be needed by other +# packages you are using. Always extend LD_LIBRARY_PATH, never overwrite it; +# for example: +# >export LD_LIBRARY_PATH=someNewLibraryDirectory:$LD_LIBRARY_PATH +# which searches the new directory first, then the directories already +# in LD_LIBRARY_PATH. Alternatively, you can add it at the end: +# >export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:someNewLibraryDirectory +# which will search the existing directories first, then your new +# directory. + +# You can check on the value of LD_LIBRARY_PATH with +# echo $LD_LIBRARY_PATH + +# There may be other package managers or utilities, for example on a +# system with modules; the command 'module load rocm' may modify +# LD_LIBRARY_PATH. + +# A Linux system will also search for libraries by default in the +# directories listed by /etc/ld.so.conf, and /usr/lib64, /lib64, +# /usr/lib, /lib. + +# OVERRIDES: These are by default empty strings (""), if set they must work. +PAPI_ROCM_HSA = \"\" +PAPI_ROCM_PROF = \"\" + +# Examples of an override: +# PAPI_ROCM_HSA = \"$(PAPI_ROCM_ROOT)/hsa/lib/libhsa-runtime64.so.1.1.9\" +# PAPI_ROCM_PROF = \"$(PAPI_ROCM_ROOT)/rocprofiler/lib/librocprofiler64.so.1.0.0\" + +# Note: PAPI_ROCM_ROOT also applies to the ROCM_SMI component, which is by +# default installed by AMD when ROCM is installed; however, it must be +# specified at configure time as a separate component for PAPI. + +# Note: If you change these overrides, PAPI should be rebuilt from scratch.
+# From papi/src/ +# make clobber +# ./configure --with-components="rocm" +# make + +# An alternative, for both rocm and rocm_smi components: +# ./configure --with-components="rocm rocm_smi" + +# OPERATION, per library: +# 1) If an override string is not empty, we will use it explicitly and fail if +# it does not work. This means disabling the component; a reason for disabling +# is shown using the papi utility, papi/src/utils/papi_component_avail + +# 2) We will attempt to open the library using the normal system library search +# paths; if Spack is present and configured correctly it should deliver the +# proper library. A failure here will be silent; we will proceed to (3). + +# 3) If that fails, we will try to find the library in the standard installed +# locations listed above. If this fails, we disable the component; the reason +# for disabling is shown using the papi utility, +# papi/src/utils/papi_component_avail. + +# ROCM_MACS holds the macro defines for the two overrides. In the code we convert +# these to string variables with the following lines: +# static char rocm_hsa[]=PAPI_ROCM_HSA; +# static char rocm_prof[]=PAPI_ROCM_PROF; + +ROCM_MACS = -DPAPI_ROCM_HSA=$(PAPI_ROCM_HSA) -DPAPI_ROCM_PROF=$(PAPI_ROCM_PROF) + +COMPSRCS += components/rocm/linux-rocm.c +COMPOBJS += linux-rocm.o +# CFLAGS specifies compile flags; need include files here, and macro defines. +# Order is important here; there are multiple DIFFERENT hsa.h files.
+CFLAGS += -I$(PAPI_ROCM_ROOT)/hsa/include/hsa +CFLAGS += -I$(PAPI_ROCM_ROOT)/rocprofiler/include +CFLAGS += -I$(PAPI_ROCM_ROOT)/include $(ROCM_MACS) -g +LDFLAGS += $(LDL) -g + +linux-rocm.o: components/rocm/linux-rocm.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/rocm/linux-rocm.c -o linux-rocm.o + diff -Nru papi-5.7.0+dfsg/src/components/rocm/tests/Makefile papi-6.0.0~dfsg/src/components/rocm/tests/Makefile --- papi-5.7.0+dfsg/src/components/rocm/tests/Makefile 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm/tests/Makefile 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,33 @@ +NAME=rocm +include ../../Makefile_comp_tests.target +INCLUDE += -I$(PAPI_ROCM_ROOT)/hip/include +INCLUDE += -I$(PAPI_ROCM_ROOT)/hsa/include/hsa +INCLUDE += -I$(PAPI_ROCM_ROOT)/rocprofiler/include +INCLUDE += -I$(PAPI_ROCM_ROOT)/include +LDFLAGS = -ldl -g + +# rocm_tests is the entry point of this Makefile; neither it nor clean names a file. +.PHONY: rocm_tests clean +.DEFAULT_GOAL := rocm_tests + +%.o:%.c + @echo "INCLUDE=" $(INCLUDE) + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +%.o:%.cpp + @echo "INCLUDE=" $(INCLUDE) + @echo "CFLAGS=" $(CFLAGS) + $(CXX) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +TESTS = + +rocm_tests: $(TESTS) + +rocm_command_line: rocm_command_line.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o rocm_command_line rocm_command_line.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +rocm_standalone_report_all: rocm_standalone_report_all.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o rocm_standalone_report_all rocm_standalone_report_all.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o diff -Nru papi-5.7.0+dfsg/src/components/rocm/tests/rocm_all.cpp papi-6.0.0~dfsg/src/components/rocm/tests/rocm_all.cpp --- papi-5.7.0+dfsg/src/components/rocm/tests/rocm_all.cpp 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm/tests/rocm_all.cpp 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,519 @@
+//----------------------------------------------------------------------------- +// This program must be compiled using a special makefile: +// make -f ROCM_Makefile rocm_all.out +//----------------------------------------------------------------------------- +#define __HIP_PLATFORM_HCC__ + +#include +#include +#include +#include "papi.h" +#include + +#define CHECK(cmd) \ +{\ + hipError_t error = cmd;\ + if (error != hipSuccess) { \ + fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ + exit(EXIT_FAILURE);\ + }\ +} + +// THIS MACRO EXITS if the papi call does not return PAPI_OK. Do not use for routines that +// return anything else; e.g. PAPI_num_components, PAPI_get_component_info, PAPI_library_init. +#define CALL_PAPI_OK(papi_routine) \ + do { \ + int _papiret = papi_routine; \ + if (_papiret != PAPI_OK) { \ + fprintf(stderr, "%s:%d macro: PAPI Error: function " #papi_routine " failed with ret=%d [%s].\n", \ + __FILE__, __LINE__, _papiret, PAPI_strerror(_papiret)); \ + exit(-1); \ + } \ + } while (0); + + +#define MEMORY_ALLOCATION_CALL(var) \ + do { \ + if (var == NULL) { \ + fprintf(stderr, "%s:%d: Error: Memory Allocation Failed \n",\ + __FILE__, __LINE__); \ + exit(-1); \ + } \ + } while (0); + + +#define MAX_DEVICES (32) +#define BLOCK_SIZE (1024) +#define GRID_SIZE (512) +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define SUCCESS (0) +#define NUM_METRIC (18) +#define NUM_EVENTS (2) +#define MAX_SIZE (64*1024*1024) // 64 MB + +typedef union +{ + long long ll; + unsigned long long ull; + double d; + void *vp; + unsigned char ch[8]; +} convert_64_t; + +typedef struct { + char name[128]; + long long value; +} eventStore_t; + +std::string EXCLUDE[] = { // List of events to specifically exclude, when rocprofiler_open is failing. 
+ "TA_TA_BUSY", + "TA_FLAT_READ_WAVEFRONTS", + "TA_FLAT_WRITE_WAVEFRONTS", + "TCC_HIT", + "TCC_MISS", + "TCC_EA_WRREQ", + "TCC_EA_WRREQ_64B", + "TCC_EA_WRREQ_STALL", + "TCC_EA_RDREQ", + "TCC_EA_RDREQ_32B", + "TCP_TA_DATA_STALL_CYCLES", + "", // End of Table. MOVE TO TOP to disable this list. +}; + +int eventsFoundCount = 0; // occupants of the array. +int eventsFoundMax; // Size of the array. +int eventsFoundAdd = 32; // Blocksize for increasing the array. +int deviceCount=0; // Total devices seen. +int deviceEvents[32] = {0}; // Number of events for each device=??. +eventStore_t *eventsFound = NULL; // The array. + +//----------------------------------------------------------------------------- +// HIP routine: Square each element in the array A and write to array C. +//----------------------------------------------------------------------------- +template +__global__ void +vector_square(T *C_d, T *A_d, size_t N) +{ + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x ; + + for (size_t i=offset; i= eventsFoundMax) { // bump count, if too much, make room. + eventsFoundMax += eventsFoundAdd; // Add. + eventsFound = (eventStore_t*) realloc(eventsFound, eventsFoundMax*sizeof(eventStore_t)); // Make new room. + memset(eventsFound+(eventsFoundMax-eventsFoundAdd), 0, eventsFoundAdd*sizeof(eventStore_t)); // zero it. + } +} // end routine. + +//----------------------------------------------------------------------------- +// conduct a test using HIP. Derived from AMD sample code 'square.cpp'. +// coming in, EventSet is already populated, we just run the test and read. +// Note values must point at an array large enough to store the events in +// Eventset. 
+//----------------------------------------------------------------------------- +void conductTest(int EventSet, int device, long long *values, int numValues) { + float *A_d, *C_d; + float *A_h, *C_h; + size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + int i, ret, thisDev, verbose=0; + + ret = PAPI_start( EventSet ); + if (ret != PAPI_OK ) { + fprintf(stderr,"Error! PAPI_start\n"); + exit( ret ); + } + + CHECK(hipSetDevice(device)); // Set device requested. + CHECK(hipGetDevice(&thisDev)); // Double check. + hipDeviceProp_t props; + CHECK(hipGetDeviceProperties(&props, thisDev)); // Get properties (for name). + if (verbose) printf ("info: Requested Device=%i, running on device %i=%s\n", device, thisDev, props.name); + + if (verbose) printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); + A_h = (float*)malloc(Nbytes); // standard malloc for host. + CHECK(A_h == NULL ? hipErrorMemoryAllocation : hipSuccess ); + C_h = (float*)malloc(Nbytes); // standard malloc for host. + CHECK(C_h == NULL ? hipErrorMemoryAllocation : hipSuccess ); + + // Fill with Phi + i + for (size_t i=0; iname) == 0) cid=i; // If we found our match, record it. + } // end search components. + + if (cid < 0) { // if no PCP component found, + fprintf(stderr, "Failed to find rocm component among %i " + "reported components.\n", k); + FreeGlobals(); + PAPI_shutdown(); + exit(-1); + } + + printf("Found ROCM Component at id %d\n", cid); + + eventCount = 0; + int eventsRead=0; + + // Begin enumeration of all events. + + printf("Events with numeric values were read; if they are zero, they may not \n" + "be operational, or the exercises performed by this code do not affect \n" + "them. We report all 'rocm' events presented by the rocm component. \n" + "\n" + "---------------------------Event Name---------------------------:---Value---\n"); + + PAPI_event_info_t info; // To get event enumeration info. + m=PAPI_NATIVE_MASK; // Get the PAPI NATIVE mask. 
+ CALL_PAPI_OK(PAPI_enum_cmp_event(&m,PAPI_ENUM_FIRST,cid)); // Begin enumeration of ALL papi counters. + do { // Enumerate all events. + memset(&info,0,sizeof(PAPI_event_info_t)); // Clear event info. + k=m; // Make a copy of current code. + + // enumerate sub-events, with masks. For this test, we do not + // have any! But we do this to test our enumeration works as + // expected. First time through is guaranteed, of course. + + do { // enumerate masked events. + CALL_PAPI_OK(PAPI_get_event_info(k,&info)); // get name of k symbol. + char *devstr = strstr(info.symbol, "device="); // look for device enumerator. + if (devstr == NULL) continue; // Skip if no device present. + device=atoi(devstr+7); // Get the device id, for info. + if (device < 0 || device >= 32) continue; // skip any not in range. + + // Check if this symbol is in the exclusion table. + + i=0; // index into include table. + while (EXCLUDE[i].size() > 0) { + if (strstr(info.symbol, EXCLUDE[i].c_str()) != NULL) break; // Get out if we match an exclusion. + i++; // Not this one, check next string in table. + } + + if (EXCLUDE[i].size() != 0) continue; // Matched an exclusion, skip it. + + CALL_PAPI_OK(PAPI_create_eventset(&EventSet)); + CALL_PAPI_OK(PAPI_assign_eventset_component(EventSet, cid)); + + ret = PAPI_add_named_event(EventSet, info.symbol); // Don't want to fail program if name not found... + if(ret == PAPI_OK) { + eventCount++; // Bump number of events we could test. + if (deviceEvents[device] == 0) deviceCount++; // Increase count if first for this device. + deviceEvents[device]++; // Add to count of events on this device. + } else { + fprintf(stderr, "FAILED to add event '%s', ret=%i='%s'.\n", info.symbol, ret, PAPI_strerror(ret)); + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. + CALL_PAPI_OK(PAPI_destroy_eventset(&EventSet)); // destroy the event set. + continue; + } + + long long value[1]={-1}; // The only value we read. + + // Prep stuff. 
+ + conductTest(EventSet, device, value, 1); // Conduct a test, on device given. + addEventsFound(info.symbol, value[0]); // Add to events we were able to read. + + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. + CALL_PAPI_OK(PAPI_destroy_eventset(&EventSet)); // destroy the event set. + + // report each event counted. + if (value[0] >= 0) { // If not still -1, + eventsRead++; // .. count and report. + if (value[0] == 0) { + printf("%-64s: %lli (not exercised by current test code.)\n", info.symbol, value[0]); + } else { + printf("%-64s: %lli\n", info.symbol, value[0]); + } + } else { + printf("%-64s: Failed to read.\n", info.symbol); + } + } while(PAPI_enum_cmp_event(&k,PAPI_NTV_ENUM_UMASKS,cid)==PAPI_OK); // Get next umask entry (bits different) (should return PAPI_NOEVNT). + } while(PAPI_enum_cmp_event(&m,PAPI_ENUM_EVENTS,cid)==PAPI_OK); // Get next event code. + + if (eventCount < 1) { // If we failed on all of them, + fprintf(stderr, "Unable to add any ROCM events; they are not present in the component.\n"); + fprintf(stderr, "Unable to proceed with this test.\n"); + FreeGlobals(); + PAPI_shutdown(); // Returns no value. + exit(-1); // exit no matter what. + } + + if (eventsRead < 1) { // If failed to read any, + fprintf(stderr, "\nFailed to read any ROCM events.\n"); // report a failure. + fprintf(stderr, "Unable to proceed with pair testing.\n"); + FreeGlobals(); + PAPI_shutdown(); // Returns no value. + exit(-1); // exit no matter what. + } + + printf("\nTotal ROCM events identified: %i.\n\n", eventsFoundCount); + if (eventsFoundCount < 2) { // If failed to get counts on any, + printf("Insufficient events are exercised by the current test code to perform pair testing.\n"); // report a failure. + FreeGlobals(); + PAPI_shutdown(); // Returns no value. + exit(0); // exit no matter what. + } + + + for (i=0; i<32; i++) { + if (deviceEvents[i] == 0) continue; // skip if none found. + printf("Device %i has %i events. 
%i potential pairings per device.\n", i, deviceEvents[i], deviceEvents[i]*(deviceEvents[i]-1)/2); + } + + // Begin pair testing. We consider every possible pairing of events + // that, tested alone, returned a value greater than zero. + + int mainEvent, pairEvent, mainDevice, pairDevice; + long long readValues[2]; + int goodOnSame=0, failOnDiff=0, badSameCombo=0, pairProblems=0; // Some counters. + int type; // 0 succeed on same device, 1 = fail across devices. + for (type=0; type<2; type++) { + if (type == 0) { + printf("List of Pairings on SAME device:\n"); + printf("* means value changed by more than 10%% when paired (vs measured singly, above).\n"); + printf("^ means a pair was rejected as an invalid combo.\n"); + } else { + printf("List of Failed Pairings on DIFFERENT devices:\n"); + } + + for (mainEvent = 0; mainEvent 1.10) flag1='*'; // Flag as significantly different for main. + if (pairCheck < 0.90 || pairCheck > 1.10) flag2='*'; // Flag as significantly different for pair. + if (flag1 == '*' || flag2 == '*') { + pairProblems++; // Remember number of problems. + flag = '*'; // set global flag. + } + + printf("%c %64s + %-64s [", flag, eventsFound[mainEvent].name, eventsFound[pairEvent].name); + printf("%c%lli,", flag1, readValues[0]); + printf("%c%lli]\n", flag2, readValues[1]); + + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. + CALL_PAPI_OK(PAPI_destroy_eventset(&EventSet)); // destroy the event set. + } + } // end loop on all events. 
+ + if (type == 0) { // For good pairings on same devices, + if (goodOnSame == 0) { + printf("NO valid pairings of above events if both on the SAME device.\n"); + } else { + printf("%i valid pairings of above events if both on the SAME device.\n", goodOnSame); + } + + printf("%i unique pairings on SAME device were rejected as bad combinations.\n", badSameCombo); + + if (pairProblems > 0) { + printf("%i pairings resulted in a change of one or both event values > 10%%.\n", pairProblems); + } else { + printf("No significant change in event values read for any pairings.\n"); + } + } else { // Must be reporting bad pairings across devies. + if (failOnDiff == 0) printf("NO failed pairings of above events if each on a DIFFERENT device.\n"); + else printf("%i failed pairings of above events with each on a DIFFERENT device.\n", failOnDiff); + } + } // end loop on type. + + PAPI_shutdown(); // Returns no value. + return(0); // exit OK. +} // end MAIN. diff -Nru papi-5.7.0+dfsg/src/components/rocm/tests/rocm_command_line.c papi-6.0.0~dfsg/src/components/rocm/tests/rocm_command_line.c --- papi-5.7.0+dfsg/src/components/rocm/tests/rocm_command_line.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm/tests/rocm_command_line.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,183 @@ +/* file rocm_command_line.c + * Nearly identical to "papi/src/utils/papi_command_line.c". Changes noted. + * This simply tries to add the events listed on the command line one at a time + * then starts and stops the counters and prints the results. +*/ + +/** + * @page papi_command_line + * @brief executes PAPI preset or native events from the command line. + * + * @section Synopsis + * papi_command_line < event > < event > ... + * + * @section Description + * papi_command_line is a PAPI utility program that adds named events from the + * command line to a PAPI EventSet and does some work with that EventSet. 
+ * This serves as a handy way to see if events can be counted together, + * and if they give reasonable results for known work. + * + * @section Options + *
    + *
  • -u Display output values as unsigned integers + *
  • -x Display output values as hexadecimal + *
  • -h Display help information about this utility. + *
+ * + * @section Bugs + * There are no known bugs in this utility. + * If you find a bug, it should be reported to the + * PAPI Mailing List at . + */ + +#include +#include +#include +#include + +#include "papi.h" +#include "do_loops.h" + +static void +print_help( char **argv ) +{ + printf( "Usage: %s [options] [EVENTNAMEs]\n", argv[0] ); + printf( "Options:\n\n" ); + printf( "General command options:\n" ); + printf( "\t-u Display output values as unsigned integers\n" ); + printf( "\t-x Display output values as hexadecimal\n" ); + printf( "\t-h Print this help message\n" ); + printf( "\tEVENTNAMEs Specify one or more preset or native events\n" ); + printf( "\n" ); + printf( "This utility performs work while measuring the specified events.\n" ); + printf( "It can be useful for sanity checks on given events and sets of events.\n" ); +} + + +int +main( int argc, char **argv ) +{ + int retval; + int num_events; + long long *values; + char *success; + PAPI_event_info_t info; + int EventSet = PAPI_NULL; + int i, j, k, event, data_type = PAPI_DATATYPE_INT64; + int u_format = 0; + int hex_format = 0; + + printf( "\nThis utility lets you add events from the command line " + "interface to see if they work.\n\n" ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + fprintf(stderr,"Error! PAPI_library_init\n"); + exit(retval ); + } + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK ) { + fprintf(stderr,"Error! 
PAPI_create_eventset\n"); + exit(retval ); + } + + values = + ( long long * ) malloc( sizeof ( long long ) * ( size_t ) argc ); + success = ( char * ) malloc( ( size_t ) argc ); + + if ( success == NULL || values == NULL ) { + fprintf(stderr,"Error allocating memory!\n"); + exit(1); + } + + for ( num_events = 0, i = 1; i < argc; i++ ) { + if ( !strcmp( argv[i], "-h" ) ) { + print_help( argv ); + exit( 1 ); + } else if ( !strcmp( argv[i], "-u" ) ) { + u_format = 1; + } else if ( !strcmp( argv[i], "-x" ) ) { + hex_format = 1; + } else { + if ( ( retval = PAPI_add_named_event( EventSet, argv[i] ) ) != PAPI_OK ) { + printf( "Failed adding: %s\nbecause: %s\n", argv[i], + PAPI_strerror(retval)); + } else { + success[num_events++] = i; + printf( "Successfully added: %s\n", argv[i] ); + } + } + } + + /* Automatically pass if no events, for run_tests.sh */ + if ( num_events == 0 ) { + printf("No events specified!\n"); + printf("Try running something like: %s PAPI_TOT_CYC\n\n", + argv[0]); + return 0; + } + + // ROCM skipped do_flops(), do_flush() in papi_command_line.c. + printf( "\n" ); + + retval = PAPI_start( EventSet ); + if (retval != PAPI_OK ) { + fprintf(stderr,"Error! PAPI_start\n"); + exit( retval ); + } + + // ROCM skipped do_flops(), do_misses() in papi_command_line.c. + + for (k = 0; k < 3; k++ ) { // ROCM change to loop, to read three times. + sleep(1); // .. sleep between reads to build up events. + + retval = PAPI_read( EventSet, values ); + if (retval != PAPI_OK ) { + fprintf(stderr,"Error! PAPI_read\n"); + exit( retval ); + } + printf( "\n----------------------------------\n" ); + + for ( j = 0; j < num_events; j++ ) { // Back to original papi_command_line... + i = success[j]; + if (! 
(u_format || hex_format) ) { + retval = PAPI_event_name_to_code( argv[i], &event ); + if (retval == PAPI_OK) { + retval = PAPI_get_event_info(event, &info); + if (retval == PAPI_OK) data_type = info.data_type; + else data_type = PAPI_DATATYPE_INT64; + } + switch (data_type) { + case PAPI_DATATYPE_UINT64: + printf( "%s : \t%llu(u)", argv[i], (unsigned long long)values[j] ); + break; + case PAPI_DATATYPE_FP64: + printf( "%s : \t%0.3f", argv[i], *((double *)(&values[j])) ); + break; + case PAPI_DATATYPE_BIT64: + printf( "%s : \t%#llX", argv[i], values[j] ); + break; + case PAPI_DATATYPE_INT64: + default: + printf( "%s : \t%lld", argv[i], values[j] ); + break; + } + if (retval == PAPI_OK) printf( " %s", info.units ); + printf( "\n" ); + } + if (u_format) printf( "%s : \t%llu(u)\n", argv[i], (unsigned long long)values[j] ); + if (hex_format) printf( "%s : \t%#llX\n", argv[i], values[j] ); + } + } // end ROCM added loop. + + retval = PAPI_stop( EventSet, values ); // ROCM added stop and test. + if (retval != PAPI_OK ) { + fprintf(stderr,"Error! PAPI_stop\n"); + exit( retval ); + } + + PAPI_shutdown(); // Shut it down. + return 0; + +} diff -Nru papi-5.7.0+dfsg/src/components/rocm/tests/ROCM_Makefile papi-6.0.0~dfsg/src/components/rocm/tests/ROCM_Makefile --- papi-5.7.0+dfsg/src/components/rocm/tests/ROCM_Makefile 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm/tests/ROCM_Makefile 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,41 @@ +# Originally /opt/rocm/hip/samples/0_Intro/square/Makefile +# Modified for testing in a PAPI environment. + +include ../../Makefile_comp_tests.target +HIP_PATH?= $(wildcard /opt/rocm/hip) +ifeq (,$(HIP_PATH)) + HIP_PATH=../../.. 
+endif + +HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) +HIPCC=$(HIP_PATH)/bin/hipcc + +INCLUDE+=-I${HOME}/papi/src/ + +ifeq (${HIP_PLATFORM}, nvcc) + SOURCES=square.cu +else + SOURCES=square.cpp +endif + +all: + +# Step +square.cpp: square.cu + $(HIP_PATH)/bin/hipify-perl square.cu > square.cpp + +square.out: square.cpp + $(HIPCC) $(CXXFLAGS) square.cpp -o $@ + +rocm_all.out: rocm_all.cpp + $(HIPCC) $(CXXFLAGS) $(INCLUDE) rocm_all.cpp -o $@ $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +checkpath: + echo HIP_PATH = $(HIP_PATH) + echo SOURCES = $(SOURCES) + echo HIP_PLATFORM = $(HIP_PLATFORM) + echo HIPCC = $(HIPCC) + echo INCLUDE = $(INCLUDE) + +clean: + rm -f *.o *.out diff -Nru papi-5.7.0+dfsg/src/components/rocm/tests/run_papi.sh papi-6.0.0~dfsg/src/components/rocm/tests/run_papi.sh --- papi-5.7.0+dfsg/src/components/rocm/tests/run_papi.sh 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm/tests/run_papi.sh 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,26 @@ +#!/bin/bash + +# NOTE: These directory settings apply only to the ICL test system Caffeine. +# Users should modify these settings to match their own system. See the +# components/rocm/README file for exports of environment variables that can be +# used INSTEAD of changing the LD_LIBRARY_PATH. This example shows how to make +# PAPI run with JUST the LD_LIBRARY_PATH. The SMI_DIR is only necessary if the +# rocm_smi component is also active. + +BIN_DIR=/opt/rocm +PROF_DIR=/home/adanalis/usr/rocprofiler/lib +SMI_DIR=$HOME/rocm_smi_lib/build/lib +PAPIDIR=$HOME/papi + +# Note, these paths work on the ICL test system 'Caffeine', +export LD_LIBRARY_PATH=/opt/rocm_src/lib/hsa:$BIN_DIR/lib:$PAPIDIR/src:$PROF_DIR:$SMI_DIR:$LD_LIBRARY_PATH + +# The following are required by the AMD rocprofiler utility; not by PAPI. 
+export ROCP_METRICS=$PROF_DIR/metrics.xml +export ROCPROFILER_LOG=1 +export HSA_VEN_AMD_AQLPROFILE_LOG=1 +export AQLPROFILE_READ_API=1 + +$HOME/papi/src/utils/papi_component_avail +#$HOME/papi/src/utils/papi_native_avail +#./rocm_command_line rocm:::device:0:GRBM_COUNT rocm:::device:0:GRBM_GUI_ACTIVE diff -Nru papi-5.7.0+dfsg/src/components/rocm_smi/linux-rocm-smi.c papi-6.0.0~dfsg/src/components/rocm_smi/linux-rocm-smi.c --- papi-5.7.0+dfsg/src/components/rocm_smi/linux-rocm-smi.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm_smi/linux-rocm-smi.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,3550 @@ +//----------------------------------------------------------------------------- +// @file linux-rocm-smi.c +// +// @ingroup rocm_components +// +// @brief This implements a PAPI component that enables PAPI-C to access +// hardware system management controls for AMD ROCM GPU devices through the +// rocm_smi library. +// +// The open source software license for PAPI conforms to the BSD License +// template. +//----------------------------------------------------------------------------- + +#include +#include +#include +#include +#include + +#include "rocm_smi.h" +#include "papi.h" +#include "papi_memory.h" +#include "papi_internal.h" +#include "papi_vector.h" + +// The following macros, if defined, will help with diagnosing problems with new devices. +// output will be to stderr during any PAPI_INIT, e.g. execute utils/papi_component_avail. 
+// #define REPORT_KNOWN_EVENTS_NOT_SUPPORTED_BY_DEVICE +// #define REPORT_DEVICE_FUNCTION_NOT_SUPPORTED_BY_THIS_SOFTWARE + +static char *RSMI_ERROR_STRINGS[]={ + "RSMI_STATUS_SUCCESS", + "RSMI_STATUS_INVALID_ARGS", + "RSMI_STATUS_NOT_SUPPORTED", + "RSMI_STATUS_FILE_ERROR", + "RSMI_STATUS_PERMISSION", + "RSMI_STATUS_OUT_OF_RESOURCES", + "RSMI_STATUS_INTERNAL_EXCEPTION", + "RSMI_STATUS_INPUT_OUT_OF_BOUNDS", + "RSMI_STATUS_INIT_ERROR", + "RSMI_STATUS_NOT_YET_IMPLEMENTED", + "RSMI_STATUS_NOT_FOUND", + "RSMI_STATUS_INSUFFICIENT_SIZE", + "RSMI_STATUS_INTERRUPT", + "RSMI_STATUS_UNEXPECTED_SIZE", + "RSMI_STATUS_NO_DATA", + "RSMI_STATUS_UNKNOWN_ERROR"}; + + +// Macros for error checking... each arg is only referenced/evaluated once +#define CHECK_PRINT_EVAL(checkcond, str, evalthis) \ + do { \ + int _cond = (checkcond); \ + if (_cond) { \ + fprintf(stderr, "%s:%i error: condition %s failed: %s.\n", \ + __FILE__, __LINE__, #checkcond, str); \ + evalthis; \ + } \ + } while (0) + +// This macro declares a function pointer. It used to make +// the function name a weak link, but we never use the name +// directly as something the linker must resolve, so weak +// link names are not necessary. +#define DECLARE_RSMI(funcname, funcsig) \ +/* rsmi_status_t __attribute__((weak)) funcname funcsig; */ \ + static rsmi_status_t(*funcname##Ptr) funcsig; + +// This macro gets the function pointer from the dynamic +// library, and sets the function pointer declared above. +#define DLSYM_SMI(name) \ + do { \ + name##Ptr = dlsym(dl1, #name); \ + if (dlerror()!=NULL) { \ + snprintf(_rocm_smi_vector.cmp_info.disabled_reason, \ + PAPI_MAX_STR_LEN, \ + "The function '%s' was not found in SMI library.", \ + #name); \ + fprintf(stderr, "%s\n", \ + _rocm_smi_vector.cmp_info.disabled_reason); \ + name##Ptr = NULL; \ + return(PAPI_ENOSUPP); \ + } \ + } while (0) + +// The following will call and check the return on an SMI function; +// note it appends 'Ptr' to the name for the caller. 
+// Arguments: 'name' is the rsmi function name, 'args' its parenthesized
+// argument list, and 'handleerror' a statement executed when the call does
+// not return RSMI_STATUS_SUCCESS. If the function pointer is NULL the macro
+// returns -1 from the enclosing function. Failures are reported on stderr
+// only when printRSMIerr is nonzero.
+#define RSMI(name, args, handleerror)                                        \
+    do {                                                                     \
+        if (name##Ptr == NULL) {                                             \
+            fprintf(stderr, "%s function pointer is NULL.\n", #name);        \
+            return(-1);                                                      \
+        }                                                                    \
+        rsmi_status_t _status = (*name##Ptr)args;                            \
+        if (_status != RSMI_STATUS_SUCCESS) {                                \
+            if (printRSMIerr) {                                              \
+                fprintf(stderr, "%s:%i error: RSMI function %s failed "      \
+                        "with error %d='%s'.\n",                             \
+                        __FILE__, __LINE__, #name, _status,                  \
+                        RSMI_ERROR_STR(_status));                            \
+            }                                                                \
+            handleerror;                                                     \
+        }                                                                    \
+    } while (0)
+
+//-----------------------------------------------------------------------------
+// How it all works!
+//
+// INTRO to ROCM_SMI: Unlike other event libraries we use, the ROCM_SMI
+// library does not have a way to parse a string-name event and return values.
+// Instead, their library has individual routines that must be called, and
+// they don't have a uniform argument list: Some take 2 args, some 3 or 4.
+//
+// ROCM_SMI does have an iterator that returns the text names of whatever
+// functions it has that are valid; along with 'variant' and 'subvariant'
+// codes that are valid. You can see this in the routine scanEvents(). We load
+// all these into an array ScanEvents[], which we sort by device, name,
+// variant, and subvariant.
+//
+// We have (in this file) separate functions for each event that call the
+// library function to return a value for that event; these are the er_XXX
+// routines and ew_XXX routines (for "event read" and, when applicable, "event
+// write").
+//
+// In the function _rocm_smi_add_native_events(), we go through every event we
+// know about and see if we can find it in the ScanEvents[] array; if we can,
+// we create a new event for PAPI users in the array AllEvents[]. This will
+// have an explicit name (different than the routine name), and the table
+// entry contains a pointer to read and/or write routines, the variant and
+// subvariant necessary, the space to read the value, etc.
+//
+// The structure following these comments is one element in AllEvents[].
+// +// On PAPI_read(), we search the AllEvents[] array, and for any active entries +// we call the reader routine. It can return one value or whole structures. +// Each read routine is specific to the event, it must extract from +// multi-valued returns its single value. But if it does return multiple +// values, then there is only ONE event (the first) that has the array to read +// into, and all the others will have 'baseIdx' set to the event. Note that +// each event still gets its own reader (to handle indexing). Our protocol is +// that if 'baseIdx != myIdx' the baseIdx reader is called; it will populate +// its value and mark itself read. Then others can call their reader to +// populate their value, from the array in the baseIdx. +// +// For efficiency, when we construct AllEvents[] we ensure all events with the +// same device:sensor:baseIdx are contiguous. +// +// Whenever we enable an event, we check subsequent events in the table to see +// if they have the same baseIdx, and enable them as well. +// +// Each reader populates the single 'value' it will return. At the end of a +// PAPI_read(), we must return these values in the order they requested them; +// but we have an array of AllEvents[] indices; so we just look them up and +// copy this value. +// +// Note 'device' and 'sensor' are signed; so we do not reset anything if they +// are less than zero. +// +// If you need it, add 'int cumulative' indicator here and set it during the +// event setup in _rocm_smi_add_native_events. Then add to _rocm_smi_start() +// code to read a zero value for any active events. You would need to add a +// 'uint64_t zero' field, also. But because different routines treat this as +// int or unsigned, it is a little tricky to set the zero. I think the reader +// routine would always subtract it from a read value, recasting as needed. +// Then to set a new zero, set ->zero=0x0, read, set ->zero = ->value. 
+//----------------------------------------------------------------------------- + +typedef struct { + int read; // 0 for not read yet, 1 for read. + char name[PAPI_MAX_STR_LEN]; + char desc[PAPI_2MAX_STR_LEN]; + int32_t variant; // Corresponding variant, to match that returned by iterator. + int32_t subvariant; // Corresponding subvariant, to match that returned by iterator. + int(*reader)(int myIdx); // event-specific read function; baseIdx=(-1) for call required; otherwise skip call, AllEvents[baseIdx] has the data recorded in vptr[]. + int(*writer)(int myIdx); // event-specific write function (may be null if unwriteable). + int32_t device; // Device idx for event; -1 for calls without a device argument. + uint32_t baseIdx; // In case multivalued read; where the master data structure is. + size_t vptrSize; // malloc for whatever vptr needs when multiple values returned. + void* vptr; // NULL or a structure or vector of values that were read. + uint64_t value; // single value to return; always set on read, or value to write. +} event_info_t; + +#define scanEventFuncNameLen 64 +typedef struct { + char funcname[scanEventFuncNameLen]; + int32_t device; // Note: -1 == END OF LIST marker. + int32_t variant; + int32_t subvariant; + int32_t used; // diagnostic: Marked if found by nextEvent(). +} scanEvent_info_t; + + +// Function prototypes +static int _rocm_smi_cleanup_eventset(hwd_control_state_t * ctrl); +papi_vector_t _rocm_smi_vector; // Declare in advance, so it is present for error codes. + +//=================================== GLOBALS ================================== +// +// ****** CHANGE PROTOTYPES TO DECLARE ROCM LIBRARY SYMBOLS AS WEAK ********** +// This is done so that a version of PAPI built with the rocm component can * +// be installed on a system which does not have the rocm libraries installed. * +// * +// If this is done without these prototypes, then all papi services on the * +// system without the rocm libraries installed will fail. 
The PAPI libraries * +// contain references to the rocm libraries which are not installed. The * +// load of PAPI commands fails because the rocm library references can not be * +// resolved. * +// * +// This also defines pointers to the rocm library functions that we call. * +// These function pointers will be resolved with dlopen/dlsym calls at * +// component initialization time. The component then calls the rocm library * +// functions through these function pointers. * +// ***************************************************************************** +void (*_dl_non_dynamic_init) (void) __attribute__ ((weak)); + +// RSMI API declaration, in utility order. All return rsmi_status_t. The ones +// simple to implement that just read or write a value are first. We group them +// for creating various event creation routines, depending on whether multiple +// events must be created or special events must be created. These are copied +// in the same order to produce the corresponding function pointers and then +// event names. + +DECLARE_RSMI(rsmi_num_monitor_devices, (uint32_t *num_devices)); +DECLARE_RSMI(rsmi_dev_supported_func_iterator_open, (uint32_t dv_ind, rsmi_func_id_iter_handle_t *handle)); +DECLARE_RSMI(rsmi_dev_supported_variant_iterator_open, (rsmi_func_id_iter_handle_t obj_h,rsmi_func_id_iter_handle_t *var_iter)); +DECLARE_RSMI(rsmi_dev_supported_variant_iterator_open, (rsmi_func_id_iter_handle_t obj_h,rsmi_func_id_iter_handle_t *var_iter)); +DECLARE_RSMI(rsmi_dev_supported_func_iterator_close, (rsmi_func_id_iter_handle_t *handle)); +DECLARE_RSMI(rsmi_func_iter_value_get, (rsmi_func_id_iter_handle_t handle,rsmi_func_id_value_t *value)); +DECLARE_RSMI(rsmi_func_iter_next, (rsmi_func_id_iter_handle_t handle)); + +// All by device id. 
+DECLARE_RSMI(rsmi_dev_id_get, (uint32_t dv_ind, uint16_t *id)); +DECLARE_RSMI(rsmi_dev_subsystem_vendor_id_get, (uint32_t dv_ind, uint16_t *id)); +DECLARE_RSMI(rsmi_dev_vendor_id_get, (uint32_t dv_ind, uint16_t *id)); +DECLARE_RSMI(rsmi_dev_subsystem_id_get, (uint32_t dv_ind, uint16_t *id)); + +DECLARE_RSMI(rsmi_dev_drm_render_minor_get, (uint32_t dv_ind, uint32_t *minor)); +DECLARE_RSMI(rsmi_dev_overdrive_level_get, (uint32_t dv_ind, uint32_t *od)); +DECLARE_RSMI(rsmi_dev_overdrive_level_set, (int32_t dv_ind, uint32_t od)); +DECLARE_RSMI(rsmi_dev_memory_busy_percent_get, (uint32_t dv_ind, uint32_t *busy_percent)); +DECLARE_RSMI(rsmi_dev_memory_reserved_pages_get, (uint32_t dv_ind, uint32_t *num_pages, rsmi_retired_page_record_t *records)); + +// rsmi_dev_perf_level_t is just an enum; this can be returned as uint32. +DECLARE_RSMI(rsmi_dev_perf_level_get, (uint32_t dv_ind, rsmi_dev_perf_level_t *perf)); +DECLARE_RSMI(rsmi_dev_perf_level_set, ( int32_t dv_ind, rsmi_dev_perf_level_t perf_lvl)); + +// Iterate by memory type; an enum: +// RSMI_MEM_TYPE_VRAM; RSMI_MEM_TYPE_VIS_VRAM; RSMI_MEM_TYPE_GTT. (VIS=visible). +DECLARE_RSMI(rsmi_dev_memory_total_get, (uint32_t dv_ind, rsmi_memory_type_t mem_type, uint64_t *total)); +DECLARE_RSMI(rsmi_dev_memory_usage_get, (uint32_t dv_ind, rsmi_memory_type_t mem_type, uint64_t *used)); + +DECLARE_RSMI(rsmi_dev_busy_percent_get, (uint32_t dv_ind, uint32_t *busy_percent)); +DECLARE_RSMI(rsmi_dev_firmware_version_get, (uint32_t dv_ind, rsmi_fw_block_t block, uint64_t *fw_version)); +DECLARE_RSMI(rsmi_dev_ecc_count_get, (uint32_t dv_ind, rsmi_gpu_block_t block, rsmi_error_count_t *ec)); +DECLARE_RSMI(rsmi_dev_ecc_enabled_get, (uint32_t dv_ind, uint64_t *enabled_blocks)); +DECLARE_RSMI(rsmi_dev_ecc_status_get, (uint32_t dv_ind, rsmi_gpu_block_t block, rsmi_ras_err_state_t *state)); + +// clock frequency tables. 
+DECLARE_RSMI(rsmi_dev_gpu_clk_freq_get, (uint32_t dv_ind, rsmi_clk_type_t type, rsmi_frequencies_t *frequencies)); + +// Need sensor-id (0...n) in name. All zero for starters. +DECLARE_RSMI(rsmi_dev_fan_reset, (uint32_t dv_ind, uint32_t sensor_ind)); +DECLARE_RSMI(rsmi_dev_fan_rpms_get, (uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed)); +DECLARE_RSMI(rsmi_dev_fan_speed_get, (uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed)); +DECLARE_RSMI(rsmi_dev_fan_speed_max_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max_speed)); +DECLARE_RSMI(rsmi_dev_fan_speed_set, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed)); +DECLARE_RSMI(rsmi_dev_power_ave_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power)); +DECLARE_RSMI(rsmi_dev_power_cap_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap)); +DECLARE_RSMI(rsmi_dev_power_profile_presets_get, (uint32_t dv_ind, uint32_t sensor_ind, rsmi_power_profile_status_t *status)); +DECLARE_RSMI(rsmi_dev_power_profile_set, (uint32_t dv_ind, uint32_t reserved, rsmi_power_profile_preset_masks_t profile_mask)); + +DECLARE_RSMI(rsmi_dev_pci_id_get, (uint32_t dv_ind, uint64_t *bdfid)); + +// rsmi_temperature_metric_t is an enum with 14 settings; each would need to be an event. +DECLARE_RSMI(rsmi_dev_temp_metric_get, (uint32_t dv_ind, uint32_t sensor_ind, rsmi_temperature_metric_t metric, int64_t *temperature)); + +// rsmi_version_t contains uint32 for major; minor; patch. but could return 16-bit packed version as uint64_t. +DECLARE_RSMI(rsmi_version_get, (rsmi_version_t *version)); + +// rsmi_range_t contains two uint64's; lower_bound; upper_bound. +// This function has a prototype in the header file, but does not exist in the library. (circa Apr 5 2019). +// DECLARE_RSMI(rsmi_dev_od_freq_range_set, (uint32_t dv_ind, rsmi_clk_type_t clk, rsmi_range_t *range)); + +// Needs to be two events; sent and received. 
+DECLARE_RSMI(rsmi_dev_pci_throughput_get, (uint32_t dv_ind, uint64_t *sent, uint64_t *received, uint64_t *max_pkt_sz)); +DECLARE_RSMI(rsmi_dev_pci_replay_counter_get, (uint32_t dv_ind, uint64_t *counter)); + +// Needs to be two events; max and min. +DECLARE_RSMI(rsmi_dev_power_cap_range_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max, uint64_t *min)); +DECLARE_RSMI(rsmi_dev_power_cap_set, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap)); + +// rsmi_frequencies_t contains uint32 num_supported; uint32 current; uint64[] frequency. +DECLARE_RSMI(rsmi_dev_gpu_clk_freq_get, (uint32_t dv_ind, rsmi_clk_type_t clk_type, rsmi_frequencies_t *f)); +DECLARE_RSMI(rsmi_dev_gpu_clk_freq_set, (uint32_t dv_ind, rsmi_clk_type_t clk_type, uint64_t freq_bitmask)); + +// rsmi_freq_volt_region_t contains two rsmi_range_t; each has two uint64's lower_bound; upper_bound. +// Not implemented; data does not seem like useful performance data for PAPI users. +DECLARE_RSMI(rsmi_dev_od_volt_curve_regions_get, (uint32_t dv_ind, uint32_t *num_regions, rsmi_freq_volt_region_t *buffer)); + +// rsmi_od_volt_freq_data_t Complex structure with 4 rsmi_range_t and a 2D array of voltage curve points. +// Not implemented; data does not seem like useful performance data for PAPI users. +DECLARE_RSMI(rsmi_dev_od_volt_info_get, (uint32_t dv_ind, rsmi_od_volt_freq_data_t *odv)); + +// rsmi_pcie_bandwidth_t is a structure containing two arrays; for transfer_rates and lanes. +DECLARE_RSMI(rsmi_dev_pci_bandwidth_get, (uint32_t dv_ind, rsmi_pcie_bandwidth_t *bandwidth)); +DECLARE_RSMI(rsmi_dev_pci_bandwidth_set, (uint32_t dv_ind, uint64_t bw_bitmask)); +DECLARE_RSMI(rsmi_dev_unique_id_get, (uint32_t dv_ind, uint64_t *unique_id)); + +// The following functions return strings. 
+DECLARE_RSMI(rsmi_dev_brand_get, (uint32_t dv_ind, char *brand, uint32_t len)); +DECLARE_RSMI(rsmi_dev_name_get, (uint32_t dv_ind, char *name, size_t len)); +DECLARE_RSMI(rsmi_dev_serial_number_get, (uint32_t dv_ind, char *serial_number, uint32_t len)); +DECLARE_RSMI(rsmi_dev_subsystem_name_get, (uint32_t dv_ind, char *name, size_t len)); +DECLARE_RSMI(rsmi_dev_vbios_version_get, (uint32_t dv_ind, char *vbios, uint32_t len)); +DECLARE_RSMI(rsmi_dev_vendor_name_get, (uint32_t id, char *name, size_t len)); +DECLARE_RSMI(rsmi_version_str_get, (rsmi_sw_component_t id, char *name, size_t len)); + +// Non-Events. +DECLARE_RSMI(rsmi_init, (uint64_t init_flags)); +DECLARE_RSMI(rsmi_shut_down, (void)); +DECLARE_RSMI(rsmi_status_string, (rsmi_status_t status, const char **status_string)); + +// Globals. +static void *dl1 = NULL; +static char rocm_smi_main[]=PAPI_ROCM_SMI_MAIN; +static int TotalScanEvents = 0; // From the iterator scan, number we have. +static int SizeScanEvents = 0; // Size of dynamically growing array. +static int TotalEvents = 0; // Total Events we added. +static int ActiveEvents = 0; // Active events (number added by update_control_state). +static int SizeAllEvents = 0; // Size of the array. +static uint32_t TotalDevices = 0; // Number of devices we found. +static uint32_t DeviceCards[64]; // The cards we found them on; up to 64 of them. Currently populated but unused. +static event_info_t *AllEvents = NULL; // All events in the system. +static scanEvent_info_t *ScanEvents = NULL; // All scanned events in the system. +static int *CurrentIdx = NULL; // indices of events added by PAPI_add(), in order. +static long long *CurrentValue = NULL; // Value of events, in order, to return to user on PAPI_read(). +static int printRSMIerr = 0; // Suppresses RSMI errors during validation. + +static rsmi_frequencies_t *FreqTable = NULL; // For rsmi_dev_gpu_clk_freq_get (per device). 
+#define freqTablePerDevice (RSMI_CLK_TYPE_MEM+1) /* The only ones we know about */
+
+static rsmi_pcie_bandwidth_t *PCITable = NULL; // For rsmi_dev_pci_bandwidth_get (no variants, just one per device).
+
+//****************************************************************************
+//******* BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT ********
+//****************************************************************************
+
+//----------------------------------------------------------------------------
+// RSMI_ERROR_STR: Return the symbolic name for an rsmi status code.
+// Codes that index a named entry of RSMI_ERROR_STRINGS[] return that entry;
+// any other code (negative, or beyond the named entries) returns the final
+// table entry, "RSMI_STATUS_UNKNOWN_ERROR". The bound is derived from the
+// table itself so it cannot drift when names are added.
+//----------------------------------------------------------------------------
+static char *RSMI_ERROR_STR(int err)
+{
+    const int numNames = (int) (sizeof(RSMI_ERROR_STRINGS)/sizeof(RSMI_ERROR_STRINGS[0]));
+    const int unknownIdx = numNames-1;                      // Last entry is the catch-all.
+    int modErr=err;
+    if (modErr < 0 || modErr >= unknownIdx) modErr=unknownIdx;
+    return(RSMI_ERROR_STRINGS[modErr]);
+} // END ROUTINE.
+
+//----------------------------------------------------------------------------
+// Ensures there is room in all Events for one more entry.
+// Note we always zero added space as the default if any elements are not set.
+// The table grows 16 entries at a time. Because realloc() may move the table,
+// any pointer into AllEvents[] held by a caller is invalid after this call.
+// NOTE(review): calloc()/realloc() results are not checked, and this routine
+// has no way to report failure to its callers.
+//----------------------------------------------------------------------------
+static void MakeRoomAllEvents(void)
+{
+    if (TotalEvents < SizeAllEvents) return;                // One more will fit.
+    if (AllEvents == NULL) {                                // Never alloced;
+        SizeAllEvents = 16;                                 // Begin with 16 entries,
+        AllEvents = calloc(SizeAllEvents, sizeof(event_info_t));
+        return;
+    }
+
+    // Must add 16 table entries.
+    SizeAllEvents += 16;                                    // Add 16 entries.
+    AllEvents = realloc(AllEvents, SizeAllEvents*sizeof(event_info_t));     // make more room.
+    memset(&AllEvents[SizeAllEvents-16], 0, 16*sizeof(event_info_t));       // clear the added room.
+} // END ROUTINE.
+
+
+//----------------------------------------------------------------------------
+// Ensures there is room in scanEvents for one more entry.
+// Note we always zero added space as the default if any elements are not set.
+// The table grows 16 entries at a time. Because realloc() may move the table,
+// any pointer into ScanEvents[] held by a caller is invalid after this call.
+// NOTE(review): calloc()/realloc() results are not checked, and this routine
+// has no way to report failure to its callers.
+//----------------------------------------------------------------------------
+static void MakeRoomScanEvents(void)
+{
+    if (TotalScanEvents < SizeScanEvents) return;           // One more will fit.
+    if (ScanEvents == NULL) {                               // Never alloced;
+        SizeScanEvents = 16;                                // Begin with 16 entries,
+        ScanEvents = calloc(SizeScanEvents, sizeof(scanEvent_info_t));
+        return;
+    }
+
+    // Must add 16 table entries.
+    SizeScanEvents += 16;                                   // Add 16 entries.
+    ScanEvents = realloc(ScanEvents, SizeScanEvents*sizeof(scanEvent_info_t));  // make more room.
+    memset(&ScanEvents[SizeScanEvents-16], 0, 16*sizeof(scanEvent_info_t));     // clear the added room.
+} // END ROUTINE.
+
+
+//----------------------------------------------------------------------------
+// addScanEvent: Called from rocm_iterator, adds to list in ScanEvents.
+// 'routine' is the function name; names that do not fit in funcname[] are
+// truncated. 'variant' and 'subvariant' are stored in 32-bit fields.
+//----------------------------------------------------------------------------
+void addScanEvent(const char* routine, int32_t device, uint64_t variant, uint64_t subvariant)
+{
+    MakeRoomScanEvents();                                   // Make room if needed.
+    // strncpy() does not terminate the destination when the source fills it,
+    // and funcname is later passed to strcmp(); copy one less and terminate.
+    strncpy(ScanEvents[TotalScanEvents].funcname, routine, scanEventFuncNameLen-1);    // Copy name.
+    ScanEvents[TotalScanEvents].funcname[scanEventFuncNameLen-1]=0;                     // Ensure terminator.
+    ScanEvents[TotalScanEvents].device=device;              // Device ID.
+    ScanEvents[TotalScanEvents].variant=variant;            // variant is typically enum, may be a type.
+    ScanEvents[TotalScanEvents].subvariant=subvariant;      // subvariant is typically a sensor-ID.
+    TotalScanEvents++;                                      // Count this one.
+} // END routine.
+
+
+//----------------------------------------------------------------------------
+// sortScanEvents: Comparison routine for two scanEvent_info_t entries. Orders
+// by device, then function name, then variant, then subvariant; returns a
+// value less than, equal to, or greater than zero accordingly.
+//----------------------------------------------------------------------------
+static int sortScanEvents(const void *p1, const void *p2)
+{
+    const scanEvent_info_t* e1 = (const scanEvent_info_t*) p1;
+    const scanEvent_info_t* e2 = (const scanEvent_info_t*) p2;
+
+    if (e1->device < e2->device) return(-1);
+    if (e1->device > e2->device) return( 1);
+
+    // Same device.
+    int c=strcmp(e1->funcname, e2->funcname);
+    if (c != 0) return(c);
+
+    // Same function name.
+    if (e1->variant < e2->variant) return(-1);
+    if (e1->variant > e2->variant) return( 1);
+
+    // Same variant.
+    if (e1->subvariant < e2->subvariant) return(-1);
+    if (e1->subvariant > e2->subvariant) return( 1);
+    return(0);
+} // END routine.
+ + +//------------------------------------------------------------------------- +// We use the ROCM iterator to list all the available functions on each +// device. +// This code is derived from the C++ example code in the rsmi manual, Ch5. +//------------------------------------------------------------------------- +static void scanEvents(void) { + rsmi_func_id_iter_handle_t iter_handle, var_iter, sub_var_iter; + rsmi_func_id_value_t v_name, v_enum, v_sensor; + rsmi_status_t err; + unsigned int ui; + for (ui=0; uidevice < 0) return(NULL); // Got to end of list. + if (strcmp(currentEvent->funcname, funcname) != 0) return(NULL); // Got to end of this funcname. + currentEvent->used = 1; // else found it, mark it used. + return(currentEvent); // Return with next one. +} // END nextEvent. + + +//---------------------------------------------------------------------------- +// Link the necessary ROCM libraries to use the rocm component. If any of +// them cannot be found, then the ROCM component will just be disabled. This +// is done at runtime so that a version of PAPI built with the ROCM component +// can be installed and used on systems which have the ROCM libraries +// installed and on systems where these libraries are not installed. +static int _rocm_smi_linkRocmLibraries(void) +{ + char path_name[1024]; + // Attempt to guess if we were statically linked to libc, if so, get out. + if(_dl_non_dynamic_init != NULL) { + strncpy(_rocm_smi_vector.cmp_info.disabled_reason, "The ROCM component does not support statically linking to libc.", PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + + // collect any defined environment variables, or "NULL" if not present. + char *rocm_root = getenv("PAPI_ROCM_ROOT"); + dl1 = NULL; // Ensure reset to NULL. + + // Step 1: Process override if given. + if (strlen(rocm_smi_main) > 0) { // If override given, it has to work. + dl1 = dlopen(rocm_smi_main, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. 
+ if (dl1 == NULL) { + snprintf(_rocm_smi_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_ROCM_SMI_MAIN override '%s' given in Rules.rocm_smi not found.", rocm_smi_main); + return(PAPI_ENOSUPP); // Override given but not found. + } + } + + // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths. + if (dl1 == NULL) { // No override, + dl1 = dlopen("librocm_smi64.so", RTLD_NOW | RTLD_GLOBAL); // Try system paths. + } + + // Step 3: Try the explicit install default. + if (dl1 == NULL && rocm_root != NULL) { // if root given, try it. + snprintf(path_name, 1024, "%s/rocm_smi/lib/librocm_smi64.so", rocm_root); // PAPI Root check. + dl1 = dlopen(path_name, RTLD_NOW | RTLD_GLOBAL); // Try to open that path. + } + + // Check for failure. + if (dl1 == NULL) { + snprintf(_rocm_smi_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "librocm_smi64.so not found."); + return(PAPI_ENOSUPP); + } + + // We have a dl1. (librocm_smi64.so). + +// SMI Library routines. + DLSYM_SMI(rsmi_num_monitor_devices); + DLSYM_SMI(rsmi_dev_supported_func_iterator_open); + DLSYM_SMI(rsmi_dev_supported_variant_iterator_open); + DLSYM_SMI(rsmi_dev_supported_variant_iterator_open); + DLSYM_SMI(rsmi_dev_supported_func_iterator_close); + DLSYM_SMI(rsmi_func_iter_value_get); + DLSYM_SMI(rsmi_func_iter_next); + +// All by device id. + DLSYM_SMI(rsmi_dev_id_get); + DLSYM_SMI(rsmi_dev_unique_id_get); + DLSYM_SMI(rsmi_dev_subsystem_vendor_id_get); + DLSYM_SMI(rsmi_dev_vendor_id_get); + DLSYM_SMI(rsmi_dev_subsystem_id_get); + DLSYM_SMI(rsmi_dev_drm_render_minor_get); + DLSYM_SMI(rsmi_dev_overdrive_level_get); + DLSYM_SMI(rsmi_dev_overdrive_level_set); + DLSYM_SMI(rsmi_dev_pci_id_get); + DLSYM_SMI(rsmi_dev_memory_busy_percent_get); + + // Not implemented; data does not seem like useful performance data for PAPI users. + DLSYM_SMI(rsmi_dev_memory_reserved_pages_get); // retrieves an array. + + + +// rsmi_dev_perf_level_t is just an enum; this can be returned as uint32. 
+ DLSYM_SMI(rsmi_dev_perf_level_get); + DLSYM_SMI(rsmi_dev_perf_level_set); + DLSYM_SMI(rsmi_dev_gpu_clk_freq_get); + +// Iterate by memory type; an enum: +// RSMI_MEM_TYPE_VRAM; RSMI_MEM_TYPE_VIS_VRAM; RSMI_MEM_TYPE_GTT. (VIS=visible). + DLSYM_SMI(rsmi_dev_memory_total_get); + DLSYM_SMI(rsmi_dev_memory_usage_get); + DLSYM_SMI(rsmi_dev_busy_percent_get); + DLSYM_SMI(rsmi_dev_firmware_version_get); + +// Iterate by GPU_BLOCK enum. + DLSYM_SMI(rsmi_dev_ecc_count_get); + DLSYM_SMI(rsmi_dev_ecc_enabled_get); + DLSYM_SMI(rsmi_dev_ecc_status_get); + +// Need sensor-id (0...n) in name. All zero for starters. + DLSYM_SMI(rsmi_dev_fan_reset); + DLSYM_SMI(rsmi_dev_fan_rpms_get); + DLSYM_SMI(rsmi_dev_fan_speed_get); + DLSYM_SMI(rsmi_dev_fan_speed_max_get); + DLSYM_SMI(rsmi_dev_fan_speed_set); + DLSYM_SMI(rsmi_dev_power_ave_get); + DLSYM_SMI(rsmi_dev_power_cap_get); + DLSYM_SMI(rsmi_dev_power_profile_presets_get); + DLSYM_SMI(rsmi_dev_power_profile_set); + +// rsmi_temperature_metric_t is an enum with 14 settings; each would need to be an event. + DLSYM_SMI(rsmi_dev_temp_metric_get); + +// rsmi_version_t contains uint32 for major; minor; patch. but could return 16-bit packed version as uint64_t. + DLSYM_SMI(rsmi_version_get); + +// rsmi_range_t contains two uint64's; lower_bound; upper_bound. +// This function has a prototype in the header file, but does not exist in the library. (circa Apr 5 2019). +// DLSYM_SMI(rsmi_dev_od_freq_range_set); + +// Needs to be two events; sent and received. + DLSYM_SMI(rsmi_dev_pci_throughput_get); + + DLSYM_SMI(rsmi_dev_pci_replay_counter_get); + +// Needs to be two events; max and min. + DLSYM_SMI(rsmi_dev_power_cap_range_get); + DLSYM_SMI(rsmi_dev_power_cap_set); + +// rsmi_frequencies_t contains uint32 num_supported; uint32 current; uint64[] frequency. + DLSYM_SMI(rsmi_dev_gpu_clk_freq_get); + DLSYM_SMI(rsmi_dev_gpu_clk_freq_set); + +// rsmi_freq_volt_region_t contains two rsmi_range_t; each has two uint64's lower_bound; upper_bound. 
+ DLSYM_SMI(rsmi_dev_od_volt_curve_regions_get); + +// rsmi_od_volt_freq_data_t Complex structure with 4 rsmi_range_t and a 2D array of voltage curve points. + DLSYM_SMI(rsmi_dev_od_volt_info_get); + +// rsmi_pcie_bandwidth_t is a structure containing two arrays; for transfer_rates and lanes. + DLSYM_SMI(rsmi_dev_pci_bandwidth_get); + DLSYM_SMI(rsmi_dev_pci_bandwidth_set); + +// These functions return strings. + DLSYM_SMI(rsmi_dev_brand_get); + DLSYM_SMI(rsmi_dev_name_get); + DLSYM_SMI(rsmi_dev_serial_number_get); + DLSYM_SMI(rsmi_dev_subsystem_name_get); + DLSYM_SMI(rsmi_dev_vbios_version_get); + DLSYM_SMI(rsmi_dev_vendor_name_get); + DLSYM_SMI(rsmi_version_str_get); + +// Non-Events. + DLSYM_SMI(rsmi_init); + DLSYM_SMI(rsmi_shut_down); + DLSYM_SMI(rsmi_status_string); + + return (PAPI_OK); +} + +//----------------------------------------------------------------------------- +// Find devices: We search the file system for +// /sys/class/drm/card?/device/vendor. These must be sequential by card#; if +// they can be opened and return a line, it will be 0xhhhh as a hex vendor ID. +// 0x1002 is the vendor ID for AMD. +// This constructs the global value TotalDevices, and fills in the DeviceCards +// array with card-ids. +//----------------------------------------------------------------------------- +static int _rocm_smi_find_devices(void) +{ + char cardname[64]="/sys/class/drm/card?/device/vendor"; // card filename. + uint32_t myVendor = 0x1002; // The AMD GPU vendor ID. + char line[7]; + size_t bytes; + int card; + long int devID; + + TotalDevices=0; // Reset, in case called more than once. + line[6]=0; // ensure null terminator. + + for (card=0; card<64; card++) { + sprintf(cardname, "/sys/class/drm/card%i/device/vendor", card); // make a name for myself. + FILE *fcard = fopen(cardname, "r"); // Open for reading. + if (fcard == NULL) { // Failed to open, + break; + } + + bytes=fread(line, 1, 6, fcard); // read six bytes. 
+ fclose(fcard); // Always close it (avoid mem leak). + if (bytes != 6) { // If we did not read 6, + break; // .. get out. + } + + devID = strtol(line, NULL, 16); // convert base 16 to long int. Handles '0xhhhh'. NULL=Don't need 'endPtr'. + if (devID != myVendor) continue; // Not the droid I am looking for. + + // Found one. + DeviceCards[TotalDevices]=card; // Remember this. + TotalDevices++; // count it. + } // end loop through possible cards. + + if (TotalDevices == 0) { // No AMD devices found. + char errstr[]="No AMD GPU devices found (vendor ID 0x1002)."; + strncpy(_rocm_smi_vector.cmp_info.disabled_reason, errstr, PAPI_MAX_STR_LEN); + return(PAPI_ENOSUPP); + } + + return(PAPI_OK); +} // end _rocm_smi_find_devices + + +//----------------------------------------------------------------------------- +// Read/Write Routines for each event. Prefixes 'er_', 'ew_' for event read, +// event write, 'ed_' for event data structure if not implicit. +// int(*reader)(int myIdx); // event-specific read function (null if unreadable). +// int(*writer)(int myIdx); // event-specific write function (null if unwriteable). +//----------------------------------------------------------------------------- + +// (rsmi_dev_id_get, (uint32_t dv_ind, uint16_t *id)); +static int er_device_id(int myIdx) +{ + uint16_t* data = (uint16_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_id_get, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_subsystem_vendor_id_get, (uint32_t dv_ind, uint16_t *id)); +static int er_subsystem_vendor_id(int myIdx) +{ + uint16_t* data = (uint16_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. 
+ RSMI(rsmi_dev_subsystem_vendor_id_get, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_vendor_id_get, (uint32_t dv_ind, uint16_t *id)); +static int er_vendor_id(int myIdx) +{ + uint16_t* data = (uint16_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_vendor_id_get, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_unique_id_get, (uint32_t dv_ind, uint64_t *unique_id)); +static int er_unique_id(int myIdx) +{ + uint64_t* data = (uint64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_unique_id_get, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_subsystem_id_get, (uint32_t dv_ind, uint16_t *id)); +static int er_subsystem_id(int myIdx) +{ + uint16_t* data = (uint16_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_subsystem_id_get, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. 
+ +// (rsmi_dev_drm_render_minor_get, (uint32_t dv_ind, uint32_t *id)); +static int er_render_minor(int myIdx) +{ + uint32_t* data = (uint32_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_drm_render_minor_get, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_overdrive_level_get, (uint32_t dv_ind, uint32_t *od)); +static int er_overdrive_level(int myIdx) +{ + uint32_t* data = (uint32_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_overdrive_level_get, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_overdrive_level_set, (int32_t dv_ind, uint32_t od)); +// The data to write must be given in AllEvents[myIdx].value. +static int ew_overdrive_level(int myIdx) +{ + uint32_t data = AllEvents[myIdx].value; // get a short cut to data. + RSMI(rsmi_dev_overdrive_level_set, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + return(PAPI_OK); // Done. +} // end writer. + +// (rsmi_dev_perf_level_get, (uint32_t dv_ind, rsmi_dev_perf_level_t *perf)); +static int er_perf_level(int myIdx) +{ + uint32_t* data = (uint32_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_perf_level_get, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. 
+ AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_perf_level_set, ( int32_t dv_ind, rsmi_dev_perf_level_t perf_lvl)); +// The data to write must be given in AllEvents[myIdx].value. +// TONY: Should error-check value here, limited to enum values of rsmi_dev_perf_level_t. +static int ew_perf_level(int myIdx) +{ + uint32_t data = AllEvents[myIdx].value; // get a short cut to data. + if (data > RSMI_DEV_PERF_LEVEL_LAST) return(PAPI_EINVAL); // Error in value. + RSMI(rsmi_dev_perf_level_set, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + return(PAPI_OK); // Done. +} // end writer. + +// (rsmi_dev_memory_total_get, (uint32_t dv_ind, RSMI_MEM_TYPE_VRAM, uint64_t *total)); +// (rsmi_dev_memory_total_get, (uint32_t dv_ind, RSMI_MEM_TYPE_VIS_VRAM, uint64_t *total)); +// (rsmi_dev_memory_total_get, (uint32_t dv_ind, RSMI_MEM_TYPE_GTT, uint64_t *total)); +static int er_mem_total(int myIdx) +{ + uint64_t* data = (uint64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_memory_total_get, // Routine name. + (AllEvents[myIdx].device, // device, + AllEvents[myIdx].variant, data), // memory type, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_memory_usage_get, (uint32_t dv_ind, RSMI_MEM_TYPE_VRAM, uint64_t *usage)); +// (rsmi_dev_memory_usage_get, (uint32_t dv_ind, RSMI_MEM_TYPE_VIS_VRAM, uint64_t *usage)); +// (rsmi_dev_memory_usage_get, (uint32_t dv_ind, RSMI_MEM_TYPE_GTT, uint64_t *usage)); +static int er_mem_usage(int myIdx) +{ + uint64_t* data = (uint64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. 
+ RSMI(rsmi_dev_memory_usage_get, // Routine name. + (AllEvents[myIdx].device, // device, + AllEvents[myIdx].variant, data), // memory type, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_busy_percent_get, (uint32_t dv_ind, uint32_t *busy_percent)); +static int er_busy_percent(int myIdx) +{ + uint32_t* data = (uint32_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_busy_percent_get, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_memory_busy_percent_get, (uint32_t dv_ind, uint32_t *busy_percent)); +// NOTE UNTESTED EVENT: This is given in the manual, but our test driver/equipment did not support it. +static int er_memory_busy_percent(int myIdx) +{ + uint32_t* data = (uint32_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_memory_busy_percent_get, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_pci_id_get, (uint32_t dv_ind, uint64_t *bdfid)); +static int er_pci_id(int myIdx) +{ + uint64_t* data = (uint64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_pci_id_get, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. 
+ return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_pci_replay_counter_get, (uint32_t dv_ind, uint64_t *counter)); +static int er_pci_replay_counter(int myIdx) +{ + uint64_t* data = (uint64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_pci_replay_counter_get, // Routine name. + (AllEvents[myIdx].device, data), // device, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_version_get, (rsmi_version_t *version)); +// structure contains uint32_t for major, minor, patch (and pointer to 'build' string we don't use). +static int er_rsmi_version(int myIdx) +{ + rsmi_version_t* data = (rsmi_version_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_version_get, // Routine name. + (data), // pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + uint64_t pack = 0; + pack = (data->major & 0x0000FFFF); // pack elements into a uint64. + pack = (pack << 16) | (data->minor & 0x0000FFFF); + pack = (pack << 16) | (data->patch & 0x0000FFFF); + AllEvents[myIdx].value = pack; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_pci_throughput_get, (uint32_t dv_ind, uint64_t *sent, uint64_t *received, uint64_t *max_pkt_sz)); +static int er_pci_throughput_sent(int myIdx) // BASE EVENT. reads all three values. +{ + uint64_t* data = (uint64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + if (AllEvents[myIdx].read == 0) { // If I haven't read yet, + RSMI(rsmi_dev_pci_throughput_get, // .. Routine name. + (AllEvents[myIdx].device, &data[0], &data[1], &data[2]), // .. device and ptrs for storage of read. + return(PAPI_EMISC)); // .. Error handler. + AllEvents[myIdx].read = 1; // .. Mark as read. 
+ } + + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_pci_throughput_get, (uint32_t dv_ind, uint64_t *sent, uint64_t *received, uint64_t *max_pkt_sz)); +static int er_pci_throughput_received(int myIdx) // NOT THE BASE EVENT; Base event already called. +{ + int idx = AllEvents[myIdx].baseIdx; // Get location of storage. + uint64_t* data = (uint64_t*) AllEvents[idx].vptr; // get a shortcut. + AllEvents[myIdx].value = data[1]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_pci_throughput_get, (uint32_t dv_ind, uint64_t *sent, uint64_t *received, uint64_t *max_pkt_sz)); +static int er_pci_throughput_max_packet(int myIdx) // NOT THE BASE EVENT; Base event already called. +{ + int idx = AllEvents[myIdx].baseIdx; // Get location of storage. + uint64_t* data = (uint64_t*) AllEvents[idx].vptr; // get a shortcut. + AllEvents[myIdx].value = data[2]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_fan_reset, (uint32_t dv_ind, uint32_t sensor_ind)); +static int ew_fan_reset(int myIdx) +{ + (void) myIdx; // Not needed. Only present for consistent function pointer. + RSMI(rsmi_dev_fan_reset, // Routine name. + (AllEvents[myIdx].device, AllEvents[myIdx].subvariant), // device, sensor. No data to write. + return(PAPI_EMISC)); // Error handler. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_fan_rpms_get, (uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed)); +static int er_fan_rpms(int myIdx) +{ + int64_t* data = (int64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_fan_rpms_get, // Routine name. + (AllEvents[myIdx].device, AllEvents[myIdx].subvariant, data), // device, sensor, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. 
+ AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_fan_speed_max_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max_speed)); +static int er_fan_speed_max(int myIdx) +{ + uint64_t* data = (uint64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_fan_speed_max_get, // Routine name. + (AllEvents[myIdx].device, AllEvents[myIdx].subvariant, data), // device, sensor, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_fan_speed_get, (uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed)); +static int er_fan_speed(int myIdx) +{ + int64_t* data = (int64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_fan_speed_get, // Routine name. + (AllEvents[myIdx].device, AllEvents[myIdx].subvariant, data), // device, sensor, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_fan_speed_set, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed)); +static int ew_fan_speed(int myIdx) +{ + uint64_t data = AllEvents[myIdx].value; // get a short cut to data. + if (data > 255) return(PAPI_EINVAL); // Invalid value. + RSMI(rsmi_dev_fan_speed_set, // Routine name. + (AllEvents[myIdx].device, AllEvents[myIdx].subvariant, data), // device, sensor. Data to write. + return(PAPI_EMISC)); // Error handler. + return(PAPI_OK); // Done. +} // end writer. + +// (rsmi_dev_power_ave_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power)); +static int er_power_ave(int myIdx) +{ + uint64_t* data = (uint64_t*) AllEvents[myIdx].vptr; // get a shortcut. 
+ AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_power_ave_get, // Routine name. + (AllEvents[myIdx].device, AllEvents[myIdx].subvariant, data), // device, sensor, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_power_cap_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap)); +static int er_power_cap(int myIdx) +{ + uint64_t* data = (uint64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_power_cap_get, // Routine name. + (AllEvents[myIdx].device, AllEvents[myIdx].subvariant, data), // device, sensor, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_power_cap_set, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap)); +static int ew_power_cap(int myIdx) +{ + uint64_t data = AllEvents[myIdx].value; // get a short cut to data. + RSMI(rsmi_dev_power_cap_set, // Routine name. + (AllEvents[myIdx].device, AllEvents[myIdx].subvariant, data), // device, sensor. Data to write. + return(PAPI_EMISC)); // Error handler. + return(PAPI_OK); // Done. +} // end writer. + +// (rsmi_dev_power_cap_range_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max, uint64_t *min)); +static int er_power_cap_range_min(int myIdx) // THIS IS THE BASE EVENT. +{ + uint64_t* data = (uint64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + if (AllEvents[myIdx].read == 0) { // If I haven't read yet, + RSMI(rsmi_dev_power_cap_range_get, // .. Routine name. + (AllEvents[myIdx].device, AllEvents[myIdx].subvariant, &data[1], &data[0]), // .. device, sensor, ptr->max, ptr->min. + return(PAPI_EMISC)); // .. Error handler. 
+ AllEvents[myIdx].read = 1; // .. Mark as read. + } + + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value for min. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_power_cap_range_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max, uint64_t *min)); +static int er_power_cap_range_max(int myIdx) // NOT THE BASE EVENT; Base event already called. +{ + int idx = AllEvents[myIdx].baseIdx; + uint64_t* data = (uint64_t*) AllEvents[idx].vptr; // get a shortcut to min/max. + AllEvents[myIdx].value = data[1]; // Copy/convert the returned value for max. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_temp_metric_get, (uint32_t dv_ind, uint32_t sensor_ind, rsmi_temperature_metric_t metric, int64_t *temperature)); +static int er_temp(int myIdx) +{ + int64_t* data = (int64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_temp_metric_get, // Routine name. + (AllEvents[myIdx].device, // Device, + AllEvents[myIdx].subvariant, // Sensor, + AllEvents[myIdx].variant, data), // temp type, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// rsmi_dev_firmware_version_get is an enum with 21 settings; each will be a separate event. +static int er_firmware_version(int myIdx) +{ + uint64_t* data = (uint64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_firmware_version_get, // Routine name. + (AllEvents[myIdx].device, // Device, + AllEvents[myIdx].variant, data), // firmware block ID, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// rsmi_dev_ecc_count_get is an enum with 14 settings; each will be a separate event. 
+// NOTE UNTESTED EVENT: This is given in the manual, but our test driver/equipment did not support it. +static int er_ecc_count_correctable(int myIdx) // THIS IS A BASE EVENT. +{ + rsmi_error_count_t* data = (rsmi_error_count_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + if (AllEvents[myIdx].read == 0) { + RSMI(rsmi_dev_ecc_count_get, // ..Routine name. + (AllEvents[myIdx].device, // ..Device, + AllEvents[myIdx].variant, data), // ..gpu block ID, and pointer for storage of read. + return(PAPI_EMISC)); // ..Error handler. + AllEvents[myIdx].read = 1; // ..mark as read. + } + + AllEvents[myIdx].value = data->correctable_err; // Copy/convert the returned value. + + return(PAPI_OK); // Done. +} // end reader. + +// rsmi_dev_ecc_count_get is an enum with 14 settings; each will be a separate event. +static int er_ecc_count_uncorrectable(int myIdx) // NOT THE BASE EVENT; Base event already called. +{ + int idx = AllEvents[myIdx].baseIdx; + rsmi_error_count_t* data = (rsmi_error_count_t*) AllEvents[idx].vptr; // get a shortcut. + AllEvents[myIdx].value = data->uncorrectable_err; // Copy/convert the returned value for uncorrectable. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_ecc_enabled_get, (uint32_t dv_ind, uint64_t *mask)); +// NOTE UNTESTED EVENT: This is given in the manual, but our test driver/equipment did not support it. +static int er_ecc_enabled(int myIdx) +{ + uint64_t* data = (uint64_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_ecc_enabled_get, // Routine name. + (AllEvents[myIdx].device, data), // device, data pointer. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. 
+ +// (rsmi_dev_ecc_status_get(uint32_t dv_ind, rsmi_gpu_block_t block, rsmi_ras_err_state_t ∗ state) +// NOTE UNTESTED EVENT: This is given in the manual, but our test driver/equipment did not support it. +static int er_ecc_status(int myIdx) +{ + rsmi_ras_err_state_t* data = (rsmi_ras_err_state_t*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_ecc_status_get, // Routine name. + (AllEvents[myIdx].device, // Device, + AllEvents[myIdx].variant, data), // gpu block ID, and pointer for storage of read. + return(PAPI_EMISC)); // Error handler. + AllEvents[myIdx].value = data[0]; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// rsmi_dev_gpu_clk_freq_get(device, clock_type, *rsmi_frequencies_t frequencies): +static int er_gpu_clk_freq_current(int myIdx) +{ + AllEvents[myIdx].value = 0; + int idx = AllEvents[myIdx].device*freqTablePerDevice + + AllEvents[myIdx].variant; // Index into frequency table. + RSMI(rsmi_dev_gpu_clk_freq_get, + (AllEvents[myIdx].device, AllEvents[myIdx].variant, &FreqTable[idx]), + return(PAPI_EMISC)); + int current = FreqTable[idx].current; + AllEvents[myIdx].value = FreqTable[idx].frequency[current]; + return(PAPI_OK); +} // end reader + +// rsmi_dev_gpu_clk_freq_get(device, clock_type, *rsmi_frequencies_t frequencies): +static int er_gpu_clk_freq_table(int myIdx) +{ + AllEvents[myIdx].value = 0; + int idx = AllEvents[myIdx].device*freqTablePerDevice + + AllEvents[myIdx].variant; // Index into frequency table. + uint32_t tblIdx = AllEvents[myIdx].subvariant; + RSMI(rsmi_dev_gpu_clk_freq_get, + (AllEvents[myIdx].device, AllEvents[myIdx].variant, &FreqTable[idx]), + return(PAPI_EMISC)); + if (tblIdx >= FreqTable[idx].num_supported) { // If this has changed, + return(PAPI_EMISC); // Exit with error. + } + + AllEvents[myIdx].value = FreqTable[idx].frequency[tblIdx]; // All okay, read newly loaded table. 
+ return(PAPI_OK); +} // end reader + +// rsmi_dev_gpu_clk_freq_set ( uint32_t dv_ind, rsmi_clk_type_t clk_type, uint64_t freq_bitmask ) +// The data to write must be given in AllEvents[myIdx].value. +// Note need to build a mask of num_supported bits, and insure data is not zero when masked with it. +// e.g. for four bits, (1<<4)-1 = 2^4-1=15. +static int ew_gpu_clk_freq_mask(int myIdx) +{ + uint64_t data = AllEvents[myIdx].value; // get a short cut to data. + uint64_t mask; + int idx = AllEvents[myIdx].device*freqTablePerDevice + + AllEvents[myIdx].variant; // Index into frequency table. + mask = (1<num_profiles; // Copy/convert the returned value for number of profiles. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_power_profile_presets_get, (uint32_t dv_ind, uint32_t sensor, rsmi_power_profile_status_t *status); +static int er_power_profile_presets_avail_profiles(int myIdx) // NOT THE BASE EVENT; Base event already called. +{ + int idx = AllEvents[myIdx].baseIdx; + rsmi_power_profile_status_t* status = (rsmi_power_profile_status_t*) AllEvents[idx].vptr; // get a shortcut. + AllEvents[myIdx].value = status->available_profiles; // Copy/convert the returned value for available profiles. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_power_profile_presets_get, (uint32_t dv_ind, uint32_t sensor, rsmi_power_profile_status_t *status); +static int er_power_profile_presets_current(int myIdx) // NOT THE BASE EVENT; Base event already called. +{ + int idx = AllEvents[myIdx].baseIdx; + rsmi_power_profile_status_t* status = (rsmi_power_profile_status_t*) AllEvents[idx].vptr; // get a shortcut. + AllEvents[myIdx].value = status->current; // Copy/convert the returned value for current profile. + return(PAPI_OK); // Done. +} // end reader. + +// rsmi_dev_power_profile_set ( uint32_t dv_ind, uint32_t reserved, rsmi_power_profile_preset_masks_t profile_mask ) +// The data to write must be given in AllEvents[myIdx].value. 
It must be a power of 2, and <= RSMI_PWR_PROF_PRST_LAST. +static int ew_power_profile_mask(int myIdx) +{ + uint64_t data = AllEvents[myIdx].value; // get a short cut to data. + if ((data & (data-1)) != 0) { // Not a power of two, + return(PAPI_EINVAL); // .. so invalid argument. + } + + if (data > RSMI_PWR_PROF_PRST_LAST) { // If not a VALID power of two, + return(PAPI_EINVAL); // invalid argument. + } + + RSMI(rsmi_dev_power_profile_set, // Routine name. + (AllEvents[myIdx].device, // device, + AllEvents[myIdx].subvariant, // sub variant for 'reserved'. + data), // data to set. + return(PAPI_EMISC)); // Error handler. + + return(PAPI_OK); // Done. +} // end writer. + + +// (rsmi_dev_brand_get(uint32_t dv_ind, char *brand, uint32_t len); +static int er_brand(int myIdx) +{ + char *data = (char*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_brand_get, // Routine name. + (AllEvents[myIdx].device, // Device, + data, // string location, + PAPI_MAX_STR_LEN-1), // max length of string. + return(PAPI_EMISC)); // Error handler. + data[PAPI_MAX_STR_LEN-1] = 0; // Guarantee a zero terminator. + AllEvents[myIdx].value = (uint64_t) data; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. + +// (rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len); +static int er_name(int myIdx) +{ + char *data = (char*) AllEvents[myIdx].vptr; // get a shortcut. + AllEvents[myIdx].value = 0; // Default if error. + RSMI(rsmi_dev_name_get, // Routine name. + (AllEvents[myIdx].device, // Device, + data, // string location, + PAPI_MAX_STR_LEN-1), // max length of string. + return(PAPI_EMISC)); // Error handler. + data[PAPI_MAX_STR_LEN-1] = 0; // Guarantee a zero terminator. + AllEvents[myIdx].value = (uint64_t) data; // Copy/convert the returned value. + return(PAPI_OK); // Done. +} // end reader. 
+
+// (rsmi_dev_serial_number_get(uint32_t dv_ind, char *serial_number, uint32_t len);
+// NOTE UNTESTED EVENT: This is given in the manual, but our test driver/equipment did not support it.
+// Reads the serial number string into the event's vptr buffer and publishes
+// the buffer address through AllEvents[myIdx].value (left at 0 on any failure).
+// NOTE(review): assumes vptr holds at least PAPI_MAX_STR_LEN bytes -- confirm
+// where this event is registered.
+static int er_serial_number(int myIdx)
+{
+    char *data = (char*) AllEvents[myIdx].vptr;         // get a shortcut.
+    AllEvents[myIdx].value = 0;                         // Default if error.
+    if (data == NULL) {                                 // No buffer to read into (allocation failed or never made),
+        return(PAPI_ENOMEM);                            // .. so report it instead of dereferencing NULL below.
+    }
+    RSMI(rsmi_dev_serial_number_get,                    // Routine name.
+        (AllEvents[myIdx].device,                       // Device,
+         data,                                          // string location,
+         PAPI_MAX_STR_LEN-1),                           // max length of string.
+        return(PAPI_EMISC));                            // Error handler.
+    data[PAPI_MAX_STR_LEN-1] = 0;                       // Guarantee a zero terminator.
+    AllEvents[myIdx].value = (uint64_t) data;           // Copy/convert the returned value.
+    return(PAPI_OK);                                    // Done.
+} // end reader.
+
+// (rsmi_dev_subsystem_name_get(uint32_t dv_ind, char *name, size_t len);
+// Reads the subsystem name string into the event's vptr buffer and publishes
+// the buffer address through AllEvents[myIdx].value (left at 0 on any failure).
+static int er_subsystem_name(int myIdx)
+{
+    char *data = (char*) AllEvents[myIdx].vptr;         // get a shortcut.
+    AllEvents[myIdx].value = 0;                         // Default if error.
+    if (data == NULL) {                                 // No buffer to read into (allocation failed or never made),
+        return(PAPI_ENOMEM);                            // .. so report it instead of dereferencing NULL below.
+    }
+    RSMI(rsmi_dev_subsystem_name_get,                   // Routine name.
+        (AllEvents[myIdx].device,                       // Device,
+         data,                                          // string location,
+         PAPI_MAX_STR_LEN-1),                           // max length of string.
+        return(PAPI_EMISC));                            // Error handler.
+    data[PAPI_MAX_STR_LEN-1] = 0;                       // Guarantee a zero terminator.
+    AllEvents[myIdx].value = (uint64_t) data;           // Copy/convert the returned value.
+    return(PAPI_OK);                                    // Done.
+} // end reader.
+
+// (rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len);
+// Reads the VBIOS version string into the event's vptr buffer and publishes
+// the buffer address through AllEvents[myIdx].value (left at 0 on any failure).
+static int er_vbios_version(int myIdx)
+{
+    char *data = (char*) AllEvents[myIdx].vptr;         // get a shortcut.
+    AllEvents[myIdx].value = 0;                         // Default if error.
+    if (data == NULL) {                                 // No buffer to read into (allocation failed or never made),
+        return(PAPI_ENOMEM);                            // .. so report it instead of dereferencing NULL below.
+    }
+    RSMI(rsmi_dev_vbios_version_get,                    // Routine name.
+        (AllEvents[myIdx].device,                       // Device,
+         data,                                          // string location,
+         PAPI_MAX_STR_LEN-1),                           // max length of string.
+        return(PAPI_EMISC));                            // Error handler.
+    data[PAPI_MAX_STR_LEN-1] = 0;                       // Guarantee a zero terminator.
+    AllEvents[myIdx].value = (uint64_t) data;           // Copy/convert the returned value.
+    return(PAPI_OK);                                    // Done.
+} // end reader.
+
+// (rsmi_dev_vendor_name_get(uint32_t id, char *name, size_t len);
+// Reads the vendor name string into the event's vptr buffer and publishes the
+// buffer address through AllEvents[myIdx].value (left at 0 on any failure).
+static int er_vendor_name(int myIdx)
+{
+    char *data = (char*) AllEvents[myIdx].vptr;         // get a shortcut.
+    AllEvents[myIdx].value = 0;                         // Default if error.
+    if (data == NULL) {                                 // No buffer to read into (allocation failed or never made),
+        return(PAPI_ENOMEM);                            // .. so report it instead of dereferencing NULL below.
+    }
+    RSMI(rsmi_dev_vendor_name_get,                      // Routine name.
+        (AllEvents[myIdx].device,                       // Device,
+         data,                                          // string location,
+         PAPI_MAX_STR_LEN-1),                           // max length of string.
+        return(PAPI_EMISC));                            // Error handler.
+    data[PAPI_MAX_STR_LEN-1] = 0;                       // Guarantee a zero terminator.
+    AllEvents[myIdx].value = (uint64_t) data;           // Copy/convert the returned value.
+    return(PAPI_OK);                                    // Done.
+} // end reader.
+
+// (rsmi_version_str_get(rsmi_sw_component_t id, char *name, size_t len);
+// Reads the driver version string (component RSMI_SW_COMP_DRIVER; no device
+// index is used) into the event's vptr buffer and publishes the buffer address
+// through AllEvents[myIdx].value (left at 0 on any failure).
+static int er_driver_version(int myIdx)
+{
+    char *data = (char*) AllEvents[myIdx].vptr;         // get a shortcut.
+    AllEvents[myIdx].value = 0;                         // Default if error.
+    if (data == NULL) {                                 // No buffer to read into (allocation failed or never made),
+        return(PAPI_ENOMEM);                            // .. so report it instead of dereferencing NULL below.
+    }
+    RSMI(rsmi_version_str_get,                          // Routine name.
+        (RSMI_SW_COMP_DRIVER,                           // Only enumerated element.
+         data,                                          // string location,
+         PAPI_MAX_STR_LEN-1),                           // max length of string.
+        return(PAPI_EMISC));                            // Error handler.
+    data[PAPI_MAX_STR_LEN-1] = 0;                       // Guarantee a zero terminator.
+    AllEvents[myIdx].value = (uint64_t) data;           // Copy/convert the returned value.
+    return(PAPI_OK);                                    // Done.
+} // end reader.
+
+
+//=============================================================================
+// END OF RW ROUTINES.
+//=============================================================================
+
+//-----------------------------------------------------------------------------
+// All values get returned by calling routines that may vary in parameters.
+// Since we have no automatic list of events (or descriptions) we add them by
+// hand; along with pointers to the routines that must be called.
+//----------------------------------------------------------------------------- +static int _rocm_smi_add_native_events(void) +{ + uint32_t device; + event_info_t* thisEvent=NULL; // an event pointer. + scanEvent_info_t* scan=NULL; // a scan event pointer. + TotalEvents = 0; + int BaseEvent = 0; + int subvariants; + int i; + uint32_t ui; + char *gpuClkVariantName[] = {"System", "DataFabric", "DisplayEngine", "SOC", "Memory"}; + int enumList[64]; // List of enums found for variants. + #define enumSize (sizeof(enumList)/sizeof(enumList[0])) + +// This call is no longer used, we do our own search in _rocm_smi_find_devices to set TotalDevices. +// RSMI(rsmi_num_monitor_devices, (&TotalDevices), return(PAPI_ENOSUPP)); // call for number of devices. + +//(rsmi_num_monitor_devices, (uint32_t *num_devices)); // ONLY ONE OF THESE. + MakeRoomAllEvents(); + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "NUMDevices"); + strcpy(thisEvent->desc, "Number of Devices which have monitors, accessible by rocm_smi."); + thisEvent->reader = NULL; // No need to read anything, we have TotalDevices. + thisEvent->writer = NULL; // Not possible to change by writing. + thisEvent->device=-1; // There is no device to set in order to read. + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=0; // Not needed, reader returns TotalDevices. + thisEvent->vptr=NULL; // Not needed, reader returns TotalDevices. + thisEvent->value=TotalDevices; // A static event; always returns this. + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + + // rsmi_version_t contains uint32 for major; minor; patch. but could return 16-bit packed versions as uint64_t. 
+ //(rsmi_version_get, (rsmi_version_t *version)); + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "rsmi_version"); + strcpy(thisEvent->desc, "Version of RSMI lib; 0x0000MMMMmmmmpppp Major, Minor, Patch."); + thisEvent->reader = &er_rsmi_version; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=-1; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(rsmi_version_t); // Memory for read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "driver_version_str"); + strcpy(thisEvent->desc, "Returns char* to z-terminated driver version string; do not free()."); + thisEvent->reader = &er_driver_version; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=-1; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=(PAPI_MAX_STR_LEN); // Memory for read. + thisEvent->vptr=calloc(thisEvent->vptrSize, sizeof(char)); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + +// The following require a device ID. + + for (device=0; device < TotalDevices; device++) { // For every event requiring a device argument, + //(rsmi_dev_id_get, (uint32_t dv_ind, uint16_t *id)); + thisEvent = &AllEvents[TotalEvents]; + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_id_get"); + if (scan != NULL) { // If we found it, + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "device_id:device=%i", device); + strcpy(thisEvent->desc, "Vendor supplied device id number. 
May be shared by same model devices; see pci_id for a unique identifier."); + thisEvent->reader = &er_device_id; + thisEvent->writer = NULL; + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint16_t); + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=scan->variant; // Copy the variant. + thisEvent->subvariant=scan->subvariant; // Copy the subvariant. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } // end if found. + + //(rsmi_dev_subsystem_vendor_id_get, (uint32_t dv_ind, uint16_t *id)); + thisEvent = &AllEvents[TotalEvents]; + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_subsystem_vendor_id_get"); + if (scan != NULL) { + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "subsystem_vendor_id:device=%i", device); + strcpy(thisEvent->desc, "Subsystem vendor id number."); + thisEvent->reader = &er_subsystem_vendor_id; + thisEvent->writer = NULL; + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint16_t); + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=scan->variant; // Copy the variant. + thisEvent->subvariant=scan->subvariant; // Copy the subvariant. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + //(rsmi_dev_vendor_id_get, (uint32_t dv_ind, uint16_t *id)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_vendor_id_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "vendor_id:device=%i", device); + strcpy(thisEvent->desc, "Vendor id number."); + thisEvent->reader = &er_vendor_id; + thisEvent->writer = NULL; + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint16_t); + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. 
+ TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + //(rsmi_dev_unique_id_get, (uint32_t dv_ind, uint64_t *id)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_unique_id_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "unique_id:device=%i", device); + strcpy(thisEvent->desc, "unique Id for device."); + thisEvent->reader = &er_unique_id; + thisEvent->writer = NULL; + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + //(rsmi_dev_subsystem_id_get, (uint32_t dv_ind, uint16_t *id)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_subsystem_id_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "subsystem_id:device=%i", device); + strcpy(thisEvent->desc, "Subsystem id number."); + thisEvent->reader = &er_subsystem_id; + thisEvent->writer = NULL; + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint16_t); + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. 
+ } + + //(rsmi_dev_drm_render_minor_get, (uint32_t dv_ind, uint32_t *minor)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_drm_render_minor_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "drm_render_minor:device=%i", device); + strcpy(thisEvent->desc, "DRM Minor Number associated with this device."); + thisEvent->reader = &er_render_minor; + thisEvent->writer = NULL; + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint16_t); + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + //(rsmi_dev_overdrive_level_get, (uint32_t dv_ind, uint32_t *od)); + //(rsmi_dev_overdrive_level_set, (int32_t dv_ind, uint32_t od)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_overdrive_level_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "overdrive_level:device=%i", device); + strcpy(thisEvent->desc, "Overdrive Level % for device, 0 to 20, max overclocking permitted. Read Only."); + thisEvent->reader = &er_overdrive_level; + thisEvent->writer = NULL; + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint32_t); + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_overdrive_level_set"); + if (scan != NULL) { + thisEvent->writer = &ew_overdrive_level; // Can be written. + strcpy(thisEvent->desc, "Overdrive Level % for device, 0 to 20, max overclocking permitted. Read/Write. WRITE MAY CAUSE DAMAGE NOT COVERED BY ANY WARRANTY."); + } + + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. 
+ } + + // rsmi_dev_perf_level_t is just an enum; this can be returned as uint32. + //(rsmi_dev_perf_level_get, (uint32_t dv_ind, rsmi_dev_perf_level_t *perf)); + //(rsmi_dev_perf_level_set, ( int32_t dv_ind, rsmi_dev_perf_level_t perf_lvl)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_perf_level_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "perf_level:device=%i", device); + snprintf(thisEvent->desc, PAPI_MAX_STR_LEN-1, "PowerPlay Performance Level; Read Only, enum 'rsmi_dev_perf_level_t' [0-%i], see ROCm_SMI_Manual for details.", RSMI_DEV_PERF_LEVEL_LAST); + thisEvent->reader = &er_perf_level; + thisEvent->writer = &ew_perf_level; // Can be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint32_t); + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_perf_level_set"); + if (scan != NULL) { + thisEvent->writer = &ew_perf_level; // Can be written. + snprintf(thisEvent->desc, PAPI_MAX_STR_LEN-1, "PowerPlay Performance Level; Read/Write, enum 'rsmi_dev_perf_level_t' [0-%i], see ROCm_SMI_Manual for details.", RSMI_DEV_PERF_LEVEL_LAST); + } + + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + // Iterate by memory type; an enum: + // RSMI_MEM_TYPE_VRAM; RSMI_MEM_TYPE_VIS_VRAM; RSMI_MEM_TYPE_GTT. (VIS=visible). In ascending + // order, to be found in rocm_smi.h, as an enum. However, we show these as three separate events. + + //(rsmi_dev_memory_total_get, (uint32_t dv_ind, rsmi_memory_type_t mem_type, uint64_t *total)); + for (i=0; i<3; i++) enumList[i]=0; // init to false. 
+ scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_memory_total_get"); + while (scan != NULL && scan->variant < RSMI_MEM_TYPE_GTT) { + enumList[scan->variant] = 1; // show the variant as found. + scan = nextEvent(scan, device, "rsmi_dev_memory_total_get"); // Get the next, if any. + } + + if (enumList[0]) { // If we found TOTAL VRAM, + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "mem_total_VRAM:device=%i", device); + strcpy(thisEvent->desc, "Total VRAM memory."); + thisEvent->reader = &er_mem_total; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Memory for read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=RSMI_MEM_TYPE_VRAM; // The enum for it + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + if (enumList[1]) { // If we found VISIBLE VRAM, + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "mem_total_VIS_VRAM:device=%i", device); + strcpy(thisEvent->desc, "Total Visible VRAM memory."); + thisEvent->reader = &er_mem_total; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Memory for read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=RSMI_MEM_TYPE_VIS_VRAM; // The enum for it. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. 
+ } + + if (enumList[2]) { // If we found TOTAL GTT, + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "mem_total_GTT:device=%i", device); + strcpy(thisEvent->desc, "Total GTT (Graphics Translation Table) memory, aka GART memory."); + thisEvent->reader = &er_mem_total; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Memory for read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=RSMI_MEM_TYPE_GTT; // The enum for it. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + for (i=0; i<3; i++) enumList[i]=0; // init to false. + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_memory_usage_get"); + while (scan != NULL && scan->variant < RSMI_MEM_TYPE_GTT) { + enumList[scan->variant] = 1; // show the variant as found. + scan = nextEvent(scan, device, "rsmi_dev_memory_usage_get"); // Get the next, if any. + } + + //(rsmi_dev_memory_usage_get, (uint32_t dv_ind, rsmi_memory_type_t mem_type, uint64_t *used)); + if (enumList[0]) { // If we found USAGE VRAM, + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "mem_usage_VRAM:device=%i", device); + strcpy(thisEvent->desc, "VRAM memory in use."); + thisEvent->reader = &er_mem_usage; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Memory for read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=RSMI_MEM_TYPE_VRAM; // The enum for it + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. 
+ } + + if (enumList[1]) { // If we found USAGE VIS VRAM, + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "mem_usage_VIS_VRAM:device=%i", device); + strcpy(thisEvent->desc, "Visible VRAM memory in use."); + thisEvent->reader = &er_mem_usage; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Memory for read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=RSMI_MEM_TYPE_VIS_VRAM; // The enum for it. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + if (enumList[2]) { // If we found USAGE GTT, + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "mem_usage_GTT:device=%i", device); + strcpy(thisEvent->desc, "(Graphics Translation Table) memory in use (aka GART memory)."); + thisEvent->reader = &er_mem_usage; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Memory for read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=RSMI_MEM_TYPE_GTT; // The enum for it. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + //(rsmi_dev_busy_percent_get, (uint32_t dv_ind, uint32_t *bdfid)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_busy_percent_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "busy_percent:device=%i", device); + strcpy(thisEvent->desc, "Percentage of time the device was busy doing any processing."); + thisEvent->reader = &er_busy_percent; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. 
+ thisEvent->vptrSize=sizeof(uint32_t); // Memory for read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + //(rsmi_dev_memory_busy_percent_get, (uint32_t dv_ind, uint32_t *bdfid)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_memory_busy_percent_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "memory_busy_percent:device=%i", device); + strcpy(thisEvent->desc, "Percentage of time any device memory is being used."); + thisEvent->reader = &er_memory_busy_percent; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint32_t); // Memory for read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + //(rsmi_dev_pci_id_get, (uint32_t dv_ind, uint64_t *bdfid)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_pci_id_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "pci_id:device=%i", device); + strcpy(thisEvent->desc, "BDF (Bus/Device/Function) ID, unique per device."); + thisEvent->reader = &er_pci_id; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Memory for read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. 
+ } + + //(rsmi_dev_pci_replay_counter_get, (uint32_t dv_ind, uint64_t *counter)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_pci_replay_counter_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "pci_replay_counter:device=%i", device); + strcpy(thisEvent->desc, "Sum of the number of NAK's received by the GPU and the NAK's generated by the GPU."); + thisEvent->reader = &er_pci_replay_counter; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Memory for read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + // rsmi_range_t contains two uint64's; lower_bound; upper_bound. + // This function has a prototype in the header file, but does not exist in the library. (circa Apr 5 2019). + // //(rsmi_dev_od_freq_range_set, (uint32_t dv_ind, rsmi_clk_type_t clk, rsmi_range_t *range)); + + // -------------- BEGIN BASE EVENT ----------------- + // Needs to be three events; sent; received; max_pkt_size. + //(rsmi_dev_pci_throughput_get, (uint32_t dv_ind, uint64_t *sent, uint64_t *received, uint64_t *max_pkt_sz)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_pci_throughput_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "pci_throughput_sent:device=%i", device); + strcpy(thisEvent->desc, "Throughput on PCIe traffic, bytes/second sent."); + thisEvent->reader = &er_pci_throughput_sent; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Memory for read. 
+ thisEvent->vptr=calloc(3, thisEvent->vptrSize); // Space for three variables. + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + BaseEvent = TotalEvents; // Begin base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + + if (TotalEvents > BaseEvent) { // If the base did not succeed, do not add dependents. + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "pci_throughput_received:device=%i", device); + strcpy(thisEvent->desc, "Throughput on PCIe traffic, bytes/second received."); + thisEvent->reader = &er_pci_throughput_received; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = BaseEvent; // NOT SELF, part of a group read. + thisEvent->vptrSize=0; // Nothing to read, uses BaseEvent memory. + thisEvent->vptr=NULL; // .. + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "pci_max_packet_size:device=%i", device); + strcpy(thisEvent->desc, "Maximum PCIe packet size."); + thisEvent->reader = &er_pci_throughput_max_packet; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = BaseEvent; // NOT SELF, part of a group read. + thisEvent->vptrSize=0; // Nothing to read, uses BaseEvent memory. + thisEvent->vptr=NULL; // .. + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + // -------------- END BASE EVENT ----------------- + } + } + + // -------------- BEGIN BASE EVENT ----------------- + // Needs to be four events; count, current, mask (r/w). 
+ //(rsmi_dev_power_profile_presets_get, (uint32_t dv_ind, uint32_t sensor, rsmi_power_profile_status_t *status); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_power_profile_presets_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "power_profile_presets:device=%i:count", device); + strcpy(thisEvent->desc, "Number of power profile presets available. See ROCM_SMI manual for details."); + thisEvent->reader = &er_power_profile_presets_count; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(rsmi_power_profile_status_t); // re-read for each call, may change. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); // Make space for read. + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=scan->subvariant; // used in routine, but may be -1. + BaseEvent = TotalEvents; // Begin base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + + if (TotalEvents > BaseEvent) { // If the base did not succeed, do not add dependents. + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "power_profile_presets:device=%i:avail_profiles", device); + strcpy(thisEvent->desc, "Bit mask for allowable power profile presets. See ROCM_SMI manual for details."); + thisEvent->reader = &er_power_profile_presets_avail_profiles; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = BaseEvent; // NOT SELF, part of a group read. + thisEvent->vptrSize=0; // Nothing to read, uses BaseEvent memory. + thisEvent->vptr=NULL; // .. + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. 
+ + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "power_profile_presets:device=%i:current", device); + strcpy(thisEvent->desc, "Bit mask for current power profile preset. Read/Write. See ROCM_SMI manual for details."); + thisEvent->reader = &er_power_profile_presets_current; + thisEvent->writer = NULL; + thisEvent->device=device; + thisEvent->baseIdx = BaseEvent; // NOT SELF, part of a group read. + thisEvent->vptrSize=0; // Nothing to read, uses BaseEvent memory. + thisEvent->vptr=NULL; // .. + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + + // -------------- END BASE EVENT ----------------- + } + } + + // rsmi_dev_power_profile_set ( uint32_t dv_ind, uint32_t reserved, rsmi_power_profile_preset_masks_t profile_mask ) + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_power_profile_set"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "power_profile_set:device=%i", device); + strcpy(thisEvent->desc, "Write Only, sets the power profile to one of the available masks. See ROCM_SMI manual for details."); + thisEvent->reader = NULL; + thisEvent->writer = &ew_power_profile_mask; // Write only. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=0; + thisEvent->vptr=NULL; + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=scan->subvariant; // used in routine, but may be -1. + BaseEvent = TotalEvents; // Begin base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + //--------------------------------------------------------------------- + // The following events require sensor IDs (in the subvariant). 
+ //--------------------------------------------------------------------- + + //(rsmi_dev_fan_reset, (uint32_t dv_ind, uint32_t sensor_ind)); // Note NO VARIANTS. + scan = NULL; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_fan_reset"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "fan_reset:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Fan Reset. Write Only, data value is ignored."); + thisEvent->reader = NULL; // can't be read! + thisEvent->writer = &ew_fan_reset; // Can be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=0; // We don't actually read/write a value. + thisEvent->vptr=NULL; // ... + thisEvent->variant=-1; // Not applicable (DUMMY) + thisEvent->subvariant=scan->subvariant; // subvariant is sensor. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + //(rsmi_dev_fan_rpms_get, (uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed)); + scan = NULL; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_fan_rpms_get"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "fan_rpms:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Current Fan Speed in RPM (Rotations Per Minute)."); + thisEvent->reader = &er_fan_rpms; + thisEvent->writer = NULL; // can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Size of data to read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); // Space to read it. + thisEvent->variant=-1; // Not applicable (DUMMY) + thisEvent->subvariant=scan->subvariant; // subvariant is sensor. + TotalEvents++; // Count it. 
+ MakeRoomAllEvents(); // Make room for another. + } + + //(rsmi_dev_fan_speed_max_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max_speed)); + scan = NULL; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_fan_speed_max_get"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "fan_speed_max:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Maximum possible fan speed in RPM (Rotations Per Minute)."); + thisEvent->reader = &er_fan_speed_max; + thisEvent->writer = NULL; // can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Size of data to read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); // Space to read it. + thisEvent->variant=-1; // Not applicable (DUMMY) + thisEvent->subvariant=scan->subvariant; // subvariant is sensor. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + //(rsmi_dev_fan_speed_get, (uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed)); + //(rsmi_dev_fan_speed_set, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed)); + // We worry about the gets first and count the ones set. Then if search for + // the sets, and back-fill thisEvent->writer; for matching subvariants. We ignore + // any 'sets' without matching 'gets', but allow 'gets' without 'sets'. Note we also + // fix up the description. + scan = NULL; + subvariants=0; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_fan_speed_get"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + subvariants++; // count the number found. 
+ thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "fan_speed:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Current Fan Speed in RPM (Rotations Per Minute), Read Only, result [0-255]."); + thisEvent->reader = &er_fan_speed; + thisEvent->writer = NULL; // Presume not written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Size of data to read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); // Space to read it. + thisEvent->variant=-1; // Not applicable (DUMMY) + thisEvent->subvariant=scan->subvariant; // subvariant is sensor. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + // This must immediately follow rsmi_dev_fan_speed_get. + // Deal with (rsmi_dev_fan_speed_set, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed)); + scan = NULL; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_fan_speed_set"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + for (i=0; isubvariant) { // If we found the matching read, + AllEvents[TotalEvents-1-i].writer = &ew_fan_speed; // Allow writing. + strcpy(AllEvents[TotalEvents-1-i].desc, "Current Fan Speed in RPM (Rotations Per Minute), Read/Write, Write must be <=MAX (see fan_speed_max event), arg int [0-255]."); + } + } + } + + //(rsmi_dev_power_ave_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power)); + scan = NULL; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_power_ave_get"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "power_average:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Current Average Power consumption in microwatts. 
Requires root privilege."); + thisEvent->reader = &er_power_ave; + thisEvent->writer = NULL; // can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Size of data to read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); // Space to read it. + thisEvent->variant=-1; // Not applicable (DUMMY) + thisEvent->subvariant=scan->subvariant; // subvariant is sensor. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + //(rsmi_dev_power_cap_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap)); + //(rsmi_dev_power_cap_set, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap)); + // We worry about the gets first and count the ones set. Then if search for + // the sets, and back-fill thisEvent->writer; for matching subvariants. We ignore + // any 'sets' without matching 'gets', but allow 'gets' without 'sets'. Note we also + // fix up the description. + scan = NULL; + subvariants=0; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_power_cap_get"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + subvariants++; // count the number found. + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "power_cap:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Power cap in microwatts. Read Only. Between min/max (see power_cap_range_min/max). May require root privilege."); + thisEvent->reader = &er_power_cap; + thisEvent->writer = NULL; // Presume read only. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Size of data to read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); // Space to read it. + thisEvent->variant=-1; // Not applicable (DUMMY) + thisEvent->subvariant=scan->subvariant; // subvariant is sensor. + TotalEvents++; // Count it. 
+ MakeRoomAllEvents(); // Make room for another. + } + + // This must immediately follow rsmi_dev_power_cap_get. + // Deal with (rsmi_dev_power_cap_set, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap)); + scan = NULL; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_power_cap_set"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + for (i=0; isubvariant) { // If we found the matching read, + AllEvents[TotalEvents-1-i].writer = &ew_power_cap; // Allow writing. + strcpy(AllEvents[TotalEvents-1-i].desc, "Power cap in microwatts. Read/Write. Between min/max (see power_cap_range_min/max). May require root privilege."); + } + } + } + + + // -------------- BEGIN BASE EVENT ----------------- + // Needs to be two events; max and min. + //(rsmi_dev_power_cap_range_get, (uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max, uint64_t *min)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_power_cap_range_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "power_cap_range_min:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Power cap Minimum settable value, in microwatts."); + thisEvent->reader = &er_power_cap_range_min; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Size of data to read. + thisEvent->vptr=calloc(2, thisEvent->vptrSize); // Space to read both [min,max] (we reverse the order vs arguments in this array). + thisEvent->variant=-1; // Not applicable (DUMMY) + thisEvent->subvariant=scan->subvariant; // subvariant is sensor. + BaseEvent = TotalEvents; // Remember this as the base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + + if (TotalEvents > BaseEvent) { // If the base did not succeed, do not add the dependent. 
+ thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "power_cap_range_max:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Power cap Maximum settable value, in microwatts."); + thisEvent->reader = &er_power_cap_range_max; // Will call previous, this routine just copies it. + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = BaseEvent; // NOT SELF, combined read with previous event(s). + thisEvent->vptrSize=0; // Shares data with base event. + thisEvent->vptr=NULL; // No space here. + thisEvent->variant=-1; // Not applicable (DUMMY) + thisEvent->subvariant=scan->subvariant; // subvariant is sensor. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + // -------------- END BASE EVENT ----------------- + } + } + + // rsmi_temperature_metric_t is an enum with 14 settings; each will be a separate event. + //(rsmi_dev_temp_metric_get, (uint32_t dv_ind, uint32_t sensor_ind, rsmi_temperature_metric_t metric, int64_t *temperature)); + // This involves both variants and subvariants. + // We will have a single loop with a switch to pick the variants, + // and the subvariants (being different) will take care of themselves. + // We sorted the list, it should be in order by variant:subvariant. + scan = NULL; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_temp_metric_get"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + + // Common elements. + int found=1; // Presume variant will be found. + thisEvent = &AllEvents[TotalEvents]; + thisEvent->writer = NULL; // can't be written. + thisEvent->reader = &er_temp; // read routine. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(int64_t); // Size of data to read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); // Space to read it. 
+ thisEvent->variant=scan->variant; // Same as case we are in. + thisEvent->subvariant=scan->subvariant; // subvariant is sensor. + + switch(scan->variant) { + case RSMI_TEMP_CURRENT: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_current:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature current value, millidegrees Celsius."); + break; // END CASE. + + case RSMI_TEMP_MAX: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_max:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature maximum value, millidegrees Celsius."); + break; // END CASE. + + case RSMI_TEMP_MIN: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_min:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature minimum value, millidegrees Celsius."); + break; // END CASE. + + case RSMI_TEMP_MAX_HYST: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_max_hyst:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature hysteresis value for max limit, millidegrees Celsius."); + break; // END CASE. + + case RSMI_TEMP_MIN_HYST: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_min_hyst:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature hysteresis value for min limit, millidegrees Celsius."); + break; // END CASE. + + case RSMI_TEMP_CRITICAL: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_critical:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature critical max value, typically > temp_max, millidegrees Celsius."); + break; // END CASE. + + case RSMI_TEMP_CRITICAL_HYST: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_critical_hyst:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature hysteresis value for critical limit, millidegrees Celsius."); + break; // END CASE. 
+ + case RSMI_TEMP_EMERGENCY: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_emergency:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature emergency max for chips supporting more than two upper temp limits, millidegrees Celsius."); + break; // END CASE. + + case RSMI_TEMP_EMERGENCY_HYST: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_emergency_hyst:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature hysteresis value for emergency limit, millidegrees Celsius."); + break; // END CASE. + + case RSMI_TEMP_CRIT_MIN: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_crit_min:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature critical min value; typical < temp_min, millidegrees Celsius."); + break; // END CASE. + + case RSMI_TEMP_CRIT_MIN_HYST: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_crit_min_hyst:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature hysteresis value for critical min limit, millidegrees Celsius."); + break; // END CASE. + + case RSMI_TEMP_OFFSET: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_offset:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature offset added to temp reading by the chip, millidegrees Celsius."); + break; // END CASE. + + case RSMI_TEMP_LOWEST: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_lowest:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature historical minimum, millidegrees Celsius."); + break; // END CASE. + + case RSMI_TEMP_HIGHEST: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "temp_highest:device=%i:sensor=%i", device, scan->subvariant); + strcpy(thisEvent->desc, "Temperature historical maximum, millidegrees Celsius."); + break; // END CASE. + + default: // If we did not recognize it, kill stuff. 
+ thisEvent->device= 0; + thisEvent->reader = NULL; + thisEvent->baseIdx = 0; + thisEvent->vptrSize = 0; + free(thisEvent->vptr); + thisEvent->vptr = NULL; + thisEvent->variant = 0; + thisEvent->subvariant = 0; + found = 0; // indicate not found. + break; + } // END switch on variant. + + if (found) { + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + } // END while for rsmi_dev_temp_metric_get. + + // rsmi_dev_firmware_version_get is an enum with 21 settings; each will be a separate event. + //(rsmi_dev_temp_metric_get, (uint32_t dv_ind, uint32_t block_Id, uint64_t *version)); + // This involves only variants. + // We will have a single loop with a switch to pick the variants. + // We sorted the list, it should be in order by variant. + scan = NULL; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_firmware_version_get"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + + // Common elements. + int found=1; // Presume variant will be found. + thisEvent = &AllEvents[TotalEvents]; + thisEvent->writer = NULL; // can't be written. + thisEvent->reader = &er_firmware_version; // read routine. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(int64_t); // Size of data to read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); // Space to read it. + thisEvent->variant=scan->variant; // Same as case we are in. + thisEvent->subvariant=scan->subvariant; // subvariant is sensor. + + switch(scan->variant) { + case RSMI_FW_BLOCK_ASD: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=ASD", device); + strcpy(thisEvent->desc, "Firmware Version Block ASD."); + break; // END CASE. + + case RSMI_FW_BLOCK_CE: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=CE", device); + strcpy(thisEvent->desc, "Firmware Version Block CE."); + break; // END CASE. 
+ + case RSMI_FW_BLOCK_DMCU: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=DMCU", device); + strcpy(thisEvent->desc, "Firmware Version Block DMCU."); + break; // END CASE. + + case RSMI_FW_BLOCK_MC: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=MC", device); + strcpy(thisEvent->desc, "Firmware Version Block MC."); + break; // END CASE. + + case RSMI_FW_BLOCK_ME: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=ME", device); + strcpy(thisEvent->desc, "Firmware Version Block ME."); + break; // END CASE. + + case RSMI_FW_BLOCK_MEC: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=MEC", device); + strcpy(thisEvent->desc, "Firmware Version Block MEC."); + break; // END CASE. + + case RSMI_FW_BLOCK_MEC2: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=MEC2", device); + strcpy(thisEvent->desc, "Firmware Version Block MEC2."); + break; // END CASE. + + case RSMI_FW_BLOCK_PFP: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=PFP", device); + strcpy(thisEvent->desc, "Firmware Version Block PFP."); + break; // END CASE. + + case RSMI_FW_BLOCK_RLC: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=RLC", device); + strcpy(thisEvent->desc, "Firmware Version Block RLC."); + break; // END CASE. + + case RSMI_FW_BLOCK_RLC_SRLC: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=SRLC", device); + strcpy(thisEvent->desc, "Firmware Version Block SRLC."); + break; // END CASE. + + case RSMI_FW_BLOCK_RLC_SRLG: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=SRLG", device); + strcpy(thisEvent->desc, "Firmware Version Block SRLG."); + break; // END CASE. 
+ + case RSMI_FW_BLOCK_RLC_SRLS: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=SRLS", device); + strcpy(thisEvent->desc, "Firmware Version Block SRLS."); + break; // END CASE. + + case RSMI_FW_BLOCK_SDMA: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=SDMA", device); + strcpy(thisEvent->desc, "Firmware Version Block SDMA."); + break; // END CASE. + + case RSMI_FW_BLOCK_SDMA2: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=SDMA2", device); + strcpy(thisEvent->desc, "Firmware Version Block SDMA2."); + break; // END CASE. + + case RSMI_FW_BLOCK_SMC: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=SMC", device); + strcpy(thisEvent->desc, "Firmware Version Block SMC."); + break; // END CASE. + + case RSMI_FW_BLOCK_SOS: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=SOS", device); + strcpy(thisEvent->desc, "Firmware Version Block SOS."); + break; // END CASE. + + case RSMI_FW_BLOCK_TA_RAS: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=RAS", device); + strcpy(thisEvent->desc, "Firmware Version Block RAS."); + break; // END CASE. + + case RSMI_FW_BLOCK_TA_XGMI: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=XGMI", device); + strcpy(thisEvent->desc, "Firmware Version Block XGMI."); + break; // END CASE. + + case RSMI_FW_BLOCK_UVD: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=UVD", device); + strcpy(thisEvent->desc, "Firmware Version Block UVD."); + break; // END CASE. + + case RSMI_FW_BLOCK_VCE: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=VCE", device); + strcpy(thisEvent->desc, "Firmware Version Block VCE."); + break; // END CASE. 
+ + case RSMI_FW_BLOCK_VCN: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "firmware_version:device=%i:block=VCN", device); + strcpy(thisEvent->desc, "Firmware Version Block VCN."); + break; // END CASE. + + default: // If we did not recognize it, kill stuff. + thisEvent->device= 0; + thisEvent->reader = NULL; + thisEvent->baseIdx = 0; + thisEvent->vptrSize = 0; + free(thisEvent->vptr); + thisEvent->vptr = NULL; + thisEvent->variant = 0; + thisEvent->subvariant = 0; + found = 0; // indicate not found. + break; + } // end switch + + if (found) { + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + } // end while. + + // rsmi_dev_ecc_count_get uses an enum with 14 settings; then each is a base event for + // correctable and uncorrectable errors. + // We will have a single loop with a switch to pick the variants. + // We sorted the list, it should be in order by variant. + scan = NULL; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_ecc_count_get"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + + // Common elements. + int found=1; // Presume variant will be found. + char blockName[16] = ""; // Block name found. + thisEvent = &AllEvents[TotalEvents]; + thisEvent->writer = NULL; // can't be written. + thisEvent->reader = &er_ecc_count_correctable; // read routine. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(rsmi_error_count_t); // Size of data to read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); // Space to read it. + thisEvent->variant=scan->variant; // Same as case we are in. + thisEvent->subvariant=scan->subvariant; // subvariant is gpu block type (bit mask). + BaseEvent = TotalEvents; // Make the first a base event. 
+ + switch(scan->variant) { + case RSMI_GPU_BLOCK_UMC: + strncpy(blockName, "UMC", 15); + break; + + case RSMI_GPU_BLOCK_SDMA: + strncpy(blockName, "SDMA", 15); + break; + + case RSMI_GPU_BLOCK_GFX: + strncpy(blockName, "GFX", 15); + break; + + case RSMI_GPU_BLOCK_MMHUB: + strncpy(blockName, "MMUB", 15); + break; + + case RSMI_GPU_BLOCK_ATHUB: + strncpy(blockName, "ATHUB", 15); + break; + + case RSMI_GPU_BLOCK_PCIE_BIF: + strncpy(blockName, "PCIE_BIF", 15); + break; + + case RSMI_GPU_BLOCK_HDP: + strncpy(blockName, "HDP", 15); + break; + + case RSMI_GPU_BLOCK_XGMI_WAFL: + strncpy(blockName, "XGMI_WAFL", 15); + break; + + case RSMI_GPU_BLOCK_DF: + strncpy(blockName, "DF", 15); + break; + + case RSMI_GPU_BLOCK_SMN: + strncpy(blockName, "SMN", 15); + break; + + case RSMI_GPU_BLOCK_SEM: + strncpy(blockName, "SEM", 15); + break; + + case RSMI_GPU_BLOCK_MP0: + strncpy(blockName, "MP0", 15); + break; + + case RSMI_GPU_BLOCK_MP1: + strncpy(blockName, "MP1", 15); + break; + + case RSMI_GPU_BLOCK_FUSE: + strncpy(blockName, "FUSE", 15); + break; + + + default: // If we did not recognize it, kill stuff. + thisEvent->device= 0; + thisEvent->reader = NULL; + thisEvent->baseIdx = 0; + thisEvent->vptrSize = 0; + free(thisEvent->vptr); + thisEvent->vptr = NULL; + thisEvent->variant = 0; + thisEvent->subvariant = 0; + found = 0; // indicate not found. + break; + } // end switch + + if (found) { + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_count_correctable:device=%i:block=%s", device, blockName); + snprintf(thisEvent->desc, PAPI_MAX_STR_LEN-1, "Correctable error count for the GPU Block %s.", blockName); + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. 
+ thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_count_uncorrectable:device=%i:block=%s", device, blockName); + snprintf(thisEvent->desc, PAPI_MAX_STR_LEN-1, "Uncorrectable error count for the GPU Block %s.", blockName); + thisEvent->reader = &er_ecc_count_uncorrectable; // Will call previous, this routine just copies it. + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = BaseEvent; // NOT SELF, combined read with previous event(s). + thisEvent->vptrSize=0; // Shares data with base event. + thisEvent->vptr=NULL; // No space here. + thisEvent->variant=-1; // Not applicable (DUMMY) + thisEvent->subvariant=scan->subvariant; // subvariant is sensor. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + } // end while. + + //(rsmi_dev_ecc_enabled_get, (uint32_t dv_ind, uint64_t *enabled_blocks)); + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_ecc_enabled_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_enabled_get:device=%i", device); + strcpy(thisEvent->desc, "Bit mask of gpu blocks with ecc error counting enabled."); + thisEvent->reader = &er_ecc_enabled; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(uint64_t); // Memory for read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + // rsmi_dev_ecc_status_get uses an enum with 14 settings; each will be a separate event. + // (rsmi_dev_ecc_status_get(uint32_t dv_ind, rsmi_gpu_block_t block, rsmi_ras_err_state_t ∗ state) + // We will have a single loop with a switch to pick the variants. 
+ // We sorted the list, it should be in order by variant. + scan = NULL; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_ecc_status_get"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + + // Common elements. + int found=1; // Presume variant will be found. + thisEvent = &AllEvents[TotalEvents]; + thisEvent->writer = NULL; // can't be written. + thisEvent->reader = &er_ecc_status; // read routine. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=sizeof(rsmi_ras_err_state_t); // Size of data to read. + thisEvent->vptr=calloc(1, thisEvent->vptrSize); // Space to read it. + thisEvent->variant=scan->variant; // Same as case we are in. + thisEvent->subvariant=scan->subvariant; // subvariant is gpu block type (bit mask). + + switch(scan->variant) { + case RSMI_GPU_BLOCK_UMC: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=UMC", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block UMC."); + break; // END CASE. + + case RSMI_GPU_BLOCK_SDMA: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=SDMA", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block SDMA."); + break; // END CASE. + + case RSMI_GPU_BLOCK_GFX: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=GFX", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block GFX."); + break; // END CASE. + + case RSMI_GPU_BLOCK_MMHUB: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=MMHUB", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block MMHUB."); + break; // END CASE. + + case RSMI_GPU_BLOCK_ATHUB: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=ATHUB", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block ATHUB."); + break; // END CASE. 
+ + case RSMI_GPU_BLOCK_PCIE_BIF: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=PCIE_BIF", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block PCIE_BIF."); + break; // END CASE. + + case RSMI_GPU_BLOCK_HDP: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=HDP", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block HDP."); + break; // END CASE. + + case RSMI_GPU_BLOCK_XGMI_WAFL: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=XGMI_WAFL", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block XGMI_WAFL."); + break; // END CASE. + + case RSMI_GPU_BLOCK_DF: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=DF", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block DF."); + break; // END CASE. + + case RSMI_GPU_BLOCK_SMN: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=SMN", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block SMN."); + break; // END CASE. + + case RSMI_GPU_BLOCK_SEM: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=SEM", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block SEM."); + break; // END CASE. + + case RSMI_GPU_BLOCK_MP0: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=MP0", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block MP0."); + break; // END CASE. + + case RSMI_GPU_BLOCK_MP1: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=MP1", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block MP1."); + break; // END CASE. + + case RSMI_GPU_BLOCK_FUSE: + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "ecc_status:device=%i:block=FUSE", device); + strcpy(thisEvent->desc, "ECC Error Status for the GPU Block FUSE."); + break; // END CASE. + + + default: // If we did not recognize it, kill stuff. 
+ thisEvent->device= 0; + thisEvent->reader = NULL; + thisEvent->baseIdx = 0; + thisEvent->vptrSize = 0; + free(thisEvent->vptr); + thisEvent->vptr = NULL; + thisEvent->variant = 0; + thisEvent->subvariant = 0; + found = 0; // indicate not found. + break; + } // end switch + + if (found) { + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + } // end while. + + // rsmi_dev_gpu_clk_freq_get, has five variants. + // rsmi_dev_gpu_clk_freq_get(device, rsmi_clk_type_t type, *rsmi_frequencies_t frequencies): + // We will have a single loop with a switch to pick the variants. + // Note each one of these may turn into several events. + scan = NULL; + while (1) { // No variants, just subvariants. + scan = nextEvent(scan, device, "rsmi_dev_gpu_clk_freq_get"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + if (scan->variant < 0 || scan->variant>=freqTablePerDevice) continue; // skip if variant illegal. + int idx = device*freqTablePerDevice+scan->variant; // Index into frequency table. + + // The Count of frequencies for this variant. + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "gpu_clk_freq_%s:device=%i:count", gpuClkVariantName[scan->variant], device); + strcpy(thisEvent->desc, "Number of frequencies available."); + thisEvent->reader = NULL; // No reader is needed. + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=0; // Not needed, tables are read. + thisEvent->vptr=NULL; // Not needed. + thisEvent->value=FreqTable[idx].num_supported; // Value it will always be. + thisEvent->variant=scan->variant; // The type of frequency. + thisEvent->subvariant=-1; // subvariant doesn't matter. + BaseEvent = TotalEvents; // Remember this as the base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + + // The Current frequency for this variant. 
+ thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "gpu_clk_freq_%s:device=%i:current", gpuClkVariantName[scan->variant], device); + strcpy(thisEvent->desc, "Current operating frequency."); + thisEvent->reader = &er_gpu_clk_freq_current; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=0; // Not needed, tables are read. + thisEvent->vptr=NULL; // Not needed. + thisEvent->value=0; // Read at time of event. + thisEvent->variant=scan->variant; // The type of frequency. + thisEvent->subvariant=-1; // subvariant doesn't matter. + BaseEvent = TotalEvents; // Remember this as the base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + + // An event per frequency. + for (ui=0; uiname, PAPI_MAX_STR_LEN-1, "gpu_clk_freq_%s:device=%i:idx=%u", gpuClkVariantName[scan->variant], device, ui); + snprintf(thisEvent->desc, PAPI_MAX_STR_LEN-1, "Returns %s frequency value from supported_table[%u].", gpuClkVariantName[scan->variant], ui); + thisEvent->reader = &er_gpu_clk_freq_table; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=0; // Not needed, tables are read. + thisEvent->vptr=NULL; // Not needed. + thisEvent->value=0; // Read at time of event. + thisEvent->variant=scan->variant; // The type of frequency. + thisEvent->subvariant=ui; // subvariant stores the index value. + BaseEvent = TotalEvents; // Remember this as the base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + } // end while. + + // rsmi_dev_gpu_clk_freq_set, has five variants. + // rsmi_dev_gpu_clk_freq_set(device, rsmi_clk_type_t type, uint64_t bitmask): + // We will have a single loop with a switch to pick the variants. + scan = NULL; + while (1) { // No variants, just subvariants. 
+ scan = nextEvent(scan, device, "rsmi_dev_gpu_clk_freq_set"); // Get the next, if any. + if (scan == NULL) break; // Exit if done. + if (scan->variant < 0 || scan->variant>=freqTablePerDevice) continue; // skip if variant illegal. + int idx = device*freqTablePerDevice+scan->variant; // Index into frequency table. + + // The Count of frequencies for this variant. + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "gpu_clk_freq_%s:device=%i:mask", gpuClkVariantName[scan->variant], device); + snprintf(thisEvent->desc, PAPI_MAX_STR_LEN-1, "Write Only. Sets bitmask, 1's for %s frequency values in support table permitted. All 0 mask prohibited.", gpuClkVariantName[scan->variant]); + thisEvent->reader = NULL; // No reader is needed. + thisEvent->writer = &ew_gpu_clk_freq_mask; // Write the mask. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=0; // Not needed, tables are read. + thisEvent->vptr=NULL; // Not needed. + thisEvent->value=FreqTable[idx].num_supported; // Value it will always be. + thisEvent->variant=scan->variant; // The type of frequency. + thisEvent->subvariant=-1; // subvariant doesn't matter. + BaseEvent = TotalEvents; // Remember this as the base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } // END while variants. + + // rsmi_dev_pci_bandwidth_get, has no variants. + // rsmi_dev_pci_bandwidth_get ( uint32_t dv_ind, rsmi_pcie_bandwidth_t ∗ bandwidth ) + // The rsmi_pcie_bandwidth_t is smi_frequencies_t transfer_rate + Lanes[] array): + // We will have a single loop with a switch to pick the variants. + // Note this turns into many events. + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_pci_bandwidth_get"); // Get the next, if any. + if (scan != NULL) { + + // The Count of frequencies for this variant. 
+ thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "pci_bandwidth_rate:device=%i:count", device); + strcpy(thisEvent->desc, "Number of PCI transfer rates available."); + thisEvent->reader = NULL; // No reader is needed. + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=0; // Not needed, tables are read. + thisEvent->vptr=NULL; // Not needed. + thisEvent->value=PCITable[device].transfer_rate.num_supported; // Value it will always be. + thisEvent->variant=-1; // Not used. + thisEvent->subvariant=-1; // Not used. + BaseEvent = TotalEvents; // Remember this as the base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + + // The Current frequency for this variant. + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "pci_bandwidth_rate:device=%i:current", device); + strcpy(thisEvent->desc, "Current PCI transfer rate."); + thisEvent->reader = &er_pci_bandwidth_rate_current; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=0; // Not needed, tables are read. + thisEvent->vptr=NULL; // Not needed. + thisEvent->value=0; // Read at time of event. + thisEvent->variant=-1; // Not used. + thisEvent->subvariant=-1; // Not used. + BaseEvent = TotalEvents; // Remember this as the base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + + // Two events per rate, the rate, and the lanes. + for (ui=0; uiname, PAPI_MAX_STR_LEN-1, "pci_bandwidth_rate:device=%i:rate_idx=%u", device, ui); + snprintf(thisEvent->desc, PAPI_MAX_STR_LEN-1, "Returns PCI bandwidth rate value from supported_table[%u].", ui); + thisEvent->reader = &er_pci_bandwidth_rate_table; + thisEvent->writer = NULL; // Can't be written. 
+ thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=0; // Not needed, tables are read. + thisEvent->vptr=NULL; // Not needed. + thisEvent->value=0; // Read at time of event. + thisEvent->variant=-1; // Not used. + thisEvent->subvariant=ui; // subvariant stores the index value. + BaseEvent = TotalEvents; // Remember this as the base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "pci_bandwidth_rate:device=%i:lane_idx=%u", device, ui); + snprintf(thisEvent->desc, PAPI_MAX_STR_LEN-1, "Returns PCI bandwidth rate corresponding lane count from supported_table[%u].", ui); + thisEvent->reader = &er_pci_bandwidth_lane_table; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=0; // Not needed, tables are read. + thisEvent->vptr=NULL; // Not needed. + thisEvent->value=0; // Read at time of event. + thisEvent->variant=-1; // Not used. + thisEvent->subvariant=ui; // subvariant stores the index value. + BaseEvent = TotalEvents; // Remember this as the base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + } // end if we had pci_bandwidth. + + // rsmi_dev_pci_bandwidth_set, has no variants. + // rsmi_dev_pci_bandwidth_set ( uint32_t dv_ind, uint64_t bitmask ) + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_pci_bandwidth_set"); // Get the next, if any. + if (scan != NULL) { + + // The Count of frequencies for this variant. 
+ thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "pci_bandwidth_rate:device=%i:count", device); + strcpy(thisEvent->desc, "Number of PCI transfer rates available."); + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "pci_bandwidth_rate:device=%i:mask", device); + snprintf(thisEvent->desc, PAPI_MAX_STR_LEN-1, "Write Only. Sets bitmask, 1's for pci transfer rates in support table permitted. All 0 mask prohibited."); + thisEvent->reader = NULL; // No reader is needed. + thisEvent->writer = &ew_pci_bandwidth_mask; // Write Only. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=0; // Not needed, tables are read. + thisEvent->vptr=NULL; // Not needed. + thisEvent->value=-1; // Value to write. + thisEvent->variant=-1; // Not used. + thisEvent->subvariant=-1; // Not used. + BaseEvent = TotalEvents; // Remember this as the base event. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } // end write pci bandwidth mask. + + //------------------------------------------------------------------------- + // The following are string routines, returning a character pointer. + //------------------------------------------------------------------------- + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_brand_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "device_brand:device=%i", device); + strcpy(thisEvent->desc, "Returns char* to z-terminated brand string; do not free()."); + thisEvent->reader = &er_brand; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=(PAPI_MAX_STR_LEN); // Memory for read. + thisEvent->vptr=calloc(thisEvent->vptrSize, sizeof(char)); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. 
+ MakeRoomAllEvents(); // Make room for another. + } + + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_name_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "device_name:device=%i", device); + strcpy(thisEvent->desc, "Returns char* to z-terminated name string; do not free()."); + thisEvent->reader = &er_name; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=(PAPI_MAX_STR_LEN); // Memory for read. + thisEvent->vptr=calloc(thisEvent->vptrSize, sizeof(char)); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_serial_number_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "device_serial_number:device=%i", device); + strcpy(thisEvent->desc, "Returns char* to z-terminated serial number string; do not free()."); + thisEvent->reader = &er_serial_number; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=(PAPI_MAX_STR_LEN); // Memory for read. + thisEvent->vptr=calloc(thisEvent->vptrSize, sizeof(char)); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. 
+ } + + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_subsystem_name_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "device_subsystem_name:device=%i", device); + strcpy(thisEvent->desc, "Returns char* to z-terminated subsystem name string; do not free()."); + thisEvent->reader = &er_subsystem_name; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=(PAPI_MAX_STR_LEN); // Memory for read. + thisEvent->vptr=calloc(thisEvent->vptrSize, sizeof(char)); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_vbios_version_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "vbios_version:device=%i", device); + strcpy(thisEvent->desc, "Returns char* to z-terminated vbios version string; do not free()."); + thisEvent->reader = &er_vbios_version; + thisEvent->writer = NULL; // Can't be written. + thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=(PAPI_MAX_STR_LEN); // Memory for read. + thisEvent->vptr=calloc(thisEvent->vptrSize, sizeof(char)); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + + scan = NULL; + scan = nextEvent(scan, device, "rsmi_dev_vendor_name_get"); + if (scan != NULL) { + thisEvent = &AllEvents[TotalEvents]; + snprintf(thisEvent->name, PAPI_MAX_STR_LEN-1, "vendor_name:device=%i", device); + strcpy(thisEvent->desc, "Returns char* to z-terminated vendor name string; do not free()."); + thisEvent->reader = &er_vendor_name; + thisEvent->writer = NULL; // Can't be written. 
+ thisEvent->device=device; + thisEvent->baseIdx = TotalEvents; // Self. + thisEvent->vptrSize=(PAPI_MAX_STR_LEN); // Memory for read. + thisEvent->vptr=calloc(thisEvent->vptrSize, sizeof(char)); + thisEvent->variant=-1; // Not applicable. + thisEvent->subvariant=-1; // Not applicable. + TotalEvents++; // Count it. + MakeRoomAllEvents(); // Make room for another. + } + } // end for each device. + + // Build arrays for current indices and values. + CurrentIdx = calloc(TotalEvents, sizeof(int)); + CurrentValue = calloc(TotalEvents, sizeof(long long)); + + /* return 0 if everything went OK */ + return 0; +} // END ROUTINE _rocm_smi_add_native_events. + + +/***************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + *****************************************************************************/ + +/* + * This is called whenever a thread is initialized. + */ +static int _rocm_smi_init_thread(hwd_context_t * ctx) +{ + SUBDBG("Entering _rocm_smi_init_thread\n"); + + (void) ctx; + return PAPI_OK; +} // END ROUTINE. + + +// Link the library, set up event tables and function tables. This routine is +// called when the PAPI process is initialized (IE PAPI_library_init) + +static int _rocm_smi_init_component(int cidx) +{ + int i, ret; + (void) i; + uint32_t dev; + scanEvent_info_t* scan=NULL; // a scan event pointer. + SUBDBG("Entering _rocm_smi_init_component\n"); + + /* link in all the rocm libraries and resolve the symbols we need to use */ + if(_rocm_smi_linkRocmLibraries() != PAPI_OK) { + SUBDBG("Dynamic link of ROCM libraries failed, component will be disabled.\n"); + SUBDBG("See disable reason in papi_component_avail output for more details.\n"); + return (PAPI_ENOSUPP); + } + + RSMI(rsmi_init, (0),return(PAPI_ENOSUPP)); + + ret = _rocm_smi_find_devices(); // Find AMD devices. Must find at least 1. + if (ret != PAPI_OK) return(ret); // check for failure. 
+ + // Before we can build the list of all potential events, + // we have to scan the events available to determine + // how many variants & sensors we need to process when + // we get to the build for each type of event. There is + // no other way to query this information. + // Note that some events (like the temperatures) have a + // fixed number of variants. + + // Note that scanEvents will sort the events by device, name, variant, subvariant. + scanEvents(); // Collect supportedEvents[]. + + // DEALING WITH rsmi_dev_gpu_clk_freq_get/set. + // There are five types of clock, and each has a set of frequencies we can retrieve. + // rsmi_dev_gpu_clk_freq_get(device, clock_type, *rsmi_frequencies_t frequencies): + // clock_types: + // RSMI_CLK_TYPE_SYS System clock. + // RSMI_CLK_TYPE_DF Data Fabric clock (for ASICs running on a separate clock) + // RSMI_CLK_TYPE_DCEF Display Controller Engine clock. + // RSMI_CLK_TYPE_SOC SOC clock. + // RSMI_CLK_TYPE_MEM Memory clock. + // The rsmi_frequencies_t structure contains: + // uint32_t num_supported // The count of valid entries in array. + // uint32_t current // the INDICE of the current frequency. + // uint64_t frequency [RSMI_MAX_NUM_FREQUENCIES] // ==32 at this writing. + // In order to support these functions, we need to know up front the num_supported. + // So we read these structures here, if each type is scanned. Note if one is missing, + // the num_supported will remain zero, from the calloc below. + + FreqTable = calloc(TotalDevices*freqTablePerDevice, sizeof(rsmi_frequencies)); + for (dev=0; devvariant<0 || scan->variant>=freqTablePerDevice) // Out of range? + continue; // Y. Skip if variant unrecognized. + int idx = dev*freqTablePerDevice+scan->variant; // idx into FreqTable. + RSMI(rsmi_dev_gpu_clk_freq_get, (dev, scan->variant, &FreqTable[idx]),); + } + } + + // Getting data needed to detail rsmi_dev_pci_bandwidth_get. 
+ PCITable = calloc(TotalDevices, sizeof(rsmi_pcie_bandwidth_t)); + for (dev=0; dev= ((unsigned int) TotalEvents)) return(PAPI_ENOEVNT); // Bad event code. + if (name == NULL || len < 2) return(PAPI_EINVAL); // Invalid arguments. + + strncpy(name, AllEvents[EventCode].name, len); + return (PAPI_OK); +} // END ROUTINE. + + +// Takes a native event code and passes back the event description +static int _rocm_smi_ntv_code_to_descr(unsigned int EventCode, char *desc, int len) +{ + if (EventCode >=((unsigned int) TotalEvents)) return(PAPI_EINVAL); + if (desc == NULL || len < 2) return(PAPI_EINVAL); + + strncpy(desc, AllEvents[EventCode].desc, len); + return (PAPI_OK); +} // END ROUTINE. + + +// Vector that points to entry points for the component +papi_vector_t _rocm_smi_vector = { + .cmp_info = { + // default component information (unspecified values are initialized to 0), + // see _rocm_smi_init_component for additional settings. + .name = "rocm_smi", + .short_name = "rocm_smi", + .version = "1.0", + .description = "AMD GPU System Management Interface via rocm_smi_lib", + .default_domain = PAPI_DOM_USER, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + .hardware_intr_sig = PAPI_INT_SIGNAL, + // component specific cmp_info initializations + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + } + , + // sizes of framework-opaque component-private structures... + // these are all unused in this component. 
+ .size = { + .context = 1, // sizeof( _rocm_smi_context_t ) + .control_state = 1, // sizeof( _rocm_smi_control_t ) + .reg_value = 1, // sizeof( _rocm_smi_register_t ) + .reg_alloc = 1, // sizeof( _rocm_smi_reg_alloc_t ) + } + , + // function pointers in this component + .start = _rocm_smi_start, // ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) + .stop = _rocm_smi_stop, // ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) + .read = _rocm_smi_read, // ( hwd_context_t * ctx, hwd_control_state_t * ctrl, long_long ** events, int flags ) + .write = _rocm_smi_write, // ( hwd_context_t * ctx, hwd_control_state_t * ctrl, long_long ** events ) + .reset = _rocm_smi_reset, // ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) + .cleanup_eventset = _rocm_smi_cleanup_eventset, // ( hwd_control_state_t * ctrl ) + + .init_component = _rocm_smi_init_component, // ( int cidx ) + .init_thread = _rocm_smi_init_thread, // ( hwd_context_t * ctx ) + .init_control_state = _rocm_smi_init_control_state, // ( hwd_control_state_t * ctrl ) + .update_control_state = _rocm_smi_update_control_state, // ( hwd_control_state_t * ptr, NativeInfo_t * native, int count, hwd_context_t * ctx ) + + .ctl = _rocm_smi_ctrl, // ( hwd_context_t * ctx, int code, _papi_int_option_t * option ) + .set_domain = _rocm_smi_set_domain, // ( hwd_control_state_t * cntrl, int domain ) + .ntv_enum_events = _rocm_smi_ntv_enum_events, // ( unsigned int *EventCode, int modifier ) + .ntv_code_to_name = _rocm_smi_ntv_code_to_name, // ( unsigned int EventCode, char *name, int len ) + .ntv_code_to_descr = _rocm_smi_ntv_code_to_descr, // ( unsigned int EventCode, char *name, int len ) + .shutdown_thread = _rocm_smi_shutdown_thread, // ( hwd_context_t * ctx ) + .shutdown_component = _rocm_smi_shutdown_component, // ( void ) +}; + diff -Nru papi-5.7.0+dfsg/src/components/rocm_smi/README papi-6.0.0~dfsg/src/components/rocm_smi/README --- papi-5.7.0+dfsg/src/components/rocm_smi/README 1970-01-01 00:00:00.000000000 +0000 +++ 
papi-6.0.0~dfsg/src/components/rocm_smi/README 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,92 @@ +/** +* @file: README +* CVS: $Id$ +* @defgroup papi_components Components +* @brief Component Specific Readme file: ROCM_SMI +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information + +rocm_smi/ + +Support for ROCM_SMI (System Management Interface) library. + +General information +------------------- + +The PAPI ROCM_SMI component allows the user to read things like the +temperature, fan speed, and power consumption of AMD GPU devices. +It can also be used to set limits on the power consumption or fan +speed, using a PAPI_write() interface. + +How to install PAPI with the ROCM_SMI component? +------------------------------------------------ + +ROCM_SMI is installed as part of the AMD ROCM install; so we require +the same environment variable as the ROCM component: PAPI_ROCM_ROOT. +An example is provided below, setting PAPI_ROCM_ROOT to its default +value: + +export PAPI_ROCM_ROOT=/opt/rocm + +For a standard installed system, this is the only environment variable +that needs to be set, for both compile and runtime. + +The above example works on ICL's Caffeine system. + +Within PAPI_ROCM_ROOT, we expect the following standard directories: +PAPI_ROCM_ROOT/rocm_smi/lib +PAPI_ROCM_ROOT/rocm_smi/include/rocm_smi + +One library is required for the PAPI ROCM_SMI component. The name is +librocm_smi64.so. + +(At this writing, testing was done with librocm_smi64.so.2.1.) + +After the export shown above, PAPI must be configured and built. +When papi is installed, there will be a papi/src directory. Navigate +to that, and execute the following: + +> ./configure --with-components="rocm_smi" +> make + +If you are rebuilding PAPI, then before the configure step, execute +> make clobber + +TESTING the component is installed: Still from papi/src: +> utils/papi_component_avail + +If the component is functional, it will show that.
Otherwise it will +report it is disabled, and provide a reason why. + +You can see what events are provided by a working component as +follows: +> utils/papi_native_avail | grep -i "rocm_smi:::" + + + +------------------------UNUSUAL INSTALLATIONS------------------------ + +System configurations can vary. Some systems use Spack, a package +manager, to automatically keep paths straight. Others (like our own +ICL Saturn System) require "module load" commands to provide some +services, e.g. 'module load rocm', and these may also set environment +variables and change the LD_LIBRARY_PATH search order. + +Users may require the help of sysadmin personnel to navigate these +facilities and gain access to the correct libraries. + +For the ROCM_SMI component to be operational, it must find the dynamic +library librocm_smi64.so. + +If it is not found (or is not functional) then the component will be +listed as "disabled" with a reason explaining the problem. If the +library was not found, then it is not in the expected places. + +The component can be configured to look for this library in +a specific place, and using an alternate name if desired. Detailed +instructions are contained in the Rules.rocm_smi file. They are +technical; users may wish to enlist the help of a sysadmin. + diff -Nru papi-5.7.0+dfsg/src/components/rocm_smi/Rules.rocm_smi papi-6.0.0~dfsg/src/components/rocm_smi/Rules.rocm_smi --- papi-5.7.0+dfsg/src/components/rocm_smi/Rules.rocm_smi 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm_smi/Rules.rocm_smi 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,110 @@ +# Set default if the root environment variable is not already set. +# Note PAPI_ROCM_ROOT is an environment variable that must be set. +# No other environment variables need to be exported for runtime +# operation on a standard install; see the README file.
+PAPI_ROCM_ROOT ?= /opt/rocm + +# For non-typical system configurations, the following 'runtime overrides' can +# be set, as just a library name, or a full path and name. There cannot be any +# spaces between the double quotes (which must be escaped as \"). An example: + +# PAPI_ROCM_SMI_MAIN = \"$(PAPI_ROCM_ROOT)/rocm_smi/lib/librocm_smi64.so.2.2\" + +# By default, all overrides are empty strings. + +# If an override is not an empty string, it must work, or the component will be +# disabled. + +# Both at compile time and run time, the software depends on PAPI_ROCM_ROOT. +# There is one library used by the ROCM_SMI component: +# librocm_smi64.so + +# The standard installed location for this library, with override: +# $(PAPI_ROCM_ROOT)/rocm_smi/lib/librocm_smi64.so #O.R. PAPI_ROCM_SMI_MAIN +# +# There are many ways to cause this path to be known. Spack is a package +# manager used on supercomputers, Linux and MacOS. If Spack is aware of ROCM, +# it encodes the paths to the necessary libraries. + +# The environment variable LD_LIBRARY_PATH encodes a list of paths to +# search for libraries; separated by a colon (:). These paths could be +# added to LD_LIBRARY_PATH. +# +# Warning: LD_LIBRARY_PATH often contains a list of directories that +# are searched for libraries, some of these may be needed by other +# packages you are using. Always extend LD_LIBRARY_PATH, never replace it; +# for example: +# >export LD_LIBRARY_PATH=someNewLibraryDirectory:$LD_LIBRARY_PATH +# which searches the new directory first, then the existing +# LD_LIBRARY_PATH entries. Alternatively, you can append it: +# >export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:someNewLibraryDirectory +# which will search the existing directories first, then your new +# directory. + +# You can check on the value of LD_LIBRARY_PATH with +# echo $LD_LIBRARY_PATH + +# There may be other package managers or utilities, for example on a +# system with modules; the command 'module load rocm' may modify +# LD_LIBRARY_PATH.
+ +# A Linux system will also search for libraries by default in the +# directories listed by /etc/ld.so.conf, and /usr/lib64, /lib64, +# /usr/lib, /lib. + +# OVERRIDES: These are by default empty strings (""), if set they must work. +PAPI_ROCM_SMI_MAIN = \"\" + +# This is used for development and a locally built library. +# PAPI_ROCM_SMI_MAIN = \"$(PAPI_ROCM_ROOT)/build/librocm_smi64.so\" + +# An example of an override: +# PAPI_ROCM_SMI_MAIN = \"$(PAPI_ROCM_ROOT)/rocm_smi/lib/librocm_smi64.so.1.0.0\" + +# Note: PAPI_ROCM_ROOT also applies to the ROCM component. The ROCM_SMI +# library is provided by default when ROCM is installed, in +# $(PAPI_ROCM_ROOT)/rocm_smi/lib/; however, the two components, rocm and +# rocm_smi, must be specified separately on the configure line. See below for +# examples. + +# Note: If you change these overrides, PAPI should be rebuilt from scratch. +# From papi/src/ +# make clobber +# ./configure --with-components="rocm_smi" +# make + +# An alternative, for both rocm and rocm_smi components: +# ./configure --with-components="rocm rocm_smi" + +# OPERATION, per library: +# 1) If an override string is not empty, we will use it explicitly and fail if +# it does not work. This means disabling the component; a reason for disabling +# is shown using the papi utility, papi/src/utils/papi_component_avail + +# 2) We will attempt to open the library using the normal system library search +# paths; if Spack is present and configured correctly it should deliver the +# proper library. A failure here will be silent; we will proceed to (3). + +# 3) If that fails, we will try to find the library in the standard installed +# locations listed above. If this fails, we disable the component, the reason +# for disabling is shown using the papi utility, +# papi/src/utils/papi_component_avail. + +# ROCM_SMI_MACS holds the macro defines for the overrides.
In the code we convert +# these to string variables with the following line: +# static char rocm_smi_main[]=PAPI_ROCM_SMI_MAIN; + +ROCM_SMI_MACS = -DPAPI_ROCM_SMI_MAIN=$(PAPI_ROCM_SMI_MAIN) + +COMPSRCS += components/rocm_smi/linux-rocm-smi.c +COMPOBJS += linux-rocm-smi.o +# CFLAGS specifies compile flags; need include files here, and macro defines. +# Order is important here; there are multiple DIFFERENT hsa.h files. +# Where to find rocm_smi.h varies by ROCM release; we cover two possible paths. +CFLAGS += -I$(PAPI_ROCM_ROOT)/include/rocm_smi +CFLAGS += -I$(PAPI_ROCM_ROOT)/rocm_smi/include/rocm_smi +CFLAGS += $(ROCM_SMI_MACS) -g +LDFLAGS += $(LDL) -g + +linux-rocm-smi.o: components/rocm_smi/linux-rocm-smi.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/rocm_smi/linux-rocm-smi.c -o linux-rocm-smi.o diff -Nru papi-5.7.0+dfsg/src/components/rocm_smi/tests/Makefile papi-6.0.0~dfsg/src/components/rocm_smi/tests/Makefile --- papi-5.7.0+dfsg/src/components/rocm_smi/tests/Makefile 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm_smi/tests/Makefile 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,18 @@ +NAME=rocm_smi +include ../../Makefile_comp_tests.target + +TESTS = + +rocm_smi_tests: $(TESTS) + +# We have no standardized tests yet. The below are examples of what one might look like; +# but the "-fverbose-asm -Wa,-adhln=testPCP.s" are not usually included; those are for +# advanced debugging. So is "-Xlinker -Map=testPCP_link.map"; those can be left off.
+#testPCP.o: testPCP.c +# $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c testPCP.c -o testPCP.o -fverbose-asm -Wa,-adhln=testPCP.s + +#testPCP: testPCP.o $(UTILOBJS) $(PAPILIB) +# $(CC) $(INCLUDE) -o testPCP testPCP.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) -Xlinker -Map=testPCP_link.map + +clean: + rm -f $(TESTS) *.o *~ diff -Nru papi-5.7.0+dfsg/src/components/rocm_smi/tests/rocmcap_plot.cpp papi-6.0.0~dfsg/src/components/rocm_smi/tests/rocmcap_plot.cpp --- papi-5.7.0+dfsg/src/components/rocm_smi/tests/rocmcap_plot.cpp 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm_smi/tests/rocmcap_plot.cpp 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,742 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file rocmcap_plot.cpp + * CVS: $Id$ + * @author Tony Castaldo (tonycastaldon@icl.utk.edu) + * Mods: + * + * @brief + + * This file reads GPU power usage through the PAPI rocm_smi + * component every 50ms and writes it to a TSV file. + * + * It takes at least one argument; the number of seconds to + * run. + * + * If there is ONE additional argument, it is a power cap + * and all GPUs will be set to it. This is good if the GPUs + * are all the same model. + * + * If there are MULTIPLE additional arguments, there must be + * one per GPU, and they are individual power limits for the + * GPUs. This is useful if they are not all the same model. + * + * The output is written as tab-separated-values (TSV) in + * /tmp/PowerReadGPU.tsv. + */ + + +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define dprintf if (0) printf /* debug printf; change to (1) to enable. */ + +int CTL_Z = 0; // No SIGTSTP signalled yet. +void cbSignal_SIGTSTP(int signalNumber) { + CTL_Z = 1; // Indicate it was received. +} // end signal handler. + +void helpText(void) { + fprintf(stderr, "This program requires at least one argument.\n"); + fprintf(stderr, "First arg is number of seconds to run.
If 0, will run \n"); + fprintf(stderr, "until killed. A graceful exit can be made by signalling \n"); + fprintf(stderr, "SIGTSTP (Terminal Stop, like Ctrl-z). We will trap it \n"); + fprintf(stderr, "and close files, free memory, etc. On SLURM, get job id \n"); + fprintf(stderr, "using 'squeue', then 'scancel -s SIGTSTP JOBID' \n"); + fprintf(stderr, "2nd (optional) argument is a global power limit to set \n"); + fprintf(stderr, "on all GPUs. If more than two arguments are given, then \n"); + fprintf(stderr, "there must be a power argument for EACH GPU we find, \n"); + fprintf(stderr, "each is the individual power limit for that GPU (in the \n"); + fprintf(stderr, "order we report them). \n"); + fprintf(stderr, " \n"); + fprintf(stderr, "We report to stderr the hardware found and current power\n"); + fprintf(stderr, "limit settings. If you change the power limit here, it \n"); + fprintf(stderr, "does limit other programs; the original power limits are\n"); + fprintf(stderr, "automatically restored upon any exit of this program. \n"); + fprintf(stderr, " \n"); + fprintf(stderr, "Typically, you will start this program on a node, then \n"); + fprintf(stderr, "while it is running execute ANOTHER program on the node \n"); + fprintf(stderr, "that exercises the GPU. \n"); + fprintf(stderr, " \n"); + fprintf(stderr, "After changing power settings (if specified), this code \n"); + fprintf(stderr, "READS the spot power usage every 50ms, for all GPUs on \n"); + fprintf(stderr, "the node, and reports those (tab-separated) to the file \n"); + fprintf(stderr, "/tmp/PowerReadGPUs.tsv. \n"); + fprintf(stderr, " \n"); + fprintf(stderr, "It will also output /tmp/PowerReadGPU.gnuplot, a gnuplot\n"); + fprintf(stderr, "script to plot the power usage for each GPU on the node.\n"); + fprintf(stderr, "This is just an ascii file and can be edited if needed. 
\n"); +}; + +void rocmGetDeviceCount(long long *deviceCount) +{ + int EventSet = PAPI_NULL; + int retval, devCntEventCode; + +// rocm_smi:::NUMDevices + retval = PAPI_event_name_to_code("rocm_smi:::NUMDevices", &devCntEventCode); + if( retval != PAPI_OK ) { + fprintf(stderr, "PAPI_event_name_to_code failure returned %i [%s].\n", retval, PAPI_strerror(retval)); + helpText(); + exit(-1); + } + + retval = PAPI_create_eventset( &EventSet ); + if( retval != PAPI_OK ) { + fprintf(stderr, "PAPI_create_eventset failure returned %i [%s].\n", retval, PAPI_strerror(retval)); + helpText(); + exit(-1); + } + + retval = PAPI_add_event(EventSet, devCntEventCode); // Add the event in. + if( retval != PAPI_OK ) { + fprintf(stderr, "PAPI_add_event failure returned %i [%s].\n", retval, PAPI_strerror(retval)); + helpText(); + exit(-1); + } + + retval = PAPI_start(EventSet); // Start the event set. + if( retval != PAPI_OK ) { + fprintf(stderr, "PAPI_start failure returned %i [%s].\n", retval, PAPI_strerror(retval)); + helpText(); + exit(-1); + } + + retval = PAPI_stop(EventSet, deviceCount); // STop and get value. + if( retval != PAPI_OK ) { + fprintf(stderr, "PAPI_stop failed, returned %i [%s].\n", retval, PAPI_strerror(retval)); + helpText(); + exit(-1); + } + + PAPI_cleanup_eventset(EventSet); // get rid of this set. +} // end Get Devices. + +// Host function +int main( int argc, char** argv ) +{ + +#define NUM_EVENTS 32 /* Max number of GPUs on a node this code can handle. */ + int retval, i, j, device_count; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; // For reading either limit or current power. + char *LimitEventName[NUM_EVENTS]={NULL}; + char *PowerEventName[NUM_EVENTS]={NULL}; + char *minEventName[NUM_EVENTS]={NULL}; + char *maxEventName[NUM_EVENTS]={NULL}; + int powerEvents[NUM_EVENTS]; // PAPI codes for current power events. + int limitEvents[NUM_EVENTS]; // PAPI codes for power limit setting. 
+ int minEvents[NUM_EVENTS]; + int maxEvents[NUM_EVENTS]; + long long minSetting[NUM_EVENTS]; + long long maxSetting[NUM_EVENTS]; + long long UserLimitGiven[NUM_EVENTS]; // These are the values per GPU set by user. + long long OrigLimitFound[NUM_EVENTS]; // original limit read from device. + int PowerEventCount = 0, LimitEventCount = 0, minEventCount = 0, maxEventCount = 0; + const PAPI_component_info_t *cmpinfo; + char event_name[PAPI_MAX_STR_LEN]; + signal(SIGTSTP, cbSignal_SIGTSTP); // register the signal handler for CTL_Z. + + if (argc < 2) { + helpText(); + exit(-1); + } + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if( retval != PAPI_VER_CURRENT ) { + fprintf( stderr, "PAPI_library_init failed.\n" ); + helpText(); + exit(-1); + } + + printf( "PAPI_VERSION : %4d %6d %7d\n", + PAPI_VERSION_MAJOR( PAPI_VERSION ), + PAPI_VERSION_MINOR( PAPI_VERSION ), + PAPI_VERSION_REVISION( PAPI_VERSION ) ); + + int numcmp = PAPI_num_components(); + + // Search for the rocm_smi component. + int cid = 0; + for (cid=0; cidname, "rocm_smi" ) ) break; // If we found it, + } + } + + if ( cid==numcmp ) { // If true we looped through all without finding rocm_smi. + fprintf(stderr, "ROCM_SMI PAPI Component was not found.\n"); + exit(-1); + } + + printf( "ROCM_SMI found as Component %d of %d: %s: %d events\n", (1+cmpinfo->CmpIdx), numcmp, cmpinfo->name, cmpinfo->num_native_events ); + if (cmpinfo->disabled) { // If disabled, + fprintf(stderr, "ROCM_SMI PAPI Component is disabled.\n"); + exit(-1); + } + + long long llDC; + rocmGetDeviceCount( &llDC); + device_count = (int) llDC; + printf("AMD Device Count: %d.\n", device_count); + if (device_count < 1) { + fprintf(stderr, "There are no GPUs to manage.\n"); + exit(-1); + } + + FILE *myOut = fopen("/tmp/PowerReadGPU.tsv", "w"); // Open the file. + if (myOut == NULL) { // If that failed, + fprintf(stderr, "Failed to open output /tmp/PowerReadGPU.tsv. 
Error: %d (%s)\n", errno, strerror(errno)); + exit(-1); + } + + FILE *myGnuplot = fopen("/tmp/PowerReadGPU.gnuplot", "w"); + if (myGnuplot == NULL) { + fprintf(stderr, "Failed to open gnuplot output /tmp/PowerReadGPU.gnuplot. Error: %d (%s)\n", errno, strerror(errno)); + exit(-1); + } + + // Scan events to find rocm power events. + int code = PAPI_NATIVE_MASK; + int ii=0; + int event_modifier = PAPI_ENUM_FIRST; + for ( ii=0; iinum_native_events; ii++ ) { + retval = PAPI_enum_cmp_event( &code, event_modifier, cid ); + event_modifier = PAPI_ENUM_EVENTS; + if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + retval = PAPI_event_code_to_name( code, event_name ); + char *ss; + + ss = strstr(event_name, "device="); // Look for the device id. + if (ss == NULL) continue; // Not a valid name. + int did = atoi(ss+7); // convert it. + if (did >= device_count) continue; // Invalid device count. + + // rocm_smi:::power_average:device=0:sensor=1 + // rocm_smi:::power_cap:device=0:sensor=1 + // rocm_smi:::power_cap_range_min:device=0:sensor=1 + // rocm_smi:::power_cap_range_max:device=0:sensor=1 + // Have an event name to examine. + ss = strstr(event_name, "power_average:"); + if (ss != NULL) { + PowerEventName[did] = strdup(event_name); // .. remember the name, in device order. + dprintf("Found powerEvent '%s' for device %i.\n", event_name, did); + PowerEventCount++; // .. bump total power events. + continue; // .. done with this event. + } + + ss = strstr(event_name, "power_cap:"); + if (ss != NULL) { + LimitEventName[did] = strdup(event_name); // Valid! Remember the name. + dprintf("Found limitEvent '%s' for device %i.\n", event_name, did); // Report what we found. + LimitEventCount++; // Add to the number of events found. + continue; // Done with it. + } + + ss = strstr(event_name, "power_cap_range_min:"); + if (ss != NULL) { + minEventName[did] = strdup(event_name); // Valid! Remember the name. 
+ dprintf("Found minEvent '%s' for device %i.\n", event_name, did); // Report what we found. + minEventCount++; // Add to the number of events found. + continue; // Done with it. + } + + ss = strstr(event_name, "power_cap_range_max"); + if (ss != NULL) { + maxEventName[did] = strdup(event_name); // Valid! Remember the name. + dprintf("Found maxEvent '%s' for device %i.\n", event_name, did); // Report what we found. + maxEventCount++; // Add to the number of events found. + continue; // Done with it. + } + + } // end of for each event. + + + if (PowerEventCount != device_count || + LimitEventCount != device_count || + minEventCount != device_count || + maxEventCount != device_count) { // If we did not get all the events, + fprintf(stderr, "Too few ROCM_SMI events found; %d devices, %i PowerEvents, %i LimitEvents, %i maxEvents, %i minEvents. Aborting\n", + device_count, PowerEventCount, LimitEventCount, minEventCount, maxEventCount); + for (j=0; j 2) { + if (argc != device_count+2) { + fprintf(stderr, "You have specified %i power limits, it doesn't match with %d devices.\n", argc-2, device_count); + for (j=0; j 2) { // If we have settings to check, + for (i=0; i maxSetting[i]) { + fprintf(stderr, "User Power Limit of %llu is out of range for device %i.\n", UserLimitGiven[i], i); + retval++; // increase violations. + } + } + + if (retval > 0) { // Any out of range, we get out. + for (j=0; j 2) { // If power limits were given, + retval = PAPI_write(EventSet, UserLimitGiven); // .. Try to write user values. + if( retval != PAPI_OK ) { + fprintf(stderr, "PAPI_write(User Limits) failed, returned %i [%s].\n", retval, PAPI_strerror(retval)); + for (j=0; j 0) { + fprintf(stderr, "Aborting for %i write failure(s).\n", retval); + for (j=0; j 0 && elapsedSec >= runSeconds) break; // Exit if time is up. 
+ } + + if (CTL_Z) fprintf(stderr, "Received CTL_Z signal (SIGTSTP).\n"); + else fprintf(stderr, "Time %i seconds expired.\n", runSeconds); + fprintf(stderr, "Total reads: %i.\n", runCount); + + //-------------------------------------------------------------------------- + // Generate a gnuplot file instructions. + //-------------------------------------------------------------------------- + fprintf(myGnuplot, "set xlabel 'Time (sec)'\n"); // label for x axis. + fprintf(myGnuplot, "set nokey\n"); // no key needed. + fprintf(myGnuplot, "set terminal png\n"); // generate png output when plotting. + fprintf(myGnuplot, "set title 'Spot MW Usage During Run'\n"); // Title of graph. + fprintf(myGnuplot, "set yrange [0:300000]\n"); // Force the y range. + + for (i=0; i < event > ... + * + * @section Description + * papi_command_line is a PAPI utility program that adds named events from the + * command line to a PAPI EventSet and does some work with that EventSet. + * This serves as a handy way to see if events can be counted together, + * and if they give reasonable results for known work. + * + * @section Options + *
    + *
  • -u Display output values as unsigned integers + *
  • -x Display output values as hexadecimal + *
  • -h Display help information about this utility. + *
+ * + * @section Bugs + * There are no known bugs in this utility. + * If you find a bug, it should be reported to the + * PAPI Mailing List at . + */ + +#include +#include +#include +#include + +#include "papi.h" +#include + +// Checks if HIP command (AMD) worked or not. +#define HIPCHECK(cmd) \ +{ \ + hipError_t error = cmd; \ + if (error != hipSuccess) { \ + fprintf(stderr, "error: '%s'(%d) at %s:%d\n", \ + hipGetErrorString(error), error,__FILE__, __LINE__); \ + exit(EXIT_FAILURE); \ + } \ +} + +//----------------------------------------------------------------------------- +// HIP routine: Square each element in the array A and write to array C. +//----------------------------------------------------------------------------- +template +__global__ void +vector_square(T *C_d, T *A_d, size_t N) +{ + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x ; + + for (size_t i=offset; i>4) & 0x000000000000ffff; // Extract minor. + major = (startupValues[1]>>8) & 0x000000000000ffff; // Extract major. + printf("%i AMD rocm_smi capable devices found. Library version %i:%i:%i.\n", + NUMDevices, major, minor, patch); + + values = ( long long * ) malloc( sizeof ( long long ) * ( size_t ) argc ); // create reading space. 
+ success = ( char * ) malloc( ( size_t ) argc ); + + if ( success == NULL || values == NULL ) { + fprintf(stderr,"Error allocating memory!\n"); + exit(1); + } + + for ( num_events = 0, i = 1; i < argc; i++ ) { + if ( !strcmp( argv[i], "-h" ) ) { + print_help( argv ); + exit( 1 ); + } else if ( !strcmp( argv[i], "-u" ) ) { + u_format = 1; + } else if ( !strcmp( argv[i], "-x" ) ) { + hex_format = 1; + } else { + if ( ( retval = PAPI_add_named_event( EventSet, argv[i] ) ) != PAPI_OK ) { + printf( "Failed adding: %s\nbecause: %s\n", argv[i], + PAPI_strerror(retval)); + } else { + success[num_events++] = i; + printf( "Successfully added: %s\n", argv[i] ); + } + } + } + + /* Automatically pass if no events, for run_tests.sh */ + if ( num_events == 0 ) { + printf("No events specified!\n"); + printf("Specify events like rocm_smi:::device=0:mem_usage_VRAM rocm_smi:::device=0:pci_throughput_sent\n"); + printf("Use papi/src/utils/papi_native_avail for a list of all events; search for 'rocm_smi:::'.\n"); + return 0; + } + + // ROCM Activity. + printf( "\n" ); + + retval = PAPI_start( EventSet ); + if (retval != PAPI_OK ) { + fprintf(stderr,"Error! PAPI_start, retval=%i [%s].\n", retval, PAPI_strerror(retval) ); + exit( retval ); + } + + // ROCM skipped do_flops(), do_misses() in papi_command_line.c. + + for (k = 0; k < NUMDevices; k++ ) { // ROCM loop through devices. + conductTest(k); // Do some GPU work on device 'k'. + sleep(1); // .. sleep between reads to build up events. + + retval = PAPI_read( EventSet, values ); + if (retval != PAPI_OK ) { + fprintf(stderr,"Error! PAPI_read, retval=%i [%s].\n", retval, PAPI_strerror(retval) ); + exit( retval ); + } + printf( "\n----------------------------------\n" ); + + for ( j = 0; j < num_events; j++ ) { // Back to original papi_command_line... + i = success[j]; + if (! 
(u_format || hex_format) ) { + retval = PAPI_event_name_to_code( argv[i], &event ); + if (retval == PAPI_OK) { + retval = PAPI_get_event_info(event, &info); + if (retval == PAPI_OK) data_type = info.data_type; + else data_type = PAPI_DATATYPE_INT64; + } + switch (data_type) { + case PAPI_DATATYPE_UINT64: + printf( "%s : \t%llu(u)", argv[i], (unsigned long long)values[j] ); + break; + case PAPI_DATATYPE_FP64: + printf( "%s : \t%0.3f", argv[i], *((double *)(&values[j])) ); + break; + case PAPI_DATATYPE_BIT64: + printf( "%s : \t%#llX", argv[i], values[j] ); + break; + case PAPI_DATATYPE_INT64: + default: + printf( "%s : \t%lld", argv[i], values[j] ); + break; + } + if (retval == PAPI_OK) printf( " %s", info.units ); + printf( "\n" ); + } + if (u_format) printf( "%s : \t%llu(u)\n", argv[i], (unsigned long long)values[j] ); + if (hex_format) printf( "%s : \t%#llX\n", argv[i], values[j] ); + } + } // end ROCM device loop. + + retval = PAPI_stop( EventSet, values ); // ROCM added stop and test. + if (retval != PAPI_OK ) { + fprintf(stderr,"Error! PAPI_stop, retval=%i [%s].\n", retval, PAPI_strerror(retval) ); + exit( retval ); + } + + return 0; +} // end main. 
diff -Nru papi-5.7.0+dfsg/src/components/rocm_smi/tests/rocm_smi_all.cpp papi-6.0.0~dfsg/src/components/rocm_smi/tests/rocm_smi_all.cpp --- papi-5.7.0+dfsg/src/components/rocm_smi/tests/rocm_smi_all.cpp 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm_smi/tests/rocm_smi_all.cpp 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,569 @@ +//----------------------------------------------------------------------------- +// This program must be compiled using a special makefile: +// make -f ROCM_SMI_Makefile rocm_smi_all.out +//----------------------------------------------------------------------------- +#define __HIP_PLATFORM_HCC__ + +#include +#include +#include +#include "papi.h" +#include +#include + +#define CHECK(cmd) \ +{\ + hipError_t error = cmd;\ + if (error != hipSuccess) { \ + fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ + exit(EXIT_FAILURE);\ + }\ +} + +// THIS MACRO EXITS if the papi call does not return PAPI_OK. Do not use for routines that +// return anything else; e.g. PAPI_num_components, PAPI_get_component_info, PAPI_library_init. 
+#define CALL_PAPI_OK(papi_routine) \ + do { \ + int _papiret = papi_routine; \ + if (_papiret != PAPI_OK) { \ + fprintf(stderr, "%s:%d macro: PAPI Error: function " #papi_routine " failed with ret=%d [%s].\n", \ + __FILE__, __LINE__, _papiret, PAPI_strerror(_papiret)); \ + exit(-1); \ + } \ + } while (0); + + +#define MEMORY_ALLOCATION_CALL(var) \ + do { \ + if (var == NULL) { \ + fprintf(stderr, "%s:%d: Error: Memory Allocation Failed \n",\ + __FILE__, __LINE__); \ + exit(-1); \ + } \ + } while (0); + + +#define MAX_DEVICES (32) +#define BLOCK_SIZE (1024) +#define GRID_SIZE (512) +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define SUCCESS (0) +#define NUM_METRIC (18) +#define NUM_EVENTS (2) +#define MAX_SIZE (64*1024*1024) // 64 MB + +typedef union +{ + long long ll; + unsigned long long ull; + double d; + void *vp; + unsigned char ch[8]; +} convert_64_t; + +typedef struct { + char name[128]; + long long value; + int flagged; +} eventStore_t; + +int eventsFoundCount = 0; // occupants of the array. +int eventsFoundMax; // Size of the array. +int eventsFoundAdd = 32; // Blocksize for increasing the array. +int deviceCount=0; // Total devices seen. +int deviceEvents[32] = {0}; // Number of events for each device=??. +int globalEvents = 0; // events without a "device=". +eventStore_t *eventsFound = NULL; // The array. + +//----------------------------------------------------------------------------- +// HIP routine: Square each element in the array A and write to array C. +//----------------------------------------------------------------------------- +template +__global__ void +vector_square(T *C_d, T *A_d, size_t N) +{ + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x ; + + for (size_t i=offset; i= eventsFoundMax) { // bump count, if too much, make room. + eventsFoundMax += eventsFoundAdd; // Add. + eventsFound = (eventStore_t*) realloc(eventsFound, eventsFoundMax*sizeof(eventStore_t)); // Make new room. 
+ memset(eventsFound+(eventsFoundMax-eventsFoundAdd), 0, eventsFoundAdd*sizeof(eventStore_t)); // zero it. + } +} // end routine. + +//----------------------------------------------------------------------------- +// conduct a test using HIP. Derived from AMD sample code 'square.cpp'. +// coming in, EventSet is already populated, we just run the test and read. +// Note values must point at an array large enough to store the events in +// Eventset. +//----------------------------------------------------------------------------- +void conductTest(int EventSet, int device, long long *values) { + float *A_d, *C_d; + float *A_h, *C_h; + size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + int i, ret, thisDev, verbose=0; + + ret = PAPI_start( EventSet ); + if (ret != PAPI_OK ) { + fprintf(stderr,"Error! PAPI_start\n"); + exit( ret ); + } + + hipDeviceProp_t props; + if (verbose) fprintf(stderr, "args: EventSet=%i, device=%i, values=%p.\n", EventSet, device, values); + + CHECK(hipSetDevice(device)); // Set device requested. + CHECK(hipGetDevice(&thisDev)); // Double check. + CHECK(hipGetDeviceProperties(&props, thisDev)); // Get properties (for name). + if (verbose) fprintf (stderr, "info: Requested Device=%i, running on device %i=%s\n", device, thisDev, props.name); + + if (verbose) fprintf (stderr, "info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); + A_h = (float*)malloc(Nbytes); // standard malloc for host. + CHECK(A_h == NULL ? hipErrorMemoryAllocation : hipSuccess ); + C_h = (float*)malloc(Nbytes); // standard malloc for host. + CHECK(C_h == NULL ? hipErrorMemoryAllocation : hipSuccess ); + + // Fill with Phi + i + for (size_t i=0; iname) == 0) cid=i; // If we found our match, record it. + } // end search components. 
+ + if (cid < 0) { // if no PCP component found, + fprintf(stderr, "Failed to find rocm_smi component among %i " + "reported components.\n", k); + FreeGlobals(); + PAPI_shutdown(); + exit(-1); + } + + printf("Found ROCM_SMI Component at id %d\n", cid); + + // Add events at a GPU specific level ... eg rocm:::device=0:Whatever + eventCount = 0; + int eventsRead=0; + + // Begin enumeration of all events. + + printf("Events with numeric values were read; if they are zero, they may not \n" + "be operational, or the exercises performed by this code do not affect \n" + "them. We report all 'rocm' events presented by the rocm component. \n" + "\n" + "------------------------Event Name Found------------------------:---Value---\n"); + + PAPI_event_info_t info; // To get event enumeration info. + m=PAPI_NATIVE_MASK; // Get the PAPI NATIVE mask. + CALL_PAPI_OK(PAPI_enum_cmp_event(&m,PAPI_ENUM_FIRST,cid)); // Begin enumeration of ALL papi counters. + do { // Enumerate all events. + memset(&info,0,sizeof(PAPI_event_info_t)); // Clear event info. + k=m; // Make a copy of current code. + + // enumerate sub-events, with masks. For this test, we do not + // have any! But we do this to test our enumeration works as + // expected. First time through is guaranteed, of course. + + do { // enumerate masked events. + CALL_PAPI_OK(PAPI_get_event_info(k,&info)); // get name of k symbol. + char *devstr = strstr(info.symbol, "device="); // look for device enumerator. + if (devstr != NULL) { // If device specific, + device=atoi(devstr+7); // Get the device id, for info. +// fprintf(stderr, "Found rocm symbol '%s', device=%i.\n", info.symbol , device); + if (device < 0 || device >= 32) continue; // skip any not in range. + } else { // A few are system wide. +// fprintf(stderr, "Found rocm symbol '%s'.\n", info.symbol); + globalEvents++; // Add to global events. + device=0; // Any device will do. + } + + // Filter for strings being returned. 
+ int isString = 0; + + if (strstr(info.symbol, "device_brand:") != NULL) isString=1; + if (strstr(info.symbol, "device_name:") != NULL) isString=1; + if (strstr(info.symbol, "device_serial_number:") != NULL) isString=1; + if (strstr(info.symbol, "device_subsystem_name:") != NULL) isString=1; + if (strstr(info.symbol, "vbios_version:") != NULL) isString=1; + if (strstr(info.symbol, "vendor_name:") != NULL) isString=1; + if (strstr(info.symbol, "driver_version_str:") != NULL) isString=1; + + // Filter out crashers. + if (strstr(info.symbol, "temp_current:device=0:sensor=3") != NULL) continue; + if (strstr(info.symbol, "temp_critical:device=0:sensor=3") != NULL) continue; + if (strstr(info.symbol, "temp_critical_hyst:device=0:sensor=3") != NULL) continue; + if (strstr(info.symbol, "temp_emergency:device=0:sensor=3") != NULL) continue; + if (strstr(info.symbol, "temp_emergency:device=0:sensor=3") != NULL) continue; + + if (strstr(info.symbol, "temp_current:device=1:sensor=3") != NULL) continue; + if (strstr(info.symbol, "temp_critical:device=1:sensor=3") != NULL) continue; + if (strstr(info.symbol, "temp_critical_hyst:device=1:sensor=3") != NULL) continue; + if (strstr(info.symbol, "temp_emergency:device=1:sensor=3") != NULL) continue; + if (strstr(info.symbol, "temp_emergency:device=1:sensor=3") != NULL) continue; + + CALL_PAPI_OK(PAPI_create_eventset(&EventSet)); + CALL_PAPI_OK(PAPI_assign_eventset_component(EventSet, cid)); + + ret = PAPI_add_named_event(EventSet, info.symbol); // Don't want to fail program if name not found... + if(ret == PAPI_OK) { + eventCount++; // Bump number of events we could test. + if (deviceEvents[device] == 0) deviceCount++; // Increase count of devices if first for this device. + deviceEvents[device]++; // Add to count of events on this device. + } else { + fprintf(stderr, "FAILED to add event '%s', ret=%i='%s'.\n", info.symbol, ret, PAPI_strerror(ret)); + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. 
+ CALL_PAPI_OK(PAPI_destroy_eventset(&EventSet)); // destroy the event set. + continue; + } + + long long value=0; // The only value we read. + + // Prep stuff. + + fprintf(stderr, "conductTest on single event: %s.\n", info.symbol); + conductTest(EventSet, device, &value); // Conduct a test, on device given. + addEventsFound(info.symbol, value); // Add to events we were able to read. + + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. + CALL_PAPI_OK(PAPI_destroy_eventset(&EventSet)); // destroy the event set. + + // report each event counted. + eventsRead++; // .. count and report. + if (value == 0) { + printf("%-64s: %lli (perhaps not exercised by current test code.)\n", info.symbol, value); + } else { + if (isString) printf("%-64s: %-64s\n", info.symbol, ((char*) value)); + else printf("%-64s: %lli\n", info.symbol, value); + } + } while(PAPI_enum_cmp_event(&k,PAPI_NTV_ENUM_UMASKS,cid)==PAPI_OK); // Get next umask entry (bits different) (should return PAPI_NOEVNT). + } while(PAPI_enum_cmp_event(&m,PAPI_ENUM_EVENTS,cid)==PAPI_OK); // Get next event code. + +// fprintf(stderr, "%s:%i Finished Event Loops.\n", __FILE__, __LINE__); + + if (eventCount < 1) { // If we failed on all of them, + fprintf(stderr, "Unable to add any ROCM events; they are not present in the component.\n"); + fprintf(stderr, "Unable to proceed with this test.\n"); + FreeGlobals(); + PAPI_shutdown(); // Returns no value. + exit(-1); // exit no matter what. + } + + if (eventsRead < 1) { // If failed to read any, + fprintf(stderr, "\nFailed to read any ROCM events.\n"); // report a failure. + fprintf(stderr, "Unable to proceed with pair testing.\n"); + FreeGlobals(); + PAPI_shutdown(); // Returns no value. + exit(-1); // exit no matter what. + } + + printf("\nTotal ROCM events identified: %i.\n\n", eventsFoundCount); + + // EARLY SHUT DOWN. +// PAPI_shutdown(); +// return(0); + + // Next section is pair testing information. 
+ if (eventsFoundCount < 2) { // If failed to get counts on any, + printf("Insufficient events are exercised by the current test code to perform pair testing.\n"); // report a failure. + FreeGlobals(); + PAPI_shutdown(); // Returns no value. + exit(0); // exit no matter what. + } + + + for (i=0; i<32; i++) { + if (deviceEvents[i] == 0) continue; // skip if none found. + if (i==0 && globalEvents >0) { + printf("Device %i assigned %i events (%i of which are not device specific). %i potential pairings for this device.\n", i, deviceEvents[i], globalEvents, deviceEvents[i]*(deviceEvents[i]-1)/2); + } else { + printf("Device %i assigned %i events. %i potential pairings for this device.\n", i, deviceEvents[i], deviceEvents[i]*(deviceEvents[i]-1)/2); + } + } + + // Begin pair testing. We consider every possible pairing of events + // that, tested alone, returned a value greater than zero. +// fprintf(stderr, "Begin Pair Testing.\n"); + + int mainEvent, pairEvent, mainDevice, pairDevice; + long long readValues[2]; + int goodOnSame=0, failOnDiff=0, badSameCombo=0, pairProblems=0; // Some counters. + int type; // 0 succeed on same device, 1 = fail across devices. + for (type=0; type<2; type++) { + if (type == 0) { + printf("List of Pairings on SAME device:\n"); + printf("* means value changed by more than 10%% when paired (vs measured singly, above).\n"); + printf("^ means a pair was rejected as an invalid combo.\n"); + } else { + printf("List of Pairings causing an error when on DIFFERENT devices:\n"); + } + + for (mainEvent = 0; mainEvent 1.10) { // Flag as significantly different for main. + flag1='*'; + eventsFound[mainEvent].flagged = 1; // .. remember this event is suspect. + } + + if (pairCheck < 0.90 || pairCheck > 1.10) { // Flag as significantly different for pair. + flag2='*'; + eventsFound[pairEvent].flagged = 1; // .. remember this event is suspect. + } + + if (flag1 == '*' || flag2 == '*') { + pairProblems++; // Remember number of problems. 
+ flag = '*'; // set global flag. + } + + printf("%c %64s + %-64s [", flag, eventsFound[mainEvent].name, eventsFound[pairEvent].name); + if (flag1 == '*') printf("%c%lli (vs %lli),", flag1, readValues[0], eventsFound[mainEvent].value); + else printf("%c%lli,", flag1, readValues[0]); + + if (flag2 == '*') printf("%c%lli (vs %lli)]\n", flag2, readValues[1], eventsFound[pairEvent].value); + else printf("%c%lli]\n", flag2, readValues[1]); + + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. + CALL_PAPI_OK(PAPI_destroy_eventset(&EventSet)); // destroy the event set. + } // end for each possible pairing event. + } // end loop for each possible primary event. + + if (type == 0) { // For good pairings on same devices, + if (goodOnSame == 0) { + printf("NO valid pairings of above events if both on the SAME device.\n"); + } else { + printf("%i valid pairings of above events if both on the SAME device.\n", goodOnSame); + } + + printf("%i unique pairings on SAME device were rejected as bad combinations.\n", badSameCombo); + + if (pairProblems > 0) { + printf("%i pairings resulted in a change of one or both event values > 10%%.\n", pairProblems); + printf("The following events were changed by pairing:\n"); + for (mainEvent = 0; mainEvent square.cpp + +square.out: square.cpp + $(HIPCC) $(CXXFLAGS) square.cpp -o $@ + +rocm_command_line.out: rocm_command_line.cpp + $(HIPCC) $(CXXFLAGS) -g $(INCLUDE) rocm_command_line.cpp -o $@ $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +rocm_smi_all.out: rocm_smi_all.cpp + $(HIPCC) $(CXXFLAGS) -g $(INCLUDE) rocm_smi_all.cpp -o $@ $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +rocm_smi_writeTests.out: rocm_smi_writeTests.cpp + $(HIPCC) $(CXXFLAGS) -g $(INCLUDE) rocm_smi_writeTests.cpp -o $@ $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +rocmcap_plot.out: rocmcap_plot.cpp + $(HIPCC) $(CXXFLAGS) -g $(INCLUDE) rocmcap_plot.cpp -o $@ $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +checkpath: + echo HIP_PATH = $(HIP_PATH) + echo SOURCES = $(SOURCES) + echo 
HIP_PLATFORM = $(HIP_PLATFORM) + echo HIPCC = $(HIPCC) + echo INCLUDE = $(INCLUDE) + +clean: + rm -f *.o *.out diff -Nru papi-5.7.0+dfsg/src/components/rocm_smi/tests/rocm_smi_writeTests.cpp papi-6.0.0~dfsg/src/components/rocm_smi/tests/rocm_smi_writeTests.cpp --- papi-5.7.0+dfsg/src/components/rocm_smi/tests/rocm_smi_writeTests.cpp 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/rocm_smi/tests/rocm_smi_writeTests.cpp 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,329 @@ +//----------------------------------------------------------------------------- +// This program must be compiled using a special makefile: +// make -f ROCM_SMI_Makefile rocm_smi_writeTests.out +//----------------------------------------------------------------------------- +#define __HIP_PLATFORM_HCC__ + +#include +#include +#include +#include "papi.h" +#include +#include +#include "rocm_smi.h" // Need some enumerations. + +#define CHECK(cmd) \ +{\ + hipError_t error = cmd;\ + if (error != hipSuccess) { \ + fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \ + exit(EXIT_FAILURE);\ + }\ +} + +// THIS MACRO EXITS if the papi call does not return PAPI_OK. Do not use for routines that +// return anything else; e.g. PAPI_num_components, PAPI_get_component_info, PAPI_library_init. 
+#define CALL_PAPI_OK(papi_routine) \ + do { \ + int _papiret = papi_routine; \ + if (_papiret != PAPI_OK) { \ + fprintf(stderr, "%s:%d macro: PAPI Error: function " #papi_routine " failed with ret=%d [%s].\n", \ + __FILE__, __LINE__, _papiret, PAPI_strerror(_papiret)); \ + exit(-1); \ + } \ + } while (0); + + +#define MEMORY_ALLOCATION_CALL(var) \ + do { \ + if (var == NULL) { \ + fprintf(stderr, "%s:%d: Error: Memory Allocation Failed \n",\ + __FILE__, __LINE__); \ + exit(-1); \ + } \ + } while (0); + + +#define MAX_DEVICES (32) +#define BLOCK_SIZE (1024) +#define GRID_SIZE (512) +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define SUCCESS (0) +#define NUM_METRIC (18) +#define NUM_EVENTS (2) +#define MAX_SIZE (64*1024*1024) // 64 MB + +typedef union +{ + long long ll; + unsigned long long ull; + double d; + void *vp; + unsigned char ch[8]; +} convert_64_t; + +typedef struct { + char name[128]; + long long value; +} eventStore_t; + +int eventsFoundCount = 0; // occupants of the array. +int eventsFoundMax; // Size of the array. +int eventsFoundAdd = 32; // Blocksize for increasing the array. +int deviceCount=0; // Total devices seen. +int deviceEvents[32] = {0}; // Number of events for each device=??. +eventStore_t *eventsFound = NULL; // The array. + +//----------------------------------------------------------------------------- +// HIP routine: Square each element in the array A and write to array C. +//----------------------------------------------------------------------------- +template +__global__ void +vector_square(T *C_d, T *A_d, size_t N) +{ + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x ; + + for (size_t i=offset; iname) == 0) cid=i; // If we found our match, record it. + } // end search components. 
+ + if (cid < 0) { // if no PCP component found, + fprintf(stderr, "Failed to find rocm_smi component among %i " + "reported components.\n", k); + PAPI_shutdown(); + exit(-1); + } + + printf("Found ROCM_SMI Component at id %d\n", cid); + + // Add events at a GPU specific level ... eg rocm:::device=0:Whatever + eventCount = 0; + int eventsRead=0; + + // Begin enumeration of all events. + + long long value=0; // The only value we read. + std::string eventName; + eventName = "rocm_smi:::NUMDevices"; + + CALL_PAPI_OK(PAPI_create_eventset(&EventSet)); + CALL_PAPI_OK(PAPI_assign_eventset_component(EventSet, cid)); + ret = PAPI_add_named_event(EventSet, eventName.c_str()); + if (ret == PAPI_OK) { + CALL_PAPI_OK(PAPI_start(EventSet)); + CALL_PAPI_OK(PAPI_stop(EventSet, &value)); + devices = value; + printf("Found %i devices.\n", devices); + } else { + fprintf(stderr, "FAILED to add event '%s', ret=%i='%s'.\n", eventName.c_str(), ret, PAPI_strerror(ret)); + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. + CALL_PAPI_OK(PAPI_destroy_eventset(&EventSet)); // destroy the event set. + exit(-1); + } + + // Do something. + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. + + eventName = "rocm_smi:::device=0:sensor=0:fan_speed"; + ret = PAPI_add_named_event(EventSet, eventName.c_str()); + if (ret != PAPI_OK) { + fprintf(stderr, "FAILED to add event '%s', ret=%i='%s'.\n", eventName.c_str(), ret, PAPI_strerror(ret)); + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. + exit(-1); + } + + eventName = "rocm_smi:::device=0:sensor=0:fan_speed_max"; + ret = PAPI_add_named_event(EventSet, eventName.c_str()); + if (ret != PAPI_OK) { + fprintf(stderr, "FAILED to add event '%s', ret=%i='%s'.\n", eventName.c_str(), ret, PAPI_strerror(ret)); + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. 
+ exit(-1); + } + + long long curmax[2]; + CALL_PAPI_OK(PAPI_start(EventSet)); + CALL_PAPI_OK(PAPI_stop(EventSet, curmax)); + printf("Fan speed: current=%lli maximum=%lli.\n", curmax[0], curmax[1]); + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. + + curmax[0]=128; + eventName = "rocm_smi:::device=0:sensor=0:fan_speed"; + ret = PAPI_add_named_event(EventSet, eventName.c_str()); + if (ret != PAPI_OK) { + fprintf(stderr, "FAILED to add event '%s', ret=%i='%s'.\n", eventName.c_str(), ret, PAPI_strerror(ret)); + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. + exit(-1); + } + + CALL_PAPI_OK(PAPI_start(EventSet)); + ret = PAPI_write(EventSet, curmax); + if ( ret != PAPI_OK ) { + PAPI_stop(EventSet, curmax); // Must be stopped. + PAPI_cleanup_eventset(EventSet); // Empty it. + PAPI_destroy_eventset(&EventSet); // Release memory. + fprintf(stderr, "PAPI_write failure returned %i, = %s.\n", ret, PAPI_strerror(ret)); + } else { + printf("Call succeeded to set fan_speed to %llu RPM.\n", curmax[0]); + } + + // Now try to read it. + CALL_PAPI_OK(PAPI_stop(EventSet, &value)); + printf("After set, read-back of fan value is %lli.\n", value); + + CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet)); // Delete all events in set. + CALL_PAPI_OK(PAPI_destroy_eventset(&EventSet)); // destroy the event set. + + printf("Finished All Events.\n"); + + PAPI_shutdown(); // Returns no value. + return(0); // exit OK. +} // end MAIN. 
diff -Nru papi-5.7.0+dfsg/src/components/sde/interface/papi_sde_interface.c papi-6.0.0~dfsg/src/components/sde/interface/papi_sde_interface.c --- papi-5.7.0+dfsg/src/components/sde/interface/papi_sde_interface.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sde/interface/papi_sde_interface.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,238 @@ +#include +#include +#include "papi_sde_interface.h" + +#pragma weak papi_sde_init +#pragma weak papi_sde_register_counter +#pragma weak papi_sde_register_fp_counter +#pragma weak papi_sde_unregister_counter +#pragma weak papi_sde_describe_counter +#pragma weak papi_sde_create_counter +#pragma weak papi_sde_inc_counter +#pragma weak papi_sde_create_recorder +#pragma weak papi_sde_record +#pragma weak papi_sde_reset_recorder +#pragma weak papi_sde_reset_counter + +#pragma weak papi_sde_compare_long_long +#pragma weak papi_sde_compare_int +#pragma weak papi_sde_compare_double +#pragma weak papi_sde_compare_float + +papi_handle_t +__attribute__((weak)) +papi_sde_init(const char *name_of_library) +{ + (void) name_of_library; + + return NULL; +} + +int +__attribute__((weak)) +papi_sde_register_counter(papi_handle_t handle, const char *event_name, int cntr_mode, int cntr_type, void *counter) +{ + (void) handle; + (void) event_name; + (void) cntr_mode; + (void) cntr_type; + (void) counter; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_register_fp_counter(papi_handle_t handle, const char *event_name, int cntr_mode, int cntr_type, papi_sde_fptr_t func_ptr, void *param ) +{ + (void) handle; + (void) event_name; + (void) cntr_mode; + (void) cntr_type; + (void) func_ptr; + (void) param; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_unregister_counter( void *handle, const char *event_name) +{ + (void) handle; + (void) event_name; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_describe_counter(papi_handle_t handle, const 
char *event_name, const char *event_description) +{ + (void) handle; + (void) event_name; + (void) event_description; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_add_counter_to_group(papi_handle_t handle, const char *event_name, const char *group_name, uint32_t group_flags) +{ + (void) handle; + (void) event_name; + (void) group_name; + (void) group_flags; + + /* do nothing */ + + return 0; +} + + +int +__attribute__((weak)) +papi_sde_create_counter( papi_handle_t handle, const char *event_name, int cntr_type, void **cntr_handle ) +{ + (void) handle; + (void) event_name; + (void) cntr_type; + (void) cntr_handle; + + /* do nothing */ + + return 0; +} + + +int +__attribute__((weak)) +papi_sde_inc_counter( papi_handle_t cntr_handle, long long int increment) +{ + (void) cntr_handle; + (void) increment; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_create_recorder( papi_handle_t handle, const char *event_name, size_t typesize, int (*cmpr_fptr)(const void *p1, const void *p2), void **record_handle ) +{ + (void) handle; + (void) event_name; + (void) typesize; + (void) record_handle; + + /* do nothing */ + + return 0; +} + + +int +__attribute__((weak)) +papi_sde_record( void *record_handle, size_t typesize, void *value) +{ + (void) record_handle; + (void) typesize; + (void) value; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_reset_recorder(void *record_handle ) +{ + (void) record_handle; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_reset_counter( void *cntr_handle ) +{ + (void) cntr_handle; + + /* do nothing */ + + return 0; +} + +void +__attribute__((weak)) +*papi_sde_get_counter_handle( void *handle, const char *event_name) +{ + (void) handle; + (void) event_name; + + /* do nothing */ + + return NULL; +} + + +int +__attribute__((weak)) +papi_sde_compare_long_long(const void *p1, const void *p2) +{ + (void) p1; + (void) p2; + + /* do nothing 
*/ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_compare_int(const void *p1, const void *p2) +{ + (void) p1; + (void) p2; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_compare_double(const void *p1, const void *p2) +{ + (void) p1; + (void) p2; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_compare_float(const void *p1, const void *p2) +{ + (void) p1; + (void) p2; + + /* do nothing */ + + return 0; +} diff -Nru papi-5.7.0+dfsg/src/components/sde/interface/papi_sde_interface.h papi-6.0.0~dfsg/src/components/sde/interface/papi_sde_interface.h --- papi-5.7.0+dfsg/src/components/sde/interface/papi_sde_interface.h 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sde/interface/papi_sde_interface.h 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,99 @@ +#ifndef PAPI_SDE_INTERFACE_H +#define PAPI_SDE_INTERFACE_H + +#include + +#define PAPI_SDE_RO 0x00 +#define PAPI_SDE_RW 0x01 +#define PAPI_SDE_DELTA 0x00 +#define PAPI_SDE_INSTANT 0x10 + +#define PAPI_SDE_long_long 0x0 +#define PAPI_SDE_int 0x1 +#define PAPI_SDE_double 0x2 +#define PAPI_SDE_float 0x3 + +#define PAPI_SDE_SUM 0x0 +#define PAPI_SDE_MAX 0x1 +#define PAPI_SDE_MIN 0x2 + + +#define GET_FLOAT_SDE(x) *((float *)&x) +#define GET_DOUBLE_SDE(x) *((double *)&x) +/* + * GET_SDE_RECORDER_ADDRESS() USAGE EXAMPLE: + * If SDE recorder logs values of type 'double': + * double *ptr = GET_SDE_RECORDER_ADDRESS(papi_event_value[6], double); + * for (j=0; j +#include +#include +#include +#include +#include +#include "sde_internal.h" + + +/*************************************************************************/ +/* Functions related to internal hashing of events */ +/*************************************************************************/ + +static unsigned int ht_hash_id(unsigned int uniq_id){ + return uniq_id%PAPISDE_HT_SIZE; +} + +// djb2 hash +static unsigned long ht_hash_name(const char *str) +{ + unsigned long hash = 5381; + int 
c; + + while ((c = *str++)) + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + + return hash % PAPISDE_HT_SIZE; +} + +static void ht_insert(papisde_list_entry_t *hash_table, int ht_key, sde_counter_t *sde_counter) +{ + papisde_list_entry_t *list_head, *new_entry; + + list_head = &hash_table[ht_key]; + // If we have no counter is associated with this key we will put the new + // counter on the head of the list which has already been allocated. + if( NULL == list_head->item ){ + list_head->item = sde_counter; + list_head->next = NULL; // Just for aesthetic reasons. + return; + } + + // If we made it here it means that the head was occupied, so we + // will allocate a new element and put it just after the head. + new_entry = (papisde_list_entry_t *)calloc(1, sizeof(papisde_list_entry_t)); + new_entry->item = sde_counter; + new_entry->next = list_head->next; + list_head->next = new_entry; + + return; +} + +static sde_counter_t *ht_delete(papisde_list_entry_t *hash_table, int ht_key, unsigned int uniq_id) +{ + papisde_list_entry_t *list_head, *curr, *prev; + sde_counter_t *item; + + list_head = &hash_table[ht_key]; + if( NULL == list_head->item ){ + PAPIERROR("ht_delete(): the entry does not exist.\n"); + fprintf(stderr,"ht_delete(): the entry does not exist.\n"); + return NULL; + } + + // If the head contains the element to be deleted, free the space of the counter and pull the list up. + if( list_head->item->glb_uniq_id == uniq_id ){ + item = list_head->item; + if( NULL != list_head->next) + *list_head = *(list_head->next); + return item; + } + + prev = list_head; + // Traverse the linked list to find the element. + for(curr=list_head->next; NULL != curr; curr=curr->next){ + if(NULL == curr->item){ // This is only permitted for the head of the list. 
+ PAPIERROR("ht_delete(): the hash table is clobbered.\n"); + fprintf(stderr,"ht_delete(): the hash table is clobbered.\n"); + return NULL; + } + if(curr->item->glb_uniq_id == uniq_id){ + prev->next = curr->next; + item = curr->item; + free(curr); // free the hash table entry + return item; + } + prev = curr; + } + + fprintf(stderr,"ht_delete(): the item is not in the list.\n"); + return NULL; +} + +static sde_counter_t *ht_lookup_by_name(papisde_list_entry_t *hash_table, const char *name) +{ + papisde_list_entry_t *list_head, *curr; + + list_head = &hash_table[ht_hash_name(name)]; + if( NULL == list_head->item ){ + return NULL; + } + + for(curr=list_head; NULL != curr; curr=curr->next){ + if(NULL == curr->item){ // This can only legally happen for the head of the list. + PAPIERROR("ht_lookup_by_name() the hash table is clobbered\n"); + return NULL; + } + if( !strcmp(curr->item->name, name) ){ + return curr->item; + } + } + + return NULL; +} + +static sde_counter_t *ht_lookup_by_id(papisde_list_entry_t *hash_table, unsigned int uniq_id) +{ + papisde_list_entry_t *list_head, *curr; + + list_head = &hash_table[ht_hash_id(uniq_id)]; + if( NULL == list_head->item ){ + return NULL; + } + + for(curr=list_head; NULL != curr; curr=curr->next){ + if(NULL == curr->item){ // This can only legally happen for the head of the list. + PAPIERROR("ht_lookup_by_id() the hash table is clobbered\n"); + return NULL; + } + if(curr->item->glb_uniq_id == uniq_id){ + return curr->item; + } + } + + return NULL; +} + +static inline void free_counter(sde_counter_t *counter) +{ + int i; + + if( NULL == counter ) + return; + + free(counter->name); + free(counter->description); + + // If we are dealing with a recorder we need to free all the data associated with it. 
+ if( NULL != counter->recorder_data ){ + if( NULL != counter->recorder_data->sorted_buffer ){ + free( counter->recorder_data->sorted_buffer ); + } + for(i=0; irecorder_data->exp_container[i] ){ + free( counter->recorder_data->exp_container[i] ); + } + } + free(counter->recorder_data); + } + + // We are dealing with a counter whose 'data' field was + // allocated by us, not the library, so we need to free it. + if( counter->is_created ){ + free(counter->data); + } + + free(counter); +} + +static void recorder_data_to_contiguous(sde_counter_t *recorder, void *cont_buffer){ + long long current_size, typesize, used_entries, tmp_size = 0; + void *src, *dst; + int i; + + typesize = recorder->recorder_data->typesize; + used_entries = recorder->recorder_data->used_entries; + + for(i=0; irecorder_data->exp_container[i]; + dst = cont_buffer + tmp_size*typesize; + if ( (tmp_size+current_size) <= used_entries){ + memcpy(dst, src, current_size*typesize); + if ( (tmp_size+current_size) == used_entries){ + return; + } + }else{ + memcpy(dst, src, (used_entries-tmp_size)*typesize); + return; + } + tmp_size += current_size; + } +} + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +static sde_counter_t *allocate_and_insert(papisde_library_desc_t* lib_handle, const char* name, unsigned int uniq_id, int cntr_mode, int cntr_type, void* data, papi_sde_fptr_t func_ptr, void *param){ + + // make sure to calloc() the structure, so all the fields which we do not explicitly set remain zero. 
+ sde_counter_t *item = (sde_counter_t *)calloc(1, sizeof(sde_counter_t)); + item->data = data; + item->func_ptr = func_ptr; + item->param = param; + item->cntr_type = cntr_type; + item->cntr_mode = cntr_mode; + item->glb_uniq_id = uniq_id; + item->name = strdup( name ); + item->description = strdup( name ); + item->which_lib = lib_handle; + + (void)ht_insert(lib_handle->lib_counters, ht_hash_name(name), item); + + papisde_control_t *gctl = _papisde_global_control; + (void)ht_insert(gctl->all_reg_counters, ht_hash_id(uniq_id), item); + + return item; +} + +int delete_counter(papisde_library_desc_t* lib_handle, const char* name) +{ + + sde_counter_t *tmp_item; + papisde_control_t *gctl = _papisde_global_control; + unsigned int item_uniq_id; + + // Look for the counter entry in the hash-table of the library + tmp_item = ht_lookup_by_name(lib_handle->lib_counters, name); + if( NULL == tmp_item ) + return 1; + + item_uniq_id = tmp_item->glb_uniq_id; + + // Delete the entry from the library hash-table (which hashes by name) + tmp_item = ht_delete(lib_handle->lib_counters, ht_hash_name(name), item_uniq_id); + if( NULL == tmp_item ){ + return 1; + } + + // Delete the entry from the global hash-table (which hashes by id) and free the memory + // occupied by the counter (not the hash-table entry 'papisde_list_entry_t', the 'sde_counter_t') + tmp_item = ht_delete(gctl->all_reg_counters, ht_hash_id(item_uniq_id), item_uniq_id); + if( NULL == tmp_item ){ + return 1; + } + + // We free the counter only once, although it is in two hash-tables, + // because it is the same structure that is pointed to by both hash-tables. 
+ free_counter(tmp_item); + + return 0; +} + +/*************************************************************************/ +/* Below is the actual "hardware implementation" of the sde counters */ +/*************************************************************************/ + +static int +sde_cast_and_store(void *data, long long int previous_value, void *rslt, int cntr_type){ + void *tmp_ptr; + + switch(cntr_type){ + case PAPI_SDE_long_long: + *(long long int *)rslt = *((long long int *)data) - previous_value; + SUBDBG(" value LL=%lld (%lld-%lld)\n", *(long long int *)rslt, *((long long int *)data), previous_value); + return PAPI_OK; + case PAPI_SDE_int: + // We need to cast the result to "long long" so it is expanded to 64bit to take up all the space + *(long long int *)rslt = (long long int) (*((int *)data) - (int)previous_value); + SUBDBG(" value LD=%lld (%d-%d)\n", *(long long int *)rslt, *((int *)data), (int)previous_value); + return PAPI_OK; + case PAPI_SDE_double: + tmp_ptr = &previous_value; + *(double *)rslt = (*((double *)data) - *((double *)tmp_ptr)); + SUBDBG(" value LF=%lf (%lf-%lf)\n", *(double *)rslt, *((double *)data), *((double *)(&previous_value))); + return PAPI_OK; + case PAPI_SDE_float: + // We need to cast the result to "double" so it is expanded to 64bit to take up all the space + tmp_ptr = &previous_value; + *(double *)rslt = (double)(*((float *)data) - (float)(*((double *)tmp_ptr)) ); + SUBDBG(" value F=%lf (%f-%f)\n", *(double *)rslt, *((float *)data), (float)(*((double *)(&previous_value))) ); + return PAPI_OK; + default: + PAPIERROR("Unsupported counter type: %d\n",cntr_type); + return -1; + } + +} + + +/* both "rslt" and "data" are local variables that this component stored after promoting to 64 bits. 
*/ +#define _SDE_AGGREGATE( _TYPE, _RSLT_TYPE ) do{\ + switch(group_flags){\ + case PAPI_SDE_SUM:\ + *(_RSLT_TYPE *)rslt = (_RSLT_TYPE) ((_TYPE)(*(_RSLT_TYPE *)rslt) + (_TYPE)(*((_RSLT_TYPE *)data)) );\ + break;\ + case PAPI_SDE_MAX:\ + if( *(_RSLT_TYPE *)rslt < *((_RSLT_TYPE *)data) )\ + *(_RSLT_TYPE *)rslt = *((_RSLT_TYPE *)data);\ + break;\ + case PAPI_SDE_MIN:\ + if( *(_RSLT_TYPE *)rslt > *((_RSLT_TYPE *)data) )\ + *(_RSLT_TYPE *)rslt = *((_RSLT_TYPE *)data);\ + break;\ + default:\ + PAPIERROR("Unsupported counter group flag: %d\n",group_flags);\ + return -1;\ + } \ + }while(0) + +int aggregate_value_in_group(long long int *data, long long int *rslt, int cntr_type, int group_flags){ + + switch(cntr_type){ + case PAPI_SDE_long_long: + _SDE_AGGREGATE(long long int, long long int); + return PAPI_OK; + case PAPI_SDE_int: + // We need to cast the result to "long long" so it is expanded to 64bit to take up all the space + _SDE_AGGREGATE(int, long long int); + return PAPI_OK; + case PAPI_SDE_double: + _SDE_AGGREGATE(double, double); + return PAPI_OK; + case PAPI_SDE_float: + // We need to cast the result to "double" so it is expanded to 64bit to take up all the space + _SDE_AGGREGATE(float, double); + return PAPI_OK; + default: + PAPIERROR("Unsupported counter type: %d\n",cntr_type); + return -1; + } + +} + +/** + This function assumes that all counters in a group (including recursive subgroups) have the same type. 
+ */ +static int sde_read_counter_group( sde_counter_t *counter, long long int *rslt ){ + papisde_list_entry_t *curr; + long long int final_value = 0; + + if( NULL == counter ){ + PAPIERROR("sde_read_counter_group(): Counter parameter is NULL.\n"); + return PAPI_EINVAL; + } + + curr = counter->counter_group_head; + if( NULL == curr ){ + PAPIERROR("sde_read_counter_group(): Counter '%s' is not a counter group.\n",counter->name); + return PAPI_EINVAL; + } + + do{ + long long int tmp_value = 0; + int ret_val; + + sde_counter_t *tmp_cntr = curr->item; + if( NULL == tmp_cntr ){ + PAPIERROR("sde_read_counter_group(): List of counters in counter group '%s' is clobbered.\n",counter->name); + return PAPI_EINVAL; + } + + // We can _not_ have a recorder inside a group. + if( NULL != tmp_cntr->recorder_data ){ + PAPIERROR("sde_read_counter_group(): Recorder found inside counter group: %s.\n",tmp_cntr->name); + }else{ + // We allow counter groups to contain other counter groups recursively. + if( NULL != tmp_cntr->counter_group_head ){ + ret_val = sde_read_counter_group( tmp_cntr, &tmp_value ); + if( ret_val != PAPI_OK ){ + // If something went wrong with one counter group, ignore it silently. + continue; + } + }else{ // If we are here it means that we are trying to read a real counter. + if( (NULL == tmp_cntr->data) && (NULL == tmp_cntr->func_ptr) ){ + PAPIERROR("sde_read_counter_group(): Attempted read on a placeholder: %s.\n",tmp_cntr->name); + // If something went wrong with one counter, ignore it silently. + continue; + } + + ret_val = sde_hardware_read_and_store( tmp_cntr, tmp_cntr->previous_data, &tmp_value ); + if( PAPI_OK != ret_val ){ + PAPIERROR("sde_read_counter_group(): Error occured when reading counter: %s.\n",tmp_cntr->name); + } + } + + // There is nothing meaningful we could do with the error code here, so ignore it. 
+ (void)aggregate_value_in_group(&tmp_value, &final_value, tmp_cntr->cntr_type, counter->counter_group_flags); + } + + curr = curr->next; + }while(NULL != curr); + + *rslt = final_value; + return PAPI_OK; +} + +static int +sde_hardware_write( sde_counter_t *counter, long long int new_value ) +{ + double tmp_double; + void *tmp_ptr; + + switch(counter->cntr_type){ + case PAPI_SDE_long_long: + *((long long int *)(counter->data)) = new_value; + break; + case PAPI_SDE_int: + *((int *)(counter->data)) = (int)new_value; + break; + case PAPI_SDE_double: + tmp_ptr = &new_value; + tmp_double = *((double *)tmp_ptr); + *((double *)(counter->data)) = tmp_double; + break; + case PAPI_SDE_float: + // The pointer has to be 64bit. We can cast the variable to safely convert between bit-widths later on. + tmp_ptr = &new_value; + tmp_double = *((double *)tmp_ptr); + *((float *)(counter->data)) = (float)tmp_double; + break; + default: + PAPIERROR("Unsupported counter type: %d\n",counter->cntr_type); + return -1; + } + + return PAPI_OK; +} + +static int +sde_hardware_read_and_store( sde_counter_t *counter, long long int previous_value, long long int *rslt ) +{ + int ret_val; + long long int tmp_int; + void *tmp_data; + + char *event_name = counter->name; + + if ( counter->data != NULL ) { + SUBDBG("Reading %s by accessing data pointer.\n", event_name); + tmp_data = counter->data; + } else if( NULL != counter->func_ptr ){ + SUBDBG("Reading %s by calling registered function pointer.\n", event_name); + tmp_int = counter->func_ptr(counter->param); + tmp_data = &tmp_int; + } else{ + PAPIERROR("sde_hardware_read_and_store(): Event %s has neither a variable nor a function pointer associated with it.\n", event_name); + return -1; + } + + if( is_instant(counter->cntr_mode) ){ + /* Instant counter means that we don't subtract the previous value (which we read at PAPI_Start()) */ + previous_value = 0; + } else if( is_delta(counter->cntr_mode) ){ + /* Do nothing here, this is the default mode */ + 
} else{ + PAPIERROR("Unsupported mode (%d) for event: %s\n",counter->cntr_mode, event_name); + return -1; + } + + ret_val = sde_cast_and_store(tmp_data, previous_value, rslt, counter->cntr_type); + return ret_val; +} + + + +/** This helper function checks if the global structure has been allocated + and allocates it if has not. + @return a pointer to the global structure. + */ +papisde_control_t *get_global_struct(void){ + // Allocate the global control structure, unless it has already been allocated by another library + // or the application code calling PAPI_name_to_code() for an SDE. + if ( !_papisde_global_control ) { + SUBDBG("get_global_struct(): global SDE control struct is being allocated.\n"); + _papisde_global_control = ( papisde_control_t* ) papi_calloc( 1, sizeof( papisde_control_t ) ); + } + return _papisde_global_control; +} + +/** This helper function checks to see if a given library has already been initialized and exists + in the global structure of the component. + @param[in] a pointer to the global structure. + @param[in] a string containing the name of the library. + @return a pointer to the library handle. + */ +papisde_library_desc_t *find_library_by_name(const char *library_name, papisde_control_t *gctl){ + + if( (NULL == gctl) || (NULL == library_name) ) + return NULL; + + papisde_library_desc_t *tmp_lib = gctl->lib_list_head; + // Check to see if this library has already been initialized. + while(NULL != tmp_lib){ + char *tmp_name = tmp_lib->libraryName; + SUBDBG("Checking library: '%s' against registered library: '%s'\n",library_name, tmp_lib->libraryName); + // If we find the same library already registered, we do not create a new entry. + if( (NULL != tmp_name) && !strcmp(tmp_name, library_name) ) + return tmp_lib; + + tmp_lib = tmp_lib->next; + } + + return NULL; +} + +/** This helper function simply adds a library handle to the beginning of the list of libraries + in the global structure. 
It's only reason of existence is to hide the structure of the + linked list in case we want to change it in the future. + @param[in] a pointer to the library handle. + @param[in] a pointer to the global structure. + */ +void insert_library_handle(papisde_library_desc_t *lib_handle, papisde_control_t *gctl){ + SUBDBG("insert_library_handle(): inserting new handle for library: '%s'\n",lib_handle->libraryName); + lib_handle->next = gctl->lib_list_head; + gctl->lib_list_head = lib_handle; + + return; +} + +/** This function creates the SDE component structure for an individual + software library and returns a handle to the structure. + @param[in] name_of_library -- library name + @param[in] event_count -- number of exposed software defeined events + @param[out] sde_handle -- opaque pointer to sde structure for initialized + library + */ +papi_handle_t +__attribute__((visibility("default"))) +papi_sde_init(const char *name_of_library) +{ + papisde_library_desc_t *tmp_lib; + + SUBDBG("Registering library: '%s'\n",name_of_library); + + // Lock before we read and/or modify the global structures. + papi_sde_lock(); + + // Put the actual work in a different function so we call it from other + // places in the component. We have to do this because we cannot call + // papi_sde_init() from places in the code which already call + // papi_sde_lock()/papi_sde_unlock(), or we will end up with deadlocks. + tmp_lib = do_sde_init(name_of_library); + + papi_sde_unlock(); + + SUBDBG("Library '%s' has been registered.\n",name_of_library); + + return tmp_lib; +} + + +// Initialize library handle, or return the existing one if already +// initialized. This function is _not_ thread safe, so it needs to be called +// from within regions protected by papi_sde_lock()/papi_sde_unlock(). 
+static papi_handle_t +do_sde_init(const char *name_of_library) +{ + papisde_control_t* gctl; + papisde_library_desc_t *tmp_lib; + + SUBDBG("Registering library: '%s'\n",name_of_library); + + gctl = get_global_struct(); + + // If the library is already initialized, return the handle to it + tmp_lib = find_library_by_name(name_of_library, gctl); + if( NULL != tmp_lib ){ + return tmp_lib; + } + + // If the library is not already initialized, then initialize it. + tmp_lib = ( papisde_library_desc_t* ) papi_calloc( 1, sizeof( papisde_library_desc_t ) ); + tmp_lib->libraryName = strdup(name_of_library); + + insert_library_handle(tmp_lib, gctl); + + return tmp_lib; +} + + +int +__attribute__((visibility("default"))) +papi_sde_add_counter_to_group(papi_handle_t handle, const char *event_name, const char *group_name, uint32_t group_flags) +{ + papisde_library_desc_t *lib_handle; + sde_counter_t *tmp_item, *tmp_group; + unsigned int cntr_group_uniq_id; + char *full_event_name, *full_group_name; + + SUBDBG("papi_sde_add_counter_to_group(): Adding counter: %s into group %s\n",event_name, group_name); + + lib_handle = (papisde_library_desc_t *) handle; + if( (NULL == lib_handle) || (NULL == lib_handle->libraryName) ){ + PAPIERROR("papi_sde_add_counter_to_group(): 'handle' is clobbered. Unable to add counter to group.\n"); + return PAPI_EINVAL; + } + + + size_t str_len = strlen(lib_handle->libraryName)+strlen(event_name)+2+1; // +2 for "::" and +1 for '\0' + full_event_name = malloc(str_len*sizeof(char)); + snprintf(full_event_name, str_len, "%s::%s", lib_handle->libraryName, event_name); + + // After this point we will be modifying data structures, so we need to acquire a lock. + // This function has multiple exist points. If you add more, make sure you unlock before each one of them. + papi_sde_lock(); + + // Check to make sure that the event is already registered. This is not the place to create a placeholder. 
+ tmp_item = ht_lookup_by_name(lib_handle->lib_counters, full_event_name); + if( NULL == tmp_item ){ + papi_sde_unlock(); + PAPIERROR("papi_sde_add_counter_to_group(): Unable to find counter: '%s'.\n",full_event_name); + free(full_event_name); + return PAPI_EINVAL; + } + + // We will not use the name beyond this point + free(full_event_name); + + str_len = strlen(lib_handle->libraryName)+strlen(group_name)+2+1; // +2 for "::" and +1 for '\0' + full_group_name = malloc(str_len*sizeof(char)); + snprintf(full_group_name, str_len, "%s::%s", lib_handle->libraryName, group_name); + + // Check to see if the group exists already. Otherwise we need to create it. + tmp_group = ht_lookup_by_name(lib_handle->lib_counters, full_group_name); + if( NULL == tmp_group ){ + + // We use the current number of registered events as the uniq id of the counter group, and we + // increment it because counter groups are treated as real counters by the outside world. + // They are first class citizens. + papisde_control_t *gctl = _papisde_global_control; + cntr_group_uniq_id = gctl->num_reg_events++; + gctl->num_live_events++; + _sde_vector.cmp_info.num_native_events = gctl->num_live_events; + + SUBDBG("%s line %d: Unique ID for new counter group = %d\n", __FILE__, __LINE__, cntr_group_uniq_id); + + tmp_group = (sde_counter_t *)calloc(1, sizeof(sde_counter_t)); + tmp_group->glb_uniq_id = cntr_group_uniq_id; + // copy the name because we will free the malloced space further down in this function. + tmp_group->name = strdup(full_group_name); + // make a copy here, because we will free() the 'name' and the 'description' separately. + tmp_group->description = strdup( full_group_name ); + tmp_group->which_lib = lib_handle; + tmp_group->counter_group_flags = group_flags; + // Be explicit so that people reading the code can spot the initialization easier. 
+ tmp_group->data = NULL; + tmp_group->func_ptr = NULL; + tmp_group->param = NULL; + tmp_group->counter_group_head = NULL; + + (void)ht_insert(lib_handle->lib_counters, ht_hash_name(full_group_name), tmp_group); + (void)ht_insert(gctl->all_reg_counters, ht_hash_id(cntr_group_uniq_id), tmp_group); + + }else{ + // should the following branch ever be true? Why do we already have a group registered if it's empty? + if( NULL == tmp_group->counter_group_head ){ + PAPIERROR("papi_sde_add_counter_to_group(): Found an empty counter group: '%s'. This might indicate that a cleanup routine is not doing its job.\n", group_name); + } + + // make sure the caller is not trying to change the flags of the group after it has been created. + if( tmp_group->counter_group_flags != group_flags ){ + papi_sde_unlock(); + PAPIERROR("papi_sde_add_counter_to_group(): Attempting to add counter '%s' to counter group '%s' with incompatible group flags.\n", event_name, group_name); + free(full_group_name); + return PAPI_EINVAL; + } + } + + // Add the new counter to the group's head. + papisde_list_entry_t *new_head = calloc(1, sizeof(papisde_list_entry_t)); + new_head->item = tmp_item; + new_head->next = tmp_group->counter_group_head; + tmp_group->counter_group_head = new_head; + + papi_sde_unlock(); + free(full_group_name); + return PAPI_OK; +} + + +// In contrast with papi_sde_register_counter(), the following function creates +// a counter whose memory is allocated and managed by PAPI, not the library. +// This counter can only by modified via the functions papi_sde_inc_counter() +// and papi_sde_reset_counter(). This has two benefits over a counter which +// lives inside a library and is modified directly by that library: +// A) Our counter and the modifying API is guaranteed to be thread safe. +// B) Since we learn about each change in the value of the counter, we can +// implement accurate overflowing and/or a push mode. 
+// +// However, this approach has higher overhead than executing "my_cntr += value" inside a library. + +int +__attribute__((visibility("default"))) +papi_sde_create_counter( papi_handle_t handle, const char *event_name, int cntr_mode, void **cntr_handle ) +{ + int ret_val; + long long int *counter_data; + char *full_event_name; + papisde_library_desc_t *lib_handle; + sde_counter_t *cntr, *placeholder; + + lib_handle = (papisde_library_desc_t *) handle; + if( (NULL == lib_handle) || (NULL == lib_handle->libraryName) ){ + PAPIERROR("papi_sde_create_counter(): 'handle' is clobbered. Unable to create counter.\n"); + return PAPI_EINVAL; + } + + SUBDBG("Preparing to create counter: '%s' with mode: '%d' in SDE library: %s.\n", event_name, cntr_mode, lib_handle->libraryName); + + counter_data = calloc(1, sizeof(long long int)); + + + ret_val = sde_setup_counter_internals( lib_handle, event_name, cntr_mode, PAPI_SDE_long_long, counter_data, NULL, NULL, &placeholder ); + if( PAPI_OK != ret_val ){ + return ret_val; + } + + size_t str_len = strlen(lib_handle->libraryName)+strlen(event_name)+2+1; // +2 for "::" and +1 for '\0' + full_event_name = malloc(str_len*sizeof(char)); + snprintf(full_event_name, str_len, "%s::%s", lib_handle->libraryName, event_name); + + cntr = ht_lookup_by_name(lib_handle->lib_counters, full_event_name); + if(NULL == cntr) { + SUBDBG("Logging counter '%s' not properly inserted in SDE library '%s'\n", full_event_name, lib_handle->libraryName); + free(full_event_name); + return PAPI_ECMP; + } + + // Signify that this counter is a created counter (as opposed to a registered one). + // The reason we need to know is so we can free() the 'data' entry which we allocated here, and for + // correctness checking in papi_sde_inc_coutner() and papi_sde_reset_counter(). 
+ cntr->is_created = 1; + + if( NULL != cntr_handle ){ + *(sde_counter_t **)cntr_handle = cntr; + } + + free(full_event_name); + + return PAPI_OK; +} + + +// The following function works only for counters created using papi_sde_create_counter(). +int +__attribute__((visibility("default"))) +papi_sde_inc_counter( papi_handle_t cntr_handle, long long int increment) +{ + long long int *ptr; + sde_counter_t *tmp_cntr; +#if defined(SDE_HAVE_OVERFLOW) + EventSetInfo_t *ESI; + int cidx, i, index_in_ESI = -1; + ThreadInfo_t *thread; + sde_control_state_t *sde_ctl; +#endif //defined(SDE_HAVE_OVERFLOW) + + papi_sde_lock(); + + tmp_cntr = (sde_counter_t *)cntr_handle; + if( NULL == tmp_cntr ){ + papi_sde_unlock(); + PAPIERROR("papi_sde_inc_counter(): 'cntr_handle' is clobbered. Unable to modify value of counter.\n"); + return PAPI_EINVAL; + } + +// SUBDBG("Preparing to increment counter: '%s::%s' by %lld.\n", tmp_cntr->which_lib->libraryName, tmp_cntr->name, increment); + + ptr = (long long int *)(tmp_cntr->data); + + if( NULL == ptr ){ + papi_sde_unlock(); + PAPIERROR("papi_sde_inc_counter(): Counter structure is clobbered. 
Unable to modify value of counter.\n"); + return PAPI_EINVAL; + } + + if( !tmp_cntr->is_created ){ + papi_sde_unlock(); + PAPIERROR("papi_sde_inc_counter(): Counter is not created by PAPI, cannot be modified using this function.\n"); + return PAPI_EINVAL; + } + + if( PAPI_SDE_long_long != tmp_cntr->cntr_type ){ + papi_sde_unlock(); + PAPIERROR("papi_sde_inc_counter(): Counter is not of type \"long long int\" and cannot be modified using this function.\n"); + return PAPI_EINVAL; + } + + *ptr += increment; + +#if defined(SDE_HAVE_OVERFLOW) + cidx = _sde_vector.cmp_info.CmpIdx; + thread = _papi_hwi_lookup_thread( 0 ); + if( NULL == thread ) + goto counter_did_not_overflow; + + ESI = thread->running_eventset[cidx]; + // Check if there is a running event set and it has some events set to overflow + if( (NULL == ESI) || !(ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE) ) + goto counter_did_not_overflow; + + sde_ctl = ( sde_control_state_t * ) ESI->ctl_state; + int event_counter = ESI->overflow.event_counter; + + // Check all the events that are set to overflow + index_in_ESI = -1; + for (i = 0; i < event_counter; i++ ) { + int papi_index = ESI->overflow.EventIndex[i]; + unsigned int counter_uniq_id = sde_ctl->which_counter[papi_index]; + // If the created counter that we are incrementing corresponds to + // an event that was set to overflow, read the deadline and threshold. + if( counter_uniq_id == tmp_cntr->glb_uniq_id ){ + index_in_ESI = i; + break; + } + } + + if( index_in_ESI >= 0 ){ + long long deadline, threshold, latest; + deadline = ESI->overflow.deadline[index_in_ESI]; + threshold = ESI->overflow.threshold[index_in_ESI]; + + // If the current value has exceeded the deadline then + // invoke the user handler and update the deadline. 
+ latest = *ptr; + SUBDBG("counter: '%s::%s' has value: %lld and the overflow deadline is at: %lld.\n", tmp_cntr->which_lib->libraryName, tmp_cntr->name, latest, deadline); + if( latest > deadline ){ + // We adjust the deadline in a way that it remains a multiple of threshold + // so we don't create an additive error. However, this code path should + // result in a precise overflow trigger, so this might not be necessary. + ESI->overflow.deadline[index_in_ESI] = threshold*(latest/threshold) + threshold; + invoke_user_handler(cntr_handle); + } + } + +counter_did_not_overflow: +#endif // defined(SDE_HAVE_OVERFLOW) + + papi_sde_unlock(); + + return PAPI_OK; +} + + +int +__attribute__((visibility("default"))) +papi_sde_compare_long_long(const void *p1, const void *p2){ + long long n1, n2; + n1 = *(long long *)p1; + n2 = *(long long *)p2; + + if( n1 < n2 ) return -1; + if( n1 > n2 ) return 1; + return 0; +} + +int +__attribute__((visibility("default"))) +papi_sde_compare_int(const void *p1, const void *p2){ + int n1, n2; + n1 = *(int *)p1; + n2 = *(int *)p2; + + if( n1 < n2 ) return -1; + if( n1 > n2 ) return 1; + return 0; +} + +int +__attribute__((visibility("default"))) +papi_sde_compare_double(const void *p1, const void *p2){ + double n1, n2; + n1 = *(double *)p1; + n2 = *(double *)p2; + + if( n1 < n2 ) return -1; + if( n1 > n2 ) return 1; + return 0; +} + +int +__attribute__((visibility("default"))) +papi_sde_compare_float(const void *p1, const void *p2){ + float n1, n2; + n1 = *(float *)p1; + n2 = *(float *)p2; + + if( n1 < n2 ) return -1; + if( n1 > n2 ) return 1; + return 0; +} + + + +#define _SDE_CMP_MIN 0 +#define _SDE_CMP_MAX 1 + +// This function returns a "long long" which contains a pointer to the +// data element that corresponds to the edge (min/max), so that it works +// for all types of data, not only integers. 
+// Locate the smallest or the largest element stored in a recorder.
+//   param      -- points to the sde_sorting_params_t attached to the aux event; it
+//                 carries the recorder ('recording') and the user's comparison function.
+//   which_edge -- _SDE_CMP_MIN or _SDE_CMP_MAX.
+// Returns a malloc()-ed copy of the selected element, cast to "long long", or 0
+// (i.e. a NULL pointer) when there is nothing to return: no recorded elements, no
+// comparison function, an unknown 'which_edge' while a sorted buffer is cached, an
+// unallocated first chunk, or an allocation failure.
+// The copy is never freed in here; the user (who calls PAPI_read()) must free it.
+static inline long long _sde_compute_edge(void *param, int which_edge){
+    const char *edge = NULL;
+    void *edge_copy;
+    long long elem_cnt;
+    long long current_size, cumul_size = 0;
+    long long i;
+    int chunk;
+    size_t typesize;
+    sde_counter_t *rcrd;
+    int (*cmpr_func_ptr)(const void *p1, const void *p2);
+
+
+    rcrd = ((sde_sorting_params_t *)param)->recording;
+    elem_cnt = rcrd->recorder_data->used_entries;
+    typesize = rcrd->recorder_data->typesize;
+
+    cmpr_func_ptr = ((sde_sorting_params_t *)param)->cmpr_func_ptr;
+
+    // The return value is supposed to be a pointer to the correct element, therefore zero
+    // is a NULL pointer, which should tell the caller that there was a problem.
+    if( (0 == elem_cnt) || (NULL == cmpr_func_ptr) )
+        return 0;
+
+    // If there is a sorted (contiguous) buffer, but it's stale, we need to free it.
+    // The value of elem_cnt (rcrd->recorder_data->used_entries) can
+    // only increase, or be reset to zero, but when it is reset to zero
+    // (by papi_sde_reset_recorder()) the buffer will be freed (by the same function).
+    if( (NULL != rcrd->recorder_data->sorted_buffer) &&
+        (rcrd->recorder_data->sorted_entries < elem_cnt) ){
+
+        free( rcrd->recorder_data->sorted_buffer );
+        rcrd->recorder_data->sorted_buffer = NULL;
+        rcrd->recorder_data->sorted_entries = 0;
+    }
+
+    // Check if a sorted contiguous buffer is already there. If there is, return
+    // the first or last element (for MIN, or MAX respectively).
+    if( NULL != rcrd->recorder_data->sorted_buffer ){
+        // Byte-wise pointer arithmetic; arithmetic on "void *" is a compiler extension.
+        const char *sorted = (const char *)rcrd->recorder_data->sorted_buffer;
+        if( _SDE_CMP_MIN == which_edge )
+            edge = sorted;
+        if( _SDE_CMP_MAX == which_edge )
+            edge = sorted + (size_t)(elem_cnt-1)*typesize;
+    }else{
+        // Make "edge" point to the beginning of the first chunk.
+        edge = (const char *)rcrd->recorder_data->exp_container[0];
+        if ( NULL == edge )
+            return 0;
+
+        // NOTE(review): the header of this loop was lost when the text was extracted
+        // (everything between '<' and the next '>' was stripped). It is reconstructed
+        // here assuming chunk "k" holds 2^k * EXP_CONTAINER_MIN_SIZE entries and that
+        // EXP_CONTAINER_ENTRIES bounds the number of chunks -- confirm against upstream.
+        cumul_size = 0;
+        for(chunk=0; chunk<EXP_CONTAINER_ENTRIES; chunk++){
+            const char *src;
+
+            current_size = ((long long)1<<chunk) * EXP_CONTAINER_MIN_SIZE;
+            src = (const char *)rcrd->recorder_data->exp_container[chunk];
+            // A missing chunk means there is nothing more to scan.
+            if( NULL == src )
+                break;
+
+            for(i=0; (i < (elem_cnt-cumul_size)) && (i < current_size); i++){
+                const char *next_elem = src + (size_t)i*typesize;
+                int rslt = cmpr_func_ptr(next_elem, edge);
+
+                // If the new element is smaller than the current min and we are looking for the min, then keep it.
+                if( (rslt < 0) && (_SDE_CMP_MIN == which_edge) )
+                    edge = next_elem;
+                // If the new element is larger than the current max and we are looking for the max, then keep it.
+                if( (rslt > 0) && (_SDE_CMP_MAX == which_edge) )
+                    edge = next_elem;
+            }
+
+            cumul_size += current_size;
+
+            if( cumul_size >= elem_cnt )
+                break;
+        }
+    }
+
+    // "edge" is still NULL if 'which_edge' was neither MIN nor MAX while a sorted buffer exists.
+    if( NULL == edge )
+        return 0;
+
+    // We might free the sorted_buffer (when it becomes stale), so we can't return "edge".
+    // Therefore, we allocate fresh space for the resulting element and copy it there.
+    // Since we do not know when the user will use this pointer, we will not be able
+    // to free it, so it is the responsibility of the user (who calls PAPI_read()) to
+    // free this memory.
+    edge_copy = malloc( 1 * typesize);
+    if( NULL == edge_copy )
+        return 0;
+    memcpy(edge_copy, edge, 1 * typesize);
+
+    // A pointer is guaranteed to fit inside a long long, so cast it and return a long long.
+    return (long long)edge_copy;
+}
+
+
+// This function returns a "long long" which contains a pointer to the
+// data element that corresponds to the requested quantile, so that it works
+// for all types of data, not only integers.
+
+// NOTE: This function allocates memory for one element and returns a pointer
+// to this memory. Since we do not know when the user will use this pointer, we
+// can not free it anywhere in this component, so it is the responsibility of
+// the user (who calls PAPI_read()) to free this memory.
+// Sort the recorded elements (building a contiguous copy first when no up-to-date
+// one is cached) and return a malloc()-ed copy of the element located 'percent'
+// percent of the way through the sorted data, cast to "long long".
+// Returns 0 (a NULL pointer) when there are no elements, no comparison function,
+// or when memory cannot be allocated.
+static inline long long _sde_compute_quantile(void *param, int percent){
+    long long quantile, elem_cnt;
+    void *result_data;
+    const char *tmp_ptr;
+    size_t typesize;
+    sde_counter_t *rcrd;
+    int (*cmpr_func_ptr)(const void *p1, const void *p2);
+
+    rcrd = ((sde_sorting_params_t *)param)->recording;
+    elem_cnt = rcrd->recorder_data->used_entries;
+    typesize = rcrd->recorder_data->typesize;
+
+    cmpr_func_ptr = ((sde_sorting_params_t *)param)->cmpr_func_ptr;
+
+    // The return value is supposed to be a pointer to the correct element, therefore zero
+    // is a NULL pointer, which should tell the caller that there was a problem.
+    if( (0 == elem_cnt) || (NULL == cmpr_func_ptr) )
+        return 0;
+
+    // If there is a sorted (contiguous) buffer, but it's stale, we need to free it.
+    // The value of elem_cnt (rcrd->recorder_data->used_entries) can
+    // only increase, or be reset to zero, but when it is reset to zero
+    // (by papi_sde_reset_recorder()) the buffer will be freed (by the same function).
+    if( (NULL != rcrd->recorder_data->sorted_buffer) &&
+        (rcrd->recorder_data->sorted_entries < elem_cnt) ){
+
+        free( rcrd->recorder_data->sorted_buffer );
+        rcrd->recorder_data->sorted_buffer = NULL;
+        rcrd->recorder_data->sorted_entries = 0;
+    }
+
+    // Check if a sorted buffer is already there. If there isn't, allocate one.
+    if( NULL == rcrd->recorder_data->sorted_buffer ){
+        void *fresh_buffer = malloc(elem_cnt * typesize);
+        // Without a buffer there is nothing to sort; report the problem as a NULL pointer.
+        if( NULL == fresh_buffer )
+            return 0;
+        rcrd->recorder_data->sorted_buffer = fresh_buffer;
+        recorder_data_to_contiguous(rcrd, rcrd->recorder_data->sorted_buffer);
+        // We set this field so we can test later to see if the allocated buffer is stale.
+        rcrd->recorder_data->sorted_entries = elem_cnt;
+    }
+    void *sorted_buffer = rcrd->recorder_data->sorted_buffer;
+
+    qsort(sorted_buffer, elem_cnt, typesize, cmpr_func_ptr);
+    // Byte-wise pointer arithmetic; arithmetic on "void *" is a compiler extension.
+    tmp_ptr = (const char *)sorted_buffer + typesize*((elem_cnt*percent)/100);
+
+    // We might free the sorted_buffer (when it becomes stale), so we can't return "tmp_ptr".
+    // Therefore, we allocate fresh space for the resulting element and copy it there.
+    // Since we do not know when the user will use this pointer, we will not be able
+    // to free it, so it is the responsibility of the user (who calls PAPI_read()) to
+    // free this memory.
+    result_data = malloc(typesize);
+    if( NULL == result_data )
+        return 0;
+    memcpy(result_data, tmp_ptr, typesize);
+
+    // convert the pointer into a long long so we can return it.
+    quantile = (long long)result_data;
+
+    return quantile;
+}
+
+
+long long _sde_compute_q1(void *param){
+    return _sde_compute_quantile(param, 25);
+}
+long long _sde_compute_med(void *param){
+    return _sde_compute_quantile(param, 50);
+}
+long long _sde_compute_q3(void *param){
+    return _sde_compute_quantile(param, 75);
+}
+long long _sde_compute_min(void *param){
+    return _sde_compute_edge(param, _SDE_CMP_MIN);
+}
+long long _sde_compute_max(void *param){
+    return _sde_compute_edge(param, _SDE_CMP_MAX);
+}
+
+
+// Create a recorder named 'event_name' in the library described by 'handle', together
+// with its auxiliary events: "<event_name>:CNT" (number of recorded entries) and, only
+// when 'cmpr_func_ptr' is not NULL, ":MIN", ":Q1", ":MED", ":Q3" and ":MAX".
+//   typesize      -- size in bytes of one recorded element.
+//   cmpr_func_ptr -- comparison function handed to the min/max/quantile callbacks.
+//   record_handle -- if not NULL, receives the handle of the new recorder.
+// Returns PAPI_OK on success, PAPI_EINVAL if 'handle' is clobbered, PAPI_ECMP if the
+// recorder cannot be found after it was set up, PAPI_ENOMEM if memory cannot be
+// allocated, or the error returned by the registration helpers.
+// The lock taken at the top is released on every exit path.
+int
+__attribute__((visibility("default")))
+papi_sde_create_recorder( papi_handle_t handle, const char *event_name, size_t typesize, int (*cmpr_func_ptr)(const void *p1, const void *p2), void **record_handle )
+{
+
+    int ret_val, i;
+    sde_counter_t *tmp_rec_handle;
+    char *aux_event_name;
+    size_t str_len;
+    char *full_event_name;
+#define _SDE_MODIFIER_COUNT 6
+    const char *modifiers[_SDE_MODIFIER_COUNT] = {":CNT",":MIN",":Q1",":MED",":Q3",":MAX"};
+    // Add a NULL pointer for symmetry with the 'modifiers' vector, since the modifier ':CNT' does not have a function pointer.
+    long long (*func_ptr_vec[_SDE_MODIFIER_COUNT])(void *) = {NULL, _sde_compute_min, _sde_compute_q1, _sde_compute_med, _sde_compute_q3, _sde_compute_max};
+    long long total_entries = (long long)EXP_CONTAINER_MIN_SIZE;
+
+    papisde_library_desc_t *lib_handle = handle;
+
+    papi_sde_lock();
+
+    if( (NULL == lib_handle) || (NULL == lib_handle->libraryName) ){
+        PAPIERROR("papi_sde_create_recorder(): 'handle' is clobbered. Unable to create recorder.\n");
+        papi_sde_unlock();
+        return PAPI_EINVAL;
+    }
+
+    SUBDBG("Preparing to create recorder: '%s' with typesize: '%d' in SDE library: %s.\n", event_name, (int)typesize, lib_handle->libraryName);
+
+    // We setup the recorder like this, instead of using sde_do_register() because recorders cannot be set to overflow.
+    ret_val = sde_setup_counter_internals( lib_handle, event_name, PAPI_SDE_DELTA|PAPI_SDE_RO, PAPI_SDE_long_long, NULL, NULL, NULL, NULL );
+    if( PAPI_OK != ret_val ){
+        // Do not leave this function with the lock still held.
+        papi_sde_unlock();
+        return ret_val;
+    }
+
+    str_len = strlen(lib_handle->libraryName)+strlen(event_name)+2+1; // +2 for "::" and +1 for '\0'
+    full_event_name = malloc(str_len*sizeof(char));
+    if( NULL == full_event_name ){
+        papi_sde_unlock();
+        return PAPI_ENOMEM;
+    }
+    snprintf(full_event_name, str_len, "%s::%s", lib_handle->libraryName, event_name);
+
+    tmp_rec_handle = ht_lookup_by_name(lib_handle->lib_counters, full_event_name);
+    if(NULL == tmp_rec_handle) {
+        SUBDBG("Recorder '%s' not properly inserted in SDE library '%s'\n", full_event_name, lib_handle->libraryName);
+        free(full_event_name);
+        papi_sde_unlock();
+        return PAPI_ECMP;
+    }
+
+    // Allocate the structure for the recorder data and meta-data.
+    tmp_rec_handle->recorder_data = calloc(1,sizeof(recorder_data_t));
+    // Allocate the first chunk of recorder data.
+    if( NULL != tmp_rec_handle->recorder_data )
+        tmp_rec_handle->recorder_data->exp_container[0] = malloc(total_entries*typesize);
+    if( (NULL == tmp_rec_handle->recorder_data) || (NULL == tmp_rec_handle->recorder_data->exp_container[0]) ){
+        // Leave the recorder without data, so that papi_sde_record() rejects it instead of crashing.
+        free(tmp_rec_handle->recorder_data);
+        tmp_rec_handle->recorder_data = NULL;
+        free(full_event_name);
+        papi_sde_unlock();
+        return PAPI_ENOMEM;
+    }
+    tmp_rec_handle->recorder_data->total_entries = total_entries;
+    tmp_rec_handle->recorder_data->typesize = typesize;
+    tmp_rec_handle->recorder_data->used_entries = 0;
+
+    // The handle is optional; the caller can retrieve it later by name.
+    if( NULL != record_handle ){
+        *(sde_counter_t **)record_handle = tmp_rec_handle;
+    }
+
+    // We will not use the name beyond this point
+    free(full_event_name);
+
+    // At this point we are done creating the recorder and we will create the additional events which will appear as modifiers of the recorder.
+    str_len = 0;
+    for(i=0; i<_SDE_MODIFIER_COUNT; i++){
+        size_t tmp_len = strlen(modifiers[i]);
+        if( tmp_len > str_len )
+            str_len = tmp_len;
+    }
+    str_len += strlen(event_name)+1;
+    aux_event_name = calloc(str_len, sizeof(char));
+    if( NULL == aux_event_name ){
+        papi_sde_unlock();
+        return PAPI_ENOMEM;
+    }
+
+    snprintf(aux_event_name, str_len, "%s%s", event_name, modifiers[0]);
+    SUBDBG("papi_sde_create_recorder(): Preparing to register aux counter: '%s' in SDE library: %s.\n", aux_event_name, lib_handle->libraryName);
+
+    // The :CNT aux counter is properly registered so that it can be set to overflow.
+    ret_val = sde_do_register( lib_handle, (const char *)aux_event_name, PAPI_SDE_INSTANT|PAPI_SDE_RO, PAPI_SDE_long_long, &(tmp_rec_handle->recorder_data->used_entries), NULL, NULL );
+    if( PAPI_OK != ret_val ){
+        SUBDBG("papi_sde_create_recorder(): Registration of aux counter: '%s' in SDE library: %s FAILED.\n", aux_event_name, lib_handle->libraryName);
+        papi_sde_unlock();
+        free(aux_event_name);
+        return ret_val;
+    }
+
+    // If the caller passed NULL as the function pointer, then they do _not_ want the quantiles. Otherwise, create them.
+    if( NULL != cmpr_func_ptr ){
+        for(i=1; i<_SDE_MODIFIER_COUNT; i++){
+            sde_sorting_params_t *sorting_params;
+
+            sorting_params = malloc(sizeof(sde_sorting_params_t)); // This will be free()-ed by papi_sde_unregister_counter()
+            if( NULL == sorting_params ){
+                papi_sde_unlock();
+                free(aux_event_name);
+                return PAPI_ENOMEM;
+            }
+            sorting_params->recording = tmp_rec_handle;
+            sorting_params->cmpr_func_ptr = cmpr_func_ptr;
+
+            snprintf(aux_event_name, str_len, "%s%s", event_name, modifiers[i]);
+
+            SUBDBG("papi_sde_create_recorder(): Preparing to register aux fp counter: '%s' in SDE library: %s.\n", aux_event_name, lib_handle->libraryName);
+            ret_val = sde_do_register(lib_handle, (const char *)aux_event_name, PAPI_SDE_RO|PAPI_SDE_INSTANT, PAPI_SDE_long_long, NULL, func_ptr_vec[i], sorting_params );
+            if( PAPI_OK != ret_val ){
+                SUBDBG("papi_sde_create_recorder(): Registration of aux counter: '%s' in SDE library: %s FAILED.\n", aux_event_name, lib_handle->libraryName);
+                // NOTE(review): 'sorting_params' is deliberately not freed here. sde_do_register() can
+                // fail after the counter (and this pointer) has already been stored -- verify ownership.
+                papi_sde_unlock();
+                free(aux_event_name);
+                return ret_val;
+            }
+        }
+    }
+
+    papi_sde_unlock();
+    free(aux_event_name);
+    return PAPI_OK;
+}
+
+
+// UPDATED for EXP-storage
+int
+__attribute__((visibility("default")))
+papi_sde_record( void *record_handle, size_t typesize, void *value)
+{
+    sde_counter_t *tmp_item;
+    long long used_entries, total_entries, prev_entries, offset;
+    int i, chunk;
+    long long tmp_size;
+
+    SUBDBG("Preparing to record value of size %lu at address: %p\n",typesize, value);
+
+    papi_sde_lock();
+
+    tmp_item = (sde_counter_t *)record_handle;
+
+    if( NULL == tmp_item ){
+        papi_sde_unlock();
+        PAPIERROR("papi_sde_record(): 'record_handle' is clobbered. Unable to record value.\n");
+        return PAPI_EINVAL;
+    }
+
+    if( NULL == tmp_item->recorder_data || NULL == tmp_item->recorder_data->exp_container[0]){
+        papi_sde_unlock();
+        PAPIERROR("papi_sde_record(): Counter structure is clobbered. 
Unable to record event.\n");
+        return PAPI_EINVAL;
+    }
+
+    // At this point the recorder exists, but we must check if it has room for more elements
+
+    used_entries = tmp_item->recorder_data->used_entries;
+    total_entries = tmp_item->recorder_data->total_entries;
+    assert(used_entries <= total_entries);
+
+    // Find the chunk that holds slot number "used_entries". Chunk "i" holds
+    // 2^i * EXP_CONTAINER_MIN_SIZE entries, "prev_entries" is the number of entries
+    // held by all the chunks before it and "tmp_size" the number held up to and
+    // including it.
+    // The lookup is driven by "used_entries" (and not by "total_entries") because
+    // papi_sde_reset_recorder() sets "used_entries" back to zero while keeping all
+    // the chunks allocated; after a reset we must start again from the first chunk.
+    // NOTE(review): the header of this loop and the shift expression further down were
+    // lost when the text was extracted; the bound EXP_CONTAINER_ENTRIES is assumed -- confirm.
+    prev_entries = 0;
+    tmp_size = 0;
+    for(i=0; i<EXP_CONTAINER_ENTRIES; i++){
+        long long factor = (long long)1<<i; // 2^i;
+        prev_entries = tmp_size;
+        tmp_size += factor * EXP_CONTAINER_MIN_SIZE;
+        if( used_entries < tmp_size )
+            break;
+    }
+    if( i >= EXP_CONTAINER_ENTRIES ){
+        // Every possible chunk is already full.
+        papi_sde_unlock();
+        PAPIERROR("papi_sde_record(): Recorder is full. Unable to record value.\n");
+        return PAPI_ECMP;
+    }
+    chunk = i;
+
+    // Find how many entries down the chunk we are.
+    offset = used_entries - prev_entries;
+
+    // "total_entries" is the capacity of the chunks allocated so far, so a chunk
+    // that ends beyond it has not been allocated yet.
+    if( tmp_size > total_entries ){
+        long long new_segment_size;
+        void *new_chunk;
+
+        new_segment_size = ((long long)1<<chunk) * EXP_CONTAINER_MIN_SIZE;
+        new_chunk = malloc(new_segment_size*typesize);
+        if( NULL == new_chunk ){
+            papi_sde_unlock();
+            PAPIERROR("papi_sde_record(): Unable to allocate memory. Unable to record value.\n");
+            return PAPI_ENOMEM;
+        }
+        tmp_item->recorder_data->exp_container[chunk] = new_chunk;
+        tmp_item->recorder_data->total_entries += new_segment_size;
+    }
+
+    // Byte-wise pointer arithmetic; arithmetic on "void *" is a compiler extension.
+    void *dest = (char *)tmp_item->recorder_data->exp_container[chunk] + offset*typesize;
+    (void)memcpy( dest, value, typesize );
+    tmp_item->recorder_data->used_entries++;
+
+    papi_sde_unlock();
+    return PAPI_OK;
+}
+
+
+
+// This function neither frees the allocated space, nor does it zero it. It only resets the counter of used entries so that
+// the allocated space can be reused (and overwritten) by future calls to record().
+// The cached sorted buffer (if any) is freed, because it no longer matches the data.
+int
+__attribute__((visibility("default")))
+papi_sde_reset_recorder( void *record_handle )
+{
+    sde_counter_t *tmp_rcrdr;
+
+    papi_sde_lock();
+    tmp_rcrdr = (sde_counter_t *)record_handle;
+
+    if( NULL == tmp_rcrdr || NULL == tmp_rcrdr->recorder_data ){
+        papi_sde_unlock();
+        PAPIERROR("papi_sde_reset_recorder(): 'record_handle' is clobbered. Unable to reset recorder.\n");
+        return PAPI_EINVAL;
+    }
+
+    // NOTE: do _not_ free the chunks and do _not_ reset "recorder_data->total_entries"
+
+    tmp_rcrdr->recorder_data->used_entries = 0;
+    free( tmp_rcrdr->recorder_data->sorted_buffer );
+    tmp_rcrdr->recorder_data->sorted_buffer = NULL;
+    tmp_rcrdr->recorder_data->sorted_entries = 0;
+
+    papi_sde_unlock();
+    return PAPI_OK;
+}
+
+
+// The following function works only for counters created using papi_sde_create_counter().
+// It sets the value of the counter back to zero.
+// Returns PAPI_OK on success, or PAPI_EINVAL if the handle is NULL, the counter has
+// no storage attached to it, or the counter was not created by PAPI.
+int
+__attribute__((visibility("default")))
+papi_sde_reset_counter( void *cntr_handle )
+{
+    long long int *ptr;
+    sde_counter_t *tmp_cntr;
+
+    papi_sde_lock();
+
+    tmp_cntr = (sde_counter_t *)cntr_handle;
+
+    if( NULL == tmp_cntr ){
+        papi_sde_unlock();
+        PAPIERROR("papi_sde_reset_counter(): 'cntr_handle' is clobbered. Unable to reset value of counter.\n");
+        return PAPI_EINVAL;
+    }
+
+    ptr = (long long int *)(tmp_cntr->data);
+
+    if( NULL == ptr ){
+        papi_sde_unlock();
+        PAPIERROR("papi_sde_reset_counter(): Counter structure is clobbered. Unable to reset value of counter.\n");
+        return PAPI_EINVAL;
+    }
+
+    // Only counters created by PAPI may be reset; this is the same test
+    // that papi_sde_inc_counter() performs before it modifies a counter.
+    if( !tmp_cntr->is_created ){
+        papi_sde_unlock();
+        PAPIERROR("papi_sde_reset_counter(): Counter is not created by PAPI, so it cannot be reset.\n");
+        return PAPI_EINVAL;
+    }
+
+    *ptr = 0; // Reset the counter.
+
+    papi_sde_unlock();
+
+    return PAPI_OK;
+}
+
+
+#if defined(SDE_HAVE_OVERFLOW)
+static int
+_sde_arm_timer(sde_control_state_t *sde_ctl){
+    struct itimerspec its;
+
+    // We will start the timer at 100us because we adjust its period in _sde_dispatch_timer()
+    // if the counter is not growing fast enough, or growing too slowly.
+ its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100*1000; // 100us + its.it_interval.tv_sec = its.it_value.tv_sec; + its.it_interval.tv_nsec = its.it_value.tv_nsec; + + SUBDBG( "starting SDE internal timer for emulating HARDWARE overflowing\n"); + if (timer_settime(sde_ctl->timerid, 0, &its, NULL) == -1){ + PAPIERROR("timer_settime"); + timer_delete(sde_ctl->timerid); + sde_ctl->has_timer = 0; + + // If the timer is broken, let the caller know that something internal went wrong. + return PAPI_ECMP; + } + + return PAPI_OK; +} +#endif //defined(SDE_HAVE_OVERFLOW) + + +static int +sde_setup_counter_internals( papi_handle_t handle, const char *event_name, int cntr_mode, int cntr_type, void *counter, papi_sde_fptr_t fp_counter, void *param, sde_counter_t **placeholder ) +{ + papisde_library_desc_t *lib_handle; + sde_counter_t *tmp_item; + unsigned int counter_uniq_id; + char *full_event_name; + + if( placeholder ) + *placeholder = NULL; + + lib_handle = (papisde_library_desc_t *) handle; + if( (NULL == lib_handle) || (NULL == lib_handle->libraryName) ){ + PAPIERROR("sde_setup_counter_internals(): 'handle' is clobbered. Unable to register counter.\n"); + return PAPI_EINVAL; + } + + size_t str_len = strlen(lib_handle->libraryName)+strlen(event_name)+2+1; // +2 for "::" and +1 for '\0' + full_event_name = malloc(str_len*sizeof(char)); + snprintf(full_event_name, str_len, "%s::%s", lib_handle->libraryName, event_name); + + SUBDBG("%s: Counter: '%s' will be added in library: %s.\n", __FILE__, full_event_name, lib_handle->libraryName); + + if( !is_instant(cntr_mode) && !is_delta(cntr_mode) ){ + PAPIERROR("Unknown mode %d. SDE counter mode must be either Instant or Delta.\n",cntr_mode); + free(full_event_name); + return PAPI_ECMP; + } + + // After this point we will be modifying data structures, so we need to acquire a lock. + // This function has multiple exist points. If you add more, make sure you unlock before each one of them. 
+ papi_sde_lock(); + + // Look if the event is already registered. + tmp_item = ht_lookup_by_name(lib_handle->lib_counters, full_event_name); + + if( NULL != tmp_item ){ + if( NULL != tmp_item->counter_group_head ){ + PAPIERROR("sde_setup_counter_internals(): Unable to register counter '%s'. There is a counter group with the same name.\n",full_event_name); + free(full_event_name); + papi_sde_unlock(); + return PAPI_EINVAL; + } + if( (NULL != tmp_item->data) || (NULL != tmp_item->func_ptr) ){ + // If it is registered and it is _not_ a placeholder then ignore it silently. + SUBDBG("%s: Counter: '%s' was already in library: %s.\n", __FILE__, full_event_name, lib_handle->libraryName); + free(full_event_name); + papi_sde_unlock(); + return PAPI_OK; + } + // If it is registered and it _is_ a placeholder then update the mode, the type, and the 'data' pointer or the function pointer. + SUBDBG("%s: Updating placeholder for counter: '%s' in library: %s.\n", __FILE__, full_event_name, lib_handle->libraryName); + + // Both "counter" and "fp_counter" can be NULL, if we are creating a recorder. + if( counter ){ + tmp_item->data = counter; + }else if( fp_counter ){ + tmp_item->func_ptr = fp_counter; + tmp_item->param = param; + } + tmp_item->cntr_mode = cntr_mode; + tmp_item->cntr_type = cntr_type; + free(full_event_name); + + if( placeholder ) + *placeholder = tmp_item; + + papi_sde_unlock(); + return PAPI_OK; + } + + // If neither the event, nor a placeholder exists, then use the current + // number of registered events as the index of the new one, and increment it. + papisde_control_t *gctl = _papisde_global_control; + counter_uniq_id = gctl->num_reg_events++; + gctl->num_live_events++; + _sde_vector.cmp_info.num_native_events = gctl->num_live_events; + + SUBDBG("%s: Counter %s has unique ID = %d\n", __FILE__, full_event_name, counter_uniq_id); + + // allocate_and_insert() does not care if any (or all) of "counter", "fp_counter", or "param" are NULL. 
It will just assign them to the structure. + tmp_item = allocate_and_insert( lib_handle, full_event_name, counter_uniq_id, cntr_mode, cntr_type, counter, fp_counter, param ); + papi_sde_unlock(); + if(NULL == tmp_item) { + SUBDBG("%s: Counter not inserted in SDE %s\n", __FILE__, lib_handle->libraryName); + free(full_event_name); + return PAPI_ECMP; + } + + free(full_event_name); + + return PAPI_OK; +} + + + +/** This function registers an event name and counter within the SDE component + structure attached to the handle. A default description for an event is + synthesized from the library name and the event name when they are registered. + @param[in] handle -- pointer (of opaque type papi_handle_t) to sde structure for an individual library + @param[in] event_name -- string containing the name of the event + @param[in] cntr_type -- the type of the counter (PAPI_SDE_long_long, PAPI_SDE_int, PAPI_SDE_double, PAPI_SDE_float) + @param[in] cntr_mode -- the mode of the counter (one of: PAPI_SDE_RO, PAPI_SDE_RW and one of: PAPI_SDE_DELTA, PAPI_SDE_INSTANT) + @param[in] counter -- pointer to a variable that stores the value for the event + */ +int +__attribute__((visibility("default"))) +papi_sde_register_counter( papi_handle_t handle, const char *event_name, int cntr_mode, int cntr_type, void *counter ) +{ + int ret_val; + papi_sde_lock(); + ret_val = sde_do_register(handle, event_name, cntr_mode, cntr_type, counter, NULL, NULL); + papi_sde_unlock(); + + return ret_val; +} + +/** This function registers an event name and (caller provided) callback function + within the SDE component structure attached to the handle. + A default description for an event is + synthesized from the library name and the event name when they are registered. + @param[in] handle -- (void *) pointer to sde structure for an individual library. + @param[in] event_name -- (char *) name of the event. + @param[in] cntr_mode -- (int) mode of the event counter. 
+ @param[in] cntr_type -- (int) type of the event counter. + @param[in] fp_counter -- pointer to a callback function that SDE will call when PAPI_read/stop/accum is called. + @param[in] param -- (void *) opaque parameter that will be passed to the callback function every time it's called. + */ +int +__attribute__((visibility("default"))) +papi_sde_register_fp_counter( void *handle, const char *event_name, int cntr_mode, int cntr_type, papi_sde_fptr_t fp_counter, void *param ) +{ + int ret_val; + papi_sde_lock(); + ret_val = sde_do_register( handle, event_name, cntr_mode, cntr_type, NULL, fp_counter, param ); + papi_sde_unlock(); + + return ret_val; +} + +static inline int +sde_do_register( papi_handle_t handle, const char *event_name, int cntr_mode, int cntr_type, void *counter, papi_sde_fptr_t fp_counter, void *param ) +{ + sde_counter_t *placeholder; + + SUBDBG("%s: Preparing to register counter: '%s' with mode: '%d' and type: '%d'.\n", __FILE__, event_name, cntr_mode, cntr_type); + + int ret = sde_setup_counter_internals( handle, event_name, cntr_mode, cntr_type, counter, fp_counter, param, &placeholder ); + + if( PAPI_OK != ret ) + return ret; + +#if defined(SDE_HAVE_OVERFLOW) + if( NULL != placeholder ){ + // Check if we need to worry about overflow (cases r[4-6], or c[4-6]) + if( placeholder->overflow ){ + ThreadInfo_t *thread; + EventSetInfo_t *ESI; + sde_control_state_t *sde_ctl; + + // Below here means that we are in cases r[4-6] + thread = _papi_hwi_lookup_thread( 0 ); + if( NULL == thread ) + goto no_new_timer; + + // Get the current running eventset and check if it has some events set to overflow. + int cidx = _sde_vector.cmp_info.CmpIdx; + ESI = thread->running_eventset[cidx]; + if( (NULL == ESI) || !(ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE) ) + goto no_new_timer; + + sde_ctl = ( sde_control_state_t * ) ESI->ctl_state; + + // Below this point we know we have a running eventset, so we are in case r5. 
+ // Since the event is set to overfow, if there is no timer in the eventset, create one and arm it. + if( !(sde_ctl->has_timer) ){ + int ret = set_timer_for_overflow(sde_ctl); + if( PAPI_OK != ret ){ + return ret; + } + ret = _sde_arm_timer(sde_ctl); + return ret; + } + } + } +no_new_timer: +#endif // defined(SDE_HAVE_OVERFLOW) + + return PAPI_OK; +} + + +int +__attribute__((visibility("default"))) +papi_sde_unregister_counter( void *handle, const char *event_name) +{ + papisde_library_desc_t *lib_handle; + int error; + char *full_event_name; + + lib_handle = (papisde_library_desc_t *) handle; + if( (NULL == lib_handle) || (NULL == lib_handle->libraryName) ){ + PAPIERROR("papi_sde_unregister_counter(): 'handle' is clobbered. Unable to unregister counter.\n"); + return PAPI_EINVAL; + } + + size_t str_len = strlen(lib_handle->libraryName)+strlen(event_name)+2+1; // +2 for "::" and +1 for '\0' + full_event_name = malloc(str_len*sizeof(char)); + snprintf(full_event_name, str_len, "%s::%s", lib_handle->libraryName, event_name); + + SUBDBG("papi_sde_unregister_counter(): Preparing to unregister counter: '%s' from SDE library: %s.\n", full_event_name, lib_handle->libraryName); + + // After this point we will be modifying data structures, so we need to acquire a lock. + // This function has multiple exist points. If you add more, make sure you unlock before each one of them. + papi_sde_lock(); + + error = delete_counter( lib_handle, full_event_name ); + // Check if we found a registered counter, or if it never existed. 
+ if( error ){ + PAPIERROR("papi_sde_unregister_counter(): Counter '%s' has not been registered by library '%s'.\n", full_event_name, lib_handle->libraryName); + free(full_event_name); + papi_sde_unlock(); + return PAPI_EINVAL; + } + + // We will not use the name beyond this point + free(full_event_name); + + papisde_control_t *gctl = _papisde_global_control; + gctl->num_live_events--; + _sde_vector.cmp_info.num_native_events = gctl->num_live_events; + + papi_sde_unlock(); + return PAPI_OK; +} + + + + +/** This function optionally replaces an event's default description with a + description provided by the library developer within the SDE component + structure attached to the handle. + @param[in] handle -- (void *) pointer to sde structure for an individual + library + @param[in] event_name -- name of the event + @param[in] event_description -- description of the event + */ +int +__attribute__((visibility("default"))) +papi_sde_describe_counter( void *handle, const char *event_name, const char *event_description ) +{ + sde_counter_t *tmp_item; + papisde_library_desc_t *lib_handle; + char *full_event_name; + + lib_handle = (papisde_library_desc_t *) handle; + if( (NULL == lib_handle) || (NULL == lib_handle->libraryName) ){ + PAPIERROR("papi_sde_describe_counter(): 'handle' is clobbered. Unable to add description for counter.\n"); + return PAPI_EINVAL; + } + + size_t str_len = strlen(lib_handle->libraryName)+strlen(event_name)+2+1; // +2 for "::" and +1 for '\0' + full_event_name = malloc(str_len*sizeof(char)); + snprintf(full_event_name, str_len, "%s::%s", lib_handle->libraryName, event_name); + + // After this point we will be modifying data structures, so we need to acquire a lock. + // This function has multiple exist points. If you add more, make sure you unlock before each one of them. 
+ papi_sde_lock(); + + tmp_item = ht_lookup_by_name(lib_handle->lib_counters, full_event_name); + if( NULL != tmp_item ){ + tmp_item->description = strdup(event_description); + free(full_event_name); + papi_sde_unlock(); + return PAPI_OK; + } + SUBDBG("papi_sde_describe_counter() Event: '%s' is not registered in SDE library: '%s'\n", full_event_name, lib_handle->libraryName); + // We will not use the name beyond this point + free(full_event_name); + papi_sde_unlock(); + return PAPI_EINVAL; +} + + + +/** This function finds the handle associated with a created counter, or a recorder, + given the library handle and the event name. + @param[in] handle -- (void *) pointer to sde structure for an individual + library + @param[in] event_name -- name of the event + */ +void +__attribute__((visibility("default"))) +*papi_sde_get_counter_handle( void *handle, const char *event_name) +{ + sde_counter_t *counter_handle; + papisde_library_desc_t *lib_handle; + char *full_event_name; + + lib_handle = (papisde_library_desc_t *) handle; + if( (NULL == lib_handle) || (NULL == lib_handle->libraryName) ){ + PAPIERROR("papi_sde_get_counter_handle(): 'handle' is clobbered.\n"); + return NULL; + } + + size_t str_len = strlen(lib_handle->libraryName)+strlen(event_name)+2+1; // +2 for "::" and +1 for '\0' + full_event_name = malloc(str_len*sizeof(char)); + snprintf(full_event_name, str_len, "%s::%s", lib_handle->libraryName, event_name); + + // After this point we will be accessing shared data structures, so we need to acquire a lock. 
+ papi_sde_lock(); + counter_handle = ht_lookup_by_name(lib_handle->lib_counters, full_event_name); + papi_sde_unlock(); + + free(full_event_name); + + return counter_handle; +} + + +/********************************************************************/ +/* Below are the functions required by the PAPI component interface */ +/********************************************************************/ + + +static int +_sde_init_component( int cidx ) +{ + SUBDBG("_sde_init_component...\n"); + + _sde_vector.cmp_info.num_native_events = 0; + _sde_vector.cmp_info.CmpIdx = cidx; + + return PAPI_OK; +} + + + +/** This is called whenever a thread is initialized */ +static int +_sde_init_thread( hwd_context_t *ctx ) +{ + (void)ctx; + SUBDBG( "_sde_init_thread %p...\n", ctx ); + return PAPI_OK; +} + + + +/** Setup a counter control state. + * In general a control state holds the hardware info for an + * EventSet. + */ + +static int +_sde_init_control_state( hwd_control_state_t * ctl ) +{ + SUBDBG( "sde_init_control_state... 
%p\n", ctl );
+
+    sde_control_state_t *sde_ctl = ( sde_control_state_t * ) ctl;
+    memset( sde_ctl, 0, sizeof ( sde_control_state_t ) );
+
+    return PAPI_OK;
+}
+
+
+/** Triggered by eventset operations like add or remove */
+static int
+_sde_update_control_state( hwd_control_state_t *ctl,
+        NativeInfo_t *native,
+        int count,
+        hwd_context_t *ctx )
+{
+
+    (void) ctx;
+    int i, index;
+
+    SUBDBG( "_sde_update_control_state %p %p...\n", ctl, ctx );
+
+    sde_control_state_t *sde_ctl = ( sde_control_state_t * ) ctl;
+
+    for( i = 0; i < count; i++ ) {
+        index = native[i].ni_event & PAPI_NATIVE_AND_MASK;
+        if( index < 0 ){
+            PAPIERROR("_sde_update_control_state(): Event at index %d has a negative native event code = %d.\n",i,index);
+            return PAPI_EINVAL;
+        }
+        SUBDBG("_sde_update_control_state: i=%d index=%u\n", i, index );
+        sde_ctl->which_counter[i] = (unsigned)index;
+        native[i].ni_position = i;
+    }
+
+    // If an event for which overflowing was set is being removed from the eventset, then the
+    // framework will turn overflowing off (by calling PAPI_overflow() with threshold=0),
+    // so we don't need to do anything here.
+
+    sde_ctl->num_events=count;
+
+    return PAPI_OK;
+}
+
+
+/** Triggered by PAPI_start().
+ *  Resets the eventset through _sde_reset() and, when overflow support is compiled in
+ *  and the running eventset has the PAPI_OVERFLOW_HARDWARE flag set, makes sure the
+ *  internal timer exists and arms it.
+ *  Returns the value of _sde_reset(), unless the timer cannot be created or armed,
+ *  in which case the error of the failing helper is returned.
+ */
+static int
+_sde_start( hwd_context_t *ctx, hwd_control_state_t *ctl )
+{
+    int ret_val;
+#if defined(SDE_HAVE_OVERFLOW)
+    ThreadInfo_t *thread;
+    int cidx;
+#endif // defined(SDE_HAVE_OVERFLOW)
+    ( void ) ctx;
+    ( void ) ctl;
+
+    SUBDBG( "%p %p...\n", ctx, ctl );
+
+    ret_val = _sde_reset(ctx, ctl);
+
+#if defined(SDE_HAVE_OVERFLOW)
+    sde_control_state_t *sde_ctl = ( sde_control_state_t * ) ctl;
+
+    cidx = _sde_vector.cmp_info.CmpIdx;
+    thread = _papi_hwi_lookup_thread( 0 );
+
+    if ( (NULL != thread) && (NULL != thread->running_eventset[cidx]) && (thread->running_eventset[cidx]->overflow.flags & PAPI_OVERFLOW_HARDWARE) ) {
+        if( !(sde_ctl->has_timer) ){
+            // No registered counters went through r[1-3]
+            int i;
+            papisde_control_t *gctl = get_global_struct();
+            for( i = 0; i < sde_ctl->num_events; i++ ) {
+                unsigned int counter_uniq_id = sde_ctl->which_counter[i];
+                if( counter_uniq_id >= gctl->num_reg_events ){
+                    PAPIERROR("_sde_start(): Event at index %d does not correspond to a registered counter.\n",i);
+                    continue;
+                }
+
+                sde_counter_t *counter = ht_lookup_by_id(gctl->all_reg_counters, counter_uniq_id);
+                if( NULL == counter ){
+                    PAPIERROR("_sde_start(): Event at index %d corresponds to a clobbered counter.\n",i);
+                    continue;
+                }
+
+                // If the counter that we are checking was set to overflow and it is registered (not created), create the timer.
+                if( !(counter->is_created) && counter->overflow ){
+                    // Registered counters went through r4
+                    int ret = set_timer_for_overflow(sde_ctl);
+                    // No lock is taken in this function, so there is nothing to release here;
+                    // report the failure to the caller, like sde_do_register() does.
+                    if( PAPI_OK != ret ){
+                        return ret;
+                    }
+                    break;
+                }
+            }
+        }
+
+        // r[1-4]
+        if( sde_ctl->has_timer ){
+            // _sde_arm_timer() starts the timer at 100us and, on failure, deletes it,
+            // clears 'has_timer' and returns PAPI_ECMP.
+            int ret = _sde_arm_timer(sde_ctl);
+            if( PAPI_OK != ret ){
+                return ret;
+            }
+        }
+    }
+#endif // defined(SDE_HAVE_OVERFLOW)
+
+    return ret_val;
+}
+
+
+/** Triggered by PAPI_stop().
+ *  When overflow support is compiled in and the running eventset has the
+ *  PAPI_OVERFLOW_HARDWARE flag set, disarms the internal timer (if there is one).
+ */
+static int
+_sde_stop( hwd_context_t *ctx, hwd_control_state_t *ctl )
+{
+
+    (void) ctx;
+    (void) ctl;
+#if defined(SDE_HAVE_OVERFLOW)
+    ThreadInfo_t *thread;
+    int cidx;
+    struct itimerspec zero_time;
+#endif // defined(SDE_HAVE_OVERFLOW)
+
+    SUBDBG( "sde_stop %p %p...\n", ctx, ctl );
+    /* anything that would need to be done at counter stop time */
+
+#if defined(SDE_HAVE_OVERFLOW)
+    sde_control_state_t *sde_ctl = ( sde_control_state_t * ) ctl;
+
+    cidx = _sde_vector.cmp_info.CmpIdx;
+    thread = _papi_hwi_lookup_thread( 0 );
+
+    if ( (NULL != thread) && (NULL != thread->running_eventset[cidx]) && (thread->running_eventset[cidx]->overflow.flags & PAPI_OVERFLOW_HARDWARE) ) {
+        if( sde_ctl->has_timer ){
+            SUBDBG( "stopping SDE internal timer\n");
+            // An all-zero itimerspec disarms the timer.
+            memset(&zero_time, 0, sizeof(struct itimerspec));
+            if (timer_settime(sde_ctl->timerid, 0, &zero_time, NULL) == -1){
+                PAPIERROR("timer_settime");
+                timer_delete(sde_ctl->timerid);
+                sde_ctl->has_timer = 0;
+                return PAPI_ECMP;
+            }
+        }
+    }
+#endif // defined(SDE_HAVE_OVERFLOW)
+
+    return PAPI_OK;
+}
+
+/** Triggered by PAPI_read() */
+/* flags field is never set? */
+static int
+_sde_read( hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags )
+{
+    int i;
+    int ret_val = PAPI_OK;
+    (void) flags;
+    (void) ctx;
+
+    papisde_control_t *gctl = _papisde_global_control;
+
+    SUBDBG( "_sde_read... 
%p %d\n", ctx, flags ); + + sde_control_state_t *sde_ctl = ( sde_control_state_t * ) ctl; + + // Lock before we read num_reg_events and the hash-tables. + papi_sde_lock(); + + + for( i = 0; i < sde_ctl->num_events; i++ ) { + unsigned int counter_uniq_id = sde_ctl->which_counter[i]; + if( counter_uniq_id >= gctl->num_reg_events ){ + PAPIERROR("_sde_read(): Event at index %d does not correspond to a registered counter.\n",i); + *events[i] = -1; + continue; + } + + sde_counter_t *counter = ht_lookup_by_id(gctl->all_reg_counters, counter_uniq_id); + if( NULL == counter ){ + PAPIERROR("_sde_read(): Event at index %d corresponds to a clobbered counter.\n",i); + sde_ctl->counter[i] = -1; + continue; + } + + // If the counter represents a counter group then we need to read the values of all the counters in the group. + if( NULL != counter->counter_group_head ){ + ret_val = sde_read_counter_group( counter, &(sde_ctl->counter[i]) ); + if( PAPI_OK != ret_val ){ + PAPIERROR("_sde_read(): Error occured when reading counter group: '%s'.\n",counter->name); + } + // we are done reading this one, move to the next. + continue; + } + + // Our convention is that read attempts on a placeholder will set the counter to "-1" to + // signify semantically that there was an error, but the function will not return an error + // to avoid breaking existing programs that do something funny when an error is returned. + if( (NULL == counter->data) && (NULL == counter->func_ptr) && (NULL == counter->recorder_data) ){ + PAPIERROR("_sde_read(): Attempted read on a placeholder: '%s'.\n",counter->name); + sde_ctl->counter[i] = -1; + continue; + } + + // If we are not dealing with a simple counter but with a recorder, we need to allocate + // a contiguous buffer, copy all the recorded data in it, and return to the user a pointer + // to this buffer cast as a long long. 
+ if( NULL != counter->recorder_data ){ + long long used_entries; + size_t typesize; + void *out_buffer; + + // At least the first chunk should have been allocated at creation. + if( NULL == counter->recorder_data->exp_container[0] ){ + SUBDBG( "No space has been allocated for recorder %s\n",counter->name); + sde_ctl->counter[i] = (long long)-1; + continue; + } + + used_entries = counter->recorder_data->used_entries; + typesize = counter->recorder_data->typesize; + + // NOTE: After returning this buffer we loose track of it, so it's the user's responsibility to free it. + out_buffer = malloc( used_entries*typesize ); + recorder_data_to_contiguous(counter, out_buffer); + sde_ctl->counter[i] = (long long)out_buffer; + + continue; + } + + ret_val = sde_hardware_read_and_store( counter, counter->previous_data, &(sde_ctl->counter[i]) ); + + if( PAPI_OK != ret_val ){ + PAPIERROR("_sde_read(): Error occured when reading counter: '%s'.\n",counter->name); + } + } + + papi_sde_unlock(); + + *events = sde_ctl->counter; + + return PAPI_OK; +} + +/** Triggered by PAPI_write(), but only if the counters are running */ +/* otherwise, the updated state is written to ESI->hw_start */ +static int +_sde_write( hwd_context_t *ctx, hwd_control_state_t *ctl, long long *values ) +{ + int i, ret_val = PAPI_OK; + (void) ctx; + (void) ctl; + + papisde_control_t *gctl = _papisde_global_control; + + SUBDBG( "_sde_write... %p\n", ctx ); + + sde_control_state_t *sde_ctl = ( sde_control_state_t * ) ctl; + + // Lock before we access global data structures. 
+ papi_sde_lock(); + + for( i = 0; i < sde_ctl->num_events; i++ ) { + unsigned int counter_uniq_id = sde_ctl->which_counter[i]; + if( counter_uniq_id >= gctl->num_reg_events ){ + PAPIERROR("_sde_write(): Event at index %d does not correspond to a registered counter.\n",i); + continue; + } + + sde_counter_t *counter = ht_lookup_by_id(gctl->all_reg_counters, counter_uniq_id); + if( NULL == counter ){ + PAPIERROR("_sde_read(): Event at index %d corresponds to a clobbered counter.\n",i); + continue; + } + + // We currently do not support writing in counter groups. + if( NULL != counter->counter_group_head ){ + SUBDBG("_sde_write(): Event '%s' corresponds to a counter group, and writing groups is not supported yet.\n",counter->name); + continue; + } + + if( NULL == counter->data ){ + if( NULL == counter->func_ptr ){ + // If we are not dealing with a simple counter but with a "recorder", which cannot be written, we have to error. + if( NULL != counter->recorder_data ){ + PAPIERROR("_sde_write(): Attempted write on a recorder: '%s'.\n",counter->name); + }else{ + PAPIERROR("_sde_write(): Attempted write on a placeholder: '%s'.\n",counter->name); + } + }else{ + PAPIERROR("_sde_write(): Attempted write on an event based on a callback function instead of a counter: '%s'.\n",counter->name); + } + continue; + } + + ret_val = sde_hardware_write( counter, values[i] ); + if( PAPI_OK != ret_val ){ + PAPIERROR("_sde_write(): Error occured when writing counter: '%s'.\n",counter->name); + } + } + + papi_sde_unlock(); + + return PAPI_OK; +} + + +/** Triggered by PAPI_reset() but only if the EventSet is currently running */ +/* If the eventset is not currently running, then the saved value in the */ +/* EventSet is set to zero without calling this routine. 
*/
+/* For every plain counter in the eventset the current value is stored in */
+/* counter->previous_data. Per-event problems are reported with PAPIERROR */
+/* and skipped; the function always returns PAPI_OK. */
+static int
+_sde_reset( hwd_context_t *ctx, hwd_control_state_t *ctl )
+{
+    int i;
+    (void) ctx;
+
+    SUBDBG( "_sde_reset ctx=%p ctrl=%p...\n", ctx, ctl );
+
+    papisde_control_t *gctl = _papisde_global_control;
+    sde_control_state_t *sde_ctl = ( sde_control_state_t * ) ctl;
+
+    // Lock before we read num_reg_events and the hash-tables.
+    papi_sde_lock();
+
+    for( i = 0; i < sde_ctl->num_events; i++ ) {
+        int ret_val;
+        unsigned int counter_uniq_id = sde_ctl->which_counter[i];
+        if( counter_uniq_id >= gctl->num_reg_events ){
+            PAPIERROR("_sde_reset(): Event at index %d does not correspond to a registered counter.\n",i);
+            continue;
+        }
+
+        sde_counter_t *counter = ht_lookup_by_id(gctl->all_reg_counters, counter_uniq_id);
+        if( NULL == counter ){
+            PAPIERROR("_sde_reset(): Event at index %d corresponds to a clobbered counter.\n",i);
+            continue;
+        }
+
+        // If the counter represents a counter group then we do not need to record the current value,
+        // because when we read the real value we will keep track of all the previous values of the
+        // individual counters (if they are DELTA), or not (if they are INSTANT)
+        if( NULL != counter->counter_group_head ){
+            // we are done with this one, move to the next.
+            continue;
+        }
+
+        // Our convention is that read attempts on a placeholder will not return an error
+        // to avoid breaking existing programs that do something funny when an error is returned.
+        if( (NULL == counter->data) && (NULL == counter->func_ptr) ){
+            PAPIERROR("_sde_reset(): Attempted read on a placeholder: %s.\n",counter->name);
+            continue;
+        }
+
+        ret_val = sde_hardware_read_and_store( counter, 0, &(counter->previous_data) );
+        if( PAPI_OK != ret_val ){
+            PAPIERROR("_sde_reset(): Error occured when resetting counter: %s.\n",counter->name);
+        }
+    }
+
+    papi_sde_unlock();
+
+    return PAPI_OK;
+}
+
+/** Triggered by PAPI_shutdown().
+ *  Frees the descriptor (and name) of every library still on the global
+ *  library list and leaves that list empty.
+ */
+static int
+_sde_shutdown_component(void)
+{
+    papisde_library_desc_t *curr_lib, *next_lib;
+
+    SUBDBG( "sde_shutdown_component...\n" );
+    papisde_control_t *gctl = _papisde_global_control;
+
+    if( NULL == gctl )
+        return PAPI_OK;
+
+    /* Free all the meta-data we allocated for libraries that are still active */
+    curr_lib = gctl->lib_list_head;
+    /* Detach the list first: the global control structure outlives this call, */
+    /* so the head must not keep pointing at descriptors that are about to be freed. */
+    gctl->lib_list_head = NULL;
+    while(NULL != curr_lib){
+        /* save a pointer to the next list element before we free the current */
+        next_lib = curr_lib->next;
+
+        /* free(NULL) is a no-op, so no guard is needed for the name */
+        free( curr_lib->libraryName );
+        free(curr_lib);
+
+        curr_lib = next_lib;
+    }
+
+    return PAPI_OK;
+}
+
+/** Called at thread shutdown.
+ *  The SDE component keeps no per-thread state, so this is a no-op.
+ */
+static int
+_sde_shutdown_thread( hwd_context_t *ctx )
+{
+
+    (void) ctx;
+
+    SUBDBG( "sde_shutdown_thread... %p\n", ctx );
+
+    /* Last chance to clean up thread */
+
+    return PAPI_OK;
+}
+
+
+
+/** This function sets various options in the component
+  @param[in] ctx -- hardware context
+  @param[in] code valid are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN,
+  PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT
+  @param[in] option -- options to be set
+
+  The SDE component currently ignores every option and returns PAPI_OK.
+ */
+static int
+_sde_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
+{
+
+    (void) ctx;
+    (void) code;
+    (void) option;
+
+    SUBDBG( "sde_ctl...\n" );
+
+    return PAPI_OK;
+}
+
+/** This function has to set the bits needed to count different domains
+  In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER
+  By default return PAPI_EINVAL if none of those are specified
+  and PAPI_OK with success
+  PAPI_DOM_USER is only user context is counted
+  PAPI_DOM_KERNEL is only the Kernel/OS context is counted
+  PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses)
+  PAPI_DOM_ALL is all of the domains
+
+  Nothing is stored here: the function only validates 'domain'.
+ */
+static int
+_sde_set_domain( hwd_control_state_t * cntrl, int domain )
+{
+    (void) cntrl;
+
+    int found = 0;
+    SUBDBG( "sde_set_domain...\n" );
+
+    if ( PAPI_DOM_USER & domain ) {
+        SUBDBG( " PAPI_DOM_USER\n" );
+        found = 1;
+    }
+    if ( PAPI_DOM_KERNEL & domain ) {
+        SUBDBG( " PAPI_DOM_KERNEL\n" );
+        found = 1;
+    }
+    if ( PAPI_DOM_OTHER & domain ) {
+        SUBDBG( " PAPI_DOM_OTHER\n" );
+        found = 1;
+    }
+    if ( PAPI_DOM_ALL & domain ) {
+        SUBDBG( " PAPI_DOM_ALL\n" );
+        found = 1;
+    }
+    if ( !found )
+        return ( PAPI_EINVAL );
+
+    return PAPI_OK;
+}
+
+
+/**************************************************************/
+/* Naming functions, used to translate event numbers to names */
+/**************************************************************/
+
+
+/** Enumerate Native Events
+ *   @param EventCode is the event of interest
+ *   @param modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS
+ *  If your component has attribute masks then these need to
+ *   be handled here as well.
+ */
+/* The search skips ids that no longer map to a live counter, because */
+/* unregistering creates sparsity in the global SDE table. Returns PAPI_OK */
+/* with *EventCode updated, PAPI_ENOEVNT when there is no (further) event, */
+/* and PAPI_EINVAL for an unknown modifier. */
+static int
+_sde_ntv_enum_events( unsigned int *EventCode, int modifier )
+{
+    unsigned int next_code;
+
+    SUBDBG("_sde_ntv_enum_events begin\n\tEventCode=%u modifier=%d\n", *EventCode, modifier);
+
+    papisde_control_t *gctl = _papisde_global_control;
+    if( NULL == gctl ){
+        return PAPI_ENOEVNT;
+    }
+
+    switch ( modifier ) {
+
+        /* return EventCode of first event */
+        case PAPI_ENUM_FIRST:
+            /* start looking at the first id; it may have been unregistered */
+            next_code = 0;
+            break;
+
+        /* return EventCode of next available event */
+        case PAPI_ENUM_EVENTS:
+            /* the mask clears the top bits, so the increment cannot wrap */
+            next_code = (*EventCode & PAPI_NATIVE_AND_MASK) + 1;
+            break;
+
+        default:
+            return PAPI_EINVAL;
+    }
+
+    // Lock before we read num_reg_events and the hash-tables.
+    papi_sde_lock();
+
+    /*
+     * We have to check the events which follow the current one, because unregistering
+     * will create sparsity in the global SDE table, so we can't just return the next
+     * index. Only ids below num_reg_events are ever looked up.
+     */
+    for( ; next_code < gctl->num_reg_events; next_code++ ){
+        sde_counter_t *item = ht_lookup_by_id(gctl->all_reg_counters, next_code);
+        if( (NULL != item) && (NULL != item->name) ){
+            *EventCode = next_code;
+            SUBDBG("Event name = %s (unique id = %d)\n", item->name, item->glb_uniq_id);
+            papi_sde_unlock();
+            return PAPI_OK;
+        }
+    }
+
+    papi_sde_unlock();
+
+    /* Running out of events is "no such event", not an invalid argument. */
+    return PAPI_ENOEVNT;
+}
+
+/** Takes a native event code and passes back the name
+ * @param EventCode is the native event code
+ * @param name is a pointer for the name to be copied to
+ * @param len is the size of the name string
+ * @return PAPI_OK, PAPI_EINVAL for an unusable buffer, PAPI_ENOEVNT if the
+ *         code does not map to a named counter. The result is always
+ *         NUL-terminated (and truncated if it does not fit).
+ */
+static int
+_sde_ntv_code_to_name( unsigned int EventCode, char *name, int len )
+{
+    papisde_control_t *gctl = _papisde_global_control;
+    unsigned int code = EventCode & PAPI_NATIVE_AND_MASK;
+
+    SUBDBG("_sde_ntv_code_to_name %u\n", code);
+
+    if( (NULL == name) || (len <= 0) ){
+        return PAPI_EINVAL;
+    }
+
+    // Lock before we read num_reg_events and the hash-tables.
+    papi_sde_lock();
+
+    // Valid ids are 0 .. num_reg_events-1.
+    if( (NULL == gctl) || (code >= gctl->num_reg_events) ){
+        papi_sde_unlock();
+        return PAPI_ENOEVNT;
+    }
+
+    sde_counter_t *counter = ht_lookup_by_id(gctl->all_reg_counters, code);
+    if( (NULL == counter) || (NULL == counter->name) ){
+        papi_sde_unlock();
+        return PAPI_ENOEVNT;
+    }
+    SUBDBG("Event name = %s (unique id = %d)\n", counter->name, counter->glb_uniq_id);
+
+    (void)strncpy( name, counter->name, len );
+    // strncpy() does not terminate the destination when the source fills it.
+    name[len-1] = '\0';
+
+    papi_sde_unlock();
+    return PAPI_OK;
+}
+
+/** Takes a native event code and passes back the event description
+ * @param EventCode is the native event code
+ * @param descr is a pointer for the description to be copied to
+ * @param len is the size of the descr string
+ * @return PAPI_OK, PAPI_EINVAL for an unusable buffer, PAPI_ENOEVNT if the
+ *         code does not map to a counter with a description. The result is
+ *         always NUL-terminated (and truncated if it does not fit).
+ */
+static int
+_sde_ntv_code_to_descr( unsigned int EventCode, char *descr, int len )
+{
+    unsigned int code = EventCode & PAPI_NATIVE_AND_MASK;
+    SUBDBG("_sde_ntv_code_to_descr %u\n", code);
+
+    papisde_control_t *gctl = _papisde_global_control;
+
+    if( (NULL == descr) || (len <= 0) ){
+        return PAPI_EINVAL;
+    }
+
+    // Lock before we read num_reg_events and the hash-tables.
+    papi_sde_lock();
+
+    // Valid ids are 0 .. num_reg_events-1.
+    if( (NULL == gctl) || (code >= gctl->num_reg_events) ){
+        papi_sde_unlock();
+        return PAPI_ENOEVNT;
+    }
+
+    sde_counter_t *counter = ht_lookup_by_id(gctl->all_reg_counters, code);
+    if( (NULL == counter) || (NULL == counter->description) ){
+        papi_sde_unlock();
+        return PAPI_ENOEVNT;
+    }
+    SUBDBG("Event (unique id = %d) description: %s\n", counter->glb_uniq_id, counter->description);
+
+    (void)strncpy( descr, counter->description, len );
+    // 'len' is the size of the buffer, so the last valid index is len-1.
+    descr[len-1] = '\0';
+
+    papi_sde_unlock();
+    return PAPI_OK;
+}
+
+/** Takes a native event name and passes back the code
+ * @param event_name -- a pointer for the name to be copied to
+ * @param event_code -- the native event code
+ * @return PAPI_OK on success (possibly after registering a placeholder),
+ *         PAPI_ENOEVNT if the event is unknown and no placeholder can be
+ *         created, PAPI_ECMP if the library or placeholder could not be
+ *         registered, PAPI_ENOMEM if the name could not be duplicated.
+ */
+static int
+_sde_ntv_name_to_code(const char *event_name, unsigned int *event_code )
+{
+    papisde_library_desc_t *lib_handle;
+    char *pos, *tmp_lib_name;
+    sde_counter_t *tmp_item = NULL;
+
+    SUBDBG( "%s\n", event_name );
+
+    papi_sde_lock();
+
+    papisde_control_t *gctl = _papisde_global_control;
+
+    // Let's see if the event has the library name as a prefix (as it should). Note that this is
+    // the event name as it comes from the framework, so it should contain the library name, although
+    // when the library registers an event counter it will not use the library name as part of the event name.
+    tmp_lib_name = strdup(event_name);
+    if( NULL == tmp_lib_name ){
+        papi_sde_unlock();
+        return PAPI_ENOMEM;
+    }
+    pos = strstr(tmp_lib_name, "::");
+    if( NULL != pos ){ // Good, it does.
+        *pos = '\0';
+
+        if( NULL == gctl ){
+            // If no library has initialized the library side of the component, and the application is already inquiring
+            // about an event, let's initialize the component pretending to be the library which corresponds to this event.
+            lib_handle = do_sde_init(tmp_lib_name);
+            if(NULL == lib_handle){
+                PAPIERROR("Unable to register library in SDE component.\n");
+                free(tmp_lib_name);
+                papi_sde_unlock();
+                return PAPI_ECMP;
+            }
+            gctl = _papisde_global_control;
+        }else{
+            int is_library_present = 0;
+            // If the library side of the component has been initialized, then look for the library.
+            lib_handle = gctl->lib_list_head;
+            while(NULL != lib_handle){ // Look for the library.
+                if( !strcmp(lib_handle->libraryName, tmp_lib_name) ){
+                    // We found the library.
+                    is_library_present = 1;
+                    // Now, look for the event in the library.
+                    tmp_item = ht_lookup_by_name(lib_handle->lib_counters, event_name);
+                    break;
+                }
+                lib_handle = lib_handle->next;
+            }
+
+            if( !is_library_present ){
+                // If the library side of the component was initialized, but the specific library hasn't called
+                // papi_sde_init() then we call it here to allocate the data structures.
+                lib_handle = do_sde_init(tmp_lib_name);
+                if(NULL == lib_handle){
+                    PAPIERROR("Unable to register library in SDE component.\n");
+                    free(tmp_lib_name);
+                    papi_sde_unlock();
+                    return PAPI_ECMP;
+                }
+            }
+        }
+        free(tmp_lib_name); // We don't need the library name any more.
+
+        if( NULL != tmp_item ){
+            SUBDBG("Found matching counter with global uniq id: %d in library: %s\n", tmp_item->glb_uniq_id, lib_handle->libraryName );
+            *event_code = tmp_item->glb_uniq_id;
+            papi_sde_unlock();
+            return PAPI_OK;
+        } else {
+            SUBDBG("Did not find event %s in library %s. Registering a placeholder.\n", event_name, lib_handle->libraryName );
+
+            // The placeholder is accounted for in the global structure, so it must exist by now.
+            if( NULL == gctl ){
+                papi_sde_unlock();
+                return PAPI_ECMP;
+            }
+
+            // Use the current number of registered events as the index of the new one, and increment it.
+            unsigned int counter_uniq_id = gctl->num_reg_events++;
+            gctl->num_live_events++;
+            _sde_vector.cmp_info.num_native_events = gctl->num_live_events;
+
+            // At this point in the code "lib_handle" contains a pointer to the data structure for this library whether
+            // the actual library has been initialized or not.
+            tmp_item = allocate_and_insert( lib_handle, event_name, counter_uniq_id, PAPI_SDE_RO, PAPI_SDE_long_long, NULL, NULL, NULL );
+            if(NULL == tmp_item) {
+                SUBDBG("Event %s does not exist in library %s and placeholder could not be inserted.\n", event_name, lib_handle->libraryName);
+                papi_sde_unlock();
+                return PAPI_ECMP;
+            }
+            *event_code = tmp_item->glb_uniq_id;
+            papi_sde_unlock();
+            return PAPI_OK;
+        }
+    }else{
+        free(tmp_lib_name);
+    }
+
+    // If no library has initialized the component and we don't know a library name, then we have to return.
+    if( NULL == gctl ){
+        papi_sde_unlock();
+        return PAPI_ENOEVNT;
+    }
+
+    // If the event name does not have the library name as a prefix, then we need to look in all the libraries for the event. However, in this case
+    // we can _not_ register a placeholder because we don't know which library the event belongs to.
+    lib_handle = gctl->lib_list_head;
+    while(NULL != lib_handle){
+
+        tmp_item = ht_lookup_by_name(lib_handle->lib_counters, event_name);
+        if( NULL != tmp_item ){
+            *event_code = tmp_item->glb_uniq_id;
+            SUBDBG("Found matching counter with global uniq id: %d in library: %s\n", tmp_item->glb_uniq_id, lib_handle->libraryName );
+            papi_sde_unlock();
+            return PAPI_OK;
+        } else {
+            SUBDBG("Failed to find event %s in library %s. Looking in other libraries.\n", event_name, lib_handle->libraryName );
+        }
+
+        lib_handle = lib_handle->next;
+    }
+    papi_sde_unlock();
+
+    return PAPI_ENOEVNT;
+}
+
+#if defined(SDE_HAVE_OVERFLOW)
+/** Signal handler of the internal polling timer used to emulate hardware
+ *  overflow for registered (not created) counters. It reads the running
+ *  eventset, adapts the timer period to the growth rate of the counters and
+ *  dispatches the PAPI overflow machinery for every counter that has
+ *  reached its deadline.
+ */
+static void
+_sde_dispatch_timer( int n, hwd_siginfo_t *info, void *uc)
+{
+
+    _papi_hwi_context_t hw_context;
+    caddr_t address;
+    ThreadInfo_t *thread;
+    int i, cidx, retval, isHardware = 0, slow_down, speed_up;
+    int found_registered_counters, period_has_changed = 0;
+    EventSetInfo_t *ESI;
+    struct itimerspec its;
+    long long overflow_vector = 0;
+    sde_control_state_t *sde_ctl;
+    papisde_control_t *gctl;
+
+    (void) n;
+
+    SUBDBG("SDE timer expired. Dispatching (papi internal) overflow handler\n");
+
+    thread = _papi_hwi_lookup_thread( 0 );
+    cidx = _sde_vector.cmp_info.CmpIdx;
+
+    // Validate before the first dereference; the timer may fire after the eventset stopped running.
+    if ( (NULL == thread) || (NULL == thread->running_eventset[cidx]) ){
+        PAPIERROR( "_sde_dispatch_timer(): 'Can not access overflow flags'");
+        return;
+    }
+
+    ESI = thread->running_eventset[cidx];
+    // This holds only the number of events in the eventset that are set to overflow.
+    int event_counter = ESI->overflow.event_counter;
+    sde_ctl = ( sde_control_state_t * ) ESI->ctl_state;
+    gctl = _papisde_global_control;
+    if( NULL == gctl )
+        return;
+
+    retval = _papi_hwi_read( thread->context[cidx], ESI, ESI->sw_stop );
+    if ( retval < PAPI_OK )
+        return;
+
+    slow_down = 0;
+    speed_up = 0;
+    found_registered_counters = 0;
+    // Reset the deadline of counters which have exceeded the current deadline
+    // and check if we need to slow down the frequency of the timer.
+    for ( i = 0; i < event_counter; i++ ) {
+        int papi_index = ESI->overflow.EventIndex[i];
+        long long deadline, threshold, latest, previous, diff;
+        unsigned int counter_uniq_id;
+
+        counter_uniq_id = sde_ctl->which_counter[papi_index];
+        sde_counter_t *counter = ht_lookup_by_id(gctl->all_reg_counters, counter_uniq_id);
+        if( (NULL == counter) || counter->is_created )
+            continue;
+
+        found_registered_counters = 1;
+
+        latest = ESI->sw_stop[papi_index];
+        deadline = ESI->overflow.deadline[i];
+        threshold = ESI->overflow.threshold[i];
+
+        // Find the increment from the previous measurement.
+        previous = sde_ctl->previous_value[papi_index];
+
+        // NOTE: The following code assumes that the counters are "long long". No other
+        // NOTE: type will work correctly.
+        diff = latest-previous;
+
+        // If it's too small we need to slow down the timer, if it's
+        // too large we need to speed up the timer.
+        if( 30*diff < threshold ){
+            slow_down = 1; // I.e., grow the sampling period
+        }else if( 10*diff > threshold ){
+            speed_up = 1; // I.e., shrink the sampling period
+        }
+
+        // Update the "previous" measurement to be the latest one.
+        sde_ctl->previous_value[papi_index] = latest;
+
+        // If this counter has exceeded the deadline, add it in the vector.
+        if ( latest >= deadline ) {
+            // pos[0] holds the first among the native events that compose the given event. If it is a derived event,
+            // then it might be made up of multiple native events, but this is a CPU component concept. The SDE component
+            // does not have derived events (the groups are first class citizens, they don't have multiple pos[] entries).
+            int pos = ESI->EventInfoArray[papi_index].pos[0];
+            double rel_dist = 100.0*(double)(latest-deadline)/(double)threshold;
+            SUBDBG ( "Event at index %d (and pos %d) has value %lld which exceeds deadline %lld (threshold %lld, accuracy %.2lf)\n",
+                    papi_index, pos, latest, deadline, threshold, rel_dist);
+
+            // Set (never toggle) the bit, so a repeated position cannot clear it again.
+            overflow_vector |= ( long long ) 1 << pos;
+            // We adjust the deadline in a way that it remains a multiple of threshold so we don't create an additive error.
+            ESI->overflow.deadline[i] = threshold*(latest/threshold) + threshold;
+        }
+    }
+
+    if( !found_registered_counters && sde_ctl->has_timer ){
+        struct itimerspec zero_time;
+        memset(&zero_time, 0, sizeof(struct itimerspec));
+        if (timer_settime(sde_ctl->timerid, 0, &zero_time, NULL) == -1){
+            PAPIERROR("timer_settime");
+            timer_delete(sde_ctl->timerid);
+            sde_ctl->has_timer = 0;
+            return;
+        }
+        goto no_change_in_period;
+    }
+
+    // Since we potentially check multiple counters in the loop above, both conditions could be true (for different counter).
+    // In this case, we give speed_up priority.
+    if( speed_up )
+        slow_down = 0;
+
+    // If neither was set, there is nothing to do here.
+    if( !speed_up && !slow_down )
+        goto no_change_in_period;
+
+    if( !sde_ctl->has_timer )
+        goto no_change_in_period;
+
+    // Get the current value of the timer.
+    if( timer_gettime(sde_ctl->timerid, &its) == -1){
+        PAPIERROR("timer_gettime() failed. Timer will not be modified.\n");
+        goto no_change_in_period;
+    }
+
+    period_has_changed = 0;
+    // We only reduce the period if it is above 131.6us, so it never drops below 100us.
+    if( speed_up && (its.it_interval.tv_nsec > 131607) ){
+        double new_val = (double)its.it_interval.tv_nsec;
+        new_val /= 1.31607; // sqrt(sqrt(3)) = 1.316074
+        its.it_value.tv_nsec = (int)new_val;
+        its.it_interval.tv_nsec = its.it_value.tv_nsec;
+        period_has_changed = 1;
+        SUBDBG ("Timer will be sped up to %ld ns\n", its.it_value.tv_nsec);
+    }
+
+    // We only increase the period if it is below 75.9ms, so it never grows above 100ms.
+    if( slow_down && (its.it_interval.tv_nsec < 75983800) ){
+        double new_val = (double)its.it_interval.tv_nsec;
+        new_val *= 1.31607; // sqrt(sqrt(3)) = 1.316074
+        its.it_value.tv_nsec = (int)new_val;
+        its.it_interval.tv_nsec = its.it_value.tv_nsec;
+        period_has_changed = 1;
+        SUBDBG ("Timer will be slowed down to %ld ns\n", its.it_value.tv_nsec);
+    }
+
+    if( !period_has_changed )
+        goto no_change_in_period;
+
+    if (timer_settime(sde_ctl->timerid, 0, &its, NULL) == -1){
+        PAPIERROR("timer_settime() failed when modifying PAPI internal timer. This might have broken overflow support for this eventset.\n");
+        goto no_change_in_period;
+    }
+
+no_change_in_period:
+
+    // If none of the events exceeded their deadline, there is nothing else to do.
+    if( 0 == overflow_vector ){
+        return;
+    }
+
+    if ( (NULL== thread) || (NULL == thread->running_eventset[cidx]) || (0 == thread->running_eventset[cidx]->overflow.flags) ){
+        PAPIERROR( "_sde_dispatch_timer(): 'Can not access overflow flags'");
+        return;
+    }
+
+    hw_context.si = info;
+    hw_context.ucontext = ( hwd_ucontext_t * ) uc;
+
+    address = GET_OVERFLOW_ADDRESS( hw_context );
+
+    int genOverflowBit = 0;
+
+    _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context, address, &isHardware, overflow_vector, genOverflowBit, &thread, cidx );
+
+    return;
+}
+#endif // defined(SDE_HAVE_OVERFLOW)
+
+#if defined(SDE_HAVE_OVERFLOW)
+/** Calls the user's overflow handler directly for one counter.
+ *  Builds the overflow vector from the position of 'cntr_handle' in the
+ *  running eventset and invokes ESI->overflow.handler with the current
+ *  context. Returns silently if there is no running eventset with hardware
+ *  overflow enabled.
+ */
+static void
+invoke_user_handler(sde_counter_t *cntr_handle){
+    EventSetInfo_t *ESI;
+    int i, cidx;
+    ThreadInfo_t *thread;
+    sde_control_state_t *sde_ctl;
+    _papi_hwi_context_t hw_context;
+    ucontext_t uc;
+    caddr_t address;
+    long long overflow_vector;
+
+    if( NULL == cntr_handle )
+        return;
+
+    thread = _papi_hwi_lookup_thread( 0 );
+    cidx = _sde_vector.cmp_info.CmpIdx;
+    // Without a thread or a running eventset there is nobody to notify.
+    if( NULL == thread )
+        return;
+    ESI = thread->running_eventset[cidx];
+    if( NULL == ESI )
+        return;
+
+    // checking again, just to be sure.
+    if( !(ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE) ) {
+        return;
+    }
+
+    sde_ctl = ( sde_control_state_t * ) ESI->ctl_state;
+
+    papisde_control_t *gctl = _papisde_global_control;
+
+    if( NULL == gctl ){
+        return;
+    }
+
+    // This path comes from papi_sde_inc_counter() which increment _ONLY_ one counter, so we don't
+    // need to check if any others have overflown.
+    overflow_vector = 0;
+    for( i = 0; i < sde_ctl->num_events; i++ ) {
+        unsigned int counter_uniq_id = sde_ctl->which_counter[i];
+
+        if( counter_uniq_id == cntr_handle->glb_uniq_id ){
+            // pos[0] holds the first among the native events that compose the given event. If it is a derived event,
+            // then it might be made up of multiple native events, but this is a CPU component concept. The SDE component
+            // does not have derived events (the groups are first class citizens, they don't have multiple pos[] entries).
+            int pos = ESI->EventInfoArray[i].pos[0];
+            if( pos == -1 ){
+                PAPIERROR( "The PAPI framework considers this event removed from the eventset, but the component does not\n");
+                return;
+            }
+            overflow_vector = ( long long ) 1 << pos;
+        }
+    }
+
+    getcontext( &uc );
+    hw_context.ucontext = &uc;
+    hw_context.si = NULL;
+    address = GET_OVERFLOW_ADDRESS( hw_context );
+
+    ESI->overflow.handler( ESI->EventSetIndex, ( void * ) address, overflow_vector, hw_context.ucontext );
+    return;
+}
+#endif // SDE_HAVE_OVERFLOW
+
+#if defined(SDE_HAVE_OVERFLOW)
+/** Enables (threshold > 0) or disables (threshold == 0) overflow for one
+ *  event of the eventset.
+ *  @return PAPI_OK on success, PAPI_EINVAL if the event cannot be resolved
+ *          or is a recorder, or the error of set_timer_for_overflow()
+ */
+static int
+_sde_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold ){
+
+    SUBDBG("_sde_set_overflow(%d, %d).\n",EventIndex, threshold);
+
+    sde_control_state_t *sde_ctl = ( sde_control_state_t * ) ESI->ctl_state;
+    papisde_control_t *gctl = _papisde_global_control;
+
+    if( NULL == gctl )
+        return PAPI_EINVAL;
+
+    // pos[0] holds the first among the native events that compose the given event. If it is a derived event,
+    // then it might be made up of multiple native events, but this is a CPU component concept. The SDE component
+    // does not have derived events (the groups are first class citizens, they don't have multiple pos[] entries).
+    int pos = ESI->EventInfoArray[EventIndex].pos[0];
+    // A negative position means the framework no longer maps this event to a native counter.
+    if( pos < 0 )
+        return PAPI_EINVAL;
+    unsigned int counter_uniq_id = sde_ctl->which_counter[pos];
+    sde_counter_t *counter = ht_lookup_by_id(gctl->all_reg_counters, counter_uniq_id);
+    // The lookup fails for ids that were never registered or have been unregistered.
+    if( NULL == counter )
+        return PAPI_EINVAL;
+    // If the counter is created then we will check for overflow every time its value gets updated, we don't need to poll.
+    // That is in cases c[1-3]
+    if( counter->is_created )
+        return PAPI_OK;
+
+    // We do not want to overflow on recorders, because we don't even know what this means (unless we check the number of recorder entries.)
+    if( (NULL != counter->recorder_data) && (threshold > 0) ){
+        return PAPI_EINVAL;
+    }
+
+    // If we still don't know what type the counter is, then we are _not_ in r[1-3] so we can't create a timer here.
+    if( (NULL == counter->data) && (NULL == counter->func_ptr) && (threshold > 0) ){
+        SUBDBG("Event is a placeholder (it has not been registered by a library yet), so we cannot start overflow, but we can remember it.\n");
+        counter->overflow = 1;
+        return PAPI_OK;
+    }
+
+    // A threshold of zero indicates that overflowing is not needed anymore.
+    if( 0 == threshold ){
+        counter->overflow = 0;
+        // If we had a timer (if the counter was created we wouldn't have one) then delete it.
+        if( sde_ctl->has_timer )
+            timer_delete(sde_ctl->timerid);
+        sde_ctl->has_timer = 0;
+    }else{
+        // If we are here we are in r[1-3] so we can create the timer
+        return set_timer_for_overflow(sde_ctl);
+    }
+
+    return PAPI_OK;
+}
+
+/**
+ * This code assumes that it is called _ONLY_ for registered counters,
+ * and that is why it sets has_timer to REGISTERED_EVENT_MASK
+ *
+ * Installs _sde_dispatch_timer() as the handler of a real-time signal and
+ * creates (but does not arm) a timer that delivers that signal. If the
+ * control state already owns a timer nothing is done.
+ * @return PAPI_OK, or PAPI_ECMP if the handler or the timer cannot be set up
+ */
+static int
+set_timer_for_overflow( sde_control_state_t *sde_ctl ){
+    int signo, sig_offset;
+    struct sigevent sigev;
+    struct sigaction sa;
+
+    // One timer per control state is enough; creating another one would leak the first.
+    if( sde_ctl->has_timer )
+        return PAPI_OK;
+
+    sig_offset = 0;
+
+    // Choose a new real-time signal
+    signo = SIGRTMIN+sig_offset;
+    if(signo > SIGRTMAX){
+        PAPIERROR("_sde_set_overflow(): Unable to create new timer due to large number of existing timers. Overflowing will not be activated for the current event.\n");
+        return PAPI_ECMP;
+    }
+
+    // setup the signal handler (zero first, so no field is left indeterminate)
+    memset(&sa, 0, sizeof(sa));
+    sa.sa_flags = SA_SIGINFO;
+    sa.sa_sigaction = _sde_dispatch_timer;
+    sigemptyset(&sa.sa_mask);
+    if (sigaction(signo, &sa, NULL) == -1){
+        PAPIERROR("sigaction");
+        return PAPI_ECMP;
+    }
+
+    // create the timer
+    memset(&sigev, 0, sizeof(sigev));
+    sigev.sigev_notify = SIGEV_SIGNAL;
+    sigev.sigev_signo = signo;
+    sigev.sigev_value.sival_ptr = &(sde_ctl->timerid);
+    if (timer_create(CLOCK_REALTIME, &sigev, &(sde_ctl->timerid)) == -1){
+        PAPIERROR("timer_create");
+        return PAPI_ECMP;
+    }
+    sde_ctl->has_timer |= REGISTERED_EVENT_MASK;
+
+    return PAPI_OK;
+}
+
+#endif // defined(SDE_HAVE_OVERFLOW)
+
+
+/** Vector that points to entry points for our component */
+papi_vector_t _sde_vector = {
+    .cmp_info = {
+        /* default component information */
+        /* (unspecified values are initialized to 0) */
+        /* we explicitly set them to zero in this sde */
+        /* to show what settings are available */
+
+        .name = "sde",
+        .short_name = "sde",
+        .description = "Software Defined Events (SDE) component",
+        .version = "1.15",
+        .support_version = "n/a",
+        .kernel_version = "n/a",
+        .num_cntrs = SDE_MAX_SIMULTANEOUS_COUNTERS,
+        .num_mpx_cntrs = SDE_MAX_SIMULTANEOUS_COUNTERS,
+        .default_domain = PAPI_DOM_USER,
+        .available_domains = PAPI_DOM_USER,
+        .default_granularity = PAPI_GRN_THR,
+        .available_granularities = PAPI_GRN_THR,
+        .hardware_intr_sig = PAPI_INT_SIGNAL,
+        .hardware_intr = 1,
+
+        /* component specific cmp_info initializations */
+    },
+
+    /* sizes of framework-opaque component-private structures */
+    .size = {
+        /* once per thread */
+        .context = sizeof ( sde_context_t ),
+        /* once per eventset */
+        .control_state = sizeof ( sde_control_state_t ),
+        /* ?? */
+        .reg_value = sizeof ( sde_register_t ),
+        /* ?? */
+        .reg_alloc = sizeof ( sde_reg_alloc_t ),
+    },
+
+    /* function pointers */
+    /* by default they are set to NULL */
+
+    /* Used for general PAPI interactions */
+    .start = _sde_start,
+    .stop = _sde_stop,
+    .read = _sde_read,
+    .reset = _sde_reset,
+    .write = _sde_write,
+    .init_component = _sde_init_component,
+    .init_thread = _sde_init_thread,
+    .init_control_state = _sde_init_control_state,
+    .update_control_state = _sde_update_control_state,
+    .ctl = _sde_ctl,
+    .shutdown_thread = _sde_shutdown_thread,
+    .shutdown_component = _sde_shutdown_component,
+    .set_domain = _sde_set_domain,
+    /* .cleanup_eventset = NULL, */
+    /* called in add_native_events() */
+    /* .allocate_registers = NULL, */
+
+    /* Used for overflow/profiling */
+#if defined(SDE_HAVE_OVERFLOW)
+    .dispatch_timer = _sde_dispatch_timer,
+    .set_overflow = _sde_set_overflow,
+#endif // defined(SDE_HAVE_OVERFLOW)
+    /* .get_overflow_address = NULL, */
+    /* .stop_profiling = NULL, */
+    /* .set_profile = NULL, */
+
+    /* ??? */
+    /* .user = NULL, */
+
+    /* Name Mapping Functions */
+    .ntv_enum_events = _sde_ntv_enum_events,
+    .ntv_code_to_name = _sde_ntv_code_to_name,
+    .ntv_code_to_descr = _sde_ntv_code_to_descr,
+    /* if .ntv_name_to_code not available, PAPI emulates */
+    /* it by enumerating all events and looking manually */
+    .ntv_name_to_code = _sde_ntv_name_to_code,
+
+
+    /* These are only used by _papi_hwi_get_native_event_info() */
+    /* Which currently only uses the info for printing native */
+    /* event info, not for any sort of internal use. */
+    /* .ntv_code_to_bits = NULL, */
+
+};
+
diff -Nru papi-5.7.0+dfsg/src/components/sde/sde_F.F90 papi-6.0.0~dfsg/src/components/sde/sde_F.F90
--- papi-5.7.0+dfsg/src/components/sde/sde_F.F90 1970-01-01 00:00:00.000000000 +0000
+++ papi-6.0.0~dfsg/src/components/sde/sde_F.F90 2020-03-04 15:56:58.000000000 +0000
@@ -0,0 +1,569 @@
+module papi_sde_fortran_wrappers
+  use, intrinsic :: ISO_C_BINDING
+
+  implicit none
+
+#include "f90papi.h"
+
+  integer, parameter :: i_kind = 0
+  integer, parameter :: PAPI_SDE_RO = int( Z'00', kind=kind(i_kind))
+  integer, parameter :: PAPI_SDE_RW = int( Z'01', kind=kind(i_kind))
+  integer, parameter :: PAPI_SDE_DELTA = int( Z'00', kind=kind(i_kind))
+  integer, parameter :: PAPI_SDE_INSTANT = int( Z'10', kind=kind(i_kind))
+
+  integer, parameter :: PAPI_SDE_long_long = int( Z'00', kind=kind(i_kind))
+  integer, parameter :: PAPI_SDE_int = int( Z'01', kind=kind(i_kind))
+  integer, parameter :: PAPI_SDE_double = int( Z'02', kind=kind(i_kind))
+  integer, parameter :: PAPI_SDE_float = int( Z'03', kind=kind(i_kind))
+
+! -------------------------------------------------------------------
+! ------------ Interfaces for F08 bridge-to-C functions -------------
+!
------------------------------------------------------------------- + + type, bind(C) :: fptr_struct_t + type(C_funptr) init + type(C_funptr) register_counter + type(C_funptr) register_fp_counter + type(C_funptr) unregister_counter + type(C_funptr) describe_counter + type(C_funptr) add_counter_to_group + type(C_funptr) create_counter + type(C_funptr) inc_counter + type(C_funptr) create_recorder + type(C_funptr) record + type(c_funptr) reset_recorder + type(c_funptr) reset_counter + type(c_funptr) get_counter_handle + end type fptr_struct_t + + interface papif_sde_init_F08 + type(C_ptr) function papif_sde_init_F08(lib_name_C_str) result(handle) bind(C, name="papi_sde_init") + use, intrinsic :: ISO_C_BINDING, only : C_ptr + type(C_ptr), value, intent(in) :: lib_name_C_str + end function papif_sde_init_F08 + end interface papif_sde_init_F08 + + interface papif_sde_register_counter_F08 + integer(kind=C_int) function papif_sde_register_counter_F08(handle, event_name_C_str, cntr_mode, cntr_type, counter) result(error) bind(C, name="papi_sde_register_counter") + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int + type(C_ptr), value, intent(in) :: handle + type(C_ptr), value, intent(in) :: event_name_C_str + integer(kind=C_int), value, intent(in) :: cntr_type + integer(kind=C_int), value, intent(in) :: cntr_mode + type(C_ptr), value, intent(in) :: counter + end function papif_sde_register_counter_F08 + end interface papif_sde_register_counter_F08 + + interface papif_sde_unregister_counter_F08 + integer(kind=C_int) function papif_sde_unregister_counter_F08(handle, event_name_C_str) result(error) bind(C, name="papi_sde_unregister_counter") + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int + type(C_ptr), value, intent(in) :: handle + type(C_ptr), value, intent(in) :: event_name_C_str + end function papif_sde_unregister_counter_F08 + end interface papif_sde_unregister_counter_F08 + + interface papif_sde_register_fp_counter_F08 + integer(kind=C_int) function 
papif_sde_register_fp_counter_F08(handle, event_name_C_str, cntr_mode, cntr_type, func_ptr, param) result(error) bind(C, name="papi_sde_register_fp_counter") + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_funptr, C_int + type(C_ptr), value, intent(in) :: handle + type(C_ptr), value, intent(in) :: event_name_C_str + integer(kind=C_int), value, intent(in) :: cntr_type + integer(kind=C_int), value, intent(in) :: cntr_mode + type(C_funptr), value, intent(in) :: func_ptr + type(C_ptr), value, intent(in) :: param + end function papif_sde_register_fp_counter_F08 + end interface papif_sde_register_fp_counter_F08 + + interface papif_sde_describe_counter_F08 + integer(kind=C_int) function papif_sde_describe_counter_F08(handle, event_name_C_str, event_desc_C_str) result(error) bind(C, name="papi_sde_describe_counter") + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int + type(C_ptr), value, intent(in) :: handle + type(C_ptr), value, intent(in) :: event_name_C_str + type(C_ptr), value, intent(in) :: event_desc_C_str + end function papif_sde_describe_counter_F08 + end interface papif_sde_describe_counter_F08 + + interface papif_sde_add_counter_to_group_F08 + integer(kind=C_int) function papif_sde_add_counter_to_group_F08(handle, event_name_C_str, group_name_C_str, flags) result(error) bind(C, name="papi_sde_add_counter_to_group") + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int, C_int32_t + type(C_ptr), value, intent(in) :: handle + type(C_ptr), value, intent(in) :: event_name_C_str + type(C_ptr), value, intent(in) :: group_name_C_str + integer(kind=C_INT32_T), value, intent(in) :: flags + end function papif_sde_add_counter_to_group_F08 + end interface papif_sde_add_counter_to_group_F08 + + interface papif_sde_create_counter_F08 + integer(kind=C_int) function papif_sde_create_counter_F08(handle, event_name_C_str, cntr_type, counter_handle) result(error) bind(C, name="papi_sde_create_counter") + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int + type(C_ptr), 
value, intent(in) :: handle + type(C_ptr), value, intent(in) :: event_name_C_str + integer(kind=C_int), value, intent(in) :: cntr_type + type(C_ptr), value, intent(in) :: counter_handle ! this argument is "intent(in)" because we will modify the address in which it points to, not the argument itself. + end function papif_sde_create_counter_F08 + end interface papif_sde_create_counter_F08 + + interface papif_sde_inc_counter_F08 + integer(kind=C_int) function papif_sde_inc_counter_F08(counter_handle, increment) result(error) bind(C, name="papi_sde_inc_counter") + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_long_long, C_int + type(C_ptr), value, intent(in) :: counter_handle + integer(kind=C_long_long), value, intent(in) :: increment + end function papif_sde_inc_counter_F08 + end interface papif_sde_inc_counter_F08 + + interface papif_sde_create_recorder_F08 + integer(kind=C_int) function papif_sde_create_recorder_F08(handle, event_name_C_str, typesize, cmpr_func_ptr, recorder_handle) result(error) bind(C, name="papi_sde_create_recorder") + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_funptr, C_size_t, C_int + type(C_ptr), value, intent(in) :: handle + type(C_ptr), value, intent(in) :: event_name_C_str + integer(kind=C_size_t), value, intent(in) :: typesize + type(C_funptr), value, intent(in) :: cmpr_func_ptr + type(C_ptr), value, intent(in) :: recorder_handle ! this argument is "intent(in)" because we will modify the address in which it points to, not the argument itself. 
+ end function papif_sde_create_recorder_F08 + end interface papif_sde_create_recorder_F08 + + interface papif_sde_record_F08 + integer(kind=C_int) function papif_sde_record_F08(recorder_handle, typesize, value_to_rec) result(error) bind(C, name="papi_sde_record") + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_size_t, C_int + type(C_ptr), value, intent(in) :: recorder_handle + integer(kind=C_size_t), value, intent(in) :: typesize + type(C_ptr), value, intent(in) :: value_to_rec + end function papif_sde_record_F08 + end interface papif_sde_record_F08 + + interface papif_sde_reset_recorder_F08 + integer(kind=C_int) function papif_sde_reset_recorder_F08(recorder_handle) result(error) bind(C, name="papi_sde_reset_recorder") + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int + type(C_ptr), value, intent(in) :: recorder_handle + end function papif_sde_reset_recorder_F08 + end interface papif_sde_reset_recorder_F08 + + interface papif_sde_reset_counter_F08 + integer(kind=C_int) function papif_sde_reset_counter_F08(counter_handle) result(error) bind(C, name="papi_sde_reset_counter") + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int + type(C_ptr), value, intent(in) :: counter_handle + end function papif_sde_reset_counter_F08 + end interface papif_sde_reset_counter_F08 + + interface papif_sde_get_counter_handle_F08 + type(C_ptr) function papif_sde_get_counter_handle_F08(handle, event_name_C_str) result(counter_handle) bind(C, name="papi_sde_get_counter_handle") + use, intrinsic :: ISO_C_BINDING, only : C_ptr + type(C_ptr), value, intent(in) :: handle + type(C_ptr), value, intent(in) :: event_name_C_str + end function papif_sde_get_counter_handle_F08 + end interface papif_sde_get_counter_handle_F08 + + +! ------------------------------------------------------------------- +! ----------------- Interfaces for helper functions ----------------- +! 
------------------------------------------------------------------- + + interface C_malloc + type(C_ptr) function C_malloc(size) bind(C,name="malloc") + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_size_t + integer(C_size_t), value, intent(in) :: size + end function C_malloc + end interface C_malloc + + interface C_free + subroutine C_free(ptr) bind(C,name="free") + use, intrinsic :: ISO_C_BINDING, only : C_ptr + type(C_ptr), value, intent(in) :: ptr + end subroutine C_free + end interface C_free + +! ------------------------------------------------------------------- +! ----------------- Interfaces for function pointers ---------------- +! ------------------------------------------------------------------- + + interface + function init_t(lib_name) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only: C_ptr + type(C_ptr), value :: lib_name + type(C_ptr) :: ret_val + end function init_t + end interface + + interface + function register_counter_t(lib_handle, event_name, cntr_mode, cntr_type, cntr) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only: C_ptr, C_int + type(C_ptr), value, intent(in) :: lib_handle + type(C_ptr), value, intent(in) :: event_name + integer(kind=C_int), value, intent(in) :: cntr_mode + integer(kind=C_int), value, intent(in) :: cntr_type + type(C_ptr), intent(in) :: cntr + integer(kind=C_int) :: ret_val + end function register_counter_t + end interface + + interface + function register_fp_counter_t(lib_handle, event_name, cntr_mode, cntr_type, c_func_ptr, param ) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_funptr, C_int + type(C_ptr), value, intent(in) :: lib_handle + type(C_ptr), value, intent(in) :: event_name + integer(kind=C_int), value, intent(in) :: cntr_type + integer(kind=C_int), value, intent(in) :: cntr_mode + type(C_funptr), value, intent(in) :: c_func_ptr + type(C_ptr), value, intent(in) :: param + integer(kind=C_int) :: ret_val + end function register_fp_counter_t + end interface + + interface + 
function unregister_counter_t(lib_handle, event_name) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int + type(C_ptr), value, intent(in) :: lib_handle + type(C_ptr), value, intent(in) :: event_name + integer(kind=C_int) :: ret_val + end function unregister_counter_t + end interface + + interface + function describe_counter_t(lib_handle, event_name, event_desc) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int + type(C_ptr), value, intent(in) :: lib_handle + type(C_ptr), value, intent(in) :: event_name + type(C_ptr), value, intent(in) :: event_desc + integer(kind=C_int) :: ret_val + end function describe_counter_t + end interface + + interface + function add_counter_to_group_t(handle, event_name_C_str, group_name_C_str, flags) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int, C_int32_t + type(C_ptr), value, intent(in) :: handle + type(C_ptr), value, intent(in) :: event_name_C_str + type(C_ptr), value, intent(in) :: group_name_C_str + integer(kind=C_INT32_T), value, intent(in) :: flags + integer(kind=C_int) :: ret_val + end function add_counter_to_group_t + end interface + + interface + function create_counter_t(handle, event_name_C_str, cntr_type, counter_handle) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int + type(C_ptr), value, intent(in) :: handle + type(C_ptr), value, intent(in) :: event_name_C_str + integer(kind=C_int), value, intent(in) :: cntr_type + type(C_ptr), value, intent(in) :: counter_handle ! this argument is "intent(in)" because we will modify the address in which it points to, not the argument itself. 
+ integer(kind=C_int) :: ret_val + end function create_counter_t + end interface + + interface + function inc_counter_t(counter_handle, increment) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_long_long, C_int + type(C_ptr), value, intent(in) :: counter_handle + integer(kind=C_long_long), value, intent(in) :: increment + integer(kind=C_int) :: ret_val + end function inc_counter_t + end interface + + interface + function create_recorder_t(handle, event_name_C_str, typesize, recorder_handle) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_size_t, C_int + type(C_ptr), value, intent(in) :: handle + type(C_ptr), value, intent(in) :: event_name_C_str + integer(kind=C_size_t), value, intent(in) :: typesize + type(C_ptr), value, intent(in) :: recorder_handle ! this argument is "intent(in)" because we will modify the address in which it points to, not the argument itself. + integer(kind=C_int) :: ret_val + end function create_recorder_t + end interface + + interface + function record_t(recorder_handle, typesize, value_to_rec) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_size_t, C_int + type(C_ptr), value, intent(in) :: recorder_handle + integer(kind=C_size_t), value, intent(in) :: typesize + type(C_ptr), value, intent(in) :: value_to_rec + integer(kind=C_int) :: ret_val + end function record_t + end interface + + + interface + function reset_recorder_t(recorder_handle) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int + type(C_ptr), value, intent(in) :: recorder_handle + integer(kind=C_int) :: ret_val + end function reset_recorder_t + end interface + + interface + function reset_counter_t(counter_handle) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only : C_ptr, C_int + type(C_ptr), value, intent(in) :: counter_handle + integer(kind=C_int) :: ret_val + end function reset_counter_t + end interface + + interface + function get_counter_handle_t(handle, event_name_C_str) result(ret_val) + 
use, intrinsic :: ISO_C_BINDING, only : C_ptr + type(C_ptr), value, intent(in) :: handle + type(C_ptr), value, intent(in) :: event_name_C_str + type(C_ptr) :: ret_val + end function get_counter_handle_t + end interface + + +! ------------------------------------------------------------------- +! ------------------------ END OF INTERFACES ------------------------ +! ------------------------------------------------------------------- + + + contains + + + +! ------------------------------------------------------------------- +! ---------------------- F08 API subroutines ------------------------ +! ------------------------------------------------------------------- + + subroutine papif_sde_init(lib_name, handle, error) + character(len=*), intent(in) :: lib_name + type(C_ptr), intent(out) :: handle + integer, intent(out), optional :: error + + type(C_ptr) :: lib_name_C_str + + lib_name_C_str = F_str_to_C(lib_name) + handle = papif_sde_init_F08(lib_name_C_str) + call C_free(lib_name_C_str) + if( present(error) ) then + error = PAPI_OK + end if + end subroutine papif_sde_init + +! --------------------------------------------------------- + + subroutine papif_sde_register_counter( handle, event_name, cntr_mode, cntr_type, counter, error ) + type(C_ptr), intent(in) :: handle + character(len=*), intent(in) :: event_name + integer, intent(in) :: cntr_type + integer, intent(in) :: cntr_mode + type(C_ptr), value, intent(in) :: counter + integer, intent(out), optional :: error + integer :: tmp + + type(C_ptr) :: event_name_C_str + + event_name_C_str = F_str_to_C(event_name) + tmp = papif_sde_register_counter_F08(handle, event_name_C_str, cntr_mode, cntr_type, counter) + if( present(error) ) then + error = tmp + end if + call C_free(event_name_C_str) + end subroutine papif_sde_register_counter + +! 
--------------------------------------------------------- + + subroutine papif_sde_register_fp_counter( handle, event_name, cntr_mode, cntr_type, c_func_ptr, param, error ) + type(C_ptr), intent(in) :: handle + character(len=*), intent(in) :: event_name + integer, intent(in) :: cntr_type + integer, intent(in) :: cntr_mode + type(C_ptr), value, intent(in) :: param + integer, intent(out), optional :: error + integer :: tmp + + type(C_funptr) :: c_func_ptr + type(C_ptr) :: event_name_C_str + + event_name_C_str = F_str_to_C(event_name) + tmp = papif_sde_register_fp_counter_F08(handle, event_name_C_str, cntr_mode, cntr_type, c_func_ptr, param) + if( present(error) ) then + error = tmp + end if + call C_free(event_name_C_str) + end subroutine papif_sde_register_fp_counter + +! --------------------------------------------------------- + + subroutine papif_sde_unregister_counter( handle, event_name, error ) + type(C_ptr), intent(in) :: handle + character(len=*), intent(in) :: event_name + integer, intent(out), optional :: error + integer :: tmp + + type(C_ptr) :: event_name_C_str + + event_name_C_str = F_str_to_C(event_name) + tmp = papif_sde_unregister_counter_F08(handle, event_name_C_str) + if( present(error) ) then + error = tmp + end if + call C_free(event_name_C_str) + end subroutine papif_sde_unregister_counter + +! 
--------------------------------------------------------- + + subroutine papif_sde_describe_counter( handle, event_name, event_desc, error ) + type(C_ptr), intent(in) :: handle + character(len=*), intent(in) :: event_name + character(len=*), intent(in) :: event_desc + integer, intent(out), optional :: error + integer :: tmp + + type(C_ptr) :: event_name_C_str + type(C_ptr) :: event_desc_C_str + + event_name_C_str = F_str_to_C(event_name) + event_desc_C_str = F_str_to_C(event_desc) + tmp = papif_sde_describe_counter_F08(handle, event_name_C_str, event_desc_C_str) + if( present(error) ) then + error = tmp + end if + call C_free(event_name_C_str) + call C_free(event_desc_C_str) + end subroutine papif_sde_describe_counter + +! --------------------------------------------------------- + + subroutine papif_sde_create_counter(handle, event_name, cntr_type, counter_handle, error) + type(C_ptr), value, intent(in) :: handle + character(len=*), intent(in) :: event_name + integer(kind=C_int), value, intent(in) :: cntr_type + type(C_ptr), value, intent(in) :: counter_handle + integer, intent(out), optional :: error + integer :: tmp + + type(C_ptr) :: event_name_C_str + + event_name_C_str = F_str_to_C(event_name) + + tmp = papif_sde_create_counter_F08(handle, event_name_C_str, cntr_type, counter_handle) + if( present(error) ) then + error = tmp + end if + call C_free(event_name_C_str) + end subroutine papif_sde_create_counter + +! --------------------------------------------------------- + + subroutine papif_sde_inc_counter(counter_handle, increment, error) + type(C_ptr), value, intent(in) :: counter_handle + integer(kind=C_long_long), value, intent(in) :: increment + integer, intent(out), optional :: error + integer :: tmp + + tmp = papif_sde_inc_counter_F08(counter_handle, increment) + if( present(error) ) then + error = tmp + end if + end subroutine papif_sde_inc_counter + +! 
--------------------------------------------------------- + + subroutine papif_sde_create_recorder(handle, event_name, typesize, cmpr_c_func_ptr, recorder_handle, error) + type(C_ptr), value, intent(in) :: handle + character(len=*), intent(in) :: event_name + integer(kind=C_size_t), value, intent(in) :: typesize + type(C_funptr) :: cmpr_c_func_ptr + type(C_ptr), value, intent(in) :: recorder_handle + integer, intent(out), optional :: error + integer :: tmp + + type(C_ptr) :: event_name_C_str + + event_name_C_str = F_str_to_C(event_name) + + tmp = papif_sde_create_recorder_F08(handle, event_name_C_str, typesize, cmpr_c_func_ptr, recorder_handle) + if( present(error) ) then + error = tmp + end if + call C_free(event_name_C_str) + end subroutine papif_sde_create_recorder + +! --------------------------------------------------------- + + subroutine papif_sde_record(recorder_handle, typesize, value_to_rec, error) + type(C_ptr), value, intent(in) :: recorder_handle + integer(kind=C_size_t), value, intent(in) :: typesize + type(C_ptr), value, intent(in) :: value_to_rec + integer, intent(out), optional :: error + integer :: tmp + + tmp = papif_sde_record_F08(recorder_handle, typesize, value_to_rec) + if( present(error) ) then + error = tmp + end if + end subroutine papif_sde_record + +! --------------------------------------------------------- + + subroutine papif_sde_reset_recorder(recorder_handle, error) + type(C_ptr), value, intent(in) :: recorder_handle + integer, intent(out), optional :: error + integer :: tmp + + tmp = papif_sde_reset_recorder_F08(recorder_handle) + if( present(error) ) then + error = tmp + end if + end subroutine papif_sde_reset_recorder + +! 
--------------------------------------------------------- + + subroutine papif_sde_reset_counter(counter_handle, error) + type(C_ptr), value, intent(in) :: counter_handle + integer, intent(out), optional :: error + integer :: tmp + + tmp = papif_sde_reset_counter_F08(counter_handle) + if( present(error) ) then + error = tmp + end if + end subroutine papif_sde_reset_counter + +! --------------------------------------------------------- + + subroutine papif_sde_get_counter_handle(handle, event_name, counter_handle, error) + type(C_ptr), value, intent(in) :: handle + character(len=*), intent(in) :: event_name + integer, intent(out), optional :: error + type(C_ptr), intent(out) :: counter_handle + + type(C_ptr) :: event_name_C_str + + event_name_C_str = F_str_to_C(event_name) + counter_handle = papif_sde_get_counter_handle_F08(handle, event_name_C_str) + call C_free(event_name_C_str) + if( present(error) ) then + error = PAPI_OK + end if + end subroutine papif_sde_get_counter_handle + + +! ------------------------------------------------------------------- +! ------------------------ Helper functions ------------------------- +! 
------------------------------------------------------------------- + + type(C_ptr) function F_str_to_C(F_str) result(C_str) + implicit none + character(len=*), intent(in) :: F_str + + character(len=1,kind=C_char), pointer :: tmp_str_ptr(:) + integer(C_size_t) :: i, strlen + + strlen = len(F_str) + + C_str = C_malloc(strlen+1) + if (C_associated(C_str)) then + call C_F_pointer(C_str,tmp_str_ptr,[strlen+1]) + forall (i=1:strlen) + tmp_str_ptr(i) = F_str(i:i) + end forall + tmp_str_ptr(strlen+1) = C_NULL_char + end if + end function F_str_to_C + + +end module papi_sde_fortran_wrappers + + + + + + + diff -Nru papi-5.7.0+dfsg/src/components/sde/sde_internal.h papi-6.0.0~dfsg/src/components/sde/sde_internal.h --- papi-5.7.0+dfsg/src/components/sde/sde_internal.h 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sde/sde_internal.h 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,198 @@ +#ifndef SDE_H +#define SDE_H + +// Enable the following line if you want to use PAPI_overflow() +#define SDE_HAVE_OVERFLOW + +#define PAPI_SDE_THREAD_SAFE + +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(SDE_HAVE_OVERFLOW) +#include +#endif //defined(SDE_HAVE_OVERFLOW) + + +#define is_readonly(_X_) (PAPI_SDE_RO == ((_X_)&0x0F)) +#define is_readwrite(_X_) (PAPI_SDE_RW == ((_X_)&0x0F)) +#define is_delta(_X_) (PAPI_SDE_DELTA == ((_X_)&0xF0)) +#define is_instant(_X_) (PAPI_SDE_INSTANT == ((_X_)&0xF0)) + +#define EXP_CONTAINER_ENTRIES 52 +#define EXP_CONTAINER_MIN_SIZE 2048 + +#ifndef SDE_MAX_SIMULTANEOUS_COUNTERS +#define SDE_MAX_SIMULTANEOUS_COUNTERS 40 +#endif + +#define PAPISDE_HT_SIZE 512 +#define REGISTERED_EVENT_MASK 0x2; + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" +#include "extras.h" + +#include "interface/papi_sde_interface.h" + +#if defined(PAPI_SDE_THREAD_SAFE) + #define papi_sde_lock() _papi_hwi_lock(COMPONENT_LOCK); + #define 
papi_sde_unlock() _papi_hwi_unlock(COMPONENT_LOCK); +#else + #warning "Thread safe locking is _NOT_ activated" + #define papi_sde_lock() + #define papi_sde_unlock() +#endif + +papi_vector_t _sde_vector; + +/* We do not use this structure, but the framework needs its size */ +typedef struct sde_register +{ + int junk; +} sde_register_t; + +/* We do not use this structure, but the framework needs its size */ +typedef struct sde_reg_alloc +{ + sde_register_t junk; +} sde_reg_alloc_t; + +/** + * There's one of these per event-set to hold data specific to the EventSet, like + * counter start values, number of events in a set and counter uniq ids. + */ +typedef struct sde_control_state +{ + int num_events; + unsigned int which_counter[SDE_MAX_SIMULTANEOUS_COUNTERS]; + long long counter[SDE_MAX_SIMULTANEOUS_COUNTERS]; + long long previous_value[SDE_MAX_SIMULTANEOUS_COUNTERS]; +#if defined(SDE_HAVE_OVERFLOW) + timer_t timerid; + int has_timer; +#endif //defined(SDE_HAVE_OVERFLOW) +} sde_control_state_t; + +typedef struct sde_context { + long long junk; +} sde_context_t; + +typedef struct sde_counter_s sde_counter_t; +typedef struct sde_sorting_params_s sde_sorting_params_t; +typedef struct papisde_list_entry_s papisde_list_entry_t; +typedef struct papisde_library_desc_s papisde_library_desc_t; +typedef struct papisde_control_s papisde_control_t; +typedef struct recorder_data_s recorder_data_t; + +/* Hash table entry */ +struct papisde_list_entry_s { + sde_counter_t *item; + papisde_list_entry_t *next; +}; + +struct recorder_data_s{ + void *exp_container[EXP_CONTAINER_ENTRIES]; + long long total_entries; + long long used_entries; + size_t typesize; + void *sorted_buffer; + long long sorted_entries; +}; + +/* The following type describes a counter, or a counter group, or a recording. 
*/ +struct sde_counter_s { + unsigned int glb_uniq_id; + char *name; + char *description; + void *data; + long long int previous_data; + recorder_data_t *recorder_data; + int is_created; + int overflow; + papi_sde_fptr_t func_ptr; + void *param; + int cntr_type; + int cntr_mode; + papisde_library_desc_t *which_lib; + papisde_list_entry_t *counter_group_head; + uint32_t counter_group_flags; +}; + +struct sde_sorting_params_s{ + sde_counter_t *recording; + int (*cmpr_func_ptr)(const void *p1, const void *p2); +}; + +/* This type describes one library. This is the type of the handle returned by papi_sde_init(). */ +struct papisde_library_desc_s { + char* libraryName; + papisde_list_entry_t lib_counters[PAPISDE_HT_SIZE]; + papisde_library_desc_t *next; +}; + +/* One global variable of this type holds pointers to all other SDE meta-data */ +struct papisde_control_s { + unsigned int num_reg_events; /* This number only increases, so it can be used as a uniq id */ + unsigned int num_live_events; /* This number decreases at unregister() */ + papisde_library_desc_t *lib_list_head; + unsigned int activeLibCount; + papisde_list_entry_t all_reg_counters[PAPISDE_HT_SIZE]; +}; + +/** This global variable points to the head of the control state list **/ +static papisde_control_t *_papisde_global_control = NULL; + +/* All of the following functions are for internal use only. 
*/ +static int _sde_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ); +static int _sde_write( hwd_context_t *ctx, hwd_control_state_t *ctl, long long *events ); +static int _sde_read( hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags ); +static int _sde_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ); +static int _sde_start( hwd_context_t *ctx, hwd_control_state_t *ctl ); +static int _sde_update_control_state( hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx ); +static int _sde_init_control_state( hwd_control_state_t * ctl ); +static int _sde_init_thread( hwd_context_t *ctx ); +static int _sde_init_component( int cidx ); +static int _sde_shutdown_component(void); +static int _sde_shutdown_thread( hwd_context_t *ctx ); +static int _sde_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ); +static int _sde_set_domain( hwd_control_state_t * cntrl, int domain ); +static int _sde_ntv_enum_events( unsigned int *EventCode, int modifier ); +static int _sde_ntv_code_to_name( unsigned int EventCode, char *name, int len ); +static int _sde_ntv_code_to_descr( unsigned int EventCode, char *descr, int len ); +static int _sde_ntv_name_to_code(const char *name, unsigned int *event_code ); + +#if defined(SDE_HAVE_OVERFLOW) +static int _sde_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold ); +static void _sde_dispatch_timer( int n, hwd_siginfo_t *info, void *uc); +static void invoke_user_handler(sde_counter_t *cntr_handle); +static int set_timer_for_overflow( sde_control_state_t *sde_ctl ); +#endif // defined(SDE_HAVE_OVERFLOW) + +static papi_handle_t do_sde_init(const char *name_of_library); +static int sde_cast_and_store(void *data, long long int previous_value, void *rslt, int type); +static int sde_hardware_read_and_store( sde_counter_t *counter, long long int previous_value, long long int *rslt ); +static int sde_read_counter_group( sde_counter_t *counter, long long int *rslt ); +static 
int sde_setup_counter_internals( papi_handle_t handle, const char *event_name, int cntr_mode, int cntr_type, void *counter, papi_sde_fptr_t fp_counter, void *param, sde_counter_t **placeholder ); +int aggregate_value_in_group(long long int *data, long long int *rslt, int cntr_type, int group_flags); +static inline int sde_do_register( papi_handle_t handle, const char *event_name, int cntr_mode, int cntr_type, void *counter, papi_sde_fptr_t fp_counter, void *param ); + +static sde_counter_t *allocate_and_insert(papisde_library_desc_t* lib_handle, const char *name, unsigned int uniq_id, int cntr_mode, int cntr_type, void *data, papi_sde_fptr_t func_ptr, void *param); +static int delete_counter(papisde_library_desc_t* lib_handle, const char *name); + +static inline void free_counter(sde_counter_t *counter); +static unsigned int ht_hash_id(unsigned int uniq_id); +static unsigned long ht_hash_name(const char *str); +static void ht_insert(papisde_list_entry_t *hash_table, int key, sde_counter_t *sde_counter); +static sde_counter_t *ht_delete(papisde_list_entry_t *hash_table, int key, unsigned int uniq_id); +static sde_counter_t *ht_lookup_by_name(papisde_list_entry_t *hash_table, const char *name); +static sde_counter_t *ht_lookup_by_id(papisde_list_entry_t *hash_table, unsigned int uniq_id); +#endif diff -Nru papi-5.7.0+dfsg/src/components/sde/tests/Advanced_C+FORTRAN/Gamum.c papi-6.0.0~dfsg/src/components/sde/tests/Advanced_C+FORTRAN/Gamum.c --- papi-5.7.0+dfsg/src/components/sde/tests/Advanced_C+FORTRAN/Gamum.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sde/tests/Advanced_C+FORTRAN/Gamum.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,85 @@ +#include <stdio.h> /* NOTE(review): the three system header names were lost in extraction; restored as stdio/stdlib/string - confirm against upstream */ +#include <stdlib.h> +#include <string.h> +#include "papi_sde_interface.h" + +// API functions (FORTRAN 77 friendly). 
+papi_handle_t papi_sde_hook_list_events( papi_sde_fptr_struct_t *fptr_struct); +void gamum_init_(void); +void gamum_unreg_(void); +void gamum_do_work_(void); + +// The following counter is a global variable that can be directly +// modified by programs linking with this library. +long long int gamum_cnt_i1; + +// The following counters are hidden to programs linking with +// this library, so they can not be directly modified. +static double cnt_d1, cnt_d2, cnt_d3, cnt_d4, cnt_d5; +static long long int cnt_i2, cnt_i3; +static double cnt_rm1, cnt_rm2; +static papi_handle_t handle; +static void *cntr_handle; + +// For internal use only. +static papi_handle_t gamum_papi_sde_hook_list_events( papi_sde_fptr_struct_t *fptr_struct); + +static const char *event_names[2] = { + "event_with_characters____ __up______to_______60_bytes", + "event_with_very_long_name_which_is_meant_to_exceed_128_bytes_or_in_other_words_the_size_of_two_cache_lines_so_we_see_if_it_makes_a_difference_in_performance" +}; + + +void gamum_init_(void){ /* Sets the five double counters to 1 and i1/i2 to 0, then registers every Gamum event through a locally populated function table. */ + cnt_d1 = cnt_d2 = cnt_d3 = cnt_d4 = cnt_d5 = 1; + gamum_cnt_i1 = cnt_i2 = 0; + papi_sde_fptr_struct_t fptr_struct; + + POPULATE_SDE_FPTR_STRUCT( fptr_struct ); + (void)gamum_papi_sde_hook_list_events(&fptr_struct); + + return; +} + +papi_handle_t papi_sde_hook_list_events( papi_sde_fptr_struct_t *fptr_struct){ /* Exported hook; simply forwards to the file-local implementation below. */ + return gamum_papi_sde_hook_list_events(fptr_struct); +} + +papi_handle_t gamum_papi_sde_hook_list_events( papi_sde_fptr_struct_t *fptr_struct){ /* Obtains the "Gamum" handle via fptr_struct->init and registers the counters, the "group0" members (ev1, ev4) and the created counter "papi_counter"; returns the handle. Return codes of the individual registrations are not checked. */ + handle = fptr_struct->init("Gamum"); + fptr_struct->register_counter(handle, "rm1", PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_double, &cnt_rm1); + fptr_struct->register_counter(handle, "ev1", PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_double, &cnt_d1); + fptr_struct->add_counter_to_group(handle, "ev1", "group0", PAPI_SDE_SUM); + fptr_struct->register_counter(handle, "ev2", PAPI_SDE_RO|PAPI_SDE_INSTANT, PAPI_SDE_double, &cnt_d2); + fptr_struct->register_counter(handle, "ev3", PAPI_SDE_RO|PAPI_SDE_DELTA, 
PAPI_SDE_double, &cnt_d3); + fptr_struct->register_counter(handle, "ev4", PAPI_SDE_RO|PAPI_SDE_INSTANT, PAPI_SDE_double, &cnt_d4); + fptr_struct->add_counter_to_group(handle, "ev4", "group0", PAPI_SDE_SUM); + fptr_struct->register_counter(handle, "ev5", PAPI_SDE_RO|PAPI_SDE_INSTANT, PAPI_SDE_double, &cnt_d5); + fptr_struct->register_counter(handle, "rm2", PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_double, &cnt_rm2); + fptr_struct->register_counter(handle, "i1", PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &gamum_cnt_i1); + fptr_struct->register_counter(handle, "i2", PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &cnt_i2); + + fptr_struct->create_counter(handle, "papi_counter", PAPI_SDE_RO|PAPI_SDE_INSTANT, &cntr_handle ); + + fptr_struct->register_counter(handle, event_names[0], PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &cnt_i3); + + return handle; +} + +void gamum_unreg_(void){ /* Unregisters only "rm1" and "rm2"; every other Gamum event stays registered. */ + papi_sde_unregister_counter(handle, "rm1"); + papi_sde_unregister_counter(handle, "rm2"); +} + +void gamum_do_work_(void){ /* One unit of simulated work: bumps each counter by a fixed step; "papi_counter" is incremented by 5 through the cached handle and by 1 through a by-name lookup. */ + cnt_d1 += 0.1; + cnt_d2 += 0.111; + cnt_d3 += 0.2; + cnt_d4 += 0.222; + cnt_d5 += 0.3; + gamum_cnt_i1 += 6; + cnt_i2 += 101; + cnt_i3 += 33; + papi_sde_inc_counter(cntr_handle, 5); + papi_sde_inc_counter(papi_sde_get_counter_handle(handle, "papi_counter"), 1); +} diff -Nru papi-5.7.0+dfsg/src/components/sde/tests/Advanced_C+FORTRAN/sde_test_f08.F90 papi-6.0.0~dfsg/src/components/sde/tests/Advanced_C+FORTRAN/sde_test_f08.F90 --- papi-5.7.0+dfsg/src/components/sde/tests/Advanced_C+FORTRAN/sde_test_f08.F90 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sde/tests/Advanced_C+FORTRAN/sde_test_f08.F90 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,1000 @@ + program test_F_interface + use, intrinsic :: ISO_C_BINDING + use :: ISO_FORTRAN_ENV + use :: papi_sde_fortran_wrappers + + implicit none + + TYPE(C_ptr) :: handle + TYPE(C_ptr) :: quantile + integer, pointer :: quantile_f + integer(kind=C_LONG_LONG) :: values(22), values_to_write(1) + + 
integer(kind=C_LONG_LONG), target :: ev_cnt1, ev_cnt2 + integer(kind=C_INT), target :: ev_cnt3_int + real, target :: ev_cnt3_float + real (KIND=KIND(0.0)), target :: ev_cnt4_float + real (KIND=KIND(0.0D0)) :: value_d + integer :: i, ret_val, error + integer :: eventset, eventset2, eventcode, junk, codes(3) + + real, target :: internal_variable + integer :: internal_variable_int + integer :: all_tests_passed + + interface + function callback_t(param) result(ret_val) + use, intrinsic :: ISO_C_BINDING, only: C_LONG_LONG + real :: param + integer(kind=C_LONG_LONG) :: ret_val + end function callback_t + end interface + + interface + function rounding_error(param) result(ret_val) + real (KIND=KIND(0.0D0)) :: param, ret_val + end function rounding_error + end interface + + procedure(callback_t) :: f08_callback + + all_tests_passed = 1 + + ev_cnt1 = 73 + ev_cnt3_int = 5 + ev_cnt4_float = 5.431 + values_to_write(1) = 9 + + call papif_sde_init('TESTLIB', handle, error) + if(error .ne. PAPI_OK ) print *,'Error in sde_init' + call papif_sde_register_counter(handle, 'TESTEVENT', PAPI_SDE_RO, PAPI_SDE_long_long, C_loc(ev_cnt1), error) + if(error .ne. PAPI_OK ) print *,'Error in sde_register_counter' + call papif_sde_describe_counter(handle, 'TESTEVENT', 'This is a test counter used to test SDE from FORTRAN, for testing purposes only. Use it when you test the functionality in a test or something. Happy testing.', error) + if(error .ne. PAPI_OK ) print *,'Error in sde_describe_counter' + call papif_sde_register_counter(handle, 'SERIOUSEVENT', PAPI_SDE_RO, PAPI_SDE_long_long, C_loc(ev_cnt2), error) + if(error .ne. PAPI_OK ) print *,'Error in sde_register_counter' + ! The following call should be ignored by the SDE component (since this counter is already registered.) + call papif_sde_register_counter(handle, 'SERIOUSEVENT', PAPI_SDE_RO, PAPI_SDE_long_long, C_loc(ev_cnt1), error) + if(error .ne. 
PAPI_OK ) print *,'Error in sde_register_counter' + call papif_sde_describe_counter(handle, 'SERIOUSEVENT', 'This is a not a test counter, this one is serious.', error) + if(error .ne. PAPI_OK ) print *,'Error in sde_describe_counter' + call papif_sde_register_counter(handle, 'FLOATEVENT', PAPI_SDE_RO, PAPI_SDE_float, C_loc(ev_cnt4_float), error) + if(error .ne. PAPI_OK ) print *,'Error in sde_register_counter' + + internal_variable = 987.65 + internal_variable_int = 12345 + + ! the following call should be ignored by the SDE component, but the returned 'handle' should still be valid. + call papif_sde_init('TESTLIB', handle, error) + if(error .ne. PAPI_OK ) print *,'Error in sde_init' + call papif_sde_register_fp_counter(handle, 'FP_EVENT', PAPI_SDE_RO, PAPI_SDE_long_long, c_funloc(f08_callback), C_loc(internal_variable), error) + if(error .ne. PAPI_OK ) print *,'Error in sde_register_fp_counter' + call papif_sde_describe_counter(handle, 'FP_EVENT', 'This is another counter.', error) + if(error .ne. PAPI_OK ) print *,'Error in sde_describe_counter' + ! The following call should be ignored by the SDE component (since this counter is already registered.) + call papif_sde_register_fp_counter(handle, 'FP_EVENT', PAPI_SDE_RO, PAPI_SDE_long_long, c_funloc(f08_callback), C_loc(ev_cnt1), error) + if(error .ne. PAPI_OK ) print *,'Error in sde_register_fp_counter' + + call xandria_init() + call gamum_init() + call recorder_init() + + internal_variable = 11.0 + + ret_val = PAPI_VER_CURRENT + + call papif_library_init(ret_val) + if( ret_val .ne. PAPI_VER_CURRENT ) then + print *,'Error at papif_init', ret_val, '!=', PAPI_VER_CURRENT + print *,'PAPI_EINVAL', PAPI_EINVAL + print *,'PAPI_ENOMEM', PAPI_ENOMEM + print *,'PAPI_ECMP', PAPI_ECMP + print *,'PAPI_ESYS', PAPI_ESYS + stop + endif + + call recorder_do_work() + + eventset = PAPI_NULL + call papif_create_eventset( eventset, ret_val ) + if( ret_val .ne. 
PAPI_OK ) then + print *,'Error at papif_create_eventset' + stop + endif + + eventset2 = PAPI_NULL + call papif_create_eventset( eventset2, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_create_eventset' + stop + endif + +! 1 + call papif_event_name_to_code( 'sde:::TESTLIB::TESTEVENT', eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + + call papif_add_event( eventset, eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_event' + stop + endif + +!------------------------------------------------------------------------------- + + call recorder_do_work() + +!------------------------------------------------------------------------------- + + call papif_start( eventset, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_start' + stop + endif + + ev_cnt1 = ev_cnt1+100 + call xandria_do_work() + + call papif_stop( eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_stop' + stop + endif + + call recorder_do_work() + + print '(A29,I4)', ' TESTLIB::TESTEVENT (100) = ', values(1) + if( values(1) .ne. 100 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + +! 2 + call papif_event_name_to_code( 'sde:::TESTLIB::FP_EVENT', eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + + call papif_add_event( eventset, eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_event' + stop + endif + + internal_variable = 12.0 + internal_variable_int = 12 + +!------------------------------------------------------------------------------- + print *,'' + + call papif_start( eventset, ret_val ) + if( ret_val .ne. 
PAPI_OK ) then + print *,'Error at papif_start' + stop + endif + + ev_cnt1 = ev_cnt1+9 + ev_cnt4_float = ev_cnt4_float+33 + internal_variable = 12.4 + + call papif_stop( eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_stop' + stop + endif + + print '(A27,I2)', ' TESTLIB::TESTEVENT (9) = ', values(1) + if( values(1) .ne. 9 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A26,I2)', ' TESTLIB::FP_EVENT (0) = ', values(2) + if( values(2) .ne. 0 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + +! 3 + call papif_event_name_to_code( 'sde:::TESTLIB::FLOATEVENT', eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + + call recorder_do_work() + + call papif_add_event( eventset, eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_event' + stop + endif + +! 4 + call papif_event_name_to_code( 'sde:::Xandria::EV_I1', eventcode, ret_val ) +! not added + call papif_event_name_to_code( 'sde:::Xandria::EV_I2', junk, ret_val ) +! not added + call papif_event_name_to_code( 'sde:::Xandria::EV_I2', junk, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + + call papif_add_event( eventset, eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_event' + stop + endif + +! 5 + call papif_add_named_event( eventset, 'sde:::Xandria::RW_I1', ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_named_event' + stop + endif + + do i=1,37 + call recorder_do_work() + end do + +!------------------------------------------------------------------------------- + call papif_start( eventset, ret_val ) + if( ret_val .ne. 
PAPI_OK ) then + print *,'Error at papif_start' + stop + endif + + ev_cnt1 = ev_cnt1+2 + ev_cnt4_float = ev_cnt4_float+3.98 + internal_variable = 20.12 + internal_variable_int = 20 + + call xandria_do_work() + +! Adding the 5th counter into a separate eventset so we can write into it. + call papif_add_named_event( eventset2, 'sde:::Xandria::RW_I1', ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_named_event' + stop + endif + +!-------------------- + print *,'' + + call papif_read(eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_read' + stop + endif + + do i=1,370 + call recorder_do_work() + end do + + print '(A27,I2)', ' TESTLIB::TESTEVENT (2) = ', values(1) + if( values(1) .ne. 2 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A26,I2)', ' TESTLIB::FP_EVENT (8) = ', values(2) + if( values(2) .ne. 8 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(3), 1.0D0) + print '(A31,F4.2)', ' TESTLIB::FLOATEVENT (3.98) = ', value_d + if( abs(value_d - 3.98) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A23,I1)', ' Xandria::EV_I1 (1) = ', values(4) + if( values(4) .ne. 1 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A24,I2)', ' Xandria::RW_I1 (14) = ', values(5) + if( values(5) .ne. 14 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + call xandria_do_work() + call xandria_do_work() + call xandria_do_work() + +!-------------------- + print *,'' + + call papif_stop( eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_stop' + stop + endif + + print '(A24,I2)', ' Xandria::RW_I1 (35) = ', values(5) + if( values(5) .ne. 35 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + +!------------------------------------------------------------------------------- +! 
WRITE and then read the RW counter. + call papif_start( eventset2, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_start' + stop + endif + + call papif_write(eventset2, values_to_write, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_write' + stop + endif + + call papif_read(eventset2, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_read' + stop + endif + + print '(A23,I1)', ' Xandria::RW_I1 (9) = ', values(1) + if( values(1) .ne. 9 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + call papif_stop( eventset2, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_stop' + stop + endif +!------------------------------------------------------------------------------- + + call papif_start( eventset, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_start' + stop + endif + + ev_cnt1 = ev_cnt1+5 + ev_cnt4_float = ev_cnt4_float+18.8 + internal_variable = internal_variable + 30.1 + internal_variable_int = 30 + + call xandria_do_work() + call xandria_do_work() + call xandria_do_work() + + call papif_read(eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_read' + stop + endif + + print '(A27,I1)', ' TESTLIB::TESTEVENT (5) = ', values(1) + if( values(1) .ne. 5 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A27,I2)', ' TESTLIB::FP_EVENT (30) = ', values(2) + if( values(2) .ne. 30 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(3), 1.0D0) + print '(A31,F4.1)',' TESTLIB::FLOATEVENT (18.8) = ', value_d + if( abs(value_d - 18.8) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A23,I2)', ' Xandria::EV_I1 (3) = ', values(4) + if( values(4) .ne. 
3 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + +!-------------------- + print *,'' + + call papif_reset(eventset, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_reset' + stop + endif + + call papif_stop( eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_stop' + stop + endif + + + print '(A27,I2)', ' TESTLIB::TESTEVENT (0) = ', values(1) + if( values(1) .ne. 0 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A26,I2)', ' TESTLIB::FP_EVENT (0) = ', values(2) + if( values(2) .ne. 0 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(3), 1.0D0) + print '(A31,F4.1)', ' TESTLIB::FLOATEVENT, (0.0) = ', value_d + if( abs(value_d - 0.0) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A23,I2)', ' Xandria::EV_I1 (0) = ', values(4) + if( values(4) .ne. 0 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + +! 6 + call papif_event_name_to_code('sde:::Xandria::EV_R1' , codes(1), ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + +! 7 + call papif_event_name_to_code('sde:::Xandria::EV_R2' , codes(2), ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + +! 8 + call papif_event_name_to_code('sde:::Xandria::EV_R3' , codes(3), ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + + call papif_add_events( eventset, codes, 3, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_events' + stop + endif + + + do i=1,29 + call recorder_do_work() + end do + +!------------------------------------------------------------------------------- + print *,'' + + call papif_start( eventset, ret_val ) + if( ret_val .ne. 
PAPI_OK ) then + print *,'Error at papif_start' + stop + endif + + call xandria_do_work() + call xandria_do_work() + call xandria_do_work() + + call papif_stop( eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_stop' + stop + endif + + print '(A27,I2)', ' TESTLIB::TESTEVENT (0) = ', values(1) + if( values(1) .ne. 0 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A26,I2)', ' TESTLIB::FP_EVENT (0) = ', values(2) + if( values(2) .ne. 0 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(3), 1.0D0) + print '(A30,F3.1)' ,' TESTLIB::FLOATEVENT (0.0) = ', value_d + if( abs(value_d - 0.0) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A23,I2)', ' Xandria::EV_I1 (3) = ', values(4) + if( values(4) .ne. 3 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A24,I2)', ' Xandria::EV_R1 (30) = ', values(6) + if( values(6) .ne. 30 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A24,I2)', ' Xandria::EV_R2 (60) = ', values(7) + if( values(7) .ne. 60 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A24,I2)', ' Xandria::EV_R3 (90) = ', values(8) + if( values(8) .ne. 90 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + +!------------------------------------------------------------------------------- + call gamum_unreg() + + do i=1,248 + call recorder_do_work() + end do + +! 9 + call papif_event_name_to_code('sde:::Gamum::ev1' , codes(1), ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + +! 10 + call papif_event_name_to_code('sde:::Gamum::ev3' , codes(2), ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + +! 
11 + call papif_event_name_to_code('sde:::Gamum::ev4' , codes(3), ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + + call papif_add_events( eventset, codes, 3, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_events' + stop + endif + +!------------------------------------------------------------------------------- + print *,'' + + call papif_start( eventset, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_start' + stop + endif + + call gamum_do_work() + call gamum_do_work() + call gamum_do_work() + call gamum_do_work() + + do i=1,122 + call recorder_do_work() + end do + + call papif_stop( eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_stop' + stop + endif + + + value_d = transfer(values(9), 1.0D0) + print '(A21,F4.1)',' Gamum::ev1 (0.4) = ', value_d + if( abs(value_d - 0.4) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(10), 1.0D0) + print '(A21,F4.1)',' Gamum::ev3 (0.8) = ', value_d + if( abs(value_d - 0.8) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(11), 1.0D0) + print '(A23,F5.3)',' Gamum::ev4 (1.888) = ', value_d + if( abs(value_d - 1.888) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + +!------------------------------------------------------------------------------- + print *,'' + +! 12 + call papif_event_name_to_code('sde:::Xandria::LATE' , eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + + call papif_add_event( eventset, eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_event' + stop + endif + +! 
We register this event after the placeholder was created + call xandria_add_more() + + call papif_start( eventset, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_start' + stop + endif + + call xandria_do_work() + + call papif_stop( eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_stop' + stop + endif + + value_d = transfer(values(9), 1.0D0) + print '(A21,F4.1)', ' Gamum::ev1 (0.0) = ', value_d + if( abs(value_d - 0.0) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(10), 1.0D0) + print '(A21,F4.1)',' Gamum::ev3 (0.0) = ', value_d + if( abs(value_d - 0.0) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(11), 1.0D0) + print '(A23,F5.3)', ' Gamum::ev4 (1.888) = ', value_d + if( abs(value_d - 1.888) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A22,I2)', ' Xandria::LATE (7) = ', values(12) + if( values(12) .ne. 7 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + + do i=1,9 + call recorder_do_work() + end do + +!------------------------------------------------------------------------------- + print *,'' + +! 13 + call papif_event_name_to_code('sde:::Xandria::WRONG' , eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + + call papif_add_event( eventset, eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_event' + stop + endif + + call papif_start( eventset, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_start' + stop + endif + + call xandria_do_work() + call xandria_do_work() + call xandria_do_work() + + call papif_stop( eventset, values, ret_val ) + if( ret_val .ne. 
PAPI_OK ) then + print *,'Error at papif_stop' + stop + endif + + print '(A23,I2)', ' Xandria::LATE (21) = ', values(12) + if( values(12) .ne. 21 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A24,I2)', ' Xandria::WRONG (-1) = ', values(13) + if( values(13) .ne. -1 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + +!------------------------------------------------------------------------------- + print *,'' +! 14 + call papif_event_name_to_code('sde:::Gamum::group0' , eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + + call papif_add_event( eventset, eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_event' + stop + endif + +! 15 + call papif_event_name_to_code('sde:::Gamum::papi_counter' , eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + + call papif_add_event( eventset, eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_event' + stop + endif + + call papif_start( eventset, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_start' + stop + endif + + call gamum_do_work() + call gamum_do_work() + + call papif_read(eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_read' + stop + endif + + print '(A22,I2)', ' Xandria::LATE (0) = ', values(12) + if( values(12) .ne. 0 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A24,I2)', ' Xandria::WRONG (-1) = ', values(13) + if( values(13) .ne. -1 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(9), 1.0D0) + print '(A21,F4.1)', ' Gamum::ev1 (0.2) = ', value_d + if( abs(value_d - 0.2) .gt. 
rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(11), 1.0D0) + print '(A23,F5.3)', ' Gamum::ev4 (2.332) = ', value_d + if( abs(value_d - 2.332) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(14), 1.0D0) + print '(A36,F5.3)', ' Gamum::group0 [ev1+ev4] (2.532) = ', value_d + if( abs(value_d - 2.532) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A29,I3)', ' Gamum::papi_counter (36) = ', values(15) + if( abs(values(15) - 36) .gt. 0 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + do i=1,5 + call gamum_do_work() + end do + call xandria_do_work() + do i=1,217 + call recorder_do_work() + end do + + call papif_stop( eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_stop' + stop + endif + + value_d = transfer(values(9), 1.0D0) + print '(A21,F3.1)', ' Gamum::ev1 (0.7) = ', value_d + if( abs(value_d - 0.7) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(11), 1.0D0) + print '(A23,F5.3)', ' Gamum::ev4 (3.442) = ', value_d + if( abs(value_d - 3.442) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + value_d = transfer(values(14), 1.0D0) + print '(A36,F5.3)', ' Gamum::group0 [ev1+ev4] (4.142) = ', value_d + if( abs(value_d - 4.142) .gt. rounding_error(value_d) ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + print '(A29,I3)', ' Gamum::papi_counter (66) = ', values(15) + if( abs(values(15) - 66) .gt. 0 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + +!------------------------------------------------------------------------------- + print *,'' + +! 
16 + call papif_add_named_event(eventset, 'sde:::Lib_With_Recorder::simple_recording:CNT', ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_named_event' + stop + endif + +! 17 + call papif_add_named_event(eventset, 'sde:::Lib_With_Recorder::simple_recording:MIN', ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_named_event' + stop + endif + +! 18 + call papif_add_named_event(eventset, 'sde:::Lib_With_Recorder::simple_recording:Q1', ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_named_event' + stop + endif + +! 19 + call papif_add_named_event(eventset, 'sde:::Lib_With_Recorder::simple_recording:MED', ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_named_event' + stop + endif + +! 20 + call papif_add_named_event(eventset, 'sde:::Lib_With_Recorder::simple_recording:Q3', ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_named_event' + stop + endif + +! 21 + call papif_add_named_event(eventset, 'sde:::Lib_With_Recorder::simple_recording:MAX', ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_named_event' + stop + endif + + call papif_start( eventset, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_start' + stop + endif + + call papif_stop( eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_stop' + stop + endif + + print '(A51,I4)', ' Lib_With_Recorder::simple_recording:CNT (1036) = ', values(16) + if( abs(values(16) - 1036) .gt. 
0 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + + call c_f_pointer(transfer(values(17), quantile), quantile_f) + print '(A54,I6)', ' Lib_With_Recorder::simple_recording:MIN ( >0) = ', quantile_f + + call c_f_pointer(transfer(values(18), quantile), quantile_f) + print '(A54,I6)', ' Lib_With_Recorder::simple_recording:Q1 ( ~30864) = ', quantile_f + + call c_f_pointer(transfer(values(19), quantile), quantile_f) + print '(A54,I6)', ' Lib_With_Recorder::simple_recording:MED ( ~61728) = ', quantile_f + + call c_f_pointer(transfer(values(20), quantile), quantile_f) + print '(A54,I6)', ' Lib_With_Recorder::simple_recording:Q3 ( ~92592) = ', quantile_f + + call c_f_pointer(transfer(values(21), quantile), quantile_f) + print '(A54,I6)', ' Lib_With_Recorder::simple_recording:MAX (<123456) = ', quantile_f + +!------------------------------------------------------------------------------- + print *,'' + +! 22 + call papif_event_name_to_code('sde:::Xandria::XND_CREATED' , eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_name_to_code' + stop + endif + + call papif_add_event( eventset, eventcode, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_add_event' + stop + endif + + call papif_start( eventset, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_start' + stop + endif + + call xandria_do_work() + call xandria_do_work() + call xandria_do_work() + + call papif_stop( eventset, values, ret_val ) + if( ret_val .ne. PAPI_OK ) then + print *,'Error at papif_stop' + stop + endif + + print '(A30,I2)', ' Xandria::XND_CREATED (27) = ', values(22) + if( abs(values(22) - 27) .gt. 
0 ) then + print *,'^^^^^^^^^^^^^^^^^^^' + all_tests_passed = 0 + endif + +!------------------------------------------------------------------------------- +!------------------------------------------------------------------------------- +!------------------------------------------------------------------------------- + + call papif_shutdown( ) + + print *,'' + if( all_tests_passed .eq. 1 ) then + print *,'====> TEST PASSED' + else + print *,'====> TEST FAILED' + endif + + end program + + function rounding_error(param) result(ret_val) ! Comparison tolerance: 1/100000 of the magnitude of param. + real (KIND=KIND(0.0D0)) :: param, ret_val + + ret_val = abs(param)/100000.0 ! abs() keeps the tolerance non-negative when the reading is negative + end function rounding_error + + function f08_callback(param) result(ret_val) ! Counter callback: returns param converted to a C long long by int(). + use, intrinsic :: ISO_C_BINDING + implicit none + real :: param + integer(kind=C_LONG_LONG) :: ret_val + + ret_val = int(param, C_LONG_LONG) + end function f08_callback + diff -Nru papi-5.7.0+dfsg/src/components/sde/tests/Advanced_C+FORTRAN/Xandria.F90 papi-6.0.0~dfsg/src/components/sde/tests/Advanced_C+FORTRAN/Xandria.F90 --- papi-5.7.0+dfsg/src/components/sde/tests/Advanced_C+FORTRAN/Xandria.F90 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sde/tests/Advanced_C+FORTRAN/Xandria.F90 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,203 @@ + module xandria_mod + use ISO_C_BINDING, only: C_LONG_LONG, C_DOUBLE, C_ptr, C_funptr + use papi_sde_fortran_wrappers + implicit none + integer(kind=C_LONG_LONG), target :: cntr_i1, cntr_i2, cntr_rw_i1, cntr_iL + integer(kind=C_LONG_LONG), target :: cntr_i10, cntr_i20, cntr_i30 + real(kind=C_DOUBLE), target :: cntr_r1, cntr_r2, cntr_r3 + TYPE(C_ptr) :: xandria_sde_handle + end module + + function papi_sde_hook_list_events(fptr_struct) result(tmp_handle) bind(C) + use xandria_mod + use, intrinsic :: ISO_C_BINDING, only: C_ptr, C_null_ptr, C_int, C_F_procpointer + implicit none + type(fptr_struct_t) :: fptr_struct + type(C_ptr) :: tmp_handle + integer(kind=C_int) :: error_code + integer(kind=C_int) :: cntr_mode, rw_mode, 
cntr_type + + procedure(init_t), pointer :: init_fptr + procedure(register_counter_t), pointer :: reg_cntr_fptr + procedure(create_counter_t), pointer :: create_cntr_fptr + + cntr_mode = PAPI_SDE_RO+PAPI_SDE_DELTA + rw_mode = PAPI_SDE_RW+PAPI_SDE_INSTANT + cntr_type = PAPI_SDE_long_long + + call C_F_procpointer(fptr_struct%init, init_fptr) + tmp_handle = init_fptr(F_str_to_C('Xandria')) + + call C_F_procpointer(fptr_struct%register_counter, reg_cntr_fptr) + + call C_F_procpointer(fptr_struct%create_counter, create_cntr_fptr) + + error_code = reg_cntr_fptr(tmp_handle, F_str_to_C('EV_I1'), cntr_mode, cntr_type, C_null_ptr) + if( error_code .ne. PAPI_OK ) then + print *,'Error in Xandria:papi_sde_hook_list_events() ' + return + endif + + error_code = reg_cntr_fptr(tmp_handle, F_str_to_C('EV_I2'), cntr_mode, cntr_type, C_null_ptr) + if( error_code .ne. PAPI_OK ) then + print *,'Error in Xandria:papi_sde_hook_list_events() ' + return + endif + + error_code = reg_cntr_fptr(tmp_handle, F_str_to_C('RW_I1'), rw_mode, cntr_type, C_null_ptr) + if( error_code .ne. PAPI_OK ) then + print *,'Error in Xandria:papi_sde_hook_list_events() ' + return + endif + + error_code = reg_cntr_fptr(tmp_handle, F_str_to_C('EV_R1'), cntr_mode, cntr_type, C_null_ptr) + if( error_code .ne. PAPI_OK ) then + print *,'Error in Xandria:papi_sde_hook_list_events() ' + return + endif + + error_code = reg_cntr_fptr(tmp_handle, F_str_to_C('EV_R2'), cntr_mode, cntr_type, C_null_ptr) + if( error_code .ne. PAPI_OK ) then + print *,'Error in Xandria:papi_sde_hook_list_events() ' + return + endif + + error_code = reg_cntr_fptr(tmp_handle, F_str_to_C('EV_R3'), cntr_mode, cntr_type, C_null_ptr) + if( error_code .ne. PAPI_OK ) then + print *,'Error in Xandria:papi_sde_hook_list_events() ' + return + endif + + error_code = reg_cntr_fptr(tmp_handle, F_str_to_C('LATE'), cntr_mode, cntr_type, C_null_ptr) + if( error_code .ne. 
PAPI_OK ) then + print *,'Error in Xandria:papi_sde_hook_list_events() ' + return + endif + + error_code = create_cntr_fptr(tmp_handle, F_str_to_C('XND_CREATED'), cntr_type, C_null_ptr) + if( error_code .ne. PAPI_OK ) then + print *,'Error in Xandria:papi_sde_hook_list_events() ' + return + endif + + end function papi_sde_hook_list_events + + subroutine xandria_init + use, intrinsic :: ISO_C_BINDING + use :: xandria_mod + implicit none + + integer :: cntr_mode, rw_mode, cntr_type, error + + cntr_i1 = 0 + cntr_i2 = 0 + cntr_rw_i1 = 0 + cntr_i10 = 0 + cntr_i20 = 0 + cntr_i30 = 0 + cntr_iL = 0 + + + cntr_mode = PAPI_SDE_RO+PAPI_SDE_DELTA + rw_mode = PAPI_SDE_RW+PAPI_SDE_INSTANT + cntr_type = PAPI_SDE_long_long + call papif_sde_init('Xandria', xandria_sde_handle, error) + if( error .ne. PAPI_OK ) then + print *,'Error in papif_sde_register_counter() ' + stop + endif + + call papif_sde_register_counter(xandria_sde_handle, 'EV_I1', cntr_mode, cntr_type, C_loc(cntr_i1), error) + if( error .ne. PAPI_OK ) then + print *,'Error in papif_sde_register_counter() ' + stop + endif + + call papif_sde_register_counter(xandria_sde_handle, 'EV_I2', cntr_mode, cntr_type, C_loc(cntr_i2), error) + if( error .ne. PAPI_OK ) then + print *,'Error in papif_sde_register_counter() ' + stop + endif + + call papif_sde_register_counter(xandria_sde_handle, 'RW_I1', rw_mode, cntr_type, C_loc(cntr_rw_i1), error) + if( error .ne. PAPI_OK ) then + print *,'Error in papif_sde_register_counter() ' + stop + endif + + call papif_sde_register_counter(xandria_sde_handle, 'EV_R1', cntr_mode, cntr_type, C_loc(cntr_i10), error) + if( error .ne. PAPI_OK ) then + print *,'Error in papif_sde_register_counter() ' + stop + endif + + call papif_sde_register_counter(xandria_sde_handle, 'EV_R2', cntr_mode, cntr_type, C_loc(cntr_i20), error) + if( error .ne. 
PAPI_OK ) then + print *,'Error in papif_sde_register_counter() ' + stop + endif + + call papif_sde_register_counter(xandria_sde_handle, 'EV_R3', cntr_mode, cntr_type, C_loc(cntr_i30), error) + if( error .ne. PAPI_OK ) then + print *,'Error in papif_sde_register_counter() ' + stop + endif + + call papif_sde_create_counter(xandria_sde_handle, 'XND_CREATED', cntr_type, C_null_ptr, error) + if( error .ne. PAPI_OK ) then + print *,'Error in papif_sde_create_counter() ' + return + endif + end subroutine + + subroutine xandria_add_more + use, intrinsic :: ISO_C_BINDING + use :: xandria_mod + implicit none + + integer :: cntr_mode, cntr_type, error + cntr_mode = PAPI_SDE_RO+PAPI_SDE_DELTA + cntr_type = PAPI_SDE_long_long + + call papif_sde_register_counter(xandria_sde_handle, 'LATE', cntr_mode, cntr_type, C_loc(cntr_iL), error) + if( error .ne. PAPI_OK ) then + print *,'Error in papif_sde_register_counter() ' + stop + endif + + end subroutine + + subroutine xandria_do_work + use, intrinsic :: ISO_C_BINDING + use :: xandria_mod + + implicit none + + TYPE(C_ptr) :: cntr_handle + integer :: error + + cntr_i1 = cntr_i1+1 + cntr_i2 = cntr_i2+3 + + cntr_rw_i1 = cntr_rw_i1 + 7 + + cntr_i10 = cntr_i10+10 + cntr_i20 = cntr_i20+20 + cntr_i30 = cntr_i30+30 + + cntr_iL = cntr_iL+7 + + call papif_sde_get_counter_handle(xandria_sde_handle, 'XND_CREATED', cntr_handle, error) + if( error .ne. PAPI_OK ) then + print *,'Error in papif_sde_get_counter_handle() ' + stop + endif + + call papif_sde_inc_counter( cntr_handle, 9_8, error) + if( error .ne. 
PAPI_OK ) then
+         print *,'Error in papif_sde_inc_counter() '
+         stop
+      endif
+
+      end subroutine
+
diff -Nru papi-5.7.0+dfsg/src/components/sde/tests/Makefile papi-6.0.0~dfsg/src/components/sde/tests/Makefile
--- papi-5.7.0+dfsg/src/components/sde/tests/Makefile 1970-01-01 00:00:00.000000000 +0000
+++ papi-6.0.0~dfsg/src/components/sde/tests/Makefile 2020-03-04 15:56:58.000000000 +0000
@@ -0,0 +1,90 @@
+NAME=sde
+include ../../Makefile_comp_tests.target
+INCLUDE += -I../interface -I..
+ifeq ($(notdir $(F77)),gfortran)
+    FFLAGS +=-ffree-form -ffree-line-length-none
+else
+    FFLAGS +=-free
+endif
+FFLAGS +=-g
+#CFLAGS +=-g
+
+LDFLAGS +=-L..
+SDE_F08_API=$(datadir)/sde_F.o
+
+EXTRA_SRC=../interface/papi_sde_interface.c
+
+TESTS = Minimal_Test Simple_Test Simple2_Test Recorder_Test sde_test_f08
+
+# These targets name actions, not files; keep them working even if a file
+# called "sde_tests", "clean" or "mrproper" appears in this directory.
+.PHONY: sde_tests clean mrproper
+
+sde_tests: $(TESTS)
+
+# weak_symbols.o is linked into every lib*.so built below, so it has to be
+# compiled as position-independent code like the other objects in them.
+weak_symbols.o: $(EXTRA_SRC)
+	$(CC) -Wall -fPIC -c $< -o $@
+
+################################################################################
+## Minimal test
+prfx=Minimal
+
+Minimal_Test: $(prfx)/Minimal_Test.c
+	$(CC) $< -o $@ $(INCLUDE) $(CFLAGS) $(PAPILIB) $(LDFLAGS)
+
+################################################################################
+## Simple test
+prfx=Simple
+
+$(prfx)/libSimple.so: $(prfx)/Simple_Lib.c weak_symbols.o
+	$(CC) -shared -Wall -fPIC $(CFLAGS) $(INCLUDE) -o $@ $^
+
+Simple_Test: $(prfx)/Simple_Driver.c $(prfx)/libSimple.so
+	$(CC) $^ -o $@ $(INCLUDE) $(CFLAGS) $(PAPILIB) $(LDFLAGS) -lm
+
+################################################################################
+## Simple2 test
+prfx=Simple2
+
+$(prfx)/libSimple2.so: $(prfx)/Simple2_Lib.c weak_symbols.o
+	$(CC) -shared -Wall -fPIC $(CFLAGS) $(INCLUDE) -o $@ $^
+
+Simple2_Test: $(prfx)/Simple2_Driver.c $(prfx)/libSimple2.so
+	$(CC) $^ -o $@ $(INCLUDE) $(CFLAGS) $(PAPILIB) $(LDFLAGS) -lm
+
+################################################################################
+## Recorder test
+prfx=Recorder
+
+$(prfx)/libRecorder.so: $(prfx)/Lib_With_Recorder.c weak_symbols.o
+	$(CC) -shared -Wall -fPIC $(CFLAGS) $(INCLUDE) -o $@ $^
+
+Recorder_Test: $(prfx)/Recorder_Driver.c $(prfx)/libRecorder.so
+	$(CC) $^ -o $@ $(INCLUDE) $(CFLAGS) $(PAPILIB) $(LDFLAGS) -lm
+
+################################################################################
+## Advanced test
+prfx=Advanced_C+FORTRAN
+rcrd_prfx=Recorder
+
+sde_test_f08: $(prfx)/sde_test_f08.F90 $(UTILOBJS) $(PAPILIB) $(prfx)/libXandria.so $(prfx)/libGamum.so $(rcrd_prfx)/libRecorder.so
+	$(F77) $< -o $@ $(INCLUDE) $(FFLAGS) $(prfx)/libXandria.so $(prfx)/libGamum.so $(rcrd_prfx)/libRecorder.so $(PAPILIB) $(LDFLAGS)
+
+$(prfx)/libXandria.so: $(prfx)/Xandria.F90 weak_symbols.o
+	$(F77) -shared -Wall -fPIC $(FFLAGS) $(INCLUDE) -o $@ $^ $(SDE_F08_API)
+
+$(prfx)/libGamum.so: $(prfx)/Gamum.c weak_symbols.o
+	$(CC) -shared -Wall -fPIC $(CFLAGS) $(INCLUDE) -o $@ $^
+
+
+LIBS=Advanced_C+FORTRAN/libXandria.so Advanced_C+FORTRAN/libGamum.so Recorder/libRecorder.so Simple/libSimple.so Simple2/libSimple2.so
+
+clean:
+	rm -f *.o *.mod $(LIBS)
+
+mrproper: clean
+	rm -f $(TESTS)
+
+
diff -Nru papi-5.7.0+dfsg/src/components/sde/tests/Minimal/Minimal_Test.c papi-6.0.0~dfsg/src/components/sde/tests/Minimal/Minimal_Test.c
--- papi-5.7.0+dfsg/src/components/sde/tests/Minimal/Minimal_Test.c 1970-01-01 00:00:00.000000000 +0000
+++ papi-6.0.0~dfsg/src/components/sde/tests/Minimal/Minimal_Test.c 2020-03-04 15:56:58.000000000 +0000
@@ -0,0 +1,64 @@
+#include
+#include
+#include
+#include "papi.h"
+#include "papi_sde_interface.h"
+
+long long local_var;
+
+void mintest_init(void){
+    local_var =0;
+    papi_handle_t handle = papi_sde_init("Min Example Code");
+    papi_sde_register_counter(handle, "Example Event", PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &local_var);
+}
+
+void mintest_dowork(void){
+    local_var += 7;
+}
+
+int main(int argc, char **argv){
+    int ret, Eventset = PAPI_NULL;
+    long long counter_values[1];
+
+    (void)argc;
+    (void)argv;
+
+    mintest_init();
+
+    // --- Setup PAPI
+    
if((ret=PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT){ + fprintf(stderr,"PAPI_library_init() error:%s \n",PAPI_strerror(ret)); + exit(-1); + } + + if((ret=PAPI_create_eventset(&Eventset)) != PAPI_OK){ + fprintf(stderr,"PAPI_create_eventset() error:%s \n",PAPI_strerror(ret)); + exit(-1); + } + + if((ret=PAPI_add_named_event(Eventset, "sde:::Min Example Code::Example Event")) != PAPI_OK){ + fprintf(stderr,"PAPI_add_named_event() error:%s \n",PAPI_strerror(ret)); + exit(-1); + } + + // --- Start PAPI + if((ret=PAPI_start(Eventset)) != PAPI_OK){ + fprintf(stderr,"PAPI_start error:%s \n",PAPI_strerror(ret)); + exit(-1); + } + + mintest_dowork(); + + // --- Stop PAPI + if((ret=PAPI_stop(Eventset, counter_values)) != PAPI_OK){ + fprintf(stderr,"PAPI_stop error:%s \n",PAPI_strerror(ret)); + exit(-1); + } + + if( counter_values[0] == 7 ) + printf("Success: counter value is %lld, as expected.\n",counter_values[0]); + else + printf("Error: counter value is %lld, when it should be 7.\n",counter_values[0]); + + return 0; +} diff -Nru papi-5.7.0+dfsg/src/components/sde/tests/Recorder/Lib_With_Recorder.c papi-6.0.0~dfsg/src/components/sde/tests/Recorder/Lib_With_Recorder.c --- papi-5.7.0+dfsg/src/components/sde/tests/Recorder/Lib_With_Recorder.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sde/tests/Recorder/Lib_With_Recorder.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,39 @@ +#include +#include +#include +#include "papi_sde_interface.h" + +static const char *event_names[1] = { + "simple_recording" +}; + +void *rcrd_handle; + +// API functions. 
+void recorder_init_(void); +void recorder_do_work_(void); + +void recorder_init_(void){ + papi_handle_t tmp_handle; + + tmp_handle = papi_sde_init("Lib_With_Recorder"); + papi_sde_create_recorder(tmp_handle, event_names[0], sizeof(long long), papi_sde_compare_long_long, &rcrd_handle); + + return; +} + +void recorder_do_work_(void){ + long long r = random()%123456; + papi_sde_record(rcrd_handle, sizeof(r), &r); + return; +} + +// Hook for papi_native_avail utility. No user code which links against this library should call +// this function because it has the same name in all SDE-enabled libraries. papi_native_avail +// uses dlopen and dlclose on each library so it only has one version of this symbol at a time. +papi_handle_t papi_sde_hook_list_events( papi_sde_fptr_struct_t *fptr_struct){ + papi_handle_t tmp_handle; + tmp_handle = fptr_struct->init("Lib_With_Recorder"); + fptr_struct->create_recorder(tmp_handle, event_names[0], sizeof(long long), papi_sde_compare_long_long, &rcrd_handle); + return tmp_handle; +} diff -Nru papi-5.7.0+dfsg/src/components/sde/tests/Recorder/Recorder_Driver.c papi-6.0.0~dfsg/src/components/sde/tests/Recorder/Recorder_Driver.c --- papi-5.7.0+dfsg/src/components/sde/tests/Recorder/Recorder_Driver.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sde/tests/Recorder/Recorder_Driver.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,81 @@ +#include +#include +#include +#include "papi.h" + +void recorder_init_(void); +void recorder_do_work_(void); +int setup_PAPI(int *event_set); + +int main(int argc, char **argv){ + int i, j, ret, event_set = PAPI_NULL; + long long counter_values[2]; + + (void)argc; + (void)argv; + + recorder_init_(); + + if( 0 != setup_PAPI(&event_set) ) + exit(-1); + + // --- Start PAPI + if((ret=PAPI_start(event_set)) != PAPI_OK){ + fprintf(stderr,"PAPI_start error:%s \n",PAPI_strerror(ret)); + exit(-1); + } + + for(i=0; i<10; i++){ + + recorder_do_work_(); + + // --- read the event counters + 
if((ret=PAPI_read(event_set, counter_values)) != PAPI_OK){ + fprintf(stderr,"PAPI_stop error:%s \n",PAPI_strerror(ret)); + exit(-1); + } + + printf("The number of recordings is: %lld (ptr is: %lld)\n",counter_values[0],counter_values[1]); + for(j=0; j +#include +#include +#include "papi.h" + +int setup_PAPI(int *event_set); +void simple_init(void); +double simple_compute(double x); + +int main(int argc, char **argv){ + int i,ret, event_set = PAPI_NULL; + long long counter_values[4]; + double *dbl_ptr; + + (void)argc; + (void)argv; + + simple_init(); + + if( 0 != setup_PAPI(&event_set) ) + exit(-1); + + // --- Start PAPI + if((ret=PAPI_start(event_set)) != PAPI_OK){ + fprintf(stderr,"PAPI_start error:%s \n",PAPI_strerror(ret)); + exit(-1); + } + + for(i=0; i<10; i++){ + double sum; + + sum = simple_compute(0.87*i); + printf("sum=%lf\n",sum); + + // --- read the event counters + if((ret=PAPI_read(event_set, counter_values)) != PAPI_OK){ + fprintf(stderr,"PAPI_stop error:%s \n",PAPI_strerror(ret)); + exit(-1); + } + + // PAPI has packed the bits of the double inside the long long. + dbl_ptr = (double *)&counter_values[3]; + printf("Low Mark=%lld, High Mark=%lld, Total Iterations=%lld, Comp. 
Value=%lf\n", + counter_values[0], counter_values[1], counter_values[2], *dbl_ptr); + } + + // --- Stop PAPI + if((ret=PAPI_stop(event_set, counter_values)) != PAPI_OK){ + fprintf(stderr,"PAPI_stop error:%s \n",PAPI_strerror(ret)); + exit(-1); + } + + return 0; +} + + +int setup_PAPI(int *event_set){ + int ret; + + if((ret=PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT){ + fprintf(stderr,"PAPI_library_init() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + if((ret=PAPI_create_eventset(event_set)) != PAPI_OK){ + fprintf(stderr,"PAPI_create_eventset() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + if((ret=PAPI_add_named_event(*event_set, "sde:::Simple::LOW_WATERMARK_REACHED")) != PAPI_OK){ + fprintf(stderr,"PAPI_add_named_event() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + if((ret=PAPI_add_named_event(*event_set, "sde:::Simple::HIGH_WATERMARK_REACHED")) != PAPI_OK){ + fprintf(stderr,"PAPI_add_named_event() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + if((ret=PAPI_add_named_event(*event_set, "sde:::Simple::TOTAL_ITERATIONS")) != PAPI_OK){ + fprintf(stderr,"PAPI_add_named_event() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + if((ret=PAPI_add_named_event(*event_set, "sde:::Simple::COMPUTED_VALUE")) != PAPI_OK){ + fprintf(stderr,"PAPI_add_named_event() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + return 0; +} + diff -Nru papi-5.7.0+dfsg/src/components/sde/tests/Simple/Simple_Lib.c papi-6.0.0~dfsg/src/components/sde/tests/Simple/Simple_Lib.c --- papi-5.7.0+dfsg/src/components/sde/tests/Simple/Simple_Lib.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sde/tests/Simple/Simple_Lib.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include "papi_sde_interface.h" + +// API functions +void simple_init(void); +double simple_compute(double x); + +// The following counters are hidden to programs linking with +// this library, so they can not be 
accessed directly. +static double comp_value; +static long long int total_iter_cnt, low_wtrmrk, high_wtrmrk; +static papi_handle_t handle; + +static const char *ev_names[4] = { + "COMPUTED_VALUE", + "TOTAL_ITERATIONS", + "LOW_WATERMARK_REACHED", + "HIGH_WATERMARK_REACHED" +}; + + +void simple_init(void){ + + // Initialize library specific variables + comp_value = 0.0; + total_iter_cnt = 0; + low_wtrmrk = 0; + high_wtrmrk = 0; + + // Initialize PAPI SDEs + handle = papi_sde_init("Simple"); + papi_sde_register_counter(handle, ev_names[0], PAPI_SDE_RO|PAPI_SDE_INSTANT, PAPI_SDE_double, &comp_value); + papi_sde_register_counter(handle, ev_names[1], PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &total_iter_cnt); + papi_sde_register_counter(handle, ev_names[2], PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &low_wtrmrk); + papi_sde_register_counter(handle, ev_names[3], PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &high_wtrmrk); + + return; +} + +// Perform some nonsense computation to emulate a possible library behavior. +// Notice that no SDE routines need to be called in the critical path of the library. +double simple_compute(double x){ + double sum = 0.0; + int lcl_iter = 0; + + while( 1 ){ + double y,tmp; + lcl_iter++; + + // Compute a function with range [0:1] so we can iterate + // multiple times without diverging or creating FP exceptions. + tmp = (2.0+sin(314.0*x)+sin(11.0*x*x)); + tmp = tmp*tmp; + y = 0.5+sin(3.5*tmp/4.0)/2.0; + + // Now set the next x to be the current y, so we can iterate again. + x = y; + + // Add y to sum unconditionally + sum += y; + + if( y < 0.1 ){ + low_wtrmrk++; + continue; + } + + if( y > 0.9 ){ + high_wtrmrk++; + continue; + } + + // Only add y to comp_value if y is between the low and high watermarks. 
+        comp_value += y;
+
+        // If some condition is met, terminate the loop
+        if( 0.67 < y && y < 0.68 )
+            break;
+    }
+    total_iter_cnt += lcl_iter;
+
+    return sum;
+}
diff -Nru papi-5.7.0+dfsg/src/components/sde/tests/Simple2/Simple2_Driver.c papi-6.0.0~dfsg/src/components/sde/tests/Simple2/Simple2_Driver.c
--- papi-5.7.0+dfsg/src/components/sde/tests/Simple2/Simple2_Driver.c 1970-01-01 00:00:00.000000000 +0000
+++ papi-6.0.0~dfsg/src/components/sde/tests/Simple2/Simple2_Driver.c 2020-03-04 15:56:58.000000000 +0000
@@ -0,0 +1,101 @@
+#include
+#include
+#include
+#include "papi.h"
+
+int setup_PAPI(int *event_set);
+void simple_init(void);
+double simple_compute(double x);
+
+// Driver for the Simple2 SDE test: starts the event set built by setup_PAPI(),
+// calls the library ten times and prints the five SDE values after each call.
+int main(int argc, char **argv){
+    int i,ret, event_set = PAPI_NULL;
+    long long counter_values[5];
+    double *dbl_ptr;
+
+    (void)argc;
+    (void)argv;
+
+    simple_init();
+
+    if( 0 != setup_PAPI(&event_set) )
+        exit(-1);
+
+    // --- Start PAPI
+    if((ret=PAPI_start(event_set)) != PAPI_OK){
+        fprintf(stderr,"PAPI_start error:%s \n",PAPI_strerror(ret));
+        exit(-1);
+    }
+
+    for(i=0; i<10; i++){
+        double sum;
+
+        sum = simple_compute(0.87*i);
+        printf("sum=%lf\n",sum);
+
+        // --- read the event counters
+        if((ret=PAPI_read(event_set, counter_values)) != PAPI_OK){
+            fprintf(stderr,"PAPI_read error:%s \n",PAPI_strerror(ret));
+            exit(-1);
+        }
+
+        // PAPI has packed the bits of the double inside the long long.
+        // COMPUTED_VALUE is the fifth event added by setup_PAPI(), i.e. index 4;
+        // index 3 holds TOTAL_ITERATIONS and is printed as an integer below.
+        dbl_ptr = (double *)&counter_values[4];
+        printf("Low Watermark=%lld, High Watermark=%lld, Any Watermark=%lld, Total Iterations=%lld, Comp. 
Value=%lf\n", + counter_values[0], counter_values[1], counter_values[2], counter_values[3], *dbl_ptr); + } + + // --- Stop PAPI + if((ret=PAPI_stop(event_set, counter_values)) != PAPI_OK){ + fprintf(stderr,"PAPI_stop error:%s \n",PAPI_strerror(ret)); + exit(-1); + } + + return 0; +} + + +int setup_PAPI(int *event_set){ + int ret; + + if((ret=PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT){ + fprintf(stderr,"PAPI_library_init() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + if((ret=PAPI_create_eventset(event_set)) != PAPI_OK){ + fprintf(stderr,"PAPI_create_eventset() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + if((ret=PAPI_add_named_event(*event_set, "sde:::Simple::LOW_WATERMARK_REACHED")) != PAPI_OK){ + fprintf(stderr,"PAPI_add_named_event() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + if((ret=PAPI_add_named_event(*event_set, "sde:::Simple::HIGH_WATERMARK_REACHED")) != PAPI_OK){ + fprintf(stderr,"PAPI_add_named_event() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + if((ret=PAPI_add_named_event(*event_set, "sde:::Simple::ANY_WATERMARK_REACHED")) != PAPI_OK){ + fprintf(stderr,"PAPI_add_named_event() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + if((ret=PAPI_add_named_event(*event_set, "sde:::Simple::TOTAL_ITERATIONS")) != PAPI_OK){ + fprintf(stderr,"PAPI_add_named_event() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + if((ret=PAPI_add_named_event(*event_set, "sde:::Simple::COMPUTED_VALUE")) != PAPI_OK){ + fprintf(stderr,"PAPI_add_named_event() error:%s \n",PAPI_strerror(ret)); + return -1; + } + + return 0; +} + diff -Nru papi-5.7.0+dfsg/src/components/sde/tests/Simple2/Simple2_Lib.c papi-6.0.0~dfsg/src/components/sde/tests/Simple2/Simple2_Lib.c --- papi-5.7.0+dfsg/src/components/sde/tests/Simple2/Simple2_Lib.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sde/tests/Simple2/Simple2_Lib.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,123 @@ +#include +#include +#include +#include 
+#include "papi_sde_interface.h" + +// API functions +void simple_init(void); +double simple_compute(double x); + +// The following counters are hidden to programs linking with +// this library, so they can not be accessed directly. +static double comp_value; +static long long int total_iter_cnt, low_wtrmrk, high_wtrmrk; +static papi_handle_t handle; + +static const char *ev_names[4] = { + "COMPUTED_VALUE", + "TOTAL_ITERATIONS", + "LOW_WATERMARK_REACHED", + "HIGH_WATERMARK_REACHED" +}; + +long long int compute_sde( void *param ); + +void simple_init(void){ + + // Initialize library specific variables + comp_value = 0.0; + total_iter_cnt = 0; + low_wtrmrk = 0; + high_wtrmrk = 0; + + // Initialize PAPI SDEs + handle = papi_sde_init("Simple"); + papi_sde_register_fp_counter(handle, ev_names[0], PAPI_SDE_RO|PAPI_SDE_INSTANT, PAPI_SDE_double, compute_sde, &comp_value); + papi_sde_register_counter(handle, ev_names[1], PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &total_iter_cnt); + papi_sde_register_counter(handle, ev_names[2], PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &low_wtrmrk); + papi_sde_register_counter(handle, ev_names[3], PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &high_wtrmrk); + papi_sde_add_counter_to_group(handle, ev_names[2], "ANY_WATERMARK_REACHED", PAPI_SDE_SUM); + papi_sde_add_counter_to_group(handle, ev_names[3], "ANY_WATERMARK_REACHED", PAPI_SDE_SUM); + + return; +} + +// The following function will _NOT_ be called by other libray functions or normal +// applications. It is a hook for the utility 'papi_native_avail' to be able to +// discover the SDEs that are exported by this library. 
+papi_handle_t papi_sde_hook_list_events( papi_sde_fptr_struct_t *fptr_struct){
+    // Same registrations as simple_init(), but routed through the function
+    // pointers handed in by the caller instead of the papi_sde_* entry points.
+    handle = fptr_struct->init("Simple");
+    fptr_struct->register_fp_counter(handle, ev_names[0], PAPI_SDE_RO|PAPI_SDE_INSTANT, PAPI_SDE_double, compute_sde, &comp_value);
+    fptr_struct->register_counter(handle, ev_names[1], PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &total_iter_cnt);
+    fptr_struct->register_counter(handle, ev_names[2], PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &low_wtrmrk);
+    fptr_struct->register_counter(handle, ev_names[3], PAPI_SDE_RO|PAPI_SDE_DELTA, PAPI_SDE_long_long, &high_wtrmrk);
+    fptr_struct->add_counter_to_group(handle, ev_names[2], "ANY_WATERMARK_REACHED", PAPI_SDE_SUM);
+    fptr_struct->add_counter_to_group(handle, ev_names[3], "ANY_WATERMARK_REACHED", PAPI_SDE_SUM);
+
+    // Human-readable descriptions for each exported event.
+    fptr_struct->describe_counter(handle, ev_names[0], "Sum of values that are within the watermarks.");
+    fptr_struct->describe_counter(handle, ev_names[1], "Total iterations executed by the library.");
+    fptr_struct->describe_counter(handle, ev_names[2], "Number of times a value was below the low watermark.");
+    fptr_struct->describe_counter(handle, ev_names[3], "Number of times a value was above the high watermark.");
+    fptr_struct->describe_counter(handle, "ANY_WATERMARK_REACHED", "Number of times a value was not between the two watermarks.");
+
+    return handle;
+}
+
+// This function allows the library to perform operations in order to compute the value of an SDE at run-time
+// param points to the double being exported; the return value carries the bit
+// pattern of (2.0 * that double), not its integer conversion.
+long long compute_sde( void *param ){
+    // Reading the double through a union member keeps the bit-for-bit packing
+    // without dereferencing a long long* that really points at a double.
+    union { double dbl; long long ll; } bits;
+    double *dbl_ptr = (double *)param;
+
+    // Scale the variable by a factor of two. Real libraries will do meaningful work here.
+    bits.dbl = *dbl_ptr * 2.0;
+
+    // Pack the bits of the result in a long long int. This ugliness will go away as soon as we have a C++ API.
+    return bits.ll;
+}
+
+// Perform some nonsense computation to emulate a possible library behavior. 
+// Notice that no SDE routines need to be called in the critical path of the library. +double simple_compute(double x){ + double sum = 0.0; + int lcl_iter = 0; + + while( 1 ){ + double y,tmp; + lcl_iter++; + + // Compute a function with range [0:1] so we can iterate + // multiple times without diverging or creating FP exceptions. + tmp = (2.0+sin(314.0*x)+sin(11.0*x*x)); + tmp = tmp*tmp; + y = 0.5+sin(3.5*tmp/4.0)/2.0; + + // Now set the next x to be the current y, so we can iterate again. + x = y; + + // Add y to sum unconditionally + sum += y; + + if( y < 0.1 ){ + low_wtrmrk++; + continue; + } + + if( y > 0.9 ){ + high_wtrmrk++; + continue; + } + + // Only add y to comp_value if y is between the low and high watermarks. + comp_value += y; + + // If some condition is met, terminate the loop + if( 0.67 < y && y < 0.68 ) + break; + } + total_iter_cnt += lcl_iter; + + return sum; +} diff -Nru papi-5.7.0+dfsg/src/components/sensors_ppc/linux-sensors-ppc.c papi-6.0.0~dfsg/src/components/sensors_ppc/linux-sensors-ppc.c --- papi-5.7.0+dfsg/src/components/sensors_ppc/linux-sensors-ppc.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sensors_ppc/linux-sensors-ppc.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,853 @@ +/** + * @file linux-sensors_ppc.c + * @author Philip Vaccaro + * @ingroup papi_components + * @brief sensors_ppc component + * + * To work, the sensors_ppc kernel module must be loaded. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "linux-sensors-ppc.h" + +papi_vector_t _sensors_ppc_vector; + +/***************************************************************************/ +/****** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT *******/ +/***************************************************************************/ + +/* Null terminated version of strncpy */ +static char * +_local_strlcpy( char *dst, const char *src, size_t size ) +{ + char *retval = strncpy( dst, src, size ); + if ( size > 0 ) dst[size-1] = '\0'; + + return( retval ); +} + +#define DESC_LINE_SIZE_ALLOWED 66 +static void +_space_padding(char *buf, size_t max) +{ + size_t len = strlen(buf); + /* 80 columns - 12 header - 2 footer*/ + size_t nlines = 1+ len / DESC_LINE_SIZE_ALLOWED, c = len; + /* space_padding */ + for (; c < nlines * DESC_LINE_SIZE_ALLOWED && c < max-1; ++c) buf[c] = ' '; + buf[c] = '\0'; +} + +/** @brief Refresh_data locks in write and update ping and pong at + * the same time for OCC occ_id. + * The occ_names array contains constant memory and doesn't + * need to be updated. + * Ping and Pong are read outside of the critical path, and + * only the swap needs to be protected. 
+ * */
+/* occ_id selects the OCC whose buffers are refreshed; a non-zero `forced`
+ * skips the OCC_REFRESH_TIME age check. */
+static void
+refresh_data(int occ_id, int forced)
+{
+    long long now = PAPI_get_real_nsec();
+    if (forced || now > last_refresh[occ_id] + OCC_REFRESH_TIME) {
+        void *buf = double_ping[occ_id];
+
+        uint32_t ping_off = be32toh(occ_hdr[occ_id]->reading_ping_offset);
+        uint32_t pong_off = be32toh(occ_hdr[occ_id]->reading_pong_offset);
+
+        lseek (event_fd, occ_id * OCC_SENSOR_DATA_BLOCK_SIZE + ping_off, SEEK_SET);
+
+        /* To limit risks of being desynchronized, we read one chunk */
+        /* In memory, ping and pong are 40kB, with a 4kB buffer
+         * of nothingness in between */
+        int to_read = pong_off - ping_off + OCC_PING_DATA_BLOCK_SIZE;
+
+        int rc, bytes;
+        /* copy memory iteratively until the full chunk is saved */
+        for (rc = bytes = 0; bytes < to_read; bytes += rc) {
+            rc = read(event_fd, buf + bytes, to_read - bytes);
+            if (!rc || rc < 0) /* done */ break;
+        }
+        /* NOTE(review): a short or failed read still publishes buf below;
+         * confirm that is acceptable for callers. */
+
+        /* Swap the freshly filled buffer in; the previous ping buffer becomes
+         * the spare that the next refresh will fill. */
+        papi_sensors_ppc_lock();
+        double_ping[occ_id] = ping[occ_id];
+        ping[occ_id] = buf;
+        pong[occ_id] = ping[occ_id] + (pong_off - ping_off);
+        last_refresh[occ_id] = now;
+        papi_sensors_ppc_unlock();
+    }
+}
+
+/* Integer power x^y returned as a double. y == 0 yields 1, then x == 0 yields 0
+ * (also for negative y); a negative y yields the reciprocal of x^(-y). */
+static double
+_pow(int x, int y)
+{
+    if (0 == y) return 1.;
+    if (0 == x) return 0.;
+    if (0 > y) return 1. / _pow(x, -y);
+    if (1 == y) return 1. * x;
+
+    /* Square the half power once instead of recursing twice per level. */
+    double half = _pow(x, y/2);
+    if (0 == y%2) return half * half;
+    else return half * half * x;
+}
+
+/* Decode a packed value: the low 8 bits are a signed power-of-ten exponent,
+ * the remaining upper bits are the mantissa. */
+#define TO_FP(f) (((f) >> 8) * _pow(10, ((int8_t)((f) & 0xFF))))
+
+/* Read field `midx` of full-record sensor `gidx` on OCC `s`, taken from
+ * whichever of the ping/pong copies has the newer timestamp.
+ * Returns the placeholder 41 when neither buffer is available. */
+static long long
+read_sensors_ppc_record(int s, int gidx, int midx)
+{
+    uint64_t value = 41;
+    uint32_t offset = be32toh(occ_names[s][gidx].reading_offset);
+    uint32_t scale = be32toh(occ_names[s][gidx].scale_factor);
+    uint32_t freq = be32toh(occ_names[s][gidx].freq);
+
+    occ_sensor_record_t *record = NULL;
+    /* Let's see if the data segment needs a refresh */
+    refresh_data(s, 0);
+
+    papi_sensors_ppc_lock();
+    occ_sensor_record_t *sping = (occ_sensor_record_t *)((uint64_t)ping[s] + offset);
+    occ_sensor_record_t *spong = (occ_sensor_record_t *)((uint64_t)pong[s] + offset);
+
+    /* Test the buffers of this OCC: sping/spong are derived from ping[s] and
+     * pong[s], not from the first OCC's buffers. */
+    if (ping[s] && pong[s]) {
+        if (be64toh(sping->timestamp) > be64toh(spong->timestamp))
+            record = sping;
+        else
+            record = spong;
+    } else if (ping[s]) {
+        record = sping;
+    } else if (pong[s]) {
+        record = spong;
+    } else {
+        /* Nothing to read: release the lock before bailing out. */
+        papi_sensors_ppc_unlock();
+        return value;
+    }
+
+    switch (midx) {
+    case OCC_SENSORS_ACCUMULATOR_TAG:
+        /* freq, per sensor, contains freq sampling for the last 500us of accumulation */
+        value = (uint64_t)(be64toh(record->accumulator) / TO_FP(freq));
+        break;
+    default:
+        /* That one might upset people
+         * All the entries below sample (including it) are uint16_t packed */
+        value = (uint64_t)(be16toh((&record->sample)[midx]) * TO_FP(scale));
+        break;
+    }
+    papi_sensors_ppc_unlock();
+
+    return value;
+}
+
+/* Read the scaled accumulator of counter-type sensor `gidx` on OCC `s` from
+ * the newer of the ping/pong copies.
+ * Returns the placeholder 40 when neither buffer is available. */
+static long long
+read_sensors_ppc_counter(int s, int gidx)
+{
+    uint32_t offset = be32toh(occ_names[s][gidx].reading_offset);
+    uint32_t scale = be32toh(occ_names[s][gidx].scale_factor);
+
+    occ_sensor_counter_t *counter = NULL;
+
+    refresh_data(s, 0);
+
+    papi_sensors_ppc_lock();
+    occ_sensor_counter_t *sping = (occ_sensor_counter_t *)((uint64_t)ping[s] + offset);
+    occ_sensor_counter_t *spong = (occ_sensor_counter_t *)((uint64_t)pong[s] + offset);
+
+    if (ping[s] && pong[s]) {
+        if (be64toh(sping->timestamp) > be64toh(spong->timestamp))
+            counter = sping;
+        else
+            counter = spong;
+    } else if (ping[s]) {
+        counter = sping;
+    } else if (pong[s]) {
+        counter = spong;
+    } else {
+        /* Nothing to read: release the lock before bailing out. */
+        papi_sensors_ppc_unlock();
+        return 40;
+    }
+
+    uint64_t value = be64toh(counter->accumulator) * TO_FP(scale);
+    papi_sensors_ppc_unlock();
+
+    return value;
+}
+
+/* Tell whether the sensor behind native event `index` is a counter-type
+ * sensor. Returns 0 for an index that maps to no OCC. */
+static int
+_sensors_ppc_is_counter(int index)
+{
+    int s = 0;
+    /* get OCC s from index: OCC s owns [occ_num_events[s], occ_num_events[s+1]),
+     * so an index equal to occ_num_events[s+1] already belongs to the next OCC.
+     * The bound on s is tested first so occ_num_events is never read past its end. */
+    for (; s < MAX_OCCS && index >= occ_num_events[s+1]; ++s);
+    if (index < 0 || MAX_OCCS == s)
+        return 0;
+
+    int ridx = index - occ_num_events[s];
+    int gidx = ridx / OCC_SENSORS_MASKS;
+    return (OCC_SENSOR_READING_COUNTER == occ_names[s][gidx].structure_type);
+}
+
+/* Read the current value of native event `index`. The index is split into the
+ * owning OCC, the sensor within it (gidx) and the field of that sensor (midx).
+ * Returns the placeholder 42 when there is nothing to report for the index. */
+static long long
+read_sensors_ppc_value( int index )
+{
+    int s = 0;
+    /* get OCC s from index (see _sensors_ppc_is_counter for the range rule) */
+    for (; s < MAX_OCCS && index >= occ_num_events[s+1]; ++s);
+    if (index < 0 || MAX_OCCS == s)
+        return 42;
+
+    int ridx = index - occ_num_events[s];
+    int gidx = ridx / OCC_SENSORS_MASKS;
+    int midx = ridx % OCC_SENSORS_MASKS;
+    uint8_t structure_type = occ_names[s][gidx].structure_type;
+
+    switch (structure_type) {
+    case OCC_SENSOR_READING_FULL:
+        return read_sensors_ppc_record(s, gidx, midx);
+    case OCC_SENSOR_READING_COUNTER:
+        if (OCC_SENSORS_ACCUMULATOR_TAG == midx)
+            return read_sensors_ppc_counter(s, gidx);
+        /* fallthrough: counters only return the accumulator */
+    default:
+        return 42;
+    }
+}
+
+
+/************************* PAPI Functions **********************************/
+
+/*
+ * This is called whenever a thread is initialized
+ */
+static int
+_sensors_ppc_init_thread( hwd_context_t *ctx )
+{
+    (void) ctx;
+
+    return PAPI_OK;
+}
+
+/*
+ * Called when PAPI process is initialized (i.e. 
PAPI_library_init) + */ +static int +_sensors_ppc_init_component( int cidx ) +{ + int s = -1; + char events_dir[128]; + char event_path[128]; + + DIR *events; + + const PAPI_hw_info_t *hw_info; + hw_info=&( _papi_hwi_system_info.hw_info ); + + if ( PAPI_VENDOR_IBM != hw_info->vendor ) { + strncpy(_sensors_ppc_vector.cmp_info.disabled_reason, "Not an IBM processor", PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + + int ret = snprintf(events_dir, sizeof(events_dir), "/sys/firmware/opal/exports/"); + if (ret <= 0 || (int)(sizeof(events_dir)) <= ret) + return PAPI_ENOSUPP; + if (NULL == (events = opendir(events_dir))) + return PAPI_ENOSUPP; + + ret = snprintf(event_path, sizeof(event_path), "%s%s", events_dir, pkg_sys_name); + if (ret <= 0 || (int)(sizeof(event_path)) <= ret) + return PAPI_ENOSUPP; + if (-1 == access(event_path, F_OK)) + return PAPI_ENOSUPP; + + event_fd = open(event_path, pkg_sys_flag); + + memset(occ_num_events, 0, (MAX_OCCS+1)*sizeof(int)); + num_events = 0; + for ( s = 0; s < MAX_OCCS; ++s ) { + void *buf = NULL; + if (NULL == (buf = malloc(OCC_SENSOR_DATA_BLOCK_SIZE))) + return PAPI_ENOSUPP; + occ_hdr[s] = (struct occ_sensor_data_header_s*)buf; + + lseek (event_fd, s * OCC_SENSOR_DATA_BLOCK_SIZE, SEEK_SET); + + int rc, bytes; + /* copy memory iteratively until the full chunk is saved */ + for (rc = bytes = 0; bytes < OCC_SENSOR_DATA_BLOCK_SIZE; bytes += rc) { + rc = read(event_fd, buf + bytes, OCC_SENSOR_DATA_BLOCK_SIZE - bytes); + if (!rc || rc < 0) /* done */ break; + } + + if (OCC_SENSOR_DATA_BLOCK_SIZE != bytes) { + /* We are running out of OCCs, let's stop there */ + free(buf); + num_occs = s; + s = MAX_OCCS; + continue; + } + + occ_sensor_name_t *names = (occ_sensor_name_t*)((uint64_t)buf + be32toh(occ_hdr[s]->names_offset)); + int n_sensors = be16toh(occ_hdr[s]->nr_sensors); + + /* Prepare the double buffering for the ping/pong buffers */ + int ping_off = be32toh(occ_hdr[s]->reading_ping_offset); + int pong_off = 
be32toh(occ_hdr[s]->reading_pong_offset); + /* Ping and pong are both 40kB, and we have a 4kB separator. + * In theory, the distance between the beginnings of ping and pong is (40+4) kB. + * But they expose an offset for the pong buffer. + * So I won't trust the 4kB distance between buffers, and compute the buffer size + * based on on both offsets ans the size of pong */ + int buff_size = pong_off - ping_off + OCC_PING_DATA_BLOCK_SIZE; + + ping[s] = (uint32_t*)malloc(buff_size); + double_ping[s] = (uint32_t*)malloc(buff_size); + double_pong[s] = double_ping[s]; + + refresh_data(s, 1); + + /* Not all events will exist, counter-based evens only have an accumulator to report */ + occ_num_events[s+1] = occ_num_events[s] + (n_sensors * OCC_SENSORS_MASKS); + + num_events += (n_sensors * OCC_SENSORS_MASKS); + + /* occ_names map to read-only information that change only after reboot */ + occ_names[s] = names; + } + + /* Export the total number of events available */ + _sensors_ppc_vector.cmp_info.num_native_events = num_events; + _sensors_ppc_vector.cmp_info.num_cntrs = num_events; + _sensors_ppc_vector.cmp_info.num_mpx_cntrs = num_events; + + /* 0 active events */ + num_events = 0; + + /* Export the component id */ + _sensors_ppc_vector.cmp_info.CmpIdx = cidx; + + return PAPI_OK; +} + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +static int +_sensors_ppc_init_control_state( hwd_control_state_t *ctl ) +{ + _sensors_ppc_control_state_t* control = ( _sensors_ppc_control_state_t* ) ctl; + + memset( control, 0, sizeof ( _sensors_ppc_control_state_t ) ); + + return PAPI_OK; +} + +static int +_sensors_ppc_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + SUBDBG("Enter _sensors_ppc_start\n"); + + _sensors_ppc_context_t* context = ( _sensors_ppc_context_t* ) ctx; + _sensors_ppc_control_state_t* control = ( _sensors_ppc_control_state_t* ) ctl; + + memset( context->start_value, 0, sizeof(long long) * SENSORS_PPC_MAX_COUNTERS); + 
+ int c, i; + for( c = 0; c < num_events; c++ ) { + i = control->which_counter[c]; + if (_sensors_ppc_is_counter(i)) + context->start_value[c] = read_sensors_ppc_value(i); + } + + /* At the end, ctx->start if full of 0s, except for counter-type sensors */ + return PAPI_OK; +} + +static int +_sensors_ppc_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + (void) ctl; + + /* not sure what the side effect of stop is supposed to be, do a read? */ + return PAPI_OK; +} + +/* Shutdown a thread */ +static int +_sensors_ppc_shutdown_thread( hwd_context_t *ctx ) +{ + (void) ctx; + + return PAPI_OK; +} + + +static int +_sensors_ppc_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags ) +{ + SUBDBG("Enter _sensors_ppc_read\n"); + + (void) flags; + _sensors_ppc_control_state_t* control = ( _sensors_ppc_control_state_t* ) ctl; + _sensors_ppc_context_t* context = ( _sensors_ppc_context_t* ) ctx; + + long long start_val = 0; + long long curr_val = 0; + int c, i; + + /* c is the index in the dense array of selected counters */ + /* using control->which_counters[c], fetch actual indices in i */ + /* all subsequent methods use "global" indices i */ + for ( c = 0; c < num_events; c++ ) { + i = control->which_counter[c]; + start_val = context->start_value[c]; + curr_val = read_sensors_ppc_value(i); + + if (start_val) { + /* Make sure an event is a counter. */ + if (_sensors_ppc_is_counter(i)) { + /* Wraparound. */ + if(start_val > curr_val) { + curr_val += (0x100000000 - start_val); + } + /* Normal subtraction. */ + else if (start_val < curr_val) { + curr_val -= start_val; + } + } + } + control->count[c]=curr_val; + } + + *events = ( ( _sensors_ppc_control_state_t* ) ctl )->count; + return PAPI_OK; +} + +/* + * Clean up what was setup in sensors_ppc_init_component(). 
+ */ +static int +_sensors_ppc_shutdown_component( void ) +{ + close(event_fd); + + int s; + papi_sensors_ppc_lock(); + for (s = 0; s < num_occs; ++s) { + free(occ_hdr[s]); + } + papi_sensors_ppc_unlock(); + return PAPI_OK; +} + +/* This function sets various options in the component. The valid + * codes being passed in are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, + * PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT + */ +static int +_sensors_ppc_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) +{ + SUBDBG( "Enter: ctx: %p\n", ctx ); + (void) ctx; + (void) code; + (void) option; + + return PAPI_OK; +} + + +static int +_sensors_ppc_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t *native, int count, + hwd_context_t *ctx ) +{ + (void) ctx; + int i, index; + num_events = count; + _sensors_ppc_control_state_t* control = ( _sensors_ppc_control_state_t* ) ctl; + if (count == 0) return PAPI_OK; + + /* control contains a dense array of unsorted events */ + for ( i = 0; i < count; i++ ) { + index = native[i].ni_event; + control->which_counter[i]=index; + native[i].ni_position = i; + } + + return PAPI_OK; +} + +static int +_sensors_ppc_set_domain( hwd_control_state_t *ctl, int domain ) +{ + (void) ctl; + if ( PAPI_DOM_ALL != domain ) + return PAPI_EINVAL; + return PAPI_OK; +} + +static int +_sensors_ppc_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + (void) ctl; + + return PAPI_OK; +} + +/* + * Iterator function. Given an Eventcode, returns the next valid Eventcode to consider + * returning anything but PAPI_OK will stop lookups and ignore next events. 
+ */ +static int +_sensors_ppc_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + int index; + switch (modifier) { + + case PAPI_ENUM_FIRST: + *EventCode = 0; + return PAPI_OK; + + case PAPI_ENUM_EVENTS: + index = *EventCode & PAPI_NATIVE_AND_MASK; + if (index < occ_num_events[num_occs] - 1) { + if (_sensors_ppc_is_counter(index+1)) + /* For counters, exposing only the accumulator, + * skips ghost events from _sample to _job_sched_max */ + *EventCode = *EventCode + OCC_SENSORS_MASKS; + else + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + + default: + return PAPI_EINVAL; + } +} + +/* + * + */ +static int +_sensors_ppc_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + int index = EventCode & PAPI_NATIVE_AND_MASK; + + if ( index < 0 && index >= occ_num_events[num_occs] ) + return PAPI_ENOEVNT; + + int s = 0; + /* get OCC s from index */ + for (; index > occ_num_events[s+1] && s < MAX_OCCS; ++s); + + int ridx = index - occ_num_events[s]; + int gidx = ridx / OCC_SENSORS_MASKS; + int midx = ridx % OCC_SENSORS_MASKS; + + /* EventCode maps to a counter */ + /* Counters only expose their accumulator */ + if (_sensors_ppc_is_counter(index) && midx != OCC_SENSORS_ACCUMULATOR_TAG) + return PAPI_ENOEVNT; + + char buf[512]; + int ret = snprintf(buf, 512, "%s:occ=%d%s", occ_names[s][gidx].name, s, sensors_ppc_fake_qualifiers[midx]); + if (ret <= 0 || 512 <= ret) + return PAPI_ENOSUPP; + _local_strlcpy( name, buf, len); + + return PAPI_OK; +} + +/* This is the optional function used by utils/papi_*_avail. + * Not providing it will force the tools to forge a description using + * ntv_code_to_desc, ntv_code_to_*. 
+ */ +static int +_sensors_ppc_ntv_code_to_info( unsigned int EventCode, PAPI_event_info_t *info ) +{ + int index = EventCode; + + if ( index < 0 || index >= occ_num_events[num_occs]) + return PAPI_ENOEVNT; + + int s = 0; + /* get OCC s from index */ + for (; index > occ_num_events[s+1] && s < MAX_OCCS; ++s); + + int ridx = index - occ_num_events[s]; + int gidx = ridx / OCC_SENSORS_MASKS; + int midx = ridx % OCC_SENSORS_MASKS; + + /* EventCode maps to a counter */ + /* Counters only expose their accumulator */ + if (_sensors_ppc_is_counter(index) && midx != OCC_SENSORS_ACCUMULATOR_TAG) + return PAPI_ENOEVNT; + + char buf[512]; + int ret = snprintf(buf, 512, "%s:occ=%d%s", occ_names[s][gidx].name, s, sensors_ppc_fake_qualifiers[midx]); + if (ret <= 0 || 512 <= ret) + return PAPI_ENOSUPP; + _local_strlcpy( info->symbol, buf, sizeof( info->symbol )); + _local_strlcpy( info->units, occ_names[s][gidx].units, sizeof( info->units ) ); + /* If it ends with: + * Qw: w-th Quad unit [0-5] + * Cxx: xx-th core [0-23] + * My: y-th memory channel [0-8] + * CHvv: vv-th memory module [0-15] + * or starts with: + * GPUz: z-th GPU [0-2] + * TEMPGPUz: z-th GPU [0-2] + * */ + uint16_t type = be16toh(occ_names[s][gidx].type); + char *name = strdup(occ_names[s][gidx].name); + uint32_t freq = be32toh(occ_names[s][gidx].freq); + int tgt = -1; + switch(type) { + /* IPS, STOPDEEPACTCxx, STOPDEEPREQCxx, IPSCxx, NOTBZECxx, NOTFINCxx, + * MRDMy, MWRMy, PROCPWRTHROT, PROCOTTHROT, MEMPWRTHROT, MEMOTTHROT, + * GPUzHWTHROT, GPUzSWTHROT, GPUzSWOTTHROT, GPUzSWPWRTHROT */ + case OCC_SENSOR_TYPE_PERFORMANCE: + if (!strncmp(name, "GPU", 3)) { + char z[] = {name[3], '\0'}; + tgt = atoi(z); + name[3] = 'z'; + if (!strncmp(name, "GPUzHWTHROT", 11)) + ret = snprintf(buf, 512, "Total time GPU %d has been throttled by hardware (thermal or power brake)", tgt); + else if (!strncmp(name, "GPUzSWTHROT", 11)) + ret = snprintf(buf, 512, "Total time GPU %d has been throttled by software for any reason", tgt); + else 
if (!strncmp(name, "GPUzSWOTTHROT", 13)) + ret = snprintf(buf, 512, "Total time GPU %d has been throttled by software due to thermal", tgt); + else if (!strncmp(name, "GPUzSWPWRTHROT", 14)) + ret = snprintf(buf, 512, "Total time GPU %d has been throttled by software due to power", tgt); + else + ret = snprintf(buf, 512, "[PERFORMANCE] Unexpected: GPU-%d %s", tgt, name); + } + else if (!strncmp(name, "IPSCxx", 4)) { + tgt = atoi(name+4); + ret = snprintf(buf, 512, "Instructions per second for core %d on this Processor", tgt); + } + else if (!strncmp(name, "IPS", 3)) + ret = snprintf(buf, 512, "Vector sensor that takes the average of all the cores this Processor"); + else if (!strncmp(name, "STOPDEEPACTCxx", 12)) { + tgt = atoi(name+12); + ret = snprintf(buf, 512, "Deepest actual stop state that was fully entered during sample time for core %d", tgt); + } + else if (!strncmp(name, "STOPDEEPREQCxx", 12)) { + tgt = atoi(name+12); + ret = snprintf(buf, 512, "Deepest stop state that has been requested during sample time for core %d", tgt); + } + else if (!strncmp(name, "MEMPWRTHROT", 11)) + ret = snprintf(buf, 512, "Count of memory throttled due to power"); + else if (!strncmp(name, "MEMOTTHROT", 10)) + ret = snprintf(buf, 512, "Count of memory throttled due to memory Over temperature"); + else if (!strncmp(name, "PROCOTTHROT", 11)) + ret = snprintf(buf, 512, "Count of processor throttled for temperature"); + else if (!strncmp(name, "PROCPWRTHROT", 12)) + ret = snprintf(buf, 512, "Count of processor throttled due to power"); + else if (!strncmp(name, "MWRM", 4)) { + tgt = atoi(name+4); + ret = snprintf(buf, 512, "Memory write requests per sec for MC %d", tgt); + } + else if (!strncmp(name, "MRDM", 4)) { + tgt = atoi(name+4); + ret = snprintf(buf, 512, "Memory read requests per sec for MC %d", tgt); + } + else + ret = snprintf(buf, 512, "[PERFORMANCE] Unexpected: %s", name); + break; + + /* PWRSYS, PWRGPU, PWRAPSSCHvv, PWRPROC, PWRVDD, PWRVDN, PWRMEM */ + case 
OCC_SENSOR_TYPE_POWER: + if (!strncmp(name, "PWRSYS", 6)) + ret = snprintf(buf, 512, "Bulk power of the system/node"); + else if (!strncmp(name, "PWRGPU", 6)) + ret = snprintf(buf, 512, "Power consumption for GPUs per socket (OCC) read from APSS"); + else if (!strncmp(name, "PWRPROC", 7)) + ret = snprintf(buf, 512, "Power consumption for this Processor"); + else if (!strncmp(name, "PWRVDD", 6)) + ret = snprintf(buf, 512, "Power consumption for this Processor's Vdd (calculated from AVSBus readings)"); + else if (!strncmp(name, "PWRVDN", 6)) + ret = snprintf(buf, 512, "Power consumption for this Processor's Vdn (nest) (calculated from AVSBus readings)"); + else if (!strncmp(name, "PWRMEM", 6)) + ret = snprintf(buf, 512, "Power consumption for Memory for this Processor read from APSS"); + else if (!strncmp(name, "PWRAPSSCH", 9)) { + tgt = atoi(name+9); + ret = snprintf(buf, 512, "Power Provided by APSS channel %d", tgt); + } + else + ret = snprintf(buf, 512, "[POWER] Unexpected: %s", name); + break; + + /* FREQA, FREQACxx */ + case OCC_SENSOR_TYPE_FREQUENCY: + if (!strncmp(name, "FREQACxx", 6)) { + tgt = atoi(name+6); + ret = snprintf(buf, 512, "Average/actual frequency for this processor, Core %d based on OCA data", tgt); + } + else if (!strncmp(name, "FREQA", 5)) + ret = snprintf(buf, 512, "Average of all core frequencies for Processor"); + else + ret = snprintf(buf, 512, "[FREQUENCY] Unexpected: %s", name); + break; + + case OCC_SENSOR_TYPE_TIME: + ret = snprintf(buf, 512, "[TIME] Unexpected: %s", name); + break; + + /* UTILCxx, UTIL, NUTILCxx, MEMSPSTATMy, MEMSPMy */ + case OCC_SENSOR_TYPE_UTILIZATION: + if (!strncmp(name, "MEMSPSTATM", 10)) { + tgt = atoi(name+10); + ret = snprintf(buf, 512, "Static Memory throttle level setting for MCA %d when not in a memory throttle condition", tgt); + } + else if (!strncmp(name, "MEMSPM", 6)) { + tgt = atoi(name+6); + ret = snprintf(buf, 512, "Current Memory throttle level setting for MCA %d", tgt); + } + else if 
(!strncmp(name, "NUTILC", 6)) { + tgt = atoi(name+6); + ret = snprintf(buf, 512, "Normalized average utilization, rolling average of this Processor's Core %d", tgt); + } + else if (!strncmp(name, "UTILC", 5)) { + tgt = atoi(name+5); + ret = snprintf(buf, 512, "Utilization of this Processor's Core %d (where 100%% means fully utilized): NOTE: per thread HW counters are combined as appropriate to give this core level utilization sensor", tgt); + } + else if (!strncmp(name, "UTIL", 4)) + ret = snprintf(buf, 512, "Average of all Cores UTILC[yy] sensor"); + else + ret = snprintf(buf, 512, "[UTILIZATION] Unexpected: %s", name); + break; + + /* TEMPNEST, TEMPPROCTHRMCxx, TEMPVDD, TEMPDIMMvv, TEMPGPUz, TEMPGPUzMEM*/ + case OCC_SENSOR_TYPE_TEMPERATURE: + if (!strncmp(name, "TEMPNEST", 8)) + ret = snprintf(buf, 512, "Average temperature of nest DTS sensors"); + else if (!strncmp(name, "TEMPVDD", 7)) + ret = snprintf(buf, 512, "VRM Vdd temperature"); + else if (!strncmp(name, "TEMPPROCTHRMCxx", 13)) { + tgt = atoi(name+13); + ret = snprintf(buf, 512, "The combined weighted core/quad temperature for processor core %d", tgt); + } + else if (!strncmp(name, "TEMPDIMMvv", 8)) { + tgt = atoi(name+8); + ret = snprintf(buf, 512, "DIMM temperature for DIMM %d", tgt); + } + else if (!strncmp(name, "TEMPGPUz", 7)) { + char z[] = {name[7], '\0'}; + tgt = atoi(z); + name[7] = 'z'; + if (!strncmp(name, "TEMPGPUzMEM", 11)) + ret = snprintf(buf, 512, "GPU %d hottest HBM temperature (individual memory temperatures are not available)", tgt); + else if (!strncmp(name, "TEMPGPUz", 8)) + ret = snprintf(buf, 512, "GPU %d board temperature", tgt); + else + ret = snprintf(buf, 512, "[TEMPERATURE] Unexpected: GPU-%d %s", tgt, name); + } + else + ret = snprintf(buf, 512, "[TEMPERATURE] Unexpected: %s", name); + break; + + /* VOLTVDD, VOLTVDDSENSE, VOLTVDN, VOLTVDNSENSE, VOLTDROOPCNTCx, VOLTDROOPCNTQw */ + case OCC_SENSOR_TYPE_VOLTAGE: + if (!strncmp(name, "VOLTVDDS", 8)) + ret = snprintf(buf, 512, "Vdn 
Voltage at the remote sense. (AVS reading adjusted for loadline)"); + else if (!strncmp(name, "VOLTVDNS", 8)) + ret = snprintf(buf, 512, "Vdd Voltage at the remote sense. (AVS reading adjusted for loadline)"); + else if (!strncmp(name, "VOLTVDD", 7)) + ret = snprintf(buf, 512, "Processor Vdd Voltage (read from AVSBus)"); + else if (!strncmp(name, "VOLTVDN", 7)) + ret = snprintf(buf, 512, "Processor Vdn Voltage (read from AVSBus)"); + else if (!strncmp(name, "VOLTDROOPCNTC", 13)) { + tgt = atoi(name+13); + ret = snprintf(buf, 512, "Small voltage droop count for core %d", tgt); + } + else if (!strncmp(name, "VOLTDROOPCNTQ", 13)) { + tgt = atoi(name+13); + ret = snprintf(buf, 512, "Small voltage droop count for core %d", tgt); + } + else + ret = snprintf(buf, 512, "[VOLTAGE] Unexpected: %s", name); + break; + + /* CURVDD, CURVDN */ + case OCC_SENSOR_TYPE_CURRENT: + if (!strncmp(name, "CURVDN", 6)) + ret = snprintf(buf, 512, "Processor Vdn Current (read from AVSBus)"); + else if (!strncmp(name, "CURVDD", 6)) + ret = snprintf(buf, 512, "Processor Vdd Current (read from AVSBus)"); + else + ret = snprintf(buf, 512, "[CURRENT] Unexpected: %s", name); + break; + + case OCC_SENSOR_TYPE_GENERIC: + default: + ret = snprintf(buf, 512, "[GENERIC] Unexpected: %s", name); + break; + } + + if (ret <= 0 || 512 <= ret) + return PAPI_ENOSUPP; + _space_padding(buf, sizeof(buf)); + ret = snprintf(buf+strlen(buf), 512, "%s", sensors_ppc_fake_qualif_desc[midx]); + if (ret <= 0 || 512 <= ret) + return PAPI_ENOSUPP; + _space_padding(buf, sizeof(buf)); + ret = snprintf(buf+strlen(buf), 512, "Sampling period: %lfs", 1./freq); + if (ret <= 0 || 512 <= ret) + return PAPI_ENOSUPP; + + _local_strlcpy( info->long_descr, buf, sizeof(info->long_descr)); + info->data_type = PAPI_DATATYPE_INT64; + + return PAPI_OK; +} + +papi_vector_t _sensors_ppc_vector = { + .cmp_info = { /* (unspecified values are initialized to 0) */ + .name = "sensors_ppc", + .short_name = "sensors_ppc", + .description = "Linux 
sensors_ppc energy measurements", + .version = "5.3.0", + .default_domain = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + .available_domains = PAPI_DOM_ALL, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( _sensors_ppc_context_t ), + .control_state = sizeof ( _sensors_ppc_control_state_t ), + .reg_value = sizeof ( _sensors_ppc_register_t ), + .reg_alloc = sizeof ( _sensors_ppc_reg_alloc_t ), + }, + /* function pointers in this component */ + .init_thread = _sensors_ppc_init_thread, + .init_component = _sensors_ppc_init_component, + .init_control_state = _sensors_ppc_init_control_state, + .update_control_state = _sensors_ppc_update_control_state, + .start = _sensors_ppc_start, + .stop = _sensors_ppc_stop, + .read = _sensors_ppc_read, + .shutdown_thread = _sensors_ppc_shutdown_thread, + .shutdown_component = _sensors_ppc_shutdown_component, + .ctl = _sensors_ppc_ctl, + + .set_domain = _sensors_ppc_set_domain, + .reset = _sensors_ppc_reset, + + .ntv_enum_events = _sensors_ppc_ntv_enum_events, + .ntv_code_to_name = _sensors_ppc_ntv_code_to_name, + .ntv_code_to_info = _sensors_ppc_ntv_code_to_info, +}; diff -Nru papi-5.7.0+dfsg/src/components/sensors_ppc/linux-sensors-ppc.h papi-6.0.0~dfsg/src/components/sensors_ppc/linux-sensors-ppc.h --- papi-5.7.0+dfsg/src/components/sensors_ppc/linux-sensors-ppc.h 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sensors_ppc/linux-sensors-ppc.h 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,214 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-sensors-ppc.h + * CVS: $Id$ + * + * @author PAPI team UTK/ICL + * dgenet@icl.utk.edu + * + * @ingroup papi_components + * + * @brief OCC Inband Sensors component for PowerPC + * This file contains the source code for a component that enables + * PAPI to read 
counters and sensors on PowerPC (Power9) architecture. + */ + +#ifndef _sensors_ppc_H +#define _sensors_ppc_H + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +#define papi_sensors_ppc_lock() _papi_hwi_lock(COMPONENT_LOCK); +#define papi_sensors_ppc_unlock() _papi_hwi_unlock(COMPONENT_LOCK); + +typedef struct _sensors_ppc_register { + unsigned int selector; +} _sensors_ppc_register_t; + +typedef struct _sensors_ppc_native_event_entry { + char name[PAPI_MAX_STR_LEN]; + char units[PAPI_MIN_STR_LEN]; + char description[PAPI_MAX_STR_LEN]; + int socket_id; + int component_id; + int event_id; + int type; + int return_type; + _sensors_ppc_register_t resources; +} _sensors_ppc_native_event_entry_t; + +typedef struct _sensors_ppc_reg_alloc { + _sensors_ppc_register_t ra_bits; +} _sensors_ppc_reg_alloc_t; + +static int num_events=0; + +typedef enum occ_sensor_type_e { + OCC_SENSOR_TYPE_GENERIC = 0x0001, + OCC_SENSOR_TYPE_CURRENT = 0x0002, + OCC_SENSOR_TYPE_VOLTAGE = 0x0004, + OCC_SENSOR_TYPE_TEMPERATURE = 0x0008, + OCC_SENSOR_TYPE_UTILIZATION = 0x0010, + OCC_SENSOR_TYPE_TIME = 0x0020, + OCC_SENSOR_TYPE_FREQUENCY = 0x0040, + OCC_SENSOR_TYPE_POWER = 0x0080, + OCC_SENSOR_TYPE_PERFORMANCE = 0x0200, +} occ_sensor_type_t; + +typedef enum occ_sensor_loc_e { + OCC_SENSOR_LOC_SYSTEM = 0x0001, + OCC_SENSOR_LOC_PROCESSOR = 0x0002, + OCC_SENSOR_LOC_PARTITION = 0x0004, + OCC_SENSOR_LOC_MEMORY = 0x0008, + OCC_SENSOR_LOC_VRM = 0x0010, + OCC_SENSOR_LOC_OCC = 0x0020, + OCC_SENSOR_LOC_CORE = 0x0040, + OCC_SENSOR_LOC_GPU = 0x0080, + OCC_SENSOR_LOC_QUAD = 0x0100, +} occ_sensor_loc_t; + +#define OCC_SENSOR_READING_FULL 0x01 +#define OCC_SENSOR_READING_COUNTER 0x02 + +static char *pkg_sys_name = "occ_inband_sensors"; +static mode_t pkg_sys_flag = O_RDONLY; + +/* 8 OCCs, starting at OCC_SENSOR_DATA_BLOCK_OFFSET + * OCC0: 0x00580000 -> 0x005A57FF + * OCC1: 0x005A5800 -> 0x005CAFFF + * Each zone is 150kB 
(OCC_SENSOR_DATA_BLOCK_SIZE) + * OCC7: 0x00686800 -> 0x006ABFFF*/ + +#define MAX_OCCS 8 +#define OCC_SENSOR_DATA_BLOCK_OFFSET 0x00580000 +#define OCC_SENSOR_DATA_BLOCK_SIZE 0x00025800 +#define OCC_PING_DATA_BLOCK_SIZE 0xA000 +#define OCC_REFRESH_TIME 100000 + +/* In the 150kB, map the beginning to */ +typedef struct occ_sensor_data_header_s { + uint8_t valid; /* 0x01 means the block can be read */ + uint8_t version; + uint16_t nr_sensors; /* number of sensors! */ + uint8_t reading_version; /* ping pong version */ + uint8_t pad[3]; + uint32_t names_offset; + uint8_t names_version; + uint8_t name_length; + uint16_t reserved; + uint32_t reading_ping_offset; + uint32_t reading_pong_offset; +} __attribute__((__packed__)) occ_sensor_data_header_t; +/* That header is reset after each reboot */ + +struct occ_sensor_data_header_s *occ_hdr[MAX_OCCS]; +static int event_fd; +static long long last_refresh[MAX_OCCS]; +static int num_occs; +static int occ_num_events[MAX_OCCS+1]; +static uint32_t *ping[MAX_OCCS], *pong[MAX_OCCS]; +static uint32_t *double_ping[MAX_OCCS], *double_pong[MAX_OCCS]; + +#define MAX_CHARS_SENSOR_NAME 16 +#define MAX_CHARS_SENSOR_UNIT 4 + +/* After 1kB, the list of sensor names, units */ +/* map an array of size header->nr_sensors */ +/* the following struct, */ +typedef struct occ_sensor_name_s { + char name[MAX_CHARS_SENSOR_NAME]; + char units[MAX_CHARS_SENSOR_UNIT]; + uint16_t gsid; + uint32_t freq; + uint32_t scale_factor; + uint16_t type; + uint16_t location; + uint8_t structure_type; /* determine size+format of sensor */ + uint32_t reading_offset; + uint8_t sensor_data; + uint8_t pad[8]; +} __attribute__((__packed__)) occ_sensor_name_t; + +struct occ_sensor_name_s *occ_names[MAX_OCCS]; + +/* The following 4kB, size of a page, has to be skipped */ + +/* Following 40kB is the ping buffer */ +/* Followed by another 4kB of skippable memory */ +/* Finally, 40kB for the pong buffer */ + +typedef struct occ_sensor_record_s { + uint16_t gsid; + uint64_t 
timestamp; + uint16_t sample; /* latest value */ + uint16_t sample_min; /*min max since reboot */ + uint16_t sample_max; + uint16_t csm_min;/* since CSM reset */ + uint16_t csm_max; + uint16_t profiler_min; /* since prof reset */ + uint16_t profiler_max; + uint16_t job_scheduler_min; /* since job sched reset */ + uint16_t job_scheduler_max; + uint64_t accumulator; /* accu if it makes sense */ + uint32_t update_tag; /* tics since between that value and previous one */ + uint8_t pad[8]; +} __attribute__((__packed__)) occ_sensor_record_t; + +typedef struct occ_sensor_counter_s { + uint16_t gsid; + uint64_t timestamp; + uint64_t accumulator; + uint8_t sample; + uint8_t pad[5]; +} __attribute__((__packed__)) occ_sensor_counter_t; + +typedef enum occ_sensors_mask_e { + OCC_SENSORS_SAMPLE = 0, + OCC_SENSORS_SAMPLE_MIN = 1, + OCC_SENSORS_SAMPLE_MAX = 2, + OCC_SENSORS_CSM_MIN = 3, + OCC_SENSORS_CSM_MAX = 4, + OCC_SENSORS_PROFILER_MIN = 5, + OCC_SENSORS_PROFILER_MAX = 6, + OCC_SENSORS_JOB_SCHED_MIN = 7, + OCC_SENSORS_JOB_SCHED_MAX = 8, + OCC_SENSORS_ACCUMULATOR_TAG = 9, + OCC_SENSORS_MASKS +} occ_sensors_mask_t; + +static const char* sensors_ppc_fake_qualifiers[] = {"", ":min", ":max", ":csm_min", + ":csm_max", ":profiler_min", ":profiler_max", ":job_scheduler_min", ":job_scheduler_max", ":accumulator", NULL}; +static const char *sensors_ppc_fake_qualif_desc[] = { + "Last sample of this sensor", + "Minimum value since last OCC reset (node reboot)", + "Maximum value since last OCC reset (node reboot)", + "Minimum value since last reset request by CSM", + "Maximum value since last reset request by CSM", + "Minimum value since last reset request by profiler", + "Maximum value since last reset request by profiler", + "Minimum value since last reset by job scheduler", + "Maximum value since last reset by job scheduler", + "Accumulator register for this sensor", NULL}; + +#define SENSORS_PPC_MAX_COUNTERS MAX_OCCS * 512 * OCC_SENSORS_MASKS + +typedef struct 
_sensors_ppc_control_state { + long long count[SENSORS_PPC_MAX_COUNTERS]; + long long which_counter[SENSORS_PPC_MAX_COUNTERS]; + long long need_difference[SENSORS_PPC_MAX_COUNTERS]; + uint32_t occ, scale; +} _sensors_ppc_control_state_t; + +typedef struct _sensors_ppc_context { + long long start_value[SENSORS_PPC_MAX_COUNTERS]; + _sensors_ppc_control_state_t state; +} _sensors_ppc_context_t; + +#endif /* _sensors_ppc_H */ diff -Nru papi-5.7.0+dfsg/src/components/sensors_ppc/README papi-6.0.0~dfsg/src/components/sensors_ppc/README --- papi-5.7.0+dfsg/src/components/sensors_ppc/README 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sensors_ppc/README 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,61 @@ +/** +* @file: README +* CVS: $Id$ +* @author: Damien Genet (ICL/UTK) +* @defgroup papi_components Components +* @brief Component Specific Readme file: sensors_ppc +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information + +sensors_ppc/ + +The PAPI sensors_ppc component supports reading system metrics +on recent IBM PowerPC architectures (Power9 and later) using the OCC memory exposed +through the Linux kernel. + +-------------------------------------------------- +MEASURING SYSTEM + +The opal/exports sysfs interface exposes sensors and counters as read only +registers. The sensors and counters apply to the Power9. + +These counters and settings are exposed though this PAPI component and can be +accessed just like any normal PAPI counter. Running the "sensors_ppc_basic" +test in the tests directory will report a very limited sub-set of information +on a system. For instance, voltage received by socket 0, and its extrema since +the last reboot. + +Note: /sys/firmware/opal/exports/occ_inband_sensors is RDONLY for root. PAPI +library will need read permissions to access it. 
+ +-------------------------------------------------- +CONFIGURING THE PAPI SENSORS_PPC COMPONENT + +In the top-level src directory, configure with this component + % cd ${PAPIDIR}/src + % ./configure --with-components="sensors_ppc" +Follow the standard PAPI build (make) instructions + % make +To use the module, make sure that the libraries are accessible. + % export LD_LIBRARY_PATH=${PAPIDIR}/src:${PAPIDIR}/src/libpfm4/lib: \ + ${LD_LIBRARY_PATH} +To check the installation, the following should show some available counters + % ./utils/papi_native_avail | grep sensors_ppc + +-------------------------------------------------- +SYSTEM SETUP + +The actions described below will generally require superuser ability. +Note, these actions may have security and performance consequences, so please +make sure you know what you are doing. + + Use chmod to set site-appropriate access permissions (e.g. 440), + use chown to set group ownership, + for /sys/firmware/opal/exports/occ_inband_sensors + + And finally, have your user added to said group, granting you read access + +-------------------------------------------------- diff -Nru papi-5.7.0+dfsg/src/components/sensors_ppc/Rules.sensors_ppc papi-6.0.0~dfsg/src/components/sensors_ppc/Rules.sensors_ppc --- papi-5.7.0+dfsg/src/components/sensors_ppc/Rules.sensors_ppc 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sensors_ppc/Rules.sensors_ppc 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,6 @@ + +COMPSRCS += components/sensors_ppc/linux-sensors-ppc.c +COMPOBJS += linux-sensors-ppc.o + +linux-sensors-ppc.o: components/sensors_ppc/linux-sensors-ppc.c + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/sensors_ppc/linux-sensors-ppc.c -o linux-sensors-ppc.o diff -Nru papi-5.7.0+dfsg/src/components/sensors_ppc/tests/Makefile papi-6.0.0~dfsg/src/components/sensors_ppc/tests/Makefile --- papi-5.7.0+dfsg/src/components/sensors_ppc/tests/Makefile 1970-01-01 00:00:00.000000000 +0000 +++
papi-6.0.0~dfsg/src/components/sensors_ppc/tests/Makefile 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,15 @@ +NAME=sensors_ppc +include ../../Makefile_comp_tests.target + +TESTS = sensors_ppc_basic + +sensors_ppc_tests: $(TESTS) + +sensors_ppc_basic.o: sensors_ppc_basic.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c sensors_ppc_basic.c -o sensors_ppc_basic.o + +sensors_ppc_basic: sensors_ppc_basic.o $(UTILOBJS) $(PAPILIB) + $(CC) $(INCLUDE) -o sensors_ppc_basic sensors_ppc_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o *~ diff -Nru papi-5.7.0+dfsg/src/components/sensors_ppc/tests/sensors_ppc_basic.c papi-6.0.0~dfsg/src/components/sensors_ppc/tests/sensors_ppc_basic.c --- papi-5.7.0+dfsg/src/components/sensors_ppc/tests/sensors_ppc_basic.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/sensors_ppc/tests/sensors_ppc_basic.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,105 @@ +/** + * @author PAPI team UTK/ICL + * Test case for sensors_ppc component + * @brief + * Tests basic functionality of sensors_ppc component + */ + +#include +#include +#include +#include + +#include "papi.h" + +#define MAX_sensors_ppc_EVENTS 64 + +int main ( int argc, char **argv ) +{ + (void) argv; + (void) argc; + int retval,cid,sensors_ppc_cid=-1,numcmp; + int EventSet = PAPI_NULL; + long long *values; + int num_events=0; + int code; + char event_names[MAX_sensors_ppc_EVENTS][PAPI_MAX_STR_LEN]; + char units[MAX_sensors_ppc_EVENTS][PAPI_MIN_STR_LEN]; + int r,i; + + const PAPI_component_info_t *cmpinfo = NULL; + PAPI_event_info_t evinfo; + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + fprintf(stderr, "PAPI_library_init failed %d\n",retval ); + + numcmp = PAPI_num_components(); + + for( cid=0; cidname,"sensors_ppc" ) ) { + sensors_ppc_cid=cid; + break; + } + } + + /* Component not found */ + if ( cid==numcmp ) + fprintf(stderr, "No sensors_ppc component found\n"); + + /* 
Skip if component has no counters */ + if ( cmpinfo->num_cntrs==0 ) + fprintf(stderr, "No counters in the sensors_ppc component\n"); + + /* Create EventSet */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) + fprintf(stderr, "PAPI_create_eventset()\n"); + + /* Add all events */ + code = PAPI_NATIVE_MASK; + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, sensors_ppc_cid ); + while ( r == PAPI_OK ) { + retval = PAPI_event_code_to_name(code, event_names[num_events]); + if ( retval != PAPI_OK ) + fprintf(stderr, "Error from PAPI_event_code_to_name, error = %d\n", retval); + + retval = PAPI_get_event_info(code,&evinfo); + if ( retval != PAPI_OK ) + fprintf(stderr, "Error getting event info, error = %d\n",retval); + + char *evt = "sensors_ppc:::VOLTVDD:occ=0"; + if (!strncmp(event_names[num_events], evt, strlen(evt))) { + retval = PAPI_add_event( EventSet, code ); + strcpy(units[num_events], evinfo.units); + num_events++; + } + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, sensors_ppc_cid ); + } + + values=calloc( num_events,sizeof( long long ) ); + if ( values==NULL ) + fprintf(stderr, "No memory"); + + PAPI_start(EventSet); + + retval = PAPI_read( EventSet, values ); + + for (i = 0; i < num_events; ++i) + fprintf(stdout, "%s > %lld %s\n", event_names[i], values[i], units[i]); + + /* Done, clean up */ + retval = PAPI_cleanup_eventset( EventSet ); + if ( retval != PAPI_OK ) + fprintf(stderr, "PAPI_cleanup_eventset(), error=%d\n",retval ); + + retval = PAPI_destroy_eventset( &EventSet ); + if ( retval != PAPI_OK ) + fprintf(stderr, "PAPI_destroy_eventset(), error=%d\n",retval ); + + return 0; +} diff -Nru papi-5.7.0+dfsg/src/components/vmware/VMwareComponentDocument.txt papi-6.0.0~dfsg/src/components/vmware/VMwareComponentDocument.txt --- papi-5.7.0+dfsg/src/components/vmware/VMwareComponentDocument.txt 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/components/vmware/VMwareComponentDocument.txt 2020-03-04 15:56:58.000000000 
+0000 @@ -1,188 +1,188 @@ -PAPI-V VMware Component Document -Matthew R. Johnson -John Nelson -21 November 2011 -Revised: 23 January 2012 - - -This document is intended to detail the features of the PAPI-V VMware component, and more specifically the installation, usage, and pseudo performance counters available. In order to make this component possible, extensive research into the actual counters available, as well as the leveraging of the VMware Guest API1, was needed. As this is the first of the PAPI-V components, we seem to be stepping into a new realm of performance measurements that, previously, has been a new frontier, or unexplored all-together. - - -Installation: - -To make PAPI with the VMware component you must go to the PAPI_ROOT/papi/src/components/vmware directory and configure with the flag: --with-vmware_incdir=, where is the path to the VMware Guest SDK for your machine. - -NOTE: The VMware Guest SDK is normally found in the following default vmware-tools path: - - /usr/lib/vmware-tools/GuestSDK - -or: - - /opt/GuestSDK - - e.g.: ./configure --with-vmware_incdir=/usr/lib/vmware-tools/GuestSDK - - -After running configure in the vmware directory, go to PAPI_CVS_ROOT/papi/src and configure again using the flag: - - --with-components=vmware - - e.g.: ./configure --with-components=vmware - -After running the main configure script you can then type make, the Makefiles have been automatically generated. 
If at any point you would like to uninstall PAPI and the VMware comonent, from the PAPI_ROOT/papi/src directory, just type: - - make clean clobber - -To make use of VMWare timekeeping pseudo-performance counters, the following configuration must be added through the vSphere client: - - monitor_control.pseudo_perfctr = TRUE - -As well as adding the - - --with-vmware_pseudo_perfctr - - WARNING: If you do not enable the monitor_control.pseudo_perfctr on the host side, and give configure the --with-vmware_pseudo_perfctr, you will get a segmentation fault upon readpmc trying to access protected memory wiothout priveledged access. This is expected behavior. - -flag during component configure in the vmware component directory. - -Available Performance Counters: - -Below is the list of available performance metrics available to PAPI through the VMware component. If at any time you would like to see a full list of counters available to PAPI type ./papi_native_avail from within the utils directory. - -It is important to know that the counters VMWARE_HOST_TSC, VMWARE_ELAPSED_TIME, and VMWARE_ELAPSED_APPARENT are currently the only true to name register counters available from withing a VMware virtual machine. Any Guest OS running on a VMware host must have the access enabled from within the VMware vSphere client managing the system for each virtual machine that wishes to use the VMware component, this exposes the counters to the virtual machine. All other counters you will see in the following lost are software counters that are being exposed through the use of the VMware API1. - - - -Event Code | Symbol | Long Description | --------------------------------------------------------------------------------- -0x44000000 | VMWARE_HOST_TSC | Physical host TSC | --------------------------------------------------------------------------------- -0x44000001 | VMWARE_ELAPSED_TIME | Elapsed real time in ns. 
| --------------------------------------------------------------------------------- -0x44000002 | VMWARE_ELAPSED_APPARENT | Elapsed apparent time in ns. | --------------------------------------------------------------------------------- -0x44000003 | VMWARE_CPU_LIMIT | Retrieves the upper limit of processor use in | - | MHz available to the virtual machine. | --------------------------------------------------------------------------------- -0x44000004 | VMWARE_CPU_RESERVATION | Retrieves the minimum processing power | - | in MHz reserved for the virtual machine. | --------------------------------------------------------------------------------- -0x44000005 | VMWARE_CPU_SHARES | Retrieves the number of CPU shares allocated | - | to the virtual machine. | --------------------------------------------------------------------------------- -0x44000006 | VMWARE_CPU_STOLEN | Retrieves the number of milliseconds that th | - | e virtual machine was in a ready state (able to transition to a r | - | un state), but was not scheduled to run. | --------------------------------------------------------------------------------- -0x44000007 | VMWARE_CPU_USED | Retrieves the number of milliseconds during wh | - | ich the virtual machine has used the CPU. This value includes the | - | time used by the guest operating system and the time used by vir | - | tualization code for tasks for this virtual machine. You can comb | - | ine this value with the elapsed time (VMWARE_ELAPSED) to estimate | - | the effective virtual machine CPU speed. This value is a subset | - | of elapsedMs. | --------------------------------------------------------------------------------- -0x44000008 | VMWARE_ELAPSED | Retrieves the number of milliseconds that have | - | passed in the virtual machine since it last started running on th | - | e server. The count of elapsed time restarts each time the virtua | - | l machine is powered on, resumed, or migrated using VMotion. 
This | - | value counts milliseconds, regardless of whether the virtual mac | - | hine is using processing power during that time. You can combine | - | this value with the CPU time used by the virtual machine (VMWARE_ | - | CPU_USED) to estimate the effective virtual machine xCPU speed. c | - | puUsedMS is a subset of this value. | --------------------------------------------------------------------------------- -0x44000009 | VMWARE_MEM_ACTIVE | Retrieves the amount of memory the virtual m | - | achine is actively using in MB€”its estimated working set size. | --------------------------------------------------------------------------------- -0x4400000a | VMWARE_MEM_BALLOONED | Retrieves the amount of memory that has b | - | een reclaimed from this virtual machine by the vSphere memory bal | - | loon driver (also referred to as the “vmmemctl†driver) in MB. | --------------------------------------------------------------------------------- -0x4400000b | VMWARE_MEM_LIMIT | Retrieves the upper limit of memory that is a | - | vailable to the virtual machine in MB. | --------------------------------------------------------------------------------- -0x4400000c | VMWARE_MEM_MAPPED | Retrieves the amount of memory that is alloc | - | ated to the virtual machine in MB. Memory that is ballooned, swap | - | ped, or has never been accessed is excluded. | --------------------------------------------------------------------------------- -0x4400000d | VMWARE_MEM_OVERHEAD | Retrieves the amount of €œoverhead mem | - | ory associated with this virtual machine that is currently consum | - | ed on the host system in MB. Overhead memory is additional memory | - | that is reserved for data structures required by the virtualizat | - | ion layer. | --------------------------------------------------------------------------------- -0x4400000e | VMWARE_MEM_RESERVATION | Retrieves the minimum amount of memory | - | that is reserved for the virtual machine in MB. 
| --------------------------------------------------------------------------------- -0x4400000f | VMWARE_MEM_SHARED | Retrieves the amount of physical memory asso | - | ciated with this virtual machine that is copy €Âon €Âwrite (COW) | - | shared on the host in MB. | --------------------------------------------------------------------------------- -0x44000010 | VMWARE_MEM_SHARES | Retrieves the number of memory shares alloca | - | ted to the virtual machine. | --------------------------------------------------------------------------------- -0x44000011 | VMWARE_MEM_SWAPPED | Retrieves the amount of memory that has bee | - | n reclaimed from this virtual machine by transparently swapping g | - | uest memory to disk in MB. | --------------------------------------------------------------------------------- -0x44000012 | VMWARE_MEM_TARGET_SIZE | Retrieves the size of the target memory | - | allocation for this virtual machine in MB. | --------------------------------------------------------------------------------- -0x44000013 | VMWARE_MEM_USED | Retrieves the estimated amount of physical hos | - | t memory currently consumed for this virtual machine’s physical | - | memory. | --------------------------------------------------------------------------------- -0x44000014 | VMWARE_HOST_CPU | Retrieves the speed of the ESX system’€™s phys | - | ical CPU in MHz. | - - - - -Timekeeping Counters: - - - The pseudo-performance counter feature uses a trap to catch a privileged machine instruction issued by software running in the virtual machine and therefore has more overhead than reading a performance counter or the TSC on physical hardware. The feature will only trap correctly if the configuration setting is applied as described in Installation. - - -The timekeeping counters behave as follows: - - -VMWARE_HOST_TSC - Provides access the the Time Stamp Counter on the host machine which counts ticks since reset. 
-VMWARE_ELAPSED_TIME - Provides access to the elapsed time in ns since reset as measured on the host machine. -VMWARE_ELAPSED_APPARENT - Apparent time is the time visible the Guest OS using virtualized timer devices. This timer may fall behind real time and catch up as needed. - - -Usage: - - - After installation of the VMware Component, you may use the papi_commmand_line interface, found in PAPI_ROOT/papi/src/utils to read out an instantaneous value from a certain counter from the above list. If you would like to read out a counter, type: ./papi_command_line COUNTER_SYMBOL_NAME. - - -e.g.: To read out the value of the VMWARE_MEM_USED counter - - -user@vm1:~/papi/src/utils$ ./papi_command_line VMWARE_MEM_USED -Successfully added: VMWARE_MEM_USED - - -VMWARE_MEM_USED : 13074 - - ----------------------------------- -Verification: Checks for valid event name. -This utility lets you add events from the command line interface to see if they work. -command_line.c PASSED - - -For further usage of PAPI and it’s API on how to incorporate these counters into a program of your own please see the PAPI Documentation2. - - -________________ - - -References: - - -[1] VMware: http://www.vmware.com/support/developer/guest-sdk. Last accessed November 28, 2011 - [2] PAPI : http://icl.cs.utk.edu/projects/papi/wiki/Main_Page. Last accessed November 28, 2011 +PAPI-V VMware Component Document +Matthew R. Johnson +John Nelson +21 November 2011 +Revised: 23 January 2012 + + +This document is intended to detail the features of the PAPI-V VMware component, and more specifically the installation, usage, and pseudo performance counters available. In order to make this component possible, extensive research into the actual counters available, as well as the leveraging of the VMware Guest API1, was needed. As this is the first of the PAPI-V components, we seem to be stepping into a new realm of performance measurements that, previously, has been a new frontier, or unexplored all-together. 
+ + +Installation: + +To make PAPI with the VMware component you must go to the PAPI_ROOT/papi/src/components/vmware directory and configure with the flag: --with-vmware_incdir=<path>, where <path> is the path to the VMware Guest SDK for your machine. + +NOTE: The VMware Guest SDK is normally found in the following default vmware-tools path: + + /usr/lib/vmware-tools/GuestSDK + +or: + + /opt/GuestSDK + + e.g.: ./configure --with-vmware_incdir=/usr/lib/vmware-tools/GuestSDK + + +After running configure in the vmware directory, go to PAPI_CVS_ROOT/papi/src and configure again using the flag: + + --with-components=vmware + + e.g.: ./configure --with-components=vmware + +After running the main configure script you can then type make, the Makefiles have been automatically generated. If at any point you would like to uninstall PAPI and the VMware component, from the PAPI_ROOT/papi/src directory, just type: + + make clean clobber + +To make use of VMWare timekeeping pseudo-performance counters, the following configuration must be added through the vSphere client: + + monitor_control.pseudo_perfctr = TRUE + +As well as adding the + + --with-vmware_pseudo_perfctr + + WARNING: If you do not enable the monitor_control.pseudo_perfctr on the host side, and give configure the --with-vmware_pseudo_perfctr, you will get a segmentation fault upon readpmc trying to access protected memory without privileged access. This is expected behavior. + +flag during component configure in the vmware component directory. + +Available Performance Counters: + +Below is the list of available performance metrics available to PAPI through the VMware component. If at any time you would like to see a full list of counters available to PAPI type ./papi_native_avail from within the utils directory. + +It is important to know that the counters VMWARE_HOST_TSC, VMWARE_ELAPSED_TIME, and VMWARE_ELAPSED_APPARENT are currently the only true to name register counters available from within a VMware virtual machine. 
Any Guest OS running on a VMware host must have the access enabled from within the VMware vSphere client managing the system for each virtual machine that wishes to use the VMware component, this exposes the counters to the virtual machine. All other counters you will see in the following lost are software counters that are being exposed through the use of the VMware API1. + + + +Event Code | Symbol | Long Description | +-------------------------------------------------------------------------------- +0x44000000 | VMWARE_HOST_TSC | Physical host TSC | +-------------------------------------------------------------------------------- +0x44000001 | VMWARE_ELAPSED_TIME | Elapsed real time in ns. | +-------------------------------------------------------------------------------- +0x44000002 | VMWARE_ELAPSED_APPARENT | Elapsed apparent time in ns. | +-------------------------------------------------------------------------------- +0x44000003 | VMWARE_CPU_LIMIT | Retrieves the upper limit of processor use in | + | MHz available to the virtual machine. | +-------------------------------------------------------------------------------- +0x44000004 | VMWARE_CPU_RESERVATION | Retrieves the minimum processing power | + | in MHz reserved for the virtual machine. | +-------------------------------------------------------------------------------- +0x44000005 | VMWARE_CPU_SHARES | Retrieves the number of CPU shares allocated | + | to the virtual machine. | +-------------------------------------------------------------------------------- +0x44000006 | VMWARE_CPU_STOLEN | Retrieves the number of milliseconds that th | + | e virtual machine was in a ready state (able to transition to a r | + | un state), but was not scheduled to run. | +-------------------------------------------------------------------------------- +0x44000007 | VMWARE_CPU_USED | Retrieves the number of milliseconds during wh | + | ich the virtual machine has used the CPU. 
This value includes the | + | time used by the guest operating system and the time used by vir | + | tualization code for tasks for this virtual machine. You can comb | + | ine this value with the elapsed time (VMWARE_ELAPSED) to estimate | + | the effective virtual machine CPU speed. This value is a subset | + | of elapsedMs. | +-------------------------------------------------------------------------------- +0x44000008 | VMWARE_ELAPSED | Retrieves the number of milliseconds that have | + | passed in the virtual machine since it last started running on th | + | e server. The count of elapsed time restarts each time the virtua | + | l machine is powered on, resumed, or migrated using VMotion. This | + | value counts milliseconds, regardless of whether the virtual mac | + | hine is using processing power during that time. You can combine | + | this value with the CPU time used by the virtual machine (VMWARE_ | + | CPU_USED) to estimate the effective virtual machine CPU speed. c | + | puUsedMS is a subset of this value. | +-------------------------------------------------------------------------------- +0x44000009 | VMWARE_MEM_ACTIVE | Retrieves the amount of memory the virtual m | + | achine is actively using in MB—its estimated working set size. | +-------------------------------------------------------------------------------- +0x4400000a | VMWARE_MEM_BALLOONED | Retrieves the amount of memory that has b | + | een reclaimed from this virtual machine by the vSphere memory bal | + | loon driver (also referred to as the “vmmemctl” driver) in MB. | +-------------------------------------------------------------------------------- +0x4400000b | VMWARE_MEM_LIMIT | Retrieves the upper limit of memory that is a | + | vailable to the virtual machine in MB. | +-------------------------------------------------------------------------------- +0x4400000c | VMWARE_MEM_MAPPED | Retrieves the amount of memory that is alloc | + | ated to the virtual machine in MB. 
Memory that is ballooned, swap | + | ped, or has never been accessed is excluded. | +-------------------------------------------------------------------------------- +0x4400000d | VMWARE_MEM_OVERHEAD | Retrieves the amount of “overhead” mem | + | ory associated with this virtual machine that is currently consum | + | ed on the host system in MB. Overhead memory is additional memory | + | that is reserved for data structures required by the virtualizat | + | ion layer. | +-------------------------------------------------------------------------------- +0x4400000e | VMWARE_MEM_RESERVATION | Retrieves the minimum amount of memory | + | that is reserved for the virtual machine in MB. | +-------------------------------------------------------------------------------- +0x4400000f | VMWARE_MEM_SHARED | Retrieves the amount of physical memory asso | + | ciated with this virtual machine that is copy-on-write (COW) | + | shared on the host in MB. | +-------------------------------------------------------------------------------- +0x44000010 | VMWARE_MEM_SHARES | Retrieves the number of memory shares alloca | + | ted to the virtual machine. | +-------------------------------------------------------------------------------- +0x44000011 | VMWARE_MEM_SWAPPED | Retrieves the amount of memory that has bee | + | n reclaimed from this virtual machine by transparently swapping g | + | uest memory to disk in MB. | +-------------------------------------------------------------------------------- +0x44000012 | VMWARE_MEM_TARGET_SIZE | Retrieves the size of the target memory | + | allocation for this virtual machine in MB. | +-------------------------------------------------------------------------------- +0x44000013 | VMWARE_MEM_USED | Retrieves the estimated amount of physical hos | + | t memory currently consumed for this virtual machine’s physical | + | memory. 
| +-------------------------------------------------------------------------------- +0x44000014 | VMWARE_HOST_CPU | Retrieves the speed of the ESX system’s phys | + | ical CPU in MHz. | + + + + +Timekeeping Counters: + + + The pseudo-performance counter feature uses a trap to catch a privileged machine instruction issued by software running in the virtual machine and therefore has more overhead than reading a performance counter or the TSC on physical hardware. The feature will only trap correctly if the configuration setting is applied as described in Installation. + + +The timekeeping counters behave as follows: + + +VMWARE_HOST_TSC - Provides access to the Time Stamp Counter on the host machine which counts ticks since reset. +VMWARE_ELAPSED_TIME - Provides access to the elapsed time in ns since reset as measured on the host machine. +VMWARE_ELAPSED_APPARENT - Apparent time is the time visible to the Guest OS using virtualized timer devices. This timer may fall behind real time and catch up as needed. + + +Usage: + + + After installation of the VMware Component, you may use the papi_command_line interface, found in PAPI_ROOT/papi/src/utils to read out an instantaneous value from a certain counter from the above list. If you would like to read out a counter, type: ./papi_command_line COUNTER_SYMBOL_NAME. + + +e.g.: To read out the value of the VMWARE_MEM_USED counter + + +user@vm1:~/papi/src/utils$ ./papi_command_line VMWARE_MEM_USED +Successfully added: VMWARE_MEM_USED + + +VMWARE_MEM_USED : 13074 + + +---------------------------------- +Verification: Checks for valid event name. +This utility lets you add events from the command line interface to see if they work. +command_line.c PASSED + + +For further usage of PAPI and its API on how to incorporate these counters into a program of your own please see the PAPI Documentation2. + + +________________ + + +References: + + +[1] VMware: http://www.vmware.com/support/developer/guest-sdk. 
Last accessed November 28, 2011 + [2] PAPI : http://icl.cs.utk.edu/projects/papi/wiki/Main_Page. Last accessed November 28, 2011 diff -Nru papi-5.7.0+dfsg/src/configure.in papi-6.0.0~dfsg/src/configure.in --- papi-5.7.0+dfsg/src/configure.in 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/configure.in 2020-03-04 15:56:58.000000000 +0000 @@ -6,7 +6,7 @@ # cross compiling should work differently... AC_PREREQ(2.59) -AC_INIT(PAPI, 5.7.0.0, ptools-perfapi@icl.utk.edu) +AC_INIT(PAPI, 6.0.0.0, ptools-perfapi@icl.utk.edu) AC_CONFIG_SRCDIR([papi.c]) AC_CONFIG_HEADER([config.h]) @@ -141,6 +141,38 @@ AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time sched_getcpu]) # +# Check if the system provides time_* symbols without -lrt, and if not, +# check for -lrt existance. +# + +AC_MSG_CHECKING([for timer_create and timer_*ettime symbols in base system]) +AC_TRY_LINK([#include + #include ], + [timer_t timerid; timer_create(CLOCK_REALTIME, NULL, &timerid);], + [rtsymbols_in_base="yes"], [rtsymbols_in_base="no"]) +if test "${rtsymbols_in_base}" = "yes"; then + AC_MSG_RESULT([found]) + LRT="" +else + AC_MSG_RESULT([not found]) + AC_MSG_CHECKING([for timer_create and timer_*ettime symbols in -lrt]) + SAVED_LIBS=${LIBS} + LIBS="${LIBS} -lrt" + AC_TRY_LINK([#include + #include ], + [timer_t timerid; timer_create(CLOCK_REALTIME, NULL, &timerid);], + [has_lrt="yes"], [has_lrt="no"]) + LIBS=${SAVED_LIBS} + if test "${has_lrt}" = "yes" ; then + AC_MSG_RESULT([found]) + LRT="-lrt" + else + AC_MSG_ERROR([cannot find timer_create and timer_*ettime symbols neither in the base system libraries nor in -lrt]) + fi +fi +AC_SUBST(LRT) + +# # Check if the system provides dl* symbols without -ldl, and if not, # check for -ldl existance. 
# @@ -1262,7 +1294,8 @@ FTEST_TARGETS="all" LIBRARY=libpapi.a SHLIB='libpapi.so.AC_PACKAGE_VERSION' -VLIB='libpapi.so.$(PAPIVER)' +PAPISOVER='$(PAPIVER).$(PAPIREV)' +VLIB='libpapi.so.$(PAPISOVER)' OMPCFLGS=-fopenmp CC_R='$(CC) -pthread' CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-soname -Wl,$(VLIB) -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' @@ -1717,6 +1750,14 @@ done tests="$tests comp_tests" +# check for SDE component to determine if we need -lrt in LDFLAGS +for comp in $components; do + if test "x$comp" = "xsde" ; then + LDFLAGS="$LDFLAGS $LRT" + LIBS="$LIBS $LRT" + fi +done + AC_MSG_RESULT($components) AC_MSG_CHECKING(for PAPI event CSV filename to use) @@ -1769,6 +1810,7 @@ AC_SUBST(FILENAME) AC_SUBST(LIBRARY) AC_SUBST(SHLIB) +AC_SUBST(PAPISOVER) AC_SUBST(VLIB) AC_SUBST(PAPICFLAGS) AC_SUBST(OPTFLAGS) diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/branch.c papi-6.0.0~dfsg/src/counter_analysis_toolkit/branch.c --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/branch.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/branch.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,450 @@ +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "branch.h" + +volatile int iter_count, global_var1, global_var2; +volatile int result; +volatile unsigned int b, z1, z2, z3, z4; + +void branch_driver(char *papi_event_name, int init, char* outdir){ + int papi_eventset = PAPI_NULL; + int i, iter, sz, ret_val, max_iter = 16*1024; + long long int cnt; + double avg, round; + FILE* ofp_papi; + const char *sufx = ".branch"; + int l = strlen(outdir)+strlen(papi_event_name)+strlen(sufx); + + char *papiFileName = (char *)calloc( 1+l, sizeof(char) ); + if (l != (sprintf(papiFileName, "%s%s%s", outdir, papi_event_name, sufx))) { + goto error0; + } + if (NULL == (ofp_papi = fopen(papiFileName,"w"))) { + fprintf(stderr, "Unable to open file %s.\n", papiFileName); + goto error0; + } + + // Initialize undecidible values for the 
BRNG macro. + z1 = init*7; + z2 = (init+4)/(init+1); + z3 = (unsigned int)getpid(); + z4 = (z3+z2)/z1; + + ret_val = PAPI_create_eventset( &papi_eventset ); + if (ret_val != PAPI_OK){ + goto error1; + } + + ret_val = PAPI_add_named_event( papi_eventset, papi_event_name ); + if (ret_val != PAPI_OK){ + goto error1; + } + + BRANCH_BENCH(1); + BRANCH_BENCH(2); + BRANCH_BENCH(3); + BRANCH_BENCH(4); + BRANCH_BENCH(4a); + BRANCH_BENCH(4b); + BRANCH_BENCH(5); + BRANCH_BENCH(5a); + BRANCH_BENCH(5b); + BRANCH_BENCH(6); + BRANCH_BENCH(7); + + if( result == 143526 ){ + printf("Random side effect\n"); + } + + ret_val = PAPI_cleanup_eventset( papi_eventset ); + if (ret_val != PAPI_OK ){ + goto error1; + } + ret_val = PAPI_destroy_eventset( &papi_eventset ); + if (ret_val != PAPI_OK ){ + goto error1; + } + +error1: + fclose(ofp_papi); +error0: + free(papiFileName); + return; +} + +long long int branch_char_b1(int size, int event_set){ + int retval; + long long int value; + + if ( (retval=PAPI_start(event_set)) != PAPI_OK){ + return -1; + } + + /* + 1. Conditional EXECUTED = 2 + 1. Conditional RETIRED = 2 + 2. Conditional TAKEN = 1.5 + 4. Direct JUMP = 0 + 3. Branch MISPREDICT = 0 + 5. 
All Branches = 2 + */ + + iter_count = 1; + global_var2 = 1; + do{ + if ( iter_count < (size/2) ){ + global_var2 += 2; + } + BRNG(); + iter_count++; + }while(iter_count global_var2 ){ + global_var1+=2; + } + BRNG(); + iter_count++; + }while(iter_count> 13;\ + z1 = ((z1 & 4294967294U) << 18) ^ b;\ + b = ((z2 << 2) ^ z2) >> 27;\ + z2 = ((z2 & 4294967288U) << 2) ^ b;\ + b = ((z3 << 13) ^ z3) >> 21;\ + z3 = ((z3 & 4294967280U) << 7) ^ b;\ + b = ((z4 << 3) ^ z4) >> 12;\ + z4 = ((z4 & 4294967168U) << 13) ^ b;\ + z1++;\ + result = z1 ^ z2 ^ z3 ^ z4;\ +} + +#define BUSY_WORK() {BRNG(); BRNG(); BRNG(); BRNG();} + +extern volatile int result; +extern volatile unsigned int b, z1, z2, z3, z4; + +void branch_driver(char *papi_event_name, int init, char* outdir); +long long int branch_char_b1(int size, int papi_eventset); +long long int branch_char_b2(int size, int papi_eventset); +long long int branch_char_b3(int size, int papi_eventset); +long long int branch_char_b4(int size, int papi_eventset); +long long int branch_char_b4a(int size, int papi_eventset); +long long int branch_char_b4b(int size, int papi_eventset); +long long int branch_char_b5(int size, int papi_eventset); +long long int branch_char_b5a(int size, int papi_eventset); +long long int branch_char_b5b(int size, int papi_eventset); +long long int branch_char_b6(int size, int papi_eventset); +long long int branch_char_b7(int size, int papi_eventset); + +#endif diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/caches.h papi-6.0.0~dfsg/src/counter_analysis_toolkit/caches.h --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/caches.h 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/caches.h 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,61 @@ +#ifndef _CACHES_ +#define _CACHES_ + +#include +#include +#include +#include +#include +#include +// Header files for uintptr_t +#if defined (__SVR4) && defined (__sun) +# include +#else +# include +#endif +#include + +// Header files for 
setting the affinity +#if defined(__linux__) +# define __USE_GNU 1 +# include +#elif defined (__SVR4) && defined (__sun) +//#elif defined(__sparc) +# include +# include +# include +#endif + +#include + +#define SIZE (512*1024) + +#define L_SIZE 0 +#define C_SIZE 1 +#define ASSOC 2 + +//#define DEBUG + +typedef struct run_output_s{ + double dt; + double counter; + int status; +}run_output_t; + +static inline double getticks(void){ + double ret; + struct timeval tv; + + gettimeofday(&tv, NULL); + ret = 1000*1000*(double)tv.tv_sec + (double)tv.tv_usec; + return ret; +} + +static inline double elapsed(double t1, double t0){ + return (double)t1 - (double)t0; +} + +extern int compar_lf(const void *a, const void *b); +extern int compar_lld(const void *a, const void *b); + +#endif diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/compar.c papi-6.0.0~dfsg/src/counter_analysis_toolkit/compar.c --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/compar.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/compar.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,15 @@ +int compar_lf(const void *a, const void *b){ + const double *da = (const double *)a; + const double *db = (const double *)b; + if( *da < *db) return -1; + if( *da > *db) return 1; + return 0; +} + +int compar_lld(const void *a, const void *b){ + const long long int *da = (const long long int *)a; + const long long int *db = (const long long int *)b; + if( *da < *db) return -1; + if( *da > *db) return 1; + return 0; +} diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/dcache.c papi-6.0.0~dfsg/src/counter_analysis_toolkit/dcache.c --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/dcache.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/dcache.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,313 @@ +#include "papi.h" +#include "caches.h" +#include "prepareArray.h" +#include "timing_kernels.h" +#include "dcache.h" + +typedef struct { + int 
*values; + double **rslts; + double **counter; + char *event_name; + int latency_only; + int mode; +} data_t; + +extern int _papi_eventset; + +int global_max_iter, global_line_size_in_bytes, global_pattern; +float global_pages_per_block; +int line_size; +int guessCount, min_size, max_size; + +void d_cache_driver(char* papi_event_name, int max_iter, char* outdir, int latency_only, int mode, int show_progress) +{ + int pattern = 3; + int ls = 64; + int test_cnt = 0; + float ppb = 16; + FILE *ofp_papi; + char *sufx, *papiFileName; + + // Open file (pass handle to d_cache_test()). + if(CACHE_READ_WRITE == mode){ + sufx = strdup(".data.writes"); + }else{ + sufx = strdup(".data.reads"); + } + + int l = strlen(outdir)+strlen(papi_event_name)+strlen(sufx); + papiFileName = (char *)calloc( 1+l, sizeof(char) ); + if (!papiFileName) { + fprintf(stderr, "Unable to allocate memory. Skipping event %s.\n", papi_event_name); + goto error0; + } + if (l != (sprintf(papiFileName, "%s%s%s", outdir, papi_event_name, sufx))) { + fprintf(stderr, "sprintf error. Skipping event %s.\n", papi_event_name); + goto error1; + } + if (NULL == (ofp_papi = fopen(papiFileName,"w"))) { + fprintf(stderr, "Unable to open file %s. Skipping event %s.\n", papiFileName, papi_event_name); + goto error1; + } + + // Go through each parameter variant. + for(pattern = 3; pattern <= 4; ++pattern) + { + for(ls = 64; ls <= 128; ls *= 2) + { + // PPB variation only makes sense if the pattern is not sequential. 
+ if(pattern != 4) + { + for(ppb = 64; ppb >= 16; ppb -= 48) + { + if( show_progress ) + { + printf("%3d%%\b\b\b\b",(100*test_cnt++)/6); + fflush(stdout); + } + d_cache_test(pattern, max_iter, ls, ppb, papi_event_name, latency_only, mode, ofp_papi); + } + } + else + { + if( show_progress ) + { + printf("%3d%%\b\b\b\b",(100*test_cnt++)/6); + fflush(stdout); + } + d_cache_test(pattern, max_iter, ls, ppb, papi_event_name, latency_only, mode, ofp_papi); + } + } + } + if( show_progress ) + { + size_t i; + printf("100%%"); + for(i=0; ivalues; + rslts = data->rslts; + counter = data->counter; + latency_only = data->latency_only; + mode = data->mode; + + if( !latency_only){ + _papi_eventset = PAPI_NULL; + if( PAPI_thread_init(pthread_self) != PAPI_OK ){ + fprintf(stderr,"PAPI was NOT initialized correctly.\n"); + pthread_exit((void *)error_flag); + } + + /* Set the event */ + ret_val = PAPI_create_eventset( &_papi_eventset ); + if (ret_val != PAPI_OK ){ + pthread_exit((void *)error_flag); + } + + ret_val = PAPI_event_name_to_code( data->event_name, &native ); + if (ret_val != PAPI_OK ){ + pthread_exit((void *)error_flag); + } + + ret_val = PAPI_add_event( _papi_eventset, native ); + if (ret_val != PAPI_OK ){ + pthread_exit((void *)error_flag); + } + /* Done setting the event. 
*/ + } + + for(i=0; i + +int varyBufferSizes(int *values, double *rslts, double *counter, int line_size_in_bytes, float pages_per_block, int latency_only, int mode); +void *thread_main(void *arg); +void d_cache_driver(char* papi_event_name, int max_iter, char* outdir, int latency_only, int mode, int show_progress); +void d_cache_test(int pattern, int max_iter, int line_size_in_bytes, float pages_per_block, char* papi_event_name, int latency_only, int mode, FILE* ofp); + +#endif diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/driver.h papi-6.0.0~dfsg/src/counter_analysis_toolkit/driver.h --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/driver.h 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/driver.h 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,26 @@ +#include "eventstock.h" +#include "dcache.h" +#include "branch.h" +#include "icache.h" +#include "flops.h" + +#define USE_ALL_EVENTS 0x0 +#define READ_FROM_FILE 0x1 + +#define BENCH_FLOPS 0x01 +#define BENCH_BRANCH 0x02 +#define BENCH_DCACHE_READ 0x04 +#define BENCH_DCACHE_WRITE 0x08 +#define BENCH_ICACHE_READ 0x10 + +int parseArgs(int argc, char **argv, int *subsetsize, int *mode, int *numit, char **inputfile, char **outputdir, int *bench_type, int *show_progress); +int setup_evts(char* inputfile, char*** basenames, int** cards); +int check_cards(int mode, int** indexmemo, char** basenames, int* cards, int ct, int nevts, int pk, evstock* data); +void combine_qualifiers(int n, int pk, int ct, char** list, char* name, char** allevts, int* track, int flag, int* bitmap); +void trav_evts(evstock* stock, int pk, int* cards, int nevts, int selexnsize, int mode, char** allevts, int* track, int* indexmemo, char** basenames); +int perm(int n, int k); +int comb(int n, int k); +void get_dcache_latencies(int max_iter, char *outputdir); +void testbench(char** allevts, int cmbtotal, int max_iter, int init, char* outputdir, int bench_type, int show_progress); +void print_usage(); + diff 
-Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/event_list.txt papi-6.0.0~dfsg/src/counter_analysis_toolkit/event_list.txt --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/event_list.txt 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/event_list.txt 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,4 @@ +L2_RQSTS 1 +ICACHE:MISSES 0 +ICACHE:HIT 0 +OFFCORE_RESPONSE_0:DMND_DATA_RD:L3_HIT:SNP_ANY 0 diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/eventstock.c papi-6.0.0~dfsg/src/counter_analysis_toolkit/eventstock.c --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/eventstock.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/eventstock.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,222 @@ +#include +#include +#include +#include + +#include "papi.h" +#include "eventstock.h" + +#if !defined(_PAPI_CPU_COMPONENT_NAME) +#define _PAPI_CPU_COMPONENT_NAME "perf_event" +#endif + +int build_stock(evstock* stock) +{ + int ret; + PAPI_event_info_t info; + int cid; + int ncomps = PAPI_num_components(); + int event_counter = 0; + int subctr = 0; + int tmp_event_count; + int event_qual_i, event_i; + + if (!stock) return 1; + + event_i = 0 | PAPI_NATIVE_MASK; + + // Add the names to the stock. + event_counter = 0; + for(cid = 0; cid < ncomps; ++cid) + { + const PAPI_component_info_t *cmp_info = PAPI_get_component_info(cid); + if( strcmp(cmp_info->name, _PAPI_CPU_COMPONENT_NAME) ) + continue; + + tmp_event_count = cmp_info->num_native_events; + + // Set the data stock's sizes all to zero. 
+ if (NULL == (stock->evtsizes = (int*)calloc( (tmp_event_count),sizeof(int) ))) { + fprintf(stderr, "Failed allocation of stock->evtsizes.\n"); + goto gracious_error; + } + + if (NULL == (stock->base_evts = (char**)malloc( (tmp_event_count)*sizeof(char*) ))) { + fprintf(stderr, "Failed allocation of stock->base_evts.\n"); + goto gracious_error; + } + + if (NULL == (stock->evts = (char***)malloc((tmp_event_count)*sizeof(char**)))) { + fprintf(stderr, "Failed allocation of stock->evts.\n"); + goto gracious_error; + } + + if (NULL == (stock->maxqualsize = (size_t *)calloc( tmp_event_count, sizeof(size_t) ))) { + fprintf(stderr, "Failed allocation of stock->maxqualsize.\n"); + goto gracious_error; + } + + break; + } + + if( 0 == tmp_event_count ){ + fprintf(stderr,"ERROR: CPU component (%s) not found. Exiting.",_PAPI_CPU_COMPONENT_NAME); + goto gracious_error; + } + + // At this point "cid" contains the id of the perf_event (CPU) component. + + ret=PAPI_enum_cmp_event(&event_i,PAPI_ENUM_FIRST,cid); + if(ret!=PAPI_OK){ + fprintf(stderr,"ERROR: CPU component does not contain any events. Exiting"); + goto gracious_error; + } + + do{ + int i, max_qual_count = 32; + size_t max_qual_len, tmp_qual_len; + memset(&info,0,sizeof(info)); + event_qual_i = event_i; + + // Resize the arrays if needed. 
+ if( event_counter >= tmp_event_count ){ + tmp_event_count *= 2; + stock->evts = (char ***)realloc( stock->evts, tmp_event_count*sizeof(char **) ); + stock->evtsizes = (int *)realloc( stock->evtsizes, tmp_event_count*sizeof(int) ); + stock->base_evts = (char **)realloc( stock->base_evts, tmp_event_count*sizeof(char *) ); + stock->maxqualsize = (size_t *)realloc( stock->maxqualsize, tmp_event_count*sizeof(size_t) ); + } + + if (NULL == (stock->evts[event_counter] = (char**)malloc( max_qual_count*sizeof(char*) )) ) { + fprintf(stderr, "Failed allocation of stock->evts[i].\n"); + goto gracious_error; + } + + max_qual_len = 0; + subctr = 0; + i = 0; + + do + { + char *col_pos; + ret=PAPI_get_event_info(event_qual_i,&info); + if(ret != PAPI_OK) + continue; + + if( 0 == i ){ + // The first iteration of the inner do loop will give us + // the base event, without qualifiers. + stock->base_evts[event_counter] = strdup(info.symbol); + i++; + continue; + } + + // TODO: For the CPU component, we skip qualifiers that + // contain the string "=". This assumption should be + // removed when working with other components. + if( NULL != strstr(info.symbol, "=") ) + continue; + + col_pos = rindex(info.symbol, ':'); + if ( NULL == col_pos ){ + continue; + } + + // Resize the array of qualifiers as needed. + if( subctr >= max_qual_count ){ + max_qual_count *= 2; + stock->evts[event_counter] = (char **)realloc( stock->evts[event_counter], max_qual_count*sizeof(char *) ); + } + + // Copy the qualifier name into the array. 
+ stock->evts[event_counter][subctr] = strdup(col_pos+1); + tmp_qual_len = strlen( stock->evts[event_counter][subctr] ) + 1; + if( tmp_qual_len > max_qual_len ) + max_qual_len = tmp_qual_len; + subctr++; + + } while(PAPI_enum_cmp_event(&event_qual_i,PAPI_NTV_ENUM_UMASKS,cid)==PAPI_OK); + stock->evtsizes[event_counter] = subctr; + stock->maxqualsize[event_counter] = max_qual_len; + event_counter++; + } while( PAPI_enum_cmp_event(&event_i,PAPI_ENUM_EVENTS,cid)==PAPI_OK ); + + stock->size = event_counter; + return 0; + +gracious_error: + // Frees only the successfully allocated arrays + remove_stock(stock); + return 1; +} + +void print_stock(evstock* stock) +{ + int i, j; + for(i = 0; i < stock->size; ++i) + { + fprintf(stdout, "BASE EVENT <%s>\n", stock->base_evts[i]); + for(j = 0; j < stock->evtsizes[i]; ++j) + { + fprintf(stdout, "%s\n", stock->evts[i][j]); + } + } + + return; +} + +int num_evts(evstock* stock) +{ + return stock->size; +} + +int num_quals(evstock* stock, int base_evt) +{ + return stock->evtsizes[base_evt]; +} + +size_t max_qual_size(evstock* stock, int base_evt) +{ + return stock->maxqualsize[base_evt]; +} + +char* evt_qual(evstock* stock, int base_evt, int tag) +{ + return stock->evts[base_evt][tag]; +} + +char* evt_name(evstock* stock, int index) +{ + return stock->base_evts[index]; +} + +void remove_stock(evstock* stock) +{ + if (!stock) return; + + int i, j; + for(i = 0; i < stock->size; ++i) + { + if (!stock->evtsizes) + for(j = 0; j < stock->evtsizes[i]; ++j) + { + if (stock->evts[i][j]) + free(stock->evts[i][j]); + } + if (stock->evts[i]) + free(stock->evts[i]); + if (stock->base_evts[i]) + free(stock->base_evts[i]); + } + if (stock->evts) + free(stock->evts); + if (stock->base_evts) + free(stock->base_evts); + if (stock->evtsizes) + free(stock->evtsizes); + if (stock->maxqualsize) + free(stock->maxqualsize); + free(stock); + + return; +} diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/eventstock.h 
papi-6.0.0~dfsg/src/counter_analysis_toolkit/eventstock.h --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/eventstock.h 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/eventstock.h 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,22 @@ +#ifndef _EVENT_STOCK_ +#define _EVENT_STOCK_ + +typedef struct +{ + int size; + int* evtsizes; + size_t* maxqualsize; + char** base_evts; + char*** evts; +} evstock; + +int build_stock(evstock* stock); +void print_stock(evstock* stock); +int num_evts(evstock* stock); +int num_quals(evstock* stock, int base_evt); +size_t max_qual_size(evstock* stock, int base_evt); +char* evt_qual(evstock* stock, int base_evt, int tag); +char* evt_name(evstock* stock, int index); +void remove_stock(evstock* stock); + +#endif diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/flops_aux.c papi-6.0.0~dfsg/src/counter_analysis_toolkit/flops_aux.c --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/flops_aux.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/flops_aux.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,9 @@ +#include "flops_aux.h" + +void dummy( void *array ) +{ +/* Confuse the compiler so as not to optimize + away the flops in the calling routine */ +/* Cast the array as a void to eliminate unused argument warning */ + ( void ) array; +} diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/flops_aux.h papi-6.0.0~dfsg/src/counter_analysis_toolkit/flops_aux.h --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/flops_aux.h 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/flops_aux.h 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1 @@ +void dummy( void *array ); diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/flops.c papi-6.0.0~dfsg/src/counter_analysis_toolkit/flops.c --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/flops.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/flops.c 2020-03-04 
15:56:58.000000000 +0000 @@ -0,0 +1,338 @@ +#include +#include +#include +#include +#include "flops_aux.h" +#include "flops.h" + +#define INDEX1 100 +#define INDEX5 500 + +#define MAX_WARN 10 +#define MAX_ERROR 80 +#define MAX_DIFF 14 + +#if defined(mips) +#define FMA 1 +#elif (defined(sparc) && defined(sun)) +#define FMA 1 +#else +#define FMA 0 +#endif + +static void resultline( int i, int j, int EventSet, FILE *fp) +{ + long long flpins = 0; + long long papi, theory; + int retval; + + if ( (retval=PAPI_stop(EventSet, &flpins)) != PAPI_OK){ + return; + } + + i++; + theory = 2; + while ( j-- ) + theory *= i; + papi = flpins << FMA; + + fprintf(fp, "%lld\n", papi); +} + +static float inner_single( int n, float *x, float *y ) +{ + float aa = 0.0; + int i; + + for ( i = 0; i <= n; i++ ) + aa = aa + x[i] * y[i]; + return ( aa ); +} + +static double inner_double( int n, double *x, double *y ) +{ + double aa = 0.0; + int i; + + for ( i = 0; i <= n; i++ ) + aa = aa + x[i] * y[i]; + return ( aa ); +} + +static void vector_single( int n, float *a, float *x, float *y ) +{ + int i, j; + + for ( i = 0; i <= n; i++ ) + for ( j = 0; j <= n; j++ ) + y[i] = y[i] + a[i * n + j] * x[i]; +} + +static void vector_double( int n, double *a, double *x, double *y ) +{ + int i, j; + + for ( i = 0; i <= n; i++ ) + for ( j = 0; j <= n; j++ ) + y[i] = y[i] + a[i * n + j] * x[i]; +} + +static void matrix_single( int n, float *c, float *a, float *b ) +{ + int i, j, k; + + for ( i = 0; i <= n; i++ ) + for ( j = 0; j <= n; j++ ) + for ( k = 0; k <= n; k++ ) + c[i * n + j] = c[i * n + j] + a[i * n + k] * b[k * n + j]; +} + +static void matrix_double( int n, double *c, double *a, double *b ) +{ + int i, j, k; + + for ( i = 0; i <= n; i++ ) + for ( j = 0; j <= n; j++ ) + for ( k = 0; k <= n; k++ ) + c[i * n + j] = c[i * n + j] + a[i * n + k] * b[k * n + j]; +} + +void exec_flops(int double_precision, int EventSet, int retval, FILE *fp) +{ + extern void dummy( void * ); + + float aa, *a=NULL, 
*b=NULL, *c=NULL, *x=NULL, *y=NULL; + double aad, *ad=NULL, *bd=NULL, *cd=NULL, *xd=NULL, *yd=NULL; + int i, j, n; + + /* Inner Product test */ + /* Allocate the linear arrays */ + if (double_precision) { + xd = malloc( INDEX5 * sizeof(double) ); + yd = malloc( INDEX5 * sizeof(double) ); + } + else { + x = malloc( INDEX5 * sizeof(float) ); + y = malloc( INDEX5 * sizeof(float) ); + } + + if ( retval == PAPI_OK ) { + + /* step through the different array sizes */ + for ( n = 0; n < INDEX5; n++ ) { + if ( n < INDEX1 || ( ( n + 1 ) % 50 ) == 0 ) { + + /* Initialize the needed arrays at this size */ + if ( double_precision ) { + for ( i = 0; i <= n; i++ ) { + xd[i] = ( double ) rand( ) * ( double ) 1.1; + yd[i] = ( double ) rand( ) * ( double ) 1.1; + } + } else { + for ( i = 0; i <= n; i++ ) { + x[i] = ( float ) rand( ) * ( float ) 1.1; + y[i] = ( float ) rand( ) * ( float ) 1.1; + } + } + + /* reset PAPI flops count */ + if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) { + return; + } + + /* do the multiplication */ + if ( double_precision ) { + aad = inner_double( n, xd, yd ); + dummy( ( void * ) &aad ); + } else { + aa = inner_single( n, x, y ); + dummy( ( void * ) &aa ); + } + resultline( n, 1, EventSet, fp); + } + } + } + if (double_precision) { + free( xd ); + free( yd ); + } else { + free( x ); + free( y ); + } + + /* Matrix Vector test */ + /* Allocate the needed arrays */ + if (double_precision) { + ad = malloc( INDEX5 * INDEX5 * sizeof(double) ); + xd = malloc( INDEX5 * sizeof(double) ); + yd = malloc( INDEX5 * sizeof(double) ); + } else { + a = malloc( INDEX5 * INDEX5 * sizeof(float) ); + x = malloc( INDEX5 * sizeof(float) ); + y = malloc( INDEX5 * sizeof(float) ); + } + + if ( retval == PAPI_OK ) { + + /* step through the different array sizes */ + for ( n = 0; n < INDEX5; n++ ) { + if ( n < INDEX1 || ( ( n + 1 ) % 50 ) == 0 ) { + + /* Initialize the needed arrays at this size */ + if ( double_precision ) { + for ( i = 0; i <= n; i++ ) { + yd[i] = 0.0; + 
xd[i] = ( double ) rand( ) * ( double ) 1.1; + for ( j = 0; j <= n; j++ ) + ad[i * n + j] = + ( double ) rand( ) * ( double ) 1.1; + } + } else { + for ( i = 0; i <= n; i++ ) { + y[i] = 0.0; + x[i] = ( float ) rand( ) * ( float ) 1.1; + for ( j = 0; j <= n; j++ ) + a[i * n + j] = + ( float ) rand( ) * ( float ) 1.1; + } + } + + /* reset PAPI flops count */ + if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) { + return; + } + + /* compute the resultant vector */ + if ( double_precision ) { + vector_double( n, ad, xd, yd ); + dummy( ( void * ) yd ); + } else { + vector_single( n, a, x, y ); + dummy( ( void * ) y ); + } + resultline( n, 2, EventSet, fp); + } + } + } + if (double_precision) { + free( ad ); + free( xd ); + free( yd ); + } else { + free( a ); + free( x ); + free( y ); + } + + /* Matrix Multiply test */ + /* Allocate the needed arrays */ + if (double_precision) { + ad = malloc( INDEX5 * INDEX5 * sizeof(double) ); + bd = malloc( INDEX5 * INDEX5 * sizeof(double) ); + cd = malloc( INDEX5 * INDEX5 * sizeof(double) ); + } else { + a = malloc( INDEX5 * INDEX5 * sizeof(float) ); + b = malloc( INDEX5 * INDEX5 * sizeof(float) ); + c = malloc( INDEX5 * INDEX5 * sizeof(float) ); + } + + + if ( retval == PAPI_OK ) { + /* step through the different array sizes */ + for ( n = 0; n < INDEX5; n++ ) { + if ( n < INDEX1 || ( ( n + 1 ) % 50 ) == 0 ) { + + /* Initialize the needed arrays at this size */ + if ( double_precision ) { + for ( i = 0; i <= n * n + n; i++ ) { + cd[i] = 0.0; + ad[i] = ( double ) rand( ) * ( double ) 1.1; + bd[i] = ( double ) rand( ) * ( double ) 1.1; + } + } else { + for ( i = 0; i <= n * n + n; i++ ) { + c[i] = 0.0; + a[i] = ( float ) rand( ) * ( float ) 1.1; + b[i] = ( float ) rand( ) * ( float ) 1.1; + } + } + + /* reset PAPI flops count */ + if ( (retval = PAPI_start( EventSet )) != PAPI_OK ) { + return; + } + + /* compute the resultant matrix */ + if ( double_precision ) { + matrix_double( n, cd, ad, bd ); + dummy( ( void * ) cd ); + } else { 
+ matrix_single( n, c, a, b ); + dummy( ( void * ) c ); + } + resultline( n, 3, EventSet, fp); + } + } + } + if (double_precision) { + free( ad ); + free( bd ); + free( cd ); + } else { + free( a ); + free( b ); + free( c ); + } + +} + +void flops_driver(char* papi_event_name, char* outdir) +{ + int retval = PAPI_OK; + int EventSet = PAPI_NULL; + FILE* ofp_papi; + const char *sufx = ".flops"; + char *papiFileName; + + int l = strlen(outdir)+strlen(papi_event_name)+strlen(sufx); + if (NULL == (papiFileName = (char *)calloc( 1+l, sizeof(char)))) { + return; + } + if (l != (sprintf(papiFileName, "%s%s%s", outdir, papi_event_name, sufx))) { + goto error0; + } + if (NULL == (ofp_papi = fopen(papiFileName,"w"))) { + fprintf(stderr, "Failed to open file %s.\n", papiFileName); + goto error0; + } + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK ){ + goto error1; + } + + retval = PAPI_add_named_event( EventSet, papi_event_name ); + if (retval != PAPI_OK ){ + goto error1; + } + + retval = PAPI_OK; + + exec_flops(0, EventSet, retval, ofp_papi); + exec_flops(1, EventSet, retval, ofp_papi); + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK ){ + goto error1; + } + retval = PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK ){ + goto error1; + } + +error1: + fclose(ofp_papi); +error0: + free(papiFileName); + return; +} diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/flops.h papi-6.0.0~dfsg/src/counter_analysis_toolkit/flops.h --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/flops.h 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/flops.h 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,7 @@ +#ifndef _FLOPS_ +#define _FLOPS_ + +void exec_flops(int double_precision, int EventSet, int retval, FILE *fp); +void flops_driver(char* papi_event_str, char* outdir); + +#endif diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/gen_seq_dlopen.sh 
papi-6.0.0~dfsg/src/counter_analysis_toolkit/gen_seq_dlopen.sh --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/gen_seq_dlopen.sh 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/gen_seq_dlopen.sh 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,352 @@ +#!/bin/bash + +DRV_F=icache_seq.c +KRN_F=icache_seq_kernel.c +HEAD_F=icache_seq.h + +TRUE_IF=1 +FALSE_IF=0 + + +################################################################################ +create_common_prefix(){ + cat < +#include +#include +#include +#include +#include + +#include "papi.h" +#include "icache_seq.h" + +EOF +} + + +################################################################################ +create_kernel(){ + + basic_block_copies=$1; + block_type=$2; + + + for((i=0; i<$((${basic_block_copies}-1)); i++)); do + deref[$i]=$(($i+1)) + done + + available=$((${basic_block_copies}-1)); + indx=0; + + for((i=1; i<${basic_block_copies}; i++)); do + rnd=$((RANDOM % ${available})) + next=${deref[${rnd}]}; + # If the next jump is too close, try one more time. + if (( ${next} <= $((${indx}+2)) && ${next} > ${indx} )); then + rnd=$((RANDOM % ${available})) + next=${deref[${rnd}]}; + fi + permutation[${indx}]=$next; + indx=${next}; + deref[${rnd}]=${deref[$((${available}-1))]} # replace the element we used with the last one + ((available--)); # reduce the number of available elements (to ditch the last one). 
+ done + + permutation[${indx}]=-1; + last_link_in_chain=${indx}; + + + if (( $block_type == $TRUE_IF )); then + echo "long long seq_kernel_TRUE_IF_${basic_block_copies}(int epilogue){" + else + echo "long long seq_kernel_FALSE_IF_${basic_block_copies}(int epilogue){" + fi + cat < 3 ){" + echo " RNG();" + echo " }" + echo " result = z1 ^ z2 ^ z3 ^ z4;" + echo " is_zero *= result;" + fi + done + + cat <= 10000 )); then + dl_reps=$(( ${basic_block_copies}/5000 )) + tmp=$(( ${basic_block_copies}/${dl_reps} )) + basic_block_copies=$tmp + else + create_kernel $basic_block_copies $j $TRUE_IF >> ${KRN_F} + create_kernel $basic_block_copies $j $FALSE_IF >> ${KRN_F} + echo "" >> ${KRN_F} + echo "long long seq_kernel_TRUE_IF_${basic_block_copies}(int epilogue);" >> ${HEAD_F} + echo "long long seq_kernel_FALSE_IF_${basic_block_copies}(int epilogue);" >> ${HEAD_F} + fi + echo "int seq_jumps_${basic_block_copies}x${dl_reps}(int iter_count, int eventset, int epilogue, int branch_type, int run_type, FILE* ofp_papi);" >> ${HEAD_F} + + create_caller ${basic_block_copies} $dl_reps >> ${DRV_F} + echo "" >> ${DRV_F} + +} + + +################################################################################ +create_main(){ + + cat <= 10000 )); then + dl_reps=$(( ${basic_block_copies}/5000 )) + tmp=$(( ${basic_block_copies}/${dl_reps} )) + basic_block_copies=$tmp + fi + echo " if( show_progress ){" + echo " printf(\"%3d%%\b\b\b\b\",(100*exp_cnt)/(4*$#));" + echo " exp_cnt++;" + echo " fflush(stdout);" + echo " }" + echo " side_effect += seq_jumps_${basic_block_copies}x${dl_reps}(1, eventset, NO_COPY, TRUE_IF, COLD_RUN, NULL);" + echo " if(side_effect < init){" + echo " return;" + echo " }" + echo " side_effect += seq_jumps_${basic_block_copies}x${dl_reps}(150, eventset, ${copy_type}, TRUE_IF, NORMAL_RUN, ofp_papi);" + echo " if(side_effect < init){" + echo " return;" + echo " }" + echo "" + done + done + + for copy_type in "NO_COPY" "DO_COPY"; do + for ((prm=1; prm<=$#; prm++)); do + 
basic_block_copies=${!prm} + dl_reps=1; + if (( $basic_block_copies >= 10000 )); then + dl_reps=$(( ${basic_block_copies}/5000 )) + tmp=$(( ${basic_block_copies}/${dl_reps} )) + basic_block_copies=$tmp + fi + echo " if( show_progress ){" + echo " printf(\"%3d%%\b\b\b\b\",(100*exp_cnt)/(4*$#));" + echo " exp_cnt++;" + echo " fflush(stdout);" + echo " }" + echo " side_effect += seq_jumps_${basic_block_copies}x${dl_reps}(1, eventset, NO_COPY, FALSE_IF, COLD_RUN, NULL);" + echo " if(side_effect < init){" + echo " return;" + echo " }" + echo " side_effect += seq_jumps_${basic_block_copies}x${dl_reps}(150, eventset, ${copy_type}, FALSE_IF, NORMAL_RUN, ofp_papi);" + echo " if(side_effect < init){" + echo " return;" + echo " }" + echo "" + done + done + cat < ${HEAD_F} +echo "#include " >> ${HEAD_F} + +echo "" >> ${HEAD_F} +echo "float buff[BUF_ELEM_CNT];" >> ${HEAD_F} +echo "volatile int global_zero;" >> ${HEAD_F} +echo "" >> ${HEAD_F} + +create_common_prefix > ${DRV_F} +create_common_prefix > ${KRN_F} +for sz in 10 20 30 50 100 150 200 300 400 600 800 1200 1600 2400 3200 5000 10000 15000 20000 25000 35000 40000 50000 60000; do + create_functions ${sz} +done + +create_main 10 20 30 50 100 150 200 300 400 600 800 1200 1600 2400 3200 5000 10000 15000 20000 25000 35000 40000 50000 60000 >> ${DRV_F} + diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/icache.c papi-6.0.0~dfsg/src/counter_analysis_toolkit/icache.c --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/icache.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/icache.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "icache.h" + +void i_cache_driver(char* papi_event_name, int init, char* outdir, int show_progress) +{ + // Open output file. 
+ const char *sufx = ".instr"; + char *papiFileName; + FILE *ofp_papi; + + int l = strlen(outdir)+strlen(papi_event_name)+strlen(sufx); + if (NULL == (papiFileName = (char *)calloc( 1+l, sizeof(char) ))) { + fprintf(stderr, "Failed to allocate papiFileName.\n"); + return; + } + if (l != (sprintf(papiFileName, "%s%s%s", outdir, papi_event_name, sufx))) { + fprintf(stderr, "sprintf failed to copy into papiFileName.\n"); + free(papiFileName); + return; + } + if (NULL == (ofp_papi = fopen(papiFileName,"w"))) { + fprintf(stderr, "Failed to open file %s.\n", papiFileName); + free(papiFileName); + return; + } + + seq_driver(ofp_papi, papi_event_name, init, show_progress); + + // Close output file. + fclose(ofp_papi); + free(papiFileName); + + return; +} diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/icache.h papi-6.0.0~dfsg/src/counter_analysis_toolkit/icache.h --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/icache.h 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/icache.h 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,37 @@ +#ifndef _INSTR_CACHE_ +#define _INSTR_CACHE_ + +#include + +#define NO_COPY 0 +#define DO_COPY 1 + +#define FALSE_IF 0 +#define TRUE_IF 1 + +#define COLD_RUN 0 +#define NORMAL_RUN 1 + +#define BUF_ELEM_CNT 32*1024*1024 // Hopefully larger than the L3 cache. 
+ +#define RNG() {\ + b = ((z1 << 6) ^ z1) >> 13;\ + z1 = ((z1 & 4294967294U) << 18) ^ b;\ + b = ((z2 << 2) ^ z2) >> 27;\ + z2 = ((z2 & 4294967288U) << 2) ^ b;\ + b = ((z3 << 13) ^ z3) >> 21;\ + z3 = ((z3 & 4294967280U) << 7) ^ b;\ + b = ((z4 << 3) ^ z4) >> 12;\ + z4 = ((z4 & 4294967168U) << 13) ^ b;\ + b = ((z1 << 6) ^ z4) >> 13;\ + z1 = ((z1 & 4294967294U) << 18) ^ b;\ + b = ((z2 << 2) ^ z1) >> 27;\ + b += z4;\ + z2 = ((z2 & 4294967288U) << 2) ^ b;\ + result = z1 ^ z2 ^ z3 ^ z4;\ +} + +void i_cache_driver(char* papi_event_name, int init, char* outdir, int show_progress); +void seq_driver(FILE* ofp_papi, char* papi_event_name, int init, int show_progress); + +#endif diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/main.c papi-6.0.0~dfsg/src/counter_analysis_toolkit/main.c --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/main.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/main.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,846 @@ +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "driver.h" + +int main(int argc, char*argv[]) +{ + int cmbtotal = 0, ct = 0, track = 0, ret = 0; + int bench_type = 0; + int mode = 0, pk = 0, max_iter = 1, i = 0, nevts = 0, show_progress = 0, status; + int *cards = NULL, *indexmemo = NULL; + char *infile = NULL, *outdir = NULL; + char **allevts = NULL, **basenames = NULL; + evstock *data = NULL; + + // Initialize PAPI. + ret = PAPI_library_init(PAPI_VER_CURRENT); + if(ret != PAPI_VER_CURRENT){ + + fprintf(stderr,"PAPI shared library version error: %s Exiting...\n", PAPI_strerror(ret)); + return 0; + } + + // Parse the command-line arguments. + status = parseArgs(argc, argv, &pk, &mode, &max_iter, &infile, &outdir, &bench_type, &show_progress ); + if(0 != status) + { + free(outdir); + PAPI_shutdown(); + return 0; + } + + // Allocate space for the native events and qualifiers. 
+ data = (evstock*)calloc(1,sizeof(evstock)); + if(NULL == data) + { + free(outdir); + fprintf(stderr, "Could not initialize event stock. Exiting...\n"); + PAPI_shutdown(); + return 0; + } + + // Read the list of base event names and maximum qualifier set cardinalities. + if( READ_FROM_FILE == mode) + { + ct = setup_evts(infile, &basenames, &cards); + if(ct == -1) + { + free(outdir); + remove_stock(data); + PAPI_shutdown(); + return 0; + } + } + + // Populate the event stock. + status = build_stock(data); + if(status) + { + free(outdir); + remove_stock(data); + if(READ_FROM_FILE == mode) + { + for(i = 0; i < ct; ++i) + { + free(basenames[i]); + } + free(basenames); + free(cards); + } + fprintf(stderr, "Could not populate event stock. Exiting...\n"); + PAPI_shutdown(); + return 0; + } + + // Get the number of events contained in the event stock. + nevts = num_evts(data); + + // Verify the validity of the cardinalities. + cmbtotal = check_cards(mode, &indexmemo, basenames, cards, ct, nevts, pk, data); + if(-1 == cmbtotal) + { + free(outdir); + remove_stock(data); + if(READ_FROM_FILE == mode) + { + for(i = 0; i < ct; ++i) + { + free(basenames[i]); + } + free(basenames); + free(cards); + } + PAPI_shutdown(); + return 0; + } + + // Allocate enough space for all of the event+qualifier combinations. + if (NULL == (allevts = (char**)malloc(cmbtotal*sizeof(char*)))) { + fprintf(stderr, "Failed to allocate memory.\n"); + PAPI_shutdown(); + return 0; + } + + // Create the qualifier combinations for each event. + trav_evts(data, pk, cards, nevts, ct, mode, allevts, &track, indexmemo, basenames); + + // Run the benchmark for each qualifier combination. + testbench(allevts, cmbtotal, max_iter, argc, outdir, bench_type, show_progress); + + // Free dynamically allocated memory. 
+ free(outdir); + remove_stock(data); + if(READ_FROM_FILE == mode) + { + for(i = 0; i < ct; ++i) + { + free(basenames[i]); + } + free(basenames); + free(cards); + free(indexmemo); + } + for(i = 0; i < cmbtotal; ++i) + { + free(allevts[i]); + } + free(allevts); + + PAPI_shutdown(); + return 0; +} + +// Verify that valid qualifier counts are provided and count their combinations. +int check_cards(int mode, int** indexmemo, char** basenames, int* cards, int ct, int nevts, int pk, evstock* data) +{ + int i, j, minim, n, cmbtotal = 0; + char *name; + + // User provided a file of events. + if(READ_FROM_FILE == mode) + { + // Compute the total number of qualifier combinations and allocate memory to store them. + (*indexmemo) = (int*)malloc(ct*sizeof(int)); + + // Find the index in the main stock whose event corresponds to that in the file provided. + // This simplifies looking up event qualifiers later. + for(i = 0; i < ct; ++i) + { + if(NULL == basenames[i]) + { + (*indexmemo)[i] = -1; + cmbtotal -= 1; + continue; + } + + // j is the index of the event name provided by the user. + for(j = 0; j < nevts; ++j) + { + name = evt_name(data, j); + if(strcmp(basenames[i], name) == 0) + { + break; + } + } + + // If the event name provided by the user does not match any of the main event + // names in the architecture, then it either contains qualifiers or it does not + // exist. + if(cards[i] != 0 && j == nevts) + { + fprintf(stderr, "The provided event '%s' is either not in the architecture or contains qualifiers.\n" \ + "If the latter, use '0' in place of the provided '%d'.\n", basenames[i], cards[i]); + cards[i] = 0; + } + + // If an invalid (negative) qualifier count was given, use zero qualifiers. + if(cards[i] < 0) + { + fprintf(stderr, "The qualifier count (provided for event '%s') cannot be negative.\n", basenames[i]); + cards[i] = 0; + } + + (*indexmemo)[i] = j; + } + + // Count the total number of events to test. 
+ for(i = 0; i < ct; ++i) + { + // If no qualifiers are used, then just count the event itself. + if(cards[i] <= 0) + { + cmbtotal += 1; + continue; + } + + // Get the number of qualifiers which belong to the main event. + if((*indexmemo)[i] != -1) + { + n = num_quals(data, (*indexmemo)[i]); + } + else + { + n = 0; + } + + // If the user specifies to use more qualifiers than are available + // for the main event, do not use any qualifiers. Otherwise, count + // the number of combinations of qualifiers for the main event. + minim = cards[i]; + if(cards[i] > n || cards[i] < 0) + { + minim = 0; + } + cmbtotal += comb(n, minim); + } + } + // User wants to inspect all events in the architecture. + else + { + for(i = 0; i < nevts; ++i) + { + // Get the number of qualifiers which belong to the main event. + n = num_quals(data, i); + + // If the user specifies to use more qualifiers than are available + // for the main event, do not use any qualifiers. Otherwise, count + // the number of combinations of qualifiers for the main event. + minim = pk; + if(pk > n || pk < 0) + { + minim = 0; + } + cmbtotal += comb(n, minim); + } + } + + return cmbtotal; +} + +// Read the contents of the file supplied by the user. +int setup_evts(char* inputfile, char*** basenames, int** evnt_cards) +{ + size_t linelen = 0; + int cnt = 0, status = 0; + char *line, *place; + FILE *input; + int evnt_count = 256; + + char **names = (char **)calloc(evnt_count, sizeof(char *)); + int *cards = (int *)calloc(evnt_count, sizeof(int)); + + // Read the base event name and cardinality columns. 
+    // Each line is expected to hold "<event name> <qualifier count>".
+    // On return *basenames and *evnt_cards own the arrays; the return value
+    // is the number of valid entries stored in them.
+
+    // Without the two arrays allocated above nothing can be stored.
+    if( NULL == names || NULL == cards )
+    {
+        fprintf(stderr, "Unable to allocate memory for the event list.\n");
+        free(names);
+        free(cards);
+        *basenames = NULL;
+        *evnt_cards = NULL;
+        return 0;
+    }
+
+    // getline() requires *lineptr to be NULL (or a malloc'ed buffer) on entry;
+    // 'line' is declared without an initializer, so set it explicitly.
+    line = NULL;
+    linelen = 0;
+
+    input = fopen(inputfile, "r");
+    if( NULL == input )
+    {
+        fprintf(stderr, "Could not open %s.\n", inputfile);
+        *basenames = names;
+        *evnt_cards = cards;
+        return 0;
+    }
+
+    for(cnt=0; 1; cnt++)
+    {
+        ssize_t ret_val = getline(&line, &linelen, input);
+        if( ret_val < 0 )
+            break;
+
+        // Grow BOTH arrays: 'cards' is indexed by 'cnt' exactly like 'names'.
+        if( cnt >= evnt_count )
+        {
+            int new_count = 2*evnt_count;
+            char **new_names;
+            int *new_cards;
+
+            new_names = (char **)realloc(names, new_count*sizeof(char *));
+            if( NULL == new_names )
+            {
+                fprintf(stderr, "Unable to allocate memory for the event list.\n");
+                break;
+            }
+            names = new_names;
+
+            new_cards = (int *)realloc(cards, new_count*sizeof(int));
+            if( NULL == new_cards )
+            {
+                fprintf(stderr, "Unable to allocate memory for the event list.\n");
+                break;
+            }
+            cards = new_cards;
+
+            evnt_count = new_count;
+        }
+
+        // A valid line contains at least one blank between name and count.
+        place = strstr(line, " ");
+        if( NULL == place )
+        {
+            fprintf(stderr,"problem with line: '%s'\n",line);
+            names[cnt] = NULL;
+            cards[cnt] = -1;
+            // Undo the loop increment so that this slot is reused.
+            cnt--;
+
+            free(line);
+            line = NULL;
+            linelen = 0;
+            continue;
+        }
+
+        names[cnt] = NULL;
+        status = sscanf(line, "%ms %d", &(names[cnt]), &(cards[cnt]) );
+
+        // If this line was malformed, report it and skip it.
+        if(2 != status)
+        {
+            fprintf(stderr,"problem with line: '%s'\n",line);
+            // "%ms" may already have allocated the name when only the
+            // count failed to parse; release it before dropping the entry.
+            free(names[cnt]);
+            names[cnt] = NULL;
+            cards[cnt] = -1;
+            cnt--;
+        }
+
+        free(line);
+        line = NULL;
+        linelen = 0;
+    }
+    free(line);
+    fclose(input);
+
+    *basenames = names;
+    *evnt_cards = cards;
+
+    return cnt;
+}
+
+// Recursively builds the list of all combinations of an event's qualifiers.
+//
+//   n       - number of qualifiers in 'list' (and entries in 'bitmap').
+//   pk      - number of qualifiers each generated combination must contain.
+//   ct      - index of the qualifier decided by this call.
+//   list    - the qualifier strings.
+//   name    - event name that prefixes every generated string.
+//   allevts - output list; a finished "name:qual:..." string is stored at
+//             allevts[*track] and *track is advanced.
+//   flag    - 1 to include qualifier 'ct' in the combination, 0 to leave it out.
+//   bitmap  - inclusion flags of all qualifiers; entry 'ct' is restored
+//             before returning.
+void combine_qualifiers(int n, int pk, int ct, char** list, char* name, char** allevts, int* track, int flag, int* bitmap)
+{
+    int original;
+    int counter;
+    int i;
+
+    // Set flag in the array.
+    original = bitmap[ct];
+    bitmap[ct] = flag;
+
+    // Only make recursive calls if there are more items.
+    // Ensure proper cardinality.
+    counter = 0;
+    for(i = 0; i < n; ++i)
+    {
+        counter += bitmap[i];
+    }
+
+    // Cannot use more qualifiers than are available.
+    if(ct+1 < n)
+    {
+        // Make recursive calls both with and without a given qualifier.
+        // Recursion cannot exceed the number of qualifiers specified by
+        // the user.
+        if(counter < pk)
+        {
+            combine_qualifiers(n, pk, ct+1, list, name, allevts, track, 1, bitmap);
+        }
+        combine_qualifiers(n, pk, ct+1, list, name, allevts, track, 0, bitmap);
+    }
+    // Qualifier count matches that specified by the user.
+    else
+    {
+        if(counter == pk)
+        {
+            // Construct the qualifier combination string.
+            char* chunk;
+            size_t evtsize = strlen(name)+1;
+            for(i = 0; i < n; ++i)
+            {
+                if(bitmap[i] == 1)
+                {
+                    // Add one to account for the colon in front of the qualifier.
+                    evtsize += strlen(list[i])+1;
+                }
+            }
+
+            chunk = (char*)malloc((evtsize+1)*sizeof(char));
+            if(NULL != chunk)
+            {
+                strcpy(chunk,name);
+                for(i = 0; i < n; ++i)
+                {
+                    if(bitmap[i] == 1)
+                    {
+                        strcat(chunk,":");
+                        strcat(chunk,list[i]);
+                    }
+                }
+            }
+
+            // Hand the buffer itself over to the list (no copy needed). If the
+            // allocation failed a NULL entry is recorded so that the number
+            // of entries still matches the number of combinations.
+            allevts[*track] = chunk;
+            *track += 1;
+        }
+    }
+
+    // Undo effect of recursive call to combine other qualifiers.
+    bitmap[ct] = original;
+
+    return;
+}
+
+// Create the combinations of qualifiers for the events.
+void trav_evts(evstock* stock, int pk, int* cards, int nevts, int selexnsize, int mode, char** allevts, int* track, int* indexmemo, char** basenames)
+{
+    int i, j, k, n = 0;
+    char** chosen = NULL;
+    char* name = NULL;
+    int* bitmap = NULL;
+
+    // User provided a file of events.
+    if(READ_FROM_FILE == mode)
+    {
+        for(i = 0; i < selexnsize; ++i)
+        {
+            // Iterate through whole stock. If there are matches, proceed normally using the given cardinalities.
+            j = indexmemo[i];
+            if( -1 == j )
+            {
+                allevts[i] = NULL;
+                continue;
+            }
+
+            // Get event's name and qualifier count.
+            if(j == nevts)
+            {
+                // User provided a specific qualifier combination.
+                name = basenames[i];
+            }
+            else
+            {
+                name = evt_name(stock, j);
+                n = num_quals(stock, j);
+            }
+
+            // Create a list to contain the qualifiers.
+            if(cards[i] > 0)
+            {
+                chosen = (char**)malloc(n*sizeof(char*));
+                bitmap = (int*)calloc(n, sizeof(int));
+
+                // Store the qualifiers for the current event.
+                for(k = 0; k < n; ++k)
+                {
+                    chosen[k] = strdup(stock->evts[j][k]);
+                }
+            }
+
+            // Get combinations of all current event's qualifiers.
+            // A request for more qualifiers than the event has is counted by
+            // check_cards() as the single bare event, so emit exactly that
+            // instead of asking combine_qualifiers() for an impossible
+            // cardinality (which would emit nothing).
+            if (n!=0 && cards[i]>0 && cards[i]<=n)
+            {
+                combine_qualifiers(n, cards[i], 0, chosen, name, allevts, track, 0, bitmap);
+                combine_qualifiers(n, cards[i], 0, chosen, name, allevts, track, 1, bitmap);
+            }
+            else
+            {
+                allevts[*track] = strdup(name);
+                *track += 1;
+            }
+
+            // Free the space back up.
+            if(cards[i] > 0)
+            {
+                for(k = 0; k < n; ++k)
+                {
+                    free(chosen[k]);
+                }
+                free(chosen);
+                free(bitmap);
+            }
+        }
+    }
+    // User wants to inspect all events in the architecture.
+    else
+    {
+        for(i = 0; i < nevts; ++i)
+        {
+            // Get event's name and qualifier count.
+            n = num_quals(stock, i);
+            name = evt_name(stock, i);
+
+            // Show progress to the user.
+            //fprintf(stderr, "CURRENT EVENT: %s (%d/%d)\n", name, (i+1), nevts);
+
+            // Create a list to contain the qualifiers.
+            chosen = (char**)malloc(n*sizeof(char*));
+            bitmap = (int*)calloc(n, sizeof(int));
+
+            // Store the qualifiers for the current event.
+            for(j = 0; j < n; ++j)
+            {
+                chosen[j] = strdup(stock->evts[i][j]);
+            }
+
+            // Get combinations of all current event's qualifiers. A
+            // cardinality outside [0, n] cannot be satisfied; check_cards()
+            // counts one bare event for it, so emit the bare event here too.
+            if (n!=0 && pk>=0 && pk<=n)
+            {
+                combine_qualifiers(n, pk, 0, chosen, name, allevts, track, 0, bitmap);
+                combine_qualifiers(n, pk, 0, chosen, name, allevts, track, 1, bitmap);
+            }
+            else
+            {
+                allevts[*track] = strdup(name);
+                *track += 1;
+            }
+
+            // Free the space back up.
+            for(j = 0; j < n; ++j)
+            {
+                free(chosen[j]);
+            }
+            free(chosen);
+            free(bitmap);
+        }
+    }
+
+    return;
+}
+
+// Compute the permutations of k objects from a set of n objects.
+// The product is accumulated in an int, so it is only meaningful while
+// n*(n-1)*...*(n-k+1) fits in that type.
+int perm(int n, int k)
+{
+    int i;
+    int prod = 1;
+    int diff = n-k;
+
+    for(i = n; i > diff; --i)
+    {
+        prod *= i;
+    }
+
+    return prod;
+}
+
+// Compute the combinations of k objects from a set of n objects (n >= 0).
+//
+// The value is built up multiplicatively, C(m, i) = C(m-1, i-1) * m / i,
+// instead of dividing two factorial-sized products: those overflow an int
+// for fairly small arguments and their quotient could even become a
+// division by zero once the denominator wrapped around.
+// Returns 1 for k <= 0, 0 for k > n, and the largest int value if the
+// true result does not fit in an int.
+int comb(int n, int k)
+{
+    // Largest value representable in an int on the usual two's complement
+    // platforms, spelled without relying on an extra header.
+    const long long int_max = (long long)(~0u >> 1);
+    long long result = 1;
+    int i;
+
+    // Selecting nothing can be done in exactly one way.
+    if(k <= 0)
+    {
+        return 1;
+    }
+    // Cannot select more objects than there are.
+    if(k > n)
+    {
+        return 0;
+    }
+
+    // C(n, k) == C(n, n-k); iterate over the smaller of the two.
+    if(k > n-k)
+    {
+        k = n-k;
+    }
+
+    for(i = 1; i <= k; ++i)
+    {
+        // After this step result == C(n-k+i, i), so the division is exact.
+        result = result * (n-k+i) / i;
+        if(result > int_max)
+        {
+            return (int)int_max;
+        }
+    }
+
+    return (int)result;
+}
+
+// Measures the read latencies of the data cache. This information is
+// useful for analyzing data cache-related event signatures.
+void get_dcache_latencies(int max_iter, char *outputdir){
+    FILE *ofp;
+
+    // Make sure the output files could be opened.
+    // The results go to "<outputdir>latencies.txt"; 'outputdir' is used as a
+    // plain prefix, so it has to end with a path separator already.
+    int l = strlen(outputdir)+strlen("latencies.txt");
+    char *latencyFileName = (char *)calloc( 1+l, sizeof(char) );
+    if (!latencyFileName) {
+        fprintf(stderr, "Unable to allocate memory. Skipping latency test.\n");
+        return;
+    }
+    if (l != (sprintf(latencyFileName, "%slatencies.txt", outputdir))) {
+        fprintf(stderr, "sprintf error.\n");
+        free(latencyFileName);
+        return;
+    }
+    if (NULL == (ofp = fopen(latencyFileName,"w"))) {
+        fprintf(stderr, "Unable to open file %s. Skipping latency test.\n", latencyFileName);
+        free(latencyFileName);
+        return;
+    }
+
+    // The name was only needed to open the file.
+    free(latencyFileName);
+
+    d_cache_test(3, max_iter, 256, 128, NULL, 1, 0, ofp);
+
+    fclose(ofp);
+
+    return;
+}
+
+// Print a percentage and move the cursor back over it so that the next
+// update overwrites it; at 100 (or more) the line is terminated instead.
+static void print_progress(int prg)
+{
+    if(prg < 100)
+        printf("%3d%%\b\b\b\b",prg);
+    else
+        printf("%3d%%\n",prg);
+
+    fflush(stdout);
+}
+
+// Same as print_progress(), but the line additionally carries a
+// "Current test" field after the total.
+static void print_progress2(int prg)
+{
+    if(prg < 100)
+        printf("Total:%3d%% Current test: 0%%\b\b\b\b",prg);
+    else
+        printf("Total:%3d%%\n",prg);
+
+    fflush(stdout);
+}
+
+// Run every benchmark selected in the 'bench_type' bit mask on each of the
+// 'cmbtotal' event strings in 'allevts'; NULL entries are skipped.
+void testbench(char** allevts, int cmbtotal, int max_iter, int init, char* outputdir, int bench_type, int show_progress )
+{
+    int i;
+
+    // Make sure the user provided events and iterate through all events.
+    if( 0 == cmbtotal )
+    {
+        fprintf(stderr, "No events to measure.\n");
+        return;
+    }
+
+    // Run the branch benchmark by default if none are specified.
+    if( 0 == bench_type )
+    {
+        bench_type |= BENCH_BRANCH;
+        fprintf(stderr, "Warning: No benchmark specified. Running 'branch' by default.\n");
+    }
+
+    /* Benchmark I - Branch*/
+    if( bench_type & BENCH_BRANCH )
+    {
+        if(show_progress) printf("Branch Benchmarks: ");
+
+        for(i = 0; i < cmbtotal; ++i)
+        {
+            if(show_progress) print_progress((100*i)/cmbtotal);
+
+            if( allevts[i] != NULL )
+                branch_driver(allevts[i], init, outputdir);
+        }
+        if(show_progress) print_progress(100);
+    }
+
+    /* Benchmark II - Data Cache Reads*/
+    if( bench_type & BENCH_DCACHE_READ )
+    {
+        if(show_progress)
+        {
+            printf("D-Cache Latencies: ");
+            fflush(stdout);
+        }
+        get_dcache_latencies(max_iter, outputdir);
+        if(show_progress) printf("100%%\n");
+
+        if(show_progress) printf("D-Cache Read Benchmarks: ");
+        for(i = 0; i < cmbtotal; ++i)
+        {
+            if(show_progress) print_progress2((100*i)/cmbtotal);
+
+            if( allevts[i] != NULL ) {
+                d_cache_driver(allevts[i], max_iter, outputdir, 0, 0, show_progress);
+            }
+        }
+        if(show_progress) print_progress2(100);
+    }
+
+    /* Benchmark III - Data Cache Writes*/
+    if( bench_type & BENCH_DCACHE_WRITE )
+    {
+        // If the READ benchmark was run, do not recompute the latencies.
+ if ( !(bench_type & BENCH_DCACHE_READ) ) + { + if(show_progress) + { + printf("D-Cache Latencies: "); + fflush(stdout); + } + get_dcache_latencies(max_iter, outputdir); + if(show_progress) printf("100%%\n"); + } + + if(show_progress) printf("D-Cache Write Benchmarks: "); + for(i = 0; i < cmbtotal; ++i) + { + if(show_progress) print_progress2((100*i)/cmbtotal); + + if( allevts[i] != NULL ) { + d_cache_driver(allevts[i], max_iter, outputdir, 0, 1, show_progress); + } + } + if(show_progress) print_progress2(100); + } + + /* Benchmark IV - FLOPS*/ + if( bench_type & BENCH_FLOPS ) + { + if(show_progress) printf("FLOP Benchmarks: "); + + for(i = 0; i < cmbtotal; ++i) + { + if(show_progress) print_progress((100*i)/cmbtotal); + + if( allevts[i] != NULL ) + flops_driver(allevts[i], outputdir); + } + if(show_progress) print_progress(100); + } + + /* Benchmark V - Instruction Cache*/ + if( bench_type & BENCH_ICACHE_READ ) + { + if(show_progress) printf("I-Cache Benchmarks: "); + + for(i = 0; i < cmbtotal; ++i) + { + if(show_progress) print_progress2((100*i)/cmbtotal); + + if( allevts[i] != NULL ) + i_cache_driver(allevts[i], init, outputdir, show_progress); + } + if(show_progress) print_progress2(100); + } + + return; +} + +int parseArgs(int argc, char **argv, int *subsetsize, int *mode, int *numit, char **inputfile, char **outputdir, int *bench_type, int *show_progress){ + + char *name = argv[0]; + char *tmp = NULL; + int dirlen = 0; + int kflag = 0; + int inflag = 0; + FILE *test = NULL; + int len, status = 0; + + *subsetsize = -1; + *show_progress=0; + + // Parse the command line arguments + while(--argc){ + ++argv; + if( !strcmp(argv[0],"-h") ){ + print_usage(name); + return -1; + } + if( argc > 1 && !strcmp(argv[0],"-k") ){ + *subsetsize = atoi(argv[1]); + if( *subsetsize < 0 ) + { + *subsetsize = 0; + fprintf(stderr, "Warning: Cannot pass a negative value to -k.\n"); + } + *mode = USE_ALL_EVENTS; + kflag = 1; + --argc; + ++argv; + continue; + } + if( argc > 1 && 
!strcmp(argv[0],"-n") ){ + *numit = atoi(argv[1]); + --argc; + ++argv; + continue; + } + if( argc > 1 && !strcmp(argv[0],"-in") ){ + *inputfile = argv[1]; + *mode = READ_FROM_FILE; + inflag = 1; + --argc; + ++argv; + continue; + } + if( argc > 1 && !strcmp(argv[0],"-out") ){ + tmp = argv[1]; + --argc; + ++argv; + continue; + } + if( !strcmp(argv[0],"-verbose") ){ + *show_progress=1; + continue; + } + if( !strcmp(argv[0],"-branch") ){ + *bench_type |= BENCH_BRANCH; + continue; + } + if( !strcmp(argv[0],"-dcr") ){ + *bench_type |= BENCH_DCACHE_READ; + continue; + } + if( !strcmp(argv[0],"-dcw") ){ + *bench_type |= BENCH_DCACHE_WRITE; + continue; + } + if( !strcmp(argv[0],"-flops") ){ + *bench_type |= BENCH_FLOPS; + continue; + } + if( !strcmp(argv[0],"-ic") ){ + *bench_type |= BENCH_ICACHE_READ; + continue; + } + + print_usage(name); + return -1; + } + + // MODE INFO: mode 1 uses file; mode 2 uses all native events. + if(*mode == 1) + { + test = fopen(*inputfile, "r"); + if(test == NULL) + { + fprintf(stderr, "Could not open %s. Exiting...\n", *inputfile); + return -1; + } + fclose(test); + } + + // Make sure user does not specify both modes simultaneously. + if(kflag == 1 && inflag == 1) + { + fprintf(stderr, "Cannot use -k flag with -in flag. Exiting...\n"); + return -1; + } + + // Make sure user specifies mode implicitly. + if(kflag == 0 && inflag == 0) + { + print_usage(name); + return -1; + } + + // Make sure output path was provided. + if(tmp == NULL) + { + fprintf(stderr, "Output path not provided. Exiting...\n"); + return -1; + } + + // Write output files in the user-specified directory. + dirlen = strlen(tmp); + *outputdir = (char*)malloc((2+dirlen)*sizeof(char)); + len = snprintf( *outputdir, 2+dirlen, "%s/", tmp); + if( len < 1+dirlen ) + { + fprintf(stderr, "Problem with output directory name.\n"); + return -1; + } + + // Make sure files can be written to the provided path. 
+    status = access(*outputdir, W_OK);
+    if(status != 0)
+    {
+        fprintf(stderr, "Permission to write files to \"%s\" denied. Make sure the path exists and is writable.\n", tmp);
+        return -1;
+    }
+
+    return 0;
+}
+
+// Show the user how to properly use the program.
+void print_usage(char* name)
+{
+    fprintf(stdout, "\nUsage: %s [OPTIONS...]\n", name);
+
+    fprintf(stdout, "\nRequired:\n");
+    fprintf(stdout, " -out Output files location.\n");
+    fprintf(stdout, " -in Events and cardinalities file.\n");
+    fprintf(stdout, " -k Cardinality of subsets.\n");
+    fprintf(stdout, " Parameters \"-k\" and \"-in\" are mutually exclusive.\n");
+
+    fprintf(stdout, "\nOptional:\n");
+    fprintf(stdout, " -verbose Show benchmark progress in the standard output.\n");
+    fprintf(stdout, " -n Number of iterations for data cache kernels.\n");
+    fprintf(stdout, " -branch Branch kernels.\n");
+    fprintf(stdout, " -dcr Data cache reading kernels.\n");
+    fprintf(stdout, " -dcw Data cache writing kernels.\n");
+    fprintf(stdout, " -flops Floating point operations kernels.\n");
+    fprintf(stdout, " -ic Instruction cache kernels.\n");
+
+    fprintf(stdout, "\n");
+    fprintf(stdout, "EXAMPLE: %s -in event_list.txt -out OUTPUT_DIRECTORY -branch -dcw\n", name);
+    fprintf(stdout, "\n");
+
+    return;
+}
diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/Makefile papi-6.0.0~dfsg/src/counter_analysis_toolkit/Makefile
--- papi-5.7.0+dfsg/src/counter_analysis_toolkit/Makefile 1970-01-01 00:00:00.000000000 +0000
+++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/Makefile 2020-03-04 15:56:58.000000000 +0000
@@ -0,0 +1,59 @@
+PAPIDIR?=/opt/papi
+LDFLAGS=-L$(PAPIDIR)/lib -lpapi -lm -lpthread -ldl -lrt
+INCFLAGS=-I$(PAPIDIR)/include
+CFLAGS+=-g -Wall -Wextra
+OPT0=-O0
+OPT1=-O1
+OPT2=-O2
+CC=gcc
+
+# These goals name actions or groups of objects, not files that get created.
+.PHONY: all clean realclean d_cache i_cache flops
+
+all: cat_collect
+
+d_cache: timing_kernels.o prepareArray.o compar.o dcache.o
+
+i_cache: icache.o icache_seq_kernel_0.o
+
+branch.o: branch.c branch.h
+	$(CC) $(OPT0) $(CFLAGS) $(INCFLAGS) -c branch.c -o branch.o
+
+timing_kernels.o: timing_kernels.c timing_kernels.h
+	$(CC) $(OPT2) $(CFLAGS) $(INCFLAGS) -c timing_kernels.c -o timing_kernels.o
+
+prepareArray.o: prepareArray.c prepareArray.h
+	$(CC) $(OPT2) $(CFLAGS) -c prepareArray.c -o prepareArray.o
+
+compar.o: compar.c
+	$(CC) $(CFLAGS) $(OPT2) -c compar.c -o compar.o
+
+dcache.o: dcache.c dcache.h
+	$(CC) $(CFLAGS) $(OPT2) $(INCFLAGS) -c dcache.c -o dcache.o
+
+eventstock.o: eventstock.c eventstock.h
+	$(CC) $(CFLAGS) $(OPT0) $(INCFLAGS) -c eventstock.c -o eventstock.o
+
+flops: flops_aux.c flops_aux.h flops.c flops.h
+	$(CC) $(CFLAGS) $(OPT1) $(INCFLAGS) -c flops.c -o flops.o
+	$(CC) $(CFLAGS) $(OPT1) $(INCFLAGS) -c flops_aux.c -o flops_aux.o
+
+icache.o: icache.c icache.h
+	bash gen_seq_dlopen.sh
+	$(CC) $(CFLAGS) $(OPT0) $(INCFLAGS) -c icache.c -o icache.o
+
+icache_seq_kernel_0.o: icache_seq.c icache_seq.h
+	$(CC) $(CFLAGS) $(OPT0) $(INCFLAGS) -c icache_seq.c -o icache_seq.o
+	$(CC) $(CFLAGS) $(OPT0) $(INCFLAGS) -fPIC -c icache_seq_kernel.c -o icache_seq_kernel_0.o
+	$(CC) $(CFLAGS) $(OPT0) -shared -o icache_seq_kernel_0.so icache_seq_kernel_0.o
+	bash replicate.sh
+
+cat_collect: main.c branch.o d_cache eventstock.o flops i_cache
+	$(CC) $(CFLAGS) $(INCFLAGS) main.c flops_aux.o flops.o icache_seq.o icache.o branch.o dcache.o timing_kernels.o prepareArray.o compar.o eventstock.o -o cat_collect $(LDFLAGS)
+
+clean:
+	rm -f *.o
+
+realclean:
+	rm -f cat_collect *.o *.so icache_seq.c icache_seq.h icache_seq_kernel.c
+
diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/prepareArray.c papi-6.0.0~dfsg/src/counter_analysis_toolkit/prepareArray.c
--- papi-5.7.0+dfsg/src/counter_analysis_toolkit/prepareArray.c 1970-01-01 00:00:00.000000000 +0000
+++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/prepareArray.c 2020-03-04 15:56:58.000000000 +0000
@@ -0,0 +1,120 @@
+#include
+#include
+#include
+#include
+#include
+
+#include "prepareArray.h"
+
+volatile uintptr_t opt_killer_zero;
+extern int global_pattern;
+static void
_prepareArray_sections_random(uintptr_t *array, int len, int stride, long secSize);
+static void _prepareArray_sequential(uintptr_t *array, int len, int stride);
+
+
+/*
+ * Initialize "array" for the access pattern selected by the global variable
+ * "global_pattern" (SECRND or SEQUEN) by handing it to the matching helper.
+ * Returns 0 on success and -1, after a message on stderr, for any other
+ * pattern value.
+ *
+ * "stride" is in "uintptr_t" elements, NOT in bytes
+ * Note: It is wise to provide an "array" that is aligned to the cache line size.
+ */
+int prepareArray(uintptr_t *array, int len, int stride, long secSize){
+    assert( array != NULL );
+    // Integer division: for a non-negative "len" the quotient is 1, so the
+    // value stored in the volatile global is 0.
+    opt_killer_zero = (uintptr_t)( (len+37)/(len+36) - 1 );
+
+    switch(global_pattern){
+        case SECRND:
+            _prepareArray_sections_random(array, len, stride, secSize);
+            break;
+        case SEQUEN:
+            _prepareArray_sequential(array, len, stride);
+            break;
+        default:
+            fprintf(stderr,"prepareArray() unknown array access pattern: %d\n",global_pattern);
+            return -1;
+            break;
+    }
+    return 0;
+}
+
+/*
+ * "stride" is in "uintptr_t" elements, NOT in bytes
+ * Note: It is wise to provide an "array" that is aligned to the cache line size.
+ */
+static void _prepareArray_sections_random(uintptr_t *array, int len, int stride, long secSize){
+
+    assert( array != NULL );
+
+    int elemCnt, maxElemCnt, sec, i;
+    int currElemCnt, uniqIndex, taken;
+    uintptr_t **p, *next;
+    int currSecSize = secSize;
+    // One more section than fits completely into "len" elements.
+    int secCnt = 1+len/secSize;
+    int *availableNumbers;
+
+    p = (uintptr_t **)&array[0];
+
+    // Number of stride-sized slots in one section.
+    maxElemCnt = currSecSize/stride;
+    availableNumbers = (int *)calloc(maxElemCnt, sizeof(int));
+
+    // NOTE(review): this copy of the patch is cut off inside the loop header
+    // below; the rest of this function, _prepareArray_sequential() and the
+    // beginning of prepareArray.h are not present here.
+    // For every section of the array
+    for(sec=0; sec
+
+// Access pattern selectors. Only SECRND and SEQUEN are handled by the switch
+// in prepareArray() shown above.
+#define RANDOM 0x2
+#define SECRND 0x3
+#define SEQUEN 0x4
+
+int prepareArray(uintptr_t *array, int len, int stride, long secSize);
+
+#endif
diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/README papi-6.0.0~dfsg/src/counter_analysis_toolkit/README
--- papi-5.7.0+dfsg/src/counter_analysis_toolkit/README 1970-01-01 00:00:00.000000000 +0000
+++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/README 2020-03-04 15:56:58.000000000 +0000
@@ -0,0 +1,28 @@
+Description:
+Benchmarks for helping in the understanding of native events
+by stressing different
aspects of the architecture selectively. + +Compilation: +make PAPIDIR=/path/to/your/papi/installation + + +Usage: +./cat_collect -in event_list.txt -out OUTPUT_DIRECTORY -branch -dcr + +The following five flags specify the corresponding benchmarks: + -branch Branch kernels. + -dcr Data cache reading kernels. + -dcw Data cache writing kernels. + -flops Floating point operations kernels. + -ic Instruction cache kernels. + +Each line in the event-list file should contain either the name of a base +event followed by the number of qualifiers to be appended, or a +fully expanded event with qualifiers followed by the number zero, as in +the following example: + +L2_RQSTS 1 +ICACHE:MISSES 0 +ICACHE:HIT 0 +OFFCORE_RESPONSE_0:DMND_DATA_RD:L3_HIT:SNP_ANY 0 + diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/replicate.sh papi-6.0.0~dfsg/src/counter_analysis_toolkit/replicate.sh --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/replicate.sh 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/replicate.sh 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,6 @@ +#!/bin/bash + +for ((i=1; i<12; i++)); do + cp icache_seq_kernel_0.so icache_seq_kernel_${i}.so; +done + diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/timing_kernels.c papi-6.0.0~dfsg/src/counter_analysis_toolkit/timing_kernels.c --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/timing_kernels.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/timing_kernels.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,126 @@ +#include +#include +#include +#include +#include +#include +#include "prepareArray.h" + +#include "timing_kernels.h" + +// For do_work macro in the header file +volatile double x,y; + +int _papi_eventset = PAPI_NULL; +extern int max_size; + +run_output_t probeBufferSize(int active_buf_len, int line_size, float pageCountPerBlock, uintptr_t *v, uintptr_t *rslt, int latency_only, int mode){ + int count, status; + register uintptr_t *p = 
NULL; + double time1=0.0, time2=1.0; + double dt, factor; + long pageSize, blockSize; + long long int counter = 0; + run_output_t out; + + assert( sizeof(int) >= 4 ); + + x = (double)*rslt; + x = floor(1.3*x/(1.4*x+1.8)); + y = x*3.97; + if( x > 0 || y > 0 ) + printf("WARNING: x=%lf y=%lf\n",x,y); + + // Max counter value to access 1GB worth of buffer. + int countMax = 1024*1024*1024/(line_size*sizeof(uintptr_t)); + + // Clean up the memory. + memset(v,0,active_buf_len*sizeof(uintptr_t)); + + pageSize = sysconf(_SC_PAGESIZE)/sizeof(uintptr_t); + if( pageSize <= 0 ){ + fprintf(stderr,"Cannot determine pagesize, sysconf() returned an error code.\n"); + out.status = -1; + return out; + } + blockSize = (long)(pageCountPerBlock*(float)pageSize); + status = prepareArray(v, active_buf_len, line_size, blockSize); + out.status = status; + if(status != 0) + { + return out; + } + + // Start the counters. + if (!latency_only) + { + if ( PAPI_start(_papi_eventset) != PAPI_OK ) + { + error_handler(1, __LINE__); + } + + } + + // Start the actual test. + count = countMax; + p = &v[0]; + if(latency_only || (CACHE_READ_ONLY == mode)) + { + time1 = getticks(); + while(count > 0){ + N_128; + count -= 128; + } + time2 = getticks(); + } + else + { + while(count > 0){ + NW_128; + count -= 128; + } + } + + // Stop the counters. + if (!latency_only) + { + if ( PAPI_stop(_papi_eventset, &counter) != PAPI_OK ) + { + error_handler(1, __LINE__); + } + } + + dt = elapsed(time2, time1); + + // Turn the time into nanoseconds. + factor = 1000.0; + // Number of loads per run of this function. + factor /= (1.0*countMax); + + *rslt = (uintptr_t)p+(uintptr_t)(x+y); + + out.dt = dt*factor; + out.counter = (1.0*counter)/(1.0*countMax); + + return out; +} + +void error_handler(int e, int line){ + fprintf(stderr,"An error occured at line %d. 
Exiting\n", line); + switch(e){ + case PAPI_EINVAL: + fprintf(stderr,"One or more of the arguments is invalid.\n"); break; + case PAPI_ENOMEM: + fprintf(stderr, "Insufficient memory to complete the operation.\n"); break; + case PAPI_ENOEVST: + fprintf(stderr, "The event set specified does not exist.\n"); break; + case PAPI_EISRUN: + fprintf(stderr, "The event set is currently counting events.\n"); break; + case PAPI_ECNFLCT: + fprintf(stderr, "The underlying counter hardware can not count this event and other events in the event set simultaneously.\n"); break; + case PAPI_ENOEVNT: + fprintf(stderr, "The PAPI preset is not available on the underlying hardware.\n"); break; + default: + fprintf(stderr, "Unknown error occured.\n"); + } +} diff -Nru papi-5.7.0+dfsg/src/counter_analysis_toolkit/timing_kernels.h papi-6.0.0~dfsg/src/counter_analysis_toolkit/timing_kernels.h --- papi-5.7.0+dfsg/src/counter_analysis_toolkit/timing_kernels.h 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/counter_analysis_toolkit/timing_kernels.h 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,24 @@ +#ifndef _TIMING_KERNELS_ +#define _TIMING_KERNELS_ + +#include + +#include "caches.h" + +#define N_1 p = (uintptr_t *)*p; +#define N_2 N_1 N_1 +#define N_16 N_2 N_2 N_2 N_2 N_2 N_2 N_2 N_2 +#define N_128 N_16 N_16 N_16 N_16 N_16 N_16 N_16 N_16 + +#define NW_1 p = (uintptr_t *)*p; *(p+max_size) = 3; +#define NW_2 NW_1 NW_1 +#define NW_16 NW_2 NW_2 NW_2 NW_2 NW_2 NW_2 NW_2 NW_2 +#define NW_128 NW_16 NW_16 NW_16 NW_16 NW_16 NW_16 NW_16 NW_16 + +#define CACHE_READ_ONLY 0x0 +#define CACHE_READ_WRITE 0x1 + +run_output_t probeBufferSize(int l1_size, int line_size, float pageCountPerBlock, uintptr_t *v, uintptr_t *rslt, int detect_size, int mode); +void error_handler(int e, int line); + +#endif diff -Nru papi-5.7.0+dfsg/src/ctests/api.c papi-6.0.0~dfsg/src/ctests/api.c --- papi-5.7.0+dfsg/src/ctests/api.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/api.c 1970-01-01 
00:00:00.000000000 +0000 @@ -1,361 +0,0 @@ -/* - * File: api.c - * Author: Brian Sheely - * bsheely@eecs.utk.edu - * - * Description: This test is designed to provide unit testing and complete - * coverage for all functions which comprise the "Low Level API" - * and the "High Level API" as defined in papi.h. - * - * Currently only the "high level" API is actually tested. - */ - -#include -#include - -#include "papi.h" -#include "papi_test.h" - -int -main( int argc, char **argv ) -{ - const int NUM_COUNTERS = 1; - int Events[] = { PAPI_TOT_INS }; - long long values[NUM_COUNTERS]; - float rtime, ptime, ipc, mflips, mflops; - long long ins, flpins, flpops; - int retval; - int quiet=0; - - quiet=tests_quiet( argc, argv ); - - /* Initialize PAPI */ - retval = PAPI_library_init( PAPI_VER_CURRENT ); - if ( retval != PAPI_VER_CURRENT ) { - test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); - } - - /****** High Level API ******/ - - if ( !quiet ) printf( "Testing PAPI_num_components... " ); - - /* get the number of components available on the system */ - retval = PAPI_num_components( ); - if ( !quiet ) printf( "%d\n", retval ); - - if ( retval == 0) { - if ( !quiet ) printf( "No components found, skipping high level tests\n"); - test_skip(__FILE__, __LINE__, "No components found", 0); - } - - if ( !quiet ) printf( "Testing PAPI_num_counters... " ); - - /* get the number of hardware counters available on the system */ - retval = PAPI_num_counters( ); - if ( retval != PAPI_get_cmp_opt( PAPI_MAX_HWCTRS, NULL, 0 ) ) { - test_fail( __FILE__, __LINE__, "PAPI_num_counters", retval ); - } - else if ( !quiet ) printf( "%d\n", retval ); - - - /* Test PAPI_start_counters() */ - if ( !quiet ) printf( "Testing PAPI_start_counters... 
" ); - // pass invalid 1st argument - retval = PAPI_start_counters( NULL, NUM_COUNTERS ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval ); - } - // pass invalid 2nd argument - retval = PAPI_start_counters( Events, 0 ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval ); - } - /* Try PAPI_TOT_INS */ - retval = PAPI_start_counters( Events, NUM_COUNTERS ); - if ( retval != PAPI_OK ) { - if (!quiet) printf("\nCould not start PAPI_TOT_INS\n"); - test_skip( __FILE__, __LINE__, "PAPI_TOT_INS not available.", retval ); - } - else { - if ( !quiet ) printf( "started PAPI_TOT_INS\n" ); - } - - /* Test PAPI_stop_counters() */ - if ( !quiet ) printf( "Testing PAPI_stop_counters... " ); - // pass invalid 1st argument - retval = PAPI_stop_counters( NULL, NUM_COUNTERS ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); - } - // pass invalid 2nd argument - retval = PAPI_stop_counters( values, 0 ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); - } - // stop counters and return current counts - retval = PAPI_stop_counters( values, NUM_COUNTERS ); - if ( retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); - } - else if ( !quiet ) printf( "stopped counting PAPI_TOT_INS\n" ); - //NOTE: There are currently no checks on whether or not counter values are correct - - - // start counting hardware events again - retval = PAPI_start_counters( Events, NUM_COUNTERS ); - if ( retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval ); - } - - /* Test PAPI_read_counters() */ - if ( !quiet ) printf( "Testing PAPI_read_counters... 
" ); - // pass invalid 1st argument - retval = PAPI_read_counters( NULL, NUM_COUNTERS ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); - } - // pass invalid 2nd argument - retval = PAPI_read_counters( values, 0 ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); - } - // copy current counts to array and reset counters - retval = PAPI_read_counters( values, NUM_COUNTERS ); - if ( retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); - } - else { - if ( !quiet ) printf( "read PAPI_TOT_INS counts and reset counter\n" ); - } - //NOTE: There are currently no checks on whether or not counter values are correct - - - /* Test PAPI_accum_counters() */ - if ( !quiet ) printf( "Testing PAPI_accum_counters... " ); - // pass invalid 1st argument - retval = PAPI_accum_counters( NULL, NUM_COUNTERS ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_accum_counters", retval ); - } - // pass invalid 2nd argument - retval = PAPI_accum_counters( values, 0 ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_accum_counters", retval ); - } - // add current counts to array and reset counters - retval = PAPI_accum_counters( values, NUM_COUNTERS ); - if ( retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_accum_counters", retval ); - } - else { - if ( !quiet ) printf( "added PAPI_TOT_INS counts and reset counter\n" ); - } - //NOTE: There are currently no checks on whether or not counter values are correct - - - // stop counting hardware events - retval = PAPI_stop_counters( values, NUM_COUNTERS ); - if ( retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); - } - - - /* Test PAPI_ipc() */ - if ( !quiet ) printf( "Testing PAPI_ipc... 
" ); - // pass invalid 1st argument - retval = PAPI_ipc( NULL, &ptime, &ins, &ipc ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); - } - // pass invalid 2nd argument - retval = PAPI_ipc( &rtime, NULL, &ins, &ipc ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); - } - // pass invalid 3rd argument - retval = PAPI_ipc( &rtime, &ptime, NULL, &ipc ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); - } - // pass invalid 4th argument - retval = PAPI_ipc( &rtime, &ptime, &ins, NULL ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); - } - // get instructions per cycle, real and processor time - retval = PAPI_ipc( &rtime, &ptime, &ins, &ipc ); - if ( retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); - } - else { - if ( !quiet ) printf( "got instructions per cycle, real and processor time\n" ); - } - //NOTE: There are currently no checks on whether or not returned values are correct - - - //NOTE: PAPI_flips and PAPI_flops fail if any other low-level calls have been made! - PAPI_shutdown( ); - retval = PAPI_library_init( PAPI_VER_CURRENT ); - if ( retval != PAPI_VER_CURRENT ) { - test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); - } - - /* Test PAPI_flips() */ - if ( !quiet ) printf( "Testing PAPI_flips... 
" ); - // pass invalid 1st argument - retval = PAPI_flips( NULL, &ptime, &flpins, &mflips ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); - } - // pass invalid 2nd argument - retval = PAPI_flips( &rtime, NULL, &flpins, &mflips ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); - } - // pass invalid 3rd argument - retval = PAPI_flips( &rtime, &ptime, NULL, &mflips ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); - } - // pass invalid 4th argument - retval = PAPI_flips( &rtime, &ptime, &flpins, NULL ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); - } - // get Mflips/s, real and processor time - retval = PAPI_flips( &rtime, &ptime, &flpins, &mflips ); - if ( retval == PAPI_ENOEVNT ) { - test_warn( __FILE__, __LINE__, "PAPI_flips", retval); - } - else if ( retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); - } - else { - if ( !quiet ) printf( "got Mflips/s, real and processor time\n" ); - } - //NOTE: There are currently no checks on whether or not returned values are correct - - - PAPI_shutdown( ); - retval = PAPI_library_init( PAPI_VER_CURRENT ); - if ( retval != PAPI_VER_CURRENT ) { - test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); - } - - /* Test PAPI_flops() */ - if ( !quiet ) printf( "Testing PAPI_flops... 
" ); - // pass invalid 1st argument - retval = PAPI_flops( NULL, &ptime, &flpops, &mflops ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); - } - // pass invalid 2nd argument - retval = PAPI_flops( &rtime, NULL, &flpops, &mflops ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); - } - // pass invalid 3rd argument - retval = PAPI_flops( &rtime, &ptime, NULL, &mflops ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); - } - // pass invalid 4th argument - retval = PAPI_flops( &rtime, &ptime, &flpops, NULL ); - if ( retval != PAPI_EINVAL ) { - test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); - } - // get Mflops/s, real and processor time - retval = PAPI_flops( &rtime, &ptime, &flpops, &mflops ); - if ( retval == PAPI_ENOEVNT ) { - test_warn( __FILE__, __LINE__, "PAPI_flops", retval); - } - else if ( retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); - } - else if ( !quiet ) { - printf( "got Mflops/s, real and processor time\n" ); - } - //NOTE: There are currently no checks on whether or not returned values are correct - - - /***************************/ - /****** Low Level API ******/ - /***************************/ -/* - int PAPI_accum(int EventSet, long long * values); // accumulate and reset hardware events from an event set - int PAPI_add_event(int EventSet, int Event); // add single PAPI preset or native hardware event to an event set - int PAPI_add_events(int EventSet, int *Events, int number); // add array of PAPI preset or native hardware events to an event set - int PAPI_assign_eventset_component(int EventSet, int cidx); // assign a component index to an existing but empty eventset - int PAPI_attach(int EventSet, unsigned long tid); // attach specified event set to a specific process or thread id - int PAPI_cleanup_eventset(int EventSet); // remove all PAPI events from an event set - int 
PAPI_create_eventset(int *EventSet); // create a new empty PAPI event set - int PAPI_detach(int EventSet); // detach specified event set from a previously specified process or thread id - int PAPI_destroy_eventset(int *EventSet); // deallocates memory associated with an empty PAPI event set - int PAPI_enum_event(int *EventCode, int modifier); // return the event code for the next available preset or natvie event - int PAPI_event_code_to_name(int EventCode, char *out); // translate an integer PAPI event code into an ASCII PAPI preset or native name - int PAPI_event_name_to_code(char *in, int *out); // translate an ASCII PAPI preset or native name into an integer PAPI event code - int PAPI_get_dmem_info(PAPI_dmem_info_t *dest); // get dynamic memory usage information - int PAPI_get_event_info(int EventCode, PAPI_event_info_t * info); // get the name and descriptions for a given preset or native event code - const PAPI_exe_info_t *PAPI_get_executable_info(void); // get the executable's address space information - const PAPI_hw_info_t *PAPI_get_hardware_info(void); // get information about the system hardware - const PAPI_component_info_t *PAPI_get_component_info(int cidx); // get information about the component features - int PAPI_get_multiplex(int EventSet); // get the multiplexing status of specified event set - int PAPI_get_opt(int option, PAPI_option_t * ptr); // query the option settings of the PAPI library or a specific event set - int PAPI_get_cmp_opt(int option, PAPI_option_t * ptr,int cidx); // query the component specific option settings of a specific event set - long long PAPI_get_real_cyc(void); // return the total number of cycles since some arbitrary starting point - long long PAPI_get_real_nsec(void); // return the total number of nanoseconds since some arbitrary starting point - long long PAPI_get_real_usec(void); // return the total number of microseconds since some arbitrary starting point - const PAPI_shlib_info_t *PAPI_get_shared_lib_info(void); // 
get information about the shared libraries used by the process - int PAPI_get_thr_specific(int tag, void **ptr); // return a pointer to a thread specific stored data structure - int PAPI_get_overflow_event_index(int Eventset, long long overflow_vector, int *array, int *number); // # decomposes an overflow_vector into an event index array - long long PAPI_get_virt_cyc(void); // return the process cycles since some arbitrary starting point - long long PAPI_get_virt_nsec(void); // return the process nanoseconds since some arbitrary starting point - long long PAPI_get_virt_usec(void); // return the process microseconds since some arbitrary starting point - int PAPI_is_initialized(void); // return the initialized state of the PAPI library - int PAPI_library_init(int version); // initialize the PAPI library - int PAPI_list_events(int EventSet, int *Events, int *number); // list the events that are members of an event set - int PAPI_list_threads(unsigned long *tids, int *number); // list the thread ids currently known to PAPI - int PAPI_lock(int); // lock one of two PAPI internal user mutex variables - int PAPI_multiplex_init(void); // initialize multiplex support in the PAPI library - int PAPI_num_hwctrs(void); // return the number of hardware counters for the cpu - int PAPI_num_cmp_hwctrs(int cidx); // return the number of hardware counters for a specified component - int PAPI_num_hwctrs(void); // for backward compatibility - int PAPI_num_events(int EventSet); // return the number of events in an event set - int PAPI_overflow(int EventSet, int EventCode, int threshold, - int flags, PAPI_overflow_handler_t handler); // set up an event set to begin registering overflows - int PAPI_perror( char *msg); // convert PAPI error codes to strings - int PAPI_profil(void *buf, unsigned bufsiz, caddr_t offset, - unsigned scale, int EventSet, int EventCode, - int threshold, int flags); // generate PC histogram data where hardware counter overflow occurs - int PAPI_query_event(int 
EventCode); // query if a PAPI event exists - int PAPI_read(int EventSet, long long * values); // read hardware events from an event set with no reset - int PAPI_read_ts(int EventSet, long long * values, long long *cyc); - int PAPI_register_thread(void); // inform PAPI of the existence of a new thread - int PAPI_remove_event(int EventSet, int EventCode); // remove a hardware event from a PAPI event set - int PAPI_remove_events(int EventSet, int *Events, int number); // remove an array of hardware events from a PAPI event set - int PAPI_reset(int EventSet); // reset the hardware event counts in an event set - int PAPI_set_debug(int level); // set the current debug level for PAPI - int PAPI_set_cmp_domain(int domain, int cidx); // set the component specific default execution domain for new event sets - int PAPI_set_domain(int domain); // set the default execution domain for new event sets - int PAPI_set_cmp_granularity(int granularity, int cidx); // set the component specific default granularity for new event sets - int PAPI_set_granularity(int granularity); //set the default granularity for new event sets - int PAPI_set_multiplex(int EventSet); // convert a standard event set to a multiplexed event set - int PAPI_set_opt(int option, PAPI_option_t * ptr); // change the option settings of the PAPI library or a specific event set - int PAPI_set_thr_specific(int tag, void *ptr); // save a pointer as a thread specific stored data structure - void PAPI_shutdown(void); // finish using PAPI and free all related resources - int PAPI_sprofil(PAPI_sprofil_t * prof, int profcnt, int EventSet, int EventCode, int threshold, int flags); // generate hardware counter profiles from multiple code regions - int PAPI_start(int EventSet); // start counting hardware events in an event set - int PAPI_state(int EventSet, int *status); // return the counting state of an event set - int PAPI_stop(int EventSet, long long * values); // stop counting hardware events in an event set and return 
current events - char *PAPI_strerror(int); // return a pointer to the error message corresponding to a specified error code - unsigned long PAPI_thread_id(void); // get the thread identifier of the current thread - int PAPI_thread_init(unsigned long (*id_fn) (void)); // initialize thread support in the PAPI library - int PAPI_unlock(int); // unlock one of two PAPI internal user mutex variables - int PAPI_unregister_thread(void); // inform PAPI that a previously registered thread is disappearing - int PAPI_write(int EventSet, long long * values); // write counter values into counters -*/ - - test_pass( __FILE__ ); - - return 0; -} diff -Nru papi-5.7.0+dfsg/src/ctests/bgp/Makefile papi-6.0.0~dfsg/src/ctests/bgp/Makefile --- papi-5.7.0+dfsg/src/ctests/bgp/Makefile 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/bgp/Makefile 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -TESTS = papi_1 - -bgp_tests: $(TESTS) - -papi%: - $(CC) $(INCLUDE) $(CFLAGS) -o $@ $@.c $(LIBRARY) $(LDFLAGS) - -clean: - rm -f core $(TESTS) *~ *.o diff -Nru papi-5.7.0+dfsg/src/ctests/bgp/papi_1.c papi-6.0.0~dfsg/src/ctests/bgp/papi_1.c --- papi-5.7.0+dfsg/src/ctests/bgp/papi_1.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/bgp/papi_1.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,2092 +0,0 @@ -/* - * Basic PAPI Test for BG/P - * - * NOTE: If a PAPI function is not listed below, the function is - * untested and user beware... - * - * The following high level functions are called... 
- * PAPI_num_counters - get the number of hardware counters available on the system - * PAPI_flips - simplified call to get Mflips/s (floating point instruction rate), real and processor time - * PAPI_flops - simplified call to get Mflops/s (floating point operation rate), real and processor time - * PAPI_ipc - gets instructions per cycle, real and processor time - * PAPI_accum_counters - add current counts to array and reset counters - * PAPI_read_counters - copy current counts to array and reset counters - * PAPI_start_counters - start counting hardware events - * PAPI_stop_counters - stop counters and return current counts - * - * The following low level functions are called... - * PAPI_accum - accumulate and reset hardware events from an event set - * PAPI_add_event - add single PAPI preset or native hardware event to an event set - * PAPI_cleanup_eventset - remove all PAPI events from an event set - * PAPI_create_eventset - create a new empty PAPI event set - * PAPI_destroy_eventset - deallocates memory associated with an empty PAPI event set - * PAPI_enum_event - return the event code for the next available preset or natvie event - * PAPI_event_code_to_name - translate an integer PAPI event code into an ASCII PAPI preset or native name - * PAPI_event_name_to_code - translate an ASCII PAPI preset or native name into an integer PAPI event code - * PAPI_get_dmem_info - get dynamic memory usage information - * PAPI_get_event_info - get the name and descriptions for a given preset or native event code - * PAPI_get_executable_info - get the executable’s address space information - * PAPIF_get_exe_info - Fortran version of PAPI_get_executable_info with different calling semantics - * PAPI_get_hardware_info - get information about the system hardware - * PAPI_get_multiplex - get the multiplexing status of specified event set - * PAPI_get_real_cyc - return the total number of cycles since some arbitrary starting point - * PAPI_get_real_usec - return the total number 
of microseconds since some arbitrary starting point - * PAPI_get_shared_lib_info - get information about the shared libraries used by the process - * PAPI_get_virt_cyc - return the process cycles since some arbitrary starting point - * PAPI_get_virt_usec - return the process microseconds since some arbitrary starting point - * PAPI_is_initialized - return the initialized state of the PAPI library - * PAPI_library_init - initialize the PAPI library - * PAPI_list_events - list the events that are members of an event set - * PAPI_num_hwctrs - return the number of hardware counters - * PAPI_num_events - return the number of events in an event set - * PAPI_query_event - query if a PAPI event exists - * PAPI_read - read hardware events from an event set with no reset - * PAPI_remove_event - remove a hardware event from a PAPI event set - * PAPI_reset - reset the hardware event counts in an event set - * PAPI_shutdown - finish using PAPI and free all related resources - * PAPI_start - start counting hardware events in an event set - * PAPI_state - return the counting state of an event set - * PAPI_stop - stop counting hardware events in an event set and return current events - * PAPI_write - write counter values into counters - * NOTE: Not supported when UPC is running, and when not running, only changes local PAPI memory. - */ - -#include -#include -#include - -#include - -#include -#include "papiStdEventDefs.h" -#include "papi.h" -#include "linux-bgp-native-events.h" - -#define MAX_COUNTERS 256 -#define NUMBER_COUNTERS_PER_ROW 8 -/* - * Prototypes... 
- */ -void Do_Tests(void); -void Do_Low_Level_Tests(void); -void Do_High_Level_Tests(void); -void Do_Multiplex_Tests(void); -void Run_Cycle(const int pNumEvents); -void Zero_Local_Counters(long long* pCounters); -void FPUArith(void); -void List_PAPI_Events(const int pEventSet, int* pEvents, int* xNumEvents); -void Print_Native_Counters(); -void Print_Native_Counters_via_Buffer(const BGP_UPC_Read_Counters_Struct_t* pBuffer); -void Print_Native_Counters_for_PAPI_Counters(const int pEventSet); -void Print_Native_Counters_for_PAPI_Counters_From_List(const int* pEvents, const int pNumEvents); -void Print_PAPI_Counters(const int pEventSet, const long long* pCounters); -void Print_PAPI_Counters_From_List(const int* pEventList, const int pNumEvents, const long long* pCounters); -void Print_Counters(const int pEventSet); -void Print_Node_Info(void); -void Read_Native_Counters(const int pLength); -void Print_PAPI_Events(const int pEventSet); -void Print_Counter_Values(const long long* pCounters, const int pNumCounters); -void DumpInHex(const char* pBuffer, int pSize); - - -/* - * Global variables... - */ -int PAPI_Events[MAX_COUNTERS]; -long long PAPI_Counters[MAX_COUNTERS]; -char Native_Buffer[BGP_UPC_MAXIMUM_LENGTH_READ_COUNTERS_STRUCTURE]; -double x[32] ALIGN_L3_CACHE; - - -const int NumEventsPerSet = MAX_COUNTERS; -const int MaxPresetEventId = 104; -const int MaxNativeEventId = 511; - -int main(int argc, char * argv[]) { - _BGP_Personality_t personality; - int pRank=0, pMode=-2, pCore=0, pEdge=1, xActiveCore=0, xActiveRank=0, xRC; - - /* - * Check args, print test inputs. - */ - - if ( argc > 1 ) - sscanf(argv[1], "%d", &pRank); - if ( argc > 2 ) - sscanf(argv[2], "%d", &pMode); - if ( argc > 3 ) - sscanf(argv[3], "%d", &pCore); - if ( argc > 4 ) - sscanf(argv[4], "%d", &pEdge); - -/* - * Check for valid rank... - */ - if ( pRank < 0 || pRank > 31 ) { - printf("Invalid rank (%d) specified\n", pRank); - exit(1); - } -/* - * Check for valid mode... 
- * Mode = -2 means use what was initialized by CNK - * Mode = -1 means to initialize with the default - * Mode = 0-3 means to initialize with mode 0-3 - */ - if ( pMode < -2 || pMode > 3 ) { - printf("Invalid mode (%d) specified\n", pMode); - exit(1); - } -/* - * Check for valid core... - */ - if ( pCore < 0 || pCore > 3 ) { - printf("Invalid core (%d) specified\n", pCore); - exit(1); - } -/* - * Check for valid edge... - * Edge = 1 means initialize with the default edge - * Edge = 0 means initialize with level high - * Edge = 4 means initialize with edge rise - * Edge = 8 means initialize with edge fall - * Edge = 12 means initialize with level low - */ - if ( pEdge != 0 && pEdge != 1 && pEdge != 4 && pEdge != 8 && pEdge != 12 ) { - printf("Invalid edge (%d) specified\n", pEdge); - exit(1); - } - -/* - * Initialize the UPC environment... - * NOTE: Must do this from all 'ranks'... - */ -// BGP_UPC_Initialize(); - xRC = PAPI_library_init(PAPI_VER_CURRENT); - if (xRC != 50921472) { - printf("PAPI_library_init failed: xRC=%d, ending...\n", xRC); - exit(1); - } - -/* - * Only run if this is specified rank... - */ - - xRC = Kernel_GetPersonality(&personality, sizeof(_BGP_Personality_t)); - if (xRC !=0) { - printf(" Kernel_GetPersonality returned %d\n",xRC) ; - exit(xRC); - } - xActiveRank = personality.Network_Config.Rank; - xActiveCore = Kernel_PhysicalProcessorID(); - - printf("Rank %d, core %d reporting...\n", xActiveRank, xActiveCore); - - if (xActiveRank != pRank) { - printf("Rank %d is not to run... Exiting...\n", xActiveRank); - exit(0); - } - - if ( xActiveCore == pCore ) { - printf("Program is to run on rank %d core %d, using mode= %d, edge= %d\n", pRank, xActiveCore, pMode, pEdge); - } - else { - printf("Program is NOT to run on rank %d core %d... Exiting...\n", pRank, xActiveCore); - exit(0); - } - -/* - * Main processing... 
- */ - printf("************************************************************\n"); - printf("* Configuration parameters used: *\n"); - printf("* Rank = %d *\n", pRank); - printf("* Mode = %d *\n", pMode); - printf("* Core = %d *\n", pCore); - printf("* Edge = %d *\n", pEdge); - printf("************************************************************\n\n"); - - printf("Print config after PAPI_library_init...\n"); - BGP_UPC_Print_Config(); - -/* - * If we are to initialize, do so with user mode and edge... - * Otherwise, use what was initialized by CNK... - */ - if (pMode > -2) { - BGP_UPC_Initialize_Counter_Config(pMode, pEdge); - printf("UPC unit(s) initialized with mode=%d, edge=%d...\n", pMode, pEdge); - } - - printf("Before running the main test procedure...\n"); - BGP_UPC_Print_Config(); - BGP_UPC_Print_Counter_Values(BGP_UPC_READ_EXCLUSIVE); - -/* - * Perform the main test procedure... - */ - Do_Tests(); - -/* - * Print out final configuration and results... - */ - printf("After running the main test procedure...\n"); - BGP_UPC_Print_Config(); - BGP_UPC_Print_Counter_Values(BGP_UPC_READ_EXCLUSIVE); - - exit(0); -} - - -/* - * Do_Tests - */ - -void Do_Tests(void) { - printf("==> Do_Tests(): Beginning of the main body...\n"); - - // NOTE: PAPI_library_init() has already been done for each participating node - // prior to calling this routine... - - Do_Low_Level_Tests(); - Do_High_Level_Tests(); - Do_Multiplex_Tests(); // NOTE: Not supported... - PAPI_shutdown(); - - printf("==> Do_Tests(): End of the main body...\n"); - fflush(stdout); - - return; -} - -/* - * Do_Low_Level_Tests - */ - -void Do_Low_Level_Tests(void) { - int xRC, xEventSet, xEventCode, xState; - long long xLLValue; - char xName[256]; - - printf("==> Do_Low_Level_Tests(): Beginning of the main body...\n"); - - /* - * Low-level API tests... 
- */ - - xRC = PAPI_is_initialized(); - if (xRC == 1) - printf("SUCCESS: PAPI has been low-level initialized by main()...\n"); - else { - printf("FAILURE: PAPI has not been properly initialized by main(), xRC=%d, ending...\n", xRC); - return; - } - - /* - * Print out the node information with respect to UPC units... - */ - Print_Node_Info(); - - /* - * Zero the buffers for counters... - */ - Zero_Local_Counters(PAPI_Counters); - BGP_UPC_Read_Counters_Struct_t* xTemp; - xTemp = (BGP_UPC_Read_Counters_Struct_t*)(void*)Native_Buffer; - Zero_Local_Counters(xTemp->counter); - - /* - * Start of real tests... - */ - xLLValue = -1; - xLLValue = PAPI_get_real_cyc(); - printf("PAPI_get_real_cyc: xLLValue=%lld...\n", xLLValue); - - xLLValue = -1; - xLLValue = PAPI_get_virt_cyc(); - printf("PAPI_get_virt_cyc: xLLValue=%lld...\n", xLLValue); - - xLLValue = -1; - xLLValue = PAPI_get_real_usec(); - printf("PAPI_get_real_usec: xLLValue=%lld...\n", xLLValue); - - xLLValue = -1; - xLLValue = PAPI_get_virt_usec(); - printf("PAPI_get_virt_usec: xLLValue=%lld...\n", xLLValue); - - xRC = PAPI_num_hwctrs(); - if (xRC == 256) - printf("SUCCESS: PAPI_num_hwctrs returned 256 hardware counters...\n"); - else - printf("FAILURE: PAPI_num_hwctrs failed, returned xRC=%d...\n", xRC); - - *xName = 0; - char* xEventName_1 = "PAPI_L3_LDM"; - xRC = PAPI_event_code_to_name(PAPI_L3_LDM, xName); - if (xRC == PAPI_OK) { - xRC = strcmp(xName,xEventName_1); - if (!xRC) - printf("SUCCESS: PAPI_event_code_to_name for PAPI_L3_LDM...\n"); - else - printf("FAILURE: PAPI_event_code_to_name returned incorrect name, xName=%s\n", xName); - } - else - printf("FAILURE: PAPI_event_code_to_name failed, xRC=%d...\n", xRC); - - *xName = 0; - char* xEventName_2 = "PNE_BGP_PU1_IPIPE_INSTRUCTIONS"; - xRC = PAPI_event_code_to_name(PNE_BGP_PU1_IPIPE_INSTRUCTIONS, xName); - if (xRC == PAPI_OK) { - xRC = strcmp(xName,xEventName_2); - if (!xRC) - printf("SUCCESS: PAPI_event_code_to_name for PNE_BGP_PU1_IPIPE_INSTRUCTIONS...\n"); 
- else - printf("FAILURE: PAPI_event_code_to_name returned incorrect name, xName=%s\n", xName); - } - else - printf("FAILURE: PAPI_event_code_to_name failed, xRC=%d...\n", xRC); - - strcpy(xName,"PAPI_L3_LDM"); - xRC = PAPI_event_name_to_code(xName, &xEventCode); - if (xRC == PAPI_OK) - if (xEventCode == 0x8000000E) - printf("SUCCESS: PAPI_event_name_to_code for PAPI_L3_LDM...\n"); - else - printf("FAILURE: PAPI_event_name_to_code returned incorrect code, xEventCode=%d\n", xEventCode); - else - printf("FAILURE: PAPI_event_name_to_code failed, xRC=%d...\n", xRC); - - strcpy(xName,"PNE_BGP_PU1_IPIPE_INSTRUCTIONS"); - xRC = PAPI_event_name_to_code(xName, &xEventCode); - if (xRC == PAPI_OK) - if (xEventCode == 0x40000027) - printf("SUCCESS: PAPI_event_name_to_code for PNE_BGP_PU1_IPIPE_INSTRUCTIONS...\n"); - else - printf("FAILURE: PAPI_event_name_to_code returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_event_name_to_code failed, xRC=%d...\n", xRC); - - xEventCode = 0x80000000; - xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); - if (xRC == PAPI_OK) - if (xEventCode == 0x80000001) - printf("SUCCESS: PAPI_enum_event for 0x80000000 PAPI_PRESET_ENUM_ALL, returned 0x80000001...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x80000000 PAPI_PRESET_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x80000000 PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); - - xEventCode = 0x80000002; - xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); - if (xRC == PAPI_OK) - if (xEventCode == 0x80000003) - printf("SUCCESS: PAPI_enum_event for 0x80000002 PAPI_PRESET_ENUM_ALL, returned 0x80000003...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x80000002 PAPI_PRESET_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x80000002 PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); - - xEventCode = 0x80000067; - xRC = 
PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); - if (xRC == PAPI_OK) - if (xEventCode == 0x80000068) - printf("SUCCESS: PAPI_enum_event for 0x80000067 PAPI_PRESET_ENUM_ALL, returned 0x80000068...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x80000067 PAPI_PRESET_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x80000067 PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); - - xEventCode = 0x80000068; - xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); - if (xRC == PAPI_ENOEVNT) - printf("SUCCESS: PAPI_enum_event for 0x80000068 PAPI_PRESET_ENUM_ALL, no next event...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x80000068 PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); - - xEventCode = 0x40000000; - xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); - if (xRC == PAPI_OK) - if (xEventCode == 0x40000001) - printf("SUCCESS: PAPI_enum_event for 0x40000000 PAPI_PRESET_ENUM_ALL, returned 0x40000001...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x40000000 PAPI_PRESET_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x40000000 PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); - - xEventCode = 0x40000001; - xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); - if (xRC == PAPI_OK) - if (xEventCode == 0x40000002) - printf("SUCCESS: PAPI_enum_event for 0x40000001 PAPI_PRESET_ENUM_ALL, returned 0x40000002...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x40000001 PAPI_PRESET_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x40000001 PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); - - xEventCode = 0x400000FC; - xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); - if (xRC == PAPI_OK) - if (xEventCode == 0x400000FF) - printf("SUCCESS: PAPI_enum_event for 0x400000FC PAPI_PRESET_ENUM_ALL, returned 0x400000FF...\n"); - else - printf("FAILURE: PAPI_enum_event for 
0x400000FC PAPI_PRESET_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x400000FC PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); - - xEventCode = 0x400001FD; - xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); - if (xRC == PAPI_OK) - if (xEventCode == 0x400001FF) - printf("SUCCESS: PAPI_enum_event for 0x400001FD PAPI_ENUM_ALL, returned 0x400001FF...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x400001FD PAPI_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x400001FD PAPI_ENUM_ALL failed, xRC=%d...\n", xRC); - - xEventCode = 0x400001FF; - xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); - if (xRC == PAPI_ENOEVNT) - printf("SUCCESS: PAPI_enum_event for 0x400001FF PAPI_PRESET_ENUM_ALL, no next event...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x400001FF PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); - - xEventCode = 0x80000000; - xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); - if (xRC == PAPI_OK) - if (xEventCode == 0x80000001) - printf("SUCCESS: PAPI_enum_event for 0x80000000 PAPI_PRESET_ENUM_AVAIL, returned 0x80000001...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x80000000PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x80000000PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); - - xEventCode = 0x80000002; - xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); - if (xRC == PAPI_OK) - if (xEventCode == 0x80000006) - printf("SUCCESS: PAPI_enum_event for 0x80000002 PAPI_PRESET_ENUM_AVAIL, returned 0x80000006...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x80000002 PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x80000002 PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); - - xEventCode = 0x80000067; - xRC = 
PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); - if (xRC == PAPI_OK) - if (xEventCode == 0x80000068) - printf("SUCCESS: PAPI_enum_event for 0x80000067 PAPI_PRESET_ENUM_AVAIL, returned 0x80000068...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x80000067 PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x80000067 PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); - - xEventCode = 0x80000068; - xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); - if (xRC == PAPI_ENOEVNT) - printf("SUCCESS: PAPI_enum_event for 0x80000068 PAPI_PRESET_ENUM_AVAIL, no next event...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x80000068 PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); - - xEventCode = 0x40000000; - xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); - if (xRC == PAPI_OK) - if (xEventCode == 0x40000001) - printf("SUCCESS: PAPI_enum_event for 0x40000000 PAPI_PRESET_ENUM_AVAIL, returned 0x40000001...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x40000000 PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x40000000 PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); - - xEventCode = 0x40000001; - xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); - if (xRC == PAPI_OK) - if (xEventCode == 0x40000002) - printf("SUCCESS: PAPI_enum_event for 0x40000001 PAPI_PRESET_ENUM_AVAIL, returned 0x40000002...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x40000001 PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x40000001 PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); - - printf("NOTE: Might get two messages indicating invalid event id specified for 253 and 254. 
These are OK...\n"); - xEventCode = 0x400000FC; - xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); - if (xRC == PAPI_OK) - if (xEventCode == 0x400000FF) - printf("SUCCESS: PAPI_enum_event for 0x400000FC PAPI_PRESET_ENUM_AVAIL, returned 0x400000FF...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x400000FC PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x400000FC PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); - - printf("NOTE: Might get one message indicating invalid event id specified for 510. This is OK...\n"); - xEventCode = 0x400001FD; - xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); - if (xRC == PAPI_OK) - if (xEventCode == 0x400001FF) - printf("SUCCESS: PAPI_enum_event for 0x400001FD PAPI_PRESET_ENUM_AVAIL, returned 0x400001FF...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x400001FD PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); - else - printf("FAILURE: PAPI_enum_event for 0x400001FD PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); - - xEventCode = 0x400001FF; - xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); - if (xRC == PAPI_ENOEVNT) - printf("SUCCESS: PAPI_enum_event for 0x400001FF PAPI_PRESET_ENUM_AVAIL, no next event...\n"); - else - printf("FAILURE: PAPI_enum_event for 0x400001FF PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); - - PAPI_dmem_info_t xDmemSpace; - xRC = PAPI_get_dmem_info(&xDmemSpace); - if (xRC == PAPI_OK) { - DumpInHex((char*)&xDmemSpace, sizeof( PAPI_dmem_info_t)); - printf("SUCCESS: PAPI_get_dmem_info...\n"); - } - else - printf("FAILURE: PAPI_get_dmem_info failed, xRC=%d...\n", xRC); - - PAPI_event_info_t xInfoSpace; - xRC = PAPI_get_event_info(PAPI_L3_LDM, &xInfoSpace); - if (xRC == PAPI_OK) { - DumpInHex((char*)&xInfoSpace, sizeof( PAPI_event_info_t)); - printf("SUCCESS: PAPI_get_event_info for PAPI_L3_LDM...\n"); - } - else - printf("FAILURE: 
PAPI_get_event_info failed for PAPI_L3_LDM, xRC=%d...\n", xRC); - - const PAPI_exe_info_t* xExeInfo = NULL; - if ((xExeInfo = PAPI_get_executable_info()) != NULL) { - DumpInHex((char*)xExeInfo, sizeof( PAPI_exe_info_t)); - printf("SUCCESS: PAPI_get_executable_info...\n"); - } - else - printf("FAILURE: PAPI_get_executable_info failed, returned null pointer...\n"); - - const PAPI_hw_info_t* xHwInfo = NULL; - if ((xHwInfo = PAPI_get_hardware_info()) != NULL) { - DumpInHex((char*)xHwInfo, sizeof( PAPI_hw_info_t)); - printf("SUCCESS: PAPI_get_hardware_info...\n"); - } - else - printf("FAILURE: PAPI_get_hardware_info failed, returned null pointer...\n"); - - const PAPI_shlib_info_t* xShLibInfo = NULL; - if ((xShLibInfo = PAPI_get_shared_lib_info()) != NULL) { - DumpInHex((char*)xShLibInfo, sizeof( PAPI_shlib_info_t)); - printf("SUCCESS: PAPI_get_shared_lib_info...\n"); - } - else - printf("FAILURE: PAPI_get_shared_lib_info failed, returned null pointer...\n"); - - xEventSet = PAPI_NULL; - xRC = PAPI_create_eventset(&xEventSet); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_create_eventset created...\n"); - else { - printf("FAILURE: PAPI_create_eventset failed, xRC=%d...\n", xRC); - return; - } - - printf("==> No events should be in the event set...\n"); - Print_Counters(xEventSet); - - xRC = PAPI_num_events(xEventSet); - if (xRC == 0) - printf("SUCCESS: PAPI_num_events returned 0...\n"); - else - printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_add_event(xEventSet, PAPI_L1_DCM); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_add_event PAPI_L1_DCM...\n"); - else - printf("FAILURE: PAPI_add_event PAPI_L1_DCM failed, xRC=%d...\n", xRC); - - xRC = PAPI_num_events(xEventSet); - if (xRC == 1) - printf("SUCCESS: PAPI_num_events returned 1...\n"); - else - printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_add_event(xEventSet, PNE_BGP_PU3_L2_MEMORY_WRITES); - if (xRC == PAPI_OK) - printf("SUCCESS: 
PAPI_add_event PNE_BGP_PU3_L2_MEMORY_WRITES...\n"); - else - printf("FAILURE: PAPI_add_event PNE_BGP_PU3_L2_MEMORY_WRITES failed, xRC=%d...\n", xRC); - - xRC = PAPI_num_events(xEventSet); - if (xRC == 2) - printf("SUCCESS: PAPI_num_events returned 2...\n"); - else - printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_add_event(xEventSet, BGP_PU3_L2_MEMORY_WRITES); - if (xRC == PAPI_EINVAL) - printf("SUCCESS: PAPI_add_event BGP_PU3_L2_MEMORY_WRITES not allowed...\n"); - else - printf("FAILURE: PAPI_add_event BGP_PU3_L2_MEMORY_WRITES allowed, or failed incorrectly..., xRC=%d...\n", xRC); - - xRC = PAPI_num_events(xEventSet); - if (xRC == 2) - printf("SUCCESS: PAPI_num_events returned 2...\n"); - else - printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_add_event(xEventSet, 0x40000208); - if (xRC == PAPI_ENOEVNT) - printf("SUCCESS: PAPI_add_event 0x40000208 not allowed...\n"); - else - printf("FAILURE: PAPI_add_event 0x40000208 allowed, or failed incorrectly..., xRC=%d...\n", xRC); - - xRC = PAPI_num_events(xEventSet); - if (xRC == 2) - printf("SUCCESS: PAPI_num_events returned 2...\n"); - else - printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_add_event(xEventSet, PAPI_L1_ICM); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_add_event PAPI_L1_ICM...\n"); - else - printf("FAILURE: PAPI_add_event PAPI_L1_ICM failed, xRC=%d...\n", xRC); - - xRC = PAPI_num_events(xEventSet); - if (xRC == 3) - printf("SUCCESS: PAPI_num_events returned 3...\n"); - else - printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_add_event(xEventSet, PAPI_L1_TCM); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_add_event PAPI_L1_TCM...\n"); - else - printf("FAILURE: PAPI_add_event PAPI_L1_TCM failed, xRC=%d...\n", xRC); - - xRC = PAPI_num_events(xEventSet); - if (xRC == 4) - printf("SUCCESS: PAPI_num_events returned 4...\n"); - else - printf("FAILURE: PAPI_num_events 
failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_add_event(xEventSet, PAPI_L1_DCM); - if (xRC == PAPI_ECNFLCT) - printf("SUCCESS: PAPI_add_event, redundantly adding PAPI_L1_DCM not allowed...\n"); - else - printf("FAILURE: PAPI_add_event PAPI_L1_DCM failed incorrectly, xRC=%d...\n", xRC); - - xRC = PAPI_add_event(xEventSet, PNE_BGP_PU3_L2_MEMORY_WRITES); - if (xRC == PAPI_ECNFLCT) - printf("SUCCESS: PAPI_add_event, redundantly adding PNE_BGP_PU3_L2_MEMORY_WRITES not allowed...\n"); - else - printf("FAILURE: PAPI_add_event PNE_BGP_PU3_L2_MEMORY_WRITES failed incorectly, xRC=%d...\n", xRC); - - printf("\n==> All events added... Perform a read now...\n"); - xRC = PAPI_read(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_read...\n"); - else - printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); - - printf("\n==> Perform a reset now...\n"); - xRC = PAPI_reset(xEventSet); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_reset...\n"); - else - printf("FAILURE: PAPI_reset failed, xRC=%d...\n", xRC); - - printf("\n==> Perform another read now...\n"); - xRC = PAPI_read(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_read...\n"); - else - printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); - - printf("\n==> Should be 4 counters below, preset, native, preset, and preset. 
All counter values should be zero.\n"); - Print_Counters(xEventSet); - - printf("\n==> Stop the UPC now...\n"); - xRC = PAPI_stop(xEventSet, PAPI_Counters); - if (xRC == PAPI_ENOTRUN) - printf("SUCCESS: PAPI_stop, but not running...\n"); - else - printf("FAILURE: PAPI_stop failed incorectly, xRC=%d...\n", xRC); - - printf("\n==> Start the UPC now...\n"); - xRC = PAPI_start(xEventSet); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_start...\n"); - else { - printf("FAILURE: PAPI_start failed, xRC=%d...\n", xRC); - return; - } - - printf("\n==> Try to start it again...\n"); - xRC = PAPI_start(xEventSet); - if (xRC == PAPI_EISRUN) - printf("SUCCESS: PAPI_start, but already running...\n"); - else - printf("FAILURE: PAPI_start failed incorectly, xRC=%d...\n", xRC); - - FPUArith(); - - printf("\n==> Stop the UPC after the arithmetic was performed... The individual native counter values will be greater than the PAPI counters because the PAPI counters are read prior to the UPC(s) being stopped...\n"); - xRC = PAPI_stop(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_stop...\n"); - else { - printf("FAILURE: PAPI_stop failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Perform a read of the counters after performing arithmetic, UPC is stopped... Values should be the same as right after the prior PAPI_Stop()...\n"); - xRC = PAPI_read(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_read...\n"); - else - printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); - Print_Counters(xEventSet); - - printf("\n==> Zero local counters. Perform a PAPI_accum, UPC is stopped... 
Native values should be zero, and the local PAPI counters the same as the previous read...\n"); - Zero_Local_Counters(PAPI_Counters); - xRC = PAPI_accum(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_accum...\n"); - } - else { - printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Perform a PAPI_read, UPC is stopped... All values should be zero...\n"); - xRC = PAPI_read(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_read...\n"); - } - else { - printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Perform a reset after performing arithmetic, UPC is stopped... All values should be zero...\n"); - xRC = PAPI_reset(xEventSet); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_reset...\n"); - } - else { - printf("FAILURE: PAPI_reset failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Perform another read of the counters after resetting the counters, UPC is stopped... All values should be zero...\n"); - xRC = PAPI_read(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_read...\n"); - else - printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); - Print_Counters(xEventSet); - - printf("\n==> Perform another PAPI_accum after resetting the counters, UPC is stopped... All values should be zero...\n"); - Zero_Local_Counters(PAPI_Counters); - xRC = PAPI_accum(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_accum...\n"); - } - else { - printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Perform another PAPI_read after accumulating and resetting the UPC, UPC is stopped... 
All values should be zero...\n"); - xRC = PAPI_read(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_read...\n"); - } - else { - printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Start the UPC again...\n"); - xRC = PAPI_start(xEventSet); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_start...\n"); - else { - printf("FAILURE: PAPI_start failed, xRC=%d...\n", xRC); - return; - } - - FPUArith(); - - printf("\n==> Get the state of the event set...\n"); - xRC = PAPI_state(xEventSet, &xState); - if (xRC == PAPI_OK) { - if (xState == PAPI_RUNNING) { - printf("SUCCESS: PAPI_state is RUNNING...\n"); - } - else { - printf("FAILURE: PAPI_state failed, incorrect state, xState=%d...\n", xState); - } - } - else { - printf("FAILURE: PAPI_state failed, xRC=%d...\n", xRC); - return; - } - - printf("\n==> Perform a read of the counters, UPC is running... The individual native counter values will be greater than the PAPI counters because the PAPI counters are read prior to the reads for the individual counter values...\n"); - xRC = PAPI_read(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_read...\n"); - else - printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); - Print_Counters(xEventSet); - - FPUArith(); - - printf("\n==> Perform another read of the counters, UPC is running... Values should be increasing...\n"); - xRC = PAPI_read(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_read...\n"); - else - printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); - Print_Counters(xEventSet); - - FPUArith(); - - printf("\n==> Perform another read of the counters, UPC is running... 
Values should continue increasing...\n"); - xRC = PAPI_read(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_read...\n"); - else - printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); - Print_Counters(xEventSet); - - printf("\n==> Perform a reset after performing arithmetic, UPC is still running... Native counter values should be less than prior read, but PAPI counter values should be identical to the prior read (local buffer was not changed)...\n"); - xRC = PAPI_reset(xEventSet); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_reset...\n"); - } - else { - printf("FAILURE: PAPI_reset failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Zero local counters. Perform a PAPI_accum, UPC is still running...\n"); - Zero_Local_Counters(PAPI_Counters); - xRC = PAPI_accum(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_accum...\n"); - } - else { - printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - FPUArith(); - - printf("\n==> Accumulate local counters. Perform a PAPI_accum, UPC is still running... PAPI counters should show an increase from prior accumulate...\n"); - xRC = PAPI_accum(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_accum...\n"); - } - else { - printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - FPUArith(); - - printf("\n==> Accumulate local counters. Perform another PAPI_accum, UPC is still running... PAPI counters should show an increase from prior accumulate...\n"); - xRC = PAPI_accum(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_accum...\n"); - } - else { - printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Zero local counters. Perform a PAPI_accum, UPC is still running... 
PAPI counters should be less than the prior accumulate...\n"); - Zero_Local_Counters(PAPI_Counters); - xRC = PAPI_accum(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_accum...\n"); - } - else { - printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Perform a PAPI_read, UPC is still running... Native counters and PAPI counters should have both increased from prior accumulate...\n"); - xRC = PAPI_read(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_read...\n"); - } - else { - printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Perform a PAPI_write (not supported when UPC is running)...\n"); - xRC = PAPI_write(xEventSet, PAPI_Counters); - if (xRC == PAPI_ECMP) { - printf("SUCCESS: PAPI_write, not allowed...\n"); - } - else { - printf("FAILURE: PAPI_write failed, xRC=%d...\n", xRC); - return; - } - - printf("\n==> Stop the UPC... The individual native counter values will be greater than the PAPI counters because the PAPI counters are read prior to the UPC(s) being stopped...\n"); - xRC = PAPI_stop(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_stop...\n"); - else { - printf("FAILURE: PAPI_stop failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Perform a PAPI_read with the UPC stopped...\n"); - xRC = PAPI_read(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_read...\n"); - else - printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); - - printf("\n==> Should be same 4 counters below, with the same native and PAPI counters as after the PAPI_stop...\n"); - Print_Counters(xEventSet); - - printf("\n==> Perform a PAPI_accum with the UPC stopped... 
Native counters sould be zeroed, with the PAPI counters unchanged from prior read (with the UPC already stopped, the accumulate does not add any counter values to the local buffer)...\n"); - Zero_Local_Counters(PAPI_Counters); - xRC = PAPI_accum(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_accum...\n"); - } - else { - printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Perform a PAPI_read with the UPC stopped... Native and PAPI counters are zero...\n"); - xRC = PAPI_read(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_read...\n"); - else - printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); - Print_Counters(xEventSet); - - printf("\n==> Perform a reset, UPC is stopped... Native and PAPI counters are zero...\n"); - xRC = PAPI_reset(xEventSet); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_reset...\n"); - } - else { - printf("FAILURE: PAPI_reset failed, xRC=%d...\n", xRC); - return; - } - Print_Counters(xEventSet); - - printf("\n==> Perform a PAPI_write, but only to local memory...\n"); - xRC = PAPI_write(xEventSet, PAPI_Counters); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_write, but only to local memory...\n"); - } - else { - printf("FAILURE: PAPI_write failed, xRC=%d...\n", xRC); - return; - } - - printf("\n==> Get the state of the event set...\n"); - xRC = PAPI_state(xEventSet, &xState); - if (xRC == PAPI_OK) { - if (xState == PAPI_STOPPED) { - printf("SUCCESS: PAPI_state is STOPPED...\n"); - } - else { - printf("FAILURE: PAPI_state failed, incorrect state, xState=%d...\n", xState); - } - } - else { - printf("FAILURE: PAPI_state failed, xRC=%d...\n", xRC); - return; - } - - printf("\n==> Get the multiplex status of the eventset...\n"); - xRC = PAPI_get_multiplex(xEventSet); - if (xRC == PAPI_OK) { - printf("SUCCESS: PAPI_get_multiplex (NOTE: The rest of the multiplex path is untested)...\n"); - } - else { - printf("FAILURE: 
PAPI_get_multiplex failed, xRC=%d...\n", xRC); - return; - } - - printf("\n==> Remove the events, and clean up the event set...\n"); - xRC = PAPI_remove_event(xEventSet, PNE_BGP_PU1_IPIPE_INSTRUCTIONS); - if (xRC == PAPI_EINVAL) - printf("SUCCESS: PAPI_remove_event could not find PNE_BGP_PU1_IPIPE_INSTRUCTIONS...\n"); - else - printf("FAILURE: PAPI_remove_event PNE_BGP_PU1_IPIPE_INSTRUCTIONS failed, xRC=%d...\n", xRC); - - xRC = PAPI_remove_event(xEventSet, PAPI_L3_LDM); - if (xRC == PAPI_EINVAL) - printf("SUCCESS: PAPI_remove_event could not find PAPI_L3_LDM...\n"); - else - printf("FAILURE: PAPI_remove_event PAPI_L3_LDM failed, xRC=%d...\n", xRC); - - xRC = PAPI_remove_event(xEventSet, PAPI_L1_TCM); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_remove_event PAPI_L1_TCM...\n"); - else - printf("FAILURE: PAPI_remove_event PAPI_L1_TCM failed, xRC=%d...\n", xRC); - - xRC = PAPI_num_events(xEventSet); - if (xRC == 3) - printf("SUCCESS: PAPI_num_events returned 3...\n"); - else - printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_remove_event(xEventSet, PAPI_L1_ICM); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_remove_event PAPI_L1_ICM...\n"); - else - printf("FAILURE: PAPI_remove_event PAPI_L1_ICM failed, xRC=%d...\n", xRC); - - xRC = PAPI_num_events(xEventSet); - if (xRC == 2) - printf("SUCCESS: PAPI_num_events returned 2...\n"); - else - printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_remove_event(xEventSet, PNE_BGP_PU3_L2_MEMORY_WRITES); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_remove_event PNE_BGP_PU3_L2_MEMORY_WRITES...\n"); - else - printf("FAILURE: PAPI_remove_event PNE_BGP_PU3_L2_MEMORY_WRITES failed, xRC=%d...\n", xRC); - - xRC = PAPI_num_events(xEventSet); - if (xRC == 1) - printf("SUCCESS: PAPI_num_events returned 1...\n"); - else - printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_remove_event(xEventSet, PAPI_L1_DCM); - if (xRC == PAPI_OK) - 
printf("SUCCESS: PAPI_remove_event PAPI_L1_DCM...\n"); - else - printf("FAILURE: PAPI_remove_event PAPI_L1_DCM failed, xRC=%d...\n", xRC); - - xRC = PAPI_num_events(xEventSet); - if (xRC == 0) - printf("SUCCESS: PAPI_num_events returned 0...\n"); - else - printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_cleanup_eventset(xEventSet); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_cleanup_eventset...\n"); - else - printf("FAILURE: PAPI_cleanup_eventset failed, xRC=%d...\n", xRC); - - xRC = PAPI_destroy_eventset(&xEventSet); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_destroy_eventset...\n"); - else - printf("FAILURE: PAPI_destroy_eventset failed, xRC=%d...\n", xRC); - - printf("==> Do_Low_Level_Tests(): End of the main body...\n"); - - return; -} - -/* - * Do_High_Level_Tests - */ - -void Do_High_Level_Tests(void) { - uint xEventId, xEventCode; - int xRC, xNumEvents; - - printf("==> Do_High_Level_Tests(): Beginning of the main body...\n"); - - xRC = PAPI_num_counters(); - if (xRC == 256) - printf("SUCCESS: PAPI_num_counters returned 256 hardware counters...\n"); - else - printf("FAILURE: PAPI_num_counters failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_num_components(); - if (xRC == 1) - printf("SUCCESS: PAPI_num_components returned 256 hardware counters...\n"); - else - printf("FAILURE: PAPI_num_components failed, returned xRC=%d...\n", xRC); - - xEventId = 0; - while (xEventId < MaxPresetEventId) { - xNumEvents = 0; - while (xEventId <= MaxPresetEventId && xNumEvents < NumEventsPerSet) { - xEventCode = xEventId | 0x80000000; - xRC = PAPI_query_event(xEventCode); - if (xRC == PAPI_OK) { - switch(xEventCode) { - case 0x80000003: - case 0x80000004: - case 0x80000005: - case 0x80000007: - case 0x80000008: - case 0x8000000A: - case 0x8000000B: - case 0x8000000C: - case 0x8000000D: - case 0x8000000F: - case 0x80000010: - case 0x80000011: - case 0x80000012: - case 0x80000013: - case 0x80000014: - case 0x80000015: - case 0x80000016: 
- case 0x80000017: - case 0x80000018: - case 0x80000019: - case 0x8000001A: - case 0x8000001B: - case 0x8000001D: - case 0x8000001E: - case 0x8000001F: - case 0x80000020: - case 0x80000021: - case 0x80000022: - case 0x80000023: - case 0x80000024: - case 0x80000025: - case 0x80000026: - case 0x80000027: - case 0x80000028: - case 0x80000029: - case 0x8000002A: - case 0x8000002B: - case 0x8000002C: - case 0x8000002D: - case 0x8000002E: - case 0x8000002F: - case 0x80000031: - case 0x80000032: - case 0x80000033: - case 0x80000037: - case 0x80000038: - case 0x80000039: - case 0x8000003A: - case 0x8000003D: - case 0x80000042: - case 0x80000045: - case 0x80000046: - case 0x80000048: - case 0x8000004A: - case 0x8000004B: - case 0x8000004D: - case 0x8000004E: - case 0x80000050: - case 0x80000051: - case 0x80000053: - case 0x80000054: - case 0x80000056: - case 0x80000057: - case 0x80000059: - case 0x8000005c: - case 0x8000005f: - case 0x80000061: - case 0x80000062: - case 0x80000063: - case 0x80000064: - case 0x80000065: - printf("FAILURE: Do_High_Level_Tests, preset event code %#8.8x added to list of events to be started, but should not be allowed...\n", xEventCode); - break; - default: - printf("SUCCESS: Do_High_Level_Tests, preset event code %#8.8x added to list of events to be started...\n", xEventCode); - } - PAPI_Events[xNumEvents] = xEventCode; - xNumEvents++; - } - else { - switch(xEventCode) { - case 0x80000003: - case 0x80000004: - case 0x80000005: - case 0x80000007: - case 0x80000008: - case 0x8000000A: - case 0x8000000B: - case 0x8000000C: - case 0x8000000D: - case 0x8000000F: - case 0x80000010: - case 0x80000011: - case 0x80000012: - case 0x80000013: - case 0x80000014: - case 0x80000015: - case 0x80000016: - case 0x80000017: - case 0x80000018: - case 0x80000019: - case 0x8000001A: - case 0x8000001B: - case 0x8000001D: - case 0x8000001E: - case 0x8000001F: - case 0x80000020: - case 0x80000021: - case 0x80000022: - case 0x80000023: - case 0x80000024: - case 
0x80000025: - case 0x80000026: - case 0x80000027: - case 0x80000028: - case 0x80000029: - case 0x8000002A: - case 0x8000002B: - case 0x8000002C: - case 0x8000002D: - case 0x8000002E: - case 0x8000002F: - case 0x80000031: - case 0x80000032: - case 0x80000033: - case 0x80000037: - case 0x80000038: - case 0x80000039: - case 0x8000003A: - case 0x8000003D: - case 0x80000042: - case 0x80000045: - case 0x80000046: - case 0x80000048: - case 0x8000004A: - case 0x8000004B: - case 0x8000004D: - case 0x8000004E: - case 0x80000050: - case 0x80000051: - case 0x80000053: - case 0x80000054: - case 0x80000056: - case 0x80000057: - case 0x80000059: - case 0x8000005c: - case 0x8000005f: - case 0x80000061: - case 0x80000062: - case 0x80000063: - case 0x80000064: - case 0x80000065: - printf("SUCCESS: Do_High_Level_Tests, preset event code %#8.8x cannot be added to list of events to be started, xRC = %d...\n", xEventCode, xRC); - break; - default: - printf("FAILURE: Do_High_Level_Tests, preset event code %#8.8x cannot be added to list of events to be started, xRC = %d...\n", xEventCode, xRC); - } - } - xEventId++; - } - if (xNumEvents) - Run_Cycle(xNumEvents); - } - - xEventId = 0; - while (xEventId < MaxNativeEventId) { - xNumEvents = 0; - while (xEventId <= MaxNativeEventId && xNumEvents < NumEventsPerSet) { - xEventCode = xEventId | 0x40000000; - xRC = PAPI_query_event(xEventCode); - if (xRC == PAPI_OK) { - switch(xEventCode) { - case 0x4000005C: - case 0x4000005D: - case 0x4000005E: - case 0x4000005F: - case 0x40000060: - case 0x40000061: - case 0x40000062: - case 0x40000063: - case 0x40000064: - case 0x4000007C: - case 0x4000007D: - case 0x4000007E: - case 0x4000007F: - case 0x40000080: - case 0x40000081: - case 0x40000082: - case 0x40000083: - case 0x40000084: - case 0x400000D8: - case 0x400000D9: - case 0x400000DA: - case 0x400000DB: - case 0x400000DC: - case 0x400000DD: - case 0x400000FD: - case 0x400000FE: - case 0x40000198: - case 0x40000199: - case 0x4000019A: - case 
0x4000019B: - case 0x4000019C: - case 0x4000019D: - case 0x4000019E: - case 0x4000019F: - case 0x400001A0: - case 0x400001B8: - case 0x400001B9: - case 0x400001BA: - case 0x400001BB: - case 0x400001BC: - case 0x400001BD: - case 0x400001BE: - case 0x400001BF: - case 0x400001C0: - case 0x400001D2: - case 0x400001D3: - case 0x400001D4: - case 0x400001D5: - case 0x400001D6: - case 0x400001D7: - case 0x400001E6: - case 0x400001E7: - case 0x400001E8: - case 0x400001E9: - case 0x400001EA: - case 0x400001EB: - case 0x400001FE: - printf("FAILURE: Do_High_Level_Tests, native event code %#8.8x added to list of events to be started, but should not be allowed...\n", xEventCode); - break; - default: - printf("SUCCESS: Do_High_Level_Tests, native event code %#8.8x added to list of events to be started...\n", xEventCode); - } - PAPI_Events[xNumEvents] = xEventCode; - xNumEvents++; - } - else { - switch(xEventCode) { - case 0x4000005C: - case 0x4000005D: - case 0x4000005E: - case 0x4000005F: - case 0x40000060: - case 0x40000061: - case 0x40000062: - case 0x40000063: - case 0x40000064: - case 0x4000007C: - case 0x4000007D: - case 0x4000007E: - case 0x4000007F: - case 0x40000080: - case 0x40000081: - case 0x40000082: - case 0x40000083: - case 0x40000084: - case 0x400000D8: - case 0x400000D9: - case 0x400000DA: - case 0x400000DB: - case 0x400000DC: - case 0x400000DD: - case 0x400000FD: - case 0x400000FE: - case 0x40000198: - case 0x40000199: - case 0x4000019A: - case 0x4000019B: - case 0x4000019C: - case 0x4000019D: - case 0x4000019E: - case 0x4000019F: - case 0x400001A0: - case 0x400001B8: - case 0x400001B9: - case 0x400001BA: - case 0x400001BB: - case 0x400001BC: - case 0x400001BD: - case 0x400001BE: - case 0x400001BF: - case 0x400001C0: - case 0x400001D2: - case 0x400001D3: - case 0x400001D4: - case 0x400001D5: - case 0x400001D6: - case 0x400001D7: - case 0x400001E6: - case 0x400001E7: - case 0x400001E8: - case 0x400001E9: - case 0x400001EA: - case 0x400001EB: - case 0x400001FE: - 
printf("SUCCESS: Do_High_Level_Tests, native event code %#8.8x cannot be added to list of events to be started, xRC = %d...\n", xEventCode, xRC); - break; - default: - printf("FAILURE: Do_High_Level_Tests, native event code %#8.8x cannot be added to list of events to be started, xRC = %d...\n", xEventCode, xRC); - } - } - xEventId++; - } - if (xNumEvents) - Run_Cycle(xNumEvents); - } - - float xRtime, xPtime, xMflips, xMflops, xIpc; - long long xFlpins, xFlpops, xIns; - long long values[3] = {PAPI_FP_INS, PAPI_FP_OPS, PAPI_TOT_CYC}; - - xRC = PAPI_flips(&xRtime, &xPtime, &xFlpins, &xMflips); - - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_flips started.\n"); - else - printf("FAILURE: PAPI_flips failed, returned xRC=%d...\n", xRC); - - FPUArith(); - - xRC = PAPI_flips(&xRtime, &xPtime, &xFlpins, &xMflips); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_flips Rtime=%e Ptime=%e, Flpins=%lld, Mflips=%e\n", xRtime, xPtime, xFlpins, xMflips); - else - printf("FAILURE: PAPI_flips failed, returned xRC=%d...\n", xRC); - - FPUArith(); - FPUArith(); - - xRC = PAPI_flips(&xRtime, &xPtime, &xFlpins, &xMflips); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_flips Rtime=%e Ptime=%e, Flpins=%lld, Mflips=%e\n", xRtime, xPtime, xFlpins, xMflips); - else - printf("FAILURE: PAPI_flips failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_stop_counters(values, 3); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_stop_counters stopped counters.\n"); - else - printf("FAILURE: PAPI_stop_counters failed, returned xRC=%d...\n", xRC); - - - xRC = PAPI_flops(&xRtime, &xPtime, &xFlpops, &xMflops); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_flops started.\n"); - else - printf("FAILURE: PAPI_flops failed, returned xRC=%d...\n", xRC); - - FPUArith(); - - xRC = PAPI_flops(&xRtime, &xPtime, &xFlpops, &xMflops); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_flops Rtime=%e Ptime=%e Flpops=%lld Mflops=%e\n", xRtime, xPtime, xFlpops, xMflops); - else - printf("FAILURE: PAPI_flops failed, returned 
xRC=%d...\n", xRC); - - FPUArith(); - FPUArith(); - - xRC = PAPI_flops(&xRtime, &xPtime, &xFlpops, &xMflops); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_flops Rtime=%e Ptime=%e Flpops=%lld Mflops=%e\n", xRtime, xPtime, xFlpops, xMflops); - else - printf("FAILURE: PAPI_flops failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_stop_counters(values, 3); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_stop_counters stopped counters.\n"); - else - printf("FAILURE: PAPI_stop_counters failed, returned xRC=%d...\n", xRC); - - xRC = PAPI_ipc(&xRtime, &xPtime, &xIns, &xIpc); - if (xRC == PAPI_ENOEVNT) - printf("SUCCESS: PAPI_ipc, no event found...\n"); - else - printf("FAILURE: PAPI_ipc failed, returned xRC=%d...\n", xRC); - - printf("==> Do_High_Level_Tests(): End of the main body...\n"); - - return; -} - - -/* - * Do_Multiplex_Tests - */ - -void Do_Multiplex_Tests(void) { - int xRC; - - printf("==> Do_Multiplex_Tests(): Beginning of the main body...\n"); - - xRC = PAPI_multiplex_init(); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_multiplex_init...\n"); - else - printf("FAILURE: PAPI_multiplex_init failed, returned xRC=%d...\n", xRC); - - printf("==> Do_Multiplex_Tests(): End of the main body...\n"); - - return; -} - - -void Run_Cycle(const int pNumEvents) { - int xRC; - -// BGP_UPC_Zero_Counter_Values(); - Zero_Local_Counters(PAPI_Counters); - xRC = PAPI_start_counters(PAPI_Events, pNumEvents); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_start_counters...\n"); - else - printf("FAILURE: PAPI_start_counters failed, returned xRC=%d...\n", xRC); - - Print_Native_Counters(); - Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); - FPUArith(); - Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); - Print_PAPI_Counters_From_List(PAPI_Events, pNumEvents, PAPI_Counters); - - FPUArith(); - - xRC = PAPI_read_counters(PAPI_Counters, pNumEvents); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_read_counters...\n"); - else - 
printf("FAILURE: PAPI_read_counters failed, returned xRC=%d...\n", xRC); - - Print_Native_Counters(); - Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); - FPUArith(); - Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); - Print_PAPI_Counters_From_List(PAPI_Events, pNumEvents, PAPI_Counters); - - FPUArith(); - - Zero_Local_Counters(PAPI_Counters); - xRC = PAPI_accum_counters(PAPI_Counters, pNumEvents); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_accum_counters...\n"); - else - printf("FAILURE: PAPI_accum_counters failed, returned xRC=%d...\n", xRC); - - Print_Native_Counters(); - Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); - FPUArith(); - Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); - Print_PAPI_Counters_From_List(PAPI_Events, pNumEvents, PAPI_Counters); - - FPUArith(); - - xRC = PAPI_read_counters(PAPI_Counters, pNumEvents); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_read_counters...\n"); - else - printf("FAILURE: PAPI_read_counters failed, returned xRC=%d...\n", xRC); - - Print_Native_Counters(); - Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); - FPUArith(); - Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); - Print_PAPI_Counters_From_List(PAPI_Events, pNumEvents, PAPI_Counters); - - FPUArith(); - - xRC = PAPI_stop_counters(PAPI_Counters, pNumEvents); - if (xRC == PAPI_OK) - printf("SUCCESS: PAPI_stop_counters...\n"); - else - printf("FAILURE: PAPI_stop_counters failed, returned xRC=%d...\n", xRC); - - Print_Native_Counters(); - Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); - FPUArith(); - Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); - Print_PAPI_Counters_From_List(PAPI_Events, pNumEvents, PAPI_Counters); - - FPUArith(); - - return; -} - - -/* - * Zero_Local_Counters - */ - -void Zero_Local_Counters(long long* pCounters) { - int i; 
- for (i=0; i<255; i++) - pCounters[i] = 0; - - return; -} - - -/* - * FPU Arithmetic... - */ -void FPUArith(void) { - int i; - - printf("\n==> Start: Performing arithmetic...\n"); - register unsigned int zero = 0; - register double *x_p = &x[0]; - - for ( i = 0; i < 32; i++ ) - x[i] = 1.0; - - // Single Hummer Instructions: - - #if 1 - - asm volatile ("fabs 1,2"); - asm volatile ("fmr 1,2"); - asm volatile ("fnabs 1,2"); - asm volatile ("fneg 1,2"); - - asm volatile ("fadd 1,2,3"); - asm volatile ("fadds 1,2,3"); - asm volatile ("fdiv 1,2,3"); - asm volatile ("fdivs 1,2,3"); - asm volatile ("fmul 1,2,3"); - asm volatile ("fmuls 1,2,3"); - asm volatile ("fres 1,2"); - asm volatile ("frsqrte 1,2"); - //asm volatile ("fsqrt 1,2"); // gives exception - //asm volatile ("fsqrts 1,2"); // gives exception - asm volatile ("fsub 1,2,3"); - asm volatile ("fsubs 1,2,3"); - - asm volatile ("fmadd 3,4,5,6"); - asm volatile ("fmadds 3,4,5,6"); - asm volatile ("fmsub 3,4,5,6"); - asm volatile ("fmsubs 3,4,5,6"); - asm volatile ("fnmadd 3,4,5,6"); - asm volatile ("fnmadds 3,4,5,6"); - asm volatile ("fnmsub 3,4,5,6"); - asm volatile ("fnmsubs 3,4,5,6"); - - //asm volatile ("fcfid 5,6"); // invalid instruction - //asm volatile ("fctid 5,6"); // invalid instruction - //asm volatile ("fctidz 5,6"); // invalid instruction - asm volatile ("fctiw 5,6"); - asm volatile ("fctiwz 5,6"); - asm volatile ("frsp 5,6"); - - asm volatile ("fcmpo 0,1,2"); - asm volatile ("fcmpu 0,1,2"); - asm volatile ("fsel 0,1,2,3"); - - #endif - - #if 1 - - asm volatile("fpadd 9,10,11"); - asm volatile("fpsub 9,10,11"); - - #endif - - - #if 1 - - asm volatile("fpmul 23,24,25"); - asm volatile("fxmul 26, 27, 28"); - asm volatile("fxpmul 28, 29, 30"); - asm volatile("fxsmul 2, 3, 4"); - #endif - - #if 1 - - asm volatile("fpmadd 10,11,12,13"); - asm volatile("fpmsub 18, 19, 20, 21"); - asm volatile("fpnmadd 26, 27, 28, 29"); - asm volatile("fpnmsub 16,17,18,19"); - - asm volatile("fxmadd 10,11,12,13"); - asm 
volatile("fxmsub 18, 19, 20, 21"); - asm volatile("fxnmadd 26, 27, 28, 29"); - asm volatile("fxnmsub 16,17,18,19"); - - asm volatile("fxcpmadd 10,11,12,13"); - asm volatile("fxcpmsub 18, 19, 20, 21"); - asm volatile("fxcpnmadd 26, 27, 28, 29"); - asm volatile("fxcpnmsub 16,17,18,19"); - - asm volatile("fxcsmadd 10,11,12,13"); - asm volatile("fxcsmsub 18, 19, 20, 21"); - asm volatile("fxcsnmadd 26, 27, 28, 29"); - asm volatile("fxcsnmsub 16,17,18,19"); - - asm volatile("fxcpnpma 1,2,3,4"); - asm volatile("fxcsnpma 5,6,7,8"); - asm volatile("fxcpnsma 9,10,11,12"); - asm volatile("fxcsnsma 3,4,5,6"); - - asm volatile("fxcxnpma 9,10,11,12"); - asm volatile("fxcxnsma 8,9,10,11"); - asm volatile("fxcxma 3,4,5,6"); - asm volatile("fxcxnms 8,9,10,11"); - - #endif - - - #if 1 - - asm volatile("fpre 12, 13"); - asm volatile("fprsqrte 15, 16"); - asm volatile("fpsel 17, 18, 19, 20"); - asm volatile("fpctiw 1,2"); - asm volatile("fpctiwz 3,4"); - asm volatile("fprsp 5,6"); - asm volatile("fscmp 1,2,3"); - asm volatile("fpmr 1,2"); - asm volatile("fpneg 1,2"); - asm volatile("fpabs 1,2"); - asm volatile("fpnabs 1,2"); - asm volatile("fsmr 1,2"); - asm volatile("fsneg 1,2"); - asm volatile("fsabs 1,2"); - asm volatile("fsnabs 1,2"); - asm volatile("fxmr 1,2"); - asm volatile("fsmfp 1,2"); - asm volatile("fsmtp 1,2"); - - #endif - - #if 1 - asm volatile("lfdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("lfdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("lfsx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("lfsux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - - asm volatile("lfsdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("lfsdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("lfssx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("lfssux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - - asm volatile("lfpsx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("lfpsux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("lfxsx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm 
volatile("lfxsux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - #endif - - #if 1 - asm volatile("lfpdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("lfpdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("lfxdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("lfxdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - #endif - - #if 1 - asm volatile("stfdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("stfdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("stfsx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("stfsux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - - asm volatile("stfsdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("stfsdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("stfssx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - //asm volatile("stfssux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - - asm volatile("stfpsx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("stfpsux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("stfxsx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("stfxsux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - #endif - - #if 1 - asm volatile("stfpdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("stfpdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("stfxdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); - asm volatile("stfxdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); - #endif - printf("==> End: Performing arithmetic...\n"); - - return; -} - - -/* - * Print_Counters - */ -void Print_Counters(const int pEventSet) { - printf("\n***** Start Print Counter Values *****\n"); -// Print_Native_Counters_via_Buffer((BGP_UPC_Read_Counters_Struct_t*)Native_Buffer); -// Print_Native_Counters(); - Print_Native_Counters_for_PAPI_Counters(pEventSet); - Print_PAPI_Counters(pEventSet, PAPI_Counters); - printf("\n***** End Print Counter Values *****\n"); - - return; -} - - -/* - * Print_Native_Counters - */ - -void Print_Native_Counters() { - printf("\n***** Start Print of Native Counter Values *****\n"); - BGP_UPC_Print_Counter_Values(BGP_UPC_READ_EXCLUSIVE); - 
printf("***** End Print of Native Counter Values *****\n"); - - return; -} - - -/* - * Print_Native_Counters_for_PAPI_Counters - */ - -void Print_Native_Counters_for_PAPI_Counters(const int pEventSet) { - printf("\n***** Start Print of Native Counter Values for PAPI Counters *****\n"); - int xNumEvents = PAPI_num_events(pEventSet); - if (xNumEvents) { - List_PAPI_Events(pEventSet, PAPI_Events, &xNumEvents); - Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, xNumEvents); - } - else { - printf("No events are present in the event set.\n"); - } - printf("***** End Print of Native Counter Values for PAPI Counters *****\n"); - - return; -} - - -/* - * Print_Native_Counters_for_PAPI_Counters_From_List - */ -void Print_Native_Counters_for_PAPI_Counters_From_List(const int* pEvents, const int pNumEvents) { - int i, j, xRC; - char xName[256]; - BGP_UPC_Event_Id_t xNativeEventId; - PAPI_event_info_t xEventInfo; - -// BGP_UPC_Print_Counter_Values(); // DLH - for (i=0; inumber_of_counters); - printf("***** End Print of Native Counter Values *****\n"); - - return; -} - - -/* - * Print_PAPI_Counters - */ - -void Print_PAPI_Counters(const int pEventSet, const long long* pCounters) { - int i; - char xName[256]; - printf("\n***** Start Print of PAPI Counter Values *****\n"); -// printf("Print_PAPI_Counters: PAPI_Counters*=%p, pCounters*=%p\n", PAPI_Counters, pCounters); - int pNumEvents = PAPI_num_events(pEventSet); - printf("Number of Counters = %d\n", pNumEvents); - if (pNumEvents) { - printf(" Calculated Value Location Event Number Event Name\n"); - printf("-------------------- -------- ------------ --------------------------------------------\n"); - List_PAPI_Events(pEventSet, PAPI_Events, &pNumEvents); - for (i=0; irank); - printf("Core = %d\n", xTemp->core); - printf("UPC Number = %d\n", xTemp->upc_number); - printf("Number of Processes per UPC = %d\n", xTemp->number_processes_per_upc); - printf("User Mode = %d\n", (int) xTemp->mode); - printf("Location = %s\n", 
xTemp->location); - printf("\n***** End Print of Node Information *****\n\n"); - - return; -} - - -/* - * Read_Native_Counters - */ - -void Read_Native_Counters(const int pLength) { - - int xRC = BGP_UPC_Read_Counter_Values(Native_Buffer, pLength, BGP_UPC_READ_EXCLUSIVE); - if (xRC < 0) { - printf("FAILURE: BGP_UPC_Read_Counter_Values failed, xRC=%d...\n", xRC); - exit(1); - } - - return; -} - -/* - * Print_PAPI_Events - */ - -void Print_PAPI_Events(const int pEventSet) { - int i; - char xName[256]; - int pNumEvents = PAPI_num_events(pEventSet); - List_PAPI_Events(pEventSet, PAPI_Events, &pNumEvents); - for (i=0; i5) { - info1.symbol[strlen(info1.symbol)-4]^=0xa5; + // PROBLEM: info1 is NOT initialized by anyone! + // Original code referenced info1, changed to info. [Tony C. 11-27-19] +// printf("%s\n",info.symbol); + if (strlen(info.symbol)>5) { + info.symbol[strlen(info.symbol)-4]^=0xa5; - retval=PAPI_add_named_event(EventSet,info1.symbol); + retval=PAPI_add_named_event(EventSet,info.symbol); if (retval==PAPI_OK) { if (!quiet) { printf("Unexpectedly opened %s!\n", - info1.symbol); + info.symbol); err_count++; } } diff -Nru papi-5.7.0+dfsg/src/ctests/filter_helgrind.c papi-6.0.0~dfsg/src/ctests/filter_helgrind.c --- papi-5.7.0+dfsg/src/ctests/filter_helgrind.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/filter_helgrind.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,170 @@ +/* + * This code is a simple filter for the helgrind_out.txt file + * produced by: + * "valgrind --tool=helgrind --log-file=helgrind_out.txt someProgram" + * + * This is useful because the tool does not recognize PAPI locks, + * thus reports as possible race conditions reads/writes by + * different threads that are actually fine (surrounded by locks). + * + * This was written particularly for krentel_pthreads_race.c + * when processed by the above valgrind. We produce a line per + * condition, in the form: + * OP@file:line OP@file:line + * where OP is R or W. 
The first file:line code occurred + * after the second file:line code, and on a different thread. + * + * We print the results to stdout. It is useful to filter this + * through the standard utility 'uniq', each occurrence only + * needs to be investigated once. Just insure there are + * MATCHING locks around each operation within the code. + * + * An example run (using uniq): The options -uc will print + * only unique lines, preceeded by a count of how many times + * it occurs. + * + * ./filter_helgrind | uniq -uc + * + * An example output line (piped through uniq as above): + * 1 R@threads.c:190 W@threads.c:206 + * An investigation shows threads.c:190 is protected by + * _papi_hwi_lock(THREADS_LOCK); and threads.c:206 is + * protected by the same lock. Thus no data race can + * occur for this instance. + * + * Compilation within the papi/src/ctests directory: + * make filter_helgrind + * + */ + +#include +#include +#include + +int main(int argc, char** args) { + (void) argc; + (void) args; + + char myLine[16384]; + int state, size; + char type1, type2; + char fname1[256], fname2[256]; + char *paren1, *paren2; + + FILE *HELOUT = fopen("helgrind_out.txt", "r"); // Read the file. + if (HELOUT == NULL) { + fprintf(stderr, "Could not open helgrind_out.txt.\n"); + exit(-1); + } + + char PDRR[]="Possible data race during read"; + char PDRW[]="Possible data race during write"; + char TCWW[]="This conflicts with a previous write"; + char TCWR[]="This conflicts with a previous read"; + char atSTR[]=" at "; + + // State machine: + // State 0: We are looking for a line with PDRR or PDRW. + // We don't exit until we find it, or run out of lines. + // if we find it, we remember which and go to state 1. + // State 1: Looking for " at " in column 11. + // When found, we extract the string betweeen '(' and ')' + // which is program name:line. go to state 2. + // State 2: We are searching for TCWW, TCWR, PDRW, PDRR. 
+ // If we find the first two: + // Remember which, and go to state 3. + // If we find either of the second two, go back to State 1. + // State 3: Looking for " at " in column 11. + // When found, extract the string betweeen '(' and ')', + // which is program name:line. + // OUTPUT LINE for an investigation. + // Go to State 0. + + state = 0; // looking for PDRR, PDRW. + while (fgets(myLine, 16384, HELOUT) != NULL) { + if (strlen(myLine) < 20) continue; + switch (state) { + case 0: // Looking for PDRR or PRDW. + if (strstr(myLine, PDRR) != NULL) { + type1='R'; + state=1; + continue; + } + + if (strstr(myLine, PDRW) != NULL) { + type1='W'; + state=1; + continue; + } + + continue; + break; + + case 1: // Looking for atSTR in column 11. + if (strncmp(myLine+10, atSTR, 6) != 0) continue; + paren1=strchr(myLine, '('); + paren2=strchr(myLine, ')'); + if (paren1 == NULL || paren2 == NULL || + paren1 > paren2) { + state=0; // Abort, found something I don't understand. + continue; + } + + size = paren2-paren1-1; // compute length of name. + strncpy(fname1, paren1+1, size); // Copy the name. + fname1[size]=0; // install z-terminator. + state=2; + continue; + break; + + case 2: // Looking for TCWW, TCWR, PDRR, PDRW. + if (strstr(myLine, TCWR) != NULL) { + type2='R'; + state=3; + continue; + } + + if (strstr(myLine, TCWW) != NULL) { + type2='W'; + state=3; + continue; + } + + if (strstr(myLine, PDRR) != NULL) { + type1='R'; + state=1; + continue; + } + + if (strstr(myLine, PDRW) != NULL) { + type1='W'; + state=1; + continue; + } + + continue; + break; + + case 3: // Looking for atSTR in column 11. + if (strncmp(myLine+10, atSTR, 6) != 0) continue; + paren1=strchr(myLine, '('); + paren2=strchr(myLine, ')'); + if (paren1 == NULL || paren2 == NULL || + paren1 > paren2) { + state=0; // Abort, found something I don't understand. + continue; + } + + size = paren2-paren1-1; // compute length of name. + strncpy(fname2, paren1+1, size); // Copy the name. 
+ fname2[size]=0; // install z-terminator. + fprintf(stdout, "%c@%-32s %c@%-32s\n", type1, fname1, type2, fname2); + state=0; + continue; + break; + } // end switch. + } // end while. + + fclose(HELOUT); + exit(0); +} diff -Nru papi-5.7.0+dfsg/src/ctests/flops.c papi-6.0.0~dfsg/src/ctests/flops.c --- papi-5.7.0+dfsg/src/ctests/flops.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/flops.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,110 +0,0 @@ -/* - * A simple example for the use of PAPI, the number of flops you should - * get is about INDEX^3 on machines that consider add and multiply one flop - * such as SGI, and 2*(INDEX^3) that don't consider it 1 flop such as INTEL - * -Kevin London - */ - -#include -#include - -#include "papi.h" -#include "papi_test.h" - -#include "testcode.h" -#include "display_error.h" - -int -main( int argc, char **argv ) -{ - float real_time, proc_time, mflops; - long long flpins; - int retval; - int fip = 0; - int quiet=0; - long long expected; - double double_result,error; - - /* Set TESTS_QUIET variable */ - quiet=tests_quiet( argc, argv ); - - /* Initialize PAPI library */ - retval = PAPI_library_init( PAPI_VER_CURRENT ); - if ( retval != PAPI_VER_CURRENT ) { - test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); - } - - /* Try to use one of the FP events */ - if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { - fip = 1; - } - else if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK ) { - fip = 2; - } - else { - if ( !quiet ) printf( "PAPI_FP_INS and PAPI_FP_OPS are not defined for this platform.\n" ); - test_skip(__FILE__,__LINE__,"No FP events available",1); - } - - /* Shutdown? */ - /* I guess because it would interfere with the high-level interface? 
*/ - PAPI_shutdown( ); - - /* Initialize the Matrix arrays */ - expected=flops_float_init_matrix(); - - /* Setup PAPI library and begin collecting data from the counters */ - if ( fip == 1 ) { - retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); - if (retval!=PAPI_OK) { - test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); - } - } - else { - retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); - if (retval!=PAPI_OK) { - test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); - } - } - - /* Matrix-Matrix multiply */ - double_result=flops_float_matrix_matrix_multiply(); - - /* Collect the data into the variables passed in */ - if ( fip == 1 ) { - retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); - if (retval!=PAPI_OK) { - test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); - } - } else { - retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); - if (retval!=PAPI_OK) { - test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); - } - } - - if (!quiet) printf("result=%lf\n",double_result); - - if ( !quiet ) { - printf( "Real_time: %f Proc_time: %f MFLOPS: %f\n", - real_time, proc_time, mflops ); - if ( fip == 1 ) { - printf( "Total flpins: "); - } else { - printf( "Total flpops: "); - } - printf( "%lld\n\n", flpins ); - } - - error=display_error(flpins,flpins,flpins,expected,quiet); - - if ((error > 1.0) || (error<-1.0)) { - if (!quiet) printf("Instruction count off by more than 1%%\n"); - test_fail( __FILE__, __LINE__, "Validation failed", 1 ); - } - - test_pass( __FILE__ ); - - return 0; - -} diff -Nru papi-5.7.0+dfsg/src/ctests/high-level2.c papi-6.0.0~dfsg/src/ctests/high-level2.c --- papi-5.7.0+dfsg/src/ctests/high-level2.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/high-level2.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,141 +0,0 @@ -/* This test checks that mixing PAPI_flips and the other high - * level calls does the right thing. 
- * by Kevin London - */ - -#include -#include - -#include "papi.h" -#include "papi_test.h" - -int -main( int argc, char **argv ) -{ - int retval; - int Events, fip = 0; - long long values, flpins; - float real_time, proc_time, mflops; - int quiet; - - /* Set TESTS_QUIET variable */ - quiet=tests_quiet( argc, argv ); - - /* Initialize PAPI library */ - retval = PAPI_library_init( PAPI_VER_CURRENT ); - if ( retval != PAPI_VER_CURRENT ) { - test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); - } - - /* First see if we have PAPI_FP_INS event */ - if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { - fip = 1; - Events = PAPI_FP_INS; - /* If not, look for PAPI_FP_OPS */ - } else if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK ) { - fip = 2; - Events = PAPI_FP_OPS; - } else { - if ( !quiet ) { - printf( "PAPI_FP_INS and PAPI_FP_OPS are not defined for this platform.\n" ); - } - test_skip( __FILE__, __LINE__, "FLOPS event not supported", 1); - } - - /* Start counting flips or flops event */ - if ( fip == 1 ) { - retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); - if (retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); - } - } else { - retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); - if (retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); - } - } - - /* If we are flipsing/flopsing, then start_counters should fail */ - retval = PAPI_start_counters( &Events, 1 ); - if (retval == PAPI_OK) { - test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval ); - } - - /* Try flipsing/flopsing again, should work */ - if ( fip == 1 ) { - retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); - if (retval != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); - } - } else { - retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); - if (retval != PAPI_OK) { - test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); - } - } - - /* If we are 
flipsing/flopsing, then read should fail */ - if ( ( retval = PAPI_read_counters( &values, 1 ) ) == PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); - } - - /* Stop should still work then */ - if ( ( retval = PAPI_stop_counters( &values, 1 ) ) != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); - } - - /* Restart flips/flops */ - if ( fip == 1 ) { - retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); - if (retval != PAPI_OK) { - test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); - } - } else { - retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); - if (retval != PAPI_OK) { - test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); - } - } - - /* Try reading again, should fail */ - if ( ( retval = PAPI_read_counters( &values, 1 ) ) == PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); - } - - /* Stop */ - if ( ( retval = PAPI_stop_counters( &values, 1 ) ) != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); - } - - /* Now try starting, should work */ - if ( ( retval = PAPI_start_counters( &Events, 1 ) ) != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval ); - } - - /* Read should work too */ - if ( ( retval = PAPI_read_counters( &values, 1 ) ) != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); - } - - /* flipsing/flopsing should fail */ - if ( fip == 1 ) { - retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); - if (retval == PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); - } - } else { - retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); - if (retval == PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); - } - } - - /* Stop everything */ - if ( ( retval = PAPI_stop_counters( &values, 1 ) ) != PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); - } - - test_pass( __FILE__ ); - - return 0; 
-} diff -Nru papi-5.7.0+dfsg/src/ctests/high-level.c papi-6.0.0~dfsg/src/ctests/high-level.c --- papi-5.7.0+dfsg/src/ctests/high-level.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/high-level.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,127 +0,0 @@ -/* These examples show the essentials in using the PAPI high-level - interface. The program consists of 4 work-loops. The programmer - intends to count the total events for loop 1, 2 and 4, but not - include the number of events in loop 3. - - To accomplish this PAPI_read_counters is used as a counter - reset function, while PAPI_accum_counters is used to sum - the contributions of loops 2 and 4 into the total count. -*/ - -#include -#include - -#include "papi.h" -#include "papi_test.h" - -#include "do_loops.h" - -#define NUM_EVENTS 2 - -int -main( int argc, char **argv ) -{ - int retval; - long long values[NUM_EVENTS], dummyvalues[NUM_EVENTS]; - long long myvalues[NUM_EVENTS]; - int Events[NUM_EVENTS]; - int quiet; - - /* Set TESTS_QUIET variable */ - quiet=tests_quiet( argc, argv ); - - retval = PAPI_library_init( PAPI_VER_CURRENT ); - if ( retval != PAPI_VER_CURRENT ) { - test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); - } - - /* query and set up the right events to monitor */ - if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { - Events[0] = PAPI_FP_INS; - } else { - Events[0] = PAPI_TOT_INS; - } - Events[1] = PAPI_TOT_CYC; - - retval = PAPI_start_counters( ( int * ) Events, NUM_EVENTS ); - if ( retval != PAPI_OK ) { - if (!quiet) printf("Cannot start events\n"); - test_skip( __FILE__, __LINE__, "PAPI_start_counters", retval ); - } - - /* Loop 1 */ - do_flops( NUM_FLOPS ); - - retval = PAPI_read_counters( values, NUM_EVENTS ); - if ( retval != PAPI_OK ) - test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); - - if ( !quiet ) - printf( TWO12, values[0], values[1], "(Counters continuing...)\n" ); - - myvalues[0] = values[0]; - myvalues[1] = values[1]; - /* Loop 2 */ - 
do_flops( NUM_FLOPS ); - - retval = PAPI_accum_counters( values, NUM_EVENTS ); - if ( retval != PAPI_OK ) - test_fail( __FILE__, __LINE__, "PAPI_accum_counters", retval ); - - if ( !quiet ) - printf( TWO12, values[0], values[1], "(Counters being ''held'')\n" ); - - /* Loop 3 */ - /* Simulated code that should not be counted */ - do_flops( NUM_FLOPS ); - - retval = PAPI_read_counters( dummyvalues, NUM_EVENTS ); - if ( retval != PAPI_OK ) - test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); - if ( !quiet ) - printf( TWO12, dummyvalues[0], dummyvalues[1], "(Skipped counts)\n" ); - - if ( !quiet ) - printf( "%12s %12s (''Continuing'' counting)\n", "xxx", "xxx" ); - /* Loop 4 */ - do_flops( NUM_FLOPS ); - - retval = PAPI_accum_counters( values, NUM_EVENTS ); - if ( retval != PAPI_OK ) - test_fail( __FILE__, __LINE__, "PAPI_accum_counters", retval ); - - if ( !quiet ) - printf( TWO12, values[0], values[1], "" ); - - if ( !quiet ) { - printf( "----------------------------------\n" ); - printf( "Verification: The last line in each experiment should be\n" ); - printf( "approximately three times the value of the first line.\n" ); - } - - { - long long min, max; - min = ( long long ) ( ( double ) myvalues[0] * .9 ); - max = ( long long ) ( ( double ) myvalues[0] * 1.1 ); - if ( values[0] < ( 3 * min ) || values[0] > ( 3 * max ) ) { - retval = 1; - if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { - test_fail( __FILE__, __LINE__, "PAPI_FP_INS", 1 ); - } else { - test_fail( __FILE__, __LINE__, "PAPI_TOT_INS", 1 ); - } - } - min = ( long long ) ( ( double ) myvalues[1] * .9 ); - max = ( long long ) ( ( double ) myvalues[1] * 1.1 ); - if ( values[1] < ( 3 * min ) || values[1] > ( 3 * max ) ) { - retval = 1; - test_fail( __FILE__, __LINE__, "PAPI_TOT_CYC", 1 ); - } - } - /* The values array is not allocated through allocate_test_space - * so we need to pass NULL here */ - test_pass( __FILE__ ); - - return 0; - -} diff -Nru papi-5.7.0+dfsg/src/ctests/hl_rates.c 
papi-6.0.0~dfsg/src/ctests/hl_rates.c --- papi-5.7.0+dfsg/src/ctests/hl_rates.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/hl_rates.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,337 +0,0 @@ -/* file hl_rates.c - * This test exercises the four PAPI High Level rate calls: - * PAPI_flops, PAPI_flips, PAPI_ipc, and PAPI_epc - * flops and flips report cumulative real and process time since the first call, - * and either floating point operations or instructions since the first call. - * Also reported is incremental flop or flip rate since the last call. - * - * PAPI_ipc reports the same cumulative information, substituting - * total instructions for flops or flips, and also reports - * instructions per (process) cycle as a measure of execution efficiency. - * - * PAPI_epc is new in PAPI 5.2. It reports the same information as PAPI_IPC, - * but for an arbitrary event instead of total cycles. It also reports - * incremental core and (where available) reference cycles to allow the - * computation of effective clock rates in the presence of clock scaling - * like speed step or turbo-boost. - * - * This test computes a 1000 x 1000 matrix multiply for orders of indexing for - * each of the four rate calls. It also accepts a command line parameter - * for the event to be measured for PAPI_epc. If not provided, PAPI_TOT_INS - * is measured. 
- */ - -#include -#include - -#include "papi.h" -#include "papi_test.h" - -#include "testcode.h" - -int -main( int argc, char **argv ) -{ - int retval, event = 0; - float rtime, ptime, mflips, mflops, ipc, epc; - long long flpins, flpops, ins, ref, core, evt; - - double mflips_classic,mflips_swapped; - double mflops_classic,mflops_swapped; - double ipc_classic,ipc_swapped; - double epc_classic,epc_swapped; - - int quiet; - - /* Set TESTS_QUIET variable */ - quiet=tests_quiet( argc, argv ); - - /* Initialize the test matrix */ - flops_float_init_matrix(); - - /************************/ - /* FLIPS */ - /************************/ - - if (!quiet) { - printf( "\n----------------------------------\n" ); - printf( "PAPI_flips\n"); - } - - /* Run flips at start */ - retval=PAPI_flips(&rtime, &ptime, &flpins, &mflips); - if (retval!=PAPI_OK) { - if (!quiet) PAPI_perror( "PAPI_flips" ); - test_skip(__FILE__,__LINE__,"Could not add event",0); - } - - if (!quiet) { - printf( "\nStart\n"); - printf( "real time: %f\n", rtime); - printf( "process time: %f\n", ptime); - printf( "FP Instructions: %lld\n", flpins); - printf( "MFLIPS %f\n", mflips); - } - - /* Be sure we are all zero at beginning */ - if ((rtime!=0) || (ptime!=0) || (flpins!=0) || (mflips!=0)) { - test_fail(__FILE__,__LINE__,"Not initialized to zero",0); - } - - // Flips classic - flops_float_matrix_matrix_multiply(); - if ( PAPI_flips(&rtime, &ptime, &flpins, &mflips) != PAPI_OK ) - PAPI_perror( "PAPI_flips" ); - - if (!quiet) { - printf( "\nClassic\n"); - printf( "real time: %f\n", rtime); - printf( "process time: %f\n", ptime); - printf( "FP Instructions: %lld\n", flpins); - printf( "MFLIPS %f\n", mflips); - } - mflips_classic=mflips; - - // Flips swapped - flops_float_swapped_matrix_matrix_multiply(); - if ( PAPI_flips(&rtime, &ptime, &flpins, &mflips) != PAPI_OK ) - PAPI_perror( "PAPI_flips" ); - - if (!quiet) { - printf( "\nSwapped\n"); - printf( "real time: %f\n", rtime); - printf( "process time: %f\n", 
ptime); - printf( "FP Instructions: %lld\n", flpins); - printf( "MFLIPS %f\n", mflips); - } - mflips_swapped=mflips; - - // turn off flips - if ( PAPI_stop_counters(NULL, 0) != PAPI_OK ) { - PAPI_perror( "PAPI_stop_counters" ); - } - - - /************************/ - /* FLOPS */ - /************************/ - - if (!quiet) { - printf( "\n----------------------------------\n" ); - printf( "PAPI_flops\n"); - } - - // Start flops - if ( PAPI_flops(&rtime, &ptime, &flpops, &mflops) != PAPI_OK ) { - PAPI_perror( "PAPI_flops" ); - } - - if (!quiet) { - printf( "\nStart\n"); - printf( "real time: %f\n", rtime); - printf( "process time: %f\n", ptime); - printf( "FP Operations: %lld\n", flpops); - printf( "MFLOPS %f\n", mflops); - } - - /* Be sure we are all zero at beginning */ - if ((rtime!=0) || (ptime!=0) || (flpops!=0) || (mflops!=0)) { - test_fail(__FILE__,__LINE__,"Not initialized to zero",0); - } - - // Classic flops - flops_float_matrix_matrix_multiply(); - if ( PAPI_flops(&rtime, &ptime, &flpops, &mflops) != PAPI_OK ) - PAPI_perror( "PAPI_flops" ); - - if (!quiet) { - printf( "\nClassic\n"); - printf( "real time: %f\n", rtime); - printf( "process time: %f\n", ptime); - printf( "FP Operations: %lld\n", flpops); - printf( "MFLOPS %f\n", mflops); - } - mflops_classic=mflops; - - // Swapped flops - flops_float_swapped_matrix_matrix_multiply(); - if ( PAPI_flops(&rtime, &ptime, &flpops, &mflops) != PAPI_OK ) - PAPI_perror( "PAPI_flops" ); - - if (!quiet) { - printf( "\nSwapped\n"); - printf( "real time: %f\n", rtime); - printf( "process time: %f\n", ptime); - printf( "FP Operations: %lld\n", flpops); - printf( "MFLOPS %f\n", mflops); - } - mflops_swapped=mflops; - - // turn off flops - if ( PAPI_stop_counters(NULL, 0) != PAPI_OK ) { - PAPI_perror( "PAPI_stop_counters" ); - } - - - /************************/ - /* IPC */ - /************************/ - - if (!quiet) { - printf( "\n----------------------------------\n" ); - printf( "PAPI_ipc\n"); - } - - // Start ipc - if ( 
PAPI_ipc(&rtime, &ptime, &ins, &ipc) != PAPI_OK ) - PAPI_perror( "PAPI_ipc" ); - - if (!quiet) { - printf( "\nStart\n"); - printf( "real time: %f\n", rtime); - printf( "process time: %f\n", ptime); - printf( "Instructions: %lld\n", ins); - printf( "IPC %f\n", ipc); - } - - /* Be sure we are all zero at beginning */ - if ((rtime!=0) || (ptime!=0) || (ins!=0) || (ipc!=0)) { - test_fail(__FILE__,__LINE__,"Not initialized to zero",0); - } - - // Classic ipc - flops_float_matrix_matrix_multiply(); - if ( PAPI_ipc(&rtime, &ptime, &ins, &ipc) != PAPI_OK ) - PAPI_perror( "PAPI_ipc" ); - - if (!quiet) { - printf( "\nClassic\n"); - printf( "real time: %f\n", rtime); - printf( "process time: %f\n", ptime); - printf( "Instructions: %lld\n", ins); - printf( "IPC %f\n", ipc); - } - ipc_classic=ipc; - - // Swapped ipc - flops_float_swapped_matrix_matrix_multiply(); - if ( PAPI_ipc(&rtime, &ptime, &ins, &ipc) != PAPI_OK ) - PAPI_perror( "PAPI_ipc" ); - - if (!quiet) { - printf( "\nSwapped\n"); - printf( "real time: %f\n", rtime); - printf( "process time: %f\n", ptime); - printf( "Instructions: %lld\n", ins); - printf( "IPC %f\n", ipc); - } - ipc_swapped=ipc; - - // turn off ipc - if ( PAPI_stop_counters(NULL, 0) != PAPI_OK ) { - PAPI_perror( "PAPI_stop_counters" ); - } - - - /************************/ - /* EPC */ - /************************/ - - if (!quiet) { - printf( "\n----------------------------------\n" ); - printf( "PAPI_epc\n"); - } - - /* This unfortunately conflicts a bit with the TESTS_QUIET */ - /* command line paramater nonsense. 
*/ - - if ( argc >= 2) { - retval = PAPI_event_name_to_code( argv[1], &event ); - if (retval != PAPI_OK) { - if (!quiet) printf("Can't find %s; Using PAPI_TOT_INS\n", argv[1]); - event = PAPI_TOT_INS; - } else { - if (!quiet) printf("Using event %s\n", argv[1]); - } - } - - // Start epc - if ( PAPI_epc(event, &rtime, &ptime, &ref, &core, &evt, &epc) != PAPI_OK ) - PAPI_perror( "PAPI_epc" ); - - if (!quiet) { - printf( "\nStart\n"); - printf( "real time: %f\n", rtime); - printf( "process time: %f\n", ptime); - printf( "Ref Cycles: %lld\n", ref); - printf( "Core Cycles: %lld\n", core); - printf( "Events: %lld\n", evt); - printf( "EPC: %f\n", epc); - } - - /* Be sure we are all zero at beginning */ - if ((rtime!=0) || (ptime!=0) || (ref!=0) || (core!=0) - || (evt!=0) || (epc!=0)) { - test_fail(__FILE__,__LINE__,"Not initialized to zero",0); - } - - // Classic epc - flops_float_matrix_matrix_multiply(); - if ( PAPI_epc(event, &rtime, &ptime, &ref, &core, &evt, &epc) != PAPI_OK ) - PAPI_perror( "PAPI_epc" ); - - if (!quiet) { - printf( "\nClassic\n"); - printf( "real time: %f\n", rtime); - printf( "process time: %f\n", ptime); - printf( "Ref Cycles: %lld\n", ref); - printf( "Core Cycles: %lld\n", core); - printf( "Events: %lld\n", evt); - printf( "EPC: %f\n", epc); - } - epc_classic=epc; - - // Swapped epc - flops_float_swapped_matrix_matrix_multiply(); - if ( PAPI_epc(event, &rtime, &ptime, &ref, &core, &evt, &epc) != PAPI_OK ) { - PAPI_perror( "PAPI_epc" ); - } - - if (!quiet) { - printf( "\nSwapped\n"); - printf( "real time: %f\n", rtime); - printf( "process time: %f\n", ptime); - printf( "Ref Cycles: %lld\n", ref); - printf( "Core Cycles: %lld\n", core); - printf( "Events: %lld\n", evt); - printf( "EPC: %f\n", epc); - } - epc_swapped=epc; - - // turn off epc - if ( PAPI_stop_counters(NULL, 0) != PAPI_OK ) { - PAPI_perror( "PAPI_stop_counters" ); - } - - if (!quiet) { - printf( "\n----------------------------------\n" ); - } - - /* Validate */ - if (mflips_swapped 
-#include - -#include "papi.h" -#include "papi_test.h" - - -#define INDEX 500 - -int -main( int argc, char **argv ) -{ - extern void dummy( void * ); - float matrixa[INDEX][INDEX], matrixb[INDEX][INDEX], mresult[INDEX][INDEX]; - float real_time, proc_time, ipc; - long long ins; - int retval; - int i, j, k; - int quiet; - - /* Set TESTS_QUIET variable */ - quiet=tests_quiet( argc, argv ); - - /* Initialize the Matrix arrays */ - for( i = 0; i < INDEX; i++ ) { - for( j= 0; j < INDEX; j++ ) { - mresult[i][j] = 0.0; - matrixa[i][j] = matrixb[i][j] = ( float ) rand( ) * ( float ) 1.1; - } - } - - /* Setup PAPI library and begin collecting data from the counters */ - retval = PAPI_ipc( &real_time, &proc_time, &ins, &ipc ); - if (retval < PAPI_OK ) { - if (!quiet) printf("Trouble starting IPC\n"); - test_skip( __FILE__, __LINE__, "PAPI_ipc", retval ); - } - - /* Matrix-Matrix multiply */ - for ( i = 0; i < INDEX; i++ ) - for ( j = 0; j < INDEX; j++ ) - for ( k = 0; k < INDEX; k++ ) - mresult[i][j] = mresult[i][j] + matrixa[i][k] * matrixb[k][j]; - - /* Collect the data into the variables passed in */ - if ( ( retval = PAPI_ipc( &real_time, &proc_time, &ins, &ipc ) ) < PAPI_OK ) - test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); - dummy( ( void * ) mresult ); - - if ( !quiet ) { - printf( "Real_time: %f Proc_time: %f Total ins: ", real_time, - proc_time ); - printf( LLDFMT, ins ); - printf( " IPC: %f\n", ipc ); - } - - /* This should not happen unless the optimizer */ - /* gets too good */ - if (ins < INDEX*INDEX) { - test_fail( __FILE__, __LINE__, "Instruction count too low.", - 5 ); - } - /* Something is broken, or else you have a really */ - /* slow processor */ - if (ipc<0.01 ) { - test_fail( __FILE__, __LINE__, "IPC equals zero.", - 5 ); - } - - test_pass( __FILE__ ); - - return 0; -} diff -Nru papi-5.7.0+dfsg/src/ctests/krentel_pthreads_race.c papi-6.0.0~dfsg/src/ctests/krentel_pthreads_race.c --- papi-5.7.0+dfsg/src/ctests/krentel_pthreads_race.c 1970-01-01 
00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/krentel_pthreads_race.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,236 @@ +/* + * Test PAPI with multiple threads. + * This code is a modification of krentel_pthreads.c by William Cohen + * , on Sep 10 2019, to exercise and test for the race + * condition in papi_internal.c involving the formerly static variables + * papi_event_code and papi_event_code_changed. This code should be run with + * "valgrind --tool=helgrind" to show any data races. If run with: + * "valgrind --tool=helgrind --log-file=helgrind_out.txt" + * The output will be captured in helgrind_out.txt and can then be processed + * with the program filter_helgrind.c; see commentary at the top of that file. + */ + +#define MAX_THREADS 256 + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define EVENT PAPI_TOT_CYC + +static int program_time = 5; +static int threshold = 20000000; +static int num_threads = 3; + +static long count[MAX_THREADS]; +static long iter[MAX_THREADS]; +static struct timeval last[MAX_THREADS]; + +static pthread_key_t key; + +static struct timeval start; + +static void +my_handler( int EventSet, void *pc, long long ovec, void *context ) +{ + ( void ) EventSet; + ( void ) pc; + ( void ) ovec; + ( void ) context; + + long num = ( long ) pthread_getspecific( key ); + + if ( num < 0 || num > num_threads ) + test_fail( __FILE__, __LINE__, "getspecific failed", 1 ); + count[num]++; +} + +static void +print_rate( long num ) +{ + struct timeval now; + long st_secs; + double last_secs; + + gettimeofday( &now, NULL ); + st_secs = now.tv_sec - start.tv_sec; + last_secs = ( double ) ( now.tv_sec - last[num].tv_sec ) + + ( ( double ) ( now.tv_usec - last[num].tv_usec ) ) / 1000000.0; + if ( last_secs <= 0.001 ) + last_secs = 0.001; + + if (!TESTS_QUIET) { + printf( "[%ld] time = %ld, count = %ld, iter = %ld, " + "rate = %.1f/Kiter\n", + num, st_secs, count[num], iter[num], + ( 1000.0 * ( double ) 
count[num] ) / ( double ) iter[num] ); + } + + count[num] = 0; + iter[num] = 0; + last[num] = now; +} + +static void +do_cycles( long num, int len ) +{ + struct timeval start, now; + double x, sum; + + gettimeofday( &start, NULL ); + + for ( ;; ) { + sum = 1.0; + for ( x = 1.0; x < 250000.0; x += 1.0 ) + sum += x; + if ( sum < 0.0 ) + printf( "==>> SUM IS NEGATIVE !! <<==\n" ); + + iter[num]++; + + gettimeofday( &now, NULL ); + if ( now.tv_sec >= start.tv_sec + len ) + break; + } +} + +static void * +my_thread( void *v ) +{ + long num = ( long ) v; + int n; + int EventSet = PAPI_NULL; + int event_code; + long long value; + + int retval; + + retval = PAPI_register_thread( ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval ); + } + pthread_setspecific( key, v ); + + count[num] = 0; + iter[num] = 0; + last[num] = start; + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset failed", retval ); + } + + retval = PAPI_event_name_to_code("PAPI_TOT_CYC", &event_code); + if (retval != PAPI_OK ) { + if (!TESTS_QUIET) printf("Trouble creating event name\n"); + test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code failed", retval ); + } + + retval = PAPI_add_event( EventSet, EVENT ); + if (retval != PAPI_OK ) { + if (!TESTS_QUIET) printf("Trouble adding event\n"); + test_fail( __FILE__, __LINE__, "PAPI_add_event failed", retval ); + } + + if ( PAPI_overflow( EventSet, EVENT, threshold, 0, my_handler ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow failed", 1 ); + + if ( PAPI_start( EventSet ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start failed", 1 ); + + if (!TESTS_QUIET) printf( "launched timer in thread %ld\n", num ); + + for ( n = 1; n <= program_time; n++ ) { + do_cycles( num, 1 ); + print_rate( num ); + } + + PAPI_stop( EventSet, &value ); + + retval = PAPI_overflow( EventSet, EVENT, 0, 0, my_handler); + if ( retval != 
PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow failed to reset the overflow handler", retval ); + + if ( PAPI_remove_event( EventSet, EVENT ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_remove_event", 1 ); + + if ( PAPI_destroy_eventset( &EventSet ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", 1 ); + + if ( PAPI_unregister_thread( ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", 1 ); + + return ( NULL ); +} + +int +main( int argc, char **argv ) +{ + pthread_t *td = NULL; + long n; + int quiet,retval; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + if ( argc < 2 || sscanf( argv[1], "%d", &program_time ) < 1 ) + program_time = 6; + if ( argc < 3 || sscanf( argv[2], "%d", &threshold ) < 1 ) + threshold = 20000000; + if ( argc < 4 || sscanf( argv[3], "%d", &num_threads ) < 1 ) + num_threads = 32; + + td = malloc((num_threads+1) * sizeof(pthread_t)); + if (!td) { + test_fail( __FILE__, __LINE__, "td malloc failed", 1 ); + } + + if (!quiet) { + printf( "program_time = %d, threshold = %d, num_threads = %d\n\n", + program_time, threshold, num_threads ); + } + + if ( PAPI_library_init( PAPI_VER_CURRENT ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init failed", 1 ); + + /* Test to be sure we can add events */ + retval = PAPI_query_event( EVENT ); + if (retval!=PAPI_OK) { + if (!quiet) printf("Trouble finding event\n"); + test_skip(__FILE__,__LINE__,"Event not available",1); + } + + if ( PAPI_thread_init( ( unsigned long ( * )( void ) ) ( pthread_self ) ) != + PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_thread_init failed", 1 ); + + if ( pthread_key_create( &key, NULL ) != 0 ) + test_fail( __FILE__, __LINE__, "pthread key create failed", 1 ); + + gettimeofday( &start, NULL ); + + for ( n = 1; n <= num_threads; n++ ) { + if ( pthread_create( &(td[n]), NULL, my_thread, ( void * ) n ) != 0 ) + test_fail( __FILE__, __LINE__, "pthread create failed", 1 ); 
+ } + + my_thread( ( void * ) 0 ); + + /* wait for all the threads */ + for ( n = 1; n <= num_threads; n++ ) { + if ( pthread_join( td[n], NULL)) + test_fail( __FILE__, __LINE__, "pthread join failed", 1 ); + } + + free(td); + + if (!quiet) printf( "done\n" ); + + test_pass( __FILE__ ); + + return 0; +} diff -Nru papi-5.7.0+dfsg/src/ctests/kufrin.c papi-6.0.0~dfsg/src/ctests/kufrin.c --- papi-5.7.0+dfsg/src/ctests/kufrin.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/kufrin.c 2020-03-04 15:56:58.000000000 +0000 @@ -57,24 +57,29 @@ 0 is always the cpu component */ ret = PAPI_assign_eventset_component( eventset, 0 ); if ( ret != PAPI_OK ) { + free(values); test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", ret ); } ret = PAPI_set_multiplex( eventset ); - if ( ret == PAPI_ENOSUPP) { - test_skip( __FILE__, __LINE__, "Multiplexing not supported", 1 ); + if ( ret == PAPI_ENOSUPP) { + free(values); + test_skip( __FILE__, __LINE__, "Multiplexing not supported", 1 ); } else if ( ret != PAPI_OK ) { + free(values); test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", ret ); } ret = PAPI_add_events( eventset, events, numevents ); if ( ret < PAPI_OK ) { + free(values); test_fail( __FILE__, __LINE__, "PAPI_add_events", ret ); } ret = PAPI_start( eventset ); if ( ret != PAPI_OK ) { + free(values); test_fail( __FILE__, __LINE__, "PAPI_start", ret ); } @@ -82,22 +87,29 @@ ret = PAPI_stop( eventset, values ); if ( ret != PAPI_OK ) { + free(values); test_fail( __FILE__, __LINE__, "PAPI_stop", ret ); } ret = PAPI_cleanup_eventset( eventset ); if ( ret != PAPI_OK ) { + free(values); test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", ret ); } ret = PAPI_destroy_eventset( &eventset ); if ( ret != PAPI_OK ) { + free(values); test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", ret ); } ret = PAPI_unregister_thread( ); - if ( ret != PAPI_OK ) + if ( ret != PAPI_OK ) { + free(values); test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", ret ); 
+ } + + free(values); return ( NULL ); } @@ -171,6 +183,7 @@ threads = ( pthread_t * ) malloc( ( size_t ) nthreads * sizeof ( pthread_t ) ); if ( threads == NULL ) { + free(events); test_fail( __FILE__, __LINE__, "malloc", PAPI_ENOMEM ); } @@ -178,6 +191,8 @@ for ( i = 0; i < nthreads; i++ ) { retval = pthread_create( &threads[i], NULL, thread, NULL ); if ( retval != 0 ) { + free(events); + free(threads); test_fail( __FILE__, __LINE__, "pthread_create", PAPI_ESYS ); } } @@ -186,12 +201,16 @@ for ( i = 0; i < nthreads; i++ ) { retval = pthread_join( threads[i], NULL ); if ( retval != 0 ) { + free(events); + free(threads); test_fail( __FILE__, __LINE__, "pthread_join", PAPI_ESYS ); } } if (!quiet) printf( "Done." ); + free(events); + free(threads); test_pass( __FILE__ ); pthread_exit( NULL ); diff -Nru papi-5.7.0+dfsg/src/ctests/Makefile.recipies papi-6.0.0~dfsg/src/ctests/Makefile.recipies --- papi-5.7.0+dfsg/src/ctests/Makefile.recipies 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/Makefile.recipies 2020-03-04 15:56:58.000000000 +0000 @@ -1,19 +1,23 @@ -OMP = zero_omp omptough +OMP = omp_hl \ + zero_omp omptough SMP = zero_smp SHMEM = zero_shmem -PTHREADS= pthrtough pthrtough2 thrspecific profile_pthreads overflow_pthreads \ +PTHREADS= pthread_hl \ + pthrtough pthrtough2 thrspecific profile_pthreads overflow_pthreads \ zero_pthreads clockres_pthreads overflow3_pthreads locks_pthreads \ krentel_pthreads MPX = max_multiplex multiplex1 multiplex2 mendes-alt sdsc-mpx sdsc2-mpx \ sdsc2-mpx-noreset sdsc4-mpx reset_multiplex MPXPTHR = multiplex1_pthreads multiplex3_pthreads kufrin -MPI = mpifirst +MPI = mpi_hl mpi_omp_hl \ + mpifirst SHARED = shlib -SERIAL = all_events all_native_events branches calibrate case1 case2 \ +SERIAL = serial_hl serial_hl_ll_comb\ + all_events all_native_events branches calibrate case1 case2 \ cmpinfo code2name derived describe destroy disable_component \ - dmem_info eventname exeinfo failed_events first flops \ - 
get_event_component inherit high-level high-level2 hl_rates \ - hwinfo ipc johnmay2 low-level matrix-hl memory \ + dmem_info eventname exeinfo failed_events first \ + get_event_component inherit \ + hwinfo johnmay2 low-level memory \ realtime remove_events reset second tenth version virttime \ zero zero_flip zero_named FORKEXEC = fork fork2 exec exec2 forkexec forkexec2 forkexec3 forkexec4 \ @@ -31,7 +35,6 @@ EAR = earprofile RANGE = data_range BROKEN = pernode val_omp -API = api ifneq ($(MPICC),) ALL = $(PTHREADS) $(SERIAL) $(FORKEXEC) $(OVERFLOW) $(PROFILE) $(MPI) $(MPX) $(MPXPTHR) $(OMP) $(SMP) $(SHMEM)\ $(SHARED) $(EAR) $(RANGE) $(P4_TEST) $(ATTACH) $(API) @@ -80,9 +83,6 @@ papi_api: $(API) -api: api.c $(TESTLIB) $(PAPILIB) - $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) api.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ - sdsc2: sdsc2.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) sdsc.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) $(LDFLAGS) -lm -o $@ @@ -143,6 +143,9 @@ thrspecific: thrspecific.c $(TESTLIB) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) thrspecific.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o thrspecific -lpthread +pthread_hl: pthread_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) pthread_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o pthread_hl -lpthread + pthrtough: pthrtough.c $(TESTLIB) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) pthrtough.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o pthrtough -lpthread @@ -158,6 +161,12 @@ krentel_pthreads: krentel_pthreads.c $(TESTLIB) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) krentel_pthreads.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o krentel_pthreads -lpthread +# krentel_pthreads_race is not included with the standard tests; +# it is a modification of krentel_pthreads intended to be run with +# "valgrind --tool=helgrind" to test for race conditions. 
+krentel_pthreads_race: krentel_pthreads_race.c $(TESTLIB) $(PAPILIB) + $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) krentel_pthreads_race.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o krentel_pthreads_race -lpthread + overflow_pthreads: overflow_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_pthreads -lpthread @@ -173,6 +182,9 @@ zero_shmem: zero_shmem.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC_R) $(INCLUDE) $(SMPCFLGS) $(CFLAGS) $(TOPTFLAGS) zero_shmem.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_shmem $(SMPLIBS) +omp_hl: omp_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + -$(CC_R) $(INCLUDE) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) omp_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o omp_hl $(OMPLIBS) + zero_omp: zero_omp.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) -$(CC_R) $(INCLUDE) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) zero_omp.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_omp $(OMPLIBS) @@ -227,6 +239,12 @@ first: first.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) first.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o first +mpi_hl: mpi_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(MPICC) $(INCLUDE) $(MPFLAGS) $(CFLAGS) $(TOPTFLAGS) mpi_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o mpi_hl + +mpi_omp_hl: mpi_omp_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(MPICC) $(INCLUDE) $(MPFLAGS) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) mpi_omp_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o mpi_omp_hl $(OMPLIBS) + mpifirst: mpifirst.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(MPICC) $(INCLUDE) $(MPFLAGS) $(CFLAGS) $(TOPTFLAGS) first.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o mpifirst @@ -236,12 +254,6 @@ second: second.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) second.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o second -flops: flops.c $(TESTLIB) $(TESTFLOPS) $(DISPLAYERROR) $(PAPILIB) - $(CC) $(INCLUDE) $(CFLAGS) 
$(TOPTFLAGS) flops.c $(TESTLIB) $(TESTFLOPS) $(DISPLAYERROR) $(PAPILIB) $(LDFLAGS) -o flops - -ipc: ipc.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) - $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) ipc.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o ipc - overflow: overflow.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow @@ -293,6 +305,12 @@ dmem_info: dmem_info.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) dmem_info.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o dmem_info +serial_hl: serial_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) serial_hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o serial_hl + +serial_hl_ll_comb: serial_hl_ll_comb.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) serial_hl_ll_comb.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o serial_hl_ll_comb + all_events: all_events.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) all_events.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o all_events @@ -326,18 +344,6 @@ low-level: low-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) low-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o low-level -matrix-hl: matrix-hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) - $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) matrix-hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o matrix-hl - -hl_rates: hl_rates.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) - $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) hl_rates.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) $(LDFLAGS) -o hl_rates - -high-level: high-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) - $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) high-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o high-level - -high-level2: high-level2.c $(TESTLIB) $(PAPILIB) - $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) high-level2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o high-level2 - shlib: shlib.c $(TESTLIB) $(PAPILIB) 
$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) shlib.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o shlib $(LDL) @@ -434,6 +440,9 @@ prof_utils.o: prof_utils.c $(testlibdir)/papi_test.h prof_utils.h $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -c prof_utils.c +filter_helgrind: filter_helgrind.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) filter_helgrind.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o filter_helgrind + .PHONY : all default ctests ctest clean clean: diff -Nru papi-5.7.0+dfsg/src/ctests/matrix-hl.c papi-6.0.0~dfsg/src/ctests/matrix-hl.c --- papi-5.7.0+dfsg/src/ctests/matrix-hl.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/matrix-hl.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,140 +0,0 @@ -/**************************************************************************** - *C - *C matrix-hl.f - *C An example of matrix-matrix multiplication and using PAPI high level - *C to look at the performance. written by Kevin London - *C March 2000 - *C Added to c tests to check stop - *C**************************************************************************** - */ - - -#include -#include - -#include "papi.h" -#include "papi_test.h" - -#include "do_loops.h" - -int -main( int argc, char **argv ) -{ - -#define NROWS1 175 -#define NCOLS1 225 -#define NROWS2 NCOLS1 -#define NCOLS2 150 - double p[NROWS1][NCOLS1], q[NROWS2][NCOLS2], r[NROWS1][NCOLS2]; - int i, j, k, num_events, retval; - /* PAPI standardized event to be monitored */ - int event[2]; - /* PAPI values of the counters */ - long long values[2], tmp; - int quiet; - - quiet = tests_quiet( argc, argv ); - - /* Setup default values */ - num_events = 0; - - /* See how many hardware events at one time are supported - * This also initializes the PAPI library */ - num_events = PAPI_num_counters( ); - if ( num_events < 2 ) { - if (!quiet) printf( "This example program requries the architecture to " - "support 2 simultaneous hardware events...shutting down.\n" ); - test_skip( __FILE__, __LINE__, 
"PAPI_num_counters", 1 ); - } - - if ( !quiet ) - printf( "Number of hardware counters supported: %d\n", num_events ); - - if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK ) - event[0] = PAPI_FP_OPS; - else if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) - event[0] = PAPI_FP_INS; - else - event[0] = PAPI_TOT_INS; - - /* Time used */ - event[1] = PAPI_TOT_CYC; - - /* matrix 1: read in the matrix values */ - for ( i = 0; i < NROWS1; i++ ) - for ( j = 0; j < NCOLS1; j++ ) - p[i][j] = i * j * 1.0; - - for ( i = 0; i < NROWS2; i++ ) - for ( j = 0; j < NCOLS2; j++ ) - q[i][j] = i * j * 1.0; - - for ( i = 0; i < NROWS1; i++ ) - for ( j = 0; j < NCOLS2; j++ ) - r[i][j] = i * j * 1.0; - - /* Set up the counters */ - num_events = 2; - retval = PAPI_start_counters( event, num_events ); - if ( retval != PAPI_OK ) - test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval ); - - /* Clear the counter values */ - retval = PAPI_read_counters( values, num_events ); - if ( retval != PAPI_OK ) - test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); - - /* Compute the matrix-matrix multiplication */ - for ( i = 0; i < NROWS1; i++ ) - for ( j = 0; j < NCOLS2; j++ ) - for ( k = 0; k < NCOLS1; k++ ) - r[i][j] = r[i][j] + p[i][k] * q[k][j]; - - /* Stop the counters and put the results in the array values */ - retval = PAPI_stop_counters( values, num_events ); - if ( retval != PAPI_OK ) - test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); - - /* Make sure the compiler does not optimize away the multiplication - * with dummy(r); - */ - dummy( r ); - - if ( !quiet ) { - if ( event[0] == PAPI_TOT_INS ) { - printf( TAB1, "TOT Instructions:", values[0] ); - } else { - printf( TAB1, "FP Instructions:", values[0] ); - } - printf( TAB1, "Cycles:", values[1] ); - } - - /* - * Intel Core overreports flops by 50% when using -O - * Use -O2 or -O3 to produce the expected # of flops - */ - - if ( event[0] == PAPI_FP_INS ) { - /* Compare measured FLOPS to expected value */ - tmp 
= - 2 * ( long long ) ( NROWS1 ) * ( long long ) ( NCOLS2 ) * - ( long long ) ( NCOLS1 ); - if ( abs( ( int ) values[0] - ( int ) tmp ) > ( double ) tmp * 0.05 ) { - /* Maybe we are counting FMAs? */ - tmp = tmp / 2; - if ( abs( ( int ) values[0] - ( int ) tmp ) > - ( double ) tmp * 0.05 ) { - printf( "\n" TAB1, "Expected operation count: ", 2 * tmp ); - printf( TAB1, "Or possibly (using FMA): ", tmp ); - printf( TAB1, "Instead I got: ", values[0] ); - test_fail( __FILE__, __LINE__, - "Unexpected FLOP count (check vector operations)", - 1 ); - } - } - } - test_pass( __FILE__ ); - - return 0; - -} diff -Nru papi-5.7.0+dfsg/src/ctests/mpi_hl.c papi-6.0.0~dfsg/src/ctests/mpi_hl.c --- papi-5.7.0+dfsg/src/ctests/mpi_hl.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/mpi_hl.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,45 @@ +#include +#include +#include +#include +#include "papi.h" +#include "papi_test.h" +#include "do_loops.h" + +int main( int argc, char **argv ) +{ + int retval; + int quiet = 0; + char* region_name; + int world_size, world_rank; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + MPI_Init( &argc, &argv ); + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + + region_name = "do_flops"; + + if ( !quiet ) { + printf("\nRank %d: instrument flops\n", world_rank); + } + + retval = PAPI_hl_region_begin(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_begin", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_hl_region_end(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_end", retval ); + } + + MPI_Finalize(); + test_hl_pass( __FILE__ ); + + return 0; +} \ No newline at end of file diff -Nru papi-5.7.0+dfsg/src/ctests/mpi_omp_hl.c papi-6.0.0~dfsg/src/ctests/mpi_omp_hl.c --- papi-5.7.0+dfsg/src/ctests/mpi_omp_hl.c 1970-01-01 00:00:00.000000000 +0000 +++ 
papi-6.0.0~dfsg/src/ctests/mpi_omp_hl.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include +#include "papi.h" +#include "papi_test.h" +#include "do_loops.h" + +int main( int argc, char **argv ) +{ + int retval, i; + int quiet = 0; + char* region_name; + int world_size, world_rank; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + MPI_Init( &argc, &argv ); + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + + region_name = "do_flops"; + + #pragma omp parallel + #pragma omp for + for ( i = 1; i <= 2; ++i ) { + int tid; + tid = omp_get_thread_num(); + + if ( !quiet ) { + printf("\nRank %d, Thread %d: instrument flops\n", world_rank, tid); + } + + retval = PAPI_hl_region_begin(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_begin", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_hl_region_end(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_end", retval ); + } + + } + + MPI_Finalize(); + test_hl_pass( __FILE__ ); + + return 0; +} \ No newline at end of file diff -Nru papi-5.7.0+dfsg/src/ctests/multiattach2.c papi-6.0.0~dfsg/src/ctests/multiattach2.c --- papi-5.7.0+dfsg/src/ctests/multiattach2.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/multiattach2.c 2020-03-04 15:56:58.000000000 +0000 @@ -55,7 +55,7 @@ int num_events1, num_events2; long long **values; long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; - char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; + char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_2MAX_STR_LEN]; const PAPI_component_info_t *cmpinfo; pid_t pid; diff -Nru papi-5.7.0+dfsg/src/ctests/multiattach.c papi-6.0.0~dfsg/src/ctests/multiattach.c --- papi-5.7.0+dfsg/src/ctests/multiattach.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/multiattach.c 
2020-03-04 15:56:58.000000000 +0000 @@ -56,7 +56,7 @@ int num_events1, num_events2; long long **values; long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; - char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; + char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_2MAX_STR_LEN]; const PAPI_component_info_t *cmpinfo; pid_t pid, pid2; double ratio1,ratio2; diff -Nru papi-5.7.0+dfsg/src/ctests/omp_hl.c papi-6.0.0~dfsg/src/ctests/omp_hl.c --- papi-5.7.0+dfsg/src/ctests/omp_hl.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/omp_hl.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include "papi.h" +#include "papi_test.h" +#include "do_loops.h" + +int main( int argc, char **argv ) +{ + int retval, i; + int quiet = 0; + char* region_name; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + region_name = "do_flops"; + + #pragma omp parallel + #pragma omp for + for ( i = 1; i <= 4; ++i ) { + int tid; + tid = omp_get_thread_num(); + + if ( !quiet ) { + printf("\nThread %d: instrument flops\n", tid); + } + + retval = PAPI_hl_region_begin(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_begin", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_hl_region_end(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_end", retval ); + } + + } + + region_name = "do_flops_2"; + #pragma omp parallel + #pragma omp for + for ( i = 1; i <= 4; ++i ) { + int tid; + tid = omp_get_thread_num(); + + if ( !quiet ) { + printf("\nThread %d: instrument flops_2\n", tid); + } + + retval = PAPI_hl_region_begin(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_begin", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_hl_region_end(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_end", retval ); + } 
+ } + + test_hl_pass( __FILE__ ); + + return 0; +} \ No newline at end of file diff -Nru papi-5.7.0+dfsg/src/ctests/profile_twoevents.c papi-6.0.0~dfsg/src/ctests/profile_twoevents.c --- papi-5.7.0+dfsg/src/ctests/profile_twoevents.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/profile_twoevents.c 2020-03-04 15:56:58.000000000 +0000 @@ -21,7 +21,7 @@ int i, num_tests = 6; unsigned long length, blength; int num_buckets, mask; - char title[80]; + char title[PAPI_2MAX_STR_LEN]; int retval; const PAPI_exe_info_t *prginfo; caddr_t start, end; diff -Nru papi-5.7.0+dfsg/src/ctests/pthread_hl.c papi-6.0.0~dfsg/src/ctests/pthread_hl.c --- papi-5.7.0+dfsg/src/ctests/pthread_hl.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/pthread_hl.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,90 @@ +#include +#include +#include +#include +#include "papi.h" +#include "papi_test.h" +#include "do_loops.h" + +#define NUM_THREADS 4 + +typedef struct papi_args +{ + long tid; + int quiet; +} papi_args_t; + +void *CallMatMul(void *args) +{ + long tid; + int retval, quiet; + char* region_name; + + papi_args_t* papi_args = (papi_args_t*)args; + tid = (*papi_args).tid; + quiet = (*papi_args).quiet; + region_name = "do_flops"; + + if ( !quiet ) { + printf("\nThread %ld: instrument flops\n", tid); + } + + retval = PAPI_hl_region_begin(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_begin", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_hl_region_end(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_end", retval ); + } + + pthread_exit(NULL); +} + +int main( int argc, char **argv ) +{ + pthread_t threads[NUM_THREADS]; + papi_args_t args[NUM_THREADS]; + int rc; + long t; + int quiet = 0; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + for( t = 0; t < NUM_THREADS; t++) { + args[t].tid = t; + args[t].quiet = quiet; + rc = 
pthread_create(&threads[t], NULL, CallMatMul, (void *)&args[t]); + if (rc) { + printf("ERROR; return code from pthread_create() is %d\n", rc); + exit(-1); + } + } + + for( t = 0; t < NUM_THREADS; t++) { + pthread_join(threads[t], NULL); + } + + + for( t = 0; t < NUM_THREADS; t++) { + args[t].tid = t; + args[t].quiet = quiet; + rc = pthread_create(&threads[t], NULL, CallMatMul, (void *)&args[t]); + if (rc) { + printf("ERROR; return code from pthread_create() is %d\n", rc); + exit(-1); + } + } + + for( t = 0; t < NUM_THREADS; t++) { + pthread_join(threads[t], NULL); + } + + test_hl_pass( __FILE__ ); + + return 0; +} \ No newline at end of file diff -Nru papi-5.7.0+dfsg/src/ctests/pthrtough2.c papi-6.0.0~dfsg/src/ctests/pthrtough2.c --- papi-5.7.0+dfsg/src/ctests/pthrtough2.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/pthrtough2.c 2020-03-04 15:56:58.000000000 +0000 @@ -81,8 +81,10 @@ ret = pthread_create( &th[j], &attr, &Thread, NULL ); if ( ret ) { printf( "Failed to create thread: %d\n", j ); - if ( j < 10 ) + if ( j < 10 ) { + free(th); test_fail( __FILE__, __LINE__, "pthread_create", PAPI_ESYS ); + } printf( "Continuing test with %d threads.\n", j - 1 ); nthr = j - 1; th = ( pthread_t * ) realloc( th, @@ -96,6 +98,7 @@ pthread_join( th[j], NULL ); } + free(th); test_pass( __FILE__ ); return 0; diff -Nru papi-5.7.0+dfsg/src/ctests/pthrtough.c papi-6.0.0~dfsg/src/ctests/pthrtough.c --- papi-5.7.0+dfsg/src/ctests/pthrtough.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/pthrtough.c 2020-03-04 15:56:58.000000000 +0000 @@ -85,14 +85,17 @@ for ( j = 0; j < nthr; j++ ) { ret = pthread_create( &th[j], &attr, &Thread, NULL ); - if ( ret ) + if ( ret ) { + free(th); test_fail( __FILE__, __LINE__, "pthread_create", PAPI_ESYS ); + } } for ( j = 0; j < nthr; j++ ) { pthread_join( th[j], NULL ); } + free(th); test_pass( __FILE__ ); return 0; diff -Nru papi-5.7.0+dfsg/src/ctests/reset.c papi-6.0.0~dfsg/src/ctests/reset.c --- 
papi-5.7.0+dfsg/src/ctests/reset.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/reset.c 2020-03-04 15:56:58.000000000 +0000 @@ -69,7 +69,7 @@ long long **values; int EventSet = PAPI_NULL; int PAPI_event, mask; - char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; + char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_2MAX_STR_LEN]; int quiet; /* Set TESTS_QUIET variable */ diff -Nru papi-5.7.0+dfsg/src/ctests/reset_multiplex.c papi-6.0.0~dfsg/src/ctests/reset_multiplex.c --- papi-5.7.0+dfsg/src/ctests/reset_multiplex.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/reset_multiplex.c 2020-03-04 15:56:58.000000000 +0000 @@ -20,7 +20,7 @@ long long **values; int EventSet = PAPI_NULL; int PAPI_event, mask; - char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; + char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_2MAX_STR_LEN]; int quiet; /* Set TESTS_QUIET variable */ diff -Nru papi-5.7.0+dfsg/src/ctests/serial_hl.c papi-6.0.0~dfsg/src/ctests/serial_hl.c --- papi-5.7.0+dfsg/src/ctests/serial_hl.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/serial_hl.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,41 @@ +#include +#include +#include +#include "papi.h" +#include "papi_test.h" +#include "do_loops.h" + +int main( int argc, char **argv ) +{ + int retval, i; + int quiet = 0; + char* region_name; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + region_name = "do_flops"; + + if ( !quiet ) { + printf("\nInstrument flops\n"); + } + + for ( i = 1; i <= 4; ++i ) { + + retval = PAPI_hl_region_begin(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_begin", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_hl_region_end(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_end", retval ); + } + } + + test_hl_pass( __FILE__ ); + + return 0; +} \ No newline at end of file 
diff -Nru papi-5.7.0+dfsg/src/ctests/serial_hl_ll_comb.c papi-6.0.0~dfsg/src/ctests/serial_hl_ll_comb.c --- papi-5.7.0+dfsg/src/ctests/serial_hl_ll_comb.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/serial_hl_ll_comb.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,92 @@ +#include +#include +#include +#include "papi.h" +#include "papi_test.h" +#include "do_loops.h" + +int main( int argc, char **argv ) +{ + int retval, i; + int quiet = 0; + char* region_name; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + region_name = "do_flops"; + + /* three iterations with high-level API */ + if ( !quiet ) { + printf("\nTesting high-level API: do_flops\n"); + } + + for ( i = 1; i < 4; ++i ) { + + retval = PAPI_hl_region_begin(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_begin", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_hl_region_end(region_name); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_end", retval ); + } + } + + if ( !quiet ) { + printf("\nTesting low-level API: do_flops\n"); + } + + long long values[2]; + int EventSet = PAPI_NULL; + char event_name1[]="appio:::READ_BYTES"; + char event_name2[]="appio:::WRITE_BYTES"; + + /* create the eventset */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + retval = PAPI_add_named_event( EventSet, event_name1); + if ( retval != PAPI_OK ) { + if (!quiet) printf("Couldn't add %s\n",event_name1); + test_skip(__FILE__,__LINE__,"Couldn't add appio:::READ_BYTES",0); + } + + retval = PAPI_add_named_event( EventSet, event_name2); + if ( retval != PAPI_OK ) { + if (!quiet) printf("Couldn't add %s\n",event_name2); + test_skip(__FILE__,__LINE__,"Couldn't add appio:::WRITE_BYTES",0); + } + + /* Start PAPI */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, 
"PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + /* Read results */ + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%s: %lld\n", event_name1, values[0]); + printf("%s: %lld\n", event_name2, values[1]); + } + + /* remove results. */ + PAPI_remove_named_event(EventSet,event_name1); + PAPI_remove_named_event(EventSet,event_name2); + + test_hl_pass( __FILE__ ); + + return 0; +} \ No newline at end of file diff -Nru papi-5.7.0+dfsg/src/ctests/zero_attach.c papi-6.0.0~dfsg/src/ctests/zero_attach.c --- papi-5.7.0+dfsg/src/ctests/zero_attach.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/zero_attach.c 2020-03-04 15:56:58.000000000 +0000 @@ -52,7 +52,7 @@ int num_events1; long long **values; long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; - char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; + char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_2MAX_STR_LEN]; const PAPI_component_info_t *cmpinfo; pid_t pid; diff -Nru papi-5.7.0+dfsg/src/ctests/zero_flip.c papi-6.0.0~dfsg/src/ctests/zero_flip.c --- papi-5.7.0+dfsg/src/ctests/zero_flip.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/zero_flip.c 2020-03-04 15:56:58.000000000 +0000 @@ -31,7 +31,7 @@ int PAPI_event; long long values1[2], values2[2]; long long elapsed_us, elapsed_cyc; - char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; + char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_2MAX_STR_LEN]; int quiet; /* Set TESTS_QUIET variable */ diff -Nru papi-5.7.0+dfsg/src/ctests/zero_omp.c papi-6.0.0~dfsg/src/ctests/zero_omp.c --- papi-5.7.0+dfsg/src/ctests/zero_omp.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ctests/zero_omp.c 2020-03-04 15:56:58.000000000 +0000 @@ -125,6 +125,10 @@ } } +unsigned long omp_get_thread_num_wrapper(void){ + return (unsigned long)omp_get_thread_num(); +} + int 
main( int argc, char **argv ) { @@ -160,8 +164,7 @@ elapsed_cyc = PAPI_get_real_cyc( ); - retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) - ( omp_get_thread_num ) ); + retval = PAPI_thread_init( omp_get_thread_num_wrapper ); if ( retval != PAPI_OK ) { if ( retval == PAPI_ECMP ) { if (!quiet) printf("Trouble init threads\n"); diff -Nru papi-5.7.0+dfsg/src/examples/high_level.c papi-6.0.0~dfsg/src/examples/high_level.c --- papi-5.7.0+dfsg/src/examples/high_level.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/examples/high_level.c 2020-03-04 15:56:58.000000000 +0000 @@ -1,16 +1,21 @@ /***************************************************************************** -* This example code shows how to use most of PAPI's High level functions * -* to start,count,read and stop on an event set. We use two preset events * -* here: * -* PAPI_TOT_INS: Total instructions executed in a period of time * -* PAPI_TOT_CYC: Total cpu cycles in a period of time * +* This example code shows how to use PAPI's High level functions. * +* Events to be recorded are determined via an environment variable * +* PAPI_EVENTS that lists comma separated events for any component. * +* If events are not specified via the environment variable PAPI_EVENTS, an * +* output with default events is generated after the run. 
If supported by * +* the respective machine the following default events are recorded: * +* perf::TASK-CLOCK * +* PAPI_TOT_INS * +* PAPI_TOT_CYC * +* PAPI_FP_INS * +* PAPI_FP_OPS or PAPI_DP_OPS or PAPI_SP_OPS * ******************************************************************************/ #include #include #include "papi.h" -#define NUM_EVENTS 2 #define THRESHOLD 10000 #define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } @@ -41,110 +46,38 @@ int main() { - /*Declaring and initializing the event set with the presets*/ - int Events[2] = {PAPI_TOT_INS, PAPI_TOT_CYC}; - /*The length of the events array should be no longer than the - value returned by PAPI_num_counters.*/ - - /*declaring place holder for no of hardware counters */ - int num_hwcntrs = 0; int retval; char errstring[PAPI_MAX_STR_LEN]; - /*This is going to store our list of results*/ - long long values[NUM_EVENTS]; - - /*************************************************************************** - * This part initializes the library and compares the version number of the* - * header file, to the version of the library, if these don't match then it * - * is likely that PAPI won't work correctly.If there is an error, retval * - * keeps track of the version number. * - ***************************************************************************/ - - if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) - { - fprintf(stderr, "Error: %d %s\n",retval, errstring); - exit(1); - } - - - /************************************************************************** - * PAPI_num_counters returns the number of hardware counters the platform * - * has or a negative number if there is an error * - **************************************************************************/ - if ((num_hwcntrs = PAPI_num_counters()) < PAPI_OK) - { - printf("There are no counters available. 
\n"); - exit(1); - } - - printf("There are %d counters in this system\n",num_hwcntrs); - - /************************************************************************** - * PAPI_start_counters initializes the PAPI library (if necessary) and * - * starts counting the events named in the events array. This function * - * implicitly stops and initializes any counters running as a result of * - * a previous call to PAPI_start_counters. * - **************************************************************************/ - - if ( (retval = PAPI_start_counters(Events, NUM_EVENTS)) != PAPI_OK) - ERROR_RETURN(retval); - - printf("\nCounter Started: \n"); + retval = PAPI_hl_region_begin("computation_add"); + if ( retval != PAPI_OK ) + ERROR_RETURN(retval); /* Your code goes here*/ computation_add(); - - - - /********************************************************************** - * PAPI_read_counters reads the counter values into values array * - **********************************************************************/ - - if ( (retval=PAPI_read_counters(values, NUM_EVENTS)) != PAPI_OK) + retval = PAPI_hl_read("computation_add"); + if ( retval != PAPI_OK ) ERROR_RETURN(retval); - printf("Read successfully\n"); - - - - printf("The total instructions executed for addition are %lld \n",values[0]); - printf("The total cycles used are %lld \n", values[1] ); - - printf("\nNow we try to use PAPI_accum to accumulate values\n"); - - /* Do some computation here */ + /* Your code goes here*/ computation_add(); - - - /************************************************************************ - * What PAPI_accum_counters does is it adds the running counter values * - * to what is in the values array. The hardware counters are reset and * - * left running after the call. 
* - ************************************************************************/ - if ( (retval=PAPI_accum_counters(values, NUM_EVENTS)) != PAPI_OK) + retval = PAPI_hl_region_end("computation_add"); + if ( retval != PAPI_OK ) ERROR_RETURN(retval); - printf("We did an additional %d times addition!\n", THRESHOLD); - printf("The total instructions executed for addition are %lld \n", - values[0] ); - printf("The total cycles used are %lld \n", values[1] ); - - /*********************************************************************** - * Stop counting events(this reads the counters as well as stops them * - ***********************************************************************/ - printf("\nNow we try to do some multiplications\n"); + retval = PAPI_hl_region_begin("computation_mult"); + if ( retval != PAPI_OK ) + ERROR_RETURN(retval); + + /* Your code goes here*/ computation_mult(); - /******************* PAPI_stop_counters **********************************/ - if ((retval=PAPI_stop_counters(values, NUM_EVENTS)) != PAPI_OK) - ERROR_RETURN(retval); - - printf("The total instruction executed for multiplication are %lld \n", - values[0] ); - printf("The total cycles used are %lld \n", values[1] ); - exit(0); + retval = PAPI_hl_region_end("computation_mult"); + if ( retval != PAPI_OK ) + ERROR_RETURN(retval); + + exit(0); } diff -Nru papi-5.7.0+dfsg/src/examples/Makefile papi-6.0.0~dfsg/src/examples/Makefile --- papi-5.7.0+dfsg/src/examples/Makefile 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/examples/Makefile 2020-03-04 15:56:58.000000000 +0000 @@ -4,7 +4,7 @@ CFLAGS += -I$(PAPIINC) OS = $(shell uname) -TARGETS_NTHD = PAPI_set_domain sprofile multiplex PAPI_state PAPI_reset PAPI_profil PAPI_perror PAPI_get_virt_cyc PAPI_get_real_cyc PAPI_get_opt PAPI_hw_info PAPI_get_executable_info PAPI_ipc PAPI_flops PAPI_flips PAPI_overflow PAPI_add_remove_event high_level PAPI_add_remove_events +TARGETS_NTHD = PAPI_set_domain sprofile multiplex PAPI_state PAPI_reset 
PAPI_profil PAPI_perror PAPI_get_virt_cyc PAPI_get_real_cyc PAPI_get_opt PAPI_hw_info PAPI_get_executable_info PAPI_ipc PAPI_epc PAPI_flops PAPI_flips PAPI_mix_hl_rate PAPI_mix_ll_rate PAPI_mix_hl_ll PAPI_overflow PAPI_add_remove_event high_level PAPI_add_remove_events TARGETS_PTHREAD = locks_pthreads overflow_pthreads diff -Nru papi-5.7.0+dfsg/src/examples/PAPI_epc.c papi-6.0.0~dfsg/src/examples/PAPI_epc.c --- papi-5.7.0+dfsg/src/examples/PAPI_epc.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/examples/PAPI_epc.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,67 @@ +/***************************************************************************** + * This example demonstrates the usage of the function PAPI_epc which * + * measures arbitrary events per cpu cycle * + *****************************************************************************/ + +/***************************************************************************** + * The first call to PAPI_epc() will initialize the PAPI interface, * + * set up the counters to monitor the user specified event, PAPI_TOT_CYC, * + * and PAPI_REF_CYC (if it exists) and start the counters. Subsequent calls * + * will read the counters and return real time, process time, event counts, * + * the core and reference cycle count and EPC rate since the latest call to * + * PAPI_epc(). 
* + *****************************************************************************/ + + + +#include +#include +#include "papi.h" + +int your_slow_code(); + +int main() +{ + float real_time, proc_time, epc; + long long ref, core, evt; + float real_time_i, proc_time_i, epc_i; + long long ref_i, core_i, evt_i; + int retval; + + if((retval=PAPI_epc(PAPI_TOT_INS, &real_time_i, &proc_time_i, &ref_i, &core_i, &evt_i, &epc_i)) < PAPI_OK) + { + printf("Could not initialise PAPI_epc \n"); + printf("retval: %d\n", retval); + exit(1); + } + + your_slow_code(); + + + if((retval=PAPI_epc(PAPI_TOT_INS, &real_time, &proc_time, &ref, &core, &evt, &epc)) #include #include "papi.h" +int your_slow_code(); -main() +int main() { float real_time, proc_time,mflips; long long flpins; @@ -37,7 +39,7 @@ * this platform, so PAPI_flops returns an error. * ***********************************************************************/ - if((retval=PAPI_flips(&ireal_time,&iproc_time,&iflpins,&imflips)) < PAPI_OK) + if((retval=PAPI_flips_rate(PAPI_FP_INS,&ireal_time,&iproc_time,&iflpins,&imflips)) < PAPI_OK) { printf("Could not initialise PAPI_flips \n"); printf("Your platform may not support floating point instruction event.\n"); printf("retval: %d\n", retval); @@ -47,14 +49,14 @@ your_slow_code(); - if((retval=PAPI_flips( &real_time, &proc_time, &flpins, &mflips)) #include "papi.h" +int your_slow_code(); -main() +int main() { float real_time, proc_time,mflops; long long flpops; @@ -29,14 +30,15 @@ int retval; /*********************************************************************** - * if PAPI_FP_OPS is a derived event in your platform, then your * - * platform must have at least three counters to support PAPI_flops, * - * because PAPI needs one counter to cycles. So in UltraSparcIII, even * - * the platform supports PAPI_FP_OPS, but UltraSparcIII only has two * - * available hardware counters and PAPI_FP_OPS is a derived event in * - * this platform, so PAPI_flops returns an error. 
* + * If PAPI_FP_OPS is a derived event in your platform, then your * + * platform must have at least three counters to support * + * PAPI_flops_rate, because PAPI needs one counter for cycles. So in * + * UltraSparcIII, even though the platform supports PAPI_FP_OPS, * + * UltraSparcIII only has two available hardware counters, and * + * PAPI_FP_OPS is a derived event that requires both of them, so * + * PAPI_flops_rate returns an error. * ***********************************************************************/ - if((retval=PAPI_flops(&ireal_time,&iproc_time,&iflpops,&imflops)) < PAPI_OK) + if((retval=PAPI_flops_rate(PAPI_FP_OPS,&ireal_time,&iproc_time,&iflpops,&imflops)) < PAPI_OK) { printf("Could not initialise PAPI_flops \n"); printf("Your platform may not support floating point operation event.\n"); @@ -47,14 +49,14 @@ your_slow_code(); - if((retval=PAPI_flops( &real_time, &proc_time, &flpops, &mflops)) #include "papi.h" +int your_slow_code(); -main() +int main() { float real_time, proc_time,ipc; long long ins; @@ -42,7 +43,7 @@ } - printf("Real_time: %f Proc_time: %f Total instructions: %lld IPC: %f\n", + printf("Real_time: %f Proc_time: %f Instructions: %lld IPC: %f\n", real_time, proc_time,ins,ipc); /* clean up */ diff -Nru papi-5.7.0+dfsg/src/examples/PAPI_mix_hl_ll.c papi-6.0.0~dfsg/src/examples/PAPI_mix_hl_ll.c --- papi-5.7.0+dfsg/src/examples/PAPI_mix_hl_ll.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/examples/PAPI_mix_hl_ll.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,78 @@ +/***************************************************************************** + * This example compares the measurement of IPC using the high-level API * + * and the low-level API. Both methods should deliver the same * + * result for IPC. * + * Note: There is no need to initialize PAPI for the low-level functions * + * since this is done by the high-level API. 
* + * * + * Hint: Use PAPI's high-level output script to print the measurement report * + * of the high-level API. * + * * + * ../high-level/scripts/papi_hl_output_writer.py --type=accumulate * + *****************************************************************************/ + + +#include +#include +#include "papi.h" + +#define THRESHOLD 10000 +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +int your_slow_code(); + +int main() +{ + float ipc; + int retval; + int EventSet = PAPI_NULL; + long_long values[2]; + + if ( (retval = PAPI_hl_region_begin("slow_code")) < PAPI_OK ) + ERROR_RETURN(retval); + + your_slow_code(); + + if ( (retval = PAPI_hl_region_end("slow_code")) < PAPI_OK ) + ERROR_RETURN(retval); + + if ( (retval = PAPI_hl_stop()) < PAPI_OK ) + ERROR_RETURN(retval); + + /* get IPC using low-level API */ + if ( (retval = PAPI_create_eventset(&EventSet)) < PAPI_OK ) + ERROR_RETURN(retval); + + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_INS)) < PAPI_OK ) + ERROR_RETURN(retval); + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_CYC)) < PAPI_OK ) + ERROR_RETURN(retval); + + if ( (retval = PAPI_start(EventSet)) < PAPI_OK ) + ERROR_RETURN(retval); + + your_slow_code(); + + if ( (retval = PAPI_stop(EventSet, values)) < PAPI_OK ) + ERROR_RETURN(retval); + + ipc = (float) ((float)values[0] / (float) ( values[1])); + + printf("Results from the low-level API:\n"); + printf("IPC: %f\n", ipc); + + exit(0); +} + +int your_slow_code() +{ + int i; + double tmp=1.1; + + for(i=1; i<2000; i++) + { + tmp=(tmp+100)/i; + } + return 0; +} + diff -Nru papi-5.7.0+dfsg/src/examples/PAPI_mix_hl_rate.c papi-6.0.0~dfsg/src/examples/PAPI_mix_hl_rate.c --- papi-5.7.0+dfsg/src/examples/PAPI_mix_hl_rate.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/examples/PAPI_mix_hl_rate.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,76 @@ 
+/***************************************************************************** + * This example compares the measurement of IPC using the rate function * + * PAPI_ipc and the high-level region instrumentation. Both methods should * + * deliver the same result for IPC. * + * Hint: Use PAPI's high-level output script to print the measurement report * + * of the high-level API. * + * * + * ../high-level/scripts/papi_hl_output_writer.py --type=accumulate * + *****************************************************************************/ + + +#include +#include +#include "papi.h" + +#define THRESHOLD 10000 +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +int your_slow_code(); + +int main() +{ + float real_time, proc_time,ipc; + long long ins; + int retval; + + if ( (retval = PAPI_ipc(&real_time, &proc_time, &ins ,&ipc)) < PAPI_OK ) + ERROR_RETURN(retval); + + your_slow_code(); + + if ( (retval = PAPI_ipc( &real_time, &proc_time, &ins, &ipc)) < PAPI_OK ) + ERROR_RETURN(retval); + + printf("Real_time: %f Proc_time: %f Instructions: %lld IPC: %f\n", + real_time, proc_time,ins,ipc); + + + if ( (retval = PAPI_hl_region_begin("slow_code")) < PAPI_OK ) + ERROR_RETURN(retval); + + your_slow_code(); + + if ( (retval = PAPI_hl_region_end("slow_code")) < PAPI_OK ) + ERROR_RETURN(retval); + + + if ( (retval = PAPI_ipc(&real_time, &proc_time, &ins ,&ipc)) < PAPI_OK ) + ERROR_RETURN(retval); + + your_slow_code(); + + if ( (retval = PAPI_ipc( &real_time, &proc_time, &ins, &ipc)) < PAPI_OK ) + ERROR_RETURN(retval); + + printf("Real_time: %f Proc_time: %f Instructions: %lld IPC: %f\n", + real_time, proc_time,ins,ipc); + + if ( (retval = PAPI_rate_stop()) < PAPI_OK ) + ERROR_RETURN(retval); + + exit(0); +} + +int your_slow_code() +{ + int i; + double tmp=1.1; + + for(i=1; i<2000; i++) + { + tmp=(tmp+100)/i; + } + return 0; +} + diff -Nru papi-5.7.0+dfsg/src/examples/PAPI_mix_ll_rate.c 
papi-6.0.0~dfsg/src/examples/PAPI_mix_ll_rate.c --- papi-5.7.0+dfsg/src/examples/PAPI_mix_ll_rate.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/examples/PAPI_mix_ll_rate.c 2020-03-04 15:56:58.000000000 +0000 @@ -0,0 +1,78 @@ +/***************************************************************************** + * This example compares the measurement of IPC using the rate function * + * PAPI_ipc and the low-level API. Both methods should deliver the same * + * result for IPC. * + * Note: There is no need to initialize PAPI for the low-level functions * + * since this is done by PAPI_ipc. * + *****************************************************************************/ + + +#include +#include +#include "papi.h" + +#define THRESHOLD 10000 +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +int your_slow_code(); + +int main() +{ + float real_time, proc_time, ipc; + long long ins; + int retval; + int EventSet = PAPI_NULL; + long_long values[2]; + + if ( (retval = PAPI_ipc(&real_time, &proc_time, &ins ,&ipc)) < PAPI_OK ) + ERROR_RETURN(retval); + + your_slow_code(); + + if ( (retval = PAPI_ipc( &real_time, &proc_time, &ins, &ipc)) < PAPI_OK ) + ERROR_RETURN(retval); + + printf("Results from PAPI_ipc:\n"); + printf("Real_time: %f Proc_time: %f Instructions: %lld IPC: %f\n", + real_time, proc_time,ins,ipc); + + if ( (retval = PAPI_rate_stop()) < PAPI_OK ) + ERROR_RETURN(retval); + + /* get IPC using low-level API */ + if ( (retval = PAPI_create_eventset(&EventSet)) < PAPI_OK ) + ERROR_RETURN(retval); + + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_INS)) < PAPI_OK ) + ERROR_RETURN(retval); + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_CYC)) < PAPI_OK ) + ERROR_RETURN(retval); + + if ( (retval = PAPI_start(EventSet)) < PAPI_OK ) + ERROR_RETURN(retval); + + your_slow_code(); + + if ( (retval = PAPI_stop(EventSet, values)) < PAPI_OK ) + ERROR_RETURN(retval); + + ipc = (float) 
((float)values[0] / (float) ( values[1])); + + printf("Results from the low-level API:\n"); + printf("IPC: %f\n", ipc); + + exit(0); +} + +int your_slow_code() +{ + int i; + double tmp=1.1; + + for(i=1; i<2000; i++) + { + tmp=(tmp+100)/i; + } + return 0; +} + diff -Nru papi-5.7.0+dfsg/src/extras.c papi-6.0.0~dfsg/src/extras.c --- papi-5.7.0+dfsg/src/extras.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/extras.c 2020-03-04 15:56:58.000000000 +0000 @@ -252,7 +252,7 @@ if ( ESI->master != thread ) { PAPIERROR - ( "eventset->thread %#lx vs. current thread %#lx mismatch", + ( "eventset->thread %p vs. current thread %p mismatch", ESI->master, thread ); return ( PAPI_EBUG ); } diff -Nru papi-5.7.0+dfsg/src/ftests/flops.F papi-6.0.0~dfsg/src/ftests/flops.F --- papi-5.7.0+dfsg/src/ftests/flops.F 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ftests/flops.F 1970-01-01 00:00:00.000000000 +0000 @@ -1,73 +0,0 @@ -C A simple example for the use of PAPI, the number of flops you should -C get is about INDEX^3 on machines that consider add and multiply one flop -C such as SGI, and 2*(INDEX^3) that don't consider it 1 flop such as INTEL -C -Kevin London - -#include "fpapi_test.h" - - program flops - implicit integer (p) - integer index - - PARAMETER(index=100) - REAL*4 matrixa(index,index),matrixb(index,index),mres(index,index) - REAL*4 proc_time, mflops, real_time - INTEGER*8 flpins - INTEGER i,j,k, retval - integer tests_quiet, get_quiet - external get_quiet - - tests_quiet = get_quiet() - - - retval = PAPI_VER_CURRENT - call PAPIf_library_init(retval) - if ( retval.NE.PAPI_VER_CURRENT) then - call ftest_fail(__FILE__, __LINE__, - . 'PAPI_library_init', retval) - end if - - call PAPIf_query_event(PAPI_FP_INS, retval) - if (retval .NE. 
PAPI_OK) then - call ftest_skip(__FILE__, __LINE__, 'PAPI_FP_INS', PAPI_ENOEVNT) - end if - -C Initialize the Matrix arrays - do i=1,index - do j=1,index - matrixa(i,j) = i+j - matrixb(i,j) = j-i - mres(i,j) = 0.0 - end do - end do - -C Setup PAPI library and begin collecting data from the counters - call PAPIf_flips( real_time, proc_time, flpins, mflops, retval ) - if ( retval.NE.PAPI_OK) then - call ftest_fail(__FILE__, __LINE__, 'PAPIf_flips', retval) - end if - -C Matrix-Matrix Multiply - do i=1,index - do j=1,index - do k=1,index - mres(i,j) = mres(i,j) + matrixa(i,k)*matrixb(k,j) - end do - end do - end do - -C Collect the data into the Variables passed in - call PAPIf_flips( real_time, proc_time, flpins, mflops, retval) - if ( retval.NE.PAPI_OK) then - call ftest_fail(__FILE__, __LINE__, 'PAPIf_flips', retval) - end if - if (tests_quiet .EQ. 0) then - print *, 'Real_time: ', real_time - print *, ' Proc_time: ', proc_time - print *, ' Total flpins: ', flpins - print *, ' MFLOPS: ', mflops - end if - call dummy(mres) - - call ftests_pass(__FILE__) - end diff -Nru papi-5.7.0+dfsg/src/ftests/fmatrixpapi2.F papi-6.0.0~dfsg/src/ftests/fmatrixpapi2.F --- papi-5.7.0+dfsg/src/ftests/fmatrixpapi2.F 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ftests/fmatrixpapi2.F 1970-01-01 00:00:00.000000000 +0000 @@ -1,195 +0,0 @@ -C **************************************************************************** -C -C fmatrixpapi2.f -C An example of matrix-matrix multiplication and using PAPI high level to -C look at the performance. The example illustrates how PAPIF_read_counters -C and PAPIF_accum_counters can be used to selectively measure parts of a -C code without having to use the low-level interface. 
-C -C Derived from an example written by Kevin London March 2000 -C **************************************************************************** - -#include "fpapi_test.h" - - program fmatrixpapi - IMPLICIT integer (p) - - INTEGER ncols1,nrows1,ncols2,nrows2 - PARAMETER(nrows1=175,ncols1=225,nrows2=ncols1,ncols2=150) - INTEGER i,j,num_events,retval -C PAPI standardized event to be monitored - INTEGER event(2) -C PAPI values of the counters - INTEGER*8 values(2), dummies(2) - REAL*8 p(nrows1,ncols1),q(nrows2,ncols2), - & r(nrows1,ncols2) - integer tests_quiet, get_quiet - external get_quiet - - tests_quiet = get_quiet() - -C Setup default values - num_events=0 - -C Open matrix file number 1 for reading -C OPEN(UNIT=1,FILE='fmt1',STATUS='OLD') -C Open matrix file number 2 for reading -C OPEN(UNIT=2,FILE='fmt2',STATUS='OLD') - - retval = PAPI_VER_CURRENT - call PAPIf_library_init(retval) - if ( retval.NE.PAPI_VER_CURRENT) then - call ftest_fail(__FILE__, __LINE__, - . 'PAPI_library_init', retval) - end if - -C Total floating point operations - call PAPIf_query_event(PAPI_FP_INS, retval) - if (retval .NE. PAPI_OK) then - event(1) = PAPI_TOT_INS - else -C Total floating point operations - event(1) = PAPI_FP_INS - end if - -C Time used - event(2) = PAPI_TOT_CYC - -C See how many hardware events at one time are supported - call PAPIf_num_counters( num_events ) - if ( num_events .LT. 2 ) then - print *,'This example program requries the architecture ', - . 'to support 2 simultaneous hardware events...shutting down.' - stop - end if - - if (tests_quiet .EQ. 
0) then - print *, 'Number of hardware counters supported: ', num_events - end if - -C matrix 1: read in the matrix values - do i=1, nrows1 - do j=1,ncols1 - p(i,j) = i*j*1.0 - end do - end do - -C matrix 2: read in the matrix values - do i=1, nrows2 - do j=1,ncols2 - q(i,j) = i*j*1.0 - end do - end do - -C Initialize the result matrix - do i=1,nrows1 - do j=1, ncols2 - r(i,j) = i*j*1.0 - end do - end do - -C Set up the counters - num_events = 2 - call PAPIf_start_counters( event, num_events, retval) - if ( retval .NE. PAPI_OK ) then - call ftest_fail(__FILE__, __LINE__, - *'PAPIf_start_counters', retval) - end if - -C We wish to count the events for this call - call Adding_MatMult(p,q,r,nrows1,ncols1,ncols2) - -C Read and clear the counter values - call PAPIf_read_counters(values, num_events,retval) - if ( retval .NE. PAPI_OK ) then - call ftest_fail(__FILE__, __LINE__, - *'PAPIf_read_counters', retval) - end if - - if (tests_quiet .EQ. 0) then - print * - if (event(1) .EQ. PAPI_TOT_INS) then - print *, 'TOT Instructions: ',values(1) - else - print *, 'FP Instructions: ',values(1) - end if - - print *, 'Cycles: ',values(2) - - if (event(1) .EQ. PAPI_FP_INS) then - write(*,'(a,f9.6)') ' Efficiency (flops/cycles):', - & real(values(1))/real(values(2)) - end if - end if - -C We don't wish to count the events for this call - call Adding_MatMult(p,q,r,nrows1,ncols1,ncols2) - -C Clear the counter values - call PAPIf_read_counters(dummies, num_events,retval) - if ( retval .NE. PAPI_OK ) then - call ftest_fail(__FILE__, __LINE__, - *'PAPIf_read_counters', retval) - end if - -C We wish to count the events for this call - call Adding_MatMult(p,q,r,nrows1,ncols1,ncols2) - -C Read the counter values - call PAPIf_accum_counters(values, num_events,retval) - if ( retval .NE. 
PAPI_OK ) then - call ftest_fail(__FILE__, __LINE__, - *'PAPIf_accum_counters', retval) - end if - -C Stop the counters and put the results in the array values - call PAPIf_stop_counters(dummies,num_events,retval) - if ( retval .NE. PAPI_OK ) then - call ftest_fail(__FILE__, __LINE__, - *'PAPIf_stop_counters', retval) - end if - - if (tests_quiet .EQ. 0) then - print * - if (event(1) .EQ. PAPI_TOT_INS) then - print *, 'TOT Instructions: ',values(1) - else - print *, 'FP Instructions: ',values(1) - end if - - print *, 'Cycles: ',values(2) - - if (event(1) .EQ. PAPI_FP_INS) then - write(*,'(a,f9.6)') ' Efficiency (flops/cycles):', - & real(values(1))/real(values(2)) - end if - - print * - print *,'----------------------------------------------------' - print *,'The second instruction and cycle counts should be' - print *,'approximately twice the first ones. The efficiency' - print *,'metric should be fairly equal between the cases.' - end if - - call ftests_pass(__FILE__) - end - - subroutine Adding_MatMult(p,q,r,ni,nk,nj) - implicit integer (p) - integer ni,nk,nj - real*8 p(ni,*),q(nk,*),r(ni,nj) - - integer i,j,k -C Compute the matrix-matrix multiplication - do i=1,ni - do j=1,nj - do k=1,nk - r(i,j)=r(i,j) + p(i,k)*q(k,j) - end do - end do - end do - -C Make sure the compiler does not optimize away the multiplication - call dummy(r) - - end diff -Nru papi-5.7.0+dfsg/src/ftests/fmatrixpapi.F papi-6.0.0~dfsg/src/ftests/fmatrixpapi.F --- papi-5.7.0+dfsg/src/ftests/fmatrixpapi.F 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ftests/fmatrixpapi.F 1970-01-01 00:00:00.000000000 +0000 @@ -1,146 +0,0 @@ -C**************************************************************************** -C -C fmatrixpapi.f -C An example of matrix-matrix multiplication and using PAPI high level to -C look at the performance. 
written by Kevin London -C March 2000 -C**************************************************************************** - -#include "fpapi_test.h" - - program fmatrixpapi - IMPLICIT integer (p) - - INTEGER ncols1,nrows1,ncols2,nrows2 - PARAMETER(nrows1=175,ncols1=225,nrows2=ncols1,ncols2=150) - INTEGER i,j,k,num_events,retval -C PAPI standardized event to be monitored - INTEGER event(2) -C PAPI values of the counters - INTEGER*8 values(2) - REAL*8 p(nrows1,ncols1),q(nrows2,ncols2), - & r(nrows1,ncols2),tmp - integer tests_quiet, get_quiet - external get_quiet - - tests_quiet = get_quiet() - -C Setup default values - num_events=0 - -C Open matrix file number 1 for reading -C OPEN(UNIT=1,FILE='fmt1',STATUS='OLD') -C Open matrix file number 2 for reading -C OPEN(UNIT=2,FILE='fmt2',STATUS='OLD') - -C See how many hardware events at one time are supported -C This also initializes the PAPI library - call PAPIf_num_counters( num_events ) - if ( num_events .LT. 2 ) then - print *,'This example program requries the architecture to ', - . 'support 2 simultaneous hardware events...shutting down.' - call ftest_skip(__FILE__, __LINE__, - * 'too few counters', num_events) - end if - - if (tests_quiet .EQ. 0) then - print *, 'Number of hardware counters supported: ', num_events - end if - - call PAPIf_query_event(PAPI_FP_INS, retval) - if (retval .NE. PAPI_OK) then - event(1) = PAPI_TOT_INS - else -C Total floating point operations - event(1) = PAPI_FP_INS - end if - -C Time used - event(2) = PAPI_TOT_CYC - -C matrix 1: read in the matrix values - do i=1, nrows1 - do j=1,ncols1 - p(i,j) = i*j*1.0 - end do - end do - -C matrix 2: read in the matrix values - do i=1, nrows2 - do j=1,ncols2 - q(i,j) = i*j*1.0 - end do - end do - -C Initialize the result matrix - do i=1,nrows1 - do j=1, ncols2 - r(i,j) = i*j*1.0 - end do - end do - -C Set up the counters - num_events = 2 - call PAPIf_start_counters( event, num_events, retval) - if ( retval .NE. 
PAPI_OK ) then - call ftest_fail(__FILE__, __LINE__, - * 'PAPIf_start_counters', retval) - end if - -C Clear the counter values - call PAPIf_read_counters(values, num_events,retval) - if ( retval .NE. PAPI_OK ) then - call ftest_fail(__FILE__, __LINE__, - * 'PAPIf_read_counters', retval) - end if - -C Compute the matrix-matrix multiplication - do i=1,nrows1 - do j=1,ncols2 - do k=1,ncols1 - r(i,j)=r(i,j) + p(i,k)*q(k,j) - end do - end do - end do - -C Stop the counters and put the results in the array values - call PAPIf_stop_counters(values,num_events,retval) - if ( retval .NE. PAPI_OK ) then - call ftest_fail(__FILE__, __LINE__, - * 'PAPIf_stop_counters', retval) - end if - -C Make sure the compiler does not optimize away the multiplication - call dummy(r) - - if (tests_quiet .EQ. 0) then - - if (event(1) .EQ. PAPI_TOT_INS) then - print *, 'TOT Instructions: ',values(1) - else - print *, 'FP Instructions: ',values(1) - end if - - print *, 'Cycles: ',values(2) - - if (event(1) .EQ. PAPI_FP_INS) then - write(*,'(a,f9.6)') ' Efficiency (flops/cycles):', - & real(values(1))/real(values(2)) -C Compare measured FLOPS to expected value - tmp=2.0*real(nrows1)*real(ncols2)*real(ncols1) - if(abs(values(1)-tmp).gt.tmp*0.05)then -C Maybe we are counting FMAs? 
- tmp=tmp/2.0 - if(abs(values(1)-tmp).gt.tmp*0.05)then - print *,'Expected operation count:',2.0*tmp - print *,'Or possibly (using FMA): ',tmp - print *,'Instead I got: ',values(1) - call ftest_fail(__FILE__, __LINE__, - * 'Unexpected FLOP count (check vector operations)', 1) - end if - end if - end if - end if - - call ftests_pass(__FILE__) - end diff -Nru papi-5.7.0+dfsg/src/ftests/highlevel.F papi-6.0.0~dfsg/src/ftests/highlevel.F --- papi-5.7.0+dfsg/src/ftests/highlevel.F 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ftests/highlevel.F 1970-01-01 00:00:00.000000000 +0000 @@ -1,85 +0,0 @@ -#include "fpapi_test.h" - - program highlevel - implicit integer (p) - - integer*8 values(10) - integer events(2) - integer eventnum - integer availcounters - integer retval - integer tests_quiet, get_quiet - external get_quiet - character*PAPI_MAX_STR_LEN name - integer last_char, n - external last_char - - tests_quiet = get_quiet() - - eventnum = 2 - call PAPIf_num_counters(availcounters) - if (eventnum .GT. availcounters) then - print *, "Not enough hardware counters!" - stop - end if - - retval = PAPI_VER_CURRENT - call PAPIf_library_init(retval) - if ( retval.NE.PAPI_VER_CURRENT) then - call ftest_fail(__FILE__, __LINE__, - . 'PAPI_library_init', retval) - end if - - call PAPIf_query_event(PAPI_FP_INS, retval) - if (retval .NE. PAPI_OK) then - events(1)=PAPI_TOT_INS - else - events(1)=PAPI_FP_INS - end if - events(2)=PAPI_TOT_CYC - - call PAPIf_start_counters(events, eventnum, retval) - if ( retval .NE. PAPI_OK ) then - call ftest_fail(__FILE__, __LINE__, - . 'PAPIf_start_counters', - *retval) - end if - - call fdo_flops(NUM_FLOPS) - - call PAPIf_read_counters(values(1), eventnum, retval) - if ( retval .NE. PAPI_OK ) then - call ftest_fail(__FILE__, __LINE__, - . 'PAPIf_read_counters', - *retval) - end if - - call fdo_flops(NUM_FLOPS) - - call PAPIf_stop_counters(values(3), eventnum, retval) - if ( retval .NE. 
PAPI_OK ) then - call ftest_fail(__FILE__, __LINE__, - . 'PAPIf_stop_counters', - *retval) - end if - - if (tests_quiet .EQ. 0) then - call PAPIf_event_code_to_name (events(1), name, retval) - if ( retval.NE.PAPI_OK) then - call ftest_fail(__FILE__, __LINE__, - * 'PAPIf_event_code_to_name', retval) - end if - n=last_char(name) - print *, "Test case highlevel: Test of high-level APIs." - print *, "---------------------------------------------", - * "---------------------" - write (*,100) "Test type", 1, 2 - write (*,100) name(1:n), values(1), values(3) - write (*,100) "PAPI_TOT_CYC", values(2), values(4) - 100 format(a15, ":", i12, i12) - print *, "---------------------------------------------", - * "---------------------" - end if - - call ftests_pass(__FILE__) - End diff -Nru papi-5.7.0+dfsg/src/ftests/Makefile.recipies papi-6.0.0~dfsg/src/ftests/Makefile.recipies --- papi-5.7.0+dfsg/src/ftests/Makefile.recipies 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/ftests/Makefile.recipies 2020-03-04 15:56:59.000000000 +0000 @@ -1,11 +1,15 @@ -ALL = strtest zero zeronamed first second tenth description fdmemtest accum highlevel cost \ - case1 case2 clockres eventname fmatrixlowpapi fmatrixpapi fmatrixpapi2 flops fmultiplex1 \ - johnmay2 fmultiplex2 avail openmp +ALL = strtest zero zeronamed first second tenth description fdmemtest accum cost \ + case1 case2 clockres eventname fmatrixlowpapi fmultiplex1 \ + johnmay2 fmultiplex2 avail openmp\ + serial_hl .PHONY : all default ftests ftest clean install all default ftests ftest: $(ALL) +serial_hl: serial_hl.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) serial_hl.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o serial_hl + clockres: clockres.F $(TESTLIB) $(PAPILIB) $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) clockres.F $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o clockres @@ -27,27 +31,15 @@ fmatrixlowpapi: fmatrixlowpapi.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(F77) $(INCLUDE) $(FFLAGS) 
$(FTOPTFLAGS) fmatrixlowpapi.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o fmatrixlowpapi -fmatrixpapi: fmatrixpapi.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) - $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) fmatrixpapi.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o fmatrixpapi - -fmatrixpapi2: fmatrixpapi2.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) - $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) fmatrixpapi2.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o fmatrixpapi2 - strtest: strtest.F $(TESTLIB) $(PAPILIB) $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) strtest.F $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o strtest -flops: flops.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) - $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) flops.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o flops - description: description.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) description.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o description accum: accum.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) accum.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) -o accum $(LDFLAGS) -highlevel: highlevel.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) - $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) highlevel.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) -o highlevel $(LDFLAGS) - openmp: openmp.F $(TESTLIB) $(PAPILIB) $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) openmp.F $(TESTLIB) $(PAPILIB) -o openmp $(LDFLAGS) $(OMPCFLGS) diff -Nru papi-5.7.0+dfsg/src/ftests/serial_hl.F papi-6.0.0~dfsg/src/ftests/serial_hl.F --- papi-5.7.0+dfsg/src/ftests/serial_hl.F 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/ftests/serial_hl.F 2020-03-04 15:56:59.000000000 +0000 @@ -0,0 +1,23 @@ +#include "fpapi.h" + + program flops + integer retval + integer i + + do i = 1, 4 + call PAPIf_hl_region_begin("main", retval) + if ( retval .NE. PAPI_OK ) then + write (*,*) "PAPIf_hl_region_begin failed!" + end if + + write (*,*) 'Round', i + call fdo_flops(NUM_FLOPS) + + call PAPIf_hl_region_end("main", retval) + if ( retval .NE. 
PAPI_OK ) then + write (*,*) "PAPIf_hl_region_end failed!" + end if + end do + + call ftests_hl_pass(__FILE__) + end program flops diff -Nru papi-5.7.0+dfsg/src/high-level/papi_hl.c papi-6.0.0~dfsg/src/high-level/papi_hl.c --- papi-5.7.0+dfsg/src/high-level/papi_hl.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/high-level/papi_hl.c 2020-03-04 15:56:59.000000000 +0000 @@ -0,0 +1,2015 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** +* @file papi_hl.c +* @author Frank Winkler +* frank.winkler@icl.utk.edu +* @author Philip Mucci +* mucci@cs.utk.edu +* @brief This file contains the 'high level' interface to PAPI. +* BASIC is a high level language. ;-) */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "papi.h" +#include "papi_internal.h" + + +/* For dynamic linking to libpapi */ +/* Weak symbol for pthread_once to avoid additional linking + * against libpthread when not used. 
*/ +#pragma weak pthread_once + +#define verbose_fprintf \ + if (verbosity == 1) fprintf + +/* defaults for number of components and events */ +#define PAPIHL_NUM_OF_COMPONENTS 10 +#define PAPIHL_NUM_OF_EVENTS_PER_COMPONENT 10 + +#define PAPIHL_ACTIVE 1 +#define PAPIHL_DEACTIVATED 0 + +/* global components data begin *****************************************/ +typedef struct components +{ + int component_id; + int num_of_events; + int max_num_of_events; + char **event_names; + int *event_codes; + short *event_types; + int EventSet; //only for testing at initialization phase +} components_t; + +components_t *components = NULL; +int num_of_components = 0; +int max_num_of_components = PAPIHL_NUM_OF_COMPONENTS; +int total_num_events = 0; +int num_of_cleaned_threads = 0; + +/* global components data end *******************************************/ + + +/* thread local components data begin ***********************************/ +typedef struct local_components +{ + int EventSet; + /** Return values for the eventsets */ + long_long *values; +} local_components_t; + +THREAD_LOCAL_STORAGE_KEYWORD local_components_t *_local_components = NULL; +THREAD_LOCAL_STORAGE_KEYWORD long_long _local_cycles; +THREAD_LOCAL_STORAGE_KEYWORD volatile bool _local_state = PAPIHL_ACTIVE; +THREAD_LOCAL_STORAGE_KEYWORD int _local_region_begin_cnt = 0; /**< Count each PAPI_hl_region_begin call */ +THREAD_LOCAL_STORAGE_KEYWORD int _local_region_end_cnt = 0; /**< Count each PAPI_hl_region_end call */ + +/* thread local components data end *************************************/ + + +/* global event storage data begin **************************************/ +typedef struct reads +{ + struct reads *next; + struct reads *prev; + long_long value; /**< Event value */ +} reads_t; + +typedef struct +{ + long_long offset; /**< Event value for region_begin */ + long_long total; /**< Event value for region_end - region_begin + previous value */ + reads_t *read_values; /**< List of read event values inside a 
region */
+} value_t;
+
+typedef struct regions
+{
+   char *region; /**< Region name */
+   struct regions *next;
+   struct regions *prev;
+   value_t values[]; /**< Array of event values based on current eventset */
+} regions_t;
+
+typedef struct
+{
+   unsigned long key; /**< Thread ID */
+   regions_t *value; /**< List of regions */
+} threads_t;
+
+int compar(const void *l, const void *r) /**< Three-way ordering of two threads_t nodes by key */
+{
+   const threads_t *lm = l;
+   const threads_t *lr = r;
+   return (lm->key > lr->key) - (lm->key < lr->key); /* subtracting the keys truncated an unsigned long difference to int */
+}
+
+typedef struct
+{
+   void *root; /**< Root of binary tree */
+   threads_t *find_p; /**< Pointer that is used for finding a thread node */
+} binary_tree_t;
+
+/**< Global binary tree that stores events from all threads */
+binary_tree_t* binary_tree = NULL;
+
+/* global event storage data end ****************************************/
+
+
+/* global auxiliary variables begin *************************************/
+enum region_type { REGION_BEGIN, REGION_READ, REGION_END };
+
+char **requested_event_names = NULL; /**< Events from user or default */
+int num_of_requested_events = 0;
+
+bool hl_initiated = false; /**< Check PAPI-HL has been initiated */
+bool hl_finalized = false; /**< Check PAPI-HL has been finalized */
+bool events_determined = false; /**< Check if events are determined */
+bool output_generated = false; /**< Check if output has been already generated */
+static char *absolute_output_file_path = NULL;
+static int output_counter = 0; /**< Count each output generation.
Not used yet */ +short verbosity = 0; /**< Verbose output is off by default */ +bool state = PAPIHL_ACTIVE; /**< PAPIHL is active until first error or finalization */ +static int region_begin_cnt = 0; /**< Count each PAPI_hl_region_begin call */ +static int region_end_cnt = 0; /**< Count each PAPI_hl_region_end call */ +unsigned long master_thread_id = -1; /**< Remember id of master thread */ + +/* global auxiliary variables end ***************************************/ + +static void _internal_hl_library_init(void); +static void _internal_hl_onetime_library_init(void); + +/* functions for creating eventsets for different components */ +static int _internal_hl_checkCounter ( char* counter ); +static int _internal_hl_determine_rank(); +static char *_internal_hl_remove_spaces( char *str ); +static int _internal_hl_determine_default_events(); +static int _internal_hl_read_user_events(); +static int _internal_hl_new_component(int component_id, components_t *component); +static int _internal_hl_add_event_to_component(char *event_name, int event, + short event_type, components_t *component); +static int _internal_hl_create_components(); +static int _internal_hl_read_events(const char* events); +static int _internal_hl_create_event_sets(); +static int _internal_hl_start_counters(); + +/* functions for storing events */ +static inline reads_t* _internal_hl_insert_read_node( reads_t** head_node ); +static inline int _internal_hl_add_values_to_region( regions_t *node, enum region_type reg_typ ); +static inline regions_t* _internal_hl_insert_region_node( regions_t** head_node, const char *region ); +static inline regions_t* _internal_hl_find_region_node( regions_t* head_node, const char *region ); +static inline threads_t* _internal_hl_insert_thread_node( unsigned long tid ); +static inline threads_t* _internal_hl_find_thread_node( unsigned long tid ); +static int _internal_hl_store_counters( unsigned long tid, const char *region, + enum region_type reg_typ ); +static int 
_internal_hl_read_counters(); +static int _internal_hl_read_and_store_counters( const char *region, enum region_type reg_typ ); +static int _internal_hl_create_global_binary_tree(); + +/* functions for output generation */ +static int _internal_hl_mkdir(const char *dir); +static int _internal_hl_determine_output_path(); +static void _internal_hl_json_line_break_and_indent(FILE* f, bool b, int width); +static void _internal_hl_json_region_events(FILE* f, bool beautifier, regions_t *regions); +static void _internal_hl_json_regions(FILE* f, bool beautifier, threads_t* thread_node); +static void _internal_hl_json_threads(FILE* f, bool beautifier, unsigned long* tids, int threads_num); +static void _internal_hl_write_output(); + +/* functions for cleaning up heap memory */ +static void _internal_hl_clean_up_local_data(); +static void _internal_hl_clean_up_global_data(); +static void _internal_hl_clean_up_all(bool deactivate); +static int _internal_hl_check_for_clean_thread_states(); + +/* internal advanced functions */ +int _internal_PAPI_hl_init(); /**< intialize high level library */ +int _internal_PAPI_hl_cleanup_thread(); /**< clean local-thread event sets */ +int _internal_PAPI_hl_finalize(); /**< shutdown event sets and clear up everything */ +int _internal_PAPI_hl_set_events(const char* events); /**< set specfic events to be recorded */ +void _internal_PAPI_hl_print_output(); /**< generate output */ + + +static void _internal_hl_library_init(void) +{ + /* This function is only called by one thread! 
*/ + int retval; + + /* check VERBOSE level */ + if ( getenv("PAPI_HL_VERBOSE") != NULL ) { + verbosity = 1; + } + + if ( ( retval = PAPI_library_init(PAPI_VER_CURRENT) ) != PAPI_VER_CURRENT ) + verbose_fprintf(stdout, "PAPI-HL Error: PAPI_library_init failed!\n"); + + /* PAPI_thread_init only suceeds if PAPI_library_init has suceeded */ + if ((retval = PAPI_thread_init(&pthread_self)) == PAPI_OK) { + + /* determine output directory and output file */ + if ( ( retval = _internal_hl_determine_output_path() ) != PAPI_OK ) { + verbose_fprintf(stdout, "PAPI-HL Error: _internal_hl_determine_output_path failed!\n"); + state = PAPIHL_DEACTIVATED; + verbose_fprintf(stdout, "PAPI-HL Error: PAPI could not be initiated!\n"); + } else { + + /* register the termination function for output */ + atexit(_internal_PAPI_hl_print_output); + verbose_fprintf(stdout, "PAPI-HL Info: PAPI has been initiated!\n"); + + /* remember thread id */ + master_thread_id = PAPI_thread_id(); + HLDBG("master_thread_id=%lu\n", master_thread_id); + } + + /* Support multiplexing if user wants to */ + if ( getenv("PAPI_MULTIPLEX") != NULL ) { + retval = PAPI_multiplex_init(); + if ( retval == PAPI_ENOSUPP) { + verbose_fprintf(stdout, "PAPI-HL Info: Multiplex is not supported!\n"); + } else if ( retval != PAPI_OK ) { + verbose_fprintf(stdout, "PAPI-HL Error: PAPI_multiplex_init failed!\n"); + } else if ( retval == PAPI_OK ) { + verbose_fprintf(stdout, "PAPI-HL Info: Multiplex has been initiated!\n"); + } + } + + } else { + verbose_fprintf(stdout, "PAPI-HL Error: PAPI_thread_init failed!\n"); + state = PAPIHL_DEACTIVATED; + verbose_fprintf(stdout, "PAPI-HL Error: PAPI could not be initiated!\n"); + } + + hl_initiated = true; +} + +static void _internal_hl_onetime_library_init(void) +{ + static pthread_once_t library_is_initialized = PTHREAD_ONCE_INIT; + if ( pthread_once ) { + /* we assume that PAPI_hl_init() is called from a parallel region */ + pthread_once(&library_is_initialized, 
_internal_hl_library_init);
+      /* wait until first thread has finished */
+      int i = 0;
+      /* give it 5 seconds in case PAPI_thread_init crashes */
+      while ( !hl_initiated && (i++) < 500000 )
+         usleep(10);
+   } else {
+      /* we assume that PAPI_hl_init() is called from a serial application
+       * that was not linked against libpthread */
+      _internal_hl_library_init();
+   }
+}
+
+static int
+_internal_hl_checkCounter ( char* counter )
+{
+   int EventSet = PAPI_NULL;
+   int eventcode;
+   int retval, status;
+   /* probe: can `counter` be resolved and added to a fresh, throw-away EventSet? */
+   HLDBG("Counter: %s\n", counter);
+   if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK )
+      return ( retval );
+
+   if ( ( retval = PAPI_event_name_to_code( counter, &eventcode ) ) != PAPI_OK ) {
+      HLDBG("Counter %s does not exist\n", counter);
+   } else if ( ( retval = PAPI_add_event (EventSet, eventcode) ) != PAPI_OK ) {
+      HLDBG("Cannot add counter %s\n", counter);
+   }
+
+   /* Release the probe EventSet on every path (it used to leak when the
+    * lookup or the add failed); the first error seen is what gets returned. */
+   status = PAPI_cleanup_eventset (EventSet);
+   if ( retval == PAPI_OK )
+      retval = status;
+
+   status = PAPI_destroy_eventset (&EventSet);
+   if ( retval == PAPI_OK )
+      retval = status;
+
+   return ( retval );
+}
+
+static int _internal_hl_determine_rank()
+{
+   int rank = -1;
+   /* check environment variables for rank identification */
+
+   if ( getenv("OMPI_COMM_WORLD_RANK") != NULL )
+      rank = atoi(getenv("OMPI_COMM_WORLD_RANK"));
+   else if ( getenv("ALPS_APP_PE") != NULL )
+      rank = atoi(getenv("ALPS_APP_PE"));
+   else if ( getenv("PMI_RANK") != NULL )
+      rank = atoi(getenv("PMI_RANK"));
+   else if ( getenv("SLURM_PROCID") != NULL )
+      rank = atoi(getenv("SLURM_PROCID"));
+
+   return rank;
+}
+
+static char *_internal_hl_remove_spaces( char *str )
+{
+   char *out = str, *put = str;
+   for(; *str != '\0'; ++str) {
+      if(*str != ' ')
+         *put++ = *str;
+   }
+   *put = '\0';
+   return out;
+}
+
+static int _internal_hl_determine_default_events()
+{
+   int i;
+   HLDBG("Default events\n");
+   char *default_events[] = {
+      "perf::TASK-CLOCK",
+      "PAPI_TOT_INS",
+      "PAPI_TOT_CYC",
+      "PAPI_FP_INS",
+
"PAPI_FP_OPS" + }; + int num_of_defaults = sizeof(default_events) / sizeof(char*); + + /* allocate memory for requested events */ + requested_event_names = (char**)malloc(num_of_defaults * sizeof(char*)); + if ( requested_event_names == NULL ) + return ( PAPI_ENOMEM ); + + /* check if default events are available on the current machine */ + for ( i = 0; i < num_of_defaults; i++ ) { + if ( _internal_hl_checkCounter( default_events[i] ) == PAPI_OK ) { + requested_event_names[num_of_requested_events++] = strdup(default_events[i]); + if ( requested_event_names[num_of_requested_events -1] == NULL ) + return ( PAPI_ENOMEM ); + } + else { + /* if PAPI_FP_OPS is not available try PAPI_SP_OPS or PAPI_DP_OPS */ + if ( strcmp(default_events[i], "PAPI_FP_OPS") == 0 ) { + if ( _internal_hl_checkCounter( "PAPI_SP_OPS" ) == PAPI_OK ) + requested_event_names[num_of_requested_events++] = strdup("PAPI_SP_OPS"); + else if ( _internal_hl_checkCounter( "PAPI_DP_OPS" ) == PAPI_OK ) + requested_event_names[num_of_requested_events++] = strdup("PAPI_DP_OPS"); + } + + /* if PAPI_FP_INS is not available try PAPI_VEC_SP or PAPI_VEC_DP */ + if ( strcmp(default_events[i], "PAPI_FP_INS") == 0 ) { + if ( _internal_hl_checkCounter( "PAPI_VEC_SP" ) == PAPI_OK ) + requested_event_names[num_of_requested_events++] = strdup("PAPI_VEC_SP"); + else if ( _internal_hl_checkCounter( "PAPI_VEC_DP" ) == PAPI_OK ) + requested_event_names[num_of_requested_events++] = strdup("PAPI_VEC_DP"); + } + } + } + + return ( PAPI_OK ); +} + +static int _internal_hl_read_user_events(const char *user_events) +{ + char* user_events_copy; + const char *separator; //separator for events + int num_of_req_events = 1; //number of events in string + int req_event_index = 0; //index of event + const char *position = NULL; //current position in processed string + char *token; + + HLDBG("User events: %s\n", user_events); + user_events_copy = strdup(user_events); + if ( user_events_copy == NULL ) + return ( PAPI_ENOMEM ); + + /* check 
if string is not empty */
+   if ( strlen( user_events_copy ) > 0 )
+   {
+      /* count number of separator characters */
+      position = user_events_copy;
+      separator=",";
+      while ( *position ) {
+         if ( strchr( separator, *position ) ) {
+            num_of_req_events++;
+         }
+         position++;
+      }
+
+      /* allocate memory for requested events */
+      requested_event_names = (char**)malloc(num_of_req_events * sizeof(char*));
+      if ( requested_event_names == NULL ) {
+         free(user_events_copy);
+         return ( PAPI_ENOMEM );
+      }
+
+      /* parse list of event names */
+      token = strtok( user_events_copy, separator );
+      while ( token ) {
+         if ( req_event_index >= num_of_req_events ){
+            /* more entries than were counted in the first pass */
+            free(user_events_copy);
+            return PAPI_EINVAL;
+         }
+         requested_event_names[req_event_index] = strdup(_internal_hl_remove_spaces(token));
+         if ( requested_event_names[req_event_index] == NULL ) {
+            free(user_events_copy);
+            return ( PAPI_ENOMEM );
+         }
+         token = strtok( NULL, separator );
+         req_event_index++;
+      }
+   }
+   /* publish the number of names actually stored: separators + 1 over-counts for "", "a," or "a,,b" */
+   num_of_requested_events = req_event_index;
+   free(user_events_copy);
+   if ( num_of_requested_events == 0 )
+      return PAPI_EINVAL;
+
+   HLDBG("Number of requested events: %d\n", num_of_requested_events);
+   return ( PAPI_OK );
+}
+
+static int _internal_hl_new_component(int component_id, components_t *component)
+{
+   int retval;
+
+   /* create new EventSet */
+   component->EventSet = PAPI_NULL;
+   if ( ( retval = PAPI_create_eventset( &component->EventSet ) ) != PAPI_OK ) {
+      verbose_fprintf(stdout, "PAPI-HL Error: Cannot create EventSet for component %d.\n", component_id);
+      return ( retval );
+   }
+
+   /* Support multiplexing if user wants to */
+   if ( getenv("PAPI_MULTIPLEX") != NULL ) {
+
+      /* multiplex only for cpu core events */
+      if ( component_id == 0 ) {
+         retval = PAPI_assign_eventset_component(component->EventSet, component_id);
+         if ( retval != PAPI_OK ) {
+            verbose_fprintf(stdout, "PAPI-HL Error: PAPI_assign_eventset_component failed.\n");
+         } else {
+            if (
PAPI_get_multiplex(component->EventSet) == false ) { + retval = PAPI_set_multiplex(component->EventSet); + if ( retval != PAPI_OK ) { + verbose_fprintf(stdout, "PAPI-HL Error: PAPI_set_multiplex failed.\n"); + } + } + } + } + } + + component->component_id = component_id; + component->num_of_events = 0; + component->max_num_of_events = PAPIHL_NUM_OF_EVENTS_PER_COMPONENT; + + component->event_names = NULL; + component->event_names = (char**)malloc(component->max_num_of_events * sizeof(char*)); + if ( component->event_names == NULL ) + return ( PAPI_ENOMEM ); + + component->event_codes = NULL; + component->event_codes = (int*)malloc(component->max_num_of_events * sizeof(int)); + if ( component->event_codes == NULL ) + return ( PAPI_ENOMEM ); + + component->event_types = NULL; + component->event_types = (short*)malloc(component->max_num_of_events * sizeof(short)); + if ( component->event_types == NULL ) + return ( PAPI_ENOMEM ); + + num_of_components += 1; + return ( PAPI_OK ); +} + +static int _internal_hl_add_event_to_component(char *event_name, int event, + short event_type, components_t *component) +{ + int i, retval; + + /* check if we need to reallocate memory for event_names, event_codes and event_types */ + if ( component->num_of_events == component->max_num_of_events ) { + component->max_num_of_events *= 2; + + component->event_names = (char**)realloc(component->event_names, component->max_num_of_events * sizeof(char*)); + if ( component->event_names == NULL ) + return ( PAPI_ENOMEM ); + + component->event_codes = (int*)realloc(component->event_codes, component->max_num_of_events * sizeof(int)); + if ( component->event_codes == NULL ) + return ( PAPI_ENOMEM ); + + component->event_types = (short*)realloc(component->event_types, component->max_num_of_events * sizeof(short)); + if ( component->event_types == NULL ) + return ( PAPI_ENOMEM ); + } + + retval = PAPI_add_event( component->EventSet, event ); + if ( retval != PAPI_OK ) { + const PAPI_component_info_t* 
cmpinfo;
+      cmpinfo = PAPI_get_component_info( component->component_id );
+      verbose_fprintf(stdout, "PAPI-HL Warning: Cannot add %s to component %s.\n", event_name, cmpinfo->name);
+      verbose_fprintf(stdout, "The following event combination is not supported:\n");
+      for ( i = 0; i < component->num_of_events; i++ )
+         verbose_fprintf(stdout, " %s\n", component->event_names[i]);
+      verbose_fprintf(stdout, " %s\n", event_name);
+      verbose_fprintf(stdout, "Advice: Use papi_event_chooser to obtain an appropriate event set for this component or set PAPI_MULTIPLEX=1.\n");
+
+      return PAPI_EINVAL;
+   }
+
+   component->event_names[component->num_of_events] = event_name;
+   component->event_codes[component->num_of_events] = event;
+   component->event_types[component->num_of_events] = event_type;
+   component->num_of_events += 1;
+
+   total_num_events += 1;
+
+   return PAPI_OK;
+}
+
+static int _internal_hl_create_components()
+{
+   int i, j, retval, event;
+   int component_id = -1;
+   int comp_index = 0;
+   bool component_exists = false;
+   short event_type = 0;
+
+   HLDBG("Create components\n");
+   components = (components_t*)malloc(max_num_of_components * sizeof(components_t));
+   if ( components == NULL )
+      return ( PAPI_ENOMEM );
+
+   for ( i = 0; i < num_of_requested_events; i++ ) {
+      /* check if requested event contains event type (instant or delta) */
+      const char sep = '=';
+      char *ret;
+      int index;
+      /* search for '=' in event name */
+      ret = strchr(requested_event_names[i], sep);
+      /* reset for every event: a name without '=' must not inherit the previous event's type */
+      event_type = 0;
+      if (ret) {
+         if ( strcmp(ret, "=instant") == 0 )
+            event_type = 1;
+
+         /* get index of '=' in event name */
+         index = (int)(ret - requested_event_names[i]);
+         /* remove event type from string if '=instant' or '=delta' */
+         if ( (strcmp(ret, "=instant") == 0) || (strcmp(ret, "=delta") == 0) )
+            requested_event_names[i][index] = '\0';
+      }
+
+      /* check if event is supported on current machine */
+      retval = _internal_hl_checkCounter(requested_event_names[i]);
+      if ( retval != PAPI_OK ) {
+
verbose_fprintf(stdout, "PAPI-HL Warning: \"%s\" does not exist or is not supported on this machine.\n", requested_event_names[i]); + } else { + /* determine event code and corresponding component id */ + retval = PAPI_event_name_to_code( requested_event_names[i], &event ); + if ( retval != PAPI_OK ) + return ( retval ); + component_id = PAPI_COMPONENT_INDEX( event ); + + /* check if component_id already exists in global components structure */ + for ( j = 0; j < num_of_components; j++ ) { + if ( components[j].component_id == component_id ) { + component_exists = true; + comp_index = j; + break; + } + else { + component_exists = false; + } + } + + /* create new component */ + if ( false == component_exists ) { + /* check if we need to reallocate memory for components */ + if ( num_of_components == max_num_of_components ) { + max_num_of_components *= 2; + components = (components_t*)realloc(components, max_num_of_components * sizeof(components_t)); + if ( components == NULL ) + return ( PAPI_ENOMEM ); + } + comp_index = num_of_components; + retval = _internal_hl_new_component(component_id, &components[comp_index]); + if ( retval != PAPI_OK ) + return ( retval ); + } + + /* add event to current component */ + retval = _internal_hl_add_event_to_component(requested_event_names[i], event, event_type, &components[comp_index]); + if ( retval == PAPI_ENOMEM ) + return ( retval ); + } + } + + HLDBG("Number of components %d\n", num_of_components); + if ( num_of_components > 0 ) + verbose_fprintf(stdout, "PAPI-HL Info: Using the following events:\n"); + + /* destroy all EventSets from global data */ + for ( i = 0; i < num_of_components; i++ ) { + if ( ( retval = PAPI_cleanup_eventset (components[i].EventSet) ) != PAPI_OK ) + return ( retval ); + if ( ( retval = PAPI_destroy_eventset (&components[i].EventSet) ) != PAPI_OK ) + return ( retval ); + components[i].EventSet = PAPI_NULL; + + HLDBG("component_id = %d\n", components[i].component_id); + HLDBG("num_of_events = %d\n", 
components[i].num_of_events); + for ( j = 0; j < components[i].num_of_events; j++ ) { + HLDBG(" %s type=%d\n", components[i].event_names[j], components[i].event_types[j]); + verbose_fprintf(stdout, " %s\n", components[i].event_names[j]); + } + } + + if ( num_of_components == 0 ) + return PAPI_EINVAL; + + return PAPI_OK; +} + +static int _internal_hl_read_events(const char* events) +{ + int i, retval; + HLDBG("Read events: %s\n", events); + if ( events != NULL ) { + if ( _internal_hl_read_user_events(events) != PAPI_OK ) + if ( ( retval = _internal_hl_determine_default_events() ) != PAPI_OK ) + return ( retval ); + + /* check if user specified events via environment variable */ + } else if ( getenv("PAPI_EVENTS") != NULL ) { + char *user_events_from_env = strdup( getenv("PAPI_EVENTS") ); + if ( user_events_from_env == NULL ) + return ( PAPI_ENOMEM ); + /* if string is emtpy use default events */ + if ( strlen( user_events_from_env ) == 0 ) { + if ( ( retval = _internal_hl_determine_default_events() ) != PAPI_OK ) { + free(user_events_from_env); + return ( retval ); + } + } + else if ( _internal_hl_read_user_events(user_events_from_env) != PAPI_OK ) + if ( ( retval = _internal_hl_determine_default_events() ) != PAPI_OK ) { + free(user_events_from_env); + return ( retval ); + } + free(user_events_from_env); + } else { + if ( ( retval = _internal_hl_determine_default_events() ) != PAPI_OK ) + return ( retval ); + } + + /* create components based on requested events */ + if ( _internal_hl_create_components() != PAPI_OK ) + { + /* requested events do not work at all, use default events */ + verbose_fprintf(stdout, "PAPI-HL Warning: All requested events do not work, using default.\n"); + + for ( i = 0; i < num_of_requested_events; i++ ) + free(requested_event_names[i]); + free(requested_event_names); + num_of_requested_events = 0; + if ( ( retval = _internal_hl_determine_default_events() ) != PAPI_OK ) + return ( retval ); + if ( ( retval = 
_internal_hl_create_components() ) != PAPI_OK ) + return ( retval ); + } + + events_determined = true; + return ( PAPI_OK ); +} + +static int _internal_hl_create_event_sets() +{ + int i, j, retval; + + if ( state == PAPIHL_ACTIVE ) { + /* allocate memory for local components */ + _local_components = (local_components_t*)malloc(num_of_components * sizeof(local_components_t)); + if ( _local_components == NULL ) + return ( PAPI_ENOMEM ); + + for ( i = 0; i < num_of_components; i++ ) { + /* create EventSet */ + _local_components[i].EventSet = PAPI_NULL; + if ( ( retval = PAPI_create_eventset( &_local_components[i].EventSet ) ) != PAPI_OK ) { + return (retval ); + } + + /* Support multiplexing if user wants to */ + if ( getenv("PAPI_MULTIPLEX") != NULL ) { + + /* multiplex only for cpu core events */ + if ( components[i].component_id == 0 ) { + retval = PAPI_assign_eventset_component(_local_components[i].EventSet, components[i].component_id ); + if ( retval != PAPI_OK ) { + verbose_fprintf(stdout, "PAPI-HL Error: PAPI_assign_eventset_component failed.\n"); + } else { + if ( PAPI_get_multiplex(_local_components[i].EventSet) == false ) { + retval = PAPI_set_multiplex(_local_components[i].EventSet); + if ( retval != PAPI_OK ) { + verbose_fprintf(stdout, "PAPI-HL Error: PAPI_set_multiplex failed.\n"); + } + } + } + } + } + + /* add event to current EventSet */ + for ( j = 0; j < components[i].num_of_events; j++ ) { + retval = PAPI_add_event( _local_components[i].EventSet, components[i].event_codes[j] ); + if ( retval != PAPI_OK ) { + return (retval ); + } + } + /* allocate memory for return values */ + _local_components[i].values = (long_long*)malloc(components[i].num_of_events * sizeof(long_long)); + if ( _local_components[i].values == NULL ) + return ( PAPI_ENOMEM ); + + } + return PAPI_OK; + } + return ( PAPI_EMISC ); +} + +static int _internal_hl_start_counters() +{ + int i, retval; + long_long cycles; + + if ( state == PAPIHL_ACTIVE ) { + for ( i = 0; i < 
num_of_components; i++ ) { + if ( ( retval = PAPI_start( _local_components[i].EventSet ) ) != PAPI_OK ) + return (retval ); + + /* warm up PAPI code paths and data structures; the assignment is + * parenthesised on its own so that retval receives the PAPI error code + * and not the 0/1 result of the comparison */ + if ( ( retval = PAPI_read_ts( _local_components[i].EventSet, _local_components[i].values, &cycles ) ) != PAPI_OK ) { + return (retval ); + } + } + _papi_hl_events_running = 1; + return PAPI_OK; + } + return ( PAPI_EMISC ); +} + +static inline reads_t* _internal_hl_insert_read_node(reads_t** head_node) +{ + reads_t *new_node; + + /* create new read node */ + if ( ( new_node = malloc(sizeof(reads_t)) ) == NULL ) + return ( NULL ); + new_node->next = NULL; + new_node->prev = NULL; + + /* insert node in list */ + if ( *head_node == NULL ) { + *head_node = new_node; + return new_node; + } + (*head_node)->prev = new_node; + new_node->next = *head_node; + *head_node = new_node; + + return new_node; +} + +static inline int _internal_hl_add_values_to_region( regions_t *node, enum region_type reg_typ ) +{ + int i, j; + int region_count = 1; + int cmp_iter = 2; + + if ( reg_typ == REGION_BEGIN ) { + /* set first fixed counters */ + node->values[0].offset = region_count; + node->values[1].offset = _local_cycles; + /* events from components */ + for ( i = 0; i < num_of_components; i++ ) + for ( j = 0; j < components[i].num_of_events; j++ ) + node->values[cmp_iter++].offset = _local_components[i].values[j]; + } else if ( reg_typ == REGION_READ ) { + /* create a new read node and add values*/ + reads_t* read_node; + if ( ( read_node = _internal_hl_insert_read_node(&node->values[1].read_values) ) == NULL ) + return ( PAPI_ENOMEM ); + read_node->value = _local_cycles - node->values[1].offset; + for ( i = 0; i < num_of_components; i++ ) { + for ( j = 0; j < components[i].num_of_events; j++ ) { + reads_t* read_node; + if ( ( read_node = _internal_hl_insert_read_node(&node->values[cmp_iter].read_values) ) == NULL ) + return ( PAPI_ENOMEM ); + if ( components[i].event_types[j] == 1 ) + read_node->value = 
_local_components[i].values[j]; + else + read_node->value = _local_components[i].values[j] - node->values[cmp_iter].offset; + cmp_iter++; + } + } + } else if ( reg_typ == REGION_END ) { + /* determine difference of current value and offset and add + previous total value */ + node->values[0].total += node->values[0].offset; + node->values[1].total += _local_cycles - node->values[1].offset; + /* events from components */ + for ( i = 0; i < num_of_components; i++ ) + for ( j = 0; j < components[i].num_of_events; j++ ) { + /* if event type is instant only save last value + * NOTE(review): the code below accumulates with += rather than storing the last value - confirm which is intended */ + if ( components[i].event_types[j] == 1 ) + node->values[cmp_iter].total += _local_components[i].values[j]; + else + node->values[cmp_iter].total += _local_components[i].values[j] - node->values[cmp_iter].offset; + cmp_iter++; + } + } + return ( PAPI_OK ); +} + + +/* Allocates a region node named region with total_num_events + 2 value slots + * (region count and CPU cycles come first), zeroes the totals and pushes the + * node at the front of the list *head_node. Returns NULL if an allocation fails. */ +static inline regions_t* _internal_hl_insert_region_node(regions_t** head_node, const char *region ) +{ + regions_t *new_node; + int i; + int extended_total_num_events; + + /* number of all events including region count and CPU cycles */ + extended_total_num_events = total_num_events + 2; + + /* create new region node */ + new_node = malloc(sizeof(regions_t) + extended_total_num_events * sizeof(value_t)); + if ( new_node == NULL ) + return ( NULL ); + new_node->region = (char *)malloc((strlen(region) + 1) * sizeof(char)); + if ( new_node->region == NULL ) { + free(new_node); + return ( NULL ); + } + + new_node->next = NULL; + new_node->prev = NULL; + strcpy(new_node->region, region); + for ( i = 0; i < extended_total_num_events; i++ ) { + new_node->values[i].total = 0; + new_node->values[i].read_values = NULL; + } + + /* insert node in list */ + if ( *head_node == NULL ) { + *head_node = new_node; + return new_node; + } + (*head_node)->prev = new_node; + new_node->next = *head_node; + *head_node = new_node; + + return new_node; +} + + +static inline regions_t* _internal_hl_find_region_node(regions_t* head_node, const char *region ) +{ + 
regions_t* find_node = head_node; + while ( find_node != NULL ) { + if ( strcmp(find_node->region, region) == 0 ) { + return find_node; + } + find_node = find_node->next; + } + find_node = NULL; + return find_node; +} + +static inline threads_t* _internal_hl_insert_thread_node(unsigned long tid) +{ + threads_t *new_node = (threads_t*)malloc(sizeof(threads_t)); + if ( new_node == NULL ) + return ( NULL ); + new_node->key = tid; + new_node->value = NULL; /* head node of region list */ + tsearch(new_node, &binary_tree->root, compar); + return new_node; +} + +static inline threads_t* _internal_hl_find_thread_node(unsigned long tid) +{ + threads_t *find_node = binary_tree->find_p; + find_node->key = tid; + void *found = tfind(find_node, &binary_tree->root, compar); + if ( found != NULL ) { + find_node = (*(threads_t**)found); + return find_node; + } + return NULL; +} + + +static int _internal_hl_store_counters( unsigned long tid, const char *region, + enum region_type reg_typ ) +{ + int retval; + + _papi_hwi_lock( HIGHLEVEL_LOCK ); + threads_t* current_thread_node; + + /* check if current thread is already stored in tree */ + current_thread_node = _internal_hl_find_thread_node(tid); + if ( current_thread_node == NULL ) { + /* insert new node for current thread in tree if type is REGION_BEGIN */ + if ( reg_typ == REGION_BEGIN ) { + if ( ( current_thread_node = _internal_hl_insert_thread_node(tid) ) == NULL ) { + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + return ( PAPI_ENOMEM ); + } + } else { + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + return ( PAPI_EINVAL ); + } + } + + regions_t* current_region_node; + /* check if node for current region already exists */ + current_region_node = _internal_hl_find_region_node(current_thread_node->value, region); + + if ( current_region_node == NULL ) { + /* create new node for current region in list if type is REGION_BEGIN */ + if ( reg_typ == REGION_BEGIN ) { + if ( ( current_region_node = 
_internal_hl_insert_region_node(&current_thread_node->value,region) ) == NULL ) { + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + return ( PAPI_ENOMEM ); + } + } else { + /* an unmatched REGION_READ is only warned about and ignored, + * any other unmatched call is reported as PAPI_EINVAL */ + if ( reg_typ == REGION_READ ) { + verbose_fprintf(stdout, "PAPI-HL Warning: Cannot find matching region for PAPI_hl_read(\"%s\") for thread id=%lu.\n", region, PAPI_thread_id()); + retval = PAPI_OK; + } else { + verbose_fprintf(stdout, "PAPI-HL Warning: Cannot find matching region for PAPI_hl_region_end(\"%s\") for thread id=%lu.\n", region, PAPI_thread_id()); + retval = PAPI_EINVAL; + } + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + return ( retval ); + } + } + + /* add recorded values to current region */ + if ( ( retval = _internal_hl_add_values_to_region( current_region_node, reg_typ ) ) != PAPI_OK ) { + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + return ( retval ); + } + + /* count all REGION_BEGIN and REGION_END calls */ + if ( reg_typ == REGION_BEGIN ) region_begin_cnt++; + if ( reg_typ == REGION_END ) region_end_cnt++; + + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + return ( PAPI_OK ); +} + + +static int _internal_hl_read_counters() +{ + int i, j, retval; + for ( i = 0; i < num_of_components; i++ ) { + if ( i < ( num_of_components - 1 ) ) { + retval = PAPI_read( _local_components[i].EventSet, _local_components[i].values); + } else { + /* get cycles for last component */ + retval = PAPI_read_ts( _local_components[i].EventSet, _local_components[i].values, &_local_cycles ); + } + HLDBG("Thread-ID:%lu, Component-ID:%d\n", PAPI_thread_id(), components[i].component_id); + for ( j = 0; j < components[i].num_of_events; j++ ) { + HLDBG("Thread-ID:%lu, %s:%lld\n", PAPI_thread_id(), components[i].event_names[j], _local_components[i].values[j]); + } + + if ( retval != PAPI_OK ) + return ( retval ); + } + return ( PAPI_OK ); +} + +static int _internal_hl_read_and_store_counters( const char *region, enum region_type reg_typ ) +{ + int retval; + /* read all events */ + if ( ( retval = 
_internal_hl_read_counters() ) != PAPI_OK ) { + verbose_fprintf(stdout, "PAPI-HL Error: Could not read counters for thread %lu.\n", PAPI_thread_id()); + _internal_hl_clean_up_all(true); + return ( retval ); + } + + /* store all events */ + if ( ( retval = _internal_hl_store_counters( PAPI_thread_id(), region, reg_typ) ) != PAPI_OK ) { + verbose_fprintf(stdout, "PAPI-HL Error: Could not store counters for thread %lu.\n", PAPI_thread_id()); + verbose_fprintf(stdout, "PAPI-HL Advice: Check if your regions are matching.\n"); + _internal_hl_clean_up_all(true); + return ( retval ); + } + return ( PAPI_OK ); +} + +static int _internal_hl_create_global_binary_tree() +{ + if ( ( binary_tree = (binary_tree_t*)malloc(sizeof(binary_tree_t)) ) == NULL ) + return ( PAPI_ENOMEM ); + binary_tree->root = NULL; + if ( ( binary_tree->find_p = (threads_t*)malloc(sizeof(threads_t)) ) == NULL ) { + /* do not leave a half-initialised tree behind */ + free(binary_tree); + binary_tree = NULL; + return ( PAPI_ENOMEM ); + } + return ( PAPI_OK ); +} + + +/* Creates the directory dir together with all missing parent directories + * (mode S_IRWXU); directories that already exist are accepted. + * Returns PAPI_OK on success, PAPI_ENOMEM if the path cannot be copied, + * PAPI_EINVAL for an empty path and PAPI_ESYS if a regular file with the + * same name exists or mkdir fails. */ +static int _internal_hl_mkdir(const char *dir) +{ + int retval; + /* errno comes from <errno.h>; it must not be redeclared as a local */ + char *tmp = NULL; + char *p = NULL; + size_t len; + + if ( ( tmp = strdup(dir) ) == NULL ) + return ( PAPI_ENOMEM ); + len = strlen(tmp); + /* an empty path would make tmp[len - 1] and tmp + 1 read out of bounds */ + if ( len == 0 ) { + free(tmp); + return ( PAPI_EINVAL ); + } + + /* check if there is a file with the same name as the output directory */ + struct stat buf; + if ( stat(dir, &buf) == 0 && S_ISREG(buf.st_mode) ) { + verbose_fprintf(stdout, "PAPI-HL Error: Name conflict with measurement directory and existing file.\n"); + free(tmp); + return ( PAPI_ESYS ); + } + + if(tmp[len - 1] == '/') + tmp[len - 1] = 0; + /* create every intermediate directory of the path */ + for(p = tmp + 1; *p; p++) + { + if(*p == '/') + { + *p = 0; + errno = 0; + retval = mkdir(tmp, S_IRWXU); + *p = '/'; + if ( retval != 0 && errno != EEXIST ) { + free(tmp); + return ( PAPI_ESYS ); + } + } + } + /* evaluate errno before free(), which may clobber it on some C libraries */ + retval = ( mkdir(tmp, S_IRWXU) != 0 && errno != EEXIST ) ? PAPI_ESYS : PAPI_OK; + free(tmp); + return ( retval ); +} + +static int _internal_hl_determine_output_path() +{ + /* check if PAPI_OUTPUT_DIRECTORY is set */ + char *output_prefix = NULL; + if ( 
getenv("PAPI_OUTPUT_DIRECTORY") != NULL ) { + if ( ( output_prefix = strdup( getenv("PAPI_OUTPUT_DIRECTORY") ) ) == NULL ) + return ( PAPI_ENOMEM ); + } else { + /* getcwd(NULL,0) already returns a malloc'ed buffer that is released + * with free(output_prefix) below; duplicating it leaked that buffer + * and passed NULL to strdup when getcwd failed */ + if ( ( output_prefix = getcwd(NULL,0) ) == NULL ) + return ( PAPI_ESYS ); + } + + /* generate absolute path for measurement directory */ + if ( ( absolute_output_file_path = (char *)malloc((strlen(output_prefix) + 64) * sizeof(char)) ) == NULL ) { + free(output_prefix); + return ( PAPI_ENOMEM ); + } + if ( output_counter > 0 ) + sprintf(absolute_output_file_path, "%s/papi_hl_output_%d", output_prefix, output_counter); + else + sprintf(absolute_output_file_path, "%s/papi_hl_output", output_prefix); + + /* check if directory already exists */ + struct stat buf; + if ( stat(absolute_output_file_path, &buf) == 0 && S_ISDIR(buf.st_mode) ) { + + /* rename old directory by adding a timestamp */ + char *new_absolute_output_file_path = NULL; + if ( ( new_absolute_output_file_path = (char *)malloc((strlen(absolute_output_file_path) + 64) * sizeof(char)) ) == NULL ) { + free(output_prefix); + free(absolute_output_file_path); + /* the global must not keep pointing at freed memory */ + absolute_output_file_path = NULL; + return ( PAPI_ENOMEM ); + } + + /* create timestamp */ + time_t t = time(NULL); + struct tm tm = *localtime(&t); + char m_time[32]; + sprintf(m_time, "%d%02d%02d-%02d%02d%02d", tm.tm_year+1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); + /* add timestamp to existing folder string */ + sprintf(new_absolute_output_file_path, "%s-%s", absolute_output_file_path, m_time); + + uintmax_t current_unix_time = (uintmax_t)t; + uintmax_t unix_time_from_old_directory = buf.st_mtime; + + /* This is a workaround for MPI applications!!! + * Only rename existing measurement directory when it is older than + * current timestamp. If it's not, we assume that another MPI process already created a + * new measurement directory. 
*/ + if ( unix_time_from_old_directory < current_unix_time ) { + + if ( rename(absolute_output_file_path, new_absolute_output_file_path) != 0 ) { + verbose_fprintf(stdout, "PAPI-HL Warning: Cannot rename old measurement directory.\n"); + verbose_fprintf(stdout, "If you use MPI, another process may have already renamed the directory.\n"); + } + } + + free(new_absolute_output_file_path); + } + free(output_prefix); + output_counter++; + + return ( PAPI_OK ); +} + +static void _internal_hl_json_line_break_and_indent( FILE* f, bool b, int width ) +{ + int i; + if ( b ) { + fprintf(f, "\n"); + for ( i = 0; i < width; ++i ) + fprintf(f, " "); + } +} + +static void _internal_hl_json_region_events(FILE* f, bool beautifier, regions_t *regions) +{ + char **all_event_names = NULL; + int extended_total_num_events; + int i, j, cmp_iter; + + /* generate array of all events including region count and CPU cycles for output */ + extended_total_num_events = total_num_events + 2; + all_event_names = (char**)malloc(extended_total_num_events * sizeof(char*)); + all_event_names[0] = "region_count"; + all_event_names[1] = "cycles"; + cmp_iter = 2; + for ( i = 0; i < num_of_components; i++ ) { + for ( j = 0; j < components[i].num_of_events; j++ ) { + all_event_names[cmp_iter++] = components[i].event_names[j]; + } + } + + for ( j = 0; j < extended_total_num_events; j++ ) { + + _internal_hl_json_line_break_and_indent(f, beautifier, 6); + + /* print read values if available */ + if ( regions->values[j].read_values != NULL) { + reads_t* read_node = regions->values[j].read_values; + /* going to last node */ + while ( read_node->next != NULL ) { + read_node = read_node->next; + } + /* read values in reverse order */ + int read_cnt = 1; + fprintf(f, "\"%s\":{", all_event_names[j]); + + _internal_hl_json_line_break_and_indent(f, beautifier, 7); + fprintf(f, "\"total\":\"%lld\",", regions->values[j].total); + + while ( read_node != NULL ) { + _internal_hl_json_line_break_and_indent(f, beautifier, 
7); + fprintf(f, "\"read_%d\":\"%lld\"", read_cnt,read_node->value); + + read_node = read_node->prev; + + if ( read_node == NULL ) { + _internal_hl_json_line_break_and_indent(f, beautifier, 6); + fprintf(f, "}"); + if ( j < extended_total_num_events - 1 ) + fprintf(f, ","); + } else { + fprintf(f, ","); + } + + read_cnt++; + } + } else { + HLDBG(" %s:%lld\n", all_event_names[j], regions->values[j].total); + + if ( j == ( extended_total_num_events - 1 ) ) { + fprintf(f, "\"%s\":\"%lld\"", all_event_names[j], regions->values[j].total); + } else { + fprintf(f, "\"%s\":\"%lld\",", all_event_names[j], regions->values[j].total); + } + } + } + + free(all_event_names); +} + +static void _internal_hl_json_regions(FILE* f, bool beautifier, threads_t* thread_node) +{ + /* iterate over regions list */ + regions_t *regions = thread_node->value; + + /* going to last node */ + while ( regions->next != NULL ) { + regions = regions->next; + } + + /* read regions in reverse order */ + while (regions != NULL) { + HLDBG(" Region:%s\n", regions->region); + + _internal_hl_json_line_break_and_indent(f, beautifier, 4); + fprintf(f, "{"); + _internal_hl_json_line_break_and_indent(f, beautifier, 5); + fprintf(f, "\"%s\":{", regions->region); + + _internal_hl_json_region_events(f, beautifier, regions); + + _internal_hl_json_line_break_and_indent(f, beautifier, 5); + fprintf(f, "}"); + + regions = regions->prev; + _internal_hl_json_line_break_and_indent(f, beautifier, 4); + if (regions == NULL ) { + fprintf(f, "}"); + } else { + fprintf(f, "},"); + } + } +} + +static void _internal_hl_json_threads(FILE* f, bool beautifier, unsigned long* tids, int threads_num) +{ + int i; + + _internal_hl_json_line_break_and_indent(f, beautifier, 1); + fprintf(f, "\"threads\":["); + + /* get regions of all threads */ + for ( i = 0; i < threads_num; i++ ) + { + HLDBG("Thread ID:%lu\n", tids[i]); + /* find values of current thread in global binary tree */ + threads_t* thread_node = 
_internal_hl_find_thread_node(tids[i]); + if ( thread_node != NULL ) { + /* do we really need the exact thread id? */ + _internal_hl_json_line_break_and_indent(f, beautifier, 2); + fprintf(f, "{"); + _internal_hl_json_line_break_and_indent(f, beautifier, 3); + fprintf(f, "\"id\":\"%lu\",", thread_node->key); + + /* in case we only store iterator id as thread id */ + //fprintf(f, "\"ID\":%d,", i); + + _internal_hl_json_line_break_and_indent(f, beautifier, 3); + fprintf(f, "\"regions\":["); + + _internal_hl_json_regions(f, beautifier, thread_node); + + _internal_hl_json_line_break_and_indent(f, beautifier, 3); + fprintf(f, "]"); + + _internal_hl_json_line_break_and_indent(f, beautifier, 2); + if ( i < threads_num - 1 ) { + fprintf(f, "},"); + } else { + fprintf(f, "}"); + } + } + } + + _internal_hl_json_line_break_and_indent(f, beautifier, 1); + fprintf(f, "]"); +} + +/* Writes the recorded regions of all threads as JSON into a rank file below + * absolute_output_file_path and, if PAPI_REPORT is set, echoes the file to stdout. + * Guarded by output_generated and HIGHLEVEL_LOCK; every return taken after the + * lock was acquired releases it first. */ +static void _internal_hl_write_output() +{ + if ( output_generated == false ) + { + _papi_hwi_lock( HIGHLEVEL_LOCK ); + if ( output_generated == false ) { + /* check if events were recorded */ + if ( binary_tree == NULL ) { + verbose_fprintf(stdout, "PAPI-HL Info: No events were recorded.\n"); + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + free(absolute_output_file_path); + return; + } + unsigned long *tids = NULL; + int number_of_threads; + FILE *output_file; + /* current CPU frequency in MHz */ + int cpu_freq; + + if ( region_begin_cnt == region_end_cnt ) { + verbose_fprintf(stdout, "PAPI-HL Info: Print results...\n"); + } else { + verbose_fprintf(stdout, "PAPI-HL Warning: Cannot generate output due to not matching regions.\n"); + output_generated = true; + HLDBG("region_begin_cnt=%d, region_end_cnt=%d\n", region_begin_cnt, region_end_cnt); + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + free(absolute_output_file_path); + return; + } + + /* create new measurement directory */ + if ( ( _internal_hl_mkdir(absolute_output_file_path) ) != PAPI_OK ) { + verbose_fprintf(stdout, "PAPI-HL Error: Cannot create measurement directory %s.\n", 
absolute_output_file_path); + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + free(absolute_output_file_path); + return; + } + + /* determine rank for output file */ + int rank = _internal_hl_determine_rank(); + + if ( rank < 0 ) + { + /* generate unique rank number */ + sprintf(absolute_output_file_path + strlen(absolute_output_file_path), "/rank_XXXXXX"); + int fd; + fd = mkstemp(absolute_output_file_path); + /* a failed mkstemp is caught by the fopen check below */ + if ( fd >= 0 ) + close(fd); + } + else + { + sprintf(absolute_output_file_path + strlen(absolute_output_file_path), "/rank_%04d", rank); + } + + /* determine current cpu frequency */ + cpu_freq = PAPI_get_opt( PAPI_CLOCKRATE, NULL ); + + output_file = fopen(absolute_output_file_path, "w"); + + if ( output_file == NULL ) + { + verbose_fprintf(stdout, "PAPI-HL Error: Cannot create output file %s!\n", absolute_output_file_path); + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + free(absolute_output_file_path); + return; + } + else + { + /* list all threads: the first call (tids is still NULL) only fills in number_of_threads */ + if ( PAPI_list_threads( tids, &number_of_threads ) != PAPI_OK ) { + verbose_fprintf(stdout, "PAPI-HL Error: PAPI_list_threads call failed!\n"); + fclose(output_file); + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + free(absolute_output_file_path); + return; + } + if ( ( tids = malloc( number_of_threads * sizeof(unsigned long) ) ) == NULL ) { + verbose_fprintf(stdout, "PAPI-HL Error: OOM!\n"); + fclose(output_file); + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + free(absolute_output_file_path); + return; + } + if ( PAPI_list_threads( tids, &number_of_threads ) != PAPI_OK ) { + verbose_fprintf(stdout, "PAPI-HL Error: PAPI_list_threads call failed!\n"); + fclose(output_file); + free(tids); + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + free(absolute_output_file_path); + return; + } + + /* start writing json file */ + + /* JSON beautifier (line break and indent) */ + bool beautifier = true; + + /* start of JSON file */ + fprintf(output_file, "{"); + _internal_hl_json_line_break_and_indent(output_file, beautifier, 1); + fprintf(output_file, "\"cpu in mhz\":\"%d\",", cpu_freq); + + /* write all regions with events per thread */ + _internal_hl_json_threads(output_file, beautifier, tids, number_of_threads); + + /* end of JSON file */ + 
_internal_hl_json_line_break_and_indent(output_file, beautifier, 0); + fprintf(output_file, "}"); + fprintf(output_file, "\n"); + + fclose(output_file); + free(tids); + + if ( getenv("PAPI_REPORT") != NULL ) { + /* print output to stdout */ + printf("\n\nPAPI-HL Output:\n"); + output_file = fopen(absolute_output_file_path, "r"); + /* the file was just written, but reopening it can still fail */ + if ( output_file != NULL ) { + int c = fgetc(output_file); + while (c != EOF) + { + printf("%c", c); + c = fgetc(output_file); + } + printf("\n"); + fclose(output_file); + } + } + + } + + output_generated = true; + free(absolute_output_file_path); + } + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + } +} + +static void _internal_hl_clean_up_local_data() +{ + int i, retval; + /* destroy all EventSets from local data */ + if ( _local_components != NULL ) { + HLDBG("Thread-ID:%lu\n", PAPI_thread_id()); + for ( i = 0; i < num_of_components; i++ ) { + if ( ( retval = PAPI_stop( _local_components[i].EventSet, _local_components[i].values ) ) != PAPI_OK ) + /* only print error when event set is running */ + if ( retval != -9 ) + verbose_fprintf(stdout, "PAPI-HL Error: PAPI_stop failed: %d.\n", retval); + if ( ( retval = PAPI_cleanup_eventset (_local_components[i].EventSet) ) != PAPI_OK ) + verbose_fprintf(stdout, "PAPI-HL Error: PAPI_cleanup_eventset failed: %d.\n", retval); + if ( ( retval = PAPI_destroy_eventset (&_local_components[i].EventSet) ) != PAPI_OK ) + verbose_fprintf(stdout, "PAPI-HL Error: PAPI_destroy_eventset failed: %d.\n", retval); + free(_local_components[i].values); + } + free(_local_components); + _local_components = NULL; + + /* count global thread variable */ + _papi_hwi_lock( HIGHLEVEL_LOCK ); + num_of_cleaned_threads++; + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + } + _papi_hl_events_running = 0; + _local_state = PAPIHL_DEACTIVATED; +} + +static void _internal_hl_clean_up_global_data() +{ + int i; + int extended_total_num_events; + + /* clean up binary tree of recorded events */ + threads_t *thread_node; + if ( binary_tree != NULL ) { + while ( binary_tree->root != NULL 
) { + thread_node = *(threads_t **)binary_tree->root; + + /* clean up double linked list of region data */ + regions_t *region = thread_node->value; + regions_t *tmp; + while ( region != NULL ) { + + /* clean up read node list */ + extended_total_num_events = total_num_events + 2; + for ( i = 0; i < extended_total_num_events; i++ ) { + reads_t *read_node = region->values[i].read_values; + reads_t *read_node_tmp; + while ( read_node != NULL ) { + read_node_tmp = read_node; + read_node = read_node->next; + free(read_node_tmp); + } + } + + tmp = region; + region = region->next; + + free(tmp->region); + free(tmp); + } + free(region); + + tdelete(thread_node, &binary_tree->root, compar); + free(thread_node); + } + } + + /* we cannot free components here since other threads could still use them */ + + /* clean up requested event names */ + for ( i = 0; i < num_of_requested_events; i++ ) + free(requested_event_names[i]); + free(requested_event_names); + + free(absolute_output_file_path); +} + +static void _internal_hl_clean_up_all(bool deactivate) +{ + int i, num_of_threads; + + /* we assume that output has been already generated or + * cannot be generated due to previous errors */ + output_generated = true; + + /* clean up thread local data */ + if ( _local_state == PAPIHL_ACTIVE ) { + HLDBG("Clean up thread local data for thread %lu\n", PAPI_thread_id()); + _internal_hl_clean_up_local_data(); + } + + /* clean up global data */ + if ( state == PAPIHL_ACTIVE ) { + _papi_hwi_lock( HIGHLEVEL_LOCK ); + if ( state == PAPIHL_ACTIVE ) { + + verbose_fprintf(stdout, "PAPI-HL Info: Output generation is deactivated!\n"); + + HLDBG("Clean up global data for thread %lu\n", PAPI_thread_id()); + _internal_hl_clean_up_global_data(); + + /* check if all other registered threads have cleaned up */ + PAPI_list_threads(NULL, &num_of_threads); + + HLDBG("Number of registered threads: %d.\n", num_of_threads); + HLDBG("Number of cleaned threads: %d.\n", num_of_cleaned_threads); + + if ( 
_internal_hl_check_for_clean_thread_states() == PAPI_OK && + num_of_threads == num_of_cleaned_threads ) { + PAPI_shutdown(); + /* clean up components */ + for ( i = 0; i < num_of_components; i++ ) { + free(components[i].event_names); + free(components[i].event_codes); + free(components[i].event_types); + } + free(components); + HLDBG("PAPI-HL shutdown!\n"); + } else { + verbose_fprintf(stdout, "PAPI-HL Warning: Could not call PAPI_shutdown() since some threads still have running event sets.\n"); + } + + /* deactivate PAPI-HL */ + if ( deactivate ) + state = PAPIHL_DEACTIVATED; + } + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + } +} + +static int _internal_hl_check_for_clean_thread_states() +{ + EventSetInfo_t *ESI; + DynamicArray_t *map = &_papi_hwi_system_info.global_eventset_map; + int i; + + for( i = 0; i < map->totalSlots; i++ ) { + ESI = map->dataSlotArray[i]; + if ( ESI ) { + if ( ESI->state & PAPI_RUNNING ) + return ( PAPI_EISRUN ); + } + } + return ( PAPI_OK ); +} + +int +_internal_PAPI_hl_init() +{ + if ( state == PAPIHL_ACTIVE ) { + if ( hl_initiated == false && hl_finalized == false ) { + _internal_hl_onetime_library_init(); + /* check if the library has been initialized successfully */ + if ( state == PAPIHL_DEACTIVATED ) + return ( PAPI_EMISC ); + return ( PAPI_OK ); + } + return ( PAPI_ENOINIT ); + } + return ( PAPI_EMISC ); +} + +int _internal_PAPI_hl_cleanup_thread() +{ + if ( state == PAPIHL_ACTIVE && + hl_initiated == true && + _local_state == PAPIHL_ACTIVE ) { + /* do not clean local data from master thread */ + if ( master_thread_id != PAPI_thread_id() ) + _internal_hl_clean_up_local_data(); + return ( PAPI_OK ); + } + return ( PAPI_EMISC ); +} + +int _internal_PAPI_hl_finalize() +{ + if ( state == PAPIHL_ACTIVE && hl_initiated == true ) { + _internal_hl_clean_up_all(true); + return ( PAPI_OK ); + } + return ( PAPI_EMISC ); +} + +int +_internal_PAPI_hl_set_events(const char* events) +{ + int retval; + if ( state == PAPIHL_ACTIVE ) { + + /* This may 
only be called once after the high-level API was successfully + * initiated. Any second call just returns PAPI_OK without doing an + * expensive lock. */ + if ( hl_initiated == true ) { + if ( events_determined == false ) + { + _papi_hwi_lock( HIGHLEVEL_LOCK ); + if ( events_determined == false && state == PAPIHL_ACTIVE ) + { + HLDBG("Set events: %s\n", events); + if ( ( retval = _internal_hl_read_events(events) ) != PAPI_OK ) { + state = PAPIHL_DEACTIVATED; + _internal_hl_clean_up_global_data(); + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + return ( retval ); + } + if ( ( retval = _internal_hl_create_global_binary_tree() ) != PAPI_OK ) { + state = PAPIHL_DEACTIVATED; + _internal_hl_clean_up_global_data(); + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + return ( retval ); + } + } + _papi_hwi_unlock( HIGHLEVEL_LOCK ); + } + } + /* in case the first locked thread ran into problems */ + if ( state == PAPIHL_DEACTIVATED) + return ( PAPI_EMISC ); + return ( PAPI_OK ); + } + return ( PAPI_EMISC ); +} + +void +_internal_PAPI_hl_print_output() +{ + if ( state == PAPIHL_ACTIVE && + hl_initiated == true && + output_generated == false ) { + _internal_hl_write_output(); + } +} + +/** @class PAPI_hl_region_begin + * @brief Read performance events at the beginning of a region. + * + * @par C Interface: + * \#include @n + * int PAPI_hl_region_begin( const char* region ); + * + * @param region + * -- a unique region name + * + * @retval PAPI_OK + * @retval PAPI_ENOTRUN + * -- EventSet is currently not running or could not determined. + * @retval PAPI_ESYS + * -- A system or C library call failed inside PAPI, see the errno variable. + * @retval PAPI_EMISC + * -- PAPI has been deactivated due to previous errors. + * @retval PAPI_ENOMEM + * -- Insufficient memory. + * + * PAPI_hl_region_begin reads performance events and stores them internally at the beginning + * of an instrumented code region. + * If not specified via the environment variable PAPI_EVENTS, default events are used. 
+ * The first call sets all counters implicitly to zero and starts counting. + * Note that if PAPI_EVENTS is not set or cannot be interpreted, default performance events are + * recorded. + * + * @par Example: + * + * @code + * export PAPI_EVENTS="PAPI_TOT_INS,PAPI_TOT_CYC" + * + * @endcode + * + * + * @code + * int retval; + * + * retval = PAPI_hl_region_begin("computation"); + * if ( retval != PAPI_OK ) + * handle_error(1); + * + * //Do some computation here + * + * retval = PAPI_hl_region_end("computation"); + * if ( retval != PAPI_OK ) + * handle_error(1); + * + * @endcode + * + * @see PAPI_hl_read + * @see PAPI_hl_region_end + * @see PAPI_hl_stop + */ +int +PAPI_hl_region_begin( const char* region ) +{ + int retval; + /* if a rate event set is running stop it */ + if ( _papi_rate_events_running == 1 ) { + if ( ( retval = PAPI_rate_stop() ) != PAPI_OK ) + return ( retval ); + } + + if ( state == PAPIHL_DEACTIVATED ) { + /* check if we have to clean up local stuff */ + if ( _local_state == PAPIHL_ACTIVE ) + _internal_hl_clean_up_local_data(); + return ( PAPI_EMISC ); + } + + if ( hl_finalized == true ) + return ( PAPI_ENOTRUN ); + + if ( hl_initiated == false ) { + if ( ( retval = _internal_PAPI_hl_init() ) != PAPI_OK ) + return ( retval ); + } + + if ( events_determined == false ) { + if ( ( retval = _internal_PAPI_hl_set_events(NULL) ) != PAPI_OK ) + return ( retval ); + } + + if ( _local_components == NULL ) { + if ( ( retval = _internal_hl_create_event_sets() ) != PAPI_OK ) { + HLDBG("Could not create local events sets for thread %lu.\n", PAPI_thread_id()); + _internal_hl_clean_up_all(true); + return ( retval ); + } + } + + if ( _papi_hl_events_running == 0 ) { + if ( ( retval = _internal_hl_start_counters() ) != PAPI_OK ) { + HLDBG("Could not start counters for thread %lu.\n", PAPI_thread_id()); + _internal_hl_clean_up_all(true); + return ( retval ); + } + } + + /* read and store all events */ + HLDBG("Thread ID:%lu, Region:%s\n", PAPI_thread_id(), region); 
+ if ( ( retval = _internal_hl_read_and_store_counters(region, REGION_BEGIN) ) != PAPI_OK ) + return ( retval ); + + _local_region_begin_cnt++; + return ( PAPI_OK ); +} + +/** @class PAPI_hl_read + * @brief Read performance events inside of a region and store the difference to the corresponding + * beginning of the region. + * + * @par C Interface: + * \#include @n + * int PAPI_hl_read( const char* region ); + * + * @param region + * -- a unique region name corresponding to PAPI_hl_region_begin + * + * @retval PAPI_OK + * @retval PAPI_ENOTRUN + * -- EventSet is currently not running or could not determined. + * @retval PAPI_ESYS + * -- A system or C library call failed inside PAPI, see the errno variable. + * @retval PAPI_EMISC + * -- PAPI has been deactivated due to previous errors. + * @retval PAPI_ENOMEM + * -- Insufficient memory. + * + * PAPI_hl_read reads performance events inside of a region and stores the difference to the + * corresponding beginning of the region. + * + * Assumes that PAPI_hl_region_begin was called before. 
+ * + * @par Example: + * + * @code + * int retval; + * + * retval = PAPI_hl_region_begin("computation"); + * if ( retval != PAPI_OK ) + * handle_error(1); + * + * //Do some computation here + * + * retval = PAPI_hl_read("computation"); + * if ( retval != PAPI_OK ) + * handle_error(1); + * + * //Do some computation here + * + * retval = PAPI_hl_region_end("computation"); + * if ( retval != PAPI_OK ) + * handle_error(1); + * + * @endcode + * + * @see PAPI_hl_region_begin + * @see PAPI_hl_region_end + * @see PAPI_hl_stop + */ +int +PAPI_hl_read(const char* region) +{ + int retval; + + if ( state == PAPIHL_DEACTIVATED ) { + /* check if we have to clean up local stuff */ + if ( _local_state == PAPIHL_ACTIVE ) + _internal_hl_clean_up_local_data(); + return ( PAPI_EMISC ); + } + + if ( _local_region_begin_cnt == 0 ) { + verbose_fprintf(stdout, "PAPI-HL Warning: Cannot find matching region for PAPI_hl_read(\"%s\") for thread %lu.\n", region, PAPI_thread_id()); + return ( PAPI_EMISC ); + } + + if ( _local_components == NULL ) + return ( PAPI_ENOTRUN ); + + /* read and store all events */ + HLDBG("Thread ID:%lu, Region:%s\n", PAPI_thread_id(), region); + if ( ( retval = _internal_hl_read_and_store_counters(region, REGION_READ) ) != PAPI_OK ) + return ( retval ); + + return ( PAPI_OK ); +} + +/** @class PAPI_hl_region_end + * @brief Read performance events at the end of a region and store the difference to the + * corresponding beginning of the region. + * + * @par C Interface: + * \#include @n + * int PAPI_hl_region_end( const char* region ); + * + * @param region + * -- a unique region name corresponding to PAPI_hl_region_begin + * + * @retval PAPI_OK + * @retval PAPI_ENOTRUN + * -- EventSet is currently not running or could not determined. + * @retval PAPI_ESYS + * -- A system or C library call failed inside PAPI, see the errno variable. + * @retval PAPI_EMISC + * -- PAPI has been deactivated due to previous errors. + * @retval PAPI_ENOMEM + * -- Insufficient memory. 
+ * + * PAPI_hl_region_end reads performance events at the end of a region and stores the + * difference to the corresponding beginning of the region. + * + * Assumes that PAPI_hl_region_begin was called before. + * + * Note that PAPI_hl_region_end does not stop counting the performance events. Counting + * continues until the application terminates. Therefore, the programmer can also create + * nested regions if required. To stop a running high-level event set, the programmer must call + * PAPI_hl_stop(). It should also be noted, that a marked region is thread-local and therefore + * has to be in the same thread. + * + * An output of the measured events is created automatically after the application exits. + * In the case of a serial, or a thread-parallel application there is only one output file. + * MPI applications would be saved in multiple files, one per MPI rank. + * The output is generated in the current directory by default. However, it is recommended to + * specify an output directory for larger measurements, especially for MPI applications via + * the environment variable PAPI_OUTPUT_DIRECTORY. In the case where measurements are performed, + * while there are old measurements in the same directory, PAPI will not overwrite or delete the + * old measurement directories. Instead, timestamps are added to the old directories. + * + * For more convenience, the output can also be printed to stdout by setting PAPI_REPORT=1. This + * is not recommended for MPI applications as each MPI rank tries to print the output concurrently. + * + * The generated measurement output can also be converted in a better readable output. The python + * script papi_hl_output_writer.py enhances the output by creating some derived metrics, like IPC, + * MFlops/s, and MFlips/s as well as real and processor time in case the corresponding PAPI events + * have been recorded. 
The python script can also summarize performance events over all threads and + * MPI ranks when using the option "accumulate" as seen below. + * + * @par Example: + * + * @code + * int retval; + * + * retval = PAPI_hl_region_begin("computation"); + * if ( retval != PAPI_OK ) + * handle_error(1); + * + * //Do some computation here + * + * retval = PAPI_hl_region_end("computation"); + * if ( retval != PAPI_OK ) + * handle_error(1); + * + * @endcode + * + * @code + * python papi_hl_output_writer.py --type=accumulate + * + * { + * "computation": { + * "Region count": 1, + * "Real time in s": 0.97 , + * "CPU time in s": 0.98 , + * "IPC": 1.41 , + * "MFLIPS /s": 386.28 , + * "MFLOPS /s": 386.28 , + * "Number of ranks ": 1, + * "Number of threads ": 1, + * "Number of processes ": 1 + * } + * } + * + * @endcode + * + * @see PAPI_hl_region_begin + * @see PAPI_hl_read + * @see PAPI_hl_stop + */ +int +PAPI_hl_region_end( const char* region ) +{ + int retval; + + if ( state == PAPIHL_DEACTIVATED ) { + /* check if we have to clean up local stuff */ + if ( _local_state == PAPIHL_ACTIVE ) + _internal_hl_clean_up_local_data(); + return ( PAPI_EMISC ); + } + + if ( _local_region_begin_cnt == 0 ) { + verbose_fprintf(stdout, "PAPI-HL Warning: Cannot find matching region for PAPI_hl_region_end(\"%s\") for thread %lu.\n", region, PAPI_thread_id()); + return ( PAPI_EMISC ); + } + + if ( _local_components == NULL ) + return ( PAPI_ENOTRUN ); + + /* read and store all events */ + HLDBG("Thread ID:%lu, Region:%s\n", PAPI_thread_id(), region); + if ( ( retval = _internal_hl_read_and_store_counters(region, REGION_END) ) != PAPI_OK ) + return ( retval ); + + _local_region_end_cnt++; + return ( PAPI_OK ); +} + +/** @class PAPI_hl_stop + * @brief Stop a running high-level event set. + * + * @par C Interface: + * \#include @n + * int PAPI_hl_stop(); + * + * @retval PAPI_ENOEVNT + * -- The EventSet is not started yet. + * @retval PAPI_ENOMEM + * -- Insufficient memory to complete the operation. 
+ * + * PAPI_hl_stop stops a running high-level event set. + * + * This call is optional and only necessary if the programmer wants to use the low-level API in addition + * to the high-level API. It should be noted that PAPI_hl_stop and low-level calls are not + * allowed inside of a marked region. Furthermore, PAPI_hl_stop is thread-local and therefore + * has to be called in the same thread as the corresponding marked region. + * + * @see PAPI_hl_region_begin + * @see PAPI_hl_read + * @see PAPI_hl_region_end + */ +int +PAPI_hl_stop() +{ + int retval, i; + + if ( _papi_hl_events_running == 1 ) { + if ( _local_components != NULL ) { + for ( i = 0; i < num_of_components; i++ ) { + if ( ( retval = PAPI_stop( _local_components[i].EventSet, _local_components[i].values ) ) != PAPI_OK ) + return ( retval ); + } + } + _papi_hl_events_running = 0; + return ( PAPI_OK ); + } + return ( PAPI_ENOEVNT ); +} + diff -Nru papi-5.7.0+dfsg/src/high-level/scripts/papi_hl_output_writer.py papi-6.0.0~dfsg/src/high-level/scripts/papi_hl_output_writer.py --- papi-5.7.0+dfsg/src/high-level/scripts/papi_hl_output_writer.py 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/high-level/scripts/papi_hl_output_writer.py 2020-03-04 15:56:59.000000000 +0000 @@ -0,0 +1,386 @@ +#!/usr/bin/python +from __future__ import division +from collections import OrderedDict + +import argparse +import os +import json +# Make it work for Python 2+3 and with Unicode +import io +try: + to_unicode = unicode +except NameError: + to_unicode = str + +cpu_freq = 0 + +def merge_json_files(source): + json_object = {} + json_object["ranks"] = [] + + #get measurement files + file_list = os.listdir(source) + for item in file_list: + json_rank = {} + + #determine mpi rank based on file name (rank_#) + rank = item.split('_', 1)[1] + rank = rank.rsplit('.', 1)[0] + #print("rank: {}".format(rank)) + + json_rank["id"] = rank + + #open meaurement file + file_name = str(source) + "/rank_" + str(rank) + + try: + with 
open(file_name) as json_file: + #keep order of all objects + data = json.load(json_file, object_pairs_hook=OrderedDict) + except IOError as ioe: + print("Cannot open file {} ({})".format(file_name, repr(ioe))) + return + + #determine cpu frequency + global cpu_freq + cpu_freq = int(data['cpu in mhz']) * 1000000 + + #get all threads + json_rank["threads"] = data["threads"] + + #append current rank to json file + json_object["ranks"].append(json_rank) + + # print json.dumps(json_object,indent=2, sort_keys=False, + # separators=(',', ': '), ensure_ascii=False) + return json_object + +class Sum_Counters(object): + regions = OrderedDict() + regions_last_rank_id = {} + + def add_region(self, rank_id, region, events=OrderedDict()): + + #clean events from read values caused by PAPI_hl_read + cleaned_events = OrderedDict() + for key,value in events.items(): + metric_value = value + if isinstance(value, dict): + metric_value = float(value['total']) + cleaned_events[key] = metric_value + + if region not in self.regions: + #new region + new_region_events = cleaned_events.copy() + new_region_events['Number of ranks'] = 1 + new_region_events['Number of threads'] = 1 + new_region_events['Number of processes'] = 1 + self.regions[region] = new_region_events.copy() + self.regions_last_rank_id[region] = rank_id + else: + #add counter values to existing region + known_events = self.regions[region].copy() + new_events = cleaned_events.copy() + + #increase number of ranks when rank_id has changed + if self.regions_last_rank_id[region] == rank_id: + new_events['Number of ranks'] = 0 + else: + self.regions_last_rank_id[region] = rank_id + new_events['Number of ranks'] = 1 + + #always increase number of threads + new_events['Number of threads'] = 1 + new_events['Number of processes'] = 1 + + #add values + for event_key,event_value in known_events.items(): + if 'Number of' in event_key or 'count' in event_key: + known_events[event_key] = event_value + new_events[event_key] + else: + 
known_events[event_key] = float(format(event_value + new_events[event_key], '.2f')) + self.regions[region] = known_events.copy() + + def get_json(self): + #calculate correct thread number (number of processes / number of ranks) + for name in self.regions: + events = self.regions[name] + events['Number of threads'] = int(events['Number of processes'] / events['Number of ranks']) + return self.regions + + +def sum_json_object(json): + sum_cnt = Sum_Counters() + for ranks in json['ranks']: + for threads in ranks['threads']: + for regions in threads['regions']: + for region_key,region_value in regions.items(): + name = region_key + events = region_value + sum_cnt.add_region(ranks['id'], name, events) + + return sum_cnt.get_json() + + +def get_ipc_dict(inst, cyc): + ipc_dict = OrderedDict() + for (inst_key,inst_value), (cyc_key,cyc_value) in zip(inst.items(), cyc.items()): + #print str(inst_key) + "," + str(inst_value) + #print str(cyc_key) + "," + str(cyc_value) + try: + ipc = float(int(inst_value) / int(cyc_value)) + except: + ipc = 0 + ipc_dict[inst_key] = float(format(ipc, '.2f')) + return ipc_dict + + +def get_ops_dict(ops, rt): + ops_dict = OrderedDict() + for (ops_key,ops_value), (rt_key,rt_value) in zip(ops.items(), rt.items()): + #print str(ops_key) + "," + str(ops_value) + #print str(rt_key) + "," + str(rt_value) + ops = float(ops_value) / 1000000 / rt_value + ops_dict[ops_key] = float(format(ops, '.2f')) + return ops_dict + + +def convert_value(value, event_type = 'Other'): + if event_type == 'Other': + result = float(value) + result = float(format(result, '.2f')) + elif event_type == 'Runtime': + try: + result = float(value) / int(cpu_freq) + except: + result = 1.0 + result = float(format(result, '.2f')) + elif event_type == 'CPUtime': + result = float(value) / 1000000000 + result = float(format(result, '.2f')) + + return result + + +def format_read_events(events, event_type = 'Other'): + format_read_dict = OrderedDict() + for read_key,read_value in 
events.items(): + format_read_dict[read_key] = convert_value(read_value, event_type) + return format_read_dict + + +def format_events(events): + #keep order as declared + format_events = OrderedDict() + #remember runtime for other metrics like MFLOPS + rt = 1.0 + rt_dict = OrderedDict() + + #Region Count + if 'region_count' in events: + format_events['Region count'] = int(events['region_count']) + del events['region_count'] + + #Real Time + if 'cycles' in events: + if isinstance(events['cycles'],dict): + for read_key,read_value in events['cycles'].items(): + rt_dict[read_key] = float(read_value) / int(cpu_freq) + format_events['Real time in s'] = format_read_events(events['cycles'],'Runtime') + else: + rt = float(events['cycles']) / int(cpu_freq) + format_events['Real time in s'] = convert_value(events['cycles'], 'Runtime') + del events['cycles'] + + #CPU Time + if 'perf::TASK-CLOCK' in events: + if isinstance(events['perf::TASK-CLOCK'],dict): + format_events['CPU time in s'] = format_read_events(events['perf::TASK-CLOCK'],'CPUtime') + else: + format_events['CPU time in s'] = convert_value(events['perf::TASK-CLOCK'], 'CPUtime') + del events['perf::TASK-CLOCK'] + + #PAPI_TOT_INS and PAPI_TOT_CYC to calculate IPC + if 'PAPI_TOT_INS' and 'PAPI_TOT_CYC' in events: + if isinstance(events['PAPI_TOT_INS'],dict) and isinstance(events['PAPI_TOT_CYC'],dict): + ipc_dict = get_ipc_dict(events['PAPI_TOT_INS'], events['PAPI_TOT_CYC']) + format_events['IPC'] = ipc_dict + else: + try: + ipc = float(int(events['PAPI_TOT_INS']) / int(events['PAPI_TOT_CYC'])) + except: + ipc = 0 + format_events['IPC'] = float(format(ipc, '.2f')) + + del events['PAPI_TOT_INS'] + del events['PAPI_TOT_CYC'] + + #FLIPS + if 'PAPI_FP_INS' in events: + if isinstance(events['PAPI_FP_INS'],dict): + mflips_dict = get_ops_dict(events['PAPI_FP_INS'], rt_dict) + format_events['MFLIPS/s'] = mflips_dict + else: + mflips = float(events['PAPI_FP_INS']) / 1000000 / rt + mflips = float(format(mflips, '.2f')) + 
format_events['MFLIPS/s'] = mflips + del events['PAPI_FP_INS'] + + #SP vector instructions per second + if 'PAPI_VEC_SP' in events: + if isinstance(events['PAPI_VEC_SP'],dict): + mvecins_dict = get_ops_dict(events['PAPI_VEC_SP'], rt_dict) + format_events['Single precision vector/SIMD instructions rate in M/s'] = mvecins_dict + else: + mvecins = float(events['PAPI_VEC_SP']) / 1000000 / rt + mvecins = float(format(mvecins, '.2f')) + format_events['Single precision vector/SIMD instructions rate in M/s'] = mvecins + del events['PAPI_VEC_SP'] + + #DP vector instructions per second + if 'PAPI_VEC_DP' in events: + if isinstance(events['PAPI_VEC_DP'],dict): + mvecins_dict = get_ops_dict(events['PAPI_VEC_DP'], rt_dict) + format_events['Double precision vector/SIMD instructions rate in M/s'] = mvecins_dict + else: + mvecins = float(events['PAPI_VEC_DP']) / 1000000 / rt + mvecins = float(format(mvecins, '.2f')) + format_events['Double precision vector/SIMD instructions rate in M/s'] = mvecins + del events['PAPI_VEC_DP'] + + #FLOPS + if 'PAPI_FP_OPS' in events: + if isinstance(events['PAPI_FP_OPS'],dict): + mflops_dict = get_ops_dict(events['PAPI_FP_OPS'], rt_dict) + format_events['MFLOPS/s'] = mflops_dict + else: + mflops = float(events['PAPI_FP_OPS']) / 1000000 / rt + mflops = float(format(mflops, '.2f')) + format_events['MFLOPS/s'] = mflops + del events['PAPI_FP_OPS'] + + #SP FLOPS + if 'PAPI_SP_OPS' in events: + if isinstance(events['PAPI_SP_OPS'],dict): + mflops_dict = get_ops_dict(events['PAPI_SP_OPS'], rt_dict) + format_events['Single precision MFLOPS/s'] = mflops_dict + else: + mflops = float(events['PAPI_SP_OPS']) / 1000000 / rt + mflops = float(format(mflops, '.2f')) + format_events['Single precision MFLOPS/s'] = mflops + del events['PAPI_SP_OPS'] + + #DP FLOPS + if 'PAPI_DP_OPS' in events: + if isinstance(events['PAPI_DP_OPS'],dict): + mflops_dict = get_ops_dict(events['PAPI_DP_OPS'], rt_dict) + format_events['Double precision MFLOPS/s'] = mflops_dict + else: + 
mflops = float(events['PAPI_DP_OPS']) / 1000000 / rt + mflops = float(format(mflops, '.2f')) + format_events['Double precision MFLOPS/s'] = mflops + del events['PAPI_DP_OPS'] + + #read the rest + for event_key,event_value in events.items(): + if isinstance(event_value,dict): + format_events[event_key] = format_read_events(event_value) + else: + format_events[event_key] = convert_value(event_value) + + return format_events + + +def format_json_object(json): + json_object = {} + json_object['ranks'] = [] + + for rank in json['ranks']: + # print rank['id'] + # print rank['threads'] + json_rank = {} + json_rank['id'] = rank['id'] + json_rank['threads'] = [] + + for thread in rank['threads']: + # print thread['id'] + json_thread = {} + json_thread['id'] = thread['id'] + json_thread['regions'] = [] + for region in thread['regions']: + json_region = {} + for region_key,region_value in region.items(): + # print region_key + # print region_value + json_region[region_key] = format_events(region_value) + + json_thread['regions'].append(json_region) + json_rank['threads'].append(json_thread) + json_object['ranks'].append(json_rank) + + return json_object + +def write_json_file(data, file_name): + with io.open(file_name, 'w', encoding='utf8') as outfile: + str_ = json.dumps(data, + indent=4, sort_keys=False, + separators=(',', ': '), ensure_ascii=False) + outfile.write(to_unicode(str_)) + print (str_) + + +def main(source, format, type): + if (format == "json"): + json = merge_json_files(source) + formated_json = format_json_object(json) + + if type == 'detail': + write_json_file(formated_json, 'papi.json') + + #summarize data over threads and ranks + if type == 'accumulate': + sum_json = sum_json_object(formated_json) + write_json_file(sum_json, 'papi_sum.json') + else: + print("Format not supported!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('--source', type=str, required=False, default="papi_hl_output", + help='Measurement directory 
of raw data.') + parser.add_argument('--format', type=str, required=False, default='json', + help='Output format, e.g. json.') + parser.add_argument('--type', type=str, required=False, default='detail', + help='Output type: detail or accumulate.') + + # check if papi directory exists + source = str(parser.parse_args().source) + if os.path.isdir(source) == False: + print("Measurement directory '{}' does not exist!\n".format(source)) + parser.print_help() + parser.exit() + + # check format + output_format = str(parser.parse_args().format) + if output_format != "json": + print("Output format '{}' is not supported!\n".format(output_format)) + parser.print_help() + parser.exit() + + # check type + output_type = str(parser.parse_args().type) + if output_type != "detail" and output_type != "accumulate": + print("Output type '{}' is not supported!\n".format(output_type)) + parser.print_help() + parser.exit() + + + return parser.parse_args() + + +if __name__ == '__main__': + args = parse_args() + main(format=args.format, + source=args.source, + type=args.type) \ No newline at end of file diff -Nru papi-5.7.0+dfsg/src/libpapi.exp papi-6.0.0~dfsg/src/libpapi.exp --- papi-5.7.0+dfsg/src/libpapi.exp 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/libpapi.exp 2020-03-04 15:56:59.000000000 +0000 @@ -51,11 +51,10 @@ PAPI_thread_init PAPI_unlock PAPI_write -PAPI_accum_counters -PAPI_num_counters -PAPI_read_counters -PAPI_start_counters -PAPI_stop_counters -PAPI_flips -PAPI_flops +PAPI_flips_rate +PAPI_flops_rate PAPI_ipc +PAPI_epc +PAPI_hl_region_begin +PAPI_hl_read +PAPI_hl_region_end \ No newline at end of file diff -Nru papi-5.7.0+dfsg/src/linux-common.c papi-6.0.0~dfsg/src/linux-common.c --- papi-5.7.0+dfsg/src/linux-common.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/linux-common.c 2020-03-04 15:57:01.000000000 +0000 @@ -197,7 +197,7 @@ s = search_cpu_info( f, "model name"); strSize = sizeof(hwinfo->model_string); if ( s ) { - strncpy( 
hwinfo->model_string, s, strSize); + strncpy( hwinfo->model_string, s, strSize - 1); } /* Family */ @@ -239,7 +239,7 @@ s = search_cpu_info( f, "model"); strSize = sizeof(hwinfo->model_string); if ( s ) { - strncpy( hwinfo->model_string, s, strSize); + strncpy( hwinfo->model_string, s, strSize - 1); } return PAPI_OK; @@ -268,7 +268,7 @@ s = search_cpu_info( f, "model name"); strSize = sizeof(hwinfo->model_string); if ( s ) { - strncpy( hwinfo->model_string, s, strSize ); + strncpy( hwinfo->model_string, s, strSize - 1); } /* Architecture (ARMv6, ARMv7, ARMv8, etc.) */ @@ -368,6 +368,7 @@ *cpuinfo_mhz = mhz; } else { + *cpuinfo_mhz = -1; // Could not find it. // PAPIWARN("Failed to find a clock speed in /proc/cpuinfo"); } @@ -379,19 +380,22 @@ /* Which is the most common field */ s = search_cpu_info( f, "vendor_id"); if ( s ) { - strcpy( hwinfo->vendor_string, s ); + strncpy( hwinfo->vendor_string, s, PAPI_MAX_STR_LEN ); + hwinfo->vendor_string[PAPI_MAX_STR_LEN-1]=0; } else { /* If not found, try "vendor" which seems to be Itanium specific */ s = search_cpu_info( f, "vendor" ); if ( s ) { - strcpy( hwinfo->vendor_string, s ); + strncpy( hwinfo->vendor_string, s, PAPI_MAX_STR_LEN ); + hwinfo->vendor_string[PAPI_MAX_STR_LEN-1]=0; } else { /* "system type" seems to be MIPS and Alpha */ s = search_cpu_info( f, "system type"); if ( s ) { - strcpy( hwinfo->vendor_string, s ); + strncpy( hwinfo->vendor_string, s, PAPI_MAX_STR_LEN ); + hwinfo->vendor_string[PAPI_MAX_STR_LEN-1]=0; } else { /* "platform" indicates Power */ diff -Nru papi-5.7.0+dfsg/src/linux-lock.h papi-6.0.0~dfsg/src/linux-lock.h --- papi-5.7.0+dfsg/src/linux-lock.h 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/linux-lock.h 2020-03-04 15:57:01.000000000 +0000 @@ -201,8 +201,12 @@ " bnez %1, 1b \n" " li %1, 1 \n" " sc %1, %0 \n" +#if __mips_isa_rev < 6 " beqzl %1, 1b \n" " nop \n" +#else + " beqzc %1,1b \n" +#endif " sync \n" " .set reorder \n" : "=m" (*lock), "=&r" (tmp) diff -Nru 
papi-5.7.0+dfsg/src/linux-memory.c papi-6.0.0~dfsg/src/linux-memory.c --- papi-5.7.0+dfsg/src/linux-memory.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/linux-memory.c 2020-03-04 15:57:01.000000000 +0000 @@ -907,7 +907,7 @@ char filename[BUFSIZ],type_string[BUFSIZ]; struct dirent *d; int max_level=0; - int level_count=0,last_level=-1,level_index=0; + int level_count,level_index; PAPI_mh_level_t *L = hw_info->mem_hierarchy.level; @@ -920,6 +920,12 @@ goto unrecoverable_error; } + for (level_index=0; level_index < PAPI_MAX_MEM_HIERARCHY_LEVELS; ++level_index) { + for (level_count = 0; level_count < PAPI_MH_MAX_LEVELS; ++level_count) { + L[level_index].cache[level_count].type = PAPI_MH_TYPE_EMPTY; + } + } + while(1) { d = readdir(dir); if (d==NULL) break; @@ -950,11 +956,12 @@ /* Index arrays from 0 */ level_index=level-1; - if (level!=last_level) { - level_count=0; - last_level=level; - } else { + level_count = 0; + while (L[level_index].cache[level_count].type != PAPI_MH_TYPE_EMPTY) { level_count++; + if (level_count>=PAPI_MH_MAX_LEVELS) { + break; + } } if (level_count>=PAPI_MH_MAX_LEVELS) { @@ -1093,6 +1100,7 @@ } } + closedir(dir); hw_info->mem_hierarchy.levels = max_level; return 0; @@ -1102,6 +1110,7 @@ /* Just say we have no cache */ hw_info->mem_hierarchy.levels = 0; + if (dir) closedir(dir); /* dir may be NULL when this label is reached before a successful opendir */ return 0; } @@ -1217,7 +1226,8 @@ t_index++; tmp[t_index - 1].text_start = ( caddr_t ) begin; tmp[t_index - 1].text_end = ( caddr_t ) ( begin + size ); - strncpy( tmp[t_index - 1].name, mapname, PAPI_MAX_STR_LEN ); + strncpy( tmp[t_index - 1].name, mapname, PAPI_HUGE_STR_LEN ); + tmp[t_index - 1].name[PAPI_HUGE_STR_LEN-1]=0; } } else if ( ( perm[0] == 'r' ) && ( perm[1] == 'w' ) && ( inode != 0 ) ) { diff -Nru papi-5.7.0+dfsg/src/Makefile.in papi-6.0.0~dfsg/src/Makefile.in --- papi-5.7.0+dfsg/src/Makefile.in 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Makefile.in 2020-03-04 15:56:57.000000000 +0000 @@ -1,5 +1,5 @@ -PAPIVER=5 -PAPIREV=7 +PAPIVER=6
+PAPIREV=0 PAPIAGE=0 PAPIINC=0 PREFIX = @prefix@ @@ -75,6 +75,7 @@ PMINIT = @PMINIT@ SETPATH = @SETPATH@ SHLIB = @SHLIB@ +PAPISOVER = @PAPISOVER@ VLIB = @VLIB@ SHLIBDEPS = @SHLIBDEPS@ SHOW_CONF = @SHOW_CONF@ diff -Nru papi-5.7.0+dfsg/src/Makefile.inc papi-6.0.0~dfsg/src/Makefile.inc --- papi-5.7.0+dfsg/src/Makefile.inc 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Makefile.inc 2020-03-04 15:56:57.000000000 +0000 @@ -1,9 +1,13 @@ PAPI_SRCDIR = $(PWD) -SOURCES = $(MISCSRCS) papi.c papi_internal.c papi_hl.c extras.c sw_multiplex.c \ +SOURCES = $(MISCSRCS) papi.c papi_internal.c \ + high-level/papi_hl.c \ + extras.c sw_multiplex.c \ papi_fwrappers.c papi_fwrappers_.c papi_fwrappers__.c upper_PAPI_FWRAPPERS.c \ threads.c cpus.c $(OSFILESSRC) $(CPUCOMPONENT_C) papi_preset.c \ papi_vector.c papi_memory.c $(COMPSRCS) -OBJECTS = $(MISCOBJS) papi.o papi_internal.o papi_hl.o extras.o sw_multiplex.o \ +OBJECTS = $(MISCOBJS) papi.o papi_internal.o \ + papi_hl.o \ + extras.o sw_multiplex.o \ papi_fwrappers.o papi_fwrappers_.o papi_fwrappers__.o upper_PAPI_FWRAPPERS.o \ threads.o cpus.o $(OSFILESOBJ) $(CPUCOMPONENT_OBJ) papi_preset.o \ papi_vector.o papi_memory.o $(COMPOBJS) @@ -12,7 +16,7 @@ papi.h papi_internal.h papiStdEventDefs.h \ papi_preset.h threads.h cpus.h papi_vector.h \ papi_memory.h config.h \ - extras.h sw_multiplex.h papi_hl.h \ + extras.h sw_multiplex.h \ papi_common_strings.h components_config.h LIBCFLAGS += -I. 
$(CFLAGS) -DOSLOCK=\"$(OSLOCK)\" -DOSCONTEXT=\"$(OSCONTEXT)\" @@ -54,13 +58,13 @@ rm -f $(LIBRARY) $(AR) $(ARG64) rv $(LIBRARY) $(OBJECTS) -shared: libpapi.so libpapi.so.$(PAPIVER) +shared: libpapi.so libpapi.so.$(PAPISOVER) -libpapi.so libpapi.so.$(PAPIVER): $(SHLIB) +libpapi.so libpapi.so.$(PAPISOVER): $(SHLIB) ln -sf $(SHLIB) $@ $(SHLIB): $(HEADERS) $(SOURCES) $(SHLIBOBJS) - rm -f $(SHLIB) libpapi.so libpapi.so.$(PAPIVER); + rm -f $(SHLIB) libpapi.so libpapi.so.$(PAPISOVER) $(CC_SHR) $(LIBCFLAGS) $(OPTFLAGS) $(SOURCES) $(SHLIBOBJS) -o $@ $(SHLIBDEPS) $(LDFLAGS) @set -ex; if test "$(POST_BUILD)" != "" ; then \ -$(POST_BUILD) ; \ @@ -99,8 +103,8 @@ cpus.o: cpus.c $(HEADERS) $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c cpus.c -o cpus.o -papi_hl.o: papi_hl.c $(HEADERS) - $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi_hl.c -o papi_hl.o +papi_hl.o: high-level/papi_hl.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c high-level/papi_hl.c -o papi_hl.o aix-memory.o: aix-memory.c $(HEADERS) $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c aix-memory.c -o aix-memory.o @@ -254,7 +258,7 @@ endif clean: comp_tests_clean native_clean - rm -rf $(LIBRARY) $(SHLIB) libpapi.so libpapi.so.$(PAPIVER) $(OBJECTS) core rii_files genpapifdef *~ so_locations papi_fwrappers_.c papi_fwrappers__.c upper_PAPI_FWRAPPERS.c + rm -rf $(LIBRARY) $(SHLIB) libpapi.so libpapi.so.$(PAPISOVER) $(OBJECTS) core rii_files genpapifdef *~ so_locations papi_fwrappers_.c papi_fwrappers__.c upper_PAPI_FWRAPPERS.c $(MAKE) -C ../doc clean $(MAKE) -C ctests clean $(MAKE) -C ftests clean @@ -291,7 +295,10 @@ install-all: install install-tests -install: install-lib install-man install-utils install-pkgconf +install: install-lib install-man install-utils install-hl-scripts install-pkgconf + +install-hl-scripts: + mkdir -p $(DESTDIR)$(BINDIR) && cp high-level/scripts/papi_hl_output_writer.py $(DESTDIR)$(BINDIR)/ install-lib: native_install @echo "Headers (INCDIR) being installed in: \"$(DESTDIR)$(INCDIR)\""; @@ -310,8 +317,7 @@ cp -p $(SHLIB)
$(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC); \ chmod go+r $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) ; \ cd $(DESTDIR)$(LIBDIR); \ - ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE); \ - ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPIVER); \ + ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPISOVER); \ ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so; \ fi @@ -345,9 +351,8 @@ -mkdir -p $(DESTDIR)$(LIBPC) -chmod go+rx $(DESTDIR)$(LIBPC) cp papi.pc $(DESTDIR)$(LIBPC)/papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc - cd $(DESTDIR)$(LIBPC); ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).pc; - cd $(DESTDIR)$(LIBPC); ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc papi-$(PAPIVER).pc; - cd $(DESTDIR)$(LIBPC); ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc papi.pc; + ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc $(DESTDIR)$(LIBPC)/papi-$(PAPISOVER).pc + ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc $(DESTDIR)$(LIBPC)/papi.pc # # Dummy targets for configurations that do not also include a Rules file with targets diff -Nru papi-5.7.0+dfsg/src/Matlab/FlopsInnerProduct.m papi-6.0.0~dfsg/src/Matlab/FlopsInnerProduct.m --- papi-5.7.0+dfsg/src/Matlab/FlopsInnerProduct.m 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/FlopsInnerProduct.m 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -function FlopsInnerProduct - -% Compute an Inner Product (c = a * x) -% on elements sized from 50 to 500, -% in steps of 50. -% -% Use the PAPI flops call to measure the floating point operations performed. 
-% For each size, display: -% - number of floating point operations -% - theoretical number of operations -% - difference -% - per cent error -% - mflops/s - -fprintf(1,'\nPAPI Inner Product Test'); -fprintf(1,'\nUsing flops'); -fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n', 'difference', '% error', 'mflops') -for n=50:50:500, - a=rand(1,n);x=rand(n,1); - flops(0); - c=a*x; - [ops, mflops] = flops; - fprintf(1,'%12d %12d %12d %12d %12.2f %12.2f\n',n,ops,2*n,ops - 2*n, (1.0 - ((2*n) / ops)) * 100,mflops) -end diff -Nru papi-5.7.0+dfsg/src/Matlab/flops.m papi-6.0.0~dfsg/src/Matlab/flops.m --- papi-5.7.0+dfsg/src/Matlab/flops.m 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/flops.m 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ -% FLOPS Floating point operation count. -% FLOPS returns the cumulative number of floating point operations. -% -% FLOPS(0) - Initialize PAPI library, reset counters -% to zero and begin counting. -% ops = FLOPS - Return the number of floating point -% operations since the first call or last reset. -% [ops, mflops] = FLOPS - Return both the number of floating point -% operations since the first call or last reset, -% and the incremental rate of floating point -% execution since the last call. -% -% DESCRIPTION -% The PAPI flops function uses the PAPI Performance API to do the heavy -% lifting. PAPI takes advantage of the fact that most modern microprocessors -% have built-in hardware support for counting a variety of basic operations -% or events. PAPI uses these counters to track things like instructions -% executed, cycles elapsed, floating point instructions performed and -% a variety of other events. -% The first call to flops will initialize PAPI, set up the counters to -% monitor floating point instructions and total cpu cycles, and start -% the counters. Subsequent calls will return one or two values. 
The first -% value is the number of floating point operations since the first call or -% last reset. The second optional value, the execution rate in mflops, can -% also be returned. The mflops rate is computed by dividing the operations -% since the last call by the cycles since the last call and multiplying by -% cycles per second: -% mflops = ((ops/cycles)*(cycles/second))/10^6 -% The cycles per second value is a derived number determined empirically -% by counting cycles for a fixed amount of system time during the -% initialization of the PAPI library. Because of the way it is determined, -% this value can be a small but consistent source of systematic error, -% and can introduce differences between rates measured by PAPI and those -% determined by other time measurements, for example, tic and toc. Also -% note that PAPI on Windows counts events on a system level rather than -% a process or thread level. This can lead to an over-reporting of cycles, -% and typically an under-reporting of mflops. -% The flops function continues counting after any call. A call with an -% input of 0 resets the counters and returns 0. - -% Copyright 2001 - 2004 The Innovative Computing Laboratory, -% University of Tennessee. -% $Revision$ $Date$ - - - diff -Nru papi-5.7.0+dfsg/src/Matlab/FlopsMatrixMatrix.m papi-6.0.0~dfsg/src/Matlab/FlopsMatrixMatrix.m --- papi-5.7.0+dfsg/src/Matlab/FlopsMatrixMatrix.m 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/FlopsMatrixMatrix.m 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -function FlopsMatrixMatrix - -% Compute a Matrix Matrix multiply -% on square arrays sized from 50 to 500, -% in steps of 50. -% -% Use the PAPI flops call to measure the floating point operations performed. 
-% For each size, display: -% - number of floating point operations -% - theoretical number of operations -% - difference -% - per cent error -% - mflops/s - -fprintf(1,'\nPAPI Matrix Matrix Multiply Test'); -fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n^3', 'difference', '% error', 'mflops') -for n=50:50:500, - a=rand(n);b=rand(n);c=rand(n); - flops(0); - c=c+a*b; - [count,mflops]=flops; - fprintf(1,'%12d %12d %12d %12d %12.2f %12.2f\n',n,count,2*n^3,count - 2*n^3, (1.0 - ((2*n^3) / count)) * 100,mflops) -end \ No newline at end of file diff -Nru papi-5.7.0+dfsg/src/Matlab/FlopsMatrixVector.m papi-6.0.0~dfsg/src/Matlab/FlopsMatrixVector.m --- papi-5.7.0+dfsg/src/Matlab/FlopsMatrixVector.m 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/FlopsMatrixVector.m 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -function FlopsMatrixVector - -% Compute a Matrix Vector multiply -% on arrays and vectors sized from 50 to 500, -% in steps of 50. -% -% Use the PAPI flops call to measure the floating point operations performed. 
-% For each size, display: -% - number of floating point operations -% - theoretical number of operations -% - difference -% - per cent error -% - mflops/s - -fprintf(1,'\nPAPI Matrix Vector Multiply Test'); -fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n^2', 'difference', '% error', 'mflops') -for n=50:50:500, - a=rand(n);x=rand(n,1); - flops(0); - b=a*x; - [count,mflops]=flops; - fprintf(1,'%12d %12d %12d %12d %12.2f %12.2f\n',n,count,2*n^2,count - 2*n^2, (1.0 - ((2*n^2) / count)) * 100,mflops) -end \ No newline at end of file diff -Nru papi-5.7.0+dfsg/src/Matlab/flops.readme papi-6.0.0~dfsg/src/Matlab/flops.readme --- papi-5.7.0+dfsg/src/Matlab/flops.readme 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/flops.readme 1970-01-01 00:00:00.000000000 +0000 @@ -1,83 +0,0 @@ -Running PAPI FLOPS in the MATLAB Environment - -If you want to measure the rate of floating point operations -in a MATLAB program, PAPI FLOPS is a good way to do it. -PAPI FLOPS uses the PAPI library and underlying hardware counters -to accurately compute both the total number of floating point operations -and the rate of floating point execution in a section of -MATLAB code. - -For more information on the flops function, -you can type 'help flops' inside MATLAB. - -FLOPS - Has 3 execution possibilities: - Initialize FLOP counting or reset the counters with: - FLOPS(0) - Record the number of floating point instructions since - initialization: - ops = FLOPS - Record the number of floating point instructions and - the incremental rate of floating point execution - since initialization: - [ops, mflips] = FLOPS - -PAPI_flops.c, when compiled, turns into a callable function in MATLAB. - In order to use this function, you need to know a little -about mex. mex is simply the compiler you use to make your -code run in the MATLAB environment. If you don't know -how to use mex, you might want to acquaint yourself a bit. 
- -"mex -setup "might be needed if you encounter problems, but -the simplest explanation might be to substitute "mex" for -"gcc" and you are on your way. - -All the other rules for compiling PAPI are the same. mex -compilations can de done inside or outside of the Matlab -environment, but in this case, it is recommended that you -compile outside of Matlab. For some reason, compiling -inside does not work on some systems. - -So far, the Linux environment and the Windows environment -have been tested, but _in theory_ this code should work anywhere -PAPI and Matlab both work. - -The following instructions are for a Linux/Unix environment: - -Assuming papi.h is present in /usr/local/include and libpapi.a -is present in /usr/local/lib, the below should work. If not, -you may need to alter the compile strings and/or the #include -statement in PAPI_flops.c. Also, the compile string will be -different for different platforms. - -For instance, if I want to compile and run on a linux -machine assuming PAPI_flops.c is in your current working -directory (you'll have a different compile string on a -different architecture): - -1. Compile the wrapper: -mex -I/usr/local/include PAPI_flops.c /usr/local/lib/libpapi.a -output flops - -2. Start Matlab: -matlab - -3. Run the code: - -b. Play with FLOPS - the first initializes the counting; - the second returns the number of floating point instructions - since the first call, and the third line does the same as the - first AND reports the incremental rate of floating point - execution since the last call: -FLOPS(0) -ins = FLOPS -[ins, mflips] = FLOPS - -c. Try the example m files included with the distribution: -FlopsInnerProduct.m -FlopsMatrixVector.m -FlopsMatrixMatrix.m -FlopsSampler.m - -Contact jthomas@cs.utk.edu with any questions regarding -PAPI calls in Matlab - either errors or questions. -Also, this has just been implemented, so changes could -be coming.......... 
diff -Nru papi-5.7.0+dfsg/src/Matlab/FlopsSampler.m papi-6.0.0~dfsg/src/Matlab/FlopsSampler.m --- papi-5.7.0+dfsg/src/Matlab/FlopsSampler.m 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/FlopsSampler.m 1970-01-01 00:00:00.000000000 +0000 @@ -1,135 +0,0 @@ -function FlopsSampler(n) - -% A Sampler of Matlab functions that consume Floating Point Operations -% in increasing order of floating point intensity. -% -% FlopsSampler(n) - where n == array or vector size -% - -fprintf(1,'\nCounts Using PAPI\n'); -fprintf(1,'\n%24s %12s %14s %12s\n', 'Operations', 'n', 'fl pt ops', 'Mflop/s' ) -s1=rand(1,1);s2=rand(1,1); -x=rand(n,1);y=rand(n,1); -a=rand(n); -b=a; -c=a*a'; - -fprintf(1,'%25s', 'calling PAPI flops') -flops(0); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'dot product') -flops(0); -x'*y; -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'matrix vector') -flops(0); -a*x; -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'random matrix') -flops(0); -a=rand(n); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'chol(a)') -flops(0); -chol(c); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'lu(a)') -flops(0); -lu(a); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'x=a\y') -flops(0); -x=a\y; -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'condest(a)') -flops(0); -condest(a); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'qr(a)') -flops(0); -qr(a); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'matrix multiply') -flops(0); -a*b; -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'inv(a)') -flops(0); -inv(a); 
-[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'svd(a)') -flops(0); -svd(a); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'cond(a)') -flops(0); -cond(a); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'hess(a)') -flops(0); -hess(a); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'eig(a)') -flops(0); -eig(a); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', '[u,s,v]=svd(a)') -flops(0); -[u,s,v]=svd(a); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 'pinv(a)') -flops(0); -pinv(a); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', 's=gsvd(a)') -flops(0); -s=gsvd(a,b); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', '[x,e]=eig(a)') -flops(0); -[x,e]=eig(a); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - -fprintf(1,'%25s', ' [u,v,x,c,s]=gsvd(a,b)') -flops(0); -[u,v,x,c,s]=gsvd(a,b); -[ops,mflops]=flops; -fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) - diff -Nru papi-5.7.0+dfsg/src/Matlab/PAPI_flops.c papi-6.0.0~dfsg/src/Matlab/PAPI_flops.c --- papi-5.7.0+dfsg/src/Matlab/PAPI_flops.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/PAPI_flops.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,88 +0,0 @@ -#include "mex.h" -#include "matrix.h" -#include "papi.h" - -static long long accum_error = 0; -static long long start_time = 0; - -void mexFunction(int nlhs, mxArray *plhs[], - int nrhs, const mxArray *prhs[]) { - float real_time, proc_time, rate; - double *x; - unsigned int mrows, ncols; - int result; - unsigned int flop_events[2]; - long long ins = 0, flop_values[2]; - long long elapsed_time; - - /* Check for proper number of arguments. 
*/ - if(nrhs > 1) { - mexErrMsgTxt("This function expects one optional input."); - } else if(nlhs > 2) { - mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, mflops]."); - } - /* The input must be a noncomplex scalar double.*/ - if(nrhs == 1) { - mrows = mxGetM(prhs[0]); - ncols = mxGetN(prhs[0]); - if(!mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]) || !(mrows == 1 && ncols == 1)) { - mexErrMsgTxt("Input must be a noncomplex scalar double."); - } - /* Assign a pointer to the input. */ - x = mxGetPr(prhs[0]); - - /* if input is 0, reset the counters by calling PAPI_stop_counters with 0 values */ - if(*x == 0) { - if (start_time == 0) { - PAPI_stop_counters(NULL, 0); - accum_error = 0; - } else { - start_time = 0; - PAPI_stop_counters(flop_values, 2); - } - } - } - if(result = PAPI_event_name_to_code("EMON_SSE_SSE2_COMP_INST_RETIRED_PACKED_DOUBLE", &(flop_events[0])) < PAPI_OK) { - if(result = PAPI_flops( &real_time, &proc_time, &ins, &rate) 0) { - plhs[0] = mxCreateScalarDouble((double)(ins - accum_error)); - /* this call adds 7 fp instructions to the total */ - /* but apparently not on Pentium M with Matlab 7.0.4 */ -// accum_error += 7; - if(nlhs == 2) { - plhs[1] = mxCreateScalarDouble((double)rate); - /* the second call adds 4 fp instructions to the total */ - /* but apparently not on Pentium M with Matlab 7.0.4 */ -// accum_error += 4; - } - } -} diff -Nru papi-5.7.0+dfsg/src/Matlab/PAPI_flops.vcproj papi-6.0.0~dfsg/src/Matlab/PAPI_flops.vcproj --- papi-5.7.0+dfsg/src/Matlab/PAPI_flops.vcproj 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/PAPI_flops.vcproj 1970-01-01 00:00:00.000000000 +0000 @@ -1,208 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -Nru papi-5.7.0+dfsg/src/Matlab/PAPIInnerProduct.m papi-6.0.0~dfsg/src/Matlab/PAPIInnerProduct.m --- papi-5.7.0+dfsg/src/Matlab/PAPIInnerProduct.m 2019-03-04 19:56:23.000000000 +0000 +++ 
papi-6.0.0~dfsg/src/Matlab/PAPIInnerProduct.m 2020-03-04 15:56:57.000000000 +0000 @@ -5,8 +5,8 @@ % in steps of 50. % % Use the PAPI mex function with two different methods: -% - The PAPI High Level flops call -% - PAPI High Level start/stop calls +% - The PAPI flops call +% - PAPI start/stop calls % % For each size, display: % - number of floating point operations @@ -16,7 +16,7 @@ % - mflops/s fprintf(1,'\n\nPAPI Inner Product Test'); -fprintf(1,'\nUsing the High Level PAPI("flops") call'); +fprintf(1,'\nUsing the PAPI("flops") call'); fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n', 'difference', '% error', 'mflops') for n=50:50:500, a=rand(1,n);x=rand(n,1); diff -Nru papi-5.7.0+dfsg/src/Matlab/PAPI.m papi-6.0.0~dfsg/src/Matlab/PAPI.m --- papi-5.7.0+dfsg/src/Matlab/PAPI.m 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/PAPI.m 2020-03-04 15:56:57.000000000 +0000 @@ -10,9 +10,9 @@ % to the input values. % PAPI('ipc') - Begin counting instructions. % ins = PAPI('ipc') - Return the number of instructions executed -% since the first call. +% since the last call. % [ins, ipc] = PAPI('ipc') - Return both the total number of instructions -% executed since the first call, and the +% executed since the last call, and the % incremental rate of instruction execution % since the last call. % PAPI('flips') @@ -20,11 +20,11 @@ % instructions or operations. % ins = PAPI('flips') % ops = PAPI('flops') - Return the number of floating point instruc- -% tions or operations since the first call. +% tions or operations since the last call. % [ins, mflips] = PAPI('flips') % [ops, mflops] = PAPI('flops') - % Return both the number of floating point -% instructions or operations since the first +% instructions or operations since the last % call, and the incremental rate of floating % point execution since since the last call. 
% @@ -55,7 +55,7 @@ % to this subfunction, and the rate of execution of instructions % (as instructions per cycle) since the last call. % 'flips' - returns the total floating point instructions executed since -% the first call to this subfunction, and the rate of execution +% the last call to this subfunction, and the rate of execution % of floating point instructions (as mega-floating point % instructions per second, or mflips) since the last call. % A floating point instruction is defined as whatever this cpu diff -Nru papi-5.7.0+dfsg/src/Matlab/PAPI_Matlab.c papi-6.0.0~dfsg/src/Matlab/PAPI_Matlab.c --- papi-5.7.0+dfsg/src/Matlab/PAPI_Matlab.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/PAPI_Matlab.c 2020-03-04 15:56:57.000000000 +0000 @@ -4,19 +4,43 @@ /****************************/ /** - * @file: PAPI_Matlab.c - * CVS: $Id$ - * @author Joseph Thomas + * @file: PAPI_Matlab.c + * @author Frank Winkler * * @brief PAPI Matlab integration. * See PAPI_Matlab.readme for more information. 
*/ + +#define FLIPS_EVENT PAPI_FP_INS +#define FLOPS_EVENT PAPI_FP_OPS + #include "mex.h" #include "matrix.h" #include "papi.h" static long long accum_error = 0; static long long start_time = 0; +int EventSet = PAPI_NULL; +int papi_init = 0; +int papi_start = 0; + +void initialize_papi() { + int result; + + /* initialize PAPI */ + result = PAPI_library_init(PAPI_VER_CURRENT); + if(result < PAPI_OK) { + mexPrintf("Error code: %d\n", result); + mexErrMsgTxt("Error PAPI_create_eventset."); + } + + /* create EventSet */ + result = PAPI_create_eventset(&EventSet); + if(result < PAPI_OK) { + mexPrintf("Error code: %d\n", result); + mexErrMsgTxt("Error PAPI_library_init."); + } +} void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { @@ -34,6 +58,11 @@ char no_input[] = "This function expects no input."; char error_reading[] = "Error reading the running counters."; + if ( papi_init == 0 ) { + initialize_papi(); + papi_init = 1; + } + /* Check for proper number of arguments. 
*/ if(nrhs < 1) { mexErrMsgTxt("This function expects input."); @@ -49,7 +78,7 @@ else if(nlhs != 1) { mexErrMsgTxt("This function produces one and only one output: counters."); } - result = PAPI_num_counters(); + result = PAPI_num_cmp_hwctrs(0); if(result < PAPI_OK) { mexPrintf("Error code: %d\n", result); mexErrMsgTxt("Error reading counters."); @@ -62,34 +91,44 @@ mexErrMsgTxt(no_input); } else if(nlhs > 2) { if (input[2] == 'i') - mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, mflips]."); + mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, mflips]."); else - mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, mflops]."); + mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, mflops]."); } if (input[2] == 'i') { - if(result = PAPI_flips( &real_time, &proc_time, &ins, &rate) (PAPI_num_counters() + 1)) { + if(nrhs > (PAPI_num_cmp_hwctrs(0) + 1)) { mexErrMsgTxt(one_output); } mrows = mxGetM(prhs[1]); @@ -133,7 +172,7 @@ temp = mxArrayToString(prhs[i]); if(result = PAPI_event_name_to_code(temp, &(events[i - 1])) < PAPI_OK) { mxFree(temp); - mexPrintf("Error code: %d\n", result); + mexPrintf("Error code: %d\n", result); mexErrMsgTxt("Incorrect PAPI code given."); } mxFree(temp); @@ -142,11 +181,23 @@ events[i - 1] = (unsigned int)mxGetScalar(prhs[i]); } } - if((result = PAPI_start_counters(events, nrhs - 1)) < PAPI_OK) { + + if((result = PAPI_cleanup_eventset(EventSet)) < PAPI_OK) + mexErrMsgTxt("Error PAPI_cleanup_eventset"); + + for (i = 0; i < nrhs - 1; i++) { + result = PAPI_add_event(EventSet, events[i]); + if(result < PAPI_OK) { + mexPrintf("Error code: %d\n", result); + mexErrMsgTxt("Error PAPI_add_event."); + } + } + if((result = PAPI_start(EventSet)) < PAPI_OK) { mxFree(events); mexPrintf("Error code: %d\n", result); mexErrMsgTxt("Error initializing counters."); } + papi_start = 1; mxFree(events); } @@ -154,47 +205,49 @@ if(nrhs != 1) { mexErrMsgTxt(no_input); } - number_of_counters = PAPI_num_counters(); + 
number_of_counters = PAPI_num_cmp_hwctrs(0); if(nlhs > number_of_counters ) { mexErrMsgTxt(one_output); } - if (nlhs == 0) - values = (long long*)mxCalloc(number_of_counters, sizeof(long long)); - else - values = (long long *)mxCalloc(nlhs, sizeof(long long) + 1); - - if (start_time == 0) { - if (nlhs == 0) - result = PAPI_stop_counters(values, number_of_counters); - else - result = PAPI_stop_counters(values, nlhs); - } else { - start_time = 0; - result = PAPI_stop_counters(flop_values, 2); - } - - if(result < PAPI_OK) { - if(result != PAPI_ENOTRUN) { - mexPrintf("Error code: %d\n", result); - mexErrMsgTxt("Error stopping the running counters."); - } - } - accum_error = 0; - for(i = 0; i < nlhs; i++) { - plhs[i] = mxCreateDoubleScalar((double)values[i]); - } - mxFree(values); + if (nlhs == 0) + values = (long long*)mxCalloc(number_of_counters, sizeof(long long)); + else + values = (long long *)mxCalloc(nlhs, sizeof(long long) + 1); + + result = PAPI_OK; + if (start_time == 0) { + if ( papi_start == 1 ) + result = PAPI_stop(EventSet, values); + } else { + start_time = 0; + if ( papi_start == 1 ) + result = PAPI_stop(EventSet, flop_values); + } + PAPI_rate_stop(); + papi_start = 0; + + if(result < PAPI_OK) { + if(result != PAPI_ENOTRUN) { + mexPrintf("Error code: %d\n", result); + mexErrMsgTxt("Error stopping the running counters."); + } + } + accum_error = 0; + for(i = 0; i < nlhs; i++) { + plhs[i] = mxCreateDoubleScalar((double)values[i]); + } + mxFree(values); } else if(!strncmp(input, "read", 4)) { if(nrhs != 1) { mexErrMsgTxt(no_input); } - if(nlhs > PAPI_num_counters()) { + if(nlhs > PAPI_num_cmp_hwctrs(0)) { mexErrMsgTxt(one_output); } values = (long long *)mxCalloc(nlhs, sizeof(long long) + 1); - if((result = PAPI_read_counters(values, nlhs)) < PAPI_OK) { + if((result = PAPI_read(EventSet, values)) < PAPI_OK) { mexPrintf("%d\n", result); mexErrMsgTxt(error_reading); } @@ -205,17 +258,17 @@ } else if(!strncmp(input, "accum", 5)) { - if(nrhs > 
PAPI_num_counters() + 1) { + if(nrhs > PAPI_num_cmp_hwctrs(0) + 1) { mexErrMsgTxt(no_input); } - if(nlhs > PAPI_num_counters()) { + if(nlhs > PAPI_num_cmp_hwctrs(0)) { mexErrMsgTxt(one_output); } values = (long long *)mxCalloc(nlhs, sizeof(long long) + 1); for(i = 0; i < nrhs - 1; i++) { values[i] = (long long)(*(mxGetPr(prhs[i + 1]))); } - if(result = PAPI_accum_counters(values, nlhs) < PAPI_OK) { + if(result = PAPI_accum(EventSet, values) < PAPI_OK) { mexPrintf("Error code: %d\n", result); mexErrMsgTxt(error_reading); } diff -Nru papi-5.7.0+dfsg/src/Matlab/PAPI_Matlab.dsw papi-6.0.0~dfsg/src/Matlab/PAPI_Matlab.dsw --- papi-5.7.0+dfsg/src/Matlab/PAPI_Matlab.dsw 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/PAPI_Matlab.dsw 1970-01-01 00:00:00.000000000 +0000 @@ -1,41 +0,0 @@ -Microsoft Developer Studio Workspace File, Format Version 6.00 -# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE! - -############################################################################### - -Project: "PAPI_Matlab"=.\PAPI_Matlab.dsp - Package Owner=<4> - -Package=<5> -{{{ -}}} - -Package=<4> -{{{ -}}} - -############################################################################### - -Project: "PAPI_flops"=.\PAPI_flops.dsp - Package Owner=<4> - -Package=<5> -{{{ -}}} - -Package=<4> -{{{ -}}} - -############################################################################### - -Global: - -Package=<5> -{{{ -}}} - -Package=<3> -{{{ -}}} - -############################################################################### - diff -Nru papi-5.7.0+dfsg/src/Matlab/PAPI_Matlab.readme papi-6.0.0~dfsg/src/Matlab/PAPI_Matlab.readme --- papi-5.7.0+dfsg/src/Matlab/PAPI_Matlab.readme 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/PAPI_Matlab.readme 2020-03-04 15:56:57.000000000 +0000 @@ -1,14 +1,9 @@ -Running PAPI's High Level API in the MATLAB Environment +Running PAPI in the MATLAB Environment If you have the desire to do this, you most likely already know 
why you want to make calls to PAPI inside of a MATLAB environment. -If you don't know much about what composes PAPI's high -level API, you should probably take a look at this: - -http://icl.cs.utk.edu/projects/papi/files/documentation/PAPI_USER_GUIDE_23.htm#WHAT_IS_HIGH_LEVEL_API - This section of the PAPI user guide covers C and FORTRAN calls, but at the moment, you can only make C calls from the MATLAB environment. @@ -17,23 +12,23 @@ you specify which of the 6 specific functions you want to call, and then the arguments to each. Here are some examples: -PAPI_num_counters - Returns the number of available +PAPI_num_cmp_hwctrs(0) - Returns the number of available preset hardware counters on the system. Ex: num_counters = PAPI('num') -PAPI_flips - Has 3 possibilities: +PAPI_flips_rate - Has 3 possibilities: Initialize FLIP counting with: PAPI('flips') Record the number of floating point instructions since - initialization: + latest call: ops = PAPI('flips') Record the number of floating point instructions and the incremental rate of floating point execution - since initialization: + since latest call: [ops, mflips] = PAPI('flips') - Use PAPI_stop_counters to stop counting flips and reset the counters. + Use PAPI('stop') to stop counting flips and reset the counters. PAPI_flops - Identical to PAPI_flips, but counts floating point *operations* rather than instructions. @@ -52,7 +47,7 @@ [ins, ipc] = PAPI('ipc') -PAPI_start_counters - Specify the events to count +PAPI_start - Specify the events to count (in text form or the actual numeric code; NOTE: make sure to not confuse normal decimal and hexadecimal.) You cannot specify more events than there are hardware counters. @@ -61,16 +56,15 @@ PAPI('start', 'PAPI_TOT_CYC', 'PAPI_TOT_INS'); -PAPI_read_counters - Simply specify the variables to read +PAPI_read - Simply specify the variables to read the values into. You cannot specify more variables - than there are hardware counters. This will reset the - counters. 
+ than there are hardware counters. To read the above events you just started: [cycles, instructions] = PAPI('read'); -PAPI_accum_counters - This function adds the value you +PAPI_accum - This function adds the value you pass to the readings in the hardware counter. You cannot specify more variables than there are hardware counters. This function will reset the counters. @@ -80,7 +74,7 @@ [cycles, instructions] = PAPI('accum', cycles, instructions); -PAPI_stop_counters - This function reads the value of +PAPI_stop - This function reads the value of the running hardware counters into the variables you specify. You cannot specify more variables than there are hardware counters. @@ -123,6 +117,20 @@ directory (you'll have a different compile string on a different architecture): +0. Define the events for flops and flips in PAPI_Matlab.c: +ONE of the three presets for flips: +- PAPI_FP_INS +- PAPI_VEC_SP +- PAPI_VEC_DP +ONE of the three presets for flops: +- PAPI_FP_OPS +- PAPI_SP_OPS +- PAPI_DP_OPS + +Example: +#define FLIPS_EVENT PAPI_FP_INS +#define FLOPS_EVENT PAPI_FP_OPS + 1. Compile the wrapper: mex -I/usr/local/include PAPI_Matlab.c /usr/local/lib/libpapi.so -output PAPI @@ -164,7 +172,7 @@ e. Start counting: PAPI('start', 'PAPI_TOT_CYC', 'PAPI_TOT_INS') -f. Read the counters and reset: +f. Read the counters: [cycles, instr] = PAPI('read') g. Add the current value of the counters to a previous read @@ -178,7 +186,7 @@ recorded, as long as that number does not exceed the number of available hardware counters. -Contact ralph@eecs.utk.edu or/and ptools-perfapi@icl.utk.edu +Contact ptools-perfapi@icl.utk.edu with any questions regarding PAPI calls in Matlab - either errors or questions. Also, this has just been implemented, so changes could be coming.......... 
diff -Nru papi-5.7.0+dfsg/src/Matlab/PAPI_Matlab.vcproj papi-6.0.0~dfsg/src/Matlab/PAPI_Matlab.vcproj --- papi-5.7.0+dfsg/src/Matlab/PAPI_Matlab.vcproj 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/PAPI_Matlab.vcproj 1970-01-01 00:00:00.000000000 +0000 @@ -1,220 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -Nru papi-5.7.0+dfsg/src/Matlab/PAPIMatrixMatrix.m papi-6.0.0~dfsg/src/Matlab/PAPIMatrixMatrix.m --- papi-5.7.0+dfsg/src/Matlab/PAPIMatrixMatrix.m 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/PAPIMatrixMatrix.m 2020-03-04 15:56:57.000000000 +0000 @@ -5,8 +5,8 @@ % in steps of 50. % % Use the PAPI mex function with two different methods: -% - The PAPI High Level flops call -% - PAPI High Level start/stop calls +% - The PAPI flops call +% - PAPI start/stop calls % % For each size, display: % - number of floating point operations @@ -16,7 +16,7 @@ % - mflops/s fprintf(1,'\nPAPI Matrix Matrix Multiply Test'); -fprintf(1,'\nUsing the High Level PAPI("flops") call'); +fprintf(1,'\nUsing the PAPI("flops") call'); fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n^3', 'difference', '% error', 'mflops') for n=50:50:500, a=rand(n);b=rand(n);c=rand(n); diff -Nru papi-5.7.0+dfsg/src/Matlab/PAPIMatrixVector.m papi-6.0.0~dfsg/src/Matlab/PAPIMatrixVector.m --- papi-5.7.0+dfsg/src/Matlab/PAPIMatrixVector.m 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Matlab/PAPIMatrixVector.m 2020-03-04 15:56:57.000000000 +0000 @@ -5,8 +5,8 @@ % in steps of 50. 
% % Use the PAPI mex function with two different methods: -% - The PAPI High Level flops call -% - PAPI High Level start/stop calls +% - The PAPI flops call +% - PAPI start/stop calls % % For each size, display: % - number of floating point operations @@ -16,7 +16,7 @@ % - mflops/s fprintf(1,'\nPAPI Matrix Vector Multiply Test'); -fprintf(1,'\nUsing the High Level PAPI("flops") call'); +fprintf(1,'\nUsing the PAPI("flops") call'); fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n^2', 'difference', '% error', 'mflops') for n=50:50:500, a=rand(n);x=rand(n,1); diff -Nru papi-5.7.0+dfsg/src/papi.c papi-6.0.0~dfsg/src/papi.c --- papi-5.7.0+dfsg/src/papi.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/papi.c 2020-03-04 15:57:01.000000000 +0000 @@ -15,6 +15,8 @@ * london@cs.utk.edu * @author Per Ekman * pek@pdc.kth.se +* @author Frank Winkler +* frank.winkler@icl.utk.edu * Mods: Gary Mohr * gary.mohr@bull.com * @@ -24,7 +26,9 @@ #include #include #include -#include +#include +#include +#include #include "papi.h" #include "papi_internal.h" @@ -35,7 +39,545 @@ #include "cpus.h" #include "extras.h" #include "sw_multiplex.h" -#include "papi_hl.h" + + +/* simplified papi functions for event rates */ + +/* For dynamic linking to libpapi */ +/* Weak symbol for pthread_once to avoid additional linking + * against libpthread when not used. 
*/ +#pragma weak pthread_once + +#define STOP 0 +#define FLIP 1 +#define FLOP 2 +#define IPC 3 +#define EPC 4 + +/** \internal + * This is stored per thread + */ +typedef struct _RateInfo +{ + int EventSet; /**< EventSet of the thread */ + int event_0; /**< first event of the eventset */ + short int running; /**< STOP, FLIP, FLOP, IPC or EPC */ + long long last_real_time; /**< Previous value of real time */ + long long last_proc_time; /**< Previous value of processor time */ +} RateInfo; + +THREAD_LOCAL_STORAGE_KEYWORD RateInfo *_rate_state = NULL; +bool _papi_rate_initiated = false; + +static void _internal_papi_init(void); +static void _internal_onetime_papi_init(void); +static int _start_new_rate_call(float *real_time, float *proc_time, int *events, + int num_events, long long *ins, float *rate); +static int _rate_calls( float *real_time, float *proc_time, int *events, + long long *values, long long *ins, float *rate, int mode ); +static int _internal_check_rate_state(); + + +static void _internal_papi_init(void) +{ + /* This function is only called by the first thread! 
*/ + int retval; + + /* check if user has already initialzed PAPI with thread support */ + if ( init_level != ( PAPI_LOW_LEVEL_INITED | PAPI_THREAD_LEVEL_INITED ) ) { + if ( ( retval = PAPI_library_init(PAPI_VER_CURRENT) ) != PAPI_VER_CURRENT ) { + fprintf( stderr, "PAPI Error: PAPI_library_init failed with return value %d.\n", retval); + } else { + + if ((retval = PAPI_thread_init(&pthread_self)) != PAPI_OK) { + fprintf( stderr, "PAPI Error: PAPI_thread_init failed with return value %d.\n", retval); + fprintf( stderr, "PAPI Error: PAPI could not be initiated!\n"); + } else { + _papi_rate_initiated = true; + } + } + } else { + _papi_rate_initiated = true; + } +} + +static void _internal_onetime_papi_init(void) +{ + static pthread_once_t library_is_initialized = PTHREAD_ONCE_INIT; + if ( pthread_once ) { + /* we assume that this function was called from a parallel region */ + pthread_once(&library_is_initialized, _internal_papi_init); + /* wait until first thread has finished */ + int i = 0; + /* give it 5 seconds in case PAPI_thread_init crashes */ + while ( !_papi_rate_initiated && (i++) < 500000 ) + usleep(10); + } else { + /* we assume that this function was called from a serial application + * that was not linked against libpthread */ + _internal_papi_init(); + } +} + +static int +_internal_check_rate_state() +{ + /* check if PAPI is initialized for rate functions */ + if ( _papi_rate_initiated == false ) { + _internal_onetime_papi_init(); + + if ( _papi_rate_initiated == false ) + return ( PAPI_EINVAL ); + } + + if ( _rate_state== NULL ) { + _rate_state= ( RateInfo* ) papi_malloc( sizeof ( RateInfo ) ); + if ( _rate_state== NULL ) + return ( PAPI_ENOMEM ); + + memset( _rate_state, 0, sizeof ( RateInfo ) ); + _rate_state->running = STOP; + } + return ( PAPI_OK ); +} + +/** @class PAPI_flips_rate + * @brief Simplified call to get Mflips/s (floating point instruction rate), real and processor time. 
+ * + * @par C Interface: + * \#include @n + * int PAPI_flips_rate( int event, float *rtime, float *ptime, long long *flpins, float *mflips ); + * + * @param event + * one of the three presets PAPI_FP_INS, PAPI_VEC_SP or PAPI_VEC_DP + * @param *rtime + * realtime since the latest call + * @param *ptime + * process time since the latest call + * @param *flpins + * floating point instructions since the latest call + * @param *mflips + * incremental (Mega) floating point instructions per seconds since the latest call + * + * @retval PAPI_EINVAL + * The counters were already started by something other than PAPI_flips_rate(). + * @retval PAPI_ENOEVNT + * The floating point instructions event does not exist. + * @retval PAPI_ENOMEM + * Insufficient memory to complete the operation. + * + * The first call to PAPI_flips_rate() will initialize the PAPI interface, + * set up the counters to monitor the floating point instructions event and start the counters. + * + * Subsequent calls will read the counters and return real time, process time, + * floating point instructions and the Mflip/s rate since the latest call to PAPI_flips_rate(). + * + * PAPI_flips_rate() returns information related to floating point instructions using + * the floating point instructions event. This is intended to measure instruction rate through the + * floating point pipe with no massaging. Note that PAPI_flips_rate() is thread-safe and can + * therefore be called by multiple threads. 
+ * + * @see PAPI_flops_rate() + * @see PAPI_ipc() + * @see PAPI_epc() + */ +int +PAPI_flips_rate( int event, float *rtime, float *ptime, long long *flpins, float *mflips ) +{ + int retval; + + /* check event first */ + if ( event == PAPI_FP_INS || event == PAPI_VEC_DP || event == PAPI_VEC_SP ) { + + int events[1] = {event}; + long long values = 0; + + if ( rtime == NULL || ptime == NULL || + flpins == NULL || mflips == NULL ) { + return PAPI_EINVAL; + } + + retval = _rate_calls( rtime, ptime, events, + &values, flpins, mflips, FLIP ); + + return ( retval ); + } + return ( PAPI_ENOEVNT ); +} + +/** @class PAPI_flops_rate + * @brief Simplified call to get Mflops/s (floating point operation rate), real and processor time. + * + * @par C Interface: + * \#include @n + * int PAPI_flops_rate ( int event, float *rtime, float *ptime, long long *flpops, float *mflops ); + * + * @param event + * one of the three presets PAPI_FP_OPS, PAPI_SP_OPS or PAPI_DP_OPS + * @param *rtime + * realtime since the latest call + * @param *ptime + * process time since the latest call + * @param *flpops + * floating point operations since the latest call + * @param *mflops + * incremental (Mega) floating point operations per seconds since the latest call + * + * @retval PAPI_EINVAL + * The counters were already started by something other than PAPI_flops_rate(). + * @retval PAPI_ENOEVNT + * The floating point operations event does not exist. + * @retval PAPI_ENOMEM + * Insufficient memory to complete the operation. + * + * The first call to PAPI_flops_rate() will initialize the PAPI interface, + * set up the counters to monitor the floating point operations event and start the counters. + * + * Subsequent calls will read the counters and return real time, process time, + * floating point operations and the Mflop/s rate since the latest call to PAPI_flops_rate(). + * + * PAPI_flops_rate() returns information related to theoretical floating point operations + * rather than simple instructions. 
It uses the floating point operations event which attempts to + * 'correctly' account for, e.g., FMA undercounts and FP Store overcounts. Note that + * PAPI_flops_rate() is thread-safe and can therefore be called by multiple threads. + * + * @see PAPI_flips_rate() + * @see PAPI_ipc() + * @see PAPI_epc() + * @see PAPI_rate_stop() + */ +int +PAPI_flops_rate( int event, float *rtime, float *ptime, long long *flpops, float *mflops ) +{ + int retval; + + /* check event first */ + if ( event == PAPI_FP_OPS || event == PAPI_SP_OPS || event == PAPI_DP_OPS ) { + + int events[1] = {event}; + long long values = 0; + + if ( rtime == NULL || ptime == NULL || + flpops == NULL || mflops == NULL ) { + return PAPI_EINVAL; + } + + retval = _rate_calls( rtime, ptime, events, + &values, flpops, mflops, FLOP ); + + return ( retval ); + } + return ( PAPI_ENOEVNT ); +} + +/** @class PAPI_ipc + * @brief Simplified call to get instructions per cycle, real and processor time. + * + * @par C Interface: + * \#include @n + * int PAPI_ipc( float *rtime, float *ptime, long long *ins, float *ipc ); + * + * @param *rtime + * realtime since the latest call + * @param *ptime + * process time since the latest call + * @param *ins + * instructions since the latest call + * @param *ipc + * incremental instructions per cycle since the latest call + * + * @retval PAPI_EINVAL + * The counters were already started by something other than PAPI_ipc(). + * @retval PAPI_ENOEVNT + * The events PAPI_TOT_INS and PAPI_TOT_CYC are not supported. + * @retval PAPI_ENOMEM + * Insufficient memory to complete the operation. + * + * The first call to PAPI_ipc() will initialize the PAPI interface, + * set up the counters to monitor PAPI_TOT_INS and PAPI_TOT_CYC events + * and start the counters. + * + * Subsequent calls will read the counters and return real time, + * process time, instructions and the IPC rate since the latest call to PAPI_ipc(). 
+ * + * PAPI_ipc() should return a ratio greater than 1.0, indicating instruction level + * parallelism within the chip. The larger this ratio the more efficiently the program + * is running. Note that PAPI_ipc() is thread-safe and can therefore be called by multiple threads. + * + * @see PAPI_flips_rate() + * @see PAPI_flops_rate() + * @see PAPI_epc() + * @see PAPI_rate_stop() + */ +int +PAPI_ipc( float *rtime, float *ptime, long long *ins, float *ipc ) +{ + long long values[2] = { 0, 0 }; + int events[2] = {PAPI_TOT_INS, PAPI_TOT_CYC}; + int retval = 0; + + if ( rtime == NULL || ptime == NULL || ins == NULL || ipc == NULL ) + return PAPI_EINVAL; + + retval = _rate_calls( rtime, ptime, events, values, ins, ipc, IPC ); + return ( retval ); +} + +/** @class PAPI_epc + * @brief Simplified call to get arbitrary events per cycle, real and processor time. + * + * @par C Interface: + * \#include @n + * int PAPI_epc( int event, float *rtime, float *ptime, long long *ref, long long *core, long long *evt, float *epc ); + * + * @param event + * event code to be measured (0 defaults to PAPI_TOT_INS) + * @param *rtime + * realtime since the latest call + * @param *ptime + * process time since the latest call + * @param *ref + * incremental reference clock cycles since the latest call + * @param *core + * incremental core clock cycles since the latest call + * @param *evt + * events since the latest call + * @param *epc + * incremental events per cycle since the latest call + * + * @retval PAPI_EINVAL + * The counters were already started by something other than PAPI_epc(). + * @retval PAPI_ENOEVNT + * One of the requested events does not exist. + * @retval PAPI_ENOMEM + * Insufficient memory to complete the operation. + * + * The first call to PAPI_epc() will initialize the PAPI interface, + * set up the counters to monitor the user specified event, PAPI_TOT_CYC, + * and PAPI_REF_CYC (if it exists) and start the counters.
+ * + * Subsequent calls will read the counters and return real time, + * process time, event counts, the core and reference cycle count and EPC rate + * since the latest call to PAPI_epc(). + * + * PAPI_epc() can provide a more detailed look at algorithm efficiency in light of clock + * variability in modern cpus. MFLOPS is no longer an adequate description of peak + * performance if clock rates can arbitrarily speed up or slow down. By allowing a + * user specified event and reporting reference cycles, core cycles and real time, + * PAPI_epc provides the information to compute an accurate effective clock rate, and + * an accurate measure of computational throughput. Note that PAPI_epc() is thread-safe and can + * therefore be called by multiple threads. + * + * @see PAPI_flips_rate() + * @see PAPI_flops_rate() + * @see PAPI_ipc() + * @see PAPI_rate_stop() + */ +int +PAPI_epc( int event, float *rtime, float *ptime, long long *ref, long long *core, long long *evt, float *epc ) +{ + long long values[3] = { 0, 0, 0 }; + int events[3] = {PAPI_TOT_INS, PAPI_TOT_CYC, PAPI_REF_CYC}; + int retval = 0; + + if ( rtime == NULL || ptime == NULL || ref == NULL ||core == NULL || evt == NULL || epc == NULL ) + return PAPI_EINVAL; + + // if an event is provided, use it; otherwise use TOT_INS + if (event != 0 ) events[0] = event; + + retval = _rate_calls( rtime, ptime, events, values, evt, epc, EPC ); + *ref = values[2]; + *core = values[1]; + return ( retval ); +} + +/** @class PAPI_rate_stop + * @brief Stop a running event set of a rate function. + * + * @par C Interface: + * \#include @n + * int PAPI_rate_stop(); + * + * @retval PAPI_ENOEVNT + * -- The EventSet is not started yet. + * @retval PAPI_ENOMEM + * -- Insufficient memory to complete the operation. + * + * PAPI_rate_stop stops a running event set of a rate function. 
+ * + * @see PAPI_flips_rate() + * @see PAPI_flops_rate() + * @see PAPI_ipc() + * @see PAPI_epc() + */ +int +PAPI_rate_stop() +{ + int retval; + long long tmp_values[3]; + + if ( _papi_rate_events_running == 1 ) { + if ( _rate_state!= NULL ) { + if ( _rate_state->running > STOP ) { + retval = PAPI_stop( _rate_state->EventSet, tmp_values ); + if ( retval == PAPI_OK ) { + PAPI_cleanup_eventset( _rate_state->EventSet ); + _rate_state->running = STOP; + } + _papi_rate_events_running = 0; + return retval; + } + } + } + return ( PAPI_ENOEVNT ); +} + +static int +_start_new_rate_call(float *real_time, float *proc_time, int *events, + int num_events, long long *ins, float *rate) +{ + int retval; + _rate_state->EventSet = -1; + + if ( ( retval = PAPI_create_eventset( &_rate_state->EventSet ) ) != PAPI_OK ) + return ( retval ); + + if (( retval = PAPI_add_events( _rate_state->EventSet, events, num_events )) != PAPI_OK ) + return retval; + + /* remember the event for subsequent calls of PAPI_flips_rate and PAPI_flops_rate */ + _rate_state->event_0 = events[0]; + *real_time = 0.0; + *proc_time = 0.0; + *rate = 0.0; + *ins = 0; + + _rate_state->last_real_time = PAPI_get_real_usec( ); + _rate_state->last_proc_time = PAPI_get_virt_usec( ); + + if ( ( retval = PAPI_start( _rate_state->EventSet ) ) != PAPI_OK ) { + return retval; + } + + return ( PAPI_OK ); +} + +static int +_rate_calls( float *real_time, float *proc_time, int *events, + long long *values, long long *ins, float *rate, int mode ) +{ + + // printf("_rate_calls event %d, mode %d\n", events[0], mode); + + long long rt, pt; // current elapsed real and process times in usec + int num_events = 2; + int retval = 0; + + /* if a high-level event set is running stop it */ + if ( _papi_hl_events_running == 1 ) { + if ( ( retval = PAPI_hl_stop() ) != PAPI_OK ) + return ( retval ); + } + + if ( ( retval = _internal_check_rate_state() ) != PAPI_OK ) { + return ( retval ); + } + + + switch (mode) { + case FLOP: + case FLIP: + if ( 
(retval = PAPI_query_event(events[0])) != PAPI_OK) + return retval; + num_events = 1; + break; + case IPC: + break; + case EPC: + if ( (retval = PAPI_query_event(events[0])) != PAPI_OK) + return retval; + if ( (retval = PAPI_query_event(events[2])) == PAPI_OK) + num_events = 3; + break; + default: + return PAPI_EINVAL; + } + + /* STOP means the first call of a rate function */ + if ( _rate_state->running == STOP ) { + + if ( ( retval = _start_new_rate_call(real_time, proc_time, events, num_events, ins, rate)) != PAPI_OK ) + return retval; + _rate_state->running = mode; + + } else { + // check last mode + // printf("current mode: %d, last mode: %d\n", mode, _rate_state->running); + // printf("current event: %d, last event: %d\n", events[0], _rate_state->event_0); + + if ( mode != _rate_state->running || events[0] != _rate_state->event_0 ) { + + long long tmp_values[3]; + retval = PAPI_stop( _rate_state->EventSet, tmp_values ); + if ( retval == PAPI_OK ) { + PAPI_cleanup_eventset( _rate_state->EventSet ); + } else { + return retval; + } + + if ( ( retval = _start_new_rate_call(real_time, proc_time, events, num_events, ins, rate)) != PAPI_OK ) + return retval; + _rate_state->running = mode; + _papi_rate_events_running = 1; + return ( PAPI_OK ); + } + + if ( ( retval = PAPI_stop( _rate_state->EventSet, values ) ) != PAPI_OK ) { + _rate_state->running = STOP; + return retval; + } + + /* Read elapsed real and process times */ + rt = PAPI_get_real_usec(); + pt = PAPI_get_virt_usec(); + + /* Convert to seconds with multiplication because it is much faster */ + *real_time = ((float)( rt - _rate_state->last_real_time )) * .000001; + *proc_time = ((float)( pt - _rate_state->last_proc_time )) * .000001; + + *ins = values[0]; + + switch (mode) { + case FLOP: + case FLIP: + /* Calculate MFLOP and MFLIP rates */ + if ( pt > 0 ) { + *rate = (float)values[0] / (pt - _rate_state->last_proc_time); + } else *rate = 0; + break; + case IPC: + case EPC: + /* Calculate IPC */ + if 
(values[1]!=0) { + *rate = (float) ((float)values[0] / (float) ( values[1])); + } + break; + default: + return PAPI_EINVAL; + } + _rate_state->last_real_time = rt; + _rate_state->last_proc_time = pt; + + if ( ( retval = PAPI_start( _rate_state->EventSet ) ) != PAPI_OK ) { + _rate_state->running = STOP; + return retval; + } + } + _papi_rate_events_running = 1; + return PAPI_OK; +} + /*******************************/ /* BEGIN EXTERNAL DECLARATIONS */ @@ -338,7 +880,7 @@ * @par Example: * @code int ret; - HighLevelInfo *state = NULL; + RateInfo *state = NULL; ret = PAPI_thread_init(pthread_self); if (ret != PAPI_OK) handle_error(ret); @@ -346,9 +888,9 @@ ret = PAPI_get_thr_specific(PAPI_USR1_TLS, (void *) &state); if (ret != PAPI_OK || state == NULL) { - state = (HighLevelInfo *) malloc(sizeof(HighLevelInfo)); + state = (RateInfo *) malloc(sizeof(RateInfo)); if (state == NULL) return (PAPI_ESYS); - memset(state, 0, sizeof(HighLevelInfo)); + memset(state, 0, sizeof(RateInfo)); state->EventSet = PAPI_NULL; ret = PAPI_create_eventset(&state->EventSet); if (ret != PAPI_OK) return (PAPI_ESYS); @@ -414,7 +956,7 @@ * @par Example: * @code int ret; -HighLevelInfo *state = NULL; +RateInfo *state = NULL; ret = PAPI_thread_init(pthread_self); if (ret != PAPI_OK) handle_error(ret); @@ -422,9 +964,9 @@ ret = PAPI_get_thr_specific(PAPI_USR1_TLS, (void *) &state); if (ret != PAPI_OK || state == NULL) { - state = (HighLevelInfo *) malloc(sizeof(HighLevelInfo)); + state = (RateInfo *) malloc(sizeof(RateInfo)); if (state == NULL) return (PAPI_ESYS); - memset(state, 0, sizeof(HighLevelInfo)); + memset(state, 0, sizeof(RateInfo)); state->EventSet = PAPI_NULL; ret = PAPI_create_eventset(&state->EventSet); if (ret != PAPI_OK) return (PAPI_ESYS); @@ -582,6 +1124,8 @@ _papi_hwi_debug |= DEBUG_MEMORY; if ( strstr( var, "LEAK" ) ) _papi_hwi_debug |= DEBUG_LEAK; + if ( strstr( var, "HIGHLEVEL" ) ) + _papi_hwi_debug |= DEBUG_HIGHLEVEL; if ( strstr( var, "ALL" ) ) _papi_hwi_debug |= DEBUG_ALL; } 
@@ -607,32 +1151,26 @@ papi_return( init_retval ); } - /* Initialize component globals */ + /* Initialize thread globals, including the main threads */ - tmp = _papi_hwi_init_global( ); + tmp = _papi_hwi_init_global_threads( ); if ( tmp ) { init_retval = tmp; _papi_hwi_shutdown_global_internal( ); - _in_papi_library_init_cnt--; + _in_papi_library_init_cnt--; papi_return( init_retval ); } - - /* Initialize thread globals, including the main threads */ - tmp = _papi_hwi_init_global_threads( ); + /* Initialize component globals */ + + tmp = _papi_hwi_init_global( ); if ( tmp ) { - int i; init_retval = tmp; _papi_hwi_shutdown_global_internal( ); - for ( i = 0; i < papi_num_components; i++ ) { - if (!_papi_hwd[i]->cmp_info.disabled) { - _papi_hwd[i]->shutdown_component( ); - } - } _in_papi_library_init_cnt--; papi_return( init_retval ); } - + init_level = PAPI_LOW_LEVEL_INITED; _in_papi_library_init_cnt--; @@ -4535,7 +5073,7 @@ user_defined_events_count = 0; /* Shutdown the entire component */ - _papi_hwi_shutdown_highlevel( ); + //_papi_hwi_shutdown_highlevel( ); _papi_hwi_shutdown_global_internal( ); _papi_hwi_shutdown_global_threads( ); for( i = 0; i < papi_num_components; i++ ) { diff -Nru papi-5.7.0+dfsg/src/papi_debug.h papi-6.0.0~dfsg/src/papi_debug.h --- papi-5.7.0+dfsg/src/papi_debug.h 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/papi_debug.h 2020-03-04 15:57:01.000000000 +0000 @@ -33,7 +33,8 @@ #define DEBUG_PROFILE 0x080 #define DEBUG_MEMORY 0x100 #define DEBUG_LEAK 0x200 -#define DEBUG_ALL (DEBUG_SUBSTRATE|DEBUG_API|DEBUG_INTERNAL|DEBUG_THREADS|DEBUG_MULTIPLEX|DEBUG_OVERFLOW|DEBUG_PROFILE|DEBUG_MEMORY|DEBUG_LEAK) +#define DEBUG_HIGHLEVEL 0x400 +#define DEBUG_ALL (DEBUG_SUBSTRATE|DEBUG_API|DEBUG_INTERNAL|DEBUG_THREADS|DEBUG_MULTIPLEX|DEBUG_OVERFLOW|DEBUG_PROFILE|DEBUG_MEMORY|DEBUG_LEAK|DEBUG_HIGHLEVEL) /* Please get rid of the DBG macro from your code */ @@ -53,7 +54,7 @@ #define DEBUGLABEL(a) if (_papi_hwi_thread_id_fn) fprintf(stderr, 
"%s:%s:%s:%d:%d:%#lx ",a,__FILE__, FUNC, __LINE__,(int)getpid(),_papi_hwi_thread_id_fn()); else fprintf(stderr, "%s:%s:%s:%d:%d ",a,__FILE__, FUNC, __LINE__, (int)getpid()) #define ISLEVEL(a) (_papi_hwi_debug&a) -#define DEBUGLEVEL(a) ((a&DEBUG_SUBSTRATE)?"SUBSTRATE":(a&DEBUG_API)?"API":(a&DEBUG_INTERNAL)?"INTERNAL":(a&DEBUG_THREADS)?"THREADS":(a&DEBUG_MULTIPLEX)?"MULTIPLEX":(a&DEBUG_OVERFLOW)?"OVERFLOW":(a&DEBUG_PROFILE)?"PROFILE":(a&DEBUG_MEMORY)?"MEMORY":(a&DEBUG_LEAK)?"LEAK":"UNKNOWN") +#define DEBUGLEVEL(a) ((a&DEBUG_SUBSTRATE)?"SUBSTRATE":(a&DEBUG_API)?"API":(a&DEBUG_INTERNAL)?"INTERNAL":(a&DEBUG_THREADS)?"THREADS":(a&DEBUG_MULTIPLEX)?"MULTIPLEX":(a&DEBUG_OVERFLOW)?"OVERFLOW":(a&DEBUG_PROFILE)?"PROFILE":(a&DEBUG_MEMORY)?"MEMORY":(a&DEBUG_LEAK)?"LEAK":(a&DEBUG_HIGHLEVEL)?"HIGHLEVEL":"UNKNOWN") #ifndef NO_VARARG_MACRO /* Has variable arg macro support */ #define PAPIDEBUG(level,format, args...) { if(_papi_hwi_debug&level){DEBUGLABEL(DEBUGLEVEL(level));fprintf(stderr,format, ## args);}} @@ -69,6 +70,7 @@ #define PRFDBG(format, args...) (PAPIDEBUG(DEBUG_PROFILE,format, ## args)) #define MEMDBG(format, args...) (PAPIDEBUG(DEBUG_MEMORY,format, ## args)) #define LEAKDBG(format, args...) (PAPIDEBUG(DEBUG_LEAK,format, ## args)) +#define HLDBG(format, args...) (PAPIDEBUG(DEBUG_HIGHLEVEL,format, ## args)) #endif #else @@ -82,6 +84,7 @@ #define PRFDBG(format, args...) { ; } #define MEMDBG(format, args...) { ; } #define LEAKDBG(format, args...) { ; } +#define HLDBG(format, args...) { ; } #define PAPIDEBUG(level, format, args...) { ; } #endif #endif @@ -283,6 +286,26 @@ #define LEAKDBG _LEAKDBG #endif +static void +_HLDBG( char *format, ... 
) +{ +#ifdef DEBUG + va_list args; + va_start(args, format); + PAPIDEBUG( DEBUG_HIGHLEVEL, format , args); + va_end(args); +#endif +} +#ifdef DEBUG +#define HLDBG do { \ + if (DEBUG_HIGHLEVEL&_papi_hwi_debug) {\ + DEBUGLABEL( DEBUGLEVEL ( DEBUG_HIGHLEVEL ) ); \ + } \ +} while(0); _HLDBG +#else +#define HLDBG _HLDBG +#endif + /* ifdef NO_VARARG_MACRO */ #endif diff -Nru papi-5.7.0+dfsg/src/papi_events.csv papi-6.0.0~dfsg/src/papi_events.csv --- papi-5.7.0+dfsg/src/papi_events.csv 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/papi_events.csv 2020-03-04 15:57:01.000000000 +0000 @@ -396,6 +396,8 @@ # # CPU,amd64_fam17h +CPU,amd64_fam17h_zen1 +CPU,amd64_fam17h_zen2 # PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT @@ -412,7 +414,7 @@ # Note, need access to special L2 uncore events # to get L2 related events # -PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_32K_L2_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_32K_L2_HIT:TLB_RELOAD_4K_L2_HIT +PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:IF1G:IF2M:IF4K # PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS @@ -1588,6 +1590,8 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS +PRESET,PAPI_L2_DCR,NOT_DERIVED,PM_DATA_FROM_L2 +PRESET,PAPI_L2_DCW,NOT_DERIVED,PM_L2_ST_HIT PRESET,PAPI_L3_DCR,NOT_DERIVED,PM_DATA_FROM_L2MISS PRESET,PAPI_L3_DCM,DERIVED_ADD,PM_DATA_FROM_LMEM,PM_DATA_FROM_RMEM PRESET,PAPI_L3_LDM,DERIVED_ADD,PM_DATA_FROM_LMEM,PM_DATA_FROM_RMEM @@ -1611,12 +1615,16 @@ 
PRESET,PAPI_HW_INT,NOT_DERIVED,PM_EXT_INT PRESET,PAPI_STL_ICY,DERIVED_POSTFIX,N0|N1|-|,PM_RUN_CYC,PM_1PLUS_PPC_DISP PRESET,PAPI_SR_INS,NOT_DERIVED,PM_ST_FIN -PRESET,PAPI_LD_INS,DERIVED_ADD,PM_LD_REF_L1,PM_LD_MISS_L1_ALT +PRESET,PAPI_LD_INS,NOT_DERIVED,PM_LD_REF_L1 PRESET,PAPI_LST_INS,NOT_DERIVED,PM_LSU_FIN PRESET,PAPI_LST_INS,DERIVED_ADD,PM_LD_REF_L1,PM_LD_MISS_L1,PM_ST_FIN PRESET,PAPI_BR_INS,NOT_DERIVED,PM_BRU_FIN PRESET,PAPI_BR_MSP,NOT_DERIVED,PM_TAKEN_BR_MPRED_CMPL PRESET,PAPI_BR_PRC,NOT_DERIVED,PM_BR_PRED +PRESET,PAPI_BR_CN,DERIVED_SUB,PM_BR_CMPL,PM_BR_UNCOND +PRESET,PAPI_BR_NTK,DERIVED_POSTFIX,N0|N1|-|,PM_BR_CMPL,PM_BR_TAKEN_CMPL +PRESET,PAPI_BR_UCN,NOT_DERIVED,PM_BR_UNCOND +PRESET,PAPI_BR_TKN,NOT_DERIVED,PM_BR_CORECT_PRED_TAKEN_CMPL PRESET,PAPI_FXU_IDL,NOT_DERIVED,PM_FXU_IDLE # CPU,ultra12 diff -Nru papi-5.7.0+dfsg/src/papi_fwrappers.c papi-6.0.0~dfsg/src/papi_fwrappers.c --- papi-5.7.0+dfsg/src/papi_fwrappers.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/papi_fwrappers.c 2020-03-04 15:57:01.000000000 +0000 @@ -43,7 +43,7 @@ #endif /* The Low Level Wrappers */ -/** \internal @defgroup PAPIF PAPI Fortran API */ +/** \internal @defgroup PAPIF PAPI Fortran Low Level API */ /* helper routine to convert Fortran strings to C strings */ #if defined(_FORTRAN_STRLEN_AT_END) @@ -1162,156 +1162,6 @@ *check = PAPI_unlock( *lock ); } -/* The High Level API Wrappers */ - -/** @class PAPIF_start_counters - * @ingroup PAPIF - * @brief Start counting hardware events. - * - * @par Fortran Interface: - * \#include "fpapi.h" @n - * PAPIF_start_counters( C_INT(*) events, C_INT array_len, C_INT check ) - * - * @see PAPI_start_counters - */ -PAPI_FCALL( papif_start_counters, PAPIF_START_COUNTERS, - ( int *events, int *array_len, int *check ) ) -{ - *check = PAPI_start_counters( events, *array_len ); -} - -/** @class PAPI_read_counters - * @ingroup PAPIF - * @brief Read and reset counters. 
- * - * @par Fortran Interface: - * \#include "fpapi.h" @n - * PAPIF_read_counters( C_LONG_LONG(*) values, C_INT array_len, C_INT check ) - * - * @see PAPI_read_counters - */ -PAPI_FCALL( papif_read_counters, PAPIF_READ_COUNTERS, - ( long long *values, int *array_len, int *check ) ) -{ - *check = PAPI_read_counters( values, *array_len ); -} - -/** @class PAPIF_stop_counters - * @ingroup PAPIF - * @brief Stop counting hardware events and reset values to zero. - * - * @par Fortran Interface: - * \#include "fpapi.h" @n - * PAPIF_stop_counters( C_LONG_LONG(*) values, C_INT array_len, C_INT check ) - * - * @see PAPI_stop_counters - */ -PAPI_FCALL( papif_stop_counters, PAPIF_STOP_COUNTERS, - ( long long *values, int *array_len, int *check ) ) -{ - *check = PAPI_stop_counters( values, *array_len ); -} - -/** @class PAPIF_accum_counters - * @ingroup PAPIF - * @brief Accumulate and reset counters. - * - * @par Fortran Interface: - * \#include "fpapi.h" @n - * PAPIF_accum_counters( C_LONG_LONG(*) values, C_INT array_len, C_INT check ) - * - * @see PAPI_accum_counters - */ -PAPI_FCALL( papif_accum_counters, PAPIF_ACCUM_COUNTERS, - ( long long *values, int *array_len, int *check ) ) -{ - *check = PAPI_accum_counters( values, *array_len ); -} - -/** @class PAPIF_num_counters - * @ingroup PAPIF - * @brief Get the number of hardware counters available on the system. - * - * @par Fortran Interface: - * \#include "fpapi.h" @n - * PAPIF_num_counters( C_INT numevents ) - * - * @see PAPI_num_counters - */ -PAPI_FCALL( papif_num_counters, PAPIF_NUM_COUNTERS, ( int *numevents ) ) -{ - *numevents = PAPI_num_counters( ); -} - -/** @class PAPIF_ipc - * @ingroup PAPIF - * @brief Get instructions per cycle, real and processor time. 
- * - * @par Fortran Interface: - * \#include "fpapi.h" @n - * PAPIF_ipc( C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG ins, C_FLOAT ipc, C_INT check ) - * - * @see PAPI_ipc - */ -PAPI_FCALL( papif_ipc, PAPIF_IPC, - ( float *rtime, float *ptime, long long *ins, float *ipc, - int *check ) ) -{ - *check = PAPI_ipc( rtime, ptime, ins, ipc ); -} - -/** @class PAPIF_epc - * @ingroup PAPIF - * @brief Get named events per cycle, real and processor time, reference and core cycles. - * - * @par Fortran Interface: - * \#include "fpapi.h" @n - * PAPIF_epc( C_STRING EventName, C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG ref, C_LONG_LONG core, C_LONG_LONG evt, C_FLOAT epc, C_INT check ) - * - * @see PAPI_epc - */ -PAPI_FCALL( papif_epc, PAPIF_EPC, - ( int event, float *rtime, float *ptime, - long long *ref, long long *core, long long *evt, float *epc, - int *check) ) -{ - *check = PAPI_epc( event, rtime, ptime, ref, core, evt, epc ); -} - -/** @class PAPIF_flips - * @ingroup PAPIF - * @brief Simplified call to get Mflips/s (floating point instruction rate), real and processor time. - * - * @par Fortran Interface: - * \#include "fpapi.h" @n - * PAPIF_flips( C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG flpins, C_FLOAT mflips, C_INT check ) - * - * @see PAPI_flips - */ -PAPI_FCALL( papif_flips, PAPIF_FLIPS, - ( float *real_time, float *proc_time, long long *flpins, - float *mflips, int *check ) ) -{ - *check = PAPI_flips( real_time, proc_time, flpins, mflips ); -} - -/** @class PAPIF_flops - * @ingroup PAPIF - * @brief Simplified call to get Mflops/s (floating point instruction rate), real and processor time. 
- * - * @par Fortran Interface: - * \#include "fpapi.h" @n - * PAPIF_flops( C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG flpops, C_FLOAT mflops, C_INT check ) - * - * @see PAPI_flops - */ -PAPI_FCALL( papif_flops, PAPIF_FLOPS, - ( float *real_time, float *proc_time, long long *flpops, - float *mflops, int *check ) ) -{ - *check = PAPI_flops( real_time, proc_time, flpops, mflops ); -} - /* Fortran only APIs for get_opt and set_opt functionality */ @@ -1475,4 +1325,343 @@ *check = PAPI_set_opt( PAPI_INHERIT, &i ); } +/** @class PAPIF_ipc + * @ingroup PAPIF + * @brief Get instructions per cycle, real and processor time. + * + * @par Fortran Interface: + * \#include "fpapi.h" @n + * PAPIF_ipc( C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG ins, C_FLOAT ipc, C_INT check ) + * + * @see PAPI_ipc + */ +PAPI_FCALL( papif_ipc, PAPIF_IPC, + ( float *rtime, float *ptime, long long *ins, float *ipc, + int *check ) ) +{ + *check = PAPI_ipc( rtime, ptime, ins, ipc ); +} + +/** @class PAPIF_epc + * @ingroup PAPIF + * @brief Get named events per cycle, real and processor time, reference and core cycles. + * + * @par Fortran Interface: + * \#include "fpapi.h" @n + * PAPIF_epc( C_STRING EventName, C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG ref, C_LONG_LONG core, C_LONG_LONG evt, C_FLOAT epc, C_INT check ) + * + * @see PAPI_epc + */ +PAPI_FCALL( papif_epc, PAPIF_EPC, + ( int event, float *rtime, float *ptime, + long long *ref, long long *core, long long *evt, float *epc, + int *check) ) +{ + *check = PAPI_epc( event, rtime, ptime, ref, core, evt, epc ); +} + +/** @class PAPIF_flips_rate + * @ingroup PAPIF + * @brief Simplified call to get Mflips/s (floating point instruction rate), real and processor time. 
+ * + * @par Fortran Interface: + * \#include "fpapi.h" @n + * PAPIF_flips_rate ( C_STRING EventName, C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG flpins, C_FLOAT mflips, C_INT check ) + * + * @see PAPI_flips_rate + */ +PAPI_FCALL( papif_flips_rate, PAPIF_FLIPS_RATE, + ( int event, float *real_time, float *proc_time, long long *flpins, + float *mflips, int *check ) ) +{ + *check = PAPI_flips_rate( event, real_time, proc_time, flpins, mflips ); +} + +/** @class PAPIF_flops_rate + * @ingroup PAPIF + * @brief Simplified call to get Mflops/s (floating point instruction rate), real and processor time. + * + * @par Fortran Interface: + * \#include "fpapi.h" @n + * PAPIF_flops_rate( C_STRING EventName, C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG flpops, C_FLOAT mflops, C_INT check ) + * + * @see PAPI_flops_rate + */ +PAPI_FCALL( papif_flops_rate, PAPIF_FLOPS_RATE, + ( int event, float *real_time, float *proc_time, long long *flpops, + float *mflops, int *check ) ) +{ + *check = PAPI_flops_rate( event, real_time, proc_time, flpops, mflops ); +} + +/** @class PAPIF_rate_stop + * @ingroup PAPIF + * @brief Stop a running event set of a rate function. + * + * @par Fortran Interface: + * \#include "fpapi.h" @n + * PAPIF_rate_stop( C_INT check ) + * + * @see PAPI_rate_stop + */ +PAPI_FCALL( papif_rate_stop, PAPIF_RATE_STOP, + ( int *check ) ) +{ + *check = PAPI_rate_stop( ); +} + +/* The High Level API Wrappers */ +/** \internal @defgroup PAPIF-HL PAPI Fortran High Level API */ + +/** @class PAPIf_hl_region_begin + * @ingroup PAPIF-HL + * @brief Reads and stores hardware events at the beginning of an instrumented code region. + * + * @par Fortran Prototype: + * \#include "fpapi.h" @n + * PAPIf_hl_region_begin( C_STRING region, C_INT check ) + * + * @retval PAPI_OK + * @retval PAPI_ENOTRUN + * -- EventSet is currently not running or could not determined. + * @retval PAPI_ESYS + * -- A system or C library call failed inside PAPI, see the errno variable. 
+ * @retval PAPI_EMISC + * -- PAPI has been deactivated due to previous errors. + * @retval PAPI_ENOMEM + * -- Insufficient memory. + * + * PAPIf_hl_region_begin reads hardware events and stores them internally at the beginning + * of an instrumented code region. + * If not specified via environment variable PAPI_EVENTS, default events are used. + * The first call sets all counters implicitly to zero and starts counting. + * Note that if PAPI_EVENTS is not set or cannot be interpreted, default hardware events are + * recorded. + * + * @par Example: + * + * @code + * export PAPI_EVENTS="PAPI_TOT_INS,PAPI_TOT_CYC" + * @endcode + * + * + * @code + * integer retval + * + * call PAPIf_hl_region_begin("computation", retval) + * if ( retval .NE. PAPI_OK ) then + * write (*,*) "PAPIf_hl_region_begin failed!" + * end if + * + * !do some computation here + * + * call PAPIf_hl_region_end("computation", retval) + * if ( retval .NE. PAPI_OK ) then + * write (*,*) "PAPIf_hl_region_end failed!" + * end if + * + * @endcode + * + * @see PAPI_hl_region_begin + */ +#if defined(_FORTRAN_STRLEN_AT_END) +PAPI_FCALL( papif_hl_region_begin, PAPIF_HL_REGION_BEGIN, + ( char* name, int *check, int Event_len ) ) +{ + char tmp[PAPI_MAX_STR_LEN]; + Fortran2cstring( tmp, name, PAPI_MAX_STR_LEN, Event_len ); + *check = PAPI_hl_region_begin( tmp ); +} +#else +PAPI_FCALL( papif_hl_region_begin, PAPIF_HL_REGION_BEGIN, + ( char* name, int *check ) ) +{ + *check = PAPI_hl_region_begin( name ); +} +#endif + +/** @class PAPIf_hl_read + * @ingroup PAPIF-HL + * @brief Reads and stores hardware events inside of an instrumented code region. + * + * @par Fortran Prototype: + * \#include @n + * int PAPIf_hl_read( C_STRING region, C_INT check ) + * + * @param region + * -- a unique region name corresponding to PAPIf_hl_region_begin + * + * @retval PAPI_OK + * @retval PAPI_ENOTRUN + * -- EventSet is currently not running or could not be determined.
+ * @retval PAPI_ESYS + * -- A system or C library call failed inside PAPI, see the errno variable. + * @retval PAPI_EMISC + * -- PAPI has been deactivated due to previous errors. + * @retval PAPI_ENOMEM + * -- Insufficient memory. + * + * PAPIf_hl_read reads hardware events and stores them internally inside + * of an instrumented code region. + * Assumes that PAPIf_hl_region_begin was called before. + * + * @par Example: + * + * @code + * integer retval + * + * call PAPIf_hl_region_begin("computation", retval) + * if ( retval .NE. PAPI_OK ) then + * write (*,*) "PAPIf_hl_region_begin failed!" + * end if + * + * !do some computation here + * + * call PAPIf_hl_read("computation", retval) + * if ( retval .NE. PAPI_OK ) then + * write (*,*) "PAPIf_hl_read failed!" + * end if + * + * !do some computation here + * + * call PAPIf_hl_region_end("computation", retval) + * if ( retval .NE. PAPI_OK ) then + * write (*,*) "PAPIf_hl_region_end failed!" + * end if + * + * @endcode + * + * @see PAPI_hl_read + */ +#if defined(_FORTRAN_STRLEN_AT_END) +PAPI_FCALL( papif_hl_read, PAPIF_HL_READ, + ( char* name, int *check, int Event_len ) ) +{ + char tmp[PAPI_MAX_STR_LEN]; + Fortran2cstring( tmp, name, PAPI_MAX_STR_LEN, Event_len ); + *check = PAPI_hl_read( tmp ); +} +#else +PAPI_FCALL( papif_hl_read, PAPIF_HL_READ, + ( char* name, int *check ) ) +{ + *check = PAPI_hl_read( name ); +} +#endif + +/** @class PAPIf_hl_region_end + * @ingroup PAPIF-HL + * @brief Reads and stores hardware events at the end of an instrumented code region. + * + * @par Fortran Prototype: + * \#include "fpapi.h" @n + * PAPIf_hl_region_end( C_STRING region, C_INT check ) + * + * @param region + * -- a unique region name corresponding to PAPIf_hl_region_begin + * + * @retval PAPI_OK + * @retval PAPI_ENOTRUN + * -- EventSet is currently not running or could not be determined. + * @retval PAPI_ESYS + * -- A system or C library call failed inside PAPI, see the errno variable.
+ * @retval PAPI_EMISC + * -- PAPI has been deactivated due to previous errors. + * @retval PAPI_ENOMEM + * -- Insufficient memory. + * + * PAPIf_hl_region_end reads hardware events and stores the difference to the values from + * PAPIf_hl_region_begin at the end of an instrumented code region. + * Assumes that PAPIf_hl_region_begin was called before. + * Note that an output is automatically generated when your application terminates. + * + * @par Example: + * + * @code + * integer retval + * + * call PAPIf_hl_region_begin("computation", retval) + * if ( retval .NE. PAPI_OK ) then + * write (*,*) "PAPIf_hl_region_begin failed!" + * end if + * + * !do some computation here + * + * call PAPIf_hl_region_end("computation", retval) + * if ( retval .NE. PAPI_OK ) then + * write (*,*) "PAPIf_hl_region_end failed!" + * end if + * + * @endcode + * + * @see PAPI_hl_region_end + */ +#if defined(_FORTRAN_STRLEN_AT_END) +PAPI_FCALL( papif_hl_region_end, PAPIF_HL_REGION_END, + ( char* name, int *check, int Event_len ) ) +{ + char tmp[PAPI_MAX_STR_LEN]; + Fortran2cstring( tmp, name, PAPI_MAX_STR_LEN, Event_len ); + *check = PAPI_hl_region_end( tmp ); +} +#else +PAPI_FCALL( papif_hl_region_end, PAPIF_HL_REGION_END, + ( char* name, int *check ) ) +{ + *check = PAPI_hl_region_end( name ); +} +#endif + +/** @class PAPIf_hl_stop + * @ingroup PAPIF-HL + * @brief Stop a running high-level event set. + * + * @par Fortran Prototype: + * \#include "fpapi.h" @n + * PAPIf_hl_stop( C_INT check ) + * + * @retval PAPI_ENOEVNT + * -- The EventSet is not started yet. + * @retval PAPI_ENOMEM + * -- Insufficient memory to complete the operation. + * + * PAPIf_hl_stop stops a running high-level event set. + * + * This call is optional and only necessary if the programmer wants to use the low-level API in addition + * to the high-level API. It should be noted that PAPIf_hl_stop and low-level calls are not + * allowed inside of a marked region.
Furthermore, PAPIf_hl_stop is thread-local and therefore + * has to be called in the same thread as the corresponding marked region. + * + * @par Example: + * + * @code + * integer retval + * + * call PAPIf_hl_region_begin("computation", retval) + * if ( retval .NE. PAPI_OK ) then + * write (*,*) "PAPIf_hl_region_begin failed!" + * end if + * + * !do some computation here + * + * call PAPIf_hl_region_end("computation", retval) + * if ( retval .NE. PAPI_OK ) then + * write (*,*) "PAPIf_hl_region_end failed!" + * end if + * + * call PAPIf_hl_stop(retval) + * if ( retval .NE. PAPI_OK ) then + * write (*,*) "PAPIf_hl_stop failed!" + * end if + * + * @endcode + * + * @see PAPI_hl_stop + */ +PAPI_FCALL( papif_hl_stop, PAPIF_HL_STOP, + ( int *check ) ) +{ + *check = PAPI_hl_stop( ); +} + #pragma GCC visibility pop diff -Nru papi-5.7.0+dfsg/src/papi.h papi-6.0.0~dfsg/src/papi.h --- papi-5.7.0+dfsg/src/papi.h 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/papi.h 2020-03-04 15:57:01.000000000 +0000 @@ -39,7 +39,7 @@ * * @section papi_high_api High Level Functions * A simple interface for instrumenting end-user applications. - * Fully supported on both C and Fortran. + * Fully supported on both C and Fortran. * See individual functions for details on usage. * * @ref high_api @@ -61,6 +61,8 @@ * * @ref PAPIF * + * @ref PAPIF-HL + * * @section Components * * Components provide access to hardware information on specific subsystems. 
@@ -221,7 +223,7 @@ /* This is the official PAPI version */ /* The final digit represents the patch count */ -#define PAPI_VERSION PAPI_VERSION_NUMBER(5,7,0,0) +#define PAPI_VERSION PAPI_VERSION_NUMBER(6,0,0,0) #define PAPI_VER_CURRENT (PAPI_VERSION & 0xffff0000) /* Tests for checking event code type */ @@ -282,6 +284,7 @@ #define PAPI_LOW_LEVEL_INITED 1 /* Low level has called library init */ #define PAPI_HIGH_LEVEL_INITED 2 /* High level has called library init */ #define PAPI_THREAD_LEVEL_INITED 4 /* Threads have been inited */ + /** @} */ /** @internal @@ -317,11 +320,10 @@ * @{ */ #define PAPI_USR1_TLS 0x0 #define PAPI_USR2_TLS 0x1 -#define PAPI_HIGH_LEVEL_TLS 0x2 +#define PAPI_TLS_HIGH_LEVEL 0x2 #define PAPI_NUM_TLS 0x3 #define PAPI_TLS_USR1 PAPI_USR1_TLS #define PAPI_TLS_USR2 PAPI_USR2_TLS -#define PAPI_TLS_HIGH_LEVEL PAPI_HIGH_LEVEL_TLS #define PAPI_TLS_NUM PAPI_NUM_TLS #define PAPI_TLS_ALL_THREADS 0x10 /** @} */ @@ -1100,33 +1102,29 @@ int PAPI_get_component_index(const char *name); /**< Return component index for component with matching name */ int PAPI_disable_component(int cidx); /**< Disables a component before init */ int PAPI_disable_component_by_name(const char *name ); /**< Disable, before library init, a component by name. 
*/ + int PAPI_num_components(void); /**< get the number of components available on the system */ + int PAPI_flips_rate(int event, float *rtime, float *ptime, long long *flpins, float *mflips); /**< simplified call to get Mflips/s (floating point instruction rate), real and processor time */ + int PAPI_flops_rate(int event, float *rtime, float *ptime, long long * flpops, float *mflops); /**< simplified call to get Mflops/s (floating point operation rate), real and processor time */ + int PAPI_ipc(float *rtime, float *ptime, long long * ins, float *ipc); /**< gets instructions per cycle, real and processor time */ + int PAPI_epc(int event, float *rtime, float *ptime, long long *ref, long long *core, long long *evt, float *epc); /**< gets (named) events per cycle, real and processor time, reference and core cycles */ + int PAPI_rate_stop(); /**< stops a running event set of a rate function */ /** @} */ /** \internal @defgroup high_api The High Level API - The simple interface implemented by the following eight routines - allows the user to access and count specific hardware events from - both C and Fortran. It should be noted that this API can be used in - conjunction with the low level API. - @{ */ + The simple interface implemented by the following routines allows the user to record hardware events inside instrumented regions from both C and Fortran. 
+ @{ */ - int PAPI_accum_counters(long long * values, int array_len); /**< add current counts to array and reset counters */ - int PAPI_num_counters(void); /**< get the number of hardware counters available on the system */ - int PAPI_num_components(void); /**< get the number of components available on the system */ - int PAPI_read_counters(long long * values, int array_len); /**< copy current counts to array and reset counters */ - int PAPI_start_counters(int *events, int array_len); /**< start counting hardware events */ - int PAPI_stop_counters(long long * values, int array_len); /**< stop counters and return current counts */ - int PAPI_flips(float *rtime, float *ptime, long long * flpins, float *mflips); /**< simplified call to get Mflips/s (floating point instruction rate), real and processor time */ - int PAPI_flops(float *rtime, float *ptime, long long * flpops, float *mflops); /**< simplified call to get Mflops/s (floating point operation rate), real and processor time */ - int PAPI_ipc(float *rtime, float *ptime, long long * ins, float *ipc); /**< gets instructions per cycle, real and processor time */ - int PAPI_epc(int event, float *rtime, float *ptime, long long *ref, long long *core, long long *evt, float *epc); /**< gets (named) events per cycle, real and processor time, reference and core cycles */ + int PAPI_hl_region_begin(const char* region); /**< read performance events at the beginning of a region */ + int PAPI_hl_read(const char* region); /**< read performance events inside of a region and store the difference to the corresponding beginning of the region */ + int PAPI_hl_region_end(const char* region); /**< read performance events at the end of a region and store the difference to the corresponding beginning of the region */ + int PAPI_hl_stop(); /**< stops a running high-level event set */ /** @} */ - /* Backwards compatibility hacks. Remove eventually? */ int PAPI_num_hwctrs(void); /**< return the number of hardware counters for the cpu. 
for backward compatibility. Don't use! */ #define PAPI_COMPONENT_INDEX(a) PAPI_get_event_component(a) diff -Nru papi-5.7.0+dfsg/src/papi_hl.c papi-6.0.0~dfsg/src/papi_hl.c --- papi-5.7.0+dfsg/src/papi_hl.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/papi_hl.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,849 +0,0 @@ -/****************************/ -/* THIS IS OPEN SOURCE CODE */ -/****************************/ - -/** -* @file papi_hl.c -* @author Philip Mucci -* mucci@cs.utk.edu -* @author Kevin London -* london@cs.utk.edu -* @author dan terpstra -* terpstra@cs.utk.edu -* @brief This file contains the 'high level' interface to PAPI. -* BASIC is a high level language. ;-) */ - -#include "papi.h" -#include "papi_internal.h" -#include "papi_memory.h" -#include - -/* high level papi functions*/ - -/* - * Which high-level interface are we using? - */ -#define HL_STOP 0 -#define HL_START 1 -#define HL_FLIP 2 -#define HL_FLOP 3 -#define HL_IPC 4 -#define HL_EPC 5 -#define HL_READ 6 -#define HL_ACCUM 7 - -/** \internal - * This is stored per thread - */ -typedef struct _HighLevelInfo -{ - int EventSet; /**< EventSet of the thread */ - short int num_evts; /**< number of events in the eventset */ - short int running; /**< STOP, START, or RATE */ - long long initial_real_time; /**< Start real time */ - long long initial_proc_time; /**< Start processor time */ - long long last_real_time; /**< Previous value of real time */ - long long last_proc_time; /**< Previous value of processor time */ - long long total_ins; /**< Total instructions */ -} HighLevelInfo; - -int _hl_rate_calls( float *real_time, float *proc_time, int *events, - long long *values, long long *ins, float *rate, int mode ); -void _internal_cleanup_hl_info( HighLevelInfo * state ); -int _internal_check_state( HighLevelInfo ** state ); -int _internal_start_hl_counters( HighLevelInfo * state ); -int _internal_hl_read_cnts( long long *values, int array_len, int flag ); - -/* CHANGE LOG: - - ksl 10/17/03 
- Pretty much a complete rewrite of the high level interface. Now - the interface is thread safe and you don't have to worry as much - about mixing the various high level calls. - - - dkt 11/19/01: - After much discussion with users and developers, removed FMA and SLOPE - fudge factors. SLOPE was not being used, and we decided the place to - apply FMA was at a higher level where there could be a better understanding - of platform discrepancies and code implications. - ALL PAPI CALLS NOW RETURN EXACTLY WHAT THE HARDWARE REPORTS - - dkt 08/14/01: - Added reinitialization of values and proc_time to new reinit code. - Added SLOPE and FMA constants to correct for systemic errors on a - platform-by-platform basis. - SLOPE is a factor subtracted from flpins on each call to compensate - for platform overhead in the call. - FMA is a shifter that doubles floating point counts on platforms that - count FMA as one op instead of two. - NOTE: We are making the FLAWED assumption that ALL flpins are FMA! - This will result in counts that are TOO HIGH on the affected platforms - in instances where the code is NOT mostly FMA. - - dkt 08/01/01: - NOTE: Calling semantics have changed! - Now, if flpins < 0 (an invalid value) a PAPI_reset is issued to reset the - counter values. The internal start time is also reset. This should be a - benign change, exept in the rare case where a user passes an uninitialized - (and possibly negative) value for flpins to the routine *AFTER* it has been - called the first time. This is unlikely, since the first call clears and - returns th is value. - - dkt 08/01/01: - Internal sequencing changes: - -- initial PAPI_get_real_usec() call moved above PAPI_start to avoid unwanted flops. - -- PAPI_accum() replaced with PAPI_start() / PAPI_stop pair for same reason. -*/ - -/** @internal - * This function is called to determine the state of the system. - * We may as well set the HighLevelInfo so you don't have to look it - * up again. 
- */ -int -_internal_check_state( HighLevelInfo ** outgoing ) -{ - int retval; - HighLevelInfo *state = NULL; - - /* Only allow one thread at a time in here */ - if ( init_level == PAPI_NOT_INITED ) { - retval = PAPI_library_init( PAPI_VER_CURRENT ); - if ( retval != PAPI_VER_CURRENT ) { - return ( retval ); - } else { - _papi_hwi_lock( HIGHLEVEL_LOCK ); - init_level = PAPI_HIGH_LEVEL_INITED; - _papi_hwi_unlock( HIGHLEVEL_LOCK ); - } - } - - /* - * Do we have the thread specific data setup yet? - */ - if ( ( retval = - PAPI_get_thr_specific( PAPI_HIGH_LEVEL_TLS, ( void ** ) &state ) ) - != PAPI_OK || state == NULL ) { - state = ( HighLevelInfo * ) papi_malloc( sizeof ( HighLevelInfo ) ); - if ( state == NULL ) - return ( PAPI_ENOMEM ); - - memset( state, 0, sizeof ( HighLevelInfo ) ); - state->EventSet = -1; - - if ( ( retval = PAPI_create_eventset( &state->EventSet ) ) != PAPI_OK ) - return ( retval ); - - if ( ( retval = - PAPI_set_thr_specific( PAPI_HIGH_LEVEL_TLS, - state ) ) != PAPI_OK ) - return ( retval ); - } - *outgoing = state; - return ( PAPI_OK ); -} - -/** @internal - * Make sure to allocate space for values - */ -int -_internal_start_hl_counters( HighLevelInfo * state ) -{ - return ( PAPI_start( state->EventSet ) ); -} - -void -_internal_cleanup_hl_info( HighLevelInfo * state ) -{ - state->num_evts = 0; - state->running = HL_STOP; - state->initial_real_time = -1; - state->initial_proc_time = -1; - state->total_ins = 0; - return; -} - -/** @class PAPI_flips - * @brief Simplified call to get Mflips/s (floating point instruction rate), real and processor time. 
- * - * @par C Interface: - * \#include @n - * int PAPI_flips( float *rtime, float *ptime, long long *flpins, float *mflips ); - * - * @param *rtime - * total realtime since the first call - * @param *ptime - * total process time since the first call - * @param *flpins - * total floating point instructions since the first call - * @param *mflips - * incremental (Mega) floating point instructions per seconds since the last call - * - * @retval PAPI_EINVAL - * The counters were already started by something other than PAPI_flips(). - * @retval PAPI_ENOEVNT - * The floating point instructions event does not exist. - * @retval PAPI_ENOMEM - * Insufficient memory to complete the operation. - * - * The first call to PAPI_flips() will initialize the PAPI High Level interface, - * set up the counters to monitor the PAPI_FP_INS event and start the counters. - * - * Subsequent calls will read the counters and return total real time, - * total process time, total floating point instructions since the start of the - * measurement and the Mflip/s rate since latest call to PAPI_flips(). - * A call to PAPI_stop_counters() will stop the counters from running and then - * calls such as PAPI_start_counters() or other rate calls can safely be used. - * - * PAPI_flips returns information related to floating point instructions using - * the PAPI_FP_INS event. This is intended to measure instruction rate through the - * floating point pipe with no massaging. 
- * - * @see PAPI_flops() - * @see PAPI_ipc() - * @see PAPI_epc() - * @see PAPI_stop_counters() - */ -int -PAPI_flips( float *rtime, float *ptime, long long *flpins, float *mflips ) -{ - int retval; - int events[1] = {PAPI_FP_INS}; - long long values = 0; - - if ( rtime == NULL || ptime == NULL || - flpins == NULL || mflips == NULL ) { - return PAPI_EINVAL; - } - - retval = _hl_rate_calls( rtime, ptime, events, - &values, flpins, mflips, HL_FLIP ); - - return ( retval ); -} - -/** @class PAPI_flops - * @brief Simplified call to get Mflops/s (floating point operation rate), real and processor time. - * - * @par C Interface: - * \#include @n - * int PAPI_flops( float *rtime, float *ptime, long long *flpops, float *mflops ); - * - * @param *rtime - * total realtime since the first call - * @param *ptime - * total process time since the first call - * @param *flpops - * total floating point operations since the first call - * @param *mflops - * incremental (Mega) floating point operations per seconds since the last call - * - * @retval PAPI_EINVAL - * The counters were already started by something other than PAPI_flops(). - * @retval PAPI_ENOEVNT - * The floating point operations event does not exist. - * @retval PAPI_ENOMEM - * Insufficient memory to complete the operation. - * - * The first call to PAPI_flops() will initialize the PAPI High Level interface, - * set up the counters to monitor the PAPI_FP_OPS event and start the counters. - * - * Subsequent calls will read the counters and return total real time, - * total process time, total floating point operations since the start of the - * measurement and the Mflop/s rate since latest call to PAPI_flops(). - * A call to PAPI_stop_counters() will stop the counters from running and then - * calls such as PAPI_start_counters() or other rate calls can safely be used. - * - * PAPI_flops returns information related to theoretical floating point operations - * rather than simple instructions. 
It uses the PAPI_FP_OPS event which attempts to - * 'correctly' account for, e.g., FMA undercounts and FP Store overcounts, etc. - * - * @see PAPI_flips() - * @see PAPI_ipc() - * @see PAPI_epc() - * @see PAPI_stop_counters() - */ -int -PAPI_flops( float *rtime, float *ptime, long long *flpops, float *mflops ) -{ - int retval; - int events[1] = {PAPI_FP_OPS}; - long long values = 0; - - if ( rtime == NULL || ptime == NULL || flpops == NULL || mflops == NULL ) - return PAPI_EINVAL; - - retval = _hl_rate_calls( rtime, ptime, events, &values, flpops, mflops, HL_FLOP ); - return ( retval ); -} - -/** @class PAPI_ipc - * @brief Simplified call to get instructions per cycle, real and processor time. - * - * @par C Interface: - * \#include @n - * int PAPI_ipc( float *rtime, float *ptime, long long *ins, float *ipc ); - * - * @param *rtime - * total realtime since the first call - * @param *ptime - * total process time since the first call - * @param *ins - * total instructions since the first call - * @param *ipc - * incremental instructions per cycle since the last call - * - * @retval PAPI_EINVAL - * The counters were already started by something other than PAPI_ipc(). - * @retval PAPI_ENOEVNT - * The floating point operations event does not exist. - * @retval PAPI_ENOMEM - * Insufficient memory to complete the operation. - * - * The first call to PAPI_ipc() will initialize the PAPI High Level interface, - * set up the counters to monitor PAPI_TOT_INS and PAPI_TOT_CYC events - * and start the counters. - * - * Subsequent calls will read the counters and return total real time, - * total process time, total instructions since the start of the - * measurement and the IPC rate since the latest call to PAPI_ipc(). - * - * A call to PAPI_stop_counters() will stop the counters from running and then - * calls such as PAPI_start_counters() or other rate calls can safely be used. 
- * - * PAPI_ipc should return a ratio greater than 1.0, indicating instruction level - * parallelism within the chip. The larger this ratio the more effeciently the program - * is running. - * - * @see PAPI_flips() - * @see PAPI_flops() - * @see PAPI_epc() - * @see PAPI_stop_counters() - */ -int -PAPI_ipc( float *rtime, float *ptime, long long *ins, float *ipc ) -{ - long long values[2] = { 0, 0 }; - int events[2] = {PAPI_TOT_INS, PAPI_TOT_CYC}; - int retval = 0; - - if ( rtime == NULL || ptime == NULL || ins == NULL || ipc == NULL ) - return PAPI_EINVAL; - - retval = _hl_rate_calls( rtime, ptime, events, values, ins, ipc, HL_IPC ); - return ( retval ); -} - -/** @class PAPI_epc - * @brief Simplified call to get arbitrary events per cycle, real and processor time. - * - * @par C Interface: - * \#include @n - * int PAPI_epc( int event, float *rtime, float *ptime, long long *ref, long long *core, long long *evt, float *epc ); - * - * @param event - * event code to be measured (0 defaults to PAPI_TOT_INS) - * @param *rtime - * total realtime since the first call - * @param *ptime - * total process time since the first call - * @param *ref - * incremental reference clock cycles since the last call - * @param *core - * incremental core clock cycles since the last call - * @param *evt - * total events since the first call - * @param *epc - * incremental events per cycle since the last call - * - * @retval PAPI_EINVAL - * The counters were already started by something other than PAPI_epc(). - * @retval PAPI_ENOEVNT - * One of the requested events does not exist. - * @retval PAPI_ENOMEM - * Insufficient memory to complete the operation. - * - * The first call to PAPI_epc() will initialize the PAPI High Level interface, - * set up the counters to monitor the user specified event, PAPI_TOT_CYC, - * and PAPI_REF_CYC (if it exists) and start the counters. 
- * - * Subsequent calls will read the counters and return total real time, - * total process time, total event counts since the start of the - * measurement and the core and reference cycle count and EPC rate since the - * latest call to PAPI_epc(). - - * A call to PAPI_stop_counters() will stop the counters from running and then - * calls such as PAPI_start_counters() or other rate calls can safely be used. - * - * PAPI_epc can provide a more detailed look at algorithm efficiency in light of clock - * variability in modern cpus. MFLOPS is no longer an adequate description of peak - * performance if clock rates can arbitrarily speed up or slow down. By allowing a - * user specified event and reporting reference cycles, core cycles and real time, - * PAPI_epc provides the information to compute an accurate effective clock rate, and - * an accurate measure of computational throughput. - * - * @see PAPI_flips() - * @see PAPI_flops() - * @see PAPI_ipc() - * @see PAPI_stop_counters() - */ -int -PAPI_epc( int event, float *rtime, float *ptime, long long *ref, long long *core, long long *evt, float *epc ) -{ - long long values[3] = { 0, 0, 0 }; - int events[3] = {PAPI_TOT_INS, PAPI_TOT_CYC, PAPI_REF_CYC}; - int retval = 0; - - if ( rtime == NULL || ptime == NULL || ref == NULL ||core == NULL || evt == NULL || epc == NULL ) - return PAPI_EINVAL; - - // if an event is provided, use it; otherwise use TOT_INS - if (event != 0 ) events[0] = event; - - if ( PAPI_query_event( ( int ) PAPI_REF_CYC ) != PAPI_OK ) - events[2] = 0; - - retval = _hl_rate_calls( rtime, ptime, events, values, evt, epc, HL_EPC ); - *core = values[1]; - *ref = values[2]; - return ( retval ); -} - -int -_hl_rate_calls( float *real_time, float *proc_time, int *events, - long long *values, long long *ins, float *rate, int mode ) -{ - long long rt, pt; // current elapsed real and process times in usec - int num_events = 2; - int retval = 0; - HighLevelInfo *state = NULL; - - if ( ( retval = 
_internal_check_state( &state ) ) != PAPI_OK ) { - return ( retval ); - } - - if ( state->running != HL_STOP && state->running != mode ) { - return PAPI_EINVAL; - } - - if ( state->running == HL_STOP ) { - - switch (mode) { - case HL_FLOP: - case HL_FLIP: - num_events = 1; - break; - case HL_IPC: - break; - case HL_EPC: - if ( events[2] != 0 ) num_events = 3; - break; - default: - return PAPI_EINVAL; - } - if (( retval = PAPI_add_events( state->EventSet, events, num_events )) != PAPI_OK ) { - _internal_cleanup_hl_info( state ); - PAPI_cleanup_eventset( state->EventSet ); - return retval; - } - - state->total_ins = 0; - state->initial_real_time = state->last_real_time = PAPI_get_real_usec( ); - state->initial_proc_time = state->last_proc_time = PAPI_get_virt_usec( ); - - if ( ( retval = PAPI_start( state->EventSet ) ) != PAPI_OK ) { - return retval; - } - - /* Initialize the interface */ - state->running = mode; - *real_time = 0.0; - *proc_time = 0.0; - *rate = 0.0; - - } else { - if ( ( retval = PAPI_stop( state->EventSet, values ) ) != PAPI_OK ) { - state->running = HL_STOP; - return retval; - } - - /* Read elapsed real and process times */ - rt = PAPI_get_real_usec(); - pt = PAPI_get_virt_usec(); - - /* Convert to seconds with multiplication because it is much faster */ - *real_time = ((float)( rt - state->initial_real_time )) * .000001; - *proc_time = ((float)( pt - state->initial_proc_time )) * .000001; - - state->total_ins += values[0]; - - switch (mode) { - case HL_FLOP: - case HL_FLIP: - /* Calculate MFLOP and MFLIP rates */ - if ( pt > 0 ) { - *rate = (float)values[0] / (pt - state->last_proc_time); - } else *rate = 0; - break; - case HL_IPC: - case HL_EPC: - /* Calculate IPC */ - if (values[1]!=0) { - *rate = (float) ((float)values[0] / (float) ( values[1])); - } - break; - default: - return PAPI_EINVAL; - } - state->last_real_time = rt; - state->last_proc_time = pt; - - if ( ( retval = PAPI_start( state->EventSet ) ) != PAPI_OK ) { - state->running = 
HL_STOP; - return retval; - } - } - *ins = state->total_ins; - return PAPI_OK; -} - -/** @class PAPI_num_counters - * @brief Get the number of hardware counters available on the system. - * - * @par C Interface: - * \#include @n - * int PAPI_num_counters( void ); - * - * @post - * Initializes the library to PAPI_HIGH_LEVEL_INITED if necessary. - * - * @retval PAPI_EINVAL - * papi.h is different from the version used to compile the PAPI library. - * @retval PAPI_ENOMEM - * Insufficient memory to complete the operation. - * @retval PAPI_ESYS - * A system or C library call failed inside PAPI, see the errno variable. - * - * @par Examples: - * @code - * int num_hwcntrs; - * // The installation does not support PAPI - * if ((num_hwcntrs = PAPI_num_counters()) < 0 ) - * handle_error(1); - * // The installation supports PAPI, but has no counters - * if ((num_hwcntrs = PAPI_num_counters()) == 0 ) - * fprintf(stderr,"Info:: This machine does not provide hardware counters.\n"); - * @endcode - * - * PAPI_num_counters() returns the optimal length of the values array for the high level functions. - * This value corresponds to the number of hardware counters supported by the current CPU component. - * - * @note This function only works for the CPU component. To determine the number of counters on - * another component, use the low level PAPI_num_cmp_hwctrs(). - */ -int -PAPI_num_counters( void ) -{ - int retval; - HighLevelInfo *tmp = NULL; - - /* Make sure the Library is initialized, etc... */ - if ( ( retval = _internal_check_state( &tmp ) ) != PAPI_OK ) - return ( retval ); - - return ( PAPI_get_opt( PAPI_MAX_HWCTRS, NULL ) ); -} - -/** @class PAPI_start_counters - * @brief Start counting hardware events. 
- * - * @par C Interface: - * \#include @n - * int PAPI_start_counters( int *events, int array_len ); - * - * @param *events - * an array of codes for events such as PAPI_INT_INS or a native event code - * @param array_len - * the number of items in the *events array - * - * @retval PAPI_EINVAL - * One or more of the arguments is invalid. - * @retval PAPI_EISRUN - * Counters have already been started, you must call PAPI_stop_counters() - * before you call this function again. - * @retval PAPI_ESYS - * A system or C library call failed inside PAPI, see the errno variable. - * @retval PAPI_ENOMEM - * Insufficient memory to complete the operation. - * @retval PAPI_ECNFLCT - * The underlying counter hardware cannot count this event and other events - * in the EventSet simultaneously. - * @retval PAPI_ENOEVNT - * The PAPI preset is not available on the underlying hardware. - * - * PAPI_start_counters() starts counting the events named in the *events array. - * This function cannot be called if the counters have already been started. - * The user must call PAPI_stop_counters() to stop the events explicitly if - * he/she wants to call this function again. - * It is the user's responsibility to choose events that can be counted - * simultaneously by reading the vendor's documentation. - * The length of the *events array should be no longer than the value returned - * by PAPI_num_counters(). 
- * - * @code -if( PAPI_start_counters( Events, num_hwcntrs ) != PAPI_OK ) - handle_error(1); - * @endcode - * - * @see PAPI_stop_counters() PAPI_add_event() PAPI_create_eventset() - */ -int -PAPI_start_counters( int *events, int array_len ) -{ - int i, retval; - HighLevelInfo *state = NULL; - - if ( events == NULL || array_len <= 0 ) - return PAPI_EINVAL; - - if ( ( retval = _internal_check_state( &state ) ) != PAPI_OK ) - return ( retval ); - - if ( state->running != 0 ) - return ( PAPI_EINVAL ); - - /* load events to the new EventSet */ - for ( i = 0; i < array_len; i++ ) { - retval = PAPI_add_event( state->EventSet, events[i] ); - if ( retval == PAPI_EISRUN ) - return ( retval ); - - if ( retval ) { - /* remove any prior events that may have been added - * and cleanup the high level information - */ - _internal_cleanup_hl_info( state ); - PAPI_cleanup_eventset( state->EventSet ); - return ( retval ); - } - } - /* start the EventSet */ - if ( ( retval = _internal_start_hl_counters( state ) ) == PAPI_OK ) { - state->running = HL_START; - state->num_evts = ( short ) array_len; - } - return ( retval ); -} - -/*========================================================================*/ -/* int PAPI_read_counters(long long *values, int array_len) */ -/* */ -/* Read the running counters into the values array. This call */ -/* implicitly initializes the internal counters to zero and allows */ -/* them continue to run upon return. 
*/ -/*========================================================================*/ - -int -_internal_hl_read_cnts( long long *values, int array_len, int flag ) -{ - int retval; - HighLevelInfo *state = NULL; - - if ( ( retval = _internal_check_state( &state ) ) != PAPI_OK ) - return ( retval ); - - if ( state->running != HL_START || array_len < state->num_evts ) - return ( PAPI_EINVAL ); - - if ( flag == HL_ACCUM ) - return ( PAPI_accum( state->EventSet, values ) ); - else if ( flag == HL_READ ) { - if ( ( retval = PAPI_read( state->EventSet, values ) ) != PAPI_OK ) - return ( retval ); - return ( PAPI_reset( state->EventSet ) ); - } - - /* Invalid flag passed in */ - return ( PAPI_EINVAL ); -} - -/** @class PAPI_read_counters - * @brief Read and reset counters. - * - * @par C Interface: - * \#include @n - * int PAPI_read_counters( long long *values, int array_len ); - * - * @param *values - * an array to hold the counter values of the counting events - * @param arry_len - * the number of items in the *events array - * - * @pre - * These calls assume an initialized PAPI library and a properly added event set. - * - * @post - * The counters are reset and left running after the call. - * - * @retval PAPI_EINVAL - * One or more of the arguments is invalid. - * @retval PAPI_ESYS - * A system or C library call failed inside PAPI, see the errno variable. - * - * PAPI_read_counters() copies the event counters into the array *values. 
- * - * @code -do_100events(); -if ( PAPI_read_counters( values, num_hwcntrs ) != PAPI_OK ) - handlw_error(1); -// values[0] now equals 100 -do_100events(); -if ( PAPI_accum_counters( values, num_hwcntrs ) != PAPI_OK ) - handle_error(1); -// values[0] now equals 200 -values[0] = -100; -do_100events(); -if ( PAPI_accum_counters(values, num_hwcntrs ) != PAPI_OK ) - handle_error(); -// values[0] now equals 0 - * @endcode - * - * @see PAPI_set_opt() PAPI_start_counters() - */ -int -PAPI_read_counters( long long *values, int array_len ) -{ - return ( _internal_hl_read_cnts( values, array_len, HL_READ ) ); -} - - -/** @class PAPI_accum_counters - * @brief Accumulate and reset counters. - * - * @par C Interface: - * \#include @n - * int PAPI_accum_counters( long long *values, int array_len ); - * - * @param *values - * an array to hold the counter values of the counting events - * @param arry_len - * the number of items in the *events array - * - * @pre - * These calls assume an initialized PAPI library and a properly added event set. - * - * @post - * The counters are reset and left running after the call. - * - * @retval PAPI_EINVAL - * One or more of the arguments is invalid. - * @retval PAPI_ESYS - * A system or C library call failed inside PAPI, see the errno variable. - * - * PAPI_accum_counters() adds the event counters into the array *values. 
- * - * @code -do_100events(); -if ( PAPI_read_counters( values, num_hwcntrs ) != PAPI_OK ) - handlw_error(1); -// values[0] now equals 100 -do_100events(); -if ( PAPI_accum_counters( values, num_hwcntrs ) != PAPI_OK ) - handle_error(1); -// values[0] now equals 200 -values[0] = -100; -do_100events(); -if ( PAPI_accum_counters(values, num_hwcntrs ) != PAPI_OK ) - handle_error(); -// values[0] now equals 0 - * @endcode - * - * @see PAPI_set_opt() PAPI_start_counters() - */ -int -PAPI_accum_counters( long long *values, int array_len ) -{ - if ( values == NULL || array_len <= 0 ) - return PAPI_EINVAL; - - return ( _internal_hl_read_cnts( values, array_len, HL_ACCUM ) ); -} - -/** @class PAPI_stop_counters - * @brief Stop counting hardware events and reset values to zero. - * - * @par C Interface: - * \#include @n - * int PAPI_stop_counters( long long *values, int array_len ); - * - * @param *values - * an array where to put the counter values - * @param array_len - * the number of items in the *values array - * - * @post - * After this function is called, the values are reset to zero. - * - * @retval PAPI_EINVAL - * One or more of the arguments is invalid. - * @retval PAPI_ENOTRUN - * The EventSet is not started yet. - * @retval PAPI_ENOEVST - * The EventSet has not been added yet. - * - * The PAPI_stop_counters() function stops the counters and copies the counts - * into the *values array. - * The counters must have been started by a previous call to PAPI_start_counters(). 
- * - * \code -int Events[2] = { PAPI_TOT_CYC, PAPI_TOT_INS }; -long long values[2]; -if ( PAPI_start_counters( Events, 2 ) != PAPI_OK ) - handle_error(1); -your_slow_code(); -if ( PAPI_stop_counters( values, 2 ) != PAPI_OK ) - handle_error(1); - * \endcode - * - * @see PAPI_read_counters() PAPI_start_counters() PAPI_set_opt() - */ -int -PAPI_stop_counters( long long *values, int array_len ) -{ - int retval; - HighLevelInfo *state = NULL; - - if ( ( retval = _internal_check_state( &state ) ) != PAPI_OK ) - return ( retval ); - - if ( state->running == 0 ) - return ( PAPI_ENOTRUN ); - - if ( state->running == HL_START ) { - if ( array_len < state->num_evts || values == NULL) { - return ( PAPI_EINVAL ); - } else { - retval = PAPI_stop( state->EventSet, values ); - } - } - - if ( state->running > HL_START ) { - long long tmp_values[3]; - retval = PAPI_stop( state->EventSet, tmp_values ); - } - - if ( retval == PAPI_OK ) { - _internal_cleanup_hl_info( state ); - PAPI_cleanup_eventset( state->EventSet ); - } - APIDBG( "PAPI_stop_counters returns %d\n", retval ); - return retval; -} - -void -_papi_hwi_shutdown_highlevel( ) -{ - HighLevelInfo *state = NULL; - - if ( PAPI_get_thr_specific( PAPI_HIGH_LEVEL_TLS, ( void ** ) &state ) == - PAPI_OK ) { - if ( state ) - papi_free( state ); - } -} diff -Nru papi-5.7.0+dfsg/src/papi_hl.h papi-6.0.0~dfsg/src/papi_hl.h --- papi-5.7.0+dfsg/src/papi_hl.h 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/papi_hl.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,6 +0,0 @@ -#ifndef PAPI_HL_H -#define PAPI_HL_H - -void _papi_hwi_shutdown_highlevel( ); - -#endif diff -Nru papi-5.7.0+dfsg/src/papi_internal.c papi-6.0.0~dfsg/src/papi_internal.c --- papi-5.7.0+dfsg/src/papi_internal.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/papi_internal.c 2020-03-04 15:57:01.000000000 +0000 @@ -59,6 +59,9 @@ hwi_presets_t user_defined_events[PAPI_MAX_USER_EVENTS]; int user_defined_events_count = 0; +THREAD_LOCAL_STORAGE_KEYWORD int 
_papi_rate_events_running = 0; +THREAD_LOCAL_STORAGE_KEYWORD int _papi_hl_events_running = 0; + /*****************************/ /* Native Event Mapping Code */ /*****************************/ @@ -111,31 +114,28 @@ } return; } -// A place to keep the current papi event code so some component functions can fetch its value -// The current event code can be stored here prior to component calls and cleared after the component returns -static unsigned int papi_event_code = -1; -static int papi_event_code_changed = -1; + void _papi_hwi_set_papi_event_code (unsigned int event_code, int update_flag) { - INTDBG("new event_code: %#x, update_flag: %d, previous event_code: %#x\n", event_code, update_flag, papi_event_code); + INTDBG("new event_code: %#x, update_flag: %d, previous event_code: %#x\n", event_code, update_flag, _papi_hwi_my_thread->tls_papi_event_code); // if call is just to reset and start over, set both flags to show nothing saved yet if (update_flag < 0) { - papi_event_code_changed = -1; - papi_event_code = -1; + _papi_hwi_my_thread->tls_papi_event_code_changed = -1; + _papi_hwi_my_thread->tls_papi_event_code = -1; return; } // if 0, it is being set prior to calling a component, if >0 it is being changed by the component - papi_event_code_changed = update_flag; + _papi_hwi_my_thread->tls_papi_event_code_changed = update_flag; // save the event code passed in - papi_event_code = event_code; + _papi_hwi_my_thread->tls_papi_event_code = event_code; return; } unsigned int _papi_hwi_get_papi_event_code () { - INTDBG("papi_event_code: %#x\n", papi_event_code); - return papi_event_code; + INTDBG("papi_event_code: %#x\n", _papi_hwi_my_thread->tls_papi_event_code); + return _papi_hwi_my_thread->tls_papi_event_code; } /* Get the index into the ESI->NativeInfoArray for the current PAPI event code */ int @@ -560,7 +560,7 @@ int result; - if (papi_event_code_changed > 0) { + if (_papi_hwi_my_thread->tls_papi_event_code_changed > 0) { result = _papi_hwi_get_papi_event_code(); 
INTDBG("EXIT: papi_event_code: %#x set by the component\n", result); return result; @@ -1992,7 +1992,6 @@ /* PAPI_hw_info_t struct */ memset(&(_papi_hwi_system_info.hw_info),0x0,sizeof(PAPI_hw_info_t)); - return PAPI_OK; } @@ -2312,7 +2311,7 @@ info->count = _papi_hwi_presets[i].count; _papi_hwi_derived_string( _papi_hwi_presets[i].derived_int, - info->derived, sizeof ( info->derived ) ); + info->derived, sizeof ( info->derived )-1 ); if ( _papi_hwi_presets[i].postfix != NULL ) strncpy( info->postfix, _papi_hwi_presets[i].postfix, diff -Nru papi-5.7.0+dfsg/src/papi_internal.h papi-6.0.0~dfsg/src/papi_internal.h --- papi-5.7.0+dfsg/src/papi_internal.h 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/papi_internal.h 2020-03-04 15:57:01.000000000 +0000 @@ -31,7 +31,6 @@ extern int _papi_hwi_num_errors; extern char **_papi_errlist; - /********************************************************/ /* This block provides general strings used in PAPI */ /* If a new string is needed for PAPI prompts */ @@ -438,6 +437,9 @@ #include "papi_lock.h" #include "threads.h" +extern THREAD_LOCAL_STORAGE_KEYWORD int _papi_rate_events_running; +extern THREAD_LOCAL_STORAGE_KEYWORD int _papi_hl_events_running; + EventSetInfo_t *_papi_hwi_lookup_EventSet( int eventset ); void _papi_hwi_set_papi_event_string (const char *event_string); char *_papi_hwi_get_papi_event_string (void); diff -Nru papi-5.7.0+dfsg/src/papi_libpfm4_events.c papi-6.0.0~dfsg/src/papi_libpfm4_events.c --- papi-5.7.0+dfsg/src/papi_libpfm4_events.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/papi_libpfm4_events.c 2020-03-04 15:57:01.000000000 +0000 @@ -115,7 +115,7 @@ } else { strncpy(my_vector->cmp_info.disabled_reason, - pfm_strerror(retval),PAPI_MAX_STR_LEN); + pfm_strerror(retval),PAPI_MAX_STR_LEN-1); _papi_hwi_unlock( NAMELIB_LOCK ); return PAPI_ESBSTR; } diff -Nru papi-5.7.0+dfsg/src/papi_vector.c papi-6.0.0~dfsg/src/papi_vector.c --- papi-5.7.0+dfsg/src/papi_vector.c 2019-03-04 
19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/papi_vector.c 2020-03-04 15:57:01.000000000 +0000 @@ -146,7 +146,7 @@ v->init_thread = ( int ( * )( hwd_context_t * ) ) vec_int_ok_dummy; if ( !v->init_control_state ) v->init_control_state = - ( int ( * )( hwd_control_state_t * ptr ) ) vec_void_dummy; + ( int ( * )( hwd_control_state_t * ptr ) ) vec_int_dummy; if ( !v->update_control_state ) v->update_control_state = ( int ( * ) ( hwd_control_state_t *, NativeInfo_t *, diff -Nru papi-5.7.0+dfsg/src/Rules.perfmon2 papi-6.0.0~dfsg/src/Rules.perfmon2 --- papi-5.7.0+dfsg/src/Rules.perfmon2 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/Rules.perfmon2 2020-03-04 15:56:57.000000000 +0000 @@ -60,8 +60,8 @@ -install -d $(DESTDIR)$(LIBDIR) ifneq (,$(findstring shared,$(LIBS))) cp -p $(SHLIB) $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) - cd $(DESTDIR)$(LIBDIR); ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPIVER) - cd $(DESTDIR)$(LIBDIR); ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so + ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPISOVER) + ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) $(DESTDIR)$(LIBDIR)/libpapi.so endif -install -d $(DESTDIR)$(DATADIR) cp -f ./papi_events.csv $(DESTDIR)$(DATADIR) diff -Nru papi-5.7.0+dfsg/src/run_tests_exclude.txt papi-6.0.0~dfsg/src/run_tests_exclude.txt --- papi-5.7.0+dfsg/src/run_tests_exclude.txt 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/run_tests_exclude.txt 2020-03-04 15:57:02.000000000 +0000 @@ -44,6 +44,13 @@ # Some architectures require OMP_NUM_THREADS otherwise the test hangs ctests/omptough +# Mixed high-level and low-level tests with different components +ctests/serial_hl_ll_comb2 + +# MPI tests for high-level API +ctests/mpi_hl +ctests/mpi_omp_hl + # these tests haven't been implemented diff -Nru papi-5.7.0+dfsg/src/run_tests.sh 
papi-6.0.0~dfsg/src/run_tests.sh --- papi-5.7.0+dfsg/src/run_tests.sh 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/run_tests.sh 2020-03-04 15:57:02.000000000 +0000 @@ -29,6 +29,11 @@ export TESTS_QUIET fi +# Disable high-level output +if [ "x$TESTS_QUIET" != "xTESTS_QUIET" ] ; then + export PAPI_REPORT=1 +fi + if [ "x$VALGRIND" != "x" ]; then VALGRIND="valgrind --leak-check=full"; fi @@ -41,6 +46,7 @@ #EXCLUDE=`grep --regexp=^# --invert-match run_tests_exclude.txt` EXCLUDE=`grep -v -e '^#\|^$' run_tests_exclude.txt` + ALLTESTS="$VTESTS $CTESTS $FTESTS $COMPTESTS"; PATH=./ctests:$PATH @@ -122,9 +128,15 @@ done if [ $MATCH -ne 1 ]; then if [ -x $i ]; then - RAN="$i $RAN" - printf "Running %-50s " $i: - $VALGRIND ./$i $TESTS_QUIET + RAN="$i $RAN" + printf "Running %-50s %s" $i: + $VALGRIND ./$i $TESTS_QUIET + + #delete output folder for high-level tests + case "$i" in + *"_hl"*) rm -r papi_hl_output ;; + esac + fi; fi; MATCH=0 @@ -145,9 +157,15 @@ done if [ $MATCH -ne 1 ]; then if [ -x $i ]; then - RAN="$i $RAN" - printf "Running %-50s " $i: + RAN="$i $RAN" + printf "Running %-50s %s" $i: $VALGRIND ./$i $TESTS_QUIET + + #delete output folder for high-level tests + case "$i" in + *"_hl"*) rm -r papi_hl_output ;; + esac + fi; fi; MATCH=0 @@ -168,9 +186,15 @@ done if [ $MATCH -ne 1 ]; then if [ -x $i ]; then - RAN="$i $RAN" - printf "Running $i:\n" - $VALGRIND ./$i $TESTS_QUIET + RAN="$i $RAN" + printf "Running $i:\n" + $VALGRIND ./$i $TESTS_QUIET + + #delete output folder for high-level tests + case "$i" in + *"_hl"*) rm -r papi_hl_output ;; + esac + fi; fi; MATCH=0 diff -Nru papi-5.7.0+dfsg/src/testlib/clockcore.c papi-6.0.0~dfsg/src/testlib/clockcore.c --- papi-5.7.0+dfsg/src/testlib/clockcore.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/testlib/clockcore.c 2020-03-04 15:57:02.000000000 +0000 @@ -48,6 +48,7 @@ elapsed_cyc[i] = ( long long ) PAPI_get_virt_usec( ); break; default: + free(elapsed_cyc); return -1; } diff -Nru 
papi-5.7.0+dfsg/src/testlib/ftests_util.F papi-6.0.0~dfsg/src/testlib/ftests_util.F --- papi-5.7.0+dfsg/src/testlib/ftests_util.F 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/testlib/ftests_util.F 2020-03-04 15:57:02.000000000 +0000 @@ -79,6 +79,14 @@ 100 format(a,t41,a) end + subroutine ftests_hl_pass(test_str) + implicit integer (p) + character*(*) test_str + + write(*,100) test_str,' PASSED' + 100 format(a,t41,a) + end + subroutine ftest_fail(file, line, callstr, retval) implicit integer (p) character*(*) file diff -Nru papi-5.7.0+dfsg/src/testlib/papi_test.h papi-6.0.0~dfsg/src/testlib/papi_test.h --- papi-5.7.0+dfsg/src/testlib/papi_test.h 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/testlib/papi_test.h 2020-03-04 15:57:02.000000000 +0000 @@ -48,6 +48,13 @@ extern struct test_events_t test_events[]; +/* Mark non-returning functions if the compiler support GNU C extensions. */ +#if defined(__GNUC__) +#define PAPI_NORETURN __attribute__ ((__noreturn__)) +#else +#define PAPI_NORETURN +#endif + void validate_string(const char *name, char *s); void *get_overflow_address(void *context); void free_test_space(long long ** values, int num_tests); @@ -65,9 +72,12 @@ char *stringify_granularity(int granularity); char *stringify_all_granularities(int granularities); int tests_quiet(int argc, char **argv); -void test_pass(const char *filename); -void test_fail(const char *file, int line, const char *call, int retval); -void test_skip(const char *file, int line, const char *call, int retval); +void PAPI_NORETURN test_pass(const char *filename); +void PAPI_NORETURN test_hl_pass(const char *filename); +void PAPI_NORETURN test_fail(const char *file, int line, + const char *call, int retval); +void PAPI_NORETURN test_skip(const char *file, int line, + const char *call, int retval); void test_warn(const char *file, int line, const char *call, int retval); void test_print_event_header(const char *call, int evset); int approx_equals(double a, double 
b); diff -Nru papi-5.7.0+dfsg/src/testlib/test_utils.c papi-6.0.0~dfsg/src/testlib/test_utils.c --- papi-5.7.0+dfsg/src/testlib/test_utils.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/testlib/test_utils.c 2020-03-04 15:57:02.000000000 +0000 @@ -428,7 +428,7 @@ /* TESTS_QUIET #define printf hack */ /* FIXME! Revert to printf once we are done converting */ -void +void PAPI_NORETURN test_pass( const char *filename ) { (void)filename; @@ -463,8 +463,31 @@ } +void PAPI_NORETURN +test_hl_pass( const char *filename ) +{ + (void)filename; + + if ( TEST_WARN ) { + print_spaces(59); + if (TESTS_COLOR) fprintf( stdout, "%s", YELLOW); + fprintf( stdout, "PASSED with WARNING"); + if (TESTS_COLOR) fprintf( stdout, "%s", NORMAL); + fprintf( stdout, "\n"); + } + else { + if (TESTS_COLOR) fprintf( stdout, "%s",GREEN); + fprintf( stdout, "PASSED"); + if (TESTS_COLOR) fprintf( stdout, "%s",NORMAL); + fprintf( stdout, "\n"); + } + + exit( 0 ); + +} + /* Use a positive value of retval to simply print an error message */ -void +void PAPI_NORETURN test_fail( const char *file, int line, const char *call, int retval ) { // int line_pad; @@ -557,7 +580,7 @@ TEST_WARN++; } -void +void PAPI_NORETURN test_skip( const char *file, int line, const char *call, int retval ) { // int line_pad; diff -Nru papi-5.7.0+dfsg/src/threads.c papi-6.0.0~dfsg/src/threads.c --- papi-5.7.0+dfsg/src/threads.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/threads.c 2020-03-04 15:57:02.000000000 +0000 @@ -286,6 +286,10 @@ return PAPI_ENOMEM; } + /* init event memory variables, used by papi_internal.c */ + thread->tls_papi_event_code = -1; + thread->tls_papi_event_code_changed = -1; + /* Call the component to fill in anything special. 
*/ for ( i = 0; i < papi_num_components; i++ ) { @@ -421,6 +425,11 @@ unsigned long tid; int i, failure = 0; + /* Clear event memory variables */ + thread->tls_papi_event_code = -1; + thread->tls_papi_event_code_changed = -1; + + /* Get thread id */ if ( _papi_hwi_thread_id_fn ) tid = ( *_papi_hwi_thread_id_fn ) ( ); else diff -Nru papi-5.7.0+dfsg/src/threads.h papi-6.0.0~dfsg/src/threads.h --- papi-5.7.0+dfsg/src/threads.h 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/threads.h 2020-03-04 15:57:02.000000000 +0000 @@ -30,6 +30,11 @@ EventSetInfo_t **running_eventset; EventSetInfo_t *from_esi; /* ESI used for last update this control state */ int wants_signal; + + // The current event code can be stored here prior to + // component calls and cleared after the component returns. + unsigned int tls_papi_event_code; + int tls_papi_event_code_changed; } ThreadInfo_t; /** The list of threads, gets initialized to master process with TID of getpid() diff -Nru papi-5.7.0+dfsg/src/utils/cost_utils.c papi-6.0.0~dfsg/src/utils/cost_utils.c --- papi-5.7.0+dfsg/src/utils/cost_utils.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/utils/cost_utils.c 2020-03-04 15:57:02.000000000 +0000 @@ -117,8 +117,8 @@ i_50=(int)num_iters/2; // index for 75%, not quite accurate because it doesn't // take even or odd into consideration - i_75=(int)num_iters/4*3; - i_99=(int)num_iters/10*9.9; + i_75=((int)num_iters*3)/4; + i_99=((int)num_iters*99)/100; qsort(a_sort,num_iters-1,sizeof(long long),cmpfunc); diff -Nru papi-5.7.0+dfsg/src/utils/Makefile papi-6.0.0~dfsg/src/utils/Makefile --- papi-5.7.0+dfsg/src/utils/Makefile 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/utils/Makefile 2020-03-04 15:57:02.000000000 +0000 @@ -16,46 +16,46 @@ default all utils: $(ALL) papi_avail: papi_avail.o $(PAPILIB) print_header.o - $(CC) $(LDFLAGS) -o papi_avail papi_avail.o print_header.o $(PAPILIB) + $(CC) -o papi_avail papi_avail.o print_header.o $(PAPILIB) $(LDFLAGS) 
papi_clockres: papi_clockres.o $(PAPILIB) $(CLOCKCORE) - $(CC) $(LDFLAGS) -o papi_clockres papi_clockres.o $(PAPILIB) $(CLOCKCORE) -lm + $(CC) -o papi_clockres papi_clockres.o $(PAPILIB) $(CLOCKCORE) -lm $(LDFLAGS) papi_command_line: papi_command_line.o $(PAPILIB) $(DOLOOPS) - $(CC) $(LDFLAGS) -o papi_command_line papi_command_line.o $(PAPILIB) $(DOLOOPS) + $(CC) -o papi_command_line papi_command_line.o $(PAPILIB) $(DOLOOPS) $(LDFLAGS) papi_component_avail: papi_component_avail.o $(PAPILIB) print_header.o - $(CC) $(LDFLAGS) -o papi_component_avail papi_component_avail.o $(PAPILIB) print_header.o + $(CC) -o papi_component_avail papi_component_avail.o $(PAPILIB) print_header.o $(LDFLAGS) papi_cost: papi_cost.o $(PAPILIB) cost_utils.o - $(CC) $(LDFLAGS) -o papi_cost papi_cost.o cost_utils.o $(PAPILIB) -lm + $(CC) -o papi_cost papi_cost.o cost_utils.o $(PAPILIB) -lm $(LDFLAGS) papi_decode: papi_decode.o $(PAPILIB) - $(CC) $(LDFLAGS) -o papi_decode papi_decode.o $(PAPILIB) + $(CC) -o papi_decode papi_decode.o $(PAPILIB) $(LDFLAGS) papi_error_codes: papi_error_codes.o $(PAPILIB) - $(CC) $(LDFLAGS) -o papi_error_codes papi_error_codes.o $(PAPILIB) + $(CC) -o papi_error_codes papi_error_codes.o $(PAPILIB) $(LDFLAGS) papi_event_chooser: papi_event_chooser.o $(PAPILIB) print_header.o - $(CC) $(LDFLAGS) -o papi_event_chooser papi_event_chooser.o print_header.o $(PAPILIB) + $(CC) -o papi_event_chooser papi_event_chooser.o print_header.o $(PAPILIB) $(LDFLAGS) papi_hybrid_native_avail: papi_hybrid_native_avail.o $(PAPILIB) - $(CC) $(LDFLAGS) -o papi_hybrid_native_avail papi_hybrid_native_avail.o $(PAPILIB) + $(CC) -o papi_hybrid_native_avail papi_hybrid_native_avail.o $(PAPILIB) $(LDFLAGS) papi_mem_info: papi_mem_info.o $(PAPILIB) - $(CC) $(LDFLAGS) -o papi_mem_info papi_mem_info.o $(PAPILIB) + $(CC) -o papi_mem_info papi_mem_info.o $(PAPILIB) $(LDFLAGS) papi_multiplex_cost: papi_multiplex_cost.o $(PAPILIB) cost_utils.o - $(CC) $(LDFLAGS) -o papi_multiplex_cost 
papi_multiplex_cost.o cost_utils.o $(PAPILIB) -lm + $(CC) -o papi_multiplex_cost papi_multiplex_cost.o cost_utils.o $(PAPILIB) -lm $(LDFLAGS) -papi_native_avail: papi_native_avail.o $(PAPILIB) print_header.o - $(CC) $(LDFLAGS) -o papi_native_avail papi_native_avail.o $(PAPILIB) print_header.o +papi_native_avail: papi_native_avail.o $(PAPILIB) print_header.o papi_sde_interface.o + $(CC) -o papi_native_avail papi_native_avail.o $(PAPILIB) print_header.o $(LDFLAGS) papi_sde_interface.o papi_version: papi_version.o $(PAPILIB) - $(CC) $(LDFLAGS) -o papi_version papi_version.o $(PAPILIB) + $(CC) -o papi_version papi_version.o $(PAPILIB) $(LDFLAGS) papi_xml_event_info: papi_xml_event_info.o $(PAPILIB) - $(CC) $(LDFLAGS) -o papi_xml_event_info papi_xml_event_info.o $(PAPILIB) + $(CC) -o papi_xml_event_info papi_xml_event_info.o $(PAPILIB) $(LDFLAGS) @@ -65,6 +65,9 @@ print_header.o: print_header.h print_header.c $(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c print_header.c +papi_sde_interface.o: papi_sde_interface.c + $(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c papi_sde_interface.c + clean: rm -f *.o *.stderr *.stdout core *~ $(ALL) diff -Nru papi-5.7.0+dfsg/src/utils/papi_avail.c papi-6.0.0~dfsg/src/utils/papi_avail.c --- papi-5.7.0+dfsg/src/utils/papi_avail.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/utils/papi_avail.c 2020-03-04 15:57:02.000000000 +0000 @@ -320,11 +320,11 @@ for( args = 1; args < argc; args++ ) { if ( strstr( argv[args], "-e" ) ) { print_event_info = 1; - name = argv[args + 1]; - if ( ( name == NULL ) || ( strlen( name ) == 0 ) ) { + if( (args+1 >= argc) || ( argv[args+1] == NULL ) || ( strlen( argv[args+1] ) == 0 ) ) { print_help( argv ); exit( 1 ); } + name = argv[args + 1]; } else if ( strstr( argv[args], "-c" ) || strstr (argv[args], "--check") ) { diff -Nru papi-5.7.0+dfsg/src/utils/papi_command_line.c papi-6.0.0~dfsg/src/utils/papi_command_line.c --- papi-5.7.0+dfsg/src/utils/papi_command_line.c 2019-03-04 19:56:23.000000000 +0000 +++ 
papi-6.0.0~dfsg/src/utils/papi_command_line.c 2020-03-04 15:57:02.000000000 +0000 @@ -110,6 +110,8 @@ /* Automatically pass if no events, for run_tests.sh */ if ( num_events == 0 ) { + free(values); + free(success); printf("No events specified!\n"); printf("Try running something like: %s PAPI_TOT_CYC\n\n", argv[0]); @@ -124,6 +126,8 @@ retval = PAPI_start( EventSet ); if (retval != PAPI_OK ) { + free(values); + free(success); fprintf(stderr,"Error! PAPI_start\n"); exit( retval ); } @@ -133,6 +137,8 @@ retval = PAPI_stop( EventSet, values ); if (retval != PAPI_OK ) { + free(values); + free(success); fprintf(stderr,"Error! PAPI_stop\n"); exit( retval ); } @@ -170,6 +176,8 @@ printf( "\n----------------------------------\n" ); + free(values); + free(success); return 0; } diff -Nru papi-5.7.0+dfsg/src/utils/papi_component_avail.c papi-6.0.0~dfsg/src/utils/papi_component_avail.c --- papi-5.7.0+dfsg/src/utils/papi_component_avail.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/utils/papi_component_avail.c 2020-03-04 15:57:02.000000000 +0000 @@ -103,7 +103,7 @@ retval = papi_print_header( "Available components and " "hardware information.\n", &hwinfo ); if ( retval != PAPI_OK ) { - fprintf(stderr,"Error! PAPI_get_ahrdware_info\n"); + fprintf(stderr,"Error! 
PAPI_get_hardware_info\n"); return 2; } @@ -138,24 +138,38 @@ printf( "Name: %-23s %s\n", cmpinfo->name ,cmpinfo->description); printf( " %-23s Native: %d, Preset: %d, Counters: %d\n", " ", cmpinfo->num_native_events, cmpinfo->num_preset_events, cmpinfo->num_cntrs); - printf( " %-23s PMUs supported: ", " "); - int line_len = 49; - for (i=0 ; ipmu_names[i] == NULL) continue; - - if (line_len + strlen(cmpinfo->pmu_names[i]) > 130) { - printf("\n %-23s ", " "); - line_len = 49; - } else { - line_len += strlen(cmpinfo->pmu_names[i]); - } - // if it is not the first entry on a line, separate the names - if ((i != 0) && (line_len > 50)) { - printf(", "); - } - printf("%s", cmpinfo->pmu_names[i]); - } - printf("\n\n"); + + int pmus=0; + for (i=0; ipmu_names[i] != NULL) pmus++; // Non-Null get printed. + } + + if (pmus) { // If we have any, print. + printf( " %-23s PMUs supported: ", " "); + int line_len = 48, name_len; + for (i=0 ; ipmu_names[i] == NULL) continue; + + name_len = strlen(cmpinfo->pmu_names[i]); + + if ((line_len + 2 + name_len) > 130) { // If it would be too long, + printf("\n %-23s ", " "); // terminate line without printing current name, + line_len = 48; // reset line length. + } + + // if it is not the first entry on a line, separate the names + if (line_len > 48) { + printf(", "); + line_len += 2; // account for the separator. + } + printf("%s", cmpinfo->pmu_names[i]); + line_len += name_len; // Add the new name to the length. + } + + printf("\n"); + } // end if we had PMUs to print. + + printf("\n"); // extra line. 
if ( flags.details ) { printf( " %-23s Version:\t\t\t%s\n", " ", cmpinfo->version ); diff -Nru papi-5.7.0+dfsg/src/utils/papi_multiplex_cost.c papi-6.0.0~dfsg/src/utils/papi_multiplex_cost.c --- papi-5.7.0+dfsg/src/utils/papi_multiplex_cost.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/utils/papi_multiplex_cost.c 2020-03-04 15:57:02.000000000 +0000 @@ -675,6 +675,10 @@ PAPI_cleanup_eventset( KernelMPX ); finalize_test(); + if ( values != NULL ) free(values); + if ( array != NULL ) free(array); + if ( Events != NULL ) free(Events); + return 0; cleanup: diff -Nru papi-5.7.0+dfsg/src/utils/papi_native_avail.c papi-6.0.0~dfsg/src/utils/papi_native_avail.c --- papi-5.7.0+dfsg/src/utils/papi_native_avail.c 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/utils/papi_native_avail.c 2020-03-04 15:57:02.000000000 +0000 @@ -46,9 +46,12 @@ #include #include #include +#include +#include #include "papi.h" #include "print_header.h" +#include "components/sde/interface/papi_sde_interface.h" #define EVT_LINE 80 #define EVT_LINE_BUF_SIZE 4096 @@ -60,7 +63,8 @@ int include; int xclude; int check; - char *name, *istr, *xstr; + int list_sdes; + char *path, *name, *istr, *xstr; int darr; int dear; int iarr; @@ -80,6 +84,7 @@ printf( "\nGeneral command options:\n" ); printf( "\t-h, --help print this help message\n" ); printf( "\t-c, --check attempts to add each event\n"); + printf( "\t-sde FILE lists SDEs that are registered by the library or executable in FILE\n" ); printf( "\t-e EVENTNAME display detailed information about named native event\n" ); printf( "\t-i EVENTSTR include only event names that contain EVENTSTR\n" ); printf( "\t-x EVENTSTR exclude any event names that contain EVENTSTR\n" ); @@ -128,24 +133,36 @@ else if ( !strcmp( argv[i], "-e" ) ) { f->named = 1; i++; - f->name = argv[i]; - if ( i >= argc || no_str_arg( f->name ) ) { + if ( i < argc ) + f->name = argv[i]; + if ( no_str_arg( f->name ) ) { printf( "Invalid argument for -e\n"); exit(1); } + 
} else if ( !strcmp( argv[i], "-sde" ) ) { + f->list_sdes = 1; + i++; + if ( i < argc ) + f->path = argv[i]; + if ( no_str_arg( f->path ) ) { + printf( "Invalid argument for -sde\n"); + exit(1); + } } else if ( !strcmp( argv[i], "-i" ) ) { f->include = 1; i++; - f->istr = argv[i]; - if ( i >= argc || no_str_arg( f->istr ) ) { + if ( i < argc ) + f->istr = argv[i]; + if ( no_str_arg( f->istr ) ) { printf( "Invalid argument for -i\n"); exit(1); } } else if ( !strcmp( argv[i], "-x" ) ) { f->xclude = 1; i++; - f->xstr = argv[i]; - if ( i >= argc || no_str_arg( f->xstr ) ) { + if ( i < argc ) + f->xstr = argv[i]; + if ( no_str_arg( f->xstr ) ) { printf( "Invalid argument for -x\n"); exit(1); } @@ -351,6 +368,33 @@ return ( 1 ); } +void +invoke_hook_fptr( char *lib_path ) +{ + void *dl_handle; + typedef void *(* hook_fptr_t)(papi_sde_fptr_struct_t *); + hook_fptr_t hook_func_ptr; + + /* Clear any old error conditions */ + (void)dlerror(); + + dl_handle = dlopen(lib_path, RTLD_LOCAL | RTLD_LAZY); + if ( NULL == dl_handle ) { + return; + } + + hook_func_ptr = (hook_fptr_t)dlsym(dl_handle, "papi_sde_hook_list_events"); + if ( (NULL != hook_func_ptr) && ( NULL == dlerror()) ) { + papi_sde_fptr_struct_t fptr_struct; + + POPULATE_SDE_FPTR_STRUCT( fptr_struct ); + (void)hook_func_ptr( &fptr_struct ); + } + + dlclose(dl_handle); + return; +} + int main( int argc, char **argv ) { @@ -400,6 +444,77 @@ return 2; } + /* + The following code will execute if the user wants to list the SDEs in the + library (or executable) stored in flags.path. This code will not list the + SDEs per se, it will only give an opportunity to the library to register + their SDEs, so they can be listed further down. + */ + if ( flags.list_sdes ){ + char *cmd; + FILE *pipe; + + if ( access(flags.path, R_OK) == -1 ){ + fprintf(stderr,"Error! 
Unable to read file '%s'.\n",flags.path); + goto no_sdes; + } + + int len = 5+strlen(flags.path); + cmd = (char *)calloc(len, sizeof(char)); + if( NULL == cmd ) goto no_sdes; + + int l = snprintf(cmd, len, "ldd %s",flags.path); + if(l ")) ) { + goto skip_lib; + } + + int status = sscanf(lineptr, "%ms => %ms (%*x)", &lib_name, &lib_path); + /* If this line is malformed, ignore it. */ + if(2 != status){ + /* According to the man page: "it is necessary to call free() + only if the scanf() call successfully read a string." */ + goto skip_lib; + } + + /* Invoke the hook for the dependency we just discovered */ + invoke_hook_fptr(lib_path); + + if( lib_name ) free(lib_name); + if( lib_path ) free(lib_path); +skip_lib: + if(lineptr) free(lineptr); + lineptr = NULL; + n=0; + } + pclose(pipe); + } + + /* Finally, invoke the hook for the file the user gave us */ + invoke_hook_fptr(flags.path); + + if( NULL != cmd ) free(cmd); + } +no_sdes: + /* Do this code if the event name option was specified on the commandline */ if ( flags.named ) { if ( PAPI_event_name_to_code( flags.name, &i ) == PAPI_OK ) { diff -Nru papi-5.7.0+dfsg/src/utils/papi_sde_interface.c papi-6.0.0~dfsg/src/utils/papi_sde_interface.c --- papi-5.7.0+dfsg/src/utils/papi_sde_interface.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/utils/papi_sde_interface.c 2020-03-04 15:57:02.000000000 +0000 @@ -0,0 +1,240 @@ +#include +#include +#include "components/sde/interface/papi_sde_interface.h" + + +#pragma weak papi_sde_init +#pragma weak papi_sde_register_counter +#pragma weak papi_sde_register_fp_counter +#pragma weak papi_sde_unregister_counter +#pragma weak papi_sde_describe_counter +#pragma weak papi_sde_create_counter +#pragma weak papi_sde_inc_counter +#pragma weak papi_sde_create_recorder +#pragma weak papi_sde_record +#pragma weak papi_sde_reset_recorder +#pragma weak papi_sde_reset_counter + +#pragma weak papi_sde_compare_long_long +#pragma weak papi_sde_compare_int +#pragma weak 
papi_sde_compare_double +#pragma weak papi_sde_compare_float + +papi_handle_t +__attribute__((weak)) +papi_sde_init(const char *name_of_library) +{ + (void) name_of_library; + + return NULL; +} + +int +__attribute__((weak)) +papi_sde_register_counter(papi_handle_t handle, const char *event_name, int cntr_mode, int cntr_type, void *counter) +{ + (void) handle; + (void) event_name; + (void) cntr_mode; + (void) cntr_type; + (void) counter; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_register_fp_counter(papi_handle_t handle, const char *event_name, int cntr_mode, int cntr_type, papi_sde_fptr_t func_ptr, void *param ) +{ + (void) handle; + (void) event_name; + (void) cntr_mode; + (void) cntr_type; + (void) func_ptr; + (void) param; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_unregister_counter( void *handle, const char *event_name) +{ + (void) handle; + (void) event_name; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_describe_counter(papi_handle_t handle, const char *event_name, const char *event_description) +{ + (void) handle; + (void) event_name; + (void) event_description; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_add_counter_to_group(papi_handle_t handle, const char *event_name, const char *group_name, uint32_t group_flags) +{ + (void) handle; + (void) event_name; + (void) group_name; + (void) group_flags; + + /* do nothing */ + + return 0; +} + + +int +__attribute__((weak)) +papi_sde_create_counter( papi_handle_t handle, const char *event_name, int cntr_type, void **cntr_handle ) +{ + (void) handle; + (void) event_name; + (void) cntr_type; + (void) cntr_handle; + + /* do nothing */ + + return 0; +} + + +int +__attribute__((weak)) +papi_sde_inc_counter( papi_handle_t cntr_handle, long long int increment) +{ + (void) cntr_handle; + (void) increment; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) 
+papi_sde_create_recorder( papi_handle_t handle, const char *event_name, size_t typesize, int (*cmpr_fptr)(const void *p1, const void *p2), void **record_handle ) +{ + (void) handle; + (void) event_name; + (void) typesize; + (void) record_handle; + (void) cmpr_fptr; + + /* do nothing */ + + return 0; +} + + +int +__attribute__((weak)) +papi_sde_record( void *record_handle, size_t typesize, void *value) +{ + (void) record_handle; + (void) typesize; + (void) value; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_reset_recorder(void *record_handle ) +{ + (void) record_handle; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_reset_counter( void *cntr_handle ) +{ + (void) cntr_handle; + + /* do nothing */ + + return 0; +} + +void +__attribute__((weak)) +*papi_sde_get_counter_handle( void *handle, const char *event_name) +{ + (void) handle; + (void) event_name; + + /* do nothing */ + + return NULL; +} + + +int +__attribute__((weak)) +papi_sde_compare_long_long(const void *p1, const void *p2) +{ + (void) p1; + (void) p2; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_compare_int(const void *p1, const void *p2) +{ + (void) p1; + (void) p2; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_compare_double(const void *p1, const void *p2) +{ + (void) p1; + (void) p2; + + /* do nothing */ + + return 0; +} + +int +__attribute__((weak)) +papi_sde_compare_float(const void *p1, const void *p2) +{ + (void) p1; + (void) p2; + + /* do nothing */ + + return 0; +} diff -Nru papi-5.7.0+dfsg/src/validation_tests/fp_validation_hl.c papi-6.0.0~dfsg/src/validation_tests/fp_validation_hl.c --- papi-5.7.0+dfsg/src/validation_tests/fp_validation_hl.c 1970-01-01 00:00:00.000000000 +0000 +++ papi-6.0.0~dfsg/src/validation_tests/fp_validation_hl.c 2020-03-04 15:57:02.000000000 +0000 @@ -0,0 +1,52 @@ +/* This test runs a "classic" matrix multiply + * and then runs it again with the 
inner loop swapped. + * the swapped version should have better MFLIPS/MFLOPS/IPC and we test that. + */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" +#include "testcode.h" + +int main( int argc, char **argv ) +{ + int retval; + int quiet = 0; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + // Flips classic + retval = PAPI_hl_region_begin("matrix_multiply_classic"); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_begin", retval ); + } + if ( !quiet ) { + printf("flops_float_matrix_matrix_multiply()\n"); + } + flops_float_matrix_matrix_multiply(); + retval = PAPI_hl_region_end("matrix_multiply_classic"); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_end", retval ); + } + + // Flips swapped + retval = PAPI_hl_region_begin("matrix_multiply_swapped"); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_begin", retval ); + } + if ( !quiet ) { + printf("flops_float_swapped_matrix_matrix_multiply()\n"); + } + flops_float_swapped_matrix_matrix_multiply(); + retval = PAPI_hl_region_end("matrix_multiply_swapped"); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_hl_region_end", retval ); + } + + test_hl_pass( __FILE__ ); + + return 0; +} diff -Nru papi-5.7.0+dfsg/src/validation_tests/Makefile.recipies papi-6.0.0~dfsg/src/validation_tests/Makefile.recipies --- papi-5.7.0+dfsg/src/validation_tests/Makefile.recipies 2019-03-04 19:56:23.000000000 +0000 +++ papi-6.0.0~dfsg/src/validation_tests/Makefile.recipies 2020-03-04 15:57:02.000000000 +0000 @@ -1,4 +1,5 @@ -ALL = cycles_validation flops_validation \ +ALL = fp_validation_hl \ + cycles_validation flops_validation \ papi_br_cn papi_br_ins papi_br_msp \ papi_br_ntk papi_br_prc papi_br_tkn papi_br_ucn \ papi_dp_ops papi_fp_ops papi_sp_ops papi_hw_int \ @@ -37,6 +38,8 @@ matrix_multiply.o: matrix_multiply.c matrix_multiply.h $(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c 
matrix_multiply.c +fp_validation_hl: fp_validation_hl.o $(TESTLIB) $(PAPILIB) flops_testcode.o + $(CC) -o fp_validation_hl fp_validation_hl.o $(TESTLIB) flops_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB) -lpthread cycles_validation: cycles_validation.o $(TESTLIB) $(PAPILIB) display_error.o instructions_testcode.o $(CC) -o cycles_validation cycles_validation.o $(TESTLIB) display_error.o instructions_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB)