diff -Nru ricks-amdgpu-utils-3.0.0/amdgpu-chk ricks-amdgpu-utils-3.5.0/amdgpu-chk --- ricks-amdgpu-utils-3.0.0/amdgpu-chk 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/amdgpu-chk 1970-01-01 00:00:00.000000000 +0000 @@ -1,242 +0,0 @@ -#!/usr/bin/env python3 -""" amdgpu-chk - Checks OS/Python compatibility - - This utility verifies if the environment is compatible with amdgpu-utils. - - Copyright (C) 2019 RueiKe - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -""" -__author__ = 'RueiKe' -__copyright__ = 'Copyright (C) 2019 RueiKe' -__credits__ = ['Craig Echt - Testing, Debug, Verification, and Documentation'] -__license__ = 'GNU General Public License' -__program_name__ = 'amdgpu-chk' -__version__ = 'v3.0.0' -__maintainer__ = 'RueiKe' -__status__ = 'Stable Release' -__docformat__ = 'reStructuredText' -# pylint: disable=multiple-statements -# pylint: disable=line-too-long - -import argparse -import re -import subprocess -import os -import shlex -import platform -import sys -import shutil -import warnings -warnings.filterwarnings('ignore') - - -class GutConst: - """ - Base object for chk util. These are simplified versions of what are in env module designed to run in python2 - in order to detect setup issues even if wrong version of python. - """ - def __init__(self): - self.DEBUG = False - - def check_env(self): - """ - Checks python version, kernel version, and amd gpu driver version. 
- :return: A list of 3 integers representing status of 3 check items. - :rtype: list - """ - ret_val = [0, 0, 0] - # Check python version - required_pversion = [3, 6] - (python_major, python_minor, python_patch) = platform.python_version_tuple() - print('Using python ' + python_major + '.' + python_minor + '.' + python_patch) - if int(python_major) < required_pversion[0]: - print(' ' + '\x1b[1;37;41m' + ' but amdgpu-utils requires python ' + - str(required_pversion[0]) + '.' + str(required_pversion[1]) + ' or newer.' + '\x1b[0m') - ret_val[0] = -1 - elif int(python_major) == required_pversion[0] and int(python_minor) < required_pversion[1]: - print(' ' + '\x1b[1;37;41m' + ' but amdgpu-utils requires python ' + - str(required_pversion[0]) + '.' + str(required_pversion[1]) + ' or newer.' + '\x1b[0m') - ret_val[0] = -1 - else: - print(' ' + '\x1b[1;37;42m' + ' Python version OK. ' + '\x1b[0m') - ret_val[0] = 0 - - # Check Linux Kernel version - required_kversion = [4, 8] - linux_version = platform.release() - print('Using Linux Kernel ' + str(linux_version)) - if int(linux_version.split('.')[0]) < required_kversion[0]: - print(' ' + '\x1b[1;37;41m' + ' but amdgpu-util requires ' + - str(required_kversion[0]) + '.' + str(required_kversion[1]) + ' or newer.' + '\x1b[0m') - ret_val[1] = -2 - elif int(linux_version.split('.')[0]) == required_kversion[0] and \ - int(linux_version.split('.')[1]) < required_kversion[1]: - print(' ' + '\x1b[1;37;41m' + ' but amdgpu-util requires ' + - str(required_kversion[0]) + '.' + str(required_kversion[1]) + ' or newer.' + '\x1b[0m') - ret_val[1] = -2 - else: - print(' ' + '\x1b[1;37;42m' + ' OS kernel OK. ' + '\x1b[0m') - ret_val[1] = 0 - - # Check for amdgpu driver - ret_val[2] = 0 if self.read_amd_driver_version() else -3 - return ret_val - - def read_amd_driver_version(self): - """ - Read the AMD driver version and store in GutConst object. 
- :return: True if successful - :rtype: bool - """ - try: - cmd_dpkg = shutil.which('dpkg') - except (NameError, AttributeError): - cmd_dpkg = None - if not cmd_dpkg: - print('Command dpkg not found. Can not determine amdgpu version.') - print(' ' + '\x1b[1;30;43m' + ' gpu-utils can still be used. ' + '\x1b[0m') - return True - version_ok = False - for pkgname in ['amdgpu', 'amdgpu-core', 'amdgpu-pro', 'rocm-utils']: - try: - dpkg_out = subprocess.check_output(shlex.split(cmd_dpkg + ' -l ' + pkgname), - shell=False, stderr=subprocess.DEVNULL).decode().split('\n') - for dpkg_line in dpkg_out: - for driverpkg in ['amdgpu', 'rocm']: - search_obj = re.search(driverpkg, dpkg_line) - if search_obj: - if self.DEBUG: print('Debug: ' + dpkg_line) - dpkg_items = dpkg_line.split() - if len(dpkg_items) > 2: - if re.fullmatch(r'.*none.*', dpkg_items[2]): - continue - else: - print('AMD: ' + driverpkg + ' version: ' + dpkg_items[2]) - print(' ' + '\x1b[1;37;42m' + ' AMD driver OK. ' + '\x1b[0m') - version_ok = True - break - if version_ok: - break - except (subprocess.CalledProcessError, OSError): - continue - if not version_ok: - print('amdgpu/rocm version: UNKNOWN') - print(' ' + '\x1b[1;30;43m' + ' gpu-utils can still be used. ' + '\x1b[0m') - # return False - return True - - -GUT_CONST = GutConst() - - -def is_venv_installed(): - """ - Check if a venv is being used - :return: True if using venv - :rtype: bool - """ - cmdstr = 'python3 -m venv -h > /dev/null' - try: - p = subprocess.Popen(shlex.split(cmdstr), shell=False, stdin=subprocess.PIPE, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - output, _error = p.communicate() - # print('subprocess output: ', output.decode(), 'subprocess error: ', error.decode()) - if not re.fullmatch(r'.*No module named.*', output.decode()): - print('python3 venv is installed') - print(' ' + '\x1b[1;37;42m' + ' python3-venv OK. 
' + '\x1b[0m') - return True - except: - pass - print('python3 venv is NOT installed') - print(' ' + '\x1b[1;30;43m' + ' Python3 venv package \'python3-venv\' package is recommended. ' + - '\x1b[0m') - return False - - -def does_amdgpu_utils_env_exist(): - """ - Check if venv exists. - :return: Return True if venv exists. - :rtype: bool - """ - env_name = './amdgpu-utils-env/bin/activate' - - if os.path.isfile(env_name): - print('amdgpu-utils-env available') - print(' ' + '\x1b[1;37;42m' + ' amdgpu-utils-env OK. ' + '\x1b[0m') - return True - print('amdgpu-utils-env is NOT available') - print(' ' + '\x1b[1;30;43m' + ' amdgpu-utils-env should be configured per User Guide. ' + '\x1b[0m') - return False - - -def is_in_venv(): - """ - Check if execution is from within a venv. - :return: True if in venv - :rtype: bool - """ - try: - python_path = shutil.which('python') - except (NameError, AttributeError): - python_path = None - print('Maybe python version compatibility issue.') - - if re.fullmatch(r'.*amdgpu-utils-env.*', python_path): - print('In amdgpu-utils-env') - print(' ' + '\x1b[1;37;42m' + ' amdgpu-utils-env is activated. ' + '\x1b[0m') - return True - print('Not in amdgpu-utils-env') - print(' ' + '\x1b[1;30;43m' + ' amdgpu-utils-env should be activated per User Guide. ' + '\x1b[0m') - return False - - -def main(): - """ - Main flow for chk utility. 
- :return: None - """ - parser = argparse.ArgumentParser() - parser.add_argument('--about', help='README', action='store_true', default=False) - parser.add_argument('-d', '--debug', help='Debug output', action='store_true', default=False) - args = parser.parse_args() - - # About me - if args.about: - print(__doc__) - print('Author: ', __author__) - print('Copyright: ', __copyright__) - print('Credits: ', __credits__) - print('License: ', __license__) - print('Version: ', __version__) - print('Maintainer: ', __maintainer__) - print('Status: ', __status__) - sys.exit(0) - - GUT_CONST.DEBUG = args.debug - - if GUT_CONST.check_env() != [0, 0, 0]: - print('Error in environment. Exiting...') - sys.exit(-1) - - if not is_venv_installed() or not does_amdgpu_utils_env_exist(): - print('Environment not configured. WARNING') - - if not is_in_venv(): - print('Virtual Environment not activated. WARNING') - - -if __name__ == '__main__': - main() diff -Nru ricks-amdgpu-utils-3.0.0/amdgpu-ls ricks-amdgpu-utils-3.5.0/amdgpu-ls --- ricks-amdgpu-utils-3.0.0/amdgpu-ls 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/amdgpu-ls 1970-01-01 00:00:00.000000000 +0000 @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -""" amdgpu-ls - Displays details about installed AMD GPUs - - This utility displays most relevant parameters for installed and compatible AMD GPUs. The default - behavior is to list relevant parameters by GPU. OpenCL platform information is added when the - *--clinfo* option is used. A simplified table of current GPU state is displayed with the *--table* - option. The *--no_fan* can be used to ignore fan settings. The *--pstate* option can be used to - output the p-state table for each GPU instead of the list of basic parameters. The *--ppm* option - is used to output the table of available power/performance modes instead of basic parameters. 
- - Copyright (C) 2019 RueiKe - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -""" -__author__ = 'RueiKe' -__copyright__ = 'Copyright (C) 2019 RueiKe' -__credits__ = ['Craig Echt - Testing, Debug, Verification, and Documentation'] -__license__ = 'GNU General Public License' -__program_name__ = 'amdgpu-ls' -__version__ = 'v3.0.0' -__maintainer__ = 'RueiKe' -__status__ = 'Stable Release' -__docformat__ = 'reStructuredText' -# pylint: disable=multiple-statements -# pylint: disable=line-too-long - -import argparse -import sys -from GPUmodules import GPUmodule as gpu -from GPUmodules import env - - -def main(): - """ - Main flow for amdgpu-ls. 
- :return: - """ - parser = argparse.ArgumentParser() - parser.add_argument('--about', help='README', - action='store_true', default=False) - parser.add_argument('--table', help='Output table of basic GPU details', - action='store_true', default=False) - parser.add_argument('--pstates', help='Output pstate tables instead of GPU details', - action='store_true', default=False) - parser.add_argument('--ppm', help='Output power/performance mode tables instead of GPU details', - action='store_true', default=False) - parser.add_argument('--clinfo', help='Include openCL with card details', - action='store_true', default=False) - parser.add_argument('--no_fan', help='do not include fan setting options', - action='store_true', default=False) - parser.add_argument('-d', '--debug', help='Debug output', - action='store_true', default=False) - args = parser.parse_args() - - # About me - if args.about: - print(__doc__) - print('Author: ', __author__) - print('Copyright: ', __copyright__) - print('Credits: ', __credits__) - print('License: ', __license__) - print('Version: ', __version__) - print('Maintainer: ', __maintainer__) - print('Status: ', __status__) - sys.exit(0) - - env.GUT_CONST.DEBUG = args.debug - if args.no_fan: - env.GUT_CONST.show_fans = False - - if env.GUT_CONST.check_env() < 0: - print('Error in environment. 
Exiting...') - sys.exit(-1) - - # Get list of GPUs and get basic non-driver details - gpu_list = gpu.GpuList() - gpu_list.set_gpu_list(clinfo_flag=True) - - # Check list of GPUs - num_gpus = gpu_list.num_vendor_gpus() - print('Detected GPUs: ', end='') - for i, (k, v) in enumerate(num_gpus.items()): - if i: - print(', {}: {}'.format(k, v), end='') - else: - print('{}: {}'.format(k, v), end='') - print('') - if 'AMD' in num_gpus.keys(): - env.GUT_CONST.read_amd_driver_version() - print('AMD: {}'.format(gpu_list.wattman_status())) - if 'NV' in num_gpus.keys(): - print('nvidia smi: [{}]'.format(env.GUT_CONST.cmd_nvidia_smi)) - - num_gpus = gpu_list.num_gpus() - if num_gpus['total'] == 0: - print('No GPUs detected, exiting...') - sys.exit(-1) - - # Read data static/dynamic/info/state driver information for GPUs - gpu_list.read_gpu_sensor_data(data_type='All') - - # Check number of readable/writable GPUs again - num_gpus = gpu_list.num_gpus() - print('{} total GPUs, {} rw, {} r-only, {} w-only\n'.format(num_gpus['total'], num_gpus['rw'], - num_gpus['r-only'], num_gpus['w-only'])) - - # Read report specific details - if args.clinfo: - if not gpu_list.read_gpu_opencl_data(): - args.clinfo = False - - # Print out user requested details - if args.pstates: - gpu_list.read_gpu_pstates() - gpu_list.print_pstates() - if args.ppm: - gpu_list.read_gpu_ppm_table() - gpu_list.print_ppm_table() - if not args.pstates and not args.ppm: - gpu_list.read_gpu_pstates() - if args.table: - com_gpu_list = gpu_list.list_gpus(compatibility='readable') - com_gpu_list.print_table(title='Status of Readable GPUs:') - else: - gpu_list.print(args.clinfo) - sys.exit(0) - - -if __name__ == '__main__': - main() diff -Nru ricks-amdgpu-utils-3.0.0/amdgpu-monitor ricks-amdgpu-utils-3.5.0/amdgpu-monitor --- ricks-amdgpu-utils-3.0.0/amdgpu-monitor 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/amdgpu-monitor 1970-01-01 00:00:00.000000000 +0000 @@ -1,402 +0,0 @@ -#!/usr/bin/env python3 -""" 
amdgpu-monitor - Displays current status of all active GPUs - - A utility to give the current state of all compatible AMD GPUs. The default behavior - is to continuously update a text based table in the current window until Ctrl-C is - pressed. With the *--gui* option, a table of relevant parameters will be updated - in a Gtk window. You can specify the delay between updates with the *--sleep N* - option where N is an integer > zero that specifies the number of seconds to sleep - between updates. The *--no_fan* option can be used to disable the reading and display - of fan information. The *--log* option is used to write all monitor data to a psv log - file. When writing to a log file, the utility will indicate this in red at the top of - the window with a message that includes the log file name. The *--plot* will display a - plot of critical GPU parameters which updates at the specified *--sleep N* interval. If - you need both the plot and monitor displays, then using the --plot option is preferred - over running both tools as a single read of the GPUs is used to update both displays. - The *--ltz* option results in the use of local time instead of UTC. - - Copyright (C) 2019 RueiKe - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-""" -__author__ = 'RueiKe' -__copyright__ = 'Copyright (C) 2019 RueiKe' -__credits__ = ['Craig Echt - Testing, Debug, Verification, and Documentation'] -__license__ = 'GNU General Public License' -__program_name__ = 'amdgpu-monitor' -__version__ = 'v3.0.0' -__maintainer__ = 'RueiKe' -__status__ = 'Stable Release' -__docformat__ = 'reStructuredText' -# pylint: disable=multiple-statements -# pylint: disable=line-too-long - -import argparse -import subprocess -import threading -import os -import sys -import shlex -import time -import signal - -try: - import gi -except ModuleNotFoundError as error: - print('gi import error: {}'.format(error)) - print('gi is required for {}'.format(__program_name__)) - print(' In a venv, first install vext: pip install --no-cache-dir vext') - print(' Then install vext.gi: pip install --no-cache-dir vext.gi') - sys.exit(0) -gi.require_version('Gtk', '3.0') -from gi.repository import GLib, Gtk, Gdk - -from GPUmodules import GPUmodule as gpu -from GPUmodules import env - - -def ctrl_c_handler(target_signal, frame): - """ - Signal catcher for ctrl-c to exit monitor loop. - :param target_signal: - :type target_signal: Signals - :param frame: - :return: None - """ - if env.GUT_CONST.DEBUG: - print('ctrl_c_handler (ID: {}) has been caught. Setting quit flag...'.format(target_signal)) - else: - print('Setting quit flag...') - MonitorWindow.quit = True - - -signal.signal(signal.SIGINT, ctrl_c_handler) - -# SEMAPHORE ############ -UD_SEM = threading.Semaphore() -######################## - - -class MonitorWindow(Gtk.Window): - """ - Custom PAC Gtk window. 
- """ - quit = False - - def __init__(self, gpu_list, devices): - - Gtk.Window.__init__(self, title='amdgpu-monitor') - self.set_border_width(1) - icon_file = os.path.join(env.GUT_CONST.icon_path, 'amdgpu-monitor.icon.png') - if env.GUT_CONST.DEBUG: print('Icon file: [{}]'.format(icon_file)) - if os.path.isfile(icon_file): - self.set_icon_from_file(icon_file) - grid = Gtk.Grid() - grid.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(1, 1, 1, 1)) - self.add(grid) - - col = 0 - row = 0 - num_amd_gpus = gpu_list.num_gpus()['total'] - if env.GUT_CONST.LOG: - log_label = Gtk.Label() - log_label.set_markup(' Logging to: {}'.format(env.GUT_CONST.log_file)) - log_label.override_color(Gtk.StateFlags.NORMAL, Gdk.RGBA(1.0, 1.0, 1.0, 1.0)) - lbox = Gtk.Box(spacing=6) - lbox.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(.60, .20, .20, 1.0)) - lbox.set_property('margin-top', 1) - lbox.set_property('margin-bottom', 1) - lbox.set_property('margin-right', 1) - lbox.set_property('margin-left', 1) - lbox.pack_start(log_label, True, True, 0) - grid.attach(lbox, 0, row, num_amd_gpus+1, 1) - row += 1 - row_start = row - - row = row_start - row_labels = {'card_num': Gtk.Label()} - row_labels['card_num'].set_markup('Card #') - row_labels['card_num'].override_color(Gtk.StateFlags.NORMAL, Gdk.RGBA(1.0, 1.0, 1.0, 1.0)) - for k, v in gpu_list.table_param_labels().items(): - row_labels[k] = Gtk.Label() - row_labels[k].set_markup('{}'.format(v)) - row_labels[k].override_color(Gtk.StateFlags.NORMAL, Gdk.RGBA(1.0, 1.0, 1.0, 1.0)) - for k, v in row_labels.items(): - lbox = Gtk.Box(spacing=6) - lbox.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(.20, .40, .60, 1.0)) - lbox.set_property('margin-top', 1) - lbox.set_property('margin-bottom', 1) - lbox.set_property('margin-right', 1) - lbox.set_property('margin-left', 1) - v.set_property('margin-top', 1) - v.set_property('margin-bottom', 1) - v.set_property('margin-right', 4) - v.set_property('margin-left', 4) - 
lbox.pack_start(v, True, True, 0) - grid.attach(lbox, col, row, 1, 1) - v.set_alignment(0, 0.5) - row += 1 - for v in gpu_list.list.values(): - devices[v.prm.uuid] = {'card_num': Gtk.Label(label='card{}'.format(v.get_params_value('card_num')))} - for cv in gpu_list.table_param_labels(): - devices[v.prm.uuid][cv] = Gtk.Label(label=v.get_params_value(str(cv))) - devices[v.prm.uuid][cv].set_width_chars(10) - - for dv in devices.values(): - col += 1 - row = row_start - for lv in dv.values(): - lv.set_text('') - lbox = Gtk.Box(spacing=6) - lbox.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(.06, .06, .06, .06)) - lbox.set_property('margin-top', 1) - lbox.set_property('margin-bottom', 1) - lbox.set_property('margin-right', 1) - lbox.set_property('margin-left', 1) - lv.set_property('margin-top', 1) - lv.set_property('margin-bottom', 1) - lv.set_property('margin-right', 3) - lv.set_property('margin-left', 3) - lv.set_width_chars(17) - lbox.pack_start(lv, True, True, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - def set_quit(self, _arg2, _arg3): - """ - Set quit flag when Gtk quit is selected. - :param _arg2: - :param _arg3: - :return: None - """ - self.quit = True - - -def update_data(gpu_list, devices, cmd): - """ - Update monitor data with data read from GPUs. - :param gpu_list: A gpuList object with all gpuItems - :type gpu_list: gpuList - :param devices: A dictionary linking Gui items with data. - :type devices: dict - :param cmd: Subprocess return from running plot. 
- :type cmd: subprocess.Popen - :return: None - """ - # SEMAPHORE ############ - if not UD_SEM.acquire(blocking=False): - if env.GUT_CONST.DEBUG: print('Update while updating, skipping new update') - return - ######################## - gpu_list.read_gpu_sensor_data(data_type='DynamicM') - gpu_list.read_gpu_sensor_data(data_type='StateM') - if env.GUT_CONST.LOG: - gpu_list.print_log(env.GUT_CONST.log_file_ptr) - if env.GUT_CONST.PLOT: - try: - gpu_list.print_plot(cmd.stdin) - except (OSError, KeyboardInterrupt) as except_err: - if env.GUT_CONST.DEBUG: - print('amdgpu-plot has closed: [{}]'.format(except_err)) - else: - print('amdgpu-plot has closed') - env.GUT_CONST.PLOT = False - - # update gui - for dk, dv in devices.items(): - for lk, lv in dv.items(): - if lk == 'card_num': - data_value = 'card{}'.format(gpu_list.list[dk].get_params_value('card_num'))[:16] - else: - data_value = str(gpu_list.list[dk].get_params_value(lk))[:16] - if data_value == '-1': - data_value = '' - lv.set_text(data_value) - lv.set_width_chars(17) - - while Gtk.events_pending(): - Gtk.main_iteration_do(True) - # SEMAPHORE ############ - UD_SEM.release() - ######################## - - -def refresh(refreshtime, update_data, gpu_list, devices, cmd, gmonitor): - """ - Method called for monitor refresh. - :param refreshtime: Amount of seconds to sleep after refresh. - :type refreshtime: int - :param update_data: Function that does actual data update. - :type update_data: Callable - :param gpu_list: A gpuList object with all gpuItems - :type gpu_list: gpuList - :param devices: A dictionary linking Gui items with data. - :type devices: dict - :param cmd: Subprocess return from running plot. 
- :type cmd: subprocess.Popen - :param gmonitor: - :type gmonitor: Gtk - :return: - """ - while True: - if gmonitor.quit: - print('Quitting...') - Gtk.main_quit() - sys.exit(0) - GLib.idle_add(update_data, gpu_list, devices, cmd) - tst = 0.0 - sleep_interval = 0.2 - while tst < refreshtime: - time.sleep(sleep_interval) - tst += sleep_interval - - -def main(): - """ - Flow for amdgpu-monitor. - """ - parser = argparse.ArgumentParser() - parser.add_argument('--about', help='README', action='store_true', default=False) - parser.add_argument('--gui', help='Display GTK Version of Monitor', action='store_true', default=False) - parser.add_argument('--log', help='Write all monitor data to logfile', action='store_true', default=False) - parser.add_argument('--plot', help='Open and write to amdgpu-plot', action='store_true', default=False) - parser.add_argument('--ltz', help='Use local time zone instead of UTC', action='store_true', default=False) - parser.add_argument('--sleep', help='Number of seconds to sleep between updates', type=int, default=2) - parser.add_argument('--no_fan', help='do not include fan setting options', action='store_true', default=False) - parser.add_argument('-d', '--debug', help='Debug output', action='store_true', default=False) - parser.add_argument('--pdebug', help='Plot debug output', action='store_true', default=False) - args = parser.parse_args() - - # About me - if args.about: - print(__doc__) - print('Author: ', __author__) - print('Copyright: ', __copyright__) - print('Credits: ', __credits__) - print('License: ', __license__) - print('Version: ', __version__) - print('Maintainer: ', __maintainer__) - print('Status: ', __status__) - sys.exit(0) - - env.GUT_CONST.DEBUG = args.debug - env.GUT_CONST.PDEBUG = args.pdebug - if args.ltz: - env.GUT_CONST.USELTZ = True - if args.no_fan: - env.GUT_CONST.show_fans = False - if int(args.sleep) > 0: - env.GUT_CONST.SLEEP = int(args.sleep) - else: - print('Invalid value for sleep specified. 
Must be an integer great than zero') - sys.exit(-1) - - if env.GUT_CONST.check_env() < 0: - print('Error in environment. Exiting...') - sys.exit(-1) - - # Get list of AMD GPUs and get basic non-driver details - gpu_list = gpu.GpuList() - gpu_list.set_gpu_list() - - # Check list of GPUs - num_gpus = gpu_list.num_vendor_gpus() - print('Detected GPUs: ', end='') - for i, (k, v) in enumerate(num_gpus.items()): - if i: - print(', {}: {}'.format(k, v), end='') - else: - print('{}: {}'.format(k, v), end='') - print('') - if 'AMD' in num_gpus.keys(): - env.GUT_CONST.read_amd_driver_version() - print('AMD: {}'.format(gpu_list.wattman_status())) - if 'NV' in num_gpus.keys(): - print('nvidia smi: [{}]'.format(env.GUT_CONST.cmd_nvidia_smi)) - - num_gpus = gpu_list.num_gpus() - if num_gpus['total'] == 0: - print('No GPUs detected, exiting...') - sys.exit(-1) - - # Read data static/dynamic/info/state driver information for GPUs - gpu_list.read_gpu_sensor_data(data_type='All') - - # Check number of readable/writable GPUs again - num_gpus = gpu_list.num_gpus() - print('{} total GPUs, {} rw, {} r-only, {} w-only\n'.format(num_gpus['total'], num_gpus['rw'], - num_gpus['r-only'], num_gpus['w-only'])) - - time.sleep(1) - # Generate a new list of only compatible GPUs - com_gpu_list = gpu_list.list_gpus(compatibility='readable') - - if args.log: - env.GUT_CONST.LOG = True - env.GUT_CONST.log_file = './log_monitor_{}.txt'.format( - env.GUT_CONST.now(ltz=env.GUT_CONST.USELTZ).strftime('%m%d_%H%M%S')) - env.GUT_CONST.log_file_ptr = open(env.GUT_CONST.log_file, 'w', 1) - gpu_list.print_log_header(env.GUT_CONST.log_file_ptr) - - if args.plot: - args.gui = True - if args.gui: - # Display Gtk style Monitor - devices = {} - gmonitor = MonitorWindow(com_gpu_list, devices) - gmonitor.connect('delete-event', gmonitor.set_quit) - gmonitor.show_all() - - cmd = None - if args.plot: - env.GUT_CONST.PLOT = True - if os.path.isfile('/usr/bin/amdgpu-plot'): - plot_util = '/usr/bin/amdgpu-plot' - else: - 
plot_util = os.path.join(env.GUT_CONST.repository_path, 'amdgpu-plot') - if os.path.isfile(plot_util): - if env.GUT_CONST.PDEBUG: - cmd_str = '{} --debug --stdin --sleep {}'.format(plot_util, env.GUT_CONST.SLEEP) - else: - cmd_str = '{} --stdin --sleep {}'.format(plot_util, env.GUT_CONST.SLEEP) - cmd = subprocess.Popen(shlex.split(cmd_str), bufsize=-1, shell=False, stdin=subprocess.PIPE) - com_gpu_list.print_plot_header(cmd.stdin) - - # Start thread to update Monitor - _ = threading.Thread(target=refresh, daemon=True, - args=[env.GUT_CONST.SLEEP, update_data, com_gpu_list, devices, cmd, gmonitor]).start() - - Gtk.main() - else: - # Display text style Monitor - try: - while True: - com_gpu_list.read_gpu_sensor_data(data_type='DynamicM') - com_gpu_list.read_gpu_sensor_data(data_type='StateM') - if not env.GUT_CONST.DEBUG: os.system('clear') - if env.GUT_CONST.LOG: - print('{}Logging to: {}{}'.format('\033[31m \033[01m', env.GUT_CONST.log_file, '\033[0m')) - com_gpu_list.print_log(env.GUT_CONST.log_file_ptr) - com_gpu_list.print_table() - time.sleep(env.GUT_CONST.SLEEP) - if MonitorWindow.quit: - sys.exit(-1) - except KeyboardInterrupt: - if env.GUT_CONST.LOG: - env.GUT_CONST.log_file_ptr.close() - sys.exit(0) - - -if __name__ == '__main__': - main() diff -Nru ricks-amdgpu-utils-3.0.0/amdgpu-pac ricks-amdgpu-utils-3.5.0/amdgpu-pac --- ricks-amdgpu-utils-3.0.0/amdgpu-pac 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/amdgpu-pac 1970-01-01 00:00:00.000000000 +0000 @@ -1,1357 +0,0 @@ -#!/usr/bin/env python3 -""" amdgpu-pac - A utility program and control compatible AMD GPUs - - Program and Control compatible AMD GPUs with this utility. By default, the commands to - be written to a GPU are written to a bash file for the user to inspect and run. If you - have confidence, the *--execute_pac* option can be used to execute and then delete the - saved bash file. 
Since the GPU device files are writable only by root, sudo is used to - execute commands in the bash file, as a result, you will be prompted for credentials in the - terminal where you executed *amdgpu-pac*. The *--no_fan* option can be used to eliminate - fan details from the utility. The *--force_write* option can be used to force all configuration - parameters to be written to the GPU. The default behavior is to only write changes. - - Copyright (C) 2019 RueiKe - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-""" -__author__ = 'RueiKe' -__copyright__ = 'Copyright (C) 2019 RueiKe' -__credits__ = ['Craig Echt - Testing, Debug, Verification, and Documentation'] -__license__ = 'GNU General Public License' -__program_name__ = 'amdgpu-pac' -__version__ = 'v3.0.0' -__maintainer__ = 'RueiKe' -__status__ = 'Stable Release' -__docformat__ = 'reStructuredText' -# pylint: disable=multiple-statements -# pylint: disable=line-too-long -# pylint: bad-continuation - -import argparse -import re -import subprocess -import os -import sys -import time -from uuid import uuid4 - -try: - import gi -except ModuleNotFoundError as error: - print('gi import error: {}'.format(error)) - print('gi is required for {}'.format(__program_name__)) - print(' In a venv, first install vext: pip install --no-cache-dir vext') - print(' Then install vext.gi: pip install --no-cache-dir vext.gi') - sys.exit(0) -gi.require_version('Gtk', '3.0') -from gi.repository import GLib, Gtk, Gdk - -from GPUmodules import GPUmodule as gpu -from GPUmodules import env - -MAX_CHAR = 54 -CHAR_WIDTH = 8 - - -class PACWindow(Gtk.Window): - """ - PAC Window class. 
- """ - def __init__(self, gpu_list, devices): - Gtk.Window.__init__(self, title=__program_name__) - self.set_border_width(1) - - def set_prop(gui_item, top=None, bottom=None, right=None, left=None, bg_color=None, color=None, align=None, - set_hom=None): - if color: - gui_item.override_color(Gtk.StateFlags.NORMAL, Gdk.RGBA(*color)) - if bg_color: - gui_item.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(*bg_color)) - if top: - gui_item.set_property('margin-top', top) - if bottom: - gui_item.set_property('margin-bottom', bottom) - if right: - gui_item.set_property('margin-right', right) - if left: - gui_item.set_property('margin-left', left) - if align: - gui_item.set_alignment(*align) - if set_hom: - gui_item.set_homogeneous(set_hom) - - icon_file = os.path.join(env.GUT_CONST.icon_path, 'amdgpu-pac.icon.png') - if os.path.isfile(icon_file): - self.set_icon_from_file(icon_file) - grid = Gtk.Grid() - grid.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(1, 1, 1, 1)) - self.add(grid) - - num_com_gpus = gpu_list.num_gpus()['total'] - max_rows = 0 - col = 0 - for v in gpu_list.list.values(): - row = 0 - # Card Number in top center of box - devices[v.prm.uuid] = {'card_num': Gtk.Label()} - devices[v.prm.uuid]['card_num'].set_markup('Card {}: {}'.format( - v.get_params_value(str('card_num')), v.get_params_value('model_display'))) - set_prop(devices[v.prm.uuid]['card_num'], - align=(0.5, 0.5), color=(1.0, 1.0, 1.0, 1.0), top=1, bottom=1, right=4, left=4) - lbox = Gtk.Box(spacing=6) - set_prop(lbox, bg_color=(0.2, 0.4, 0.6, 1.0), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['card_num'], True, True, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - # Card Path - devices[v.prm.uuid]['card_path'] = Gtk.Label() - devices[v.prm.uuid]['card_path'].set_markup('Device: {}'.format(v.get_params_value('card_path'))) - devices[v.prm.uuid]['card_path'].set_property('width-request', MAX_CHAR*CHAR_WIDTH) - 
set_prop(devices[v.prm.uuid]['card_path'], align=(0.0, 0.5), top=1, bottom=1, right=4, left=4) - lbox = Gtk.Box(spacing=6) - set_prop(lbox, bg_color=(0.6, 0.6, 0.6, 0.12), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['card_path'], True, True, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - # Card Power Cap - power_cap_range = v.get_params_value('power_cap_range') - devices[v.prm.uuid]['power_cap'] = Gtk.Label() - devices[v.prm.uuid]['power_cap'].set_markup('Power Cap: Range ({} - {} W)'.format( - power_cap_range[0], power_cap_range[1])) - set_prop(devices[v.prm.uuid]['power_cap'], align=(0.0, 0.5), top=1, bottom=1, right=2, left=2) - lbox = Gtk.Box(spacing=6) - set_prop(lbox, bg_color=(0.6, 0.6, 0.6, 0.12), top=1, bottom=1, right=1, left=1) - lbox.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(.06, .06, .06, .12)) - lbox.pack_start(devices[v.prm.uuid]['power_cap'], True, True, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - # Card Power Cap Value and Entry - devices[v.prm.uuid]['power_cap_cur'] = Gtk.Label() - set_prop(devices[v.prm.uuid]['power_cap_cur'], top=1, bottom=1, right=2, left=2) - devices[v.prm.uuid]['power_cap_ent'] = Gtk.Entry() - devices[v.prm.uuid]['power_cap_ent'].set_width_chars(5) - devices[v.prm.uuid]['power_cap_ent'].set_max_length(5) - devices[v.prm.uuid]['power_cap_ent'].set_alignment(xalign=1) - set_prop(devices[v.prm.uuid]['power_cap_ent'], top=1, bottom=1, right=0, left=2) - devices[v.prm.uuid]['power_cap_ent_unit'] = Gtk.Label() - devices[v.prm.uuid]['power_cap_ent_unit'].set_text('W (value or \'reset\')') - set_prop(devices[v.prm.uuid]['power_cap_ent_unit'], top=1, bottom=1, right=0, left=0, align=(0.0, 0.5)) - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, 0, spacing=2) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.06), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['power_cap_cur'], False, False, 0) - 
lbox.pack_start(devices[v.prm.uuid]['power_cap_ent'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['power_cap_ent_unit'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - if env.GUT_CONST.show_fans: - # Fan PWM Value - fan_pwm_range = v.get_params_value('fan_pwm_range') - devices[v.prm.uuid]['fan_pwm_range'] = Gtk.Label() - devices[v.prm.uuid]['fan_pwm_range'].set_markup('Fan PWM: Range ({} - {} %)'.format( - fan_pwm_range[0], fan_pwm_range[1])) - set_prop(devices[v.prm.uuid]['fan_pwm_range'], top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) - lbox = Gtk.Box(spacing=6) - set_prop(lbox, bg_color=(0.06, 0.06, 0.06, 0.12), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['fan_pwm_range'], True, True, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - # Card Fan PWM Value and Entry - devices[v.prm.uuid]['fan_pwm_cur'] = Gtk.Label() - set_prop(devices[v.prm.uuid]['fan_pwm_cur'], top=1, bottom=1, right=2, left=2) - devices[v.prm.uuid]['fan_pwm_ent'] = Gtk.Entry() - devices[v.prm.uuid]['fan_pwm_ent'].set_width_chars(5) - devices[v.prm.uuid]['fan_pwm_ent'].set_max_length(5) - devices[v.prm.uuid]['fan_pwm_ent'].set_alignment(xalign=1) - set_prop(devices[v.prm.uuid]['fan_pwm_ent'], top=1, bottom=1, right=0, left=2) - devices[v.prm.uuid]['fan_pwm_ent_unit'] = Gtk.Label() - devices[v.prm.uuid]['fan_pwm_ent_unit'].set_text('% (value, \'reset\', or \'max\')') - set_prop(devices[v.prm.uuid]['fan_pwm_ent_unit'], top=1, bottom=1, right=0, left=0, align=(0.0, 0.5)) - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, 0, spacing=2) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.06), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['fan_pwm_cur'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['fan_pwm_ent'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['fan_pwm_ent_unit'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - if v.get_params_value('gpu_type') == 1: 
- # Sclk P-States - devices[v.prm.uuid]['sclk_range'] = Gtk.Label() - devices[v.prm.uuid]['sclk_range'].set_markup('Sclk P-States: Ranges {}-{}, {}-{} '.format( - v.get_params_value('sclk_f_range')[0], - v.get_params_value('sclk_f_range')[1], - v.get_params_value('vddc_range')[0], - v.get_params_value('vddc_range')[1])) - set_prop(devices[v.prm.uuid]['sclk_range'], top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.12), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['sclk_range'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - # Sclk P-State Values and Entry - devices[v.prm.uuid]['sclk_pstate'] = {} - for ps, psd in v.sclk_state.items(): - devices[v.prm.uuid]['sclk_pstate'][ps] = {} - - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'] = Gtk.Label() - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'].set_width_chars(20) - set_prop(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'], - top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'] = Gtk.Entry() - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'].set_width_chars(5) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'].set_max_length(5) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'].set_alignment(xalign=1) - set_prop(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'], - top=1, bottom=1, right=0, left=0) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'] = Gtk.Label() - set_prop(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'], - top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj'] = Gtk.Entry() - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj'].set_width_chars(5) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj'].set_max_length(5) - 
devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj'].set_alignment(xalign=1) - set_prop(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj'], - top=1, bottom=1, right=0, left=0) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj_unit'] = Gtk.Label() - set_prop(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj_unit'], - top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) - - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.06), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj_unit'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - elif v.get_params_value('gpu_type') == 2: - # Sclk Curve End Points - devices[v.prm.uuid]['sclk_range'] = Gtk.Label() - devices[v.prm.uuid]['sclk_range'].set_markup('Sclk Curve End Points: Ranges {}-{} '.format( - v.get_params_value('sclk_f_range')[0], - v.get_params_value('sclk_f_range')[1])) - set_prop(devices[v.prm.uuid]['sclk_range'], - top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.12), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['sclk_range'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - # Sclk Curve End Points Values and Entry - devices[v.prm.uuid]['sclk_pstate'] = {} - for ps, psd in v.sclk_state.items(): - devices[v.prm.uuid]['sclk_pstate'][ps] = {} - - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'] = Gtk.Label() - 
devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'].set_width_chars(20) - set_prop(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'], - top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'] = Gtk.Entry() - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'].set_width_chars(5) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'].set_max_length(5) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'].set_alignment(xalign=1) - set_prop(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'], - top=1, bottom=1, right=0, left=0) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'] = Gtk.Label() - set_prop(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'], - top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.06), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - if v.get_params_value('gpu_type') == 1 or v.get_params_value('gpu_type') == 2: - # SCLK P-State Mask - devices[v.prm.uuid]['sclk_pst_mask_cur'] = Gtk.Label() - set_prop(devices[v.prm.uuid]['sclk_pst_mask_cur'], top=1, bottom=1, right=2, left=2) - devices[v.prm.uuid]['sclk_pst_mask_ent'] = Gtk.Entry() - devices[v.prm.uuid]['sclk_pst_mask_ent'].set_width_chars(17) - devices[v.prm.uuid]['sclk_pst_mask_ent'].set_max_length(17) - devices[v.prm.uuid]['sclk_pst_mask_ent'].set_alignment(xalign=0) - set_prop(devices[v.prm.uuid]['sclk_pst_mask_ent'], top=1, bottom=1, right=0, left=1) - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, 0, spacing=2) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 
0.06), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['sclk_pst_mask_cur'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['sclk_pst_mask_ent'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - if v.get_params_value('gpu_type') == 1: - # Mclk P-States - devices[v.prm.uuid]['mclk_range'] = Gtk.Label() - devices[v.prm.uuid]['mclk_range'].set_markup('Mclk P-States: Ranges {}-{}, {}-{} '.format( - v.get_params_value('mclk_f_range')[0], - v.get_params_value('mclk_f_range')[1], - v.get_params_value('vddc_range')[0], - v.get_params_value('vddc_range')[1])) - set_prop(devices[v.prm.uuid]['mclk_range'], top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.12), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['mclk_range'], True, True, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - # Mclk P-State Values and Entry - devices[v.prm.uuid]['mclk_pstate'] = {} - for ps, psd in v.mclk_state.items(): - devices[v.prm.uuid]['mclk_pstate'][ps] = {} - - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'] = Gtk.Label() - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'].set_width_chars(20) - set_prop(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'], - top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'] = Gtk.Entry() - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'].set_width_chars(5) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'].set_max_length(5) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'].set_alignment(xalign=1) - set_prop(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'], - top=1, bottom=1, right=0, left=0) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'] = Gtk.Label() - set_prop(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'], - top=1, bottom=1, right=4, 
left=0, align=(0.0, 0.5)) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj'] = Gtk.Entry() - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj'].set_width_chars(5) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj'].set_max_length(5) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj'].set_alignment(xalign=1) - set_prop(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj'], - top=1, bottom=1, right=0, left=0) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj_unit'] = Gtk.Label() - set_prop(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj_unit'], - top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) - - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.06), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj_unit'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - elif v.get_params_value('gpu_type') == 2: - # Mclk Curve End points - devices[v.prm.uuid]['mclk_range'] = Gtk.Label() - devices[v.prm.uuid]['mclk_range'].set_markup('Mclk Curve End Points: Ranges {}-{} '.format( - v.get_params_value('mclk_f_range')[0], - v.get_params_value('mclk_f_range')[1])) - set_prop(devices[v.prm.uuid]['mclk_range'], - top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.12), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['mclk_range'], True, True, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - # Mclk 
Curve End Points Values and Entry - devices[v.prm.uuid]['mclk_pstate'] = {} - for ps, psd in v.mclk_state.items(): - devices[v.prm.uuid]['mclk_pstate'][ps] = {} - - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'] = Gtk.Label() - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'].set_width_chars(20) - set_prop(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'], - top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'] = Gtk.Entry() - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'].set_width_chars(5) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'].set_max_length(5) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'].set_alignment(xalign=1) - set_prop(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'], - top=1, bottom=1, right=0, left=0) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'] = Gtk.Label() - set_prop(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'], - top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.06), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - if v.get_params_value('gpu_type') == 1 or v.get_params_value('gpu_type') == 2: - # MCLK P-State Mask - devices[v.prm.uuid]['mclk_pst_mask_cur'] = Gtk.Label() - set_prop(devices[v.prm.uuid]['mclk_pst_mask_cur'], top=1, bottom=1, right=2, left=2) - devices[v.prm.uuid]['mclk_pst_mask_ent'] = Gtk.Entry() - devices[v.prm.uuid]['mclk_pst_mask_ent'].set_width_chars(17) - devices[v.prm.uuid]['mclk_pst_mask_ent'].set_max_length(17) - 
devices[v.prm.uuid]['mclk_pst_mask_ent'].set_alignment(xalign=0) - set_prop(devices[v.prm.uuid]['mclk_pst_mask_ent'], top=1, bottom=1, right=0, left=1) - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, 0, spacing=2) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.06), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['mclk_pst_mask_cur'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['mclk_pst_mask_ent'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - if v.get_params_value('gpu_type') == 2: - # VDDC Curve Points - devices[v.prm.uuid]['vddc_curve_range'] = Gtk.Label() - devices[v.prm.uuid]['vddc_curve_range'].set_markup( - 'VDDC Curve Points: Ranges {}-{}, {}-{} '.format(v.vddc_curve_range['0']['SCLK'][0], - v.vddc_curve_range['0']['SCLK'][1], - v.vddc_curve_range['0']['VOLT'][0], - v.vddc_curve_range['0']['VOLT'][1])) - set_prop(devices[v.prm.uuid]['vddc_curve_range'], top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.12), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['vddc_curve_range'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - # VDDC CURVE Points Values and Entry - devices[v.prm.uuid]['vddc_curve_pt'] = {} - for ps, psd in v.vddc_curve.items(): - devices[v.prm.uuid]['vddc_curve_pt'][ps] = {} - - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_cur_obj'] = Gtk.Label() - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_cur_obj'].set_width_chars(20) - set_prop(devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_cur_obj'], - top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj'] = Gtk.Entry() - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj'].set_width_chars(5) - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj'].set_max_length(5) - 
devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj'].set_alignment(xalign=1) - set_prop(devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj'], - top=1, bottom=1, right=0, left=0) - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj_unit'] = Gtk.Label() - set_prop(devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj_unit'], - top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj'] = Gtk.Entry() - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj'].set_width_chars(5) - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj'].set_max_length(5) - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj'].set_alignment(xalign=1) - set_prop(devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj'], - top=1, bottom=1, right=0, left=0) - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj_unit'] = Gtk.Label() - set_prop(devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj_unit'], - top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) # right was 0 - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(lbox, set_hom=False, bg_color=(0.06, 0.06, 0.06, 0.06), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_cur_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj_unit'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj'], False, False, 0) - lbox.pack_start(devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj_unit'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - # Power Performance Mode Selection - devices[v.prm.uuid]['ppm'] = Gtk.Label() - devices[v.prm.uuid]['ppm'].set_markup('Power Performance Modes:') - set_prop(devices[v.prm.uuid]['ppm'], top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) - - lbox = Gtk.Box(spacing=6) - 
set_prop(lbox, bg_color=(0.06, 0.06, 0.06, 0.12), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices[v.prm.uuid]['ppm'], True, True, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - devices[v.prm.uuid]['ppm_modes'] = Gtk.ListStore(int, str) - devices[v.prm.uuid]['ppm_mode_items'] = {} - item_num = 0 - for mode_num, mode in v.ppm_modes.items(): - if mode_num == 'NUM': - continue - if mode[0] == 'CUSTOM': - continue - devices[v.prm.uuid]['ppm_modes'].append([int(mode_num), mode[0]]) - devices[v.prm.uuid]['ppm_mode_items'][int(mode_num)] = item_num - item_num += 1 - - lbox = Gtk.Box(spacing=6) - set_prop(lbox, bg_color=(0.06, 0.06, 0.06, 0.06), top=1, bottom=1, right=1, left=1) - - devices[v.prm.uuid]['ppm_modes_combo'] = Gtk.ComboBox.new_with_model_and_entry( - devices[v.prm.uuid]['ppm_modes']) - devices[v.prm.uuid]['ppm_modes_combo'].connect('changed', ppm_select, devices[v.prm.uuid]) - devices[v.prm.uuid]['ppm_modes_combo'].set_entry_text_column(1) - lbox.pack_start(devices[v.prm.uuid]['ppm_modes_combo'], False, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - # Save/Reset Card Buttons - devices[v.prm.uuid]['save_button'] = Gtk.Button('') - for child in devices[v.prm.uuid]['save_button'].get_children(): - child.set_label('Save') - child.set_use_markup(True) - devices[v.prm.uuid]['save_button'].connect('clicked', self.save_card, gpu_list, devices, v.prm.uuid) - devices[v.prm.uuid]['save_button'].set_property('width-request', 90) - - devices[v.prm.uuid]['reset_button'] = Gtk.Button('') - for child in devices[v.prm.uuid]['reset_button'].get_children(): - child.set_label('Reset') - child.set_use_markup(True) - devices[v.prm.uuid]['reset_button'].connect('clicked', self.reset_card, gpu_list, devices, v.prm.uuid) - devices[v.prm.uuid]['reset_button'].set_property('width-request', 90) - - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(lbox, set_hom=False, bg_color=(0.6, 0.6, 0.6, 1.0), top=1, bottom=1, right=1, left=1) - 
lbox.pack_start(devices[v.prm.uuid]['save_button'], True, False, 0) - lbox.pack_start(devices[v.prm.uuid]['reset_button'], True, False, 0) - grid.attach(lbox, col, row, 1, 1) - row += 1 - - # Increment column before going to next Device - if max_rows < row: - max_rows = row - col += 1 - # End of for v in values - - # Setup the Save_ALL and Reset_ALL buttons - if num_com_gpus > 1: - # Save/Reset/Update ALL Card Buttons - devices['all_buttons'] = {} - devices['all_buttons']['save_all_button'] = Gtk.Button('') - for child in devices['all_buttons']['save_all_button'].get_children(): - child.set_label('Save All') - child.set_use_markup(True) - devices['all_buttons']['save_all_button'].connect('clicked', self.save_all_cards, gpu_list, devices) - devices['all_buttons']['save_all_button'].set_property('width-request', 100) - - devices['all_buttons']['reset_all_button'] = Gtk.Button('') - for child in devices['all_buttons']['reset_all_button'].get_children(): - child.set_label('Reset All') - child.set_use_markup(True) - devices['all_buttons']['reset_all_button'].connect('clicked', self.reset_all_cards, gpu_list, devices) - devices['all_buttons']['reset_all_button'].set_property('width-request', 100) - - devices['all_buttons']['refresh_all_button'] = Gtk.Button('') - for child in devices['all_buttons']['refresh_all_button'].get_children(): - child.set_label('Refresh All') - child.set_use_markup(True) - devices['all_buttons']['refresh_all_button'].connect('clicked', self.refresh_all_cards, gpu_list, - devices, True) - devices['all_buttons']['refresh_all_button'].set_property('width-request', 100) - - lbox = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(lbox, set_hom=False, bg_color=(0.6, 0.6, 0.6, 1.0), top=1, bottom=1, right=1, left=1) - lbox.pack_start(devices['all_buttons']['save_all_button'], True, False, 0) - lbox.pack_start(devices['all_buttons']['reset_all_button'], True, False, 0) - lbox.pack_start(devices['all_buttons']['refresh_all_button'], True, False, 
0) - grid.attach(lbox, 0, max_rows, col, 1) - row += 1 - max_rows += 1 - - # Initialize message box - devices['message_label'] = Gtk.Label() - devices['message_label'].set_max_width_chars(num_com_gpus * MAX_CHAR) - devices['message_label'].set_property('width-request', num_com_gpus * MAX_CHAR * CHAR_WIDTH) - devices['message_label'].set_line_wrap(True) - set_prop(devices['message_label'], color=(1.0, 1.0, 1.0, 1.0), align=(0.0, 0.5)) - - devices['message_box'] = Gtk.Box(Gtk.Orientation.HORIZONTAL, spacing=6) - set_prop(devices['message_box'], set_hom=False, bg_color=(0.6, 0.6, 0.6, 1.0), top=1, bottom=1, right=1, left=1) - devices['message_box'].pack_start(devices['message_label'], True, True, 1) - grid.attach(devices['message_box'], 0, max_rows, col, 1) - row += 1 - - self.update_message(devices, '', 'gray') - self.refresh_pac(gpu_list, devices) - - @staticmethod - def update_message(devices, message, color='gray'): - """ - Set PAC message using default message if no message specified. - :param devices: Dictionary of GUI items and GPU data. - :type devices: dict - :param message: - :type message: str - :param color: Valid color strings: gray, yellow, white, red - :type color: str - :return: None - """ - if message == '': - if env.GUT_CONST.execute_pac: - message = ('Using the --execute_pac option. 
Changes will be written to the GPU without ' + - 'confirmation.\nSudo will be used, so you may be prompted for credentials in ' + - 'the window where amdgpu-pac was executed from.') - else: - message = ('Using amdgpu-pac without --execute_pac option.\nYou must manually run bash ' + - 'file with sudo to execute changes.') - - if color == 'red': - devices['message_box'].override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(.60, .20, .20, 1.0)) - devices['message_label'].override_color(Gtk.StateFlags.NORMAL, Gdk.RGBA(1.0, 1.0, 1.0, 1.0)) - elif color == 'yellow': - devices['message_box'].override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(.50, .50, .00, 1.0)) - devices['message_label'].override_color(Gtk.StateFlags.NORMAL, Gdk.RGBA(1.0, 1.0, 1.0, 1.0)) - elif color == 'white': - devices['message_box'].override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(1.0, 1.0, 1.0, 1.0)) - devices['message_label'].override_color(Gtk.StateFlags.NORMAL, Gdk.RGBA(0.0, 0.0, 0.0, 1.0)) - else: - devices['message_box'].override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(.6, .6, .6, 1.0)) - devices['message_label'].override_color(Gtk.StateFlags.NORMAL, Gdk.RGBA(1.0, 1.0, 1.0, 1.0)) - devices['message_label'].set_text(message) - while Gtk.events_pending(): - Gtk.main_iteration_do(True) - return - - def refresh_all_cards(self, _, gpu_list, devices, reset_message=False): - """ - Refresh all cards by calling card level refresh. - :param _: parent not used - :param gpu_list: - :type gpu_list: gpuList - :param devices: Dictionary of GUI items and GPU data. - :type devices: dict - :param reset_message: - :type reset_message: bool - :return: None - """ - self.refresh_pac(gpu_list, devices, reset_message) - return - - def refresh_pac(self, gpu_list, devices, refresh_message=False): - """ - Update device data from gpuList data - :param gpu_list: gpuList of all gpuItems - :type gpu_list: gpuList - :param devices: Dictionary of GUI items and GPU data. 
- :type devices: dict - :param refresh_message: - :return: - """ - # Read sensor and state data from GPUs - gpu_list.read_gpu_sensor_data(data_type='All') - # Read pstate and ppm table data - gpu_list.read_gpu_pstates() - gpu_list.read_gpu_ppm_table() - - for v in gpu_list.list.values(): - devices[v.prm.uuid]['power_cap_cur'].set_text(' Current: {}W Set: '.format( - v.get_params_value('power_cap'))) - devices[v.prm.uuid]['power_cap_ent'].set_text(str(int(v.get_params_value('power_cap')))) - if env.GUT_CONST.show_fans: - devices[v.prm.uuid]['fan_pwm_cur'].set_text(' Current: {}% Set: '.format( - v.get_params_value('fan_pwm'))) - devices[v.prm.uuid]['fan_pwm_ent'].set_text(str(int(v.get_params_value('fan_pwm')))) - # SCLK - if v.get_params_value('gpu_type') == 1: - for ps, psd in v.sclk_state.items(): - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'].set_text(' {}: {}, {}'.format(ps, *psd)) - item_value = re.sub(r'[a-z,A-Z]*', '', str(psd[0])) - item_unit = re.sub(r'[0-9][.]*[0-9]*', '', str(psd[0])) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'].set_text(item_value) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'].set_text(item_unit + ' ') - item_value = re.sub(r'[a-z,A-Z]*', '', str(psd[1])) - item_unit = re.sub(r'[0-9][.]*[0-9]*', '', str(psd[1])) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj'].set_text(str(item_value)) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj_unit'].set_text(item_unit) - devices[v.prm.uuid]['sclk_pst_mask_cur'].set_text( - ' SCLK Default: {} Set Mask: '.format(v.prm.sclk_mask)) - devices[v.prm.uuid]['sclk_pst_mask_ent'].set_text(v.prm.sclk_mask) - elif v.get_params_value('gpu_type') == 2: - for ps, psd in v.sclk_state.items(): - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'].set_text(' {}: {}'.format(ps, psd[0])) - item_value = re.sub(r'[a-z,A-Z]*', '', str(psd[0])) - item_unit = re.sub(r'[0-9][.]*[0-9]*', '', str(psd[0])) - 
devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'].set_text(item_value) - devices[v.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'].set_text(item_unit + ' ') - devices[v.prm.uuid]['sclk_pst_mask_cur'].set_text( - ' SCLK Default: {} Set Mask: '.format(v.prm.sclk_mask)) - devices[v.prm.uuid]['sclk_pst_mask_ent'].set_text(v.prm.sclk_mask) - # MCLK - if v.get_params_value('gpu_type') == 1: - for ps, psd in v.mclk_state.items(): - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'].set_text(' {}: {}, {}'.format(ps, *psd)) - item_value = re.sub(r'[a-z,A-Z]*', '', str(psd[0])) - item_unit = re.sub(r'[0-9][.]*[0-9]*', '', str(psd[0])) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'].set_text(item_value) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'].set_text(item_unit + ' ') - item_value = re.sub(r'[a-z,A-Z]*', '', str(psd[1])) - item_unit = re.sub(r'[0-9][.]*[0-9]*', '', str(psd[1])) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj'].set_text(str(item_value)) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj_unit'].set_text(item_unit) - devices[v.prm.uuid]['mclk_pst_mask_cur'].set_text( - ' MCLK Default: {} Set Mask: '.format(v.prm.mclk_mask)) - devices[v.prm.uuid]['mclk_pst_mask_ent'].set_text(v.prm.mclk_mask) - elif v.get_params_value('gpu_type') == 2: - for ps, psd in v.mclk_state.items(): - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'].set_text(' {}: {}'.format(ps, psd[0])) - item_value = re.sub(r'[a-z,A-Z]*', '', str(psd[0])) - item_unit = re.sub(r'[0-9][.]*[0-9]*', '', str(psd[0])) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'].set_text(item_value) - devices[v.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'].set_text(item_unit + ' ') - devices[v.prm.uuid]['mclk_pst_mask_cur'].set_text( - ' MCLK Default: {} Set Mask: '.format(v.prm.mclk_mask)) - devices[v.prm.uuid]['mclk_pst_mask_ent'].set_text(v.prm.mclk_mask) - # VDDC CURVE - if v.get_params_value('gpu_type') == 2: - for ps, psd in 
v.vddc_curve.items(): - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_cur_obj'].set_text(' {}: {}, {}'.format(ps, *psd)) - item_value = re.sub(r'[a-z,A-Z]*', '', str(psd[0])) - item_unit = re.sub(r'[0-9][.]*[0-9]*', '', str(psd[0])) - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj'].set_text(item_value) - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj_unit'].set_text(item_unit + ' ') - item_value = re.sub(r'[a-z,A-Z]*', '', str(psd[1])) - item_unit = re.sub(r'[0-9][.]*[0-9]*', '', str(psd[1])) - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj'].set_text(str(item_value)) - devices[v.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj_unit'].set_text(item_unit) - - # refresh active mode item - devices[v.prm.uuid]['ppm_modes_combo'].set_active( - devices[v.prm.uuid]['ppm_mode_items'][v.get_current_ppm_mode()[0]]) - - if refresh_message: - self.update_message(devices, 'Refresh complete.\n', 'gray') - while Gtk.events_pending(): - Gtk.main_iteration_do(True) - return - - def save_all_cards(self, parent, gpu_list, devices): - """ - Save modified data for all GPUs. - :param parent: parent - :param gpu_list: - :param devices: Dictionary of GUI items and GPU data. - :type devices: dict - :return: - """ - changed = 0 - # Write start message - if env.GUT_CONST.execute_pac: - message = ('Using the --execute_pac option. 
Changes will be written to the GPU without ' + - 'confirmation.\nSudo will be used, so you may be prompted for credentials in ' + - 'the window where amdgpu-pac was executed from.') - else: - message = 'Writing PAC command bash file.\n' - self.update_message(devices, message, 'red') - - # save each card - for gk in gpu_list.list.keys(): - changed += self.save_card(parent, gpu_list, devices, gk, refresh=False) - - # Write finish message - time.sleep(1.0) - if env.GUT_CONST.execute_pac: - if changed: - message = ('Write {} PAC commands to card complete.\n'.format(changed) + - 'Confirm changes with amdgpu-monitor.') - else: - message = 'No PAC commands to write to card.\nNo changes specified.' - else: - if changed: - message = ('Writing {} PAC commands to bash file complete.\n'.format(changed) + - 'Run bash file with sudo to execute changes.') - else: - message = 'No PAC commands to write to bash file.\nNo changes specified.' - self.update_message(devices, message, 'yellow') - - self.refresh_all_cards(parent, gpu_list, devices) - return - - def save_card(self, _, gpu_list, devices, uuid, refresh=True): - """ - Save modified data for specified GPU. - :param _: parent not used - :param gpu_list: - :param devices: Dictionary of GUI items and GPU data. - :type devices: dict - :param uuid: GPU device ID - :type uuid: str - :param refresh: Flag to indicate if refresh should be done - :type refresh: bool - :return: - """ - if refresh: - # Write message - if env.GUT_CONST.execute_pac: - message = ('Using the --execute_pac option. 
Changes will be written to the GPU ' + - 'without confirmation.\nSudo will be used, so you may be prompted for ' + - 'credentials in the window where amdgpu-pac was executed from.') - else: - message = 'Writing PAC commands to bash file.\n' - self.update_message(devices, message, 'red') - - # Specify output batch file name - out_filename = os.path.join(os.getcwd(), 'pac_writer_{}.sh'.format(uuid4().hex)) - fileptr = open(out_filename, 'x') - # Output header - print('#!/bin/sh', file=fileptr) - print('###########################################################################', file=fileptr) - print('## amdgpu-pac generated script to modify GPU configuration/settings', file=fileptr) - print('###########################################################################', file=fileptr) - print('', file=fileptr) - print('###########################################################################', file=fileptr) - print('## WARNING - Do not execute this script without completely', file=fileptr) - print('## understanding appropriate values to write to your specific GPUs', file=fileptr) - print('###########################################################################', file=fileptr) - print('#', file=fileptr) - print('# Copyright (C) 2019 RueiKe', file=fileptr) - print('#', file=fileptr) - print('# This program is free software: you can redistribute it and/or modify', file=fileptr) - print('# it under the terms of the GNU General Public License as published by', file=fileptr) - print('# the Free Software Foundation, either version 3 of the License, or', file=fileptr) - print('# (at your option) any later version.', file=fileptr) - print('#', file=fileptr) - print('# This program is distributed in the hope that it will be useful,', file=fileptr) - print('# but WITHOUT ANY WARRANTY; without even the implied warranty of', file=fileptr) - print('# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the', file=fileptr) - print('# GNU General Public License for more details.', file=fileptr) - print('#', file=fileptr) - print('# You should have received a copy of the GNU General Public License', file=fileptr) - print('# along with this program. If not, see .', file=fileptr) - print('###########################################################################', file=fileptr) - - changed = 0 - v = gpu_list.list[uuid] - print('# ', file=fileptr) - print('# Card{} {}'.format(v.prm.card_num, v.get_params_value('model')), file=fileptr) - print('# {}'.format(v.prm.card_path), file=fileptr) - if not env.GUT_CONST.write_delta_only: - print('# Force Write mode.') - else: - print('# Write Delta mode.') - print('# ', file=fileptr) - print('set -x', file=fileptr) - - # Check/set power_dpm_force_performance_level - # Mode of manual required to change ppm or clock masks - curr_power_dpm_force = v.get_params_value('power_dpm_force').lower() - if curr_power_dpm_force == 'manual' and env.GUT_CONST.write_delta_only: - print('# Power DPM Force Performance Level: already [{}], skipping.'.format(curr_power_dpm_force), file=fileptr) - else: - power_dpm_force_file = os.path.join(v.prm.card_path, 'power_dpm_force_performance_level') - print('# Power DPM Force Performance Level: [{}] change to [manual]'.format(curr_power_dpm_force), - file=fileptr) - print("sudo sh -c \"echo \'manual\' > {}\"".format(power_dpm_force_file), file=fileptr) - - # Power Cap - power_cap_file = os.path.join(v.prm.hwmon_path, 'power1_cap') - old_power_cap = int(v.get_params_value('power_cap')) - new_power_cap_str = devices[uuid]['power_cap_ent'].get_text() - if new_power_cap_str.lower() == 'reset': - changed += 1 - print('# Powercap entry: {}, Resetting to default'.format(new_power_cap_str), file=fileptr) - print("sudo sh -c \"echo \'0\' > {}\"".format(power_cap_file), file=fileptr) - elif re.fullmatch(r'^[0-9]+', new_power_cap_str): - new_power_cap = int(new_power_cap_str) - power_cap_range = 
v.get_params_value('power_cap_range') - print('# Powercap Old: {}'.format(old_power_cap), end='', file=fileptr) - print(' New: {}'.format(new_power_cap), end='', file=fileptr) - print(' Min: {}'.format(power_cap_range[0]), end='', file=fileptr) - print(' Max: {}\n'.format(power_cap_range[1]), end='', file=fileptr) - if new_power_cap == old_power_cap and env.GUT_CONST.write_delta_only: - print('# No changes, skipped', file=fileptr) - else: - if v.is_valid_power_cap(new_power_cap): - changed += 1 - print("sudo sh -c \"echo \'{}\' > {}\"".format((int(1000000 * new_power_cap)), power_cap_file), - file=fileptr) - else: - print('# Invalid parameter values', file=fileptr) - else: - print('# Powercap New: {}, invalid input, ignoring'.format(new_power_cap_str), file=fileptr) - new_power_cap = old_power_cap - - if env.GUT_CONST.show_fans: - # Fan PWM - pwm_enable_file = os.path.join(v.prm.hwmon_path, 'pwm1_enable') - pwm_file = os.path.join(v.prm.hwmon_path, 'pwm1') - old_pwm = int(v.get_params_value('fan_pwm')) - new_pwm_str = devices[uuid]['fan_pwm_ent'].get_text() - if new_pwm_str.lower() == 'reset': - changed += 1 - print('# PWM entry: {}, Resetting to default mode of dynamic'.format(new_pwm_str), file=fileptr) - print("sudo sh -c \"echo \'0\' > {}\"".format(pwm_enable_file), file=fileptr) - print("sudo sh -c \"echo \'2\' > {}\"".format(pwm_enable_file), file=fileptr) - elif new_pwm_str.lower() == 'max': - changed += 1 - print('# PWM entry: {}, Disabling fan control'.format(new_pwm_str), file=fileptr) - print("sudo sh -c \"echo \'0\' > {}\"".format(pwm_enable_file), file=fileptr) - elif re.fullmatch(r'^[0-9]+', new_pwm_str): - new_pwm = int(new_pwm_str) - print('# Fan PWM Old: {}'.format(old_pwm), end='', file=fileptr) - print(' New: {}'.format(new_pwm), end='', file=fileptr) - pwm_range = v.get_params_value('fan_pwm_range') - print(' Min: {}'.format(pwm_range[0]), end='', file=fileptr) - print(' Max: {}\n'.format(pwm_range[1]), end='', file=fileptr) - if new_pwm == 
old_pwm and env.GUT_CONST.write_delta_only: - print('# No changes, skipped', file=fileptr) - else: - if v.is_valid_fan_pwm(new_pwm): - changed += 1 - new_pwm_value = int(255 * new_pwm / 100) - #print("sudo sh -c \"echo \'0\' > {}\"".format(pwm_enable_file), file=fileptr) - print("sudo sh -c \"echo \'1\' > {}\"".format(pwm_enable_file), file=fileptr) - print("sudo sh -c \"echo \'{}\' > {}\"".format(new_pwm_value, pwm_file), file=fileptr) - else: - print('# Invalid parameter values', file=fileptr) - else: - print('# PWM entry: {}, invalid input, ignoring'.format(new_power_cap_str), file=fileptr) - new_pwm = old_pwm - - device_file = os.path.join(v.prm.card_path, 'pp_od_clk_voltage') - commit_needed = False - if v.get_params_value('gpu_type') == 1: - # Sclk P-states - for pk, pv in devices[uuid]['sclk_pstate'].items(): - if not pv['gtk_ent_f_obj'].get_text().isnumeric(): - print('# Invalid sclk pstate entry: {}'.format(pv['gtk_ent_f_obj'].get_text()), file=fileptr) - print('# Invalid sclk pstate entry: {}'.format(pv['gtk_ent_f_obj'].get_text())) - continue - if not pv['gtk_ent_v_obj'].get_text().isnumeric(): - print('# Invalid sclk pstate entry: {}'.format(pv['gtk_ent_v_obj'].get_text()), file=fileptr) - print('# Invalid sclk pstate entry: {}'.format(pv['gtk_ent_v_obj'].get_text())) - pstate = [pk, int(pv['gtk_ent_f_obj'].get_text()), int(pv['gtk_ent_v_obj'].get_text())] - print('#sclk p-state: {} : {} MHz, {} mV'.format(pstate[0], pstate[1], pstate[2]), file=fileptr) - if v.is_valid_sclk_pstate(pstate): - if v.is_changed_sclk_pstate(pstate) or not env.GUT_CONST.write_delta_only: - changed += 1 - commit_needed = True - print("sudo sh -c \"echo \'s {} {} {}\' > {}\"".format(pstate[0], pstate[1], - pstate[2], device_file), file=fileptr) - else: - print('# Sclk pstate {} unchanged, skipping'.format(pk), file=fileptr) - else: - print('# Invalid sclk pstate values', file=fileptr) - # Mclk P-states - for pk, pv in devices[uuid]['mclk_pstate'].items(): - if not 
pv['gtk_ent_f_obj'].get_text().isnumeric(): - print('# Invalid mclk pstate entry: {}'.format(pv['gtk_ent_f_obj'].get_text()), file=fileptr) - print('# Invalid mclk pstate entry: {}'.format(pv['gtk_ent_f_obj'].get_text())) - continue - if not pv['gtk_ent_v_obj'].get_text().isnumeric(): - print('# Invalid mclk pstate entry: {}'.format(pv['gtk_ent_v_obj'].get_text()), file=fileptr) - print('# Invalid mclk pstate entry: {}'.format(pv['gtk_ent_v_obj'].get_text())) - continue - pstate = [pk, int(pv['gtk_ent_f_obj'].get_text()), int(pv['gtk_ent_v_obj'].get_text())] - print('#mclk p-state: {} : {} MHz, {} mV'.format(pstate[0], pstate[1], pstate[2]), file=fileptr) - if v.is_valid_mclk_pstate(pstate): - if v.is_changed_mclk_pstate(pstate) or not env.GUT_CONST.write_delta_only: - changed += 1 - commit_needed = True - print("sudo sh -c \"echo \'m {} {} {}\' > {}\"".format(pstate[0], pstate[1], - pstate[2], device_file), file=fileptr) - else: - print('# Mclk pstate {} unchanged, skipping'.format(pk), file=fileptr) - else: - print('# Invalid mclk pstate values', file=fileptr) - elif v.get_params_value('gpu_type') == 2: - # Sclk Curve End Points - for pk, pv in devices[uuid]['sclk_pstate'].items(): - if not pv['gtk_ent_f_obj'].get_text().isnumeric(): - print('# Invalid sclk curve end point entry: {}'.format(pv['gtk_ent_f_obj'].get_text()), - file=fileptr) - print('# Invalid sclk curve end point entry: {}'.format(pv['gtk_ent_f_obj'].get_text())) - continue - pstate = [pk, int(pv['gtk_ent_f_obj'].get_text()), '-'] - print('# sclk curve end point: {} : {} MHz'.format(pstate[0], pstate[1]), file=fileptr) - if v.is_valid_sclk_pstate(pstate): - if v.is_changed_sclk_pstate(pstate) or not env.GUT_CONST.write_delta_only: - changed += 1 - commit_needed = True - print("sudo sh -c \"echo \'s {} {}\' > {}\"".format(pstate[0], pstate[1], device_file), - file=fileptr) - else: - print('# Sclk curve point {} unchanged, skipping'.format(pk), file=fileptr) - else: - print('# Invalid sclk curve end 
point values', file=fileptr) - # Mclk Curve End Points - for pk, pv in devices[uuid]['mclk_pstate'].items(): - if not pv['gtk_ent_f_obj'].get_text().isnumeric(): - print('# Invalid mclk curve end point entry: {}'.format(pv['gtk_ent_f_obj'].get_text()), - file=fileptr) - print('# Invalid mclk curve end point entry: {}'.format(pv['gtk_ent_f_obj'].get_text())) - continue - pstate = [pk, int(pv['gtk_ent_f_obj'].get_text()), '-'] - print('# mclk curve end point: {} : {} MHz'.format(pstate[0], pstate[1]), file=fileptr) - if v.is_valid_mclk_pstate(pstate): - if v.is_changed_mclk_pstate(pstate) or not env.GUT_CONST.write_delta_only: - changed += 1 - commit_needed = True - print("sudo sh -c \"echo \'m {} {}\' > {}\"".format(pstate[0], pstate[1], device_file), - file=fileptr) - else: - print('# mclk curve point {} unchanged, skipping'.format(pk), file=fileptr) - else: - print('# Invalid mclk curve end point values', file=fileptr) - # VDDC Curve Points - for pk, pv in devices[uuid]['vddc_curve_pt'].items(): - if not pv['gtk_ent_f_obj'].get_text().isnumeric(): - print('# Invalid vddc curve point entry: {}'.format(pv['gtk_ent_f_obj'].get_text()), file=fileptr) - print('# Invalid vddc curve point entry: {}'.format(pv['gtk_ent_f_obj'].get_text())) - continue - if not pv['gtk_ent_v_obj'].get_text().isnumeric(): - print('# Invalid vddc curve point entry: {}'.format(pv['gtk_ent_v_obj'].get_text()), file=fileptr) - print('# Invalid vddc curve point entry: {}'.format(pv['gtk_ent_v_obj'].get_text())) - continue - curve_pts = [pk, int(pv['gtk_ent_f_obj'].get_text()), int(pv['gtk_ent_v_obj'].get_text())] - print('# vddc curve point: {} : {} MHz, {} mV'.format(curve_pts[0], curve_pts[1], curve_pts[2]), - file=fileptr) - if v.is_valid_vddc_curve_pts(curve_pts): - if v.is_changed_vddc_curve_pt(curve_pts) or not env.GUT_CONST.write_delta_only: - changed += 1 - commit_needed = True - print("sudo sh -c \"echo \'vc {} {} {}\' > {}\"".format(curve_pts[0], curve_pts[1], - curve_pts[2], 
device_file), file=fileptr) - else: - print('# Vddc curve point {} unchanged, skipping'.format(pk), file=fileptr) - else: - print('# Invalid Vddc curve point values', file=fileptr) - - # PPM - ppm_mode_file = os.path.join(v.prm.card_path, 'pp_power_profile_mode') - - tree_iter = devices[uuid]['ppm_modes_combo'].get_active_iter() - if tree_iter is not None: - model = devices[uuid]['ppm_modes_combo'].get_model() - row_id, name = model[tree_iter][:2] - selected_mode = devices[uuid]['new_ppm'][0] - print('# Selected: ID={}, name={}'.format(devices[uuid]['new_ppm'][0], devices[uuid]['new_ppm'][1]), - file=fileptr) - if v.get_current_ppm_mode()[0] != devices[uuid]['new_ppm'][0] or not env.GUT_CONST.write_delta_only: - changed += 1 - print("sudo sh -c \"echo \'{}\' > {}\"".format(devices[uuid]['new_ppm'][0], ppm_mode_file), - file=fileptr) - else: - print('# PPM mode {} unchanged, skipping'.format(devices[uuid]['new_ppm'][1]), file=fileptr) - - # Commit changes - device_file = os.path.join(v.prm.card_path, 'pp_od_clk_voltage') - if commit_needed: - changed += 1 - print("sudo sh -c \"echo \'c\' > {}\"".format(device_file), file=fileptr) - else: - print('# No clock changes made, commit skipped', file=fileptr) - - if v.get_params_value('gpu_type') == 1 or v.get_params_value('gpu_type') == 2: - # Writes of pstate Masks must come after commit of pstate changes - # Sclk Mask - sclk_mask_file = os.path.join(v.prm.card_path, 'pp_dpm_sclk') - old_sclk_mask = v.prm.sclk_mask.replace(',', ' ') - new_sclk_mask = devices[uuid]['sclk_pst_mask_ent'].get_text().replace(',', ' ').strip() - print('# Sclk P-State Mask Default: {}'.format(old_sclk_mask), end='', file=fileptr) - print(' New: {}'.format(new_sclk_mask), file=fileptr) - if new_sclk_mask == old_sclk_mask and env.GUT_CONST.write_delta_only: - print('# No changes, skipped', file=fileptr) - else: - if v.is_valid_pstate_list_str(new_sclk_mask, 'SCLK'): - changed += 1 - if new_sclk_mask == '': - # reset - print('# Resetting SCLK Mask 
to default', file=fileptr) - print("sudo sh -c \"echo \'{}\' > {}\"".format(old_sclk_mask, sclk_mask_file), file=fileptr) - else: - print("sudo sh -c \"echo \'{}\' > {}\"".format(new_sclk_mask, sclk_mask_file), file=fileptr) - else: - print('# Invalid parameter values', file=fileptr) - - # Mclk Mask - mclk_mask_file = os.path.join(v.prm.card_path, 'pp_dpm_mclk') - old_mclk_mask = v.prm.mclk_mask.replace(',', ' ') - new_mclk_mask = devices[uuid]['mclk_pst_mask_ent'].get_text().replace(',', ' ').strip() - print('# Mclk P-State Mask Default: {}'.format(old_mclk_mask), end='', file=fileptr) - print(' New: {}'.format(new_mclk_mask), file=fileptr) - if new_mclk_mask == old_mclk_mask and env.GUT_CONST.write_delta_only: - print('# No changes, skipped', file=fileptr) - else: - if v.is_valid_pstate_list_str(new_mclk_mask, 'MCLK'): - changed += 1 - if new_mclk_mask == '': - # reset - print('# Resetting MCLK Mask to default', file=fileptr) - print("sudo sh -c \"echo \'{}\' > {}\"".format(old_mclk_mask, mclk_mask_file), file=fileptr) - else: - print("sudo sh -c \"echo \'{}\' > {}\"".format(new_mclk_mask, mclk_mask_file), file=fileptr) - else: - print('# Invalid parameter values', file=fileptr) - - # Close file and Set permissions and Execute it --execute_pac - fileptr.close() - os.chmod(out_filename, 0o744) - print('Batch file completed: {}'.format(out_filename)) - if env.GUT_CONST.execute_pac: - # Execute bash file - print('Writing {} changes to GPU {}'.format(changed, v.prm.card_path)) - cmd = subprocess.Popen(out_filename, shell=True) - cmd.wait() - print('PAC execution complete.') - - if refresh: - # dismiss execute_pac message - time.sleep(0.5) - if changed: - message = ('Write of {} PAC commands to card complete.\n'.format(changed) + - 'Confirm changes with amdgpu-monitor.') - else: - message = 'No PAC commands to write to card.\nNo changes specified.' 
- self.update_message(devices, message, 'yellow') - - if refresh: - self.refresh_pac(gpu_list, devices) - os.remove(out_filename) - else: - if refresh: - # dismiss execute_pac message - if changed: - message = ('Write of {} PAC commands to bash file complete.\n'.format(changed) + - 'Manually run bash file with sudo to execute changes.') - else: - message = 'No PAC commands to write bash file.\nNo changes specified.' - self.update_message(devices, message, 'yellow') - print('Execute to write changes to GPU {}'.format(v.prm.card_path)) - print('') - return changed - - def reset_all_cards(self, parent, gpu_list, devices): - """ - Reset data for all GPUs. - :param parent: parent - :param gpu_list: - :param devices: Dictionary of GUI items and GPU data. - :type devices: dict - :return: - """ - # Write start message - if env.GUT_CONST.execute_pac: - message = ('Using the --execute_pac option Reset commands will be written to the GPU ' + - 'without confirmation.\nSudo will be used, so you may be prompted for ' + - 'credentials in the window where amdgpu-pac was executed from.') - else: - message = 'Writing reset commands to bash file.\n' - self.update_message(devices, message, 'red') - - # reset each card - for gk in gpu_list.list.keys(): - self.reset_card(parent, gpu_list, devices, gk, refresh=False) - - # Write finish message - if env.GUT_CONST.execute_pac: - message = 'Write reset commands to card complete.\nConfirm changes with amdgpu-monitor.' - else: - message = 'Write reset commands to bash file complete.\nRun bash file with sudo to execute changes.' - self.update_message(devices, message, 'yellow') - - self.refresh_all_cards(parent, gpu_list, devices) - return - - def reset_card(self, _, gpu_list, devices, uuid, refresh=True): - """ - Reset data for specified GPU. - :param _: parent not used - :param gpu_list: - :param devices: Dictionary of GUI items and GPU data. 
- :type devices: dict - :param uuid: GPU device ID - :type uuid: str - :param refresh: Flag to indicate if refresh should be done - :type refresh: bool - :return: - """ - if refresh: - # Write message - if env.GUT_CONST.execute_pac: - message = ('Using the --execute_pac option Reset commands will be written to the GPU ' + - 'without confirmation.\nSudo will be used, so you may be prompted for ' + - 'credentials in the window where amdgpu-pac was executed from.') - else: - message = 'Writing reset commands to bash file.\n' - self.update_message(devices, message, 'red') - # specify output batch file name - out_filename = os.path.join(os.getcwd(), 'pac_resetter_{}.sh'.format(uuid4().hex)) - fileptr = open(out_filename, 'x') - # output header - print('#!/bin/sh', file=fileptr) - print('###########################################################################', file=fileptr) - print('## amdgpu-pac generated script to modify GPU configuration/settings', file=fileptr) - print('###########################################################################', file=fileptr) - print('', file=fileptr) - print('###########################################################################', file=fileptr) - print('## WARNING - Do not execute this script without completely', file=fileptr) - print('## understanding appropriate value to write to your specific GPUs', file=fileptr) - print('###########################################################################', file=fileptr) - print('#', file=fileptr) - print('# Copyright (C) 2019 RueiKe', file=fileptr) - print('#', file=fileptr) - print('# This program is free software: you can redistribute it and/or modify', file=fileptr) - print('# it under the terms of the GNU General Public License as published by', file=fileptr) - print('# the Free Software Foundation, either version 3 of the License, or', file=fileptr) - print('# (at your option) any later version.', file=fileptr) - print('#', file=fileptr) - print('# This program is 
distributed in the hope that it will be useful,', file=fileptr) - print('# but WITHOUT ANY WARRANTY; without even the implied warranty of', file=fileptr) - print('# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the', file=fileptr) - print('# GNU General Public License for more details.', file=fileptr) - print('#', file=fileptr) - print('# You should have received a copy of the GNU General Public License', file=fileptr) - print('# along with this program. If not, see .', file=fileptr) - print('###########################################################################', file=fileptr) - - v = gpu_list.list[uuid] - print('# ', file=fileptr) - print('# Card{} {}'.format(v.prm.card_num, v.get_params_value('model')), file=fileptr) - print('# {}'.format(v.prm.card_path), file=fileptr) - print('# ', file=fileptr) - print('set -x', file=fileptr) - - # Commit changes - power_cap_file = os.path.join(v.prm.hwmon_path, 'power1_cap') - pwm_enable_file = os.path.join(v.prm.hwmon_path, 'pwm1_enable') - device_file = os.path.join(v.prm.card_path, 'pp_od_clk_voltage') - power_dpm_force_file = os.path.join(v.prm.card_path, 'power_dpm_force_performance_level') - print("sudo sh -c \"echo \'0\' > {}\"".format(power_cap_file), file=fileptr) - if env.GUT_CONST.show_fans: - print("sudo sh -c \"echo \'2\' > {}\"".format(pwm_enable_file), file=fileptr) - print("sudo sh -c \"echo \'auto\' > {}\"".format(power_dpm_force_file), file=fileptr) - print("sudo sh -c \"echo \'r\' > {}\"".format(device_file), file=fileptr) - print("sudo sh -c \"echo \'c\' > {}\"".format(device_file), file=fileptr) - # No need to reset clk pstate masks as commit to pp_od_clk_voltage will reset - - # Close file and Set permissions and Execute it --execute_pac - fileptr.close() - os.chmod(out_filename, 0o744) - print('Batch file completed: {}'.format(out_filename)) - if env.GUT_CONST.execute_pac: - print('Writing changes to GPU {}'.format(v.prm.card_path)) - cmd = subprocess.Popen(out_filename, shell=True) - 
cmd.wait() - print('') - if refresh: - # dismiss execute_pac message - message = 'Write reset commands to card complete.\nConfirm changes with amdgpu-monitor.' - self.update_message(devices, message, 'yellow') - self.refresh_pac(gpu_list, devices) - os.remove(out_filename) - else: - print('Execute to write changes to GPU {}.\n'.format(v.prm.card_path)) - if refresh: - # Dismiss execute_pac message - message = 'Write reset commands to bash file complete.\nRun bash file with sudo to execute changes.' - self.update_message(devices, message, 'yellow') - return - - -def ppm_select(_, device): - """ - Update device data for ppm selection and update active selected item in Gui. - :param _: self - :param device: Dictionary of GUI items and GPU data. - :type device: dict - :return: None - """ - tree_iter = device['ppm_modes_combo'].get_active_iter() - if tree_iter is not None: - model = device['ppm_modes_combo'].get_model() - row_id, name = model[tree_iter][:2] - device['new_ppm'] = [row_id, name] - return - - -def main(): - """ - Main PAC flow. 
- :return: - """ - parser = argparse.ArgumentParser() - parser.add_argument('--about', help='README', action='store_true', default=False) - parser.add_argument('--execute_pac', help='execute pac bash script without review', - action='store_true', default=False) - parser.add_argument('--no_fan', help='do not include fan setting options', action='store_true', default=False) - parser.add_argument('--force_write', help='write all parameters, even if unchanged', - action='store_true', default=False) - parser.add_argument('-d', '--debug', help='Debug output', action='store_true', default=False) - args = parser.parse_args() - - # About me - if args.about: - print(__doc__) - print('Author: ', __author__) - print('Copyright: ', __copyright__) - print('Credits: ', __credits__) - print('License: ', __license__) - print('Version: ', __version__) - print('Maintainer: ', __maintainer__) - print('Status: ', __status__) - sys.exit(0) - - env.GUT_CONST.DEBUG = args.debug - if args.no_fan: - env.GUT_CONST.show_fans = False - if args.force_write: - env.GUT_CONST.write_delta_only = False - else: - env.GUT_CONST.write_delta_only = True - env.GUT_CONST.execute_pac = args.execute_pac - - if env.GUT_CONST.check_env() < 0: - print('Error in environment. 
Exiting...') - sys.exit(-1) - - # Get list of GPUs and get basic non-driver details - gpu_list = gpu.GpuList() - gpu_list.set_gpu_list() - - # Check list of GPUs - num_gpus = gpu_list.num_vendor_gpus() - print('Detected GPUs: ', end='') - for i, (k, v) in enumerate(num_gpus.items()): - if i: - print(', {}: {}'.format(k, v), end='') - else: - print('{}: {}'.format(k, v), end='') - print('') - if 'AMD' in num_gpus.keys(): - env.GUT_CONST.read_amd_driver_version() - print('AMD: {}'.format(gpu_list.wattman_status())) - if 'NV' in num_gpus.keys(): - print('nvidia smi: [{}]'.format(env.GUT_CONST.cmd_nvidia_smi)) - - num_gpus = gpu_list.num_gpus() - if num_gpus['total'] == 0: - print('No GPUs detected, exiting...') - sys.exit(-1) - - # Read data static/dynamic/info/state driver information for GPUs - gpu_list.read_gpu_sensor_data(data_type='All') - - # Check number of readable/writable GPUs again - num_gpus = gpu_list.num_gpus() - print('{} total GPUs, {} rw, {} r-only, {} w-only\n'.format(num_gpus['total'], num_gpus['rw'], - num_gpus['r-only'], num_gpus['w-only'])) - - # Check number of compatible GPUs again - com_gpu_list = gpu_list.list_gpus(compatibility='writable') - writable_gpus = com_gpu_list.num_gpus()['total'] - if not writable_gpus: - print('None are writable, exiting...') - sys.exit(-1) - com_gpu_list.read_gpu_pstates() - com_gpu_list.read_gpu_ppm_table() - - # Display Gtk style Monitor - devices = {} - gmonitor = PACWindow(com_gpu_list, devices) - gmonitor.connect('delete-event', Gtk.main_quit) - gmonitor.show_all() - - Gtk.main() - - -if __name__ == '__main__': - main() diff -Nru ricks-amdgpu-utils-3.0.0/amdgpu-plot ricks-amdgpu-utils-3.5.0/amdgpu-plot --- ricks-amdgpu-utils-3.0.0/amdgpu-plot 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/amdgpu-plot 1970-01-01 00:00:00.000000000 +0000 @@ -1,887 +0,0 @@ -#!/usr/bin/env python3 -""" amdgpu-plot - Plot GPU parameter curves - - A utility to continuously plot the trend of critical GPU 
parameters for all compatible - AMD GPUs. The *--sleep N* can be used to specify the update interval. The *amdgpu-plot* - utility has 2 modes of operation. The default mode is to read the GPU driver details - directly, which is useful as a standalone utility. The *--stdin* option causes - *amdgpu-plot* to read GPU data from stdin. This is how *amdgpu-monitor* produces the - plot and can also be used to pipe your own data into the process. The *--simlog* - option can be used with the *--stdin* when a monitor log file is piped as stdin. - This is useful for troubleshooting and can be used to display saved log results. - The *--ltz* option results in the use of local time instead of UTC. If you plan - to run both *amdgpu-plot* and *amdgpu-monitor*, then the *--plot* option of the - *amdgpu-monitor* utility should be used instead of both utilities in order reduce - data reads by a factor of 2. - - Copyright (C) 2019 RueiKe - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-""" -__author__ = 'RueiKe' -__copyright__ = 'Copyright (C) 2019 RueiKe' -__credits__ = ['Craig Echt - Testing, Debug, Verification, and Documentation'] -__license__ = 'GNU General Public License' -__program_name__ = 'amdgpu-plot' -__version__ = 'v3.0.0' -__maintainer__ = 'RueiKe' -__status__ = 'Stable Release' -__docformat__ = 'reStructuredText' -# pylint: disable=multiple-statements -# pylint: disable=line-too-long - -import sys -import gc as garbcollect -import argparse -import re -import threading -import os -import time -import numpy as np - -try: - from matplotlib.backends.backend_gtk3cairo import FigureCanvasGTK3Cairo as FigureCanvas - import matplotlib.pyplot as plt -except ModuleNotFoundError as error: - print('matplotlib import error: {}'.format(error)) - print('matplotlib is required for {}'.format(__program_name__)) - print('Use \'sudo apt-get install python3-matplotlib\' to install') - sys.exit(0) - -try: - import pandas as pd -except ModuleNotFoundError as error: - print('Pandas import error: {}'.format(error)) - print('Pandas is required for {}'.format(__program_name__)) - print('Install pip3 if needed: \'sudo apt install python3-pip\'') - print('Then pip install pandas: \'pip3 install pandas\'') - sys.exit(0) -from pandas.plotting import register_matplotlib_converters -register_matplotlib_converters() - -try: - import gi -except ModuleNotFoundError as error: - print('gi import error: {}'.format(error)) - print('gi is required for {}'.format(__program_name__)) - print(' In a venv, first install vext: pip install --no-cache-dir vext') - print(' Then install vext.gi: pip install --no-cache-dir vext.gi') - sys.exit(0) -gi.require_version('Gtk', '3.0') -from gi.repository import GLib, Gtk, Gdk - -from GPUmodules import GPUmodule as gpu -from GPUmodules import env - - -# SEMAPHORE ############ -PD_SEM = threading.Semaphore() -######################## - - -def hex_to_rgba(value): - """ - Return rgba tuple for give hex color name. 
- :param value: hex color value as string - :type value: str - :return: rgba tuple - :rtype: tuple - .. note:: Code copied from Stack Overflow - """ - value = value.lstrip('#') - if len(value) == 3: - value = ''.join([v*2 for v in list(value)]) - (r1, g1, b1, a1) = tuple(int(value[i:i+2], 16) for i in range(0, 6, 2))+(1,) - (r1, g1, b1, a1) = (r1/255.00000, g1/255.00000, b1/255.00000, a1) - return r1, g1, b1, a1 - - -def get_stack_size(): - """ - Get stack size for caller's frame. Code copied from Stack Overflow. - :return: Stack size - :rtype: int - """ - size = 2 # current frame and caller's frame always exist - while True: - try: - sys._getframe(size) - size += 1 - except ValueError: - return size - 1 # subtract current frame - - -class PlotData: - """ - Plot data object. - """ - def __init__(self): - self.df = pd.DataFrame() - self.gui_comp = None - self.gui_ready = False - self.length = 200 - self.quit = False - self.writer = False - self.reader = False - self.consec_writer = 0 - self.consec_reader = 0 - self.gpu_list = '' - self.num_gpus = 1 - self.com_gpu_list = gpu.GpuList() - - def set_gpus(self): - """ - Populate num_gpus and gpu_list from dataframe member. - :return: None - """ - self.num_gpus = self.df['Card#'].nunique() - self.gpu_list = self.df['Card#'].unique() - - def get_plot_data(self): - """ - Get deep copy of plot data df. - :return: deep copy of the plot data dataframe - :rtype: dataFrame - .. note:: This may have contention issues - """ - # SEMAPHORE ############ - PD_SEM.acquire() - ######################## - ndf = self.df.copy() - # SEMAPHORE ############ - PD_SEM.release() - ######################## - return ndf - - def kill_thread(self): - """ - Sets flags that result in reader thread death. - :return: None - """ - self.reader = False - self.quit = True - print('Stopping reader thread') - time.sleep(0.2) - - -class GuiComponents: - """ - Define the gui components of the plot window. 
- """ - def __init__(self, plot_data): - plot_data.gui_comp = self - self.ready = False - self.gpu_list = plot_data.gpu_list - self.num_gpus = plot_data.num_gpus - self.gui_components = {} - self.gpu_color = {} - self.colors = {'plotface': '#404040', 'figface': '#909090', - 'sclk_f_val': '#BED661', 'mclk_f_val': '#89E894', - 'loading': '#1E90FF', 'power': '#E12B06', 'power_cap': '#800000', - 'vddgfx_val': '#778899', 'temp_val': '#E0E0E0'} - self.font_colors = {'plotface': '#000000', 'figface': '#000000', - 'sclk_f_val': '#000000', 'mclk_f_val': '#000000', - 'loading': '#FFFFFF', 'power': '#FFFFFF', 'power_cap': '#FFFFFF', - 'vddgfx_val': '#000000', 'temp_val': '#000000'} - gpu_color_list = ['#B52735', '#EBB035', '#06A2CB', '#218559', '#D0C6B1', '#E18A07', '#336688', '#7C821E'] - plot_item_list = ['loading', 'power', 'power_cap', 'temp_val', 'vddgfx_val', 'sclk_f_val', 'mclk_f_val'] - - self.plot_items = {'loading': True, 'power': True, 'power_cap': True, - 'temp_val': True, 'vddgfx_val': True, 'sclk_f_val': True, 'mclk_f_val': True} - - self.gui_components['info_bar'] = {} - self.gui_components['legend'] = {} - self.gui_components['legend']['buttons'] = {} - self.gui_components['legend']['plot_items'] = {} - for plotitem in plot_item_list: - self.gui_components['legend']['plot_items'][plotitem] = True - self.gui_components['sclk_pstate_status'] = {} - self.gui_components['sclk_pstate_status']['df_name'] = 'sclk_ps_val' - self.gui_components['mclk_pstate_status'] = {} - self.gui_components['mclk_pstate_status']['df_name'] = 'mclk_ps_val' - self.gui_components['temp_status'] = {} - self.gui_components['temp_status']['df_name'] = 'temp_val' - self.gui_components['card_plots'] = {} - for i, gpu_i in enumerate(self.gpu_list): - self.gui_components['card_plots'][gpu_i] = {} - self.gui_components['card_plots'][gpu_i]['color'] = gpu_color_list[i] - self.gpu_color[gpu_i] = gpu_color_list[i] - return - - def set_ready(self, mode): - """ - Set flag to indicate gui is ready. 
- :param mode: True if gui is ready - :type mode: bool - :return: None - """ - self.ready = mode - - def is_ready(self): - """ - Return the ready status of the plot gui. - :return: True if ready - :rtype: bool - """ - return self.ready - - -class GPUPlotWindow(Gtk.Window): - """ - Plot window. - """ - def __init__(self, gc, plot_data): - box_spacing_val = 5 - num_bar_plots = 3 - if gc.num_gpus > 4: - def_gp_y_size = 150 - def_bp_y_size = 200 - elif gc.num_gpus == 4: - def_gp_y_size = 200 - def_bp_y_size = 200 - else: - def_gp_y_size = 250 - def_bp_y_size = 250 - def_gp_x_size = 600 - def_bp_x_size = 250 - def_lab_y_size = 28 - if gc.num_gpus > num_bar_plots: - tot_y_size = gc.num_gpus * (def_gp_y_size + def_lab_y_size) - gp_y_size = def_gp_y_size - bp_y_size = (tot_y_size - (num_bar_plots * def_lab_y_size))/num_bar_plots - elif gc.num_gpus < num_bar_plots: - tot_y_size = num_bar_plots * (def_bp_y_size + def_lab_y_size) - bp_y_size = def_bp_y_size - gp_y_size = (tot_y_size - (gc.num_gpus * def_lab_y_size))/gc.num_gpus - else: - gp_y_size = def_gp_y_size - bp_y_size = def_bp_y_size - - Gtk.Window.__init__(self, title=__program_name__) - self.set_border_width(1) - icon_file = os.path.join(env.GUT_CONST.icon_path, 'amdgpu-plot.icon.png') - if os.path.isfile(icon_file): - self.set_icon_from_file(icon_file) - grid = Gtk.Grid() - grid.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(0.7, 0.7, 0.7, 1)) - self.add(grid) - - # Get deep copy of current df - ldf = plot_data.get_plot_data() - - row = 0 - # Top Bar - info - gc.gui_components['info_bar']['gtk_obj'] = Gtk.Label() - gc.gui_components['info_bar']['gtk_obj'].set_markup('{} Plot'.format(__program_name__)) - gc.gui_components['info_bar']['gtk_obj'].override_color(Gtk.StateFlags.NORMAL, Gdk.RGBA(1.0, 1.0, 1.0, 1.0)) - gc.gui_components['info_bar']['gtk_obj'].set_property('margin-top', 1) - gc.gui_components['info_bar']['gtk_obj'].set_property('margin-bottom', 1) - 
gc.gui_components['info_bar']['gtk_obj'].set_property('margin-right', 4) - gc.gui_components['info_bar']['gtk_obj'].set_property('margin-left', 4) - gc.gui_components['info_bar']['gtk_obj'].set_alignment(0.5, 0.5) - lbox = Gtk.Box(spacing=box_spacing_val) - lbox.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(.20, .40, .60, 1.0)) - lbox.set_property('margin-top', 1) - lbox.set_property('margin-bottom', 1) - lbox.set_property('margin-right', 1) - lbox.set_property('margin-left', 1) - lbox.pack_start(gc.gui_components['info_bar']['gtk_obj'], True, True, 0) - grid.attach(lbox, 1, row, 4, 1) - row += 1 - - # Legend - gc.gui_components['legend']['gtk_obj'] = Gtk.Label() - gc.gui_components['legend']['gtk_obj'].set_markup('Plot Items') - gc.gui_components['legend']['gtk_obj'].override_color(Gtk.StateFlags.NORMAL, Gdk.RGBA(1.0, 1.0, 1.0, 1.0)) - gc.gui_components['legend']['gtk_obj'].set_property('margin-top', 1) - gc.gui_components['legend']['gtk_obj'].set_property('margin-bottom', 1) - gc.gui_components['legend']['gtk_obj'].set_property('margin-right', 4) - gc.gui_components['legend']['gtk_obj'].set_property('margin-left', 4) - gc.gui_components['legend']['gtk_obj'].set_alignment(0.5, 0.5) - lbox = Gtk.Box(spacing=box_spacing_val) - lbox.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(.40, .40, .40, 1.0)) - lbox.set_property('margin-top', 1) - lbox.set_property('margin-bottom', 1) - lbox.set_property('margin-right', 1) - lbox.set_property('margin-left', 1) - lbox.pack_start(gc.gui_components['legend']['gtk_obj'], True, True, 0) - for k, v in gc.gui_components['legend']['plot_items'].items(): - but_color = hex_to_rgba(gc.colors[k]) - but_font_color = hex_to_rgba(gc.font_colors[k]) - gc.gui_components['legend']['buttons'][k] = Gtk.Button('') - for child in gc.gui_components['legend']['buttons'][k].get_children(): - child.set_label('{}'.format(k)) - child.set_use_markup(True) - child.override_color( - Gtk.StateFlags.NORMAL, - Gdk.RGBA(but_font_color[0], 
but_font_color[1], but_font_color[2], but_font_color[3])) - child.override_background_color( - Gtk.StateFlags.NORMAL, - Gdk.RGBA(but_color[0], but_color[1], but_color[2], but_color[3])) - gc.gui_components['legend']['buttons'][k].override_color( - Gtk.StateFlags.NORMAL, - Gdk.RGBA(but_font_color[0], but_font_color[1], but_font_color[2], but_font_color[3])) - gc.gui_components['legend']['buttons'][k].override_background_color( - Gtk.StateFlags.NORMAL, - Gdk.RGBA(but_color[0], but_color[1], but_color[2], but_color[3])) - gc.gui_components['legend']['buttons'][k].connect('clicked', self.toggle_plot_item, gc, k) - gc.gui_components['legend']['buttons'][k].set_property('width-request', 90) - gc.gui_components['legend']['buttons'][k].set_property('margin-top', 1) - gc.gui_components['legend']['buttons'][k].set_property('margin-bottom', 1) - gc.gui_components['legend']['buttons'][k].set_property('margin-right', 1) - gc.gui_components['legend']['buttons'][k].set_property('margin-left', 1) - lbox.pack_start(gc.gui_components['legend']['buttons'][k], True, True, 0) - grid.attach(lbox, 1, row, 4, 1) - row += 1 - main_last_row = row - - # Set up bar plots - grid_bar = Gtk.Grid() - grid_bar.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(0.7, 0.7, 0.7, 1)) - grid.attach(grid_bar, 1, main_last_row, 1, 1) - brow = 0 - fig_num = 0 - # plot_top_row = row - for v in [gc.gui_components['sclk_pstate_status'], - gc.gui_components['mclk_pstate_status'], - gc.gui_components['temp_status']]: - # Add Bar Plots Titles - v['title_obj'] = Gtk.Label() - v['title_obj'].set_markup('Card {}'.format(v['df_name'])) - v['title_obj'].override_color(Gtk.StateFlags.NORMAL, Gdk.RGBA(1.0, 1.0, 1.0, 1.0)) - v['title_obj'].set_property('margin-top', 1) - v['title_obj'].set_property('margin-bottom', 1) - v['title_obj'].set_property('margin-right', 4) - v['title_obj'].set_property('margin-left', 4) - v['title_obj'].set_alignment(0.5, 0.5) - lbox = Gtk.Box(spacing=box_spacing_val) - 
lbox.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(.20, .40, .60, 1.0)) - lbox.set_property('margin-top', 1) - lbox.set_property('margin-bottom', 1) - lbox.set_property('margin-right', 1) - lbox.set_property('margin-left', 1) - lbox.pack_start(v['title_obj'], True, True, 0) - - grid_bar.attach(lbox, 1, brow, 1, 1) - brow += 1 - - # Add Bar Plots - # Set up plot figure and canvas - v['figure_num'] = 100 + fig_num - fig_num += 1 - v['figure'], v['ax1'] = plt.subplots(num=v['figure_num']) - v['figure'].set_facecolor(gc.colors['figface']) - - plt.figure(v['figure_num']) - plt.subplots_adjust(left=0.13, right=0.97, top=0.97, bottom=0.1) - v['ax1'].set_facecolor(gc.colors['plotface']) - if v['df_name'] == 'temp_val': - plt.yticks(np.arange(20, 91, 10)) - else: - plt.yticks(np.arange(0, 9, 1)) - - v['canvas'] = FigureCanvas(v['figure']) # a Gtk.DrawingArea - v['canvas'].set_size_request(def_bp_x_size, bp_y_size) - - lbox = Gtk.Box(spacing=box_spacing_val) - lbox.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(0.5, 0.5, 0.5, 1.0)) - lbox.set_property('margin-top', 1) - lbox.set_property('margin-bottom', 1) - lbox.set_property('margin-right', 1) - lbox.set_property('margin-left', 1) - lbox.pack_start(v['canvas'], True, True, 0) - - grid_bar.attach(lbox, 1, brow, 1, 1) - brow += 1 - - # Set up gpu plots - grid_plot = Gtk.Grid() - grid_plot.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(0.7, 0.7, 0.7, 1)) - grid.attach(grid_plot, 2, main_last_row, 3, 1) - prow = 0 - # row = plot_top_row - for k, v in gc.gui_components['card_plots'].items(): - data_val = ldf[ldf['Card#'].isin([k])]['energy'].iloc[-1] - model_val = ldf[ldf['Card#'].isin([k])]['model_display'].iloc[-1] - # Add GPU Plots Titles - v['title_obj'] = Gtk.Label() - v['title_obj'].set_markup('Card {} {} Energy: {}'.format(k, model_val, data_val)) - v['title_obj'].override_color(Gtk.StateFlags.NORMAL, Gdk.RGBA(1.0, 1.0, 1.0, 1.0)) - v['title_obj'].set_property('margin-top', 1) - 
v['title_obj'].set_property('margin-bottom', 1) - v['title_obj'].set_property('margin-right', 4) - v['title_obj'].set_property('margin-left', 4) - v['title_obj'].set_alignment(0.5, 0.5) - lbox = Gtk.Box(spacing=box_spacing_val) - rgba_col = hex_to_rgba(v['color']) - lbox.override_background_color(Gtk.StateType.NORMAL, - Gdk.RGBA(rgba_col[0], rgba_col[1], rgba_col[2], rgba_col[3])) - lbox.set_property('margin-top', 1) - lbox.set_property('margin-bottom', 1) - lbox.set_property('margin-right', 1) - lbox.set_property('margin-left', 1) - lbox.pack_start(v['title_obj'], True, True, 0) - - grid_plot.attach(lbox, 1, prow, 1, 1) - prow += 1 - - # Add GPU Plots - # Set up plot figure and canvas - v['figure_num'] = 500 + k - v['figure'], v['ax1'] = plt.subplots(num=v['figure_num']) - v['figure'].set_facecolor(gc.colors['figface']) - plt.figure(v['figure_num']) - plt.subplots_adjust(left=0.1, right=0.9, top=0.97, bottom=0.03) - - v['ax1'].set_facecolor(gc.colors['plotface']) - v['ax1'].set_xticks([]) - v['ax1'].set_xticklabels([]) - v['ax1'].set_yticks(np.arange(0, 250, 20)) - v['ax1'].tick_params(axis='y', which='major', labelsize=8) - - v['ax2'] = v['ax1'].twinx() - v['ax2'].set_xticks([]) - v['ax2'].set_xticklabels([]) - v['ax2'].set_yticks(np.arange(500, 1500, 100)) - v['ax2'].tick_params(axis='y', which='major', labelsize=8) - - v['canvas'] = FigureCanvas(v['figure']) # a Gtk.DrawingArea - v['canvas'].set_size_request(def_gp_x_size, gp_y_size) - - lbox = Gtk.Box(spacing=box_spacing_val) - lbox.override_background_color(Gtk.StateType.NORMAL, Gdk.RGBA(1, 1, 1, 1.0)) - lbox.set_property('margin-top', 1) - lbox.set_property('margin-bottom', 1) - lbox.set_property('margin-right', 1) - lbox.set_property('margin-left', 1) - lbox.pack_start(v['canvas'], True, True, 0) - - grid_plot.attach(lbox, 1, prow, 1, 1) - prow += 1 - - @staticmethod - def toggle_plot_item(_, gc, k): - """ - Toggle specified plot item. 
- :param _: parent - :param gc: gui components object - :type gc: GuiComponents - :param k: Name of plot item to toggle - :type k: str - :return: None - """ - gc.plot_items[k] = False if gc.plot_items[k] else True - - -def update_data(gc, plot_data): - """ - Update plot data. - :param gc: - :type gc: GuiComponents - :param plot_data: - :type plot_data: PlotData - :return: None - """ - # SEMAPHORE ########### - PD_SEM.acquire() - ####################### - ldf = plot_data.df - try: - time_val = ldf[ldf['Card#'].isin([plot_data.gpu_list[0]])]['Time'].iloc[-1] - gc.gui_components['info_bar']['gtk_obj'].set_markup('Time {}'.format(time_val)) - # Update Bar Plots - for v in [gc.gui_components['sclk_pstate_status'], - gc.gui_components['mclk_pstate_status'], - gc.gui_components['temp_status']]: - data_val = [] - label_val = [] - bar_col = [] - # Set Plot Parameters - for k in plot_data.gpu_list: - l, d = ldf[ldf['Card#'].isin([k])][['Card#', v['df_name']]].iloc[-1] - label_val.append(int(l)) - data_val.append(float(d)) - bar_col.append(gc.gpu_color[l]) - ind = np.arange(gc.num_gpus) # the x locations for the groups - width = 0.65 # the width of the bars - - # Do bar plot - plt.figure(v['figure_num']) - v['ax1'].clear() - _rects1 = v['ax1'].bar(ind, data_val, width, color=bar_col, tick_label=label_val) - if v['df_name'] == 'temp_val': - for a, b in zip(ind, data_val): - v['ax1'].text(x=float(a)-(float(width)/1.8), y=0.90*b, s=' '+str(b), fontsize=8) - plt.ylim((20, 91)) - else: - data_val = list(map(int, data_val)) - for a, b in zip(ind, data_val): - if b == 0: - y_val = b + width - else: - y_val = b - width - v['ax1'].text(x=a-width/4.0, y=y_val, s=str(b), fontsize=10) - plt.ylim((0, 8)) - v['canvas'].draw() - v['canvas'].flush_events() - - # Update GPU Plots - y1lim_max_val = 10*(ldf.loc[:, ['loading', 'power_cap', 'power', 'temp_val']].max().max() // 10) + 5 - y1lim_min_val = 10*(ldf.loc[:, ['loading', 'power_cap', 'power', 'temp_val']].min().min() // 10) - 5 - 
y2lim_max_val = 100*(ldf.loc[:, ['vddgfx_val', 'sclk_f_val', 'mclk_f_val']].max().max() // 100) + 300 - y2lim_min_val = 100*(ldf.loc[:, ['vddgfx_val', 'sclk_f_val', 'mclk_f_val']].min().min() // 100) - 100 - for k, v in gc.gui_components['card_plots'].items(): - data_val = ldf[ldf['Card#'].isin([k])]['energy'].iloc[-1] - model_val = ldf[ldf['Card#'].isin([k])]['model_display'].iloc[-1] - v['title_obj'].set_markup('Card {} {} Energy: {}'.format(k, model_val, data_val)) - - # Plot GPUs - plt.figure(v['figure_num']) - v['ax1'].set_xticklabels([]) - v['ax1'].clear() - v['ax1'].set_ylabel('Loading/Power/Temp', color='k', fontsize=10) - for plot_item in ['loading', 'power_cap', 'power', 'temp_val']: - if gc.plot_items[plot_item]: - v['ax1'].plot(ldf[ldf['Card#'].isin([k])]['datetime'], - ldf[ldf['Card#'].isin([k])][plot_item], - color=gc.colors[plot_item], linewidth=0.5) - v['ax1'].text(x=ldf[ldf['Card#'].isin([k])]['datetime'].iloc[-1], - y=ldf[ldf['Card#'].isin([k])][plot_item].iloc[-1], - s=str(int(ldf[ldf['Card#'].isin([k])][plot_item].iloc[-1])), - bbox=dict(boxstyle='round,pad=0.2', facecolor=gc.colors[plot_item]), fontsize=6) - - v['ax2'].clear() - v['ax2'].set_xticklabels([]) - v['ax2'].set_ylabel('MHz/mV', color='k', fontsize=10) - for plot_item in ['vddgfx_val', 'sclk_f_val', 'mclk_f_val']: - if gc.plot_items[plot_item]: - v['ax2'].plot(ldf[ldf['Card#'].isin([k])]['datetime'], - ldf[ldf['Card#'].isin([k])][plot_item], - color=gc.colors[plot_item], linewidth=0.5) - v['ax2'].text(x=ldf[ldf['Card#'].isin([k])]['datetime'].iloc[-1], - y=ldf[ldf['Card#'].isin([k])][plot_item].iloc[-1], - s=str(int(ldf[ldf['Card#'].isin([k])][plot_item].iloc[-1])), - bbox=dict(boxstyle='round,pad=0.2', facecolor=gc.colors[plot_item]), fontsize=6) - - v['ax1'].set_yticks(np.arange(y1lim_min_val, y1lim_max_val, 10)) - v['ax2'].set_yticks(np.arange(y2lim_min_val, y2lim_max_val, 100)) - - v['canvas'].draw() - v['canvas'].flush_events() - except (OSError, ArithmeticError, NameError, 
TypeError, ValueError) as err: - print('matplotlib error: {}'.format(err)) - print('matplotlib error, stack size is {}'.format(get_stack_size())) - plot_data.kill_thread() - - # SEMAPHORE ########### - PD_SEM.release() - ####################### - - -def read_from_stdin(refreshtime, plot_data): - """ - Read plot data from stdin. - :param refreshtime: - :type refreshtime: int - :param plot_data: - :type plot_data: PlotData - :return: - .. note:: this should continuously read from stdin and populate df and call plot/gui update - """ - first_update = True - header = True - sync_add = 0 - while not plot_data.quit: - if env.GUT_CONST.SIMLOG: time.sleep(refreshtime/4.0) - ndf = pd.DataFrame() - - # Process a set of GPUs at a time - skip_update = False - read_time = 0.0 - for _gpu_index in range(0, plot_data.num_gpus + sync_add): - tb = env.GUT_CONST.now(env.GUT_CONST.USELTZ) - line = sys.stdin.readline() - tmp_read_time = (env.GUT_CONST.now(env.GUT_CONST.USELTZ) - tb).total_seconds() - if tmp_read_time > read_time: - read_time = tmp_read_time - - if line == '': - if env.GUT_CONST.DEBUG: print('Error: Null input line') - plot_data.kill_thread() - break - if header: - header_item = list(line.strip().split('|')) - header = False - continue - line = line.strip() - line_item = list(line.strip().split('|')) - new_line_item = [] - for l in line_item: - ll = l.strip() - if ll.isnumeric(): - new_line_item.append(int(ll)) - elif re.fullmatch(r'[0-9]+.[0-9]*', ll) or re.fullmatch(r'[0-9]*.[0-9]+', ll): - new_line_item.append(float(ll)) - elif ll == '' or ll == '-1' or ll == 'NA' or ll is None: - new_line_item.append(np.nan) - else: - new_line_item.append(ll) - line_item = tuple(new_line_item) - rdf = pd.DataFrame.from_records([line_item], columns=header_item) - rdf['datetime'] = pd.to_datetime(rdf['Time']) - ndf = pd.concat([ndf, rdf], ignore_index=True) - del rdf - if ndf['Time'].tail(plot_data.num_gpus).nunique() > 1: - sync_add = 1 - else: - sync_add = 0 - - if 
env.GUT_CONST.DEBUG: - print(env.GUT_CONST.now(env.GUT_CONST.USELTZ).strftime('%c')) - print(ndf) - - if not env.GUT_CONST.SIMLOG: - if read_time < 0.003: - skip_update = True - if env.GUT_CONST.DEBUG: print('skipping update') - - # SEMAPHORE ############ - PD_SEM.acquire() - ######################## - # Concatenate new data on plot_data dataframe and truncate - plot_data.df = pd.concat([plot_data.df, ndf], ignore_index=True) - plot_data.df.reset_index(drop=True, inplace=True) - - # Truncate df in place - plot_length = int(len(plot_data.df.index) / plot_data.num_gpus) - if plot_length > plot_data.length: - trun_index = plot_length - plot_data.length - plot_data.df.drop(np.arange(0, trun_index), inplace=True) - plot_data.df.reset_index(drop=True, inplace=True) - # SEMAPHORE ############ - PD_SEM.release() - ######################## - del ndf - - ######################### - # Update plots - ######################### - if skip_update: - continue - if plot_data.gui_comp is None: - continue - if plot_data.gui_comp.is_ready(): - if first_update: - time.sleep(refreshtime) - first_update = False - GLib.idle_add(update_data, plot_data.gui_comp, plot_data) - while Gtk.events_pending(): - Gtk.main_iteration_do(True) - # SEMAPHORE ############ - time.sleep(0.01) - PD_SEM.acquire() - PD_SEM.release() - ######################## - garbcollect.collect() - if env.GUT_CONST.DEBUG: print('update stack size: {}'.format(get_stack_size())) - - # Quit - print('exit stack size: {}'.format(get_stack_size())) - sys.exit(0) - - -def read_from_gpus(refreshtime, plot_data): - """ - Read plot data from stdin. - :param refreshtime: - :type refreshtime: int - :param plot_data: - :type plot_data: PlotData - :return: - .. 
note:: this should continuously read from GPUs and populate df and call plot/gui update - """ - first_update = True - while not plot_data.quit: - ndf = pd.DataFrame() - - plot_data.com_gpu_list.read_gpu_sensor_data(data_type='DynamicM') - plot_data.com_gpu_list.read_gpu_sensor_data(data_type='StateM') - - # Process a set of GPUs at a time - skip_update = False - for v in plot_data.com_gpu_list.list.values(): - gpu_plot_data = v.get_plot_data(plot_data.com_gpu_list) - if env.GUT_CONST.DEBUG: print('gpu_plot_data: ', gpu_plot_data) - - rdf = pd.DataFrame.from_records([tuple(gpu_plot_data.values())], columns=tuple(gpu_plot_data.keys())) - rdf['datetime'] = pd.to_datetime(rdf['Time']) - ndf = pd.concat([ndf, rdf], ignore_index=True) - del rdf - - # SEMAPHORE ############ - PD_SEM.acquire() - ######################## - # Concatenate new data on plot_data dataframe and truncate - plot_data.df = pd.concat([plot_data.df, ndf], ignore_index=True) - plot_data.df.reset_index(drop=True, inplace=True) - - # Truncate df in place - plot_length = int(len(plot_data.df.index) / plot_data.num_gpus) - if plot_length > plot_data.length: - trun_index = plot_length - plot_data.length - plot_data.df.drop(np.arange(0, trun_index), inplace=True) - plot_data.df.reset_index(drop=True, inplace=True) - # SEMAPHORE ############ - PD_SEM.release() - ######################## - del ndf - - ######################### - # Update plots - ######################### - if skip_update: - continue - if plot_data.gui_comp is None: - time.sleep(refreshtime) - continue - if plot_data.gui_comp.is_ready(): - if first_update: - time.sleep(refreshtime) - first_update = False - GLib.idle_add(update_data, plot_data.gui_comp, plot_data) - while Gtk.events_pending(): - Gtk.main_iteration_do(True) - # SEMAPHORE ############ - time.sleep(0.01) - PD_SEM.acquire() - PD_SEM.release() - ######################## - garbcollect.collect() - if env.GUT_CONST.DEBUG: print('update stack size: {}'.format(get_stack_size())) - 
time.sleep(refreshtime) - - # Quit - print('exit stack size: {}'.format(get_stack_size())) - sys.exit(0) - - -def main(): - """ Main flow for plot.""" - parser = argparse.ArgumentParser() - parser.add_argument('--about', help='README', action='store_true', default=False) - parser.add_argument('--stdin', help='Read from stdin', action='store_true', default=False) - parser.add_argument('--simlog', help='Simulate with piped log file', action='store_true', default=False) - parser.add_argument('--ltz', help='Use local time zone instead of UTC', action='store_true', default=False) - parser.add_argument('--sleep', help='Number of seconds to sleep between updates', type=int, default=3) - parser.add_argument('-d', '--debug', help='Debug output', action='store_true', default=False) - args = parser.parse_args() - - # About me - if args.about: - print(__doc__) - print('Author: ', __author__) - print('Copyright: ', __copyright__) - print('Credits: ', __credits__) - print('License: ', __license__) - print('Version: ', __version__) - print('Maintainer: ', __maintainer__) - print('Status: ', __status__) - import matplotlib - print('matplotlib version: ', matplotlib.__version__) - print('pandas version: ', pd.__version__) - print('numpy version: ', np.__version__) - sys.exit(0) - - env.GUT_CONST.DEBUG = args.debug - env.GUT_CONST.SIMLOG = args.simlog - if args.ltz: - env.GUT_CONST.USELTZ = True - - if env.GUT_CONST.check_env() < 0: - print('Error in environment. 
Exiting...') - sys.exit(-1) - - # Define graph gui and data components - plot_data = PlotData() - - if not args.stdin: - # Get list of AMD GPUs and get basic non-driver details - gpu_list = gpu.GpuList() - gpu_list.set_gpu_list() - - # Check list of GPUs - num_gpus = gpu_list.num_vendor_gpus() - print('Detected GPUs: ', end='') - for i, (k, v) in enumerate(num_gpus.items()): - if i: - print(', {}: {}'.format(k, v), end='') - else: - print('{}: {}'.format(k, v), end='') - print('') - if 'AMD' in num_gpus.keys(): - env.GUT_CONST.read_amd_driver_version() - print('AMD: {}'.format(gpu_list.wattman_status())) - if 'NV' in num_gpus.keys(): - print('nvidia smi: [{}]'.format(env.GUT_CONST.cmd_nvidia_smi)) - - num_gpus = gpu_list.num_gpus() - if num_gpus['total'] == 0: - print('No GPUs detected, exiting...') - sys.exit(-1) - - # Read data static/dynamic/info/state driver information for GPUs - gpu_list.read_gpu_sensor_data(data_type='All') - - # Check number of readable/writable GPUs again - num_gpus = gpu_list.num_gpus() - print('{} total GPUs, {} rw, {} r-only, {} w-only\n'.format(num_gpus['total'], num_gpus['rw'], - num_gpus['r-only'], num_gpus['w-only'])) - - # Check number of compatible GPUs again - com_gpu_list = gpu_list.list_gpus(compatibility='readable') - readable_gpus = com_gpu_list.num_gpus()['total'] - if not readable_gpus: - print('None are readable, exiting...') - sys.exit(-1) - - # Generate a new list of only compatible GPUs - plot_data.com_gpu_list = com_gpu_list - plot_data.num_gpus = readable_gpus - # end of if args.stdin == False - - if args.stdin or args.simlog: - _readthread = threading.Thread(target=read_from_stdin, daemon=True, args=[args.sleep, plot_data]).start() - else: - _readthread = threading.Thread(target=read_from_gpus, daemon=True, args=[args.sleep, plot_data]).start() - - print('{} waiting for initial data'.format(__program_name__), end='', flush=True) - while len(plot_data.df.index) < 9: - print('.', end='', flush=True) - 
time.sleep(args.sleep/4.0) - print('') - - # After reading initial data, set gpus - plot_data.set_gpus() - - gc = GuiComponents(plot_data) - gplot = GPUPlotWindow(gc, plot_data) - gplot.connect('delete-event', Gtk.main_quit) - gplot.show_all() - gc.set_ready(True) - Gtk.main() - plot_data.kill_thread() - - -if __name__ == '__main__': - main() diff -Nru ricks-amdgpu-utils-3.0.0/debian/changelog ricks-amdgpu-utils-3.5.0/debian/changelog --- ricks-amdgpu-utils-3.0.0/debian/changelog 2020-04-09 04:13:25.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/debian/changelog 2020-08-12 06:31:01.000000000 +0000 @@ -1,3 +1,15 @@ +ricks-amdgpu-utils (3.5.0-1) unstable; urgency=medium + + * Team upload. + * New upstream version 3.5.0 + * Fixing split of the installed files to binary packages. + * Wrapping and sorting. + * Defining Breaks+Replaces relationship between current ricks-amdgpu-utils + and older python3-gpumodules (moved manpages). + * Dropping reference to nonexistent ricks-amdgpu-utils-doc package. + + -- Andrius Merkys Wed, 12 Aug 2020 02:31:01 -0400 + ricks-amdgpu-utils (3.0.0-1) unstable; urgency=medium * Team upload. 
diff -Nru ricks-amdgpu-utils-3.0.0/debian/control ricks-amdgpu-utils-3.5.0/debian/control --- ricks-amdgpu-utils-3.0.0/debian/control 2020-04-09 04:12:26.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/debian/control 2020-08-12 06:26:41.000000000 +0000 @@ -2,11 +2,14 @@ Section: utils Priority: optional Maintainer: Debian Science Team -Uploaders: Steffen Möller , - Rick Langford -Build-Depends: debhelper-compat (= 12), +Uploaders: + Steffen Möller , + Rick Langford , +Build-Depends: + debhelper-compat (= 12), dh-python, - python3-all, python3-setuptools, + python3-all, + python3-setuptools, Standards-Version: 4.5.0 Homepage: https://github.com/Ricks-Lab/amdgpu-utils Rules-Requires-Root: no @@ -15,19 +18,28 @@ Package: python3-gpumodules Architecture: all -Depends: ${python3:Depends}, ${misc:Depends}, +Depends: + lshw, python3-matplotlib, python3-pandas, - lshw -Suggests: ricks-amdgpu-utils + ${misc:Depends}, + ${python3:Depends}, +Suggests: + ricks-amdgpu-utils, Description: adjustment and inspection of AMD GPUs An internal module used by the Ricks-Lab's AMD GPU Utilities. Package: ricks-amdgpu-utils Architecture: all -Depends: ${python3:Depends}, ${misc:Depends}, python3-gpumodules -Suggests: ricks-amdgpu-utils-doc +Depends: + python3-gpumodules, + ${misc:Depends}, + ${python3:Depends}, +Replaces: + python3-gpumodules (<< 3.5.0), +Breaks: + python3-gpumodules (<< 3.5.0), Description: AMD GPU performance adjustment and monitoring A set of utilities for monitoring AMD GPU performance and modifying control settings. 
diff -Nru ricks-amdgpu-utils-3.0.0/debian/ricks-amdgpu-utils.dirs ricks-amdgpu-utils-3.5.0/debian/ricks-amdgpu-utils.dirs --- ricks-amdgpu-utils-3.0.0/debian/ricks-amdgpu-utils.dirs 2020-04-09 04:12:26.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/debian/ricks-amdgpu-utils.dirs 2020-08-12 06:14:18.000000000 +0000 @@ -1 +1 @@ -usr +usr/share diff -Nru ricks-amdgpu-utils-3.0.0/debian/ricks-amdgpu-utils.docs ricks-amdgpu-utils-3.5.0/debian/ricks-amdgpu-utils.docs --- ricks-amdgpu-utils-3.0.0/debian/ricks-amdgpu-utils.docs 2020-04-09 04:12:26.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/debian/ricks-amdgpu-utils.docs 2020-08-12 06:19:56.000000000 +0000 @@ -1 +1,2 @@ +README.md docs/* diff -Nru ricks-amdgpu-utils-3.0.0/debian/rules ricks-amdgpu-utils-3.5.0/debian/rules --- ricks-amdgpu-utils-3.0.0/debian/rules 2020-04-09 04:12:26.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/debian/rules 2020-08-12 06:18:54.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/make -f -export DH_VERBOSE = 1 + export PYBUILD_NAME=gpumodules %: @@ -7,9 +7,10 @@ override_dh_auto_install: dh_auto_install - mkdir -p debian/ricks-amdgpu-utils/usr/ - mv debian/python3-gpumodules/usr/bin debian/ricks-amdgpu-utils/usr/ + mv debian/python3-gpumodules/usr/bin debian/ricks-amdgpu-utils/usr + mv debian/python3-gpumodules/usr/share/man debian/ricks-amdgpu-utils/usr/share + rm -rf debian/python3-gpumodules/usr/share/rickslab-gpu-utils override_dh_auto_clean: dh_clean - rm -rf build .pybuild + rm -rf build .pybuild GPUmodules/__pycache__ rickslab_gpu_utils.egg-info Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/docs/amdgpu-monitor_scrshot.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/docs/amdgpu-monitor_scrshot.png differ Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/docs/amdgpu-pac_scrshot.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/docs/amdgpu-pac_scrshot.png differ Binary files 
/tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/docs/amdgpu-pac_type1.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/docs/amdgpu-pac_type1.png differ Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/docs/amdgpu-pac_type2.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/docs/amdgpu-pac_type2.png differ Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/docs/amdgpu-plot_scrshot.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/docs/amdgpu-plot_scrshot.png differ Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/docs/gpu-monitor-gui_scrshot.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/docs/gpu-monitor-gui_scrshot.png differ Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/docs/gpu-pac.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/docs/gpu-pac.png differ Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/docs/gpu-pac_scrshot.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/docs/gpu-pac_scrshot.png differ Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/docs/gpu-plot_scrshot.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/docs/gpu-plot_scrshot.png differ diff -Nru ricks-amdgpu-utils-3.0.0/docs/USER_GUIDE.md ricks-amdgpu-utils-3.5.0/docs/USER_GUIDE.md --- ricks-amdgpu-utils-3.0.0/docs/USER_GUIDE.md 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/docs/USER_GUIDE.md 2020-07-06 00:57:49.000000000 +0000 @@ -1,169 +1,239 @@ -# amdgpu-utils - User Guide -A set of utilities for monitoring AMD GPU performance and modifying control settings. +# Ricks-Lab GPU Utilities - User Guide -## Current amdgpu-utils Version: 3.0.x +A set of utilities for monitoring GPU performance and modifying control settings. 
+ +## Current rickslab-gpu-utils Version: 3.5.x + + - [Installation](#installation) - [Getting Started](#getting-started) - - [Using amdgpu-ls](#using-amdgpu-ls) + - [Using gpu-ls](#using-gpu-ls) - [GPU Type Dependent Behavior](#gpu-type-dependent-behavior) - - [Using amdgpu-monitor](#using-amdgpu-monitor) - - [Using amdgpu-plot](#using-amdgpu-plot) - - [Using amdgpu-pac](#using-amdgpu-pac) - - [Updating the PCI ID decode file](#Updating-the-PCI-ID-decode-file) + - [Using gpu-mon](#using-gpu-mon) + - [Using gpu-plot](#using-gpu-plot) + - [Using gpu-pac](#using-gpu-pac) + - [Updating the PCI ID decode file](#updating-the-PCI-ID-decode-file) - [Optimizing Compute Performance-Power](#optimizing-compute-performance-power) - - [Setting GPU Automatically at Startup](#setting-gpu-automatically-at-startup) + - [Running Startup PAC Bash Files](#running-startup-pac-bash-files) + +## Installation + +For a typical user, the installation is accomplished using pip to install from +[PyPI](https://pypi.org/project/rickslab-gpu-utils/) with the following command: + +``` +pip3 install rickslab-gpu-utils +``` + +For a developer/contributor to the project, it is expected that you duplicate the development +environment using a virtual environment: + +```shell script +sudo apt install -y python3.6-venv +sudo apt install -y python3.6.dev +``` + +Initialize your *rickslab-gpu-utils-env* if it is your first time to use it. From the project +root directory, execute: + +```shell script +python3.6 -m venv rickslab-gpu-utils-env +source rickslab-gpu-utils-env/bin/activate +pip install --no-cache-dir -r requirements-venv.txt +``` ## Getting Started + First, this set of utilities is written and tested with Python3.6. If you are using an older -version, you will likely see syntax errors. Unfortunately, I don't know how to catch a -syntax error, so if you have issues, just execute: +version, you will likely see syntax errors. 
If you are encountering problems, then execute: + ``` -./amdgpu-chk +gpu-chk ``` -and it should display a message indicating any Python or Kernel incompatibilities. You will -also notice that there is a minimum version of the Kernel that supports these features, but be -warned, I have tested it with kernel releases no older than 4.15. There have been amdgpu features -implemented over time that span many releases of the kernel, so your experience in using these -utilities with older kernels might not be ideal. - -To use any of these utilities, you must have the *amdgpu* open source driver package installed, -either the All-Open stack or Pro stack. Components of *amdgpu* are also installed when *ROCm* is -installed. You can check with the following commands: -``` -dpkg -l 'amdgpu*' -dpkg -l 'rocm*' -``` - -You also must set your Linux machine to boot with the feature mask set to support the functionality -that these tools depend on. Do do this, you must set amdgpu.ppfeaturemask=0xfffd7fff. This -can be accomplished by adding amdgpu.ppfeaturemask=0xfffd7fff to the GRUB_CMDLINE_LINUX_DEFAULT -value in /etc/default/grub and executing *sudo update-grub* as in the following example, using *vi* or your favorite -command line editor: + +This should display a message indicating any Python or Kernel incompatibilities. In order to +get maximum capability of these utilities, you should be running with a kernel that provides +support of the GPUs you have installed. If using AMD GPUs, installing the latest **amdgpu** +driver package or the latest **ROCm** release may provide additional capabilities. If you +have Nvidia GPUs installed, you should have **nvidia-smi** installed in order for the utility +reading of the cards to be possible. Writing to GPUs is currently only possible for AMD GPUs, +and only with compatible cards. Modifying AMD GPU properties requires the AMD ppfeaturemask +to be set to 0xfffd7fff.
This can be accomplished by adding `amdgpu.ppfeaturemask=0xfffd7fff` +to the `GRUB_CMDLINE_LINUX_DEFAULT` value in `/etc/default/grub` and executing `sudo update-grub`: + ``` cd /etc/default sudo vi grub ``` + Modify to include the featuremask as follows: + ``` GRUB_CMDLINE_LINUX_DEFAULT="quiet splash amdgpu.ppfeaturemask=0xfffd7fff" ``` + After saving, update grub: + ``` sudo update-grub ``` -and then reboot. -If not running from a package installation, it is suggested run amdgpu-util in a virtual environment to avoid -dependency issues. If you don't have venv installed with python3, then execute the following (Ubuntu example): -``` -sudo apt install -y python3-venv -``` +and then reboot. -Initialize your amdgpu-utils-env if it is your first time to use it. From the project directory, execute: -``` -python3 -m venv amdgpu-utils-env -source amdgpu-utils-env/bin/activate -pip install --no-cache-dir -r requirements-venv.txt -``` -Use the deactivate command to leave the venv. +If you have Nvidia GPUs installed, you will need to have nvidia-smi installed. -The amdgpu-util package can be run without a venv by pip installing the requirements.txt file: -``` -sudo -H pip3 install --no-cache-dir -r requirements.txt -``` +## Using gpu-ls +After getting your system setup to support rickslab-gpu-utils, it is best to verify functionality by +listing your GPU details with the *gpu-ls* command. The utility will use the system `lspci` command +to identify all installed GPUs. The utility will also verify system setup/configuration for read, write, +and compute capability. Additional performance/configuration details are read from the GPU for compatible +GPUs. Example of the output is as follows: -## Using amdgpu-ls -After getting your system setup to support amdgpu-utils, it is best to verify functionality by -listing your GPU details with the *amdgpu-ls* command. 
It first attempts to detect the version -of amdgpu drivers you have installed and then check compatibility of installed AMD GPUs. Its -default behavior is to list basic GPU details for all compatible cards: ``` -Detected GPUs: AMD: 1, ASPEED: 1 -AMD: rocm version: 3.0.6 +OS command [nvidia-smi] executable found: [/usr/bin/nvidia-smi] +Detected GPUs: INTEL: 1, NVIDIA: 1, AMD: 1 +AMD: amdgpu version: 20.10-1048554 AMD: Wattman features enabled: 0xfffd7fff -2 total GPUs, 1 rw, 0 r-only, 0 w-only +3 total GPUs, 1 rw, 1 r-only, 0 w-only + +Card Number: 0 + Vendor: INTEL + Readable: False + Writable: False + Compute: False + Device ID: {'device': '0x3e91', 'subsystem_device': '0x8694', 'subsystem_vendor': '0x1043', 'vendor': '0x8086'} + Decoded Device ID: 8th Gen Core Processor Gaussian Mixture Model + Card Model: Intel Corporation 8th Gen Core Processor Gaussian Mixture Model + PCIe ID: 00:02.0 + Driver: i915 + GPU Type: Unsupported + HWmon: None + Card Path: /sys/class/drm/card0/device + System Card Path: /sys/devices/pci0000:00/0000:00:02.0 Card Number: 1 + Vendor: NVIDIA + Readable: True + Writable: False + Compute: True + GPU UID: GPU-fcbaadc4-4040-c2e5-d5b6-52d1547bcc64 + GPU S/N: [Not Supported] + Device ID: {'device': '0x1381', 'subsystem_device': '0x1073', 'subsystem_vendor': '0x10de', 'vendor': '0x10de'} + Decoded Device ID: GM107 [GeForce GTX 750] + Card Model: GeForce GTX 750 + Display Card Model: GeForce GTX 750 + Card Index: 0 + PCIe ID: 01:00.0 + Link Speed: GEN3 + Link Width: 8 + ################################################## + Driver: 390.138 + vBIOS Version: 82.07.32.00.32 + Compute Platform: OpenCL 1.2 CUDA + Compute Mode: Default + GPU Type: Supported + HWmon: None + Card Path: /sys/class/drm/card1/device + System Card Path: /sys/devices/pci0000:00/0000:00:01.0/0000:01:00.0 + ################################################## + Current Power (W): 15.910 + Power Cap (W): 38.50 + Power Cap Range (W): [30.0, 38.5] + Fan Target Speed (rpm): None + 
Current Fan PWM (%): 40.000 + ################################################## + Current GPU Loading (%): 100 + Current Memory Loading (%): 36 + Current VRAM Usage (%): 91.437 + Current VRAM Used (GB): 0.876 + Total VRAM (GB): 0.958 + Current Temps (C): {'temperature.gpu': 40.0, 'temperature.memory': None} + Current Clk Frequencies (MHz): {'clocks.gr': 1163.0, 'clocks.mem': 2505.0, 'clocks.sm': 1163.0, 'clocks.video': 1046.0} + Maximum Clk Frequencies (MHz): {'clocks.max.gr': 1293.0, 'clocks.max.mem': 2505.0, 'clocks.max.sm': 1293.0} + Current SCLK P-State: [0, ''] + Power Profile Mode: [Not Supported] + +Card Number: 2 Vendor: AMD Readable: True Writable: True Compute: True - GPU UID: a5e4788172dc768b - Device ID: {'vendor': '0x1002', 'device': '0x66af', 'subsystem_vendor': '0x1458', 'subsystem_device': '0x1000'} - Decoded Device ID: Vega 20 - Card Model: Advanced Micro Devices, Inc. [AMD/ATI] Vega 20 (rev c1) - Display Card Model: Vega 20 - PCIe ID: 43:00.0 - Link Speed: 8 GT/s + GPU UID: None + Device ID: {'device': '0x731f', 'subsystem_device': '0xe411', 'subsystem_vendor': '0x1da2', 'vendor': '0x1002'} + Decoded Device ID: Radeon RX 5600 XT + Card Model: Advanced Micro Devices, Inc. 
[AMD/ATI] Navi 10 [Radeon RX 5600 OEM/5600 XT / 5700/5700 XT] (rev ca) + Display Card Model: Radeon RX 5600 XT + PCIe ID: 04:00.0 + Link Speed: 16 GT/s Link Width: 16 ################################################## Driver: amdgpu - vBIOS Version: 113-D3600200-106 - Compute Platform: OpenCL 2.0 - GPU Frequency/Voltage Control Type: 2 - HWmon: /sys/class/drm/card1/device/hwmon/hwmon2 - Card Path: /sys/class/drm/card1/device + vBIOS Version: 113-5E4111U-X4G + Compute Platform: OpenCL 2.0 AMD-APP (3075.10) + GPU Type: CurvePts + HWmon: /sys/class/drm/card2/device/hwmon/hwmon3 + Card Path: /sys/class/drm/card2/device + System Card Path: /sys/devices/pci0000:00/0000:00:01.1/0000:02:00.0/0000:03:00.0/0000:04:00.0 ################################################## - Current Power (W): 82.0 - Power Cap (W): 150.0 - Power Cap Range (W): [0, 300] + Current Power (W): 99.000 + Power Cap (W): 160.000 + Power Cap Range (W): [0, 192] Fan Enable: 0 Fan PWM Mode: [2, 'Dynamic'] - Fan Target Speed (rpm): 0 - Current Fan Speed (rpm): 0 - Current Fan PWM (%): 0 - Fan Speed Range (rpm): [0, 3850] + Fan Target Speed (rpm): 1170 + Current Fan Speed (rpm): 1170 + Current Fan PWM (%): 35 + Fan Speed Range (rpm): [0, 3200] Fan PWM Range (%): [0, 100] ################################################## - Current GPU Loading (%): 93 - Current Memory Loading (%): 25 - Current Temps (C): {'mem': 31.0, 'edge': 33.0, 'junction': 39.0} - Critical Temp (C): 100.0 - Current Voltages (V): {'vddgfx': 1006} - Current Clk Frequencies (MHz): {'sclk': 1633.0, 'mclk': 1051.0} - Current SCLK P-State: [8, '1651Mhz'] - SCLK Range: ['808Mhz', '2200Mhz'] - Current MCLK P-State: [2, '1051Mhz'] - MCLK Range: ['801Mhz', '1200Mhz'] - Power Performance Mode: 5-COMPUTE - Power Force Performance Level: manual - -Card Number: 0 - Vendor: ASPEED - Readable: False - Writable: False - Compute: False - Card Model: ASPEED Technology, Inc. 
ASPEED Graphics Family (rev 41) - PCIe ID: c4:00.0 - Driver: ast - Card Path: /sys/class/drm/card0/device + Current GPU Loading (%): 50 + Current Memory Loading (%): 49 + Current GTT Memory Usage (%): 0.432 + Current GTT Memory Used (GB): 0.026 + Total GTT Memory (GB): 5.984 + Current VRAM Usage (%): 11.969 + Current VRAM Used (GB): 0.716 + Total VRAM (GB): 5.984 + Current Temps (C): {'edge': 54.0, 'junction': 61.0, 'mem': 68.0} + Critical Temps (C): {'edge': 118.0, 'junction': 99.0, 'mem': 99.0} + Current Voltages (V): {'vddgfx': 937} + Current Clk Frequencies (MHz): {'mclk': 875.0, 'sclk': 1780.0} + Current SCLK P-State: [2, '1780Mhz'] + SCLK Range: ['800Mhz', '1820Mhz'] + Current MCLK P-State: [3, '875Mhz'] + MCLK Range: ['625Mhz', '930Mhz'] + Power Profile Mode: 5-COMPUTE + Power DPM Force Performance Level: manual ``` If everything is working fine, you should see no warning or errors. The listing utility also has other command line options: + ``` -usage: amdgpu-ls [-h] [--about] [--table] [--pstates] [--ppm] [--clinfo] - [--no_fan] [-d] +usage: gpu-ls [-h] [--about] [--short] [--table] [--pstates] [--ppm] + [--clinfo] [--no_fan] [-d] optional arguments: -h, --help show this help message and exit --about README - --table Output table of basic GPU details + --short Short listing of basic GPU details + --table Current status of readable GPUs --pstates Output pstate tables instead of GPU details --ppm Output power/performance mode tables instead of GPU details --clinfo Include openCL with card details - --no_fan do not include fan setting options - -d, --debug Debug output + --no_fan Do not include fan setting options + -d, --debug Debug logger output ``` -The *--clinfo* option will make a call to clinfo, if it is installed, and list these parameters -along with the basic parameters. 
The benefit of running this in *amdgpu-ls* is that the tool +The *--clinfo* option will make a call to clinfo, if it is installed, and list openCL parameters +along with the basic parameters. The benefit of running this in *gpu-ls* is that the tool uses the PCIe slot id to associate clinfo results with the appropriate GPU in the listing. The *--pstates* and *--ppm* options will display the P-State definition table and the power performance mode table. + ``` -./amdgpu-ls --pstate --ppm +gpu-ls --pstate --ppm Detected GPUs: AMD: 1, ASPEED: 1 AMD: rocm version: 3.0.6 AMD: Wattman features enabled: 0xfffd7fff @@ -171,21 +241,26 @@ Card Number: 1 Card Model: Vega 20 - Card: /sys/class/drm/card1/device - Type: 2 + Card Path: /sys/class/drm/card1/device + GPU Frequency/Voltage Control Type: CurvePts + ################################################## + DPM States: SCLK: MCLK: - 0: 701Mhz 0: 351Mhz - 1: 809Mhz 1: 801Mhz - 2: 1085Mhz 2: 1051Mhz + 0: 701Mhz 0: 351Mhz + 1: 809Mhz 1: 801Mhz + 2: 1085Mhz 2: 1051Mhz 3: 1287Mhz 4: 1434Mhz 5: 1550Mhz 6: 1606Mhz 7: 1627Mhz 8: 1651Mhz + ################################################## + PP OD States: SCLK: MCLK: 0: 808Mhz - 1: 1650Mhz - 1: 1050Mhz - + ################################################## VDDC_CURVE: 0: ['808Mhz', '724mV'] 1: ['1304Mhz', '822mV'] @@ -205,70 +280,116 @@ -1: AUTO ``` +Different generations of cards will provide different information with the --ppm option. Here is an +example for AMD Ellesmere and Polaris cards: + +``` +gpu-ls --ppm +Detected GPUs: INTEL: 1, AMD: 2 +AMD: amdgpu version: 19.50-967956 +AMD: Wattman features enabled: 0xfffd7fff +3 total GPUs, 2 rw, 0 r-only, 0 w-only + +Card Number: 1 + Card Model: Advanced Micro Devices, Inc. 
[AMD/ATI] Ellesmere [Radeon RX 470/480/570/570X/580/580X/590] (rev ef) + Card Path: /sys/class/drm/card1/device + Power DPM Force Performance Level: manual + NUM MODE_NAME SCLK_UP_HYST SCLK_DOWN_HYST SCLK_ACTIVE_LEVEL MCLK_UP_HYST MCLK_DOWN_HYST MCLK_ACTIVE_LEVEL + 0 BOOTUP_DEFAULT: - - - - - - + 1 3D_FULL_SCREEN: 0 100 30 0 100 10 + 2 POWER_SAVING: 10 0 30 - - - + 3 VIDEO: - - - 10 16 31 + 4 VR: 0 11 50 0 100 10 + 5 COMPUTE *: 0 5 30 0 100 10 + 6 CUSTOM: - - - - - - +``` + ## GPU Type Dependent Behavior -AMD GPU's compatible with the amdgpu open source drivers are of three different types in terms of how frequency/voltage -is managed. GPUs of Vega10 and earlier architecture rely on the definition of specific power states to determine -the clock frequency and voltage. The GPU will operate only at the specific Frequency/Voltage states that are defined, -and move between states based on power, temperature, and loading. These GPU's are of type 1, if the P-state table -is readable and type 0 if it is not. For GPUs of Vega20 architecture or newer, it appears that Voltage/Frequency curves -are defined with three points on a Voltage vs. Frequency curve. These GPU's are classified as type 2. -With the *amdgpu-ls* tool, you can determine if your card is of type 1 or 2. Here are the relevant lines from the -output for and RX Vega64 GPU and the Radeon VII: +GPU capability and compatibility vary over the various vendors and generations of hardware. In +order to manage this variability, rickslab-gpu-utils must classify each installed GPU by its vendor +and type. So far, valid types are as follows: + +* **Undefined** - This is the default assigned type, before a valid type can be determined. +* **Unsupported** - This is the type assigned for cards which have no capability of reading beyond basic parameters typical of PCIe devices. +* **Supported** - This is the type assigned for basic readability, including *nvidia-smi* readable GPUs.
+* **Legacy** - Applies to legacy AMD GPUs with very basic parameters available to read. (pre-HD7) +* **APU** - Applies to AMD integrated graphics with limited parameters available. (Carrizo - Renoir) +* **PStatesNE** - Applies to AMD GPUs with most parameters available, but Pstates not writeable. (HD7 series) +* **PStates** - Applies to modern AMD GPUs with writeable Pstates. (R9 series through RX-Vega) +* **CurvePts** - Applies to latest generation AMD GPUs that use AVFS curves instead of Pstates. (Vega20 and newer) + +With the *gpu-ls* tool, you can determine the type of your installed GPUs. Here are examples of +relevant lines from the output for different types of GPUs: + ``` +Decoded Device ID: 8th Gen Core Processor Gaussian Mixture Model +GPU Type: Unsupported # Intel CPU with integrated graphics + +Decoded Device ID: GM107 [GeForce GTX 750] +GPU Type: Supported + Decoded Device ID: R9 290X DirectCU II -GPU Frequency/Voltage Control Type: 0 +GPU Type: PStatesNE Decoded Device ID: RX Vega64 -GPU Frequency/Voltage Control Type: 1 +GPU Type: PStates Decoded Device ID: Radeon VII -GPU Frequency/Voltage Control Type: 2 +GPU Type: CurvePts + +Decoded Device ID: Radeon RX 5600 XT +GPU Type: CurvePts ``` -Monitor and Control utilities will differ between the three types. -* For type 0, you can monitor the P-state details with monitor utilities, but you can NOT define P-states or set -P-state masks. -* For type 1, you can monitor the P-state details with monitor utilities, and you can define P-states and set -P-state masks. -* For Type 2, you can monitor current Clocks frequency and P-states, with latest amdgpu drivers. The SCLK and MCLK -curve end points can be controlled, which has the effect of limiting the frequency range, similar to P-state masking -for Type 1 cards. The option of p-state masking is also available for Type 2 cards. You are also able to modify the -three points that define the Vddc-SCLK curve.
I have not attempted to OC the card yet, but I assume redefining the -3rd point would be the best approach. For underclocking, lowering the SCLK end point is effective. I don't see a -curve defined for memory clock on the Radeon VII, so setting memory clock vs. voltage doesn't seem possible at this -time. There also appears to be an inconsistency in the defined voltage ranges for curve points and actual default -settings. +Monitor and Control utilities will differ between these types: + +* For **Undefined** and **Unsupported** types, only generic PCIe parameters are available. These types are +considered unreadable, unwritable, and as having no compute capability. +* **Supported** types have the most basic level of readability. This includes NV cards with nvidia-smi support. +* For **Legacy** and **APU**, only basic and limited parameters, respectively, are readable. +* For **PStates** and **PStatesNE** type GPUs, pstate details are readable, but for **PStatesNE** they are not +writable. For type **PStates**, pstate Voltages/Frequencies as well as pstate masking can be specified. +* The **CurvePts** type applies to modern (Vega20 and later) AMD GPUs that use AVFS instead of Pstates for +performance control. These have the highest degree of read/write capability. The SCLK and MCLK curve end points +can be controlled, which has the effect of over/under clocking/voltage. You are also able to modify the three points that define the Vddc-SCLK curve. I have not attempted to OC the card yet, but I assume redefining the 3rd point would be the best approach. For underclocking, lowering the SCLK end point is effective. I don't see a curve defined for memory clock on the Radeon VII, so setting memory clock vs. voltage doesn't seem possible at this time. There also appears to be an inconsistency in the defined voltage ranges for curve points and actual default settings. Below is a plot of what I extracted for the Frequency vs Voltage curves of the RX Vega64 and the Radeon VII.
![](Type1vsType2.png) -## Using amdgpu-monitor -By default, *amdgpu-monitor* will display a text based table in the current terminal window that updates +## Using gpu-mon + +By default, *gpu-mon* will display a text based table in the current terminal window that updates every sleep duration, in seconds, as defined by *--sleep N* or 2 seconds by default. If you are using -water cooling, you can use the *--no_fans* to remove fan functionality. +water cooling, you can use the *--no_fans* to remove fan monitoring functionality. + ``` ┌─────────────┬────────────────┬────────────────┐ -│Card # │card1 │card0 │ +│Card # │card1 │card2 │ ├─────────────┼────────────────┼────────────────┤ -│Model │RX Vega64 │Vega 20 Radeon │ -│Load % │99 │93 │ -│Power (W) │60.0 │138.0 │ -│Power Cap (W)│140.0 │140.0 │ -│Energy (kWh) │1e-06 │3e-06 │ -│T (C) │30.0 │47.0 │ -│VddGFX (mV) │1037 │1062 │ -│Fan Spd (%) │0 │93 │ -│Sclk (MHz) │1536Mhz │ │ -│Sclk Pstate │6 │-1 │ -│Mclk (MHz) │945Mhz │ │ -│Mclk Pstate │3 │-1 │ -│Perf Mode │4-COMPUTE │4-COMPUTE │ +│Model │GeForce GTX 750 │Radeon RX 5600 X│ +│GPU Load % │100 │91 │ +│Mem Load % │36 │68 │ +│VRAM Usage % │89.297 │11.969 │ +│GTT Usage % │None │0.432 │ +│Power (W) │15.69 │92.0 │ +│Power Cap (W)│38.50 │160.0 │ +│Energy (kWh) │0.0 │0.002 │ +│T (C) │48.0 │61.0 │ +│VddGFX (mV) │nan │925 │ +│Fan Spd (%) │40.0 │36 │ +│Sclk (MHz) │1163 │1780 │ +│Sclk Pstate │0 │2 │ +│Mclk (MHz) │2505 │875 │ +│Mclk Pstate │0 │3 │ +│Perf Mode │[Not Supported] │5-COMPUTE │ └─────────────┴────────────────┴────────────────┘ ``` + The fields are the same as the GUI version of the display, available with the *--gui* option. -![](amdgpu-monitor_scrshot.png) + +![](gpu-monitor-gui_scrshot.png) The first row gives the card number for each GPU. This number is the integer used by the driver for each GPU. Most fields are self describing. 
The Power Cap field is especially useful in managing compute power efficiency, and @@ -277,58 +398,62 @@ Note that total card power usage may be more than reported GPU power usage. Energy is calculated as the product of the latest power reading and the elapsed time since the last power reading. -You will notice no clock frequencies or valid P-states for the Vega 20 card. This is because of a limitation in -the first drivers that supported Vega 20 which have a change in the way frequency vs voltage is managed. In later -version of the drivers, actual clock frequency and P-states are readable. The P-state table for Vega 20 is a -definition of frequency vs. voltage curves, so setting P-states to control the GPU is no longer relevant, but -these concepts are used in reading current states. +The P-states in the table for **CurvePts** type GPUs are an indication of frequency vs. voltage curves. +Setting P-states to control the GPU is no longer relevant for this type, but these concepts are used in +reading current states. -The Perf Mode field gives the current power performance mode, which may be modified in with *amdgpu-pac*. These +The Perf Mode field gives the current power performance mode, which may be modified with *gpu-pac*. These modes affect the how frequency and voltage are managed versus loading. This is a very important parameter when managing compute performance. -Executing *amdgpu-monitor* with the *--plot* option will display a continuously updating plot of the critical +Executing *gpu-mon* with the *--plot* option will display a continuously updating plot of the critical GPU parameters. -![](amdgpu-plot_scrshot.png) +![](gpu-plot_scrshot.png) -Having an *amdgpu-monitor* Gtx window open at startup may be useful if you run GPU compute projects that autostart -and you need to quickly confirm that *amdgpu-pac* bash scripts ran as expected at startup -(see [Using amdgpu-pac](#using-amdgpu-pac)).
You can have *amdgpu-monitor --gui* automatically launch at startup -or upon reboot by using the startup utility for your system. In Ubuntu, for example, open *Startup Applications +Having a *gpu-mon* Gtk window open at startup may be useful if you run GPU compute projects that autostart +and you need to quickly confirm that *gpu-pac* bash scripts ran as expected at startup +(see [Using gpu-pac](#using-gpu-pac)). You can have *gpu-mon --gui* automatically launch at startup +or upon reboot by using the startup utility for your distribution. In Ubuntu, for example, open *Startup Applications Preferences* app, then in the Preferences window select *Add* and use something like this in the command field: + ``` -/usr/bin/python3 /home//Desktop/amdgpu-utils/amdgpu-monitor --gui +/usr/bin/python3 /home//Desktop/rickslab-gpu-utils/gpu-mon --gui ``` -where `/amdgpu-utils` may be a soft link to your current distribution directory. This startup approach does not -work for the default Terminal text execution of *amdgpu-monitor*. -## Using amdgpu-plot -In addition to being called from *amdgpu-monitor* with the *--plot* option, *amdgpu-plot* may be ran as a standalone -utility. Just execute *amdgpu-plot --sleep N* and the plot will update at the defined interval. It is not +where `/rickslab-gpu-utils` may be a soft link to your current distribution directory. This startup approach does not +work for the default Terminal text execution of *gpu-mon*. + +## Using gpu-plot + +In addition to being called from *gpu-mon* with the *--plot* option, *gpu-plot* may be run as a standalone +utility. Just execute *gpu-plot --sleep N* and the plot will update at the defined interval. It is not recommended to run both the monitor with an independently executed plot, as it will result in twice as many reads from the driver files. Once the plots are displayed, individual items on the plot can be toggled by selecting the named button on the plot display.
-The *--stdin* option is used by *amdgpu-monitor --plot* in its execution of *amdgpu-plot*. This option along -with *--simlog* option can be used to simulate a plot output using a log file generated by *amdgpu-monitor --log*. +The *--stdin* option is used by *gpu-mon --plot* in its execution of *gpu-plot*. This option along +with *--simlog* option can be used to simulate a plot output using a log file generated by *gpu-mon --log*. I use this feature when troubleshooting problems from other users, but it may also be useful in benchmarking performance. An example of the command line for this is as follows: + ``` -cat log_monitor_0421_081038.txt | ./amdgpu-plot --stdin --simlog +cat log_monitor_0421_081038.txt | gpu-plot --stdin --simlog ``` -## Using amdgpu-pac -By default, *amdgpu-pac* will open a Gtk based GUI to allow the user to modify GPU performance parameters. I strongly +## Using gpu-pac + +By default, *gpu-pac* will open a Gtk based GUI to allow the user to modify GPU performance parameters. I strongly suggest that you completely understand the implications of changing any of the performance settings before you use this utility. As per the terms of the GNU General Public License that covers this project, there is no warranty on the usability of these tools. Any use of this tool is at your own risk. To help you manage the risk in using this tool, two modes are provided to modify GPU parameters. By default, a bash file is created that you can review and execute to implement the desired changes. 
Here is an example of that file: + ``` #!/bin/sh ########################################################################### -## amdgpu-pac generated script to modify GPU configuration/settings +## rickslab-gpu-pac generated script to modify GPU configuration/settings ########################################################################### ########################################################################### @@ -384,24 +509,22 @@ sudo sh -c "echo '0 1 2' > /sys/class/drm/card1/device/pp_dpm_mclk" ``` -When you execute *amdgpu-pac*, you will notice a message bar at the bottom of the interface. By default, it informs +When you execute *gpu-pac*, you will notice a message bar at the bottom of the interface. By default, it informs you of the mode you are running in. By default, the operation mode is to create a bash file, but with the *--execute_pac* (or *--execute*) command line option, the bash file will be automatically executed and then deleted. -The message bar will indicate this status. Because the driver files are writable only by root, the commands to write -configuration settings are executed with sudo. The message bar will display in red when credentials are pending. Once -executed, a yellow message will remind you to check the state of the gpu with *amdgpu-monitor*. I suggest to using -the monitor routine when executing pac to see the changes in real-time. +The message bar will indicate this status. Because the driver files are writable only by root, the commands to +write configuration settings are executed with sudo. The message bar will display in red when credentials are +pending. Once executed, a yellow message will remind you to check the state of the gpu with *gpu-mon*. I +suggest using the monitor routine when executing pac to see and confirm the changes in real-time. The command line option *--force_write* will result in all configuration parameters to be written to the bash file. The default behavior since v2.4.0 is to write only changes. 
The *--force_write* is useful for creating a bash file that can be execute to set your cards to a known state. As an example, you could use such a file to configure your -GPUs on boot up (see [Setting GPU Automatically at Startup](#setting-gpu-automatically-at-startup)). +GPUs on boot up (see [Running Startup PAC Bash Files](#running-startup-pac-bash-files)). -### The amdgpu-pac interface for Type 1 cards -![](amdgpu-pac_type1.png) +### The gpu-pac interface for Type PStates and Type CurvePts cards -### The amdgpu-pac interface for Type 2 cards -![](amdgpu-pac_type2.png) +![](gpu-pac_scrshot.png) In the interface, you will notice entry fields for indicating new values for specific parameters. In most cases, the values in these fields will be the current values, but in the case of P-state masks, it will show the default value @@ -413,84 +536,74 @@ settings to determine how it works with your card. I recommend running these experimental settings when the GPU is not under load. If you know the cause of the differences between entered and final fan PWM values, let me know. -Changes made with *amdgpu-pac* do not persist through a system reboot. To reestablish desired GPU settings after a -reboot, either re-enter them using *amdgpu-pac* or *amdgpu-pac --execute*, or execute a previously saved bash file. -*Amdgpu-pac* bash files must retain their originally assigned file name to run properly. -See [Setting GPU Automatically at Startup](#setting-gpu-automatically-at-startup) for how to run PAC bash +Changes made with *gpu-pac* do not persist through a system reboot. To reestablish desired GPU settings after a +reboot, either re-enter them using *gpu-pac* or *gpu-pac --execute*, or execute a previously saved bash file. +*gpu-pac* bash files must retain their originally assigned file name to run properly. +See [Running Startup PAC Bash Files](#running-startup-pac-bash-files) for how to run PAC bash scripts automatically at system startup. 
-For Type 1 cards, while changes to power caps and fan speeds can be made while the GPU is under load, for -*amdgpu-pac* to work properly, other changes may require that the GPU not be under load, *i.e.*, that sclk +For Type **Pstates** cards, while changes to power caps and fan speeds can be made while the GPU is under load, for +*gpu-pac* to work properly, other changes may require that the GPU not be under load, *i.e.*, that sclk P-state and mclk P-state are 0. Possible consequences with making changes under load is that the GPU become stuck in a 0 P-state or that the entire system becomes slow to respond, where a reboot will be needed to restore full GPU functions. Note that when you change a P-state mask, default mask values will reappear in the field -after Save, but your specified changes will have been implemented on the card and show up in *amdgpu-monitor*. -Some changes may not persist when a card has a connected display. +after Save, but your specified changes will have been implemented on the card and show up in *gpu-mon*. +Some changes may not persist when a card has a connected display. When changing P-state MHz or mV, the desired +P-state mask, if different from default (no masking), will have to be re-entered for clock or voltage changes to +be applied. Again, save PAC changes to clocks, voltages, or masks only when the GPU is at resting state (state 0). + +For Type **CurvePts** cards, although changes to P-state masks cannot be made through *gpu-pac*, changes to all +other fields can be made on-the-fly while the card is under load. Some basic error checking is done before writing, but I suggest you be very certain of all entries before you save -changes to the GPU. +changes to the GPU. You should always confirm your changes with *gpu-mon*. + +## Updating the PCI ID decode file + +In determining the GPU display name, *rickslab-gpu-utils* will examine two sources. 
The output of +`lspci -k -s nn:nn.n` is used to generate a complete name and an algorithm is used to generate a shortened +version. From the driver files, a set of files (vendor, device, subsystem_vendor, subsystem_device) containing +4 parts of the Device ID are read and used to extract a GPU model name from the system pci.ids file which is +sourced from [https://pci-ids.ucw.cz/](https://pci-ids.ucw.cz/) where a comprehensive list is maintained. The +system file can be updated from the original source with the command: -## Updating the PCI ID decode file -In determining the GPU display name, *amdgpu-utils* will examine 2 sources. The output of *lspci -k -s nn:nn.n* is -used to generate a complete name and an algorithm is used to generate a shortened version. From the driver files, a -set of files (vendor, device, subsystem_vendor, subsystem_device) contain a 4 parts of the Device ID are read and used -to extract a GPU model name from system pci.ids file which is sourced from -[https://pci-ids.ucw.cz/](https://pci-ids.ucw.cz/) where a comprehensive list is maintained. The system file can -be updated from the original source with the command: ``` sudo update-pciids ``` + If your GPU is not listed in the extract, the pci.id website has an interface to allow the user to request an addition to the master list. ## Optimizing Compute Performance-Power -The *amdgpu-utils* tools can be used to optimize performance vs. power for compute workloads by leveraging + +The *rickslab-gpu-utils* tools can be used to optimize performance vs. power for compute workloads by leveraging its ability to measure power and control relevant GPU settings. This flexibility allows one to execute a DOE to measure the effect of GPU settings on the performance in executing specific workloads. In SETI@Home performance, the Energy feature has also been built into [benchMT](https://github.com/Ricks-Lab/benchMT) to benchmark power and execution times for various work units.
This, combined with the log file produced with -*amdgpu-monitor --gui --log*, may be useful in optimizing performance. +*gpu-mon --gui --log*, may be useful in optimizing performance. ![](https://i.imgur.com/YPuDez2l.png) -## Setting GPU Automatically at Startup -If you set your system to run *amdgpu-pac* bash scripts automatically, as described in this section, note that +## Running Startup PAC Bash Files + +If you set your system to run *gpu-pac* bash scripts automatically, as described in this section, note that changes in your hardware or graphic drivers may cause potentially serious problems with GPU settings unless new -PAC bash files are generated following the changes. Review the [Using amdgpu-pac](#using-amdgpu-pac) section +PAC bash files are generated following the changes. Review the [Using gpu-pac](#using-gpu-pac) section before proceeding. -One approach to automatically execute a saved PAC bash file at startup or reboot is to run it as a cron job. To -do this, first create a PAC bash file of your optimized GPU settings using the *amdgpu-pac --force-write* option. -The executable file will be named *pac_writer_[string-of-characters].sh* and will be created in your current -amdgpu-utils directory. A separate file is needed for each GPU card. Copy the file(s) to a convenient directory, -without renaming or changing attributes. (If you leave it in the amdgpu-utils directory, then it may be lost with -the next distribution update.) In the example here, two bash files were copied to a new directory, `/etc/cron.custom`. -Now open crontab, the table that drives cron, using the command `~$ sudo crontab -e`. This will open crontab -in your default terminal text editor. (You may be prompted to choose an editor like *nano* or *vi*.) Add a line -like this, including an explicit path for each card's bash file: -``` -@reboot /etc/cron.custom/pac_writer_[string for 1st card].sh -@reboot /etc/cron.custom/pac_writer_[string for 2nd card].sh -``` -then save and exit. 
The next time you reboot, or the system restarts after a power outage, your GPU card(s) will -be ready to run with optimized settings. Because some PAC parameters can't be changed when a card is under load, -you will want to make sure that the PAC bash script executes before the card begins computing. For example, if you -have a *boinc-client* that automatically runs on startup, then consider delaying it for 30 seconds using the -cc_config.xml option *30*. - -Another approach, perhaps a more reliable one, is to execute PAC bash scripts as a systemd startup service. As -with setting up files for crontab, from *amdgpu-pac --force_write* set your optimal configurations for each GPU, -then Save All. Change ownership to root of each card's bash file: `sudo chown root pac_writer*.sh` - -For each bash file, create a symlink (soft link) that corresponds to the card number referenced in each linked -bash file, using simple descriptive names, *e.g.*, pac_writer_card1, pac_writer_card2, *etc.*. These links are -optional, but can make management of new startup bash files easier. Links are used in the startup service example, -below. Don't forget to reform the link(s) each time a new PAC bash file is written for a card. +One approach is to execute PAC bash scripts as a systemd startup service. From *gpu-pac --force_write*, set your optimal configurations for each GPU, then Save All. You may need to change ownership to root of each card's bash file: `sudo chown root pac_writer*.sh` + +For each bash file, you could create a symlink (soft link) that corresponds to the card number referenced in each +linked bash file, using simple descriptive names, *e.g.*, pac_writer_card1, pac_writer_card2, *etc.*. These links are +optional, but can make management of new or edited startup bash files easier. Links are used in the startup service +example, below. Don't forget to reform the link(s) each time a new PAC bash file is written for a card. 
-Next, create a .service file named something like, amdgpu-pac-startup.service and give it the following content: +Next, create a .service file named something like, gpu-pac-startup.service and give it the following content: + ``` [Unit] -Description=run at boot amdgpu-utils PAC bash scripts +Description=run at boot rickslab-gpu-utils PAC bash scripts [Service] Type=oneshot @@ -502,30 +615,52 @@ [Install] WantedBy=multi-user.target ``` + The Type=oneshot service allows use of more than one ExecStart. In this example, three bash files are used for two cards, where two alternative files are used for one card that the system may recognize as either card0 or card1; see further below for an explanation. Once your .service file is set up, execute the following commands: + ``` - ~$ sudo chown root:root amdgpu-pac-startup.service - ~$ sudo mv amdgpu-pac-startup.service /etc/systemd/system/ - ~$ sudo chmod 664 /etc/systemd/system/amdgpu-pac-startup.service - ~$ sudo systemctl daemon-reload - ~$ sudo systemctl enable amdgpu-pac-startup.service +sudo chown root:root gpu-pac-startup.service +sudo mv gpu-pac-startup.service /etc/systemd/system/ +sudo chmod 664 /etc/systemd/system/gpu-pac-startup.service +sudo systemctl daemon-reload +sudo systemctl enable gpu-pac-startup.service ``` + The last command should produce a terminal stdout like this: -`Created symlink /etc/systemd/system/multi-user.target.wants/amdgpu-pac-startup.service → /etc/systemd/system/amdgpu-pac-startup.service.` +`Created symlink /etc/systemd/system/multi-user.target.wants/gpu-pac-startup.service → /etc/systemd/system/gpu-pac-startup.service.` On the next reboot or restart, the GPU(s) will be set with the PAC run parameters. If you want to test the bash -script(s) before rebooting, run: `~$ sudo systemctl start amdgpu-pac-startup.service`. +script(s) before rebooting, run: `~$ sudo systemctl start gpu-pac-startup.service`. 
+ +If you have a Type PStates card where some PAC parameters can't be changed when it is under load, you will want +to make sure that the PAC bash script executes before the card begins computing. If you have a *boinc-client* that +automatically runs on startup, for example, then consider delaying it for 30 seconds using the cc_config.xml +option `<start_delay>30</start_delay>`. -One or more of your cards' numbers that are assigned by amdgpu drivers may change following a system or driver +One or more card numbers that are assigned by amdgpu drivers may change following a system or driver update and restart. With subsequent updates or restarts, a card can switch back to its original number. When a switch occurs, the bash file written for a previous card number will still be read at startup, but will have no -effect, causing the renumbered card to run at its default settings. To deal with this possibility, whether using -crontab or systemd, you can create an alternative PAC bash file after a renumbering event and add these alternative -files in your crontab or systemd service. You will probably just need two alternative bash files for a card that is -subject to amdgpu reindexing. A card's number is shown by *amdgpu-ls* and also appears in *amdgpu-monitor* and -*amdgpu-plot*. Card reindexing does not affect a card's PCI ID number, which corresponds to its PCIe slot number -on the motherboard. PCI IDs are listed by *amdgpu-ls*. If you know what causes GPU card index switching, let me know. +effect, causing the renumbered card to run at its default settings. To deal with this possibility, you can create +an alternative PAC bash file after a renumbering event and add these alternative files in your systemd service. +You will probably just need two alternative bash files for a card that is subject to reindexing. A card's +number is shown by *gpu-ls* and also appears in *gpu-mon* and *gpu-plot*. A card's PCI ID is listed +by *gpu-ls*. If you know what causes GPU card index switching, let me know.
+ +You may find a card running at startup with default power limits and Fan PWM settings instead of what is prescribed +in its startup PAC bash file. If so, it may be that the card's hwmon# is different from what is hard coded in the +bash file, because the hwmon index for devices can also change upon reboot. To work around this, you can edit a +card's bash file to define hwmon# as a variable and modify the hwmon lines to use it. Here is an example for card1: + +``` +set -x +HWMON=$(ls /sys/class/drm/card1/device/hwmon/) +# Powercap Old: 120 New: 110 Min: 0 Max: 180 +sudo sh -c "echo '110000000' > /sys/class/drm/card1/device/hwmon/$HWMON/power1_cap" +# Fan PWM Old: 44 New: 47 Min: 0 Max: 100 +sudo sh -c "echo '1' > /sys/class/drm/card1/device/hwmon/$HWMON/pwm1_enable" +sudo sh -c "echo '119' > /sys/class/drm/card1/device/hwmon/$HWMON/pwm1" +``` diff -Nru ricks-amdgpu-utils-3.0.0/gpu-chk ricks-amdgpu-utils-3.5.0/gpu-chk --- ricks-amdgpu-utils-3.0.0/gpu-chk 1970-01-01 00:00:00.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/gpu-chk 2020-07-06 00:57:49.000000000 +0000 @@ -0,0 +1,259 @@ +#!/usr/bin/python3 +""" gpu-chk - Checks OS/Python compatibility + + This utility verifies if the environment is compatible with *rickslab-gpu-utils*. + + Copyright (C) 2019 RicksLab + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see .
+""" +__author__ = 'RueiKe' +__copyright__ = 'Copyright (C) 2019 RicksLab' +__credits__ = ['Craig Echt - Testing, Debug, Verification, and Documentation'] +__license__ = 'GNU General Public License' +__program_name__ = 'gpu-chk' +__maintainer__ = 'RueiKe' +__docformat__ = 'reStructuredText' +# pylint: disable=multiple-statements +# pylint: disable=line-too-long +# pylint: disable=bad-continuation + +import argparse +import re +import subprocess +import os +import shlex +import platform +import sys +import shutil +import warnings +from GPUmodules import __version__, __status__ + +warnings.filterwarnings('ignore') +ENV_DIR = 'rickslab-gpu-utils-env' + + +class GutConst: + """ + Base object for chk util. These are simplified versions of what are in env module designed to run in python2 + in order to detect setup issues even if wrong version of python. + """ + _verified_distros = ['Debian', 'Ubuntu', 'Gentoo', 'Arch'] + _dpkg_tool = {'Debian': 'dpkg', 'Ubuntu': 'dpkg', 'Arch': 'pacman', 'Gentoo': 'portage'} + + def __init__(self): + self.DEBUG = False + + def check_env(self) -> list: + """ + Checks python version, kernel version, distro, and amd gpu driver version. + + :return: A list of 4 integers representing status of 3 check items. + """ + ret_val = [0, 0, 0, 0] + # Check python version + required_pversion = [3, 6] + (python_major, python_minor, python_patch) = platform.python_version_tuple() + print('Using python ' + python_major + '.' + python_minor + '.' + python_patch) + if int(python_major) < required_pversion[0]: + print(' ' + '\x1b[1;37;41m' + ' but amdgpu-utils requires python ' + + str(required_pversion[0]) + '.' + str(required_pversion[1]) + ' or newer.' + '\x1b[0m') + ret_val[0] = -1 + elif int(python_major) == required_pversion[0] and int(python_minor) < required_pversion[1]: + print(' ' + '\x1b[1;37;41m' + ' but amdgpu-utils requires python ' + + str(required_pversion[0]) + '.' + str(required_pversion[1]) + ' or newer.' 
+ '\x1b[0m') + ret_val[0] = -1 + else: + print(' ' + '\x1b[1;37;42m' + ' Python version OK. ' + '\x1b[0m') + ret_val[0] = 0 + + # Check Linux Kernel version + required_kversion = [4, 8] + linux_version = platform.release() + print('Using Linux Kernel ' + str(linux_version)) + if int(linux_version.split('.')[0]) < required_kversion[0]: + print(' ' + '\x1b[1;37;41m' + ' but amdgpu-util requires ' + + str(required_kversion[0]) + '.' + str(required_kversion[1]) + ' or newer.' + '\x1b[0m') + ret_val[1] = -2 + elif int(linux_version.split('.')[0]) == required_kversion[0] and \ + int(linux_version.split('.')[1]) < required_kversion[1]: + print(' ' + '\x1b[1;37;41m' + ' but amdgpu-util requires ' + + str(required_kversion[0]) + '.' + str(required_kversion[1]) + ' or newer.' + '\x1b[0m') + ret_val[1] = -2 + else: + print(' ' + '\x1b[1;37;42m' + ' OS kernel OK. ' + '\x1b[0m') + ret_val[1] = 0 + + # Check Linux Distribution + cmd_lsb_release = shutil.which('lsb_release') + print('Using Linux distribution: ', end='') + if cmd_lsb_release: + distributor = description = None + lsbr_out = subprocess.check_output(shlex.split('{} -a'.format(cmd_lsb_release)), + shell=False, stderr=subprocess.DEVNULL).decode().split('\n') + for lsbr_line in lsbr_out: + if re.search('Distributor ID', lsbr_line): + lsbr_item = re.sub(r'Distributor ID:[\s]*', '', lsbr_line) + distributor = lsbr_item.strip() + if re.search('Description', lsbr_line): + lsbr_item = re.sub(r'Description:[\s]*', '', lsbr_line) + if self.DEBUG: print('Distro Description: {}'.format(lsbr_item)) + description = lsbr_item.strip() + + if distributor: + print(description) + if distributor in GutConst._verified_distros: + print(' ' + '\x1b[1;37;42m' + ' Distro has been Validated. ' + '\x1b[0m') + ret_val[2] = 0 + else: + print(' ' + '\x1b[1;30;43m' + ' Distro has not been verified. ' + '\x1b[0m') + ret_val[2] = 0 + else: + print('unknown') + print(' ' + '\x1b[1;30;43m' + '[lsb_release] executable not found.' 
+ '\x1b[0m') + ret_val[2] = 0 + + # Check for amdgpu driver + ret_val[3] = 0 if self.read_amd_driver_version() else 0 # Ignore False + return ret_val + + def read_amd_driver_version(self) -> bool: + """ + Read the AMD driver version and store in GutConst object. + + :return: True if successful + """ + cmd_dpkg = shutil.which('dpkg') + if not cmd_dpkg: + print('Command dpkg not found. Can not determine amdgpu version.') + print(' ' + '\x1b[1;30;43m' + ' gpu-utils can still be used. ' + '\x1b[0m') + return True + for pkgname in ['amdgpu', 'amdgpu-core', 'amdgpu-pro', 'rocm-utils']: + try: + dpkg_out = subprocess.check_output(shlex.split(cmd_dpkg + ' -l ' + pkgname), + shell=False, stderr=subprocess.DEVNULL).decode().split('\n') + except (subprocess.CalledProcessError, OSError): + continue + for dpkg_line in dpkg_out: + for driverpkg in ['amdgpu', 'rocm']: + if re.search(driverpkg, dpkg_line): + if self.DEBUG: print('Debug: ' + dpkg_line) + dpkg_items = dpkg_line.split() + if len(dpkg_items) > 2: + if re.fullmatch(r'.*none.*', dpkg_items[2]): continue + print('AMD: ' + driverpkg + ' version: ' + dpkg_items[2]) + print(' ' + '\x1b[1;37;42m' + ' AMD driver OK. ' + '\x1b[0m') + return True + print('amdgpu/rocm version: UNKNOWN') + print(' ' + '\x1b[1;30;43m' + ' gpu-utils can still be used. ' + '\x1b[0m') + return False + + +GUT_CONST = GutConst() + + +def is_venv_installed() -> bool: + """ + Check if a venv is being used. + + :return: True if using venv + """ + cmdstr = 'python3 -m venv -h > /dev/null' + try: + p = subprocess.Popen(shlex.split(cmdstr), shell=False, stdin=subprocess.PIPE, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except (subprocess.CalledProcessError, OSError): + pass + else: + output, _error = p.communicate() + if not re.fullmatch(r'.*No module named.*', output.decode()): + print('python3 venv is installed') + print(' ' + '\x1b[1;37;42m' + ' python3-venv OK. 
' + '\x1b[0m') + return True + print('python3 venv is NOT installed') + print(' ' + '\x1b[1;30;43m' + ' Python3 venv package \'python3-venv\' package is recommended. ' + + 'for developers' + '\x1b[0m') + return False + + +def does_amdgpu_utils_env_exist() -> bool: + """ + Check if venv exists. + + :return: Return True if venv exists. + """ + env_name = './' + ENV_DIR + '/bin/activate' + + if os.path.isfile(env_name): + print(ENV_DIR + ' available') + print(' ' + '\x1b[1;37;42m ' + ENV_DIR + ' OK. ' + '\x1b[0m') + return True + print(ENV_DIR + ' is NOT available') + print(' ' + '\x1b[1;30;43m ' + ENV_DIR + ' can be configured per User Guide. ' + '\x1b[0m') + return False + + +def is_in_venv() -> bool: + """ + Check if execution is from within a venv. + + :return: True if in venv + """ + python_path = shutil.which('python3') + if not python_path: + print('Maybe python version compatibility issue.') + else: + if re.search(ENV_DIR, python_path): + print('In ' + ENV_DIR) + print(' ' + '\x1b[1;37;42m ' + ENV_DIR + ' is activated. ' + '\x1b[0m') + return True + print('Not in ' + ENV_DIR + ' (Only needed if you want to duplicate dev env)') + print(' ' + '\x1b[1;30;43m' + ENV_DIR + ' can be activated per User Guide. ' + '\x1b[0m') + return False + + +def main() -> None: + """ + Main flow for chk utility. + """ + parser = argparse.ArgumentParser() + parser.add_argument('--about', help='README', action='store_true', default=False) + args = parser.parse_args() + + # About me + if args.about: + print(__doc__) + print('Author: ', __author__) + print('Copyright: ', __copyright__) + print('Credits: ', *['\n {}'.format(item) for item in __credits__]) + print('License: ', __license__) + print('Version: ', __version__) + print('Maintainer: ', __maintainer__) + print('Status: ', __status__) + sys.exit(0) + + if GUT_CONST.check_env() != [0, 0, 0, 0]: + print('Error in environment. 
Exiting...') + sys.exit(-1) + + if not is_venv_installed() or not does_amdgpu_utils_env_exist(): + print('Virtual Environment not configured. Only required by developers.') + + if not is_in_venv(): + pass + print('') + + +if __name__ == '__main__': + main() diff -Nru ricks-amdgpu-utils-3.0.0/gpu-ls ricks-amdgpu-utils-3.5.0/gpu-ls --- ricks-amdgpu-utils-3.0.0/gpu-ls 1970-01-01 00:00:00.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/gpu-ls 2020-07-06 00:57:49.000000000 +0000 @@ -0,0 +1,150 @@ +#!/usr/bin/python3 +""" gpu-ls - Displays details about installed and compatible GPUs + + This utility displays most relevant parameters for installed and compatible GPUs. The + default behavior is to list relevant parameters by GPU. OpenCL platform information is + added when the *--clinfo* option is used. A brief listing of key parameters is available + with the *--short* command line option. A simplified table of current GPU state is + displayed with the *--table* option. The *--no_fan* can be used to ignore fan settings. The + *--pstate* option can be used to output the p-state table for each GPU instead of the list + of basic parameters. The *--ppm* option is used to output the table of available + power/performance modes instead of basic parameters. + + Copyright (C) 2019 RicksLab + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+""" +__author__ = 'RueiKe' +__copyright__ = 'Copyright (C) 2019 RicksLab' +__credits__ = ['Craig Echt - Testing, Debug, Verification, and Documentation', + 'Keith Myers - Testing, Debug, Verification of NV Capability'] +__license__ = 'GNU General Public License' +__program_name__ = 'gpu-ls' +__maintainer__ = 'RueiKe' +__docformat__ = 'reStructuredText' +# pylint: disable=multiple-statements +# pylint: disable=line-too-long +# pylint: disable=bad-continuation + +import argparse +import sys +import logging +from GPUmodules import __version__, __status__ +from GPUmodules import GPUmodule as Gpu +from GPUmodules import env + +LOGGER = logging.getLogger('gpu-utils') + + +def main() -> None: + """ + Main flow for gpu-ls. + """ + parser = argparse.ArgumentParser() + parser.add_argument('--about', help='README', + action='store_true', default=False) + parser.add_argument('--short', help='Short listing of basic GPU details', + action='store_true', default=False) + parser.add_argument('--table', help='Current status of readable GPUs', + action='store_true', default=False) + parser.add_argument('--pstates', help='Output pstate tables instead of GPU details', + action='store_true', default=False) + parser.add_argument('--ppm', help='Output power/performance mode tables instead of GPU details', + action='store_true', default=False) + parser.add_argument('--clinfo', help='Include openCL with card details', + action='store_true', default=False) + parser.add_argument('--no_fan', help='Do not include fan setting options', + action='store_true', default=False) + parser.add_argument('-d', '--debug', help='Debug logger output', + action='store_true', default=False) + args = parser.parse_args() + + # About me + if args.about: + print(__doc__) + print('Author: ', __author__) + print('Copyright: ', __copyright__) + print('Credits: ', *['\n {}'.format(item) for item in __credits__]) + print('License: ', __license__) + print('Version: ', __version__) + print('Maintainer: ', 
__maintainer__) + print('Status: ', __status__) + sys.exit(0) + + if args.short: args.no_fan = True + env.GUT_CONST.set_args(args) + LOGGER.debug('########## %s %s', __program_name__, __version__) + + if env.GUT_CONST.check_env() < 0: + print('Error in environment. Exiting...') + sys.exit(-1) + + if (args.pstates or args.ppm) and args.table: + print('The --table option can not be used with --pstate or --ppm, ignoring --table') + + # Get list of GPUs and get basic non-driver details + gpu_list = Gpu.GpuList() + gpu_list.set_gpu_list(clinfo_flag=True) + + # Check list of GPUs + num_gpus = gpu_list.num_vendor_gpus() + print('Detected GPUs: ', end='') + for i, (type_name, type_value) in enumerate(num_gpus.items()): + if i: + print(', {}: {}'.format(type_name, type_value), end='') + else: + print('{}: {}'.format(type_name, type_value), end='') + print('') + if 'AMD' in num_gpus.keys(): + env.GUT_CONST.read_amd_driver_version() + print('AMD: {}'.format(gpu_list.wattman_status())) + if 'NV' in num_gpus.keys(): + print('nvidia smi: [{}]'.format(env.GUT_CONST.cmd_nvidia_smi)) + + num_gpus = gpu_list.num_gpus() + if num_gpus['total'] == 0: + print('No GPUs detected, exiting...') + sys.exit(-1) + + # Read data static/dynamic/info/state driver information for GPUs + gpu_list.read_gpu_sensor_set(data_type=Gpu.GpuItem.SensorSet.All) + + # Check number of readable/writable GPUs again + num_gpus = gpu_list.num_gpus() + print('{} total GPUs, {} rw, {} r-only, {} w-only\n'.format(num_gpus['total'], num_gpus['rw'], + num_gpus['r-only'], num_gpus['w-only'])) + + # Read report specific details + if args.clinfo: + if not gpu_list.read_gpu_opencl_data(): + args.clinfo = False + + # Print out user requested details + gpu_list.read_gpu_pstates() + if args.pstates: + gpu_list.print_pstates() + if args.ppm: + gpu_list.read_gpu_ppm_table() + gpu_list.print_ppm_table() + if not args.pstates and not args.ppm: + if args.table: + com_gpu_list = 
gpu_list.list_gpus(compatibility=Gpu.GpuItem.GPU_Comp.Readable) + com_gpu_list.print_table(title='Status of Readable GPUs:') + else: + gpu_list.print(short=args.short, clflag=args.clinfo) + sys.exit(0) + + +if __name__ == '__main__': + main() diff -Nru ricks-amdgpu-utils-3.0.0/GPUmodules/env.py ricks-amdgpu-utils-3.5.0/GPUmodules/env.py --- ricks-amdgpu-utils-3.0.0/GPUmodules/env.py 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/GPUmodules/env.py 2020-07-06 00:57:49.000000000 +0000 @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -"""env.py - sets environment for amdgpu-utils and establishes global variables +"""env.py - sets environment for rickslab-gpu-utils and establishes global variables - - Copyright (C) 2019 RueiKe + Copyright (C) 2019 RicksLab This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,126 +17,214 @@ along with this program. If not, see . """ __author__ = 'RueiKe' -__copyright__ = 'Copyright (C) 2019 RueiKe' +__copyright__ = 'Copyright (C) 2019 RicksLab' __credits__ = ['Craig Echt - Testing, Debug, and Verification'] __license__ = 'GNU General Public License' -__program_name__ = 'amdgpu-utils' -__version__ = 'v3.0.0' +__program_name__ = 'gpu-utils' __maintainer__ = 'RueiKe' -__status__ = 'Stable Release' __docformat__ = 'reStructuredText' # pylint: disable=multiple-statements # pylint: disable=line-too-long +# pylint: disable=bad-continuation import re import subprocess import platform import sys import os +import logging from pathlib import Path import shlex import shutil import time from datetime import datetime +from typing import Dict, Union, List +from GPUmodules import __version__, __status__ + +LOGGER = logging.getLogger('gpu-utils') class GutConst: """ GPU Utils constants used throughout the project. 
""" + _verified_distros: List[str] = ['Debian', 'Ubuntu', 'Gentoo', 'Arch'] + _dpkg_tool: Dict[str, str] = {'Debian': 'dpkg', 'Ubuntu': 'dpkg', 'Arch': 'pacman', 'Gentoo': 'equery'} + _all_args: List[str] = ['execute_pac', 'debug', 'pdebug', 'sleep', 'no_fan', 'ltz', 'simlog', 'log', 'force_write'] + PATTERNS = {'HEXRGB': re.compile(r'^#[0-9a-fA-F]{6}'), + 'PCIIID_L0': re.compile(r'^[0-9a-fA-F]{4}.*'), + 'PCIIID_L1': re.compile(r'^\t[0-9a-fA-F]{4}.*'), + 'PCIIID_L2': re.compile(r'^\t\t[0-9a-fA-F]{4}.*'), + 'END_IN_ALPHA': re.compile(r'[a-zA-Z]+$'), + 'ALPHA': re.compile(r'[a-zA-Z]+'), + 'AMD_GPU': re.compile(r'(AMD|amd|ATI|ati)'), + 'NV_GPU': re.compile(r'(NVIDIA|nvidia|nVidia)'), + 'INTC_GPU': re.compile(r'(INTEL|intel|Intel)'), + 'ASPD_GPU': re.compile(r'(ASPEED|aspeed|Aspeed)'), + 'MTRX_GPU': re.compile(r'(MATROX|matrox|Matrox)'), + 'MHz': re.compile(r'M[Hh]z'), + 'PPM_CHK': re.compile(r'[*].*'), + 'PCI_GPU': re.compile(r'(VGA|3D|Display)'), + 'PCI_ADD': re.compile(r'^([0-9a-fA-F]{2}:[0-9a-fA-F]{2}.[0-9a-fA-F])'), + 'PPM_NOTCHK': re.compile(r'[ ]+'), + 'VALID_PS_STR': re.compile(r'[0-9]+(\s[0-9])*'), + 'IS_FLOAT': re.compile(r'[-+]?\d*\.?\d+|[-+]?\d+'), + 'DIGITS': re.compile(r'^[0-9]+[0-9]*$'), + 'VAL_ITEM': re.compile(r'.*_val$'), + 'GPUMEMTYPE': re.compile(r'^mem_(gtt|vram)_.*')} + + _sys_pciid_list = ['/usr/share/misc/pci.ids', '/usr/share/hwdata/pci.ids'] + _local_icon_list = ['{}/.local/share/rickslab-gpu-utils/icons'.format(str(Path.home())), + '/usr/share/rickslab-gpu-utils/icons'] + featuremask = '/sys/module/amdgpu/parameters/ppfeaturemask' + card_root = '/sys/class/drm/' + hwmon_sub = 'hwmon/hwmon' + gui_window_title = 'Ricks-Lab GPU Utilities' + def __init__(self): + self.args = None self.repository_module_path = os.path.dirname(str(Path(__file__).resolve())) self.repository_path = os.path.join(self.repository_module_path, '..') - self.config_dir = os.path.join(os.getenv('HOME'), '.amdgpu-utils/') - self.dist_share = '/usr/share/ricks-amdgpu-utils/' 
- self.sys_pciid = '/usr/share/misc/pci.ids' - self.dist_icons = os.path.join(self.dist_share, 'icons') - if os.path.isdir(self.dist_icons): - self.icon_path = self.dist_icons + + # Set pciid Path + for try_pciid_path in GutConst._sys_pciid_list: + if os.path.isfile(try_pciid_path): + self.sys_pciid = try_pciid_path + break else: - self.icon_path = os.path.join(self.repository_path, 'icons') - self.featuremask = '/sys/module/amdgpu/parameters/ppfeaturemask' - self.card_root = '/sys/class/drm/' - self.hwmon_sub = 'hwmon/hwmon' + self.sys_pciid = None + + # Set Icon Path + self._local_icon_list.append(os.path.join(self.repository_path, 'icons')) + for try_icon_path in GutConst._local_icon_list: + if os.path.isdir(try_icon_path): + self.icon_path = try_icon_path + break + else: + self.icon_path = None + + self.distro: Dict[str, Union[str, None]] = {'Distributor': None, 'Description': None} + self.amdfeaturemask = '' + self.log_file_ptr = '' + + # From args self.execute_pac = False self.DEBUG = False self.PDEBUG = False self.SIMLOG = False self.LOG = False self.PLOT = False - self.log_file_ptr = '' self.show_fans = True self.write_delta_only = False self.SLEEP = 2 - self.amdfeaturemask = '' self.USELTZ = False + # Time + self.TIME_FORMAT = '%d-%b-%Y %H:%M:%S' self.LTZ = datetime.utcnow().astimezone().tzinfo - if self.DEBUG: print('Local TZ: {}'.format(self.LTZ)) # GPU platform capability self.amd_read = None self.amd_write = None self.nv_read = None self.nv_write = None # Command access + self.cmd_lsb_release = None self.cmd_lspci = None self.cmd_clinfo = None self.cmd_dpkg = None self.cmd_nvidia_smi = None + def set_args(self, args) -> None: + """ + Set arguments for the give args object. + + :param args: The object return by args parser. 
+ """ + self.args = args + for target_arg in self._all_args: + if target_arg in self.args: + if target_arg == 'debug': self.DEBUG = self.args.debug + elif target_arg == 'execute_pac': self.execute_pac = self.args.execute_pac + elif target_arg == 'pdebug': self.PDEBUG = self.args.pdebug + elif target_arg == 'sleep': self.SLEEP = self.args.sleep + elif target_arg == 'no_fan': self.show_fans = not self.args.no_fan + elif target_arg == 'ltz': self.USELTZ = self.args.ltz + elif target_arg == 'simlog': self.SIMLOG = self.args.simlog + elif target_arg == 'log': self.LOG = self.args.log + elif target_arg == 'force_write': self.write_delta_only = not self.args.force_write + else: print('Invalid arg: {}'.format(target_arg)) + LOGGER.propagate = False + formatter = logging.Formatter("%(levelname)s:%(name)s:%(module)s.%(funcName)s:%(message)s") + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(formatter) + stream_handler.setLevel(logging.WARNING) + LOGGER.addHandler(stream_handler) + LOGGER.setLevel(logging.WARNING) + if self.DEBUG: + LOGGER.setLevel(logging.DEBUG) + file_handler = logging.FileHandler( + 'debug_gpu-utils_{}.log'.format(datetime.now().strftime("%Y%m%d-%H%M%S")), 'w') + file_handler.setFormatter(formatter) + file_handler.setLevel(logging.DEBUG) + LOGGER.addHandler(file_handler) + LOGGER.debug('Command line arguments:\n %s', args) + LOGGER.debug('Local TZ: %s', self.LTZ) + LOGGER.debug('pciid path set to: %s', self.sys_pciid) + LOGGER.debug('Icon path set to: %s', self.icon_path) + @staticmethod - def now(ltz=False): + def now(ltz: bool = False) -> datetime: """ Get the current datetime object. 
+ :param ltz: Flag to get local time instead of UTC - :type ltz: bool - :return: - :rtype: datetime - """ - if ltz: - return datetime.now() - return datetime.utcnow() + :return: datetime obj of current time + """ + return datetime.now() if ltz else datetime.utcnow() @staticmethod - def utc2local(utc): + def utc2local(utc: datetime) -> datetime: """ Return local time for given UTC time. - :param utc: - :type utc: datetime - :return: - :rtype: datetime + + :param utc: Time for UTC + :return: Time for local time zone .. note:: from https://stackoverflow.com/questions/4770297/convert-utc-datetime-string-to-local-datetime """ epoch = time.mktime(utc.timetuple()) offset = datetime.fromtimestamp(epoch) - datetime.utcfromtimestamp(epoch) return utc + offset - def read_amdfeaturemask(self): + def read_amdfeaturemask(self) -> int: """ Read and return the amdfeaturemask as an int. - :return: - :rtype: int + + :return: AMD Feature Mask """ - with open(self.featuremask) as fm_file: - self.amdfeaturemask = int(fm_file.readline()) - return self.amdfeaturemask + try: + with open(self.featuremask) as fm_file: + self.amdfeaturemask = int(fm_file.readline()) + except OSError as err: + LOGGER.debug('Could not read AMD Featuremask [%s]', err) + self.amdfeaturemask = 0 + return self.amdfeaturemask - def check_env(self): + def check_env(self) -> int: """ Check the compatibility of the user environment. 
+ :return: Return status: ok=0, python issue= -1, kernel issue= -2, command issue= -3 - :rtype: int """ # Check python version - required_pversion = [3, 6] + required_pversion = (3, 6) (python_major, python_minor, python_patch) = platform.python_version_tuple() - if self.DEBUG: print('Using python: {}.{}.{}'.format(python_major, python_minor, python_patch)) + LOGGER.debug('Using python: %s.%s.%s', python_major, python_minor, python_patch) if int(python_major) < required_pversion[0]: print('Using python {}, but {} requires python {}.{} or higher.'.format(python_major, __program_name__, required_pversion[0], required_pversion[1]), file=sys.stderr) return -1 - elif int(python_major) == required_pversion[0] and int(python_minor) < required_pversion[1]: + if int(python_major) == required_pversion[0] and int(python_minor) < required_pversion[1]: print('Using python {}.{}.{}, but {} requires python {}.{} or higher.'.format(python_major, python_minor, python_patch, __program_name__, @@ -147,91 +234,173 @@ return -1 # Check Linux Kernel version - required_kversion = [4, 8] + required_kversion = (4, 8) linux_version = platform.release() - if self.DEBUG: print('Using Linux Kernel: {}'.format(linux_version)) + LOGGER.debug('Using Linux Kernel: %s', linux_version) if int(linux_version.split('.')[0]) < required_kversion[0]: print('Using Linux Kernel {}, but {} requires > {}.{}.'.format(linux_version, __program_name__, required_kversion[0], required_kversion[1]), file=sys.stderr) return -2 - elif int(linux_version.split('.')[0]) == required_kversion[0] and \ + if int(linux_version.split('.')[0]) == required_kversion[0] and \ int(linux_version.split('.')[1]) < required_kversion[1]: print('Using Linux Kernel {}, but {} requires > {}.{}.'.format(linux_version, __program_name__, required_kversion[0], required_kversion[1]), file=sys.stderr) return -2 + # Check Linux Distro + self.cmd_lsb_release = shutil.which('lsb_release') + if self.cmd_lsb_release: + lsbr_out = 
subprocess.check_output(shlex.split('{} -a'.format(self.cmd_lsb_release)), + shell=False, stderr=subprocess.DEVNULL).decode().split('\n') + for lsbr_line in lsbr_out: + if 'Distributor ID' in lsbr_line: + lsbr_item = re.sub(r'Distributor ID:[\s]*', '', lsbr_line) + LOGGER.debug('Using Linux Distro: %s', lsbr_item) + self.distro['Distributor'] = lsbr_item.strip() + if 'Description' in lsbr_line: + lsbr_item = re.sub(r'Description:[\s]*', '', lsbr_line) + LOGGER.debug('Linux Distro Description: %s', lsbr_item) + self.distro['Description'] = lsbr_item.strip() + + if self.distro['Distributor'] and self.DEBUG: + print('{}: '.format(self.distro['Distributor']), end='') + if self.distro['Distributor'] in GutConst._verified_distros: print('Validated') + else: print('Unverified') + else: + print('OS command [lsb_release] executable not found.') + # Check access/paths to system commands command_access_fail = False self.cmd_lspci = shutil.which('lspci') if not self.cmd_lspci: - print('OS command [lspci] executable not found.') + print('Error: OS command [lspci] executable not found.') command_access_fail = True + LOGGER.debug('lspci path: %s', self.cmd_lspci) + self.cmd_clinfo = shutil.which('clinfo') if not self.cmd_clinfo: print('Package addon [clinfo] executable not found. 
Use sudo apt-get install clinfo to install') - #command_access_fail = True - self.cmd_dpkg = shutil.which('dpkg') - if not self.cmd_dpkg: - print('OS command [dpkg] executable not found.') - #command_access_fail = True - self.cmd_nvidia_smi = shutil.which('nvidia_smi') - if not self.cmd_nvidia_smi: - pass - #print('OS command [nvidia_smi] executable not found.') - #command_access_fail = True + LOGGER.debug('clinfo path: %s', self.cmd_clinfo) + + # Package Reader + if self.distro['Distributor'] in GutConst._dpkg_tool: + pkg_tool = GutConst._dpkg_tool[self.distro['Distributor']] + self.cmd_dpkg = shutil.which(pkg_tool) + if not self.cmd_dpkg: + print('OS command [{}] executable not found.'.format(pkg_tool)) + else: + self.cmd_dpkg = None + LOGGER.debug('%s package query tool: %s', self.distro["Distributor"], self.cmd_dpkg) + + self.cmd_nvidia_smi = shutil.which('nvidia-smi') + if self.cmd_nvidia_smi: + print('OS command [nvidia-smi] executable found: [{}]'.format(self.cmd_nvidia_smi)) if command_access_fail: return -3 return 0 - def read_amd_driver_version(self): + def read_amd_driver_version(self) -> bool: """ Read the AMD driver version and store in GutConst object. - :return: True if successful - :rtype: bool + + :return: True on success. """ if not self.cmd_dpkg: - print('Command {} not found. Can not determine amdgpu version.'.format(self.cmd_dpkg)) + print('Can not access package read utility to verify AMD driver.') return False - version_ok = False + if re.search(r'([uU]buntu|[dD]ebian)', self.distro['Distributor']): + return self.read_amd_driver_version_debian() + if re.search(r'([gG]entoo)', self.distro['Distributor']): + return self.read_amd_driver_version_gentoo() + if re.search(r'([aA]rch)', self.distro['Distributor']): + return self.read_amd_driver_version_arch() + return False + + def read_amd_driver_version_gentoo(self) -> bool: + """ + Read the AMD driver version and store in GutConst object. 
+ + :return: True if successful + """ + for pkgname in ['dev-libs/amdgpu', 'dev-libs/amdgpu-pro-opencl', 'dev-libs/rocm', 'dev-libs/rocm-utils']: + try: + dpkg_out = subprocess.check_output(shlex.split('{} list {}'.format(self.cmd_dpkg, pkgname)), + shell=False, stderr=subprocess.DEVNULL).decode().split('\n') + except (subprocess.CalledProcessError, OSError): + continue + for dpkg_line in dpkg_out: + if '!!!' in dpkg_line: + continue + for driverpkg in ['amdgpu', 'rocm']: + if re.search('Searching', dpkg_line): + continue + if re.search(driverpkg, dpkg_line): + LOGGER.debug(dpkg_line) + dpkg_line = re.sub(r'.*\][\s]*', '', dpkg_line) + print('AMD: {} version: {}'.format(driverpkg, dpkg_line)) + return True + print('amdgpu/rocm version: UNKNOWN') + return False + + def read_amd_driver_version_arch(self) -> bool: + """ + Read the AMD driver version and store in GutConst object. + + :return: True if successful + """ + for pkgname in ['amdgpu', 'rocm', 'rocm-utils']: + try: + dpkg_out = subprocess.check_output(shlex.split('{} -Qs {}'.format(self.cmd_dpkg, pkgname)), + shell=False, stderr=subprocess.DEVNULL).decode().split('\n') + except (subprocess.CalledProcessError, OSError): + continue + for dpkg_line in dpkg_out: + for driverpkg in ['amdgpu', 'rocm']: + if re.search(driverpkg, dpkg_line): + LOGGER.debug(dpkg_line) + dpkg_items = dpkg_line.split() + if len(dpkg_items) >= 2: + print('AMD: {} version: {}'.format(driverpkg, dpkg_items[1])) + return True + print('amdgpu/rocm version: UNKNOWN') + return False + + def read_amd_driver_version_debian(self) -> bool: + """ + Read the AMD driver version and store in GutConst object. 
+ + :return: True if successful + """ for pkgname in ['amdgpu', 'amdgpu-core', 'amdgpu-pro', 'rocm-utils']: try: dpkg_out = subprocess.check_output(shlex.split('{} -l {}'.format(self.cmd_dpkg, pkgname)), shell=False, stderr=subprocess.DEVNULL).decode().split('\n') - for dpkg_line in dpkg_out: - for driverpkg in ['amdgpu', 'rocm']: - search_obj = re.search(driverpkg, dpkg_line) - if search_obj: - if self.DEBUG: print('Debug: {}'.format(dpkg_line)) - dpkg_items = dpkg_line.split() - if len(dpkg_items) > 2: - if re.fullmatch(r'.*none.*', dpkg_items[2]): - continue - else: - print('AMD: {} version: {}'.format(driverpkg, dpkg_items[2])) - version_ok = True - break - if version_ok: - break except (subprocess.CalledProcessError, OSError): continue - if not version_ok: - print('amdgpu/rocm version: UNKNOWN') - return False - return True + for dpkg_line in dpkg_out: + for driverpkg in ['amdgpu', 'rocm']: + if re.search(driverpkg, dpkg_line): + LOGGER.debug(dpkg_line) + dpkg_items = dpkg_line.split() + if len(dpkg_items) > 2: + if re.fullmatch(r'.*none.*', dpkg_items[2]): continue + print('AMD: {} version: {}'.format(driverpkg, dpkg_items[2])) + return True + print('amdgpu/rocm version: UNKNOWN') + return False GUT_CONST = GutConst() -def about(): +def about() -> None: """ Display details of this module. 
- :return: None """ print(__doc__) print('Author: ', __author__) print('Copyright: ', __copyright__) - print('Credits: ', __credits__) + print('Credits: ', *['\n {}'.format(item) for item in __credits__]) print('License: ', __license__) print('Version: ', __version__) print('Maintainer: ', __maintainer__) diff -Nru ricks-amdgpu-utils-3.0.0/GPUmodules/GPUgui.py ricks-amdgpu-utils-3.5.0/GPUmodules/GPUgui.py --- ricks-amdgpu-utils-3.0.0/GPUmodules/GPUgui.py 1970-01-01 00:00:00.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/GPUmodules/GPUgui.py 2020-07-06 00:57:49.000000000 +0000 @@ -0,0 +1,257 @@ +#!/usr/bin/env python3 +""" gpu-utils: GPUgui module to support gui in rickslab-gpu-utils. + + Copyright (C) 2020 RicksLab + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+""" +__author__ = 'RueiKe' +__copyright__ = 'Copyright (C) 2020 RicksLab' +__credits__ = ['@berturion - Testing and Verification'] +__license__ = 'GNU General Public License' +__program_name__ = 'gpu-utils' +__maintainer__ = 'RueiKe' +__docformat__ = 'reStructuredText' +# pylint: disable=multiple-statements +# pylint: disable=line-too-long +# pylint: disable=bad-continuation + +from typing import Tuple, Dict +import sys +import re +import logging +import warnings + +try: + import gi + gi.require_version('Gtk', '3.0') + from gi.repository import Gtk, Gdk +except ModuleNotFoundError as error: + print('gi import error: {}'.format(error)) + print('gi is required for {}'.format(__program_name__)) + print(' In a venv, first install vext: pip install --no-cache-dir vext') + print(' Then install vext.gi: pip install --no-cache-dir vext.gi') + sys.exit(0) + +try: + from GPUmodules import env +except ImportError: + import env +from GPUmodules import __version__, __status__ + +ColorDict = Dict[str, str] +LOGGER = logging.getLogger('gpu-utils') +PATTERNS = env.GutConst.PATTERNS + + +def get_color(value: str) -> str: + """ + Get the rgb hex string for the provided color name. + + :param value: A valid project color name. + :return: rgb value as a hex string. + """ + return GuiProps.color_name_to_hex(value) + + +class GuiProps: + """ + Class to manage style properties of Gtk widgets.
+ """ + _colors: ColorDict = {'white': '#FFFFFF', + 'white_off': '#FCFCFC', + 'white_pp': '#F0E5D3', + 'cream': '#FFFDD1', + 'gray20': '#CCCCCC', + 'gray50': '#7F7F7F', + 'gray60': '#666666', + 'gray70': '#4D4D4D', + 'gray80': '#333333', + 'gray95': '#0D0D0D', + 'gray_dk': '#6A686E', + 'black': '#000000', + # Colors Low Contrast - For table fields + 'green': '#8EC3A7', + 'green_dk': '#6A907C', + 'teal': '#218C8D', + 'olive': '#6C9040', + 'red': '#B73743', + 'orange': '#E86850', + 'yellow': '#C9A100', + 'blue': '#587498', + 'purple': '#6264A7', + # Colors Bright - For plot lines + 'br_red': '#FF2D2D', + 'br_orange': '#FF6316', + 'br_blue': '#66CCFF', + 'br_pink': '#CC00FF', + 'br_green': '#99FF99', + 'br_yellow': '#FFFF66', + # Slate - For table fields + 'slate_lt': '#A0A0AA', + 'slate_md': '#80808d', + 'slate_dk': '#5D5D67', + 'slate_vdk': '#3A3A40'} + + @staticmethod + def color_name_to_hex(value: str) -> str: + """ + Return the hex code for the given string. The specified string must exist in the project color list. + :param value: Color name + :return: Color hex code + """ + if value not in GuiProps._colors.keys(): + raise ValueError('Invalid color name {} not in {}'.format(value, GuiProps._colors)) + return GuiProps._colors[value] + + @staticmethod + def color_name_to_rgba(value: str) -> Tuple[float, ...]: + """ + Convert the given color name to a color tuple. The given color string must exist in the project + color list. + + :param value: Color name + :return: Color tuple + """ + if value not in GuiProps._colors.keys(): + raise ValueError('Invalid color name {} not in {}'.format(value, GuiProps._colors)) + return GuiProps.hex_to_rgba(GuiProps._colors[value]) + + @staticmethod + def hex_to_rgba(value: str) -> Tuple[float, ...]: + """ + Return rgba tuple for the given hex color value. + + :param value: hex color value as string + :return: rgba tuple + + ..
note:: Code copied from Stack Overflow + """ + if not re.fullmatch(PATTERNS['HEXRGB'], value): + raise ValueError('Invalid hex color format in {}'.format(value)) + value = value.lstrip('#') + if len(value) != 6: + raise ValueError('Invalid hex color format in {}'.format(value)) + (r_1, g_1, b_1, a_1) = tuple(int(value[i:i + 2], 16) for i in range(0, 6, 2)) + (1,) + (r_1, g_1, b_1, a_1) = (r_1 / 255.0, g_1 / 255.0, b_1 / 255.0, a_1) + return tuple([r_1, g_1, b_1, a_1]) + + @staticmethod + def set_gtk_prop(gui_item, top: int = None, bottom: int = None, right: int = None, + left: int = None, width: int = None, width_chars: int = None, width_max: int = None, + max_length: int = None, align: tuple = None, xalign: float = None) -> None: + """ + Set properties of Gtk objects. + + :param gui_item: Gtk object + :param top: Top margin + :param bottom: Bottom margin + :param right: Right margin + :param left: Left margin + :param width: Width of request field + :param width_chars: Width of label + :param width_max: Max Width of object + :param max_length: max length of entry + :param align: Alignment parameters + :param xalign: X Alignment parameter + """ + if top: + gui_item.set_property('margin-top', top) + if bottom: + gui_item.set_property('margin-bottom', bottom) + if right: + gui_item.set_property('margin-right', right) + if left: + gui_item.set_property('margin-left', left) + if width: + gui_item.set_property('width-request', width) + if width_max: + gui_item.set_max_width_chars(width_max) + if width_chars: + gui_item.set_width_chars(width_chars) + if max_length: + gui_item.set_max_length(max_length) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=DeprecationWarning) + if xalign: + # FIXME - This is deprecated in latest Gtk, need to use halign + gui_item.set_alignment(xalign=xalign) + if align: + # FIXME - This is deprecated in latest Gtk, need to use halign + gui_item.set_alignment(*align) + + @classmethod + def set_style(cls, 
css_str=None) -> None: + """ + Set the specified css style, or set default styles if no css string is specified. + + :param css_str: A valid css format string. + """ + css_list = [] + if css_str is None: + # Initialize formatting colors. + css_list.append("grid { background-image: image(%s); }" % cls._colors['gray80']) + css_list.append("#light_grid { background-image: image(%s); }" % cls._colors['gray20']) + css_list.append("#dark_grid { background-image: image(%s); }" % cls._colors['gray70']) + css_list.append("#dark_box { background-image: image(%s); }" % cls._colors['slate_dk']) + css_list.append("#med_box { background-image: image(%s); }" % cls._colors['slate_md']) + css_list.append("#light_box { background-image: image(%s); }" % cls._colors['slate_lt']) + css_list.append("#head_box { background-image: image(%s); }" % cls._colors['blue']) + css_list.append("#warn_box { background-image: image(%s); }" % cls._colors['red']) + css_list.append("#button_box { background-image: image(%s); }" % cls._colors['slate_dk']) + css_list.append("#message_box { background-image: image(%s); }" % cls._colors['gray50']) + css_list.append("#message_label { color: %s; }" % cls._colors['white_off']) + css_list.append("#warn_label { color: %s; }" % cls._colors['white_pp']) + css_list.append("#white_label { color: %s; }" % cls._colors['white_off']) + css_list.append("#black_label { color: %s; }" % cls._colors['gray95']) + css_list.append("#ppm_combo { background-image: image(%s); color: %s; }" % + (cls._colors['green'], cls._colors['black'])) + css_list.append("button { background-image: image(%s); color: %s; }" % + (cls._colors['slate_lt'], cls._colors['black'])) + css_list.append("entry { background-image: image(%s); color: %s; }" % + (cls._colors['green'], cls._colors['gray95'])) + # Below format does not work. 
+ css_list.append("entry:selected { background-image: image(%s); color: %s; }" % + (cls._colors['yellow'], cls._colors['white'])) + else: + css_list.append(css_str) + LOGGER.info('css %s', css_list) + + screen = Gdk.Screen.get_default() + + for css_item in css_list: + provider = Gtk.CssProvider() + css = css_item.encode('utf-8') + provider.load_from_data(css) + style_context = Gtk.StyleContext() + style_context.add_provider_for_screen(screen, provider, Gtk.STYLE_PROVIDER_PRIORITY_APPLICATION) + + +def about() -> None: + """ + Display details of this module. + """ + print(__doc__) + print('Author: ', __author__) + print('Copyright: ', __copyright__) + print('Credits: ', *['\n {}'.format(item) for item in __credits__]) + print('License: ', __license__) + print('Version: ', __version__) + print('Maintainer: ', __maintainer__) + print('Status: ', __status__) + sys.exit(0) + + +if __name__ == '__main__': + about() diff -Nru ricks-amdgpu-utils-3.0.0/GPUmodules/GPUmodule.py ricks-amdgpu-utils-3.5.0/GPUmodules/GPUmodule.py --- ricks-amdgpu-utils-3.0.0/GPUmodules/GPUmodule.py 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/GPUmodules/GPUmodule.py 2020-07-06 00:57:49.000000000 +0000 @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -"""GPUmodules - classes used in amdgpu-utils +"""GPUmodules - Classes to represent GPUs and sets of GPUs used in rickslab-gpu-utils. - Copyright (C) 2019 RueiKe + Copyright (C) 2019 RicksLab This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,42 +18,60 @@ along with this program. If not, see . 
""" __author__ = 'RueiKe' -__copyright__ = 'Copyright (C) 2019 RueiKe' -__credits__ = ['Craig Echt - Testing, Debug, and Verification'] +__copyright__ = 'Copyright (C) 2019 RicksLab' +__credits__ = ['Craig Echt - Testing, Debug, Verification, and Documentation', + 'Keith Myers - Testing, Debug, Verification of NV Capability'] __license__ = 'GNU General Public License' -__program_name__ = 'amdgpu-utils' -__version__ = 'v3.0.0' +__program_name__ = 'gpu-utils' __maintainer__ = 'RueiKe' -__status__ = 'Stable Release' __docformat__ = 'reStructuredText' # pylint: disable=multiple-statements # pylint: disable=line-too-long +# pylint: disable=bad-continuation import re import subprocess import shlex import os import sys +import logging +from typing import Union, List, Dict, TextIO, IO, Generator from pathlib import Path from uuid import uuid4 +from enum import Enum import glob +from numpy import nan as np_nan + +from GPUmodules import __version__, __status__ try: from GPUmodules import env except ImportError: import env +LOGGER = logging.getLogger('gpu-utils') +PATTERNS = env.GutConst.PATTERNS + + +class GpuEnum(Enum): + """ + Replace __str__ method of Enum so that name excludes type and can be used as key in other dicts. + """ + def __str__(self): + return self.name + + class ObjDict(dict): """ Allow access of dictionary keys by key name. """ # pylint: disable=attribute-defined-outside-init + # pylint: disable=too-many-instance-attributes def __getattr__(self, name): if name in self: return self[name] - else: - raise AttributeError('No such attribute: ' + name) + raise AttributeError('No such attribute: {}'.format(name)) def __setattr__(self, name, value): self[name] = value @@ -62,161 +80,349 @@ if name in self: del self[name] else: - raise AttributeError('No such attribute: ' + name) + raise AttributeError('No such attribute: {}'.format(name)) class GpuItem: """An object to store GPU details. - .. 
note:: GPU Frequency/Voltage Control Type: 0 = None, 1 = P-states, 2 = Curve """ # pylint: disable=attribute-defined-outside-init - _GPU_NC_Param_List = ['compute', 'readable', 'writable', 'vendor', 'model', 'card_num', - 'card_path', 'pcie_id', 'driver'] + # pylint: disable=too-many-instance-attributes + _finalized = False + _button_labels = {'loading': 'Load%', + 'power': 'Power', + 'power_cap': 'PowerCap', + 'temp_val': 'Temp', + 'vddgfx_val': 'VddGfx', + 'sclk_ps_val': 'SCLK Pstate', + 'sclk_f_val': 'SCLK', + 'mclk_ps_val': 'MCLK Pstate', + 'mclk_f_val': 'MCLK'} + + _fan_item_list = ['fan_enable', 'pwm_mode', 'fan_target', + 'fan_speed', 'fan_pwm', 'fan_speed_range', 'fan_pwm_range'] + short_list = ['vendor', 'readable', 'writable', 'compute', 'card_num', 'id', 'model_device_decode', + 'gpu_type', 'card_path', 'sys_card_path', 'hwmon_path', 'pcie_id'] + _GPU_NC_Param_List = ['compute', 'readable', 'writable', 'vendor', 'model', 'card_num', 'sys_card_path', + 'gpu_type', 'card_path', 'hwmon_path', 'pcie_id', 'driver', 'id', 'model_device_decode'] + + # Vendor and Type skip lists for reporting + AMD_Skip_List = ['frequencies_max', 'compute_mode', 'serial_number', 'card_index'] + NV_Skip_List = ['fan_enable', 'fan_speed', 'fan_pwm_range', 'fan_speed_range', 'pwm_mode', + 'mem_gtt_total', 'mem_gtt_used', 'mem_gtt_usage', + 'mclk_ps', 'mclk_f_range', 'sclk_f_range', 'vddc_range', 'power_dpm_force', + 'temp_crits', 'voltages'] + LEGACY_Skip_List = ['vbios', 'loading', 'mem_loading', 'sclk_ps', 'mclk_ps', 'ppm', 'power', 'power_cap', + 'power_cap_range', 'mem_vram_total', 'mem_vram_used', 'mem_gtt_total', 'mem_gtt_used', + 'mem_vram_usage', 'mem_gtt_usage', 'fan_speed_range', 'fan_enable', 'fan_target', + 'fan_speed', 'voltages', 'vddc_range', 'frequencies', 'sclk_f_range', 'mclk_f_range'] # Define Class Labels + GPU_Type = GpuEnum('type', 'Undefined Unsupported Supported Legacy APU PStatesNE PStates CurvePts') + GPU_Comp = GpuEnum('Compatibility', 'None ALL ReadWrite 
ReadOnly WriteOnly Readable Writable') + GPU_Vendor = GpuEnum('vendor', 'Undefined ALL AMD NVIDIA INTEL ASPEED MATROX PCIE') + _apu_gpus = ['Carrizo', 'Picasso', 'Renoir'] + + # Table parameters labels. + table_parameters = ['model_display', 'loading', 'mem_loading', 'mem_vram_usage', 'mem_gtt_usage', + 'power', 'power_cap', 'energy', 'temp_val', 'vddgfx_val', + 'fan_pwm', 'sclk_f_val', 'sclk_ps_val', 'mclk_f_val', 'mclk_ps_val', 'ppm'] + _table_param_labels = {'model_display': 'Model', + 'loading': 'GPU Load %', + 'mem_loading': 'Mem Load %', + 'mem_vram_usage': 'VRAM Usage %', + 'mem_gtt_usage': 'GTT Usage %', + 'power': 'Power (W)', + 'power_cap': 'Power Cap (W)', + 'energy': 'Energy (kWh)', + 'temp_val': 'T (C)', + 'vddgfx_val': 'VddGFX (mV)', + 'fan_pwm': 'Fan Spd (%)', + 'sclk_f_val': 'Sclk (MHz)', + 'sclk_ps_val': 'Sclk Pstate', + 'mclk_f_val': 'Mclk (MHz)', + 'mclk_ps_val': 'Mclk Pstate', + 'ppm': 'Perf Mode'} + + # Complete GPU print items, use skip lists where appropriate _GPU_CLINFO_Labels = {'sep4': '#', - 'opencl_version': ' Device OpenCL C Version', - 'device_name': ' Device Name', - 'device_version': ' Device Version', - 'driver_version': ' Driver Version', - 'max_cu': ' Max Compute Units', - 'simd_per_cu': ' SIMD per CU', - 'simd_width': ' SIMD Width', - 'simd_ins_width': ' SIMD Instruction Width', + 'opencl_version': ' Device OpenCL C Version', + 'device_name': ' Device Name', + 'device_version': ' Device Version', + 'driver_version': ' Driver Version', + 'max_cu': ' Max Compute Units', + 'simd_per_cu': ' SIMD per CU', + 'simd_width': ' SIMD Width', + 'simd_ins_width': ' SIMD Instruction Width', 'max_mem_allocation': ' CL Max Memory Allocation', - 'max_wi_dim': ' Max Work Item Dimensions', - 'max_wi_sizes': ' Max Work Item Sizes', - 'max_wg_size': ' Max Work Group Size', - 'prf_wg_size': ' Preferred Work Group Size', - 'prf_wg_multiple': ' Preferred Work Group Multiple'} - _GPU_Param_Labels = {'card_num': 'Card Number', - 'vendor': 'Vendor', - 
'readable': 'Readable', - 'writable': 'Writable', - 'compute': 'Compute', - 'unique_id': 'GPU UID', - 'id': 'Device ID', + 'max_wi_dim': ' Max Work Item Dimensions', + 'max_wi_sizes': ' Max Work Item Sizes', + 'max_wg_size': ' Max Work Group Size', + 'prf_wg_size': ' Preferred Work Group Size', + 'prf_wg_multiple': ' Preferred Work Group Multiple'} + _GPU_Param_Labels = {'card_num': 'Card Number', + 'vendor': 'Vendor', + 'readable': 'Readable', + 'writable': 'Writable', + 'compute': 'Compute', + 'unique_id': 'GPU UID', + 'serial_number': 'GPU S/N', + 'id': 'Device ID', 'model_device_decode': 'Decoded Device ID', - 'model': 'Card Model', - 'model_display': 'Display Card Model', - 'pcie_id': 'PCIe ID', - 'link_spd': ' Link Speed', - 'link_wth': ' Link Width', - 'sep1': '#', - 'driver': 'Driver', - 'vbios': 'vBIOS Version', - 'compute_platform': 'Compute Platform', - 'gpu_type': 'GPU Frequency/Voltage Control Type', - 'hwmon_path': 'HWmon', - 'card_path': 'Card Path', - 'sep2': '#', - 'power': 'Current Power (W)', - 'power_cap': 'Power Cap (W)', - 'power_cap_range': ' Power Cap Range (W)'} - if env.GUT_CONST.show_fans: - _GPU_Param_Labels.update({'fan_enable': 'Fan Enable', - 'pwm_mode': 'Fan PWM Mode', - 'fan_target': 'Fan Target Speed (rpm)', - 'fan_speed': 'Current Fan Speed (rpm)', - 'fan_pwm': 'Current Fan PWM (%)', - 'fan_speed_range': ' Fan Speed Range (rpm)', - 'fan_pwm_range': ' Fan PWM Range (%)'}) - _GPU_Param_Labels.update({'sep3': '#', - 'loading': 'Current GPU Loading (%)', - 'mem_loading': 'Current Memory Loading (%)', - 'temperatures': 'Current Temps (C)', - 'temp_crit': ' Critical Temp (C)', - 'voltages': 'Current Voltages (V)', - 'vddc_range': ' Vddc Range', - 'frequencies': 'Current Clk Frequencies (MHz)', - 'sclk_ps': 'Current SCLK P-State', - 'sclk_f_range': ' SCLK Range', - 'mclk_ps': 'Current MCLK P-State', - 'mclk_f_range': ' MCLK Range', - 'ppm': 'Power Performance Mode', - 'power_dpm_force': 'Power Force Performance Level'}) - - # HWMON 
sensor reading details - _sensor_details = {'AMD': {'HWMON': { - 'power': {'type': 'sp', 'cf': 0.000001, 'sensor': ['power1_average']}, - 'power_cap': {'type': 'sp', 'cf': 0.000001, 'sensor': ['power1_cap']}, - 'power_cap_range': {'type': 'mm', 'cf': 0.000001, - 'sensor': ['power1_cap_min', 'power1_cap_max']}, - 'fan_enable': {'type': 'sp', 'cf': 1, 'sensor': ['fan1_enable']}, - 'fan_target': {'type': 'sp', 'cf': 1, 'sensor': ['fan1_target']}, - 'fan_speed': {'type': 'sp', 'cf': 1, 'sensor': ['fan1_input']}, - 'fan_speed_range': {'type': 'mm', 'cf': 1, 'sensor': ['fan1_min', 'fan1_max']}, - 'pwm_mode': {'type': 'sp', 'cf': 1, 'sensor': ['pwm1_enable']}, - 'fan_pwm': {'type': 'sp', 'cf': 0.39216, 'sensor': ['pwm1']}, - 'fan_pwm_range': {'type': 'mm', 'cf': 0.39216, 'sensor': ['pwm1_min', 'pwm1_max']}, - 'temp': {'type': 'sp', 'cf': 0.001, 'sensor': ['temp1_input']}, - 'temp_crit': {'type': 'sp', 'cf': 0.001, 'sensor': ['temp1_crit']}, - 'freq1': {'type': 'sl', 'cf': 0.000001, 'sensor': ['freq1_input', 'freq1_label']}, - 'freq2': {'type': 'sl', 'cf': 0.000001, 'sensor': ['freq2_input', 'freq2_label']}, - 'frequencies': {'type': 'sl*', 'cf': 0.000001, 'sensor': ['freq*_input']}, - 'voltages': {'type': 'sl*', 'cf': 1, 'sensor': ['in*_input']}, - 'temperatures': {'type': 'sl*', 'cf': 0.001, 'sensor': ['temp*_input']}, - 'vddgfx': {'type': 'sl', 'cf': 0.001, 'sensor': ['in0_input', 'in0_label']}}, + 'model': 'Card Model', + 'model_display': 'Display Card Model', + 'card_index': 'Card Index', + 'pcie_id': 'PCIe ID', + 'link_spd': ' Link Speed', + 'link_wth': ' Link Width', + 'sep1': '#', + 'driver': 'Driver', + 'vbios': 'vBIOS Version', + 'compute_platform': 'Compute Platform', + 'compute_mode': 'Compute Mode', + 'gpu_type': 'GPU Type', + 'hwmon_path': 'HWmon', + 'card_path': 'Card Path', + 'sys_card_path': 'System Card Path', + 'sep2': '#', + 'power': 'Current Power (W)', + 'power_cap': 'Power Cap (W)', + 'power_cap_range': ' Power Cap Range (W)', + 'fan_enable': 'Fan 
Enable', + 'pwm_mode': 'Fan PWM Mode', + 'fan_target': 'Fan Target Speed (rpm)', + 'fan_speed': 'Current Fan Speed (rpm)', + 'fan_pwm': 'Current Fan PWM (%)', + 'fan_speed_range': ' Fan Speed Range (rpm)', + 'fan_pwm_range': ' Fan PWM Range (%)', + 'sep3': '#', + 'loading': 'Current GPU Loading (%)', + 'mem_loading': 'Current Memory Loading (%)', + 'mem_gtt_usage': 'Current GTT Memory Usage (%)', + 'mem_gtt_used': ' Current GTT Memory Used (GB)', + 'mem_gtt_total': ' Total GTT Memory (GB)', + 'mem_vram_usage': 'Current VRAM Usage (%)', + 'mem_vram_used': ' Current VRAM Used (GB)', + 'mem_vram_total': ' Total VRAM (GB)', + 'temperatures': 'Current Temps (C)', + 'temp_crits': 'Critical Temps (C)', + 'voltages': 'Current Voltages (V)', + 'vddc_range': ' Vddc Range', + 'frequencies': 'Current Clk Frequencies (MHz)', + 'frequencies_max': 'Maximum Clk Frequencies (MHz)', + 'sclk_ps': 'Current SCLK P-State', + 'sclk_f_range': ' SCLK Range', + 'mclk_ps': 'Current MCLK P-State', + 'mclk_f_range': ' MCLK Range', + 'ppm': 'Power Profile Mode', + 'power_dpm_force': 'Power DPM Force Performance Level'} + + # GPU sensor reading details + SensorSet = Enum('set', 'None Test Static Dynamic Info State Monitor All') + sensor_sets = {SensorSet.Static: {'HWMON': ['power_cap_range', 'temp_crits', + 'fan_speed_range', 'fan_pwm_range']}, + SensorSet.Dynamic: {'HWMON': ['power', 'power_cap', 'temperatures', 'voltages', + 'frequencies', 'fan_enable', 'fan_target', + 'fan_speed', 'pwm_mode', 'fan_pwm']}, + SensorSet.Info: {'DEVICE': ['unique_id', 'vbios', 'mem_vram_total', 'mem_gtt_total']}, + SensorSet.State: {'DEVICE': ['loading', 'mem_loading', 'mem_gtt_used', 'mem_vram_used', + 'link_spd', 'link_wth', 'sclk_ps', 'mclk_ps', 'ppm', + 'power_dpm_force']}, + SensorSet.Monitor: {'HWMON': ['power', 'power_cap', 'temperatures', 'voltages', + 'frequencies', 'fan_pwm'], + 'DEVICE': ['loading', 'mem_loading', 'mem_gtt_used', 'mem_vram_used', + 'sclk_ps', 'mclk_ps', 'ppm']}, + SensorSet.All: 
{'DEVICE': ['unique_id', 'vbios', 'loading', 'mem_loading', + 'link_spd', 'link_wth', 'sclk_ps', 'mclk_ps', 'ppm', + 'power_dpm_force', 'mem_vram_total', 'mem_gtt_total', + 'mem_vram_used', 'mem_gtt_used'], + 'HWMON': ['power_cap_range', 'temp_crits', 'power', 'power_cap', + 'temperatures', 'voltages', 'frequencies', + 'fan_speed_range', 'fan_pwm_range', 'fan_enable', 'fan_target', + 'fan_speed', 'pwm_mode', 'fan_pwm']}} + + SensorType = Enum('type', 'SingleParam SingleString SingleStringSelect MinMax MLSS InputLabel InputLabelX MLMS') + _gbcf = 1.0/(1024*1024*1024) + _sensor_details = {GPU_Vendor.AMD: { + 'HWMON': { + 'power': {'type': SensorType.SingleParam, + 'cf': 0.000001, 'sensor': ['power1_average']}, + 'power_cap': {'type': SensorType.SingleParam, + 'cf': 0.000001, 'sensor': ['power1_cap']}, + 'power_cap_range': {'type': SensorType.MinMax, + 'cf': 0.000001, 'sensor': ['power1_cap_min', 'power1_cap_max']}, + 'fan_enable': {'type': SensorType.SingleParam, + 'cf': 1, 'sensor': ['fan1_enable']}, + 'fan_target': {'type': SensorType.SingleParam, + 'cf': 1, 'sensor': ['fan1_target']}, + 'fan_speed': {'type': SensorType.SingleParam, + 'cf': 1, 'sensor': ['fan1_input']}, + 'fan_speed_range': {'type': SensorType.MinMax, + 'cf': 1, 'sensor': ['fan1_min', 'fan1_max']}, + 'pwm_mode': {'type': SensorType.SingleParam, + 'cf': 1, 'sensor': ['pwm1_enable']}, + 'fan_pwm': {'type': SensorType.SingleParam, + 'cf': 0.39216, 'sensor': ['pwm1']}, + 'fan_pwm_range': {'type': SensorType.MinMax, + 'cf': 0.39216, 'sensor': ['pwm1_min', 'pwm1_max']}, + 'temp_crits': {'type': SensorType.InputLabelX, + 'cf': 0.001, 'sensor': ['temp*_crit']}, + 'frequencies': {'type': SensorType.InputLabelX, + 'cf': 0.000001, 'sensor': ['freq*_input']}, + 'voltages': {'type': SensorType.InputLabelX, + 'cf': 1, 'sensor': ['in*_input']}, + 'temperatures': {'type': SensorType.InputLabelX, + 'cf': 0.001, 'sensor': ['temp*_input']}, + 'vddgfx': {'type': SensorType.InputLabelX, + 'cf': 0.001, 'sensor': 
['in*_input']}}, 'DEVICE': { - 'id': {'type': 'mt', 'cf': None, - 'sensor': ['vendor', 'device', 'subsystem_vendor', 'subsystem_device']}, - 'unique_id': {'type': 'st', 'cf': None, 'sensor': ['unique_id']}, - 'loading': {'type': 'st', 'cf': None, 'sensor': ['gpu_busy_percent']}, - 'mem_loading': {'type': 'st', 'cf': None, 'sensor': ['mem_busy_percent']}, - 'link_spd': {'type': 'st', 'cf': None, 'sensor': ['current_link_speed']}, - 'link_wth': {'type': 'st', 'cf': None, 'sensor': ['current_link_width']}, - 'sclk_ps': {'type': 'ml', 'cf': None, 'sensor': ['pp_dpm_sclk']}, - 'mclk_ps': {'type': 'ml', 'cf': None, 'sensor': ['pp_dpm_mclk']}, - 'power_dpm_force': {'type': 'st', 'cf': None, + 'id': {'type': SensorType.MLMS, + 'cf': None, 'sensor': ['vendor', 'device', + 'subsystem_vendor', 'subsystem_device']}, + 'unique_id': {'type': SensorType.SingleString, + 'cf': None, 'sensor': ['unique_id']}, + 'loading': {'type': SensorType.SingleParam, + 'cf': 1, 'sensor': ['gpu_busy_percent']}, + 'mem_loading': {'type': SensorType.SingleParam, + 'cf': 1, 'sensor': ['mem_busy_percent']}, + 'mem_vram_total': {'type': SensorType.SingleParam, + 'cf': _gbcf, 'sensor': ['mem_info_vram_total']}, + 'mem_vram_used': {'type': SensorType.SingleParam, + 'cf': _gbcf, 'sensor': ['mem_info_vram_used']}, + 'mem_gtt_total': {'type': SensorType.SingleParam, + 'cf': _gbcf, 'sensor': ['mem_info_gtt_total']}, + 'mem_gtt_used': {'type': SensorType.SingleParam, + 'cf': _gbcf, 'sensor': ['mem_info_gtt_used']}, + 'link_spd': {'type': SensorType.SingleString, + 'cf': None, 'sensor': ['current_link_speed']}, + 'link_wth': {'type': SensorType.SingleString, + 'cf': None, 'sensor': ['current_link_width']}, + 'sclk_ps': {'type': SensorType.MLSS, + 'cf': None, 'sensor': ['pp_dpm_sclk']}, + 'mclk_ps': {'type': SensorType.MLSS, + 'cf': None, 'sensor': ['pp_dpm_mclk']}, + 'power_dpm_force': {'type': SensorType.SingleString, + 'cf': None, 'sensor': ['power_dpm_force_performance_level']}, - 'ppm': {'type': 'st*', 
'cf': None, 'sensor': ['pp_power_profile_mode']}, - 'vbios': {'type': 'st', 'cf': None, 'sensor': ['vbios_version']}}}} + 'ppm': {'type': SensorType.SingleStringSelect, + 'cf': None, 'sensor': ['pp_power_profile_mode']}, + 'vbios': {'type': SensorType.SingleString, + 'cf': None, 'sensor': ['vbios_version']}}}, + GPU_Vendor.PCIE: { + 'DEVICE': { + 'id': {'type': SensorType.MLMS, + 'cf': None, 'sensor': ['vendor', 'device', + 'subsystem_vendor', + 'subsystem_device']}}}} + + nv_query_items = {SensorSet.Static: { + 'power_cap': ['power.limit'], + 'power_cap_range': ['power.min_limit', 'power.max_limit'], + 'mem_vram_total': ['memory.total'], + 'frequencies_max': ['clocks.max.gr', 'clocks.max.sm', 'clocks.max.mem'], + 'vbios': ['vbios_version'], + 'compute_mode': ['compute_mode'], + 'driver': ['driver_version'], + 'model': ['name'], + 'serial_number': ['serial'], + 'card_index': ['index'], + 'unique_id': ['gpu_uuid']}, + SensorSet.Dynamic: { + 'power': ['power.draw'], + 'temperatures': ['temperature.gpu', 'temperature.memory'], + 'frequencies': ['clocks.gr', 'clocks.sm', 'clocks.mem', 'clocks.video'], + 'loading': ['utilization.gpu'], + 'mem_loading': ['utilization.memory'], + 'mem_vram_used': ['memory.used'], + 'fan_speed': ['fan.speed'], + 'ppm': ['gom.current'], + 'link_wth': ['pcie.link.width.current'], + 'link_spd': ['pcie.link.gen.current'], + 'pstates': ['pstate']}, + SensorSet.Monitor: { + 'power': ['power.draw'], + 'power_cap': ['power.limit'], + 'temperatures': ['temperature.gpu'], + 'frequencies': ['clocks.gr', 'clocks.mem'], + 'loading': ['utilization.gpu'], + 'mem_loading': ['utilization.memory'], + 'mem_vram_used': ['memory.used'], + 'fan_speed': ['fan.speed'], + 'ppm': ['gom.current'], + 'pstates': ['pstate']}, + SensorSet.All: { + 'power_cap': ['power.limit'], + 'power_cap_range': ['power.min_limit', 'power.max_limit'], + 'mem_vram_total': ['memory.total'], + 'vbios': ['vbios_version'], + 'driver': ['driver_version'], + 'compute_mode': ['compute_mode'], 
+ 'model': ['name'], + 'serial_number': ['serial'], + 'card_index': ['index'], + 'unique_id': ['gpu_uuid'], + 'power': ['power.draw'], + 'temperatures': ['temperature.gpu', 'temperature.memory'], + 'frequencies': ['clocks.gr', 'clocks.sm', 'clocks.mem', 'clocks.video'], + 'frequencies_max': ['clocks.max.gr', 'clocks.max.sm', 'clocks.max.mem'], + 'loading': ['utilization.gpu'], + 'mem_loading': ['utilization.memory'], + 'mem_vram_used': ['memory.used'], + 'fan_speed': ['fan.speed'], + 'ppm': ['gom.current'], + 'link_wth': ['pcie.link.width.current'], + 'link_spd': ['pcie.link.gen.current'], + 'pstates': ['pstate']}} - def __repr__(self): + def __repr__(self) -> Dict[str, any]: """ Return dictionary representing all parts of the GpuItem object. - :return: - :rtype: dict + + :return: Dictionary of core GPU parameters. """ return {'params': self.prm, 'clinfo': self.clinfo, 'sclk_state': self.sclk_state, 'mclk_state': self.mclk_state, 'vddc_curve': self.vddc_curve, 'vddc_curve_range': self.vddc_curve_range, 'ppm_modes': self.ppm_modes} - def __str__(self): + def __str__(self) -> str: """ - Return simple string representing the GpuItem object. - :return: - :rtype: str + Return simple string representing the GpuItem object. + + :return: GPU_item informational string """ return 'GPU_Item: uuid={}'.format(self.prm.uuid) - def __init__(self, item_id): + def __init__(self, item_id: str): """ Initialize GpuItem object. + :param item_id: UUID of the new item. - :type item_id: str """ time_0 = env.GUT_CONST.now(env.GUT_CONST.USELTZ) + self.validated_sensors = False self.energy = {'t0': time_0, 'tn': time_0, 'cumulative': 0.0} self.read_disabled = [] # List of parameters that failed during read. self.write_disabled = [] # List of parameters that failed during write. 
self.prm = ObjDict({'uuid': item_id, 'unique_id': '', - 'card_num': '', + 'card_num': None, 'pcie_id': '', 'driver': '', - 'vendor': '', + 'vendor': self.GPU_Vendor.Undefined, 'readable': False, 'writable': False, 'compute': False, 'compute_platform': None, - 'gpu_type': 0, + 'compute_mode': None, + 'gpu_type': self.GPU_Type.Undefined, 'id': {'vendor': '', 'device': '', 'subsystem_vendor': '', 'subsystem_device': ''}, 'model_device_decode': 'UNDETERMINED', 'model': '', - 'model_short': '', 'model_display': '', + 'serial_number': '', + 'card_index': '', 'card_path': '', + 'sys_card_path': '', 'hwmon_path': '', 'energy': 0.0, 'power': None, @@ -229,15 +435,22 @@ 'fan_speed_range': [None, None], 'fan_pwm_range': [None, None], 'fan_target': None, - 'temp': None, - 'temp_crit': None, + 'temp_crits': None, 'vddgfx': None, 'vddc_range': ['', ''], 'temperatures': None, 'voltages': None, 'frequencies': None, + 'frequencies_max': None, 'loading': None, 'mem_loading': None, + 'mem_vram_total': None, + 'mem_vram_used': None, + 'mem_vram_usage': None, + 'mem_gtt_total': None, + 'mem_gtt_used': None, + 'mem_gtt_usage': None, + 'pstate': None, 'mclk_ps': ['', ''], 'mclk_f_range': ['', ''], 'mclk_mask': '', @@ -248,7 +461,6 @@ 'link_wth': '', 'ppm': '', 'power_dpm_force': '', - # auto, low, high, manual, profile_standard, profile_min_sclk, profile_min_mclk, profile_peak 'vbios': ''}) self.clinfo = ObjDict({'device_name': '', 'device_version': '', @@ -272,15 +484,69 @@ self.vddc_curve = {} # {'1': ['Mhz', 'mV']} self.vddc_curve_range = {} # {'1': {SCLK: ['val1', 'val2'], VOLT: ['val1', 'val2']} self.ppm_modes = {} # {'1': ['Name', 'Description']} + self.finalize_fan_option() + + @classmethod + def finalize_fan_option(cls) -> None: + """ + Finalize class variables of gpu parameters based on command line options. This must be + done after setting of env. Doing at at the instantiation of a GpuItem assures that. 
+ """ + if cls._finalized: + return + cls.finalized = True + if not env.GUT_CONST.show_fans: + for fan_item in cls._fan_item_list: + # Remove fan params from GPU_Param_Labels + if fan_item in cls._GPU_Param_Labels.keys(): + del cls._GPU_Param_Labels[fan_item] + # Remove fan params from Table_Param_Labels + if fan_item in cls._table_param_labels.keys(): + del cls._table_param_labels[fan_item] + # Remove fan params from SensorSets + if fan_item in cls.sensor_sets[cls.SensorSet.Static]['HWMON']: + cls.sensor_sets[cls.SensorSet.Static]['HWMON'].remove(fan_item) + if fan_item in cls.sensor_sets[cls.SensorSet.Dynamic]['HWMON']: + cls.sensor_sets[cls.SensorSet.Dynamic]['HWMON'].remove(fan_item) + if fan_item in cls.sensor_sets[cls.SensorSet.Monitor]['HWMON']: + cls.sensor_sets[cls.SensorSet.Monitor]['HWMON'].remove(fan_item) + if fan_item in cls.sensor_sets[cls.SensorSet.All]['HWMON']: + cls.sensor_sets[cls.SensorSet.All]['HWMON'].remove(fan_item) + # Remove fan params from table param list + if fan_item in cls.table_parameters: + cls.table_parameters.remove(fan_item) + + @classmethod + def is_apu(cls, name: str) -> bool: + """ + Check if given GPU name is an APU. + + :param name: Target GPU name + :return: True if name matches APU name + """ + for apu_name in cls._apu_gpus: + if name in apu_name: + return True + return False + + @classmethod + def get_button_label(cls, name: str) -> str: + """ + Return button label for given parameter name. + + :param name: Parameter name + :return: Button label + """ + if name not in cls._button_labels.keys(): + raise KeyError('{} not in button_label dict'.format(name)) + return cls._button_labels[name] - def set_params_value(self, name, value): + def set_params_value(self, name: str, value: Union[int, float, str, list, None]) -> None: """ Set parameter value for give name. 
+ :param name: Parameter name - :type name: str :param value: parameter value - :type value: Union[int, str, list] - :return: None """ if isinstance(value, tuple): self.prm[name] = list(value) @@ -290,64 +556,156 @@ elif value == 1: self.prm[name][1] = 'Manual' else: self.prm[name][1] = 'Dynamic' elif name == 'ppm': - self.prm[name] = re.sub(r'[*].*', '', value).strip() - self.prm[name] = re.sub(r'[ ]+', '-', self.prm[name]) + self.prm[name] = re.sub(PATTERNS['PPM_CHK'], '', value).strip() + self.prm[name] = re.sub(PATTERNS['PPM_NOTCHK'], '-', self.prm[name]) elif name == 'power': time_n = env.GUT_CONST.now(env.GUT_CONST.USELTZ) - self.prm[name] = round(value, 1) + self.prm[name] = value delta_hrs = ((time_n - self.energy['tn']).total_seconds()) / 3600 self.energy['tn'] = time_n self.energy['cumulative'] += delta_hrs * value / 1000 self.prm['energy'] = round(self.energy['cumulative'], 6) elif name == 'sclk_ps': mask = '' - for ps in value: + for ps_val in value: if not mask: - mask = ps.split(':')[0].strip() + mask = ps_val.split(':')[0].strip() else: - mask += ',' + ps.split(':')[0].strip() - sclk_ps = ps.strip('*').strip().split(': ') + mask += ',' + ps_val.split(':')[0].strip() + sclk_ps = ps_val.strip('*').strip().split(': ') self.sclk_dpm_state.update({int(sclk_ps[0]): sclk_ps[1]}) - if re.search(r'\*', ps): + if '*' in ps_val: self.prm.sclk_ps[0] = int(sclk_ps[0]) self.prm.sclk_ps[1] = sclk_ps[1] self.prm.sclk_mask = mask - if env.GUT_CONST.DEBUG: print('mask: [{}], ps: [{}, {}]'.format(mask, *self.prm.sclk_ps)) + LOGGER.debug('Mask: [%s], ps: [%s, %s]', mask, self.prm.sclk_ps[0], self.prm.sclk_ps[1]) elif name == 'mclk_ps': mask = '' - for ps in value: + for ps_val in value: if not mask: - mask = ps.split(':')[0].strip() + mask = ps_val.split(':')[0].strip() else: - mask += ',' + ps.split(':')[0].strip() - mclk_ps = ps.strip('*').strip().split(': ') + mask += ',' + ps_val.split(':')[0].strip() + mclk_ps = ps_val.strip('*').strip().split(': ') 
self.mclk_dpm_state.update({int(mclk_ps[0]): mclk_ps[1]}) - if re.search(r'\*', ps): + if '*' in ps_val: self.prm.mclk_ps[0] = int(mclk_ps[0]) self.prm.mclk_ps[1] = mclk_ps[1] self.prm.mclk_mask = mask - if env.GUT_CONST.DEBUG: print('mask: [{}], ps: [{}, {}]'.format(mask, *self.prm.mclk_ps)) + LOGGER.debug('Mask: [%s], ps: [%s, %s]', mask, self.prm.mclk_ps[0], self.prm.mclk_ps[1]) elif name == 'fan_pwm': - self.prm.fan_pwm = int(value) + if isinstance(value, int): + self.prm.fan_pwm = value + elif isinstance(value, float): + self.prm.fan_pwm = int(value) + elif isinstance(value, str): + self.prm.fan_pwm = int(value) if value.isnumeric() else None + else: + self.prm.fan_pwm = None + elif re.fullmatch(PATTERNS['GPUMEMTYPE'], name): + self.prm[name] = value + self.set_memory_usage() elif name == 'id': self.prm.id = dict(zip(['vendor', 'device', 'subsystem_vendor', 'subsystem_device'], list(value))) - self.prm.model_device_decode = self.read_pciid_model() - if (self.prm.model_device_decode != 'UNDETERMINED' and - len(self.prm.model_device_decode) < 1.2*len(self.prm.model_short)): - self.prm.model_display = self.prm.model_device_decode + self.prm.model_display = self.prm.model_device_decode = self.read_pciid_model() else: self.prm[name] = value - def read_pciid_model(self): + def get_params_value(self, name: str, num_as_int: bool = False) -> Union[int, float, str, list, None]: + """ + Get parameter value for give name. + + :param name: Parameter name + :param num_as_int: Convert float to in if True + :return: Parameter value + """ + # Parameters with '_val' as a suffix are derived from a direct source. 
+ if re.fullmatch(PATTERNS['VAL_ITEM'], name): + if self.prm.gpu_type == self.GPU_Type.Legacy: + return None + if name == 'temp_val': + if not self.prm['temperatures']: + return None + if 'edge' in self.prm['temperatures'].keys(): + if num_as_int: + return int(self.prm['temperatures']['edge']) + return round(self.prm['temperatures']['edge'], 1) + if 'temperature.gpu' in self.prm['temperatures'].keys(): + if num_as_int: + return int(self.prm['temperatures']['temperature.gpu']) + return round(self.prm['temperatures']['temperature.gpu'], 1) + if self.prm['temperatures'].keys(): + return list(self.prm['temperatures'].keys())[0] + return None + if name == 'vddgfx_val': + if not self.prm['voltages']: + return np_nan + if 'vddgfx' not in self.prm['voltages']: + return np_nan + return int(self.prm['voltages']['vddgfx']) + if name == 'sclk_ps_val': + return self.prm['sclk_ps'][0] + if name == 'sclk_f_val': + if not self.prm['frequencies']: + return None + if 'sclk' in self.prm['frequencies'].keys(): + return int(self.prm['frequencies']['sclk']) + if 'clocks.gr' in self.prm['frequencies'].keys(): + return int(self.prm['frequencies']['clocks.gr']) + return self.prm['sclk_ps'][1] + if name == 'mclk_ps_val': + return self.prm['mclk_ps'][0] + if name == 'mclk_f_val': + if not self.prm['frequencies']: + return None + if 'mclk' in self.prm['frequencies'].keys(): + return int(self.prm['frequencies']['mclk']) + if 'clocks.mem' in self.prm['frequencies'].keys(): + return int(self.prm['frequencies']['clocks.mem']) + return self.prm['mclk_ps'][1] + + # Set type for params that could be float or int + if name in ['fan_pwm', 'fan_speed', 'power_cap', 'power']: + if num_as_int: + if isinstance(self.prm[name], int): + return self.prm[name] + if isinstance(self.prm[name], float): + return int(self.prm[name]) + if isinstance(self.prm[name], str): + return int(self.prm[name]) if self.prm[name].isnumeric() else None + return self.prm[name] + return self.prm[name] + + def set_memory_usage(self) 
-> None: + """ + Set system and vram memory usage percentage. + + :return: A tuple of the system and vram memory usage percentage. + """ + if self.prm.mem_gtt_used is None or self.prm.mem_gtt_total is None: + self.prm.mem_gtt_usage = None + else: + self.prm.mem_gtt_usage = 100.0 * self.prm.mem_gtt_used / self.prm.mem_gtt_total + + if self.prm.mem_vram_used is None or self.prm.mem_vram_total is None: + self.prm.mem_vram_usage = None + else: + self.prm.mem_vram_usage = 100.0 * self.prm.mem_vram_used / self.prm.mem_vram_total + + def read_pciid_model(self) -> str: """ Read the model name from the system pcid.ids file + :return: GPU model name - :rtype: str """ + LOGGER.debug('Logger active in module') + if not env.GUT_CONST.sys_pciid: + print('Error: Can not access system pci.ids file [{}]'.format(env.GUT_CONST.sys_pciid)) + return '' if not os.path.isfile(env.GUT_CONST.sys_pciid): print('Error: Can not access system pci.ids file [{}]'.format(env.GUT_CONST.sys_pciid)) - return None - with open(env.GUT_CONST.sys_pciid, 'r') as pci_id_file_ptr: + return '' + with open(env.GUT_CONST.sys_pciid, 'r', encoding='utf8') as pci_id_file_ptr: model_str = '' level = 0 for line_item in pci_id_file_ptr: @@ -357,321 +715,266 @@ if line[0] == '#': continue if level == 0: - if re.fullmatch(r'^[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F].*', line): + if re.fullmatch(PATTERNS['PCIIID_L0'], line): if line[:4] == self.prm.id['vendor'].replace('0x', ''): level += 1 continue elif level == 1: - if re.fullmatch(r'^[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F].*', line): + if re.fullmatch(PATTERNS['PCIIID_L0'], line): break - if re.fullmatch(r'^\t[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F].*', line): + if re.fullmatch(PATTERNS['PCIIID_L1'], line): if line[1:5] == self.prm.id['device'].replace('0x', ''): model_str = line[5:] level += 1 continue elif level == 2: - if re.fullmatch(r'^[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F].*', line): + if re.fullmatch(PATTERNS['PCIIID_L0'], line): break - if 
re.fullmatch(r'^\t[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F].*', line): + if re.fullmatch(PATTERNS['PCIIID_L1'], line): break - if re.fullmatch(r'^\t\t[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F].*', line): + if re.fullmatch(PATTERNS['PCIIID_L2'], line): if line[2:6] == self.prm.id['subsystem_vendor'].replace('0x', ''): if line[7:11] == self.prm.id['subsystem_device'].replace('0x', ''): model_str = line[11:] break return model_str.strip() - - def get_params_value(self, name): - """ - Get parameter value for give name. - :param name: Parameter name - :type name: str - :return: Parameter value - :rtype: Union[int, str, list] + + def populate_prm_from_dict(self, params: Dict[str, any]) -> None: """ - if re.fullmatch(r'.*_val', name): - if name == 'temp_val': - if 'edge' in self.prm['temperatures'].keys(): - return round(self.prm['temperatures']['edge'], 1) - return self.prm['temperatures'].keys()[0] - if name == 'vddgfx_val': - return int(self.prm['voltages']['vddgfx']) - if name == 'sclk_ps_val': - return self.prm['sclk_ps'][0] - if name == 'sclk_f_val': - if 'sclk' in self.prm['frequencies'].keys(): - return int(self.prm['frequencies']['sclk']) - return self.prm['sclk_ps'][1] - if name == 'mclk_ps_val': - return self.prm['mclk_ps'][0] - if name == 'mclk_f_val': - if 'mclk' in self.prm['frequencies'].keys(): - return int(self.prm['frequencies']['mclk']) - return self.prm['mclk_ps'][1] - return self.prm[name] + Populate elements of a GpuItem with items from a dict with keys that align to elements of GpuItem. - def populate(self, pcie_id, gpu_name, short_gpu_name, vendor, driver_module, card_path, hwmon_path, - readable, writable, compute, ocl_ver): + :param params: A dictionary of parameters with keys that align to GpuItem elements. """ - Populate elements of a GpuItem. - :param pcie_id: The pcid ID of the GPU. 
- :type pcie_id: str - :param gpu_name: Model name of the GPU - :type gpu_name: str - :param short_gpu_name: Short Model name of the GPU - :type short_gpu_name: str - :param vendor: The make of the GPU (AMD, NVIDIA, ...) - :type vendor: str - :param driver_module: The name of the driver. - :type driver_module: str - :param card_path: The path to the GPU. - :type card_path: str - :param hwmon_path: Path to the hardware monitor files. - :type hwmon_path: str - :param readable: readable compatibility flag - :type readable: bool - :param writable: writable compatibility flag - :type writable: bool - :param compute: Compute compatibility flag - :type compute: bool - :param ocl_ver: Compute platform Name - :type ocl_ver: str - :return: None - :rtype: None - """ - self.prm.pcie_id = pcie_id - self.prm.model = gpu_name - self.prm.model_short = short_gpu_name - self.prm.vendor = vendor - self.prm.driver = driver_module - self.prm.card_path = card_path - self.prm.card_num = int(card_path.replace('{}card'.format(env.GUT_CONST.card_root), '').replace('/device', '')) - self.prm.hwmon_path = hwmon_path - self.prm.readable = readable - self.prm.writable = writable - self.prm.compute = compute - self.prm.compute_platform = ocl_ver if compute else 'None' + LOGGER.debug('prm dict:\n%s', params) + set_ocl_ver = None + for source_name, source_value in params.items(): + # Set primary parameter + if source_name not in self.prm.keys(): + raise KeyError('Populate dict contains unmatched key: {}'.format(source_name)) + self.prm[source_name] = source_value + + # Set secondary parameters + if source_name == 'card_path' and source_value: + card_num_str = source_value.replace('{}card'.format(env.GUT_CONST.card_root), '').replace('/device', '') + self.prm.card_num = int(card_num_str) if card_num_str.isnumeric() else None + elif source_name == 'compute_platform': + set_ocl_ver = source_value + elif source_name == 'gpu_type' and source_value: + self.prm.gpu_type = source_value + if source_value 
== GpuItem.GPU_Type.Legacy: + self.read_disabled = GpuItem.LEGACY_Skip_List[:] + elif source_value == GpuItem.GPU_Type.APU: + self.read_disabled = GpuItem._fan_item_list[:] + + # compute platform requires that the compute bool be set first + if set_ocl_ver: + self.prm.compute_platform = set_ocl_ver if self.prm.compute else 'None' - def populate_ocl(self, ocl_dict): + def populate_ocl(self, ocl_dict: dict) -> None: """ - Populate ocl parameters in GpuItem + Populate ocl parameters in GpuItem. + :param ocl_dict: Dictionary of parameters for specific pcie_id - :type ocl_dict: dict - :return: None """ - for k, v in ocl_dict.items(): - if k in self.clinfo.keys(): - self.set_clinfo_value(k, v) + for ocl_name, ocl_val in ocl_dict.items(): + if ocl_name in self.clinfo.keys(): + self.set_clinfo_value(ocl_name, ocl_val) - def set_clinfo_value(self, name, value): + def set_clinfo_value(self, name: str, value: Union[int, str, list]) -> None: """ Set clinfo values in GPU item dictionary. + :param name: clinfo parameter name - :type name: str :param value: parameter value - :type value: Union[int, str, list] - :return: None - :rtype: None """ self.clinfo[name] = value - def get_clinfo_value(self, name): + def get_clinfo_value(self, name: str) -> Union[int, str, list]: """ Get clinfo parameter value for give name. + :param name: clinfo Parameter name - :type name: str :return: clinfo Parameter value - :rtype: Union[int, str, list] .. note: Maybe not needed """ return self.clinfo[name] - def get_nc_params_list(self): + def get_nc_params_list(self) -> List[str]: """ - Get list of parameter names for use with non-readable cards. + Get list of parameter names for use with non-compatible cards. + :return: List of parameter names - :rtype: list """ return self._GPU_NC_Param_List - def is_valid_power_cap(self, power_cap): + def is_valid_power_cap(self, power_cap: int) -> bool: """ Check if a given power_cap value is valid. + :param power_cap: Target power cap value to be tested. 
- :type power_cap: int :return: True if valid - :rtype: bool """ power_cap_range = self.prm.power_cap_range if power_cap_range[0] <= power_cap <= power_cap_range[1]: return True - elif power_cap < 0: + if power_cap < 0: # negative values will be interpreted as reset request return True return False - def is_valid_fan_pwm(self, pwm_value): + def is_valid_fan_pwm(self, pwm_value: int) -> bool: """ Check if a given fan_pwm value is valid. + :param pwm_value: Target fan_pwm value to be tested. - :type pwm_value: int :return: True if valid - :rtype: bool """ pwm_range = self.prm.fan_pwm_range if pwm_range[0] <= pwm_value <= pwm_range[1]: return True - elif pwm_value < 0: + if pwm_value < 0: # negative values will be interpreted as reset request return True return False - def is_valid_mclk_pstate(self, pstate): + def is_valid_mclk_pstate(self, pstate: List[int]) -> bool: """ Check if given mclk pstate value is valid. - .. note:: pstate = [pstate_number, clk_value, vddc_value] - :param pstate: - :type pstate: list[int] + + :param pstate: pstate = [pstate_number, clk_value, vddc_value] :return: Return True if valid - :rtype: bool """ mclk_range = self.prm.mclk_f_range - mclk_min = int(re.sub(r'[a-z,A-Z]*', '', str(mclk_range[0]))) - mclk_max = int(re.sub(r'[a-z,A-Z]*', '', str(mclk_range[1]))) + mclk_min = int(re.sub(PATTERNS['END_IN_ALPHA'], '', str(mclk_range[0]))) + mclk_max = int(re.sub(PATTERNS['END_IN_ALPHA'], '', str(mclk_range[1]))) if pstate[1] < mclk_min or pstate[1] > mclk_max: return False - if self.prm.gpu_type != 2: + if self.prm.gpu_type in [self.GPU_Type.PStatesNE, self.GPU_Type.PStates]: vddc_range = self.prm.vddc_range - vddc_min = int(re.sub(r'[a-z,A-Z]*', '', str(vddc_range[0]))) - vddc_max = int(re.sub(r'[a-z,A-Z]*', '', str(vddc_range[1]))) + vddc_min = int(re.sub(PATTERNS['END_IN_ALPHA'], '', str(vddc_range[0]))) + vddc_max = int(re.sub(PATTERNS['END_IN_ALPHA'], '', str(vddc_range[1]))) if pstate[2] < vddc_min or pstate[2] > vddc_max: return False 
return True - def is_valid_sclk_pstate(self, pstate): + def is_valid_sclk_pstate(self, pstate: List[int]) -> bool: """ Check if given sclk pstate value is valid. - pstate = [pstate_number, clk_value, vddc_value] - :param pstate: - :type pstate: list[int] + + :param pstate: pstate = [pstate_number, clk_value, vddc_value] :return: Return True if valid - :rtype: bool """ sclk_range = self.prm.sclk_f_range - sclk_min = int(re.sub(r'[a-z,A-Z]*', '', str(sclk_range[0]))) - sclk_max = int(re.sub(r'[a-z,A-Z]*', '', str(sclk_range[1]))) + sclk_min = int(re.sub(PATTERNS['END_IN_ALPHA'], '', str(sclk_range[0]))) + sclk_max = int(re.sub(PATTERNS['END_IN_ALPHA'], '', str(sclk_range[1]))) if pstate[1] < sclk_min or pstate[1] > sclk_max: return False - if self.prm.gpu_type != 2: + if self.prm.gpu_type in [self.GPU_Type.PStatesNE, self.GPU_Type.PStates]: vddc_range = self.prm.vddc_range - vddc_min = int(re.sub(r'[a-z,A-Z]*', '', str(vddc_range[0]))) - vddc_max = int(re.sub(r'[a-z,A-Z]*', '', str(vddc_range[1]))) + vddc_min = int(re.sub(PATTERNS['END_IN_ALPHA'], '', str(vddc_range[0]))) + vddc_max = int(re.sub(PATTERNS['END_IN_ALPHA'], '', str(vddc_range[1]))) if pstate[2] < vddc_min or pstate[2] > vddc_max: return False return True - def is_changed_sclk_pstate(self, pstate): + def is_changed_sclk_pstate(self, pstate: List[int]) -> bool: """ Check if given sclk pstate value different from current. 
- pstate = [pstate_number, clk_value, vddc_value] - :param pstate: - :type pstate: list[int] + + :param pstate: pstate = [pstate_number, clk_value, vddc_value] :return: Return True if changed - :rtype: bool """ - if int(re.sub(r'[a-z,A-Z]*', '', self.sclk_state[pstate[0]][0])) != pstate[1]: + if int(re.sub(PATTERNS['END_IN_ALPHA'], '', self.sclk_state[pstate[0]][0])) != pstate[1]: return True - if self.prm.gpu_type != 2: - if int(re.sub(r'[a-z,A-Z]*', '', self.sclk_state[pstate[0]][1])) != pstate[2]: + if self.prm.gpu_type in [self.GPU_Type.PStatesNE, self.GPU_Type.PStates]: + if int(re.sub(PATTERNS['END_IN_ALPHA'], '', self.sclk_state[pstate[0]][1])) != pstate[2]: return True return False - def is_changed_mclk_pstate(self, pstate): + def is_changed_mclk_pstate(self, pstate: List[int]) -> bool: """ Check if given mclk pstate value different from current. - pstate = [pstate_number, clk_value, vddc_value] - :param pstate: - :type pstate: list[int] + + :param pstate: pstate = [pstate_number, clk_value, vddc_value] :return: Return True if changed - :rtype: bool """ - if int(re.sub(r'[a-z,A-Z]*', '', self.mclk_state[pstate[0]][0])) != pstate[1]: + if int(re.sub(PATTERNS['END_IN_ALPHA'], '', self.mclk_state[pstate[0]][0])) != pstate[1]: return True - if self.prm.gpu_type != 2: - if int(re.sub(r'[a-z,A-Z]*', '', self.mclk_state[pstate[0]][1])) != pstate[2]: + if self.prm.gpu_type in [self.GPU_Type.PStatesNE, self.GPU_Type.PStates]: + if int(re.sub(PATTERNS['END_IN_ALPHA'], '', self.mclk_state[pstate[0]][1])) != pstate[2]: return True return False - def is_changed_vddc_curve_pt(self, pstate): + def is_changed_vddc_curve_pt(self, pstate: List[int]) -> bool: """ Check if given vddc curve point value different from current. 
- curve_point = [point_number, clk_value, vddc_value] - :param pstate: - :type pstate: list[int] + + :param pstate: curve_point = [point_number, clk_value, vddc_value] :return: Return True if changed - :rtype: bool """ - if int(re.sub(r'[a-z,A-Z]*', '', self.vddc_curve[pstate[0]][0])) != pstate[1]: + if int(re.sub(PATTERNS['END_IN_ALPHA'], '', self.vddc_curve[pstate[0]][0])) != pstate[1]: return True - if int(re.sub(r'[a-z,A-Z]*', '', self.vddc_curve[pstate[0]][1])) != pstate[2]: + if int(re.sub(PATTERNS['END_IN_ALPHA'], '', self.vddc_curve[pstate[0]][1])) != pstate[2]: return True return False - def is_valid_vddc_curve_pts(self, curve_pts): + def is_valid_vddc_curve_pts(self, curve_pts: List[int]) -> bool: """ Check if given sclk pstate value is valid. - curve_point = [point_number, clk_value, vddc_value] - :param curve_pts: - :type curve_pts: list[int] + + :param curve_pts: curve_point = [point_number, clk_value, vddc_value] :return: Return True if valid - :rtype: bool """ - sclk_min = int(re.sub(r'[a-z,A-Z]*', '', str(self.vddc_curve_range[str(curve_pts[0])]['SCLK'][0]))) - sclk_max = int(re.sub(r'[a-z,A-Z]*', '', str(self.vddc_curve_range[str(curve_pts[0])]['SCLK'][1]))) + sclk_min = int(re.sub(PATTERNS['END_IN_ALPHA'], '', str(self.vddc_curve_range[str(curve_pts[0])]['SCLK'][0]))) + sclk_max = int(re.sub(PATTERNS['END_IN_ALPHA'], '', str(self.vddc_curve_range[str(curve_pts[0])]['SCLK'][1]))) if curve_pts[1] < sclk_min or curve_pts[1] > sclk_max: return False - vddc_min = int(re.sub(r'[a-z,A-Z]*', '', str('650mV'))) - vddc_max = int(re.sub(r'[a-z,A-Z]*', '', str(self.vddc_curve_range[str(curve_pts[0])]['VOLT'][1]))) + vddc_min = int(re.sub(PATTERNS['END_IN_ALPHA'], '', str('650mV'))) + vddc_max = int(re.sub(PATTERNS['END_IN_ALPHA'], '', str(self.vddc_curve_range[str(curve_pts[0])]['VOLT'][1]))) if curve_pts[2] < vddc_min or curve_pts[2] > vddc_max: return False return True - def is_valid_pstate_list_str(self, ps_str, clk_name): + def 
is_valid_pstate_list_str(self, ps_str: str, clk_name: str) -> bool: """ - Check if the given p-states are valid for the given clock. + Check if the given p-states are valid for the given clock. + :param ps_str: String of comma separated pstate numbers - :type ps_str: str :param clk_name: The target clock name - :type clk_name: str :return: True if valid - :rtype: bool """ if ps_str == '': return True + if not re.fullmatch(PATTERNS['VALID_PS_STR'], ps_str): + return False ps_list = self.prm.mclk_mask.split(',') if clk_name == 'MCLK' else self.prm.sclk_mask.split(',') - for ps in ps_str.split(): - if ps not in ps_list: + for ps_val in ps_str.split(): + if ps_val not in ps_list: return False return True - def get_current_ppm_mode(self): + def get_current_ppm_mode(self) -> Union[None, List[Union[int, str]]]: """ Read GPU ppm definitions and current settings from driver files. :return: ppm state :rtype: list """ + if self.prm.vendor != GpuItem.GPU_Vendor.AMD: + return None if self.prm.power_dpm_force.lower() == 'auto': return [-1, 'AUTO'] ppm_item = self.prm.ppm.split('-') return [int(ppm_item[0]), ppm_item[1]] - def read_gpu_ppm_table(self): + def read_gpu_ppm_table(self) -> None: """ Read the ppm table. 
- :return: None """ - if not self.prm.readable: + if self.prm.vendor != GpuItem.GPU_Vendor.AMD: + return + if not self.prm.readable or self.prm.gpu_type in [GpuItem.GPU_Type.Legacy, GpuItem.GPU_Type.Unsupported]: return + file_path = os.path.join(self.prm.card_path, 'pp_power_profile_mode') if not os.path.isfile(file_path): print('Error getting power profile modes: {}'.format(file_path), file=sys.stderr) @@ -683,32 +986,35 @@ if re.fullmatch(r'[ ]+[0-9].*', line[0:3]): linestr = re.sub(r'[ ]*[*]*:', ' ', linestr) line_items = linestr.split() - if env.GUT_CONST.DEBUG: print('Debug: ppm line: {}'.format(linestr), file=sys.stderr) + LOGGER.debug('PPM line: %s', linestr) if len(line_items) < 2: print('Error: invalid ppm: {}'.format(linestr), file=sys.stderr) continue - if env.GUT_CONST.DEBUG: print('Debug: valid ppm: {}'.format(linestr), file=sys.stderr) + LOGGER.debug('Valid ppm line: %s', linestr) self.ppm_modes[line_items[0]] = line_items[1:] self.ppm_modes['-1'] = ['AUTO', 'Auto'] - rdata = self.read_gpu_sensor('power_dpm_force', vendor='AMD', sensor_type='DEVICE') + rdata = self.read_gpu_sensor('power_dpm_force', vendor=GpuItem.GPU_Vendor.AMD, sensor_type='DEVICE') if rdata is False: print('Error: card file does not exist: {}'.format(file_path), file=sys.stderr) + LOGGER.debug('Card file does not exist: %s', file_path) self.prm.readable = False else: self.set_params_value('power_dpm_force', rdata) - def read_gpu_pstates(self): + def read_gpu_pstates(self) -> None: """ Read GPU pstate definitions and parameter ranges from driver files. 
Set card type based on pstate configuration - :return: None """ - if not self.prm.readable: + if self.prm.vendor != GpuItem.GPU_Vendor.AMD: + return + if not self.prm.readable or self.prm.gpu_type in [GpuItem.GPU_Type.Legacy, + GpuItem.GPU_Type.Unsupported, + GpuItem.GPU_Type.APU]: return - range_mode = False - type_unknown = True + range_mode = False file_path = os.path.join(self.prm.card_path, 'pp_od_clk_voltage') if not os.path.isfile(file_path): print('Error getting p-states: {}'.format(file_path), file=sys.stderr) @@ -726,28 +1032,28 @@ clk_name = '' range_mode = True continue - lineitems = line.split() + line = re.sub(r'@', ' ', line) + lineitems: List[any] = line.split() lineitems_len = len(lineitems) - if type_unknown: + if self.prm.gpu_type in [self.GPU_Type.Undefined, self.GPU_Type.Supported]: if len(lineitems) == 3: - # type 1 GPU - self.prm.gpu_type = 1 + self.prm.gpu_type = self.GPU_Type.PStates elif len(lineitems) == 2: - self.prm.gpu_type = 2 - type_unknown = False - if lineitems_len < 2 or lineitems_len > 3: - print('Error: Invalid pstate entry: {}pp_od_clk_voltage'.format(self.prm.card_path), - file=sys.stderr) - continue + self.prm.gpu_type = self.GPU_Type.CurvePts + else: + print('Error: Invalid pstate entry length {} for{}: '.format(lineitems_len, + os.path.join(self.prm.card_path, 'pp_od_clk_voltage')), file=sys.stderr) + LOGGER.debug('Invalid line length for pstate line item: %s', line) + continue if not range_mode: lineitems[0] = int(re.sub(':', '', lineitems[0])) - if self.prm.gpu_type == 0 or self.prm.gpu_type == 1: + if self.prm.gpu_type in [self.GPU_Type.PStatesNE, self.GPU_Type.PStates]: if clk_name == 'OD_SCLK:': self.sclk_state[lineitems[0]] = [lineitems[1], lineitems[2]] elif clk_name == 'OD_MCLK:': self.mclk_state[lineitems[0]] = [lineitems[1], lineitems[2]] else: - # Type 2 + # Type GPU_Type.CurvePts if clk_name == 'OD_SCLK:': self.sclk_state[lineitems[0]] = [lineitems[1], '-'] elif clk_name == 'OD_MCLK:': @@ -767,10 +1073,8 @@ 
index = re.sub(r'\].*', '', index) param = re.sub(r'VDDC_CURVE_', '', lineitems[0]) param = re.sub(r'\[[0-9]\]:', '', param) - if env.GUT_CONST.DEBUG: - print('Curve: index: {} param: {}, val1 {}, val2: {}'.format(index, param, - lineitems[1], - lineitems[2])) + LOGGER.debug('Curve: index: %s param: %s, val1 %s, val2: %s', + index, param, lineitems[1], lineitems[2]) if index in self.vddc_curve_range.keys(): self.vddc_curve_range[index].update({param: [lineitems[1], lineitems[2]]}) else: @@ -779,20 +1083,62 @@ else: print('Error: Invalid CURVE entry: {}'.format(file_path), file=sys.stderr) - def read_gpu_sensor(self, parameter, vendor='AMD', sensor_type='HWMON'): + def read_gpu_sensor(self, parameter: str, vendor: GpuEnum = GPU_Vendor.AMD, + sensor_type: str = 'HWMON') -> Union[None, bool, int, str, tuple, list, dict]: """ Read sensor for the given parameter name. Process per sensor_details dict using the specified vendor name and sensor_type. + :param parameter: GpuItem parameter name (AMD) - :type parameter: str - :param vendor: GPU vendor name - :type vendor: str + :param vendor: GPU vendor name enum object :param sensor_type: GPU sensor name (HWMON or DEVICE) - :type sensor_type: str - :return: + :return: Value from reading sensor. """ - if vendor not in self._sensor_details.keys(): - print('Error: Invalid vendor [{}]'.format(vendor)) + if vendor in [self.GPU_Vendor.AMD, self.GPU_Vendor.PCIE]: + return self.read_gpu_sensor_generic(parameter, vendor, sensor_type) + if vendor == self.GPU_Vendor.NVIDIA: + return self.read_gpu_sensor_nv(parameter) + print('Error: Invalid vendor [{}]'.format(vendor)) + return None + + def read_gpu_sensor_nv(self, parameter: str) -> Union[None, bool, int, str, tuple, list, dict]: + """ + Function to read a single sensor from NV GPU. 
+ + :param parameter: Target parameter for reading + :return: read results + """ + if parameter in self.read_disabled: + return False + cmd_str = '{} -i {} --query-gpu={} --format=csv,noheader,nounits'.format( + env.GUT_CONST.cmd_nvidia_smi, self.prm.pcie_id, parameter) + LOGGER.debug('NV command:\n%s', cmd_str) + nsmi_item = None + try: + nsmi_item = subprocess.check_output(shlex.split(cmd_str), shell=False).decode().split('\n') + LOGGER.debug('NV raw query response: [%s]', nsmi_item) + except (subprocess.CalledProcessError, OSError) as except_err: + LOGGER.debug('NV query %s error: [%s]', nsmi_item, except_err) + self.read_disabled.append(parameter) + return False + return_item = nsmi_item[0].strip() if nsmi_item else None + LOGGER.debug('NV query result: [%s]', return_item) + return return_item + + def read_gpu_sensor_generic(self, parameter: str, vendor: GpuEnum = GPU_Vendor.AMD, + sensor_type: str = 'HWMON') -> Union[None, bool, int, str, tuple, list, dict]: + """ + Read sensor for the given parameter name. Process per sensor_details dict using the specified + vendor name and sensor_type. + + :param parameter: GpuItem parameter name (AMD) + :param vendor: GPU vendor name enum object + :param sensor_type: GPU sensor name (HWMON or DEVICE) + :return: Value from reading sensor. 
+ """ + if self.prm.gpu_type in [GpuItem.GPU_Type.Unsupported] and parameter != 'id': + return None + if not self.prm.readable and parameter != 'id': return None if sensor_type not in self._sensor_details[vendor].keys(): print('Error: Invalid sensor_type [{}]'.format(sensor_type)) @@ -804,239 +1150,367 @@ if parameter in self.read_disabled: return None - sensor_path = self.prm.hwmon_path if sensor_type == 'HWMON' else self.prm.card_path + device_sensor_path = self.prm.card_path if self.prm.card_path else self.prm.sys_card_path + LOGGER.debug('sensor path set to [%s]', device_sensor_path) + sensor_path = self.prm.hwmon_path if sensor_type == 'HWMON' else device_sensor_path values = [] ret_value = [] ret_dict = {} - if sensor_dict[parameter]['type'] == 'sl*': - sensor_files = glob.glob(os.path.join(sensor_path, sensor_dict[parameter]['sensor'][0])) + target_sensor = sensor_dict[parameter] + if target_sensor['type'] == self.SensorType.InputLabelX: + sensor_files = glob.glob(os.path.join(sensor_path, target_sensor['sensor'][0])) else: - sensor_files = sensor_dict[parameter]['sensor'] + sensor_files = target_sensor['sensor'] for sensor_file in sensor_files: file_path = os.path.join(sensor_path, sensor_file) if os.path.isfile(file_path): try: with open(file_path) as hwmon_file: - if sensor_dict[parameter]['type'] == 'st*' or sensor_dict[parameter]['type'] == 'ml': + if target_sensor['type'] in [self.SensorType.SingleStringSelect, self.SensorType.MLSS]: lines = hwmon_file.readlines() for line in lines: values.append(line.strip()) else: values.append(hwmon_file.readline().strip()) - if sensor_dict[parameter]['type'] == 'sl*': - with open(file_path.replace('input', 'label')) as hwmon_file: - values.append(hwmon_file.readline().strip()) + if target_sensor['type'] == self.SensorType.InputLabelX: + if '_input' in file_path: + file_path = file_path.replace('_input', '_label') + elif '_crit' in file_path: + file_path = file_path.replace('_crit', '_label') + else: + print('Error 
in sensor label pair: {}'.format(target_sensor)) + if os.path.isfile(file_path): + with open(file_path) as hwmon_file: + values.append(hwmon_file.readline().strip()) + else: + values.append(os.path.basename(sensor_file)) except OSError as err: - if env.GUT_CONST.DEBUG: - print('Error [{}]: Can not read HW file: {}'.format(err, file_path), file=sys.stderr) + LOGGER.debug('Exception [%s]: Can not read HW file: %s', err, file_path) self.read_disabled.append(parameter) return False else: - if env.GUT_CONST.DEBUG: print('Error: HW file does not exist: {}'.format(file_path), file=sys.stderr) + LOGGER.debug('HW file does not exist: %s', file_path) self.read_disabled.append(parameter) return False - if sensor_dict[parameter]['type'] == 'sp': - if sensor_dict[parameter]['cf'] == 1: + if target_sensor['type'] == self.SensorType.SingleParam: + if target_sensor['cf'] == 1: return int(values[0]) - return int(values[0])*sensor_dict[parameter]['cf'] - elif sensor_dict[parameter]['type'] == 'sl': - ret_value.append(int(values[0])*sensor_dict[parameter]['cf']) + return int(values[0]) * target_sensor['cf'] + if target_sensor['type'] == self.SensorType.InputLabel: + ret_value.append(int(values[0]) * target_sensor['cf']) ret_value.append(values[1]) return tuple(ret_value) - elif sensor_dict[parameter]['type'] == 'mt' or sensor_dict[parameter]['type'] == 'ml': + if target_sensor['type'] in [self.SensorType.MLSS, self.SensorType.MLMS]: return values - elif sensor_dict[parameter]['type'] == 'mm': - ret_value.append(int(int(values[0])*sensor_dict[parameter]['cf'])) - ret_value.append(int(int(values[1])*sensor_dict[parameter]['cf'])) + if target_sensor['type'] == self.SensorType.MinMax: + ret_value.append(int(int(values[0]) * target_sensor['cf'])) + ret_value.append(int(int(values[1]) * target_sensor['cf'])) return tuple(ret_value) - elif sensor_dict[parameter]['type'] == 'sl*': + if target_sensor['type'] == self.SensorType.InputLabelX: for i in range(0, len(values), 2): - 
ret_dict.update({values[i+1]: int(values[i])*sensor_dict[parameter]['cf']}) + ret_dict.update({values[i+1]: int(values[i]) * target_sensor['cf']}) return ret_dict - elif sensor_dict[parameter]['type'] == 'st*': + if target_sensor['type'] == self.SensorType.SingleStringSelect: for item in values: - if re.search(r'\*', item): + if '*' in item: return item return None - else: # 'st or st*' + if target_sensor['type'] == self.SensorType.SingleString: return values[0] + raise ValueError('Invalid sensor type: {}'.format(target_sensor['type'])) - def read_gpu_sensor_data(self, data_type='All'): + def read_gpu_sensor_set(self, data_type: Enum = SensorSet.All) -> bool: """ - Read GPU static data from HWMON path. - :param data_type: Test, Static, Dynamic, Info, State, or All - :type data_type: str - :return: None + Read GPU sensor data from HWMON and DEVICE sensors using the sensor set defined + by data_type. + + :param data_type: Specifies the sensor set: Dynamic, Static, Info, State, All Monitor """ - if not self.prm.readable: - return None + if self.prm.vendor == self.GPU_Vendor.AMD: + return self.read_gpu_sensor_set_amd(data_type) + if self.prm.vendor == self.GPU_Vendor.NVIDIA: + return self.read_gpu_sensor_set_nv(data_type) + return False - def concat_sensor_dicts(dict1, dict2): - """ - Concatenate dict2 onto dict1 - :param dict1: - :type dict1: dict - :param dict2: - :type dict2: dict - :return: None - """ - for st in dict2.keys(): - if st in dict1.keys(): - dict1[st] += dict2[st] + def read_gpu_sensor_set_nv(self, data_type: Enum = SensorSet.All) -> bool: + """ + Use the nvidia_smi tool to query GPU parameters. 
+ + :param data_type: specifies the set of sensors to read + :return: True if successful, else False and card will have read disabled + """ + if data_type not in self.nv_query_items.keys(): + raise TypeError('Invalid SensorSet value: [{}]'.format(data_type)) + + sensor_dict = GpuItem.nv_query_items[data_type] + nsmi_items = [] + query_list = [item for sublist in sensor_dict.values() for item in sublist] + query_list = [item for item in query_list if item not in self.read_disabled] + + if self.validated_sensors: + qry_string = ','.join(query_list) + cmd_str = '{} -i {} --query-gpu={} --format=csv,noheader,nounits'.format( + env.GUT_CONST.cmd_nvidia_smi, self.prm.pcie_id, qry_string) + LOGGER.debug('NV command:\n%s', cmd_str) + try: + nsmi_items = subprocess.check_output(shlex.split(cmd_str), shell=False).decode().split('\n') + LOGGER.debug('NV query (single-call) result: [%s]', nsmi_items) + except (subprocess.CalledProcessError, OSError) as except_err: + LOGGER.debug('NV query %s error: [%s]', nsmi_items, except_err) + return False + if nsmi_items: + nsmi_items = nsmi_items[0].split(',') + nsmi_items = [item.strip() for item in nsmi_items] + else: + # Read sensors one at a time if SensorSet.All has not been validated + if data_type == GpuItem.SensorSet.All: + self.validated_sensors = True + for query_item in query_list: + query_data = self.read_gpu_sensor_nv(query_item) + nsmi_items.append(query_data) + LOGGER.debug('NV query (each-call) query item [%s], result: [%s]', query_item, query_data) + if not nsmi_items: + LOGGER.debug('NV query (each-call) failed for all sensors, disabling read for card [%s]', + self.prm.card_num) + self.prm.readable = False + return False + + results = dict(zip(query_list, nsmi_items)) + LOGGER.debug('NV query result: %s', results) + + # Populate GpuItem data from results dictionary + for param_name, sensor_list in sensor_dict.items(): + if param_name == 'power_cap_range': + if results['power.min_limit'] and 
re.fullmatch(PATTERNS['IS_FLOAT'], results['power.min_limit']): + power_min = float(results['power.min_limit']) + else: + power_min = results['power.min_limit'] + if results['power.max_limit'] and re.fullmatch(PATTERNS['IS_FLOAT'], results['power.max_limit']): + power_max = float(results['power.max_limit']) else: - dict1.update({st: dict2[st]}) + power_max = results['power.max_limit'] + self.prm.power_cap_range = [power_min, power_max] + elif param_name == 'power': + if results['power.draw'] and re.fullmatch(PATTERNS['IS_FLOAT'], results['power.draw']): + power = float(results['power.draw']) + else: + power = None + self.set_params_value('power', power) + elif param_name == 'pstates': + pstate_str = re.sub(PATTERNS['ALPHA'], '', results['pstate']) + pstate = int(pstate_str) if pstate_str.isnumeric() else None + self.prm['sclk_ps'][0] = pstate + self.prm['mclk_ps'][0] = pstate + elif param_name in ['temperatures', 'voltages', 'frequencies', 'frequencies_max']: + self.prm[param_name] = {} + for sn_k in sensor_list: + if sn_k not in results: continue + if results[sn_k] and re.fullmatch(PATTERNS['IS_FLOAT'], results[sn_k]): + param_val = float(results[sn_k]) + else: + param_val = None + self.prm[param_name].update({sn_k: param_val}) + elif re.fullmatch(PATTERNS['GPUMEMTYPE'], param_name): + for sn_k in sensor_list: + if sn_k not in results: continue + mem_value = int(results[sn_k]) if results[sn_k].isnumeric else None + self.prm[param_name] = mem_value / 1024.0 + self.set_memory_usage() + elif param_name == 'fan_speed': + sn_k = sensor_list[0] + if re.fullmatch(PATTERNS['IS_FLOAT'], results[sn_k]): + self.prm[param_name] = float(results[sn_k]) + self.prm.fan_pwm = self.prm[param_name] + elif param_name == 'link_spd': + self.prm.link_spd = 'GEN{}'.format(results['pcie.link.gen.current']) + elif param_name == 'model': + self.prm.model = results['name'] + self.prm.model_display = self.prm.model_device_decode + if results['name'] and len(results['name']) < 
len(self.prm.model_device_decode): + self.prm.model_display = results['name'] + elif len(sensor_list) == 1: + sn_k = sensor_list[0] + if re.fullmatch(PATTERNS['IS_FLOAT'], results[sn_k]): + self.prm[param_name] = float(results[sn_k]) + elif not results[sn_k]: + self.prm[param_name] = None + self.prm[param_name] = results[sn_k] + return True + + def read_gpu_sensor_set_amd(self, data_type: Enum = SensorSet.All) -> bool: + """ + Read GPU sensor data from HWMON and DEVICE sensors using the sensor set defined + by data_type. + + :param data_type: Specifies the sensor set: Dynamic, Static, Info, State, All Monitor + """ + if not self.prm.readable: + return False - param_list_static = {'HWMON': ['power_cap_range', 'temp_crit']} - param_list_static_fan = {'HWMON': ['fan_speed_range', 'fan_pwm_range']} - param_list_dynamic = {'HWMON': ['power', 'power_cap', 'temperatures', 'voltages', 'frequencies']} - param_list_dynamic_fan = {'HWMON': ['fan_enable', 'fan_target', 'fan_speed', 'pwm_mode', 'fan_pwm']} - param_list_info = {'DEVICE': ['id', 'unique_id', 'vbios']} - param_list_state = {'DEVICE': ['loading', 'mem_loading', 'link_spd', 'link_wth', 'sclk_ps', 'mclk_ps', 'ppm', - 'power_dpm_force']} - param_list_state_mon = {'DEVICE': ['loading', 'mem_loading', 'sclk_ps', 'mclk_ps', 'power_dpm_force', 'ppm']} - param_list_all = {'DEVICE': ['id', 'unique_id', 'vbios', 'loading', 'mem_loading', 'link_spd', 'link_wth', - 'sclk_ps', 'mclk_ps', 'ppm', 'power_dpm_force'], - 'HWMON': ['power_cap_range', 'temp_crit', 'power', 'power_cap', 'temperatures', - 'voltages', 'frequencies']} - param_list_all_fan = {'HWMON': ['fan_speed_range', 'fan_pwm_range', 'fan_enable', 'fan_target', 'fan_speed', - 'pwm_mode', 'fan_pwm']} - - if data_type == 'Static': - param_list = param_list_static.copy() - if env.GUT_CONST.show_fans: - concat_sensor_dicts(param_list, param_list_static_fan) - elif data_type == 'DynamicM': - param_list = param_list_dynamic.copy() - if env.GUT_CONST.show_fans: - 
concat_sensor_dicts(param_list, param_list_dynamic_fan) - elif data_type == 'Dynamic': - param_list = param_list_dynamic.copy() - if env.GUT_CONST.show_fans: - concat_sensor_dicts(param_list, param_list_dynamic_fan) - elif data_type == 'Info': - param_list = param_list_info - elif data_type == 'StateM': - param_list = param_list_state_mon - elif data_type == 'State': - param_list = param_list_state - else: # '== All' - param_list = param_list_all.copy() - if env.GUT_CONST.show_fans: - concat_sensor_dicts(param_list, param_list_all_fan) + return_status = False + param_list = self.sensor_sets[data_type] for sensor_type, param_names in param_list.items(): for param in param_names: - if env.GUT_CONST.DEBUG: print('Processing parameter: {}'.format(param)) + LOGGER.debug('Processing parameter: %s', param) rdata = self.read_gpu_sensor(param, vendor=self.prm.vendor, sensor_type=sensor_type) if rdata is False: if param != 'unique_id': + LOGGER.debug('Error reading parameter: %s disabling for %s', param, self.prm.card_num) print('Warning: Error reading parameter: {}, disabling for this GPU: {}'.format(param, self.prm.card_num)) elif rdata is None: - if env.GUT_CONST.DEBUG: print('Warning: Invalid or disabled parameter: {}'.format(param)) + LOGGER.debug('Read data [%s], Invalid or disabled parameter: %s', rdata, param) else: - if env.GUT_CONST.DEBUG: print('Valid data [{}] for parameter: {}'.format(rdata, param)) + LOGGER.debug('Valid data [%s] for parameter: %s', rdata, param) self.set_params_value(param, rdata) - return None + return_status = True + return return_status - def print_ppm_table(self): + def print_ppm_table(self) -> None: """ Print human friendly table of ppm parameters. 
- :return: None """ - if not self.prm.readable: - if env.GUT_CONST.DEBUG: print('PPM for card number {} not readable.'.format(self.prm.card_num)) + if self.prm.vendor != GpuItem.GPU_Vendor.AMD: return - print('Card Number: {}'.format(self.prm.card_num)) - print(' Card Model: {}'.format(self.prm.model_display)) - print(' Card: {}'.format(self.prm.card_path)) - print(' Power Performance Mode: {}'.format(self.prm.power_dpm_force)) - for k, v in self.ppm_modes.items(): - print(' {:>2}: {:>16}'.format(k, v[0]), end='') - for v_item in v[1:]: - print('{:>18}'.format(v_item), end='') - print('') - print('') - def print_pstates(self): + if not self.prm.readable or self.prm.gpu_type in [GpuItem.GPU_Type.Legacy, GpuItem.GPU_Type.Unsupported]: + LOGGER.debug('PPM for card number %s not readable.', self.prm.card_num) + return + pre = ' ' + print('{}: {}'.format(self._GPU_Param_Labels['card_num'], self.prm.card_num)) + print('{}{}: {}'.format(pre, self._GPU_Param_Labels['model'], self.prm.model)) + print('{}{}: {}'.format(pre, self._GPU_Param_Labels['card_path'], self.prm.card_path)) + print('{}{}: {}'.format(pre, self._GPU_Param_Labels['gpu_type'], self.prm.gpu_type.name)) + print('{}{}: {}'.format(pre, self._GPU_Param_Labels['power_dpm_force'], self.prm.power_dpm_force)) + print('{}{}{}'.format(pre, '', '#'.ljust(50, '#'))) + file_path = os.path.join(self.prm.card_path, 'pp_power_profile_mode') + with open(file_path, 'r') as file_ptr: + lines = file_ptr.readlines() + for line in lines: + print(' {}'.format(line.strip('\n'))) + + def print_pstates(self) -> None: """ Print human friendly table of p-states. 
- :return: None """ - if not self.prm.readable: - if env.GUT_CONST.DEBUG: print('P-States for card number {} not readable.'.format(self.prm.card_num)) + if self.prm.vendor != GpuItem.GPU_Vendor.AMD: return - print('Card Number: {}'.format(self.prm.card_num)) - print(' Card Model: {}'.format(self.prm.model_display)) - print(' Card: {}'.format(self.prm.card_path)) - print(' Type: {}'.format(self.prm.gpu_type)) - if self.prm.gpu_type == 2: - print(' SCLK: {:<17} MCLK:'.format(' ')) - for k, v in self.sclk_dpm_state.items(): - print(' {:>1}: {:<8} '.format(k, v), end='') - if k in self.mclk_dpm_state.keys(): - print('{:3>}: {:<8}'.format(k, self.mclk_dpm_state[k])) + + if not self.prm.readable or self.prm.gpu_type in [GpuItem.GPU_Type.Legacy, GpuItem.GPU_Type.Unsupported]: + LOGGER.debug('P-states for card number %s not readable.', self.prm.card_num) + return + pre = ' ' + print('{}: {}'.format(self._GPU_Param_Labels['card_num'], self.prm.card_num)) + print('{}{}: {}'.format(pre, self._GPU_Param_Labels['model'], self.prm.model)) + print('{}{}: {}'.format(pre, self._GPU_Param_Labels['card_path'], self.prm.card_path)) + print('{}{}: {}'.format(pre, self._GPU_Param_Labels['gpu_type'], self.prm.gpu_type.name)) + + # DPM States + if self.prm.gpu_type == self.GPU_Type.CurvePts: + print('{}{}{}'.format(pre, '', '#'.ljust(50, '#'))) + print('{}DPM States:'.format(pre)) + print('{}SCLK: {:<17} MCLK:'.format(pre, ' ')) + for ps_num, ps_freq in self.sclk_dpm_state.items(): + print('{} {:>1}: {:<8} '.format(pre, ps_num, ps_freq), end='') + if ps_num in self.mclk_dpm_state.keys(): + print('{:3>}: {:<8}'.format(ps_num, self.mclk_dpm_state[ps_num])) else: print('') - print(' SCLK: {:<17} MCLK:'.format(' ')) - for k, v in self.sclk_state.items(): - print(' {:>1}: {:<8} {:<8} '.format(k, v[0], v[1]), end='') - if k in self.mclk_state.keys(): - print('{:3>}: {:<8} {:<8}'.format(k, self.mclk_state[k][0], self.mclk_state[k][1])) + + # pp_od_clk_voltage states + print('{}{}{}'.format(pre, 
'', '#'.ljust(50, '#'))) + print('{}PP OD States:'.format(pre)) + print('{}SCLK: {:<17} MCLK:'.format(pre, ' ')) + for ps_num, ps_vals in self.sclk_state.items(): + print('{} {:>1}: {:<8} {:<8} '.format(pre, ps_num, ps_vals[0], ps_vals[1]), end='') + if ps_num in self.mclk_state.keys(): + print('{:3>}: {:<8} {:<8}'.format(ps_num, self.mclk_state[ps_num][0], self.mclk_state[ps_num][1])) else: print('') - if self.prm.gpu_type == 2: - print(' VDDC_CURVE:') - for k, v in self.vddc_curve.items(): - print(' {}: {}'.format(k, v)) + if self.prm.gpu_type == self.GPU_Type.CurvePts: + # Curve points + print('{}{}{}'.format(pre, '', '#'.ljust(50, '#'))) + print('{}VDDC_CURVE:'.format(pre)) + for vc_index, vc_vals in self.vddc_curve.items(): + print('{} {}: {}'.format(pre, vc_index, vc_vals)) print('') - def print(self, clflag=False): + def print(self, short: bool = False, clflag: bool = False) -> None: """ Display ls like listing function for GPU parameters. + :param clflag: Display clinfo data if True - :type clflag: bool - :return: None + :param short: Display short listing """ - for k, v in self._GPU_Param_Labels.items(): + pre = '' + for param_name, param_label in self._GPU_Param_Labels.items(): + if short: + if param_name not in self.short_list: + continue + + if self.prm.vendor == GpuItem.GPU_Vendor.NVIDIA: + if param_name in self.NV_Skip_List: + continue + elif self.prm.vendor == GpuItem.GPU_Vendor.AMD: + if param_name in self.AMD_Skip_List: + continue + + if self.prm.gpu_type == self.GPU_Type.APU: + if param_name in self._fan_item_list: + continue + if 'Range' in param_label: + continue + if self.prm.gpu_type == self.GPU_Type.Legacy: + if param_name in self.LEGACY_Skip_List: + continue if not self.prm.readable: - if k not in self.get_nc_params_list(): + if param_name not in self.get_nc_params_list(): continue - pre = '' if k == 'card_num' else ' ' + pre = '' if param_name == 'card_num' else ' ' - if re.search(r'sep[0-9]', k): - print('{}{}'.format(pre, v.ljust(50, v))) 
+ if re.search(r'sep[0-9]', param_name): + print('{}{}'.format(pre, param_label.ljust(50, param_label))) continue - if k == 'unique_id': + if param_name == 'unique_id': if self.prm.unique_id is None: continue - if self.prm.gpu_type == 2 and k == 'vddc_range': + if self.prm.gpu_type == self.GPU_Type.CurvePts and param_name == 'vddc_range': continue - print('{}{}: {}'.format(pre, v, self.get_params_value(k))) + if isinstance(self.get_params_value(param_name), float): + print('{}{}: {:.3f}'.format(pre, param_label, self.get_params_value(param_name))) + elif isinstance(self.get_params_value(param_name), dict): + param_dict = self.get_params_value(param_name) + print('{}{}: {}'.format(pre, param_label, {key: param_dict[key] for key in sorted(param_dict)})) + elif self.get_params_value(param_name) == '': + print('{}{}: {}'.format(pre, param_label, None)) + else: + print('{}{}: {}'.format(pre, param_label, self.get_params_value(param_name))) if clflag and self.prm.compute: - for k, v in self._GPU_CLINFO_Labels.items(): - if re.search(r'sep[0-9]', k): - print('{}{}'.format(pre, v.ljust(50, v))) + for param_name, param_label in self._GPU_CLINFO_Labels.items(): + if re.search(r'sep[0-9]', param_name): + print('{}{}'.format(pre, param_label.ljust(50, param_label))) continue - print('{}: {}'.format(v, self.get_clinfo_value(k))) + print('{}: {}'.format(param_label, self.get_clinfo_value(param_name))) print('') - def get_plot_data(self, gpu_list): + def get_plot_data(self) -> dict: """ - Return a dictionary of dynamic gpu parameters used by amdgpu-plot to populate a df. - :param gpu_list: GpuList object - :type gpu_list: GpuList + Return a dictionary of dynamic gpu parameters used by gpu-plot to populate a df. + :return: Dictionary of GPU state info for plot data. 
- :rtype: dict """ - gpu_state = {'Time': str(self.energy['tn'].strftime('%c')).strip(), 'Card#': int(self.prm.card_num)} + gpu_state = {'Time': str(self.energy['tn'].strftime(env.GUT_CONST.TIME_FORMAT)), + 'Card#': int(self.prm.card_num)} - for table_item in gpu_list.table_parameters(): - gpu_state_str = str(re.sub('M[Hh]z', '', str(self.get_params_value(table_item)))).strip() - if gpu_state_str.isnumeric(): + for table_item in self.table_parameters: + gpu_state_str = str(re.sub(PATTERNS['MHz'], '', str(self.get_params_value(table_item)))).strip() + if gpu_state_str == 'nan': + gpu_state[table_item] = np_nan + elif gpu_state_str.isnumeric(): gpu_state[table_item] = int(gpu_state_str) - elif re.fullmatch(r'[0-9]+.[0-9]*', gpu_state_str) or re.fullmatch(r'[0-9]*.[0-9]+', gpu_state_str): + elif re.fullmatch(PATTERNS['IS_FLOAT'], gpu_state_str): gpu_state[table_item] = float(gpu_state_str) elif gpu_state_str == '' or gpu_state_str == '-1' or gpu_state_str == 'NA' or gpu_state_str is None: gpu_state[table_item] = 'NA' @@ -1045,164 +1519,205 @@ return gpu_state +GpuDict = Dict[str, GpuItem] + + class GpuList: """ A list of GpuItem indexed with uuid. It also contains a table of parameters used for tabular printouts """ - # Table parameters labels. 
- if env.GUT_CONST.show_fans: - _table_parameters = ['model_display', 'loading', 'mem_loading', 'power', 'power_cap', 'energy', 'temp_val', - 'vddgfx_val', 'fan_pwm', 'sclk_f_val', 'sclk_ps_val', 'mclk_f_val', 'mclk_ps_val', 'ppm'] - _table_param_labels = {'model_display': 'Model', - 'loading': 'Load %', - 'mem_loading': 'Mem Load %', - 'power': 'Power (W)', - 'power_cap': 'Power Cap (W)', - 'energy': 'Energy (kWh)', - 'temp_val': 'T (C)', - 'vddgfx_val': 'VddGFX (mV)', - 'fan_pwm': 'Fan Spd (%)', - 'sclk_f_val': 'Sclk (MHz)', - 'sclk_ps_val': 'Sclk Pstate', - 'mclk_f_val': 'Mclk (MHz)', - 'mclk_ps_val': 'Mclk Pstate', - 'ppm': 'Perf Mode'} - else: - _table_parameters = ['model_display', 'loading', 'mem_loading', 'power', 'power_cap', 'energy', 'temp_val', - 'vddgfx_val', 'sclk_f_val', 'sclk_ps_val', 'mclk_f_val', 'mclk_ps_val', 'ppm'] - _table_param_labels = {'model_display': 'Model', - 'loading': 'Load %', - 'mem_loading': 'Mem Load %', - 'power': 'Power (W)', - 'power_cap': 'Power Cap (W)', - 'energy': 'Energy (kWh)', - 'temp_val': 'T (C)', - 'vddgfx_val': 'VddGFX (mV)', - 'sclk_f_val': 'Sclk (MHz)', - 'sclk_ps_val': 'Sclk Pstate', - 'mclk_f_val': 'Mclk (MHz)', - 'mclk_ps_val': 'Mclk Pstate', - 'ppm': 'Perf Mode'} + def __init__(self) -> None: + self.list: GpuDict = {} + self.opencl_map = {} + self.amd_featuremask = None + self.amd_wattman = False + self.amd_writable = False + self.nv_readwritable = False - def __repr__(self): + def __repr__(self) -> dict: return self.list - def __str__(self): + def __str__(self) -> str: return 'GPU_List: Number of GPUs: {}'.format(self.num_gpus()) - def __init__(self): - self.list = {} - self.opencl_map = {} - self.amd_featuremask = None - self.amd_wattman = False - self.amd_writable = False - self.nv_writable = False + def __getitem__(self, uuid: str) -> GpuItem: + if uuid in self.list: + return self.list[uuid] + raise KeyError('KeyError: invalid uuid: {}'.format(uuid)) + + def __setitem__(self, uuid: str, value: GpuItem) -> 
None: + self.list[uuid] = value + + def __iter__(self) -> Generator[GpuItem, None, None]: + for value in self.list.values(): + yield value + + def items(self) -> Generator[Union[str, GpuItem], None, None]: + """ + Get uuid, gpu pairs from a GpuList object. + + :return: uuid, gpu pair + """ + for key, value in self.list.items(): + yield key, value + + def uuids(self) -> Generator[str, None, None]: + """ + Get uuids of the GpuList object. + + :return: uuids from the GpuList object. + """ + for key in self.list: + yield key - def wattman_status(self): + def gpus(self) -> Generator[GpuItem, None, None]: + """ + Get GpuItems from a GpuList object. + + :return: GpuUItem + """ + return self.__iter__() + + def add(self, gpu_item: GpuItem) -> None: + """ + Add given GpuItem to the GpuList. + + :param gpu_item: Item to be added + """ + self[gpu_item.prm.uuid] = gpu_item + LOGGER.debug('Added GPU Item %s to GPU List', gpu_item.prm.uuid) + + def get_pcie_map(self) -> dict: + """ + Get mapping of card number to pcie address as dict. + + :return: dict of num: pcieid + """ + pcie_dict = {} + for gpu in self.gpus(): + pcie_dict.update({gpu.prm.card_num: gpu.prm.pcie_id}) + return pcie_dict + + def wattman_status(self) -> str: """ Display Wattman status. + :return: Status string - :rtype: str """ + LOGGER.debug('AMD featuremask: %s', hex(self.amd_featuremask)) if self.amd_wattman: return 'Wattman features enabled: {}'.format(hex(self.amd_featuremask)) return 'Wattman features not enabled: {}, See README file.'.format(hex(self.amd_featuremask)) - def add(self, gpu_item): - """ - Add given GpuItem to the GpuList - :param gpu_item: Item to be added - :type gpu_item: GpuItem - :return: None - """ - self.list[gpu_item.prm.uuid] = gpu_item - - def table_param_labels(self): + @staticmethod + def table_param_labels() -> dict: """ Get dictionary of parameter labels to be used in table reports. 
+ :return: Dictionary of table parameters/labels - :rtype: dict """ - return self._table_param_labels + return GpuItem._table_param_labels - def table_parameters(self): + @staticmethod + def table_parameters() -> List[str]: """ Get list of parameters to be used in table reports. + :return: List of table parameters - :rtype: list """ - return self._table_parameters + return GpuItem.table_parameters + + @staticmethod + def get_gpu_pci_list() -> Union[List[str], None]: + """ + Use call to lspci to get a list of pci addresses of all GPUs. - def set_gpu_list(self, clinfo_flag=False): + :return: List of GPU pci addresses. + """ + pci_list = [] + try: + lspci_output = subprocess.check_output(env.GUT_CONST.cmd_lspci, shell=False).decode().split('\n') + except (subprocess.CalledProcessError, OSError) as except_err: + print('Error [{}]: lspci failed to find GPUs'.format(except_err)) + return None + + for lspci_line in lspci_output: + if re.search(PATTERNS['PCI_GPU'], lspci_line): + LOGGER.debug('Found GPU pci: %s', lspci_line) + pciid = re.search(env.GUT_CONST.PATTERNS['PCI_ADD'], lspci_line) + if pciid: + pci_list.append(pciid.group(0)) + return pci_list + + def set_gpu_list(self, clinfo_flag: bool = False) -> bool: """ Use lspci to populate list of all installed GPUs. 
+ :return: True on success - :rtype: bool """ if not env.GUT_CONST.cmd_lspci: return False if clinfo_flag: self.read_gpu_opencl_data() - if env.GUT_CONST.DEBUG: print('openCL map: {}'.format(self.opencl_map)) + LOGGER.debug('OpenCL map: %s', self.opencl_map) # Check AMD writability try: self.amd_featuremask = env.GUT_CONST.read_amdfeaturemask() except FileNotFoundError: self.amd_wattman = self.amd_writable = False + self.amd_wattman = self.amd_writable = (self.amd_featuremask == int(0xffff7fff) or + self.amd_featuremask == int(0xffffffff) or + self.amd_featuremask == int(0xfffd7fff)) - self.amd_wattman = self.amd_writable = True if (self.amd_featuremask == int(0xffff7fff) or - self.amd_featuremask == int(0xffffffff) or - self.amd_featuremask == int(0xfffd7fff)) else False - - # Check NV writability + # Check NV read/writability if env.GUT_CONST.cmd_nvidia_smi: - self.nv_writable = True + self.nv_readwritable = True - try: - pcie_ids = subprocess.check_output('{} | grep -E \"^.*(VGA|3D|Display).*$\" | grep -Eo \ - \"^([0-9a-fA-F]+:[0-9a-fA-F]+.[0-9a-fA-F])\"'.format( - env.GUT_CONST.cmd_lspci), shell=True).decode().split() - except (subprocess.CalledProcessError, OSError) as except_err: - print('Error [{}]: lspci failed to find GPUs'.format(except_err)) + pcie_ids = self.get_gpu_pci_list() + if not pcie_ids: + print('Error [{}]: lspci failed to find GPUs') return False - if env.GUT_CONST.DEBUG: print('Found {} GPUs'.format(len(pcie_ids))) + LOGGER.debug('Found %s GPUs', len(pcie_ids)) for pcie_id in pcie_ids: + # Initial GPU Item gpu_uuid = uuid4().hex self.add(GpuItem(gpu_uuid)) - if env.GUT_CONST.DEBUG: print('GPU: {}'.format(pcie_id)) + LOGGER.debug('GPU: %s', pcie_id) + gpu_name = 'UNKNOWN' + driver_module = 'UNKNOWN' + card_path = '' + sys_card_path = '' + hwmon_path = '' readable = writable = compute = False + gpu_type = GpuItem.GPU_Type.Undefined + vendor = GpuItem.GPU_Vendor.Undefined + opencl_device_version = None if clinfo_flag else 'UNKNOWN' + + # Get 
more GPU details from lspci -k -s + cmd_str = '{} -k -s {}'.format(env.GUT_CONST.cmd_lspci, pcie_id) try: - lspci_items = subprocess.check_output('{} -k -s {}'.format(env.GUT_CONST.cmd_lspci, pcie_id), - shell=True).decode().split('\n') + lspci_items = subprocess.check_output(shlex.split(cmd_str), shell=False).decode().split('\n') except (subprocess.CalledProcessError, OSError) as except_err: + LOGGER.debug('Fatal error [%s]: Can not get GPU details with lspci.', except_err) print('Fatal Error [{}]: Can not get GPU details with lspci'.format(except_err)) sys.exit(-1) - if env.GUT_CONST.DEBUG: print(lspci_items) + LOGGER.debug('lspci output items:\n %s', lspci_items) # Get Long GPU Name - gpu_name = 'UNKNOWN' gpu_name_items = lspci_items[0].split(': ', maxsplit=1) if len(gpu_name_items) >= 2: gpu_name = gpu_name_items[1] - try: - short_gpu_name = gpu_name.split('[AMD/ATI]')[1] - except IndexError: - short_gpu_name = 'UNKNOWN' # Check for Fiji ProDuo - srch_obj = re.search('Fiji', gpu_name) - if srch_obj: - srch_obj = re.search(r'Radeon Pro Duo', lspci_items[1].split('[AMD/ATI]')[1]) - if srch_obj: + if re.search('Fiji', gpu_name): + if re.search(r'Radeon Pro Duo', lspci_items[1].split('[AMD/ATI]')[1]): gpu_name = 'Radeon Fiji Pro Duo' # Get GPU brand: AMD, INTEL, NVIDIA, ASPEED - vendor = 'UNKNOWN' - opencl_device_version = None if clinfo_flag else 'UNKNOWN' - srch_obj = re.search(r'(AMD|amd|ATI|ati)', gpu_name) - if srch_obj: - vendor = 'AMD' + if re.search(PATTERNS['AMD_GPU'], gpu_name): + vendor = GpuItem.GPU_Vendor.AMD + gpu_type = GpuItem.GPU_Type.Supported if self.opencl_map: if pcie_id in self.opencl_map.keys(): if 'device_version' in self.opencl_map[pcie_id].keys(): @@ -1210,9 +1725,11 @@ compute = True else: compute = True - srch_obj = re.search(r'(NVIDIA|nvidia|nVidia)', gpu_name) - if srch_obj: - vendor = 'NVIDIA' + if re.search(PATTERNS['NV_GPU'], gpu_name): + vendor = GpuItem.GPU_Vendor.NVIDIA + if env.GUT_CONST.cmd_nvidia_smi: + readable = True + 
gpu_type = GpuItem.GPU_Type.Supported if self.opencl_map: if pcie_id in self.opencl_map.keys(): if 'device_version' in self.opencl_map[pcie_id].keys(): @@ -1220,73 +1737,128 @@ compute = True else: compute = True - srch_obj = re.search(r'(INTEL|intel|Intel)', gpu_name) - if srch_obj: - vendor = 'INTEL' + if re.search(PATTERNS['INTC_GPU'], gpu_name): + vendor = GpuItem.GPU_Vendor.INTEL + gpu_type = GpuItem.GPU_Type.Unsupported if self.opencl_map: if pcie_id in self.opencl_map.keys(): if 'device_version' in self.opencl_map[pcie_id].keys(): opencl_device_version = self.opencl_map[pcie_id]['device_version'] compute = True else: - srch_obj = re.search(r' 530', gpu_name) - if srch_obj: - compute = False - else: - compute = True - srch_obj = re.search(r'(ASPEED|aspeed|Aspeed)', gpu_name) - if srch_obj: - vendor = 'ASPEED' - srch_obj = re.search(r'(MATROX|matrox|Matrox)', gpu_name) - if srch_obj: - vendor = 'MATROX' + compute = not bool(re.search(r' 530', gpu_name)) + if re.search(PATTERNS['ASPD_GPU'], gpu_name): + vendor = GpuItem.GPU_Vendor.ASPEED + gpu_type = GpuItem.GPU_Type.Unsupported + if re.search(PATTERNS['MTRX_GPU'], gpu_name): + vendor = GpuItem.GPU_Vendor.MATROX + gpu_type = GpuItem.GPU_Type.Unsupported # Get Driver Name - driver_module = 'UNKNOWN' for lspci_line in lspci_items: - srch_obj = re.search(r'(Kernel|kernel)', lspci_line) - if srch_obj: + if re.search(r'([kK]ernel)', lspci_line): driver_module_items = lspci_line.split(': ') if len(driver_module_items) >= 2: driver_module = driver_module_items[1].strip() # Get full card path - card_path = None device_dirs = glob.glob(os.path.join(env.GUT_CONST.card_root, 'card?/device')) + # Match system device directory to pcie ID. 
for device_dir in device_dirs: sysfspath = str(Path(device_dir).resolve()) + LOGGER.debug('sysfpath: %s\ndevice_dir: %s', sysfspath, device_dir) if pcie_id == sysfspath[-7:]: card_path = device_dir + sys_card_path = sysfspath + LOGGER.debug('card_path set to: %s', device_dir) + + # No card path could be found. Set readable/writable to False and type to Unsupported + if not card_path: + LOGGER.debug('card_path not set for: %s', pcie_id) + LOGGER.debug('GPU[%s] type set to Unsupported', gpu_uuid) + gpu_type = GpuItem.GPU_Type.Unsupported + readable = writable = False + try_path = '/sys/devices/pci*:*/' + sys_pci_dirs = None + for _ in range(6): + search_path = os.path.join(try_path, '????:{}'.format(pcie_id)) + sys_pci_dirs = glob.glob(search_path) + if sys_pci_dirs: + # Found a match + break + try_path = os.path.join(try_path, '????:??:??.?') + if not sys_pci_dirs: + LOGGER.debug('/sys/device file search found no match to pcie_id: %s', pcie_id) + else: + if len(sys_pci_dirs) > 1: + LOGGER.debug('/sys/device file search found multiple matches to pcie_id %s:\n%s', + pcie_id, sys_pci_dirs) + else: + LOGGER.debug('/sys/device file search found match to pcie_id %s:\n%s', + pcie_id, sys_pci_dirs) + sys_card_path = sys_pci_dirs[0] # Get full hwmon path - hwmon_path = None - hw_file_srch = glob.glob(os.path.join(card_path, env.GUT_CONST.hwmon_sub) + '?') - if env.GUT_CONST.DEBUG: print('hw_file_search: ', hw_file_srch) - if len(hw_file_srch) > 1: - print('More than one hwmon file found: ', hw_file_srch) - elif len(hw_file_srch) == 1: - hwmon_path = hw_file_srch[0] + if card_path: + LOGGER.debug('Card dir [%s] contents:\n%s', card_path, list(os.listdir(card_path))) + hw_file_srch = glob.glob(os.path.join(card_path, env.GUT_CONST.hwmon_sub) + '?') + LOGGER.debug('HW file search: %s', hw_file_srch) + if len(hw_file_srch) > 1: + print('More than one hwmon file found: {}'.format(hw_file_srch)) + elif len(hw_file_srch) == 1: + hwmon_path = hw_file_srch[0] + LOGGER.debug('HW dir 
[%s] contents:\n%s', hwmon_path, list(os.listdir(hwmon_path))) # Check AMD write capability - if vendor == 'AMD': + if vendor == GpuItem.GPU_Vendor.AMD and card_path: pp_od_clk_voltage_file = os.path.join(card_path, 'pp_od_clk_voltage') if os.path.isfile(pp_od_clk_voltage_file): + gpu_type = GpuItem.GPU_Type.Supported readable = True if self.amd_writable: writable = True + elif os.path.isfile(os.path.join(card_path, 'power_dpm_state')): + if os.path.isfile(os.path.join(card_path, 'pp_dpm_mclk')) or GpuItem.is_apu(gpu_name): + readable = True + gpu_type = GpuItem.GPU_Type.APU + elif os.path.isfile(os.path.join(card_path, 'power_dpm_state')): + # if no pp_od_clk_voltage but has power_dpm_state, assume legacy, and disable some sensors + readable = True + gpu_type = GpuItem.GPU_Type.Legacy + if LOGGER.getEffectiveLevel() == logging.DEBUG: + # Write pp_od_clk_voltage details to debug LOGGER + if os.path.isfile(pp_od_clk_voltage_file): + with open(pp_od_clk_voltage_file, 'r') as file_ptr: + pp_od_file_details = file_ptr.read() + else: + pp_od_file_details = 'The file {} does not exist'.format(pp_od_clk_voltage_file) + LOGGER.debug('%s contents:\n%s', pp_od_clk_voltage_file, pp_od_file_details) - self.list[gpu_uuid].populate(pcie_id, gpu_name, short_gpu_name, vendor, driver_module, - card_path, hwmon_path, readable, writable, compute, opencl_device_version) + # Set GPU parameters + self[gpu_uuid].populate_prm_from_dict({'pcie_id': pcie_id, 'model': gpu_name, + 'vendor': vendor, + 'driver': driver_module, 'card_path': card_path, + 'sys_card_path': sys_card_path, 'gpu_type': gpu_type, + 'hwmon_path': hwmon_path, 'readable': readable, + 'writable': writable, 'compute': compute, + 'compute_platform': opencl_device_version}) + LOGGER.debug('Card flags: readable: %s, writable: %s, type: %s', + readable, writable, self[gpu_uuid].prm.gpu_type) + + # Read GPU ID + rdata = self[gpu_uuid].read_gpu_sensor('id', vendor=GpuItem.GPU_Vendor.PCIE, sensor_type='DEVICE') + if rdata: + 
self[gpu_uuid].set_params_value('id', rdata) if clinfo_flag: if pcie_id in self.opencl_map.keys(): - self.list[gpu_uuid].populate_ocl(self.opencl_map[pcie_id]) + self[gpu_uuid].populate_ocl(self.opencl_map[pcie_id]) return True - def read_gpu_opencl_data(self): + def read_gpu_opencl_data(self) -> bool: """ Use clinfo system call to get openCL details for relevant GPUs. + :return: Returns True if successful - :rtype: bool .. todo:: Read of Intel pcie_id is not working. """ # Check access to clinfo command @@ -1300,25 +1872,25 @@ # Clinfo Keywords and related opencl_map key. ocl_keywords = {'CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE': 'prf_wg_multiple', - 'CL_DEVICE_MAX_WORK_GROUP_SIZE': 'max_wg_size', - 'CL_DEVICE_PREFERRED_WORK_GROUP_SIZE': 'prf_wg_size', - 'CL_DEVICE_MAX_WORK_ITEM_SIZES': 'max_wi_sizes', - 'CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS': 'max_wi_dim', - 'CL_DEVICE_MAX_MEM_ALLOC_SIZE': 'max_mem_allocation', - 'CL_DEVICE_SIMD_INSTRUCTION_WIDTH': 'simd_ins_width', - 'CL_DEVICE_SIMD_WIDTH': 'simd_width', - 'CL_DEVICE_SIMD_PER_COMPUTE_UNIT': 'simd_per_cu', - 'CL_DEVICE_MAX_COMPUTE_UNITS': 'max_cu', - 'CL_DEVICE_NAME': 'device_name', - 'CL_DEVICE_OPENCL_C_VERSION': 'opencl_version', - 'CL_DRIVER_VERSION': 'driver_version', - 'CL_DEVICE_VERSION': 'device_version'} + 'CL_DEVICE_MAX_WORK_GROUP_SIZE': 'max_wg_size', + 'CL_DEVICE_PREFERRED_WORK_GROUP_SIZE': 'prf_wg_size', + 'CL_DEVICE_MAX_WORK_ITEM_SIZES': 'max_wi_sizes', + 'CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS': 'max_wi_dim', + 'CL_DEVICE_MAX_MEM_ALLOC_SIZE': 'max_mem_allocation', + 'CL_DEVICE_SIMD_INSTRUCTION_WIDTH': 'simd_ins_width', + 'CL_DEVICE_SIMD_WIDTH': 'simd_width', + 'CL_DEVICE_SIMD_PER_COMPUTE_UNIT': 'simd_per_cu', + 'CL_DEVICE_MAX_COMPUTE_UNITS': 'max_cu', + 'CL_DEVICE_NAME': 'device_name', + 'CL_DEVICE_OPENCL_C_VERSION': 'opencl_version', + 'CL_DRIVER_VERSION': 'driver_version', + 'CL_DEVICE_VERSION': 'device_version'} - def init_temp_map(): + def init_temp_map() -> dict: """ Return an initialized clinfo 
dict. + :return: Initialized clinfo dict - :rtype: dict """ t_dict = {} for temp_keys in ocl_keywords.values(): @@ -1326,7 +1898,7 @@ return t_dict # Initialize dict variables - ocl_index = ocl_pcie_id = ocl_pcie_bus_id = ocl_pcie_slot_id = None + ocl_vendor = ocl_index = ocl_pcie_id = ocl_pcie_bus_id = ocl_pcie_slot_id = None temp_map = init_temp_map() # Read each line from clinfo --raw @@ -1339,195 +1911,210 @@ line_items = linestr.split(maxsplit=2) if len(line_items) != 3: continue - _cl_vendor, cl_index = tuple(re.sub(r'[\[\]]', '', line_items[0]).split('/')) + cl_vendor, cl_index = tuple(re.sub(r'[\[\]]', '', line_items[0]).split('/')) if cl_index == '*': continue if not ocl_index: ocl_index = cl_index + ocl_vendor = cl_vendor ocl_pcie_slot_id = ocl_pcie_bus_id = None # If new cl_index, then update opencl_map - if cl_index != ocl_index: + if cl_vendor != ocl_vendor or cl_index != ocl_index: # Update opencl_map with dict variables when new index is encountered. self.opencl_map.update({ocl_pcie_id: temp_map}) - if env.GUT_CONST.DEBUG: print('cl_index: {}'.format(self.opencl_map[ocl_pcie_id])) + LOGGER.debug('cl_vendor: %s, cl_index: %s, pcie_id: %s', + ocl_vendor, ocl_index, self.opencl_map[ocl_pcie_id]) # Initialize dict variables ocl_index = cl_index + ocl_vendor = cl_vendor ocl_pcie_id = ocl_pcie_bus_id = ocl_pcie_slot_id = None temp_map = init_temp_map() param_str = line_items[1] # Check item in clinfo_keywords for clinfo_keyword, opencl_map_keyword in ocl_keywords.items(): - srch_obj = re.search(clinfo_keyword, param_str) - if srch_obj: + if clinfo_keyword in param_str: temp_map[opencl_map_keyword] = line_items[2].strip() - if env.GUT_CONST.DEBUG: print('{}: [{}]'.format(clinfo_keyword, temp_map[opencl_map_keyword])) + LOGGER.debug('openCL map %s: [%s]', clinfo_keyword, temp_map[opencl_map_keyword]) continue # PCIe ID related clinfo_keywords # Check for AMD pcie_id details - srch_obj = re.search('CL_DEVICE_TOPOLOGY', param_str) - if srch_obj: + if 
'CL_DEVICE_TOPOLOGY' in param_str: ocl_pcie_id = (line_items[2].split()[1]).strip() - if env.GUT_CONST.DEBUG: print('ocl_pcie_id [{}]'.format(ocl_pcie_id)) + LOGGER.debug('AMD ocl_pcie_id [%s]', ocl_pcie_id) continue # Check for NV pcie_id details - srch_obj = re.search('CL_DEVICE_PCI_BUS_ID_NV', param_str) - if srch_obj: + if 'CL_DEVICE_PCI_BUS_ID_NV' in param_str: ocl_pcie_bus_id = hex(int(line_items[2].strip())) if ocl_pcie_slot_id is not None: ocl_pcie_id = '{}:{}.0'.format(ocl_pcie_bus_id[2:].zfill(2), ocl_pcie_slot_id[2:].zfill(2)) ocl_pcie_slot_id = ocl_pcie_bus_id = None - if env.GUT_CONST.DEBUG: print('ocl_pcie_id [{}]'.format(ocl_pcie_id)) + LOGGER.debug('NV ocl_pcie_id [%s]', ocl_pcie_id) continue - srch_obj = re.search('CL_DEVICE_PCI_SLOT_ID_NV', param_str) - if srch_obj: + if 'CL_DEVICE_PCI_SLOT_ID_NV' in param_str: ocl_pcie_slot_id = hex(int(line_items[2].strip())) if ocl_pcie_bus_id is not None: ocl_pcie_id = '{}:{}.0'.format(ocl_pcie_bus_id[2:].zfill(2), ocl_pcie_slot_id[2:].zfill(2)) ocl_pcie_slot_id = ocl_pcie_bus_id = None - if env.GUT_CONST.DEBUG: print('ocl_pcie_id [{}]'.format(ocl_pcie_id)) + LOGGER.debug('NV ocl_pcie_id [%s]', ocl_pcie_id) continue # Check for INTEL pcie_id details # TODO don't know how to do this yet. self.opencl_map.update({ocl_pcie_id: temp_map}) - if env.GUT_CONST.DEBUG: print('cl_index: {}'.format(self.opencl_map[ocl_pcie_id])) return True - def num_vendor_gpus(self, compatibility='total'): + def num_vendor_gpus(self, compatibility: Enum = GpuItem.GPU_Comp.ALL) -> Dict[str, int]: """ Return the count of GPUs by vendor. Counts total by default, but can also by rw, ronly, or wonly. + :param compatibility: Only count vendor GPUs if True. 
- :type compatibility: str :return: Dictionary of GPU counts - :rtype: dict """ + try: + _ = compatibility.name + except AttributeError: + raise AttributeError('Error: {} not a valid compatibility name: [{}]'.format( + compatibility, GpuItem.GPU_Comp)) results_dict = {} - for v in self.list.values(): - if compatibility == 'rw': - if not v.prm.readable or not v.prm.writable: + for gpu in self.gpus(): + if compatibility == GpuItem.GPU_Comp.ReadWrite: + if not gpu.prm.readable or not gpu.prm.writable: continue - if compatibility == 'r-only': - if not v.prm.readable: + if compatibility == GpuItem.GPU_Comp.ReadOnly: + if not gpu.prm.readable: continue - if compatibility == 'w-only': - if not v.prm.writable: + if compatibility == GpuItem.GPU_Comp.WriteOnly: + if not gpu.prm.writable: continue - if v.prm.vendor not in results_dict.keys(): - results_dict.update({v.prm.vendor: 1}) + if gpu.prm.vendor.name not in results_dict.keys(): + results_dict.update({gpu.prm.vendor.name: 1}) else: - results_dict[v.prm.vendor] += 1 + results_dict[gpu.prm.vendor.name] += 1 return results_dict - def num_gpus(self, vendor='All'): + def num_gpus(self, vendor: Enum = GpuItem.GPU_Vendor.ALL) -> Dict[str, int]: """ Return the count of GPUs by total, rw, r-only or w-only. - :param vendor: Only count vendor GPUs if True. - :type vendor: str + + :param vendor: Only count vendor GPUs of specific vendor or all vendors by default. 
:return: Dictionary of GPU counts - :rtype: dict """ - results_dict = {'vendor': vendor, 'total': 0, 'rw': 0, 'r-only': 0, 'w-only': 0} - for v in self.list.values(): - if vendor != 'All': - if vendor != v.prm.vendor: + try: + vendor_name = vendor.name + except AttributeError: + raise AttributeError('Error: {} not a valid vendor name: [{}]'.format(vendor, GpuItem.GPU_Vendor)) + results_dict = {'vendor': vendor_name, 'total': 0, 'rw': 0, 'r-only': 0, 'w-only': 0} + for gpu in self.gpus(): + if vendor != GpuItem.GPU_Vendor.ALL: + if vendor != gpu.prm.vendor: continue - if v.prm.readable and v.prm.writable: + if gpu.prm.readable and gpu.prm.writable: results_dict['rw'] += 1 - elif v.prm.readable: + elif gpu.prm.readable: results_dict['r-only'] += 1 - elif v.prm.writable: + elif gpu.prm.writable: results_dict['w-only'] += 1 results_dict['total'] += 1 return results_dict - def list_gpus(self, vendor='All', compatibility='total'): + def list_gpus(self, vendor: Enum = GpuItem.GPU_Vendor.ALL, + compatibility: Enum = GpuItem.GPU_Comp.ALL) -> 'class GpuList': """ Return GPU_Item of GPUs. Contains all by default, but can be a subset with vendor and compatibility args. Only one flag should be set. - :param vendor: Only count vendor GPUs or All by default (All, AMD, INTEL, NV, ...) - :type vendor: str - :param compatibility: Only count GPUs with specified compatibility (total, readable, writable) - :type compatibility: str + + :param vendor: Only count vendor GPUs or ALL by default. 
+ :param compatibility: Only count GPUs with specified compatibility (all, readable, writable) :return: GpuList of compatible GPUs - :rtype: GpuList """ + try: + _ = compatibility.name + except AttributeError: + raise AttributeError('Error: {} not a valid compatibility name: [{}]'.format( + compatibility, GpuItem.GPU_Comp)) + try: + _ = vendor.name + except AttributeError: + raise AttributeError('Error: {} not a valid vendor name: [{}]'.format(vendor, GpuItem.GPU_Vendor)) result_list = GpuList() - for k, v in self.list.items(): - if vendor != 'All': - if vendor != v.prm.vendor: + for uuid, gpu in self.items(): + if vendor != GpuItem.GPU_Vendor.ALL: + if vendor != gpu.prm.vendor: continue - if compatibility == 'readable': - if v.prm.readable: - result_list.list[k] = v - elif compatibility == 'writable': - if v.prm.writable: - result_list.list[k] = v + if compatibility == GpuItem.GPU_Comp.Readable: + # Skip Legacy GPU type, since most parameters can not be read. + if gpu.prm.gpu_type != GpuItem.GPU_Type.Legacy: + if gpu.prm.readable: + result_list[uuid] = gpu + elif compatibility == GpuItem.GPU_Comp.Writable: + if gpu.prm.writable: + result_list[uuid] = gpu else: - result_list.list[k] = v + result_list[uuid] = gpu return result_list - def read_gpu_ppm_table(self): + def read_gpu_ppm_table(self) -> None: """ - Read GPU ppm data and populate GpuItem - :return: None + Read GPU ppm data and populate GpuItem. """ - for v in self.list.values(): - if v.prm.readable: - v.read_gpu_ppm_table() + for gpu in self.gpus(): + if gpu.prm.readable: + gpu.read_gpu_ppm_table() - def print_ppm_table(self): + def print_ppm_table(self) -> None: """ Print the GpuItem ppm data. - :return: None """ - for v in self.list.values(): - v.print_ppm_table() + for gpu in self.gpus(): + gpu.print_ppm_table() - def read_gpu_pstates(self): + def read_gpu_pstates(self) -> None: """ - Read GPU p-state data and populate GpuItem - :return: None + Read GPU p-state data and populate GpuItem. 
""" - for v in self.list.values(): - if v.prm.readable: - v.read_gpu_pstates() + for gpu in self.gpus(): + if gpu.prm.readable: + gpu.read_gpu_pstates() - def print_pstates(self): + def print_pstates(self) -> None: """ Print the GpuItem p-state data. - :return: None """ - for v in self.list.values(): - v.print_pstates() + for gpu in self.gpus(): + gpu.print_pstates() + + def read_gpu_sensor_set(self, data_type: Enum = GpuItem.SensorSet.All) -> None: + """ + Read sensor data from all GPUs in self.list. - def read_gpu_sensor_data(self, data_type='All'): - """Read sensor data from GPUs""" - for v in self.list.values(): - if v.prm.readable: - v.read_gpu_sensor_data(data_type) + :param data_type: Specifies the sensor set to use in the read. + """ + for gpu in self.gpus(): + if gpu.prm.readable: + gpu.read_gpu_sensor_set(data_type) # Printing Methods follow. - def print(self, clflag=False): + def print(self, short: bool = False, clflag: bool = False) -> None: """ Print all GpuItem. + + :param short: If true, print short report :param clflag: If true, print clinfo - :type clflag: bool - :return: """ - for v in self.list.values(): - v.print(clflag) + for gpu in self.gpus(): + gpu.print(short=short, clflag=clflag) - def print_table(self, title=None): + def print_table(self, title: Union[str, None] = None) -> bool: """ Print table of parameters. 
+ :return: True if success - :rtype: bool """ if self.num_gpus()['total'] < 1: return False @@ -1536,39 +2123,41 @@ print('\x1b[1;36m{}\x1b[0m'.format(title)) print('┌', '─'.ljust(13, '─'), sep='', end='') - for _ in self.list.values(): + for _ in self.gpus(): print('┬', '─'.ljust(16, '─'), sep='', end='') print('┐') print('│\x1b[1;36m' + 'Card #'.ljust(13, ' ') + '\x1b[0m', sep='', end='') - for v in self.list.values(): - print('│\x1b[1;36mcard{:<12}\x1b[0m'.format(v.prm.card_num), end='') + for gpu in self.gpus(): + print('│\x1b[1;36mcard{:<12}\x1b[0m'.format(gpu.prm.card_num), end='') print('│') print('├', '─'.ljust(13, '─'), sep='', end='') - for _ in self.list.values(): + for _ in self.gpus(): print('┼', '─'.ljust(16, '─'), sep='', end='') print('┤') for table_item in self.table_parameters(): print('│\x1b[1;36m{:<13}\x1b[0m'.format(str(self.table_param_labels()[table_item])[:13]), end='') - for v in self.list.values(): - print('│{:<16}'.format(str(v.get_params_value(table_item))[:16]), end='') + for gpu in self.gpus(): + data_value_raw = gpu.get_params_value(table_item) + if isinstance(data_value_raw, float): + data_value_raw = round(data_value_raw, 3) + print('│{:<16}'.format(str(data_value_raw)[:16]), end='') print('│') print('└', '─'.ljust(13, '─'), sep='', end='') - for _ in self.list.values(): + for _ in self.gpus(): print('┴', '─'.ljust(16, '─'), sep='', end='') print('┘') return True - def print_log_header(self, log_file_ptr): + def print_log_header(self, log_file_ptr: TextIO) -> bool: """ Print the log header. + :param log_file_ptr: File pointer for target output. - :type log_file_ptr: file :return: True if success - :rtype: bool """ if self.num_gpus()['total'] < 1: return False @@ -1580,34 +2169,32 @@ print('', file=log_file_ptr) return True - def print_log(self, log_file_ptr): + def print_log(self, log_file_ptr: TextIO) -> bool: """ Print the log data. + :param log_file_ptr: File pointer for target output. 
- :type log_file_ptr: file :return: True if success - :rtype: bool """ if self.num_gpus()['total'] < 1: return False # Print Data - for v in self.list.values(): - print('{}|{}'.format(v.energy['tn'].strftime('%c').strip(), v.prm.card_num), + for gpu in self.gpus(): + print('{}|{}'.format(gpu.energy['tn'].strftime(env.GUT_CONST.TIME_FORMAT), gpu.prm.card_num), sep='', end='', file=log_file_ptr) for table_item in self.table_parameters(): - print('|{}'.format(re.sub('M[Hh]z', '', str(v.get_params_value(table_item)).strip())), + print('|{}'.format(re.sub(PATTERNS['MHz'], '', str(gpu.get_params_value(table_item)).strip())), sep='', end='', file=log_file_ptr) print('', file=log_file_ptr) return True - def print_plot_header(self, log_file_ptr): + def print_plot_header(self, log_file_ptr: IO[Union[str, bytes]]) -> bool: """ Print the plot header. + :param log_file_ptr: File pointer for target output. - :type log_file_ptr: file :return: True if success - :rtype: bool """ if self.num_gpus()['total'] < 1: return False @@ -1622,22 +2209,22 @@ log_file_ptr.flush() return True - def print_plot(self, log_file_ptr): + def print_plot(self, log_file_ptr: IO[Union[str, bytes]]) -> bool: """ Print the plot data. + :param log_file_ptr: File pointer for target output. 
- :type log_file_ptr: file - :return: True if success - :rtype: bool + :return: True on success """ if self.num_gpus()['total'] < 1: return False # Print Data - for v in self.list.values(): - line_str_item = ['{}|{}'.format(str(v.energy['tn'].strftime('%c')).strip(), v.prm.card_num)] + for gpu in self.gpus(): + line_str_item = ['{}|{}'.format(str(gpu.energy['tn'].strftime(env.GUT_CONST.TIME_FORMAT)), + gpu.prm.card_num)] for table_item in self.table_parameters(): - line_str_item.append('|' + str(re.sub('M[Hh]z', '', str(v.get_params_value(table_item)))).strip()) + line_str_item.append('|' + re.sub(PATTERNS['MHz'], '', str(gpu.get_params_value(table_item))).strip()) line_str_item.append('\n') line_str = ''.join(line_str_item) log_file_ptr.write(line_str.encode('utf-8')) @@ -1645,15 +2232,14 @@ return True -def about(): +def about() -> None: """ Print details about this module. - :return: None """ print(__doc__) print('Author: ', __author__) print('Copyright: ', __copyright__) - print('Credits: ', __credits__) + print('Credits: ', *['\n {}'.format(item) for item in __credits__]) print('License: ', __license__) print('Version: ', __version__) print('Maintainer: ', __maintainer__) diff -Nru ricks-amdgpu-utils-3.0.0/GPUmodules/__init__.py ricks-amdgpu-utils-3.5.0/GPUmodules/__init__.py --- ricks-amdgpu-utils-3.0.0/GPUmodules/__init__.py 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/GPUmodules/__init__.py 2020-07-06 00:57:49.000000000 +0000 @@ -1,18 +1,3 @@ -# This file is part the amdgpu-util package -# -# Copyright (C) 2019 RueiKe -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -from GPUmodules import GPUmodule -from GPUmodules import env +__version__ = '3.5.0' +__status__ = 'Development Status :: 5 - Production/Stable' +#__status__ = 'Development Status :: 4 - Beta' diff -Nru ricks-amdgpu-utils-3.0.0/gpu-mon ricks-amdgpu-utils-3.5.0/gpu-mon --- ricks-amdgpu-utils-3.0.0/gpu-mon 1970-01-01 00:00:00.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/gpu-mon 2020-07-06 00:57:49.000000000 +0000 @@ -0,0 +1,391 @@ +#!/usr/bin/python3 +""" gpu-mon - Displays current status of all active GPUs + + A utility to give the current state of all compatible GPUs. The default behavior + is to continuously update a text based table in the current window until Ctrl-C is + pressed. With the *--gui* option, a table of relevant parameters will be updated + in a Gtk window. You can specify the delay between updates with the *--sleep N* + option where N is an integer > zero that specifies the number of seconds to sleep + between updates. The *--no_fan* option can be used to disable the reading and display + of fan information. The *--log* option is used to write all monitor data to a psv log + file. When writing to a log file, the utility will indicate this in red at the top of + the window with a message that includes the log file name. The *--plot* will display a + plot of critical GPU parameters which updates at the specified *--sleep N* interval. If + you need both the plot and monitor displays, then using the --plot option is preferred + over running both tools as a single read of the GPUs is used to update both displays. + The *--ltz* option results in the use of local time instead of UTC. 
+ + Copyright (C) 2019 RicksLab + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" +__author__ = 'RueiKe' +__copyright__ = 'Copyright (C) 2019 RicksLab' +__credits__ = ['Craig Echt - Testing, Debug, Verification, and Documentation', + 'Keith Myers - Testing, Debug, Verification of NV Capability'] +__license__ = 'GNU General Public License' +__program_name__ = 'gpu-mon' +__maintainer__ = 'RueiKe' +__docformat__ = 'reStructuredText' +# pylint: disable=multiple-statements +# pylint: disable=line-too-long +# pylint: disable=bad-continuation + +import argparse +import subprocess +import threading +import os +import logging +import sys +import shlex +import shutil +import time +import signal +from typing import Callable +from numpy import isnan + +try: + import gi + gi.require_version('Gtk', '3.0') + from gi.repository import GLib, Gtk +except ModuleNotFoundError as error: + print('gi import error: {}'.format(error)) + print('gi is required for {}'.format(__program_name__)) + print(' In a venv, first install vext: pip install --no-cache-dir vext') + print(' Then install vext.gi: pip install --no-cache-dir vext.gi') + sys.exit(0) + +from GPUmodules import __version__, __status__ +from GPUmodules import GPUgui +from GPUmodules import GPUmodule as Gpu +from GPUmodules import env + +set_gtk_prop = GPUgui.GuiProps.set_gtk_prop +LOGGER = logging.getLogger('gpu-utils') + + +def ctrl_c_handler(target_signal: signal.Signals, _frame) 
-> None: + """ + Signal catcher for ctrl-c to exit monitor loop. + + :param target_signal: Target signal name + :param _frame: Ignored + """ + LOGGER.debug('ctrl_c_handler (ID: %s) has been caught. Setting quit flag...', target_signal) + print('Setting quit flag...') + MonitorWindow.quit = True + + +signal.signal(signal.SIGINT, ctrl_c_handler) + +# SEMAPHORE ############ +UD_SEM = threading.Semaphore() +######################## + + +class MonitorWindow(Gtk.Window): + """ + Custom PAC Gtk window. + """ + quit = False + + def __init__(self, gpu_list, devices): + + Gtk.Window.__init__(self, title=env.GUT_CONST.gui_window_title) + self.set_border_width(0) + GPUgui.GuiProps.set_style() + + if env.GUT_CONST.icon_path: + icon_file = os.path.join(env.GUT_CONST.icon_path, 'gpu-mon.icon.png') + LOGGER.debug('Icon file: [%s]', icon_file) + if os.path.isfile(icon_file): + self.set_icon_from_file(icon_file) + + grid = Gtk.Grid() + self.add(grid) + + col = 0 + row = 0 + num_amd_gpus = gpu_list.num_gpus()['total'] + if env.GUT_CONST.DEBUG: + debug_label = Gtk.Label(name='warn_label') + debug_label.set_markup(' DEBUG Logger Active ') + lbox = Gtk.Box(spacing=6, name='warn_box') + set_gtk_prop(debug_label, top=1, bottom=1, right=1, left=1) + lbox.pack_start(debug_label, True, True, 0) + grid.attach(lbox, 0, row, num_amd_gpus+1, 1) + row += 1 + if env.GUT_CONST.LOG: + log_label = Gtk.Label(name='warn_label') + log_label.set_markup(' Logging to: {}'.format(env.GUT_CONST.log_file)) + lbox = Gtk.Box(spacing=6, name='warn_box') + set_gtk_prop(log_label, top=1, bottom=1, right=1, left=1) + lbox.pack_start(log_label, True, True, 0) + grid.attach(lbox, 0, row, num_amd_gpus+1, 1) + row += 1 + row_start = row + + row = row_start + row_labels = {'card_num': Gtk.Label(name='white_label')} + row_labels['card_num'].set_markup('Card #') + for param_name, param_label in gpu_list.table_param_labels().items(): + row_labels[param_name] = Gtk.Label(name='white_label') + 
def update_data(gpu_list: Gpu.GpuList, devices: dict, cmd: subprocess.Popen) -> None:
    """
    Update monitor data with data read from GPUs.

    The call is skipped when another update still holds UD_SEM.  The semaphore is released in
    a finally clause, so an exception raised while reading sensors, logging, plotting or
    updating widgets cannot leave it held and cause every later update to be skipped.

    :param gpu_list: A gpuList object with all gpuItems
    :param devices: A dictionary linking Gui items with data.
    :param cmd: Subprocess return from running plot, or None when no plot process was started.
    """
    # SEMAPHORE ############
    if not UD_SEM.acquire(blocking=False):
        LOGGER.debug('Update while updating, skipping new update')
        return
    ########################
    try:
        gpu_list.read_gpu_sensor_set(data_type=Gpu.GpuItem.SensorSet.Monitor)
        if env.GUT_CONST.LOG:
            gpu_list.print_log(env.GUT_CONST.log_file_ptr)
        if env.GUT_CONST.PLOT:
            if cmd is None:
                # No plot process to write to; stop trying instead of failing on cmd.stdin.
                LOGGER.debug('Plot is enabled but no gpu-plot process exists, disabling plot')
                env.GUT_CONST.PLOT = False
            else:
                try:
                    gpu_list.print_plot(cmd.stdin)
                except (OSError, KeyboardInterrupt) as except_err:
                    LOGGER.debug('gpu-plot has closed: [%s]', except_err)
                    print('gpu-plot has closed')
                    env.GUT_CONST.PLOT = False

        # update gui
        for uuid, gui_component in devices.items():
            for comp_name, comp_item in gui_component.items():
                if comp_name == 'card_num':
                    comp_item.set_markup('Card{}'.format(gpu_list[uuid].get_params_value('card_num')))
                    continue
                data_value_raw = gpu_list[uuid].get_params_value(comp_name)
                LOGGER.debug('raw data value: %s', data_value_raw)
                # Only real float readings are rounded; NaN and non-float values are shown as read.
                if isinstance(data_value_raw, float) and not isnan(data_value_raw):
                    data_value_raw = round(data_value_raw, 3)
                # Displayed text is limited to 16 characters.
                comp_item.set_text(str(data_value_raw)[:16])
                set_gtk_prop(comp_item, width_chars=17)

        # Process pending Gtk events before returning.
        while Gtk.events_pending():
            Gtk.main_iteration_do(True)
    finally:
        # SEMAPHORE ############
        UD_SEM.release()
        ########################
def main() -> None:
    """
    Flow for gpu-mon.

    Parses the command line, checks the environment, detects and reads the GPUs, then runs
    either the Gtk monitor (--gui, implied by --plot) or the text monitor.  A log file opened
    for --log is closed on every exit path out of the monitor section.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--about', help='README', action='store_true', default=False)
    parser.add_argument('--gui', help='Display GTK Version of Monitor', action='store_true', default=False)
    parser.add_argument('--log', help='Write all monitor data to logfile', action='store_true', default=False)
    parser.add_argument('--plot', help='Open and write to gpu-plot', action='store_true', default=False)
    parser.add_argument('--ltz', help='Use local time zone instead of UTC', action='store_true', default=False)
    parser.add_argument('--sleep', help='Number of seconds to sleep between updates', type=int, default=2)
    parser.add_argument('--no_fan', help='do not include fan setting options', action='store_true', default=False)
    parser.add_argument('-d', '--debug', help='Debug output', action='store_true', default=False)
    parser.add_argument('--pdebug', help='Plot debug output', action='store_true', default=False)
    args = parser.parse_args()

    # About me
    if args.about:
        print(__doc__)
        print('Author: ', __author__)
        print('Copyright: ', __copyright__)
        print('Credits: ', *['\n {}'.format(item) for item in __credits__])
        print('License: ', __license__)
        print('Version: ', __version__)
        print('Maintainer: ', __maintainer__)
        print('Status: ', __status__)
        sys.exit(0)

    # The check rejects 1 as well as zero and negatives, so the message states that limit.
    # NOTE(review): if a 1 second interval is meant to be valid, relax the check to `< 1`
    # and reword the message accordingly.
    if int(args.sleep) <= 1:
        print('Invalid value for sleep specified. Must be an integer greater than one')
        sys.exit(-1)
    env.GUT_CONST.set_args(args)
    LOGGER.debug('########## %s %s', __program_name__, __version__)

    if env.GUT_CONST.check_env() < 0:
        print('Error in environment. Exiting...')
        sys.exit(-1)

    # Get list of AMD GPUs and get basic non-driver details
    gpu_list = Gpu.GpuList()
    gpu_list.set_gpu_list()

    # Check list of GPUs
    vendor_counts = gpu_list.num_vendor_gpus()
    print('Detected GPUs: {}'.format(
        ', '.join('{}: {}'.format(type_name, type_value) for type_name, type_value in vendor_counts.items())))
    if 'AMD' in vendor_counts:
        env.GUT_CONST.read_amd_driver_version()
        print('AMD: {}'.format(gpu_list.wattman_status()))
    if 'NV' in vendor_counts:
        print('nvidia smi: [{}]'.format(env.GUT_CONST.cmd_nvidia_smi))

    num_gpus = gpu_list.num_gpus()
    if num_gpus['total'] == 0:
        print('No GPUs detected, exiting...')
        sys.exit(-1)

    # Read data static/dynamic/info/state driver information for GPUs
    gpu_list.read_gpu_sensor_set(data_type=Gpu.GpuItem.SensorSet.All)

    # Check number of readable/writable GPUs again
    num_gpus = gpu_list.num_gpus()
    print('{} total GPUs, {} rw, {} r-only, {} w-only\n'.format(num_gpus['total'], num_gpus['rw'],
                                                                num_gpus['r-only'], num_gpus['w-only']))

    time.sleep(1)
    # Generate a new list of only compatible GPUs
    if num_gpus['r-only'] + num_gpus['rw'] < 1:
        print('No readable GPUs, exiting...')
        sys.exit(0)
    com_gpu_list = gpu_list.list_gpus(compatibility=Gpu.GpuItem.GPU_Comp.Readable)

    if args.log:
        env.GUT_CONST.LOG = True
        env.GUT_CONST.log_file = './log_monitor_{}.txt'.format(
            env.GUT_CONST.now(ltz=env.GUT_CONST.USELTZ).strftime('%m%d_%H%M%S'))
        env.GUT_CONST.log_file_ptr = open(env.GUT_CONST.log_file, 'w', 1)
        gpu_list.print_log_header(env.GUT_CONST.log_file_ptr)

    # Plotting needs the Gtk monitor, which feeds the plot process.
    if args.plot:
        args.gui = True
    try:
        if args.gui:
            # Display Gtk style Monitor
            devices = {}
            gmonitor = MonitorWindow(com_gpu_list, devices)
            gmonitor.connect('delete-event', gmonitor.set_quit)
            gmonitor.show_all()

            cmd = None
            if args.plot:
                env.GUT_CONST.PLOT = True
                plot_util = shutil.which('gpu-plot')
                if not plot_util:
                    plot_util = os.path.join(env.GUT_CONST.repository_path, 'gpu-plot')
                if os.path.isfile(plot_util):
                    # Argument list is built directly so a path containing spaces stays one argument.
                    plot_cmd = [plot_util]
                    if env.GUT_CONST.PDEBUG:
                        plot_cmd.append('--debug')
                    plot_cmd += ['--stdin', '--sleep', str(env.GUT_CONST.SLEEP)]
                    cmd = subprocess.Popen(plot_cmd, bufsize=-1, shell=False, stdin=subprocess.PIPE)
                    com_gpu_list.print_plot_header(cmd.stdin)
                else:
                    print('Fatal Error: gpu-plot not found.')
                    # No process was started, so nothing must try to write plot data to it.
                    env.GUT_CONST.PLOT = False

            # Start thread to update Monitor
            threading.Thread(target=refresh, daemon=True,
                             args=[env.GUT_CONST.SLEEP, update_data, com_gpu_list, devices, cmd, gmonitor]).start()

            Gtk.main()
        else:
            # Display text style Monitor
            try:
                while True:
                    com_gpu_list.read_gpu_sensor_set(data_type=Gpu.GpuItem.SensorSet.Monitor)
                    os.system('clear')
                    if env.GUT_CONST.DEBUG:
                        print('{}DEBUG logger is active{}'.format('\033[31m \033[01m', '\033[0m'))
                    if env.GUT_CONST.LOG:
                        print('{}Logging to: {}{}'.format('\033[31m \033[01m', env.GUT_CONST.log_file, '\033[0m'))
                        com_gpu_list.print_log(env.GUT_CONST.log_file_ptr)
                    com_gpu_list.print_table()
                    time.sleep(env.GUT_CONST.SLEEP)
                    if MonitorWindow.quit:
                        sys.exit(-1)
            except KeyboardInterrupt:
                sys.exit(0)
    finally:
        # Runs for normal return and for sys.exit(), so the log file is always closed.
        if args.log:
            env.GUT_CONST.log_file_ptr.close()
If you + have confidence, the *--execute_pac* option can be used to execute and then delete the + saved bash file. Since the GPU device files are writable only by root, sudo is used to + execute commands in the bash file; as a result, you will be prompted for credentials in the + terminal where you executed *gpu-pac*. The *--no_fan* option can be used to eliminate + fan details from the utility. The *--force_write* option can be used to force all configuration + parameters to be written to the GPU. The default behavior is to only write changes. + + Copyright (C) 2019 RicksLab + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. 
+""" +__author__ = 'RueiKe' +__copyright__ = 'Copyright (C) 2019 RicksLab' +__credits__ = ['Craig Echt - Testing, Debug, Verification, and Documentation'] +__license__ = 'GNU General Public License' +__program_name__ = 'gpu-pac' +__maintainer__ = 'RueiKe' +__docformat__ = 'reStructuredText' +# pylint: disable=multiple-statements +# pylint: disable=line-too-long +# pylint: disable=bad-continuation + +import argparse +import re +import subprocess +import os +import logging +import sys +import time +from uuid import uuid4 + +try: + import gi + gi.require_version('Gtk', '3.0') + from gi.repository import Gtk +except ModuleNotFoundError as error: + print('gi import error: {}'.format(error)) + print('gi is required for {}'.format(__program_name__)) + print(' In a venv, first install vext: pip install --no-cache-dir vext') + print(' Then install vext.gi: pip install --no-cache-dir vext.gi') + sys.exit(0) + +from GPUmodules import __version__, __status__ +from GPUmodules import GPUgui +from GPUmodules import GPUmodule as Gpu +from GPUmodules import env + +MAX_CHAR = 54 +CHAR_WIDTH = 8 +set_gtk_prop = GPUgui.GuiProps.set_gtk_prop +LOGGER = logging.getLogger('gpu-utils') +PATTERNS = env.GutConst.PATTERNS + + +class PACWindow(Gtk.Window): + """ + PAC Window class. 
+ """ + def __init__(self, gpu_list, devices): + Gtk.Window.__init__(self, title=env.GUT_CONST.gui_window_title) + self.set_border_width(0) + GPUgui.GuiProps.set_style() + + if env.GUT_CONST.icon_path: + icon_file = os.path.join(env.GUT_CONST.icon_path, 'gpu-pac.icon.png') + if os.path.isfile(icon_file): + self.set_icon_from_file(icon_file) + + grid = Gtk.Grid() + self.add(grid) + + num_com_gpus = gpu_list.num_gpus()['total'] + max_rows = 0 + row = col = 0 + for gpu in gpu_list.gpus(): + row = 0 + # Card Number in top center of box + devices[gpu.prm.uuid] = {'card_num': Gtk.Label(name='white_label')} + devices[gpu.prm.uuid]['card_num'].set_markup('Card {}: {}'.format( + gpu.get_params_value(str('card_num')), gpu.get_params_value('model_display')[:40])) + set_gtk_prop(devices[gpu.prm.uuid]['card_num'], align=(0.5, 0.5), top=1, bottom=1, right=4, left=4) + lbox = Gtk.Box(spacing=6, name='head_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['card_num'], True, True, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + # Card Path + devices[gpu.prm.uuid]['card_path'] = Gtk.Label(name='white_label') + devices[gpu.prm.uuid]['card_path'].set_markup('Device: {}'.format(gpu.get_params_value('card_path'))) + set_gtk_prop(devices[gpu.prm.uuid]['card_path'], align=(0.0, 0.5), top=1, bottom=1, + right=4, left=4, width=MAX_CHAR*CHAR_WIDTH) + lbox = Gtk.Box(spacing=6, name='dark_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['card_path'], True, True, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + # Card Power Cap + power_cap_range = gpu.get_params_value('power_cap_range') + devices[gpu.prm.uuid]['power_cap'] = Gtk.Label(name='white_label') + devices[gpu.prm.uuid]['power_cap'].set_markup('Power Cap: Range ({} - {} W)'.format( + power_cap_range[0], power_cap_range[1])) + set_gtk_prop(devices[gpu.prm.uuid]['power_cap'], align=(0.0, 0.5), top=1, bottom=1, right=4, left=4) 
+ lbox = Gtk.Box(spacing=6, name='dark_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['power_cap'], True, True, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + # Card Power Cap Value and Entry + devices[gpu.prm.uuid]['power_cap_cur'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['power_cap_cur'], top=1, bottom=1, right=2, left=2) + devices[gpu.prm.uuid]['power_cap_ent'] = Gtk.Entry() + set_gtk_prop(devices[gpu.prm.uuid]['power_cap_ent'], top=1, bottom=1, right=0, left=2, xalign=1, + width_chars=5, max_length=5) + devices[gpu.prm.uuid]['power_cap_ent_unit'] = Gtk.Label(name='white_label') + devices[gpu.prm.uuid]['power_cap_ent_unit'].set_text('W (value or \'reset\')') + set_gtk_prop(devices[gpu.prm.uuid]['power_cap_ent_unit'], top=1, bottom=1, right=0, left=0, + align=(0.0, 0.5)) + lbox = Gtk.Box(spacing=2, name='med_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['power_cap_cur'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['power_cap_ent'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['power_cap_ent_unit'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + if env.GUT_CONST.show_fans: + # Fan PWM Value + fan_pwm_range = gpu.get_params_value('fan_pwm_range') + devices[gpu.prm.uuid]['fan_pwm_range'] = Gtk.Label(name='white_label') + devices[gpu.prm.uuid]['fan_pwm_range'].set_markup('Fan PWM: Range ({} - {} %)'.format( + fan_pwm_range[0], fan_pwm_range[1])) + set_gtk_prop(devices[gpu.prm.uuid]['fan_pwm_range'], top=1, bottom=1, right=4, left=4, align=(0.0, 0.5)) + lbox = Gtk.Box(spacing=6, name='dark_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['fan_pwm_range'], True, True, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + # Card Fan PWM Value and Entry + devices[gpu.prm.uuid]['fan_pwm_cur'] = Gtk.Label(name='white_label') + 
set_gtk_prop(devices[gpu.prm.uuid]['fan_pwm_cur'], top=1, bottom=1, right=2, left=2) + devices[gpu.prm.uuid]['fan_pwm_ent'] = Gtk.Entry() + set_gtk_prop(devices[gpu.prm.uuid]['fan_pwm_ent'], top=1, bottom=1, right=0, left=2, + width_chars=5, max_length=5, xalign=1) + devices[gpu.prm.uuid]['fan_pwm_ent_unit'] = Gtk.Label(name='white_label') + devices[gpu.prm.uuid]['fan_pwm_ent_unit'].set_text('% (value, \'reset\', or \'max\')') + set_gtk_prop(devices[gpu.prm.uuid]['fan_pwm_ent_unit'], top=1, bottom=1, + right=0, left=0, align=(0.0, 0.5)) + lbox = Gtk.Box(spacing=2, name='med_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['fan_pwm_cur'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['fan_pwm_ent'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['fan_pwm_ent_unit'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + if gpu.get_params_value('gpu_type') in [gpu.GPU_Type.PStatesNE, gpu.GPU_Type.PStates]: + # Sclk P-States + devices[gpu.prm.uuid]['sclk_range'] = Gtk.Label(name='white_label') + devices[gpu.prm.uuid]['sclk_range'].set_markup('Sclk P-States: Ranges {}-{}, {}-{} '.format( + gpu.get_params_value('sclk_f_range')[0], + gpu.get_params_value('sclk_f_range')[1], + gpu.get_params_value('vddc_range')[0], + gpu.get_params_value('vddc_range')[1])) + set_gtk_prop(devices[gpu.prm.uuid]['sclk_range'], + top=1, bottom=1, right=4, left=4, align=(0.0, 0.5)) + lbox = Gtk.Box(spacing=6, name='dark_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['sclk_range'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + # Sclk P-State Values and Entry + devices[gpu.prm.uuid]['sclk_pstate'] = {} + for ps in gpu.sclk_state.keys(): + devices[gpu.prm.uuid]['sclk_pstate'][ps] = {} + + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'] = Gtk.Label(name='white_label') + 
set_gtk_prop(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'], width_chars=20, + top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'] = Gtk.Entry() + set_gtk_prop(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'], width_chars=5, max_length=5, + xalign=1, top=1, bottom=1, right=0, left=0) + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'], + top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj'] = Gtk.Entry() + set_gtk_prop(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj'], width_chars=5, max_length=5, + xalign=1, top=1, bottom=1, right=0, left=0) + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj_unit'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj_unit'], + top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) + + lbox = Gtk.Box(spacing=6, name='med_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj_unit'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + elif gpu.get_params_value('gpu_type') == gpu.GPU_Type.CurvePts: + # Sclk Curve End Points + devices[gpu.prm.uuid]['sclk_range'] = Gtk.Label(name='white_label') + devices[gpu.prm.uuid]['sclk_range'].set_markup('Sclk Curve End Points: Ranges {}-{} '.format( + gpu.get_params_value('sclk_f_range')[0], 
gpu.get_params_value('sclk_f_range')[1])) + set_gtk_prop(devices[gpu.prm.uuid]['sclk_range'], top=1, bottom=1, right=4, left=4, align=(0.0, 0.5)) + lbox = Gtk.Box(spacing=6, name='dark_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['sclk_range'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + # Sclk Curve End Points Values and Entry + devices[gpu.prm.uuid]['sclk_pstate'] = {} + for ps, psd in gpu.sclk_state.items(): + devices[gpu.prm.uuid]['sclk_pstate'][ps] = {} + + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'], width_chars=20, + top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'] = Gtk.Entry() + set_gtk_prop(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'], width_chars=5, max_length=5, + xalign=1, top=1, bottom=1, right=0, left=0) + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'], + top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) + lbox = Gtk.Box(spacing=6, name='med_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + if gpu.get_params_value('gpu_type') in [gpu.GPU_Type.CurvePts, gpu.GPU_Type.PStates]: + # SCLK P-State Mask + devices[gpu.prm.uuid]['sclk_pst_mask_cur'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['sclk_pst_mask_cur'], top=1, bottom=1, right=2, left=2) + devices[gpu.prm.uuid]['sclk_pst_mask_ent'] = 
Gtk.Entry() + set_gtk_prop(devices[gpu.prm.uuid]['sclk_pst_mask_ent'], width_chars=17, max_length=17, + xalign=0, top=1, bottom=1, right=0, left=1) + lbox = Gtk.Box(spacing=2, name='med_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['sclk_pst_mask_cur'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['sclk_pst_mask_ent'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + if gpu.get_params_value('gpu_type') == gpu.GPU_Type.PStates: + # Mclk P-States + devices[gpu.prm.uuid]['mclk_range'] = Gtk.Label(name='white_label') + devices[gpu.prm.uuid]['mclk_range'].set_markup('Mclk P-States: Ranges {}-{}, {}-{} '.format( + gpu.get_params_value('mclk_f_range')[0], + gpu.get_params_value('mclk_f_range')[1], + gpu.get_params_value('vddc_range')[0], + gpu.get_params_value('vddc_range')[1])) + set_gtk_prop(devices[gpu.prm.uuid]['mclk_range'], top=1, bottom=1, right=4, left=4, align=(0.0, 0.5)) + lbox = Gtk.Box(spacing=6, name='dark_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['mclk_range'], True, True, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + # Mclk P-State Values and Entry + devices[gpu.prm.uuid]['mclk_pstate'] = {} + for ps, psd in gpu.mclk_state.items(): + devices[gpu.prm.uuid]['mclk_pstate'][ps] = {} + + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'], width_chars=20, + top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'] = Gtk.Entry() + set_gtk_prop(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'], width_chars=5, max_length=5, + xalign=1, top=1, bottom=1, right=0, left=0) + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'], + 
top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj'] = Gtk.Entry() + set_gtk_prop(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj'], width_chars=5, max_length=5, + xalign=1, top=1, bottom=1, right=0, left=0) + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj_unit'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj_unit'], + top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) + + lbox = Gtk.Box(spacing=6, name='med_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj_unit'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + elif gpu.get_params_value('gpu_type') == gpu.GPU_Type.CurvePts: + # Mclk Curve End points + devices[gpu.prm.uuid]['mclk_range'] = Gtk.Label(name='white_label') + devices[gpu.prm.uuid]['mclk_range'].set_markup('Mclk Curve End Points: Ranges {}-{} '.format( + gpu.get_params_value('mclk_f_range')[0], + gpu.get_params_value('mclk_f_range')[1])) + set_gtk_prop(devices[gpu.prm.uuid]['mclk_range'], top=1, bottom=1, right=4, left=4, align=(0.0, 0.5)) + lbox = Gtk.Box(spacing=6, name='dark_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['mclk_range'], True, True, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + # Mclk Curve End Points Values and Entry + devices[gpu.prm.uuid]['mclk_pstate'] = {} + for ps, psd in gpu.mclk_state.items(): + devices[gpu.prm.uuid]['mclk_pstate'][ps] = {} + + 
devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'], width_chars=20, + top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'] = Gtk.Entry() + set_gtk_prop(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'], width_chars=5, max_length=5, + xalign=1, top=1, bottom=1, right=0, left=0) + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'], + top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) + lbox = Gtk.Box(spacing=6, name='med_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + if gpu.get_params_value('gpu_type') in [gpu.GPU_Type.CurvePts, gpu.GPU_Type.PStates]: + # MCLK P-State Mask + devices[gpu.prm.uuid]['mclk_pst_mask_cur'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['mclk_pst_mask_cur'], top=1, bottom=1, right=2, left=2) + devices[gpu.prm.uuid]['mclk_pst_mask_ent'] = Gtk.Entry() + set_gtk_prop(devices[gpu.prm.uuid]['mclk_pst_mask_ent'], width_chars=17, max_length=17, + xalign=0, top=1, bottom=1, right=0, left=1) + lbox = Gtk.Box(spacing=2, name='med_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['mclk_pst_mask_cur'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['mclk_pst_mask_ent'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + if gpu.get_params_value('gpu_type') == gpu.GPU_Type.CurvePts: + # VDDC Curve Points + 
devices[gpu.prm.uuid]['vddc_curve_range'] = Gtk.Label(name='white_label') + devices[gpu.prm.uuid]['vddc_curve_range'].set_markup( + 'VDDC Curve Points: Ranges {}-{}, {}-{} '.format(gpu.vddc_curve_range['0']['SCLK'][0], + gpu.vddc_curve_range['0']['SCLK'][1], + gpu.vddc_curve_range['0']['VOLT'][0], + gpu.vddc_curve_range['0']['VOLT'][1])) + set_gtk_prop(devices[gpu.prm.uuid]['vddc_curve_range'], top=1, bottom=1, + right=4, left=4, align=(0.0, 0.5)) + lbox = Gtk.Box(spacing=6, name='dark_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['vddc_curve_range'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + # VDDC CURVE Points Values and Entry + devices[gpu.prm.uuid]['vddc_curve_pt'] = {} + for ps, psd in gpu.vddc_curve.items(): + devices[gpu.prm.uuid]['vddc_curve_pt'][ps] = {} + + devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_cur_obj'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_cur_obj'], width_chars=20, + top=1, bottom=1, right=2, left=2, align=(0.0, 0.5)) + devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj'] = Gtk.Entry() + set_gtk_prop(devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj'], width_chars=5, + max_length=5, xalign=1, top=1, bottom=1, right=0, left=0) + devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj_unit'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj_unit'], + top=1, bottom=1, right=4, left=0, align=(0.0, 0.5)) + devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj'] = Gtk.Entry() + set_gtk_prop(devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj'], width_chars=5, + max_length=5, xalign=1, top=1, bottom=1, right=0, left=0) + devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj_unit'] = Gtk.Label(name='white_label') + set_gtk_prop(devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj_unit'], + top=1, bottom=1, right=4, left=0, 
align=(0.0, 0.5)) + lbox = Gtk.Box(spacing=6, name='med_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_cur_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj_unit'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj_unit'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + # Power Performance Mode Selection + devices[gpu.prm.uuid]['ppm'] = Gtk.Label(name='white_label') + devices[gpu.prm.uuid]['ppm'].set_markup('Power Performance Modes:') + set_gtk_prop(devices[gpu.prm.uuid]['ppm'], top=1, bottom=1, right=4, left=4, align=(0.0, 0.5)) + + lbox = Gtk.Box(spacing=6, name='dark_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['ppm'], True, True, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + devices[gpu.prm.uuid]['ppm_modes'] = Gtk.ListStore(int, str) + devices[gpu.prm.uuid]['ppm_mode_items'] = {} + item_num = 0 + for mode_num, mode in gpu.ppm_modes.items(): + if mode_num == 'NUM': + continue + if mode[0] == 'CUSTOM': + continue + devices[gpu.prm.uuid]['ppm_modes'].append([int(mode_num), mode[0]]) + devices[gpu.prm.uuid]['ppm_mode_items'][int(mode_num)] = item_num + item_num += 1 + + lbox = Gtk.Box(spacing=6, name='med_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + devices[gpu.prm.uuid]['ppm_selection'] = Gtk.Label(name='white_label') + devices[gpu.prm.uuid]['ppm_selection'].set_markup(' PPM Selection: ') + set_gtk_prop(devices[gpu.prm.uuid]['ppm_selection'], top=1, bottom=1, right=4, left=4, align=(0.0, 0.5)) + + devices[gpu.prm.uuid]['ppm_modes_combo'] = Gtk.ComboBox.new_with_model_and_entry( + 
devices[gpu.prm.uuid]['ppm_modes']) + devices[gpu.prm.uuid]['ppm_modes_combo'].get_child().set_name('ppm_combo') + devices[gpu.prm.uuid]['ppm_modes_combo'].connect('changed', ppm_select, devices[gpu.prm.uuid]) + devices[gpu.prm.uuid]['ppm_modes_combo'].set_entry_text_column(1) + lbox.pack_start(devices[gpu.prm.uuid]['ppm_selection'], False, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['ppm_modes_combo'], False, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + # Save/Reset Card Buttons + devices[gpu.prm.uuid]['save_button'] = Gtk.Button(label='') + for child in devices[gpu.prm.uuid]['save_button'].get_children(): + child.set_label('Save') + child.set_use_markup(True) + devices[gpu.prm.uuid]['save_button'].connect('clicked', self.save_card, gpu_list, devices, gpu.prm.uuid) + set_gtk_prop(devices[gpu.prm.uuid]['save_button'], width=90) + + devices[gpu.prm.uuid]['reset_button'] = Gtk.Button(label='') + for child in devices[gpu.prm.uuid]['reset_button'].get_children(): + child.set_label('Reset') + child.set_use_markup(True) + devices[gpu.prm.uuid]['reset_button'].connect('clicked', self.reset_card, gpu_list, devices, gpu.prm.uuid) + set_gtk_prop(devices[gpu.prm.uuid]['reset_button'], width=90) + + lbox = Gtk.Box(spacing=6) + lbox.set_name('button_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices[gpu.prm.uuid]['save_button'], True, False, 0) + lbox.pack_start(devices[gpu.prm.uuid]['reset_button'], True, False, 0) + grid.attach(lbox, col, row, 1, 1) + row += 1 + + # Increment column before going to next Device + if max_rows < row: + max_rows = row + col += 1 + # End of for v in values + + # Setup the Save_ALL and Reset_ALL buttons + if num_com_gpus > 1: + # Save/Reset/Update ALL Card Buttons + devices['all_buttons'] = {} + devices['all_buttons']['save_all_button'] = Gtk.Button(label='') + for child in devices['all_buttons']['save_all_button'].get_children(): + child.set_label('Save All') + child.set_use_markup(True) + 
devices['all_buttons']['save_all_button'].connect('clicked', self.save_all_cards, gpu_list, devices) + set_gtk_prop(devices['all_buttons']['save_all_button'], width=100) + + devices['all_buttons']['reset_all_button'] = Gtk.Button(label='') + for child in devices['all_buttons']['reset_all_button'].get_children(): + child.set_label('Reset All') + child.set_use_markup(True) + devices['all_buttons']['reset_all_button'].connect('clicked', self.reset_all_cards, gpu_list, devices) + set_gtk_prop(devices['all_buttons']['reset_all_button'], width=100) + + devices['all_buttons']['refresh_all_button'] = Gtk.Button(label='') + for child in devices['all_buttons']['refresh_all_button'].get_children(): + child.set_label('Refresh All') + child.set_use_markup(True) + devices['all_buttons']['refresh_all_button'].connect('clicked', self.refresh_all_cards, gpu_list, + devices, True) + set_gtk_prop(devices['all_buttons']['refresh_all_button'], width=100) + + lbox = Gtk.Box(spacing=6) + lbox.set_name('button_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(devices['all_buttons']['save_all_button'], True, False, 0) + lbox.pack_start(devices['all_buttons']['reset_all_button'], True, False, 0) + lbox.pack_start(devices['all_buttons']['refresh_all_button'], True, False, 0) + grid.attach(lbox, 0, max_rows, col, 1) + row += 1 + max_rows += 1 + + # Initialize message box + devices['message_label'] = Gtk.Label(name='message_label') + devices['message_label'].set_line_wrap(True) + set_gtk_prop(devices['message_label'], width_max=num_com_gpus * MAX_CHAR, + align=(0.0, 0.5), width=num_com_gpus * MAX_CHAR * CHAR_WIDTH) + devices['message_label'].set_line_wrap(True) + + devices['message_box'] = Gtk.Box(spacing=6) + devices['message_box'].set_name('message_box') + set_gtk_prop(devices['message_box'], top=1, bottom=1, right=1, left=1) + devices['message_box'].pack_start(devices['message_label'], True, True, 1) + grid.attach(devices['message_box'], 0, max_rows, col, 1) + 
row += 1 + + self.update_message(devices, '', 'gray') + self.refresh_pac(gpu_list, devices) + + @staticmethod + def update_message(devices: dict, message: str, color: str = 'gray') -> None: + """ + Set PAC message using default message if no message specified. + + :param devices: Dictionary of GUI items and GPU data. + :param message: + :param color: Valid color strings: gray, yellow, white, red + """ + if message == '': + if env.GUT_CONST.execute_pac: + message = ('Using the --execute_pac option. Changes will be written to the GPU without ' + 'confirmation.\nSudo will be used, so you may be prompted for credentials in ' + 'the window where gpu-pac was executed from.') + else: + message = ('Using gpu-pac without --execute_pac option.\nYou must manually run bash ' + 'file with sudo to execute changes.') + + if color == 'red': + GPUgui.GuiProps.set_style(css_str="#message_label { color: %s; }" % + GPUgui.GuiProps.color_name_to_hex('white_off')) + GPUgui.GuiProps.set_style(css_str="#message_box { background-image: image(%s); }" % + GPUgui.GuiProps.color_name_to_hex('red')) + elif color == 'yellow': + GPUgui.GuiProps.set_style(css_str="#message_label { color: %s; }" % + GPUgui.GuiProps.color_name_to_hex('white_off')) + GPUgui.GuiProps.set_style(css_str="#message_box { background-image: image(%s); }" % + GPUgui.GuiProps.color_name_to_hex('yellow')) + elif color == 'white': + GPUgui.GuiProps.set_style(css_str="#message_label { color: %s; }" % + GPUgui.GuiProps.color_name_to_hex('gray95')) + GPUgui.GuiProps.set_style(css_str="#message_box { background-image: image(%s); }" % + GPUgui.GuiProps.color_name_to_hex('gray20')) + else: + GPUgui.GuiProps.set_style(css_str="#message_label { color: %s; }" % + GPUgui.GuiProps.color_name_to_hex('white_off')) + GPUgui.GuiProps.set_style(css_str="#message_box { background-image: image(%s); }" % + GPUgui.GuiProps.color_name_to_hex('gray50')) + devices['message_label'].set_text(message) + + while Gtk.events_pending(): + 
Gtk.main_iteration_do(True) + + def refresh_all_cards(self, _, gpu_list: Gpu.GpuList, devices: dict, reset_message: bool = False) -> None: + """ + Refresh all cards by calling card level refresh. + + :param _: parent not used + :param gpu_list: + :param devices: Dictionary of GUI items and GPU data. + :param reset_message: + """ + self.refresh_pac(gpu_list, devices, reset_message) + + def refresh_pac(self, gpu_list: Gpu.GpuList, devices: dict, refresh_message: bool = False) -> None: + """ + Update device data from gpuList data. + + :param gpu_list: gpuList of all gpuItems + :param devices: Dictionary of GUI items and GPU data. + :param refresh_message: + """ + # Read sensor and state data from GPUs + gpu_list.read_gpu_sensor_set(data_type=Gpu.GpuItem.SensorSet.All) + # Read pstate and ppm table data + gpu_list.read_gpu_pstates() + gpu_list.read_gpu_ppm_table() + + for gpu in gpu_list.gpus(): + devices[gpu.prm.uuid]['power_cap_cur'].set_text(' Current: {:3d}W Set: '.format( + gpu.get_params_value('power_cap', num_as_int=True))) + devices[gpu.prm.uuid]['power_cap_ent'].set_text(str(gpu.get_params_value('power_cap', num_as_int=True))) + if env.GUT_CONST.show_fans: + devices[gpu.prm.uuid]['fan_pwm_cur'].set_text(' Current: {:3d}% Set: '.format( + gpu.get_params_value('fan_pwm', num_as_int=True))) + devices[gpu.prm.uuid]['fan_pwm_ent'].set_text(str(gpu.get_params_value('fan_pwm', num_as_int=True))) + LOGGER.debug('Refresh got current pwm speed: %s', devices[gpu.prm.uuid]['fan_pwm_ent'].get_text()) + # SCLK + if gpu.get_params_value('gpu_type') in [gpu.GPU_Type.PStates]: + for ps, psd in gpu.sclk_state.items(): + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'].set_text(' {}: {}, {}'.format(ps, *psd)) + item_value = re.sub(PATTERNS['END_IN_ALPHA'], '', str(psd[0])) + item_unit = re.sub(PATTERNS['IS_FLOAT'], '', str(psd[0])) + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'].set_text(item_value) + 
devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'].set_text(item_unit + ' ') + item_value = re.sub(PATTERNS['END_IN_ALPHA'], '', str(psd[1])) + item_unit = re.sub(PATTERNS['IS_FLOAT'], '', str(psd[1])) + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj'].set_text(str(item_value)) + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_v_obj_unit'].set_text(item_unit) + devices[gpu.prm.uuid]['sclk_pst_mask_cur'].set_text( + ' SCLK Default: {} Set Mask: '.format(gpu.prm.sclk_mask)) + devices[gpu.prm.uuid]['sclk_pst_mask_ent'].set_text(gpu.prm.sclk_mask) + elif gpu.get_params_value('gpu_type') == gpu.GPU_Type.CurvePts: + for ps, psd in gpu.sclk_state.items(): + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_cur_obj'].set_text(' {}: {}'.format(ps, psd[0])) + item_value = re.sub(PATTERNS['END_IN_ALPHA'], '', str(psd[0])) + item_unit = re.sub(PATTERNS['IS_FLOAT'], '', str(psd[0])) + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj'].set_text(item_value) + devices[gpu.prm.uuid]['sclk_pstate'][ps]['gtk_ent_f_obj_unit'].set_text(item_unit + ' ') + devices[gpu.prm.uuid]['sclk_pst_mask_cur'].set_text( + ' SCLK Default: {} Set Mask: '.format(gpu.prm.sclk_mask)) + devices[gpu.prm.uuid]['sclk_pst_mask_ent'].set_text(gpu.prm.sclk_mask) + # MCLK + if gpu.get_params_value('gpu_type') in [gpu.GPU_Type.PStates]: + for ps, psd in gpu.mclk_state.items(): + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'].set_text(' {}: {}, {}'.format(ps, *psd)) + item_value = re.sub(PATTERNS['END_IN_ALPHA'], '', str(psd[0])) + item_unit = re.sub(PATTERNS['IS_FLOAT'], '', str(psd[0])) + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'].set_text(item_value) + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'].set_text(item_unit + ' ') + item_value = re.sub(PATTERNS['END_IN_ALPHA'], '', str(psd[1])) + item_unit = re.sub(PATTERNS['IS_FLOAT'], '', str(psd[1])) + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj'].set_text(str(item_value)) + 
devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_v_obj_unit'].set_text(item_unit) + devices[gpu.prm.uuid]['mclk_pst_mask_cur'].set_text( + ' MCLK Default: {} Set Mask: '.format(gpu.prm.mclk_mask)) + devices[gpu.prm.uuid]['mclk_pst_mask_ent'].set_text(gpu.prm.mclk_mask) + elif gpu.get_params_value('gpu_type') == gpu.GPU_Type.CurvePts: + for ps, psd in gpu.mclk_state.items(): + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_cur_obj'].set_text(' {}: {}'.format(ps, psd[0])) + item_value = re.sub(PATTERNS['END_IN_ALPHA'], '', str(psd[0])) + item_unit = re.sub(PATTERNS['IS_FLOAT'], '', str(psd[0])) + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj'].set_text(item_value) + devices[gpu.prm.uuid]['mclk_pstate'][ps]['gtk_ent_f_obj_unit'].set_text(item_unit + ' ') + devices[gpu.prm.uuid]['mclk_pst_mask_cur'].set_text( + ' MCLK Default: {} Set Mask: '.format(gpu.prm.mclk_mask)) + devices[gpu.prm.uuid]['mclk_pst_mask_ent'].set_text(gpu.prm.mclk_mask) + # VDDC CURVE + if gpu.get_params_value('gpu_type') == gpu.GPU_Type.CurvePts: + for ps, psd in gpu.vddc_curve.items(): + devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_cur_obj'].set_text(' {}: {}, {}'.format(ps, *psd)) + item_value = re.sub(PATTERNS['END_IN_ALPHA'], '', str(psd[0])) + item_unit = re.sub(PATTERNS['IS_FLOAT'], '', str(psd[0])) + devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj'].set_text(item_value) + devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_f_obj_unit'].set_text(item_unit + ' ') + item_value = re.sub(PATTERNS['END_IN_ALPHA'], '', str(psd[1])) + item_unit = re.sub(PATTERNS['IS_FLOAT'], '', str(psd[1])) + devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj'].set_text(str(item_value)) + devices[gpu.prm.uuid]['vddc_curve_pt'][ps]['gtk_ent_v_obj_unit'].set_text(item_unit) + + # refresh active mode item + devices[gpu.prm.uuid]['ppm_modes_combo'].set_active( + devices[gpu.prm.uuid]['ppm_mode_items'][gpu.get_current_ppm_mode()[0]]) + + if refresh_message: + self.update_message(devices, 
'Refresh complete.\n', 'gray') + while Gtk.events_pending(): + Gtk.main_iteration_do(True) + + def save_all_cards(self, parent, gpu_list: Gpu.GpuList, devices: dict) -> None: + """ + Save modified data for all GPUs. + + :param parent: parent + :param gpu_list: + :param devices: Dictionary of GUI items and GPU data. + """ + changed = 0 + # Write start message + if env.GUT_CONST.execute_pac: + message = ('Using the --execute_pac option. Changes will be written to the GPU without ' + 'confirmation.\nSudo will be used, so you may be prompted for credentials in ' + 'the window where gpu-pac was executed from.') + else: + message = 'Writing PAC command bash file.\n' + self.update_message(devices, message, 'red') + + # save each card + for uuid in gpu_list.uuids(): + changed += self.save_card(parent, gpu_list, devices, uuid, refresh=False) + + # Write finish message + time.sleep(1.0) + if env.GUT_CONST.execute_pac: + if changed: + message = ('Write {} PAC commands to card complete.\n' + 'Confirm changes with gpu-mon.').format(changed) + else: + message = 'No PAC commands to write to card.\nNo changes specified.' + else: + if changed: + message = ('Writing {} PAC commands to bash file complete.\n' + 'Run bash file with sudo to execute changes.').format(changed) + else: + message = 'No PAC commands to write to bash file.\nNo changes specified.' + self.update_message(devices, message, 'yellow') + + self.refresh_all_cards(parent, gpu_list, devices) + + def save_card(self, _, gpu_list: Gpu.GpuList, devices: dict, uuid: str, refresh: bool = True) -> None: + """ + Save modified data for specified GPU. + + :param _: parent not used + :param gpu_list: + :param devices: Dictionary of GUI items and GPU data. + :param uuid: GPU device ID + :param refresh: Flag to indicate if refresh should be done + """ + if refresh: + # Write message + if env.GUT_CONST.execute_pac: + message = ('Using the --execute_pac option. 
Changes will be written to the GPU ' + 'without confirmation.\nSudo will be used, so you may be prompted for ' + 'credentials in the window where gpu-pac was executed from.') + else: + message = 'Writing PAC commands to bash file.\n' + self.update_message(devices, message, 'red') + + # Specify output batch file name + out_filename = os.path.join(os.getcwd(), 'pac_writer_{}.sh'.format(uuid4().hex)) + fileptr = open(out_filename, 'x') + # Output header + print('#!/bin/sh', file=fileptr) + print('###########################################################################', file=fileptr) + print('## rickslab-gpu-pac generated script to modify GPU configuration/settings', file=fileptr) + print('###########################################################################', file=fileptr) + print('', file=fileptr) + print('###########################################################################', file=fileptr) + print('## WARNING - Do not execute this script without completely', file=fileptr) + print('## understanding appropriate values to write to your specific GPUs', file=fileptr) + print('###########################################################################', file=fileptr) + print('#', file=fileptr) + print('# Copyright (C) 2019 RueiKe', file=fileptr) + print('#', file=fileptr) + print('# This program is free software: you can redistribute it and/or modify', file=fileptr) + print('# it under the terms of the GNU General Public License as published by', file=fileptr) + print('# the Free Software Foundation, either version 3 of the License, or', file=fileptr) + print('# (at your option) any later version.', file=fileptr) + print('#', file=fileptr) + print('# This program is distributed in the hope that it will be useful,', file=fileptr) + print('# but WITHOUT ANY WARRANTY; without even the implied warranty of', file=fileptr) + print('# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the', file=fileptr) + print('# GNU General Public License for more details.', file=fileptr) + print('#', file=fileptr) + print('# You should have received a copy of the GNU General Public License', file=fileptr) + print('# along with this program. If not, see .', file=fileptr) + print('###########################################################################', file=fileptr) + + changed = 0 + gpu = gpu_list[uuid] + print('# ', file=fileptr) + print('# Card{} {}'.format(gpu.prm.card_num, gpu.get_params_value('model')), file=fileptr) + print('# {}'.format(gpu.prm.card_path), file=fileptr) + if not env.GUT_CONST.write_delta_only: + print('# Force Write mode.') + else: + print('# Write Delta mode.') + print('# ', file=fileptr) + print('set -x', file=fileptr) + + # Check/set power_dpm_force_performance_level + # Mode of manual required to change ppm or clock masks + curr_power_dpm_force = gpu.get_params_value('power_dpm_force').lower() + if curr_power_dpm_force == 'manual' and env.GUT_CONST.write_delta_only: + print('# Power DPM Force Performance Level: already [{}], skipping.'.format(curr_power_dpm_force), + file=fileptr) + else: + power_dpm_force_file = os.path.join(gpu.prm.card_path, 'power_dpm_force_performance_level') + print('# Power DPM Force Performance Level: [{}] change to [manual]'.format(curr_power_dpm_force), + file=fileptr) + print("sudo sh -c \"echo \'manual\' > {}\"".format(power_dpm_force_file), file=fileptr) + + # Power Cap + power_cap_file = os.path.join(gpu.prm.hwmon_path, 'power1_cap') + old_power_cap = gpu.get_params_value('power_cap', num_as_int=True) + new_power_cap_str = devices[uuid]['power_cap_ent'].get_text() + if new_power_cap_str.lower() == 'reset': + changed += 1 + print('# Powercap entry: {}, Resetting to default'.format(new_power_cap_str), file=fileptr) + print("sudo sh -c \"echo \'0\' > {}\"".format(power_cap_file), file=fileptr) + elif re.fullmatch(PATTERNS['DIGITS'], new_power_cap_str): + new_power_cap = int(new_power_cap_str) + 
power_cap_range = gpu.get_params_value('power_cap_range') + print('# Powercap Old: {}'.format(old_power_cap), end='', file=fileptr) + print(' New: {}'.format(new_power_cap), end='', file=fileptr) + print(' Min: {}'.format(power_cap_range[0]), end='', file=fileptr) + print(' Max: {}\n'.format(power_cap_range[1]), end='', file=fileptr) + if new_power_cap == old_power_cap and env.GUT_CONST.write_delta_only: + print('# No Powercap changes, skipped', file=fileptr) + else: + if gpu.is_valid_power_cap(new_power_cap): + changed += 1 + print("sudo sh -c \"echo \'{}\' > {}\"".format((int(1000000 * new_power_cap)), power_cap_file), + file=fileptr) + else: + print('# Invalid power_cap parameter values', file=fileptr) + print('Invalid power_cap parameter values') + else: + print('# Powercap New: {}, invalid input, ignoring'.format(new_power_cap_str), file=fileptr) + new_power_cap = old_power_cap + + if env.GUT_CONST.show_fans: + # Fan PWM + pwm_enable_file = os.path.join(gpu.prm.hwmon_path, 'pwm1_enable') + pwm_file = os.path.join(gpu.prm.hwmon_path, 'pwm1') + old_pwm = gpu.get_params_value('fan_pwm', num_as_int=True) + LOGGER.debug('Current pwm value: %s', old_pwm) + new_pwm_str = devices[uuid]['fan_pwm_ent'].get_text() + LOGGER.debug('Original pwm value %s, entered value %s', old_pwm, new_pwm_str) + if new_pwm_str.lower() == 'reset': + changed += 1 + print('# PWM entry: {}, Resetting to default mode of dynamic'.format(new_pwm_str), file=fileptr) + print("sudo sh -c \"echo \'0\' > {}\"".format(pwm_enable_file), file=fileptr) + print("sudo sh -c \"echo \'2\' > {}\"".format(pwm_enable_file), file=fileptr) + elif new_pwm_str.lower() == 'max': + changed += 1 + print('# PWM entry: {}, Disabling fan control'.format(new_pwm_str), file=fileptr) + print("sudo sh -c \"echo \'0\' > {}\"".format(pwm_enable_file), file=fileptr) + elif re.fullmatch(PATTERNS['DIGITS'], new_pwm_str): + new_pwm = int(new_pwm_str) + print('# Fan PWM Old: {}'.format(old_pwm), end='', file=fileptr) + print(' New: 
{}'.format(new_pwm), end='', file=fileptr) + pwm_range = gpu.get_params_value('fan_pwm_range') + print(' Min: {}'.format(pwm_range[0]), end='', file=fileptr) + print(' Max: {}\n'.format(pwm_range[1]), end='', file=fileptr) + if new_pwm == old_pwm and env.GUT_CONST.write_delta_only: + print('# No PWM changes, skipped', file=fileptr) + elif new_pwm == 0 and old_pwm is None: + print('# No PWM changes, None to Zero skipped', file=fileptr) + elif new_pwm < 20: + print('# Specified PWM value below min safe limit of 20%, skipped', file=fileptr) + LOGGER.debug('Unsafe PWM value skipped: %s', new_pwm_str) + else: + if gpu.is_valid_fan_pwm(new_pwm): + changed += 1 + new_pwm_value = int(255 * new_pwm / 100) + print("sudo sh -c \"echo \'1\' > {}\"".format(pwm_enable_file), file=fileptr) + print("sudo sh -c \"echo \'{}\' > {}\"".format(new_pwm_value, pwm_file), file=fileptr) + else: + print('# Invalid pwm parameter values', file=fileptr) + print('Invalid pwm parameter values') + else: + print('# PWM entry: {}, invalid input, ignoring'.format(new_pwm_str), file=fileptr) + new_pwm = old_pwm + + device_file = os.path.join(gpu.prm.card_path, 'pp_od_clk_voltage') + commit_needed = False + if gpu.get_params_value('gpu_type') == gpu.GPU_Type.PStates: + # Sclk P-states + for comp_name, comp_item in devices[uuid]['sclk_pstate'].items(): + if not comp_item['gtk_ent_f_obj'].get_text().isnumeric(): + print('# Invalid sclk pstate entry: {}'.format(comp_item['gtk_ent_f_obj'].get_text()), + file=fileptr) + print('# Invalid sclk pstate entry: {}'.format(comp_item['gtk_ent_f_obj'].get_text())) + continue + if not comp_item['gtk_ent_v_obj'].get_text().isnumeric(): + print('# Invalid sclk pstate entry: {}'.format(comp_item['gtk_ent_v_obj'].get_text()), + file=fileptr) + print('# Invalid sclk pstate entry: {}'.format(comp_item['gtk_ent_v_obj'].get_text())) + pstate = [comp_name, + int(comp_item['gtk_ent_f_obj'].get_text()), + int(comp_item['gtk_ent_v_obj'].get_text())] + print('#sclk p-state: {} : 
{} MHz, {} mV'.format(pstate[0], pstate[1], pstate[2]), file=fileptr) + if gpu.is_valid_sclk_pstate(pstate): + if gpu.is_changed_sclk_pstate(pstate) or not env.GUT_CONST.write_delta_only: + changed += 1 + commit_needed = True + print("sudo sh -c \"echo \'s {} {} {}\' > {}\"".format(pstate[0], pstate[1], + pstate[2], device_file), file=fileptr) + else: + print('# Sclk pstate {} unchanged, skipping'.format(comp_name), file=fileptr) + else: + print('# Invalid sclk pstate values', file=fileptr) + print('Invalid sclk pstate values') + # Mclk P-states + for comp_name, comp_item in devices[uuid]['mclk_pstate'].items(): + if not comp_item['gtk_ent_f_obj'].get_text().isnumeric(): + print('# Invalid mclk pstate entry: {}'.format(comp_item['gtk_ent_f_obj'].get_text()), + file=fileptr) + print('# Invalid mclk pstate entry: {}'.format(comp_item['gtk_ent_f_obj'].get_text())) + continue + if not comp_item['gtk_ent_v_obj'].get_text().isnumeric(): + print('# Invalid mclk pstate entry: {}'.format(comp_item['gtk_ent_v_obj'].get_text()), + file=fileptr) + print('# Invalid mclk pstate entry: {}'.format(comp_item['gtk_ent_v_obj'].get_text())) + continue + pstate = [comp_name, + int(comp_item['gtk_ent_f_obj'].get_text()), + int(comp_item['gtk_ent_v_obj'].get_text())] + print('#mclk p-state: {} : {} MHz, {} mV'.format(pstate[0], pstate[1], pstate[2]), file=fileptr) + if gpu.is_valid_mclk_pstate(pstate): + if gpu.is_changed_mclk_pstate(pstate) or not env.GUT_CONST.write_delta_only: + changed += 1 + commit_needed = True + print("sudo sh -c \"echo \'m {} {} {}\' > {}\"".format(pstate[0], pstate[1], + pstate[2], device_file), file=fileptr) + else: + print('# Mclk pstate {} unchanged, skipping'.format(comp_name), file=fileptr) + else: + print('# Invalid mclk pstate values', file=fileptr) + print('Invalid mclk pstate values') + elif gpu.get_params_value('gpu_type') == gpu.GPU_Type.CurvePts: + # Sclk Curve End Points + for comp_name, comp_item in devices[uuid]['sclk_pstate'].items(): + if not 
comp_item['gtk_ent_f_obj'].get_text().isnumeric(): + print('# Invalid sclk curve end point entry: {}'.format(comp_item['gtk_ent_f_obj'].get_text()), + file=fileptr) + print('# Invalid sclk curve end point entry: {}'.format(comp_item['gtk_ent_f_obj'].get_text())) + continue + pstate = [comp_name, int(comp_item['gtk_ent_f_obj'].get_text()), '-'] + print('# Sclk curve end point: {} : {} MHz'.format(pstate[0], pstate[1]), file=fileptr) + if gpu.is_valid_sclk_pstate(pstate): + if gpu.is_changed_sclk_pstate(pstate) or not env.GUT_CONST.write_delta_only: + changed += 1 + commit_needed = True + print("sudo sh -c \"echo \'s {} {}\' > {}\"".format(pstate[0], pstate[1], device_file), + file=fileptr) + else: + print('# Sclk curve point {} unchanged, skipping'.format(comp_name), file=fileptr) + else: + print('# Invalid sclk curve end point values', file=fileptr) + print('Invalid sclk curve end point values') + # Mclk Curve End Points + for comp_name, comp_item in devices[uuid]['mclk_pstate'].items(): + if not comp_item['gtk_ent_f_obj'].get_text().isnumeric(): + print('# Invalid mclk curve end point entry: {}'.format(comp_item['gtk_ent_f_obj'].get_text()), + file=fileptr) + print('# Invalid mclk curve end point entry: {}'.format(comp_item['gtk_ent_f_obj'].get_text())) + continue + pstate = [comp_name, int(comp_item['gtk_ent_f_obj'].get_text()), '-'] + print('# Mclk curve end point: {} : {} MHz'.format(pstate[0], pstate[1]), file=fileptr) + if gpu.is_valid_mclk_pstate(pstate): + if gpu.is_changed_mclk_pstate(pstate) or not env.GUT_CONST.write_delta_only: + changed += 1 + commit_needed = True + print("sudo sh -c \"echo \'m {} {}\' > {}\"".format(pstate[0], pstate[1], device_file), + file=fileptr) + else: + print('# Mclk curve point {} unchanged, skipping'.format(comp_name), file=fileptr) + else: + print('# Invalid mclk curve end point values', file=fileptr) + print('Invalid mclk curve end point values') + # VDDC Curve Points + for comp_name, comp_item in 
devices[uuid]['vddc_curve_pt'].items(): + if not comp_item['gtk_ent_f_obj'].get_text().isnumeric(): + print('# Invalid vddc curve point entry: {}'.format(comp_item['gtk_ent_f_obj'].get_text()), + file=fileptr) + print('# Invalid vddc curve point entry: {}'.format(comp_item['gtk_ent_f_obj'].get_text())) + continue + if not comp_item['gtk_ent_v_obj'].get_text().isnumeric(): + print('# Invalid vddc curve point entry: {}'.format(comp_item['gtk_ent_v_obj'].get_text()), + file=fileptr) + print('# Invalid vddc curve point entry: {}'.format(comp_item['gtk_ent_v_obj'].get_text())) + continue + curve_pts = [comp_name, + int(comp_item['gtk_ent_f_obj'].get_text()), + int(comp_item['gtk_ent_v_obj'].get_text())] + print('# Vddc curve point: {} : {} MHz, {} mV'.format(curve_pts[0], curve_pts[1], curve_pts[2]), + file=fileptr) + if gpu.is_valid_vddc_curve_pts(curve_pts): + if gpu.is_changed_vddc_curve_pt(curve_pts) or not env.GUT_CONST.write_delta_only: + changed += 1 + commit_needed = True + print("sudo sh -c \"echo \'vc {} {} {}\' > {}\"".format(curve_pts[0], curve_pts[1], + curve_pts[2], device_file), file=fileptr) + else: + print('# Vddc curve point {} unchanged, skipping'.format(comp_name), file=fileptr) + else: + print('# Invalid Vddc curve point values', file=fileptr) + print('Invalid Vddc curve point values') + + # PPM + ppm_mode_file = os.path.join(gpu.prm.card_path, 'pp_power_profile_mode') + + tree_iter = devices[uuid]['ppm_modes_combo'].get_active_iter() + if tree_iter is not None: + model = devices[uuid]['ppm_modes_combo'].get_model() + row_id, name = model[tree_iter][:2] + selected_mode = devices[uuid]['new_ppm'][0] + print('# Selected: ID={}, name={}'.format(devices[uuid]['new_ppm'][0], devices[uuid]['new_ppm'][1]), + file=fileptr) + if gpu.get_current_ppm_mode()[0] != devices[uuid]['new_ppm'][0] or not env.GUT_CONST.write_delta_only: + changed += 1 + print("sudo sh -c \"echo \'{}\' > {}\"".format(devices[uuid]['new_ppm'][0], ppm_mode_file), + file=fileptr) + else: 
+ print('# PPM mode {} unchanged, skipping'.format(devices[uuid]['new_ppm'][1]), file=fileptr) + + # Commit changes + device_file = os.path.join(gpu.prm.card_path, 'pp_od_clk_voltage') + if commit_needed: + changed += 1 + print("sudo sh -c \"echo \'c\' > {}\"".format(device_file), file=fileptr) + else: + print('# No clock changes made, commit skipped', file=fileptr) + + if gpu.get_params_value('gpu_type') in [gpu.GPU_Type.PStates, gpu.GPU_Type.CurvePts]: + # Writes of pstate Masks must come after commit of pstate changes + # Sclk Mask + sclk_mask_file = os.path.join(gpu.prm.card_path, 'pp_dpm_sclk') + old_sclk_mask = gpu.prm.sclk_mask.replace(',', ' ') + new_sclk_mask = devices[uuid]['sclk_pst_mask_ent'].get_text().replace(' ', '').strip() + new_sclk_mask = new_sclk_mask.replace(',', ' ').strip() + print('# Sclk P-State Mask Default: {}'.format(old_sclk_mask), end='', file=fileptr) + print(' New: {}'.format(new_sclk_mask), file=fileptr) + if new_sclk_mask == old_sclk_mask and env.GUT_CONST.write_delta_only: + print('# No changes, skipped', file=fileptr) + else: + if gpu.is_valid_pstate_list_str(new_sclk_mask, 'SCLK'): + changed += 1 + if new_sclk_mask == '': + # reset + print('# Resetting SCLK Mask to default', file=fileptr) + print("sudo sh -c \"echo \'{}\' > {}\"".format(old_sclk_mask, sclk_mask_file), file=fileptr) + else: + print("sudo sh -c \"echo \'{}\' > {}\"".format(new_sclk_mask, sclk_mask_file), file=fileptr) + else: + print('# Invalid sclk mask parameter values', file=fileptr) + print('Invalid sclk mask parameter values: {}'.format(new_sclk_mask)) + + # Mclk Mask + mclk_mask_file = os.path.join(gpu.prm.card_path, 'pp_dpm_mclk') + old_mclk_mask = gpu.prm.mclk_mask.replace(',', ' ') + new_mclk_mask = devices[uuid]['mclk_pst_mask_ent'].get_text().replace(' ', '').strip() + new_mclk_mask = new_mclk_mask.replace(',', ' ').strip() + print('# Mclk P-State Mask Default: {}'.format(old_mclk_mask), end='', file=fileptr) + print(' New: {}'.format(new_mclk_mask), 
file=fileptr) + if new_mclk_mask == old_mclk_mask and env.GUT_CONST.write_delta_only: + print('# No changes, skipped', file=fileptr) + else: + if gpu.is_valid_pstate_list_str(new_mclk_mask, 'MCLK'): + changed += 1 + if new_mclk_mask == '': + # reset + print('# Resetting MCLK Mask to default', file=fileptr) + print("sudo sh -c \"echo \'{}\' > {}\"".format(old_mclk_mask, mclk_mask_file), file=fileptr) + else: + print("sudo sh -c \"echo \'{}\' > {}\"".format(new_mclk_mask, mclk_mask_file), file=fileptr) + else: + print('# Invalid mclk mask parameter values', file=fileptr) + print('Invalid mclk mask parameter values: {}'.format(new_mclk_mask)) + + # Close file and Set permissions and Execute it --execute_pac + fileptr.close() + os.chmod(out_filename, 0o744) + print('Batch file completed: {}'.format(out_filename)) + if env.GUT_CONST.execute_pac: + # Execute bash file + print('Writing {} changes to GPU {}'.format(changed, gpu.prm.card_path)) + cmd = subprocess.Popen(out_filename, shell=True) + cmd.wait() + print('PAC execution complete.') + + if refresh: + # dismiss execute_pac message + time.sleep(0.5) + if changed: + message = ('Write of {} PAC commands to card complete.\n' + 'Confirm changes with gpu-monitor.').format(changed) + else: + message = 'No PAC commands to write to card.\nNo changes specified.' + self.update_message(devices, message, 'yellow') + + if refresh: + self.refresh_pac(gpu_list, devices) + os.remove(out_filename) + else: + if refresh: + # dismiss execute_pac message + if changed: + message = ('Write of {} PAC commands to bash file complete.\n' + 'Manually run bash file with sudo to execute changes.').format(changed) + else: + message = 'No PAC commands to write bash file.\nNo changes specified.' 
def reset_card(self, _, gpu_list: Gpu.GpuList, devices: dict, uuid: str, refresh: bool = True) -> None:
    """
    Reset data for specified GPU.

    Writes a uniquely named shell script into the current working directory
    containing the reset commands for the selected card.  When the
    ``--execute_pac`` option is active the script is executed immediately and
    then removed; otherwise it is left in place for the user to run.

    :param _: parent not used
    :param gpu_list: GPU list object, indexed by uuid
    :param devices: Dictionary of GUI items and GPU data.
    :param uuid: GPU device ID
    :param refresh: Flag to indicate if refresh should be done
    :raises KeyError: (or whatever gpu_list raises) if uuid is unknown; no file is created in that case
    :raises FileExistsError: if the generated script name already exists
    """
    gut_const = env.GUT_CONST

    if refresh:
        # Write message
        if gut_const.execute_pac:
            message = ('Using the --execute_pac option Reset commands will be written to the GPU '
                       'without confirmation.\nSudo will be used, so you may be prompted for '
                       'credentials in the window where gpu-pac was executed from.')
        else:
            message = 'Writing reset commands to bash file.\n'
        self.update_message(devices, message, 'red')

    # Resolve the GPU before touching the file system so that an unknown uuid
    # does not leave a half-written script behind.
    gpu = gpu_list[uuid]

    # specify output batch file name
    out_filename = os.path.join(os.getcwd(), 'pac_resetter_{}.sh'.format(uuid4().hex))

    rule = '###########################################################################'
    header = (
        '#!/bin/sh',
        rule,
        '## rickslab-gpu-pac generated script to modify GPU configuration/settings',
        rule,
        '',
        rule,
        '## WARNING - Do not execute this script without completely',
        '## understanding appropriate value to write to your specific GPUs',
        rule,
        '#',
        '# Copyright (C) 2019 RueiKe',
        '#',
        '# This program is free software: you can redistribute it and/or modify',
        '# it under the terms of the GNU General Public License as published by',
        '# the Free Software Foundation, either version 3 of the License, or',
        '# (at your option) any later version.',
        '#',
        '# This program is distributed in the hope that it will be useful,',
        '# but WITHOUT ANY WARRANTY; without even the implied warranty of',
        '# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the',
        '# GNU General Public License for more details.',
        '#',
        '# You should have received a copy of the GNU General Public License',
        '# along with this program. If not, see .',
        rule,
        '# ',
        '# Card{} {}'.format(gpu.prm.card_num, gpu.get_params_value('model')),
        '# {}'.format(gpu.prm.card_path),
        '# ',
        'set -x',
    )

    # Driver files that receive the reset values
    power_cap_file = os.path.join(gpu.prm.hwmon_path, 'power1_cap')
    pwm_enable_file = os.path.join(gpu.prm.hwmon_path, 'pwm1_enable')
    device_file = os.path.join(gpu.prm.card_path, 'pp_od_clk_voltage')
    power_dpm_force_file = os.path.join(gpu.prm.card_path, 'power_dpm_force_performance_level')

    # (value, target file) pairs in the order they must be written
    writes = [('0', power_cap_file)]
    if gut_const.show_fans:
        writes.append(('2', pwm_enable_file))
    writes.extend([('auto', power_dpm_force_file),
                   ('r', device_file),
                   ('c', device_file)])
    # No need to reset clk pstate masks as commit to pp_od_clk_voltage will reset

    # 'x' mode refuses to overwrite an existing file; the context manager
    # guarantees the handle is closed even if a write fails.
    with open(out_filename, 'x') as fileptr:
        for line in header:
            print(line, file=fileptr)
        for value, target in writes:
            print('sudo sh -c "echo \'{}\' > {}"'.format(value, target), file=fileptr)

    # Set permissions and Execute it --execute_pac
    os.chmod(out_filename, 0o744)
    print('Batch file completed: {}'.format(out_filename))
    if gut_const.execute_pac:
        print('Writing changes to GPU {}'.format(gpu.prm.card_path))
        # Run the script directly (it carries its own shebang) instead of through
        # a shell string, so a working directory containing spaces still works.
        subprocess.run([out_filename], check=False)
        print('')
        if refresh:
            # Dismiss execute_pac message
            message = 'Write reset commands to card complete.\nConfirm changes with gpu-mon.'
            self.update_message(devices, message, 'yellow')
            self.refresh_pac(gpu_list, devices)
        os.remove(out_filename)
    else:
        print('Execute to write changes to GPU {}.\n'.format(gpu.prm.card_path))
        if refresh:
            # Dismiss execute_pac message
            message = 'Write reset commands to bash file complete.\nRun bash file with sudo to execute changes.'
            self.update_message(devices, message, 'yellow')
__version__) + + if env.GUT_CONST.check_env() < 0: + print('Error in environment. Exiting...') + sys.exit(-1) + + # Get list of GPUs and get basic non-driver details + gpu_list = Gpu.GpuList() + gpu_list.set_gpu_list() + + # Check list of GPUs + num_gpus = gpu_list.num_vendor_gpus() + print('Detected GPUs: ', end='') + for i, (type_name, type_value) in enumerate(num_gpus.items()): + if i: + print(', {}: {}'.format(type_name, type_value), end='') + else: + print('{}: {}'.format(type_name, type_value), end='') + print('') + if 'AMD' in num_gpus.keys(): + env.GUT_CONST.read_amd_driver_version() + print('AMD: {}'.format(gpu_list.wattman_status())) + if 'NV' in num_gpus.keys(): + print('nvidia smi: [{}]'.format(env.GUT_CONST.cmd_nvidia_smi)) + + num_gpus = gpu_list.num_gpus() + if num_gpus['total'] == 0: + print('No GPUs detected, exiting...') + sys.exit(-1) + + # Read data static/dynamic/info/state driver information for GPUs + gpu_list.read_gpu_sensor_set(data_type=Gpu.GpuItem.SensorSet.All) + + # Check number of readable/writable GPUs again + num_gpus = gpu_list.num_gpus() + print('{} total GPUs, {} rw, {} r-only, {} w-only\n'.format(num_gpus['total'], num_gpus['rw'], + num_gpus['r-only'], num_gpus['w-only'])) + + # Check number of compatible GPUs again + com_gpu_list = gpu_list.list_gpus(compatibility=Gpu.GpuItem.GPU_Comp.Writable) + writable_gpus = com_gpu_list.num_gpus()['total'] + if not writable_gpus: + print('None are writable, exiting...') + sys.exit(-1) + com_gpu_list.read_gpu_pstates() + com_gpu_list.read_gpu_ppm_table() + + # Display Gtk style Monitor + devices = {} + gmonitor = PACWindow(com_gpu_list, devices) + gmonitor.connect('delete-event', Gtk.main_quit) + gmonitor.show_all() + + Gtk.main() + + +if __name__ == '__main__': + main() diff -Nru ricks-amdgpu-utils-3.0.0/gpu-plot ricks-amdgpu-utils-3.5.0/gpu-plot --- ricks-amdgpu-utils-3.0.0/gpu-plot 1970-01-01 00:00:00.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/gpu-plot 2020-07-06 00:57:49.000000000 
+0000 @@ -0,0 +1,874 @@ +#!/usr/bin/python3 +""" gpu-plot - Plot GPU parameter values over time + + A utility to continuously plot the trend of critical GPU parameters for all compatible + GPUs. The *--sleep N* can be used to specify the update interval. The *gpu-plot* + utility has 2 modes of operation. The default mode is to read the GPU driver details + directly, which is useful as a standalone utility. The *--stdin* option causes + *gpu-plot* to read GPU data from stdin. This is how *gpu-mon* produces the + plot and can also be used to pipe your own data into the process. The *--simlog* + option can be used with the *--stdin* when a monitor log file is piped as stdin. + This is useful for troubleshooting and can be used to display saved log results. + The *--ltz* option results in the use of local time instead of UTC. If you plan + to run both *gpu-plot* and *gpu-mon*, then the *--plot* option of the + *gpu-mon* utility should be used instead of both utilities in order reduce + data reads by a factor of 2. + + Copyright (C) 2019 RicksLab + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+""" +__author__ = 'RueiKe' +__copyright__ = 'Copyright (C) 2019 RicksLab' +__credits__ = ['Craig Echt - Testing, Debug, Verification, and Documentation', + 'Keith Myers - Testing, Debug, Verification of NV Capability'] +__license__ = 'GNU General Public License' +__program_name__ = 'gpu-plot' +__maintainer__ = 'RueiKe' +__docformat__ = 'reStructuredText' +# pylint: disable=multiple-statements +# pylint: disable=line-too-long +# pylint: disable=bad-continuation + +import sys +import gc as garbcollect +import argparse +import re +import threading +import os +import logging +import time +import numpy as np + +try: + import gi + gi.require_version('Gtk', '3.0') + from gi.repository import GLib, Gtk +except ModuleNotFoundError as error: + print('gi import error: {}'.format(error)) + print('gi is required for {}'.format(__program_name__)) + print(' In a venv, first install vext: pip install --no-cache-dir vext') + print(' Then install vext.gi: pip install --no-cache-dir vext.gi') + sys.exit(0) + +try: + from matplotlib.backends.backend_gtk3cairo import FigureCanvasGTK3Cairo as FigureCanvas + import matplotlib.pyplot as plt +except ModuleNotFoundError as error: + print('matplotlib import error: {}'.format(error)) + print('matplotlib is required for {}'.format(__program_name__)) + print('Use \'sudo apt-get install python3-matplotlib\' to install') + sys.exit(0) + +try: + import pandas as pd +except ModuleNotFoundError as error: + print('Pandas import error: {}'.format(error)) + print('Pandas is required for {}'.format(__program_name__)) + print('Install pip3 if needed: \'sudo apt install python3-pip\'') + print('Then pip install pandas: \'pip3 install pandas\'') + sys.exit(0) +from pandas.plotting import register_matplotlib_converters + +from GPUmodules import __version__, __status__ +from GPUmodules import GPUgui +from GPUmodules import GPUmodule as Gpu +from GPUmodules import env + +register_matplotlib_converters() +set_gtk_prop = GPUgui.GuiProps.set_gtk_prop +LOGGER = 
class PlotData:
    """
    Plot data object.

    Container for the dataframe that the reader thread appends to and the
    GUI update reads from, together with the flags and GPU bookkeeping the
    plot utility needs.  Access to ``df`` is serialized with the module
    level semaphore ``PD_SEM``.
    """
    def __init__(self):
        self.df = pd.DataFrame()            # accumulated plot samples
        self.pcie_dict = {}                 # card number -> pcie id, see set_com_gpu_list()
        self.gui_comp = None                # assigned by GuiComponents.__init__
        self.gui_ready = False
        self.length = 200                   # sample count used by the readers when truncating df
        self.quit = False                   # reader loops run while this is False
        self.writer = False
        self.reader = False
        self.consec_writer = 0
        self.consec_reader = 0
        self.gpu_name_list = ''             # replaced by the unique 'Card#' values in set_gpus()
        self.num_gpus = 1
        self.com_gpu_list = Gpu.GpuList()

    def set_gpus(self) -> None:
        """
        Populate num_gpus and gpu_name_list from dataframe member.
        """
        card_column = self.df['Card#']
        self.num_gpus = card_column.nunique()
        self.gpu_name_list = card_column.unique()

    def set_com_gpu_list(self, gpu_list: Gpu.GpuList) -> None:
        """
        Set plot data gpu_list object and initialize pcie decode dict.

        :param gpu_list: GPU list object that also supplies the pcie map
        """
        self.com_gpu_list = gpu_list
        self.pcie_dict = gpu_list.get_pcie_map()

    def get_gpu_pcieid(self, card_num: int) -> str:
        """
        Return the pcie id for a given card number.

        :param card_num: card number used as key in the pcie map
        :return: the pcie id as a string, or 'Error' if the card number is unknown
        """
        return self.pcie_dict.get(card_num, 'Error')

    def get_plot_data(self) -> pd.DataFrame:
        """
        Get deep copy of plot data df.

        :return: deep copy of the plot data dataframe
        """
        # The context manager releases the semaphore even if the copy raises,
        # so a failure here cannot block the reader thread forever.
        with PD_SEM:
            return self.df.copy()

    def kill_thread(self) -> None:
        """
        Sets flags that result in reader thread death.
        """
        self.reader = False
        self.quit = True
        print('Stopping reader thread')
        # Short pause after raising the quit flag, as in the original flow.
        time.sleep(0.2)
def get_color(self, color_name: str) -> str:
    """
    Look up the RGB hex code registered for a plot color name.

    :param color_name: Color Name
    :return: Color RGB hex code
    :raises KeyError: if the name is not present in the color table
    """
    palette = self._colors
    if color_name in palette:
        return palette[color_name]
    raise KeyError('color name {} not in color dict {}'.format(color_name, palette.keys()))


def get_font_color(self, color_name: str) -> str:
    """
    Look up the font RGB hex code registered for a plot color name.

    :param color_name: Color Name
    :return: Color RGB hex code
    :raises KeyError: if the name is not present in the font color table
    """
    palette = self._font_colors
    if color_name in palette:
        return palette[color_name]
    raise KeyError('color name {} not in color dict {}'.format(color_name, palette.keys()))
+ + :param mode: True if gui is ready + """ + self.ready = mode + + def is_ready(self) -> bool: + """ + Return the ready status of the plot gui. + + :return: True if ready + """ + return self.ready + + +class GPUPlotWindow(Gtk.Window): + """ + Plot window. + """ + def __init__(self, gc: GuiComponents, plot_data: PlotData): + box_spacing_val = 5 + num_bar_plots = 3 + if gc.num_gpus > 4: + def_gp_y_size = 150 + def_bp_y_size = 200 + elif gc.num_gpus == 4: + def_gp_y_size = 200 + def_bp_y_size = 200 + else: + def_gp_y_size = 250 + def_bp_y_size = 250 + def_gp_x_size = 650 + def_bp_x_size = 250 + def_lab_y_size = 28 + if gc.num_gpus > num_bar_plots: + tot_y_size = gc.num_gpus * (def_gp_y_size + def_lab_y_size) + gp_y_size = def_gp_y_size + bp_y_size = (tot_y_size - (num_bar_plots * def_lab_y_size))/num_bar_plots + elif gc.num_gpus < num_bar_plots: + tot_y_size = num_bar_plots * (def_bp_y_size + def_lab_y_size) + bp_y_size = def_bp_y_size + gp_y_size = (tot_y_size - (gc.num_gpus * def_lab_y_size))/gc.num_gpus + else: + gp_y_size = def_gp_y_size + bp_y_size = def_bp_y_size + + Gtk.Window.__init__(self, title=env.GUT_CONST.gui_window_title) + self.set_border_width(0) + GPUgui.GuiProps.set_style() + + if env.GUT_CONST.icon_path: + icon_file = os.path.join(env.GUT_CONST.icon_path, 'gpu-plot.icon.png') + if os.path.isfile(icon_file): + self.set_icon_from_file(icon_file) + + grid = Gtk.Grid() + self.add(grid) + + # Get deep copy of current df + ldf = plot_data.get_plot_data() + + row = 0 + # Top Bar - info + gc.gui_components['info_bar']['gtk_obj'] = Gtk.Label(name='white_label') + gc.gui_components['info_bar']['gtk_obj'].set_markup('{} Plot'.format(__program_name__)) + set_gtk_prop(gc.gui_components['info_bar']['gtk_obj'], align=(0.5, 0.5), top=1, bottom=1, right=4, left=4) + lbox = Gtk.Box(spacing=box_spacing_val, name='head_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(gc.gui_components['info_bar']['gtk_obj'], True, True, 0) + 
grid.attach(lbox, 1, row, 4, 1) + row += 1 + + # Legend + gc.gui_components['legend']['gtk_obj'] = Gtk.Label(name='white_label') + gc.gui_components['legend']['gtk_obj'].set_markup('Plot Items') + set_gtk_prop(gc.gui_components['legend']['gtk_obj'], align=(0.5, 0.5), top=1, bottom=1, right=4, left=4) + lbox = Gtk.Box(spacing=box_spacing_val, name='dark_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(gc.gui_components['legend']['gtk_obj'], True, True, 0) + for comp_name in gc.gui_components['legend']['plot_items'].keys(): + but_label = Gpu.GpuItem.get_button_label(comp_name) + but_color = gc.get_color(comp_name) + but_font_color = gc.get_font_color(comp_name) + gc.gui_components['legend']['buttons'][comp_name] = Gtk.Button(label='') + gc.gui_components['legend']['buttons'][comp_name].set_name(but_color[1:]) + GPUgui.GuiProps.set_style(css_str="#%s { background-image: image(%s); color: %s; }" % ( + but_color[1:], but_color, but_font_color)) + for child in gc.gui_components['legend']['buttons'][comp_name].get_children(): + child.set_label('{}'.format(but_label)) + child.set_use_markup(True) + gc.gui_components['legend']['buttons'][comp_name].connect('clicked', self.toggle_plot_item, gc, comp_name) + lbox.pack_start(gc.gui_components['legend']['buttons'][comp_name], True, True, 0) + grid.attach(lbox, 1, row, 4, 1) + row += 1 + main_last_row = row + + # Set up bar plots + grid_bar = Gtk.Grid(name='dark_grid') + grid.attach(grid_bar, 1, main_last_row, 1, 1) + brow = 0 + fig_num = 0 + # plot_top_row = row + for comp_item in [gc.gui_components['sclk_pstate_status'], + gc.gui_components['mclk_pstate_status'], + gc.gui_components['temp_status']]: + # Add Bar Plots Titles + bar_plot_name = Gpu.GpuItem.get_button_label(comp_item['df_name']) + comp_item['title_obj'] = Gtk.Label(name='white_label') + comp_item['title_obj'].set_markup('Card {}'.format(bar_plot_name)) + set_gtk_prop(comp_item['title_obj'], align=(0.5, 0.5), top=1, bottom=1, right=4, 
left=4) + lbox = Gtk.Box(spacing=box_spacing_val, name='head_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(comp_item['title_obj'], True, True, 0) + + grid_bar.attach(lbox, 1, brow, 1, 1) + brow += 1 + + # Add Bar Plots + # Set up plot figure and canvas + comp_item['figure_num'] = 100 + fig_num + fig_num += 1 + comp_item['figure'], comp_item['ax1'] = plt.subplots(num=comp_item['figure_num']) + comp_item['figure'].set_facecolor(gc.get_color('figface')) + + plt.figure(comp_item['figure_num']) + plt.subplots_adjust(left=0.13, right=0.97, top=0.97, bottom=0.1) + comp_item['ax1'].set_facecolor(gc.get_color('plotface')) + if comp_item['df_name'] == 'temp_val': + plt.yticks(np.arange(15, 99, 10)) + else: + plt.yticks(np.arange(0, 9, 1)) + + comp_item['canvas'] = FigureCanvas(comp_item['figure']) + comp_item['canvas'].set_size_request(def_bp_x_size, bp_y_size) + + lbox = Gtk.Box(spacing=box_spacing_val, name='med_box') + set_gtk_prop(lbox, top=1, bottom=1, right=1, left=1) + lbox.pack_start(comp_item['canvas'], True, True, 0) + + grid_bar.attach(lbox, 1, brow, 1, 1) + brow += 1 + + # Set up gpu plots + grid_plot = Gtk.Grid(name='dark_grid') + grid.attach(grid_plot, 2, main_last_row, 3, 1) + prow = 0 + # row = plot_top_row + for comp_num, comp_item in gc.gui_components['card_plots'].items(): + data_val = ldf[ldf['Card#'].isin([comp_num])]['energy'].iloc[-1] + model_val = ldf[ldf['Card#'].isin([comp_num])]['model_display'].iloc[-1] + # Add GPU Plots Titles + comp_item['title_obj'] = Gtk.Label(name='white_label') + comp_item['title_obj'].set_markup('Card{} [{}] {} Energy: {} kWh'.format( + comp_num, plot_data.get_gpu_pcieid(comp_num), model_val[:30], data_val)) + set_gtk_prop(comp_item['title_obj'], align=(0.5, 0.5), top=1, bottom=1, right=4, left=4) + box_name = comp_item['color'][1:] + lbox = Gtk.Box(spacing=box_spacing_val, name=box_name) + GPUgui.GuiProps.set_style(css_str="#%s { background-image: image(%s); }" % (box_name, 
def update_data(gc: GuiComponents, plot_data: PlotData) -> None:
    """
    Update plot data.

    Redraws the info bar, the three bar plots and every per-card trend plot
    from the current contents of ``plot_data.df``.  The whole update runs
    while holding ``PD_SEM`` so the reader thread cannot modify the dataframe
    in the middle of a redraw.  On the listed plotting errors the reader
    thread is asked to stop via ``plot_data.kill_thread()``.

    :param gc: gui components object holding the figures, axes and canvases
    :param plot_data: plot data object holding the shared dataframe
    """
    # SEMAPHORE: held for the complete redraw.  Using the context manager
    # guarantees release even for exceptions that are not handled below
    # (e.g. KeyError/IndexError), which would otherwise block the reader.
    with PD_SEM:
        ldf = plot_data.df
        try:
            time_val = ldf[ldf['Card#'].isin([plot_data.gpu_name_list[0]])]['Time'].iloc[-1]
            gc.gui_components['info_bar']['gtk_obj'].set_markup('Time {}'.format(time_val))

            # Update Bar Plots
            for comp_item in [gc.gui_components['sclk_pstate_status'],
                              gc.gui_components['mclk_pstate_status'],
                              gc.gui_components['temp_status']]:
                data_val = []
                label_val = []
                bar_col = []
                # Set Plot Parameters: latest value of this item for every card
                for card_num in plot_data.gpu_name_list:
                    l, d = ldf[ldf['Card#'].isin([card_num])][['Card#', comp_item['df_name']]].iloc[-1]
                    label_val.append(int(l))
                    data_val.append(float(d))
                    bar_col.append(gc.gpu_color[l])
                x_index = np.arange(gc.num_gpus)  # the x locations for the groups
                width = 0.65  # the width of the bars

                # Do bar plot
                plt.figure(comp_item['figure_num'])
                comp_item['ax1'].clear()
                _rects1 = comp_item['ax1'].bar(x_index, data_val, width, color=bar_col, tick_label=label_val)
                if comp_item['df_name'] == 'temp_val':
                    for a, b in zip(x_index, data_val):
                        comp_item['ax1'].text(x=a, y=b-5, s=str(b), fontsize=8, ha='center')
                    plt.ylim((15, 99))
                else:
                    data_val = list(map(int, data_val))
                    for a, b in zip(x_index, data_val):
                        # place the label above a zero-height bar, inside any other bar
                        y_val = b + width if b == 0 else b - width
                        comp_item['ax1'].text(x=a, y=y_val, s=str(b), fontsize=10, ha='center')
                    plt.ylim((0, 9))
                comp_item['canvas'].draw()
                comp_item['canvas'].flush_events()

            # Update GPU Plots
            # Axis limits are derived from all cards so every plot shares the same scale.
            y1lim_max_val = 10*(ldf.loc[:, ['loading', 'power_cap', 'power', 'temp_val']].max().max() // 10) + 10
            y1lim_min_val = 10*(ldf.loc[:, ['loading', 'power_cap', 'power', 'temp_val']].min().min() // 10) - 5
            y2lim_max_val = 100*(ldf.loc[:, ['vddgfx_val', 'sclk_f_val', 'mclk_f_val']].max().max() // 100) + 300
            y2lim_min_val = 100*(ldf.loc[:, ['vddgfx_val', 'sclk_f_val', 'mclk_f_val']].min().min() // 100) - 100
            for comp_num, comp_item in gc.gui_components['card_plots'].items():
                # Filter the rows of this card once instead of once per use.
                card_df = ldf[ldf['Card#'].isin([comp_num])]
                data_val = card_df['energy'].iloc[-1]
                model_val = card_df['model_display'].iloc[-1]
                comp_item['title_obj'].set_markup('Card{} [{}] {} Energy: {} kWh'.format(
                    comp_num, plot_data.get_gpu_pcieid(comp_num), model_val[:30], data_val))

                # Plot GPUs
                plt.figure(comp_item['figure_num'])
                comp_item['ax1'].set_xticklabels([])
                comp_item['ax1'].clear()
                comp_item['ax1'].set_ylabel('Loading/Power/Temp',
                                            color=GPUgui.GuiProps.color_name_to_hex('white_off'), fontsize=10)
                for plot_item in ['loading', 'power_cap', 'power', 'temp_val']:
                    if gc.plot_items[plot_item]:
                        last_val = card_df[plot_item].iloc[-1]
                        comp_item['ax1'].plot(card_df['datetime'], card_df[plot_item],
                                              color=gc.get_color(plot_item), linewidth=0.5)
                        comp_item['ax1'].text(x=card_df['datetime'].iloc[-1],
                                              y=last_val,
                                              s=str(int(last_val)),
                                              bbox=dict(boxstyle='round,pad=0.2', facecolor=gc.get_color(plot_item)),
                                              fontsize=6)

                comp_item['ax2'].clear()
                comp_item['ax2'].set_xticklabels([])
                comp_item['ax2'].set_ylabel('MHz/mV', color=GPUgui.GuiProps.color_name_to_hex('gray95'), fontsize=10)
                for plot_item in ['vddgfx_val', 'sclk_f_val', 'mclk_f_val']:
                    if gc.plot_items[plot_item]:
                        last_val = card_df[plot_item].iloc[-1]
                        # skip items for which the latest reading is not available
                        if np.isnan(last_val):
                            continue
                        comp_item['ax2'].plot(card_df['datetime'], card_df[plot_item],
                                              color=gc.get_color(plot_item), linewidth=0.5)
                        comp_item['ax2'].text(x=card_df['datetime'].iloc[-1],
                                              y=last_val,
                                              s=str(int(last_val)),
                                              bbox=dict(boxstyle='round,pad=0.2', facecolor=gc.get_color(plot_item)),
                                              fontsize=6)

                tick_inc = int(10 * round(((y1lim_max_val - y1lim_min_val) // 12)/10.0, 0))
                comp_item['ax1'].set_yticks(np.arange(y1lim_min_val, y1lim_max_val, tick_inc))
                tick_inc = int(100 * round(((y2lim_max_val - y2lim_min_val) // 12)/100.0, 0))
                comp_item['ax2'].set_yticks(np.arange(y2lim_min_val, y2lim_max_val, tick_inc))

                comp_item['canvas'].draw()
                comp_item['canvas'].flush_events()
        except (OSError, ArithmeticError, NameError, TypeError, ValueError) as err:
            print('matplotlib error: {}'.format(err))
            print('matplotlib error, stack size is {}'.format(get_stack_size()))
            plot_data.kill_thread()
def read_from_gpus(refreshtime: int, plot_data: PlotData) -> None:
    """
    Read plot data directly from the GPUs.

    Runs until ``plot_data.quit`` is set.  Each pass reads the monitor sensor
    set for all GPUs in ``plot_data.com_gpu_list``, appends one row per GPU to
    ``plot_data.df`` (under ``PD_SEM``), truncates the dataframe, and, once the
    gui reports ready, schedules ``update_data`` on the GLib main loop.

    :param refreshtime: Number of seconds to sleep between reads
    :param plot_data: plot data object that receives the samples
    .. note:: this should continuously read from GPUs and populate df and call plot/gui update
    """
    first_update = True
    while not plot_data.quit:
        plot_data.com_gpu_list.read_gpu_sensor_set(data_type=Gpu.GpuItem.SensorSet.Monitor)

        # Process a set of GPUs at a time: one single-row frame per GPU,
        # joined once instead of growing a dataframe row by row.
        frames = []
        for gpu in plot_data.com_gpu_list.gpus():
            gpu_plot_data = gpu.get_plot_data()
            LOGGER.debug('gpu_plot_data: %s', gpu_plot_data)

            rdf = pd.DataFrame.from_records([tuple(gpu_plot_data.values())], columns=tuple(gpu_plot_data.keys()))
            rdf['datetime'] = pd.to_datetime(rdf['Time'], format=env.GUT_CONST.TIME_FORMAT, exact=False)
            frames.append(rdf)
        ndf = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
        del frames

        # SEMAPHORE: released automatically, even if the concat/truncate raises
        with PD_SEM:
            # Concatenate new data on plot_data dataframe and truncate.
            # An empty frame is never passed to concat, so column dtypes come
            # only from real samples.
            if plot_data.df.empty:
                plot_data.df = ndf
            else:
                plot_data.df = pd.concat([plot_data.df, ndf], ignore_index=True)
            plot_data.df.reset_index(drop=True, inplace=True)

            # Truncate df in place
            # NOTE(review): this drops trun_index rows, not trun_index * num_gpus rows,
            # so with several GPUs more than plot_data.length samples per GPU are kept
            # -- confirm whether that is intended.
            plot_length = int(len(plot_data.df.index) / plot_data.num_gpus)
            if plot_length > plot_data.length:
                trun_index = plot_length - plot_data.length
                plot_data.df.drop(np.arange(0, trun_index), inplace=True)
                plot_data.df.reset_index(drop=True, inplace=True)
        del ndf

        #########################
        # Update plots
        #########################
        if plot_data.gui_comp is None:
            time.sleep(refreshtime)
            continue
        if plot_data.gui_comp.is_ready():
            if first_update:
                time.sleep(refreshtime)
                first_update = False
            GLib.idle_add(update_data, plot_data.gui_comp, plot_data)
            while Gtk.events_pending():
                Gtk.main_iteration_do(True)
            # SEMAPHORE: brief pause, then wait until the semaphore is free again
            time.sleep(0.01)
            with PD_SEM:
                pass
            garbcollect.collect()
            LOGGER.debug('update stack size: %s', get_stack_size())
        time.sleep(refreshtime)

    # Quit
    print('exit stack size: {}'.format(get_stack_size()))
    sys.exit(0)
Exiting...') + sys.exit(-1) + + # Define graph gui and data components + plot_data = PlotData() + # Get list of Compatible GPUs and get basic non-driver details + gpu_list = Gpu.GpuList() + gpu_list.set_gpu_list() + com_gpu_list = gpu_list.list_gpus(compatibility=Gpu.GpuItem.GPU_Comp.Readable) + plot_data.set_com_gpu_list(com_gpu_list) + + if not args.stdin: + # Check list of GPUs + num_gpus = gpu_list.num_vendor_gpus() + print('Detected GPUs: ', end='') + for i, (type_name, type_value) in enumerate(num_gpus.items()): + if i: + print(', {}: {}'.format(type_name, type_value), end='') + else: + print('{}: {}'.format(type_name, type_value), end='') + print('') + if 'AMD' in num_gpus.keys(): + env.GUT_CONST.read_amd_driver_version() + print('AMD: {}'.format(gpu_list.wattman_status())) + if 'NV' in num_gpus.keys(): + print('nvidia smi: [{}]'.format(env.GUT_CONST.cmd_nvidia_smi)) + + num_gpus = gpu_list.num_gpus() + if num_gpus['total'] == 0: + print('No GPUs detected, exiting...') + sys.exit(-1) + + # Read data static/dynamic/info/state driver information for GPUs + gpu_list.read_gpu_sensor_set(data_type=Gpu.GpuItem.SensorSet.All) + + # Check number of readable/writable GPUs again + num_gpus = gpu_list.num_gpus() + print('{} total GPUs, {} rw, {} r-only, {} w-only\n'.format(num_gpus['total'], num_gpus['rw'], + num_gpus['r-only'], num_gpus['w-only'])) + + # Check number of compatible GPUs again + readable_gpus = com_gpu_list.num_gpus()['total'] + if not readable_gpus: + print('None are readable, exiting...') + sys.exit(-1) + + # Set gpu quantity in plot_data + plot_data.num_gpus = readable_gpus + # end of if args.stdin == False + + if args.stdin or args.simlog: + threading.Thread(target=read_from_stdin, daemon=True, args=[args.sleep, plot_data]).start() + else: + threading.Thread(target=read_from_gpus, daemon=True, args=[args.sleep, plot_data]).start() + + print('{} waiting for initial data'.format(__program_name__), end='', flush=True) + while len(plot_data.df.index) < 
2: + print('.', end='', flush=True) + time.sleep(args.sleep/4.0) + print('') + + # After reading initial data, set gpus + plot_data.set_gpus() + + gc = GuiComponents(plot_data) + gplot = GPUPlotWindow(gc, plot_data) + gplot.connect('delete-event', Gtk.main_quit) + gplot.show_all() + gc.set_ready(True) + Gtk.main() + plot_data.kill_thread() + + +if __name__ == '__main__': + main() Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/icons/amdgpu-monitor.icon.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/icons/amdgpu-monitor.icon.png differ Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/icons/amdgpu-pac.icon.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/icons/amdgpu-pac.icon.png differ Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/icons/amdgpu-plot.icon.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/icons/amdgpu-plot.icon.png differ Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/icons/gpu-mon.icon.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/icons/gpu-mon.icon.png differ Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/icons/gpu-pac.icon.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/icons/gpu-pac.icon.png differ Binary files /tmp/tmppAtOdN/Q_uST5q1A0/ricks-amdgpu-utils-3.0.0/icons/gpu-plot.icon.png and /tmp/tmppAtOdN/pk1AO_cpsw/ricks-amdgpu-utils-3.5.0/icons/gpu-plot.icon.png differ diff -Nru ricks-amdgpu-utils-3.0.0/man/amdgpu-chk.1 ricks-amdgpu-utils-3.5.0/man/amdgpu-chk.1 --- ricks-amdgpu-utils-3.0.0/man/amdgpu-chk.1 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/man/amdgpu-chk.1 1970-01-01 00:00:00.000000000 +0000 @@ -1,48 +0,0 @@ -.TH AMDGPU\-CHK 1 "October 2019" "amdgpu-utils" "AMDGPU-UTILS Manual" -.nh -.SH NAME -amdgpu-chk \- verifies if the user's environment is compatible with amdgpu-utils - -.SH SYNOPSIS -.B amdgpu-chk -.RB [ \-\-debug ] -.br -.B amdgpu-chk -.RB [ \-\-help " | " \-\-about "]" - 
-.SH CONFIGURATION -All amdgpu-utils require that compatible GPU cards and drivers are installed and that -the feature to access them is enabled. This can be accomplished by adding -amdgpu.ppfeaturemask=0xfffd7fff to the GRUB_CMDLINE_LINUX_DEFAULT value in -/etc/default/grub and executing update-grub. - -.SH DESCRIPTION -.B amdgpu-chk -will check the user environment for compatibility with amdgpu-utils. - -.SH OPTIONS -.TP -.BR " \-\-about" -Will display details about -.B amdgpu-chk\fP. -.TP -.BR " \-\-debug" -Will display additional details while checking the user environment. -.TP -.BR \-h , " \-\-help" -Display help text and exit. - -.SH BUGS -No known bugs. Please report any bugs/issues at https://github.com/Ricks-Lab/amdgpu-utils - -.SH "SEE ALSO" -.BR amdgpu (4), -.BR update-grub (8), -.BR amdgpu.ls (1), -.BR amdgpu.monitor (1), -.BR amdgpu.plot (1), -.BR amdgpu.pac (1) - -.SH AVAILABILITY -The amdgpu-chk command is part of the amdgpu-utils package and is available from -https://github.com/Ricks-Lab/amdgpu-utils diff -Nru ricks-amdgpu-utils-3.0.0/man/amdgpu-ls.1 ricks-amdgpu-utils-3.5.0/man/amdgpu-ls.1 --- ricks-amdgpu-utils-3.0.0/man/amdgpu-ls.1 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/man/amdgpu-ls.1 1970-01-01 00:00:00.000000000 +0000 @@ -1,117 +0,0 @@ -.TH AMDGPU\-LS 1 "October 2019" "amdgpu-utils" "AMDGPU-UTILS Manual" -.nh -.SH NAME -amdgpu-ls \- display details about amdgpu compatible GPU cards - -.SH SYNOPSIS -.B amdgpu-ls -.RB [ \-\-debug ] -.br -.B amdgpu-ls -.RB [ \-\-help " | " \-\-about "]" -.br -.B amdgpu-ls -.RB [ \-\-clinfo "] [" \-\-no_fan "] [" \-\-ppm "] [" \-\-pstates "] - -.SH CONFIGURATION -All amdgpu-utils require that compatible GPU cards and drivers are installed and that -the feature to access them is enabled. This can be accomplished by adding -amdgpu.ppfeaturemask=0xfffd7fff to the GRUB_CMDLINE_LINUX_DEFAULT value in -/etc/default/grub and executing update-grub. 
- -.SH DESCRIPTION -.B amdgpu-ls -displays most relevant parameters for installed and compatible AMD GPUs. -The default behavior is to list relevant base parameters by GPU. - -.SH OPTIONS -.TP -.BR " \-\-about" -Will display details about -.B amdgpu-ls\fP. -.TP -.BR " \-\-clinfo" -If the clinfo package is installed, openCL details will be included in the output -for each compatible AMD GPU. -.TP -.BR " \-\-no_fan" -Will exclude fan information from the display. Useful with water cooled GPUs. -.TP -.BR " \-\-table" -Will display table of basic GPU parameters. -.TP -.BR " \-\-ppm" -Will display descriptions of available power performance modes. -.TP -.BR " \-\-pstates" -Will display P-state table for system and memory clocks. -.TP -.BR " \-\-debug" -Will display additional details useful in troubleshooting. -.TP -.BR \-h , " \-\-help" -Display help text and exit. - -.SH "EXAMPLES" -.nf -.B amdgpu-ls - -.fi -This will display the basic parameters for all compatible AMD GPUs. These include -device ID, GPU frequency/voltage control type, decoded device ID, card model, short card model, -display card model, card number, card path, PCIe ID, driver, vBIOS version, HWmon path, current power, -power cap, power cap range, fan enable, fan PWM mode, current fan PWM, current fan speed, -fan target speed, fan speed range, fan PWM range, current temp, critical temp, current VddGF, -Vddc range, current loading, link speed, link width, current Sclk p-state, current Sclk frequency, -Sclk range, current Mclk p-state, current Mclk frequency, Mclk range, power performance mode, and -power force performance level. -.P -.B amdgpu-ls \-\-clinfo - -.fi -In addition to the basic parameters, openCL parameters are also displayed for each compatible AMD GPU. -This option is dependent on the installation of \fBclinfo\fR. -.P -.B amdgpu-ls \-\-ppm - -.fi -Instead of the basic parameters, a table of power/performance modes is displayed. 
-.P -.B amdgpu-ls \-\-pstates - -.fi -Instead of the basic parameters, a table of power states or curve points is displayed. -.P - -.SH "FILES" -.PP -.TP -\fB/usr/share/misc/pci.ids\fR -The system list of all known AMD PCI ID's (vendors, devices, classes and subclasses). -It can be updated with the \fBupdate-pciids\fR command. -.TP -\fB~/.amdgpu-utils/amd_pci_id.txt\fR -A locally updated list of all known AMD PCI ID's (vendors, devices, classes and subclasses). -It is preferred to use the system PCI ID file. If the local file exists, it will be used instead of -the system file and can be removed with \fBamdgpu-pciid \-\-remove_local\fR command. -.TP -\fB/sys/class/drm/card*/device/pp_od_clk_voltage\fR -Special driver file for each GPU required for \fBamdgpu-utils\fR. -.TP -\fB/etc/default/grub\fR -The grub defaults file where amdgpu.ppfeaturemask needs to be set. - -.SH BUGS -Known to not work well with Fiji ProDuo cards and will issue warning messages for Fiji Nano cards. -Please report any additional bugs/issues at https://github.com/Ricks-Lab/amdgpu-utils - -.SH "SEE ALSO" -.BR clinfo (1), -.BR amdgpu (4), -.BR update-grub (8), -.BR update-pciids (8), -.BR lspci (8) - -.SH AVAILABILITY -The amdgpu-ls command is part of the amdgpu-utils package and is available from -https://github.com/Ricks-Lab/amdgpu-utils diff -Nru ricks-amdgpu-utils-3.0.0/man/amdgpu-monitor.1 ricks-amdgpu-utils-3.5.0/man/amdgpu-monitor.1 --- ricks-amdgpu-utils-3.0.0/man/amdgpu-monitor.1 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/man/amdgpu-monitor.1 1970-01-01 00:00:00.000000000 +0000 @@ -1,117 +0,0 @@ -.TH AMDGPU\-MONITOR 1 "October 2019" "amdgpu-utils" "AMDGPU-UTILS Manual" -.nh -.SH NAME -amdgpu-monitor \- continuously update a table with the state of all compatible AMD GPUs - -.SH SYNOPSIS -.B amdgpu-monitor -.RB [ \-\-help " | " \-\-about "]" -.br -.B amdgpu-monitor -.RB [ \-\-gui "] [" \-\-no_fan "] [" \-\-plot "] [" \-\-ltz "] [" \-\-sleep " \fIN\fP] [" \-\-debug 
"] [" \-\-pdebug "] - -.SH CONFIGURATION -All amdgpu-utils require that compatible GPU cards and drivers are installed and that -the feature to access them is enabled. This can be accomplished by adding -amdgpu.ppfeaturemask=0xfffd7fff to the GRUB_CMDLINE_LINUX_DEFAULT value in -/etc/default/grub and executing update-grub. - -.SH DESCRIPTION -.B amdgpu-monitor -will continuously display the current state of all compatible AMD GPUs. The default -behavior is to update a text based table in the current window until Ctrl-C is pressed. -Command line options can alter this behaviour allowing the use of GUI, modifying update -interval, excluding fans, writing to a log, selecting UTC or local time zone, and -engaging a graphical display of real-time trends of relevant parameters. - -.SH OPTIONS -.TP -.BR " \-\-about" -Will display details about -.B amdgpu-monitor\fP. -.TP -.BR " \-\-gui" -The table of relevant parameters will be updated in a Gtk window instead of a text table in the terminal window. -.TP -.BR " \-\-log" -Write all monitor data to a logfile. The real-time display will indicate that logging -is enabled and will show the filename used. -.TP -.BR " \-\-ltz" -Use local time zone instead of UTC for displays and logging. -.TP -.BR " \-\-no_fan" -Will exclude fan information from the display. Useful with water cooled GPUs. -.TP -.BR " \-\-pdebug" -Will enable debug output for the \fBamdgpu-plot\fR plotting utility. -.TP -.BR " \-\-plot" -Open and write to, \fBamdgpu-plot\fR, the amdgpu-util plotting utility. -.TP -.BR " \-\-sleep " \fIN\fR -Specifies N, the number of seconds to sleep between updates. -.TP -.BR " \-\-debug" -Will display additional details useful in troubleshooting. -.TP -.BR \-h , " \-\-help" -Display help text and exit. - -.SH "EXAMPLES" -.nf -.B amdgpu-monitor \-\-sleep 5 \-\-log - -.fi -Will display a continuously updating table of GPU operating parameters updating with an interval of 5 sec. 
All -parameters will be written to a logfile which will be indicated in the table. -The displayed parameters include GPU model, load percentage, power, power cap, energy consumption, temperature, -voltage, fan speed, Sclk frequency/p-state, Mclk frequency/pstate, and performance mode. -Updating of the table will continue until ctrl-c is pressed. -.P -.B amdgpu-monitor \-\-gui - -.fi -Will open a new Gtk window and display basic parameters updated with the default interval. -.P -.B amdgpu-monitor \-\-plot - -.fi -Will open 2 new Gtk windows. One will display the basic parameters and the second will display a continuously -updating plot of these parameters. It is suggested that this method be used if both displays are desired, instead -of executing both \fBamdgpu-monitor\fR and \fBamdgpu-plot\fR as the later will result in twice the reads of -GPU data. -.P - -.SH "FILES" -.PP -.TP -\fB/usr/share/misc/pci.ids\fR -The system list of all known AMD PCI ID's (vendors, devices, classes and subclasses). -It can be updated with the \fBupdate-pciids\fR command. -.TP -\fB~/.amdgpu-utils/amd_pci_id.txt\fR -A locally updated list of all known AMD PCI ID's (vendors, devices, classes and subclasses). -It is preferred to use the system PCI ID file. If this the local file exists, it will be used instead of -the system file and can be removed with \fBamdgpu-pciid \-\-remove_local\fR command. -.TP -\fB/sys/class/drm/card*/device/pp_od_clk_voltage\fR -Special driver file for each GPU required for \fBamdgpu-utils\fR. -.TP -\fB/etc/default/grub\fR -The grub defaults file where amdgpu.ppfeaturemask needs to be set. - -.SH BUGS -Known to not work well with Fiji ProDuo cards and will issue warning messages for Fiji Nano cards. 
-Please report any additional bugs/issues at https://github.com/Ricks-Lab/amdgpu-utils - -.SH "SEE ALSO" -.BR amdgpu-plot (1), -.BR amdgpu (4), -.BR update-grub (8), -.BR update-pciids (8), -.BR lspci (8) - -.SH AVAILABILITY -The amdgpu-monitor command is part of the amdgpu-utils package and is available from -https://github.com/Ricks-Lab/amdgpu-utils diff -Nru ricks-amdgpu-utils-3.0.0/man/amdgpu-pac.1 ricks-amdgpu-utils-3.5.0/man/amdgpu-pac.1 --- ricks-amdgpu-utils-3.0.0/man/amdgpu-pac.1 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/man/amdgpu-pac.1 1970-01-01 00:00:00.000000000 +0000 @@ -1,107 +0,0 @@ -.TH AMDGPU\-PAC 1 "October 2019" "amdgpu-utils" "AMDGPU-UTILS Manual" -.nh -.SH NAME -amdgpu-pac \- program and control compatible AMD GPUs - -.SH SYNOPSIS -.B amdgpu-pac -.RB [ \-\-help " | " \-\-about "]" -.br -.B amdgpu-pac -.RB [ \-\-execute_pac "] [" \-\-no_fan "] [" \-\-force_write "] [" \-\-debug "] - -.SH CONFIGURATION -All amdgpu-utils require that compatible GPU cards and drivers are installed and that -the feature to access them is enabled. This can be accomplished by adding -amdgpu.ppfeaturemask=0xfffd7fff to the GRUB_CMDLINE_LINUX_DEFAULT value in -/etc/default/grub and executing update-grub. - -.SH DESCRIPTION -.B amdgpu-pac -will present the user with a gui to modify the operating conditions of compatible AMD GPUs. -With this utility, the user can change GPU power limit, S-Clk and M-Clk P-state/Curve definitions, -P-state masks, fan speed, and power performance mode. -By default, the commands to program and control a GPU are written to a bash file for the user to inspect -and run with root permissions. - -.SH OPTIONS -.TP -.BR " \-\-about" -Will display details about -.B amdgpu-pac\fP. -.TP -.BR " \-\-execute_pac" -Will execute a bash file created with commands written to the AMD driver files to modify the operating -conditions of the selected GPU/GPUs. The default behavior is to only create the bash files for the user -to execute. 
-.TP -.BR " \-\-no_fan" -Will exclude fan information from the display and will not include fans in writing or resetting -GPU operating conditions. -.TP -.BR " \-\-force_write" -Will result in all parameters being writen to the selected GPU/GPUs instead of the default behavior of -only writing changes. -.TP -.BR " \-\-debug" -Will display additional details useful in troubleshooting. -.TP -.BR \-h , " \-\-help" -Display help text and exit. - -.SH "EXAMPLES" -.nf -.B amdgpu-pac - -.fi -Will open a Gtk based user interface which will display current or default values for modifiable GPU operating -parameters. The interface supports entry of new values for all compatible AMD GPUs. The user can select to save -or reset values for individual or all GPUs. It is suggested that \fBamdgpu-monitor\fR be used -to make sure the changes are made as expected. -.P -.B amdgpu-pac \-\-execute_pac - -.fi -To simplify this process, the \fB\-\-execute_pac\fR option can be specified to automate execution of the bash files. -A message in the user interface will indicate if credentials are required in the original terminal window. -.P -.B amdgpu-pac \-\-force_write - -.fi -With this option, all parameters will be written to the bash file, even if they are unchanged. This is useful in -creating bash files used to put GPU's into a known state which is convenient for use in start up routines. -.P - -.SH "FILES" -.PP -.TP -\fB/usr/share/misc/pci.ids\fR -The system list of all known AMD PCI ID's (vendors, devices, classes and subclasses). -It can be updated with the \fBupdate-pciids\fR command. -.TP -\fB~/.amdgpu-utils/amd_pci_id.txt\fR -A locally updated list of all known AMD PCI ID's (vendors, devices, classes and subclasses). -It is preferred to use the system PCI ID file. If the local file exists, it will be used instead of -the system file and can be removed with \fB\-\-remove_local\fR option. 
-.TP -\fB/sys/class/drm/card*/device/pp_od_clk_voltage\fR -Special driver file for each GPU required for \fBamdgpu-utils\fR. -.TP -\fB/etc/default/grub\fR -The grub defaults file where amdgpu.ppfeaturemask needs to be set. - -.SH BUGS -Known to not work well with Fiji ProDuo cards and will issue warning messages for Fiji Nano cards. The -display of P-state masks is always the defaults, not the actual values. -Please report any additional bugs/issues at https://github.com/Ricks-Lab/amdgpu-utils - -.SH "SEE ALSO" -.BR amdgpu-monitor (1), -.BR amdgpu (4), -.BR update-grub (8), -.BR update-pciids (8), -.BR lspci (8) - -.SH AVAILABILITY -The amdgpu-pac command is part of the amdgpu-utils package and is available from -https://github.com/Ricks-Lab/amdgpu-utils diff -Nru ricks-amdgpu-utils-3.0.0/man/amdgpu-plot.1 ricks-amdgpu-utils-3.5.0/man/amdgpu-plot.1 --- ricks-amdgpu-utils-3.0.0/man/amdgpu-plot.1 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/man/amdgpu-plot.1 1970-01-01 00:00:00.000000000 +0000 @@ -1,98 +0,0 @@ -.TH AMDGPU-PLOT 1 "October 2019" "amdgpu-utils" "AMDGPU-UTILS Manual" -.nh -.SH NAME -amdgpu-plot \- continuously update and plot critical GPU parameters as a function of time - -.SH SYNOPSIS -.B amdgpu-plot -.RB [ \-\-help " | " \-\-about "]" -.br -.B amdgpu-plot -.RB [ \-\-no_fan "] [" \-\-stdin "] [" \-\-simlog "] [" \-\-ltz "] [" \-\-sleep " \fIN\fP] [" \-\-debug "] - -.SH CONFIGURATION -All amdgpu-utils require that compatible GPU cards and drivers are installed and that -the feature to access them is enabled. This can be accomplished by adding -amdgpu.ppfeaturemask=0xfffd7fff to the GRUB_CMDLINE_LINUX_DEFAULT value in -/etc/default/grub and executing update-grub. - -.SH DESCRIPTION -.B amdgpu-plot -will continuously plot a trend of critical GPU parameters for all compatible AMD GPUs. - -.SH OPTIONS -.TP -.BR " \-\-about" -Will display details about -.B amdgpu-plot\fP. 
-.TP -.BR " \-\-ltz" -Use local time zone instead of UTC for displays and logging. -.TP -.BR " \-\-no_fan" -Will exclude fan information from the display. Useful with watercooled GPUs. -.TP -.BR " \-\-stdin" -Will read data from stdin. This is useful to display plots of a logfile save with \fBamdgpu-monitor\fR. -.TP -.BR " \-\-simlog" -When used with the \-\-stdin option, it will simulate the reading of data from the logfile at a rate -define by \fB\-\-sleep\fR. -.TP -.BR " \-\-sleep " \fIN\fP -Specifies N, the number of seconds to sleep between updates. -.TP -.BR " \-\-debug" -Will display additional details useful in troubleshooting. -.TP -.BR \-h , " \-\-help" -Display help text and exit. - -.SH "EXAMPLES" -.nf -.B amdgpu-plot \-\-sleep 5 \-\-ltz - -.fi -Will open a Gtk window that will display plots of operation parameters for all compatible AMD GPU's that updates -every 5s. Time stamps displayed will use local time zone. -.P -.B cat \fIlogfile\fR | \fBamdgpu-plot \-\-stdin \-\-simlog \-\-sleep 1 - -.fi -Will open a Gtk window that will display plots of the GPU operation data in the specified \fIlogfile\fR -to simulate streamed data with a 1 sec interval. -.P - -.SH "FILES" -.PP -.TP -\fB/usr/share/misc/pci.ids\fR -The system list of all known AMD PCI ID's (vendors, devices, classes and subclasses). -It can be updated with the \fBupdate-pciids\fR command. -.TP -\fB~/.amdgpu-utils/amd_pci_id.txt\fR -A locally updated list of all known AMD PCI ID's (vendors, devices, classes and subclasses). -It is preferred to use the system PCI ID file. If the local file exists, it will be used instead of -the system file and can be removed with \fB\-\-remove_local\fR option. -.TP -\fB/sys/class/drm/card*/device/pp_od_clk_voltage\fR -Special driver file for each GPU required for \fBamdgpu-utils\fR. -.TP -\fB/etc/default/grub\fR -The grub defaults file where amdgpu.ppfeaturemask needs to be set. 
- -.SH BUGS -Known to not work well with Fiji ProDuo cards and will issue warning messages for Fiji Nano cards. -Please report any additional bugs/issues at https://github.com/Ricks-Lab/amdgpu-utils - -.SH "SEE ALSO" -.BR cat (1), -.BR amdgpu-monitor (1) -.BR amdgpu (4), -.BR update-grub (8), -.BR update-pciids (8), -.BR lspci (8) - -.SH AVAILABILITY -The amdgpu-plot command is part of the amdgpu-utils package and is available from -https://github.com/Ricks-Lab/amdgpu-utils diff -Nru ricks-amdgpu-utils-3.0.0/man/gpu-chk.1 ricks-amdgpu-utils-3.5.0/man/gpu-chk.1 --- ricks-amdgpu-utils-3.0.0/man/gpu-chk.1 1970-01-01 00:00:00.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/man/gpu-chk.1 2020-07-06 00:57:49.000000000 +0000 @@ -0,0 +1,49 @@ +.TH GPU\-CHK 1 "June 2020" "rickslab-gpu-utils" "Ricks-Lab GPU Utilities" +.nh +.SH NAME +gpu-chk \- verifies if the user's environment is compatible with rickslab-gpu-utils + +.SH SYNOPSIS +.B gpu-chk +.RB [ \-\-debug ] +.br +.B gpu-chk +.RB [ \-\-help " | " \-\-about "]" + +.SH CONFIGURATION +In order to get maximum capability of these utilities, you should be running with a kernel that +provides support of the GPUs you have installed. If using AMD GPUs, installing the latest amdgpu +driver package or the latest ROCm release, may provide additional capabilities. If you have Nvidia +GPUs installed, nvidia-smi must also be installed in order for the utility reading of the cards +to be possible. Writing to GPUs is currently only possible for AMD GPUs, and only with compatible +cards and with the AMD ppfeaturemask set to 0xfffd7fff. This can be accomplished by adding +amdgpu.ppfeaturemask=0xfffd7fff to the GRUB_CMDLINE_LINUX_DEFAULT value in +/etc/default/grub and executing sudo update-grub. + +.SH DESCRIPTION +.B gpu-chk +will check the user environment for compatibility with rickslab-gpu-utils. + +.SH OPTIONS +.TP +.BR " \-\-about" +Will display details about +.B gpu-chk\fP. +.TP +.BR \-h , " \-\-help" +Display help text and exit.
+ +.SH BUGS +No known bugs. Please report any bugs/issues at https://github.com/Ricks-Lab/amdgpu-utils + +.SH "SEE ALSO" +.BR amdgpu (4), +.BR update-grub (8), +.BR gpu-ls (1), +.BR gpu-mon (1), +.BR gpu-plot (1), +.BR gpu-pac (1) + +.SH AVAILABILITY +The gpu-chk command is part of the rickslab-gpu-utils package and is available from +https://github.com/Ricks-Lab/amdgpu-utils diff -Nru ricks-amdgpu-utils-3.0.0/man/gpu-ls.1 ricks-amdgpu-utils-3.5.0/man/gpu-ls.1 --- ricks-amdgpu-utils-3.0.0/man/gpu-ls.1 1970-01-01 00:00:00.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/man/gpu-ls.1 2020-07-06 00:57:49.000000000 +0000 @@ -0,0 +1,125 @@ +.TH GPU\-LS 1 "June 2020" "rickslab-gpu-utils" "Ricks-Lab GPU Utilities" +.nh +.SH NAME +gpu-ls \- display details about gpu compatible GPU cards + +.SH SYNOPSIS +.B gpu-ls +.RB [ \-\-short ] +.br +.B gpu-ls +.RB [ \-\-help " | " \-\-about "]" +.br +.B gpu-ls +.RB [ \-\-clinfo "] [" \-\-no_fan "] [" \-\-ppm "] [" \-\-pstates "] + +.SH CONFIGURATION +In order to get maximum capability of these utilities, you should be running with a kernel that +provides support of the GPUs you have installed. If using AMD GPUs, installing the latest amdgpu +driver package or the latest ROCm release, may provide additional capabilities. If you have Nvidia +GPUs installed, nvidia-smi must also be installed in order for the utility reading of the cards +to be possible. Writing to GPUs is currently only possible for AMD GPUs, and only with compatible +cards and with the AMD ppfeaturemask set to 0xfffd7fff. This can be accomplished by adding +amdgpu.ppfeaturemask=0xfffd7fff to the GRUB_CMDLINE_LINUX_DEFAULT value in +/etc/default/grub and executing sudo update-grub. + +.SH DESCRIPTION +.B gpu-ls +displays most relevant parameters for installed and compatible GPUs. +The default behavior is to list relevant base parameters by GPU. + +.SH OPTIONS +.TP +.BR " \-\-about" +Will display details about +.B gpu-ls\fP.
+.TP +.BR " \-\-clinfo" +If the clinfo package is installed, openCL details will be included in the output +for each compatible GPU. +.TP +.BR " \-\-no_fan" +Will exclude fan information from the display. Useful with water cooled GPUs. +.TP +.BR " \-\-short" +Will display short list of key descriptive parameters for each installed GPU. +.TP +.BR " \-\-table" +Will display table of basic GPU parameters. +.TP +.BR " \-\-ppm" +Will display descriptions of available power performance modes. +.TP +.BR " \-\-pstates" +Will display P-state table for system and memory clocks. +.TP +.BR " \-\-debug" +Will output additional useful debug/troubleshooting details to a log file. +.TP +.BR \-h , " \-\-help" +Display help text and exit. + +.SH "EXAMPLES" +.nf +.B gpu-ls + +.fi +This will display the basic parameters for all compatible GPUs. These include +device ID, GPU frequency/voltage control type, decoded device ID, card model, short card model, +display card model, card number, card path, PCIe ID, driver, vBIOS version, HWmon path, current power, +power cap, power cap range, fan enable, fan PWM mode, current fan PWM, current fan speed, +fan target speed, fan speed range, fan PWM range, current temp, critical temp, current VddGF, +Vddc range, current GPU loading, memory details, link speed, link width, current Sclk p-state, +current Sclk frequency, Sclk range, current Mclk p-state, current Mclk frequency, +Mclk range, power performance mode, and power force performance level. +.P +.B gpu-ls \-\-short + +.fi +This will produce a short list of key descriptive parameters for each installed GPU. +.P +.B gpu-ls \-\-clinfo + +.fi +In addition to the basic parameters, openCL parameters are also displayed for each compatible GPU. +This option is dependent on the installation of \fBclinfo\fR. +.P +.B gpu-ls \-\-ppm + +.fi +Instead of the basic parameters, a table of power/performance modes is displayed. 
+.P +.B gpu-ls \-\-pstates + +.fi +Instead of the basic parameters, a table of power states or curve points is displayed. +.P + +.SH "FILES" +.PP +.TP +\fB/usr/share/misc/pci.ids\fR +The system list of all known PCI ID's (vendors, devices, classes and subclasses). +It can be updated with the \fBupdate-pciids\fR command. +.TP +\fB/sys/class/drm/card*/device/pp_od_clk_voltage\fR +Special driver file for each AMD GPU required by some \fBrickslab-gpu-utils\fR. +.TP +\fB/etc/default/grub\fR +The grub defaults file where amdgpu.ppfeaturemask needs to be set. + +.SH BUGS +Known to not work well with Fiji ProDuo cards and will issue warning messages for Fiji Nano cards. +Please report any additional bugs/issues at https://github.com/Ricks-Lab/amdgpu-utils + +.SH "SEE ALSO" +.BR clinfo (1), +.BR amdgpu (4), +.BR nvidia-smi (1), +.BR update-grub (8), +.BR update-pciids (8), +.BR lspci (8) + +.SH AVAILABILITY +The gpu-ls command is part of the rickslab-gpu-utils package and is available from +https://github.com/Ricks-Lab/amdgpu-utils diff -Nru ricks-amdgpu-utils-3.0.0/man/gpu-mon.1 ricks-amdgpu-utils-3.5.0/man/gpu-mon.1 --- ricks-amdgpu-utils-3.0.0/man/gpu-mon.1 1970-01-01 00:00:00.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/man/gpu-mon.1 2020-07-06 00:57:49.000000000 +0000 @@ -0,0 +1,117 @@ +.TH GPU\-MON 1 "June 2020" "rickslab-gpu-utils" "Ricks-Lab GPU Utilities" +.nh +.SH NAME +gpu-mon \- continuously update a table with the state of all compatible GPUs + +.SH SYNOPSIS +.B gpu-mon +.RB [ \-\-help " | " \-\-about "]" +.br +.B gpu-mon +.RB [ \-\-gui "] [" \-\-no_fan "] [" \-\-plot "] [" \-\-ltz "] [" \-\-sleep " \fIN\fP] [" \-\-debug "] [" \-\-pdebug "] + +.SH CONFIGURATION +In order to get maximum capability of these utilities, you should be running with a kernel that +provides support of the GPUs you have installed. If using AMD GPUs, installing the latest amdgpu +driver package or the latest ROCm release, may provide additional capabilities. 
 If you have Nvidia +GPUs installed, nvidia-smi must also be installed in order for the utility reading of the cards +to be possible. Writing to GPUs is currently only possible for AMD GPUs, and only with compatible +cards and with the AMD ppfeaturemask set to 0xfffd7fff. This can be accomplished by adding +amdgpu.ppfeaturemask=0xfffd7fff to the GRUB_CMDLINE_LINUX_DEFAULT value in +/etc/default/grub and executing sudo update-grub. + +.SH DESCRIPTION +.B gpu-mon +will continuously display the current state of all compatible GPUs. The default +behavior is to update a text based table in the current window until Ctrl-C is pressed. +Command line options can alter this behaviour allowing the use of GUI, modifying update +interval, excluding fans, writing to a log, selecting UTC or local time zone, and +engaging a graphical display of real-time trends of relevant parameters. + +.SH OPTIONS +.TP +.BR " \-\-about" +Will display details about +.B gpu-mon\fP. +.TP +.BR " \-\-gui" +The table of relevant parameters will be updated in a Gtk window instead of a text table in the terminal window. +.TP +.BR " \-\-log" +Write all monitor data to a logfile. The real-time display will indicate that logging +is enabled and will show the filename used. +.TP +.BR " \-\-ltz" +Use local time zone instead of UTC for displays and logging. +.TP +.BR " \-\-no_fan" +Will exclude fan information from the display. Useful with water cooled GPUs. +.TP +.BR " \-\-pdebug" +Will enable debug output for the \fBgpu-plot\fR plotting utility. +.TP +.BR " \-\-plot" +Open and write to, \fBgpu-plot\fR, the gpu-util plotting utility. +.TP +.BR " \-\-sleep " \fIN\fR +Specifies N, the number of seconds to sleep between updates. +.TP +.BR " \-\-debug" +Will output additional useful debug/troubleshooting details to a log file. +.TP +.BR \-h , " \-\-help" +Display help text and exit.
+ +.SH "EXAMPLES" +.nf +.B gpu-mon \-\-sleep 5 \-\-log + +.fi +Will display a continuously updating table of GPU operating parameters updating with an interval of 5 sec. All +parameters will be written to a logfile which will be indicated in the table. +The displayed parameters include GPU model, GPU and memory load percentage, power, power cap, energy consumption, +temperature, voltage (not available for NV), fan speed, Sclk frequency/p-state, Mclk frequency/p-state, and +performance mode. Updating of the table will continue until ctrl-c is pressed. +.P +.B gpu-mon \-\-gui + +.fi +Will open a new Gtk window and display basic parameters updated with the default interval. +.P +.B gpu-mon \-\-plot + +.fi +Will open 2 new Gtk windows. One will display the basic parameters and the second will display a continuously +updating plot of these parameters. It is suggested that this method be used if both displays are desired, instead +of executing both \fBgpu-mon\fR and \fBgpu-plot\fR as the latter will result in twice the reads of +GPU data. +.P + +.SH "FILES" +.PP +.TP +\fB/usr/share/misc/pci.ids\fR +The system list of all known PCI ID's (vendors, devices, classes and subclasses). +It can be updated with the \fBupdate-pciids\fR command. +.TP +\fB/sys/class/drm/card*/device/pp_od_clk_voltage\fR +Special driver file for each AMD GPU required by some \fBrickslab-gpu-utils\fR. +.TP +\fB/etc/default/grub\fR +The grub defaults file where amdgpu.ppfeaturemask needs to be set. + +.SH BUGS +Known to not work well with Fiji ProDuo cards and will issue warning messages for Fiji Nano cards. 
+Please report any additional bugs/issues at https://github.com/Ricks-Lab/amdgpu-utils + +.SH "SEE ALSO" +.BR gpu-plot (1), +.BR amdgpu (4), +.BR nvidia-smi (1), +.BR update-grub (8), +.BR update-pciids (8), +.BR lspci (8) + +.SH AVAILABILITY +The gpu-mon command is part of the rickslab-gpu-utils package and is available from +https://github.com/Ricks-Lab/amdgpu-utils diff -Nru ricks-amdgpu-utils-3.0.0/man/gpu-pac.1 ricks-amdgpu-utils-3.5.0/man/gpu-pac.1 --- ricks-amdgpu-utils-3.0.0/man/gpu-pac.1 1970-01-01 00:00:00.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/man/gpu-pac.1 2020-07-06 00:57:49.000000000 +0000 @@ -0,0 +1,106 @@ +.TH GPU\-PAC 1 "June 2020" "rickslab-gpu-utils" "Ricks-Lab GPU Utilities" +.nh +.SH NAME +gpu-pac \- program and control compatible GPUs + +.SH SYNOPSIS +.B gpu-pac +.RB [ \-\-help " | " \-\-about "]" +.br +.B gpu-pac +.RB [ \-\-execute_pac "] [" \-\-no_fan "] [" \-\-force_write "] [" \-\-debug "]" + +.SH CONFIGURATION +In order to get maximum capability of these utilities, you should be running with a kernel that +provides support of the GPUs you have installed. If using AMD GPUs, installing the latest amdgpu +driver package or the latest ROCm release, may provide additional capabilities. If you have Nvidia +GPUs installed, nvidia-smi must also be installed in order for the utility reading of the cards +to be possible. Writing to GPUs is currently only possible for AMD GPUs, and only with compatible +cards and with the AMD ppfeaturemask set to 0xfffd7fff. This can be accomplished by adding +amdgpu.ppfeaturemask=0xfffd7fff to the GRUB_CMDLINE_LINUX_DEFAULT value in +/etc/default/grub and executing sudo update-grub. + +.SH DESCRIPTION +.B gpu-pac +will present the user with a gui to modify the operating conditions of compatible GPUs. +With this utility, the user can change GPU power limit, S-Clk and M-Clk P-state/Curve definitions, +P-state masks, fan speed, and power performance mode. 
+By default, the commands to program and control a GPU are written to a bash file for the user to inspect +and run with root permissions. + +.SH OPTIONS +.TP +.BR " \-\-about" +Will display details about +.B gpu-pac\fP. +.TP +.BR " \-\-execute_pac" +Will execute a bash file created with commands written to the driver files to modify the operating +conditions of the selected GPU/GPUs. The default behavior is to only create the bash files for the user +to execute. +.TP +.BR " \-\-no_fan" +Will exclude fan information from the display and will not include fans in writing or resetting +GPU operating conditions. +.TP +.BR " \-\-force_write" +Will result in all parameters being written to the selected GPU/GPUs instead of the default behavior of +only writing changes. +.TP +.BR " \-\-debug" +Will output additional useful debug/troubleshooting details to a log file. +.TP +.BR \-h , " \-\-help" +Display help text and exit. + +.SH "EXAMPLES" +.nf +.B gpu-pac + +.fi +Will open a Gtk based user interface which will display current or default values for modifiable GPU operating +parameters. The interface supports entry of new values for all compatible GPUs. The user can select to save +or reset values for individual or all GPUs. It is suggested that \fBgpu-mon\fR be used +to make sure the changes are made as expected. +.P +.B gpu-pac \-\-execute_pac + +.fi +To simplify this process, the \fB\-\-execute_pac\fR option can be specified to automate execution of the bash files. +A message in the user interface will indicate if credentials are required in the original terminal window. +.P +.B gpu-pac \-\-force_write + +.fi +With this option, all parameters will be written to the bash file, even if they are unchanged. This is useful in +creating bash files used to put GPUs into a known state which is convenient for use in start up routines. +.P + +.SH "FILES" +.PP +.TP +\fB/usr/share/misc/pci.ids\fR +The system list of all known PCI ID's (vendors, devices, classes and subclasses). 
+It can be updated with the \fBupdate-pciids\fR command. +.TP +\fB/sys/class/drm/card*/device/pp_od_clk_voltage\fR +Special driver file for each AMD GPU required by some \fBrickslab-gpu-utils\fR. +.TP +\fB/etc/default/grub\fR +The grub defaults file where amdgpu.ppfeaturemask needs to be set. + +.SH BUGS +Known to not work well with Fiji ProDuo cards and will issue warning messages for Fiji Nano cards. The +display of P-state masks is always the defaults, not the actual values. +Please report any additional bugs/issues at https://github.com/Ricks-Lab/amdgpu-utils + +.SH "SEE ALSO" +.BR gpu-mon (1), +.BR amdgpu (4), +.BR update-grub (8), +.BR update-pciids (8), +.BR lspci (8) + +.SH AVAILABILITY +The gpu-pac command is part of the rickslab-gpu-utils package and is available from +https://github.com/Ricks-Lab/amdgpu-utils diff -Nru ricks-amdgpu-utils-3.0.0/man/gpu-plot.1 ricks-amdgpu-utils-3.5.0/man/gpu-plot.1 --- ricks-amdgpu-utils-3.0.0/man/gpu-plot.1 1970-01-01 00:00:00.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/man/gpu-plot.1 2020-07-06 00:57:49.000000000 +0000 @@ -0,0 +1,98 @@ +.TH GPU-PLOT 1 "June 2020" "rickslab-gpu-utils" "Ricks-Lab GPU Utilities" +.nh +.SH NAME +gpu-plot \- continuously update and plot critical GPU parameters as a function of time + +.SH SYNOPSIS +.B gpu-plot +.RB [ \-\-help " | " \-\-about "]" +.br +.B gpu-plot +.RB [ \-\-no_fan "] [" \-\-stdin "] [" \-\-simlog "] [" \-\-ltz "] [" \-\-sleep " \fIN\fP] [" \-\-debug "] + +.SH CONFIGURATION +In order to get maximum capability of these utilities, you should be running with a kernel that +provides support of the GPUs you have installed. If using AMD GPUs, installing the latest amdgpu +driver package or the latest ROCm release, may provide additional capabilities. If you have Nvidia +GPUs installed, nvidia-smi must also be installed in order for the utility reading of the cards +to be possible. 
Writing to GPUs is currently only possible for AMD GPUs, and only with compatible +cards and with the AMD ppfeaturemask set to 0xfffd7fff. This can be accomplished by adding +amdgpu.ppfeaturemask=0xfffd7fff to the GRUB_CMDLINE_LINUX_DEFAULT value in +/etc/default/grub and executing sudo update-grub. + +.SH DESCRIPTION +.B gpu-plot +will continuously plot a trend of critical GPU parameters for all compatible GPUs. + +.SH OPTIONS +.TP +.BR " \-\-about" +Will display details about +.B gpu-plot\fP. +.TP +.BR " \-\-ltz" +Use local time zone instead of UTC for displays and logging. +.TP +.BR " \-\-no_fan" +Will exclude fan information from the display. Useful with watercooled GPUs. +.TP +.BR " \-\-stdin" +Will read data from stdin. This is useful to display plots of a logfile saved with \fBgpu-mon\fR. +.TP +.BR " \-\-simlog" +When used with the \-\-stdin option, it will simulate the reading of data from the logfile at a rate +defined by \fB\-\-sleep\fR. +.TP +.BR " \-\-sleep " \fIN\fP +Specifies N, the number of seconds to sleep between updates. +.TP +.BR " \-\-debug" +Will output additional useful debug/troubleshooting details to a log file. +.TP +.BR \-h , " \-\-help" +Display help text and exit. + +.SH "EXAMPLES" +.nf +.B gpu-plot \-\-sleep 5 \-\-ltz + +.fi +Will open a Gtk window that will display plots of operation parameters for all compatible GPUs that updates +every 5s. Time stamps displayed will use local time zone. +.P +.B cat \fIlogfile\fR | \fBgpu-plot \-\-stdin \-\-simlog \-\-sleep 1 + +.fi +Will open a Gtk window that will display plots of the GPU operation data in the specified \fIlogfile\fR +to simulate streamed data with a 1 sec interval. +.P + +.SH "FILES" +.PP +.TP +\fB/usr/share/misc/pci.ids\fR +The system list of all known AMD PCI ID's (vendors, devices, classes and subclasses). +It can be updated with the \fBupdate-pciids\fR command. 
+.TP +\fB/sys/class/drm/card*/device/pp_od_clk_voltage\fR +Special driver file for each AMD GPU required by some \fBrickslab-gpu-utils\fR. +.TP +\fB/etc/default/grub\fR +The grub defaults file where amdgpu.ppfeaturemask needs to be set. + +.SH BUGS +Known to not work well with Fiji ProDuo cards and will issue warning messages for Fiji Nano cards. +Please report any additional bugs/issues at https://github.com/Ricks-Lab/amdgpu-utils + +.SH "SEE ALSO" +.BR cat (1), +.BR gpu-mon (1), +.BR amdgpu (4), +.BR nvidia-smi (1), +.BR update-grub (8), +.BR update-pciids (8), +.BR lspci (8) + +.SH AVAILABILITY +The gpu-plot command is part of the rickslab-gpu-utils package and is available from +https://github.com/Ricks-Lab/amdgpu-utils diff -Nru ricks-amdgpu-utils-3.0.0/MANIFEST.in ricks-amdgpu-utils-3.5.0/MANIFEST.in --- ricks-amdgpu-utils-3.0.0/MANIFEST.in 1970-01-01 00:00:00.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/MANIFEST.in 2020-07-06 00:57:49.000000000 +0000 @@ -0,0 +1,2 @@ +include README.md +include LICENSE diff -Nru ricks-amdgpu-utils-3.0.0/README.md ricks-amdgpu-utils-3.5.0/README.md --- ricks-amdgpu-utils-3.0.0/README.md 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/README.md 2020-07-06 00:57:49.000000000 +0000 @@ -1,29 +1,46 @@ -# amdgpu-utils -A set of utilities for monitoring AMD GPU performance and modifying control settings. +# Ricks-Lab GPU Utilities -In order to use any of these utilities, you must have the *amdgpu* open source driver -package installed. You also must first set your Linux machine to boot with -amdgpu.ppfeaturemask=0xffff7fff or 0xfffd7fff. This can be accomplished by adding -amdgpu.ppfeaturemask=0xffff7fff to the GRUB_CMDLINE_LINUX_DEFAULT value in -/etc/default/grub and executing *sudo update-grub* - -Check out the [User Guide](docs/USER_GUIDE.md)! 
- -Download latest official release: [v3.0.0](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v3.0.0) - -## amdgpu-chk -This utility verifies if the environment is compatible with *amdgpu-util*s. - -## amdgpu-ls -This utility displays most relevant parameters for installed and compatible AMD GPUs. The default -behavior is to list relevant parameters by GPU. OpenCL platform information is added when the -*--clinfo* option is used. A simplified table of current GPU state is displayed with the *--table* -option. The *--no_fan* can be used to ignore fan settings. The *--pstate* option can be used to -output the p-state table for each GPU instead of the list of basic parameters. The *--ppm* option -is used to output the table of available power/performance modes instead of basic parameters. +## rickslab-gpu-utils -## amdgpu-monitor -A utility to give the current state of all compatible AMD GPUs. The default behavior +A set of utilities for monitoring GPU performance and modifying control settings. + +In order to get maximum capability of these utilities, you should be running with a kernel that +provides support of the GPUs you have installed. If using AMD GPUs, installing the latest amdgpu +driver package or the latest ROCm release, may provide additional capabilities. If you have Nvidia +GPUs installed, you should have `nvidia-smi` installed in order for the utility reading of the cards +to be possible. Writing to GPUs is currently only possible for AMD GPUs, and only with compatible +cards and with the AMD ppfeaturemask set to 0xfffd7fff. This can be accomplished by adding +`amdgpu.ppfeaturemask=0xfffd7fff` to the `GRUB_CMDLINE_LINUX_DEFAULT` value in +`/etc/default/grub` and executing `sudo update-grub`. + +Check out the [User Guide](https://github.com/Ricks-Lab/gpu-utils/blob/master/docs/USER_GUIDE.md)! 
+ +Install the latest package from [PyPI](https://pypi.org/project/rickslab-gpu-utils/) with the following +commands: + +``` +pip3 uninstall rickslab-gpu-utils +pip3 install rickslab-gpu-utils +``` + +## gpu-chk + +This utility verifies if the environment is compatible with **rickslab-gpu-utils**. + +## gpu-ls + +This utility displays most relevant parameters for installed and compatible GPUs. The +default behavior is to list relevant parameters by GPU. OpenCL platform information is +added when the *--clinfo* option is used. A brief listing of key parameters is available +with the *--short* command line option. A simplified table of current GPU state is +displayed with the *--table* option. The *--no_fan* can be used to ignore fan settings. +The *--pstate* option can be used to output the p-state table for each GPU instead of +the list of basic parameters. The *--ppm* option is used to output the table of available +power/performance modes instead of basic parameters. + +## gpu-mon + +A utility to give the current state of all compatible GPUs. The default behavior is to continuously update a text based table in the current window until Ctrl-C is pressed. With the *--gui* option, a table of relevant parameters will be updated in a Gtk window. You can specify the delay between updates with the *--sleep N* @@ -37,125 +54,162 @@ over running both tools as a single read of the GPUs is used to update both displays. The *--ltz* option results in the use of local time instead of UTC. -## amdgpu-plot -A utility to continuously plot the trend of critical GPU parameters for all compatible -AMD GPUs. The *--sleep N* can be used to specify the update interval. The *amdgpu-plot* -utility has 2 modes of operation. The default mode is to read the GPU driver details -directly, which is useful as a standalone utility. The *--stdin* option causes -*amdgpu-plot* to read GPU data from stdin. 
This is how *amdgpu-monitor* produces the +## gpu-plot + +A utility to continuously plot the trend of critical GPU parameters for all compatible +GPUs. The *--sleep N* can be used to specify the update interval. The *gpu-plot* +utility has 2 modes of operation. The default mode is to read the GPU driver +details directly, which is useful as a standalone utility. The *--stdin* option +causes *gpu-plot* to read GPU data from stdin. This is how *gpu-mon* produces the plot and can also be used to pipe your own data into the process. The *--simlog* -option can be used with the *--stdin* when a monitor log file is piped as stdin. +option can be used with the *--stdin* when a monitor log file is piped as stdin. This is useful for troubleshooting and can be used to display saved log results. The *--ltz* option results in the use of local time instead of UTC. If you plan -to run both *amdgpu-plot* and *amdgpu-monitor*, then the *--plot* option of the -*amdgpu-monitor* utility should be used instead of both utilities in order reduce -data reads by a factor of 2. +to run both *gpu-plot* and *gpu-mon*, then the *--plot* option of the *gpu-mon* +utility should be used instead of both utilities in order to reduce data reads by +a factor of 2. + +## gpu-pac -## amdgpu-pac -Program and Control compatible AMD GPUs with this utility. By default, the commands to +Program and Control compatible GPUs with this utility. By default, the commands to be written to a GPU are written to a bash file for the user to inspect and run. If you -have confidence, the *--execute_pac* option can be used to execute and then delete the +have confidence, the *--execute_pac* option can be used to execute and then delete the saved bash file. Since the GPU device files are writable only by root, sudo is used to execute commands in the bash file, as a result, you will be prompted for credentials in the -terminal where you executed *amdgpu-pac*. 
The *--no_fan* option can be used to eliminate +terminal where you executed *gpu-pac*. The *--no_fan* option can be used to eliminate fan details from the utility. The *--force_write* option can be used to force all configuration parameters to be written to the GPU. The default behavior is to only write changes. -## New in this Release - [v3.0.0](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v3.0.0) -* Style and code robustness improvements -* Deprecated *amdgpu-pciid* and removed all related code. -* Complete rewrite based on benchMT learning. Simplified code with ObjDict for GpuItem parameters and use of -class variables for generic behavior parameters. -* Use lspci as the starting point for developing GPU list and classify by vendor, readability, writability, and -compute capability. Build in potential to be generic GPU util, instead of AMD focused. -* Test for readability and writability of all GPUs and apply utilities as appropriate. -* Add assessment of compute capability. -* Eliminated the use of lshw to determine driver compatibility and display of driver details is now -informational with no impact on the utilities. -* Add p-state masking capability for Type 2 GPUs. -* Optimized pac writing to GPUs. +## New in this Release - [v3.5.0](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v3.5.0) + +* Utilities now include reading of NV GPUs with full gpu-ls, gpu-mon, and gpu-plot support! +* Update name from **amdgpu-utils** to **rickslab-gpu-utils**. +* Improved PyPI packaging. +* Updated User Guide to cover latest features and capabilities. +* Improved robustness of NV read by validating sensor support for each query item the first time read. This will assure functionality on older model GPUs. +* Fixed issue in setting display model name for NV GPUs. +* Improved how lack of voltage readings for NV is handled in the utilities. +* Fixed an issue in assessing compute capability when GPUs of multiple vendors are installed. 
## Development Plans -* Enhance formatting in Gtk monitor tool. Need to improve my Gtk skills! + * Optimize plot utilities for performance. -* Add status read capabilities for Nvidia GPUs. +* Add status read capabilities for Intel GPUs. +* Add pac capabilities for Nvidia GPUs. ## Known Issues + * I/O error when selecting CUSTOM ppm. Maybe it requires arguments to specify the custom configuration. * Doesn't work well with Fiji ProDuo cards. * P-state mask gets intermittently reset for GPU used as display output. -* *amdgpu-pac* doesn't show what the current P-state mask is. Not sure if that can be read back. -* *amdgpu-pac* fan speed setting results in actual fan speeds a bit different from setting and pac interface shows -actual values instead of set values. +* Utility *gpu-pac* doesn't show what the current P-state mask is. Not sure if that can be read back. +* Utility *gpu-pac* fan speed setting results in actual fan speeds a bit different from setting and pac interface shows actual values instead of set values. 
## References -* Original inspiration for this project: -[Reddit](https://www.reddit.com/r/Amd/comments/agwroj/how_to_overclock_your_amd_gpu_on_linux/?st=JSL25OVP&sh=306c2d15) -* Phoronix articles including these: -[PowerCap](https://www.phoronix.com/scan.php?page=news_item&px=AMDGPU-Quick-WattMan-Cap-Test), -[HWMon](https://www.phoronix.com/scan.php?page=news_item&px=AMDGPU-Linux-4.17-Round-1) -* Repositories: [amdgpu-clocks](https://github.com/sibradzic/amdgpu-clocks), -[WattmanGTK](https://github.com/BoukeHaarsma23/WattmanGTK), [ROC-smi](https://github.com/RadeonOpenCompute/ROC-smi) + +* Original inspiration for this project: [Reddit](https://www.reddit.com/r/Amd/comments/agwroj/how_to_overclock_your_amd_gpu_on_linux/?st=JSL25OVP&sh=306c2d15) +* Phoronix articles including these: [PowerCap](https://www.phoronix.com/scan.php?page=news_item&px=AMDGPU-Quick-WattMan-Cap-Test), [HWMon](https://www.phoronix.com/scan.php?page=news_item&px=AMDGPU-Linux-4.17-Round-1) +* Repositories: [amdgpu-clocks](https://github.com/sibradzic/amdgpu-clocks), [WattmanGTK](https://github.com/BoukeHaarsma23/WattmanGTK), [ROC-smi](https://github.com/RadeonOpenCompute/ROC-smi) * Relevant Kernel Details: [Kernel Details](https://www.kernel.org/doc/html/latest/gpu/amdgpu.html) * PCI ID Decode Table: [PCI IDs](https://pci-ids.ucw.cz/v2.2/pci.ids) -* Radeon VII discussion on Reddit: -[Radeon VII OC](https://www.reddit.com/r/linux_gaming/duplicates/au7m3x/radeon_vii_on_linux_overclocking_undervolting/) +* Radeon VII discussion on Reddit: [Radeon VII OC](https://www.reddit.com/r/linux_gaming/duplicates/au7m3x/radeon_vii_on_linux_overclocking_undervolting/) * Example use cases: [wiki.archlinux.org](https://wiki.archlinux.org/index.php/AMDGPU) ## History + +### New in Previous Release - [v3.3.14](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v3.3.14) + +* Display card path details in logger whenever card path exists. +* Implemented read capabilities for Nvidia. 
Now supported by all utilities except pac. +* Added APU type and tuned parameters read/displayed for AMD APU integrated GPU. +* Read generic pcie sensors for all types of GPUs. +* Improved lspci search by using a no-shell call and using compiled regex. +* Implement PyPI package for easy installation. +* More robust handling of missing Icon and PCIID files. + +#### New in Previous Release - [v3.2.0](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v3.2.0) + +* Fixed CRITICAL issue where Zero fan speed could be written when invalid fan speed was read from the GPU. +* Fixed issue in reading pciid file in Gentoo (@CH3CN). +* Modified setup to indicate minimum instead of absolute package versions (@smoe). +* Modified requirements to include min/max package versions for major packages. +* Fixed crash for missing pci-ids file and add location for Arch Linux (@berturion). +* Fixed a crash in *amdgpu-pac* when no fan details could be read (laptop GPU). +* Fixed deprecation warnings for several property setting functions. Consolidated all property setting to a single function in a new module, and ignore warnings for those that are deprecated. All deprecated actions are marked with FIXME in GPUgui.py. +* Replaced deprecated set properties statement for colors with css formatting. +* Implemented a more robust string format of datetime to address datetime conversion for pandas in some installations. +* Implemented debug logging across the project. Activated with --debug option and output saved to a .log file. +* Updated color scheme of Gtk applications to work in Ubuntu 20.04. Unified color scheme across all utilities. +* Additional memory parameters added to utilities. +* Read ID information for all GPUs and attempt to decode GPU name. For cards with no card path entry, determine system device path and use for reading ID. Report system device path in *amdgpu-ls*. Add *amdgpu-ls --short* report to give brief description of all installed GPUs. 
+ +#### New in Previous Release - [v3.0.0](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v3.0.0) + +* Style and code robustness improvements +* Deprecated *amdgpu-pciid* and removed all related code. +* Complete rewrite based on benchMT learning. Simplified code with ObjDict for GpuItem parameters and use of class variables for generic behavior parameters. +* Use lspci as the starting point for developing GPU list and classify by vendor, readability, writability, and compute capability. Build in potential to be generic GPU util, instead of AMD focused. +* Test for readability and writability of all GPUs and apply utilities as appropriate. +* Add assessment of compute capability. +* Eliminated the use of lshw to determine driver compatibility and display of driver details is now informational with no impact on the utilities. +* Add p-state masking capability for Type 2 GPUs. +* Optimized pac writing to GPUs. + #### New in Previous Release - [v2.7.0](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v2.7.0) + * Initial release of man pages * Modifications to work with distribution installation * Use system pci.ids file and make *amdgpu-pciid* obsolete * Update setup.py file for successful installation. #### New in Previous Release - [v2.6.0](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v2.6.0) + * PEP8 style modifications * Fixed a bug in monitor display. * Implement requirements file for with and without a venv. * Found and fixed a few minor bugs. * Fixed issue with *amdgpu-plot* becoming corrupt over time. -* Implemented clean shutdown of monitor and better buffering to plot. This could have caused in problems in systems -with many GPUs. +* Implemented clean shutdown of monitor and better buffering to plot. This could have caused problems in systems with many GPUs. #### New in Previous Release - [v2.5.2](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v2.5.2) -* Some preparation work for Debian package (@smoe). 
+ +* Some preparation work for [Debian package](https://tracker.debian.org/pkg/ricks-amdgpu-utils) (@smoe). * Added *--ltz* option to use local times instead of UTC for logging and plot data. * Added 0xfffd7fff to valid amdgpu.ppfeaturemask values (@pastaq). * Updates to User Guide to include instructions to apply PAC conditions on startup (@csecht). #### New in Previous Release - [v2.5.1](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v2.5.1) + * Fixed a compatibility issue with matplotlib 3.x. Converted time string to a datetime object. * Display version information for pandas, matplotlib, and numpy with the *--about* option for *amdgpu-plot* #### New in Previous Release - [v2.5.0](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v2.5.0) -* Implemented the *--plot* option for amdgpu-monitor. This will display plots of critical GPU parameters that update -at an interval defined by the *--sleep N* option. + +* Implemented the *--plot* option for amdgpu-monitor. This will display plots of critical GPU parameters that update at an interval defined by the *--sleep N* option. * Errors in reading non-critical parameters will now show a warning the first time and are disabled for future reads. * Fixed a bug in implementation of compatibility checks and improved usage of try/except. #### New in Previous Release - [v2.4.0](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v2.4.0) + * Implemented *amdgpu-pac* feature for type 2 Freq/Voltage controlled GPUs, which includes the Radeon VII. -* Implemented the *amdgpu-pac --force_write* option, which writes all configuration parameters to the GPU, even if -unchanged. The default behavior is changed to now only write changed configuration parameters. -* Indicate number of changes to be written by PAC, and if no changes, don't execute bash file. Display execute -complete message in terminal, and update messages in PAC message box. 
+* Implemented the *amdgpu-pac --force_write* option, which writes all configuration parameters to the GPU, even if unchanged. The default behavior is changed to now only write changed configuration parameters. +* Indicate number of changes to be written by PAC, and if no changes, don't execute bash file. Display execute complete message in terminal, and update messages in PAC message box. * Implemented a new GPU type 0, which represent some older cards whose p-states can not be changed. * Tuned *amdgpu-pac* window format. #### New in Previous Release - [v2.3.1](https://github.com/Ricks-Lab/amdgpu-utils/releases/tag/v2.3.1) + * Fixed and improved Python/Kernel compatibility checks. * Added Python2 compatible utility to check *amdgpu-utils* compatibility. * Fixed confusing mode/level fileptr names. * Removed CUSTOM PPM mode until I figure out syntax. -* Implemented classification of card type based on how it implements frequency/voltage control. This is reported -by *amdgpu-ls* and alters the behavior of both *amdgpu-pac* and *amdgpu-monitor*. +* Implemented classification of card type based on how it implements frequency/voltage control. This is reported by *amdgpu-ls* and alters the behavior of both *amdgpu-pac* and *amdgpu-monitor*. * Changed dpkg error to a warning to handle custom driver installs. * Initial [User Guide](docs/USER_GUIDE.md) - [Need contributors!](https://github.com/Ricks-Lab/amdgpu-utils/issues/13) #### New in Previous Release - v2.3.0 -* Implemented a message box in amdgpu-pac to indicate details of PAC execution and indicate if sudo is pending -credential entry. + +* Implemented a message box in amdgpu-pac to indicate details of PAC execution and indicate if sudo is pending credential entry. * Implement more robust classification of card compatibility and only use compatible GPUs in the utilities. * Official release of amdgpu-pciid which updates a local list of GPU names from the official pci.ids website. 
* Optimized refresh of data by moving static items to a different function and only read those that are dynamic. @@ -163,41 +217,39 @@ * Initial basic functionality for Radeon VII GPU! #### New in Previous Release - v2.2.0 -* Major bug fix in the way HWMON directory was determined. This fixes an issue in not seeing sensor files correctly -when a some other card is resident in a PCIe slot. -* Implemented logging option *--log* for amdgpu-monitor. A red indicator will indicate active logging and the -target filename. + +* Major bug fix in the way HWMON directory was determined. This fixes an issue in not seeing sensor files correctly when some other card is resident in a PCIe slot. +* Implemented logging option *--log* for amdgpu-monitor. A red indicator will indicate active logging and the target filename. * Implemented energy meter in amdgpu-monitor. -* Implemented the ability to check the GPU extracted ID in a pci.ids file for correct model name. Implemented a -function to extract only AMD information for the pci.ids file and store in the file amd_pci_id.txt which is included -in this distribution. +* Implemented the ability to check the GPU extracted ID in a pci.ids file for correct model name. Implemented a function to extract only AMD information for the pci.ids file and store in the file amd_pci_id.txt which is included in this distribution. * Optimized long, short, and decoded GPU model names. * Alpha release of a utility to update device decode data from the pci.ids website. #### New in Previous Release - v2.1.0 + * Significant bug fixes and error proofing. Added messages to stderr for missing driver related files. * Added fan monitor and control features. -* Implemented --no_fan option across all tools. This eliminates the reading and display of fan parameters and -useful for those who have installed GPU waterblocks. +* Implemented --no_fan option across all tools. 
This eliminates the reading and display of fan parameters and is useful for those who have installed GPU waterblocks. * Implemented P-state masking, which limits available P-states to those specified. Useful for power management. * Fixed implementation of global variables that broke with implementation of modules in library. * Added more validation checks before writing parameters to cards. #### New in Previous Release - v2.0.0 + * Many bug fixes! * First release of amdgpu-pac. * Add check of amdgpu driver in the check of environment for all utilities. Add display of amdgpu driver version. * Split list functions of the original amdgpu-monitor into amdgpu-ls. * Added --clinfo option to amdgpu-ls which will list openCL platform details for each GPU. -* Added --ppm option to amdgpu-ls which will display the table of available power/performance modes available -for each GPU. +* Added --ppm option to amdgpu-ls which will display the table of available power/performance modes available for each GPU. * Error messages are now output to stderr instead stdout. -* Added power cap and power/performance mode to the monitor utilities. I have also included them in the amdgpu-ls -display in addtion to the power cap limits. +* Added power cap and power/performance mode to the monitor utilities. I have also included them in the amdgpu-ls display in addition to the power cap limits. #### New in Previous Release - v1.1.0 + * Added --pstates feature to display table of p-states instead of GPU details. * Added more error checking and exit if no compatible AMD GPUs are found. #### New in Previous Release - v1.0.0 + * Completed implementation of the GPU Monitor tool. 
diff -Nru ricks-amdgpu-utils-3.0.0/requirements.txt ricks-amdgpu-utils-3.5.0/requirements.txt --- ricks-amdgpu-utils-3.0.0/requirements.txt 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/requirements.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -cycler==0.10.0 -kiwisolver==1.1.0 -matplotlib==3.1.3 -numpy==1.18.1 -pandas==1.0.1 -pyparsing==2.4.6 -python-dateutil==2.8.1 -pytz==2019.3 -ruamel.yaml==0.16.10 -ruamel.yaml.clib==0.2.0 -six==1.14.0 diff -Nru ricks-amdgpu-utils-3.0.0/requirements-venv.txt ricks-amdgpu-utils-3.5.0/requirements-venv.txt --- ricks-amdgpu-utils-3.0.0/requirements-venv.txt 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/requirements-venv.txt 2020-07-06 00:57:49.000000000 +0000 @@ -1,13 +1,13 @@ -cycler==0.10.0 -kiwisolver==1.1.0 -matplotlib==3.1.3 -numpy==1.18.1 -pandas==1.0.1 -pyparsing==2.4.6 +cycler>=0.10.0 +kiwisolver>=1.1.0 +matplotlib>=3.1 +numpy>=1.18.0 +pandas>=1.0 +pyparsing>=2.4.6 python-dateutil==2.8.1 -pytz==2019.3 -ruamel.yaml==0.16.10 -ruamel.yaml.clib==0.2.0 -six==1.14.0 -vext==0.7.3 -vext.gi==0.7.0 +pytz>=2019.3 +ruamel.yaml>=0.16.10 +ruamel.yaml.clib>=0.2.0 +six>=1.11.0 +vext>=0.7.3 +vext.gi>=0.7.0 diff -Nru ricks-amdgpu-utils-3.0.0/setup.py ricks-amdgpu-utils-3.5.0/setup.py --- ricks-amdgpu-utils-3.0.0/setup.py 2020-02-29 07:32:52.000000000 +0000 +++ ricks-amdgpu-utils-3.5.0/setup.py 2020-07-06 00:57:49.000000000 +0000 @@ -1,45 +1,62 @@ -#!/usr/bin/env python3 -# from distutils.core import setup +#!/usr/bin/python3 + import sys +import os +import pathlib from setuptools import setup +from GPUmodules import __version__, __status__ if sys.version_info < (3, 6): - print('ricks-amdgpu-utils requires at least Python 3.6.') + print('rickslab-gpu-utils requires at least Python 3.6.') sys.exit(1) +with open(os.path.join(pathlib.Path(__file__).parent, 'README.md'), 'r') as file_ptr: + long_description = file_ptr.read() -setup(name='ricks-amdgpu-utils', - version='3.0.0', - 
description='Ricks-Lab AMD GPU Utilities', - long_description='A set of utilities for monitoring AMD GPU performance and modifying control settings.', +setup(name='rickslab-gpu-utils', + version=__version__, + description='Ricks-Lab GPU Utilities', + long_description_content_type='text/markdown', + long_description=long_description, author='RueiKe', + keywords='gpu system monitoring overclocking underclocking linux amdgpu nvidia-smi rocm amd nvidia opencl boinc', platforms='posix', author_email='rueikes.homelab@gmail.com', - url='https://github.com/Ricks-Lab/amdgpu-utils', + url='https://github.com/Ricks-Lab/gpu-utils', packages=['GPUmodules'], include_package_data=True, - scripts=['amdgpu-chk', 'amdgpu-ls', 'amdgpu-monitor', 'amdgpu-pac', 'amdgpu-plot'], + scripts=['gpu-chk', 'gpu-ls', 'gpu-mon', 'gpu-pac', 'gpu-plot'], license='GPL-3', python_requires='>=3.6', - install_requires=['cycler==0.10.0', - 'kiwisolver==1.1.0', - 'matplotlib==3.1.3', - 'numpy==1.18.1', - 'pandas==1.0.1', - 'pyparsing==2.4.6', - 'python-dateutil==2.8.1', - 'pytz==2019.3', + project_urls={'Bug Tracker': 'https://github.com/Ricks-Lab/gpu-utils/issues', + 'Documentation': 'https://github.com/Ricks-Lab/gpu-utils/blob/master/docs/USER_GUIDE.md', + 'Source Code': 'https://github.com/Ricks-Lab/gpu-utils'}, + classifiers=[__status__, + 'Operating System :: POSIX', + 'Natural Language :: English', + 'Programming Language :: Python :: 3', + 'Topic :: System :: Monitoring', + 'Environment :: GPU', + 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)'], + install_requires=['cycler>=0.10.0', + 'kiwisolver>=1.1.0', + 'matplotlib>=3.1.3', + 'numpy>=1.18.1', + 'pandas>=1.0.1', + 'pyparsing>=2.4.6', + 'python-dateutil>=2.8.1', + 'pytz>=2019.3', 'ruamel.yaml==0.16.10', 'ruamel.yaml.clib==0.2.0', - 'six==1.14.0'], - data_files=[('share/ricks-amdgpu-utils/icons', ['icons/amdgpu-monitor.icon.png', - 'icons/amdgpu-pac.icon.png', - 'icons/amdgpu-plot.icon.png']), - 
('share/ricks-amdgpu-utils/doc', ['README.md']), - ('share/man/man1', ['man/amdgpu-chk.1', - 'man/amdgpu-ls.1', - 'man/amdgpu-monitor.1', - 'man/amdgpu-pac.1', - 'man/amdgpu-plot.1']) + 'six>=1.11.0'], + data_files=[('share/rickslab-gpu-utils/icons', ['icons/gpu-mon.icon.png', + 'icons/gpu-pac.icon.png', + 'icons/gpu-plot.icon.png']), + ('share/rickslab-gpu-utils/doc', ['README.md', 'LICENSE']), + ('share/man/man1', ['man/gpu-chk.1', + 'man/gpu-ls.1', + 'man/gpu-mon.1', + 'man/gpu-pac.1', + 'man/gpu-plot.1']) ] )