Description: orch:cephadm/workunits/{0-distro/rhel_8.6_container_tools_rhel8 agent/off mon_election/classic task/test_monitoring_stack_basic}

Log: http://qa-proxy.ceph.com/teuthology/adking-2023-06-13_16:39:13-orch:cephadm-wip-adk-testing-2023-06-13-1036-distro-default-smithi/7302641/teuthology.log

Sentry event: https://sentry.ceph.com/organizations/ceph/?query=99a6e2dc43944c43ad4ebc3457beb4cb

Failure Reason:

{'smithi152.front.sepia.ceph.com': {'_ansible_delegated_vars': {'ansible_connection': 'local', 'ansible_host': 'localhost', 'ansible_port': None, 'ansible_user': 'cm'}, '_ansible_no_log': False, 'attempts': 5, 'changed': False, 'invocation': {'module_args': {'accept_hostkey': False, 'archive': None, 'archive_prefix': None, 'bare': False, 'clone': True, 'depth': None, 'dest': '/home/teuthworker/.cache/src/keys', 'executable': None, 'force': True, 'gpg_whitelist': [], 'key_file': None, 'recursive': True, 'reference': None, 'refspec': None, 'remote': 'origin', 'repo': 'https://github.com/ceph/keys', 'separate_git_dir': None, 'ssh_opts': None, 'track_submodules': False, 'umask': None, 'update': True, 'verify_commit': False, 'version': 'main'}}, 'msg': 'Failed to checkout branch main', 'rc': 128, 'stderr': "fatal: Unable to create '/home/teuthworker/.cache/src/keys/.git/index.lock': File exists.\n\nAnother git process seems to be running in this repository, e.g.\nan editor opened by 'git commit'. Please make sure all processes\nare terminated then try again. If it still fails, a git process\nmay have crashed in this repository earlier:\nremove the file manually to continue.\n", 'stderr_lines': ["fatal: Unable to create '/home/teuthworker/.cache/src/keys/.git/index.lock': File exists.", '', 'Another git process seems to be running in this repository, e.g.', "an editor opened by 'git commit'. Please make sure all processes", 'are terminated then try again. If it still fails, a git process', 'may have crashed in this repository earlier:', 'remove the file manually to continue.'], 'stdout': '', 'stdout_lines': []}}

  • log_href: http://qa-proxy.ceph.com/teuthology/adking-2023-06-13_16:39:13-orch:cephadm-wip-adk-testing-2023-06-13-1036-distro-default-smithi/7302641/teuthology.log
  • archive_path: /home/teuthworker/archive/adking-2023-06-13_16:39:13-orch:cephadm-wip-adk-testing-2023-06-13-1036-distro-default-smithi/7302641
  • description: orch:cephadm/workunits/{0-distro/rhel_8.6_container_tools_rhel8 agent/off mon_election/classic task/test_monitoring_stack_basic}
  • duration: 0:10:43
  • email: adking@redhat.com
  • failure_reason: {'smithi152.front.sepia.ceph.com': {'_ansible_delegated_vars': {'ansible_connection': 'local', 'ansible_host': 'localhost', 'ansible_port': None, 'ansible_user': 'cm'}, '_ansible_no_log': False, 'attempts': 5, 'changed': False, 'invocation': {'module_args': {'accept_hostkey': False, 'archive': None, 'archive_prefix': None, 'bare': False, 'clone': True, 'depth': None, 'dest': '/home/teuthworker/.cache/src/keys', 'executable': None, 'force': True, 'gpg_whitelist': [], 'key_file': None, 'recursive': True, 'reference': None, 'refspec': None, 'remote': 'origin', 'repo': 'https://github.com/ceph/keys', 'separate_git_dir': None, 'ssh_opts': None, 'track_submodules': False, 'umask': None, 'update': True, 'verify_commit': False, 'version': 'main'}}, 'msg': 'Failed to checkout branch main', 'rc': 128, 'stderr': "fatal: Unable to create '/home/teuthworker/.cache/src/keys/.git/index.lock': File exists.\n\nAnother git process seems to be running in this repository, e.g.\nan editor opened by 'git commit'. Please make sure all processes\nare terminated then try again. If it still fails, a git process\nmay have crashed in this repository earlier:\nremove the file manually to continue.\n", 'stderr_lines': ["fatal: Unable to create '/home/teuthworker/.cache/src/keys/.git/index.lock': File exists.", '', 'Another git process seems to be running in this repository, e.g.', "an editor opened by 'git commit'. Please make sure all processes", 'are terminated then try again. If it still fails, a git process', 'may have crashed in this repository earlier:', 'remove the file manually to continue.'], 'stdout': '', 'stdout_lines': []}}
  • flavor:
  • job_id: 7302641
  • kernel:
    • kdb: True
    • sha1: distro
  • last_in_suite: False
  • machine_type: smithi
  • name: adking-2023-06-13_16:39:13-orch:cephadm-wip-adk-testing-2023-06-13-1036-distro-default-smithi
  • nuke_on_error: True
  • os_type: rhel
  • os_version: 8.6
  • overrides:
    • admin_socket:
      • branch: wip-adk-testing-2023-06-13-1036
    • ceph:
      • conf:
        • global:
          • mon election default strategy: 1
        • mgr:
          • debug mgr: 20
          • debug ms: 1
          • mgr/cephadm/use_agent: False
        • mon:
          • debug mon: 20
          • debug ms: 1
          • debug paxos: 20
        • osd:
          • debug ms: 1
          • debug osd: 20
      • flavor: default
      • log-ignorelist:
        • \(MDS_ALL_DOWN\)
        • \(MDS_UP_LESS_THAN_MAX\)
      • sha1: 6f56380bbd18e54ce5a7704e96f24c2ab58c9c0a
    • ceph-deploy:
      • conf:
        • client:
          • log file: /var/log/ceph/ceph-$name.$pid.log
        • mon:
          • osd default pool size: 2
    • install:
      • ceph:
        • flavor: default
        • sha1: 6f56380bbd18e54ce5a7704e96f24c2ab58c9c0a
    • selinux:
      • whitelist:
        • scontext=system_u:system_r:logrotate_t:s0
    • workunit:
      • branch: wip-adk-testing-2023-06-13-1036
      • sha1: 6f56380bbd18e54ce5a7704e96f24c2ab58c9c0a
  • owner: scheduled_adking@teuthology
  • pid:
  • roles:
    • ['host.a', 'mon.a', 'mgr.a', 'osd.0']
    • ['host.b', 'mon.b', 'mgr.b', 'osd.1']
    • ['host.c', 'mon.c', 'osd.2']
  • sentry_event: https://sentry.ceph.com/organizations/ceph/?query=99a6e2dc43944c43ad4ebc3457beb4cb
  • status: dead
  • success: False
  • branch: wip-adk-testing-2023-06-13-1036
  • seed:
  • sha1: 6f56380bbd18e54ce5a7704e96f24c2ab58c9c0a
  • subset:
  • suite:
  • suite_branch: wip-adk-testing-2023-06-13-1036
  • suite_path:
  • suite_relpath:
  • suite_repo:
  • suite_sha1: 6f56380bbd18e54ce5a7704e96f24c2ab58c9c0a
  • targets:
    • smithi043.front.sepia.ceph.com: ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCpUxo5ay/ucdkX9rleNMm8Ofp24XRXUtJPOK3Ji3XBpTiUgmmwneaY6316+3CG+k4K0p3IebanuyTvZApjm02/dNCvu8yD3rjEpWVgIuqYH8O3lEdN9DXcCX+7xHJE0m18+ZPsJQVsNrNJxY+5zqyNnBc8jpZq2LStsWwPOKPWhauANRkOBFSGDoLppWZbLx/yJs+R0qoNLLK9LpKQTUnqsapXvbD27OpPahzj/qbNxFPPyY6XrrldZ6qEO+EjJOujFLH3gzv0kJu10DCuzxrpz6ston4RPi1O3GgcHnF4Mqr/W9VwZ94UTMh+k4dIFRVqJEKrcwRHa5kFsvH/WhVx0Ygr/MiV/Q86mmV7JT+/MpzIRTtWyei5L9IBeZIE9cwzj+mOk6SD/t1roIVEGVV/uF2AyFDVBPr6F8oZ822OGpUyyaDsWTitPNdTX4TC3zdSrLc9tyDa1P74eH/ZLeTSf+C6pM/cepOhQaa6XzpDiPSuJUm9HOQxUlEAvcqa7Ac=
    • smithi134.front.sepia.ceph.com: ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCpUxo5ay/ucdkX9rleNMm8Ofp24XRXUtJPOK3Ji3XBpTiUgmmwneaY6316+3CG+k4K0p3IebanuyTvZApjm02/dNCvu8yD3rjEpWVgIuqYH8O3lEdN9DXcCX+7xHJE0m18+ZPsJQVsNrNJxY+5zqyNnBc8jpZq2LStsWwPOKPWhauANRkOBFSGDoLppWZbLx/yJs+R0qoNLLK9LpKQTUnqsapXvbD27OpPahzj/qbNxFPPyY6XrrldZ6qEO+EjJOujFLH3gzv0kJu10DCuzxrpz6ston4RPi1O3GgcHnF4Mqr/W9VwZ94UTMh+k4dIFRVqJEKrcwRHa5kFsvH/WhVx0Ygr/MiV/Q86mmV7JT+/MpzIRTtWyei5L9IBeZIE9cwzj+mOk6SD/t1roIVEGVV/uF2AyFDVBPr6F8oZ822OGpUyyaDsWTitPNdTX4TC3zdSrLc9tyDa1P74eH/ZLeTSf+C6pM/cepOhQaa6XzpDiPSuJUm9HOQxUlEAvcqa7Ac=
    • smithi152.front.sepia.ceph.com: ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCpUxo5ay/ucdkX9rleNMm8Ofp24XRXUtJPOK3Ji3XBpTiUgmmwneaY6316+3CG+k4K0p3IebanuyTvZApjm02/dNCvu8yD3rjEpWVgIuqYH8O3lEdN9DXcCX+7xHJE0m18+ZPsJQVsNrNJxY+5zqyNnBc8jpZq2LStsWwPOKPWhauANRkOBFSGDoLppWZbLx/yJs+R0qoNLLK9LpKQTUnqsapXvbD27OpPahzj/qbNxFPPyY6XrrldZ6qEO+EjJOujFLH3gzv0kJu10DCuzxrpz6ston4RPi1O3GgcHnF4Mqr/W9VwZ94UTMh+k4dIFRVqJEKrcwRHa5kFsvH/WhVx0Ygr/MiV/Q86mmV7JT+/MpzIRTtWyei5L9IBeZIE9cwzj+mOk6SD/t1roIVEGVV/uF2AyFDVBPr6F8oZ822OGpUyyaDsWTitPNdTX4TC3zdSrLc9tyDa1P74eH/ZLeTSf+C6pM/cepOhQaa6XzpDiPSuJUm9HOQxUlEAvcqa7Ac=
  • tasks:
    • internal.check_packages:
    • internal.buildpackages_prep:
    • internal.save_config:
    • internal.check_lock:
    • internal.add_remotes:
    • console_log:
    • internal.connect:
    • internal.push_inventory:
    • internal.serialize_remote_roles:
    • internal.check_conflict:
    • internal.check_ceph_data:
    • internal.vm_setup:
    • kernel:
      • kdb: True
      • sha1: distro
    • internal.base:
    • internal.archive_upload:
    • internal.archive:
    • internal.coredump:
    • internal.sudo:
    • internal.syslog:
    • internal.timer:
    • pcp:
    • selinux:
    • ansible.cephlab:
    • clock:
    • pexec:
      • all:
        • sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup
        • sudo dnf -y module reset container-tools
        • sudo dnf -y module install container-tools:rhel8 --allowerasing --nobest
        • sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf
    • install:
    • cephadm:
    • cephadm.shell:
      • host.a:
        • set -e set -x ceph orch apply node-exporter ceph orch apply grafana ceph orch apply alertmanager ceph orch apply prometheus sleep 240 ceph orch ls ceph orch ps ceph orch host ls MON_DAEMON=$(ceph orch ps --daemon-type mon -f json | jq -r 'last | .daemon_name') GRAFANA_HOST=$(ceph orch ps --daemon-type grafana -f json | jq -e '.[]' | jq -r '.hostname') PROM_HOST=$(ceph orch ps --daemon-type prometheus -f json | jq -e '.[]' | jq -r '.hostname') ALERTM_HOST=$(ceph orch ps --daemon-type alertmanager -f json | jq -e '.[]' | jq -r '.hostname') GRAFANA_IP=$(ceph orch host ls -f json | jq -r --arg GRAFANA_HOST "$GRAFANA_HOST" '.[] | select(.hostname==$GRAFANA_HOST) | .addr') PROM_IP=$(ceph orch host ls -f json | jq -r --arg PROM_HOST "$PROM_HOST" '.[] | select(.hostname==$PROM_HOST) | .addr') ALERTM_IP=$(ceph orch host ls -f json | jq -r --arg ALERTM_HOST "$ALERTM_HOST" '.[] | select(.hostname==$ALERTM_HOST) | .addr') # check each host node-exporter metrics endpoint is responsive ALL_HOST_IPS=$(ceph orch host ls -f json | jq -r '.[] | .addr') for ip in $ALL_HOST_IPS; do curl -s http://${ip}:9100/metric done # check grafana endpoints are responsive and database health is okay curl -k -s https://${GRAFANA_IP}:3000/api/health curl -k -s https://${GRAFANA_IP}:3000/api/health | jq -e '.database == "ok"' # stop mon daemon in order to trigger an alert ceph orch daemon stop $MON_DAEMON sleep 120 # check prometheus endpoints are responsive and mon down alert is firing curl -s http://${PROM_IP}:9095/api/v1/status/config curl -s http://${PROM_IP}:9095/api/v1/status/config | jq -e '.status == "success"' curl -s http://${PROM_IP}:9095/api/v1/alerts curl -s http://${PROM_IP}:9095/api/v1/alerts | jq -e '.data | .alerts | .[] | select(.labels | .alertname == "CephMonDown") | .state == "firing"' # check alertmanager endpoints are responsive and mon down alert is active curl -s http://${ALERTM_IP}:9093/api/v1/status curl -s http://${ALERTM_IP}:9093/api/v1/alerts curl -s http://${ALERTM_IP}:9093/api/v1/alerts | jq -e '.data | .[] | select(.labels | .alertname == "CephMonDown") | .status | .state == "active"'
  • teuthology_branch: main
  • verbose: False
  • pcp_grafana_url:
  • priority:
  • user:
  • queue:
  • posted: 2023-06-13 16:39:21
  • started: 2023-06-14 04:37:54
  • updated: 2023-06-14 04:56:49
  • status_class: danger
  • runtime: 0:18:55
  • wait_time: 0:08:12