Description: orch:cephadm/smoke-roleless/{0-distro/ubuntu_22.04 0-nvme-loop 1-start 2-services/nfs-ingress2 3-final}

Log: http://qa-proxy.ceph.com/teuthology/adking-2024-04-22_22:45:33-orch:cephadm-wip-adk-testing-2024-04-22-1618-distro-default-smithi/7669246/teuthology.log

Sentry event: https://sentry.ceph.com/organizations/ceph/?query=e217f7d1dfbe4374aa2cfd6f8b6e713d

Failure Reason:

"2024-04-22T23:44:34.768270+0000 mon.smithi046 (mon.0) 121 : cluster [WRN] Health check failed: failed to probe daemons or devices (CEPHADM_REFRESH_FAILED)" in cluster log

  • log_href: http://qa-proxy.ceph.com/teuthology/adking-2024-04-22_22:45:33-orch:cephadm-wip-adk-testing-2024-04-22-1618-distro-default-smithi/7669246/teuthology.log
  • archive_path: /home/teuthworker/archive/adking-2024-04-22_22:45:33-orch:cephadm-wip-adk-testing-2024-04-22-1618-distro-default-smithi/7669246
  • description: orch:cephadm/smoke-roleless/{0-distro/ubuntu_22.04 0-nvme-loop 1-start 2-services/nfs-ingress2 3-final}
  • duration: 0:27:49
  • email: adking@redhat.com
  • failure_reason: "2024-04-22T23:44:34.768270+0000 mon.smithi046 (mon.0) 121 : cluster [WRN] Health check failed: failed to probe daemons or devices (CEPHADM_REFRESH_FAILED)" in cluster log
  • flavor:
  • job_id: 7669246
  • kernel:
    • kdb: 1
    • sha1: distro
  • last_in_suite: False
  • machine_type: smithi
  • name: adking-2024-04-22_22:45:33-orch:cephadm-wip-adk-testing-2024-04-22-1618-distro-default-smithi
  • nuke_on_error: True
  • os_type: ubuntu
  • os_version: 22.04
  • overrides:
    • admin_socket:
      • branch: wip-adk-testing-2024-04-22-1618
    • ceph:
      • conf:
        • mgr:
          • debug mgr: 20
          • debug ms: 1
        • mon:
          • debug mon: 20
          • debug ms: 1
          • debug paxos: 20
        • osd:
          • debug ms: 1
          • debug osd: 20
          • osd shutdown pgref assert: True
      • flavor: default
      • log-ignorelist:
        • \(MDS_ALL_DOWN\)
        • \(MDS_UP_LESS_THAN_MAX\)
      • log-only-match:
        • CEPHADM_
      • sha1: 43be020184947e53516056c9931e1ac5bdbbb1a5
    • ceph-deploy:
      • conf:
        • client:
          • log file: /var/log/ceph/ceph-$name.$pid.log
        • mon:
    • install:
      • ceph:
        • flavor: default
        • sha1: 43be020184947e53516056c9931e1ac5bdbbb1a5
    • workunit:
      • branch: wip-adk-testing-2024-04-22-1618
      • sha1: 43be020184947e53516056c9931e1ac5bdbbb1a5
  • owner: scheduled_adking@teuthology
  • pid:
  • roles:
    • ['host.a', 'client.0']
    • ['host.b', 'client.1']
  • sentry_event: https://sentry.ceph.com/organizations/ceph/?query=e217f7d1dfbe4374aa2cfd6f8b6e713d
  • status: fail
  • success: False
  • branch: wip-adk-testing-2024-04-22-1618
  • seed: 6551
  • sha1: 43be020184947e53516056c9931e1ac5bdbbb1a5
  • subset: 1/16
  • suite: orch:cephadm
  • suite_branch: wip-adk-testing-2024-04-22-1618
  • suite_path: /home/teuthworker/src/git.ceph.com_ceph-c_43be020184947e53516056c9931e1ac5bdbbb1a5/qa
  • suite_relpath: qa
  • suite_repo: https://git.ceph.com/ceph-ci.git
  • suite_sha1: 43be020184947e53516056c9931e1ac5bdbbb1a5
  • targets:
    • smithi046.front.sepia.ceph.com: ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBK1MTIynKUSjjAvtfDWQmMoS9pRHex40K55khAaT0xha+JZ4o1KiC+OS27X2jg2fXdyOsKNUFjtGGm7ktL/PNOc=
    • smithi120.front.sepia.ceph.com: ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBC3J1Hd3qZJb0R+RWxi81gCl7OkPGyarWGs3ak3kGXjV3Xsz10SIfTO324Mpa8mdRh3kVJf4ocTrIufmHiml8WI=
  • tasks:
    • internal.check_packages:
    • internal.buildpackages_prep:
    • internal.save_config:
    • internal.check_lock:
    • internal.add_remotes:
    • console_log:
    • internal.connect:
    • internal.push_inventory:
    • internal.serialize_remote_roles:
    • internal.check_conflict:
    • internal.check_ceph_data:
    • internal.vm_setup:
    • kernel:
      • kdb: 1
      • sha1: distro
    • internal.base:
    • internal.archive_upload:
    • internal.archive:
    • internal.coredump:
    • internal.sudo:
    • internal.syslog:
    • internal.timer:
    • pcp:
    • selinux:
    • ansible.cephlab:
    • clock:
    • nvme_loop:
    • cephadm:
      • roleless: True
      • conf:
        • mgr:
          • debug mgr: 20
          • debug ms: 1
        • mon:
          • debug mon: 20
          • debug ms: 1
          • debug paxos: 20
        • osd:
          • debug ms: 1
          • debug osd: 20
          • osd shutdown pgref assert: True
      • flavor: default
      • log-ignorelist:
        • \(MDS_ALL_DOWN\)
        • \(MDS_UP_LESS_THAN_MAX\)
      • log-only-match:
        • CEPHADM_
      • sha1: 43be020184947e53516056c9931e1ac5bdbbb1a5
      • cluster: ceph
      • cephadm_mode: root
    • cephadm.shell:
      • host.a:
        • ceph orch status
        • ceph orch ps
        • ceph orch ls
        • ceph orch host ls
        • ceph orch device ls
    • vip:
    • cephadm.shell:
      • host.a:
        • ceph orch device ls --refresh
    • vip.exec:
      • all-hosts:
        • systemctl stop nfs-server
    • cephadm.shell:
      • host.a:
        • ceph fs volume create foofs
        • ceph nfs cluster create foo --ingress --virtual-ip {{VIP0}}/{{VIPPREFIXLEN}} --port 2999
        • ceph nfs export create cephfs --fsname foofs --cluster-id foo --pseudo-path /fake
    • cephadm.wait_for_service:
      • service: nfs.foo
    • cephadm.wait_for_service:
      • service: ingress.nfs.foo
    • vip.exec:
      • host.a:
        • mkdir /mnt/foo
        • sleep 5
        • mount -t nfs {{VIP0}}:/fake /mnt/foo -o port=2999
        • echo test > /mnt/foo/testfile
        • sync
    • cephadm.shell:
      • host.a:
          • echo "Check with each haproxy down in turn..." for haproxy in `ceph orch ps | grep ^haproxy.nfs.foo. | awk '{print $1}'`; do ceph orch daemon stop $haproxy while ! ceph orch ps | grep $haproxy | grep stopped; do sleep 1 ; done cat /mnt/foo/testfile echo $haproxy > /mnt/foo/testfile sync ceph orch daemon start $haproxy while ! ceph orch ps | grep $haproxy | grep running; do sleep 1 ; done done
      • volumes:
        • /mnt/foo:/mnt/foo
    • vip.exec:
      • all-hosts:
          • echo "Check with $(hostname) ganesha(s) down..." for c in `systemctl | grep ceph- | grep @nfs | awk '{print $1}'`; do cid=`echo $c | sed 's/@/-/'` id=`echo $c | cut -d @ -f 2 | sed 's/.service$//'` fsid=`echo $c | cut -d @ -f 1 | cut -d - -f 2-` echo "Removing daemon $id fsid $fsid..." sudo $TESTDIR/cephadm rm-daemon --fsid $fsid --name $id echo "Waking up cephadm..." sudo $TESTDIR/cephadm shell -- ceph orch ps --refresh while ! timeout 1 cat /mnt/foo/testfile ; do true ; done echo "Mount is back!" done
    • cephadm.shell:
      • host.a:
        • stat -c '%u %g' /var/log/ceph | grep '167 167'
        • ceph orch status
        • ceph orch ps
        • ceph orch ls
        • ceph orch host ls
        • ceph orch device ls
        • ceph orch ls | grep '^osd.all-available-devices '
  • teuthology_branch: main
  • verbose: False
  • pcp_grafana_url:
  • priority: 80
  • user: adking
  • queue:
  • posted: 2024-04-22 22:47:01
  • started: 2024-04-22 23:24:01
  • updated: 2024-04-23 00:02:59
  • status_class: danger
  • runtime: 0:38:58
  • wait_time: 0:11:09
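For anyone triaging this outside teuthology, the NFS-ingress service setup driven by the tasks above can be reproduced by hand roughly as follows. This is a sketch using only commands taken from the job; it assumes an already-bootstrapped cephadm cluster, and 10.0.0.100/24 stands in for the {{VIP0}}/{{VIPPREFIXLEN}} values the framework substitutes at runtime:

    # Free the NFS port so the ingress stack (haproxy + keepalived) can bind it
    systemctl stop nfs-server
    # Backing CephFS volume for the export
    ceph fs volume create foofs
    # NFS cluster fronted by ingress on port 2999; the VIP is a placeholder
    ceph nfs cluster create foo --ingress --virtual-ip 10.0.0.100/24 --port 2999
    # Export the volume under a pseudo path
    ceph nfs export create cephfs --fsname foofs --cluster-id foo --pseudo-path /fake
    # Once nfs.foo and ingress.nfs.foo are running, mount through the VIP
    mkdir /mnt/foo
    mount -t nfs 10.0.0.100:/fake /mnt/foo -o port=2999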