Coverage for drivers/cleanup: 22%

#!/usr/bin/python
#
# Copyright (C) Citrix Systems Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; version 2.1 only.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Script to coalesce and garbage collect VHD-based SRs in the background
#
try:
    from linstorvolumemanager import \
        LinstorVolumeManager, LinstorVolumeManagerError
    LINSTOR_AVAILABLE = True
except ImportError:
    LINSTOR_AVAILABLE = False
# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
# possible due to lvhd_stop_using_() not working correctly. However, we leave
# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
# record for use by the offline tool (which makes the operation safe by
# pausing the VM first)
# process "lock", used simply as an indicator that a process already exists # that is doing GC/coalesce on this SR (such a process holds the lock, and we # check for the fact by trying the lock).
# process "lock" to indicate that the GC process has been activated but may not # yet be running, stops a second process from being started.
# Default coalesce error rate limit, in messages per minute. A zero value
# disables throttling, and a negative value disables error reporting.
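# Illustrative sketch (assumption): how a "messages per minute" rate maps to a
# minimum spacing between XenCenter messages, mirroring the arithmetic used
# later in _reportCoalesceError().
def _exampleMinMessageSpacing(errRatePerMinute):
    # e.g. a rate of 0.5 msg/min means at most one message every 120 seconds
    return datetime.timedelta(seconds=(1.0 / errRatePerMinute) * 60)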
################################################################################
#
#  Util
#
info = sys.exc_info()
if info[0] == exceptions.SystemExit:
    # this should not be happening when catching "Exception", but it is
    sys.exit(0)
tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
Util.log(" ***********************")
Util.log(" * E X C E P T I O N *")
Util.log(" ***********************")
Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
Util.log(tb)
Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
"Execute a subprocess, then return its return code, stdout, stderr" proc = subprocess.Popen(args, stdin=subprocess.PIPE,\ stdout=subprocess.PIPE,\ stderr=subprocess.PIPE,\ shell=True,\ close_fds=True) (stdout, stderr) = proc.communicate(inputtext) stdout = str(stdout) stderr = str(stderr) rc = proc.returncode if log: Util.log("`%s`: %s" % (args, rc)) if type(expectedRC) != type([]): expectedRC = [expectedRC] if not rc in expectedRC: reason = stderr.strip() if stdout.strip(): reason = "%s (stdout: %s)" % (reason, stdout.strip()) Util.log("Failed: %s" % reason) raise util.CommandException(rc, args, reason)
if ret == Util.RET_RC:
    return rc
if ret == Util.RET_STDERR:
    return stderr
return stdout
"""execute func in a separate thread and kill it if abortTest signals so""" abortSignaled = abortTest() # check now before we clear resultFlag resultFlag = IPCFlag(ns) resultFlag.clearAll() pid = os.fork() if pid: startTime = time.time() try: while True: if resultFlag.test("success"): Util.log(" Child process completed successfully") resultFlag.clear("success") return if resultFlag.test("failure"): resultFlag.clear("failure") raise util.SMException("Child process exited with error") if abortTest() or abortSignaled: os.killpg(pid, signal.SIGKILL) raise AbortException("Aborting due to signal") if timeOut and time.time() - startTime > timeOut: os.killpg(pid, signal.SIGKILL) resultFlag.clearAll() raise util.SMException("Timed out") time.sleep(pollInterval) finally: wait_pid = 0 rc = -1 count = 0 while wait_pid == 0 and count < 10: wait_pid, rc = os.waitpid(pid, os.WNOHANG) if wait_pid == 0: time.sleep(2) count += 1
if wait_pid == 0: Util.log("runAbortable: wait for process completion timed out") else: os.setpgrp() try: if func() == ret: resultFlag.set("success") else: resultFlag.set("failure") except Exception, e: Util.log("Child process failed with : (%s)" % e) resultFlag.set("failure") Util.logException("This exception has occured") os._exit(0)
for prefix in ("G", "M", "K"):
    if number >= Util.PREFIX[prefix]:
        return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
return "%s" % number
count = 0
while val:
    count += val & 1
    val = val >> 1
return count
"""return bit count in the bitmap produced by ORing the two bitmaps""" len1 = len(bitmap1) len2 = len(bitmap2) lenLong = len1 lenShort = len2 bitmapLong = bitmap1 if len2 > len1: lenLong = len2 lenShort = len1 bitmapLong = bitmap2
count = 0 for i in range(lenShort): val = ord(bitmap1[i]) | ord(bitmap2[i]) count += Util.numBits(val)
for i in range(i + 1, lenLong): val = ord(bitmapLong[i]) count += Util.numBits(val) return count
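# Example values for the bit counting above (interactive Python 2 session;
# the byte strings are arbitrary examples):
#
#   >>> Util.countBits("\x0f", "\xf0\x01")
#   9        # 0x0f | 0xf0 = 0xff -> 8 bits, plus the extra byte 0x01 -> 1 bit
#   >>> Util.countBits("", "\x03")
#   2        # empty short bitmap: only the tail of the longer bitmap counts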
thisScript = util.get_real_path(__file__)
if thisScript.endswith(".pyc"):
    thisScript = thisScript[:-1]
return thisScript
################################################################################
#
#  XAPI
#
CONFIG_SM: "sm-config", CONFIG_OTHER: "other-config", CONFIG_ON_BOOT: "on-boot", CONFIG_ALLOW_CACHING: "allow_caching" }
session = XenAPI.xapi_local() session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') return session
self.sessionPrivate = False self.session = session if self.session is None: self.session = self.getSession() self.sessionPrivate = True self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) self.srRecord = self.session.xenapi.SR.get_record(self._srRef) self.hostUuid = util.get_this_host() self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
if self.sessionPrivate: self.session.xenapi.session.logout()
pbds = self.getAttachedPBDs() for pbdRec in pbds: if pbdRec["host"] == self._hostRef: return True return False
host_recs = self.session.xenapi.host.get_all_records() for host_ref, host_rec in host_recs.iteritems(): if not host_rec["enabled"]: Util.log("Host %s not enabled" % host_rec["uuid"]) return False return True
if self.srRecord["shared"]: pool = self.session.xenapi.pool.get_all_records().values()[0] return pool["master"] == self._hostRef else: pbds = self.getAttachedPBDs() if len(pbds) < 1: raise util.SMException("Local SR not attached") elif len(pbds) > 1: raise util.SMException("Local SR multiply attached") return pbds[0]["host"] == self._hostRef
"""Return PBD records for all PBDs of this SR that are currently attached""" attachedPBDs = [] pbds = self.session.xenapi.PBD.get_all_records() for pbdRec in pbds.values(): if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: attachedPBDs.append(pbdRec) return attachedPBDs
return util.get_online_hosts(self.session)
text = self.session.xenapi.host.call_plugin( \ hostRef, self.PLUGIN_ON_SLAVE, "multi", args) Util.log("call-plugin returned: '%s'" % text)
return self.session.xenapi.host.get_record(hostRef)
return self.session.xenapi.VDI.get_by_uuid(uuid)
return self._getRefVDI(vdi.uuid)
try: ref = self._getRefVDI(uuid) return self.session.xenapi.VDI.get_record(ref) except XenAPI.Failure: return None
return self.session.xenapi.VDI.snapshot(vdi.getRef(), {"type":"internal"})
"""Forget the VDI, but handle the case where the VDI has already been forgotten (i.e. ignore errors)""" try: vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) self.session.xenapi.VDI.forget(vdiRef) except XenAPI.Failure: pass
kind = vdi.CONFIG_TYPE[key] if kind == self.CONFIG_SM: cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) elif kind == self.CONFIG_OTHER: cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) elif kind == self.CONFIG_ON_BOOT: cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) elif kind == self.CONFIG_ALLOW_CACHING: cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) else: assert(False) Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) return cfg
kind = vdi.CONFIG_TYPE[key] if kind == self.CONFIG_SM: self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) elif kind == self.CONFIG_OTHER: self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) else: assert(False)
kind = vdi.CONFIG_TYPE[key] if kind == self.CONFIG_SM: self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) elif kind == self.CONFIG_OTHER: self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) else: assert(False)
return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())
sr_refs = self.session.xenapi.SR.get_all_records_where( \ 'field "local_cache_enabled" = "true"') for sr_ref in sr_refs: Util.log("Marking SR %s dirty" % sr_ref) util.set_dirty(self.session, sr_ref)
Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) abortFlag = IPCFlag(self.srRecord["uuid"]) task = self.session.xenapi.Async.SR.update(self._srRef) cancelTask = True try: for i in range(60): status = self.session.xenapi.task.get_status(task) if not status == "pending": Util.log("SR.update_asynch status changed to [%s]" % status) cancelTask = False return if abortFlag.test(FLAG_TYPE_ABORT): Util.log("Abort signalled during srUpdate, cancelling task...") try: self.session.xenapi.task.cancel(task) cancelTask = False Util.log("Task cancelled") except: pass return time.sleep(1) finally: if cancelTask: self.session.xenapi.task.cancel(task) self.session.xenapi.task.destroy(task) Util.log("Asynch srUpdate still running, but timeout exceeded.")
################################################################################
#
#  VDI
#
"""Object representing a VDI of a VHD-based SR"""
# config keys & values

# no space to snap-coalesce or unable to keep
# up with VDI. This is not used by the SM, it
# might be used by external components.
DB_VHD_PARENT: XAPI.CONFIG_SM, DB_VDI_TYPE: XAPI.CONFIG_SM, DB_VHD_BLOCKS: XAPI.CONFIG_SM, DB_VDI_PAUSED: XAPI.CONFIG_SM, DB_GC: XAPI.CONFIG_OTHER, DB_COALESCE: XAPI.CONFIG_OTHER, DB_LEAFCLSC: XAPI.CONFIG_OTHER, DB_ONBOOT: XAPI.CONFIG_ON_BOOT, DB_ALLOW_CACHING:XAPI.CONFIG_ALLOW_CACHING, }
# feasibility of leaf coalesce
"""Load VDI info""" pass # abstract
return self.DRIVER_NAME_VHD
if self._vdiRef == None: self._vdiRef = self.sr.xapi.getRefVDI(self) return self._vdiRef
config = self.sr.xapi.getConfigVDI(self, key) if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: val = config else: val = config.get(key) if val: return val return default
self.sr.xapi.removeFromConfigVDI(self, key) self.sr.xapi.addToConfigVDI(self, key, val) Util.log("Set %s = %s for %s" % (key, val, self))
self.sr.xapi.removeFromConfigVDI(self, key) Util.log("Removed %s from %s" % (key, self))
if self.getConfig(self.DB_VDI_PAUSED) == "true": Util.log("Unpausing VDI %s" % self) self.unpause()
if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, self.uuid, failfast): raise util.SMException("Failed to pause VDI %s" % self)
try: xapi = self.sr.xapi.session.xenapi sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) msg_name = "failed to unpause tapdisk" msg_body = "Failed to unpause tapdisk for VDI %s, " \ "VMs using this tapdisk have lost access " \ "to the corresponding disk(s)" % self.uuid xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) except Exception, e: util.SMlog("failed to generate message: %s" % e)
if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, self.uuid): self._report_tapdisk_unpause_error() raise util.SMException("Failed to unpause VDI %s" % self)
"""Pause-unpause in one step""" self.sr.lock() try: try: if not blktap2.VDI.tap_refresh(self.sr.xapi.session, self.sr.uuid, self.uuid): self._report_tapdisk_unpause_error() raise util.SMException("Failed to refresh %s" % self) except XenAPI.Failure, e: if util.isInvalidVDI(e) and ignoreNonexistent: Util.log("VDI %s not found, ignoring" % self) return raise finally: self.sr.unlock()
return self.sr.xapi.isSnapshot(self)
return util.is_attached_rw( self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))
val = self.updateBlockInfo() bitmap = zlib.decompress(base64.b64decode(val)) return bitmap
"""A VDI is coalesceable if it has no siblings and is not a leaf""" return not self.scanError and \ self.parent and \ len(self.parent.children) == 1 and \ self.hidden and \ len(self.children) > 0
"""A VDI is leaf-coalesceable if it has no siblings and is a leaf""" return not self.scanError and \ self.parent and \ len(self.parent.children) == 1 and \ not self.hidden and \ len(self.children) == 0
"""Can we stop-and-leaf-coalesce this VDI? The VDI must be isLeafCoalesceable() already""" self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT self.getSizeVHD()/speed < allowedDownTime else: self.getSizeVHD() < self.LIVE_LEAF_COALESCE_MAX_SIZE
self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE)
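# Illustration of the feasibility test above with made-up numbers (the real
# constants live on the VDI class; these figures are examples only):
#
#   speed           = 50 MiB/s   (as recorded by recordStorageSpeed)
#   allowedDownTime = TIMEOUT_SAFETY_MARGIN * LIVE_LEAF_COALESCE_TIMEOUT,
#                     say 0.5 * 10 = 5 seconds
#   getSizeVHD()    = 200 MiB of allocated VHD data
#
# 200 MiB / (50 MiB/s) = 4 s of estimated downtime, which is under 5 s, so the
# live leaf-coalesce would be considered feasible.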
if len(self.children) == 0: # base case # it is possible to have a hidden leaf that was recently coalesced # onto its parent, its children already relinked but not yet # reloaded - in which case it may not be garbage collected yet: # some tapdisks could still be using the file. if self.sr.journaler.get(self.JRN_RELINK, self.uuid): return [] if not self.scanError and self.hidden: return [self] return []
thisPrunable = True vdiList = [] for child in self.children: childList = child.getAllPrunable() vdiList.extend(childList) if child not in childList: thisPrunable = False
# We can destroy the current VDI if all children are hidden BUT the
# current VDI must be hidden too to do that!
# Example in this case (after a failed live leaf coalesce):
#
# SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
# SMGC: [32436]         b5458d61(1.000G/4.127M)
# SMGC: [32436]             *OLD_b545(1.000G/4.129M)
#
# OLD_b545 is hidden and must be removed, but b5458d61 must not.
# Normally we are not in this function when the delete action is
# executed but in `_liveLeafCoalesce`.
if not self.scanError and not self.hidden and thisPrunable: vdiList.append(self) return vdiList
return self._sizeVHD
"Get the root of the tree that self belongs to" root = self while root.parent: root = root.parent return root
"Get the height of the subtree rooted at self" if len(self.children) == 0: return 1
maxChildHeight = 0 for child in self.children: childHeight = child.getTreeHeight() if childHeight > maxChildHeight: maxChildHeight = childHeight
return maxChildHeight + 1
"Get all leaf nodes in the subtree rooted at self" if len(self.children) == 0: return [self]
leaves = [] for child in self.children: leaves.extend(child.getAllLeaves()) return leaves
val = base64.b64encode(self._queryVHDBlocks()) self.setConfig(VDI.DB_VHD_BLOCKS, val) return val
"Rename the VDI file" assert(not self.sr.vdis.get(uuid)) self._clearRef() oldUuid = self.uuid self.uuid = uuid self.children = [] # updating the children themselves is the responsiblity of the caller del self.sr.vdis[oldUuid] self.sr.vdis[self.uuid] = self
"Physically delete the VDI"
strHidden = "*" strSizeVirt = Util.num2str(self.sizeVirt) strSizeVHD = "/%s" % Util.num2str(self._sizeVHD) strType = "[RAW]" strSizeVHD = ""
strSizeVHD, strType)
if not vhdutil.check(self.path, fast = fast): raise util.SMException("VHD %s corrupted" % self)
"""Coalesce self onto parent. Only perform the actual coalescing of VHD, but not the subsequent relinking. We'll do that as the next step, after reloading the entire SR in case things have changed while we were coalescing""" self.validate() self.parent.validate(True) self.parent._increaseSizeVirt(self.sizeVirt) self.sr._updateSlavesOnResize(self.parent) self._coalesceVHD(0) self.parent.validate(True) #self._verifyContents(0) self.parent.updateBlockInfo()
Util.log(" Coalesce verification on %s" % self) abortTest = lambda:IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) Util.runAbortable(lambda: self._runTapdiskDiff(), True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) Util.log(" Coalesce verification succeeded")
cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ (self.getDriverName(), self.path, \ self.parent.getDriverName(), self.parent.path) Util.doexec(cmd, 0) return True
"""Reports a coalesce error to XenCenter.
vdi: the VDI object on which the coalesce error occured ce: the CommandException that was raised"""
msg_name = os.strerror(ce.code) if ce.code == errno.ENOSPC: # TODO We could add more information here, e.g. exactly how much # space is required for the particular coalesce, as well as actions # to be taken by the user and consequences of not taking these # actions. msg_body = 'Run out of space while coalescing.' elif ce.code == errno.EIO: msg_body = 'I/O error while coalescing.' else: msg_body = '' util.SMlog('Coalesce failed on SR %s: %s (%s)' % (vdi.sr.uuid, msg_name, msg_body))
# Create a XenCenter message, but don't spam. xapi = vdi.sr.xapi.session.xenapi sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) oth_cfg = xapi.SR.get_other_config(sr_ref) if COALESCE_ERR_RATE_TAG in oth_cfg: coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) else: coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE
xcmsg = False if coalesce_err_rate == 0: xcmsg = True elif coalesce_err_rate > 0: now = datetime.datetime.now() sm_cfg = xapi.SR.get_sm_config(sr_ref) if COALESCE_LAST_ERR_TAG in sm_cfg: # seconds per message (minimum distance in time between two # messages in seconds) spm = datetime.timedelta(seconds=(1.0/coalesce_err_rate)*60) last = datetime.datetime.fromtimestamp( float(sm_cfg[COALESCE_LAST_ERR_TAG])) if now - last >= spm: xapi.SR.remove_from_sm_config(sr_ref, COALESCE_LAST_ERR_TAG) xcmsg = True else: xcmsg = True if xcmsg: xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, str(now.strftime('%s'))) if xcmsg: xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)
try:
    startTime = time.time()
    vhdSize = vdi.getSizeVHD()
    vhdutil.coalesce(vdi.path)
    endTime = time.time()
    vdi.sr.recordStorageSpeed(startTime, endTime, vhdSize)
except util.CommandException, ce:
    # We use try/except for the following piece of code because it runs
    # in a separate process context and errors will not be caught and
    # reported by anyone.
    try:
        # Report coalesce errors back to user via XC
        VDI._reportCoalesceError(vdi, ce)
    except Exception, e:
        util.SMlog('failed to create XenCenter message: %s' % e)
    raise ce
except:
    raise
Util.log(" Running VHD coalesce on %s" % self) abortTest = lambda:IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) try: Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) except: #exception at this phase could indicate a failure in vhd coalesce # or a kill of vhd coalesce by runAbortable due to timeOut # Try a repair and reraise the exception parent = "" try: parent = vhdutil.getParent(self.path, lambda x: x.strip()) # Repair error is logged and ignored. Error reraised later util.SMlog('Coalesce failed on %s, attempting repair on ' \ 'parent %s' % (self.uuid, parent)) vhdutil.repair(parent) except Exception, e: util.SMlog('(error ignored) Failed to repair parent %s ' \ 'after failed coalesce on %s, err: %s' % (parent, self.path, e)) raise
util.fistpoint.activate("LVHDRT_coalescing_VHD_data",self.sr.uuid)
"""Relink children of this VDI to point to the parent of this VDI""" abortFlag = IPCFlag(self.sr.uuid) for child in self.children: if abortFlag.test(FLAG_TYPE_ABORT): raise AbortException("Aborting due to signal") Util.log(" Relinking %s from %s to %s" % \ (child, self, self.parent)) util.fistpoint.activate("LVHDRT_relinking_grandchildren",self.sr.uuid) child._setParent(self.parent) self.children = []
"""Pause & unpause all VDIs in the subtree to cause blktap to reload the VHD metadata for this file in any online VDI""" abortFlag = IPCFlag(self.sr.uuid) for child in self.children: if child == vdiSkip: continue if abortFlag.test(FLAG_TYPE_ABORT): raise AbortException("Aborting due to signal") Util.log(" Reloading VDI %s" % child) child._reload()
"""Pause & unpause to cause blktap to reload the VHD metadata""" for child in self.children: child._reload()
# only leaves can be attached if len(self.children) == 0: self.refresh()
ret = vhdutil.getParent(self.path, lvhdutil.extractUuid) if ret: self.parentUuid = ret
vhdutil.setParent(self.path, parent.path, False) self.parent = parent self.parentUuid = parent.uuid parent.children.append(self) try: self.setConfig(self.DB_VHD_PARENT, self.parentUuid) Util.log("Updated the vhd-parent field for child %s with %s" % \ (self.uuid, self.parentUuid)) except: Util.log("Failed to update %s with vhd-parent field %s" % \ (self.uuid, self.parentUuid))
hidden = vhdutil.getHidden(self.path) self.hidden = (hidden != 0)
vhdutil.setHidden(self.path, hidden) self.hidden = hidden
"""ensure the virtual size of 'self' is at least 'size'. Note that resizing a VHD must always be offline and atomically: the file must not be open by anyone and no concurrent operations may take place. Thus we use the Agent API call for performing paused atomic operations. If the caller is already in the atomic context, it must call with atomic = False""" if self.sizeVirt >= size: return Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \ (self, Util.num2str(self.sizeVirt), Util.num2str(size)))
msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024 if (size <= msize): vhdutil.setSizeVirtFast(self.path, size) else: if atomic: vdiList = self._getAllSubtree() self.sr.lock() try: self.sr.pauseVDIs(vdiList) try: self._setSizeVirt(size) finally: self.sr.unpauseVDIs(vdiList) finally: self.sr.unlock() else: self._setSizeVirt(size)
self.sizeVirt = vhdutil.getSizeVirt(self.path)
"""WARNING: do not call this method directly unless all VDIs in the subtree are guaranteed to be unplugged (and remain so for the duration of the operation): this operation is only safe for offline VHDs""" jFile = os.path.join(self.sr.path, self.uuid) vhdutil.setSizeVirt(self.path, size, jFile)
return vhdutil.getBlockBitmap(self.path)
"""Get the data size of the resulting VHD if we coalesce self onto parent. We calculate the actual size by using the VHD block allocation information (as opposed to just adding up the two VHD sizes to get an upper bound)""" # make sure we don't use stale BAT info from vdi_rec since the child # was writable all this time self.delConfig(VDI.DB_VHD_BLOCKS) blocksChild = self.getVHDBlocks() blocksParent = self.parent.getVHDBlocks() numBlocks = Util.countBits(blocksChild, blocksParent) Util.log("Num combined blocks = %d" % numBlocks) sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE assert(sizeData <= self.sizeVirt) return sizeData
sizeData = self._getCoalescedSizeData() sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ vhdutil.calcOverheadEmpty(self.sizeVirt) Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) return sizeCoalesced - self.parent.getSizeVHD()
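# Worked example of the size estimate above (the block count is made up;
# 2 MiB is the usual VHD block size assumed for vhdutil.VHD_BLOCK_SIZE):
#
#   numBlocks     = 512 combined blocks (from Util.countBits)
#   sizeData      = 512 * 2 MiB = 1 GiB of allocated data
#   sizeCoalesced = sizeData + bitmap overhead + empty-VHD overhead
#   extra space   = sizeCoalesced - parent.getSizeVHD()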
"""How much extra space in the SR will be required to [live-]leaf-coalesce this VDI""" # the space requirements are the same as for inline coalesce return self._calcExtraSpaceForCoalescing()
"""How much extra space in the SR will be required to snapshot-coalesce this VDI""" return self._calcExtraSpaceForCoalescing() + \ vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf
"""Get self and all VDIs in the subtree of self as a flat list""" vdiList = [self] for child in self.children: vdiList.extend(child._getAllSubtree()) return vdiList
"""Object representing a VDI in a file-based SR (EXT or NFS)"""
path = os.path.basename(path.strip()) if not (path.endswith(vhdutil.FILE_EXTN_VHD) or \ path.endswith(vhdutil.FILE_EXTN_RAW)): return None uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \ vhdutil.FILE_EXTN_RAW, "") # TODO: validate UUID format return uuid
VDI.__init__(self, sr, uuid, raw) if self.raw: self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW) else: self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD)
if not info: if not util.pathexists(self.path): raise util.SMException("%s not found" % self.path) try: info = vhdutil.getVHDInfo(self.path, self.extractUuid) except util.SMException: Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid) return self.parent = None self.children = [] self.parentUuid = info.parentUuid self.sizeVirt = info.sizeVirt self._sizeVHD = info.sizePhys self.hidden = info.hidden self.scanError = False self.path = os.path.join(self.sr.path, "%s%s" % \ (self.uuid, vhdutil.FILE_EXTN_VHD))
oldPath = self.path VDI.rename(self, uuid) self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD) self.path = os.path.join(self.sr.path, self.fileName) assert(not util.pathexists(self.path)) Util.log("Renaming %s -> %s" % (oldPath, self.path)) os.rename(oldPath, self.path)
if len(self.children) > 0: raise util.SMException("VDI %s has children, can't delete" % \ self.uuid) try: self.sr.lock() try: os.unlink(self.path) self.sr.forgetVDI(self.uuid) finally: self.sr.unlock() except OSError: raise util.SMException("os.unlink(%s) failed" % self.path) VDI.delete(self)
"""Object representing a VDI in an LVHD SR"""
self.parent = None self.children = [] self._sizeVHD = -1 self.scanError = vdiInfo.scanError self.sizeLV = vdiInfo.sizeLV self.sizeVirt = vdiInfo.sizeVirt self.fileName = vdiInfo.lvName self.lvActive = vdiInfo.lvActive self.lvOpen = vdiInfo.lvOpen self.lvReadonly = vdiInfo.lvReadonly self.hidden = vdiInfo.hidden self.parentUuid = vdiInfo.parentUuid self.path = os.path.join(self.sr.path, self.fileName)
if self.raw: return self.DRIVER_NAME_RAW return self.DRIVER_NAME_VHD
"""inflate the LV containing the VHD to 'size'""" if self.raw: return self._activate() self.sr.lock() try: lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size) util.fistpoint.activate("LVHDRT_inflating_the_parent",self.sr.uuid) finally: self.sr.unlock() self.sizeLV = self.sr.lvmCache.getSize(self.fileName) self._sizeVHD = -1
"""deflate the LV containing the VHD to minimum""" if self.raw: return self._activate() self.sr.lock() try: lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD()) finally: self.sr.unlock() self.sizeLV = self.sr.lvmCache.getSize(self.fileName) self._sizeVHD = -1
self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt))
"""Inflate the parent only as much as needed for the purposes of coalescing""" if self.parent.raw: return inc = self._calcExtraSpaceForCoalescing() if inc > 0: util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent",self.sr.uuid) self.parent.inflate(self.parent.sizeLV + inc)
if not self.raw: return VDI.updateBlockInfo(self)
oldUuid = self.uuid oldLVName = self.fileName VDI.rename(self, uuid) self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid if self.raw: self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid self.path = os.path.join(self.sr.path, self.fileName) assert(not self.sr.lvmCache.checkLV(self.fileName))
self.sr.lvmCache.rename(oldLVName, self.fileName) if self.sr.lvActivator.get(oldUuid, False): self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)
ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid (cnt, bcnt) = RefCounter.check(oldUuid, ns) RefCounter.set(self.uuid, cnt, bcnt, ns) RefCounter.reset(oldUuid, ns)
if len(self.children) > 0: raise util.SMException("VDI %s has children, can't delete" % \ self.uuid) self.sr.lock() try: self.sr.lvmCache.remove(self.fileName) self.sr.forgetVDI(self.uuid) finally: self.sr.unlock() RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid) VDI.delete(self)
if self._sizeVHD == -1: self._loadInfoSizeVHD() return self._sizeVHD
"""Get the physical utilization of the VHD file. We do it individually (and not using the VHD batch scanner) as an optimization: this info is relatively expensive and we need it only for VDI's involved in coalescing.""" if self.raw: return self._activate() self._sizeVHD = vhdutil.getSizePhys(self.path) if self._sizeVHD <= 0: raise util.SMException("phys size of %s = %d" % \ (self, self._sizeVHD))
if self.raw: self.hidden = self.sr.lvmCache.getHidden(self.fileName) else: VDI._loadInfoHidden(self)
if self.raw: self.sr.lvmCache.setHidden(self.fileName, hidden) self.hidden = hidden else: VDI._setHidden(self, hidden)
strType = "VHD" if self.raw: strType = "RAW" strHidden = "" if self.hidden: strHidden = "*" strSizeVHD = "" if self._sizeVHD > 0: strSizeVHD = Util.num2str(self._sizeVHD) strActive = "n" if self.lvActive: strActive = "a" if self.lvOpen: strActive += "o" return "%s%s[%s](%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, Util.num2str(self.sizeVirt), strSizeVHD, Util.num2str(self.sizeLV), strActive)
if not self.raw: VDI.validate(self, fast)
"""LVHD parents must first be activated, inflated, and made writable""" try: self._activateChain() self.sr.lvmCache.setReadonly(self.parent.fileName, False) self.parent.validate() self.inflateParentForCoalesce() VDI._doCoalesce(self) finally: self.parent._loadInfoSizeVHD() self.parent.deflate() self.sr.lvmCache.setReadonly(self.parent.fileName, True)
self._activate() if self.lvReadonly: self.sr.lvmCache.setReadonly(self.fileName, False)
try: vhdutil.setParent(self.path, parent.path, parent.raw) finally: if self.lvReadonly: self.sr.lvmCache.setReadonly(self.fileName, True) self._deactivate() self.parent = parent self.parentUuid = parent.uuid parent.children.append(self) try: self.setConfig(self.DB_VHD_PARENT, self.parentUuid) Util.log("Updated the vhd-parent field for child %s with %s" % \ (self.uuid, self.parentUuid)) except: Util.log("Failed to update the vhd-parent with %s for child %s" % \ (self.parentUuid, self.uuid))
self.sr.lvActivator.activate(self.uuid, self.fileName, False)
vdi = self while vdi: vdi._activate() vdi = vdi.parent
self.sr.lvActivator.deactivate(self.uuid, False)
"ensure the virtual size of 'self' is at least 'size'" self._activate() if not self.raw: VDI._increaseSizeVirt(self, size, atomic) return
# raw VDI case offset = self.sizeLV if self.sizeVirt < size: oldSize = self.sizeLV self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) self.sr.lvmCache.setSize(self.fileName, self.sizeLV) offset = oldSize unfinishedZero = False jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) if jval: unfinishedZero = True offset = int(jval) length = self.sizeLV - offset if not length: return
if unfinishedZero: Util.log(" ==> Redoing unfinished zeroing out") else: self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ str(offset)) Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) abortTest = lambda:IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) func = lambda: util.zeroOut(self.path, offset, length) Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0) self.sr.journaler.remove(self.JRN_ZERO, self.uuid)
"""WARNING: do not call this method directly unless all VDIs in the subtree are guaranteed to be unplugged (and remain so for the duration of the operation): this operation is only safe for offline VHDs""" self._activate() jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid, vhdutil.MAX_VHD_JOURNAL_SIZE) try: lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, size, jFile) finally: lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid)
self._activate() return VDI._queryVHDBlocks(self)
if self.parent.raw: return 0 # raw parents are never deflated in the first place sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData()) Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) return sizeCoalesced - self.parent.sizeLV
"""How much extra space in the SR will be required to [live-]leaf-coalesce this VDI""" # we can deflate the leaf to minimize the space requirements deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD()) return self._calcExtraSpaceForCoalescing() - deflateDiff
return self._calcExtraSpaceForCoalescing() + \ lvhdutil.calcSizeLV(self.getSizeVHD())
"""Object representing a VDI in a LINSTOR SR"""
self.parentUuid = info.parentUuid self.scanError = True self.parent = None self.children = []
self.fileName = self.sr._linstor.get_volume_name(self.uuid) self.path = self.sr._linstor.build_device_path(self.fileName) if not util.pathexists(self.path): raise util.SMException( '{} of {} not found' .format(self.fileName, self.uuid) )
if not info: try: info = self.sr._vhdutil.get_vhd_info(self.uuid) except util.SMException: Util.log( ' [VDI {}: failed to read VHD metadata]'.format(self.uuid) ) return
self.parentUuid = info.parentUuid self.sizeVirt = info.sizeVirt self._sizeVHD = info.sizePhys self.hidden = info.hidden self.scanError = False
Util.log('Renaming {} -> {} (path={})'.format( self.uuid, uuid, self.path )) self.sr._linstor.update_volume_uuid(self.uuid, uuid) VDI.rename(self, uuid)
if len(self.children) > 0: raise util.SMException( 'VDI {} has children, can\'t delete'.format(self.uuid) ) self.sr.lock() try: self.sr._linstor.destroy_volume(self.uuid) self.sr.forgetVDI(self.uuid) finally: self.sr.unlock() VDI.delete(self)
self.sr._linstor.ensure_volume_list_is_not_locked( vdiList, timeout=self.VOLUME_LOCK_TIMEOUT ) return super(VDI).pauseVDIs(vdiList)
self.sr._linstor.ensure_volume_is_not_locked( vdi.uuid, timeout=self.VOLUME_LOCK_TIMEOUT ) return super(VDI)._liveLeafCoalesce(vdi)
abortFlag = IPCFlag(self.sr.uuid) for child in self.children: if abortFlag.test(FLAG_TYPE_ABORT): raise AbortException('Aborting due to signal') Util.log( ' Relinking {} from {} to {}'.format( child, self, self.parent ) )
session = child.sr.xapi.session sr_uuid = child.sr.uuid vdi_uuid = child.uuid try: self.sr._linstor.ensure_volume_is_not_locked( vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT ) blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) child._setParent(self.parent) finally: blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) self.children = []
HIDDEN_TAG = 'hidden'
if self.raw: self.sr._linstor.update_volume_metadata(self.uuid, { HIDDEN_TAG: hidden }) self.hidden = hidden else: VDI._setHidden(self, hidden)
return self.sr._vhdutil.get_block_bitmap(self.uuid)
################################################################################
#
#  SR
#
changes = "" self.currState.clear() for vdi in self.sr.vdiTrees: self.currState[vdi.uuid] = self._getTreeStr(vdi) if not self.prevState.get(vdi.uuid) or \ self.prevState[vdi.uuid] != self.currState[vdi.uuid]: changes += self.currState[vdi.uuid]
for uuid in self.prevState.iterkeys(): if not self.currState.get(uuid): changes += "Tree %s gone\n" % uuid
result = "SR %s (%d VDIs in %d VHD trees): " % \ (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees))
if len(changes) > 0: if self.stateLogged: result += "showing only VHD trees that changed:" result += "\n%s" % changes else: result += "no changes"
for line in result.split("\n"): Util.log("%s" % line) self.prevState.clear() for key, val in self.currState.iteritems(): self.prevState[key] = val self.stateLogged = True
if self.stateLogged: Util.log("Found new VDI when scanning: %s" % uuid)
treeStr = "%s%s\n" % (" " * indent, vdi) for child in vdi.children: treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) return treeStr
xapi = XAPI(xapiSession, uuid) type = normalizeType(xapi.srRecord["type"]) if type == SR.TYPE_FILE: return FileSR(uuid, xapi, createLock, force) elif type == SR.TYPE_LVHD: return LVHDSR(uuid, xapi, createLock, force) elif type == SR.TYPE_LINSTOR: return LinstorSR(uuid, xapi, createLock, force) raise util.SMException("SR type %s not recognized" % type)
self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid) else:
if force: Util.log("SR %s not attached on this host, ignoring" % uuid) else: raise util.SMException("SR %s not attached on this host" % uuid)
Util.log("Not checking if we are Master (SR %s)" % uuid) raise util.SMException("This host is NOT master, will not run")
if refresh: self.xapi.srRecord = \ self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": Util.log("GC is disabled for this SR, abort") return False return True
"""Scan the SR and load VDI info for each VDI. If called repeatedly, update VDI objects if they already exist""" pass # abstract
self.lock() try: self.scan(force) finally: self.unlock()
return self.vdis.get(uuid)
if len(self.findGarbage()) > 0: return True if self.findCoalesceable(): return True if self.findLeafCoalesceable(): return True if self.needUpdateBlockInfo(): return True return False
"""Find a coalesceable VDI. Return a vdi that should be coalesced (choosing one among all coalesceable candidates according to some criteria) or None if there is no VDI that could be coalesced"""
candidates = []
srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) if srSwitch == "false": Util.log("Coalesce disabled for this SR") return candidates
# finish any VDI for which a relink journal entry exists first journals = self.journaler.getAll(VDI.JRN_RELINK) for uuid in journals.iterkeys(): vdi = self.getVDI(uuid) if vdi and vdi not in self._failedCoalesceTargets: return vdi
for vdi in self.vdis.values(): if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: candidates.append(vdi) Util.log("%s is coalescable" % vdi.uuid)
# pick one in the tallest tree treeHeight = dict() for c in candidates: height = c.getTreeRoot().getTreeHeight() if treeHeight.get(height): treeHeight[height].append(c) else: treeHeight[height] = [c]
freeSpace = self.getFreeSpace() heights = treeHeight.keys() heights.sort(reverse=True) for h in heights: for c in treeHeight[h]: spaceNeeded = c._calcExtraSpaceForCoalescing() if spaceNeeded <= freeSpace: Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) return c else: Util.log("No space to coalesce %s (free space: %d)" % \ (c, freeSpace)) return None
"false", "Coalesce disabled for this SR") or self.forbiddenBySwitch(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED, "Leaf-coalesce disabled for this SR"))
"""Find leaf-coalesceable VDIs in each VHD tree"""
# check the space constraints to see if leaf-coalesce is actually # feasible for this candidate
else: (candidate, freeSpace)) VDI.LEAFCLSC_OFFLINE)
vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
"""Coalesce vdi onto parent""" Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) if dryRun: return
try: self._coalesce(vdi) except util.SMException, e: if isinstance(e, AbortException): self.cleanup() raise else: self._failedCoalesceTargets.append(vdi) Util.logException("coalesce") Util.log("Coalesce failed, skipping") self.cleanup()
"""Leaf-coalesce vdi onto parent""" Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) if dryRun: return
try: uuid = vdi.uuid try: # "vdi" object will no longer be valid after this call self._coalesceLeaf(vdi) finally: vdi = self.getVDI(uuid) if vdi: vdi.delConfig(vdi.DB_LEAFCLSC) except AbortException: self.cleanup() raise except (util.SMException, XenAPI.Failure), e: self._failedCoalesceTargets.append(vdi) Util.logException("leaf-coalesce") Util.log("Leaf-coalesce failed on %s, skipping" % vdi) self.cleanup()
vdiList = self.findGarbage() Util.log("Found %d VDIs for deletion:" % len(vdiList)) for vdi in vdiList: Util.log(" %s" % vdi) if not dryRun: self.deleteVDIs(vdiList) self.cleanupJournals(dryRun)
vdiList = [] for vdi in self.vdiTrees: vdiList.extend(vdi.getAllPrunable()) return vdiList
for vdi in vdiList: if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): raise AbortException("Aborting due to signal") Util.log("Deleting unlinked VDI %s" % vdi) self.deleteVDI(vdi)
assert(len(vdi.children) == 0) del self.vdis[vdi.uuid] if vdi.parent: vdi.parent.children.remove(vdi) if vdi in self.vdiTrees: self.vdiTrees.remove(vdi) vdi.delete()
self.xapi.forgetVDI(self.uuid, vdiUuid)
paused = [] failed = False for vdi in vdiList: try: vdi.pause() paused.append(vdi) except: Util.logException("pauseVDIs") failed = True break
if failed: self.unpauseVDIs(paused) raise util.SMException("Failed to pause VDIs")
failed = False for vdi in vdiList: try: vdi.unpause() except: Util.log("ERROR: Failed to unpause VDI %s" % vdi) failed = True if failed: raise util.SMException("Failed to unpause VDIs")
return 0
Util.log("In cleanup") return
if self.name: ret = "%s ('%s')" % (self.uuid[0:4], self.name) else: ret = "%s" % self.uuid return ret
"""Acquire the SR lock. Nested acquire()'s are ok. Check for Abort signal to avoid deadlocking (trying to acquire the SR lock while the lock is held by a process that is trying to abort us)"""
assert(self._locked > 0) self._locked -= 1 if self._locked == 0: self._srLock.release()
for vdi in self.vdis.values(): if vdi.scanError or len(vdi.children) == 0: continue if not vdi.getConfig(vdi.DB_VHD_BLOCKS): return True return False
for vdi in self.vdis.values(): if vdi.scanError or len(vdi.children) == 0: continue if not vdi.getConfig(vdi.DB_VHD_BLOCKS): vdi.updateBlockInfo()
"""Remove stale coalesce VDI indicators""" entries = self.journaler.getAll(VDI.JRN_COALESCE) for uuid, jval in entries.iteritems(): self.journaler.remove(VDI.JRN_COALESCE, uuid)
"""delete journal entries for non-existing VDIs""" for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: entries = self.journaler.getAll(t) for uuid, jval in entries.iteritems(): if self.getVDI(uuid): continue if t == SR.JRN_CLONE: baseUuid, clonUuid = jval.split("_") if self.getVDI(baseUuid): continue Util.log(" Deleting stale '%s' journal entry for %s " "(%s)" % (t, uuid, jval)) if not dryRun: self.journaler.remove(t, uuid)
return 0
if self.journaler.get(vdi.JRN_RELINK, vdi.uuid): # this means we had done the actual coalescing already and just # need to finish relinking and/or refreshing the children Util.log("==> Coalesce apparently already done: skipping") else: # JRN_COALESCE is used to check which VDI is being coalesced in # order to decide whether to abort the coalesce. We remove the # journal as soon as the VHD coalesce step is done, because we # don't expect the rest of the process to take long self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") vdi._doCoalesce() self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)
util.fistpoint.activate("LVHDRT_before_create_relink_journal",self.uuid)
# we now need to relink the children: lock the SR to prevent ops # like SM.clone from manipulating the VDIs we'll be relinking and # rescan the SR first in case the children changed since the last # scan self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")
self.lock() try: self.scan() vdi._relinkSkip() finally: self.unlock()
vdi.parent._reloadChildren(vdi) self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) self.deleteVDI(vdi)
" --> Final size {finSize}"
initSize=prevSize, finSize=curSize))
" {attempt}".format(attempt=self.itsNoProgress))
"Max iterations ({max}) exceeded".format(max=max)
self.MAX_ITERATIONS_NO_PROGRESS): "No progress made for {max} iterations".format(max=max)
" compared to minimum acheived"
.format(size=self.startSize)) .format(size=self.finishSize)) .format(size=self.minSize))
"""Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot complete due to external changes, namely vdi_delete and vdi_snapshot that alter leaf-coalescibility of vdi""" return False .format(uuid=vdi.uuid))
else:
finally:
format(log=speedFile.readlines()))
# Defensive, should be impossible. Util.log("Bad speed: {speed} calculated for SR: {uuid}". format(speed=speed, uuid=self.uuid)) speed = None else: format(uuid=self.uuid)) else: format(uuid=self.uuid)) finally:
# Note that because we are not holding any locks here, concurrent SM # operations may change this tree under our feet. In particular, vdi # can be deleted, or it can be snapshotted. assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) Util.log("Single-snapshotting %s" % vdi) util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) try: ret = self.xapi.singleSnapshotVDI(vdi) Util.log("Single-snapshot returned: %s" % ret) except XenAPI.Failure, e: if util.isInvalidVDI(e): Util.log("The VDI appears to have been concurrently deleted") return False raise self.scanLocked() tempSnap = vdi.parent if not tempSnap.isCoalesceable(): Util.log("The VDI appears to have been concurrently snapshotted") return False Util.log("Coalescing parent %s" % tempSnap) util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) vhdSize = vdi.getSizeVHD() self._coalesce(tempSnap) if not vdi.isLeafCoalesceable(): Util.log("The VDI tree appears to have been altered since") return False return True
util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) self.lock() try: self.scan() if not self.getVDI(vdi.uuid): Util.log("The VDI appears to have been deleted meanwhile") return False if not vdi.isLeafCoalesceable(): Util.log("The VDI is no longer leaf-coalesceable") return False
uuid = vdi.uuid vdi.pause(failfast=True) try: try: # "vdi" object will no longer be valid after this call self._doCoalesceLeaf(vdi) except: Util.logException("_doCoalesceLeaf") self._handleInterruptedCoalesceLeaf() raise finally: vdi = self.getVDI(uuid) if vdi: vdi.ensureUnpaused() vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) if vdiOld: util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) self.deleteVDI(vdiOld) util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) finally: self.cleanup() self.unlock() self.logFilter.logState() return True
"""Actual coalescing of a leaf VDI onto parent. Must be called in an offline/atomic context""" self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) self._prepareCoalesceLeaf(vdi) vdi.parent._setHidden(False) vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) vdi.validate(True) vdi.parent.validate(True) util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: Util.log("Leaf-coalesce forced, will not use timeout") timeout = 0 vdi._coalesceVHD(timeout) util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) vdi.parent.validate(True) #vdi._verifyContents(timeout / 2)
# rename vdiUuid = vdi.uuid oldName = vdi.fileName origParentUuid = vdi.parent.uuid vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) vdi.parent.rename(vdiUuid) util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)
# Note that "vdi.parent" is now the single remaining leaf and "vdi" is # garbage
# update the VDI record vdi.parent.delConfig(VDI.DB_VHD_PARENT) if vdi.parent.raw: vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW) vdi.parent.delConfig(VDI.DB_VHD_BLOCKS) util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)
self._updateNode(vdi)
# delete the obsolete leaf & inflate the parent (in that order, to # minimize free space requirements) parent = vdi.parent vdi._setHidden(True) vdi.parent.children = [] vdi.parent = None
extraSpace = self._calcExtraSpaceNeeded(vdi, parent) freeSpace = self.getFreeSpace() if freeSpace < extraSpace: # don't delete unless we need the space: deletion is time-consuming # because it requires contacting the slaves, and we're paused here util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) self.deleteVDI(vdi) util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) self.journaler.remove(VDI.JRN_LEAF, vdiUuid)
self.forgetVDI(origParentUuid) self._finishCoalesceLeaf(parent) self._updateSlavesOnResize(parent)
assert(not parent.raw) # raw parents not supported extra = child.getSizeVHD() - parent.getSizeVHD() if extra < 0: extra = 0 return extra
pass
pass
pass
pass
pass
pass
for uuid in self.vdis.keys(): if not uuid in uuidsPresent: Util.log("VDI %s disappeared since last scan" % \ self.vdis[uuid]) del self.vdis[uuid]
"""An interrupted leaf-coalesce operation may leave the VHD tree in an inconsistent state. If the old-leaf VDI is still present, we revert the operation (in case the original error is persistent); otherwise we must finish the operation""" # abstract pass
self.vdiTrees = [] for vdi in self.vdis.values(): if vdi.parentUuid: parent = self.getVDI(vdi.parentUuid) if not parent: if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): self.vdiTrees.append(vdi) continue if force: Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ (vdi.parentUuid, vdi.uuid)) self.vdiTrees.append(vdi) continue else: raise util.SMException("Parent VDI %s of %s not " \ "found" % (vdi.parentUuid, vdi.uuid)) vdi.parent = parent parent.children.append(vdi) else: self.vdiTrees.append(vdi)
# cache cleanup actions
SR.__init__(self, uuid, xapi, createLock, force) self.path = "/var/run/sr-mount/%s" % self.uuid self.journaler = fjournaler.Journaler(self.path)
if not util.pathexists(self.path): raise util.SMException("directory %s not found!" % self.uuid) vhds = self._scan(force) for uuid, vhdInfo in vhds.iteritems(): vdi = self.getVDI(uuid) if not vdi: self.logFilter.logNewVDI(uuid) vdi = FileVDI(self, uuid, False) self.vdis[uuid] = vdi vdi.load(vhdInfo) uuidsPresent = vhds.keys() rawList = filter(lambda x: x.endswith(vhdutil.FILE_EXTN_RAW), os.listdir(self.path)) for rawName in rawList: uuid = FileVDI.extractUuid(rawName) uuidsPresent.append(uuid) vdi = self.getVDI(uuid) if not vdi: self.logFilter.logNewVDI(uuid) vdi = FileVDI(self, uuid, True) self.vdis[uuid] = vdi self._removeStaleVDIs(uuidsPresent) self._buildTree(force) self.logFilter.logState() self._handleInterruptedCoalesceLeaf()
return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path)
rootDeleted = False for vdi in vdiList: if not vdi.parent: rootDeleted = True break SR.deleteVDIs(self, vdiList) if self.xapi.srRecord["type"] == "nfs" and rootDeleted: self.xapi.markCacheSRsDirty()
"""Clean up IntelliCache cache files. Caches for leaf nodes are removed when the leaf node no longer exists or its allow-caching attribute is not set. Caches for parent nodes are removed when the parent node no longer exists or it hasn't been used in more than <maxAge> hours. Return number of caches removed. """ numRemoved = 0 cacheFiles = filter(self._isCacheFileName, os.listdir(self.path)) Util.log("Found %d cache files" % len(cacheFiles)) cutoff = datetime.datetime.now() - datetime.timedelta(hours = maxAge) for cacheFile in cacheFiles: uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] action = self.CACHE_ACTION_KEEP rec = self.xapi.getRecordVDI(uuid) if not rec: Util.log("Cache %s: VDI doesn't exist" % uuid) action = self.CACHE_ACTION_REMOVE elif rec["managed"] and not rec["allow_caching"]: Util.log("Cache %s: caching disabled" % uuid) action = self.CACHE_ACTION_REMOVE elif not rec["managed"] and maxAge >= 0: lastAccess = datetime.datetime.fromtimestamp( \ os.path.getatime(os.path.join(self.path, cacheFile))) if lastAccess < cutoff: Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) action = self.CACHE_ACTION_REMOVE_IF_INACTIVE
if action == self.CACHE_ACTION_KEEP: Util.log("Keeping cache %s" % uuid) continue
lockId = uuid parentUuid = None if rec and rec["managed"]: parentUuid = rec["sm_config"].get("vhd-parent") if parentUuid: lockId = parentUuid
cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) cacheLock.acquire() try: if self._cleanupCache(uuid, action): numRemoved += 1 finally: cacheLock.release() return numRemoved
assert(action != self.CACHE_ACTION_KEEP) rec = self.xapi.getRecordVDI(uuid) if rec and rec["allow_caching"]: Util.log("Cache %s appears to have become valid" % uuid) return False
fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) tapdisk = blktap2.Tapdisk.find_by_path(fullPath) if tapdisk: if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: Util.log("Cache %s still in use" % uuid) return False Util.log("Shutting down tapdisk for %s" % fullPath) tapdisk.shutdown()
Util.log("Deleting file %s" % fullPath) os.unlink(fullPath) return True
return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ name.endswith(self.CACHE_FILE_EXT)
for i in range(SR.SCAN_RETRY_ATTEMPTS): error = False pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD) vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid) for uuid, vhdInfo in vhds.iteritems(): if vhdInfo.error: error = True break if not error: return vhds Util.log("Scan error on attempt %d" % i) if force: return vhds raise util.SMException("Scan error")
self._checkSlaves(vdi) SR.deleteVDI(self, vdi)
onlineHosts = self.xapi.getOnlineHosts() abortFlag = IPCFlag(self.uuid) for pbdRecord in self.xapi.getAttachedPBDs(): hostRef = pbdRecord["host"] if hostRef == self.xapi._hostRef: continue if abortFlag.test(FLAG_TYPE_ABORT): raise AbortException("Aborting due to signal") try: self._checkSlave(hostRef, vdi) except util.CommandException: if hostRef in onlineHosts: raise
call = (hostRef, "nfs-on-slave", "check", { 'path': vdi.path }) Util.log("Checking with slave: %s" % repr(call)) _host = self.xapi.session.xenapi.host text = _host.call_plugin(*call)
entries = self.journaler.getAll(VDI.JRN_LEAF) for uuid, parentUuid in entries.iteritems(): fileList = os.listdir(self.path) childName = uuid + vhdutil.FILE_EXTN_VHD tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW parentPresent = (parentName1 in fileList or parentName2 in fileList) if parentPresent or tmpChildName in fileList: self._undoInterruptedCoalesceLeaf(uuid, parentUuid) else: self._finishInterruptedCoalesceLeaf(uuid, parentUuid) self.journaler.remove(VDI.JRN_LEAF, uuid) vdi = self.getVDI(uuid) if vdi: vdi.ensureUnpaused()
Util.log("*** UNDO LEAF-COALESCE") parent = self.getVDI(parentUuid) if not parent: parent = self.getVDI(childUuid) if not parent: raise util.SMException("Neither %s nor %s found" % \ (parentUuid, childUuid)) Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) parent.rename(parentUuid) util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)
child = self.getVDI(childUuid) if not child: child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) if not child: raise util.SMException("Neither %s nor %s found" % \ (childUuid, self.TMP_RENAME_PREFIX + childUuid)) Util.log("Renaming child back to %s" % childUuid) child.rename(childUuid) Util.log("Updating the VDI record") child.setConfig(VDI.DB_VHD_PARENT, parentUuid) child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)
if child.hidden: child._setHidden(False) if not parent.hidden: parent._setHidden(True) self._updateSlavesOnUndoLeafCoalesce(parent, child) util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) Util.log("*** leaf-coalesce undo successful") if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
Util.log("*** FINISH LEAF-COALESCE") vdi = self.getVDI(childUuid) if not vdi: raise util.SMException("VDI %s not found" % childUuid) try: self.forgetVDI(parentUuid) except XenAPI.Failure: pass self._updateSlavesOnResize(vdi) util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) Util.log("*** finished leaf-coalesce successfully")
SR.__init__(self, uuid, xapi, createLock, force) self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid) self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName) self.lvmCache = lvmcache.LVMCache(self.vgName) self.lvActivator = LVActivator(self.uuid, self.lvmCache) self.journaler = journaler.Journaler(self.lvmCache)
if self.lvActivator.get(vdi.uuid, False): self.lvActivator.deactivate(vdi.uuid, False) self._checkSlaves(vdi) SR.deleteVDI(self, vdi)
SR.forgetVDI(self, vdiUuid) mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)
stats = lvutil._getVGstats(self.vgName) return stats['physical_size'] - stats['physical_utilisation']
if not self.lvActivator.deactivateAll(): Util.log("ERROR deactivating LVs while cleaning up")
for vdi in self.vdis.values(): if vdi.scanError or vdi.raw or len(vdi.children) == 0: continue if not vdi.getConfig(vdi.DB_VHD_BLOCKS): return True return False
numUpdated = 0 for vdi in self.vdis.values(): if vdi.scanError or vdi.raw or len(vdi.children) == 0: continue if not vdi.getConfig(vdi.DB_VHD_BLOCKS): vdi.updateBlockInfo() numUpdated += 1 if numUpdated: # deactivate the LVs back sooner rather than later. If we don't # now, by the time this thread gets to deactivations, another one # might have leaf-coalesced a node and deleted it, making the child # inherit the refcount value and preventing the correct decrement self.cleanup()
vdis = self._scan(force) for uuid, vdiInfo in vdis.iteritems(): vdi = self.getVDI(uuid) if not vdi: self.logFilter.logNewVDI(uuid) vdi = LVHDVDI(self, uuid, vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW) self.vdis[uuid] = vdi vdi.load(vdiInfo) self._removeStaleVDIs(vdis.keys()) self._buildTree(force) self.logFilter.logState() self._handleInterruptedCoalesceLeaf()
for i in range(SR.SCAN_RETRY_ATTEMPTS): error = False self.lvmCache.refresh() vdis = lvhdutil.getVDIInfo(self.lvmCache) for uuid, vdiInfo in vdis.iteritems(): if vdiInfo.scanError: error = True break if not error: return vdis Util.log("Scan error, retrying (%d)" % i) if force: return vdis raise util.SMException("Scan error")
for uuid in self.vdis.keys(): if not uuid in uuidsPresent: Util.log("VDI %s disappeared since last scan" % \ self.vdis[uuid]) del self.vdis[uuid] if self.lvActivator.get(uuid, False): self.lvActivator.remove(uuid, False)
"""If the parent is raw and the child was resized (virt. size), then we'll need to resize the parent, which can take a while due to zeroing out of the extended portion of the LV. Do it before pausing the child to avoid a protracted downtime""" if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt: self.lvmCache.setReadonly(vdi.parent.fileName, False) vdi.parent._increaseSizeVirt(vdi.sizeVirt)
return SR._liveLeafCoalesce(self, vdi)
vdi._activateChain() self.lvmCache.setReadonly(vdi.parent.fileName, False) vdi.deflate() vdi.inflateParentForCoalesce()
# fix the refcounts: the remaining node should inherit the binary # refcount from the leaf (because if it was online, it should remain # refcounted as such), but the normal refcount from the parent (because # this node is really the parent node) - minus 1 if it is online (since # non-leaf nodes increment their normal counts when they are online and # we are now a leaf, storing that 1 in the binary refcount). ns = lvhdutil.NS_PREFIX_LVM + self.uuid cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) pCnt = pCnt - cBcnt assert(pCnt >= 0) RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)
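# Worked example of the refcount arithmetic above (illustrative numbers, not
# taken from the source): if RefCounter.check returns (cCnt=2, cBcnt=1) for the
# leaf and (pCnt=3, pBcnt=0) for the parent, the surviving node keeps the
# leaf's binary count (1) and takes the parent's normal count minus that
# binary count, 3 - 1 = 2, i.e. RefCounter.set(vdi.parent.uuid, 2, 1, ns).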
if not parent.isSnapshot() or parent.isAttachedRW(): parent.inflateFully() else: parent.deflate()
return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV
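# Illustrative arithmetic (hypothetical figures): for a parent with a virtual
# size of 10 GiB whose fully-inflated VHD LV size, as computed by
# lvhdutil.calcSizeVHDLV, comes to roughly 10.2 GiB, and whose LV currently
# occupies only 2 GiB while deflated, the extra space needed before coalescing
# is roughly 10.2 GiB - 2 GiB = 8.2 GiB.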
entries = self.journaler.getAll(VDI.JRN_LEAF) for uuid, parentUuid in entries.iteritems(): childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ self.TMP_RENAME_PREFIX + uuid parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid parentPresent = (self.lvmCache.checkLV(parentLV1) or \ self.lvmCache.checkLV(parentLV2)) if parentPresent or self.lvmCache.checkLV(tmpChildLV): self._undoInterruptedCoalesceLeaf(uuid, parentUuid) else: self._finishInterruptedCoalesceLeaf(uuid, parentUuid) self.journaler.remove(VDI.JRN_LEAF, uuid) vdi = self.getVDI(uuid) if vdi: vdi.ensureUnpaused()
Util.log("*** UNDO LEAF-COALESCE") parent = self.getVDI(parentUuid) if not parent: parent = self.getVDI(childUuid) if not parent: raise util.SMException("Neither %s nor %s found" % \ (parentUuid, childUuid)) Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) parent.rename(parentUuid) util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)
child = self.getVDI(childUuid) if not child: child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) if not child: raise util.SMException("Neither %s nor %s found" % \ (childUuid, self.TMP_RENAME_PREFIX + childUuid)) Util.log("Renaming child back to %s" % childUuid) child.rename(childUuid) Util.log("Updating the VDI record") child.setConfig(VDI.DB_VHD_PARENT, parentUuid) child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)
# refcount (best effort - assume that it had succeeded if the # second rename succeeded; if not, this adjustment will be wrong, # leading to a non-deactivation of the LV) ns = lvhdutil.NS_PREFIX_LVM + self.uuid cCnt, cBcnt = RefCounter.check(child.uuid, ns) pCnt, pBcnt = RefCounter.check(parent.uuid, ns) pCnt = pCnt + cBcnt RefCounter.set(parent.uuid, pCnt, 0, ns) util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)
parent.deflate() child.inflateFully() util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) if child.hidden: child._setHidden(False) if not parent.hidden: parent._setHidden(True) if not parent.lvReadonly: self.lvmCache.setReadonly(parent.fileName, True) self._updateSlavesOnUndoLeafCoalesce(parent, child) util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) Util.log("*** leaf-coalesce undo successful") if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
Util.log("*** FINISH LEAF-COALESCE") vdi = self.getVDI(childUuid) if not vdi: raise util.SMException("VDI %s not found" % childUuid) vdi.inflateFully() util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) try: self.forgetVDI(parentUuid) except XenAPI.Failure: pass self._updateSlavesOnResize(vdi) util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) Util.log("*** finished leaf-coalesce successfully")
"""Confirm with all slaves in the pool that 'vdi' is not in use. We try to check all slaves, including those that the Agent believes are offline, but ignore failures for offline hosts. This is to avoid cases where the Agent thinks a host is offline but the host is up.""" args = {"vgName" : self.vgName, "action1": "deactivateNoRefcount", "lvName1": vdi.fileName, "action2": "cleanupLockAndRefcount", "uuid2" : vdi.uuid, "ns2" : lvhdutil.NS_PREFIX_LVM + self.uuid} onlineHosts = self.xapi.getOnlineHosts() abortFlag = IPCFlag(self.uuid) for pbdRecord in self.xapi.getAttachedPBDs(): hostRef = pbdRecord["host"] if hostRef == self.xapi._hostRef: continue if abortFlag.test(FLAG_TYPE_ABORT): raise AbortException("Aborting due to signal") Util.log("Checking with slave %s (path %s)" % ( self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) try: self.xapi.ensureInactive(hostRef, args) except XenAPI.Failure: if hostRef in onlineHosts: raise
slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) if not slaves: Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ child) return
tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \ self.TMP_RENAME_PREFIX + child.uuid args = {"vgName" : self.vgName, "action1": "deactivateNoRefcount", "lvName1": tmpName, "action2": "deactivateNoRefcount", "lvName2": child.fileName, "action3": "refresh", "lvName3": child.fileName, "action4": "refresh", "lvName4": parent.fileName} for slave in slaves: Util.log("Updating %s, %s, %s on slave %s" % \ (tmpName, child.fileName, parent.fileName, self.xapi.getRecordHost(slave)['hostname'])) text = self.xapi.session.xenapi.host.call_plugin( \ slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) Util.log("call-plugin returned: '%s'" % text)
slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) if not slaves: Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) return
args = {"vgName" : self.vgName, "action1": "deactivateNoRefcount", "lvName1": oldNameLV, "action2": "refresh", "lvName2": vdi.fileName, "action3": "cleanupLockAndRefcount", "uuid3" : origParentUuid, "ns3" : lvhdutil.NS_PREFIX_LVM + self.uuid} for slave in slaves: Util.log("Updating %s to %s on slave %s" % \ (oldNameLV, vdi.fileName, self.xapi.getRecordHost(slave)['hostname'])) text = self.xapi.session.xenapi.host.call_plugin( \ slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) Util.log("call-plugin returned: '%s'" % text)
uuids = map(lambda x: x.uuid, vdi.getAllLeaves()) slaves = util.get_slaves_attached_on(self.xapi.session, uuids) if not slaves: util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) return lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName, vdi.fileName, vdi.uuid, slaves)
if not LINSTOR_AVAILABLE: raise util.SMException( 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' )
SR.__init__(self, uuid, xapi, createLock, force) self._master_uri = 'linstor://localhost' self.path = LinstorVolumeManager.DEV_ROOT_PATH self._reloadLinstor()
self._checkSlaves(vdi) SR.deleteVDI(self, vdi)
return self._linstor.max_volume_size_allowed
all_vdi_info = self._scan(force) for uuid, vdiInfo in all_vdi_info.iteritems(): # When vdiInfo is None, the VDI is RAW. vdi = self.getVDI(uuid) if not vdi: self.logFilter.logNewVDI(uuid) vdi = LinstorVDI(self, uuid, not vdiInfo) self.vdis[uuid] = vdi if vdiInfo: vdi.load(vdiInfo) self._removeStaleVDIs(all_vdi_info.keys()) self._buildTree(force) self.logFilter.logState() self._handleInterruptedCoalesceLeaf()
session = self.xapi.session host_ref = util.get_this_host_ref(session) sr_ref = session.xenapi.SR.get_by_uuid(self.uuid)
pbd = util.find_my_pbd(session, host_ref, sr_ref) if pbd is None: raise util.SMException('Failed to find PBD')
dconf = session.xenapi.PBD.get_device_config(pbd) group_name = dconf['group-name']
self.journaler = LinstorJournaler( self._master_uri, group_name, logger=util.SMlog )
self._linstor = LinstorVolumeManager( self._master_uri, group_name, repair=True, logger=util.SMlog ) self._vhdutil = LinstorVhdUtil(session, self._linstor)
for i in range(SR.SCAN_RETRY_ATTEMPTS): self._reloadLinstor() error = False try: all_vdi_info = self._load_vdi_info() for uuid, vdiInfo in all_vdi_info.iteritems(): if vdiInfo and vdiInfo.error: error = True break if not error: return all_vdi_info Util.log('Scan error, retrying ({})'.format(i)) except Exception as e: Util.log('Scan exception, retrying ({}): {}'.format(i, e)) Util.log(traceback.format_exc())
if force: return all_vdi_info raise util.SMException('Scan error')
all_vdi_info = {}
# TODO: Ensure metadata contains the right info.
all_volume_info = self._linstor.volumes_with_info volumes_metadata = self._linstor.volumes_with_metadata for vdi_uuid, volume_info in all_volume_info.items(): try: if not volume_info.name and \ not list(volumes_metadata[vdi_uuid].items()): continue # Ignore it, probably deleted.
vdi_type = volumes_metadata[vdi_uuid][VDI_TYPE_TAG] if vdi_type == vhdutil.VDI_TYPE_VHD: info = self._vhdutil.get_vhd_info(vdi_uuid) else: info = None except Exception as e: Util.log( ' [VDI {}: failed to load VDI info]: {}' .format(vdi_uuid, e) ) info = vhdutil.VHDInfo(vdi_uuid) info.error = 1 all_vdi_info[vdi_uuid] = info return all_vdi_info
# TODO: Maybe implement _liveLeafCoalesce/_prepareCoalesceLeaf/ # _finishCoalesceLeaf/_updateSlavesOnResize like LVM plugin.
meta_overhead = vhdutil.calcOverheadEmpty(LinstorVDI.MAX_SIZE) bitmap_overhead = vhdutil.calcOverheadBitmap(parent.sizeVirt) virtual_size = LinstorVolumeManager.round_up_volume_size( parent.sizeVirt + meta_overhead + bitmap_overhead ) # TODO: Check result. return virtual_size - self._linstor.get_volume_size(parent.uuid)
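# Illustrative sketch (hypothetical numbers): with parent.sizeVirt = 10 GiB
# and the VHD metadata plus bitmap overheads adding up to a few MiB, the
# target is round_up_volume_size(10 GiB + overheads); subtracting the volume
# size currently reported by LINSTOR for the parent gives the extra space that
# would have to be allocated before coalescing into it.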
try: self._linstor.get_device_path(uuid) except Exception: # TODO: Maybe log exception. return False return True
entries = self.journaler.get_all(VDI.JRN_LEAF) for uuid, parentUuid in entries.iteritems(): if self._hasValidDevicePath(parentUuid) or \ self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): self._undoInterruptedCoalesceLeaf(uuid, parentUuid) else: self._finishInterruptedCoalesceLeaf(uuid, parentUuid) self.journaler.remove(VDI.JRN_LEAF, uuid) vdi = self.getVDI(uuid) if vdi: vdi.ensureUnpaused()
Util.log('*** UNDO LEAF-COALESCE') parent = self.getVDI(parentUuid) if not parent: parent = self.getVDI(childUuid) if not parent: raise util.SMException( 'Neither {} nor {} found'.format(parentUuid, childUuid) ) Util.log( 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) ) parent.rename(parentUuid) util.fistpoint.activate('LVHDRT_coaleaf_undo_after_rename', self.uuid)
child = self.getVDI(childUuid) if not child: child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) if not child: raise util.SMException( 'Neither {} nor {} found'.format( childUuid, self.TMP_RENAME_PREFIX + childUuid ) ) Util.log('Renaming child back to {}'.format(childUuid)) child.rename(childUuid) Util.log('Updating the VDI record') child.setConfig(VDI.DB_VHD_PARENT, parentUuid) child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD) util.fistpoint.activate( 'LVHDRT_coaleaf_undo_after_rename2', self.uuid )
# TODO: Maybe deflate here.
if child.hidden: child._setHidden(False) if not parent.hidden: parent._setHidden(True) self._updateSlavesOnUndoLeafCoalesce(parent, child) util.fistpoint.activate('LVHDRT_coaleaf_undo_end', self.uuid) Util.log('*** leaf-coalesce undo successful') if util.fistpoint.is_active('LVHDRT_coaleaf_stop_after_recovery'): child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
Util.log('*** FINISH LEAF-COALESCE') vdi = self.getVDI(childUuid) if not vdi: raise util.SMException('VDI {} not found'.format(childUuid)) # TODO: Maybe inflate. try: self.forgetVDI(parentUuid) except XenAPI.Failure: pass self._updateSlavesOnResize(vdi) util.fistpoint.activate('LVHDRT_coaleaf_finish_end', self.uuid) Util.log('*** finished leaf-coalesce successfully')
try: states = self._linstor.get_usage_states(vdi.uuid) for node_name, state in states.items(): self._checkSlave(node_name, vdi, state) except LinstorVolumeManagerError as e: if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: raise
def _checkSlave(node_name, vdi, state): # If state is None, LINSTOR doesn't know the host state # (bad connection?). if state is None: raise util.SMException( 'Unknown state for VDI {} on {}'.format(vdi.uuid, node_name) )
if state: raise util.SMException( 'VDI {} is in use on {}'.format(vdi.uuid, node_name) )
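# Illustrative shape of the data consumed above (assumed, not from the
# source): get_usage_states might return {'node-a': False, 'node-b': None,
# 'node-c': True}; False passes silently, None raises the "Unknown state"
# exception and True raises the "in use" exception.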
################################################################################ # # Helpers # os.chdir("/") os.setsid() pid = os.fork() if pid: Util.log("Will finish as PID [%d]" % pid) os._exit(0) for fd in [0, 1, 2]: try: os.close(fd) except OSError: pass # we need to fill those special fd numbers or pread won't work sys.stdin = open("/dev/null", 'r') sys.stderr = open("/dev/null", 'w') sys.stdout = open("/dev/null", 'w') # As we're a new process we need to clear the lock objects lock.Lock.clearAll() return True
if type in LVHDSR.SUBTYPES: type = SR.TYPE_LVHD if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: # temporary while LVHD is symlinked as LVM type = SR.TYPE_LVHD if type in [ "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", "moosefs", "xfs", "zfs", "ext4" ]: type = SR.TYPE_FILE if type in ["linstor"]: type = SR.TYPE_LINSTOR if type not in SR.TYPES: raise util.SMException("Unsupported SR type: %s" % type) return type
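# Usage sketch for the type normalisation above (assumed to run inside this
# module, where SR and util are already in scope):
#   normalizeType("lvmoiscsi")   # -> SR.TYPE_LVHD
#   normalizeType("nfs")         # -> SR.TYPE_FILE
#   normalizeType("linstor")     # -> SR.TYPE_LINSTOR
#   normalizeType("foo")         # raises util.SMException("Unsupported SR type: foo")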
util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) with open(os.path.join( NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init'), 'w+') as f: f.write('1')
# Check to see if the GCPAUSE_FISTPOINT is present. If so, the fist # point will just return. Otherwise, fall back on an abortable sleep.
lambda *args: None) return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)
# If time.sleep hangs, we are in deep trouble; however, for # completeness we set the timeout of the abort thread to # 110% of GCPAUSE_DEFAULT_SLEEP. None, sr.uuid, abortTest, VDI.POLL_INTERVAL, GCPAUSE_DEFAULT_SLEEP*1.1)
if not lockActive.acquireNoblock(): Util.log("Another GC instance already active, exiting") return try: # Check if any work needs to be done sr.scanLocked() if not sr.hasWork(): Util.log("No work, exiting") return _gcLoopPause(sr, dryRun) while True: if not sr.xapi.isPluggedHere(): Util.log("SR no longer attached, exiting") break sr.scanLocked() if not sr.hasWork(): Util.log("No work, exiting") break
if not lockRunning.acquireNoblock(): Util.log("Unable to acquire GC running lock.") return try: if not sr.gcEnabled(): break sr.cleanupCoalesceJournals() # Create the init file here in case startup is waiting on it _create_init_file(sr.uuid) sr.scanLocked() sr.updateBlockInfo()
howmany = len(sr.findGarbage()) if howmany > 0: Util.log("Found %d orphaned vdis" % howmany) sr.lock() try: sr.garbageCollect(dryRun) finally: sr.unlock() sr.xapi.srUpdate()
candidate = sr.findCoalesceable() if candidate: util.fistpoint.activate( "LVHDRT_finding_a_suitable_pair", sr.uuid) sr.coalesce(candidate, dryRun) sr.xapi.srUpdate() continue
candidate = sr.findLeafCoalesceable() if candidate: sr.coalesceLeaf(candidate, dryRun) sr.xapi.srUpdate() continue
finally: lockRunning.release() finally: Util.log("GC process exiting, no work left") _create_init_file(sr.uuid) lockActive.release()
host = session.xenapi.host.get_record(hostref) return host['enabled']
""" Don't want to start GC until Xapi is fully initialised """ local_session = None if session is None: local_session = util.get_localAPI_session() session = local_session
try: hostref = session.xenapi.host.get_by_uuid(util.get_this_host()) while not _xapi_enabled(session, hostref): util.SMlog("Xapi not ready, GC waiting") time.sleep(15) finally: if local_session is not None: local_session.logout()
init(srUuid) _ensure_xapi_initialised(session) sr = SR.getInstance(srUuid, session) if not sr.gcEnabled(False): return
sr.cleanupCache() try: _gcLoop(sr, dryRun) finally: sr.cleanup() sr.logFilter.logState() del sr.xapi
"""Aborts an GC/coalesce.
srUuid: the UUID of the SR whose GC/coalesce must be aborted soft: If set to True and there is a pending abort signal, the function doesn't do anything. If set to False, a new abort signal is issued.
returns: If soft is set to False, we return True holding lockActive. If soft is set to True and an abort signal is already pending, we return False without holding lockActive. An exception is raised in case of error.""" reason="SR %s: error aborting existing process" % srUuid)
global lockRunning if not lockRunning: lockRunning = lock.Lock(LOCK_TYPE_RUNNING, srUuid) global lockActive if not lockActive: lockActive = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
output = """Garbage collect and/or coalesce VHDs in a VHD-based SR
Parameters: -u --uuid UUID SR UUID and one of: -g --gc garbage collect, coalesce, and repeat while there is work -G --gc_force garbage collect once, aborting any current operations -c --cache-clean <max_age> clean up IntelliCache cache files older than max_age hours -a --abort abort any currently running operation (GC or coalesce) -q --query query the current state (GC'ing, coalescing or not running) -x --disable disable GC/coalesce (will be in effect until you exit) -t --debug see Debug below
Options: -b --background run in background (return immediately) (valid for -g only) -f --force continue in the presence of VHDs with errors (when doing GC, this might cause removal of any such VHDs) (only valid for -G) (DANGEROUS)
Debug: The --debug parameter enables manipulation of LVHD VDIs for debugging purposes. ** NEVER USE IT ON A LIVE VM ** The following parameters are required: -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", "deflate". -v --vdi_uuid VDI UUID """ #-d --dry-run don't actually perform any SR-modifying operations print output Util.log("(Invalid usage)") sys.exit(1)
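# Example invocations (illustrative; the script name is assumed, the flags are
# the ones documented in the usage text above):
#   cleanup.py -u <SR_UUID> -g -b    start GC/coalesce in the background
#   cleanup.py -u <SR_UUID> -a       abort any currently running GC/coalesce
#   cleanup.py -u <SR_UUID> -q       query whether GC/coalesce is running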
############################################################################## # # API # """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. """ else:
"""Garbage collect all deleted VDIs in SR "srUuid". Fork & return immediately if inBackground=True.
The following algorithm is used: 1. If we are already GC'ing in this SR, return 2. If we are already coalescing a VDI pair: a. Scan the SR and determine if the VDI pair is GC'able b. If the pair is not GC'able, return c. If the pair is GC'able, abort coalesce 3. Scan the SR 4. If there is nothing to collect, nor to coalesce, return 5. If there is something to collect, GC all, then goto 3 6. If there is something to coalesce, coalesce one pair, then goto 3 """ # we are now running in the background. Catch & log any errors # because there is no other way to propagate them back at this # point
try: _gc(None, srUuid, dryRun) except AbortException: Util.log("Aborted") except Exception: Util.logException("gc") Util.log("* * * * * SR %s: ERROR\n" % srUuid) os._exit(0) else: _gc(session, srUuid, dryRun)
"""Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure the SR lock is held. The following algorithm is used: 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 2. Scan the SR 3. GC 4. return """ Util.log("=== SR %s: gc_force ===" % srUuid) init(srUuid) sr = SR.getInstance(srUuid, session, lockSR, True) if not lockActive.acquireNoblock(): abort(srUuid) else: Util.log("Nothing was running, clear to proceed")
if force: Util.log("FORCED: will continue even if there are VHD errors") sr.scanLocked(force) sr.cleanupCoalesceJournals()
try: sr.cleanupCache() sr.garbageCollect(dryRun) finally: sr.cleanup() sr.logFilter.logState() lockActive.release()
"""Return whether GC/coalesce is currently running or not. The information is not guaranteed for any length of time if the call is not protected by locking. """ init(srUuid) if lockActive.acquireNoblock(): lockActive.release() return False return True
sr = SR.getInstance(srUuid, session) entries = sr.journaler.getAll(VDI.JRN_COALESCE) if len(entries) == 0: return False elif len(entries) > 1: raise util.SMException("More than one coalesce entry: " + str(entries)) sr.scanLocked() coalescedUuid = entries.popitem()[0] garbage = sr.findGarbage() for vdi in garbage: if vdi.uuid == coalescedUuid: return True return False
coalesceable = [] sr = SR.getInstance(srUuid, session) sr.scanLocked() for uuid in vdiUuids: vdi = sr.getVDI(uuid) if not vdi: raise util.SMException("VDI %s not found" % uuid) if vdi.isLeafCoalesceable(): coalesceable.append(uuid) return coalesceable
sr = SR.getInstance(srUuid, session) return sr.cleanupCache(maxAge)
Util.log("Debug command: %s" % cmd) sr = SR.getInstance(sr_uuid, None) if not isinstance(sr, LVHDSR): print "Error: not an LVHD SR" return sr.scanLocked() vdi = sr.getVDI(vdi_uuid) if not vdi: print "Error: VDI %s not found" return print "Running %s on SR %s" % (cmd, sr) print "VDI before: %s" % vdi if cmd == "activate": vdi._activate() print "VDI file: %s" % vdi.path if cmd == "deactivate": ns = lvhdutil.NS_PREFIX_LVM + sr.uuid sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) if cmd == "inflate": vdi.inflateFully() sr.cleanup() if cmd == "deflate": vdi.deflate() sr.cleanup() sr.scanLocked() print "VDI after: %s" % vdi
############################################################################## # # CLI # action = "" uuid = "" background = False force = False dryRun = False debug_cmd = "" vdi_uuid = "" shortArgs = "gGc:aqxu:bfdt:v:" longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]
try: opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) except getopt.GetoptError: usage() for o, a in opts: if o in ("-g", "--gc"): action = "gc" if o in ("-G", "--gc_force"): action = "gc_force" if o in ("-c", "--clean_cache"): action = "clean_cache" maxAge = int(a) if o in ("-a", "--abort"): action = "abort" if o in ("-q", "--query"): action = "query" if o in ("-x", "--disable"): action = "disable" if o in ("-u", "--uuid"): uuid = a if o in ("-b", "--background"): background = True if o in ("-f", "--force"): force = True if o in ("-d", "--dry-run"): Util.log("Dry run mode") dryRun = True if o in ("-t", "--debug"): action = "debug" debug_cmd = a if o in ("-v", "--vdi_uuid"): vdi_uuid = a
if not action or not uuid: usage() if action == "debug" and not (debug_cmd and vdi_uuid) or \ action != "debug" and (debug_cmd or vdi_uuid): usage()
if action != "query" and action != "debug": print "All output goes to log"
if action == "gc": gc(None, uuid, background, dryRun) elif action == "gc_force": gc_force(None, uuid, force, dryRun, True) elif action == "clean_cache": cache_cleanup(None, uuid, maxAge) elif action == "abort": abort(uuid) elif action == "query": print "Currently running: %s" % get_state(uuid) elif action == "disable": abort_optional_reenable(uuid) elif action == "debug": debug(uuid, debug_cmd, vdi_uuid)
main()