# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""A manager of caches."""

from __future__ import absolute_import

import atexit
import os
import shutil
import tempfile
import weakref

from ... import lru_cache, trace
from . import (
    branch_mapper,
    )
from .reftracker import (
    RefTracker,
    )
from .helpers import (
    single_plural,
    )


class _Cleanup(object):
    """This class makes sure we clean up when CacheManager goes away.

    We use a helper class to ensure that we are never in a refcycle.
    """

def __init__(self, disk_blobs):
        self.disk_blobs = disk_blobs
        self.tempdir = None
        self.small_blobs = None

    def __del__(self):
        self.finalize()

    def finalize(self):
if self.disk_blobs is not None:
            for info in self.disk_blobs.itervalues():
                if info[-1] is not None:
                    os.unlink(info[-1])
self.disk_blobs = None
        if self.small_blobs is not None:
            self.small_blobs.close()
            self.small_blobs = None
        if self.tempdir is not None:
shutil.rmtree(self.tempdir)
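
# In Python 2, an object with a __del__ method that is part of a reference
# cycle is never collected (it lands in gc.garbage), so CacheManager avoids
# defining __del__ itself and delegates cleanup to this helper, which holds
# no reference back to the manager.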


class CacheManager(object):

    _small_blob_threshold = 25*1024
    _sticky_cache_size = 300*1024*1024
    _sticky_flushed_size = 100*1024*1024

def __init__(self, info=None, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs are referenced more than once, and are saved until their
        #   refcount drops to 0
        self._blobs = {}
        self._sticky_blobs = {}
        self._sticky_memory_bytes = 0
        # if we overflow our memory cache, then we will dump large blobs to
        # disk in this directory
        self._tempdir = None
        # id => (offset, n_bytes, fname)
        #   if fname is None, then the content is stored in the small file
        self._disk_blobs = {}
self._cleanup = _Cleanup(self._disk_blobs)
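        # For illustration (ids, sizes and paths invented): after a flush,
        # _disk_blobs might look like
        #   {'sha1-aaaa': (0, 512, None),   # lives in the shared small file
        #    'sha1-bbbb': (0, 30000000, '/tmp/fastimport_blobs-X/blob-Z')}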

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.marks = {}

# (path, branch_ref) -> file-ids - as generated.
        # (Use store_file_id/fetch_fileid methods rather than direct access.)

        # Work out the blobs to make sticky - None means all
self._blob_ref_counts = {}
        if info is not None:
            try:
                blobs_by_counts = info['Blob reference counts']
                # The parser hands values back as lists, already parsed
                for count, blob_list in blobs_by_counts.items():
                    n = int(count)
                    for b in blob_list:
                        self._blob_ref_counts[b] = n
            except KeyError:
                # info not in file - possible when no blobs used
                pass

# BranchMapper has no state (for now?), but we keep it around rather
        # than reinstantiate on every usage
        self.branch_mapper = branch_mapper.BranchMapper()

self.reftracker = RefTracker()

    def add_mark(self, mark, commit_id):
        assert mark[0] != ':'
self.marks[mark] = commit_id

    def lookup_committish(self, committish):
        """Resolve a 'committish' to a revision id.

        :param committish: A "committish" string
        :return: Bazaar revision id
        """
        assert committish[0] == ':'
return self.marks[committish.lstrip(':')]
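
    # A minimal round-trip sketch (mark and revision id invented for the
    # example): marks are stored without the leading ':' but looked up with
    # it.
    #
    #   manager.add_mark('1', 'user@example.com-20090101000000-abcdef')
    #   manager.lookup_committish(':1')
    #   # -> 'user@example.com-20090101000000-abcdef'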

    def dump_stats(self, note=trace.note):
        """Dump some statistics about what we cached."""
        # TODO: add in inventory statistics
        note("Cache statistics:")
        self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note)
        self._show_stats_for(self.marks, "revision-ids", note=note)
        # These aren't interesting so omit from the output, at least for now
        #self._show_stats_for(self._blobs, "other blobs", note=note)
        #self.reftracker.dump_stats(note=note)

def _show_stats_for(self, dict, label, note=trace.note, tuple_key=False):
        """Dump statistics about a given dictionary.

        Both the key and the value need to support len().
        """
        count = len(dict)
        if tuple_key:
            size = sum(map(len, (''.join(k) for k in dict.keys())))
        else:
            size = sum(map(len, dict.keys()))
        size += sum(map(len, dict.values()))
        size = size * 1.0 / 1024
        unit = 'K'
        if size > 1024:
            size = size / 1024
            unit = 'M'
            if size > 1024:
                size = size / 1024
                unit = 'G'
        note("    %-12s: %8.1f %s (%d %s)" % (label, size, unit, count,
single_plural(count, "item", "items")))
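
    # For illustration (numbers invented): 1234 revision-ids whose keys and
    # values total ~12.6KB would be reported as
    #   revision-ids:     12.6 K (1234 items)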

    def clear_all(self):
        """Free up any memory used by the caches."""
        self._blobs.clear()
        self._sticky_blobs.clear()
        self.marks.clear()
        self.reftracker.clear()
        self.inventories.clear()

def _flush_blobs_to_disk(self):
        blobs = self._sticky_blobs.keys()
        sticky_blobs = self._sticky_blobs
        total_blobs = len(sticky_blobs)
        blobs.sort(key=lambda k: len(sticky_blobs[k]))
        if self._tempdir is None:
            tempdir = tempfile.mkdtemp(prefix='fastimport_blobs-')
            self._tempdir = tempdir
            self._cleanup.tempdir = self._tempdir
            self._cleanup.small_blobs = tempfile.TemporaryFile(
                prefix='small-blobs-', dir=self._tempdir)
            small_blob_ref = weakref.ref(self._cleanup.small_blobs)
            # Even though we add it to _Cleanup it seems that the object can be
            # destroyed 'too late' for cleanup to actually occur. Probably a
            # combination of bzr's "die directly, don't clean up" and how
            # exceptions close the running stack.
            def exit_cleanup():
                small_blob = small_blob_ref()
                if small_blob is not None:
                    small_blob.close()
                shutil.rmtree(tempdir, ignore_errors=True)
            atexit.register(exit_cleanup)
        count = 0
        bytes = 0
        n_small_bytes = 0
while self._sticky_memory_bytes > self._sticky_flushed_size:
            id = blobs.pop()
            blob = self._sticky_blobs.pop(id)
            n_bytes = len(blob)
            self._sticky_memory_bytes -= n_bytes
            if n_bytes < self._small_blob_threshold:
                f = self._cleanup.small_blobs
                f.seek(0, os.SEEK_END)
                self._disk_blobs[id] = (f.tell(), n_bytes, None)
                f.write(blob)
                n_small_bytes += n_bytes
            else:
                fd, name = tempfile.mkstemp(prefix='blob-', dir=self._tempdir)
                os.write(fd, blob)
                os.close(fd)
                self._disk_blobs[id] = (0, n_bytes, name)
            bytes += n_bytes
            del blob
            count += 1
trace.note('flushed %d/%d blobs w/ %.1fMB (%.1fMB small) to disk'
                   % (count, total_blobs, bytes / 1024. / 1024,
n_small_bytes / 1024. / 1024))
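
    # Worked example of the defaults (hypothetical sizes): once blobs held in
    # memory exceed _sticky_cache_size (300MB), store_blob() triggers a flush
    # that evicts the largest blobs first until no more than
    # _sticky_flushed_size (100MB) remains; anything under
    # _small_blob_threshold (25KB) is appended to the shared small-blobs
    # file, while bigger blobs each get their own temp file.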

    def store_blob(self, id, data):
        """Store a blob of data."""
        # Note: If we're not reference counting, everything has to be sticky
        if not self._blob_ref_counts or id in self._blob_ref_counts:
            self._sticky_blobs[id] = data
            self._sticky_memory_bytes += len(data)
            if self._sticky_memory_bytes > self._sticky_cache_size:
                self._flush_blobs_to_disk()
        elif data == '':
            # Empty data is always sticky
            self._sticky_blobs[id] = data
        else:
self._blobs[id] = data
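
    # A short sketch of the two store paths (id and data invented for the
    # example): without reference-count hints every blob is sticky; with
    # hints, unlisted blobs are one-shot and fetch_blob() pops them on first
    # use.
    #
    #   manager.store_blob('sha1-aaaa', 'payload')
    #   manager.fetch_blob('sha1-aaaa')  # -> 'payload'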

    def _decref(self, id, cache, fn):
        if not self._blob_ref_counts:
            return False
        count = self._blob_ref_counts.get(id, None)
        if count is not None:
            count -= 1
            if count <= 0:
                del cache[id]
                if fn is not None:
                    os.unlink(fn)
                del self._blob_ref_counts[id]
                return True
            else:
                self._blob_ref_counts[id] = count
        return False

def fetch_blob(self, id):
        """Fetch a blob of data."""
        if id in self._blobs:
            return self._blobs.pop(id)
        if id in self._disk_blobs:
            (offset, n_bytes, fn) = self._disk_blobs[id]
            if fn is None:
                f = self._cleanup.small_blobs
                f.seek(offset)
                content = f.read(n_bytes)
            else:
                fp = open(fn, 'rb')
                try:
                    content = fp.read()
                finally:
                    fp.close()
            self._decref(id, self._disk_blobs, fn)
            return content
        content = self._sticky_blobs[id]
        if self._decref(id, self._sticky_blobs, None):
            self._sticky_memory_bytes -= len(content)
        return content


def invert_dictset(d):
    """Invert a dictionary with keys matching a set of values, turned into lists."""
    # Based on recipe from ASPN
    result = {}
    for k, c in d.iteritems():
        for v in c:
            keys = result.setdefault(v, [])
            keys.append(k)
    return result
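
# A worked example (values invented): inverting a mapping of branches to the
# paths they touch yields, for each path, the list of branches. List order
# follows dict iteration order.
#
#   invert_dictset({'trunk': set(['a', 'b']), 'fix': set(['b'])})
#   # -> {'a': ['trunk'], 'b': ['trunk', 'fix']}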