/brz/remove-bazaar : revision 3508.1.16

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/fetch.py

Committer: Vincent Ladeuil
Date: 2009-03-17 08:19:26 UTC
mfrom: (4063.3.3 disable-medusa-for-python-2.6)
mto: (4167.1.1 integration)
mto: This revision was merged to the branch mainline in revision 4168.
Revision ID: v.ladeuil+lp@free.fr-20090317081926-yzoqbtmef0lvifes

Merge disable-medusa-for-python-2.6 into pyftpdlib

files added:
bzrlib/clean_tree.py

bzrlib/help_topics/en/debug-flags.txt

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_debug.py

doc/developers/ec2-windows.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/shelving_changes.txt

tools/check-newsbugs.py

files removed:
bzrlib/help_topics/en/hooks.txt

files modified:
NEWS

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_readdir_pyx.pyx

bzrlib/annotate.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/debug.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/hooks.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/merge.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/tag.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/transform.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/trace.py

bzrlib/tree.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/urlutils.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml8.py

doc/developers/HACKING.txt

doc/developers/index.txt

doc/developers/network-protocol.txt

doc/developers/releasing.txt

doc/developers/revision-properties.txt

doc/developers/testing.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/index.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/shared_repository_layouts.txt

setup.py

tools/doc_generate/autodoc_man.py

Show diffs side-by-side

added added

removed removed

bzrlib/fetch.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

that has merged into it. As the first step of a merge, pull, or

branch operation we copy history from the source into the destination

branch.

The copying is done in a slightly complicated order. We don't want to

add a revision to the store until everything it refers to is also

stored, so that if a revision is present we can totally recreate it.

However, we can't know what files are included in a revision until we

read its inventory. So we query the inventory store of the source for

the ids we need, and then pull those ids and then return to the inventories.

"""

import operator

import bzrlib

import bzrlib.errors as errors

from bzrlib import (

errors,

symbol_versioning,

)

from bzrlib.errors import InstallFailed

from bzrlib.progress import ProgressPhase

from bzrlib.revision import NULL_REVISION

from bzrlib.tsort import topo_sort

from bzrlib.trace import mutter

import bzrlib.ui

from bzrlib.versionedfile import filter_absent, FulltextContentFactory

# TODO: Avoid opening the same weaves repeatedly.

# XXX: This doesn't handle ghost (not present in branch) revisions at

# all yet. I'm not sure they really should be supported.

# NOTE: This doesn't copy revisions which may be present but not

# merged into the last revision. I'm not sure we want to do that.

# - get a list of revisions that need to be pulled in

# - for each one, pull in that revision file

# and get the inventory, and store the inventory with right

# parents.

# - and get the ancestry, and store that with right parents too

# - and keep a note of all file ids and versions seen

# - then go through all files; for each one get the weave,

# and add in all file versions

from bzrlib.versionedfile import FulltextContentFactory

class RepoFetcher(object):

"""Pull revisions and texts from one repository to another.

last_revision

if set, try to limit to the data this revision references.

after running:

count_copied -- number of revisions copied

This should not be used directly; it's essentially an object to encapsulate

the logic in InterRepository.fetch().

"""

def __init__(self, to_repository, from_repository, last_revision=None, pb=None,

find_ghosts=True):

def __init__(self, to_repository, from_repository, last_revision=None,

pb=None, find_ghosts=True, fetch_spec=None):

"""Create a repo fetcher.

:param last_revision: If set, try to limit to the data this revision

references.

:param find_ghosts: If True search the entire history for ghosts.

:param _write_group_acquired_callable: Don't use; this parameter only

exists to facilitate a hack done in InterPackRepo.fetch. We would

like to remove this parameter.

:param pb: ProgressBar object to use; deprecated and ignored.

This method will just create one on top of the stack.

"""

# result variables.

self.failed_revisions = []

self.count_copied = 0

if pb is not None:

symbol_versioning.warn(

symbol_versioning.deprecated_in((1, 14, 0))

% "pb parameter to RepoFetcher.__init__")

# and for simplicity it is in fact ignored

if to_repository.has_same_location(from_repository):

# repository.fetch should be taking care of this case.

raise errors.BzrError('RepoFetcher run '

self.sink = to_repository._get_sink()

# must not mutate self._last_revision as it's potentially a shared instance

self._last_revision = last_revision

self._fetch_spec = fetch_spec

self.find_ghosts = find_ghosts

if pb is None:

100

self.pb = bzrlib.ui.ui_factory.nested_progress_bar()

101

self.nested_pb = self.pb

102

else:

103

self.pb = pb

104

self.nested_pb = None

105

self.from_repository.lock_read()

mutter("Using fetch logic to copy between %s(%s) and %s(%s)",

self.from_repository, self.from_repository._format,

self.to_repository, self.to_repository._format)

106

try:

107

try:

108

self.__fetch()

109

finally:

110

if self.nested_pb is not None:

111

self.nested_pb.finished()

self.__fetch()

112

finally:

113

self.from_repository.unlock()

114

126

# assert not missing

127

100

self.count_total = 0

128

101

self.file_ids_names = {}

129

pp = ProgressPhase('Transferring', 4, self.pb)

102

pb = bzrlib.ui.ui_factory.nested_progress_bar()

103

pb.show_pct = pb.show_count = False

130

104

try:

131

pp.next_phase()

105

pb.update("Finding revisions", 0, 2)

132

106

search = self._revids_to_fetch()

133

107

if search is None:

134

108

return

135

self._fetch_everything_for_search(search, pp)

109

pb.update("Fetching revisions", 1, 2)

110

self._fetch_everything_for_search(search)

136

111

finally:

137

self.pb.clear()

112

pb.finished()

138

113

139

def _fetch_everything_for_search(self, search, pp):

114

def _fetch_everything_for_search(self, search):

140

115

"""Fetch all data for the given set of revisions."""

141

116

# The first phase is "file". We pass the progress bar for it directly

142

117

# into item_keys_introduced_by, which has more information about how

146

121

# item_keys_introduced_by should have a richer API than it does at the

147

122

# moment, so that it can feed the progress information back to this

148

123

# function?

149

self.pb = bzrlib.ui.ui_factory.nested_progress_bar()

124

if (self.from_repository._format.rich_root_data and

125

not self.to_repository._format.rich_root_data):

126

raise errors.IncompatibleRepositories(

127

self.from_repository, self.to_repository,

128

"different rich-root support")

129

pb = bzrlib.ui.ui_factory.nested_progress_bar()

150

130

try:

131

pb.update("Get stream source")

132

source = self.from_repository._get_source(

133

self.to_repository._format)

134

stream = source.get_stream(search)

151

135

from_format = self.from_repository._format

152

stream = self.get_stream(search, pp)

136

pb.update("Inserting stream")

153

137

resume_tokens, missing_keys = self.sink.insert_stream(

154

138

stream, from_format, [])

155

139

if missing_keys:

156

stream = self.get_stream_for_missing_keys(missing_keys)

140

pb.update("Missing keys")

141

stream = source.get_stream_for_missing_keys(missing_keys)

142

pb.update("Inserting missing keys")

157

143

resume_tokens, missing_keys = self.sink.insert_stream(

158

144

stream, from_format, resume_tokens)

159

145

if missing_keys:

164

150

raise AssertionError(

165

151

"second push failed to commit the fetch %r." % (

166

152

resume_tokens,))

153

pb.update("Finishing stream")

167

154

self.sink.finished()

168

155

finally:

169

if self.pb is not None:

170

self.pb.finished()

171

172

def get_stream(self, search, pp):

173

phase = 'file'

174

revs = search.get_keys()

175

graph = self.from_repository.get_graph()

176

revs = list(graph.iter_topo_order(revs))

177

data_to_fetch = self.from_repository.item_keys_introduced_by(

178

revs, self.pb)

179

text_keys = []

180

for knit_kind, file_id, revisions in data_to_fetch:

181

if knit_kind != phase:

182

phase = knit_kind

183

# Make a new progress bar for this phase

184

self.pb.finished()

185

pp.next_phase()

186

self.pb = bzrlib.ui.ui_factory.nested_progress_bar()

187

if knit_kind == "file":

188

# Accumulate file texts

189

text_keys.extend([(file_id, revision) for revision in

190

revisions])

191

elif knit_kind == "inventory":

192

# Now copy the file texts.

193

from_texts = self.from_repository.texts

194

yield ('texts', from_texts.get_record_stream(

195

text_keys, self.to_repository._format._fetch_order,

196

not self.to_repository._format._fetch_uses_deltas))

197

# Cause an error if a text occurs after we have done the

198

# copy.

199

text_keys = None

200

# Before we process the inventory we generate the root

201

# texts (if necessary) so that the inventories' references

202

# will be valid.

203

for _ in self._generate_root_texts(revs):

204

yield _

205

# NB: This currently reopens the inventory weave in source;

206

# using a single stream interface instead would avoid this.

207

self.pb.update("fetch inventory", 0, 1)

208

from_weave = self.from_repository.inventories

209

# we fetch only the referenced inventories because we do not

210

# know for unselected inventories whether all their required

211

# texts are present in the other repository - it could be

212

# corrupt.

213

yield ('inventories', from_weave.get_record_stream(

214

[(rev_id,) for rev_id in revs],

215

self.inventory_fetch_order(),

216

not self.delta_on_metadata()))

217

elif knit_kind == "signatures":

218

# Nothing to do here; this will be taken care of when

219

# _fetch_revision_texts happens.

220

pass

221

elif knit_kind == "revisions":

222

for _ in self._fetch_revision_texts(revs, self.pb):

223

yield _

224

else:

225

raise AssertionError("Unknown knit kind %r" % knit_kind)

226

self.count_copied += len(revs)

227

228

def get_stream_for_missing_keys(self, missing_keys):

229

# missing keys can only occur when we are byte copying and not

230

# translating (because translation means we don't send

231

# unreconstructable deltas ever).

232

keys = {}

233

keys['texts'] = set()

234

keys['revisions'] = set()

235

keys['inventories'] = set()

236

keys['signatures'] = set()

237

for key in missing_keys:

238

keys[key[0]].add(key[1:])

239

if len(keys['revisions']):

240

# If we allowed copying revisions at this point, we could end up

241

# copying a revision without copying its required texts: a

242

# violation of the requirements for repository integrity.

243

raise AssertionError(

244

'cannot copy revisions to fill in missing deltas %s' % (

245

keys['revisions'],))

246

for substream_kind, keys in keys.iteritems():

247

vf = getattr(self.from_repository, substream_kind)

248

# Ask for full texts always so that we don't need more round trips

249

# after this stream.

250

stream = vf.get_record_stream(keys,

251

self.to_repository._format._fetch_order, True)

252

yield substream_kind, stream

156

pb.finished()

253

157

254

158

def _revids_to_fetch(self):

255

159

"""Determines the exact revisions needed from self.from_repository to

257

161

258

162

If no revisions need to be fetched, then this just returns None.

259

163

"""

164

if self._fetch_spec is not None:

165

return self._fetch_spec

260

166

mutter('fetch up to rev {%s}', self._last_revision)

261

167

if self._last_revision is NULL_REVISION:

262

168

# explicit limit of no revisions needed

271

177

except errors.NoSuchRevision, e:

272

178

raise InstallFailed([self._last_revision])

273

179

274

def _fetch_revision_texts(self, revs, pb):

275

# fetch signatures first and then the revision texts

276

# may need to be an InterRevisionStore call here.

277

from_sf = self.from_repository.signatures

278

# A missing signature is just skipped.

279

keys = [(rev_id,) for rev_id in revs]

280

signatures = filter_absent(from_sf.get_record_stream(

281

keys,

282

self.to_repository._format._fetch_order,

283

not self.to_repository._format._fetch_uses_deltas))

284

# If a revision has a delta, this is actually expanded inside the

285

# insert_record_stream code now, which is an alternate fix for

286

# bug #261339

287

from_rf = self.from_repository.revisions

288

revisions = from_rf.get_record_stream(

289

keys,

290

self.to_repository._format._fetch_order,

291

not self.delta_on_metadata())

292

return [('signatures', signatures), ('revisions', revisions)]

293

294

def _generate_root_texts(self, revs):

295

"""This will be called by __fetch between fetching weave texts and

296

fetching the inventory weave.

297

298

Subclasses should override this if they need to generate root texts

299

after fetching weave texts.

300

"""

301

return []

302

303

def inventory_fetch_order(self):

304

return self.to_repository._format._fetch_order

305

306

def delta_on_metadata(self):

307

src_serializer = self.from_repository._format._serializer

308

target_serializer = self.to_repository._format._serializer

309

return (self.to_repository._format._fetch_uses_deltas and

310

src_serializer == target_serializer)

311

312

180

313

181

class Inter1and2Helper(object):

314

182

"""Helper for operations that convert data from model 1 and 2

397

265

rev_id_to_root_id.get(parent, root_id) == root_id)

398

266

yield FulltextContentFactory(key, parent_keys, None, '')

399

267

return [('texts', yield_roots())]

400

401

402

class Model1toKnit2Fetcher(RepoFetcher):

403

"""Fetch from a Model1 repository into a Knit2 repository

404

"""

405

def __init__(self, to_repository, from_repository, last_revision=None,

406

pb=None, find_ghosts=True):

407

self.helper = Inter1and2Helper(from_repository)

408

RepoFetcher.__init__(self, to_repository, from_repository,

409

last_revision, pb, find_ghosts)

410

411

def _generate_root_texts(self, revs):

412

return self.helper.generate_root_texts(revs)

413

414

def inventory_fetch_order(self):

415

return 'topological'

416

417

Knit1to2Fetcher = Model1toKnit2Fetcher

Older »