/brz/remove-bazaar : revision 4371.3.18

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/graph.py

Committer: John Arbash Meinel
Date: 2009-06-10 18:58:02 UTC
mto: (4371.4.5 vila-better-heads)
mto: This revision was merged to the branch mainline in revision 4449.
Revision ID: john@arbash-meinel.com-20090610185802-wsybzjfil447yhy2

Change VF.annotate to use the new KnownGraph code.

This shows a significant savings in the time for 'annotate NEWS', of about 5s/20s
for knit formats, and 45s => 20s for GC formats.

Also, factor the code into a helper, so that we can prepare for writing
a pyrex version.

files added:
bzrlib/_known_graph_py.py

bzrlib/tests/test__known_graph.py

files modified:
bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/knit.py

bzrlib/tests/__init__.py

bzrlib/tests/test_graph.py

Show diffs side-by-side

added added

removed removed

bzrlib/graph.py

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

import heapq

import time

from bzrlib import (

def get_parent_map(self, keys):

"""See StackedParentsProvider.get_parent_map"""

ancestry = self.ancestry

_counters[1] += len(keys)

_counters[2] += 1

return dict((k, ancestry[k]) for k in keys if k in ancestry)

@deprecated_function(deprecated_in((1, 16, 0)))

1179

1176

self._heads[frozenset(keys)] = frozenset(heads)

1180

1177

1181

1178

1182

class _KnownGraphNode(object):

1183

"""Represents a single object in the known graph."""

1184

1185

__slots__ = ('key', 'parent_keys', 'child_keys', 'linear_dominator',

1186

'dominator_distance', 'gdfo', 'ancestor_of')

1187

1188

def __init__(self, key, parent_keys):

1189

self.key = key

1190

self.parent_keys = parent_keys

1191

self.child_keys = []

1192

self.linear_dominator = None

1193

self.dominator_distance = 0

1194

# Greatest distance from origin

1195

self.gdfo = None

1196

# This will become a tuple of known heads that have this node as an

1197

# ancestor

1198

self.ancestor_of = None

1199

1200

def __repr__(self):

1201

return '%s(%s gdfo:%s par:%s child:%s dom:%s %s)' % (

1202

self.__class__.__name__, self.key, self.gdfo,

1203

self.parent_keys, self.child_keys,

1204

self.linear_dominator, self.dominator_distance)

1205

1206

1207

_counters = [0, 0, 0, 0, 0, 0]

1208

class KnownGraph(object):

1209

"""This is a class which assumes we already know the full graph."""

1210

1211

def __init__(self, parent_map):

1212

"""Create a new KnownGraph instance.

1213

1214

:param parent_map: A dictionary mapping key => parent_keys

1215

"""

1216

self._nodes = {}

1217

self._initialize_nodes(parent_map)

1218

1219

def _initialize_nodes(self, parent_map):

1220

"""Populate self._nodes.

1221

1222

After this has finished, self._nodes will have an entry for every entry

1223

in parent_map. Ghosts will have a parent_keys = None, all nodes found

1224

will also have .child_keys populated with all known child_keys.

1225

"""

1226

nodes = self._nodes

1227

for key, parent_keys in parent_map.iteritems():

1228

if key in nodes:

1229

node = nodes[key]

1230

assert node.parent_keys is None

1231

node.parent_keys = parent_keys

1232

else:

1233

node = _KnownGraphNode(key, parent_keys)

1234

nodes[key] = node

1235

for parent_key in parent_keys:

1236

try:

1237

parent_node = nodes[parent_key]

1238

except KeyError:

1239

parent_node = _KnownGraphNode(parent_key, None)

1240

nodes[parent_key] = parent_node

1241

parent_node.child_keys.append(key)

1242

self._find_linear_dominators()

1243

self._find_gdfo()

1244

1245

def _find_linear_dominators(self):

1246

"""For each node in the set, find any linear dominators.

1247

1248

For any given node, the 'linear dominator' is an ancestor, such that

1249

all parents between this node and that one have a single parent, and a

1250

single child. So if A->B->C->D then B,C,D all have a linear dominator

1251

of A. Because there are no interesting siblings, we can quickly skip to

1252

the nearest dominator when doing comparisons.

1253

"""

1254

def check_node(node):

1255

if node.parent_keys is None or len(node.parent_keys) != 1:

1256

# This node is either a ghost, a tail, or has multiple parents

1257

# It its own dominator

1258

node.linear_dominator = node.key

1259

node.dominator_distance = 0

1260

return None

1261

parent_node = self._nodes[node.parent_keys[0]]

1262

if len(parent_node.child_keys) > 1:

1263

# The parent has multiple children, so *this* node is the

1264

# dominator

1265

node.linear_dominator = node.key

1266

node.dominator_distance = 0

1267

return None

1268

# The parent is already filled in, so add and continue

1269

if parent_node.linear_dominator is not None:

1270

node.linear_dominator = parent_node.linear_dominator

1271

node.dominator_distance = parent_node.dominator_distance + 1

1272

return None

1273

# We don't know this node, or its parent node, so start walking to

1274

# next

1275

return parent_node

1276

1277

for node in self._nodes.itervalues():

1278

# The parent is not filled in, so walk until we get somewhere

1279

if node.linear_dominator is not None: #already done

1280

continue

1281

next_node = check_node(node)

1282

if next_node is None:

1283

# Nothing more needs to be done

1284

continue

1285

stack = []

1286

while next_node is not None:

1287

stack.append(node)

1288

node = next_node

1289

next_node = check_node(node)

1290

# The stack now contains the linear chain, and 'node' should have

1291

# been labeled

1292

assert node.linear_dominator is not None

1293

dominator = node.linear_dominator

1294

while stack:

1295

next_node = stack.pop()

1296

next_node.linear_dominator = dominator

1297

next_node.dominator_distance = node.dominator_distance + 1

1298

node = next_node

1299

1300

def _find_gdfo(self):

1301

# TODO: Consider moving the tails search into the first-pass over the

1302

# data, inside _find_linear_dominators

1303

def find_tails():

1304

return [node for node in self._nodes.itervalues()

1305

if not node.parent_keys]

1306

tails = find_tails()

1307

todo = []

1308

for node in tails:

1309

node.gdfo = 1

1310

heapq.heappush(todo, (1, node))

1311

processed = 0

1312

skipped = 0

1313

max_gdfo = len(self._nodes) + 1

1314

while todo:

1315

gdfo, next = heapq.heappop(todo)

1316

processed += 1

1317

if next.gdfo is not None and gdfo < next.gdfo:

1318

# This node was reached from a longer path, we assume it was

1319

# enqued correctly with the longer gdfo, so don't continue

1320

# processing now

1321

assert gdfo < next.gdfo

1322

skipped += 1

1323

continue

1324

next_gdfo = gdfo + 1

1325

assert next_gdfo <= max_gdfo

1326

for child_key in next.child_keys:

1327

child_node = self._nodes[child_key]

1328

if child_node.gdfo is None or child_node.gdfo < next_gdfo:

1329

# Only enque children when all of their parents have been

1330

# resolved

1331

for parent_key in child_node.parent_keys:

1332

# We know that 'this' parent is counted

1333

if parent_key != next.key:

1334

parent_node = self._nodes[parent_key]

1335

if parent_node.gdfo is None:

1336

break

1337

else:

1338

child_node.gdfo = next_gdfo

1339

heapq.heappush(todo, (next_gdfo, child_node))

1340

1341

def heads(self, keys):

1342

"""Return the heads from amongst keys.

1343

1344

This is done by searching the ancestries of each key. Any key that is

1345

reachable from another key is not returned; all the others are.

1346

1347

This operation scales with the relative depth between any two keys. If

1348

any two keys are completely disconnected all ancestry of both sides

1349

will be retrieved.

1350

1351

:param keys: An iterable of keys.

1352

:return: A set of the heads. Note that as a set there is no ordering

1353

information. Callers will need to filter their input to create

1354

order if they need it.

1355

"""

1356

candidate_nodes = dict((key, self._nodes[key]) for key in keys)

1357

_counters[0] += len(candidate_nodes)

1358

if revision.NULL_REVISION in candidate_nodes:

1359

# NULL_REVISION is only a head if it is the only entry

1360

candidate_nodes.pop(revision.NULL_REVISION)

1361

if not candidate_nodes:

1362

return set([revision.NULL_REVISION])

1363

if len(candidate_nodes) < 2:

1364

return set(candidate_nodes)

1365

dominator = None

1366

# TODO: We could optimize for the len(candidate_nodes) > 2 by checking

1367

# for *any* pair-wise matching, and then eliminating one of the

1368

# nodes trivially. However, the fairly common case is just 2

1369

# keys, so we'll focus on that, first

1370

for node in candidate_nodes.itervalues():

1371

if dominator is None:

1372

dominator = node.linear_dominator

1373

elif dominator != node.linear_dominator:

1374

break

1375

else:

1376

# All of these nodes have the same linear_dominator, which means

1377

# they are in a line, the head is just the one with the highest

1378

# distance

1379

def get_distance(key):

1380

return self._nodes[key].dominator_distance

1381

def get_linear_head():

1382

return max(candidate_nodes, key=get_distance)

1383

return set([get_linear_head()])

1384

return self._heads_from_candidate_nodes(candidate_nodes)

1385

1386

def _heads_from_candidate_nodes(self, candidate_nodes):

1387

queue = []

1388

to_cleanup = []

1389

to_cleanup_append = to_cleanup.append

1390

for node in candidate_nodes.itervalues():

1391

assert node.ancestor_of is None

1392

node.ancestor_of = (node.key,)

1393

queue.append((-node.gdfo, node))

1394

to_cleanup_append(node)

1395

heapq.heapify(queue)

1396

# These are nodes that we determined are 'common' that we are no longer

1397

# walking

1398

# Now we walk nodes until all nodes that are being walked are 'common'

1399

num_candidates = len(candidate_nodes)

1400

counters = _counters

1401

nodes = self._nodes

1402

heappop = heapq.heappop

1403

heappush = heapq.heappush

1404

while queue and len(candidate_nodes) > 1:

1405

# counters[0] += 1

1406

_, next = heappop(queue)

1407

# assert next.ancestor_of is not None

1408

next_ancestor_of = next.ancestor_of

1409

if len(next_ancestor_of) == num_candidates:

1410

# This node is now considered 'common'

1411

# Make sure all parent nodes are marked as such

1412

for parent_key in node.parent_keys:

1413

_counters[0] += 1

1414

parent_node = nodes[parent_key]

1415

if parent_node.ancestor_of is not None:

1416

parent_node.ancestor_of = next_ancestor_of

1417

if node.linear_dominator != node.key:

1418

_counters[0] += 1

1419

parent_node = nodes[node.linear_dominator]

1420

if parent_node.ancestor_of is not None:

1421

parent_node.ancestor_of = next_ancestor_of

1422

continue

1423

if next.parent_keys is None:

1424

# This is a ghost

1425

continue

1426

# Now project the current nodes ancestor list to the parent nodes,

1427

# and queue them up to be walked

1428

# Note: using linear_dominator speeds things up quite a bit

1429

# enough that we actually start to be slightly faster

1430

# than the default heads() implementation

1431

if next.linear_dominator != next.key:

1432

# We are at the tip of a long linear region

1433

# We know that there is nothing between here and the tail

1434

# that is interesting, so skip to the end

1435

# counters[5] += 1

1436

parent_keys = [next.linear_dominator]

1437

else:

1438

parent_keys = next.parent_keys

1439

for parent_key in parent_keys:

1440

# counters[1] += 1

1441

if parent_key in candidate_nodes:

1442

candidate_nodes.pop(parent_key)

1443

if len(candidate_nodes) <= 1:

1444

break

1445

_counters[0] += 1

1446

parent_node = nodes[parent_key]

1447

ancestor_of = parent_node.ancestor_of

1448

if ancestor_of is None:

1449

# counters[2] += 1

1450

# This node hasn't been walked yet

1451

parent_node.ancestor_of = next_ancestor_of

1452

# Enqueue this node

1453

heappush(queue, (-parent_node.gdfo, parent_node))

1454

to_cleanup_append(parent_node)

1455

elif ancestor_of != next_ancestor_of:

1456

# counters[3] += 1

1457

# Combine to get the full set of parents

1458

all_ancestors = set(ancestor_of)

1459

all_ancestors.update(next_ancestor_of)

1460

parent_node.ancestor_of = tuple(sorted(all_ancestors))

1461

def cleanup():

1462

for node in to_cleanup:

1463

# counters[4] += 1

1464

node.ancestor_of = None

1465

cleanup()

1466

return set(candidate_nodes)

1467

1468

def get_parent_map(self, keys):

1469

# Thunk to match the Graph._parents_provider api.

1470

nodes = [self._nodes[key] for key in keys]

1471

return dict((node.key, node.parent_keys)

1472

for node in nodes if node.parent_keys is not None)

1473

1474

1475

1476

1179

class _BreadthFirstSearcher(object):

1477

1180

"""Parallel search breadth-first the ancestry of revisions.

1478

1181

1952

1655

removed.add(node)

1953

1656

1954

1657

return result

1658

1659

1660

try:

1661

from bzrlib._known_graph_pyx import KnownGraph

1662

except ImportError:

1663

from bzrlib._known_graph_py import KnownGraph

Older »