/brz/remove-bazaar : revision 0.23.26

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to groupcompress.py

Committer: John Arbash Meinel
Date: 2009-03-02 19:36:29 UTC
mto: (0.17.31 trunk)
mto: This revision was merged to the branch mainline in revision 4280.
Revision ID: john@arbash-meinel.com-20090302193629-51hqsvh1rhh71gku

We now start to make use of the ability to extend the delta index
with new sources. The next step is to understand the delta encoding, so as to
avoid linking up with lines in the deltas.

files modified:
_groupcompress_pyx.pyx

diff-delta.c

groupcompress.py

tests/test__groupcompress_pyx.py

Show diffs side-by-side

added added

removed removed

groupcompress.py

)

_NO_LABELS = False

_FAST = True

def parse(bytes):

if _NO_LABELS:

132

131

self.endpoint = 0

133

132

self.input_bytes = 0

134

133

self.labels_deltas = {}

135

self._last_delta_index = None

134

self._delta_index = _groupcompress_pyx.DeltaIndex()

136

135

137

136

def compress(self, key, chunks, expected_sha, soft=False):

138

137

"""Compress lines with label key.

168

167

new_chunks = []

169

168

else:

170

169

new_chunks = ['label: %s\nsha1: %s\n' % (label, sha1)]

171

# PROF: 5s to this constant extra joining

172

if self._last_delta_index is not None:

173

delta_index = self._last_delta_index

174

else:

175

source_text = ''.join(self.lines)

176

# XXX: We have a few possibilities here. We could consider a few

177

# different 'previous' windows, such as only the initial text,

178

# we could do something with the 'just inserted' text we could

179

# try a delta against whatever the last delta we computed,

180

# (the idea being we just computed the delta_index, so we

181

# re-use it here, and see if that is good enough, etc)

182

# PROF: 15s to building the delta index

183

delta_index = _groupcompress_pyx.make_delta_index(source_text)

184

# PROF: only 0.67s to actually create a delta

185

delta = delta_index.make_delta(target_text)

170

delta = self._delta_index.make_delta(target_text)

186

171

if (delta is None

187

172

or len(delta) > len(target_text) / 2):

188

173

# We can't delta (perhaps source_text is empty)

189

174

# so mark this as an insert

190

175

if _NO_LABELS:

191

176

new_chunks = ['f']

192

new_chunks.extend(chunks)

193

177

else:

194

178

new_chunks.insert(0, 'fulltext\n')

195

179

new_chunks.append('len: %s\n' % (input_len,))

196

new_chunks.extend(chunks)

197

self._last_delta_index = None

180

unadded_bytes = sum(map(len, new_chunks))

181

self._delta_index.add_source(target_text, unadded_bytes)

182

new_chunks.append(target_text)

198

183

else:

199

184

if _NO_LABELS:

200

new_chunks = ['d', delta]

185

new_chunks = ['d']

201

186

else:

202

187

new_chunks.insert(0, 'delta\n')

203

188

new_chunks.append('len: %s\n' % (len(delta),))

204

new_chunks.append(delta)

205

if _FAST:

206

self._last_delta_index = delta_index

189

unadded_bytes = sum(map(len, new_chunks))

190

new_chunks.append(delta)

207

191

delta_start = (self.endpoint, len(self.lines))

208

192

self.output_chunks(new_chunks)

209

193

self.input_bytes += input_len

Older »