/brz/remove-bazaar : contents of plugins/changeset/read

: (revision 972)

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

#!/usr/bin/env python
"""\
Read in a changeset output, and process it into a Changeset object.
"""

import bzrlib, bzrlib.changeset
import common

class BadChangeset(Exception):
    """Base class for the errors raised while parsing a changeset."""


class MalformedHeader(BadChangeset):
    """A '# '-prefixed key/value line could not be parsed."""


class MalformedPatches(BadChangeset):
    """A patch did not begin with the expected '***' meta line."""


class MalformedFooter(BadChangeset):
    """Footer-specific error; not raised by the code visible in this file."""

def _unescape(name):
    """Now we want to find the filename effected.
    Unfortunately the filename is written out as
    repr(filename), which means that it surrounds
    the name with quotes which may be single or double
    (single is preferred unless there is a single quote in
    the filename). And some characters will be escaped.

    TODO:   There has to be some pythonic way of undo-ing the
            representation of a string rather than using eval.
    """
    delimiter = name[0]
    if name[-1] != delimiter:
        raise BadChangeset('Could not properly parse the'
                ' filename: %r' % name)
    # We need to handle escaped hexadecimals too.
    return name[1:-1].replace('\"', '"').replace("\'", "'")

class ChangesetInfo(object):
    """Intermediate record that gets filled out as the file is read.

    ChangesetReader stores the value of every 'key: value' line on the
    attribute of the same name (spaces become underscores) and appends the
    patches to ``actions``. create_maps() then derives the id/path lookup
    tables, and get_changeset() turns the actions into a Changeset.
    """
    def __init__(self):
        # Values taken from 'key: value' lines; None until the key is read.
        self.committer = None
        self.date = None
        self.message = None
        self.revno = None
        self.revision = None
        self.revision_sha1 = None
        self.precursor = None
        self.precursor_sha1 = None
        self.precursor_revno = None

        self.timestamp = None
        self.timezone = None

        self.tree_root_id = None
        # Lists of 'path<TAB>file id<TAB>parent id' strings.
        self.file_ids = None
        self.old_file_ids = None

        # (action line, patch lines) pairs, in the order they were read.
        self.actions = []
        self.id2path = {}    # A mapping from file id to path name
        self.path2id = {}    # The reverse mapping
        self.id2parent = {}  # A mapping from a given id to its parent id

        # The same three maps for the old tree.
        self.old_id2path = {}
        self.old_path2id = {}
        self.old_id2parent = {}

    def __str__(self):
        import pprint
        return pprint.pformat(self.__dict__)

    def _fill_maps(self, entries, id2path, path2id, id2parent):
        """Add every 'path<TAB>id<TAB>parent id' entry to the given maps."""
        if not entries:
            return
        for entry in entries:
            path, f_id, parent_id = entry.split('\t')
            id2path[f_id] = path
            path2id[path] = f_id
            id2parent[f_id] = parent_id

    def create_maps(self):
        """Go through the individual id sections, and generate the
        id2path, path2id and id2parent maps for the new and the old tree.

        The maps are rebuilt from scratch on every call. Previously a
        second call copied the already populated new maps into the old_*
        maps, so ids that only exist in the new tree leaked into them.
        """
        # Cleared in place so references to the dicts stay valid.
        for mapping in (self.id2path, self.path2id, self.id2parent):
            mapping.clear()

        # Rather than use an empty path, the changeset code seems
        # to like to use "./." for the tree root.
        self.id2path[self.tree_root_id] = './.'
        self.path2id['./.'] = self.tree_root_id
        self.id2parent[self.tree_root_id] = bzrlib.changeset.NULL_ID

        # The old maps start out with just the root entry as well.
        self.old_id2path = self.id2path.copy()
        self.old_path2id = self.path2id.copy()
        self.old_id2parent = self.id2parent.copy()

        self._fill_maps(self.file_ids,
                self.id2path, self.path2id, self.id2parent)
        self._fill_maps(self.old_file_ids,
                self.old_id2path, self.old_path2id, self.old_id2parent)

    def get_changeset(self):
        """Create a changeset from the data contained within.

        The lookups below use the maps built by create_maps(), so that has
        to be called first.

        :return: A bzrlib.changeset.Changeset holding an entry for the
            tree root plus one entry per item in self.actions.
        :raises BadChangeset: For an action line with fewer than two
            words, a rename line without exactly one ' => ', a rename
            whose two paths map to different file ids, or an action that
            is not one of renamed / removed / added / modified.
        :raises KeyError: If a path named by an action is not in the maps.
        """
        from bzrlib.changeset import Changeset, ChangesetEntry, \
            PatchApply, ReplaceContents
        cset = Changeset()

        # The tree root always gets an entry of its own.
        cset.add_entry(ChangesetEntry(self.tree_root_id,
                bzrlib.changeset.NULL_ID, './.'))

        for info, lines in self.actions:
            parts = info.split(' ')
            if len(parts) < 2:
                # Used to surface as a bare IndexError.
                raise BadChangeset('Malformed action line: %r' % info)
            action = parts[0]
            # parts[1] (the original code called it 'kind') is not used.
            extra = ' '.join(parts[2:])

            if action == 'renamed':
                try:
                    old_path, new_path = extra.split(' => ')
                except ValueError:
                    raise BadChangeset('Malformed rename: %r' % extra)
                old_path = _unescape(old_path)
                new_path = _unescape(new_path)

                new_id = self.path2id[new_path]
                old_id = self.old_path2id[old_path]
                # An explicit check: an assert would vanish under -O.
                if old_id != new_id:
                    raise BadChangeset('Rename of %r to %r does not keep'
                            ' the file id' % (old_path, new_path))

                entry = ChangesetEntry(old_id,
                        self.old_id2parent[old_id], old_path)
                entry.new_path = new_path
                entry.new_parent = self.id2parent[new_id]
                if lines:
                    entry.contents_change = PatchApply(''.join(lines))
            elif action == 'removed':
                old_path = _unescape(extra)
                old_id = self.old_path2id[old_path]
                entry = ChangesetEntry(old_id,
                        self.old_id2parent[old_id], old_path)
                entry.new_path = None
                entry.new_parent = None
                # Technically a removed should be a ReplaceContents()
                # where you need to have the old contents, but at most
                # we have a remove style patch, so any patch lines are
                # deliberately left unused.
            elif action in ('added', 'modified'):
                new_path = _unescape(extra)
                new_id = self.path2id[new_path]
                entry = ChangesetEntry(new_id,
                        self.id2parent[new_id], new_path)
                # NOTE(review): for 'modified' this clearing of the old
                # path/parent is kept exactly as the original had it, but
                # it looks copy-pasted from 'added'; a modified file
                # presumably still has its old path. Confirm against
                # ChangesetEntry before changing it.
                entry.path = None
                entry.parent = None
                if lines:
                    # Technically an added should be a ReplaceContents()
                    # but at most we have an add style patch.
                    entry.contents_change = PatchApply(''.join(lines))
            else:
                raise BadChangeset('Unrecognized action: %r' % action)
            cset.add_entry(entry)
        return cset

class ChangesetReader(object):
    """This class reads in a changeset from a file and collects what it
    finds in a ChangesetInfo, available through get_info().

    The whole file is consumed by the constructor: first the header, then
    the patches, then the footer.
    """
    def __init__(self, from_file):
        """Read in the changeset from the file.

        :param from_file: A file-like object (must have iterator support).
            Several loops iterate over it one after the other, so it must
            continue where the previous loop stopped, the way a file
            object does.
        """
        object.__init__(self)
        self.from_file = from_file

        self.info = ChangesetInfo()
        # We put the actual inventory ids in the footer, so that the patch
        # is easier to read for humans.
        # Unfortunately, that means we need to read everything before we
        # can create a proper changeset.
        self._read_header()
        next_line = self._read_patches()
        if next_line is not None:
            self._read_footer(next_line)

    def get_info(self):
        """Build the lookup maps and return the populated ChangesetInfo."""
        self.info.create_maps()
        return self.info

    def _strip_newline(self, line):
        """Return line without its trailing newline, if it has one.

        The last line of a file may lack the newline; blindly dropping
        the final character would then eat a real character.
        """
        if line[-1:] == '\n':
            return line[:-1]
        return line

    def _read_header(self):
        """Read the bzr header.

        :raises MalformedHeader: If the file does not start with the lines
            of common.get_header(), each prefixed with '# ', or if it ends
            before all of them were seen.
        """
        # list() so the expected count is known even for a one-shot
        # iterable.
        header = list(common.get_header())
        matched = 0
        for head_line, line in zip(header, self.from_file):
            if (line[:2] != '# '
                    or line[-1] != '\n'
                    or line[2:-1] != head_line):
                raise MalformedHeader('Did not read the opening'
                    ' header information.')
            matched += 1
        # zip() stops quietly when the file runs out first; a truncated
        # header must not pass as a valid one.
        if matched != len(header):
            raise MalformedHeader('Did not read the opening'
                ' header information.')

        for line in self.from_file:
            if self._handle_info_line(line) is not None:
                break

    def _handle_info_line(self, line, in_footer=False):
        """Handle reading a single line.

        This may call itself, in the case that we read_multi,
        and then had a dangling line on the end.

        :param line: The raw line, including its newline if it had one.
        :param in_footer: When true, the BEGIN/END BZR FOOTER marker
            lines are skipped silently.
        :return: 'break' once the blank terminator line has been seen
            (also when it was the dangling line after a multi-line
            value), None otherwise.
        :raises MalformedHeader: For a line without the '# ' prefix, a
            line that is not 'key: value' or 'key:', an unknown key, or
            a key that was already set.
        """
        # The bzr header is terminated with a blank line
        # which does not start with #
        next_line = None
        if line[:1] == '\n':
            return 'break'
        if line[:2] != '# ':
            raise MalformedHeader('Opening bzr header did not start with #')

        line = self._strip_newline(line[2:])  # Remove the '# '
        if not line:
            return  # Ignore blank lines

        if in_footer and line in ('BEGIN BZR FOOTER', 'END BZR FOOTER'):
            return

        loc = line.find(': ')
        if loc != -1:
            key = line[:loc]
            value = line[loc+2:]
            if not value:
                value, next_line = self._read_many()
        elif line[-1:] == ':':
            key = line[:-1]
            value, next_line = self._read_many()
        else:
            raise MalformedHeader('While looking for key: value pairs,'
                    ' did not find the colon %r' % (line))

        key = key.replace(' ', '_')
        if not hasattr(self.info, key):
            # What do we do with a key we don't recognize
            raise MalformedHeader('Unknown Key: %s' % key)
        if getattr(self.info, key) is not None:
            raise MalformedHeader('Duplicated Key: %s' % key)
        setattr(self.info, key, value)

        if next_line:
            # Pass the result on: the dangling line may be the blank
            # terminator, and the caller's loop has to stop on it.
            return self._handle_info_line(next_line, in_footer=in_footer)

    def _read_many(self):
        """If a line ends with no entry, that means that it should be
        followed with multiple lines of values.

        This detects the end of the list, because it will be a line that
        does not start with '#    '. Because it has to read that extra
        line, it returns the tuple: (values, next_line)

        next_line is None when the file ended inside the list.
        """
        values = []
        for line in self.from_file:
            if line[:5] != '#    ':
                return values, line
            values.append(self._strip_newline(line[5:]))
        return values, None

    def _parse_patch_header(self, line):
        """Return the action named on a '***' line, or None for a '#' line.

        :raises MalformedPatches: If the line starts with neither.
        """
        if line[:1] == '#':
            return None
        if line[:3] != '***':
            raise MalformedPatches('The first line of all patches'
                ' should be a bzr meta line "***"')
        return self._strip_newline(line[4:])

    def _read_one_patch(self, first_line=None):
        """Read in one patch, return the complete patch, along with
        the next line.

        :param first_line: A line already consumed by the previous step
            that belongs to this patch, if any.
        :return: action, lines, next_line, do_continue. action is None
            when the first line was a '#' line instead of a patch;
            do_continue is true only when next_line starts another patch.
        """
        action = None
        have_header = first_line is not None
        if have_header:
            action = self._parse_patch_header(first_line)
            if action is None:
                return None, [], first_line, False

        lines = []
        for line in self.from_file:
            if not have_header:
                action = self._parse_patch_header(line)
                have_header = True
                if action is None:
                    return None, [], line, False
            elif line[:3] == '***':
                return action, lines, line, True
            elif line[:1] == '#':
                return action, lines, line, False
            else:
                lines.append(line)
        return action, lines, None, False

    def _read_patches(self):
        """Read all patches into self.info.actions.

        :return: The first line after the patches, or None at end of file.
        """
        next_line = None
        do_continue = True
        while do_continue:
            action, lines, next_line, do_continue = \
                    self._read_one_patch(next_line)
            if action is not None:
                self.info.actions.append((action, lines))
        return next_line

    def _read_footer(self, first_line=None):
        """Read the rest of the meta information.

        :param first_line:  The previous step iterates past what it
                            can handle. That extra line is given here.
        """
        if first_line is not None:
            if self._handle_info_line(first_line, in_footer=True) is not None:
                return
        for line in self.from_file:
            if self._handle_info_line(line, in_footer=True) is not None:
                break


def read_changeset(from_file):
    """Parse the changeset found in from_file.

    :param from_file: File-like object holding the changeset; it is
        handed straight to ChangesetReader.
    :return: Whatever ChangesetReader.get_info() returns for that file.
    """
    return ChangesetReader(from_file).get_info()


757 by Martin Pool - add john's changeset plugin	1	#!/usr/bin/env python
	2	"""\
	3	Read in a changeset output, and process it into a Changeset object.
	4	"""
	5
	6	import bzrlib, bzrlib.changeset
	7	import common
	8
	9	class BadChangeset(Exception): pass
	10	class MalformedHeader(BadChangeset): pass
	11	class MalformedPatches(BadChangeset): pass
	12	class MalformedFooter(BadChangeset): pass
	13
	14	def _unescape(name):
	15	"""Now we want to find the filename effected.
	16	Unfortunately the filename is written out as
	17	repr(filename), which means that it surrounds
	18	the name with quotes which may be single or double
	19	(single is preferred unless there is a single quote in
	20	the filename). And some characters will be escaped.
	21
	22	TODO: There has to be some pythonic way of undo-ing the
	23	representation of a string rather than using eval.
	24	"""
	25	delimiter = name[0]
	26	if name[-1] != delimiter:
	27	raise BadChangeset('Could not properly parse the'
	28	' filename: %r' % name)
	29	# We need to handle escaped hexadecimals too.
	30	return name[1:-1].replace('\"', '"').replace("\'", "'")
	31
	32	class ChangesetInfo(object):
	33	"""This is the intermediate class that gets filled out as
	34	the file is read.
	35	"""
	36	def __init__(self):
	37	self.committer = None
	38	self.date = None
	39	self.message = None
	40	self.revno = None
	41	self.revision = None
	42	self.revision_sha1 = None
	43	self.precursor = None
	44	self.precursor_sha1 = None
	45	self.precursor_revno = None
	46
	47	self.timestamp = None
	48	self.timezone = None
	49
	50	self.tree_root_id = None
	51	self.file_ids = None
	52	self.old_file_ids = None
	53
	54	self.actions = [] #this is the list of things that happened
	55	self.id2path = {} # A mapping from file id to path name
	56	self.path2id = {} # The reverse mapping
	57	self.id2parent = {} # A mapping from a given id to it's parent id
	58
	59	self.old_id2path = {}
	60	self.old_path2id = {}
	61	self.old_id2parent = {}
	62
	63	def __str__(self):
	64	import pprint
65	return pprint.pformat(self.__dict__)
66
67	def create_maps(self):
68	"""Go through the individual id sections, and generate the
69	id2path and path2id maps.
70	"""
71	# Rather than use an empty path, the changeset code seems
72	# to like to use "./." for the tree root.
73	self.id2path[self.tree_root_id] = './.'
74	self.path2id['./.'] = self.tree_root_id
75	self.id2parent[self.tree_root_id] = bzrlib.changeset.NULL_ID
76	self.old_id2path = self.id2path.copy()
77	self.old_path2id = self.path2id.copy()
78	self.old_id2parent = self.id2parent.copy()
79
80	if self.file_ids:
81	for info in self.file_ids:
82	path, f_id, parent_id = info.split('\t')
83	self.id2path[f_id] = path
84	self.path2id[path] = f_id
85	self.id2parent[f_id] = parent_id
86	if self.old_file_ids:
87	for info in self.old_file_ids:
88	path, f_id, parent_id = info.split('\t')
89	self.old_id2path[f_id] = path
90	self.old_path2id[path] = f_id
91	self.old_id2parent[f_id] = parent_id
92
93	def get_changeset(self):
94	"""Create a changeset from the data contained within."""
95	from bzrlib.changeset import Changeset, ChangesetEntry, \
96	PatchApply, ReplaceContents
97	cset = Changeset()
98
99	entry = ChangesetEntry(self.tree_root_id,
100	bzrlib.changeset.NULL_ID, './.')
101	cset.add_entry(entry)
102	for info, lines in self.actions:
103	parts = info.split(' ')
104	action = parts[0]
105	kind = parts[1]
106	extra = ' '.join(parts[2:])
107	if action == 'renamed':
108	old_path, new_path = extra.split(' => ')
109	old_path = _unescape(old_path)
110	new_path = _unescape(new_path)
111
112	new_id = self.path2id[new_path]
113	old_id = self.old_path2id[old_path]
114	assert old_id == new_id
115
116	new_parent = self.id2parent[new_id]
117	old_parent = self.old_id2parent[old_id]
118
119	entry = ChangesetEntry(old_id, old_parent, old_path)
120	entry.new_path = new_path
121	entry.new_parent = new_parent
122	if lines:
123	entry.contents_change = PatchApply(''.join(lines))
124	elif action == 'removed':
125	old_path = _unescape(extra)
126	old_id = self.old_path2id[old_path]
127	old_parent = self.old_id2parent[old_id]
128	entry = ChangesetEntry(old_id, old_parent, old_path)
129	entry.new_path = None
130	entry.new_parent = None
131	if lines:
132	# Technically a removed should be a ReplaceContents()
133	# Where you need to have the old contents
134	# But at most we have a remove style patch.
135	#entry.contents_change = ReplaceContents()
136	pass
137	elif action == 'added':
138	new_path = _unescape(extra)
139	new_id = self.path2id[new_path]
140	new_parent = self.id2parent[new_id]
141	entry = ChangesetEntry(new_id, new_parent, new_path)
142	entry.path = None
143	entry.parent = None
144	if lines:
145	# Technically an added should be a ReplaceContents()
146	# Where you need to have the old contents
147	# But at most we have an add style patch.
148	#entry.contents_change = ReplaceContents()
149	entry.contents_change = PatchApply(''.join(lines))
150	elif action == 'modified':
151	new_path = _unescape(extra)
152	new_id = self.path2id[new_path]
153	new_parent = self.id2parent[new_id]
154	entry = ChangesetEntry(new_id, new_parent, new_path)
155	entry.path = None
156	entry.parent = None
157	if lines:
158	# Technically an added should be a ReplaceContents()
159	# Where you need to have the old contents
160	# But at most we have an add style patch.
161	#entry.contents_change = ReplaceContents()
162	entry.contents_change = PatchApply(''.join(lines))
163	else:
164	raise BadChangeset('Unrecognized action: %r' % action)
165	cset.add_entry(entry)
166	return cset
167
168	class ChangesetReader(object):
169	"""This class reads in a changeset from a file, and returns
170	a Changeset object, which can then be applied against a tree.
171	"""
172	def __init__(self, from_file):
173	"""Read in the changeset from the file.
174
175	:param from_file: A file-like object (must have iterator support).
176	"""
177	object.__init__(self)
178	self.from_file = from_file
179
180	self.info = ChangesetInfo()
181	# We put the actual inventory ids in the footer, so that the patch
182	# is easier to read for humans.
183	# Unfortunately, that means we need to read everything before we
184	# can create a proper changeset.
185	self._read_header()
186	next_line = self._read_patches()
187	if next_line is not None:
188	self._read_footer(next_line)
189
190	def get_info(self):
191	"""Create the actual changeset object.
192	"""
193	self.info.create_maps()
194	return self.info
195
196	def _read_header(self):
197	"""Read the bzr header"""
198	header = common.get_header()
199	for head_line, line in zip(header, self.from_file):
200	if (line[:2] != '# '
201	or line[-1] != '\n'
202	or line[2:-1] != head_line):
203	raise MalformedHeader('Did not read the opening'
204	' header information.')
205
206	for line in self.from_file:
207	if self._handle_info_line(line) is not None:
208	break
209
210	def _handle_info_line(self, line, in_footer=False):
211	"""Handle reading a single line.
212
213	This may call itself, in the case that we read_multi,
214	and then had a dangling line on the end.
215	"""
216	# The bzr header is terminated with a blank line
217	# which does not start with #
218	next_line = None
219	if line[:1] == '\n':
220	return 'break'
221	if line[:2] != '# ':
222	raise MalformedHeader('Opening bzr header did not start with #')
223
224	line = line[2:-1] # Remove the '# '
225	if not line:
226	return # Ignore blank lines
227
228	if in_footer and line in ('BEGIN BZR FOOTER', 'END BZR FOOTER'):
229	return
230
231	loc = line.find(': ')
232	if loc != -1:
233	key = line[:loc]
234	value = line[loc+2:]
235	if not value:
236	value, next_line = self._read_many()
237	else:
238	if line[-1:] == ':':
239	key = line[:-1]
240	value, next_line = self._read_many()
241	else:
242	raise MalformedHeader('While looking for key: value pairs,'
243	' did not find the colon %r' % (line))
244
245	key = key.replace(' ', '_')
246	if hasattr(self.info, key):
247	if getattr(self.info, key) is None:
248	setattr(self.info, key, value)
249	else:
250	raise MalformedHeader('Duplicated Key: %s' % key)
251	else:
252	# What do we do with a key we don't recognize
253	raise MalformedHeader('Unknown Key: %s' % key)
254
255	if next_line:
256	self._handle_info_line(next_line, in_footer=in_footer)
257
258	def _read_many(self):
259	"""If a line ends with no entry, that means that it should be
260	followed with multiple lines of values.
261
262	This detects the end of the list, because it will be a line that
263	does not start with '# '. Because it has to read that extra
264	line, it returns the tuple: (values, next_line)
265	"""
266	values = []
267	for line in self.from_file:
268	if line[:5] != '# ':
269	return values, line
270	values.append(line[5:-1])
271	return values, None
272
273	def _read_one_patch(self, first_line=None):
274	"""Read in one patch, return the complete patch, along with
275	the next line.
276
277	:return: action, lines, next_line, do_continue
278	"""
279	first = True
280	action = None
281
282	def parse_firstline(line):
283	if line[:1] == '#':
284	return None
285	if line[:3] != '***':
286	raise MalformedPatches('The first line of all patches'
287	' should be a bzr meta line "***"')
288	return line[4:-1]
289
290	if first_line is not None:
291	action = parse_firstline(first_line)
292	first = False
293	if action is None:
294	return None, [], first_line, False
295
296	lines = []
297	for line in self.from_file:
298	if first:
299	action = parse_firstline(line)
300	first = False
301	if action is None:
302	return None, [], line, False
303	else:
304	if line[:3] == '***':
305	return action, lines, line, True
306	elif line[:1] == '#':
307	return action, lines, line, False
308	lines.append(line)
309	return action, lines, None, False
310
311	def _read_patches(self):
312	next_line = None
313	do_continue = True
314	while do_continue:
315	action, lines, next_line, do_continue = \
316	self._read_one_patch(next_line)
317	if action is not None:
318	self.info.actions.append((action, lines))
319	return next_line
320
321	def _read_footer(self, first_line=None):
322	"""Read the rest of the meta information.
323
324	:param first_line: The previous step iterates past what it
325	can handle. That extra line is given here.
326	"""
327	if first_line is not None:
328	if self._handle_info_line(first_line, in_footer=True) is not None:
329	return
330	for line in self.from_file:
331	if self._handle_info_line(line, in_footer=True) is not None:
332	break
333
334
335	def read_changeset(from_file):
336	"""Read in a changeset from a filelike object (must have "readline" support), and
337	parse it into a Changeset object.
338	"""
339	cr = ChangesetReader(from_file)
340	info = cr.get_info()
341	return info
342