1 """Text wrapping and filling.
2 """
3
4
5
6
7
8 __revision__ = "$Id: textwrap.py,v 1.32.8.2 2004/05/13 01:48:15 gward Exp $"
9
10 import string, re
11
12 try:
13 unicode
14 except NameError:
17
18
19
20
21 try:
22 True, False
23 except NameError:
24 (True, False) = (1, 0)
25
26 __all__ = ['TextWrapper', 'wrap', 'fill']
27
28
29
30
31
32
33
34
35
36 _whitespace = '\t\n\x0b\x0c\r '
37
39 """
40 Object for wrapping/filling text. The public interface consists of
41 the wrap() and fill() methods; the other methods are just there for
42 subclasses to override in order to tweak the default behaviour.
43 If you want to completely replace the main wrapping algorithm,
44 you'll probably have to override _wrap_chunks().
45
46 Several instance attributes control various aspects of wrapping:
47 width (default: 70)
48 the maximum width of wrapped lines (unless break_long_words
49 is false)
50 initial_indent (default: "")
51 string that will be prepended to the first line of wrapped
52 output. Counts towards the line's width.
53 subsequent_indent (default: "")
54 string that will be prepended to all lines save the first
55 of wrapped output; also counts towards each line's width.
56 expand_tabs (default: true)
57 Expand tabs in input text to spaces before further processing.
58 Each tab will become 1 .. 8 spaces, depending on its position in
59 its line. If false, each tab is treated as a single character.
60 replace_whitespace (default: true)
61 Replace all whitespace characters in the input text by spaces
62 after tab expansion. Note that if expand_tabs is false and
63 replace_whitespace is true, every tab will be converted to a
64 single space!
65 fix_sentence_endings (default: false)
66 Ensure that sentence-ending punctuation is always followed
67 by two spaces. Off by default because the algorithm is
68 (unavoidably) imperfect.
69 break_long_words (default: true)
70 Break words longer than 'width'. If false, those words will not
71 be broken, and some lines might be longer than 'width'.
72 """
73
# Translation table for 8-bit strings: every character listed in
# _whitespace is mapped to a plain space.
whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))

# The same mapping expressed as {code point: code point}, which is the
# form a unicode string's translate() method expects.  ord(' ') is the
# code point of the space character for 8-bit and unicode strings
# alike, so no eval() of a unicode literal is needed to obtain it.
unicode_whitespace_trans = {}
uspace = ord(' ')
for x in map(ord, _whitespace):
    unicode_whitespace_trans[x] = uspace

# Splits text into chunks; the capturing group keeps the separators in
# the result of split().  For example
#   Hello there -- you goof-ball, use the -b option!
# splits into
#   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
wordsep_re = re.compile(r'(\s+|'                  # any whitespace
                        r'-*\w{2,}-(?=\w{2,}))')  # hyphenated words

# Recognises the end of a sentence inside a single chunk: a lowercase
# letter, then sentence-ending punctuation, then an optional closing
# quote.
sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                             r'[\.\!\?]'          # sentence-ending punct.
                             r'[\"\']?'           # optional end-of-quote
                             % string.lowercase)
107
108
109 - def __init__(self,
110 width=70,
111 initial_indent="",
112 subsequent_indent="",
113 expand_tabs=True,
114 replace_whitespace=True,
115 fix_sentence_endings=False,
116 break_long_words=True):
117 self.width = width
118 self.initial_indent = initial_indent
119 self.subsequent_indent = subsequent_indent
120 self.expand_tabs = expand_tabs
121 self.replace_whitespace = replace_whitespace
122 self.fix_sentence_endings = fix_sentence_endings
123 self.break_long_words = break_long_words
124
125
126
127
128
129 - def _munge_whitespace(self, text):
130 """_munge_whitespace(text : string) -> string
131
132 Munge whitespace in text: expand tabs and convert all other
133 whitespace characters to spaces. Eg. " foo\tbar\n\nbaz"
134 becomes " foo bar baz".
135 """
136 if self.expand_tabs:
137 text = string.expandtabs(text)
138 if self.replace_whitespace:
139 if type(text) == type(''):
140 text = string.translate(text, self.whitespace_trans)
141 elif isinstance(text, unicode):
142 text = string.translate(text, self.unicode_whitespace_trans)
143 return text
144
145
146 - def _split(self, text):
147 """_split(text : string) -> [string]
148
149 Split the text to wrap into indivisible chunks. Chunks are
150 not quite the same as words; see wrap_chunks() for full
151 details. As an example, the text
152 Look, goof-ball -- use the -b option!
153 breaks into the following chunks:
154 'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
155 'use', ' ', 'the', ' ', '-b', ' ', 'option!'
156 """
157 chunks = self.wordsep_re.split(text)
158 chunks = filter(None, chunks)
159 return chunks
160
161 - def _fix_sentence_endings(self, chunks):
162 """_fix_sentence_endings(chunks : [string])
163
164 Correct for sentence endings buried in 'chunks'. Eg. when the
165 original text contains "... foo.\nBar ...", munge_whitespace()
166 and split() will convert that to [..., "foo.", " ", "Bar", ...]
167 which has one too few spaces; this method simply changes the one
168 space to two.
169 """
170 i = 0
171 pat = self.sentence_end_re
172 while i < len(chunks)-1:
173 if chunks[i+1] == " " and pat.search(chunks[i]):
174 chunks[i+1] = " "
175 i = i + 2
176 else:
177 i = i + 1
178
179 - def _handle_long_word(self, chunks, cur_line, cur_len, width):
180 """_handle_long_word(chunks : [string],
181 cur_line : [string],
182 cur_len : int, width : int)
183
184 Handle a chunk of text (most likely a word, not whitespace) that
185 is too long to fit in any line.
186 """
187 space_left = max(width - cur_len, 1)
188
189
190
191 if self.break_long_words:
192 cur_line.append(chunks[0][0:space_left])
193 chunks[0] = chunks[0][space_left:]
194
195
196
197
198 elif not cur_line:
199 cur_line.append(chunks.pop(0))
200
201
202
203
204
205
206
207 - def _wrap_chunks(self, chunks):
208 """_wrap_chunks(chunks : [string]) -> [string]
209
210 Wrap a sequence of text chunks and return a list of lines of
211 length 'self.width' or less. (If 'break_long_words' is false,
212 some lines may be longer than this.) Chunks correspond roughly
213 to words and the whitespace between them: each chunk is
214 indivisible (modulo 'break_long_words'), but a line break can
215 come between any two chunks. Chunks should not have internal
216 whitespace; ie. a chunk is either all whitespace or a "word".
217 Whitespace chunks will be removed from the beginning and end of
218 lines, but apart from that whitespace is preserved.
219 """
220 lines = []
221 if self.width <= 0:
222 raise ValueError("invalid width %r (must be > 0)" % self.width)
223
224 while chunks:
225
226
227
228 cur_line = []
229 cur_len = 0
230
231
232 if lines:
233 indent = self.subsequent_indent
234 else:
235 indent = self.initial_indent
236
237
238 width = self.width - len(indent)
239
240
241
242 if string.strip(chunks[0]) == '' and lines:
243 del chunks[0]
244
245 while chunks:
246 l = len(chunks[0])
247
248
249 if cur_len + l <= width:
250 cur_line.append(chunks.pop(0))
251 cur_len = cur_len + l
252
253
254 else:
255 break
256
257
258
259 if chunks and len(chunks[0]) > width:
260 self._handle_long_word(chunks, cur_line, cur_len, width)
261
262
263 if cur_line and string.strip(cur_line[-1]) == '':
264 del cur_line[-1]
265
266
267
268 if cur_line:
269 lines.append(indent + string.join(cur_line, ''))
270
271 return lines
272
273
274
275
def wrap(self, text):
    """wrap(text : string) -> [string]

    Reformat the single paragraph in 'text' so it fits in lines of
    no more than 'self.width' columns, and return a list of wrapped
    lines.  The text is first passed through _munge_whitespace(),
    then split into chunks with _split(); if 'fix_sentence_endings'
    is true the chunks are adjusted by _fix_sentence_endings(); the
    chunks are finally laid out by _wrap_chunks().
    """
    text = self._munge_whitespace(text)
    chunks = self._split(text)
    if self.fix_sentence_endings:
        self._fix_sentence_endings(chunks)
    return self._wrap_chunks(chunks)
291
def fill(self, text):
    """fill(text : string) -> string

    Reformat the single paragraph in 'text' to fit in lines of no
    more than 'self.width' columns, and return a new string
    containing the entire wrapped paragraph: the lines produced by
    wrap(), joined with newlines.
    """
    return "\n".join(self.wrap(text))
300
301
302
303
def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded and all other whitespace
    characters (including newline) are converted to space.  See
    TextWrapper class for available keyword args to customize wrapping
    behaviour.
    """
    # Extended call syntax replaces the deprecated apply() builtin.
    w = TextWrapper(width=width, **kwargs)
    return w.wrap(text)
318
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See TextWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    # Extended call syntax replaces the deprecated apply() builtin.
    w = TextWrapper(width=width, **kwargs)
    return w.fill(text)
332
333
334
335
def dedent(text):
    r"""dedent(text : string) -> string

    Remove any whitespace that can be uniformly removed from the left
    of every line in `text`.

    This can be used e.g. to make triple-quoted strings line up with
    the left edge of screen/whatever, while still presenting it in the
    source code in indented form.

    Tabs are expanded before the common margin is measured, and lines
    that are empty or contain only whitespace are ignored when
    measuring it.

    For example:

        def test():
            # end first line with \ to avoid the empty line!
            s = '''\
            hello
              world
            '''
            print repr(s)          # prints '    hello\n      world\n    '
            print repr(dedent(s))  # prints 'hello\n  world\n'
    """
    lines = text.expandtabs().split('\n')

    # The margin is the smallest indentation among lines with content.
    margin = None
    for line in lines:
        content = line.lstrip()
        if not content:
            continue
        indent = len(line) - len(content)
        if margin is None:
            margin = indent
        else:
            margin = min(margin, indent)

    # Nothing to strip when no line has content (None) or when some
    # line already starts in column 0.
    if margin:
        lines = [line[margin:] for line in lines]

    return '\n'.join(lines)
374