Package SCons :: Package compat :: Module _scons_textwrap
[hide private]
[frames] | no frames]

Source Code for Module SCons.compat._scons_textwrap

  1  """Text wrapping and filling. 
  2  """ 
  3   
  4  # Copyright (C) 1999-2001 Gregory P. Ward. 
  5  # Copyright (C) 2002, 2003 Python Software Foundation. 
  6  # Written by Greg Ward <gward@python.net> 
  7   
  8  __revision__ = "$Id: textwrap.py,v 1.32.8.2 2004/05/13 01:48:15 gward Exp $" 
  9   
 10  import string, re 
 11   
 12  try: 
 13     unicode 
 14  except NameError: 
15 - class unicode:
16 pass
17 18 # Do the right thing with boolean values for all known Python versions 19 # (so this module can be copied to projects that don't depend on Python 20 # 2.3, e.g. Optik and Docutils). 21 try: 22 True, False 23 except NameError: 24 (True, False) = (1, 0) 25 26 __all__ = ['TextWrapper', 'wrap', 'fill'] 27 28 # Hardcode the recognized whitespace characters to the US-ASCII 29 # whitespace characters. The main reason for doing this is that in 30 # ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales 31 # that character winds up in string.whitespace. Respecting 32 # string.whitespace in those cases would 1) make textwrap treat 0xa0 the 33 # same as any other whitespace char, which is clearly wrong (it's a 34 # *non-breaking* space), 2) possibly cause problems with Unicode, 35 # since 0xa0 is not in range(128). 36 _whitespace = '\t\n\x0b\x0c\r ' 37
class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 1 .. 8 spaces, depending on its position in
        its line.  If false, each tab is treated as a single character.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
    """

    # 256-character table for 8-bit strings: every character listed in
    # _whitespace maps to a plain space.
    whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))

    # Ordinal -> ordinal mapping used for unicode strings.
    unicode_whitespace_trans = {}
    try:
        uspace = eval("ord(u' ')")
    except SyntaxError:
        # Python1.5 doesn't understand u'' syntax, in which case we
        # won't actually use the unicode translation below, so it
        # doesn't matter what value we put in the table.
        uspace = ord(' ')
    for x in map(ord, _whitespace):
        unicode_whitespace_trans[x] = uspace

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    wordsep_re = re.compile(r'(\s+|'                  # any whitespace
                            r'-*\w{2,}-(?=\w{2,}))')  # hyphenated words
                            # Earlier Python's don't have the (?<=
                            # negative look-behind assertion.  It doesn't
                            # matter for the simple input SCons is going to
                            # give it, so just comment it out.
                            #r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash

    # XXX will there be a locale-or-charset-aware version of
    # string.lowercase in 2.3?
    sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 % string.lowercase)

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True):
        """Store the wrapping options; see the class docstring for the
        meaning of each attribute.
        """
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
        becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = string.expandtabs(text)
        if self.replace_whitespace:
            if type(text) == type(''):
                text = string.translate(text, self.whitespace_trans)
            elif isinstance(text, unicode):
                # Call the method directly: the unicode table is a
                # dictionary, and string.translate() only supports
                # string tables (it concatenates the table with the
                # text before translating).
                text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        """
        chunks = self.wordsep_re.split(text)
        # re.split() with a capturing group yields empty strings
        # between adjacent separators; drop them.
        chunks = filter(None, chunks)
        return chunks

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", _munge_whitespace()
        and _split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  The list is modified in place.
        """
        i = 0
        pat = self.sentence_end_re
        while i < len(chunks)-1:
            if chunks[i+1] == " " and pat.search(chunks[i]):
                # Widen the single separating space to two spaces.
                chunks[i+1] = "  "
                i = i + 2
            else:
                i = i + 1

    def _handle_long_word(self, chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  Both 'chunks' and 'cur_line'
        are modified in place.
        """
        # Always make room for at least one character so that progress
        # is made even when the current line is already full.
        space_left = max(width - cur_len, 1)

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            cur_line.append(chunks[0][0:space_left])
            chunks[0] = chunks[0][space_left:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(chunks.pop(0))

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        Raises ValueError if 'self.width' is not positive.  The
        'chunks' list is consumed (emptied) in the process.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if string.strip(chunks[0]) == '' and lines:
                del chunks[0]

            while chunks:
                l = len(chunks[0])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + l <= width:
                    cur_line.append(chunks.pop(0))
                    cur_len = cur_len + l

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[0]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # If the last chunk on this line is all whitespace, drop it.
            if cur_line and string.strip(cur_line[-1]) == '':
                del cur_line[-1]

            # Convert current line back to a string and store it in list
            # of all lines (return value).
            if cur_line:
                lines.append(indent + string.join(cur_line, ''))

        return lines

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        text = self._munge_whitespace(text)
        chunks = self._split(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return string.join(self.wrap(text), "\n")


# -- Convenience interface ---------------------------------------------

def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    The paragraph in 'text' is reformatted so that no line is longer
    than 'width' columns, and the resulting lines are returned as a
    list.  By default, tabs in 'text' are expanded with
    string.expandtabs(), and all other whitespace characters (including
    newline) are converted to space.  Any extra keyword arguments are
    handed to the TextWrapper constructor; see that class for the
    options available to customize wrapping behaviour.
    """
    # 'width' is a named parameter, so it can never also appear in
    # kwargs; seeding the dict with it first is therefore equivalent to
    # copying kwargs and then setting the key.
    options = {'width': width}
    options.update(kwargs)
    return apply(TextWrapper, (), options).wrap(text)
318
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    The paragraph in 'text' is reformatted so that no line is longer
    than 'width' columns, and the wrapped paragraph is returned as one
    newline-joined string.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  Any extra keyword
    arguments are handed to the TextWrapper constructor; see that class
    for the options available to customize wrapping behaviour.
    """
    # 'width' is a named parameter, so it can never also appear in
    # kwargs; seeding the dict with it first is therefore equivalent to
    # copying kwargs and then setting the key.
    options = {'width': width}
    options.update(kwargs)
    return apply(TextWrapper, (), options).fill(text)


# -- Loosely related functionality -------------------------------------

def dedent(text):
    r"""dedent(text : string) -> string

    Remove any whitespace that can be uniformly removed from the left
    of every line in `text`.

    This can be used e.g. to make triple-quoted strings line up with
    the left edge of screen/whatever, while still presenting it in the
    source code in indented form.

    Tabs are expanded before the common margin is measured.  Lines that
    are empty or contain only whitespace do not influence the margin.

    For example:

        def test():
            # end first line with \ to avoid the empty line!
            s = '''\
            hello
              world
            '''
            print repr(s)          # prints '    hello\n      world\n    '
            print repr(dedent(s))  # prints 'hello\n  world\n'
    """
    lines = text.expandtabs().split('\n')

    # Find the smallest indentation among lines that have real content.
    margin = None
    for line in lines:
        content = line.lstrip()
        if not content:
            continue
        indent = len(line) - len(content)
        if margin is None:
            margin = indent
        else:
            margin = min(margin, indent)

    # margin is None when no line has content and 0 when some line is
    # already flush left; in both cases there is nothing to strip.
    if margin:
        # Explicit index loop (rather than a list comprehension) keeps
        # the module's syntax acceptable to very old interpreters.
        for i in range(len(lines)):
            lines[i] = lines[i][margin:]

    return '\n'.join(lines)
374