Package translate :: Package storage :: Module pypo
[hide private]
[frames | no frames]

Source Code for Module translate.storage.pypo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """classes that hold units of .po files (pounit) or entire files (pofile) 
 22  gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)""" 
 23   
 24  from __future__ import generators 
 25  from translate.misc.multistring import multistring 
 26  from translate.misc import quote 
 27  from translate.misc import textwrap 
 28  from translate.lang import data 
 29  from translate.storage import pocommon, base, poparser 
 30  from translate.storage.pocommon import encodingToUse 
 31  import re 
 32  import copy 
 33  import cStringIO 
 34  import urllib 
 35   
# A newline followed by the "#: " comment prefix.
lsep = "\n#: "
 37  """Separator for #: entries""" 
 38   
# general functions for quoting / unquoting po strings

# Maps each two-character escape sequence (backslash + character) to the
# single character it stands for.
po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'}
# Inverse of po_unescape_map: maps each special character to its escape
# sequence.
po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()])
 43   
def escapeforpo(line):
    """Return C{line} with the PO special characters escaped.

    Every character that is a key of C{po_escape_map} is replaced by the
    escape sequence it maps to; everything else is copied through untouched.
    Assumes no newline occurs in the line.

    @param line: unescaped text
    """
    # Gather every position that holds a character needing an escape.  A set
    # removes duplicates, sorted() gives left-to-right processing order.
    positions = set()
    for special_char in po_escape_map:
        positions.update(quote.find_all(line, special_char))

    pieces = []
    copied_upto = 0
    for position in sorted(positions):
        # plain text between the previous special character and this one
        pieces.append(line[copied_upto:position])
        pieces.append(po_escape_map[line[position:position+1]])
        copied_upto = position + 1
    # whatever follows the last special character
    pieces.append(line[copied_upto:])
    return "".join(pieces)
62
def unescapehandler(escape):
    """Translate one escape sequence into the character it represents.

    @param escape: an escape sequence such as a backslash followed by C{n}
    @return: the mapped character from C{po_unescape_map}, or C{escape}
        itself when the sequence is not in the map
    """
    if escape in po_unescape_map:
        return po_unescape_map[escape]
    return escape
66
def wrapline(line):
    """Wrap text for po files.

    The text is wrapped at 76 columns with all whitespace preserved.  A
    continuation line that starts with a space has that one space moved to
    the end of the line before it.

    @param line: the text to wrap
    @return: the list of wrapped lines
    """
    wrapped = textwrap.wrap(line, 76, replace_whitespace=False,
                            expand_tabs=False, drop_whitespace=False)

    # Lines should not start with a space...
    for position in range(1, len(wrapped)):
        current = wrapped[position]
        if current.startswith(' '):
            # ...so hand the leading space over to the previous line.
            wrapped[position] = current[1:]
            wrapped[position - 1] += ' '
    return wrapped
81
def quoteforpo(text):
    """Quote and escape C{text}, returning the list of PO string lines.

    The text is split on newlines; each piece is wrapped with L{wrapline},
    escaped with L{escapeforpo} and surrounded by double quotes.  Pieces that
    were followed by a newline get an escaped newline before the closing
    quote.

    @param text: the unescaped text, or None
    @return: a list of quoted lines (empty when C{text} is None or empty)
    """
    if text is None:
        return []

    quoted = []
    paragraphs = text.split("\n")
    final_part = paragraphs[-1]
    if len(paragraphs) > 1 or len(paragraphs[0]) > 71:
        # Multi-line output starts with an empty leader line, except when the
        # text is exactly one line terminated by a single newline.
        if len(paragraphs) != 2 or final_part:
            quoted.append('""')
        for paragraph in paragraphs[:-1]:
            #TODO: We should only wrap after escaping
            wrapped = wrapline(paragraph)
            if not wrapped:
                # an empty paragraph is just the newline itself
                quoted.append('"\\n"')
                continue
            for piece in wrapped[:-1]:
                quoted.append('"' + escapeforpo(piece) + '"')
            if wrapped[-1]:
                quoted.append('"' + escapeforpo(wrapped[-1]) + '\\n"')
    if final_part:
        # text after the last newline carries no trailing newline escape
        for piece in wrapline(final_part):
            quoted.append('"' + escapeforpo(piece) + '"')
    return quoted
104
def extractpoline(line):
    """Strip the quotes from a po file line and unescape its content.

    @param line: a quoted line from a po file (msgid or msgstr)
    @return: the first element of the result of
        C{quote.extractwithoutquotes}, called with double quotes as
        delimiters, backslash as escape character and L{unescapehandler} as
        escape handler
    """
    results = quote.extractwithoutquotes(line, '"', '"', '\\',
                                         includeescapes=unescapehandler)
    return results[0]
112
def unquotefrompo(postr):
    """Join a list of quoted po lines into one unquoted unicode string.

    @param postr: an iterable of quoted po lines
    @return: the concatenation of L{extractpoline} applied to every line
    """
    pieces = []
    for quoted_line in postr:
        pieces.append(extractpoline(quoted_line))
    return u"".join(pieces)
115
def is_null(lst):
    """Tell whether a list of quoted po lines holds no text.

    @param lst: list of quoted po lines
    @return: True for an empty list or a list whose only element is the
        empty quoted string C{'""'}, False otherwise
    """
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
118
def extractstr(string):
    """Return the part of C{string} from its first to its last double quote.

    Both quotes are included in the result.  When the string contains no
    double quote at all, the result is the last character of the string (if
    any) followed by a double quote.

    @param string: the text to search
    """
    first_quote = string.find('"')
    last_quote = string.rfind('"')
    if last_quote == -1:
        # No quote found: find() returned -1 as well, so the slice below
        # starts at the last character.
        return string[first_quote:] + '"'
    return string[first_quote:last_quote + 1]
126
class pounit(pocommon.pounit):
    """A single unit (entry) of a gettext .po file.

    Each part of the entry is stored as a list of lines in an attribute of
    the same name (C{msgctxt}, C{msgid}, C{msgid_plural}, C{msgstr}, the
    comment lists, and the C{prev_*} / C{obsolete*} counterparts).  The
    message parts hold quoted po lines.  C{msgstr} is a dict keyed by plural
    index when the target was set from a list or dict.
    """
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomments = []      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create an empty unit and hand C{source} to the base class.

        @param source: passed on unchanged to C{pocommon.pounit.__init__}
        @param encoding: encoding name, normalised through C{encodingToUse}
        """
        self._encoding = encodingToUse(encoding)
        self.obsolete = False
        self._initallcomments(blankall=True)
        self.prev_msgctxt = []
        self.prev_msgid = []
        self.prev_msgid_plural = []
        self.msgctxt = []
        self.msgid = []
        self.msgid_pluralcomments = []
        self.msgid_plural = []
        self.msgstr = []
        self.obsoletemsgctxt = []
        self.obsoletemsgid = []
        self.obsoletemsgid_pluralcomments = []
        self.obsoletemsgid_plural = []
        self.obsoletemsgstr = []
        # The base initialiser runs last, after every attribute exists.
        pocommon.pounit.__init__(self, source)

    def _initallcomments(self, blankall=False):
        """Initialises allcomments

        @param blankall: when true, every comment list is reset to an empty
            list; when false nothing happens
        """
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomments = []
            self.obsoletemsgidcomments = []

    def _get_all_comments(self):
        """Return the six comment lists (the lists themselves, not copies)."""
        return [self.othercomments,
                self.automaticcomments,
                self.sourcecomments,
                self.typecomments,
                self.msgidcomments,
                self.obsoletemsgidcomments]

    allcomments = property(_get_all_comments)

    def _get_source_vars(self, msgid, msgid_plural):
        """Build a multistring from quoted msgid / msgid_plural lines.

        The plural form is only added when this unit has a plural
        (see L{hasplural}).
        """
        multi = multistring(unquotefrompo(msgid), self._encoding)
        if self.hasplural():
            pluralform = unquotefrompo(msgid_plural)
            if isinstance(pluralform, str):
                pluralform = pluralform.decode(self._encoding)
            multi.strings.append(pluralform)
        return multi

    def _set_source_vars(self, source):
        """Quote C{source} into po lines.

        @param source: a byte string (decoded with the unit's encoding), a
            unicode string, a multistring or a list of strings; only the
            first two elements of a list are used
        @return: the tuple C{(msgid, msgid_plural)} of quoted line lists
        """
        msgid = None
        msgid_plural = None
        if isinstance(source, str):
            source = source.decode(self._encoding)
        if isinstance(source, multistring):
            source = source.strings
        if isinstance(source, list):
            msgid = quoteforpo(source[0])
            if len(source) > 1:
                msgid_plural = quoteforpo(source[1])
            else:
                msgid_plural = []
        else:
            msgid = quoteforpo(source)
            msgid_plural = []
        return msgid, msgid_plural

    def getsource(self):
        """Returns the unescaped msgid"""
        return self._get_source_vars(self.msgid, self.msgid_plural)

    def setsource(self, source):
        """Sets the msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self._rich_source = None
        self.msgid, self.msgid_plural = self._set_source_vars(source)
    source = property(getsource, setsource)

    def _get_prev_source(self):
        """Returns the unescaped previous msgid"""
        return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)

    def _set_prev_source(self, source):
        """Sets the previous msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
    prev_source = property(_get_prev_source, _set_prev_source)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        if isinstance(self.msgstr, dict):
            multi = multistring(map(unquotefrompo, self.msgstr.values()), self._encoding)
        else:
            multi = multistring(unquotefrompo(self.msgstr), self._encoding)
        return multi

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value

        @param target: a string, multistring, list or dict of strings
        @raise ValueError: when the unit has no plural but C{target} is a
            list or dict that does not have exactly one element
        """
        self._rich_target = None
        if isinstance(target, str):
            target = target.decode(self._encoding)
        if self.hasplural():
            if isinstance(target, multistring):
                target = target.strings
            elif isinstance(target, basestring):
                target = [target]
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        # A list or dict target is stored as a dict keyed by plural index,
        # anything else as a plain list of quoted lines.
        if isinstance(target, list):
            self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in enumerate(target)])
        elif isinstance(target, dict):
            self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()])
        else:
            self.msgstr = quoteforpo(target)
    target = property(gettarget, settarget)

    def getalttrans(self):
        """Return a list of alternate units.

        Previous msgid and current msgstr is combined to form a single
        alternative unit."""
        prev_source = self.prev_source
        if prev_source and self.isfuzzy():
            unit = type(self)(prev_source)
            unit.target = self.target
            # Already released versions of Virtaal (0.6.x) only supported XLIFF
            # alternatives, and expect .xmlelement.get().
            # This can be removed soon:
            unit.xmlelement = dict()
            return [unit]
        return []

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer, source code and translator)

        @param origin: None for translator plus automatic comments,
            "translator" for the other comments only, or one of "programmer",
            "developer", "source code" for the automatic comments only
        @raise ValueError: for any other origin
        """
        if origin is None:
            comments = u"".join([comment[2:] for comment in self.othercomments])
            comments += u"".join([comment[3:] for comment in self.automaticcomments])
        elif origin == "translator":
            comments = u"".join([comment[2:] for comment in self.othercomments])
        elif origin in ["programmer", "developer", "source code"]:
            comments = u"".join([comment[3:] for comment in self.automaticcomments])
        else:
            raise ValueError("Comment type not valid")
        # Let's drop the last newline
        return comments[:-1]

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: the note; ignored when empty or whitespace only
        @param origin: "programmer", "developer" or "source code" select the
            automatic ("#. ") comments, anything else the translator ("# ")
            comments
        @param position: "append" or "prepend"; with any other value the new
            lines replace the existing comments
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        commentlist = self.othercomments
        linestart = "# "
        autocomments = False
        if origin in ["programmer", "developer", "source code"]:
            autocomments = True
            commentlist = self.automaticcomments
            linestart = "#. "
        text = text.split("\n")
        newcomments = [linestart + line + "\n" for line in text]
        if position == "append":
            newcomments = commentlist + newcomments
        elif position == "prepend":
            newcomments = newcomments + commentlist

        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Copy the unit: attributes named in C{__shallow__} are shared with
        the original, every other attribute is deep-copied.

        @param memo: the memo dictionary supplied by C{copy.deepcopy}
        """
        # A fresh dict per call: a mutable default would be shared by every
        # direct call and grow forever.
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.iteritems():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value))
        # Make shallow copies of all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        # Mark memo with ourself, so that we won't get deep copied
        # again
        memo[id(self)] = self
        # Return our copied unit
        return new_unit

    def copy(self):
        """Return a copy of this unit made with C{copy.deepcopy}."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Length of the unquoted msgid, plus the plural form if there is one."""
        if self.hasplural():
            return len(unquotefrompo(self.msgid)) + len(unquotefrompo(self.msgid_plural))
        else:
            return len(unquotefrompo(self.msgid))

    def _msgstrlen(self):
        """Length of the unquoted msgstr; plural forms are joined with a
        newline before measuring."""
        if isinstance(self.msgstr, dict):
            combinedstr = "\n".join([unquotefrompo(msgstr) for msgstr in self.msgstr.itervalues()])
            return len(combinedstr)
        else:
            return len(unquotefrompo(self.msgstr))

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        @param otherpo: the unit to merge from; when it is not a pounit the
            merge is delegated to the base class
        @param authoritative: when True, automatic comments, msgid comments
            and source comments of C{otherpo} are not brought across
        """

        def mergelists(list1, list2, split=False):
            """Add to list1 (in place) the entries of list2 it lacks."""
            #decode where necessary
            if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
                for position, item in enumerate(list1):
                    if isinstance(item, str):
                        list1[position] = item.decode("utf-8")
                for position, item in enumerate(list2):
                    if isinstance(item, str):
                        list2[position] = item.decode("utf-8")

            #Determine the newline style of list1
            lineend = ""
            if list1 and list1[0]:
                # The last matching candidate wins.
                # NOTE(review): "\n\r" looks like it may have been meant as
                # "\r\n" - confirm before changing.
                for candidate in ["\n", "\r", "\n\r"]:
                    if list1[0].endswith(candidate):
                        lineend = candidate
            else:
                lineend = "\n"

            #Split if directed to do so:
            if split:
                splitlist1 = []
                splitlist2 = []
                prefix = "#"
                for item in list1:
                    splitlist1.extend(item.split()[1:])
                    prefix = item.split()[0]
                for item in list2:
                    splitlist2.extend(item.split()[1:])
                    prefix = item.split()[0]
                list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
            else:
                #Normal merge, but conform to list1 newline style
                if list1 != list2:
                    for item in list2:
                        if lineend:
                            item = item.rstrip() + lineend
                        # avoid duplicate comment lines (this might cause some problems)
                        if item not in list1 or len(item) < 5:
                            list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
                mergelists(self.msgidcomments, otherpo.msgidcomments)
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any).
            if self._extract_msgidcomments(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Whether this unit is a header: null msgid and msgctxt, no msgid
        comments, and a msgstr that is not null."""
        #return (self._msgidlen() == 0) and (self._msgstrlen() > 0) and (len(self.msgidcomments) == 0)
        #rewritten here for performance:
        return (is_null(self.msgid)
                and not is_null(self.msgstr)
                and self.msgidcomments == []
                and is_null(self.msgctxt)
               )

    def isblank(self):
        """Whether the unit has no msgid text, no msgstr text, no msgctxt and
        no msgid comments (a header is never blank)."""
        if self.isheader() or len(self.msgidcomments):
            return False
        if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
            return True
        return False
        # TODO: remove:
        # Before, the equivalent of the following was the final return statement:
        # return len(self.source.strip()) == 0

    def hastypecomment(self, typecomment):
        """Check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression...
        pattern = "\\b%s\\b" % typecomment
        for tcline in self.typecomments:
            if re.findall(pattern, tcline):
                return True
        return False

    def hasmarkedcomment(self, commentmarker):
        """Check whether the given comment marker is present as # (commentmarker) ..."""
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.replace("#", "", 1).strip().startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """Alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) != present:
            if present:
                if len(self.typecomments):
                    # There is already a comment, so we have to add onto it
                    self.typecomments[0] = "%s, %s\n" % (self.typecomments[0][:-1], typecomment)
                else:
                    self.typecomments.append("#, %s\n" % typecomment)
            else:
                # this should handle word boundaries properly ...
                pattern = "\\b%s\\b[ \t,]*" % typecomment
                typecomments = [re.sub(pattern, "", tcline) for tcline in self.typecomments]
                # drop lines that are left with nothing but the "#," marker
                self.typecomments = [tcline for tcline in typecomments if tcline.strip() != "#,"]

    def isfuzzy(self):
        """Return the base class's fuzzy flag after a consistency check.

        @raise ValueError: when the presence of the 'fuzzy' type comment
            disagrees with the unit's numeric state
        """
        state_isfuzzy = self.STATE[self.S_FUZZY][0] <= self.get_state_n() < self.STATE[self.S_FUZZY][1]
        if self.hastypecomment('fuzzy') != state_isfuzzy:
            raise ValueError('Inconsistent fuzzy state')
        return super(pounit, self).isfuzzy()

    def markfuzzy(self, present=True):
        """Set the unit's state to fuzzy, or - when C{present} is false - to
        untranslated (null msgstr) or translated."""
        if present:
            self.set_state_n(self.STATE[self.S_FUZZY][0])
        elif is_null(self.msgstr):
            self.set_state_n(self.STATE[self.S_UNTRANSLATED][0])
        else:
            self.set_state_n(self.STATE[self.S_TRANSLATED][0])

    def _domarkfuzzy(self, present=True):
        """Add or remove the 'fuzzy' type comment."""
        self.settypecomment("fuzzy", present)

    def infer_state(self):
        """Derive the unit's state from its C{obsolete} flag and its 'fuzzy'
        type comment."""
        if self.obsolete:
            self.makeobsolete()
        else:
            self.markfuzzy(self.hastypecomment('fuzzy'))

    def isobsolete(self):
        """Return the C{obsolete} flag."""
        return self.obsolete

    def makeobsolete(self):
        """Makes this unit obsolete"""
        self.obsolete = True
        if self.msgctxt:
            self.obsoletemsgctxt = self.msgctxt
        if self.msgid:
            self.obsoletemsgid = self.msgid
            self.msgid = []
        if self.msgidcomments:
            self.obsoletemsgidcomments = self.msgidcomments
            self.msgidcomments = []
        if self.msgid_plural:
            self.obsoletemsgid_plural = self.msgid_plural
            self.msgid_plural = []
        if self.msgstr:
            self.obsoletemsgstr = self.msgstr
            self.msgstr = []
        self.sourcecomments = []
        self.automaticcomments = []

    def resurrect(self):
        """Makes an obsolete unit normal"""
        self.obsolete = False
        if self.obsoletemsgctxt:
            # The context goes back into msgctxt (not msgid, which the next
            # branch fills).
            self.msgctxt = self.obsoletemsgctxt
            self.obsoletemsgctxt = []
        if self.obsoletemsgid:
            self.msgid = self.obsoletemsgid
            self.obsoletemsgid = []
        if self.obsoletemsgidcomments:
            self.msgidcomments = self.obsoletemsgidcomments
            self.obsoletemsgidcomments = []
        if self.obsoletemsgid_plural:
            self.msgid_plural = self.obsoletemsgid_plural
            self.obsoletemsgid_plural = []
        if self.obsoletemsgstr:
            self.msgstr = self.obsoletemsgstr
            self.obsoletemsgstr = []

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        return len(self.msgid_plural) > 0

    def parse(self, src):
        """Parse the po source text C{src} into this unit via C{poparser}."""
        return poparser.parse_unit(poparser.ParseState(cStringIO.StringIO(src), pounit), self)

    def _getmsgpartstr(self, partname, partlines, partcomments=""):
        """Format one message part (e.g. msgid or msgstr) as po text.

        @param partname: the keyword that starts the part, e.g. "msgid"
        @param partlines: list of quoted lines, or a dict of such lists keyed
            by plural index (formatted as C{partname[index]} in key order)
        @param partcomments: quoted comment lines to place inside the part
        @return: the formatted text, ending in a newline
        """
        if isinstance(partlines, dict):
            partkeys = sorted(partlines.keys())
            return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
        partstr = partname + " "
        partstartline = 0
        if len(partlines) > 0 and len(partcomments) == 0:
            partstr += partlines[0]
            partstartline = 1
        elif len(partcomments) > 0:
            if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
                # if there is a blank leader line, it must come before the comment
                partstr += partlines[0] + '\n'
                # but if the whole string is blank, leave it in
                if len(partlines) > 1:
                    partstartline += 1
            else:
                # All partcomments should start on a newline
                partstr += '""\n'
            # combine comments into one if more than one
            if len(partcomments) > 1:
                combinedcomment = []
                for comment in partcomments:
                    comment = unquotefrompo([comment])
                    if comment.startswith("_:"):
                        comment = comment[len("_:"):]
                    if comment.endswith("\\n"):
                        comment = comment[:-len("\\n")]
                    #Before we used to strip. Necessary in some cases?
                    combinedcomment.append(comment)
                partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
            # comments first, no blank leader line needed
            partstr += "\n".join(partcomments)
            partstr = quote.rstripeol(partstr)
        else:
            partstr += '""'
        partstr += '\n'
        # add the rest
        for partline in partlines[partstartline:]:
            partstr += partline + '\n'
        return partstr

    def _encodeifneccessary(self, output):
        """encodes unicode strings and returns other strings unchanged"""
        if isinstance(output, unicode):
            encoding = encodingToUse(getattr(self, "_encoding", "UTF-8"))
            return output.encode(encoding)
        return output

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        output = self._getoutput()
        return self._encodeifneccessary(output)

    def _getoutput(self):
        """return this po element as a string"""
        def add_prev_msgid_lines(lines, prefix, header, var):
            """Append the previous-value lines of one part, each prefixed."""
            if len(var) > 0:
                lines.append("%s %s %s\n" % (prefix, header, var[0]))
                lines.extend("%s %s\n" % (prefix, line) for line in var[1:])

        def add_prev_msgid_info(lines, prefix):
            """Append previous msgctxt, msgid and msgid_plural, in that order."""
            add_prev_msgid_lines(lines, prefix, 'msgctxt', self.prev_msgctxt)
            add_prev_msgid_lines(lines, prefix, 'msgid', self.prev_msgid)
            add_prev_msgid_lines(lines, prefix, 'msgid_plural', self.prev_msgid_plural)

        lines = []
        lines.extend(self.othercomments)
        if self.isobsolete():
            lines.extend(self.typecomments)
            obsoletelines = []
            add_prev_msgid_info(obsoletelines, prefix="#~|")
            if self.obsoletemsgctxt:
                obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
            obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
            if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
                obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
            obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
            for index, obsoleteline in enumerate(obsoletelines):
                # We need to account for a multiline msgid or msgstr here
                obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
            lines.extend(obsoletelines)
            return u"".join(lines)
        # if there's no msgid don't do msgid and string, unless we're the header
        # this will also discard any comments other than plain othercomments...
        if is_null(self.msgid):
            if not (self.isheader() or self.getcontext() or self.sourcecomments):
                return u"".join(lines)
        lines.extend(self.automaticcomments)
        lines.extend(self.sourcecomments)
        lines.extend(self.typecomments)
        add_prev_msgid_info(lines, prefix="#|")
        if self.msgctxt:
            lines.append(self._getmsgpartstr(u"msgctxt", self.msgctxt))
        lines.append(self._getmsgpartstr(u"msgid", self.msgid, self.msgidcomments))
        if self.msgid_plural or self.msgid_pluralcomments:
            lines.append(self._getmsgpartstr(u"msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
        lines.append(self._getmsgpartstr(u"msgstr", self.msgstr))
        postr = u"".join(lines)
        return postr

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        locations = []
        for sourcecomment in self.sourcecomments:
            locations += quote.rstripeol(sourcecomment)[3:].split()
        # undo the quoting applied by addlocation()
        for i, loc in enumerate(locations):
            locations[i] = urllib.unquote_plus(loc)
        return locations

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String

        """
        # Locations are split on whitespace when read back, so one that
        # contains a space is stored URL-quoted.
        if location.find(" ") != -1:
            location = urllib.quote_plus(location)
        self.sourcecomments.append("#: %s\n" % location)

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @param text: text to extract from; when empty or None the unit's own
            msgidcomments are used
        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """

        if not text:
            text = unquotefrompo(self.msgidcomments)
        return text.split('\n')[0].replace('_: ', '', 1)

    def setmsgidcomment(self, msgidcomment):
        """Replace the msgid comments with a single KDE style comment, or
        clear them when C{msgidcomment} is empty."""
        if msgidcomment:
            self.msgidcomments = ['"_: %s\\n"' % msgidcomment]
        else:
            self.msgidcomments = []

    msgidcomment = property(_extract_msgidcomments, setmsgidcomment)

    def getcontext(self):
        """Get the message context."""
        return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()

    def setcontext(self, context):
        """Set msgctxt to the quoted form of C{context}."""
        context = data.forceunicode(context)
        self.msgctxt = quoteforpo(context)

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
        # id = '\0'.join(self.source.strings)
        unit_id = self.source
        if self.msgidcomments:
            unit_id = u"_: %s\n%s" % (context, unit_id)
        elif context:
            unit_id = u"%s\04%s" % (context, unit_id)
        return unit_id
726 727
728 -class pofile(pocommon.pofile):
729 """A .po file containing various units""" 730 UnitClass = pounit 731
732 - def parse(self, input):
733 """Parses the given file or file source string.""" 734 try: 735 if hasattr(input, 'name'): 736 self.filename = input.name 737 elif not getattr(self, 'filename', ''): 738 self.filename = '' 739 if isinstance(input, str): 740 input = cStringIO.StringIO(input) 741 # clear units to get rid of automatically generated headers before parsing 742 self.units = [] 743 poparser.parse_units(poparser.ParseState(input, pounit), self) 744 except Exception, e: 745 raise base.ParseError(e)
746
747 - def removeduplicates(self, duplicatestyle="merge"):
748 """Make sure each msgid is unique ; merge comments etc from duplicates into original""" 749 # TODO: can we handle consecutive calls to removeduplicates()? What 750 # about files already containing msgctxt? - test 751 id_dict = {} 752 uniqueunits = [] 753 # TODO: this is using a list as the pos aren't hashable, but this is slow. 754 # probably not used frequently enough to worry about it, though. 755 markedpos = [] 756 def addcomment(thepo): 757 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations())) 758 markedpos.append(thepo)
759 for thepo in self.units: 760 id = thepo.getid() 761 if thepo.isheader() and not thepo.getlocations(): 762 # header msgids shouldn't be merged... 763 uniqueunits.append(thepo) 764 elif id in id_dict: 765 if duplicatestyle == "merge": 766 if id: 767 id_dict[id].merge(thepo) 768 else: 769 addcomment(thepo) 770 uniqueunits.append(thepo) 771 elif duplicatestyle == "msgctxt": 772 origpo = id_dict[id] 773 if origpo not in markedpos: 774 origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations()))) 775 markedpos.append(thepo) 776 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 777 uniqueunits.append(thepo) 778 else: 779 if not id: 780 if duplicatestyle == "merge": 781 addcomment(thepo) 782 else: 783 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 784 id_dict[id] = thepo 785 uniqueunits.append(thepo) 786 self.units = uniqueunits
787
788 - def __str__(self):
789 """Convert to a string. double check that unicode is handled somehow here""" 790 output = self._getoutput() 791 if isinstance(output, unicode): 792 return output.encode(getattr(self, "_encoding", "UTF-8")) 793 return output
794
795 - def _getoutput(self):
796 """convert the units back to lines""" 797 lines = [] 798 for unit in self.units: 799 unitsrc = unit._getoutput() + u"\n" 800 lines.append(unitsrc) 801 lines = u"".join(lines).rstrip() 802 #After the last pounit we will have \n\n and we only want to end in \n: 803 if lines: 804 lines += u"\n" 805 return lines
806
807 - def encode(self, lines):
808 """encode any unicode strings in lines in self._encoding""" 809 newlines = [] 810 encoding = self._encoding 811 if encoding is None or encoding.lower() == "charset": 812 encoding = 'UTF-8' 813 for line in lines: 814 if isinstance(line, unicode): 815 line = line.encode(encoding) 816 newlines.append(line) 817 return newlines
818
819 - def decode(self, lines):
820 """decode any non-unicode strings in lines with self._encoding""" 821 newlines = [] 822 for line in lines: 823 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset": 824 try: 825 line = line.decode(self._encoding) 826 except UnicodeError, e: 827 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line)) 828 newlines.append(line) 829 return newlines
830
831 - def unit_iter(self):
832 for unit in self.units: 833 if not (unit.isheader() or unit.isobsolete()): 834 yield unit
835