Package translate :: Package storage :: Module base
[hide private]
[frames | no frames]

Source Code for Module translate.storage.base

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2006-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Base classes for storage interfaces. 
 22   
 23  @organization: Zuza Software Foundation 
 24  @copyright: 2006-2009 Zuza Software Foundation 
 25  @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>} 
 26  """ 
 27   
 28  try: 
 29      import cPickle as pickle 
 30  except ImportError: 
 31      import pickle 
 32  from exceptions import NotImplementedError 
 33  import translate.i18n 
 34  from translate.storage.placeables import StringElem, general, parse as rich_parse 
 35  from translate.storage.workflow import StateEnum as states 
 36  from translate.misc.typecheck import accepts, Self, IsOneOf 
 37  from translate.misc.multistring import multistring 
 38   
 39   
def force_override(method, baseclass):
    """Raise unless C{method} belongs to C{baseclass} itself.

    The class that C{method} is bound through is worked out first: for a
    classmethod C{im_self} is the class itself, otherwise C{im_class} is
    used.  If that class is anything other than C{baseclass}, the method is
    considered not to have been reimplemented.

    @param method: A method object exposing C{im_self}, C{im_class} and
        C{__name__}.
    @param baseclass: The class that declares the method to be overridden.
    @raise NotImplementedError: If the class C{method} is bound through is
        not C{baseclass}.
    """
    bound_to = method.im_self
    if type(bound_to) == type(baseclass):
        # im_self is itself a class, so this is a classmethod and im_self
        # is the class we are interested in.
        owner = bound_to
    else:
        owner = method.im_class

    if owner != baseclass:
        message = "%s does not reimplement %s as required by %s" % (
            owner.__name__, method.__name__, baseclass.__name__)
        raise NotImplementedError(message)


class ParseError(Exception):
    """Exception that carries another exception object along with it.

    The wrapped object is kept in C{inner_exc}; the string form of this
    error is the C{repr()} of that object.
    """

    def __init__(self, inner_exc):
        """Remember C{inner_exc} as the underlying cause.

        @param inner_exc: The object (normally an exception) to wrap.
        """
        self.inner_exc = inner_exc

    def __str__(self):
        """Return the C{repr()} of the wrapped object."""
        # The one-tuple keeps the formatting correct even if inner_exc is
        # itself a tuple.
        return "%r" % (self.inner_exc,)


class TranslationUnit(object):
    """Base class for translation units.

    Our concept of a I{translation unit} is influenced heavily by XLIFF:
    U{http://www.oasis-open.org/committees/xliff/documents/xliff-specification.htm}

    As such most of the method and variable names borrow from XLIFF
    terminology.

    A translation unit consists of the following:
      - A I{source} string. This is the original translatable text.
      - A I{target} string. This is the translation of the I{source}.
      - Zero or more I{notes} on the unit. Notes would typically be some
        comments from a translator on the unit, or some comments originating
        from the source code.
      - Zero or more I{locations}. Locations indicate where in the original
        source code this unit came from.
      - Zero or more I{errors}. Some tools (eg. L{pofilter<filters.pofilter>})
        can run checks on translations and produce error messages.

    @group Source: *source*
    @group Target: *target*
    @group Notes: *note*
    @group Locations: *location*
    @group Errors: *error*
    """

    rich_parsers = []
    """A list of functions to use for parsing a string into a rich string
    tree."""

    # State constants
    S_OBSOLETE = states.OBSOLETE
    S_EMPTY = states.EMPTY
    S_NEEDS_WORK = states.NEEDS_WORK
    S_REJECTED = states.REJECTED
    S_NEEDS_REVIEW = states.NEEDS_REVIEW
    S_UNREVIEWED = states.UNREVIEWED
    S_FINAL = states.FINAL

    # Maps each state id to the half-open range [lower, upper) of numeric
    # state values that belong to it (see get_state_id()).
    STATE = {
        S_OBSOLETE: (states.OBSOLETE, states.EMPTY),
        S_EMPTY: (states.EMPTY, states.NEEDS_WORK),
        S_NEEDS_WORK: (states.NEEDS_WORK, states.REJECTED),
        S_REJECTED: (states.REJECTED, states.NEEDS_REVIEW),
        S_NEEDS_REVIEW: (states.NEEDS_REVIEW, states.UNREVIEWED),
        S_UNREVIEWED: (states.UNREVIEWED, states.FINAL),
        S_FINAL: (states.FINAL, states.MAX),
    }
    """
    Default supported states:
      * obsolete: The unit is not to be used.
      * empty: The unit has not been translated before.
      * needs work: Some translation has been done, but is not complete.
      * rejected: The unit has been reviewed, but was rejected.
      * needs review: The unit has been translated, but review was requested.
      * unreviewed: The unit has been translated, but not reviewed.
      * final: The unit is translated, reviewed and accepted.
    """

    def __init__(self, source):
        """Constructs a TranslationUnit containing the given source string."""
        self.notes = ""
        self._store = None
        self.source = source
        self._target = None
        self._rich_source = None
        self._rich_target = None
        self._state_n = 0

    def __eq__(self, other):
        """Compares two TranslationUnits.

        @type other: L{TranslationUnit}
        @param other: Another L{TranslationUnit}
        @rtype: Boolean
        @return: Returns True if the supplied TranslationUnit equals this unit.
        """
        # NOTE(review): only source and target are compared, and no __ne__
        # is defined alongside this method.
        return self.source == other.source and self.target == other.target

    def __str__(self):
        """Converts to a string representation that can be parsed back using
        L{parsestring()}."""
        # no point in pickling store object, so let's hide it for a while.
        store = getattr(self, "_store", None)
        self._store = None
        try:
            dump = pickle.dumps(self)
        finally:
            # Put the store back even if pickling fails, so that an error
            # here does not silently detach the unit from its store.
            self._store = store
        return dump

    def rich_to_multistring(cls, elem_list):
        """Convert a "rich" string tree to a C{multistring}:

            >>> from translate.storage.placeables.interfaces import X
            >>> rich = [StringElem(['foo', X(id='xxx', sub=[' ']), 'bar'])]
            >>> TranslationUnit.rich_to_multistring(rich)
            multistring(u'foo bar')
        """
        return multistring([unicode(elem) for elem in elem_list])
    rich_to_multistring = classmethod(rich_to_multistring)

    def multistring_to_rich(cls, mulstring):
        """Convert a multistring to a list of "rich" string trees:

            >>> target = multistring([u'foo', u'bar', u'baz'])
            >>> TranslationUnit.multistring_to_rich(target)
            [<StringElem([<StringElem([u'foo'])>])>,
             <StringElem([<StringElem([u'bar'])>])>,
             <StringElem([<StringElem([u'baz'])>])>]

        Anything that is not a C{multistring} is parsed as a single string
        and returned as a one-element list.
        """
        # This is a classmethod (like rich_to_multistring) so that it can be
        # called on the class as the example above does; calling it on an
        # instance keeps working.
        if isinstance(mulstring, multistring):
            return [rich_parse(s, cls.rich_parsers) for s in mulstring.strings]
        return [rich_parse(mulstring, cls.rich_parsers)]
    multistring_to_rich = classmethod(multistring_to_rich)

    def setsource(self, source):
        """Sets the source string to the given value."""
        # The cached rich tree no longer matches; drop it.
        self._rich_source = None
        self._source = source
    source = property(lambda self: self._source, setsource)

    def settarget(self, target):
        """Sets the target string to the given value."""
        # The cached rich tree no longer matches; drop it.
        self._rich_target = None
        self._target = target
    target = property(lambda self: self._target, settarget)

    def _get_rich_source(self):
        """Return the rich form of the source, parsing it on first use."""
        if self._rich_source is None:
            self._rich_source = self.multistring_to_rich(self.source)
        return self._rich_source

    def _set_rich_source(self, value):
        """Set the source from a non-empty iterable of L{StringElem} trees.

        @raise ValueError: If C{value} is not iterable, is empty, or its
            first element is not a L{StringElem}.
        """
        if not hasattr(value, '__iter__'):
            raise ValueError('value must be iterable')
        if len(value) < 1:
            raise ValueError('value must have at least one element.')
        if not isinstance(value[0], StringElem):
            raise ValueError('value[0] must be of type StringElem.')
        self._rich_source = list(value)
        multi = self.rich_to_multistring(value)
        # NOTE(review): with the base class setter, assigning to self.source
        # clears _rich_source again, so the tree stored above only survives
        # when the plain source is unchanged.
        if self.source != multi:
            self.source = multi
    rich_source = property(_get_rich_source, _set_rich_source)
    """ @see: rich_to_multistring
        @see: multistring_to_rich"""

    def _get_rich_target(self):
        """Return the rich form of the target, parsing it on first use."""
        if self._rich_target is None:
            self._rich_target = self.multistring_to_rich(self.target)
        return self._rich_target

    def _set_rich_target(self, value):
        """Set the target from a non-empty iterable of L{StringElem} trees.

        @raise ValueError: If C{value} is not iterable, is empty, or its
            first element is not a L{StringElem}.
        """
        if not hasattr(value, '__iter__'):
            raise ValueError('value must be iterable')
        if len(value) < 1:
            raise ValueError('value must have at least one element.')
        if not isinstance(value[0], StringElem):
            raise ValueError('value[0] must be of type StringElem.')
        self._rich_target = list(value)
        # NOTE(review): with the base class setter, this assignment clears
        # _rich_target again; the rich form is then re-parsed on next access.
        self.target = self.rich_to_multistring(value)
    rich_target = property(_get_rich_target, _set_rich_target)
    """ @see: rich_to_multistring
        @see: multistring_to_rich"""

    def gettargetlen(self):
        """Returns the length of the target string.

        @note: Plural forms might be combined.
        @rtype: Integer
        """
        length = len(self.target or "")
        strings = getattr(self.target, "strings", [])
        if strings:
            # Add the lengths of every plural form after the first one.
            length += sum([len(pluralform) for pluralform in strings[1:]])
        return length

    def getid(self):
        """A unique identifier for this unit.

        @rtype: string
        @return: an identifier for this unit that is unique in the store

        Derived classes should override this in a way that guarantees a unique
        identifier for each unit in the store.
        """
        return self.source

    def setid(self, value):
        """Sets the unique identifier for this unit.

        Only implemented if the format allows ids independent from other
        unit properties like source or context."""
        pass

    def getlocations(self):
        """A list of source code locations.

        @note: Shouldn't be implemented if the format doesn't support it.
        @rtype: List
        """
        return []

    def addlocation(self, location):
        """Add one location to the list of locations.

        @note: Shouldn't be implemented if the format doesn't support it.
        """
        pass

    def addlocations(self, location):
        """Add a location or a list of locations.

        @note: Most classes shouldn't need to implement this,
               but should rather implement L{addlocation()}.
        @warning: This method might be removed in future.
        """
        if isinstance(location, list):
            for item in location:
                self.addlocation(item)
        else:
            self.addlocation(location)

    def getcontext(self):
        """Get the message context."""
        return ""

    def setcontext(self, context):
        """Set the message context"""
        pass

    def getnotes(self, origin=None):
        """Returns all notes about this unit.

        It will probably be freeform text or something reasonable that can be
        synthesised by the format.
        It should not include location comments (see L{getlocations()}).

        @param origin: Not used by this base implementation.
        """
        return getattr(self, "notes", "")

    def addnote(self, text, origin=None, position="append"):
        """Adds a note (comment).

        @type text: string
        @param text: Usually just a sentence or two.
        @type origin: string
        @param origin: Specifies who/where the comment comes from.
                       Origin can be one of the following text strings:
                         - 'translator'
                         - 'developer', 'programmer', 'source code' (synonyms)
                       Not used by this base implementation.
        @param position: With "append" (the default) the text is added on a
                         new line after any existing notes; any other value
                         replaces the existing notes.
        """
        if position == "append" and getattr(self, "notes", None):
            self.notes += '\n' + text
        else:
            self.notes = text

    def removenotes(self):
        """Remove all the translator's notes."""
        self.notes = u''

    def adderror(self, errorname, errortext):
        """Adds an error message to this unit.

        @type errorname: string
        @param errorname: A single word to id the error.
        @type errortext: string
        @param errortext: The text describing the error.
        """
        pass

    def geterrors(self):
        """Get all error messages.

        @rtype: Dictionary
        """
        return {}

    def markreviewneeded(self, needsreview=True, explanation=None):
        """Marks the unit to indicate whether it needs review.

        @keyword needsreview: Defaults to True.
        @keyword explanation: Adds an optional explanation as a note.
        """
        pass

    def istranslated(self):
        """Indicates whether this unit is translated.

        This should be used rather than deducing it from .target,
        to ensure that other classes can implement more functionality
        (as XLIFF does).
        """
        return bool(self.target) and not self.isfuzzy()

    def istranslatable(self):
        """Indicates whether this unit can be translated.

        This should be used to distinguish real units for translation from
        header, obsolete, binary or other blank units.
        """
        return bool(self.source)

    def isfuzzy(self):
        """Indicates whether this unit is fuzzy."""
        return False

    def markfuzzy(self, value=True):
        """Marks the unit as fuzzy or not."""
        pass

    def isobsolete(self):
        """Indicates whether this unit is obsolete."""
        return False

    def makeobsolete(self):
        """Make a unit obsolete"""
        pass

    def isheader(self):
        """Indicates whether this unit is a header."""
        return False

    def isreview(self):
        """Indicates whether this unit needs review."""
        return False

    def isblank(self):
        """Used to see if this unit has no source or target string.

        @note: This is probably used more to find translatable units,
        and we might want to move in that direction rather and get rid of this.
        """
        return not (self.source or self.target)

    def hasplural(self):
        """Tells whether or not this specific unit has plural strings."""
        #TODO: Reconsider
        return False

    def getsourcelanguage(self):
        """Return the source language reported by the unit's store."""
        # NOTE(review): _store is None until the unit is attached to a
        # store, in which case this raises AttributeError.
        return self._store.getsourcelanguage()

    def gettargetlanguage(self):
        """Return the target language reported by the unit's store."""
        # NOTE(review): _store is None until the unit is attached to a
        # store, in which case this raises AttributeError.
        return self._store.gettargetlanguage()

    def merge(self, otherunit, overwrite=False, comments=True,
              authoritative=False):
        """Do basic format agnostic merging.

        Takes over the rich target of C{otherunit} when this unit has no
        target yet or when C{overwrite} is set.  C{comments} and
        C{authoritative} are not used by this base implementation.
        """
        if not self.target or overwrite:
            self.rich_target = otherunit.rich_target

    def unit_iter(self):
        """Iterator that only returns this unit."""
        yield self

    def getunits(self):
        """This unit in a list."""
        return [self]

    def buildfromunit(cls, unit):
        """Build a native unit from a foreign unit, preserving as much
        information as possible."""
        # A unit of exactly this class that knows how to copy itself is
        # simply copied.
        if type(unit) == cls and hasattr(unit, "copy") and callable(unit.copy):
            return unit.copy()
        newunit = cls(unit.source)
        newunit.target = unit.target
        newunit.markfuzzy(unit.isfuzzy())
        locations = unit.getlocations()
        if locations:
            newunit.addlocations(locations)
        notes = unit.getnotes()
        if notes:
            newunit.addnote(notes)
        return newunit
    buildfromunit = classmethod(buildfromunit)

    # In this base class xid and rid always read as None and ignore writes.
    xid = property(lambda self: None, lambda self, value: None)
    rid = property(lambda self: None, lambda self, value: None)

    def get_state_id(self, n=None):
        """Return the id of the state whose range contains C{n}.

        @param n: A numeric state value; defaults to the unit's current
            C{_state_n}.
        @return: The key in L{STATE} whose C{(lower, upper)} range satisfies
            C{lower <= n < upper}.
        @raise ValueError: If no range in L{STATE} contains C{n}.
        """
        if n is None:
            n = self._state_n
        for state_id, state_range in self.STATE.iteritems():
            if state_range[0] <= n < state_range[1]:
                # Return the id of the matching state, not the raw value
                # that was looked up.
                return state_id
        raise ValueError('No state containing value %s' % (n,))

    def get_state_n(self):
        """Return the numeric state value of this unit."""
        return self._state_n

    def set_state_n(self, value):
        """Set the numeric state value of this unit."""
        self._state_n = value

    def infer_state(self):
        """Empty method that should be overridden in sub-classes to infer the
        current state(_n) of the unit from its current state."""
        pass


class TranslationStore(object):
    """Base class for stores for multiple translation units of type
    UnitClass."""

    UnitClass = TranslationUnit
    """The class of units that will be instantiated and used by this class"""
    Name = "Base translation store"
    """The human usable name of this store type"""
    Mimetypes = None
    """A list of MIME types associated with this store type"""
    Extensions = None
    """A list of file extensions associated with this store type"""
    _binary = False
    """Indicates whether a file should be accessed as a binary file."""
    suggestions_in_format = False
    """Indicates if format can store suggestions and alternative translation
    for a unit"""

    def __init__(self, unitclass=None):
        """Constructs a blank TranslationStore.

        @param unitclass: Optional unit class to use for this instance
            instead of the class-level L{UnitClass}.
        """
        self.units = []
        self.sourcelanguage = None
        self.targetlanguage = None
        if unitclass:
            self.UnitClass = unitclass
        super(TranslationStore, self).__init__()

    def getsourcelanguage(self):
        """Gets the source language for this store"""
        return self.sourcelanguage

    def setsourcelanguage(self, sourcelanguage):
        """Sets the source language for this store"""
        self.sourcelanguage = sourcelanguage

    def gettargetlanguage(self):
        """Gets the target language for this store"""
        return self.targetlanguage

    def settargetlanguage(self, targetlanguage):
        """Sets the target language for this store"""
        self.targetlanguage = targetlanguage

    def unit_iter(self):
        """Iterator over all the units in this store."""
        for unit in self.units:
            yield unit

    def getunits(self):
        """Return a list of all units in this store."""
        return list(self.unit_iter())

    def addunit(self, unit):
        """Appends the given unit to the object's list of units.

        This method should always be used rather than trying to modify the
        list manually.

        @type unit: L{TranslationUnit}
        @param unit: The unit that will be added.
        """
        unit._store = self
        self.units.append(unit)

    def addsourceunit(self, source):
        """Adds and returns a new unit with the given source string.

        @rtype: L{TranslationUnit}
        """
        unit = self.UnitClass(source)
        self.addunit(unit)
        return unit

    def findid(self, id):
        """Find the unit with the matching id by checking id_index.

        The index is built first if it does not exist yet.

        @rtype: L{TranslationUnit} or None
        """
        self.require_index()
        return self.id_index.get(id, None)

    def findunit(self, source):
        """Finds the unit with the given source string.

        A non-empty C{sourceindex} is used when available; otherwise all
        units are scanned.

        @rtype: L{TranslationUnit} or None
        """
        if len(getattr(self, "sourceindex", [])):
            if source in self.sourceindex:
                return self.sourceindex[source][0]
        else:
            for unit in self.units:
                if unit.source == source:
                    return unit
        return None

    def findunits(self, source):
        """Finds the units with the given source string.

        @rtype: list of L{TranslationUnit}, or None
        @return: The matching units.  When a non-empty C{sourceindex} exists
            and has no entry for C{source}, None is returned; without an
            index the result is always a list (possibly empty).
        """
        if len(getattr(self, "sourceindex", [])):
            if source in self.sourceindex:
                return self.sourceindex[source]
        else:
            #FIXME: maybe we should generate index here instead since
            #we'll scan all units anyway
            return [unit for unit in self.units if unit.source == source]
        return None

    def translate(self, source):
        """Returns the translated string for a given source string.

        @rtype: String or None
        """
        unit = self.findunit(source)
        if unit and unit.target:
            return unit.target
        return None

    def remove_unit_from_index(self, unit):
        """Remove a unit from source and location indexes"""

        def remove_unit(source):
            if source in self.sourceindex:
                try:
                    self.sourceindex[source].remove(unit)
                    if len(self.sourceindex[source]) == 0:
                        del self.sourceindex[source]
                except ValueError:
                    # The unit was not listed under this source; there is
                    # nothing to remove.
                    pass

        if unit.hasplural():
            for source in unit.source.strings:
                remove_unit(source)
        else:
            remove_unit(unit.source)

        for location in unit.getlocations():
            # Entries set to None mark locations shared by several units
            # (see add_unit_to_index) and are left alone.
            if location in self.locationindex \
                   and self.locationindex[location] is not None \
                   and self.locationindex[location] == unit:
                del self.locationindex[location]

    def add_unit_to_index(self, unit):
        """Add a unit to the id, source and location indexes"""
        self.id_index[unit.getid()] = unit

        def insert_unit(source):
            self.sourceindex.setdefault(source, []).append(unit)

        if unit.hasplural():
            for source in unit.source.strings:
                insert_unit(source)
        else:
            insert_unit(unit.source)

        for location in unit.getlocations():
            if location in self.locationindex:
                # if sources aren't unique, don't use them
                #FIXME: maybe better store a list of units like sourceindex
                self.locationindex[location] = None
            else:
                self.locationindex[location] = unit

    def makeindex(self):
        """Indexes the items in this store. At least .sourceindex should be
        useful."""
        self.locationindex = {}
        self.sourceindex = {}
        self.id_index = {}
        for index, unit in enumerate(self.units):
            # Every unit records its position; only translatable units are
            # entered into the lookup indexes.
            unit.index = index
            if unit.istranslatable():
                self.add_unit_to_index(unit)

    def require_index(self):
        """Make sure the indexes exist, building them if C{id_index} is
        missing."""
        if not hasattr(self, "id_index"):
            self.makeindex()

    def getids(self, filename=None):
        """Return a list of unit ids.

        @param filename: Not used by this base implementation.
        """
        self.require_index()
        return self.id_index.keys()

    def __getstate__(self):
        """Return the instance dictionary for pickling, without the file
        object."""
        odict = self.__dict__.copy()
        odict['fileobj'] = None
        return odict

    def __setstate__(self, dict):
        """Restore the instance dictionary and reopen C{filename}, if set."""
        self.__dict__.update(dict)
        if getattr(self, "filename", False):
            self.fileobj = open(self.filename)

    def __str__(self):
        """Converts to a string representation that can be parsed back using
        L{parsestring()}."""
        # We can't pickle fileobj if it is there, so let's hide it for a while.
        fileobj = getattr(self, "fileobj", None)
        self.fileobj = None
        try:
            dump = pickle.dumps(self)
        finally:
            # Put the file object back even if pickling fails.
            self.fileobj = fileobj
        return dump

    def isempty(self):
        """Returns True if the object doesn't contain any translation units."""
        for unit in self.units:
            if unit.istranslatable():
                return False
        return True

    def _assignname(self):
        """Tries to work out what the name of the filesystem file is and
        assigns it to .filename."""
        fileobj = getattr(self, "fileobj", None)
        if fileobj:
            filename = getattr(fileobj, "name",
                               getattr(fileobj, "filename", None))
            if filename:
                self.filename = filename

    def parsestring(cls, storestring):
        """Converts the string representation back to an object."""
        newstore = cls()
        if storestring:
            newstore.parse(storestring)
        return newstore
    parsestring = classmethod(parsestring)

    def parse(self, data):
        """Parser to process the given source string.

        This base implementation expects C{data} to be a pickled store (as
        produced by L{__str__}) and takes over its units.
        """
        # SECURITY: unpickling can execute arbitrary code.  Only pass data
        # from a trusted origin to this base implementation.
        self.units = pickle.loads(data).units

    def savefile(self, storefile):
        """Writes the string representation to the given file (or filename).

        The file is closed afterwards, also when serialising or writing
        fails.
        """
        if isinstance(storefile, basestring):
            mode = 'w'
            if self._binary:
                mode = 'wb'
            storefile = open(storefile, mode)
        self.fileobj = storefile
        try:
            self._assignname()
            storestring = str(self)
            storefile.write(storestring)
        finally:
            storefile.close()

    def save(self):
        """Save to the file that data was originally read from, if
        available.

        @raise ValueError: If there is neither a usable file object nor a
            filename to save to.
        """
        fileobj = getattr(self, "fileobj", None)
        mode = 'w'
        if self._binary:
            mode = 'wb'
        if not fileobj:
            filename = getattr(self, "filename", None)
            if not filename:
                # Without this check None would be handed to savefile().
                raise ValueError("No file or filename to save to")
            fileobj = open(filename, mode)
        else:
            fileobj.close()
            filename = getattr(fileobj, "name",
                               getattr(fileobj, "filename", None))
            if not filename:
                raise ValueError("No file or filename to save to")
            # Reopen for writing with the same class as the original file
            # object.
            fileobj = fileobj.__class__(filename, mode)
        self.savefile(fileobj)

    def parsefile(cls, storefile):
        """Reads the given file (or opens the given filename) and parses back
        to an object."""
        mode = 'r'
        if cls._binary:
            mode = 'rb'
        if isinstance(storefile, basestring):
            storefile = open(storefile, mode)
        mode = getattr(storefile, "mode", mode)
        #For some reason GzipFile returns 1, so we have to test for that here
        if mode == 1 or "r" in mode:
            try:
                storestring = storefile.read()
            finally:
                # Close the file even if reading fails.
                storefile.close()
        else:
            storestring = ""
        newstore = cls.parsestring(storestring)
        newstore.fileobj = storefile
        newstore._assignname()
        return newstore
    parsefile = classmethod(parsefile)