Package translate :: Package filters :: Module decoration
[hide private]
[frames | no frames]

Source Code for Module translate.filters.decoration

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2004-2008 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """functions to get decorative/informative text out of strings...""" 
 23   
 24  import re 
 25  import unicodedata 
 26  from translate.lang import data 
 27   
def spacestart(str1):
    """Return the run of whitespace characters that ``str1`` starts with.

    The result is concatenated onto a ``u""`` literal, and is empty when
    the string does not begin with whitespace.
    """
    leading = 0
    for char in str1:
        if not char.isspace():
            break
        leading += 1
    return u"" + str1[:leading]
37
def spaceend(str1):
    """Return the run of whitespace characters that ``str1`` ends with.

    The result is concatenated onto a ``u""`` literal, and is empty when
    the string does not end with whitespace.
    """
    trailing = 0
    for char in reversed(str1):
        if not char.isspace():
            break
        trailing += 1
    # Slice from an explicit start index: str1[-0:] would be the whole string.
    return u"" + str1[len(str1) - trailing:]
48
def puncstart(str1, punctuation):
    """Return the leading characters of ``str1`` that are punctuation.

    A character counts when it is contained in ``punctuation`` or is
    whitespace; collection stops at the first character that is neither.
    """
    prefix = []
    for char in str1:
        if char not in punctuation and not char.isspace():
            break
        prefix.append(char)
    return u"".join(prefix)
58
def puncend(str1, punctuation):
    """Return the trailing characters of ``str1`` that are punctuation.

    A character counts when it is contained in ``punctuation`` or is
    whitespace.  Any no-break space (U+00A0) in the collected suffix is
    returned as an ordinary space.
    """
    # An implementation with regular expressions was slightly slower.
    suffixlength = 0
    for char in reversed(str1):
        if char not in punctuation and not char.isspace():
            break
        suffixlength += 1
    # Slice from an explicit start index: str1[-0:] would be the whole string.
    suffix = u"" + str1[len(str1) - suffixlength:]
    return suffix.replace(u"\u00a0", u" ")
71
def ispurepunctuation(str1):
    """Check whether ``str1`` contains no alphanumeric characters.

    Returns ``False`` as soon as an alphanumeric character is present;
    otherwise returns the length of the string, so an empty string gives a
    falsy ``0``.
    """
    if any(char.isalnum() for char in str1):
        return False
    return len(str1)
78
def isvalidaccelerator(accelerator, acceptlist=None):
    """Decide whether the given accelerator character is acceptable.

    @type accelerator: character
    @param accelerator: The character to be checked for accelerator validity
    @type acceptlist: String
    @param acceptlist: The characters that are permissible as accelerators;
        when None, the older built-in rules are applied instead
    @rtype: Boolean
    @return: True if the supplied character is an acceptable accelerator
    """
    assert isinstance(accelerator, unicode)
    assert acceptlist is None or isinstance(acceptlist, unicode)
    if not accelerator:
        return False

    if acceptlist is not None:
        # Only the accept list is normalized before the containment test.
        return accelerator in data.normalize(acceptlist)

    # Old code path - ensures that we don't get a large number of regressions
    candidate = accelerator.replace("_", "")
    # An accelerator made only of underscores is empty at this point, and the
    # empty string is contained in any string, so it is accepted here too.
    if candidate in u"-?":
        return True
    if not candidate.isalnum():
        return False

    # We don't want to have accelerators on characters with diacritics, so
    # see whether the character decomposes into more than one code point.
    decomposition = unicodedata.decomposition(candidate)
    # Strip out any extra information like <this>
    decomposition = re.sub("<[^>]+>", "", decomposition).strip()
    return " " not in decomposition
112
def findaccelerators(str1, accelmarker, acceptlist=None):
    """Locate every accelerator in ``str1`` introduced by ``accelmarker``.

    Returns two lists of ``(position, character)`` tuples, where position is
    the index of the marker: first the accelerators that
    ``isvalidaccelerator`` accepts, then the ones it rejects.  A marker that
    is the last thing in the string has no accelerator character and ends
    the search.
    """
    valid = []
    invalid = []
    markerlength = len(accelmarker)
    searchfrom = 0
    while True:
        markerpos = str1.find(accelmarker, searchfrom)
        if markerpos < 0:
            break
        # we assume accelerators are single characters
        charstart = markerpos + markerlength
        charend = charstart + 1
        if charend > len(str1):
            break
        accelerator = str1[charstart:charend]
        searchfrom = charend
        if isvalidaccelerator(accelerator, acceptlist):
            valid.append((markerpos, accelerator))
        else:
            invalid.append((markerpos, accelerator))
    return valid, invalid
134
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=()):
    """Find all the variables in ``str1`` delimited by the given markers.

    ``endmarker`` selects how the end of a variable is found:

      - ``None``: the variable runs up to the first character that is neither
        alphanumeric nor an underscore (or to the end of the string).  A
        marker that is immediately followed by such a character introduces
        no variable, and scanning simply continues after it.
      - an integer: the variable is exactly that many characters long.
      - a string: the variable runs up to the next occurrence of that string.
        If another start marker occurs in between, the variable starts at
        the last such marker instead.

    Variables listed in ``ignorelist`` are skipped, as are non-empty
    variables that contain anything besides alphanumerics, "_" and ".".

    Returns a list of ``(position, variable)`` tuples, where position is the
    index of the start marker and variable excludes the markers.
    """
    variables = []
    currentpos = 0
    while currentpos >= 0:
        variable = None
        currentpos = str1.find(startmarker, currentpos)
        if currentpos < 0:
            break
        startmatch = currentpos
        currentpos += len(startmarker)
        if endmarker is None:
            # No end marker: any character that is not alphanumeric or "_"
            # terminates the variable.
            for n in range(currentpos, len(str1)):
                if not (str1[n].isalnum() or str1[n] == "_"):
                    endmatch = n
                    break
            else:
                # Only when no terminator exists at all does the variable
                # extend to the end of the string.
                endmatch = len(str1)
            if currentpos < endmatch:
                variable = str1[currentpos:endmatch]
            currentpos = endmatch
        elif isinstance(endmarker, int):
            # An int end marker means a fixed-length variable string
            # (usually endmarker == 1).
            endmatch = currentpos + endmarker
            if endmatch > len(str1):
                break
            variable = str1[currentpos:endmatch]
            currentpos = endmatch
        else:
            endmatch = str1.find(endmarker, currentpos)
            if endmatch == -1:
                break
            # Search backwards in case there's an intervening start marker
            # (if not it's OK)...
            start2 = str1.rfind(startmarker, currentpos, endmatch)
            if start2 != -1:
                startmatch2 = start2
                start2 += len(startmarker)
                if start2 != currentpos:
                    currentpos = start2
                    startmatch = startmatch2
            variable = str1[currentpos:endmatch]
            currentpos = endmatch + len(endmarker)
        if variable is not None and variable not in ignorelist:
            # An empty variable is allowed through (e.g. endmarker == 0).
            if not variable or variable.replace("_", "").replace(".", "").isalnum():
                variables.append((startmatch, variable))
    return variables
182
def getaccelerators(accelmarker, acceptlist=None):
    """Build a function that lists the accelerators marked by ``accelmarker``.

    The returned function takes a string and gives back two lists of
    accelerator characters: the valid ones and the invalid ones, as
    classified by ``findaccelerators`` with the given ``acceptlist``.
    """
    def getmarkedaccelerators(str1):
        """Return the valid and invalid accelerator characters in ``str1``."""
        valid, invalid = findaccelerators(str1, accelmarker, acceptlist)
        # Drop the positions and keep only the characters.
        validchars = [char for _position, char in valid]
        invalidchars = [char for _position, char in invalid]
        return validchars, invalidchars
    return getmarkedaccelerators
def getvariables(startmarker, endmarker):
    """Build a function that lists the variables delimited by the markers.

    The returned function takes a string and gives back the variables that
    ``findmarkedvariables`` finds in it for ``startmarker`` and ``endmarker``,
    without their positions.
    """
    def getmarkedvariables(str1):
        """Return the variables in ``str1`` marked with the given markers."""
        located = findmarkedvariables(str1, startmarker, endmarker)
        return [name for _position, name in located]
    return getmarkedvariables
def getnumbers(str1):
    """Return the numbers found in ``str1`` as a list of strings.

    A number starts at a digit and may contain periods between digits and
    degree signs (U+00B0).  Periods that are not followed by another digit
    are not included in the number.
    """
    # TODO: handle locale-based periods e.g. 2,5 for Afrikaans
    assert isinstance(str1, unicode)
    degreesign = u'\xb0'
    found = []
    current = ""
    pendingperiods = ""
    innumber = False
    for char in str1:
        if char.isdigit():
            # Periods seen since the last digit turn out to be interior ones.
            innumber = True
            current += pendingperiods + char
            pendingperiods = ""
        elif not innumber:
            pendingperiods = ""
        elif char == degreesign:
            current += char
        elif char == '.':
            # Hold the period back until we know a digit follows it.
            pendingperiods += char
        else:
            # Any other character terminates the number being collected.
            innumber = False
            if current:
                found.append(current)
            current = ""
            pendingperiods = ""
    if innumber and current:
        found.append(current)
    return found
234
def getfunctions(str1, punctuation):
    """Return the functions() named in ``str1``.

    The string is split on whitespace and each word has trailing characters
    from ``punctuation`` removed (parentheses are never treated as
    punctuation here); the words that then end in "()" are returned.
    """
    strippable = punctuation.replace("(", "").replace(")", "")
    functions = []
    for word in str1.split():
        candidate = word.rstrip(strippable)
        if candidate.endswith("()"):
            functions.append(candidate)
    return functions
240
def getemails(str1):
    """Return the email addresses that are in a string.

    An address is taken to be a run of word characters, periods and hyphens
    on either side of an "@".  The matches are returned in order of
    appearance.
    """
    # Raw string: the backslashes are meant for the regex engine, and in a
    # plain literal "\w", "\." and "\-" are invalid string escapes.
    return re.findall(r"[\w\.\-]+@[\w\.\-]+", str1)
244
def geturls(str1):
    """Return the URIs in a string.

    Matches text starting with "http:", "https:", "www." or "ftp:" followed
    by characters permitted by the pattern below, and returns the matches
    in order of appearance.
    """
    # Raw strings: the backslashes are meant for the regex engine, and in a
    # plain literal sequences such as "\w", "\." and "\%" are invalid string
    # escapes.  The resulting pattern is unchanged.
    URLPAT = (
        r"https?:[\w/\.:;+\-~\%#\$?=&,()]+|www\.[\w/\.:;+\-~\%#\$?=&,()]+|"
        r"ftp:[\w/\.:;+\-~\%#?=&,]+"
    )
    return re.findall(URLPAT, str1)
250
def countaccelerators(accelmarker, acceptlist=None):
    """Build a function that counts the accelerators marked by ``accelmarker``.

    The returned function takes a string and gives back a tuple of two
    counts: the valid accelerators and the invalid accelerators, as
    classified by ``findaccelerators`` with the given ``acceptlist``.
    """
    def countmarkedaccelerators(str1):
        """Return how many valid and invalid accelerators ``str1`` contains."""
        valid, invalid = findaccelerators(str1, accelmarker, acceptlist)
        validcount = len(valid)
        invalidcount = len(invalid)
        return validcount, invalidcount
    return countmarkedaccelerators