Ticket #858: 0001-added-new-slugify-with-unidecode.patch

File 0001-added-new-slugify-with-unidecode.patch, 2.5 KB (added by Boris Bobrov, 10 years ago)
  • mediagoblin/tests/test_util.py

    From 6f9d3cbe88b64684313b9507409fae5b4977f80c Mon Sep 17 00:00:00 2001
    From: Boris Bobrov <breton@cynicmansion.ru>
    Date: Tue, 11 Mar 2014 15:50:01 +0500
    Subject: [PATCH 1/2] added new slugify with unidecode
    
    ---
     mediagoblin/tests/test_util.py |  6 ++++++
     mediagoblin/tools/url.py       | 18 ++----------------
     setup.py                       |  1 +
     3 files changed, 9 insertions(+), 16 deletions(-)
    
    diff --git a/mediagoblin/tests/test_util.py b/mediagoblin/tests/test_util.py
    index bc14f52..9d9b1c1 100644
    a  b  def test_slugify():
    77 77     assert url.slugify(u'a w@lk in the park?') == u'a-w-lk-in-the-park'
    78 78     assert url.slugify(u'a walk in the par\u0107') == u'a-walk-in-the-parc'
    79 79     assert url.slugify(u'\u00E0\u0042\u00E7\u010F\u00EB\u0066') == u'abcdef'
       80     # Russian
       81     assert url.slugify(u'\u043f\u0440\u043e\u0433\u0443\u043b\u043a\u0430 '
       82             u'\u0432 \u043f\u0430\u0440\u043a\u0435') == u'progulka-v-parke'
       83     # Korean
       84     assert (url.slugify(u'\uacf5\uc6d0\uc5d0\uc11c \uc0b0\ucc45') ==
       85             u'gongweoneseo-sancaeg')
    80 86
    81 87 def test_locale_to_lower_upper():
    82 88     """
  • mediagoblin/tools/url.py

    diff --git a/mediagoblin/tools/url.py b/mediagoblin/tools/url.py
    index d9179f9..657c037 100644
    a  b
    15 15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
    16 16
    17 17 import re
    18    # This import *is* used; see word.encode('tranlit/long') below.
    19    from unicodedata import normalize
    20
    21    try:
    22        import translitcodec
    23        USING_TRANSLITCODEC = True
    24    except ImportError:
    25        USING_TRANSLITCODEC = False
    26
       18 from unidecode import unidecode
    27 19
    28 20 _punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
    29 21
    a  b  def slugify(text, delim=u'-'):
    34 26     """
    35 27     result = []
    36 28     for word in _punct_re.split(text.lower()):
    37            if USING_TRANSLITCODEC:
    38                word = word.encode('translit/long')
    39            else:
    40                word = normalize('NFKD', word).encode('ascii', 'ignore')
    41
    42            if word:
    43                result.append(word)
       29         result.extend(unidecode(word).split())
    44 30     return unicode(delim.join(result))
  • setup.py

    diff --git a/setup.py b/setup.py
    index 7abd896..a3cc055 100644
    a  b  try:
    65 65         'pytz',
    66 66         'six',
    67 67         'oauthlib==0.5.0',
       68         'unidecode',
    68 69
    69 70         ## Annoying.  Please remove once we can!  We only indirectly
    70 71         ## use pbr, and currently it breaks things, presumably till