0001-added-new-slugify-with-unidecode.patch on Ticket #858 – Attachment – GNU MediaGoblin

mediagoblin/tests/test_util.py

From 6f9d3cbe88b64684313b9507409fae5b4977f80c Mon Sep 17 00:00:00 2001
From: Boris Bobrov <breton@cynicmansion.ru>
Date: Tue, 11 Mar 2014 15:50:01 +0500
Subject: [PATCH 1/2] added new slugify with unidecode

---
 mediagoblin/tests/test_util.py |  6 ++++++
 mediagoblin/tools/url.py       | 18 ++----------------
 setup.py                       |  1 +
 3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/mediagoblin/tests/test_util.py b/mediagoblin/tests/test_util.py
index bc14f52..9d9b1c1 100644

                def test_slugify():
     assert url.slugify(u'a w@lk in the park?') == u'a-w-lk-in-the-park'
     assert url.slugify(u'a walk in the par\u0107') == u'a-walk-in-the-parc'
     assert url.slugify(u'\u00E0\u0042\u00E7\u010F\u00EB\u0066') == u'abcdef'
+    # Russian
+    assert url.slugify(u'\u043f\u0440\u043e\u0433\u0443\u043b\u043a\u0430 '
+            u'\u0432 \u043f\u0430\u0440\u043a\u0435') == u'progulka-v-parke'
+    # Korean
+    assert (url.slugify(u'\uacf5\uc6d0\uc5d0\uc11c \uc0b0\ucc45') ==
+            u'gongweoneseo-sancaeg')
 def test_locale_to_lower_upper():
     """

mediagoblin/tools/url.py

diff --git a/mediagoblin/tools/url.py b/mediagoblin/tools/url.py
index d9179f9..657c037 100644

-              a
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import re
+# This import *is* used; see word.encode('translit/long') below.
+from unicodedata import normalize
+try:
+    import translitcodec
+    USING_TRANSLITCODEC = True
+except ImportError:
+    USING_TRANSLITCODEC = False
+from unidecode import unidecode
 _punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
-…
+               def slugify(text, delim=u'-'):
     """
     result = []
     for word in _punct_re.split(text.lower()):
+        if USING_TRANSLITCODEC:
+            word = word.encode('translit/long')
+        else:
+            word = normalize('NFKD', word).encode('ascii', 'ignore')
+        if word:
+            result.append(word)
+        result.extend(unidecode(word).split())
     return unicode(delim.join(result))

setup.py

diff --git a/setup.py b/setup.py
index 7abd896..a3cc055 100644

                try:
         'pytz',
         'six',
         'oauthlib==0.5.0',
+        'unidecode',
         ## Annoying.  Please remove once we can!  We only indirectly
         ## use pbr, and currently it breaks things, presumably till

Legacy issue tracker

Context Navigation

Ticket #858: 0001-added-new-slugify-with-unidecode.patch

mediagoblin/tests/test_util.py

mediagoblin/tools/url.py

setup.py

Download in other formats: