Ticket #5610: fix_spectrogram.diff

File fix_spectrogram.diff, 51.8 KB (added by Fernando Gutierrez, 5 years ago)

Second version of proposed fix: Added license header to new file, removed extlib/freesound, fixed soundfile package name in setup.py

  • deleted file extlib/freesound/audioprocessing.py

    diff --git a/extlib/freesound/audioprocessing.py b/extlib/freesound/audioprocessing.py
    deleted file mode 100644
    index b002ff8..0000000
    + -  
    1 #!/usr/bin/env python
    2 # processing.py -- various audio processing functions
    3 # Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG)
    4 #                    UNIVERSITAT POMPEU FABRA
    5 #
    6 # This program is free software: you can redistribute it and/or modify
    7 # it under the terms of the GNU Affero General Public License as
    8 # published by the Free Software Foundation, either version 3 of the
    9 # License, or (at your option) any later version.
    10 #
    11 # This program is distributed in the hope that it will be useful,
    12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
    13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14 # GNU Affero General Public License for more details.
    15 #
    16 # You should have received a copy of the GNU Affero General Public License
    17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18 #
    19 # Authors:
    20 #   Bram de Jong <bram.dejong at domain.com where domain in gmail>
    21 #   2012, Joar Wandborg <first name at last name dot se>
    22 
    23 from PIL import Image, ImageDraw, ImageColor #@UnresolvedImport
    24 from functools import partial
    25 import math
    26 import numpy
    27 import os
    28 import re
    29 import signal
    30 
    31 
    32 def get_sound_type(input_filename):
    33     sound_type = os.path.splitext(input_filename.lower())[1].strip(".")
    34 
    35     if sound_type == "fla":
    36         sound_type = "flac"
    37     elif sound_type == "aif":
    38         sound_type = "aiff"
    39 
    40     return sound_type
    41 
    42 
    43 try:
    44     import scikits.audiolab as audiolab
    45 except ImportError:
    46     print "WARNING: audiolab is not installed so wav2png will not work"
    47 import subprocess
    48 
    49 class AudioProcessingException(Exception):
    50     pass
    51 
    52 class TestAudioFile(object):
    53     """A class that mimics audiolab.sndfile but generates noise instead of reading
    54     a wave file. Additionally it can be told to have a "broken" header and thus crashing
    55     in the middle of the file. Also useful for testing ultra-short files of 20 samples."""
    56     def __init__(self, num_frames, has_broken_header=False):
    57         self.seekpoint = 0
    58         self.nframes = num_frames
    59         self.samplerate = 44100
    60         self.channels = 1
    61         self.has_broken_header = has_broken_header
    62 
    63     def seek(self, seekpoint):
    64         self.seekpoint = seekpoint
    65 
    66     def read_frames(self, frames_to_read):
    67         if self.has_broken_header and self.seekpoint + frames_to_read > self.num_frames / 2:
    68             raise RuntimeError()
    69 
    70         num_frames_left = self.num_frames - self.seekpoint
    71         will_read = num_frames_left if num_frames_left < frames_to_read else frames_to_read
    72         self.seekpoint += will_read
    73         return numpy.random.random(will_read)*2 - 1
    74 
    75 
    76 def get_max_level(filename):
    77     max_value = 0
    78     buffer_size = 4096
    79     audio_file = audiolab.Sndfile(filename, 'r')
    80     n_samples_left = audio_file.nframes
    81 
    82     while n_samples_left:
    83         to_read = min(buffer_size, n_samples_left)
    84 
    85         try:
    86             samples = audio_file.read_frames(to_read)
    87         except RuntimeError:
    88             # this can happen with a broken header
    89             break
    90 
    91         # convert to mono by selecting left channel only
    92         if audio_file.channels > 1:
    93             samples = samples[:,0]
    94 
    95         max_value = max(max_value, numpy.abs(samples).max())
    96 
    97         n_samples_left -= to_read
    98 
    99     audio_file.close()
    100 
    101     return max_value
    102 
    103 class AudioProcessor(object):
    104     """
    105     The audio processor processes chunks of audio an calculates the spectrac centroid and the peak
    106     samples in that chunk of audio.
    107     """
    108     def __init__(self, input_filename, fft_size, window_function=numpy.hanning):
    109         max_level = get_max_level(input_filename)
    110 
    111         self.audio_file = audiolab.Sndfile(input_filename, 'r')
    112         self.fft_size = fft_size
    113         self.window = window_function(self.fft_size)
    114         self.spectrum_range = None
    115         self.lower = 100
    116         self.higher = 22050
    117         self.lower_log = math.log10(self.lower)
    118         self.higher_log = math.log10(self.higher)
    119         self.clip = lambda val, low, high: min(high, max(low, val))
    120 
    121         # figure out what the maximum value is for an FFT doing the FFT of a DC signal
    122         fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window)
    123         max_fft = (numpy.abs(fft)).max()
    124         # set the scale to normalized audio and normalized FFT
    125         self.scale = 1.0/max_level/max_fft if max_level > 0 else 1
    126 
    127     def read(self, start, size, resize_if_less=False):
    128         """ read size samples starting at start, if resize_if_less is True and less than size
    129         samples are read, resize the array to size and fill with zeros """
    130 
    131         # number of zeros to add to start and end of the buffer
    132         add_to_start = 0
    133         add_to_end = 0
    134 
    135         if start < 0:
    136             # the first FFT window starts centered around zero
    137             if size + start <= 0:
    138                 return numpy.zeros(size) if resize_if_less else numpy.array([])
    139             else:
    140                 self.audio_file.seek(0)
    141 
    142                 add_to_start = -start # remember: start is negative!
    143                 to_read = size + start
    144 
    145                 if to_read > self.audio_file.nframes:
    146                     add_to_end = to_read - self.audio_file.nframes
    147                     to_read = self.audio_file.nframes
    148         else:
    149             self.audio_file.seek(start)
    150 
    151             to_read = size
    152             if start + to_read >= self.audio_file.nframes:
    153                 to_read = self.audio_file.nframes - start
    154                 add_to_end = size - to_read
    155 
    156         try:
    157             samples = self.audio_file.read_frames(to_read)
    158         except RuntimeError:
    159             # this can happen for wave files with broken headers...
    160             return numpy.zeros(size) if resize_if_less else numpy.zeros(2)
    161 
    162         # convert to mono by selecting left channel only
    163         if self.audio_file.channels > 1:
    164             samples = samples[:,0]
    165 
    166         if resize_if_less and (add_to_start > 0 or add_to_end > 0):
    167             if add_to_start > 0:
    168                 samples = numpy.concatenate((numpy.zeros(add_to_start), samples), axis=1)
    169 
    170             if add_to_end > 0:
    171                 samples = numpy.resize(samples, size)
    172                 samples[size - add_to_end:] = 0
    173 
    174         return samples
    175 
    176 
    177     def spectral_centroid(self, seek_point, spec_range=110.0):
    178         """ starting at seek_point read fft_size samples, and calculate the spectral centroid """
    179 
    180         samples = self.read(seek_point - self.fft_size/2, self.fft_size, True)
    181 
    182         samples *= self.window
    183         fft = numpy.fft.rfft(samples)
    184         spectrum = self.scale * numpy.abs(fft) # normalized abs(FFT) between 0 and 1
    185         length = numpy.float64(spectrum.shape[0])
    186 
    187         # scale the db spectrum from [- spec_range db ... 0 db] > [0..1]
    188         db_spectrum = ((20*(numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range)/spec_range
    189 
    190         energy = spectrum.sum()
    191         spectral_centroid = 0
    192 
    193         if energy > 1e-60:
    194             # calculate the spectral centroid
    195 
    196             if self.spectrum_range == None:
    197                 self.spectrum_range = numpy.arange(length)
    198 
    199             spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5
    200 
    201             # clip > log10 > scale between 0 and 1
    202             spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log)
    203 
    204         return (spectral_centroid, db_spectrum)
    205 
    206 
    207     def peaks(self, start_seek, end_seek):
    208         """ read all samples between start_seek and end_seek, then find the minimum and maximum peak
    209         in that range. Returns that pair in the order they were found. So if min was found first,
    210         it returns (min, max) else the other way around. """
    211 
    212         # larger blocksizes are faster but take more mem...
    213         # Aha, Watson, a clue, a tradeof!
    214         block_size = 4096
    215 
    216         max_index = -1
    217         max_value = -1
    218         min_index = -1
    219         min_value = 1
    220 
    221         if start_seek < 0:
    222             start_seek = 0
    223 
    224         if end_seek > self.audio_file.nframes:
    225             end_seek = self.audio_file.nframes
    226 
    227         if end_seek <= start_seek:
    228             samples = self.read(start_seek, 1)
    229             return (samples[0], samples[0])
    230 
    231         if block_size > end_seek - start_seek:
    232             block_size = end_seek - start_seek
    233 
    234         for i in range(start_seek, end_seek, block_size):
    235             samples = self.read(i, block_size)
    236 
    237             local_max_index = numpy.argmax(samples)
    238             local_max_value = samples[local_max_index]
    239 
    240             if local_max_value > max_value:
    241                 max_value = local_max_value
    242                 max_index = local_max_index
    243 
    244             local_min_index = numpy.argmin(samples)
    245             local_min_value = samples[local_min_index]
    246 
    247             if local_min_value < min_value:
    248                 min_value = local_min_value
    249                 min_index = local_min_index
    250 
    251         return (min_value, max_value) if min_index < max_index else (max_value, min_value)
    252 
    253 
    254 def interpolate_colors(colors, flat=False, num_colors=256):
    255     """ given a list of colors, create a larger list of colors interpolating
    256     the first one. If flatten is True a list of numers will be returned. If
    257     False, a list of (r,g,b) tuples. num_colors is the number of colors wanted
    258     in the final list """
    259 
    260     palette = []
    261 
    262     for i in range(num_colors):
    263         index = (i * (len(colors) - 1))/(num_colors - 1.0)
    264         index_int = int(index)
    265         alpha = index - float(index_int)
    266 
    267         if alpha > 0:
    268             r = (1.0 - alpha) * colors[index_int][0] + alpha * colors[index_int + 1][0]
    269             g = (1.0 - alpha) * colors[index_int][1] + alpha * colors[index_int + 1][1]
    270             b = (1.0 - alpha) * colors[index_int][2] + alpha * colors[index_int + 1][2]
    271         else:
    272             r = (1.0 - alpha) * colors[index_int][0]
    273             g = (1.0 - alpha) * colors[index_int][1]
    274             b = (1.0 - alpha) * colors[index_int][2]
    275 
    276         if flat:
    277             palette.extend((int(r), int(g), int(b)))
    278         else:
    279             palette.append((int(r), int(g), int(b)))
    280 
    281     return palette
    282 
    283 
    284 def desaturate(rgb, amount):
    285     """
    286         desaturate colors by amount
    287         amount == 0, no change
    288         amount == 1, grey
    289     """
    290     luminosity = sum(rgb) / 3.0
    291     desat = lambda color: color - amount * (color - luminosity)
    292 
    293     return tuple(map(int, map(desat, rgb)))
    294 
    295 
    296 class WaveformImage(object):
    297     """
    298     Given peaks and spectral centroids from the AudioProcessor, this class will construct
    299     a wavefile image which can be saved as PNG.
    300     """
    301     def __init__(self, image_width, image_height, palette=1):
    302         if image_height % 2 == 0:
    303             raise AudioProcessingException("Height should be uneven: images look much better at uneven height")
    304 
    305         if palette == 1:
    306             background_color = (0,0,0)
    307             colors = [
    308                         (50,0,200),
    309                         (0,220,80),
    310                         (255,224,0),
    311                         (255,70,0),
    312                      ]
    313         elif palette == 2:
    314             background_color = (0,0,0)
    315             colors = [self.color_from_value(value/29.0) for value in range(0,30)]
    316         elif palette == 3:
    317             background_color = (213, 217, 221)
    318             colors = map( partial(desaturate, amount=0.7), [
    319                         (50,0,200),
    320                         (0,220,80),
    321                         (255,224,0),
    322                      ])
    323         elif palette == 4:
    324             background_color = (213, 217, 221)
    325             colors = map( partial(desaturate, amount=0.8), [self.color_from_value(value/29.0) for value in range(0,30)])
    326 
    327         self.image = Image.new("RGB", (image_width, image_height), background_color)
    328 
    329         self.image_width = image_width
    330         self.image_height = image_height
    331 
    332         self.draw = ImageDraw.Draw(self.image)
    333         self.previous_x, self.previous_y = None, None
    334 
    335         self.color_lookup = interpolate_colors(colors)
    336         self.pix = self.image.load()
    337 
    338     def color_from_value(self, value):
    339         """ given a value between 0 and 1, return an (r,g,b) tuple """
    340 
    341         return ImageColor.getrgb("hsl(%d,%d%%,%d%%)" % (int( (1.0 - value) * 360 ), 80, 50))
    342 
    343     def draw_peaks(self, x, peaks, spectral_centroid):
    344         """ draw 2 peaks at x using the spectral_centroid for color """
    345 
    346         y1 = self.image_height * 0.5 - peaks[0] * (self.image_height - 4) * 0.5
    347         y2 = self.image_height * 0.5 - peaks[1] * (self.image_height - 4) * 0.5
    348 
    349         line_color = self.color_lookup[int(spectral_centroid*255.0)]
    350 
    351         if self.previous_y != None:
    352             self.draw.line([self.previous_x, self.previous_y, x, y1, x, y2], line_color)
    353         else:
    354             self.draw.line([x, y1, x, y2], line_color)
    355 
    356         self.previous_x, self.previous_y = x, y2
    357 
    358         self.draw_anti_aliased_pixels(x, y1, y2, line_color)
    359 
    360     def draw_anti_aliased_pixels(self, x, y1, y2, color):
    361         """ vertical anti-aliasing at y1 and y2 """
    362 
    363         y_max = max(y1, y2)
    364         y_max_int = int(y_max)
    365         alpha = y_max - y_max_int
    366 
    367         if alpha > 0.0 and alpha < 1.0 and y_max_int + 1 < self.image_height:
    368             current_pix = self.pix[x, y_max_int + 1]
    369 
    370             r = int((1-alpha)*current_pix[0] + alpha*color[0])
    371             g = int((1-alpha)*current_pix[1] + alpha*color[1])
    372             b = int((1-alpha)*current_pix[2] + alpha*color[2])
    373 
    374             self.pix[x, y_max_int + 1] = (r,g,b)
    375 
    376         y_min = min(y1, y2)
    377         y_min_int = int(y_min)
    378         alpha = 1.0 - (y_min - y_min_int)
    379 
    380         if alpha > 0.0 and alpha < 1.0 and y_min_int - 1 >= 0:
    381             current_pix = self.pix[x, y_min_int - 1]
    382 
    383             r = int((1-alpha)*current_pix[0] + alpha*color[0])
    384             g = int((1-alpha)*current_pix[1] + alpha*color[1])
    385             b = int((1-alpha)*current_pix[2] + alpha*color[2])
    386 
    387             self.pix[x, y_min_int - 1] = (r,g,b)
    388 
    389     def save(self, filename):
    390         # draw a zero "zero" line
    391         a = 25
    392         for x in range(self.image_width):
    393             self.pix[x, self.image_height/2] = tuple(map(lambda p: p+a, self.pix[x, self.image_height/2]))
    394 
    395         self.image.save(filename)
    396 
    397 
    398 class SpectrogramImage(object):
    399     """
    400     Given spectra from the AudioProcessor, this class will construct a wavefile image which
    401     can be saved as PNG.
    402     """
    403     def __init__(self, image_width, image_height, fft_size):
    404         self.image_width = image_width
    405         self.image_height = image_height
    406         self.fft_size = fft_size
    407 
    408         self.image = Image.new("RGBA", (image_height, image_width))
    409 
    410         colors = [
    411             (0, 0, 0, 0),
    412             (58/4, 68/4, 65/4, 255),
    413             (80/2, 100/2, 153/2, 255),
    414             (90, 180, 100, 255),
    415             (224, 224, 44, 255),
    416             (255, 60, 30, 255),
    417             (255, 255, 255, 255)
    418          ]
    419         self.palette = interpolate_colors(colors)
    420 
    421         # generate the lookup which translates y-coordinate to fft-bin
    422         self.y_to_bin = []
    423         f_min = 100.0
    424         f_max = 22050.0
    425         y_min = math.log10(f_min)
    426         y_max = math.log10(f_max)
    427         for y in range(self.image_height):
    428             freq = math.pow(10.0, y_min + y / (image_height - 1.0) *(y_max - y_min))
    429             bin = freq / 22050.0 * (self.fft_size/2 + 1)
    430 
    431             if bin < self.fft_size/2:
    432                 alpha = bin - int(bin)
    433 
    434                 self.y_to_bin.append((int(bin), alpha * 255))
    435 
    436         # this is a bit strange, but using image.load()[x,y] = ... is
    437         # a lot slower than using image.putadata and then rotating the image
    438         # so we store all the pixels in an array and then create the image when saving
    439         self.pixels = []
    440 
    441     def draw_spectrum(self, x, spectrum):
    442         # for all frequencies, draw the pixels
    443         for (index, alpha) in self.y_to_bin:
    444             self.pixels.append( self.palette[int((255.0-alpha) * spectrum[index] + alpha * spectrum[index + 1])] )
    445 
    446         # if the FFT is too small to fill up the image, fill with black to the top
    447         for y in range(len(self.y_to_bin), self.image_height): #@UnusedVariable
    448             self.pixels.append(self.palette[0])
    449 
    450     def save(self, filename, quality=80):
    451         assert filename.lower().endswith(".jpg")
    452         self.image.putdata(self.pixels)
    453         self.image.transpose(Image.ROTATE_90).save(filename, quality=quality)
    454 
    455 
    456 def create_wave_images(input_filename, output_filename_w, output_filename_s, image_width, image_height, fft_size, progress_callback=None):
    457     """
    458     Utility function for creating both wavefile and spectrum images from an audio input file.
    459     """
    460     processor = AudioProcessor(input_filename, fft_size, numpy.hanning)
    461     samples_per_pixel = processor.audio_file.nframes / float(image_width)
    462 
    463     waveform = WaveformImage(image_width, image_height)
    464     spectrogram = SpectrogramImage(image_width, image_height, fft_size)
    465 
    466     for x in range(image_width):
    467 
    468         if progress_callback and x % (image_width/10) == 0:
    469             progress_callback((x*100)/image_width)
    470 
    471         seek_point = int(x * samples_per_pixel)
    472         next_seek_point = int((x + 1) * samples_per_pixel)
    473 
    474         (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)
    475         peaks = processor.peaks(seek_point, next_seek_point)
    476 
    477         waveform.draw_peaks(x, peaks, spectral_centroid)
    478         spectrogram.draw_spectrum(x, db_spectrum)
    479 
    480     if progress_callback:
    481         progress_callback(100)
    482 
    483     waveform.save(output_filename_w)
    484     spectrogram.save(output_filename_s)
    485 
    486 
    487 class NoSpaceLeftException(Exception):
    488     pass
    489 
    490 def convert_to_pcm(input_filename, output_filename):
    491     """
    492     converts any audio file type to pcm audio
    493     """
    494 
    495     if not os.path.exists(input_filename):
    496         raise AudioProcessingException("file %s does not exist" % input_filename)
    497 
    498     sound_type = get_sound_type(input_filename)
    499 
    500     if sound_type == "mp3":
    501         cmd = ["lame", "--decode", input_filename, output_filename]
    502     elif sound_type == "ogg":
    503         cmd = ["oggdec", input_filename, "-o", output_filename]
    504     elif sound_type == "flac":
    505         cmd = ["flac", "-f", "-d", "-s", "-o", output_filename, input_filename]
    506     else:
    507         return False
    508 
    509     process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    510     (stdout, stderr) = process.communicate()
    511 
    512     if process.returncode != 0 or not os.path.exists(output_filename):
    513         if "No space left on device" in stderr + " " + stdout:
    514             raise NoSpaceLeftException
    515         raise AudioProcessingException("failed converting to pcm data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout)
    516 
    517     return True
    518 
    519 
    520 def stereofy_and_find_info(stereofy_executble_path, input_filename, output_filename):
    521     """
    522     converts a pcm wave file to two channel, 16 bit integer
    523     """
    524 
    525     if not os.path.exists(input_filename):
    526         raise AudioProcessingException("file %s does not exist" % input_filename)
    527 
    528     cmd = [stereofy_executble_path, "--input", input_filename, "--output", output_filename]
    529 
    530     process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    531     (stdout, stderr) = process.communicate()
    532 
    533     if process.returncode != 0 or not os.path.exists(output_filename):
    534         if "No space left on device" in stderr + " " + stdout:
    535             raise NoSpaceLeftException
    536         raise AudioProcessingException("failed calling stereofy data:\n" + " ".join(cmd) + "\n" + stderr + "\n" + stdout)
    537 
    538     stdout = (stdout + " " + stderr).replace("\n", " ")
    539 
    540     duration = 0
    541     m = re.match(r".*#duration (?P<duration>[\d\.]+).*",  stdout)
    542     if m != None:
    543         duration = float(m.group("duration"))
    544 
    545     channels = 0
    546     m = re.match(r".*#channels (?P<channels>\d+).*", stdout)
    547     if m != None:
    548         channels = float(m.group("channels"))
    549 
    550     samplerate = 0
    551     m = re.match(r".*#samplerate (?P<samplerate>\d+).*", stdout)
    552     if m != None:
    553         samplerate = float(m.group("samplerate"))
    554 
    555     bitdepth = None
    556     m = re.match(r".*#bitdepth (?P<bitdepth>\d+).*", stdout)
    557     if m != None:
    558         bitdepth = float(m.group("bitdepth"))
    559 
    560     bitrate = (os.path.getsize(input_filename) * 8.0) / 1024.0 / duration if duration > 0 else 0
    561 
    562     return dict(duration=duration, channels=channels, samplerate=samplerate, bitrate=bitrate, bitdepth=bitdepth)
    563 
    564 
    565 def convert_to_mp3(input_filename, output_filename, quality=70):
    566     """
    567     converts the incoming wave file to a mp3 file
    568     """
    569 
    570     if not os.path.exists(input_filename):
    571         raise AudioProcessingException("file %s does not exist" % input_filename)
    572 
    573     command = ["lame", "--silent", "--abr", str(quality), input_filename, output_filename]
    574 
    575     process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    576     (stdout, stderr) = process.communicate()
    577 
    578     if process.returncode != 0 or not os.path.exists(output_filename):
    579         raise AudioProcessingException(stdout)
    580 
    581 def convert_to_ogg(input_filename, output_filename, quality=1):
    582     """
    583     converts the incoming wave file to n ogg file
    584     """
    585 
    586     if not os.path.exists(input_filename):
    587         raise AudioProcessingException("file %s does not exist" % input_filename)
    588 
    589     command = ["oggenc", "-q", str(quality), input_filename, "-o", output_filename]
    590 
    591     process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    592     (stdout, stderr) = process.communicate()
    593 
    594     if process.returncode != 0 or not os.path.exists(output_filename):
    595         raise AudioProcessingException(stdout)
    596 
    597 def convert_using_ffmpeg(input_filename, output_filename):
    598     """
    599     converts the incoming wave file to stereo pcm using fffmpeg
    600     """
    601     TIMEOUT = 3 * 60
    602     def  alarm_handler(signum, frame):
    603         raise AudioProcessingException("timeout while waiting for ffmpeg")
    604 
    605     if not os.path.exists(input_filename):
    606         raise AudioProcessingException("file %s does not exist" % input_filename)
    607 
    608     command = ["ffmpeg", "-y", "-i", input_filename, "-ac","1","-acodec", "pcm_s16le", "-ar", "44100", output_filename]
    609 
    610     process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    611     signal.signal(signal.SIGALRM,alarm_handler)
    612     signal.alarm(TIMEOUT)
    613     (stdout, stderr) = process.communicate()
    614     signal.alarm(0)
    615     if process.returncode != 0 or not os.path.exists(output_filename):
    616         raise AudioProcessingException(stdout)
  • deleted file mediagoblin/media_types/audio/audioprocessing.py

    diff --git a/mediagoblin/media_types/audio/audioprocessing.py b/mediagoblin/media_types/audio/audioprocessing.py
    deleted file mode 120000
    index c5e3c52..0000000
    + -  
    1 ../../../extlib/freesound/audioprocessing.py
    2  No newline at end of file
  • new file mediagoblin/media_types/audio/audiotospectrogram.py

    diff --git a/mediagoblin/media_types/audio/audiotospectrogram.py b/mediagoblin/media_types/audio/audiotospectrogram.py
    new file mode 100644
    index 0000000..2d2f942
    - +  
     1# GNU MediaGoblin -- federated, autonomous media hosting
     2# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
     3#
     4# This program is free software: you can redistribute it and/or modify
     5# it under the terms of the GNU Affero General Public License as published by
     6# the Free Software Foundation, either version 3 of the License, or
     7# (at your option) any later version.
     8#
     9# This program is distributed in the hope that it will be useful,
     10# but WITHOUT ANY WARRANTY; without even the implied warranty of
     11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12# GNU Affero General Public License for more details.
     13#
     14# You should have received a copy of the GNU Affero General Public License
     15# along with this program.  If not, see <http://www.gnu.org/licenses/>.
     16
     17from PIL import Image
     18import soundfile
     19import numpy
     20
SPECTROGRAM_MAX_FREQUENCY = 8000 # Old spectrogram.py sets the upper limit to 22050 but
                                 # usually there isn't much detail in higher frequencies
SPECTROGRAM_MIN_FREQUENCY = 20
SPECTROGRAM_DB_RANGE      = 110
# Color palette copied from old spectrogram.py
# NOTE(review): with true division (Python 3) the "/" expressions below yield
# floats such as 14.5 rather than the integers the old Python 2 code
# produced -- confirm the code consuming this palette copes with that.
SPECTROGRAM_COLORS = [(58 / 4, 68 / 4, 65 / 4),
                      (80 / 2, 100 / 2, 153 / 2),
                      (90, 180, 100),
                      (224, 224, 44),
                      (255, 60, 30),
                      (255, 255, 255)]
# The purpose of this table is to give more horizontal
# real estate to shorter sound files.
# Format: (pixels, (range_min, range_max))
# For sounds with a duration >= _range_min_ and < _range_max_ (in seconds)
# give _pixels_ horizontal pixels for each second of audio.
SPECTROGRAM_WIDTH_PERSECOND = [(240, (  0,     20)),
                               (120, ( 20,     30)),
                               ( 60, ( 30,     60)),
                               ( 30, ( 60,    120)),
                               ( 15, (120,    240)),
                               (  6, (240, 100000))] # Upper limit is arbitrary. Sounds with longer
                                                     # duration will still get assigned to the last bucket
# presumably the height of the generated image in pixels -- verify against the caller
SPECTROGRAM_HEIGHT = 500
     45
     46class AudioBlocksFFT:
     47
    def __init__(self, fileName, blockSize, overlap, minFreq, maxFreq, numBins = None, windowFunction = numpy.hanning):
        """
        Open fileName for reading with soundfile and remember the analysis settings.

        The channel count, sample rate and total number of samples are taken
        from the opened file; windowFunction is called once with blockSize to
        build the window values. The peak FFT value starts at 0.
        """
        soundFile = soundfile.SoundFile(fileName, 'r')
        self.audioData = soundFile
        self.numChannels = soundFile.channels
        self.sampleRate = soundFile.samplerate
        self.minFreq, self.maxFreq = minFreq, maxFreq
        self.blockSize = blockSize
        self.numBins = numBins
        self.overlap = overlap
        self.windowValues = windowFunction(blockSize)
        self.peakFFTValue = 0
        # PySoundFile V0.10.0 adds the SoundFile.frames property and deprecates
        # __len__(); fall back to len() when the property is not available
        missing = object()
        frameCount = getattr(soundFile, 'frames', missing)
        if frameCount is missing:
            frameCount = len(soundFile)
        self.totalSamples = frameCount
     64
     65    def peakFFTAmplitude(self):
     66        """
     67        Peak amplitude of FFT for all blocks
     68        """
     69        return self.peakFFTValue
     70
     71    def totalSeconds(self):
     72        """
     73        Total length in seconds
     74        """
     75        return self.totalSamples / self.sampleRate
     76
     77    def _filterFreqRange(self, fftAmplitude):
     78        """
     79        Given a FFT amplitudes array keep only bins between minFreq, maxFreq
     80        """
     81        nyquistFreq = self.sampleRate // 2
     82        numBins = len(fftAmplitude)
     83        sliceWidth = nyquistFreq / numBins
     84        startIdx = int(self.minFreq / sliceWidth)
     85        endIdx = int(self.maxFreq / sliceWidth)
     86        if numBins <= endIdx:
     87            fftAmplitude = numpy.pad(fftAmplitude, (0, 1 + endIdx - numBins), 'constant', constant_values=(0))
     88        else:
     89            fftAmplitude = fftAmplitude[:endIdx + 1]
     90        return fftAmplitude[startIdx:]
     91
     92    def _resizeAmplitudeArray(self, amplitudeValues, newSize):
     93        """
     94        Resize amplitude values array
     95        """
     96        if len(amplitudeValues) == newSize:
     97            return amplitudeValues
     98        if newSize > len(amplitudeValues):
     99            # Resize up
     100            result = numpy.zeros(newSize)
     101            for idx in range(0, newSize):
     102                srcIdx = (idx * len(amplitudeValues)) // newSize
     103                result[idx] = amplitudeValues[srcIdx]
     104            return result
     105        # Resize down keeping peaks
     106        result = numpy.zeros(newSize)
     107        idx = 0
     108        for slice in numpy.array_split(amplitudeValues, newSize):
     109            result[idx] = slice.max()
     110            idx = idx + 1
     111        return result
     112
     113    def __iter__(self):
     114        """
     115        Iterate over blocks of audio data, yielding (FFT amplitudes, position in seconds)
     116        """
     117        self.audioData.seek(0)
     118        for fileBlock in self.audioData.blocks(blocksize = self.blockSize, overlap = self.overlap):
     119            # Mix down all channels to mono
     120            audioBlock = fileBlock[:,0]
     121            for channel in range(1, self.numChannels):
     122                audioBlock = numpy.add(audioBlock, fileBlock[:,channel])
     123            # On the last block it may be necessary to pad with zeros
     124            if len(audioBlock) < self.blockSize:
     125                audioBlock = numpy.pad(audioBlock, (0, self.blockSize - len(audioBlock)), 'constant', constant_values=(0))
     126            # Compute FFT amplitude of this block
     127            fftAmplitude = self._filterFreqRange(numpy.abs(numpy.fft.rfft(audioBlock * self.windowValues)))
     128            self.peakFFTValue = max(self.peakFFTValue, fftAmplitude.max())
     129            # Resize if requested
     130            if not self.numBins is None:
     131                fftAmplitude = self._resizeAmplitudeArray(fftAmplitude, self.numBins)
     132            yield (fftAmplitude, self.audioData.tell() / self.sampleRate)
     133
     134class SpectrogramColorMap:
     135
     136    def __init__(self, columnData):
     137        self.columnData = columnData
     138        self.width = len(columnData)
     139        self.height = len(columnData[0])
     140        self._buildColorPalette()
     141
     142    def _colorBetween(self, beginColor, endColor, step):
     143        """
     144        Interpolate between two colors
     145        """
     146        rS, gS, bS = beginColor
     147        rE, gE, bE = endColor
     148        r = int(numpy.sqrt((1.0 - step) * (rS * rS) + step * (rE * rE)))
     149        g = int(numpy.sqrt((1.0 - step) * (gS * gS) + step * (gE * gE)))
     150        b = int(numpy.sqrt((1.0 - step) * (bS * bS) + step * (bE * bE)))
     151        r = r if r < 256 else 255
     152        g = g if g < 256 else 255
     153        b = b if b < 256 else 255
     154        return (r, g, b)
     155
     156    def _buildColorPalette(self):
     157        """
     158        Build color palette
     159        """
     160        colorPoints = SPECTROGRAM_COLORS
     161        self.colors = []
     162        for i in range(1, len(colorPoints)):
     163            for p in range(0, 200):
     164                self.colors.append(self._colorBetween(colorPoints[i - 1], colorPoints[i], p / 200))
     165
     166    def getColorData(self, progressCallback = None):
     167        """
     168        Map spectrogram data to pixel colors
     169        """
     170        pixels = [self.colors[0]] * (self.width * self.height)
     171        for x in range(0, self.width):
     172            for y in range(0, self.height):
     173                idx = x + self.width * y
     174                amplitudeVal = self.columnData[x][self.height - y - 1]
     175                colorIdx = int(len(self.colors) * amplitudeVal)
     176                colorIdx = colorIdx if colorIdx > 0 else 0
     177                colorIdx = colorIdx if colorIdx < len(self.colors) else len(self.colors) - 1
     178                pixels[idx] = self.colors[colorIdx]
     179            if progressCallback:
     180                progressCallback(100 * x / self.width)
     181        return pixels
     182
     183def drawSpectrogram(audioFileName, imageFileName, fftSize = 1024, fftOverlap = 0, progressCallback = None):
     184    """
     185    Draw a spectrogram of the audio file
     186    """
     187
     188    # Percentage of total work for each step
     189    STEP_PERCENTAGE_FFT        = 40
     190    STEP_PERCENTAGE_NORMALIZE  = 5
     191    STEP_PERCENTAGE_ACCUMULATE = 10
     192    STEP_PERCENTAGE_DRAW       = 40
     193    # Give last 5% to saving the file
     194
     195    PERCENTAGE_REPORT_STEP = 2
     196
     197    nextReportedPercentage = PERCENTAGE_REPORT_STEP
     198    def wrapProgressCallback(percentage):
     199        nonlocal nextReportedPercentage
     200        percentage = int(percentage)
     201        if percentage >= nextReportedPercentage:
     202            if progressCallback:
     203                progressCallback(percentage)
     204            nextReportedPercentage = (1 + percentage // PERCENTAGE_REPORT_STEP) * PERCENTAGE_REPORT_STEP
     205
     206    def mapColorsProgressCallback(percentage):
     207        wrapProgressCallback(STEP_PERCENTAGE_FFT + STEP_PERCENTAGE_NORMALIZE + STEP_PERCENTAGE_ACCUMULATE
     208                             + (STEP_PERCENTAGE_DRAW * (percentage / 100)))
     209
     210    imageWidthLookup = SPECTROGRAM_WIDTH_PERSECOND
     211    imageHeight = SPECTROGRAM_HEIGHT
     212
     213    # Load audio file and compute FFT amplitudes
     214    fftBlocksSource = AudioBlocksFFT(audioFileName,
     215                                     fftSize, overlap = fftOverlap,
     216                                     minFreq = SPECTROGRAM_MIN_FREQUENCY, maxFreq = SPECTROGRAM_MAX_FREQUENCY,
     217                                     numBins = imageHeight)
     218    soundLength = fftBlocksSource.totalSeconds()
     219    fftAmplitudeBlocks = []
     220    for fftAmplitude, positionSeconds in fftBlocksSource:
     221        fftAmplitudeBlocks.append(fftAmplitude)
     222        wrapProgressCallback(STEP_PERCENTAGE_FFT * (positionSeconds / soundLength))
     223
     224    totalProgress = STEP_PERCENTAGE_FFT
     225
     226    # Normalize FFT amplitude and convert to log scale
     227    specRange = SPECTROGRAM_DB_RANGE
     228    for i in range(0, len(fftAmplitudeBlocks)):
     229        normalized = numpy.divide(fftAmplitudeBlocks[i], fftBlocksSource.peakFFTAmplitude())
     230        fftAmplitudeBlocks[i] = ((20*(numpy.log10(normalized + 1e-60))).clip(-specRange, 0.0) + specRange)/specRange
     231        wrapProgressCallback(totalProgress + STEP_PERCENTAGE_NORMALIZE * (i / len(fftAmplitudeBlocks)))
     232
     233    totalProgress = totalProgress + STEP_PERCENTAGE_NORMALIZE
     234
     235    # Compute spectrogram width in pixels
     236    imageWidthPerSecond, lengthRage = imageWidthLookup[-1]
     237    for widthPerSecond, lengthLimit in imageWidthLookup:
     238        limitLow, limitHigh = lengthLimit
     239        if soundLength > limitLow and soundLength <= limitHigh:
     240            imageWidthPerSecond = widthPerSecond
     241            break
     242    imageWidth = int(imageWidthPerSecond * soundLength)
     243
     244    # Compute spectrogram values
     245    columnValues = numpy.zeros(imageHeight)
     246    spectrogram = []
     247    x = 0
     248    for idx in range(0, len(fftAmplitudeBlocks)):
     249        newX = (idx * imageWidth) // len(fftAmplitudeBlocks)
     250        if newX != x:
     251            # Save column
     252            spectrogram.append(numpy.copy(columnValues))
     253            x = newX
     254            columnValues.fill(0)
     255        columnValues = numpy.maximum(columnValues, fftAmplitudeBlocks[idx])
     256        wrapProgressCallback(totalProgress + STEP_PERCENTAGE_ACCUMULATE * (idx / len(fftAmplitudeBlocks)))
     257    spectrogram.append(numpy.copy(columnValues))
     258
     259    totalProgress = totalProgress + STEP_PERCENTAGE_ACCUMULATE
     260
     261    # Draw spectrogram
     262    imageWidth = len(spectrogram)
     263    colorData = SpectrogramColorMap(spectrogram).getColorData(progressCallback = mapColorsProgressCallback)
     264
     265    totalProgress = totalProgress + STEP_PERCENTAGE_DRAW
     266
     267    # Save final image
     268    image = Image.new('RGB', (imageWidth, imageHeight))
     269    image.putdata(colorData)
     270    image.save(imageFileName)
     271
     272    if progressCallback:
     273        progressCallback(100)
     274
     275
     276if __name__ == "__main__":
     277
     278    import sys
     279
     280    def printProgress(p):
     281        sys.stdout.write("\rProgress : {}%".format(p))
     282        sys.stdout.flush()
     283
     284    if not (len(sys.argv) == 2 or len(sys.argv) == 3):
     285        print("Usage:\n{0} input_file [output_file]".format(sys.argv[0]))
     286        exit()
     287
     288    audioFile = sys.argv[1]
     289
     290    if 3 == len(sys.argv):
     291        outputFile = sys.argv[2]
     292    else:
     293        outputFile = 'spectrogram.png'
     294
     295    sys.stdout.write("Input    : {0}\nOutput   : {1}\n".format(audioFile, outputFile))
     296    drawSpectrogram(audioFile, outputFile, progressCallback = printProgress)
     297    sys.stdout.write("\nDone!\n")
  • deleted file mediagoblin/media_types/audio/spectrogram.py

    diff --git a/mediagoblin/media_types/audio/spectrogram.py b/mediagoblin/media_types/audio/spectrogram.py
    deleted file mode 100644
    index 433bb30..0000000
    + -  
    1 # processing.py -- various audio processing functions
    2 # Copyright (C) 2008 MUSIC TECHNOLOGY GROUP (MTG)
    3 #                    UNIVERSITAT POMPEU FABRA
    4 #
    5 # This program is free software: you can redistribute it and/or modify
    6 # it under the terms of the GNU Affero General Public License as
    7 # published by the Free Software Foundation, either version 3 of the
    8 # License, or (at your option) any later version.
    9 #
    10 # This program is distributed in the hope that it will be useful,
    11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
    12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13 # GNU Affero General Public License for more details.
    14 #
    15 # You should have received a copy of the GNU Affero General Public License
    16 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17 #
    18 # Authors:
    19 #   Bram de Jong <bram.dejong at domain.com where domain in gmail>
    20 #   2012, Joar Wandborg <first name at last name dot se>
    21 
    22 from __future__ import print_function
    23 
    24 try:
    25     from PIL import Image
    26 except ImportError:
    27     import Image
    28 import math
    29 import numpy
    30 
    31 try:
    32     import scikits.audiolab as audiolab
    33 except ImportError:
    34     print("WARNING: audiolab is not installed so wav2png will not work")
    35 
    36 
    37 class AudioProcessingException(Exception):
    38     pass
    39 
    40 
    41 class SpectrogramImage(object):
    42     def __init__(self, image_size, fft_size):
    43         self.image_width, self.image_height = image_size
    44         self.fft_size = fft_size
    45 
    46         colors = [
    47             (0, 0, 0, 0),
    48             (58 / 4, 68 / 4, 65 / 4, 255),
    49             (80 / 2, 100 / 2, 153 / 2, 255),
    50             (90, 180, 100, 255),
    51             (224, 224, 44, 255),
    52             (255, 60, 30, 255),
    53             (255, 255, 255, 255)
    54          ]
    55 
    56         self.palette = interpolate_colors(colors)
    57 
    58         # Generate lookup table for y-coordinate from fft-bin
    59         self.y_to_bin = []
    60 
    61         fft_min = 100.0
    62         fft_max = 22050.0  # kHz?
    63 
    64         y_min = math.log10(fft_min)
    65         y_max = math.log10(fft_max)
    66 
    67         for y in range(self.image_height):
    68             freq = math.pow(
    69                     10.0,
    70                     y_min + y / (self.image_height - 1.0)
    71                     * (y_max - y_min))
    72 
    73             fft_bin = freq / fft_max * (self.fft_size / 2 + 1)
    74 
    75             if fft_bin < self.fft_size / 2:
    76                 alpha = fft_bin - int(fft_bin)
    77 
    78                 self.y_to_bin.append((int(fft_bin), alpha * 255))
    79 
    80         # this is a bit strange, but using image.load()[x,y] = ... is
    81         # a lot slower than using image.putadata and then rotating the image
    82         # so we store all the pixels in an array and then create the image when saving
    83         self.pixels = []
    84 
    85     def draw_spectrum(self, x, spectrum):
    86         # for all frequencies, draw the pixels
    87         for index, alpha in self.y_to_bin:
    88             self.pixels.append(
    89                     self.palette[int((255.0 - alpha) * spectrum[index]
    90                         + alpha * spectrum[index + 1])])
    91 
    92         # if the FFT is too small to fill up the image, fill with black to the top
    93         for y in range(len(self.y_to_bin), self.image_height):
    94             self.pixels.append(self.palette[0])
    95 
    96     def save(self, filename, quality=90):
    97         self.image = Image.new(
    98                 'RGBA',
    99                 (self.image_height, self.image_width))
    100 
    101         self.image.putdata(self.pixels)
    102         self.image.transpose(Image.ROTATE_90).save(
    103                 filename,
    104                 quality=quality)
    105 
    106 
    107 class AudioProcessor(object):
    108     """
    109     The audio processor processes chunks of audio an calculates the spectrac centroid and the peak
    110     samples in that chunk of audio.
    111     """
    112     def __init__(self, input_filename, fft_size, window_function=numpy.hanning):
    113         max_level = get_max_level(input_filename)
    114 
    115         self.audio_file = audiolab.Sndfile(input_filename, 'r')
    116         self.fft_size = fft_size
    117         self.window = window_function(self.fft_size)
    118         self.spectrum_range = None
    119         self.lower = 100
    120         self.higher = 22050
    121         self.lower_log = math.log10(self.lower)
    122         self.higher_log = math.log10(self.higher)
    123         self.clip = lambda val, low, high: min(high, max(low, val))
    124 
    125         # figure out what the maximum value is for an FFT doing the FFT of a DC signal
    126         fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window)
    127         max_fft = (numpy.abs(fft)).max()
    128 
    129         # set the scale to normalized audio and normalized FFT
    130         self.scale = 1.0 / max_level / max_fft if max_level > 0 else 1
    131 
    132     def read(self, start, size, resize_if_less=False):
    133         """ read size samples starting at start, if resize_if_less is True and less than size
    134         samples are read, resize the array to size and fill with zeros """
    135 
    136         # number of zeros to add to start and end of the buffer
    137         add_to_start = 0
    138         add_to_end = 0
    139 
    140         if start < 0:
    141             # the first FFT window starts centered around zero
    142             if size + start <= 0:
    143                 return numpy.zeros(size) if resize_if_less else numpy.array([])
    144             else:
    145                 self.audio_file.seek(0)
    146 
    147                 add_to_start = - start  # remember: start is negative!
    148                 to_read = size + start
    149 
    150                 if to_read > self.audio_file.nframes:
    151                     add_to_end = to_read - self.audio_file.nframes
    152                     to_read = self.audio_file.nframes
    153         else:
    154             self.audio_file.seek(start)
    155 
    156             to_read = size
    157             if start + to_read >= self.audio_file.nframes:
    158                 to_read = self.audio_file.nframes - start
    159                 add_to_end = size - to_read
    160 
    161         try:
    162             samples = self.audio_file.read_frames(to_read)
    163         except RuntimeError:
    164             # this can happen for wave files with broken headers...
    165             return numpy.zeros(size) if resize_if_less else numpy.zeros(2)
    166 
    167         # convert to mono by selecting left channel only
    168         if self.audio_file.channels > 1:
    169             samples = samples[:,0]
    170 
    171         if resize_if_less and (add_to_start > 0 or add_to_end > 0):
    172             if add_to_start > 0:
    173                 samples = numpy.concatenate((numpy.zeros(add_to_start), samples), axis=1)
    174 
    175             if add_to_end > 0:
    176                 samples = numpy.resize(samples, size)
    177                 samples[size - add_to_end:] = 0
    178 
    179         return samples
    180 
    181     def spectral_centroid(self, seek_point, spec_range=110.0):
    182         """ starting at seek_point read fft_size samples, and calculate the spectral centroid """
    183 
    184         samples = self.read(seek_point - self.fft_size/2, self.fft_size, True)
    185 
    186         samples *= self.window
    187         fft = numpy.fft.rfft(samples)
    188         spectrum = self.scale * numpy.abs(fft)  # normalized abs(FFT) between 0 and 1
    189 
    190         length = numpy.float64(spectrum.shape[0])
    191 
    192         # scale the db spectrum from [- spec_range db ... 0 db] > [0..1]
    193         db_spectrum = ((20*(numpy.log10(spectrum + 1e-60))).clip(-spec_range, 0.0) + spec_range)/spec_range
    194 
    195         energy = spectrum.sum()
    196         spectral_centroid = 0
    197 
    198         if energy > 1e-60:
    199             # calculate the spectral centroid
    200 
    201             if self.spectrum_range == None:
    202                 self.spectrum_range = numpy.arange(length)
    203 
    204             spectral_centroid = (spectrum * self.spectrum_range).sum() / (energy * (length - 1)) * self.audio_file.samplerate * 0.5
    205 
    206             # clip > log10 > scale between 0 and 1
    207             spectral_centroid = (math.log10(self.clip(spectral_centroid, self.lower, self.higher)) - self.lower_log) / (self.higher_log - self.lower_log)
    208 
    209         return (spectral_centroid, db_spectrum)
    210 
    211 
    212     def peaks(self, start_seek, end_seek):
    213         """ read all samples between start_seek and end_seek, then find the minimum and maximum peak
    214         in that range. Returns that pair in the order they were found. So if min was found first,
    215         it returns (min, max) else the other way around. """
    216 
    217         # larger blocksizes are faster but take more mem...
    218         # Aha, Watson, a clue, a tradeof!
    219         block_size = 4096
    220 
    221         max_index = -1
    222         max_value = -1
    223         min_index = -1
    224         min_value = 1
    225 
    226         if start_seek < 0:
    227             start_seek = 0
    228 
    229         if end_seek > self.audio_file.nframes:
    230             end_seek = self.audio_file.nframes
    231 
    232         if end_seek <= start_seek:
    233             samples = self.read(start_seek, 1)
    234             return (samples[0], samples[0])
    235 
    236         if block_size > end_seek - start_seek:
    237             block_size = end_seek - start_seek
    238 
    239         for i in range(start_seek, end_seek, block_size):
    240             samples = self.read(i, block_size)
    241 
    242             local_max_index = numpy.argmax(samples)
    243             local_max_value = samples[local_max_index]
    244 
    245             if local_max_value > max_value:
    246                 max_value = local_max_value
    247                 max_index = local_max_index
    248 
    249             local_min_index = numpy.argmin(samples)
    250             local_min_value = samples[local_min_index]
    251 
    252             if local_min_value < min_value:
    253                 min_value = local_min_value
    254                 min_index = local_min_index
    255 
    256         return (min_value, max_value) if min_index < max_index else (max_value, min_value)
    257 
    258 
    259 def create_spectrogram_image(source_filename, output_filename,
    260         image_size, fft_size, progress_callback=None):
    261 
    262     processor = AudioProcessor(source_filename, fft_size, numpy.hamming)
    263     samples_per_pixel = processor.audio_file.nframes / float(image_size[0])
    264 
    265     spectrogram = SpectrogramImage(image_size, fft_size)
    266 
    267     for x in range(image_size[0]):
    268         if progress_callback and x % (image_size[0] / 10) == 0:
    269             progress_callback((x * 100) / image_size[0])
    270 
    271         seek_point = int(x * samples_per_pixel)
    272         next_seek_point = int((x + 1) * samples_per_pixel)
    273 
    274         (spectral_centroid, db_spectrum) = processor.spectral_centroid(seek_point)
    275 
    276         spectrogram.draw_spectrum(x, db_spectrum)
    277 
    278     if progress_callback:
    279         progress_callback(100)
    280 
    281     spectrogram.save(output_filename)
    282 
    283 
    284 def interpolate_colors(colors, flat=False, num_colors=256):
    285 
    286     palette = []
    287 
    288     for i in range(num_colors):
    289         # TODO: What does this do?
    290         index = (
    291                 (i *
    292                     (len(colors) - 1)  # 7
    293                 )  # 0..7..14..21..28...
    294             /
    295                 (num_colors - 1.0)  # 255.0
    296             )
    297 
    298         # TODO: What is the meaning of 'alpha' in this context?
    299         alpha = index - round(index)
    300 
    301         channels = list('rgb')
    302         values = dict()
    303 
    304         for k, v in zip(range(len(channels)), channels):
    305             if alpha > 0:
    306                 values[v] = (
    307                         (1.0 - alpha)
    308                     *
    309                         colors[int(index)][k]
    310                     +
    311                         alpha * colors[int(index) + 1][k]
    312                     )
    313             else:
    314                 values[v] = (
    315                         (1.0 - alpha)
    316                     *
    317                         colors[int(index)][k]
    318                     )
    319 
    320         if flat:
    321             palette.extend(
    322                 tuple(int(values[i]) for i in channels))
    323         else:
    324             palette.append(
    325                 tuple(int(values[i]) for i in channels))
    326 
    327     return palette
    328 
    329 
    330 def get_max_level(filename):
    331     max_value = 0
    332     buffer_size = 4096
    333     audio_file = audiolab.Sndfile(filename, 'r')
    334     n_samples_left = audio_file.nframes
    335 
    336     while n_samples_left:
    337         to_read = min(buffer_size, n_samples_left)
    338 
    339         try:
    340             samples = audio_file.read_frames(to_read)
    341         except RuntimeError:
    342             # this can happen with a broken header
    343             break
    344 
    345         # convert to mono by selecting left channel only
    346         if audio_file.channels > 1:
    347             samples = samples[:,0]
    348 
    349         max_value = max(max_value, numpy.abs(samples).max())
    350 
    351         n_samples_left -= to_read
    352 
    353     audio_file.close()
    354 
    355     return max_value
    356 
    357 if __name__ == '__main__':
    358     import sys
    359     sys.argv[4] = int(sys.argv[4])
    360     sys.argv[3] = tuple([int(i) for i in sys.argv[3].split('x')])
    361 
    362     create_spectrogram_image(*sys.argv[1:])
  • mediagoblin/media_types/audio/transcoders.py

    diff --git a/mediagoblin/media_types/audio/transcoders.py b/mediagoblin/media_types/audio/transcoders.py
    index a67f442..11ecf16 100644
    a b gi.require_version('Gst', '1.0')  
    4343from gi.repository import GObject, Gst
    4444Gst.init(None)
    4545
    46 
    47 # TODO: Now unused - remove.
    48 class Python2AudioThumbnailer(object):
     46class Python3AudioThumbnailer(object):
    4947    def __init__(self):
    5048        _log.info('Initializing {0}'.format(self.__class__.__name__))
    5149
    5250    def spectrogram(self, src, dst, **kw):
    53         import numpy
    54         # This third-party bundled module is Python 2-only.
    55         from mediagoblin.media_types.audio import audioprocessing
    56 
    57         width = kw['width']
    58         height = int(kw.get('height', float(width) * 0.3))
    59         fft_size = kw.get('fft_size', 2048)
     51        from mediagoblin.media_types.audio import audiotospectrogram
     52        fft_size = kw.get('fft_size', 1024)
    6053        callback = kw.get('progress_callback')
    61         processor = audioprocessing.AudioProcessor(
    62             src,
    63             fft_size,
    64             numpy.hanning)
    65 
    66         samples_per_pixel = processor.audio_file.nframes / float(width)
    67 
    68         spectrogram = audioprocessing.SpectrogramImage(width, height, fft_size)
    69 
    70         for x in range(width):
    71             if callback and x % (width / 10) == 0:
    72                 callback((x * 100) / width)
    73 
    74             seek_point = int(x * samples_per_pixel)
    75 
    76             (spectral_centroid, db_spectrum) = processor.spectral_centroid(
    77                 seek_point)
    78 
    79             spectrogram.draw_spectrum(x, db_spectrum)
    80 
    81         if callback:
    82             callback(100)
    83 
    84         spectrogram.save(dst)
     54        audiotospectrogram.drawSpectrogram(src, dst, fftSize = fft_size, progressCallback = callback)
    8555
    8656    def thumbnail_spectrogram(self, src, dst, thumb_size):
    8757        '''
    class Python2AudioThumbnailer(object):  
    11181
    11282        th.save(dst)
    11383
    114 
    115 class DummyAudioThumbnailer(Python2AudioThumbnailer):
    116     """A thumbnailer that just outputs a stock image.
    117 
    118     The Python package used for audio spectrograms, "scikits.audiolab", does not
    119     support Python 3 and is a constant source of problems for people installing
    120     MediaGoblin. Until the feature is rewritten, this thumbnailer class simply
    121     provides a generic image.
    122 
    123     TODO: Consider Python 3 compatible interfaces to libsndfile, such as
    124     https://pypi.python.org/pypi/PySoundFile/0.9.0.post1 as discussed here
    125     https://issues.mediagoblin.org/ticket/5467#comment:6
    126 
    127     """
    128     def spectrogram(self, src, dst, **kw):
    129         # Using PIL here in case someone wants to swap out the image for a PNG.
    130         # This will convert to JPEG, where simply copying the file won't.
    131         img = Image.open('mediagoblin/static/images/media_thumbs/video.jpg')
    132         img.save(dst)
    133 
    134 
    135 # Due to recurring problems with spectrograms under Python 2, and the fact we're
    136 # soon dropping Python 2 support, we're disabling spectrogram thumbnails. See #5594.
    137 AudioThumbnailer = DummyAudioThumbnailer
    138 
     84AudioThumbnailer = Python3AudioThumbnailer
    13985
    14086class AudioTranscoder(object):
    14187    def __init__(self):
  • mediagoblin/tests/test_audio.py

    diff --git a/mediagoblin/tests/test_audio.py b/mediagoblin/tests/test_audio.py
    index 9826ceb..62d582f 100644
    a b import imghdr  
    2525#os.environ['GST_DEBUG'] = '4,python:4'
    2626
    2727pytest.importorskip("gi.repository.Gst")
    28 pytest.importorskip("scikits.audiolab")
    2928import gi
    3029gi.require_version('Gst', '1.0')
    3130from gi.repository import Gst
  • setup.py

    diff --git a/setup.py b/setup.py
    index 3371955..33b79a1 100644
    a b if PY2:  
    119119    pyversion_install_requires.append('Werkzeug<=0.16.999') # Tested with 0.16.1
    120120    pyversion_install_requires.append('WTForms<=2.3.999') # Tested with 2.3.1
    121121    pyversion_install_requires.append('zipp<=1.2.999') # Tested with 1.2.0
     122    pyversion_install_requires.append('numpy<=1.18.999') # Tested with 1.18.4
     123    pyversion_install_requires.append('soundfile<=0.10.999') # Tested with 0.10.3.post1
    122124
    123125install_requires = [
    124126    'waitress',