FFT なスペクトラム可視化に音階をくっつけて(5)

あぁ、pcolor に誤解があったわ…。

タイトル通り、「FFT なスペクトラム可視化に音階をくっつけて」からの一連の流れの続き。

2点:

  1. 仕方なく contour 使ってたのは、pcolor が x, y, z プロットは出来ないと思いこんでいたため。
  2. 「log2 的グラフ」、やってみたら思ったより悪くない

後者は改めて「周波数帯を狭く絞り込んだ上で対数的プロット」としてみたら、全然悪くなかった、て話。

「~については~をみてね」がちょっと読むほうも辛かろうし、個人的事情からも「似非パッケージ化」したので、コードはいったん全部:

tiny_wave_wrapper.py
 1 #
 2 # NOTE:
 3 #   Don't think this module is highly flexible but this is just
 4 #   for my personal use. Especially this module can't handle
 5 #   high-resolution PCM (24bit), etc, etc, etc.
 6 #
 7 import itertools
 8 import wave
 9 
10 
11 try:
12     # for python 2.7
13     zip = itertools.izip
14 except AttributeError:
15     pass
16 
17 
18 class _WaveRWBase(object):
19     def __init__(self, waveobj):
20         self._wave = waveobj
21 
22     def __enter__(self):
23         return self
24 
25     def __exit__(self, type, value, tb):
26         try:
27             self._wave.close()
28         except:
29             pass
30 
31     def __getattr__(self, attrname):
32         return getattr(self._wave, attrname)
33 
34 
35 class WaveReader(_WaveRWBase):
36     def __init__(self, fname):
37         _WaveRWBase.__init__(self, wave.open(fname, "r"))
38 
39 
40 class WaveWriter(_WaveRWBase):
41     def __init__(self, fname, nchannels, sampwidth, samprate):
42         _WaveRWBase.__init__(self, wave.open(fname, "w"))
43         self._wave.setparams(
44             (nchannels, sampwidth, samprate,
45              # For seekable output streams, the wave header will
46              # automatically be updated to reflect the number of
47              # frames actually written.
48              0,
49              'NONE', 'not compressed'  # you can't change these
50              )
51             )
52 
53     def _writepcmraw(self, pcm):
54         import struct
55 
56         # NOTE: 'h' means 16bit integer.
57         packed = struct.pack('h' * len(pcm), *pcm)
58         self._wave.writeframes(packed)
59 
60     def writechannels(self, channels):
61         import struct
62         CHUNK_SIZE = 4096
63 
64         _pcm = []
65         for d in itertools.chain.from_iterable(zip(*channels)):
66             _pcm.append(d)
67             if len(_pcm) == CHUNK_SIZE:
68                 self._writepcmraw(_pcm)
69                 _pcm = []
70         if _pcm:
71             self._writepcmraw(_pcm)
pytinymts/scales.py
 1 import numpy as np
 2 
 3 
 4 #
 5 # see details: https://en.wikipedia.org/wiki/MIDI_tuning_standard
 6 #
 7 _SCALES = ["C", "C#/Db", "D", "D#/Eb", "E", "F", "F#/Gb", "G", "G#/Ab", "A", "A#/Bb", "B", "B#"]
 8 _SCALES_dic = {}
 9 for i, sc in enumerate(_SCALES):
10     _SCALES_dic[sc] = i
11     if "/" in sc:
12         sc1, sc2 = sc.split("/")
13         _SCALES_dic[sc1] = i
14         _SCALES_dic[sc2] = i
15 _SCALES_dic["Cb"] = _SCALES_dic["B"]
16 
17 
18 def nn2scale(d):
19     return int(d / 12) - 2, _SCALES[int(d) % 12]
20 
21 def nn2freq(d):
22     return np.power(2, (d - 69) / 12.) * 440
23 
24 def freq2nn(f):
25     return 69 + 12 * np.log2(f / 440.)
26 
27 def onsc2freq(on, sc):
28     return nn2freq((on + 2) * 12 + _SCALES_dic[sc])
pytinymts/mpl_support.py
 1 #############################################################
 2 #
 3 # matplotlib's custom scale implementation for MTS scales.
 4 #
 5 import numpy as np
 6 from numpy import ma
 7 from matplotlib import scale as mscale
 8 from matplotlib import transforms as mtransforms
 9 import matplotlib.ticker as ticker
10 from .scales import nn2scale, nn2freq, freq2nn
11 
12 
13 class MTSScaleScale(mscale.ScaleBase):
14     name = 'midituningstandardscale'
15 
16     def __init__(self, axis, **kwargs):
17         mscale.ScaleBase.__init__(self)
18 
19     def get_transform(self):
20         return self.MTSScaleTransform()
21 
22     def set_default_locators_and_formatters(self, axis):
23         class _nnformat(ticker.Formatter):
24             def __init__(self, minor):
25                 ticker.Formatter.__init__(self)
26                 if minor:
27                     self._fmt = lambda oc, sc: sc
28                 else:
29                     self._fmt = lambda oc, sc: "%s%d    -" % (sc, oc)
30         
31             def __call__(self, f, pos=None):
32                 if f > 0:
33                     nn = freq2nn(f)
34                     if nn - int(nn) < 1e-2:
35                         oc, sc = nn2scale(nn)
36                         return self._fmt(oc, sc)
37                 return ""
38 
39         _all = np.arange(-24, 168)
40         _maj = np.array([v for v in _all if v % 12 == 0])
41         _min = np.array([v for v in _all if v % 12 in (2, 4, 5, 7, 9, 11)])
42         axis.set_major_locator(ticker.FixedLocator(nn2freq(_maj)))
43         axis.set_minor_locator(ticker.FixedLocator(nn2freq(_min)))
44         axis.set_major_formatter(ticker.FuncFormatter(_nnformat(False)))
45         axis.set_minor_formatter(ticker.FuncFormatter(_nnformat(True)))
46 
47     def limit_range_for_scale(self, vmin, vmax, minpos):
48         return max(nn2freq(-2), vmin), max(nn2freq(-2), vmax)
49 
50     class MTSScaleTransform(mtransforms.Transform):
51         input_dims = 1
52         output_dims = 1
53         is_separable = True
54 
55         def __init__(self):
56             mtransforms.Transform.__init__(self)
57 
58         def transform_non_affine(self, f):
59             masked = ma.masked_where((f == 0), f)
60             if masked.mask.any():
61                 return 69 + 12 * ma.log2(f / 440.)
62             else:
63                 return 69 + 12 * np.log2(f / 440.)
64 
65         def inverted(self):
66             return MTSScaleScale.InvertedMTSScaleTransform()
67 
68     class InvertedMTSScaleTransform(mtransforms.Transform):
69         input_dims = 1
70         output_dims = 1
71         is_separable = True
72 
73         def __init__(self):
74             mtransforms.Transform.__init__(self)
75 
76         def transform_non_affine(self, d):
77             return nn2freq(d)
78 
79         def inverted(self):
80             return MTSScaleScale.MTSScaleTransform()
81 
82 mscale.register_scale(MTSScaleScale)

空の pytinymts/__init__.py がいるが空なので省略。

で、main:

 1 import re
 2 import numpy as np
 3 import matplotlib.pyplot as plt
 4 from tiny_wave_wrapper import WaveReader, WaveWriter
 5 from pytinymts import mpl_support
 6 from pytinymts.scales import onsc2freq
 7 
 8 def _saveasgraph(X, freq, F, fnum, ind):
 9     fig, ax_ = plt.subplots()
10     for chn in range(2):
11         ax = plt.subplot(1, 2, chn + 1)
12 
13         Y = freq[:len(freq) // 2]
14         Z = F[chn].T[:len(freq) // 2,:]
15         if ind is not None:
16             Y, Z = Y[ind], Z[ind, :]
17 
18         ax.pcolorfast(X, Y, Z, cmap='jet')
19         ax.set_yscale('midituningstandardscale')
20         ax.set_xticks([])
21         ax.grid(True)
22 
23     fig.tight_layout()
24     fig.savefig("out_%05d.jpg" % (fnum))
25     plt.close(fig)
26 
27 
28 if __name__ == '__main__':
29     import argparse
30     parser = argparse.ArgumentParser()
31     parser.add_argument("-s", "--step", type=int)
32     parser.add_argument("-u", "--upper_limit_of_view", help="Hz", type=int)
33     parser.add_argument("-l", "--lower_limit_of_view", help="Hz", type=int)
34     parser.add_argument("-U", "--upper_scale_limit_of_view", help="scale", type=str)
35     parser.add_argument("-L", "--lower_scale_limit_of_view", help="scale", type=str)
36     parser.add_argument("target")
37     args = parser.parse_args()
38 
39     with WaveReader(args.target) as fi:
40         nchannels, width, rate, nframes, _, _ = fi.getparams()
41         raw = np.fromstring(fi.readframes(nframes), dtype=np.int16)
42         channels = raw[::2], raw[1::2]
43 
44     if args.step:
45         step = args.step
46     else:
47         step = rate // 16
48     #
49     lims = [[], []]
50     if args.upper_limit_of_view:
51         lims[0].append(args.upper_limit_of_view)
52     if args.upper_scale_limit_of_view:
53         m = re.match(r"([A-G]#?)(\d)", args.upper_scale_limit_of_view)
54         lims[0].append(onsc2freq(int(m.group(2)), m.group(1)))
55     if args.lower_limit_of_view:
56         lims[1].append(args.lower_limit_of_view)
57     if args.lower_scale_limit_of_view:
58         m = re.match(r"([A-G]#?)(\d)", args.lower_scale_limit_of_view)
59         lims[1].append(onsc2freq(int(m.group(2)), m.group(1)))
60     #
61     nframes = len(channels[0])
62     nframes -= (nframes % step)  # drop the fraction frames
63     tmp = np.fft.fft(channels[0][:step])  # FIXME: to be more smart.
64     F = np.array([
65         np.zeros((rate // step * 10, len(tmp))),
66         np.zeros((rate // step * 10, len(tmp)))
67         ])
68     freq = np.fft.fftfreq(F[0].shape[1], 1./rate)
69     Y = freq[:len(freq) // 2]
70     #
71     ind = None
72     for ulim in lims[0]:
73         _ = (Y <= ulim)
74         if ind is None:
75             ind = _
76         else:
77             ind = np.logical_and(ind, _)
78     for llim in lims[1]:
79         _ = (Y >= llim)
80         if ind is None:
81             ind = _
82         else:
83             ind = np.logical_and(ind, _)
84     #
85     X = np.arange(F[0].shape[0])
86     #
87     p = 0
88     for i in range(0, nframes, step):
89         for chn in range(len(channels)):
90             channel = channels[chn]
91             f = np.abs(np.fft.fft(channel[i:i + step]))
92             f = f / f.max()
93             if p > F[chn].shape[0] - 1:
94                 F[chn] = np.roll(F[chn], shift=-1, axis=0)
95             F[chn][min(p, F[0].shape[0] - 1)] = f
96         _saveasgraph(X, freq, F, i // step, ind)
97         p += 1

今回のスクリプトは周波数帯の絞込みに音階も使えるように拡張してある:

1 me@host: ~$ python hoge.py zzz.wav -U"E6" -L"C2"

てな具合。てふか出力ファイル名の制御もすべきやね。まぁそれはあとでいいし、「あなたが」そうしたいならすぐ出来ろ?

で、(4)と同じノリで動画に:

「楽譜的に見えないもんであろうか?」てのは、こういう複雑でない曲ならイケなくはないね。(注意してほしいのはこの「対数的」な見方が、「太い線が強度が高い」という固定観念を時として捨てなければならないということ。グラフの下ほど「太い」のです、単純に。)

なお、使った音源は自分である方法で「打ち込んだ」荒城の月だけれども、これ自身に一ネタありそうなので、そっちは別途。