nearly got audio?

author: Tim Redfern <tim@herge.(none)> 2013-04-03 17:59:16 +0100
committer: Tim Redfern <tim@herge.(none)> 2013-04-03 17:59:16 +0100
commit: ef217eb0c2450e50a25e6ae2aee36178fcdd54c7 (patch)
tree: 5c51e2765c0015bbb3856078e600fb55cf3e9d96 /rotord
parent: f4c9ea89fa6d642813dc06387e956ef062ad9de1 (diff)
6 files changed, 42 insertions, 240 deletions
diff --git a/rotord/1wave.mp3 b/rotord/1wave.mp3
new file mode 100644
index 0000000..040b65e
--- /dev/null
+++ b/rotord/1wave.mp3
diff --git a/rotord/1wave.wav b/rotord/1wave.wav
new file mode 100644
index 0000000..0ae9d9b
--- /dev/null
+++ b/rotord/1wave.wav
diff --git a/rotord/rotor.cpp b/rotord/rotor.cpp
index 53ba33b..f7c017d 100644
--- a/rotord/rotor.cpp
+++ b/rotord/rotor.cpp
@@ -10,14 +10,6 @@ bool fequal(const float u,const float v){
 
 using namespace Rotor;
 
-/*
-    string soname="qm-vamp-plugins";
-    string id="qm-tempotracker";
-	string myname="";
-	string output="";
-	int outputNo=0;
-	*/
-
 void Render_context::runTask() {
 	while (!isCancelled()) {
 		int cmd=0;
@@ -29,13 +21,10 @@ void Render_context::runTask() {
 		mutex.unlock();
 		if (cmd==ANALYSE_AUDIO) {
 			state=ANALYSING_AUDIO;
-			//audio_analyser.process(audio_filename);
-			//vampHost::runPlugin("","qm-vamp-plugins","qm-tempotracker", "",0, audio_filename, cerr,true);
 			vector<base_audio_processor*> proc;
 			proc.push_back(audio_thumb);
 			if (load_audio(audio_filename,proc)) {
 				state=AUDIO_READY;
-				//set the response
 			}
 			else state=IDLE;
 		}
@@ -72,8 +61,8 @@ bool Signal_output::render(const float duration, const float framerate,string &x
 }
 
 Command_response Render_context::session_command(const std::vector<std::string>& command){
-										//method,id,command1,{command2,}{body}
-										//here we allow the controlling server to communicate with running tasks
+											//method,id,command1,{command2,}{body}
+											//here we allow the controlling server to communicate with running tasks
 	Command_response response;
 	response.status=HTTPResponse::HTTP_BAD_REQUEST;
 	if (command[2]=="audio") {
@@ -82,9 +71,6 @@ Command_response Render_context::session_command(const std::vector<std::string>&
 		    if (state==IDLE) {
 												//check file exists
 			Poco::File f=Poco::File(command[3]);
-			//std::auto_ptr<std::istream> pStr(URIStreamOpener::defaultOpener().open(command[3]));
-
-
 			if (f.exists()) {
 												//pass to worker thread ??if engine is ready?? ??what if engine has finished but results aren't read??
 			    audio_filename=command[3]; 			//for now, store session variables in memory
@@ -269,193 +255,8 @@ Command_response Render_context::session_command(const std::vector<std::string>&
 //great to use c++11 features
 
 bool Render_context::load_audio(const string &filename,vector<base_audio_processor*> processors){
-	//load audio data from file
-	//what's the best way to use this? the model is background processing, and we want to update a progress bar
-	//could pass a function pointer to call when each chunk of data becomes available?
-	//should the data processing be the responsibility of an object or a function?
-	//in the case of the audio thumbnail, there will be just one place where its kept
-	//in the case of audio analysis, the daemon will pass each audio analysis object each chunk of data as it gets it
-	//there could even be an array of audio analysis functions to perform simultaneously?
-	//how about a vector of  objects that subclass the base audio processor class?
-
-	//1st get parameters and initialise the processors
-	//then begin data loop locking progress variable after each frame
-
-	//
-	//
-	//the example in ffmpeg works, but it isn't one that identifies a codec- it is hard coded to look for a codec for AV_CODEC_ID_MP2
-	//it also doesn't load through libavformat - which opens containers- it just loads a naked .mp2 stream
-	//
-	//
-
-	/*
-	av_register_all();
-
-	std::shared_ptr<AVFormatContext> avFormat(avformat_alloc_context(), &avformat_free_context);
-
-	auto avFormatPtr = avFormat.get();
-	if (avformat_open_input(&avFormatPtr,filename.c_str(),nullptr, nullptr) != 0) {
-		cerr <<"Rotor: Error while calling avformat_open_input (probably invalid file format)" << endl;
-		return false;
-	}
-
-	if (avformat_find_stream_info(avFormat.get(), nullptr) < 0) {
-		cerr << "Rotor: Error while calling avformat_find_stream_info" << endl;
-		return false;
-	}
-
-	av_dump_format(avFormat.get(), 0, 0, false); //avformat.h line 1256
-
-
-
-	AVStream* stream = nullptr;
-	for (unsigned int i = 0; i < avFormat->nb_streams; ++i) {
-		if (avFormat->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
-			// we've found a stream!
-			stream = avFormat->streams[i];
-			break;
-		}
-	}
-	if (!stream) {
-		cerr <<"Rotor: Didn't find any audio stream in the file"<< endl;
-		return false;
-	}
-
-	// getting the required codec structure
-	const auto codec = avcodec_find_decoder(stream->codec->codec_id); //returns AVCodec*
-	if (codec == nullptr) {
-		cerr <<"Rotor: Audio codec not available"<< endl;
-		return false;
-	}
-
-	//AVCodecContext??  avFormat->streams[i]->codec
-
-	// allocating a structure
-	std::shared_ptr<AVCodecContext> audioCodec(avcodec_alloc_context3(codec), [](AVCodecContext* c) { avcodec_close(c); av_free(c); });
-
-	// extradata???
-	// we need to make a copy of videoStream->codec->extradata and give it to the context
-	// make sure that this vector exists as long as the avVideoCodec exists
-	std::vector<uint8_t> codecContextExtraData;
-	codecContextExtraData.assign(stream->codec->extradata, stream->codec->extradata + stream->codec->extradata_size);
-
-	audioCodec->extradata = reinterpret_cast<uint8_t*>(codecContextExtraData.data());
-	audioCodec->extradata_size = codecContextExtraData.size();
-
-	// initializing the structure by opening the codec
-	if (avcodec_open2(audioCodec.get(), codec, nullptr) < 0) {
-		cerr <<"Rotor: Could not open codec"<< endl;
-		return false;
-	}
-
-
-	//avcodec.h line 1026
-
-
-	Packet packet;
-
-// decoding code here
-	//cerr << "audio codec context -  sample rate: "<< audioCodec->sample_rate <<", channels: "<<audioCodec->channels<<", sample format: "<<audioCodec->sample_fmt<<endl;
-
-	// suspect why was this in twice 210313
-	//do {
-	//	if (packet.packet.stream_index != stream->index)
-	///		continue;	// the packet is not about the stream we want, let's jump again the start of the loop
-	//} while(0);
-	//
-
-	// allocating an AVFrame
-	std::shared_ptr<AVFrame> avFrame(avcodec_alloc_frame(), &av_free);
-
-	// the current packet of data
-	//Packet packet;
-	// data in the packet of data already processed
-	size_t offsetInData = 0;
-
-
-	bool foundPacket=false;
-
-	// the decoding loop, running until EOF
-	while (true) {
-		// reading a packet using libavformat
-		if (offsetInData >= packet.packet.size) {
-			do {
-				packet.reset(avFormat.get());
-				if (packet.packet.stream_index != stream->index)
-					continue;
-			} while(0);
-			if (!foundPacket){
-				cerr << "audio codec context -  sample rate: "<< audioCodec->sample_rate <<", channels: "<<audioCodec->channels<<", sample format: "<<audioCodec->sample_fmt<<endl;
-				foundPacket=true;
-			}
-
-		}
-
-
-
-		// preparing the packet that we will send to libavcodec
-		AVPacket packetToSend;
-		packetToSend.data = packet.packet.data + offsetInData;
-		packetToSend.size = packet.packet.size - offsetInData;
-
-		// sending data to libavcodec
-		int isFrameAvailable = 0;
-		//const auto processedLength = avcodec_decode_video2(avVideoCodec.get(), avFrame.get(), &isFrameAvailable, &packetToSend);
-
-		const auto processedLength = avcodec_decode_audio4(audioCodec.get(), avFrame.get(), &isFrameAvailable, &packetToSend);
-		//utils.c line 2018
-
-		//unsigned int bitsPerSample = av_get_bytes_per_sample(stream.codecContext->sample_fmt) * CHAR_BIT;    // 16 or 32
-		//unsigned int numberOfChannels = stream.codecContext->channels;     // 1 for mono, 2 for stereo, or more
-		//unsigned int numberOfSamplesContainedInTheFrame = stream.frame->nb_samples * stream.codecContext->channels;
-		//unsigned int numberOfSamplesToPlayPerSecond = stream.codecContext->sample_rate;   // usually 44100 or 48000
-		//const void* data = stream.frame->data[0];
-
-
-		if (processedLength < 0) {
-			//av_free_packet(&packet); shouldn't have to because of type safe wrapper
-			cerr <<"Error "<<processedLength<< " while processing the data"<< endl;
-			return false;
-		}
-		offsetInData += processedLength;
-
-
-		// processing the image if available
-		if (isFrameAvailable) {
-			// display image on the screen
-
-			// sleeping until next frame
-			const auto msToWait = avVideoContext->ticks_per_frame * 1000 * avVideoContext->time_base.num / avVideoContext->time_base.den;
-			std::this_thread::sleep(std::chrono::milliseconds(msToWait));
-		}
-		*/
-
-
-		//http://www.gamedev.net/topic/624876-how-to-read-an-audio-file-with-ffmpeg-in-c/
-		// Initialize FFmpeg
-		
-		//this seems to only read a certain percentage of the sound file? - different for wav and mp3
-		//peculiar, because the data comes in as frames and we just read them until all are done
-		//is it possible that thhe problem lies with the drawing process only?
-		//wav sees 6/10 waves and mp3 sees 3.5/10
-		//could this be an aliasing effect i.e. from reading the sample format wrong?
-		//I think probably not?
-		
-		//every format combination seems to miss a different number of samples
-		//I don't understand how different formats would make the samples seem to truncate?
-		//unless its an aliasing type thing?
-		
-		//wave.mp3 reads 16.409% of 160913 
-		//wave.wav reads 29.706% of 882044
-		
-		
-		
-	av_register_all();
 	
-	//??
-	//avcodec_init();
-	avcodec_register_all();
-	//??
+	av_register_all();
 
 	AVFrame* frame = avcodec_alloc_frame();
 	if (!frame)
@@ -464,8 +265,6 @@ bool Render_context::load_audio(const string &filename,vector<base_audio_process
 		return false;
 	}
 
-	// you can change the file name "01 Push Me to the Floor.wav" to whatever the file is you're reading, like "myFile.ogg" or
-	// "someFile.webm" and this should still work
 	AVFormatContext* formatContext = NULL;
 	if (avformat_open_input(&formatContext, filename.c_str(), NULL, NULL) != 0)
 	{
@@ -484,7 +283,6 @@ bool Render_context::load_audio(const string &filename,vector<base_audio_process
 	}
 
 	AVStream* audioStream = NULL;
-	// Find the audio stream (some container files can have multiple streams in them)
 	for (unsigned int i = 0; i < formatContext->nb_streams; ++i)
 	{
 		if (formatContext->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
@@ -519,12 +317,6 @@ bool Render_context::load_audio(const string &filename,vector<base_audio_process
 		std::cout << "Couldn't open the context with the decoder" << std::endl;
 		return false;
 	}
-	
-	//
-	// why is the file truncated???
-	//if(codecContext->codec->capabilities & CODEC_CAP_TRUNCATED) codecContext->codec->capabilities|=CODEC_FLAG_TRUNCATED;
-	//
-	//
 
 	av_dump_format(formatContext, 0, 0, false); //avformat.h line 1256
 	int samples = ((formatContext->duration + 5000)*codecContext->sample_rate)/AV_TIME_BASE;
@@ -532,9 +324,6 @@ bool Render_context::load_audio(const string &filename,vector<base_audio_process
 	std::cout << "This stream has " << codecContext->channels << " channels, a sample rate of " << codecContext->sample_rate << "Hz and "<<samples <<" samples" << std::endl;
 	std::cout << "The data is in format " <<codecContext->sample_fmt<< " (aka "<< av_get_sample_fmt_name(codecContext->sample_fmt) << ") "<<std::endl;
 
-	//we can now tell the processors the format
-	//we can work out the number of samples at this point
-
 	for (auto p: processors) {
 		p->init(codecContext->channels,16,samples);
 	}
@@ -545,16 +334,13 @@ bool Render_context::load_audio(const string &filename,vector<base_audio_process
 	
 	bool diag=true;
 
-	// Read the packets in a loop
 	while (true)
-	//while(sample_processed<samples)
 	{
 		int ret=av_read_frame(formatContext, &packet);
 		if (ret<0) {
 			cerr << "finished with code "<<ret <<(ret==AVERROR_EOF?" ,EOF":"")<<endl;
 			break;
 		}
-		//av_read_frame(formatContext, &packet); //hangs once the packets have been read
 		if (packet.stream_index == audioStream->index)
 		{
 			// Try to decode the packet into a frame
@@ -580,7 +366,6 @@ bool Render_context::load_audio(const string &filename,vector<base_audio_process
 					diag=false;
 				}
 
-				//std::cout << "Got a frame: bytes " << bytes << ", "<<frame->nb_samples<<" samples"<<std::endl;
 				//now we can pass the data to the processor(s)
 				for (auto p: processors) {
 					sample_processed=p->process_frame(frame->data[0],frame->nb_samples);
@@ -592,7 +377,7 @@ bool Render_context::load_audio(const string &filename,vector<base_audio_process
 			}
 		}
 		// You *must* call av_free_packet() after each call to av_read_frame() or else you'll leak memory
-		av_free_packet(&packet);  //crashes here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! SIGSEV In _int_free (av=0xb4600010, p=0xb46025c8, have_lock=0) at malloc.c:4085 ()
+		av_free_packet(&packet);  
 	}
 
 	// Some codecs will cause frames to be buffered up in the decoding process. If the CODEC_CAP_DELAY flag
@@ -614,9 +399,8 @@ bool Render_context::load_audio(const string &filename,vector<base_audio_process
 		}
 	}
 	
-	cerr << "finished processed: "<<sample_processed << " samples of  "<<samples<<" , "<<((double)sample_processed*100)/samples<<"%"<<  std::endl;
+	cerr << "finished processing: "<<sample_processed << " samples of  "<<samples<<", "<<((double)sample_processed*100)/samples<<"%"<<  std::endl;
 					
-
 	// Clean up!
 	av_free(frame);
 	avcodec_close(codecContext);
@@ -695,6 +479,9 @@ void audio_thumbnailer::init(int _channels,int _bits,int _samples) {
 	out_sample=0; //sample in whole track
 	offset=0x1<<(bits-1); //signed audio
 	scale=1.0/offset;
+	sample=0;
+	samples=0;
+	accum=0.0;
 }
 int audio_thumbnailer::process_frame(uint8_t *_data,int samples_in_frame){
 	//begin by processing remaining samples
@@ -704,36 +491,51 @@ int audio_thumbnailer::process_frame(uint8_t *_data,int samples_in_frame){
 	int stride=channels*bytes;
 	int in_sample=0;
 	while (in_sample<samples_in_frame&&column<width) {
-		int sample=0;
-		int samples=0;
-		double accum=0;
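+		//fill the current column; it may be one left unfinished by the previous frame, since sample/samples/accum persist between calls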
+		
 		while (sample<samples_per_column&&in_sample<samples_in_frame) {
-			//accumulate squares for this column
+			//accumulate squared sample values for this column until the column is full or the frame runs out of samples
 			for (int i=0;i<channels;i++) {
-				int this_val=0;
+				unsigned int this_val=0;
 				for (int j=0;j<bytes;j++) {
 					this_val+=_data[(sample*stride)+(i*bytes)+j]<<(j*8);
 				}
 				//convert from integer data format - i.e s16p - to audio signal in -1..1 range
 				//don't know how many bytes we are dealing with necessarily?
 				//double val=((double)(this_val-offset))*scale;
-				if (this_val>offset) this_val=-(this_val-(offset*2));
-				double val=((double)this_val)*scale;
-				accum+=abs(val); //(val*val);
+				
+				//
+				//cerr << this_val << " ";
+				//
+				
+				
+				//if (this_val>offset) this_val=-(this_val-(offset*2));
+				
+				//this_val -=offset;
+				double val=((double)((int16_t)this_val))*scale;
+				accum+=val*val;
 				samples++;
 			}
 			in_sample++;
 			sample++;
 			out_sample++;
 		}
-		//get root-mean
-		double mean=accum/samples; //pow(accum/samples,0.5);
-		int colheight=height*mean*0.5;
-		int hh=height>>1;
-		for (int i=0;i<height;i++) {
-			data[i*width+column]=abs(i-hh)<colheight?0xff:0x00;
+		if (sample==samples_per_column) { //finished a column
+			//get root-mean-square (RMS) of the samples accumulated for this column
+			double mean=pow(accum/samples,0.5);
+			if (column==0) {
+				cerr << "first column total: "<< accum << " in " << samples << " samples, average " << (accum/samples)<<endl;
+			}
+			int colheight=height*mean*0.5;
+			int hh=height>>1;
+			for (int i=0;i<height;i++) {
+				data[i*width+column]=abs(i-hh)<colheight?0xff:0x00;
+			}
+			column++;
+			sample=0;
+			samples=0;
+			accum=0.0;
 		}
-		column++;
 	}
 	return out_sample;
 }
diff --git a/rotord/rotor.h b/rotord/rotor.h
index b7ffc25..436de10 100755
--- a/rotord/rotor.h
+++ b/rotord/rotor.h
@@ -345,8 +345,8 @@ namespace Rotor {
 		//draw pixels based on rms value
 		public:
 			audio_thumbnailer(){
-				height=16;
-				width=64;  //fit 
+				height=48;
+				width=128;  //fit 
 				data=new uint8_t[height*width];
 				memset(data,0,height*width);
 			}
@@ -358,10 +358,10 @@ namespace Rotor {
 			string print();
 			uint8_t *data;
 			int height,width,samples_per_column;
-			int column,out_sample;
+			int column,out_sample,sample,samples;
 			//for drawing graph
 			int offset;
-			double scale;
+			double scale,accum;
 	};
 	class Render_context: public Poco::Task {				//Poco task object
 													//manages a 'patchbay'
diff --git a/rotord/silence.mp3 b/rotord/silence.mp3
index 32fbbb4..888e669 100644
--- a/rotord/silence.mp3
+++ b/rotord/silence.mp3
diff --git a/rotord/silence.wav b/rotord/silence.wav
index 3c98ffc..24d262a 100644
--- a/rotord/silence.wav
+++ b/rotord/silence.wav
author	Tim Redfern <tim@herge.(none)>	2013-04-03 17:59:16 +0100
committer	Tim Redfern <tim@herge.(none)>	2013-04-03 17:59:16 +0100
commit	ef217eb0c2450e50a25e6ae2aee36178fcdd54c7 (patch)
tree	5c51e2765c0015bbb3856078e600fb55cf3e9d96 /rotord
parent	f4c9ea89fa6d642813dc06387e956ef062ad9de1 (diff)