/* * RaspberryPi based FM transmitter. For the original idea, see: * * http://www.icrobotics.co.uk/wiki/index.php/Turning_the_Raspberry_Pi_Into_an_FM_Transmitter * * All credit to Oliver Mattos and Oskar Weigl for creating the original code. * * I have taken their idea and reworked it to use the Pi DMA engine, so * reducing the CPU overhead for playing a .wav file from 100% to about 1.6%. * * I have implemented this in user space, using an idea I picked up from Joan * on the Raspberry Pi forums - credit to Joan for the DMA from user space * idea. * * The idea of feeding the PWM FIFO in order to pace DMA control blocks comes * from ServoBlaster, and I take credit for that :-) * * This code uses DMA channel 0 and the PWM hardware, with no regard for * whether something else might be trying to use it at the same time (such as * the 3.5mm jack audio driver). * * I know nothing much about sound, subsampling, or FM broadcasting, so it is * quite likely the sound quality produced by this code can be improved by * someone who knows what they are doing. There may be issues realting to * caching, as the user space process just writes to its virtual address space, * and expects the DMA controller to see the data; it seems to work for me * though. * * NOTE: THIS CODE MAY WELL CRASH YOUR PI, TRASH YOUR FILE SYSTEMS, AND * POTENTIALLY EVEN DAMAGE YOUR HARDWARE. THIS IS BECAUSE IT STARTS UP THE DMA * CONTROLLER USING MEMORY OWNED BY A USER PROCESS. IF THAT USER PROCESS EXITS * WITHOUT STOPPING THE DMA CONTROLLER, ALL HELL COULD BREAK LOOSE AS THE * MEMORY GETS REALLOCATED TO OTHER PROCESSES WHILE THE DMA CONTROLLER IS STILL * USING IT. I HAVE ATTEMPTED TO MINIMISE ANY RISK BY CATCHING SIGNALS AND * RESETTING THE DMA CONTROLLER BEFORE EXITING, BUT YOU HAVE BEEN WARNED. I * ACCEPT NO LIABILITY OR RESPONSIBILITY FOR ANYTHING THAT HAPPENS AS A RESULT * OF YOU RUNNING THIS CODE. IF IT BREAKS, YOU GET TO KEEP ALL THE PIECES. * * As for the original code, this code is released under the GPL. * * Richard Hirst December 2012 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include // The .wav file is mono at 22050Hz, which means we have a new sample every // 45.4us. We want to adjust the 100MHz core frequency at 10 times that so as // to provide some level of subsampling to improve quality. The basic idea is // to maintain a buffer of 4000 values to write to the clock control register // and then arrange for the DMA controller to write the values sequentially at // 4.54us intervals. The control code can then wake up every 10ms or so and // populate the buffer with new samples. At 4.54us per sample, a 4000 sample // buffer will last a bit over 18ms, so waking every 10ms should be sufficient. // // Total memory needed is: // // The frequencies 4000 * 4 // CBs to set the frequency 4000 * 32 // CBs to cause delays 4000 * 32 // // Process can wake every 10ms and update all samples based on where the DMA // CB is pointed. #define NUM_SAMPLES 4000 #define NUM_CBS (NUM_SAMPLES * 2) #define BCM2708_DMA_NO_WIDE_BURSTS (1<<26) #define BCM2708_DMA_WAIT_RESP (1<<3) #define BCM2708_DMA_D_DREQ (1<<6) #define BCM2708_DMA_PER_MAP(x) ((x)<<16) #define BCM2708_DMA_END (1<<1) #define BCM2708_DMA_RESET (1<<31) #define BCM2708_DMA_INT (1<<2) #define DMA_CS (0x00/4) #define DMA_CONBLK_AD (0x04/4) #define DMA_DEBUG (0x20/4) #define DMA_BASE 0x20007000 #define DMA_LEN 0x24 #define PWM_BASE 0x2020C000 #define PWM_LEN 0x28 #define CLK_BASE 0x20101000 #define CLK_LEN 0xA8 #define GPIO_BASE 0x20200000 #define GPIO_LEN 0xB4 #define PWM_CTL (0x00/4) #define PWM_DMAC (0x08/4) #define PWM_RNG1 (0x10/4) #define PWM_FIFO (0x18/4) #define PWMCLK_CNTL 40 #define PWMCLK_DIV 41 #define GPCLK_CNTL (0x70/4) #define GPCLK_DIV (0x74/4) #define PWMCTL_MODE1 (1<<1) #define PWMCTL_PWEN1 (1<<0) #define PWMCTL_CLRF (1<<6) #define PWMCTL_USEF1 (1<<5) #define PWMDMAC_ENAB (1<<31) // I think this means it requests as soon as there is one free slot in the FIFO // which is what we want as burst DMA would mess up our timing.. #define PWMDMAC_THRSHLD ((15<<8)|(15<<0)) #define GPFSEL0 (0x00/4) typedef struct { uint32_t info, src, dst, length, stride, next, pad[2]; } dma_cb_t; typedef struct { uint8_t *virtaddr; uint32_t physaddr; } page_map_t; page_map_t *page_map; static uint8_t *virtbase; static volatile uint32_t *pwm_reg; static volatile uint32_t *clk_reg; static volatile uint32_t *dma_reg; static volatile uint32_t *gpio_reg; struct control_data_s { dma_cb_t cb[NUM_CBS]; uint32_t sample[NUM_SAMPLES]; }; #define PAGE_SIZE 4096 #define PAGE_SHIFT 12 #define NUM_PAGES ((sizeof(struct control_data_s) + PAGE_SIZE - 1) >> PAGE_SHIFT) static struct control_data_s *ctl; static void udelay(int us) { struct timespec ts = { 0, us * 1000 }; nanosleep(&ts, NULL); } static void terminate(int dummy) { if (dma_reg) { dma_reg[DMA_CS] = BCM2708_DMA_RESET; udelay(10); } exit(1); } static void fatal(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); terminate(0); } static uint32_t mem_virt_to_phys(void *virt) { uint32_t offset = (uint8_t *)virt - virtbase; return page_map[offset >> PAGE_SHIFT].physaddr + (offset % PAGE_SIZE); } static uint32_t mem_phys_to_virt(uint32_t phys) { uint32_t pg_offset = phys & (PAGE_SIZE - 1); uint32_t pg_addr = phys - pg_offset; int i; for (i = 0; i < NUM_PAGES; i++) { if (page_map[i].physaddr == pg_addr) { return (uint32_t)virtbase + i * PAGE_SIZE + pg_offset; } } fatal("Failed to reverse map phys addr %08x\n", phys); return 0; } static void * map_peripheral(uint32_t base, uint32_t len) { int fd = open("/dev/mem", O_RDWR); void * vaddr; if (fd < 0) fatal("Failed to open /dev/mem: %m\n"); vaddr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, base); if (vaddr == MAP_FAILED) fatal("Failed to map peripheral at 0x%08x: %m\n", base); close(fd); return vaddr; } int main(int argc, char **argv) { int i, fd, pid; char pagemap_fn[64]; // Catch all signals possible - it is vital we kill the DMA engine // on process exit! for (i = 0; i < 64; i++) { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_handler = terminate; sigaction(i, &sa, NULL); } dma_reg = map_peripheral(DMA_BASE, DMA_LEN); pwm_reg = map_peripheral(PWM_BASE, PWM_LEN); clk_reg = map_peripheral(CLK_BASE, CLK_LEN); gpio_reg = map_peripheral(GPIO_BASE, GPIO_LEN); virtbase = mmap(NULL, NUM_PAGES * PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS|MAP_NORESERVE|MAP_LOCKED, -1, 0); if (virtbase == MAP_FAILED) fatal("Failed to mmap physical pages: %m\n"); if ((unsigned long)virtbase & (PAGE_SIZE-1)) fatal("Virtual address is not page aligned\n"); printf("Virtual memory mapped at %p\n", virtbase); page_map = malloc(NUM_PAGES * sizeof(*page_map)); if (page_map == 0) fatal("Failed to malloc page_map: %m\n"); pid = getpid(); sprintf(pagemap_fn, "/proc/%d/pagemap", pid); fd = open(pagemap_fn, O_RDONLY); if (fd < 0) fatal("Failed to open %s: %m\n", pagemap_fn); if (lseek(fd, (off_t)virtbase >> 9, SEEK_SET) != (off_t)virtbase >> 9) fatal("Failed to seek on %s: %m\n", pagemap_fn); // printf("Page map:\n"); for (i = 0; i < NUM_PAGES; i++) { uint64_t pfn; page_map[i].virtaddr = virtbase + i * PAGE_SIZE; // Following line forces page to be allocated page_map[i].virtaddr[0] = 0; if (read(fd, &pfn, sizeof(pfn)) != sizeof(pfn)) fatal("Failed to read %s: %m\n", pagemap_fn); if (pfn >> 55 != 0x10c) fatal("Page %d not present (pfn 0x%016llx)\n", i, pfn); page_map[i].physaddr = (uint32_t)pfn << PAGE_SHIFT | 0x40000000; // printf(" %2d: %8p ==> 0x%08x [0x%016llx]\n", i, page_map[i].virtaddr, page_map[i].physaddr, pfn); } // GPIO4 needs to be ALT FUNC 0 to otuput the clock gpio_reg[GPFSEL0] = (gpio_reg[GPFSEL0] & ~(7 << 12)) | (4 << 12); // Program GPCLK to use MASH setting 1, so fractional dividers work clk_reg[GPCLK_CNTL] = 0x5A << 24 | 6; udelay(100); clk_reg[GPCLK_CNTL] = 0x5A << 24 | 1 << 9 | 1 << 4 | 6; ctl = (struct control_data_s *)virtbase; dma_cb_t *cbp = ctl->cb; uint32_t phys_sample_dst = 0x7e101074; uint32_t phys_pwm_fifo_addr = 0x7e20c000 + 0x18; for (i = 0; i < NUM_SAMPLES; i++) { ctl->sample[i] = 0x5a << 24 | 5 << 12; // Silence // Write a frequency sample cbp->info = BCM2708_DMA_NO_WIDE_BURSTS | BCM2708_DMA_WAIT_RESP; cbp->src = mem_virt_to_phys(ctl->sample + i); cbp->dst = phys_sample_dst; cbp->length = 4; cbp->stride = 0; cbp->next = mem_virt_to_phys(cbp + 1); cbp++; // Delay cbp->info = BCM2708_DMA_NO_WIDE_BURSTS | BCM2708_DMA_WAIT_RESP | BCM2708_DMA_D_DREQ | BCM2708_DMA_PER_MAP(5); cbp->src = mem_virt_to_phys(virtbase); cbp->dst = phys_pwm_fifo_addr; cbp->length = 4; cbp->stride = 0; cbp->next = mem_virt_to_phys(cbp + 1); cbp++; } cbp--; cbp->next = mem_virt_to_phys(virtbase); // Initialise PWM to use a 100MHz clock too, and set the range to // 454 bits, which is 4.54us, the rate at which we want to update // the GPCLK control register. pwm_reg[PWM_CTL] = 0; udelay(10); clk_reg[PWMCLK_CNTL] = 0x5A000006; // Source=PLLD and disable udelay(100); clk_reg[PWMCLK_DIV] = 0x5A000000 | (5<<12); // set pwm div to 5, for 100MHz udelay(100); clk_reg[PWMCLK_CNTL] = 0x5A000016; // Source=PLLD and enable udelay(100); pwm_reg[PWM_RNG1] = 454; udelay(10); pwm_reg[PWM_DMAC] = PWMDMAC_ENAB | PWMDMAC_THRSHLD; udelay(10); pwm_reg[PWM_CTL] = PWMCTL_CLRF; udelay(10); pwm_reg[PWM_CTL] = PWMCTL_USEF1 | PWMCTL_PWEN1; udelay(10); // Initialise the DMA dma_reg[DMA_CS] = BCM2708_DMA_RESET; udelay(10); dma_reg[DMA_CS] = BCM2708_DMA_INT | BCM2708_DMA_END; dma_reg[DMA_CONBLK_AD] = mem_virt_to_phys(ctl->cb); dma_reg[DMA_DEBUG] = 7; // clear debug error flags dma_reg[DMA_CS] = 0x10880001; // go, mid priority, wait for outstanding writes // Nearly there.. open the .wav file specified on the cmdline int fp = open(argv[1], 'r'); if (fp < 0) fatal("Failed to open .wav file\n"); int sz = lseek(fp, 0L, SEEK_END); lseek(fp, 0L, SEEK_SET); short* data = (short*)malloc(sz); read(fp, data, sz); uint32_t last_cb = (uint32_t)ctl->cb; int data_index = 22; for (;;) { usleep(10000); uint32_t cur_cb = mem_phys_to_virt(dma_reg[DMA_CONBLK_AD]); int last_sample = (last_cb - (uint32_t)virtbase) / (sizeof(dma_cb_t) * 2); int this_sample = (cur_cb - (uint32_t)virtbase) / (sizeof(dma_cb_t) * 2); int free_slots = this_sample - last_sample; if (free_slots < 0) free_slots += NUM_SAMPLES; while (free_slots >= 10) { float dval = (float)(data[data_index])/65536.0 * 25.0; int intval = (int)((floor)(dval)); int frac = (int)((dval - (float)intval) * 10.0); int j; // I'm sure this code could do a better job of subsampling, either by // distributing the '+1's evenly across the 10 subsamples, or maybe // by taking the previous and next samples in to account too. for (j = 0; j < 10; j++) { ctl->sample[last_sample++] = (0x5A << 24 | 5 << 12) + (frac > j ? intval + 1 : intval); if (last_sample == NUM_SAMPLES) last_sample = 0; } free_slots -= 10; // Should really wait for outstanding samples to be processed here.. if (++data_index >= sz/2) terminate(0); } last_cb = (uint32_t)virtbase + last_sample * sizeof(dma_cb_t) * 2; } terminate(0); return 0; }