add 1.54 IPS driver

This commit is contained in:
goodtft 2021-01-27 08:18:51 +00:00
parent 52dd7eeaf5
commit 82667a3dac
67 changed files with 9244 additions and 0 deletions

147
LCD154-show Executable file
View File

@ -0,0 +1,147 @@
#!/bin/bash
# Installer for the LCD154 display.
#
# Steps, in order:
#   1. run ./system_backup.sh and remove the libinput X11 snippet
#   2. stage a config.txt from the bundled template and copy it to /boot
#   3. build and install fbcp-ili9341 as /usr/local/bin/fbcp
#   4. "KEY install": install python-xlib and PyMouse
#   5. register the autostart entry, optionally run ./rotate.sh, then reboot
#
# Usage: ./LCD154-show [rotation]   (the single optional argument is handed
#                                    to ./rotate.sh unchanged)

# Append one line to the staged config file.
# The staged file is created with `sudo cp`, so the append must run as root as
# well. With `sudo echo ... >> file` the redirection is performed by the calling
# (possibly unprivileged) shell and can fail; `sudo tee -a` does the write
# itself with elevated rights.
append_config() {
    echo "$1" | sudo tee -a ./boot/config.txt.bak > /dev/null
}

# Configure, build and install fbcp-ili9341 from ./fbcp-ili9341, then install
# the bundled rc.local. Aborts the whole script when the build directory cannot
# be entered, so cmake/make never run in (and pollute) the wrong directory.
build_fbcp() {
    sudo mkdir ./fbcp-ili9341/build
    cd ./fbcp-ili9341/build/ || exit 1
    sudo cmake -DSPI_BUS_CLOCK_DIVISOR=10 -DWAVESHARE_ST7789VW_HAT=ON -DBACKLIGHT_CONTROL=OFF -DSTATISTICS=0 -DUSE_DMA_TRANSFERS=OFF ..
    sudo make -j
    sudo install fbcp-ili9341 /usr/local/bin/fbcp
    cd - > /dev/null
    sudo cp -rf ./etc/rc.local /etc/rc.local
}

# Print the captured stderr of the last install step in red.
show_errors() {
    result=`cat ./error_output.txt`
    echo -e "\033[31m$result\033[0m"
}

sudo ./system_backup.sh
if [ -f /etc/X11/xorg.conf.d/40-libinput.conf ]; then
    sudo rm -rf /etc/X11/xorg.conf.d/40-libinput.conf
fi
# Disabled steps, kept for reference:
#if [ ! -d /etc/X11/xorg.conf.d ]; then
#sudo mkdir -p /etc/X11/xorg.conf.d
#fi
#sudo cp ./usr/mhs35-overlay.dtb /boot/overlays/
#sudo cp ./usr/mhs35-overlay.dtb /boot/overlays/mhs35.dtbo

# Pick the config template by the root device named in /boot/cmdline.txt.
root_dev=`grep -oPr "root=[^\s]*" /boot/cmdline.txt | awk -F= '{printf $NF}'`
if test "$root_dev" = "/dev/mmcblk0p7"; then
    sudo cp -rf ./boot/config-noobs-nomal.txt ./boot/config.txt.bak
else
    sudo cp -rf ./boot/config-nomal.txt ./boot/config.txt.bak
    append_config "hdmi_force_hotplug=1"
fi
append_config "hdmi_cvt 300 300 60 1 0 0 0"
append_config "hdmi_group=2"
# NOTE(review): hdmi_mode is written twice (1, then 87); presumably only the
# later value is meant to apply - confirm before removing the first line.
append_config "hdmi_mode=1"
append_config "hdmi_mode=87"
append_config "display_rotate=0"
append_config "gpio=6,19,5,26,13,21,20,16=pu"
sudo cp -rf ./boot/config.txt.bak /boot/config.txt
# Disabled steps, kept for reference:
#sudo cp -rf ./usr/99-calibration.conf-mhs35-90 /etc/X11/xorg.conf.d/99-calibration.conf
#sudo cp -rf ./usr/99-fbturbo.conf /usr/share/X11/xorg.conf.d/99-fbturbo.conf
#if test "$root_dev" = "/dev/mmcblk0p7";then
#sudo cp ./usr/cmdline.txt-noobs /boot/cmdline.txt
#else
#sudo cp ./usr/cmdline.txt /boot/
#fi
#sudo cp ./usr/inittab /etc/
#sudo cp ./boot/config-mhs35.txt /boot/config.txt

# Record the installed configuration. Written through `sudo tee` so that the
# marker can always be (re)written, even when an earlier run left it owned by
# root.
echo "gpio:resistance:LCD154:90:300:300" | sudo tee ./.have_installed > /dev/null

#FBCP install
if [ -d ./fbcp-ili9341 ]; then
    sudo rm -rf ./fbcp-ili9341
fi
wget --spider -q -o /dev/null --tries=1 -T 10 https://github.com
if [ $? -eq 0 ]; then
    # Online: install cmake and build from a fresh upstream clone.
    #sudo cp -rf ./usr/99-fbturbo-fbcp.conf /usr/share/X11/xorg.conf.d/99-fbturbo.conf
    sudo apt-get install cmake 2> error_output.txt
    show_errors
    grep -q "^E:" ./error_output.txt && exit
    #sudo rm -rf rpi-fbcp
    sudo git clone https://github.com/juj/fbcp-ili9341.git
    build_fbcp
else
    # Offline: build the bundled copy, but only when no fbcp is installed yet.
    if ! type fbcp > /dev/null 2>&1; then
        sudo cp -rf ./usr/fbcp-ili9341 ./
        build_fbcp
    fi
fi

#KEY install
# Start from an empty log so the checks below never read the output of an
# earlier step (or a missing file) when no install command runs in this section.
: > ./error_output.txt
wget --spider -q -o /dev/null --tries=1 -T 10 http://mirrors.zju.edu.cn
if [ $? -eq 0 ]; then
    sudo apt-get install python-xlib 2> error_output.txt
else
    dpkg -l | grep python-xlib
    if [ $? -eq 1 ]; then
        sudo dpkg -i -B ./python-xlib_0.23-2_all.deb 2> error_output.txt
    fi
fi
show_errors
grep -q "^E:" ./error_output.txt && exit
grep -q "error:" ./error_output.txt && exit

wget --spider -q -o /dev/null --tries=1 -T 10 https://pypi.org
if [ $? -eq 0 ]; then
    sudo pip install PyMouse 2> error_output.txt
    show_errors
    grep -q "^E:" ./error_output.txt && exit
    grep -q "error:" ./error_output.txt && exit
else
    pip list | grep PyMouse
    if [ $? -eq 1 ]; then
        sudo tar xvzf ./PyMouse-1.0.tar.gz
        # NOTE(review): setup.py is invoked from outside its own directory;
        # verify that the bundled package installs correctly this way.
        sudo python PyMouse-1.0/setup.py install
    fi
fi

# Register the desktop autostart entry unless one is already present.
if [ ! -d ~/.config/autostart ]; then
    sudo mkdir -p ~/.config/autostart
fi
if [ ! -f ~/.config/autostart/local.desktop ]; then
    sudo cp -rf ./usr/local.desktop ~/.config/autostart
fi

#evdev install (disabled, kept for reference)
#nodeplatform=`uname -n`
#kernel=`uname -r`
#version=`uname -v`
#if test "$nodeplatform" = "raspberrypi";then
#echo "this is raspberrypi kernel"
#version=${version##* }
#version=${version#*#}
#echo $version
#if test $version -lt 2017;then
#echo "reboot"
#else
#echo "need to update touch configuration"
#sudo dpkg -i -B ./xserver-xorg-input-evdev_1%3a2.10.6-1+b1_armhf.deb 2> error_output.txt
#sudo apt-get install xserver-xorg-input-evdev 2> error_output.txt
#result=`cat ./error_output.txt`
#echo -e "\033[31m$result\033[0m"
#grep -q "error:" ./error_output.txt && exit
#sudo cp -rf /usr/share/X11/xorg.conf.d/10-evdev.conf /usr/share/X11/xorg.conf.d/45-evdev.conf
#echo "reboot"
#fi
#else
#echo "this is not raspberrypi kernel, no need to update touch configure, reboot"
#fi

sudo sync
sudo sync
sleep 1
if [ $# -eq 1 ]; then
    # Quoted so the argument reaches rotate.sh as exactly one word.
    sudo ./rotate.sh "$1"
elif [ $# -gt 1 ]; then
    echo "Too many parameters"
fi
echo "reboot now"
sudo reboot

60
Mouse_Key.py Normal file
View File

@ -0,0 +1,60 @@
from pymouse import PyMouse
import time
import RPi.GPIO as GPIO
# All pin constants below are interpreted with BCM numbering.
GPIO.setmode(GPIO.BCM)

# Input pins: four directions plus two keys.
btn_up = 5
btn_down = 26
btn_left = 19
btn_right = 6
btn_key1 = 21
btn_key2 = 20

# Up, Down, left, right, Button
# Every button is configured as an input with the pull-up resistor enabled.
for _pin in (btn_up, btn_down, btn_left, btn_right, btn_key1, btn_key2):
    GPIO.setup(_pin, GPIO.IN, GPIO.PUD_UP)
def main():
    """Poll the GPIO buttons forever and turn them into mouse events.

    Each pass reads the current pointer position, issues a mouse click for
    KEY1 (button 1) or KEY2 (button 2) when that key goes from released to
    pressed, and moves the pointer by 5 pixels for every direction input
    that reads low. The loop sleeps 20 ms between passes and never returns.
    """
    m = PyMouse()
    # Remember whether each key was already down on the previous pass, so a
    # held key produces one click instead of one click per 20 ms poll.
    key1_held = False
    key2_held = False
    while True:
        nowxy = m.position()

        # Inputs are pulled up, so a pressed button reads low (falsy).
        key1_down = not GPIO.input(btn_key1)
        if key1_down and not key1_held:  # press edge
            print("KEY1")
            m.click(nowxy[0], nowxy[1], 1)
        key1_held = key1_down

        key2_down = not GPIO.input(btn_key2)
        if key2_down and not key2_held:  # press edge
            print("KEY2")
            m.click(nowxy[0], nowxy[1], 2)
        key2_held = key2_down

        # NOTE(review): up/down change x and left/right change y; presumably
        # this matches the orientation the panel is mounted in - confirm.
        if not GPIO.input(btn_up):  # button pressed
            m.move(nowxy[0] - 5, nowxy[1])
        if not GPIO.input(btn_down):  # button pressed
            m.move(nowxy[0] + 5, nowxy[1])
        if not GPIO.input(btn_left):  # button pressed
            m.move(nowxy[0], nowxy[1] + 5)
        if not GPIO.input(btn_right):  # button pressed
            m.move(nowxy[0], nowxy[1] - 5)

        time.sleep(0.02)  # Poll every 20ms (otherwise CPU load gets too high)


if __name__ == "__main__":
    main()

BIN
PyMouse-1.0.tar.gz Normal file

Binary file not shown.

BIN
python-xlib_0.23-2_all.deb Normal file

Binary file not shown.

8
usr/fbcp-ili9341/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
*.ko.cmd
*.o.cmd
.tmp_versions
*.ko
*.o
*.S
*.symvers
*.order

View File

@ -0,0 +1,268 @@
# Build script for the fbcp-ili9341 executable.
cmake_minimum_required(VERSION 2.8)
# Fall back to a Release build when no build type was selected.
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif()
# Headers and libraries are searched under /opt/vc.
include_directories(/opt/vc/include)
link_directories(/opt/vc/lib)
# Every .cpp file in this directory becomes a source of the executable.
file(GLOB sourceFiles *.cpp)
message(STATUS "Doing a ${CMAKE_BUILD_TYPE} build")
# Point Debug users at the optimized build.
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
message(STATUS "Pass -DCMAKE_BUILD_TYPE=Release to do a fast optimized build.")
endif()
# Detect if current hardware is Raspberry Pi Zero/Zero W, and enable targeting Zero with -DSINGLE_CORE_BOARD=ON if so.
# The "Revision" field of /proc/cpuinfo is extracted and mapped onto the
# DEFAULT_TO_* variables, which later serve as default values for the
# SINGLE_CORE_BOARD / ARMV6Z / ARMV7A / ARMV8A options.
execute_process(COMMAND cat /proc/cpuinfo OUTPUT_VARIABLE CPU_INFO)
# CPU_INFO is quoted so it is passed as a single argument: unquoted, the text
# would be split at any ';' it contains, and an empty result (e.g. when
# /proc/cpuinfo cannot be read) would leave string() without an input argument
# and abort configuration instead of reaching the "not known" warning below.
string(REGEX MATCH "Revision[\t ]*:[\t ]*([0-9a-f]+)" BOARD_REVISION "${CPU_INFO}")
# Keep only the captured revision code, not the whole matched line.
set(BOARD_REVISION "${CMAKE_MATCH_1}")
message(STATUS "Board revision: ${CMAKE_MATCH_1}")

set(DEFAULT_TO_SINGLE_CORE_BOARD OFF)
set(DEFAULT_TO_ARMV6Z OFF)
set(DEFAULT_TO_ARMV7A OFF)
set(DEFAULT_TO_ARMV8A OFF)

# http://ozzmaker.com/check-raspberry-software-hardware-version-command-line/
if (BOARD_REVISION MATCHES "(0002)|(0003)|(0004)|(0005)|(0006)|(0007)|(0008)|(0009)" OR BOARD_REVISION MATCHES "(000d)|(000e)|(000f)|(0010)|(0011)|(0012)" OR BOARD_REVISION MATCHES "(900092)|(900093)|(9000c1)")
  message(STATUS "Detected this Pi to be one of: Pi A, A+, B rev. 1, B rev. 2, B+, CM1, Zero or Zero W, with single hardware core and ARMv6Z instruction set CPU.")
  set(DEFAULT_TO_SINGLE_CORE_BOARD ON)
  set(DEFAULT_TO_ARMV6Z ON)
elseif(BOARD_REVISION MATCHES "(a01041)|(a21041)")
  message(STATUS "Detected this board to be a Pi 2 Model B < rev 1.2 with ARMv7-A instruction set CPU.")
  set(DEFAULT_TO_ARMV7A ON)
elseif(BOARD_REVISION MATCHES "(a02082)|(a22082)|(a020d3)|(9020e0)|(a03111)|(b03111)|(c03111)")
  message(STATUS "Detected this Pi to be one of: Pi 2B rev. 1.2, 3B, 3B+, 3A+, CM3, CM3 lite or 4B(1GB,2GB,4GB RAM), with 4 hardware cores and ARMv8-A instruction set CPU.")
  set(DEFAULT_TO_ARMV8A ON)
else()
  message(WARNING "The board revision of this hardware is not known. Please add detection to this board in CMakeLists.txt. (proceeding to compile against a generic multicore CPU)")
endif()
# CPU-related options. Each one defaults to the matching DEFAULT_TO_* value
# and can be overridden with -D<NAME>=ON/OFF.
option(SINGLE_CORE_BOARD "Target a Raspberry Pi with only one hardware core (Pi Zero)" ${DEFAULT_TO_SINGLE_CORE_BOARD})
if (SINGLE_CORE_BOARD)
message(STATUS "Targeting a Raspberry Pi with only one hardware core")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSINGLE_CORE_BOARD=1")
endif()
# Compiler flags applied to every build, independent of the options below.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -marm -mabi=aapcs-linux -mhard-float -mfloat-abi=hard -mlittle-endian -mtls-dialect=gnu2 -funsafe-math-optimizations")
# The three instruction-set options below currently only print a status
# message; their -march/-mcpu flag lines are commented out.
option(ARMV6Z "Target a Raspberry Pi with ARMv6Z instruction set (Pi 1A, 1A+, 1B, 1B+, Zero, Zero W)" ${DEFAULT_TO_ARMV6Z})
if (ARMV6Z)
message(STATUS "Enabling optimization flags that target ARMv6Z instruction set (Pi Model A, Pi Model B, Compute Module 1, Pi Zero/Zero W)")
# Currently disabled, no test data to know if this would be faster
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv6z -mfpu=vfp")
endif()
option(ARMV7A "Target a Raspberry Pi with ARMv7-A instruction set (Pi 2B < rev 1.2)" ${DEFAULT_TO_ARMV7A})
if (ARMV7A)
message(STATUS "Enabling optimization flags that target ARMv7-A instruction set (Pi 2B < rev 1.2)")
# Currently disabled, no test data to know if this would be faster
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=cortex-a7 -mcpu=cortex-a7 -mtune=cortex-a7 -mfpu=neon-vfpv4")
endif()
option(ARMV8A "Target a Raspberry Pi with ARMv8-A instruction set (Pi 2B >= rev. 1.2, 3B, 3B+, CM3 or CM3 lite)" ${DEFAULT_TO_ARMV8A})
if (ARMV8A)
message(STATUS "Enabling optimization flags that target ARMv8-A instruction set (Pi 2B >= rev. 1.2, 3B, 3B+, CM3 or CM3 lite)")
# N.B. Here should be possible to set -mfpu=neon-vfpv4, though for some reason that generates really slow code, tested on gcc (Raspbian 6.3.0-18+rpi1+deb9u1) 6.3.0 20170516
# Currently disabled, seems to be a tiny 1-2% slower (or no difference, drown in noise)
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc -mcpu=cortex-a53 -mtune=cortex-a53")
endif()
# GPIO pin assignments. Each is a cache string defaulting to 0; a non-zero
# value is forwarded to the compiler as a preprocessor definition.
# GPIO_TFT_DATA_CONTROL is three-way: > 0 selects that pin for the Data/Control
# line, < 0 selects the 3-wire protocol, and 0 adds no definition.
set(GPIO_TFT_DATA_CONTROL 0 CACHE STRING "Explicitly specify the Data/Control GPIO pin (sometimes also called Register Select)")
if (GPIO_TFT_DATA_CONTROL GREATER 0)
message(STATUS "Using 4-wire SPI mode of communication, with GPIO pin ${GPIO_TFT_DATA_CONTROL} for Data/Control line")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGPIO_TFT_DATA_CONTROL=${GPIO_TFT_DATA_CONTROL}")
elseif (GPIO_TFT_DATA_CONTROL LESS 0)
message(STATUS "Using 3-wire SPI mode of communication, i.e. a display that does not have a Data/Control line")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSPI_3WIRE_PROTOCOL=1")
endif()
set(GPIO_TFT_RESET_PIN 0 CACHE STRING "Explicitly specify the Reset GPIO pin (leave out if there is no Reset line)")
if (GPIO_TFT_RESET_PIN)
message(STATUS "Using GPIO pin ${GPIO_TFT_RESET_PIN} for Reset line")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGPIO_TFT_RESET_PIN=${GPIO_TFT_RESET_PIN}")
endif()
set(GPIO_TFT_BACKLIGHT 0 CACHE STRING "Explicitly specify the Backlight GPIO pin (leave out if there is no controllable Backlight line)")
if (GPIO_TFT_BACKLIGHT)
message(STATUS "Using GPIO pin ${GPIO_TFT_BACKLIGHT} for backlight")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGPIO_TFT_BACKLIGHT=${GPIO_TFT_BACKLIGHT}")
endif()
set(LOW_BATTERY_PIN 0 CACHE STRING "Explicitly specify the low batt GPIO pin (leave out if there is no low batt signal)")
if (LOW_BATTERY_PIN)
message(STATUS "Using GPIO pin ${LOW_BATTERY_PIN} for low battery status")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLOW_BATTERY_PIN=${LOW_BATTERY_PIN}")
endif()
# Display behaviour switches. Each enabled option appends one preprocessor
# definition to CMAKE_CXX_FLAGS and prints a status line.
option(BACKLIGHT_CONTROL "If true, enables fbcp-ili9341 to take control of backlight" OFF)
if (BACKLIGHT_CONTROL)
message(STATUS "Enabling fbcp-ili9341 backlight control")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBACKLIGHT_CONTROL")
endif()
option(DISPLAY_CROPPED_INSTEAD_OF_SCALING "If ON, displays the cropped center part of the source image on the SPI display. If OFF, scales the source image to the SPI display" OFF)
if (DISPLAY_CROPPED_INSTEAD_OF_SCALING)
message(STATUS "Cropping source image to view instead of scaling. This will produce crisp pixel perfect rendering, though edges of the display will be cut off if the HDMI and SPI display resolutions do not match. (pass -DDISPLAY_CROPPED_INSTEAD_OF_SCALING=OFF to scale instead of crop)")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDISPLAY_CROPPED_INSTEAD_OF_SCALING")
else()
message(STATUS "Scaling source image to view. If the HDMI resolution does not match the SPI display resolution, this will produce blurriness. Match the HDMI display resolution with the SPI resolution in /boot/config.txt to get crisp pixel perfect rendering, or alternatively pass -DDISPLAY_CROPPED_INSTEAD_OF_SCALING=ON to crop instead of scale if you want to view the center of the screen pixel perfect when HDMI and SPI resolutions do not match.")
endif()
option(DISPLAY_BREAK_ASPECT_RATIO_WHEN_SCALING "If ON, the display is scaled stretched to fit the screen, disregarding preserving aspect ratio." OFF)
if (DISPLAY_BREAK_ASPECT_RATIO_WHEN_SCALING)
message(STATUS "Ignoring aspect ratio when scaling source image to the SPI display (Pass -DDISPLAY_BREAK_ASPECT_RATIO_WHEN_SCALING=OFF to preserve aspect ratio)")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDISPLAY_BREAK_ASPECT_RATIO_WHEN_SCALING")
else()
message(STATUS "Preserving aspect ratio when scaling source image to the SPI display, introducing letterboxing/pillarboxing if HDMI and SPI aspect ratios are different (Pass -DDISPLAY_BREAK_ASPECT_RATIO_WHEN_SCALING=ON to stretch HDMI to cover full screen if you do not care about aspect ratio)")
endif()
# STATISTICS is a level, not a boolean: any value above 0 defines STATISTICS,
# and any value above 1 additionally defines FRAME_COMPLETION_TIME_STATISTICS.
set(STATISTICS 1 CACHE STRING "Set to 0, 1 or 2 to configure the level of statistics to display. 0=OFF, 1=regular statistics, 2=frame rate interval histogram")
if (STATISTICS GREATER 1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DFRAME_COMPLETION_TIME_STATISTICS")
endif()
if (STATISTICS GREATER 0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATISTICS")
endif()
# SPI_BUS_CLOCK_DIVISOR is mandatory: leaving it at its default of 0 stops
# configuration with a fatal error.
set(SPI_BUS_CLOCK_DIVISOR 0 CACHE STRING "Specify the SPI0 CDIV register value that defines the SPI0 bus speed (No default value, try a safe setting, e.g. -DSPI_BUS_CLOCK_DIVISOR=40 first")
if (SPI_BUS_CLOCK_DIVISOR)
message(STATUS "SPI_BUS_CLOCK_DIVISOR set to ${SPI_BUS_CLOCK_DIVISOR}. Try setting this to a higher value (must be an even number) if this causes problems. Display update speed = core_freq/divisor. (on Pi3B, by default core_freq=400). A safe starting default value may be -DSPI_BUS_CLOCK_DIVISOR=40")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSPI_BUS_CLOCK_DIVISOR=${SPI_BUS_CLOCK_DIVISOR}")
else()
message(FATAL_ERROR "Please define -DSPI_BUS_CLOCK_DIVISOR=<some even number> on the CMake command line! (see files ili9341.h/waveshare35b.h for details) This parameter along with core_freq=xxx in /boot/config.txt defines the SPI display speed. Smaller divisor number=faster speed, higher number=slower.")
endif()
option(KERNEL_MODULE_CLIENT "If enabled, run fbcp-ili9341 userland program against the kernel module found in kernel/ subdirectory (must be started before the userland program)" OFF)
if (KERNEL_MODULE_CLIENT)
message(STATUS "KERNEL_MODULE_CLIENT enabled, building userland program to operate against fbcp-ili9341 kernel module")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DKERNEL_MODULE_CLIENT=1")
endif()
option(DISPLAY_SWAP_BGR "If true, reverses RGB<->BGR color channels" OFF)
if (DISPLAY_SWAP_BGR)
message(STATUS "Swapping RGB<->BGR color channels")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDISPLAY_SWAP_BGR")
endif()
option(DISPLAY_INVERT_COLORS "If true, inverts display colors (white=0, black=31/63" OFF)
if (DISPLAY_INVERT_COLORS)
message(STATUS "Inverting display colors (white=0, black=31/63)")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDISPLAY_INVERT_COLORS")
endif()
option(DISPLAY_ROTATE_180_DEGREES "If true, rotates display upside down" OFF)
if (DISPLAY_ROTATE_180_DEGREES)
message(STATUS "Rotating display output by 180 degrees")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDISPLAY_ROTATE_180_DEGREES")
endif()
# DMA configuration.
# KEDEI_V63_MPI3501 is declared here (ahead of the other display options)
# because it forces USE_DMA_TRANSFERS off below.
# Help text corrected from "186bit" to "18bit", matching the status message
# printed for this display further down.
option(KEDEI_V63_MPI3501 "Target KeDei 3.5 inch SPI TFTLCD 480*320 16bit/18bit version 6.3 2018/4/9 display (MPI3501)" OFF)

option(USE_DMA_TRANSFERS "If enabled, fbcp-ili9341 utilizes DMA to transfer data to the display. Otherwise, Polled SPI mode is used to drive communication with the SPI display" ON)

# KeDei does not do DMA well, since after each 32-bit word one needs to refresh the chip select signal, preventing DMA batch operations altogether.
if (KEDEI_V63_MPI3501)
  set(USE_DMA_TRANSFERS OFF)
endif()

if (USE_DMA_TRANSFERS)
  # Message typo fixed: "it this causes problems" -> "if this causes problems".
  message(STATUS "USE_DMA_TRANSFERS enabled, this improves performance. Try running CMake with -DUSE_DMA_TRANSFERS=OFF if this causes problems, or try adjusting the DMA channels to use with -DDMA_TX_CHANNEL=<num> -DDMA_RX_CHANNEL=<num>.")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_DMA_TRANSFERS=1")
else()
  message(STATUS "USE_DMA_TRANSFERS is disabled, this is not the most efficient mode. Try running CMake with -DUSE_DMA_TRANSFERS=ON to improve performance.")
endif()

# DMA channel overrides: a non-zero value is forwarded as a preprocessor
# definition; the default of 0 adds nothing.
set(DMA_TX_CHANNEL 0 CACHE STRING "Specifies the DMA send channel to use")
if (DMA_TX_CHANNEL)
  message(STATUS "DMA_TX_CHANNEL=${DMA_TX_CHANNEL}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDMA_TX_CHANNEL=${DMA_TX_CHANNEL}")
endif()

set(DMA_RX_CHANNEL 0 CACHE STRING "Specifies the DMA receive channel to use")
if (DMA_RX_CHANNEL)
  message(STATUS "DMA_RX_CHANNEL=${DMA_RX_CHANNEL}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDMA_RX_CHANNEL=${DMA_RX_CHANNEL}")
endif()
# Display controller selection.
# The if/elseif chain below takes the first display variable that is set and
# appends that display's preprocessor definitions; when none is set,
# configuration stops with a fatal error. Only some of the tested names are
# declared with option() in this file; the others take effect only when
# passed with -D<NAME>=ON on the command line.
option(ADAFRUIT_ILI9341_PITFT "Target Adafruit's ILI9341-based PiTFT display" OFF)
option(FREEPLAYTECH_WAVESHARE32B "Target WaveShare32B ILI9341 display on Freeplaytech's CM3/Zero devices)" OFF)
option(WAVESHARE35B_ILI9486 "Target Waveshare's ILI9486-based Waveshare Wavepear 3.5 inch (B) display" OFF)
option(TONTEC_MZ61581 "Target Tontec's MZ61581-based 3.5 inch display" OFF)
if (ADAFRUIT_ILI9341_PITFT)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DILI9341 -DADAFRUIT_ILI9341_PITFT")
message(STATUS "Targeting Adafruit PiTFT with ILI9340/ILI9341")
elseif(FREEPLAYTECH_WAVESHARE32B)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DILI9341 -DFREEPLAYTECH_WAVESHARE32B")
message(STATUS "Targeting WaveShare 3.2 inch (B) display with ILI9341")
elseif(ADAFRUIT_HX8357D_PITFT)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHX8357D -DADAFRUIT_HX8357D_PITFT")
message(STATUS "Targeting Adafruit 3.5 inch PiTFT with HX8357D")
elseif(WAVESHARE_ST7789VW_HAT)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DST7789 -DST7789VW -DWAVESHARE_ST7789VW_HAT")
message(STATUS "Targeting WaveShare 240x240 1.3inch IPS LCD Hat with ST7789VW controller")
elseif(WAVESHARE_ST7735S_HAT)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DST7735S -DWAVESHARE_ST7735S_HAT")
message(STATUS "Targeting WaveShare 128x128 1.44inch LCD Hat with ST7735S controller")
elseif(ILI9340)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DILI9340")
message(STATUS "Targeting ILI9340")
elseif(KEDEI_V63_MPI3501)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMPI3501 -DKEDEI_V63_MPI3501")
message(STATUS "Targeting KeDei 3.5 inch SPI TFTLCD 480*320 16bit/18bit version 6.3 2018/4/9 display (MPI3501)")
# DMA and this display are mutually exclusive.
if (USE_DMA_TRANSFERS)
message(FATAL_ERROR "DMA is unfortunately not possible with KeDei MPI3501. Please disable with -DUSE_DMA_TRANSFERS=OFF.")
endif()
elseif(ILI9341)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DILI9341")
message(STATUS "Targeting ILI9341")
elseif(ST7789)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DST7789")
message(STATUS "Targeting ST7789")
elseif(ST7789VW)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DST7789 -DST7789VW")
message(STATUS "Targeting ST7789VW (based as variant of ST7789)")
elseif(ST7735R)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DST7735R")
message(STATUS "Targeting ST7735R")
elseif(ST7735S)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DST7735S")
message(STATUS "Targeting ST7735S")
elseif(SSD1351)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSSD1351")
message(STATUS "Targeting SSD1351")
elseif(WAVESHARE35B_ILI9486)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DILI9486 -DWAVESHARE35B_ILI9486")
message(STATUS "Targeting WaveShare 3.5 inch (B) display with ILI9486")
elseif(ILI9486)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DILI9486")
message(STATUS "Targeting ILI9486")
elseif(ILI9486L)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DILI9486L")
message(STATUS "Targeting ILI9486L")
elseif(ILI9488)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DILI9488")
message(STATUS "Targeting ILI9488")
elseif(TONTEC_MZ61581)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMZ61581 -DTONTEC_MZ61581")
message(STATUS "Targeting TONTEC_MZ61581")
elseif(MZ61581)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMZ61581")
message(STATUS "Targeting MZ61581")
elseif(MPI3501)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMPI3501")
message(STATUS "Targeting MPI3501")
# DMA and this display are mutually exclusive.
if (USE_DMA_TRANSFERS)
message(FATAL_ERROR "DMA is unfortunately not possible with MPI3501. Please disable with -DUSE_DMA_TRANSFERS=OFF.")
endif()
else()
message(FATAL_ERROR "Please specify which display controller to use on command line to CMake!")
endif()
# Build the executable from the globbed sources and link it against
# pthread, bcm_host and atomic.
add_executable(fbcp-ili9341 ${sourceFiles})
target_link_libraries(fbcp-ili9341 pthread bcm_host atomic)

View File

@ -0,0 +1,19 @@
Copyright (c) Jukka Jylänki
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

690
usr/fbcp-ili9341/README.md Normal file
View File

@ -0,0 +1,690 @@
# Introduction
This repository implements a driver for certain SPI-based LCD displays for Raspberry Pi A, B, 2, 3, 4 and Zero.
![PiTFT display](/example.jpg "Adafruit PiTFT 2.8 with ILI9341 controller")
The work was motivated by curiosity after seeing this series of videos on the RetroManCave YouTube channel:
- [RetroManCave: Waveshare 3.5" Raspberry Pi Screen | Review](https://www.youtube.com/watch?v=SGMC0t33C50)
- [RetroManCave: Waveshare 3.2" vs 3.5" LCD screen gaming test | Raspberry Pi / RetroPie](https://www.youtube.com/watch?v=8bazEcXemiA)
- [Elecrow 5 Inch LCD Review | RetroPie & Raspberry Pi](https://www.youtube.com/watch?v=8VgNBDMOssg)
In these videos, the SPI (GPIO) bus is referred to as being the bottleneck. SPI based displays update over a serial data bus, transmitting one bit per clock cycle on the bus. A 320x240x16bpp display hence requires a SPI bus clock rate of 73.728MHz to achieve a full 60fps refresh frequency. Not many SPI LCD controllers can communicate this fast in practice, but are constrained to e.g. a 16-50MHz SPI bus clock speed, capping the maximum update rate significantly. Can we do anything about this?
The fbcp-ili9341 project started out as a display driver for the [Adafruit 2.8" 320x240 TFT w/ Touch screen for Raspberry Pi](https://www.adafruit.com/product/1601) display that utilizes the ILI9341 controller. On that display, fbcp-ili9341 can achieve a 60fps update rate, depending on the content that is being displayed. Check out these videos for examples of the driver in action:
- [fbcp-ili9341 frame delivery smoothness test on Pi 3B and Adafruit ILI9341 at 119Hz](https://youtu.be/IqzKT33Rwjc)
- [Latency and tearing test #2: GPIO input to display latency in fbcp-ili9341 and tearing modes](https://www.youtube.com/watch?v=EOICdpjiqv8)
- [Latency and tearing test: KeDei 3.5" 320x480 HDMI vs Adafruit 2.8" PiTFT ILI9341 240x320 SPI](https://www.youtube.com/watch?v=1yvmvv0KtNs)
- [fbcp-ili9341 ported to ILI9486 WaveShare 3.5" (B) SpotPear 320x480 SPI display](https://www.youtube.com/watch?v=dqOLIHOjLq4)
- [Quake 60 fps inside Gameboy Advance (ILI9341)](https://www.youtube.com/watch?v=xmO8t3XlxVM)
- First implementation of a statistics overlay: [fbcp-ili9341 SPI display driver on Adafruit PiTFT 2.8"](http://youtu.be/rKSH048XRjA)
- Initial proof of concept video: [fbcp-ili9341 driver first demo](https://youtu.be/h1jhuR-oZm0)
### How It Works
Given that the SPI bus can be so constrained on bandwidth, how come fbcp-ili9341 seems to be able to update at up to 60fps? The way this is achieved is by what could be called *adaptive display stream updates*. Instead of uploading each pixel at each display refresh cycle, only the actually changed pixels on screen are submitted to the display. This is doable because the ILI9341 controller, like many other popular controllers, has communication interface functions that allow specifying partial screen updates, down to subrectangles or even individual pixel levels. This allows beating the bandwidth limit: for example in Quake, even though it is a fast-paced game, on average only about 46% of all pixels on screen change each rendered frame. Some parts, such as the UI, stay practically constant across multiple frames.
Other optimizations are also utilized to squeeze out even more performance:
- The program directly communicates with the BCM2835 ARM Peripherals controller registers, bypassing the usual Linux software stack.
- A hybrid of both Polled Mode SPI and DMA based transfers are utilized. Long sequential transfer bursts are performed using DMA, and when DMA would have too much latency, Polled Mode SPI is applied instead.
- Undocumented BCM2835 features are used to squeeze out maximum bandwidth: [SPI CDIV is driven at even numbers](https://www.raspberrypi.org/forums/viewtopic.php?t=43442) (and not just powers of two), and the [SPI DLEN register is forced in non-DMA mode](https://www.raspberrypi.org/forums/viewtopic.php?t=181154) to avoid an idle 9th clock cycle for each transferred byte.
- Good old **interlacing** is added into the mix: if the number of pixels that need updating is detected to be more than the SPI bus can handle, the driver adaptively resorts to doing an interlaced update, uploading even and odd scanlines at subsequent frames. Once the number of pending pixels to write returns to manageable amounts, progressive updating is resumed. This effectively doubles the maximum display update rate. (If you do not like the visual appearance that interlacing causes, it is easy to disable this by uncommenting the line `#define NO_INTERLACING` in file `config.h`)
- A dedicated SPI communication thread is used in order to keep the SPI bus active at all times.
- A number of other micro-optimization techniques are used, such as batch updating rectangular spans of pixels, merging disjoint-but-close spans of pixels on the same scanline, and latching Column and Page End Addresses to bottom-right corner of the display to be able to cut CASET and PASET messages in mid-communication.
The result is that the SPI bus can be kept close to 100% saturation, ~94-97% usual, to maximize the utilization rate of the bus, while only transmitting practically the minimum number of bytes needed to describe each new frame.
### Tested Devices
The driver has been checked to work (at least at some point in the past) on the following systems:
- Raspberry Pi 3 Model B+ with Raspbian Stretch (GCC 6.3.0)
- Raspberry Pi 3 Model B Rev 1.2 with Raspbian Jessie (GCC 4.9.2) and Raspbian Stretch (GCC 6.3.0)
- Raspberry Pi Zero W with Raspbian Jessie (GCC 4.9.2) and Raspbian Stretch (GCC 6.3.0)
- Raspberry Pi 2 Model B
- Raspberry Pi B Rev. 2.0 (old board from Q4 2012, board revision ID 000e)
although not all boards are actively tested on, so ymmv especially on older boards. (Bug fixes welcome, use https://elinux.org/RPi_HardwareHistory to identify which board you are running on)
### Tested Displays
The following LCD displays have been tested:
- [Adafruit 2.8" 320x240 TFT w/ Touch screen for Raspberry Pi](https://www.adafruit.com/product/1601) with ILI9341 controller
- [Adafruit PiTFT 2.2" HAT Mini Kit - 320x240 2.2" TFT - No Touch](https://www.adafruit.com/product/2315) with ILI9340 controller
- [Adafruit PiTFT - Assembled 480x320 3.5" TFT+Touchscreen for Raspberry Pi](https://www.adafruit.com/product/2097) with HX8357D controller
- [Adafruit 128x96 OLED Breakout Board - 16-bit Color 1.27" w/microSD holder](https://www.adafruit.com/product/1673) with SSD1351 controller
- [Waveshare 3.5inch RPi LCD (B) 320*480 Resolution Touch Screen IPS TFT Display](https://www.amazon.co.uk/dp/B01N48NOXI/ref=pe_3187911_185740111_TE_item) with ILI9486 controller
- [maithoga 3.5 inch 8PIN SPI TFT LCD Color Screen with Adapter Board ILI9486](https://www.aliexpress.com/item/3-5-inch-8P-SPI-TFT-LCD-Color-Screen-Module-ILI9486-Drive-IC-320-480-RGB/32828284227.html) with **ILI9486L** controller
- [BuyDisplay.com 320x480 Serial SPI 3.2"TFT LCD Module Display](https://www.buydisplay.com/default/serial-spi-3-2-inch-tft-lcd-module-display-ili9341-power-than-sainsmart) with ILI9341 controller
- [Arduino A000096 1.77" 160x128 LCD Screen](https://store.arduino.cc/arduino-lcd-screen) with ST7735R controller
- [Tontec 3.5" 320x480 LCD Display](https://www.ebay.com/p/Tontec-3-5-Inches-Touch-Screen-for-Raspberry-Pi-Display-TFT-Monitor-480x320-LCD/1649448059) with MZ61581-PI-EXT 2016.1.28 controller
- [Adafruit 1.54" 240x240 Wide Angle TFT LCD Display with MicroSD](https://www.adafruit.com/product/3787) with ST7789 controller
- [WaveShare 240x240, 1.3inch IPS LCD display HAT for Raspberry Pi](https://www.waveshare.com/1.3inch-lcd-hat.htm) with ST7789VW controller
- [WaveShare 128x128, 1.44inch LCD display HAT for Raspberry Pi](https://www.waveshare.com/1.44inch-lcd-hat.htm) with ST7735S controller
- [KeDei 3.5 inch SPI TFTLCD 480*320 16bit/18bit version 6.3 2018/4/9](https://github.com/juj/fbcp-ili9341/issues/40) with MPI3501 controller
- Unbranded 2.8" 320x240 display with ILI9340 controller
### Installation
Check the following sections to set up the driver.
##### Boot configuration
This driver does not utilize the [notro/fbtft](https://github.com/notro/fbtft) framebuffer driver, so that needs to be disabled if active. That is, if your `/boot/config.txt` file has lines that look something like `dtoverlay=pitft28r, ...`, `dtoverlay=waveshare32b, ...` or `dtoverlay=flexfb, ...`, those should be removed.
This program does not utilize the default SPI driver either, so a line such as `dtparam=spi=on` in `/boot/config.txt` should also be removed so that it will not cause conflicts.
Likewise, if you have any touch controller related dtoverlays active, such as `dtoverlay=ads7846,...` or anything that has a `penirq=` directive, those should be removed as well to avoid conflicts. It would be possible to add touch support to fbcp-ili9341 if someone wants to take a stab at it.
##### Building and running
Run in the console of your Raspberry Pi:
```bash
sudo apt-get install cmake
cd ~
git clone https://github.com/juj/fbcp-ili9341.git
cd fbcp-ili9341
mkdir build
cd build
cmake [options] ..
make -j
sudo ./fbcp-ili9341
```
Note especially the two dots `..` on the CMake line, which denote "up one directory" in this case (instead of referring to "more items go here").
See the next section to see what to input under **[options]**.
If you have been running the existing `fbcp` driver, make sure to remove that e.g. via a `sudo pkill fbcp` first (while running in SSH prompt or connected to a HDMI display), as these two cannot run at the same time. If `/etc/rc.local` or `/etc/init.d` contains an entry to start up `fbcp` at boot, that directive should be deleted.
##### Configuring build options
There are generally two ways to configure build options, at CMake command line, and in the file [config.h](https://github.com/juj/fbcp-ili9341/blob/master/config.h).
On the CMake command line, the following options can be configured:
###### If you have a display Add-On Hat
When using one of the displays that stack on top of the Pi that are already recognized by fbcp-ili9341, you don't need to specify the GPIO pin assignments, since fbcp-ili9341 code already has those. Pass one of the following CMake directives for the hats:
- `-DADAFRUIT_ILI9341_PITFT=ON`: If you are running on the [Adafruit 2.8" 320x240 TFT w/ Touch screen for Raspberry Pi](https://www.adafruit.com/product/1601) (or the [Adafruit PiTFT 2.2" HAT Mini Kit - 320x240 2.2" TFT - No Touch](https://www.adafruit.com/product/2315) display, which is compatible), pass this flag.
- `-DADAFRUIT_HX8357D_PITFT=ON`: If you have the [Adafruit PiTFT - Assembled 480x320 3.5" TFT+Touchscreen for Raspberry Pi](https://www.adafruit.com/product/2097) display, add this line.
- `-DFREEPLAYTECH_WAVESHARE32B=ON`: If you are running on the [Freeplay CM3 or Zero](https://www.freeplaytech.com/product/freeplay-cm3-diy-kit/) device, pass this flag. (this is not a hat, but still a preconfigured pin assignment)
- `-DWAVESHARE35B_ILI9486=ON`: If specified, targets a [Waveshare 3.5" 480x320 ILI9486](https://www.amazon.co.uk/dp/B01N48NOXI/ref=pe_3187911_185740111_TE_item) display.
- `-DTONTEC_MZ61581=ON`: If you are running on the [Tontec 3.5" 320x480 LCD Display](https://www.ebay.com/p/Tontec-3-5-Inches-Touch-Screen-for-Raspberry-Pi-Display-TFT-Monitor-480x320-LCD/1649448059) display, pass this.
- `-DWAVESHARE_ST7789VW_HAT=ON`: If specified, targets a [240x240, 1.3inch IPS LCD display HAT for Raspberry Pi](https://www.waveshare.com/1.3inch-lcd-hat.htm) with ST7789VW display controller.
- `-DWAVESHARE_ST7735S_HAT=ON`: If specified, targets a [128x128, 1.44inch LCD display HAT for Raspberry Pi](https://www.waveshare.com/1.44inch-lcd-hat.htm) with ST7735S display controller.
- `-DKEDEI_V63_MPI3501=ON`: If specified, targets a [KeDei 3.5 inch SPI TFTLCD 480*320 16bit/18bit version 6.3 2018/4/9](https://github.com/juj/fbcp-ili9341/issues/40) display with MPI3501 display controller.
###### If you wired the display to the Pi yourself
If you connected wires directly on the Pi instead of using a Hat from the above list, you will need to use the configuration directives below. In addition to specifying the display, you will also need to tell fbcp-ili9341 which GPIO pins you wired the connections to. To configure the display controller, pass one of:
- `-DILI9341=ON`: If you are running on any other generic ILI9341 display, or on Waveshare32b display that is standalone and not on the FreeplayTech CM3/Zero device, pass this flag.
- `-DILI9340=ON`: If you have a ILI9340 display, pass this directive. ILI9340 and ILI9341 chipsets are very similar, but ILI9340 doesn't support all of the features on ILI9341 and they will be disabled or downgraded.
- `-DHX8357D=ON`: If you have a HX8357D display, pass this directive.
- `-DSSD1351=ON`: If you have a SSD1351 OLED display, use this.
- `-DST7735R=ON`: If you have a ST7735R display, use this.
- `-DST7789=ON`: If you have a ST7789 display, use this.
- `-DST7789VW=ON`: If you have a ST7789VW display, use this.
- `-DST7735S=ON`: If you have a ST7735S display, use this.
- `-DILI9486=ON`: If you have a ILI9486 display, pass this directive.
- `-DILI9486L=ON`: If you have a ILI9486L display, pass this directive. Note that ILI9486 and ILI9486L are quite different, mutually incompatible controller chips, so be careful here identifying which one you have. (or just try both, should not break if you misidentified)
- `-DILI9488=ON`: If you have a ILI9488 display, pass this directive.
- `-DMPI3501=ON`: If specified, targets a display with MPI3501 display controller.
And additionally, pass the following to customize the GPIO pin assignments you used:
- `-DGPIO_TFT_DATA_CONTROL=number`: Specifies/overrides which GPIO pin to use for the Data/Control (DC) line on the 4-wire SPI communication. This pin number is specified in BCM pin numbers. If you have a 3-wire SPI display that does not have a Data/Control line, **set this value to -1**, i.e. `-DGPIO_TFT_DATA_CONTROL=-1` to tell fbcp-ili9341 to target 3-wire ("9-bit") SPI communication.
- `-DGPIO_TFT_RESET_PIN=number`: Specifies/overrides which GPIO pin to use for the display Reset line. This pin number is specified in BCM pin numbers. If omitted, it is assumed that the display does not have a Reset pin, and is always on.
- `-DGPIO_TFT_BACKLIGHT=number`: Specifies/overrides which GPIO pin to use for the display backlight line. This pin number is specified in BCM pin numbers. If omitted, it is assumed that the display does not have a GPIO-controlled backlight pin, and is always on. If setting this, also see the `#define BACKLIGHT_CONTROL` option in `config.h`.
fbcp-ili9341 always uses the hardware SPI0 port, so the MISO, MOSI, CLK and CE0 pins are always the same and cannot be changed. The MISO pin is actually not used (at the moment at least), so you can just skip connecting that one. If your display is a rogue one that ignores the chip enable line, you can omit connecting that as well, or might also be able to get away by connecting that to ground if you are hard pressed to simplify wiring (depending on the display).
###### Specifying display speed
To get good performance out of the displays, you will drive the displays far above the rated speed specs (the rated specs yield about ~10fps depending on display). Due to this, you will need to explicitly configure the target speed you want to drive the display at, because due to manufacturing variances each display copy reaches a different maximum speed. There is no "default speed" that fbcp-ili9341 would use. Setting the speed is done via the option
- `-DSPI_BUS_CLOCK_DIVISOR=even_number`: Sets the clock divisor number which along with the Pi [core_freq=](https://www.raspberrypi.org/documentation/configuration/config-txt/overclocking.md) option in `/boot/config.txt` specifies the overall speed that the display SPI communication bus is driven at. `SPI_frequency = core_freq/divisor`. `SPI_BUS_CLOCK_DIVISOR` must be an even number. Default Pi 3B and Zero W `core_freq` is 400MHz, and generally a value `-DSPI_BUS_CLOCK_DIVISOR=6` seems to be the best that a ILI9341 display can do. Try a larger value if the display shows corrupt output, or a smaller value to get higher bandwidth. See [ili9341.h](https://github.com/juj/fbcp-ili9341/blob/master/ili9341.h#L13) and [waveshare35b.h](https://github.com/juj/fbcp-ili9341/blob/master/waveshare35b.h#L10) for data points on tuning the maximum SPI performance. Safe initial value could be something like `-DSPI_BUS_CLOCK_DIVISOR=30`.
###### Specifying the target Pi hardware
There are a couple of options to explicitly say which Pi board you want to target. These should be autodetected for you and generally are not needed, but e.g. if you are cross compiling for another Pi board from another system, or want to be explicit, you can try:
- `-DSINGLE_CORE_BOARD=ON`: Pass this option if you are running on a Pi that has only one hardware thread (Pi Model A, Pi Model B, Compute Module 1, Pi Zero/Zero W). If not present, autodetected.
- `-DARMV6Z=ON`: Pass this option to specifically optimize for ARMv6Z instruction set (Pi 1A, 1A+, 1B, 1B+, Zero, Zero W). If not present, autodetected.
- `-DARMV7A=ON`: Pass this option to specifically optimize for ARMv7-A instruction set (Pi 2B < rev 1.2). If not present, autodetected.
- `-DARMV8A=ON`: Pass this option to specifically optimize for ARMv8-A instruction set (Pi 2B >= rev. 1.2, 3B, 3B+, CM3, CM3 lite or 4B). If not present, autodetected.
###### Specifying other build options
The following build options are general to all displays and Pi boards, they further customize the build:
- `-DBACKLIGHT_CONTROL=ON`: If set, enables fbcp-ili9341 to control the display backlight in the given backlight pin. The display will go to sleep after a period of inactivity on the screen. If not, backlight is not touched.
- `-DDISPLAY_CROPPED_INSTEAD_OF_SCALING=ON`: If set, and source video frame is larger than the SPI display video resolution, the source video is presented on the SPI display by cropping out parts of it in all directions, instead of scaling to fit.
- `-DDISPLAY_BREAK_ASPECT_RATIO_WHEN_SCALING=ON`: When scaling source video to SPI display, scaling is performed by default following aspect ratio, adding letterboxes/pillarboxes as needed. If this is set, the stretching is performed breaking aspect ratio.
- `-DSTATISTICS=number`: Specifies the level of overlay statistics to show on screen. 0: disabled, 1: enabled, 2: enabled, and show frame rate interval graph as well. Default value is 1 (enabled).
- `-DUSE_DMA_TRANSFERS=OFF`: If specified, disables using DMA transfers (at the expense of greatly increased CPU usage). Pass this directive if DMA is giving some issues, e.g. as a troubleshooting step if something is not looking right.
- `-DDMA_TX_CHANNEL=<num>`: Specifies the DMA channel number to use for SPI send commands. Change this if you find a DMA channel conflict.
- `-DDMA_RX_CHANNEL=<num>`: Specifies the DMA channel number to use for SPI receive commands. Change this if you find a DMA channel conflict.
- `-DDISPLAY_SWAP_BGR=ON`: If this option is passed, red and blue color channels are swapped (RGB<->BGR). Some displays have an opposite color panel subpixel layout that the display controller does not automatically account for, so define this if blue and red are mixed up.
- `-DDISPLAY_INVERT_COLORS=ON`: If this option is passed, pixel color value interpretation is reversed (white=0, black=31/63). Default: black=0, white=31/63. Pass this option if the display image looks like a color negative of the actual colors.
- `-DDISPLAY_ROTATE_180_DEGREES=ON`: If set, display is rotated 180 degrees. This does not affect HDMI output, only the SPI display output.
- `-DLOW_BATTERY_PIN=<num>`: Specifies a GPIO pin that can be polled to get the battery state. By default, when this is set, a low battery icon will be displayed if the pin is pulled low (see `config.h` for ways in which this can be tweaked).
In addition to the above CMake directives, there are various defines scattered around the codebase, mostly in [config.h](https://github.com/juj/fbcp-ili9341/blob/master/config.h), that control different runtime options. Edit those directly to further tune the behavior of the program. In particular, after you have finished with the setup, you may want to build with `-DSTATISTICS=0` option in CMake configuration line.
##### Build example
Here is a full example of what to type to build and run, if you have the [Adafruit 2.8" 320x240 TFT w/ Touch screen for Raspberry Pi](https://www.adafruit.com/product/1601) with ILI9341 controller:
```bash
cd ~
sudo apt-get install cmake
git clone https://github.com/juj/fbcp-ili9341.git
cd fbcp-ili9341
mkdir build
cd build
cmake -DSPI_BUS_CLOCK_DIVISOR=6 -DADAFRUIT_ILI9341_PITFT=ON ..
make -j
sudo ./fbcp-ili9341
```
If the above does not work, try specifying `-DSPI_BUS_CLOCK_DIVISOR=8` or `=10` to make the display run a little slower, or try with `-DUSE_DMA_TRANSFERS=OFF` to troubleshoot if DMA might be the issue. If you are using another display controller than ILI9341, using a much higher value, like 30 or 40 may be needed. When changing CMake options, you can reissue the CMake directive line without having to reclone or recreate the `build` directory. However you may need to manually delete file CMakeCache.txt between changing options to avoid CMake remembering old settings.
If you want to do a full rebuild from scratch, you can `rm -rf build` to delete the build directory and recreate it for a clean rebuild from scratch. There is nothing special about the name or location of this directory, it is just my usual convention. You can also do the build in some other directory relative to the fbcp-ili9341 directory if you please.
##### Launching the display driver at startup
To set up the driver to launch at startup, edit the file `/etc/rc.local` in `sudo` mode, and add a line
```bash
sudo /path/to/fbcp-ili9341/build/fbcp-ili9341 &
````
to the end. Make note of the needed ampersand `&` at the end of that line.
For example, if you used the command line steps listed above to build, the file `/etc/rc.local` would receive a line
```bash
sudo /home/pi/fbcp-ili9341/build/fbcp-ili9341 &
````
If the user name of your Raspberry Pi installation is something else than the default `pi`, change the directory accordingly to point to the user's home directory. (Use `pwd` to find out the current directory in terminal)
##### Configuring HDMI and TFT display sizes
If the size of the default HDMI output `/dev/fb0` framebuffer differs from the resolution of the display, the source video size will by default be rescaled to fit to the size of the SPI display. fbcp-ili9341 will manage setting up this rescaling if needed, and it will be done by the GPU, so performance should not be impacted too much. However if the resolutions do not match, small text will probably appear illegible. The resizing will be done in aspect ratio preserving manner, so if the aspect ratios do not match, either horizontal or vertical black borders will appear on the display. If you do not use the HDMI output at all, it is probably best to configure the HDMI output to match the SPI display size so that rescaling will not be needed. This can be done by setting the following lines in `/boot/config.txt`:
```
hdmi_group=2
hdmi_mode=87
hdmi_cvt=320 240 60 1 0 0 0
hdmi_force_hotplug=1
```
If your SPI display has a different resolution than 320x240, change the `320 240` part to e.g. `480 320`.
These lines hint native applications about the default display mode, and let them render to the native resolution of the TFT display. This can however prevent the use of the HDMI connector, if the HDMI connected display does not support such a small resolution. As a compromise, if both HDMI and SPI displays want to be used at the same time, some other compatible resolution such as 640x480 can be used. See [Raspberry Pi HDMI documentation](https://www.raspberrypi.org/documentation/configuration/config-txt/video.md) for the available options to do this.
##### Tuning Performance
The refresh speed of the display is dictated by the clock speed of the SPI bus that the display is connected to. Due to the way the BCM2835 chip on Raspberry Pi works, there does not exist a simple `speed=xxx Mhz` option that could be set to define the bus speed. Instead, the SPI bus speed is derived from two separate parameters: the core frequency of the BCM2835 SoC in general (`core_freq` in `/boot/config.txt`), and the SPI peripheral `CDIV` (Clock DIVider) setting. Together, the resulting SPI bus speed is then calculated with the formula `SPI_speed=core_freq/CDIV`.
To optimize the display to run as fast as possible,
1. Adjust the `CDIV` value by passing the directive `-DSPI_BUS_CLOCK_DIVISOR=number` in CMake command line. Possible values are even numbers `2`, `4`, `6`, `8`, `...`. Note that since `CDIV` appears in the denominator in the formula for `SPI_speed`, smaller values result in higher bus speeds, whereas higher values make the display go slower. Initially when you don't know how fast your display can run, try starting with a safe high setting, such as `-DSPI_BUS_CLOCK_DIVISOR=30`, and work your way to smaller numbers to find the maximum speed the display can cope with. See the table at the end of the README for specific observed maximum bus speeds for different displays.
2. Ensure turbo speed. This is critical for good frame rates. On the Raspberry Pi 3 Model B, the BCM2835 core runs by default at 400MHz (resulting in `400/CDIV` MHz SPI speed) **if** there is enough power provided to the Pi, and if the CPU temperature does not exceed thermal limits. If the CPU is idle, or voltage is low, the BCM2835 core will instead revert to non-turbo 250MHz state, resulting in `250/CDIV` MHz SPI speed. This effect of turbo speed on performance is significant, since 400MHz vs non-turbo 250MHz comes out to 60% more bandwidth. Getting 60fps in Quake, Sonic or Tyrian often requires this turbo frequency, but e.g. NES and C64 emulated games can often reach 60fps even with the stock 250MHz. If for some reason under-voltage protection is kicking in even when enough power should be fed, you can [force-enable turbo when low voltage is present](https://www.raspberrypi.org/forums/viewtopic.php?f=29&t=82373) by setting the value `avoid_warnings=2` in the file `/boot/config.txt`.
3. Perhaps a bit counterintuitively, **underclock** the core. Setting a **smaller** core frequency than the default turbo 400MHz can enable using a smaller clock divider to get a higher resulting SPI bus speed. For example, if with default `core_freq=400` SPI `CDIV=8` works (resulting in SPI bus speed `400MHz/8=50MHz`), but `CDIV=6` does not (`400MHz/6=66.67MHz` was too much), you can try lowering `core_freq=360` and set `CDIV=6` to get an effective SPI bus speed of `360MHz/6=60MHz`, a middle ground between the two that might perhaps work. Balancing `core_freq=` and `CDIV` options allows one to find the maximum SPI bus speed up to the last few kHz that the display controller can tolerate. One can also try the opposite direction and overclock, but that does then of course have all the issues that come along when overclocking. Underclocking does have the drawback that it makes the Pi run slower overall, so this is certainly a tradeoff.
##### Tuning CPU Usage
On the other hand, it is desirable to control how much CPU time fbcp-ili9341 is allowed to use. The default build settings are tuned to maximize the display refresh rate at the expense of power consumption on Pi 3B. On Pi Zero, the opposite is done, i.e. by default the driver optimizes for battery saving instead of maximal display update speed. The following options can be controlled to balance between these two:
- The main option to control CPU usage vs performance aspect is the option `#define ALL_TASKS_SHOULD_DMA` in `config.h`. Enabling this option will greatly reduce CPU usage. If this option is disabled, SPI bus utilization is maximized but CPU usage can be up to 80%-120%. When this option is enabled, CPU usage is generally up to around 15%-30%. Maximal CPU usage occurs when watching a video, or playing a fast moving game. If nothing is changing on the screen, CPU consumption of the driver should go down very close to 0-5%. By default `#define ALL_TASKS_SHOULD_DMA` is enabled for Pi Zero, but disabled for Pi 3B.
- The CMake option `-DUSE_DMA_TRANSFERS=ON` should always be enabled for good low CPU usage. If DMA transfers are disabled, the driver will run in Polled SPI mode, which generally utilizes a full dedicated single core of CPU time. If DMA transfers are causing issues, try adjusting the DMA send and receive channels to use for SPI communication with `-DDMA_TX_CHANNEL=<num>` and `-DDMA_RX_CHANNEL=<num>` CMake options.
- The statistics overlay prints out quite detailed information about execution state. Disabling the overlay with `-DSTATISTICS=0` option to CMake improves performance and reduces CPU usage. If you want to keep printing statistics, you can try increasing the interval with the `#define STATISTICS_REFRESH_INTERVAL <timeInMicroseconds>` option in config.h.
- Enabling `#define USE_GPU_VSYNC` reduces CPU consumption, but because of https://github.com/raspberrypi/userland/issues/440 can cause stuttering. Disabling `#define USE_GPU_VSYNC` produces less stuttering, but because of https://github.com/raspberrypi/userland/issues/440, increases CPU power consumption.
- The option `#define SELF_SYNCHRONIZE_TO_GPU_VSYNC_PRODUCED_NEW_FRAMES` can be used in conjunction with `#define USE_GPU_VSYNC` to try to find a middle ground between https://github.com/raspberrypi/userland/issues/440 issues - moderate to little stuttering while not trying to consume too much CPU. Try experimenting with enabling or disabling this setting.
- There are a number of `#define SAVE_BATTERY_BY_x` options in config.h, which all default to being enabled. These should be safe to use always without tradeoffs. If you are experiencing latency or performance related issues, you can try to toggle these to troubleshoot.
- The option `#define DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE` does cause a bit of extra CPU usage, so disabling it will lighten up the CPU load a bit.
- If your SPI display bus is able to run really fast in comparison to the size of the display and the amount of content changing on the screen, you can try enabling `#define UPDATE_FRAMES_IN_SINGLE_RECTANGULAR_DIFF` option in `config.h` to reduce CPU usage at the expense of increasing the number of bytes sent over the bus. This has been observed to have a big effect on Pi Zero, so is worth checking out especially there.
- If the SPI display bus is able to run really really really fast (or you don't care about frame rate, but just about low CPU usage), you can try enabling `#define UPDATE_FRAMES_WITHOUT_DIFFING` option in `config.h` to forgo the adaptive delta diffing option altogether. This will revert to naive full frame updates for absolutely minimum overall CPU usage.
- The option `#define RUN_WITH_REALTIME_THREAD_PRIORITY` can be enabled to make the driver run at realtime process priority. This can lock up the system, however; the option is still made available for advanced experimentation.
- In `display.h` there is an option `#define TARGET_FRAME_RATE <number>`. Setting this to a smaller value, such as 30, will trade refresh rate to reduce CPU consumption.
### About Input Latency
A pleasing aspect of fbcp-ili9341 is that it introduces very little latency overhead: on a 119Hz refreshing ILI9341 display, [fbcp-ili9341 gets pixels as response from GPIO input to screen in well less than 16.66 msecs](https://www.youtube.com/watch?v=EOICdpjiqv8) time. I only have a 120fps recording camera, so can't easily measure delays shorter than that, but rough statistical estimate of slow motion video footage suggests this delay could be as low as 2-3 msecs, dominated by the ~8.4msecs panel refresh rate of the ILI9341.
This does not mean that overall input to display latency in games would be so immediate. Briefly testing a NES emulated game in Retropie suggests a total latency of about 60-80 msecs. This latency is caused by the NES game emulator overhead and extra latency added by Linux, DispmanX and GPU rendering, and [GPU framebuffer snapshotting](https://github.com/raspberrypi/userland/issues/440). (If you ran fbcp-ili9341 as a static library bypassing DispmanX and the GPU stack, directly linking your GPIO input and application logic into fbcp-ili9341, you would be able to get down to this few msecs of overall latency, like shown in the above GPIO input video)
Interestingly, fbcp-ili9341 is about [~33msecs faster than a cheap 3.5" KeDei HDMI display](https://www.youtube.com/watch?v=1yvmvv0KtNs). I do not know if this is a result of the KeDei HDMI display specifically introducing extra latency, or if all HDMI displays connected to the Pi would have similar latency overhead. An interesting question is also how SPI would compare with DPI connected displays on the Pi.
### About Tearing
Unfortunately a limitation of SPI connected displays is that the VSYNC line signal is not available on the display controllers when they are running in SPI mode, so it is not possible to do vsync locked updates even if the SPI bus bandwidth on the display was fast enough. For example, the 4 ILI9341 displays I have can all be run faster than 75MHz so SPI bus bandwidth-wise all of them would be able to update a full frame in less than a vsync interval, but it is not possible to synchronize the updates to vsync since the display controllers do not report it. (If you do know of a display that does actually expose a vsync clock signal even in SPI mode, you can try implementing support for locking on to it.)
You can however choose between two distinct types of tearing artifacts: *straight line tearing* and *diagonal tearing*. Which one looks better is a bit subjective, which is why both options exist. I prefer the straight line tearing artifact, as it seems to be less intrusive than the diagonal tearing one. To toggle this, edit the option `#define DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE` in `config.h`. When this option is enabled, fbcp-ili9341 produces straight line tearing, and consumes a few percent more CPU power. By default Pi 3B builds with straight line tearing, and Pi Zero with the faster diagonal tearing. Check out the video [Latency and tearing test #2: GPIO input to display latency in fbcp-ili9341 and tearing modes](https://www.youtube.com/watch?v=EOICdpjiqv8) to see in slow motion what these two tearing modes look like.
Another option that is known to affect how the tearing artifact looks is the internal panel refresh rate. For ILI9341 displays this refresh rate can be adjusted in `ili9341.h`, and it can be set to a value between `ILI9341_FRAMERATE_61_HZ` and `ILI9341_FRAMERATE_119_HZ` (default). Slower refresh rates produce less tearing, but have higher input-to-display latency, whereas higher refresh rates will result in the opposite. Again, the resulting visual effect is a bit subjective.
To get tearing free updates, you should use a DPI display, or a good quality HDMI display. Beware that [cheap small 3.5" HDMI displays such as KeDei do also tear](https://www.youtube.com/watch?v=1yvmvv0KtNs) - that is, even if they are controlled via HDMI, they don't actually seem to implement VSYNC timed internal operation.
### About Smoothness
Having no vsync is not all bad though, since with the lack of vsync, SPI displays have the opportunity to obtain smoother animation on content that is not updating at 60Hz. It is possible that content on the SPI display will stutter even less than what DPI or HDMI displays on the Pi can currently provide (although I have not been able to test this in detail, except for the KeDei case above).
The main option that affects smoothness of display updates is the `#define USE_GPU_VSYNC` line in `config.h`. If this is enabled, then the internal Pi GPU HDMI vsync clock is used to drive frames onto the display. The Pi GPU clock runs at a fixed rate that is independent of the content. This rate can be discovered by running `tvservice -s` on the Pi console, and is usually 59Hz or 60Hz. If your application renders at this rate, animation will look smooth, but if not, there will be stuttering. For example playing a PAL NES game that updates at 50Hz with HDMI clock set at 60Hz will cause bad microstuttering in video output if `#define USE_GPU_VSYNC` is enabled.
If `USE_GPU_VSYNC` is disabled, then a busy spinning GPU frame snapshotting thread is used to drive the updates. This will produce smoother animation in content that does not maintain a fixed 60Hz rate. Especially in OpenTyrian, a game that renders at a fixed 36fps and has slowly scrolling scenery, the stuttering caused by `USE_GPU_VSYNC` is particularly visible. Running on Pi 3B without `USE_GPU_VSYNC` enabled produces visually smoother looking scrolling on an Adafruit 2.8" ILI9341 PiTFT set to update at 119Hz, compared to enabling `USE_GPU_VSYNC` on the same setup. Without `USE_GPU_VSYNC`, the dedicated frame polling loop thread "finds" the 36Hz update rate of the game, and then pushes pixels to the display at this exact rate. This works nicely since SPI displays disregard vsync - the result is that frames are pushed out to the SPI display immediately as they become available, instead of pulling them at a fixed 60Hz rate like HDMI does.
A drawback is that this kind of polling consumes more CPU time than the vsync option. The extra overhead is around +34% of CPU usage compared to the vsync method. It also requires using a background thread, and because of this, it is not feasible to be used on a single core Pi Zero. [If this polling was unnecessary](https://github.com/raspberrypi/userland/issues/440), this mode would also work on a Pi Zero, and without the added +34% CPU overhead on Pi 3B. See the Known Issues section below for more details.
![PiTFT display](/framerate_smoothness.jpg "Smoothness statistics")
There are two other main options that affect frame delivery timings, `#define SELF_SYNCHRONIZE_TO_GPU_VSYNC_PRODUCED_NEW_FRAMES` and `#define SAVE_BATTERY_BY_PREDICTING_FRAME_ARRIVAL_TIMES`. Check out the video [fbcp-ili9341 frame delivery smoothness test on Pi 3B and Adafruit ILI9341 at 119Hz](https://youtu.be/IqzKT33Rwjc) for a detailed side by side comparison of these different modes. The conclusions drawn from the four tested scenarios in the video are:
**1. vc_dispmanx_vsync_callback() (top left)**, set `#define USE_GPU_VSYNC` and unset `#define SELF_SYNCHRONIZE_TO_GPU_VSYNC_PRODUCED_NEW_FRAMES`:
This mode uses the DispmanX HDMI vsync signal callback to drive frames to the display.
Pros:
- least CPU overhead if content runs at 60Hz
- works on Pi Zero
Cons:
- animation stutters badly on content that is < 60Hz but also on 60Hz content
- excessive +1 vsync interval input to display latency
- wastes CPU overhead if content runs at less than 60Hz
**2. vc_dispmanx_vsync_callback() + self synchronization (top right)**, set `#define USE_GPU_VSYNC` and `#define SELF_SYNCHRONIZE_TO_GPU_VSYNC_PRODUCED_NEW_FRAMES`:
This mode uses the GPU vsync signal, but also aims to find and synchronize to the edge trigger when content is producing frames. This is the default build mode on Pi Zero.
Pros:
- works on Pi Zero
- reduced input to display latency compared to previous mode
- content that runs at 60Hz stutters less
Cons:
- content that runs < 60 Hz still stutters badly
- wastes CPU overhead if content runs at less than 60Hz
- consumes slightly extra CPU compared to previous method
**3. gpu polling thread + sleep heuristic (bottom left)**, unset `#define USE_GPU_VSYNC` and set `#define SAVE_BATTERY_BY_PREDICTING_FRAME_ARRIVAL_TIMES`:
This mode runs a dedicated background thread that drives frames from the GPU to the SPI display. This is the default build mode on Pi 3B.
Pros:
- smooth animation at all content frame rates
- low input to display latency
Cons:
- uses excessive CPU time, around +34% more CPU than the vsync signal based approach
- uses excessive GPU time, the VideoCore GPU will be downscaling and snapshotting redundant frames
- when content changes frame rate, has difficulty adjusting quickly - takes a bit of time to ramp up to the new frame rate
- requires a continuously running background thread, not feasible on Pi Zero
**4. gpu polling thread without sleeping (bottom right)**, unset `#define USE_GPU_VSYNC` and unset `#define SAVE_BATTERY_BY_PREDICTING_FRAME_ARRIVAL_TIMES`:
This mode runs the dedicated GPU thread as fast as possible, without attempting to sleep CPU.
Pros:
- smoothest animation at all content frame rates
- lowest input to display latency
- adapts instantaneously to variable frame rate content
Cons:
- uses ridiculously much CPU overhead, a full 100% core
- uses ridiculously much GPU overhead, the VideoCore GPU will be very busy downscaling and snapshotting redundant frames
- requires a continuously running background thread, not feasible on Pi Zero
### Known Issues
Be aware of the following limitations:
###### No rendered frame delivery via events from VideoCore IV GPU
- The codebase captures screen framebuffers by snapshotting via the VideoCore `vc_dispmanx_snapshot()` API, and the obtained pixels are then routed on to the SPI-based display. This kind of polling is performed, since there does not exist an event-based mechanism to get new frames from the GPU as they are produced. The result is inefficient and can easily cause stuttering, since different applications produce frames at different paces. **Ideally the code would ask the VideoCore API to receive finished frames in callback notifications immediately after they are rendered**, but this kind of functionality does not exist in the current GPU driver stack. In the absence of such event delivery mechanism, the code has to resort to polling snapshots of the display framebuffer using carefully timed heuristics to balance between keeping latency and stuttering low, while not causing excessive power consumption. These heuristics keep continuously guessing the update rate of the animation on screen, and they have been tuned to ensure that CPU usage goes down to 0% when there is no detected activity on screen, but it is certainly not perfect. This GPU limitation is discussed at https://github.com/raspberrypi/userland/issues/440. If you'd like to see fbcp-ili9341 operation reduce latency, stuttering and power consumption, please throw a (kind!) comment or a thumbs up emoji in that bug thread to share that you care about this, and perhaps Raspberry Pi engineers might pick the improvement up on the development roadmap. If this issue is resolved, all of the `#define USE_GPU_VSYNC`, `#define SAVE_BATTERY_BY_PREDICTING_FRAME_ARRIVAL_TIMES` and `#define SELF_SYNCHRONIZE_TO_GPU_VSYNC_PRODUCED_NEW_FRAMES` hacks from the previous section could be deleted from the driver, hopefully leading to a best of all worlds scenario without drawbacks.
###### Screen resize freezes DispmanX
- Currently if one resizes the video frame size at runtime, this causes DispmanX API to go sideways. See https://github.com/raspberrypi/userland/issues/461 for more information. Best workaround is to set the desired screen resolution in `/boot/config.txt` and configure all applications to never change that at runtime.
###### CPU Turbo is needed for good SPI bus bandwidth
- The speed of the SPI bus is linked to the BCM2835 core frequency. This frequency is at 250MHz by default (on e.g. Pi Zero, 3B and 3B+), and under CPU load, the core turbos up to 400MHz. This turboing directly scales up the SPI bus speed by `400/250=+60%` as well. Therefore when choosing the SPI `CDIV` value to use, one has to pick one that works for both idle and turbo clock speeds. Conversely, the BCM core reverts to non-turbo speed when there is only light CPU load active, and this slows down the display, so if an application is graphically intensive but light on CPU, the SPI display bus does not get a chance to run at maximum speeds. A way to work around this is to force the BCM core to always stay in its turbo state with `force_turbo=1` option in `/boot/config.txt`, but this has an unfortunate effect of causing the ARM CPU to always run in turbo speed as well, consuming excessive amounts of power. At the time of writing, there does not yet exist a good solution to have both power saving and good performance. This limitation is being discussed in more detail at https://github.com/raspberrypi/firmware/issues/992.
###### Raspbian + 32-bit only(?)
- At the moment fbcp-ili9341 is only likely to work on 32-bit OSes, on Raspbian/Ubuntu/Debian family of distributions, where Broadcom and DispmanX libraries are available. 64-bit operating systems do not currently work (see [issue #43](https://github.com/juj/fbcp-ili9341/issues/43)). It should be possible to port the driver to 64-bit and other OSes, though the amount of work has not been explored.
For more known issues and limitations, check out the [bug tracker](https://github.com/juj/fbcp-ili9341/issues), especially the entries marked *retired*, for items that are beyond current scope.
### Statistics Overlay
By default fbcp-ili9341 builds with a statistics overlay enabled. See the video [fbcp-ili9341 ported to ILI9486 WaveShare 3.5" (B) SpotPear 320x480 SPI display](https://www.youtube.com/watch?v=dqOLIHOjLq4) to find details on what each field means. Build with CMake option `-DSTATISTICS=0` to disable displaying the statistics. You can also try building with CMake option `-DSTATISTICS=2` to show a more detailed frame delivery timings histogram view, see screenshot and video above.
### FAQ and Troubleshooting
#### Why is the project named fbcp-ili9341?
The `fbcp` part in the name means *framebuffer copy*; specifically for the ILI9341 controller. fbcp-ili9341 is not actually a framebuffer copying driver, it does not create a secondary framebuffer that it would copy bytes across to from the primary framebuffer. It is also no longer a driver only for the ILI9341 controller. A more appropriate name might be *userland-raspi-spi-display-driver* or something like that, but the original name stuck.
#### Does fbcp-ili9341 work on Pi Zero?
Yes, it does, although not quite as well as on Pi 3B. If you'd like it to run better on a Pi Zero, leave a thumbs up at https://github.com/raspberrypi/userland/issues/440 - hard problems are difficult to justify prioritizing unless it is known that many people care about them.
#### The driver works well, but image is upside down. How do I rotate the display?
Enable the option `#define DISPLAY_ROTATE_180_DEGREES` in `config.h`. This should rotate the SPI display to show up the other way around, while keeping the HDMI connected display orientation unchanged. Another option is to utilize a `/boot/config.txt` option [display_rotate=2](https://www.raspberrypi.org/forums/viewtopic.php?t=120793), which rotates both the SPI output and the HDMI output.
#### How exactly do I edit the build options to e.g. remove the statistics lines or change some other option?
Edit the file `config.h` in a text editor (either a command line one such as `pico`, `vim` or `nano`, or map the drive to your host over SSH and use an editor there), and find the appropriate line in the file. Add comment characters `//` in front of that text to disable the option, or remove the `//` characters to enable it.
After having edited and saved the file, reissue `make -j` in the build directory and restart fbcp-ili9341.
Some options are passed to the build from the CMake configuration script. You can run with `make VERBOSE=1` to see which configuration items the CMake build is passing. See the above *Configuring Build Options* section to customize the CMake configure items. For example, to remove the statistics overlay, pass `-DSTATISTICS=0` directive to CMake.
#### bash: cmake: command not found!
Building requires CMake to be installed on the Pi: try `sudo apt-get install cmake`.
#### When I change a CMake option on the command line, it does not seem to apply
Try deleting CMakeCache.txt between changing CMake settings.
#### Does fbcp-ili9341 work with linux command line terminal or X windowing system?
Yes, both work fine. For linux command line terminal, the `/dev/tty1` console should be set to output to Linux framebuffer 0 (`/dev/fb0`). This is the default mode of operation and there do not exist other framebuffers in a default distribution of Raspbian, but if you have manually messed with the `con2fbmap` command in your installation, you may have inadvertently changed this configuration. Run `con2fbmap 1` to see which framebuffer the `/dev/tty1` console is outputting to, it should print `console 1 is mapped to framebuffer 0`. Type `con2fbmap 1 0` to reset console 1 back to outputting to framebuffer 0.
Likewise, the X windowing system should be configured to render to framebuffer 0. This is by default the case. The target framebuffer for X windowing service is usually configured via the `FRAMEBUFFER` environment variable before launching X. If X is not working by default, you can try overriding the framebuffer by launching X with `FRAMEBUFFER=/dev/fb0 startx` instead of just running `startx`.
#### Does fbcp-ili9341 work on Raspberry Pi 1 or Pi 2?
I don't know, I don't currently have any to test. Perhaps the code does need some model specific configuration, or perhaps it might work out of the box. I only have Pi 3B, Pi 3B+, Pi Zero W and a Pi 3 Compute Module based systems to experiment on. Pi 2 B has been reported to work by users ([#17](https://github.com/juj/fbcp-ili9341/issues/17)).
#### Does fbcp-ili9341 work on display XYZ?
If the display controller is one of the currently tested ones (see the list above), and it is wired up to run using 4-line SPI, then it should work. Pay attention to configure the `Data/Control` GPIO pin number correctly, and also specify the `Reset` GPIO pin number if the device has one.
If the display controller is not one of the tested ones, it may still work if it is similar to one of the existing ones. For example, ILI9340 and ILI9341 are practically the same controller. You can just try with a specific one to see how it goes.
If fbcp-ili9341 does not support your display controller, you will have to write support for it. fbcp-ili9341 does not have a "generic SPI TFT driver routine" that might work across multiple devices, but needs specific code for each. If you have the spec sheet available, you can ask for advice, but please do not request to add support to a display controller "blind", that is not possible.
#### Does fbcp-ili9341 work with 3-wire SPI displays?
Perhaps. This is a more recent experimental feature that may not be as stable, and there are some limitations, but 3-wire ("9-bit") SPI display support is now available. If you have a 3-wire SPI display, i.e. one that does not have a Data/Control (DC) GPIO pin to connect, configure it via CMake with directive `-DGPIO_TFT_DATA_CONTROL=-1` to tell fbcp-ili9341 that it should be driving the display with 3-wire protocol.
Current limitations of 3-wire communication are:
- The performance option `ALL_TASKS_SHOULD_DMA` is currently not supported, there is an issue with DMA chaining that prevents this from being enabled. As a result, CPU usage on 3-wire displays will be slightly higher than on 4-wire displays.
- The performance option `OFFLOAD_PIXEL_COPY_TO_DMA_CPP` is currently not supported. As a result, 3-wire displays may not work that well on single core Pis like Pi Zero.
- This has only been tested on my Adafruit SSD1351 128x96 RGB OLED display, which can be soldered to operate in 3-wire SPI mode, so testing has not been particularly extensive.
- Displays that have a 16-bit wide command word, such as ILI9486, do not currently work in 3-wire ("17-bit") mode. (But ILI9486L has 8-bit command word, so that does work)
#### Does fbcp-ili9341 work with I2C, DPI, MIPI DSI or USB connected displays?
No. Those are completely different technologies altogether. It should be possible to port the driver algorithm to work on I2C however, if someone is interested.
#### Does fbcp-ili9341 work with touch displays?
At the moment one cannot utilize the XPT2046/ADS7846 touch controllers while running fbcp-ili9341, so touch is mutually incompatible with this driver. In order for fbcp-ili9341 to function, you will need to remove all `dtoverlay`s in `/boot/config.txt` related to touch.
#### Is it possible to break my display with this driver if I misconfigure something?
I have done close to everything possible to my displays - cut power in middle of operation, sent random data and command bytes, set their operating voltage commands and clock timings to arbitrary high and low values, tested unspecified and reserved command fields, and driven the displays dozens of MHz faster than they managed to keep up with, and I have not yet done permanent damage to any of my displays or Pis.
Easiest way to do permanent damage is to fail at wiring, e.g. drive 5 volts if your display requires 3.3v, or short a connection, or something similar.
The one thing that fbcp-ili9341 stays clear of is programming the non-volatile memory areas of any of the displays. Therefore a hard power off on a display should clear all performed initialization and reset the display to its initial state at next power on.
That being said, if it breaks, you'll get to purchase a new shiny one to replace it.
#### Can I have both the HDMI and SPI connected at the same time?
Yes, fbcp-ili9341 shows the output of the HDMI display on the SPI screen, and both can be attached at the same time. A HDMI display does not have to be connected however, although fbcp-ili9341 operation will still be affected by whatever HDMI display mode is configured. Check out `tvservice -s` on the command line to check what the current DispmanX HDMI output mode is.
#### Do I have to show the same image on HDMI output and the SPI display, or can they be different?
At the moment fbcp-ili9341 has been developed to only display the contents of the main DispmanX GPU framebuffer over to the SPI display. That is, the SPI display will show the same picture as the HDMI output does. There is no technical restriction that requires this though, so if you know C/C++ well, it should be a manageable project to turn fbcp-ili9341 to operate as an offscreen display library to show a completely separate (non-GPU-accelerated) image than what the main HDMI display outputs. For example you could have two different outputs, e.g. a HUD overlay, a dashboard for network statistics, weather, temps, etc. showing on the SPI while having the main Raspberry Pi desktop on the HDMI.
In this kind of mode, you would probably strip the DispmanX bits out of fbcp-ili9341, and recast it as a static library that you would link to in your drawing application, and instead of snapshotting frames, you can then programmatically write to a framebuffer in memory from your C/C++ code.
#### I am running fbcp-ili9341 on a display that was listed above, but the display stays white after startup?
Unfortunately there are a number of things to go wrong that all result in a white screen. This is probably the hardest part to diagnose. Some ideas:
- double check the wiring,
- double check that the display controller is really what you expected. Trying to drive the display with the wrong initialization code usually results in the display not reacting, and the screen stays white,
- shut down and physically power off the Pi and the display in between multiple tests. Driving a display with a wrong initialization routine may put it in a bad state that needs a physical power off for it to reset,
- if there is a reset pin on the display, make sure to pass it in CMake line. Or alternatively, try driving fbcp-ili9341 without specifying the reset pin,
- make sure the display is configured to run 4-wire SPI mode, and not in parallel mode or 3-wire SPI mode. You may need to solder or desolder some connections or set a jumper to configure the specific driving mode. Support for 3-wire SPI displays does exist, but it is more limited and a bit experimental.
#### The display stays blank at boot without lighting up
This suggests that the power line or the backlight line might not be properly connected. Or if the backlight connects to a GPIO pin on the Pi (and not a voltage pin), then it may be that the pin is not in correct state for the backlight to turn on. Most of the LCD TFT displays I have immediately light up their backlight when they receive power. The Tontec one has a backlight GPIO pin that boots up high but must be pulled low to activate the backlight. OLED displays on the other hand seem to stay all black even after they do get power, while waiting for their initialization to be performed, so for OLEDs it may be normal for nothing to show up on the screen immediately after boot.
If the backlight connects to a GPIO pin, you may need to define `-DGPIO_TFT_BACKLIGHT=<pin>` in CMake command line or `config.h`, and edit `config.h` to enable `#define BACKLIGHT_CONTROL`.
#### The display clears from white to black after starting fbcp-ili9341, but picture does not show up?
fbcp-ili9341 runs a clear screen command at low speed as first thing after init, so if that goes through, it is a good sign. Try increasing `-DSPI_BUS_CLOCK_DIVISOR=` CMake option to a higher number to see if the display driving rate was too fast. Or try disabling DMA with `-DUSE_DMA_TRANSFERS=OFF` to see if this might be a DMA conflict.
#### Image does show up on display, but it freezes shortly afterwards
This suggests same as above, increase SPI bus divisor or troubleshoot disabling DMA. If DMA is detected to be the culprit, try changing up the DMA channels. Double check that `/boot/config.txt` does not have any `dtoverlay`s regarding other SPI display drivers or touch screen controllers, and that it does **NOT** have a `dtparam=spi=on` line in it - fbcp-ili9341 does not use the Linux kernel SPI driver.
Make sure other `fbcp` programs are not running, and that another copy of `fbcp-ili9341` is not running in the background.
#### Image does show up on display, but when I start up program XYZ, the image freezes
This is likely caused by the program resizing the video resolution at runtime, which breaks DispmanX. See https://github.com/raspberrypi/userland/issues/461 for more details.
#### The display works for some seconds or minutes, but then turns all white or black, or freezes
Check that the Pi is powered off of a power supply that can keep up with the voltage, and the low voltage icon is not showing up. (remove any `avoid_warnings=1/2` directive from `/boot/config.txt` if that was used to get rid of warnings overlay, to check that voltage is good) It has been observed that if there is not enough power supplied, the display can be the first to starve, while the Pi might keep on running fine. Try removing turbo settings or lowering the clock speed if you have overclocked to verify that the display crash is not power usage related.
Also try lowering SPI bus speed to a safe lower value, e.g. half of the maximum speed that the display was able to manage.
#### The driver is updating pixels on the display, but it looks all garbled
Double check the Data/Command (D/C) GPIO pin physically, and in CMake command line. Whenever fbcp-ili9341 refers to pin numbers, they are always specified in BCM pin numbers. Try setting a higher `-DSPI_BUS_CLOCK_DIVISOR=` value to CMake. Make sure no other `fbcp` programs or SPI drivers or dtoverlays are enabled.
#### Colors look wrong on the display
![BGR vs RGB and inverted colors](/wrong_colors.jpg)
If the color channels are mixed (red is blue, blue is red, green is green) like shown on the left image, pass the CMake option `-DDISPLAY_SWAP_BGR=ON` to the build.
If the color intensities look wrong (white is black, black is white, color looks like a negative image) like seen in the middle image, pass the CMake option `-DDISPLAY_INVERT_COLORS=ON` to the build.
If the colors look off in some other fashion, it is possible that the display is just being driven at too high an SPI bus speed, in which case try making the display run slower by choosing a higher `-DSPI_BUS_CLOCK_DIVISOR=` option to CMake. Especially on ILI9486 displays it has been observed that the colors on the display can become distorted if the display is run too fast beyond its maximum capability.
#### Failed to allocate GPU memory!
fbcp-ili9341 needs a few megabytes of GPU memory to function if DMA transfers are enabled. The [gpu_mem](https://www.raspberrypi.org/documentation/configuration/config-txt/memory.md) boot config option dictates how much of the Pi's memory area is allocated to the GPU. By default this is 64MB, which has been observed to not leave enough memory for fbcp-ili9341 if HDMI is run at 1080p. If this error happens, try increasing GPU memory to e.g. 128MB by adding a line `gpu_mem=128` in `/boot/config.txt`.
#### It does not build, or crashes, or something is obviously out of date
As the number of supported displays, Raspberry Pi device models, Raspbian/Retropie/Lakka OS versions, accompanied C++ compiler versions and fbcp-ili9341 build options have grown in number, there is a combinatorial explosion of all possible build modes that one can put the codebase through, so it is not easy to keep every possible combo tested all the time. Something may have regressed or gotten outdated. Stay calm, and report a bug.
You can also try looking through the commit history to find changes related to your configuration combo, to see if there's a mention of a known good commit in time that should work for your case. If you get an odd compiler error on `cmake` or `make` lines, those will usually be very easy to fix, as they are most of the time a result of some configurational oversight.
#### Which SPI display should I buy to make sure it works best with fbcp-ili9341?
First, make sure the display is a 4-wire SPI and not a 3-wire one. A display is 4-wire SPI if it has a Data/Control (DC) GPIO line that needs connecting. Sometimes the D/C pin is labeled RS (Register Select). Support for 3-wire SPI displays does exist, but it is experimental and not nearly as well tested as 4-wire displays.
Second is the consideration about display speed. Below is a performance chart of the different displays I have tested. Note that these are sample sizes of one, so I don't know how much sample variance exists. I also don't know whether big differences are likely to exist between displays with the same controller from different manufacturers. At least the different ILI9341 displays that I have are all quite consistent on performance, whether they are from Adafruit or WaveShare or from BuyDisplay.com.
| Vendor | Size | Resolution | Controller | Rated SPI Bus Speed | Obtained Bus Speed | Frame Rate |
| ------ | ---- | ---------- | ---------- | ------------------- | ------------------ | -----------|
| [Adafruit PiTFT](https://www.adafruit.com/product/1601) | 2.8" | 240x320 | ILI9341 | 10MHz | 294MHz/4=73.50MHz | 59.81 fps |
| [Adafruit PiTFT](https://www.adafruit.com/product/2315) | 2.2" | 240x320 | ILI9340 | 15.15MHz | 338MHz/4=84.50MHz | 68.76 fps |
| [Adafruit PiTFT](https://www.adafruit.com/product/2097) | 3.5" | 320x480 | HX8357D | 15.15MHz | 314MHz/6=52.33MHz | 21.29 fps |
| [Adafruit OLED](https://www.adafruit.com/product/1673) | 1.27" | 128x96 | SSD1351 | 20MHz | 360MHz/20=18.00MHz | 91.55 fps |
| [Waveshare RPi LCD (B) IPS](https://www.amazon.co.uk/dp/B01N48NOXI/ref=pe_3187911_185740111_TE_item) | 3.5" | 320x480 | ILI9486 | 15.15MHz | 255MHz/8=31.88MHz | 12.97 fps |
| [maithoga TFT LCD](https://www.aliexpress.com/item/3-5-inch-8P-SPI-TFT-LCD-Color-Screen-Module-ILI9486-Drive-IC-320-480-RGB/32828284227.html) | 3.5" | 320x480 | ILI9486L | 15.15MHz | 400MHz/8=50.00MHz | 13.56 fps* |
| [BuyDisplay.com SPI TFT](https://www.buydisplay.com/default/serial-spi-3-2-inch-tft-lcd-module-display-ili9341-power-than-sainsmart) copy #1 | 3.2" | 240x320 | ILI9341 | 10MHz | 310MHz/4=77.50MHz | 63.07 fps |
| [BuyDisplay.com SPI TFT](https://www.buydisplay.com/default/serial-spi-3-2-inch-tft-lcd-module-display-ili9341-power-than-sainsmart) copy #2 | 3.2" | 240x320 | ILI9341 | 10MHz | 300MHz/4=75.00MHz | 61.03 fps |
| [Arduino A000096 LCD](https://store.arduino.cc/arduino-lcd-screen) | 1.77" | 128x160 | ST7735R | 15.15MHz | 355MHz/6=59.17MHz | 180.56 fps |
| [Tontec MZ61581-PI-EXT 2016.1.28](https://www.ebay.com/p/Tontec-3-5-Inches-Touch-Screen-for-Raspberry-Pi-Display-TFT-Monitor-480x320-LCD/1649448059) | 3.5" | 320x480 | MZ61581 | 128MHz | 280MHz/2=140.00MHz | 56.97 fps |
| [Adafruit 240x240 Wide Angle TFT](https://www.adafruit.com/product/3787) | 1.54" | 240x240 | ST7789 | ? | 340MHz/4=85.00MHz | 92.23 fps |
| [WaveShare 240x240 Display HAT](https://www.waveshare.com/1.3inch-lcd-hat.htm) | 1.3" | 240x240 | ST7789VW | 62.5MHz | 338MHz/4=84.50MHz | 91.69 fps |
| [WaveShare 128x128 Display HAT](https://www.waveshare.com/1.44inch-lcd-hat.htm) | 1.44" | 128x128 | ST7735S | 15.15MHz | (untested) | (untested) |
| [KeDei v6.3](https://github.com/juj/fbcp-ili9341/issues/40) | 3.5" | 320x480 | MPI3501 | ? | 400MHz/12=33.333MHz | 4.8fps ** |
In this list, *Rated SPI Bus Speed* is the maximum clock speed that the display controller is rated to run at. The *Obtained Bus Speed* column lists the fastest SPI bus speed that was achieved in practice, and the `core_freq` BCM Core speed and SPI Clock Divider `CDIV` setting that was used to achieve that rate. Note how most display controllers can generally be driven much faster than what they are officially rated at in their spec sheets.
The *Frame Rate* column shows the worst case frame rate when full screen updates are being performed. This occurs for example when watching fullscreen video (that is not a flat colored cartoon). Because fbcp-ili9341 only sends over the pixels that have changed, displays such as HX8357D and ILI9486 can still be used to play many games at 60fps. Retro games work especially well.
All the ILI9341 displays work nice and super fast at ~70-80MHz. My WaveShare 3.5" 320x480 ILI9486 display runs really slow compared to its pixel resolution, ~32MHz only. See [fbcp-ili9341 ported to ILI9486 WaveShare 3.5" (B) SpotPear 320x480 SPI display](https://www.youtube.com/watch?v=dqOLIHOjLq4) for a video of this display in action. Adafruit's 320x480 3.5" HX8357D PiTFT is ~64% faster in comparison.
The ILI9486L controller based maithoga display runs a bit faster than ILI9486 WaveShare, 50MHz versus 31.88MHz, i.e. +56.8% bandwidth increase. However fps-wise maithoga reaches only 13.56 vs WaveShare 12.97 fps, because the bandwidth advantage is fully lost in pixel format differences: ILI9486L requires transmitting 24 bits per each pixel (R6G6B6 mode), whereas ILI9486 supports 16 bits per pixel R5G6B5 mode. This is reflected in the above chart refresh rate for the maithoga display (marked with a star).
If manufacturing variances turn out not to be high between copies, and you'd like to have a bigger 320x480 display instead of a 240x320 one, then it is recommended to avoid ILI9486, they indeed are slow.
The KeDei v6.3 display with MPI3501 controller takes the crown of being horrible, in all aspects imaginable. It is able to run at 33.33 MHz, but due to technical design limitations of the display (see [#40](https://github.com/juj/fbcp-ili9341/issues/40#issuecomment-441480557)), effective bus speed is halved, and only about 72% utilization of the remaining bus rate is achieved. DMA cannot be used, so CPU usage will be off the charts. Even though fbcp-ili9341 supports this display, level of support is expected to be poor, because the hardware design is a closed secret without open documentation publicly available from the manufacturer. Stay clear of KeDei or MPI3501 displays.
The Tontec MZ61581 controller based 320x480 3.5" display on the other hand can be driven insanely fast at up to 140MHz! These seem to be quite hard to come by though and they are expensive. Tontec seems to have gone out of business and for example the domain itontec.com from which the supplied instructions sheet asks to download original drivers from is no longer registered. I was able to find one from eBay for testing.
Search around, or ask the manufacturer of the display what the maximum SPI bus speed is for the device. This is the most important aspect to getting good frame rates, but unfortunately most web links never state the SPI speed rating, or they state it ridiculously low like in the spec sheets. Try and buy to see, or ask in some community forums from people who already have a particular display to find out what SPI bus speed it can achieve.
One might think that since Pi Zero is slower than a Pi 3, the SPI bus speed might not matter as much when running on a Pi Zero, but the effect is rather the opposite. To get good framerates on a Pi Zero, it should be paired with a display with as high SPI bus speed capability as possible. This is because the higher the SPI bus speed is, the more autonomously a DMA controller can drive it without CPU intervention. For the same reason, the interlacing technique does not (currently at least) perform well on a Pi Zero, so it is disabled there by default. ILI9341s run well on Pi Zero, ILI9486 on the other hand is quite difficult to combine with a Pi Zero.
Ultimately, it should be noted that parallel displays (DPI) are the proper method for getting fast framerates easily. SPI displays should only be preferred if display form factor is important and a desired product might only exist as SPI and not as DPI, or the number of GPIO pins that are available on the Pi is so scarce that sacrificing dozens of pins to RGB data is not feasible.
#### What other options/alternatives do I have to fbcp-ili9341?
Hardware-wise, there are six different ways to connect displays to the Pi. Here are the pros and cons of each:
1. [Composite video](https://en.wikipedia.org/wiki/Composite_video)
- +simple one-wire connectivity
- +the Pi GPU drives the signal on its own without CPU assistance, no driver needed
- +has vsync, no tearing artifacts
- +available for a cheap price
- -low quality analog signal that is blurry and has color artifacts
2. [I²C (Inter-Integrated Circuit)](https://en.wikipedia.org/wiki/I%C2%B2C)
- +fewest amount of digital signals (two): SDA (data) and SCL (clock)
- +available for a cheap price
- -slowest bandwidth, generally only the smallest displays with low resolution utilize this
- -need software CPU cycles to push pixels to display
- -no video vsync, causes tearing artifacts
3. [SPI (Serial Peripheral Interface)](https://en.wikipedia.org/wiki/Serial_Peripheral_Interface)
- the method used/supported by this driver
- +only few digital signal lines needed: SCLK (clock), MOSI (data), D/C (data/command) (MISO line is not read by fbcp-ili9341), CS (Chip Select) (sometimes optional)
- +much faster than I²C
- +very low video display latency
- +available for a cheap price
- -no single pin or protocol standard, be careful about hardware compatibility
- -need software CPU cycles to push pixels to display
- -no video vsync, causes tearing artifacts
- -low resolution, generally 480x320 or smaller
4. [DPI (Display Parallel Interface)](https://www.raspberrypi.org/documentation/hardware/raspberrypi/dpi/README.md)
- +high quality digital signal driven directly by the Pi GPU without CPU assistance
- +fixed 60hz updates without missed frames
- +has vsync, no tearing artifacts
- -no single pin or protocol standard, be careful about hardware compatibility
- -consumes most of the pins on the Pi GPIO header (20-28 digital signal pins)
- -no ability to disable vsync, likely more video latency than SPI
5. [MIPI-DSI (Display Serial Interface)](https://en.wikipedia.org/wiki/Display_Serial_Interface)
- +high quality digital signal driven directly by the Pi GPU without CPU assistance
- +fixed 60hz updates without missed frames
- +has vsync, no tearing artifacts
- +does not require GPIO pins, leaving them free for other use
- +available in high resolution
- [-uses proprietary DSI connectivity on the Pi, not an open ecosystem](https://www.raspberrypi.org/forums/viewtopic.php?t=153954)
- [-only one official display exists](https://www.raspberrypi.org/documentation/hardware/display/)
6. [HDMI](https://en.wikipedia.org/wiki/HDMI)
- +high quality digital signal driven directly by the Pi GPU without CPU assistance
- +fixed 60hz updates without missed frames
- +has vsync, no tearing artifacts
- +does not require GPIO pins, leaving them free for other use
- +very standard, little configuration needed in /boot/config.txt
- +available in high resolution
- -bulky connector for most portable designs
Displays are generally manufactured to utilize one specific interfacing method, with the exception that some displays have both I²C and SPI modes that can be configured via soldering.
Fbcp-ili9341 driver is about interfacing with SPI displays. If your display utilizes some other connection mechanism, fbcp-ili9341 will not apply.
Software-wise, there are two possible alternatives to fbcp-ili9341:
1. [notro/fbtft](https://github.com/notro/fbtft) + [tasanakorn/rpi-fbcp](https://github.com/tasanakorn/rpi-fbcp)
2. Use an ad hoc drawing library that provides both drawing primitives plus the display interface, e.g. [adafruit/Adafruit_Python_ILI9341](https://github.com/adafruit/Adafruit_Python_ILI9341).
### Resources
The following links proved helpful when writing this:
- [ARM BCM2835 Peripherals Manual PDF](https://www.raspberrypi.org/app/uploads/2012/02/BCM2835-ARM-Peripherals.pdf),
- [ILI9341 Display Controller Manual PDF](https://cdn-shop.adafruit.com/datasheets/ILI9341.pdf),
- [notro/fbtft](https://github.com/notro/fbtft): Linux Framebuffer drivers for small TFT LCD display modules,
- [BCM2835 driver](http://www.airspayce.com/mikem/bcm2835/) for Raspberry Pi,
- [tasanakorn/rpi-fbcp](https://github.com/tasanakorn/rpi-fbcp), original framebuffer driver,
- [tasanakorn/rpi-fbcp/#16](https://github.com/tasanakorn/rpi-fbcp/issues/16), discussion about performance,
- [Tomáš Suk, Cyril Höschl IV, and Jan Flusser, Rectangular Decomposition of Binary Images.](http://library.utia.cas.cz/separaty/2012/ZOI/suk-rectangular%20decomposition%20of%20binary%20images.pdf), a useful research paper about merging monochrome bitmap images to rectangles, which gave good ideas for optimizing SPI span merges across multiple scan lines,
- [VC DispmanX source code](https://github.com/raspberrypi/userland/blob/master/interface/vmcs_host/vc_vchi_dispmanx.c) (more or less the only official documentation bit on DispmanX I could ever find)
### I Want To Contribute / Future Work / TODOs
If you would like to help push Raspberry Pi SPI display support further, there are always more things to do in the project. Here is a list of ideas and TODOs for recognized work items to contribute, roughly rated in order of increasing difficulty.
- Vote up issue [raspberrypi/userland/#440](https://github.com/raspberrypi/userland/issues/440) if you would like to see Raspberry Pi Foundation improve CPU performance and reduce latency of the Pi when used with SPI displays.
- Vote up issue [raspberrypi/userland/#461](https://github.com/raspberrypi/userland/issues/461) if you would like to see fbcp-ili9341 not die (due to DispmanX dying) when HDMI display resolution changes.
- Vote up issue [raspberrypi/firmware/#992](https://github.com/raspberrypi/firmware/issues/992) if you would like to see Raspberry Pi SPI bus to have high throughput even when the Pi CPU is not under heavy CPU load (better SPI throughput with lower power consumption), a performance feature only SDHOST on the Pi currently enjoys.
- Benchmark fbcp-ili9341 performance in your use case with CPU tool `top`/`htop`, or with a power meter off the wall and report the results.
- Do you have a display with an unlisted or unknown display controller? Post close up photos of it to an issue in the tracker, and report if you were able to make it work with fbcp-ili9341?
- Did you have to do something unexpected or undocumented to get fbcp-ili9341 to work in your environment or use case? Write up a tutorial or record a video to let people know about the gotchas.
- If you have access to a high frequency scope/logic analyzer (~128MHz), audit the utilization of the SPI MOSI bus to find any remaining idle times on the bus, and analyze their sources.
- Port fbcp-ili9341 to work as a static code library that one can link to another application for CPU-based drawing directly to display, bypassing inefficiencies and latency of the general purpose Linux DispmanX/graphics stack.
- Improve existing display initialization routines with options to control e.g. gamma curves, color saturation, driving voltages, refresh rates or other potentially useful features that the display controller protocols expose.
- Add support to fbcp-ili9341 for a new display controller. (e.g. [#26](https://github.com/juj/fbcp-ili9341/issues/26))
- Implement support for reading the MISO line for display identification numbers/strings for potentially interesting statistics (could some of the displays be autodetected this way?)
- Add support for other color modes, like RGB666 or RGB888. Currently fbcp-ili9341 only knows about RGB565 display mode.
- Implement a kernel module that enables userland programs to allocate DMA channels, which fbcp-ili9341 could use to amicably reserve its own DMA channels without danger of conflicting.
- Implement support for touch control while fbcp-ili9341 is active. ([#33](https://github.com/juj/fbcp-ili9341/issues/33))
- Implement support for SPI-based SD card readers that are sometimes attached to displays.
- Port fbcp-ili9341 to work with I2C displays.
- Port more key algorithms to ARM assembly to optimize performance of fbcp-ili9341 in hotspots, or optimize execution in some other ways?
- Add support for building fbcp-ili9341 on another operating system than Raspbian. (see [#43](https://github.com/juj/fbcp-ili9341/issues/43))
- Add support for building on 64-bit operating systems. (see [#43](https://github.com/juj/fbcp-ili9341/issues/43))
- Port fbcp-ili9341 over to a new single-board computer hardware. (e.g. [#30](https://github.com/juj/fbcp-ili9341/issues/30))
- Improve support for 3-wire displays, e.g. for 1) "17-bit" 3-wire communication, 2) fix up `SPI_3WIRE_PROTOCOL` + `ALL_TASKS_SHOULD_DMA` to work together, or 3) fix up `SPI_3WIRE_PROTOCOL` + `OFFLOAD_PIXEL_COPY_TO_DMA_CPP` to work together.
- Optimize away unnecessary zero padding that 3-wire communication currently incurs, by keeping a queue of leftover untransmitted partial bits of a byte, and piggybacking them onto the next transfer that comes in.
- Port the high performance DMA-based SPI communication technique from fbcp-ili9341 over to another project that uses the SPI bus for something else, for close to 100% saturation of the SPI bus in the project.
- Improve the implementation of chaining DMA transfers to not only chain transfers within a single SPI task, but also across multiple SPI tasks.
- Optimize `ALL_TASKS_SHOULD_DMA` mode to be always superior in performance and CPU usage so that the non-`ALL_TASKS_SHOULD_DMA` path can be dropped from the codebase. (probably requires the above chaining to function efficiently)
- If you are knowledgeable with BCM2835 DMA, investigate whether the hacky dance where two DMA channels need to be used to reset and resume DMA SPI transfers when chaining, can be avoided?
- If you have contacts with Broadcom, ask them to promote use of the SoC hardware with DMA chaining + mixed SPI & non-SPI tasks as a first class tested use case. Current DMA SPI hardware behavior of BCM2835 is, to say the least, surprising.
### License
This driver is licensed under the MIT License. See LICENSE.txt. In nonlegal terms, it's yours for both free and commercial projects, DIY packages, kickstarters, Etsys and Ebays, and you don't owe back a dime. Feel free to apply and derive as you wish.
If you found fbcp-ili9341 useful, it makes me happy to hear back about the projects it found a home in. If you did a build or a project where fbcp-ili9341 worked out, it'd be great to see a video or some photos or read about your experiences.
I hope you build something you enjoy!
### Donating
I have been occasionally asked how to make a donation as a thank you for the work, so here is a PayPal link:
[![paypal](https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif)](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=DD8A74WY6Q4L2&currency_code=EUR)
Please note that a contribution is not expected, and you are free to use, publicize and redistribute the driver even without a payment.
### Contacting
Best way to discuss the driver is to open a GitHub issue. You may also be able to find me over at [sudomod.com Discord channel](https://sudomod.com/forum/viewtopic.php?f=42&t=438&sid=b868bb95ab5c3035b7810c71278637c6).

242
usr/fbcp-ili9341/config.h Normal file
View File

@ -0,0 +1,242 @@
#pragma once
// Build options: Uncomment any of these, or set at the command line to configure:
// If defined, renders a performance overlay on top of the screen. This option is passed from CMake
// configuration script. If you are getting statistics printed on screen
// even when this is commented out, pass -DSTATISTICS=0 to CMake invocation line. You can also try
// building with
// 'make VERBOSE=1'
// to see which config flags are coming from CMake to the build.
// #define STATISTICS
// How often the on-screen statistics is refreshed (in usecs)
#define STATISTICS_REFRESH_INTERVAL 200000
// How many usecs worth of past frame rate data do we preserve in the history buffer. Higher values
// make the frame rate display counter smoother and respond to changes with a delay, whereas smaller
// values can make the display fluctuate a bit erratically.
#define FRAMERATE_HISTORY_LENGTH 400000
// If enabled, displays a visual graph of frame completion times
// #define FRAME_COMPLETION_TIME_STATISTICS
// If defined, no sleeps are specified and the code runs as fast as possible. This should not improve
// performance, as the code has been developed with the mindset that sleeping should only occur at
// times when there is no work to do, rather than sleeping to reduce power usage. The only expected
// effect of this is that CPU usage shoots to 200%, while display update FPS is the same. Present
// here to allow toggling to debug this assumption.
// #define NO_THROTTLING
// If defined, display updates are synced to the vsync signal provided by the VideoCore GPU. That seems
// to occur quite precisely at 60 Hz. Testing on PAL NES games that run at 50Hz, this will not work well,
// since they produce new frames at every 20msecs, and the VideoCore functions for snapshotting also will
// output new frames at this vsync-detached interval, so there's a 50 Hz vs 60 Hz mismatch that results
// in visible microstuttering. Still, providing this as an option, this might be good for content that
// is known to run at native 60Hz.
// #define USE_GPU_VSYNC
// Always enable GPU VSync on the Pi Zero. Even though it is suboptimal and can cause stuttering, it saves battery.
#if defined(SINGLE_CORE_BOARD)
#if !defined(USE_GPU_VSYNC)
#define USE_GPU_VSYNC
#endif
#else // Multicore Pi boards (Pi 2, 3)
// If defined, communication with the SPI bus is handled with a dedicated thread. On the Pi Zero, this does
// not gain much, since it only has one hardware thread.
#define USE_SPI_THREAD
// If USE_GPU_VSYNC is defined, then enabling this causes new frames to be snapshot more often than at
// TARGET_FRAME_RATE interval to try to keep up smoother 60fps instead of stuttering. Consumes more CPU.
#define SELF_SYNCHRONIZE_TO_GPU_VSYNC_PRODUCED_NEW_FRAMES
#endif
// If enabled, the source video frame is not scaled to fit to the screen, but instead if the source frame
// is bigger than the SPI display, then content is cropped away, i.e. the source is displayed "centered"
// on the SPI screen:
// #define DISPLAY_CROPPED_INSTEAD_OF_SCALING
// If enabled, the main thread and SPI thread are executed with realtime priority
// #define RUN_WITH_REALTIME_THREAD_PRIORITY
// If defined, progressive updating is always used (at the expense of slowing down refresh rate if it's
// too much for the display to handle)
// #define NO_INTERLACING
#if (defined(FREEPLAYTECH_WAVESHARE32B) || (defined(ILI9341) && SPI_BUS_CLOCK_DIVISOR <= 4)) && defined(USE_DMA_TRANSFERS) && !defined(NO_INTERLACING)
// The Freeplaytech CM3/Zero displays actually only have a visible display resolution of 302x202, instead of
// 320x240, and this is enough to give them full progressive 320x240x60fps without ever resorting to
// interlacing. Also, ILI9341 displays running with clock divisor of 4 have enough bandwidth to never need
// interlacing either.
#define NO_INTERLACING
#endif
// If defined, all frames are always rendered as interlaced, and never use progressive rendering.
// #define ALWAYS_INTERLACING
// By default, if the SPI bus is idle after rendering an interlaced frame, but the GPU has not yet produced
// a new application frame to be displayed, the same frame will be rendered again for its other field.
// Define this option to disable this behavior, in which case when an interlaced frame is rendered, the
// remaining other field half of the image will never be uploaded.
// #define THROTTLE_INTERLACING
// The ILI9486 has to resort to interlacing as a rule rather than exception, and it works much smoother
// when applying throttling to interlacing, so enable it by default there.
#if defined(ILI9486) || defined(HX8357D)
#define THROTTLE_INTERLACING
#endif
// If defined, DMA usage is foremost used to save power consumption and CPU usage. If not defined,
// DMA usage is tailored towards maximum performance.
// #define ALL_TASKS_SHOULD_DMA
// If defined, screen updates are performed in strictly one update rectangle per frame.
// This reduces CPU consumption at the expense of sending more pixels. You can try enabling this
// if your SPI display runs at a good high SPI bus MHz speed with respect to the screen resolution.
// Useful on Pi Zero W and ILI9341 to conserve CPU power. If this is not defined, the default much
// more powerful diffing algorithm is used, which sends far fewer pixels each frame, (but that diffing
// costs more CPU time). Enabling this requires that ALL_TASKS_SHOULD_DMA is also enabled.
// #define UPDATE_FRAMES_IN_SINGLE_RECTANGULAR_DIFF
// If UPDATE_FRAMES_IN_SINGLE_RECTANGULAR_DIFF is used, controls whether the generated tasks are aligned for
// ARMv6 cache lines. This is good to be enabled for ARMv6 Pis, doesn't make much difference on ARMv7 and ARMv8 Pis.
#define ALIGN_DIFF_TASKS_FOR_32B_CACHE_LINES
// If defined, screen updates are performed without performing diffing at all, i.e. by doing
// full updates. This is very lightweight on CPU, but excessive on the SPI bus. Enabling this
// requires that ALL_TASKS_SHOULD_DMA is also enabled.
// #define UPDATE_FRAMES_WITHOUT_DIFFING
#if defined(SINGLE_CORE_BOARD) && defined(USE_DMA_TRANSFERS) && !defined(SPI_3WIRE_PROTOCOL) // TODO: 3-wire SPI displays are not yet compatible with ALL_TASKS_SHOULD_DMA option.
// These are prerequisites for good performance on Pi Zero
#ifndef ALL_TASKS_SHOULD_DMA
#define ALL_TASKS_SHOULD_DMA
#endif
#ifndef NO_INTERLACING
#define NO_INTERLACING
#endif
// This saves a lot of CPU, but if you don't care and your SPI display does not have much bandwidth, try commenting this out for more performant
// screen updates
#ifndef UPDATE_FRAMES_IN_SINGLE_RECTANGULAR_DIFF
#define UPDATE_FRAMES_IN_SINGLE_RECTANGULAR_DIFF
#endif
#endif
// If per-pixel diffing is enabled (neither UPDATE_FRAMES_IN_SINGLE_RECTANGULAR_DIFF nor UPDATE_FRAMES_WITHOUT_DIFFING
// are enabled), the following variable controls whether to lean towards more precise pixel diffing, or faster, but
// coarser pixel diffing. Coarse method is twice as fast as the precise method, but submits slightly more pixels.
// In most cases it is better to use the coarse method, since the increase in pixel counts is small (~5%-10%),
// so enabled by default. If your display is very constrained on SPI bus speed, and don't mind increased
// CPU consumption, comment this out to use the precise algorithm.
#if !defined(ALL_TASKS_SHOULD_DMA) // At the moment the coarse method is not good at producing long spans, so disable if all tasks should DMA
#define FAST_BUT_COARSE_PIXEL_DIFF
#endif
#if defined(ALL_TASKS_SHOULD_DMA)
// This makes all submitted tasks go through DMA, and not use a hybrid Polled SPI + DMA approach.
#define ALIGN_TASKS_FOR_DMA_TRANSFERS
#endif
// If defined, the GPU polling thread will be put to sleep for 1/TARGET_FRAMERATE seconds after receiving
// each new GPU frame, to wait for the earliest moment that the next frame could arrive.
#define SAVE_BATTERY_BY_SLEEPING_UNTIL_TARGET_FRAME
// Detects when the activity on the screen is mostly idle, and goes to low power mode, in which new
// frames will be polled first at 10fps, and ultimately at only 2fps.
#define SAVE_BATTERY_BY_SLEEPING_WHEN_IDLE
// Builds a histogram of observed frame intervals and uses that to sync to a known update rate. This aims
// to detect if an application uses a non-60Hz update rate, and synchronizes to that instead.
#define SAVE_BATTERY_BY_PREDICTING_FRAME_ARRIVAL_TIMES
// If defined, rotates the display 180 degrees. This might not rotate the panel scan order though,
// so adding this can cause up to one vsync worth of extra display latency. It is best to avoid this and
// install the display in its natural rotation order, if possible.
// #define DISPLAY_ROTATE_180_DEGREES
// If defined, displays in landscape. Undefine to display in portrait. When changing this, swap
// values of DISPLAY_WIDTH and DISPLAY_HEIGHT accordingly
#define DISPLAY_OUTPUT_LANDSCAPE
// If defined, the source video frame is scaled to fit the SPI display by stretching to fit, ignoring
// aspect ratio. Enabling this will cause e.g. 16:9 1080p source to be stretched to fully cover
// a 4:3 320x240 display. If disabled, scaling is performed preserving aspect ratio, so letterboxes or
// pillarboxes will be introduced if needed to retain proper width and height proportions.
// #define DISPLAY_BREAK_ASPECT_RATIO_WHEN_SCALING
// If defined, reverses RGB<->BGR color subpixel order. This is something that seems to be display panel
// specific, rather than display controller specific, and displays manufactured with the same controller
// can have different subpixel order (without the controller taking it automatically into account).
// If display colors come out reversed in blue vs red channels, define this to swap the two.
// #define DISPLAY_SWAP_BGR
// If defined, inverts display pixel colors (31=black, 0=white). Default is to have (0=black, 31=white)
// Pass this if the colors look like a photo negative of the actual image.
// #define DISPLAY_INVERT_COLORS
// If defined, flipping the display between portrait<->landscape is done in software, rather than
// asking the display controller to adjust its RAM write direction.
// Doing the flip in software reduces tearing, since neither the ILI9341 nor ILI9486 displays (and
// probably no other displays in existence?) allow one to adjust the direction that the scanline refresh
// cycle runs in, but the scanline refresh always runs in portrait mode in these displays. Not having
// this defined reduces CPU usage at the expense of more tearing, although it is debatable which
// effect is better - this can be subjective. Impact is around 0.5-1.0msec of extra CPU time.
// DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE disabled: diagonal tearing
// DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE enabled: traditional no-vsync tearing (tear line runs in portrait
// i.e. narrow direction)
#if !defined(SINGLE_CORE_BOARD)
#define DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE
#endif
// If enabled, build to utilize DMA transfers to communicate with the SPI peripheral. Otherwise polling
// writes will be performed (possibly with interrupts, if using kernel side driver module)
// #define USE_DMA_TRANSFERS
// If defined, enables code to manage the backlight.
// #define BACKLIGHT_CONTROL
#if defined(BACKLIGHT_CONTROL)
// If enabled, reads keyboard for input events to detect when the system has gone inactive and backlight
// can be turned off
#define BACKLIGHT_CONTROL_FROM_KEYBOARD
// This device file is used to capture keyboard input. This may be "/dev/input/event0" or something else
// on some Pis
#define KEYBOARD_INPUT_FILE "/dev/input/event1"
// If enabled, the display backlight will be turned off after this many usecs of no activity on screen.
#define TURN_DISPLAY_OFF_AFTER_USECS_OF_INACTIVITY (1 * 60 * 1000000)
#endif
// If defined, enable a low battery icon triggered by a GPIO pin whose BCM number is given.
// #define LOW_BATTERY_PIN 19
// Which state of the LOW_BATTERY_PIN is considered to be low battery. Note that the GPIO pin must be
// in the correct state (input with pull-up/pull-down resistor) before the program is started.
#define LOW_BATTERY_IS_ACTIVE_HIGH 0
// Polling interval (in microseconds) for the low battery pin.
#define LOW_BATTERY_POLLING_INTERVAL 1000000
// If less than this much % of the screen changes per frame, the screen is considered to be inactive, and
// the display backlight can automatically turn off, if TURN_DISPLAY_OFF_AFTER_USECS_OF_INACTIVITY is
// defined.
#define DISPLAY_CONSIDERED_INACTIVE_PERCENTAGE (5.0 / 100.0)
#ifndef KERNEL_MODULE
// Define this if building the client side program to run against the kernel driver module, rather than
// as a self-contained userland program.
// #define KERNEL_MODULE_CLIENT
#endif
// Experimental/debugging: If defined, let the userland side program create and run the SPI peripheral
// driving thread. Otherwise, let the kernel drive SPI (e.g. via interrupts or its own thread)
// This should be unset, only available for debugging.
// #define KERNEL_MODULE_CLIENT_DRIVES

490
usr/fbcp-ili9341/diff.cpp Normal file
View File

@ -0,0 +1,490 @@
#include "config.h"
#include "diff.h"
#include "util.h"
#include "display.h"
#include "gpu.h"
#include "spi.h"
Span *spans = 0;
#ifdef UPDATE_FRAMES_WITHOUT_DIFFING
// Diff-free fallback: reports the whole GPU frame as a single changed region.
// Fills in the first Span that the global 'spans' pointer refers to so that it covers
// x in [0, gpuFrameWidth) and y in [0, gpuFrameHeight), terminates the list (next == 0),
// and hands that Span back to the caller through 'head'.
void NoDiffChangedRectangle(Span *&head)
{
  Span *fullFrame = spans;
  head = fullFrame;

  // Top-left corner of the region
  fullFrame->x = 0;
  fullFrame->y = 0;

  // Bottom-right corner: both the regular and the last-scanline end X are the frame width
  fullFrame->lastScanEndX = gpuFrameWidth;
  fullFrame->endX = fullFrame->lastScanEndX;
  fullFrame->endY = gpuFrameHeight;

  // Pixel count of the region, and no further spans after this one
  fullFrame->size = gpuFrameWidth * gpuFrameHeight;
  fullFrame->next = 0;
}
#endif
#ifdef UPDATE_FRAMES_IN_SINGLE_RECTANGULAR_DIFF
// Coarse forward diff of two framebuffers with tight stride. Each loop iteration compares 16 pixels
// (two groups of 4x32 bits = 8 pixels each) of the current frame against the previous frame.
// Returns the offset, in uint16_t pixels from 'framebuffer', of the start of the first 8-pixel group that
// contains a changed pixel, or (framebufferEnd - framebuffer) if no group differs.
// The loop terminates only when r1 becomes exactly equal to framebufferEnd after a full 32-byte iteration,
// so the compared range has to be a nonzero multiple of 32 bytes (the caller in this file checks the
// multiple-of-32 condition before taking this path).
static int coarse_linear_diff(uint16_t *framebuffer, uint16_t *prevFramebuffer, uint16_t *framebufferEnd)
{
uint16_t *endPtr; // receives the final value of r1 from the asm block below
asm volatile(
"mov r0, %[framebufferEnd]\n" // r0 <- pointer to end of current framebuffer
"mov r1, %[framebuffer]\n" // r1 <- current framebuffer
"mov r2, %[prevFramebuffer]\n" // r2 <- framebuffer of previous frame
"start_%=:\n"
"pld [r1, #128]\n" // preload data caches for both current and previous framebuffers 128 bytes ahead of time
"pld [r2, #128]\n"
"ldmia r1!, {r3,r4,r5,r6}\n" // load 4x32-bit elements (8 pixels) of current framebuffer, post-incrementing r1 by 16 bytes
"ldmia r2!, {r7,r8,r9,r10}\n" // load corresponding 4x32-bit elements (8 pixels) of previous framebuffer
"cmp r3, r7\n" // compare all 8 pixels if they are different
"cmpeq r4, r8\n" // each cmpeq only executes while everything so far compared equal, so the flags end up "not equal" at the first mismatch
"cmpeq r5, r9\n"
"cmpeq r6, r10\n"
"bne end_%=\n" // if we found a difference, we are done
// Unroll once for another set of 4x32-bit elements. On Raspberry Pi Zero, data cache line is 32 bytes in size, so one iteration
// of the loop computes a single data cache line, with preloads in place at the top.
"ldmia r1!, {r3,r4,r5,r6}\n"
"ldmia r2!, {r7,r8,r9,r10}\n"
"cmp r3, r7\n"
"cmpeq r4, r8\n"
"cmpeq r5, r9\n"
"cmpeq r6, r10\n"
"bne end_%=\n" // if we found a difference, we are done
"cmp r0, r1\n" // framebuffer == framebufferEnd? did we finish through the array?
"bne start_%=\n"
"b done_%=\n" // no difference found: r1 == framebufferEnd is reported as is
"end_%=:\n"
"sub r1, r1, #16\n" // ldmia r1! increments r1 after load, so subtract back the last increment in order to not shoot past the first changed pixels
"done_%=:\n"
"mov %[endPtr], r1\n" // output endPtr back to C code
: [endPtr]"=r"(endPtr)
: [framebuffer]"r"(framebuffer), [prevFramebuffer]"r"(prevFramebuffer), [framebufferEnd]"r"(framebufferEnd)
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
);
return endPtr - framebuffer; // pointer difference of uint16_t*, i.e. an offset in pixels
}
// Same as coarse_linear_diff, but scans from the end of the framebuffer towards the beginning, so it locates
// the last changed pixel in linear order instead of the first.
// Returns the offset, in uint16_t pixels from 'framebuffer', of the position just past the 8-pixel group that
// contains the last changed pixel (i.e. the coarse result is aligned up to an 8 pixels boundary). If the
// very last group differs, the result equals (framebufferEnd - framebuffer), which is one past the last
// valid pixel index. If no group differs, the result is 0.
// As with coarse_linear_diff, the loop terminates only on an exact pointer match after a full 32-byte
// iteration, so the compared range has to be a nonzero multiple of 32 bytes.
static int coarse_backwards_linear_diff(uint16_t *framebuffer, uint16_t *prevFramebuffer, uint16_t *framebufferEnd)
{
uint16_t *endPtr; // receives the final value of r1 from the asm block below
asm volatile(
"mov r0, %[framebufferBegin]\n" // r0 <- pointer to beginning of current framebuffer
"mov r1, %[framebuffer]\n" // r1 <- current framebuffer (starting from end of framebuffer)
"mov r2, %[prevFramebuffer]\n" // r2 <- framebuffer of previous frame (starting from end of framebuffer)
"start_%=:\n"
"pld [r1, #-128]\n" // preload data caches for both current and previous framebuffers 128 bytes before the read position (the scan runs backwards)
"pld [r2, #-128]\n"
"ldmdb r1!, {r3,r4,r5,r6}\n" // load 4x32-bit elements (8 pixels) of current framebuffer, pre-decrementing r1 by 16 bytes
"ldmdb r2!, {r7,r8,r9,r10}\n" // load corresponding 4x32-bit elements (8 pixels) of previous framebuffer
"cmp r3, r7\n" // compare all 8 pixels if they are different
"cmpeq r4, r8\n"
"cmpeq r5, r9\n"
"cmpeq r6, r10\n"
"bne end_%=\n" // if we found a difference, we are done
// Unroll once for another set of 4x32-bit elements. On Raspberry Pi Zero, data cache line is 32 bytes in size, so one iteration
// of the loop computes a single data cache line, with preloads in place at the top.
"ldmdb r1!, {r3,r4,r5,r6}\n"
"ldmdb r2!, {r7,r8,r9,r10}\n"
"cmp r3, r7\n"
"cmpeq r4, r8\n"
"cmpeq r5, r9\n"
"cmpeq r6, r10\n"
"bne end_%=\n" // if we found a difference, we are done
"cmp r0, r1\n" // r1 == beginning of the framebuffer? did we scan back through the whole array?
"bne start_%=\n"
"b done_%=\n" // no difference found: r1 == beginning of the framebuffer is reported as is
"end_%=:\n"
"add r1, r1, #16\n" // ldmdb r1! decrements r1 before load, so add back the last decrement: r1 then points just past the 8-pixel group holding the last changed pixels
"done_%=:\n"
"mov %[endPtr], r1\n" // output endPtr back to C code
: [endPtr]"=r"(endPtr)
// Note the operand naming: the asm operand called 'framebuffer' is bound to framebufferEnd (the scan start), and
// the previous frame pointer is advanced by the same element count so both scans start at matching positions.
: [framebuffer]"r"(framebufferEnd), [prevFramebuffer]"r"(prevFramebuffer+(framebufferEnd-framebuffer)), [framebufferBegin]"r"(framebuffer)
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc"
);
return endPtr - framebuffer; // pointer difference of uint16_t*, i.e. an offset in pixels
}
// Diffs the current frame against the previous frame and reports the changed area as one Span
// stored in spans[0]: the bounding rectangle of all changed pixels, whose last scanline ends at
// the last changed pixel (lastScanEndX).
//   framebuffer / prevFramebuffer: current and previous frame, 16 bits per pixel.
//   head: set to the resulting span. If no pixel changed, the function returns early and leaves
//         'head' untouched.
// Two search strategies are used for the first/last changed pixel: the assembly coarse scanners
// when the framebuffer is tightly packed and its byte size is a multiple of 32, and a portable
// scanline-by-scanline search otherwise.
void DiffFramebuffersToSingleChangedRectangle(uint16_t *framebuffer, uint16_t *prevFramebuffer, Span *&head)
{
  int minY = 0;
  int minX = -1;

  const int stride = gpuFramebufferScanlineStrideBytes>>1; // Stride as uint16 elements.
  const int WidthAligned4 = (uint32_t)gpuFrameWidth & ~3u;

  uint16_t *scanline = framebuffer;
  uint16_t *prevScanline = prevFramebuffer;

  // The coarse scanners treat the frame as one linear array and step 32 bytes per loop iteration.
  // Evaluated once, on the first call.
  static const bool framebufferSizeCompatibleWithCoarseDiff = gpuFramebufferScanlineStrideBytes == gpuFrameWidth*2 && gpuFramebufferScanlineStrideBytes*gpuFrameHeight % 32 == 0;

  // 1) Find the first changed pixel in linear (top-to-bottom, left-to-right) order.
  if (framebufferSizeCompatibleWithCoarseDiff)
  {
    int numPixels = gpuFrameWidth*gpuFrameHeight;
    int firstDiff = coarse_linear_diff(framebuffer, prevFramebuffer, framebuffer + numPixels);
    if (firstDiff == numPixels)
      return; // No pixels changed, nothing to do.

    // Coarse diff computes a diff at 8 adjacent pixels at a time, and returns the point to the 8-pixel aligned coordinate where the pixels began to differ.
    // Compute the precise diff position here.
    while(framebuffer[firstDiff] == prevFramebuffer[firstDiff]) ++firstDiff;
    minX = firstDiff % gpuFrameWidth;
    minY = firstDiff / gpuFrameWidth;
  }
  else
  {
    while(minY < gpuFrameHeight)
    {
      int x = 0;
      // diff 4 pixels at a time
      for(; x < WidthAligned4; x += 4)
      {
        uint64_t diff = *(uint64_t*)(scanline+x) ^ *(uint64_t*)(prevScanline+x);
        if (diff)
        {
          // Lowest set bit / 16 = index of the first differing pixel within the group of four.
          minX = x + (__builtin_ctzll(diff) >> 4);
          goto found_top;
        }
      }

      // tail unaligned 0-3 pixels one by one
      for(; x < gpuFrameWidth; ++x)
      {
        uint16_t diff = *(scanline+x) ^ *(prevScanline+x);
        if (diff)
        {
          minX = x;
          goto found_top;
        }
      }
      scanline += stride;
      prevScanline += stride;
      ++minY;
    }
    return; // No pixels changed, nothing to do.
  }
found_top:

  // 2) Find the last changed pixel in linear order.
  int maxX = -1;
  int maxY = gpuFrameHeight-1;

  if (framebufferSizeCompatibleWithCoarseDiff)
  {
    int numPixels = gpuFrameWidth*gpuFrameHeight;
    int lastDiff = coarse_backwards_linear_diff(framebuffer, prevFramebuffer, framebuffer + numPixels);
    // The backwards coarse diff returns the position just PAST the 8-pixel group that contains the last
    // changed pixel. When the very last group differs, that position is numPixels, which is not a valid
    // pixel index, so step back onto the last pixel of the group before reading anything. For every
    // other position this step is equivalent to the first iteration of the refinement loop below, since
    // the pixel at the returned position belongs to a group that already compared equal.
    if (lastDiff > 0) --lastDiff;
    // Compute the precise diff position here.
    while(lastDiff > 0 && framebuffer[lastDiff] == prevFramebuffer[lastDiff]) --lastDiff;
    maxX = lastDiff % gpuFrameWidth;
    maxY = lastDiff / gpuFrameWidth;
  }
  else
  {
    scanline = framebuffer + (gpuFrameHeight - 1)*stride;
    prevScanline = prevFramebuffer + (gpuFrameHeight - 1)*stride; // (same scanline from previous frame, not preceding scanline)

    while(maxY >= minY)
    {
      int x = gpuFrameWidth-1;
      // tail unaligned 0-3 pixels one by one
      for(; x >= WidthAligned4; --x)
      {
        if (scanline[x] != prevScanline[x])
        {
          maxX = x;
          goto found_bottom;
        }
      }

      // diff 4 pixels at a time
      x = x & ~3u;
      for(; x >= 0; x -= 4)
      {
        uint64_t diff = *(uint64_t*)(scanline+x) ^ *(uint64_t*)(prevScanline+x);
        if (diff)
        {
          // Highest set bit / 16 counted from the top = index of the last differing pixel within the group.
          maxX = x + 3 - (__builtin_clzll(diff) >> 4);
          goto found_bottom;
        }
      }
      scanline -= stride;
      prevScanline -= stride;
      --maxY;
    }
  }
found_bottom:

  // 3) Widen the [minX, maxX] column range so that it covers changed pixels on all rows minY..maxY.
  scanline = framebuffer + minY*stride;
  prevScanline = prevFramebuffer + minY*stride;
  int lastScanEndX = maxX; // remember where the last scanline ends before minX/maxX get reordered
  if (minX > maxX) SWAPU32(minX, maxX);

  int leftX = 0;
  while(leftX < minX)
  {
    uint16_t *s = scanline + leftX;
    uint16_t *prevS = prevScanline + leftX;
    for(int y = minY; y <= maxY; ++y)
    {
      if (*s != *prevS)
        goto found_left;
      s += stride;
      prevS += stride;
    }
    ++leftX;
  }
found_left:

  int rightX = gpuFrameWidth-1;
  while(rightX > maxX)
  {
    uint16_t *s = scanline + rightX;
    uint16_t *prevS = prevScanline + rightX;
    for(int y = minY; y <= maxY; ++y)
    {
      if (*s != *prevS)
        goto found_right;
      s += stride;
      prevS += stride;
    }
    --rightX;
  }
found_right:

  // 4) Emit the single span (end coordinates are exclusive).
  head = spans;
  head->x = leftX;
  head->endX = rightX+1;
  head->lastScanEndX = lastScanEndX+1;
  head->y = minY;
  head->endY = maxY+1;

#if defined(ALIGN_DIFF_TASKS_FOR_32B_CACHE_LINES) && defined(ALL_TASKS_SHOULD_DMA)
  // Make sure the task is a multiple of 32 bytes wide so we can use a fast DMA copy
  // algorithm later on. Currently this is only exploited in dma.cpp if ALL_TASKS_SHOULD_DMA
  // option is enabled, so only enable it there.
  head->x = MAX(0, ALIGN_DOWN(head->x, 16));
  head->endX = MIN(gpuFrameWidth, ALIGN_UP(head->endX, 16));
  head->lastScanEndX = ALIGN_UP(head->lastScanEndX, 16);
#endif

  // Full rows for all but the last scanline, plus the partial last scanline.
  head->size = (head->endX-head->x)*(head->endY-head->y-1) + (head->lastScanEndX - head->x);
  head->next = 0;
}
#endif
void DiffFramebuffersToScanlineSpansFastAndCoarse4Wide(uint16_t *framebuffer, uint16_t *prevFramebuffer, bool interlacedDiff, int interlacedFieldParity, Span *&head)
{
int numSpans = 0;
int y = interlacedDiff ? interlacedFieldParity : 0;
int yInc = interlacedDiff ? 2 : 1;
// If doing an interlaced update, skip over every second scanline.
int scanlineInc = interlacedDiff ? (gpuFramebufferScanlineStrideBytes>>2) : (gpuFramebufferScanlineStrideBytes>>3);
uint64_t *scanline = (uint64_t *)(framebuffer + y*(gpuFramebufferScanlineStrideBytes>>1));
uint64_t *prevScanline = (uint64_t *)(prevFramebuffer + y*(gpuFramebufferScanlineStrideBytes>>1)); // (same scanline from previous frame, not preceding scanline)
const int W = gpuFrameWidth>>2;
Span *span = spans;
while(y < gpuFrameHeight)
{
uint16_t *scanlineStart = (uint16_t *)scanline;
for(int x = 0; x < W;)
{
if (scanline[x] != prevScanline[x])
{
uint16_t *spanStart = (uint16_t *)(scanline + x) + (__builtin_ctzll(scanline[x] ^ prevScanline[x]) >> 4);
++x;
// We've found a start of a span of different pixels on this scanline, now find where this span ends
uint16_t *spanEnd;
for(;;)
{
if (x < W)
{
if (scanline[x] != prevScanline[x])
{
++x;
continue;
}
else
{
spanEnd = (uint16_t *)(scanline + x) + 1 - (__builtin_clzll(scanline[x-1] ^ prevScanline[x-1]) >> 4);
++x;
break;
}
}
else
{
spanEnd = scanlineStart + gpuFrameWidth;
break;
}
}
// Submit the span update task
span->x = spanStart - scanlineStart;
span->endX = span->lastScanEndX = spanEnd - scanlineStart;
span->y = y;
span->endY = y+1;
span->size = spanEnd - spanStart;
span->next = span+1;
++span;
++numSpans;
}
else
{
++x;
}
}
y += yInc;
scanline += scanlineInc;
prevScanline += scanlineInc;
}
if (numSpans > 0)
{
head = &spans[0];
spans[numSpans-1].next = 0;
}
else
head = 0;
}
// Exact per-scanline diff: walks every examined scanline and writes one Span per run of changed
// pixels into the global 'spans' array. Runs separated by at most SPAN_MERGE_THRESHOLD unchanged
// pixels are reported as one span.
//   framebuffer / prevFramebuffer: current and previous frame, 16 bits per pixel.
//   interlacedDiff: when true, only every second scanline is examined, starting at row
//                   'interlacedFieldParity'.
//   head: set to the first produced span. When no pixel differs, 'head' is left untouched.
void DiffFramebuffersToScanlineSpansExact(uint16_t *framebuffer, uint16_t *prevFramebuffer, bool interlacedDiff, int interlacedFieldParity, Span *&head)
{
  int numSpans = 0;
  int y = interlacedDiff ? interlacedFieldParity : 0;
  int yInc = interlacedDiff ? 2 : 1;
  // If doing an interlaced update, skip over every second scanline.
  int scanlineInc = interlacedDiff ? gpuFramebufferScanlineStrideBytes : (gpuFramebufferScanlineStrideBytes>>1);
  // After a row has been walked, 'scanline' sits gpuFrameWidth pixels past the row start, so only the remainder is added.
  int scanlineEndInc = scanlineInc - gpuFrameWidth;
  uint16_t *scanline = framebuffer + y*(gpuFramebufferScanlineStrideBytes>>1);
  uint16_t *prevScanline = prevFramebuffer + y*(gpuFramebufferScanlineStrideBytes>>1); // (same scanline from previous frame, not preceding scanline)

  while(y < gpuFrameHeight)
  {
    uint16_t *scanlineStart = scanline;
    uint16_t *scanlineEnd = scanline + gpuFrameWidth;
    while(scanline < scanlineEnd)
    {
      uint16_t *spanStart;
      uint16_t *spanEnd;
      int numConsecutiveUnchangedPixels = 0;

      if (scanline + 1 < scanlineEnd)
      {
        // Compare two pixels at once: the low 16 bits hold the 1st pixel, the high 16 bits the 2nd.
        uint32_t diff = (*(uint32_t *)scanline) ^ (*(uint32_t *)prevScanline);
        scanline += 2;
        prevScanline += 2;

        if (diff == 0) // Both 1st and 2nd pixels are the same
          continue;

        // The parentheses are required: '==' binds tighter than '&', so "diff & 0xFFFF == 0"
        // would evaluate as "diff & (0xFFFF == 0)", which is always zero.
        if ((diff & 0xFFFF) == 0) // 1st pixels are the same, 2nd pixels are not
        {
          spanStart = scanline - 1;
          spanEnd = scanline;
        }
        else // 1st pixels are different
        {
          spanStart = scanline - 2;
          if ((diff & 0xFFFF0000u) != 0) // 2nd pixels are different?
          {
            spanEnd = scanline;
          }
          else
          {
            spanEnd = scanline - 1;
            numConsecutiveUnchangedPixels = 1;
          }
        }

        // We've found a start of a span of different pixels on this scanline, now find where this span ends
        while(scanline < scanlineEnd)
        {
          if (*scanline++ != *prevScanline++)
          {
            spanEnd = scanline;
            numConsecutiveUnchangedPixels = 0;
          }
          else
          {
            // Tolerate short unchanged gaps; only close the span once the gap exceeds the merge threshold.
            if (++numConsecutiveUnchangedPixels > SPAN_MERGE_THRESHOLD)
              break;
          }
        }
      }
      else // handle the single last pixel on the row
      {
        if (*scanline++ == *prevScanline++)
          break; // row finished without a trailing change
        spanStart = scanline - 1;
        spanEnd = scanline;
      }

      // Submit the span update task
      Span *span = spans + numSpans;
      span->x = spanStart - scanlineStart;
      span->endX = span->lastScanEndX = spanEnd - scanlineStart;
      span->y = y;
      span->endY = y+1;
      span->size = spanEnd - spanStart;
      if (numSpans > 0) span[-1].next = span; // append to the previously written span
      else head = span;
      span->next = 0;
      ++numSpans;
    }
    y += yInc;
    scanline += scanlineEndInc;
    prevScanline += scanlineEndInc;
  }
}
// Greedily merges spans of a list in place. For every span, each later span that is still vertically
// within reach is considered: if the bounding span of the two wastes at most SPAN_MERGE_THRESHOLD
// extra pixels (and, when MAX_SPI_TASK_SIZE is defined, still fits into one SPI task), the earlier
// span is grown to that bounding span and the later one is unlinked from the list.
void MergeScanlineSpanList(Span *listHead)
{
  for(Span *base = listHead; base; base = base->next)
  {
    Span *lastKept = base; // last node before 'cand' that is still linked into the list
    for(Span *cand = base->next; cand; cand = cand->next)
    {
      // If the spans are vertically apart, stop trying to grow 'base': all spans after 'cand' are at least
      // as far away (the list is nondecreasing with respect to Span::y).
      if (cand->y > base->endY) break;

      // Bounding span of 'base' and 'cand'.
      const int mergedX = MIN(base->x, cand->x);
      const int mergedY = MIN(base->y, cand->y);
      const int mergedEndX = MAX(base->endX, cand->endX);
      const int mergedEndY = MAX(base->endY, cand->endY);

      // The last scanline of the merged span ends where the span owning that scanline ends; if both
      // spans end on the same scanline, take the larger of the two.
      int mergedLastScanEndX;
      if (mergedEndY > base->endY)
        mergedLastScanEndX = cand->lastScanEndX;
      else if (mergedEndY > cand->endY)
        mergedLastScanEndX = base->lastScanEndX;
      else
        mergedLastScanEndX = MAX(base->lastScanEndX, cand->lastScanEndX);

      // Pixels covered by the merged span versus pixels covered by the two spans separately.
      const int mergedSize = (mergedEndX-mergedX)*(mergedEndY-mergedY-1) + (mergedLastScanEndX - mergedX);
      const int wastedPixels = mergedSize - base->size - cand->size;

      bool shouldMerge = wastedPixels <= SPAN_MERGE_THRESHOLD;
#ifdef MAX_SPI_TASK_SIZE
      shouldMerge = shouldMerge && mergedSize*SPI_BYTESPERPIXEL <= MAX_SPI_TASK_SIZE;
#endif

      if (!shouldMerge)
      {
        // Not merging - travel to next node remembering where we came from
        lastKept = cand;
        continue;
      }

      base->x = mergedX;
      base->y = mergedY;
      base->endX = mergedEndX;
      base->endY = mergedEndY;
      base->lastScanEndX = mergedLastScanEndX;
      base->size = mergedSize;

      // Unlink 'cand' and resume from its predecessor, so the loop increment moves on to cand's old successor.
      lastKept->next = cand->next;
      cand = lastKept;
    }
  }
}

44
usr/fbcp-ili9341/diff.h Normal file
View File

@ -0,0 +1,44 @@
#pragma once
#include <inttypes.h>
// Spans track dirty areas on screen. A span covers the box [x, endX[ * [y, endY[, where the
// last scanline (endY-1) can be partial and ends at lastScanEndX instead of endX.
struct Span
{
  uint16_t x;            // first column (inclusive)
  uint16_t endX;         // end column (exclusive) of all scanlines except possibly the last
  uint16_t y;            // first scanline (inclusive)
  uint16_t endY;         // end scanline (exclusive)
  uint16_t lastScanEndX; // end column (exclusive) of scanline endY-1
  uint32_t size;         // number of pixels covered by the span
  Span *next;            // next active span in the list (the spans live in an array; this links the ones in use), 0 at the end
};
extern Span *spans;
// Looking at SPI communication in a logic analyzer, it is observed that waiting for the finish of an SPI command FIFO causes pretty exactly one byte of delay to the command stream.
// Therefore the time/bandwidth cost of ending the current span and starting a new span is as follows:
// 1 byte to wait for the current SPI FIFO batch to finish,
// +1 byte to send the cursor X coordinate change command,
// +1 byte to wait for that FIFO to flush,
// +2 bytes to send the new X coordinate,
// +1 byte to wait for the FIFO to flush again,
// +1 byte to send the data_write command,
// +1 byte to wait for that FIFO to flush,
// after which the communication is ready to start pushing pixels. This totals to 8 bytes, or 4 pixels, meaning that if there are 4 unchanged pixels or less between two adjacent dirty
// spans, it is all the same to just update through those pixels as well to not have to wait to flush the FIFO.
#if defined(ALL_TASKS_SHOULD_DMA)
#define SPAN_MERGE_THRESHOLD 320
#elif defined(DISPLAY_SPI_BUS_IS_16BITS_WIDE)
#define SPAN_MERGE_THRESHOLD 10
#elif defined(HX8357D)
#define SPAN_MERGE_THRESHOLD 6
#else
#define SPAN_MERGE_THRESHOLD 4
#endif
void DiffFramebuffersToSingleChangedRectangle(uint16_t *framebuffer, uint16_t *prevFramebuffer, Span *&head);
void DiffFramebuffersToScanlineSpansExact(uint16_t *framebuffer, uint16_t *prevFramebuffer, bool interlacedDiff, int interlacedFieldParity, Span *&head);
void DiffFramebuffersToScanlineSpansFastAndCoarse4Wide(uint16_t *framebuffer, uint16_t *prevFramebuffer, bool interlacedDiff, int interlacedFieldParity, Span *&head);
void NoDiffChangedRectangle(Span *&head);
void MergeScanlineSpanList(Span *listHead);

View File

@ -0,0 +1,39 @@
#include "config.h"
#include "display.h"
#include "spi.h"
#include <memory.h>
void ClearScreen()
{
for(int y = 0; y < DISPLAY_HEIGHT; ++y)
{
#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE
SPI_TRANSFER(DISPLAY_SET_CURSOR_X, 0, 0, 0, 0, 0, (DISPLAY_WIDTH-1) >> 8, 0, (DISPLAY_WIDTH-1) & 0xFF);
SPI_TRANSFER(DISPLAY_SET_CURSOR_Y, 0, (uint8_t)(y >> 8), 0, (uint8_t)(y & 0xFF), 0, (DISPLAY_HEIGHT-1) >> 8, 0, (DISPLAY_HEIGHT-1) & 0xFF);
#elif defined(DISPLAY_SET_CURSOR_IS_8_BIT)
SPI_TRANSFER(DISPLAY_SET_CURSOR_X, 0, DISPLAY_WIDTH-1);
SPI_TRANSFER(DISPLAY_SET_CURSOR_Y, (uint8_t)y, DISPLAY_HEIGHT-1);
#else
SPI_TRANSFER(DISPLAY_SET_CURSOR_X, 0, 0, (DISPLAY_WIDTH-1) >> 8, (DISPLAY_WIDTH-1) & 0xFF);
SPI_TRANSFER(DISPLAY_SET_CURSOR_Y, (uint8_t)(y >> 8), (uint8_t)(y & 0xFF), (DISPLAY_HEIGHT-1) >> 8, (DISPLAY_HEIGHT-1) & 0xFF);
#endif
SPITask *clearLine = AllocTask(DISPLAY_WIDTH*SPI_BYTESPERPIXEL);
clearLine->cmd = DISPLAY_WRITE_PIXELS;
memset(clearLine->data, 0, clearLine->size);
CommitTask(clearLine);
RunSPITask(clearLine);
DoneTask(clearLine);
}
#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE
SPI_TRANSFER(DISPLAY_SET_CURSOR_X, 0, 0, 0, 0, 0, (DISPLAY_WIDTH-1) >> 8, 0, (DISPLAY_WIDTH-1) & 0xFF);
SPI_TRANSFER(DISPLAY_SET_CURSOR_Y, 0, 0, 0, 0, 0, (DISPLAY_HEIGHT-1) >> 8, 0, (DISPLAY_HEIGHT-1) & 0xFF);
#elif defined(DISPLAY_SET_CURSOR_IS_8_BIT)
SPI_TRANSFER(DISPLAY_SET_CURSOR_X, 0, DISPLAY_WIDTH-1);
SPI_TRANSFER(DISPLAY_SET_CURSOR_Y, 0, DISPLAY_HEIGHT-1);
#else
SPI_TRANSFER(DISPLAY_SET_CURSOR_X, 0, 0, (DISPLAY_WIDTH-1) >> 8, (DISPLAY_WIDTH-1) & 0xFF);
SPI_TRANSFER(DISPLAY_SET_CURSOR_Y, 0, 0, (DISPLAY_HEIGHT-1) >> 8, (DISPLAY_HEIGHT-1) & 0xFF);
#endif
}

121
usr/fbcp-ili9341/display.h Normal file
View File

@ -0,0 +1,121 @@
#pragma once
#include "config.h"
// Configure the desired display update rate. Use 120 for max performance/minimized latency, and 60/50/30/24 etc. for regular content, or to save battery.
#define TARGET_FRAME_RATE 60
#if defined(ILI9341) || defined(ILI9340)
#include "ili9341.h"
#elif defined(ILI9486L)
#include "ili9486l.h"
#elif defined(ILI9488)
#include "ili9488.h"
#elif defined(ILI9486)
#include "ili9486.h"
#elif defined(HX8357D)
#include "hx8357d.h"
#elif defined(ST7735R) || defined(ST7735S) || defined(ST7789) || defined(ST7789VW)
#include "st7735r.h"
#elif defined(SSD1351)
#include "ssd1351.h"
#elif defined(MZ61581)
#include "mz61581.h"
#elif defined(MPI3501)
#include "mpi3501.h"
#else
#error Please reconfigure CMake with your display controller directive set!
#endif
// The native display resolution is in portrait/landscape, but we want to display in the opposite landscape/portrait orientation?
// Compare DISPLAY_NATIVE_WIDTH <= DISPLAY_NATIVE_HEIGHT in the first test to let users toggle DISPLAY_OUTPUT_LANDSCAPE directive in config.h to flip orientation on square displays with width=height
#if ((DISPLAY_NATIVE_WIDTH <= DISPLAY_NATIVE_HEIGHT && defined(DISPLAY_OUTPUT_LANDSCAPE)) || (DISPLAY_NATIVE_WIDTH > DISPLAY_NATIVE_HEIGHT && !defined(DISPLAY_OUTPUT_LANDSCAPE)))
#define DISPLAY_SHOULD_FLIP_ORIENTATION
#endif
#if defined(DISPLAY_SHOULD_FLIP_ORIENTATION) && !defined(DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE)
// Need to do orientation flip, but don't want to do it on the CPU, so pretend the display dimensions are of the flipped form,
// and use display controller initialization sequence to do the flipping
#define DISPLAY_WIDTH DISPLAY_NATIVE_HEIGHT
#define DISPLAY_HEIGHT DISPLAY_NATIVE_WIDTH
#define DISPLAY_FLIP_ORIENTATION_IN_HARDWARE
#else
#define DISPLAY_WIDTH DISPLAY_NATIVE_WIDTH
#define DISPLAY_HEIGHT DISPLAY_NATIVE_HEIGHT
#endif
#if !defined(DISPLAY_SHOULD_FLIP_ORIENTATION) && defined(DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE)
#undef DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE
#endif
#ifndef DISPLAY_NATIVE_COVERED_LEFT_SIDE
#define DISPLAY_NATIVE_COVERED_LEFT_SIDE 0
#endif
#ifndef DISPLAY_NATIVE_COVERED_TOP_SIDE
#define DISPLAY_NATIVE_COVERED_TOP_SIDE 0
#endif
#ifndef DISPLAY_NATIVE_COVERED_BOTTOM_SIDE
#define DISPLAY_NATIVE_COVERED_BOTTOM_SIDE 0
#endif
#ifndef DISPLAY_NATIVE_COVERED_RIGHT_SIDE
#define DISPLAY_NATIVE_COVERED_RIGHT_SIDE 0
#endif
#if defined(DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE) || !defined(DISPLAY_SHOULD_FLIP_ORIENTATION)
#define DISPLAY_COVERED_TOP_SIDE DISPLAY_NATIVE_COVERED_TOP_SIDE
#define DISPLAY_COVERED_LEFT_SIDE DISPLAY_NATIVE_COVERED_LEFT_SIDE
#define DISPLAY_COVERED_RIGHT_SIDE DISPLAY_NATIVE_COVERED_RIGHT_SIDE
#define DISPLAY_COVERED_BOTTOM_SIDE DISPLAY_NATIVE_COVERED_BOTTOM_SIDE
#else
#define DISPLAY_COVERED_TOP_SIDE DISPLAY_NATIVE_COVERED_LEFT_SIDE
#define DISPLAY_COVERED_LEFT_SIDE DISPLAY_NATIVE_COVERED_TOP_SIDE
#define DISPLAY_COVERED_RIGHT_SIDE DISPLAY_NATIVE_COVERED_BOTTOM_SIDE
#define DISPLAY_COVERED_BOTTOM_SIDE DISPLAY_NATIVE_COVERED_RIGHT_SIDE
#endif
#define DISPLAY_DRAWABLE_WIDTH (DISPLAY_WIDTH-DISPLAY_COVERED_LEFT_SIDE-DISPLAY_COVERED_RIGHT_SIDE)
#define DISPLAY_DRAWABLE_HEIGHT (DISPLAY_HEIGHT-DISPLAY_COVERED_TOP_SIDE-DISPLAY_COVERED_BOTTOM_SIDE)
#ifndef DISPLAY_SPI_DRIVE_SETTINGS
#define DISPLAY_SPI_DRIVE_SETTINGS (0)
#endif
#ifdef DISPLAY_COLOR_FORMAT_R6X2G6X2B6X2
// 18 bits per pixel padded to 3 bytes
#define SPI_BYTESPERPIXEL 3
#else
// 16 bits per pixel
#define SPI_BYTESPERPIXEL 2
#endif
#if (DISPLAY_DRAWABLE_WIDTH % 16 == 0) && defined(ALL_TASKS_SHOULD_DMA) &&!defined(USE_SPI_THREAD) && defined(USE_GPU_VSYNC) && !defined(DISPLAY_COLOR_FORMAT_R6X2G6X2B6X2) && !defined(SPI_3WIRE_PROTOCOL)
// If conditions are suitable, defer moving pixels until the very last moment in dma.cpp when we are about
// to kick off DMA tasks.
// TODO: 3-wire SPI displays are not yet compatible with this path. Implement support for this to optimize performance of 3-wire SPI displays on Pi Zero. (Pi 3B does not care that much)
#define OFFLOAD_PIXEL_COPY_TO_DMA_CPP
#endif
void ClearScreen(void);
void TurnBacklightOn(void);
void TurnBacklightOff(void);
void TurnDisplayOn(void);
void TurnDisplayOff(void);
void DeinitSPIDisplay(void);
#if !defined(SPI_BUS_CLOCK_DIVISOR)
#error Please define -DSPI_BUS_CLOCK_DIVISOR=<some even number> on the CMake command line! This parameter along with core_freq=xxx in /boot/config.txt defines the SPI display speed. (spi speed = core_freq / SPI_BUS_CLOCK_DIVISOR)
#endif
#if !defined(GPIO_TFT_DATA_CONTROL) && !defined(SPI_3WIRE_PROTOCOL)
#error Please reconfigure CMake with -DGPIO_TFT_DATA_CONTROL=<int> specifying which pin your display is using for the Data/Control line!
#endif
#if defined(SPI_3WIRE_PROTOCOL) && !defined(SPI_3WIRE_DATA_COMMAND_FRAMING_BITS)
// 3-wire SPI displays use 1 bit of D/C framing (unless otherwise specified. E.g. KeDei uses 16 bit instead)
#define SPI_3WIRE_DATA_COMMAND_FRAMING_BITS 1
#endif

760
usr/fbcp-ili9341/dma.cpp Normal file
View File

@ -0,0 +1,760 @@
#ifndef KERNEL_MODULE
#include <stdio.h> // fprintf, stderr
#include <stdlib.h> // exit
#include <memory.h> // memset, memcpy
#include <inttypes.h> // uint32_t
#include <syslog.h> // syslog
#include <sys/mman.h> // mmap, munmap, PROT_READ, PROT_WRITE
#endif
#include "config.h"
#include "dma.h"
#include "spi.h"
#include "gpu.h"
#include "util.h"
#include "mailbox.h"
#ifdef USE_DMA_TRANSFERS
#define BCM2835_PERI_BASE 0x3F000000
SharedMemory *dmaSourceMemory = 0;
volatile DMAChannelRegisterFile *dma0 = 0;
volatile DMAChannelRegisterFile *dmaTx = 0;
volatile DMAChannelRegisterFile *dmaRx = 0;
int dmaTxChannel = -1;
int dmaTxIrq = 0;
int dmaRxChannel = -1;
int dmaRxIrq = 0;
#define PAGE_SIZE 4096
// Describes one block of GPU side memory obtained through the mailbox interface
// (filled in by AllocateUncachedGpuMemory, released by FreeUncachedGpuMemory).
struct GpuMemory
{
uint32_t allocationHandle; // handle returned by the MEM_ALLOC_MESSAGE mailbox call; used to lock, unlock and free the block
void *virtualAddr; // address of the mmap()ed view of the block in this process
uintptr_t busAddress; // address returned by the MEM_LOCK_MESSAGE mailbox call
uint32_t sizeBytes; // size of the block, rounded up to a multiple of PAGE_SIZE
};
#define NUM_DMA_CBS 1024
GpuMemory dmaCb, dmaSourceBuffer, dmaConstantData;
volatile DMAControlBlock *dmaSendTail = 0;
volatile DMAControlBlock *dmaRecvTail = 0;
volatile DMAControlBlock *firstFreeCB = 0;
volatile uint8_t *dmaSourceEnd = 0;
// Hands out 'num' consecutive DMA control blocks from the dmaCb memory block, bump-allocator style.
// When the request would reach or pass the end of the block, waits for DMA to finish and starts
// over from the first control block. Returns a pointer to the first of the 'num' control blocks.
volatile DMAControlBlock *GrabFreeCBs(int num)
{
  if ((uintptr_t)(firstFreeCB + num) >= (uintptr_t)dmaCb.virtualAddr + dmaCb.sizeBytes)
  {
    // Wait before wrapping around so that the control blocks at the start can be handed out again.
    WaitForDMAFinished();
    firstFreeCB = (volatile DMAControlBlock *)dmaCb.virtualAddr;
  }

  volatile DMAControlBlock *ret = firstFreeCB;
  firstFreeCB += num;
  return ret;
}
// Hands out 'bytes' consecutive bytes from the dmaSourceBuffer memory block, bump-allocator style.
// When the request would reach or pass the end of the buffer, waits for DMA to finish and starts
// over from the beginning of the buffer. Returns a pointer to the first handed out byte.
volatile uint8_t *GrabFreeDMASourceBytes(int bytes)
{
  const uintptr_t bufferLimit = (uintptr_t)dmaSourceBuffer.virtualAddr + dmaSourceBuffer.sizeBytes;
  if ((uintptr_t)dmaSourceEnd + bytes >= bufferLimit)
  {
    WaitForDMAFinished();
    dmaSourceEnd = (volatile uint8_t *)dmaSourceBuffer.virtualAddr;
  }

  volatile uint8_t *allocation = dmaSourceEnd;
  dmaSourceEnd = allocation + bytes;
  return allocation;
}
// Picks the next DMA channel from a fixed two-entry candidate list: the first call yields the TX
// candidate, the second call the RX candidate, and any further call reports a fatal error.
//   dmaChannel: receives the chosen channel number.
//   irq: always set to 0.
// Returns 0.
static int AllocateDMAChannel(int *dmaChannel, int *irq)
{
  // Snooping DMA, channels 3, 5 and 6 seen active.
  // TODO: Actually reserve the DMA channel to the system using bcm_dma_chan_alloc() and bcm_dma_chan_free()?...
  // Right now, default to channel 7 for TX and channel 1 for RX (5 and 1 on FreePlayTech), unless
  // DMA_TX_CHANNEL / DMA_RX_CHANNEL override them.
  // Note: The send channel could be a lite channel, but receive channel cannot, since receiving uses the IGNORE flag
  // that lite DMA engines don't have.
#ifdef FREEPLAYTECH_WAVESHARE32B
  // On FreePlayTech Zero, DMA channel 4 seen to be taken by SD HOST (peripheral mapping 13).
  int freeChannels[] = { 5, 1 };
#else
  int freeChannels[] = { 7, 1 };
#endif

#if defined(DMA_TX_CHANNEL)
  freeChannels[0] = DMA_TX_CHANNEL;
#endif
#if defined(DMA_RX_CHANNEL)
  freeChannels[1] = DMA_RX_CHANNEL;
#endif
  if (freeChannels[0] == freeChannels[1]) FATAL_ERROR("DMA TX and RX channels cannot be the same channel!");

  // Keep the element count as an int so that the comparison below does not mix signed and unsigned operands.
  const int numFreeChannels = (int)(sizeof(freeChannels) / sizeof(freeChannels[0]));
  static int nextFreeChannel = 0; // index of the next candidate, persists across calls
  if (nextFreeChannel >= numFreeChannels) FATAL_ERROR("No free DMA channels");

  *dmaChannel = freeChannels[nextFreeChannel++];
  LOG("Allocated DMA channel %d", *dmaChannel);
  *irq = 0;
  return 0;
}
void FreeDMAChannel(int channel)
{
volatile DMAChannelRegisterFile *dma = GetDMAChannel(channel);
dma->cb.ti = 0; // Clear the SPI TX & RX permaps for this DMA channel so that we don't think some other program is using these for SPI
}
// Message IDs for different mailbox GPU memory allocation messages
#define MEM_ALLOC_MESSAGE 0x3000c // This message is 3 u32s: numBytes, alignment and flags
#define MEM_FREE_MESSAGE 0x3000f // This message is 1 u32: handle
#define MEM_LOCK_MESSAGE 0x3000d // 1 u32: handle
#define MEM_UNLOCK_MESSAGE 0x3000e // 1 u32: handle
// Memory allocation flags
#define MEM_ALLOC_FLAG_DIRECT (1 << 2) // Allocate uncached memory that bypasses L1 and L2 cache on loads and stores
#define MEM_ALLOC_FLAG_COHERENT (1 << 3) // Non-allocating in L2 but coherent
#define BUS_TO_PHYS(x) ((x) & ~0xC0000000)
#define PHYS_TO_BUS(x) ((x) | 0xC0000000)
#define VIRT_TO_BUS(block, x) ((uintptr_t)(x) - (uintptr_t)((block).virtualAddr) + (block).busAddress)
uint64_t totalGpuMemoryUsed = 0;
// Allocates at least 'numBytes' bytes (rounded up to whole pages) of GPU side memory through the
// mailbox interface, locks it to obtain its bus address and maps it into this process.
// The virtual address is meant to be an uncached view of the block (the allocation is requested with
// the DIRECT and COHERENT flags), which is the kind of memory to hand to the DMA controller.
//   reason: descriptive label for diagnostics; not used by the code at the moment.
// Returns the filled-in GpuMemory descriptor. Each failing step raises FATAL_ERROR.
GpuMemory AllocateUncachedGpuMemory(uint32_t numBytes, const char *reason)
{
  GpuMemory block;
  block.sizeBytes = ALIGN_UP(numBytes, PAGE_SIZE);

  // Step 1: allocate.
  uint32_t allocationFlags = MEM_ALLOC_FLAG_DIRECT | MEM_ALLOC_FLAG_COHERENT;
  block.allocationHandle = Mailbox(MEM_ALLOC_MESSAGE, block.sizeBytes, PAGE_SIZE, allocationFlags); // message arguments: size, alignment, flags
  if (!block.allocationHandle)
    FATAL_ERROR("Failed to allocate GPU memory! Try increasing gpu_mem allocation in /boot/config.txt. See https://www.raspberrypi.org/documentation/configuration/config-txt/memory.md");

  // Step 2: lock, which yields the bus address.
  block.busAddress = Mailbox(MEM_LOCK_MESSAGE, block.allocationHandle);
  if (!block.busAddress)
    FATAL_ERROR("Failed to lock GPU memory!");

  // Step 3: map the physical range behind the bus address into our address space.
  block.virtualAddr = mmap(0, block.sizeBytes, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, BUS_TO_PHYS(block.busAddress));
  if (block.virtualAddr == MAP_FAILED)
    FATAL_ERROR("Failed to mmap GPU memory!");

  totalGpuMemoryUsed += block.sizeBytes;
  return block;
}
// Releases a block obtained from AllocateUncachedGpuMemory: updates the usage counter, unmaps the
// block from this process, then unlocks and frees the GPU side allocation.
void FreeUncachedGpuMemory(GpuMemory mem)
{
  const uint32_t handle = mem.allocationHandle;
  const uint32_t numBytes = mem.sizeBytes;

  totalGpuMemoryUsed -= numBytes;
  munmap(mem.virtualAddr, numBytes);

  // Undo the lock before giving the allocation back.
  Mailbox(MEM_UNLOCK_MESSAGE, handle);
  Mailbox(MEM_FREE_MESSAGE, handle);
}
// Returns the register file of the given DMA channel, addressed relative to 'dma0'.
// Channel numbers outside [0, BCM2835_NUM_DMA_CHANNELS[ are reported and raise FATAL_ERROR.
volatile DMAChannelRegisterFile *GetDMAChannel(int channelNumber)
{
  const bool isValidChannel = channelNumber >= 0 && channelNumber < BCM2835_NUM_DMA_CHANNELS;
  if (!isValidChannel)
  {
    printf("Invalid DMA channel %d specified!\n", channelNumber);
    FATAL_ERROR("Invalid DMA channel specified!");
  }
  return &dma0[channelNumber];
}
// Debugging aid: prints, for every DMA channel, its peripheral map, whether it is a lite channel,
// whether it is currently active, and the address of its current control block.
void DumpDMAPeripheralMap()
{
  for(int i = 0; i < BCM2835_NUM_DMA_CHANNELS; ++i)
  {
    volatile DMAChannelRegisterFile *channel = GetDMAChannel(i);
    // The register values are integers: convert them explicitly to the types that the %d and %p
    // conversions expect, instead of passing mismatching argument types through the varargs.
    const int peripheralMap = (int)((channel->cb.ti & BCM2835_DMA_TI_PERMAP_MASK) >> BCM2835_DMA_TI_PERMAP_SHIFT);
    const int isLite = (channel->cb.debug & BCM2835_DMA_DEBUG_LITE) ? 1 : 0;
    const int isActive = (channel->cs & BCM2835_DMA_CS_ACTIVE) ? 1 : 0;
    void *controlBlock = (void*)(uintptr_t)channel->cbAddr;
    printf("DMA channel %d has peripheral map %d (is lite channel: %d, currently active: %d, current control block: %p)\n", i, peripheralMap, isLite, isActive, controlBlock);
  }
}
// Verifies that no other program has stomped on the DMA channel that we are using.
//   channelNumber: the DMA channel to check.
//   expectedPeripheralMap: the peripheral map value that we assign to this channel.
// Raises FATAL_ERROR if the channel carries a different nonzero peripheral map, or if its current
// control block address is nonzero and lies outside our dmaCb memory block.
void CheckDMAChannelNotStolen(int channelNumber, int expectedPeripheralMap)
{
  volatile DMAChannelRegisterFile *channel = GetDMAChannel(channelNumber);

  uint32_t peripheralMap = ((channel->cb.ti & BCM2835_DMA_TI_PERMAP_MASK) >> BCM2835_DMA_TI_PERMAP_SHIFT);
  // A peripheral map of 0 means the channel has no peripheral assigned, which is accepted.
  if (peripheralMap != (uint32_t)expectedPeripheralMap && peripheralMap != 0)
  {
    DumpDMAPeripheralMap();
    printf("DMA channel collision! DMA channel %d was expected to be assigned to our peripheral %d, but something else has assigned it to peripheral %d!\n", channelNumber, expectedPeripheralMap, (int)peripheralMap);
    FATAL_ERROR("System is likely unstable now, rebooting is advised.");
  }

  uint32_t cbAddr = channel->cbAddr;
  if (cbAddr && (cbAddr < dmaCb.busAddress || cbAddr >= dmaCb.busAddress + dmaCb.sizeBytes))
  {
    DumpDMAPeripheralMap();
    // %p expects a pointer argument, so convert the 32-bit address explicitly.
    printf("DMA channel collision! Some other program has submitted a DMA task to our DMA channel %d! (DMA task at unknown control block address %p)\n", channelNumber, (void*)(uintptr_t)cbAddr);
    FATAL_ERROR("System is likely unstable now, rebooting is advised.");
  }
}
// Runs the stolen-channel check on both of our DMA channels: first the TX channel against the
// SPI TX peripheral map, then the RX channel against the SPI RX peripheral map.
void CheckSPIDMAChannelsNotStolen()
{
  const int channels[2] = { dmaTxChannel, dmaRxChannel };
  const int expectedPermaps[2] = { BCM2835_DMA_TI_PERMAP_SPI_TX, BCM2835_DMA_TI_PERMAP_SPI_RX };
  for(int i = 0; i < 2; ++i)
    CheckDMAChannelNotStolen(channels[i], expectedPermaps[i]);
}
// Resets our TX and RX DMA channels: writes the RESET flag to each channel's CS register and the
// three error flags (read error, FIFO error, read-last-not-set error) to its debug register.
void ResetDMAChannels()
{
  const uint32_t errorFlags = BCM2835_DMA_DEBUG_DMA_READ_ERROR | BCM2835_DMA_DEBUG_DMA_FIFO_ERROR | BCM2835_DMA_DEBUG_READ_LAST_NOT_SET_ERROR;

  // TX channel first, then RX channel; for each channel CS is written before debug.
  dmaTx->cs = BCM2835_DMA_CS_RESET;
  dmaTx->cb.debug = errorFlags;

  dmaRx->cs = BCM2835_DMA_CS_RESET;
  dmaRx->cb.debug = errorFlags;
}
// Sets up DMA for SPI use: locates the DMA register files, picks and enables the TX and RX DMA
// channels, allocates the GPU memory blocks used for control blocks, source data and constants
// (userland builds only), verifies that the chosen channels are not in use by something else, and
// resets them. Returns 0; every failure raises FATAL_ERROR.
int InitDMA()
{
#if defined(KERNEL_MODULE)
  dma0 = (volatile DMAChannelRegisterFile*)ioremap(BCM2835_PERI_BASE+BCM2835_DMA0_OFFSET, BCM2835_NUM_DMA_CHANNELS*0x100);
#else
  dma0 = (volatile DMAChannelRegisterFile*)((uintptr_t)bcm2835 + BCM2835_DMA0_OFFSET);
#endif
  // Validate the mapping before anything below addresses registers relative to dma0
  // (the channel enable register is written right after channel allocation).
  if (!dma0) FATAL_ERROR("Failed to map DMA!");

#ifdef KERNEL_MODULE_CLIENT
  // The channels are dictated by the shared SPI task memory.
  dmaTxChannel = spiTaskMemory->dmaTxChannel;
  dmaRxChannel = spiTaskMemory->dmaRxChannel;
#else
  int ret = AllocateDMAChannel(&dmaTxChannel, &dmaTxIrq);
  if (ret != 0) FATAL_ERROR("Unable to allocate TX DMA channel!");
  ret = AllocateDMAChannel(&dmaRxChannel, &dmaRxIrq);
  if (ret != 0) FATAL_ERROR("Unable to allocate RX DMA channel!");

  printf("Enabling DMA channels Tx:%d and Rx:%d\n", dmaTxChannel, dmaRxChannel);
  volatile uint32_t *dmaEnableRegister = (volatile uint32_t *)((uintptr_t)dma0 + BCM2835_DMAENABLE_REGISTER_OFFSET);

  // Enable the allocated DMA channels
  *dmaEnableRegister |= (1 << dmaTxChannel);
  *dmaEnableRegister |= (1 << dmaRxChannel);
#endif

#if !defined(KERNEL_MODULE)
  dmaCb = AllocateUncachedGpuMemory(sizeof(DMAControlBlock) * NUM_DMA_CBS, "DMA control blocks");
  memset(dmaCb.virtualAddr, 0, dmaCb.sizeBytes); // Some fields of the CBs (debug, reserved) are initialized to zero and assumed to stay so throughout app lifetime.
  firstFreeCB = (volatile DMAControlBlock *)dmaCb.virtualAddr;

  dmaSourceBuffer = AllocateUncachedGpuMemory(SHARED_MEMORY_SIZE*2, "DMA source data");
  dmaSourceEnd = (volatile uint8_t *)dmaSourceBuffer.virtualAddr;

  dmaConstantData = AllocateUncachedGpuMemory(2*sizeof(uint32_t), "DMA constant data");
  uint32_t *constantData = (uint32_t *)dmaConstantData.virtualAddr;
  constantData[0] = BCM2835_SPI0_CS_DMAEN; // constantData[0] is for disableTransferActive task
  constantData[1] = BCM2835_DMA_CS_ACTIVE | BCM2835_DMA_CS_END; // constantData[1] is for startDMATxChannel task
#endif

  LOG("DMA hardware register file is at ptr: %p, using DMA TX channel: %d and DMA RX channel: %d", dma0, dmaTxChannel, dmaRxChannel);

  dmaTx = GetDMAChannel(dmaTxChannel);
  dmaRx = GetDMAChannel(dmaRxChannel);
  LOG("DMA hardware TX channel register file is at ptr: %p, DMA RX channel register file is at ptr: %p", dmaTx, dmaRx);

  // The TX channel must either be unassigned (peripheral map 0) or already assigned to SPI TX, and must not be running.
  int dmaTxPeripheralMap = (dmaTx->cb.ti & BCM2835_DMA_TI_PERMAP_MASK) >> BCM2835_DMA_TI_PERMAP_SHIFT;
  if (dmaTxPeripheralMap != 0 && dmaTxPeripheralMap != BCM2835_DMA_TI_PERMAP_SPI_TX)
  {
    DumpDMAPeripheralMap();
    LOG("DMA TX channel %d was assigned another peripheral map %d!", dmaTxChannel, dmaTxPeripheralMap);
    FATAL_ERROR("DMA TX channel was assigned another peripheral map!");
  }
  if (dmaTx->cbAddr != 0 && (dmaTx->cs & BCM2835_DMA_CS_ACTIVE))
    FATAL_ERROR("DMA TX channel was in use!");

  // Same checks for the RX channel against the SPI RX peripheral map.
  int dmaRxPeripheralMap = (dmaRx->cb.ti & BCM2835_DMA_TI_PERMAP_MASK) >> BCM2835_DMA_TI_PERMAP_SHIFT;
  if (dmaRxPeripheralMap != 0 && dmaRxPeripheralMap != BCM2835_DMA_TI_PERMAP_SPI_RX)
  {
    LOG("DMA RX channel %d was assigned another peripheral map %d!", dmaRxChannel, dmaRxPeripheralMap);
    DumpDMAPeripheralMap();
    FATAL_ERROR("DMA RX channel was assigned another peripheral map!");
  }
  if (dmaRx->cbAddr != 0 && (dmaRx->cs & BCM2835_DMA_CS_ACTIVE))
    FATAL_ERROR("DMA RX channel was in use!");

  if ((dmaRx->cb.debug & BCM2835_DMA_DEBUG_LITE) != 0)
    FATAL_ERROR("DMA RX channel cannot be a lite channel, because to get best performance we want to use BCM2835_DMA_TI_DEST_IGNORE DMA operation mode that lite DMA channels do not have. (Try using DMA RX channel value < 7)");

  LOG("Resetting DMA channels for use");
  ResetDMAChannels();

  // TODO: Set up IRQ
  LOG("DMA all set up");
  return 0;
}
// Debugging functions to introspect SPI and DMA hardware registers:
// Debug helper: prints the fields of a DMA channel Control/Status (CS) register value,
// one PRINT_FLAG() invocation per BCM2835_DMA_CS_* mask, in the order listed below.
// reg: raw register value to decode (DumpDMAState() passes dmaTx->cs and dmaRx->cs).
// NOTE(review): PRINT_FLAG is defined outside this view; it presumably reads the local variable
// named 'reg', so confirm before renaming the parameter.
void DumpCS(uint32_t reg)
{
PRINT_FLAG(BCM2835_DMA_CS_RESET);
PRINT_FLAG(BCM2835_DMA_CS_ABORT);
PRINT_FLAG(BCM2835_DMA_CS_DISDEBUG);
PRINT_FLAG(BCM2835_DMA_CS_WAIT_FOR_OUTSTANDING_WRITES);
PRINT_FLAG(BCM2835_DMA_CS_PANIC_PRIORITY);
PRINT_FLAG(BCM2835_DMA_CS_PRIORITY);
PRINT_FLAG(BCM2835_DMA_CS_ERROR);
PRINT_FLAG(BCM2835_DMA_CS_WAITING_FOR_OUTSTANDING_WRITES);
PRINT_FLAG(BCM2835_DMA_CS_DREQ_STOPS_DMA);
PRINT_FLAG(BCM2835_DMA_CS_PAUSED);
PRINT_FLAG(BCM2835_DMA_CS_DREQ);
PRINT_FLAG(BCM2835_DMA_CS_INT);
PRINT_FLAG(BCM2835_DMA_CS_END);
PRINT_FLAG(BCM2835_DMA_CS_ACTIVE);
}
// Debug helper: prints the fields of a DMA channel DEBUG register value,
// one PRINT_FLAG() invocation per BCM2835_DMA_DEBUG_* mask.
// reg: raw register value to decode (DumpDMAState() passes dmaTx->cb.debug and dmaRx->cb.debug).
// NOTE(review): PRINT_FLAG is defined outside this view; it presumably reads the local variable
// named 'reg', so confirm before renaming the parameter.
void DumpDebug(uint32_t reg)
{
PRINT_FLAG(BCM2835_DMA_DEBUG_LITE);
PRINT_FLAG(BCM2835_DMA_DEBUG_VERSION);
PRINT_FLAG(BCM2835_DMA_DEBUG_DMA_STATE);
PRINT_FLAG(BCM2835_DMA_DEBUG_DMA_ID);
PRINT_FLAG(BCM2835_DMA_DEBUG_DMA_OUTSTANDING_WRITES);
PRINT_FLAG(BCM2835_DMA_DEBUG_DMA_READ_ERROR);
PRINT_FLAG(BCM2835_DMA_DEBUG_DMA_FIFO_ERROR);
PRINT_FLAG(BCM2835_DMA_DEBUG_READ_LAST_NOT_SET_ERROR);
}
// Debug helper: prints the fields of a DMA Transfer Information (TI) register value,
// one PRINT_FLAG() invocation per BCM2835_DMA_TI_* mask.
// reg: raw register value to decode (DumpDMAState() passes dmaTx->cb.ti and dmaRx->cb.ti).
// NOTE(review): PRINT_FLAG is defined outside this view; it presumably reads the local variable
// named 'reg', so confirm before renaming the parameter.
void DumpTI(uint32_t reg)
{
PRINT_FLAG(BCM2835_DMA_TI_NO_WIDE_BURSTS);
PRINT_FLAG(BCM2835_DMA_TI_WAITS);
// dma.h names the peripheral map shift BCM2835_DMA_TI_PERMAP_SHIFT, so a ..._MASK_SHIFT name is defined here for the
// PERMAP_MASK line below. NOTE(review): presumably PRINT_FLAG appends "_SHIFT" to its argument - confirm against the macro.
#define BCM2835_DMA_TI_PERMAP_MASK_SHIFT 16
PRINT_FLAG(BCM2835_DMA_TI_PERMAP_MASK);
// BURST_LENGTH is left out: in dma.h it is a function-like macro taking a value, not a plain bit mask.
// PRINT_FLAG(BCM2835_DMA_TI_BURST_LENGTH);
PRINT_FLAG(BCM2835_DMA_TI_SRC_IGNORE);
PRINT_FLAG(BCM2835_DMA_TI_SRC_DREQ);
PRINT_FLAG(BCM2835_DMA_TI_SRC_WIDTH);
PRINT_FLAG(BCM2835_DMA_TI_SRC_INC);
PRINT_FLAG(BCM2835_DMA_TI_DEST_IGNORE);
PRINT_FLAG(BCM2835_DMA_TI_DEST_DREQ);
PRINT_FLAG(BCM2835_DMA_TI_DEST_WIDTH);
PRINT_FLAG(BCM2835_DMA_TI_DEST_INC);
PRINT_FLAG(BCM2835_DMA_TI_WAIT_RESP);
PRINT_FLAG(BCM2835_DMA_TI_TDMODE);
PRINT_FLAG(BCM2835_DMA_TI_INTEN);
}
#define DMA_DMA0_CB_PHYS_ADDRESS 0x7E007000
#define DMA_SPI_CS_PHYS_ADDRESS 0x7E204000
#define DMA_SPI_FIFO_PHYS_ADDRESS 0x7E204004
#define DMA_SPI_DLEN_PHYS_ADDRESS 0x7E20400C
#define DMA_GPIO_SET_PHYS_ADDRESS 0x7E20001C
#define DMA_GPIO_CLEAR_PHYS_ADDRESS 0x7E200028
// Debug helper: prints the current SPI CS register followed by the CS, TI, DEBUG and
// control block address registers of the TX and RX DMA channels to stdout.
void DumpDMAState()
{
  printf("---SPI:---\n");
  DumpSPICS(spi->cs);

  printf("---DMATX CS:---\n");
  DumpCS(dmaTx->cs);
  printf("---DMATX TI:---\n");
  DumpTI(dmaTx->cb.ti);
  printf("---DMATX DEBUG:---\n");
  DumpDebug(dmaTx->cb.debug);
  // cbAddr is a 32-bit register value, not a pointer object. Passing a uint32_t to %p is a
  // format/argument type mismatch, so convert it explicitly to void* first.
  printf("****** DMATX cbAddr: %p\n", (void*)(uintptr_t)dmaTx->cbAddr);

  printf("---DMARX CS:---\n");
  DumpCS(dmaRx->cs);
  printf("---DMARX TI:---\n");
  DumpTI(dmaRx->cb.ti);
  printf("---DMARX DEBUG:---\n");
  DumpDebug(dmaRx->cb.debug);
  printf("****** DMARX cbAddr: %p\n", (void*)(uintptr_t)dmaRx->cbAddr);
}
extern volatile bool programRunning;
void WaitForDMAFinished()
{
int spins = 0;
uint64_t t0 = tick();
while((dmaTx->cs & BCM2835_DMA_CS_ACTIVE) && programRunning)
{
usleep(100);
if (tick() - t0 > 2000000)
{
printf("TX stalled\n");
DumpDMAState();
exit(1);
}
}
spins = 0;
t0 = tick();
while((dmaRx->cs & BCM2835_DMA_CS_ACTIVE) && programRunning)
{
usleep(100);
if (tick() - t0 > 2000000)
{
printf("RX stalled\n");
DumpDMAState();
exit(1);
}
}
dmaSendTail = 0;
dmaRecvTail = 0;
}
#ifdef ALL_TASKS_SHOULD_DMA
// This function does a memcpy from one source buffer to two destination buffers simultaneously.
// It saves a lot of time on ARMv6 by avoiding to have to do two separate memory copies, because the ARMv6 L1 cache is so tiny (4K) that it cannot fit a whole framebuffer
// in memory at a time. Streaming through it only once instead of twice helps memory bandwidth immensely, this is profiled to be ~4x faster than a pair of memcpys or a simple CPU loop.
// In addition, this does a little endian->big endian conversion when copying data out to dstDma.
// Parameters:
//   dstDma             - destination for the byte-swapped pixels (the DMA task buffer); advanced linearly.
//   dstPrevFramebuffer - in/out cursor into the "previous frame" buffer; receives an unmodified copy of the pixels.
//   srcFramebuffer     - in/out cursor into the current framebuffer being read.
//   numBytes           - number of bytes to copy. The loop moves 32 bytes per iteration and always runs at least once,
//                        so the SPIDMATransfer() caller only takes this path when the payload size is a multiple of 32.
//   taskStartX         - in/out x position (in pixels) within the current row; lets a copy resume mid-row on the next call.
//   width              - row width in pixels (that caller requires a multiple of 16, matching the 16 pixels per iteration).
//   stride             - distance in bytes between the starts of consecutive rows in the framebuffers.
static void memcpy_to_dma_and_prev_framebuffer(uint16_t *dstDma, uint16_t **dstPrevFramebuffer, uint16_t **srcFramebuffer, int numBytes, int *taskStartX, int width, int stride)
{
// Number of bytes to skip at the end of each row to get to the start of the next one (pixels are 2 bytes each).
int strideEnd = stride - width*2;
// Pixels remaining in the current row.
int xLeft = width-*taskStartX;
uint16_t *Src = *srcFramebuffer;
uint16_t *Dst1 = *dstPrevFramebuffer;
// TODO: Do the loops in aligned order with unaligned head and tail separate, and ensure that dstDma, dstPrevFramebuffer and srcFramebuffer are in same alignment phase.
asm volatile(
"start_%=:\n"
// Each of the four groups below loads 8 bytes (4 pixels) into r0:r1, stores them unchanged to the previous
// framebuffer, swaps the two bytes of every 16-bit halfword (rev16), and stores the swapped copy to the DMA buffer.
"ldrd r0, r1, [%[srcFramebuffer]], #8\n"
// Preload hint for source data 248 bytes ahead of the (already post-incremented) read pointer.
"pld [%[srcFramebuffer], #248]\n"
"strd r0, r1, [%[dstPrevFramebuffer]], #8\n"
"rev16 r0, r0\n"
"rev16 r1, r1\n"
"strd r0, r1, [%[dstDma]], #8\n"
"ldrd r0, r1, [%[srcFramebuffer]], #8\n"
"strd r0, r1, [%[dstPrevFramebuffer]], #8\n"
"rev16 r0, r0\n"
"rev16 r1, r1\n"
"strd r0, r1, [%[dstDma]], #8\n"
"ldrd r0, r1, [%[srcFramebuffer]], #8\n"
"strd r0, r1, [%[dstPrevFramebuffer]], #8\n"
"rev16 r0, r0\n"
"rev16 r1, r1\n"
"strd r0, r1, [%[dstDma]], #8\n"
"ldrd r0, r1, [%[srcFramebuffer]], #8\n"
"strd r0, r1, [%[dstPrevFramebuffer]], #8\n"
"rev16 r0, r0\n"
"rev16 r1, r1\n"
"strd r0, r1, [%[dstDma]], #8\n"
// 16 pixels of the current row were consumed. The three conditional (ls) instructions run only when this
// subtraction hit zero or borrowed, i.e. the row is finished: refill the per-row counter and skip the row
// padding in both framebuffers. The addls instructions do not update the flags, so all three see the subs result.
"subs %[xLeft], %[xLeft], #16\n"
"addls %[xLeft], %[xLeft], %[width]\n"
"addls %[dstPrevFramebuffer], %[dstPrevFramebuffer], %[strideEnd]\n"
"addls %[srcFramebuffer], %[srcFramebuffer], %[strideEnd]\n"
// 32 bytes were copied this iteration; keep looping while more than that remained before the subtraction.
"subs %[numBytes], %[numBytes], #32\n"
"bhi start_%=\n"
: [dstDma]"+r"(dstDma), [dstPrevFramebuffer]"+r"(Dst1), [srcFramebuffer]"+r"(Src), [xLeft]"+r"(xLeft), [numBytes]"+r"(numBytes)
: [strideEnd]"r"(strideEnd), [width]"r"(width)
: "r0", "r1", "memory", "cc"
);
// Hand the updated row position and buffer cursors back to the caller so that the next call continues from here.
*taskStartX = width - xLeft;
*srcFramebuffer = Src;
*dstPrevFramebuffer = Dst1;
}
// Portable fallback for memcpy_to_dma_and_prev_framebuffer(): copies numBytes worth of 16-bit pixels from the
// current framebuffer to both the previous framebuffer (unmodified) and the DMA buffer (byte-swapped).
//   dstDma             - destination for the byte-swapped pixels; written linearly from index 0.
//   dstPrevFramebuffer - in/out cursor into the previous framebuffer.
//   srcFramebuffer     - in/out cursor into the current framebuffer.
//   numBytes           - number of bytes to copy (two bytes per pixel).
//   taskStartX         - in/out x position in pixels within the current row; wraps to 0 at 'width'.
//   width              - row width in pixels.
//   stride             - distance in bytes between the starts of consecutive framebuffer rows.
// Prints a one-time performance warning to stdout the first time it is called.
static void memcpy_to_dma_and_prev_framebuffer_in_c(uint16_t *dstDma, uint16_t **dstPrevFramebuffer, uint16_t **srcFramebuffer, int numBytes, int *taskStartX, int width, int stride)
{
  static bool warnedAboutSlowPath = false;
  if (!warnedAboutSlowPath)
  {
    warnedAboutSlowPath = true;
    printf("Performance warning: using slow memcpy_to_dma_and_prev_framebuffer_in_c() function. Check conditions in display.h that enable OFFLOAD_PIXEL_COPY_TO_DMA_CPP and configure to use that instead.\n");
  }

  const int pixelCount = numBytes >> 1;
  // Number of padding pixels between the end of one row and the start of the next.
  const int rowGapPixels = (stride >> 1) - width;

  uint16_t *src = *srcFramebuffer;
  uint16_t *prev = *dstPrevFramebuffer;
  int x = *taskStartX;

  int pixel = 0;
  while (pixel < pixelCount)
  {
    const uint16_t value = *src++;
    *prev++ = value;
    dstDma[pixel++] = __builtin_bswap16(value);

    x += 1;
    if (x < width)
      continue;

    // End of row reached: wrap the column counter and jump over the row padding in both framebuffers.
    x = 0;
    src += rowGapPixels;
    prev += rowGapPixels;
  }

  *taskStartX = x;
  *srcFramebuffer = src;
  *dstPrevFramebuffer = prev;
}
#if defined(ALL_TASKS_SHOULD_DMA) && defined(SPI_3WIRE_PROTOCOL)
// Bug: there is something about the chained DMA transfer mechanism that makes write window coordinate set commands not go through properly
// on 3-wire displays, but do not yet know what. (Remove this #error statement to debug)
#error ALL_TASKS_SHOULD_DMA and SPI_3WIRE_PROTOCOL are currently not mutually compatible!
#endif
#if defined(OFFLOAD_PIXEL_COPY_TO_DMA_CPP) && defined(SPI_3WIRE_PROTOCOL)
// We would have to convert 8-bit tasks to 9-bit tasks immediately after offloaded memcpy has been done below to implement this.
#error OFFLOAD_PIXEL_COPY_TO_DMA_CPP and SPI_3WIRE_PROTOCOL are not mutually compatible!
#endif
// Sends one SPI task to the display: the command byte is written in polled SPI mode, the payload is streamed
// with a chain of DMA control blocks. This variant (ALL_TASKS_SHOULD_DMA) does not wait for its own transfer to
// finish; instead it waits at the start of the *next* call for the previously started transfer to complete.
// Payloads larger than MAX_DMA_SPI_TASK_SIZE are split into several chunks that are chained together on the RX channel.
void SPIDMATransfer(SPITask *task)
{
// There is a limit to how many bytes can be sent in one DMA-based SPI task, so if the task
// is larger than this, we'll split the send into multiple individual DMA SPI transfers
// and chain them together. This should be a multiple of 32 bytes to keep tasks cache aligned on ARMv6.
#define MAX_DMA_SPI_TASK_SIZE 65504
// Number of chunks, i.e. the payload size divided by the maximum chunk size, rounded up.
const int numDMASendTasks = (task->PayloadSize() + MAX_DMA_SPI_TASK_SIZE - 1) / MAX_DMA_SPI_TASK_SIZE;
// Source memory layout: (numDMASendTasks-1) words holding TX control block addresses for the chaining steps,
// followed by, for each chunk, one 4-byte SPI header word and that chunk's payload bytes.
volatile uint32_t *dmaData = (volatile uint32_t *)GrabFreeDMASourceBytes(4*(numDMASendTasks-1)+4*numDMASendTasks+task->PayloadSize());
volatile uint32_t *setDMATxAddressData = dmaData;
volatile uint32_t *txData = dmaData+numDMASendTasks-1;
// The first chunk needs two control blocks (tx, rx); every further chunk needs five (tx, rx and the three
// chaining blocks built below): 2 + 5*(numDMASendTasks-1) = numDMASendTasks*5-3.
volatile DMAControlBlock *cb = GrabFreeCBs(numDMASendTasks*5-3);
volatile DMAControlBlock *rxTail = 0;
volatile DMAControlBlock *tx0 = &cb[0];
volatile DMAControlBlock *rx0 = &cb[1];
#ifdef OFFLOAD_PIXEL_COPY_TO_DMA_CPP
uint8_t *data = task->fb;
uint8_t *prevData = task->prevFb;
// The assembly copy routine moves 32 bytes / 16 pixels per iteration, so it is only used when sizes divide evenly.
const bool taskAndFramebufferSizesCompatibleWithTightMemcpy = (task->PayloadSize() % 32 == 0) && (task->width % 16 == 0);
#else
uint8_t *data = task->PayloadStart();
#endif
int bytesLeft = task->PayloadSize();
int taskStartX = 0;
// Build the control blocks and fill the DMA source memory, one chunk per iteration.
while(bytesLeft > 0)
{
int sendSize = MIN(bytesLeft, MAX_DMA_SPI_TASK_SIZE);
bytesLeft -= sendSize;
volatile DMAControlBlock *tx = cb++;
txData[0] = BCM2835_SPI0_CS_TA | DISPLAY_SPI_DRIVE_SETTINGS | (sendSize << 16); // The first four bytes written to the SPI data register control the DLEN and CS,CPOL,CPHA settings.
// This is really sad: we must do a memcpy to prepare for DMA controller to be able to do a memcpy. The reason for this is that the DMA source memory area must be in cache bypassing
// region of memory, which the SPI source ring buffer is not. It could be allocated to be so however, but bypassing the caches on the SPI ring buffer would cause a massive -51.5%
// profiled overall performance drop (tested on Pi3B+ and Tontec 3.5" 480x320 display on gpu test pattern, see branch non_intermediate_memcpy_for_dma). Therefore just keep doing
// this memcpy() to prepare for DMA to do its memcpy(), as it is faster overall. (If there was a way to map same physical memory to virtual address space twice, once cached, and
// another time uncached, and have writes bypass the cache and only write combine, but have reads follow the cache, then it might work without a perf hit, but not at all sure if
// that would be technically possible)
uint16_t *txPtr = (uint16_t*)(txData+1);
// If task->prevFb is present, the DMA backend is responsible for streaming pixel data from current framebuffer to old framebuffer, and the DMA task buffer.
// If not present, then that preparation has been already done by the caller.
#ifdef OFFLOAD_PIXEL_COPY_TO_DMA_CPP
if (prevData)
{
// For 2D pixel data, do a "everything in one pass"
if (taskAndFramebufferSizesCompatibleWithTightMemcpy)
memcpy_to_dma_and_prev_framebuffer((uint16_t*)txPtr, (uint16_t**)&prevData, (uint16_t**)&data, sendSize, &taskStartX, task->width, gpuFramebufferScanlineStrideBytes);
else
memcpy_to_dma_and_prev_framebuffer_in_c((uint16_t*)txPtr, (uint16_t**)&prevData, (uint16_t**)&data, sendSize, &taskStartX, task->width, gpuFramebufferScanlineStrideBytes);
}
else
#endif
{
memcpy(txPtr, data, sendSize);
data += sendSize;
}
// TX control block: copies the header word plus this chunk's payload from DMA source memory to the SPI FIFO register.
tx->ti = BCM2835_DMA_TI_PERMAP(BCM2835_DMA_TI_PERMAP_SPI_TX) | BCM2835_DMA_TI_DEST_DREQ | BCM2835_DMA_TI_SRC_INC | BCM2835_DMA_TI_WAIT_RESP;
tx->src = VIRT_TO_BUS(dmaSourceBuffer, txData);
tx->dst = DMA_SPI_FIFO_PHYS_ADDRESS; // Write out to the SPI peripheral
tx->len = 4+sendSize;
tx->next = 0;
// Advance past the header word and payload in whole 32-bit words. Only the final chunk can be smaller than
// MAX_DMA_SPI_TASK_SIZE (itself a multiple of 4), and txData is not used again after the final chunk.
txData += 1+sendSize/4;
// RX control block: reads sendSize bytes back from the SPI FIFO register with DEST_IGNORE set (dst is 0).
volatile DMAControlBlock *rx = cb++;
rx->ti = BCM2835_DMA_TI_PERMAP(BCM2835_DMA_TI_PERMAP_SPI_RX) | BCM2835_DMA_TI_SRC_DREQ | BCM2835_DMA_TI_DEST_IGNORE;
rx->src = DMA_SPI_FIFO_PHYS_ADDRESS;
rx->dst = 0;
rx->len = sendSize;
rx->next = 0;
// For every chunk after the first, hook three extra control blocks onto the end of the previous chunk's RX
// block. They make the RX channel itself restart the TX channel for this chunk, and then continue to this chunk's RX block.
if (rxTail)
{
volatile DMAControlBlock *setDMATxAddress = cb++;
volatile DMAControlBlock *disableTransferActive = cb++;
volatile DMAControlBlock *startDMATxChannel = cb++;
rxTail->next = VIRT_TO_BUS(dmaCb, setDMATxAddress);
// Step 1: write the bus address of this chunk's TX control block into the TX channel's register at offset 4
// (the cbAddr member of DMAChannelRegisterFile).
setDMATxAddressData[0] = VIRT_TO_BUS(dmaCb, tx);
setDMATxAddress->ti = BCM2835_DMA_TI_SRC_INC | BCM2835_DMA_TI_DEST_INC | BCM2835_DMA_TI_WAIT_RESP;
setDMATxAddress->src = VIRT_TO_BUS(dmaSourceBuffer, setDMATxAddressData);
setDMATxAddress->dst = DMA_DMA0_CB_PHYS_ADDRESS + dmaTxChannel*0x100 + 4;
setDMATxAddress->len = 4;
setDMATxAddress->next = VIRT_TO_BUS(dmaCb, disableTransferActive);
++setDMATxAddressData;
// Step 2: write the first constant data word (set to BCM2835_SPI0_CS_DMAEN during DMA init) to the SPI CS register.
disableTransferActive->ti = BCM2835_DMA_TI_SRC_INC | BCM2835_DMA_TI_DEST_INC | BCM2835_DMA_TI_WAIT_RESP;
disableTransferActive->src = dmaConstantData.busAddress;
disableTransferActive->dst = DMA_SPI_CS_PHYS_ADDRESS;
disableTransferActive->len = 4;
disableTransferActive->next = VIRT_TO_BUS(dmaCb, startDMATxChannel);
// Step 3: write the second constant data word (ACTIVE | END, set during DMA init) to the TX channel's register
// at offset 0 (the cs member), which starts the TX channel; then carry on with this chunk's RX block.
startDMATxChannel->ti = BCM2835_DMA_TI_SRC_INC | BCM2835_DMA_TI_DEST_INC | BCM2835_DMA_TI_WAIT_RESP;
startDMATxChannel->src = dmaConstantData.busAddress+4;
startDMATxChannel->dst = DMA_DMA0_CB_PHYS_ADDRESS + dmaTxChannel*0x100;
startDMATxChannel->len = 4;
startDMATxChannel->next = VIRT_TO_BUS(dmaCb, rx);
}
rxTail = rx;
}
// These statics carry the start time and payload size of the transfer started by the previous call.
static uint64_t taskStartTime = 0;
static int pendingTaskBytes = 1;
// Estimate how long the previous transfer still needs and sleep for all but the last 70 usecs of that,
// then poll for completion below.
double pendingTaskUSecs = pendingTaskBytes * spiUsecsPerByte;
pendingTaskUSecs -= tick() - taskStartTime;
if (pendingTaskUSecs > 70)
usleep(pendingTaskUSecs-70);
uint64_t dmaTaskStart = tick();
CheckSPIDMAChannelsNotStolen();
// Wait for the previous transfer to finish on both channels; give up with a fatal error after 5000000 ticks
// (measured from dmaTaskStart for both loops).
while((dmaTx->cs & BCM2835_DMA_CS_ACTIVE) && programRunning)
{
usleep(250);
CheckSPIDMAChannelsNotStolen();
if (tick() - dmaTaskStart > 5000000)
{
DumpDMAState();
FATAL_ERROR("DMA TX channel has stalled!");
}
}
while((dmaRx->cs & BCM2835_DMA_CS_ACTIVE) && programRunning)
{
usleep(250);
CheckSPIDMAChannelsNotStolen();
if (tick() - dmaTaskStart > 5000000)
{
DumpDMAState();
FATAL_ERROR("DMA RX channel has stalled!");
}
}
// The waits above also end when the program is quitting; in that case do not start a new transfer.
if (!programRunning) return;
pendingTaskBytes = task->PayloadSize();
// First send the SPI command byte in Polled SPI mode
spi->cs = BCM2835_SPI0_CS_TA | BCM2835_SPI0_CS_CLEAR | DISPLAY_SPI_DRIVE_SETTINGS;
#ifndef SPI_3WIRE_PROTOCOL
// The data/control GPIO line is cleared while the command byte is clocked out and set again for the payload.
CLEAR_GPIO(GPIO_TFT_DATA_CONTROL);
#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE
spi->fifo = 0;
spi->fifo = task->cmd;
while(!(spi->cs & (BCM2835_SPI0_CS_DONE))) /*nop*/;
// spi->fifo; // Currently no need to flush these, the clear below clears the rx queue.
// spi->fifo;
#else
spi->fifo = task->cmd;
while(!(spi->cs & (BCM2835_SPI0_CS_RXD|BCM2835_SPI0_CS_DONE))) /*nop*/;
// spi->fifo; // Currently no need to flush this, the clear below clears the rx queue.
#endif
SET_GPIO(GPIO_TFT_DATA_CONTROL);
#endif
// Switch the SPI peripheral to DMA mode, point both channels at their first control blocks and start them.
spi->cs = BCM2835_SPI0_CS_DMAEN | BCM2835_SPI0_CS_CLEAR | DISPLAY_SPI_DRIVE_SETTINGS;
dmaTx->cbAddr = VIRT_TO_BUS(dmaCb, tx0);
dmaRx->cbAddr = VIRT_TO_BUS(dmaCb, rx0);
// Full memory barrier between the setup writes above and activating the channels.
__sync_synchronize();
dmaTx->cs = BCM2835_DMA_CS_ACTIVE | BCM2835_DMA_CS_END;
dmaRx->cs = BCM2835_DMA_CS_ACTIVE | BCM2835_DMA_CS_END;
// Remembered for the completion time estimate at the start of the next call.
taskStartTime = tick();
}
#else
// Sends the payload of one SPI task with a single pair of DMA control blocks (one TX, one RX) and blocks
// until both DMA channels have finished. Used when ALL_TASKS_SHOULD_DMA is not defined.
// Unlike the chained variant, this function does not split the payload into chunks and does not write task->cmd.
void SPIDMATransfer(SPITask *task)
{
// Transition the SPI peripheral to enable the use of DMA
spi->cs = BCM2835_SPI0_CS_DMAEN | BCM2835_SPI0_CS_CLEAR | DISPLAY_SPI_DRIVE_SETTINGS;
uint32_t *headerAddr = task->DmaSpiHeaderAddress();
*headerAddr = BCM2835_SPI0_CS_TA | DISPLAY_SPI_DRIVE_SETTINGS | (task->PayloadSize() << 16); // The first four bytes written to the SPI data register control the DLEN and CS,CPOL,CPHA settings.
// TODO: Ideally we would be able to directly perform the DMA from the SPI ring buffer from 'task' pointer. However
// that pointer is shared to userland, and it is proving troublesome to make it both userland-writable as well as cache-bypassing DMA coherent.
// Therefore these two memory areas are separate for now, and we memcpy() from SPI ring buffer to an intermediate 'dmaSourceMemory' memory area to perform
// the DMA transfer. Is there a way to avoid this intermediate buffer? That would improve performance a bit.
// Copies the 4-byte header word together with the payload that follows it in the task.
memcpy(dmaSourceBuffer.virtualAddr, headerAddr, task->PayloadSize() + 4);
volatile DMAControlBlock *cb = (volatile DMAControlBlock *)dmaCb.virtualAddr;
// Control block 0: TX, copies header + payload from the DMA source buffer to the SPI FIFO register.
volatile DMAControlBlock *txcb = &cb[0];
txcb->ti = BCM2835_DMA_TI_PERMAP(BCM2835_DMA_TI_PERMAP_SPI_TX) | BCM2835_DMA_TI_DEST_DREQ | BCM2835_DMA_TI_SRC_INC | BCM2835_DMA_TI_WAIT_RESP;
txcb->src = dmaSourceBuffer.busAddress;
txcb->dst = DMA_SPI_FIFO_PHYS_ADDRESS; // Write out to the SPI peripheral
txcb->len = task->PayloadSize() + 4;
txcb->stride = 0;
txcb->next = 0;
txcb->debug = 0;
txcb->reserved = 0;
dmaTx->cbAddr = dmaCb.busAddress;
// Control block 1: RX, reads the same number of payload bytes back from the SPI FIFO register with DEST_IGNORE set.
volatile DMAControlBlock *rxcb = &cb[1];
rxcb->ti = BCM2835_DMA_TI_PERMAP(BCM2835_DMA_TI_PERMAP_SPI_RX) | BCM2835_DMA_TI_SRC_DREQ | BCM2835_DMA_TI_DEST_IGNORE;
rxcb->src = DMA_SPI_FIFO_PHYS_ADDRESS;
rxcb->dst = 0;
rxcb->len = task->PayloadSize();
rxcb->stride = 0;
rxcb->next = 0;
rxcb->debug = 0;
rxcb->reserved = 0;
dmaRx->cbAddr = dmaCb.busAddress + sizeof(DMAControlBlock);
// Full memory barrier between the control block/register setup above and activating the channels.
__sync_synchronize();
dmaTx->cs = BCM2835_DMA_CS_ACTIVE;
dmaRx->cs = BCM2835_DMA_CS_ACTIVE;
__sync_synchronize();
// Sleep for the estimated transfer time minus 70 usecs, then busy-poll for completion.
double pendingTaskUSecs = task->PayloadSize() * spiUsecsPerByte;
if (pendingTaskUSecs > 70)
usleep(pendingTaskUSecs-70);
uint64_t dmaTaskStart = tick();
CheckSPIDMAChannelsNotStolen();
// Both loops spin without sleeping and share the same 5000000 tick timeout measured from dmaTaskStart.
// NOTE(review): unlike the ALL_TASKS_SHOULD_DMA variant, these loops do not test programRunning - confirm this is intended.
while((dmaTx->cs & BCM2835_DMA_CS_ACTIVE))
{
CheckSPIDMAChannelsNotStolen();
if (tick() - dmaTaskStart > 5000000)
FATAL_ERROR("DMA TX channel has stalled!");
}
while((dmaRx->cs & BCM2835_DMA_CS_ACTIVE))
{
CheckSPIDMAChannelsNotStolen();
if (tick() - dmaTaskStart > 5000000)
FATAL_ERROR("DMA RX channel has stalled!");
}
__sync_synchronize();
// Put the SPI peripheral back into the non-DMA state (TA set, FIFOs cleared, DMAEN no longer set).
spi->cs = BCM2835_SPI0_CS_TA | BCM2835_SPI0_CS_CLEAR | DISPLAY_SPI_DRIVE_SETTINGS;
__sync_synchronize();
}
#endif
// Shuts the DMA backend down: waits for in-flight transfers, resets both DMA channels, frees the three
// uncached GPU memory areas (source data, control blocks, constant data) and hands back any DMA channel
// that is still held, marking it as unallocated (-1).
void DeinitDMA(void)
{
  WaitForDMAFinished();
  ResetDMAChannels();

  FreeUncachedGpuMemory(dmaSourceBuffer);
  FreeUncachedGpuMemory(dmaCb);
  FreeUncachedGpuMemory(dmaConstantData);

  // TX is released before RX; a value of -1 means the channel is not currently allocated.
  int *heldChannels[2] = { &dmaTxChannel, &dmaRxChannel };
  for(int i = 0; i < 2; ++i)
  {
    int *channel = heldChannels[i];
    if (*channel == -1)
      continue;
    FreeDMAChannel(*channel);
    *channel = -1;
  }
}
#endif // ~USE_DMA_TRANSFERS

141
usr/fbcp-ili9341/dma.h Normal file
View File

@ -0,0 +1,141 @@
#pragma once
#ifdef USE_DMA_TRANSFERS
#define BCM2835_DMA0_OFFSET 0x7000 // DMA channels 0-14 live at 0x7E007000, offset of 0x7000 of BCM2835 peripherals base address
#define BCM2835_DMAENABLE_REGISTER_OFFSET 0xff0
// One DMA control block: eight consecutive 32-bit words (32 bytes) describing a single DMA transfer.
// The same layout is embedded in DMAChannelRegisterFile below as the channel's current control block registers.
typedef struct __attribute__ ((packed, aligned(4))) DMAControlBlock
{
uint32_t ti; // Transfer information flags, built from the BCM2835_DMA_TI_* defines
uint32_t src; // Source address; the code in dma.cpp fills this with bus/peripheral addresses
uint32_t dst; // Destination address (written as 0 when BCM2835_DMA_TI_DEST_IGNORE is used)
uint32_t len; // Transfer length in bytes
uint32_t stride; // Stride word; dma.cpp only ever writes 0 here - NOTE(review): presumably only relevant with BCM2835_DMA_TI_TDMODE, confirm
uint32_t next; // Bus address of the next control block in a chain, or 0 for the last one
uint32_t debug; // Debug word, decoded with the BCM2835_DMA_DEBUG_* defines when read back from a channel
uint32_t reserved; // Reserved; kept at zero
} DMAControlBlock;
// Memory-mapped register block of one DMA channel. Channels are laid out back to back, 256 bytes apart.
typedef struct __attribute__ ((packed, aligned(4))) DMAChannelRegisterFile
{
volatile uint32_t cs; // Offset 0: control/status register, see the BCM2835_DMA_CS_* defines
volatile uint32_t cbAddr; // Offset 4: bus address of the control block to execute
volatile DMAControlBlock cb; // Offset 8: the channel's control block registers (ti, src, dst, len, stride, next, debug, reserved)
volatile uint8_t padding[216]; // Pad this structure to 256 bytes in size total for easy indexing into DMA channels.
} DMAChannelRegisterFile;
extern int dmaTxChannel, dmaTxIrq;
extern volatile DMAChannelRegisterFile *dmaTx; // DMA channel allocated to sending to SPI
extern int dmaRxChannel, dmaRxIrq;
extern volatile DMAChannelRegisterFile *dmaRx; // DMA channel allocated to reading from SPI
volatile DMAChannelRegisterFile *GetDMAChannel(int channelNumber);
#define BCM2835_DMA_CS_RESET (1<<31)
#define BCM2835_DMA_CS_ABORT (1<<30)
#define BCM2835_DMA_CS_DISDEBUG (1<<29)
#define BCM2835_DMA_CS_WAIT_FOR_OUTSTANDING_WRITES (1<<28)
#define BCM2835_DMA_CS_PANIC_PRIORITY (0xF<<20)
#define BCM2835_DMA_CS_PRIORITY (0xF<<16)
#define BCM2835_DMA_CS_ERROR (1<<8)
#define BCM2835_DMA_CS_WAITING_FOR_OUTSTANDING_WRITES (1<<6)
#define BCM2835_DMA_CS_DREQ_STOPS_DMA (1<<5)
#define BCM2835_DMA_CS_PAUSED (1<<4)
#define BCM2835_DMA_CS_DREQ (1<<3)
#define BCM2835_DMA_CS_INT (1<<2)
#define BCM2835_DMA_CS_END (1<<1)
#define BCM2835_DMA_CS_ACTIVE (1<<0)
#define BCM2835_DMA_CS_SET_PANIC_PRIORITY(p) ((p) << 20)
#define BCM2835_DMA_CS_SET_PRIORITY(p) ((p) << 16)
#define BCM2835_DMA_CS_RESET_SHIFT 31
#define BCM2835_DMA_CS_ABORT_SHIFT 30
#define BCM2835_DMA_CS_DISDEBUG_SHIFT 29
#define BCM2835_DMA_CS_WAIT_FOR_OUTSTANDING_WRITES_SHIFT 28
#define BCM2835_DMA_CS_PANIC_PRIORITY_SHIFT 20
#define BCM2835_DMA_CS_PRIORITY_SHIFT 16
#define BCM2835_DMA_CS_ERROR_SHIFT 8
#define BCM2835_DMA_CS_WAITING_FOR_OUTSTANDING_WRITES_SHIFT 6
#define BCM2835_DMA_CS_DREQ_STOPS_DMA_SHIFT 5
#define BCM2835_DMA_CS_PAUSED_SHIFT 4
#define BCM2835_DMA_CS_DREQ_SHIFT 3
#define BCM2835_DMA_CS_INT_SHIFT 2
#define BCM2835_DMA_CS_END_SHIFT 1
#define BCM2835_DMA_CS_ACTIVE_SHIFT 0
#define BCM2835_DMA_DEBUG_LITE (1<<28)
#define BCM2835_DMA_DEBUG_VERSION (7<<25)
#define BCM2835_DMA_DEBUG_DMA_STATE (0x1FF<<16)
#define BCM2835_DMA_DEBUG_DMA_ID (0xFF<<8)
#define BCM2835_DMA_DEBUG_DMA_OUTSTANDING_WRITES (0xF<<4)
#define BCM2835_DMA_DEBUG_DMA_READ_ERROR (1<<2)
#define BCM2835_DMA_DEBUG_DMA_FIFO_ERROR (1<<1)
#define BCM2835_DMA_DEBUG_READ_LAST_NOT_SET_ERROR (1<<0)
#define BCM2835_DMA_DEBUG_LITE_SHIFT 28
#define BCM2835_DMA_DEBUG_VERSION_SHIFT 25
#define BCM2835_DMA_DEBUG_DMA_STATE_SHIFT 16
#define BCM2835_DMA_DEBUG_DMA_ID_SHIFT 8
#define BCM2835_DMA_DEBUG_DMA_OUTSTANDING_WRITES_SHIFT 4
#define BCM2835_DMA_DEBUG_DMA_READ_ERROR_SHIFT 2
#define BCM2835_DMA_DEBUG_DMA_FIFO_ERROR_SHIFT 1
#define BCM2835_DMA_DEBUG_READ_LAST_NOT_SET_ERROR_SHIFT 0
#define BCM2835_DMA_TI_NO_WIDE_BURSTS (1<<26)
#define BCM2835_DMA_TI_WAITS (0x1F<<21)
#define BCM2835_DMA_TI_PERMAP(x) ((x)<<16)
#define BCM2835_DMA_TI_PERMAP_MASK (0x1F<<16)
#define BCM2835_DMA_TI_PERMAP_SPI_TX 6
#define BCM2835_DMA_TI_PERMAP_SPI_RX 7
#define BCM2835_DMA_TI_BURST_LENGTH(x) ((x)<<12)
#define BCM2835_DMA_TI_SRC_IGNORE (1<<11)
#define BCM2835_DMA_TI_SRC_DREQ (1<<10)
#define BCM2835_DMA_TI_SRC_WIDTH (1<<9)
#define BCM2835_DMA_TI_SRC_INC (1<<8)
#define BCM2835_DMA_TI_DEST_IGNORE (1<<7)
#define BCM2835_DMA_TI_DEST_DREQ (1<<6)
#define BCM2835_DMA_TI_DEST_WIDTH (1<<5)
#define BCM2835_DMA_TI_DEST_INC (1<<4)
#define BCM2835_DMA_TI_WAIT_RESP (1<<3)
#define BCM2835_DMA_TI_TDMODE (1<<1)
#define BCM2835_DMA_TI_INTEN (1<<0)
#define BCM2835_DMA_TI_NO_WIDE_BURSTS_SHIFT 26
#define BCM2835_DMA_TI_WAITS_SHIFT 21
#define BCM2835_DMA_TI_PERMAP_SHIFT 16
#define BCM2835_DMA_TI_BURST_LENGTH_SHIFT 12
#define BCM2835_DMA_TI_SRC_IGNORE_SHIFT 11
#define BCM2835_DMA_TI_SRC_DREQ_SHIFT 10
#define BCM2835_DMA_TI_SRC_WIDTH_SHIFT 9
#define BCM2835_DMA_TI_SRC_INC_SHIFT 8
#define BCM2835_DMA_TI_DEST_IGNORE_SHIFT 7
#define BCM2835_DMA_TI_DEST_DREQ_SHIFT 6
#define BCM2835_DMA_TI_DEST_WIDTH_SHIFT 5
#define BCM2835_DMA_TI_DEST_INC_SHIFT 4
#define BCM2835_DMA_TI_WAIT_RESP_SHIFT 3
#define BCM2835_DMA_TI_TDMODE_SHIFT 1
#define BCM2835_DMA_TI_INTEN_SHIFT 0
// Spec sheet says there's 16 channels, but last channel is unusable:
// https://www.raspberrypi.org/forums/viewtopic.php?t=170957
// So just behave as if there are only 15 channels
#define BCM2835_NUM_DMA_CHANNELS 15
void WaitForDMAFinished(void);
// Reserves and enables a DMA channel for SPI transfers.
int InitDMA(void);
void DeinitDMA(void);
typedef struct SPITask SPITask;
void SPIDMATransfer(SPITask *task);
extern int dmaTxChannel;
extern int dmaRxChannel;
extern uint64_t totalGpuMemoryUsed;
#endif

Binary file not shown.

After

Width:  |  Height:  |  Size: 299 KiB

View File

@ -0,0 +1,579 @@
#include <fcntl.h>
#include <linux/fb.h>
#include <linux/futex.h>
#include <linux/spi/spidev.h>
#include <memory.h>
#include <stdio.h>
#include <stdlib.h>
#include <endian.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <syslog.h>
#include <time.h>
#include <unistd.h>
#include <inttypes.h>
#include <math.h>
#include <signal.h>
#include "config.h"
#include "text.h"
#include "spi.h"
#include "gpu.h"
#include "statistics.h"
#include "tick.h"
#include "display.h"
#include "util.h"
#include "mailbox.h"
#include "diff.h"
#include "mem_alloc.h"
#include "keyboard.h"
#include "low_battery.h"
// Returns how many pixels differ between two framebuffers of gpuFrameWidth x gpuFrameHeight 16-bit pixels.
// Both buffers are walked row by row using gpuFramebufferScanlineStrideBytes as the row pitch, so any
// padding past gpuFrameWidth at the end of a row is not compared.
int CountNumChangedPixels(uint16_t *framebuffer, uint16_t *prevFramebuffer)
{
  int numDiffering = 0;
  const uint16_t *curRow = framebuffer;
  const uint16_t *oldRow = prevFramebuffer;
  for(int row = 0; row < gpuFrameHeight; ++row)
  {
    for(int col = 0; col < gpuFrameWidth; ++col)
    {
      if (curRow[col] == oldRow[col])
        continue;
      numDiffering += 1;
    }
    // The stride is in bytes; pixels are two bytes each.
    curRow += gpuFramebufferScanlineStrideBytes >> 1;
    oldRow += gpuFramebufferScanlineStrideBytes >> 1;
  }
  return numDiffering;
}
uint64_t displayContentsLastChanged = 0;
bool displayOff = false;
volatile bool programRunning = true;
// Maps the signal numbers that main() installs a handler for to their symbolic names.
// Returns "?" for any other signal number. The returned pointer refers to a string literal.
const char *SignalToString(int signal)
{
  switch(signal)
  {
    case SIGINT:  return "SIGINT";
    case SIGQUIT: return "SIGQUIT";
    case SIGUSR1: return "SIGUSR1";
    case SIGUSR2: return "SIGUSR2";
    case SIGTERM: return "SIGTERM";
    default:      return "?";
  }
}
// Requests shutdown by clearing the global programRunning flag. The main loop and the DMA wait loops
// test this flag and stop when it is false. This function does not wake any sleeping thread by itself.
void MarkProgramQuitting()
{
programRunning = false;
}
// Signal handler installed by main() for SIGINT, SIGQUIT, SIGUSR1, SIGUSR2 and SIGTERM.
// Logs the signal, flags the program as quitting and wakes any thread sleeping on the SPI task queue or
// new-frame futexes so that it can notice the flag. On the fifth invocation it stops waiting for a graceful
// shutdown and exits the process immediately.
// NOTE(review): printf() and exit() are not on the POSIX list of async-signal-safe functions, and
// quitHandlerCalled is a plain int rather than sig_atomic_t; consider whether this matters for this program.
void ProgramInterruptHandler(int signal)
{
printf("Signal %s(%d) received, quitting\n", SignalToString(signal), signal);
// Counts handler invocations across all handled signals.
static int quitHandlerCalled = 0;
if (++quitHandlerCalled >= 5)
{
printf("Ctrl-C handler invoked five times, looks like fbcp-ili9341 is not gracefully quitting - performing a forcible shutdown!\n");
exit(1);
}
MarkProgramQuitting();
// Full memory barrier so that the flag write above is ordered before the wake-ups below.
__sync_synchronize();
// Wake the SPI thread if it was sleeping so that it can gracefully quit
if (spiTaskMemory)
{
// Both queue indices are incremented before the wake. NOTE(review): presumably this changes the value the
// SPI thread waits on while keeping the head/tail distance unchanged - confirm against the SPI thread's wait loop.
__atomic_fetch_add(&spiTaskMemory->queueHead, 1, __ATOMIC_SEQ_CST);
__atomic_fetch_add(&spiTaskMemory->queueTail, 1, __ATOMIC_SEQ_CST);
syscall(SYS_futex, &spiTaskMemory->queueTail, FUTEX_WAKE, 1, 0, 0, 0); // Wake the SPI thread if it was sleeping to get new tasks
}
// Wake the main thread if it was sleeping for a new frame so that it can gracefully quit
// (the main loop waits while numNewGpuFrames is 0, so the counter is bumped before the wake).
__atomic_fetch_add(&numNewGpuFrames, 1, __ATOMIC_SEQ_CST);
syscall(SYS_futex, &numNewGpuFrames, FUTEX_WAKE, 1, 0, 0, 0);
}
int main()
{
signal(SIGINT, ProgramInterruptHandler);
signal(SIGQUIT, ProgramInterruptHandler);
signal(SIGUSR1, ProgramInterruptHandler);
signal(SIGUSR2, ProgramInterruptHandler);
signal(SIGTERM, ProgramInterruptHandler);
#ifdef RUN_WITH_REALTIME_THREAD_PRIORITY
SetRealtimeThreadPriority();
#endif
OpenMailbox();
InitSPI();
displayContentsLastChanged = tick();
displayOff = false;
InitLowBatterySystem();
// Track current SPI display controller write X and Y cursors.
int spiX = -1;
int spiY = -1;
int spiEndX = DISPLAY_WIDTH;
InitGPU();
spans = (Span*)Malloc((gpuFrameWidth * gpuFrameHeight / 2) * sizeof(Span), "main() task spans");
int size = gpuFramebufferSizeBytes;
#ifdef USE_GPU_VSYNC
// BUG in vc_dispmanx_resource_read_data(!!): If one is capturing a small subrectangle of a large screen resource rectangle, the destination pointer
// is in vc_dispmanx_resource_read_data() incorrectly still taken to point to the top-left corner of the large screen resource, instead of the top-left
// corner of the subrectangle to capture. Therefore do dirty pointer arithmetic to adjust for this. To make this safe, videoCoreFramebuffer is allocated
// double its needed size so that this adjusted pointer does not reference outside allocated memory (if it did, vc_dispmanx_resource_read_data() was seen
// to randomly fail and then subsequently hang if called a second time)
size *= 2;
#endif
uint16_t *framebuffer[2] = { (uint16_t *)Malloc(size, "main() framebuffer0"), (uint16_t *)Malloc(gpuFramebufferSizeBytes, "main() framebuffer1") };
memset(framebuffer[0], 0, size); // Doublebuffer received GPU memory contents, first buffer contains current GPU memory,
memset(framebuffer[1], 0, gpuFramebufferSizeBytes); // second buffer contains whatever the display is currently showing. This allows diffing pixels between the two.
#ifdef USE_GPU_VSYNC
// Due to the above bug. In USE_GPU_VSYNC mode, we directly snapshot to framebuffer[0], so it has to be prepared specially to work around the
// dispmanx bug.
framebuffer[0] += (gpuFramebufferSizeBytes>>1);
#endif
uint32_t curFrameEnd = spiTaskMemory->queueTail;
uint32_t prevFrameEnd = spiTaskMemory->queueTail;
bool prevFrameWasInterlacedUpdate = false;
bool interlacedUpdate = false; // True if the previous update we did was an interlaced half field update.
int frameParity = 0; // For interlaced frame updates, this is either 0 or 1 to denote evens or odds.
OpenKeyboard();
printf("All initialized, now running main loop...\n");
while(programRunning)
{
prevFrameWasInterlacedUpdate = interlacedUpdate;
// If last update was interlaced, it means we still have half of the image pending to be updated. In such a case,
// sleep only until when we expect the next new frame of data to appear, and then continue independent of whether
// a new frame was produced or not - if not, then we will submit the rest of the unsubmitted fields. If yes, then
// the half fields of the new frame will be sent (or full, if the new frame has very little content)
if (prevFrameWasInterlacedUpdate)
{
#ifdef THROTTLE_INTERLACING
timespec timeout = {};
timeout.tv_nsec = 1000 * MIN(1000000, MAX(1, 750/*0.75ms extra sleep so we know we should likely sleep long enough to see the next frame*/ + PredictNextFrameArrivalTime() - tick()));
if (programRunning) syscall(SYS_futex, &numNewGpuFrames, FUTEX_WAIT, 0, &timeout, 0, 0); // Start sleeping until we get new tasks
#endif
// If THROTTLE_INTERLACING is not defined, we'll fall right through and immediately submit the rest of the remaining content on screen to attempt to minimize the visual
// observable effect of interlacing, although at the expense of smooth animation (falling through here causes jitter)
}
else
{
uint64_t waitStart = tick();
while(__atomic_load_n(&numNewGpuFrames, __ATOMIC_SEQ_CST) == 0)
{
#if defined(BACKLIGHT_CONTROL) && defined(TURN_DISPLAY_OFF_AFTER_USECS_OF_INACTIVITY)
if (!displayOff && tick() - waitStart > TURN_DISPLAY_OFF_AFTER_USECS_OF_INACTIVITY)
{
TurnDisplayOff();
displayOff = true;
}
if (!displayOff)
{
timespec timeout = {};
timeout.tv_sec = ((uint64_t)TURN_DISPLAY_OFF_AFTER_USECS_OF_INACTIVITY * 1000) / 1000000000;
timeout.tv_nsec = ((uint64_t)TURN_DISPLAY_OFF_AFTER_USECS_OF_INACTIVITY * 1000) % 1000000000;
if (programRunning) syscall(SYS_futex, &numNewGpuFrames, FUTEX_WAIT, 0, &timeout, 0, 0); // Sleep until the next frame arrives
}
else
#endif
if (programRunning) syscall(SYS_futex, &numNewGpuFrames, FUTEX_WAIT, 0, 0, 0, 0); // Sleep until the next frame arrives
}
}
bool spiThreadWasWorkingHardBefore = false;
// At all times keep at most two rendered frames in the SPI task queue pending to be displayed. Only proceed to submit a new frame
// once the older of those has been displayed.
bool once = true;
while ((spiTaskMemory->queueTail + SPI_QUEUE_SIZE - spiTaskMemory->queueHead) % SPI_QUEUE_SIZE > (spiTaskMemory->queueTail + SPI_QUEUE_SIZE - prevFrameEnd) % SPI_QUEUE_SIZE)
{
if (spiTaskMemory->spiBytesQueued > 10000)
spiThreadWasWorkingHardBefore = true; // SPI thread had too much work in queue atm (2 full frames)
// Peek at the SPI thread's workload and throttle a bit if it has got a lot of work still to do.
double usecsUntilSpiQueueEmpty = spiTaskMemory->spiBytesQueued*spiUsecsPerByte;
if (usecsUntilSpiQueueEmpty > 0)
{
uint32_t bytesInQueueBefore = spiTaskMemory->spiBytesQueued;
uint32_t sleepUsecs = (uint32_t)(usecsUntilSpiQueueEmpty*0.4);
#ifdef STATISTICS
uint64_t t0 = tick();
#endif
if (sleepUsecs > 1000) usleep(500);
#ifdef STATISTICS
uint64_t t1 = tick();
uint32_t bytesInQueueAfter = spiTaskMemory->spiBytesQueued;
bool starved = (spiTaskMemory->queueHead == spiTaskMemory->queueTail);
if (starved) spiThreadWasWorkingHardBefore = false;
/*
if (once && starved)
{
printf("Had %u bytes in queue, asked to sleep for %u usecs, got %u usecs sleep, afterwards %u bytes in queue. (got %.2f%% work done)%s\n",
bytesInQueueBefore, sleepUsecs, (uint32_t)(t1 - t0), bytesInQueueAfter, (bytesInQueueBefore-bytesInQueueAfter)*100.0/bytesInQueueBefore,
starved ? " SLEPT TOO LONG, SPI THREAD STARVED" : "");
once = false;
}
*/
#endif
}
}
int expiredFrames = 0;
uint64_t now = tick();
while(expiredFrames < frameTimeHistorySize && now - frameTimeHistory[expiredFrames].time >= FRAMERATE_HISTORY_LENGTH) ++expiredFrames;
if (expiredFrames > 0)
{
frameTimeHistorySize -= expiredFrames;
for(int i = 0; i < frameTimeHistorySize; ++i) frameTimeHistory[i] = frameTimeHistory[i+expiredFrames];
}
#ifdef STATISTICS
int expiredSkippedFrames = 0;
while(expiredSkippedFrames < frameSkipTimeHistorySize && now - frameSkipTimeHistory[expiredSkippedFrames] >= 1000000/*FRAMERATE_HISTORY_LENGTH*/) ++expiredSkippedFrames;
if (expiredSkippedFrames > 0)
{
frameSkipTimeHistorySize -= expiredSkippedFrames;
for(int i = 0; i < frameSkipTimeHistorySize; ++i) frameSkipTimeHistory[i] = frameSkipTimeHistory[i+expiredSkippedFrames];
}
#endif
int numNewFrames = __atomic_load_n(&numNewGpuFrames, __ATOMIC_SEQ_CST);
bool gotNewFramebuffer = (numNewFrames > 0);
bool framebufferHasNewChangedPixels = true;
uint64_t frameObtainedTime;
if (gotNewFramebuffer)
{
#ifdef USE_GPU_VSYNC
// TODO: Hardcoded vsync interval to 60 for now. Would be better to compute yet another histogram of the vsync arrival times, if vsync is not set to 60hz.
// N.B. copying directly to videoCoreFramebuffer[1] that may be directly accessed by the main thread, so this could
// produce a visible tear between two adjacent frames, but since we don't have vsync anyways, currently not caring too much.
frameObtainedTime = tick();
uint64_t framePollingStartTime = frameObtainedTime;
#if defined(SAVE_BATTERY_BY_PREDICTING_FRAME_ARRIVAL_TIMES) || defined(SAVE_BATTERY_BY_SLEEPING_WHEN_IDLE)
uint64_t nextFrameArrivalTime = PredictNextFrameArrivalTime();
int64_t timeToSleep = nextFrameArrivalTime - tick();
if (timeToSleep > 0)
usleep(timeToSleep);
#endif
framebufferHasNewChangedPixels = SnapshotFramebuffer(framebuffer[0]);
#else
memcpy(framebuffer[0], videoCoreFramebuffer[1], gpuFramebufferSizeBytes);
#endif
PollLowBattery();
#ifdef STATISTICS
uint64_t now = tick();
for(int i = 0; i < numNewFrames - 1 && frameSkipTimeHistorySize < FRAMERATE_HISTORY_LENGTH; ++i)
frameSkipTimeHistory[frameSkipTimeHistorySize++] = now;
#endif
__atomic_fetch_sub(&numNewGpuFrames, numNewFrames, __ATOMIC_SEQ_CST);
DrawStatisticsOverlay(framebuffer[0]);
DrawLowBatteryIcon(framebuffer[0]);
#ifdef USE_GPU_VSYNC
#ifdef STATISTICS
uint64_t completelyUnnecessaryTimeWastedPollingGPUStart = tick();
#endif
// DispmanX PROBLEM! When latching onto the vsync signal, the DispmanX API sends the signal at arbitrary phase with respect to the application actually producing its frames.
// Therefore even while we do get a smooth 16.666.. msec interval vsync signal, we have no idea whether the application has actually produced a new frame at that time. Therefore
// we must keep polling for frames until we find one that it has produced.
#ifdef SELF_SYNCHRONIZE_TO_GPU_VSYNC_PRODUCED_NEW_FRAMES
framebufferHasNewChangedPixels = framebufferHasNewChangedPixels && IsNewFramebuffer(framebuffer[0], framebuffer[1]);
uint64_t timeToGiveUpThereIsNotGoingToBeANewFrame = framePollingStartTime + 1000000/TARGET_FRAME_RATE/2;
while(!framebufferHasNewChangedPixels && tick() < timeToGiveUpThereIsNotGoingToBeANewFrame)
{
usleep(2000);
frameObtainedTime = tick();
framebufferHasNewChangedPixels = SnapshotFramebuffer(framebuffer[0]);
DrawStatisticsOverlay(framebuffer[0]);
DrawLowBatteryIcon(framebuffer[0]);
framebufferHasNewChangedPixels = framebufferHasNewChangedPixels && IsNewFramebuffer(framebuffer[0], framebuffer[1]);
}
#else
framebufferHasNewChangedPixels = true;
#endif
numNewFrames = __atomic_load_n(&numNewGpuFrames, __ATOMIC_SEQ_CST);
__atomic_fetch_sub(&numNewGpuFrames, numNewFrames, __ATOMIC_SEQ_CST);
#ifdef STATISTICS
now = tick();
for(int i = 0; i < numNewFrames - 1 && frameSkipTimeHistorySize < FRAMERATE_HISTORY_LENGTH; ++i)
frameSkipTimeHistory[frameSkipTimeHistorySize++] = now;
uint64_t completelyUnnecessaryTimeWastedPollingGPUStop = tick();
__atomic_fetch_add(&timeWastedPollingGPU, completelyUnnecessaryTimeWastedPollingGPUStop-completelyUnnecessaryTimeWastedPollingGPUStart, __ATOMIC_RELAXED);
#endif
#else // !USE_GPU_VSYNC
if (!displayOff)
RefreshStatisticsOverlayText();
#endif
}
// If too many pixels have changed on screen, drop adaptively to interlaced updating to keep up the frame rate.
double inputDataFps = 1000000.0 / EstimateFrameRateInterval();
double desiredTargetFps = MAX(1, MIN(inputDataFps, TARGET_FRAME_RATE));
#ifdef SINGLE_CORE_BOARD
const double timesliceToUseForScreenUpdates = 250000;
#elif defined(ILI9486) || defined(ILI9486L) ||defined(HX8357D)
const double timesliceToUseForScreenUpdates = 750000;
#else
const double timesliceToUseForScreenUpdates = 1500000;
#endif
const double tooMuchToUpdateUsecs = timesliceToUseForScreenUpdates / desiredTargetFps; // If updating the current and new frame takes too many frames worth of allotted time, drop to interlacing.
#if !defined(NO_INTERLACING) || (defined(BACKLIGHT_CONTROL) && defined(TURN_DISPLAY_OFF_AFTER_USECS_OF_INACTIVITY))
int numChangedPixels = framebufferHasNewChangedPixels ? CountNumChangedPixels(framebuffer[0], framebuffer[1]) : 0;
#endif
#ifdef NO_INTERLACING
interlacedUpdate = false;
#elif defined(ALWAYS_INTERLACING)
interlacedUpdate = (numChangedPixels > 0);
#else
uint32_t bytesToSend = numChangedPixels * SPI_BYTESPERPIXEL + (DISPLAY_DRAWABLE_HEIGHT<<1);
interlacedUpdate = ((bytesToSend + spiTaskMemory->spiBytesQueued) * spiUsecsPerByte > tooMuchToUpdateUsecs); // Decide whether to do interlacedUpdate - only updates half of the screen
#endif
if (interlacedUpdate) frameParity = 1-frameParity; // Swap even-odd fields every second time we do an interlaced update (progressive updates ignore field order)
int bytesTransferred = 0;
Span *head = 0;
#if defined(ALL_TASKS_SHOULD_DMA) && defined(UPDATE_FRAMES_WITHOUT_DIFFING)
NoDiffChangedRectangle(head);
#elif defined(ALL_TASKS_SHOULD_DMA) && defined(UPDATE_FRAMES_IN_SINGLE_RECTANGULAR_DIFF)
DiffFramebuffersToSingleChangedRectangle(framebuffer[0], framebuffer[1], head);
#else
// Collect all spans in this image
if (framebufferHasNewChangedPixels || prevFrameWasInterlacedUpdate)
{
// If possible, utilize a faster 4-wide pixel diffing method
#ifdef FAST_BUT_COARSE_PIXEL_DIFF
if (gpuFrameWidth % 4 == 0 && gpuFramebufferScanlineStrideBytes % 8 == 0)
DiffFramebuffersToScanlineSpansFastAndCoarse4Wide(framebuffer[0], framebuffer[1], interlacedUpdate, frameParity, head);
else
#endif
DiffFramebuffersToScanlineSpansExact(framebuffer[0], framebuffer[1], interlacedUpdate, frameParity, head); // If disabled, or framebuffer width is not compatible, use the exact method
}
// Merge spans together on adjacent scanlines - works only if doing a progressive update
if (!interlacedUpdate)
MergeScanlineSpanList(head);
#endif
#ifdef USE_GPU_VSYNC
if (head) // do we have a new frame?
{
// If using vsync, this main thread is responsible for maintaining the frame histogram. If not using vsync,
// but instead are using a dedicated GPU thread, then that dedicated thread maintains the frame histogram,
// in which case this is not needed.
AddHistogramSample(frameObtainedTime);
// We got a new frame, so update contents of the statistics overlay as well
if (!displayOff)
RefreshStatisticsOverlayText();
}
#endif
// Submit spans
if (!displayOff)
for(Span *i = head; i; i = i->next)
{
#ifdef ALIGN_TASKS_FOR_DMA_TRANSFERS
// DMA transfers smaller than 4 bytes are causing trouble, so in order to ensure smooth DMA operation,
// make sure each message is at least 4 bytes in size, hence one pixel spans are forbidden:
if (i->size == 1)
{
if (i->endX < DISPLAY_DRAWABLE_WIDTH) { ++i->endX; ++i->lastScanEndX; }
else --i->x;
++i->size;
}
#endif
// Update the write cursor if needed
#ifndef DISPLAY_WRITE_PIXELS_CMD_DOES_NOT_RESET_WRITE_CURSOR
if (spiY != i->y)
#endif
{
#if defined(MUST_SEND_FULL_CURSOR_WINDOW) || defined(ALIGN_TASKS_FOR_DMA_TRANSFERS)
QUEUE_SET_WRITE_WINDOW_TASK(DISPLAY_SET_CURSOR_Y, displayYOffset + i->y, displayYOffset + gpuFrameHeight - 1);
#else
QUEUE_MOVE_CURSOR_TASK(DISPLAY_SET_CURSOR_Y, displayYOffset + i->y);
#endif
IN_SINGLE_THREADED_MODE_RUN_TASK();
spiY = i->y;
}
if (i->endY > i->y + 1 && (spiX != i->x || spiEndX != i->endX)) // Multiline span?
{
QUEUE_SET_WRITE_WINDOW_TASK(DISPLAY_SET_CURSOR_X, displayXOffset + i->x, displayXOffset + i->endX - 1);
IN_SINGLE_THREADED_MODE_RUN_TASK();
spiX = i->x;
spiEndX = i->endX;
}
else // Singleline span
{
#ifdef ALIGN_TASKS_FOR_DMA_TRANSFERS
if (spiX != i->x || spiEndX < i->endX)
{
QUEUE_SET_WRITE_WINDOW_TASK(DISPLAY_SET_CURSOR_X, displayXOffset + i->x, displayXOffset + gpuFrameWidth - 1);
IN_SINGLE_THREADED_MODE_RUN_TASK();
spiX = i->x;
spiEndX = gpuFrameWidth;
}
#else
if (spiEndX < i->endX) // Need to push the X end window?
{
// We are doing a single line span and need to increase the X window. If possible,
// peek ahead to cater to the next multiline span update if that will be compatible.
int nextEndX = gpuFrameWidth;
for(Span *j = i->next; j; j = j->next)
if (j->endY > j->y+1)
{
if (j->endX >= i->endX) nextEndX = j->endX;
break;
}
QUEUE_SET_WRITE_WINDOW_TASK(DISPLAY_SET_CURSOR_X, displayXOffset + i->x, displayXOffset + nextEndX - 1);
IN_SINGLE_THREADED_MODE_RUN_TASK();
spiX = i->x;
spiEndX = nextEndX;
}
else
#ifndef DISPLAY_WRITE_PIXELS_CMD_DOES_NOT_RESET_WRITE_CURSOR
if (spiX != i->x)
#endif
{
#ifdef MUST_SEND_FULL_CURSOR_WINDOW
QUEUE_SET_WRITE_WINDOW_TASK(DISPLAY_SET_CURSOR_X, displayXOffset + i->x, displayXOffset + spiEndX - 1);
#else
QUEUE_MOVE_CURSOR_TASK(DISPLAY_SET_CURSOR_X, displayXOffset + i->x);
#endif
IN_SINGLE_THREADED_MODE_RUN_TASK();
spiX = i->x;
}
#endif
}
// Submit the span pixels
SPITask *task = AllocTask(i->size*SPI_BYTESPERPIXEL);
task->cmd = DISPLAY_WRITE_PIXELS;
bytesTransferred += task->PayloadSize()+1;
uint16_t *scanline = framebuffer[0] + i->y * (gpuFramebufferScanlineStrideBytes>>1);
uint16_t *prevScanline = framebuffer[1] + i->y * (gpuFramebufferScanlineStrideBytes>>1);
#ifdef OFFLOAD_PIXEL_COPY_TO_DMA_CPP
// If running a singlethreaded build without a separate SPI thread, we can offload the whole flow of the pixel data out to the code in the dma.cpp backend,
// which does the pixel task handoff out to DMA in inline assembly. This is done mainly to save an extra memcpy() when passing data off from GPU to SPI,
// since in singlethreaded mode, snapshotting GPU and sending data to SPI is done sequentially in this main loop.
// In multithreaded builds, this approach cannot be used, since after we snapshot a frame, we need to send it off to SPI thread to process, and make a copy
// anyways to ensure it does not get overwritten.
task->fb = (uint8_t*)(scanline + i->x);
task->prevFb = (uint8_t*)(prevScanline + i->x);
task->width = i->endX - i->x;
#else
uint16_t *data = (uint16_t*)task->data;
for(int y = i->y; y < i->endY; ++y, scanline += gpuFramebufferScanlineStrideBytes>>1, prevScanline += gpuFramebufferScanlineStrideBytes>>1)
{
int endX = (y + 1 == i->endY) ? i->lastScanEndX : i->endX;
int x = i->x;
#ifdef DISPLAY_COLOR_FORMAT_R6X2G6X2B6X2
// Convert from R5G6B5 to R6X2G6X2B6X2 on the fly
while(x < endX)
{
uint16_t pixel = scanline[x++];
uint16_t r = (pixel >> 8) & 0xF8;
uint16_t g = (pixel >> 3) & 0xFC;
uint16_t b = (pixel << 3) & 0xF8;
((uint8_t*)data)[0] = r | (r >> 5); // On red and blue color channels, need to expand 5 bits to 6 bits. Do that by duplicating the highest bit as lowest bit.
((uint8_t*)data)[1] = g;
((uint8_t*)data)[2] = b | (b >> 5);
data = (uint16_t*)((uintptr_t)data + 3);
}
#else
while(x < endX && (x&1)) *data++ = __builtin_bswap16(scanline[x++]);
while(x < (endX&~1U))
{
uint32_t u = *(uint32_t*)(scanline+x);
*(uint32_t*)data = ((u & 0xFF00FF00U) >> 8) | ((u & 0x00FF00FFU) << 8);
data += 2;
x += 2;
}
while(x < endX) *data++ = __builtin_bswap16(scanline[x++]);
#endif
#if !(defined(ALL_TASKS_SHOULD_DMA) && defined(UPDATE_FRAMES_WITHOUT_DIFFING)) // If not diffing, no need to maintain prev frame.
memcpy(prevScanline+i->x, scanline+i->x, (endX - i->x)*FRAMEBUFFER_BYTESPERPIXEL);
#endif
}
#endif
CommitTask(task);
IN_SINGLE_THREADED_MODE_RUN_TASK();
}
#ifdef KERNEL_MODULE_CLIENT
// Wake the kernel module up to run tasks. TODO: This might not be best placed here, we could pre-empt
// to start running tasks already half-way during task submission above.
if (spiTaskMemory->queueHead != spiTaskMemory->queueTail && !(spi->cs & BCM2835_SPI0_CS_TA))
spi->cs |= BCM2835_SPI0_CS_TA;
#endif
// Remember where in the command queue this frame ends, to keep track of the SPI thread's progress over it
if (bytesTransferred > 0)
{
prevFrameEnd = curFrameEnd;
curFrameEnd = spiTaskMemory->queueTail;
}
#if defined(BACKLIGHT_CONTROL) && defined(TURN_DISPLAY_OFF_AFTER_USECS_OF_INACTIVITY)
double percentageOfScreenChanged = (double)numChangedPixels/(DISPLAY_DRAWABLE_WIDTH*DISPLAY_DRAWABLE_HEIGHT);
bool displayIsActive = percentageOfScreenChanged > DISPLAY_CONSIDERED_INACTIVE_PERCENTAGE;
if (displayIsActive)
displayContentsLastChanged = tick();
bool keyboardIsActive = TimeSinceLastKeyboardPress() < TURN_DISPLAY_OFF_AFTER_USECS_OF_INACTIVITY;
if (displayIsActive || keyboardIsActive)
{
if (displayOff)
{
TurnDisplayOn();
displayOff = false;
}
}
else if (!displayOff && tick() - displayContentsLastChanged > TURN_DISPLAY_OFF_AFTER_USECS_OF_INACTIVITY)
{
TurnDisplayOff();
displayOff = true;
}
#endif
#ifdef STATISTICS
if (bytesTransferred > 0)
{
if (frameTimeHistorySize < FRAME_HISTORY_MAX_SIZE)
{
frameTimeHistory[frameTimeHistorySize].interlaced = interlacedUpdate || prevFrameWasInterlacedUpdate;
frameTimeHistory[frameTimeHistorySize++].time = tick();
}
AddFrameCompletionTimeMarker();
}
statsBytesTransferred += bytesTransferred;
#endif
}
DeinitGPU();
DeinitSPI();
CloseMailbox();
CloseKeyboard();
printf("Quit.\n");
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 922 KiB

View File

@ -0,0 +1,24 @@
#pragma once
// Data specific to the Waveshare32b display, as present on FreePlayTech's CM3/Zero devices (https://www.freeplaytech.com/)
// Everything in this header only takes effect when FREEPLAYTECH_WAVESHARE32B is defined.
#ifdef FREEPLAYTECH_WAVESHARE32B
// Default pin assignments. Each one is defined only if no value has been supplied before this header is
// processed, so an externally provided definition takes precedence.
// NOTE(review): presumably these are BCM GPIO numbers - confirm against the SPI/display setup code.
#if !defined(GPIO_TFT_DATA_CONTROL)
#define GPIO_TFT_DATA_CONTROL 22
#endif
#if !defined(GPIO_TFT_RESET_PIN)
#define GPIO_TFT_RESET_PIN 27
#endif
// On FreePlayTech GBA devices, a part of the screen is hidden by the bezels, since the GBA has a 2.8" screen surface area, but a 3.2" display is enclosed inside the case.
// The hidden area is placed under the left edge of the display horizontally, and under top and bottom edges of the display vertically, so reduce those out from the drawable area.
// Effective display area is then 320-18=302 pixels horizontally, and 202 pixels vertically (in landscape direction).
// The meaning of top/left/right/bottom here should be interpreted as the display being oriented in its native direction (which is portrait mode for ILI9341, 240x320 direction).
// In native orientation: 18 covered pixels on the 320 pixel axis leave 302, and 9+29=38 covered pixels on the 240 pixel axis leave 202.
#define DISPLAY_NATIVE_COVERED_TOP_SIDE 18
#define DISPLAY_NATIVE_COVERED_LEFT_SIDE 9
#define DISPLAY_NATIVE_COVERED_RIGHT_SIDE 29
#define DISPLAY_NATIVE_COVERED_BOTTOM_SIDE 0
#endif

510
usr/fbcp-ili9341/gpu.cpp Normal file
View File

@ -0,0 +1,510 @@
#include <bcm_host.h> // bcm_host_init, bcm_host_deinit
#include <linux/futex.h> // FUTEX_WAKE
#include <sys/syscall.h> // SYS_futex
#include <syslog.h> // syslog, LOG_ERR
#include <stdio.h> // fprintf
#include <math.h> // floor
#include "config.h"
#include "gpu.h"
#include "display.h"
#include "tick.h"
#include "util.h"
#include "statistics.h"
#include "mem_alloc.h"
// Forward declaration (defined elsewhere). SnapshotFramebuffer() calls this when a DispmanX call fails.
bool MarkProgramQuitting(void);
// Uncomment these build options to make the display output a random performance test pattern instead of the actual
// display content. Used to debug/measure performance.
// #define RANDOM_TEST_PATTERN
#define RANDOM_TEST_PATTERN_STRIPE_WIDTH DISPLAY_DRAWABLE_WIDTH
#define RANDOM_TEST_PATTERN_FRAME_RATE 120
// DispmanX objects used for frame capture: SnapshotFramebuffer() snapshots 'display' into 'screen_resource'
// and then reads the area described by 'rect' out of 'screen_resource'.
DISPMANX_DISPLAY_HANDLE_T display;
DISPMANX_RESOURCE_HANDLE_T screen_resource;
VC_RECT_T rect;
// History of recently displayed frames (time stamp + interlaced flag per entry) and the number of valid entries in it.
int frameTimeHistorySize = 0;
FrameHistory frameTimeHistory[FRAME_HISTORY_MAX_SIZE] = {};
// Two capture buffers: the polling thread snapshots into [0], and copies it to [1] whenever [0] contains new pixels.
uint16_t *videoCoreFramebuffer[2] = {};
// Count of new frames that have been signaled but not yet consumed. Incremented atomically by the vsync callback
// or the polling thread, and also used as the futex address to wake a waiting thread.
volatile int numNewGpuFrames = 0;
// Offsets added to span coordinates when positioning the display write cursor/window.
int displayXOffset = 0;
int displayYOffset = 0;
// Dimensions, in pixels, of the frame area that is iterated when processing a captured frame.
int gpuFrameWidth = 0;
int gpuFrameHeight = 0;
// Bytes per scanline of the 16-bit capture buffers (shifted right by one to index uint16_t pixels).
int gpuFramebufferScanlineStrideBytes = 0;
// Size of one capture buffer in bytes (amount compared by IsNewFramebuffer() and copied between buffers).
int gpuFramebufferSizeBytes = 0;
// Pixel counts on each side that SnapshotFramebuffer() offsets its destination pointer by when reading from the GPU.
// NOTE(review): presumably the cropped-away overscan area computed in InitGPU() - confirm there.
int excessPixelsLeft = 0;
int excessPixelsRight = 0;
int excessPixelsTop = 0;
int excessPixelsBottom = 0;
// If one first runs content that updates at e.g. 24fps, a video perhaps, the frame rate histogram will lock to that update
// rate and frame snapshots are done at 24fps. Later when user quits watching the video, and returns to e.g. 60fps updated
// launcher menu, there needs to be some mechanism that detects that update rate has now increased, and synchronizes to the
// new update rate. If snapshots keep occurring at fixed 24fps, the increase in content update rate would go unnoticed.
// Therefore maintain a "linear increases/geometric slowdowns" style of factor that pulls the frame snapshotting mechanism
// to drive itself at faster rates, poking snapshots to be performed more often to discover if the content update rate is
// more than what is currently expected.
int eagerFastTrackToSnapshottingFramesEarlierFactor = 0;
// tick() value taken at the start of the most recent SnapshotFramebuffer() call.
uint64_t lastFramePollTime = 0;
// Handle of the GPU polling thread. NOTE(review): presumably the thread running gpu_polling_thread() - created elsewhere.
pthread_t gpuPollingThread;
// Rounds val up to the nearest multiple of 'multiple'. A val that is already a multiple is returned unchanged.
// 'multiple' must be nonzero.
int RoundUpToMultipleOf(int val, int multiple)
{
  // Bump val to just below the next multiple, then strip whatever overshoots the multiple boundary.
  const int bumped = val + multiple - 1;
  return bumped - (bumped % multiple);
}
// Returns true if the newly captured frame differs anywhere from the previous frame, false if the two are identical.
// Both buffers are compared over gpuFramebufferSizeBytes/4 32-bit words, i.e. two 16-bit pixels per comparison.
bool IsNewFramebuffer(uint16_t *possiblyNewFramebuffer, uint16_t *oldFramebuffer)
{
  const uint32_t *candidateWords = (const uint32_t*)possiblyNewFramebuffer;
  const uint32_t *previousWords = (const uint32_t*)oldFramebuffer;
  const int numWords = gpuFramebufferSizeBytes/4;
  for(int i = 0; i < numWords; ++i)
  {
    // The first mismatching word is enough to know the frame has changed.
    if (candidateWords[i] != previousWords[i])
      return true;
  }
  return false;
}
// Captures the current frame into 'destination' and records the capture start time in lastFramePollTime.
// - With RANDOM_TEST_PATTERN defined, no GPU access is done: a synthetic test image is generated instead.
// - Otherwise the display is snapshotted via DispmanX and read back into 'destination' (optionally transposed
//   in software when DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE is defined).
// Returns true on success. If either DispmanX call fails, prints a message, calls MarkProgramQuitting() and
// returns false.
bool SnapshotFramebuffer(uint16_t *destination)
{
lastFramePollTime = tick();
#ifdef RANDOM_TEST_PATTERN
// Generate random noise that updates each frame
// uint32_t randomColor = rand() % 65536;
static int col = 0;
static int barY = 0;
static uint64_t lastTestImage = tick();
uint32_t randomColor = ((31 + ABS(col - 32)) << 5);
uint64_t now = tick();
// Advance the color at most RANDOM_TEST_PATTERN_FRAME_RATE times per second, independent of how often this is called.
if (now - lastTestImage >= 1000000/RANDOM_TEST_PATTERN_FRAME_RATE)
{
col = (col + 2) & 31;
lastTestImage = now;
}
// Duplicate the 16-bit color into both halves of a 32-bit word, so that two pixels are written per store.
randomColor = randomColor | (randomColor << 16);
uint32_t *newfb = (uint32_t*)destination;
for(int y = 0; y < gpuFrameHeight; ++y)
{
int x = 0;
const int XX = RANDOM_TEST_PATTERN_STRIPE_WIDTH>>1;
while(x <= gpuFrameWidth>>1)
{
for(int X = 0; x+X < gpuFrameWidth>>1; ++X)
{
// Scanline barY is filled with all bits set, its two neighboring scanlines with zero, everything else with the test color.
if (y == barY)
newfb[x+X] = 0xFFFFFFFF;
else if (y == barY+1 || y == barY-1)
newfb[x+X] = 0;
else
newfb[x+X] = randomColor;
}
x += XX + 6;
}
// Step to the next scanline (stride is in bytes, newfb addresses 32-bit words).
newfb += gpuFramebufferScanlineStrideBytes>>2;
}
// Move the bar down by one scanline per call, wrapping at the bottom of the frame.
barY = (barY + 1) % gpuFrameHeight;
#else
// Grab a new frame from the GPU. TODO: Figure out a way to get a frame callback for each GPU-rendered frame,
// that would be vastly superior for lower latency, reduced stuttering and lighter processing overhead.
// Currently this implemented method just takes a snapshot of the most current GPU framebuffer contents,
// without any concept of "finished frames". If this is the case, it's possible that this could grab the same
// frame twice, and then potentially missing, or displaying the later appearing new frame at a very last moment.
// Profiling, the following two lines take around ~1msec of time.
int failed = vc_dispmanx_snapshot(display, screen_resource, (DISPMANX_TRANSFORM_T)0);
if (failed)
{
// We cannot do much better here (or do not know what to do), it looks like if vc_dispmanx_snapshot() fails once, it will crash if attempted to be called again, and it will not recover. We can only terminate here. Sad :/
printf("vc_dispmanx_snapshot() failed with return code %d! If this appears related to a change in HDMI/display resolution, see https://github.com/juj/fbcp-ili9341/issues/28 and https://github.com/raspberrypi/userland/issues/461 (try setting fbcp-ili9341 up as an infinitely restarting system service to recover)\n", failed);
MarkProgramQuitting();
return false;
}
// BUG in vc_dispmanx_resource_read_data(!!): If one is capturing a small subrectangle of a large screen resource rectangle, the destination pointer
// is in vc_dispmanx_resource_read_data() incorrectly still taken to point to the top-left corner of the large screen resource, instead of the top-left
// corner of the subrectangle to capture. Therefore do dirty pointer arithmetic to adjust for this. To make this safe, videoCoreFramebuffer is allocated
// double its needed size so that this adjusted pointer does not reference outside allocated memory (if it did, vc_dispmanx_resource_read_data() was seen
// to randomly fail and then subsequently hang if called a second time)
#ifdef DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE
static uint16_t *tempTransposeBuffer = 0; // Allocate as static here to keep the number of #ifdefs down a bit
// The captured image is in the untransposed orientation, so its width/height are the frame's height/width (plus the excess pixels).
const int pixelWidth = gpuFrameHeight+excessPixelsTop+excessPixelsBottom;
const int pixelHeight = gpuFrameWidth + excessPixelsLeft + excessPixelsRight;
const int stride = RoundUpToMultipleOf(pixelWidth*sizeof(uint16_t), 32);
if (!tempTransposeBuffer)
{
// Allocated once on first use and never freed. The requested size is twice pixelHeight*stride, and the pointer is
// then advanced by pixelHeight*stride bytes, so that the backwards-adjusted destPtr below has room in front of it.
// NOTE(review): assumes the first argument of Malloc() is a size in bytes - confirm in mem_alloc.h.
tempTransposeBuffer = (uint16_t *)Malloc(pixelHeight * stride * 2, "gpu.cpp tempTransposeBuffer");
tempTransposeBuffer += pixelHeight * (stride>>1);
}
// Shift the read destination backwards by the excess rows/pixels (see the BUG note above).
uint16_t *destPtr = tempTransposeBuffer - excessPixelsLeft * (stride >> 1) - excessPixelsTop;
#else
// Shift the read destination backwards by excessPixelsTop scanlines and excessPixelsLeft pixels (see the BUG note above).
uint16_t *destPtr = destination - excessPixelsTop*(gpuFramebufferScanlineStrideBytes>>1) - excessPixelsLeft;
const int stride = gpuFramebufferScanlineStrideBytes;
#endif
failed = vc_dispmanx_resource_read_data(screen_resource, &rect, destPtr, stride);
if (failed)
{
printf("vc_dispmanx_resource_read_data failed with return code %d!\n", failed);
MarkProgramQuitting();
return false;
}
#ifdef DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE
// Transpose the snapshotted frame from landscape to portrait. The following takes around 0.5-1.0 msec
// of extra CPU time, so while this improves tearing to be perhaps a bit nicer visually, it probably
// is not good on the Pi Zero.
// Destination pixel (x, y) is read from position (y, x) of the temporary capture buffer.
for(int y = 0; y < gpuFrameHeight; ++y)
for(int x = 0; x < gpuFrameWidth; ++x)
destination[y*(gpuFramebufferScanlineStrideBytes>>1)+x] = tempTransposeBuffer[x*(stride>>1)+y];
#endif
#endif
return true;
}
#ifdef USE_GPU_VSYNC
// Vsync callback: signals the main thread that a new GPU frame may be available.
// Neither argument is used. Callbacks are decimated against a fixed 60 Hz rate: each call accumulates
// TARGET_FRAME_RATE, and a frame is signaled only once a full 60 has been collected, so that e.g. a
// TARGET_FRAME_RATE of 30 or 20 signals on every second or third callback.
void VsyncCallback(DISPMANX_UPDATE_HANDLE_T u, void *arg)
{
  static int vsyncAccumulator = 0;
  vsyncAccumulator += TARGET_FRAME_RATE;
  if (vsyncAccumulator >= 60)
  {
    vsyncAccumulator -= 60;
    __atomic_fetch_add(&numNewGpuFrames, 1, __ATOMIC_SEQ_CST);
    // Wake the main thread if it was sleeping to get a new frame
    syscall(SYS_futex, &numNewGpuFrames, FUTEX_WAKE, 1, 0, 0, 0);
  }
}
#else // !USE_GPU_VSYNC
extern volatile bool programRunning;
void *gpu_polling_thread(void*)
{
uint64_t lastNewFrameReceivedTime = tick();
while(programRunning)
{
#ifdef SAVE_BATTERY_BY_SLEEPING_UNTIL_TARGET_FRAME
const int64_t earlyFramePrediction = 500;
uint64_t earliestNextFrameArrivaltime = lastNewFrameReceivedTime + 1000000/TARGET_FRAME_RATE - earlyFramePrediction;
uint64_t now = tick();
if (earliestNextFrameArrivaltime > now)
usleep(earliestNextFrameArrivaltime - now);
#endif
#if defined(SAVE_BATTERY_BY_PREDICTING_FRAME_ARRIVAL_TIMES) || defined(SAVE_BATTERY_BY_SLEEPING_WHEN_IDLE)
uint64_t nextFrameArrivalTime = PredictNextFrameArrivalTime();
int64_t timeToSleep = nextFrameArrivalTime - tick();
const int64_t minimumSleepTime = 150; // Don't sleep if the next frame is expected to arrive in less than this much time
if (timeToSleep > minimumSleepTime)
usleep(timeToSleep - minimumSleepTime);
#endif
uint64_t t0 = tick();
bool gotNewFramebuffer = SnapshotFramebuffer(videoCoreFramebuffer[0]);
// Check the pixel contents of the snapshot to see if we actually received a new frame to render
gotNewFramebuffer = gotNewFramebuffer && IsNewFramebuffer(videoCoreFramebuffer[0], videoCoreFramebuffer[1]);
if (gotNewFramebuffer)
{
lastNewFrameReceivedTime = t0;
AddHistogramSample(lastNewFrameReceivedTime);
}
uint64_t t1 = tick();
if (!gotNewFramebuffer)
{
#ifdef STATISTICS
__atomic_fetch_add(&timeWastedPollingGPU, t1-t0, __ATOMIC_RELAXED);
#endif
// We did not get a new frame - halve the eager fast tracking factor geometrically, we are probably
// near synchronized to the update rate of the content.
eagerFastTrackToSnapshottingFramesEarlierFactor /= 2;
continue;
}
else
{
// We got a new framebuffer, so linearly increase the driving rate to snapshot next framebuffer a bit earlier, in case
// our update rate is too slow for the content.
++eagerFastTrackToSnapshottingFramesEarlierFactor;
memcpy(videoCoreFramebuffer[1], videoCoreFramebuffer[0], gpuFramebufferSizeBytes);
__atomic_fetch_add(&numNewGpuFrames, 1, __ATOMIC_SEQ_CST);
syscall(SYS_futex, &numNewGpuFrames, FUTEX_WAKE, 1, 0, 0, 0); // Wake the main thread if it was sleeping to get a new frame
}
}
pthread_exit(0);
}
#endif // ~USE_GPU_VSYNC
// Since we are polling for received GPU frames, run a histogram to predict when the next frame will arrive.
// The histogram needs to be sufficiently small as to not cause a lag when frame rate suddenly changes on e.g.
// main menu <-> ingame transitions
// Ring buffer of frame arrival time stamps, written by AddHistogramSample().
uint64_t frameArrivalTimes[HISTOGRAM_SIZE];
// Index in frameArrivalTimes where the next sample will be written (wraps around at HISTOGRAM_SIZE).
uint64_t frameArrivalTimesTail = 0;
// Number of samples currently considered valid (at most HISTOGRAM_SIZE; reduced when old samples are expired).
int histogramSize = 0;
// If framerate has been high for a long time, but then drops to e.g. 1fps, it would take a very very long time to fill up
// the histogram of these 1fps intervals, so fbcp-ili9341 would take a long time to go back to sleep. Introduce a max age
// for histogram entries of 10 seconds, so that if refresh rate drops from 60hz to 1hz, then after 10 seconds the histogram
// buffer will have only these 1fps intervals in it, and it will go to sleep to yield CPU time.
// The value is in the same unit as the time stamps, i.e. 10000000 corresponds to the 10 seconds mentioned above.
#define HISTOGRAM_MAX_SAMPLE_AGE 10000000
// Records the arrival time t of a newly seen frame in the frameArrivalTimes ring buffer, and afterwards
// drops the samples at the old end of the histogram that are more than HISTOGRAM_MAX_SAMPLE_AGE older than t.
void AddHistogramSample(uint64_t t)
{
  // Store at the current tail slot, then advance the tail with wrap-around.
  const uint64_t writeSlot = frameArrivalTimesTail;
  frameArrivalTimes[writeSlot] = t;
  frameArrivalTimesTail = (writeSlot + 1) % HISTOGRAM_SIZE;

  // The sample count grows until the buffer is full and then stays at capacity.
  if (histogramSize < HISTOGRAM_SIZE)
    ++histogramSize;

  // Expire too old entries: shrink the count for as long as the last counted sample exceeds the maximum age.
  for(;;)
  {
    if (t - GET_HISTOGRAM(histogramSize-1) <= HISTOGRAM_MAX_SAMPLE_AGE)
      break;
    --histogramSize;
  }
}
// qsort() comparator that orders uint64_t values ascending.
// Returns a negative value if *e1 < *e2, zero if they are equal, and a positive value if *e1 > *e2.
// A comparator that only ever returns 0 or 1 (i.e. reports "equal" when *e1 < *e2) violates the qsort()
// contract, which leaves the resulting order unspecified, so all three outcomes are produced here.
int cmp(const void *e1, const void *e2)
{
  const uint64_t lhs = *(const uint64_t*)e1;
  const uint64_t rhs = *(const uint64_t*)e2;
  // Compare instead of subtracting: a 64-bit difference would not fit the int return value.
  return (lhs > rhs) - (lhs < rhs);
}
// Estimates the current interval between content frames, in the same time unit as tick() (1000000 per second).
// Returns:
//  - 1000000/RANDOM_TEST_PATTERN_FRAME_RATE if RANDOM_TEST_PATTERN is enabled,
//  - 1000000/TARGET_FRAME_RATE if no frame has been recorded yet, or if frame arrival prediction is disabled,
//  - a fixed long interval if the content has been idle (with SAVE_BATTERY_BY_SLEEPING_WHEN_IDLE),
//  - otherwise an estimate derived from the recorded frame arrival times, limited to the range
//    [1000000/TARGET_FRAME_RATE, 100000].
uint64_t EstimateFrameRateInterval()
{
#ifdef RANDOM_TEST_PATTERN
  return 1000000/RANDOM_TEST_PATTERN_FRAME_RATE;
#endif
  if (histogramSize == 0) return 1000000/TARGET_FRAME_RATE;
  uint64_t mostRecentFrame = GET_HISTOGRAM(0);
  // High sleep mode hacks to save battery when ~idle: (These could be removed with an event based VideoCore display refresh API)
  uint64_t timeNow = tick();
#ifdef SAVE_BATTERY_BY_SLEEPING_WHEN_IDLE
  // "Deep sleep" options: if the user leaves the device with static content on screen for a long time.
  if (timeNow - mostRecentFrame > 60000000) { histogramSize = 1; return 500000; } // if it's been more than one minute since last seen update, assume interval of 500ms.
  // This function returns an interval, not a point in time, so return the plain 100ms interval here
  // (an absolute time stamp such as lastFramePollTime + 100000 would be interpreted by callers as an enormous interval).
  if (timeNow - mostRecentFrame > 5000000) return 100000; // if it's been more than 5 seconds since last seen update, assume interval of 100ms.
#endif
#ifndef SAVE_BATTERY_BY_PREDICTING_FRAME_ARRIVAL_TIMES
  return 1000000/TARGET_FRAME_RATE;
#else
  if (histogramSize < 2) return 100000; // Frame histogram needs to have at least a few entries to bootstrap, if there's very few, either refresh rate is low, or fbcp-ili9341 just started
  // Look at the intervals of all previous arrived frames, and take some percentile value as our expected current frame rate
  uint64_t intervals[HISTOGRAM_SIZE-1];
  for(int i = 0; i < histogramSize-1; ++i)
    intervals[i] = MIN(100000, GET_HISTOGRAM(i) - GET_HISTOGRAM(i+1)); // each interval is capped to 100ms
  qsort(intervals, histogramSize-1, sizeof(uint64_t), cmp);
  // Apply frame rate increase discovery factor to both the percentile position and the interpreted frame interval to catch
  // up with display update rate if it has increased
  int percentile = (histogramSize-1)*2/5;
  percentile = MAX(percentile-eagerFastTrackToSnapshottingFramesEarlierFactor, 0);
  uint64_t interval = intervals[percentile];
  // Fast tracking #1: Always look at two most recent frames in addition to the ~40% percentile and follow whichever is a shorter period of time
  interval = MIN(interval, GET_HISTOGRAM(0) - GET_HISTOGRAM(1));
  // Fast tracking #2: if we seem to always get a new frame whenever snapshotting, we should try speeding up
  // (computed in signed arithmetic so that the subtraction cannot wrap around; never goes below one target frame period)
  interval = MAX((int64_t)interval - eagerFastTrackToSnapshottingFramesEarlierFactor*1000, (int64_t)1000000/TARGET_FRAME_RATE);
  if (interval > 100000) interval = 100000;
  return MAX(interval, 1000000/TARGET_FRAME_RATE);
#endif
}
// Predicts the point in time (in tick() units) at which the next content frame is expected to arrive.
// Frames are assumed to arrive at regular EstimateFrameRateInterval() steps counted from the most recently
// recorded frame. Returns the current time if a frame was expected only a short while ago and has probably
// just been missed.
uint64_t PredictNextFrameArrivalTime()
{
  // Without any recorded frame, treat "now" as the most recent frame.
  uint64_t lastSeenFrame;
  if (histogramSize > 0) lastSeenFrame = GET_HISTOGRAM(0);
  else lastSeenFrame = tick();
  // High sleep mode hacks to save battery when ~idle: (These could be removed with an event based VideoCore display refresh API)
  uint64_t currentTime = tick();
#ifdef SAVE_BATTERY_BY_SLEEPING_WHEN_IDLE
  // "Deep sleep" options: if the user leaves the device with static content on screen for a long time.
  const uint64_t idleTime = currentTime - lastSeenFrame;
  if (idleTime > 5000000)
  {
    // More than 5 seconds since the last seen update: poll 100ms after the previous poll.
    // After more than one minute, additionally shrink the histogram down to a single entry.
    if (idleTime > 60000000) histogramSize = 1;
    return lastFramePollTime + 100000;
  }
#endif
  const uint64_t period = EstimateFrameRateInterval();
  // Assume that frames are arriving at times lastSeenFrame + k * period.
  // Find the smallest integer k such that lastSeenFrame + k * period >= currentTime,
  // i.e. k = ceil((currentTime - lastSeenFrame) / period)
  const uint64_t periodsElapsed = (currentTime - lastSeenFrame + period - 1) / period;
  const uint64_t upcomingFrame = lastSeenFrame + periodsElapsed * period;
  const uint64_t precedingFrame = upcomingFrame - period;
  // If there should have been a frame less than 1/3rd of a period ago (and it is not the frame that was already
  // seen), assume it was just missed and report back "the next frame is right now"
  const bool justMissedFrame = (currentTime - precedingFrame < period/3) && (precedingFrame > lastSeenFrame);
  return justMissedFrame ? currentTime : upcomingFrame;
}
void InitGPU()
{
// Initialize GPU frame grabbing subsystem
bcm_host_init();
display = vc_dispmanx_display_open(0);
if (!display) FATAL_ERROR("vc_dispmanx_display_open failed! Make sure to have hdmi_force_hotplug=1 setting in /boot/config.txt");
DISPMANX_MODEINFO_T display_info;
int ret = vc_dispmanx_display_get_info(display, &display_info);
if (ret) FATAL_ERROR("vc_dispmanx_display_get_info failed!");
#ifdef DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE
// Pretend that the display framebuffer would be in portrait mode for the purposes of size computation etc.
// Snapshotting code transposes the obtained framebuffer immediately after capture from landscape to portrait to make it so.
SWAPU32(display_info.width, display_info.height);
printf("DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE: Swapping width/height to update display in portrait mode to minimize tearing.\n");
#endif
// We may need to scale the main framebuffer to fit the native pixel size of the display. Always want to do such scaling in aspect ratio fixed mode to not stretch the image.
// (For non-square pixels or similar, could apply a correction factor here to fix aspect ratio)
// Often it happens that the content that is being rendered already has black letterboxes/pillarboxes if it was produced for a different aspect ratio than
// what the current HDMI resolution is. However the current HDMI resolution might not be in the same aspect ratio as DISPLAY_DRAWABLE_WIDTH x DISPLAY_DRAWABLE_HEIGHT.
// Therefore we may be aspect ratio correcting content that has already letterboxes/pillarboxes on it, which can result in letterboxes-on-pillarboxes, or vice versa.
// To enable removing the double aspect ratio correction, the following settings enable "overscan": crop left/right and top/down parts of the source image
// to remove the letterboxed parts of the source. This overscan method can also be used to crop excess edges of old emulator based games intended for analog TVs,
// e.g. NES games often had graphical artifacts on left or right edge of the screen when the game scrolls, which usually were hidden on analog TVs with overscan.
/* In /opt/retropie/configs/nes/retroarch.cfg, if running fceumm NES emulator, put:
aspect_ratio_index = "22"
custom_viewport_width = "256"
custom_viewport_height = "224"
custom_viewport_x = "32"
custom_viewport_y = "8"
(see https://github.com/RetroPie/RetroPie-Setup/wiki/Smaller-RetroArch-Screen)
and configure /boot/config.txt to 320x240 HDMI mode to get pixel perfect rendering without blurring scaling.
Curiously, if using quicknes emulator instead, it seems to render to a horizontally 16 pixels smaller resolution. Therefore put in
aspect_ratio_index = "22"
custom_viewport_width = "240"
custom_viewport_height = "224"
custom_viewport_x = "40"
custom_viewport_y = "8"
instead for pixel perfect rendering. Also in /opt/retropie/configs/all/retroarch.cfg, set
video_fullscreen_x = "320"
video_fullscreen_y = "240"
*/
// The overscan values are in normalized 0.0 .. 1.0 percentages of the total width/height of the screen.
double overscanLeft = 0.00;
double overscanRight = 0.00;
double overscanTop = 0.00;
double overscanBottom = 0.00;
// If specified, computes overscan that crops away equally much content from all sides of the source frame
// to display the center of the source frame pixel perfect.
#ifdef DISPLAY_CROPPED_INSTEAD_OF_SCALING
if (DISPLAY_DRAWABLE_WIDTH < display_info.width)
{
overscanLeft = (display_info.width - DISPLAY_DRAWABLE_WIDTH) * 0.5 / display_info.width;
overscanRight = overscanLeft;
}
if (DISPLAY_DRAWABLE_HEIGHT < display_info.height)
{
overscanTop = (display_info.height - DISPLAY_DRAWABLE_HEIGHT) * 0.5 / display_info.height;
overscanBottom = overscanTop;
}
#endif
// Overscan must be actual pixels - can't be fractional, so round the overscan %s so that they align with
// pixel boundaries of the source image.
overscanLeft = (double)ROUND_TO_FLOOR_INT(display_info.width * overscanLeft) / display_info.width;
overscanRight = (double)ROUND_TO_CEIL_INT(display_info.width * overscanRight) / display_info.width;
overscanTop = (double)ROUND_TO_FLOOR_INT(display_info.height * overscanTop) / display_info.height;
overscanBottom = (double)ROUND_TO_CEIL_INT(display_info.height * overscanBottom) / display_info.height;
int relevantDisplayWidth = ROUND_TO_NEAREST_INT(display_info.width * (1.0 - overscanLeft - overscanRight));
int relevantDisplayHeight = ROUND_TO_NEAREST_INT(display_info.height * (1.0 - overscanTop - overscanBottom));
printf("Relevant source display area size with overscan cropped away: %dx%d.\n", relevantDisplayWidth, relevantDisplayHeight);
double scalingFactorWidth = (double)DISPLAY_DRAWABLE_WIDTH/relevantDisplayWidth;
double scalingFactorHeight = (double)DISPLAY_DRAWABLE_HEIGHT/relevantDisplayHeight;
#ifndef DISPLAY_BREAK_ASPECT_RATIO_WHEN_SCALING
// If doing aspect ratio correct scaling, scale both width and height by equal proportions
scalingFactorWidth = scalingFactorHeight = MIN(scalingFactorWidth, scalingFactorHeight);
#endif
// Since display resolution must be full pixels and not fractional, round the scaling to nearest pixel size
// (and recompute after the subpixel rounding what the actual scaling factor ends up being)
int scaledWidth = ROUND_TO_NEAREST_INT(relevantDisplayWidth * scalingFactorWidth);
int scaledHeight = ROUND_TO_NEAREST_INT(relevantDisplayHeight * scalingFactorHeight);
scalingFactorWidth = (double)scaledWidth/relevantDisplayWidth;
scalingFactorHeight = (double)scaledHeight/relevantDisplayHeight;
displayXOffset = DISPLAY_COVERED_LEFT_SIDE + (DISPLAY_DRAWABLE_WIDTH - scaledWidth) / 2;
displayYOffset = DISPLAY_COVERED_TOP_SIDE + (DISPLAY_DRAWABLE_HEIGHT - scaledHeight) / 2;
excessPixelsLeft = ROUND_TO_NEAREST_INT(display_info.width * overscanLeft * scalingFactorWidth);
excessPixelsRight = ROUND_TO_NEAREST_INT(display_info.width * overscanRight * scalingFactorWidth);
excessPixelsTop = ROUND_TO_NEAREST_INT(display_info.height * overscanTop * scalingFactorHeight);
excessPixelsBottom = ROUND_TO_NEAREST_INT(display_info.height * overscanBottom * scalingFactorHeight);
gpuFrameWidth = scaledWidth;
gpuFrameHeight = scaledHeight;
gpuFramebufferScanlineStrideBytes = RoundUpToMultipleOf((gpuFrameWidth + excessPixelsLeft + excessPixelsRight) * 2, 32);
gpuFramebufferSizeBytes = gpuFramebufferScanlineStrideBytes * (gpuFrameHeight + excessPixelsTop + excessPixelsBottom);
// BUG in vc_dispmanx_resource_read_data(!!): If one is capturing a small subrectangle of a large screen resource rectangle, the destination pointer
// is in vc_dispmanx_resource_read_data() incorrectly still taken to point to the top-left corner of the large screen resource, instead of the top-left
// corner of the subrectangle to capture. Therefore do dirty pointer arithmetic to adjust for this. To make this safe, videoCoreFramebuffer is allocated
// double its needed size so that this adjusted pointer does not reference outside allocated memory (if it did, vc_dispmanx_resource_read_data() was seen
// to randomly fail and then subsequently hang if called a second time)
videoCoreFramebuffer[0] = (uint16_t *)Malloc(gpuFramebufferSizeBytes*2, "gpu.cpp framebuffer0");
videoCoreFramebuffer[1] = (uint16_t *)Malloc(gpuFramebufferSizeBytes*2, "gpu.cpp framebuffer1");
memset(videoCoreFramebuffer[0], 0, gpuFramebufferSizeBytes*2);
memset(videoCoreFramebuffer[1], 0, gpuFramebufferSizeBytes*2);
videoCoreFramebuffer[0] += (gpuFramebufferSizeBytes>>1);
videoCoreFramebuffer[1] += (gpuFramebufferSizeBytes>>1);
syslog(LOG_INFO, "GPU display is %dx%d. SPI display is %dx%d with drawable area of %dx%d. Applying scaling factor horiz=%.2fx & vert=%.2fx, xOffset: %d, yOffset: %d, scaledWidth: %d, scaledHeight: %d", display_info.width, display_info.height, DISPLAY_WIDTH, DISPLAY_HEIGHT, DISPLAY_DRAWABLE_WIDTH, DISPLAY_DRAWABLE_HEIGHT, scalingFactorWidth, scalingFactorHeight, displayXOffset, displayYOffset, scaledWidth, scaledHeight);
printf("Source GPU display is %dx%d. Output SPI display is %dx%d with a drawable area of %dx%d. Applying scaling factor horiz=%.2fx & vert=%.2fx, xOffset: %d, yOffset: %d, scaledWidth: %d, scaledHeight: %d\n", display_info.width, display_info.height, DISPLAY_WIDTH, DISPLAY_HEIGHT, DISPLAY_DRAWABLE_WIDTH, DISPLAY_DRAWABLE_HEIGHT, scalingFactorWidth, scalingFactorHeight, displayXOffset, displayYOffset, scaledWidth, scaledHeight);
uint32_t image_prt;
printf("Creating dispmanX resource of size %dx%d (aspect ratio=%f).\n", scaledWidth + excessPixelsLeft + excessPixelsRight, scaledHeight + excessPixelsTop + excessPixelsBottom, (double)(scaledWidth + excessPixelsLeft + excessPixelsRight) / (scaledHeight + excessPixelsTop + excessPixelsBottom));
#ifdef DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE
screen_resource = vc_dispmanx_resource_create(VC_IMAGE_RGB565, scaledHeight + excessPixelsTop + excessPixelsBottom, scaledWidth + excessPixelsLeft + excessPixelsRight, &image_prt);
vc_dispmanx_rect_set(&rect, excessPixelsTop, excessPixelsLeft, scaledHeight, scaledWidth);
#else
screen_resource = vc_dispmanx_resource_create(VC_IMAGE_RGB565, scaledWidth + excessPixelsLeft + excessPixelsRight, scaledHeight + excessPixelsTop + excessPixelsBottom, &image_prt);
vc_dispmanx_rect_set(&rect, excessPixelsLeft, excessPixelsTop, scaledWidth, scaledHeight);
#endif
if (!screen_resource) FATAL_ERROR("vc_dispmanx_resource_create failed!");
printf("GPU grab rectangle is offset x=%d,y=%d, size w=%dxh=%d, aspect ratio=%f\n", excessPixelsLeft, excessPixelsTop, scaledWidth, scaledHeight, (double)scaledWidth / scaledHeight);
#ifdef USE_GPU_VSYNC
// Register to receive vsync notifications. This is a heuristic, since the application might not be locked at vsync, and even
// if it was, this signal is not a guaranteed edge trigger for availability of new frames.
vc_dispmanx_vsync_callback(display, VsyncCallback, 0);
#else
// Record some fake samples to frame rate histogram to fast track it to warm state.
uint64_t now = tick();
for(int i = 0; i < HISTOGRAM_SIZE; ++i)
AddHistogramSample(now - 1000000ULL*(HISTOGRAM_SIZE-i) / TARGET_FRAME_RATE);
int rc = pthread_create(&gpuPollingThread, NULL, gpu_polling_thread, NULL); // After creating the thread, it is assumed to have ownership of the SPI bus, so no SPI chat on the main thread after this.
if (rc != 0) FATAL_ERROR("Failed to create GPU polling thread!");
#endif
}
// Tears down the GPU capture state: stops frame notifications (vsync callback or polling thread,
// depending on USE_GPU_VSYNC), deletes the DispmanX screen resource, closes the display handle
// and finally calls bcm_host_deinit().
void DeinitGPU()
{
#ifndef USE_GPU_VSYNC
  // Polling mode: block until the GPU polling thread has exited, then reset its handle.
  pthread_join(gpuPollingThread, NULL);
  gpuPollingThread = (pthread_t)0;
#else
  // Vsync mode: unregister the vsync callback, but only if a display handle exists.
  if (display)
  {
    vc_dispmanx_vsync_callback(display, NULL, 0);
  }
#endif

  // Delete the capture resource before closing the display it belongs to.
  if (screen_resource)
  {
    vc_dispmanx_resource_delete(screen_resource);
    screen_resource = 0;
  }

  if (display)
  {
    vc_dispmanx_display_close(display);
    display = 0;
  }

  bcm_host_deinit();
}

47
usr/fbcp-ili9341/gpu.h Normal file
View File

@ -0,0 +1,47 @@
#pragma once
#include <inttypes.h>
// Sets up GPU frame capture. Among other things it computes the scaled capture size, allocates
// videoCoreFramebuffer[], creates the DispmanX screen resource and either registers a vsync callback
// (USE_GPU_VSYNC) or starts the GPU polling thread.
void InitGPU(void);
// Stops the vsync callback / joins the polling thread, deletes the DispmanX resource, closes the display
// and calls bcm_host_deinit().
void DeinitGPU(void);
// Records a frame arrival timestamp into the frame rate histogram.
// NOTE(review): t looks like it is in the same units as tick() (presumably microseconds) - confirm.
void AddHistogramSample(uint64_t t);
bool SnapshotFramebuffer(uint16_t *destination);
bool IsNewFramebuffer(uint16_t *possiblyNewFramebuffer, uint16_t *oldFramebuffer);
uint64_t EstimateFrameRateInterval(void);
uint64_t PredictNextFrameArrivalTime(void);
// Two capture buffers. InitGPU() allocates each at twice gpuFramebufferSizeBytes and advances the stored
// pointer by gpuFramebufferSizeBytes bytes, so these do not point at the start of their allocations.
extern uint16_t *videoCoreFramebuffer[2];
extern volatile int numNewGpuFrames;
// Top-left position of the scaled frame on the display: covered side plus centering inside the drawable area.
extern int displayXOffset;
extern int displayYOffset;
// Size in pixels of the scaled frame that is grabbed from the GPU.
extern int gpuFrameWidth;
extern int gpuFrameHeight;
// Bytes per scanline: (gpuFrameWidth + excessPixelsLeft + excessPixelsRight) * 2, rounded up to a multiple of 32.
extern int gpuFramebufferScanlineStrideBytes;
// gpuFramebufferScanlineStrideBytes * (gpuFrameHeight + excessPixelsTop + excessPixelsBottom).
extern int gpuFramebufferSizeBytes;
// Overscan that is cropped away on each side, expressed in pixels after scaling.
extern int excessPixelsLeft;
extern int excessPixelsRight;
extern int excessPixelsTop;
extern int excessPixelsBottom;
// Capacity of frameTimeHistory[].
#define FRAME_HISTORY_MAX_SIZE 240
extern int frameTimeHistorySize;
// One entry of the frame time history.
struct FrameHistory
{
uint64_t time;
bool interlaced;
};
extern FrameHistory frameTimeHistory[FRAME_HISTORY_MAX_SIZE];
// Capacity of frameArrivalTimes[]; indices wrap modulo this value (see GET_HISTOGRAM).
#define HISTOGRAM_SIZE 240
extern uint64_t frameArrivalTimes[HISTOGRAM_SIZE];
extern uint64_t frameArrivalTimesTail;
extern int histogramSize;
// Returns Nth most recent entry in the frame times histogram, 0 = most recent, (histogramSize-1) = oldest
#define GET_HISTOGRAM(idx) frameArrivalTimes[(frameArrivalTimesTail - 1 - (idx) + HISTOGRAM_SIZE) % HISTOGRAM_SIZE]
// Source framebuffer captured from DispmanX is (currently) always 16-bits R5G6B5
#define FRAMEBUFFER_BYTESPERPIXEL 2

View File

@ -0,0 +1,122 @@
#include "config.h"
// TODO: Share common parts of this file with ILI9341 to avoid code duplication
#ifdef HX8357D
#include "spi.h"
#include <memory.h>
#include <stdio.h>
// Initializes the HX8357D display controller over SPI.
// Sequence: optional hardware reset pulse, drop the SPI clock to a slow divisor, software reset, display off,
// program MADCTL (color order / orientation), 16 bits/pixel format and color inversion, sleep out, display on,
// optionally switch the backlight on, clear the screen, and finally restore SPI_BUS_CLOCK_DIVISOR.
void InitHX8357D()
{
// If a Reset pin is defined, toggle it briefly high->low->high to enable the device. Some devices do not have a reset pin, in which case compile with GPIO_TFT_RESET_PIN left undefined.
#if defined(GPIO_TFT_RESET_PIN) && GPIO_TFT_RESET_PIN >= 0
printf("Resetting display at reset GPIO pin %d\n", GPIO_TFT_RESET_PIN);
SET_GPIO_MODE(GPIO_TFT_RESET_PIN, 1);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
CLEAR_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
#endif
// Do the initialization with a very low SPI bus speed, so that it will succeed even if the bus speed chosen by the user is too high.
spi->clk = 34;
__sync_synchronize();
BEGIN_SPI_COMMUNICATION();
{
SPI_TRANSFER(0x01/*Software Reset*/);
usleep(5*1000);
SPI_TRANSFER(0x28/*Display OFF*/);
// Bit flags used below to compose the byte written to the MADCTL (0x36) register.
#define MADCTL_BGR_PIXEL_ORDER (1<<3)
#define MADCTL_ROW_COLUMN_EXCHANGE (1<<5)
#define MADCTL_COLUMN_ADDRESS_ORDER_SWAP (1<<6)
#define MADCTL_ROW_ADDRESS_ORDER_SWAP (1<<7)
#define MADCTL_ROTATE_180_DEGREES (MADCTL_COLUMN_ADDRESS_ORDER_SWAP | MADCTL_ROW_ADDRESS_ORDER_SWAP)
uint8_t madctl = 0;
// The BGR bit is set by default and left clear only when DISPLAY_SWAP_BGR is defined.
#ifndef DISPLAY_SWAP_BGR
madctl |= MADCTL_BGR_PIXEL_ORDER;
#endif
#if defined(DISPLAY_FLIP_ORIENTATION_IN_HARDWARE)
madctl |= MADCTL_ROW_COLUMN_EXCHANGE;
#endif
#ifdef DISPLAY_ROTATE_180_DEGREES
// XOR toggles both the row and the column address order bits.
madctl ^= MADCTL_ROTATE_180_DEGREES;
#endif
SPI_TRANSFER(0x36/*MADCTL: Memory Access Control*/, madctl);
SPI_TRANSFER(0x3A/*Interface Pixel Format*/, 0x55/*16 bits/pixel*/);
#ifdef DISPLAY_INVERT_COLORS
SPI_TRANSFER(0x21/*Display Inversion ON*/);
#else
SPI_TRANSFER(0x20/*Display Inversion OFF*/);
#endif
SPI_TRANSFER(0x11/*Sleep Out*/);
usleep(120 * 1000);
SPI_TRANSFER(0x29/*Display ON*/);
#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
printf("Setting TFT backlight on at pin %d\n", GPIO_TFT_BACKLIGHT);
SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01); // Set backlight pin to digital 0/1 output mode (0x01) in case it had been PWM controlled
SET_GPIO(GPIO_TFT_BACKLIGHT); // And turn the backlight on.
#endif
ClearScreen();
}
#ifndef USE_DMA_TRANSFERS // For DMA transfers, keep SPI CS & TA active.
END_SPI_COMMUNICATION();
#endif
// And speed up to the desired operation speed finally after init is done.
usleep(10 * 1000); // Delay a bit before restoring CLK, or otherwise this has been observed to cause the display not init if done back to back after the clear operation above.
spi->clk = SPI_BUS_CLOCK_DIVISOR;
}
// Switches the TFT backlight off. Compiles to an empty function unless both
// GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined.
void TurnBacklightOff()
{
#if defined(BACKLIGHT_CONTROL) && defined(GPIO_TFT_BACKLIGHT)
  // The pin may have been left under PWM control, so force it to digital 0/1 output mode (0x01) first.
  SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01);
  // Clearing the pin turns the backlight off.
  CLEAR_GPIO(GPIO_TFT_BACKLIGHT);
#endif
}
// Turns the display off. At present this only switches the backlight off; the
// controller-level Display OFF / Sleep sequence is compiled out.
void TurnDisplayOff()
{
  TurnBacklightOff();

#if 0
  // Disabled: put the controller itself to sleep.
  QUEUE_SPI_TRANSFER(0x28/*Display OFF*/);
  QUEUE_SPI_TRANSFER(0x10/*Enter Sleep Mode*/);
  // Sleep Out may be sent 120 msecs after entering sleep mode at the earliest, so wait that long synchronously.
  usleep(120*1000);
#endif
}
// Turns the display back on. The controller-level wake-up sequence is compiled out, so this only
// switches the backlight on (and only when GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are both defined).
void TurnDisplayOn()
{
#if 0
  // Disabled: wake the controller from sleep and re-enable the display.
  QUEUE_SPI_TRANSFER(0x11/*Sleep Out*/);
  usleep(120 * 1000);
  QUEUE_SPI_TRANSFER(0x29/*Display ON*/);
#endif

#if defined(BACKLIGHT_CONTROL) && defined(GPIO_TFT_BACKLIGHT)
  // The pin may have been left under PWM control, so force it to digital 0/1 output mode (0x01) first.
  SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01);
  // Setting the pin turns the backlight on.
  SET_GPIO(GPIO_TFT_BACKLIGHT);
#endif
}
// Shuts the panel down: clears the screen, sends Display OFF to the controller
// and then switches the backlight off.
void DeinitSPIDisplay()
{
  ClearScreen();
  SPI_TRANSFER(0x28/*Display OFF*/);
  TurnBacklightOff();
}
#endif

View File

@ -0,0 +1,26 @@
#pragma once
#ifdef HX8357D
// Data specific to the HX8357D controller
// Command bytes for setting the cursor X range, the cursor Y range and for writing pixel data.
#define DISPLAY_SET_CURSOR_X 0x2A
#define DISPLAY_SET_CURSOR_Y 0x2B
#define DISPLAY_WRITE_PIXELS 0x2C
// Board-specific settings for the Adafruit PiTFT variant of this controller.
#ifdef ADAFRUIT_HX8357D_PITFT
#include "pitft_35r_hx8357d.h"
#endif
void InitHX8357D(void);
void TurnDisplayOn(void);
void TurnDisplayOff(void);
// Native panel resolution in pixels.
#define DISPLAY_NATIVE_WIDTH 320
#define DISPLAY_NATIVE_HEIGHT 480
// NOTE(review): presumably, as on ILI9486, cursor window commands must always be sent in full - confirm against the users of this macro.
#define MUST_SEND_FULL_CURSOR_WINDOW
// Maps the generic InitSPIDisplay name onto this controller's init function.
#define InitSPIDisplay InitHX8357D
#endif

View File

@ -0,0 +1,170 @@
#include "config.h"
#if defined(ILI9341) || defined(ILI9340)
#include "spi.h"
#include <memory.h>
#include <stdio.h>
// Initializes the ILI9341 / ILI9340 display controller over SPI.
// Sequence: optional hardware reset pulse, drop the SPI clock to a slow divisor, software reset, display off,
// power / timing / VCOM registers, MADCTL (color order / orientation), inversion, 16 bits/pixel format,
// frame rate, display function control, gamma, sleep out, display on, optional backlight on, clear the screen,
// and finally restore SPI_BUS_CLOCK_DIVISOR.
void InitILI9341()
{
// If a Reset pin is defined, toggle it briefly high->low->high to enable the device. Some devices do not have a reset pin, in which case compile with GPIO_TFT_RESET_PIN left undefined.
#if defined(GPIO_TFT_RESET_PIN) && GPIO_TFT_RESET_PIN >= 0
printf("Resetting display at reset GPIO pin %d\n", GPIO_TFT_RESET_PIN);
SET_GPIO_MODE(GPIO_TFT_RESET_PIN, 1);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
CLEAR_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
#endif
// Do the initialization with a very low SPI bus speed, so that it will succeed even if the bus speed chosen by the user is too high.
spi->clk = 34;
__sync_synchronize();
BEGIN_SPI_COMMUNICATION();
{
SPI_TRANSFER(0x01/*Software Reset*/);
usleep(5*1000);
SPI_TRANSFER(0x28/*Display OFF*/);
// The following appear in ILI9341 Data Sheet v1.11 (2011/06/10), but not in older v1.02 (2010/12/06).
SPI_TRANSFER(0xCB/*Power Control A*/, 0x39/*Reserved*/, 0x2C/*Reserved*/, 0x00/*Reserved*/, 0x34/*REG_VD=1.6V*/, 0x02/*VBC=5.6V*/); // These are the same as power on.
SPI_TRANSFER(0xCF/*Power Control B*/, 0x00/*Always Zero*/, 0xC1/*Power Control=0,DRV_ena=0,PCEQ=1*/, 0x30/*DC_ena=1*/); // Not sure what the effect is, set to default as per ILI9341 Application Notes v0.6 (2011/03/11) document (which is not apparently same as default at power on).
SPI_TRANSFER(0xE8/*Driver Timing Control A*/, 0x85, 0x00, 0x78); // Not sure what the effect is, set to default as per ILI9341 Application Notes v0.6 (2011/03/11) document (which is not apparently same as default at power on).
SPI_TRANSFER(0xEA/*Driver Timing Control B*/, 0x00, 0x00); // Not sure what the effect is, set to default as per ILI9341 Application Notes v0.6 (2011/03/11) document (which is not apparently same as default at power on).
SPI_TRANSFER(0xED/*Power On Sequence Control*/, 0x64, 0x03, 0x12, 0x81); // Not sure what the effect is, set to default as per ILI9341 Application Notes v0.6 (2011/03/11) document (which is not apparently same as default at power on).
#if ILI9341_UPDATE_FRAMERATE == ILI9341_FRAMERATE_119_HZ // Setting pump ratio if update rate is less than 119 Hz does not look good but produces shimmering in panning motion.
SPI_TRANSFER(0xF7/*Pump Ratio Control*/, ILI9341_PUMP_CONTROL);
#endif
// The following appear also in old ILI9341 Data Sheet v1.02 (2010/12/06).
SPI_TRANSFER(0xC0/*Power Control 1*/, 0x23/*VRH=4.60V*/); // Set the GVDD level, which is a reference level for the VCOM level and the grayscale voltage level.
SPI_TRANSFER(0xC1/*Power Control 2*/, 0x10/*AVCC=VCIx2,VGH=VCIx7,VGL=-VCIx4*/); // Sets the factor used in the step-up circuits. To reduce power consumption, set a smaller factor.
SPI_TRANSFER(0xC5/*VCOM Control 1*/, 0x3e/*VCOMH=4.250V*/, 0x28/*VCOML=-1.500V*/); // Adjusting VCOM 1 and 2 can control display brightness
SPI_TRANSFER(0xC7/*VCOM Control 2*/, 0x86/*VCOMH=VMH-58,VCOML=VML-58*/);
// Bit flags used below to compose the byte written to the MADCTL (0x36) register.
#define MADCTL_BGR_PIXEL_ORDER (1<<3)
#define MADCTL_ROW_COLUMN_EXCHANGE (1<<5)
#define MADCTL_COLUMN_ADDRESS_ORDER_SWAP (1<<6)
#define MADCTL_ROW_ADDRESS_ORDER_SWAP (1<<7)
#define MADCTL_ROTATE_180_DEGREES (MADCTL_COLUMN_ADDRESS_ORDER_SWAP | MADCTL_ROW_ADDRESS_ORDER_SWAP)
uint8_t madctl = 0;
// The BGR bit is set by default and left clear only when DISPLAY_SWAP_BGR is defined.
#ifndef DISPLAY_SWAP_BGR
madctl |= MADCTL_BGR_PIXEL_ORDER;
#endif
#if defined(DISPLAY_FLIP_ORIENTATION_IN_HARDWARE)
madctl |= MADCTL_ROW_COLUMN_EXCHANGE;
#endif
#ifdef DISPLAY_ROTATE_180_DEGREES
// XOR toggles both the row and the column address order bits.
madctl ^= MADCTL_ROTATE_180_DEGREES;
#endif
SPI_TRANSFER(0x36/*MADCTL: Memory Access Control*/, madctl);
#ifdef DISPLAY_INVERT_COLORS
SPI_TRANSFER(0x21/*Display Inversion ON*/);
#else
SPI_TRANSFER(0x20/*Display Inversion OFF*/);
#endif
SPI_TRANSFER(0x3A/*COLMOD: Pixel Format Set*/, 0x55/*DPI=16bits/pixel,DBI=16bits/pixel*/);
// According to spec sheet, display frame rate in 4-wire SPI "internal clock mode" is computed with the following formula:
// frameRate = 615000 / [ pow(2,DIVA) * (320 + VFP + VBP) * RTNA ]
// where
// - DIVA is clock division ratio, 0 <= DIVA <= 3; so pow(2,DIVA) is either 1, 2, 4 or 8.
// - RTNA specifies the number of clocks assigned to each horizontal scanline, and must follow 16 <= RTNA <= 31.
// - VFP is vertical front porch, number of idle sleep scanlines before refreshing a new frame, 2 <= VFP <= 127.
// - VBP is vertical back porch, number of idle sleep scanlines after refreshing a new frame, 2 <= VBP <= 127.
// Max refresh rate then is with DIVA=0, VFP=2, VBP=2 and RTNA=16:
// maxFrameRate = 615000 / (1 * (320 + 2 + 2) * 16) = 118.63 Hz
// To get close to 60fps (the slowest rate with DIVA=0), set DIVA=0, RTNA=31, VFP=2 and VBP=2:
// minFrameRate = 615000 / (1 * (320 + 2 + 2) * 31) = 61.23 Hz
// It seems that in internal clock mode, horizontal front and back porch settings (HFP, BFP) are ignored(?)
SPI_TRANSFER(0xB1/*Frame Rate Control (In Normal Mode/Full Colors)*/, 0x00/*DIVA=fosc*/, ILI9341_UPDATE_FRAMERATE/*RTNA(Frame Rate)*/);
// SPI_TRANSFER(0xB5/*Blanking Porch Control*/, 0x02/*VFP, vertical front porch*/, 0x02/*VBP, vertical back porch*/, 0x0A/*HFP, horizontal front porch*/, 0x14/*HBP, horizontal back porch*/); // These are the default values at power on
SPI_TRANSFER(0xB6/*Display Function Control*/, 0x08/*PTG=Interval Scan,PT=V63/V0/VCOML/VCOMH*/, 0x82/*REV=1(Normally white),ISC(Scan Cycle)=5 frames*/, 0x27/*LCD Driver Lines=320*/);
SPI_TRANSFER(0xF2/*Enable 3G*/, 0x02/*False*/); // This one is present only in ILI9341 Data Sheet v1.11 (2011/06/10)
SPI_TRANSFER(0x26/*Gamma Set*/, 0x01/*Gamma curve 1 (G2.2)*/);
SPI_TRANSFER(0xE0/*Positive Gamma Correction*/, 0x0F, 0x31, 0x2B, 0x0C, 0x0E, 0x08, 0x4E, 0xF1, 0x37, 0x07, 0x10, 0x03, 0x0E, 0x09, 0x00);
SPI_TRANSFER(0xE1/*Negative Gamma Correction*/, 0x00, 0x0E, 0x14, 0x03, 0x11, 0x07, 0x31, 0xC1, 0x48, 0x08, 0x0F, 0x0C, 0x31, 0x36, 0x0F);
SPI_TRANSFER(0x11/*Sleep Out*/);
usleep(120 * 1000);
SPI_TRANSFER(/*Display ON*/0x29);
#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
printf("Setting TFT backlight on at pin %d\n", GPIO_TFT_BACKLIGHT);
TurnBacklightOn();
#endif
// Some wonky effects to try out:
// SPI_TRANSFER(0x20/*Display Inversion OFF*/);
// SPI_TRANSFER(0x21/*Display Inversion ON*/);
// SPI_TRANSFER(0x38/*Idle Mode OFF*/);
// SPI_TRANSFER(0x39/*Idle Mode ON*/); // Idle mode gives a super-saturated high contrast reduced colors mode
ClearScreen();
}
#ifndef USE_DMA_TRANSFERS // For DMA transfers, keep SPI CS & TA active.
END_SPI_COMMUNICATION();
#endif
// And speed up to the desired operation speed finally after init is done.
usleep(10 * 1000); // Delay a bit before restoring CLK, or otherwise this has been observed to cause the display not init if done back to back after the clear operation above.
spi->clk = SPI_BUS_CLOCK_DIVISOR;
}
// Switches the TFT backlight on. Compiles to an empty function unless both
// GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined.
void TurnBacklightOn()
{
#if defined(BACKLIGHT_CONTROL) && defined(GPIO_TFT_BACKLIGHT)
  // The pin may have been left under PWM control, so force it to digital 0/1 output mode (0x01) first.
  SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01);
  // Setting the pin turns the backlight on.
  SET_GPIO(GPIO_TFT_BACKLIGHT);
#endif
}
// Switches the TFT backlight off. Compiles to an empty function unless both
// GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined.
void TurnBacklightOff()
{
#if defined(BACKLIGHT_CONTROL) && defined(GPIO_TFT_BACKLIGHT)
  // The pin may have been left under PWM control, so force it to digital 0/1 output mode (0x01) first.
  SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01);
  // Clearing the pin turns the backlight off.
  CLEAR_GPIO(GPIO_TFT_BACKLIGHT);
#endif
}
// Turns the display off. At present this only switches the backlight off; the
// controller-level Display OFF / Sleep sequence is compiled out.
void TurnDisplayOff()
{
  TurnBacklightOff();

#if 0
  // Disabled: put the controller itself to sleep.
  QUEUE_SPI_TRANSFER(0x28/*Display OFF*/);
  QUEUE_SPI_TRANSFER(0x10/*Enter Sleep Mode*/);
  // Sleep Out may be sent 120 msecs after entering sleep mode at the earliest, so wait that long synchronously.
  usleep(120*1000);
#endif
}
// Turns the display back on. The controller-level wake-up sequence is compiled out, so this only
// switches the backlight on (and only when GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are both defined).
void TurnDisplayOn()
{
#if 0
  // Disabled: wake the controller from sleep and re-enable the display.
  QUEUE_SPI_TRANSFER(0x11/*Sleep Out*/);
  usleep(120 * 1000);
  QUEUE_SPI_TRANSFER(0x29/*Display ON*/);
#endif

#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
  // Same guard as in InitILI9341(): TurnBacklightOn() sets the pin to digital output mode and raises it.
  TurnBacklightOn();
#endif
}
// Shuts the panel down: clears the screen, sends Display OFF to the controller
// and then switches the backlight off.
void DeinitSPIDisplay()
{
  ClearScreen();
  SPI_TRANSFER(0x28/*Display OFF*/);
  TurnBacklightOff();
}
#endif

View File

@ -0,0 +1,82 @@
#pragma once
#if defined(ILI9341) || defined(ILI9340)
// SPI_BUS_CLOCK_DIVISOR specifies how fast to communicate the SPI bus at. Possible values are 4, 6, 8, 10, 12, ... Smaller
// values are faster. On my PiTFT 2.8 and Waveshare32b displays, divisor value of 4 does not work, and
// 6 is the fastest possible. While developing, it was observed that a value of 12 or higher did not
// actually work either, and only 6, 8 and 10 were functioning properly.
// On Adafruit PiTFT 2.8", the following speed configurations have been tested (on a Pi 3B):
// core_freq=400: CDIV=6, results in 66.67MHz, works
// core_freq=294: CDIV=4, results in 73.50MHz, works
// core_freq=320: CDIV=4, would result in 80.00MHz, but this was too fast for the display
// core_freq=300: CDIV=4, would result in 75.00MHz, and would work for ~99% of the time, but develop rare occasional pixel glitches once a minute or so.
// core_freq=296: CDIV=4, would result in 74.50MHz, would produce tiny individual pixel glitches very rarely, once every few 10 minutes or so.
// NOTE(review): 296/4 is 74.00MHz, so either the core_freq or the MHz figure on the line above is off - confirm.
// On Waveshare 3.2", the following speed configurations have been observed to work (on a Pi 3B):
// core_freq=400: CDIV=6, results in 66.67MHz, works
// core_freq=310: CDIV=4, results in 77.50MHz, works
// core_freq=320: CDIV=4, would result in 80.00MHz, would work most of the time, but produced random occasional glitches every few minutes or so.
// On Adafruit 2.2" PiTFT HAT - 320x240 Display with ILI9340 controller, the following speed configurations have been tested (on a Pi 3B):
// core_freq=338: CDIV=4, results in 84.5MHz, works
// core_freq=339: CDIV=4, would result in 84.75MHz, would work most of the time, but every few minutes generated random glitched pixels.
// Data specific to the ILI9341 controller
#define DISPLAY_SET_CURSOR_X 0x2A
#define DISPLAY_SET_CURSOR_Y 0x2B
#define DISPLAY_WRITE_PIXELS 0x2C
// ILI9341 displays are able to update at any rate from 61Hz up to 119Hz. Default at power on is 70Hz.
// Each value below is what InitILI9341() passes as the RTNA parameter of the Frame Rate Control (0xB1) command.
#define ILI9341_FRAMERATE_61_HZ 0x1F
#define ILI9341_FRAMERATE_63_HZ 0x1E
#define ILI9341_FRAMERATE_65_HZ 0x1D
#define ILI9341_FRAMERATE_68_HZ 0x1C
#define ILI9341_FRAMERATE_70_HZ 0x1B
#define ILI9341_FRAMERATE_73_HZ 0x1A
#define ILI9341_FRAMERATE_76_HZ 0x19
#define ILI9341_FRAMERATE_79_HZ 0x18
#define ILI9341_FRAMERATE_83_HZ 0x17
#define ILI9341_FRAMERATE_86_HZ 0x16
#define ILI9341_FRAMERATE_90_HZ 0x15
#define ILI9341_FRAMERATE_95_HZ 0x14
#define ILI9341_FRAMERATE_100_HZ 0x13
#define ILI9341_FRAMERATE_106_HZ 0x12
#define ILI9341_FRAMERATE_112_HZ 0x11
#define ILI9341_FRAMERATE_119_HZ 0x10
// Visually estimating NES Super Mario Bros 3 "match mushroom, flower, star" arcade game, 119Hz gives visually
// most pleasing result, so default to using that. You can also try other settings above. 119 Hz should give
// lowest latency, perhaps 61 Hz might give least amount of tearing, although this can be quite subjective.
#define ILI9341_UPDATE_FRAMERATE ILI9341_FRAMERATE_119_HZ
// Appears in ILI9341 Data Sheet v1.11 (2011/06/10), but not in older v1.02 (2010/12/06). This has a subtle effect on colors/saturation.
// Valid values are 0x20 and 0x30. Spec says 0x20 is default at boot, but doesn't seem so, more like 0x00 is default, giving supersaturated colors. I like 0x30 best.
// Value 0x30 doesn't seem to be available on ILI9340.
#define ILI9341_PUMP_CONTROL_2XVCI 0x20
#define ILI9341_PUMP_CONTROL_3XVCI 0x30
// Use 0x30 on ILI9341 and fall back to 0x20 otherwise (i.e. on ILI9340, see above).
#ifdef ILI9341
#define ILI9341_PUMP_CONTROL ILI9341_PUMP_CONTROL_3XVCI
#else
#define ILI9341_PUMP_CONTROL ILI9341_PUMP_CONTROL_2XVCI
#endif
// Native panel resolution in pixels.
#define DISPLAY_NATIVE_WIDTH 240
#define DISPLAY_NATIVE_HEIGHT 320
// Pull in board-specific settings for the selected product, if any.
#ifdef ADAFRUIT_ILI9341_PITFT
#include "pitft_28r_ili9341.h"
#elif defined(ADAFRUIT_HX8357D_PITFT)
#include "pitft_35r_hx8357d.h"
#elif defined(FREEPLAYTECH_WAVESHARE32B)
#include "freeplaytech_waveshare32b.h"
#endif
// Maps the generic InitSPIDisplay name onto this controller's init function.
#define InitSPIDisplay InitILI9341
void InitILI9341(void);
#endif

View File

@ -0,0 +1,163 @@
#include "config.h"
#if defined(ILI9486) || defined(ILI9486L)
#include "spi.h"
#include <memory.h>
#include <stdio.h>
// Initializes the ILI9486 / ILI9486L display controller over SPI.
// Sequence: optional hardware reset pulse, drop the SPI clock to a slow divisor, interface mode, sleep out,
// pixel format, inversion, power / VCOM registers, MADCTL (color order / orientation), optional gamma tables,
// display function control, display on, optional backlight on, clear the screen, and finally restore
// SPI_BUS_CLOCK_DIVISOR. When DISPLAY_SPI_BUS_IS_16BITS_WIDE is defined, each parameter byte is sent with an
// extra leading 0x00 byte.
void InitILI9486()
{
// If a Reset pin is defined, toggle it briefly high->low->high to enable the device. Some devices do not have a reset pin, in which case compile with GPIO_TFT_RESET_PIN left undefined.
#if defined(GPIO_TFT_RESET_PIN) && GPIO_TFT_RESET_PIN >= 0
printf("Resetting display at reset GPIO pin %d\n", GPIO_TFT_RESET_PIN);
SET_GPIO_MODE(GPIO_TFT_RESET_PIN, 1);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
CLEAR_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
#endif
// Do the initialization with a very low SPI bus speed, so that it will succeed even if the bus speed chosen by the user is too high.
spi->clk = 34;
__sync_synchronize();
BEGIN_SPI_COMMUNICATION();
{
#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE
SPI_TRANSFER(0xB0/*Interface Mode Control*/, 0x00, 0x00/*DE polarity=High enable, PCKL polarity=data fetched at rising time, HSYNC polarity=Low level sync clock, VSYNC polarity=Low level sync clock*/);
#else
SPI_TRANSFER(0xB0/*Interface Mode Control*/, 0x00/*DE polarity=High enable, PCKL polarity=data fetched at rising time, HSYNC polarity=Low level sync clock, VSYNC polarity=Low level sync clock*/);
#endif
SPI_TRANSFER(0x11/*Sleep OUT*/);
usleep(120*1000);
// Pixel format depends on whether the 18 bits/pixel color format is selected at compile time.
#ifdef DISPLAY_COLOR_FORMAT_R6X2G6X2B6X2
const uint8_t pixelFormat = 0x66; /*DPI(RGB Interface)=18bits/pixel, DBI(CPU Interface)=18bits/pixel*/
#else
const uint8_t pixelFormat = 0x55; /*DPI(RGB Interface)=16bits/pixel, DBI(CPU Interface)=16bits/pixel*/
#endif
#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE
SPI_TRANSFER(0x3A/*Interface Pixel Format*/, 0x00, pixelFormat);
#else
SPI_TRANSFER(0x3A/*Interface Pixel Format*/, pixelFormat);
#endif
// Oddly, WaveShare 3.5" (B) seems to need Display Inversion ON, whereas WaveShare 3.5" (A) seems to need Display Inversion OFF for proper image. See https://github.com/juj/fbcp-ili9341/issues/8
#ifdef DISPLAY_INVERT_COLORS
SPI_TRANSFER(0x21/*Display Inversion ON*/);
#else
SPI_TRANSFER(0x20/*Display Inversion OFF*/);
#endif
#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE
SPI_TRANSFER(0xC0/*Power Control 1*/, 0x00, 0x09, 0x00, 0x09);
SPI_TRANSFER(0xC1/*Power Control 2*/, 0x00, 0x41, 0x00, 0x00);
SPI_TRANSFER(0xC2/*Power Control 3*/, 0x00, 0x33);
SPI_TRANSFER(0xC5/*VCOM Control*/, 0x00, 0x00, 0x00, 0x36);
#else
SPI_TRANSFER(0xC0/*Power Control 1*/, 0x09, 0x09);
SPI_TRANSFER(0xC1/*Power Control 2*/, 0x41, 0x00);
SPI_TRANSFER(0xC2/*Power Control 3*/, 0x33);
SPI_TRANSFER(0xC5/*VCOM Control*/, 0x00, 0x36);
#endif
// Bit flags used below to compose the byte written to the MADCTL (0x36) register.
#define MADCTL_BGR_PIXEL_ORDER (1<<3)
#define MADCTL_ROW_COLUMN_EXCHANGE (1<<5)
#define MADCTL_COLUMN_ADDRESS_ORDER_SWAP (1<<6)
#define MADCTL_ROW_ADDRESS_ORDER_SWAP (1<<7)
#define MADCTL_ROTATE_180_DEGREES (MADCTL_COLUMN_ADDRESS_ORDER_SWAP | MADCTL_ROW_ADDRESS_ORDER_SWAP)
uint8_t madctl = 0;
// The BGR bit is set by default and left clear only when DISPLAY_SWAP_BGR is defined.
#ifndef DISPLAY_SWAP_BGR
madctl |= MADCTL_BGR_PIXEL_ORDER;
#endif
#if defined(DISPLAY_FLIP_ORIENTATION_IN_HARDWARE)
madctl |= MADCTL_ROW_COLUMN_EXCHANGE;
#endif
#ifdef DISPLAY_ROTATE_180_DEGREES
// XOR toggles both the row and the column address order bits.
madctl ^= MADCTL_ROTATE_180_DEGREES;
#endif
#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE
SPI_TRANSFER(0x36/*MADCTL: Memory Access Control*/, 0x00, madctl);
#ifndef WAVESHARE_SKIP_GAMMA_CONTROL
SPI_TRANSFER(0xE0/*Positive Gamma Control*/, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x2C, 0x00, 0x0B, 0x00, 0x0C, 0x00, 0x04, 0x00, 0x4C, 0x00, 0x64, 0x00, 0x36, 0x00, 0x03, 0x00, 0x0E, 0x00, 0x01, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00);
SPI_TRANSFER(0xE1/*Negative Gamma Control*/, 0x00, 0x0F, 0x00, 0x37, 0x00, 0x37, 0x00, 0x0C, 0x00, 0x0F, 0x00, 0x05, 0x00, 0x50, 0x00, 0x32, 0x00, 0x36, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x19, 0x00, 0x14, 0x00, 0x0F);
#endif
SPI_TRANSFER(0xB6/*Display Function Control*/, 0, 0, 0, /*ISC=2*/2, 0, /*Display Height h=*/59); // Actual display height = (h+1)*8 so (59+1)*8=480
#else
SPI_TRANSFER(0x36/*MADCTL: Memory Access Control*/, madctl);
#ifndef WAVESHARE_SKIP_GAMMA_CONTROL
SPI_TRANSFER(0xE0/*Positive Gamma Control*/, 0x00, 0x2C, 0x2C, 0x0B, 0x0C, 0x04, 0x4C, 0x64, 0x36, 0x03, 0x0E, 0x01, 0x10, 0x01, 0x00);
SPI_TRANSFER(0xE1/*Negative Gamma Control*/, 0x0F, 0x37, 0x37, 0x0C, 0x0F, 0x05, 0x50, 0x32, 0x36, 0x04, 0x0B, 0x00, 0x19, 0x14, 0x0F);
#endif
SPI_TRANSFER(0xB6/*Display Function Control*/, 0, /*ISC=2*/2, /*Display Height h=*/59); // Actual display height = (h+1)*8 so (59+1)*8=480
#endif
// NOTE(review): Sleep OUT was already sent near the top of this sequence; this second one looks redundant - confirm before removing.
SPI_TRANSFER(0x11/*Sleep OUT*/);
usleep(120*1000);
SPI_TRANSFER(0x29/*Display ON*/);
SPI_TRANSFER(0x38/*Idle Mode OFF*/);
SPI_TRANSFER(0x13/*Normal Display Mode ON*/);
#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
printf("Setting TFT backlight on at pin %d\n", GPIO_TFT_BACKLIGHT);
TurnBacklightOn();
#endif
ClearScreen();
}
#ifndef USE_DMA_TRANSFERS // For DMA transfers, keep SPI CS & TA active.
END_SPI_COMMUNICATION();
#endif
// And speed up to the desired operation speed finally after init is done.
usleep(10 * 1000); // Delay a bit before restoring CLK, or otherwise this has been observed to cause the display not init if done back to back after the clear operation above.
spi->clk = SPI_BUS_CLOCK_DIVISOR;
}
// Switches the TFT backlight off. Compiles to an empty function unless both
// GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined.
void TurnBacklightOff()
{
#if defined(BACKLIGHT_CONTROL) && defined(GPIO_TFT_BACKLIGHT)
  // The pin may have been left under PWM control, so force it to digital 0/1 output mode (0x01) first.
  SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01);
  // Clearing the pin turns the backlight off.
  CLEAR_GPIO(GPIO_TFT_BACKLIGHT);
#endif
}
// Switches the TFT backlight on. Compiles to an empty function unless both
// GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined.
void TurnBacklightOn()
{
#if defined(BACKLIGHT_CONTROL) && defined(GPIO_TFT_BACKLIGHT)
  // The pin may have been left under PWM control, so force it to digital 0/1 output mode (0x01) first.
  SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01);
  // Setting the pin turns the backlight on.
  SET_GPIO(GPIO_TFT_BACKLIGHT);
#endif
}
// Turns the display off: backlight off first, then Display OFF and Enter Sleep Mode are queued
// to the controller, followed by a synchronous wait.
void TurnDisplayOff()
{
  // Sleep Out may be sent 120 msecs after entering sleep mode at the earliest, so wait that long to be safe.
  const int sleepSettleUsecs = 120*1000;

  TurnBacklightOff();
  QUEUE_SPI_TRANSFER(0x28/*Display OFF*/);
  QUEUE_SPI_TRANSFER(0x10/*Enter Sleep Mode*/);
  usleep(sleepSettleUsecs);
}
// Wakes the display: the backlight is switched off first, the controller is taken out of sleep and
// the display re-enabled (with a 120 msec wait after each command), and only then is the backlight
// switched back on.
void TurnDisplayOn()
{
  const int commandSettleUsecs = 120 * 1000;

  TurnBacklightOff();

  QUEUE_SPI_TRANSFER(0x11/*Sleep Out*/);
  usleep(commandSettleUsecs);

  QUEUE_SPI_TRANSFER(0x29/*Display ON*/);
  usleep(commandSettleUsecs);

  TurnBacklightOn();
}
// Shuts the panel down: clears the screen, then runs the full TurnDisplayOff() sequence
// (backlight off, Display OFF, Enter Sleep Mode).
void DeinitSPIDisplay()
{
  ClearScreen();
  TurnDisplayOff();
}
#endif

View File

@ -0,0 +1,28 @@
#pragma once
#include "config.h"
// Data specific to the ILI9486 controller
// Command bytes for setting the X (column) window, setting the Y (row) window, and starting a pixel data write.
#define DISPLAY_SET_CURSOR_X 0x2A
#define DISPLAY_SET_CURSOR_Y 0x2B
#define DISPLAY_WRITE_PIXELS 0x2C
// Board-specific settings are pulled in when building for the WaveShare 3.5" (B) variant.
#ifdef WAVESHARE35B_ILI9486
#include "waveshare35b.h"
#endif
// Native panel resolution in pixels.
#define DISPLAY_NATIVE_WIDTH 320
#define DISPLAY_NATIVE_HEIGHT 480
// On ILI9486 the display bus commands and data are 16 bits rather than the usual 8 bits that most other controllers have.
// (On ILI9486L however the command width is 8 bits, so they are quite different)
#define DISPLAY_SPI_BUS_IS_16BITS_WIDE
// ILI9486 does not behave well if one sends partial commands, but must finish each command or the command does not apply
#define MUST_SEND_FULL_CURSOR_WINDOW
// Controller initialization routine; InitSPIDisplay is the controller-independent name it is exposed under.
void InitILI9486(void);
#define InitSPIDisplay InitILI9486
// For the WaveShare 3.5" (B) version 2 (IPS) panel gamma control has to be disabled; uncomment the line below if you use version 2.
// #define WAVESHARE_SKIP_GAMMA_CONTROL

View File

@ -0,0 +1,26 @@
#pragma once
#include "config.h"
// On my Maithoga ILI9486L display (https://www.aliexpress.com/item/3-5-inch-8P-SPI-TFT-LCD-Color-Screen-Module-ILI9486-Drive-IC-320-480-RGB/32828284227.html), the following speed settings have been tested:
// core_freq=400: CDIV=8, results in 50.00MHz, works
// core_freq=400: CDIV=6, would result in 66.666MHz, but this is too fast for the display, and produces corrupted output
// core_freq=305: CDIV=6, would result in 50.833MHz, but this would work most of the time, producing occasional corrupted pixels
// Data specific to the ILI9486L controller
// Command bytes for setting the X (column) window, setting the Y (row) window, and starting a pixel data write.
#define DISPLAY_SET_CURSOR_X 0x2A
#define DISPLAY_SET_CURSOR_Y 0x2B
#define DISPLAY_WRITE_PIXELS 0x2C
// Native panel resolution in pixels.
#define DISPLAY_NATIVE_WIDTH 320
#define DISPLAY_NATIVE_HEIGHT 480
// ILI9486L only supports 18 bits/pixel R6G6B6 format (padded to 3 bytes per pixel), and no 16-bits R5G6B5 mode.
#define DISPLAY_COLOR_FORMAT_R6X2G6X2B6X2
// ILI9486L does not behave well if one sends partial commands, but must finish each command or the command does not apply
#define MUST_SEND_FULL_CURSOR_WINDOW
// The ILI9486L declares the same initialization entry point name as the ILI9486; InitSPIDisplay is the controller-independent alias.
void InitILI9486(void);
#define InitSPIDisplay InitILI9486

View File

@ -0,0 +1,174 @@
#include "config.h"
#if defined(ILI9488)
#include "spi.h"
#include <memory.h>
#include <stdio.h>
// Initializes the ILI9488 controller: optionally pulses the reset pin, then sends the register setup sequence
// (gamma, power, VCOM, orientation, pixel format, frame rate, inversion), wakes the panel, turns it on and clears the screen.
// The whole sequence runs at a slow SPI clock (spi->clk = 34); the configured SPI_BUS_CLOCK_DIVISOR is applied at the very end.
void InitILI9488()
{
// If a Reset pin is defined, toggle it briefly high->low->high to enable the device. Some devices do not have a reset pin, in which case compile with GPIO_TFT_RESET_PIN left undefined.
#if defined(GPIO_TFT_RESET_PIN) && GPIO_TFT_RESET_PIN >= 0
printf("Resetting ili9488 display at reset GPIO pin %d\n", GPIO_TFT_RESET_PIN);
SET_GPIO_MODE(GPIO_TFT_RESET_PIN, 1);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
CLEAR_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
#endif
// Do the initialization with a very low SPI bus speed, so that it will succeed even if the bus speed chosen by the user is too high.
spi->clk = 34;
__sync_synchronize();
BEGIN_SPI_COMMUNICATION();
{
//0xE0 - PGAMCTRL Positive Gamma Control
SPI_TRANSFER(0xE0, 0x00, 0x03, 0x09, 0x08, 0x16, 0x0A, 0x3F, 0x78, 0x4C, 0x09, 0x0A, 0x08, 0x16, 0x1A, 0x0F);
//0xE1 - NGAMCTRL Negative Gamma Control
SPI_TRANSFER(0xE1, 0x00, 0x16, 0x19, 0x03, 0x0F, 0x05, 0x32, 0x45, 0x46, 0x04, 0x0E, 0x0D, 0x35, 0x37, 0x0F);
// 0xC0 Power Control 1
SPI_TRANSFER(0xC0, 0x17, 0x15);
// 0xC1 Power Control 2
SPI_TRANSFER(0xC1, 0x41);
// 0xC5 VCOM Control
SPI_TRANSFER(0xC5, 0x00, 0x12, 0x80);
// Memory access control. Determines display orientation,
// display color filter and refresh order/direction.
#define MADCTL_HORIZONTAL_REFRESH_ORDER (1<<2)
#define MADCTL_BGR_PIXEL_ORDER (1<<3)
#define MADCTL_VERTICAL_REFRESH_ORDER (1<<4)
#define MADCTL_ROW_COLUMN_EXCHANGE (1<<5)
#define MADCTL_COLUMN_ADDRESS_ORDER_SWAP (1<<6)
#define MADCTL_ROW_ADDRESS_ORDER_SWAP (1<<7)
#define MADCTL_ROTATE_180_DEGREES (MADCTL_COLUMN_ADDRESS_ORDER_SWAP | MADCTL_ROW_ADDRESS_ORDER_SWAP)
// Build the MADCTL value from the compile-time orientation/color-order options.
uint8_t madctl(0);
#ifndef DISPLAY_SWAP_BGR
madctl |= MADCTL_BGR_PIXEL_ORDER;
#endif
#if defined(DISPLAY_FLIP_ORIENTATION_IN_HARDWARE)
madctl |= MADCTL_ROW_COLUMN_EXCHANGE;
#endif
#ifdef DISPLAY_ROTATE_180_DEGREES
// XOR rather than OR, so that the rotation toggles both address-order bits.
madctl ^= MADCTL_ROTATE_180_DEGREES;
#endif
//
// Shifted value of bits [7:5] (MY - ROW_ADDRESS_ORDER_SWAP, MX - COLUMN_ADDRESS_ORDER_SWAP, MV ROW_COLUMN_EXCHANGE)
// and their resulting effect on the orientation of the image
// relative to the physical screen:
// 0x40 0 deg (W = 320, H = 480, FPC connector at bottom)
// 0x20 90 deg (W = 480, H = 320, FPC connector on right)
// 0x80 180 deg (W = 320, H = 480, FPC connector on top)
// 0xE0 270 deg (W = 480, H = 320, FPC connector on left)
// 0x36 Memory Access Control - sets display rotation.
SPI_TRANSFER(0x36, madctl);
// 0x3A Interface Pixel Format (bit depth color space)
SPI_TRANSFER(0x3A, 0x66);
// 0xB0 Interface Mode Control
SPI_TRANSFER(0xB0, 0x80);
// 0xB1 Frame Rate Control (in Normal Mode/Full Colors)
SPI_TRANSFER(0xB1, 0xA0);
// The display inversion is controlled by two registers:
// 0xB4 determines how the LEDs are swapped. See page 224 of the datasheet:
// The different values are:
// 0x00 Column inversion.
// 0x01 1 dot inversion.
// 0x02 2 dot inversion.
// 0x20/0x21 engage and disengage the inversion itself.
//
// I could not find a difference in using the three different
// settings for 0xB4. It is left at 0x02 since that is what
// the original test value was set to.
#ifdef DISPLAY_INVERT_COLORS
// 0xB4 Display Inversion Control.
SPI_TRANSFER(0xB4, 0x02);
// 0x21 Display Inversion ON.
SPI_TRANSFER(0x21);
#else
// 0xB4 Display Inversion Control.
SPI_TRANSFER(0xB4, 0x02);
// 0x20 Display Inversion OFF.
SPI_TRANSFER(0x20);
#endif
// 0xB6 Display Function Control.
SPI_TRANSFER(0xB6, 0x02, 0x02);
// 0xE9 Set Image Function.
SPI_TRANSFER(0xE9, 0x00);
// 0xF7 Adjust Control 3
SPI_TRANSFER(0xF7, 0xA9, 0x51, 0x2C, 0x82);
// 0x11 Exit Sleep Mode. (Sleep OUT)
SPI_TRANSFER(0x11);
usleep(120*1000);
// 0x29 Display ON.
SPI_TRANSFER(0x29);
// 0x38 Idle Mode OFF.
SPI_TRANSFER(0x38);
// 0x13 Normal Display Mode ON.
SPI_TRANSFER(0x13);
#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
printf("Setting TFT backlight on at pin %d\n", GPIO_TFT_BACKLIGHT);
TurnBacklightOn();
#endif
ClearScreen();
}
#ifndef USE_DMA_TRANSFERS // For DMA transfers, keep SPI CS & TA active.
END_SPI_COMMUNICATION();
#endif
// And speed up to the desired operation speed finally after init is done.
usleep(10 * 1000); // Delay a bit before restoring CLK, or otherwise this has been observed to cause the display not init if done back to back after the clear operation above.
spi->clk = SPI_BUS_CLOCK_DIVISOR;
}
// Switches the TFT backlight off through the GPIO_TFT_BACKLIGHT pin.
// Compiles to an empty function unless both GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined.
void TurnBacklightOff()
{
#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01); // Set backlight pin to digital 0/1 output mode (0x01) in case it had been PWM controlled
CLEAR_GPIO(GPIO_TFT_BACKLIGHT); // And turn the backlight off.
#endif
}
// Switches the TFT backlight on through the GPIO_TFT_BACKLIGHT pin.
// Compiles to an empty function unless both GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined.
void TurnBacklightOn()
{
#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01); // Set backlight pin to digital 0/1 output mode (0x01) in case it had been PWM controlled
SET_GPIO(GPIO_TFT_BACKLIGHT); // And turn the backlight on.
#endif
}
// Powers the panel down: backlight off first, then the Display OFF (0x28) and Enter Sleep Mode (0x10) commands are queued,
// followed by a blocking 120 msec wait.
void TurnDisplayOff()
{
TurnBacklightOff();
QUEUE_SPI_TRANSFER(0x28/*Display OFF*/);
QUEUE_SPI_TRANSFER(0x10/*Enter Sleep Mode*/);
usleep(120*1000); // Sleep off can be sent 120msecs after entering sleep mode the earliest, so synchronously sleep here for that duration to be safe.
}
// Wakes the panel up: queues Sleep Out (0x11) and Display ON (0x29), waiting 120 msecs after each command.
// The backlight is switched off at the start and only switched on again once both commands have been issued.
void TurnDisplayOn()
{
TurnBacklightOff();
QUEUE_SPI_TRANSFER(0x11/*Sleep Out*/);
usleep(120 * 1000);
QUEUE_SPI_TRANSFER(0x29/*Display ON*/);
usleep(120 * 1000);
TurnBacklightOn();
}
// Shuts the display down at program exit: clears the screen contents first, then powers the panel off.
void DeinitSPIDisplay()
{
ClearScreen();
TurnDisplayOff();
}
#endif

View File

@ -0,0 +1,24 @@
#pragma once
#include "config.h"
// Tested with display model ER-TFT035-6 from BuyDisplay.com:
// https://www.buydisplay.com/serial-spi-3-5-inch-tft-lcd-module-in-320x480-optl-touchscreen-ili9488
// Using long dupont wires to connect to a Pi Zero, the clock divider was tested down to a value of 12.
// Data specific to the ILI948X controllers
// Command bytes for setting the X (column) window, setting the Y (row) window, and starting a pixel data write.
#define DISPLAY_SET_CURSOR_X 0x2A
#define DISPLAY_SET_CURSOR_Y 0x2B
#define DISPLAY_WRITE_PIXELS 0x2C
// Native panel resolution in pixels.
#define DISPLAY_NATIVE_WIDTH 320
#define DISPLAY_NATIVE_HEIGHT 480
// 18 bits/pixel R6G6B6 format (padded to 3 bytes per pixel), and no 16-bits R5G6B5 mode.
#define DISPLAY_COLOR_FORMAT_R6X2G6X2B6X2
// ILI948X does not behave well if one sends partial commands, but must finish each command or the command does not apply
#define MUST_SEND_FULL_CURSOR_WINDOW
// Controller initialization routine; InitSPIDisplay is the controller-independent name it is exposed under.
void InitILI9488(void);
#define InitSPIDisplay InitILI9488

View File

@ -0,0 +1,3 @@
# Kbuild description of the out-of-tree module: build bcm2835_spi_display.o as a loadable module (obj-m).
obj-m := bcm2835_spi_display.o
# Per-object compiler flags. KERNEL_MODULE selects the kernel-side code paths of the shared sources, and the display
# configuration (ILI9341 on an Adafruit PiTFT) is hard-coded here via -D defines.
CFLAGS_bcm2835_spi_display.o := -O3 -std=gnu99 -Wno-declaration-after-statement -DKERNEL_MODULE -DILI9341=1 -DADAFRUIT_ILI9341_PITFT=1

View File

@ -0,0 +1,473 @@
#include <linux/buffer_head.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/fb.h>
#include <linux/fs.h>
#include <linux/futex.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/math64.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/platform_data/dma-bcm2708.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/spi/spidev.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include "../config.h"
#include "../display.h"
#include "../spi.h"
#include "../util.h"
#include "../dma.h"
static inline uint64_t tick(void)
{
struct timespec start = current_kernel_time();
return start.tv_sec * 1000000 + start.tv_nsec / 1000;
}
// TODO: Super-dirty temp, factor this into kbuild Makefile.
#include "../spi.cpp"
#include "../dma.cpp"
// Transmit state shared between invocations of the SPI interrupt handler: the task currently being sent,
// the next data byte of that task to write to the FIFO (0 when no task data is in flight), and one past its last data byte.
volatile SPITask *currentTask = 0;
volatile uint8_t *taskNextByte = 0;
volatile uint8_t *taskEndByte = 0;
// Name of the /proc entry through which the SPI task ring is exposed to the user space driver.
#define SPI_BUS_PROC_ENTRY_FILENAME "bcm2835_spi_display_bus"
// Per-open-file bookkeeping for the /proc entry: remembers the start of the memory block that mmap() exposes.
typedef struct mmap_info
{
// Kernel virtual address of the first byte of the shared block (set to spiTaskMemory in p_open()).
char *data;
} mmap_info;
// VMA open callback. Intentionally empty: no per-mapping state needs to be set up.
static void p_vm_open(struct vm_area_struct *vma)
{
}
// VMA close callback. Intentionally empty: no per-mapping state needs to be torn down.
static void p_vm_close(struct vm_area_struct *vma)
{
}
static int p_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
mmap_info *info = (mmap_info *)vma->vm_private_data;
if (info->data)
{
struct page *page = virt_to_page(info->data + vmf->pgoff*PAGE_SIZE);
get_page(page);
vmf->page = page;
}
return 0;
}
// VMA callbacks installed by p_mmap(): pages of the mapping are supplied lazily through the fault handler.
static struct vm_operations_struct vm_ops =
{
.open = p_vm_open,
.close = p_vm_close,
.fault = p_vm_fault,
};
// mmap() handler of the /proc entry: attaches the fault-driven VMA callbacks to the new mapping and
// passes the file's mmap_info on to it, so that the fault handler can find the shared memory block.
// Always returns 0.
static int p_mmap(struct file *filp, struct vm_area_struct *vma)
{
vma->vm_ops = &vm_ops;
// Mark the mapping as not expandable (VM_DONTEXPAND) and excluded from core dumps (VM_DONTDUMP).
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_private_data = filp->private_data;
p_vm_open(vma);
return 0;
}
static int p_open(struct inode *inode, struct file *filp)
{
mmap_info *info = kmalloc(sizeof(mmap_info), GFP_KERNEL);
info->data = (void*)spiTaskMemory;
filp->private_data = info;
return 0;
}
// release() handler of the /proc entry: frees the mmap_info that p_open() attached to the file
// and clears the now dangling pointer. Always returns 0.
static int p_release(struct inode *inode, struct file *filp)
{
  kfree(filp->private_data);
  filp->private_data = NULL;
  return 0;
}
// File operations of the /proc entry. Only open/mmap/release are provided: user space accesses the
// SPI task ring exclusively through a memory mapping.
static const struct file_operations fops =
{
.mmap = p_mmap,
.open = p_open,
.release = p_release,
};
#ifdef KERNEL_DRIVE_WITH_IRQ
// SPI interrupt handler. Unless KERNEL_MODULE_CLIENT_DRIVES is defined, each invocation advances the task ring one step:
//  - with no task data in flight (taskNextByte == 0): retire the previous task, fetch the next one and write its
//    command byte with the data/control line low (or deactivate the transfer if the ring is empty), then return;
//  - with task data in flight: raise the data/control line on the first data byte, feed data bytes to the FIFO,
//    and finally select which interrupt source should wake the handler up next.
// Always returns IRQ_HANDLED.
static irqreturn_t irq_handler(int irq, void* dev_id)
{
#ifndef KERNEL_MODULE_CLIENT_DRIVES
  uint32_t cs = spi->cs;
  if (!taskNextByte)
  {
    if (currentTask) DoneTask((SPITask*)currentTask);
    currentTask = GetTask();
    if (!currentTask)
    {
      // Nothing left to send: end the transfer and clear the FIFOs.
      spi->cs = (cs & ~BCM2835_SPI0_CS_TA) | BCM2835_SPI0_CS_CLEAR;
      return IRQ_HANDLED;
    }
    // Drain the receive side and wait until the previous transfer has completed before sending the new command byte.
    if ((cs & (BCM2835_SPI0_CS_RXF|BCM2835_SPI0_CS_RXR))) (void)spi->fifo;
    while (!(spi->cs & BCM2835_SPI0_CS_DONE))
    {
      if ((spi->cs & (BCM2835_SPI0_CS_RXF|BCM2835_SPI0_CS_RXR|BCM2835_SPI0_CS_RXD)))
        (void)spi->fifo;
    }
    CLEAR_GPIO(GPIO_TFT_DATA_CONTROL);
    spi->fifo = currentTask->cmd;
    if (currentTask->size == 0) // Was this a task without data bytes? If so, nothing more to do here, go to sleep to wait for next IRQ event
    {
      DoneTask((SPITask*)currentTask);
      taskNextByte = 0;
      currentTask = 0;
    }
    else
    {
      taskNextByte = currentTask->data;
      taskEndByte = currentTask->data + currentTask->size;
    }
#if 0 // Testing overhead of not returning after command byte, but synchronously polling it out..
    while (!(spi->cs & BCM2835_SPI0_CS_DONE)) ;
    (void)spi->fifo;
#else
    return IRQ_HANDLED;
#endif
  }
  // First data byte of the task: switch the data/control line from command to data.
  if (taskNextByte == currentTask->data)
  {
    SET_GPIO(GPIO_TFT_DATA_CONTROL);
    __sync_synchronize();
  }
  // Test code: write and read from FIFO as many bytes as spec says we should be allowed to, without checking CS in between.
  // int maxBytesToSend = (cs & BCM2835_SPI0_CS_DONE) ? 16 : 12;
  // if ((cs & BCM2835_SPI0_CS_RXF)) (void)spi->fifo;
  // if ((cs & BCM2835_SPI0_CS_RXR)) for(int i = 0; i < MIN(maxBytesToSend, taskEndByte-taskNextByte); ++i) { spi->fifo = *taskNextByte++; (void)spi->fifo; }
  // else for(int i = 0; i < MIN(maxBytesToSend, taskEndByte-taskNextByte); ++i) { spi->fifo = *taskNextByte++; }
  while(taskNextByte < taskEndByte)
  {
    // Note: this 'cs' is a fresh read that shadows the snapshot taken at function entry.
    uint32_t cs = spi->cs;
    if ((cs & (BCM2835_SPI0_CS_RXR | BCM2835_SPI0_CS_RXF))) spi->cs = cs | BCM2835_SPI0_CS_CLEAR_RX;
    if ((cs & BCM2835_SPI0_CS_TXD)) spi->fifo = *taskNextByte++;
    if ((cs & BCM2835_SPI0_CS_RXD)) (void)spi->fifo;
    else break;
  }
  if (taskNextByte >= taskEndByte)
  {
    // All data bytes written: stop interrupting on RXR and interrupt on DONE instead.
    if ((cs & BCM2835_SPI0_CS_INTR)) spi->cs = (cs & ~BCM2835_SPI0_CS_INTR) | BCM2835_SPI0_CS_INTD;
    taskNextByte = 0;
  }
  else
  {
    // Data remains: enable INTR and disable INTD, the mirror image of the branch above.
    // (Setting INTR and masking INTR out again in the same expression would leave the register unchanged.)
    if (!(cs & BCM2835_SPI0_CS_INTR)) spi->cs = (cs | BCM2835_SPI0_CS_INTR) & ~BCM2835_SPI0_CS_INTD;
  }
#endif
  return IRQ_HANDLED;
}
#endif
// Soft assertion: logs the text of the failed condition, but does not stop execution.
#define req(cnd) if (!(cnd)) { LOG("!!!%s!!!\n", #cnd);}
// Converts a kernel virtual address to the address form stored in DMA control blocks: the physical address with bit 30 (0x40000000) set.
// NOTE(review): the 0x40000000 offset is presumably the VideoCore bus address alias for this SoC - confirm against the target hardware.
uint32_t virt_to_bus_address(volatile void *virtAddress)
{
return (uint32_t)virt_to_phys((void*)virtAddress) | 0x40000000U;
}
// Set to 1 by the module exit handler to make the timer callback and the DMA test loop stop.
volatile int shuttingDown = 0;
dma_addr_t spiTaskMemoryPhysical = 0;
#ifdef USE_DMA_TRANSFERS
void DMATest(void);
// Debug code to verify memory->memory streaming of DMA, no SPI peripheral interaction (remove this)
// Repeatedly fills a 65-byte source range inside a freshly allocated DMA buffer with a counting pattern, lets the
// TX DMA channel copy it to the range directly after it, and compares the two. Stops on the first mismatch
// (after dumping the channel state) or when the module is shutting down.
// NOTE(review): this function repoints the global spiTaskMemory at its own buffer and resets it to 0 on exit, and the
// buffer returned by dma_alloc_writecombine() is neither checked for NULL nor freed - only safe as throwaway debug code.
void DMATest()
{
LOG("Testing DMA transfers");
dma_addr_t dma_mem_phys = 0;
void *dma_mem = dma_alloc_writecombine(0, SHARED_MEMORY_SIZE, &dma_mem_phys, GFP_KERNEL);
LOG("Allocated DMA memory: mem: %p, phys: %p", dma_mem, (void*)dma_mem_phys);
spiTaskMemory = (SharedMemory *)dma_mem;
while(!shuttingDown)
{
msleep(100);
static int ctr = 0;
// Pick a different source offset on every round; skip the round if source+destination would not fit in the queue.
uint32_t base = (ctr++ * 34153) % SPI_QUEUE_SIZE;
uint32_t size = 65;
uint32_t base2 = base + size;
if (base2 + size > SPI_QUEUE_SIZE) continue;
// Pre-fill the whole queue with a marker byte so that untouched destination bytes are recognizable.
memset((void*)spiTaskMemory->buffer, 0xCB, SPI_QUEUE_SIZE);
uint8_t *src = (uint8_t *)(spiTaskMemory->buffer + base);
src = (uint8_t *)((uintptr_t)src);
for(int i = 0; i < size; ++i)
src[i] = i;
uint8_t *dst = (uint8_t *)(spiTaskMemory->buffer + base2);
dst = (uint8_t *)((uintptr_t)dst);
// Translates a pointer into the DMA buffer to the address handed to the DMA engine: physical base + offset, with the top two bits (0xC0000000) set.
#define TO_BUS(ptr) (( ((uint32_t)dma_mem_phys + ((uintptr_t)(ptr) - (uintptr_t)dma_mem))) | 0xC0000000U)
volatile DMAChannelRegisterFile *dmaCh = dma+dmaTxChannel;
// printk(KERN_INFO "CS: %x, cbAddr: %p, ti: %x, src: %p, dst: %p, len: %u, stride: %u, nextConBk: %p, debug: %x",
// dmaCh->cs, (void*)dmaCh->cbAddr, dmaCh->cb.ti, (void*)dmaCh->cb.src, (void*)dmaCh->cb.dst, dmaCh->cb.len, dmaCh->cb.stride, (void*)dmaCh->cb.next, dmaCh->cb.debug);
// Build a single control block describing a plain incrementing memory-to-memory copy of 'size' bytes.
volatile DMAControlBlock *cb = &spiTaskMemory->cb[0].cb;
req(((uintptr_t)cb) % 256 == 0);
cb->ti = BCM2835_DMA_TI_SRC_INC | BCM2835_DMA_TI_DEST_INC;
cb->src = TO_BUS(src);
cb->dst = TO_BUS(dst);
cb->len = size;
cb->stride = 0;
cb->next = 0;
cb->debug = 0;
cb->reserved = 0;
// DumpCS(dmaCh->cs);
// DumpDebug(dmaCh->cb.debug);
// DumpTI(dmaCh->cb.ti);
LOG("Waiting for transfer %d, src:%p(phys:%p) to dst:%p (phys:%p)", ctr, (void*)src, (void*)cb->src, (void*)dst, (void*)cb->dst);
// Point the channel at the control block and start it, then busy-wait until the ACTIVE bit clears.
writel(TO_BUS(cb), &dmaCh->cbAddr);
writel(BCM2835_DMA_CS_ACTIVE | BCM2835_DMA_CS_END | BCM2835_DMA_CS_INT | BCM2835_DMA_CS_WAIT_FOR_OUTSTANDING_WRITES | BCM2835_DMA_CS_SET_PRIORITY(0xF) | BCM2835_DMA_CS_SET_PANIC_PRIORITY(0xF), &dmaCh->cs);
while((readl(&dmaCh->cs) & BCM2835_DMA_CS_ACTIVE) && !shuttingDown)
{
cpu_relax();
}
if (shuttingDown)
{
LOG("Module shutdown");
spiTaskMemory = 0;
return;
}
// Verify the copy; on the first differing byte dump the channel state and every byte pair, then give up.
int errors = 0;
for(int i = 0; i < size; ++i)
if (dst[i] != src[i])
{
errors = true;
break;
}
if (errors)
{
printk(KERN_INFO "CS: %x, cbAddr: %p, ti: %x, src: %p, dst: %p, len: %u, stride: %u, nextConBk: %p, debug: %x",
dmaCh->cs, (void*)dmaCh->cbAddr, dmaCh->cb.ti, (void*)dmaCh->cb.src, (void*)dmaCh->cb.dst, dmaCh->cb.len, dmaCh->cb.stride, (void*)dmaCh->cb.next, dmaCh->cb.debug);
for(int i = 0; i < size; ++i)
{
printk(KERN_INFO "Result %p %d: %x vs dst %p %x\n", (void*)virt_to_phys(src+i), i, src[i], (void*)virt_to_phys(dst+i), dst[i]);
}
DumpCS(dmaCh->cs);
DumpDebug(dmaCh->cb.debug);
DumpTI(dmaCh->cb.ti);
LOG("Abort");
break;
}
}
LOG("DMA transfer test done");
spiTaskMemory = 0;
}
#endif
// Gets queued SPI tasks moving.
// With KERNEL_DRIVE_WITH_IRQ, this only writes the SPI CS register so that the interrupt-driven path takes over.
// Otherwise the ring is drained synchronously here, handling at most 500 tasks per call.
void PumpSPI(void)
{
#ifdef KERNEL_DRIVE_WITH_IRQ
  // Clear the TX and RX FIFOs, set Transfer Active (TA), and enable both the INTR and INTD interrupt sources.
  // All other CS bits (chip select, clock phase/polarity, chip select polarity) are written as 0.
  spi->cs = BCM2835_SPI0_CS_CLEAR | BCM2835_SPI0_CS_TA | BCM2835_SPI0_CS_INTR | BCM2835_SPI0_CS_INTD;
#else
  if (spiTaskMemory->queueTail == spiTaskMemory->queueHead)
    return; // Ring is empty, nothing to send.
  BEGIN_SPI_COMMUNICATION();
  for(int numTasksRun = 0; spiTaskMemory->queueTail != spiTaskMemory->queueHead && numTasksRun < 500; ++numTasksRun)
  {
    SPITask *pending = GetTask();
    if (!pending)
      break;
    RunSPITask(pending);
    DoneTask(pending);
  }
  END_SPI_COMMUNICATION();
#endif
}
static struct timer_list my_timer;
void my_timer_callback( unsigned long data )
{
if (shuttingDown) return;
PumpSPI();
int ret = mod_timer( &my_timer, jiffies + msecs_to_jiffies(1) );
if (ret) printk("Error in mod_timer\n");
}
// Kernel thread body that brings the display up and starts the periodic SPI pump.
// Unless KERNEL_MODULE_CLIENT_DRIVES is defined, it queues the ILI9341 register initialization, draws a debug
// noise pattern, clears the screen and resets the cursor window. In all configurations it then creates the /proc
// entry that exposes the SPI task ring to user space and arms the pump timer. Always returns 0.
static int display_initialization_thread(void *unused)
{
  printk(KERN_INFO "BCM2835 SPI Display driver thread started");
#ifndef KERNEL_MODULE_CLIENT_DRIVES
  // Initialize display. TODO: Move to be shared with ili9341.cpp.
  QUEUE_SPI_TRANSFER(0xC0/*Power Control 1*/, 0x23/*VRH=4.60V*/); // Set the GVDD level, which is a reference level for the VCOM level and the grayscale voltage level.
  QUEUE_SPI_TRANSFER(0xC1/*Power Control 2*/, 0x10/*AVCC=VCIx2,VGH=VCIx7,VGL=-VCIx4*/); // Sets the factor used in the step-up circuits. To reduce power consumption, set a smaller factor.
  QUEUE_SPI_TRANSFER(0xC5/*VCOM Control 1*/, 0x3e/*VCOMH=4.250V*/, 0x28/*VCOML=-1.500V*/); // Adjusting VCOM 1 and 2 can control display brightness
  QUEUE_SPI_TRANSFER(0xC7/*VCOM Control 2*/, 0x86/*VCOMH=VMH-58,VCOML=VML-58*/);
#define MADCTL_ROW_COLUMN_EXCHANGE (1<<5)
#define MADCTL_BGR_PIXEL_ORDER (1<<3)
  // MADCTL_ROTATE_180_DEGREES below expands to these two bits, so they must be defined for
  // DISPLAY_ROTATE_180_DEGREES builds to compile. Guarded in case an included header already provides them.
#ifndef MADCTL_COLUMN_ADDRESS_ORDER_SWAP
#define MADCTL_COLUMN_ADDRESS_ORDER_SWAP (1<<6)
#endif
#ifndef MADCTL_ROW_ADDRESS_ORDER_SWAP
#define MADCTL_ROW_ADDRESS_ORDER_SWAP (1<<7)
#endif
#define MADCTL_ROTATE_180_DEGREES (MADCTL_COLUMN_ADDRESS_ORDER_SWAP | MADCTL_ROW_ADDRESS_ORDER_SWAP)
  uint8_t madctl = MADCTL_BGR_PIXEL_ORDER;
#ifdef DISPLAY_OUTPUT_LANDSCAPE
  madctl |= MADCTL_ROW_COLUMN_EXCHANGE;
#endif
#ifdef DISPLAY_ROTATE_180_DEGREES
  madctl ^= MADCTL_ROTATE_180_DEGREES;
#endif
  QUEUE_SPI_TRANSFER(0x36/*MADCTL: Memory Access Control*/, madctl);
  QUEUE_SPI_TRANSFER(0x3A/*COLMOD: Pixel Format Set*/, 0x55/*DPI=16bits/pixel,DBI=16bits/pixel*/);
  QUEUE_SPI_TRANSFER(0xB1/*Frame Rate Control (In Normal Mode/Full Colors)*/, 0x00/*DIVA=fosc*/, 0x18/*RTNA(Frame Rate)=79Hz*/);
  QUEUE_SPI_TRANSFER(0xB6/*Display Function Control*/, 0x08/*PTG=Interval Scan,PT=V63/V0/VCOML/VCOMH*/, 0x82/*REV=1(Normally white),ISC(Scan Cycle)=5 frames*/, 0x27/*LCD Driver Lines=320*/);
  QUEUE_SPI_TRANSFER(0x26/*Gamma Set*/, 0x01/*Gamma curve 1 (G2.2)*/);
  QUEUE_SPI_TRANSFER(0xE0/*Positive Gamma Correction*/, 0x0F, 0x31, 0x2B, 0x0C, 0x0E, 0x08, 0x4E, 0xF1, 0x37, 0x07, 0x10, 0x03, 0x0E, 0x09, 0x00);
  QUEUE_SPI_TRANSFER(0xE1/*Negative Gamma Correction*/, 0x00, 0x0E, 0x14, 0x03, 0x11, 0x07, 0x31, 0xC1, 0x48, 0x08, 0x0F, 0x0C, 0x31, 0x36, 0x0F);
  QUEUE_SPI_TRANSFER(0x11/*Sleep Out*/);
  PumpSPI();
  msleep(1000);
  QUEUE_SPI_TRANSFER(/*Display ON*/0x29);
#if 1
  // XXX Debug: Random garbage to verify screen updates working
  // NOTE(review): the cursor window end coordinates below are DISPLAY_WIDTH/DISPLAY_HEIGHT rather than size-1 - confirm
  // whether the controller expects inclusive end coordinates.
  for(int y = 0; y < DISPLAY_HEIGHT; ++y)
  {
    QUEUE_SPI_TRANSFER(DISPLAY_SET_CURSOR_X, 0, 0, DISPLAY_WIDTH >> 8, DISPLAY_WIDTH & 0xFF);
    QUEUE_SPI_TRANSFER(DISPLAY_SET_CURSOR_Y, y >> 8, y & 0xFF, DISPLAY_HEIGHT >> 8, DISPLAY_HEIGHT & 0xFF);
    SPITask *clearLine = AllocTask(DISPLAY_SCANLINE_SIZE);
    clearLine->cmd = DISPLAY_WRITE_PIXELS;
    clearLine->size = DISPLAY_SCANLINE_SIZE;
    for(int i = 0; i < DISPLAY_SCANLINE_SIZE; ++i)
      clearLine->data[i] = tick() * y + i;
    CommitTask(clearLine);
  }
  PumpSPI();
  msleep(1000);
#endif
  // Initial screen clear
  for(int y = 0; y < DISPLAY_HEIGHT; ++y)
  {
    QUEUE_SPI_TRANSFER(DISPLAY_SET_CURSOR_X, 0, 0, DISPLAY_WIDTH >> 8, DISPLAY_WIDTH & 0xFF);
    QUEUE_SPI_TRANSFER(DISPLAY_SET_CURSOR_Y, y >> 8, y & 0xFF, DISPLAY_HEIGHT >> 8, DISPLAY_HEIGHT & 0xFF);
    SPITask *clearLine = AllocTask(DISPLAY_SCANLINE_SIZE);
    clearLine->cmd = DISPLAY_WRITE_PIXELS;
    clearLine->size = DISPLAY_SCANLINE_SIZE;
    memset((void*)clearLine->data, 0, DISPLAY_SCANLINE_SIZE);
    CommitTask(clearLine);
  }
  PumpSPI();
  // Leave the cursor window covering the whole screen for whoever draws next.
  QUEUE_SPI_TRANSFER(DISPLAY_SET_CURSOR_X, 0, 0, DISPLAY_WIDTH >> 8, DISPLAY_WIDTH & 0xFF);
  QUEUE_SPI_TRANSFER(DISPLAY_SET_CURSOR_Y, 0, 0, DISPLAY_HEIGHT >> 8, DISPLAY_HEIGHT & 0xFF);
  spi->cs = BCM2835_SPI0_CS_CLEAR | BCM2835_SPI0_CS_TA | BCM2835_SPI0_CS_INTR | BCM2835_SPI0_CS_INTD;
#endif
  PumpSPI();
  // Expose SPI worker ring bus to user space driver application.
  proc_create(SPI_BUS_PROC_ENTRY_FILENAME, 0, NULL, &fops);
#if 0
  // XXX Debug:
  DMATest();
#endif
  setup_timer(&my_timer, my_timer_callback, 0);
  printk("Starting timer to fire in 200ms (%ld)\n", jiffies);
  int ret = mod_timer( &my_timer, jiffies + msecs_to_jiffies(200) );
  if (ret) printk("Error in mod_timer\n");
  return 0;
}
static struct task_struct *displayThread = 0;
static uint32_t irqHandlerCookie = 0;
static uint32_t irqRegistered = 0;
int bcm2835_spi_display_init(void)
{
InitSPI();
#ifdef KERNEL_DRIVE_WITH_IRQ
int ret = request_irq(84, irq_handler, IRQF_SHARED, "spi_handler", &irqHandlerCookie);
if (ret != 0) FATAL_ERROR("request_irq failed!");
irqRegistered = 1;
#endif
if (!spiTaskMemory) FATAL_ERROR("Shared memory block not initialized!");
#ifdef USE_DMA_TRANSFERS
printk(KERN_INFO "DMA TX channel: %d, irq: %d", dmaTxChannel, dmaTxIrq);
printk(KERN_INFO "DMA RX channel: %d, irq: %d", dmaRxChannel, dmaRxIrq);
spiTaskMemory->dmaTxChannel = dmaTxChannel;
spiTaskMemory->dmaRxChannel = dmaRxChannel;
#endif
spiTaskMemory->sharedMemoryBaseInPhysMemory = (uint32_t)virt_to_phys(spiTaskMemory) | 0x40000000U;
LOG("PhysBase: %p", (void*)spiTaskMemory->sharedMemoryBaseInPhysMemory);
displayThread = kthread_create(display_initialization_thread, NULL, "display_thread");
if (displayThread) wake_up_process(displayThread);
return 0;
}
// Module exit handler: flags shutdown so the timer callback stops re-arming itself, waits for activity to die down,
// resets the SPI peripheral, releases the IRQ (if one was registered), removes the /proc entry and deletes the timer.
void bcm2835_spi_display_exit(void)
{
shuttingDown = 1;
// Give the 1 msec pump timer (and the DMA test loop, if running) ample time to observe the flag.
msleep(2000);
spi->cs = BCM2835_SPI0_CS_CLEAR;
msleep(200);
DeinitSPI();
if (irqRegistered)
{
free_irq(84, &irqHandlerCookie);
irqRegistered = 0;
}
remove_proc_entry(SPI_BUS_PROC_ENTRY_FILENAME, NULL);
// del_timer() returns nonzero when the timer was still pending at the time of deletion.
int ret = del_timer( &my_timer );
if (ret) printk("The timer is still in use...\n");}
// Register the module's load and unload entry points with the kernel.
module_init(bcm2835_spi_display_init);
module_exit(bcm2835_spi_display_exit);

View File

@ -0,0 +1,6 @@
# Rebuilds the bcm2835_spi_display kernel module against the build tree of the currently running kernel.
# Any loaded instance of the module is stopped first.
sudo ./stop_kernel_module.sh
# Quote the command substitutions so that a path containing whitespace is not split into several arguments.
sudo make -C "/lib/modules/$(uname -r)/build" M="$(pwd)" modules
#For debugging: generate disassembly output:
#objdump -dS bcm2835_spi_display.ko > bcm2835_spi_display.S

View File

@ -0,0 +1,3 @@
# Loads the freshly built kernel module from the current directory.
echo "Starting kernel module bcm2835_spi_display.ko"
sudo insmod bcm2835_spi_display.ko

View File

@ -0,0 +1,10 @@
# The user space driver program has to go first: unloading the kernel module while the userland
# program is still accessing it would crash the system.
echo "Killing existing instances of user space driver program fbcp-ili9341"
sudo pkill fbcp-ili9341
sudo pkill fbcp-ili9341-stable
# With no user space client left, the module can be unloaded safely.
echo "Stopping kernel module bcm2835_spi_display.ko"
sudo rmmod bcm2835_spi_display.ko

View File

@ -0,0 +1,78 @@
#include <linux/input.h> // input_event
#include <fcntl.h> // O_RDONLY, O_NONBLOCK
#include <stdio.h> // printf
#include <stdint.h> // uint64_t
#include "config.h"
#include "keyboard.h"
#include "util.h"
#include "tick.h"
// Keyboard polling is only compiled in when keyboard activity is configured to drive the backlight
// AND the display is configured to turn off after a period of inactivity.
#if defined(BACKLIGHT_CONTROL_FROM_KEYBOARD) && defined(TURN_DISPLAY_OFF_AFTER_USECS_OF_INACTIVITY)
#define READ_KEYBOARD_ENABLED
#endif
// File descriptor of the keyboard input device, or -1 when it is not open.
int key_fd = -1;
// Opens the keyboard input device for non-blocking reads and stores the descriptor in key_fd.
// A failure to open is reported as a warning only. No-op when keyboard reading is not enabled.
void OpenKeyboard()
{
#ifdef READ_KEYBOARD_ENABLED
  key_fd = open(KEYBOARD_INPUT_FILE, O_RDONLY|O_NONBLOCK);
  if (key_fd >= 0)
    return;
  printf("Warning: cannot open keyboard input file " KEYBOARD_INPUT_FILE "! Try double checking that it exists, or reconfigure it in keyboard.cpp, or remove line '#define BACKLIGHT_CONTROL_FROM_KEYBOARD' in config.h if you do not want keyboard activity to factor into backlight control.\n");
#endif
}
// Drains all pending events from the keyboard input device without blocking.
// Returns the number of key events (type EV_KEY with a nonzero key code) that were read;
// returns 0 when the keyboard is not open or keyboard reading is not enabled.
int ReadKeyboard()
{
#ifdef READ_KEYBOARD_ENABLED
  if (key_fd < 0) return 0;
  struct input_event ev;
  ssize_t bytesRead = -1;
  int numRead = 0;
  do
  {
    bytesRead = read(key_fd, &ev, sizeof(struct input_event));
    // Compare as signed values: sizeof() yields an unsigned size_t, so without the cast a failed read() (-1, which is
    // how a drained non-blocking descriptor reports "no more data") would be converted to a huge unsigned number,
    // pass this test, and cause stale or uninitialized contents of 'ev' to be counted as a key event.
    if (bytesRead >= (ssize_t)sizeof(struct input_event))
    {
      if (ev.type == EV_KEY && ev.code != 0) // key up or down
      {
        // printf("time: %d %d type: %d, code: %d, value: %d\n", ev.time.tv_sec, ev.time.tv_usec, ev.type, ev.code, ev.value);
        ++numRead;
      }
    }
  } while(bytesRead > 0);
  return numRead;
#else
  return 0;
#endif
}
// Closes the keyboard input device if it is open and marks it closed. No-op when keyboard reading is not enabled.
void CloseKeyboard()
{
#ifdef READ_KEYBOARD_ENABLED
  if (key_fd < 0)
    return;
  close(key_fd);
  key_fd = -1;
#endif
}
static uint64_t lastKeyboardPressTime = 0;
static uint64_t lastKeyboardPressCheckTime = 0;
// Returns the time elapsed, in tick() units, since a key event was last observed.
// The keyboard itself is polled at most once every 250000 ticks; calls in between reuse the last observation.
// Always returns 0 when keyboard reading is not enabled.
uint64_t TimeSinceLastKeyboardPress(void)
{
#ifdef READ_KEYBOARD_ENABLED
  const uint64_t now = tick();
  // ReadKeyboard() takes about 8 usecs on Pi 3B, so 250msecs poll interval should be fine
  const bool pollDue = (now - lastKeyboardPressCheckTime >= 250000);
  if (pollDue)
  {
    lastKeyboardPressCheckTime = now;
    if (ReadKeyboard() != 0)
    {
      lastKeyboardPressTime = now;
    }
  }
  return now - lastKeyboardPressTime;
#else
  return 0;
#endif
}

View File

@ -0,0 +1,6 @@
#pragma once
#include <inttypes.h> // uint64_t, so that this header can be included on its own
// Opens the keyboard input device for polling. No-op when keyboard reading is not enabled.
void OpenKeyboard(void);
// Drains pending keyboard events without blocking and returns how many key events were read.
int ReadKeyboard(void);
// Closes the keyboard input device if it is open.
void CloseKeyboard(void);
// Returns the time elapsed, in tick() units, since a key event was last observed (0 when keyboard reading is not enabled).
uint64_t TimeSinceLastKeyboardPress(void);

View File

@ -0,0 +1,80 @@
#include <string.h>
#include "config.h"
#include "low_battery.h"
#include "gpu.h"
#include "spi.h"
#ifdef LOW_BATTERY_PIN
// Position of the icon's top-left corner in the framebuffer, in pixels.
#define LOW_BATTERY_ICON_TOP_LEFT_X 10
#define LOW_BATTERY_ICON_TOP_LEFT_Y 10
// Icon dimensions in pixels; these must match the dimensions of the lowBatteryIcon table below.
#define LOW_BATTERY_ICON_WIDTH 35
#define LOW_BATTERY_ICON_HEIGHT 20
// 16-bit pixel values substituted for the 1 (foreground) and 0 (background) cells of the icon table at startup.
// NOTE(review): presumably R5G6B5 white and black - confirm against the framebuffer format.
#define LOW_BATTERY_FORE_COLOR 65535
#define LOW_BATTERY_BACK_COLOR 0
// Battery state found by the most recent poll, and the tick() time at which that poll happened.
static bool lowBattery = false;
static uint64_t lowBatteryLastPolled = 0;
// Battery icon from: https://github.com/martinohanlon/grrl-bat-monitor
static uint16_t lowBatteryIcon [LOW_BATTERY_ICON_HEIGHT][LOW_BATTERY_ICON_WIDTH] = {
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
{0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
{0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0},
{0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0},
{0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0},
{0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0},
{0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
{0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
// Converts the icon table in place from a 0/1 bitmap into actual pixel values (nonzero cells become the
// foreground color, zero cells the background color), then takes a first reading of the battery state.
void InitLowBatterySystem()
{
  for(int row = 0; row < LOW_BATTERY_ICON_HEIGHT; ++row)
  {
    uint16_t *pixels = lowBatteryIcon[row];
    for(int col = 0; col < LOW_BATTERY_ICON_WIDTH; ++col)
    {
      if (pixels[col])
        pixels[col] = LOW_BATTERY_FORE_COLOR;
      else
        pixels[col] = LOW_BATTERY_BACK_COLOR;
    }
  }
  PollLowBattery();
}
// Samples the low battery GPIO pin and records the result, unless the previous sample was taken
// no more than LOW_BATTERY_POLLING_INTERVAL ticks ago, in which case nothing happens.
void PollLowBattery()
{
  const uint64_t now = tick();
  if (!(now - lowBatteryLastPolled > LOW_BATTERY_POLLING_INTERVAL))
    return; // Polled recently enough, keep the previous reading.
  // A set pin means "low battery" exactly when the signal is configured as active high.
  if (GET_GPIO(LOW_BATTERY_PIN))
    lowBattery = LOW_BATTERY_IS_ACTIVE_HIGH;
  else
    lowBattery = !LOW_BATTERY_IS_ACTIVE_HIGH;
  lowBatteryLastPolled = now;
}
// Copies the low battery icon into the given framebuffer at its fixed top-left position, one icon row at a time.
// Does nothing unless the most recent poll found the battery to be low.
void DrawLowBatteryIcon(uint16_t *framebuffer)
{
  if (lowBattery)
  {
    for(int row = 0; row < LOW_BATTERY_ICON_HEIGHT; ++row)
    {
      // The scanline stride is given in bytes; halve it to step in 16-bit pixels.
      const int pixelOffset = (LOW_BATTERY_ICON_TOP_LEFT_Y+row)*(gpuFramebufferScanlineStrideBytes>>1)+LOW_BATTERY_ICON_TOP_LEFT_X;
      memcpy(&framebuffer[pixelOffset], lowBatteryIcon[row], LOW_BATTERY_ICON_WIDTH*2);
    }
  }
}
#else
// Without LOW_BATTERY_PIN there is no battery signal to read: provide empty implementations so that callers need no #ifdefs.
void InitLowBatterySystem() {}
void PollLowBattery() {}
void DrawLowBatteryIcon(uint16_t *framebuffer) {}
#endif

View File

@ -0,0 +1,19 @@
#pragma once
#include <inttypes.h>
// All functions here are no-ops when LOW_BATTERY_PIN is undefined, so they can be
// called unconditionally.
// This function must be called during the startup of the program to initialize
// internal data related to rendering the low battery icon.
void InitLowBatterySystem();
// Polls and saves the state of the battery. No-op if the function was last called
// no more than LOW_BATTERY_POLLING_INTERVAL tick() units ago.
void PollLowBattery();
// Draws a low battery icon on the given framebuffer if the last call to
// PollLowBattery found a low battery state.
void DrawLowBatteryIcon(uint16_t *framebuffer);

View File

@ -0,0 +1,83 @@
#include "config.h"
#include "mailbox.h"
#include "util.h"
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <inttypes.h>
#include <syslog.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
int vcio = -1;
// Opens the VideoCore mailbox device /dev/vcio and stores the descriptor in vcio.
// Failure to open is treated as a fatal error.
void OpenMailbox()
{
  const int fd = open("/dev/vcio", 0);
  vcio = fd;
  if (fd < 0)
  {
    FATAL_ERROR("Failed to open VideoCore kernel mailbox!");
  }
}
// Closes the VideoCore mailbox device if it is open and marks it closed.
// Safe to call when the mailbox was never opened or has already been closed.
void CloseMailbox()
{
  // Do not hand an invalid descriptor (-1) to close().
  if (vcio >= 0)
  {
    close(vcio);
    vcio = -1;
  }
}
// Hands the given message buffer to the VideoCore mailbox through an ioctl on the open mailbox device.
// See https://github.com/raspberrypi/firmware/wiki/Mailbox-property-interface
// A failing ioctl is treated as a fatal error.
void SendMailbox(void *buffer)
{
  if (ioctl(vcio, _IOWR(/*MAJOR_NUM=*/100, 0, char *), buffer) < 0)
  {
    FATAL_ERROR("SendMailbox failed in ioctl!");
  }
}
// Defines the structure of a Mailbox message carrying PayloadSize 32-bit payload words.
template<int PayloadSize>
struct MailboxMessage
{
  uint32_t messageSize;      // Size of the whole structure, in bytes
  uint32_t requestCode;      // Initialized to 0
  uint32_t messageId;        // Identifier of the message, as passed to the constructor
  uint32_t messageSizeBytes; // Size of the payload, in bytes
  uint32_t dataSizeBytes;    // Initialized to the size of the payload, in bytes
  union
  {
    uint32_t payload[PayloadSize]; // Payload words; left uninitialized by the constructor
    uint32_t result;               // Alias of payload[0]
  };
  uint32_t messageEndSentinel; // Terminating word, always 0

  // Fills in the header and the end sentinel for the given message id. The payload is left for the caller to fill in.
  MailboxMessage(uint32_t messageId)
    : messageSize(sizeof(*this)),
      requestCode(0),
      messageId(messageId),
      messageSizeBytes(sizeof(uint32_t)*PayloadSize),
      dataSizeBytes(sizeof(uint32_t)*PayloadSize),
      messageEndSentinel(0)
  {
  }
};
// Sends a mailbox message with 1xuint32 payload
uint32_t Mailbox(uint32_t messageId, uint32_t payload0)
{
MailboxMessage<1> msg(messageId);
msg.payload[0] = payload0;
SendMailbox(&msg);
return msg.result;
}
uint32_t MailboxRet2(uint32_t messageId, uint32_t payload0)
{
MailboxMessage<2> msg(messageId);
msg.payload[0] = payload0;
msg.payload[1] = 0;
SendMailbox(&msg);
return msg.payload[1];
}
// Sends a mailbox message with 3xuint32 payload
uint32_t Mailbox(uint32_t messageId, uint32_t payload0, uint32_t payload1, uint32_t payload2)
{
MailboxMessage<3> msg(messageId);
msg.payload[0] = payload0;
msg.payload[1] = payload1;
msg.payload[2] = payload2;
SendMailbox(&msg);
return msg.result;
}

View File

@ -0,0 +1,10 @@
#pragma once
#include <inttypes.h>
// Opens the /dev/vcio mailbox device. Must be called before any of the functions below.
void OpenMailbox(void);
// Closes the mailbox device opened by OpenMailbox().
void CloseMailbox(void);
// Sends a raw, caller-formatted message buffer to the VideoCore mailbox.
void SendMailbox(void *buffer);
// Sends a message with one 32-bit payload word; returns the first payload word after the exchange.
uint32_t Mailbox(uint32_t messageId, uint32_t payload0);
// Sends a message with a two-word payload (payload0, 0); returns the second payload word after the exchange.
uint32_t MailboxRet2(uint32_t messageId, uint32_t payload0);
// Sends a message with three 32-bit payload words; returns the first payload word after the exchange.
uint32_t Mailbox(uint32_t messageId, uint32_t payload0, uint32_t payload1, uint32_t payload2);

View File

@ -0,0 +1,24 @@
#include "config.h"
#include "mem_alloc.h"
#include <memory.h>
#include <stdlib.h>
#include <stdio.h>
// Running total of the bytes successfully handed out by Malloc(). Never decremented.
uint64_t totalCpuMemoryAllocated = 0;

// Allocates 'bytes' bytes with malloc() and adds the size to totalCpuMemoryAllocated.
//   bytes:  number of bytes to allocate
//   reason: human readable description of the allocation, used only in the failure message
// Returns the allocated pointer. Never returns null: if malloc() fails, an error message is
// printed and the process exits with status 1.
void *Malloc(size_t bytes, const char *reason)
{
  void *ptr = malloc(bytes);
  if (!ptr)
  {
    // size_t is unsigned, so it must be printed with %zu (%zd is the signed counterpart).
    printf("Failed to allocate %zu bytes of memory for %s!\n", bytes, reason);
    exit(1);
  }

  totalCpuMemoryAllocated += bytes; // Currently we don't decrement this, so this only counts up (all allocations are persistent so far, so that's ok for now)
  // printf("Allocated %zu bytes of CPU memory for %s. Total memory allocated: %llu bytes\n", bytes, reason, totalCpuMemoryAllocated);
  return ptr;
}

View File

@ -0,0 +1,8 @@
#pragma once
#include <sys/types.h>
#include <inttypes.h>
// Total number of bytes allocated through Malloc() so far.
extern uint64_t totalCpuMemoryAllocated;
// Allocates 'bytes' bytes of memory. 'reason' is a human readable description of what the memory is for.
void *Malloc(size_t bytes, const char *reason);

View File

@ -0,0 +1,135 @@
#include "config.h"
#ifdef MPI3501
#include "spi.h"
#include <memory.h>
#include <stdio.h>
// Pulses both chip select lines: CE0 (touch) is driven low and then back high, after which
// CE1 (display) is driven high and then back low. On return CE0 is high and CE1 is low.
// The order of the GPIO writes and the barriers between them is significant; do not reorder.
void ChipSelectHigh()
{
WAIT_SPI_FINISHED(); // presumably waits for the in-flight SPI transfer to complete - TODO confirm against the macro definition
CLEAR_GPIO(GPIO_SPI0_CE0); // Enable Touch
SET_GPIO(GPIO_SPI0_CE0); // Disable Touch
__sync_synchronize();
SET_GPIO(GPIO_SPI0_CE1); // Disable Display
CLEAR_GPIO(GPIO_SPI0_CE1); // Enable Display
__sync_synchronize();
}
// Initializes the KeDei v6.3 display: optionally pulses the reset pin, raises both chip selects,
// sends the controller initialization command sequence at a slow SPI clock, clears the screen and
// finally switches the SPI clock divisor to SPI_BUS_CLOCK_DIVISOR.
// The sequence of transfers and delays below is order and timing dependent.
void InitKeDeiV63()
{
// If a Reset pin is defined, toggle it briefly high->low->high to enable the device. Some devices do not have a reset pin, in which case compile with GPIO_TFT_RESET_PIN left undefined.
#if defined(GPIO_TFT_RESET_PIN) && GPIO_TFT_RESET_PIN >= 0
printf("Resetting display at reset GPIO pin %d\n", GPIO_TFT_RESET_PIN);
SET_GPIO_MODE(GPIO_TFT_RESET_PIN, 1);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
CLEAR_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
#endif
// For sanity, start with both Chip selects high to ensure that the display will see a high->low enable transition when we start.
SET_GPIO(GPIO_SPI0_CE0); // Disable Touch
SET_GPIO(GPIO_SPI0_CE1); // Disable Display
usleep(1000);
// Do the initialization with a very low SPI bus speed, so that it will succeed even if the bus speed chosen by the user is too high.
spi->clk = 34;
__sync_synchronize();
BEGIN_SPI_COMMUNICATION();
{
CLEAR_GPIO(GPIO_SPI0_CE0); // Enable Touch
CLEAR_GPIO(GPIO_SPI0_CE1); // Enable Display
// NOTE(review): BEGIN_SPI_COMMUNICATION() is issued a second time here, after the chip selects go low - confirm this is intentional.
BEGIN_SPI_COMMUNICATION();
usleep(25*1000);
SET_GPIO(GPIO_SPI0_CE0); // Disable Touch
usleep(25*1000);
SPI_TRANSFER(0x00000000); // This command seems to be Reset
usleep(120*1000);
SPI_TRANSFER(0x00000100);
usleep(50*1000);
SPI_TRANSFER(0x00001100);
usleep(60*1000);
// From here on each command word has the form 0xCC001100, where CC is the command byte; each data byte is passed as a (0x00, value) pair.
SPI_TRANSFER(0xB9001100, 0x00, 0xFF, 0x00, 0x83, 0x00, 0x57);
usleep(5*1000);
SPI_TRANSFER(0xB6001100, 0x00, 0x2C);
SPI_TRANSFER(0x11001100/*Sleep Out*/);
usleep(150*1000);
SPI_TRANSFER(0x3A001100/*Interface Pixel Format*/, 0x00, 0x55);
SPI_TRANSFER(0xB0001100, 0x00, 0x68);
SPI_TRANSFER(0xCC001100, 0x00, 0x09);
SPI_TRANSFER(0xB3001100, 0x00, 0x43, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06);
SPI_TRANSFER(0xB1001100, 0x00, 0x00, 0x00, 0x15, 0x00, 0x1C, 0x00, 0x1C, 0x00, 0x83, 0x00, 0x44);
SPI_TRANSFER(0xC0001100, 0x00, 0x24, 0x00, 0x24, 0x00, 0x01, 0x00, 0x3C, 0x00, 0x1E, 0x00, 0x08);
SPI_TRANSFER(0xB4001100, 0x00, 0x02, 0x00, 0x40, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x2A, 0x00, 0x0D, 0x00, 0x4F);
SPI_TRANSFER(0xE0001100, 0x00, 0x02, 0x00, 0x08, 0x00, 0x11, 0x00, 0x23, 0x00, 0x2C, 0x00, 0x40, 0x00, 0x4A, 0x00, 0x52, 0x00, 0x48, 0x00, 0x41, 0x00, 0x3C, 0x00, 0x33, 0x00, 0x2E, 0x00, 0x28, 0x00, 0x27, 0x00, 0x1B, 0x00, 0x02, 0x00, 0x08, 0x00, 0x11, 0x00, 0x23, 0x00, 0x2C, 0x00, 0x40, 0x00, 0x4A, 0x00, 0x52, 0x00, 0x48, 0x00, 0x41, 0x00, 0x3C, 0x00, 0x33, 0x00, 0x2E, 0x00, 0x28, 0x00, 0x27, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x01);
// Bit positions of the MADCTL (Memory Access Control) register value assembled below.
#define MADCTL_BGR_PIXEL_ORDER (1<<3)
#define MADCTL_ROW_COLUMN_EXCHANGE (1<<5)
#define MADCTL_COLUMN_ADDRESS_ORDER_SWAP (1<<6)
#define MADCTL_ROW_ADDRESS_ORDER_SWAP (1<<7)
#define MADCTL_ROTATE_180_DEGREES (MADCTL_COLUMN_ADDRESS_ORDER_SWAP | MADCTL_ROW_ADDRESS_ORDER_SWAP)
uint8_t madctl = 0;
#ifndef DISPLAY_SWAP_BGR
madctl |= MADCTL_BGR_PIXEL_ORDER;
#endif
#if defined(DISPLAY_FLIP_ORIENTATION_IN_HARDWARE)
madctl |= MADCTL_ROW_COLUMN_EXCHANGE;
#endif
#ifdef DISPLAY_ROTATE_180_DEGREES
// XOR (not OR) so that both address order swap bits are toggled relative to the value built so far.
madctl ^= MADCTL_ROTATE_180_DEGREES;
#endif
SPI_TRANSFER(0x36001100/*MADCTL: Memory Access Control*/, 0x00, madctl);
SPI_TRANSFER(0x29001100/*Display ON*/);
usleep(200*1000);
ClearScreen();
}
#ifndef USE_DMA_TRANSFERS // For DMA transfers, keep SPI CS & TA active.
END_SPI_COMMUNICATION();
#endif
// And speed up to the desired operation speed finally after init is done.
usleep(10 * 1000); // Delay a bit before restoring CLK, or otherwise this has been observed to cause the display not init if done back to back after the clear operation above.
spi->clk = SPI_BUS_CLOCK_DIVISOR;
}
// Intentionally empty: this driver performs no backlight control.
void TurnBacklightOff()
{
}
// Intentionally empty: this driver performs no backlight control.
void TurnBacklightOn()
{
}
// Intentionally empty: this driver sends no command to the display when it is turned off.
void TurnDisplayOff()
{
}
// Intentionally empty: this driver sends no command to the display when it is turned on.
void TurnDisplayOn()
{
}
// Shuts the display down: clears the screen contents and then calls TurnDisplayOff()
// (which is an empty function in this driver).
void DeinitSPIDisplay()
{
ClearScreen();
TurnDisplayOff();
}
#endif

View File

@ -0,0 +1,48 @@
#pragma once
#include "config.h"
#ifdef MPI3501
// Data specific to the KeDei v6.3 display
#define DISPLAY_SET_CURSOR_X 0x2A001100
#define DISPLAY_SET_CURSOR_Y 0x2B001100
#define DISPLAY_WRITE_PIXELS 0x2C001100
#define DISPLAY_NATIVE_WIDTH 320
#define DISPLAY_NATIVE_HEIGHT 480
// On KeDei v6.3 display, each 16-bit command (of which highest 8 bits are always 0x00) is always prepended with a 16-bit command/data prefix that is either 0x0011 (command) or 0x0015 (data).
#define DISPLAY_SPI_BUS_IS_16BITS_WIDE
// KeDei v6.3 does not behave well if one sends partial commands, but must finish each command or the command does not apply
#define MUST_SEND_FULL_CURSOR_WINDOW
// KeDei v6.3 is a 3-wire SPI display, DC line is not used
#define SPI_3WIRE_PROTOCOL
// KeDei frames all command/data packets with a 16 bit prefix.
#define SPI_3WIRE_DATA_COMMAND_FRAMING_BITS 16
// On KeDei, SPI commands are 32-bit wide, instead of 8-bit or 16-bit.
#define SPI_32BIT_COMMANDS
// SPI drive settings are different compared to most other displays: KeDei SPI hat connects display to SPI channel 1 (channel 0 is for touch controller),
// and Polarity and Phase are reversed. (Chip Select line is idle when high, and bits are clocked on rising edge of the serial clock line)
#define DISPLAY_SPI_DRIVE_SETTINGS (1 | BCM2835_SPI0_CS_CPOL | BCM2835_SPI0_CS_CPHA)
// A peculiarity of KeDei is that it needs the Touch and Display CS lines pumped for each 32-bit word that is written, or otherwise it does not process bytes on the bus. (it does send
// return bytes back on the MISO line though even without this, so it does at least do something even without this, but nothing would show up on the screen if this pumping is not done)
#define CHIP_SELECT_LINE_NEEDS_REFRESHING_EACH_32BITS_WRITTEN
// On KeDei, CS0 line is for touch, and CS1 line is for the LCD
#define DISPLAY_USES_CS1
#ifdef USE_DMA_TRANSFERS
#warning KeDei v6.3 controller does not currently support DMA, rebuild with CMake directive -DUSE_DMA_TRANSFERS=OFF.
#endif
// Initializes the KeDei v6.3 display controller.
void InitKeDeiV63(void);
// Map the generic display init entry point name onto this driver's init function.
#define InitSPIDisplay InitKeDeiV63
#endif

View File

@ -0,0 +1,123 @@
#include "config.h"
#ifdef MZ61581
#include "spi.h"
#include <memory.h>
#include <stdio.h>
// Initializes the MZ61581 display controller: optionally pulses the reset pin, sends the
// initialization command sequence at a slow SPI clock, optionally enables the backlight, clears
// the screen and finally switches the SPI clock divisor to SPI_BUS_CLOCK_DIVISOR.
// The sequence of transfers and delays below is order and timing dependent.
void InitMZ61581()
{
// If a Reset pin is defined, toggle it briefly high->low->high to enable the device. Some devices do not have a reset pin, in which case compile with GPIO_TFT_RESET_PIN left undefined.
#if defined(GPIO_TFT_RESET_PIN) && GPIO_TFT_RESET_PIN >= 0
printf("Resetting display at reset GPIO pin %d\n", GPIO_TFT_RESET_PIN);
SET_GPIO_MODE(GPIO_TFT_RESET_PIN, 1);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
CLEAR_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
#endif
// Do the initialization with a very low SPI bus speed, so that it will succeed even if the bus speed chosen by the user is too high.
spi->clk = 34;
__sync_synchronize();
BEGIN_SPI_COMMUNICATION();
{
// Reverse engineered with logic analyzer, not sure what these mean. If you have a data sheet for MZ61581, please send it my way.
SPI_TRANSFER(0xB0, 0x00);
SPI_TRANSFER(0xB3, 0x02, 0x00, 0x00, 0x00);
SPI_TRANSFER(0xC0, 0x13, 0x3B, 0x00, 0x02, 0x00, 0x01, 0x00, 0x43);
SPI_TRANSFER(0xC1, 0x08, 0x16, 0x08, 0x08);
SPI_TRANSFER(0xC4, 0x11, 0x07, 0x03, 0x03);
SPI_TRANSFER(0xC6, 0x00);
SPI_TRANSFER(0xC8, 0x03, 0x03, 0x13, 0x5C, 0x03, 0x07, 0x14, 0x08, 0x00, 0x21, 0x08, 0x14, 0x07, 0x53, 0x0C, 0x13, 0x03, 0x03, 0x21, 0x00);
SPI_TRANSFER(0x35, 0x00);
SPI_TRANSFER(0x44, 0x00, 0x01);
SPI_TRANSFER(0xD0, 0x07, 0x07, 0x1D, 0x03);
SPI_TRANSFER(0xD1, 0x03, 0x30, 0x10);
SPI_TRANSFER(0xD2, 0x03, 0x14, 0x04);
// The following coincide with e.g. ILI9341.
SPI_TRANSFER(0x3A/*COLMOD: Pixel Format Set*/, 0x55/*DPI=16bits/pixel,DBI=16bits/pixel*/);
// Bit positions of the MADCTL (Memory Access Control) register value assembled below.
#define MADCTL_BGR_PIXEL_ORDER (1<<3)
#define MADCTL_ROW_COLUMN_EXCHANGE (1<<5)
#define MADCTL_COLUMN_ADDRESS_ORDER_SWAP (1<<6)
#define MADCTL_ROW_ADDRESS_ORDER_SWAP (1<<7)
#define MADCTL_ROTATE_180_DEGREES (MADCTL_COLUMN_ADDRESS_ORDER_SWAP | MADCTL_ROW_ADDRESS_ORDER_SWAP)
// Unlike a zero default, this controller starts with the column address order swap bit set.
uint8_t madctl = MADCTL_COLUMN_ADDRESS_ORDER_SWAP;
#ifndef DISPLAY_SWAP_BGR
madctl |= MADCTL_BGR_PIXEL_ORDER;
#endif
#if defined(DISPLAY_FLIP_ORIENTATION_IN_HARDWARE)
madctl |= MADCTL_ROW_COLUMN_EXCHANGE;
#endif
#ifdef DISPLAY_ROTATE_180_DEGREES
// XOR (not OR) so that both address order swap bits are toggled relative to the value built so far.
madctl ^= MADCTL_ROTATE_180_DEGREES;
#endif
SPI_TRANSFER(0x36/*MADCTL: Memory Access Control*/, madctl);
SPI_TRANSFER(0x11/*Sleep Out*/);
usleep(300 * 1000);
SPI_TRANSFER(0x29/*Display ON*/);
SPI_TRANSFER(0x2C); // 0x2C is the value of DISPLAY_WRITE_PIXELS in this controller's header
// TONTEC_MZ61581 has backlight active when backlight GPIO is low, and at boot, it seems to be disabled, so always need to enable it.
#if defined(GPIO_TFT_BACKLIGHT) && (defined(BACKLIGHT_CONTROL) || defined(TONTEC_MZ61581))
printf("Setting TFT backlight on at pin %d\n", GPIO_TFT_BACKLIGHT);
SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01); // Set backlight pin to digital 0/1 output mode (0x01) in case it had been PWM controlled
CLEAR_GPIO(GPIO_TFT_BACKLIGHT); // And turn the backlight on. MZ61581 backlight is on when the Backlight GPIO pin is 0.
#endif
ClearScreen();
}
#ifndef USE_DMA_TRANSFERS // For DMA transfers, keep SPI CS & TA active.
END_SPI_COMMUNICATION();
#endif
// And speed up to the desired operation speed finally after init is done.
usleep(10 * 1000); // Delay a bit before restoring CLK, or otherwise this has been observed to cause the display not init if done back to back after the clear operation above.
spi->clk = SPI_BUS_CLOCK_DIVISOR;
}
// Turns the display off. In the current build this only switches the backlight off (and only when
// both GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined); the Display OFF / Sleep commands
// are compiled out with #if 0.
void TurnDisplayOff()
{
#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01); // Set backlight pin to digital 0/1 output mode (0x01) in case it had been PWM controlled
SET_GPIO(GPIO_TFT_BACKLIGHT); // And turn the backlight off.
#endif
#if 0
QUEUE_SPI_TRANSFER(0x28/*Display OFF*/);
QUEUE_SPI_TRANSFER(0x10/*Enter Sleep Mode*/);
usleep(120*1000); // Sleep off can be sent 120msecs after entering sleep mode the earliest, so synchronously sleep here for that duration to be safe.
#endif
// printf("Turned display OFF\n");
}
// Turns the display on. In the current build this only switches the backlight on (and only when
// both GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined); the Sleep Out / Display ON commands
// are compiled out with #if 0.
void TurnDisplayOn()
{
#if 0
QUEUE_SPI_TRANSFER(0x11/*Sleep Out*/);
usleep(120 * 1000);
QUEUE_SPI_TRANSFER(0x29/*Display ON*/);
#endif
#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01); // Set backlight pin to digital 0/1 output mode (0x01) in case it had been PWM controlled
CLEAR_GPIO(GPIO_TFT_BACKLIGHT); // And turn the backlight on.
#endif
// printf("Turned display ON\n");
}
// Shuts the display down: clears the screen contents and then sends the Display OFF (0x28) command.
void DeinitSPIDisplay()
{
ClearScreen();
SPI_TRANSFER(/*Display OFF*/0x28);
}
#endif

View File

@ -0,0 +1,32 @@
#pragma once
#ifdef MZ61581
// SPI_BUS_CLOCK_DIVISOR specifies how fast to communicate the SPI bus at. Possible values are 4, 6, 8, 10, 12, ... Smaller
// values are faster.
// The following bus speed have been tested on Tontec 3.5" display with marking "MZ61581-PI-EXT 2016.1.28" on the back (on a Pi 3B+):
// core_freq=280: CDIV=2, results in 140.00MHz, works
// core_freq=281: CDIV=2, results in 140.50MHz, works, but oddly there is a certain shade of brown color on the ground of OpenTyrian that then starts flickering faintly red - everything else seemed fine. (At 142.5MHz very noticeable)
// Data specific to the MZ61581 controller
#define DISPLAY_SET_CURSOR_X 0x2A
#define DISPLAY_SET_CURSOR_Y 0x2B
#define DISPLAY_WRITE_PIXELS 0x2C
#define DISPLAY_NATIVE_WIDTH 320
#define DISPLAY_NATIVE_HEIGHT 480
#ifdef TONTEC_MZ61581
#include "tontec_35_mz61581.h"
#endif
// Map the generic display init entry point name onto this driver's init function.
#define InitSPIDisplay InitMZ61581
// Initializes the MZ61581 display controller.
void InitMZ61581(void);
// Turns the display (backlight) on.
void TurnDisplayOn(void);
// Turns the display (backlight) off.
void TurnDisplayOff(void);
#endif

View File

@ -0,0 +1,25 @@
#pragma once
// Data specific to Adafruit's PiTFT 2.8 display
#ifdef ADAFRUIT_ILI9341_PITFT
// Even though the display controller protocol over the SPI bus is standard as per e.g. ILI9341 spec sheet,
// and the pins that one uses for the Pi hardware SPI are also standard;
// the choice of which Raspberry Pi GPIO pin is used for flipping the Data/Control pin of the display
// can vary. Pre-made stack-on hats such as on Adafruit's ILI9341, or predesigned schematics configurations
// such as Freeplaytech's WaveShare32B display wiring can standardize the pin to use in some configurations, but
// if you did your wiring customized directly on the GPIO pins, you will likely need to check which pin to
// configure here. This pin numbering is specified in the BCM pins namespace.
#if !defined(GPIO_TFT_DATA_CONTROL)
#define GPIO_TFT_DATA_CONTROL 25 /*!< Version 1, Pin P1-22, PiTFT 2.8 resistive Data/Control pin */
#endif
#if !defined(GPIO_TFT_BACKLIGHT)
// Adafruit 2.2" 320x240 HAT has backlight on pin 18: https://learn.adafruit.com/adafruit-2-2-pitft-hat-320-240-primary-display-for-raspberry-pi/backlight-control
// So does Adafruit 2.8" 320x240 display: https://learn.adafruit.com/adafruit-pitft-28-inch-resistive-touchscreen-display-raspberry-pi/backlight-control
// And so does Adafruit 3.5" 480x320 display: https://learn.adafruit.com/adafruit-pitft-3-dot-5-touch-screen-for-raspberry-pi/faq?view=all#pwm-backlight-control-with-gpio-18
#define GPIO_TFT_BACKLIGHT 18
#endif
#endif

View File

@ -0,0 +1,27 @@
#pragma once
// Data specific to Adafruit's PiTFT 3.5" display
#ifdef ADAFRUIT_HX8357D_PITFT
// SPI_BUS_CLOCK_DIVISOR specifies how fast to communicate the SPI bus at. Possible values
// are 4, 6, 8, 10, 12, ... Smaller values are faster. On my PiTFT 3.5" display, the
// following values were observed to work (on a Pi 3B):
// core_freq=314: CDIV=6, results in 52.333MHz
// While the following values were seen to not work:
// core_freq=315: CDIV=6, would result in 52.50MHz, which would work for several minutes, but then introduce infrequent single pixel glitches
#if !defined(GPIO_TFT_DATA_CONTROL)
#define GPIO_TFT_DATA_CONTROL 25
#endif
#if !defined(GPIO_TFT_BACKLIGHT)
// Adafruit 2.2" 320x240 HAT has backlight on pin 18: https://learn.adafruit.com/adafruit-2-2-pitft-hat-320-240-primary-display-for-raspberry-pi/backlight-control
// So does Adafruit 2.8" 320x240 display: https://learn.adafruit.com/adafruit-pitft-28-inch-resistive-touchscreen-display-raspberry-pi/backlight-control
// And so does Adafruit 3.5" 480x320 display: https://learn.adafruit.com/adafruit-pitft-3-dot-5-touch-screen-for-raspberry-pi/faq?view=all#pwm-backlight-control-with-gpio-18
#define GPIO_TFT_BACKLIGHT 18
#endif
#endif

669
usr/fbcp-ili9341/spi.cpp Normal file
View File

@ -0,0 +1,669 @@
#ifndef KERNEL_MODULE
#include <stdio.h> // printf, stderr
#include <syslog.h> // syslog
#include <fcntl.h> // open, O_RDWR, O_SYNC
#include <sys/mman.h> // mmap, munmap
#include <pthread.h> // pthread_create
#include <bcm_host.h> // bcm_host_get_peripheral_address, bcm_host_get_peripheral_size, bcm_host_get_sdram_address
#endif
#include "config.h"
#include "spi.h"
#include "util.h"
#include "dma.h"
#include "mailbox.h"
#include "mem_alloc.h"
// Uncomment this to print out all bytes sent to the SPI bus
// #define DEBUG_SPI_BUS_WRITES
#ifdef DEBUG_SPI_BUS_WRITES
#define DEBUG_PRINT_WRITTEN_BYTE(byte) do { \
printf("%02X", byte); \
if ((writeCounter & 3) == 0) printf("\n"); \
} while(0)
#else
#define DEBUG_PRINT_WRITTEN_BYTE(byte) ((void)0)
#endif
// TOGGLE_CHIP_SELECT_LINE() is invoked once per byte written to the SPI FIFO. On displays that
// define CHIP_SELECT_LINE_NEEDS_REFRESHING_EACH_32BITS_WRITTEN it bumps writeCounter and calls
// ChipSelectHigh() after every fourth byte (i.e. once per 32 bits written); on all other displays
// it expands to a no-op expression.
#ifdef CHIP_SELECT_LINE_NEEDS_REFRESHING_EACH_32BITS_WRITTEN
void ChipSelectHigh();
// Wrapped in do { } while(0) like the other statement macros in this file, so that the expansion is
// a single statement and cannot capture an 'else' that follows the call site.
#define TOGGLE_CHIP_SELECT_LINE() do { if ((++writeCounter & 3) == 0) { ChipSelectHigh(); } } while(0)
#else
#define TOGGLE_CHIP_SELECT_LINE() ((void)0)
#endif
// Byte counter referenced by TOGGLE_CHIP_SELECT_LINE() and DEBUG_PRINT_WRITTEN_BYTE().
static uint32_t writeCounter = 0;
// Writes a single byte to the SPI FIFO register. The argument is evaluated exactly once and
// truncated to uint8_t before being written; afterwards the per-byte chip select hook and the
// optional debug print hook are run.
#define WRITE_FIFO(word) do { \
uint8_t w = (word); \
spi->fifo = w; \
TOGGLE_CHIP_SELECT_LINE(); \
DEBUG_PRINT_WRITTEN_BYTE(w); \
} while(0)
// File descriptor of /dev/mem (opened in InitSPI() in the userland build); -1 while not open.
int mem_fd = -1;
// Base address of the memory mapped peripheral register area (set in InitSPI()).
volatile void *bcm2835 = 0;
// GPIO register file inside the mapped peripheral area.
volatile GPIORegisterFile *gpio = 0;
// SPI0 register file inside the mapped peripheral area.
volatile SPIRegisterFile *spi = 0;
// Points to the system timer register. N.B. spec sheet says this is two low and high parts, in an 32-bit aligned (but not 64-bit aligned) address. Profiling shows
// that Pi 3 Model B does allow reading this as a u64 load, and even when unaligned, it is around 30% faster to do so compared to loading in parts "lo | (hi << 32)".
volatile uint64_t *systemTimerRegister = 0;
// Debug helper: prints the state of the SPI0 CS register flags for the given register value,
// followed by the current SPI0 DLEN register and the level of the CE0 GPIO pin.
//   reg: a value of the SPI0 CS register. It is not referenced directly in this function body;
//        presumably the PRINT_FLAG macro tests each flag against it - TODO confirm against the macro definition.
void DumpSPICS(uint32_t reg)
{
PRINT_FLAG(BCM2835_SPI0_CS_CS);
PRINT_FLAG(BCM2835_SPI0_CS_CPHA);
PRINT_FLAG(BCM2835_SPI0_CS_CPOL);
PRINT_FLAG(BCM2835_SPI0_CS_CLEAR_TX);
PRINT_FLAG(BCM2835_SPI0_CS_CLEAR_RX);
PRINT_FLAG(BCM2835_SPI0_CS_TA);
PRINT_FLAG(BCM2835_SPI0_CS_DMAEN);
PRINT_FLAG(BCM2835_SPI0_CS_INTD);
PRINT_FLAG(BCM2835_SPI0_CS_INTR);
PRINT_FLAG(BCM2835_SPI0_CS_ADCS);
PRINT_FLAG(BCM2835_SPI0_CS_DONE);
PRINT_FLAG(BCM2835_SPI0_CS_RXD);
PRINT_FLAG(BCM2835_SPI0_CS_TXD);
PRINT_FLAG(BCM2835_SPI0_CS_RXR);
PRINT_FLAG(BCM2835_SPI0_CS_RXF);
// The two lines below read live hardware state rather than 'reg'.
printf("SPI0 DLEN: %u\n", spi->dlen);
printf("SPI0 CE0 register: %d\n", GET_GPIO(GPIO_SPI0_CE0) ? 1 : 0);
}
#ifdef RUN_WITH_REALTIME_THREAD_PRIORITY
#include <pthread.h>
#include <sched.h>
// Switches the calling thread to the SCHED_FIFO scheduling policy at the maximum priority that
// policy offers, then reads the scheduling parameters back to verify that the policy took effect.
// Any failure is reported through FATAL_ERROR.
void SetRealtimeThreadPriority()
{
  sched_param schedParams;
  schedParams.sched_priority = sched_get_priority_max(SCHED_FIFO);
  if (pthread_setschedparam(pthread_self(), SCHED_FIFO, &schedParams) != 0) FATAL_ERROR("pthread_setschedparam() failed!");

  // Read the policy back to make sure the request was actually honored.
  int activePolicy = 0;
  if (pthread_getschedparam(pthread_self(), &activePolicy, &schedParams) != 0) FATAL_ERROR("pthread_getschedparam() failed!");
  if (activePolicy != SCHED_FIFO) FATAL_ERROR("Failed to set realtime thread policy!");

  printf("Set fbcp-ili9341 thread scheduling priority to maximum (%d)\n", sched_get_priority_max(SCHED_FIFO));
}
#endif
// Errata to BCM2835 behavior: documentation states that the SPI0 DLEN register is only used for DMA. However, even when DMA is not being utilized, setting it to
// a value other than 0 or 1 gets rid of an excess idle clock cycle that is present when transmitting each byte. (by default in Polled SPI Mode each 8 bits transfer in 9 clocks)
// With DLEN=2 each byte is clocked to the bus in 8 cycles, observed to improve max throughput from 56.8mbps to 63.3mbps (+11.4%, quite close to the theoretical +12.5%)
// https://www.raspberrypi.org/forums/viewtopic.php?f=44&t=181154
#define UNLOCK_FAST_8_CLOCKS_SPI() (spi->dlen = 2)
#ifdef ALL_TASKS_SHOULD_DMA
bool previousTaskWasSPI = true;
#endif
#ifdef SPI_3WIRE_PROTOCOL
// Returns the buffer size in bytes needed for the 32-bit form of a task whose 8-bit form
// carries byteSizeFor8BitTask payload bytes.
uint32_t NumBytesNeededFor32BitSPITask(uint32_t byteSizeFor8BitTask)
{
  // Every 16 bits of payload expand to 32 bits, i.e. the payload doubles in size.
  const uint32_t expandedPayloadBytes = byteSizeFor8BitTask * 2;
  // One extra 32-bit word is needed for the command.
  const uint32_t commandWordBytes = 4;
  return expandedPayloadBytes + commandWordBytes;
}
// Returns the buffer size in bytes needed for the 9-bit form of a task whose 8-bit form
// carries byteSizeFor8BitTask payload bytes (plus one command byte).
uint32_t NumBytesNeededFor9BitSPITask(uint32_t byteSizeFor8BitTask)
{
  // Command byte + payload bytes, each widened to 9 bits (1 data/command bit + 8 payload bits).
  const uint32_t unpaddedBits = (byteSizeFor8BitTask + 1) * 9;
  // The stream must be a whole number of 9-bit units, but the BCM2835 SPI peripheral only sends
  // whole 8-bit bytes, so round up to the next multiple of lcm(9, 8) = 72 bits.
  const uint32_t groupsOf72Bits = (unpaddedBits + 71) / 72;
  const uint32_t paddedBits = groupsOf72Bits * 72;
  return paddedBits >> 3;
}
// N.B. BCM2835 hardware always clocks bytes out most significant bit (MSB) first, so when interleaving, the command bit needs to start out in the
// highest byte of the outgoing buffer.
//
// Expands the command byte and the 8-bit payload of the given task into a 9-bits-per-byte stream,
// where every byte is preceded by one data/command bit (0 = command, 1 = data). The expanded stream
// is written into the task's own buffer, directly after the 8-bit payload.
//   task: the task to expand. task->size must already include task->sizeExpandedTaskWithPadding
//         bytes of room after the 8-bit payload.
void Interleave8BitSPITaskTo9Bit(SPITask *task)
{
  const uint32_t size8BitTask = task->size - task->sizeExpandedTaskWithPadding;
  // 9-bit SPI task lives right at the end of the 8-bit task
  uint8_t *dst = task->data + size8BitTask;
  // Pre-clear the 9*8=72 bit tail end of the memory to all zeroes to avoid having to pad source data to multiples of 9. (plus padding bytes, just to be safe)
  memset(dst + task->sizeExpandedTaskWithPadding - 9 - SPI_9BIT_TASK_PADDING_BYTES, 0, 9 + SPI_9BIT_TASK_PADDING_BYTES);
  // Fill first command byte xxxxxxxx -> 0xxxxxxx x: (low 0 bit to indicate a command byte)
  dst[0] = task->cmd >> 1;
  dst[1] = task->cmd << 7;
  int dstByte = 1;
  int dstBitsUsed = 1;
  int src = 0;
  // Command bit above produced one byte. If there are at least 7 bytes in the data set, we can complete a set of 8 transferred bytes. Fast track
  // that:
  if (size8BitTask >= 7)
  {
    dst[1] |= 0x40 | (task->data[0] >> 2);
    dst[2] = 0x20 | (task->data[0] << 6) | (task->data[1] >> 3);
    dst[3] = 0x10 | (task->data[1] << 5) | (task->data[2] >> 4);
    dst[4] = 0x08 | (task->data[2] << 4) | (task->data[3] >> 5);
    dst[5] = 0x04 | (task->data[3] << 3) | (task->data[4] >> 6);
    dst[6] = 0x02 | (task->data[4] << 2) | (task->data[5] >> 7);
    dst[7] = 0x01 | (task->data[5] << 1);
    dst[8] = (task->data[6] );
    dstByte = 9;
    dstBitsUsed = 0;
    src = 7;
    // More fast tracking: As long as we have multiples of 8 bytes left, fast fill them in.
    // The bound is written as "src + 8 <= size" rather than "src <= size - 8": size8BitTask is
    // unsigned, so "size8BitTask - 8" would wrap around to a huge value when size8BitTask == 7
    // and the loop would then run past the end of both the source and destination buffers.
    while((uint32_t)src + 8 <= size8BitTask)
    {
      uint8_t *d = dst + dstByte;
      dstByte += 9;
      const uint8_t *s = task->data + src;
      src += 8;
      d[0] = 0x80 | (s[0] >> 1);
      d[1] = 0x40 | (s[0] << 7) | (s[1] >> 2);
      d[2] = 0x20 | (s[1] << 6) | (s[2] >> 3);
      d[3] = 0x10 | (s[2] << 5) | (s[3] >> 4);
      d[4] = 0x08 | (s[3] << 4) | (s[4] >> 5);
      d[5] = 0x04 | (s[4] << 3) | (s[5] >> 6);
      d[6] = 0x02 | (s[5] << 2) | (s[6] >> 7);
      d[7] = 0x01 | (s[6] << 1);
      d[8] = (s[7] );
    }
    // Pre-clear the next byte to be written - the slow loop below assumes it is continuing a middle of byte sequence
    // N.B. This write could happen to memory that is not part of the task, so memory allocation of the 9-bit task needs to allocate one byte of padding
    dst[dstByte] = 0;
  }
  // Fill tail data bytes, slow path
  while(src < size8BitTask)
  {
    uint8_t data = task->data[src++];
    // High 1 bit to indicate a data byte
    dst[dstByte] |= 1 << (7 - dstBitsUsed);
    ++dstBitsUsed;
    if (dstBitsUsed == 8) // Written data bit completes a full byte?
    {
      ++dstByte; // Advance to next byte
      dstBitsUsed = 0;
      // Now we are aligned, so can write the data byte directly
      dst[dstByte++] = data;
      dst[dstByte] = 0; // Clear old contents of the next byte to write
    }
    else
    {
      // 8 data bits
      dst[dstByte++] |= data >> dstBitsUsed;
      // This is the first write to the next byte, that should occur without ORring to clear old data in memory
      // N.B. This write could happen to memory that is not part of the task, so memory allocation of the 9-bit task needs to allocate one byte of padding
      dst[dstByte] = data << (8 - dstBitsUsed);
    }
  }
#if 0 // Enable to debug correctness:
#define BYTE_TO_BINARY_PATTERN "%c%c%c%c%c%c%c%c"
#define BYTE_TO_BINARY(byte) \
(byte & 0x80 ? '1' : '0'), \
(byte & 0x40 ? '1' : '0'), \
(byte & 0x20 ? '1' : '0'), \
(byte & 0x10 ? '1' : '0'), \
(byte & 0x08 ? '1' : '0'), \
(byte & 0x04 ? '1' : '0'), \
(byte & 0x02 ? '1' : '0'), \
(byte & 0x01 ? '1' : '0')
  printf("Interleaving result: 8-bit task of size %d bytes became %d bytes:\n", task->size - task->sizeExpandedTaskWithPadding, task->sizeExpandedTaskWithPadding - SPI_9BIT_TASK_PADDING_BYTES);
  printf("8-bit c" BYTE_TO_BINARY_PATTERN, BYTE_TO_BINARY(task->cmd));
  for(int i = 0; i < task->size - task->sizeExpandedTaskWithPadding; ++i)
    printf("d" BYTE_TO_BINARY_PATTERN, BYTE_TO_BINARY(task->data[i]));
  printf("\n9-bit ");
  for(int i = 0; i < task->sizeExpandedTaskWithPadding - SPI_9BIT_TASK_PADDING_BYTES; ++i)
    printf(BYTE_TO_BINARY_PATTERN, BYTE_TO_BINARY(dst[i]));
  printf("\n\n");
#endif
}
// Expands the 16-bit payload words of the given task into 32-bit words and writes them, preceded
// by the task's command word, into the task's own buffer directly after the original payload.
// Each output word holds the 16-bit source value in its upper half and the constant 0x1500 in its
// lower half.
//   task: the task to expand. task->size must already include task->sizeExpandedTaskWithPadding
//         bytes of room after the original payload.
void Interleave16BitSPITaskTo32Bit(SPITask *task)
{
  const uint32_t size8BitTask = task->size - task->sizeExpandedTaskWithPadding;
  // 32-bit SPI task lives right at the end of the 16-bit task
  uint32_t *dst = (uint32_t *)(task->data + size8BitTask);
  *dst++ = task->cmd;
  const uint32_t taskSizeU16 = size8BitTask >> 1;
  uint16_t *src = (uint16_t*)task->data;
  for(uint32_t i = 0; i < taskSizeU16; ++i)
  {
    // Widen to uint32_t before shifting: a bare uint16_t operand is promoted to signed int, and
    // shifting a value with its top bit set left by 16 would then shift into the sign bit.
    dst[i] = 0x1500u | ((uint32_t)src[i] << 16);
  }
}
#endif // ~SPI_3WIRE_PROTOCOL
// Busy-waits until the current polled SPI transfer is no longer in progress, i.e. until the
// SPI0 CS register stops reporting TA=1 together with DONE=0. While waiting, and once more after
// the wait, pending receive data is discarded by writing CLEAR_RX to the CS register.
void WaitForPolledSPITransferToFinish()
{
uint32_t cs;
// XORing with TA inverts the TA bit, so the masked expression is zero exactly when TA is set and DONE is clear.
while (!(((cs = spi->cs) ^ BCM2835_SPI0_CS_TA) & (BCM2835_SPI0_CS_DONE | BCM2835_SPI0_CS_TA))) // While TA=1 and DONE=0
if ((cs & (BCM2835_SPI0_CS_RXR | BCM2835_SPI0_CS_RXF)))
spi->cs = BCM2835_SPI0_CS_CLEAR_RX | BCM2835_SPI0_CS_TA | DISPLAY_SPI_DRIVE_SETTINGS;
// This check is not part of the loop above: it runs once, on the last CS value that was read.
if ((cs & BCM2835_SPI0_CS_RXD)) spi->cs = BCM2835_SPI0_CS_CLEAR_RX | BCM2835_SPI0_CS_TA | DISPLAY_SPI_DRIVE_SETTINGS;
}
#ifdef ALL_TASKS_SHOULD_DMA
#ifndef USE_DMA_TRANSFERS
#error When building with #define ALL_TASKS_SHOULD_DMA enabled, -DUSE_DMA_TRANSFERS=ON should be set in CMake command line!
#endif
// Synchronously performs a single SPI command byte + N data bytes transfer on the calling thread. Call in between a BEGIN_SPI_COMMUNICATION() and END_SPI_COMMUNICATION() pair.
// This is the ALL_TASKS_SHOULD_DMA variant: tasks that are pixel writes or cursor moves with a payload of at least
// TASK_SIZE_TO_USE_DMA bytes are sent with DMA, everything else is sent with polled SPI. previousTaskWasSPI remembers
// which of the two mechanisms the preceding task used, so that the matching wait is performed before switching.
void RunSPITask(SPITask *task)
{
uint32_t cs;
uint8_t *tStart = task->PayloadStart();
uint8_t *tEnd = task->PayloadEnd();
const uint32_t payloadSize = tEnd - tStart;
// The first up to 15 payload bytes are written to the FIFO without polling the TXD flag.
uint8_t *tPrefillEnd = tStart + MIN(15, payloadSize);
#define TASK_SIZE_TO_USE_DMA 4
// Do a DMA transfer if this task is suitable in size for DMA to handle
if (payloadSize >= TASK_SIZE_TO_USE_DMA && (task->cmd == DISPLAY_WRITE_PIXELS || task->cmd == DISPLAY_SET_CURSOR_X || task->cmd == DISPLAY_SET_CURSOR_Y))
{
if (previousTaskWasSPI)
WaitForPolledSPITransferToFinish();
// printf("DMA cmd=0x%x, data=%d bytes\n", task->cmd, task->PayloadSize());
SPIDMATransfer(task);
previousTaskWasSPI = false;
}
else
{
if (!previousTaskWasSPI)
{
WaitForDMAFinished();
spi->cs = BCM2835_SPI0_CS_TA | BCM2835_SPI0_CS_CLEAR_TX | DISPLAY_SPI_DRIVE_SETTINGS;
// After having done a DMA transfer, the SPI0 DLEN register has reset to zero, so restore it to fast mode.
UNLOCK_FAST_8_CLOCKS_SPI();
}
else
WaitForPolledSPITransferToFinish();
// printf("SPI cmd=0x%x, data=%d bytes\n", task->cmd, task->PayloadSize());
// Send the command word if display is 4-wire (3-wire displays can omit this, commands are interleaved in the data payload stream above)
#ifndef SPI_3WIRE_PROTOCOL
// Data/Control line low while the command is clocked out, high again for the data payload.
CLEAR_GPIO(GPIO_TFT_DATA_CONTROL);
#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE
// On e.g. the ILI9486, all commands are 16-bit, so need to be clocked in in two bytes. The MSB byte is always zero though in all the defined commands.
WRITE_FIFO(0x00);
#endif
WRITE_FIFO(task->cmd);
#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE
while(!(spi->cs & (BCM2835_SPI0_CS_DONE))) /*nop*/;
// Two reads of the FIFO register, one per command byte written above; the values are discarded.
spi->fifo;
spi->fifo;
#else
while(!(spi->cs & (BCM2835_SPI0_CS_RXD|BCM2835_SPI0_CS_DONE))) /*nop*/;
#endif
SET_GPIO(GPIO_TFT_DATA_CONTROL);
#endif
// Send the data payload:
while(tStart < tPrefillEnd) WRITE_FIFO(*tStart++);
while(tStart < tEnd)
{
cs = spi->cs;
// Write the next byte only when the TXD flag is set in the CS register.
if ((cs & BCM2835_SPI0_CS_TXD)) WRITE_FIFO(*tStart++);
// TODO: else asm volatile("yield");
if ((cs & (BCM2835_SPI0_CS_RXR|BCM2835_SPI0_CS_RXF))) spi->cs = BCM2835_SPI0_CS_CLEAR_RX | BCM2835_SPI0_CS_TA | DISPLAY_SPI_DRIVE_SETTINGS;
}
previousTaskWasSPI = true;
}
}
#else
// Synchronously performs a single SPI command byte + N data bytes transfer on the calling thread.
// This is the default variant: the command is always sent with polled SPI; the payload is sent with DMA when
// USE_DMA_TRANSFERS is defined and the payload is larger than DMA_IS_FASTER_THAN_POLLED_SPI bytes, and with
// polled SPI otherwise.
void RunSPITask(SPITask *task)
{
WaitForPolledSPITransferToFinish();
// The Adafruit 1.65" 240x240 ST7789 based display is unique compared to others that it does want to see the Chip Select line go
// low and high to start a new command. For that display we let hardware SPI toggle the CS line, and actually run TA<-0 and TA<-1
// transitions to let the CS line live. For most other displays, we just set CS line always enabled for the display throughout fbcp-ili9341 lifetime,
// which is a tiny bit faster.
#ifdef DISPLAY_NEEDS_CHIP_SELECT_SIGNAL
BEGIN_SPI_COMMUNICATION();
#endif
uint8_t *tStart = task->PayloadStart();
uint8_t *tEnd = task->PayloadEnd();
const uint32_t payloadSize = tEnd - tStart;
// The first up to 15 payload bytes are written to the FIFO without polling the TXD flag.
uint8_t *tPrefillEnd = tStart + MIN(15, payloadSize);
// Send the command word if display is 4-wire (3-wire displays can omit this, commands are interleaved in the data payload stream above)
#ifndef SPI_3WIRE_PROTOCOL
// An SPI transfer to the display always starts with one control (command) byte, followed by N data bytes.
CLEAR_GPIO(GPIO_TFT_DATA_CONTROL);
#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE
// On e.g. the ILI9486, all commands are 16-bit, so need to be clocked in in two bytes. The MSB byte is always zero though in all the defined commands.
WRITE_FIFO(0x00);
#endif
WRITE_FIFO(task->cmd);
#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE
while(!(spi->cs & (BCM2835_SPI0_CS_DONE))) /*nop*/;
// Two reads of the FIFO register, one per command byte written above; the values are discarded.
spi->fifo;
spi->fifo;
#else
while(!(spi->cs & (BCM2835_SPI0_CS_RXD|BCM2835_SPI0_CS_DONE))) /*nop*/;
#endif
SET_GPIO(GPIO_TFT_DATA_CONTROL);
#endif // ~!SPI_3WIRE_PROTOCOL
// For small transfers, using DMA is not worth it, but pushing through with polled SPI gives better bandwidth.
// For larger transfers though that are more than this amount of bytes, using DMA is faster.
// This cutoff number was experimentally tested to find where Polled SPI and DMA are as fast.
#define DMA_IS_FASTER_THAN_POLLED_SPI 140
// Do a DMA transfer if this task is suitable in size for DMA to handle
#ifdef USE_DMA_TRANSFERS
if (tEnd - tStart > DMA_IS_FASTER_THAN_POLLED_SPI)
{
SPIDMATransfer(task);
// After having done a DMA transfer, the SPI0 DLEN register has reset to zero, so restore it to fast mode.
UNLOCK_FAST_8_CLOCKS_SPI();
}
else
#endif
{
while(tStart < tPrefillEnd) WRITE_FIFO(*tStart++);
while(tStart < tEnd)
{
uint32_t cs = spi->cs;
// Write the next byte only when the TXD flag is set in the CS register.
if ((cs & BCM2835_SPI0_CS_TXD)) WRITE_FIFO(*tStart++);
// TODO: else asm volatile("yield");
if ((cs & (BCM2835_SPI0_CS_RXR|BCM2835_SPI0_CS_RXF))) spi->cs = BCM2835_SPI0_CS_CLEAR_RX | BCM2835_SPI0_CS_TA | DISPLAY_SPI_DRIVE_SETTINGS;
}
}
#ifdef DISPLAY_NEEDS_CHIP_SELECT_SIGNAL
END_SPI_COMMUNICATION();
#endif
}
#endif
// Shared memory area that holds the SPI task queue (buffer, queueHead, queueTail, spiBytesQueued).
SharedMemory *spiTaskMemory = 0;
// Accumulated time the SPI thread has spent waiting for new tasks (updated only when STATISTICS is defined).
volatile uint64_t spiThreadIdleUsecs = 0;
// tick() value taken when the SPI thread last started waiting (updated only when STATISTICS is defined).
volatile uint64_t spiThreadSleepStartTime = 0;
// 1 while the SPI thread is waiting for new tasks, 0 otherwise (updated only when STATISTICS is defined).
volatile int spiThreadSleeping = 0;
// NOTE(review): not referenced in the visible part of this file - presumably the estimated transfer time of one byte over the SPI bus; confirm at the use site.
double spiUsecsPerByte;
// Returns a pointer to the task at the head of the queue without removing it, or 0 if the queue is empty.
// A task with cmd == 0 at the head position is treated as a wrap-around marker: the head is then moved
// back to offset 0 of the buffer and the task located there is returned instead.
SPITask *GetTask() // Returns the first task in the queue, called in worker thread
{
uint32_t head = spiTaskMemory->queueHead;
uint32_t tail = spiTaskMemory->queueTail;
if (head == tail) return 0;
SPITask *task = (SPITask*)(spiTaskMemory->buffer + head);
if (task->cmd == 0) // Wrapped around?
{
spiTaskMemory->queueHead = 0;
__sync_synchronize();
// After wrapping, head is 0; if the tail snapshot is also 0 the queue is empty.
if (tail == 0) return 0;
task = (SPITask*)spiTaskMemory->buffer;
}
return task;
}
// Removes the given task from the queue: subtracts its payload size plus one from the queued byte
// counter and advances the queue head to the first byte after the task (task header + task->size).
//   task: the task previously returned by GetTask().
void DoneTask(SPITask *task) // Frees the first SPI task from the queue, called in worker thread
{
__atomic_fetch_sub(&spiTaskMemory->spiBytesQueued, task->PayloadSize()+1, __ATOMIC_RELAXED);
spiTaskMemory->queueHead = (uint32_t)((uint8_t*)task - spiTaskMemory->buffer) + sizeof(SPITask) + task->size;
__sync_synchronize();
}
extern volatile bool programRunning;
void ExecuteSPITasks()
{
#ifndef USE_DMA_TRANSFERS
BEGIN_SPI_COMMUNICATION();
#endif
{
while(programRunning && spiTaskMemory->queueTail != spiTaskMemory->queueHead)
{
SPITask *task = GetTask();
if (task)
{
RunSPITask(task);
DoneTask(task);
}
}
}
#ifndef USE_DMA_TRANSFERS
END_SPI_COMMUNICATION();
#endif
}
#if !defined(KERNEL_MODULE) && defined(USE_SPI_THREAD)
pthread_t spiThread;
// A worker thread that keeps the SPI bus filled at all times
// Loops until programRunning becomes false: whenever the task queue is non-empty it is drained with
// ExecuteSPITasks(); otherwise the thread blocks in a futex wait on the queue tail until it is woken.
// When STATISTICS is defined, the time spent waiting is added to spiThreadIdleUsecs.
//   unused: thread argument, ignored.
void *spi_thread(void *unused)
{
#ifdef RUN_WITH_REALTIME_THREAD_PRIORITY
SetRealtimeThreadPriority();
#endif
while(programRunning)
{
if (spiTaskMemory->queueTail != spiTaskMemory->queueHead)
{
ExecuteSPITasks();
}
else
{
#ifdef STATISTICS
uint64_t t0 = tick();
spiThreadSleepStartTime = t0;
__atomic_store_n(&spiThreadSleeping, 1, __ATOMIC_RELAXED);
#endif
// The futex wait is keyed on queueTail with the current queueHead as the expected value, i.e. it is meant to sleep only while the queue is still empty.
if (programRunning) syscall(SYS_futex, &spiTaskMemory->queueTail, FUTEX_WAIT, spiTaskMemory->queueHead, 0, 0, 0); // Start sleeping until we get new tasks
#ifdef STATISTICS
__atomic_store_n(&spiThreadSleeping, 0, __ATOMIC_RELAXED);
uint64_t t1 = tick();
__sync_fetch_and_add(&spiThreadIdleUsecs, t1-t0);
#endif
}
}
pthread_exit(0);
}
#endif
// Initializes the SPI bus and the SPI task queue:
//  - maps the GPIO and SPI0 peripheral registers (ioremap() in the kernel module, /dev/mem mmap() in userland),
//  - computes spiUsecsPerByte from the maximum BCM core clock and SPI_BUS_CLOCK_DIVISOR,
//  - configures the SPI0 GPIO pins and resets the SPI control register (unless built as a non-driving kernel module client),
//  - obtains the SPI task ring buffer (mmap() from the kernel driver, kmalloc() in the kernel module, Malloc() otherwise),
//  - in userland builds that drive the display: initializes the display and either starts the SPI worker thread
//    (USE_SPI_THREAD) or leaves SPI Transfer Active set for the main thread.
// Returns 0. Unrecoverable failures are reported through FATAL_ERROR().
int InitSPI()
{
#ifdef KERNEL_MODULE
#define BCM2835_PERI_BASE 0x3F000000
#define BCM2835_GPIO_BASE 0x200000
#define BCM2835_SPI0_BASE 0x204000
  printk("ioremapping %p\n", (void*)(BCM2835_PERI_BASE+BCM2835_GPIO_BASE));
  void *bcm2835 = ioremap(BCM2835_PERI_BASE+BCM2835_GPIO_BASE, 32768);
  printk("Got bcm address %p\n", bcm2835);
  if (!bcm2835) FATAL_ERROR("Failed to map BCM2835 address!");
  spi = (volatile SPIRegisterFile*)((uintptr_t)bcm2835 + BCM2835_SPI0_BASE - BCM2835_GPIO_BASE);
  gpio = (volatile GPIORegisterFile*)((uintptr_t)bcm2835);
#else // Userland version
  // Memory map GPIO and SPI peripherals for direct access
  mem_fd = open("/dev/mem", O_RDWR|O_SYNC);
  if (mem_fd < 0) FATAL_ERROR("can't open /dev/mem (run as sudo)");
  printf("bcm_host_get_peripheral_address: %p, bcm_host_get_peripheral_size: %u, bcm_host_get_sdram_address: %p\n", bcm_host_get_peripheral_address(), bcm_host_get_peripheral_size(), bcm_host_get_sdram_address());
  bcm2835 = mmap(NULL, bcm_host_get_peripheral_size(), (PROT_READ | PROT_WRITE), MAP_SHARED, mem_fd, bcm_host_get_peripheral_address());
  if (bcm2835 == MAP_FAILED) FATAL_ERROR("mapping /dev/mem failed");
  spi = (volatile SPIRegisterFile*)((uintptr_t)bcm2835 + BCM2835_SPI0_BASE);
  gpio = (volatile GPIORegisterFile*)((uintptr_t)bcm2835 + BCM2835_GPIO_BASE);
  systemTimerRegister = (volatile uint64_t*)((uintptr_t)bcm2835 + BCM2835_TIMER_BASE + 0x04); // Generates an unaligned 64-bit pointer, but seems to be fine.
  // TODO: On graceful shutdown, (ctrl-c signal?) close(mem_fd)
#endif
  uint32_t currentBcmCoreSpeed = MailboxRet2(0x00030002/*Get Clock Rate*/, 0x4/*CORE*/);
  uint32_t maxBcmCoreTurboSpeed = MailboxRet2(0x00030004/*Get Max Clock Rate*/, 0x4/*CORE*/);
  // Estimate how many microseconds transferring a single byte over the SPI bus takes?
  spiUsecsPerByte = 1000000.0 * 8.0/*bits/byte*/ * SPI_BUS_CLOCK_DIVISOR / maxBcmCoreTurboSpeed;
  printf("BCM core speed: current: %uhz, max turbo: %uhz. SPI CDIV: %d, SPI max frequency: %.0fhz\n", currentBcmCoreSpeed, maxBcmCoreTurboSpeed, SPI_BUS_CLOCK_DIVISOR, (double)maxBcmCoreTurboSpeed / SPI_BUS_CLOCK_DIVISOR);
#if !defined(KERNEL_MODULE_CLIENT) || defined(KERNEL_MODULE_CLIENT_DRIVES)
  // By default all GPIO pins are in input mode (0x00), initialize them for SPI and GPIO writes
#ifdef GPIO_TFT_DATA_CONTROL
  SET_GPIO_MODE(GPIO_TFT_DATA_CONTROL, 0x01); // Data/Control pin to output (0x01)
#endif
  SET_GPIO_MODE(GPIO_SPI0_MISO, 0x04);
  SET_GPIO_MODE(GPIO_SPI0_MOSI, 0x04);
  SET_GPIO_MODE(GPIO_SPI0_CLK, 0x04);
#ifdef DISPLAY_NEEDS_CHIP_SELECT_SIGNAL
  // The Adafruit 1.65" 240x240 ST7789 based display is unique compared to others that it does want to see the Chip Select line go
  // low and high to start a new command. For that display we let hardware SPI toggle the CS line, and actually run TA<-0 and TA<-1
  // transitions to let the CS line live. For most other displays, we just set CS line always enabled for the display throughout
  // fbcp-ili9341 lifetime, which is a tiny bit faster.
  SET_GPIO_MODE(GPIO_SPI0_CE0, 0x04);
#ifdef DISPLAY_USES_CS1
  SET_GPIO_MODE(GPIO_SPI0_CE1, 0x04);
#endif
#else
  // Set the SPI 0 pin explicitly to output, and enable chip select on the line by setting it to low.
  // fbcp-ili9341 assumes exclusive access to the SPI0 bus, and exclusive presence of only one device on the bus,
  // which is (permanently) activated here.
  SET_GPIO_MODE(GPIO_SPI0_CE0, 0x01);
  CLEAR_GPIO(GPIO_SPI0_CE0);
#ifdef DISPLAY_USES_CS1
  SET_GPIO_MODE(GPIO_SPI0_CE1, 0x01);
#endif
#endif
  spi->cs = BCM2835_SPI0_CS_CLEAR | DISPLAY_SPI_DRIVE_SETTINGS; // Initialize the Control and Status register to defaults: CS=0 (Chip Select), CPHA=0 (Clock Phase), CPOL=0 (Clock Polarity), CSPOL=0 (Chip Select Polarity), TA=0 (Transfer not active), and reset TX and RX queues.
  spi->clk = SPI_BUS_CLOCK_DIVISOR; // Clock Divider determines SPI bus speed, resulting speed=256MHz/clk
#endif
  // Initialize SPI thread task buffer memory
#ifdef KERNEL_MODULE_CLIENT
  int driverfd = open("/proc/bcm2835_spi_display_bus", O_RDWR|O_SYNC);
  if (driverfd < 0) FATAL_ERROR("Could not open SPI ring buffer - kernel driver module not running?");
  spiTaskMemory = (SharedMemory*)mmap(NULL, SHARED_MEMORY_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED/* | MAP_NORESERVE | MAP_POPULATE | MAP_LOCKED*/, driverfd, 0);
  close(driverfd);
  if (spiTaskMemory == MAP_FAILED) FATAL_ERROR("Could not mmap SPI ring buffer!");
  // queueHead/queueTail are uint32_t offsets and sharedMemoryBaseInPhysMemory is a uintptr_t: convert them to pointers
  // explicitly, since passing an integer where printf() expects a %p argument is undefined behavior.
  printf("Got shared memory block %p, ring buffer head %p, ring buffer tail %p, shared memory block phys address: %p\n", (const char *)spiTaskMemory, (void*)(uintptr_t)spiTaskMemory->queueHead, (void*)(uintptr_t)spiTaskMemory->queueTail, (void*)spiTaskMemory->sharedMemoryBaseInPhysMemory);
#ifdef USE_DMA_TRANSFERS
  printf("DMA TX channel: %d, DMA RX channel: %d\n", spiTaskMemory->dmaTxChannel, spiTaskMemory->dmaRxChannel);
#endif
#else
#ifdef KERNEL_MODULE
  spiTaskMemory = (SharedMemory*)kmalloc(SHARED_MEMORY_SIZE, GFP_KERNEL | GFP_DMA);
  if (!spiTaskMemory) FATAL_ERROR("Failed to allocate SPI task memory!"); // Would otherwise be dereferenced by the memset() below
  // TODO: Ideally we would be able to directly perform the DMA from the SPI ring buffer in 'spiTaskMemory'. However
  // that pointer is shared to userland, and it is proving troublesome to make it both userland-writable as well as cache-bypassing DMA coherent.
  // Therefore these two memory areas are separate for now, and we memcpy() from SPI ring buffer to the following intermediate 'dmaSourceMemory'
  // memory area to perform the DMA transfer. Is there a way to avoid this intermediate buffer? That would improve performance a bit.
  dmaSourceMemory = (SharedMemory*)dma_alloc_writecombine(0, SHARED_MEMORY_SIZE, &spiTaskMemoryPhysical, GFP_KERNEL);
  if (!dmaSourceMemory) FATAL_ERROR("Failed to allocate DMA source memory!");
  LOG("Allocated DMA memory: mem: %p, phys: %p", spiTaskMemory, (void*)spiTaskMemoryPhysical);
  memset((void*)spiTaskMemory, 0, SHARED_MEMORY_SIZE);
#else
  spiTaskMemory = (SharedMemory*)Malloc(SHARED_MEMORY_SIZE, "spi.cpp shared task memory");
#endif
  // Start with an empty queue.
  spiTaskMemory->queueHead = spiTaskMemory->queueTail = spiTaskMemory->spiBytesQueued = 0;
#endif
#ifdef USE_DMA_TRANSFERS
  InitDMA();
#endif
  // Enable fast 8 clocks per byte transfer mode, instead of slower 9 clocks per byte.
  UNLOCK_FAST_8_CLOCKS_SPI();
#if !defined(KERNEL_MODULE) && (!defined(KERNEL_MODULE_CLIENT) || defined(KERNEL_MODULE_CLIENT_DRIVES))
  printf("Initializing display\n");
  InitSPIDisplay();
#ifdef USE_SPI_THREAD
  // Create a dedicated thread to feed the SPI bus. While this is fast, it consumes a lot of CPU. It would be best to replace
  // this thread with a kernel module that processes the created SPI task queue using interrupts. (while juggling the GPIO D/C line as well)
  printf("Creating SPI task thread\n");
  int rc = pthread_create(&spiThread, NULL, spi_thread, NULL); // After creating the thread, it is assumed to have ownership of the SPI bus, so no SPI chat on the main thread after this.
  if (rc != 0) FATAL_ERROR("Failed to create SPI thread!");
#else
  // We will be running SPI tasks continuously from the main thread, so keep SPI Transfer Active throughout the lifetime of the driver.
  BEGIN_SPI_COMMUNICATION();
#endif
#endif
  LOG("InitSPI done");
  return 0;
}
// Shuts down the SPI subsystem: waits for the SPI worker thread to exit (if one was created), deinitializes the display
// and DMA, resets the SPI control register, returns the GPIO pins to input mode (0), unmaps the peripheral registers,
// closes /dev/mem and releases the SPI task ring buffer.
void DeinitSPI()
{
  // Only join the worker thread in the build configurations in which InitSPI() actually creates it.
#if defined(USE_SPI_THREAD) && !defined(KERNEL_MODULE) && (!defined(KERNEL_MODULE_CLIENT) || defined(KERNEL_MODULE_CLIENT_DRIVES))
  pthread_join(spiThread, NULL);
  spiThread = (pthread_t)0;
#endif
  DeinitSPIDisplay();
#ifdef USE_DMA_TRANSFERS
  DeinitDMA();
#endif
  spi->cs = BCM2835_SPI0_CS_CLEAR | DISPLAY_SPI_DRIVE_SETTINGS; // TA=0 (Transfer not active), and reset TX and RX queues.
#ifndef KERNEL_MODULE_CLIENT
  // Return all pins that InitSPI() may have configured back to input mode (0).
#ifdef GPIO_TFT_DATA_CONTROL
  SET_GPIO_MODE(GPIO_TFT_DATA_CONTROL, 0);
#endif
  SET_GPIO_MODE(GPIO_SPI0_CE1, 0);
  SET_GPIO_MODE(GPIO_SPI0_CE0, 0);
  SET_GPIO_MODE(GPIO_SPI0_MISO, 0);
  SET_GPIO_MODE(GPIO_SPI0_MOSI, 0);
  SET_GPIO_MODE(GPIO_SPI0_CLK, 0);
#endif
  if (bcm2835)
  {
    munmap((void*)bcm2835, bcm_host_get_peripheral_size());
    bcm2835 = 0;
  }
  if (mem_fd >= 0)
  {
    close(mem_fd);
    mem_fd = -1;
  }
#ifdef KERNEL_MODULE_CLIENT
  // In this configuration the ring buffer was mmap()ed from the kernel driver in InitSPI(), so release that mapping.
  if (spiTaskMemory) munmap((void*)spiTaskMemory, SHARED_MEMORY_SIZE);
#else
#ifdef KERNEL_MODULE
  kfree(spiTaskMemory);
  dma_free_writecombine(0, SHARED_MEMORY_SIZE, dmaSourceMemory, spiTaskMemoryPhysical);
  spiTaskMemoryPhysical = 0;
#else
  free(spiTaskMemory);
#endif
#endif
  spiTaskMemory = 0;
}

398
usr/fbcp-ili9341/spi.h Normal file
View File

@ -0,0 +1,398 @@
#pragma once
#ifndef KERNEL_MODULE
#include <inttypes.h>
#include <sys/syscall.h>
#endif
#include <linux/futex.h>
#include "display.h"
#include "tick.h"
#include "dma.h"
#include "display.h"
#define BCM2835_GPIO_BASE 0x200000 // Address to GPIO register file
#define BCM2835_SPI0_BASE 0x204000 // Address to SPI0 register file
#define BCM2835_TIMER_BASE 0x3000 // Address to System Timer register file
#define BCM2835_SPI0_CS_RXF 0x00100000 // Receive FIFO is full
#define BCM2835_SPI0_CS_RXR 0x00080000 // FIFO needs reading
#define BCM2835_SPI0_CS_TXD 0x00040000 // TXD TX FIFO can accept Data
#define BCM2835_SPI0_CS_RXD 0x00020000 // RXD RX FIFO contains Data
#define BCM2835_SPI0_CS_DONE 0x00010000 // Done transfer Done
#define BCM2835_SPI0_CS_ADCS 0x00000800 // Automatically Deassert Chip Select
#define BCM2835_SPI0_CS_INTR 0x00000400 // Fire interrupts on RXR?
#define BCM2835_SPI0_CS_INTD 0x00000200 // Fire interrupts on DONE?
#define BCM2835_SPI0_CS_DMAEN 0x00000100 // Enable DMA transfers?
#define BCM2835_SPI0_CS_TA 0x00000080 // Transfer Active
#define BCM2835_SPI0_CS_CLEAR 0x00000030 // Clear FIFO Clear RX and TX
#define BCM2835_SPI0_CS_CLEAR_RX 0x00000020 // Clear FIFO Clear RX
#define BCM2835_SPI0_CS_CLEAR_TX 0x00000010 // Clear FIFO Clear TX
#define BCM2835_SPI0_CS_CPOL 0x00000008 // Clock Polarity
#define BCM2835_SPI0_CS_CPHA 0x00000004 // Clock Phase
#define BCM2835_SPI0_CS_CS 0x00000003 // Chip Select
#define BCM2835_SPI0_CS_RXF_SHIFT 20
#define BCM2835_SPI0_CS_RXR_SHIFT 19
#define BCM2835_SPI0_CS_TXD_SHIFT 18
#define BCM2835_SPI0_CS_RXD_SHIFT 17
#define BCM2835_SPI0_CS_DONE_SHIFT 16
#define BCM2835_SPI0_CS_ADCS_SHIFT 11
#define BCM2835_SPI0_CS_INTR_SHIFT 10
#define BCM2835_SPI0_CS_INTD_SHIFT 9
#define BCM2835_SPI0_CS_DMAEN_SHIFT 8
#define BCM2835_SPI0_CS_TA_SHIFT 7
#define BCM2835_SPI0_CS_CLEAR_RX_SHIFT 5
#define BCM2835_SPI0_CS_CLEAR_TX_SHIFT 4
#define BCM2835_SPI0_CS_CPOL_SHIFT 3
#define BCM2835_SPI0_CS_CPHA_SHIFT 2
#define BCM2835_SPI0_CS_CS_SHIFT 0
#define GPIO_SPI0_MOSI 10 // Pin P1-19, MOSI when SPI0 in use
#define GPIO_SPI0_MISO 9 // Pin P1-21, MISO when SPI0 in use
#define GPIO_SPI0_CLK 11 // Pin P1-23, CLK when SPI0 in use
#define GPIO_SPI0_CE0 8 // Pin P1-24, CE0 when SPI0 in use
#define GPIO_SPI0_CE1 7 // Pin P1-26, CE1 when SPI0 in use
extern volatile void *bcm2835;
// Layout of the GPIO registers accessed through the 'gpio' pointer. The reservedN members are unnamed gaps
// between the register groups; the SET_GPIO_MODE/GET_GPIO/SET_GPIO/CLEAR_GPIO macros operate on these fields.
typedef struct GPIORegisterFile
{
uint32_t gpfsel[6], reserved0; // GPIO Function Select registers, 3 bits per pin, 10 pins in an uint32_t
uint32_t gpset[2], reserved1; // GPIO Pin Output Set registers, write a 1 to bit at index I to set the pin at index I high
uint32_t gpclr[2], reserved2; // GPIO Pin Output Clear registers, write a 1 to bit at index I to set the pin at index I low
uint32_t gplev[2]; // GPIO Pin Level registers, GET_GPIO() tests the bit at index I to read pin I
} GPIORegisterFile;
extern volatile GPIORegisterFile *gpio;
// GPIO access helpers operating on the memory mapped 'gpio' register file.
// SET_GPIO_MODE(pin, mode): writes the 3-bit function select value 'mode' for 'pin', leaving the other pins in the same register untouched.
// GET_GPIO_MODE(pin): evaluates to the current 3-bit function select value of 'pin'.
// GET_GPIO(pin): evaluates to nonzero if the level bit of 'pin' is set, 0 otherwise.
// SET_GPIO(pin) / CLEAR_GPIO(pin): drive 'pin' high / low.
// All bit masks are built from unsigned constants so that shifting into bit 31 is well defined, and each macro is
// fully parenthesized so that it behaves as a single expression wherever it is used.
#define SET_GPIO_MODE(pin, mode) (gpio->gpfsel[(pin)/10] = (gpio->gpfsel[(pin)/10] & ~(0x7u << (((pin) % 10) * 3))) | ((uint32_t)(mode) << (((pin) % 10) * 3)))
#define GET_GPIO_MODE(pin) ((gpio->gpfsel[(pin)/10] & (0x7u << (((pin) % 10) * 3))) >> (((pin) % 10) * 3))
#define GET_GPIO(pin) (gpio->gplev[0] & (1u << (pin))) // Pin must be (0-31)
#define SET_GPIO(pin) (gpio->gpset[0] = 1u << (pin)) // Pin must be (0-31)
#define CLEAR_GPIO(pin) (gpio->gpclr[0] = 1u << (pin)) // Pin must be (0-31)
// Layout of the SPI0 registers accessed through the 'spi' pointer.
// The BCM2835_SPI0_CS_* constants above are the bit masks for the 'cs' register.
typedef struct SPIRegisterFile
{
uint32_t cs; // SPI Master Control and Status register
uint32_t fifo; // SPI Master TX and RX FIFOs
uint32_t clk; // SPI Master Clock Divider
uint32_t dlen; // SPI Master Number of DMA Bytes to Write
} SPIRegisterFile;
extern volatile SPIRegisterFile *spi;
// Defines the size of the SPI task memory buffer in bytes. This memory buffer can contain two frames worth of tasks at maximum,
// so for best performance, should be at least ~DISPLAY_WIDTH*DISPLAY_HEIGHT*BYTES_PER_PIXEL*2 bytes in size, plus some small
// amount for structuring each SPITask command. Technically this can be something very small, like 4096b, and not need to contain
// even a single full frame of data, but such small buffers can cause performance issues from threads starving.
#define SHARED_MEMORY_SIZE (DISPLAY_DRAWABLE_WIDTH*DISPLAY_DRAWABLE_HEIGHT*SPI_BYTESPERPIXEL*3)
#define SPI_QUEUE_SIZE (SHARED_MEMORY_SIZE - sizeof(SharedMemory))
#if defined(SPI_3WIRE_DATA_COMMAND_FRAMING_BITS) && SPI_3WIRE_DATA_COMMAND_FRAMING_BITS == 1
// Need a byte of padding for 8-bit -> 9-bit expansion for performance
#define SPI_9BIT_TASK_PADDING_BYTES 1
#else
#define SPI_9BIT_TASK_PADDING_BYTES 0
#endif
// Defines the maximum size of a single SPI task, in bytes. This excludes the command byte. If MAX_SPI_TASK_SIZE
// is not defined, there is no length limit that applies. (In ALL_TASKS_SHOULD_DMA version of DMA transfer,
// there is DMA chaining, so SPI tasks can be arbitrarily long)
#ifndef ALL_TASKS_SHOULD_DMA
#define MAX_SPI_TASK_SIZE 65528
#endif
// Header of one entry in the SPI task ring buffer. The header is immediately followed in memory by 'size' bytes of
// task data (the flexible 'data' member). The struct is packed, so there is no padding between the fields.
typedef struct __attribute__((packed)) SPITask
{
uint32_t size; // Size, including both 8-bit and 9-bit tasks
#ifdef SPI_3WIRE_PROTOCOL
uint32_t sizeExpandedTaskWithPadding; // Size of the expanded 9-bit/32-bit task. The expanded task starts at address spiTask->data + spiTask->size - spiTask->sizeExpandedTaskWithPadding;
#endif
// Display command for this task. The value 0 is reserved: AllocTask() writes it as the "end of buffer, wrap to beginning" marker.
#ifdef SPI_32BIT_COMMANDS
uint32_t cmd;
#else
uint8_t cmd;
#endif
uint32_t dmaSpiHeader; // Storage returned by DmaSpiHeaderAddress() in non-3-wire builds; presumably the header word written for DMA transfers - confirm against the DMA code
#ifdef OFFLOAD_PIXEL_COPY_TO_DMA_CPP
uint8_t *fb; // AllocTask() initializes this to point at data[0]
uint8_t *prevFb; // AllocTask() initializes this to 0
uint16_t width; // Not set by AllocTask(); presumably filled in by the code that uses fb/prevFb - confirm against caller
#endif
uint8_t data[]; // Contains both 8-bit and 9-bit tasks back to back, 8-bit first, then 9-bit.
#ifdef SPI_3WIRE_PROTOCOL
// In 3-wire builds the payload is the expanded task stored at the end of 'data', excluding the trailing padding bytes.
inline uint8_t *PayloadStart() { return data + (size - sizeExpandedTaskWithPadding); }
inline uint8_t *PayloadEnd() { return data + (size - SPI_9BIT_TASK_PADDING_BYTES); }
inline uint32_t PayloadSize() const { return sizeExpandedTaskWithPadding - SPI_9BIT_TASK_PADDING_BYTES; }
// The four bytes directly in front of the expanded payload.
inline uint32_t *DmaSpiHeaderAddress() { return (uint32_t*)(PayloadStart()-4); }
#else
// Otherwise the payload is simply all of 'data'.
inline uint8_t *PayloadStart() { return data; }
inline uint8_t *PayloadEnd() { return data + size; }
inline uint32_t PayloadSize() const { return size; }
inline uint32_t *DmaSpiHeaderAddress() { return &dmaSpiHeader; }
#endif
} SPITask;
// Starts an SPI transfer by setting the Transfer Active (TA) bit together with the display's drive settings.
#define BEGIN_SPI_COMMUNICATION() do { spi->cs = BCM2835_SPI0_CS_TA | DISPLAY_SPI_DRIVE_SETTINGS; } while(0)
// Ends an SPI transfer: spins while a transfer is active and not yet done, clearing the RX FIFO whenever it reports
// RXR or RXF, and finally writes the control register without the TA bit.
#define END_SPI_COMMUNICATION() do { \
uint32_t cs; \
while (!(((cs = spi->cs) ^ BCM2835_SPI0_CS_TA) & (BCM2835_SPI0_CS_DONE | BCM2835_SPI0_CS_TA))) /* While TA=1 and DONE=0*/ \
{ \
if ((cs & (BCM2835_SPI0_CS_RXR | BCM2835_SPI0_CS_RXF))) \
spi->cs = BCM2835_SPI0_CS_CLEAR_RX | BCM2835_SPI0_CS_TA | DISPLAY_SPI_DRIVE_SETTINGS; \
} \
spi->cs = BCM2835_SPI0_CS_CLEAR_RX | DISPLAY_SPI_DRIVE_SETTINGS; /* Clear TA and any pending bytes */ \
} while(0)
// Spins until the DONE bit is set, clearing the RX FIFO whenever it reports RXR or RXF. Unlike END_SPI_COMMUNICATION(),
// this leaves the TA bit set.
#define WAIT_SPI_FINISHED() do { \
uint32_t cs; \
while (!((cs = spi->cs) & BCM2835_SPI0_CS_DONE)) /* While DONE=0*/ \
{ \
if ((cs & (BCM2835_SPI0_CS_RXR | BCM2835_SPI0_CS_RXF))) \
spi->cs = BCM2835_SPI0_CS_CLEAR_RX | BCM2835_SPI0_CS_TA | DISPLAY_SPI_DRIVE_SETTINGS; \
} \
} while(0)
// A convenience for defining and dispatching SPI task bytes inline
// SPI_TRANSFER(command, data bytes...): allocates a task holding the given data bytes, commits it to the queue and then
// immediately runs and releases it on the calling thread.
// The bytes are staged in a uint8_t array (not char), so that values above 0x7F are not a narrowing conversion on
// platforms where plain char is signed.
#define SPI_TRANSFER(command, ...) do { \
  uint8_t data_buffer[] = { __VA_ARGS__ }; \
  SPITask *t = AllocTask(sizeof(data_buffer)); \
  t->cmd = (command); \
  memcpy(t->data, data_buffer, sizeof(data_buffer)); \
  CommitTask(t); \
  RunSPITask(t); \
  DoneTask(t); \
} while(0)
// QUEUE_SPI_TRANSFER(command, data bytes...): like SPI_TRANSFER(), but only commits the task to the queue and leaves
// running it to whoever processes the queue.
#define QUEUE_SPI_TRANSFER(command, ...) do { \
  uint8_t data_buffer[] = { __VA_ARGS__ }; \
  SPITask *t = AllocTask(sizeof(data_buffer)); \
  t->cmd = (command); \
  memcpy(t->data, data_buffer, sizeof(data_buffer)); \
  CommitTask(t); \
} while(0)
// Helpers for queuing cursor positioning commands. Each variant allocates a task, fills in the command and coordinate
// bytes, adds the number of bytes sent (command byte(s) + data bytes) to a 'bytesTransferred' variable that must be
// in scope at the call site, and commits the task to the queue.
#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE // For displays that have their command register set be 16 bits word size width (ILI9486)
// Queues 'cursor' with 'pos' as two 16-bit words: a zero byte precedes each of the high and low bytes of 'pos'.
#define QUEUE_MOVE_CURSOR_TASK(cursor, pos) do { \
SPITask *task = AllocTask(4); \
task->cmd = (cursor); \
task->data[0] = 0; \
task->data[1] = (pos) >> 8; \
task->data[2] = 0; \
task->data[3] = (pos) & 0xFF; \
bytesTransferred += 6; \
CommitTask(task); \
} while(0)
// Queues 'cursor' with the start 'x' and end 'endX' coordinates, each sent as two zero-prefixed bytes (high, low).
#define QUEUE_SET_WRITE_WINDOW_TASK(cursor, x, endX) do { \
SPITask *task = AllocTask(8); \
task->cmd = (cursor); \
task->data[0] = 0; \
task->data[1] = (x) >> 8; \
task->data[2] = 0; \
task->data[3] = (x) & 0xFF; \
task->data[4] = 0; \
task->data[5] = (endX) >> 8; \
task->data[6] = 0; \
task->data[7] = (endX) & 0xFF; \
bytesTransferred += 10; \
CommitTask(task); \
} while(0)
#elif defined(DISPLAY_SET_CURSOR_IS_8_BIT) // For displays that have their set cursor commands be a uint8 instead of uint16 (SSD1351)
// Queues 'cursor' with 'x' and 'endX' as one byte each. Note that no QUEUE_MOVE_CURSOR_TASK is defined in this variant.
#define QUEUE_SET_WRITE_WINDOW_TASK(cursor, x, endX) do { \
SPITask *task = AllocTask(2); \
task->cmd = (cursor); \
task->data[0] = (x); \
task->data[1] = (endX); \
bytesTransferred += 3; \
CommitTask(task); \
} while(0)
#else // Regular 8-bit interface with 16bits wide set cursor commands (most displays)
// Queues 'cursor' with 'pos' as two bytes, high byte first.
#define QUEUE_MOVE_CURSOR_TASK(cursor, pos) do { \
SPITask *task = AllocTask(2); \
task->cmd = (cursor); \
task->data[0] = (pos) >> 8; \
task->data[1] = (pos) & 0xFF; \
bytesTransferred += 3; \
CommitTask(task); \
} while(0)
// Queues 'cursor' with 'x' and 'endX' as two bytes each, high byte first.
#define QUEUE_SET_WRITE_WINDOW_TASK(cursor, x, endX) do { \
SPITask *task = AllocTask(4); \
task->cmd = (cursor); \
task->data[0] = (x) >> 8; \
task->data[1] = (x) & 0xFF; \
task->data[2] = (endX) >> 8; \
task->data[3] = (endX) & 0xFF; \
bytesTransferred += 5; \
CommitTask(task); \
} while(0)
#endif
// Header of the memory block that holds the SPI task ring buffer. The block is SHARED_MEMORY_SIZE bytes in total;
// the bytes after this header (the flexible 'buffer' member) store the queued SPITask entries.
typedef struct SharedMemory
{
#ifdef USE_DMA_TRANSFERS
volatile DMAControlBlock cb[2]; // NOTE(review): presumably the control blocks for the TX and RX DMA channels - confirm in dma.cpp
volatile uint32_t dummyDMADestinationWriteAddress;
volatile uint32_t dmaTxChannel, dmaRxChannel; // DMA channel numbers; printed by InitSPI() in KERNEL_MODULE_CLIENT builds
#endif
volatile uint32_t queueHead; // Byte offset into 'buffer' of the oldest unprocessed task; advanced by the consumer (GetTask()/DoneTask())
volatile uint32_t queueTail; // Byte offset into 'buffer' where the next task will be allocated; advanced by the producer (AllocTask()/CommitTask())
volatile uint32_t spiBytesQueued; // Number of actual payload bytes in the queue
volatile uint32_t interruptsRaised;
volatile uintptr_t sharedMemoryBaseInPhysMemory; // Printed by InitSPI() as the "shared memory block phys address"
volatile uint8_t buffer[]; // Ring buffer storage; queueHead == queueTail means the queue is empty
} SharedMemory;
#ifdef KERNEL_MODULE
extern dma_addr_t spiTaskMemoryPhysical;
#define VIRT_TO_BUS(ptr) ((uintptr_t)(ptr) | 0xC0000000U)
#endif
extern SharedMemory *spiTaskMemory;
extern double spiUsecsPerByte;
extern SharedMemory *dmaSourceMemory; // TODO: Optimize away the need to have this at all, instead DMA directly from SPI ring buffer if possible
#ifdef STATISTICS
extern volatile uint64_t spiThreadIdleUsecs;
extern volatile uint64_t spiThreadSleepStartTime;
extern volatile int spiThreadSleeping;
#endif
extern int mem_fd;
#ifdef SPI_3WIRE_PROTOCOL
// Converts the given SPI task in-place from an 8-bit task to a 9-bit task.
void Interleave8BitSPITaskTo9Bit(SPITask *task);
// Converts the given SPI task in-place from a 16-bit task to a 32-bit task.
void Interleave16BitSPITaskTo32Bit(SPITask *task);
// If the given display is a 3-wire SPI display (9 bits/task instead of 8 bits/task), this function computes the byte size of the 8-bit task when it is converted to a 9-bit task.
uint32_t NumBytesNeededFor9BitSPITask(uint32_t byteSizeFor8BitTask);
// If the given display is a 3-wire SPI display with 32 bits bus width, this function computes the byte size of the task when it is converted to a 32-bit task.
uint32_t NumBytesNeededFor32BitSPITask(uint32_t byteSizeFor8BitTask);
#endif
// Reserves space for a new SPI task at the tail of the ring buffer and returns it with its size fields filled in.
// bytes: number of data bytes the caller wants to store in the task. In SPI_3WIRE_PROTOCOL builds, room for the
//        expanded version of the task is reserved after those bytes as well.
// Returns a pointer to the new task. The task only becomes visible to the consumer once CommitTask() is called on it.
// Blocks (busy-waits or sleeps) until the consumer has freed enough room; it never fails.
static inline SPITask *AllocTask(uint32_t bytes) // Returns a pointer to a new SPI task block, called on main thread
{
#ifdef SPI_3WIRE_PROTOCOL
// For 3-wire/9-bit tasks, store the converted task right at the end of the 8-bit task.
#ifdef SPI_32BIT_COMMANDS
uint32_t sizeExpandedTaskWithPadding = NumBytesNeededFor32BitSPITask(bytes) + SPI_9BIT_TASK_PADDING_BYTES;
#else
uint32_t sizeExpandedTaskWithPadding = NumBytesNeededFor9BitSPITask(bytes) + SPI_9BIT_TASK_PADDING_BYTES;
#endif
bytes += sizeExpandedTaskWithPadding;
#else
// const uint32_t totalBytesFor9BitTask = 0;
#endif
uint32_t bytesToAllocate = sizeof(SPITask) + bytes;// + totalBytesFor9BitTask;
uint32_t tail = spiTaskMemory->queueTail;
uint32_t newTail = tail + bytesToAllocate;
// Is the new task too large to write contiguously into the ring buffer, that it's split into two parts? We never split,
// but instead write a sentinel at the end of the ring buffer, and jump the tail back to the beginning of the buffer and
// allocate the new task there. However in doing so, we must make sure that we don't write over the head marker.
if (newTail + sizeof(SPITask)/*Add extra SPITask size so that there will always be room for eob marker*/ >= SPI_QUEUE_SIZE)
{
uint32_t head = spiTaskMemory->queueHead;
// Write a sentinel, but wait for the head to advance first so that it is safe to write.
while(head > tail || head == 0/*Head must move > 0 so that we don't stomp on it*/)
{
#if defined(KERNEL_MODULE_CLIENT) && !defined(KERNEL_MODULE)
// Hack: Pump the kernel module to start transferring in case it has stopped. TODO: Remove this line:
if (!(spi->cs & BCM2835_SPI0_CS_TA)) spi->cs |= BCM2835_SPI0_CS_TA;
// Wait until there are no remaining bytes to process in the far right end of the buffer - we'll write an eob marker there as soon as the read pointer has cleared it.
// At this point the SPI queue may actually be quite empty, so don't sleep (except for now in kernel client app)
usleep(100);
#endif
head = spiTaskMemory->queueHead;
}
SPITask *endOfBuffer = (SPITask*)(spiTaskMemory->buffer + tail);
endOfBuffer->cmd = 0; // Use cmd=0x00 to denote "end of buffer, wrap to beginning"
__sync_synchronize(); // Make the sentinel visible before the tail moves
spiTaskMemory->queueTail = 0;
__sync_synchronize();
#if !defined(KERNEL_MODULE_CLIENT) && !defined(KERNEL_MODULE)
// The queue was empty before the tail moved, so the consumer may be waiting on the tail's old value.
if (spiTaskMemory->queueHead == tail) syscall(SYS_futex, &spiTaskMemory->queueTail, FUTEX_WAKE, 1, 0, 0, 0); // Wake the SPI thread if it was sleeping to get new tasks
#endif
tail = 0;
newTail = bytesToAllocate;
}
// If the SPI task queue is full, wait for the SPI thread to process some tasks. This throttles the main thread to not run too fast.
uint32_t head = spiTaskMemory->queueHead;
while(head > tail && head <= newTail) // The head lies inside the region [tail, newTail] that the new task would occupy
{
#if defined(KERNEL_MODULE_CLIENT) && !defined(KERNEL_MODULE)
// Hack: Pump the kernel module to start transferring in case it has stopped. TODO: Remove this line:
if (!(spi->cs & BCM2835_SPI0_CS_TA)) spi->cs |= BCM2835_SPI0_CS_TA;
#endif
usleep(100); // Since the SPI queue is full, we can afford to sleep a bit on the main thread without introducing lag.
head = spiTaskMemory->queueHead;
}
SPITask *task = (SPITask*)(spiTaskMemory->buffer + tail);
task->size = bytes;
#ifdef SPI_3WIRE_PROTOCOL
task->sizeExpandedTaskWithPadding = sizeExpandedTaskWithPadding;
#endif
#ifdef OFFLOAD_PIXEL_COPY_TO_DMA_CPP
task->fb = &task->data[0];
task->prevFb = 0;
#endif
return task;
}
// Publishes a task obtained from AllocTask() to the consumer by advancing the queue tail past it.
// task: the task to publish; its cmd and data must already be filled in. In SPI_3WIRE_PROTOCOL builds the task is
//       first expanded in place to its 9-bit/32-bit form.
static inline void CommitTask(SPITask *task) // Advertises the given SPI task from main thread to worker, called on main thread
{
#ifdef SPI_3WIRE_PROTOCOL
#ifdef SPI_32BIT_COMMANDS
Interleave16BitSPITaskTo32Bit(task);
#else
Interleave8BitSPITaskTo9Bit(task);
#endif
#endif
__sync_synchronize(); // Make the task contents visible before the tail moves
#if !defined(KERNEL_MODULE_CLIENT) && !defined(KERNEL_MODULE)
uint32_t tail = spiTaskMemory->queueTail; // Remember the old tail to detect below whether the queue was empty
#endif
// New tail = offset of this task within the ring buffer + task header size + task data size.
spiTaskMemory->queueTail = (uint32_t)((uint8_t*)task - spiTaskMemory->buffer) + sizeof(SPITask) + task->size;
// DoneTask() subtracts the same amount (payload size + 1) when the task is released.
__atomic_fetch_add(&spiTaskMemory->spiBytesQueued, task->PayloadSize()+1, __ATOMIC_RELAXED);
__sync_synchronize();
#if !defined(KERNEL_MODULE_CLIENT) && !defined(KERNEL_MODULE)
if (spiTaskMemory->queueHead == tail) syscall(SYS_futex, &spiTaskMemory->queueTail, FUTEX_WAKE, 1, 0, 0, 0); // Wake the SPI thread if it was sleeping to get new tasks
#endif
}
// IN_SINGLE_THREADED_MODE_RUN_TASK(): when there is no SPI worker thread, runs and releases the task at the head of
// the queue on the calling thread; when USE_SPI_THREAD is defined, this is a no-op since the worker thread processes
// the queue. Both variants expand to a single statement that needs a terminating semicolon, so the macro can be used
// safely inside unbraced if/else bodies. Does nothing if GetTask() reports that no task is available.
#ifdef USE_SPI_THREAD
#define IN_SINGLE_THREADED_MODE_RUN_TASK() ((void)0)
#else
#define IN_SINGLE_THREADED_MODE_RUN_TASK() do { \
  SPITask *t = GetTask(); \
  if (t) \
  { \
    RunSPITask(t); \
    DoneTask(t); \
  } \
} while(0)
#endif
int InitSPI(void);
void DeinitSPI(void);
void ExecuteSPITasks(void);
void RunSPITask(SPITask *task);
SPITask *GetTask(void);
void DoneTask(SPITask *task);
void DumpSPICS(uint32_t reg);
#ifdef RUN_WITH_REALTIME_THREAD_PRIORITY
void SetRealtimeThreadPriority();
#endif

View File

@ -0,0 +1,96 @@
#include "config.h"
#ifdef SSD1351
#include "spi.h"
#include <memory.h>
#include <stdio.h>
// Initializes the SSD1351 display controller: optionally pulses the reset pin, sends the controller configuration
// commands at a reduced SPI bus speed, clears the screen, and then restores the configured SPI bus speed.
void InitSSD1351()
{
// If a Reset pin is defined, toggle it briefly high->low->high to enable the device. Some devices do not have a reset pin, in which case compile with GPIO_TFT_RESET_PIN left undefined.
#if defined(GPIO_TFT_RESET_PIN) && GPIO_TFT_RESET_PIN >= 0
printf("Resetting display at reset GPIO pin %d\n", GPIO_TFT_RESET_PIN);
SET_GPIO_MODE(GPIO_TFT_RESET_PIN, 1);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
CLEAR_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
#endif
// Do the initialization with a very low SPI bus speed, so that it will succeed even if the bus speed chosen by the user is too high.
spi->clk = 100;
__sync_synchronize();
BEGIN_SPI_COMMUNICATION();
{
SPI_TRANSFER(0xFD/*Set Command Lock*/, 0x12);
SPI_TRANSFER(0xFD/*Set Command Lock*/, 0xB1);
SPI_TRANSFER(0xAE/*Sleep Mode On (Display OFF)*/);
SPI_TRANSFER(0xB3/*Set Front Clock Divider/Oscillator Frequency*/, 0xF1/*Divide Ratio=1, Oscillator Frequency=0xF*/); // This controls frame rate -> set to fastest
SPI_TRANSFER(0xCA/*Set Multiplex Ratio*/, 95); // This effectively sets the pixel height of the display, set this to 127 for 128x128 OLED, and to 95 for 128x96 OLED. It looks like even the 128x96 OLED has 128x128 bytes worth of internal memory (for hardware scrolling?)
SPI_TRANSFER(0xA0/*Set Remap*/, 0x34/*0x04=BGR<->RGB Swap | 0x10=Vertical swap | 0x20=Enable COM split odd even (this makes pixel addressing sane as one'd expect)*/);
SPI_TRANSFER(0xA1/*Set Display Start Line*/, 0);
SPI_TRANSFER(0xA2/*Set Display Offset*/, 0);
SPI_TRANSFER(0xAB/*Set Function Select*/, 0x01/*16bpp colors*/);
SPI_TRANSFER(0xB5/*Set GPIO0 and GPIO1 pin*/, 0);
SPI_TRANSFER(0xC1/*Set Contrast Current for Color A,B,C*/, 0xC8, 0x80, 0xC8); // These three seem to be first for red, second for green and third for blue, 0x00-0xFF
SPI_TRANSFER(0xC7/*Master Contrast Current Control*/, 0x0F); // 0x0F=max contrast, smaller values=dimmer and less power consumption
// Some voltage settings from the spec sheet to try out, although power on defaults seem to work fine as well:
// SPI_TRANSFER(0xB1/*Set Phase Length*/, 0x32/*Phase1=0, Phase2=4*/);
// SPI_TRANSFER(0xBE/*Set VCOMH Voltage*/, 0x05);
// SPI_TRANSFER(0xB4/*Set Segment Low Voltage*/, 0xA0, 0xB5, 0x55);
// SPI_TRANSFER(0xB6/*Set Second Precharge Period*/, 0x01/*1 DCLK*/);
SPI_TRANSFER(0xA6/*Set Display Normal*/);
SPI_TRANSFER(0xAF/*Sleep Mode OFF/Display ON*/);
ClearScreen();
}
#ifndef USE_DMA_TRANSFERS // For DMA transfers, keep SPI CS & TA active.
END_SPI_COMMUNICATION();
#endif
// And speed up to the desired operation speed finally after init is done.
usleep(10 * 1000); // Delay a bit before restoring CLK, or otherwise this has been observed to cause the display not init if done back to back after the clear operation above.
spi->clk = SPI_BUS_CLOCK_DIVISOR;
}
// Turns the display off. For this controller that only means switching the backlight pin low, and only when both
// GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined; the Display OFF / Enter Sleep Mode commands below are
// compiled out with #if 0.
void TurnDisplayOff()
{
#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01); // Set backlight pin to digital 0/1 output mode (0x01) in case it had been PWM controlled
CLEAR_GPIO(GPIO_TFT_BACKLIGHT); // And turn the backlight off.
#endif
#if 0
QUEUE_SPI_TRANSFER(0x28/*Display OFF*/);
QUEUE_SPI_TRANSFER(0x10/*Enter Sleep Mode*/);
usleep(120*1000); // Sleep off can be sent 120msecs after entering sleep mode the earliest, so synchronously sleep here for that duration to be safe.
#endif
// printf("Turned display OFF\n");
}
// Turns the display on. For this controller that only means switching the backlight pin high, and only when both
// GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined; the Sleep Out / Display ON commands below are compiled out
// with #if 0.
void TurnDisplayOn()
{
#if 0
QUEUE_SPI_TRANSFER(0x11/*Sleep Out*/);
usleep(120 * 1000);
QUEUE_SPI_TRANSFER(0x29/*Display ON*/);
#endif
#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01); // Set backlight pin to digital 0/1 output mode (0x01) in case it had been PWM controlled
SET_GPIO(GPIO_TFT_BACKLIGHT); // And turn the backlight on.
#endif
// printf("Turned display ON\n");
}
// Display-specific shutdown hook called from DeinitSPI(): for the SSD1351 this only clears the screen.
void DeinitSPIDisplay()
{
ClearScreen();
}
#endif

View File

@ -0,0 +1,39 @@
#pragma once
#ifdef SSD1351
// On Adafruit's 1.27" and 1.5" Color OLED Breakout Board 128x96 SSD1351 display, the following speed configurations have been tested (on a Pi 3B):
// core_freq=360: CDIV=20, results in 18.00MHz, works
// core_freq=370: CDIV=20, would result in 18.50MHz, this made the screen work for a while, but then hang
// core_freq=375: CDIV=20, would result in 18.75MHz, this made the screen work for a few seconds, but then go blank shortly after
// core_freq=355: CDIV=18, would result in 19.72MHz, this made the screen work for a few seconds, but then go blank shortly after
// Bandwidth needed to update at 60fps: 128*96*16*60 = 11,796,480 bits/sec,
// so the best working bus speed obtained above (18.00MHz) allows driving the screen at 60fps.
// Data specific to the SSD1351 controller
#define DISPLAY_SET_CURSOR_X 0x15
#define DISPLAY_SET_CURSOR_Y 0x75
#define DISPLAY_WRITE_PIXELS 0x5C
#define DISPLAY_NATIVE_WIDTH 128
#define DISPLAY_NATIVE_HEIGHT 96
#define MUST_SEND_FULL_CURSOR_WINDOW
// The DISPLAY_WRITE_PIXELS command on this display seems to continue from the x&y where previous command left off. This is unlike
// other displays, where issuing a DISPLAY_WRITE_PIXELS command resets the x&y cursor coordinates.
#define DISPLAY_WRITE_PIXELS_CMD_DOES_NOT_RESET_WRITE_CURSOR
// This is defined for displays that have the set cursor command 8 bits wide (0-255) instead of 16 bits (0-65535)
#define DISPLAY_SET_CURSOR_IS_8_BIT
#define InitSPIDisplay InitSSD1351
void InitSSD1351(void);
void TurnDisplayOn(void);
void TurnDisplayOff(void);
#endif

View File

@ -0,0 +1,176 @@
#include "config.h"
#if defined(ST7735R) || defined(ST7735S) || defined(ST7789)
#include "spi.h"
#include <memory.h>
#include <stdio.h>
// Initializes the ST7735R/ST7735S/ST7789/ST7789VW display controller over SPI.
// Sequence: optionally pulse the hardware reset pin, drop the SPI clock divisor to a slow value,
// send the wake-up / pixel format / MADCTL orientation / inversion / display-on commands,
// optionally switch the backlight on, clear the screen, and finally restore the SPI clock divisor
// to SPI_BUS_CLOCK_DIVISOR. The usleep() calls between commands are part of the sequence.
void InitST7735R()
{
// If a Reset pin is defined, toggle it briefly high->low->high to enable the device. Some devices do not have a reset pin, in which case compile with GPIO_TFT_RESET_PIN left undefined.
#if defined(GPIO_TFT_RESET_PIN) && GPIO_TFT_RESET_PIN >= 0
printf("Resetting display at reset GPIO pin %d\n", GPIO_TFT_RESET_PIN);
SET_GPIO_MODE(GPIO_TFT_RESET_PIN, 1);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
CLEAR_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
SET_GPIO(GPIO_TFT_RESET_PIN);
usleep(120 * 1000);
#endif
// Do the initialization with a very low SPI bus speed, so that it will succeed even if the bus speed chosen by the user is too high.
spi->clk = 34;
__sync_synchronize();
BEGIN_SPI_COMMUNICATION();
{
#ifndef ST7789VW // For some reason, ST7789VW does not want to accept the Software Reset command, but screen stays black if SWRESET is sent to it.
SPI_TRANSFER(0x01/*Software Reset*/);
#endif
usleep(120*1000);
SPI_TRANSFER(0x11/*Sleep Out*/);
usleep(120 * 1000);
#ifndef ST7789VW // This is disabled on ST7789VW because it was observed to look visually bad, makes colors a bit too contrasty/deep
SPI_TRANSFER(0x26/*Gamma Curve Select*/, 0x04/*Gamma curve 3 (2.5x if GS=1, 2.2x otherwise)*/);
#endif
SPI_TRANSFER(0x3A/*COLMOD: Pixel Format Set*/, 0x05/*16bpp*/);
usleep(20 * 1000);
// Bit flags of the MADCTL (0x36) parameter byte that are used below.
#define MADCTL_BGR_PIXEL_ORDER (1<<3)
#define MADCTL_ROW_COLUMN_EXCHANGE (1<<5)
#define MADCTL_COLUMN_ADDRESS_ORDER_SWAP (1<<6)
#define MADCTL_ROW_ADDRESS_ORDER_SWAP (1<<7)
#define MADCTL_ROTATE_180_DEGREES (MADCTL_COLUMN_ADDRESS_ORDER_SWAP | MADCTL_ROW_ADDRESS_ORDER_SWAP)
// Assemble the MADCTL value from the compile-time color order and orientation options.
uint8_t madctl = 0;
#if defined(ST7735R) || defined(ST7735S)
madctl |= MADCTL_BGR_PIXEL_ORDER;
#endif
#ifdef DISPLAY_SWAP_BGR
madctl ^= MADCTL_BGR_PIXEL_ORDER;
#endif
#if defined(DISPLAY_FLIP_ORIENTATION_IN_HARDWARE)
madctl |= MADCTL_ROW_COLUMN_EXCHANGE;
#endif
// Row address order swap is always set as the baseline; each 180 degree option below toggles both swap bits with XOR,
// so enabling two of them cancels out.
madctl |= MADCTL_ROW_ADDRESS_ORDER_SWAP;
#if defined(WAVESHARE_ST7789VW_HAT) || defined(WAVESHARE_ST7735S_HAT)
madctl ^= MADCTL_ROTATE_180_DEGREES;
#endif
#ifdef DISPLAY_ROTATE_180_DEGREES
madctl ^= MADCTL_ROTATE_180_DEGREES;
#endif
SPI_TRANSFER(0x36/*MADCTL: Memory Access Control*/, madctl);
usleep(10*1000);
// ST7789 builds default to color inversion on, all other builds to inversion off; DISPLAY_INVERT_COLORS flips that default.
#ifdef ST7789
SPI_TRANSFER(0xBA/*DGMEN: Enable Gamma*/, 0x04);
bool invertColors = true;
#else
bool invertColors = false;
#endif
#ifdef DISPLAY_INVERT_COLORS
invertColors = !invertColors;
#endif
if (invertColors)
SPI_TRANSFER(0x21/*Display Inversion On*/);
else
SPI_TRANSFER(0x20/*Display Inversion Off*/);
SPI_TRANSFER(0x13/*NORON: Partial off (normal)*/);
usleep(10*1000);
#ifdef ST7789
// The ST7789 controller is actually a unit with 320x240 graphics memory area, but only 240x240 portion
// of it is displayed. Therefore if we wanted to swap row address mode above, writes to Y=0...239 range will actually land in
// memory in row addresses Y = 319-(0...239) = 319...80 range. To view this range, we must scroll the view by +80 units in Y
// direction so that contents of Y=80...319 is displayed instead of Y=0...239.
if ((madctl & MADCTL_ROW_ADDRESS_ORDER_SWAP))
SPI_TRANSFER(0x37/*VSCSAD: Vertical Scroll Start Address of RAM*/, 0, 320 - DISPLAY_WIDTH);
#endif
// TODO: The 0xB1 command is not Frame Rate Control for ST7789VW, 0xB3 is (add support to it)
#ifndef ST7789VW
// Frame rate = 850000 / [ (2*RTNA+40) * (162 + FPA+BPA)]
SPI_TRANSFER(0xB1/*FRMCTR1:Frame Rate Control*/, /*RTNA=*/6, /*FPA=*/1, /*BPA=*/1); // This should set frame rate = 99.67 Hz
#endif
SPI_TRANSFER(/*Display ON*/0x29);
usleep(100 * 1000);
// Disabled reference sequence, kept for future comparison against the datasheet; never compiled.
#if 0
// TODO: ST7789VW Python example suggests following, check them against datasheet if there's anything interesting
SPI_TRANSFER(0xB2, 0xc, 0xc, 0, 0x33, 0x33);
SPI_TRANSFER(0xB7, 0x35);
SPI_TRANSFER(0xBb, 0x19);
SPI_TRANSFER(0xc0, 0x2c);
SPI_TRANSFER(0xc2, 0x01);
SPI_TRANSFER(0xc3, 0x12);
SPI_TRANSFER(0xc4, 0x20);
SPI_TRANSFER(0xc6, 0x0f);
SPI_TRANSFER(0xd0, 0xa4, 0xa1);
SPI_TRANSFER(0xe0, 0xd0, 0x04, 0x0d, 0x11, 0x13, 0x2b, 0x3f, 0x54, 0x4c, 0x18, 0x0d, 0x0b, 0x1f, 0x23);
SPI_TRANSFER(0xe1, 0xd0, 0x04, 0x0c, 0x11, 0x13, 0x2c, 0x3f, 0x44, 0x51, 0x2f, 0x1f, 0x1f, 0x20, 0x23);
SPI_TRANSFER(0x21);
SPI_TRANSFER(0x11);
SPI_TRANSFER(0x29);
usleep(100 * 1000);
#endif
#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
printf("Setting TFT backlight on at pin %d\n", GPIO_TFT_BACKLIGHT);
SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01); // Set backlight pin to digital 0/1 output mode (0x01) in case it had been PWM controlled
SET_GPIO(GPIO_TFT_BACKLIGHT); // And turn the backlight on.
#endif
// Clear the screen while still running at the slow init clock.
ClearScreen();
}
#ifndef USE_DMA_TRANSFERS // For DMA transfers, keep SPI CS & TA active.
END_SPI_COMMUNICATION();
#endif
// And speed up to the desired operation speed finally after init is done.
usleep(10 * 1000); // Delay a bit before restoring CLK, or otherwise this has been observed to cause the display not init if done back to back after the clear operation above.
spi->clk = SPI_BUS_CLOCK_DIVISOR;
}
// Blanks the panel by switching its backlight off. Does nothing unless both
// GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined; the controller itself is left running.
void TurnDisplayOff()
{
#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
  // The pin may have been left under PWM control, so force plain digital output mode (0x01) first...
  SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01);
  // ...and then drive it low to turn the backlight off.
  CLEAR_GPIO(GPIO_TFT_BACKLIGHT);
#endif

  // Controller-level power down is currently compiled out; only the backlight is toggled.
#if 0
  QUEUE_SPI_TRANSFER(0x28/*Display OFF*/);
  QUEUE_SPI_TRANSFER(0x10/*Enter Sleep Mode*/);
  usleep(120*1000); // Sleep Out may be sent at the earliest 120 msecs after entering sleep mode, so wait synchronously for that long to be safe.
#endif
  // printf("Turned display OFF\n");
}
// Makes the panel visible again by switching its backlight on. Does nothing unless both
// GPIO_TFT_BACKLIGHT and BACKLIGHT_CONTROL are defined.
void TurnDisplayOn()
{
  // Controller-level wake-up is currently compiled out; only the backlight is toggled.
#if 0
  QUEUE_SPI_TRANSFER(0x11/*Sleep Out*/);
  usleep(120 * 1000);
  QUEUE_SPI_TRANSFER(0x29/*Display ON*/);
#endif

#if defined(GPIO_TFT_BACKLIGHT) && defined(BACKLIGHT_CONTROL)
  // The pin may have been left under PWM control, so force plain digital output mode (0x01) first...
  SET_GPIO_MODE(GPIO_TFT_BACKLIGHT, 0x01);
  // ...and then drive it high to turn the backlight on.
  SET_GPIO(GPIO_TFT_BACKLIGHT);
#endif
  // printf("Turned display ON\n");
}
// Shutdown hook for this display driver: the only teardown performed is clearing the screen.
void DeinitSPIDisplay()
{
  ClearScreen();
}
#endif

View File

@ -0,0 +1,59 @@
#pragma once
// Everything in this header only applies when one of the Sitronix ST7735R/ST7735S/ST7789/ST7789VW controllers is selected.
#if defined(ST7735R) || defined(ST7735S) || defined(ST7789) || defined(ST7789VW)
// On Arduino "A000096" 160x128 ST7735R LCD Screen, the following speed configurations have been tested (on a Pi 3B):
// core_freq=355: CDIV=6, results in 59.167MHz, works
// core_freq=360: CDIV=6, would result in 60.00MHz, this would work for several minutes, but then the display would turn all white at random
// On Adafruit 1.54" 240x240 Wide Angle TFT LCD Display with MicroSD ST7789 screen, the following speed configurations have been tested (on a Pi 3B):
// core_freq=340: CDIV=4, results in 85.00MHz, works
// core_freq=350: CDIV=4, would result in 87.50MHz, which would work for a while, but generate random single pixel glitches every once in a few minutes
// Data specific to the ST7735R/ST7735S/ST7789/ST7789VW controllers: command bytes for setting the write cursor and for streaming pixel data
#define DISPLAY_SET_CURSOR_X 0x2A
#define DISPLAY_SET_CURSOR_Y 0x2B
#define DISPLAY_WRITE_PIXELS 0x2C
// Native panel resolution per controller, in pixels.
#if defined(ST7789) || defined(ST7789VW)
#define DISPLAY_NATIVE_WIDTH 240
#define DISPLAY_NATIVE_HEIGHT 240
#elif defined(ST7735R)
#define DISPLAY_NATIVE_WIDTH 128
#define DISPLAY_NATIVE_HEIGHT 160
#elif defined(ST7735S)
// ST7735S displays are 128x128 pixels, but they have a somewhat odd offset that X,Y=(0,0) is not top-left corner pixel, but X,Y=(2,1) is.
// Therefore consider the display two pixels wider and one pixel higher, and add a constant offset of X=+2, Y=+1 via the DISPLAY_COVERED_* mechanism.
#define DISPLAY_NATIVE_WIDTH 130
#define DISPLAY_NATIVE_HEIGHT 129
#define DISPLAY_NATIVE_COVERED_LEFT_SIDE 2
#define DISPLAY_NATIVE_COVERED_TOP_SIDE 1
#else
#error Unknown display controller!
#endif
// Board-specific GPIO pin defaults for the supported Waveshare hats.
#ifdef WAVESHARE_ST7789VW_HAT
#include "waveshare_st7789vw_hat.h"
#elif defined(WAVESHARE_ST7735S_HAT)
#include "waveshare_st7735s_hat.h"
#endif
// Map the generic display init entry point onto this driver's init function.
#define InitSPIDisplay InitST7735R
void InitST7735R(void);
void TurnDisplayOn(void);
void TurnDisplayOff(void);
#if defined(ST7789) || defined(ST7789VW)
// Unlike all other displays developed so far, Adafruit 1.54" 240x240 ST7789 display
// actually needs to observe the CS line toggle during execution, it cannot just be always activated.
// (ST7735R does not care about this)
// TODO: It is actually untested if ST7789VW really needs this, but does work with it, so kept for now
#define DISPLAY_NEEDS_CHIP_SELECT_SIGNAL
#endif
#endif

View File

@ -0,0 +1,292 @@
#include "config.h"
#include "statistics.h"
#ifdef STATISTICS
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <pthread.h>
#include <syslog.h>
#include "tick.h"
#include "text.h"
#include "spi.h"
#include "util.h"
#include "mailbox.h"
#include "mem_alloc.h"
#include "dma.h"
// Time spent polling the GPU, in tick() units. RefreshStatisticsOverlayText() reads it,
// subtracts the amount it consumed, and reports it as a percentage of the refresh period.
volatile uint64_t timeWastedPollingGPU = 0;
// Core clock divided by (1000000 * spi->clk); shown on the overlay with a "MHz" suffix.
volatile float statsSpiBusSpeed = 0;
// Core clock rate reported by the mailbox, divided by 1000000.
volatile int statsBcmCoreSpeed = 0;
// ARM clock rate reported by the mailbox, divided by 1000000.
volatile int statsCpuFrequency = 0;
// Mailbox temperature reading divided by 1000.0.
volatile double statsCpuTemperature = 0;
// Fraction (0.0 - 1.0) of the last refresh period during which the SPI thread was not idle.
double spiThreadUtilizationRate;
// SPI payload throughput over the last refresh period, in bits per second.
double spiBusDataRate;
// Percentage of the last refresh period spent in GPU polling.
int statsGpuPollingWasted = 0;
// Bytes counted since the last overlay refresh; reset to zero by RefreshStatisticsOverlayText().
uint64_t statsBytesTransferred = 0;
// Number of valid entries in frameSkipTimeHistory; displayed on the overlay as "-N".
int frameSkipTimeHistorySize = 0;
// NOTE(review): presumably timestamps of skipped frames; it is not written or read in this file's visible code - confirm against callers.
uint64_t frameSkipTimeHistory[FRAME_HISTORY_MAX_SIZE] = {};
#ifdef FRAME_COMPLETION_TIME_STATISTICS
#define FRAME_COMPLETION_HISTORY_MAX_SIZE 480
// Frame completion timestamps, newest first (index 0 is the most recent frame).
uint64_t frameCompletionTimeHistory[FRAME_COMPLETION_HISTORY_MAX_SIZE] = {};
int frameCompletionTimeHistorySize = 0;
// Precomputed graph Y coordinates for each frame interval, filled in by RefreshStatisticsOverlayText().
int statsFrameIntervalsY[FRAME_COMPLETION_HISTORY_MAX_SIZE] = {};
int statsFrameIntervalsSize = 0;
int statsTargetFrameRateY = 0;
int statsAvgFrameRateIntervalY = 0;

// Records "a frame finished now": pushes the current tick() value to the front of
// frameCompletionTimeHistory, moving every older entry one slot towards the back.
void AddFrameCompletionTimeMarker()
{
  // Walk from the back so that no entry is overwritten before it has been moved.
  int slot = frameCompletionTimeHistorySize;
  while(slot >= 1)
  {
    frameCompletionTimeHistory[slot] = frameCompletionTimeHistory[slot-1];
    --slot;
  }
  frameCompletionTimeHistory[0] = tick();

  // Grow the logical size until it reaches MAX_SIZE-1, which keeps the shift above inside the array.
  const bool canGrow = frameCompletionTimeHistorySize+1 < FRAME_COMPLETION_HISTORY_MAX_SIZE;
  if (canGrow)
    frameCompletionTimeHistorySize += 1;
}
#else
// Frame completion statistics are compiled out: keep the symbol so callers still link.
void AddFrameCompletionTimeMarker() {}
#endif
// Overlay strings and colors as currently shown on screen. They are rebuilt by
// RefreshStatisticsOverlayText() and painted by DrawStatisticsOverlay().
// Every text buffer is 32 bytes including the terminating NUL.
char dmaChannelsText[32] = {};
char fpsText[32] = {};
char spiUsagePercentageText[32] = {};
char spiBusDataRateText[32] = {};
uint16_t spiUsageColor = 0, fpsColor = 0;
char statsFrameSkipText[32] = {};
char spiSpeedText[32] = {}; // "<ARM>/<core>MHz"
char spiSpeedText2[32] = {}; // "SPI:<speed>MHz (/<divisor>)"
char cpuTemperatureText[32] = {};
uint16_t cpuTemperatureColor = 0;
char gpuPollingWastedText[32] = {};
uint16_t gpuPollingWastedColor = 0;
char cpuMemoryUsedText[32] = {};
char gpuMemoryUsedText[32] = {};
// tick() value at which the overlay text was last rebuilt; used to rate-limit refreshes.
uint64_t statsLastPrint = 0;
// Queries the firmware mailbox for the core clock, temperature and ARM clock (in that order)
// and stores the scaled results in the stats* globals.
void UpdateStatisticsNumbers()
{
  // Core clock: also yields the effective SPI bus speed via the current clock divisor.
  const int coreClock = (int)MailboxRet2(0x00030002/*Get Clock Rate*/, 0x4/*CORE*/);
  statsBcmCoreSpeed = coreClock/1000000;
  statsSpiBusSpeed = (float)coreClock/(1000000*spi->clk);

  // Temperature reading, scaled down by 1000.
  statsCpuTemperature = MailboxRet2(0x00030006/*Get Temperature*/, 0)/1000.0;

  // Main ARM CPU clock.
  const int armClock = (int)MailboxRet2(0x00030002/*Get Clock Rate*/, 0x3/*ARM*/);
  statsCpuFrequency = armClock / 1000000;
}
// Paints the cached statistics strings (and, when FRAME_COMPLETION_TIME_STATISTICS is defined,
// the frame interval graph) on top of 'framebuffer'. Which items are drawn depends on the
// compile-time drawable width/height so that text does not run past the right edge.
// The FRAMERATE_GRAPH_* and AT macros defined here are also used by RefreshStatisticsOverlayText().
void DrawStatisticsOverlay(uint16_t *framebuffer)
{
  // Frame rate at the top-left corner, followed directly by the frame skip count (6 pixels per fps character).
  DrawText(framebuffer, gpuFrameWidth, gpuFramebufferScanlineStrideBytes, gpuFrameHeight, fpsText, 1, 1, fpsColor, 0);
  DrawText(framebuffer, gpuFrameWidth, gpuFramebufferScanlineStrideBytes, gpuFrameHeight, statsFrameSkipText, strlen(fpsText)*6, 1, RGB565(31,0,0), 0);

#if DISPLAY_DRAWABLE_WIDTH > 130
  // Second tier: DMA channels, SPI thread usage and bus data rate.
#ifdef USE_DMA_TRANSFERS
  DrawText(framebuffer, gpuFrameWidth, gpuFramebufferScanlineStrideBytes, gpuFrameHeight, dmaChannelsText, 1, 10, RGB565(31, 44, 8), 0);
#endif
#ifdef USE_SPI_THREAD
  DrawText(framebuffer, gpuFrameWidth, gpuFramebufferScanlineStrideBytes, gpuFrameHeight, spiUsagePercentageText, 75, 10, spiUsageColor, 0);
#endif
  DrawText(framebuffer, gpuFrameWidth, gpuFramebufferScanlineStrideBytes, gpuFrameHeight, spiBusDataRateText, 60, 1, 0xFFFF, 0);
#endif

#if DISPLAY_DRAWABLE_WIDTH > 180
  // Third tier: clock speeds, temperature and GPU polling overhead.
  DrawText(framebuffer, gpuFrameWidth, gpuFramebufferScanlineStrideBytes, gpuFrameHeight, spiSpeedText, 120, 1, RGB565(31,14,20), 0);
  DrawText(framebuffer, gpuFrameWidth, gpuFramebufferScanlineStrideBytes, gpuFrameHeight, spiSpeedText2, 120, 10, RGB565(10,24,31), 0);
  DrawText(framebuffer, gpuFrameWidth, gpuFramebufferScanlineStrideBytes, gpuFrameHeight, cpuTemperatureText, 190, 1, cpuTemperatureColor, 0);
  DrawText(framebuffer, gpuFrameWidth, gpuFramebufferScanlineStrideBytes, gpuFrameHeight, gpuPollingWastedText, 222, 1, gpuPollingWastedColor, 0);
#endif

#if (defined(DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE) && DISPLAY_DRAWABLE_HEIGHT >= 290) || (!defined(DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE) && DISPLAY_DRAWABLE_WIDTH >= 290)
  // Widest tier: memory usage figures.
  DrawText(framebuffer, gpuFrameWidth, gpuFramebufferScanlineStrideBytes, gpuFrameHeight, cpuMemoryUsedText, 250, 1, RGB565(31,50,21), 0);
  DrawText(framebuffer, gpuFrameWidth, gpuFramebufferScanlineStrideBytes, gpuFrameHeight, gpuMemoryUsedText, 250, 10, RGB565(31,50,31), 0);
#endif

#ifdef FRAME_COMPLETION_TIME_STATISTICS
  // Graph geometry; with software orientation flip the roles of the two framebuffer axes are exchanged.
#ifdef DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE
#define FRAMERATE_GRAPH_WIDTH gpuFrameHeight
#define FRAMERATE_GRAPH_MIN_Y 20
#define FRAMERATE_GRAPH_MAX_Y (gpuFrameWidth - 10)
#define AT(x,y) ((x)*(gpuFramebufferScanlineStrideBytes>>1)+(y))
#else
#define FRAMERATE_GRAPH_WIDTH gpuFrameWidth
#define FRAMERATE_GRAPH_MIN_Y 20
#define FRAMERATE_GRAPH_MAX_Y (gpuFrameHeight - 10)
#define AT(x,y) ((y)*(gpuFramebufferScanlineStrideBytes>>1)+(x))
#endif

  const uint16_t black = RGB565(0,0,0);
  // One graph column per recorded interval, newest sample at the right edge.
  // The write order is significant: later writes win where lines overlap.
  for(int column = 0; column < MIN(statsFrameIntervalsSize, FRAMERATE_GRAPH_WIDTH); ++column)
  {
    const int px = FRAMERATE_GRAPH_WIDTH-1-column;
    const int py = statsFrameIntervalsY[column];

    // Line at the graph's minimum Y.
    framebuffer[AT(px, FRAMERATE_GRAPH_MIN_Y)] = RGB565(31,0,0);
    framebuffer[AT(px, FRAMERATE_GRAPH_MIN_Y+1)] = black;

    // Target frame rate line with a black outline.
    framebuffer[AT(px, statsTargetFrameRateY-1)] = black;
    framebuffer[AT(px, statsTargetFrameRateY)] = RGB565(0,63,0);
    framebuffer[AT(px, statsTargetFrameRateY+1)] = black;

    // Average frame interval line with a black outline.
    framebuffer[AT(px, statsAvgFrameRateIntervalY-1)] = black;
    framebuffer[AT(px, statsAvgFrameRateIntervalY)] = RGB565(29,50,7);
    framebuffer[AT(px, statsAvgFrameRateIntervalY+1)] = black;

    // The sample itself: bright centre, dim neighbours, black outline.
    framebuffer[AT(px, py-3)] = black;
    framebuffer[AT(px, py-2)] = black;
    framebuffer[AT(px, py-1)] = RGB565(5,11,5);
    framebuffer[AT(px, py)] = RGB565(31,63,31);
    framebuffer[AT(px, py+1)] = RGB565(5,11,5);
    framebuffer[AT(px, py+2)] = black;
    framebuffer[AT(px, py+3)] = black;

    // Line at the graph's maximum Y.
    framebuffer[AT(px, FRAMERATE_GRAPH_MAX_Y-1)] = black;
    framebuffer[AT(px, FRAMERATE_GRAPH_MAX_Y)] = RGB565(15,30,15);
  }
#endif
}
// Rebuilds all overlay strings, colors and graph coordinates from the latest counters.
// Returns immediately unless at least STATISTICS_REFRESH_INTERVAL tick() units have passed since
// the previous rebuild, so it can be called every frame. Side effects: consumes (subtracts)
// the accumulated SPI idle time and GPU polling time, resets statsBytesTransferred to zero and
// updates statsLastPrint.
// All strings are formatted with snprintf() bounded by the destination size, so an unexpectedly
// large value can truncate the text but never overrun the 32-byte buffers.
void RefreshStatisticsOverlayText()
{
  uint64_t now = tick();
  uint64_t elapsed = now - statsLastPrint;
  if (elapsed < STATISTICS_REFRESH_INTERVAL) return;

#ifdef FRAME_COMPLETION_TIME_STATISTICS
  if (frameCompletionTimeHistorySize > 1)
  {
    // Intervals are clamped to four target frame periods, which maps to the graph's minimum Y.
    uint64_t maxInterval = 4000000 / TARGET_FRAME_RATE;
    uint64_t accumIntervals = 0;
    for(int i = 0; i < frameCompletionTimeHistorySize-1; ++i)
    {
      uint64_t interval = MIN(frameCompletionTimeHistory[i] - frameCompletionTimeHistory[i+1], maxInterval);
      accumIntervals += interval;
      statsFrameIntervalsY[i] = FRAMERATE_GRAPH_MAX_Y - (FRAMERATE_GRAPH_MAX_Y - FRAMERATE_GRAPH_MIN_Y) * interval / maxInterval;
    }
    statsTargetFrameRateY = FRAMERATE_GRAPH_MAX_Y - (FRAMERATE_GRAPH_MAX_Y - FRAMERATE_GRAPH_MIN_Y) * (1000000/TARGET_FRAME_RATE) / maxInterval;
    statsAvgFrameRateIntervalY = FRAMERATE_GRAPH_MAX_Y - (FRAMERATE_GRAPH_MAX_Y - FRAMERATE_GRAPH_MIN_Y) * (accumIntervals / (frameCompletionTimeHistorySize-1)) / maxInterval;
    statsFrameIntervalsSize = frameCompletionTimeHistorySize-1;
  }
  else
    statsFrameIntervalsSize = 0;
#endif

  UpdateStatisticsNumbers();

#ifdef USE_DMA_TRANSFERS
  snprintf(dmaChannelsText, sizeof(dmaChannelsText), "DMATx=%d,Rx=%d", dmaTxChannel, dmaRxChannel);
#endif

#ifdef KERNEL_MODULE_CLIENT
  spiThreadUtilizationRate = 0; // TODO
  int spiRate = 0;
  snprintf(spiUsagePercentageText, sizeof(spiUsagePercentageText), "%s", "N/A");
#else
  // Take the idle time accumulated so far and subtract exactly that amount back out,
  // so idle time added concurrently after the load is kept for the next refresh.
  uint64_t spiThreadIdleFor = __atomic_load_n(&spiThreadIdleUsecs, __ATOMIC_RELAXED);
  __sync_fetch_and_sub(&spiThreadIdleUsecs, spiThreadIdleFor);
  if (__atomic_load_n(&spiThreadSleeping, __ATOMIC_RELAXED)) spiThreadIdleFor += tick() - spiThreadSleepStartTime;
  spiThreadUtilizationRate = MIN(1.0, MAX(0.0, 1.0 - spiThreadIdleFor / (double)STATISTICS_REFRESH_INTERVAL));
  int spiRate = (int)MIN(100, (spiThreadUtilizationRate*100.0));
  snprintf(spiUsagePercentageText, sizeof(spiUsagePercentageText), "%d%%", spiRate);
#endif

  // Bits transferred per second over the elapsed period.
  spiBusDataRate = (double)8.0 * statsBytesTransferred * 1000.0 / (elapsed / 1000.0);

  if (spiRate < 90) spiUsageColor = RGB565(0,63,0);
  else if (spiRate < 100) spiUsageColor = RGB565(31,63,0);
  else spiUsageColor = RGB565(31,0, 0);

  if (spiBusDataRate > 1000000) snprintf(spiBusDataRateText, sizeof(spiBusDataRateText), "%.2fmbps", spiBusDataRate/1000000.0);
  else if (spiBusDataRate > 1000) snprintf(spiBusDataRateText, sizeof(spiBusDataRateText), "%.2fkbps", spiBusDataRate/1000.0);
  else snprintf(spiBusDataRateText, sizeof(spiBusDataRateText), "%.2fbps", spiBusDataRate);

  uint64_t wastedTime = __atomic_load_n(&timeWastedPollingGPU, __ATOMIC_RELAXED);
  __atomic_fetch_sub(&timeWastedPollingGPU, wastedTime, __ATOMIC_RELAXED);
  //const double gpuPollingWastedScalingFactor = 0.369; // A crude heuristic to scale time spent in useless polling to what Linux 'top' tool shows as % usage percentages
  statsGpuPollingWasted = (int)(wastedTime /** gpuPollingWastedScalingFactor*/ * 100 / (now - statsLastPrint));

  statsBytesTransferred = 0;

  if (statsBcmCoreSpeed > 0 && statsCpuFrequency > 0) snprintf(spiSpeedText, sizeof(spiSpeedText), "%d/%dMHz", statsCpuFrequency, statsBcmCoreSpeed);
  else spiSpeedText[0] = '\0';

  if (statsSpiBusSpeed > 0) snprintf(spiSpeedText2, sizeof(spiSpeedText2), "SPI:%.3fMHz (/%d)", statsSpiBusSpeed, spi->clk);
  else spiSpeedText2[0] = '\0';

  if (statsCpuTemperature > 0)
  {
    snprintf(cpuTemperatureText, sizeof(cpuTemperatureText), "%.1fc", statsCpuTemperature);
    if (statsCpuTemperature >= 80) cpuTemperatureColor = RGB565(31, 0, 0);
    else if (statsCpuTemperature >= 65) cpuTemperatureColor = RGB565(31, 63, 0);
    else cpuTemperatureColor = RGB565(0, 63, 0);
  }

  if (statsGpuPollingWasted > 0)
  {
    gpuPollingWastedColor = (statsGpuPollingWasted > 5) ? RGB565(31, 0, 0) : RGB565(31, 63, 0);
    snprintf(gpuPollingWastedText, sizeof(gpuPollingWastedText), "+%d%%", statsGpuPollingWasted);
  }
  else gpuPollingWastedText[0] = '\0';

  statsLastPrint = now;

  if (frameTimeHistorySize >= 3)
  {
    int numInterlacedFramesInHistory = 0;
    int numProgressiveFramesInHistory = 0;
    for(int i = 0; i < frameTimeHistorySize; ++i)
      if (frameTimeHistory[i].interlaced)
        ++numInterlacedFramesInHistory;
      else
        ++numProgressiveFramesInHistory;

    int frames = frameTimeHistorySize;
    if (numInterlacedFramesInHistory)
      frames += numProgressiveFramesInHistory; // Progressive frames count twice as interlaced

    // Guard against a zero time span: dividing by it would yield infinity, and converting
    // infinity to int is undefined behavior. Report 0 fps in that case.
    int fps = 0;
    if (frameTimeHistory[frameTimeHistorySize-1].time != frameTimeHistory[0].time)
      fps = (int)(0.5 + (frames - 1) * 1000000.0 / (frameTimeHistory[frameTimeHistorySize-1].time - frameTimeHistory[0].time));

#ifdef NO_INTERLACING
    snprintf(fpsText, sizeof(fpsText), "%d", fps);
    fpsColor = 0xFFFF;
#else
    if (numInterlacedFramesInHistory > 0)
    {
      if (numProgressiveFramesInHistory > 0) snprintf(fpsText, sizeof(fpsText), "%di/%d", fps, numProgressiveFramesInHistory);
      else snprintf(fpsText, sizeof(fpsText), "%di", fps);
      fpsColor = RGB565(31, 30, 11);
    }
    else
    {
      snprintf(fpsText, sizeof(fpsText), "%dp", fps);
      fpsColor = 0xFFFF;
    }
#endif
    if (frameSkipTimeHistorySize > 0) snprintf(statsFrameSkipText, sizeof(statsFrameSkipText), "-%d", frameSkipTimeHistorySize);
    else statsFrameSkipText[0] = '\0';
  }
  else
  {
    // Not enough frame history yet to compute a rate.
    snprintf(fpsText, sizeof(fpsText), "%s", "-");
    statsFrameSkipText[0] = '\0';
    fpsColor = 0xFFFF;
  }

  // Only append the "MB" unit when the display is wide enough to fit it.
#if (defined(DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE) && DISPLAY_DRAWABLE_HEIGHT > 302) || (!defined(DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE) && DISPLAY_DRAWABLE_WIDTH > 302)
#define HINTSUFFIX "MB"
#else
#define HINTSUFFIX ""
#endif
  snprintf(cpuMemoryUsedText, sizeof(cpuMemoryUsedText), "CPU:%.2f" HINTSUFFIX, totalCpuMemoryAllocated/1024.0/1024.0);
#ifdef USE_DMA_TRANSFERS
  if (totalGpuMemoryUsed > 0)
    snprintf(gpuMemoryUsedText, sizeof(gpuMemoryUsedText), "GPU:%.2f" HINTSUFFIX, totalGpuMemoryUsed/1024.0/1024.0);
#endif
}
#else
void RefreshStatisticsOverlayText() {}
void DrawStatisticsOverlay(uint16_t *) {}
#endif // ~STATISTICS

View File

@ -0,0 +1,46 @@
#pragma once
#include <inttypes.h>
#include "gpu.h"
// Rebuilds the overlay strings; rate-limited internally by STATISTICS_REFRESH_INTERVAL.
// Compiles to an empty function when STATISTICS is not defined.
void RefreshStatisticsOverlayText(void);
// Paints the current overlay strings onto 'framebuffer'.
// Compiles to an empty function when STATISTICS is not defined.
void DrawStatisticsOverlay(uint16_t *framebuffer);
#ifdef STATISTICS
// Latest raw statistics values (see statistics.cpp for how each one is computed).
extern volatile uint64_t timeWastedPollingGPU;
extern volatile float statsSpiBusSpeed;
extern volatile int statsBcmCoreSpeed;
extern volatile int statsCpuFrequency;
extern volatile double statsCpuTemperature;
extern double spiThreadUtilizationRate;
extern double spiBusDataRate;
extern int statsGpuPollingWasted;
extern uint64_t statsBytesTransferred;
extern int frameSkipTimeHistorySize;
extern uint64_t frameSkipTimeHistory[FRAME_HISTORY_MAX_SIZE];
// Records the completion time of a frame for the frame interval graph
// (an empty function unless FRAME_COMPLETION_TIME_STATISTICS is defined).
void AddFrameCompletionTimeMarker();
// All overlay statistics are double-buffered: the updated data fields
// are polled at certain rate, and updated in the first copy below. However
// it is not desired that any changes in the overlay numbers would trigger
// a repaint of the display, since that would skew the fps counts and similar,
// if updated overlay text would cause an update of a new frame.
// The strings below are what is currently shown on screen, and the fields
// above specify the latest up to date fields of the data.
extern char fpsText[32];
extern char spiUsagePercentageText[32];
extern char spiBusDataRateText[32];
extern uint16_t spiUsageColor, fpsColor;
extern char statsFrameSkipText[32];
extern char spiSpeedText[32];
extern char cpuTemperatureText[32];
extern uint16_t cpuTemperatureColor;
extern char gpuPollingWastedText[32];
extern uint16_t gpuPollingWastedColor;
#endif

66
usr/fbcp-ili9341/text.cpp Normal file
View File

@ -0,0 +1,66 @@
#include "config.h"
#include "text.h"
#include "display.h"
// Draws the NUL-terminated string 'text' into a 16-bit-per-pixel framebuffer using the built-in
// 5x8 Monaco bitmap font, advancing 6 pixels per character.
//   framebuffer            - destination pixels
//   framebufferWidth/Height- framebuffer dimensions in pixels, used for clipping
//   framebufferStrideBytes - distance between consecutive scanlines, in bytes
//   x, y                   - position of the first glyph's reference row/column (may be partially off-screen)
//   color, bgColor         - glyph and background colors
// Characters outside the printable ASCII range 32..126 are drawn as the space glyph.
// Every pixel write is clipped against the framebuffer bounds. This includes the one-pixel spacing
// column to the right of each glyph row, which previously was only checked against the bottom
// edge and could write before the start of the buffer (negative y, e.g. a glyph with a negative
// height adjust drawn near the top) or past the right edge (x >= width).
void DrawText(uint16_t *framebuffer, int framebufferWidth, int framebufferStrideBytes, int framebufferHeight, const char *text, int x, int y, uint16_t color, uint16_t bgColor)
{
  // With software orientation flip the two framebuffer axes exchange roles.
#ifdef DISPLAY_FLIP_ORIENTATION_IN_SOFTWARE
  const int W = framebufferHeight;
  const int H = framebufferWidth;
#define AT(x, y) ((x)*framebufferStrideBytes+(y))
#else
  const int W = framebufferWidth;
  const int H = framebufferHeight;
#define AT(x, y) ((y)*framebufferStrideBytes+(x))
#endif

  framebufferStrideBytes >>= 1; // to uint16 elements

  const int Y = y;
  while(*text)
  {
    // Map the character to its glyph index; anything unprintable becomes glyph 0 (space).
    uint8_t ch = (uint8_t)*text;
    if (ch < 32 || ch >= 127) ch = 0;
    else ch -= 32;

    const int X = x;
    const int endX = x + MONACO_WIDTH;

    // Fill the rows above the glyph (from one row above Y down to where the glyph starts) with background.
    for(y = Y-1; y < Y + monaco_height_adjust[ch]; ++y)
      for(int fillX = X; fillX < endX+1; ++fillX)
        if (fillX >= 0 && y >= 0 && fillX < W && y < H)
          framebuffer[AT(fillX,y)] = bgColor;

    // The glyph bitmap is a stream of bits, least significant bit first, MONACO_WIDTH bits per row.
    y = Y + monaco_height_adjust[ch];
    int yEnd = Y + MONACO_HEIGHT - 1;
    const uint8_t *byte = monaco_font + ch*MONACO_BYTES_PER_CHAR;
    for(int i = 0; i < MONACO_BYTES_PER_CHAR; ++i, ++byte)
    {
      for(uint8_t bit = 1; bit; bit <<= 1)
      {
        if (x >= 0 && y >= 0 && x < W && y < H)
        {
          if ((*byte & bit)) framebuffer[AT(x,y)] = color;
          else framebuffer[AT(x,y)] = bgColor;
        }
        ++x;
        if (x == endX)
        {
          // End of a glyph row: paint the spacing column, clipped exactly like glyph pixels.
          if (x >= 0 && y >= 0 && x < W && y < H) framebuffer[AT(x,y)] = bgColor;
          x = X;
          ++y;
          if (y == yEnd)
          {
            // Reached the bottom of the character cell: stop both loops.
            i = MONACO_BYTES_PER_CHAR;
            bit = 0;
            break;
          }
        }
      }
    }
    ++text;
    x += 6;
  }
}

1066
usr/fbcp-ili9341/text.h Normal file
View File

@ -0,0 +1,1066 @@
#pragma once
#include <inttypes.h>
// Glyph cell size in pixels: MONACO_WIDTH columns by MONACO_HEIGHT rows.
#define MONACO_WIDTH 5
#define MONACO_HEIGHT 8
// Glyphs are stored at one bit per pixel, so a 5x8 glyph takes 40 bits = 5 bytes.
#define MONACO_BYTES_PER_CHAR (MONACO_WIDTH*MONACO_HEIGHT/8)
static uint8_t monaco_font[] = {
/*
.....
.....
.....
.....
.....
.....
.....
.....
*/ 0x00,0x00,0x00,0x00,0x00,
/*
#....
#....
#....
#....
#....
.....
#....
.....
!*/ 0x21,0x84,0x10,0x40,0x00,
/*
#.#..
#.#..
.....
.....
.....
.....
.....
.....
"*/ 0xa5,0x00,0x00,0x00,0x00,
/*
..##.
..##.
#####
.#.#.
#####
.##..
.##..
.....
#*/ 0x8c,0x7d,0xf5,0x8d,0x01,
/*
..#..
.####
#.#..
#.#..
.##..
..###
..#.#
####.
$*/ 0xc4,0x97,0x62,0x38,0x7d,
/*
#...#
##..#
#..#.
..#..
..##.
.#.##
#..#.
.....
%*/ 0x71,0x26,0xc2,0x74,0x02,
/*
.#...
#.#..
#.#..
.#...
#.##.
#..#.
.###.
.....
&*/ 0xa2,0x14,0xd1,0x92,0x03,
/*
#....
#....
.....
.....
.....
.....
.....
.....
'*/ 0x21,0x00,0x00,0x00,0x00,
/*
..#..
.#...
#....
#....
#....
#....
.#...
..#..
(*/ 0x44,0x84,0x10,0x82,0x20,
/*
#....
.#...
..#..
..#..
..#..
..#..
.#...
#....
)*/ 0x41,0x10,0x42,0x88,0x08,
/*
.#...
###..
###..
.#...
.....
.....
.....
.....
**/ 0xe2,0x1c,0x01,0x00,0x00,
/*
..#..
..#..
#####
..#..
..#..
.....
.....
.....
+*/ 0x84,0x7c,0x42,0x00,0x00,
/*
#....
#....
#....
.....
.....
.....
.....
.....
,*/ 0x21,0x04,0x00,0x00,0x00,
/*
###..
.....
.....
.....
.....
.....
.....
.....
-*/ 0x07,0x00,0x00,0x00,0x00,
/*
#....
.....
.....
.....
.....
.....
.....
.....
.*/ 0x01,0x00,0x00,0x00,0x00,
/*
...#.
...#.
..#..
.#...
.#...
#....
.....
.....
/*/ 0x08,0x11,0x21,0x02,0x00,
/*
.##..
#.##.
#.##.
##.#.
##.#.
##.#.
.##..
.....
0*/ 0xa6,0xb5,0xb5,0x96,0x01,
/*
.#...
##...
.#...
.#...
.#...
.#...
###..
.....
1*/ 0x62,0x08,0x21,0xc4,0x01,
/*
###..
...#.
...#.
..#..
.#...
#....
####.
.....
2*/ 0x07,0x21,0x22,0xc2,0x03,
/*
###..
...#.
...#.
.##..
...#.
...#.
###..
.....
3*/ 0x07,0x21,0x83,0xd0,0x01,
/*
...#.
..##.
.#.#.
#..#.
#####
...#.
...#.
.....
4*/ 0x88,0xa9,0xf4,0x11,0x02,
/*
####.
#....
###..
...#.
...#.
...#.
###..
.....
5*/ 0x2f,0x1c,0x84,0xd0,0x01,
/*
.##..
#....
#.#..
##.#.
##.#.
##.#.
.##..
.....
6*/ 0x26,0x94,0xb5,0x96,0x01,
/*
####.
...#.
...#.
..#..
.##..
.#...
.#...
.....
7*/ 0x0f,0x21,0x62,0x84,0x00,
/*
.###.
#..#.
#..#.
.##..
#..#.
#..#.
###..
.....
8*/ 0x2e,0x25,0x93,0xd2,0x01,
/*
.##..
#..#.
#..#.
#..#.
.###.
...#.
.##..
.....
9*/ 0x26,0xa5,0xe4,0x90,0x01,
/*
#....
.....
.....
.....
#....
.....
.....
.....
:*/ 0x01,0x00,0x10,0x00,0x00,
/*
#....
.....
.....
.....
#....
#....
#....
.....
;*/ 0x01,0x00,0x10,0x42,0x00,
/*
....#
..##.
##...
..##.
....#
.....
.....
.....
<*/ 0x90,0x0d,0x06,0x01,0x00,
/*
####.
.....
####.
.....
.....
.....
.....
.....
=*/ 0x0f,0x3c,0x00,0x00,0x00,
/*
#....
.##..
...##
.##..
#....
.....
.....
.....
>*/ 0xc1,0x60,0x13,0x00,0x00,
/*
###..
..#..
..#..
.#...
.#...
.....
.#...
.....
?*/ 0x87,0x10,0x21,0x80,0x00,
/*
.###.
#...#
#.###
##.##
##.##
#.##.
.##..
.....
@*/ 0x2e,0xf6,0xbd,0x9b,0x01,
/*
.##..
.##..
.##..
.##..
####.
####.
#..#.
.....
A*/ 0xc6,0x18,0xf3,0x5e,0x02,
/*
##...
#.#..
#.#..
##...
#.#..
#.#..
##...
.....
B*/ 0xa3,0x94,0x51,0xca,0x00,
/*
.###.
#....
#....
#....
#....
#....
.###.
.....
C*/ 0x2e,0x84,0x10,0x82,0x03,
/*
###..
#..#.
#..#.
#..#.
#..#.
#..#.
###..
.....
D*/ 0x27,0xa5,0x94,0xd2,0x01,
/*
####.
#....
#....
####.
#....
#....
####.
.....
E*/ 0x2f,0x84,0x17,0xc2,0x03,
/*
####.
#....
#....
####.
#....
#....
#....
.....
F*/ 0x2f,0x84,0x17,0x42,0x00,
/*
.###.
#....
#....
#.##.
#..#.
#..#.
.###.
.....
G*/ 0x2e,0x84,0x96,0x92,0x03,
/*
#..#.
#..#.
#..#.
####.
#..#.
#..#.
#..#.
.....
H*/ 0x29,0xa5,0x97,0x52,0x02,
/*
###..
.#...
.#...
.#...
.#...
.#...
###..
.....
I*/ 0x47,0x08,0x21,0xc4,0x01,
/*
.###.
...#.
...#.
...#.
...#.
...#.
###..
.....
J*/ 0x0e,0x21,0x84,0xd0,0x01,
/*
#..#.
#..#.
#.#..
##...
#.#..
#.#..
#..#.
.....
K*/ 0x29,0x95,0x51,0x4a,0x02,
/*
#....
#....
#....
#....
#....
#....
###..
.....
L*/ 0x21,0x84,0x10,0xc2,0x01,
/*
##.##
##.##
##.##
##.##
###.#
#.#.#
#...#
.....
M*/ 0x7b,0xef,0x7d,0x6b,0x04,
/*
#..#.
##.#.
##.#.
#.##.
#.##.
#.##.
#..#.
.....
N*/ 0x69,0xad,0xd6,0x5a,0x02,
/*
.##..
#..#.
#..#.
#..#.
#..#.
#..#.
.##..
.....
O*/ 0x26,0xa5,0x94,0x92,0x01,
/*
###..
#..#.
#..#.
#..#.
###..
#....
#....
.....
P*/ 0x27,0xa5,0x74,0x42,0x00,
/*
.##..
#..#.
#..#.
#..#.
#..#.
#..#.
.##..
..#..
Q*/ 0x26,0xa5,0x94,0x92,0x21,
/*
##...
#.#..
#.#..
##...
##...
#.#..
#.#..
.....
R*/ 0xa3,0x94,0x31,0x4a,0x01,
/*
###..
#....
#....
.#...
..#..
..#..
###..
.....
S*/ 0x27,0x04,0x41,0xc8,0x01,
/*
#####
..#..
..#..
..#..
..#..
..#..
..#..
.....
T*/ 0x9f,0x10,0x42,0x08,0x01,
/*
#..#.
#..#.
#..#.
#..#.
#..#.
#..#.
.##..
.....
U*/ 0x29,0xa5,0x94,0x92,0x01,
/*
#.#..
#.#..
#.#..
#.#..
#.#..
.#...
.#...
.....
V*/ 0xa5,0x94,0x52,0x84,0x00,
/*
#.#.#
#.#.#
###.#
##.##
.#.#.
.#.#.
.#.#.
.....
W*/ 0xb5,0xde,0xad,0x94,0x02,
/*
.#.#.
.#.#.
..#..
..#..
..#..
.#.#.
.#.#.
.....
X*/ 0x4a,0x11,0x42,0x94,0x02,
/*
#.#..
#.#..
#.#..
.#...
.#...
.#...
.#...
.....
Y*/ 0xa5,0x14,0x21,0x84,0x00,
/*
####.
...#.
..#..
.#...
.#...
#....
####.
.....
Z*/ 0x0f,0x11,0x21,0xc2,0x03,
/*
###..
#....
#....
#....
#....
#....
#....
###..
[*/ 0x27,0x84,0x10,0x42,0x38,
/*
#....
#....
.#...
.#...
..#..
...#.
.....
.....
\*/ 0x21,0x08,0x41,0x10,0x00,
/*
###..
..#..
..#..
..#..
..#..
..#..
..#..
###..
]*/ 0x87,0x10,0x42,0x08,0x39,
/*
.#...
.##..
.##..
#..#.
.....
.....
.....
.....
^*/ 0xc2,0x98,0x04,0x00,0x00,
/*
####.
.....
.....
.....
.....
.....
.....
.....
_*/ 0x0f,0x00,0x00,0x00,0x00,
/*
.#...
.....
.....
.....
.....
.....
.....
.....
`*/ 0x02,0x00,0x00,0x00,0x00,
/*
.####
#...#
#...#
#...#
.####
.....
.....
.....
a*/ 0x3e,0xc6,0xe8,0x01,0x00,
/*
#....
#....
####.
#...#
#...#
#...#
####.
.....
b*/ 0x21,0xbc,0x18,0xe3,0x03,
/*
.###.
#....
#....
#....
.###.
.....
.....
.....
c*/ 0x2e,0x84,0xe0,0x00,0x00,
/*
...#.
...#.
.###.
#..#.
#..#.
#..#.
.###.
.....
d*/ 0x08,0xb9,0x94,0x92,0x03,
/*
.##..
#..#.
####.
#....
.###.
.....
.....
.....
e*/ 0x26,0xbd,0xe0,0x00,0x00,
/*
..###
.#...
####.
.#...
.#...
.#...
.#...
.....
f*/ 0x5c,0x3c,0x21,0x84,0x00,
/*
.###.
#..#.
#..#.
#..#.
.###.
...#.
###..
.....
g*/ 0x2e,0xa5,0xe4,0xd0,0x01,
/*
#....
#....
###..
#..#.
#..#.
#..#.
#..#.
.....
h*/ 0x21,0x9c,0x94,0x52,0x02,
/*
.#...
.....
##...
.#...
.#...
.#...
.##..
.....
i*/ 0x02,0x0c,0x21,0x84,0x01,
/*
..#..
.....
###..
..#..
..#..
..#..
..#..
..#..
j*/ 0x04,0x1c,0x42,0x08,0x21,
/*
#....
#....
#.#..
##...
##...
#.#..
#..#.
.....
k*/ 0x21,0x94,0x31,0x4a,0x02,
/*
##...
.#...
.#...
.#...
.#...
.#...
.##..
.....
l*/ 0x43,0x08,0x21,0x84,0x01,
/*
#####
#.#.#
#.#.#
#.#.#
#.#.#
.....
.....
.....
m*/ 0xbf,0xd6,0x5a,0x01,0x00,
/*
###..
#..#.
#..#.
#..#.
#..#.
.....
.....
.....
n*/ 0x27,0xa5,0x94,0x00,0x00,
/*
.##..
#..#.
#..#.
#..#.
.##..
.....
.....
.....
o*/ 0x26,0xa5,0x64,0x00,0x00,
/*
####.
#...#
#...#
#...#
####.
#....
#....
.....
p*/ 0x2f,0xc6,0xf8,0x42,0x00,
/*
.###.
#..#.
#..#.
#..#.
.###.
...#.
...#.
.....
q*/ 0x2e,0xa5,0xe4,0x10,0x02,
/*
###..
#.#..
#....
#....
#....
.....
.....
.....
r*/ 0xa7,0x84,0x10,0x00,0x00,
/*
.###.
#..#.
.###.
#..#.
###..
.....
.....
.....
s*/ 0x2e,0xb9,0x74,0x00,0x00,
/*
.#...
.#...
####.
.#...
.#...
..##.
.....
.....
t*/ 0x42,0x3c,0x21,0x18,0x00,
/*
#..#.
#..#.
#..#.
#..#.
.###.
.....
.....
.....
u*/ 0x29,0xa5,0xe4,0x00,0x00,
/*
#.#..
#.#..
#.#..
###..
.#...
.....
.....
.....
v*/ 0xa5,0x94,0x23,0x00,0x00,
/*
#.#.#
###.#
###.#
####.
.#.#.
.....
.....
.....
w*/ 0xf5,0xde,0xa7,0x00,0x00,
/*
.#.#.
.##..
..#..
.#.#.
.#.#.
.....
.....
.....
x*/ 0xca,0x10,0xa5,0x00,0x00,
/*
.#.#.
.#.#.
.#.#.
..##.
..#..
..#..
##...
.....
y*/ 0x4a,0x29,0x46,0xc8,0x00,
/*
####.
..#..
.#...
#....
####.
.....
.....
.....
z*/ 0x8f,0x88,0xf0,0x00,0x00,
/*
.##..
.#...
.#...
.#...
#....
.#...
.#...
.##..
{*/ 0x46,0x08,0x11,0x84,0x30,
/*
#....
#....
#....
#....
#....
#....
.....
.....
|*/ 0x21,0x84,0x10,0x02,0x00,
/*
##...
.#...
.#...
.#...
..#..
.#...
.#...
##...
}*/ 0x43,0x08,0x41,0x84,0x18,
/*
#####
.....
.....
.....
.....
.....
.....
.....
~*/ 0x1f,0x00,0x00,0x00,0x00,
};
// Zero-initialized buffer sized for 127 glyphs. DrawText() in text.cpp does not read it.
// NOTE(review): presumably reserved for an outlined font variant - confirm whether anything uses it.
static uint8_t monaco_font_outline[127*MONACO_BYTES_PER_CHAR] = {};
// Per-glyph vertical offset, indexed by (character - 32). DrawText() starts drawing a glyph's
// bitmap at row (y + adjust) and fills the rows above it with the background color.
static const int8_t monaco_height_adjust[] = {
6,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,1,5,3,5,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,1,1,2,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,6,0,1,-1,1,-1,1,-1,1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,-1,-1,-1,4,
};
// Packs color components into one 16-bit pixel value: r in bits 11 and up, g in bits 5-10, b in bits 0-4.
// No masking is performed, so callers must pass r and b in 0..31 and g in 0..63.
#define RGB565(r, g, b) (((r) << 11) | ((g) << 5) | (b))
// Draws 'text' at pixel position (x, y) using the Monaco bitmap font; see text.cpp.
void DrawText(uint16_t *framebuffer, int framebufferWidth, int framebufferStrideBytes, int framebufferHeight, const char *text, int x, int y, uint16_t color, uint16_t bgColor);

16
usr/fbcp-ili9341/tick.h Normal file
View File

@ -0,0 +1,16 @@
#pragma once
#ifndef KERNEL_MODULE
#include <inttypes.h>
#include <unistd.h>
// Initialized in spi.cpp along with the rest of the BCM2835 peripheral:
extern volatile uint64_t *systemTimerRegister;
// Current value of the 64-bit system timer, read directly from the mapped register.
// NOTE(review): callers treat the value as microseconds (e.g. 1000000/TARGET_FRAME_RATE) - confirm against the peripheral documentation.
#define tick() (*systemTimerRegister)
#endif
// With NO_THROTTLING defined, every usleep() call in files that include this header becomes a no-op.
#ifdef NO_THROTTLING
#define usleep(x) ((void)0)
#endif

View File

@ -0,0 +1,17 @@
#pragma once
// Default GPIO pin assignments for the Tontec MZ61581 display.
// Each value is only a fallback: a pin that is already defined (e.g. on the build command line) is left untouched.
#ifdef TONTEC_MZ61581
#if !defined(GPIO_TFT_DATA_CONTROL)
#define GPIO_TFT_DATA_CONTROL 25
#endif
#if !defined(GPIO_TFT_RESET_PIN)
#define GPIO_TFT_RESET_PIN 15
#endif
#if !defined(GPIO_TFT_BACKLIGHT)
#define GPIO_TFT_BACKLIGHT 18
#endif
#endif

40
usr/fbcp-ili9341/util.h Normal file
View File

@ -0,0 +1,40 @@
#pragma once
// Rounding helpers that convert a floating point value to int. They expand to lround()/floor()/ceil(),
// so the including file must provide <math.h>.
#define ROUND_TO_NEAREST_INT(x) ((int)lround((x)))
#define ROUND_TO_FLOOR_INT(x) ((int)(floor((x))))
#define ROUND_TO_CEIL_INT(x) ((int)(ceil((x))))
// Generic min/max/abs. These are macros: each argument may be evaluated twice, so do not pass
// expressions with side effects.
#define MIN(x, y) ((x) <= (y) ? (x) : (y))
#define MAX(x, y) ((x) >= (y) ? (x) : (y))
#define ABS(x) ((x) < 0 ? (-(x)) : (x))
// Swaps two uint32_t lvalues. Expands to a bare { } block rather than do { } while(0), so it is
// not safe as the unbraced body of an if that is followed by an else.
#define SWAPU32(x, y) { uint32_t tmp = x; x = y; y = tmp; }
// Round an integer down/up to a multiple of 'alignment', which must be a power of two.
#ifndef ALIGN_DOWN
#define ALIGN_DOWN(ptr, alignment) (((ptr)) & ~((alignment)-1))
#endif
#ifndef ALIGN_UP
#define ALIGN_UP(ptr, alignment) (((ptr) + ((alignment)-1)) & ~((alignment)-1))
#endif
// LOG(...) prints a printf-style message. FATAL_ERROR(msg) reports 'msg' and aborts the current
// operation: the kernel module variant returns -1 from the enclosing function, the user space
// variant terminates the process with exit code 1.
#ifdef KERNEL_MODULE
#define LOG(...) do { printk(KERN_INFO __VA_ARGS__); } while(0)
#define FATAL_ERROR(msg) do { pr_alert(msg "\n"); return -1; } while(0)
#else
#define LOG(...) do { printf(__VA_ARGS__); printf("\n"); } while(0)
// The message is passed to syslog() as data through "%s", never as the format string, so a '%'
// inside 'msg' cannot be misinterpreted as a conversion specifier.
#define FATAL_ERROR(msg) do { fprintf(stderr, "%s\n", msg); syslog(LOG_ERR, "%s", msg); exit(1); } while(0)
#endif
// Prints the value of one bit field of a register. Expects a variable named 'reg' to be in scope
// at the point of use. 'flag' and 'shift' are parenthesized so that compound expressions
// (e.g. A|B) are masked and shifted as a whole.
#ifdef KERNEL_MODULE
#define PRINT_FLAG_2(flag_str, flag, shift) printk(KERN_INFO flag_str ": %x", (reg & (flag)) >> (shift))
#else
#define PRINT_FLAG_2(flag_str, flag, shift) printf(flag_str ": %x\n", (reg & (flag)) >> (shift))
#endif
// PRINT_FLAG(FOO) prints field FOO using the mask FOO and the shift amount FOO_SHIFT.
#define PRINT_FLAG(flag) PRINT_FLAG_2(#flag, flag, flag##_SHIFT)
// Spin-wait hint for busy loops in user space: issues the "yield" instruction and acts as a compiler memory barrier.
#ifndef KERNEL_MODULE
#define cpu_relax() asm volatile("yield" ::: "memory")
#endif

View File

@ -0,0 +1,26 @@
#pragma once
// Data specific to the Waveshare35b display
#ifdef WAVESHARE35B_ILI9486
// SPI_BUS_CLOCK_DIVISOR specifies how fast to communicate the SPI bus at. Possible values
// are 4, 6, 8, 10, 12, ... Smaller values are faster. On my Waveshare35b display, the
// following values were observed to work (on a Pi 3B):
// core_freq=400: CDIV=14, results in 28.57MHz
// core_freq=255: CDIV=8, results in 31.875MHz
// While the following values were seen to not work:
// core_freq=400: CDIV=12, would result in 33.33MHz, but this was too fast for the display
// core_freq=256: CDIV=8, would result in 32.00MHz, this would work 99% of the time, but occasionally every ~few minutes would glitch a pixel or two
// Default GPIO pins; each is only a fallback and is skipped if the pin is already defined.
#if !defined(GPIO_TFT_DATA_CONTROL)
#define GPIO_TFT_DATA_CONTROL 24
#endif
#if !defined(GPIO_TFT_RESET_PIN)
#define GPIO_TFT_RESET_PIN 25
#endif
#endif

View File

@ -0,0 +1,20 @@
#pragma once
// Data specific to WaveShare 128x128, 1.44inch LCD ST7735S hat, https://www.waveshare.com/1.44inch-lcd-hat.htm
#ifdef WAVESHARE_ST7735S_HAT
// Default GPIO pins; each is only a fallback and is skipped if the pin is already defined.
#if !defined(GPIO_TFT_DATA_CONTROL)
#define GPIO_TFT_DATA_CONTROL 25
#endif
#if !defined(GPIO_TFT_BACKLIGHT)
#define GPIO_TFT_BACKLIGHT 24
#endif
#if !defined(GPIO_TFT_RESET_PIN)
#define GPIO_TFT_RESET_PIN 27
#endif
// Marks this hat as needing its orientation flipped; the flag is consumed outside this header.
#define DISPLAY_SHOULD_FLIP_ORIENTATION
#endif

View File

@ -0,0 +1,18 @@
#pragma once
// Data specific to WaveShare 240x240, 1.3inch IPS LCD ST7789VW hat, https://www.waveshare.com/w/upload/a/ae/ST7789_Datasheet.pdf
#ifdef WAVESHARE_ST7789VW_HAT
// Default GPIO pins; each is only a fallback and is skipped if the pin is already defined.
#if !defined(GPIO_TFT_DATA_CONTROL)
#define GPIO_TFT_DATA_CONTROL 25
#endif
#if !defined(GPIO_TFT_BACKLIGHT)
#define GPIO_TFT_BACKLIGHT 24
#endif
#if !defined(GPIO_TFT_RESET_PIN)
#define GPIO_TFT_RESET_PIN 27
#endif
#endif

Binary file not shown.

After

Width:  |  Height:  |  Size: 138 KiB

3
usr/local.desktop Normal file
View File

@ -0,0 +1,3 @@
[Desktop Entry]
Type=Application
Exec=python /home/pi/Mouse_Key.py