Unverified Commit 12995820 authored by Lucas Saavedra Vaz's avatar Lucas Saavedra Vaz Committed by GitHub

ci(performance): Add performance tests to CI (#9560)

* ci(performance): Add performance tests to CI

* ci(req): Fix requirements

* ci(pre-commit): Apply automatic fixes

* ci(pre-commit): Increase maximum allowed complexity for python

---------
Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Co-authored-by: Jan Procházka <90197375+P-R-O-C-H-Y@users.noreply.github.com>
parent c3968346
# Source: https://github.com/arduino/tooling-project-assets/blob/main/workflow-templates/assets/check-python/.flake8
# See: https://flake8.pycqa.org/en/latest/user/configuration.html
# The code style defined in this file is the official standardized style to be used in all Arduino tooling projects and
# should not be modified.
[flake8]
doctests = True
# W503 and W504 are mutually exclusive. PEP 8 recommends line break before.
ignore = W503,E203
max-complexity = 10
max-complexity = 20
max-line-length = 120
select = E,W,F,C,N
......@@ -121,7 +121,7 @@ function build_sketch(){ # build_sketch <ide_path> <user_path> <path-to-ino> [ex
fi
if [ -z "$fqbn" ]; then
echo "No FQBN passed or unvalid chip: $target"
echo "No FQBN passed or invalid chip: $target"
exit 1
fi
......@@ -139,7 +139,7 @@ function build_sketch(){ # build_sketch <ide_path> <user_path> <path-to-ino> [ex
echo "Skipping $sketchname for target $target"
exit 0
fi
ARDUINO_CACHE_DIR="$HOME/.arduino/cache.tmp"
if [ -n "$ARDUINO_BUILD_DIR" ]; then
build_dir="$ARDUINO_BUILD_DIR"
......@@ -177,7 +177,7 @@ function build_sketch(){ # build_sketch <ide_path> <user_path> <path-to-ino> [ex
--build-path "$build_dir" \
$xtra_opts "${sketchdir}" \
> $output_file
exit_status=$?
if [ $exit_status -ne 0 ]; then
echo ""ERROR: Compilation failed with error code $exit_status""
......@@ -198,11 +198,11 @@ function build_sketch(){ # build_sketch <ide_path> <user_path> <path-to-ino> [ex
# Extract the desired substring using sed
lib_sketch_name=$(echo "$directory_path" | sed "s|$constant_part||")
#append json file where key is fqbn, sketch name, sizes -> extracted values
echo "{\"name\": \"$lib_sketch_name\",
echo "{\"name\": \"$lib_sketch_name\",
\"sizes\": [{
\"flash_bytes\": $flash_bytes,
\"flash_percentage\": $flash_percentage,
\"ram_bytes\": $ram_bytes,
\"flash_bytes\": $flash_bytes,
\"flash_percentage\": $flash_percentage,
\"ram_bytes\": $ram_bytes,
\"ram_percentage\": $ram_percentage
}]
}," >> "$sizes_file"
......@@ -365,6 +365,7 @@ function build_sketches(){ # build_sketches <ide_path> <user_path> <target> <pat
start_index=$(( $chunk_index * $chunk_size ))
if [ "$sketchcount" -le "$start_index" ]; then
echo "Skipping job"
touch ~/.build_skipped
return 0
fi
......@@ -386,7 +387,7 @@ function build_sketches(){ # build_sketches <ide_path> <user_path> <target> <pat
if [ $log_compilation ]; then
#echo board,target and start of sketches to sizes_file json
echo "{ \"board\": \"$fqbn\",
\"target\": \"$target\",
\"target\": \"$target\",
\"sketches\": [" >> "$sizes_file"
fi
......
......@@ -2,8 +2,8 @@
USAGE="
USAGE:
${0} -c <chunk_build_opts>
Example: ${0} -c -t esp32 -i 0 -m 15
${0} -c -type <test_type> <chunk_build_opts>
Example: ${0} -c -type validation -t esp32 -i 0 -m 15
${0} -s sketch_name <build_opts>
Example: ${0} -s hello_world -t esp32
${0} -clean
......@@ -11,10 +11,11 @@ USAGE:
"
function clean(){
rm -rf tests/*/build*/
rm -rf tests/.pytest_cache
rm -rf tests/*/__pycache__/
rm -rf tests/*/*.xml
find tests/ -type d -name 'build*' -exec rm -rf "{}" \+
find tests/ -type d -name '__pycache__' -exec rm -rf "{}" \+
find tests/ -name '*.xml' -exec rm -rf "{}" \+
find tests/ -name 'result_*.json' -exec rm -rf "{}" \+
}
SCRIPTS_DIR="./.github/scripts"
......@@ -35,6 +36,10 @@ while [ ! -z "$1" ]; do
echo "$USAGE"
exit 0
;;
-type )
shift
test_type=$1
;;
-clean )
clean
exit 0
......@@ -52,12 +57,25 @@ source ${SCRIPTS_DIR}/install-arduino-core-esp32.sh
args="-ai $ARDUINO_IDE_PATH -au $ARDUINO_USR_PATH"
# Resolve the folder that holds the tests to build.
# When no explicit test type was given (or "all" was requested) and a single
# sketch was named, the type is inferred from where the sketch lives:
# tests/<type>/<sketch>/<sketch>.ino  ->  <type>
if [[ $test_type == "all" ]] || [[ -z $test_type ]]; then
    if [ -n "$sketch" ]; then
        # Quote the pattern so the sketch name is not word-split or glob-expanded,
        # and keep only the first match so dirname receives exactly one path.
        tmp_sketch_path=$(find tests -name "$sketch.ino" | head -n 1)
        if [ -z "$tmp_sketch_path" ]; then
            # Without this check dirname/basename of an empty path yields ".",
            # and the build would later fail with a misleading path error.
            echo "ERROR: Sketch $sketch not found in tests folder"
            exit 1
        fi
        test_type=$(basename "$(dirname "$(dirname "$tmp_sketch_path")")")
        echo "Sketch $sketch test type: $test_type"
        test_folder="$PWD/tests/$test_type"
    else
        test_folder="$PWD/tests"
    fi
else
    test_folder="$PWD/tests/$test_type"
fi
if [ $chunk_build -eq 1 ]; then
BUILD_CMD="${SCRIPTS_DIR}/sketch_utils.sh chunk_build"
args+=" -p $PWD/tests"
args+=" -p $test_folder"
else
BUILD_CMD="${SCRIPTS_DIR}/sketch_utils.sh build"
args+=" -s $PWD/tests/$sketch"
args+=" -s $test_folder/$sketch"
fi
${BUILD_CMD} ${args} $*
......
......@@ -15,9 +15,9 @@ function run_test() {
fi
if [ $len -eq 1 ]; then
# build_dir="tests/$sketchname/build"
# build_dir="$sketchdir/build"
build_dir="$HOME/.arduino/tests/$sketchname/build.tmp"
report_file="tests/$sketchname/$sketchname.xml"
report_file="$sketchdir/$sketchname.xml"
fi
for i in `seq 0 $(($len - 1))`
......@@ -28,9 +28,9 @@ function run_test() {
fi
if [ $len -ne 1 ]; then
# build_dir="tests/$sketchname/build$i"
# build_dir="$sketchdir/build$i"
build_dir="$HOME/.arduino/tests/$sketchname/build$i.tmp"
report_file="tests/$sketchname/$sketchname$i.xml"
report_file="$sketchdir/$sketchname$i.xml"
fi
pytest tests --build-dir $build_dir -k test_$sketchname --junit-xml=$report_file
......@@ -79,6 +79,10 @@ while [ ! -z "$1" ]; do
echo "$USAGE"
exit 0
;;
-type )
shift
test_type=$1
;;
* )
break
;;
......@@ -88,21 +92,39 @@ done
source ${SCRIPTS_DIR}/install-arduino-ide.sh
# If sketch is provided and test type is not, test type is inferred from the sketch path
# (tests/<type>/<sketch>/<sketch>.ino  ->  <type>).
if [[ $test_type == "all" ]] || [[ -z $test_type ]]; then
    if [ -n "$sketch" ]; then
        # Quote the pattern so the sketch name is not word-split or glob-expanded,
        # and keep only the first match so dirname receives exactly one path.
        tmp_sketch_path=$(find tests -name "$sketch.ino" | head -n 1)
        if [ -z "$tmp_sketch_path" ]; then
            # Without this check dirname/basename of an empty path yields ".",
            # and the run would later fail with a misleading path error.
            echo "ERROR: Sketch $sketch not found in tests folder"
            exit 1
        fi
        test_type=$(basename "$(dirname "$(dirname "$tmp_sketch_path")")")
        echo "Sketch $sketch test type: $test_type"
        test_folder="$PWD/tests/$test_type"
    else
        test_folder="$PWD/tests"
    fi
else
    test_folder="$PWD/tests/$test_type"
fi
if [ $chunk_run -eq 0 ]; then
run_test $target $PWD/tests/$sketch/$sketch.ino $options $erase
if [ -z $sketch ]; then
echo "ERROR: Sketch name is required for single test run"
exit 1
fi
run_test $target $test_folder/$sketch/$sketch.ino $options $erase
else
if [ "$chunk_max" -le 0 ]; then
echo "ERROR: Chunks count must be positive number"
return 1
exit 1
fi
if [ "$chunk_index" -ge "$chunk_max" ] && [ "$chunk_max" -ge 2 ]; then
echo "ERROR: Chunk index must be less than chunks count"
return 1
exit 1
fi
set +e
${COUNT_SKETCHES} $PWD/tests $target
${COUNT_SKETCHES} $test_folder $target
sketchcount=$?
set -e
sketches=$(cat sketches.txt)
......@@ -123,7 +145,8 @@ else
start_index=$(( $chunk_index * $chunk_size ))
if [ "$sketchcount" -le "$start_index" ]; then
echo "Skipping job"
return 0
touch ~/.test_skipped
exit 0
fi
end_index=$(( $(( $chunk_index + 1 )) * $chunk_size ))
......
......@@ -18,11 +18,14 @@ jobs:
gen_chunks:
if: |
contains(github.event.pull_request.labels.*.name, 'hil_test') ||
contains(github.event.pull_request.labels.*.name, 'perf_test') ||
(github.event_name == 'schedule' && github.repository == 'espressif/arduino-esp32')
name: Generate Chunks matrix
runs-on: ubuntu-latest
outputs:
chunks: ${{ steps.gen-chunks.outputs.chunks }}
test_folder: ${{ steps.gen-chunks.outputs.test_folder }}
test_type: ${{ steps.gen-chunks.outputs.test_type }}
steps:
- name: Checkout Repository
uses: actions/checkout@v4
......@@ -31,7 +34,19 @@ jobs:
id: gen-chunks
run: |
set +e
.github/scripts/sketch_utils.sh count tests
if [ "${{contains(github.event.pull_request.labels.*.name, 'hil_test')}}" == "true" ] && \
[ "${{contains(github.event.pull_request.labels.*.name, 'perf_test')}}" == "false" ]; then
test_folder="tests/validation"
test_type="validation"
elif [ "${{contains(github.event.pull_request.labels.*.name, 'hil_test')}}" == "false" ] && \
[ "${{contains(github.event.pull_request.labels.*.name, 'perf_test')}}" == "true" ]; then
test_folder="tests/performance"
test_type="performance"
else
test_folder="tests"
test_type="all"
fi
.github/scripts/sketch_utils.sh count $test_folder
sketches=$?
if [[ $sketches -ge ${{env.MAX_CHUNKS}} ]]; then
sketches=${{env.MAX_CHUNKS}}
......@@ -39,7 +54,9 @@ jobs:
set -e
rm sketches.txt
CHUNKS=$(jq -c -n '$ARGS.positional' --args `seq 0 1 $((sketches - 1))`)
echo "chunks=${CHUNKS}" >>$GITHUB_OUTPUT
echo "chunks=${CHUNKS}" >> $GITHUB_OUTPUT
echo "test_folder=${test_folder}" >> $GITHUB_OUTPUT
echo "test_type=${test_type}" >> $GITHUB_OUTPUT
Build:
needs: gen_chunks
......@@ -52,17 +69,21 @@ jobs:
steps:
- name: Checkout Repository
uses: actions/checkout@v4
- name: Build sketches
run: |
bash .github/scripts/tests_build.sh -c -t ${{matrix.chip}} -i ${{matrix.chunks}} -m ${{env.MAX_CHUNKS}}
bash .github/scripts/tests_build.sh -c -type ${{ needs.gen_chunks.outputs.test_type }} -t ${{matrix.chip}} -i ${{matrix.chunks}} -m ${{env.MAX_CHUNKS}}
- name: Upload ${{matrix.chip}}-${{matrix.chunks}} artifacts
uses: actions/upload-artifact@v4
with:
name: ${{matrix.chip}}-${{matrix.chunks}}.artifacts
path: |
~/.arduino/tests/*/build*.tmp/*.bin
~/.arduino/tests/*/build*.tmp/*.json
if-no-files-found: error
path: |
~/.build_skipped
~/.arduino/tests/**/build*.tmp/*.bin
~/.arduino/tests/**/build*.tmp/*.json
Test:
needs: [gen_chunks, Build]
name: ${{matrix.chip}}-Test#${{matrix.chunks}}
......@@ -77,36 +98,49 @@ jobs:
options: --privileged
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Checkout repository
uses: actions/checkout@v4
- name: Download ${{matrix.chip}}-${{matrix.chunks}} artifacts
uses: actions/download-artifact@v4
with:
name: ${{matrix.chip}}-${{matrix.chunks}}.artifacts
path: ~/.arduino/tests/
- name: Download ${{matrix.chip}}-${{matrix.chunks}} artifacts
uses: actions/download-artifact@v4
with:
name: ${{matrix.chip}}-${{matrix.chunks}}.artifacts
path: ~/
- name: Install dependencies
run: |
pip install -U pip
pip install -r tests/requirements.txt --extra-index-url https://dl.espressif.com/pypi
apt update && apt install -y -qq jq
- name: Install dependencies
run: |
pip install -U pip
pip install -r tests/requirements.txt --extra-index-url https://dl.espressif.com/pypi
apt update && apt install -y -qq jq
- name: Run Tests
run: |
bash .github/scripts/tests_run.sh -c -t ${{matrix.chip}} -i ${{matrix.chunks}} -m ${{env.MAX_CHUNKS}} -e
- name: Run Tests
run: |
bash .github/scripts/tests_run.sh -c -type ${{ needs.gen_chunks.outputs.test_type }} -t ${{matrix.chip}} -i ${{matrix.chunks}} -m ${{env.MAX_CHUNKS}} -e
- name: Upload test result artifacts
uses: actions/upload-artifact@v4
if: always()
with:
name: test_results-${{matrix.chip}}-${{matrix.chunks}}
path: tests/*/*.xml
- name: Check if tests were skipped
id: check-test-skipped
run: |
if [ -f ~/.test_skipped ]; then
echo "skipped=true" >> $GITHUB_OUTPUT
else
echo "skipped=false" >> $GITHUB_OUTPUT
fi
- name: Upload test result artifacts
uses: actions/upload-artifact@v4
if: ${{ always() && steps.check-test-skipped.outputs.skipped == 'false' }}
with:
name: test_results-${{matrix.chip}}-${{matrix.chunks}}
if-no-files-found: error
path: |
tests/**/*.xml
tests/**/result_*.json
event_file:
name: "Event File"
if: |
contains(github.event.pull_request.labels.*.name, 'hil_test') ||
contains(github.event.pull_request.labels.*.name, 'perf_test') ||
github.event_name == 'schedule'
needs: Test
runs-on: ubuntu-latest
......
exclude: ".github/.*"
exclude: |
(?x)(
^\.github\/|
^tests\/performance\/coremark\/.*\.[ch]$
)
default_language_version:
# force all unspecified python hooks to run python3
......
build*/
__pycache__/
*.xml
result_*.json
This diff is collapsed.
This diff is collapsed.
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
/*
Topic: Description
Matrix manipulation benchmark
This very simple algorithm forms the basis of many more complex algorithms.
The tight inner loop is the focus of many optimizations (compiler as well as hardware based)
and is thus relevant for embedded processing.
The total available data space will be divided to 3 parts:
NxN Matrix A - initialized with small values (upper 3/4 of the bits all zero).
NxN Matrix B - initialized with medium values (upper half of the bits all zero).
NxN Matrix C - used for the result.
The actual values for A and B must be derived based on input that is not available at compile time.
*/
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val);
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval);
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val);
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val);
#define matrix_test_next(x) (x+1)
#define matrix_clip(x,y) ((y) ? (x) & 0x0ff : (x) & 0x0ffff)
#define matrix_big(x) (0xf000 | (x))
#define bit_extract(x,from,to) (((x)>>(from)) & (~(0xffffffff << (to))))
#if CORE_DEBUG
/* Debug helper (CORE_DEBUG builds only): print the NxN matrix A, one
   comma-separated row per line, preceded by a header carrying name. */
void printmat(MATDAT *A, ee_u32 N, char *name) {
    ee_u32 row, col;

    ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
    for (row = 0; row < N; row++) {
        for (col = 0; col < N; col++) {
            /* Separator goes in front of every element except the first of a row. */
            if (col > 0) {
                ee_printf(",");
            }
            ee_printf("%d", A[row*N + col]);
        }
        ee_printf("\n");
    }
}
/* Debug helper (CORE_DEBUG builds only): print the NxN result matrix C, one
   comma-separated row per line, preceded by a header carrying name. */
void printmatC(MATRES *C, ee_u32 N, char *name) {
    ee_u32 row, col;

    ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
    for (row = 0; row < N; row++) {
        for (col = 0; col < N; col++) {
            /* Separator goes in front of every element except the first of a row. */
            if (col > 0) {
                ee_printf(",");
            }
            ee_printf("%d", C[row*N + col]);
        }
        ee_printf("\n");
    }
}
#endif
/* Function: core_bench_matrix
   Benchmark entry point for the matrix workload.

   Runs <matrix_test> once on the matrices described by p, using seed
   (converted to MATDAT) as the constant, and folds the value it returns
   into crc with crc16.

   Returns:
   The updated crc.
*/
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc) {
    ee_s16 test_result = matrix_test(p->N, p->C, p->A, p->B, (MATDAT)seed);

    return crc16(test_result, crc);
}
/* Function: matrix_test
Perform matrix manipulation.
Parameters:
N - Dimensions of the matrix.
C - memory for result matrix.
A - input matrix
B - operator matrix (not changed during operations)
val - constant added to / multiplied with A; also used (through matrix_big) to build the clip value handed to <matrix_sum>.
Returns:
A CRC value that captures all results calculated in the function.
In particular, crc of the value calculated on the result matrix
after each step by <matrix_sum>.
The CRC is accumulated in an ee_u16 and converted to the ee_s16 return type on return.
Operation:
1 - Add a constant value to all elements of matrix A.
2 - Multiply A by a constant (result in C).
3 - Multiply A by a vector taken from B (result in C).
4 - Multiply A by B (result in C).
5 - Multiply A by B, combining extracted bit fields of each product (result in C).
6 - Add the negated constant to all elements of A.
After the last step, matrix A is back to original contents.
*/
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val) {
ee_u16 crc=0;
/* clip threshold for matrix_sum: val with the 0xf000 bits forced on */
MATDAT clipval=matrix_big(val);
matrix_add_const(N,A,val); /* make sure data changes */
#if CORE_DEBUG
printmat(A,N,"matrix_add_const");
#endif
/* each multiply step overwrites C; its matrix_sum is folded into crc before the next step */
matrix_mul_const(N,C,A,val);
crc=crc16(matrix_sum(N,C,clipval),crc);
#if CORE_DEBUG
printmatC(C,N,"matrix_mul_const");
#endif
matrix_mul_vect(N,C,A,B);
crc=crc16(matrix_sum(N,C,clipval),crc);
#if CORE_DEBUG
printmatC(C,N,"matrix_mul_vect");
#endif
matrix_mul_matrix(N,C,A,B);
crc=crc16(matrix_sum(N,C,clipval),crc);
#if CORE_DEBUG
printmatC(C,N,"matrix_mul_matrix");
#endif
matrix_mul_matrix_bitextract(N,C,A,B);
crc=crc16(matrix_sum(N,C,clipval),crc);
#if CORE_DEBUG
printmatC(C,N,"matrix_mul_matrix_bitextract");
#endif
matrix_add_const(N,A,-val); /* return matrix to initial value */
return crc;
}
/* Function : core_init_matrix
Initialize the memory block for matrix benchmarking.
Parameters:
blksize - Size of memory to be initialized.
memblk - Pointer to memory block.
seed - Actual values chosen depend on the seed parameter (a seed of 0 is replaced by 1).
p - pointers to <mat_params> containing initialized matrixes.
Returns:
Matrix dimensions.
Note:
The seed parameter MUST be supplied from a source that cannot be determined at compile time
NOTE(review): with blksize==0 the sizing loop never runs and N=i-1 wraps around (ee_u32);
callers are presumably expected to pass a non-zero block size - confirm.
*/
ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p) {
ee_u32 N=0;
MATDAT *A;
MATDAT *B;
ee_s32 order=1;
MATDAT val;
ee_u32 i=0,j=0;
if (seed==0)
seed=1;
/* find the first i with i*i*8 >= blksize; N is one less than that */
/* presumably 8 is the per-element byte budget across A, B and C - confirm against MATDAT/MATRES sizes */
while (j<blksize) {
i++;
j=i*i*2*4;
}
N=i-1;
/* A starts at the aligned block start, B follows directly after A's N*N elements */
A=(MATDAT *)align_mem(memblk);
B=A+N*N;
for (i=0; i<N; i++) {
for (j=0; j<N; j++) {
/* evolve the seed with the running element counter, kept below 65536 */
seed = ( ( order * seed ) % 65536 );
val = (seed + order);
/* B keeps the low 16 bits (matrix_clip with y==0) */
val=matrix_clip(val,0);
B[i*N+j] = val;
val = (val + order);
/* A keeps only the low 8 bits (matrix_clip with y!=0) */
val=matrix_clip(val,1);
A[i*N+j] = val;
order++;
}
}
p->A=A;
p->B=B;
/* C is placed after B, re-aligned because MATRES may need stricter alignment than MATDAT (TODO confirm) */
p->C=(MATRES *)align_mem(B+N*N);
p->N=N;
#if CORE_DEBUG
printmat(A,N,"A");
printmat(B,N,"B");
#endif
return N;
}
/* Function: matrix_sum
   Reduce the NxN result matrix C to a single value that depends on its
   elements.

   Elements are visited in row-major order and added to a running
   accumulator. Whenever the accumulator exceeds clipval, 10 is added to
   the result and the accumulator is reset. Otherwise 1 is added to the
   result if the element is bigger than the previously visited one.

   Returns:
   The accumulated result.
*/
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval) {
    MATRES running = 0;
    MATRES last = 0;
    ee_s16 score = 0;
    ee_u32 row, col;

    for (row = 0; row < N; row++) {
        for (col = 0; col < N; col++) {
            MATRES elem = C[row*N + col];

            running += elem;
            if (running > clipval) {
                score += 10;
                running = 0;
            } else if (elem > last) {
                score += 1;
            }
            last = elem;
        }
    }
    return score;
}
/* Function: matrix_mul_const
   Multiply every element of the NxN matrix A by the constant val and
   store the products in C. Both operands are widened to MATRES before
   multiplying.
   This could be used as a scaler for instance.
*/
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val) {
    ee_u32 row, col;

    for (row = 0; row < N; row++) {
        ee_u32 row_base = row * N;

        for (col = 0; col < N; col++) {
            ee_u32 idx = row_base + col;

            C[idx] = (MATRES)A[idx] * (MATRES)val;
        }
    }
}
/* Function: matrix_add_const
   Add the constant val, in place, to every element of the NxN matrix A.
*/
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val) {
    ee_u32 row, col;

    for (row = 0; row < N; row++) {
        ee_u32 row_base = row * N;

        for (col = 0; col < N; col++) {
            A[row_base + col] += val;
        }
    }
}
/* Function: matrix_mul_vect
   Multiply the NxN matrix A by a vector.
   The vector is the first N elements of B; the N results are written to
   the first N elements of C. Operands are widened to MATRES before
   multiplying.
   This is common in many simple filters (e.g. fir where a vector of
   coefficients is applied to the matrix.)
*/
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) {
    ee_u32 row, col;

    for (row = 0; row < N; row++) {
        ee_u32 row_base = row * N;

        C[row] = 0;
        for (col = 0; col < N; col++) {
            C[row] += (MATRES)A[row_base + col] * (MATRES)B[col];
        }
    }
}
/* Function: matrix_mul_matrix
   Multiply the NxN matrix A by the NxN matrix B and store the result in C.
   Each output element is zeroed and then accumulated in place; operands
   are widened to MATRES before multiplying.
   Basic code is used in many algorithms, mostly with minor changes such
   as scaling.
*/
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) {
    ee_u32 row, col, inner;

    for (row = 0; row < N; row++) {
        ee_u32 row_base = row * N;

        for (col = 0; col < N; col++) {
            ee_u32 out = row_base + col;

            C[out] = 0;
            for (inner = 0; inner < N; inner++) {
                C[out] += (MATRES)A[row_base + inner] * (MATRES)B[inner*N + col];
            }
        }
    }
}
/* Function: matrix_mul_matrix_bitextract
   Multiply the NxN matrix A by the NxN matrix B, but instead of summing
   the raw products, sum the product of two bit fields extracted from each
   raw product with bit_extract(product,2,4) and bit_extract(product,5,7).
   The result is stored in C; each output element is zeroed and then
   accumulated in place.
   Basic code is used in many algorithms, mostly with minor changes such
   as scaling.
*/
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) {
    ee_u32 row, col, inner;

    for (row = 0; row < N; row++) {
        ee_u32 row_base = row * N;

        for (col = 0; col < N; col++) {
            ee_u32 out = row_base + col;

            C[out] = 0;
            for (inner = 0; inner < N; inner++) {
                MATRES product = (MATRES)A[row_base + inner] * (MATRES)B[inner*N + col];

                C[out] += bit_extract(product,2,4) * bit_extract(product,5,7);
            }
        }
    }
}
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
#include "core_portme.h"
extern uint32_t Arduino_millis();
#if (MULTITHREAD > 1)
static uint8_t next_core = 0;
#endif
#if VALIDATION_RUN
volatile ee_s32 seed1_volatile=0x3415;
volatile ee_s32 seed2_volatile=0x3415;
volatile ee_s32 seed3_volatile=0x66;
#endif
#if PERFORMANCE_RUN
volatile ee_s32 seed1_volatile=0x0;
volatile ee_s32 seed2_volatile=0x0;
volatile ee_s32 seed3_volatile=0x66;
#endif
#if PROFILE_RUN
volatile ee_s32 seed1_volatile=0x8;
volatile ee_s32 seed2_volatile=0x8;
volatile ee_s32 seed3_volatile=0x8;
#endif
volatile ee_s32 seed4_volatile=ITERATIONS;
volatile ee_s32 seed5_volatile=0;
/* Porting : Timing functions
   How to capture time and convert to seconds must be ported to whatever is
   supported by the platform.
   This port reads the time from Arduino_millis(), which is declared extern
   above and provided outside this file.
*/
CORETIMETYPE barebones_clock() {
    CORETIMETYPE now = Arduino_millis();

    return now;
}
/* Define : TIMER_RES_DIVIDER
   Divider to trade off timer resolution and total time that can be measured.

   Use lower values to increase resolution, but make sure that overflow does not occur.
   If there are issues with the return value overflowing, increase this value.
*/
/* Ticks per second of barebones_clock(), which returns Arduino_millis(). */
#define CLOCKS_PER_SEC 1000.0
#define TIMER_RES_DIVIDER 1
/* Store the current tick count through the pointer _t (argument parenthesized
   so any pointer expression can be passed safely). */
#define GETMYTIME(_t) (*(_t)=barebones_clock())
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
#define SAMPLE_TIME_IMPLEMENTATION 1
#define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER)
/** Define Host specific (POSIX), or target specific global time variables. */
static CORETIMETYPE start_time_val, stop_time_val;
/* Function : start_time
   Called right before the timed portion of the benchmark starts.
   Records the current value of barebones_clock() in start_time_val.
*/
void start_time(void) {
    start_time_val = barebones_clock();
}
/* Function : stop_time
   Called right after the timed portion of the benchmark ends.
   Records the current value of barebones_clock() in stop_time_val.
*/
void stop_time(void) {
    stop_time_val = barebones_clock();
}
/* Function : get_time
   Return an abstract "ticks" number that signifies the time elapsed between
   the last <start_time> and <stop_time> calls.

   The value is the difference of the two recorded clock samples and can be
   converted to seconds by <time_in_secs>.
   This methodology is taken to accommodate any hardware or simulated platform.
*/
CORE_TICKS get_time(void) {
    return (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
}
/* Function : time_in_secs
   Convert the value returned by <get_time> to seconds by dividing it by
   EE_TICKS_PER_SEC.

   The <secs_ret> type is used to accommodate systems with no support for
   floating point.
*/
secs_ret time_in_secs(CORE_TICKS ticks) {
    secs_ret ticks_per_sec = (secs_ret)EE_TICKS_PER_SEC;

    return ((secs_ret)ticks) / ticks_per_sec;
}
ee_u32 default_num_contexts = MULTITHREAD;
/* Function : portable_init
   Target specific initialization code.

   Checks two assumptions about the port's type definitions and prints an
   error for each one that does not hold (execution continues either way),
   then marks the port as initialized by setting portable_id to 1.
   argc and argv are not used by this port.
*/
void portable_init(core_portable *p, int *argc, char *argv[])
{
    /* ee_ptr_int must be exactly as wide as a data pointer. */
    if (sizeof(ee_u8 *) != sizeof(ee_ptr_int)) {
        ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n");
    }

    /* ee_u32 must be exactly 4 bytes. */
    if (4 != sizeof(ee_u32)) {
        ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
    }

    p->portable_id = 1;
}
/* Function : portable_fini
Target specific final code.
Clears portable_id, which <portable_init> set to 1.
*/
void portable_fini(core_portable *p)
{
p->portable_id=0;
}
/* Function : iterate_task
Task entry point handed to xTaskCreatePinnedToCore by <core_start_parallel>.
Runs iterate() on the argument, then deletes the calling task
(vTaskDelete with a NULL handle) instead of returning.
arg is the core_results pointer that <core_start_parallel> passes as the task parameter.
*/
void iterate_task(void *arg)
{
iterate(arg);
vTaskDelete(NULL);
}
#if (MULTITHREAD > 1)
/* Function : core_start_parallel
   Start one benchmark context as a task pinned to a core.

   Cores are handed out round-robin through the file-level next_core
   counter (modulo MULTITHREAD). The task handle is stored in
   res->port.task so <core_stop_parallel> can wait on it.

   Returns:
   The value returned by xTaskCreatePinnedToCore, narrowed to ee_u8.
*/
ee_u8 core_start_parallel(core_results *res)
{
    int status;
    uint8_t target_core = next_core;

    /* Advance the round-robin cursor for the next context. */
    next_core = (next_core + 1) % MULTITHREAD;

    res->port.task = NULL;
    status = xTaskCreatePinnedToCore(
        iterate_task,       /* Function to implement the task */
        "CoreMarkTask",     /* Name of the task */
        10000,              /* Stack size (NOTE(review): ESP-IDF documents this in bytes, not words - confirm) */
        (void *)res,        /* Task input parameter */
        20,                 /* Priority of the task */
        &(res->port.task),  /* Task handle */
        target_core);       /* Core where the task should run */

    return (ee_u8)status;
}
/* Function : core_stop_parallel
Wait for the context started by <core_start_parallel> to finish.
Spins until eTaskGetState reports eDeleted for res->port.task
(the task deletes itself at the end of iterate_task), then clears the handle.
Returns:
Always 0.
NOTE(review): this is a busy-wait with no delay or yield, and it keeps querying the handle
of a task that deletes itself - confirm the handle stays valid until eDeleted is observed
and that spinning here cannot starve the task (or the idle task) on the same core.
*/
ee_u8 core_stop_parallel(core_results *res)
{
while (eTaskGetState(res->port.task) != eDeleted);
res->port.task = NULL;
return 0;
}
#endif
#include "Arduino.h"
#include <stdint.h>
#include <stdio.h>
// a minor hack to rename the main function, so we can call it from C++
#define main(ignore) coremark_main(void)
#define FLAGS_STR "(flags unknown)"
#define PERFORMANCE_RUN 1
// 0 means auto-detect number of iterations for 10 second test
#define ITERATIONS 0
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
/* Topic : Description
This file contains configuration constants required to execute on different platforms
*/
#ifndef CORE_PORTME_H
#define CORE_PORTME_H
/************************/
/* Data types and settings */
/************************/
/* Configuration : HAS_FLOAT
Define to 1 if the platform supports floating point.
*/
#ifndef HAS_FLOAT
#define HAS_FLOAT 1
#endif
/* Configuration : HAS_TIME_H
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
*/
#ifndef HAS_TIME_H
#define HAS_TIME_H 0
#endif
/* Configuration : USE_CLOCK
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
*/
#ifndef USE_CLOCK
#define USE_CLOCK 0
#endif
/* Configuration : HAS_STDIO
Define to 1 if the platform has stdio.h.
*/
#ifndef HAS_STDIO
#define HAS_STDIO 1
#endif
/* Configuration : HAS_PRINTF
Define to 1 if the platform has stdio.h and implements the printf function.
*/
#ifndef HAS_PRINTF
#define HAS_PRINTF 0
#endif
/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
   Initialize these strings per platform
*/
#ifndef COMPILER_VERSION
 #ifdef __GNUC__
 /* The space before __VERSION__ is required: C++11 and later lex
    "GCC"__VERSION__ as a single literal with a user-defined suffix.
    The concatenated string is unchanged. */
 #define COMPILER_VERSION "GCC" __VERSION__
 #else
 #define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
 #endif
#endif
#ifndef COMPILER_FLAGS
#define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
#endif
#ifndef MEM_LOCATION
#define MEM_LOCATION "STACK"
#endif
/* Data Types :
To avoid compiler issues, define the data types that need to be used for 8b, 16b and 32b in <core_portme.h>.
*Important* :
ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!!
*/
typedef int16_t ee_s16;
typedef uint16_t ee_u16;
typedef int32_t ee_s32;
typedef double ee_f32;
typedef uint8_t ee_u8;
typedef uint32_t ee_u32;
typedef uintptr_t ee_ptr_int;
typedef size_t ee_size_t;
#define NULL ((void *)0)
/* align_mem :
This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks.
*/
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3))
/* Configuration : CORE_TICKS
Define type of return from the timing functions.
*/
#define CORETIMETYPE ee_u32
typedef ee_u32 CORE_TICKS;
/* Configuration : SEED_METHOD
Defines method to get seed values that cannot be computed at compile time.
Valid values :
SEED_ARG - from command line.
SEED_FUNC - from a system function.
SEED_VOLATILE - from volatile variables.
*/
#ifndef SEED_METHOD
#define SEED_METHOD SEED_VOLATILE
#endif
/* Configuration : MEM_METHOD
Defines method to get a block of memory.
Valid values :
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
MEM_STATIC - to use a static memory array.
MEM_STACK - to allocate the data block on the stack (NYI).
*/
#ifndef MEM_METHOD
#define MEM_METHOD MEM_STACK
#endif
/* Configuration : MULTITHREAD
Define for parallel execution
Valid values :
1 - only one context (default).
N>1 - will execute N copies in parallel.
Note :
If this flag is defined to more than 1, an implementation for launching parallel contexts must be defined.
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK> to enable them.
It is valid to have a different implementation of <core_start_parallel> and <core_end_parallel> in <core_portme.c>,
to fit a particular architecture.
*/
#ifndef MULTITHREAD
#define MULTITHREAD CONFIG_SOC_CPU_CORES_NUM
#define PARALLEL_METHOD "FreeRTOS"
#define USE_PTHREAD 0
#define USE_FORK 0
#define USE_SOCKET 0
#endif
/* Configuration : MAIN_HAS_NOARGC
Needed if platform does not support getting arguments to main.
Valid values :
0 - argc/argv to main is supported
1 - argc/argv to main is not supported
Note :
This flag only matters if MULTITHREAD has been defined to a value greater than 1.
*/
#ifndef MAIN_HAS_NOARGC
#define MAIN_HAS_NOARGC 1
#endif
/* Configuration : MAIN_HAS_NORETURN
Needed if platform does not support returning a value from main.
Valid values :
0 - main returns an int, and return value will be 0.
1 - platform does not support returning a value from main
*/
#ifndef MAIN_HAS_NORETURN
#define MAIN_HAS_NORETURN 0
#endif
/* Variable : default_num_contexts
Default number of parallel contexts. This port initializes it to MULTITHREAD in the port source, so it only contains the value 1 on single-context builds.
*/
extern ee_u32 default_num_contexts;
/* Port-specific state handed to <portable_init> and <portable_fini>.
   Presumably embedded in core_results as its "port" member (the port source accesses res->port.task) - confirm in coremark.h. */
typedef struct CORE_PORTABLE_S {
#if (MULTITHREAD > 1)
/* Handle of the task running this context; only present when more than one context is built. */
TaskHandle_t task;
#endif
/* Set to 1 by portable_init and back to 0 by portable_fini. */
ee_u8 portable_id;
} core_portable;
/* target specific init/fini */
void portable_init(core_portable *p, int *argc, char *argv[]);
void portable_fini(core_portable *p);
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) && !defined(VALIDATION_RUN)
#if (TOTAL_DATA_SIZE==1200)
#define PROFILE_RUN 1
#elif (TOTAL_DATA_SIZE==2000)
#define PERFORMANCE_RUN 1
#else
#define VALIDATION_RUN 1
#endif
#endif
int ee_printf(const char *fmt, ...);
#endif /* CORE_PORTME_H */
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
/* local functions */
enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count);
/*
Topic: Description
Simple state machines like this one are used in many embedded products.
For more complex state machines, sometimes a state transition table implementation is used instead,
trading speed of direct coding for ease of maintenance.
Since the main goal of using a state machine in CoreMark is to exercise the switch/if behaviour,
we are using a small moore machine.
In particular, this machine tests type of string input,
trying to determine whether the input is a number or something else.
(see core_state.png).
*/
/* Function: core_bench_state
Benchmark function
Go over the input twice, once direct, and once after introducing some corruption.
Parameters:
blksize - number of bytes of memblock covered by the corruption passes.
memblock - input buffer; the state machine passes stop at the first zero byte.
seed1 - low 8 bits are XORed into every step-th byte (commas excepted) before the second pass.
seed2 - low 8 bits are XORed into the same bytes after the second pass.
step - stride, in bytes, of the corruption passes.
crc - running CRC to extend.
Returns:
crc extended (via crcu32) with the per-state final counts and transition counts of both passes.
NOTE(review): the corruption loops advance by step only, so step is presumably always positive - confirm at the caller.
*/
ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock,
ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc)
{
ee_u32 final_counts[NUM_CORE_STATES];
ee_u32 track_counts[NUM_CORE_STATES];
ee_u8 *p=memblock;
ee_u32 i;
#if CORE_DEBUG
ee_printf("State Bench: %d,%d,%d,%04x\n",seed1,seed2,step,crc);
#endif
for (i=0; i<NUM_CORE_STATES; i++) {
final_counts[i]=track_counts[i]=0;
}
/* run the state machine over the input */
/* core_state_transition receives &p and track_counts; it is expected to advance p (the loop ends at a zero byte) */
while (*p!=0) {
enum CORE_STATE fstate=core_state_transition(&p,track_counts);
final_counts[fstate]++;
/* the loop's closing brace lives in both preprocessor branches below, so the debug build can print inside the loop */
#if CORE_DEBUG
ee_printf("%d,",fstate);
}
ee_printf("\n");
#else
}
#endif
p=memblock;
while (p < (memblock+blksize)) { /* insert some corruption */
if (*p!=',')
*p^=(ee_u8)seed1;
p+=step;
}
p=memblock;
/* run the state machine over the input again */
while (*p!=0) {
enum CORE_STATE fstate=core_state_transition(&p,track_counts);
final_counts[fstate]++;
/* same split closing brace as in the first pass */
#if CORE_DEBUG
ee_printf("%d,",fstate);
}
ee_printf("\n");
#else
}
#endif
p=memblock;
while (p < (memblock+blksize)) { /* undo corruption if seed1 and seed2 are equal */
if (*p!=',')
*p^=(ee_u8)seed2;
p+=step;
}
/* end timing */
/* fold both counters of every state into the CRC, final count first */
for (i=0; i<NUM_CORE_STATES; i++) {
crc=crcu32(final_counts[i],crc);
crc=crcu32(track_counts[i],crc);
}
return crc;
}
/* Default initialization patterns */
/* Four variants per category. core_init_state picks the category from the low 3 bits
   of the seed and the variant from bits 3-4. Integer patterns are 4 characters long,
   all other patterns are 8 characters long (core_init_state relies on these lengths). */
static ee_u8 *intpat[4] ={(ee_u8 *)"5012",(ee_u8 *)"1234",(ee_u8 *)"-874",(ee_u8 *)"+122"};
static ee_u8 *floatpat[4]={(ee_u8 *)"35.54400",(ee_u8 *)".1234500",(ee_u8 *)"-110.700",(ee_u8 *)"+0.64400"};
static ee_u8 *scipat[4] ={(ee_u8 *)"5.500e+3",(ee_u8 *)"-.123e-2",(ee_u8 *)"-87e+832",(ee_u8 *)"+0.6e-12"};
/* Patterns that the state machine is expected to reject */
static ee_u8 *errpat[4] ={(ee_u8 *)"T0.3e-1F",(ee_u8 *)"-T.T++Tq",(ee_u8 *)"1T3.4e4z",(ee_u8 *)"34.0e-T^"};
/* Function: core_init_state
Initialize the input data for the state machine.
Populate the input with several predetermined strings, interspersed.
Actual patterns chosen depend on the seed parameter.
Note:
The seed parameter MUST be supplied from a source that cannot be determined at compile time
*/
/* Fills p with comma-terminated pattern strings and pads the remainder with zeros.
   Parameters:
     size - number of bytes available at p.
     seed - starting value; it is incremented once per pattern and selects the pattern.
     p    - destination buffer.
   NOTE(review): size is decremented before use, so size==0 would presumably wrap around
   (ee_u32 looks unsigned) - callers are assumed to pass size >= 1; confirm.
*/
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p) {
/* total: bytes written so far; next: length of the pattern waiting in buf (0 = none yet) */
ee_u32 total=0,next=0,i;
ee_u8 *buf=0;
#if CORE_DEBUG
ee_u8 *start=p;
ee_printf("State: %d,%d\n",size,seed);
#endif
size--; /* keep the last byte out of the pattern area; it is zero-filled below */
next=0;
/* A pattern is chosen in one iteration and copied in the following one, and only
   if the pattern plus its ',' separator still fits. */
while ((total+next+1)<size) {
if (next>0) {
for(i=0;i<next;i++)
*(p+total+i)=buf[i];
*(p+total+i)=','; /* i == next here: the separator goes right after the pattern */
total+=next+1;
}
seed++;
/* low 3 bits choose the category, bits 3-4 choose one of its four variants */
switch (seed & 0x7) {
case 0: /* int */
case 1: /* int */
case 2: /* int */
buf=intpat[(seed>>3) & 0x3];
next=4;
break;
case 3: /* float */
case 4: /* float */
buf=floatpat[(seed>>3) & 0x3];
next=8;
break;
case 5: /* scientific */
case 6: /* scientific */
buf=scipat[(seed>>3) & 0x3];
next=8;
break;
case 7: /* invalid */
buf=errpat[(seed>>3) & 0x3];
next=8;
break;
default: /* Never happen, just to make some compilers happy */
break;
}
}
size++; /* restore the caller's size so the padding reaches the final byte */
while (total<size) { /* fill the rest with 0 */
*(p+total)=0;
total++;
}
#if CORE_DEBUG
ee_printf("State Input: %s\n",start);
#endif
}
/* Returns 1 when c is an ASCII decimal digit ('0'..'9'), otherwise 0. */
static ee_u8 ee_isdigit(ee_u8 c) {
	/* Both comparisons are always evaluated and combined with a bitwise AND,
	   exactly as before (no short-circuit). */
	return (('0'<=c) & ('9'>=c)) ? 1 : 0;
}
/* Function: core_state_transition
Actual state machine.
The state machine will continue scanning until either:
1 - an invalid input is detected.
2 - a valid number has been detected.
The input pointer is updated to point to the end of the token, and the end state is returned (either specific format determined or invalid).
*/
/* Scans one token starting at *instr and classifies it.
   Parameters:
     instr            - in/out: points at the token start on entry; on return points at the
                        position where scanning stopped.
     transition_count - array indexed by enum CORE_STATE; selected entries are incremented
                        as the machine changes state.
   Returns: the state the machine was in when scanning stopped.
*/
enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count) {
ee_u8 *str=*instr;
ee_u8 NEXT_SYMBOL;
enum CORE_STATE state=CORE_START;
/* Stop at the terminating 0 byte or as soon as the token is known to be invalid.
   The str++ of the for statement still executes after the iteration that set
   CORE_INVALID, so the offending character is consumed. */
for( ; *str && state != CORE_INVALID; str++ ) {
NEXT_SYMBOL = *str;
if (NEXT_SYMBOL==',') /* end of this input */ {
str++; /* step over the separator so the next call starts on the next token */
break;
}
switch(state) {
case CORE_START:
if(ee_isdigit(NEXT_SYMBOL)) {
state = CORE_INT;
}
else if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) {
state = CORE_S1;
}
else if( NEXT_SYMBOL == '.' ) {
state = CORE_FLOAT;
}
else {
state = CORE_INVALID;
transition_count[CORE_INVALID]++;
}
/* counted for every first character, including the invalid case above */
transition_count[CORE_START]++;
break;
case CORE_S1: /* a leading sign has been seen */
if(ee_isdigit(NEXT_SYMBOL)) {
state = CORE_INT;
transition_count[CORE_S1]++;
}
else if( NEXT_SYMBOL == '.' ) {
state = CORE_FLOAT;
transition_count[CORE_S1]++;
}
else {
state = CORE_INVALID;
transition_count[CORE_S1]++;
}
break;
case CORE_INT: /* further digits keep the state and are not counted */
if( NEXT_SYMBOL == '.' ) {
state = CORE_FLOAT;
transition_count[CORE_INT]++;
}
else if(!ee_isdigit(NEXT_SYMBOL)) {
state = CORE_INVALID;
transition_count[CORE_INT]++;
}
break;
case CORE_FLOAT: /* further digits keep the state and are not counted */
if( NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e' ) {
state = CORE_S2;
transition_count[CORE_FLOAT]++;
}
else if(!ee_isdigit(NEXT_SYMBOL)) {
state = CORE_INVALID;
transition_count[CORE_FLOAT]++;
}
break;
case CORE_S2: /* exponent marker seen; a sign is mandatory next */
if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) {
state = CORE_EXPONENT;
transition_count[CORE_S2]++;
}
else {
state = CORE_INVALID;
transition_count[CORE_S2]++;
}
break;
case CORE_EXPONENT: /* exponent sign seen; a digit is mandatory next */
if(ee_isdigit(NEXT_SYMBOL)) {
state = CORE_SCIENTIFIC;
transition_count[CORE_EXPONENT]++;
}
else {
state = CORE_INVALID;
transition_count[CORE_EXPONENT]++;
}
break;
case CORE_SCIENTIFIC:
if(!ee_isdigit(NEXT_SYMBOL)) {
state = CORE_INVALID;
/* NOTE(review): unlike the other states this bumps the CORE_INVALID counter rather
   than the counter of the state being left; the counters feed the benchmark CRC,
   so this is presumably intentional and must not be changed - confirm */
transition_count[CORE_INVALID]++;
}
break;
default:
break;
}
}
*instr=str;
return state;
}
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include "coremark.h"
/* Function: get_seed
Get values that cannot be determined at compile time.
Since different embedded systems and compilers are used, 3 different methods are provided:
1 - Using a volatile variable. This method is only valid if the compiler is forced to generate code that
reads the value of a volatile variable from memory at run time.
Please note, if using this method, you would need to modify core_portme.c to generate training profile.
2 - Command line arguments. This is the preferred method if command line arguments are supported.
3 - System function. If none of the first 2 methods is available on the platform,
a system function which is not a stub can be used.
e.g. read the value on GPIO pins connected to switches, or invoke special simulator functions.
*/
#if (SEED_METHOD==SEED_VOLATILE)
extern volatile ee_s32 seed1_volatile;
extern volatile ee_s32 seed2_volatile;
extern volatile ee_s32 seed3_volatile;
extern volatile ee_s32 seed4_volatile;
extern volatile ee_s32 seed5_volatile;
/* Returns seed number i (1..5) read from the matching volatile variable;
   any other index yields 0. */
ee_s32 get_seed_32(int i) {
	switch (i) {
		case 1:
			return seed1_volatile;
		case 2:
			return seed2_volatile;
		case 3:
			return seed3_volatile;
		case 4:
			return seed4_volatile;
		case 5:
			return seed5_volatile;
		default:
			break;
	}
	/* unknown seed index */
	return 0;
}
#elif (SEED_METHOD==SEED_ARG)
/* Parses a numeric command line value.
   Accepted form: optional '-', optional "0x" prefix (lower case hex digits only),
   digits, then an optional 'K' (x1024) or 'M' (x1024*1024) suffix.
   Parsing stops at the first character that does not fit; no error is reported.
*/
ee_s32 parseval(char *valstring) {
	ee_s32 value=0;
	ee_s32 sign=1;
	ee_s32 base=10;
	if ('-' == valstring[0]) {
		sign=-1;
		++valstring;
	}
	if (('0' == valstring[0]) && ('x' == valstring[1])) {
		base=16;
		valstring+=2;
	}
	/* accumulate digits; 'a'..'f' only count as digits in hex mode */
	for (;;++valstring) {
		ee_s32 digit;
		if ((*valstring >= '0') && (*valstring <= '9'))
			digit=*valstring-'0';
		else if ((base == 16) && (*valstring >= 'a') && (*valstring <= 'f'))
			digit=10+*valstring-'a';
		else
			break;
		value*=base;
		value+=digit;
	}
	/* optional size qualifier directly after the digits */
	switch (*valstring) {
		case 'K':
			value*=1024;
			break;
		case 'M':
			value*=1024*1024;
			break;
		default:
			break;
	}
	value*=sign;
	return value;
}
/* Returns command line argument i parsed with parseval, or 0 when fewer than
   i+1 arguments were supplied. */
ee_s32 get_seed_args(int i, int argc, char *argv[]) {
	return (argc>i) ? parseval(argv[i]) : 0;
}
#elif (SEED_METHOD==SEED_FUNC)
/* If using OS based function, you must define and implement the functions below in core_portme.h and core_portme.c ! */
/* Returns seed number i (1..5) obtained from the matching portme_sysN() function;
   any other index yields 0. */
ee_s32 get_seed_32(int i) {
	switch (i) {
		case 1:
			return portme_sys1();
		case 2:
			return portme_sys2();
		case 3:
			return portme_sys3();
		case 4:
			return portme_sys4();
		case 5:
			return portme_sys5();
		default:
			break;
	}
	/* unknown seed index */
	return 0;
}
#endif
/* Function: crc*
Service functions to calculate 16b CRC code.
*/
/* Folds one byte into a 16-bit CRC, processing the byte bit by bit starting with
   its least significant bit.
   Parameters: data - byte to add; crc - running CRC.
   Returns: the updated CRC.
*/
ee_u16 crcu8(ee_u8 data, ee_u16 crc )
{
ee_u8 i=0,x16=0,carry=0;
for (i = 0; i < 8; i++)
{
/* x16 = current data bit XOR current low bit of the CRC */
x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1));
data >>= 1;
if (x16 == 1)
{
crc ^= 0x4002;
carry = 1;
}
else
carry = 0;
crc >>= 1;
/* x16 becomes the new top bit of the shifted CRC */
if (carry)
crc |= 0x8000;
else
crc &= 0x7fff;
}
return crc;
}
/* Folds a 16-bit value into the CRC, low byte first and high byte second. */
ee_u16 crcu16(ee_u16 newval, ee_u16 crc) {
	ee_u16 after_low=crcu8((ee_u8)(newval), crc);
	return crcu8((ee_u8)((newval)>>8), after_low);
}
/* Folds a 32-bit value into the CRC, low 16 bits first and high 16 bits second. */
ee_u16 crcu32(ee_u32 newval, ee_u16 crc) {
	ee_u16 after_low=crc16((ee_s16) newval, crc);
	return crc16((ee_s16) (newval>>16), after_low);
}
/* Signed front end for crcu16: the value is converted to unsigned and folded in. */
ee_u16 crc16(ee_s16 newval, ee_u16 crc) {
	ee_u16 as_unsigned=(ee_u16)newval;
	return crcu16(as_unsigned, crc);
}
/* Verifies the sizes of the portable data types.
   Prints one error line per mismatching type plus a final hint, and returns the
   number of mismatches (0 when every type has the expected size).
*/
ee_u8 check_data_types() {
	ee_u8 mismatches=0;
	if (1 != sizeof(ee_u8)) {
		mismatches+=1;
		ee_printf("ERROR: ee_u8 is not an 8b datatype!\n");
	}
	if (2 != sizeof(ee_u16)) {
		mismatches+=1;
		ee_printf("ERROR: ee_u16 is not a 16b datatype!\n");
	}
	if (2 != sizeof(ee_s16)) {
		mismatches+=1;
		ee_printf("ERROR: ee_s16 is not a 16b datatype!\n");
	}
	if (4 != sizeof(ee_s32)) {
		mismatches+=1;
		ee_printf("ERROR: ee_s32 is not a 32b datatype!\n");
	}
	if (4 != sizeof(ee_u32)) {
		mismatches+=1;
		ee_printf("ERROR: ee_u32 is not a 32b datatype!\n");
	}
	/* ee_ptr_int must be able to hold a pointer */
	if (sizeof(int *) != sizeof(ee_ptr_int)) {
		mismatches+=1;
		ee_printf("ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n");
	}
	if (mismatches != 0) {
		ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n");
	}
	return mismatches;
}
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
/* Topic: Description
This file contains declarations of the various benchmark functions.
*/
/* Configuration: TOTAL_DATA_SIZE
Define total size for data algorithms will operate on
*/
#ifndef TOTAL_DATA_SIZE
#define TOTAL_DATA_SIZE 2*1000
#endif
#define SEED_ARG 0
#define SEED_FUNC 1
#define SEED_VOLATILE 2
#define MEM_STATIC 0
#define MEM_MALLOC 1
#define MEM_STACK 2
#include "core_portme.h"
#if HAS_STDIO
#include <stdio.h>
#endif
#if HAS_PRINTF
#define ee_printf printf
#endif
/* Actual benchmark execution in iterate */
void *iterate(void *pres);
/* Typedef: secs_ret
For machines that have floating point support, get number of seconds as a double.
Otherwise an unsigned int.
*/
#if HAS_FLOAT
typedef double secs_ret;
#else
typedef ee_u32 secs_ret;
#endif
#if MAIN_HAS_NORETURN
#define MAIN_RETURN_VAL
#define MAIN_RETURN_TYPE void
#else
#define MAIN_RETURN_VAL 0
#define MAIN_RETURN_TYPE int
#endif
void start_time(void);
void stop_time(void);
CORE_TICKS get_time(void);
secs_ret time_in_secs(CORE_TICKS ticks);
/* Misc useful functions */
ee_u16 crcu8(ee_u8 data, ee_u16 crc);
ee_u16 crc16(ee_s16 newval, ee_u16 crc);
ee_u16 crcu16(ee_u16 newval, ee_u16 crc);
ee_u16 crcu32(ee_u32 newval, ee_u16 crc);
ee_u8 check_data_types();
void *portable_malloc(ee_size_t size);
void portable_free(void *p);
ee_s32 parseval(char *valstring);
/* Algorithm IDS */
#define ID_LIST (1<<0)
#define ID_MATRIX (1<<1)
#define ID_STATE (1<<2)
#define ALL_ALGORITHMS_MASK (ID_LIST|ID_MATRIX|ID_STATE)
#define NUM_ALGORITHMS 3
/* list data structures */
/* Payload carried by one list node.
   NOTE(review): the exact meaning of the two fields is defined by the list benchmark,
   which is not visible here - presumably a data value and an ordering index; confirm. */
typedef struct list_data_s {
ee_s16 data16;
ee_s16 idx;
} list_data;
/* Singly linked list node: a link to the next node and a pointer to the node's payload. */
typedef struct list_head_s {
struct list_head_s *next;
struct list_data_s *info;
} list_head;
/*matrix benchmark related stuff */
#define MATDAT_INT 1
#if MATDAT_INT
typedef ee_s16 MATDAT;
typedef ee_s32 MATRES;
#else
typedef ee_f16 MATDAT;
typedef ee_f32 MATRES;
#endif
/* Parameters of the matrix benchmark.
   NOTE(review): presumably N is the matrix dimension, A and B are the inputs and C the
   result (C uses the wider MATRES type) - confirm against core_matrix.c. */
typedef struct MAT_PARAMS_S {
int N;
MATDAT *A;
MATDAT *B;
MATRES *C;
} mat_params;
/* state machine related stuff */
/* List of all the possible states for the FSM */
/* The enumerators are used as array indices (final_counts / track_counts /
   transition_count), so their numeric values and order matter. */
typedef enum CORE_STATE {
CORE_START=0, /* nothing consumed yet */
CORE_INVALID, /* token rejected */
CORE_S1, /* leading '+' or '-' seen */
CORE_S2, /* exponent marker 'e' or 'E' seen */
CORE_INT, /* digits only so far */
CORE_FLOAT, /* decimal point seen */
CORE_EXPONENT, /* sign of the exponent seen */
CORE_SCIENTIFIC, /* at least one exponent digit seen */
NUM_CORE_STATES /* number of states; used to size the counter arrays */
} core_state_e ;
/* Helper structure to hold results */
/* Per-run context: the inputs of a benchmark run and the CRCs it produces. */
typedef struct RESULTS_S {
/* inputs */
ee_s16 seed1; /* Initializing seed */
ee_s16 seed2; /* Initializing seed */
ee_s16 seed3; /* Initializing seed */
void *memblock[4]; /* Pointer to safe memory location */
ee_u32 size; /* Size of the data */
ee_u32 iterations; /* Number of iterations to execute */
ee_u32 execs; /* Bitmask of operations to execute */
struct list_head_s *list;
mat_params mat;
/* outputs */
ee_u16 crc;
ee_u16 crclist;
ee_u16 crcmatrix;
ee_u16 crcstate;
ee_s16 err;
/* Multithread specific */
core_portable port;
} core_results;
/* Multicore execution handling */
#if (MULTITHREAD>1)
ee_u8 core_start_parallel(core_results *res);
ee_u8 core_stop_parallel(core_results *res);
#endif
/* list benchmark functions */
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed);
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx);
/* state benchmark functions */
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p);
ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock,
ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc);
/* matrix benchmark functions */
ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p);
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc);
/*
CoreMark benchmark for ESP32 using Arduino's C++ environment with multithreading support.
Based on https://github.com/PaulStoffregen/CoreMark/tree/master
Modified to run on ESP32 by Lucas Saavedra Vaz, 2024.
*/
#include <Arduino.h>
#include <stdarg.h>
#include <esp_task_wdt.h>
// Timeout for the task watchdog timer
#define TWDT_TIMEOUT_S 20
// Number of runs to average
#define N_RUNS 3
// A way to call the C-only coremark function from Arduino's C++ environment
extern "C" int coremark_main(void);
// Runs the CoreMark benchmark N_RUNS times and reports progress over Serial.
// The "Runs:", "Cores:" and "Run <n>" lines are parsed by the accompanying pytest
// script, so their text and order must stay as they are.
void setup() {
Serial.begin(115200);
// Wait until the serial port is ready before printing anything
while (!Serial) {
delay(10);
}
// To avoid the watchdog timer from resetting the ESP32 while running CoreMark we
// need to reconfigure it to have a longer timeout.
esp_task_wdt_config_t config = {
.timeout_ms = TWDT_TIMEOUT_S * 1000,
.idle_core_mask = 0,
.trigger_panic = false,
};
esp_task_wdt_reconfigure(&config);
log_d("Starting CoreMark test");
Serial.printf("Runs: %d\n", N_RUNS);
Serial.printf("Cores: %d\n", CONFIG_SOC_CPU_CORES_NUM);
Serial.flush();
for (int i = 0; i < N_RUNS; i++) {
// No newline here: the run marker is followed directly by CoreMark's own output
Serial.printf("Run %d", i);
coremark_main();
Serial.flush();
}
log_d("CoreMark test finished");
}
// All work is done in setup(); delete the calling task instead of looping.
void loop() {
vTaskDelete(NULL);
}
// CoreMark calls this function to print results.
// CoreMark calls this function to print results.
//
// Minimal printf replacement that writes to Serial. Supported conversions are
// %s, %f, %d, %u, %x and %c, each with an optional 'l' length modifier, plus "%%".
// A '-' flag and a numeric field width are parsed but ignored, so no padding is
// produced. Unknown conversions are skipped without consuming an argument.
// Every '\n' in the format string is sent as "\r\n".
// Always returns 1.
extern "C" int ee_printf(const char *format, ...) {
  va_list args;
  va_start(args, format);
  for (; *format; format++) {
    if (*format == '%') {
      bool islong = false;
      format++;
      if (*format == '%') {
        Serial.print(*format);
        continue;
      }
      if (*format == '-') {
        format++;  // ignore size
      }
      while (*format >= '0' && *format <= '9') {
        format++;  // ignore size
      }
      if (*format == 'l') {
        islong = true;
        format++;
      }
      if (*format == '\0') {
        // Format string ended in the middle of a conversion; stop before the
        // loop increment would step past the terminator.
        break;
      }
      if (*format == 's') {
        // The argument is a pointer: it must be fetched with a pointer type.
        // Reading it as int is undefined behaviour and truncates the address
        // wherever pointers are wider than int.
        Serial.print(va_arg(args, char *));
      } else if (*format == 'f') {
        Serial.print(va_arg(args, double));
      } else if (*format == 'd') {
        if (islong) {
          Serial.print(va_arg(args, long));
        } else {
          Serial.print(va_arg(args, int));
        }
      } else if (*format == 'u') {
        if (islong) {
          Serial.print(va_arg(args, unsigned long));
        } else {
          Serial.print(va_arg(args, unsigned int));
        }
      } else if (*format == 'x') {
        if (islong) {
          Serial.print(va_arg(args, unsigned long), HEX);
        } else {
          Serial.print(va_arg(args, unsigned int), HEX);
        }
      } else if (*format == 'c') {
        // char arguments are promoted to int when passed through "...";
        // convert back so the character itself is printed, not its code.
        Serial.print((char)va_arg(args, int));
      }
    } else {
      if (*format == '\n') {
        Serial.print('\r');
      }
      Serial.print(*format);
    }
  }
  va_end(args);
  return 1;
}
// CoreMark calls this function to measure elapsed time
// C-linkage wrapper around Arduino's millis() so the C-only CoreMark code can call it.
extern "C" uint32_t Arduino_millis(void) {
return millis();
}
import json
import logging
import os
def test_coremark(dut, request):
    """Check the CoreMark sketch output and store the averaged score as JSON.

    Args:
        dut: Device-under-test fixture. Its ``expect(pattern, timeout=...)`` method
            is used to wait for serial output; the returned match holds bytes.
        request: pytest ``request`` fixture. The report is written next to
            ``request.path``.

    The report is written on a best-effort basis: a failure to create or write the
    file is logged as a warning and does not fail the test.
    """
    LOGGER = logging.getLogger(__name__)

    # Match "Runs: %d"
    res = dut.expect(r"Runs: (\d+)", timeout=60)
    runs = int(res.group(0).decode("utf-8").split(" ")[1])
    LOGGER.info("Number of runs: {}".format(runs))
    assert runs > 0, "Invalid number of runs"

    # Match "Cores: %d"
    res = dut.expect(r"Cores: (\d+)", timeout=60)
    cores = int(res.group(0).decode("utf-8").split(" ")[1])
    LOGGER.info("Number of cores: {}".format(cores))
    assert cores > 0, "Invalid number of cores"

    total_score = 0

    for i in range(runs):
        # Match "Run %d"
        res = dut.expect(r"Run (\d+)", timeout=120)
        run = int(res.group(0).decode("utf-8").split(" ")[1])
        LOGGER.info("Run {}".format(run))
        assert run == i, "Invalid run number"

        # Match "CoreMark 1.0 : %d"
        res = dut.expect(r"CoreMark 1.0 : (\d+)\.(\d+)", timeout=120)
        score = float(res.group(0).decode("utf-8").split(" ")[3])
        LOGGER.info("CoreMark score: {}".format(score))
        assert 0 < score < 10000, "Impossible CoreMark score"
        total_score += score

    avg_score = round(total_score / runs, 2)
    LOGGER.info("Average CoreMark score: {}".format(avg_score))
    assert 0 < avg_score < 10000, "Impossible CoreMark score"

    # Create JSON with results and write it to file
    # Always create a JSON with this format (so it can be merged later on):
    # { TEST_NAME_STR: TEST_RESULTS_DICT }
    results = {"coremark": {"runs": runs, "cores": cores, "avg_score": avg_score}}

    # Use the first free "result_coremark<N>.json" name. The path is rebuilt from the
    # index on every step instead of being patched with str.replace().
    current_folder = os.path.dirname(request.path)
    file_index = 0
    report_file = os.path.join(current_folder, "result_coremark{}.json".format(file_index))
    while os.path.exists(report_file):
        file_index += 1
        report_file = os.path.join(current_folder, "result_coremark{}.json".format(file_index))

    # open() is inside the try block so that a failure to create the file is
    # reported as a warning too, not only a failure while writing.
    try:
        with open(report_file, "w") as f:
            f.write(json.dumps(results))
    except (OSError, TypeError, ValueError) as e:
        LOGGER.warning("Failed to write results to file: {}".format(e))
/*
Fibonacci calculation test for Arduino and ESP32.
Created by Lucas Saavedra Vaz, 2024
*/
#include <Arduino.h>
// Number of runs to average
#define N_RUNS 3
// Fibonacci number to calculate. Keep between 35 and 45.
#define FIB_N 40
// Returns the n-th Fibonacci number (fib(0) = 0, fib(1) = 1).
// Deliberately uses naive double recursion: the amount of work is what the sketch times.
uint64_t fib(uint32_t n) {
  return (n < 2) ? n : fib(n - 1) + fib(n - 2);
}
// Computes fib(FIB_N) N_RUNS times and prints the result and the elapsed time of each run.
// The "Runs:", "N:", "Run <n>", "Fibonacci(N):" and "Time:" lines are parsed by the
// accompanying pytest script, so their text and order must stay as they are.
void setup() {
uint64_t fibonacci;
Serial.begin(115200);
// Wait until the serial port is ready before printing anything
while (!Serial) {
delay(10);
}
log_d("Starting fibonacci calculation");
Serial.printf("Runs: %d\n", N_RUNS);
Serial.printf("N: %d\n", FIB_N);
Serial.flush();
for (int i = 0; i < N_RUNS; i++) {
// No newline after the run marker; the next output is the result line below
Serial.printf("Run %d", i);
unsigned long start = millis();
fibonacci = fib(FIB_N);
unsigned long elapsed = millis() - start;
Serial.printf("Fibonacci(N): %llu\n", fibonacci);
// elapsed is in milliseconds; print it as seconds with three decimals
Serial.printf("Time: %lu.%03lu s\n", elapsed / 1000, elapsed % 1000);
Serial.flush();
}
log_d("Fibonacci calculation test done");
}
// All work is done in setup(); delete the calling task instead of looping.
void loop() {
vTaskDelete(NULL);
}
import json
import logging
import os
def test_fibonacci(dut, request):
    """Check the Fibonacci sketch output and store the average run time as JSON.

    Args:
        dut: Device-under-test fixture. Its ``expect(pattern, timeout=...)`` method
            is used to wait for serial output; the returned match holds bytes.
        request: pytest ``request`` fixture. The report is written next to
            ``request.path``.

    The report is written on a best-effort basis: a failure to create or write the
    file is logged as a warning and does not fail the test.
    """
    LOGGER = logging.getLogger(__name__)

    # Match "Runs: %d"
    res = dut.expect(r"Runs: (\d+)", timeout=60)
    runs = int(res.group(0).decode("utf-8").split(" ")[1])
    LOGGER.info("Number of runs: {}".format(runs))
    assert runs > 0, "Invalid number of runs"

    # Match "N: %d"
    res = dut.expect(r"N: (\d+)", timeout=300)
    fib_n = int(res.group(0).decode("utf-8").split(" ")[1])
    LOGGER.info("Calculating Fibonacci({})".format(fib_n))
    assert 30 < fib_n < 50, "Invalid Fibonacci number"

    # Compute the expected value instead of looking it up in a fixed table, so every
    # N accepted by the check above can be verified (the table stopped at fib(44)).
    expected_result, following = 0, 1
    for _ in range(fib_n):
        expected_result, following = following, expected_result + following

    list_time = []

    for i in range(runs):
        # Match "Run %d"
        res = dut.expect(r"Run (\d+)", timeout=120)
        run = int(res.group(0).decode("utf-8").split(" ")[1])
        LOGGER.info("Run {}".format(run))
        assert run == i, "Invalid run number"

        # Match "Fibonacci(N): %llu"
        res = dut.expect(r"Fibonacci\(N\): (\d+)", timeout=300)
        fib_result = int(res.group(0).decode("utf-8").split(" ")[1])
        LOGGER.info("Fibonacci({}) = {}".format(fib_n, fib_result))
        assert fib_result > 0, "Invalid Fibonacci result"

        # Check if the result is correct
        assert fib_result == expected_result

        # Match "Time: %lu.%03lu s"
        res = dut.expect(r"Time: (\d+)\.(\d+) s", timeout=300)
        time = float(res.group(0).decode("utf-8").split(" ")[1])
        LOGGER.info("Time on run {}: {} s".format(i, time))
        assert 0 < time < 1000, "Invalid time"
        list_time.append(time)

    avg_time = round(sum(list_time) / len(list_time), 3)

    # Create JSON with results and write it to file
    # Always create a JSON with this format (so it can be merged later on):
    # { TEST_NAME_STR: TEST_RESULTS_DICT }
    results = {"fibonacci": {"runs": runs, "fib_n": fib_n, "avg_time": avg_time}}

    # Use the first free "result_fibonacci<N>.json" name. The path is rebuilt from the
    # index on every step instead of being patched with str.replace().
    current_folder = os.path.dirname(request.path)
    file_index = 0
    report_file = os.path.join(current_folder, "result_fibonacci{}.json".format(file_index))
    while os.path.exists(report_file):
        file_index += 1
        report_file = os.path.join(current_folder, "result_fibonacci{}.json".format(file_index))

    # open() is inside the try block so that a failure to create the file is
    # reported as a warning too, not only a failure while writing.
    try:
        with open(report_file, "w") as f:
            f.write(json.dumps(results))
    except (OSError, TypeError, ValueError) as e:
        LOGGER.warning("Failed to write results to file: {}".format(e))
/*
Based on the ramspeed test from NuttX.
https://github.com/apache/nuttx-apps/blob/master/benchmarks/ramspeed/ramspeed_main.c
Modified for Arduino and ESP32 by Lucas Saavedra Vaz, 2024
*/
#include <Arduino.h>
// Test settings
// Number of runs to average
#define N_RUNS 3
// Value to fill the memory with
#define FILL_VALUE 0x00
// Number of copies to be performed in each test
#define N_COPIES 400
// Start size for the tests. Value must be a power of 2.
// Values lower or equal than 32 KB may cause the operations to use the cache instead of the PSRAM.
#define START_SIZE 65536
// Max size to be copied. Must be bigger than 32 and it will be floored to the nearest power of 2
#define MAX_TEST_SIZE 512 * 1024 // 512KB
// Implementation macros
#if defined(UINTPTR_MAX) && UINTPTR_MAX > 0xFFFFFFFF
#define MEM_UNIT uint64_t
#define ALIGN_MASK 0x7
#else
#define MEM_UNIT uint32_t
#define ALIGN_MASK 0x3
#endif
#define COPY32 \
*d32 = *s32; \
d32++; \
s32++;
#define COPY8 \
*d8 = *s8; \
d8++; \
s8++;
#define SET32(x) \
*d32 = x; \
d32++;
#define SET8(x) \
*d8 = x; \
d8++;
#define REPEAT8(expr) expr expr expr expr expr expr expr expr
/* Functions */
// Hand-written memcpy used as the "mock" implementation whose speed is measured.
// Copies len bytes from src to dst and returns dst.
// The loops are manually unrolled through the COPY8/COPY32/REPEAT8 macros; the
// unrolling is part of what is being measured.
static void *mock_memcpy(void *dst, const void *src, size_t len) {
uint8_t *d8 = (uint8_t *)dst;
const uint8_t *s8 = (uint8_t *)src;
// Offset of each pointer inside an aligned unit
uintptr_t d_align = (uintptr_t)d8 & ALIGN_MASK;
uintptr_t s_align = (uintptr_t)s8 & ALIGN_MASK;
uint32_t *d32;
const uint32_t *s32;
/* Byte copy for unaligned memories */
// Different offsets can never be aligned at the same time, so copy byte by byte
if (s_align != d_align) {
while (len > 32) {
REPEAT8(COPY8);
REPEAT8(COPY8);
REPEAT8(COPY8);
REPEAT8(COPY8);
len -= 32;
}
while (len) {
COPY8;
len--;
}
return dst;
}
/* Make the memories aligned */
// Both pointers share the same offset here, so aligning dst aligns src as well
if (d_align) {
d_align = ALIGN_MASK + 1 - d_align;
while (d_align && len) {
COPY8;
d_align--;
len--;
}
}
d32 = (uint32_t *)d8;
s32 = (uint32_t *)s8;
// 8 words (32 bytes) per iteration
while (len > 32) {
REPEAT8(COPY32);
len -= 32;
}
// The comparisons are strict, so up to 4 bytes are left for the byte loop below
while (len > 4) {
COPY32;
len -= 4;
}
d8 = (uint8_t *)d32;
s8 = (const uint8_t *)s32;
while (len) {
COPY8;
len--;
}
return dst;
}
// Hand-written memset used as the "mock" implementation whose speed is measured.
// Writes the byte v to the first len bytes of dst.
// The loops are manually unrolled through the SET8/SET32/REPEAT8 macros; the
// unrolling is part of what is being measured.
static void mock_memset(void *dst, uint8_t v, size_t len) {
uint8_t *d8 = (uint8_t *)dst;
uintptr_t d_align = (uintptr_t)d8 & ALIGN_MASK;
uint32_t v32;
uint32_t *d32;
/* Make the address aligned */
if (d_align) {
d_align = ALIGN_MASK + 1 - d_align;
while (d_align && len) {
SET8(v);
len--;
d_align--;
}
}
// Replicate the fill byte into all four bytes of a 32-bit word
v32 = (uint32_t)v + ((uint32_t)v << 8) + ((uint32_t)v << 16) + ((uint32_t)v << 24);
d32 = (uint32_t *)d8;
// 8 words (32 bytes) per iteration
while (len > 32) {
REPEAT8(SET32(v32));
len -= 32;
}
// The comparisons are strict, so up to 4 bytes are left for the byte loop below
while (len > 4) {
SET32(v32);
len -= 4;
}
d8 = (uint8_t *)d32;
while (len) {
SET8(v);
len--;
}
}
// Prints the throughput of one measurement.
//   name      - label printed in front of the result
//   bytes     - total number of bytes processed
//   cost_time - elapsed time in milliseconds
// The rate is reported in KB/s. When cost_time is 0 no rate can be computed and an
// error line is printed instead.
static void print_rate(const char *name, uint64_t bytes, uint32_t cost_time) {
  if (cost_time != 0) {
    // bytes per millisecond -> bytes per second -> KB per second
    uint32_t rate = bytes * 1000 / cost_time / 1024;
    Serial.printf("%s Rate = %" PRIu32 " KB/s Time: %" PRIu32 " ms\n", name, rate, cost_time);
    return;
  }

  Serial.println("Error: Too little time taken, please increase N_COPIES");
}
// Measures the system memcpy() and mock_memcpy() for block sizes START_SIZE,
// 2*START_SIZE, ... up to size, copying each block repeat_cnt times.
//   dest, src  - buffers of at least size bytes
//   size       - largest block size to test
//   repeat_cnt - number of copies per block size
// For every block size one "Memcpy <n> Bytes test" line and two rate lines are printed.
static void memcpy_speed_test(void *dest, const void *src, size_t size, uint32_t repeat_cnt) {
uint32_t start_time;
uint32_t cost_time_system;
uint32_t cost_time_mock;
uint32_t cnt;
uint32_t step;
uint64_t total_size;
for (step = START_SIZE; step <= size; step <<= 1) {
// 64-bit product: step * repeat_cnt may not fit in 32 bits
total_size = (uint64_t)step * (uint64_t)repeat_cnt;
Serial.printf("Memcpy %" PRIu32 " Bytes test\n", step);
// System implementation is timed first, the mock implementation second
start_time = millis();
for (cnt = 0; cnt < repeat_cnt; cnt++) {
memcpy(dest, src, step);
}
cost_time_system = millis() - start_time;
start_time = millis();
for (cnt = 0; cnt < repeat_cnt; cnt++) {
mock_memcpy(dest, src, step);
}
cost_time_mock = millis() - start_time;
print_rate("System memcpy():", total_size, cost_time_system);
print_rate("Mock memcpy():", total_size, cost_time_mock);
}
}
// Measures the system memset() and mock_memset() for block sizes START_SIZE,
// 2*START_SIZE, ... up to size, filling each block repeat_num times.
//   dest       - buffer of at least size bytes
//   value      - fill byte
//   size       - largest block size to test
//   repeat_num - number of fills per block size
// For every block size one "Memset <n> Bytes test" line and two rate lines are printed.
static void memset_speed_test(void *dest, uint8_t value, size_t size, uint32_t repeat_num) {
uint32_t start_time;
uint32_t cost_time_system;
uint32_t cost_time_mock;
uint32_t cnt;
uint32_t step;
uint64_t total_size;
for (step = START_SIZE; step <= size; step <<= 1) {
// 64-bit product: step * repeat_num may not fit in 32 bits
total_size = (uint64_t)step * (uint64_t)repeat_num;
Serial.printf("Memset %" PRIu32 " Bytes test\n", step);
// System implementation is timed first, the mock implementation second
start_time = millis();
for (cnt = 0; cnt < repeat_num; cnt++) {
memset(dest, value, step);
}
cost_time_system = millis() - start_time;
start_time = millis();
for (cnt = 0; cnt < repeat_num; cnt++) {
mock_memset(dest, value, step);
}
cost_time_mock = millis() - start_time;
print_rate("System memset():", total_size, cost_time_system);
print_rate("Mock memset():", total_size, cost_time_mock);
}
}
/* Main */
// Allocates two PSRAM buffers of MAX_TEST_SIZE bytes and runs the memcpy and memset
// speed tests N_RUNS times on them.
// The "Runs:", "Copies:", "Max test size:" and "Run <n>" lines are parsed by the
// accompanying pytest script, so their text and order must stay as they are.
void setup() {
  Serial.begin(115200);
  // Wait until the serial port is ready before printing anything
  while (!Serial) {
    delay(10);
  }

  void *dest = ps_malloc(MAX_TEST_SIZE);
  void *src = ps_malloc(MAX_TEST_SIZE);

  if (!dest || !src) {
    Serial.println("Memory allocation failed");
    // One of the two allocations may have succeeded; free(NULL) is a no-op,
    // so both can be released unconditionally.
    free(dest);
    free(src);
    return;
  }

  log_d("Starting PSRAM speed test");
  Serial.printf("Runs: %d\n", N_RUNS);
  Serial.printf("Copies: %d\n", N_COPIES);
  Serial.printf("Max test size: %d\n", MAX_TEST_SIZE);
  Serial.flush();
  for (int i = 0; i < N_RUNS; i++) {
    // No newline after the run marker; the next output is the first test line
    Serial.printf("Run %d", i);
    memcpy_speed_test(dest, src, MAX_TEST_SIZE, N_COPIES);
    Serial.flush();
    memset_speed_test(dest, FILL_VALUE, MAX_TEST_SIZE, N_COPIES);
    Serial.flush();
  }

  // The buffers are not needed once the measurements are done
  free(dest);
  free(src);

  log_d("PSRAM speed test done");
}
// All work is done in setup(); delete the calling task instead of looping.
void loop() {
vTaskDelete(NULL);
}
import json
import logging
import os
from collections import defaultdict
def test_psramspeed(dut, request):
    """Check the PSRAM speed sketch output and store the averaged results as JSON.

    Args:
        dut: Device-under-test fixture. Its ``expect(pattern, timeout=...)`` method
            is used to wait for serial output; the returned match holds bytes.
        request: pytest ``request`` fixture. The report is written next to
            ``request.path``.

    For every run the sketch is expected to print a Memcpy series followed by a
    Memset series; each series ends with the block whose size equals the reported
    maximum test size. The report is written on a best-effort basis: a failure to
    create or write the file is logged as a warning and does not fail the test.
    """
    LOGGER = logging.getLogger(__name__)

    runs_results = []

    # Match "Runs: %d"
    res = dut.expect(r"Runs: (\d+)", timeout=60)
    runs = int(res.group(0).decode("utf-8").split(" ")[1])
    LOGGER.info("Number of runs: {}".format(runs))
    assert runs > 0, "Invalid number of runs"

    # Match "Copies: %d"
    res = dut.expect(r"Copies: (\d+)", timeout=60)
    copies = int(res.group(0).decode("utf-8").split(" ")[1])
    LOGGER.info("Number of copies in each test: {}".format(copies))
    assert copies > 0, "Invalid number of copies"

    # Match "Max test size: %d"
    res = dut.expect(r"Max test size: (\d+)", timeout=60)
    max_test_size = int(res.group(0).decode("utf-8").split(" ")[3])
    LOGGER.info("Max test size: {}".format(max_test_size))
    assert max_test_size > 0, "Invalid max test size"

    for i in range(runs):
        # Match "Run %d"
        res = dut.expect(r"Run (\d+)", timeout=120)
        run = int(res.group(0).decode("utf-8").split(" ")[1])
        LOGGER.info("Run {}".format(run))
        assert run == i, "Invalid run number"

        # Two series per run: memcpy and memset
        for _ in range(2):
            while True:
                # Match "Memcpy/Memset %d Bytes test"
                res = dut.expect(r"(Memcpy|Memset) (\d+) Bytes test", timeout=60)
                header = res.group(0).decode("utf-8").split(" ")
                current_test = header[0].lower()
                current_test_size = int(header[1])
                LOGGER.info("Current {} test size: {}".format(current_test, current_test_size))
                assert current_test_size > 0, "Invalid test size"

                # Two result lines per block size: system and mock implementation
                for _ in range(2):
                    # Match "System/Mock memcpy/memset(): Rate = %d KB/s Time: %d ms" or "Error: %s"
                    res = dut.expect(
                        r"((System|Mock) (memcpy|memset)\(\): Rate = (\d+) KB/s Time: (\d+) ms|^Error)", timeout=90
                    )
                    fields = res.group(0).decode("utf-8").split(" ")
                    implementation = fields[0].lower()
                    # The pattern matches only the word "Error" (without the colon), so
                    # the check must not depend on the colon being part of the match.
                    assert not implementation.startswith("error"), "Error detected in test output"
                    # Strip the trailing "():" from e.g. "memcpy():"
                    test_type = fields[1].lower()[:-3]
                    rate = int(fields[4])
                    time = int(fields[7])
                    assert rate > 0, "Invalid rate"
                    assert time > 0, "Invalid time"
                    assert test_type == current_test, "Missing test output"
                    LOGGER.info("{} {}: Rate = {} KB/s. Time = {} ms".format(implementation, test_type, rate, time))
                    runs_results.append(((current_test, str(current_test_size), implementation), (rate, time)))

                # The largest block closes the current series
                if current_test_size == max_test_size:
                    break

        LOGGER.info("=============================================================")

    # Calculate average rate and time for each test size
    sums = defaultdict(lambda: {"rate_sum": 0, "time_sum": 0})

    for (test, size, impl), (rate, time) in runs_results:
        sums[(test, size, impl)]["rate_sum"] += rate
        sums[(test, size, impl)]["time_sum"] += time

    avg_results = {}
    for (test, size, impl), totals in sums.items():
        rate_avg = round(totals["rate_sum"] / runs, 2)
        time_avg = round(totals["time_sum"] / runs, 2)
        LOGGER.info(
            "Test: {}-{}-{}: Average rate = {} KB/s. Average time = {} ms".format(test, size, impl, rate_avg, time_avg)
        )
        if test not in avg_results:
            avg_results[test] = {}
        if size not in avg_results[test]:
            avg_results[test][size] = {}
        avg_results[test][size][impl] = {"avg_rate": rate_avg, "avg_time": time_avg}

    # Create JSON with results and write it to file
    # Always create a JSON with this format (so it can be merged later on):
    # { TEST_NAME_STR: TEST_RESULTS_DICT }
    results = {"psramspeed": {"runs": runs, "copies": copies, "max_test_size": max_test_size, "results": avg_results}}

    # Use the first free "result_psramspeed<N>.json" name. The path is rebuilt from the
    # index on every step instead of being patched with str.replace().
    current_folder = os.path.dirname(request.path)
    file_index = 0
    report_file = os.path.join(current_folder, "result_psramspeed{}.json".format(file_index))
    while os.path.exists(report_file):
        file_index += 1
        report_file = os.path.join(current_folder, "result_psramspeed{}.json".format(file_index))

    # open() is inside the try block so that a failure to create the file is
    # reported as a warning too, not only a failure while writing.
    try:
        with open(report_file, "w") as f:
            f.write(json.dumps(results))
    except (OSError, TypeError, ValueError) as e:
        LOGGER.warning("Failed to write results to file: {}".format(e))
{
"targets": [
{
"name": "esp32",
"fqbn":[
"espressif:esp32:esp32:PSRAM=disabled,PartitionScheme=huge_app"
]
},
{
"name": "esp32s2",
"fqbn": [
"espressif:esp32:esp32s2:PSRAM=disabled,PartitionScheme=huge_app"
]
},
{
"name": "esp32c3",
"fqbn": [
"espressif:esp32:esp32c3:PartitionScheme=huge_app"
]
},
{
"name": "esp32s3",
"fqbn": [
"espressif:esp32:esp32s3:PSRAM=disabled,USBMode=default,PartitionScheme=huge_app"
]
},
{
"name": "esp32c6",
"fqbn": [
"espressif:esp32:esp32c6:PartitionScheme=huge_app"
]
},
{
"name": "esp32h2",
"fqbn": [
"espressif:esp32:esp32h2:PartitionScheme=huge_app"
]
}
]
}
/*
Based on the ramspeed test from NuttX.
https://github.com/apache/nuttx-apps/blob/master/benchmarks/ramspeed/ramspeed_main.c
Modified for Arduino and ESP32 by Lucas Saavedra Vaz, 2024
*/
#include <Arduino.h>
// Test settings
// Number of runs to average
#define N_RUNS 3
// Value to fill the memory with
#define FILL_VALUE 0x00
// Number of copies to be performed in each test
#define N_COPIES 50000
// Max size to be copied. Must be bigger than 32 and it will be floored to the nearest power of 2
#define MAX_TEST_SIZE 64 * 1024 // 64KB
// Implementation macros
#if defined(UINTPTR_MAX) && UINTPTR_MAX > 0xFFFFFFFF
#define MEM_UNIT uint64_t
#define ALIGN_MASK 0x7
#else
#define MEM_UNIT uint32_t
#define ALIGN_MASK 0x3
#endif
#define COPY32 \
*d32 = *s32; \
d32++; \
s32++;
#define COPY8 \
*d8 = *s8; \
d8++; \
s8++;
#define SET32(x) \
*d32 = x; \
d32++;
#define SET8(x) \
*d8 = x; \
d8++;
#define REPEAT8(expr) expr expr expr expr expr expr expr expr
/* Functions */
// Hand-written memcpy used as the "mock" implementation whose speed is measured.
// Copies len bytes from src to dst and returns dst.
// The loops are manually unrolled through the COPY8/COPY32/REPEAT8 macros; the
// unrolling is part of what is being measured.
static void *mock_memcpy(void *dst, const void *src, size_t len) {
uint8_t *d8 = (uint8_t *)dst;
const uint8_t *s8 = (uint8_t *)src;
// Offset of each pointer inside an aligned unit
uintptr_t d_align = (uintptr_t)d8 & ALIGN_MASK;
uintptr_t s_align = (uintptr_t)s8 & ALIGN_MASK;
uint32_t *d32;
const uint32_t *s32;
/* Byte copy for unaligned memories */
// Different offsets can never be aligned at the same time, so copy byte by byte
if (s_align != d_align) {
while (len > 32) {
REPEAT8(COPY8);
REPEAT8(COPY8);
REPEAT8(COPY8);
REPEAT8(COPY8);
len -= 32;
}
while (len) {
COPY8;
len--;
}
return dst;
}
/* Make the memories aligned */
// Both pointers share the same offset here, so aligning dst aligns src as well
if (d_align) {
d_align = ALIGN_MASK + 1 - d_align;
while (d_align && len) {
COPY8;
d_align--;
len--;
}
}
d32 = (uint32_t *)d8;
s32 = (uint32_t *)s8;
// 8 words (32 bytes) per iteration
while (len > 32) {
REPEAT8(COPY32);
len -= 32;
}
// The comparisons are strict, so up to 4 bytes are left for the byte loop below
while (len > 4) {
COPY32;
len -= 4;
}
d8 = (uint8_t *)d32;
s8 = (const uint8_t *)s32;
while (len) {
COPY8;
len--;
}
return dst;
}
/*
 * Reference ("mock") memset written in plain C; memset_speed_test() times it against
 * the system memset().
 *
 * Fills `len` bytes starting at `dst` with the byte value `v`: byte stores until the
 * address is aligned, then 32-bit stores of `v` replicated into all four bytes
 * (eight per loop iteration, then one at a time), then byte stores for the tail.
 */
static void mock_memset(void *dst, uint8_t v, size_t len) {
uint8_t *d8 = (uint8_t *)dst;
/* Offset of the destination inside an aligned unit */
uintptr_t d_align = (uintptr_t)d8 & ALIGN_MASK;
uint32_t v32;
uint32_t *d32;
/* Make the address aligned */
if (d_align) {
/* Number of bytes missing to reach the next aligned address */
d_align = ALIGN_MASK + 1 - d_align;
while (d_align && len) {
SET8(v);
len--;
d_align--;
}
}
/* Replicate the fill byte into every byte of a 32-bit word */
v32 = (uint32_t)v + ((uint32_t)v << 8) + ((uint32_t)v << 16) + ((uint32_t)v << 24);
d32 = (uint32_t *)d8;
/* 8 words = 32 bytes per iteration; the strict '>' leaves a block of exactly 32 bytes to the loop below */
while (len > 32) {
REPEAT8(SET32(v32));
len -= 32;
}
/* One word at a time; the strict '>' leaves a final block of exactly 4 bytes to the byte loop */
while (len > 4) {
SET32(v32);
len -= 4;
}
/* Fill the remaining tail byte by byte */
d8 = (uint8_t *)d32;
while (len) {
SET8(v);
len--;
}
}
/*
 * Print the throughput of one measurement.
 *
 * name      - label printed in front of the result
 * bytes     - total number of bytes processed during the measurement
 * cost_time - elapsed time in milliseconds
 *
 * The rate is reported in KB/s. A zero elapsed time cannot be converted into a rate,
 * so an error line is printed instead.
 */
static void print_rate(const char *name, uint64_t bytes, uint32_t cost_time) {
  if (cost_time != 0) {
    /* bytes per ms -> bytes per s (x1000) -> KB per s (/1024) */
    const uint32_t kb_per_sec = bytes * 1000 / cost_time / 1024;
    Serial.printf("%s Rate = %" PRIu32 " KB/s Time: %" PRIu32 " ms\n", name, kb_per_sec, cost_time);
  } else {
    Serial.println("Error: Too little time taken, please increase N_COPIES");
  }
}
/*
 * Time the system memcpy() against mock_memcpy() for every power-of-two block size
 * from 32 bytes up to `size`.
 *
 * dest, src  - buffers of at least `size` bytes
 * size       - largest block size to test
 * repeat_cnt - number of copies performed per block size and implementation
 *
 * For each block size a header line is printed, followed by one rate line per
 * implementation.
 */
static void memcpy_speed_test(void *dest, const void *src, size_t size, uint32_t repeat_cnt) {
  uint32_t block = 32;

  while (block <= size) {
    const uint64_t bytes_moved = (uint64_t)block * (uint64_t)repeat_cnt;

    Serial.printf("Memcpy %" PRIu32 " Bytes test\n", block);

    /* System implementation */
    uint32_t t0 = millis();
    for (uint32_t i = 0; i < repeat_cnt; i++) {
      memcpy(dest, src, block);
    }
    const uint32_t system_ms = millis() - t0;

    /* Mock implementation */
    t0 = millis();
    for (uint32_t i = 0; i < repeat_cnt; i++) {
      mock_memcpy(dest, src, block);
    }
    const uint32_t mock_ms = millis() - t0;

    print_rate("System memcpy():", bytes_moved, system_ms);
    print_rate("Mock memcpy():", bytes_moved, mock_ms);

    block <<= 1;
  }
}
/*
 * Time the system memset() against mock_memset() for every power-of-two block size
 * from 32 bytes up to `size`.
 *
 * dest       - buffer of at least `size` bytes
 * value      - byte value written into the buffer
 * size       - largest block size to test
 * repeat_num - number of fills performed per block size and implementation
 *
 * For each block size a header line is printed, followed by one rate line per
 * implementation.
 */
static void memset_speed_test(void *dest, uint8_t value, size_t size, uint32_t repeat_num) {
  uint32_t block = 32;

  while (block <= size) {
    const uint64_t bytes_written = (uint64_t)block * (uint64_t)repeat_num;

    Serial.printf("Memset %" PRIu32 " Bytes test\n", block);

    /* System implementation */
    uint32_t t0 = millis();
    for (uint32_t i = 0; i < repeat_num; i++) {
      memset(dest, value, block);
    }
    const uint32_t system_ms = millis() - t0;

    /* Mock implementation */
    t0 = millis();
    for (uint32_t i = 0; i < repeat_num; i++) {
      mock_memset(dest, value, block);
    }
    const uint32_t mock_ms = millis() - t0;

    print_rate("System memset():", bytes_written, system_ms);
    print_rate("Mock memset():", bytes_written, mock_ms);

    block <<= 1;
  }
}
/* Main */
/*
 * Run the whole benchmark once: print the test parameters, then for each of the
 * N_RUNS runs print a "Run <i>" line followed by the memcpy and memset sweeps.
 * The host-side test parses this serial output line by line.
 */
void setup() {
  Serial.begin(115200);
  while (!Serial) {
    delay(10);
  }

  void *dest = malloc(MAX_TEST_SIZE);
  void *src = malloc(MAX_TEST_SIZE);

  if (!dest || !src) {
    Serial.println("Memory allocation failed");
    // One of the two allocations may have succeeded; free(NULL) is a no-op
    free(dest);
    free(src);
    return;
  }

  log_d("Starting RAM speed test");
  Serial.printf("Runs: %d\n", N_RUNS);
  Serial.printf("Copies: %d\n", N_COPIES);
  Serial.printf("Max test size: %d\n", MAX_TEST_SIZE);
  Serial.flush();

  for (int i = 0; i < N_RUNS; i++) {
    // Terminate the line so the run number does not run into the first "Memcpy ..." line
    Serial.printf("Run %d\n", i);
    memcpy_speed_test(dest, src, MAX_TEST_SIZE, N_COPIES);
    Serial.flush();
    memset_speed_test(dest, FILL_VALUE, MAX_TEST_SIZE, N_COPIES);
    Serial.flush();
  }

  // The buffers are only needed for the benchmark itself
  free(dest);
  free(src);

  log_d("RAM speed test done");
}
// All the work is done once in setup(), so there is nothing to repeat here.
// vTaskDelete(NULL) is the FreeRTOS call that deletes the calling task, i.e. the one running loop().
void loop() {
vTaskDelete(NULL);
}
import json
import logging
import os
from collections import defaultdict
def _expect_int(dut, pattern, timeout):
    """Wait for ``pattern`` on the DUT output and return its first capture group as an int."""
    match = dut.expect(pattern, timeout=timeout)
    return int(match.group(1).decode("utf-8"))


def _average_ramspeed_samples(samples, runs):
    """Average the collected samples over the number of runs.

    ``samples`` is a list of ``((test, size, implementation), (rate, time))`` tuples.
    Returns ``{test: {size: {implementation: {"avg_rate": ..., "avg_time": ...}}}}``.
    """
    logger = logging.getLogger(__name__)

    sums = defaultdict(lambda: [0, 0])
    for key, (rate, time_ms) in samples:
        sums[key][0] += rate
        sums[key][1] += time_ms

    averages = {}
    for (test, size, impl), (rate_sum, time_sum) in sums.items():
        rate_avg = round(rate_sum / runs, 2)
        time_avg = round(time_sum / runs, 2)
        logger.info(
            "Test: {}-{}-{}: Average rate = {} KB/s. Average time = {} ms".format(test, size, impl, rate_avg, time_avg)
        )
        averages.setdefault(test, {}).setdefault(size, {})[impl] = {"avg_rate": rate_avg, "avg_time": time_avg}
    return averages


def _save_ramspeed_report(folder, results, logger):
    """Write ``results`` as JSON to the first unused ``result_ramspeed<N>.json`` in ``folder``.

    Saving the report is best effort: any failure is logged as a warning and does not fail the test.
    """
    file_index = 0
    report_file = os.path.join(folder, "result_ramspeed{}.json".format(file_index))
    while os.path.exists(report_file):
        file_index += 1
        report_file = os.path.join(folder, "result_ramspeed{}.json".format(file_index))

    try:
        # Serialize first so a serialization problem does not leave an empty file behind
        payload = json.dumps(results)
        with open(report_file, "w") as f:
            f.write(payload)
    except (OSError, TypeError, ValueError) as e:
        logger.warning("Failed to write results to file: {}".format(e))


def test_ramspeed(dut, request):
    """Parse the output of the ramspeed sketch, validate it and store the averaged results.

    The sketch prints its parameters ("Runs", "Copies", "Max test size"). For every run it
    then prints a "Run <i>" line followed by a memcpy sweep and a memset sweep. Each sweep
    step prints a "<Test> <size> Bytes test" header and one rate line per implementation
    (system and mock).

    Args:
        dut: device-under-test fixture; only its ``expect(pattern, timeout=...)`` method is used.
        request: pytest request fixture; ``request.path`` selects the folder of the JSON report.
    """
    LOGGER = logging.getLogger(__name__)

    runs_results = []

    # Match "Runs: %d"
    runs = _expect_int(dut, r"Runs: (\d+)", 60)
    LOGGER.info("Number of runs: {}".format(runs))
    assert runs > 0, "Invalid number of runs"

    # Match "Copies: %d"
    copies = _expect_int(dut, r"Copies: (\d+)", 60)
    LOGGER.info("Number of copies in each test: {}".format(copies))
    assert copies > 0, "Invalid number of copies"

    # Match "Max test size: %lu"
    max_test_size = _expect_int(dut, r"Max test size: (\d+)", 60)
    LOGGER.info("Max test size: {}".format(max_test_size))
    assert max_test_size > 0, "Invalid max test size"

    for i in range(runs):
        # Match "Run %d"
        run = _expect_int(dut, r"Run (\d+)", 120)
        LOGGER.info("Run {}".format(run))
        assert run == i, "Invalid run number"

        # Every run consists of two sweeps: memcpy first, memset second
        for _ in range(2):
            while True:
                # Match "Memcpy/Memset %d Bytes test"
                res = dut.expect(r"(Memcpy|Memset) (\d+) Bytes test", timeout=60)
                current_test = res.group(1).decode("utf-8").lower()
                current_test_size = int(res.group(2).decode("utf-8"))
                LOGGER.info("Current {} test size: {}".format(current_test, current_test_size))
                assert current_test_size > 0, "Invalid test size"

                # One result line per implementation (system and mock)
                for _ in range(2):
                    # Match "System/Mock memcpy/memset(): Rate = %d KB/s Time: %d ms" or "Error: %s"
                    res = dut.expect(
                        r"((System|Mock) (memcpy|memset)\(\): Rate = (\d+) KB/s Time: (\d+) ms|^Error)", timeout=90
                    )
                    # The implementation group is only set by the rate alternative of the
                    # pattern; if it is empty, the "Error" alternative matched instead.
                    assert res.group(2) is not None, "Error detected in test output"
                    implementation = res.group(2).decode("utf-8").lower()
                    test_type = res.group(3).decode("utf-8").lower()
                    rate = int(res.group(4).decode("utf-8"))
                    time_ms = int(res.group(5).decode("utf-8"))
                    assert rate > 0, "Invalid rate"
                    assert time_ms > 0, "Invalid time"
                    assert test_type == current_test, "Missing test output"
                    LOGGER.info(
                        "{} {}: Rate = {} KB/s. Time = {} ms".format(implementation, test_type, rate, time_ms)
                    )
                    runs_results.append(((current_test, str(current_test_size), implementation), (rate, time_ms)))

                # The sketch doubles the size while it is <= max size, so the sweep is over
                # once the next size would exceed it (also when max size is not a power of 2).
                if current_test_size * 2 > max_test_size:
                    break

        LOGGER.info("=============================================================")

    # Calculate average rate and time for each test size
    avg_results = _average_ramspeed_samples(runs_results, runs)

    # Create JSON with results and write it to file
    # Always create a JSON with this format (so it can be merged later on):
    # { TEST_NAME_STR: TEST_RESULTS_DICT }
    results = {"ramspeed": {"runs": runs, "copies": copies, "max_test_size": max_test_size, "results": avg_results}}

    _save_ramspeed_report(os.path.dirname(request.path), results, LOGGER)
This diff is collapsed.
/*
Based on "Calculation of PI(= 3.14159...) using FFT and AGM" by T.Ooura, Nov. 1999.
https://github.com/Fibonacci43/SuperPI
Modified for Arduino by Lucas Saavedra Vaz, 2024.
*/
/*
Declarations for the FFT routines used by the PI calculation: overridable constants
followed by function prototypes. No implementation lives in this header.
*/
#pragma once
#include <math.h>
/*
Precomputed trigonometric constants. The comment on each #ifndef line gives the
expression the value stands for; the #ifndef guards allow a build to override them.
*/
#ifndef M_PI_2
#define M_PI_2 1.570796326794896619231321691639751442098584699687
#endif
#ifndef WR5000 /* cos(M_PI_2*0.5000) */
#define WR5000 0.707106781186547524400844362104849039284835937688
#endif
#ifndef WR2500 /* cos(M_PI_2*0.2500) */
#define WR2500 0.923879532511286756128183189396788286822416625863
#endif
#ifndef WI2500 /* sin(M_PI_2*0.2500) */
#define WI2500 0.382683432365089771728459984030398866761344562485
#endif
#ifndef WR1250 /* cos(M_PI_2*0.1250) */
#define WR1250 0.980785280403230449126182236134239036973933730893
#endif
#ifndef WI1250 /* sin(M_PI_2*0.1250) */
#define WI1250 0.195090322016128267848284868477022240927691617751
#endif
#ifndef WR3750 /* cos(M_PI_2*0.3750) */
#define WR3750 0.831469612302545237078788377617905756738560811987
#endif
#ifndef WI3750 /* sin(M_PI_2*0.3750) */
#define WI3750 0.555570233019602224742830813948532874374937190754
#endif
/* Tuning parameters, also overridable at build time */
#ifndef CDFT_RECURSIVE_N /* length of the recursive FFT mode */
#define CDFT_RECURSIVE_N 512 /* <= (L1 cache size) / 16 */
#endif
#ifndef CDFT_LOOP_DIV /* control of the CDFT's speed & tolerance */
#define CDFT_LOOP_DIV 32
#endif
#ifndef RDFT_LOOP_DIV /* control of the RDFT's speed & tolerance */
#define RDFT_LOOP_DIV 64
#endif
#ifndef DCST_LOOP_DIV /* control of the DCT,DST's speed & tolerance */
#define DCST_LOOP_DIV 64
#endif
/*
Function prototypes, in alphabetical order. Every routine receives its data as `double *a`.
NOTE(review): `n` is presumably the number of elements in `a` and `isgn` the transform
direction, as in T. Ooura's FFT package - confirm against the implementation file.
*/
void bitrv1(int n, double *a);
void bitrv2(int n, double *a);
void bitrv208(double *a);
void bitrv208neg(double *a);
void bitrv216(double *a);
void bitrv216neg(double *a);
void bitrv2conj(int n, double *a);
void cdft(int n, int isgn, double *a);
void cftb040(double *a);
void cftb1st(int n, double *a);
void cftbsub(int n, double *a);
void cftexp1(int n, double *a);
void cftexp2(int n, double *a);
void cftf040(double *a);
void cftf081(double *a);
void cftf082(double *a);
void cftf161(double *a);
void cftf162(double *a);
void cftfsub(int n, double *a);
void cftfx41(int n, double *a);
void cftfx42(int n, double *a);
void cftmdl1(int n, double *a);
void cftmdl2(int n, double *a);
void cftrec1(int n, double *a);
void cftrec2(int n, double *a);
void cftx020(double *a);
void dctsub(int n, double *a);
void dctsub4(int n, double *a);
void ddct(int n, int isgn, double *a);
void ddst(int n, int isgn, double *a);
void dfct(int n, double *a);
void dfst(int n, double *a);
void dstsub(int n, double *a);
void dstsub4(int n, double *a);
void rdft(int n, int isgn, double *a);
void rftbsub(int n, double *a);
void rftfsub(int n, double *a);
This diff is collapsed.
/*
Based on "Calculation of PI(= 3.14159...) using FFT and AGM" by T.Ooura, Nov. 1999.
https://github.com/Fibonacci43/SuperPI
Modified for Arduino by Lucas Saavedra Vaz, 2024.
*/
/*
Declarations for the PI calculation: configuration macros, the pi_calc() entry point
and the prototypes of the mp_* helper routines. No implementation lives in this header.
*/
#pragma once
#include <ctype.h>
#define PI_FFTC_VER "ver. LG1.1.2-MP1.5.2a.memsave"
/* Please check the following macros before compiling */
#ifndef DBL_ERROR_MARGIN
#define DBL_ERROR_MARGIN 0.4 /* must be < 0.5 */
#endif
#define DGTINT short int /* sizeof(DGTINT) == 2 */
/* NOTE(review): SHRT_MAX is defined by <limits.h>, which this header does not include
   (only <ctype.h> is) - confirm that every file using DGTINT_MAX includes it. */
#define DGTINT_MAX SHRT_MAX
#define DGT_PACK 10
#define DGT_PACK_LINE 5
#define DGT_LINE_BLOCK 20
/* Entry point called by the sketch */
void pi_calc(int nfft);
/*
mp_* helper prototypes. They share a common leading parameter pair (int n, int radix)
and exchange numbers as int arrays; the *fft parameters are double work arrays.
NOTE(review): the names suggest multi-precision load/compare/add/sub/mul/div/sqrt
operations - confirm the exact semantics against the implementation file.
*/
void mp_load_0(int n, int radix, int out[]);
void mp_load_1(int n, int radix, int out[]);
void mp_round(int n, int radix, int m, int inout[]);
int mp_cmp(int n, int radix, int in1[], int in2[]);
void mp_add(int n, int radix, int in1[], int in2[], int out[]);
void mp_sub(int n, int radix, int in1[], int in2[], int out[]);
void mp_imul(int n, int radix, int in1[], int in2, int out[]);
int mp_idiv(int n, int radix, int in1[], int in2, int out[]);
void mp_idiv_2(int n, int radix, int in[], int out[]);
double mp_mul_radix_test(int n, int radix, int nfft, double tmpfft[]);
void mp_mul(int n, int radix, int in1[], int in2[], int out[], int tmp[], int nfft, double tmp1fft[], double tmp2fft[], double tmp3fft[]);
void mp_squ(int n, int radix, int in[], int out[], int tmp[], int nfft, double tmp1fft[], double tmp2fft[]);
void mp_mulhf(int n, int radix, int in1[], int in2[], int out[], int tmp[], int nfft, double in1fft[], double tmpfft[]);
void mp_mulhf_use_in1fft(int n, int radix, double in1fft[], int in2[], int out[], int tmp[], int nfft, double tmpfft[]);
void mp_squhf_use_infft(int n, int radix, double infft[], int in[], int out[], int tmp[], int nfft, double tmpfft[]);
void mp_mulh(int n, int radix, int in1[], int in2[], int out[], int nfft, double in1fft[], double outfft[]);
void mp_squh(int n, int radix, int in[], int out[], int nfft, double outfft[]);
int mp_inv(int n, int radix, int in[], int out[], int tmp1[], int tmp2[], int nfft, double tmp1fft[], double tmp2fft[]);
int mp_sqrt(int n, int radix, int in[], int out[], int tmp1[], int tmp2[], int nfft, double tmp1fft[], double tmp2fft[]);
int mp_invisqrt(int n, int radix, int in, int out[], int tmp1[], int tmp2[], int nfft, double tmp1fft[], double tmp2fft[]);
/* Conversion between the int-array representation and text */
void mp_sprintf(int n, int log10_radix, int in[], char out[]);
void mp_sscanf(int n, int log10_radix, char in[], int out[]);
/*
Based on "Calculation of PI(= 3.14159...) using FFT and AGM" by T.Ooura, Nov. 1999.
https://github.com/Fibonacci43/SuperPI
Modified for Arduino by Lucas Saavedra Vaz, 2024.
*/
#include <Arduino.h>
#include "pi_fftcs.h"
// Number of runs to average
#define N_RUNS 3

// Number of decimal digits to calculate
#define DIGITS (1 << 14)

/*
 * Run the whole benchmark once: print the test parameters, then for each of the
 * N_RUNS runs print a "Run <i>" line, call pi_calc() and print the elapsed time as
 * "Time: <seconds>.<milliseconds> s". The host-side test parses this serial output.
 */
void setup() {
  Serial.begin(115200);
  while (!Serial) {
    delay(10);
  }

  log_d("Starting PI calculation");
  Serial.printf("Runs: %d\n", N_RUNS);
  Serial.printf("Digits: %d\n", DIGITS);
  Serial.flush();

  for (int i = 0; i < N_RUNS; i++) {
    // Terminate the line so that anything printed next cannot be appended to the run number
    Serial.printf("Run %d\n", i);

    unsigned long start = millis();
    pi_calc(DIGITS);
    unsigned long elapsed = millis() - start;

    // Seconds and zero-padded milliseconds, e.g. 12.045
    Serial.printf("Time: %lu.%03lu s\n", elapsed / 1000, elapsed % 1000);
    Serial.flush();
  }

  log_d("PI calculation test done");
}
// All the work is done once in setup(), so there is nothing to repeat here.
// vTaskDelete(NULL) is the FreeRTOS call that deletes the calling task, i.e. the one running loop().
void loop() {
vTaskDelete(NULL);
}
import json
import logging
import os
def _save_superpi_report(folder, results, logger):
    """Write ``results`` as JSON to the first unused ``result_superpi<N>.json`` in ``folder``.

    Saving the report is best effort: any failure is logged as a warning and does not fail the test.
    """
    file_index = 0
    report_file = os.path.join(folder, "result_superpi{}.json".format(file_index))
    while os.path.exists(report_file):
        file_index += 1
        report_file = os.path.join(folder, "result_superpi{}.json".format(file_index))

    try:
        # Serialize first so a serialization problem does not leave an empty file behind
        payload = json.dumps(results)
        with open(report_file, "w") as f:
            f.write(payload)
    except (OSError, TypeError, ValueError) as e:
        logger.warning("Failed to write results to file: {}".format(e))


def test_superpi(dut, request):
    """Parse the output of the superpi sketch, validate it and store the average run time.

    The sketch prints its parameters ("Runs", "Digits"). For every run it then prints a
    "Run <i>" line and, once the calculation is finished, a "Time: <s>.<ms> s" line.

    Args:
        dut: device-under-test fixture; only its ``expect(pattern, timeout=...)`` method is used.
        request: pytest request fixture; ``request.path`` selects the folder of the JSON report.
    """
    LOGGER = logging.getLogger(__name__)

    # Match "Runs: %d"
    res = dut.expect(r"Runs: (\d+)", timeout=60)
    runs = int(res.group(1).decode("utf-8"))
    LOGGER.info("Number of runs: {}".format(runs))
    # Without at least one run there is nothing to average
    assert runs > 0, "Invalid number of runs"

    # Match "Digits: %d"
    res = dut.expect(r"Digits: (\d+)", timeout=60)
    digits = int(res.group(1).decode("utf-8"))
    LOGGER.info("Number of decimal digits: {}".format(digits))
    assert digits > 0, "Invalid number of digits"

    list_time = []

    for i in range(runs):
        # Match "Run %d"
        res = dut.expect(r"Run (\d+)", timeout=120)
        run = int(res.group(1).decode("utf-8"))
        LOGGER.info("Run {}".format(run))
        assert run == i, "Invalid run number"

        # Match "Time: %lu.%03lu s"
        res = dut.expect(r"Time: (\d+)\.(\d+) s", timeout=300)
        time = float("{}.{}".format(res.group(1).decode("utf-8"), res.group(2).decode("utf-8")))
        LOGGER.info("Time on run {}: {} s".format(i, time))
        assert 0 < time < 1000, "Invalid time"

        list_time.append(time)

    avg_time = round(sum(list_time) / len(list_time), 3)

    # Create JSON with results and write it to file
    # Always create a JSON with this format (so it can be merged later on):
    # { TEST_NAME_STR: TEST_RESULTS_DICT }
    results = {"superpi": {"runs": runs, "digits": digits, "avg_time": avg_time}}

    _save_superpi_report(os.path.dirname(request.path), results, LOGGER)
cryptography>=2.1.4
--only-binary cryptography
pytest-cov
pytest-embedded-serial-esp>=1.3.4
pytest-embedded-arduino>=1.3.4
pytest-embedded-serial-esp>=1.10.0
pytest-embedded-arduino>=1.10.0
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment