From 5cc08ed2e9c465e0e8290410fc0a10e5737fc058 Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Sat, 24 Jul 2021 14:08:29 -0400 Subject: Move notes to separate NOTES.txt, add CPU time display --- Makefile | 2 +- NOTES.txt | 42 ++++++++++++++++++++++++++++++++++++++++++ README.txt | 34 +--------------------------------- slowbaud.1 | 36 +----------------------------------- slowbaud.c | 35 ++++++++++++++++++++++++----------- slowbaud.rst | 34 ---------------------------------- test.sh | 2 +- 7 files changed, 70 insertions(+), 115 deletions(-) create mode 100644 NOTES.txt diff --git a/Makefile b/Makefile index d0efe49..0251e5e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ PROJ=slowbaud CC=gcc CFLAGS=-Wall -O2 $(EXTRACFLAGS) -LIBS=-lutil +LIBS=-lm -lutil RST2MAN=rst2man.py PREFIX=/usr diff --git a/NOTES.txt b/NOTES.txt new file mode 100644 index 0000000..1efacd1 --- /dev/null +++ b/NOTES.txt @@ -0,0 +1,42 @@ +The timing code works by calculating how long to sleep after each +character (in microseconds), but actually sleeping slightly less +than that, then busy-waiting until the rest of the interval expires. +At slower bitrates, this works well, and the CPU overhead is barely +noticeable (at least on reasonably fast modern systems). + +Timing accuracy depends on your OS, kernel config (HZ and/or NO_HZ +on Linux), and system load. Also on the amount of data, since the +timing loop is self-regulating (the first few bytes will have less +accurate timing than later ones). No "fancy" techniques like realtime +scheduling or hardware event timers are used. At bitrates up to +115200, on an unloaded Linux system, the timing should be at least +99.9% accurate. At higher bitrates, accuracy will decrease. + +Timing is more accurate on Linux than OSX. It's done with getitimer() +and sigwait(). This works out to be slightly more accurate than +using usleep() on both Linux and OSX. 
It would be possible to use +the realtime timer_create() and clock_gettime() API on Linux, for +possibly even better accuracy, but OSX doesn't have these (and I want to be +portable). On an unloaded OSX system, the accuracy gets steadily worse +as you go above 57600bps. There's also more CPU overhead on OSX. + +getitimer() and gettimeofday() only have microsecond precision. +slowbaud does better than this by calculating the delay interval to +1/100 of a microsecond, then adding 1us to the delay time that many +times out of 100. For instance, 115200bps is an 86.81us delay. 19 +times out of 100, slowbaud sleeps for 86us. The other 81 times, it +sleeps 87us. This puts the average at 86.81us, as it should be. + +If this were a truly useful application, it would be worth trying to +increase accuracy further, with realtime process scheduling. I didn't +do this because slowbaud is just a toy, and because the RT stuff tends +to be unportable and require elevated privileges (root, or something +like setrlimit or extended filesystem attributes to manage capabilities). + +About the name... I'm aware that "baud" is not synonymous with bps. I +just think "slowbaud" sounds better than "slowbps", as a name. Anyway +the stty command on both Linux and OSX misuses the term ("speed +38400 baud"), as well as the man page for termios(3), so I'm in good +company. + + diff --git a/README.txt b/README.txt index 843ac78..15f2427 100644 --- a/README.txt +++ b/README.txt @@ -83,38 +83,6 @@ NOTES 10 to get bytes per second. This simulates "8-N-1": one start bit, 8 data bits, no parity, and 1 stop bit (total of 10 bits per byte). - The timing code works by calculating how long to sleep after each character - (in microseconds), but actually sleeping slightly less than that, then - busy-waiting until the rest of the interval expires. At slower bitrates, this - works well, and the CPU overhead is barely noticeable (at least on reasonably - fast modern systems).
- - Timing accuracy depends on your OS, kernel config (HZ and/or NO_HZ on Linux), - and system load. Also on the amount of data, since the timing loop is - self-regulating (the first few bytes will have less accurate timing than later - ones). No "fancy" techniques like realtime scheduling or hardware event timers - are used. At bitrates up to 115200, on an unloaded Linux system, the timing - should be at least 99.9% accurate. At higher bitrates, accuracy will decrease. - - Timing is more accurate on Linux than OSX. It's done with getitimer() and sig‐ - wait(). This works out to be slightly more accurate than using usleep() on - both Linux and OSX. It would be possible to use the realtime timer_create() - and clock_gettime() API on Linux, for possibly even better accuracy, but OSX - doesn't have these (and I want to be portable). On an unloaded OSX system, the - accuracy gets steadily worse as you go above 57600bps. There's also more CPU - overhead on OSX. - - If this were a truly useful application, it would be worth trying to increase - accuracy further, with realtime process scheduling. I didn't do this because - slowbaud is just a toy, and because the RT stuff tends to be unportable and - require elevated privileges (root, or something like setrtlimit or extended - filesystem attributes to manage capabilities). - - About the name... I'm aware that "baud" is not synonymous with bps. I just - think "slowbaud" sounds better than "slowbps", as a name. Anyway the stty com‐ - mand on both Linux and OSX misuses the term ("speed 38400 baud"), as well as - the man page for termios(3), so I'm in good company. - BUGS With -c, signals aren't handled gracefully. Window size changes (SIGWINCH) don't get propagated to the child process, and pressing ^C doesn't interrupt @@ -124,4 +92,4 @@ COPYRIGHT slowbaud is copyright 2021, B. Watson . Released under the WTFPL. See http://www.wtfpl.net/txt/copying/ for details. 
-0.0.1 2021-07-23 SLOWBAUD(1) +0.0.1 2021-07-24 SLOWBAUD(1) diff --git a/slowbaud.1 b/slowbaud.1 index 543bb03..fb90370 100644 --- a/slowbaud.1 +++ b/slowbaud.1 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH SLOWBAUD 1 "2021-07-23" "0.0.1" "Urchlay's Useless Stuff" +.TH SLOWBAUD 1 "2021-07-24" "0.0.1" "Urchlay's Useless Stuff" .SH NAME slowbaud \- simulate a low bitrate serial connection . @@ -131,40 +131,6 @@ I/O is done with byte granularity. For calculation purposes, \fI\fP is divided by 10 to get bytes per second. This simulates "8\-N\-1": one start bit, 8 data bits, no parity, and 1 stop bit (total of 10 bits per byte). -.sp -The timing code works by calculating how long to sleep after each -character (in microseconds), but actually sleeping slightly less -than that, then busy\-waiting until the rest of the interval expires. -At slower bitrates, this works well, and the CPU overhead is barely -noticeable (at least on reasonably fast modern systems). -.sp -Timing accuracy depends on your OS, kernel config (HZ and/or NO_HZ -on Linux), and system load. Also on the amount of data, since the -timing loop is self\-regulating (the first few bytes will have less -accurate timing than later ones). No "fancy" techniques like realtime -scheduling or hardware event timers are used. At bitrates up to -115200, on an unloaded Linux system, the timing should be at least -99.9% accurate. At higher bitrates, accuracy will decrease. -.sp -Timing is more accurate on Linux than OSX. It\(aqs done with getitimer() -and sigwait(). This works out to be slightly more accurate than -using usleep() on both Linux and OSX. It would be possible to use -the realtime timer_create() and clock_gettime() API on Linux, for -possibly even better accuracy, but OSX doesn\(aqt have these (and I want to be -portable). On an unloaded OSX system, the accuracy gets steadily worse -as you go above 57600bps. There\(aqs also more CPU overhead on OSX. 
-.sp -If this were a truly useful application, it would be worth trying to -increase accuracy further, with realtime process scheduling. I didn\(aqt -do this because slowbaud is just a toy, and because the RT stuff tends -to be unportable and require elevated privileges (root, or something -like setrtlimit or extended filesystem attributes to manage capabilities). -.sp -About the name... I\(aqm aware that "baud" is not synonymous with bps. I -just think "slowbaud" sounds better than "slowbps", as a name. Anyway -the stty command on both Linux and OSX misuses the term ("speed -38400 baud"), as well as the man page for termios(3), so I\(aqm in good -company. .SH BUGS .sp With \fB\-c\fP, signals aren\(aqt handled gracefully. Window size changes diff --git a/slowbaud.c b/slowbaud.c index 54bc8de..2cfb138 100644 --- a/slowbaud.c +++ b/slowbaud.c @@ -1,6 +1,12 @@ /* Simulate low bitrate serial connection, like a 1980s modem. Author: B. Watson. License: WTFPL. */ + +/* configurables: */ +#define FRACTIONAL_USEC +#define SHOW_CPU_TIME + + #include #include #include @@ -16,6 +22,7 @@ #include #include #include +#include /* portability stuff only tested on Linux and OSX. Hope it's OK. 
*/ #if defined(__FreeBSD__) || defined(HAVE_LIBUTIL_H) @@ -41,8 +48,6 @@ struct timeval tv; struct itimerval itv, itv_disarm; sigset_t sigmask; -#define FRACTIONAL_USEC - #ifdef FRACTIONAL_USEC #define FRAC_US_DENOM 100 int frac_us_num; @@ -115,9 +120,22 @@ void debug_stats(void) { double actual = ((double)outbytes * 10.0L) / elapsed_sec; double offby = 100.0L * (((double)bps / actual) - 1.0L); fprintf(stderr, - "outbytes %lu, elapsed_us %lu, tv_usec %lu, requested bps %d (%.2fms), " + "outbytes %lu, elapsed_us %lu, tv_usec %lu\nrequested bps %d (%.2fms), " "actual %.2f, accuracy %.2f%%\n", outbytes, elapsed_us, itv.it_value.tv_usec, bps, finterval, actual, 100.0 - offby); +#ifdef SHOW_CPU_TIME + { + struct timespec ts; + long end_cpu_us; + double cpu_pct; + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); + end_cpu_us = ts.tv_sec * 1000000L + ts.tv_nsec / 1000L; + cpu_pct = (double)(end_cpu_us) / (double)(elapsed_us) * 100.0L; + fprintf(stderr, "CPU usage %luus, %.2f%%\n", end_cpu_us, cpu_pct); + } +#endif + } } @@ -290,18 +308,13 @@ int main(int argc, char **argv) { } /* if we used only integer math here, we couldn't support bps not - a multiple of 10 (e.g. 75 would be taken as 70). - We still have a rounding problem: 115200bps is 86.81ms/char, but - it gets rounded down to 86 here. Which would be around 116279bps, - or almost 1% too fast. We never reach 100% speed anyway, so the - loss from overhead actually offsets the extra bit of speed. - */ + a multiple of 10 (e.g. 75 would be taken as 70). 
*/ interval = (unsigned long)(1000000.0L / ((double)bps / 10.0L)); #ifdef FRACTIONAL_USEC - frac_us_num = (double)FRAC_US_DENOM * ((1000000.0L / ((double)bps / 10.0L) - interval)); + frac_us_num = roundl((double)FRAC_US_DENOM * ((1000000.0L / ((double)bps / 10.0L) - interval))); if(debug) fprintf(stderr, "interval %ld + %d/%d us\n", interval, frac_us_num, FRAC_US_DENOM); - // srandom(NOW_USEC()); // don't think we should do this + srandom(NOW_USEC()); #else if(debug) fprintf(stderr, "interval %ld us\n", interval); #endif diff --git a/slowbaud.rst b/slowbaud.rst index bc6eb5a..c043807 100644 --- a/slowbaud.rst +++ b/slowbaud.rst @@ -119,40 +119,6 @@ I/O is done with byte granularity. For calculation purposes, simulates "8-N-1": one start bit, 8 data bits, no parity, and 1 stop bit (total of 10 bits per byte). -The timing code works by calculating how long to sleep after each -character (in microseconds), but actually sleeping slightly less -than that, then busy-waiting until the rest of the interval expires. -At slower bitrates, this works well, and the CPU overhead is barely -noticeable (at least on reasonably fast modern systems). - -Timing accuracy depends on your OS, kernel config (HZ and/or NO_HZ -on Linux), and system load. Also on the amount of data, since the -timing loop is self-regulating (the first few bytes will have less -accurate timing than later ones). No "fancy" techniques like realtime -scheduling or hardware event timers are used. At bitrates up to -115200, on an unloaded Linux system, the timing should be at least -99.9% accurate. At higher bitrates, accuracy will decrease. - -Timing is more accurate on Linux than OSX. It's done with getitimer() -and sigwait(). This works out to be slightly more accurate than -using usleep() on both Linux and OSX. It would be possible to use -the realtime timer_create() and clock_gettime() API on Linux, for -possibly even better accuracy, but OSX doesn't have these (and I want to be -portable). 
On an unloaded OSX system, the accuracy gets steadily worse -as you go above 57600bps. There's also more CPU overhead on OSX. - -If this were a truly useful application, it would be worth trying to -increase accuracy further, with realtime process scheduling. I didn't -do this because slowbaud is just a toy, and because the RT stuff tends -to be unportable and require elevated privileges (root, or something -like setrtlimit or extended filesystem attributes to manage capabilities). - -About the name... I'm aware that "baud" is not synonymous with bps. I -just think "slowbaud" sounds better than "slowbps", as a name. Anyway -the stty command on both Linux and OSX misuses the term ("speed -38400 baud"), as well as the man page for termios(3), so I'm in good -company. - BUGS ==== diff --git a/test.sh b/test.sh index 85fc4bc..ef7581f 100644 --- a/test.sh +++ b/test.sh @@ -5,6 +5,6 @@ bytes=${1:-100} make &>/dev/null for i in 300 1200 2400 4800 9600 19200 33600 57600 115200; do - SLOWBAUD_DEBUG=1 time -p ./slowbaud $i -b $bytes + SLOWBAUD_DEBUG=1 ./slowbaud $i -b $bytes echo done 2>&1 | tee test.out -- cgit v1.2.3