forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 13
Closed
Description
Hi,
new optimization introduced around 2018.09 release merges short memory accesses. This is an issue for existing arc700 code at least. For example - u-boot. That codebase does not rely much on volatiles and pointers to complex structures are often passed as uchar*.
Perhaps such optimization should not be applied to packed structures?
(as a side note, even Linux might suffer is it would fire unaligned exception handler for disassembling such unaligned accesses more often - all performance gains might be lost in a worst case).
Here are some pieces from u-boot use-case:
struct ip_udp_hdr {
u8 ip_hl_v; /* header length and version */
u8 ip_tos; /* type of service */
u16 ip_len; /* total length */
u16 ip_id; /* identification */
u16 ip_off; /* fragment offset field */
u8 ip_ttl; /* time to live */
u8 ip_p; /* protocol */
u16 ip_sum; /* checksum */
struct in_addr ip_src; /* Source IP address */
struct in_addr ip_dst; /* Destination IP address */
u16 udp_src; /* UDP source port */
u16 udp_dst; /* UDP destination port */
u16 udp_len; /* Length of UDP packet */
u16 udp_xsum; /* Checksum */
} __attribute__((packed));void net_set_ip_header(uchar *pkt, struct in_addr dest, struct in_addr source,
u16 pkt_len, u8 proto)
{
struct ip_udp_hdr *ip = (struct ip_udp_hdr *)pkt;
/*
* Construct an IP header.
*/
/* IP_HDR_SIZE / 4 (not including UDP) */
ip->ip_hl_v = 0x45;
ip->ip_tos = 0;
ip->ip_len = htons(pkt_len);
ip->ip_p = proto;
ip->ip_id = htons(net_ip_id++);
ip->ip_off = htons(IP_FLAGS_DFRAG); /* Don't fragment */
ip->ip_ttl = 255; // <======
ip->ip_sum = 0;
/* already in network byte order */
net_copy_ip((void *)&ip->ip_src, &source);
/* already in network byte order */
net_copy_ip((void *)&ip->ip_dst, &dest);
ip->ip_sum = compute_ip_checksum(ip, IP_HDR_SIZE);
}/usr/local/arcgcc821/lib/gcc/arc-snps-linux-uclibc/8.2.1/include -pipe -D__ARC__ -mA7 \
-mno-sdata -Os -g -mvolatile-cache -fomit-frame-pointer \
-I/home/avinashp/bbic6/perforce/u-boot/main/u-boot-2009.06/board/ruby \
-Wall -Wstrict-prototypes -fno-stack-protector -c -o net.o net.cOld logic preserved if nop is inserted manually :
a8000b70: 208a 0fff mov r0,-1
a8000b74: ad08 stb_s r0,[r13,0x8]
a8000b76: 264a 7000 nop
a8000b7a: d811 mov_s r0,0x11
a8000b7c: ad09 stb_s r0,[r13,0x9]And this is how things are merged now (sorry, don't have bigger context):
a8000b50: 1d08 1f80 0000 11ff st 0x11ff,[r13,8]Regards,
Igor.