// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Basic Transport Functions exploiting InfiniBand API
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/smc.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"
#include "smc_netlink.h"
#include "smc_stats.h"
#include "smc_tracepoint.h"

#define SMC_LGR_NUM_INCR                256
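/* Grace period before an idle link group is freed: 10 minutes on the
 * server side, 10 seconds longer on the client side (see the comment
 * in smc_lgr_schedule_free_work() for why the client waits longer).
 */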
#define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)

struct smc_lgr_list smc_lgr_list = {    /* established link groups */
        .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
        .list = LIST_HEAD_INIT(smc_lgr_list.list),
        .num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
                         struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_down_work(struct work_struct *work);

/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
                                                  spinlock_t **lgr_lock)
{
        if (lgr->is_smcd) {
                *lgr_lock = &lgr->smcd->lgr_lock;
                return &lgr->smcd->lgr_list;
        }

        *lgr_lock = &smc_lgr_list.lock;
        return &smc_lgr_list.list;
}

static void smc_ibdev_cnt_inc(struct smc_link *lnk)
{
        atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
}

static void smc_ibdev_cnt_dec(struct smc_link *lnk)
{
        atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
}

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
        /* client link group creation always follows the server link group
         * creation. For client use a somewhat higher removal delay time,
         * otherwise there is a risk of out-of-sync link groups.
         */
        if (!lgr->freeing) {
                mod_delayed_work(system_wq, &lgr->free_work,
                                 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
                                                SMC_LGR_FREE_DELAY_CLNT :
                                                SMC_LGR_FREE_DELAY_SERV);
        }
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn        connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
        struct rb_node **link, *parent = NULL;
        u32 token = conn->alert_token_local;

        link = &conn->lgr->conns_all.rb_node;
        while (*link) {
                struct smc_connection *cur = rb_entry(*link,
                                        struct smc_connection, alert_node);

                parent = *link;
                if (cur->alert_token_local > token)
                        link = &parent->rb_left;
                else
                        link = &parent->rb_right;
        }
        /* Put the new node there */
        rb_link_node(&conn->alert_node, parent, link);
        rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
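
/* Ordering in conns_all: smaller alert tokens end up in the left
 * subtree, larger or equal ones in the right. Duplicates cannot occur,
 * because smc_lgr_register_conn() only assigns a token after
 * smc_lgr_find_conn() failed to find it in this tree.
 */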

/* assign an SMC-R link to the connection */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
        enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
                                       SMC_LNK_ACTIVE;
        int i, j;

        /* do link balancing */
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                struct smc_link *lnk = &conn->lgr->lnk[i];

                if (lnk->state != expected || lnk->link_is_asym)
                        continue;
                if (conn->lgr->role == SMC_CLNT) {
                        conn->lnk = lnk; /* temporary, SMC server assigns link */
                        break;
                }
                if (conn->lgr->conns_num % 2) {
                        for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
                                struct smc_link *lnk2;

                                lnk2 = &conn->lgr->lnk[j];
                                if (lnk2->state == expected &&
                                    !lnk2->link_is_asym) {
                                        conn->lnk = lnk2;
                                        break;
                                }
                        }
                }
                if (!conn->lnk)
                        conn->lnk = lnk;
                break;
        }
        if (!conn->lnk)
                return SMC_CLC_DECL_NOACTLINK;
        atomic_inc(&conn->lnk->conn_cnt);
        return 0;
}
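
/* The balancing above distributes connections across the usable links:
 * on the server side every second connection is steered to the next
 * usable link, while a client only picks a provisional link and later
 * adopts whatever link the server assigned (see the comment in the
 * loop).
 */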

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        static atomic_t nexttoken = ATOMIC_INIT(0);
        int rc;

        if (!conn->lgr->is_smcd) {
                rc = smcr_lgr_conn_assign_link(conn, first);
                if (rc) {
                        conn->lgr = NULL;
                        return rc;
                }
        }
        /* find a new alert_token_local value not yet used by some connection
         * in this link group
         */
        sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
        while (!conn->alert_token_local) {
                conn->alert_token_local = atomic_inc_return(&nexttoken);
                if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
                        conn->alert_token_local = 0;
        }
        smc_lgr_add_alert_token(conn);
        conn->lgr->conns_num++;
        return 0;
}
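
/* nexttoken is a single 32-bit counter shared by all link groups. The
 * while condition above rejects the reserved value 0 after a
 * wraparound, and the smc_lgr_find_conn() check rejects any value that
 * is still in use within this link group, so the loop terminates with
 * a token that is unique per lgr (not globally).
 */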

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        struct smc_link_group *lgr = conn->lgr;

        rb_erase(&conn->alert_node, &lgr->conns_all);
        if (conn->lnk)
                atomic_dec(&conn->lnk->conn_cnt);
        lgr->conns_num--;
        conn->alert_token_local = 0;
        sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!smc_conn_lgr_valid(conn))
                return;
        write_lock_bh(&lgr->conns_lock);
        if (conn->alert_token_local) {
                __smc_lgr_unregister_conn(conn);
        }
        write_unlock_bh(&lgr->conns_lock);
}

int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
        char hostname[SMC_MAX_HOSTNAME_LEN + 1];
        char smc_seid[SMC_MAX_EID_LEN + 1];
        struct nlattr *attrs;
        u8 *seid = NULL;
        u8 *host = NULL;
        void *nlh;

        nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &smc_gen_nl_family, NLM_F_MULTI,
                          SMC_NETLINK_GET_SYS_INFO);
        if (!nlh)
                goto errmsg;
        if (cb_ctx->pos[0])
                goto errout;
        attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO);
        if (!attrs)
                goto errout;
        if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_SYS_REL, SMC_RELEASE))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
                goto errattr;
        smc_clc_get_hostname(&host);
        if (host) {
                memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
                hostname[SMC_MAX_HOSTNAME_LEN] = 0;
                if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
                        goto errattr;
        }
        if (smc_ism_is_v2_capable()) {
                smc_ism_get_system_eid(&seid);
                memcpy(smc_seid, seid, SMC_MAX_EID_LEN);
                smc_seid[SMC_MAX_EID_LEN] = 0;
                if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
                        goto errattr;
        }
        nla_nest_end(skb, attrs);
        genlmsg_end(skb, nlh);
        cb_ctx->pos[0] = 1;
        return skb->len;

errattr:
        nla_nest_cancel(skb, attrs);
errout:
        genlmsg_cancel(skb, nlh);
errmsg:
        return skb->len;
}
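
/* As in the other dump callbacks below, failures are not propagated:
 * the partially built message is cancelled and skb->len is returned,
 * so the netlink core just delivers whatever fit into the buffer. The
 * cb_ctx->pos[] state either marks this single-shot dump as done
 * (here) or records how many entries were already sent (in the lgr
 * dumps below).
 */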

/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
                                     struct sk_buff *skb,
                                     struct netlink_callback *cb,
                                     struct nlattr *v2_attrs)
{
        char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
        char smc_eid[SMC_MAX_EID_LEN + 1];

        if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
                goto errv2attr;
        if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
                goto errv2attr;
        if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
                goto errv2attr;
        memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
        smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
        if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
                goto errv2attr;
        memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
        smc_eid[SMC_MAX_EID_LEN] = 0;
        if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
                goto errv2attr;

        nla_nest_end(skb, v2_attrs);
        return 0;

errv2attr:
        nla_nest_cancel(skb, v2_attrs);
        return -EMSGSIZE;
}

static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
                                   struct sk_buff *skb,
                                   struct netlink_callback *cb)
{
        struct nlattr *v2_attrs;

        v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2);
        if (!v2_attrs)
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
                goto errv2attr;

        nla_nest_end(skb, v2_attrs);
        return 0;

errv2attr:
        nla_nest_cancel(skb, v2_attrs);
errattr:
        return -EMSGSIZE;
}

static int smc_nl_fill_lgr(struct smc_link_group *lgr,
                           struct sk_buff *skb,
                           struct netlink_callback *cb)
{
        char smc_target[SMC_MAX_PNETID_LEN + 1];
        struct nlattr *attrs, *v2_attrs;

        attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
        if (!attrs)
                goto errout;

        if (nla_put_u32(skb, SMC_NLA_LGR_R_ID, *((u32 *)&lgr->id)))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_LGR_R_CONNS_NUM, lgr->conns_num))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LGR_R_ROLE, lgr->role))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LGR_R_BUF_TYPE, lgr->buf_type))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
                goto errattr;
        if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE,
                              lgr->net->net_cookie, SMC_NLA_LGR_R_PAD))
                goto errattr;
        memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN);
        smc_target[SMC_MAX_PNETID_LEN] = 0;
        if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
                goto errattr;
        if (lgr->smc_version > SMC_V1) {
                v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
                if (!v2_attrs)
                        goto errattr;
                if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
                        goto errattr;
                if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
                        goto errattr;
        }

        nla_nest_end(skb, attrs);
        return 0;
errattr:
        nla_nest_cancel(skb, attrs);
errout:
        return -EMSGSIZE;
}

static int smc_nl_fill_lgr_link(struct smc_link_group *lgr,
                                struct smc_link *link,
                                struct sk_buff *skb,
                                struct netlink_callback *cb)
{
        char smc_ibname[IB_DEVICE_NAME_MAX];
        u8 smc_gid_target[41];
        struct nlattr *attrs;
        u32 link_uid = 0;
        void *nlh;

        nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &smc_gen_nl_family, NLM_F_MULTI,
                          SMC_NETLINK_GET_LINK_SMCR);
        if (!nlh)
                goto errmsg;

        attrs = nla_nest_start(skb, SMC_GEN_LINK_SMCR);
        if (!attrs)
                goto errout;

        if (nla_put_u8(skb, SMC_NLA_LINK_ID, link->link_id))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_LINK_STATE, link->state))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_LINK_CONN_CNT,
                        atomic_read(&link->conn_cnt)))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LINK_IB_PORT, link->ibport))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_LINK_NET_DEV, link->ndev_ifidx))
                goto errattr;
        snprintf(smc_ibname, sizeof(smc_ibname), "%s", link->ibname);
        if (nla_put_string(skb, SMC_NLA_LINK_IB_DEV, smc_ibname))
                goto errattr;
        memcpy(&link_uid, link->link_uid, sizeof(link_uid));
        if (nla_put_u32(skb, SMC_NLA_LINK_UID, link_uid))
                goto errattr;
        memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid));
        if (nla_put_u32(skb, SMC_NLA_LINK_PEER_UID, link_uid))
                goto errattr;
        memset(smc_gid_target, 0, sizeof(smc_gid_target));
        smc_gid_be16_convert(smc_gid_target, link->gid);
        if (nla_put_string(skb, SMC_NLA_LINK_GID, smc_gid_target))
                goto errattr;
        memset(smc_gid_target, 0, sizeof(smc_gid_target));
        smc_gid_be16_convert(smc_gid_target, link->peer_gid);
        if (nla_put_string(skb, SMC_NLA_LINK_PEER_GID, smc_gid_target))
                goto errattr;

        nla_nest_end(skb, attrs);
        genlmsg_end(skb, nlh);
        return 0;
errattr:
        nla_nest_cancel(skb, attrs);
errout:
        genlmsg_cancel(skb, nlh);
errmsg:
        return -EMSGSIZE;
}

static int smc_nl_handle_lgr(struct smc_link_group *lgr,
                             struct sk_buff *skb,
                             struct netlink_callback *cb,
                             bool list_links)
{
        void *nlh;
        int i;

        nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &smc_gen_nl_family, NLM_F_MULTI,
                          SMC_NETLINK_GET_LGR_SMCR);
        if (!nlh)
                goto errmsg;
        if (smc_nl_fill_lgr(lgr, skb, cb))
                goto errout;

        genlmsg_end(skb, nlh);
        if (!list_links)
                goto out;
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (!smc_link_usable(&lgr->lnk[i]))
                        continue;
                if (smc_nl_fill_lgr_link(lgr, &lgr->lnk[i], skb, cb))
                        goto errout;
        }
out:
        return 0;

errout:
        genlmsg_cancel(skb, nlh);
errmsg:
        return -EMSGSIZE;
}

static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
                                 struct sk_buff *skb,
                                 struct netlink_callback *cb,
                                 bool list_links)
{
        struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
        struct smc_link_group *lgr;
        int snum = cb_ctx->pos[0];
        int num = 0;

        spin_lock_bh(&smc_lgr->lock);
        list_for_each_entry(lgr, &smc_lgr->list, list) {
                if (num < snum)
                        goto next;
                if (smc_nl_handle_lgr(lgr, skb, cb, list_links))
                        goto errout;
next:
                num++;
        }
errout:
        spin_unlock_bh(&smc_lgr->lock);
        cb_ctx->pos[0] = num;
}

static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
                                struct sk_buff *skb,
                                struct netlink_callback *cb)
{
        char smc_pnet[SMC_MAX_PNETID_LEN + 1];
        struct smcd_dev *smcd = lgr->smcd;
        struct nlattr *attrs;
        void *nlh;

        nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &smc_gen_nl_family, NLM_F_MULTI,
                          SMC_NETLINK_GET_LGR_SMCD);
        if (!nlh)
                goto errmsg;

        attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCD);
        if (!attrs)
                goto errout;

        if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
                goto errattr;
        if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID,
                              smcd->ops->get_local_gid(smcd),
                              SMC_NLA_LGR_D_PAD))
                goto errattr;
        if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid,
                              SMC_NLA_LGR_D_PAD))
                goto errattr;
        if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
                goto errattr;
        if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
                goto errattr;
        memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
        smc_pnet[SMC_MAX_PNETID_LEN] = 0;
        if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
                goto errattr;
        if (lgr->smc_version > SMC_V1) {
                struct nlattr *v2_attrs;

                v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
                if (!v2_attrs)
                        goto errattr;
                if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
                        goto errattr;
        }
        nla_nest_end(skb, attrs);
        genlmsg_end(skb, nlh);
        return 0;

errattr:
        nla_nest_cancel(skb, attrs);
errout:
        genlmsg_cancel(skb, nlh);
errmsg:
        return -EMSGSIZE;
}

static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev,
                                  struct sk_buff *skb,
                                  struct netlink_callback *cb)
{
        struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
        struct smc_link_group *lgr;
        int snum = cb_ctx->pos[1];
        int rc = 0, num = 0;

        spin_lock_bh(&dev->lgr_lock);
        list_for_each_entry(lgr, &dev->lgr_list, list) {
                if (!lgr->is_smcd)
                        continue;
                if (num < snum)
                        goto next;
                rc = smc_nl_fill_smcd_lgr(lgr, skb, cb);
                if (rc)
                        goto errout;
next:
                num++;
        }
errout:
        spin_unlock_bh(&dev->lgr_lock);
        cb_ctx->pos[1] = num;
        return rc;
}

static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list,
                                struct sk_buff *skb,
                                struct netlink_callback *cb)
{
        struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
        struct smcd_dev *smcd_dev;
        int snum = cb_ctx->pos[0];
        int rc = 0, num = 0;

        mutex_lock(&dev_list->mutex);
        list_for_each_entry(smcd_dev, &dev_list->list, list) {
                if (list_empty(&smcd_dev->lgr_list))
                        continue;
                if (num < snum)
                        goto next;
                rc = smc_nl_handle_smcd_lgr(smcd_dev, skb, cb);
                if (rc)
                        goto errout;
next:
                num++;
        }
errout:
        mutex_unlock(&dev_list->mutex);
        cb_ctx->pos[0] = num;
        return rc;
}

int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
{
        bool list_links = false;

        smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
        return skb->len;
}

int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb)
{
        bool list_links = true;

        smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
        return skb->len;
}

int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
{
        smc_nl_fill_smcd_dev(&smcd_dev_list, skb, cb);
        return skb->len;
}

void smc_lgr_cleanup_early(struct smc_link_group *lgr)
{
        spinlock_t *lgr_lock;

        if (!lgr)
                return;

        smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
        /* do not use this link group for new connections */
        if (!list_empty(&lgr->list))
                list_del_init(&lgr->list);
        spin_unlock_bh(lgr_lock);
        __smc_lgr_terminate(lgr, true);
}

static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
        int i;

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                struct smc_link *lnk = &lgr->lnk[i];

                if (smc_link_sendable(lnk))
                        lnk->state = SMC_LNK_INACTIVE;
        }
        wake_up_all(&lgr->llc_msg_waiter);
        wake_up_all(&lgr->llc_flow_waiter);
}

static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
        struct smc_link_group *lgr = container_of(to_delayed_work(work),
                                                  struct smc_link_group,
                                                  free_work);
        spinlock_t *lgr_lock;
        bool conns;

        smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
        if (lgr->freeing) {
                spin_unlock_bh(lgr_lock);
                return;
        }
        read_lock_bh(&lgr->conns_lock);
        conns = RB_EMPTY_ROOT(&lgr->conns_all);
        read_unlock_bh(&lgr->conns_lock);
        if (!conns) { /* number of lgr connections is no longer zero */
                spin_unlock_bh(lgr_lock);
                return;
        }
        list_del_init(&lgr->list); /* remove from smc_lgr_list */
        lgr->freeing = 1; /* this instance does the freeing, no new schedule */
        spin_unlock_bh(lgr_lock);
        cancel_delayed_work(&lgr->free_work);

        if (!lgr->is_smcd && !lgr->terminating)
                smc_llc_send_link_delete_all(lgr, true,
                                             SMC_LLC_DEL_PROG_INIT_TERM);
        if (lgr->is_smcd && !lgr->terminating)
                smc_ism_signal_shutdown(lgr);
        if (!lgr->is_smcd)
                smcr_lgr_link_deactivate_all(lgr);
        smc_lgr_free(lgr);
}

static void smc_lgr_terminate_work(struct work_struct *work)
{
        struct smc_link_group *lgr = container_of(work, struct smc_link_group,
                                                  terminate_work);

        __smc_lgr_terminate(lgr, true);
}

/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
        u8 link_id;
        int i;

        while (1) {
again:
                link_id = ++lgr->next_link_id;
                if (!link_id)   /* skip zero as link_id */
                        link_id = ++lgr->next_link_id;
                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                        if (smc_link_usable(&lgr->lnk[i]) &&
                            lgr->lnk[i].link_id == link_id)
                                goto again;
                }
                break;
        }
        return link_id;
}
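
/* link ids are 8-bit values, so the counter above wraps after 255; the
 * zero check skips the reserved id 0 and the inner loop retries until
 * an id is found that no usable link of this lgr carries. With at most
 * SMC_LINKS_PER_LGR_MAX links in use this always terminates.
 */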

static void smcr_copy_dev_info_to_link(struct smc_link *link)
{
        struct smc_ib_device *smcibdev = link->smcibdev;

        snprintf(link->ibname, sizeof(link->ibname), "%s",
                 smcibdev->ibdev->name);
        link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1];
}

int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
                   u8 link_idx, struct smc_init_info *ini)
{
        struct smc_ib_device *smcibdev;
        u8 rndvec[3];
        int rc;

        if (lgr->smc_version == SMC_V2) {
                lnk->smcibdev = ini->smcrv2.ib_dev_v2;
                lnk->ibport = ini->smcrv2.ib_port_v2;
        } else {
                lnk->smcibdev = ini->ib_dev;
                lnk->ibport = ini->ib_port;
        }
        get_device(&lnk->smcibdev->ibdev->dev);
        atomic_inc(&lnk->smcibdev->lnk_cnt);
        refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
        lnk->clearing = 0;
        lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
        lnk->link_id = smcr_next_link_id(lgr);
        lnk->lgr = lgr;
        smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
        lnk->link_idx = link_idx;
        lnk->wr_rx_id_compl = 0;
        smc_ibdev_cnt_inc(lnk);
        smcr_copy_dev_info_to_link(lnk);
        atomic_set(&lnk->conn_cnt, 0);
        smc_llc_link_set_uid(lnk);
        INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
        if (!lnk->smcibdev->initialized) {
                rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
                if (rc)
                        goto out;
        }
        get_random_bytes(rndvec, sizeof(rndvec));
        lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
                (rndvec[2] << 16);
        rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
                                  ini->vlan_id, lnk->gid, &lnk->sgid_index,
                                  lgr->smc_version == SMC_V2 ?
                                                  &ini->smcrv2 : NULL);
        if (rc)
                goto out;
        rc = smc_llc_link_init(lnk);
        if (rc)
                goto out;
        rc = smc_wr_alloc_link_mem(lnk);
        if (rc)
                goto clear_llc_lnk;
        rc = smc_ib_create_protection_domain(lnk);
        if (rc)
                goto free_link_mem;
        rc = smc_ib_create_queue_pair(lnk);
        if (rc)
                goto dealloc_pd;
        rc = smc_wr_create_link(lnk);
        if (rc)
                goto destroy_qp;
        lnk->state = SMC_LNK_ACTIVATING;
        return 0;

destroy_qp:
        smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
        smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
        smc_wr_free_link_mem(lnk);
clear_llc_lnk:
        smc_llc_link_clear(lnk, false);
out:
        smc_ibdev_cnt_dec(lnk);
        put_device(&lnk->smcibdev->ibdev->dev);
        smcibdev = lnk->smcibdev;
        memset(lnk, 0, sizeof(struct smc_link));
        lnk->state = SMC_LNK_UNUSED;
        if (!atomic_dec_return(&smcibdev->lnk_cnt))
                wake_up(&smcibdev->lnks_deleted);
        smc_lgr_put(lgr); /* lgr_hold above */
        return rc;
}
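
/* The error path above unwinds in exact reverse order of the setup
 * steps and leaves the link in SMC_LNK_UNUSED state, mirroring what
 * __smcr_link_clear() does for a link that was fully initialized.
 */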

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
        struct smc_link_group *lgr;
        struct list_head *lgr_list;
        struct smcd_dev *smcd;
        struct smc_link *lnk;
        spinlock_t *lgr_lock;
        u8 link_idx;
        int rc = 0;
        int i;

        if (ini->is_smcd && ini->vlan_id) {
                if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
                                     ini->vlan_id)) {
                        rc = SMC_CLC_DECL_ISMVLANERR;
                        goto out;
                }
        }

        lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
        if (!lgr) {
                rc = SMC_CLC_DECL_MEM;
                goto ism_put_vlan;
        }
        lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
                                     SMC_LGR_ID_SIZE, &lgr->id);
        if (!lgr->tx_wq) {
                rc = -ENOMEM;
                goto free_lgr;
        }
        lgr->is_smcd = ini->is_smcd;
        lgr->sync_err = 0;
        lgr->terminating = 0;
        lgr->freeing = 0;
        lgr->vlan_id = ini->vlan_id;
        refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
        init_rwsem(&lgr->sndbufs_lock);
        init_rwsem(&lgr->rmbs_lock);
        rwlock_init(&lgr->conns_lock);
        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                INIT_LIST_HEAD(&lgr->sndbufs[i]);
                INIT_LIST_HEAD(&lgr->rmbs[i]);
        }
        lgr->next_link_id = 0;
        smc_lgr_list.num += SMC_LGR_NUM_INCR;
        memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
        INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
        INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
        lgr->conns_all = RB_ROOT;
        if (ini->is_smcd) {
                /* SMC-D specific settings */
                smcd = ini->ism_dev[ini->ism_selected];
                get_device(smcd->ops->get_dev(smcd));
                lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
                lgr->smcd = ini->ism_dev[ini->ism_selected];
                lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
                lgr_lock = &lgr->smcd->lgr_lock;
                lgr->smc_version = ini->smcd_version;
                lgr->peer_shutdown = 0;
                atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
        } else {
                /* SMC-R specific settings */
                struct smc_ib_device *ibdev;
                int ibport;

                lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
                lgr->smc_version = ini->smcr_version;
                memcpy(lgr->peer_systemid, ini->peer_systemid,
                       SMC_SYSTEMID_LEN);
                if (lgr->smc_version == SMC_V2) {
                        ibdev = ini->smcrv2.ib_dev_v2;
                        ibport = ini->smcrv2.ib_port_v2;
                        lgr->saddr = ini->smcrv2.saddr;
                        lgr->uses_gateway = ini->smcrv2.uses_gateway;
                        memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
                               ETH_ALEN);
                } else {
                        ibdev = ini->ib_dev;
                        ibport = ini->ib_port;
                }
                memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
                       SMC_MAX_PNETID_LEN);
                rc = smc_wr_alloc_lgr_mem(lgr);
                if (rc)
                        goto free_wq;
                smc_llc_lgr_init(lgr, smc);

                link_idx = SMC_SINGLE_LINK;
                lnk = &lgr->lnk[link_idx];
                rc = smcr_link_init(lgr, lnk, link_idx, ini);
                if (rc) {
                        smc_wr_free_lgr_mem(lgr);
                        goto free_wq;
                }
                lgr->net = smc_ib_net(lnk->smcibdev);
                lgr_list = &smc_lgr_list.list;
                lgr_lock = &smc_lgr_list.lock;
                lgr->buf_type = lgr->net->smc.sysctl_smcr_buf_type;
                atomic_inc(&lgr_cnt);
        }
        smc->conn.lgr = lgr;
        spin_lock_bh(lgr_lock);
        list_add_tail(&lgr->list, lgr_list);
        spin_unlock_bh(lgr_lock);
        return 0;

free_wq:
        destroy_workqueue(lgr->tx_wq);
free_lgr:
        kfree(lgr);
ism_put_vlan:
        if (ini->is_smcd && ini->vlan_id)
                smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
out:
        if (rc < 0) {
                if (rc == -ENOMEM)
                        rc = SMC_CLC_DECL_MEM;
                else
                        rc = SMC_CLC_DECL_INTERR;
        }
        return rc;
}
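
/* Note on return codes: smc_lgr_create() mixes negative errnos from
 * its helpers with positive SMC_CLC_DECL_* reason codes; the out:
 * label maps any remaining negative errno to a CLC decline code, so
 * callers in the CLC handshake path only ever see 0 or a decline
 * reason.
 */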

static int smc_write_space(struct smc_connection *conn)
{
        int buffer_len = conn->peer_rmbe_size;
        union smc_host_cursor prod;
        union smc_host_cursor cons;
        int space;

        smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
        smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
        /* determine rx_buf space */
        space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
        return space;
}

static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
                             struct smc_wr_buf *wr_buf)
{
        struct smc_connection *conn = &smc->conn;
        union smc_host_cursor cons, fin;
        int rc = 0;
        int diff;

        smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
        smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
        /* set prod cursor to old state, enforce tx_rdma_writes() */
        smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
        smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

        if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
                /* cons cursor advanced more than fin, and prod was set to
                 * fin above, so now prod is smaller than cons. Fix that.
                 */
                diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
                smc_curs_add(conn->sndbuf_desc->len,
                             &conn->tx_curs_sent, diff);
                smc_curs_add(conn->sndbuf_desc->len,
                             &conn->tx_curs_fin, diff);

                smp_mb__before_atomic();
                atomic_add(diff, &conn->sndbuf_space);
                smp_mb__after_atomic();

                smc_curs_add(conn->peer_rmbe_size,
                             &conn->local_tx_ctrl.prod, diff);
                smc_curs_add(conn->peer_rmbe_size,
                             &conn->local_tx_ctrl_fin, diff);
        }
        /* recalculate, value is used by tx_rdma_writes() */
        atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));

        if (smc->sk.sk_state != SMC_INIT &&
            smc->sk.sk_state != SMC_CLOSED) {
                rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
                if (!rc) {
                        queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
                        smc->sk.sk_data_ready(&smc->sk);
                }
        } else {
                smc_wr_tx_put_slot(conn->lnk,
                                   (struct smc_wr_tx_pend_priv *)pend);
        }
        return rc;
}
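
/* Worked example with hypothetical numbers: peer_rmbe_size is 16384,
 * fin stands at 4096 but cons already at 6144. Then diff is 2048; the
 * send cursors, prod and fin are all advanced by 2048 and sndbuf_space
 * grows by 2048, so prod is no longer behind cons and tx_rdma_writes()
 * resends only data the peer has not yet confirmed.
 */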

void smc_switch_link_and_count(struct smc_connection *conn,
                               struct smc_link *to_lnk)
{
        atomic_dec(&conn->lnk->conn_cnt);
        /* link_hold in smc_conn_create() */
        smcr_link_put(conn->lnk);
        conn->lnk = to_lnk;
        atomic_inc(&conn->lnk->conn_cnt);
        /* link_put in smc_conn_free() */
        smcr_link_hold(conn->lnk);
}

struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
                                  struct smc_link *from_lnk, bool is_dev_err)
{
        struct smc_link *to_lnk = NULL;
        struct smc_cdc_tx_pend *pend;
        struct smc_connection *conn;
        struct smc_wr_buf *wr_buf;
        struct smc_sock *smc;
        struct rb_node *node;
        int i, rc = 0;

        /* link is inactive, wake up tx waiters */
        smc_wr_wakeup_tx_wait(from_lnk);

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
                        continue;
                if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
                    from_lnk->ibport == lgr->lnk[i].ibport) {
                        continue;
                }
                to_lnk = &lgr->lnk[i];
                break;
        }
        if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
                smc_lgr_terminate_sched(lgr);
                return NULL;
        }
again:
        read_lock_bh(&lgr->conns_lock);
        for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
                conn = rb_entry(node, struct smc_connection, alert_node);
                if (conn->lnk != from_lnk)
                        continue;
                smc = container_of(conn, struct smc_sock, conn);
                /* conn->lnk not yet set in SMC_INIT state */
                if (smc->sk.sk_state == SMC_INIT)
                        continue;
                if (smc->sk.sk_state == SMC_CLOSED ||
                    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
                    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
                    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
                    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
                    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
                    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
                    smc->sk.sk_state == SMC_PEERABORTWAIT ||
                    smc->sk.sk_state == SMC_PROCESSABORT) {
                        spin_lock_bh(&conn->send_lock);
                        smc_switch_link_and_count(conn, to_lnk);
                        spin_unlock_bh(&conn->send_lock);
                        continue;
                }
                sock_hold(&smc->sk);
                read_unlock_bh(&lgr->conns_lock);
                /* pre-fetch buffer outside of send_lock, might sleep */
                rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
                if (rc)
                        goto err_out;
                /* avoid race with smcr_tx_sndbuf_nonempty() */
                spin_lock_bh(&conn->send_lock);
                smc_switch_link_and_count(conn, to_lnk);
                rc = smc_switch_cursor(smc, pend, wr_buf);
                spin_unlock_bh(&conn->send_lock);
                sock_put(&smc->sk);
                if (rc)
                        goto err_out;
                goto again;
        }
        read_unlock_bh(&lgr->conns_lock);
        smc_wr_tx_link_put(to_lnk);
        return to_lnk;

err_out:
        smcr_link_down_cond_sched(to_lnk);
        smc_wr_tx_link_put(to_lnk);
        return NULL;
}
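
/* The scan restarts from rb_first() after every connection that needed
 * a cursor switch: conns_lock was dropped so smc_cdc_get_free_slot()
 * may sleep, and the tree can change meanwhile. Connections already
 * moved to to_lnk fail the conn->lnk == from_lnk test and are skipped
 * on the next pass, so the restart makes forward progress.
 */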

static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
                           struct smc_link_group *lgr)
{
        struct rw_semaphore *lock;      /* lock buffer list */
        int rc;

        if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
                /* unregister rmb with peer */
                rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
                if (!rc) {
                        /* protect against smc_llc_cli_rkey_exchange() */
                        down_read(&lgr->llc_conf_mutex);
                        smc_llc_do_delete_rkey(lgr, buf_desc);
                        buf_desc->is_conf_rkey = false;
                        up_read(&lgr->llc_conf_mutex);
                        smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
                }
        }

        if (buf_desc->is_reg_err) {
                /* buf registration failed, reuse not possible */
                lock = is_rmb ? &lgr->rmbs_lock :
                                &lgr->sndbufs_lock;
                down_write(lock);
                list_del(&buf_desc->list);
                up_write(lock);

                smc_buf_free(lgr, is_rmb, buf_desc);
        } else {
                /* memzero_explicit provides potential memory barrier semantics */
                memzero_explicit(buf_desc->cpu_addr, buf_desc->len);
                WRITE_ONCE(buf_desc->used, 0);
        }
}

static void smc_buf_unuse(struct smc_connection *conn,
                          struct smc_link_group *lgr)
{
        if (conn->sndbuf_desc) {
                if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) {
                        smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
                } else {
                        memzero_explicit(conn->sndbuf_desc->cpu_addr, conn->sndbuf_desc->len);
                        WRITE_ONCE(conn->sndbuf_desc->used, 0);
                }
        }
        if (conn->rmb_desc) {
                if (!lgr->is_smcd) {
                        smcr_buf_unuse(conn->rmb_desc, true, lgr);
                } else {
                        memzero_explicit(conn->rmb_desc->cpu_addr,
                                         conn->rmb_desc->len + sizeof(struct smcd_cdc_msg));
                        WRITE_ONCE(conn->rmb_desc->used, 0);
                }
        }
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
        struct smc_link_group *lgr = conn->lgr;

        if (!lgr || conn->freed)
                /* Connection has never been registered in a
                 * link group, or has already been freed.
                 */
                return;

        conn->freed = 1;
        if (!smc_conn_lgr_valid(conn))
                /* Connection has already unregistered from
                 * link group.
                 */
                goto lgr_put;

        if (lgr->is_smcd) {
                if (!list_empty(&lgr->list))
                        smc_ism_unset_conn(conn);
                tasklet_kill(&conn->rx_tsklet);
        } else {
                smc_cdc_wait_pend_tx_wr(conn);
                if (current_work() != &conn->abort_work)
                        cancel_work_sync(&conn->abort_work);
        }
        if (!list_empty(&lgr->list)) {
                smc_buf_unuse(conn, lgr); /* allow buffer reuse */
                smc_lgr_unregister_conn(conn);
        }

        if (!lgr->conns_num)
                smc_lgr_schedule_free_work(lgr);
lgr_put:
        if (!lgr->is_smcd)
                smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
        smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
}

/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
                                struct smc_link *lnk)
{
        if (is_rmb || buf_desc->is_vm)
                buf_desc->is_reg_mr[lnk->link_idx] = false;
        if (!buf_desc->is_map_ib[lnk->link_idx])
                return;

        if ((is_rmb || buf_desc->is_vm) &&
            buf_desc->mr[lnk->link_idx]) {
                smc_ib_put_memory_region(buf_desc->mr[lnk->link_idx]);
                buf_desc->mr[lnk->link_idx] = NULL;
        }
        if (is_rmb)
                smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
        else
                smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);

        sg_free_table(&buf_desc->sgt[lnk->link_idx]);
        buf_desc->is_map_ib[lnk->link_idx] = false;
}

/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        struct smc_buf_desc *buf_desc, *bf;
        int i;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                down_write(&lgr->rmbs_lock);
                list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
                        smcr_buf_unmap_link(buf_desc, true, lnk);
                up_write(&lgr->rmbs_lock);

                down_write(&lgr->sndbufs_lock);
                list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
                                         list)
                        smcr_buf_unmap_link(buf_desc, false, lnk);
                up_write(&lgr->sndbufs_lock);
        }
}

static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                lgr->rtokens[i][lnk->link_idx].rkey = 0;
                lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
        }
}

static void __smcr_link_clear(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        struct smc_ib_device *smcibdev;

        smc_wr_free_link_mem(lnk);
        smc_ibdev_cnt_dec(lnk);
        put_device(&lnk->smcibdev->ibdev->dev);
        smcibdev = lnk->smcibdev;
        memset(lnk, 0, sizeof(struct smc_link));
        lnk->state = SMC_LNK_UNUSED;
        if (!atomic_dec_return(&smcibdev->lnk_cnt))
                wake_up(&smcibdev->lnks_deleted);
        smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
        if (!lnk->lgr || lnk->clearing ||
            lnk->state == SMC_LNK_UNUSED)
                return;
        lnk->clearing = 1;
        lnk->peer_qpn = 0;
        smc_llc_link_clear(lnk, log);
        smcr_buf_unmap_lgr(lnk);
        smcr_rtoken_clear_link(lnk);
        smc_ib_modify_qp_error(lnk);
        smc_wr_free_link(lnk);
        smc_ib_destroy_queue_pair(lnk);
        smc_ib_dealloc_protection_domain(lnk);
        smcr_link_put(lnk); /* theoretically last link_put */
}

void smcr_link_hold(struct smc_link *lnk)
{
        refcount_inc(&lnk->refcnt);
}

void smcr_link_put(struct smc_link *lnk)
{
        if (refcount_dec_and_test(&lnk->refcnt))
                __smcr_link_clear(lnk);
}
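
/* Link lifetime: smcr_link_init() sets refcnt to 1, connections take
 * additional references (see smc_switch_link_and_count() and the
 * link_hold/link_put comments), smcr_link_clear() drops the initial
 * reference after tearing down the IB resources, and the final
 * smcr_link_put() triggers __smcr_link_clear().
 */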

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
                          struct smc_buf_desc *buf_desc)
{
        int i;

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
                smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);

        if (!buf_desc->is_vm && buf_desc->pages)
                __free_pages(buf_desc->pages, buf_desc->order);
        else if (buf_desc->is_vm && buf_desc->cpu_addr)
                vfree(buf_desc->cpu_addr);
        kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
                          struct smc_buf_desc *buf_desc)
{
        if (is_dmb) {
                /* restore original buf len */
                buf_desc->len += sizeof(struct smcd_cdc_msg);
                smc_ism_unregister_dmb(lgr->smcd, buf_desc);
        } else {
                kfree(buf_desc->cpu_addr);
        }
        kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
                         struct smc_buf_desc *buf_desc)
{
        if (lgr->is_smcd)
                smcd_buf_free(lgr, is_rmb, buf_desc);
        else
                smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
        struct smc_buf_desc *buf_desc, *bf_desc;
        struct list_head *buf_list;
        int i;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                if (is_rmb)
                        buf_list = &lgr->rmbs[i];
                else
                        buf_list = &lgr->sndbufs[i];
                list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
                                         list) {
                        list_del(&buf_desc->list);
                        smc_buf_free(lgr, is_rmb, buf_desc);
                }
        }
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
        /* free send buffers */
        __smc_lgr_free_bufs(lgr, false);
        /* free rmbs */
        __smc_lgr_free_bufs(lgr, true);
}

/* won't be freed until no one accesses the lgr anymore */
static void __smc_lgr_free(struct smc_link_group *lgr)
{
        smc_lgr_free_bufs(lgr);
        if (lgr->is_smcd) {
                if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
                        wake_up(&lgr->smcd->lgrs_deleted);
        } else {
                smc_wr_free_lgr_mem(lgr);
                if (!atomic_dec_return(&lgr_cnt))
                        wake_up(&lgrs_deleted);
        }
        kfree(lgr);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
        int i;

        if (!lgr->is_smcd) {
                down_write(&lgr->llc_conf_mutex);
                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                        if (lgr->lnk[i].state != SMC_LNK_UNUSED)
                                smcr_link_clear(&lgr->lnk[i], false);
                }
                up_write(&lgr->llc_conf_mutex);
                smc_llc_lgr_clear(lgr);
        }

        destroy_workqueue(lgr->tx_wq);
        if (lgr->is_smcd) {
                smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
                put_device(lgr->smcd->ops->get_dev(lgr->smcd));
        }
        smc_lgr_put(lgr); /* theoretically last lgr_put */
}

void smc_lgr_hold(struct smc_link_group *lgr)
{
        refcount_inc(&lgr->refcnt);
}

void smc_lgr_put(struct smc_link_group *lgr)
{
        if (refcount_dec_and_test(&lgr->refcnt))
                __smc_lgr_free(lgr);
}
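
/* Link group lifetime mirrors the link scheme: smc_lgr_create() sets
 * the refcount to 1, every link holds a reference (lgr_hold in
 * smcr_link_init()), smc_lgr_free() drops the initial one, and the
 * last smc_lgr_put() ends up in __smc_lgr_free().
 */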
1407
1408 static void smc_sk_wake_ups(struct smc_sock *smc)
1409 {
1410         smc->sk.sk_write_space(&smc->sk);
1411         smc->sk.sk_data_ready(&smc->sk);
1412         smc->sk.sk_state_change(&smc->sk);
1413 }
1414
1415 /* kill a connection */
1416 static void smc_conn_kill(struct smc_connection *conn, bool soft)
1417 {
1418         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
1419
1420         if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
1421                 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
1422         else
1423                 smc_close_abort(conn);
1424         conn->killed = 1;
1425         smc->sk.sk_err = ECONNABORTED;
1426         smc_sk_wake_ups(smc);
1427         if (conn->lgr->is_smcd) {
1428                 smc_ism_unset_conn(conn);
1429                 if (soft)
1430                         tasklet_kill(&conn->rx_tsklet);
1431                 else
1432                         tasklet_unlock_wait(&conn->rx_tsklet);
1433         } else {
1434                 smc_cdc_wait_pend_tx_wr(conn);
1435         }
1436         smc_lgr_unregister_conn(conn);
1437         smc_close_active_abort(smc);
1438 }
1439
1440 static void smc_lgr_cleanup(struct smc_link_group *lgr)
1441 {
1442         if (lgr->is_smcd) {
1443                 smc_ism_signal_shutdown(lgr);
1444         } else {
1445                 u32 rsn = lgr->llc_termination_rsn;
1446
1447                 if (!rsn)
1448                         rsn = SMC_LLC_DEL_PROG_INIT_TERM;
1449                 smc_llc_send_link_delete_all(lgr, false, rsn);
1450                 smcr_lgr_link_deactivate_all(lgr);
1451         }
1452 }
1453
1454 /* terminate link group
1455  * @soft: true if link group shutdown can take its time
1456  *        false if immediate link group shutdown is required
1457  */
1458 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
1459 {
1460         struct smc_connection *conn;
1461         struct smc_sock *smc;
1462         struct rb_node *node;
1463
1464         if (lgr->terminating)
1465                 return; /* lgr already terminating */
1466         /* cancel free_work sync, will terminate when lgr->freeing is set */
1467         cancel_delayed_work(&lgr->free_work);
1468         lgr->terminating = 1;
1469
1470         /* kill remaining link group connections */
1471         read_lock_bh(&lgr->conns_lock);
1472         node = rb_first(&lgr->conns_all);
1473         while (node) {
1474                 read_unlock_bh(&lgr->conns_lock);
1475                 conn = rb_entry(node, struct smc_connection, alert_node);
1476                 smc = container_of(conn, struct smc_sock, conn);
1477                 sock_hold(&smc->sk); /* sock_put below */
1478                 lock_sock(&smc->sk);
1479                 smc_conn_kill(conn, soft);
1480                 release_sock(&smc->sk);
1481                 sock_put(&smc->sk); /* sock_hold above */
1482                 read_lock_bh(&lgr->conns_lock);
1483                 node = rb_first(&lgr->conns_all);
1484         }
1485         read_unlock_bh(&lgr->conns_lock);
1486         smc_lgr_cleanup(lgr);
1487         smc_lgr_free(lgr);
1488 }
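
/* Note on the termination loop above: conns_lock is dropped before each
 * smc_conn_kill() because lock_sock() may sleep, and rb_first() is
 * re-read under the lock afterwards; since smc_lgr_unregister_conn()
 * removed the killed node, the restarted walk always sees the next
 * remaining connection.
 */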
1489
1490 /* unlink link group and schedule termination */
1491 void smc_lgr_terminate_sched(struct smc_link_group *lgr)
1492 {
1493         spinlock_t *lgr_lock;
1494
1495         smc_lgr_list_head(lgr, &lgr_lock);
1496         spin_lock_bh(lgr_lock);
1497         if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
1498                 spin_unlock_bh(lgr_lock);
1499                 return; /* lgr already terminating */
1500         }
1501         list_del_init(&lgr->list);
1502         lgr->freeing = 1;
1503         spin_unlock_bh(lgr_lock);
1504         schedule_work(&lgr->terminate_work);
1505 }
1506
1507 /* Called when peer lgr shutdown (normally or abnormally) is received */
1508 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
1509 {
1510         struct smc_link_group *lgr, *l;
1511         LIST_HEAD(lgr_free_list);
1512
1513         /* run common cleanup function and build free list */
1514         spin_lock_bh(&dev->lgr_lock);
1515         list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
1516                 if ((!peer_gid || lgr->peer_gid == peer_gid) &&
1517                     (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
1518                         if (peer_gid) /* peer triggered termination */
1519                                 lgr->peer_shutdown = 1;
1520                         list_move(&lgr->list, &lgr_free_list);
1521                         lgr->freeing = 1;
1522                 }
1523         }
1524         spin_unlock_bh(&dev->lgr_lock);
1525
1526         /* cancel the regular free workers and actually free lgrs */
1527         list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
1528                 list_del_init(&lgr->list);
1529                 schedule_work(&lgr->terminate_work);
1530         }
1531 }
1532
1533 /* Called when an SMCD device is removed or the smc module is unloaded */
1534 void smc_smcd_terminate_all(struct smcd_dev *smcd)
1535 {
1536         struct smc_link_group *lgr, *lg;
1537         LIST_HEAD(lgr_free_list);
1538
1539         spin_lock_bh(&smcd->lgr_lock);
1540         list_splice_init(&smcd->lgr_list, &lgr_free_list);
1541         list_for_each_entry(lgr, &lgr_free_list, list)
1542                 lgr->freeing = 1;
1543         spin_unlock_bh(&smcd->lgr_lock);
1544
1545         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
1546                 list_del_init(&lgr->list);
1547                 __smc_lgr_terminate(lgr, false);
1548         }
1549
1550         if (atomic_read(&smcd->lgr_cnt))
1551                 wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
1552 }
1553
1554 /* Called when an SMCR device is removed or the smc module is unloaded.
1555  * If smcibdev is given, all SMCR link groups using this device are terminated.
1556  * If smcibdev is NULL, all SMCR link groups are terminated.
1557  */
1558 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
1559 {
1560         struct smc_link_group *lgr, *lg;
1561         LIST_HEAD(lgr_free_list);
1562         int i;
1563
1564         spin_lock_bh(&smc_lgr_list.lock);
1565         if (!smcibdev) {
1566                 list_splice_init(&smc_lgr_list.list, &lgr_free_list);
1567                 list_for_each_entry(lgr, &lgr_free_list, list)
1568                         lgr->freeing = 1;
1569         } else {
1570                 list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
1571                         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1572                                 if (lgr->lnk[i].smcibdev == smcibdev)
1573                                         smcr_link_down_cond_sched(&lgr->lnk[i]);
1574                         }
1575                 }
1576         }
1577         spin_unlock_bh(&smc_lgr_list.lock);
1578
1579         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
1580                 list_del_init(&lgr->list);
1581                 smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
1582                 __smc_lgr_terminate(lgr, false);
1583         }
1584
1585         if (smcibdev) {
1586                 if (atomic_read(&smcibdev->lnk_cnt))
1587                         wait_event(smcibdev->lnks_deleted,
1588                                    !atomic_read(&smcibdev->lnk_cnt));
1589         } else {
1590                 if (atomic_read(&lgr_cnt))
1591                         wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
1592         }
1593 }
1594
1595 /* set new lgr type and clear all asymmetric link tagging */
1596 void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
1597 {
1598         char *lgr_type = "";
1599         int i;
1600
1601         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
1602                 if (smc_link_usable(&lgr->lnk[i]))
1603                         lgr->lnk[i].link_is_asym = false;
1604         if (lgr->type == new_type)
1605                 return;
1606         lgr->type = new_type;
1607
1608         switch (lgr->type) {
1609         case SMC_LGR_NONE:
1610                 lgr_type = "NONE";
1611                 break;
1612         case SMC_LGR_SINGLE:
1613                 lgr_type = "SINGLE";
1614                 break;
1615         case SMC_LGR_SYMMETRIC:
1616                 lgr_type = "SYMMETRIC";
1617                 break;
1618         case SMC_LGR_ASYMMETRIC_PEER:
1619                 lgr_type = "ASYMMETRIC_PEER";
1620                 break;
1621         case SMC_LGR_ASYMMETRIC_LOCAL:
1622                 lgr_type = "ASYMMETRIC_LOCAL";
1623                 break;
1624         }
1625         pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: "
1626                             "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
1627                             lgr->net->net_cookie, lgr_type, lgr->pnet_id);
1628 }
1629
1630 /* set new lgr type and tag a link as asymmetric */
1631 void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
1632                             enum smc_lgr_type new_type, int asym_lnk_idx)
1633 {
1634         smcr_lgr_set_type(lgr, new_type);
1635         lgr->lnk[asym_lnk_idx].link_is_asym = true;
1636 }
1637
1638 /* abort connection, abort_work scheduled from tasklet context */
1639 static void smc_conn_abort_work(struct work_struct *work)
1640 {
1641         struct smc_connection *conn = container_of(work,
1642                                                    struct smc_connection,
1643                                                    abort_work);
1644         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
1645
1646         lock_sock(&smc->sk);
1647         smc_conn_kill(conn, true);
1648         release_sock(&smc->sk);
1649         sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
1650 }
1651
1652 void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
1653 {
1654         struct smc_link_group *lgr, *n;
1655
1656         list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1657                 struct smc_link *link;
1658
1659                 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1660                             SMC_MAX_PNETID_LEN) ||
1661                     lgr->type == SMC_LGR_SYMMETRIC ||
1662                     lgr->type == SMC_LGR_ASYMMETRIC_PEER ||
1663                     !rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
1664                         continue;
1665
1666                 /* trigger local add link processing */
1667                 link = smc_llc_usable_link(lgr);
1668                 if (link)
1669                         smc_llc_add_link_local(link);
1670         }
1671 }
1672
1673 /* link is down - switch connections to alternate link,
1674  * must be called under lgr->llc_conf_mutex lock
1675  */
1676 static void smcr_link_down(struct smc_link *lnk)
1677 {
1678         struct smc_link_group *lgr = lnk->lgr;
1679         struct smc_link *to_lnk;
1680         int del_link_id;
1681
1682         if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
1683                 return;
1684
1685         to_lnk = smc_switch_conns(lgr, lnk, true);
1686         if (!to_lnk) { /* no backup link available */
1687                 smcr_link_clear(lnk, true);
1688                 return;
1689         }
1690         smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
1691         del_link_id = lnk->link_id;
1692
1693         if (lgr->role == SMC_SERV) {
1694                 /* trigger local delete link processing */
1695                 smc_llc_srv_delete_link_local(to_lnk, del_link_id);
1696         } else {
1697                 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
1698                         /* another llc task is ongoing */
1699                         up_write(&lgr->llc_conf_mutex);
1700                         wait_event_timeout(lgr->llc_flow_waiter,
1701                                 (list_empty(&lgr->list) ||
1702                                  lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
1703                                 SMC_LLC_WAIT_TIME);
1704                         down_write(&lgr->llc_conf_mutex);
1705                 }
1706                 if (!list_empty(&lgr->list)) {
1707                         smc_llc_send_delete_link(to_lnk, del_link_id,
1708                                                  SMC_LLC_REQ, true,
1709                                                  SMC_LLC_DEL_LOST_PATH);
1710                         smcr_link_clear(lnk, true);
1711                 }
1712                 wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */
1713         }
1714 }
1715
1716 /* must be called under lgr->llc_conf_mutex lock */
1717 void smcr_link_down_cond(struct smc_link *lnk)
1718 {
1719         if (smc_link_downing(&lnk->state)) {
1720                 trace_smcr_link_down(lnk, __builtin_return_address(0));
1721                 smcr_link_down(lnk);
1722         }
1723 }
1724
1725 /* will get the lgr->llc_conf_mutex lock */
1726 void smcr_link_down_cond_sched(struct smc_link *lnk)
1727 {
1728         if (smc_link_downing(&lnk->state)) {
1729                 trace_smcr_link_down(lnk, __builtin_return_address(0));
1730                 schedule_work(&lnk->link_down_wrk);
1731         }
1732 }
1733
1734 void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
1735 {
1736         struct smc_link_group *lgr, *n;
1737         int i;
1738
1739         list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1740                 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1741                             SMC_MAX_PNETID_LEN))
1742                         continue; /* lgr is not affected */
1743                 if (list_empty(&lgr->list))
1744                         continue;
1745                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1746                         struct smc_link *lnk = &lgr->lnk[i];
1747
1748                         if (smc_link_usable(lnk) &&
1749                             lnk->smcibdev == smcibdev && lnk->ibport == ibport)
1750                                 smcr_link_down_cond_sched(lnk);
1751                 }
1752         }
1753 }
1754
1755 static void smc_link_down_work(struct work_struct *work)
1756 {
1757         struct smc_link *link = container_of(work, struct smc_link,
1758                                              link_down_wrk);
1759         struct smc_link_group *lgr = link->lgr;
1760
1761         if (list_empty(&lgr->list))
1762                 return;
1763         wake_up_all(&lgr->llc_msg_waiter);
1764         down_write(&lgr->llc_conf_mutex);
1765         smcr_link_down(link);
1766         up_write(&lgr->llc_conf_mutex);
1767 }
1768
1769 static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
1770                                   struct netdev_nested_priv *priv)
1771 {
1772         unsigned short *vlan_id = (unsigned short *)priv->data;
1773
1774         if (is_vlan_dev(lower_dev)) {
1775                 *vlan_id = vlan_dev_vlan_id(lower_dev);
1776                 return 1;
1777         }
1778
1779         return 0;
1780 }
1781
1782 /* Determine the VLAN of the internal TCP socket. */
1783 int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
1784 {
1785         struct dst_entry *dst = sk_dst_get(clcsock->sk);
1786         struct netdev_nested_priv priv;
1787         struct net_device *ndev;
1788         int rc = 0;
1789
1790         ini->vlan_id = 0;
1791         if (!dst) {
1792                 rc = -ENOTCONN;
1793                 goto out;
1794         }
1795         if (!dst->dev) {
1796                 rc = -ENODEV;
1797                 goto out_rel;
1798         }
1799
1800         ndev = dst->dev;
1801         if (is_vlan_dev(ndev)) {
1802                 ini->vlan_id = vlan_dev_vlan_id(ndev);
1803                 goto out_rel;
1804         }
1805
1806         priv.data = (void *)&ini->vlan_id;
1807         rtnl_lock();
1808         netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv);
1809         rtnl_unlock();
1810
1811 out_rel:
1812         dst_release(dst);
1813 out:
1814         return rc;
1815 }
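
/* Example (illustrative device names): if the clcsock route resolves to
 * eth0.100, the is_vlan_dev() check matches directly and ini->vlan_id
 * becomes 100; if it resolves to an upper device stacked on a VLAN, the
 * netdev_walk_all_lower_dev() pass finds the VLAN in the lower-device
 * chain instead.
 */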
1816
1817 static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
1818                            u8 peer_systemid[],
1819                            u8 peer_gid[],
1820                            u8 peer_mac_v1[],
1821                            enum smc_lgr_role role, u32 clcqpn,
1822                            struct net *net)
1823 {
1824         struct smc_link *lnk;
1825         int i;
1826
1827         if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
1828             lgr->role != role)
1829                 return false;
1830
1831         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1832                 lnk = &lgr->lnk[i];
1833
1834                 if (!smc_link_active(lnk))
1835                         continue;
1836                 /* use verbs API to check netns, instead of lgr->net */
1837                 if (!rdma_dev_access_netns(lnk->smcibdev->ibdev, net))
1838                         return false;
1839                 if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) &&
1840                     !memcmp(lnk->peer_gid, peer_gid, SMC_GID_SIZE) &&
1841                     (smcr_version == SMC_V2 ||
1842                      !memcmp(lnk->peer_mac, peer_mac_v1, ETH_ALEN)))
1843                         return true;
1844         }
1845         return false;
1846 }
1847
1848 static bool smcd_lgr_match(struct smc_link_group *lgr,
1849                            struct smcd_dev *smcismdev, u64 peer_gid)
1850 {
1851         return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
1852 }
1853
1854 /* create a new SMC connection (and a new link group if necessary) */
1855 int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
1856 {
1857         struct smc_connection *conn = &smc->conn;
1858         struct net *net = sock_net(&smc->sk);
1859         struct list_head *lgr_list;
1860         struct smc_link_group *lgr;
1861         enum smc_lgr_role role;
1862         spinlock_t *lgr_lock;
1863         int rc = 0;
1864
1865         lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
1866                                   &smc_lgr_list.list;
1867         lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
1868                                   &smc_lgr_list.lock;
1869         ini->first_contact_local = 1;
1870         role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
1871         if (role == SMC_CLNT && ini->first_contact_peer)
1872                 /* create new link group as well */
1873                 goto create;
1874
1875         /* determine if an existing link group can be reused */
1876         spin_lock_bh(lgr_lock);
1877         list_for_each_entry(lgr, lgr_list, list) {
1878                 write_lock_bh(&lgr->conns_lock);
1879                 if ((ini->is_smcd ?
1880                      smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
1881                                     ini->ism_peer_gid[ini->ism_selected]) :
1882                      smcr_lgr_match(lgr, ini->smcr_version,
1883                                     ini->peer_systemid,
1884                                     ini->peer_gid, ini->peer_mac, role,
1885                                     ini->ib_clcqpn, net)) &&
1886                     !lgr->sync_err &&
1887                     (ini->smcd_version == SMC_V2 ||
1888                      lgr->vlan_id == ini->vlan_id) &&
1889                     (role == SMC_CLNT || ini->is_smcd ||
1890                     (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
1891                       !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
1892                         /* link group found */
1893                         ini->first_contact_local = 0;
1894                         conn->lgr = lgr;
1895                         rc = smc_lgr_register_conn(conn, false);
1896                         write_unlock_bh(&lgr->conns_lock);
1897                         if (!rc && delayed_work_pending(&lgr->free_work))
1898                                 cancel_delayed_work(&lgr->free_work);
1899                         break;
1900                 }
1901                 write_unlock_bh(&lgr->conns_lock);
1902         }
1903         spin_unlock_bh(lgr_lock);
1904         if (rc)
1905                 return rc;
1906
1907         if (role == SMC_CLNT && !ini->first_contact_peer &&
1908             ini->first_contact_local) {
1909                 /* Server reuses a link group, but Client wants to start
1910                  * a new one
1911                  * send out_of_sync decline, reason synchr. error
1912                  */
1913                 return SMC_CLC_DECL_SYNCERR;
1914         }
1915
1916 create:
1917         if (ini->first_contact_local) {
1918                 rc = smc_lgr_create(smc, ini);
1919                 if (rc)
1920                         goto out;
1921                 lgr = conn->lgr;
1922                 write_lock_bh(&lgr->conns_lock);
1923                 rc = smc_lgr_register_conn(conn, true);
1924                 write_unlock_bh(&lgr->conns_lock);
1925                 if (rc) {
1926                         smc_lgr_cleanup_early(lgr);
1927                         goto out;
1928                 }
1929         }
1930         smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
1931         if (!conn->lgr->is_smcd)
1932                 smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
1933         conn->freed = 0;
1934         conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
1935         conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
1936         conn->urg_state = SMC_URG_READ;
1937         init_waitqueue_head(&conn->cdc_pend_tx_wq);
1938         INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
1939         if (ini->is_smcd) {
1940                 conn->rx_off = sizeof(struct smcd_cdc_msg);
1941                 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
1942         } else {
1943                 conn->rx_off = 0;
1944         }
1945 #ifndef KERNEL_HAS_ATOMIC64
1946         spin_lock_init(&conn->acurs_lock);
1947 #endif
1948
1949 out:
1950         return rc;
1951 }
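
/* First-contact handling above: a client jumps straight to "create"
 * when the peer requested first contact; a client that found no
 * reusable link group although the server reuses one declines with
 * SMC_CLC_DECL_SYNCERR, keeping both sides' link group views in sync.
 */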
1952
1953 #define SMCD_DMBE_SIZES         6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
1954 #define SMCR_RMBE_SIZES         5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */
1955
1956 /* convert the RMB size into the compressed notation (minimum 16K, see
1957  * SMCD/R_DMBE_SIZES).
1958  * In contrast to plain ilog2, this rounds towards the next power of 2,
1959  * so the socket application gets at least its desired sndbuf / rcvbuf size.
1960  */
1961 static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
1962 {
1963         const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
1964         u8 compressed;
1965
1966         if (size <= SMC_BUF_MIN_SIZE)
1967                 return 0;
1968
1969         size = (size - 1) >> 14;  /* convert to 16K multiple */
1970         compressed = min_t(u8, ilog2(size) + 1,
1971                            is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
1972
1973         if (!is_smcd && is_rmb)
1974                 /* RMBs are backed by & limited to max size of scatterlists */
1975                 compressed = min_t(u8, compressed, ilog2(max_scat >> 14));
1976
1977         return compressed;
1978 }
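
/* Worked example for smc_compress_bufsize() (SMC-R sndbuf, i.e.
 * is_smcd = false, is_rmb = false): a request of 70000 bytes gives
 * (70000 - 1) >> 14 = 4 and ilog2(4) + 1 = 3; uncompressing 3 yields
 * 1 << (3 + 14) = 131072 bytes, so the socket gets at least the
 * requested size, rounded up to the next power of 2.
 */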
1979
1980 /* convert the RMB size from compressed notation into integer */
1981 int smc_uncompress_bufsize(u8 compressed)
1982 {
1983         u32 size;
1984
1985         size = 0x00000001 << (((int)compressed) + 14);
1986         return (int)size;
1987 }
1988
1989 /* try to reuse a sndbuf or rmb description slot for a certain
1990  * buffer size; if not available, return NULL
1991  */
1992 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
1993                                              struct rw_semaphore *lock,
1994                                              struct list_head *buf_list)
1995 {
1996         struct smc_buf_desc *buf_slot;
1997
1998         down_read(lock);
1999         list_for_each_entry(buf_slot, buf_list, list) {
2000                 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
2001                         up_read(lock);
2002                         return buf_slot;
2003                 }
2004         }
2005         up_read(lock);
2006         return NULL;
2007 }
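
/* The cmpxchg() in smc_buf_get_slot() claims a free slot atomically
 * (used 0 -> 1) under the read lock, so two connections racing for the
 * same buf_desc cannot both win; the loser moves on to the next list
 * entry or falls back to allocating a new buffer.
 */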
2008
2009 /* one of the conditions for announcing a receiver's current window size is
2010  * that it "results in a minimum increase in the window size of 10% of the
2011  * receive buffer space" [RFC7609]
2012  */
2013 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
2014 {
2015         return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
2016 }
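
/* Example: for a 64KB RMB the limit is max(65536 / 10, SOCK_MIN_SNDBUF
 * / 2) = 6553 bytes on systems where SOCK_MIN_SNDBUF / 2 is smaller,
 * i.e. the consumer cursor is only advertised once at least ~6.4KB of
 * new receive space became available.
 */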
2017
2018 /* map a buf to a link */
2019 static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
2020                              struct smc_link *lnk)
2021 {
2022         int rc, i, nents, offset, buf_size, size, access_flags;
2023         struct scatterlist *sg;
2024         void *buf;
2025
2026         if (buf_desc->is_map_ib[lnk->link_idx])
2027                 return 0;
2028
2029         if (buf_desc->is_vm) {
2030                 buf = buf_desc->cpu_addr;
2031                 buf_size = buf_desc->len;
2032                 offset = offset_in_page(buf_desc->cpu_addr);
2033                 nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE;
2034         } else {
2035                 nents = 1;
2036         }
2037
2038         rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL);
2039         if (rc)
2040                 return rc;
2041
2042         if (buf_desc->is_vm) {
2043                 /* virtually contiguous buffer */
2044                 for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) {
2045                         size = min_t(int, PAGE_SIZE - offset, buf_size);
2046                         sg_set_page(sg, vmalloc_to_page(buf), size, offset);
2047                         buf += size / sizeof(*buf);
2048                         buf_size -= size;
2049                         offset = 0;
2050                 }
2051         } else {
2052                 /* physically contiguous buffer */
2053                 sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
2054                            buf_desc->cpu_addr, buf_desc->len);
2055         }
2056
2057         /* map sg table to DMA address */
2058         rc = smc_ib_buf_map_sg(lnk, buf_desc,
2059                                is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2060         /* SMC protocol depends on mapping to one DMA address only */
2061         if (rc != nents) {
2062                 rc = -EAGAIN;
2063                 goto free_table;
2064         }
2065
2066         buf_desc->is_dma_need_sync |=
2067                 smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx;
2068
2069         if (is_rmb || buf_desc->is_vm) {
2070                 /* create a new memory region for the RMB or vzalloced sndbuf */
2071                 access_flags = is_rmb ?
2072                                IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
2073                                IB_ACCESS_LOCAL_WRITE;
2074
2075                 rc = smc_ib_get_memory_region(lnk->roce_pd, access_flags,
2076                                               buf_desc, lnk->link_idx);
2077                 if (rc)
2078                         goto buf_unmap;
2079                 smc_ib_sync_sg_for_device(lnk, buf_desc,
2080                                           is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2081         }
2082         buf_desc->is_map_ib[lnk->link_idx] = true;
2083         return 0;
2084
2085 buf_unmap:
2086         smc_ib_buf_unmap_sg(lnk, buf_desc,
2087                             is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
2088 free_table:
2089         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
2090         return rc;
2091 }
2092
2093 /* register a new buf on IB device, rmb or vzalloced sndbuf
2094  * must be called under lgr->llc_conf_mutex lock
2095  */
2096 int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
2097 {
2098         if (list_empty(&link->lgr->list))
2099                 return -ENOLINK;
2100         if (!buf_desc->is_reg_mr[link->link_idx]) {
2101                 /* register memory region for new buf */
2102                 if (buf_desc->is_vm)
2103                         buf_desc->mr[link->link_idx]->iova =
2104                                 (uintptr_t)buf_desc->cpu_addr;
2105                 if (smc_wr_reg_send(link, buf_desc->mr[link->link_idx])) {
2106                         buf_desc->is_reg_err = true;
2107                         return -EFAULT;
2108                 }
2109                 buf_desc->is_reg_mr[link->link_idx] = true;
2110         }
2111         return 0;
2112 }
2113
2114 static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock,
2115                              struct list_head *lst, bool is_rmb)
2116 {
2117         struct smc_buf_desc *buf_desc, *bf;
2118         int rc = 0;
2119
2120         down_write(lock);
2121         list_for_each_entry_safe(buf_desc, bf, lst, list) {
2122                 if (!buf_desc->used)
2123                         continue;
2124                 rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
2125                 if (rc)
2126                         goto out;
2127         }
2128 out:
2129         up_write(lock);
2130         return rc;
2131 }
2132
2133 /* map all used buffers of lgr for a new link */
2134 int smcr_buf_map_lgr(struct smc_link *lnk)
2135 {
2136         struct smc_link_group *lgr = lnk->lgr;
2137         int i, rc = 0;
2138
2139         for (i = 0; i < SMC_RMBE_SIZES; i++) {
2140                 rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
2141                                        &lgr->rmbs[i], true);
2142                 if (rc)
2143                         return rc;
2144                 rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
2145                                        &lgr->sndbufs[i], false);
2146                 if (rc)
2147                         return rc;
2148         }
2149         return 0;
2150 }
2151
2152 /* register all used buffers of lgr for a new link,
2153  * must be called under lgr->llc_conf_mutex lock
2154  */
2155 int smcr_buf_reg_lgr(struct smc_link *lnk)
2156 {
2157         struct smc_link_group *lgr = lnk->lgr;
2158         struct smc_buf_desc *buf_desc, *bf;
2159         int i, rc = 0;
2160
2161         /* reg all RMBs for a new link */
2162         down_write(&lgr->rmbs_lock);
2163         for (i = 0; i < SMC_RMBE_SIZES; i++) {
2164                 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
2165                         if (!buf_desc->used)
2166                                 continue;
2167                         rc = smcr_link_reg_buf(lnk, buf_desc);
2168                         if (rc) {
2169                                 up_write(&lgr->rmbs_lock);
2170                                 return rc;
2171                         }
2172                 }
2173         }
2174         up_write(&lgr->rmbs_lock);
2175
2176         if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
2177                 return rc;
2178
2179         /* reg all vzalloced sndbufs for a new link */
2180         down_write(&lgr->sndbufs_lock);
2181         for (i = 0; i < SMC_RMBE_SIZES; i++) {
2182                 list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
2183                         if (!buf_desc->used || !buf_desc->is_vm)
2184                                 continue;
2185                         rc = smcr_link_reg_buf(lnk, buf_desc);
2186                         if (rc) {
2187                                 up_write(&lgr->sndbufs_lock);
2188                                 return rc;
2189                         }
2190                 }
2191         }
2192         up_write(&lgr->sndbufs_lock);
2193         return rc;
2194 }
2195
2196 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
2197                                                 bool is_rmb, int bufsize)
2198 {
2199         struct smc_buf_desc *buf_desc;
2200
2201         /* try to alloc a new buffer */
2202         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
2203         if (!buf_desc)
2204                 return ERR_PTR(-ENOMEM);
2205
2206         switch (lgr->buf_type) {
2207         case SMCR_PHYS_CONT_BUFS:
2208         case SMCR_MIXED_BUFS:
2209                 buf_desc->order = get_order(bufsize);
2210                 buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
2211                                               __GFP_NOMEMALLOC | __GFP_COMP |
2212                                               __GFP_NORETRY | __GFP_ZERO,
2213                                               buf_desc->order);
2214                 if (buf_desc->pages) {
2215                         buf_desc->cpu_addr =
2216                                 (void *)page_address(buf_desc->pages);
2217                         buf_desc->len = bufsize;
2218                         buf_desc->is_vm = false;
2219                         break;
2220                 }
2221                 if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
2222                         goto out;
2223                 fallthrough;    // try virtually contiguous buf
2224         case SMCR_VIRT_CONT_BUFS:
2225                 buf_desc->order = get_order(bufsize);
2226                 buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order);
2227                 if (!buf_desc->cpu_addr)
2228                         goto out;
2229                 buf_desc->pages = NULL;
2230                 buf_desc->len = bufsize;
2231                 buf_desc->is_vm = true;
2232                 break;
2233         }
2234         return buf_desc;
2235
2236 out:
2237         kfree(buf_desc);
2238         return ERR_PTR(-EAGAIN);
2239 }
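
/* Backing store fallback in smcr_new_buf_create(): SMCR_MIXED_BUFS
 * falls through from a failed physically contiguous alloc_pages() to
 * vzalloc(), SMCR_PHYS_CONT_BUFS gives up instead (-EAGAIN), and
 * SMCR_VIRT_CONT_BUFS uses vzalloc() from the start.
 */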
2240
2241 /* map buf_desc on all usable links,
2242  * unused buffers stay mapped as long as the link is up
2243  */
2244 static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
2245                                      struct smc_buf_desc *buf_desc, bool is_rmb)
2246 {
2247         int i, rc = 0, cnt = 0;
2248
2249         /* protect against parallel link reconfiguration */
2250         down_read(&lgr->llc_conf_mutex);
2251         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
2252                 struct smc_link *lnk = &lgr->lnk[i];
2253
2254                 if (!smc_link_usable(lnk))
2255                         continue;
2256                 if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
2257                         rc = -ENOMEM;
2258                         goto out;
2259                 }
2260                 cnt++;
2261         }
2262 out:
2263         up_read(&lgr->llc_conf_mutex);
2264         if (!rc && !cnt)
2265                 rc = -EINVAL;
2266         return rc;
2267 }
2268
2269 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
2270                                                 bool is_dmb, int bufsize)
2271 {
2272         struct smc_buf_desc *buf_desc;
2273         int rc;
2274
2275         /* try to alloc a new DMB */
2276         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
2277         if (!buf_desc)
2278                 return ERR_PTR(-ENOMEM);
2279         if (is_dmb) {
2280                 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
2281                 if (rc) {
2282                         kfree(buf_desc);
2283                         if (rc == -ENOMEM)
2284                                 return ERR_PTR(-EAGAIN);
2285                         if (rc == -ENOSPC)
2286                                 return ERR_PTR(-ENOSPC);
2287                         return ERR_PTR(-EIO);
2288                 }
2289                 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
2290                 /* CDC header is stored in the buffer, so pretend it is smaller */
2291                 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
2292         } else {
2293                 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
2294                                              __GFP_NOWARN | __GFP_NORETRY |
2295                                              __GFP_NOMEMALLOC);
2296                 if (!buf_desc->cpu_addr) {
2297                         kfree(buf_desc);
2298                         return ERR_PTR(-EAGAIN);
2299                 }
2300                 buf_desc->len = bufsize;
2301         }
2302         return buf_desc;
2303 }
2304
2305 static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
2306 {
2307         struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
2308         struct smc_connection *conn = &smc->conn;
2309         struct smc_link_group *lgr = conn->lgr;
2310         struct list_head *buf_list;
2311         int bufsize, bufsize_short;
2312         struct rw_semaphore *lock;      /* lock buffer list */
2313         bool is_dgraded = false;
2314         int sk_buf_size;
2315
2316         if (is_rmb)
2317                 /* use socket recv buffer size (w/o overhead) as start value */
2318                 sk_buf_size = smc->sk.sk_rcvbuf;
2319         else
2320                 /* use socket send buffer size (w/o overhead) as start value */
2321                 sk_buf_size = smc->sk.sk_sndbuf;
2322
2323         for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
2324              bufsize_short >= 0; bufsize_short--) {
2325                 if (is_rmb) {
2326                         lock = &lgr->rmbs_lock;
2327                         buf_list = &lgr->rmbs[bufsize_short];
2328                 } else {
2329                         lock = &lgr->sndbufs_lock;
2330                         buf_list = &lgr->sndbufs[bufsize_short];
2331                 }
2332                 bufsize = smc_uncompress_bufsize(bufsize_short);
2333
2334                 /* check for reusable slot in the link group */
2335                 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
2336                 if (buf_desc) {
2337                         buf_desc->is_dma_need_sync = 0;
2338                         SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
2339                         SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
2340                         break; /* found reusable slot */
2341                 }
2342
2343                 if (is_smcd)
2344                         buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
2345                 else
2346                         buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
2347
2348                 if (PTR_ERR(buf_desc) == -ENOMEM)
2349                         break;
2350                 if (IS_ERR(buf_desc)) {
2351                         if (!is_dgraded) {
2352                                 is_dgraded = true;
2353                                 SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb);
2354                         }
2355                         continue;
2356                 }
2357
2358                 SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
2359                 SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
2360                 buf_desc->used = 1;
2361                 down_write(lock);
2362                 list_add(&buf_desc->list, buf_list);
2363                 up_write(lock);
2364                 break; /* found */
2365         }
2366
2367         if (IS_ERR(buf_desc))
2368                 return PTR_ERR(buf_desc);
2369
2370         if (!is_smcd) {
2371                 if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
2372                         smcr_buf_unuse(buf_desc, is_rmb, lgr);
2373                         return -ENOMEM;
2374                 }
2375         }
2376
2377         if (is_rmb) {
2378                 conn->rmb_desc = buf_desc;
2379                 conn->rmbe_size_short = bufsize_short;
2380                 smc->sk.sk_rcvbuf = bufsize;
2381                 atomic_set(&conn->bytes_to_rcv, 0);
2382                 conn->rmbe_update_limit =
2383                         smc_rmb_wnd_update_limit(buf_desc->len);
2384                 if (is_smcd)
2385                         smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
2386         } else {
2387                 conn->sndbuf_desc = buf_desc;
2388                 smc->sk.sk_sndbuf = bufsize;
2389                 atomic_set(&conn->sndbuf_space, bufsize);
2390         }
2391         return 0;
2392 }
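
/* Downgrade example for __smc_buf_create(): if sk_rcvbuf asks for 512KB
 * and the allocation fails with -EAGAIN, the loop retries with 256KB,
 * 128KB, ... down to 16KB; SMC_STAT_RMB_DOWNGRADED is counted once per
 * connection on the first such retry, while -ENOMEM aborts immediately.
 */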
2393
2394 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
2395 {
2396         if (!conn->sndbuf_desc->is_dma_need_sync)
2397                 return;
2398         if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
2399             !smc_link_active(conn->lnk))
2400                 return;
2401         smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
2402 }
2403
2404 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
2405 {
2406         int i;
2407
2408         if (!conn->rmb_desc->is_dma_need_sync)
2409                 return;
2410         if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
2411                 return;
2412         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
2413                 if (!smc_link_active(&conn->lgr->lnk[i]))
2414                         continue;
2415                 smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
2416                                        DMA_FROM_DEVICE);
2417         }
2418 }
2419
2420 /* create the send and receive buffer for an SMC socket;
2421  * receive buffers are called RMBs;
2422  * (even though the SMC protocol allows more than one RMB-element per RMB,
2423  * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
2424  * extra RMB for every connection in a link group)
2425  */
2426 int smc_buf_create(struct smc_sock *smc, bool is_smcd)
2427 {
2428         int rc;
2429
2430         /* create send buffer */
2431         rc = __smc_buf_create(smc, is_smcd, false);
2432         if (rc)
2433                 return rc;
2434         /* create rmb */
2435         rc = __smc_buf_create(smc, is_smcd, true);
2436         if (rc) {
2437                 down_write(&smc->conn.lgr->sndbufs_lock);
2438                 list_del(&smc->conn.sndbuf_desc->list);
2439                 up_write(&smc->conn.lgr->sndbufs_lock);
2440                 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
2441                 smc->conn.sndbuf_desc = NULL;
2442         }
2443         return rc;
2444 }
2445
2446 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
2447 {
2448         int i;
2449
2450         for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
2451                 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
2452                         return i;
2453         }
2454         return -ENOSPC;
2455 }
2456
2457 static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
2458                                    u32 rkey)
2459 {
2460         int i;
2461
2462         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
2463                 if (test_bit(i, lgr->rtokens_used_mask) &&
2464                     lgr->rtokens[i][lnk_idx].rkey == rkey)
2465                         return i;
2466         }
2467         return -ENOENT;
2468 }
2469
2470 /* set rtoken for a new link to an existing rmb */
2471 void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
2472                     __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
2473 {
2474         int rtok_idx;
2475
2476         rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
2477         if (rtok_idx == -ENOENT)
2478                 return;
2479         lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
2480         lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
2481 }
2482
2483 /* set rtoken for a new link whose link_id is given */
2484 void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
2485                      __be64 nw_vaddr, __be32 nw_rkey)
2486 {
2487         u64 dma_addr = be64_to_cpu(nw_vaddr);
2488         u32 rkey = ntohl(nw_rkey);
2489         bool found = false;
2490         int link_idx;
2491
2492         for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
2493                 if (lgr->lnk[link_idx].link_id == link_id) {
2494                         found = true;
2495                         break;
2496                 }
2497         }
2498         if (!found)
2499                 return;
2500         lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
2501         lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
2502 }
2503
2504 /* add a new rtoken from peer */
2505 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
2506 {
2507         struct smc_link_group *lgr = smc_get_lgr(lnk);
2508         u64 dma_addr = be64_to_cpu(nw_vaddr);
2509         u32 rkey = ntohl(nw_rkey);
2510         int i;
2511
2512         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
2513                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
2514                     lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
2515                     test_bit(i, lgr->rtokens_used_mask)) {
2516                         /* already in list */
2517                         return i;
2518                 }
2519         }
2520         i = smc_rmb_reserve_rtoken_idx(lgr);
2521         if (i < 0)
2522                 return i;
2523         lgr->rtokens[i][lnk->link_idx].rkey = rkey;
2524         lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
2525         return i;
2526 }
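
/* The rtokens array is a matrix indexed by [rmb slot][link index], e.g.
 * (illustrative access, assuming valid indexes):
 *
 *      struct smc_rtoken *rt = &lgr->rtokens[i][lnk->link_idx];
 *      // rt->rkey / rt->dma_addr address the peer RMB via this link
 *
 * smc_rtoken_set() and smc_rtoken_set2() fill the column of a newly
 * added link for rows that are already in use.
 */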
2527
2528 /* delete an rtoken from all links */
2529 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
2530 {
2531         struct smc_link_group *lgr = smc_get_lgr(lnk);
2532         u32 rkey = ntohl(nw_rkey);
2533         int i, j;
2534
2535         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
2536                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
2537                     test_bit(i, lgr->rtokens_used_mask)) {
2538                         for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
2539                                 lgr->rtokens[i][j].rkey = 0;
2540                                 lgr->rtokens[i][j].dma_addr = 0;
2541                         }
2542                         clear_bit(i, lgr->rtokens_used_mask);
2543                         return 0;
2544                 }
2545         }
2546         return -ENOENT;
2547 }
2548
2549 /* save rkey and dma_addr received from peer during clc handshake */
2550 int smc_rmb_rtoken_handling(struct smc_connection *conn,
2551                             struct smc_link *lnk,
2552                             struct smc_clc_msg_accept_confirm *clc)
2553 {
2554         conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
2555                                           clc->r0.rmb_rkey);
2556         if (conn->rtoken_idx < 0)
2557                 return conn->rtoken_idx;
2558         return 0;
2559 }
2560
2561 static void smc_core_going_away(void)
2562 {
2563         struct smc_ib_device *smcibdev;
2564         struct smcd_dev *smcd;
2565
2566         mutex_lock(&smc_ib_devices.mutex);
2567         list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
2568                 int i;
2569
2570                 for (i = 0; i < SMC_MAX_PORTS; i++)
2571                         set_bit(i, smcibdev->ports_going_away);
2572         }
2573         mutex_unlock(&smc_ib_devices.mutex);
2574
2575         mutex_lock(&smcd_dev_list.mutex);
2576         list_for_each_entry(smcd, &smcd_dev_list.list, list) {
2577                 smcd->going_away = 1;
2578         }
2579         mutex_unlock(&smcd_dev_list.mutex);
2580 }
2581
2582 /* Clean up all SMC link groups */
2583 static void smc_lgrs_shutdown(void)
2584 {
2585         struct smcd_dev *smcd;
2586
2587         smc_core_going_away();
2588
2589         smc_smcr_terminate_all(NULL);
2590
2591         mutex_lock(&smcd_dev_list.mutex);
2592         list_for_each_entry(smcd, &smcd_dev_list.list, list)
2593                 smc_smcd_terminate_all(smcd);
2594         mutex_unlock(&smcd_dev_list.mutex);
2595 }
2596
2597 static int smc_core_reboot_event(struct notifier_block *this,
2598                                  unsigned long event, void *ptr)
2599 {
2600         smc_lgrs_shutdown();
2601         smc_ib_unregister_client();
2602         smc_ism_exit();
2603         return 0;
2604 }
2605
2606 static struct notifier_block smc_reboot_notifier = {
2607         .notifier_call = smc_core_reboot_event,
2608 };
2609
2610 int __init smc_core_init(void)
2611 {
2612         return register_reboot_notifier(&smc_reboot_notifier);
2613 }
2614
2615 /* Called (from smc_exit) when module is removed */
2616 void smc_core_exit(void)
2617 {
2618         unregister_reboot_notifier(&smc_reboot_notifier);
2619         smc_lgrs_shutdown();
2620 }