Load packages.

How to use


# import data
# Locate the example Scopus BibTeX export shipped with the birddog package and
# convert it into a bibliometrix data frame.
# mustWork = TRUE: without it system.file() returns "" when the file cannot be
# found, and the failure only surfaces later inside convert2df().
bib <- system.file("extdata/scopus.bib", package = "birddog", mustWork = TRUE)
M <- bibliometrix::convert2df(file = bib, dbsource = "scopus", format = "bibtex")
#> 
#> Converting your scopus collection into a bibliographic dataframe
#> 
#> Done!
#> 
#> 
#> Generating affiliation field tag AU_UN from C1:  Done!
# Inspect the column names, types and first values of the converted data.
# Namespace-qualified like the rest of the script, so it does not depend on
# dplyr being attached.
dplyr::glimpse(M)
#> Rows: 2,559
#> Columns: 41
#> $ AU                   <chr> "HOSSEN MA;DIWAKAR PK;RAGI S", "LI J;ZHANG W;LIU …
#> $ DE                   <chr> NA, "BIOCRUDE OIL;  DATA-DRIVEN;  HYDROTHERMAL LI…
#> $ ID                   <chr> NA, "BIOMASS;  DECISION TREES;  FERTILIZERS;  LIQ…
#> $ C1                   <chr> "DEPARTMENT OF ELECTRICAL ENGINEERING, SOUTH DAKO…
#> $ CR                   <chr> "FAGERIA, N., BALIGAR, V., ENHANCING NITROGEN USE…
#> $ JI                   <chr> "SCI. REP.", "CHEM. ENG. J.", "ENV. SCI. EUR.", "…
#> $ AB                   <chr> "MEASURING SOIL HEALTH INDICATORS (SHIS), PARTICU…
#> $ PA                   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ AR                   <chr> "12693", "130649", "60", "5456", "5", "148527", N…
#> $ chemicals_cas        <chr> NA, NA, NA, NA, NA, NA, NA, NA, "WATER, 7732-18-5…
#> $ coden                <chr> NA, "CMEJA", NA, NA, NA, "STEVA", NA, "ASECF", "J…
#> $ RP                   <chr> "RAGI, S.; DEPARTMENT OF ELECTRICAL ENGINEERING, …
#> $ DT                   <chr> "ARTICLE", "ARTICLE", "ARTICLE", "ARTICLE", "ARTI…
#> $ DI                   <chr> "10.1038/s41598-021-90624-6", "10.1016/j.cej.2021…
#> $ BE                   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ FU                   <chr> NA, "NATIONAL NATURAL SCIENCE FOUNDATION OF CHINA…
#> $ BN                   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ SN                   <chr> "20452322", "13858947", "21904707", "20452322", "…
#> $ SO                   <chr> "SCIENTIFIC REPORTS", "CHEMICAL ENGINEERING JOURN…
#> $ LA                   <chr> "ENGLISH", "ENGLISH", "ENGLISH", "ENGLISH", "ENGL…
#> $ manufacturers        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ molecular_seqnumbers <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ TC                   <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
#> $ PN                   <chr> "1", NA, "1", "1", "1", NA, NA, NA, NA, NA, NA, N…
#> $ page_count           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ PP                   <chr> NA, NA, NA, NA, NA, NA, "2899-2915", NA, NA, NA, …
#> $ PU                   <chr> "NATURE RESEARCH", "ELSEVIER B.V.", "SPRINGER SCI…
#> $ PM                   <chr> "34135353", NA, NA, "33750837", NA, NA, NA, NA, N…
#> $ DB                   <chr> "SCOPUS", "SCOPUS", "SCOPUS", "SCOPUS", "SCOPUS",…
#> $ sponsors             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ TI                   <chr> "TOTAL NITROGEN ESTIMATION IN AGRICULTURAL SOILS …
#> $ tradenames           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ url                  <chr> "https://www.scopus.com/inward/record.uri?eid=2-s…
#> $ VL                   <chr> "11", "425", "33", "11", "31", "793", "7", "167",…
#> $ PY                   <dbl> 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2…
#> $ FX                   <chr> "THIS WORK WAS SUPPORTED IN PART BY SOUTH DAKOTA …
#> $ AU_UN                <chr> "SOUTH DAKOTA SCHOOL OF MINES AND TECHNOLOGY;SOUT…
#> $ AU1_UN               <chr> "NOTREPORTED;DEPARTMENT OF ELECTRICAL ENGINEERING…
#> $ AU_UN_NR             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ SR_FULL              <chr> "HOSSEN MA, 2021, SCI REP", "LI J, 2021, CHEM ENG…
#> $ SR                   <chr> "HOSSEN MA, 2021, SCI REP", "LI J, 2021, CHEM ENG…

# build network: bibliographic coupling
# Pipeline: coupling matrix computed from the cited references (";"-separated)
# -> undirected igraph graph -> simplified graph -> tidygraph object.
net <- M |>
    bibliometrix::biblioNetwork(analysis = "coupling", network = "references", sep = ";") |>
    igraph::graph_from_adjacency_matrix(mode = "undirected", weighted = NULL, diag = FALSE) |>
    igraph::simplify() |>
    tidygraph::as_tbl_graph()

# Attach publication metadata from M to the network nodes.
# A `name` column is built in M as "<SR>. <PY>" and used as the join key.
# The key is given explicitly so the join neither depends on which columns
# happen to be shared nor prints a "Joining, by" message.
net <- net |>
    tidygraph::activate(nodes) |>
    dplyr::left_join(
        M |>
            dplyr::mutate(name = paste(SR, PY, sep = ". ")) |>
            dplyr::select(name, PY, AB, DE, TI, DI, SO, TC, AU, SR) |>
            tibble::as_tibble(),
        by = "name"
    )

net
#> # A tbl_graph: 2559 nodes and 109407 edges
#> #
#> # An undirected simple graph with 386 components
#> #
#> # Node Data: 2,559 x 10 (active)
#>   name       PY AB        DE        TI       DI     SO        TC AU       SR    
#>   <chr>   <dbl> <chr>     <chr>     <chr>    <chr>  <chr>  <dbl> <chr>    <chr> 
#> 1 HOSSEN…  2021 MEASURIN… <NA>      TOTAL N… 10.10… SCIEN…     0 HOSSEN … HOSSE…
#> 2 LI J, …  2021 HYDROTHE… BIOCRUDE… MACHINE… 10.10… CHEMI…     0 LI J;ZH… LI J,…
#> 3 SIEBER…  2021 BACKGROU… 31P NMR;… SUBSOIL… 10.11… ENVIR…     0 SIEBERS… SIEBE…
#> 4 RATHOR…  2021 DEFICIT … <NA>      OPTIMIZ… 10.10… SCIEN…     1 RATHORE… RATHO…
#> 5 OKONOF…  2021 LAND FAR… FACTORIA… FACTORI… 10.11… SUSTA…     0 OKONOFU… OKONO…
#> 6 GUO XX…  2021 CHERRY T… CHERRY T… FERTILI… 10.10… SCIEN…     0 GUO XX;… GUO X…
#> # … with 2,553 more rows
#> #
#> # Edge Data: 109,407 x 2
#>    from    to
#>   <int> <int>
#> 1     1   336
#> 2     1   408
#> 3     1  1326
#> # … with 109,404 more rows

# components
# As the printed result below shows, sniff_components() returns a list with:
#   $components - a tibble with one row per component (component label,
#                 quantity_publications, average_age)
#   $network    - the network with a `component` column added to the node data
comp <- birddog::sniff_components(net)

# Print both elements of the result.
comp
#> $components
#> # A tibble: 386 x 3
#>    component   quantity_publications average_age
#>    <chr>                       <int>       <dbl>
#>  1 component01                  2162       2015.
#>  2 component02                     3       2020.
#>  3 component03                     3       2020.
#>  4 component04                     2       2020.
#>  5 component05                     2       2020.
#>  6 component06                     2       2020.
#>  7 component07                     2       2018 
#>  8 component08                     2       2018 
#>  9 component09                     2       2018 
#> 10 component10                     2       2018 
#> # … with 376 more rows
#> 
#> $network
#> # A tbl_graph: 2559 nodes and 109407 edges
#> #
#> # An undirected simple graph with 386 components
#> #
#> # Node Data: 2,559 x 11 (active)
#>   name      PY AB      DE      TI      DI     SO       TC AU     SR    component
#>   <chr>  <dbl> <chr>   <chr>   <chr>   <chr>  <chr> <dbl> <chr>  <chr> <chr>    
#> 1 HOSSE…  2021 MEASUR… <NA>    TOTAL … 10.10… SCIE…     0 HOSSE… HOSS… componen…
#> 2 LI J,…  2021 HYDROT… BIOCRU… MACHIN… 10.10… CHEM…     0 LI J;… LI J… componen…
#> 3 SIEBE…  2021 BACKGR… 31P NM… SUBSOI… 10.11… ENVI…     0 SIEBE… SIEB… componen…
#> 4 RATHO…  2021 DEFICI… <NA>    OPTIMI… 10.10… SCIE…     1 RATHO… RATH… componen…
#> 5 OKONO…  2021 LAND F… FACTOR… FACTOR… 10.11… SUST…     0 OKONO… OKON… componen…
#> 6 GUO X…  2021 CHERRY… CHERRY… FERTIL… 10.10… SCIE…     0 GUO X… GUO … componen…
#> # … with 2,553 more rows
#> #
#> # Edge Data: 109,407 x 2
#>    from    to
#>   <int> <int>
#> 1     1   336
#> 2     1   408
#> 3     1  1326
#> # … with 109,404 more rows

# Continue with the network whose nodes carry the component labels.
net2 <- comp[["network"]]

# groups and components
# Keep only component01 and cluster it with the Louvain algorithm.
# NOTE(review): min_group_size presumably sets the smallest group that is
# reported - confirm against the sniff_groups() documentation.
gru <- birddog::sniff_groups(
    net2,
    min_group_size = 15,
    keep_component = "component01",
    cluster_component = "component01",
    algorithm = "louvain"
)

gru
#> $aggregate
#> # A tibble: 7 x 3
#>   group           quantity_papers average_age
#>   <chr>                     <int>       <dbl>
#> 1 component01_g01             807       2014.
#> 2 component01_g02             361       2014.
#> 3 component01_g03             294       2016.
#> 4 component01_g04             290       2019.
#> 5 component01_g05             280       2017.
#> 6 component01_g06              75       2017.
#> 7 component01_g07              50       2017.
#> 
#> $network
#> # A tbl_graph: 2162 nodes and 109395 edges
#> #
#> # An undirected simple graph with 1 component
#> #
#> # Node Data: 2,162 x 14 (active)
#>   name     PY AB    DE    TI    DI    SO       TC AU    SR    component group
#>   <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>     <chr>
#> 1 HOSS…  2021 MEAS… <NA>  TOTA… 10.1… SCIE…     0 HOSS… HOSS… componen… comp…
#> 2 LI J…  2021 HYDR… BIOC… MACH… 10.1… CHEM…     0 LI J… LI J… componen… comp…
#> 3 SIEB…  2021 BACK… 31P … SUBS… 10.1… ENVI…     0 SIEB… SIEB… componen… comp…
#> 4 RATH…  2021 DEFI… <NA>  OPTI… 10.1… SCIE…     1 RATH… RATH… componen… comp…
#> 5 OKON…  2021 LAND… FACT… FACT… 10.1… SUST…     0 OKON… OKON… componen… comp…
#> 6 GUO …  2021 CHER… CHER… FERT… 10.1… SCIE…     0 GUO … GUO … componen… comp…
#> # … with 2,156 more rows, and 2 more variables: quantity_papers <int>,
#> #   average_age <dbl>
#> #
#> # Edge Data: 109,395 x 2
#>    from    to
#>   <int> <int>
#> 1     1   287
#> 2     1   347
#> 3     1  1171
#> # … with 109,392 more rows

# M + groups
# Turn the node data of the grouped network into a plain tibble and add the
# cited-references column (CR) from M, matched on SR.
# The join key is explicit and the dplyr verbs are namespace-qualified, so the
# snippet runs without dplyr attached and without a "Joining, by" message.
m_groups <- tidygraph::as_tbl_graph(gru$network) |>
    tidygraph::activate(nodes) |>
    tibble::as_tibble() |>
    dplyr::left_join(M |> dplyr::select(SR, CR), by = "SR")

dplyr::glimpse(m_groups)
#> Rows: 2,162
#> Columns: 15
#> $ name            <chr> "HOSSEN MA, 2021, SCI REP. 2021", "LI J, 2021, CHEM EN…
#> $ PY              <dbl> 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021, …
#> $ AB              <chr> "MEASURING SOIL HEALTH INDICATORS (SHIS), PARTICULARLY…
#> $ DE              <chr> NA, "BIOCRUDE OIL;  DATA-DRIVEN;  HYDROTHERMAL LIQUEFA…
#> $ TI              <chr> "TOTAL NITROGEN ESTIMATION IN AGRICULTURAL SOILS VIA A…
#> $ DI              <chr> "10.1038/s41598-021-90624-6", "10.1016/j.cej.2021.1306…
#> $ SO              <chr> "SCIENTIFIC REPORTS", "CHEMICAL ENGINEERING JOURNAL", …
#> $ TC              <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
#> $ AU              <chr> "HOSSEN MA;DIWAKAR PK;RAGI S", "LI J;ZHANG W;LIU T;YAN…
#> $ SR              <chr> "HOSSEN MA, 2021, SCI REP", "LI J, 2021, CHEM ENG J", …
#> $ component       <chr> "component01", "component01", "component01", "componen…
#> $ group           <chr> "component01_g01", "component01_g06", "component01_g01…
#> $ quantity_papers <int> 807, 75, 807, 807, 294, 807, 75, 290, 290, 807, 361, 7…
#> $ average_age     <dbl> 2013.794, 2017.240, 2013.794, 2013.794, 2016.429, 2013…
#> $ CR              <chr> "FAGERIA, N., BALIGAR, V., ENHANCING NITROGEN USE EFFI…

# keywords: frequency and tfidf per group
# Keep the group label and the keyword field (DE), dropping rows where either
# one is missing.
# Columns are selected by bare name: `.data$col` inside select() is deprecated
# by tidyselect (the pronoun remains valid inside filter()).
groups_keywords <- m_groups |>
    dplyr::select(group, DE) |>
    dplyr::filter(!is.na(.data$group), !is.na(.data$DE))

# Score the keywords of each group with sniff_tfidf(). DE stores several
# keywords per record separated by ";", hence separate_rows / sep.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
keywords_freq_tfidf <- birddog::sniff_tfidf(groups_keywords,
                                            group,
                                            DE,
                                            separate_rows = TRUE,
                                            sep = ";",
                                            n_terms = 15)

# Render the keyword table as an interactive DT widget, 10 rows per page, with
# a "Download" button collection offering CSV and Excel exports (file name
# "data"). FALSE is spelled out because `F` can be reassigned.
keywords_freq_tfidf |>
    DT::datatable(
        extensions = "Buttons",
        rownames = FALSE,
        options = list(
            dom = "Bfrtip",
            pageLength = 10,
            buttons = list(
                list(
                    extend = "collection",
                    buttons = list(
                        list(extend = "csv", filename = "data"),
                        list(extend = "excel", filename = "data")
                    ),
                    text = "Download"
                )
            )
        )
    )
# terms via NLP
# Build one lower-cased text per group: every title (TI) in the group, joined
# with ". ". Rows without a group label are dropped first.
groups_texts <- m_groups |>
    dplyr::filter(!is.na(.data$group)) |>
    dplyr::group_by(.data$group) |>
    dplyr::summarise(text = tolower(paste(.data$TI, collapse = ". ")))

# time consuming
# Extract terms group by group: split the table into a named list with one
# piece per group, run sniff_terms() on each piece and coerce every result to
# a tibble. The list names (group labels) are kept.
group_term_occorrence <- groups_texts |>
    (\(tbl) split(tbl, tbl$group))() |>
    purrr::map(\(piece) tibble::as_tibble(
        birddog::sniff_terms(data = piece,
                             groups = group,
                             text = text)
    ))
#> 2021-07-17 21:52:15 Annotating text fragment 1/1
#> 2021-07-17 21:52:42 Annotating text fragment 1/1
#> 2021-07-17 21:52:53 Annotating text fragment 1/1
#> 2021-07-17 21:53:04 Annotating text fragment 1/1
#> 2021-07-17 21:53:14 Annotating text fragment 1/1
#> 2021-07-17 21:53:24 Annotating text fragment 1/1
#> 2021-07-17 21:53:27 Annotating text fragment 1/1

# Show the term tables of the first two groups only.
group_term_occorrence[seq_len(2)]
#> $component01_g01
#> # A tibble: 444 x 4
#>    keyword              ngram  freq  rake
#>    <chr>                <int> <int> <dbl>
#>  1 yangtze river            2     2  3.61
#>  2 management practice      2     2  3.52
#>  3 vegetable production     2     2  3.49
#>  4 neural network           2     4  3.47
#>  5 integrate system         2     2  3.44
#>  6 anaerobic digestion      2     3  3.42
#>  7 sugar boot               2     2  3.42
#>  8 nitrogen cycle           2     2  3.35
#>  9 nitrogen rate            2     5  3.35
#> 10 fertilizer rate          2     3  3.30
#> # … with 434 more rows
#> 
#> $component01_g02
#> # A tibble: 159 x 4
#>    keyword               ngram  freq  rake
#>    <chr>                 <int> <int> <dbl>
#>  1 cropping system           2     2  3.51
#>  2 nitrous oxide             2     2  3.27
#>  3 organic fertilizer        2     4  3.11
#>  4 sewage sludge             2     2  3.11
#>  5 phosphorus fertilizer     2     2  3.03
#>  6 paddy field               2     2  2.92
#>  7 dairy manure              2     2  2.87
#>  8 nitrogen fertilizer       2     2  2.81
#>  9 release fertilizer        2     2  2.78
#> 10 water footprint           2     2  2.77
#> # … with 149 more rows

# tfidf and frequency for NLP terms
# Stack the per-group term tables (the list names become the `group` column),
# keep terms with ngram > 1 and freq > 2, then score them with sniff_tfidf()
# using the already-computed frequencies.
terms_freq_tfidf <- dplyr::bind_rows(group_term_occorrence, .id = "group") |>
    dplyr::filter(.data$ngram > 1, .data$freq > 2) |>
    dplyr::select(group, keyword, freq) |>
    (\(tbl) birddog::sniff_tfidf(tbl, group = group, term = keyword, frequency = freq))()

# Render the NLP-term table as an interactive DT widget, 10 rows per page,
# with a "Download" button collection offering CSV and Excel exports (file
# name "data"). FALSE is spelled out because `F` can be reassigned.
terms_freq_tfidf |>
    DT::datatable(
        extensions = "Buttons",
        rownames = FALSE,
        options = list(
            dom = "Bfrtip",
            pageLength = 10,
            buttons = list(
                list(
                    extend = "collection",
                    buttons = list(
                        list(extend = "csv", filename = "data"),
                        list(extend = "excel", filename = "data")
                    ),
                    text = "Download"
                )
            )
        )
    )
# rake top 15 NLP terms
# For each group take the first 15 rows of its term table and collapse them
# into a single "keyword (rake); keyword (rake); ..." string.
# Labels are built and collapsed inside one summarise() so every group yields
# exactly one row; summarise() results with several rows per group are
# deprecated since dplyr 1.1.0.
terms_freq_rake <- group_term_occorrence |>
    purrr::map(~ dplyr::slice(., 1:15)) |>
    dplyr::bind_rows(.id = "group") |>
    dplyr::group_by(group) |>
    dplyr::summarise(
        keywords_rake = paste(
            paste0(keyword, " (", round(rake, 2), ")"),
            collapse = "; "
        )
    )

terms_freq_rake
#> # A tibble: 7 x 2
#>   group         keywords_rake                                                   
#>   <chr>         <chr>                                                           
#> 1 component01_… yangtze river (3.61); management practice (3.52); vegetable pro…
#> 2 component01_… cropping system (3.51); nitrous oxide (3.27); organic fertilize…
#> 3 component01_… oblique type (4.13); organic fraction (3.18); organic fertilize…
#> 4 component01_… cropping system (3.76); sewage sludge (3.71); supply chain (3.4…
#> 5 component01_… supply chain (4.36); greenhouse gas (3.24); anaerobic digestion…
#> 6 component01_… environmental impact (3.44); multi-agent reinforcement (3.1); e…
#> 7 component01_… numerical simulation (2.75); parameter optimization (2.26); pad…

# Zi Pi topological positions
# Pass the publication table with its identifying columns to
# sniff_topological_positions(); the printed result below has one row per
# publication with TC, Ki, ki, Zi, Pi and a zone label.
hubs <- birddog::sniff_topological_positions(
    m_groups,
    id = SR,
    group = group,
    PY = PY,
    TI = TI,
    CR = CR,
    min.citations = 3
)
#> group 1 
#> group 2 
#> group 3 
#> group 4 
#> group 5 
#> group 6 
#> group 7

# Preview the first six rows of the result.
utils::head(hubs, n = 6L)
#> # A tibble: 6 x 8
#>   group          SR                             TC    Ki    ki    Zi    Pi zone 
#>   <chr>          <chr>                       <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 component01_g… WANG X, 2012, BIORESOUR TE…   412    18     2  2.16 0.457 noHub
#> 2 component01_g… WANG H, 2019, FIELD CROPS …    24     9     7  8.45 0.346 R6   
#> 3 component01_g… SUI B, 2013, FIELD CROPS R…    94     7     5  5.94 0.449 R6   
#> 4 component01_g… WANG H, 2017, FIELD CROPS …    48     7     5  5.94 0.449 R6   
#> 5 component01_g… YADAV SN, 1997, AM J AGRIC…    35     7     2  2.16 0.776 noHub
#> 6 component01_g… DAI J, 2015, FIELD CROPS R…    55     6     5  5.94 0.278 R5