|
| 1 | + |
| 2 | +library("tools") |
| 3 | +library("utils") |
| 4 | + |
### Extract the author list from "\author{...}" lines and normalise it to
### BibTeX form ("First Last and First Last").
###
### x: character vector of LaTeX lines, each holding one {...} group.
### Returns a character vector as long as x.
text2author <- function(x) {
    dm <- delimMatch(x)
    # delimMatch() gives the position of the opening delimiter and the length
    # of the whole delimited match; shift both ends inwards to drop the braces.
    ret <- substr(x, start = dm + 1, stop = dm + attr(dm, "match.length") - 2)
    # Resolve LaTeX ties first so that "by~Name" is handled like "by Name".
    ret <- gsub("~", " ", ret, fixed = TRUE)
    ret <- gsub("^[[:space:]]+|[[:space:]]+$", "", ret)
    # Strip only a leading "by"; removing "by " anywhere would damage names
    # such as "Abby Smith".
    ret <- sub("^by[[:space:]]+", "", ret)
    # Turn "A, B", "A,B" and "A, B, and C" into "A and B (and C)" without
    # leaving doubled blanks or a doubled "and".
    gsub("[[:space:]]*,[[:space:]]*(and[[:space:]]+)?", " and ", ret)
}
| 13 | + |
### Get the content of the {...} group of a LaTeX command.
###
### x: character vector of LaTeX lines such as "\include{paper}".
### Returns a character vector as long as x holding the text between the
### braces located by delimMatch().
tex2text <- function(x) {
    dm <- delimMatch(x)
    # Return the substring visibly (the function used to end in an
    # assignment, which made its value invisible at top level).
    substr(x, start = dm + 1, stop = dm + attr(dm, "match.length") - 2)
}
| 19 | + |
### Transform author names to BibTeX keys.
###
### x: character vector; each element lists one paper's authors separated
###    by the word "and" ("First Last and First Last").
### Returns a character vector as long as x with the authors' last words
### joined by "+", e.g. "Hothorn+Zeileis". An empty author string gives "".
author2key <- function(x) {

    # last whitespace-separated word of a single author name
    last_name <- function(author) {
        tokens <- strsplit(author, "[[:space:]]+")[[1]]
        tokens <- tokens[nzchar(tokens)]
        if (length(tokens) == 0L) "" else tokens[length(tokens)]
    }

    # Split on "and" only as a separate word; a bare "and" pattern would
    # also cut names such as "Alexandros" or "Sanders" in two.
    per_paper <- strsplit(x, "[[:space:]]+and[[:space:]]+")

    vapply(per_paper, function(authors) {
        keys <- vapply(authors, last_name, character(1), USE.NAMES = FALSE)
        paste(keys[nzchar(keys)], collapse = "+")
    }, character(1))
}
| 29 | + |
### Read relevant information from the toc file of an issue.
###
### issue: base name (no extension) of the main LaTeX file; "<issue>.tex"
###        must exist, "<issue>.pdf" is built via texi2dvi() when missing.
### Returns a data.frame with one row per paper and the columns
###   authors   - author string taken from the paper's "\author" line
###               ("" when the paper has no such line),
###   title     - chapter title from "<issue>.toc",
###   startpage - first page of the chapter (integer).
### Stops when a required file is missing or when the number of included
### papers differs from the number of chapter entries.
read_toc <- function(issue) {

    tex <- paste(issue, "tex", sep = ".")
    stopifnot(file.exists(tex))

    texc <- readLines(tex)
    includes <- texc[grep("\\\\include\\{", texc)]
    # Without this guard paste() would turn "no includes" into ".tex".
    texfiles <- if (length(includes) > 0L) {
        paste(tex2text(includes), "tex", sep = ".")
    } else {
        character(0)
    }

    # vapply() guarantees one string per paper; a paper with no "\author"
    # line gives "" and only the first "\author" line is used.
    authors <- vapply(texfiles, function(f) {
        tmp <- readLines(f)
        author_lines <- tmp[grep("^\\\\author", tmp)]
        if (length(author_lines) == 0L) return("")
        text2author(author_lines[1])
    }, character(1))

    pdf <- paste(issue, "pdf", sep = ".")
    if (!file.exists(pdf))
        texi2dvi(tex, pdf = TRUE, clean = FALSE)
    stopifnot(file.exists(pdf))

    # Check the toc file itself (the old code re-checked the tex file here).
    toc <- paste(issue, "toc", sep = ".")
    stopifnot(file.exists(toc))

    tocc <- readLines(toc)
    chapters <- tocc[grep("\\{chapter\\}", tocc)]
    ss <- strsplit(chapters, "\\}\\{")

    title <- vapply(ss, function(p) p[2], character(1))
    # The page number may be the last field of the line and then still
    # carries the closing brace; drop it before converting.
    startpage <- as.integer(sub("\\}.*$", "",
                                vapply(ss, function(p) p[3], character(1))))

    if (length(authors) != length(title))
        stop("found ", length(authors), " included papers but ",
             length(title), " chapter entries in ", toc, call. = FALSE)

    data.frame(authors = authors, title = title,
               startpage = startpage, stringsAsFactors = FALSE)
}
| 63 | + |
### Collect the issue-level metadata from "<issue>.tex".
###
### issue: base name (no extension) of the main LaTeX file.
### Returns a list with the elements volume, number, month and year, where
### month and year are the first and second blank-separated word of the
### "\date{...}" content.
read_info <- function(issue) {

    tex <- paste(issue, "tex", sep = ".")
    stopifnot(file.exists(tex))
    main_lines <- readLines(tex)

    # content of the {...} group on every line matching `pattern`
    grab <- function(pattern) {
        tex2text(main_lines[grep(pattern, main_lines)])
    }

    when <- strsplit(grab("\\\\date\\{"), " ")[[1]]

    list(volume = grab("\\\\volume\\{"),
         number = grab("\\\\volnumber\\{"),
         month = when[1],
         year = when[2])
}
| 76 | + |
### Build one BibTeX "@article" entry per paper of an issue.
###
### issue: base name (no extension) of the main LaTeX file.
### Returns a character vector with one element per row of read_toc(issue);
### papers without an author keep the empty string "". An issue without
### papers gives character(0).
toc2bib <- function(issue) {

    papers <- read_toc(issue)
    info <- read_info(issue)

    n <- nrow(papers)
    if (n == 0L) return(character(0))

    key <- paste("RJournal", author2key(papers$authors), info$year, sep = "_")
    # A paper ends on the page before the next one starts. The last paper's
    # end is not known from the toc, so its start page is used as end page.
    pages <- paste(papers$startpage,
                   c(papers$startpage[-1] - 1, max(papers$startpage)),
                   sep = "--")

    x <- character(n)

    for (i in seq_len(n)) {
        # use the full column name instead of relying on partial matching
        author <- papers$authors[i]
        if (is.na(author) || author == "") next

        x[i] <- paste0("@article{", key[i], ", \n",
                       " author = {", author, "}", ", \n",
                       " title = {", papers$title[i], "}", ", \n",
                       " journal = {R News}", ", \n",
                       " year = {", info$year, "}", ", \n",
                       " volume = {", info$volume, "}", ", \n",
                       " number = {", info$number, "}", ", \n",
                       " pages = {", pages[i], "}", ", \n",
                       " month = {", info$month, "} \n } \n")
    }
    x
}
| 101 | + |
### Extract every paper of an issue into its own PDF file.
###
### issue: base name (no extension) of the main LaTeX file; pages are taken
###        from "<issue>.pdf".
### Side effects: writes "extract.Rnw", runs Sweave() and texi2dvi() on it
### for every paper with an author and copies the resulting "extract.pdf"
### to "<key>.pdf". Assigns the global variables pdffile, p1 and p2.
### Returns NULL invisibly.
tex2pdf <- function(issue) {

    papers <- read_toc(issue)
    info <- read_info(issue)
    key <- paste("RJournal", author2key(papers$authors), info$year, sep = "_")
    spage <- papers$startpage
    # The last paper's end is not known from the toc, so only its start page
    # is extracted.
    epage <- c(papers$startpage[-1] - 1, max(papers$startpage))

    # NOTE(review): the commented \usepackage line presumably keeps Sweave
    # from adding its own \usepackage{Sweave} -- confirm.
    writeLines(c("\\documentclass[a4]{article}",
                 "%%\\usepackage{Sweave}",
                 "\\usepackage{pdfpages}",
                 "\\pagestyle{empty}",
                 "\\begin{document}",
                 "\\includepdf[pages=\\Sexpr{p1}-\\Sexpr{p2}]{\\Sexpr{pdffile}}",
                 "\\end{document}"), con = "extract.Rnw")

    # The \Sexpr{} expressions above read pdffile, p1 and p2, which is why
    # they are assigned with <<- (presumably Sweave evaluates them in the
    # global environment -- confirm before changing this).
    pdffile <<- paste(issue, "pdf", sep = ".")
    for (i in seq_len(nrow(papers))) {
        # use the full column name instead of relying on partial matching
        author <- papers$authors[i]
        if (is.na(author) || author == "") next
        p1 <<- spage[i]
        p2 <<- epage[i]
        Sweave("extract.Rnw")
        texi2dvi("extract.tex", pdf = TRUE, clean = TRUE)
        target <- paste(key[i], "pdf", sep = ".")
        if (!file.copy("extract.pdf", target, overwrite = TRUE))
            warning("could not copy extract.pdf to ", target, call. = FALSE)
    }
    #file.remove("extract.Rnw")
    #file.remove("extract.tex")
    invisible(NULL)
}
| 131 | + |
### Driver: RJournal_test.tex must exist, with each single paper being
### included via
###     \include{mypaper}

### write the BibTeX entries of the issue to RJournal.bib
writeLines(con = "RJournal.bib", text = toc2bib("RJournal_test"))

### write one PDF file per paper
tex2pdf("RJournal_test")
0 commit comments