From 750ecea36275514309703435c2cff4bd24d46ff1 Mon Sep 17 00:00:00 2001
From: marvinfriede <51965259+marvinfriede@users.noreply.github.com>
Date: Tue, 28 Feb 2023 09:36:13 +0100
Subject: [PATCH 1/2] Add input format with stoichiometry

---
 README.md            |  19 ++++++-
 app/cli.f90          |  37 +++++++++++++
 app/driver.f90       | 126 ++++++++++++++++++++++++++++++++++---------
 man/dftd4-fit.1.adoc |  19 ++++++-
 test/fullfitset.csv  |   2 +
 5 files changed, 175 insertions(+), 28 deletions(-)
 create mode 100644 test/fullfitset.csv

diff --git a/README.md b/README.md
index 6faef72..69d128e 100644
--- a/README.md
+++ b/README.md
@@ -68,9 +68,12 @@ The missing dispersion energy is calculated as follows:
 \left(E^{\text{complex}}_{\text{DFT}} - \sum_i^\text{monomers} E^{i}_{\text{DFT}}\right) - \left(E^{\text{complex}}_{\text{reference}} - \sum_i^\text{monomers} E^{i}_{\text{reference}} \right) = \sum_i^\text{reactants} \vartheta^i E^{i}_{\text{DFT}} - \sum_i^\text{reactants} \vartheta^i E^{i}_{\text{reference}}
 ```
 
+#### Input Format 1: No stoichiometry coefficients
+
+This format is the default (`--format 1`).
+
 Note that the first entry must be the product (stoichiometry factor $\vartheta = -1$).
 All other entries are assumed to be educts (stoichiometry factor $\vartheta = 1$).
-Variable stoichiometries are not supported at the moment.
 An example is given below:
 
 ```csv
@@ -82,7 +85,21 @@ S22x5/01-2.0, S22x5/01-A, S22x5/01-B, 6.2420992500e-04
 ...
 ```
 
+#### Input Format 2: Explicit stoichiometry coeffiecients
+
+This option can be requested with `--format 2`.
+
+Here, the stoichiometry coefficients must be given after the directory name, i.e., directory name and stoichiometry coefficient are given in an alternating fashion.
+The example from above would now look as follows:
 
+```csv
+S22x5/01-0.9, -1, S22x5/01-A, 1, S22x5/01-B, 1, 1.0007611865e-03
+S22x5/01-1.0, -1, S22x5/01-A, 1, S22x5/01-B, 1, 1.5228237266e-03
+S22x5/01-1.2, -1, S22x5/01-A, 1, S22x5/01-B, 1, 1.6586059147e-03
+S22x5/01-1.5, -1, S22x5/01-A, 1, S22x5/01-B, 1, 1.2297590834e-03
+S22x5/01-2.0, -1, S22x5/01-A, 1, S22x5/01-B, 1, 6.2420992500e-04
+...
+```
 
 For more information checkout the project help page
 
diff --git a/app/cli.f90 b/app/cli.f90
index 5b3cc9c..866c2bb 100644
--- a/app/cli.f90
+++ b/app/cli.f90
@@ -40,6 +40,8 @@ module dftd4_cli
       character(len=:), allocatable :: input
       !> Basename of the input file
       character(len=:), allocatable :: basename
+      !> Format of input file
+      integer :: format = 1
       !> Verbosity of the output
       integer :: verbosity = 2
       !> Selected optimizer
@@ -157,6 +159,15 @@ subroutine get_fit_arguments(config, list, start, error)
             call fatal_error(error, "Too many positional arguments present")
          end if
          exit
+      case("--format")
+         iarg = iarg + 1
+         call list%get(iarg, arg)
+         if (.not.allocated(arg)) then
+            call fatal_error(error, "Missing argument for format")
+            exit
+         end if
+         call get_argument_as_int(arg, config%format, error)
+         if (allocated(error)) exit
       case("-C", "--directory")
          iarg = iarg + 1
          call list%get(iarg, arg)
@@ -253,6 +264,32 @@ subroutine get_fit_arguments(config, list, start, error)
 end subroutine get_fit_arguments
 
 
+subroutine get_argument_as_int(arg, val, error)
+
+   !> Index of command line argument, range [0:command_argument_count()]
+   character(len=:), intent(in), allocatable :: arg
+
+   !> Real value
+   integer, intent(out) :: val
+
+   !> Error handling
+   type(error_type), allocatable :: error
+
+   integer :: stat
+
+   if (.not.allocated(arg)) then
+      call fatal_error(error, "Cannot read integer value, argument missing")
+      return
+   end if
+   read(arg, *, iostat=stat) val
+   if (stat /= 0) then
+      call fatal_error(error, "Cannot read integer value from '"//arg//"'")
+      return
+   end if
+
+end subroutine get_argument_as_int
+
+
 subroutine get_argument_as_real(arg, val, error)
 
    !> Index of command line argument, range [0:command_argument_count()]
diff --git a/app/driver.f90 b/app/driver.f90
index 2634e4f..d55135d 100644
--- a/app/driver.f90
+++ b/app/driver.f90
@@ -103,7 +103,7 @@ subroutine fit_main(io, config, error)
          return
       end if
 
-      call read_dataset(config%input, config%basename, dataset, config%directory, error)
+      call read_dataset(config%input, config%format, config%basename, dataset, config%directory, error)
       if (allocated(error)) return
       dataset%io = io
 
@@ -542,9 +542,11 @@ subroutine run_job(job, param, energy)
    end subroutine run_job
 
    !> Read the dataset from the input file
-   subroutine read_dataset(filename, basename, dataset, directory, error)
+   subroutine read_dataset(filename, format, basename, dataset, directory, error)
       !> File name
       character(len=*), intent(in) :: filename
+      !> Format of the input file
+      integer :: format
       !> Basename of the coordinate file
       character(len=*), intent(in) :: basename
       !> Data set for fitting
@@ -566,7 +568,9 @@ subroutine read_dataset(filename, basename, dataset, directory, error)
       do while (stat == 0)
          call getline(unit, line, stat)
          if (stat /= 0) exit
-         call read_record(line, record, entries)
+         call read_record(line, record, entries, format, error)
+         if (allocated(error)) exit
+         write(*,*) "after read"
          dataset%records = [dataset%records, record]
       end do
       close (unit)
@@ -574,47 +578,119 @@ subroutine read_dataset(filename, basename, dataset, directory, error)
       allocate (dataset%jobs(size(entries)))
       do ijob = 1, size(entries)
          call create_job(dataset%jobs(ijob), entries(ijob), basename, directory, error)
+         if (allocated(error)) exit
       end do
    end subroutine read_dataset
 
    !> Read record from a line
-   subroutine read_record(line, record, entries)
+   subroutine read_record(line, record, entries, format, error)
       !> Line
       character(len=*), intent(in) :: line
       !> Record
       type(record_type), intent(out) :: record
       !> Entries
       type(entry_type), allocatable, intent(inout) :: entries(:)
+      !> Format of input file
+      integer :: format
+      !> Error handling
+      type(error_type), allocatable :: error
 
       type(entry_type) :: lentry
-      integer :: first, last, coeff, stat
+      integer :: first, last, coeff, stat, counter
+      character(len=10) :: format_string, coeff_string
 
       allocate (record%idx(0), record%coeffs(0))
-      coeff = -1
       first = 1
       last = 0
-      do
-         last = index(line(first:), ',') + first - 2
-         if (last < first) then
-            last = len(line)
-            exit
-         end if
-
-         !print '(*(a))', line, new_line('a'), repeat(' ', first-1), repeat('=', last-first+1)
-
-         lentry%dir = trim(adjustl(line(first:last)))
-         call push_back(entries, lentry)
-
-         record%idx = [record%idx, find(entries, lentry%dir)]
-         record%coeffs = [record%coeffs, coeff]
-
-         first = last + 2
-         
-         coeff = 1
 
-      end do
+      select case(format)
+      case(1)
+         ! Stoichiometry factors are not required. First entry is taken as the 
+         ! product. All others are educts.
+         !
+         ! Example:
+         ! S22x5/01-0.9, S22x5/01-A, S22x5/01-B, 1.0007611865e-03
+         ! S22x5/01-1.0, S22x5/01-A, S22x5/01-B, 1.5228237266e-03
+         ! S22x5/01-1.2, S22x5/01-A, S22x5/01-B, 1.6586059147e-03
+         ! S22x5/01-1.5, S22x5/01-A, S22x5/01-B, 1.2297590834e-03
+         ! S22x5/01-2.0, S22x5/01-A, S22x5/01-B, 6.2420992500e-04
+         ! ...
+         coeff = -1
+         do
+            last = index(line(first:), ',') + first - 2
+            if (last < first) then
+               last = len(line)
+               exit
+            end if
+
+            !print '(*(a))', line, new_line('a'), repeat(' ', first-1), repeat('=', last-first+1)
+
+            lentry%dir = trim(adjustl(line(first:last)))
+            call push_back(entries, lentry)
+
+            record%idx = [record%idx, find(entries, lentry%dir)]
+            record%coeffs = [record%coeffs, coeff]
+
+            first = last + 2
+            
+            coeff = 1
+         end do
+      case(2)
+         ! Stoichiometry factors are explicitly given after the directory.
+         !
+         ! Example:
+         ! S22x5/01-0.9, -1, S22x5/01-A, 1, S22x5/01-B, 1, 1.0007611865e-03
+         ! S22x5/01-1.0, -1, S22x5/01-A, 1, S22x5/01-B, 1, 1.5228237266e-03
+         ! S22x5/01-1.2, -1, S22x5/01-A, 1, S22x5/01-B, 1, 1.6586059147e-03
+         ! S22x5/01-1.5, -1, S22x5/01-A, 1, S22x5/01-B, 1, 1.2297590834e-03
+         ! S22x5/01-2.0, -1, S22x5/01-A, 1, S22x5/01-B, 1, 6.2420992500e-04
+         ! ...
+         counter = 1
+         do
+            last = index(line(first:), ',') + first - 2
+            if (last < first) then
+               last = len(line)
+               exit
+            end if
+
+            ! print '(*(a))', line, new_line('a'), repeat(' ', first-1), repeat('=', last-first+1)
+            
+            if (modulo(counter, 2) == 0) then
+               coeff_string = trim(adjustl(line(first:last)))
+               read(coeff_string, "(I4)", iostat=stat) coeff
+               write(*, *) coeff_string, coeff, stat
+               
+               if (stat /= 0 ) then
+                  call fatal_error(error, "Cannot convert stoichiometry coefficient '"//coeff_string//"' to integer")
+               write(*, *) "hellpssso"
+                  return
+               end if
+               write(*, *) "hellpo"
+
+               record%coeffs = [record%coeffs, coeff]
+            else
+               lentry%dir = trim(adjustl(line(first:last)))
+               call push_back(entries, lentry)
+               
+               record%idx = [record%idx, find(entries, lentry%dir)]
+            end if
+
+            write(*, *) record%coeffs
+
+            first = last + 2
+            counter = counter + 1
+         end do
+      case default
+         write (format_string, "(I4)") format
+         call fatal_error(error, "Unknown format option '"//trim(format_string)//"'")
+         return
+      end select
 
       read (line(first:), *, iostat=stat) record%reference
+      if (stat /= 0 ) then
+         call fatal_error(error, "Cannot read reference '"//line(first:)//"'")
+         return
+      end if
    end subroutine read_record
 
    !> Add new entry to table
diff --git a/man/dftd4-fit.1.adoc b/man/dftd4-fit.1.adoc
index 536e5c3..2fecd7d 100644
--- a/man/dftd4-fit.1.adoc
+++ b/man/dftd4-fit.1.adoc
@@ -15,7 +15,7 @@ dftd4-fit - Damping parameter optimization for DFT-D4
 
 == Description
 
-Optimization driver for damping parameters in DFT-D4 using NLOpt.
+Optimization driver for damping parameters in DFT-D4 using NLOpt or MINPACK.
 As input a data set is required containing the missing dispersion energies
 for the respective functional in Hartree.
 
@@ -31,8 +31,20 @@ S22x5/01-2.0, S22x5/01-A, S22x5/01-B, 6.2420992500e-04
 ...
 ----
 
+Alternatively, the stoichiometry factors of the reactants can be given explicitly (*--format 2*):
+
+[source,csv]
+----
+S22x5/01-0.9, -1, S22x5/01-A, 1, S22x5/01-B, 1, 1.0007611865e-03
+S22x5/01-1.0, -1, S22x5/01-A, 1, S22x5/01-B, 1, 1.5228237266e-03
+S22x5/01-1.2, -1, S22x5/01-A, 1, S22x5/01-B, 1, 1.6586059147e-03
+S22x5/01-1.5, -1, S22x5/01-A, 1, S22x5/01-B, 1, 1.2297590834e-03
+S22x5/01-2.0, -1, S22x5/01-A, 1, S22x5/01-B, 1, 6.2420992500e-04
+...
+----
+
 The geometries for the individual records will be read from `mol.xyz` files in
-the respective directories, like present in https://github.com/dftd4/dftd4-fitset.
+the respective directories as in https://github.com/dftd4/dftd4-fitset.
 
 
 == Options
@@ -55,6 +67,9 @@ the respective directories, like present in https://github.com/dftd4/dftd4-fitse
 *-C, --directory* _dir_::
   Directory containing data set
 
+*--format* _integer_::
+  Type of input format
+
 *--ftol* _real_::
   Tolerance for convergence of error on data set
 
diff --git a/test/fullfitset.csv b/test/fullfitset.csv
new file mode 100644
index 0000000..7e11421
--- /dev/null
+++ b/test/fullfitset.csv
@@ -0,0 +1,2 @@
+GMTKN55/S22/01, -1, GMTKN55/S22/01a, 2, -6.86364e-04
+

From 08343f3450436e11a7c934b41786d042b5813c44 Mon Sep 17 00:00:00 2001
From: marvinfriede <51965259+marvinfriede@users.noreply.github.com>
Date: Tue, 28 Feb 2023 10:36:49 +0100
Subject: [PATCH 2/2] Fix error handling

---
 app/driver.f90      | 15 ++++-----------
 test/fullfitset.csv |  2 --
 2 files changed, 4 insertions(+), 13 deletions(-)
 delete mode 100644 test/fullfitset.csv

diff --git a/app/driver.f90 b/app/driver.f90
index d55135d..c9f4a70 100644
--- a/app/driver.f90
+++ b/app/driver.f90
@@ -569,8 +569,7 @@ subroutine read_dataset(filename, format, basename, dataset, directory, error)
          call getline(unit, line, stat)
          if (stat /= 0) exit
          call read_record(line, record, entries, format, error)
-         if (allocated(error)) exit
-         write(*,*) "after read"
+         if (allocated(error)) return
          dataset%records = [dataset%records, record]
       end do
       close (unit)
@@ -578,7 +577,7 @@ subroutine read_dataset(filename, format, basename, dataset, directory, error)
       allocate (dataset%jobs(size(entries)))
       do ijob = 1, size(entries)
          call create_job(dataset%jobs(ijob), entries(ijob), basename, directory, error)
-         if (allocated(error)) exit
+         if (allocated(error)) return
       end do
    end subroutine read_dataset
 
@@ -593,7 +592,7 @@ subroutine read_record(line, record, entries, format, error)
       !> Format of input file
       integer :: format
       !> Error handling
-      type(error_type), allocatable :: error
+      type(error_type), allocatable, intent(out) :: error
 
       type(entry_type) :: lentry
       integer :: first, last, coeff, stat, counter
@@ -658,14 +657,10 @@ subroutine read_record(line, record, entries, format, error)
             if (modulo(counter, 2) == 0) then
                coeff_string = trim(adjustl(line(first:last)))
                read(coeff_string, "(I4)", iostat=stat) coeff
-               write(*, *) coeff_string, coeff, stat
-               
                if (stat /= 0 ) then
                   call fatal_error(error, "Cannot convert stoichiometry coefficient '"//coeff_string//"' to integer")
-               write(*, *) "hellpssso"
                   return
                end if
-               write(*, *) "hellpo"
 
                record%coeffs = [record%coeffs, coeff]
             else
@@ -675,8 +670,6 @@ subroutine read_record(line, record, entries, format, error)
                record%idx = [record%idx, find(entries, lentry%dir)]
             end if
 
-            write(*, *) record%coeffs
-
             first = last + 2
             counter = counter + 1
          end do
@@ -688,7 +681,7 @@ subroutine read_record(line, record, entries, format, error)
 
       read (line(first:), *, iostat=stat) record%reference
       if (stat /= 0 ) then
-         call fatal_error(error, "Cannot read reference '"//line(first:)//"'")
+         call fatal_error(error, "Cannot read reference energy '"//line(first:)//"'")
          return
       end if
    end subroutine read_record
diff --git a/test/fullfitset.csv b/test/fullfitset.csv
deleted file mode 100644
index 7e11421..0000000
--- a/test/fullfitset.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-GMTKN55/S22/01, -1, GMTKN55/S22/01a, 2, -6.86364e-04
-